x86: kill 8253pit.h
[linux-2.6.git] / arch/x86/kernel/process_64.c
blob 6f9dbbe65eef7c109e9d986dfca3bdda69072e9e
/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/tick.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>
#include <asm/idle.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any..
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

static ATOMIC_NOTIFIER_HEAD(idle_notifier);

void idle_notifier_register(struct notifier_block *n)
{
	atomic_notifier_chain_register(&idle_notifier, n);
}
EXPORT_SYMBOL_GPL(idle_notifier_register);

void idle_notifier_unregister(struct notifier_block *n)
{
	atomic_notifier_chain_unregister(&idle_notifier, n);
}
EXPORT_SYMBOL(idle_notifier_unregister);

void enter_idle(void)
{
	write_pda(isidle, 1);
	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
}

static void __exit_idle(void)
{
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;
	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
}

/* Called from interrupts to signify idle end */
void exit_idle(void)
{
	/* idle loop has pid 0 */
	if (current->pid)
		return;
	__exit_idle();
}

/*
 * We use this if we don't have any better
 * idle routine..
 */
static void default_idle(void)
{
	current_thread_info()->status &= ~TS_POLLING;
	/*
	 * TS_POLLING-cleared state must be visible before we
	 * test NEED_RESCHED:
	 */
	smp_mb();
	local_irq_disable();
	if (!need_resched()) {
		/* Enables interrupts one instruction before HLT.
		   x86 special cases this so there is no race. */
		safe_halt();
	} else
		local_irq_enable();
	current_thread_info()->status |= TS_POLLING;
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}

void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map, tmp = current->cpus_allowed;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) &&
			    !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));

	set_cpus_allowed(current, tmp);
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We halt the CPU with physical CPU hotplug */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	while (1)
		halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	current_thread_info()->status |= TS_POLLING;
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			tick_nohz_stop_sched_tick();

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			/*
			 * Idle routines should keep interrupts disabled
			 * from here on, until they go to idle.
			 * Otherwise, idle callbacks can misfire.
			 */
			local_irq_disable();
			enter_idle();
			idle();
			/* In many cases the interrupt that ended idle
			   has already called exit_idle. But some idle
			   loops can be woken up without interrupt. */
			__exit_idle();
		}

		tick_nohz_restart_sched_tick();
		preempt_enable_no_resched();
		schedule();
		preempt_disable();
	}
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 *
 * New with Core Duo processors, MWAIT can take some hints based on CPU
 * capability.
 */
void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__mwait(eax, ecx);
	}
}

/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
	if (!need_resched()) {
		__monitor((void *)&current_thread_info()->flags, 0, 0);
		smp_mb();
		if (!need_resched())
			__sti_mwait(0, 0);
		else
			local_irq_enable();
	} else {
		local_irq_enable();
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => All CPUs support mwait
		 */
		if (!pm_idle) {
			if (!printed) {
				printk(KERN_INFO "using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup(char *str)
{
	if (!strcmp(str, "poll")) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	} else if (!strcmp(str, "mwait"))
		force_mwait = 1;
	else
		return -1;

	boot_option_idle_override = 1;
	return 0;
}
early_param("idle", idle_setup);

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned long d0, d1, d2, d3, d6, d7;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
		regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = read_cr4();

	printk("FS:  %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS:  %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);

	get_debugreg(d0, 0);
	get_debugreg(d1, 1);
	get_debugreg(d2, 2);
	printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
	get_debugreg(d3, 3);
	get_debugreg(d6, 6);
	get_debugreg(d7, 7);
	printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(NULL, regs, (void *)(regs + 1));
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		clear_thread_flag(TIF_IO_BITMAP);
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;

	if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
		clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
		if (test_tsk_thread_flag(tsk, TIF_IA32)) {
			clear_tsk_thread_flag(tsk, TIF_IA32);
		} else {
			set_tsk_thread_flag(tsk, TIF_IA32);
			current_thread_info()->status |= TS_COMPAT;
		}
	}
	clear_tsk_thread_flag(tsk, TIF_DEBUG);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt,
				dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
	       (((u32)desc->base1) << 16) |
	       (((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + task_stack_page(p))) - 1;
	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL)
		childregs->rsp = (unsigned long)childregs;

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_tsk_thread_flag(p, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
				IO_BITMAP_BYTES);
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)

static inline void __switch_to_xtra(struct task_struct *prev_p,
				    struct task_struct *next_p,
				    struct tss_struct *tss)
{
	struct thread_struct *prev, *next;

	prev = &prev_p->thread;
	next = &next_p->thread;

	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
		/*
		 * Copy the relevant range of the IO bitmap.
		 * Normally this is 128 bytes or less:
		 */
		memcpy(tss->io_bitmap, next->io_bitmap_ptr,
		       max(prev->io_bitmap_max, next->io_bitmap_max));
	} else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
		/*
		 * Clear any possible leftover bits:
		 */
		memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
	}
}

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
__kprobes struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	/* we're going to use this soon, after a few expensive things */
	if (next_p->fpu_counter > 5)
		prefetch(&next->i387.fxsave);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/* Must be after DS reload */
	unlazy_fpu(prev_p);

	/*
	 * Switch the PDA and FPU contexts.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);

	write_pda(kernelstack,
		  (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
#ifdef CONFIG_CC_STACKPROTECTOR
	write_pda(stack_canary, next_p->stack_canary);
	/*
	 * Build time only check to make sure the stack_canary is at
	 * offset 40 in the pda; this is a gcc ABI requirement
	 */
	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
#endif

	/*
	 * Now maybe reload the debug registers and handle I/O bitmaps
	 */
	if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
	    || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))
		__switch_to_xtra(prev_p, next_p, tss);

	/* If the task has used fpu the last 5 timeslices, just do a full
	 * restore of the math state immediately to avoid the trap; the
	 * chances of needing FPU soon are obviously high now
	 */
	if (next_p->fpu_counter > 5)
		math_state_restore();
	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		       NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)task_stack_page(p);
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		unsigned gsindex;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			asm("movl %%gs,%0" : "=r" (gsindex));
			if (gsindex)
				rdmsrl(MSR_KERNEL_GS_BASE, base);
			else
				base = task->thread.gs;
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

	pp = task_pt_regs(tsk);

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

unsigned long arch_align_stack(unsigned long sp)
{
	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}