Import 2.1.37pre7
[davej-history.git] / arch / i386 / kernel / process.c
blob33842a21fdf80f033bca511eaa575f38d88106c5
1 /*
2 * linux/arch/i386/kernel/process.c
4 * Copyright (C) 1995 Linus Torvalds
5 */
7 /*
8 * This file handles the architecture-dependent parts of process handling..
9 */
11 #define __KERNEL_SYSCALLS__
12 #include <stdarg.h>
14 #include <linux/errno.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/mm.h>
18 #include <linux/smp.h>
19 #include <linux/smp_lock.h>
20 #include <linux/stddef.h>
21 #include <linux/unistd.h>
22 #include <linux/ptrace.h>
23 #include <linux/malloc.h>
24 #include <linux/vmalloc.h>
25 #include <linux/user.h>
26 #include <linux/a.out.h>
27 #include <linux/interrupt.h>
28 #include <linux/config.h>
29 #include <linux/unistd.h>
30 #include <linux/delay.h>
31 #include <linux/smp.h>
32 #include <linux/reboot.h>
33 #include <linux/init.h>
34 #if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
35 #include <linux/apm_bios.h>
36 #endif
38 #include <asm/uaccess.h>
39 #include <asm/pgtable.h>
40 #include <asm/system.h>
41 #include <asm/io.h>
42 #include <asm/ldt.h>
44 #ifdef __SMP__
45 asmlinkage void ret_from_smpfork(void) __asm__("ret_from_smpfork");
46 #else
47 asmlinkage void ret_from_sys_call(void) __asm__("ret_from_sys_call");
48 #endif
50 #ifdef CONFIG_APM
51 extern int apm_do_idle(void);
52 extern void apm_do_busy(void);
53 #endif
/* Nesting counter: while non-zero the idle loop must not execute HLT
   (some drivers/hardware need the CPU kept awake). */
static int hlt_counter=0;

/* Idle time (in clock ticks) after which the UP idle loop switches to
   the "hard" idle path (hard_idle()). */
#define HARD_IDLE_TIMEOUT (HZ / 3)

/*
 * Forbid the idle loop from using the HLT instruction.
 * Calls nest; pair each call with enable_hlt().
 */
void disable_hlt(void)
{
	hlt_counter++;
}

/*
 * Re-allow HLT in the idle loop (undoes one disable_hlt()).
 */
void enable_hlt(void)
{
	hlt_counter--;
}
69 #ifndef __SMP__
71 static void hard_idle(void)
73 while (!need_resched) {
74 if (hlt_works_ok && !hlt_counter) {
75 #ifdef CONFIG_APM
76 /* If the APM BIOS is not enabled, or there
77 is an error calling the idle routine, we
78 should hlt if possible. We need to check
79 need_resched again because an interrupt
80 may have occurred in apm_do_idle(). */
81 start_bh_atomic();
82 if (!apm_do_idle() && !need_resched)
83 __asm__("hlt");
84 end_bh_atomic();
85 #else
86 __asm__("hlt");
87 #endif
89 if (need_resched)
90 break;
91 schedule();
93 #ifdef CONFIG_APM
94 apm_do_busy();
95 #endif
99 * The idle loop on a uniprocessor i386..
102 asmlinkage int sys_idle(void)
104 unsigned long start_idle = 0;
105 int ret = -EPERM;
107 lock_kernel();
108 if (current->pid != 0)
109 goto out;
110 /* endless idle loop with no priority at all */
111 current->priority = -100;
112 current->counter = -100;
113 for (;;)
116 * We are locked at this point. So we can safely call
117 * the APM bios knowing only one CPU at a time will do
118 * so.
120 if (!start_idle)
121 start_idle = jiffies;
122 if (jiffies - start_idle > HARD_IDLE_TIMEOUT)
124 hard_idle();
126 else
128 if (hlt_works_ok && !hlt_counter && !need_resched)
129 __asm__("hlt");
131 run_task_queue(&tq_scheduler);
132 if (need_resched)
133 start_idle = 0;
134 schedule();
136 ret = 0;
137 out:
138 unlock_kernel();
139 return ret;
142 #else
145 * This is being executed in task 0 'user space'.
148 int cpu_idle(void *unused)
150 current->priority = -100;
151 while(1)
153 if(cpu_data[smp_processor_id()].hlt_works_ok &&
154 !hlt_counter && !need_resched)
155 __asm("hlt");
157 * tq_scheduler currently assumes we're running in a process
158 * context (ie that we hold the kernel lock..)
160 if (tq_scheduler) {
161 lock_kernel();
162 run_task_queue(&tq_scheduler);
163 unlock_kernel();
165 /* endless idle loop with no priority at all */
166 current->counter = -100;
167 schedule();
171 asmlinkage int sys_idle(void)
173 cpu_idle(NULL);
174 return 0;
177 #endif
/*
 * This routine reboots the machine by asking the keyboard
 * controller to pulse the reset-line low. We try that for a while,
 * and if it doesn't work, we do some other stupid things.
 */

/* Zero-length IDT: loading it and then taking any interrupt forces a
   triple fault, i.e. a CPU reset. */
static long no_idt[2] = {0, 0};
/* Value written to BIOS location 0x472: 0x1234 = warm boot (skip the
   memory test), 0 = cold boot. Set via the "reboot=" option below. */
static int reboot_mode = 0;
/* Non-zero: reboot by jumping through the BIOS instead of pulsing the
   keyboard controller's reset line. */
static int reboot_thru_bios = 0;
188 __initfunc(void reboot_setup(char *str, int *ints))
190 while(1) {
191 switch (*str) {
192 case 'w': /* "warm" reboot (no memory testing etc) */
193 reboot_mode = 0x1234;
194 break;
195 case 'c': /* "cold" reboot (with memory testing etc) */
196 reboot_mode = 0x0;
197 break;
198 case 'b': /* "bios" reboot by jumping through the BIOS */
199 reboot_thru_bios = 1;
200 break;
201 case 'h': /* "hard" reboot by toggling RESET and/or crashing the CPU */
202 reboot_thru_bios = 0;
203 break;
205 if((str = strchr(str,',')) != NULL)
206 str++;
207 else
208 break;
/* The following code and data reboots the machine by switching to real
   mode and jumping to the BIOS reset entry point, as if the CPU has
   really been reset.  The previous version asked the keyboard
   controller to pulse the CPU reset line, which is more thorough, but
   doesn't work with at least one type of 486 motherboard.  It is easy
   to stop this code working; hence the copious comments. */

static unsigned long long
real_mode_gdt_entries [3] =
{
	0x0000000000000000ULL,	/* Null descriptor */
	0x00009a000000ffffULL,	/* 16-bit real-mode 64k code at 0x00000000 */
	0x000092000100ffffULL	/* 16-bit real-mode 64k data at 0x00000100 */
};

/* Pseudo-descriptor operands for LGDT/LIDT: 16-bit limit followed by
   the 32-bit linear base address. */
static struct
{
	unsigned short       size __attribute__ ((packed));
	unsigned long long * base __attribute__ ((packed));
}
real_mode_gdt = { sizeof (real_mode_gdt_entries) - 1, real_mode_gdt_entries },
real_mode_idt = { 0x3ff, 0 };	/* real-mode IVT: 1k at address 0 */
/* This is 16-bit protected mode code to disable paging and the cache,
   switch to real mode and jump to the BIOS reset code.

   The instruction that switches to real mode by writing to CR0 must be
   followed immediately by a far jump instruction, which set CS to a
   valid value for real mode, and flushes the prefetch queue to avoid
   running instructions that have already been decoded in protected
   mode.

   Clears all the flags except ET, especially PG (paging), PE
   (protected-mode enable) and TS (task switch for coprocessor state
   save).  Flushes the TLB after paging has been disabled.  Sets CD and
   NW, to disable the cache on a 486, and invalidates the cache.  This
   is more like the state of a 486 after reset.  I don't know if
   something else should be done for other chips.

   More could be done here to set up the registers as if a CPU reset had
   occurred; hopefully real BIOSes don't assume much. */

static unsigned char real_mode_switch [] =
{
	0x66, 0x0f, 0x20, 0xc0,			/* movl %cr0,%eax */
	0x66, 0x83, 0xe0, 0x11,			/* andl $0x00000011,%eax */
	0x66, 0x0d, 0x00, 0x00, 0x00, 0x60,	/* orl  $0x60000000,%eax */
	0x66, 0x0f, 0x22, 0xc0,			/* movl %eax,%cr0 */
	0x66, 0x0f, 0x22, 0xd8,			/* movl %eax,%cr3 */
	0x66, 0x0f, 0x20, 0xc3,			/* movl %cr0,%ebx */
	0x66, 0x81, 0xe3, 0x00, 0x00, 0x00, 0x60,	/* andl $0x60000000,%ebx */
	0x74, 0x02,				/* jz   f */
	0x0f, 0x08,				/* invd */
	0x24, 0x10,				/* f: andb $0x10,al */
	0x66, 0x0f, 0x22, 0xc0,			/* movl %eax,%cr0 */
	0xea, 0x00, 0x00, 0xff, 0xff		/* ljmp $0xffff,$0x0000 */
};
/*
 * Busy-poll (bounded to 0x10000 reads) until the keyboard controller's
 * input buffer is empty (status bit 1 clear), so that a command byte
 * can safely be written to port 0x64.
 */
static inline void kb_wait(void)
{
	int i;

	for (i=0; i<0x10000; i++)
		if ((inb_p(0x64) & 0x02) == 0)
			break;
}
280 void machine_restart(char * __unused)
283 if(!reboot_thru_bios) {
284 #if 0
285 sti();
286 #endif
287 /* rebooting needs to touch the page at absolute addr 0 */
288 *((unsigned short *)__va(0x472)) = reboot_mode;
289 for (;;) {
290 int i;
291 for (i=0; i<100; i++) {
292 int j;
293 kb_wait();
294 for(j = 0; j < 100000 ; j++)
295 /* nothing */;
296 outb(0xfe,0x64); /* pulse reset low */
297 udelay(10);
299 __asm__ __volatile__("\tlidt %0": "=m" (no_idt));
303 cli ();
305 /* Write zero to CMOS register number 0x0f, which the BIOS POST
306 routine will recognize as telling it to do a proper reboot. (Well
307 that's what this book in front of me says -- it may only apply to
308 the Phoenix BIOS though, it's not clear). At the same time,
309 disable NMIs by setting the top bit in the CMOS address register,
310 as we're about to do peculiar things to the CPU. I'm not sure if
311 `outb_p' is needed instead of just `outb'. Use it to be on the
312 safe side. */
314 outb_p (0x8f, 0x70);
315 outb_p (0x00, 0x71);
317 /* Remap the kernel at virtual address zero, as well as offset zero
318 from the kernel segment. This assumes the kernel segment starts at
319 virtual address 0xc0000000. */
321 memcpy (swapper_pg_dir, swapper_pg_dir + 768,
322 sizeof (swapper_pg_dir [0]) * 256);
324 /* Make sure the first page is mapped to the start of physical memory.
325 It is normally not mapped, to trap kernel NULL pointer dereferences. */
327 pg0 [0] = 7;
330 * Use `swapper_pg_dir' as our page directory. We bother with
331 * `SET_PAGE_DIR' because although might be rebooting, but if we change
332 * the way we set root page dir in the future, then we wont break a
333 * seldom used feature ;)
336 SET_PAGE_DIR(current,swapper_pg_dir);
338 /* Write 0x1234 to absolute memory location 0x472. The BIOS reads
339 this on booting to tell it to "Bypass memory test (also warm
340 boot)". This seems like a fairly standard thing that gets set by
341 REBOOT.COM programs, and the previous reset routine did this
342 too. */
344 *((unsigned short *)0x472) = reboot_mode;
346 /* For the switch to real mode, copy some code to low memory. It has
347 to be in the first 64k because it is running in 16-bit mode, and it
348 has to have the same physical and virtual address, because it turns
349 off paging. Copy it near the end of the first page, out of the way
350 of BIOS variables. */
352 memcpy ((void *) (0x1000 - sizeof (real_mode_switch)),
353 real_mode_switch, sizeof (real_mode_switch));
355 /* Set up the IDT for real mode. */
357 __asm__ __volatile__ ("lidt %0" : : "m" (real_mode_idt));
359 /* Set up a GDT from which we can load segment descriptors for real
360 mode. The GDT is not used in real mode; it is just needed here to
361 prepare the descriptors. */
363 __asm__ __volatile__ ("lgdt %0" : : "m" (real_mode_gdt));
365 /* Load the data segment registers, and thus the descriptors ready for
366 real mode. The base address of each segment is 0x100, 16 times the
367 selector value being loaded here. This is so that the segment
368 registers don't have to be reloaded after switching to real mode:
369 the values are consistent for real mode operation already. */
371 __asm__ __volatile__ ("movw $0x0010,%%ax\n"
372 "\tmovw %%ax,%%ds\n"
373 "\tmovw %%ax,%%es\n"
374 "\tmovw %%ax,%%fs\n"
375 "\tmovw %%ax,%%gs\n"
376 "\tmovw %%ax,%%ss" : : : "eax");
378 /* Jump to the 16-bit code that we copied earlier. It disables paging
379 and the cache, switches to real mode, and jumps to the BIOS reset
380 entry point. */
382 __asm__ __volatile__ ("ljmp $0x0008,%0"
384 : "i" ((void *) (0x1000 - sizeof (real_mode_switch))));
/*
 * Halting the machine needs no architecture work on i386: the caller
 * simply stops scheduling. Intentionally empty.
 */
void machine_halt(void)
{
}
/*
 * Power the machine off through the APM BIOS when both CONFIG_APM and
 * CONFIG_APM_POWER_OFF are enabled; otherwise this is a no-op and the
 * machine just halts.
 */
void machine_power_off(void)
{
#if defined(CONFIG_APM) && defined(CONFIG_APM_POWER_OFF)
	apm_set_power_state(APM_STATE_OFF);
#endif
}
399 void show_regs(struct pt_regs * regs)
401 printk("\n");
402 printk("EIP: %04x:[<%08lx>]",0xffff & regs->xcs,regs->eip);
403 if (regs->xcs & 3)
404 printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
405 printk(" EFLAGS: %08lx\n",regs->eflags);
406 printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
407 regs->eax,regs->ebx,regs->ecx,regs->edx);
408 printk("ESI: %08lx EDI: %08lx EBP: %08lx",
409 regs->esi, regs->edi, regs->ebp);
410 printk(" DS: %04x ES: %04x\n",
411 0xffff & regs->xds,0xffff & regs->xes);
415 * Free current thread data structures etc..
418 void exit_thread(void)
420 /* forget lazy i387 state */
421 if (last_task_used_math == current)
422 last_task_used_math = NULL;
423 /* forget local segments */
424 __asm__ __volatile__("mov %w0,%%fs ; mov %w0,%%gs ; lldt %w0"
425 : /* no outputs */
426 : "r" (0));
427 current->tss.ldt = 0;
428 if (current->ldt) {
429 void * ldt = current->ldt;
430 current->ldt = NULL;
431 vfree(ldt);
435 void flush_thread(void)
437 int i;
439 if (current->ldt) {
440 free_page((unsigned long) current->ldt);
441 current->ldt = NULL;
442 for (i=1 ; i<NR_TASKS ; i++) {
443 if (task[i] == current) {
444 set_ldt_desc(gdt+(i<<1)+
445 FIRST_LDT_ENTRY,&default_ldt, 1);
446 load_ldt(i);
451 for (i=0 ; i<8 ; i++)
452 current->debugreg[i] = 0;
455 * Forget coprocessor state..
457 #ifdef __SMP__
458 if (current->flags & PF_USEDFPU) {
459 stts();
461 #else
462 if (last_task_used_math == current) {
463 last_task_used_math = NULL;
464 stts();
466 #endif
467 current->used_math = 0;
468 current->flags &= ~PF_USEDFPU;
/*
 * Nothing to release on i386: all per-thread state lives inside the
 * task structure itself. Intentionally empty.
 */
void release_thread(struct task_struct *dead_task)
{
}
475 int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,
476 struct task_struct * p, struct pt_regs * regs)
478 int i;
479 struct pt_regs * childregs;
481 p->tss.tr = _TSS(nr);
482 p->tss.ldt = _LDT(nr);
483 p->tss.es = KERNEL_DS;
484 p->tss.cs = KERNEL_CS;
485 p->tss.ss = KERNEL_DS;
486 p->tss.ds = KERNEL_DS;
487 p->tss.fs = USER_DS;
488 p->tss.gs = USER_DS;
489 p->tss.ss0 = KERNEL_DS;
490 p->tss.esp0 = 2*PAGE_SIZE + (unsigned long) p;
491 childregs = ((struct pt_regs *) (p->tss.esp0)) - 1;
492 p->tss.esp = (unsigned long) childregs;
493 #ifdef __SMP__
494 p->tss.eip = (unsigned long) ret_from_smpfork;
495 p->tss.eflags = regs->eflags & 0xffffcdff; /* iopl always 0 for a new process */
496 #else
497 p->tss.eip = (unsigned long) ret_from_sys_call;
498 p->tss.eflags = regs->eflags & 0xffffcfff; /* iopl always 0 for a new process */
499 #endif
500 p->tss.ebx = (unsigned long) p;
501 *childregs = *regs;
502 childregs->eax = 0;
503 childregs->esp = esp;
504 p->tss.back_link = 0;
505 if (p->ldt) {
506 p->ldt = (struct desc_struct*) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
507 if (p->ldt != NULL)
508 memcpy(p->ldt, current->ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
510 set_tss_desc(gdt+(nr<<1)+FIRST_TSS_ENTRY,&(p->tss));
511 if (p->ldt)
512 set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,p->ldt, 512);
513 else
514 set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY,&default_ldt, 1);
515 p->tss.bitmap = offsetof(struct thread_struct,io_bitmap);
516 for (i = 0; i < IO_BITMAP_SIZE+1 ; i++) /* IO bitmap is actually SIZE+1 */
517 p->tss.io_bitmap[i] = ~0;
518 if (last_task_used_math == current)
519 __asm__("clts ; fnsave %0 ; frstor %0":"=m" (p->tss.i387));
521 return 0;
525 * fill in the fpu structure for a core dump..
527 int dump_fpu (struct pt_regs * regs, struct user_i387_struct* fpu)
529 int fpvalid;
531 /* Flag indicating the math stuff is valid. We don't support this for the
532 soft-float routines yet */
533 if (hard_math) {
534 if ((fpvalid = current->used_math) != 0) {
535 if (last_task_used_math == current)
536 __asm__("clts ; fnsave %0": :"m" (*fpu));
537 else
538 memcpy(fpu,&current->tss.i387.hard,sizeof(*fpu));
540 } else {
541 /* we should dump the emulator state here, but we need to
542 convert it into standard 387 format first.. */
543 fpvalid = 0;
546 return fpvalid;
550 * fill in the user structure for a core dump..
552 void dump_thread(struct pt_regs * regs, struct user * dump)
554 int i;
556 /* changed the size calculations - should hopefully work better. lbt */
557 dump->magic = CMAGIC;
558 dump->start_code = 0;
559 dump->start_stack = regs->esp & ~(PAGE_SIZE - 1);
560 dump->u_tsize = ((unsigned long) current->mm->end_code) >> PAGE_SHIFT;
561 dump->u_dsize = ((unsigned long) (current->mm->brk + (PAGE_SIZE-1))) >> PAGE_SHIFT;
562 dump->u_dsize -= dump->u_tsize;
563 dump->u_ssize = 0;
564 for (i = 0; i < 8; i++)
565 dump->u_debugreg[i] = current->debugreg[i];
567 if (dump->start_stack < TASK_SIZE)
568 dump->u_ssize = ((unsigned long) (TASK_SIZE - dump->start_stack)) >> PAGE_SHIFT;
570 dump->regs.ebx = regs->ebx;
571 dump->regs.ecx = regs->ecx;
572 dump->regs.edx = regs->edx;
573 dump->regs.esi = regs->esi;
574 dump->regs.edi = regs->edi;
575 dump->regs.ebp = regs->ebp;
576 dump->regs.eax = regs->eax;
577 dump->regs.ds = regs->xds;
578 dump->regs.es = regs->xes;
579 __asm__("mov %%fs,%0":"=r" (dump->regs.fs));
580 __asm__("mov %%gs,%0":"=r" (dump->regs.gs));
581 dump->regs.orig_eax = regs->orig_eax;
582 dump->regs.eip = regs->eip;
583 dump->regs.cs = regs->xcs;
584 dump->regs.eflags = regs->eflags;
585 dump->regs.esp = regs->esp;
586 dump->regs.ss = regs->xss;
588 dump->u_fpvalid = dump_fpu (regs, &dump->i387);
591 asmlinkage int sys_fork(struct pt_regs regs)
593 int ret;
595 lock_kernel();
596 ret = do_fork(SIGCHLD, regs.esp, &regs);
597 unlock_kernel();
598 return ret;
601 asmlinkage int sys_clone(struct pt_regs regs)
603 unsigned long clone_flags;
604 unsigned long newsp;
605 int ret;
607 lock_kernel();
608 clone_flags = regs.ebx;
609 newsp = regs.ecx;
610 if (!newsp)
611 newsp = regs.esp;
612 ret = do_fork(clone_flags, newsp, &regs);
613 unlock_kernel();
614 return ret;
618 * sys_execve() executes a new program.
620 asmlinkage int sys_execve(struct pt_regs regs)
622 int error;
623 char * filename;
625 lock_kernel();
626 error = getname((char *) regs.ebx, &filename);
627 if (error)
628 goto out;
629 error = do_execve(filename, (char **) regs.ecx, (char **) regs.edx, &regs);
630 putname(filename);
631 out:
632 unlock_kernel();
633 return error;