/*
 * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
 * Licensed under the GPL
 */
6 #include "linux/config.h"
7 #include "linux/kernel.h"
8 #include "linux/sched.h"
9 #include "linux/interrupt.h"
11 #include "linux/slab.h"
12 #include "linux/utsname.h"
14 #include "linux/utime.h"
15 #include "linux/smp_lock.h"
16 #include "linux/module.h"
17 #include "linux/init.h"
18 #include "linux/capability.h"
19 #include "asm/unistd.h"
21 #include "asm/segment.h"
23 #include "asm/pgtable.h"
24 #include "asm/processor.h"
25 #include "asm/tlbflush.h"
26 #include "asm/spinlock.h"
27 #include "asm/uaccess.h"
29 #include "user_util.h"
30 #include "kern_util.h"
32 #include "signal_kern.h"
33 #include "signal_user.h"
37 #include "time_user.h"
39 #include "frame_kern.h"
40 #include "sigcontext.h"
41 #include "2_5compat.h"
/* This is a per-cpu array. A processor only modifies its entry and it only
 * cares about its entry, so it's OK if another processor is modifying its
 * entry.
 */
/* Per-CPU table mapping a CPU slot to { host pid, task pointer }; every
 * possible slot starts out as { -1, NULL } (no task). */
48 struct cpu_task cpu_tasks
[NR_CPUS
] = { [0 ... NR_CPUS
- 1] = { -1, NULL
} };
/*
 * get_task - scan the global task list for a task matching `pid`.
 * If `require` is nonzero and no task is found, panic instead of
 * returning NULL.
 * NOTE(review): extraction lost interior lines here (the loop-body match
 * test, `ret` initialization, and the final return) -- do not assume more
 * than the visible locking pattern shows.
 */
50 struct task_struct
*get_task(int pid
, int require
)
52 struct task_struct
*task
, *ret
;
/* Walk the task list under the reader side of tasklist_lock. */
55 read_lock(&tasklist_lock
);
56 for_each_process(task
){
62 read_unlock(&tasklist_lock
);
63 if(require
&& (ret
== NULL
)) panic("get_task couldn't find a task\n");
/*
 * is_valid_pid - report whether some task's host pid (thread.extern_pid)
 * equals `pid`. Both exits drop tasklist_lock before returning.
 * NOTE(review): the actual `return` statements were lost in extraction;
 * presumably the match path returns true and the fall-through false --
 * TODO confirm against upstream.
 */
67 int is_valid_pid(int pid
)
69 struct task_struct
*task
;
71 read_lock(&tasklist_lock
);
72 for_each_process(task
){
73 if(task
->thread
.extern_pid
== pid
){
74 read_unlock(&tasklist_lock
);
78 read_unlock(&tasklist_lock
);
82 int external_pid(void *t
)
84 struct task_struct
*task
= t
? t
: current
;
86 return(task
->thread
.extern_pid
);
/*
 * pid_to_processor_id - linear search of cpu_tasks[] for the CPU whose
 * recorded host pid equals `pid`; returns the CPU index on a match.
 * NOTE(review): the declaration of `i` and the not-found return value were
 * lost in extraction (presumably -1) -- TODO confirm.
 */
89 int pid_to_processor_id(int pid
)
93 for(i
= 0; i
< ncpus
; i
++){
94 if(cpu_tasks
[i
].pid
== pid
) return(i
);
/* Give back a kernel stack allocated with alloc_stack(); `order` must match
 * the allocation order. */
void free_stack(unsigned long stack, int order)
{
	free_pages(stack, order);
}
/*
 * set_init_pid - record the host pid backing init_task and create its
 * context-switch pipe; failure to create the pipe is fatal.
 * NOTE(review): the declaration of `err` and the trailing panic argument
 * were lost in extraction.
 */
104 void set_init_pid(int pid
)
108 init_task
.thread
.extern_pid
= pid
;
109 err
= os_pipe(init_task
.thread
.switch_pipe
, 1, 1);
110 if(err
) panic("Can't create switch pipe for init_task, errno = %d",
/*
 * set_user_mode - ask the tracing thread to start tracing this task's
 * host process (OP_TRACE_ON delivered via SIGUSR1 to ourselves).
 * Returns early (1) if the task is already being traced.
 * NOTE(review): the final return value after os_usr1_process was lost in
 * extraction -- presumably 0; TODO confirm.
 */
114 int set_user_mode(void *t
)
116 struct task_struct
*task
;
/* NULL argument means the current task. */
118 task
= t
? t
: current
;
119 if(task
->thread
.tracing
) return(1);
120 task
->thread
.request
.op
= OP_TRACE_ON
;
121 os_usr1_process(os_getpid());
125 void set_tracing(void *task
, int tracing
)
127 ((struct task_struct
*) task
)->thread
.tracing
= tracing
;
130 int is_tracing(void *t
)
132 return (((struct task_struct
*) t
)->thread
.tracing
);
/*
 * alloc_stack - allocate a kernel stack of 2^order pages and apply the
 * UML stack protections to it.  `atomic` adds GFP_ATOMIC so the
 * allocation may be made from atomic context.
 * NOTE(review): the declaration of `page`, the failure return, and the
 * final `return(page)` were lost in extraction -- TODO confirm.
 */
135 unsigned long alloc_stack(int order
, int atomic
)
138 int flags
= GFP_KERNEL
;
140 if(atomic
) flags
|= GFP_ATOMIC
;
141 if((page
= __get_free_pages(flags
, order
)) == 0)
143 stack_protections(page
);
/* Forward declaration used by the context-switch code below. */
147 extern void schedule_tail(struct task_struct
*prev
);
/*
 * new_thread_handler - SIGUSR1 handler that becomes the body of a new
 * kernel thread.  Pulls the thread function and argument out of
 * current->thread.request, records the signal context, waits on the
 * switch pipe until first scheduled, frees the temporary clone stack,
 * re-enables the timer/profiling signals, then runs the thread function.
 * NOTE(review): extraction lost several interior lines (local decls for
 * fn/arg, braces, and the do_exit path after run_kernel_thread) -- hedge
 * accordingly.  The `¤t` token below is mojibake for `&current`.
 */
149 static void new_thread_handler(int sig
)
154 fn
= current
->thread
.request
.u
.thread
.proc
;
155 arg
= current
->thread
.request
.u
.thread
.arg
;
/* Stash the sigcontext, which sits just above `sig` on the stack. */
156 current
->thread
.regs
.regs
.sc
= (void *) (&sig
+ 1);
/* Block until the scheduler first switches to this thread. */
157 suspend_new_thread(current
->thread
.switch_pipe
[0]);
164 free_page(current
->thread
.temp_stack
);
165 set_cmdline("(kernel thread)");
168 current
->thread
.prev_sched
= NULL
;
169 change_sig(SIGUSR1
, 1);
170 change_sig(SIGVTALRM
, 1);
171 change_sig(SIGPROF
, 1);
173 if(!run_kernel_thread(fn
, arg
, ¤t
->thread
.jmp
))
/*
 * new_thread_proc - clone trampoline for a new kernel thread: disable the
 * async signals, install new_thread_handler on `stack`, then raise SIGUSR1
 * at ourselves to enter it.
 * NOTE(review): the trailing return statement was lost in extraction
 * (os_usr1_process should not return normally anyway).
 */
177 static int new_thread_proc(void *stack
)
179 change_sig(SIGIO
, 0);
180 change_sig(SIGVTALRM
, 0);
181 change_sig(SIGPROF
, 0);
182 init_new_thread(stack
, new_thread_handler
);
183 os_usr1_process(os_getpid());
/*
 * kernel_thread - create a kernel thread running fn(arg).  The function
 * and argument are parked in current->thread.request for the child's
 * trampoline to pick up, then do_fork() with CLONE_VM creates it.
 * NOTE(review): the extraction lost the lines that extract and return the
 * new thread's pid from `p` -- TODO confirm against upstream.
 */
187 int kernel_thread(int (*fn
)(void *), void * arg
, unsigned long flags
)
189 struct task_struct
*p
;
191 current
->thread
.request
.u
.thread
.proc
= fn
;
192 current
->thread
.request
.u
.thread
.arg
= arg
;
193 p
= do_fork(CLONE_VM
| flags
, 0, NULL
, 0, NULL
);
194 if(IS_ERR(p
)) panic("do_fork failed in kernel_thread");
/*
 * switch_mm - update the cpu_vm_mask bookkeeping when this CPU switches
 * from address space `prev` to `next`.
 * NOTE(review): a guard (presumably `if(prev != next)`) around the
 * clear_bit was lost in extraction -- TODO confirm.
 */
198 void switch_mm(struct mm_struct
*prev
, struct mm_struct
*next
,
199 struct task_struct
*tsk
, unsigned cpu
)
202 clear_bit(cpu
, &prev
->cpu_vm_mask
);
203 set_bit(cpu
, &next
->cpu_vm_mask
);
206 void set_current(void *t
)
208 struct task_struct
*task
= t
;
210 cpu_tasks
[task
->thread_info
->cpu
] = ((struct cpu_task
)
211 { task
->thread
.extern_pid
, task
});
/*
 * switch_to - UML context switch.  Wakes the incoming task by writing a
 * byte to its switch pipe, then blocks the outgoing task by reading from
 * its own pipe.  Timer/alarm/profiling signals are disabled across the
 * switch and restored afterwards; pending SIGIO and interrupts are
 * forwarded to the incoming host process.
 * NOTE(review): extraction lost many interior lines (from/to assignment
 * from prev/next, decls of `flags`, `c`, `reading`, error checks around
 * the pipe I/O, and several closing braces) -- comments below are limited
 * to what the surviving lines show.
 */
214 void *switch_to(void *prev
, void *next
, void *last
)
216 struct task_struct
*from
, *to
;
218 int vtalrm
, alrm
, prof
, err
, cpu
;
220 /* jailing and SMP are incompatible, so this doesn't need to be
/* Let the incoming task find us for schedule_tail bookkeeping. */
228 to
->thread
.prev_sched
= from
;
230 cpu
= from
->thread_info
->cpu
;
232 forward_interrupts(to
->thread
.extern_pid
);
234 forward_ipi(cpu_data
[cpu
].ipi_pipe
[0], to
->thread
.extern_pid
);
236 local_irq_save(flags
);
/* Mask the timer signals, remembering their previous state. */
238 vtalrm
= change_sig(SIGVTALRM
, 0);
239 alrm
= change_sig(SIGALRM
, 0);
240 prof
= change_sig(SIGPROF
, 0);
242 forward_pending_sigio(to
->thread
.extern_pid
);
/* Wake the incoming task... */
248 err
= os_write_file(to
->thread
.switch_pipe
[1], &c
, sizeof(c
));
250 panic("write of switch_pipe failed, errno = %d", -err
);
253 if((from
->state
== TASK_ZOMBIE
) || (from
->state
== TASK_DEAD
))
254 os_kill_process(os_getpid());
/* ...and put the outgoing task to sleep on its own pipe. */
256 err
= os_read_file(from
->thread
.switch_pipe
[0], &c
, sizeof(c
));
258 panic("read of switch_pipe failed, errno = %d", -err
);
260 /* This works around a nasty race with 'jail'. If we are switching
261 * between two threads of a threaded app and the incoming process
262 * runs before the outgoing process reaches the read, and it makes
263 * it all the way out to userspace, then it will have write-protected
264 * the outgoing process stack. Then, when the outgoing process
265 * returns from the write, it will segfault because it can no longer
266 * write its own stack. So, in order to avoid that, the incoming
267 * thread sits in a loop yielding until 'reading' is set. This
268 * isn't entirely safe, since there may be a reschedule from a timer
269 * happening between setting 'reading' and sleeping in read. But,
270 * it should get a whole quantum in which to reach the read and sleep,
271 * which should be enough.
275 while(!reading
) sched_yield();
/* Restore the signal state saved above. */
278 change_sig(SIGVTALRM
, vtalrm
);
279 change_sig(SIGALRM
, alrm
);
280 change_sig(SIGPROF
, prof
);
285 local_irq_restore(flags
);
287 return(current
->thread
.prev_sched
);
290 void interrupt_end(void)
292 if(need_resched()) schedule();
293 if(test_tsk_thread_flag(current
, TIF_SIGPENDING
)) do_signal(0);
296 void release_thread(struct task_struct
*task
)
298 os_kill_process(task
->thread
.extern_pid
);
301 void exit_thread(void)
303 close(current
->thread
.switch_pipe
[0]);
304 close(current
->thread
.switch_pipe
[1]);
305 unprotect_stack((unsigned long) current
->thread_info
);
/* Signal masking - signals are blocked at the start of fork_tramp. They
 * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
 * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off,
 * so it is blocked before it's called. They are re-enabled on sigreturn
 * despite the fact that they were blocked when the SIGUSR1 was issued because
 * copy_thread copies the parent's sigcontext, including the signal mask
 * onto the signal frame.
 */
/*
 * finish_fork_handler - SIGUSR1 handler entered by fork_tramp in a newly
 * forked child.  Records the sigcontext, sleeps on the switch pipe until
 * first scheduled, re-enables signals, reinstates 'jail' memory protection
 * when the child has its own mm, frees the temporary clone stack, and
 * finally re-enters user mode.
 * NOTE(review): extraction lost several interior lines (braces, other
 * change_sig calls, and the schedule_tail/prev_sched handling) -- hedge
 * accordingly.
 */
317 void finish_fork_handler(int sig
)
/* The sigcontext sits just above `sig` on the signal stack. */
319 current
->thread
.regs
.regs
.sc
= (void *) (&sig
+ 1);
320 suspend_new_thread(current
->thread
.switch_pipe
[0]);
326 change_sig(SIGVTALRM
, 1);
/* Only re-protect kernel memory if this child got its own mm. */
328 if(current
->mm
!= current
->parent
->mm
)
329 protect(uml_reserved
, high_physmem
- uml_reserved
, 1, 1, 0, 1);
330 task_protections((unsigned long) current
->thread_info
);
332 current
->thread
.prev_sched
= NULL
;
334 free_page(current
->thread
.temp_stack
);
335 change_sig(SIGUSR1
, 0);
336 set_user_mode(current
);
/* get_current() - opaque accessor for the current task pointer; the body
 * was lost in extraction (presumably `return(current);` -- TODO confirm). */
339 void *get_current(void)
/* This sigusr1 business works around a bug in gcc's -pg support.
 * Normally a procedure's mcount call comes after esp has been copied to
 * ebp and the new frame is constructed. With procedures with no locals,
 * the mcount comes before, as the first thing that the procedure does.
 * When that procedure is main for a thread, ebp comes in as NULL. So,
 * when mcount dereferences it, it segfaults. So, UML works around this
 * by adding a non-optimizable local to the various trampolines, fork_tramp
 * and outer_tramp below, and exec_tramp.
 */
/* Non-optimizable local-source for the trampolines below (see the -pg
 * workaround comment above); file-scope so gcc can't constant-fold it. */
354 static int sigusr1
= SIGUSR1
;
/*
 * fork_tramp - clone trampoline for a forked child: disable async signals,
 * install finish_fork_handler on `stack`, and raise the signal at ourselves
 * to enter it.
 * NOTE(review): the local `sig` (loaded from the static `sigusr1` as part
 * of the -pg workaround) and the return statement were lost in extraction.
 */
356 int fork_tramp(void *stack
)
360 change_sig(SIGIO
, 0);
361 change_sig(SIGVTALRM
, 0);
362 change_sig(SIGPROF
, 0);
363 init_new_thread(stack
, finish_fork_handler
);
365 kill(os_getpid(), sig
);
/*
 * copy_thread - arch hook for fork/clone/kernel_thread.  Initializes the
 * child's thread struct, creates its switch pipe, allocates a temporary
 * clone stack, starts the child host process via start_fork_tramp with the
 * appropriate trampoline (fork_tramp vs new_thread_proc), copies the
 * parent's sigcontext/registers for a userspace fork, and finally asks the
 * tracer (OP_FORK) to pick up the new host pid.
 * NOTE(review): extraction lost many interior lines -- declarations
 * (err, new_pid, stack), the fork/kernel-thread branch bodies, error
 * returns, and the trailing return.  Comments stick to the visible lines.
 */
369 int copy_thread(int nr
, unsigned long clone_flags
, unsigned long sp
,
370 unsigned long stack_top
, struct task_struct
* p
,
371 struct pt_regs
*regs
)
375 int (*tramp
)(void *);
/* Start the child thread struct from a clean INIT_THREAD template. */
377 p
->thread
= (struct thread_struct
) INIT_THREAD
;
/* Kernel stack: two pages above the thread_info. */
378 p
->thread
.kernel_stack
=
379 (unsigned long) p
->thread_info
+ 2 * PAGE_SIZE
;
381 if(current
->thread
.forking
)
/* Kernel-thread path: hand the proc/arg request down to the child. */
384 tramp
= new_thread_proc
;
385 p
->thread
.request
.u
.thread
= current
->thread
.request
.u
.thread
;
388 err
= os_pipe(p
->thread
.switch_pipe
, 1, 1);
390 printk("copy_thread : pipe failed, errno = %d\n", -err
);
/* Temporary stack for the clone trampoline, freed by the child later. */
394 stack
= alloc_stack(0, 0);
396 printk(KERN_ERR
"copy_thread : failed to allocate "
397 "temporary stack\n");
401 clone_flags
&= CLONE_VM
;
402 p
->thread
.temp_stack
= stack
;
403 new_pid
= start_fork_tramp((void *) p
->thread
.kernel_stack
, stack
,
406 printk(KERN_ERR
"copy_thread : clone failed - errno = %d\n",
411 if(current
->thread
.forking
){
/* Userspace fork: child inherits the parent's sigcontext, returns 0,
 * and optionally gets a caller-specified stack pointer. */
412 sc_to_sc(p
->thread
.regs
.regs
.sc
, current
->thread
.regs
.regs
.sc
);
413 PT_REGS_SET_SYSCALL_RETURN(&p
->thread
.regs
, 0);
414 if(sp
!= 0) PT_REGS_SP(&p
->thread
.regs
) = sp
;
416 p
->thread
.extern_pid
= new_pid
;
/* Tell the tracing thread about the new host process. */
418 current
->thread
.request
.op
= OP_FORK
;
419 current
->thread
.request
.u
.fork
.pid
= new_pid
;
420 os_usr1_process(os_getpid());
424 void tracing_reboot(void)
426 current
->thread
.request
.op
= OP_REBOOT
;
427 os_usr1_process(os_getpid());
430 void tracing_halt(void)
432 current
->thread
.request
.op
= OP_HALT
;
433 os_usr1_process(os_getpid());
/*
 * tracing_cb - run proc(arg) in the context of the tracing thread.  If we
 * already are the tracing thread, the callback is presumably invoked
 * directly (that branch was lost in extraction -- TODO confirm); otherwise
 * an OP_CB request is parked and SIGUSR1 raised so the tracer runs it.
 */
436 void tracing_cb(void (*proc
)(void *), void *arg
)
438 if(os_getpid() == tracing_pid
){
442 current
->thread
.request
.op
= OP_CB
;
443 current
->thread
.request
.u
.cb
.proc
= proc
;
444 current
->thread
.request
.u
.cb
.arg
= arg
;
445 os_usr1_process(os_getpid());
/*
 * do_proc_op - executed by the tracing thread to service the request
 * parked in a task's thread.request (OP_EXEC / OP_FORK / OP_CB / ...),
 * then reset it to OP_NONE.
 * NOTE(review): the switch statement skeleton, the `op`/`pid` decls, the
 * task lookup, and the return of `op` were lost in extraction; only the
 * individual case bodies survive below.
 */
449 int do_proc_op(void *t
, int proc_id
)
451 struct task_struct
*task
;
452 struct thread_struct
*thread
;
456 thread
= &task
->thread
;
457 op
= thread
->request
.op
;
/* OP_EXEC: exec the host process and rebind the task to the new pid. */
463 pid
= thread
->request
.u
.exec
.pid
;
464 do_exec(thread
->extern_pid
, pid
);
465 thread
->extern_pid
= pid
;
466 cpu_tasks
[task
->thread_info
->cpu
].pid
= pid
;
/* OP_FORK: start tracing the newly forked host process. */
469 attach_process(thread
->request
.u
.fork
.pid
);
/* OP_CB: invoke the parked callback with its argument. */
472 (*thread
->request
.u
.cb
.proc
)(thread
->request
.u
.cb
.arg
);
478 tracer_panic("Bad op in do_proc_op");
481 thread
->request
.op
= OP_NONE
;
485 unsigned long stack_sp(unsigned long page
)
487 return(page
+ PAGE_SIZE
- sizeof(void *));
490 int current_pid(void)
492 return(current
->pid
);
/*
 * default_idle - the idle task's main loop.  Adopts init_mm as its address
 * space, then (in lines lost to extraction) loops forever, timestamping
 * idleness and entering the scheduler only when needed.
 * NOTE(review): the enclosing while-loop, need_resched()/schedule() calls,
 * and surrounding braces were lost in extraction.
 */
495 void default_idle(void)
/* Idle borrows init_mm rather than carrying its own address space. */
499 atomic_inc(&init_mm
.mm_count
);
500 current
->mm
= &init_mm
;
501 current
->active_mm
= &init_mm
;
504 /* endless idle loop with no priority at all */
508 * although we are an idle CPU, we do not want to
509 * get into the scheduler unnecessarily.
511 irq_stat
[smp_processor_id()].idle_timestamp
= jiffies
;
534 unsigned long um_virt_to_phys(void *t
, unsigned long addr
)
536 struct task_struct
*task
;
542 if(task
->mm
== NULL
) return(0xffffffff);
543 pgd
= pgd_offset(task
->mm
, addr
);
544 pmd
= pmd_offset(pgd
, addr
);
545 if(!pmd_present(*pmd
)) return(0xffffffff);
546 pte
= pte_offset_kernel(pmd
, addr
);
547 if(!pte_present(*pte
)) return(0xffffffff);
548 return((pte_val(*pte
) & PAGE_MASK
) + (addr
& ~PAGE_MASK
));
/*
 * current_cmd - best-effort name of the current command, read from the
 * start of the process's argv area via a page-table walk.
 * NOTE(review): the CONFIG_SMP/CONFIG_HIGHMEM branch body and the #else/
 * #endif lines were lost in extraction (that configuration presumably
 * just returns "(Unknown)").
 */
551 char *current_cmd(void)
553 #if defined(CONFIG_SMP) || defined(CONFIG_HIGHMEM)
556 unsigned long addr
= um_virt_to_phys(current
, current
->mm
->arg_start
);
557 return addr
== 0xffffffff? "(Unknown)": __va(addr
);
/*
 * force_sigbus - kill the current task with SIGBUS on OOM: queue SIGBUS,
 * mark the task as signal-killed, and exit with the "killed by signal"
 * status (SIGBUS | 0x80).
 * NOTE(review): the printk's pid argument and the sighand locking around
 * sigaddset were lost in extraction.  The `¤t` token below is
 * mojibake for `&current`.
 */
561 void force_sigbus(void)
563 printk(KERN_ERR
"Killing pid %d because of a lack of memory\n",
566 sigaddset(¤t
->pending
.signal
, SIGBUS
);
568 current
->flags
|= PF_SIGNALED
;
569 do_exit(SIGBUS
| 0x80);
/* dump_thread - core-dump hook; the body was lost in extraction
 * (presumably empty on UML -- TODO confirm). */
572 void dump_thread(struct pt_regs
*regs
, struct user
*u
)
/* enable_hlt - body lost in extraction; presumably panics like
 * disable_hlt below -- TODO confirm. */
576 void enable_hlt(void)
/* disable_hlt - hlt management is meaningless on UML; always fatal. */
581 void disable_hlt(void)
583 panic("disable_hlt");
/* Size of the arch signal frame, defined elsewhere. */
586 extern int signal_frame_size
;
588 void *um_kmalloc(int size
)
590 return(kmalloc(size
, GFP_KERNEL
));
593 void *um_kmalloc_atomic(int size
)
595 return(kmalloc(size
, GFP_ATOMIC
));
598 unsigned long get_fault_addr(void)
600 return((unsigned long) current
->thread
.fault_addr
);
603 EXPORT_SYMBOL(get_fault_addr
);
605 void clear_singlestep(void *t
)
607 struct task_struct
*task
= (struct task_struct
*) t
;
609 task
->ptrace
&= ~PT_DTRACE
;
/*
 * singlestepping - report whether the given task is being single-stepped
 * (PT_DTRACE), with a special case for single-stepping over a syscall.
 * NOTE(review): the return for the singlestep_syscall branch was lost in
 * extraction -- TODO confirm its value against upstream.
 */
612 int singlestepping(void *t
)
614 struct task_struct
*task
= (struct task_struct
*) t
;
616 if(task
->thread
.singlestep_syscall
)
618 return(task
->ptrace
& PT_DTRACE
);
621 void not_implemented(void)
623 printk(KERN_DEBUG
"Something isn't implemented in here\n");
626 EXPORT_SYMBOL(not_implemented
);
628 int user_context(unsigned long sp
)
630 return((sp
& (PAGE_MASK
<< 1)) != current
->thread
.kernel_stack
);
/* Remove the umid directory on shutdown via the UML exitcall mechanism. */
633 extern void remove_umid_dir(void);
634 __uml_exitcall(remove_umid_dir
);
/* Bounds of the __uml_exitcall section, laid down by the linker script. */
636 extern exitcall_t __uml_exitcall_begin
, __uml_exitcall_end
;
/*
 * do_uml_exitcalls - walk the exitcall section backwards, invoking each
 * registered exitcall.
 * NOTE(review): the declaration of `call` and the `(*call)();` invocation
 * in the loop body were lost in extraction.
 */
638 void do_uml_exitcalls(void)
642 call
= &__uml_exitcall_end
;
643 while (--call
>= &__uml_exitcall_begin
)
/* Round `addr` up with the file's ROUND_UP macro, handing the result back
 * as an opaque pointer. */
void *round_up(unsigned long addr)
{
	return(ROUND_UP(addr));
}
/* Round `addr` down with the file's ROUND_DOWN macro, handing the result
 * back as an opaque pointer. */
void *round_down(unsigned long addr)
{
	return(ROUND_DOWN(addr));
}
/*
 * uml_strdup - kernel-side strdup: kmalloc a copy of `string` (GFP_KERNEL),
 * returning NULL on allocation failure.  Caller frees.
 * NOTE(review): the declaration of `new`, the strcpy into it, and the
 * final return were lost in extraction.
 */
657 char *uml_strdup(char *string
)
661 new = kmalloc(strlen(string
) + 1, GFP_KERNEL
);
662 if(new == NULL
) return(NULL
);
667 /* Changed by jail_setup, which is a setup */
/*
 * jail_setup - command-line handler for the "jail" option, which protects
 * kernel memory from processes.  Refuses (via the printf diagnostics) to
 * enable jail on configurations it's incompatible with, and lowers
 * CAP_SYS_RAWIO from the bounding set.
 * NOTE(review): extraction lost most of this function -- the `jail = 1`
 * assignment, the #ifdef CONFIG_SMP/CONFIG_HOSTFS guards matching the
 * first two printfs, the continuation of each printf string, and the
 * return.  The "may not used used" duplication is in the original string
 * literals and is left untouched here.
 */
670 int __init
jail_setup(char *line
, int *add
)
676 printf("'jail' may not used used in a kernel with CONFIG_SMP "
681 printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
685 #ifdef CONFIG_MODULES
686 printf("'jail' may not used used in a kernel with CONFIG_MODULES "
692 /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
693 * Removing it from the bounding set eliminates the ability of anything
694 * to acquire it, and thus read or write kernel memory.
696 cap_lower(cap_bset
, CAP_SYS_RAWIO
);
/* Register "jail" as a UML setup option; help text truncated by
 * extraction. */
701 __uml_setup("jail", jail_setup
,
703 " Enables the protection of kernel memory from processes.\n\n"
706 static void mprotect_kernel_mem(int w
)
708 unsigned long start
, end
;
710 if(!jail
|| (current
== &init_task
)) return;
712 start
= (unsigned long) current
->thread_info
+ PAGE_SIZE
;
713 end
= (unsigned long) current
->thread_info
+ PAGE_SIZE
* 4;
714 protect(uml_reserved
, start
- uml_reserved
, 1, w
, 1, 1);
715 protect(end
, high_physmem
- end
, 1, w
, 1, 1);
717 start
= (unsigned long) ROUND_DOWN(&_stext
);
718 end
= (unsigned long) ROUND_UP(&_etext
);
719 protect(start
, end
- start
, 1, w
, 1, 1);
721 start
= (unsigned long) ROUND_DOWN(&_unprotected_end
);
722 end
= (unsigned long) ROUND_UP(&_edata
);
723 protect(start
, end
- start
, 1, w
, 1, 1);
725 start
= (unsigned long) ROUND_DOWN(&__bss_start
);
726 end
= (unsigned long) ROUND_UP(brk_start
);
727 protect(start
, end
- start
, 1, w
, 1, 1);
729 mprotect_kernel_vm(w
);
/* No SMP problems since jailing and SMP are incompatible */
void unprotect_kernel_mem(void)
{
	mprotect_kernel_mem(1);
}

void protect_kernel_mem(void)
{
	mprotect_kernel_mem(0);
}
743 void *get_init_task(void)
745 return(&init_thread_union
.thread_info
.task
);
/* copy_to_user wrapper callable from the user side of UML. */
int copy_to_user_proc(void *to, void *from, int size)
{
	return(copy_to_user(to, from, size));
}
/* copy_from_user wrapper callable from the user side of UML. */
int copy_from_user_proc(void *to, void *from, int size)
{
	return(copy_from_user(to, from, size));
}
/* clear_user wrapper callable from the user side of UML. */
int clear_user_proc(void *buf, int size)
{
	return(clear_user(buf, size));
}
763 void set_thread_sc(void *sc
)
765 current
->thread
.regs
.regs
.sc
= sc
;
/*
 * smp_sigio_handler - per-CPU SIGIO handling hook.
 * NOTE(review): only the cpu lookup survives; the CONFIG_SMP IPI handling
 * and the return value were lost in extraction.
 */
768 int smp_sigio_handler(void)
771 int cpu
= current
->thread_info
->cpu
;
/* in_interrupt() wrapper callable from the user side of UML. */
int um_in_interrupt(void)
{
	return(in_interrupt());
}
/* NOTE(review): the enclosing function's signature was lost in extraction;
 * by its body this returns the current task's CPU number (presumably
 * `int cpu(void)` -- TODO confirm against upstream). */
786 return(current
->thread_info
->cpu
);
/*
 * Overrides for Emacs so that we follow Linus's tabbing style.
 * Emacs will notice this stuff at the end of the file and automatically
 * adjust the settings for this buffer only. This must remain at the end
 * of the file.
 * ---------------------------------------------------------------------------
 * Local variables:
 * c-file-style: "linux"
 * End:
 */