2 * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
3 * Licensed under the GPL
6 #include "linux/sched.h"
7 #include "linux/signal.h"
8 #include "linux/kernel.h"
9 #include "linux/interrupt.h"
10 #include "linux/ptrace.h"
11 #include "asm/system.h"
12 #include "asm/pgalloc.h"
13 #include "asm/ptrace.h"
14 #include "asm/tlbflush.h"
16 #include "signal_user.h"
17 #include "kern_util.h"
18 #include "user_util.h"
21 #include "sigcontext.h"
22 #include "time_user.h"
/*
 * tt-mode context switch from task 'prev' to task 'next'.
 *
 * Visible steps: the outgoing task is recorded in to->thread.prev_sched,
 * forward_interrupts(), forward_ipi() and forward_pending_sigio() are
 * called with the incoming task's extern_pid, and SIGVTALRM, SIGALRM and
 * SIGPROF are switched off with change_sig(), keeping the returned values.
 * One byte is then written to the incoming task's switch_pipe[1].  If the
 * outgoing task is a zombie or dead it kills its own host process;
 * otherwise it reads a byte from its own switch_pipe[0].  After the read,
 * a zombie/dead current->thread.prev_sched is killed through its
 * extern_pid, the three saved change_sig() values are passed back, and
 * current->thread.prev_sched is returned.
 *
 * 'last' is not referenced in the visible lines.
 *
 * NOTE(review): this listing drops several original lines (the embedded
 * numbering skips, for instance, 64 and 72, which presumably hold the
 * conditions guarding the two panic() calls); the declarations of
 * 'flags', 'c' and 'reading' and the assignments of 'from' and 'to' are
 * not shown either.  Confirm against the complete file.
 */
29 void *switch_to_tt(void *prev
, void *next
, void *last
)
31 struct task_struct
*from
, *to
, *prev_sched
;
33 int err
, vtalrm
, alrm
, prof
, cpu
;
35 /* jailing and SMP are incompatible, so this doesn't need to be
43 to
->thread
.prev_sched
= from
;
45 cpu
= from
->thread_info
->cpu
;
47 forward_interrupts(to
->thread
.mode
.tt
.extern_pid
);
49 forward_ipi(cpu_data
[cpu
].ipi_pipe
[0], to
->thread
.mode
.tt
.extern_pid
);
51 local_irq_save(flags
);
53 vtalrm
= change_sig(SIGVTALRM
, 0);
54 alrm
= change_sig(SIGALRM
, 0);
55 prof
= change_sig(SIGPROF
, 0);
57 forward_pending_sigio(to
->thread
.mode
.tt
.extern_pid
);
63 err
= os_write_file(to
->thread
.mode
.tt
.switch_pipe
[1], &c
, sizeof(c
));
65 panic("write of switch_pipe failed, err = %d", -err
);
68 if((from
->state
== TASK_ZOMBIE
) || (from
->state
== TASK_DEAD
))
69 os_kill_process(os_getpid(), 0);
71 err
= os_read_file(from
->thread
.mode
.tt
.switch_pipe
[0], &c
, sizeof(c
));
73 panic("read of switch_pipe failed, errno = %d", -err
);
75 /* If the process that we have just scheduled away from has exited,
76 * then it needs to be killed here. The reason is that, even though
77 * it will kill itself when it next runs, that may be too late. Its
78 * stack will be freed, possibly before then, and if that happens,
79 * we have a use-after-free situation. So, it gets killed here
80 * in case it has not already killed itself.
82 prev_sched
= current
->thread
.prev_sched
;
83 if((prev_sched
->state
== TASK_ZOMBIE
) ||
84 (prev_sched
->state
== TASK_DEAD
))
85 os_kill_process(prev_sched
->thread
.mode
.tt
.extern_pid
, 1);
87 /* This works around a nasty race with 'jail'. If we are switching
88 * between two threads of a threaded app and the incoming process
89 * runs before the outgoing process reaches the read, and it makes
90 * it all the way out to userspace, then it will have write-protected
91 * the outgoing process stack. Then, when the outgoing process
92 * returns from the write, it will segfault because it can no longer
93 * write its own stack. So, in order to avoid that, the incoming
94 * thread sits in a loop yielding until 'reading' is set. This
95 * isn't entirely safe, since there may be a reschedule from a timer
96 * happening between setting 'reading' and sleeping in read. But,
97 * it should get a whole quantum in which to reach the read and sleep,
98 * which should be enough.
102 while(!reading
) sched_yield();
105 change_sig(SIGVTALRM
, vtalrm
);
106 change_sig(SIGALRM
, alrm
);
107 change_sig(SIGPROF
, prof
);
112 local_irq_restore(flags
);
114 return(current
->thread
.prev_sched
);
117 void release_thread_tt(struct task_struct
*task
)
119 int pid
= task
->thread
.mode
.tt
.extern_pid
;
121 if(os_getpid() != pid
)
122 os_kill_process(pid
, 0);
125 void exit_thread_tt(void)
127 os_close_file(current
->thread
.mode
.tt
.switch_pipe
[0]);
128 os_close_file(current
->thread
.mode
.tt
.switch_pipe
[1]);
/*
 * Stops the calling host process with os_stop_process() and then reads
 * sizeof(c) bytes from 'fd' into 'c'; the failure path panics with the
 * negated result of the read.
 *
 * NOTE(review): the declarations of 'err' and 'c' and the condition that
 * guards the panic() are not present in this listing (embedded line
 * numbers 132-135 and 138 are skipped) - confirm against the full file.
 */
131 void suspend_new_thread(int fd
)
136 os_stop_process(os_getpid());
137 err
= os_read_file(fd
, &c
, sizeof(c
));
139 panic("read failed in suspend_new_thread, err = %d", -err
);
142 void schedule_tail(task_t
*prev
);
/*
 * Handler passed to init_new_thread_stack() by new_thread_proc(); the
 * body of a new kernel thread runs from here.
 *
 * Visible steps: fetch the thread function and its argument from
 * current->thread.request.u.thread, set the task's saved sigcontext
 * pointer to the address just past 'sig', clear the SIGVTALRM, SIGALRM,
 * SIGIO and SIGPROF bits in that sigcontext's signal mask, wait in
 * suspend_new_thread() on switch_pipe[0], call schedule_tail() when
 * prev_sched is non-NULL and then reset prev_sched to NULL, call
 * init_new_thread_signals(1), free the temporary stack, set the command
 * line to "(kernel thread)", call change_sig(..., 1) for SIGUSR1,
 * SIGVTALRM and SIGPROF, and finally call run_kernel_thread(fn, arg, ...).
 *
 * NOTE(review): the declarations of 'fn' and 'arg' and the statement
 * controlled by the final if() are missing from this listing, and the
 * token '¤t' is presumably a mis-encoded '&current' - confirm against
 * the complete file.
 */
144 static void new_thread_handler(int sig
)
146 unsigned long disable
;
150 fn
= current
->thread
.request
.u
.thread
.proc
;
151 arg
= current
->thread
.request
.u
.thread
.arg
;
153 UPT_SC(¤t
->thread
.regs
.regs
) = (void *) (&sig
+ 1);
154 disable
= (1 << (SIGVTALRM
- 1)) | (1 << (SIGALRM
- 1)) |
155 (1 << (SIGIO
- 1)) | (1 << (SIGPROF
- 1));
156 SC_SIGMASK(UPT_SC(¤t
->thread
.regs
.regs
)) &= ~disable
;
158 suspend_new_thread(current
->thread
.mode
.tt
.switch_pipe
[0]);
161 if(current
->thread
.prev_sched
!= NULL
)
162 schedule_tail(current
->thread
.prev_sched
);
163 current
->thread
.prev_sched
= NULL
;
165 init_new_thread_signals(1);
167 free_page(current
->thread
.temp_stack
);
168 set_cmdline("(kernel thread)");
170 change_sig(SIGUSR1
, 1);
171 change_sig(SIGVTALRM
, 1);
172 change_sig(SIGPROF
, 1);
174 if(!run_kernel_thread(fn
, arg
, ¤t
->thread
.exec_buf
))
177 /* XXX No set_user_mode here because a newly execed process will
178 * immediately segfault on its non-existent IP, coming straight back
179 * to the signal handler, which will call set_user_mode on its way
180 * out. This should probably change since it's confusing.
/*
 * Trampoline that copy_thread_tt() selects through 'tramp'.
 *
 * Visible steps: pass 'stack' and new_thread_handler to
 * init_new_thread_stack(), call os_usr1_process() on the process's own
 * pid, then call change_sig(SIGUSR1, 1).
 *
 * NOTE(review): the lines between the comment below and the
 * init_new_thread_stack() call (embedded numbers 198-201), and the
 * function's return statement, are not present in this listing.
 */
184 static int new_thread_proc(void *stack
)
186 /* local_irq_disable is needed to block out signals until this thread is
187 * properly scheduled. Otherwise, the tracing thread will get mighty
188 * upset about any signals that arrive before that.
189 * This has the complication that it sets the saved signal mask in
190 * the sigcontext to block signals. This gets restored when this
191 * thread (or a descendant, since they get a copy of this sigcontext)
192 * returns to userspace.
193 * So, this is compensated for elsewhere.
194 * XXX There is still a small window until local_irq_disable() actually
195 * finishes where signals are possible - shouldn't be a problem in
196 * practice since SIGIO hasn't been forwarded here yet, and the
197 * local_irq_disable should finish before a SIGVTALRM has time to be
202 init_new_thread_stack(stack
, new_thread_handler
);
203 os_usr1_process(os_getpid());
204 change_sig(SIGUSR1
, 1);
208 /* Signal masking - signals are blocked at the start of fork_tramp. They
209 * are re-enabled when finish_fork_handler is entered by fork_tramp hitting
210 * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off,
211 * so it is blocked before it's called. They are re-enabled on sigreturn
212 * despite the fact that they were blocked when the SIGUSR1 was issued because
213 * copy_thread copies the parent's sigcontext, including the signal mask
214 * onto the signal frame.
/*
 * Handler passed to init_new_thread_stack() by fork_tramp(); completes
 * the child side of a fork.
 *
 * Visible steps: set the task's saved sigcontext pointer to the address
 * just past 'sig', wait in suspend_new_thread() on switch_pipe[0], call
 * schedule_tail() when prev_sched is non-NULL and then reset prev_sched
 * to NULL, call change_sig(SIGVTALRM, 1), call protect_memory() over
 * uml_reserved..high_physmem when the task's mm differs from its
 * parent's, call task_protections() on current_thread, free the temporary
 * stack, call change_sig(SIGUSR1, 0) and finally set_user_mode(current).
 *
 * NOTE(review): some original lines are absent from this listing (for
 * example embedded number 232, the tail of the protect_memory() call),
 * and the token '¤t' is presumably a mis-encoded '&current'.
 */
217 void finish_fork_handler(int sig
)
219 UPT_SC(¤t
->thread
.regs
.regs
) = (void *) (&sig
+ 1);
220 suspend_new_thread(current
->thread
.mode
.tt
.switch_pipe
[0]);
223 if(current
->thread
.prev_sched
!= NULL
)
224 schedule_tail(current
->thread
.prev_sched
);
225 current
->thread
.prev_sched
= NULL
;
228 change_sig(SIGVTALRM
, 1);
230 if(current
->mm
!= current
->parent
->mm
)
231 protect_memory(uml_reserved
, high_physmem
- uml_reserved
, 1,
233 task_protections((unsigned long) current_thread
);
235 free_page(current
->thread
.temp_stack
);
237 change_sig(SIGUSR1
, 0);
238 set_user_mode(current
);
/*
 * Fork trampoline: passes 'stack' and finish_fork_handler to
 * init_new_thread_stack(), calls os_usr1_process() on the process's own
 * pid, then calls change_sig(SIGUSR1, 1).
 *
 * NOTE(review): lines before the init_new_thread_stack() call and the
 * return statement are not present in this listing.
 */
241 int fork_tramp(void *stack
)
245 init_new_thread_stack(stack
, finish_fork_handler
);
247 os_usr1_process(os_getpid());
248 change_sig(SIGUSR1
, 1);
/*
 * tt-mode thread creation for the new task 'p'.
 *
 * Visible steps: depending on current->thread.forking a trampoline is
 * chosen (only the new_thread_proc assignment, together with the copy of
 * the parent's request.u.thread, is visible), p's switch pipe is created
 * with os_pipe(), a temporary stack is obtained from alloc_stack() and
 * stored in p->thread.temp_stack, clone_flags is masked down to CLONE_VM,
 * and start_fork_tramp() is called to obtain new_pid.  When forking, the
 * parent's sigcontext is copied into p with sc_to_sc(), the child's
 * syscall return value is set to 0 and its SP is replaced when 'sp' is
 * non-zero.  new_pid is stored in p's extern_pid and posted as an OP_FORK
 * request on the current task, os_usr1_process() is called on the current
 * pid, and SIGUSR1 is enabled and then disabled again with change_sig().
 *
 * 'nr', 'stack_top' and 'regs' are not referenced in the visible lines.
 *
 * NOTE(review): many original lines are absent from this listing
 * (declarations of 'err', 'stack' and 'new_pid', the conditions guarding
 * the printk() calls, the error returns and the final return), and the
 * token '¤t' is presumably a mis-encoded '&current'.
 */
252 int copy_thread_tt(int nr
, unsigned long clone_flags
, unsigned long sp
,
253 unsigned long stack_top
, struct task_struct
* p
,
254 struct pt_regs
*regs
)
256 int (*tramp
)(void *);
260 if(current
->thread
.forking
)
263 tramp
= new_thread_proc
;
264 p
->thread
.request
.u
.thread
= current
->thread
.request
.u
.thread
;
267 err
= os_pipe(p
->thread
.mode
.tt
.switch_pipe
, 1, 1);
269 printk("copy_thread : pipe failed, err = %d\n", -err
);
273 stack
= alloc_stack(0, 0);
275 printk(KERN_ERR
"copy_thread : failed to allocate "
276 "temporary stack\n");
280 clone_flags
&= CLONE_VM
;
281 p
->thread
.temp_stack
= stack
;
282 new_pid
= start_fork_tramp(p
->thread_info
, stack
, clone_flags
, tramp
);
284 printk(KERN_ERR
"copy_thread : clone failed - errno = %d\n",
289 if(current
->thread
.forking
){
290 sc_to_sc(UPT_SC(&p
->thread
.regs
.regs
),
291 UPT_SC(¤t
->thread
.regs
.regs
));
292 SC_SET_SYSCALL_RETURN(UPT_SC(&p
->thread
.regs
.regs
), 0);
293 if(sp
!= 0) SC_SP(UPT_SC(&p
->thread
.regs
.regs
)) = sp
;
295 p
->thread
.mode
.tt
.extern_pid
= new_pid
;
297 current
->thread
.request
.op
= OP_FORK
;
298 current
->thread
.request
.u
.fork
.pid
= new_pid
;
299 os_usr1_process(os_getpid());
301 /* Enable the signal and then disable it to ensure that it is handled
302 * here, and nowhere else.
304 change_sig(SIGUSR1
, 1);
306 change_sig(SIGUSR1
, 0);
314 current
->thread
.request
.op
= OP_REBOOT
;
315 os_usr1_process(os_getpid());
316 change_sig(SIGUSR1
, 1);
321 current
->thread
.request
.op
= OP_HALT
;
322 os_usr1_process(os_getpid());
323 change_sig(SIGUSR1
, 1);
/*
 * Calls os_kill_process() on the extern_pid of task 'p' when it differs
 * from 'me', and does the same for init_task.
 *
 * NOTE(review): the declaration and assignment of 'me' and the loop that
 * advances 'p' (embedded numbers 329-332) are not present in this
 * listing; 'me' is presumably the caller's own host pid - confirm.
 */
326 void kill_off_processes_tt(void)
328 struct task_struct
*p
;
333 if(p
->thread
.mode
.tt
.extern_pid
!= me
)
334 os_kill_process(p
->thread
.mode
.tt
.extern_pid
, 0);
336 if(init_task
.thread
.mode
.tt
.extern_pid
!= me
)
337 os_kill_process(init_task
.thread
.mode
.tt
.extern_pid
, 0);
/*
 * Arranges for proc(arg) to be run as a callback request.
 *
 * Visible steps: compare the caller's pid with tracing_pid; post an OP_CB
 * request carrying 'proc' and 'arg' on the current task, call
 * os_usr1_process() on the current pid, then enable and disable SIGUSR1
 * with change_sig().  do_proc_op() is where an OP_CB request's proc is
 * actually invoked.
 *
 * NOTE(review): the body of the tracing_pid branch and the else line
 * (embedded numbers 343-345) are not present in this listing.
 */
340 void initial_thread_cb_tt(void (*proc
)(void *), void *arg
)
342 if(os_getpid() == tracing_pid
){
346 current
->thread
.request
.op
= OP_CB
;
347 current
->thread
.request
.u
.cb
.proc
= proc
;
348 current
->thread
.request
.u
.cb
.arg
= arg
;
349 os_usr1_process(os_getpid());
350 change_sig(SIGUSR1
, 1);
352 change_sig(SIGUSR1
, 0);
/*
 * Carries out the request stored in a task's thread.request and then
 * resets request.op to OP_NONE.
 *
 * Visible actions: for an exec request, do_exec() is called with the old
 * extern_pid and the requested pid, after which extern_pid and the
 * cpu_tasks[] entry for the task's cpu are updated to the new pid; for a
 * fork request, attach_process() is called on the forked pid; for a
 * callback request, request.u.cb.proc is invoked with request.u.cb.arg;
 * an unrecognised op ends in tracer_panic().
 *
 * NOTE(review): the switch statement, its case labels, the assignment of
 * 'task' from 't', the declarations of 'op' and 'pid', any use of
 * 'proc_id' and the return statement are absent from this listing, so the
 * mapping of actions to op values above is inferred from the request
 * fields each action reads - confirm against the complete file.
 */
356 int do_proc_op(void *t
, int proc_id
)
358 struct task_struct
*task
;
359 struct thread_struct
*thread
;
363 thread
= &task
->thread
;
364 op
= thread
->request
.op
;
370 pid
= thread
->request
.u
.exec
.pid
;
371 do_exec(thread
->mode
.tt
.extern_pid
, pid
);
372 thread
->mode
.tt
.extern_pid
= pid
;
373 cpu_tasks
[task
->thread_info
->cpu
].pid
= pid
;
376 attach_process(thread
->request
.u
.fork
.pid
);
379 (*thread
->request
.u
.cb
.proc
)(thread
->request
.u
.cb
.arg
);
385 tracer_panic("Bad op in do_proc_op");
388 thread
->request
.op
= OP_NONE
;
392 void init_idle_tt(void)
397 /* Changed by jail_setup, which is a setup */
/*
 * Setup handler for the "jail" option (registered through __uml_setup).
 *
 * Visible behaviour: prints a refusal message for kernels built with
 * CONFIG_SMP, CONFIG_HOSTFS or CONFIG_MODULES, and lowers CAP_SYS_RAWIO
 * in cap_bset.
 *
 * NOTE(review): the messages read "may not used used"; this looks like a
 * typo for "may not be used" and is left unchanged here because it is
 * runtime text.  The #ifdef lines for SMP and HOSTFS, the tails of the
 * printf() calls, any use of 'line' and 'add', and the return statement
 * are not present in this listing.
 */
400 int __init
jail_setup(char *line
, int *add
)
406 printf("'jail' may not used used in a kernel with CONFIG_SMP "
411 printf("'jail' may not used used in a kernel with CONFIG_HOSTFS "
415 #ifdef CONFIG_MODULES
416 printf("'jail' may not used used in a kernel with CONFIG_MODULES "
422 /* CAP_SYS_RAWIO controls the ability to open /dev/mem and /dev/kmem.
423 * Removing it from the bounding set eliminates the ability of anything
424 * to acquire it, and thus read or write kernel memory.
426 cap_lower(cap_bset
, CAP_SYS_RAWIO
);
/*
 * Associates the "jail" option string with jail_setup and supplies its
 * help text.
 * NOTE(review): part of the help text and the closing of this invocation
 * are not present in this listing.
 */
431 __uml_setup("jail", jail_setup
,
433 " Enables the protection of kernel memory from processes.\n\n"
436 static void mprotect_kernel_mem(int w
)
438 unsigned long start
, end
;
441 if(!jail
|| (current
== &init_task
)) return;
443 pages
= (1 << CONFIG_KERNEL_STACK_ORDER
);
445 start
= (unsigned long) current_thread
+ PAGE_SIZE
;
446 end
= (unsigned long) current_thread
+ PAGE_SIZE
* pages
;
447 protect_memory(uml_reserved
, start
- uml_reserved
, 1, w
, 1, 1);
448 protect_memory(end
, high_physmem
- end
, 1, w
, 1, 1);
450 start
= (unsigned long) UML_ROUND_DOWN(&_stext
);
451 end
= (unsigned long) UML_ROUND_UP(&_etext
);
452 protect_memory(start
, end
- start
, 1, w
, 1, 1);
454 start
= (unsigned long) UML_ROUND_DOWN(&_unprotected_end
);
455 end
= (unsigned long) UML_ROUND_UP(&_edata
);
456 protect_memory(start
, end
- start
, 1, w
, 1, 1);
458 start
= (unsigned long) UML_ROUND_DOWN(&__bss_start
);
459 end
= (unsigned long) UML_ROUND_UP(brk_start
);
460 protect_memory(start
, end
- start
, 1, w
, 1, 1);
462 mprotect_kernel_vm(w
);
/* Calls mprotect_kernel_mem() with a write setting of 1. */
void unprotect_kernel_mem(void)
{
	const int writable = 1;

	mprotect_kernel_mem(writable);
}
/* Calls mprotect_kernel_mem() with a write setting of 0. */
void protect_kernel_mem(void)
{
	const int writable = 0;

	mprotect_kernel_mem(writable);
}
/* Kernel entry point, defined outside this file. */
void start_kernel(void);
/*
 * Function that start_uml_tt() hands to tracer().
 *
 * Visible steps: record 'pid' and the current task in cpu_tasks[0], set
 * cpu_online_map to CPU 0 only, and stop the process with
 * os_stop_process() when 'debug' is set.
 *
 * NOTE(review): the declaration and assignment of 'pid', everything after
 * the debug check (embedded numbers 490-492) and the return statement are
 * not present in this listing.
 */
477 static int start_kernel_proc(void *unused
)
484 cpu_tasks
[0].pid
= pid
;
485 cpu_tasks
[0].task
= current
;
487 cpu_online_map
= cpumask_of_cpu(0);
489 if(debug
) os_stop_process(pid
);
494 void set_tracing(void *task
, int tracing
)
496 ((struct task_struct
*) task
)->thread
.mode
.tt
.tracing
= tracing
;
499 int is_tracing(void *t
)
501 return (((struct task_struct
*) t
)->thread
.mode
.tt
.tracing
);
/*
 * Requests tracing for a task: 't' selects the task, with NULL meaning
 * the current task.  The task's tt tracing flag is tested first; the
 * visible remainder posts an OP_TRACE_ON request on the task and calls
 * os_usr1_process() on the caller's pid.
 *
 * NOTE(review): the statement controlled by the tracing check and the
 * function's return statements (embedded numbers 510 and 513) are not
 * present in this listing.
 */
504 int set_user_mode(void *t
)
506 struct task_struct
*task
;
508 task
= t
? t
: current
;
509 if(task
->thread
.mode
.tt
.tracing
)
511 task
->thread
.request
.op
= OP_TRACE_ON
;
512 os_usr1_process(os_getpid());
/*
 * Records 'pid' as init_task's extern_pid and creates init_task's switch
 * pipe with os_pipe(); the failure path panics.
 *
 * NOTE(review): the declaration of 'err', the condition guarding the
 * panic() and the panic() argument line are not present in this listing.
 */
516 void set_init_pid(int pid
)
520 init_task
.thread
.mode
.tt
.extern_pid
= pid
;
521 err
= os_pipe(init_task
.thread
.mode
.tt
.switch_pipe
, 1, 1);
523 panic("Can't create switch pipe for init_task, errno = %d",
/*
 * Reports single-step state for task 't' (an untyped struct task_struct
 * pointer).  The tt singlestep_syscall flag is tested first; the visible
 * fall-through result is the PT_DTRACE bit of task->ptrace.
 *
 * NOTE(review): the statement controlled by the singlestep_syscall check
 * (embedded number 532) is not present in this listing.
 */
527 int singlestepping_tt(void *t
)
529 struct task_struct
*task
= t
;
531 if(task
->thread
.mode
.tt
.singlestep_syscall
)
533 return(task
->ptrace
& PT_DTRACE
);
536 void clear_singlestep(void *t
)
538 struct task_struct
*task
= t
;
540 task
->ptrace
&= ~PT_DTRACE
;
543 int start_uml_tt(void)
548 pages
= (1 << CONFIG_KERNEL_STACK_ORDER
);
549 sp
= (void *) ((unsigned long) init_task
.thread_info
) +
550 pages
* PAGE_SIZE
- sizeof(unsigned long);
551 return(tracer(start_kernel_proc
, sp
));
554 int external_pid_tt(struct task_struct
*task
)
556 return(task
->thread
.mode
.tt
.extern_pid
);
559 int thread_pid_tt(struct task_struct
*task
)
561 return(task
->thread
.mode
.tt
.extern_pid
);
/*
 * Looks for a task whose tt-mode extern_pid equals 'pid'.  The task list
 * is walked with for_each_process() while tasklist_lock is held for
 * reading; the lock is dropped both on a match and after the loop.
 *
 * NOTE(review): the return statements (embedded numbers 572 and 576) are
 * not present in this listing; presumably non-zero on a match and zero
 * otherwise - confirm against the complete file.
 */
564 int is_valid_pid(int pid
)
566 struct task_struct
*task
;
568 read_lock(&tasklist_lock
);
569 for_each_process(task
){
570 if(task
->thread
.mode
.tt
.extern_pid
== pid
){
571 read_unlock(&tasklist_lock
);
575 read_unlock(&tasklist_lock
);
580 * Overrides for Emacs so that we follow Linus's tabbing style.
581 * Emacs will notice this stuff at the end of the file and automatically
582 * adjust the settings for this buffer only. This must remain at the end
584 * ---------------------------------------------------------------------------
586 * c-file-style: "linux"