/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);
static void release_task(struct task_struct * p)
{
	if (p != current) {
#ifdef CONFIG_SMP
		/*
		 * Wait to make sure the process isn't on the
		 * runqueue (active on some other CPU still)
		 */
		for (;;) {
			task_lock(p);
			if (!p->has_cpu)
				break;
			task_unlock(p);
			do {
				barrier();
			} while (p->has_cpu);
		}
		task_unlock(p);
#endif
		atomic_dec(&p->user->processes);
		free_uid(p->user);
		unhash_process(p);

		release_thread(p);
		current->cmin_flt += p->min_flt + p->cmin_flt;
		current->cmaj_flt += p->maj_flt + p->cmaj_flt;
		current->cnswap += p->nswap + p->cnswap;
		/*
		 * Potentially available timeslices are retrieved
		 * here - this way the parent does not get penalized
		 * for creating too many processes.
		 *
		 * (this cannot be used to artificially 'generate'
		 * timeslices, because any timeslice recovered here
		 * was given away by the parent in the first place.)
		 */
		current->counter += p->counter;
		if (current->counter >= MAX_COUNTER)
			current->counter = MAX_COUNTER;
		free_task_struct(p);
	} else {
		printk("task releasing itself\n");
	}
}
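/*
 * Worked example (added note, not in the original source): if the
 * parent's counter is 50 ticks and the reaped child still held 30, the
 * parent ends up with min(50 + 30, MAX_COUNTER) - so reaping any number
 * of children can never lift it above MAX_COUNTER.
 */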
/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int fallback;

	fallback = -1;
	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->session <= 0)
			continue;
		if (p->pgrp == pgrp) {
			fallback = p->session;
			break;
		}
		if (p->pid == pgrp)
			fallback = p->session;
	}
	read_unlock(&tasklist_lock);
	return fallback;
}
/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p == ignored_task) || (p->pgrp != pgrp) ||
		    (p->state == TASK_ZOMBIE) ||
		    (p->p_pptr->pid == 1))
			continue;
		if ((p->p_pptr->pgrp != pgrp) &&
		    (p->p_pptr->session == p->session)) {
			read_unlock(&tasklist_lock);
			return 0;
		}
	}
	read_unlock(&tasklist_lock);
	return 1;	/* (sighing) "Often!" */
}

int is_orphaned_pgrp(int pgrp)
{
	return will_become_orphaned_pgrp(pgrp, 0);
}
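/*
 * Worked example (added for illustration): a shell runs "cat | less",
 * the user stops the pipeline with ^Z, and the shell itself then dies.
 * Both pipeline members get reparented to init, so the loop above finds
 * no live parent that is outside the group but inside the session, and
 * the group is orphaned - per POSIX it must get SIGHUP then SIGCONT so
 * the stopped jobs are not left sleeping forever.
 */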
static inline int has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct * p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->pgrp != pgrp)
			continue;
		if (p->state != TASK_STOPPED)
			continue;
		retval = 1;
		break;
	}
	read_unlock(&tasklist_lock);
	return retval;
}
/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give it to
 * the global child reaper process (ie "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct * p, *reaper;

	read_lock(&tasklist_lock);

	/* Next in our thread group */
	reaper = next_thread(father);
	if (reaper == father)
		reaper = child_reaper;

	for_each_task(p) {
		if (p->p_opptr == father) {
			/* We don't want people slaying init */
			p->exit_signal = SIGCHLD;
			p->self_exec_id++;
			p->p_opptr = reaper;
			if (p->pdeath_signal)
				send_sig(p->pdeath_signal, p, 0);
		}
	}
	read_unlock(&tasklist_lock);
}
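/*
 * Note (added): only p_opptr - the *original* parent link - is rewired
 * here; p_pptr may still point at a ptrace tracer.  exit_notify() below
 * moves each remaining child back to its (possibly new) original parent
 * when the dying task's child list is torn down.
 */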
static inline void close_files(struct files_struct * files)
{
	int i, j;

	j = 0;
	for (;;) {
		unsigned long set;
		i = j * __NFDBITS;
		if (i >= files->max_fdset || i >= files->max_fds)
			break;
		set = files->open_fds->fds_bits[j++];
		while (set) {
			if (set & 1) {
				struct file * file = xchg(&files->fd[i], NULL);
				if (file)
					filp_close(file, files);
			}
			i++;
			set >>= 1;
		}
	}
}
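/*
 * Illustrative sketch (added; not part of the original file): the loop
 * above walks the open-fd bitmap one word at a time, so with
 * __NFDBITS == 32 a set bit 1 in fds_bits[2] means fd 2*32 + 1 == 65 is
 * open.  A minimal userspace analogue of the same walk, with
 * hypothetical names, compiled out of the kernel build:
 */
#if 0
#include <stdio.h>

#define NFDBITS_DEMO 32			/* stand-in for __NFDBITS */

static void walk_fd_bitmap(const unsigned long *bits, int nwords)
{
	int i, j;

	for (j = 0; j < nwords; j++) {
		unsigned long set = bits[j];
		i = j * NFDBITS_DEMO;
		while (set) {		/* same skip-ahead trick: stop */
			if (set & 1)	/* as soon as no bits remain  */
				printf("fd %d is open\n", i);
			i++;
			set >>= 1;
		}
	}
}
#endif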
void put_files_struct(struct files_struct *files)
{
	if (atomic_dec_and_test(&files->count)) {
		close_files(files);
		/*
		 * Free the fd and fdset arrays if we expanded them.
		 */
		if (files->fd != &files->fd_array[0])
			free_fd_array(files->fd, files->max_fds);
		if (files->max_fdset > __FD_SETSIZE) {
			free_fdset(files->open_fds, files->max_fdset);
			free_fdset(files->close_on_exec, files->max_fdset);
		}
		kmem_cache_free(files_cachep, files);
	}
}
static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (files) {
		task_lock(tsk);
		tsk->files = NULL;
		task_unlock(tsk);
		put_files_struct(files);
	}
}

void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}
static inline void __put_fs_struct(struct fs_struct *fs)
{
	/* No need to hold fs->lock if we are killing it */
	if (atomic_dec_and_test(&fs->count)) {
		dput(fs->root);
		mntput(fs->rootmnt);
		dput(fs->pwd);
		mntput(fs->pwdmnt);
		if (fs->altroot) {
			dput(fs->altroot);
			mntput(fs->altrootmnt);
		}
		kmem_cache_free(fs_cachep, fs);
	}
}

void put_fs_struct(struct fs_struct *fs)
{
	__put_fs_struct(fs);
}
static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct * fs = tsk->fs;

	if (fs) {
		task_lock(tsk);
		tsk->fs = NULL;
		task_unlock(tsk);
		__put_fs_struct(fs);
	}
}

void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}
/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
	struct mm_struct *mm = current->mm;
	current->mm = NULL;
	/* active_mm is still 'mm' */
	atomic_inc(&mm->mm_count);
	enter_lazy_tlb(mm, current, smp_processor_id());
	return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
	struct mm_struct *active_mm = current->active_mm;

	current->mm = mm;
	if (mm != active_mm) {
		current->active_mm = mm;
		activate_mm(active_mm, mm);
	}
	mmdrop(active_mm);
}
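/*
 * Usage sketch (added for illustration; no caller lives in this file):
 * code that runs for a while without touching user mappings can bracket
 * the region with the two helpers above.  The task keeps 'mm' as its
 * active_mm throughout, so no TLB flush is needed on the way in or out.
 */
#if 0
void example_lazy_region(void)		/* hypothetical caller */
{
	struct mm_struct *mm = start_lazy_tlb();

	/* ... kernel-only work, no user-space accesses here ... */

	end_lazy_tlb(mm);
}
#endif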
/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	if (mm) {
		atomic_inc(&mm->mm_count);
		mm_release();
		if (mm != tsk->active_mm) BUG();
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		task_unlock(tsk);
		enter_lazy_tlb(mm, current, smp_processor_id());
		mmput(mm);
	}
}

void exit_mm(struct task_struct *tsk)
{
	__exit_mm(tsk);
}
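/*
 * Note (added): the atomic_inc(&mm->mm_count) in __exit_mm() is what
 * lets the exiting task keep 'mm' as its lazy active_mm after mmput()
 * has dropped its mm_users reference; the matching mmdrop() happens
 * later, from the context switch that finally schedules the zombie
 * away.
 */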
/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	t = current->p_pptr;

	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp, SIGHUP, 1);
		kill_pg(current->pgrp, SIGCONT, 1);
	}

	/* Let father know we died
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes.
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 */

	if (current->exit_signal != SIGCHLD &&
	    (current->parent_exec_id != t->self_exec_id ||
	     current->self_exec_id != current->parent_exec_id) &&
	    !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;

	/*
	 * This loop does two things:
	 *
	 * A.  Make init inherit all the child processes
	 * B.  Check to see if any process groups have become orphaned
	 *     as a result of our exiting, and if they have any stopped
	 *     jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	do_notify_parent(current, current->exit_signal);
	while (current->p_cptr != NULL) {
		p = current->p_cptr;
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->ptrace = 0;

		p->p_pptr = p->p_opptr;
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			write_unlock_irq(&tasklist_lock);
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp, SIGHUP, 1);
				kill_pg(pgrp, SIGCONT, 1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);

	if (current->leader)
		disassociate_ctty(1);
}
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())
		printk("Aiee, killing interrupt handler\n");
	if (!tsk->pid)
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	lock_kernel();
	sem_exit();
	__exit_mm(tsk);
	__exit_files(tsk);
	__exit_fs(tsk);
	exit_sighand(tsk);
	exit_thread();

	tsk->state = TASK_ZOMBIE;
	tsk->exit_code = code;
	exit_notify();
	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);
	schedule();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}
NORET_TYPE void up_and_exit(struct semaphore *sem, long code)
{
	if (sem)
		up(sem);

	do_exit(code);
}
asmlinkage long sys_exit(int error_code)
{
	do_exit((error_code & 0xff) << 8);
}
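/*
 * Illustrative sketch (added): the (error_code & 0xff) << 8 above is the
 * classic wait status layout - exit code in bits 8..15, low seven bits
 * zero for a normal exit.  A hypothetical userspace check of the same
 * word, compiled out of the build:
 */
#if 0
#include <sys/wait.h>
#include <assert.h>

static void check_status(int status)
{
	/* For a child that called _exit(42): */
	assert(WIFEXITED(status));		/* (status & 0x7f) == 0  */
	assert(WEXITSTATUS(status) == 42);	/* (status >> 8) & 0xff  */
}
#endif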
asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit, &wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
			if (pid > 0) {
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit, &wait);
	return retval;
}
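/*
 * Illustrative sketch (added; not from the original file): the two
 * put_user() calls above write either (exit_code << 8) | 0x7f for a
 * stopped child or the raw exit_code word for a zombie - exactly what
 * the libc wait macros take apart.  A hypothetical caller, compiled out
 * of the build:
 */
#if 0
#include <sys/wait.h>
#include <sys/resource.h>
#include <stdio.h>

static void reap_one(pid_t pid)
{
	int status;
	struct rusage ru;
	pid_t got = wait4(pid, &status, WUNTRACED, &ru);

	if (got < 0)
		return;
	if (WIFSTOPPED(status))		/* (status & 0xff) == 0x7f */
		printf("%d stopped by signal %d\n", got, WSTOPSIG(status));
	else if (WIFEXITED(status))	/* (status & 0x7f) == 0    */
		printf("%d exited with %d\n", got, WEXITSTATUS(status));
}
#endif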
#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */

asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}

#endif
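/*
 * Sketch of the libc-side compatibility mentioned above (added;
 * hypothetical and simplified): on architectures without sys_waitpid,
 * waitpid() is just wait4() with a NULL rusage pointer.
 */
#if 0
pid_t waitpid(pid_t pid, int *status, int options)
{
	return wait4(pid, status, options, NULL);
}
#endif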