kernel/exit.c

   1 /*
   2  *  linux/kernel/exit.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/config.h>
   8 #include <linux/mm.h>
   9 #include <linux/slab.h>
  10 #include <linux/interrupt.h>
  11 #include <linux/smp_lock.h>
  12 #include <linux/module.h>
  13 #include <linux/completion.h>
  14 #include <linux/personality.h>
  15 #include <linux/tty.h>
  16 #include <linux/namespace.h>
  17 #include <linux/security.h>
  18 #include <linux/acct.h>
  19 #include <linux/file.h>
  20 #include <linux/binfmts.h>
  21 #include <linux/ptrace.h>
  22 #include <linux/profile.h>
  23
  24 #include <asm/uaccess.h>
  25 #include <asm/pgtable.h>
  26 #include <asm/mmu_context.h>
  27
  28 extern void sem_exit (void);
  29 extern struct task_struct *child_reaper;
  30
  31 int getrusage(struct task_struct *, int, struct rusage *);
  32
  33 static struct dentry * __unhash_process(struct task_struct *p)
  34 {
  35         struct dentry *proc_dentry;
  36
  37         nr_threads--;
  38         detach_pid(p, PIDTYPE_PID);
  39         detach_pid(p, PIDTYPE_TGID);
  40         if (thread_group_leader(p)) {
  41                 detach_pid(p, PIDTYPE_PGID);
  42                 detach_pid(p, PIDTYPE_SID);
  43         }
  44
  45         REMOVE_LINKS(p);
  46         proc_dentry = p->proc_dentry;
  47         if (unlikely(proc_dentry != NULL)) {
  48                 spin_lock(&dcache_lock);
  49                 if (!list_empty(&proc_dentry->d_hash)) {
  50                         dget_locked(proc_dentry);
  51                         list_del_init(&proc_dentry->d_hash);
  52                 } else
  53                         proc_dentry = NULL;
  54                 spin_unlock(&dcache_lock);
  55         }
  56         return proc_dentry;
  57 }
  58
  59 void release_task(struct task_struct * p)
  60 {
  61         struct dentry *proc_dentry;
  62         task_t *leader;
  63
  64         BUG_ON(p->state < TASK_ZOMBIE);
  65
  66         if (p != current)
  67                 wait_task_inactive(p);
  68
  69         atomic_dec(&p->user->processes);
  70         security_ops->task_free_security(p);
  71         free_uid(p->user);
  72         write_lock_irq(&tasklist_lock);
  73         if (unlikely(p->ptrace))
  74                 __ptrace_unlink(p);
  75         BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
  76         __exit_sighand(p);
  77         proc_dentry = __unhash_process(p);
  78
  79         /*
  80          * If we are the last non-leader member of the thread
  81          * group, and the leader is zombie, then notify the
  82          * group leader's parent process. (if it wants notification.)
  83          */
  84         leader = p->group_leader;
  85         if (leader != p && thread_group_empty(leader) &&
  86                     leader->state == TASK_ZOMBIE && leader->exit_signal != -1)
  87                 do_notify_parent(leader, leader->exit_signal);
  88
  89         p->parent->cutime += p->utime + p->cutime;
  90         p->parent->cstime += p->stime + p->cstime;
  91         p->parent->cmin_flt += p->min_flt + p->cmin_flt;
  92         p->parent->cmaj_flt += p->maj_flt + p->cmaj_flt;
  93         p->parent->cnswap += p->nswap + p->cnswap;
  94         sched_exit(p);
  95         write_unlock_irq(&tasklist_lock);
  96
  97         if (unlikely(proc_dentry != NULL)) {
  98                 shrink_dcache_parent(proc_dentry);
  99                 dput(proc_dentry);
 100         }
 101         release_thread(p);
 102         put_task_struct(p);
 103 }
 104
 105 /* we are using it only for SMP init */
 106
 107 void unhash_process(struct task_struct *p)
 108 {
 109         struct dentry *proc_dentry;
 110
 111         write_lock_irq(&tasklist_lock);
 112         proc_dentry = __unhash_process(p);
 113         write_unlock_irq(&tasklist_lock);
 114
 115         if (unlikely(proc_dentry != NULL)) {
 116                 shrink_dcache_parent(proc_dentry);
 117                 dput(proc_dentry);
 118         }
 119 }
 120
 121 /*
 122  * This checks not only the pgrp, but falls back on the pid if no
 123  * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 124  * without this...
 125  */
 126 int session_of_pgrp(int pgrp)
 127 {
 128         struct task_struct *p;
 129         struct list_head *l;
 130         struct pid *pid;
 131         int sid = -1;
 132
 133         read_lock(&tasklist_lock);
 134         for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid)
 135                 if (p->session > 0) {
 136                         sid = p->session;
 137                         goto out;
 138                 }
 139         p = find_task_by_pid(pgrp);
 140         if (p)
 141                 sid = p->session;
 142 out:
 143         read_unlock(&tasklist_lock);
 144
 145         return sid;
 146 }
 147
 148 /*
 149  * Determine if a process group is "orphaned", according to the POSIX
 150  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 151  * by terminal-generated stop signals.  Newly orphaned process groups are
 152  * to receive a SIGHUP and a SIGCONT.
 153  *
 154  * "I ask you, have you ever known what it is to be an orphan?"
 155  */
 156 static int __will_become_orphaned_pgrp(int pgrp, task_t *ignored_task)
 157 {
 158         struct task_struct *p;
 159         struct list_head *l;
 160         struct pid *pid;
 161         int ret = 1;
 162
 163         for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
 164                 if (p == ignored_task
 165                                 || p->state >= TASK_ZOMBIE
 166                                 || p->real_parent->pid == 1)
 167                         continue;
 168                 if (p->real_parent->pgrp != pgrp
 169                             && p->real_parent->session == p->session) {
 170                         ret = 0;
 171                         break;
 172                 }
 173         }
 174         return ret;     /* (sighing) "Often!" */
 175 }
 176
 177 static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
 178 {
 179         int retval;
 180
 181         read_lock(&tasklist_lock);
 182         retval = __will_become_orphaned_pgrp(pgrp, ignored_task);
 183         read_unlock(&tasklist_lock);
 184
 185         return retval;
 186 }
 187
 188 int is_orphaned_pgrp(int pgrp)
 189 {
 190         return will_become_orphaned_pgrp(pgrp, 0);
 191 }
 192
 193 static inline int __has_stopped_jobs(int pgrp)
 194 {
 195         int retval = 0;
 196         struct task_struct *p;
 197         struct list_head *l;
 198         struct pid *pid;
 199
 200         for_each_task_pid(pgrp, PIDTYPE_PGID, p, l, pid) {
 201                 if (p->state != TASK_STOPPED)
 202                         continue;
 203                 retval = 1;
 204                 break;
 205         }
 206         return retval;
 207 }
 208
 209 static inline int has_stopped_jobs(int pgrp)
 210 {
 211         int retval;
 212
 213         read_lock(&tasklist_lock);
 214         retval = __has_stopped_jobs(pgrp);
 215         read_unlock(&tasklist_lock);
 216
 217         return retval;
 218 }
 219
 220 /**
 221  * reparent_to_init() - Reparent the calling kernel thread to the init task.
 222  *
 223  * If a kernel thread is launched as a result of a system call, or if
 224  * it ever exits, it should generally reparent itself to init so that
 225  * it is correctly cleaned up on exit.
 226  *
 227  * The various task state such as scheduling policy and priority may have
 228  * been inherited from a user process, so we reset them to sane values here.
 229  *
 230  * NOTE that reparent_to_init() gives the caller full capabilities.
 231  */
 232 void reparent_to_init(void)
 233 {
 234         write_lock_irq(&tasklist_lock);
 235
 236         ptrace_unlink(current);
 237         /* Reparent to init */
 238         REMOVE_LINKS(current);
 239         current->parent = child_reaper;
 240         current->real_parent = child_reaper;
 241         SET_LINKS(current);
 242
 243         /* Set the exit signal to SIGCHLD so we signal init on exit */
 244         current->exit_signal = SIGCHLD;
 245
 246         if ((current->policy == SCHED_NORMAL) && (task_nice(current) < 0))
 247                 set_user_nice(current, 0);
 248         /* cpus_allowed? */
 249         /* rt_priority? */
 250         /* signals? */
 251         security_ops->task_reparent_to_init(current);
 252         memcpy(current->rlim, init_task.rlim, sizeof(*(current->rlim)));
 253         current->user = INIT_USER;
 254
 255         write_unlock_irq(&tasklist_lock);
 256 }
 257
 258 /*
 259  *      Put all the gunge required to become a kernel thread without
 260  *      attached user resources in one place where it belongs.
 261  */
 262
 263 void daemonize(void)
 264 {
 265         struct fs_struct *fs;
 266
 267
 268         /*
 269          * If we were started as result of loading a module, close all of the
 270          * user space pages.  We don't need them, and if we didn't close them
 271          * they would be locked into memory.
 272          */
 273         exit_mm(current);
 274
 275         current->session = 1;
 276         current->pgrp = 1;
 277         current->tty = NULL;
 278
 279         /* Become as one with the init task */
 280
 281         exit_fs(current);       /* current->fs->count--; */
 282         fs = init_task.fs;
 283         current->fs = fs;
 284         atomic_inc(&fs->count);
 285         exit_files(current);
 286         current->files = init_task.files;
 287         atomic_inc(&current->files->count);
 288
 289         reparent_to_init();
 290 }
 291
 292 static inline void close_files(struct files_struct * files)
 293 {
 294         int i, j;
 295
 296         j = 0;
 297         for (;;) {
 298                 unsigned long set;
 299                 i = j * __NFDBITS;
 300                 if (i >= files->max_fdset || i >= files->max_fds)
 301                         break;
 302                 set = files->open_fds->fds_bits[j++];
 303                 while (set) {
 304                         if (set & 1) {
 305                                 struct file * file = xchg(&files->fd[i], NULL);
 306                                 if (file)
 307                                         filp_close(file, files);
 308                         }
 309                         i++;
 310                         set >>= 1;
 311                 }
 312         }
 313 }
 314
 315 void put_files_struct(struct files_struct *files)
 316 {
 317         if (atomic_dec_and_test(&files->count)) {
 318                 close_files(files);
 319                 /*
 320                  * Free the fd and fdset arrays if we expanded them.
 321                  */
 322                 if (files->fd != &files->fd_array[0])
 323                         free_fd_array(files->fd, files->max_fds);
 324                 if (files->max_fdset > __FD_SETSIZE) {
 325                         free_fdset(files->open_fds, files->max_fdset);
 326                         free_fdset(files->close_on_exec, files->max_fdset);
 327                 }
 328                 kmem_cache_free(files_cachep, files);
 329         }
 330 }
 331
 332 static inline void __exit_files(struct task_struct *tsk)
 333 {
 334         struct files_struct * files = tsk->files;
 335
 336         if (files) {
 337                 task_lock(tsk);
 338                 tsk->files = NULL;
 339                 task_unlock(tsk);
 340                 put_files_struct(files);
 341         }
 342 }
 343
 344 void exit_files(struct task_struct *tsk)
 345 {
 346         __exit_files(tsk);
 347 }
 348
 349 static inline void __put_fs_struct(struct fs_struct *fs)
 350 {
 351         /* No need to hold fs->lock if we are killing it */
 352         if (atomic_dec_and_test(&fs->count)) {
 353                 dput(fs->root);
 354                 mntput(fs->rootmnt);
 355                 dput(fs->pwd);
 356                 mntput(fs->pwdmnt);
 357                 if (fs->altroot) {
 358                         dput(fs->altroot);
 359                         mntput(fs->altrootmnt);
 360                 }
 361                 kmem_cache_free(fs_cachep, fs);
 362         }
 363 }
 364
 365 void put_fs_struct(struct fs_struct *fs)
 366 {
 367         __put_fs_struct(fs);
 368 }
 369
 370 static inline void __exit_fs(struct task_struct *tsk)
 371 {
 372         struct fs_struct * fs = tsk->fs;
 373
 374         if (fs) {
 375                 task_lock(tsk);
 376                 tsk->fs = NULL;
 377                 task_unlock(tsk);
 378                 __put_fs_struct(fs);
 379         }
 380 }
 381
 382 void exit_fs(struct task_struct *tsk)
 383 {
 384         __exit_fs(tsk);
 385 }
 386
 387 /*
 388  * We can use these to temporarily drop into
 389  * "lazy TLB" mode and back.
 390  */
 391 struct mm_struct * start_lazy_tlb(void)
 392 {
 393         struct mm_struct *mm = current->mm;
 394         current->mm = NULL;
 395         /* active_mm is still 'mm' */
 396         atomic_inc(&mm->mm_count);
 397         enter_lazy_tlb(mm, current, smp_processor_id());
 398         return mm;
 399 }
 400
 401 void end_lazy_tlb(struct mm_struct *mm)
 402 {
 403         struct mm_struct *active_mm = current->active_mm;
 404
 405         current->mm = mm;
 406         if (mm != active_mm) {
 407                 current->active_mm = mm;
 408                 activate_mm(active_mm, mm);
 409         }
 410         mmdrop(active_mm);
 411 }
 412
 413 /*
 414  * Turn us into a lazy TLB process if we
 415  * aren't already..
 416  */
 417 static inline void __exit_mm(struct task_struct * tsk)
 418 {
 419         struct mm_struct * mm = tsk->mm;
 420
 421         mm_release();
 422         if (mm) {
 423                 atomic_inc(&mm->mm_count);
 424                 if (mm != tsk->active_mm) BUG();
 425                 /* more a memory barrier than a real lock */
 426                 task_lock(tsk);
 427                 tsk->mm = NULL;
 428                 enter_lazy_tlb(mm, current, smp_processor_id());
 429                 task_unlock(tsk);
 430                 mmput(mm);
 431         }
 432 }
 433
 434 void exit_mm(struct task_struct *tsk)
 435 {
 436         __exit_mm(tsk);
 437 }
 438
 439 static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
 440 {
 441         /*
 442          * Make sure we're not reparenting to ourselves and that
 443          * the parent is not a zombie.
 444          */
 445         if (p == reaper || reaper->state >= TASK_ZOMBIE)
 446                 p->real_parent = child_reaper;
 447         else
 448                 p->real_parent = reaper;
 449         if (p->parent == p->real_parent)
 450                 BUG();
 451 }
 452
 453 static inline void reparent_thread(task_t *p, task_t *father, int traced)
 454 {
 455         /* We dont want people slaying init.  */
 456         if (p->exit_signal != -1)
 457                 p->exit_signal = SIGCHLD;
 458         p->self_exec_id++;
 459
 460         if (p->pdeath_signal)
 461                 send_sig(p->pdeath_signal, p, 0);
 462
 463         /* Move the child from its dying parent to the new one.  */
 464         if (unlikely(traced)) {
 465                 /* Preserve ptrace links if someone else is tracing this child.  */
 466                 list_del_init(&p->ptrace_list);
 467                 if (p->parent != p->real_parent)
 468                         list_add(&p->ptrace_list, &p->real_parent->ptrace_children);
 469         } else {
 470                 /* If this child is being traced, then we're the one tracing it
 471                  * anyway, so let go of it.
 472                  */
 473                 p->ptrace = 0;
 474                 list_del_init(&p->sibling);
 475                 p->parent = p->real_parent;
 476                 list_add_tail(&p->sibling, &p->parent->children);
 477
 478                 /* If we'd notified the old parent about this child's death,
 479                  * also notify the new parent.
 480                  */
 481                 if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
 482                         do_notify_parent(p, p->exit_signal);
 483         }
 484
 485         /*
 486          * process group orphan check
 487          * Case ii: Our child is in a different pgrp
 488          * than we are, and it was the only connection
 489          * outside, so the child pgrp is now orphaned.
 490          */
 491         if ((p->pgrp != father->pgrp) &&
 492             (p->session == father->session)) {
 493                 int pgrp = p->pgrp;
 494
 495                 if (__will_become_orphaned_pgrp(pgrp, 0) && __has_stopped_jobs(pgrp)) {
 496                         __kill_pg_info(SIGHUP, (void *)1, pgrp);
 497                         __kill_pg_info(SIGCONT, (void *)1, pgrp);
 498                 }
 499         }
 500 }
 501
 502 /*
 503  * When we die, we re-parent all our children.
 504  * Try to give them to another thread in our thread
 505  * group, and if no such member exists, give it to
 506  * the global child reaper process (ie "init")
 507  */
 508 static inline void forget_original_parent(struct task_struct * father)
 509 {
 510         struct task_struct *p, *reaper = father;
 511         struct list_head *_p, *_n;
 512
 513         reaper = father->group_leader;
 514         if (reaper == father)
 515                 reaper = child_reaper;
 516
 517         /*
 518          * There are only two places where our children can be:
 519          *
 520          * - in our child list
 521          * - in our ptraced child list
 522          *
 523          * Search them and reparent children.
 524          */
 525         list_for_each_safe(_p, _n, &father->children) {
 526                 p = list_entry(_p,struct task_struct,sibling);
 527                 if (father == p->real_parent) {
 528                         choose_new_parent(p, reaper, child_reaper);
 529                         reparent_thread(p, father, 0);
 530                 } else {
 531                         ptrace_unlink (p);
 532                         if (p->state == TASK_ZOMBIE && p->exit_signal != -1)
 533                                 do_notify_parent(p, p->exit_signal);
 534                 }
 535         }
 536         list_for_each_safe(_p, _n, &father->ptrace_children) {
 537                 p = list_entry(_p,struct task_struct,ptrace_list);
 538                 choose_new_parent(p, reaper, child_reaper);
 539                 reparent_thread(p, father, 1);
 540         }
 541 }
 542
 543 /*
 544  * Send signals to all our closest relatives so that they know
 545  * to properly mourn us..
 546  */
 547 static void exit_notify(void)
 548 {
 549         struct task_struct *t;
 550
 551         write_lock_irq(&tasklist_lock);
 552
 553         /*
 554          * This does two things:
 555          *
 556          * A.  Make init inherit all the child processes
 557          * B.  Check to see if any process groups have become orphaned
 558          *      as a result of our exiting, and if they have any stopped
 559          *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 560          */
 561
 562         forget_original_parent(current);
 563         BUG_ON(!list_empty(&current->children));
 564
 565         /*
 566          * Check to see if any process groups have become orphaned
 567          * as a result of our exiting, and if they have any stopped
 568          * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 569          *
 570          * Case i: Our father is in a different pgrp than we are
 571          * and we were the only connection outside, so our pgrp
 572          * is about to become orphaned.
 573          */
 574
 575         t = current->parent;
 576
 577         if ((t->pgrp != current->pgrp) &&
 578             (t->session == current->session) &&
 579             __will_become_orphaned_pgrp(current->pgrp, current) &&
 580             __has_stopped_jobs(current->pgrp)) {
 581                 __kill_pg_info(SIGHUP, (void *)1, current->pgrp);
 582                 __kill_pg_info(SIGCONT, (void *)1, current->pgrp);
 583         }
 584
 585         /* Let father know we died
 586          *
 587          * Thread signals are configurable, but you aren't going to use
 588          * that to send signals to arbitary processes.
 589          * That stops right now.
 590          *
 591          * If the parent exec id doesn't match the exec id we saved
 592          * when we started then we know the parent has changed security
 593          * domain.
 594          *
 595          * If our self_exec id doesn't match our parent_exec_id then
 596          * we have changed execution domain as these two values started
 597          * the same after a fork.
 598          *
 599          */
 600
 601         if (current->exit_signal != SIGCHLD && current->exit_signal != -1 &&
 602             ( current->parent_exec_id != t->self_exec_id  ||
 603               current->self_exec_id != current->parent_exec_id)
 604             && !capable(CAP_KILL))
 605                 current->exit_signal = SIGCHLD;
 606
 607
 608         if (current->exit_signal != -1)
 609                 do_notify_parent(current, current->exit_signal);
 610
 611         current->state = TASK_ZOMBIE;
 612         /*
 613          * No need to unlock IRQs, we'll schedule() immediately
 614          * anyway. In the preemption case this also makes it
 615          * impossible for the task to get runnable again (thus
 616          * the "_raw_" unlock - to make sure we don't try to
 617          * preempt here).
 618          */
 619         _raw_write_unlock(&tasklist_lock);
 620 }
 621
 622 NORET_TYPE void do_exit(long code)
 623 {
 624         struct task_struct *tsk = current;
 625
 626         if (in_interrupt())
 627                 panic("Aiee, killing interrupt handler!");
 628         if (!tsk->pid)
 629                 panic("Attempted to kill the idle task!");
 630         if (tsk->pid == 1)
 631                 panic("Attempted to kill init!");
 632         tsk->flags |= PF_EXITING;
 633         del_timer_sync(&tsk->real_timer);
 634
 635         if (unlikely(in_atomic()))
 636                 printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 637                                 current->comm, current->pid,
 638                                 preempt_count());
 639
 640         profile_exit_task(tsk);
 641
 642 fake_volatile:
 643         acct_process(code);
 644         __exit_mm(tsk);
 645
 646         sem_exit();
 647         __exit_files(tsk);
 648         __exit_fs(tsk);
 649         exit_namespace(tsk);
 650         exit_thread();
 651
 652         if (current->leader)
 653                 disassociate_ctty(1);
 654
 655         put_exec_domain(tsk->thread_info->exec_domain);
 656         if (tsk->binfmt && tsk->binfmt->module)
 657                 __MOD_DEC_USE_COUNT(tsk->binfmt->module);
 658
 659         tsk->exit_code = code;
 660         exit_notify();
 661         preempt_disable();
 662         if (current->exit_signal == -1)
 663                 release_task(current);
 664         schedule();
 665         BUG();
 666 /*
 667  * In order to get rid of the "volatile function does return" message
 668  * I did this little loop that confuses gcc to think do_exit really
 669  * is volatile. In fact it's schedule() that is volatile in some
 670  * circumstances: when current->state = ZOMBIE, schedule() never
 671  * returns.
 672  *
 673  * In fact the natural way to do all this is to have the label and the
 674  * goto right after each other, but I put the fake_volatile label at
 675  * the start of the function just in case something /really/ bad
 676  * happens, and the schedule returns. This way we can try again. I'm
 677  * not paranoid: it's just that everybody is out to get me.
 678  */
 679         goto fake_volatile;
 680 }
 681
 682 NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 683 {
 684         if (comp)
 685                 complete(comp);
 686
 687         do_exit(code);
 688 }
 689
 690 asmlinkage long sys_exit(int error_code)
 691 {
 692         do_exit((error_code&0xff)<<8);
 693 }
 694
 695 task_t *next_thread(task_t *p)
 696 {
 697         struct pid_link *link = p->pids + PIDTYPE_TGID;
 698         struct list_head *tmp, *head = &link->pidptr->task_list;
 699
 700 #if CONFIG_SMP
 701         if (!p->sig)
 702                 BUG();
 703         if (!spin_is_locked(&p->sig->siglock) &&
 704                                 !rwlock_is_locked(&tasklist_lock))
 705                 BUG();
 706 #endif
 707         tmp = link->pid_chain.next;
 708         if (tmp == head)
 709                 tmp = head->next;
 710
 711         return pid_task(tmp, PIDTYPE_TGID);
 712 }
 713
 714 /*
 715  * this kills every thread in the thread group. Note that any externally
 716  * wait4()-ing process will get the correct exit code - even if this
 717  * thread is not the thread group leader.
 718  */
 719 asmlinkage long sys_exit_group(int error_code)
 720 {
 721         unsigned int exit_code = (error_code & 0xff) << 8;
 722
 723         if (!thread_group_empty(current)) {
 724                 struct signal_struct *sig = current->sig;
 725
 726                 spin_lock_irq(&sig->siglock);
 727                 if (sig->group_exit) {
 728                         spin_unlock_irq(&sig->siglock);
 729
 730                         /* another thread was faster: */
 731                         do_exit(sig->group_exit_code);
 732                 }
 733                 sig->group_exit = 1;
 734                 sig->group_exit_code = exit_code;
 735                 __broadcast_thread_group(current, SIGKILL);
 736                 spin_unlock_irq(&sig->siglock);
 737         }
 738
 739         do_exit(exit_code);
 740 }
 741
 742 static int eligible_child(pid_t pid, int options, task_t *p)
 743 {
 744         if (pid > 0) {
 745                 if (p->pid != pid)
 746                         return 0;
 747         } else if (!pid) {
 748                 if (p->pgrp != current->pgrp)
 749                         return 0;
 750         } else if (pid != -1) {
 751                 if (p->pgrp != -pid)
 752                         return 0;
 753         }
 754
 755         /*
 756          * Do not consider detached threads that are
 757          * not ptraced:
 758          */
 759         if (p->exit_signal == -1 && !p->ptrace)
 760                 return 0;
 761
 762         /* Wait for all children (clone and not) if __WALL is set;
 763          * otherwise, wait for clone children *only* if __WCLONE is
 764          * set; otherwise, wait for non-clone children *only*.  (Note:
 765          * A "clone" child here is one that reports to its parent
 766          * using a signal other than SIGCHLD.) */
 767         if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
 768             && !(options & __WALL))
 769                 return 0;
 770         /*
 771          * Do not consider thread group leaders that are
 772          * in a non-empty thread group:
 773          */
 774         if (current->tgid != p->tgid && delay_group_leader(p))
 775                 return 2;
 776
 777         if (security_ops->task_wait(p))
 778                 return 0;
 779
 780         return 1;
 781 }
 782
 783 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
 784 {
 785         DECLARE_WAITQUEUE(wait, current);
 786         struct task_struct *tsk;
 787         unsigned long state;
 788         int flag, retval;
 789
 790         if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
 791                 return -EINVAL;
 792
 793         add_wait_queue(&current->wait_chldexit,&wait);
 794 repeat:
 795         flag = 0;
 796         current->state = TASK_INTERRUPTIBLE;
 797         read_lock(&tasklist_lock);
 798         tsk = current;
 799         do {
 800                 struct task_struct *p;
 801                 struct list_head *_p;
 802                 int ret;
 803
 804                 list_for_each(_p,&tsk->children) {
 805                         p = list_entry(_p,struct task_struct,sibling);
 806
 807                         ret = eligible_child(pid, options, p);
 808                         if (!ret)
 809                                 continue;
 810                         flag = 1;
 811
 812                         switch (p->state) {
 813                         case TASK_STOPPED:
 814                                 if (!p->exit_code)
 815                                         continue;
 816                                 if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
 817                                         continue;
 818                                 read_unlock(&tasklist_lock);
 819
 820                                 /* move to end of parent's list to avoid starvation */
 821                                 write_lock_irq(&tasklist_lock);
 822                                 remove_parent(p);
 823                                 add_parent(p, p->parent);
 824                                 write_unlock_irq(&tasklist_lock);
 825                                 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 826                                 if (!retval && stat_addr)
 827                                         retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
 828                                 if (!retval) {
 829                                         p->exit_code = 0;
 830                                         retval = p->pid;
 831                                 }
 832                                 goto end_wait4;
 833                         case TASK_ZOMBIE:
 834                                 /*
 835                                  * Eligible but we cannot release it yet:
 836                                  */
 837                                 if (ret == 2)
 838                                         continue;
 839                                 /*
 840                                  * Try to move the task's state to DEAD
 841                                  * only one thread is allowed to do this:
 842                                  */
 843                                 state = xchg(&p->state, TASK_DEAD);
 844                                 if (state != TASK_ZOMBIE)
 845                                         continue;
 846                                 read_unlock(&tasklist_lock);
 847
 848                                 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 849                                 if (!retval && stat_addr) {
 850                                         if (p->sig->group_exit)
 851                                                 retval = put_user(p->sig->group_exit_code, stat_addr);
 852                                         else
 853                                                 retval = put_user(p->exit_code, stat_addr);
 854                                 }
 855                                 if (retval) {
 856                                         p->state = TASK_ZOMBIE;
 857                                         goto end_wait4;
 858                                 }
 859                                 retval = p->pid;
 860                                 if (p->real_parent != p->parent) {
 861                                         write_lock_irq(&tasklist_lock);
 862                                         __ptrace_unlink(p);
 863                                         do_notify_parent(p, SIGCHLD);
 864                                         p->state = TASK_ZOMBIE;
 865                                         write_unlock_irq(&tasklist_lock);
 866                                 } else
 867                                         release_task(p);
 868                                 goto end_wait4;
 869                         default:
 870                                 continue;
 871                         }
 872                 }
 873                 if (!flag) {
 874                         list_for_each (_p,&tsk->ptrace_children) {
 875                                 p = list_entry(_p,struct task_struct,ptrace_list);
 876                                 if (!eligible_child(pid, options, p))
 877                                         continue;
 878                                 flag = 1;
 879                                 break;
 880                         }
 881                 }
 882                 if (options & __WNOTHREAD)
 883                         break;
 884                 tsk = next_thread(tsk);
 885                 if (tsk->sig != current->sig)
 886                         BUG();
 887         } while (tsk != current);
 888         read_unlock(&tasklist_lock);
 889         if (flag) {
 890                 retval = 0;
 891                 if (options & WNOHANG)
 892                         goto end_wait4;
 893                 retval = -ERESTARTSYS;
 894                 if (signal_pending(current))
 895                         goto end_wait4;
 896                 schedule();
 897                 goto repeat;
 898         }
 899         retval = -ECHILD;
 900 end_wait4:
 901         current->state = TASK_RUNNING;
 902         remove_wait_queue(&current->wait_chldexit,&wait);
 903         return retval;
 904 }
 905
 906 #if !defined(__alpha__) && !defined(__ia64__) && !defined(__arm__)
 907
 908 /*
 909  * sys_waitpid() remains for compatibility. waitpid() should be
 910  * implemented by calling sys_wait4() from libc.a.
 911  */
 912 asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
 913 {
 914         return sys_wait4(pid, stat_addr, options, NULL);
 915 }
 916
 917 #endif