/*
 *  linux/kernel/exit.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/module.h>
#ifdef CONFIG_BSD_PROCESS_ACCT
#include <linux/acct.h>
#endif

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

extern void sem_exit (void);
extern struct task_struct *child_reaper;

int getrusage(struct task_struct *, int, struct rusage *);
static void release_task(struct task_struct * p)
{
	if (p != current) {
#ifdef CONFIG_SMP
		/*
		 * Wait to make sure the process isn't on the
		 * runqueue (active on some other CPU still)
		 */
		for (;;) {
			task_lock(p);
			if (!p->has_cpu)
				break;
			task_unlock(p);
			do {
				barrier();
			} while (p->has_cpu);
		}
		task_unlock(p);
#endif
		atomic_dec(&p->user->processes);
		free_uid(p->user);
		unhash_process(p);

		release_thread(p);
		current->cmin_flt += p->min_flt + p->cmin_flt;
		current->cmaj_flt += p->maj_flt + p->cmaj_flt;
		current->cnswap += p->nswap + p->cnswap;
		/*
		 * Potentially available timeslices are retrieved
		 * here - this way the parent does not get penalized
		 * for creating too many processes.
		 *
		 * (this cannot be used to artificially 'generate'
		 * timeslices, because any timeslice recovered here
		 * was given away by the parent in the first place.)
		 */
		current->counter += p->counter;
		if (current->counter >= MAX_COUNTER)
			current->counter = MAX_COUNTER;
		free_task_struct(p);
	} else {
		printk("task releasing itself\n");
	}
}
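/*
 * Worked example (added note, not in the original source): if the
 * parent's counter is 50 ticks and the reaped child still held 30, the
 * parent ends up with min(50 + 30, MAX_COUNTER) - so reaping any number
 * of children can never lift it above MAX_COUNTER.
 */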
/*
 * This checks not only the pgrp, but falls back on the pid if no
 * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
 * without this...
 */
int session_of_pgrp(int pgrp)
{
	struct task_struct *p;
	int fallback;

	fallback = -1;
	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->session <= 0)
			continue;
		if (p->pgrp == pgrp) {
			fallback = p->session;
			break;
		}
		if (p->pid == pgrp)
			fallback = p->session;
	}
	read_unlock(&tasklist_lock);
	return fallback;
}
/*
 * Determine if a process group is "orphaned", according to the POSIX
 * definition in 2.2.2.52.  Orphaned process groups are not to be affected
 * by terminal-generated stop signals.  Newly orphaned process groups are
 * to receive a SIGHUP and a SIGCONT.
 *
 * "I ask you, have you ever known what it is to be an orphan?"
 */
static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
{
	struct task_struct *p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p == ignored_task) || (p->pgrp != pgrp) ||
		    (p->state == TASK_ZOMBIE) ||
		    (p->p_pptr->pid == 1))
			continue;
		if ((p->p_pptr->pgrp != pgrp) &&
		    (p->p_pptr->session == p->session)) {
			read_unlock(&tasklist_lock);
			return 0;
		}
	}
	read_unlock(&tasklist_lock);
	return 1;	/* (sighing) "Often!" */
}

int is_orphaned_pgrp(int pgrp)
{
	return will_become_orphaned_pgrp(pgrp, 0);
}
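/*
 * Worked example (added for illustration): a shell runs "cat | less",
 * the user stops the pipeline with ^Z, and the shell itself then dies.
 * Both pipeline members get reparented to init, so the loop above finds
 * no live parent that is outside the group but inside the session, and
 * the group is orphaned - per POSIX it must get SIGHUP then SIGCONT so
 * the stopped jobs are not left sleeping forever.
 */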
static inline int has_stopped_jobs(int pgrp)
{
	int retval = 0;
	struct task_struct * p;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if (p->pgrp != pgrp)
			continue;
		if (p->state != TASK_STOPPED)
			continue;
		retval = 1;
		break;
	}
	read_unlock(&tasklist_lock);
	return retval;
}
/*
 * When we die, we re-parent all our children.
 * Try to give them to another thread in our process
 * group, and if no such member exists, give it to
 * the global child reaper process (ie "init")
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct * p, *reaper;

	read_lock(&tasklist_lock);

	/* Next in our thread group */
	reaper = next_thread(father);
	if (reaper == father)
		reaper = child_reaper;

	for_each_task(p) {
		if (p->p_opptr == father) {
			/* We don't want people slaying init */
			p->exit_signal = SIGCHLD;
			p->self_exec_id++;
			p->p_opptr = reaper;
			if (p->pdeath_signal)
				send_sig(p->pdeath_signal, p, 0);
		}
	}
	read_unlock(&tasklist_lock);
}
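/*
 * Note (added): only p_opptr - the *original* parent link - is rewired
 * here; p_pptr may still point at a ptrace tracer.  exit_notify() below
 * moves each remaining child back to its (possibly new) original parent
 * when the dying task's child list is torn down.
 */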
static inline void close_files(struct files_struct * files)
{
	int i, j;

	j = 0;
	for (;;) {
		unsigned long set;
		i = j * __NFDBITS;
		if (i >= files->max_fdset || i >= files->max_fds)
			break;
		set = files->open_fds->fds_bits[j++];
		while (set) {
			if (set & 1) {
				struct file * file = xchg(&files->fd[i], NULL);
				if (file)
					filp_close(file, files);
			}
			i++;
			set >>= 1;
		}
	}
}
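/*
 * Illustrative sketch (added; not part of the original file): the loop
 * above walks the open-fd bitmap one word at a time, so with
 * __NFDBITS == 32 a set bit 1 in fds_bits[2] means fd 2*32 + 1 == 65 is
 * open.  A minimal userspace analogue of the same walk, with
 * hypothetical names, compiled out of the kernel build:
 */
#if 0
#include <stdio.h>

#define NFDBITS_DEMO 32			/* stand-in for __NFDBITS */

static void walk_fd_bitmap(const unsigned long *bits, int nwords)
{
	int i, j;

	for (j = 0; j < nwords; j++) {
		unsigned long set = bits[j];
		i = j * NFDBITS_DEMO;
		while (set) {		/* same skip-ahead trick: stop */
			if (set & 1)	/* as soon as no bits remain  */
				printf("fd %d is open\n", i);
			i++;
			set >>= 1;
		}
	}
}
#endif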
void put_files_struct(struct files_struct *files)
{
	if (atomic_dec_and_test(&files->count)) {
		close_files(files);
		/*
		 * Free the fd and fdset arrays if we expanded them.
		 */
		if (files->fd != &files->fd_array[0])
			free_fd_array(files->fd, files->max_fds);
		if (files->max_fdset > __FD_SETSIZE) {
			free_fdset(files->open_fds, files->max_fdset);
			free_fdset(files->close_on_exec, files->max_fdset);
		}
		kmem_cache_free(files_cachep, files);
	}
}
static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct * files = tsk->files;

	if (files) {
		task_lock(tsk);
		tsk->files = NULL;
		task_unlock(tsk);
		put_files_struct(files);
	}
}

void exit_files(struct task_struct *tsk)
{
	__exit_files(tsk);
}
static inline void __put_fs_struct(struct fs_struct *fs)
{
	/* No need to hold fs->lock if we are killing it */
	if (atomic_dec_and_test(&fs->count)) {
		dput(fs->root);
		mntput(fs->rootmnt);
		dput(fs->pwd);
		mntput(fs->pwdmnt);
		if (fs->altroot) {
			dput(fs->altroot);
			mntput(fs->altrootmnt);
		}
		kmem_cache_free(fs_cachep, fs);
	}
}

void put_fs_struct(struct fs_struct *fs)
{
	__put_fs_struct(fs);
}
static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct * fs = tsk->fs;

	if (fs) {
		task_lock(tsk);
		tsk->fs = NULL;
		task_unlock(tsk);
		__put_fs_struct(fs);
	}
}

void exit_fs(struct task_struct *tsk)
{
	__exit_fs(tsk);
}
/*
 * We can use these to temporarily drop into
 * "lazy TLB" mode and back.
 */
struct mm_struct * start_lazy_tlb(void)
{
	struct mm_struct *mm = current->mm;
	current->mm = NULL;
	/* active_mm is still 'mm' */
	atomic_inc(&mm->mm_count);
	enter_lazy_tlb(mm, current, smp_processor_id());
	return mm;
}

void end_lazy_tlb(struct mm_struct *mm)
{
	struct mm_struct *active_mm = current->active_mm;

	current->mm = mm;
	if (mm != active_mm) {
		current->active_mm = mm;
		activate_mm(active_mm, mm);
	}
	mmdrop(active_mm);
}
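/*
 * Usage sketch (added for illustration; no caller lives in this file):
 * code that runs for a while without touching user mappings can bracket
 * the region with the two helpers above.  The task keeps 'mm' as its
 * active_mm throughout, so no TLB flush is needed on the way in or out.
 */
#if 0
void example_lazy_region(void)		/* hypothetical caller */
{
	struct mm_struct *mm = start_lazy_tlb();

	/* ... kernel-only work, no user-space accesses here ... */

	end_lazy_tlb(mm);
}
#endif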
/*
 * Turn us into a lazy TLB process if we
 * aren't already..
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	if (mm) {
		atomic_inc(&mm->mm_count);
		mm_release();
		if (mm != tsk->active_mm) BUG();
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		task_unlock(tsk);
		enter_lazy_tlb(mm, current, smp_processor_id());
		mmput(mm);
	}
}

void exit_mm(struct task_struct *tsk)
{
	__exit_mm(tsk);
}
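/*
 * Note (added): the atomic_inc(&mm->mm_count) in __exit_mm() is what
 * lets the exiting task keep 'mm' as its lazy active_mm after mmput()
 * has dropped its mm_users reference; the matching mmdrop() happens
 * later, from the context switch that finally schedules the zombie
 * away.
 */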
/*
 * Send signals to all our closest relatives so that they know
 * to properly mourn us..
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	t = current->p_pptr;

	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp, SIGHUP, 1);
		kill_pg(current->pgrp, SIGCONT, 1);
	}

	/* Let father know we died
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes.
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 */

	if (current->exit_signal != SIGCHLD &&
	    (current->parent_exec_id != t->self_exec_id ||
	     current->self_exec_id != current->parent_exec_id) &&
	    !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;

	/*
	 * This loop does two things:
	 *
	 * A.  Make init inherit all the child processes
	 * B.  Check to see if any process groups have become orphaned
	 *     as a result of our exiting, and if they have any stopped
	 *     jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	do_notify_parent(current, current->exit_signal);
	while (current->p_cptr != NULL) {
		p = current->p_cptr;
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->ptrace = 0;

		p->p_pptr = p->p_opptr;
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		if (p->state == TASK_ZOMBIE)
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			write_unlock_irq(&tasklist_lock);
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp, SIGHUP, 1);
				kill_pg(pgrp, SIGCONT, 1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);

	if (current->leader)
		disassociate_ctty(1);
}
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())
		printk("Aiee, killing interrupt handler\n");
	if (!tsk->pid)
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	lock_kernel();
	sem_exit();
	__exit_mm(tsk);
	__exit_files(tsk);
	__exit_fs(tsk);
	exit_sighand(tsk);
	exit_thread();

	tsk->state = TASK_ZOMBIE;
	tsk->exit_code = code;
	exit_notify();
	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);
	schedule();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}
NORET_TYPE void up_and_exit(struct semaphore *sem, long code)
{
	if (sem)
		up(sem);

	do_exit(code);
}
asmlinkage long sys_exit(int error_code)
{
	do_exit((error_code & 0xff) << 8);
}
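/*
 * Illustrative sketch (added): the (error_code & 0xff) << 8 above is the
 * classic wait status layout - exit code in bits 8..15, low seven bits
 * zero for a normal exit.  A hypothetical userspace check of the same
 * word, compiled out of the build:
 */
#if 0
#include <sys/wait.h>
#include <assert.h>

static void check_status(int status)
{
	/* For a child that called _exit(42): */
	assert(WIFEXITED(status));		/* (status & 0x7f) == 0  */
	assert(WEXITSTATUS(status) == 42);	/* (status >> 8) & 0xff  */
}
#endif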
asmlinkage long sys_wait4(pid_t pid, unsigned int * stat_addr, int options, struct rusage * ru)
{
	int flag, retval;
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;

	if (options & ~(WNOHANG|WUNTRACED|__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	add_wait_queue(&current->wait_chldexit, &wait);
repeat:
	flag = 0;
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		for (p = tsk->p_cptr ; p ; p = p->p_osptr) {
			if (pid > 0) {
				if (p->pid != pid)
					continue;
			} else if (!pid) {
				if (p->pgrp != current->pgrp)
					continue;
			} else if (pid != -1) {
				if (p->pgrp != -pid)
					continue;
			}
			/* Wait for all children (clone and not) if __WALL is set;
			 * otherwise, wait for clone children *only* if __WCLONE is
			 * set; otherwise, wait for non-clone children *only*.  (Note:
			 * A "clone" child here is one that reports to its parent
			 * using a signal other than SIGCHLD.) */
			if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
			    && !(options & __WALL))
				continue;
			flag = 1;
			switch (p->state) {
			case TASK_STOPPED:
				if (!p->exit_code)
					continue;
				if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
					continue;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
				if (!retval) {
					p->exit_code = 0;
					retval = p->pid;
				}
				goto end_wait4;
			case TASK_ZOMBIE:
				current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
				current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
				read_unlock(&tasklist_lock);
				retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
				if (!retval && stat_addr)
					retval = put_user(p->exit_code, stat_addr);
				if (retval)
					goto end_wait4;
				retval = p->pid;
				if (p->p_opptr != p->p_pptr) {
					write_lock_irq(&tasklist_lock);
					REMOVE_LINKS(p);
					p->p_pptr = p->p_opptr;
					SET_LINKS(p);
					do_notify_parent(p, SIGCHLD);
					write_unlock_irq(&tasklist_lock);
				} else
					release_task(p);
				goto end_wait4;
			default:
				continue;
			}
		}
		if (options & __WNOTHREAD)
			break;
		tsk = next_thread(tsk);
	} while (tsk != current);
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		if (options & WNOHANG)
			goto end_wait4;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end_wait4;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
end_wait4:
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->wait_chldexit, &wait);
	return retval;
}
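/*
 * Illustrative sketch (added; not from the original file): the two
 * put_user() calls above write either (exit_code << 8) | 0x7f for a
 * stopped child or the raw exit_code word for a zombie - exactly what
 * the libc wait macros take apart.  A hypothetical caller, compiled out
 * of the build:
 */
#if 0
#include <sys/wait.h>
#include <sys/resource.h>
#include <stdio.h>

static void reap_one(pid_t pid)
{
	int status;
	struct rusage ru;
	pid_t got = wait4(pid, &status, WUNTRACED, &ru);

	if (got < 0)
		return;
	if (WIFSTOPPED(status))		/* (status & 0xff) == 0x7f */
		printf("%d stopped by signal %d\n", got, WSTOPSIG(status));
	else if (WIFEXITED(status))	/* (status & 0x7f) == 0    */
		printf("%d exited with %d\n", got, WEXITSTATUS(status));
}
#endif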
#if !defined(__alpha__) && !defined(__ia64__)

/*
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */

asmlinkage long sys_waitpid(pid_t pid, unsigned int * stat_addr, int options)
{
	return sys_wait4(pid, stat_addr, options, NULL);
}

#endif
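/*
 * Sketch of the libc-side compatibility mentioned above (added;
 * hypothetical and simplified): on architectures without sys_waitpid,
 * waitpid() is just wait4() with a NULL rusage pointer.
 */
#if 0
pid_t waitpid(pid_t pid, int *status, int options)
{
	return wait4(pid, status, options, NULL);
}
#endif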