kernel/exit.c

   1 /*
   2  *  linux/kernel/exit.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6
   7 #include <linux/config.h>
   8 #include <linux/malloc.h>
   9 #include <linux/interrupt.h>
  10 #include <linux/smp_lock.h>
  11 #include <linux/module.h>
  12 #ifdef CONFIG_BSD_PROCESS_ACCT
  13 #include <linux/acct.h>
  14 #endif
  15
  16 #include <asm/uaccess.h>
  17 #include <asm/pgtable.h>
  18 #include <asm/mmu_context.h>
  19
  20 extern void sem_exit (void);        /* called from do_exit() in this file */
  21 extern struct task_struct *child_reaper;    /* new original parent assigned by forget_original_parent() */
  22
  23 int getrusage(struct task_struct *, int, struct rusage *);  /* called by sys_wait4() when a rusage pointer is supplied */
  24
  25 void release(struct task_struct * p)
  26 {
  27         if (p != current) {
  28 #ifdef CONFIG_SMP
  29                 int has_cpu;
  30
  31                 /*
  32                  * Wait to make sure the process isn't on the
  33                  * runqueue (active on some other CPU still)
  34                  */
  35                 do {
  36                         spin_lock_irq(&runqueue_lock);
  37                         has_cpu = p->has_cpu;
  38                         spin_unlock_irq(&runqueue_lock);
  39                 } while (has_cpu);
  40 #endif
  41                 free_uid(p);
  42                 unhash_process(p);
  43
  44                 release_thread(p);
  45                 current->cmin_flt += p->min_flt + p->cmin_flt;
  46                 current->cmaj_flt += p->maj_flt + p->cmaj_flt;
  47                 current->cnswap += p->nswap + p->cnswap;
  48                 /*
  49                  * Potentially available timeslices are retrieved
  50                  * here - this way the parent does not get penalized
  51                  * for creating too many processes.
  52                  *
  53                  * (this cannot be used to artificially 'generate'
  54                  * timeslices, because any timeslice recovered here
  55                  * was given away by the parent in the first place.)
  56                  */
  57                 current->counter += p->counter;
  58                 if (current->counter >= MAX_COUNTER)
  59                         current->counter = MAX_COUNTER;
  60                 free_task_struct(p);
  61         } else {
  62                 printk("task releasing itself\n");
  63         }
  64 }
  65
  66 /*
  67  * This checks not only the pgrp, but falls back on the pid if no
  68  * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
  69  * without this...
  70  */
  71 int session_of_pgrp(int pgrp)
  72 {
  73         struct task_struct *p;
  74         int fallback;
  75
  76         fallback = -1;
  77         read_lock(&tasklist_lock);
  78         for_each_task(p) {
  79                 if (p->session <= 0)
  80                         continue;
  81                 if (p->pgrp == pgrp) {
  82                         fallback = p->session;
  83                         break;
  84                 }
  85                 if (p->pid == pgrp)
  86                         fallback = p->session;
  87         }
  88         read_unlock(&tasklist_lock);
  89         return fallback;
  90 }
  91
  92 /*
  93  * Determine if a process group is "orphaned", according to the POSIX
  94  * definition in 2.2.2.52.  Orphaned process groups are not to be affected
  95  * by terminal-generated stop signals.  Newly orphaned process groups are
  96  * to receive a SIGHUP and a SIGCONT.
  97  *
  98  * "I ask you, have you ever known what it is to be an orphan?"
  99  */
 100 static int will_become_orphaned_pgrp(int pgrp, struct task_struct * ignored_task)
 101 {
 102         struct task_struct *p;
 103
 104         read_lock(&tasklist_lock);
 105         for_each_task(p) {
 106                 if ((p == ignored_task) || (p->pgrp != pgrp) ||
 107                     (p->state == TASK_ZOMBIE) ||
 108                     (p->p_pptr->pid == 1))
 109                         continue;
 110                 if ((p->p_pptr->pgrp != pgrp) &&
 111                     (p->p_pptr->session == p->session)) {
 112                         read_unlock(&tasklist_lock);
 113                         return 0;
 114                 }
 115         }
 116         read_unlock(&tasklist_lock);
 117         return 1;       /* (sighing) "Often!" */
 118 }
 119
 120 int is_orphaned_pgrp(int pgrp)
 121 {
 122         return will_become_orphaned_pgrp(pgrp, 0);
 123 }
 124
 125 static inline int has_stopped_jobs(int pgrp)
 126 {
 127         int retval = 0;
 128         struct task_struct * p;
 129
 130         read_lock(&tasklist_lock);
 131         for_each_task(p) {
 132                 if (p->pgrp != pgrp)
 133                         continue;
 134                 if (p->state != TASK_STOPPED)
 135                         continue;
 136                 retval = 1;
 137                 break;
 138         }
 139         read_unlock(&tasklist_lock);
 140         return retval;
 141 }
 142
 143 static inline void forget_original_parent(struct task_struct * father)
 144 {
 145         struct task_struct * p;
 146
 147         read_lock(&tasklist_lock);
 148         for_each_task(p) {
 149                 if (p->p_opptr == father) {
 150                         /* We dont want people slaying init */
 151                         p->exit_signal = SIGCHLD;
 152                         p->self_exec_id++;
 153                         p->p_opptr = child_reaper; /* init */
 154                         if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
 155                 }
 156         }
 157         read_unlock(&tasklist_lock);
 158 }
 159
 160 static inline void close_files(struct files_struct * files)
 161 {
 162         int i, j;
 163
 164         j = 0;
 165         for (;;) {
 166                 unsigned long set;
 167                 i = j * __NFDBITS;
 168                 if (i >= files->max_fdset || i >= files->max_fds)
 169                         break;
 170                 set = files->open_fds->fds_bits[j++];
 171                 while (set) {
 172                         if (set & 1) {
 173                                 struct file * file = xchg(&files->fd[i], NULL);
 174                                 if (file)
 175                                         filp_close(file, files);
 176                         }
 177                         i++;
 178                         set >>= 1;
 179                 }
 180         }
 181 }
 182
 183 extern kmem_cache_t *files_cachep;
 184
 185 void put_files_struct(struct files_struct *files)
 186 {
 187         if (atomic_dec_and_test(&files->count)) {
 188                 close_files(files);
 189                 /*
 190                  * Free the fd and fdset arrays if we expanded them.
 191                  */
 192                 if (files->fd != &files->fd_array[0])
 193                         free_fd_array(files->fd, files->max_fds);
 194                 if (files->max_fdset > __FD_SETSIZE) {
 195                         free_fdset(files->open_fds, files->max_fdset);
 196                         free_fdset(files->close_on_exec, files->max_fdset);
 197                 }
 198                 kmem_cache_free(files_cachep, files);
 199         }
 200 }
 201
 202 static inline void __exit_files(struct task_struct *tsk)
 203 {
 204         struct files_struct * files = tsk->files;
 205
 206         if (files) {
 207                 task_lock(tsk);
 208                 tsk->files = NULL;
 209                 task_unlock(tsk);
 210                 put_files_struct(files);
 211         }
 212 }
 213
 214 void exit_files(struct task_struct *tsk)
 215 {
 216         __exit_files(tsk);
 217 }
 218 static inline void __put_fs_struct(struct fs_struct *fs)
 219 {
 220         /* No need to hold fs->lock if we are killing it */
 221         if (atomic_dec_and_test(&fs->count)) {
 222                 dput(fs->root);
 223                 mntput(fs->rootmnt);
 224                 dput(fs->pwd);
 225                 mntput(fs->pwdmnt);
 226                 if (fs->altroot) {
 227                         dput(fs->altroot);
 228                         mntput(fs->altrootmnt);
 229                 }
 230                 kfree(fs);
 231         }
 232 }
 233
 234 void put_fs_struct(struct fs_struct *fs)
 235 {
 236         __put_fs_struct(fs);
 237 }
 238
 239 static inline void __exit_fs(struct task_struct *tsk)
 240 {
 241         struct fs_struct * fs = tsk->fs;
 242
 243         if (fs) {
 244                 task_lock(tsk);
 245                 tsk->fs = NULL;
 246                 task_unlock(tsk);
 247                 __put_fs_struct(fs);
 248         }
 249 }
 250
 251 void exit_fs(struct task_struct *tsk)
 252 {
 253         __exit_fs(tsk);
 254 }
 255
 256 static inline void __exit_sighand(struct task_struct *tsk)
 257 {
 258         struct signal_struct * sig = tsk->sig;
 259
 260         if (sig) {
 261                 spin_lock_irq(&tsk->sigmask_lock);
 262                 tsk->sig = NULL;
 263                 spin_unlock_irq(&tsk->sigmask_lock);
 264                 if (atomic_dec_and_test(&sig->count))
 265                         kfree(sig);
 266         }
 267
 268         flush_signals(tsk);
 269 }
 270
 271 void exit_sighand(struct task_struct *tsk)
 272 {
 273         __exit_sighand(tsk);
 274 }
 275
 276 /*
 277  * We can use these to temporarily drop into
 278  * "lazy TLB" mode and back.
 279  */
 280 struct mm_struct * start_lazy_tlb(void)
 281 {
 282         struct mm_struct *mm = current->mm;
 283         current->mm = NULL;
 284         /* active_mm is still 'mm' */
 285         atomic_inc(&mm->mm_count);
 286         enter_lazy_tlb(mm, current, smp_processor_id());
 287         return mm;
 288 }
 289
 290 void end_lazy_tlb(struct mm_struct *mm)
 291 {
 292         struct mm_struct *active_mm = current->active_mm;
 293
 294         current->mm = mm;
 295         if (mm != active_mm) {
 296                 current->active_mm = mm;
 297                 activate_mm(active_mm, mm);
 298         }
 299         mmdrop(active_mm);
 300 }
 301
 302 /*
 303  * Turn us into a lazy TLB process if we
 304  * aren't already..
 305  */
 306 static inline void __exit_mm(struct task_struct * tsk)
 307 {
 308         struct mm_struct * mm = tsk->mm;
 309
 310         if (mm) {
 311                 atomic_inc(&mm->mm_count);
 312                 mm_release();
 313                 if (mm != tsk->active_mm) BUG();
 314                 /* more a memory barrier than a real lock */
 315                 task_lock(tsk);
 316                 tsk->mm = NULL;
 317                 task_unlock(tsk);
 318                 enter_lazy_tlb(mm, current, smp_processor_id());
 319                 mmput(mm);
 320         }
 321 }
 322
 323 void exit_mm(struct task_struct *tsk)
 324 {
 325         __exit_mm(tsk);
 326 }
 327
 328 /*
 329  * Send signals to all our closest relatives so that they know
 330  * to properly mourn us..
      *
      * Works on 'current' only.  In order it:
      *   1. hands tasks whose original parent is 'current' over to
      *      child_reaper (forget_original_parent()),
      *   2. sends SIGHUP and SIGCONT to our own process group if our
      *      exit orphans it and it has stopped jobs,
      *   3. notifies our parent with current->exit_signal,
      *   4. moves every child onto the child list of its original
      *      parent, repeating the orphan check for each child's group,
      *   5. calls disassociate_ctty(1) if current->leader is set.
 331  */
 332 static void exit_notify(void)
 333 {
 334         struct task_struct * p, *t;
 335
 336         forget_original_parent(current);
 337         /*
 338          * Check to see if any process groups have become orphaned
 339          * as a result of our exiting, and if they have any stopped
 340          * jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 341          *
 342          * Case i: Our father is in a different pgrp than we are
 343          * and we were the only connection outside, so our pgrp
 344          * is about to become orphaned.
 345          */
 346
             /* t is our current parent for all the checks below */
 347         t = current->p_pptr;
 348
 349         if ((t->pgrp != current->pgrp) &&
 350             (t->session == current->session) &&
 351             will_become_orphaned_pgrp(current->pgrp, current) &&
 352             has_stopped_jobs(current->pgrp)) {
 353                 kill_pg(current->pgrp,SIGHUP,1);
 354                 kill_pg(current->pgrp,SIGCONT,1);
 355         }
 356
 357         /* Let father know we died
 358          *
 359          * Thread signals are configurable, but you aren't going to use
 360          * that to send signals to arbitrary processes.
 361          * That stops right now.
 362          *
 363          * If the parent exec id doesn't match the exec id we saved
 364          * when we started then we know the parent has changed security
 365          * domain.
 366          *
 367          * If our self_exec id doesn't match our parent_exec_id then
 368          * we have changed execution domain as these two values started
 369          * the same after a fork.
 370          *
 371          */
 372
             /*
              * A non-SIGCHLD exit signal is replaced by SIGCHLD when either
              * exec id comparison fails and the task lacks CAP_KILL.
              */
 373         if(current->exit_signal != SIGCHLD &&
 374             ( current->parent_exec_id != t->self_exec_id  ||
 375               current->self_exec_id != current->parent_exec_id)
 376             && !capable(CAP_KILL))
 377                 current->exit_signal = SIGCHLD;
 378
 379         notify_parent(current, current->exit_signal);
 380
 381         /*
 382          * This loop does two things:
 383          *
 384          * A.  Make init inherit all the child processes
 385          * B.  Check to see if any process groups have become orphaned
 386          *      as a result of our exiting, and if they have any stopped
 387          *      jobs, send them a SIGHUP and then a SIGCONT.  (POSIX 3.2.2.2)
 388          */
 389
 390         write_lock_irq(&tasklist_lock);
 391         while (current->p_cptr != NULL) {
                     /* pop the child at the head of our child list */
 392                 p = current->p_cptr;
 393                 current->p_cptr = p->p_osptr;
 394                 p->p_ysptr = NULL;
 395                 p->ptrace = 0;
 396
                     /*
                      * Re-parent p to its original parent and push it onto
                      * the head of that parent's child list.
                      */
 397                 p->p_pptr = p->p_opptr;
 398                 p->p_osptr = p->p_pptr->p_cptr;
 399                 if (p->p_osptr)
 400                         p->p_osptr->p_ysptr = p;
 401                 p->p_pptr->p_cptr = p;
                     /* an already-dead child is announced to its new parent */
 402                 if (p->state == TASK_ZOMBIE)
 403                         notify_parent(p, p->exit_signal);
 404                 /*
 405                  * process group orphan check
 406                  * Case ii: Our child is in a different pgrp
 407                  * than we are, and it was the only connection
 408                  * outside, so the child pgrp is now orphaned.
 409                  */
 410                 if ((p->pgrp != current->pgrp) &&
 411                     (p->session == current->session)) {
 412                         int pgrp = p->pgrp;
 413
                             /*
                              * The write lock is dropped around the check:
                              * is_orphaned_pgrp() and has_stopped_jobs()
                              * take tasklist_lock for reading themselves.
                              */
 414                         write_unlock_irq(&tasklist_lock);
 415                         if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
 416                                 kill_pg(pgrp,SIGHUP,1);
 417                                 kill_pg(pgrp,SIGCONT,1);
 418                         }
 419                         write_lock_irq(&tasklist_lock);
 420                 }
 421         }
 422         write_unlock_irq(&tasklist_lock);
 423
 424         if (current->leader)
 425                 disassociate_ctty(1);
 426 }
 427
 428 NORET_TYPE void do_exit(long code)
 429 {
             /*
              * Terminate the calling task.  'code' is stored unchanged in
              * tsk->exit_code.  The task's resources are dropped in the
              * order shown below, its state is set to TASK_ZOMBIE, its
              * relatives are notified and schedule() is called; according
              * to the comment at the bottom, that call is not expected to
              * return.
              */
 430         struct task_struct *tsk = current;
 431
 432         if (in_interrupt())
 433                 printk("Aiee, killing interrupt handler\n");
             /* pid 0 (the idle task) and pid 1 (init) must never exit */
 434         if (!tsk->pid)
 435                 panic("Attempted to kill the idle task!");
 436         if (tsk->pid == 1)
 437                 panic("Attempted to kill init!");
 438         tsk->flags |= PF_EXITING;
 439         del_timer_sync(&tsk->real_timer);
 440
 441 fake_volatile:
 442 #ifdef CONFIG_BSD_PROCESS_ACCT
 443         acct_process(code);
 444 #endif
 445         lock_kernel();
 446         sem_exit();
             /* give up mm, open files, fs context and signal handlers */
 447         __exit_mm(tsk);
 448         __exit_files(tsk);
 449         __exit_fs(tsk);
 450         __exit_sighand(tsk);
 451         exit_thread();
             /* become a zombie and record the exit code before notifying */
 452         tsk->state = TASK_ZOMBIE;
 453         tsk->exit_code = code;
 454         exit_notify();
 455         put_exec_domain(tsk->exec_domain);
 456         if (tsk->binfmt && tsk->binfmt->module)
 457                 __MOD_DEC_USE_COUNT(tsk->binfmt->module);
 458         schedule();
 459 /*
 460  * In order to get rid of the "volatile function does return" message
 461  * I did this little loop that confuses gcc to think do_exit really
 462  * is volatile. In fact it's schedule() that is volatile in some
 463  * circumstances: when current->state = ZOMBIE, schedule() never
 464  * returns.
 465  *
 466  * In fact the natural way to do all this is to have the label and the
 467  * goto right after each other, but I put the fake_volatile label at
 468  * the start of the function just in case something /really/ bad
 469  * happens, and the schedule returns. This way we can try again. I'm
 470  * not paranoid: it's just that everybody is out to get me.
 471  */
 472         goto fake_volatile;
 473 }
 474
 475 asmlinkage long sys_exit(int error_code)
 476 {
 477         do_exit((error_code&0xff)<<8);
 478 }
 479
 480 asmlinkage long sys_wait4(pid_t pid,unsigned int * stat_addr, int options, struct rusage * ru)
 481 {
             /*
              * Wait for a stopped or zombie child of the caller.
              *
              * pid selects the candidates: > 0 that pid only, 0 children in
              * the caller's process group, -1 any child, < -1 children in
              * process group -pid.
              * stat_addr, if non-NULL, receives the status word via put_user().
              * ru, if non-NULL, is filled in through getrusage().
              *
              * Returns the pid of the reported child, 0 if WNOHANG was given
              * and no eligible child was ready, -EINVAL for unknown option
              * bits, -ECHILD if no eligible child exists, -ERESTARTSYS if a
              * signal is pending, or the error from getrusage()/put_user().
              */
 482         int flag, retval;
 483         DECLARE_WAITQUEUE(wait, current);
 484         struct task_struct *p;
 485
 486         if (options & ~(WNOHANG|WUNTRACED|__WCLONE|__WALL))
 487                 return -EINVAL;
 488
 489         add_wait_queue(&current->wait_chldexit,&wait);
 490 repeat:
             /* flag is set to 1 once an eligible child has been seen */
 491         flag = 0;
 492         current->state = TASK_INTERRUPTIBLE;
 493         read_lock(&tasklist_lock);
 494         for (p = current->p_cptr ; p ; p = p->p_osptr) {
 495                 if (pid>0) {
 496                         if (p->pid != pid)
 497                                 continue;
 498                 } else if (!pid) {
 499                         if (p->pgrp != current->pgrp)
 500                                 continue;
 501                 } else if (pid != -1) {
 502                         if (p->pgrp != -pid)
 503                                 continue;
 504                 }
 505                 /* Wait for all children (clone and not) if __WALL is set;
 506                  * otherwise, wait for clone children *only* if __WCLONE is
 507                  * set; otherwise, wait for non-clone children *only*.  (Note:
 508                  * A "clone" child here is one that reports to its parent
 509                  * using a signal other than SIGCHLD.) */
 510                 if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
 511                     && !(options & __WALL))
 512                         continue;
 513                 flag = 1;
 514                 switch (p->state) {
 515                         case TASK_STOPPED:
                                     /*
                                      * A stopped child with exit_code 0 is skipped;
                                      * exit_code is cleared below once a stop has
                                      * been reported successfully.
                                      */
 516                                 if (!p->exit_code)
 517                                         continue;
                                     /* report stops only for WUNTRACED or a traced child */
 518                                 if (!(options & WUNTRACED) && !(p->ptrace & PT_PTRACED))
 519                                         continue;
 520                                 read_unlock(&tasklist_lock);
 521                                 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                                     /* status word: exit_code in bits 8 and up, 0x7f in the low byte */
 522                                 if (!retval && stat_addr)
 523                                         retval = put_user((p->exit_code << 8) | 0x7f, stat_addr);
 524                                 if (!retval) {
 525                                         p->exit_code = 0;
 526                                         retval = p->pid;
 527                                 }
 528                                 goto end_wait4;
 529                         case TASK_ZOMBIE:
                                     /* fold the child's CPU times into the caller's child totals */
 530                                 current->times.tms_cutime += p->times.tms_utime + p->times.tms_cutime;
 531                                 current->times.tms_cstime += p->times.tms_stime + p->times.tms_cstime;
 532                                 read_unlock(&tasklist_lock);
 533                                 retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
 534                                 if (!retval && stat_addr)
 535                                         retval = put_user(p->exit_code, stat_addr);
 536                                 if (retval)
 537                                         goto end_wait4;
 538                                 retval = p->pid;
                                     /*
                                      * If the child's current parent is not its
                                      * original parent, relink it under the original
                                      * parent and notify that parent instead of
                                      * releasing the task here.
                                      */
 539                                 if (p->p_opptr != p->p_pptr) {
 540                                         write_lock_irq(&tasklist_lock);
 541                                         REMOVE_LINKS(p);
 542                                         p->p_pptr = p->p_opptr;
 543                                         SET_LINKS(p);
 544                                         write_unlock_irq(&tasklist_lock);
 545                                         notify_parent(p, SIGCHLD);
 546                                 } else
 547                                         release(p);
 548                                 goto end_wait4;
 549                         default:
 550                                 continue;
 551                 }
 552         }
 553         read_unlock(&tasklist_lock);
             /* eligible children exist, but none is ready to be reported */
 554         if (flag) {
 555                 retval = 0;
 556                 if (options & WNOHANG)
 557                         goto end_wait4;
 558                 retval = -ERESTARTSYS;
 559                 if (signal_pending(current))
 560                         goto end_wait4;
 561                 schedule();
 562                 goto repeat;
 563         }
 564         retval = -ECHILD;
 565 end_wait4:
             /* common exit: undo the sleep state and leave the wait queue */
 566         current->state = TASK_RUNNING;
 567         remove_wait_queue(&current->wait_chldexit,&wait);
 568         return retval;
 569 }
 570
 571 #if !defined(__alpha__) && !defined(__ia64__)
 572
 573 /*
 574  * sys_waitpid() remains for compatibility. waitpid() should be
 575  * implemented by calling sys_wait4() from libc.a.
 576  */
 577 asmlinkage long sys_waitpid(pid_t pid,unsigned int * stat_addr, int options)
 578 {
 579         return sys_wait4(pid, stat_addr, options, NULL);
 580 }
 581
 582 #endif