sys/kern/kern_exit.c

   1 /*
   2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  * (c) UNIX System Laboratories, Inc.
   5  * All or some portions of this file are derived from material licensed
   6  * to the University of California by American Telephone and Telegraph
   7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   8  * the permission of UNIX System Laboratories, Inc.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. All advertising materials mentioning features or use of this software
  19  *    must display the following acknowledgement:
  20  *      This product includes software developed by the University of
  21  *      California, Berkeley and its contributors.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  *      @(#)kern_exit.c 8.7 (Berkeley) 2/12/94
  39  * $FreeBSD: src/sys/kern/kern_exit.c,v 1.92.2.11 2003/01/13 22:51:16 dillon Exp $
  40  * $DragonFly: src/sys/kern/kern_exit.c,v 1.79 2007/03/12 21:05:48 corecode Exp $
  41  */
  42
  43 #include "opt_compat.h"
  44 #include "opt_ktrace.h"
  45
  46 #include <sys/param.h>
  47 #include <sys/systm.h>
  48 #include <sys/sysproto.h>
  49 #include <sys/kernel.h>
  50 #include <sys/malloc.h>
  51 #include <sys/proc.h>
  52 #include <sys/ktrace.h>
  53 #include <sys/pioctl.h>
  54 #include <sys/tty.h>
  55 #include <sys/wait.h>
  56 #include <sys/vnode.h>
  57 #include <sys/resourcevar.h>
  58 #include <sys/signalvar.h>
  59 #include <sys/taskqueue.h>
  60 #include <sys/ptrace.h>
  61 #include <sys/acct.h>           /* for acct_process() function prototype */
  62 #include <sys/filedesc.h>
  63 #include <sys/shm.h>
  64 #include <sys/sem.h>
  65 #include <sys/aio.h>
  66 #include <sys/jail.h>
  67 #include <sys/kern_syscall.h>
  68 #include <sys/upcall.h>
  69 #include <sys/caps.h>
  70 #include <sys/unistd.h>
  71
  72 #include <vm/vm.h>
  73 #include <vm/vm_param.h>
  74 #include <sys/lock.h>
  75 #include <vm/pmap.h>
  76 #include <vm/vm_map.h>
  77 #include <vm/vm_zone.h>
  78 #include <vm/vm_extern.h>
  79 #include <sys/user.h>
  80
  81 #include <sys/thread2.h>
  82
  83 static MALLOC_DEFINE(M_ATEXIT, "atexit", "atexit callback");	/* malloc type for exit_list entries (at_exit()/rm_at_exit()) */
  84 static MALLOC_DEFINE(M_ZOMBIE, "zombie", "zombie proc status");	/* NOTE(review): not referenced in the visible part of this file */
  85
  86 /*
  87  * callout list for things to do at exit time
  88  */
  89 struct exitlist {	/* one registered exit-time callback */
  90         exitlist_fn function;	/* invoked by exit1() with the exiting thread */
  91         TAILQ_ENTRY(exitlist) next;	/* linkage on exit_list */
  92 };
  93
  94 TAILQ_HEAD(exit_list_head, exitlist);	/* head type for the callback list */
  95 static struct exit_list_head exit_list = TAILQ_HEAD_INITIALIZER(exit_list);	/* file-private list, appended to by at_exit() */
  96
  97 /*
  98  * LWP reaper data
  99  */
 100 struct task *deadlwp_task[MAXCPU];	/* per-cpu reaper task; allocated in deadlwp_init(), enqueued by lwp_exit() */
 101 struct lwplist deadlwp_list[MAXCPU];	/* per-cpu list of exited lwps waiting for reaplwps() */
 102
 103 /*
 104  * exit --
 105  *      Death of process.
 106  *
 107  * SYS_EXIT_ARGS(int rval)
 108  */
 109 int
 110 sys_exit(struct exit_args *uap)
 111 {
 112         exit1(W_EXITCODE(uap->rval, 0));
 113         /* NOTREACHED */
 114 }
 115
 116 /*
 117  * Extended exit --
 118  *      Death of a lwp or process with optional bells and whistles.
 119  */
 120 int
 121 sys_extexit(struct extexit_args *uap)
 122 {
 123         int action, who;
 124         int error;
 125
 126         action = EXTEXIT_ACTION(uap->how);
 127         who = EXTEXIT_WHO(uap->how);
 128
 129         /* Check parameters before we might perform some action */
 130         switch (who) {
 131         case EXTEXIT_PROC:
 132         case EXTEXIT_LWP:
 133                 break;
 134
 135         default:
 136                 return (EINVAL);
 137         }
 138
 139         switch (action) {
 140         case EXTEXIT_SIMPLE:
 141                 break;
 142
 143         case EXTEXIT_SETINT:
 144                 error = copyout(&uap->status, uap->addr, sizeof(uap->status));
 145                 if (error)
 146                         return (error);
 147                 break;
 148
 149         default:
 150                 return (EINVAL);
 151         }
 152
 153         switch (who) {
 154         case EXTEXIT_LWP:
 155                 /*
 156                  * Be sure only to perform a simple lwp exit if there is at
 157                  * least one more lwp in the proc, which will call exit1()
 158                  * later, otherwise the proc will be an UNDEAD and not even a
 159                  * SZOMB!
 160                  */
 161                 if (curproc->p_nthreads > 1)
 162                         lwp_exit();
 163                 /* else last lwp in proc:  do the real thing */
 164                 /* FALLTHROUGH */
 165
 166         default:        /* to help gcc */
 167         case EXTEXIT_PROC:
 168                 exit1(W_EXITCODE(uap->status, 0));
 169                 /* NOTREACHED */
 170         }
 171
 172         /* NOTREACHED */
 173 }
 174
 175 void
 176 killlwps(struct lwp *lp)
 177 {
 178         struct proc *p = lp->lwp_proc;
 179         struct lwp *tlp;
 180
 181         KKASSERT((lp->lwp_flag & LWP_WEXIT) == 0);
 182
 183         FOREACH_LWP_IN_PROC(tlp, p) {
 184                 if (tlp == lp)
 185                         continue;       /* don't kill the current lwp */
 186                 tlp->lwp_flag |= LWP_WEXIT;
 187                 lwp_signotify(tlp);
 188         }
 189
 190         while (p->p_nthreads > 1) {
 191                 if (bootverbose)
 192                         kprintf("killlwps: waiting for %d lwps of pid %d to die\n",
 193                                 p->p_nthreads - 1, p->p_pid);
 194                 tsleep(&p->p_nthreads, 0, "killlwps", hz);
 195         }
 196 }
 197
 198 /*
 199  * Exit: deallocate address space and other resources, change proc state
 200  * to zombie, and unlink proc from allproc and parent's lists.  Save exit
 201  * status and rusage for wait().  Check for child processes and orphan them.
 202  */
 203 void
 204 exit1(int rv)
 205 {
 206         struct thread *td = curthread;
 207         struct proc *p = td->td_proc;
 208         struct lwp *lp = td->td_lwp;
 209         struct proc *q, *nq;
 210         struct vmspace *vm;
 211         struct vnode *vtmp;
 212         struct exitlist *ep;
 213
 214         if (p->p_pid == 1) {
 215                 kprintf("init died (signal %d, exit %d)\n",
 216                     WTERMSIG(rv), WEXITSTATUS(rv));
 217                 panic("Going nowhere without my init!");
 218         }
 219
 220         /*
 221          * Kill all other threads if there are any.
 222          *
 223          * If some other thread initiated our exit, do so.
 224          */
 225         if (lp->lwp_flag & LWP_WEXIT) {
 226                 KKASSERT(p->p_nthreads > 1);
 227                 lwp_exit();
 228         }
 229         if (p->p_nthreads > 1)
 230                 killlwps(lp);
 231
 232         caps_exit(lp->lwp_thread);
 233         aio_proc_rundown(p);
 234
 235         /* are we a task leader? */
 236         if(p == p->p_leader) {
 237                 struct kill_args killArgs;
 238                 killArgs.signum = SIGKILL;
 239                 q = p->p_peers;
 240                 while(q) {
 241                         killArgs.pid = q->p_pid;
 242                         /*
 243                          * The interface for kill is better
 244                          * than the internal signal
 245                          */
 246                         sys_kill(&killArgs);
 247                         nq = q;
 248                         q = q->p_peers;
 249                 }
 250                 while (p->p_peers)
 251                   tsleep((caddr_t)p, 0, "exit1", 0);
 252         }
 253
 254 #ifdef PGINPROF
 255         vmsizmon();
 256 #endif
 257         STOPEVENT(p, S_EXIT, rv);
 258         wakeup(&p->p_stype);    /* Wakeup anyone in procfs' PIOCWAIT */
 259
 260         /*
 261          * Check if any loadable modules need anything done at process exit.
 262          * e.g. SYSV IPC stuff
 263          * XXX what if one of these generates an error?
 264          */
 265         TAILQ_FOREACH(ep, &exit_list, next)
 266                 (*ep->function)(td);
 267
 268         if (p->p_flag & P_PROFIL)
 269                 stopprofclock(p);
 270         /*
 271          * If parent is waiting for us to exit or exec,
 272          * P_PPWAIT is set; we will wakeup the parent below.
 273          */
 274         p->p_flag &= ~(P_TRACED | P_PPWAIT);
 275         p->p_flag |= P_WEXIT;
 276         SIGEMPTYSET(p->p_siglist);
 277         SIGEMPTYSET(lp->lwp_siglist);
 278         if (timevalisset(&p->p_realtimer.it_value))
 279                 callout_stop(&p->p_ithandle);
 280
 281         /*
 282          * Reset any sigio structures pointing to us as a result of
 283          * F_SETOWN with our pid.
 284          */
 285         funsetownlst(&p->p_sigiolst);
 286
 287         /*
 288          * Close open files and release open-file table.
 289          * This may block!
 290          */
 291         fdfree(p);
 292         p->p_fd = NULL;
 293
 294         if(p->p_leader->p_peers) {
 295                 q = p->p_leader;
 296                 while(q->p_peers != p)
 297                         q = q->p_peers;
 298                 q->p_peers = p->p_peers;
 299                 wakeup((caddr_t)p->p_leader);
 300         }
 301
 302         /*
 303          * XXX Shutdown SYSV semaphores
 304          */
 305         semexit(p);
 306
 307         KKASSERT(p->p_numposixlocks == 0);
 308
 309         /* The next two chunks should probably be moved to vmspace_exit. */
 310         vm = p->p_vmspace;
 311
 312         /*
 313          * Release upcalls associated with this process
 314          */
 315         if (vm->vm_upcalls)
 316                 upc_release(vm, lp);
 317
 318         /* clean up data related to virtual kernel operation */
 319         if (p->p_vkernel)
 320                 vkernel_exit(p);
 321
 322         /*
 323          * Release user portion of address space.
 324          * This releases references to vnodes,
 325          * which could cause I/O if the file has been unlinked.
 326          * Need to do this early enough that we can still sleep.
 327          * Can't free the entire vmspace as the kernel stack
 328          * may be mapped within that space also.
 329          *
 330          * Processes sharing the same vmspace may exit in one order, and
 331          * get cleaned up by vmspace_exit() in a different order.  The
 332          * last exiting process to reach this point releases as much of
 333          * the environment as it can, and the last process cleaned up
 334          * by vmspace_exit() (which decrements exitingcnt) cleans up the
 335          * remainder.
 336          */
 337         ++vm->vm_exitingcnt;
 338         if (--vm->vm_refcnt == 0) {
 339                 shmexit(vm);
 340                 pmap_remove_pages(vmspace_pmap(vm), VM_MIN_USER_ADDRESS,
 341                                   VM_MAX_USER_ADDRESS);
 342                 vm_map_remove(&vm->vm_map, VM_MIN_USER_ADDRESS,
 343                               VM_MAX_USER_ADDRESS);
 344         }
 345
 346         if (SESS_LEADER(p)) {
 347                 struct session *sp = p->p_session;
 348                 struct vnode *vp;
 349
 350                 if (sp->s_ttyvp) {
 351                         /*
 352                          * We are the controlling process.  Signal the
 353                          * foreground process group, drain the controlling
 354                          * terminal, and revoke access to the controlling
 355                          * terminal.
 356                          *
 357                          * NOTE: while waiting for the process group to exit
 358                          * it is possible that one of the processes in the
 359                          * group will revoke the tty, so we have to recheck.
 360                          */
 361                         if (sp->s_ttyp && (sp->s_ttyp->t_session == sp)) {
 362                                 if (sp->s_ttyp->t_pgrp)
 363                                         pgsignal(sp->s_ttyp->t_pgrp, SIGHUP, 1);
 364                                 (void) ttywait(sp->s_ttyp);
 365                                 /*
 366                                  * The tty could have been revoked
 367                                  * if we blocked.
 368                                  */
 369                                 if ((vp = sp->s_ttyvp) != NULL) {
 370                                         ttyclosesession(sp, 0);
 371                                         vx_lock(vp);
 372                                         VOP_REVOKE(vp, REVOKEALL);
 373                                         vx_unlock(vp);
 374                                         vrele(vp);      /* s_ttyvp ref */
 375                                 }
 376                         }
 377                         /*
 378                          * Release the tty.  If someone has it open via
 379                          * /dev/tty then close it (since they no longer can
 380                          * once we've NULL'd it out).
 381                          */
 382                         if (sp->s_ttyvp)
 383                                 ttyclosesession(sp, 1);
 384                         /*
 385                          * s_ttyp is not zero'd; we use this to indicate
 386                          * that the session once had a controlling terminal.
 387                          * (for logging and informational purposes)
 388                          */
 389                 }
 390                 sp->s_leader = NULL;
 391         }
 392         fixjobc(p, p->p_pgrp, 0);
 393         (void)acct_process(p);
 394 #ifdef KTRACE
 395         /*
 396          * release trace file
 397          */
 398         if (p->p_tracenode)
 399                 ktrdestroy(&p->p_tracenode);
 400         p->p_traceflag = 0;
 401 #endif
 402         /*
 403          * Release reference to text vnode
 404          */
 405         if ((vtmp = p->p_textvp) != NULL) {
 406                 p->p_textvp = NULL;
 407                 vrele(vtmp);
 408         }
 409
 410         /*
 411          * Move the process to the zombie list.  This will block
 412          * until the process p_lock count reaches 0.  The process will
 413          * not be reaped until TDF_EXITING is set by cpu_thread_exit(),
 414          * which is called from cpu_proc_exit().
 415          */
 416         proc_move_allproc_zombie(p);
 417
 418         q = LIST_FIRST(&p->p_children);
 419         if (q)          /* only need this if any child is S_ZOMB */
 420                 wakeup((caddr_t) initproc);
 421         for (; q != 0; q = nq) {
 422                 nq = LIST_NEXT(q, p_sibling);
 423                 LIST_REMOVE(q, p_sibling);
 424                 LIST_INSERT_HEAD(&initproc->p_children, q, p_sibling);
 425                 q->p_pptr = initproc;
 426                 q->p_sigparent = SIGCHLD;
 427                 /*
 428                  * Traced processes are killed
 429                  * since their existence means someone is screwing up.
 430                  */
 431                 if (q->p_flag & P_TRACED) {
 432                         q->p_flag &= ~P_TRACED;
 433                         ksignal(q, SIGKILL);
 434                 }
 435         }
 436
 437         /*
 438          * Save exit status and final rusage info, adding in child rusage
 439          * info and self times.
 440          */
 441         p->p_xstat = rv;
 442         calcru_proc(p, &p->p_ru);
 443         ruadd(&p->p_ru, &p->p_cru);
 444
 445         /*
 446          * notify interested parties of our demise.
 447          */
 448         KNOTE(&p->p_klist, NOTE_EXIT);
 449
 450         /*
 451          * Notify parent that we're gone.  If parent has the PS_NOCLDWAIT
 452          * flag set, notify process 1 instead (and hope it will handle
 453          * this situation).
 454          */
 455         if (p->p_pptr->p_sigacts->ps_flag & PS_NOCLDWAIT) {
 456                 struct proc *pp = p->p_pptr;
 457                 proc_reparent(p, initproc);
 458                 /*
 459                  * If this was the last child of our parent, notify
 460                  * parent, so in case he was wait(2)ing, he will
 461                  * continue.
 462                  */
 463                 if (LIST_EMPTY(&pp->p_children))
 464                         wakeup((caddr_t)pp);
 465         }
 466
 467         if (p->p_sigparent && p->p_pptr != initproc) {
 468                 ksignal(p->p_pptr, p->p_sigparent);
 469         } else {
 470                 ksignal(p->p_pptr, SIGCHLD);
 471         }
 472
 473         wakeup((caddr_t)p->p_pptr);
 474         /*
 475          * cpu_exit is responsible for clearing curproc, since
 476          * it is heavily integrated with the thread/switching sequence.
 477          *
 478          * Other substructures are freed from wait().
 479          */
 480         plimit_free(&p->p_limit);
 481
 482         /*
 483          * Release the current user process designation on the process so
 484          * the userland scheduler can work in someone else.
 485          */
 486         p->p_usched->release_curproc(lp);
 487
 488         /*
 489          * Finally, call machine-dependent code to release the remaining
 490          * resources including address space, the kernel stack and pcb.
 491          * The address space is released by "vmspace_free(p->p_vmspace)";
 492          * This is machine-dependent, as we may have to change stacks
 493          * or ensure that the current one isn't reallocated before we
 494          * finish.  cpu_exit will end with a call to cpu_switch(), finishing
 495          * our execution (pun intended).
 496          */
 497         lwp_exit();
 498 }
 499
 500 void
 501 lwp_exit(void)
 502 {
 503         struct lwp *lp = curthread->td_lwp;
 504         struct proc *p = lp->lwp_proc;
 505
 506         /*
 507          * Nobody actually wakes us when the lock
 508          * count reaches zero, so just wait one tick.
 509          */
 510         while (lp->lwp_lock > 0)
 511                 tsleep(lp, 0, "lwpexit", 1);
 512
 513         /* Hand down resource usage to our proc */
 514         ruadd(&p->p_ru, &lp->lwp_ru);
 515
 516         --p->p_nthreads;
 517         LIST_REMOVE(lp, lwp_list);
 518         wakeup(&p->p_nthreads);
 519         LIST_INSERT_HEAD(&deadlwp_list[mycpuid], lp, lwp_list);
 520         taskqueue_enqueue(taskqueue_thread[mycpuid], deadlwp_task[mycpuid]);
 521         cpu_lwp_exit();
 522 }
 523
 524 /*
 525  * Wait until a lwp is completely dead.
 526  *
 527  * If the thread is still executing, which can't be waited upon,
 528  * return failure.  The caller is responsible of waiting a little
 529  * bit and checking again.
 530  *
 531  * Suggested use:
 532  * while (!lwp_wait(lp))
 533  *      tsleep(lp, 0, "lwpwait", 1);
 534  */
 535 static int
 536 lwp_wait(struct lwp *lp)
 537 {
 538         struct thread *td = lp->lwp_thread;;
 539
 540         KKASSERT(lwkt_preempted_proc() != lp);
 541
 542         while (lp->lwp_lock > 0)
 543                 tsleep(lp, 0, "lwpwait1", 1);
 544
 545         lwkt_wait_free(td);
 546
 547         /*
 548          * The lwp's thread may still be in the middle
 549          * of switching away, we can't rip its stack out from
 550          * under it until TDF_EXITING is set and both
 551          * TDF_RUNNING and TDF_PREEMPT_LOCK are clear.
 552          * TDF_PREEMPT_LOCK must be checked because TDF_RUNNING
 553          * will be cleared temporarily if a thread gets
 554          * preempted.
 555          *
 556          * YYY no wakeup occurs, so we simply return failure
 557          * and let the caller deal with sleeping and calling
 558          * us again.
 559          */
 560         if ((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK|TDF_EXITING)) !=
 561             TDF_EXITING)
 562                 return (0);
 563
 564         return (1);
 565 }
 566
 567 /*
 568  * Release the resources associated with a lwp.
 569  * The lwp must be completely dead.
 570  */
 571 void
 572 lwp_dispose(struct lwp *lp)
 573 {
 574         struct thread *td = lp->lwp_thread;;
 575
 576         KKASSERT(lwkt_preempted_proc() != lp);
 577         KKASSERT(td->td_refs == 0);
 578         KKASSERT((td->td_flags & (TDF_RUNNING|TDF_PREEMPT_LOCK|TDF_EXITING)) ==
 579                  TDF_EXITING);
 580
 581         if (td != NULL) {
 582                 td->td_proc = NULL;
 583                 td->td_lwp = NULL;
 584                 lp->lwp_thread = NULL;
 585                 lwkt_free_thread(td);
 586         }
 587         zfree(lwp_zone, lp);
 588 }
 589
 590 int
 591 sys_wait4(struct wait_args *uap)
 592 {
 593         struct rusage rusage;
 594         int error, status;
 595
 596         error = kern_wait(uap->pid, uap->status ? &status : NULL,
 597             uap->options, uap->rusage ? &rusage : NULL, &uap->sysmsg_fds[0]);
 598
 599         if (error == 0 && uap->status)
 600                 error = copyout(&status, uap->status, sizeof(*uap->status));
 601         if (error == 0 && uap->rusage)
 602                 error = copyout(&rusage, uap->rusage, sizeof(*uap->rusage));
 603         return (error);
 604 }
 605
 606 /*
 607  * wait1()
 608  *
 609  * wait_args(int pid, int *status, int options, struct rusage *rusage)
 610  */
 611 int
 612 kern_wait(pid_t pid, int *status, int options, struct rusage *rusage, int *res)
 613 {
 614         struct thread *td = curthread;
 615         struct proc *q = td->td_proc;
 616         struct proc *p, *t;
 617         int nfound, error;
 618
 619         if (pid == 0)
 620                 pid = -q->p_pgid;
 621         if (options &~ (WUNTRACED|WNOHANG|WLINUXCLONE))
 622                 return (EINVAL);
 623 loop:
 624         /*
 625          * Hack for backwards compatibility with badly written user code.
 626          * Or perhaps we have to do this anyway, it is unclear. XXX
 627          *
 628          * The problem is that if a process group is stopped and the parent
 629          * is doing a wait*(..., WUNTRACED, ...), it will see the STOP
 630          * of the child and then stop itself when it tries to return from the
 631          * system call.  When the process group is resumed the parent will
 632          * then get the STOP status even though the child has now resumed
 633          * (a followup wait*() will get the CONT status).
 634          *
 635          * Previously the CONT would overwrite the STOP because the tstop
 636          * was handled within tsleep(), and the parent would only see
 637          * the CONT when both are stopped and continued together.  This litte
 638          * two-line hack restores this effect.
 639          */
 640         while (q->p_stat == SSTOP)
 641             tstop();
 642
 643         nfound = 0;
 644         LIST_FOREACH(p, &q->p_children, p_sibling) {
 645                 if (pid != WAIT_ANY &&
 646                     p->p_pid != pid && p->p_pgid != -pid)
 647                         continue;
 648
 649                 /* This special case handles a kthread spawned by linux_clone
 650                  * (see linux_misc.c).  The linux_wait4 and linux_waitpid
 651                  * functions need to be able to distinguish between waiting
 652                  * on a process and waiting on a thread.  It is a thread if
 653                  * p_sigparent is not SIGCHLD, and the WLINUXCLONE option
 654                  * signifies we want to wait for threads and not processes.
 655                  */
 656                 if ((p->p_sigparent != SIGCHLD) ^
 657                     ((options & WLINUXCLONE) != 0)) {
 658                         continue;
 659                 }
 660
 661                 nfound++;
 662                 if (p->p_stat == SZOMB) {
 663                         /*
 664                          * Other kernel threads may be in the middle of
 665                          * accessing the proc.  For example, kern/kern_proc.c
 666                          * could be blocked writing proc data to a sysctl.
 667                          * At the moment, if this occurs, we are not woken
 668                          * up and rely on a one-second retry.
 669                          */
 670                         while (p->p_lock)
 671                                 tsleep(p, 0, "reap3", hz);
 672
 673                         /* scheduling hook for heuristic */
 674                         /* XXX no lwp available, we need a different heuristic */
 675                         /*
 676                         p->p_usched->heuristic_exiting(td->td_lwp, deadlp);
 677                         */
 678
 679                         /* Take care of our return values. */
 680                         *res = p->p_pid;
 681                         if (status)
 682                                 *status = p->p_xstat;
 683                         if (rusage)
 684                                 *rusage = p->p_ru;
 685                         /*
 686                          * If we got the child via a ptrace 'attach',
 687                          * we need to give it back to the old parent.
 688                          */
 689                         if (p->p_oppid && (t = pfind(p->p_oppid))) {
 690                                 p->p_oppid = 0;
 691                                 proc_reparent(p, t);
 692                                 ksignal(t, SIGCHLD);
 693                                 wakeup((caddr_t)t);
 694                                 return (0);
 695                         }
 696                         p->p_xstat = 0;
 697                         ruadd(&q->p_cru, &p->p_ru);
 698
 699                         /*
 700                          * Decrement the count of procs running with this uid.
 701                          */
 702                         chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
 703
 704                         /*
 705                          * Free up credentials.
 706                          */
 707                         crfree(p->p_ucred);
 708                         p->p_ucred = NULL;
 709
 710                         /*
 711                          * Remove unused arguments
 712                          */
 713                         if (p->p_args && --p->p_args->ar_ref == 0)
 714                                 FREE(p->p_args, M_PARGS);
 715
 716                         /*
 717                          * Finally finished with old proc entry.
 718                          * Unlink it from its process group and free it.
 719                          */
 720                         proc_remove_zombie(p);
 721                         leavepgrp(p);
 722
 723                         if (--p->p_sigacts->ps_refcnt == 0) {
 724                                 kfree(p->p_sigacts, M_SUBPROC);
 725                                 p->p_sigacts = NULL;
 726                         }
 727
 728                         vm_waitproc(p);
 729                         zfree(proc_zone, p);
 730                         nprocs--;
 731                         return (0);
 732                 }
 733                 if (p->p_stat == SSTOP && (p->p_flag & P_WAITED) == 0 &&
 734                     (p->p_flag & P_TRACED || options & WUNTRACED)) {
 735                         p->p_flag |= P_WAITED;
 736
 737                         *res = p->p_pid;
 738                         if (status)
 739                                 *status = W_STOPCODE(p->p_xstat);
 740                         /* Zero rusage so we get something consistent. */
 741                         if (rusage)
 742                                 bzero(rusage, sizeof(rusage));
 743                         return (0);
 744                 }
 745         }
 746         if (nfound == 0)
 747                 return (ECHILD);
 748         if (options & WNOHANG) {
 749                 *res = 0;
 750                 return (0);
 751         }
 752         error = tsleep((caddr_t)q, PCATCH, "wait", 0);
 753         if (error)
 754                 return (error);
 755         goto loop;
 756 }
 757
 758 /*
 759  * make process 'parent' the new parent of process 'child'.
 760  */
 761 void
 762 proc_reparent(struct proc *child, struct proc *parent)
 763 {
 764
 765         if (child->p_pptr == parent)
 766                 return;
 767
 768         LIST_REMOVE(child, p_sibling);
 769         LIST_INSERT_HEAD(&parent->p_children, child, p_sibling);
 770         child->p_pptr = parent;
 771 }
 772
 773 /*
 774  * The next two functions are to handle adding/deleting items on the
 775  * exit callout list
 776  *
 777  * at_exit():
 778  * Take the arguments given and put them onto the exit callout list,
 779  * However first make sure that it's not already there.
 780  * returns 0 on success.
 781  */
 782
 783 int
 784 at_exit(exitlist_fn function)
 785 {
 786         struct exitlist *ep;
 787
 788 #ifdef INVARIANTS
 789         /* Be noisy if the programmer has lost track of things */
 790         if (rm_at_exit(function))
 791                 kprintf("WARNING: exit callout entry (%p) already present\n",
 792                     function);
 793 #endif
 794         ep = kmalloc(sizeof(*ep), M_ATEXIT, M_NOWAIT);
 795         if (ep == NULL)
 796                 return (ENOMEM);
 797         ep->function = function;
 798         TAILQ_INSERT_TAIL(&exit_list, ep, next);
 799         return (0);
 800 }
 801
 802 /*
 803  * Scan the exit callout list for the given item and remove it.
 804  * Returns the number of items removed (0 or 1)
 805  */
 806 int
 807 rm_at_exit(exitlist_fn function)
 808 {
 809         struct exitlist *ep;
 810
 811         TAILQ_FOREACH(ep, &exit_list, next) {
 812                 if (ep->function == function) {
 813                         TAILQ_REMOVE(&exit_list, ep, next);
 814                         kfree(ep, M_ATEXIT);
 815                         return(1);
 816                 }
 817         }
 818         return (0);
 819 }
 820
 821
 822 /*
 823  * LWP reaper related code.
 824  */
 825
 826 static void
 827 reaplwps(void *context, int dummy)
 828 {
 829         struct lwplist *lwplist = context;
 830         struct lwp *lp;
 831
 832         while ((lp = LIST_FIRST(lwplist))) {
 833                 if (!lwp_wait(lp))
 834                         tsleep(lp, 0, "lwpreap", 1);
 835                 LIST_REMOVE(lp, lwp_list);
 836                 lwp_dispose(lp);
 837         }
 838 }
 839
 840 static void
 841 deadlwp_init(void)
 842 {
 843         int cpu;
 844
 845         for (cpu = 0; cpu < ncpus; cpu++) {
 846                 LIST_INIT(&deadlwp_list[cpu]);
 847                 deadlwp_task[cpu] = kmalloc(sizeof(*deadlwp_task[cpu]), M_DEVBUF, M_WAITOK);
 848                 TASK_INIT(deadlwp_task[cpu], 0, reaplwps, &deadlwp_list[cpu]);
 849         }
 850 }
 851
 852 SYSINIT(deadlwpinit, SI_SUB_CONFIGURE, SI_ORDER_ANY, deadlwp_init, NULL);	/* registers deadlwp_init() with the SYSINIT startup framework */