sys/kern/kern_fork.c

   1 /*
   2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  * (c) UNIX System Laboratories, Inc.
   5  * All or some portions of this file are derived from material licensed
   6  * to the University of California by American Telephone and Telegraph
   7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   8  * the permission of UNIX System Laboratories, Inc.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 3. All advertising materials mentioning features or use of this software
  19  *    must display the following acknowledgement:
  20  *      This product includes software developed by the University of
  21  *      California, Berkeley and its contributors.
  22  * 4. Neither the name of the University nor the names of its contributors
  23  *    may be used to endorse or promote products derived from this software
  24  *    without specific prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  *      @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
  39  * $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.14 2003/06/26 04:15:10 silby Exp $
  40  * $DragonFly: src/sys/kern/kern_fork.c,v 1.32 2005/01/31 22:29:59 joerg Exp $
  41  */
  42
  43 #include "opt_ktrace.h"
  44
  45 #include <sys/param.h>
  46 #include <sys/systm.h>
  47 #include <sys/sysproto.h>
  48 #include <sys/filedesc.h>
  49 #include <sys/kernel.h>
  50 #include <sys/sysctl.h>
  51 #include <sys/malloc.h>
  52 #include <sys/proc.h>
  53 #include <sys/resourcevar.h>
  54 #include <sys/vnode.h>
  55 #include <sys/acct.h>
  56 #include <sys/ktrace.h>
  57 #include <sys/unistd.h>
  58 #include <sys/jail.h>
  59 #include <sys/caps.h>
  60
  61 #include <vm/vm.h>
  62 #include <sys/lock.h>
  63 #include <vm/pmap.h>
  64 #include <vm/vm_map.h>
  65 #include <vm/vm_extern.h>
  66 #include <vm/vm_zone.h>
  67
  68 #include <sys/vmmeter.h>
  69 #include <sys/user.h>
  70
/* Malloc type under which at_fork() allocates its callout list entries. */
static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");

/*
 * These are the structures used to create a callout list for things to do
 * when forking a process.  Entries are added by at_fork(), removed by
 * rm_at_fork() and invoked from fork1().
 */
struct forklist {
        forklist_fn function;           /* callback, invoked as (p1, p2, flags) */
        TAILQ_ENTRY(forklist) next;     /* linkage on fork_list */
};

TAILQ_HEAD(forklist_head, forklist);
/* The callout list itself; at_fork() appends at the tail. */
static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);

/*
 * Place for fork1() to sleep on.  Only the address is used, as the tsleep()
 * channel when a process limit has been hit.
 */
int forksleep;
  86
  87 /* ARGSUSED */
  88 int
  89 fork(struct fork_args *uap)
  90 {
  91         struct proc *p = curproc;
  92         struct proc *p2;
  93         int error;
  94
  95         error = fork1(p, RFFDG | RFPROC, &p2);
  96         if (error == 0) {
  97                 start_forked_proc(p, p2);
  98                 uap->sysmsg_fds[0] = p2->p_pid;
  99                 uap->sysmsg_fds[1] = 0;
 100         }
 101         return error;
 102 }
 103
 104 /* ARGSUSED */
 105 int
 106 vfork(struct vfork_args *uap)
 107 {
 108         struct proc *p = curproc;
 109         struct proc *p2;
 110         int error;
 111
 112         error = fork1(p, RFFDG | RFPROC | RFPPWAIT | RFMEM, &p2);
 113         if (error == 0) {
 114                 start_forked_proc(p, p2);
 115                 uap->sysmsg_fds[0] = p2->p_pid;
 116                 uap->sysmsg_fds[1] = 0;
 117         }
 118         return error;
 119 }
 120
 121 /*
 122  * Handle rforks.  An rfork may (1) operate on the current process without
 123  * creating a new, (2) create a new process that shared the current process's
 124  * vmspace, signals, and/or descriptors, or (3) create a new process that does
 125  * not share these things (normal fork).
 126  *
 127  * Note that we only call start_forked_proc() if a new process is actually
 128  * created.
 129  *
 130  * rfork { int flags }
 131  */
 132 int
 133 rfork(struct rfork_args *uap)
 134 {
 135         struct proc *p = curproc;
 136         struct proc *p2;
 137         int error;
 138
 139         if ((uap->flags & RFKERNELONLY) != 0)
 140                 return (EINVAL);
 141
 142         error = fork1(p, uap->flags, &p2);
 143         if (error == 0) {
 144                 if (p2)
 145                         start_forked_proc(p, p2);
 146                 uap->sysmsg_fds[0] = p2 ? p2->p_pid : 0;
 147                 uap->sysmsg_fds[1] = 0;
 148         }
 149         return error;
 150 }
 151
 152
/*
 * nprocs is the current number of processes; fork1() checks it against
 * maxproc and increments it for every process it creates.  It starts at 1
 * to account for process 0.
 */
int     nprocs = 1;             /* process 0 */
/*
 * Pid prototype: fork1() advances this, skips values already in use as a
 * pid, process group id or session id, and assigns the result to the child.
 */
static int nextpid = 0;
 155
 156 /*
 157  * Random component to nextpid generation.  We mix in a random factor to make
 158  * it a little harder to predict.  We sanity check the modulus value to avoid
 159  * doing it in critical paths.  Don't let it be too small or we pointlessly
 160  * waste randomness entropy, and don't let it be impossibly large.  Using a
 161  * modulus that is too big causes a LOT more process table scans and slows
 162  * down fork processing as the pidchecked caching is defeated.
 163  */
 164 static int randompid = 0;
 165
 166 static int
 167 sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
 168 {
 169                 int error, pid;
 170
 171                 pid = randompid;
 172                 error = sysctl_handle_int(oidp, &pid, 0, req);
 173                 if (error || !req->newptr)
 174                         return (error);
 175                 if (pid < 0 || pid > PID_MAX - 100)     /* out of range */
 176                         pid = PID_MAX - 100;
 177                 else if (pid < 2)                       /* NOP */
 178                         pid = 0;
 179                 else if (pid < 100)                     /* Make it reasonable */
 180                         pid = 100;
 181                 randompid = pid;
 182                 return (error);
 183 }
 184
 185 SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
 186     0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
 187
 188 int
 189 fork1(struct proc *p1, int flags, struct proc **procp)
 190 {
 191         struct proc *p2, *pptr;
 192         uid_t uid;
 193         struct proc *newproc;
 194         int ok;
 195         static int curfail = 0, pidchecked = 0;
 196         static struct timeval lastfail;
 197         struct forklist *ep;
 198         struct filedesc_to_leader *fdtol;
 199
 200         if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
 201                 return (EINVAL);
 202
 203         /*
 204          * Here we don't create a new process, but we divorce
 205          * certain parts of a process from itself.
 206          */
 207         if ((flags & RFPROC) == 0) {
 208
 209                 vm_fork(p1, 0, flags);
 210
 211                 /*
 212                  * Close all file descriptors.
 213                  */
 214                 if (flags & RFCFDG) {
 215                         struct filedesc *fdtmp;
 216                         fdtmp = fdinit(p1);
 217                         fdfree(p1);
 218                         p1->p_fd = fdtmp;
 219                 }
 220
 221                 /*
 222                  * Unshare file descriptors (from parent.)
 223                  */
 224                 if (flags & RFFDG) {
 225                         if (p1->p_fd->fd_refcnt > 1) {
 226                                 struct filedesc *newfd;
 227                                 newfd = fdcopy(p1);
 228                                 fdfree(p1);
 229                                 p1->p_fd = newfd;
 230                         }
 231                 }
 232                 *procp = NULL;
 233                 return (0);
 234         }
 235
 236         /*
 237          * Although process entries are dynamically created, we still keep
 238          * a global limit on the maximum number we will create.  Don't allow
 239          * a nonprivileged user to use the last ten processes; don't let root
 240          * exceed the limit. The variable nprocs is the current number of
 241          * processes, maxproc is the limit.
 242          */
 243         uid = p1->p_ucred->cr_ruid;
 244         if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
 245                 if (ppsratecheck(&lastfail, &curfail, 1))
 246                         printf("maxproc limit exceeded by uid %d, please "
 247                                "see tuning(7) and login.conf(5).\n", uid);
 248                 tsleep(&forksleep, 0, "fork", hz / 2);
 249                 return (EAGAIN);
 250         }
 251         /*
 252          * Increment the nprocs resource before blocking can occur.  There
 253          * are hard-limits as to the number of processes that can run.
 254          */
 255         nprocs++;
 256
 257         /*
 258          * Increment the count of procs running with this uid. Don't allow
 259          * a nonprivileged user to exceed their current limit.
 260          */
 261         ok = chgproccnt(p1->p_ucred->cr_ruidinfo, 1,
 262                 (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
 263         if (!ok) {
 264                 /*
 265                  * Back out the process count
 266                  */
 267                 nprocs--;
 268                 if (ppsratecheck(&lastfail, &curfail, 1))
 269                         printf("maxproc limit exceeded by uid %d, please "
 270                                "see tuning(7) and login.conf(5).\n", uid);
 271                 tsleep(&forksleep, 0, "fork", hz / 2);
 272                 return (EAGAIN);
 273         }
 274
 275         /* Allocate new proc. */
 276         newproc = zalloc(proc_zone);
 277
 278         /*
 279          * Setup linkage for kernel based threading
 280          */
 281         if ((flags & RFTHREAD) != 0) {
 282                 newproc->p_peers = p1->p_peers;
 283                 p1->p_peers = newproc;
 284                 newproc->p_leader = p1->p_leader;
 285         } else {
 286                 newproc->p_peers = 0;
 287                 newproc->p_leader = newproc;
 288         }
 289
 290         newproc->p_wakeup = 0;
 291         newproc->p_vmspace = NULL;
 292         TAILQ_INIT(&newproc->p_sysmsgq);
 293
 294         /*
 295          * Find an unused process ID.  We remember a range of unused IDs
 296          * ready to use (from nextpid+1 through pidchecked-1).
 297          */
 298         nextpid++;
 299         if (randompid)
 300                 nextpid += arc4random() % randompid;
 301 retry:
 302         /*
 303          * If the process ID prototype has wrapped around,
 304          * restart somewhat above 0, as the low-numbered procs
 305          * tend to include daemons that don't exit.
 306          */
 307         if (nextpid >= PID_MAX) {
 308                 nextpid = nextpid % PID_MAX;
 309                 if (nextpid < 100)
 310                         nextpid += 100;
 311                 pidchecked = 0;
 312         }
 313         if (nextpid >= pidchecked) {
 314                 int doingzomb = 0;
 315
 316                 pidchecked = PID_MAX;
 317                 /*
 318                  * Scan the active and zombie procs to check whether this pid
 319                  * is in use.  Remember the lowest pid that's greater
 320                  * than nextpid, so we can avoid checking for a while.
 321                  */
 322                 p2 = LIST_FIRST(&allproc);
 323 again:
 324                 for (; p2 != 0; p2 = LIST_NEXT(p2, p_list)) {
 325                         while (p2->p_pid == nextpid ||
 326                             p2->p_pgrp->pg_id == nextpid ||
 327                             p2->p_session->s_sid == nextpid) {
 328                                 nextpid++;
 329                                 if (nextpid >= pidchecked)
 330                                         goto retry;
 331                         }
 332                         if (p2->p_pid > nextpid && pidchecked > p2->p_pid)
 333                                 pidchecked = p2->p_pid;
 334                         if (p2->p_pgrp->pg_id > nextpid &&
 335                             pidchecked > p2->p_pgrp->pg_id)
 336                                 pidchecked = p2->p_pgrp->pg_id;
 337                         if (p2->p_session->s_sid > nextpid &&
 338                             pidchecked > p2->p_session->s_sid)
 339                                 pidchecked = p2->p_session->s_sid;
 340                 }
 341                 if (!doingzomb) {
 342                         doingzomb = 1;
 343                         p2 = LIST_FIRST(&zombproc);
 344                         goto again;
 345                 }
 346         }
 347
 348         p2 = newproc;
 349         p2->p_stat = SIDL;                      /* protect against others */
 350         p2->p_pid = nextpid;
 351         LIST_INSERT_HEAD(&allproc, p2, p_list);
 352         LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
 353
 354         /*
 355          * Make a proc table entry for the new process.
 356          * Start by zeroing the section of proc that is zero-initialized,
 357          * then copy the section that is copied directly from the parent.
 358          */
 359         bzero(&p2->p_startzero,
 360             (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
 361         bcopy(&p1->p_startcopy, &p2->p_startcopy,
 362             (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
 363
 364         p2->p_aioinfo = NULL;
 365
 366         /*
 367          * Duplicate sub-structures as needed.
 368          * Increase reference counts on shared objects.
 369          * The p_stats and p_sigacts substructs are set in vm_fork.
 370          */
 371         p2->p_flag = P_INMEM;
 372         if (p1->p_flag & P_PROFIL)
 373                 startprofclock(p2);
 374         p2->p_ucred = crhold(p1->p_ucred);
 375
 376         if (jailed(p2->p_ucred))
 377                 p2->p_flag |= P_JAILED;
 378
 379         if (p2->p_args)
 380                 p2->p_args->ar_ref++;
 381
 382         if (flags & RFSIGSHARE) {
 383                 p2->p_procsig = p1->p_procsig;
 384                 p2->p_procsig->ps_refcnt++;
 385                 if (p1->p_sigacts == &p1->p_addr->u_sigacts) {
 386                         struct sigacts *newsigacts;
 387                         int s;
 388
 389                         /* Create the shared sigacts structure */
 390                         MALLOC(newsigacts, struct sigacts *,
 391                             sizeof(struct sigacts), M_SUBPROC, M_WAITOK);
 392                         s = splhigh();
 393                         /*
 394                          * Set p_sigacts to the new shared structure.
 395                          * Note that this is updating p1->p_sigacts at the
 396                          * same time, since p_sigacts is just a pointer to
 397                          * the shared p_procsig->ps_sigacts.
 398                          */
 399                         p2->p_sigacts  = newsigacts;
 400                         bcopy(&p1->p_addr->u_sigacts, p2->p_sigacts,
 401                             sizeof(*p2->p_sigacts));
 402                         *p2->p_sigacts = p1->p_addr->u_sigacts;
 403                         splx(s);
 404                 }
 405         } else {
 406                 MALLOC(p2->p_procsig, struct procsig *, sizeof(struct procsig),
 407                     M_SUBPROC, M_WAITOK);
 408                 bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig));
 409                 p2->p_procsig->ps_refcnt = 1;
 410                 p2->p_sigacts = NULL;   /* finished in vm_fork() */
 411         }
 412         if (flags & RFLINUXTHPN)
 413                 p2->p_sigparent = SIGUSR1;
 414         else
 415                 p2->p_sigparent = SIGCHLD;
 416
 417         /* bump references to the text vnode (for procfs) */
 418         p2->p_textvp = p1->p_textvp;
 419         if (p2->p_textvp)
 420                 vref(p2->p_textvp);
 421
 422         if (flags & RFCFDG) {
 423                 p2->p_fd = fdinit(p1);
 424                 fdtol = NULL;
 425         } else if (flags & RFFDG) {
 426                 p2->p_fd = fdcopy(p1);
 427                 fdtol = NULL;
 428         } else {
 429                 p2->p_fd = fdshare(p1);
 430                 if (p1->p_fdtol == NULL)
 431                         p1->p_fdtol =
 432                                 filedesc_to_leader_alloc(NULL,
 433                                                          p1->p_leader);
 434                 if ((flags & RFTHREAD) != 0) {
 435                         /*
 436                          * Shared file descriptor table and
 437                          * shared process leaders.
 438                          */
 439                         fdtol = p1->p_fdtol;
 440                         fdtol->fdl_refcount++;
 441                 } else {
 442                         /*
 443                          * Shared file descriptor table, and
 444                          * different process leaders
 445                          */
 446                         fdtol = filedesc_to_leader_alloc(p1->p_fdtol, p2);
 447                 }
 448         }
 449         p2->p_fdtol = fdtol;
 450
 451         /*
 452          * If p_limit is still copy-on-write, bump refcnt,
 453          * otherwise get a copy that won't be modified.
 454          * (If PL_SHAREMOD is clear, the structure is shared
 455          * copy-on-write.)
 456          */
 457         if (p1->p_limit->p_lflags & PL_SHAREMOD) {
 458                 p2->p_limit = limcopy(p1->p_limit);
 459         } else {
 460                 p2->p_limit = p1->p_limit;
 461                 p2->p_limit->p_refcnt++;
 462         }
 463
 464         /*
 465          * Preserve some more flags in subprocess.  P_PROFIL has already
 466          * been preserved.
 467          */
 468         p2->p_flag |= p1->p_flag & (P_SUGID | P_ALTSTACK);
 469         if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
 470                 p2->p_flag |= P_CONTROLT;
 471         if (flags & RFPPWAIT)
 472                 p2->p_flag |= P_PPWAIT;
 473
 474         /*
 475          * Once we are on a pglist we may receive signals.  XXX we might
 476          * race a ^C being sent to the process group by not receiving it
 477          * at all prior to this line.
 478          */
 479         LIST_INSERT_AFTER(p1, p2, p_pglist);
 480
 481         /*
 482          * Attach the new process to its parent.
 483          *
 484          * If RFNOWAIT is set, the newly created process becomes a child
 485          * of init.  This effectively disassociates the child from the
 486          * parent.
 487          */
 488         if (flags & RFNOWAIT)
 489                 pptr = initproc;
 490         else
 491                 pptr = p1;
 492         p2->p_pptr = pptr;
 493         LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
 494         LIST_INIT(&p2->p_children);
 495         varsymset_init(&p2->p_varsymset, &p1->p_varsymset);
 496         callout_init(&p2->p_ithandle);
 497
 498 #ifdef KTRACE
 499         /*
 500          * Copy traceflag and tracefile if enabled.  If not inherited,
 501          * these were zeroed above but we still could have a trace race
 502          * so make sure p2's p_tracep is NULL.
 503          */
 504         if ((p1->p_traceflag & KTRFAC_INHERIT) && p2->p_tracep == NULL) {
 505                 p2->p_traceflag = p1->p_traceflag;
 506                 if ((p2->p_tracep = p1->p_tracep) != NULL)
 507                         vref(p2->p_tracep);
 508         }
 509 #endif
 510
 511         /*
 512          * Give the child process an estcpu skewed towards the batch side
 513          * of the parent.  This prevents batch programs from glitching
 514          * interactive programs when they are first started.  If the child
 515          * is not a batch program it's priority will be corrected by the
 516          * scheduler.
 517          *
 518          * The interactivity model always starts at 0 (par value).
 519          */
 520         p2->p_estcpu_fork = p2->p_estcpu =
 521                 ESTCPULIM(p1->p_estcpu + ESTCPURAMP);
 522         p2->p_interactive = 0;
 523
 524         /*
 525          * This begins the section where we must prevent the parent
 526          * from being swapped.
 527          */
 528         PHOLD(p1);
 529
 530         /*
 531          * Finish creating the child process.  It will return via a different
 532          * execution path later.  (ie: directly into user mode)
 533          */
 534         vm_fork(p1, p2, flags);
 535         caps_fork(p1, p2, flags);
 536
 537         if (flags == (RFFDG | RFPROC)) {
 538                 mycpu->gd_cnt.v_forks++;
 539                 mycpu->gd_cnt.v_forkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
 540         } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
 541                 mycpu->gd_cnt.v_vforks++;
 542                 mycpu->gd_cnt.v_vforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
 543         } else if (p1 == &proc0) {
 544                 mycpu->gd_cnt.v_kthreads++;
 545                 mycpu->gd_cnt.v_kthreadpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
 546         } else {
 547                 mycpu->gd_cnt.v_rforks++;
 548                 mycpu->gd_cnt.v_rforkpages += p2->p_vmspace->vm_dsize + p2->p_vmspace->vm_ssize;
 549         }
 550
 551         /*
 552          * Both processes are set up, now check if any loadable modules want
 553          * to adjust anything.
 554          *   What if they have an error? XXX
 555          */
 556         TAILQ_FOREACH(ep, &fork_list, next) {
 557                 (*ep->function)(p1, p2, flags);
 558         }
 559
 560         /*
 561          * Make child runnable and add to run queue.
 562          */
 563         microtime(&p2->p_thread->td_start);
 564         p2->p_acflag = AFORK;
 565
 566         /*
 567          * tell any interested parties about the new process
 568          */
 569         KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
 570
 571         /*
 572          * Return child proc pointer to parent.
 573          */
 574         *procp = p2;
 575         return (0);
 576 }
 577
 578 /*
 579  * The next two functionms are general routines to handle adding/deleting
 580  * items on the fork callout list.
 581  *
 582  * at_fork():
 583  * Take the arguments given and put them onto the fork callout list,
 584  * However first make sure that it's not already there.
 585  * Returns 0 on success or a standard error number.
 586  */
 587 int
 588 at_fork(forklist_fn function)
 589 {
 590         struct forklist *ep;
 591
 592 #ifdef INVARIANTS
 593         /* let the programmer know if he's been stupid */
 594         if (rm_at_fork(function)) {
 595                 printf("WARNING: fork callout entry (%p) already present\n",
 596                     function);
 597         }
 598 #endif
 599         ep = malloc(sizeof(*ep), M_ATFORK, M_WAITOK|M_ZERO);
 600         ep->function = function;
 601         TAILQ_INSERT_TAIL(&fork_list, ep, next);
 602         return (0);
 603 }
 604
 605 /*
 606  * Scan the exit callout list for the given item and remove it..
 607  * Returns the number of items removed (0 or 1)
 608  */
 609 int
 610 rm_at_fork(forklist_fn function)
 611 {
 612         struct forklist *ep;
 613
 614         TAILQ_FOREACH(ep, &fork_list, next) {
 615                 if (ep->function == function) {
 616                         TAILQ_REMOVE(&fork_list, ep, next);
 617                         free(ep, M_ATFORK);
 618                         return(1);
 619                 }
 620         }
 621         return (0);
 622 }
 623
 624 /*
 625  * Add a forked process to the run queue after any remaining setup, such
 626  * as setting the fork handler, has been completed.
 627  */
 628 void
 629 start_forked_proc(struct proc *p1, struct proc *p2)
 630 {
 631         /*
 632          * Move from SIDL to RUN queue, and activate the process's thread.
 633          * Activation of the thread effectively makes the process "a"
 634          * current process, so we do not setrunqueue().
 635          *
 636          * YYY setrunqueue works here but we should clean up the trampoline
 637          * code so we just schedule the LWKT thread and let the trampoline
 638          * deal with the userland scheduler on return to userland.
 639          */
 640         KASSERT(p2 && p2->p_stat == SIDL,
 641             ("cannot start forked process, bad status: %p", p2));
 642         resetpriority(p2);
 643         (void) splhigh();
 644         p2->p_stat = SRUN;
 645         setrunqueue(p2);
 646         (void) spl0();
 647
 648         /*
 649          * Now can be swapped.
 650          */
 651         PRELE(p1);
 652
 653         /*
 654          * Preserve synchronization semantics of vfork.  If waiting for
 655          * child to exec or exit, set P_PPWAIT on child, and sleep on our
 656          * proc (in case of exit).
 657          */
 658         while (p2->p_flag & P_PPWAIT)
 659                 tsleep(p1, 0, "ppwait", 0);
 660 }
 661