sys/kern/kern_proc.c

   1 /*
   2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29
  30 #include <sys/param.h>
  31 #include <sys/systm.h>
  32 #include <sys/kernel.h>
  33 #include <sys/sysctl.h>
  34 #include <sys/malloc.h>
  35 #include <sys/proc.h>
  36 #include <sys/vnode.h>
  37 #include <sys/jail.h>
  38 #include <sys/filedesc.h>
  39 #include <sys/tty.h>
  40 #include <sys/dsched.h>
  41 #include <sys/signalvar.h>
  42 #include <sys/spinlock.h>
  43 #include <sys/random.h>
  44 #include <sys/vnode.h>
  45 #include <sys/exec.h>
  46 #include <vm/vm.h>
  47 #include <sys/lock.h>
  48 #include <vm/pmap.h>
  49 #include <vm/vm_map.h>
  50 #include <sys/user.h>
  51 #include <machine/smp.h>
  52
  53 #include <sys/refcount.h>
  54 #include <sys/spinlock2.h>
  55
  56 /*
  57  * Hash table size must be a power of two and is not currently dynamically
  58  * sized.  There is a trade-off between the linear scans which must iterate
  59  * all HSIZE elements and the number of elements which might accumulate
  60  * within each hash chain.
  61  */
  62 #define ALLPROC_HSIZE   256
  63 #define ALLPROC_HMASK   (ALLPROC_HSIZE - 1)
  64 #define ALLPROC_HASH(pid)       (pid & ALLPROC_HMASK)
  65 #define PGRP_HASH(pid)  (pid & ALLPROC_HMASK)
  66 #define SESS_HASH(pid)  (pid & ALLPROC_HMASK)
  67
/*
 * pid_doms[] management, used to control how quickly a PID can be recycled.
 * PIDSEL_DOMAINS must be a multiple of ALLPROC_HSIZE for the proc_makepid()
 * inner loops; the integer divide followed by the multiply by ALLPROC_HSIZE
 * below rounds the domain count down to such a multiple.
 *
 * WARNING! PIDDOM_DELAY should not be defined > 20 or so unless you widen
 *          the pid_doms[] elements from 8 bits to 16 bits.
 */
#define PIDDOM_COUNT    10      /* 10 pids per domain - reduce array size */
#define PIDDOM_DELAY    10      /* min 10 seconds after exit before reuse */
#define PIDDOM_SCALE    10      /* (10,000*SCALE)/sec performance guarantee */
#define PIDSEL_DOMAINS  (PID_MAX * PIDDOM_SCALE / PIDDOM_COUNT /        \
                         ALLPROC_HSIZE * ALLPROC_HSIZE)
  80
/* Used by libkvm */
int allproc_hsize = ALLPROC_HSIZE;

/* Declares struct pidhashhead, a list head whose elements are struct proc. */
LIST_HEAD(pidhashhead, proc);

/*
 * kmalloc type tags.  Within this part of the file M_PGRP and M_SESSION
 * tag the pgrp and session allocations made in enterpgrp(), and M_PROC
 * tags the pid_doms[] array allocated in procinit().  Only M_PGRP is
 * file-local (static).
 */
static MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_LWP, "lwp", "lwp structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");
  91
/*
 * Visibility knobs exported read-write under the security sysctl tree.
 * Both default to 1.
 */
int ps_showallprocs = 1;
static int ps_showallthreads = 1;
SYSCTL_INT(_security, OID_AUTO, ps_showallprocs, CTLFLAG_RW,
    &ps_showallprocs, 0,
    "Unprivileged processes can see processes with different UID/GID");
SYSCTL_INT(_security, OID_AUTO, ps_showallthreads, CTLFLAG_RW,
    &ps_showallthreads, 0,
    "Unprivileged processes can see kernel threads");

/*
 * Counters exported read-write under the kern sysctl tree.
 *
 * NOTE(review): pid_inner_skips carries the same description string as
 * pid_domain_skips; confirm what it is meant to count and whether the
 * description should differ.
 */
static u_int pid_domain_skips;
SYSCTL_UINT(_kern, OID_AUTO, pid_domain_skips, CTLFLAG_RW,
    &pid_domain_skips, 0,
    "Number of pid_doms[] skipped");
static u_int pid_inner_skips;
SYSCTL_UINT(_kern, OID_AUTO, pid_inner_skips, CTLFLAG_RW,
    &pid_inner_skips, 0,
    "Number of pid_doms[] skipped");
 108
/* Prototypes for file-local (static) helpers. */
static void orphanpg(struct pgrp *pg);
static void proc_makepid(struct proc *p, int random_offset);

/*
 * Process related lists (for proc_token, allproc, allpgrp, and allsess)
 *
 * One procglob entry exists per hash bucket.  Entries are selected with
 * ALLPROC_HASH(), PGRP_HASH() or SESS_HASH() of the pid, pgid or sid and
 * each entry's lists are protected by that entry's proc_token.
 */
typedef struct procglob procglob_t;

static procglob_t       procglob[ALLPROC_HSIZE];
 118
/*
 * We try our best to avoid recycling a PID too quickly.  We do this by
 * storing (uint8_t)time_second in the related pid domain on-reap and then
 * using that to skip-over the domain on-allocate.
 *
 * This array has to be fairly large to support a high fork/exec rate.
 * A ~100,000 entry array will support a 10-second reuse latency at
 * 10,000 execs/second, worst case.  Best-case multiply by PIDDOM_COUNT
 * (approximately 100,000 execs/second).
 *
 * Currently we allocate around a megabyte, making the worst-case fork
 * rate around 100,000/second.
 *
 * The array holds PIDSEL_DOMAINS entries and is allocated and seeded by
 * procinit().  pgrel() and sess_rele() also stamp the matching entry when
 * a process group id or session id is retired.
 */
static uint8_t *pid_doms;

/*
 * Random component to nextpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value to avoid
 * doing it in critical paths.  Don't let it be too small or we pointlessly
 * waste randomness entropy, and don't let it be impossibly large.  Using a
 * modulus that is too big causes a LOT more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
 *
 * Written only by the kern.randompid sysctl handler, which clamps the value.
 */
static int randompid = 0;
 143
 144 /*
 145  * No requirements.
 146  */
 147 static int
 148 sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
 149 {
 150         int error, pid;
 151
 152         pid = randompid;
 153         error = sysctl_handle_int(oidp, &pid, 0, req);
 154         if (error || !req->newptr)
 155                 return (error);
 156         if (pid < 0 || pid > PID_MAX - 100)     /* out of range */
 157                 pid = PID_MAX - 100;
 158         else if (pid < 2)                       /* NOP */
 159                 pid = 0;
 160         else if (pid < 100)                     /* Make it reasonable */
 161                 pid = 100;
 162         randompid = pid;
 163         return (error);
 164 }
 165
 166 SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
 167             0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
 168
 169 /*
 170  * Initialize global process hashing structures.
 171  *
 172  * These functions are ONLY called from the low level boot code and do
 173  * not lock their operations.
 174  */
 175 void
 176 procinit(void)
 177 {
 178         u_long i;
 179
 180         /*
 181          * Allocate dynamically.  This array can be large (~1MB) so don't
 182          * waste boot loader space.
 183          */
 184         pid_doms = kmalloc(sizeof(pid_doms[0]) * PIDSEL_DOMAINS,
 185                            M_PROC, M_WAITOK | M_ZERO);
 186
 187         /*
 188          * Avoid unnecessary stalls due to pid_doms[] values all being
 189          * the same.  Make sure that the allocation of pid 1 and pid 2
 190          * succeeds.
 191          */
 192         for (i = 0; i < PIDSEL_DOMAINS; ++i)
 193                 pid_doms[i] = (int8_t)i - (int8_t)(PIDDOM_DELAY + 1);
 194
 195         /*
 196          * Other misc init.
 197          */
 198         for (i = 0; i < ALLPROC_HSIZE; ++i) {
 199                 procglob_t *prg = &procglob[i];
 200                 LIST_INIT(&prg->allproc);
 201                 LIST_INIT(&prg->allsess);
 202                 LIST_INIT(&prg->allpgrp);
 203                 lwkt_token_init(&prg->proc_token, "allproc");
 204         }
 205         uihashinit();
 206 }
 207
 208 void
 209 procinsertinit(struct proc *p)
 210 {
 211         LIST_INSERT_HEAD(&procglob[ALLPROC_HASH(p->p_pid)].allproc,
 212                          p, p_list);
 213 }
 214
 215 void
 216 pgrpinsertinit(struct pgrp *pg)
 217 {
 218         LIST_INSERT_HEAD(&procglob[ALLPROC_HASH(pg->pg_id)].allpgrp,
 219                          pg, pg_list);
 220 }
 221
 222 void
 223 sessinsertinit(struct session *sess)
 224 {
 225         LIST_INSERT_HEAD(&procglob[ALLPROC_HASH(sess->s_sid)].allsess,
 226                          sess, s_list);
 227 }
 228
 229 /*
 230  * Process hold/release support functions.  Called via the PHOLD(),
 231  * PRELE(), and PSTALL() macros.
 232  *
 233  * p->p_lock is a simple hold count with a waiting interlock.  No wakeup()
 234  * is issued unless someone is actually waiting for the process.
 235  *
 236  * Most holds are short-term, allowing a process scan or other similar
 237  * operation to access a proc structure without it getting ripped out from
 238  * under us.  procfs and process-list sysctl ops also use the hold function
 239  * interlocked with various p_flags to keep the vmspace intact when reading
 240  * or writing a user process's address space.
 241  *
 242  * There are two situations where a hold count can be longer.  Exiting lwps
 243  * hold the process until the lwp is reaped, and the parent will hold the
 244  * child during vfork()/exec() sequences while the child is marked P_PPWAIT.
 245  *
 246  * The kernel waits for the hold count to drop to 0 (or 1 in some cases) at
 247  * various critical points in the fork/exec and exit paths before proceeding.
 248  */
/*
 * Layout of p->p_lock:
 *
 * PLOCK_ZOMB     serialization flag owned by a successful pholdzomb()
 * PLOCK_WAITING  set by a thread about to sleep on &p->p_lock; tells the
 *                releasing side that a wakeup() is required
 * PLOCK_MASK     the bits holding the hold count itself
 */
#define PLOCK_ZOMB      0x20000000
#define PLOCK_WAITING   0x40000000
#define PLOCK_MASK      0x1FFFFFFF
 252
 253 void
 254 pstall(struct proc *p, const char *wmesg, int count)
 255 {
 256         int o;
 257         int n;
 258
 259         for (;;) {
 260                 o = p->p_lock;
 261                 cpu_ccfence();
 262                 if ((o & PLOCK_MASK) <= count)
 263                         break;
 264                 n = o | PLOCK_WAITING;
 265                 tsleep_interlock(&p->p_lock, 0);
 266
 267                 /*
 268                  * If someone is trying to single-step the process during
 269                  * an exec or an exit they can deadlock us because procfs
 270                  * sleeps with the process held.
 271                  */
 272                 if (p->p_stops) {
 273                         if (p->p_flags & P_INEXEC) {
 274                                 wakeup(&p->p_stype);
 275                         } else if (p->p_flags & P_POSTEXIT) {
 276                                 spin_lock(&p->p_spin);
 277                                 p->p_stops = 0;
 278                                 p->p_step = 0;
 279                                 spin_unlock(&p->p_spin);
 280                                 wakeup(&p->p_stype);
 281                         }
 282                 }
 283
 284                 if (atomic_cmpset_int(&p->p_lock, o, n)) {
 285                         tsleep(&p->p_lock, PINTERLOCKED, wmesg, 0);
 286                 }
 287         }
 288 }
 289
/*
 * Add one hold to p by atomically incrementing p->p_lock.  Does not sleep.
 * Each hold is dropped again with prele().
 */
void
phold(struct proc *p)
{
        atomic_add_int(&p->p_lock, 1);
}
 295
 296 /*
 297  * WARNING!  On last release (p) can become instantly invalid due to
 298  *           MP races.
 299  */
 300 void
 301 prele(struct proc *p)
 302 {
 303         int o;
 304         int n;
 305
 306         /*
 307          * Fast path
 308          */
 309         if (atomic_cmpset_int(&p->p_lock, 1, 0))
 310                 return;
 311
 312         /*
 313          * Slow path
 314          */
 315         for (;;) {
 316                 o = p->p_lock;
 317                 KKASSERT((o & PLOCK_MASK) > 0);
 318                 cpu_ccfence();
 319                 n = (o - 1) & ~PLOCK_WAITING;
 320                 if (atomic_cmpset_int(&p->p_lock, o, n)) {
 321                         if (o & PLOCK_WAITING)
 322                                 wakeup(&p->p_lock);
 323                         break;
 324                 }
 325         }
 326 }
 327
 328 /*
 329  * Hold and flag serialized for zombie reaping purposes.
 330  *
 331  * This function will fail if it has to block, returning non-zero with
 332  * neither the flag set or the hold count bumped.  Note that we must block
 333  * without holding a ref, meaning that the caller must ensure that (p)
 334  * remains valid through some other interlock (typically on its parent
 335  * process's p_token).
 336  *
 337  * Zero is returned on success.  The hold count will be incremented and
 338  * the serialization flag acquired.  Note that serialization is only against
 339  * other pholdzomb() calls, not against phold() calls.
 340  */
 341 int
 342 pholdzomb(struct proc *p)
 343 {
 344         int o;
 345         int n;
 346
 347         /*
 348          * Fast path
 349          */
 350         if (atomic_cmpset_int(&p->p_lock, 0, PLOCK_ZOMB | 1))
 351                 return(0);
 352
 353         /*
 354          * Slow path
 355          */
 356         for (;;) {
 357                 o = p->p_lock;
 358                 cpu_ccfence();
 359                 if ((o & PLOCK_ZOMB) == 0) {
 360                         n = (o + 1) | PLOCK_ZOMB;
 361                         if (atomic_cmpset_int(&p->p_lock, o, n))
 362                                 return(0);
 363                 } else {
 364                         KKASSERT((o & PLOCK_MASK) > 0);
 365                         n = o | PLOCK_WAITING;
 366                         tsleep_interlock(&p->p_lock, 0);
 367                         if (atomic_cmpset_int(&p->p_lock, o, n)) {
 368                                 tsleep(&p->p_lock, PINTERLOCKED, "phldz", 0);
 369                                 /* (p) can be ripped out at this point */
 370                                 return(1);
 371                         }
 372                 }
 373         }
 374 }
 375
 376 /*
 377  * Release PLOCK_ZOMB and the hold count, waking up any waiters.
 378  *
 379  * WARNING!  On last release (p) can become instantly invalid due to
 380  *           MP races.
 381  */
 382 void
 383 prelezomb(struct proc *p)
 384 {
 385         int o;
 386         int n;
 387
 388         /*
 389          * Fast path
 390          */
 391         if (atomic_cmpset_int(&p->p_lock, PLOCK_ZOMB | 1, 0))
 392                 return;
 393
 394         /*
 395          * Slow path
 396          */
 397         KKASSERT(p->p_lock & PLOCK_ZOMB);
 398         for (;;) {
 399                 o = p->p_lock;
 400                 KKASSERT((o & PLOCK_MASK) > 0);
 401                 cpu_ccfence();
 402                 n = (o - 1) & ~(PLOCK_ZOMB | PLOCK_WAITING);
 403                 if (atomic_cmpset_int(&p->p_lock, o, n)) {
 404                         if (o & PLOCK_WAITING)
 405                                 wakeup(&p->p_lock);
 406                         break;
 407                 }
 408         }
 409 }
 410
 411 /*
 412  * Is p an inferior of the current process?
 413  *
 414  * No requirements.
 415  */
 416 int
 417 inferior(struct proc *p)
 418 {
 419         struct proc *p2;
 420
 421         PHOLD(p);
 422         lwkt_gettoken_shared(&p->p_token);
 423         while (p != curproc) {
 424                 if (p->p_pid == 0) {
 425                         lwkt_reltoken(&p->p_token);
 426                         return (0);
 427                 }
 428                 p2 = p->p_pptr;
 429                 PHOLD(p2);
 430                 lwkt_reltoken(&p->p_token);
 431                 PRELE(p);
 432                 lwkt_gettoken_shared(&p2->p_token);
 433                 p = p2;
 434         }
 435         lwkt_reltoken(&p->p_token);
 436         PRELE(p);
 437
 438         return (1);
 439 }
 440
 441 /*
 442  * Locate a process by number.  The returned process will be referenced and
 443  * must be released with PRELE().
 444  *
 445  * No requirements.
 446  */
 447 struct proc *
 448 pfind(pid_t pid)
 449 {
 450         struct proc *p = curproc;
 451         procglob_t *prg;
 452         int n;
 453
 454         /*
 455          * Shortcut the current process
 456          */
 457         if (p && p->p_pid == pid) {
 458                 PHOLD(p);
 459                 return (p);
 460         }
 461
 462         /*
 463          * Otherwise find it in the hash table.
 464          */
 465         n = ALLPROC_HASH(pid);
 466         prg = &procglob[n];
 467
 468         lwkt_gettoken_shared(&prg->proc_token);
 469         LIST_FOREACH(p, &prg->allproc, p_list) {
 470                 if (p->p_stat == SZOMB)
 471                         continue;
 472                 if (p->p_pid == pid) {
 473                         PHOLD(p);
 474                         lwkt_reltoken(&prg->proc_token);
 475                         return (p);
 476                 }
 477         }
 478         lwkt_reltoken(&prg->proc_token);
 479
 480         return (NULL);
 481 }
 482
 483 /*
 484  * Locate a process by number.  The returned process is NOT referenced.
 485  * The result will not be stable and is typically only used to validate
 486  * against a process that the caller has in-hand.
 487  *
 488  * No requirements.
 489  */
 490 struct proc *
 491 pfindn(pid_t pid)
 492 {
 493         struct proc *p = curproc;
 494         procglob_t *prg;
 495         int n;
 496
 497         /*
 498          * Shortcut the current process
 499          */
 500         if (p && p->p_pid == pid)
 501                 return (p);
 502
 503         /*
 504          * Otherwise find it in the hash table.
 505          */
 506         n = ALLPROC_HASH(pid);
 507         prg = &procglob[n];
 508
 509         lwkt_gettoken_shared(&prg->proc_token);
 510         LIST_FOREACH(p, &prg->allproc, p_list) {
 511                 if (p->p_stat == SZOMB)
 512                         continue;
 513                 if (p->p_pid == pid) {
 514                         lwkt_reltoken(&prg->proc_token);
 515                         return (p);
 516                 }
 517         }
 518         lwkt_reltoken(&prg->proc_token);
 519
 520         return (NULL);
 521 }
 522
 523 /*
 524  * Locate a process on the zombie list.  Return a process or NULL.
 525  * The returned process will be referenced and the caller must release
 526  * it with PRELE().
 527  *
 528  * No other requirements.
 529  */
 530 struct proc *
 531 zpfind(pid_t pid)
 532 {
 533         struct proc *p = curproc;
 534         procglob_t *prg;
 535         int n;
 536
 537         /*
 538          * Shortcut the current process
 539          */
 540         if (p && p->p_pid == pid) {
 541                 PHOLD(p);
 542                 return (p);
 543         }
 544
 545         /*
 546          * Otherwise find it in the hash table.
 547          */
 548         n = ALLPROC_HASH(pid);
 549         prg = &procglob[n];
 550
 551         lwkt_gettoken_shared(&prg->proc_token);
 552         LIST_FOREACH(p, &prg->allproc, p_list) {
 553                 if (p->p_stat != SZOMB)
 554                         continue;
 555                 if (p->p_pid == pid) {
 556                         PHOLD(p);
 557                         lwkt_reltoken(&prg->proc_token);
 558                         return (p);
 559                 }
 560         }
 561         lwkt_reltoken(&prg->proc_token);
 562
 563         return (NULL);
 564 }
 565
 566
/*
 * Add a reference to a process group.  Each reference is dropped again
 * with pgrel().
 */
void
pgref(struct pgrp *pgrp)
{
        refcount_acquire(&pgrp->pg_refs);
}
 572
 573 void
 574 pgrel(struct pgrp *pgrp)
 575 {
 576         procglob_t *prg;
 577         int count;
 578         int n;
 579
 580         n = PGRP_HASH(pgrp->pg_id);
 581         prg = &procglob[n];
 582
 583         for (;;) {
 584                 count = pgrp->pg_refs;
 585                 cpu_ccfence();
 586                 KKASSERT(count > 0);
 587                 if (count == 1) {
 588                         lwkt_gettoken(&prg->proc_token);
 589                         if (atomic_cmpset_int(&pgrp->pg_refs, 1, 0))
 590                                 break;
 591                         lwkt_reltoken(&prg->proc_token);
 592                         /* retry */
 593                 } else {
 594                         if (atomic_cmpset_int(&pgrp->pg_refs, count, count - 1))
 595                                 return;
 596                         /* retry */
 597                 }
 598         }
 599
 600         /*
 601          * Successful 1->0 transition, pghash_spin is held.
 602          */
 603         LIST_REMOVE(pgrp, pg_list);
 604         if (pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] != (uint8_t)time_second)
 605                 pid_doms[pgrp->pg_id % PIDSEL_DOMAINS] = (uint8_t)time_second;
 606
 607         /*
 608          * Reset any sigio structures pointing to us as a result of
 609          * F_SETOWN with our pgid.
 610          */
 611         funsetownlst(&pgrp->pg_sigiolst);
 612
 613         if (pgrp->pg_session->s_ttyp != NULL &&
 614             pgrp->pg_session->s_ttyp->t_pgrp == pgrp) {
 615                 pgrp->pg_session->s_ttyp->t_pgrp = NULL;
 616         }
 617         lwkt_reltoken(&prg->proc_token);
 618
 619         sess_rele(pgrp->pg_session);
 620         kfree(pgrp, M_PGRP);
 621 }
 622
 623 /*
 624  * Locate a process group by number.  The returned process group will be
 625  * referenced w/pgref() and must be released with pgrel() (or assigned
 626  * somewhere if you wish to keep the reference).
 627  *
 628  * No requirements.
 629  */
 630 struct pgrp *
 631 pgfind(pid_t pgid)
 632 {
 633         struct pgrp *pgrp;
 634         procglob_t *prg;
 635         int n;
 636
 637         n = PGRP_HASH(pgid);
 638         prg = &procglob[n];
 639         lwkt_gettoken_shared(&prg->proc_token);
 640
 641         LIST_FOREACH(pgrp, &prg->allpgrp, pg_list) {
 642                 if (pgrp->pg_id == pgid) {
 643                         refcount_acquire(&pgrp->pg_refs);
 644                         lwkt_reltoken(&prg->proc_token);
 645                         return (pgrp);
 646                 }
 647         }
 648         lwkt_reltoken(&prg->proc_token);
 649         return (NULL);
 650 }
 651
 652 /*
 653  * Move p to a new or existing process group (and session)
 654  *
 655  * No requirements.
 656  */
 657 int
 658 enterpgrp(struct proc *p, pid_t pgid, int mksess)
 659 {
 660         struct pgrp *pgrp;
 661         struct pgrp *opgrp;
 662         int error;
 663
 664         pgrp = pgfind(pgid);
 665
 666         KASSERT(pgrp == NULL || !mksess,
 667                 ("enterpgrp: setsid into non-empty pgrp"));
 668         KASSERT(!SESS_LEADER(p),
 669                 ("enterpgrp: session leader attempted setpgrp"));
 670
 671         if (pgrp == NULL) {
 672                 pid_t savepid = p->p_pid;
 673                 struct proc *np;
 674                 procglob_t *prg;
 675                 int n;
 676
 677                 /*
 678                  * new process group
 679                  */
 680                 KASSERT(p->p_pid == pgid,
 681                         ("enterpgrp: new pgrp and pid != pgid"));
 682                 pgrp = kmalloc(sizeof(struct pgrp), M_PGRP, M_WAITOK | M_ZERO);
 683                 pgrp->pg_id = pgid;
 684                 LIST_INIT(&pgrp->pg_members);
 685                 pgrp->pg_jobc = 0;
 686                 SLIST_INIT(&pgrp->pg_sigiolst);
 687                 lwkt_token_init(&pgrp->pg_token, "pgrp_token");
 688                 refcount_init(&pgrp->pg_refs, 1);
 689                 lockinit(&pgrp->pg_lock, "pgwt", 0, 0);
 690
 691                 n = PGRP_HASH(pgid);
 692                 prg = &procglob[n];
 693
 694                 if ((np = pfindn(savepid)) == NULL || np != p) {
 695                         lwkt_reltoken(&prg->proc_token);
 696                         error = ESRCH;
 697                         kfree(pgrp, M_PGRP);
 698                         goto fatal;
 699                 }
 700
 701                 lwkt_gettoken(&prg->proc_token);
 702                 if (mksess) {
 703                         struct session *sess;
 704
 705                         /*
 706                          * new session
 707                          */
 708                         sess = kmalloc(sizeof(struct session), M_SESSION,
 709                                        M_WAITOK | M_ZERO);
 710                         lwkt_gettoken(&p->p_token);
 711                         sess->s_leader = p;
 712                         sess->s_sid = p->p_pid;
 713                         sess->s_count = 1;
 714                         sess->s_ttyvp = NULL;
 715                         sess->s_ttyp = NULL;
 716                         bcopy(p->p_session->s_login, sess->s_login,
 717                               sizeof(sess->s_login));
 718                         pgrp->pg_session = sess;
 719                         KASSERT(p == curproc,
 720                                 ("enterpgrp: mksession and p != curproc"));
 721                         p->p_flags &= ~P_CONTROLT;
 722                         LIST_INSERT_HEAD(&prg->allsess, sess, s_list);
 723                         lwkt_reltoken(&p->p_token);
 724                 } else {
 725                         lwkt_gettoken(&p->p_token);
 726                         pgrp->pg_session = p->p_session;
 727                         sess_hold(pgrp->pg_session);
 728                         lwkt_reltoken(&p->p_token);
 729                 }
 730                 LIST_INSERT_HEAD(&prg->allpgrp, pgrp, pg_list);
 731
 732                 lwkt_reltoken(&prg->proc_token);
 733         } else if (pgrp == p->p_pgrp) {
 734                 pgrel(pgrp);
 735                 goto done;
 736         } /* else pgfind() referenced the pgrp */
 737
 738         lwkt_gettoken(&pgrp->pg_token);
 739         lwkt_gettoken(&p->p_token);
 740
 741         /*
 742          * Replace p->p_pgrp, handling any races that occur.
 743          */
 744         while ((opgrp = p->p_pgrp) != NULL) {
 745                 pgref(opgrp);
 746                 lwkt_gettoken(&opgrp->pg_token);
 747                 if (opgrp != p->p_pgrp) {
 748                         lwkt_reltoken(&opgrp->pg_token);
 749                         pgrel(opgrp);
 750                         continue;
 751                 }
 752                 LIST_REMOVE(p, p_pglist);
 753                 break;
 754         }
 755         p->p_pgrp = pgrp;
 756         LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
 757
 758         /*
 759          * Adjust eligibility of affected pgrps to participate in job control.
 760          * Increment eligibility counts before decrementing, otherwise we
 761          * could reach 0 spuriously during the first call.
 762          */
 763         fixjobc(p, pgrp, 1);
 764         if (opgrp) {
 765                 fixjobc(p, opgrp, 0);
 766                 lwkt_reltoken(&opgrp->pg_token);
 767                 pgrel(opgrp);   /* manual pgref */
 768                 pgrel(opgrp);   /* p->p_pgrp ref */
 769         }
 770         lwkt_reltoken(&p->p_token);
 771         lwkt_reltoken(&pgrp->pg_token);
 772 done:
 773         error = 0;
 774 fatal:
 775         return (error);
 776 }
 777
 778 /*
 779  * Remove process from process group
 780  *
 781  * No requirements.
 782  */
 783 int
 784 leavepgrp(struct proc *p)
 785 {
 786         struct pgrp *pg = p->p_pgrp;
 787
 788         lwkt_gettoken(&p->p_token);
 789         while ((pg = p->p_pgrp) != NULL) {
 790                 pgref(pg);
 791                 lwkt_gettoken(&pg->pg_token);
 792                 if (p->p_pgrp != pg) {
 793                         lwkt_reltoken(&pg->pg_token);
 794                         pgrel(pg);
 795                         continue;
 796                 }
 797                 p->p_pgrp = NULL;
 798                 LIST_REMOVE(p, p_pglist);
 799                 lwkt_reltoken(&pg->pg_token);
 800                 pgrel(pg);      /* manual pgref */
 801                 pgrel(pg);      /* p->p_pgrp ref */
 802                 break;
 803         }
 804         lwkt_reltoken(&p->p_token);
 805
 806         return (0);
 807 }
 808
/*
 * Adjust the ref count on a session structure.  When the ref count falls to
 * zero the tty is disassociated from the session and the session structure
 * is freed.  Note that tty association is not itself ref-counted.
 *
 * sess_hold() atomically adds one reference; sess_rele() drops one.
 *
 * No requirements.
 */
void
sess_hold(struct session *sp)
{
        atomic_add_int(&sp->s_count, 1);
}
 821
/*
 * Drop a reference on a session.  The last reference unlinks the session
 * from its hash bucket, detaches it from its tty (if any) and frees it.
 *
 * No requirements.
 */
void
sess_rele(struct session *sess)
{
        procglob_t *prg;
        struct tty *tp;
        int count;
        int n;

        n = SESS_HASH(sess->s_sid);
        prg = &procglob[n];

        /*
         * Any transition other than 1->0 is a plain compare-and-set.  The
         * final 1->0 transition is made with tty_token and then the
         * bucket's proc_token held; both are released again if the
         * compare-and-set loses a race.
         */
        for (;;) {
                count = sess->s_count;
                cpu_ccfence();
                KKASSERT(count > 0);
                if (count == 1) {
                        lwkt_gettoken(&tty_token);
                        lwkt_gettoken(&prg->proc_token);
                        if (atomic_cmpset_int(&sess->s_count, 1, 0))
                                break;
                        lwkt_reltoken(&prg->proc_token);
                        lwkt_reltoken(&tty_token);
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&sess->s_count, count, count - 1))
                                return;
                        /* retry */
                }
        }

        /*
         * Successful 1->0 transition; tty_token and prg->proc_token are
         * held.  Unlink the session and stamp its pid domain with the
         * current time.
         */
        LIST_REMOVE(sess, s_list);
        if (pid_doms[sess->s_sid % PIDSEL_DOMAINS] != (uint8_t)time_second)
                pid_doms[sess->s_sid % PIDSEL_DOMAINS] = (uint8_t)time_second;

        /*
         * Clear the tty's back-pointer to this session.
         */
        if (sess->s_ttyp && sess->s_ttyp->t_session) {
#ifdef TTY_DO_FULL_CLOSE
                /* FULL CLOSE, see ttyclearsession() */
                KKASSERT(sess->s_ttyp->t_session == sess);
                sess->s_ttyp->t_session = NULL;
#else
                /* HALF CLOSE, see ttyclearsession() */
                if (sess->s_ttyp->t_session == sess)
                        sess->s_ttyp->t_session = NULL;
#endif
        }

        /*
         * Drop the session's hold on the tty.
         */
        if ((tp = sess->s_ttyp) != NULL) {
                sess->s_ttyp = NULL;
                ttyunhold(tp);
        }
        lwkt_reltoken(&prg->proc_token);
        lwkt_reltoken(&tty_token);

        kfree(sess, M_SESSION);
}
 882
 883 /*
 884  * Adjust pgrp jobc counters when specified process changes process group.
 885  * We count the number of processes in each process group that "qualify"
 886  * the group for terminal job control (those with a parent in a different
 887  * process group of the same session).  If that count reaches zero, the
 888  * process group becomes orphaned.  Check both the specified process'
 889  * process group and that of its children.
 890  * entering == 0 => p is leaving specified group.
 891  * entering == 1 => p is entering specified group.
 892  *
 893  * No requirements.
 894  */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
        struct pgrp *hispgrp;
        struct session *mysession;
        struct proc *np;

        /*
         * Check p's parent to see whether p qualifies its own process
         * group; if so, adjust count for p's process group.
         */
        lwkt_gettoken(&p->p_token);     /* p_children scan */
        lwkt_gettoken(&pgrp->pg_token);

        mysession = pgrp->pg_session;
        if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
            hispgrp->pg_session == mysession) {
                if (entering)
                        pgrp->pg_jobc++;
                else if (--pgrp->pg_jobc == 0)
                        orphanpg(pgrp);
        }

        /*
         * Check this process' children to see whether they qualify
         * their process groups; if so, adjust counts for children's
         * process groups.
         *
         * Each child is held and its p_token acquired while it is being
         * examined.  A qualifying child's group is referenced and its
         * pg_token acquired around the pg_jobc adjustment.
         *
         * NOTE(review): hispgrp is dereferenced before the SZOMB test;
         * confirm that np->p_pgrp cannot be NULL for a child found on
         * p_children.
         */
        LIST_FOREACH(np, &p->p_children, p_sibling) {
                PHOLD(np);
                lwkt_gettoken(&np->p_token);
                if ((hispgrp = np->p_pgrp) != pgrp &&
                    hispgrp->pg_session == mysession &&
                    np->p_stat != SZOMB) {
                        pgref(hispgrp);
                        lwkt_gettoken(&hispgrp->pg_token);
                        if (entering)
                                hispgrp->pg_jobc++;
                        else if (--hispgrp->pg_jobc == 0)
                                orphanpg(hispgrp);
                        lwkt_reltoken(&hispgrp->pg_token);
                        pgrel(hispgrp);
                }
                lwkt_reltoken(&np->p_token);
                PRELE(np);
        }

        /*
         * The caller's reference on pgrp must still be in place.
         */
        KKASSERT(pgrp->pg_refs > 0);
        lwkt_reltoken(&pgrp->pg_token);
        lwkt_reltoken(&p->p_token);
}
 945
 946 /*
 947  * A process group has become orphaned;
 948  * if there are any stopped processes in the group,
 949  * hang-up all process in that group.
 950  *
 951  * The caller must hold pg_token.
 952  */
 953 static void
 954 orphanpg(struct pgrp *pg)
 955 {
 956         struct proc *p;
 957
 958         LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 959                 if (p->p_stat == SSTOP) {
 960                         LIST_FOREACH(p, &pg->pg_members, p_pglist) {
 961                                 ksignal(p, SIGHUP);
 962                                 ksignal(p, SIGCONT);
 963                         }
 964                         return;
 965                 }
 966         }
 967 }
 968
 969 /*
 970  * Add a new process to the allproc list and the PID hash.  This
 971  * also assigns a pid to the new process.
 972  *
 973  * No requirements.
 974  */
 975 void
 976 proc_add_allproc(struct proc *p)
 977 {
 978         int random_offset;
 979
 980         if ((random_offset = randompid) != 0) {
 981                 read_random(&random_offset, sizeof(random_offset));
 982                 random_offset = (random_offset & 0x7FFFFFFF) % randompid;
 983         }
 984         proc_makepid(p, random_offset);
 985 }
 986
/*
 * Calculate a new process pid.  This function is integrated into
 * proc_add_allproc() to guarantee that the new pid is not reused before
 * the new process can be added to the allproc list.
 *
 * p_pid is assigned and the process is added to the allproc hash table
 *
 * random_offset is added to the sequential candidate taken from nextpid.
 *
 * WARNING! We need to allocate PIDs sequentially during early boot.
 *          In particular, init needs to have a pid of 1.
 */
static
void
proc_makepid(struct proc *p, int random_offset)
{
        static pid_t nextpid = 1;       /* heuristic, allowed to race */
        procglob_t *prg;
        struct pgrp *pg;
        struct proc *ps;
        struct session *sess;
        pid_t base;
        int8_t delta8;
        int retries;
        int n;

        /*
         * Select the next pid base candidate.
         *
         * Handle wrap-around: a candidate outside (0, PID_MAX) is folded
         * back into range and is never allowed to be < 100.
         */
        retries = 0;
retry:
        base = atomic_fetchadd_int(&nextpid, 1) + random_offset;
        if (base <= 0 || base >= PID_MAX) {
                base = base % PID_MAX;
                if (base < 0)
                        base = 100;
                if (base < 100)
                        base += 100;
                nextpid = base;         /* reset (SMP race ok) */
        }

        /*
         * Do not allow a base pid to be selected from a domain that has
         * recently seen a pid/pgid/sessid reap.  Sleep a little if we looped
         * through all available domains.
         *
         * The check compares the low 8 bits of time_second against the
         * stamp stored in pid_doms[] as a signed 8-bit difference; a
         * difference in [0, PIDDOM_DELAY] counts as "recent".
         *
         * WARNING: We want the early pids to be allocated linearly,
         *          particularly pid 1 and pid 2.
         */
        if (++retries >= PIDSEL_DOMAINS)
                tsleep(&nextpid, 0, "makepid", 1);
        if (base >= 100) {
                delta8 = (int8_t)time_second -
                         (int8_t)pid_doms[base % PIDSEL_DOMAINS];
                if (delta8 >= 0 && delta8 <= PIDDOM_DELAY) {
                        ++pid_domain_skips;
                        goto retry;
                }
        }

        /*
         * Calculate a hash index and find an unused process id within
         * the table, looping if we cannot find one.
         *
         * The inner loop increments by ALLPROC_HSIZE which keeps the
         * PID at the same pid_doms[] index as well as the same hash index.
         */
        n = ALLPROC_HASH(base);
        prg = &procglob[n];
        lwkt_gettoken(&prg->proc_token);

        /*
         * The candidate must not collide with any existing pid, pgid or
         * session id in this bucket.  On a collision the candidate is
         * bumped and all three lists are rechecked from the top; if the
         * bump runs past PID_MAX the token is dropped and a completely
         * new candidate is selected.
         */
restart1:
        LIST_FOREACH(ps, &prg->allproc, p_list) {
                if (ps->p_pid == base) {
                        base += ALLPROC_HSIZE;
                        if (base >= PID_MAX) {
                                lwkt_reltoken(&prg->proc_token);
                                goto retry;
                        }
                        ++pid_inner_skips;
                        goto restart1;
                }
        }
        LIST_FOREACH(pg, &prg->allpgrp, pg_list) {
                if (pg->pg_id == base) {
                        base += ALLPROC_HSIZE;
                        if (base >= PID_MAX) {
                                lwkt_reltoken(&prg->proc_token);
                                goto retry;
                        }
                        ++pid_inner_skips;
                        goto restart1;
                }
        }
        LIST_FOREACH(sess, &prg->allsess, s_list) {
                if (sess->s_sid == base) {
                        base += ALLPROC_HSIZE;
                        if (base >= PID_MAX) {
                                lwkt_reltoken(&prg->proc_token);
                                goto retry;
                        }
                        ++pid_inner_skips;
                        goto restart1;
                }
        }

        /*
         * Assign the pid and insert the process.  proc_token is still
         * held from the collision scan above, so the id is published in
         * the same locked section in which it was found to be unused.
         */
        p->p_pid = base;
        LIST_INSERT_HEAD(&prg->allproc, p, p_list);
        lwkt_reltoken(&prg->proc_token);
}
1100
1101 /*
1102  * Called from exit1 to place the process into a zombie state.
1103  * The process is removed from the pid hash and p_stat is set
1104  * to SZOMB.  Normal pfind[n]() calls will not find it any more.
1105  *
1106  * Caller must hold p->p_token.  We are required to wait until p_lock
1107  * becomes zero before we can manipulate the list, allowing allproc
1108  * scans to guarantee consistency during a list scan.
1109  */
1110 void
1111 proc_move_allproc_zombie(struct proc *p)
1112 {
1113         procglob_t *prg;
1114         int n;
1115
1116         n = ALLPROC_HASH(p->p_pid);
1117         prg = &procglob[n];
1118         PSTALL(p, "reap1", 0);
1119         lwkt_gettoken(&prg->proc_token);
1120
1121         PSTALL(p, "reap1a", 0);
1122         p->p_stat = SZOMB;
1123
1124         lwkt_reltoken(&prg->proc_token);
1125         dsched_exit_proc(p);
1126 }
1127
1128 /*
1129  * This routine is called from kern_wait() and will remove the process
1130  * from the zombie list and the sibling list.  This routine will block
1131  * if someone has a lock on the proces (p_lock).
1132  *
1133  * Caller must hold p->p_token.  We are required to wait until p_lock
1134  * becomes zero before we can manipulate the list, allowing allproc
1135  * scans to guarantee consistency during a list scan.
1136  */
1137 void
1138 proc_remove_zombie(struct proc *p)
1139 {
1140         procglob_t *prg;
1141         int n;
1142
1143         n = ALLPROC_HASH(p->p_pid);
1144         prg = &procglob[n];
1145
1146         PSTALL(p, "reap2", 0);
1147         lwkt_gettoken(&prg->proc_token);
1148         PSTALL(p, "reap2a", 0);
1149         LIST_REMOVE(p, p_list);         /* from remove master list */
1150         LIST_REMOVE(p, p_sibling);      /* and from sibling list */
1151         p->p_pptr = NULL;
1152         if (pid_doms[p->p_pid % PIDSEL_DOMAINS] != (uint8_t)time_second)
1153                 pid_doms[p->p_pid % PIDSEL_DOMAINS] = (uint8_t)time_second;
1154         lwkt_reltoken(&prg->proc_token);
1155 }
1156
1157 /*
1158  * Handle various requirements prior to returning to usermode.  Called from
1159  * platform trap and system call code.
1160  */
1161 void
1162 lwpuserret(struct lwp *lp)
1163 {
1164         struct proc *p = lp->lwp_proc;
1165
1166         if (lp->lwp_mpflags & LWP_MP_VNLRU) {
1167                 atomic_clear_int(&lp->lwp_mpflags, LWP_MP_VNLRU);
1168                 allocvnode_gc();
1169         }
1170         if (lp->lwp_mpflags & LWP_MP_WEXIT) {
1171                 lwkt_gettoken(&p->p_token);
1172                 lwp_exit(0, NULL);
1173                 lwkt_reltoken(&p->p_token);     /* NOT REACHED */
1174         }
1175 }
1176
1177 /*
1178  * Kernel threads run from user processes can also accumulate deferred
1179  * actions which need to be acted upon.  Callers include:
1180  *
1181  * nfsd         - Can allocate lots of vnodes
1182  */
1183 void
1184 lwpkthreaddeferred(void)
1185 {
1186         struct lwp *lp = curthread->td_lwp;
1187
1188         if (lp) {
1189                 if (lp->lwp_mpflags & LWP_MP_VNLRU) {
1190                         atomic_clear_int(&lp->lwp_mpflags, LWP_MP_VNLRU);
1191                         allocvnode_gc();
1192                 }
1193         }
1194 }
1195
1196 void
1197 proc_usermap(struct proc *p, int invfork)
1198 {
1199         struct sys_upmap *upmap;
1200
1201         lwkt_gettoken(&p->p_token);
1202         upmap = kmalloc(roundup2(sizeof(*upmap), PAGE_SIZE), M_PROC,
1203                         M_WAITOK | M_ZERO);
1204         if (p->p_upmap == NULL) {
1205                 upmap->header[0].type = UKPTYPE_VERSION;
1206                 upmap->header[0].offset = offsetof(struct sys_upmap, version);
1207                 upmap->header[1].type = UPTYPE_RUNTICKS;
1208                 upmap->header[1].offset = offsetof(struct sys_upmap, runticks);
1209                 upmap->header[2].type = UPTYPE_FORKID;
1210                 upmap->header[2].offset = offsetof(struct sys_upmap, forkid);
1211                 upmap->header[3].type = UPTYPE_PID;
1212                 upmap->header[3].offset = offsetof(struct sys_upmap, pid);
1213                 upmap->header[4].type = UPTYPE_PROC_TITLE;
1214                 upmap->header[4].offset = offsetof(struct sys_upmap,proc_title);
1215                 upmap->header[5].type = UPTYPE_INVFORK;
1216                 upmap->header[5].offset = offsetof(struct sys_upmap, invfork);
1217
1218                 upmap->version = UPMAP_VERSION;
1219                 upmap->pid = p->p_pid;
1220                 upmap->forkid = p->p_forkid;
1221                 upmap->invfork = invfork;
1222                 p->p_upmap = upmap;
1223         } else {
1224                 kfree(upmap, M_PROC);
1225         }
1226         lwkt_reltoken(&p->p_token);
1227 }
1228
1229 void
1230 proc_userunmap(struct proc *p)
1231 {
1232         struct sys_upmap *upmap;
1233
1234         lwkt_gettoken(&p->p_token);
1235         if ((upmap = p->p_upmap) != NULL) {
1236                 p->p_upmap = NULL;
1237                 kfree(upmap, M_PROC);
1238         }
1239         lwkt_reltoken(&p->p_token);
1240 }
1241
1242 /*
1243  * Scan all processes on the allproc list.  The process is automatically
1244  * held for the callback.  A return value of -1 terminates the loop.
1245  * Zombie procs are skipped.
1246  *
1247  * The callback is made with the process held and proc_token held.
1248  *
1249  * We limit the scan to the number of processes as-of the start of
1250  * the scan so as not to get caught up in an endless loop if new processes
1251  * are created more quickly than we can scan the old ones.  Add a little
1252  * slop to try to catch edge cases since nprocs can race.
1253  *
1254  * No requirements.
1255  */
1256 void
1257 allproc_scan(int (*callback)(struct proc *, void *), void *data, int segmented)
1258 {
1259         int limit = nprocs + ncpus;
1260         struct proc *p;
1261         int ns;
1262         int ne;
1263         int r;
1264         int n;
1265
1266         if (segmented) {
1267                 int id = mycpu->gd_cpuid;
1268                 ns = id * ALLPROC_HSIZE / ncpus;
1269                 ne = (id + 1) * ALLPROC_HSIZE / ncpus;
1270         } else {
1271                 ns = 0;
1272                 ne = ALLPROC_HSIZE;
1273         }
1274
1275         /*
1276          * prg->proc_token protects the allproc list and PHOLD() prevents the
1277          * process from being removed from the allproc list or the zombproc
1278          * list.
1279          */
1280         for (n = ns; n < ne; ++n) {
1281                 procglob_t *prg = &procglob[n];
1282                 if (LIST_FIRST(&prg->allproc) == NULL)
1283                         continue;
1284                 lwkt_gettoken(&prg->proc_token);
1285                 LIST_FOREACH(p, &prg->allproc, p_list) {
1286                         if (p->p_stat == SZOMB)
1287                                 continue;
1288                         PHOLD(p);
1289                         r = callback(p, data);
1290                         PRELE(p);
1291                         if (r < 0)
1292                                 break;
1293                         if (--limit < 0)
1294                                 break;
1295                 }
1296                 lwkt_reltoken(&prg->proc_token);
1297
1298                 /*
1299                  * Check if asked to stop early
1300                  */
1301                 if (p)
1302                         break;
1303         }
1304 }
1305
1306 /*
1307  * Scan all lwps of processes on the allproc list.  The lwp is automatically
1308  * held for the callback.  A return value of -1 terminates the loop.
1309  *
1310  * The callback is made with the proces and lwp both held, and proc_token held.
1311  *
1312  * No requirements.
1313  */
1314 void
1315 alllwp_scan(int (*callback)(struct lwp *, void *), void *data, int segmented)
1316 {
1317         struct proc *p;
1318         struct lwp *lp;
1319         int ns;
1320         int ne;
1321         int r = 0;
1322         int n;
1323
1324         if (segmented) {
1325                 int id = mycpu->gd_cpuid;
1326                 ns = id * ALLPROC_HSIZE / ncpus;
1327                 ne = (id + 1) * ALLPROC_HSIZE / ncpus;
1328         } else {
1329                 ns = 0;
1330                 ne = ALLPROC_HSIZE;
1331         }
1332
1333         for (n = ns; n < ne; ++n) {
1334                 procglob_t *prg = &procglob[n];
1335
1336                 if (LIST_FIRST(&prg->allproc) == NULL)
1337                         continue;
1338                 lwkt_gettoken(&prg->proc_token);
1339                 LIST_FOREACH(p, &prg->allproc, p_list) {
1340                         if (p->p_stat == SZOMB)
1341                                 continue;
1342                         PHOLD(p);
1343                         lwkt_gettoken(&p->p_token);
1344                         FOREACH_LWP_IN_PROC(lp, p) {
1345                                 LWPHOLD(lp);
1346                                 r = callback(lp, data);
1347                                 LWPRELE(lp);
1348                         }
1349                         lwkt_reltoken(&p->p_token);
1350                         PRELE(p);
1351                         if (r < 0)
1352                                 break;
1353                 }
1354                 lwkt_reltoken(&prg->proc_token);
1355
1356                 /*
1357                  * Asked to exit early
1358                  */
1359                 if (p)
1360                         break;
1361         }
1362 }
1363
1364 /*
1365  * Scan all processes on the zombproc list.  The process is automatically
1366  * held for the callback.  A return value of -1 terminates the loop.
1367  *
1368  * No requirements.
1369  * The callback is made with the proces held and proc_token held.
1370  */
1371 void
1372 zombproc_scan(int (*callback)(struct proc *, void *), void *data)
1373 {
1374         struct proc *p;
1375         int r;
1376         int n;
1377
1378         /*
1379          * prg->proc_token protects the allproc list and PHOLD() prevents the
1380          * process from being removed from the allproc list or the zombproc
1381          * list.
1382          */
1383         for (n = 0; n < ALLPROC_HSIZE; ++n) {
1384                 procglob_t *prg = &procglob[n];
1385
1386                 if (LIST_FIRST(&prg->allproc) == NULL)
1387                         continue;
1388                 lwkt_gettoken(&prg->proc_token);
1389                 LIST_FOREACH(p, &prg->allproc, p_list) {
1390                         if (p->p_stat != SZOMB)
1391                                 continue;
1392                         PHOLD(p);
1393                         r = callback(p, data);
1394                         PRELE(p);
1395                         if (r < 0)
1396                                 break;
1397                 }
1398                 lwkt_reltoken(&prg->proc_token);
1399
1400                 /*
1401                  * Check if asked to stop early
1402                  */
1403                 if (p)
1404                         break;
1405         }
1406 }
1407
1408 #include "opt_ddb.h"
1409 #ifdef DDB
1410 #include <ddb/ddb.h>
1411
1412 /*
1413  * Debugging only
1414  */
1415 DB_SHOW_COMMAND(pgrpdump, pgrpdump)
1416 {
1417         struct pgrp *pgrp;
1418         struct proc *p;
1419         procglob_t *prg;
1420         int i;
1421
1422         for (i = 0; i < ALLPROC_HSIZE; ++i) {
1423                 prg = &procglob[i];
1424
1425                 if (LIST_EMPTY(&prg->allpgrp))
1426                         continue;
1427                 kprintf("\tindx %d\n", i);
1428                 LIST_FOREACH(pgrp, &prg->allpgrp, pg_list) {
1429                         kprintf("\tpgrp %p, pgid %ld, sess %p, "
1430                                 "sesscnt %d, mem %p\n",
1431                                 (void *)pgrp, (long)pgrp->pg_id,
1432                                 (void *)pgrp->pg_session,
1433                                 pgrp->pg_session->s_count,
1434                                 (void *)LIST_FIRST(&pgrp->pg_members));
1435                         LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
1436                                 kprintf("\t\tpid %ld addr %p pgrp %p\n",
1437                                         (long)p->p_pid, (void *)p,
1438                                         (void *)p->p_pgrp);
1439                         }
1440                 }
1441         }
1442 }
1443 #endif /* DDB */
1444
1445 /*
1446  * The caller must hold proc_token.
1447  */
1448 static int
1449 sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
1450 {
1451         struct kinfo_proc ki;
1452         struct lwp *lp;
1453         int skp = 0, had_output = 0;
1454         int error;
1455
1456         bzero(&ki, sizeof(ki));
1457         lwkt_gettoken_shared(&p->p_token);
1458         fill_kinfo_proc(p, &ki);
1459         if ((flags & KERN_PROC_FLAG_LWP) == 0)
1460                 skp = 1;
1461         error = 0;
1462         FOREACH_LWP_IN_PROC(lp, p) {
1463                 LWPHOLD(lp);
1464                 fill_kinfo_lwp(lp, &ki.kp_lwp);
1465                 had_output = 1;
1466                 error = SYSCTL_OUT(req, &ki, sizeof(ki));
1467                 LWPRELE(lp);
1468                 if (error)
1469                         break;
1470                 if (skp)
1471                         break;
1472         }
1473         lwkt_reltoken(&p->p_token);
1474         /* We need to output at least the proc, even if there is no lwp. */
1475         if (had_output == 0) {
1476                 error = SYSCTL_OUT(req, &ki, sizeof(ki));
1477         }
1478         return (error);
1479 }
1480
1481 /*
1482  * The caller must hold proc_token.
1483  */
1484 static int
1485 sysctl_out_proc_kthread(struct thread *td, struct sysctl_req *req)
1486 {
1487         struct kinfo_proc ki;
1488         int error;
1489
1490         fill_kinfo_proc_kthread(td, &ki);
1491         error = SYSCTL_OUT(req, &ki, sizeof(ki));
1492         if (error)
1493                 return error;
1494         return(0);
1495 }
1496
/*
 * Sysctl handler producing kinfo_proc records for processes and,
 * optionally, for kernel threads.
 *
 * The oid number carries the query type (KERN_PROC_ALL, _PID, _PGRP,
 * _TTY, _UID, _RUID) plus modifier bits under KERN_PROC_FLAGMASK.
 * KERN_PROC_ALL takes no name argument; every other query takes exactly
 * one (name[0]), otherwise EINVAL is returned.
 *
 * Returns 0 or the first error produced while copying records out.
 *
 * No requirements.
 */
static int
sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
{
        int *name = (int *)arg1;
        int oid = oidp->oid_number;
        u_int namelen = arg2;
        struct proc *p;
        struct thread *td;
        struct thread *marker;
        int flags = 0;
        int error = 0;
        int n;
        int origcpu;
        struct ucred *cr1 = curproc->p_ucred;

        /* Split the oid number into modifier flags and the query type. */
        flags = oid & KERN_PROC_FLAGMASK;
        oid &= ~KERN_PROC_FLAGMASK;

        if ((oid == KERN_PROC_ALL && namelen != 0) ||
            (oid != KERN_PROC_ALL && namelen != 1)) {
                return (EINVAL);
        }

        /*
         * proc_token protects the allproc list and PHOLD() prevents the
         * process from being removed from the allproc list or the zombproc
         * list.
         *
         * Single-pid query: look the process up directly.  A pid that is
         * not found, or one that fails PRISON_CHECK(), yields no output
         * and no error.
         */
        if (oid == KERN_PROC_PID) {
                p = pfind((pid_t)name[0]);
                if (p) {
                        if (PRISON_CHECK(cr1, p->p_ucred))
                                error = sysctl_out_proc(p, req, flags);
                        PRELE(p);
                }
                goto post_threads;
        }
        p = NULL;

        /* No output buffer supplied: pad the accounted size. */
        if (!req->oldptr) {
                /* overestimate by 5 procs */
                error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
                if (error)
                        goto post_threads;
        }

        /*
         * Pass 1: walk every allproc hash bucket and report the
         * processes that match the query and that the caller may see.
         */
        for (n = 0; n < ALLPROC_HSIZE; ++n) {
                procglob_t *prg = &procglob[n];

                /* Unlocked test to skip empty buckets. */
                if (LIST_EMPTY(&prg->allproc))
                        continue;
                lwkt_gettoken_shared(&prg->proc_token);
                LIST_FOREACH(p, &prg->allproc, p_list) {
                        /*
                         * Show a user only their processes.
                         */
                        if ((!ps_showallprocs) &&
                                (p->p_ucred == NULL || p_trespass(cr1, p->p_ucred))) {
                                continue;
                        }
                        /*
                         * Skip embryonic processes.
                         */
                        if (p->p_stat == SIDL)
                                continue;
                        /*
                         * TODO - make more efficient (see notes below).
                         * do by session.
                         */
                        switch (oid) {
                        case KERN_PROC_PGRP:
                                /* could do this by traversing pgrp */
                                if (p->p_pgrp == NULL ||
                                    p->p_pgrp->pg_id != (pid_t)name[0])
                                        continue;
                                break;

                        case KERN_PROC_TTY:
                                /* Must have a controlling tty matching name[0]. */
                                if ((p->p_flags & P_CONTROLT) == 0 ||
                                    p->p_session == NULL ||
                                    p->p_session->s_ttyp == NULL ||
                                    dev2udev(p->p_session->s_ttyp->t_dev) !=
                                        (udev_t)name[0])
                                        continue;
                                break;

                        case KERN_PROC_UID:
                                if (p->p_ucred == NULL ||
                                    p->p_ucred->cr_uid != (uid_t)name[0])
                                        continue;
                                break;

                        case KERN_PROC_RUID:
                                if (p->p_ucred == NULL ||
                                    p->p_ucred->cr_ruid != (uid_t)name[0])
                                        continue;
                                break;
                        }

                        if (!PRISON_CHECK(cr1, p->p_ucred))
                                continue;
                        PHOLD(p);
                        error = sysctl_out_proc(p, req, flags);
                        PRELE(p);
                        if (error) {
                                /* Drop the bucket token before bailing out. */
                                lwkt_reltoken(&prg->proc_token);
                                goto post_threads;
                        }
                }
                lwkt_reltoken(&prg->proc_token);
        }

        /*
         * Iterate over all active cpus and scan their thread list.  Start
         * with the next logical cpu and end with our original cpu.  We
         * migrate our own thread to each target cpu in order to safely scan
         * its thread list.  In the last loop we migrate back to our original
         * cpu.
         *
         * This pass is skipped entirely unless ps_showallthreads is set
         * and the caller is not jailed.
         */
        origcpu = mycpu->gd_cpuid;
        if (!ps_showallthreads || jailed(cr1))
                goto post_threads;

        /*
         * Zeroed dummy thread flagged TDF_MARKER.  It is linked into each
         * cpu's gd_tdallq and serves as the scan cursor, so the position
         * survives the windows in which the critical section is dropped.
         */
        marker = kmalloc(sizeof(struct thread), M_TEMP, M_WAITOK|M_ZERO);
        marker->td_flags = TDF_MARKER;
        error = 0;

        for (n = 1; n <= ncpus; ++n) {
                globaldata_t rgd;
                int nid;

                /* n == ncpus wraps back around to origcpu. */
                nid = (origcpu + n) % ncpus;
                if (CPUMASK_TESTBIT(smp_active_mask, nid) == 0)
                        continue;
                rgd = globaldata_find(nid);
                lwkt_setcpu_self(rgd);

                crit_enter();
                TAILQ_INSERT_TAIL(&rgd->gd_tdallq, marker, td_allq);

                /*
                 * Walk the queue from tail to head: take the thread in
                 * front of the marker, then move the marker in front of
                 * that thread before looking at it.
                 */
                while ((td = TAILQ_PREV(marker, lwkt_queue, td_allq)) != NULL) {
                        TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq);
                        TAILQ_INSERT_BEFORE(td, marker, td_allq);
                        /* Skip markers (any entry flagged TDF_MARKER). */
                        if (td->td_flags & TDF_MARKER)
                                continue;
                        /*
                         * Skip threads that belong to a process
                         * (presumably already covered by pass 1).
                         */
                        if (td->td_proc)
                                continue;

                        /* Hold td across the window outside the critical section. */
                        lwkt_hold(td);
                        crit_exit();

                        /*
                         * The filtered queries never report kernel
                         * threads; every other query type does.
                         */
                        switch (oid) {
                        case KERN_PROC_PGRP:
                        case KERN_PROC_TTY:
                        case KERN_PROC_UID:
                        case KERN_PROC_RUID:
                                break;
                        default:
                                error = sysctl_out_proc_kthread(td, req);
                                break;
                        }
                        lwkt_rele(td);
                        crit_enter();
                        if (error)
                                break;
                }
                /* Always unlink the marker from this cpu's queue. */
                TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq);
                crit_exit();

                if (error)
                        break;
        }

        /*
         * Userland scheduler expects us to return on the same cpu we
         * started on.
         */
        if (mycpu->gd_cpuid != origcpu)
                lwkt_setcpu_self(globaldata_find(origcpu));

        kfree(marker, M_TEMP);

post_threads:
        return (error);
}
1685
/*
 * This sysctl allows a process to retrieve the argument list or process
 * title of another process without groping around in the address space
 * of the other process.  It also allows a process to set its own
 * "process title" to a string of its own choice.
 *
 * name[0] is the target pid.  Returns EINVAL when the name does not
 * consist of exactly one element and EPERM when a process tries to set
 * the title of a different process.  A pid that is not found, or a
 * target the caller may not inspect, produces no output and returns 0.
 *
 * No requirements.
 */
static int
sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
{
        int *name = (int*) arg1;
        u_int namelen = arg2;
        struct proc *p;
        struct pargs *opa;
        struct pargs *pa;
        int error = 0;
        struct ucred *cr1 = curproc->p_ucred;

        if (namelen != 1)
                return (EINVAL);

        /* A found process is released again via PRELE() at done. */
        p = pfind((pid_t)name[0]);
        if (p == NULL)
                goto done;
        lwkt_gettoken(&p->p_token);

        /* Unless ps_argsopen is set, foreign processes stay hidden. */
        if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred))
                goto done;

        /* Only the process itself may write a new title. */
        if (req->newptr && curproc != p) {
                error = EPERM;
                goto done;
        }
        /*
         * Read half.  A non-empty title in p_upmap takes precedence
         * over p_args.
         */
        if (req->oldptr) {
                if (p->p_upmap != NULL && p->p_upmap->proc_title[0]) {
                        /*
                         * Args set via writable user process mmap.
                         * We must calculate the string length manually
                         * because the user data can change at any time.
                         */
                        size_t n;
                        char *base;

                        base = p->p_upmap->proc_title;
                        /* Bounded scan, stops at UPMAP_MAXPROCTITLE - 1. */
                        for (n = 0; n < UPMAP_MAXPROCTITLE - 1; ++n) {
                                if (base[n] == 0)
                                        break;
                        }
                        /* Copy out n bytes, then our own terminating NUL. */
                        error = SYSCTL_OUT(req, base, n);
                        if (error == 0)
                                error = SYSCTL_OUT(req, "", 1);
                } else if ((pa = p->p_args) != NULL) {
                        /*
                         * Args set by setproctitle() sysctl.
                         *
                         * An extra reference is held across the copyout
                         * and the structure is freed here if that turns
                         * out to be the last one.
                         */
                        refcount_acquire(&pa->ar_ref);
                        error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
                        if (refcount_release(&pa->ar_ref))
                                kfree(pa, M_PARGS);
                }
        }
        /* Read-only request, nothing more to do. */
        if (req->newptr == NULL)
                goto done;

        /*
         * Write half.  A title that would exceed ps_arg_cache_limit is
         * silently ignored; no error is set for it here.
         *
         * NOTE(review): an error left over from the read half above is
         * overwritten by the SYSCTL_IN() result below - confirm that
         * this is intended.
         */
        if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) {
                goto done;
        }

        pa = kmalloc(sizeof(struct pargs) + req->newlen, M_PARGS, M_WAITOK);
        refcount_init(&pa->ar_ref, 1);
        pa->ar_length = req->newlen;
        error = SYSCTL_IN(req, pa->ar_args, req->newlen);
        if (error) {
                kfree(pa, M_PARGS);
                goto done;
        }


        /*
         * Replace p_args with the new pa.  p_args may have previously
         * been NULL.
         */
        opa = p->p_args;
        p->p_args = pa;

        /* Drop the reference p_args held on the old structure. */
        if (opa) {
                KKASSERT(opa->ar_ref > 0);
                if (refcount_release(&opa->ar_ref)) {
                        kfree(opa, M_PARGS);
                        /* opa = NULL; */
                }
        }
done:
        /* p is non-NULL only when pfind() succeeded and p_token was taken. */
        if (p) {
                lwkt_reltoken(&p->p_token);
                PRELE(p);
        }
        return (error);
}
1786
1787 static int
1788 sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
1789 {
1790         int *name = (int*) arg1;
1791         u_int namelen = arg2;
1792         struct proc *p;
1793         int error = 0;
1794         char *fullpath, *freepath;
1795         struct ucred *cr1 = curproc->p_ucred;
1796
1797         if (namelen != 1)
1798                 return (EINVAL);
1799
1800         p = pfind((pid_t)name[0]);
1801         if (p == NULL)
1802                 goto done;
1803         lwkt_gettoken_shared(&p->p_token);
1804
1805         /*
1806          * If we are not allowed to see other args, we certainly shouldn't
1807          * get the cwd either. Also check the usual trespassing.
1808          */
1809         if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred))
1810                 goto done;
1811
1812         if (req->oldptr && p->p_fd != NULL && p->p_fd->fd_ncdir.ncp) {
1813                 struct nchandle nch;
1814
1815                 cache_copy(&p->p_fd->fd_ncdir, &nch);
1816                 error = cache_fullpath(p, &nch, NULL,
1817                                        &fullpath, &freepath, 0);
1818                 cache_drop(&nch);
1819                 if (error)
1820                         goto done;
1821                 error = SYSCTL_OUT(req, fullpath, strlen(fullpath) + 1);
1822                 kfree(freepath, M_TEMP);
1823         }
1824
1825 done:
1826         if (p) {
1827                 lwkt_reltoken(&p->p_token);
1828                 PRELE(p);
1829         }
1830         return (error);
1831 }
1832
1833 /*
1834  * This sysctl allows a process to retrieve the path of the executable for
1835  * itself or another process.
1836  */
1837 static int
1838 sysctl_kern_proc_pathname(SYSCTL_HANDLER_ARGS)
1839 {
1840         pid_t *pidp = (pid_t *)arg1;
1841         unsigned int arglen = arg2;
1842         struct proc *p;
1843         char *retbuf, *freebuf;
1844         int error = 0;
1845         struct nchandle nch;
1846
1847         if (arglen != 1)
1848                 return (EINVAL);
1849         if (*pidp == -1) {      /* -1 means this process */
1850                 p = curproc;
1851         } else {
1852                 p = pfind(*pidp);
1853                 if (p == NULL)
1854                         return (ESRCH);
1855         }
1856
1857         cache_copy(&p->p_textnch, &nch);
1858         error = cache_fullpath(p, &nch, NULL, &retbuf, &freebuf, 0);
1859         cache_drop(&nch);
1860         if (error)
1861                 goto done;
1862         error = SYSCTL_OUT(req, retbuf, strlen(retbuf) + 1);
1863         kfree(freebuf, M_TEMP);
1864 done:
1865         if (*pidp != -1)
1866                 PRELE(p);
1867
1868         return (error);
1869 }
1870
1871 static int
1872 sysctl_kern_proc_sigtramp(SYSCTL_HANDLER_ARGS)
1873 {
1874         /*int *name = (int *)arg1;*/
1875         u_int namelen = arg2;
1876         struct kinfo_sigtramp kst;
1877         const struct sysentvec *sv;
1878         int error;
1879
1880         if (namelen > 1)
1881                 return (EINVAL);
1882         /* ignore pid if passed in (freebsd compatibility) */
1883
1884         sv = curproc->p_sysent;
1885         bzero(&kst, sizeof(kst));
1886         if (sv->sv_szsigcode) {
1887                 intptr_t sigbase;
1888
1889                 sigbase = trunc_page64((intptr_t)PS_STRINGS -
1890                                        *sv->sv_szsigcode);
1891                 sigbase -= SZSIGCODE_EXTRA_BYTES;
1892
1893                 kst.ksigtramp_start = (void *)sigbase;
1894                 kst.ksigtramp_end = (void *)(sigbase + *sv->sv_szsigcode);
1895         }
1896         error = SYSCTL_OUT(req, &kst, sizeof(kst));
1897
1898         return (error);
1899 }
1900
1901 SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");
1902
1903 SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT,
1904         0, 0, sysctl_kern_proc, "S,proc", "Return entire process table");
1905
1906 SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD,
1907         sysctl_kern_proc, "Process table");
1908
1909 SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD,
1910         sysctl_kern_proc, "Process table");
1911
1912 SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD,
1913         sysctl_kern_proc, "Process table");
1914
1915 SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD,
1916         sysctl_kern_proc, "Process table");
1917
1918 SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD,
1919         sysctl_kern_proc, "Process table");
1920
1921 SYSCTL_NODE(_kern_proc, (KERN_PROC_ALL | KERN_PROC_FLAG_LWP), all_lwp, CTLFLAG_RD,
1922         sysctl_kern_proc, "Process table");
1923
1924 SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_FLAG_LWP), pgrp_lwp, CTLFLAG_RD,
1925         sysctl_kern_proc, "Process table");
1926
1927 SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_FLAG_LWP), tty_lwp, CTLFLAG_RD,
1928         sysctl_kern_proc, "Process table");
1929
1930 SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_FLAG_LWP), uid_lwp, CTLFLAG_RD,
1931         sysctl_kern_proc, "Process table");
1932
1933 SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_FLAG_LWP), ruid_lwp, CTLFLAG_RD,
1934         sysctl_kern_proc, "Process table");
1935
1936 SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_FLAG_LWP), pid_lwp, CTLFLAG_RD,
1937         sysctl_kern_proc, "Process table");
1938
1939 SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_ANYBODY,
1940         sysctl_kern_proc_args, "Process argument list");
1941
1942 SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD | CTLFLAG_ANYBODY,
1943         sysctl_kern_proc_cwd, "Process argument list");
1944
1945 static SYSCTL_NODE(_kern_proc, KERN_PROC_PATHNAME, pathname, CTLFLAG_RD,
1946         sysctl_kern_proc_pathname, "Process executable path");
1947
1948 SYSCTL_PROC(_kern_proc, KERN_PROC_SIGTRAMP, sigtramp, CTLFLAG_RD|CTLTYPE_STRUCT,
1949         0, 0, sysctl_kern_proc_sigtramp, "S,sigtramp",
1950         "Return sigtramp address range");