Unleashed v1.4
[unleashed.git] / kernel / os / pid.c
blob93bd9369c0b06d5cac2c1e105fe0495adfbb894e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/proc.h>
33 #include <sys/kmem.h>
34 #include <sys/tuneable.h>
35 #include <sys/var.h>
36 #include <sys/cred.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/vnode.h>
40 #include <sys/session.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cmn_err.h>
43 #include <sys/bitmap.h>
44 #include <sys/debug.h>
45 #include <sys/project.h>
46 #include <sys/task.h>
47 #include <sys/zone.h>
/*
 * Directory entries for /proc.  Each slot in procdir is either in use
 * (pe_proc points at the resident process) or threaded onto the
 * procentfree list via pe_next; pid_entry() distinguishes the two by
 * checking whether the stored pointer falls inside the procdir array.
 */
union procent {
	proc_t *pe_proc;	/* in use: process occupying this slot */
	union procent *pe_next;	/* free: next entry on procentfree list */
};
/*
 * Statically allocated pid structure for PID 0 (sched).  It is hashed in
 * by pid_init() rather than allocated through pid_allocate(), and
 * pid_rele() asserts that it is never freed.
 */
struct pid pid0 = {
	0,		/* pid_prinactive */
	1,		/* pid_pgorphaned */
	0,		/* pid_padding */
	0,		/* pid_prslot */
	0,		/* pid_id */
	NULL,		/* pid_pglink */
	NULL,		/* pid_pgtail */
	NULL,		/* pid_link */
	3		/* pid_ref */
};
static int pid_hashlen = 4;	/* desired average hash chain length */
static int pid_hashsz;		/* number of buckets in the hash table */

/*
 * NOTE: the masking below requires pid_hashsz to be a power of two;
 * pid_init() guarantees this by computing it with highbit().
 */
#define	HASHPID(pid)	(pidhash[((pid)&(pid_hashsz-1))])

extern uint_t nproc;
extern struct kmem_cache *process_cache;
static void upcount_init(void);

kmutex_t pidlock;		/* global process lock */
kmutex_t pr_pidlock;		/* /proc global process lock */
kcondvar_t *pr_pid_cv;		/* for /proc, one per process slot */
struct plock *proc_lock;	/* persistent array of p_lock's */

/*
 * See the comment above pid_getlockslot() for a detailed explanation of this
 * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
 * granularity; if the coherence granularity is ever changed, this constant
 * should be modified to reflect the change to minimize proc_lock false
 * sharing (correctness, however, is guaranteed regardless of the coherence
 * granularity).
 */
#define	PLOCK_SHIFT	3

static kmutex_t pidlinklock;	/* protects pidhash chains and procentfree */
static struct pid **pidhash;	/* pid hash table, sized in pid_init() */
static pid_t minpid;		/* smallest pid handed out by the allocator */
static pid_t mpid = FAMOUS_PIDS;	/* one more than the last famous pid */
static union procent *procdir;	/* the /proc directory (one slot per proc) */
static union procent *procentfree;	/* head of the free-slot list */
98 static struct pid *
99 pid_lookup(pid_t pid)
101 struct pid *pidp;
103 ASSERT(MUTEX_HELD(&pidlinklock));
105 for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
106 if (pidp->pid_id == pid) {
107 ASSERT(pidp->pid_ref > 0);
108 break;
111 return (pidp);
114 void
115 pid_setmin(void)
117 if (jump_pid && jump_pid > mpid)
118 minpid = mpid = jump_pid;
119 else
120 minpid = mpid;
124 * When prslots are simply used as an index to determine a process' p_lock,
125 * adjacent prslots share adjacent p_locks. On machines where the size
126 * of a mutex is smaller than that of a cache line (which, as of this writing,
127 * is true for all machines on which Solaris runs), this can potentially
128 * induce false sharing. The standard solution for false sharing is to pad
129 * out one's data structures (in this case, struct plock). However,
130 * given the size and (generally) sparse use of the proc_lock array, this
131 * is suboptimal. We therefore stride through the proc_lock array with
132 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as:
134 * log_2 (coherence_granularity / sizeof (kmutex_t))
136 * Under this scheme, false sharing is still possible -- but only when
137 * the number of active processes is very large. Note that the one-to-one
138 * mapping between prslots and lockslots is maintained.
140 static int
141 pid_getlockslot(int prslot)
143 int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
144 int perlap = even >> PLOCK_SHIFT;
146 if (prslot >= even)
147 return (prslot);
149 return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
/*
 * This function allocates a pid structure, a free pid, and optionally a
 * slot in the proc table for it.
 *
 * pid_allocate() returns the new pid on success, -1 on failure.
 *
 * If `pid' is non-zero, the caller is requesting that specific id (only
 * legal before minpid has been raised, i.e. for the famous boot-time
 * pids); otherwise the next free id in [minpid, maxpid) is found by
 * scanning forward from mpid.
 */
pid_t
pid_allocate(proc_t *prp, pid_t pid, int flags)
{
	struct pid *pidp;
	union procent *pep;
	pid_t newpid, startpid;

	/* allocate up front so we never sleep while holding pidlinklock */
	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);

	mutex_enter(&pidlinklock);
	if ((flags & PID_ALLOC_PROC) && (pep = procentfree) == NULL) {
		/*
		 * ran out of /proc directory entries
		 */
		goto failed;
	}

	if (pid != 0) {
		/* explicit request: must be below mpid and not in use */
		VERIFY(minpid == 0);
		VERIFY3P(pid, <, mpid);
		VERIFY3P(pid_lookup(pid), ==, NULL);
		newpid = pid;
	} else {
		/*
		 * Allocate a pid
		 */
		ASSERT(minpid <= mpid && mpid < maxpid);

		startpid = mpid;
		for (;;) {
			newpid = mpid;
			if (++mpid == maxpid)
				mpid = minpid;	/* wrap around */

			if (pid_lookup(newpid) == NULL)
				break;

			/* scanned the whole range: every pid is in use */
			if (mpid == startpid)
				goto failed;
		}
	}

	/*
	 * Put pid into the pid hash table.
	 */
	pidp->pid_link = HASHPID(newpid);
	HASHPID(newpid) = pidp;
	pidp->pid_ref = 1;
	pidp->pid_id = newpid;

	if (flags & PID_ALLOC_PROC) {
		/* claim the /proc slot and wire up the process */
		procentfree = pep->pe_next;
		pidp->pid_prslot = pep - procdir;
		pep->pe_proc = prp;
		prp->p_pidp = pidp;
		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
	} else {
		pidp->pid_prslot = 0;
	}

	mutex_exit(&pidlinklock);

	return (newpid);

failed:
	mutex_exit(&pidlinklock);
	kmem_free(pidp, sizeof (struct pid));
	return (-1);
}
229 * decrement the reference count for pid
232 pid_rele(struct pid *pidp)
234 struct pid **pidpp;
236 mutex_enter(&pidlinklock);
237 ASSERT(pidp != &pid0);
239 pidpp = &HASHPID(pidp->pid_id);
240 for (;;) {
241 ASSERT(*pidpp != NULL);
242 if (*pidpp == pidp)
243 break;
244 pidpp = &(*pidpp)->pid_link;
247 *pidpp = pidp->pid_link;
248 mutex_exit(&pidlinklock);
250 kmem_free(pidp, sizeof (*pidp));
251 return (0);
254 void
255 proc_entry_free(struct pid *pidp)
257 mutex_enter(&pidlinklock);
258 pidp->pid_prinactive = 1;
259 procdir[pidp->pid_prslot].pe_next = procentfree;
260 procentfree = &procdir[pidp->pid_prslot];
261 mutex_exit(&pidlinklock);
/*
 * Final teardown of an exiting process: leave the process group and
 * session, release the /proc slot and pid, unlink from practive, free
 * the proc structure, and drop the task/project/zone process counts.
 *
 * The original task needs to be passed in since the process has already been
 * detached from the task at this point in time.
 *
 * Caller must hold pidlock.
 */
void
pid_exit(proc_t *prp, struct task *tk)
{
	struct pid *pidp;
	zone_t *zone = prp->p_zone;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Exit process group.  If it is NULL, it's because fork failed
	 * before calling pgjoin().
	 */
	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
	if (prp->p_pgidp != NULL)
		pgexit(prp);

	sess_rele(prp->p_sessp, B_TRUE);

	pidp = prp->p_pidp;

	/* give the /proc directory slot back before dropping the pid */
	proc_entry_free(pidp);

	/* unlink prp from the practive doubly-linked list */
	if (practive == prp) {
		practive = prp->p_next;
	}

	if (prp->p_next) {
		prp->p_next->p_prev = prp->p_prev;
	}
	if (prp->p_prev) {
		prp->p_prev->p_next = prp->p_next;
	}

	PID_RELE(pidp);

	mutex_destroy(&prp->p_crlock);
	kmem_cache_free(process_cache, prp);
	nproc--;

	/*
	 * Decrement the process counts of the original task, project and zone.
	 */
	mutex_enter(&zone->zone_nlwps_lock);
	tk->tk_nprocs--;
	tk->tk_proj->kpj_nprocs--;
	zone->zone_nprocs--;
	mutex_exit(&zone->zone_nlwps_lock);
}
318 * Find a process visible from the specified zone given its process ID.
320 proc_t *
321 prfind_zone(pid_t pid, zoneid_t zoneid)
323 struct pid *pidp;
324 proc_t *p;
326 ASSERT(MUTEX_HELD(&pidlock));
328 mutex_enter(&pidlinklock);
329 pidp = pid_lookup(pid);
330 mutex_exit(&pidlinklock);
331 if (pidp != NULL && pidp->pid_prinactive == 0) {
332 p = procdir[pidp->pid_prslot].pe_proc;
333 if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
334 return (p);
336 return (NULL);
340 * Find a process given its process ID. This obeys zone restrictions,
341 * so if the caller is in a non-global zone it won't find processes
342 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to
343 * bypass this restriction.
345 proc_t *
346 prfind(pid_t pid)
348 zoneid_t zoneid;
350 if (INGLOBALZONE(curproc))
351 zoneid = ALL_ZONES;
352 else
353 zoneid = getzoneid();
354 return (prfind_zone(pid, zoneid));
357 proc_t *
358 pgfind_zone(pid_t pgid, zoneid_t zoneid)
360 struct pid *pidp;
362 ASSERT(MUTEX_HELD(&pidlock));
364 mutex_enter(&pidlinklock);
365 pidp = pid_lookup(pgid);
366 mutex_exit(&pidlinklock);
367 if (pidp != NULL) {
368 proc_t *p = pidp->pid_pglink;
370 if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
371 p->p_zone->zone_id == zoneid)
372 return (p);
374 return (NULL);
378 * return the head of the list of processes whose process group ID is 'pgid',
379 * or NULL, if no such process group
381 proc_t *
382 pgfind(pid_t pgid)
384 zoneid_t zoneid;
386 if (INGLOBALZONE(curproc))
387 zoneid = ALL_ZONES;
388 else
389 zoneid = getzoneid();
390 return (pgfind_zone(pgid, zoneid));
394 * Sets P_PR_LOCK on a non-system process. Process must be fully created
395 * and not exiting to succeed.
397 * Returns 0 on success.
398 * Returns 1 if P_PR_LOCK is set.
399 * Returns -1 if proc is in invalid state.
402 sprtrylock_proc(proc_t *p)
404 ASSERT(MUTEX_HELD(&p->p_lock));
406 /* skip system and incomplete processes */
407 if (p->p_stat == SIDL || p->p_stat == SZOMB ||
408 (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
409 return (-1);
412 if (p->p_proc_flag & P_PR_LOCK)
413 return (1);
415 p->p_proc_flag |= P_PR_LOCK;
416 THREAD_KPRI_REQUEST();
418 return (0);
/*
 * Wait for P_PR_LOCK to become clear.  Returns with p_lock dropped,
 * and the proc pointer no longer valid, as the proc may have exited.
 */
void
sprwaitlock_proc(proc_t *p)
{
	kmutex_t *mp;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * p_lock is persistent, but p itself is not -- it could
	 * vanish during cv_wait().  Load p->p_lock now so we can
	 * drop it after cv_wait() without referencing p.
	 */
	mp = &p->p_lock;
	cv_wait(&pr_pid_cv[p->p_slot], mp);
	mutex_exit(mp);
}
/*
 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
 * Returns the proc pointer on success, NULL on failure.  sprlock() is
 * really just a stripped-down version of pr_p_lock() to allow practive
 * walkers like dofusers() and dumpsys() to synchronize with /proc.
 */
proc_t *
sprlock_zone(pid_t pid, zoneid_t zoneid)
{
	proc_t *p;
	int ret;

	for (;;) {
		mutex_enter(&pidlock);
		if ((p = prfind_zone(pid, zoneid)) == NULL) {
			mutex_exit(&pidlock);
			return (NULL);
		}
		/*
		 * Take p_lock while still holding pidlock so p cannot
		 * disappear between lookup and lock acquisition.
		 */
		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);

		/* during panic, skip the P_PR_LOCK protocol entirely */
		if (panicstr)
			return (p);

		ret = sprtrylock_proc(p);
		if (ret == -1) {
			/* invalid state (system/incomplete/exiting) */
			mutex_exit(&p->p_lock);
			return (NULL);
		} else if (ret == 0) {
			/* got P_PR_LOCK; p_lock still held */
			break;
		}
		/*
		 * Someone else holds P_PR_LOCK; wait for it to clear
		 * (drops p_lock) and retry the lookup from scratch,
		 * since p may have exited in the meantime.
		 */
		sprwaitlock_proc(p);
	}
	return (p);
}
479 proc_t *
480 sprlock(pid_t pid)
482 zoneid_t zoneid;
484 if (INGLOBALZONE(curproc))
485 zoneid = ALL_ZONES;
486 else
487 zoneid = getzoneid();
488 return (sprlock_zone(pid, zoneid));
/*
 * Block until P_PR_LOCK is clear on p, then take it.  Caller holds
 * p->p_lock, which cv_wait() drops and reacquires around each sleep.
 */
void
sprlock_proc(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	while (p->p_proc_flag & P_PR_LOCK) {
		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
	}

	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();
}
/*
 * Release P_PR_LOCK: wake one waiter, clear the flag, drop p_lock, and
 * give back the priority boost taken when the lock was acquired.
 * During panic only p_lock is dropped -- the P_PR_LOCK bookkeeping is
 * skipped, mirroring the panicstr shortcut in sprlock_zone().
 */
void
sprunlock(proc_t *p)
{
	if (panicstr) {
		mutex_exit(&p->p_lock);
		return;
	}

	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* signal before clearing so the waiter sees a consistent flag */
	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	mutex_exit(&p->p_lock);
	THREAD_KPRI_RELEASE();
}
/*
 * Boot-time setup of pid management: size and allocate the pid hash
 * table, the /proc directory, the per-slot /proc condvars and the
 * proc_lock array; install the scheduler (proc 0) as the first entry;
 * and thread the remaining /proc slots onto the free list.
 */
void
pid_init(void)
{
	int i;

	/* round buckets to a power of two so HASHPID()'s mask works */
	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);

	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);

	/* proc 0 (sched) occupies slot 0 and heads practive */
	nproc = 1;
	practive = proc_sched;
	proc_sched->p_next = NULL;
	procdir[0].pe_proc = proc_sched;

	/* chain slots 1 .. v_proc-1 onto the free list */
	procentfree = &procdir[1];
	for (i = 1; i < v.v_proc - 1; i++)
		procdir[i].pe_next = &procdir[i+1];
	procdir[i].pe_next = NULL;

	HASHPID(0) = &pid0;

	upcount_init();
}
/*
 * Return the process occupying /proc slot `slot', or NULL if the slot
 * is free or the process there is still being created (SIDL).
 */
proc_t *
pid_entry(int slot)
{
	union procent *pep;
	proc_t *prp;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(slot >= 0 && slot < v.v_proc);

	/*
	 * A free slot's pe_next aims at another procdir entry (its free-
	 * list successor), so a union value pointing inside the procdir
	 * array means the slot is unoccupied.  An in-use slot's pe_proc
	 * points at a proc_t, which lives outside procdir.
	 */
	pep = procdir[slot].pe_next;
	if (pep >= procdir && pep < &procdir[v.v_proc])
		return (NULL);
	prp = procdir[slot].pe_proc;
	if (prp != 0 && prp->p_stat == SIDL)
		return (NULL);
	return (prp);
}
/*
 * Send the specified signal to all processes whose process group ID is
 * equal to 'pgid'
 */
void
signal(pid_t pgid, int sig)
{
	struct pid *pidp;
	proc_t *prp;

	/*
	 * pidlock keeps the pg membership list stable for the walk;
	 * pidlinklock is only needed for the hash lookup itself.
	 */
	mutex_enter(&pidlock);
	mutex_enter(&pidlinklock);
	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
		mutex_exit(&pidlinklock);
		mutex_exit(&pidlock);
		return;
	}
	mutex_exit(&pidlinklock);
	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
		mutex_enter(&prp->p_lock);
		sigtoproc(prp, NULL, sig);
		mutex_exit(&prp->p_lock);
	}
	mutex_exit(&pidlock);
}
594 * Send the specified signal to the specified process
597 void
598 prsignal(struct pid *pidp, int sig)
600 if (!(pidp->pid_prinactive))
601 psignal(procdir[pidp->pid_prslot].pe_proc, sig);
604 #include <sys/sunddi.h>
607 * DDI/DKI interfaces for drivers to send signals to processes
611 * obtain an opaque reference to a process for signaling
613 void *
614 proc_ref(void)
616 struct pid *pidp;
618 mutex_enter(&pidlock);
619 pidp = curproc->p_pidp;
620 PID_HOLD(pidp);
621 mutex_exit(&pidlock);
623 return (pidp);
627 * release a reference to a process
628 * - a process can exit even if a driver has a reference to it
629 * - one proc_unref for every proc_ref
631 void
632 proc_unref(void *pref)
634 mutex_enter(&pidlock);
635 PID_RELE((struct pid *)pref);
636 mutex_exit(&pidlock);
640 * send a signal to a process
642 * - send the process the signal
643 * - if the process went away, return a -1
644 * - if the process is still there return 0
647 proc_signal(void *pref, int sig)
649 struct pid *pidp = pref;
651 prsignal(pidp, sig);
652 return (pidp->pid_prinactive ? -1 : 0);
/*
 * Per-<uid, zone> process counts, kept in a boot-time allocated hash
 * table; all access is serialized by pidlock (see the ASSERTs below).
 */
static struct upcount **upc_hash;	/* a boot time allocated array */
static ulong_t upc_hashmask;		/* table size - 1 (power of two) */
#define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)
/*
 * Get us off the ground.  Called once at boot.
 */
void
upcount_init(void)
{
	ulong_t	upc_hashsize;

	/*
	 * An entry per MB of memory is our current guess
	 */
	/*
	 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
	 * converts pages to megs (without overflowing a u_int
	 * if you have more than 4G of memory, like ptob(physmem)/1M
	 * would).
	 */
	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
	upc_hashmask = upc_hashsize - 1;
	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
	    KM_SLEEP);
}
/*
 * Increment the number of processes associated with a given uid and zoneid.
 *
 * Caller holds pidlock.  If a new entry must be allocated and KM_NOSLEEP
 * fails, pidlock is dropped for a sleeping allocation and the chain is
 * re-scanned from the top, since it may have changed in the meantime.
 */
void
upcount_inc(uid_t uid, zoneid_t zoneid)
{
	struct upcount	**upc, **hupc;
	struct upcount	*new;

	ASSERT(MUTEX_HELD(&pidlock));
	new = NULL;
	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
top:
	upc = hupc;
	while ((*upc) != NULL) {
		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
			(*upc)->up_count++;
			if (new) {
				/*
				 * did not need `new' afterall.
				 */
				kmem_free(new, sizeof (*new));
			}
			return;
		}
		upc = &(*upc)->up_next;
	}

	/*
	 * There is no entry for this <uid,zoneid> pair.
	 * Allocate one.  If we have to drop pidlock, check
	 * again.
	 */
	if (new == NULL) {
		new = kmem_alloc(sizeof (*new), KM_NOSLEEP);
		if (new == NULL) {
			mutex_exit(&pidlock);
			new = kmem_alloc(sizeof (*new), KM_SLEEP);
			mutex_enter(&pidlock);
			goto top;
		}
	}

	/*
	 * On the assumption that a new user is going to do some
	 * more forks, put the new upcount structure on the front.
	 */
	upc = hupc;

	new->up_uid = uid;
	new->up_zoneid = zoneid;
	new->up_count = 1;
	new->up_next = *upc;

	*upc = new;
}
743 * Decrement the number of processes a given uid and zoneid has.
745 void
746 upcount_dec(uid_t uid, zoneid_t zoneid)
748 struct upcount **upc;
749 struct upcount *done;
751 ASSERT(MUTEX_HELD(&pidlock));
753 upc = &upc_hash[UPC_HASH(uid, zoneid)];
754 while ((*upc) != NULL) {
755 if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
756 (*upc)->up_count--;
757 if ((*upc)->up_count == 0) {
758 done = *upc;
759 *upc = (*upc)->up_next;
760 kmem_free(done, sizeof (*done));
762 return;
764 upc = &(*upc)->up_next;
766 cmn_err(CE_PANIC, "decr_upcount-off the end");
770 * Returns the number of processes a uid has.
771 * Non-existent uid's are assumed to have no processes.
774 upcount_get(uid_t uid, zoneid_t zoneid)
776 struct upcount *upc;
778 ASSERT(MUTEX_HELD(&pidlock));
780 upc = upc_hash[UPC_HASH(uid, zoneid)];
781 while (upc != NULL) {
782 if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
783 return (upc->up_count);
785 upc = upc->up_next;
787 return (0);