4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
34 #include <sys/tuneable.h>
37 #include <sys/systm.h>
38 #include <sys/prsystm.h>
39 #include <sys/vnode.h>
40 #include <sys/session.h>
41 #include <sys/cpuvar.h>
42 #include <sys/cmn_err.h>
43 #include <sys/bitmap.h>
44 #include <sys/debug.h>
45 #include <sys/project.h>
49 /* directory entries for /proc */
/*
 * Module-scope state for the kernel pid subsystem.
 * NOTE(review): this extraction is fragmented — interior lines are missing
 * and each line carries a stray original line-number prefix.
 */
/* /proc directory entry: free entries are chained via pe_next (fragment). */
52 union procent
*pe_next
;
/* Fragments of the static pid0 initializer. */
56 0, /* pid_prinactive */
57 1, /* pid_pgorphaned */
61 NULL
, /* pid_pglink */
62 NULL
, /* pid_pgtail */
67 static int pid_hashlen
= 4; /* desired average hash chain length */
68 static int pid_hashsz
; /* number of buckets in the hash table */
/*
 * Hash a pid to its bucket. pid_hashsz is a power of two (set via
 * 1 << highbit(...) in pid_init below), so &(pid_hashsz-1) is a mask.
 */
70 #define HASHPID(pid) (pidhash[((pid)&(pid_hashsz-1))])
73 extern struct kmem_cache
*process_cache
;
74 static void upcount_init(void);
76 kmutex_t pidlock
; /* global process lock */
77 kmutex_t pr_pidlock
; /* /proc global process lock */
78 kcondvar_t
*pr_pid_cv
; /* for /proc, one per process slot */
79 struct plock
*proc_lock
; /* persistent array of p_lock's */
82 * See the comment above pid_getlockslot() for a detailed explanation of this
83 * constant. Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
84 * granularity; if the coherence granularity is ever changed, this constant
85 * should be modified to reflect the change to minimize proc_lock false
86 * sharing (correctness, however, is guaranteed regardless of the coherence
/* Protects the pid hash table and the procdir free list. */
91 static kmutex_t pidlinklock
;
92 static struct pid
**pidhash
;
94 static pid_t mpid
= FAMOUS_PIDS
; /* one more than the last famous pid */
95 static union procent
*procdir
;
96 static union procent
*procentfree
;
/*
 * Fragment of pid_lookup(): find the struct pid for a given pid id by
 * walking its hash chain. Caller must hold pidlinklock (asserted below).
 * NOTE(review): interior/return lines are missing in this view.
 */
103 ASSERT(MUTEX_HELD(&pidlinklock
));
/* Walk the HASHPID(pid) chain following pid_link. */
105 for (pidp
= HASHPID(pid
); pidp
; pidp
= pidp
->pid_link
) {
106 if (pidp
->pid_id
== pid
) {
/* A hashed entry must still be referenced. */
107 ASSERT(pidp
->pid_ref
> 0);
/*
 * Fragment: honor the jump_pid tunable — presumably from pid allocation
 * setup; raises both minpid and mpid so allocation starts at jump_pid.
 * Only applied when jump_pid is nonzero and beyond the current mpid.
 */
117 if (jump_pid
&& jump_pid
> mpid
)
118 minpid
= mpid
= jump_pid
;
124 * When prslots are simply used as an index to determine a process' p_lock,
125 * adjacent prslots share adjacent p_locks. On machines where the size
126 * of a mutex is smaller than that of a cache line (which, as of this writing,
127 * is true for all machines on which Solaris runs), this can potentially
128 * induce false sharing. The standard solution for false sharing is to pad
129 * out one's data structures (in this case, struct plock). However,
130 * given the size and (generally) sparse use of the proc_lock array, this
131 * is suboptimal. We therefore stride through the proc_lock array with
132 * a stride of PLOCK_SHIFT. PLOCK_SHIFT should be defined as:
134 * log_2 (coherence_granularity / sizeof (kmutex_t))
136 * Under this scheme, false sharing is still possible -- but only when
137 * the number of active processes is very large. Note that the one-to-one
138 * mapping between prslots and lockslots is maintained.
/*
 * pid_getlockslot(): map a /proc slot number to an index into the
 * proc_lock array, striding by PLOCK_SHIFT so that adjacent prslots do
 * not share a cache line (see the large comment above).
 * NOTE(review): return type / braces are missing from this fragment.
 */
141 pid_getlockslot(int prslot
)
/* even: v_proc rounded down to a multiple of 2^PLOCK_SHIFT. */
143 int even
= (v
.v_proc
>> PLOCK_SHIFT
) << PLOCK_SHIFT
;
/* perlap: number of slots per "lap" through the strided array. */
144 int perlap
= even
>> PLOCK_SHIFT
;
/* One-to-one mapping: lockslot = (prslot % perlap) * 2^PLOCK_SHIFT + prslot / perlap. */
149 return (((prslot
% perlap
) << PLOCK_SHIFT
) + (prslot
/ perlap
));
153 * This function allocates a pid structure, a free pid, and optionally a
154 * slot in the proc table for it.
156 * pid_allocate() returns the new pid on success, -1 on failure.
/* NOTE(review): fragment — declarations, braces and the search loop body
 * are partially missing from this view. */
159 pid_allocate(proc_t
*prp
, pid_t pid
, int flags
)
163 pid_t newpid
, startpid
;
/* Allocate the pid structure up front; KM_SLEEP cannot fail. */
165 pidp
= kmem_zalloc(sizeof (struct pid
), KM_SLEEP
);
167 mutex_enter(&pidlinklock
);
/* PID_ALLOC_PROC also needs a /proc directory entry from the free list. */
168 if ((flags
& PID_ALLOC_PROC
) && (pep
= procentfree
) == NULL
) {
170 * ran out of /proc directory entries
/* Caller-specified pid: must be a famous pid (< mpid) and unused. */
177 VERIFY3P(pid
, <, mpid
);
178 VERIFY3P(pid_lookup(pid
), ==, NULL
);
/* Otherwise search for a free pid starting at mpid. */
184 ASSERT(minpid
<= mpid
&& mpid
< maxpid
);
/* Wrap the search counter when it reaches maxpid. */
189 if (++mpid
== maxpid
)
/* Candidate is free if nothing hashes to it. */
192 if (pid_lookup(newpid
) == NULL
)
/* Full cycle without finding a free pid: allocation fails. */
195 if (mpid
== startpid
)
201 * Put pid into the pid hash table.
203 pidp
->pid_link
= HASHPID(newpid
);
204 HASHPID(newpid
) = pidp
;
206 pidp
->pid_id
= newpid
;
208 if (flags
& PID_ALLOC_PROC
) {
/* Take a /proc directory entry off the free list. */
209 procentfree
= pep
->pe_next
;
210 pidp
->pid_prslot
= pep
- procdir
;
/* Assign the process its (strided) p_lock for this slot. */
213 prp
->p_lockp
= &proc_lock
[pid_getlockslot(pidp
->pid_prslot
)];
/* No proc slot requested: slot 0 is the "no slot" sentinel here. */
215 pidp
->pid_prslot
= 0;
218 mutex_exit(&pidlinklock
);
/* Failure path: drop the lock and free the unused pid structure. */
223 mutex_exit(&pidlinklock
);
224 kmem_free(pidp
, sizeof (struct pid
));
229 * decrement the reference count for pid
/* NOTE(review): fragment — the refcount decrement / early-return lines
 * are missing; what remains is the unhash-and-free path. */
232 pid_rele(struct pid
*pidp
)
236 mutex_enter(&pidlinklock
);
/* pid0 is permanent and must never be released. */
237 ASSERT(pidp
!= &pid0
);
/* Find the hash-chain link that points at pidp. */
239 pidpp
= &HASHPID(pidp
->pid_id
);
241 ASSERT(*pidpp
!= NULL
);
244 pidpp
= &(*pidpp
)->pid_link
;
/* Unlink pidp from its hash chain. */
247 *pidpp
= pidp
->pid_link
;
248 mutex_exit(&pidlinklock
);
250 kmem_free(pidp
, sizeof (*pidp
));
/*
 * proc_entry_free(): mark a pid's /proc slot inactive and return its
 * procdir entry to the free list, under pidlinklock.
 * NOTE(review): return type / braces are missing from this fragment.
 */
255 proc_entry_free(struct pid
*pidp
)
257 mutex_enter(&pidlinklock
);
/* Inactive: prfind_zone() et al. will no longer return this slot. */
258 pidp
->pid_prinactive
= 1;
/* Push the slot onto the head of the procentfree list. */
259 procdir
[pidp
->pid_prslot
].pe_next
= procentfree
;
260 procentfree
= &procdir
[pidp
->pid_prslot
];
261 mutex_exit(&pidlinklock
);
265 * The original task needs to be passed in since the process has already been
266 * detached from the task at this point in time.
/* NOTE(review): fragment of pid_exit() — several interior lines
 * (pgexit call, pid release, p_prev handling) are missing in this view. */
269 pid_exit(proc_t
*prp
, struct task
*tk
)
272 zone_t
*zone
= prp
->p_zone
;
274 ASSERT(MUTEX_HELD(&pidlock
));
277 * Exit process group. If it is NULL, it's because fork failed
278 * before calling pgjoin().
280 ASSERT(prp
->p_pgidp
!= NULL
|| prp
->p_stat
== SIDL
);
281 if (prp
->p_pgidp
!= NULL
)
/* Release the session; B_TRUE presumably means caller holds pidlock — verify. */
284 sess_rele(prp
->p_sessp
, B_TRUE
);
/* Return the /proc slot to the free list. */
288 proc_entry_free(pidp
);
/* Unlink prp from the doubly linked practive list. */
290 if (practive
== prp
) {
291 practive
= prp
->p_next
;
295 prp
->p_next
->p_prev
= prp
->p_prev
;
298 prp
->p_prev
->p_next
= prp
->p_next
;
303 mutex_destroy(&prp
->p_crlock
);
/* The proc_t itself goes back to the kmem cache. */
304 kmem_cache_free(process_cache
, prp
);
308 * Decrement the process counts of the original task, project and zone.
310 mutex_enter(&zone
->zone_nlwps_lock
);
312 tk
->tk_proj
->kpj_nprocs
--;
314 mutex_exit(&zone
->zone_nlwps_lock
);
318 * Find a process visible from the specified zone given its process ID.
/* NOTE(review): fragment — declarations, return statements and braces
 * are missing. Caller must hold pidlock (asserted below). */
321 prfind_zone(pid_t pid
, zoneid_t zoneid
)
326 ASSERT(MUTEX_HELD(&pidlock
));
/* pid_lookup() requires pidlinklock. */
328 mutex_enter(&pidlinklock
);
329 pidp
= pid_lookup(pid
);
330 mutex_exit(&pidlinklock
);
/* Only active slots map back to a live proc_t. */
331 if (pidp
!= NULL
&& pidp
->pid_prinactive
== 0) {
332 p
= procdir
[pidp
->pid_prslot
].pe_proc
;
/* Zone visibility check; ALL_ZONES bypasses it. */
333 if (zoneid
== ALL_ZONES
|| p
->p_zone
->zone_id
== zoneid
)
340 * Find a process given its process ID. This obeys zone restrictions,
341 * so if the caller is in a non-global zone it won't find processes
342 * associated with other zones. Use prfind_zone(pid, ALL_ZONES) to
343 * bypass this restriction.
/* NOTE(review): fragment — signature and ALL_ZONES branch are missing. */
350 if (INGLOBALZONE(curproc
))
/* Non-global zone: restrict the search to the caller's zone. */
353 zoneid
= getzoneid();
354 return (prfind_zone(pid
, zoneid
));
/*
 * pgfind_zone(): find the head of the process-group list for pgid,
 * subject to zone visibility. Caller must hold pidlock.
 * NOTE(review): fragment — declarations and return lines are missing.
 */
358 pgfind_zone(pid_t pgid
, zoneid_t zoneid
)
362 ASSERT(MUTEX_HELD(&pidlock
));
364 mutex_enter(&pidlinklock
);
365 pidp
= pid_lookup(pgid
);
366 mutex_exit(&pidlinklock
);
/* Head of the process-group membership list for this pid. */
368 proc_t
*p
= pidp
->pid_pglink
;
/* Visible if caller sees all zones, pgid is 0, group is empty,
 * or the group leader's zone matches. */
370 if (zoneid
== ALL_ZONES
|| pgid
== 0 || p
== NULL
||
371 p
->p_zone
->zone_id
== zoneid
)
378 * return the head of the list of processes whose process group ID is 'pgid',
379 * or NULL, if no such process group
/* NOTE(review): fragment — signature and ALL_ZONES branch are missing;
 * mirrors prfind()'s zone-restriction pattern. */
386 if (INGLOBALZONE(curproc
))
389 zoneid
= getzoneid();
390 return (pgfind_zone(pgid
, zoneid
));
394 * Sets P_PR_LOCK on a non-system process. Process must be fully created
395 * and not exiting to succeed.
397 * Returns 0 on success.
398 * Returns 1 if P_PR_LOCK is set.
399 * Returns -1 if proc is in invalid state.
/* NOTE(review): fragment — return statements and braces are missing. */
402 sprtrylock_proc(proc_t
*p
)
404 ASSERT(MUTEX_HELD(&p
->p_lock
));
406 /* skip system and incomplete processes */
407 if (p
->p_stat
== SIDL
|| p
->p_stat
== SZOMB
||
408 (p
->p_flag
& (SSYS
| SEXITING
| SEXITLWPS
))) {
/* Already /proc-locked by someone else: caller must wait. */
412 if (p
->p_proc_flag
& P_PR_LOCK
)
/* Take the /proc lock and bump kernel priority while held. */
415 p
->p_proc_flag
|= P_PR_LOCK
;
416 THREAD_KPRI_REQUEST();
422 * Wait for P_PR_LOCK to become clear. Returns with p_lock dropped,
423 * and the proc pointer no longer valid, as the proc may have exited.
/* NOTE(review): fragment — the load of mp and the mutex_exit(mp) after
 * cv_wait are missing from this view. */
426 sprwaitlock_proc(proc_t
*p
)
430 ASSERT(MUTEX_HELD(&p
->p_lock
));
431 ASSERT(p
->p_proc_flag
& P_PR_LOCK
);
434 * p_lock is persistent, but p itself is not -- it could
435 * vanish during cv_wait(). Load p->p_lock now so we can
436 * drop it after cv_wait() without referencing p.
/* Sleep on the per-slot /proc condvar until P_PR_LOCK is cleared. */
439 cv_wait(&pr_pid_cv
[p
->p_slot
], mp
);
444 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
445 * Returns the proc pointer on success, NULL on failure. sprlock() is
446 * really just a stripped-down version of pr_p_lock() to allow practive
447 * walkers like dofusers() and dumpsys() to synchronize with /proc.
/* NOTE(review): fragment — retry loop structure and return lines
 * are missing from this view. */
450 sprlock_zone(pid_t pid
, zoneid_t zoneid
)
456 mutex_enter(&pidlock
);
457 if ((p
= prfind_zone(pid
, zoneid
)) == NULL
) {
458 mutex_exit(&pidlock
);
/* Found: take p_lock, then drop the global pidlock. */
461 mutex_enter(&p
->p_lock
);
462 mutex_exit(&pidlock
);
467 ret
= sprtrylock_proc(p
);
/* ret < 0 presumably: invalid state, give up. */
469 mutex_exit(&p
->p_lock
);
471 } else if (ret
== 0) {
/* sprlock(): zone-restricted wrapper, same pattern as prfind()/pgfind(). */
484 if (INGLOBALZONE(curproc
))
487 zoneid
= getzoneid();
488 return (sprlock_zone(pid
, zoneid
));
/*
 * sprlock_proc(): with p_lock held, wait until P_PR_LOCK is free and
 * then take it. The trailing fragment (original lines 508-518) appears
 * to be the matching unlock path (sprunlock) — verify against the full
 * source; interior lines are missing in this view.
 */
492 sprlock_proc(proc_t
*p
)
494 ASSERT(MUTEX_HELD(&p
->p_lock
));
/* Wait on the per-slot condvar while another /proc consumer holds the lock. */
496 while (p
->p_proc_flag
& P_PR_LOCK
) {
497 cv_wait(&pr_pid_cv
[p
->p_slot
], &p
->p_lock
);
500 p
->p_proc_flag
|= P_PR_LOCK
;
501 THREAD_KPRI_REQUEST();
508 mutex_exit(&p
->p_lock
);
/* Unlock path: must still hold both P_PR_LOCK and p_lock on entry. */
512 ASSERT(p
->p_proc_flag
& P_PR_LOCK
);
513 ASSERT(MUTEX_HELD(&p
->p_lock
));
/* Wake one waiter, clear the flag, then drop p_lock and the kpri boost. */
515 cv_signal(&pr_pid_cv
[p
->p_slot
]);
516 p
->p_proc_flag
&= ~P_PR_LOCK
;
517 mutex_exit(&p
->p_lock
);
518 THREAD_KPRI_RELEASE();
/*
 * Fragment of pid_init(): boot-time allocation of the pid hash table,
 * the /proc directory, the per-slot condvars and the p_lock array, then
 * seeding slot 0 with the scheduler and chaining the rest onto the
 * free list. NOTE(review): declarations/braces are missing in this view.
 */
/* Buckets sized for an average chain length of pid_hashlen; power of two. */
526 pid_hashsz
= 1 << highbit(v
.v_proc
/ pid_hashlen
);
528 pidhash
= kmem_zalloc(sizeof (struct pid
*) * pid_hashsz
, KM_SLEEP
);
529 procdir
= kmem_alloc(sizeof (union procent
) * v
.v_proc
, KM_SLEEP
);
530 pr_pid_cv
= kmem_zalloc(sizeof (kcondvar_t
) * v
.v_proc
, KM_SLEEP
);
531 proc_lock
= kmem_zalloc(sizeof (struct plock
) * v
.v_proc
, KM_SLEEP
);
/* proc_sched (slot 0) seeds the practive list. */
534 practive
= proc_sched
;
535 proc_sched
->p_next
= NULL
;
536 procdir
[0].pe_proc
= proc_sched
;
/* Chain slots 1..v_proc-1 onto the procentfree list. */
538 procentfree
= &procdir
[1];
539 for (i
= 1; i
< v
.v_proc
- 1; i
++)
540 procdir
[i
].pe_next
= &procdir
[i
+1];
541 procdir
[i
].pe_next
= NULL
;
/*
 * Fragment of a slot-lookup helper (signature not visible — presumably
 * returns the proc_t occupying a given procdir slot, or NULL): validates
 * the slot index, distinguishes a free entry (pe_next points back into
 * procdir) from a live one, and skips embryonic (SIDL) processes.
 * NOTE(review): interior return lines are missing — confirm against the
 * full source.
 */
554 ASSERT(MUTEX_HELD(&pidlock
));
555 ASSERT(slot
>= 0 && slot
< v
.v_proc
);
/* A pe_next pointing into procdir[] means the slot is on the free list. */
557 pep
= procdir
[slot
].pe_next
;
558 if (pep
>= procdir
&& pep
< &procdir
[v
.v_proc
])
560 prp
= procdir
[slot
].pe_proc
;
/* Embryonic processes are not yet visible. */
561 if (prp
!= 0 && prp
->p_stat
== SIDL
)
567 * Send the specified signal to all processes whose process group ID is
/* NOTE(review): fragment — declarations, braces and return lines are
 * missing from this view. */
572 signal(pid_t pgid
, int sig
)
577 mutex_enter(&pidlock
);
578 mutex_enter(&pidlinklock
);
/* No such group (or pgid 0): drop both locks and bail. */
579 if (pgid
== 0 || (pidp
= pid_lookup(pgid
)) == NULL
) {
580 mutex_exit(&pidlinklock
);
581 mutex_exit(&pidlock
);
/* pidlinklock is only needed for the lookup; the group list walk below
 * is protected by pidlock. */
584 mutex_exit(&pidlinklock
);
/* Deliver sig to every member of the process group. */
585 for (prp
= pidp
->pid_pglink
; prp
; prp
= prp
->p_pglink
) {
586 mutex_enter(&prp
->p_lock
);
587 sigtoproc(prp
, NULL
, sig
);
588 mutex_exit(&prp
->p_lock
);
590 mutex_exit(&pidlock
);
594 * Send the specified signal to the specified process
/* NOTE(review): return type / braces are missing from this fragment. */
598 prsignal(struct pid
*pidp
, int sig
)
/* Only deliver if the pid's /proc slot is still active. */
600 if (!(pidp
->pid_prinactive
))
601 psignal(procdir
[pidp
->pid_prslot
].pe_proc
, sig
);
604 #include <sys/sunddi.h>
607 * DDI/DKI interfaces for drivers to send signals to processes
611 * obtain an opaque reference to a process for signaling
/* NOTE(review): proc_ref()'s signature and the PID_HOLD line are missing
 * from this fragment; it returns the caller's struct pid as an opaque ref. */
618 mutex_enter(&pidlock
);
619 pidp
= curproc
->p_pidp
;
621 mutex_exit(&pidlock
);
627 * release a reference to a process
628 * - a process can exit even if a driver has a reference to it
629 * - one proc_unref for every proc_ref
632 proc_unref(void *pref
)
634 mutex_enter(&pidlock
);
/* Drop the hold taken by proc_ref(); PID_RELE presumably frees on last ref. */
635 PID_RELE((struct pid
*)pref
);
636 mutex_exit(&pidlock
);
640 * send a signal to a process
642 * - send the process the signal
643 * - if the process went away, return a -1
644 * - if the process is still there return 0
/* NOTE(review): the prsignal() call inside proc_signal() is missing
 * from this fragment. */
647 proc_signal(void *pref
, int sig
)
649 struct pid
*pidp
= pref
;
652 return (pidp
->pid_prinactive
? -1 : 0);
/* Per-<uid, zoneid> process-count hash table for the upcount_* routines. */
656 static struct upcount
**upc_hash
; /* a boot time allocated array */
657 static ulong_t upc_hashmask
;
/* upc_hashsize is a power of two, so the mask below selects a bucket. */
658 #define UPC_HASH(x, y) ((ulong_t)(x ^ y) & upc_hashmask)
661 * Get us off the ground. Called once at boot.
666 ulong_t upc_hashsize
;
669 * An entry per MB of memory is our current guess
672 * 2^20 is a meg, so shifting right by 20 - PAGESHIFT
673 * converts pages to megs (without overflowing a u_int
674 * if you have more than 4G of memory, like ptob(physmem)/1M
677 upc_hashsize
= (1 << highbit(physmem
>> (20 - PAGESHIFT
)));
678 upc_hashmask
= upc_hashsize
- 1;
/* KM_SLEEP flag presumably follows on a missing continuation line. */
679 upc_hash
= kmem_zalloc(upc_hashsize
* sizeof (struct upcount
*),
684 * Increment the number of processes associated with a given uid and zoneid.
/* NOTE(review): fragment — the increment of up_count, the retry after
 * dropping pidlock, and the list insertion tail are missing. */
687 upcount_inc(uid_t uid
, zoneid_t zoneid
)
689 struct upcount
**upc
, **hupc
;
692 ASSERT(MUTEX_HELD(&pidlock
));
694 hupc
= &upc_hash
[UPC_HASH(uid
, zoneid
)];
/* Scan the bucket for an existing <uid, zoneid> entry. */
697 while ((*upc
) != NULL
) {
698 if ((*upc
)->up_uid
== uid
&& (*upc
)->up_zoneid
== zoneid
) {
702 * did not need `new' afterall.
704 kmem_free(new, sizeof (*new));
708 upc
= &(*upc
)->up_next
;
712 * There is no entry for this <uid,zoneid> pair.
713 * Allocate one. If we have to drop pidlock, check
/* First try a non-sleeping allocation so pidlock can stay held. */
717 new = kmem_alloc(sizeof (*new), KM_NOSLEEP
);
/* KM_NOSLEEP failed: drop pidlock and do a sleeping allocation. */
719 mutex_exit(&pidlock
);
720 new = kmem_alloc(sizeof (*new),
722 mutex_enter(&pidlock
);
729 * On the assumption that a new user is going to do some
730 * more forks, put the new upcount structure on the front.
735 new->up_zoneid
= zoneid
;
743 * Decrement the number of processes a given uid and zoneid has.
/* NOTE(review): fragment — the decrement of up_count and the return
 * statements are missing from this view. */
746 upcount_dec(uid_t uid
, zoneid_t zoneid
)
748 struct upcount
**upc
;
749 struct upcount
*done
;
751 ASSERT(MUTEX_HELD(&pidlock
));
753 upc
= &upc_hash
[UPC_HASH(uid
, zoneid
)];
/* Scan the bucket for the matching entry. */
754 while ((*upc
) != NULL
) {
755 if ((*upc
)->up_uid
== uid
&& (*upc
)->up_zoneid
== zoneid
) {
/* Last process for this <uid, zoneid>: unlink and free the entry. */
757 if ((*upc
)->up_count
== 0) {
759 *upc
= (*upc
)->up_next
;
760 kmem_free(done
, sizeof (*done
));
764 upc
= &(*upc
)->up_next
;
/* Reaching here means the entry was never found — invariant violation. */
766 cmn_err(CE_PANIC
, "decr_upcount-off the end");
770 * Returns the number of processes a uid has.
771 * Non-existent uid's are assumed to have no processes.
/* NOTE(review): fragment — runs past the end of this view; the final
 * return-0 path is not visible. */
774 upcount_get(uid_t uid
, zoneid_t zoneid
)
778 ASSERT(MUTEX_HELD(&pidlock
));
780 upc
= upc_hash
[UPC_HASH(uid
, zoneid
)];
/* Walk the bucket looking for the <uid, zoneid> entry. */
781 while (upc
!= NULL
) {
782 if (upc
->up_uid
== uid
&& upc
->up_zoneid
== zoneid
) {
783 return (upc
->up_count
);