6514 AS_* lock macros simplification
[illumos-gate.git] / usr / src / uts / common / fs / proc / prsubr.c
blob28950bf9728beee65eac1da19dd20be669672bc5
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

30 #include <sys/types.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/cmn_err.h>
34 #include <sys/cred.h>
35 #include <sys/priv.h>
36 #include <sys/debug.h>
37 #include <sys/errno.h>
38 #include <sys/inline.h>
39 #include <sys/kmem.h>
40 #include <sys/mman.h>
41 #include <sys/proc.h>
42 #include <sys/brand.h>
43 #include <sys/sobject.h>
44 #include <sys/sysmacros.h>
45 #include <sys/systm.h>
46 #include <sys/uio.h>
47 #include <sys/var.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/session.h>
51 #include <sys/pcb.h>
52 #include <sys/signal.h>
53 #include <sys/user.h>
54 #include <sys/disp.h>
55 #include <sys/class.h>
56 #include <sys/ts.h>
57 #include <sys/bitmap.h>
58 #include <sys/poll.h>
59 #include <sys/shm_impl.h>
60 #include <sys/fault.h>
61 #include <sys/syscall.h>
62 #include <sys/procfs.h>
63 #include <sys/processor.h>
64 #include <sys/cpuvar.h>
65 #include <sys/copyops.h>
66 #include <sys/time.h>
67 #include <sys/msacct.h>
68 #include <vm/as.h>
69 #include <vm/rm.h>
70 #include <vm/seg.h>
71 #include <vm/seg_vn.h>
72 #include <vm/seg_dev.h>
73 #include <vm/seg_spt.h>
74 #include <vm/page.h>
75 #include <sys/vmparam.h>
76 #include <sys/swap.h>
77 #include <fs/proc/prdata.h>
78 #include <sys/task.h>
79 #include <sys/project.h>
80 #include <sys/contract_impl.h>
81 #include <sys/contract/process.h>
82 #include <sys/contract/process_impl.h>
83 #include <sys/schedctl.h>
84 #include <sys/pool.h>
85 #include <sys/zone.h>
86 #include <sys/atomic.h>
87 #include <sys/sdt.h>
89 #define MAX_ITERS_SPIN 5
91 typedef struct prpagev {
92 uint_t *pg_protv; /* vector of page permissions */
93 char *pg_incore; /* vector of incore flags */
94 size_t pg_npages; /* number of pages in protv and incore */
95 ulong_t pg_pnbase; /* pn within segment of first protv element */
96 } prpagev_t;
98 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */
100 extern struct seg_ops segdev_ops; /* needs a header file */
101 extern struct seg_ops segspt_shmops; /* needs a header file */
103 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
104 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
107 * Choose an lwp from the complete set of lwps for the process.
108 * This is called for any operation applied to the process
109 * file descriptor that requires an lwp to operate upon.
111 * Returns a pointer to the thread for the selected LWP,
112 * and with the dispatcher lock held for the thread.
114 * The algorithm for choosing an lwp is critical for /proc semantics;
115 * don't touch this code unless you know all of the implications.
117 kthread_t *
118 prchoose(proc_t *p)
120 kthread_t *t;
121 kthread_t *t_onproc = NULL; /* running on processor */
122 kthread_t *t_run = NULL; /* runnable, on disp queue */
123 kthread_t *t_sleep = NULL; /* sleeping */
124 kthread_t *t_hold = NULL; /* sleeping, performing hold */
125 kthread_t *t_susp = NULL; /* suspended stop */
126 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */
127 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */
128 kthread_t *t_req = NULL; /* requested stop */
129 kthread_t *t_istop = NULL; /* event-of-interest stop */
130 kthread_t *t_dtrace = NULL; /* DTrace stop */
132 ASSERT(MUTEX_HELD(&p->p_lock));
135 * If the agent lwp exists, it takes precedence over all others.
137 if ((t = p->p_agenttp) != NULL) {
138 thread_lock(t);
139 return (t);
142 if ((t = p->p_tlist) == NULL) /* start at the head of the list */
143 return (t);
144 do { /* for eacn lwp in the process */
145 if (VSTOPPED(t)) { /* virtually stopped */
146 if (t_req == NULL)
147 t_req = t;
148 continue;
151 thread_lock(t); /* make sure thread is in good state */
152 switch (t->t_state) {
153 default:
154 panic("prchoose: bad thread state %d, thread 0x%p",
155 t->t_state, (void *)t);
156 /*NOTREACHED*/
157 case TS_SLEEP:
158 /* this is filthy */
159 if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
160 t->t_wchan0 == NULL) {
161 if (t_hold == NULL)
162 t_hold = t;
163 } else {
164 if (t_sleep == NULL)
165 t_sleep = t;
167 break;
168 case TS_RUN:
169 case TS_WAIT:
170 if (t_run == NULL)
171 t_run = t;
172 break;
173 case TS_ONPROC:
174 if (t_onproc == NULL)
175 t_onproc = t;
176 break;
177 case TS_ZOMB: /* last possible choice */
178 break;
179 case TS_STOPPED:
180 switch (t->t_whystop) {
181 case PR_SUSPENDED:
182 if (t_susp == NULL)
183 t_susp = t;
184 break;
185 case PR_JOBCONTROL:
186 if (t->t_proc_flag & TP_PRSTOP) {
187 if (t_jdstop == NULL)
188 t_jdstop = t;
189 } else {
190 if (t_jstop == NULL)
191 t_jstop = t;
193 break;
194 case PR_REQUESTED:
195 if (t->t_dtrace_stop && t_dtrace == NULL)
196 t_dtrace = t;
197 else if (t_req == NULL)
198 t_req = t;
199 break;
200 case PR_SYSENTRY:
201 case PR_SYSEXIT:
202 case PR_SIGNALLED:
203 case PR_FAULTED:
205 * Make an lwp calling exit() be the
206 * last lwp seen in the process.
208 if (t_istop == NULL ||
209 (t_istop->t_whystop == PR_SYSENTRY &&
210 t_istop->t_whatstop == SYS_exit))
211 t_istop = t;
212 break;
213 case PR_CHECKPOINT: /* can't happen? */
214 break;
215 default:
216 panic("prchoose: bad t_whystop %d, thread 0x%p",
217 t->t_whystop, (void *)t);
218 /*NOTREACHED*/
220 break;
222 thread_unlock(t);
223 } while ((t = t->t_forw) != p->p_tlist);
225 if (t_onproc)
226 t = t_onproc;
227 else if (t_run)
228 t = t_run;
229 else if (t_sleep)
230 t = t_sleep;
231 else if (t_jstop)
232 t = t_jstop;
233 else if (t_jdstop)
234 t = t_jdstop;
235 else if (t_istop)
236 t = t_istop;
237 else if (t_dtrace)
238 t = t_dtrace;
239 else if (t_req)
240 t = t_req;
241 else if (t_hold)
242 t = t_hold;
243 else if (t_susp)
244 t = t_susp;
245 else /* TS_ZOMB */
246 t = p->p_tlist;
248 if (t != NULL)
249 thread_lock(t);
250 return (t);
254 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
255 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
256 * on the /proc file descriptor. Called from stop() when a traced
257 * process stops on an event of interest. Also called from exit()
258 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
260 void
261 prnotify(struct vnode *vp)
263 prcommon_t *pcp = VTOP(vp)->pr_common;
265 mutex_enter(&pcp->prc_mutex);
266 cv_broadcast(&pcp->prc_wait);
267 mutex_exit(&pcp->prc_mutex);
268 if (pcp->prc_flags & PRC_POLL) {
270 * We call pollwakeup() with POLLHUP to ensure that
271 * the pollers are awakened even if they are polling
272 * for nothing (i.e., waiting for the process to exit).
273 * This enables the use of the PRC_POLL flag for optimization
274 * (we can turn off PRC_POLL only if we know no pollers remain).
276 pcp->prc_flags &= ~PRC_POLL;
277 pollwakeup(&pcp->prc_pollhead, POLLHUP);
281 /* called immediately below, in prfree() */
282 static void
283 prfreenotify(vnode_t *vp)
285 prnode_t *pnp;
286 prcommon_t *pcp;
288 while (vp != NULL) {
289 pnp = VTOP(vp);
290 pcp = pnp->pr_common;
291 ASSERT(pcp->prc_thread == NULL);
292 pcp->prc_proc = NULL;
294 * We can't call prnotify() here because we are holding
295 * pidlock. We assert that there is no need to.
297 mutex_enter(&pcp->prc_mutex);
298 cv_broadcast(&pcp->prc_wait);
299 mutex_exit(&pcp->prc_mutex);
300 ASSERT(!(pcp->prc_flags & PRC_POLL));
302 vp = pnp->pr_next;
303 pnp->pr_next = NULL;
308 * Called from a hook in freeproc() when a traced process is removed
309 * from the process table. The proc-table pointers of all associated
310 * /proc vnodes are cleared to indicate that the process has gone away.
312 void
313 prfree(proc_t *p)
315 uint_t slot = p->p_slot;
317 ASSERT(MUTEX_HELD(&pidlock));
320 * Block the process against /proc so it can be freed.
321 * It cannot be freed while locked by some controlling process.
322 * Lock ordering:
323 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
325 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */
326 mutex_enter(&p->p_lock);
327 while (p->p_proc_flag & P_PR_LOCK) {
328 mutex_exit(&pr_pidlock);
329 cv_wait(&pr_pid_cv[slot], &p->p_lock);
330 mutex_exit(&p->p_lock);
331 mutex_enter(&pr_pidlock);
332 mutex_enter(&p->p_lock);
335 ASSERT(p->p_tlist == NULL);
337 prfreenotify(p->p_plist);
338 p->p_plist = NULL;
340 prfreenotify(p->p_trace);
341 p->p_trace = NULL;
344 * We broadcast to wake up everyone waiting for this process.
345 * No one can reach this process from this point on.
347 cv_broadcast(&pr_pid_cv[slot]);
349 mutex_exit(&p->p_lock);
350 mutex_exit(&pr_pidlock);
354 * Called from a hook in exit() when a traced process is becoming a zombie.
356 void
357 prexit(proc_t *p)
359 ASSERT(MUTEX_HELD(&p->p_lock));
361 if (pr_watch_active(p)) {
362 pr_free_watchpoints(p);
363 watch_disable(curthread);
365 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */
366 if (p->p_trace) {
367 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
368 prnotify(p->p_trace);
370 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */
374 * Called when a thread calls lwp_exit().
376 void
377 prlwpexit(kthread_t *t)
379 vnode_t *vp;
380 prnode_t *pnp;
381 prcommon_t *pcp;
382 proc_t *p = ttoproc(t);
383 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
385 ASSERT(t == curthread);
386 ASSERT(MUTEX_HELD(&p->p_lock));
389 * The process must be blocked against /proc to do this safely.
390 * The lwp must not disappear while the process is marked P_PR_LOCK.
391 * It is the caller's responsibility to have called prbarrier(p).
393 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
395 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
396 pnp = VTOP(vp);
397 pcp = pnp->pr_common;
398 if (pcp->prc_thread == t) {
399 pcp->prc_thread = NULL;
400 pcp->prc_flags |= PRC_DESTROY;
404 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
405 pnp = VTOP(vp);
406 pcp = pnp->pr_common;
407 pcp->prc_thread = NULL;
408 pcp->prc_flags |= PRC_DESTROY;
409 prnotify(vp);
412 if (p->p_trace)
413 prnotify(p->p_trace);
417 * Called when a zombie thread is joined or when a
418 * detached lwp exits. Called from lwp_hash_out().
420 void
421 prlwpfree(proc_t *p, lwpent_t *lep)
423 vnode_t *vp;
424 prnode_t *pnp;
425 prcommon_t *pcp;
427 ASSERT(MUTEX_HELD(&p->p_lock));
430 * The process must be blocked against /proc to do this safely.
431 * The lwp must not disappear while the process is marked P_PR_LOCK.
432 * It is the caller's responsibility to have called prbarrier(p).
434 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
436 vp = lep->le_trace;
437 lep->le_trace = NULL;
438 while (vp) {
439 prnotify(vp);
440 pnp = VTOP(vp);
441 pcp = pnp->pr_common;
442 ASSERT(pcp->prc_thread == NULL &&
443 (pcp->prc_flags & PRC_DESTROY));
444 pcp->prc_tslot = -1;
445 vp = pnp->pr_next;
446 pnp->pr_next = NULL;
449 if (p->p_trace)
450 prnotify(p->p_trace);
454 * Called from a hook in exec() when a thread starts exec().
456 void
457 prexecstart(void)
459 proc_t *p = ttoproc(curthread);
460 klwp_t *lwp = ttolwp(curthread);
463 * The P_PR_EXEC flag blocks /proc operations for
464 * the duration of the exec().
465 * We can't start exec() while the process is
466 * locked by /proc, so we call prbarrier().
467 * lwp_nostop keeps the process from being stopped
468 * via job control for the duration of the exec().
471 ASSERT(MUTEX_HELD(&p->p_lock));
472 prbarrier(p);
473 lwp->lwp_nostop++;
474 p->p_proc_flag |= P_PR_EXEC;
478 * Called from a hook in exec() when a thread finishes exec().
479 * The thread may or may not have succeeded. Some other thread
480 * may have beat it to the punch.
482 void
483 prexecend(void)
485 proc_t *p = ttoproc(curthread);
486 klwp_t *lwp = ttolwp(curthread);
487 vnode_t *vp;
488 prnode_t *pnp;
489 prcommon_t *pcp;
490 model_t model = p->p_model;
491 id_t tid = curthread->t_tid;
492 int tslot = curthread->t_dslot;
494 ASSERT(MUTEX_HELD(&p->p_lock));
496 lwp->lwp_nostop--;
497 if (p->p_flag & SEXITLWPS) {
499 * We are on our way to exiting because some
500 * other thread beat us in the race to exec().
501 * Don't clear the P_PR_EXEC flag in this case.
503 return;
507 * Wake up anyone waiting in /proc for the process to complete exec().
509 p->p_proc_flag &= ~P_PR_EXEC;
510 if ((vp = p->p_trace) != NULL) {
511 pcp = VTOP(vp)->pr_common;
512 mutex_enter(&pcp->prc_mutex);
513 cv_broadcast(&pcp->prc_wait);
514 mutex_exit(&pcp->prc_mutex);
515 for (; vp != NULL; vp = pnp->pr_next) {
516 pnp = VTOP(vp);
517 pnp->pr_common->prc_datamodel = model;
520 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
522 * We dealt with the process common above.
524 ASSERT(p->p_trace != NULL);
525 pcp = VTOP(vp)->pr_common;
526 mutex_enter(&pcp->prc_mutex);
527 cv_broadcast(&pcp->prc_wait);
528 mutex_exit(&pcp->prc_mutex);
529 for (; vp != NULL; vp = pnp->pr_next) {
530 pnp = VTOP(vp);
531 pcp = pnp->pr_common;
532 pcp->prc_datamodel = model;
533 pcp->prc_tid = tid;
534 pcp->prc_tslot = tslot;
540 * Called from a hook in relvm() just before freeing the address space.
541 * We free all the watched areas now.
543 void
544 prrelvm(void)
546 proc_t *p = ttoproc(curthread);
548 mutex_enter(&p->p_lock);
549 prbarrier(p); /* block all other /proc operations */
550 if (pr_watch_active(p)) {
551 pr_free_watchpoints(p);
552 watch_disable(curthread);
554 mutex_exit(&p->p_lock);
555 pr_free_watched_pages(p);
559 * Called from hooks in exec-related code when a traced process
560 * attempts to exec(2) a setuid/setgid program or an unreadable
561 * file. Rather than fail the exec we invalidate the associated
562 * /proc vnodes so that subsequent attempts to use them will fail.
564 * All /proc vnodes, except directory vnodes, are retained on a linked
565 * list (rooted at p_plist in the process structure) until last close.
567 * A controlling process must re-open the /proc files in order to
568 * regain control.
570 void
571 prinvalidate(struct user *up)
573 kthread_t *t = curthread;
574 proc_t *p = ttoproc(t);
575 vnode_t *vp;
576 prnode_t *pnp;
577 int writers = 0;
579 mutex_enter(&p->p_lock);
580 prbarrier(p); /* block all other /proc operations */
583 * At this moment, there can be only one lwp in the process.
585 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
588 * Invalidate any currently active /proc vnodes.
590 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
591 pnp = VTOP(vp);
592 switch (pnp->pr_type) {
593 case PR_PSINFO: /* these files can read by anyone */
594 case PR_LPSINFO:
595 case PR_LWPSINFO:
596 case PR_LWPDIR:
597 case PR_LWPIDDIR:
598 case PR_USAGE:
599 case PR_LUSAGE:
600 case PR_LWPUSAGE:
601 break;
602 default:
603 pnp->pr_flags |= PR_INVAL;
604 break;
608 * Wake up anyone waiting for the process or lwp.
609 * p->p_trace is guaranteed to be non-NULL if there
610 * are any open /proc files for this process.
612 if ((vp = p->p_trace) != NULL) {
613 prcommon_t *pcp = VTOP(vp)->pr_pcommon;
615 prnotify(vp);
617 * Are there any writers?
619 if ((writers = pcp->prc_writers) != 0) {
621 * Clear the exclusive open flag (old /proc interface).
622 * Set prc_selfopens equal to prc_writers so that
623 * the next O_EXCL|O_WRITE open will succeed
624 * even with existing (though invalid) writers.
625 * prclose() must decrement prc_selfopens when
626 * the invalid files are closed.
628 pcp->prc_flags &= ~PRC_EXCL;
629 ASSERT(pcp->prc_selfopens <= writers);
630 pcp->prc_selfopens = writers;
633 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
634 while (vp != NULL) {
636 * We should not invalidate the lwpiddir vnodes,
637 * but the necessities of maintaining the old
638 * ioctl()-based version of /proc require it.
640 pnp = VTOP(vp);
641 pnp->pr_flags |= PR_INVAL;
642 prnotify(vp);
643 vp = pnp->pr_next;
647 * If any tracing flags are in effect and any vnodes are open for
648 * writing then set the requested-stop and run-on-last-close flags.
649 * Otherwise, clear all tracing flags.
651 t->t_proc_flag &= ~TP_PAUSE;
652 if ((p->p_proc_flag & P_PR_TRACE) && writers) {
653 t->t_proc_flag |= TP_PRSTOP;
654 aston(t); /* so ISSIG will see the flag */
655 p->p_proc_flag |= P_PR_RUNLCL;
656 } else {
657 premptyset(&up->u_entrymask); /* syscalls */
658 premptyset(&up->u_exitmask);
659 up->u_systrap = 0;
660 premptyset(&p->p_sigmask); /* signals */
661 premptyset(&p->p_fltmask); /* faults */
662 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
663 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
664 prnostep(ttolwp(t));
667 mutex_exit(&p->p_lock);
671 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
672 * Return with pr_pidlock held in all cases.
673 * Return with p_lock held if the the process still exists.
674 * Return value is the process pointer if the process still exists, else NULL.
675 * If we lock the process, give ourself kernel priority to avoid deadlocks;
676 * this is undone in prunlock().
678 proc_t *
679 pr_p_lock(prnode_t *pnp)
681 proc_t *p;
682 prcommon_t *pcp;
684 mutex_enter(&pr_pidlock);
685 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
686 return (NULL);
687 mutex_enter(&p->p_lock);
688 while (p->p_proc_flag & P_PR_LOCK) {
690 * This cv/mutex pair is persistent even if
691 * the process disappears while we sleep.
693 kcondvar_t *cv = &pr_pid_cv[p->p_slot];
694 kmutex_t *mp = &p->p_lock;
696 mutex_exit(&pr_pidlock);
697 cv_wait(cv, mp);
698 mutex_exit(mp);
699 mutex_enter(&pr_pidlock);
700 if (pcp->prc_proc == NULL)
701 return (NULL);
702 ASSERT(p == pcp->prc_proc);
703 mutex_enter(&p->p_lock);
705 p->p_proc_flag |= P_PR_LOCK;
706 THREAD_KPRI_REQUEST();
707 return (p);
711 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
712 * This prevents any lwp of the process from disappearing and
713 * blocks most operations that a process can perform on itself.
714 * Returns 0 on success, a non-zero error number on failure.
716 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
717 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
719 * error returns:
720 * ENOENT: process or lwp has disappeared or process is exiting
721 * (or has become a zombie and zdisp == ZNO).
722 * EAGAIN: procfs vnode has become invalid.
723 * EINTR: signal arrived while waiting for exec to complete.
726 prlock(prnode_t *pnp, int zdisp)
728 prcommon_t *pcp;
729 proc_t *p;
731 again:
732 pcp = pnp->pr_common;
733 p = pr_p_lock(pnp);
734 mutex_exit(&pr_pidlock);
737 * Return ENOENT immediately if there is no process.
739 if (p == NULL)
740 return (ENOENT);
742 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
745 * Return ENOENT if process entered zombie state or is exiting
746 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
748 if (zdisp == ZNO &&
749 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
750 prunlock(pnp);
751 return (ENOENT);
755 * If lwp-specific, check to see if lwp has disappeared.
757 if (pcp->prc_flags & PRC_LWP) {
758 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
759 pcp->prc_tslot == -1) {
760 prunlock(pnp);
761 return (ENOENT);
766 * Return EAGAIN if we have encountered a security violation.
767 * (The process exec'd a set-id or unreadable executable file.)
769 if (pnp->pr_flags & PR_INVAL) {
770 prunlock(pnp);
771 return (EAGAIN);
775 * If process is undergoing an exec(), wait for
776 * completion and then start all over again.
778 if (p->p_proc_flag & P_PR_EXEC) {
779 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */
780 mutex_enter(&pcp->prc_mutex);
781 prunlock(pnp);
782 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
783 mutex_exit(&pcp->prc_mutex);
784 return (EINTR);
786 mutex_exit(&pcp->prc_mutex);
787 goto again;
791 * We return holding p->p_lock.
793 return (0);
797 * Undo prlock() and pr_p_lock().
798 * p->p_lock is still held; pr_pidlock is no longer held.
800 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
801 * if any, waiting for the flag to be dropped; it retains p->p_lock.
803 * prunlock() calls prunmark() and then drops p->p_lock.
805 void
806 prunmark(proc_t *p)
808 ASSERT(p->p_proc_flag & P_PR_LOCK);
809 ASSERT(MUTEX_HELD(&p->p_lock));
811 cv_signal(&pr_pid_cv[p->p_slot]);
812 p->p_proc_flag &= ~P_PR_LOCK;
813 THREAD_KPRI_RELEASE();
816 void
817 prunlock(prnode_t *pnp)
819 prcommon_t *pcp = pnp->pr_common;
820 proc_t *p = pcp->prc_proc;
823 * If we (or someone) gave it a SIGKILL, and it is not
824 * already a zombie, set it running unconditionally.
826 if ((p->p_flag & SKILLED) &&
827 !(p->p_flag & SEXITING) &&
828 !(pcp->prc_flags & PRC_DESTROY) &&
829 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
830 (void) pr_setrun(pnp, 0);
831 prunmark(p);
832 mutex_exit(&p->p_lock);
836 * Called while holding p->p_lock to delay until the process is unlocked.
837 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
838 * The process cannot become locked again until p->p_lock is dropped.
840 void
841 prbarrier(proc_t *p)
843 ASSERT(MUTEX_HELD(&p->p_lock));
845 if (p->p_proc_flag & P_PR_LOCK) {
846 /* The process is locked; delay until not locked */
847 uint_t slot = p->p_slot;
849 while (p->p_proc_flag & P_PR_LOCK)
850 cv_wait(&pr_pid_cv[slot], &p->p_lock);
851 cv_signal(&pr_pid_cv[slot]);
856 * Return process/lwp status.
857 * The u-block is mapped in by this routine and unmapped at the end.
859 void
860 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
862 kthread_t *t;
864 ASSERT(MUTEX_HELD(&p->p_lock));
866 t = prchoose(p); /* returns locked thread */
867 ASSERT(t != NULL);
868 thread_unlock(t);
870 /* just bzero the process part, prgetlwpstatus() does the rest */
871 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
872 sp->pr_nlwp = p->p_lwpcnt;
873 sp->pr_nzomb = p->p_zombcnt;
874 prassignset(&sp->pr_sigpend, &p->p_sig);
875 sp->pr_brkbase = (uintptr_t)p->p_brkbase;
876 sp->pr_brksize = p->p_brksize;
877 sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
878 sp->pr_stksize = p->p_stksize;
879 sp->pr_pid = p->p_pid;
880 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
881 (p->p_flag & SZONETOP)) {
882 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
884 * Inside local zones, fake zsched's pid as parent pids for
885 * processes which reference processes outside of the zone.
887 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
888 } else {
889 sp->pr_ppid = p->p_ppid;
891 sp->pr_pgid = p->p_pgrp;
892 sp->pr_sid = p->p_sessp->s_sid;
893 sp->pr_taskid = p->p_task->tk_tkid;
894 sp->pr_projid = p->p_task->tk_proj->kpj_id;
895 sp->pr_zoneid = p->p_zone->zone_id;
896 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
897 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
898 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
899 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
900 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
901 prassignset(&sp->pr_flttrace, &p->p_fltmask);
902 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
903 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
904 switch (p->p_model) {
905 case DATAMODEL_ILP32:
906 sp->pr_dmodel = PR_MODEL_ILP32;
907 break;
908 case DATAMODEL_LP64:
909 sp->pr_dmodel = PR_MODEL_LP64;
910 break;
912 if (p->p_agenttp)
913 sp->pr_agentid = p->p_agenttp->t_tid;
915 /* get the chosen lwp's status */
916 prgetlwpstatus(t, &sp->pr_lwp, zp);
918 /* replicate the flags */
919 sp->pr_flags = sp->pr_lwp.pr_flags;
922 #ifdef _SYSCALL32_IMPL
923 void
924 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
926 proc_t *p = ttoproc(t);
927 klwp_t *lwp = ttolwp(t);
928 struct mstate *ms = &lwp->lwp_mstate;
929 hrtime_t usr, sys;
930 int flags;
931 ulong_t instr;
933 ASSERT(MUTEX_HELD(&p->p_lock));
935 bzero(sp, sizeof (*sp));
936 flags = 0L;
937 if (t->t_state == TS_STOPPED) {
938 flags |= PR_STOPPED;
939 if ((t->t_schedflag & TS_PSTART) == 0)
940 flags |= PR_ISTOP;
941 } else if (VSTOPPED(t)) {
942 flags |= PR_STOPPED|PR_ISTOP;
944 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
945 flags |= PR_DSTOP;
946 if (lwp->lwp_asleep)
947 flags |= PR_ASLEEP;
948 if (t == p->p_agenttp)
949 flags |= PR_AGENT;
950 if (!(t->t_proc_flag & TP_TWAIT))
951 flags |= PR_DETACH;
952 if (t->t_proc_flag & TP_DAEMON)
953 flags |= PR_DAEMON;
954 if (p->p_proc_flag & P_PR_FORK)
955 flags |= PR_FORK;
956 if (p->p_proc_flag & P_PR_RUNLCL)
957 flags |= PR_RLC;
958 if (p->p_proc_flag & P_PR_KILLCL)
959 flags |= PR_KLC;
960 if (p->p_proc_flag & P_PR_ASYNC)
961 flags |= PR_ASYNC;
962 if (p->p_proc_flag & P_PR_BPTADJ)
963 flags |= PR_BPTADJ;
964 if (p->p_proc_flag & P_PR_PTRACE)
965 flags |= PR_PTRACE;
966 if (p->p_flag & SMSACCT)
967 flags |= PR_MSACCT;
968 if (p->p_flag & SMSFORK)
969 flags |= PR_MSFORK;
970 if (p->p_flag & SVFWAIT)
971 flags |= PR_VFORKP;
972 sp->pr_flags = flags;
973 if (VSTOPPED(t)) {
974 sp->pr_why = PR_REQUESTED;
975 sp->pr_what = 0;
976 } else {
977 sp->pr_why = t->t_whystop;
978 sp->pr_what = t->t_whatstop;
980 sp->pr_lwpid = t->t_tid;
981 sp->pr_cursig = lwp->lwp_cursig;
982 prassignset(&sp->pr_lwppend, &t->t_sig);
983 schedctl_finish_sigblock(t);
984 prassignset(&sp->pr_lwphold, &t->t_hold);
985 if (t->t_whystop == PR_FAULTED) {
986 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
987 if (t->t_whatstop == FLTPAGE)
988 sp->pr_info.si_addr =
989 (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
990 } else if (lwp->lwp_curinfo)
991 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
992 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
993 sp->pr_info.si_zoneid != zp->zone_id) {
994 sp->pr_info.si_pid = zp->zone_zsched->p_pid;
995 sp->pr_info.si_uid = 0;
996 sp->pr_info.si_ctid = -1;
997 sp->pr_info.si_zoneid = zp->zone_id;
999 sp->pr_altstack.ss_sp =
1000 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1001 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1002 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1003 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1004 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1005 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1006 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1007 sizeof (sp->pr_clname) - 1);
1008 if (flags & PR_STOPPED)
1009 hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1010 usr = ms->ms_acct[LMS_USER];
1011 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1012 scalehrtime(&usr);
1013 scalehrtime(&sys);
1014 hrt2ts32(usr, &sp->pr_utime);
1015 hrt2ts32(sys, &sp->pr_stime);
1018 * Fetch the current instruction, if not a system process.
1019 * We don't attempt this unless the lwp is stopped.
1021 if ((p->p_flag & SSYS) || p->p_as == &kas)
1022 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1023 else if (!(flags & PR_STOPPED))
1024 sp->pr_flags |= PR_PCINVAL;
1025 else if (!prfetchinstr(lwp, &instr))
1026 sp->pr_flags |= PR_PCINVAL;
1027 else
1028 sp->pr_instr = (uint32_t)instr;
1031 * Drop p_lock while touching the lwp's stack.
1033 mutex_exit(&p->p_lock);
1034 if (prisstep(lwp))
1035 sp->pr_flags |= PR_STEP;
1036 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1037 int i;
1039 sp->pr_syscall = get_syscall32_args(lwp,
1040 (int *)sp->pr_sysarg, &i);
1041 sp->pr_nsysarg = (ushort_t)i;
1043 if ((flags & PR_STOPPED) || t == curthread)
1044 prgetprregs32(lwp, sp->pr_reg);
1045 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1046 (flags & PR_VFORKP)) {
1047 long r1, r2;
1048 user_t *up;
1049 auxv_t *auxp;
1050 int i;
1052 sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1053 if (sp->pr_errno == 0) {
1054 sp->pr_rval1 = (int32_t)r1;
1055 sp->pr_rval2 = (int32_t)r2;
1056 sp->pr_errpriv = PRIV_NONE;
1057 } else
1058 sp->pr_errpriv = lwp->lwp_badpriv;
1060 if (t->t_sysnum == SYS_execve) {
1061 up = PTOU(p);
1062 sp->pr_sysarg[0] = 0;
1063 sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1064 sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1065 for (i = 0, auxp = up->u_auxv;
1066 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1067 i++, auxp++) {
1068 if (auxp->a_type == AT_SUN_EXECNAME) {
1069 sp->pr_sysarg[0] =
1070 (caddr32_t)
1071 (uintptr_t)auxp->a_un.a_ptr;
1072 break;
1077 if (prhasfp())
1078 prgetprfpregs32(lwp, &sp->pr_fpreg);
1079 mutex_enter(&p->p_lock);
1082 void
1083 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1085 kthread_t *t;
1087 ASSERT(MUTEX_HELD(&p->p_lock));
1089 t = prchoose(p); /* returns locked thread */
1090 ASSERT(t != NULL);
1091 thread_unlock(t);
1093 /* just bzero the process part, prgetlwpstatus32() does the rest */
1094 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1095 sp->pr_nlwp = p->p_lwpcnt;
1096 sp->pr_nzomb = p->p_zombcnt;
1097 prassignset(&sp->pr_sigpend, &p->p_sig);
1098 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1099 sp->pr_brksize = (uint32_t)p->p_brksize;
1100 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1101 sp->pr_stksize = (uint32_t)p->p_stksize;
1102 sp->pr_pid = p->p_pid;
1103 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1104 (p->p_flag & SZONETOP)) {
1105 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1107 * Inside local zones, fake zsched's pid as parent pids for
1108 * processes which reference processes outside of the zone.
1110 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1111 } else {
1112 sp->pr_ppid = p->p_ppid;
1114 sp->pr_pgid = p->p_pgrp;
1115 sp->pr_sid = p->p_sessp->s_sid;
1116 sp->pr_taskid = p->p_task->tk_tkid;
1117 sp->pr_projid = p->p_task->tk_proj->kpj_id;
1118 sp->pr_zoneid = p->p_zone->zone_id;
1119 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1120 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1121 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1122 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1123 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1124 prassignset(&sp->pr_flttrace, &p->p_fltmask);
1125 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1126 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1127 switch (p->p_model) {
1128 case DATAMODEL_ILP32:
1129 sp->pr_dmodel = PR_MODEL_ILP32;
1130 break;
1131 case DATAMODEL_LP64:
1132 sp->pr_dmodel = PR_MODEL_LP64;
1133 break;
1135 if (p->p_agenttp)
1136 sp->pr_agentid = p->p_agenttp->t_tid;
1138 /* get the chosen lwp's status */
1139 prgetlwpstatus32(t, &sp->pr_lwp, zp);
1141 /* replicate the flags */
1142 sp->pr_flags = sp->pr_lwp.pr_flags;
1144 #endif /* _SYSCALL32_IMPL */
1147 * Return lwp status.
1149 void
1150 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1152 proc_t *p = ttoproc(t);
1153 klwp_t *lwp = ttolwp(t);
1154 struct mstate *ms = &lwp->lwp_mstate;
1155 hrtime_t usr, sys;
1156 int flags;
1157 ulong_t instr;
1159 ASSERT(MUTEX_HELD(&p->p_lock));
1161 bzero(sp, sizeof (*sp));
1162 flags = 0L;
1163 if (t->t_state == TS_STOPPED) {
1164 flags |= PR_STOPPED;
1165 if ((t->t_schedflag & TS_PSTART) == 0)
1166 flags |= PR_ISTOP;
1167 } else if (VSTOPPED(t)) {
1168 flags |= PR_STOPPED|PR_ISTOP;
1170 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1171 flags |= PR_DSTOP;
1172 if (lwp->lwp_asleep)
1173 flags |= PR_ASLEEP;
1174 if (t == p->p_agenttp)
1175 flags |= PR_AGENT;
1176 if (!(t->t_proc_flag & TP_TWAIT))
1177 flags |= PR_DETACH;
1178 if (t->t_proc_flag & TP_DAEMON)
1179 flags |= PR_DAEMON;
1180 if (p->p_proc_flag & P_PR_FORK)
1181 flags |= PR_FORK;
1182 if (p->p_proc_flag & P_PR_RUNLCL)
1183 flags |= PR_RLC;
1184 if (p->p_proc_flag & P_PR_KILLCL)
1185 flags |= PR_KLC;
1186 if (p->p_proc_flag & P_PR_ASYNC)
1187 flags |= PR_ASYNC;
1188 if (p->p_proc_flag & P_PR_BPTADJ)
1189 flags |= PR_BPTADJ;
1190 if (p->p_proc_flag & P_PR_PTRACE)
1191 flags |= PR_PTRACE;
1192 if (p->p_flag & SMSACCT)
1193 flags |= PR_MSACCT;
1194 if (p->p_flag & SMSFORK)
1195 flags |= PR_MSFORK;
1196 if (p->p_flag & SVFWAIT)
1197 flags |= PR_VFORKP;
1198 if (p->p_pgidp->pid_pgorphaned)
1199 flags |= PR_ORPHAN;
1200 if (p->p_pidflag & CLDNOSIGCHLD)
1201 flags |= PR_NOSIGCHLD;
1202 if (p->p_pidflag & CLDWAITPID)
1203 flags |= PR_WAITPID;
1204 sp->pr_flags = flags;
1205 if (VSTOPPED(t)) {
1206 sp->pr_why = PR_REQUESTED;
1207 sp->pr_what = 0;
1208 } else {
1209 sp->pr_why = t->t_whystop;
1210 sp->pr_what = t->t_whatstop;
1212 sp->pr_lwpid = t->t_tid;
1213 sp->pr_cursig = lwp->lwp_cursig;
1214 prassignset(&sp->pr_lwppend, &t->t_sig);
1215 schedctl_finish_sigblock(t);
1216 prassignset(&sp->pr_lwphold, &t->t_hold);
1217 if (t->t_whystop == PR_FAULTED)
1218 bcopy(&lwp->lwp_siginfo,
1219 &sp->pr_info, sizeof (k_siginfo_t));
1220 else if (lwp->lwp_curinfo)
1221 bcopy(&lwp->lwp_curinfo->sq_info,
1222 &sp->pr_info, sizeof (k_siginfo_t));
1223 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1224 sp->pr_info.si_zoneid != zp->zone_id) {
1225 sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1226 sp->pr_info.si_uid = 0;
1227 sp->pr_info.si_ctid = -1;
1228 sp->pr_info.si_zoneid = zp->zone_id;
1230 sp->pr_altstack = lwp->lwp_sigaltstack;
1231 prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1232 sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1233 sp->pr_ustack = lwp->lwp_ustack;
1234 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1235 sizeof (sp->pr_clname) - 1);
1236 if (flags & PR_STOPPED)
1237 hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1238 usr = ms->ms_acct[LMS_USER];
1239 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1240 scalehrtime(&usr);
1241 scalehrtime(&sys);
1242 hrt2ts(usr, &sp->pr_utime);
1243 hrt2ts(sys, &sp->pr_stime);
1246 * Fetch the current instruction, if not a system process.
1247 * We don't attempt this unless the lwp is stopped.
1249 if ((p->p_flag & SSYS) || p->p_as == &kas)
1250 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1251 else if (!(flags & PR_STOPPED))
1252 sp->pr_flags |= PR_PCINVAL;
1253 else if (!prfetchinstr(lwp, &instr))
1254 sp->pr_flags |= PR_PCINVAL;
1255 else
1256 sp->pr_instr = instr;
1259 * Drop p_lock while touching the lwp's stack.
1261 mutex_exit(&p->p_lock);
1262 if (prisstep(lwp))
1263 sp->pr_flags |= PR_STEP;
1264 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1265 int i;
1267 sp->pr_syscall = get_syscall_args(lwp,
1268 (long *)sp->pr_sysarg, &i);
1269 sp->pr_nsysarg = (ushort_t)i;
1271 if ((flags & PR_STOPPED) || t == curthread)
1272 prgetprregs(lwp, sp->pr_reg);
1273 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1274 (flags & PR_VFORKP)) {
1275 user_t *up;
1276 auxv_t *auxp;
1277 int i;
1279 sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1280 if (sp->pr_errno == 0)
1281 sp->pr_errpriv = PRIV_NONE;
1282 else
1283 sp->pr_errpriv = lwp->lwp_badpriv;
1285 if (t->t_sysnum == SYS_execve) {
1286 up = PTOU(p);
1287 sp->pr_sysarg[0] = 0;
1288 sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1289 sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1290 for (i = 0, auxp = up->u_auxv;
1291 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1292 i++, auxp++) {
1293 if (auxp->a_type == AT_SUN_EXECNAME) {
1294 sp->pr_sysarg[0] =
1295 (uintptr_t)auxp->a_un.a_ptr;
1296 break;
1301 if (prhasfp())
1302 prgetprfpregs(lwp, &sp->pr_fpreg);
1303 mutex_enter(&p->p_lock);
1307 * Get the sigaction structure for the specified signal. The u-block
1308 * must already have been mapped in by the caller.
1310 void
1311 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1313 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1315 bzero(sp, sizeof (*sp));
1317 if (sig != 0 && (unsigned)sig < nsig) {
1318 sp->sa_handler = up->u_signal[sig-1];
1319 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1320 if (sigismember(&up->u_sigonstack, sig))
1321 sp->sa_flags |= SA_ONSTACK;
1322 if (sigismember(&up->u_sigresethand, sig))
1323 sp->sa_flags |= SA_RESETHAND;
1324 if (sigismember(&up->u_sigrestart, sig))
1325 sp->sa_flags |= SA_RESTART;
1326 if (sigismember(&p->p_siginfo, sig))
1327 sp->sa_flags |= SA_SIGINFO;
1328 if (sigismember(&up->u_signodefer, sig))
1329 sp->sa_flags |= SA_NODEFER;
1330 if (sig == SIGCLD) {
1331 if (p->p_flag & SNOWAIT)
1332 sp->sa_flags |= SA_NOCLDWAIT;
1333 if ((p->p_flag & SJCTL) == 0)
1334 sp->sa_flags |= SA_NOCLDSTOP;
1339 #ifdef _SYSCALL32_IMPL
1340 void
1341 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1343 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1345 bzero(sp, sizeof (*sp));
1347 if (sig != 0 && (unsigned)sig < nsig) {
1348 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1349 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1350 if (sigismember(&up->u_sigonstack, sig))
1351 sp->sa_flags |= SA_ONSTACK;
1352 if (sigismember(&up->u_sigresethand, sig))
1353 sp->sa_flags |= SA_RESETHAND;
1354 if (sigismember(&up->u_sigrestart, sig))
1355 sp->sa_flags |= SA_RESTART;
1356 if (sigismember(&p->p_siginfo, sig))
1357 sp->sa_flags |= SA_SIGINFO;
1358 if (sigismember(&up->u_signodefer, sig))
1359 sp->sa_flags |= SA_NODEFER;
1360 if (sig == SIGCLD) {
1361 if (p->p_flag & SNOWAIT)
1362 sp->sa_flags |= SA_NOCLDWAIT;
1363 if ((p->p_flag & SJCTL) == 0)
1364 sp->sa_flags |= SA_NOCLDSTOP;
1368 #endif /* _SYSCALL32_IMPL */
1371 * Count the number of segments in this process's address space.
1374 prnsegs(struct as *as, int reserved)
1376 int n = 0;
1377 struct seg *seg;
1379 ASSERT(as != &kas && AS_WRITE_HELD(as));
1381 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1382 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1383 caddr_t saddr, naddr;
1384 void *tmp = NULL;
1386 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1387 (void) pr_getprot(seg, reserved, &tmp,
1388 &saddr, &naddr, eaddr);
1389 if (saddr != naddr)
1390 n++;
1393 ASSERT(tmp == NULL);
1396 return (n);
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * The digits are always written in full, even when 'len' is smaller
 * than the number of digits; 'len' only controls the NUL padding.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char rev[11];	/* 32-bit unsigned integer fits in 10 digits */
	int ndigits = 0;
	int i;

	/* Produce the digits least-significant first. */
	do {
		rev[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* Emit them most-significant first. */
	for (i = 0; i < ndigits; i++)
		s[i] = rev[ndigits - 1 - i];

	/* optional pad */
	for (i = ndigits; i < len; i++)
		s[i] = '\0';

	return (ndigits);
}
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating NUL is written; the caller appends whatever follows.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char rev[21];	/* 64-bit unsigned integer fits in 20 digits */
	int ndigits = 0;
	int i;

	/* Produce the digits least-significant first. */
	do {
		rev[ndigits++] = (char)(n % 10 + '0');
		n /= 10;
	} while (n != 0);

	/* Emit them most-significant first. */
	for (i = 0; i < ndigits; i++)
		s[i] = rev[ndigits - 1 - i];

	return (ndigits);
}
1453 void
1454 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1456 char *s = name;
1457 struct vfs *vfsp;
1458 struct vfssw *vfsswp;
1460 if ((vfsp = vp->v_vfsp) != NULL &&
1461 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1462 *vfsswp->vsw_name) {
1463 (void) strcpy(s, vfsswp->vsw_name);
1464 s += strlen(s);
1465 *s++ = '.';
1467 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1468 *s++ = '.';
1469 s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1470 *s++ = '.';
1471 s += pr_u64tos(vattr->va_nodeid, s);
1472 *s++ = '\0';
1475 struct seg *
1476 break_seg(proc_t *p)
1478 caddr_t addr = p->p_brkbase;
1479 struct seg *seg;
1480 struct vnode *vp;
1482 if (p->p_brksize != 0)
1483 addr += p->p_brksize - 1;
1484 seg = as_segat(p->p_as, addr);
1485 if (seg != NULL && seg->s_ops == &segvn_ops &&
1486 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1487 return (seg);
1488 return (NULL);
1492 * Implementation of service functions to handle procfs generic chained
1493 * copyout buffers.
1495 typedef struct pr_iobuf_list {
1496 list_node_t piol_link; /* buffer linkage */
1497 size_t piol_size; /* total size (header + data) */
1498 size_t piol_usedsize; /* amount to copy out from this buf */
1499 } piol_t;
1501 #define MAPSIZE (64 * 1024)
1502 #define PIOL_DATABUF(iol) ((void *)(&(iol)[1]))
1504 void
1505 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1507 piol_t *iol;
1508 size_t initial_size = MIN(1, n) * itemsize;
1510 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1512 ASSERT(list_head(iolhead) == NULL);
1513 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1514 ASSERT(initial_size > 0);
1517 * Someone creating chained copyout buffers may ask for less than
1518 * MAPSIZE if the amount of data to be buffered is known to be
1519 * smaller than that.
1520 * But in order to prevent involuntary self-denial of service,
1521 * the requested input size is clamped at MAPSIZE.
1523 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1524 iol = kmem_alloc(initial_size, KM_SLEEP);
1525 list_insert_head(iolhead, iol);
1526 iol->piol_usedsize = 0;
1527 iol->piol_size = initial_size;
1530 void *
1531 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1533 piol_t *iol;
1534 char *new;
1536 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1537 ASSERT(list_head(iolhead) != NULL);
1539 iol = (piol_t *)list_tail(iolhead);
1541 if (iol->piol_size <
1542 iol->piol_usedsize + sizeof (*iol) + itemsize) {
1544 * Out of space in the current buffer. Allocate more.
1546 piol_t *newiol;
1548 newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1549 newiol->piol_size = MAPSIZE;
1550 newiol->piol_usedsize = 0;
1552 list_insert_after(iolhead, iol, newiol);
1553 iol = list_next(iolhead, iol);
1554 ASSERT(iol == newiol);
1556 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1557 iol->piol_usedsize += itemsize;
1558 bzero(new, itemsize);
1559 return (new);
1563 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1565 int error = errin;
1566 piol_t *iol;
1568 while ((iol = list_head(iolhead)) != NULL) {
1569 list_remove(iolhead, iol);
1570 if (!error) {
1571 if (copyout(PIOL_DATABUF(iol), *tgt,
1572 iol->piol_usedsize))
1573 error = EFAULT;
1574 *tgt += iol->piol_usedsize;
1576 kmem_free(iol, iol->piol_size);
1578 list_destroy(iolhead);
1580 return (error);
1584 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1586 offset_t off = uiop->uio_offset;
1587 char *base;
1588 size_t size;
1589 piol_t *iol;
1590 int error = errin;
1592 while ((iol = list_head(iolhead)) != NULL) {
1593 list_remove(iolhead, iol);
1594 base = PIOL_DATABUF(iol);
1595 size = iol->piol_usedsize;
1596 if (off <= size && error == 0 && uiop->uio_resid > 0)
1597 error = uiomove(base + off, size - off,
1598 UIO_READ, uiop);
1599 off = MAX(0, off - (offset_t)size);
1600 kmem_free(iol, iol->piol_size);
1602 list_destroy(iolhead);
1604 return (error);
1608 * Return an array of structures with memory map information.
1609 * We allocate here; the caller must deallocate.
1612 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1614 struct as *as = p->p_as;
1615 prmap_t *mp;
1616 struct seg *seg;
1617 struct seg *brkseg, *stkseg;
1618 struct vnode *vp;
1619 struct vattr vattr;
1620 uint_t prot;
1622 ASSERT(as != &kas && AS_WRITE_HELD(as));
1625 * Request an initial buffer size that doesn't waste memory
1626 * if the address space has only a small number of segments.
1628 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1630 if ((seg = AS_SEGFIRST(as)) == NULL)
1631 return (0);
1633 brkseg = break_seg(p);
1634 stkseg = as_segat(as, prgetstackbase(p));
1636 do {
1637 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1638 caddr_t saddr, naddr;
1639 void *tmp = NULL;
1641 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1642 prot = pr_getprot(seg, reserved, &tmp,
1643 &saddr, &naddr, eaddr);
1644 if (saddr == naddr)
1645 continue;
1647 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1649 mp->pr_vaddr = (uintptr_t)saddr;
1650 mp->pr_size = naddr - saddr;
1651 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1652 mp->pr_mflags = 0;
1653 if (prot & PROT_READ)
1654 mp->pr_mflags |= MA_READ;
1655 if (prot & PROT_WRITE)
1656 mp->pr_mflags |= MA_WRITE;
1657 if (prot & PROT_EXEC)
1658 mp->pr_mflags |= MA_EXEC;
1659 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1660 mp->pr_mflags |= MA_SHARED;
1661 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1662 mp->pr_mflags |= MA_NORESERVE;
1663 if (seg->s_ops == &segspt_shmops ||
1664 (seg->s_ops == &segvn_ops &&
1665 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1666 mp->pr_mflags |= MA_ANON;
1667 if (seg == brkseg)
1668 mp->pr_mflags |= MA_BREAK;
1669 else if (seg == stkseg) {
1670 mp->pr_mflags |= MA_STACK;
1671 if (reserved) {
1672 size_t maxstack =
1673 ((size_t)p->p_stk_ctl +
1674 PAGEOFFSET) & PAGEMASK;
1675 mp->pr_vaddr =
1676 (uintptr_t)prgetstackbase(p) +
1677 p->p_stksize - maxstack;
1678 mp->pr_size = (uintptr_t)naddr -
1679 mp->pr_vaddr;
1682 if (seg->s_ops == &segspt_shmops)
1683 mp->pr_mflags |= MA_ISM | MA_SHM;
1684 mp->pr_pagesize = PAGESIZE;
1687 * Manufacture a filename for the "object" directory.
1689 vattr.va_mask = AT_FSID|AT_NODEID;
1690 if (seg->s_ops == &segvn_ops &&
1691 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1692 vp != NULL && vp->v_type == VREG &&
1693 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1694 if (vp == p->p_exec)
1695 (void) strcpy(mp->pr_mapname, "a.out");
1696 else
1697 pr_object_name(mp->pr_mapname,
1698 vp, &vattr);
1702 * Get the SysV shared memory id, if any.
1704 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1705 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1706 SHMID_NONE) {
1707 if (mp->pr_shmid == SHMID_FREE)
1708 mp->pr_shmid = -1;
1710 mp->pr_mflags |= MA_SHM;
1711 } else {
1712 mp->pr_shmid = -1;
1715 ASSERT(tmp == NULL);
1716 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1718 return (0);
1721 #ifdef _SYSCALL32_IMPL
1723 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1725 struct as *as = p->p_as;
1726 prmap32_t *mp;
1727 struct seg *seg;
1728 struct seg *brkseg, *stkseg;
1729 struct vnode *vp;
1730 struct vattr vattr;
1731 uint_t prot;
1733 ASSERT(as != &kas && AS_WRITE_HELD(as));
1736 * Request an initial buffer size that doesn't waste memory
1737 * if the address space has only a small number of segments.
1739 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1741 if ((seg = AS_SEGFIRST(as)) == NULL)
1742 return (0);
1744 brkseg = break_seg(p);
1745 stkseg = as_segat(as, prgetstackbase(p));
1747 do {
1748 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1749 caddr_t saddr, naddr;
1750 void *tmp = NULL;
1752 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1753 prot = pr_getprot(seg, reserved, &tmp,
1754 &saddr, &naddr, eaddr);
1755 if (saddr == naddr)
1756 continue;
1758 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1760 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1761 mp->pr_size = (size32_t)(naddr - saddr);
1762 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1763 mp->pr_mflags = 0;
1764 if (prot & PROT_READ)
1765 mp->pr_mflags |= MA_READ;
1766 if (prot & PROT_WRITE)
1767 mp->pr_mflags |= MA_WRITE;
1768 if (prot & PROT_EXEC)
1769 mp->pr_mflags |= MA_EXEC;
1770 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1771 mp->pr_mflags |= MA_SHARED;
1772 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1773 mp->pr_mflags |= MA_NORESERVE;
1774 if (seg->s_ops == &segspt_shmops ||
1775 (seg->s_ops == &segvn_ops &&
1776 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1777 mp->pr_mflags |= MA_ANON;
1778 if (seg == brkseg)
1779 mp->pr_mflags |= MA_BREAK;
1780 else if (seg == stkseg) {
1781 mp->pr_mflags |= MA_STACK;
1782 if (reserved) {
1783 size_t maxstack =
1784 ((size_t)p->p_stk_ctl +
1785 PAGEOFFSET) & PAGEMASK;
1786 uintptr_t vaddr =
1787 (uintptr_t)prgetstackbase(p) +
1788 p->p_stksize - maxstack;
1789 mp->pr_vaddr = (caddr32_t)vaddr;
1790 mp->pr_size = (size32_t)
1791 ((uintptr_t)naddr - vaddr);
1794 if (seg->s_ops == &segspt_shmops)
1795 mp->pr_mflags |= MA_ISM | MA_SHM;
1796 mp->pr_pagesize = PAGESIZE;
1799 * Manufacture a filename for the "object" directory.
1801 vattr.va_mask = AT_FSID|AT_NODEID;
1802 if (seg->s_ops == &segvn_ops &&
1803 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1804 vp != NULL && vp->v_type == VREG &&
1805 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1806 if (vp == p->p_exec)
1807 (void) strcpy(mp->pr_mapname, "a.out");
1808 else
1809 pr_object_name(mp->pr_mapname,
1810 vp, &vattr);
1814 * Get the SysV shared memory id, if any.
1816 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1817 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1818 SHMID_NONE) {
1819 if (mp->pr_shmid == SHMID_FREE)
1820 mp->pr_shmid = -1;
1822 mp->pr_mflags |= MA_SHM;
1823 } else {
1824 mp->pr_shmid = -1;
1827 ASSERT(tmp == NULL);
1828 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1830 return (0);
1832 #endif /* _SYSCALL32_IMPL */
1835 * Return the size of the /proc page data file.
1837 size_t
1838 prpdsize(struct as *as)
1840 struct seg *seg;
1841 size_t size;
1843 ASSERT(as != &kas && AS_WRITE_HELD(as));
1845 if ((seg = AS_SEGFIRST(as)) == NULL)
1846 return (0);
1848 size = sizeof (prpageheader_t);
1849 do {
1850 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1851 caddr_t saddr, naddr;
1852 void *tmp = NULL;
1853 size_t npage;
1855 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1856 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1857 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1858 size += sizeof (prasmap_t) + round8(npage);
1860 ASSERT(tmp == NULL);
1861 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1863 return (size);
1866 #ifdef _SYSCALL32_IMPL
1867 size_t
1868 prpdsize32(struct as *as)
1870 struct seg *seg;
1871 size_t size;
1873 ASSERT(as != &kas && AS_WRITE_HELD(as));
1875 if ((seg = AS_SEGFIRST(as)) == NULL)
1876 return (0);
1878 size = sizeof (prpageheader32_t);
1879 do {
1880 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1881 caddr_t saddr, naddr;
1882 void *tmp = NULL;
1883 size_t npage;
1885 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1886 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1887 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1888 size += sizeof (prasmap32_t) + round8(npage);
1890 ASSERT(tmp == NULL);
1891 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1893 return (size);
1895 #endif /* _SYSCALL32_IMPL */
1898 * Read page data information.
1901 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1903 struct as *as = p->p_as;
1904 caddr_t buf;
1905 size_t size;
1906 prpageheader_t *php;
1907 prasmap_t *pmp;
1908 struct seg *seg;
1909 int error;
1911 again:
1912 AS_LOCK_ENTER(as, RW_WRITER);
1914 if ((seg = AS_SEGFIRST(as)) == NULL) {
1915 AS_LOCK_EXIT(as);
1916 return (0);
1918 size = prpdsize(as);
1919 if (uiop->uio_resid < size) {
1920 AS_LOCK_EXIT(as);
1921 return (E2BIG);
1924 buf = kmem_zalloc(size, KM_SLEEP);
1925 php = (prpageheader_t *)buf;
1926 pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
1928 hrt2ts(gethrtime(), &php->pr_tstamp);
1929 php->pr_nmap = 0;
1930 php->pr_npage = 0;
1931 do {
1932 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1933 caddr_t saddr, naddr;
1934 void *tmp = NULL;
1936 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1937 struct vnode *vp;
1938 struct vattr vattr;
1939 size_t len;
1940 size_t npage;
1941 uint_t prot;
1942 uintptr_t next;
1944 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1945 if ((len = (size_t)(naddr - saddr)) == 0)
1946 continue;
1947 npage = len / PAGESIZE;
1948 next = (uintptr_t)(pmp + 1) + round8(npage);
1950 * It's possible that the address space can change
1951 * subtlely even though we're holding as->a_lock
1952 * due to the nondeterminism of page_exists() in
1953 * the presence of asychronously flushed pages or
1954 * mapped files whose sizes are changing.
1955 * page_exists() may be called indirectly from
1956 * pr_getprot() by a SEGOP_INCORE() routine.
1957 * If this happens we need to make sure we don't
1958 * overrun the buffer whose size we computed based
1959 * on the initial iteration through the segments.
1960 * Once we've detected an overflow, we need to clean
1961 * up the temporary memory allocated in pr_getprot()
1962 * and retry. If there's a pending signal, we return
1963 * EINTR so that this thread can be dislodged if
1964 * a latent bug causes us to spin indefinitely.
1966 if (next > (uintptr_t)buf + size) {
1967 pr_getprot_done(&tmp);
1968 AS_LOCK_EXIT(as);
1970 kmem_free(buf, size);
1972 if (ISSIG(curthread, JUSTLOOKING))
1973 return (EINTR);
1975 goto again;
1978 php->pr_nmap++;
1979 php->pr_npage += npage;
1980 pmp->pr_vaddr = (uintptr_t)saddr;
1981 pmp->pr_npage = npage;
1982 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1983 pmp->pr_mflags = 0;
1984 if (prot & PROT_READ)
1985 pmp->pr_mflags |= MA_READ;
1986 if (prot & PROT_WRITE)
1987 pmp->pr_mflags |= MA_WRITE;
1988 if (prot & PROT_EXEC)
1989 pmp->pr_mflags |= MA_EXEC;
1990 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1991 pmp->pr_mflags |= MA_SHARED;
1992 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1993 pmp->pr_mflags |= MA_NORESERVE;
1994 if (seg->s_ops == &segspt_shmops ||
1995 (seg->s_ops == &segvn_ops &&
1996 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1997 pmp->pr_mflags |= MA_ANON;
1998 if (seg->s_ops == &segspt_shmops)
1999 pmp->pr_mflags |= MA_ISM | MA_SHM;
2000 pmp->pr_pagesize = PAGESIZE;
2002 * Manufacture a filename for the "object" directory.
2004 vattr.va_mask = AT_FSID|AT_NODEID;
2005 if (seg->s_ops == &segvn_ops &&
2006 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2007 vp != NULL && vp->v_type == VREG &&
2008 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2009 if (vp == p->p_exec)
2010 (void) strcpy(pmp->pr_mapname, "a.out");
2011 else
2012 pr_object_name(pmp->pr_mapname,
2013 vp, &vattr);
2017 * Get the SysV shared memory id, if any.
2019 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2020 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2021 SHMID_NONE) {
2022 if (pmp->pr_shmid == SHMID_FREE)
2023 pmp->pr_shmid = -1;
2025 pmp->pr_mflags |= MA_SHM;
2026 } else {
2027 pmp->pr_shmid = -1;
2030 hat_getstat(as, saddr, len, hatid,
2031 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2032 pmp = (prasmap_t *)next;
2034 ASSERT(tmp == NULL);
2035 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2037 AS_LOCK_EXIT(as);
2039 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2040 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2041 kmem_free(buf, size);
2043 return (error);
2046 #ifdef _SYSCALL32_IMPL
2048 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2050 struct as *as = p->p_as;
2051 caddr_t buf;
2052 size_t size;
2053 prpageheader32_t *php;
2054 prasmap32_t *pmp;
2055 struct seg *seg;
2056 int error;
2058 again:
2059 AS_LOCK_ENTER(as, RW_WRITER);
2061 if ((seg = AS_SEGFIRST(as)) == NULL) {
2062 AS_LOCK_EXIT(as);
2063 return (0);
2065 size = prpdsize32(as);
2066 if (uiop->uio_resid < size) {
2067 AS_LOCK_EXIT(as);
2068 return (E2BIG);
2071 buf = kmem_zalloc(size, KM_SLEEP);
2072 php = (prpageheader32_t *)buf;
2073 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2075 hrt2ts32(gethrtime(), &php->pr_tstamp);
2076 php->pr_nmap = 0;
2077 php->pr_npage = 0;
2078 do {
2079 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2080 caddr_t saddr, naddr;
2081 void *tmp = NULL;
2083 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2084 struct vnode *vp;
2085 struct vattr vattr;
2086 size_t len;
2087 size_t npage;
2088 uint_t prot;
2089 uintptr_t next;
2091 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2092 if ((len = (size_t)(naddr - saddr)) == 0)
2093 continue;
2094 npage = len / PAGESIZE;
2095 next = (uintptr_t)(pmp + 1) + round8(npage);
2097 * It's possible that the address space can change
2098 * subtlely even though we're holding as->a_lock
2099 * due to the nondeterminism of page_exists() in
2100 * the presence of asychronously flushed pages or
2101 * mapped files whose sizes are changing.
2102 * page_exists() may be called indirectly from
2103 * pr_getprot() by a SEGOP_INCORE() routine.
2104 * If this happens we need to make sure we don't
2105 * overrun the buffer whose size we computed based
2106 * on the initial iteration through the segments.
2107 * Once we've detected an overflow, we need to clean
2108 * up the temporary memory allocated in pr_getprot()
2109 * and retry. If there's a pending signal, we return
2110 * EINTR so that this thread can be dislodged if
2111 * a latent bug causes us to spin indefinitely.
2113 if (next > (uintptr_t)buf + size) {
2114 pr_getprot_done(&tmp);
2115 AS_LOCK_EXIT(as);
2117 kmem_free(buf, size);
2119 if (ISSIG(curthread, JUSTLOOKING))
2120 return (EINTR);
2122 goto again;
2125 php->pr_nmap++;
2126 php->pr_npage += npage;
2127 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2128 pmp->pr_npage = (size32_t)npage;
2129 pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2130 pmp->pr_mflags = 0;
2131 if (prot & PROT_READ)
2132 pmp->pr_mflags |= MA_READ;
2133 if (prot & PROT_WRITE)
2134 pmp->pr_mflags |= MA_WRITE;
2135 if (prot & PROT_EXEC)
2136 pmp->pr_mflags |= MA_EXEC;
2137 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2138 pmp->pr_mflags |= MA_SHARED;
2139 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2140 pmp->pr_mflags |= MA_NORESERVE;
2141 if (seg->s_ops == &segspt_shmops ||
2142 (seg->s_ops == &segvn_ops &&
2143 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2144 pmp->pr_mflags |= MA_ANON;
2145 if (seg->s_ops == &segspt_shmops)
2146 pmp->pr_mflags |= MA_ISM | MA_SHM;
2147 pmp->pr_pagesize = PAGESIZE;
2149 * Manufacture a filename for the "object" directory.
2151 vattr.va_mask = AT_FSID|AT_NODEID;
2152 if (seg->s_ops == &segvn_ops &&
2153 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2154 vp != NULL && vp->v_type == VREG &&
2155 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2156 if (vp == p->p_exec)
2157 (void) strcpy(pmp->pr_mapname, "a.out");
2158 else
2159 pr_object_name(pmp->pr_mapname,
2160 vp, &vattr);
2164 * Get the SysV shared memory id, if any.
2166 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2167 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2168 SHMID_NONE) {
2169 if (pmp->pr_shmid == SHMID_FREE)
2170 pmp->pr_shmid = -1;
2172 pmp->pr_mflags |= MA_SHM;
2173 } else {
2174 pmp->pr_shmid = -1;
2177 hat_getstat(as, saddr, len, hatid,
2178 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2179 pmp = (prasmap32_t *)next;
2181 ASSERT(tmp == NULL);
2182 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2184 AS_LOCK_EXIT(as);
2186 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2187 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2188 kmem_free(buf, size);
2190 return (error);
2192 #endif /* _SYSCALL32_IMPL */
2194 ushort_t
2195 prgetpctcpu(uint64_t pct)
2198 * The value returned will be relevant in the zone of the examiner,
2199 * which may not be the same as the zone which performed the procfs
2200 * mount.
2202 int nonline = zone_ncpus_online_get(curproc->p_zone);
2205 * Prorate over online cpus so we don't exceed 100%
2207 if (nonline > 1)
2208 pct /= nonline;
2209 pct >>= 16; /* convert to 16-bit scaled integer */
2210 if (pct > 0x8000) /* might happen, due to rounding */
2211 pct = 0x8000;
2212 return ((ushort_t)pct);
2216 * Return information used by ps(1).
2218 void
2219 prgetpsinfo(proc_t *p, psinfo_t *psp)
2221 kthread_t *t;
2222 struct cred *cred;
2223 hrtime_t hrutime, hrstime;
2225 ASSERT(MUTEX_HELD(&p->p_lock));
2227 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2228 bzero(psp, sizeof (*psp));
2229 else {
2230 thread_unlock(t);
2231 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2235 * only export SSYS and SMSACCT; everything else is off-limits to
2236 * userland apps.
2238 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2239 psp->pr_nlwp = p->p_lwpcnt;
2240 psp->pr_nzomb = p->p_zombcnt;
2241 mutex_enter(&p->p_crlock);
2242 cred = p->p_cred;
2243 psp->pr_uid = crgetruid(cred);
2244 psp->pr_euid = crgetuid(cred);
2245 psp->pr_gid = crgetrgid(cred);
2246 psp->pr_egid = crgetgid(cred);
2247 mutex_exit(&p->p_crlock);
2248 psp->pr_pid = p->p_pid;
2249 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2250 (p->p_flag & SZONETOP)) {
2251 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2253 * Inside local zones, fake zsched's pid as parent pids for
2254 * processes which reference processes outside of the zone.
2256 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2257 } else {
2258 psp->pr_ppid = p->p_ppid;
2260 psp->pr_pgid = p->p_pgrp;
2261 psp->pr_sid = p->p_sessp->s_sid;
2262 psp->pr_taskid = p->p_task->tk_tkid;
2263 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2264 psp->pr_poolid = p->p_pool->pool_id;
2265 psp->pr_zoneid = p->p_zone->zone_id;
2266 if ((psp->pr_contract = PRCTID(p)) == 0)
2267 psp->pr_contract = -1;
2268 psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2269 switch (p->p_model) {
2270 case DATAMODEL_ILP32:
2271 psp->pr_dmodel = PR_MODEL_ILP32;
2272 break;
2273 case DATAMODEL_LP64:
2274 psp->pr_dmodel = PR_MODEL_LP64;
2275 break;
2277 hrutime = mstate_aggr_state(p, LMS_USER);
2278 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2279 hrt2ts((hrutime + hrstime), &psp->pr_time);
2280 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2282 if (t == NULL) {
2283 int wcode = p->p_wcode; /* must be atomic read */
2285 if (wcode)
2286 psp->pr_wstat = wstat(wcode, p->p_wdata);
2287 psp->pr_ttydev = PRNODEV;
2288 psp->pr_lwp.pr_state = SZOMB;
2289 psp->pr_lwp.pr_sname = 'Z';
2290 psp->pr_lwp.pr_bindpro = PBIND_NONE;
2291 psp->pr_lwp.pr_bindpset = PS_NONE;
2292 } else {
2293 user_t *up = PTOU(p);
2294 struct as *as;
2295 dev_t d;
2296 extern dev_t rwsconsdev, rconsdev, uconsdev;
2298 d = cttydev(p);
2300 * If the controlling terminal is the real
2301 * or workstation console device, map to what the
2302 * user thinks is the console device. Handle case when
2303 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2305 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2306 d = uconsdev;
2307 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2308 psp->pr_start = up->u_start;
2309 bcopy(up->u_comm, psp->pr_fname,
2310 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2311 bcopy(up->u_psargs, psp->pr_psargs,
2312 MIN(PRARGSZ-1, PSARGSZ));
2313 psp->pr_argc = up->u_argc;
2314 psp->pr_argv = up->u_argv;
2315 psp->pr_envp = up->u_envp;
2317 /* get the chosen lwp's lwpsinfo */
2318 prgetlwpsinfo(t, &psp->pr_lwp);
2320 /* compute %cpu for the process */
2321 if (p->p_lwpcnt == 1)
2322 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2323 else {
2324 uint64_t pct = 0;
2325 hrtime_t cur_time = gethrtime_unscaled();
2327 t = p->p_tlist;
2328 do {
2329 pct += cpu_update_pct(t, cur_time);
2330 } while ((t = t->t_forw) != p->p_tlist);
2332 psp->pr_pctcpu = prgetpctcpu(pct);
2334 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2335 psp->pr_size = 0;
2336 psp->pr_rssize = 0;
2337 } else {
2338 mutex_exit(&p->p_lock);
2339 AS_LOCK_ENTER(as, RW_READER);
2340 psp->pr_size = btopr(as->a_resvsize) *
2341 (PAGESIZE / 1024);
2342 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2343 psp->pr_pctmem = rm_pctmemory(as);
2344 AS_LOCK_EXIT(as);
2345 mutex_enter(&p->p_lock);
#ifdef _SYSCALL32_IMPL
/*
 * Fill in the ILP32 form of the psinfo structure for process p.
 *
 * The caller must hold p->p_lock.  For a process with a user address
 * space the lock is dropped and re-taken while the address space is
 * examined.  When prchoose() finds no representative lwp the process is
 * reported as a zombie.  Values that do not fit in 32 bits are reported
 * as zero.
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		/*
		 * Leave pr_lwp alone here; prgetlwpsinfo32() below zeroes
		 * and fills it.  The size arithmetic relies on pr_lwp being
		 * the trailing member of the structure.
		 */
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		/*
		 * Report "no binding" for a zombie, exactly as the native
		 * prgetpsinfo() does; the zero left by bzero() would
		 * otherwise be read as a binding to id 0.
		 */
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		/* the zeroed destination keeps both strings terminated */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time;

			/* sum the per-thread figures around the thread ring */
			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* p_lock is released before the as lock is taken */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = (size32_t)
			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}

#endif	/* _SYSCALL32_IMPL */
2497 void
2498 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2500 klwp_t *lwp = ttolwp(t);
2501 sobj_ops_t *sobj;
2502 char c, state;
2503 uint64_t pct;
2504 int retval, niceval;
2505 hrtime_t hrutime, hrstime;
2507 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2509 bzero(psp, sizeof (*psp));
2511 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2512 psp->pr_lwpid = t->t_tid;
2513 psp->pr_addr = (uintptr_t)t;
2514 psp->pr_wchan = (uintptr_t)t->t_wchan;
2516 /* map the thread state enum into a process state enum */
2517 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2518 switch (state) {
2519 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2520 case TS_RUN: state = SRUN; c = 'R'; break;
2521 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2522 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2523 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2524 case TS_WAIT: state = SWAIT; c = 'W'; break;
2525 default: state = 0; c = '?'; break;
2527 psp->pr_state = state;
2528 psp->pr_sname = c;
2529 if ((sobj = t->t_sobj_ops) != NULL)
2530 psp->pr_stype = SOBJ_TYPE(sobj);
2531 retval = CL_DONICE(t, NULL, 0, &niceval);
2532 if (retval == 0) {
2533 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2534 psp->pr_nice = niceval + NZERO;
2536 psp->pr_syscall = t->t_sysnum;
2537 psp->pr_pri = t->t_pri;
2538 psp->pr_start.tv_sec = t->t_start;
2539 psp->pr_start.tv_nsec = 0L;
2540 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2541 scalehrtime(&hrutime);
2542 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2543 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2544 scalehrtime(&hrstime);
2545 hrt2ts(hrutime + hrstime, &psp->pr_time);
2546 /* compute %cpu for the lwp */
2547 pct = cpu_update_pct(t, gethrtime_unscaled());
2548 psp->pr_pctcpu = prgetpctcpu(pct);
2549 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2550 if (psp->pr_cpu > 99)
2551 psp->pr_cpu = 99;
2553 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2554 sizeof (psp->pr_clname) - 1);
2555 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2556 psp->pr_onpro = t->t_cpu->cpu_id;
2557 psp->pr_bindpro = t->t_bind_cpu;
2558 psp->pr_bindpset = t->t_bind_pset;
2559 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
#ifdef _SYSCALL32_IMPL
/*
 * Fill in the ILP32 lwpsinfo structure describing the lwp behind
 * thread t.  The caller must hold the p_lock of the owning process.
 * Kernel addresses are reported as zero because they do not fit in
 * 32 bits.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char sname, state;
	uint64_t cpupct;
	int niceval;
	hrtime_t usertime, systime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */

	/* translate the thread state into a process state and letter */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:
		state = SSLEEP;
		sname = 'S';
		break;
	case TS_RUN:
		state = SRUN;
		sname = 'R';
		break;
	case TS_ONPROC:
		state = SONPROC;
		sname = 'O';
		break;
	case TS_ZOMB:
		state = SZOMB;
		sname = 'Z';
		break;
	case TS_STOPPED:
		state = SSTOP;
		sname = 'T';
		break;
	case TS_WAIT:
		state = SWAIT;
		sname = 'W';
		break;
	default:
		state = 0;
		sname = '?';
		break;
	}
	psp->pr_state = state;
	psp->pr_sname = sname;

	sobj = t->t_sobj_ops;
	if (sobj != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);

	/* old-style priority and nice are zero when DONICE fails */
	if (CL_DONICE(t, NULL, 0, &niceval) == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	} else {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;

	/* pr_time is user time plus system and trap time */
	usertime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&usertime);
	systime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&systime);
	hrt2ts32(usertime + systime, &psp->pr_time);

	/* compute %cpu for the lwp */
	cpupct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(cpupct);
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	/* the structure was zeroed, so the class name stays terminated */
	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
#endif	/* _SYSCALL32_IMPL */
#ifdef _SYSCALL32_IMPL

/*
 * Helpers for converting native psinfo/lwpsinfo structures to their
 * ILP32 forms.  Each takes the source pointer, the destination pointer
 * and a member name.  They are private to the two conversion routines
 * below and are undefined again afterwards.
 */

/* Copy a member by plain assignment. */
#define	PR_COPY_FIELD(s, d, field)	 d->field = s->field

/* Copy a member only when the source describes an ILP32 process. */
#define	PR_COPY_FIELD_ILP32(s, d, field)				\
	do {								\
		if (s->pr_dmodel == PR_MODEL_ILP32) {			\
			d->field = s->field;				\
		}							\
	} while (0)

/* Convert a native timespec member to its 32-bit form. */
#define	PR_COPY_TIMESPEC(s, d, field)				\
	do {							\
		TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);	\
	} while (0)

/* Copy an array member; the length is that of the destination array. */
#define	PR_COPY_BUF(s, d, field)				\
	bcopy(s->field, d->field, sizeof (d->field))

/* Marks a member that is deliberately left zero in the 32-bit form. */
#define	PR_IGNORE_FIELD(s, d, field)

/*
 * Convert a native lwpsinfo to a lwpsinfo32.  The destination is zeroed
 * first; pr_addr and pr_wchan are left zero.
 */
void
lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
{
	bzero(dest, sizeof (*dest));

	PR_COPY_FIELD(src, dest, pr_flag);
	PR_COPY_FIELD(src, dest, pr_lwpid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_IGNORE_FIELD(src, dest, pr_wchan);
	PR_COPY_FIELD(src, dest, pr_stype);
	PR_COPY_FIELD(src, dest, pr_state);
	PR_COPY_FIELD(src, dest, pr_sname);
	PR_COPY_FIELD(src, dest, pr_nice);
	PR_COPY_FIELD(src, dest, pr_syscall);
	PR_COPY_FIELD(src, dest, pr_oldpri);
	PR_COPY_FIELD(src, dest, pr_cpu);
	PR_COPY_FIELD(src, dest, pr_pri);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	/* pr_time was previously never converted and always read as zero */
	PR_COPY_TIMESPEC(src, dest, pr_time);
	PR_COPY_BUF(src, dest, pr_clname);
	PR_COPY_BUF(src, dest, pr_name);
	PR_COPY_FIELD(src, dest, pr_onpro);
	PR_COPY_FIELD(src, dest, pr_bindpro);
	PR_COPY_FIELD(src, dest, pr_bindpset);
	PR_COPY_FIELD(src, dest, pr_lgrp);
}

/*
 * Convert a native psinfo to a psinfo32, including the embedded pr_lwp.
 * The destination is zeroed first.  pr_addr is left zero, and pr_size,
 * pr_rssize, pr_argv and pr_envp are copied only for ILP32 processes.
 */
void
psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
{
	bzero(dest, sizeof (*dest));

	PR_COPY_FIELD(src, dest, pr_flag);
	PR_COPY_FIELD(src, dest, pr_nlwp);
	PR_COPY_FIELD(src, dest, pr_pid);
	PR_COPY_FIELD(src, dest, pr_ppid);
	PR_COPY_FIELD(src, dest, pr_pgid);
	PR_COPY_FIELD(src, dest, pr_sid);
	PR_COPY_FIELD(src, dest, pr_uid);
	PR_COPY_FIELD(src, dest, pr_euid);
	PR_COPY_FIELD(src, dest, pr_gid);
	PR_COPY_FIELD(src, dest, pr_egid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_COPY_FIELD_ILP32(src, dest, pr_size);
	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
	PR_COPY_FIELD(src, dest, pr_ttydev);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_FIELD(src, dest, pr_pctmem);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_TIMESPEC(src, dest, pr_time);
	PR_COPY_TIMESPEC(src, dest, pr_ctime);
	PR_COPY_BUF(src, dest, pr_fname);
	PR_COPY_BUF(src, dest, pr_psargs);
	PR_COPY_FIELD(src, dest, pr_wstat);
	PR_COPY_FIELD(src, dest, pr_argc);
	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
	PR_COPY_FIELD(src, dest, pr_dmodel);
	PR_COPY_FIELD(src, dest, pr_taskid);
	PR_COPY_FIELD(src, dest, pr_projid);
	PR_COPY_FIELD(src, dest, pr_nzomb);
	PR_COPY_FIELD(src, dest, pr_poolid);
	/*
	 * pr_zoneid replaces two redundant repeats of the pr_poolid copy;
	 * without it the zone id always read as zero.
	 */
	PR_COPY_FIELD(src, dest, pr_zoneid);
	PR_COPY_FIELD(src, dest, pr_contract);

	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
}

#undef	PR_COPY_FIELD
#undef	PR_COPY_FIELD_ILP32
#undef	PR_COPY_TIMESPEC
#undef	PR_COPY_BUF
#undef	PR_IGNORE_FIELD

#endif	/* _SYSCALL32_IMPL */
2728 * This used to get called when microstate accounting was disabled but
2729 * microstate information was requested. Since Microstate accounting is on
2730 * regardless of the proc flags, this simply makes it appear to procfs that
2731 * microstate accounting is on. This is relatively meaningless since you
2732 * can't turn it off, but this is here for the sake of appearances.
2735 /*ARGSUSED*/
2736 void
2737 estimate_msacct(kthread_t *t, hrtime_t curtime)
2739 proc_t *p;
2741 if (t == NULL)
2742 return;
2744 p = ttoproc(t);
2745 ASSERT(MUTEX_HELD(&p->p_lock));
2748 * A system process (p0) could be referenced if the thread is
2749 * in the process of exiting. Don't turn on microstate accounting
2750 * in that case.
2752 if (p->p_flag & SSYS)
2753 return;
2756 * Loop through all the LWPs (kernel threads) in the process.
2758 t = p->p_tlist;
2759 do {
2760 t->t_proc_flag |= TP_MSACCT;
2761 } while ((t = t->t_forw) != p->p_tlist);
2763 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
2767 * It's not really possible to disable microstate accounting anymore.
2768 * However, this routine simply turns off the ms accounting flags in a process
2769 * This way procfs can still pretend to turn microstate accounting on and
2770 * off for a process, but it actually doesn't do anything. This is
2771 * a neutered form of preemptive idiot-proofing.
2773 void
2774 disable_msacct(proc_t *p)
2776 kthread_t *t;
2778 ASSERT(MUTEX_HELD(&p->p_lock));
2780 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
2782 * Loop through all the LWPs (kernel threads) in the process.
2784 if ((t = p->p_tlist) != NULL) {
2785 do {
2786 /* clear per-thread flag */
2787 t->t_proc_flag &= ~TP_MSACCT;
2788 } while ((t = t->t_forw) != p->p_tlist);
2793 * Return resource usage information.
2795 void
2796 prgetusage(kthread_t *t, prhusage_t *pup)
2798 klwp_t *lwp = ttolwp(t);
2799 hrtime_t *mstimep;
2800 struct mstate *ms = &lwp->lwp_mstate;
2801 int state;
2802 int i;
2803 hrtime_t curtime;
2804 hrtime_t waitrq;
2805 hrtime_t tmp1;
2807 curtime = gethrtime_unscaled();
2809 pup->pr_lwpid = t->t_tid;
2810 pup->pr_count = 1;
2811 pup->pr_create = ms->ms_start;
2812 pup->pr_term = ms->ms_term;
2813 scalehrtime(&pup->pr_create);
2814 scalehrtime(&pup->pr_term);
2815 if (ms->ms_term == 0) {
2816 pup->pr_rtime = curtime - ms->ms_start;
2817 scalehrtime(&pup->pr_rtime);
2818 } else {
2819 pup->pr_rtime = ms->ms_term - ms->ms_start;
2820 scalehrtime(&pup->pr_rtime);
2824 pup->pr_utime = ms->ms_acct[LMS_USER];
2825 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
2826 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
2827 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
2828 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
2829 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
2830 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2831 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
2832 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2833 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2835 prscaleusage(pup);
2838 * Adjust for time waiting in the dispatcher queue.
2840 waitrq = t->t_waitrq; /* hopefully atomic */
2841 if (waitrq != 0) {
2842 if (waitrq > curtime) {
2843 curtime = gethrtime_unscaled();
2845 tmp1 = curtime - waitrq;
2846 scalehrtime(&tmp1);
2847 pup->pr_wtime += tmp1;
2848 curtime = waitrq;
2852 * Adjust for time spent in current microstate.
2854 if (ms->ms_state_start > curtime) {
2855 curtime = gethrtime_unscaled();
2858 i = 0;
2859 do {
2860 switch (state = t->t_mstate) {
2861 case LMS_SLEEP:
2863 * Update the timer for the current sleep state.
2865 switch (state = ms->ms_prev) {
2866 case LMS_TFAULT:
2867 case LMS_DFAULT:
2868 case LMS_KFAULT:
2869 case LMS_USER_LOCK:
2870 break;
2871 default:
2872 state = LMS_SLEEP;
2873 break;
2875 break;
2876 case LMS_TFAULT:
2877 case LMS_DFAULT:
2878 case LMS_KFAULT:
2879 case LMS_USER_LOCK:
2880 state = LMS_SYSTEM;
2881 break;
2883 switch (state) {
2884 case LMS_USER: mstimep = &pup->pr_utime; break;
2885 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
2886 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
2887 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
2888 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
2889 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
2890 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
2891 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
2892 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
2893 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
2894 default: panic("prgetusage: unknown microstate");
2896 tmp1 = curtime - ms->ms_state_start;
2897 if (tmp1 < 0) {
2898 curtime = gethrtime_unscaled();
2899 i++;
2900 continue;
2902 scalehrtime(&tmp1);
2903 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2905 *mstimep += tmp1;
2907 /* update pup timestamp */
2908 pup->pr_tstamp = curtime;
2909 scalehrtime(&pup->pr_tstamp);
2912 * Resource usage counters.
2914 pup->pr_minf = lwp->lwp_ru.minflt;
2915 pup->pr_majf = lwp->lwp_ru.majflt;
2916 pup->pr_nswap = lwp->lwp_ru.nswap;
2917 pup->pr_inblk = lwp->lwp_ru.inblock;
2918 pup->pr_oublk = lwp->lwp_ru.oublock;
2919 pup->pr_msnd = lwp->lwp_ru.msgsnd;
2920 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
2921 pup->pr_sigs = lwp->lwp_ru.nsignals;
2922 pup->pr_vctx = lwp->lwp_ru.nvcsw;
2923 pup->pr_ictx = lwp->lwp_ru.nivcsw;
2924 pup->pr_sysc = lwp->lwp_ru.sysc;
2925 pup->pr_ioch = lwp->lwp_ru.ioch;
2929 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2931 void
2932 prscaleusage(prhusage_t *usg)
2934 scalehrtime(&usg->pr_utime);
2935 scalehrtime(&usg->pr_stime);
2936 scalehrtime(&usg->pr_ttime);
2937 scalehrtime(&usg->pr_tftime);
2938 scalehrtime(&usg->pr_dftime);
2939 scalehrtime(&usg->pr_kftime);
2940 scalehrtime(&usg->pr_ltime);
2941 scalehrtime(&usg->pr_slptime);
2942 scalehrtime(&usg->pr_wtime);
2943 scalehrtime(&usg->pr_stoptime);
2948 * Sum resource usage information.
2950 void
2951 praddusage(kthread_t *t, prhusage_t *pup)
2953 klwp_t *lwp = ttolwp(t);
2954 hrtime_t *mstimep;
2955 struct mstate *ms = &lwp->lwp_mstate;
2956 int state;
2957 int i;
2958 hrtime_t curtime;
2959 hrtime_t waitrq;
2960 hrtime_t tmp;
2961 prhusage_t conv;
2963 curtime = gethrtime_unscaled();
2965 if (ms->ms_term == 0) {
2966 tmp = curtime - ms->ms_start;
2967 scalehrtime(&tmp);
2968 pup->pr_rtime += tmp;
2969 } else {
2970 tmp = ms->ms_term - ms->ms_start;
2971 scalehrtime(&tmp);
2972 pup->pr_rtime += tmp;
2975 conv.pr_utime = ms->ms_acct[LMS_USER];
2976 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
2977 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
2978 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
2979 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
2980 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
2981 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2982 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
2983 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2984 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
2986 prscaleusage(&conv);
2988 pup->pr_utime += conv.pr_utime;
2989 pup->pr_stime += conv.pr_stime;
2990 pup->pr_ttime += conv.pr_ttime;
2991 pup->pr_tftime += conv.pr_tftime;
2992 pup->pr_dftime += conv.pr_dftime;
2993 pup->pr_kftime += conv.pr_kftime;
2994 pup->pr_ltime += conv.pr_ltime;
2995 pup->pr_slptime += conv.pr_slptime;
2996 pup->pr_wtime += conv.pr_wtime;
2997 pup->pr_stoptime += conv.pr_stoptime;
3000 * Adjust for time waiting in the dispatcher queue.
3002 waitrq = t->t_waitrq; /* hopefully atomic */
3003 if (waitrq != 0) {
3004 if (waitrq > curtime) {
3005 curtime = gethrtime_unscaled();
3007 tmp = curtime - waitrq;
3008 scalehrtime(&tmp);
3009 pup->pr_wtime += tmp;
3010 curtime = waitrq;
3014 * Adjust for time spent in current microstate.
3016 if (ms->ms_state_start > curtime) {
3017 curtime = gethrtime_unscaled();
3020 i = 0;
3021 do {
3022 switch (state = t->t_mstate) {
3023 case LMS_SLEEP:
3025 * Update the timer for the current sleep state.
3027 switch (state = ms->ms_prev) {
3028 case LMS_TFAULT:
3029 case LMS_DFAULT:
3030 case LMS_KFAULT:
3031 case LMS_USER_LOCK:
3032 break;
3033 default:
3034 state = LMS_SLEEP;
3035 break;
3037 break;
3038 case LMS_TFAULT:
3039 case LMS_DFAULT:
3040 case LMS_KFAULT:
3041 case LMS_USER_LOCK:
3042 state = LMS_SYSTEM;
3043 break;
3045 switch (state) {
3046 case LMS_USER: mstimep = &pup->pr_utime; break;
3047 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3048 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3049 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3050 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3051 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3052 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3053 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3054 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3055 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3056 default: panic("praddusage: unknown microstate");
3058 tmp = curtime - ms->ms_state_start;
3059 if (tmp < 0) {
3060 curtime = gethrtime_unscaled();
3061 i++;
3062 continue;
3064 scalehrtime(&tmp);
3065 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3067 *mstimep += tmp;
3069 /* update pup timestamp */
3070 pup->pr_tstamp = curtime;
3071 scalehrtime(&pup->pr_tstamp);
3074 * Resource usage counters.
3076 pup->pr_minf += lwp->lwp_ru.minflt;
3077 pup->pr_majf += lwp->lwp_ru.majflt;
3078 pup->pr_nswap += lwp->lwp_ru.nswap;
3079 pup->pr_inblk += lwp->lwp_ru.inblock;
3080 pup->pr_oublk += lwp->lwp_ru.oublock;
3081 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3082 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3083 pup->pr_sigs += lwp->lwp_ru.nsignals;
3084 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3085 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3086 pup->pr_sysc += lwp->lwp_ru.sysc;
3087 pup->pr_ioch += lwp->lwp_ru.ioch;
3091 * Convert a prhusage_t to a prusage_t.
3092 * This means convert each hrtime_t to a timestruc_t
3093 * and copy the count fields uint64_t => ulong_t.
3095 void
3096 prcvtusage(prhusage_t *pup, prusage_t *upup)
3098 uint64_t *ullp;
3099 ulong_t *ulp;
3100 int i;
3102 upup->pr_lwpid = pup->pr_lwpid;
3103 upup->pr_count = pup->pr_count;
3105 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3106 hrt2ts(pup->pr_create, &upup->pr_create);
3107 hrt2ts(pup->pr_term, &upup->pr_term);
3108 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3109 hrt2ts(pup->pr_utime, &upup->pr_utime);
3110 hrt2ts(pup->pr_stime, &upup->pr_stime);
3111 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3112 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3113 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3114 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3115 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3116 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3117 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3118 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3119 bzero(upup->filltime, sizeof (upup->filltime));
3121 ullp = &pup->pr_minf;
3122 ulp = &upup->pr_minf;
3123 for (i = 0; i < 22; i++)
3124 *ulp++ = (ulong_t)*ullp++;
#ifdef _SYSCALL32_IMPL
/*
 * Convert a prhusage_t to a prusage32_t.
 * Every hrtime_t becomes a 32-bit timestruc and every counter is
 * narrowed uint64_t => uint32_t.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *from;
	uint32_t *to;
	int idx;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
	hrt2ts32(pup->pr_create,	&upup->pr_create);
	hrt2ts32(pup->pr_term,		&upup->pr_term);
	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy 22 consecutive counters starting at pr_minf, treating both
	 * structures as arrays from that member on.
	 * NOTE(review): 22 presumably covers the named counters plus
	 * trailing filler; confirm against the structure definitions.
	 */
	from = &pup->pr_minf;
	to = &upup->pr_minf;
	for (idx = 0; idx < 22; idx++)
		to[idx] = (uint32_t)from[idx];
}
#endif	/* _SYSCALL32_IMPL */
3162 * Determine whether a set is empty.
3165 setisempty(uint32_t *sp, uint_t n)
3167 while (n--)
3168 if (*sp++)
3169 return (0);
3170 return (1);
3174 * Utility routine for establishing a watched area in the process.
3175 * Keep the list of watched areas sorted by virtual address.
3178 set_watched_area(proc_t *p, struct watched_area *pwa)
3180 caddr_t vaddr = pwa->wa_vaddr;
3181 caddr_t eaddr = pwa->wa_eaddr;
3182 ulong_t flags = pwa->wa_flags;
3183 struct watched_area *target;
3184 avl_index_t where;
3185 int error = 0;
3187 /* we must not be holding p->p_lock, but the process must be locked */
3188 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3189 ASSERT(p->p_proc_flag & P_PR_LOCK);
3192 * If this is our first watchpoint, enable watchpoints for the process.
3194 if (!pr_watch_active(p)) {
3195 kthread_t *t;
3197 mutex_enter(&p->p_lock);
3198 if ((t = p->p_tlist) != NULL) {
3199 do {
3200 watch_enable(t);
3201 } while ((t = t->t_forw) != p->p_tlist);
3203 mutex_exit(&p->p_lock);
3206 target = pr_find_watched_area(p, pwa, &where);
3207 if (target != NULL) {
3209 * We discovered an existing, overlapping watched area.
3210 * Allow it only if it is an exact match.
3212 if (target->wa_vaddr != vaddr ||
3213 target->wa_eaddr != eaddr)
3214 error = EINVAL;
3215 else if (target->wa_flags != flags) {
3216 error = set_watched_page(p, vaddr, eaddr,
3217 flags, target->wa_flags);
3218 target->wa_flags = flags;
3220 kmem_free(pwa, sizeof (struct watched_area));
3221 } else {
3222 avl_insert(&p->p_warea, pwa, where);
3223 error = set_watched_page(p, vaddr, eaddr, flags, 0);
3226 return (error);
3230 * Utility routine for clearing a watched area in the process.
3231 * Must be an exact match of the virtual address.
3232 * size and flags don't matter.
3235 clear_watched_area(proc_t *p, struct watched_area *pwa)
3237 struct watched_area *found;
3239 /* we must not be holding p->p_lock, but the process must be locked */
3240 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3241 ASSERT(p->p_proc_flag & P_PR_LOCK);
3244 if (!pr_watch_active(p)) {
3245 kmem_free(pwa, sizeof (struct watched_area));
3246 return (0);
3250 * Look for a matching address in the watched areas. If a match is
3251 * found, clear the old watched area and adjust the watched page(s). It
3252 * is not an error if there is no match.
3254 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3255 found->wa_vaddr == pwa->wa_vaddr) {
3256 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3257 found->wa_flags);
3258 avl_remove(&p->p_warea, found);
3259 kmem_free(found, sizeof (struct watched_area));
3262 kmem_free(pwa, sizeof (struct watched_area));
3265 * If we removed the last watched area from the process, disable
3266 * watchpoints.
3268 if (!pr_watch_active(p)) {
3269 kthread_t *t;
3271 mutex_enter(&p->p_lock);
3272 if ((t = p->p_tlist) != NULL) {
3273 do {
3274 watch_disable(t);
3275 } while ((t = t->t_forw) != p->p_tlist);
3277 mutex_exit(&p->p_lock);
3280 return (0);
3284 * Frees all the watched_area structures
3286 void
3287 pr_free_watchpoints(proc_t *p)
3289 struct watched_area *delp;
3290 void *cookie;
3292 cookie = NULL;
3293 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3294 kmem_free(delp, sizeof (struct watched_area));
3296 avl_destroy(&p->p_warea);
3300 * This one is called by the traced process to unwatch all the
3301 * pages while deallocating the list of watched_page structs.
3303 void
3304 pr_free_watched_pages(proc_t *p)
3306 struct as *as = p->p_as;
3307 struct watched_page *pwp;
3308 uint_t prot;
3309 int retrycnt, err;
3310 void *cookie;
3312 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3313 return;
3315 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3316 AS_LOCK_ENTER(as, RW_WRITER);
3318 pwp = avl_first(&as->a_wpage);
3320 cookie = NULL;
3321 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3322 retrycnt = 0;
3323 if ((prot = pwp->wp_oprot) != 0) {
3324 caddr_t addr = pwp->wp_vaddr;
3325 struct seg *seg;
3326 retry:
3328 if ((pwp->wp_prot != prot ||
3329 (pwp->wp_flags & WP_NOWATCH)) &&
3330 (seg = as_segat(as, addr)) != NULL) {
3331 err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3332 if (err == IE_RETRY) {
3333 ASSERT(retrycnt == 0);
3334 retrycnt++;
3335 goto retry;
3339 kmem_free(pwp, sizeof (struct watched_page));
3342 avl_destroy(&as->a_wpage);
3343 p->p_wprot = NULL;
3345 AS_LOCK_EXIT(as);
3349 * Insert a watched area into the list of watched pages.
3350 * If oflags is zero then we are adding a new watched area.
3351 * Otherwise we are changing the flags of an existing watched area.
3353 static int
3354 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3355 ulong_t flags, ulong_t oflags)
3357 struct as *as = p->p_as;
3358 avl_tree_t *pwp_tree;
3359 struct watched_page *pwp, *newpwp;
3360 struct watched_page tpw;
3361 avl_index_t where;
3362 struct seg *seg;
3363 uint_t prot;
3364 caddr_t addr;
3367 * We need to pre-allocate a list of structures before we grab the
3368 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3369 * held.
3371 newpwp = NULL;
3372 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3373 addr < eaddr; addr += PAGESIZE) {
3374 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3375 pwp->wp_list = newpwp;
3376 newpwp = pwp;
3379 AS_LOCK_ENTER(as, RW_WRITER);
3382 * Search for an existing watched page to contain the watched area.
3383 * If none is found, grab a new one from the available list
3384 * and insert it in the active list, keeping the list sorted
3385 * by user-level virtual address.
3387 if (p->p_flag & SVFWAIT)
3388 pwp_tree = &p->p_wpage;
3389 else
3390 pwp_tree = &as->a_wpage;
3392 again:
3393 if (avl_numnodes(pwp_tree) > prnwatch) {
3394 AS_LOCK_EXIT(as);
3395 while (newpwp != NULL) {
3396 pwp = newpwp->wp_list;
3397 kmem_free(newpwp, sizeof (struct watched_page));
3398 newpwp = pwp;
3400 return (E2BIG);
3403 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3404 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3405 pwp = newpwp;
3406 newpwp = newpwp->wp_list;
3407 pwp->wp_list = NULL;
3408 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3409 (uintptr_t)PAGEMASK);
3410 avl_insert(pwp_tree, pwp, where);
3413 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3415 if (oflags & WA_READ)
3416 pwp->wp_read--;
3417 if (oflags & WA_WRITE)
3418 pwp->wp_write--;
3419 if (oflags & WA_EXEC)
3420 pwp->wp_exec--;
3422 ASSERT(pwp->wp_read >= 0);
3423 ASSERT(pwp->wp_write >= 0);
3424 ASSERT(pwp->wp_exec >= 0);
3426 if (flags & WA_READ)
3427 pwp->wp_read++;
3428 if (flags & WA_WRITE)
3429 pwp->wp_write++;
3430 if (flags & WA_EXEC)
3431 pwp->wp_exec++;
3433 if (!(p->p_flag & SVFWAIT)) {
3434 vaddr = pwp->wp_vaddr;
3435 if (pwp->wp_oprot == 0 &&
3436 (seg = as_segat(as, vaddr)) != NULL) {
3437 SEGOP_GETPROT(seg, vaddr, 0, &prot);
3438 pwp->wp_oprot = (uchar_t)prot;
3439 pwp->wp_prot = (uchar_t)prot;
3441 if (pwp->wp_oprot != 0) {
3442 prot = pwp->wp_oprot;
3443 if (pwp->wp_read)
3444 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3445 if (pwp->wp_write)
3446 prot &= ~PROT_WRITE;
3447 if (pwp->wp_exec)
3448 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3449 if (!(pwp->wp_flags & WP_NOWATCH) &&
3450 pwp->wp_prot != prot &&
3451 (pwp->wp_flags & WP_SETPROT) == 0) {
3452 pwp->wp_flags |= WP_SETPROT;
3453 pwp->wp_list = p->p_wprot;
3454 p->p_wprot = pwp;
3456 pwp->wp_prot = (uchar_t)prot;
3461 * If the watched area extends into the next page then do
3462 * it over again with the virtual address of the next page.
3464 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3465 goto again;
3467 AS_LOCK_EXIT(as);
3470 * Free any pages we may have over-allocated
3472 while (newpwp != NULL) {
3473 pwp = newpwp->wp_list;
3474 kmem_free(newpwp, sizeof (struct watched_page));
3475 newpwp = pwp;
3478 return (0);
3482 * Remove a watched area from the list of watched pages.
3483 * A watched area may extend over more than one page.
3485 static void
3486 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3488 struct as *as = p->p_as;
3489 struct watched_page *pwp;
3490 struct watched_page tpw;
3491 avl_tree_t *tree;
3492 avl_index_t where;
3494 AS_LOCK_ENTER(as, RW_WRITER);
3496 if (p->p_flag & SVFWAIT)
3497 tree = &p->p_wpage;
3498 else
3499 tree = &as->a_wpage;
3501 tpw.wp_vaddr = vaddr =
3502 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3503 pwp = avl_find(tree, &tpw, &where);
3504 if (pwp == NULL)
3505 pwp = avl_nearest(tree, where, AVL_AFTER);
3507 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3508 ASSERT(vaddr <= pwp->wp_vaddr);
3510 if (flags & WA_READ)
3511 pwp->wp_read--;
3512 if (flags & WA_WRITE)
3513 pwp->wp_write--;
3514 if (flags & WA_EXEC)
3515 pwp->wp_exec--;
3517 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3519 * Reset the hat layer's protections on this page.
3521 if (pwp->wp_oprot != 0) {
3522 uint_t prot = pwp->wp_oprot;
3524 if (pwp->wp_read)
3525 prot &=
3526 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3527 if (pwp->wp_write)
3528 prot &= ~PROT_WRITE;
3529 if (pwp->wp_exec)
3530 prot &=
3531 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3532 if (!(pwp->wp_flags & WP_NOWATCH) &&
3533 pwp->wp_prot != prot &&
3534 (pwp->wp_flags & WP_SETPROT) == 0) {
3535 pwp->wp_flags |= WP_SETPROT;
3536 pwp->wp_list = p->p_wprot;
3537 p->p_wprot = pwp;
3539 pwp->wp_prot = (uchar_t)prot;
3541 } else {
3543 * No watched areas remain in this page.
3544 * Reset everything to normal.
3546 if (pwp->wp_oprot != 0) {
3547 pwp->wp_prot = pwp->wp_oprot;
3548 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3549 pwp->wp_flags |= WP_SETPROT;
3550 pwp->wp_list = p->p_wprot;
3551 p->p_wprot = pwp;
3556 pwp = AVL_NEXT(tree, pwp);
3559 AS_LOCK_EXIT(as);
3563 * Return the original protections for the specified page.
3565 static void
3566 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3568 struct watched_page *pwp;
3569 struct watched_page tpw;
3571 ASSERT(AS_LOCK_HELD(as));
3573 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3574 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3575 *prot = pwp->wp_oprot;
3578 static prpagev_t *
3579 pr_pagev_create(struct seg *seg, int check_noreserve)
3581 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3582 size_t total_pages = seg_pages(seg);
3585 * Limit the size of our vectors to pagev_lim pages at a time. We need
3586 * 4 or 5 bytes of storage per page, so this means we limit ourself
3587 * to about a megabyte of kernel heap by default.
3589 pagev->pg_npages = MIN(total_pages, pagev_lim);
3590 pagev->pg_pnbase = 0;
3592 pagev->pg_protv =
3593 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3595 if (check_noreserve)
3596 pagev->pg_incore =
3597 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3598 else
3599 pagev->pg_incore = NULL;
3601 return (pagev);
3604 static void
3605 pr_pagev_destroy(prpagev_t *pagev)
3607 if (pagev->pg_incore != NULL)
3608 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3610 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3611 kmem_free(pagev, sizeof (prpagev_t));
3614 static caddr_t
3615 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3617 ulong_t lastpg = seg_page(seg, eaddr - 1);
3618 ulong_t pn, pnlim;
3619 caddr_t saddr;
3620 size_t len;
3622 ASSERT(addr >= seg->s_base && addr <= eaddr);
3624 if (addr == eaddr)
3625 return (eaddr);
3627 refill:
3628 ASSERT(addr < eaddr);
3629 pagev->pg_pnbase = seg_page(seg, addr);
3630 pnlim = pagev->pg_pnbase + pagev->pg_npages;
3631 saddr = addr;
3633 if (lastpg < pnlim)
3634 len = (size_t)(eaddr - addr);
3635 else
3636 len = pagev->pg_npages * PAGESIZE;
3638 if (pagev->pg_incore != NULL) {
3640 * INCORE cleverly has different semantics than GETPROT:
3641 * it returns info on pages up to but NOT including addr + len.
3643 SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
3644 pn = pagev->pg_pnbase;
3646 do {
3648 * Guilty knowledge here: We know that segvn_incore
3649 * returns more than just the low-order bit that
3650 * indicates the page is actually in memory. If any
3651 * bits are set, then the page has backing store.
3653 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3654 goto out;
3656 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3659 * If we examined all the pages in the vector but we're not
3660 * at the end of the segment, take another lap.
3662 if (addr < eaddr)
3663 goto refill;
3667 * Need to take len - 1 because addr + len is the address of the
3668 * first byte of the page just past the end of what we want.
3670 out:
3671 SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
3672 return (addr);
3675 static caddr_t
3676 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3677 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3680 * Our starting address is either the specified address, or the base
3681 * address from the start of the pagev. If the latter is greater,
3682 * this means a previous call to pr_pagev_fill has already scanned
3683 * further than the end of the previous mapping.
3685 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3686 caddr_t addr = MAX(*saddrp, base);
3687 ulong_t pn = seg_page(seg, addr);
3688 uint_t prot, nprot;
3691 * If we're dealing with noreserve pages, then advance addr to
3692 * the address of the next page which has backing store.
3694 if (pagev->pg_incore != NULL) {
3695 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3696 if ((addr += PAGESIZE) == eaddr) {
3697 *saddrp = addr;
3698 prot = 0;
3699 goto out;
3701 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3702 addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3703 if (addr == eaddr) {
3704 *saddrp = addr;
3705 prot = 0;
3706 goto out;
3708 pn = seg_page(seg, addr);
3714 * Get the protections on the page corresponding to addr.
3716 pn = seg_page(seg, addr);
3717 ASSERT(pn >= pagev->pg_pnbase);
3718 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3720 prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3721 getwatchprot(seg->s_as, addr, &prot);
3722 *saddrp = addr;
3725 * Now loop until we find a backed page with different protections
3726 * or we reach the end of this segment.
3728 while ((addr += PAGESIZE) < eaddr) {
3730 * If pn has advanced to the page number following what we
3731 * have information on, refill the page vector and reset
3732 * addr and pn. If pr_pagev_fill does not return the
3733 * address of the next page, we have a discontiguity and
3734 * thus have reached the end of the current mapping.
3736 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3737 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3738 if (naddr != addr)
3739 goto out;
3740 pn = seg_page(seg, addr);
3744 * The previous page's protections are in prot, and it has
3745 * backing. If this page is MAP_NORESERVE and has no backing,
3746 * then end this mapping and return the previous protections.
3748 if (pagev->pg_incore != NULL &&
3749 pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3750 break;
3753 * Otherwise end the mapping if this page's protections (nprot)
3754 * are different than those in the previous page (prot).
3756 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3757 getwatchprot(seg->s_as, addr, &nprot);
3759 if (nprot != prot)
3760 break;
3763 out:
3764 *protp = prot;
3765 return (addr);
3768 size_t
3769 pr_getsegsize(struct seg *seg, int reserved)
3771 size_t size = seg->s_size;
3774 * If we're interested in the reserved space, return the size of the
3775 * segment itself. Everything else in this function is a special case
3776 * to determine the actual underlying size of various segment types.
3778 if (reserved)
3779 return (size);
3782 * If this is a segvn mapping of a regular file, return the smaller
3783 * of the segment size and the remaining size of the file beyond
3784 * the file offset corresponding to seg->s_base.
3786 if (seg->s_ops == &segvn_ops) {
3787 vattr_t vattr;
3788 vnode_t *vp;
3790 vattr.va_mask = AT_SIZE;
3792 if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
3793 vp != NULL && vp->v_type == VREG &&
3794 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
3796 u_offset_t fsize = vattr.va_size;
3797 u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
3799 if (fsize < offset)
3800 fsize = 0;
3801 else
3802 fsize -= offset;
3804 fsize = roundup(fsize, (u_offset_t)PAGESIZE);
3806 if (fsize < (u_offset_t)size)
3807 size = (size_t)fsize;
3810 return (size);
3814 * If this is an ISM shared segment, don't include pages that are
3815 * beyond the real size of the spt segment that backs it.
3817 if (seg->s_ops == &segspt_shmops)
3818 return (MIN(spt_realsize(seg), size));
3821 * If this is segment is a mapping from /dev/null, then this is a
3822 * reservation of virtual address space and has no actual size.
3823 * Such segments are backed by segdev and have type set to neither
3824 * MAP_SHARED nor MAP_PRIVATE.
3826 if (seg->s_ops == &segdev_ops &&
3827 ((SEGOP_GETTYPE(seg, seg->s_base) &
3828 (MAP_SHARED | MAP_PRIVATE)) == 0))
3829 return (0);
3832 * If this segment doesn't match one of the special types we handle,
3833 * just return the size of the segment itself.
3835 return (size);
3838 uint_t
3839 pr_getprot(struct seg *seg, int reserved, void **tmp,
3840 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3842 struct as *as = seg->s_as;
3844 caddr_t saddr = *saddrp;
3845 caddr_t naddr;
3847 int check_noreserve;
3848 uint_t prot;
3850 union {
3851 struct segvn_data *svd;
3852 struct segdev_data *sdp;
3853 void *data;
3854 } s;
3856 s.data = seg->s_data;
3858 ASSERT(AS_WRITE_HELD(as));
3859 ASSERT(saddr >= seg->s_base && saddr < eaddr);
3860 ASSERT(eaddr <= seg->s_base + seg->s_size);
3863 * Don't include MAP_NORESERVE pages in the address range
3864 * unless their mappings have actually materialized.
3865 * We cheat by knowing that segvn is the only segment
3866 * driver that supports MAP_NORESERVE.
3868 check_noreserve =
3869 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3870 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3871 (s.svd->flags & MAP_NORESERVE));
3874 * Examine every page only as a last resort. We use guilty knowledge
3875 * of segvn and segdev to avoid this: if there are no per-page
3876 * protections present in the segment and we don't care about
3877 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3879 if (!check_noreserve && saddr == seg->s_base &&
3880 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3881 prot = s.svd->prot;
3882 getwatchprot(as, saddr, &prot);
3883 naddr = eaddr;
3885 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3886 s.sdp != NULL && s.sdp->pageprot == 0) {
3887 prot = s.sdp->prot;
3888 getwatchprot(as, saddr, &prot);
3889 naddr = eaddr;
3891 } else {
3892 prpagev_t *pagev;
3895 * If addr is sitting at the start of the segment, then
3896 * create a page vector to store protection and incore
3897 * information for pages in the segment, and fill it.
3898 * Otherwise, we expect *tmp to address the prpagev_t
3899 * allocated by a previous call to this function.
3901 if (saddr == seg->s_base) {
3902 pagev = pr_pagev_create(seg, check_noreserve);
3903 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3905 ASSERT(*tmp == NULL);
3906 *tmp = pagev;
3908 ASSERT(saddr <= eaddr);
3909 *saddrp = saddr;
3911 if (saddr == eaddr) {
3912 naddr = saddr;
3913 prot = 0;
3914 goto out;
3917 } else {
3918 ASSERT(*tmp != NULL);
3919 pagev = (prpagev_t *)*tmp;
3922 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3923 ASSERT(naddr <= eaddr);
3926 out:
3927 if (naddr == eaddr)
3928 pr_getprot_done(tmp);
3929 *naddrp = naddr;
3930 return (prot);
3933 void
3934 pr_getprot_done(void **tmp)
3936 if (*tmp != NULL) {
3937 pr_pagev_destroy((prpagev_t *)*tmp);
3938 *tmp = NULL;
3943 * Return true iff the vnode is a /proc file from the object directory.
3946 pr_isobject(vnode_t *vp)
3948 return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3952 * Return true iff the vnode is a /proc file opened by the process itself.
3955 pr_isself(vnode_t *vp)
3958 * XXX: To retain binary compatibility with the old
3959 * ioctl()-based version of /proc, we exempt self-opens
3960 * of /proc/<pid> from being marked close-on-exec.
3962 return (vn_matchops(vp, prvnodeops) &&
3963 (VTOP(vp)->pr_flags & PR_ISSELF) &&
3964 VTOP(vp)->pr_type != PR_PIDDIR);
3967 static ssize_t
3968 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3970 ssize_t pagesize, hatsize;
3972 ASSERT(AS_WRITE_HELD(seg->s_as));
3973 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
3974 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
3975 ASSERT(saddr < eaddr);
3977 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
3978 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
3979 ASSERT(pagesize != 0);
3981 if (pagesize == -1)
3982 pagesize = PAGESIZE;
3984 saddr += P2NPHASE((uintptr_t)saddr, pagesize);
3986 while (saddr < eaddr) {
3987 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
3988 break;
3989 ASSERT(IS_P2ALIGNED(saddr, pagesize));
3990 saddr += pagesize;
3993 *naddrp = ((saddr < eaddr) ? saddr : eaddr);
3994 return (hatsize);
3998 * Return an array of structures with extended memory map information.
3999 * We allocate here; the caller must deallocate.
4002 prgetxmap(proc_t *p, list_t *iolhead)
4004 struct as *as = p->p_as;
4005 prxmap_t *mp;
4006 struct seg *seg;
4007 struct seg *brkseg, *stkseg;
4008 struct vnode *vp;
4009 struct vattr vattr;
4010 uint_t prot;
4012 ASSERT(as != &kas && AS_WRITE_HELD(as));
4015 * Request an initial buffer size that doesn't waste memory
4016 * if the address space has only a small number of segments.
4018 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4020 if ((seg = AS_SEGFIRST(as)) == NULL)
4021 return (0);
4023 brkseg = break_seg(p);
4024 stkseg = as_segat(as, prgetstackbase(p));
4026 do {
4027 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4028 caddr_t saddr, naddr, baddr;
4029 void *tmp = NULL;
4030 ssize_t psz;
4031 char *parr;
4032 uint64_t npages;
4033 uint64_t pagenum;
4036 * Segment loop part one: iterate from the base of the segment
4037 * to its end, pausing at each address boundary (baddr) between
4038 * ranges that have different virtual memory protections.
4040 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4041 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4042 ASSERT(baddr >= saddr && baddr <= eaddr);
4045 * Segment loop part two: iterate from the current
4046 * position to the end of the protection boundary,
4047 * pausing at each address boundary (naddr) between
4048 * ranges that have different underlying page sizes.
4050 for (; saddr < baddr; saddr = naddr) {
4051 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4052 ASSERT(naddr >= saddr && naddr <= baddr);
4054 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4056 mp->pr_vaddr = (uintptr_t)saddr;
4057 mp->pr_size = naddr - saddr;
4058 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4059 mp->pr_mflags = 0;
4060 if (prot & PROT_READ)
4061 mp->pr_mflags |= MA_READ;
4062 if (prot & PROT_WRITE)
4063 mp->pr_mflags |= MA_WRITE;
4064 if (prot & PROT_EXEC)
4065 mp->pr_mflags |= MA_EXEC;
4066 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4067 mp->pr_mflags |= MA_SHARED;
4068 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4069 mp->pr_mflags |= MA_NORESERVE;
4070 if (seg->s_ops == &segspt_shmops ||
4071 (seg->s_ops == &segvn_ops &&
4072 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4073 vp == NULL)))
4074 mp->pr_mflags |= MA_ANON;
4075 if (seg == brkseg)
4076 mp->pr_mflags |= MA_BREAK;
4077 else if (seg == stkseg)
4078 mp->pr_mflags |= MA_STACK;
4079 if (seg->s_ops == &segspt_shmops)
4080 mp->pr_mflags |= MA_ISM | MA_SHM;
4082 mp->pr_pagesize = PAGESIZE;
4083 if (psz == -1) {
4084 mp->pr_hatpagesize = 0;
4085 } else {
4086 mp->pr_hatpagesize = psz;
4090 * Manufacture a filename for the "object" dir.
4092 mp->pr_dev = PRNODEV;
4093 vattr.va_mask = AT_FSID|AT_NODEID;
4094 if (seg->s_ops == &segvn_ops &&
4095 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4096 vp != NULL && vp->v_type == VREG &&
4097 VOP_GETATTR(vp, &vattr, 0, CRED(),
4098 NULL) == 0) {
4099 mp->pr_dev = vattr.va_fsid;
4100 mp->pr_ino = vattr.va_nodeid;
4101 if (vp == p->p_exec)
4102 (void) strcpy(mp->pr_mapname,
4103 "a.out");
4104 else
4105 pr_object_name(mp->pr_mapname,
4106 vp, &vattr);
4110 * Get the SysV shared memory id, if any.
4112 if ((mp->pr_mflags & MA_SHARED) &&
4113 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4114 seg->s_base)) != SHMID_NONE) {
4115 if (mp->pr_shmid == SHMID_FREE)
4116 mp->pr_shmid = -1;
4118 mp->pr_mflags |= MA_SHM;
4119 } else {
4120 mp->pr_shmid = -1;
4123 npages = ((uintptr_t)(naddr - saddr)) >>
4124 PAGESHIFT;
4125 parr = kmem_zalloc(npages, KM_SLEEP);
4127 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4129 for (pagenum = 0; pagenum < npages; pagenum++) {
4130 if (parr[pagenum] & SEG_PAGE_INCORE)
4131 mp->pr_rss++;
4132 if (parr[pagenum] & SEG_PAGE_ANON)
4133 mp->pr_anon++;
4134 if (parr[pagenum] & SEG_PAGE_LOCKED)
4135 mp->pr_locked++;
4137 kmem_free(parr, npages);
4140 ASSERT(tmp == NULL);
4141 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4143 return (0);
4147 * Return the process's credentials. We don't need a 32-bit equivalent of
4148 * this function because prcred_t and prcred32_t are actually the same.
4150 void
4151 prgetcred(proc_t *p, prcred_t *pcrp)
4153 mutex_enter(&p->p_crlock);
4154 cred2prcred(p->p_cred, pcrp);
4155 mutex_exit(&p->p_crlock);
/*
 * Compute actual size of the prpriv_t structure.
 * This is whatever priv_prgetprivsize() reports when passed NULL.
 */
size_t
prgetprivsize(void)
{
	size_t privsize = priv_prgetprivsize(NULL);

	return (privsize);
}
4169 * Return the process's privileges. We don't need a 32-bit equivalent of
4170 * this function because prpriv_t and prpriv32_t are actually the same.
4172 void
4173 prgetpriv(proc_t *p, prpriv_t *pprp)
4175 mutex_enter(&p->p_crlock);
4176 cred2prpriv(p->p_cred, pprp);
4177 mutex_exit(&p->p_crlock);
4180 #ifdef _SYSCALL32_IMPL
4182 * Return an array of structures with HAT memory map information.
4183 * We allocate here; the caller must deallocate.
4186 prgetxmap32(proc_t *p, list_t *iolhead)
4188 struct as *as = p->p_as;
4189 prxmap32_t *mp;
4190 struct seg *seg;
4191 struct seg *brkseg, *stkseg;
4192 struct vnode *vp;
4193 struct vattr vattr;
4194 uint_t prot;
4196 ASSERT(as != &kas && AS_WRITE_HELD(as));
4199 * Request an initial buffer size that doesn't waste memory
4200 * if the address space has only a small number of segments.
4202 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4204 if ((seg = AS_SEGFIRST(as)) == NULL)
4205 return (0);
4207 brkseg = break_seg(p);
4208 stkseg = as_segat(as, prgetstackbase(p));
4210 do {
4211 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4212 caddr_t saddr, naddr, baddr;
4213 void *tmp = NULL;
4214 ssize_t psz;
4215 char *parr;
4216 uint64_t npages;
4217 uint64_t pagenum;
4220 * Segment loop part one: iterate from the base of the segment
4221 * to its end, pausing at each address boundary (baddr) between
4222 * ranges that have different virtual memory protections.
4224 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4225 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4226 ASSERT(baddr >= saddr && baddr <= eaddr);
4229 * Segment loop part two: iterate from the current
4230 * position to the end of the protection boundary,
4231 * pausing at each address boundary (naddr) between
4232 * ranges that have different underlying page sizes.
4234 for (; saddr < baddr; saddr = naddr) {
4235 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4236 ASSERT(naddr >= saddr && naddr <= baddr);
4238 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4240 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4241 mp->pr_size = (size32_t)(naddr - saddr);
4242 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4243 mp->pr_mflags = 0;
4244 if (prot & PROT_READ)
4245 mp->pr_mflags |= MA_READ;
4246 if (prot & PROT_WRITE)
4247 mp->pr_mflags |= MA_WRITE;
4248 if (prot & PROT_EXEC)
4249 mp->pr_mflags |= MA_EXEC;
4250 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4251 mp->pr_mflags |= MA_SHARED;
4252 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4253 mp->pr_mflags |= MA_NORESERVE;
4254 if (seg->s_ops == &segspt_shmops ||
4255 (seg->s_ops == &segvn_ops &&
4256 (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4257 vp == NULL)))
4258 mp->pr_mflags |= MA_ANON;
4259 if (seg == brkseg)
4260 mp->pr_mflags |= MA_BREAK;
4261 else if (seg == stkseg)
4262 mp->pr_mflags |= MA_STACK;
4263 if (seg->s_ops == &segspt_shmops)
4264 mp->pr_mflags |= MA_ISM | MA_SHM;
4266 mp->pr_pagesize = PAGESIZE;
4267 if (psz == -1) {
4268 mp->pr_hatpagesize = 0;
4269 } else {
4270 mp->pr_hatpagesize = psz;
4274 * Manufacture a filename for the "object" dir.
4276 mp->pr_dev = PRNODEV32;
4277 vattr.va_mask = AT_FSID|AT_NODEID;
4278 if (seg->s_ops == &segvn_ops &&
4279 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4280 vp != NULL && vp->v_type == VREG &&
4281 VOP_GETATTR(vp, &vattr, 0, CRED(),
4282 NULL) == 0) {
4283 (void) cmpldev(&mp->pr_dev,
4284 vattr.va_fsid);
4285 mp->pr_ino = vattr.va_nodeid;
4286 if (vp == p->p_exec)
4287 (void) strcpy(mp->pr_mapname,
4288 "a.out");
4289 else
4290 pr_object_name(mp->pr_mapname,
4291 vp, &vattr);
4295 * Get the SysV shared memory id, if any.
4297 if ((mp->pr_mflags & MA_SHARED) &&
4298 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4299 seg->s_base)) != SHMID_NONE) {
4300 if (mp->pr_shmid == SHMID_FREE)
4301 mp->pr_shmid = -1;
4303 mp->pr_mflags |= MA_SHM;
4304 } else {
4305 mp->pr_shmid = -1;
4308 npages = ((uintptr_t)(naddr - saddr)) >>
4309 PAGESHIFT;
4310 parr = kmem_zalloc(npages, KM_SLEEP);
4312 SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4314 for (pagenum = 0; pagenum < npages; pagenum++) {
4315 if (parr[pagenum] & SEG_PAGE_INCORE)
4316 mp->pr_rss++;
4317 if (parr[pagenum] & SEG_PAGE_ANON)
4318 mp->pr_anon++;
4319 if (parr[pagenum] & SEG_PAGE_LOCKED)
4320 mp->pr_locked++;
4322 kmem_free(parr, npages);
4325 ASSERT(tmp == NULL);
4326 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4328 return (0);
4330 #endif /* _SYSCALL32_IMPL */