fs: rename AT_* to VATTR_*
[unleashed/lotheac.git] / kernel / fs / proc / prsubr.c
blob b73e0ce1792fdad426b9bf9eb2698184b60f41c2
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/
#include <sys/types.h>
#include <sys/t_lock.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/priv.h>
#include <sys/debug.h>
#include <sys/errno.h>
#include <sys/inline.h>
#include <sys/kmem.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/brand.h>
#include <sys/sobject.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/var.h>
#include <sys/vfs.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/pcb.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/ts.h>
#include <sys/bitmap.h>
#include <sys/poll.h>
#include <sys/shm_impl.h>
#include <sys/fault.h>
#include <sys/syscall.h>
#include <sys/procfs.h>
#include <sys/processor.h>
#include <sys/cpuvar.h>
#include <sys/copyops.h>
#include <sys/time.h>
#include <sys/msacct.h>
#include <vm/as.h>
#include <vm/rm.h>
#include <vm/seg.h>
#include <vm/seg_vn.h>
#include <vm/seg_dev.h>
#include <vm/seg_spt.h>
#include <vm/page.h>
#include <sys/vmparam.h>
#include <sys/swap.h>
#include <sys/proc/prdata.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/contract_impl.h>
#include <sys/contract/process.h>
#include <sys/contract/process_impl.h>
#include <sys/schedctl.h>
#include <sys/pool.h>
#include <sys/zone.h>
#include <sys/atomic.h>
#include <sys/sdt.h>
#define	MAX_ITERS_SPIN	5

typedef struct prpagev {
    uint_t *pg_protv;	/* vector of page permissions */
    char *pg_incore;	/* vector of incore flags */
    size_t pg_npages;	/* number of pages in protv and incore */
    ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern const struct seg_ops segdev_ops;		/* needs a header file */
extern const struct seg_ops segspt_shmops;	/* needs a header file */

static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
/*
 * Choose an lwp from the complete set of lwps for the process.
 * This is called for any operation applied to the process
 * file descriptor that requires an lwp to operate upon.
 *
 * Returns a pointer to the thread for the selected LWP,
 * and with the dispatcher lock held for the thread.
 *
 * The algorithm for choosing an lwp is critical for /proc semantics;
 * don't touch this code unless you know all of the implications.
 */
kthread_t *
prchoose(proc_t *p)
{
    kthread_t *t;
    kthread_t *t_onproc = NULL;	/* running on processor */
    kthread_t *t_run = NULL;	/* runnable, on disp queue */
    kthread_t *t_sleep = NULL;	/* sleeping */
    kthread_t *t_hold = NULL;	/* sleeping, performing hold */
    kthread_t *t_susp = NULL;	/* suspended stop */
    kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
    kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
    kthread_t *t_req = NULL;	/* requested stop */
    kthread_t *t_istop = NULL;	/* event-of-interest stop */
    kthread_t *t_dtrace = NULL;	/* DTrace stop */

    ASSERT(MUTEX_HELD(&p->p_lock));

    /*
     * If the agent lwp exists, it takes precedence over all others.
     */
    if ((t = p->p_agenttp) != NULL) {
        thread_lock(t);
        return (t);
    }

    if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
        return (t);
    do {	/* for each lwp in the process */
        if (VSTOPPED(t)) {	/* virtually stopped */
            if (t_req == NULL)
                t_req = t;
            continue;
        }

        thread_lock(t);		/* make sure thread is in good state */
        switch (t->t_state) {
        default:
            panic("prchoose: bad thread state %d, thread 0x%p",
                t->t_state, (void *)t);
            /*NOTREACHED*/
        case TS_SLEEP:
            /* this is filthy */
            if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
                t->t_wchan0 == NULL) {
                if (t_hold == NULL)
                    t_hold = t;
            } else {
                if (t_sleep == NULL)
                    t_sleep = t;
            }
            break;
        case TS_RUN:
        case TS_WAIT:
            if (t_run == NULL)
                t_run = t;
            break;
        case TS_ONPROC:
            if (t_onproc == NULL)
                t_onproc = t;
            break;
        case TS_ZOMB:		/* last possible choice */
            break;
        case TS_STOPPED:
            switch (t->t_whystop) {
            case PR_SUSPENDED:
                if (t_susp == NULL)
                    t_susp = t;
                break;
            case PR_JOBCONTROL:
                if (t->t_proc_flag & TP_PRSTOP) {
                    if (t_jdstop == NULL)
                        t_jdstop = t;
                } else {
                    if (t_jstop == NULL)
                        t_jstop = t;
                }
                break;
            case PR_REQUESTED:
                if (t->t_dtrace_stop && t_dtrace == NULL)
                    t_dtrace = t;
                else if (t_req == NULL)
                    t_req = t;
                break;
            case PR_SYSENTRY:
            case PR_SYSEXIT:
            case PR_SIGNALLED:
            case PR_FAULTED:
                /*
                 * Make an lwp calling exit() be the
                 * last lwp seen in the process.
                 */
                if (t_istop == NULL ||
                    (t_istop->t_whystop == PR_SYSENTRY &&
                    t_istop->t_whatstop == SYS_exit))
                    t_istop = t;
                break;
            case PR_CHECKPOINT:	/* can't happen? */
                break;
            default:
                panic("prchoose: bad t_whystop %d, thread 0x%p",
                    t->t_whystop, (void *)t);
                /*NOTREACHED*/
            }
            break;
        }
        thread_unlock(t);
    } while ((t = t->t_forw) != p->p_tlist);

    if (t_onproc)
        t = t_onproc;
    else if (t_run)
        t = t_run;
    else if (t_sleep)
        t = t_sleep;
    else if (t_jstop)
        t = t_jstop;
    else if (t_jdstop)
        t = t_jdstop;
    else if (t_istop)
        t = t_istop;
    else if (t_dtrace)
        t = t_dtrace;
    else if (t_req)
        t = t_req;
    else if (t_hold)
        t = t_hold;
    else if (t_susp)
        t = t_susp;
    else		/* TS_ZOMB */
        t = p->p_tlist;

    if (t != NULL)
        thread_lock(t);
    return (t);
}
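
/*
 * Usage sketch (a hedged illustration; prgetstatus() below does exactly
 * this): the thread returned by prchoose() comes back with its
 * dispatcher lock held, so a caller that only needs a stable snapshot
 * drops that lock immediately:
 *
 *	ASSERT(MUTEX_HELD(&p->p_lock));
 *	t = prchoose(p);	(returns locked thread)
 *	ASSERT(t != NULL);
 *	thread_unlock(t);
 */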
/*
 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
 * on the /proc file descriptor.  Called from stop() when a traced
 * process stops on an event of interest.  Also called from exit()
 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
 */
void
prnotify(struct vnode *vp)
{
    prcommon_t *pcp = VTOP(vp)->pr_common;

    mutex_enter(&pcp->prc_mutex);
    cv_broadcast(&pcp->prc_wait);
    mutex_exit(&pcp->prc_mutex);
    if (pcp->prc_flags & PRC_POLL) {
        /*
         * We call pollwakeup() with POLLHUP to ensure that
         * the pollers are awakened even if they are polling
         * for nothing (i.e., waiting for the process to exit).
         * This enables the use of the PRC_POLL flag for optimization
         * (we can turn off PRC_POLL only if we know no pollers remain).
         */
        pcp->prc_flags &= ~PRC_POLL;
        pollwakeup(&pcp->prc_pollhead, POLLHUP);
    }
}
/* called immediately below, in prfree() */
static void
prfreenotify(vnode_t *vp)
{
    prnode_t *pnp;
    prcommon_t *pcp;

    while (vp != NULL) {
        pnp = VTOP(vp);
        pcp = pnp->pr_common;
        ASSERT(pcp->prc_thread == NULL);
        pcp->prc_proc = NULL;
        /*
         * We can't call prnotify() here because we are holding
         * pidlock.  We assert that there is no need to.
         */
        mutex_enter(&pcp->prc_mutex);
        cv_broadcast(&pcp->prc_wait);
        mutex_exit(&pcp->prc_mutex);
        ASSERT(!(pcp->prc_flags & PRC_POLL));

        vp = pnp->pr_next;
        pnp->pr_next = NULL;
    }
}
/*
 * Called from a hook in freeproc() when a traced process is removed
 * from the process table.  The proc-table pointers of all associated
 * /proc vnodes are cleared to indicate that the process has gone away.
 */
void
prfree(proc_t *p)
{
    uint_t slot = p->p_slot;

    ASSERT(MUTEX_HELD(&pidlock));

    /*
     * Block the process against /proc so it can be freed.
     * It cannot be freed while locked by some controlling process.
     * Lock ordering:
     *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
     */
    mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
    mutex_enter(&p->p_lock);
    while (p->p_proc_flag & P_PR_LOCK) {
        mutex_exit(&pr_pidlock);
        cv_wait(&pr_pid_cv[slot], &p->p_lock);
        mutex_exit(&p->p_lock);
        mutex_enter(&pr_pidlock);
        mutex_enter(&p->p_lock);
    }

    ASSERT(p->p_tlist == NULL);

    prfreenotify(p->p_plist);
    p->p_plist = NULL;

    prfreenotify(p->p_trace);
    p->p_trace = NULL;

    /*
     * We broadcast to wake up everyone waiting for this process.
     * No one can reach this process from this point on.
     */
    cv_broadcast(&pr_pid_cv[slot]);

    mutex_exit(&p->p_lock);
    mutex_exit(&pr_pidlock);
}
/*
 * Called from a hook in exit() when a traced process is becoming a zombie.
 */
void
prexit(proc_t *p)
{
    ASSERT(MUTEX_HELD(&p->p_lock));

    if (pr_watch_active(p)) {
        pr_free_watchpoints(p);
        watch_disable(curthread);
    }
    /* pr_free_watched_pages() is called in exit(), after dropping p_lock */
    if (p->p_trace) {
        VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
        prnotify(p->p_trace);
    }
    cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}
/*
 * Called when a thread calls lwp_exit().
 */
void
prlwpexit(kthread_t *t)
{
    vnode_t *vp;
    prnode_t *pnp;
    prcommon_t *pcp;
    proc_t *p = ttoproc(t);
    lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

    ASSERT(t == curthread);
    ASSERT(MUTEX_HELD(&p->p_lock));

    /*
     * The process must be blocked against /proc to do this safely.
     * The lwp must not disappear while the process is marked P_PR_LOCK.
     * It is the caller's responsibility to have called prbarrier(p).
     */
    ASSERT(!(p->p_proc_flag & P_PR_LOCK));

    for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
        pnp = VTOP(vp);
        pcp = pnp->pr_common;
        if (pcp->prc_thread == t) {
            pcp->prc_thread = NULL;
            pcp->prc_flags |= PRC_DESTROY;
        }
    }

    for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
        pnp = VTOP(vp);
        pcp = pnp->pr_common;
        pcp->prc_thread = NULL;
        pcp->prc_flags |= PRC_DESTROY;
        prnotify(vp);
    }

    if (p->p_trace)
        prnotify(p->p_trace);
}
/*
 * Called when a zombie thread is joined or when a
 * detached lwp exits.  Called from lwp_hash_out().
 */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
    vnode_t *vp;
    prnode_t *pnp;
    prcommon_t *pcp;

    ASSERT(MUTEX_HELD(&p->p_lock));

    /*
     * The process must be blocked against /proc to do this safely.
     * The lwp must not disappear while the process is marked P_PR_LOCK.
     * It is the caller's responsibility to have called prbarrier(p).
     */
    ASSERT(!(p->p_proc_flag & P_PR_LOCK));

    vp = lep->le_trace;
    lep->le_trace = NULL;
    while (vp) {
        prnotify(vp);
        pnp = VTOP(vp);
        pcp = pnp->pr_common;
        ASSERT(pcp->prc_thread == NULL &&
            (pcp->prc_flags & PRC_DESTROY));
        pcp->prc_tslot = -1;
        vp = pnp->pr_next;
        pnp->pr_next = NULL;
    }

    if (p->p_trace)
        prnotify(p->p_trace);
}
/*
 * Called from a hook in exec() when a thread starts exec().
 */
void
prexecstart(void)
{
    proc_t *p = ttoproc(curthread);
    klwp_t *lwp = ttolwp(curthread);

    /*
     * The P_PR_EXEC flag blocks /proc operations for
     * the duration of the exec().
     * We can't start exec() while the process is
     * locked by /proc, so we call prbarrier().
     * lwp_nostop keeps the process from being stopped
     * via job control for the duration of the exec().
     */

    ASSERT(MUTEX_HELD(&p->p_lock));
    prbarrier(p);
    lwp->lwp_nostop++;
    p->p_proc_flag |= P_PR_EXEC;
}
/*
 * Called from a hook in exec() when a thread finishes exec().
 * The thread may or may not have succeeded.  Some other thread
 * may have beat it to the punch.
 */
void
prexecend(void)
{
    proc_t *p = ttoproc(curthread);
    klwp_t *lwp = ttolwp(curthread);
    vnode_t *vp;
    prnode_t *pnp;
    prcommon_t *pcp;
    model_t model = p->p_model;
    id_t tid = curthread->t_tid;
    int tslot = curthread->t_dslot;

    ASSERT(MUTEX_HELD(&p->p_lock));

    lwp->lwp_nostop--;
    if (p->p_flag & SEXITLWPS) {
        /*
         * We are on our way to exiting because some
         * other thread beat us in the race to exec().
         * Don't clear the P_PR_EXEC flag in this case.
         */
        return;
    }

    /*
     * Wake up anyone waiting in /proc for the process to complete exec().
     */
    p->p_proc_flag &= ~P_PR_EXEC;
    if ((vp = p->p_trace) != NULL) {
        pcp = VTOP(vp)->pr_common;
        mutex_enter(&pcp->prc_mutex);
        cv_broadcast(&pcp->prc_wait);
        mutex_exit(&pcp->prc_mutex);
        for (; vp != NULL; vp = pnp->pr_next) {
            pnp = VTOP(vp);
            pnp->pr_common->prc_datamodel = model;
        }
    }
    if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
        /*
         * We dealt with the process common above.
         */
        ASSERT(p->p_trace != NULL);
        pcp = VTOP(vp)->pr_common;
        mutex_enter(&pcp->prc_mutex);
        cv_broadcast(&pcp->prc_wait);
        mutex_exit(&pcp->prc_mutex);
        for (; vp != NULL; vp = pnp->pr_next) {
            pnp = VTOP(vp);
            pcp = pnp->pr_common;
            pcp->prc_datamodel = model;
            pcp->prc_tid = tid;
            pcp->prc_tslot = tslot;
        }
    }
}
/*
 * Called from a hook in relvm() just before freeing the address space.
 * We free all the watched areas now.
 */
void
prrelvm(void)
{
    proc_t *p = ttoproc(curthread);

    mutex_enter(&p->p_lock);
    prbarrier(p);	/* block all other /proc operations */
    if (pr_watch_active(p)) {
        pr_free_watchpoints(p);
        watch_disable(curthread);
    }
    mutex_exit(&p->p_lock);
    pr_free_watched_pages(p);
}
/*
 * Called from hooks in exec-related code when a traced process
 * attempts to exec(2) a setuid/setgid program or an unreadable
 * file.  Rather than fail the exec we invalidate the associated
 * /proc vnodes so that subsequent attempts to use them will fail.
 *
 * All /proc vnodes, except directory vnodes, are retained on a linked
 * list (rooted at p_plist in the process structure) until last close.
 *
 * A controlling process must re-open the /proc files in order to
 * regain control.
 */
void
prinvalidate(struct user *up)
{
    kthread_t *t = curthread;
    proc_t *p = ttoproc(t);
    vnode_t *vp;
    prnode_t *pnp;
    int writers = 0;

    mutex_enter(&p->p_lock);
    prbarrier(p);	/* block all other /proc operations */

    /*
     * At this moment, there can be only one lwp in the process.
     */
    ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

    /*
     * Invalidate any currently active /proc vnodes.
     */
    for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
        pnp = VTOP(vp);
        switch (pnp->pr_type) {
        case PR_PSINFO:		/* these files can be read by anyone */
        case PR_LPSINFO:
        case PR_LWPSINFO:
        case PR_LWPDIR:
        case PR_LWPIDDIR:
        case PR_USAGE:
        case PR_LUSAGE:
        case PR_LWPUSAGE:
            break;
        default:
            pnp->pr_flags |= PR_INVAL;
            break;
        }
    }

    /*
     * Wake up anyone waiting for the process or lwp.
     * p->p_trace is guaranteed to be non-NULL if there
     * are any open /proc files for this process.
     */
    if ((vp = p->p_trace) != NULL) {
        prcommon_t *pcp = VTOP(vp)->pr_pcommon;

        prnotify(vp);
        /*
         * Are there any writers?
         */
        if ((writers = pcp->prc_writers) != 0) {
            /*
             * Clear the exclusive open flag (old /proc interface).
             * Set prc_selfopens equal to prc_writers so that
             * the next O_EXCL|O_WRITE open will succeed
             * even with existing (though invalid) writers.
             * prclose() must decrement prc_selfopens when
             * the invalid files are closed.
             */
            pcp->prc_flags &= ~PRC_EXCL;
            ASSERT(pcp->prc_selfopens <= writers);
            pcp->prc_selfopens = writers;
        }
    }
    vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
    while (vp != NULL) {
        /*
         * We should not invalidate the lwpiddir vnodes,
         * but the necessities of maintaining the old
         * ioctl()-based version of /proc require it.
         */
        pnp = VTOP(vp);
        pnp->pr_flags |= PR_INVAL;
        prnotify(vp);
        vp = pnp->pr_next;
    }

    /*
     * If any tracing flags are in effect and any vnodes are open for
     * writing then set the requested-stop and run-on-last-close flags.
     * Otherwise, clear all tracing flags.
     */
    t->t_proc_flag &= ~TP_PAUSE;
    if ((p->p_proc_flag & P_PR_TRACE) && writers) {
        t->t_proc_flag |= TP_PRSTOP;
        aston(t);	/* so ISSIG will see the flag */
        p->p_proc_flag |= P_PR_RUNLCL;
    } else {
        premptyset(&up->u_entrymask);		/* syscalls */
        premptyset(&up->u_exitmask);
        up->u_systrap = 0;
        premptyset(&p->p_sigmask);		/* signals */
        premptyset(&p->p_fltmask);		/* faults */
        t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
        p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
        prnostep(ttolwp(t));
    }

    mutex_exit(&p->p_lock);
}
/*
 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
 * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
 * Return value is the process pointer if the process still exists, else NULL.
 * If we lock the process, give ourselves kernel priority to avoid deadlocks;
 * this is undone in prunlock().
 */
proc_t *
pr_p_lock(prnode_t *pnp)
{
    proc_t *p;
    prcommon_t *pcp;

    mutex_enter(&pr_pidlock);
    if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
        return (NULL);
    mutex_enter(&p->p_lock);
    while (p->p_proc_flag & P_PR_LOCK) {
        /*
         * This cv/mutex pair is persistent even if
         * the process disappears while we sleep.
         */
        kcondvar_t *cv = &pr_pid_cv[p->p_slot];
        kmutex_t *mp = &p->p_lock;

        mutex_exit(&pr_pidlock);
        cv_wait(cv, mp);
        mutex_exit(mp);
        mutex_enter(&pr_pidlock);
        if (pcp->prc_proc == NULL)
            return (NULL);
        ASSERT(p == pcp->prc_proc);
        mutex_enter(&p->p_lock);
    }
    p->p_proc_flag |= P_PR_LOCK;
    THREAD_KPRI_REQUEST();
    return (p);
}
/*
 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
 * This prevents any lwp of the process from disappearing and
 * blocks most operations that a process can perform on itself.
 * Returns 0 on success, a non-zero error number on failure.
 *
 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
 *
 * error returns:
 *	ENOENT: process or lwp has disappeared or process is exiting
 *		(or has become a zombie and zdisp == ZNO).
 *	EAGAIN: procfs vnode has become invalid.
 *	EINTR:  signal arrived while waiting for exec to complete.
 */
int
prlock(prnode_t *pnp, int zdisp)
{
    prcommon_t *pcp;
    proc_t *p;

again:
    pcp = pnp->pr_common;
    p = pr_p_lock(pnp);
    mutex_exit(&pr_pidlock);

    /*
     * Return ENOENT immediately if there is no process.
     */
    if (p == NULL)
        return (ENOENT);

    ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

    /*
     * Return ENOENT if process entered zombie state or is exiting
     * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
     */
    if (zdisp == ZNO &&
        ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
        prunlock(pnp);
        return (ENOENT);
    }

    /*
     * If lwp-specific, check to see if lwp has disappeared.
     */
    if (pcp->prc_flags & PRC_LWP) {
        if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
            pcp->prc_tslot == -1) {
            prunlock(pnp);
            return (ENOENT);
        }
    }

    /*
     * Return EAGAIN if we have encountered a security violation.
     * (The process exec'd a set-id or unreadable executable file.)
     */
    if (pnp->pr_flags & PR_INVAL) {
        prunlock(pnp);
        return (EAGAIN);
    }

    /*
     * If process is undergoing an exec(), wait for
     * completion and then start all over again.
     */
    if (p->p_proc_flag & P_PR_EXEC) {
        pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
        mutex_enter(&pcp->prc_mutex);
        prunlock(pnp);
        if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
            mutex_exit(&pcp->prc_mutex);
            return (EINTR);
        }
        mutex_exit(&pcp->prc_mutex);
        goto again;
    }

    /*
     * We return holding p->p_lock.
     */
    return (0);
}
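
/*
 * Shape of a typical caller (an illustrative sketch, not a verbatim
 * excerpt; the /proc vnode and control code follow this pattern):
 *
 *	if ((error = prlock(pnp, ZNO)) != 0)
 *		return (error);		(ENOENT, EAGAIN or EINTR)
 *	... operate on pcp->prc_proc, holding p->p_lock ...
 *	prunlock(pnp);
 */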
/*
 * Undo prlock() and pr_p_lock().
 * p->p_lock is still held; pr_pidlock is no longer held.
 *
 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
 * if any, waiting for the flag to be dropped; it retains p->p_lock.
 *
 * prunlock() calls prunmark() and then drops p->p_lock.
 */
void
prunmark(proc_t *p)
{
    ASSERT(p->p_proc_flag & P_PR_LOCK);
    ASSERT(MUTEX_HELD(&p->p_lock));

    cv_signal(&pr_pid_cv[p->p_slot]);
    p->p_proc_flag &= ~P_PR_LOCK;
    THREAD_KPRI_RELEASE();
}
void
prunlock(prnode_t *pnp)
{
    prcommon_t *pcp = pnp->pr_common;
    proc_t *p = pcp->prc_proc;

    /*
     * If we (or someone) gave it a SIGKILL, and it is not
     * already a zombie, set it running unconditionally.
     */
    if ((p->p_flag & SKILLED) &&
        !(p->p_flag & SEXITING) &&
        !(pcp->prc_flags & PRC_DESTROY) &&
        !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
        (void) pr_setrun(pnp, 0);
    prunmark(p);
    mutex_exit(&p->p_lock);
}
/*
 * Called while holding p->p_lock to delay until the process is unlocked.
 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
 * The process cannot become locked again until p->p_lock is dropped.
 */
void
prbarrier(proc_t *p)
{
    ASSERT(MUTEX_HELD(&p->p_lock));

    if (p->p_proc_flag & P_PR_LOCK) {
        /* The process is locked; delay until not locked */
        uint_t slot = p->p_slot;

        while (p->p_proc_flag & P_PR_LOCK)
            cv_wait(&pr_pid_cv[slot], &p->p_lock);
        cv_signal(&pr_pid_cv[slot]);
    }
}
/*
 * Return process/lwp status.
 * The u-block is mapped in by this routine and unmapped at the end.
 */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
    kthread_t *t;

    ASSERT(MUTEX_HELD(&p->p_lock));

    t = prchoose(p);	/* returns locked thread */
    ASSERT(t != NULL);
    thread_unlock(t);

    /* just bzero the process part, prgetlwpstatus() does the rest */
    bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
    sp->pr_nlwp = p->p_lwpcnt;
    sp->pr_nzomb = p->p_zombcnt;
    prassignset(&sp->pr_sigpend, &p->p_sig);
    sp->pr_brkbase = (uintptr_t)p->p_brkbase;
    sp->pr_brksize = p->p_brksize;
    sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
    sp->pr_stksize = p->p_stksize;
    sp->pr_pid = p->p_pid;
    if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
        (p->p_flag & SZONETOP)) {
        ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
        /*
         * Inside local zones, fake zsched's pid as parent pids for
         * processes which reference processes outside of the zone.
         */
        sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
    } else {
        sp->pr_ppid = p->p_ppid;
    }
    sp->pr_pgid = p->p_pgrp;
    sp->pr_sid = p->p_sessp->s_sid;
    sp->pr_taskid = p->p_task->tk_tkid;
    sp->pr_projid = p->p_task->tk_proj->kpj_id;
    sp->pr_zoneid = p->p_zone->zone_id;
    hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
    hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
    TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
    TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
    prassignset(&sp->pr_sigtrace, &p->p_sigmask);
    prassignset(&sp->pr_flttrace, &p->p_fltmask);
    prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
    prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
    switch (p->p_model) {
    case DATAMODEL_ILP32:
        sp->pr_dmodel = PR_MODEL_ILP32;
        break;
    case DATAMODEL_LP64:
        sp->pr_dmodel = PR_MODEL_LP64;
        break;
    }
    if (p->p_agenttp)
        sp->pr_agentid = p->p_agenttp->t_tid;

    /* get the chosen lwp's status */
    prgetlwpstatus(t, &sp->pr_lwp, zp);

    /* replicate the flags */
    sp->pr_flags = sp->pr_lwp.pr_flags;
}
#ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
    proc_t *p = ttoproc(t);
    klwp_t *lwp = ttolwp(t);
    struct mstate *ms = &lwp->lwp_mstate;
    hrtime_t usr, sys;
    int flags;
    ulong_t instr;

    ASSERT(MUTEX_HELD(&p->p_lock));

    bzero(sp, sizeof (*sp));
    flags = 0L;
    if (t->t_state == TS_STOPPED) {
        flags |= PR_STOPPED;
        if ((t->t_schedflag & TS_PSTART) == 0)
            flags |= PR_ISTOP;
    } else if (VSTOPPED(t)) {
        flags |= PR_STOPPED|PR_ISTOP;
    }
    if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
        flags |= PR_DSTOP;
    if (lwp->lwp_asleep)
        flags |= PR_ASLEEP;
    if (t == p->p_agenttp)
        flags |= PR_AGENT;
    if (!(t->t_proc_flag & TP_TWAIT))
        flags |= PR_DETACH;
    if (t->t_proc_flag & TP_DAEMON)
        flags |= PR_DAEMON;
    if (p->p_proc_flag & P_PR_FORK)
        flags |= PR_FORK;
    if (p->p_proc_flag & P_PR_RUNLCL)
        flags |= PR_RLC;
    if (p->p_proc_flag & P_PR_KILLCL)
        flags |= PR_KLC;
    if (p->p_proc_flag & P_PR_ASYNC)
        flags |= PR_ASYNC;
    if (p->p_proc_flag & P_PR_BPTADJ)
        flags |= PR_BPTADJ;
    if (p->p_proc_flag & P_PR_PTRACE)
        flags |= PR_PTRACE;
    if (p->p_flag & SMSACCT)
        flags |= PR_MSACCT;
    if (p->p_flag & SMSFORK)
        flags |= PR_MSFORK;
    if (p->p_flag & SVFWAIT)
        flags |= PR_VFORKP;
    sp->pr_flags = flags;
    if (VSTOPPED(t)) {
        sp->pr_why = PR_REQUESTED;
        sp->pr_what = 0;
    } else {
        sp->pr_why = t->t_whystop;
        sp->pr_what = t->t_whatstop;
    }
    sp->pr_lwpid = t->t_tid;
    sp->pr_cursig = lwp->lwp_cursig;
    prassignset(&sp->pr_lwppend, &t->t_sig);
    schedctl_finish_sigblock(t);
    prassignset(&sp->pr_lwphold, &t->t_hold);
    if (t->t_whystop == PR_FAULTED) {
        siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
        if (t->t_whatstop == FLTPAGE)
            sp->pr_info.si_addr =
                (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
    } else if (lwp->lwp_curinfo)
        siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
    if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
        sp->pr_info.si_zoneid != zp->zone_id) {
        sp->pr_info.si_pid = zp->zone_zsched->p_pid;
        sp->pr_info.si_uid = 0;
        sp->pr_info.si_ctid = -1;
        sp->pr_info.si_zoneid = zp->zone_id;
    }
    sp->pr_altstack.ss_sp =
        (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
    sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
    sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
    prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
    sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
    sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
    (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
        sizeof (sp->pr_clname) - 1);
    if (flags & PR_STOPPED)
        hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
    usr = ms->ms_acct[LMS_USER];
    sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
    scalehrtime(&usr);
    scalehrtime(&sys);
    hrt2ts32(usr, &sp->pr_utime);
    hrt2ts32(sys, &sp->pr_stime);

    /*
     * Fetch the current instruction, if not a system process.
     * We don't attempt this unless the lwp is stopped.
     */
    if ((p->p_flag & SSYS) || p->p_as == &kas)
        sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
    else if (!(flags & PR_STOPPED))
        sp->pr_flags |= PR_PCINVAL;
    else if (!prfetchinstr(lwp, &instr))
        sp->pr_flags |= PR_PCINVAL;
    else
        sp->pr_instr = (uint32_t)instr;

    /*
     * Drop p_lock while touching the lwp's stack.
     */
    mutex_exit(&p->p_lock);
    if (prisstep(lwp))
        sp->pr_flags |= PR_STEP;
    if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
        int i;

        sp->pr_syscall = get_syscall32_args(lwp,
            (int *)sp->pr_sysarg, &i);
        sp->pr_nsysarg = (ushort_t)i;
    }
    if ((flags & PR_STOPPED) || t == curthread)
        prgetprregs32(lwp, sp->pr_reg);
    if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
        (flags & PR_VFORKP)) {
        long r1, r2;
        user_t *up;
        auxv_t *auxp;
        int i;

        sp->pr_errno = prgetrvals(lwp, &r1, &r2);
        if (sp->pr_errno == 0) {
            sp->pr_rval1 = (int32_t)r1;
            sp->pr_rval2 = (int32_t)r2;
            sp->pr_errpriv = PRIV_NONE;
        } else
            sp->pr_errpriv = lwp->lwp_badpriv;

        if (t->t_sysnum == SYS_execve) {
            up = PTOU(p);
            sp->pr_sysarg[0] = 0;
            sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
            sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
            for (i = 0, auxp = up->u_auxv;
                i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
                i++, auxp++) {
                if (auxp->a_type == AT_SUN_EXECNAME) {
                    sp->pr_sysarg[0] =
                        (caddr32_t)
                        (uintptr_t)auxp->a_un.a_ptr;
                    break;
                }
            }
        }
    }
    if (prhasfp())
        prgetprfpregs32(lwp, &sp->pr_fpreg);
    mutex_enter(&p->p_lock);
}

void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
    kthread_t *t;

    ASSERT(MUTEX_HELD(&p->p_lock));

    t = prchoose(p);	/* returns locked thread */
    ASSERT(t != NULL);
    thread_unlock(t);

    /* just bzero the process part, prgetlwpstatus32() does the rest */
    bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
    sp->pr_nlwp = p->p_lwpcnt;
    sp->pr_nzomb = p->p_zombcnt;
    prassignset(&sp->pr_sigpend, &p->p_sig);
    sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
    sp->pr_brksize = (uint32_t)p->p_brksize;
    sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
    sp->pr_stksize = (uint32_t)p->p_stksize;
    sp->pr_pid = p->p_pid;
    if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
        (p->p_flag & SZONETOP)) {
        ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
        /*
         * Inside local zones, fake zsched's pid as parent pids for
         * processes which reference processes outside of the zone.
         */
        sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
    } else {
        sp->pr_ppid = p->p_ppid;
    }
    sp->pr_pgid = p->p_pgrp;
    sp->pr_sid = p->p_sessp->s_sid;
    sp->pr_taskid = p->p_task->tk_tkid;
    sp->pr_projid = p->p_task->tk_proj->kpj_id;
    sp->pr_zoneid = p->p_zone->zone_id;
    hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
    hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
    TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
    TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
    prassignset(&sp->pr_sigtrace, &p->p_sigmask);
    prassignset(&sp->pr_flttrace, &p->p_fltmask);
    prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
    prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
    switch (p->p_model) {
    case DATAMODEL_ILP32:
        sp->pr_dmodel = PR_MODEL_ILP32;
        break;
    case DATAMODEL_LP64:
        sp->pr_dmodel = PR_MODEL_LP64;
        break;
    }
    if (p->p_agenttp)
        sp->pr_agentid = p->p_agenttp->t_tid;

    /* get the chosen lwp's status */
    prgetlwpstatus32(t, &sp->pr_lwp, zp);

    /* replicate the flags */
    sp->pr_flags = sp->pr_lwp.pr_flags;
}
#endif	/* _SYSCALL32_IMPL */
/*
 * Return lwp status.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
    proc_t *p = ttoproc(t);
    klwp_t *lwp = ttolwp(t);
    struct mstate *ms = &lwp->lwp_mstate;
    hrtime_t usr, sys;
    int flags;
    ulong_t instr;

    ASSERT(MUTEX_HELD(&p->p_lock));

    bzero(sp, sizeof (*sp));
    flags = 0L;
    if (t->t_state == TS_STOPPED) {
        flags |= PR_STOPPED;
        if ((t->t_schedflag & TS_PSTART) == 0)
            flags |= PR_ISTOP;
    } else if (VSTOPPED(t)) {
        flags |= PR_STOPPED|PR_ISTOP;
    }
    if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
        flags |= PR_DSTOP;
    if (lwp->lwp_asleep)
        flags |= PR_ASLEEP;
    if (t == p->p_agenttp)
        flags |= PR_AGENT;
    if (!(t->t_proc_flag & TP_TWAIT))
        flags |= PR_DETACH;
    if (t->t_proc_flag & TP_DAEMON)
        flags |= PR_DAEMON;
    if (p->p_proc_flag & P_PR_FORK)
        flags |= PR_FORK;
    if (p->p_proc_flag & P_PR_RUNLCL)
        flags |= PR_RLC;
    if (p->p_proc_flag & P_PR_KILLCL)
        flags |= PR_KLC;
    if (p->p_proc_flag & P_PR_ASYNC)
        flags |= PR_ASYNC;
    if (p->p_proc_flag & P_PR_BPTADJ)
        flags |= PR_BPTADJ;
    if (p->p_proc_flag & P_PR_PTRACE)
        flags |= PR_PTRACE;
    if (p->p_flag & SMSACCT)
        flags |= PR_MSACCT;
    if (p->p_flag & SMSFORK)
        flags |= PR_MSFORK;
    if (p->p_flag & SVFWAIT)
        flags |= PR_VFORKP;
    if (p->p_pgidp->pid_pgorphaned)
        flags |= PR_ORPHAN;
    if (p->p_pidflag & CLDNOSIGCHLD)
        flags |= PR_NOSIGCHLD;
    if (p->p_pidflag & CLDWAITPID)
        flags |= PR_WAITPID;
    sp->pr_flags = flags;
    if (VSTOPPED(t)) {
        sp->pr_why = PR_REQUESTED;
        sp->pr_what = 0;
    } else {
        sp->pr_why = t->t_whystop;
        sp->pr_what = t->t_whatstop;
    }
    sp->pr_lwpid = t->t_tid;
    sp->pr_cursig = lwp->lwp_cursig;
    prassignset(&sp->pr_lwppend, &t->t_sig);
    schedctl_finish_sigblock(t);
    prassignset(&sp->pr_lwphold, &t->t_hold);
    if (t->t_whystop == PR_FAULTED)
        bcopy(&lwp->lwp_siginfo,
            &sp->pr_info, sizeof (k_siginfo_t));
    else if (lwp->lwp_curinfo)
        bcopy(&lwp->lwp_curinfo->sq_info,
            &sp->pr_info, sizeof (k_siginfo_t));
    if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
        sp->pr_info.si_zoneid != zp->zone_id) {
        sp->pr_info.si_pid = zp->zone_zsched->p_pid;
        sp->pr_info.si_uid = 0;
        sp->pr_info.si_ctid = -1;
        sp->pr_info.si_zoneid = zp->zone_id;
    }
    sp->pr_altstack = lwp->lwp_sigaltstack;
    prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
    sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
    sp->pr_ustack = lwp->lwp_ustack;
    (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
        sizeof (sp->pr_clname) - 1);
    if (flags & PR_STOPPED)
        hrt2ts(t->t_stoptime, &sp->pr_tstamp);
    usr = ms->ms_acct[LMS_USER];
    sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
    scalehrtime(&usr);
    scalehrtime(&sys);
    hrt2ts(usr, &sp->pr_utime);
    hrt2ts(sys, &sp->pr_stime);

    /*
     * Fetch the current instruction, if not a system process.
     * We don't attempt this unless the lwp is stopped.
     */
    if ((p->p_flag & SSYS) || p->p_as == &kas)
        sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
    else if (!(flags & PR_STOPPED))
        sp->pr_flags |= PR_PCINVAL;
    else if (!prfetchinstr(lwp, &instr))
        sp->pr_flags |= PR_PCINVAL;
    else
        sp->pr_instr = instr;

    /*
     * Drop p_lock while touching the lwp's stack.
     */
    mutex_exit(&p->p_lock);
    if (prisstep(lwp))
        sp->pr_flags |= PR_STEP;
    if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
        int i;

        sp->pr_syscall = get_syscall_args(lwp,
            (long *)sp->pr_sysarg, &i);
        sp->pr_nsysarg = (ushort_t)i;
    }
    if ((flags & PR_STOPPED) || t == curthread)
        prgetprregs(lwp, sp->pr_reg);
    if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
        (flags & PR_VFORKP)) {
        user_t *up;
        auxv_t *auxp;
        int i;

        sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
        if (sp->pr_errno == 0)
            sp->pr_errpriv = PRIV_NONE;
        else
            sp->pr_errpriv = lwp->lwp_badpriv;

        if (t->t_sysnum == SYS_execve) {
            up = PTOU(p);
            sp->pr_sysarg[0] = 0;
            sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
            sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
            for (i = 0, auxp = up->u_auxv;
                i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
                i++, auxp++) {
                if (auxp->a_type == AT_SUN_EXECNAME) {
                    sp->pr_sysarg[0] =
                        (uintptr_t)auxp->a_un.a_ptr;
                    break;
                }
            }
        }
    }
    if (prhasfp())
        prgetprfpregs(lwp, &sp->pr_fpreg);
    mutex_enter(&p->p_lock);
}
/*
 * Get the sigaction structure for the specified signal.  The u-block
 * must already have been mapped in by the caller.
 */
void
prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
{
    int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

    bzero(sp, sizeof (*sp));

    if (sig != 0 && (unsigned)sig < nsig) {
        sp->sa_handler = up->u_signal[sig-1];
        prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
        if (sigismember(&up->u_sigonstack, sig))
            sp->sa_flags |= SA_ONSTACK;
        if (sigismember(&up->u_sigresethand, sig))
            sp->sa_flags |= SA_RESETHAND;
        if (sigismember(&up->u_sigrestart, sig))
            sp->sa_flags |= SA_RESTART;
        if (sigismember(&p->p_siginfo, sig))
            sp->sa_flags |= SA_SIGINFO;
        if (sigismember(&up->u_signodefer, sig))
            sp->sa_flags |= SA_NODEFER;
        if (sig == SIGCLD) {
            if (p->p_flag & SNOWAIT)
                sp->sa_flags |= SA_NOCLDWAIT;
            if ((p->p_flag & SJCTL) == 0)
                sp->sa_flags |= SA_NOCLDSTOP;
        }
    }
}
#ifdef _SYSCALL32_IMPL
void
prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
{
    int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;

    bzero(sp, sizeof (*sp));

    if (sig != 0 && (unsigned)sig < nsig) {
        sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
        prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
        if (sigismember(&up->u_sigonstack, sig))
            sp->sa_flags |= SA_ONSTACK;
        if (sigismember(&up->u_sigresethand, sig))
            sp->sa_flags |= SA_RESETHAND;
        if (sigismember(&up->u_sigrestart, sig))
            sp->sa_flags |= SA_RESTART;
        if (sigismember(&p->p_siginfo, sig))
            sp->sa_flags |= SA_SIGINFO;
        if (sigismember(&up->u_signodefer, sig))
            sp->sa_flags |= SA_NODEFER;
        if (sig == SIGCLD) {
            if (p->p_flag & SNOWAIT)
                sp->sa_flags |= SA_NOCLDWAIT;
            if ((p->p_flag & SJCTL) == 0)
                sp->sa_flags |= SA_NOCLDSTOP;
        }
    }
}
#endif	/* _SYSCALL32_IMPL */
/*
 * Count the number of segments in this process's address space.
 */
int
prnsegs(struct as *as, int reserved)
{
    int n = 0;
    struct seg *seg;

    ASSERT(as != &kas && AS_WRITE_HELD(as));

    for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
        caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
        caddr_t saddr, naddr;
        void *tmp = NULL;

        if ((seg->s_flags & S_HOLE) != 0) {
            continue;
        }

        for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
            (void) pr_getprot(seg, reserved, &tmp,
                &saddr, &naddr, eaddr);
            if (saddr != naddr)
                n++;
        }

        ASSERT(tmp == NULL);
    }

    return (n);
}
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
static int
pr_u32tos(uint32_t n, char *s, int len)
{
    char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
    char *cp = cbuf;
    char *end = s + len;

    do {
        *cp++ = (char)(n % 10 + '0');
        n /= 10;
    } while (n);

    len = (int)(cp - cbuf);

    do {
        *s++ = *--cp;
    } while (cp > cbuf);

    while (s < end)		/* optional pad */
        *s++ = '\0';

    return (len);
}
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
    char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
    char *cp = cbuf;
    int len;

    do {
        *cp++ = (char)(n % 10 + '0');
        n /= 10;
    } while (n);

    len = (int)(cp - cbuf);

    do {
        *s++ = *--cp;
    } while (cp > cbuf);

    return (len);
}
void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
    char *s = name;
    struct vfs *vfsp;
    struct vfssw *vfsswp;

    if ((vfsp = vp->v_vfsp) != NULL &&
        ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
        *vfsswp->vsw_name) {
        (void) strcpy(s, vfsswp->vsw_name);
        s += strlen(s);
        *s++ = '.';
    }
    s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
    *s++ = '.';
    s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
    *s++ = '.';
    s += pr_u64tos(vattr->va_nodeid, s);
    *s++ = '\0';
}
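
/*
 * The manufactured name has the form <fsname>.<major>.<minor>.<nodeid>,
 * e.g. (with made-up numbers) "zfs.90.65538.4321"; the <fsname> prefix
 * is omitted when the vfssw entry has no name.  The caller's buffer
 * must therefore hold the vfs name plus two uint32_t and one uint64_t
 * rendered in decimal, with separators and a terminating NUL.
 */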
struct seg *
break_seg(proc_t *p)
{
    caddr_t addr = p->p_brkbase;
    struct seg *seg;
    struct vnode *vp;

    if (p->p_brksize != 0)
        addr += p->p_brksize - 1;
    seg = as_segat(p->p_as, addr);
    if (seg != NULL && seg->s_ops == &segvn_ops &&
        (segop_getvp(seg, seg->s_base, &vp) != 0 || vp == NULL))
        return (seg);
    return (NULL);
}
/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 */
typedef struct pr_iobuf_list {
    list_node_t	piol_link;	/* buffer linkage */
    size_t	piol_size;	/* total size (header + data) */
    size_t	piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))

void
pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
{
    piol_t *iol;
    size_t initial_size = MIN(1, n) * itemsize;

    list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));

    ASSERT(list_head(iolhead) == NULL);
    ASSERT(itemsize < MAPSIZE - sizeof (*iol));
    ASSERT(initial_size > 0);

    /*
     * Someone creating chained copyout buffers may ask for less than
     * MAPSIZE if the amount of data to be buffered is known to be
     * smaller than that.
     * But in order to prevent involuntary self-denial of service,
     * the requested input size is clamped at MAPSIZE.
     */
    initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
    iol = kmem_alloc(initial_size, KM_SLEEP);
    list_insert_head(iolhead, iol);
    iol->piol_usedsize = 0;
    iol->piol_size = initial_size;
}
void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
    piol_t *iol;
    char *new;

    ASSERT(itemsize < MAPSIZE - sizeof (*iol));
    ASSERT(list_head(iolhead) != NULL);

    iol = (piol_t *)list_tail(iolhead);

    if (iol->piol_size <
        iol->piol_usedsize + sizeof (*iol) + itemsize) {
        /*
         * Out of space in the current buffer. Allocate more.
         */
        piol_t *newiol;

        newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
        newiol->piol_size = MAPSIZE;
        newiol->piol_usedsize = 0;

        list_insert_after(iolhead, iol, newiol);
        iol = list_next(iolhead, iol);
        ASSERT(iol == newiol);
    }
    new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
    iol->piol_usedsize += itemsize;
    bzero(new, itemsize);
    return (new);
}
int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
    int error = errin;
    piol_t *iol;

    while ((iol = list_head(iolhead)) != NULL) {
        list_remove(iolhead, iol);
        if (!error) {
            if (copyout(PIOL_DATABUF(iol), *tgt,
                iol->piol_usedsize))
                error = EFAULT;
            *tgt += iol->piol_usedsize;
        }
        kmem_free(iol, iol->piol_size);
    }
    list_destroy(iolhead);

    return (error);
}
int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
    offset_t off = uiop->uio_offset;
    char *base;
    size_t size;
    piol_t *iol;
    int error = errin;

    while ((iol = list_head(iolhead)) != NULL) {
        list_remove(iolhead, iol);
        base = PIOL_DATABUF(iol);
        size = iol->piol_usedsize;
        if (off <= size && error == 0 && uiop->uio_resid > 0)
            error = uiomove(base + off, size - off,
                UIO_READ, uiop);
        off = MAX(0, off - (offset_t)size);
        kmem_free(iol, iol->piol_size);
    }
    list_destroy(iolhead);

    return (error);
}
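
/*
 * A minimal sketch of the chained-buffer protocol (illustrative only;
 * prgetmap() below is a real consumer): size the list for an estimated
 * item count, append fixed-size items as they are discovered, then
 * drain and free the whole chain in one call.
 *
 *	list_t head;
 *	prmap_t *mp;
 *
 *	pr_iol_initlist(&head, sizeof (*mp), nitems);
 *	while (more_items) {
 *		mp = pr_iol_newbuf(&head, sizeof (*mp));
 *		... fill in *mp ...
 *	}
 *	error = pr_iol_uiomove_and_free(&head, uiop, error);
 */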
/*
 * Return an array of structures with memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
    struct as *as = p->p_as;
    prmap_t *mp;
    struct seg *seg;
    struct seg *brkseg, *stkseg;
    struct vnode *vp;
    struct vattr vattr;
    uint_t prot;

    ASSERT(as != &kas && AS_WRITE_HELD(as));

    /*
     * Request an initial buffer size that doesn't waste memory
     * if the address space has only a small number of segments.
     */
    pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

    if ((seg = AS_SEGFIRST(as)) == NULL)
        return (0);

    brkseg = break_seg(p);
    stkseg = as_segat(as, prgetstackbase(p));

    do {
        caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
        caddr_t saddr, naddr;
        void *tmp = NULL;

        if ((seg->s_flags & S_HOLE) != 0) {
            continue;
        }

        for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
            prot = pr_getprot(seg, reserved, &tmp,
                &saddr, &naddr, eaddr);
            if (saddr == naddr)
                continue;

            mp = pr_iol_newbuf(iolhead, sizeof (*mp));

            mp->pr_vaddr = (uintptr_t)saddr;
            mp->pr_size = naddr - saddr;
            mp->pr_offset = segop_getoffset(seg, saddr);
            mp->pr_mflags = 0;
            if (prot & PROT_READ)
                mp->pr_mflags |= MA_READ;
            if (prot & PROT_WRITE)
                mp->pr_mflags |= MA_WRITE;
            if (prot & PROT_EXEC)
                mp->pr_mflags |= MA_EXEC;
            if (segop_gettype(seg, saddr) & MAP_SHARED)
                mp->pr_mflags |= MA_SHARED;
            if (segop_gettype(seg, saddr) & MAP_NORESERVE)
                mp->pr_mflags |= MA_NORESERVE;
            if (seg->s_ops == &segspt_shmops ||
                (seg->s_ops == &segvn_ops &&
                (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
                mp->pr_mflags |= MA_ANON;
            if (seg == brkseg)
                mp->pr_mflags |= MA_BREAK;
            else if (seg == stkseg) {
                mp->pr_mflags |= MA_STACK;
                if (reserved) {
                    size_t maxstack =
                        ((size_t)p->p_stk_ctl +
                        PAGEOFFSET) & PAGEMASK;
                    mp->pr_vaddr =
                        (uintptr_t)prgetstackbase(p) +
                        p->p_stksize - maxstack;
                    mp->pr_size = (uintptr_t)naddr -
                        mp->pr_vaddr;
                }
            }
            if (seg->s_ops == &segspt_shmops)
                mp->pr_mflags |= MA_ISM | MA_SHM;
            mp->pr_pagesize = PAGESIZE;

            /*
             * Manufacture a filename for the "object" directory.
             */
            vattr.va_mask = VATTR_FSID|VATTR_NODEID;
            if (seg->s_ops == &segvn_ops &&
                segop_getvp(seg, saddr, &vp) == 0 &&
                vp != NULL && vp->v_type == VREG &&
                fop_getattr(vp, &vattr, 0, CRED(), NULL) == 0) {
                if (vp == p->p_exec)
                    (void) strcpy(mp->pr_mapname, "a.out");
                else
                    pr_object_name(mp->pr_mapname,
                        vp, &vattr);
            }

            /*
             * Get the SysV shared memory id, if any.
             */
            if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
                (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
                SHMID_NONE) {
                if (mp->pr_shmid == SHMID_FREE)
                    mp->pr_shmid = -1;

                mp->pr_mflags |= MA_SHM;
            } else {
                mp->pr_shmid = -1;
            }
        }
        ASSERT(tmp == NULL);
    } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

    return (0);
}
#ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
    struct as *as = p->p_as;
    prmap32_t *mp;
    struct seg *seg;
    struct seg *brkseg, *stkseg;
    struct vnode *vp;
    struct vattr vattr;
    uint_t prot;

    ASSERT(as != &kas && AS_WRITE_HELD(as));

    /*
     * Request an initial buffer size that doesn't waste memory
     * if the address space has only a small number of segments.
     */
    pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

    if ((seg = AS_SEGFIRST(as)) == NULL)
        return (0);

    brkseg = break_seg(p);
    stkseg = as_segat(as, prgetstackbase(p));

    do {
        caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
        caddr_t saddr, naddr;
        void *tmp = NULL;

        if ((seg->s_flags & S_HOLE) != 0) {
            continue;
        }

        for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
            prot = pr_getprot(seg, reserved, &tmp,
                &saddr, &naddr, eaddr);
            if (saddr == naddr)
                continue;

            mp = pr_iol_newbuf(iolhead, sizeof (*mp));

            mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
            mp->pr_size = (size32_t)(naddr - saddr);
            mp->pr_offset = segop_getoffset(seg, saddr);
            mp->pr_mflags = 0;
            if (prot & PROT_READ)
                mp->pr_mflags |= MA_READ;
            if (prot & PROT_WRITE)
                mp->pr_mflags |= MA_WRITE;
            if (prot & PROT_EXEC)
                mp->pr_mflags |= MA_EXEC;
            if (segop_gettype(seg, saddr) & MAP_SHARED)
                mp->pr_mflags |= MA_SHARED;
            if (segop_gettype(seg, saddr) & MAP_NORESERVE)
                mp->pr_mflags |= MA_NORESERVE;
            if (seg->s_ops == &segspt_shmops ||
                (seg->s_ops == &segvn_ops &&
                (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
                mp->pr_mflags |= MA_ANON;
            if (seg == brkseg)
                mp->pr_mflags |= MA_BREAK;
            else if (seg == stkseg) {
                mp->pr_mflags |= MA_STACK;
                if (reserved) {
                    size_t maxstack =
                        ((size_t)p->p_stk_ctl +
                        PAGEOFFSET) & PAGEMASK;
                    uintptr_t vaddr =
                        (uintptr_t)prgetstackbase(p) +
                        p->p_stksize - maxstack;
                    mp->pr_vaddr = (caddr32_t)vaddr;
                    mp->pr_size = (size32_t)
                        ((uintptr_t)naddr - vaddr);
                }
            }
            if (seg->s_ops == &segspt_shmops)
                mp->pr_mflags |= MA_ISM | MA_SHM;
            mp->pr_pagesize = PAGESIZE;

            /*
             * Manufacture a filename for the "object" directory.
             */
            vattr.va_mask = VATTR_FSID|VATTR_NODEID;
            if (seg->s_ops == &segvn_ops &&
                segop_getvp(seg, saddr, &vp) == 0 &&
                vp != NULL && vp->v_type == VREG &&
                fop_getattr(vp, &vattr, 0, CRED(), NULL) == 0) {
                if (vp == p->p_exec)
                    (void) strcpy(mp->pr_mapname, "a.out");
                else
                    pr_object_name(mp->pr_mapname,
                        vp, &vattr);
            }

            /*
             * Get the SysV shared memory id, if any.
             */
            if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
                (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
                SHMID_NONE) {
                if (mp->pr_shmid == SHMID_FREE)
                    mp->pr_shmid = -1;

                mp->pr_mflags |= MA_SHM;
            } else {
                mp->pr_shmid = -1;
            }
        }
        ASSERT(tmp == NULL);
    } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

    return (0);
}
#endif	/* _SYSCALL32_IMPL */
/*
 * Return the size of the /proc page data file.
 */
size_t
prpdsize(struct as *as)
{
    struct seg *seg;
    size_t size;

    ASSERT(as != &kas && AS_WRITE_HELD(as));

    if ((seg = AS_SEGFIRST(as)) == NULL)
        return (0);

    size = sizeof (prpageheader_t);
    do {
        caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
        caddr_t saddr, naddr;
        void *tmp = NULL;
        size_t npage;

        if ((seg->s_flags & S_HOLE) != 0) {
            continue;
        }

        for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
            (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
            if ((npage = (naddr - saddr) / PAGESIZE) != 0)
                size += sizeof (prasmap_t) + round8(npage);
        }
        ASSERT(tmp == NULL);
    } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

    return (size);
}
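
/*
 * The size computed above corresponds to this layout of the page data
 * file, built by prpdread() below (a sketch, not a struct definition):
 * a prpageheader_t, then one record per mapping, where each record is
 * a prasmap_t followed by one HAT status byte per page, padded with
 * round8() so that the next record stays 8-byte aligned.
 *
 *	prpageheader_t
 *	prasmap_t | npage status bytes | pad to 8
 *	prasmap_t | npage status bytes | pad to 8
 *	...
 */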
#ifdef _SYSCALL32_IMPL
size_t
prpdsize32(struct as *as)
{
    struct seg *seg;
    size_t size;

    ASSERT(as != &kas && AS_WRITE_HELD(as));

    if ((seg = AS_SEGFIRST(as)) == NULL)
        return (0);

    size = sizeof (prpageheader32_t);
    do {
        caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
        caddr_t saddr, naddr;
        void *tmp = NULL;
        size_t npage;

        if ((seg->s_flags & S_HOLE) != 0) {
            continue;
        }

        for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
            (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
            if ((npage = (naddr - saddr) / PAGESIZE) != 0)
                size += sizeof (prasmap32_t) + round8(npage);
        }
        ASSERT(tmp == NULL);
    } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

    return (size);
}
#endif	/* _SYSCALL32_IMPL */
/*
 * Read page data information.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
    struct as *as = p->p_as;
    caddr_t buf;
    size_t size;
    prpageheader_t *php;
    prasmap_t *pmp;
    struct seg *seg;
    int error;

again:
    AS_LOCK_ENTER(as, RW_WRITER);

    if ((seg = AS_SEGFIRST(as)) == NULL) {
        AS_LOCK_EXIT(as);
        return (0);
    }
    size = prpdsize(as);
    if (uiop->uio_resid < size) {
        AS_LOCK_EXIT(as);
        return (E2BIG);
    }

    buf = kmem_zalloc(size, KM_SLEEP);
    php = (prpageheader_t *)buf;
    pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

    hrt2ts(gethrtime(), &php->pr_tstamp);
    php->pr_nmap = 0;
    php->pr_npage = 0;
    do {
        caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
        caddr_t saddr, naddr;
        void *tmp = NULL;

        if ((seg->s_flags & S_HOLE) != 0) {
            continue;
        }

        for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
            struct vnode *vp;
            struct vattr vattr;
            size_t len;
            size_t npage;
            uint_t prot;
            uintptr_t next;

            prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
            if ((len = (size_t)(naddr - saddr)) == 0)
                continue;
            npage = len / PAGESIZE;
            next = (uintptr_t)(pmp + 1) + round8(npage);
            /*
             * It's possible that the address space can change
             * subtly even though we're holding as->a_lock
             * due to the nondeterminism of page_exists() in
             * the presence of asynchronously flushed pages or
             * mapped files whose sizes are changing.
             * page_exists() may be called indirectly from
             * pr_getprot() by a segop_incore() routine.
             * If this happens we need to make sure we don't
             * overrun the buffer whose size we computed based
             * on the initial iteration through the segments.
             * Once we've detected an overflow, we need to clean
             * up the temporary memory allocated in pr_getprot()
             * and retry. If there's a pending signal, we return
             * EINTR so that this thread can be dislodged if
             * a latent bug causes us to spin indefinitely.
             */
            if (next > (uintptr_t)buf + size) {
                pr_getprot_done(&tmp);
                AS_LOCK_EXIT(as);

                kmem_free(buf, size);

                if (ISSIG(curthread, JUSTLOOKING))
                    return (EINTR);

                goto again;
            }

            php->pr_nmap++;
            php->pr_npage += npage;
            pmp->pr_vaddr = (uintptr_t)saddr;
            pmp->pr_npage = npage;
            pmp->pr_offset = segop_getoffset(seg, saddr);
            pmp->pr_mflags = 0;
            if (prot & PROT_READ)
                pmp->pr_mflags |= MA_READ;
            if (prot & PROT_WRITE)
                pmp->pr_mflags |= MA_WRITE;
            if (prot & PROT_EXEC)
                pmp->pr_mflags |= MA_EXEC;
            if (segop_gettype(seg, saddr) & MAP_SHARED)
                pmp->pr_mflags |= MA_SHARED;
            if (segop_gettype(seg, saddr) & MAP_NORESERVE)
                pmp->pr_mflags |= MA_NORESERVE;
            if (seg->s_ops == &segspt_shmops ||
                (seg->s_ops == &segvn_ops &&
                (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
                pmp->pr_mflags |= MA_ANON;
            if (seg->s_ops == &segspt_shmops)
                pmp->pr_mflags |= MA_ISM | MA_SHM;
            pmp->pr_pagesize = PAGESIZE;
            /*
             * Manufacture a filename for the "object" directory.
             */
            vattr.va_mask = VATTR_FSID|VATTR_NODEID;
            if (seg->s_ops == &segvn_ops &&
                segop_getvp(seg, saddr, &vp) == 0 &&
                vp != NULL && vp->v_type == VREG &&
                fop_getattr(vp, &vattr, 0, CRED(), NULL) == 0) {
                if (vp == p->p_exec)
                    (void) strcpy(pmp->pr_mapname, "a.out");
                else
                    pr_object_name(pmp->pr_mapname,
                        vp, &vattr);
            }

            /*
             * Get the SysV shared memory id, if any.
             */
            if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
                (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
                SHMID_NONE) {
                if (pmp->pr_shmid == SHMID_FREE)
                    pmp->pr_shmid = -1;

                pmp->pr_mflags |= MA_SHM;
            } else {
                pmp->pr_shmid = -1;
            }

            hat_getstat(as, saddr, len, hatid,
                (char *)(pmp + 1), HAT_SYNC_ZERORM);
            pmp = (prasmap_t *)next;
        }
        ASSERT(tmp == NULL);
    } while ((seg = AS_SEGNEXT(as, seg)) != NULL);

    AS_LOCK_EXIT(as);

    ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
    error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
    kmem_free(buf, size);

    return (error);
}
2071 #ifdef _SYSCALL32_IMPL
2073 prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
2075 struct as *as = p->p_as;
2076 caddr_t buf;
2077 size_t size;
2078 prpageheader32_t *php;
2079 prasmap32_t *pmp;
2080 struct seg *seg;
2081 int error;
2083 again:
2084 AS_LOCK_ENTER(as, RW_WRITER);
2086 if ((seg = AS_SEGFIRST(as)) == NULL) {
2087 AS_LOCK_EXIT(as);
2088 return (0);
2090 size = prpdsize32(as);
2091 if (uiop->uio_resid < size) {
2092 AS_LOCK_EXIT(as);
2093 return (E2BIG);
2096 buf = kmem_zalloc(size, KM_SLEEP);
2097 php = (prpageheader32_t *)buf;
2098 pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));
2100 hrt2ts32(gethrtime(), &php->pr_tstamp);
2101 php->pr_nmap = 0;
2102 php->pr_npage = 0;
2103 do {
2104 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2105 caddr_t saddr, naddr;
2106 void *tmp = NULL;
2108 if ((seg->s_flags & S_HOLE) != 0) {
2109 continue;
2112 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2113 struct vnode *vp;
2114 struct vattr vattr;
2115 size_t len;
2116 size_t npage;
2117 uint_t prot;
2118 uintptr_t next;
2120 prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2121 if ((len = (size_t)(naddr - saddr)) == 0)
2122 continue;
2123 npage = len / PAGESIZE;
2124 next = (uintptr_t)(pmp + 1) + round8(npage);
2126 * It's possible that the address space can change
2127 * subtly even though we're holding as->a_lock
2128 * due to the nondeterminism of page_exists() in
2129 * the presence of asynchronously flushed pages or
2130 * mapped files whose sizes are changing.
2131 * page_exists() may be called indirectly from
2132 * pr_getprot() by a segop_incore() routine.
2133 * If this happens we need to make sure we don't
2134 * overrun the buffer whose size we computed based
2135 * on the initial iteration through the segments.
2136 * Once we've detected an overflow, we need to clean
2137 * up the temporary memory allocated in pr_getprot()
2138 * and retry. If there's a pending signal, we return
2139 * EINTR so that this thread can be dislodged if
2140 * a latent bug causes us to spin indefinitely.
2142 if (next > (uintptr_t)buf + size) {
2143 pr_getprot_done(&tmp);
2144 AS_LOCK_EXIT(as);
2146 kmem_free(buf, size);
2148 if (ISSIG(curthread, JUSTLOOKING))
2149 return (EINTR);
2151 goto again;
2154 php->pr_nmap++;
2155 php->pr_npage += npage;
2156 pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
2157 pmp->pr_npage = (size32_t)npage;
2158 pmp->pr_offset = segop_getoffset(seg, saddr);
2159 pmp->pr_mflags = 0;
2160 if (prot & PROT_READ)
2161 pmp->pr_mflags |= MA_READ;
2162 if (prot & PROT_WRITE)
2163 pmp->pr_mflags |= MA_WRITE;
2164 if (prot & PROT_EXEC)
2165 pmp->pr_mflags |= MA_EXEC;
2166 if (segop_gettype(seg, saddr) & MAP_SHARED)
2167 pmp->pr_mflags |= MA_SHARED;
2168 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
2169 pmp->pr_mflags |= MA_NORESERVE;
2170 if (seg->s_ops == &segspt_shmops ||
2171 (seg->s_ops == &segvn_ops &&
2172 (segop_getvp(seg, saddr, &vp) != 0 || vp == NULL)))
2173 pmp->pr_mflags |= MA_ANON;
2174 if (seg->s_ops == &segspt_shmops)
2175 pmp->pr_mflags |= MA_ISM | MA_SHM;
2176 pmp->pr_pagesize = PAGESIZE;
2178 * Manufacture a filename for the "object" directory.
2180 vattr.va_mask = VATTR_FSID|VATTR_NODEID;
2181 if (seg->s_ops == &segvn_ops &&
2182 segop_getvp(seg, saddr, &vp) == 0 &&
2183 vp != NULL && vp->v_type == VREG &&
2184 fop_getattr(vp, &vattr, 0, CRED(), NULL) == 0) {
2185 if (vp == p->p_exec)
2186 (void) strcpy(pmp->pr_mapname, "a.out");
2187 else
2188 pr_object_name(pmp->pr_mapname,
2189 vp, &vattr);
2193 * Get the SysV shared memory id, if any.
2195 if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2196 (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2197 SHMID_NONE) {
2198 if (pmp->pr_shmid == SHMID_FREE)
2199 pmp->pr_shmid = -1;
2201 pmp->pr_mflags |= MA_SHM;
2202 } else {
2203 pmp->pr_shmid = -1;
2206 hat_getstat(as, saddr, len, hatid,
2207 (char *)(pmp + 1), HAT_SYNC_ZERORM);
2208 pmp = (prasmap32_t *)next;
2210 ASSERT(tmp == NULL);
2211 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2213 AS_LOCK_EXIT(as);
2215 ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2216 error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2217 kmem_free(buf, size);
2219 return (error);
2221 #endif /* _SYSCALL32_IMPL */
2223 ushort_t
2224 prgetpctcpu(uint64_t pct)
2227 * The value returned will be relevant in the zone of the examiner,
2228 * which may not be the same as the zone which performed the procfs
2229 * mount.
2231 int nonline = zone_ncpus_online_get(curproc->p_zone);
2234 * Prorate over online cpus so we don't exceed 100%
2236 if (nonline > 1)
2237 pct /= nonline;
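/*
 * In the 16-bit scaled representation used throughout /proc,
 * 0x8000 denotes 100% (see proc(4)); hence the clamp below.
 */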
2238 pct >>= 16; /* convert to 16-bit scaled integer */
2239 if (pct > 0x8000) /* might happen, due to rounding */
2240 pct = 0x8000;
2241 return ((ushort_t)pct);
2245 * Return information used by ps(1).
2247 void
2248 prgetpsinfo(proc_t *p, psinfo_t *psp)
2250 kthread_t *t;
2251 struct cred *cred;
2252 hrtime_t hrutime, hrstime;
2254 ASSERT(MUTEX_HELD(&p->p_lock));
2256 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2257 bzero(psp, sizeof (*psp));
2258 else {
2259 thread_unlock(t);
2260 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2264 * only export SSYS and SMSACCT; everything else is off-limits to
2265 * userland apps.
2267 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2268 psp->pr_nlwp = p->p_lwpcnt;
2269 psp->pr_nzomb = p->p_zombcnt;
2270 mutex_enter(&p->p_crlock);
2271 cred = p->p_cred;
2272 psp->pr_uid = crgetruid(cred);
2273 psp->pr_euid = crgetuid(cred);
2274 psp->pr_gid = crgetrgid(cred);
2275 psp->pr_egid = crgetgid(cred);
2276 mutex_exit(&p->p_crlock);
2277 psp->pr_pid = p->p_pid;
2278 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2279 (p->p_flag & SZONETOP)) {
2280 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2282 * Inside local zones, report zsched's pid as the parent pid for
2283 * processes whose actual parent resides outside of the zone.
2285 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2286 } else {
2287 psp->pr_ppid = p->p_ppid;
2289 psp->pr_pgid = p->p_pgrp;
2290 psp->pr_sid = p->p_sessp->s_sid;
2291 psp->pr_taskid = p->p_task->tk_tkid;
2292 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2293 psp->pr_poolid = p->p_pool->pool_id;
2294 psp->pr_zoneid = p->p_zone->zone_id;
2295 if ((psp->pr_contract = PRCTID(p)) == 0)
2296 psp->pr_contract = -1;
2297 psp->pr_addr = (uintptr_t)prgetpsaddr(p);
2298 switch (p->p_model) {
2299 case DATAMODEL_ILP32:
2300 psp->pr_dmodel = PR_MODEL_ILP32;
2301 break;
2302 case DATAMODEL_LP64:
2303 psp->pr_dmodel = PR_MODEL_LP64;
2304 break;
2306 hrutime = mstate_aggr_state(p, LMS_USER);
2307 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2308 hrt2ts((hrutime + hrstime), &psp->pr_time);
2309 TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2311 if (t == NULL) {
2312 int wcode = p->p_wcode; /* must be atomic read */
2314 if (wcode)
2315 psp->pr_wstat = wstat(wcode, p->p_wdata);
2316 psp->pr_ttydev = PRNODEV;
2317 psp->pr_lwp.pr_state = SZOMB;
2318 psp->pr_lwp.pr_sname = 'Z';
2319 psp->pr_lwp.pr_bindpro = PBIND_NONE;
2320 psp->pr_lwp.pr_bindpset = PS_NONE;
2321 } else {
2322 user_t *up = PTOU(p);
2323 struct as *as;
2324 dev_t d;
2325 extern dev_t rwsconsdev, rconsdev, uconsdev;
2327 d = cttydev(p);
2329 * If the controlling terminal is the real
2330 * or workstation console device, map to what the
2331 * user thinks is the console device. Handle case when
2332 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2334 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2335 d = uconsdev;
2336 psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
2337 psp->pr_start = up->u_start;
2338 bcopy(up->u_comm, psp->pr_fname,
2339 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2340 bcopy(up->u_psargs, psp->pr_psargs,
2341 MIN(PRARGSZ-1, PSARGSZ));
2342 psp->pr_argc = up->u_argc;
2343 psp->pr_argv = up->u_argv;
2344 psp->pr_envp = up->u_envp;
2346 /* get the chosen lwp's lwpsinfo */
2347 prgetlwpsinfo(t, &psp->pr_lwp);
2349 /* compute %cpu for the process */
2350 if (p->p_lwpcnt == 1)
2351 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2352 else {
2353 uint64_t pct = 0;
2354 hrtime_t cur_time = gethrtime_unscaled();
2356 t = p->p_tlist;
2357 do {
2358 pct += cpu_update_pct(t, cur_time);
2359 } while ((t = t->t_forw) != p->p_tlist);
2361 psp->pr_pctcpu = prgetpctcpu(pct);
2363 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2364 psp->pr_size = 0;
2365 psp->pr_rssize = 0;
2366 } else {
2367 mutex_exit(&p->p_lock);
2368 AS_LOCK_ENTER(as, RW_READER);
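/* pr_size and pr_rssize are reported in kilobytes */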
2369 psp->pr_size = btopr(as->a_resvsize) *
2370 (PAGESIZE / 1024);
2371 psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
2372 psp->pr_pctmem = rm_pctmemory(as);
2373 AS_LOCK_EXIT(as);
2374 mutex_enter(&p->p_lock);
2379 #ifdef _SYSCALL32_IMPL
2380 void
2381 prgetpsinfo32(proc_t *p, psinfo32_t *psp)
2383 kthread_t *t;
2384 struct cred *cred;
2385 hrtime_t hrutime, hrstime;
2387 ASSERT(MUTEX_HELD(&p->p_lock));
2389 if ((t = prchoose(p)) == NULL) /* returns locked thread */
2390 bzero(psp, sizeof (*psp));
2391 else {
2392 thread_unlock(t);
2393 bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
2397 * only export SSYS and SMSACCT; everything else is off-limits to
2398 * userland apps.
2400 psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
2401 psp->pr_nlwp = p->p_lwpcnt;
2402 psp->pr_nzomb = p->p_zombcnt;
2403 mutex_enter(&p->p_crlock);
2404 cred = p->p_cred;
2405 psp->pr_uid = crgetruid(cred);
2406 psp->pr_euid = crgetuid(cred);
2407 psp->pr_gid = crgetrgid(cred);
2408 psp->pr_egid = crgetgid(cred);
2409 mutex_exit(&p->p_crlock);
2410 psp->pr_pid = p->p_pid;
2411 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
2412 (p->p_flag & SZONETOP)) {
2413 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
2415 * Inside local zones, report zsched's pid as the parent pid for
2416 * processes whose actual parent resides outside of the zone.
2418 psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
2419 } else {
2420 psp->pr_ppid = p->p_ppid;
2422 psp->pr_pgid = p->p_pgrp;
2423 psp->pr_sid = p->p_sessp->s_sid;
2424 psp->pr_taskid = p->p_task->tk_tkid;
2425 psp->pr_projid = p->p_task->tk_proj->kpj_id;
2426 psp->pr_poolid = p->p_pool->pool_id;
2427 psp->pr_zoneid = p->p_zone->zone_id;
2428 if ((psp->pr_contract = PRCTID(p)) == 0)
2429 psp->pr_contract = -1;
2430 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2431 switch (p->p_model) {
2432 case DATAMODEL_ILP32:
2433 psp->pr_dmodel = PR_MODEL_ILP32;
2434 break;
2435 case DATAMODEL_LP64:
2436 psp->pr_dmodel = PR_MODEL_LP64;
2437 break;
2439 hrutime = mstate_aggr_state(p, LMS_USER);
2440 hrstime = mstate_aggr_state(p, LMS_SYSTEM);
2441 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2442 TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);
2444 if (t == NULL) {
2445 extern int wstat(int, int); /* needs a header file */
2446 int wcode = p->p_wcode; /* must be atomic read */
2448 if (wcode)
2449 psp->pr_wstat = wstat(wcode, p->p_wdata);
2450 psp->pr_ttydev = PRNODEV32;
2451 psp->pr_lwp.pr_state = SZOMB;
2452 psp->pr_lwp.pr_sname = 'Z';
2453 } else {
2454 user_t *up = PTOU(p);
2455 struct as *as;
2456 dev_t d;
2457 extern dev_t rwsconsdev, rconsdev, uconsdev;
2459 d = cttydev(p);
2461 * If the controlling terminal is the real
2462 * or workstation console device, map to what the
2463 * user thinks is the console device. Handle case when
2464 * rwsconsdev or rconsdev is set to NODEV for Starfire.
2466 if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
2467 d = uconsdev;
2468 (void) cmpldev(&psp->pr_ttydev, d);
2469 TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
2470 bcopy(up->u_comm, psp->pr_fname,
2471 MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
2472 bcopy(up->u_psargs, psp->pr_psargs,
2473 MIN(PRARGSZ-1, PSARGSZ));
2474 psp->pr_argc = up->u_argc;
2475 psp->pr_argv = (caddr32_t)up->u_argv;
2476 psp->pr_envp = (caddr32_t)up->u_envp;
2478 /* get the chosen lwp's lwpsinfo */
2479 prgetlwpsinfo32(t, &psp->pr_lwp);
2481 /* compute %cpu for the process */
2482 if (p->p_lwpcnt == 1)
2483 psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
2484 else {
2485 uint64_t pct = 0;
2486 hrtime_t cur_time;
2488 t = p->p_tlist;
2489 cur_time = gethrtime_unscaled();
2490 do {
2491 pct += cpu_update_pct(t, cur_time);
2492 } while ((t = t->t_forw) != p->p_tlist);
2494 psp->pr_pctcpu = prgetpctcpu(pct);
2496 if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
2497 psp->pr_size = 0;
2498 psp->pr_rssize = 0;
2499 } else {
2500 mutex_exit(&p->p_lock);
2501 AS_LOCK_ENTER(as, RW_READER);
2502 psp->pr_size = (size32_t)
2503 (btopr(as->a_resvsize) * (PAGESIZE / 1024));
2504 psp->pr_rssize = (size32_t)
2505 (rm_asrss(as) * (PAGESIZE / 1024));
2506 psp->pr_pctmem = rm_pctmemory(as);
2507 AS_LOCK_EXIT(as);
2508 mutex_enter(&p->p_lock);
2513 * If we are looking at an LP64 process, zero out
2514 * the fields that cannot be represented in ILP32.
2516 if (p->p_model != DATAMODEL_ILP32) {
2517 psp->pr_size = 0;
2518 psp->pr_rssize = 0;
2519 psp->pr_argv = 0;
2520 psp->pr_envp = 0;
2524 #endif /* _SYSCALL32_IMPL */
2526 void
2527 prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
2529 klwp_t *lwp = ttolwp(t);
2530 sobj_ops_t *sobj;
2531 char c, state;
2532 uint64_t pct;
2533 int retval, niceval;
2534 hrtime_t hrutime, hrstime;
2536 ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));
2538 bzero(psp, sizeof (*psp));
2540 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2541 psp->pr_lwpid = t->t_tid;
2542 psp->pr_addr = (uintptr_t)t;
2543 psp->pr_wchan = (uintptr_t)t->t_wchan;
2545 /* map the thread state enum into a process state enum */
2546 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2547 switch (state) {
2548 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2549 case TS_RUN: state = SRUN; c = 'R'; break;
2550 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2551 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2552 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2553 case TS_WAIT: state = SWAIT; c = 'W'; break;
2554 default: state = 0; c = '?'; break;
2556 psp->pr_state = state;
2557 psp->pr_sname = c;
2558 if ((sobj = t->t_sobj_ops) != NULL)
2559 psp->pr_stype = SOBJ_TYPE(sobj);
2560 retval = CL_DONICE(t, NULL, 0, &niceval);
2561 if (retval == 0) {
2562 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2563 psp->pr_nice = niceval + NZERO;
2565 psp->pr_syscall = t->t_sysnum;
2566 psp->pr_pri = t->t_pri;
2567 psp->pr_start.tv_sec = t->t_start;
2568 psp->pr_start.tv_nsec = 0L;
2569 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2570 scalehrtime(&hrutime);
2571 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2572 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2573 scalehrtime(&hrstime);
2574 hrt2ts(hrutime + hrstime, &psp->pr_time);
2575 /* compute %cpu for the lwp */
2576 pct = cpu_update_pct(t, gethrtime_unscaled());
2577 psp->pr_pctcpu = prgetpctcpu(pct);
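/*
 * pr_pctcpu is a binary fraction with 0x8000 == 100%; scale it
 * to an integer percentage, biasing by 0x6000 (0.75) before
 * truncating so values just under a whole percent round up.
 */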
2578 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2579 if (psp->pr_cpu > 99)
2580 psp->pr_cpu = 99;
2582 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2583 sizeof (psp->pr_clname) - 1);
2584 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2585 psp->pr_onpro = t->t_cpu->cpu_id;
2586 psp->pr_bindpro = t->t_bind_cpu;
2587 psp->pr_bindpset = t->t_bind_pset;
2588 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2591 #ifdef _SYSCALL32_IMPL
2592 void
2593 prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
2595 proc_t *p = ttoproc(t);
2596 klwp_t *lwp = ttolwp(t);
2597 sobj_ops_t *sobj;
2598 char c, state;
2599 uint64_t pct;
2600 int retval, niceval;
2601 hrtime_t hrutime, hrstime;
2603 ASSERT(MUTEX_HELD(&p->p_lock));
2605 bzero(psp, sizeof (*psp));
2607 psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */
2608 psp->pr_lwpid = t->t_tid;
2609 psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */
2610 psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */
2612 /* map the thread state enum into a process state enum */
2613 state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
2614 switch (state) {
2615 case TS_SLEEP: state = SSLEEP; c = 'S'; break;
2616 case TS_RUN: state = SRUN; c = 'R'; break;
2617 case TS_ONPROC: state = SONPROC; c = 'O'; break;
2618 case TS_ZOMB: state = SZOMB; c = 'Z'; break;
2619 case TS_STOPPED: state = SSTOP; c = 'T'; break;
2620 case TS_WAIT: state = SWAIT; c = 'W'; break;
2621 default: state = 0; c = '?'; break;
2623 psp->pr_state = state;
2624 psp->pr_sname = c;
2625 if ((sobj = t->t_sobj_ops) != NULL)
2626 psp->pr_stype = SOBJ_TYPE(sobj);
2627 retval = CL_DONICE(t, NULL, 0, &niceval);
2628 if (retval == 0) {
2629 psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
2630 psp->pr_nice = niceval + NZERO;
2631 } else {
2632 psp->pr_oldpri = 0;
2633 psp->pr_nice = 0;
2635 psp->pr_syscall = t->t_sysnum;
2636 psp->pr_pri = t->t_pri;
2637 psp->pr_start.tv_sec = (time32_t)t->t_start;
2638 psp->pr_start.tv_nsec = 0L;
2639 hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
2640 scalehrtime(&hrutime);
2641 hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
2642 lwp->lwp_mstate.ms_acct[LMS_TRAP];
2643 scalehrtime(&hrstime);
2644 hrt2ts32(hrutime + hrstime, &psp->pr_time);
2645 /* compute %cpu for the lwp */
2646 pct = cpu_update_pct(t, gethrtime_unscaled());
2647 psp->pr_pctcpu = prgetpctcpu(pct);
2648 psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */
2649 if (psp->pr_cpu > 99)
2650 psp->pr_cpu = 99;
2652 (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
2653 sizeof (psp->pr_clname) - 1);
2654 bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */
2655 psp->pr_onpro = t->t_cpu->cpu_id;
2656 psp->pr_bindpro = t->t_bind_cpu;
2657 psp->pr_bindpset = t->t_bind_pset;
2658 psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
2660 #endif /* _SYSCALL32_IMPL */
2662 #ifdef _SYSCALL32_IMPL
2664 #define PR_COPY_FIELD(s, d, field) d->field = s->field
2666 #define PR_COPY_FIELD_ILP32(s, d, field) \
2667 if (s->pr_dmodel == PR_MODEL_ILP32) { \
2668 d->field = s->field; \
2669 }
2671 #define PR_COPY_TIMESPEC(s, d, field) \
2672 TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
2674 #define PR_COPY_BUF(s, d, field) \
2675 bcopy(s->field, d->field, sizeof (d->field));
2677 #define PR_IGNORE_FIELD(s, d, field)
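/*
 * PR_COPY_FIELD copies a field unconditionally;
 * PR_COPY_FIELD_ILP32 copies it only when the source process is
 * ILP32, otherwise leaving the zero from the initial bzero();
 * PR_IGNORE_FIELD skips fields (such as kernel addresses) that
 * cannot be represented in the 32-bit structure.
 */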
2679 void
2680 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
2682 bzero(dest, sizeof (*dest));
2684 PR_COPY_FIELD(src, dest, pr_flag);
2685 PR_COPY_FIELD(src, dest, pr_lwpid);
2686 PR_IGNORE_FIELD(src, dest, pr_addr);
2687 PR_IGNORE_FIELD(src, dest, pr_wchan);
2688 PR_COPY_FIELD(src, dest, pr_stype);
2689 PR_COPY_FIELD(src, dest, pr_state);
2690 PR_COPY_FIELD(src, dest, pr_sname);
2691 PR_COPY_FIELD(src, dest, pr_nice);
2692 PR_COPY_FIELD(src, dest, pr_syscall);
2693 PR_COPY_FIELD(src, dest, pr_oldpri);
2694 PR_COPY_FIELD(src, dest, pr_cpu);
2695 PR_COPY_FIELD(src, dest, pr_pri);
2696 PR_COPY_FIELD(src, dest, pr_pctcpu);
2697 PR_COPY_TIMESPEC(src, dest, pr_start);
2698 PR_COPY_BUF(src, dest, pr_clname);
2699 PR_COPY_BUF(src, dest, pr_name);
2700 PR_COPY_FIELD(src, dest, pr_onpro);
2701 PR_COPY_FIELD(src, dest, pr_bindpro);
2702 PR_COPY_FIELD(src, dest, pr_bindpset);
2703 PR_COPY_FIELD(src, dest, pr_lgrp);
2706 void
2707 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
2709 bzero(dest, sizeof (*dest));
2711 PR_COPY_FIELD(src, dest, pr_flag);
2712 PR_COPY_FIELD(src, dest, pr_nlwp);
2713 PR_COPY_FIELD(src, dest, pr_pid);
2714 PR_COPY_FIELD(src, dest, pr_ppid);
2715 PR_COPY_FIELD(src, dest, pr_pgid);
2716 PR_COPY_FIELD(src, dest, pr_sid);
2717 PR_COPY_FIELD(src, dest, pr_uid);
2718 PR_COPY_FIELD(src, dest, pr_euid);
2719 PR_COPY_FIELD(src, dest, pr_gid);
2720 PR_COPY_FIELD(src, dest, pr_egid);
2721 PR_IGNORE_FIELD(src, dest, pr_addr);
2722 PR_COPY_FIELD_ILP32(src, dest, pr_size);
2723 PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
2724 PR_COPY_FIELD(src, dest, pr_ttydev);
2725 PR_COPY_FIELD(src, dest, pr_pctcpu);
2726 PR_COPY_FIELD(src, dest, pr_pctmem);
2727 PR_COPY_TIMESPEC(src, dest, pr_start);
2728 PR_COPY_TIMESPEC(src, dest, pr_time);
2729 PR_COPY_TIMESPEC(src, dest, pr_ctime);
2730 PR_COPY_BUF(src, dest, pr_fname);
2731 PR_COPY_BUF(src, dest, pr_psargs);
2732 PR_COPY_FIELD(src, dest, pr_wstat);
2733 PR_COPY_FIELD(src, dest, pr_argc);
2734 PR_COPY_FIELD_ILP32(src, dest, pr_argv);
2735 PR_COPY_FIELD_ILP32(src, dest, pr_envp);
2736 PR_COPY_FIELD(src, dest, pr_dmodel);
2737 PR_COPY_FIELD(src, dest, pr_taskid);
2738 PR_COPY_FIELD(src, dest, pr_projid);
2739 PR_COPY_FIELD(src, dest, pr_nzomb);
2740 PR_COPY_FIELD(src, dest, pr_poolid);
2741 PR_COPY_FIELD(src, dest, pr_contract);
2742 /* assumption: these duplicated pr_poolid copies were meant to copy pr_zoneid */
2743 PR_COPY_FIELD(src, dest, pr_zoneid);
2745 lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
2748 #undef PR_COPY_FIELD
2749 #undef PR_COPY_FIELD_ILP32
2750 #undef PR_COPY_TIMESPEC
2751 #undef PR_COPY_BUF
2752 #undef PR_IGNORE_FIELD
2754 #endif /* _SYSCALL32_IMPL */
2757 * This used to get called when microstate accounting was disabled but
2758 * microstate information was requested. Since microstate accounting is on
2759 * regardless of the proc flags, this simply makes it appear to procfs that
2760 * microstate accounting is on. This is relatively meaningless since you
2761 * can't turn it off, but this is here for the sake of appearances.
2764 /*ARGSUSED*/
2765 void
2766 estimate_msacct(kthread_t *t, hrtime_t curtime)
2768 proc_t *p;
2770 if (t == NULL)
2771 return;
2773 p = ttoproc(t);
2774 ASSERT(MUTEX_HELD(&p->p_lock));
2777 * A system process (p0) could be referenced if the thread is
2778 * in the process of exiting. Don't turn on microstate accounting
2779 * in that case.
2781 if (p->p_flag & SSYS)
2782 return;
2785 * Loop through all the LWPs (kernel threads) in the process.
2787 t = p->p_tlist;
2788 do {
2789 t->t_proc_flag |= TP_MSACCT;
2790 } while ((t = t->t_forw) != p->p_tlist);
2792 p->p_flag |= SMSACCT; /* set process-wide MSACCT */
2796 * It's not really possible to disable microstate accounting anymore.
2797 * However, this routine simply turns off the ms accounting flags in a process.
2798 * This way procfs can still pretend to turn microstate accounting on and
2799 * off for a process, but it actually doesn't do anything. This is
2800 * a neutered form of preemptive idiot-proofing.
2802 void
2803 disable_msacct(proc_t *p)
2805 kthread_t *t;
2807 ASSERT(MUTEX_HELD(&p->p_lock));
2809 p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */
2811 * Loop through all the LWPs (kernel threads) in the process.
2813 if ((t = p->p_tlist) != NULL) {
2814 do {
2815 /* clear per-thread flag */
2816 t->t_proc_flag &= ~TP_MSACCT;
2817 } while ((t = t->t_forw) != p->p_tlist);
2822 * Return resource usage information.
2824 void
2825 prgetusage(kthread_t *t, prhusage_t *pup)
2827 klwp_t *lwp = ttolwp(t);
2828 hrtime_t *mstimep;
2829 struct mstate *ms = &lwp->lwp_mstate;
2830 int state;
2831 int i;
2832 hrtime_t curtime;
2833 hrtime_t waitrq;
2834 hrtime_t tmp1;
2836 curtime = gethrtime_unscaled();
2838 pup->pr_lwpid = t->t_tid;
2839 pup->pr_count = 1;
2840 pup->pr_create = ms->ms_start;
2841 pup->pr_term = ms->ms_term;
2842 scalehrtime(&pup->pr_create);
2843 scalehrtime(&pup->pr_term);
2844 if (ms->ms_term == 0) {
2845 pup->pr_rtime = curtime - ms->ms_start;
2846 scalehrtime(&pup->pr_rtime);
2847 } else {
2848 pup->pr_rtime = ms->ms_term - ms->ms_start;
2849 scalehrtime(&pup->pr_rtime);
2853 pup->pr_utime = ms->ms_acct[LMS_USER];
2854 pup->pr_stime = ms->ms_acct[LMS_SYSTEM];
2855 pup->pr_ttime = ms->ms_acct[LMS_TRAP];
2856 pup->pr_tftime = ms->ms_acct[LMS_TFAULT];
2857 pup->pr_dftime = ms->ms_acct[LMS_DFAULT];
2858 pup->pr_kftime = ms->ms_acct[LMS_KFAULT];
2859 pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK];
2860 pup->pr_slptime = ms->ms_acct[LMS_SLEEP];
2861 pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
2862 pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
2864 prscaleusage(pup);
2867 * Adjust for time waiting in the dispatcher queue.
2869 waitrq = t->t_waitrq; /* hopefully atomic */
2870 if (waitrq != 0) {
2871 if (waitrq > curtime) {
2872 curtime = gethrtime_unscaled();
2874 tmp1 = curtime - waitrq;
2875 scalehrtime(&tmp1);
2876 pup->pr_wtime += tmp1;
2877 curtime = waitrq;
2881 * Adjust for time spent in current microstate.
2883 if (ms->ms_state_start > curtime) {
2884 curtime = gethrtime_unscaled();
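/*
 * Unscaled hrtime reads taken on different CPUs are not
 * guaranteed to be mutually ordered, so the delta below can
 * come out negative; retry, but only MAX_ITERS_SPIN times.
 */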
2887 i = 0;
2888 do {
2889 switch (state = t->t_mstate) {
2890 case LMS_SLEEP:
2892 * Update the timer for the current sleep state.
2894 switch (state = ms->ms_prev) {
2895 case LMS_TFAULT:
2896 case LMS_DFAULT:
2897 case LMS_KFAULT:
2898 case LMS_USER_LOCK:
2899 break;
2900 default:
2901 state = LMS_SLEEP;
2902 break;
2904 break;
2905 case LMS_TFAULT:
2906 case LMS_DFAULT:
2907 case LMS_KFAULT:
2908 case LMS_USER_LOCK:
2909 state = LMS_SYSTEM;
2910 break;
2912 switch (state) {
2913 case LMS_USER: mstimep = &pup->pr_utime; break;
2914 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
2915 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
2916 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
2917 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
2918 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
2919 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
2920 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
2921 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
2922 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
2923 default: panic("prgetusage: unknown microstate");
2925 tmp1 = curtime - ms->ms_state_start;
2926 if (tmp1 < 0) {
2927 curtime = gethrtime_unscaled();
2928 i++;
2929 continue;
2931 scalehrtime(&tmp1);
2932 } while (tmp1 < 0 && i < MAX_ITERS_SPIN);
2934 *mstimep += tmp1;
2936 /* update pup timestamp */
2937 pup->pr_tstamp = curtime;
2938 scalehrtime(&pup->pr_tstamp);
2941 * Resource usage counters.
2943 pup->pr_minf = lwp->lwp_ru.minflt;
2944 pup->pr_majf = lwp->lwp_ru.majflt;
2945 pup->pr_nswap = lwp->lwp_ru.nswap;
2946 pup->pr_inblk = lwp->lwp_ru.inblock;
2947 pup->pr_oublk = lwp->lwp_ru.oublock;
2948 pup->pr_msnd = lwp->lwp_ru.msgsnd;
2949 pup->pr_mrcv = lwp->lwp_ru.msgrcv;
2950 pup->pr_sigs = lwp->lwp_ru.nsignals;
2951 pup->pr_vctx = lwp->lwp_ru.nvcsw;
2952 pup->pr_ictx = lwp->lwp_ru.nivcsw;
2953 pup->pr_sysc = lwp->lwp_ru.sysc;
2954 pup->pr_ioch = lwp->lwp_ru.ioch;
2958 * Convert ms_acct stats from unscaled high-res time to nanoseconds
2960 void
2961 prscaleusage(prhusage_t *usg)
2963 scalehrtime(&usg->pr_utime);
2964 scalehrtime(&usg->pr_stime);
2965 scalehrtime(&usg->pr_ttime);
2966 scalehrtime(&usg->pr_tftime);
2967 scalehrtime(&usg->pr_dftime);
2968 scalehrtime(&usg->pr_kftime);
2969 scalehrtime(&usg->pr_ltime);
2970 scalehrtime(&usg->pr_slptime);
2971 scalehrtime(&usg->pr_wtime);
2972 scalehrtime(&usg->pr_stoptime);
2977 * Sum resource usage information.
2979 void
2980 praddusage(kthread_t *t, prhusage_t *pup)
2982 klwp_t *lwp = ttolwp(t);
2983 hrtime_t *mstimep;
2984 struct mstate *ms = &lwp->lwp_mstate;
2985 int state;
2986 int i;
2987 hrtime_t curtime;
2988 hrtime_t waitrq;
2989 hrtime_t tmp;
2990 prhusage_t conv;
2992 curtime = gethrtime_unscaled();
2994 if (ms->ms_term == 0) {
2995 tmp = curtime - ms->ms_start;
2996 scalehrtime(&tmp);
2997 pup->pr_rtime += tmp;
2998 } else {
2999 tmp = ms->ms_term - ms->ms_start;
3000 scalehrtime(&tmp);
3001 pup->pr_rtime += tmp;
3004 conv.pr_utime = ms->ms_acct[LMS_USER];
3005 conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3006 conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3007 conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3008 conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3009 conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3010 conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3011 conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3012 conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3013 conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3015 prscaleusage(&conv);
3017 pup->pr_utime += conv.pr_utime;
3018 pup->pr_stime += conv.pr_stime;
3019 pup->pr_ttime += conv.pr_ttime;
3020 pup->pr_tftime += conv.pr_tftime;
3021 pup->pr_dftime += conv.pr_dftime;
3022 pup->pr_kftime += conv.pr_kftime;
3023 pup->pr_ltime += conv.pr_ltime;
3024 pup->pr_slptime += conv.pr_slptime;
3025 pup->pr_wtime += conv.pr_wtime;
3026 pup->pr_stoptime += conv.pr_stoptime;
3029 * Adjust for time waiting in the dispatcher queue.
3031 waitrq = t->t_waitrq; /* hopefully atomic */
3032 if (waitrq != 0) {
3033 if (waitrq > curtime) {
3034 curtime = gethrtime_unscaled();
3036 tmp = curtime - waitrq;
3037 scalehrtime(&tmp);
3038 pup->pr_wtime += tmp;
3039 curtime = waitrq;
3043 * Adjust for time spent in current microstate.
3045 if (ms->ms_state_start > curtime) {
3046 curtime = gethrtime_unscaled();
3049 i = 0;
3050 do {
3051 switch (state = t->t_mstate) {
3052 case LMS_SLEEP:
3054 * Update the timer for the current sleep state.
3056 switch (state = ms->ms_prev) {
3057 case LMS_TFAULT:
3058 case LMS_DFAULT:
3059 case LMS_KFAULT:
3060 case LMS_USER_LOCK:
3061 break;
3062 default:
3063 state = LMS_SLEEP;
3064 break;
3066 break;
3067 case LMS_TFAULT:
3068 case LMS_DFAULT:
3069 case LMS_KFAULT:
3070 case LMS_USER_LOCK:
3071 state = LMS_SYSTEM;
3072 break;
3074 switch (state) {
3075 case LMS_USER: mstimep = &pup->pr_utime; break;
3076 case LMS_SYSTEM: mstimep = &pup->pr_stime; break;
3077 case LMS_TRAP: mstimep = &pup->pr_ttime; break;
3078 case LMS_TFAULT: mstimep = &pup->pr_tftime; break;
3079 case LMS_DFAULT: mstimep = &pup->pr_dftime; break;
3080 case LMS_KFAULT: mstimep = &pup->pr_kftime; break;
3081 case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break;
3082 case LMS_SLEEP: mstimep = &pup->pr_slptime; break;
3083 case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break;
3084 case LMS_STOPPED: mstimep = &pup->pr_stoptime; break;
3085 default: panic("praddusage: unknown microstate");
3087 tmp = curtime - ms->ms_state_start;
3088 if (tmp < 0) {
3089 curtime = gethrtime_unscaled();
3090 i++;
3091 continue;
3093 scalehrtime(&tmp);
3094 } while (tmp < 0 && i < MAX_ITERS_SPIN);
3096 *mstimep += tmp;
3098 /* update pup timestamp */
3099 pup->pr_tstamp = curtime;
3100 scalehrtime(&pup->pr_tstamp);
3103 * Resource usage counters.
3105 pup->pr_minf += lwp->lwp_ru.minflt;
3106 pup->pr_majf += lwp->lwp_ru.majflt;
3107 pup->pr_nswap += lwp->lwp_ru.nswap;
3108 pup->pr_inblk += lwp->lwp_ru.inblock;
3109 pup->pr_oublk += lwp->lwp_ru.oublock;
3110 pup->pr_msnd += lwp->lwp_ru.msgsnd;
3111 pup->pr_mrcv += lwp->lwp_ru.msgrcv;
3112 pup->pr_sigs += lwp->lwp_ru.nsignals;
3113 pup->pr_vctx += lwp->lwp_ru.nvcsw;
3114 pup->pr_ictx += lwp->lwp_ru.nivcsw;
3115 pup->pr_sysc += lwp->lwp_ru.sysc;
3116 pup->pr_ioch += lwp->lwp_ru.ioch;
3120 * Convert a prhusage_t to a prusage_t.
3121 * This means convert each hrtime_t to a timestruc_t
3122 * and copy the count fields uint64_t => ulong_t.
3124 void
3125 prcvtusage(prhusage_t *pup, prusage_t *upup)
3127 uint64_t *ullp;
3128 ulong_t *ulp;
3129 int i;
3131 upup->pr_lwpid = pup->pr_lwpid;
3132 upup->pr_count = pup->pr_count;
3134 hrt2ts(pup->pr_tstamp, &upup->pr_tstamp);
3135 hrt2ts(pup->pr_create, &upup->pr_create);
3136 hrt2ts(pup->pr_term, &upup->pr_term);
3137 hrt2ts(pup->pr_rtime, &upup->pr_rtime);
3138 hrt2ts(pup->pr_utime, &upup->pr_utime);
3139 hrt2ts(pup->pr_stime, &upup->pr_stime);
3140 hrt2ts(pup->pr_ttime, &upup->pr_ttime);
3141 hrt2ts(pup->pr_tftime, &upup->pr_tftime);
3142 hrt2ts(pup->pr_dftime, &upup->pr_dftime);
3143 hrt2ts(pup->pr_kftime, &upup->pr_kftime);
3144 hrt2ts(pup->pr_ltime, &upup->pr_ltime);
3145 hrt2ts(pup->pr_slptime, &upup->pr_slptime);
3146 hrt2ts(pup->pr_wtime, &upup->pr_wtime);
3147 hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3148 bzero(upup->filltime, sizeof (upup->filltime));
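/*
 * The 22 counters from pr_minf onward (through pr_ioch and the
 * trailing reserved slots) are contiguous in both structures,
 * so narrow them uint64_t => ulong_t with a simple pointer walk.
 */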
3150 ullp = &pup->pr_minf;
3151 ulp = &upup->pr_minf;
3152 for (i = 0; i < 22; i++)
3153 *ulp++ = (ulong_t)*ullp++;
3156 #ifdef _SYSCALL32_IMPL
3157 void
3158 prcvtusage32(prhusage_t *pup, prusage32_t *upup)
3160 uint64_t *ullp;
3161 uint32_t *ulp;
3162 int i;
3164 upup->pr_lwpid = pup->pr_lwpid;
3165 upup->pr_count = pup->pr_count;
3167 hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp);
3168 hrt2ts32(pup->pr_create, &upup->pr_create);
3169 hrt2ts32(pup->pr_term, &upup->pr_term);
3170 hrt2ts32(pup->pr_rtime, &upup->pr_rtime);
3171 hrt2ts32(pup->pr_utime, &upup->pr_utime);
3172 hrt2ts32(pup->pr_stime, &upup->pr_stime);
3173 hrt2ts32(pup->pr_ttime, &upup->pr_ttime);
3174 hrt2ts32(pup->pr_tftime, &upup->pr_tftime);
3175 hrt2ts32(pup->pr_dftime, &upup->pr_dftime);
3176 hrt2ts32(pup->pr_kftime, &upup->pr_kftime);
3177 hrt2ts32(pup->pr_ltime, &upup->pr_ltime);
3178 hrt2ts32(pup->pr_slptime, &upup->pr_slptime);
3179 hrt2ts32(pup->pr_wtime, &upup->pr_wtime);
3180 hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime);
3181 bzero(upup->filltime, sizeof (upup->filltime));
3183 ullp = &pup->pr_minf;
3184 ulp = &upup->pr_minf;
3185 for (i = 0; i < 22; i++)
3186 *ulp++ = (uint32_t)*ullp++;
3188 #endif /* _SYSCALL32_IMPL */
3191 * Determine whether a set is empty.
3194 setisempty(uint32_t *sp, uint_t n)
3196 while (n--)
3197 if (*sp++)
3198 return (0);
3199 return (1);
3203 * Utility routine for establishing a watched area in the process.
3204 * Keep the list of watched areas sorted by virtual address.
3207 set_watched_area(proc_t *p, struct watched_area *pwa)
3209 caddr_t vaddr = pwa->wa_vaddr;
3210 caddr_t eaddr = pwa->wa_eaddr;
3211 ulong_t flags = pwa->wa_flags;
3212 struct watched_area *target;
3213 avl_index_t where;
3214 int error = 0;
3216 /* we must not be holding p->p_lock, but the process must be locked */
3217 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3218 ASSERT(p->p_proc_flag & P_PR_LOCK);
3221 * If this is our first watchpoint, enable watchpoints for the process.
3223 if (!pr_watch_active(p)) {
3224 kthread_t *t;
3226 mutex_enter(&p->p_lock);
3227 if ((t = p->p_tlist) != NULL) {
3228 do {
3229 watch_enable(t);
3230 } while ((t = t->t_forw) != p->p_tlist);
3232 mutex_exit(&p->p_lock);
3235 target = pr_find_watched_area(p, pwa, &where);
3236 if (target != NULL) {
3238 * We discovered an existing, overlapping watched area.
3239 * Allow it only if it is an exact match.
3241 if (target->wa_vaddr != vaddr ||
3242 target->wa_eaddr != eaddr)
3243 error = EINVAL;
3244 else if (target->wa_flags != flags) {
3245 error = set_watched_page(p, vaddr, eaddr,
3246 flags, target->wa_flags);
3247 target->wa_flags = flags;
3249 kmem_free(pwa, sizeof (struct watched_area));
3250 } else {
3251 avl_insert(&p->p_warea, pwa, where);
3252 error = set_watched_page(p, vaddr, eaddr, flags, 0);
3255 return (error);
3259 * Utility routine for clearing a watched area in the process.
3260 * Must be an exact match of the virtual address.
3261 * The size and flags don't matter.
3264 clear_watched_area(proc_t *p, struct watched_area *pwa)
3266 struct watched_area *found;
3268 /* we must not be holding p->p_lock, but the process must be locked */
3269 ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3270 ASSERT(p->p_proc_flag & P_PR_LOCK);
3273 if (!pr_watch_active(p)) {
3274 kmem_free(pwa, sizeof (struct watched_area));
3275 return (0);
3279 * Look for a matching address in the watched areas. If a match is
3280 * found, clear the old watched area and adjust the watched page(s). It
3281 * is not an error if there is no match.
3283 if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3284 found->wa_vaddr == pwa->wa_vaddr) {
3285 clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3286 found->wa_flags);
3287 avl_remove(&p->p_warea, found);
3288 kmem_free(found, sizeof (struct watched_area));
3291 kmem_free(pwa, sizeof (struct watched_area));
3294 * If we removed the last watched area from the process, disable
3295 * watchpoints.
3297 if (!pr_watch_active(p)) {
3298 kthread_t *t;
3300 mutex_enter(&p->p_lock);
3301 if ((t = p->p_tlist) != NULL) {
3302 do {
3303 watch_disable(t);
3304 } while ((t = t->t_forw) != p->p_tlist);
3306 mutex_exit(&p->p_lock);
3309 return (0);
3313 * Free all the watched_area structures.
3315 void
3316 pr_free_watchpoints(proc_t *p)
3318 struct watched_area *delp;
3319 void *cookie;
3321 cookie = NULL;
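/*
 * avl_destroy_nodes() hands back each node in turn without
 * rebalancing the tree; the cookie carries the traversal state
 * between calls.
 */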
3322 while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3323 kmem_free(delp, sizeof (struct watched_area));
3325 avl_destroy(&p->p_warea);
3329 * This one is called by the traced process to unwatch all the
3330 * pages while deallocating the list of watched_page structs.
3332 void
3333 pr_free_watched_pages(proc_t *p)
3335 struct as *as = p->p_as;
3336 struct watched_page *pwp;
3337 uint_t prot;
3338 int retrycnt, err;
3339 void *cookie;
3341 if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3342 return;
3344 ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3345 AS_LOCK_ENTER(as, RW_WRITER);
3347 pwp = avl_first(&as->a_wpage);
3349 cookie = NULL;
3350 while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3351 retrycnt = 0;
3352 if ((prot = pwp->wp_oprot) != 0) {
3353 caddr_t addr = pwp->wp_vaddr;
3354 struct seg *seg;
3355 retry:
3357 if ((pwp->wp_prot != prot ||
3358 (pwp->wp_flags & WP_NOWATCH)) &&
3359 (seg = as_segat(as, addr)) != NULL) {
3360 err = segop_setprot(seg, addr, PAGESIZE, prot);
3361 if (err == IE_RETRY) {
3362 ASSERT(retrycnt == 0);
3363 retrycnt++;
3364 goto retry;
3368 kmem_free(pwp, sizeof (struct watched_page));
3371 avl_destroy(&as->a_wpage);
3372 p->p_wprot = NULL;
3374 AS_LOCK_EXIT(as);
3378 * Insert a watched area into the list of watched pages.
3379 * If oflags is zero then we are adding a new watched area.
3380 * Otherwise we are changing the flags of an existing watched area.
3382 static int
3383 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3384 ulong_t flags, ulong_t oflags)
3386 struct as *as = p->p_as;
3387 avl_tree_t *pwp_tree;
3388 struct watched_page *pwp, *newpwp;
3389 struct watched_page tpw;
3390 avl_index_t where;
3391 struct seg *seg;
3392 uint_t prot;
3393 caddr_t addr;
3396 * We need to pre-allocate a list of structures before we grab the
3397 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
3398 * held.
3400 newpwp = NULL;
3401 for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3402 addr < eaddr; addr += PAGESIZE) {
3403 pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
3404 pwp->wp_list = newpwp;
3405 newpwp = pwp;
3408 AS_LOCK_ENTER(as, RW_WRITER);
3411 * Search for an existing watched page to contain the watched area.
3412 * If none is found, grab a new one from the available list
3413 * and insert it in the active list, keeping the list sorted
3414 * by user-level virtual address.
3416 if (p->p_flag & SVFWAIT)
3417 pwp_tree = &p->p_wpage;
3418 else
3419 pwp_tree = &as->a_wpage;
3421 again:
3422 if (avl_numnodes(pwp_tree) > prnwatch) {
3423 AS_LOCK_EXIT(as);
3424 while (newpwp != NULL) {
3425 pwp = newpwp->wp_list;
3426 kmem_free(newpwp, sizeof (struct watched_page));
3427 newpwp = pwp;
3429 return (E2BIG);
3432 tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3433 if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
3434 pwp = newpwp;
3435 newpwp = newpwp->wp_list;
3436 pwp->wp_list = NULL;
3437 pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
3438 (uintptr_t)PAGEMASK);
3439 avl_insert(pwp_tree, pwp, where);
3442 ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
3444 if (oflags & WA_READ)
3445 pwp->wp_read--;
3446 if (oflags & WA_WRITE)
3447 pwp->wp_write--;
3448 if (oflags & WA_EXEC)
3449 pwp->wp_exec--;
3451 ASSERT(pwp->wp_read >= 0);
3452 ASSERT(pwp->wp_write >= 0);
3453 ASSERT(pwp->wp_exec >= 0);
3455 if (flags & WA_READ)
3456 pwp->wp_read++;
3457 if (flags & WA_WRITE)
3458 pwp->wp_write++;
3459 if (flags & WA_EXEC)
3460 pwp->wp_exec++;
3462 if (!(p->p_flag & SVFWAIT)) {
3463 vaddr = pwp->wp_vaddr;
3464 if (pwp->wp_oprot == 0 &&
3465 (seg = as_segat(as, vaddr)) != NULL) {
3466 (void) segop_getprot(seg, vaddr, 0, &prot);
3467 pwp->wp_oprot = (uchar_t)prot;
3468 pwp->wp_prot = (uchar_t)prot;
3470 if (pwp->wp_oprot != 0) {
3471 prot = pwp->wp_oprot;
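/*
 * Watchpoints work by revoking page permissions so that an
 * access faults into the kernel. Read and exec watchpoints
 * must revoke all permissions; a write watchpoint need only
 * revoke write permission.
 */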
3472 if (pwp->wp_read)
3473 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3474 if (pwp->wp_write)
3475 prot &= ~PROT_WRITE;
3476 if (pwp->wp_exec)
3477 prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3478 if (!(pwp->wp_flags & WP_NOWATCH) &&
3479 pwp->wp_prot != prot &&
3480 (pwp->wp_flags & WP_SETPROT) == 0) {
3481 pwp->wp_flags |= WP_SETPROT;
3482 pwp->wp_list = p->p_wprot;
3483 p->p_wprot = pwp;
3485 pwp->wp_prot = (uchar_t)prot;
3490 * If the watched area extends into the next page then do
3491 * it over again with the virtual address of the next page.
3493 if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
3494 goto again;
3496 AS_LOCK_EXIT(as);
3499 * Free any watched_page structures we may have over-allocated.
3501 while (newpwp != NULL) {
3502 pwp = newpwp->wp_list;
3503 kmem_free(newpwp, sizeof (struct watched_page));
3504 newpwp = pwp;
3507 return (0);
3511 * Remove a watched area from the list of watched pages.
3512 * A watched area may extend over more than one page.
3514 static void
3515 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
3517 struct as *as = p->p_as;
3518 struct watched_page *pwp;
3519 struct watched_page tpw;
3520 avl_tree_t *tree;
3521 avl_index_t where;
3523 AS_LOCK_ENTER(as, RW_WRITER);
3525 if (p->p_flag & SVFWAIT)
3526 tree = &p->p_wpage;
3527 else
3528 tree = &as->a_wpage;
3530 tpw.wp_vaddr = vaddr =
3531 (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
3532 pwp = avl_find(tree, &tpw, &where);
3533 if (pwp == NULL)
3534 pwp = avl_nearest(tree, where, AVL_AFTER);
3536 while (pwp != NULL && pwp->wp_vaddr < eaddr) {
3537 ASSERT(vaddr <= pwp->wp_vaddr);
3539 if (flags & WA_READ)
3540 pwp->wp_read--;
3541 if (flags & WA_WRITE)
3542 pwp->wp_write--;
3543 if (flags & WA_EXEC)
3544 pwp->wp_exec--;
3546 if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
3548 * Reset the hat layer's protections on this page.
3550 if (pwp->wp_oprot != 0) {
3551 uint_t prot = pwp->wp_oprot;
3553 if (pwp->wp_read)
3554 prot &=
3555 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3556 if (pwp->wp_write)
3557 prot &= ~PROT_WRITE;
3558 if (pwp->wp_exec)
3559 prot &=
3560 ~(PROT_READ|PROT_WRITE|PROT_EXEC);
3561 if (!(pwp->wp_flags & WP_NOWATCH) &&
3562 pwp->wp_prot != prot &&
3563 (pwp->wp_flags & WP_SETPROT) == 0) {
3564 pwp->wp_flags |= WP_SETPROT;
3565 pwp->wp_list = p->p_wprot;
3566 p->p_wprot = pwp;
3568 pwp->wp_prot = (uchar_t)prot;
3570 } else {
3572 * No watched areas remain in this page.
3573 * Reset everything to normal.
3575 if (pwp->wp_oprot != 0) {
3576 pwp->wp_prot = pwp->wp_oprot;
3577 if ((pwp->wp_flags & WP_SETPROT) == 0) {
3578 pwp->wp_flags |= WP_SETPROT;
3579 pwp->wp_list = p->p_wprot;
3580 p->p_wprot = pwp;
3585 pwp = AVL_NEXT(tree, pwp);
3588 AS_LOCK_EXIT(as);
3592 * Return the original protections for the specified page.
3594 static void
3595 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
3597 struct watched_page *pwp;
3598 struct watched_page tpw;
3600 ASSERT(AS_LOCK_HELD(as));
3602 tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
3603 if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
3604 *prot = pwp->wp_oprot;
3607 static prpagev_t *
3608 pr_pagev_create(struct seg *seg, int check_noreserve)
3610 prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
3611 size_t total_pages = seg_pages(seg);
3614 * Limit the size of our vectors to pagev_lim pages at a time. We need
3615 * 4 or 5 bytes of storage per page, so this means we limit ourselves
3616 * to about a megabyte of kernel heap by default.
3618 pagev->pg_npages = MIN(total_pages, pagev_lim);
3619 pagev->pg_pnbase = 0;
3621 pagev->pg_protv =
3622 kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
3624 if (check_noreserve)
3625 pagev->pg_incore =
3626 kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
3627 else
3628 pagev->pg_incore = NULL;
3630 return (pagev);
3633 static void
3634 pr_pagev_destroy(prpagev_t *pagev)
3636 if (pagev->pg_incore != NULL)
3637 kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
3639 kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
3640 kmem_free(pagev, sizeof (prpagev_t));
3643 static caddr_t
3644 pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
3646 ulong_t lastpg = seg_page(seg, eaddr - 1);
3647 ulong_t pn, pnlim;
3648 caddr_t saddr;
3649 size_t len;
3651 ASSERT(addr >= seg->s_base && addr <= eaddr);
3653 if (addr == eaddr)
3654 return (eaddr);
3656 refill:
3657 ASSERT(addr < eaddr);
3658 pagev->pg_pnbase = seg_page(seg, addr);
3659 pnlim = pagev->pg_pnbase + pagev->pg_npages;
3660 saddr = addr;
3662 if (lastpg < pnlim)
3663 len = (size_t)(eaddr - addr);
3664 else
3665 len = pagev->pg_npages * PAGESIZE;
3667 if (pagev->pg_incore != NULL) {
3669 * INCORE cleverly has different semantics than GETPROT:
3670 * it returns info on pages up to but NOT including addr + len.
3672 (void) segop_incore(seg, addr, len, pagev->pg_incore);
3673 pn = pagev->pg_pnbase;
3675 do {
3677 * Guilty knowledge here: We know that segvn_incore
3678 * returns more than just the low-order bit that
3679 * indicates the page is actually in memory. If any
3680 * bits are set, then the page has backing store.
3682 if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
3683 goto out;
3685 } while ((addr += PAGESIZE) < eaddr && pn < pnlim);
3688 * If we examined all the pages in the vector but we're not
3689 * at the end of the segment, take another lap.
3691 if (addr < eaddr)
3692 goto refill;
3696 * Need to take len - 1 because addr + len is the address of the
3697 * first byte of the page just past the end of what we want.
3699 out:
3700 (void) segop_getprot(seg, saddr, len - 1, pagev->pg_protv);
3701 return (addr);
3704 static caddr_t
3705 pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
3706 caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
3709 * Our starting address is either the specified address, or the base
3710 * address from the start of the pagev. If the latter is greater,
3711 * this means a previous call to pr_pagev_fill has already scanned
3712 * further than the end of the previous mapping.
3714 caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
3715 caddr_t addr = MAX(*saddrp, base);
3716 ulong_t pn = seg_page(seg, addr);
3717 uint_t prot, nprot;
3720 * If we're dealing with noreserve pages, then advance addr to
3721 * the address of the next page which has backing store.
3723 if (pagev->pg_incore != NULL) {
3724 while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
3725 if ((addr += PAGESIZE) == eaddr) {
3726 *saddrp = addr;
3727 prot = 0;
3728 goto out;
3730 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3731 addr = pr_pagev_fill(pagev, seg, addr, eaddr);
3732 if (addr == eaddr) {
3733 *saddrp = addr;
3734 prot = 0;
3735 goto out;
3737 pn = seg_page(seg, addr);
3743 * Get the protections on the page corresponding to addr.
3745 pn = seg_page(seg, addr);
3746 ASSERT(pn >= pagev->pg_pnbase);
3747 ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));
3749 prot = pagev->pg_protv[pn - pagev->pg_pnbase];
3750 getwatchprot(seg->s_as, addr, &prot);
3751 *saddrp = addr;
3754 * Now loop until we find a backed page with different protections
3755 * or we reach the end of this segment.
3757 while ((addr += PAGESIZE) < eaddr) {
3759 * If pn has advanced to the page number following what we
3760 * have information on, refill the page vector and reset
3761 * addr and pn. If pr_pagev_fill does not return the
3762 * address of the next page, we have a discontiguity and
3763 * thus have reached the end of the current mapping.
3765 if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
3766 caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
3767 if (naddr != addr)
3768 goto out;
3769 pn = seg_page(seg, addr);
3773 * The previous page's protections are in prot, and it has
3774 * backing. If this page is MAP_NORESERVE and has no backing,
3775 * then end this mapping and return the previous protections.
3777 if (pagev->pg_incore != NULL &&
3778 pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
3779 break;
3782 * Otherwise end the mapping if this page's protections (nprot)
3783 * are different than those in the previous page (prot).
3785 nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
3786 getwatchprot(seg->s_as, addr, &nprot);
3788 if (nprot != prot)
3789 break;
3792 out:
3793 *protp = prot;
3794 return (addr);
3797 size_t
3798 pr_getsegsize(struct seg *seg, int reserved)
3800 size_t size = seg->s_size;
3803 * If we're interested in the reserved space, return the size of the
3804 * segment itself. Everything else in this function is a special case
3805 * to determine the actual underlying size of various segment types.
3807 if (reserved)
3808 return (size);
3811 * If this is a segvn mapping of a regular file, return the smaller
3812 * of the segment size and the remaining size of the file beyond
3813 * the file offset corresponding to seg->s_base.
3815 if (seg->s_ops == &segvn_ops) {
3816 vattr_t vattr;
3817 vnode_t *vp;
3819 vattr.va_mask = VATTR_SIZE;
3821 if (segop_getvp(seg, seg->s_base, &vp) == 0 &&
3822 vp != NULL && vp->v_type == VREG &&
3823 fop_getattr(vp, &vattr, 0, CRED(), NULL) == 0) {
3825 uoff_t fsize = vattr.va_size;
3826 uoff_t offset = segop_getoffset(seg, seg->s_base);
3828 if (fsize < offset)
3829 fsize = 0;
3830 else
3831 fsize -= offset;
3833 fsize = roundup(fsize, (uoff_t)PAGESIZE);
3835 if (fsize < (uoff_t)size)
3836 size = (size_t)fsize;
3839 return (size);
3843 * If this is an ISM shared segment, don't include pages that are
3844 * beyond the real size of the spt segment that backs it.
3846 if (seg->s_ops == &segspt_shmops)
3847 return (MIN(spt_realsize(seg), size));
3850 * If this segment is a mapping from /dev/null, then this is a
3851 * reservation of virtual address space and has no actual size.
3852 * Such segments are backed by segdev and have type set to neither
3853 * MAP_SHARED nor MAP_PRIVATE.
3855 if (seg->s_ops == &segdev_ops &&
3856 ((segop_gettype(seg, seg->s_base) &
3857 (MAP_SHARED | MAP_PRIVATE)) == 0))
3858 return (0);
3861 * If this segment doesn't match one of the special types we handle,
3862 * just return the size of the segment itself.
3864 return (size);
3867 uint_t
3868 pr_getprot(struct seg *seg, int reserved, void **tmp,
3869 caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
3871 struct as *as = seg->s_as;
3873 caddr_t saddr = *saddrp;
3874 caddr_t naddr;
3876 int check_noreserve;
3877 uint_t prot;
3879 union {
3880 struct segvn_data *svd;
3881 struct segdev_data *sdp;
3882 void *data;
3883 } s;
3885 s.data = seg->s_data;
3887 ASSERT(AS_WRITE_HELD(as));
3888 ASSERT(saddr >= seg->s_base && saddr < eaddr);
3889 ASSERT(eaddr <= seg->s_base + seg->s_size);
3892 * Don't include MAP_NORESERVE pages in the address range
3893 * unless their mappings have actually materialized.
3894 * We cheat by knowing that segvn is the only segment
3895 * driver that supports MAP_NORESERVE.
3897 check_noreserve =
3898 (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
3899 (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
3900 (s.svd->flags & MAP_NORESERVE));
3903 * Examine every page only as a last resort. We use guilty knowledge
3904 * of segvn and segdev to avoid this: if there are no per-page
3905 * protections present in the segment and we don't care about
3906 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
3908 if (!check_noreserve && saddr == seg->s_base &&
3909 seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
3910 prot = s.svd->prot;
3911 getwatchprot(as, saddr, &prot);
3912 naddr = eaddr;
3914 } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
3915 s.sdp != NULL && s.sdp->pageprot == 0) {
3916 prot = s.sdp->prot;
3917 getwatchprot(as, saddr, &prot);
3918 naddr = eaddr;
3920 } else {
3921 prpagev_t *pagev;
3924 * If addr is sitting at the start of the segment, then
3925 * create a page vector to store protection and incore
3926 * information for pages in the segment, and fill it.
3927 * Otherwise, we expect *tmp to address the prpagev_t
3928 * allocated by a previous call to this function.
3930 if (saddr == seg->s_base) {
3931 pagev = pr_pagev_create(seg, check_noreserve);
3932 saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);
3934 ASSERT(*tmp == NULL);
3935 *tmp = pagev;
3937 ASSERT(saddr <= eaddr);
3938 *saddrp = saddr;
3940 if (saddr == eaddr) {
3941 naddr = saddr;
3942 prot = 0;
3943 goto out;
3946 } else {
3947 ASSERT(*tmp != NULL);
3948 pagev = (prpagev_t *)*tmp;
3951 naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
3952 ASSERT(naddr <= eaddr);
3955 out:
3956 if (naddr == eaddr)
3957 pr_getprot_done(tmp);
3958 *naddrp = naddr;
3959 return (prot);
3962 void
3963 pr_getprot_done(void **tmp)
3965 if (*tmp != NULL) {
3966 pr_pagev_destroy((prpagev_t *)*tmp);
3967 *tmp = NULL;
3972 * Return true iff the vnode is a /proc file from the object directory.
3975 pr_isobject(vnode_t *vp)
3977 return (vn_matchops(vp, &prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
3981 * Return true iff the vnode is a /proc file opened by the process itself.
3984 pr_isself(vnode_t *vp)
3987 * XXX: To retain binary compatibility with the old
3988 * ioctl()-based version of /proc, we exempt self-opens
3989 * of /proc/<pid> from being marked close-on-exec.
3991 return (vn_matchops(vp, &prvnodeops) &&
3992 (VTOP(vp)->pr_flags & PR_ISSELF) &&
3993 VTOP(vp)->pr_type != PR_PIDDIR);
3996 static ssize_t
3997 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
3999 ssize_t pagesize, hatsize;
4001 ASSERT(AS_WRITE_HELD(seg->s_as));
4002 ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4003 ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4004 ASSERT(saddr < eaddr);
4006 pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4007 ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4008 ASSERT(pagesize != 0);
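/*
 * hat_getpagesize() returns -1 when saddr has no valid HAT
 * mapping; fall back to PAGESIZE so the walk below still
 * advances.
 */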
4010 if (pagesize == -1)
4011 pagesize = PAGESIZE;
4013 saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4015 while (saddr < eaddr) {
4016 if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4017 break;
4018 ASSERT(IS_P2ALIGNED(saddr, pagesize));
4019 saddr += pagesize;
4022 *naddrp = ((saddr < eaddr) ? saddr : eaddr);
4023 return (hatsize);
4027 * Return an array of structures with extended memory map information.
4028 * We allocate here; the caller must deallocate.
4031 prgetxmap(proc_t *p, list_t *iolhead)
4033 struct as *as = p->p_as;
4034 prxmap_t *mp;
4035 struct seg *seg;
4036 struct seg *brkseg, *stkseg;
4037 struct vnode *vp;
4038 struct vattr vattr;
4039 uint_t prot;
4041 ASSERT(as != &kas && AS_WRITE_HELD(as));
4044 * Request an initial buffer size that doesn't waste memory
4045 * if the address space has only a small number of segments.
4047 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4049 if ((seg = AS_SEGFIRST(as)) == NULL)
4050 return (0);
4052 brkseg = break_seg(p);
4053 stkseg = as_segat(as, prgetstackbase(p));
4055 do {
4056 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4057 caddr_t saddr, naddr, baddr;
4058 void *tmp = NULL;
4059 ssize_t psz;
4060 char *parr;
4061 uint64_t npages;
4062 uint64_t pagenum;
4064 if ((seg->s_flags & S_HOLE) != 0) {
4065 continue;
4068 * Segment loop part one: iterate from the base of the segment
4069 * to its end, pausing at each address boundary (baddr) between
4070 * ranges that have different virtual memory protections.
4072 for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4073 prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4074 ASSERT(baddr >= saddr && baddr <= eaddr);
4077 * Segment loop part two: iterate from the current
4078 * position to the end of the protection boundary,
4079 * pausing at each address boundary (naddr) between
4080 * ranges that have different underlying page sizes.
4082 for (; saddr < baddr; saddr = naddr) {
4083 psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4084 ASSERT(naddr >= saddr && naddr <= baddr);
4086 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4088 mp->pr_vaddr = (uintptr_t)saddr;
4089 mp->pr_size = naddr - saddr;
4090 mp->pr_offset = segop_getoffset(seg, saddr);
4091 mp->pr_mflags = 0;
4092 if (prot & PROT_READ)
4093 mp->pr_mflags |= MA_READ;
4094 if (prot & PROT_WRITE)
4095 mp->pr_mflags |= MA_WRITE;
4096 if (prot & PROT_EXEC)
4097 mp->pr_mflags |= MA_EXEC;
4098 if (segop_gettype(seg, saddr) & MAP_SHARED)
4099 mp->pr_mflags |= MA_SHARED;
4100 if (segop_gettype(seg, saddr) & MAP_NORESERVE)
4101 mp->pr_mflags |= MA_NORESERVE;
4102 if (seg->s_ops == &segspt_shmops ||
4103 (seg->s_ops == &segvn_ops &&
4104 (segop_getvp(seg, saddr, &vp) != 0 ||
4105 vp == NULL)))
4106 mp->pr_mflags |= MA_ANON;
4107 if (seg == brkseg)
4108 mp->pr_mflags |= MA_BREAK;
4109 else if (seg == stkseg)
4110 mp->pr_mflags |= MA_STACK;
4111 if (seg->s_ops == &segspt_shmops)
4112 mp->pr_mflags |= MA_ISM | MA_SHM;
4114 mp->pr_pagesize = PAGESIZE;
4115 if (psz == -1) {
4116 mp->pr_hatpagesize = 0;
4117 } else {
4118 mp->pr_hatpagesize = psz;
4122 * Manufacture a filename for the "object" dir.
4124 mp->pr_dev = PRNODEV;
4125 vattr.va_mask = VATTR_FSID|VATTR_NODEID;
4126 if (seg->s_ops == &segvn_ops &&
4127 segop_getvp(seg, saddr, &vp) == 0 &&
4128 vp != NULL && vp->v_type == VREG &&
4129 fop_getattr(vp, &vattr, 0, CRED(),
4130 NULL) == 0) {
4131 mp->pr_dev = vattr.va_fsid;
4132 mp->pr_ino = vattr.va_nodeid;
4133 if (vp == p->p_exec)
4134 (void) strcpy(mp->pr_mapname,
4135 "a.out");
4136 else
4137 pr_object_name(mp->pr_mapname,
4138 vp, &vattr);
4142 * Get the SysV shared memory id, if any.
4144 if ((mp->pr_mflags & MA_SHARED) &&
4145 p->p_segacct && (mp->pr_shmid = shmgetid(p,
4146 seg->s_base)) != SHMID_NONE) {
4147 if (mp->pr_shmid == SHMID_FREE)
4148 mp->pr_shmid = -1;
4150 mp->pr_mflags |= MA_SHM;
4151 } else {
4152 mp->pr_shmid = -1;
4155 npages = ((uintptr_t)(naddr - saddr)) >>
4156 PAGESHIFT;
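/*
 * One flag byte per page: segop_incore() reports residency,
 * anon backing and lock state, which the loop below totals
 * into pr_rss, pr_anon and pr_locked.
 */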
4157 parr = kmem_zalloc(npages, KM_SLEEP);
4159 (void) segop_incore(seg, saddr, naddr - saddr,
4160 parr);
4162 for (pagenum = 0; pagenum < npages; pagenum++) {
4163 if (parr[pagenum] & SEG_PAGE_INCORE)
4164 mp->pr_rss++;
4165 if (parr[pagenum] & SEG_PAGE_ANON)
4166 mp->pr_anon++;
4167 if (parr[pagenum] & SEG_PAGE_LOCKED)
4168 mp->pr_locked++;
4170 kmem_free(parr, npages);
4173 ASSERT(tmp == NULL);
4174 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4176 return (0);
4180 * Return the process's credentials. We don't need a 32-bit equivalent of
4181 * this function because prcred_t and prcred32_t are actually the same.
4183 void
4184 prgetcred(proc_t *p, prcred_t *pcrp)
4186 mutex_enter(&p->p_crlock);
4187 cred2prcred(p->p_cred, pcrp);
4188 mutex_exit(&p->p_crlock);
void
prgetsecflags(proc_t *p, prsecflags_t *psfp)
{
	ASSERT(psfp != NULL);

	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
	psfp->pr_lower = p->p_secflags.psf_lower;
	psfp->pr_upper = p->p_secflags.psf_upper;
	psfp->pr_effective = p->p_secflags.psf_effective;
	psfp->pr_inherit = p->p_secflags.psf_inherit;
}

/*
 * Compute actual size of the prpriv_t structure.
 */
size_t
prgetprivsize(void)
{
	return (priv_prgetprivsize(NULL));
}

/*
 * Return the process's privileges. We don't need a 32-bit equivalent of
 * this function because prpriv_t and prpriv32_t are actually the same.
 */
void
prgetpriv(proc_t *p, prpriv_t *pprp)
{
	mutex_enter(&p->p_crlock);
	cred2prpriv(p->p_cred, pprp);
	mutex_exit(&p->p_crlock);
}

#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with HAT memory map information.
 * We allocate here; the caller must deallocate.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;
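
		/*
		 * Skip hole segments: they reserve address space but do not
		 * represent actual mappings.
		 */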
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = segop_getoffset(seg, saddr);
				mp->pr_mflags = 0;
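				/*
				 * As in prgetxmap(), translate PROT_* bits
				 * and the mapping type into MA_* flags;
				 * addresses and sizes are narrowed to the
				 * 32-bit fields of prxmap32_t.
				 */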
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (segop_gettype(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (segop_gettype(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (segop_getvp(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = VATTR_FSID|VATTR_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    segop_getvp(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    fop_getattr(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
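					/*
					 * Compress the 64-bit dev_t into the
					 * 32-bit dev32_t exported to 32-bit
					 * consumers.
					 */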
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				(void) segop_incore(seg, saddr, naddr - saddr,
				    parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */