2 * Copyright (c) 2009 Robert N. M. Watson
5 * This software was developed at the University of Cambridge Computer
6 * Laboratory with support from a grant from Google, Inc.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * FreeBSD process descriptor facility.
33 * Some processes are represented by a file descriptor, which will be used in
34 * preference to signaling and pids for the purposes of process management,
35 * and is, in effect, a form of capability. When a process descriptor is
36 * used with a process, it ceases to be visible to certain traditional UNIX
37 * process facilities, such as waitpid(2).
41 * - At most one process descriptor will exist for any process, although
42 * references to that descriptor may be held from many processes (or even
43 * be in flight between processes over a local domain socket).
44 * - Last close on the process descriptor will terminate the process using
45 * SIGKILL and reparent it to init so that there's a process to reap it
46 * when it's done exiting.
47 * - If the process exits before the descriptor is closed, it will not
48 * generate SIGCHLD on termination, or be picked up by waitpid().
49 * - The pdkill(2) system call may be used to deliver a signal to the process
50 * using its process descriptor.
51 * - The pdwait4(2) system call may be used to block (or not) on a process
52 * descriptor to collect termination information.
56 * - How to handle ptrace(2)?
57 * - Will we want to add a pidtoprocdesc(2) system call to allow process
58 * descriptors to be created for processes without pdfork(2)?
61 #include <sys/cdefs.h>
62 __FBSDID("$FreeBSD$");
64 #include <sys/param.h>
65 #include <sys/capsicum.h>
66 #include <sys/fcntl.h>
68 #include <sys/filedesc.h>
69 #include <sys/kernel.h>
71 #include <sys/mutex.h>
74 #include <sys/procdesc.h>
75 #include <sys/resourcevar.h>
77 #include <sys/sysproto.h>
78 #include <sys/sysctl.h>
79 #include <sys/systm.h>
80 #include <sys/ucred.h>
83 #include <security/audit/audit.h>
/*
 * Declare the "process_descriptors" feature with the description string
 * "Process Descriptors".
 */
87 FEATURE(process_descriptors
, "Process Descriptors");
/*
 * UMA zone for struct procdesc; created in procdesc_init() and used by
 * procdesc_new() / procdesc_free() to allocate and release descriptors.
 */
89 static uma_zone_t procdesc_zone
;
/*
 * Forward declarations of the file-operation handlers that are installed
 * in procdesc_ops.
 */
91 static fo_poll_t procdesc_poll
;
92 static fo_kqfilter_t procdesc_kqfilter
;
93 static fo_stat_t procdesc_stat
;
94 static fo_close_t procdesc_close
;
95 static fo_fill_kinfo_t procdesc_fill_kinfo
;
/*
 * File operations vector attached to process descriptor files by
 * procdesc_finit().
 *
 * read, write, truncate, ioctl, chmod, chown and sendfile are routed to
 * the generic invfo_* handlers (presumably "invalid operation" stubs --
 * confirm against their definitions).  poll, kqfilter, stat, close and
 * fill_kinfo use the procdesc-specific implementations in this file.
 *
 * NOTE(review): DFLAG_PASSABLE presumably is what allows the descriptor to
 * be sent over a local domain socket, as the file header describes --
 * confirm.
 */
97 static struct fileops procdesc_ops
= {
98 .fo_read
= invfo_rdwr
,
99 .fo_write
= invfo_rdwr
,
100 .fo_truncate
= invfo_truncate
,
101 .fo_ioctl
= invfo_ioctl
,
102 .fo_poll
= procdesc_poll
,
103 .fo_kqfilter
= procdesc_kqfilter
,
104 .fo_stat
= procdesc_stat
,
105 .fo_close
= procdesc_close
,
106 .fo_chmod
= invfo_chmod
,
107 .fo_chown
= invfo_chown
,
108 .fo_sendfile
= invfo_sendfile
,
109 .fo_fill_kinfo
= procdesc_fill_kinfo
,
110 .fo_flags
= DFLAG_PASSABLE
,
114 * Initialize with VFS so that process descriptors are available along with
115 * other file descriptor types. As long as it runs before init(8) starts,
116 * there shouldn't be a problem.
/*
 * procdesc_init() - create the "procdesc" UMA zone, sized for one
 * struct procdesc per item and pointer-aligned, with no ctor/dtor/init/fini
 * callbacks.  Panics if the zone pointer comes back NULL.  The argument is
 * unused.  Registered below with SYSINIT at SI_SUB_VFS / SI_ORDER_ANY.
 */
119 procdesc_init(void *dummy __unused
)
122 procdesc_zone
= uma_zcreate("procdesc", sizeof(struct procdesc
),
123 NULL
, NULL
, NULL
, NULL
, UMA_ALIGN_PTR
, 0);
124 if (procdesc_zone
== NULL
)
125 panic("procdesc_init: procdesc_zone not initialized");
127 SYSINIT(vfs
, SI_SUB_VFS
, SI_ORDER_ANY
, procdesc_init
, NULL
);
130 * Return a locked process given a process descriptor, or ESRCH if it has died.
/*
 * procdesc_find() - resolve file descriptor fd of thread td to its process.
 *
 * Visible steps: the file is looked up with fget() using the capability
 * rights in rightsp; the file must be of type DTYPE_PROCDESC; pd->pd_proc
 * is then examined under a shared hold of proctree_lock, which is dropped
 * again before the function finishes.
 *
 * NOTE(review): the rest of the parameter list, the error paths and the
 * return statements are not visible here -- confirm them against the full
 * source before relying on this description.
 */
134 procdesc_find(struct thread
*td
, int fd
, cap_rights_t
*rightsp
,
141 error
= fget(td
, fd
, rightsp
, &fp
);
144 if (fp
->f_type
!= DTYPE_PROCDESC
) {
149 sx_slock(&proctree_lock
);
150 if (pd
->pd_proc
!= NULL
) {
155 sx_sunlock(&proctree_lock
);
162 * Function to be used by procstat(1) sysctls when returning procdesc information.
/*
 * procdesc_pid() - given a file that must be a process descriptor
 * (asserted via KASSERT on f_type), fetch its struct procdesc from f_data.
 * kern_pdgetpid() stores this function's result into a pid_t.
 *
 * NOTE(review): the return statement is not visible here; presumably it
 * returns pd->pd_pid -- confirm.
 */
166 procdesc_pid(struct file
*fp_procdesc
)
170 KASSERT(fp_procdesc
->f_type
== DTYPE_PROCDESC
,
171 ("procdesc_pid: !procdesc"));
173 pd
= fp_procdesc
->f_data
;
178 * Retrieve the PID associated with a process descriptor.
/*
 * kern_pdgetpid() - in-kernel implementation behind sys_pdgetpid().
 *
 * td:      calling thread, used for the descriptor lookup.
 * fd:      file descriptor expected to refer to a process descriptor.
 * rightsp: capability rights passed to fget() for the lookup.
 * pidp:    out parameter; receives the result of procdesc_pid().
 *
 * The file's type is compared against DTYPE_PROCDESC before the pid is
 * stored.  NOTE(review): error values and the release of the file
 * reference are not visible here -- confirm.
 */
181 kern_pdgetpid(struct thread
*td
, int fd
, cap_rights_t
*rightsp
, pid_t
*pidp
)
186 error
= fget(td
, fd
, rightsp
, &fp
);
189 if (fp
->f_type
!= DTYPE_PROCDESC
) {
193 *pidp
= procdesc_pid(fp
);
200 * System call to return the pid of a process given its process descriptor.
/*
 * sys_pdgetpid() - system call entry point.
 *
 * Records uap->fd for audit, asks kern_pdgetpid() for the pid while
 * requiring the CAP_PDGETPID capability right on the descriptor, and
 * copies the resulting pid_t out to the user pointer uap->pidp.
 *
 * NOTE(review): the check of kern_pdgetpid()'s result before the copyout
 * is not visible here -- confirm.
 */
203 sys_pdgetpid(struct thread
*td
, struct pdgetpid_args
*uap
)
209 AUDIT_ARG_FD(uap
->fd
);
210 error
= kern_pdgetpid(td
, uap
->fd
,
211 cap_rights_init(&rights
, CAP_PDGETPID
), &pid
);
213 error
= copyout(&pid
, uap
->pidp
, sizeof(pid
));
218 * When a new process is forked by pdfork(), a file descriptor is allocated
219 * by the fork code first, then the process is forked, and then we get a
220 * chance to set up the process descriptor. Failure is not permitted at this
221 * point, so procdesc_new() must succeed.
/*
 * procdesc_new() - allocate and initialise the process descriptor for p.
 *
 * p:     process the descriptor describes; its pid is cached in pd_pid.
 * flags: creation flags; PD_DAEMON is translated to PDF_DAEMON.
 *
 * The allocation uses M_WAITOK | M_ZERO, so the structure starts out
 * zero-filled.  The descriptor lock is initialised and also serves as the
 * mutex for the knote list in pd_selinfo.
 */
224 procdesc_new(struct proc
*p
, int flags
)
228 pd
= uma_zalloc(procdesc_zone
, M_WAITOK
| M_ZERO
);
230 pd
->pd_pid
= p
->p_pid
;
233 if (flags
& PD_DAEMON
)
234 pd
->pd_flags
|= PDF_DAEMON
;
235 PROCDESC_LOCK_INIT(pd
);
236 knlist_init_mtx(&pd
->pd_selinfo
.si_note
, &pd
->pd_lock
);
239 * Process descriptors start out with two references: one from their
240 * struct file, and the other from their struct proc.
242 refcount_init(&pd
->pd_refcount
, 2);
246 * Initialize a file with a process descriptor.
/*
 * procdesc_finit() - bind process descriptor pdp to file fp.  The file is
 * initialised with FREAD | FWRITE, type DTYPE_PROCDESC, pdp as its private
 * data and procdesc_ops as its operations vector.
 */
249 procdesc_finit(struct procdesc
*pdp
, struct file
*fp
)
252 finit(fp
, FREAD
| FWRITE
, DTYPE_PROCDESC
, pdp
, &procdesc_ops
);
/*
 * procdesc_free() - drop one reference on pd.  When refcount_release()
 * reports that this was the last reference, the function asserts that the
 * descriptor is detached from its process (pd_proc == NULL) and marked
 * PDF_CLOSED, then destroys the knote list, destroys the descriptor lock
 * and returns the structure to procdesc_zone.
 */
256 procdesc_free(struct procdesc
*pd
)
260 * When the last reference is released, we assert that the descriptor
261 * has been closed, but not that the process has exited, as we will
262 * detach the descriptor before the process dies if the descriptor is
263 * closed, as we can't wait synchronously.
265 if (refcount_release(&pd
->pd_refcount
)) {
266 KASSERT(pd
->pd_proc
== NULL
,
267 ("procdesc_free: pd_proc != NULL"));
268 KASSERT((pd
->pd_flags
& PDF_CLOSED
),
269 ("procdesc_free: !PDF_CLOSED"));
271 knlist_destroy(&pd
->pd_selinfo
.si_note
);
272 PROCDESC_LOCK_DESTROY(pd
);
273 uma_zfree(procdesc_zone
, pd
);
278 * procdesc_exit() - notify a process descriptor that its process is exiting.
279 * We use the proctree_lock to ensure that process exit either happens
280 * strictly before or strictly after a concurrent call to procdesc_close().
/*
 * Locking and preconditions, as asserted below: proctree_lock is held
 * exclusively, the process lock of p is owned, and p->p_procdesc is
 * non-NULL.  If the descriptor is already PDF_CLOSED the process must have
 * been reparented to initproc.
 *
 * Effects visible here: the descriptor is marked PDF_EXITED and the exit
 * status is recorded in pd_xstat; in the closed case the process drops its
 * pointer to the descriptor; otherwise any select()/poll() waiter is woken
 * (PDF_SELECTED is cleared first) and kqueue listeners get NOTE_EXIT.
 *
 * NOTE(review): the return statements are not visible here; the inline
 * comment mentions returning 1 in the closed case -- confirm the other
 * return value.
 */
283 procdesc_exit(struct proc
*p
)
287 sx_assert(&proctree_lock
, SA_XLOCKED
);
288 PROC_LOCK_ASSERT(p
, MA_OWNED
);
289 KASSERT(p
->p_procdesc
!= NULL
, ("procdesc_exit: p_procdesc NULL"));
294 KASSERT((pd
->pd_flags
& PDF_CLOSED
) == 0 || p
->p_pptr
== initproc
,
295 ("procdesc_exit: closed && parent not init"));
297 pd
->pd_flags
|= PDF_EXITED
;
298 pd
->pd_xstat
= KW_EXITCODE(p
->p_xexit
, p
->p_xsig
);
301 * If the process descriptor has been closed, then we have nothing
302 * to do; return 1 so that init will get SIGCHLD and do the reaping.
303 * Clean up the procdesc now rather than letting it happen during
306 if (pd
->pd_flags
& PDF_CLOSED
) {
309 p
->p_procdesc
= NULL
;
313 if (pd
->pd_flags
& PDF_SELECTED
) {
314 pd
->pd_flags
&= ~PDF_SELECTED
;
315 selwakeup(&pd
->pd_selinfo
);
317 KNOTE_LOCKED(&pd
->pd_selinfo
.si_note
, NOTE_EXIT
);
323 * When a process descriptor is reaped, perhaps as a result of close() or
324 * pdwait4(), release the process's reference on the process descriptor.
/*
 * Requires proctree_lock held exclusively and a non-NULL p->p_procdesc
 * (both asserted).  Clears the process's pointer to its descriptor.
 *
 * NOTE(review): the statements between the assertion and the final
 * assignment are not visible here; presumably they clear pd_proc and drop
 * the process's reference -- confirm.
 */
327 procdesc_reap(struct proc
*p
)
331 sx_assert(&proctree_lock
, SA_XLOCKED
);
332 KASSERT(p
->p_procdesc
!= NULL
, ("procdesc_reap: p_procdesc == NULL"));
336 p
->p_procdesc
= NULL
;
341 * procdesc_close() - last close on a process descriptor. If the process is
342 * still running, terminate with SIGKILL (unless PDF_DAEMON is set) and let
343 * init(8) clean up the mess; if not, we have to clean up the zombie ourselves.
/*
 * fo_close handler.  fp must be a DTYPE_PROCDESC file (asserted).
 *
 * Visible sequence: the file's ops are switched to badfileops, then
 * proctree_lock is taken exclusively and the descriptor is marked
 * PDF_CLOSED.  Three outcomes follow:
 *  - the process was already reaped: just drop the lock;
 *  - the process is a zombie: reap it here via proc_reap();
 *  - the process is still alive: detach it from the descriptor, set its
 *    parent signal to SIGCHLD, reparent it to initproc and, unless
 *    PDF_DAEMON is set, send SIGKILL; then drop the lock.
 *
 * NOTE(review): the lock handling around proc_reap() and the final
 * procdesc_free() call are not visible here -- confirm.
 */
346 procdesc_close(struct file
*fp
, struct thread
*td
)
351 KASSERT(fp
->f_type
== DTYPE_PROCDESC
, ("procdesc_close: !procdesc"));
354 fp
->f_ops
= &badfileops
;
357 sx_xlock(&proctree_lock
);
359 pd
->pd_flags
|= PDF_CLOSED
;
364 * This is the case where process' exit status was already
365 * collected and procdesc_reap() was already called.
367 sx_xunlock(&proctree_lock
);
370 if (p
->p_state
== PRS_ZOMBIE
) {
372 * If the process is already dead and just awaiting
373 * reaping, do that now. This will release the
374 * process's reference to the process descriptor when it
375 * calls back into procdesc_reap().
378 proc_reap(curthread
, p
, NULL
, 0);
381 * If the process is not yet dead, we need to kill it,
382 * but we can't wait around synchronously for it to go
383 * away, as that path leads to madness (and deadlocks).
384 * First, detach the process from its descriptor so that
385 * its exit status will be reported normally.
388 p
->p_procdesc
= NULL
;
392 * Next, reparent it to init(8) so that there's someone
393 * to pick up the pieces; finally, terminate with
396 p
->p_sigparent
= SIGCHLD
;
397 proc_reparent(p
, initproc
);
398 if ((pd
->pd_flags
& PDF_DAEMON
) == 0)
399 kern_psignal(p
, SIGKILL
);
401 sx_xunlock(&proctree_lock
);
406 * Release the file descriptor's reference on the process descriptor.
/*
 * fo_poll handler.  Tests PDF_EXITED on the descriptor; on the waiting
 * path the calling thread is registered with selrecord() and PDF_SELECTED
 * is set so that procdesc_exit() knows to call selwakeup().
 *
 * NOTE(review): the remaining parameters, the revents computed for the
 * exited case and the locking are not visible here -- confirm.
 */
413 procdesc_poll(struct file
*fp
, int events
, struct ucred
*active_cred
,
422 if (pd
->pd_flags
& PDF_EXITED
)
425 selrecord(td
, &pd
->pd_selinfo
);
426 pd
->pd_flags
|= PDF_SELECTED
;
/*
 * kqueue f_detach handler: find the process descriptor behind the knote's
 * file and remove the knote from the descriptor's knote list.  The final
 * argument 0 to knlist_remove() presumably means the list lock is not
 * already held -- confirm.
 */
433 procdesc_kqops_detach(struct knote
*kn
)
437 pd
= kn
->kn_fp
->f_data
;
438 knlist_remove(&pd
->pd_selinfo
.si_note
, kn
, 0);
/*
 * kqueue f_event handler.
 *
 * event is derived in one of two ways: from PDF_EXITED on the descriptor
 * (the "initial test after registration" case) or from hint masked with
 * NOTE_PCTRLMASK.  If the knote subscribed to that event (kn_sfflags) it
 * is accumulated in kn_fflags.  On NOTE_EXIT the knote is flagged
 * EV_EOF | EV_ONESHOT, kn_data receives the recorded exit status when
 * NOTE_EXIT was subscribed, and EV_DROP is set when nothing was recorded.
 *
 * Returns non-zero when kn_fflags is non-zero.
 *
 * NOTE(review): the condition that selects between the two assignments to
 * event is not visible here; presumably it tests hint == 0 -- confirm.
 */
442 procdesc_kqops_event(struct knote
*kn
, long hint
)
447 pd
= kn
->kn_fp
->f_data
;
450 * Initial test after registration. Generate a NOTE_EXIT in
451 * case the process already terminated before registration.
453 event
= pd
->pd_flags
& PDF_EXITED
? NOTE_EXIT
: 0;
455 /* Mask off extra data. */
456 event
= (u_int
)hint
& NOTE_PCTRLMASK
;
459 /* If the user is interested in this event, record it. */
460 if (kn
->kn_sfflags
& event
)
461 kn
->kn_fflags
|= event
;
463 /* Process is gone, so flag the event as finished. */
464 if (event
== NOTE_EXIT
) {
465 kn
->kn_flags
|= EV_EOF
| EV_ONESHOT
;
466 if (kn
->kn_fflags
& NOTE_EXIT
)
467 kn
->kn_data
= pd
->pd_xstat
;
468 if (kn
->kn_fflags
== 0)
469 kn
->kn_flags
|= EV_DROP
;
473 return (kn
->kn_fflags
!= 0);
/*
 * Filter operations installed on knotes by procdesc_kqfilter() for
 * EVFILT_PROCDESC.  NOTE(review): any initialiser between the opening
 * brace and .f_detach is not visible here.
 */
476 static struct filterops procdesc_kqops
= {
478 .f_detach
= procdesc_kqops_detach
,
479 .f_event
= procdesc_kqops_event
,
/*
 * fo_kqfilter handler.  For EVFILT_PROCDESC the knote is given
 * procdesc_kqops as its filter operations, EV_CLEAR is forced on, and the
 * knote is added to the descriptor's knote list.
 *
 * NOTE(review): handling of other filter types and the return values are
 * not visible here -- confirm.
 */
483 procdesc_kqfilter(struct file
*fp
, struct knote
*kn
)
488 switch (kn
->kn_filter
) {
489 case EVFILT_PROCDESC
:
490 kn
->kn_fop
= &procdesc_kqops
;
491 kn
->kn_flags
|= EV_CLEAR
;
492 knlist_add(&pd
->pd_selinfo
.si_note
, kn
, 0);
/*
 * fo_stat handler.  The stat buffer is zeroed first, so every field not
 * set below reads as 0.
 *
 * Under a shared hold of proctree_lock, and only while the descriptor
 * still points at a process (which is locked for the duration):
 *  - st_birthtim is the process start time offset by boottime, and
 *    st_atim / st_ctim / st_mtim are copies of it;
 *  - st_mode is S_IFREG | S_IRWXU for a process that is not a zombie;
 *  - st_uid / st_gid are the real uid / gid from the process credential.
 * S_IFREG alone is assigned on two further paths.
 *
 * NOTE(review): the `else` keywords are not visible here; presumably the
 * two bare S_IFREG assignments are the zombie case and the "no process"
 * case -- confirm.  The trailing parameter(s) are also not visible.
 */
500 procdesc_stat(struct file
*fp
, struct stat
*sb
, struct ucred
*active_cred
,
504 struct timeval pstart
;
507 * XXXRW: Perhaps we should cache some more information from the
508 * process so that we can return it reliably here even after it has
509 * died. For example, caching its credential data.
511 bzero(sb
, sizeof(*sb
));
513 sx_slock(&proctree_lock
);
514 if (pd
->pd_proc
!= NULL
) {
515 PROC_LOCK(pd
->pd_proc
);
517 /* Set birth and [acm] times to process start time. */
518 pstart
= pd
->pd_proc
->p_stats
->p_start
;
519 timevaladd(&pstart
, &boottime
);
520 TIMEVAL_TO_TIMESPEC(&pstart
, &sb
->st_birthtim
);
521 sb
->st_atim
= sb
->st_birthtim
;
522 sb
->st_ctim
= sb
->st_birthtim
;
523 sb
->st_mtim
= sb
->st_birthtim
;
524 if (pd
->pd_proc
->p_state
!= PRS_ZOMBIE
)
525 sb
->st_mode
= S_IFREG
| S_IRWXU
;
527 sb
->st_mode
= S_IFREG
;
528 sb
->st_uid
= pd
->pd_proc
->p_ucred
->cr_ruid
;
529 sb
->st_gid
= pd
->pd_proc
->p_ucred
->cr_rgid
;
530 PROC_UNLOCK(pd
->pd_proc
);
532 sb
->st_mode
= S_IFREG
;
533 sx_sunlock(&proctree_lock
);
/*
 * fo_fill_kinfo handler: tag the kinfo_file record as KF_TYPE_PROCDESC and
 * report the pid cached in the process descriptor (pd_pid).
 *
 * NOTE(review): the assignment of pdp (presumably from fp->f_data) and the
 * return value are not visible here -- confirm.
 */
538 procdesc_fill_kinfo(struct file
*fp
, struct kinfo_file
*kif
,
539 struct filedesc
*fdp
)
541 struct procdesc
*pdp
;
543 kif
->kf_type
= KF_TYPE_PROCDESC
;
545 kif
->kf_un
.kf_proc
.kf_pid
= pdp
->pd_pid
;