/*-
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_descrip.c	8.6 (Berkeley) 4/19/94
 */
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
40 #include "opt_compat.h"
42 #include "opt_ktrace.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
48 #include <sys/domain.h>
49 #include <sys/fcntl.h>
51 #include <sys/filedesc.h>
52 #include <sys/filio.h>
54 #include <sys/kernel.h>
55 #include <sys/limits.h>
57 #include <sys/malloc.h>
58 #include <sys/mount.h>
59 #include <sys/mqueue.h>
60 #include <sys/mutex.h>
61 #include <sys/namei.h>
64 #include <sys/protosw.h>
65 #include <sys/resourcevar.h>
66 #include <sys/signalvar.h>
67 #include <sys/socketvar.h>
70 #include <sys/syscallsubr.h>
71 #include <sys/sysctl.h>
72 #include <sys/sysproto.h>
74 #include <sys/unistd.h>
76 #include <sys/vnode.h>
78 #include <sys/ktrace.h>
81 #include <security/audit/audit.h>
87 static MALLOC_DEFINE(M_FILEDESC
, "filedesc", "Open file descriptor table");
88 static MALLOC_DEFINE(M_FILEDESC_TO_LEADER
, "filedesc_to_leader",
89 "file desc to leader structures");
90 static MALLOC_DEFINE(M_SIGIO
, "sigio", "sigio structures");
92 static uma_zone_t file_zone
;
95 /* Flags for do_dup() */
96 #define DUP_FIXED 0x1 /* Force fixed allocation */
97 #define DUP_FCNTL 0x2 /* fcntl()-style errors */
99 static int do_dup(struct thread
*td
, int flags
, int old
, int new,
101 static int fd_first_free(struct filedesc
*, int, int);
102 static int fd_last_used(struct filedesc
*, int, int);
103 static void fdgrowtable(struct filedesc
*, int);
104 static void fdunused(struct filedesc
*fdp
, int fd
);
105 static void fdused(struct filedesc
*fdp
, int fd
);
108 * A process is initially started out with NDFILE descriptors stored within
109 * this structure, selected to be enough for typical applications based on
110 * the historical limit of 20 open files (and the usage of descriptors by
111 * shells). If these descriptors are exhausted, a larger descriptor table
112 * may be allocated, up to a process' resource limit; the internal arrays
116 #define NDSLOTSIZE sizeof(NDSLOTTYPE)
117 #define NDENTRIES (NDSLOTSIZE * __CHAR_BIT)
118 #define NDSLOT(x) ((x) / NDENTRIES)
119 #define NDBIT(x) ((NDSLOTTYPE)1 << ((x) % NDENTRIES))
120 #define NDSLOTS(x) (((x) + NDENTRIES - 1) / NDENTRIES)
123 * Storage required per open file descriptor.
125 #define OFILESIZE (sizeof(struct file *) + sizeof(char))
128 * Basic allocation of descriptors:
129 * one of the above, plus arrays for NDFILE descriptors.
132 struct filedesc fd_fd
;
134 * These arrays are used when the number of open files is
135 * <= NDFILE, and are then pointed to by the pointers above.
137 struct file
*fd_dfiles
[NDFILE
];
138 char fd_dfileflags
[NDFILE
];
139 NDSLOTTYPE fd_dmap
[NDSLOTS(NDFILE
)];
143 * Descriptor management.
145 volatile int openfiles
; /* actual number of open files */
146 struct mtx sigio_lock
; /* mtx to protect pointers to sigio */
147 void (*mq_fdclose
)(struct thread
*td
, int fd
, struct file
*fp
);
149 /* A mutex to protect the association between a proc and filedesc. */
150 static struct mtx fdesc_mtx
;
153 * Find the first zero bit in the given bitmap, starting at low and not
154 * exceeding size - 1.
157 fd_first_free(struct filedesc
*fdp
, int low
, int size
)
159 NDSLOTTYPE
*map
= fdp
->fd_map
;
167 if (low
% NDENTRIES
) {
168 mask
= ~(~(NDSLOTTYPE
)0 >> (NDENTRIES
- (low
% NDENTRIES
)));
169 if ((mask
&= ~map
[off
]) != 0UL)
170 return (off
* NDENTRIES
+ ffsl(mask
) - 1);
173 for (maxoff
= NDSLOTS(size
); off
< maxoff
; ++off
)
174 if (map
[off
] != ~0UL)
175 return (off
* NDENTRIES
+ ffsl(~map
[off
]) - 1);
180 * Find the highest non-zero bit in the given bitmap, starting at low and
181 * not exceeding size - 1.
184 fd_last_used(struct filedesc
*fdp
, int low
, int size
)
186 NDSLOTTYPE
*map
= fdp
->fd_map
;
194 if (size
% NDENTRIES
) {
195 mask
= ~(~(NDSLOTTYPE
)0 << (size
% NDENTRIES
));
196 if ((mask
&= map
[off
]) != 0)
197 return (off
* NDENTRIES
+ flsl(mask
) - 1);
200 for (minoff
= NDSLOT(low
); off
>= minoff
; --off
)
202 return (off
* NDENTRIES
+ flsl(map
[off
]) - 1);
207 fdisused(struct filedesc
*fdp
, int fd
)
209 KASSERT(fd
>= 0 && fd
< fdp
->fd_nfiles
,
210 ("file descriptor %d out of range (0, %d)", fd
, fdp
->fd_nfiles
));
211 return ((fdp
->fd_map
[NDSLOT(fd
)] & NDBIT(fd
)) != 0);
215 * Mark a file descriptor as used.
218 fdused(struct filedesc
*fdp
, int fd
)
221 FILEDESC_XLOCK_ASSERT(fdp
);
222 KASSERT(!fdisused(fdp
, fd
),
223 ("fd already used"));
225 fdp
->fd_map
[NDSLOT(fd
)] |= NDBIT(fd
);
226 if (fd
> fdp
->fd_lastfile
)
227 fdp
->fd_lastfile
= fd
;
228 if (fd
== fdp
->fd_freefile
)
229 fdp
->fd_freefile
= fd_first_free(fdp
, fd
, fdp
->fd_nfiles
);
233 * Mark a file descriptor as unused.
236 fdunused(struct filedesc
*fdp
, int fd
)
239 FILEDESC_XLOCK_ASSERT(fdp
);
240 KASSERT(fdisused(fdp
, fd
),
241 ("fd is already unused"));
242 KASSERT(fdp
->fd_ofiles
[fd
] == NULL
,
243 ("fd is still in use"));
245 fdp
->fd_map
[NDSLOT(fd
)] &= ~NDBIT(fd
);
246 if (fd
< fdp
->fd_freefile
)
247 fdp
->fd_freefile
= fd
;
248 if (fd
== fdp
->fd_lastfile
)
249 fdp
->fd_lastfile
= fd_last_used(fdp
, 0, fd
);
253 * System calls on descriptors.
255 #ifndef _SYS_SYSPROTO_H_
256 struct getdtablesize_args
{
262 getdtablesize(struct thread
*td
, struct getdtablesize_args
*uap
)
264 struct proc
*p
= td
->td_proc
;
268 min((int)lim_cur(p
, RLIMIT_NOFILE
), maxfilesperproc
);
274 * Duplicate a file descriptor to a particular value.
276 * Note: keep in mind that a potential race condition exists when closing
277 * descriptors from a shared descriptor table (via rfork).
279 #ifndef _SYS_SYSPROTO_H_
287 dup2(struct thread
*td
, struct dup2_args
*uap
)
290 return (do_dup(td
, DUP_FIXED
, (int)uap
->from
, (int)uap
->to
,
295 * Duplicate a file descriptor.
297 #ifndef _SYS_SYSPROTO_H_
304 dup(struct thread
*td
, struct dup_args
*uap
)
307 return (do_dup(td
, 0, (int)uap
->fd
, 0, td
->td_retval
));
311 * The file control system call.
313 #ifndef _SYS_SYSPROTO_H_
322 fcntl(struct thread
*td
, struct fcntl_args
*uap
)
337 * Convert old flock structure to new.
339 error
= copyin((void *)(intptr_t)uap
->arg
, &ofl
, sizeof(ofl
));
340 fl
.l_start
= ofl
.l_start
;
341 fl
.l_len
= ofl
.l_len
;
342 fl
.l_pid
= ofl
.l_pid
;
343 fl
.l_type
= ofl
.l_type
;
344 fl
.l_whence
= ofl
.l_whence
;
364 error
= copyin((void *)(intptr_t)uap
->arg
, &fl
, sizeof(fl
));
373 error
= kern_fcntl(td
, uap
->fd
, cmd
, arg
);
376 if (uap
->cmd
== F_OGETLK
) {
377 ofl
.l_start
= fl
.l_start
;
378 ofl
.l_len
= fl
.l_len
;
379 ofl
.l_pid
= fl
.l_pid
;
380 ofl
.l_type
= fl
.l_type
;
381 ofl
.l_whence
= fl
.l_whence
;
382 error
= copyout(&ofl
, (void *)(intptr_t)uap
->arg
, sizeof(ofl
));
383 } else if (uap
->cmd
== F_GETLK
) {
384 error
= copyout(&fl
, (void *)(intptr_t)uap
->arg
, sizeof(fl
));
389 static inline struct file
*
390 fdtofp(int fd
, struct filedesc
*fdp
)
394 FILEDESC_LOCK_ASSERT(fdp
);
395 if ((unsigned)fd
>= fdp
->fd_nfiles
||
396 (fp
= fdp
->fd_ofiles
[fd
]) == NULL
)
402 kern_fcntl(struct thread
*td
, int fd
, int cmd
, intptr_t arg
)
404 struct filedesc
*fdp
;
422 error
= do_dup(td
, DUP_FCNTL
, fd
, tmp
, td
->td_retval
);
427 error
= do_dup(td
, DUP_FIXED
, fd
, tmp
, td
->td_retval
);
432 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
433 FILEDESC_SUNLOCK(fdp
);
437 pop
= &fdp
->fd_ofileflags
[fd
];
438 td
->td_retval
[0] = (*pop
& UF_EXCLOSE
) ? FD_CLOEXEC
: 0;
439 FILEDESC_SUNLOCK(fdp
);
444 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
445 FILEDESC_XUNLOCK(fdp
);
449 pop
= &fdp
->fd_ofileflags
[fd
];
450 *pop
= (*pop
&~ UF_EXCLOSE
) |
451 (arg
& FD_CLOEXEC
? UF_EXCLOSE
: 0);
452 FILEDESC_XUNLOCK(fdp
);
457 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
458 FILEDESC_SUNLOCK(fdp
);
462 td
->td_retval
[0] = OFLAGS(fp
->f_flag
);
463 FILEDESC_SUNLOCK(fdp
);
468 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
469 FILEDESC_SUNLOCK(fdp
);
474 FILEDESC_SUNLOCK(fdp
);
476 tmp
= flg
= fp
->f_flag
;
478 tmp
|= FFLAGS(arg
& ~O_ACCMODE
) & FCNTLFLAGS
;
479 } while(atomic_cmpset_int(&fp
->f_flag
, flg
, tmp
) == 0);
480 tmp
= fp
->f_flag
& FNONBLOCK
;
481 error
= fo_ioctl(fp
, FIONBIO
, &tmp
, td
->td_ucred
, td
);
486 tmp
= fp
->f_flag
& FASYNC
;
487 error
= fo_ioctl(fp
, FIOASYNC
, &tmp
, td
->td_ucred
, td
);
492 atomic_clear_int(&fp
->f_flag
, FNONBLOCK
);
494 (void)fo_ioctl(fp
, FIONBIO
, &tmp
, td
->td_ucred
, td
);
500 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
501 FILEDESC_SUNLOCK(fdp
);
506 FILEDESC_SUNLOCK(fdp
);
507 error
= fo_ioctl(fp
, FIOGETOWN
, &tmp
, td
->td_ucred
, td
);
509 td
->td_retval
[0] = tmp
;
515 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
516 FILEDESC_SUNLOCK(fdp
);
521 FILEDESC_SUNLOCK(fdp
);
523 error
= fo_ioctl(fp
, FIOSETOWN
, &tmp
, td
->td_ucred
, td
);
528 error
= priv_check(td
, PRIV_NFS_LOCKD
);
536 /* FALLTHROUGH F_SETLK */
541 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
542 FILEDESC_SUNLOCK(fdp
);
546 if (fp
->f_type
!= DTYPE_VNODE
) {
547 FILEDESC_SUNLOCK(fdp
);
551 flp
= (struct flock
*)arg
;
552 if (flp
->l_whence
== SEEK_CUR
) {
553 if (fp
->f_offset
< 0 ||
555 fp
->f_offset
> OFF_MAX
- flp
->l_start
)) {
556 FILEDESC_SUNLOCK(fdp
);
560 flp
->l_start
+= fp
->f_offset
;
564 * VOP_ADVLOCK() may block.
567 FILEDESC_SUNLOCK(fdp
);
569 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
570 switch (flp
->l_type
) {
572 if ((fp
->f_flag
& FREAD
) == 0) {
576 PROC_LOCK(p
->p_leader
);
577 p
->p_leader
->p_flag
|= P_ADVLOCK
;
578 PROC_UNLOCK(p
->p_leader
);
579 error
= VOP_ADVLOCK(vp
, (caddr_t
)p
->p_leader
, F_SETLK
,
583 if ((fp
->f_flag
& FWRITE
) == 0) {
587 PROC_LOCK(p
->p_leader
);
588 p
->p_leader
->p_flag
|= P_ADVLOCK
;
589 PROC_UNLOCK(p
->p_leader
);
590 error
= VOP_ADVLOCK(vp
, (caddr_t
)p
->p_leader
, F_SETLK
,
594 error
= VOP_ADVLOCK(vp
, (caddr_t
)p
->p_leader
, F_UNLCK
,
599 * Temporary api for testing remote lock
602 if (flg
!= F_REMOTE
) {
606 error
= VOP_ADVLOCK(vp
, (caddr_t
)p
->p_leader
,
607 F_UNLCKSYS
, flp
, flg
);
613 VFS_UNLOCK_GIANT(vfslocked
);
615 /* Check for race with close */
617 if ((unsigned) fd
>= fdp
->fd_nfiles
||
618 fp
!= fdp
->fd_ofiles
[fd
]) {
619 FILEDESC_SUNLOCK(fdp
);
620 flp
->l_whence
= SEEK_SET
;
623 flp
->l_type
= F_UNLCK
;
624 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
625 (void) VOP_ADVLOCK(vp
, (caddr_t
)p
->p_leader
,
626 F_UNLCK
, flp
, F_POSIX
);
627 VFS_UNLOCK_GIANT(vfslocked
);
630 FILEDESC_SUNLOCK(fdp
);
636 if ((fp
= fdtofp(fd
, fdp
)) == NULL
) {
637 FILEDESC_SUNLOCK(fdp
);
641 if (fp
->f_type
!= DTYPE_VNODE
) {
642 FILEDESC_SUNLOCK(fdp
);
646 flp
= (struct flock
*)arg
;
647 if (flp
->l_type
!= F_RDLCK
&& flp
->l_type
!= F_WRLCK
&&
648 flp
->l_type
!= F_UNLCK
) {
649 FILEDESC_SUNLOCK(fdp
);
653 if (flp
->l_whence
== SEEK_CUR
) {
654 if ((flp
->l_start
> 0 &&
655 fp
->f_offset
> OFF_MAX
- flp
->l_start
) ||
657 fp
->f_offset
< OFF_MIN
- flp
->l_start
)) {
658 FILEDESC_SUNLOCK(fdp
);
662 flp
->l_start
+= fp
->f_offset
;
665 * VOP_ADVLOCK() may block.
668 FILEDESC_SUNLOCK(fdp
);
670 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
671 error
= VOP_ADVLOCK(vp
, (caddr_t
)p
->p_leader
, F_GETLK
, flp
,
673 VFS_UNLOCK_GIANT(vfslocked
);
681 VFS_UNLOCK_GIANT(vfslocked
);
686 * Common code for dup, dup2, fcntl(F_DUPFD) and fcntl(F_DUP2FD).
689 do_dup(struct thread
*td
, int flags
, int old
, int new,
692 struct filedesc
*fdp
;
696 int error
, holdleaders
, maxfd
;
702 * Verify we have a valid descriptor to dup from and possibly to
703 * dup to. Unlike dup() and dup2(), fcntl()'s F_DUPFD should
704 * return EINVAL when the new descriptor is out of bounds.
709 return (flags
& DUP_FCNTL
? EINVAL
: EBADF
);
711 maxfd
= min((int)lim_cur(p
, RLIMIT_NOFILE
), maxfilesperproc
);
714 return (flags
& DUP_FCNTL
? EINVAL
: EMFILE
);
717 if (old
>= fdp
->fd_nfiles
|| fdp
->fd_ofiles
[old
] == NULL
) {
718 FILEDESC_XUNLOCK(fdp
);
721 if (flags
& DUP_FIXED
&& old
== new) {
723 FILEDESC_XUNLOCK(fdp
);
726 fp
= fdp
->fd_ofiles
[old
];
730 * If the caller specified a file descriptor, make sure the file
731 * table is large enough to hold it, and grab it. Otherwise, just
732 * allocate a new descriptor the usual way. Since the filedesc
733 * lock may be temporarily dropped in the process, we have to look
736 if (flags
& DUP_FIXED
) {
737 if (new >= fdp
->fd_nfiles
)
738 fdgrowtable(fdp
, new + 1);
739 if (fdp
->fd_ofiles
[new] == NULL
)
742 if ((error
= fdalloc(td
, new, &new)) != 0) {
743 FILEDESC_XUNLOCK(fdp
);
750 * If the old file changed out from under us then treat it as a
751 * bad file descriptor. Userland should do its own locking to
754 if (fdp
->fd_ofiles
[old
] != fp
) {
755 /* we've allocated a descriptor which we won't use */
756 if (fdp
->fd_ofiles
[new] == NULL
)
758 FILEDESC_XUNLOCK(fdp
);
763 ("new fd is same as old"));
766 * Save info on the descriptor being overwritten. We cannot close
767 * it without introducing an ownership race for the slot, since we
768 * need to drop the filedesc lock to call closef().
770 * XXX this duplicates parts of close().
772 delfp
= fdp
->fd_ofiles
[new];
775 if (td
->td_proc
->p_fdtol
!= NULL
) {
777 * Ask fdfree() to sleep to ensure that all relevant
778 * process leaders can be traversed in closef().
780 fdp
->fd_holdleaderscount
++;
786 * Duplicate the source descriptor
788 fdp
->fd_ofiles
[new] = fp
;
789 fdp
->fd_ofileflags
[new] = fdp
->fd_ofileflags
[old
] &~ UF_EXCLOSE
;
790 if (new > fdp
->fd_lastfile
)
791 fdp
->fd_lastfile
= new;
795 * If we dup'd over a valid file, we now own the reference to it
796 * and must dispose of it using closef() semantics (as if a
797 * close() were performed on it).
799 * XXX this duplicates parts of close().
802 knote_fdclose(td
, new);
803 if (delfp
->f_type
== DTYPE_MQUEUE
)
804 mq_fdclose(td
, new, delfp
);
805 FILEDESC_XUNLOCK(fdp
);
806 (void) closef(delfp
, td
);
809 fdp
->fd_holdleaderscount
--;
810 if (fdp
->fd_holdleaderscount
== 0 &&
811 fdp
->fd_holdleaderswakeup
!= 0) {
812 fdp
->fd_holdleaderswakeup
= 0;
813 wakeup(&fdp
->fd_holdleaderscount
);
815 FILEDESC_XUNLOCK(fdp
);
818 FILEDESC_XUNLOCK(fdp
);
824 * If sigio is on the list associated with a process or process group,
825 * disable signalling from the device, remove sigio from the list and
829 funsetown(struct sigio
**sigiop
)
839 *(sigio
->sio_myref
) = NULL
;
840 if ((sigio
)->sio_pgid
< 0) {
841 struct pgrp
*pg
= (sigio
)->sio_pgrp
;
843 SLIST_REMOVE(&sigio
->sio_pgrp
->pg_sigiolst
, sigio
,
847 struct proc
*p
= (sigio
)->sio_proc
;
849 SLIST_REMOVE(&sigio
->sio_proc
->p_sigiolst
, sigio
,
854 crfree(sigio
->sio_ucred
);
855 FREE(sigio
, M_SIGIO
);
859 * Free a list of sigio structures.
860 * We only need to lock the SIGIO_LOCK because we have made ourselves
861 * inaccessible to callers of fsetown and therefore do not need to lock
862 * the proc or pgrp struct for the list manipulation.
865 funsetownlst(struct sigiolst
*sigiolst
)
871 sigio
= SLIST_FIRST(sigiolst
);
878 * Every entry of the list should belong
879 * to a single proc or pgrp.
881 if (sigio
->sio_pgid
< 0) {
882 pg
= sigio
->sio_pgrp
;
883 PGRP_LOCK_ASSERT(pg
, MA_NOTOWNED
);
884 } else /* if (sigio->sio_pgid > 0) */ {
886 PROC_LOCK_ASSERT(p
, MA_NOTOWNED
);
890 while ((sigio
= SLIST_FIRST(sigiolst
)) != NULL
) {
891 *(sigio
->sio_myref
) = NULL
;
893 KASSERT(sigio
->sio_pgid
< 0,
894 ("Proc sigio in pgrp sigio list"));
895 KASSERT(sigio
->sio_pgrp
== pg
,
896 ("Bogus pgrp in sigio list"));
898 SLIST_REMOVE(&pg
->pg_sigiolst
, sigio
, sigio
,
901 } else /* if (p != NULL) */ {
902 KASSERT(sigio
->sio_pgid
> 0,
903 ("Pgrp sigio in proc sigio list"));
904 KASSERT(sigio
->sio_proc
== p
,
905 ("Bogus proc in sigio list"));
907 SLIST_REMOVE(&p
->p_sigiolst
, sigio
, sigio
,
912 crfree(sigio
->sio_ucred
);
913 FREE(sigio
, M_SIGIO
);
920 * This is common code for FIOSETOWN ioctl called by fcntl(fd, F_SETOWN, arg).
922 * After permission checking, add a sigio structure to the sigio list for
923 * the process or process group.
926 fsetown(pid_t pgid
, struct sigio
**sigiop
)
940 /* Allocate and fill in the new sigio out of locks. */
941 MALLOC(sigio
, struct sigio
*, sizeof(struct sigio
), M_SIGIO
, M_WAITOK
);
942 sigio
->sio_pgid
= pgid
;
943 sigio
->sio_ucred
= crhold(curthread
->td_ucred
);
944 sigio
->sio_myref
= sigiop
;
946 sx_slock(&proctree_lock
);
955 * Policy - Don't allow a process to FSETOWN a process
956 * in another session.
958 * Remove this test to allow maximum flexibility or
959 * restrict FSETOWN to the current process or process
960 * group for maximum safety.
963 if (proc
->p_session
!= curthread
->td_proc
->p_session
) {
969 } else /* if (pgid < 0) */ {
970 pgrp
= pgfind(-pgid
);
978 * Policy - Don't allow a process to FSETOWN a process
979 * in another session.
981 * Remove this test to allow maximum flexibility or
982 * restrict FSETOWN to the current process or process
983 * group for maximum safety.
985 if (pgrp
->pg_session
!= curthread
->td_proc
->p_session
) {
996 * Since funsetownlst() is called without the proctree
997 * locked, we need to check for P_WEXIT.
998 * XXX: is ESRCH correct?
1000 if ((proc
->p_flag
& P_WEXIT
) != 0) {
1005 SLIST_INSERT_HEAD(&proc
->p_sigiolst
, sigio
, sio_pgsigio
);
1006 sigio
->sio_proc
= proc
;
1010 SLIST_INSERT_HEAD(&pgrp
->pg_sigiolst
, sigio
, sio_pgsigio
);
1011 sigio
->sio_pgrp
= pgrp
;
1014 sx_sunlock(&proctree_lock
);
1021 sx_sunlock(&proctree_lock
);
1022 crfree(sigio
->sio_ucred
);
1023 FREE(sigio
, M_SIGIO
);
1028 * This is common code for FIOGETOWN ioctl called by fcntl(fd, F_GETOWN, arg).
1032 struct sigio
**sigiop
;
1037 pgid
= (*sigiop
!= NULL
) ? (*sigiop
)->sio_pgid
: 0;
1043 * Close a file descriptor.
1045 #ifndef _SYS_SYSPROTO_H_
1054 struct close_args
*uap
;
1057 return (kern_close(td
, uap
->fd
));
1065 struct filedesc
*fdp
;
1072 fdp
= td
->td_proc
->p_fd
;
1074 AUDIT_SYSCLOSE(td
, fd
);
1076 FILEDESC_XLOCK(fdp
);
1077 if ((unsigned)fd
>= fdp
->fd_nfiles
||
1078 (fp
= fdp
->fd_ofiles
[fd
]) == NULL
) {
1079 FILEDESC_XUNLOCK(fdp
);
1082 fdp
->fd_ofiles
[fd
] = NULL
;
1083 fdp
->fd_ofileflags
[fd
] = 0;
1085 if (td
->td_proc
->p_fdtol
!= NULL
) {
1087 * Ask fdfree() to sleep to ensure that all relevant
1088 * process leaders can be traversed in closef().
1090 fdp
->fd_holdleaderscount
++;
1095 * We now hold the fp reference that used to be owned by the
1096 * descriptor array. We have to unlock the FILEDESC *AFTER*
1097 * knote_fdclose to prevent a race of the fd getting opened, a knote
1098 * added, and deleteing a knote for the new fd.
1100 knote_fdclose(td
, fd
);
1101 if (fp
->f_type
== DTYPE_MQUEUE
)
1102 mq_fdclose(td
, fd
, fp
);
1103 FILEDESC_XUNLOCK(fdp
);
1105 error
= closef(fp
, td
);
1107 FILEDESC_XLOCK(fdp
);
1108 fdp
->fd_holdleaderscount
--;
1109 if (fdp
->fd_holdleaderscount
== 0 &&
1110 fdp
->fd_holdleaderswakeup
!= 0) {
1111 fdp
->fd_holdleaderswakeup
= 0;
1112 wakeup(&fdp
->fd_holdleaderscount
);
1114 FILEDESC_XUNLOCK(fdp
);
1119 #if defined(COMPAT_43)
1121 * Return status information about a file descriptor.
1123 #ifndef _SYS_SYSPROTO_H_
1124 struct ofstat_args
{
1131 ofstat(struct thread
*td
, struct ofstat_args
*uap
)
1137 error
= kern_fstat(td
, uap
->fd
, &ub
);
1140 error
= copyout(&oub
, uap
->sb
, sizeof(oub
));
1144 #endif /* COMPAT_43 */
1147 * Return status information about a file descriptor.
1149 #ifndef _SYS_SYSPROTO_H_
1157 fstat(struct thread
*td
, struct fstat_args
*uap
)
1162 error
= kern_fstat(td
, uap
->fd
, &ub
);
1164 error
= copyout(&ub
, uap
->sb
, sizeof(ub
));
1169 kern_fstat(struct thread
*td
, int fd
, struct stat
*sbp
)
1176 if ((error
= fget(td
, fd
, &fp
)) != 0)
1179 AUDIT_ARG(file
, td
->td_proc
, fp
);
1181 error
= fo_stat(fp
, sbp
, td
->td_ucred
, td
);
1184 if (error
== 0 && KTRPOINT(td
, KTR_STRUCT
))
1191 * Return status information about a file descriptor.
1193 #ifndef _SYS_SYSPROTO_H_
1194 struct nfstat_args
{
1201 nfstat(struct thread
*td
, struct nfstat_args
*uap
)
1207 error
= kern_fstat(td
, uap
->fd
, &ub
);
1209 cvtnstat(&ub
, &nub
);
1210 error
= copyout(&nub
, uap
->sb
, sizeof(nub
));
1216 * Return pathconf information about a file descriptor.
1218 #ifndef _SYS_SYSPROTO_H_
1219 struct fpathconf_args
{
1226 fpathconf(struct thread
*td
, struct fpathconf_args
*uap
)
1232 if ((error
= fget(td
, uap
->fd
, &fp
)) != 0)
1235 /* If asynchronous I/O is available, it works for all descriptors. */
1236 if (uap
->name
== _PC_ASYNC_IO
) {
1237 td
->td_retval
[0] = async_io_version
;
1243 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
1244 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
1245 error
= VOP_PATHCONF(vp
, uap
->name
, td
->td_retval
);
1247 VFS_UNLOCK_GIANT(vfslocked
);
1248 } else if (fp
->f_type
== DTYPE_PIPE
|| fp
->f_type
== DTYPE_SOCKET
) {
1249 if (uap
->name
!= _PC_PIPE_BUF
) {
1252 td
->td_retval
[0] = PIPE_BUF
;
1264 * Grow the file table to accomodate (at least) nfd descriptors. This may
1265 * block and drop the filedesc lock, but it will reacquire it before
1269 fdgrowtable(struct filedesc
*fdp
, int nfd
)
1271 struct file
**ntable
;
1273 int nnfiles
, onfiles
;
1276 FILEDESC_XLOCK_ASSERT(fdp
);
1278 KASSERT(fdp
->fd_nfiles
> 0,
1279 ("zero-length file table"));
1281 /* compute the size of the new table */
1282 onfiles
= fdp
->fd_nfiles
;
1283 nnfiles
= NDSLOTS(nfd
) * NDENTRIES
; /* round up */
1284 if (nnfiles
<= onfiles
)
1285 /* the table is already large enough */
1288 /* allocate a new table and (if required) new bitmaps */
1289 FILEDESC_XUNLOCK(fdp
);
1290 MALLOC(ntable
, struct file
**, nnfiles
* OFILESIZE
,
1291 M_FILEDESC
, M_ZERO
| M_WAITOK
);
1292 nfileflags
= (char *)&ntable
[nnfiles
];
1293 if (NDSLOTS(nnfiles
) > NDSLOTS(onfiles
))
1294 MALLOC(nmap
, NDSLOTTYPE
*, NDSLOTS(nnfiles
) * NDSLOTSIZE
,
1295 M_FILEDESC
, M_ZERO
| M_WAITOK
);
1298 FILEDESC_XLOCK(fdp
);
1301 * We now have new tables ready to go. Since we dropped the
1302 * filedesc lock to call malloc(), watch out for a race.
1304 onfiles
= fdp
->fd_nfiles
;
1305 if (onfiles
>= nnfiles
) {
1306 /* we lost the race, but that's OK */
1307 free(ntable
, M_FILEDESC
);
1309 free(nmap
, M_FILEDESC
);
1312 bcopy(fdp
->fd_ofiles
, ntable
, onfiles
* sizeof(*ntable
));
1313 bcopy(fdp
->fd_ofileflags
, nfileflags
, onfiles
);
1314 if (onfiles
> NDFILE
)
1315 free(fdp
->fd_ofiles
, M_FILEDESC
);
1316 fdp
->fd_ofiles
= ntable
;
1317 fdp
->fd_ofileflags
= nfileflags
;
1318 if (NDSLOTS(nnfiles
) > NDSLOTS(onfiles
)) {
1319 bcopy(fdp
->fd_map
, nmap
, NDSLOTS(onfiles
) * sizeof(*nmap
));
1320 if (NDSLOTS(onfiles
) > NDSLOTS(NDFILE
))
1321 free(fdp
->fd_map
, M_FILEDESC
);
1324 fdp
->fd_nfiles
= nnfiles
;
1328 * Allocate a file descriptor for the process.
1331 fdalloc(struct thread
*td
, int minfd
, int *result
)
1333 struct proc
*p
= td
->td_proc
;
1334 struct filedesc
*fdp
= p
->p_fd
;
1337 FILEDESC_XLOCK_ASSERT(fdp
);
1339 if (fdp
->fd_freefile
> minfd
)
1340 minfd
= fdp
->fd_freefile
;
1343 maxfd
= min((int)lim_cur(p
, RLIMIT_NOFILE
), maxfilesperproc
);
1347 * Search the bitmap for a free descriptor. If none is found, try
1348 * to grow the file table. Keep at it until we either get a file
1349 * descriptor or run into process or system limits; fdgrowtable()
1350 * may drop the filedesc lock, so we're in a race.
1353 fd
= fd_first_free(fdp
, minfd
, fdp
->fd_nfiles
);
1356 if (fd
< fdp
->fd_nfiles
)
1358 fdgrowtable(fdp
, min(fdp
->fd_nfiles
* 2, maxfd
));
1362 * Perform some sanity checks, then mark the file descriptor as
1363 * used and return it to the caller.
1365 KASSERT(!fdisused(fdp
, fd
),
1366 ("fd_first_free() returned non-free descriptor"));
1367 KASSERT(fdp
->fd_ofiles
[fd
] == NULL
,
1368 ("free descriptor isn't"));
1369 fdp
->fd_ofileflags
[fd
] = 0; /* XXX needed? */
1376 * Check to see whether n user file descriptors are available to the process
1380 fdavail(struct thread
*td
, int n
)
1382 struct proc
*p
= td
->td_proc
;
1383 struct filedesc
*fdp
= td
->td_proc
->p_fd
;
1387 FILEDESC_LOCK_ASSERT(fdp
);
1390 lim
= min((int)lim_cur(p
, RLIMIT_NOFILE
), maxfilesperproc
);
1392 if ((i
= lim
- fdp
->fd_nfiles
) > 0 && (n
-= i
) <= 0)
1394 last
= min(fdp
->fd_nfiles
, lim
);
1395 fpp
= &fdp
->fd_ofiles
[fdp
->fd_freefile
];
1396 for (i
= last
- fdp
->fd_freefile
; --i
>= 0; fpp
++) {
1397 if (*fpp
== NULL
&& --n
<= 0)
1404 * Create a new open file structure and allocate a file decriptor for the
1405 * process that refers to it. We add one reference to the file for the
1406 * descriptor table and one reference for resultfp. This is to prevent us
1407 * being preempted and the entry in the descriptor table closed after we
1408 * release the FILEDESC lock.
1411 falloc(struct thread
*td
, struct file
**resultfp
, int *resultfd
)
1413 struct proc
*p
= td
->td_proc
;
1416 int maxuserfiles
= maxfiles
- (maxfiles
/ 20);
1417 static struct timeval lastfail
;
1420 fp
= uma_zalloc(file_zone
, M_WAITOK
| M_ZERO
);
1421 if ((openfiles
>= maxuserfiles
&&
1422 priv_check(td
, PRIV_MAXFILES
) != 0) ||
1423 openfiles
>= maxfiles
) {
1424 if (ppsratecheck(&lastfail
, &curfail
, 1)) {
1425 printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
1426 td
->td_ucred
->cr_ruid
);
1428 uma_zfree(file_zone
, fp
);
1431 atomic_add_int(&openfiles
, 1);
1434 * If the process has file descriptor zero open, add the new file
1435 * descriptor to the list of open files at that point, otherwise
1436 * put it at the front of the list of open files.
1438 refcount_init(&fp
->f_count
, 1);
1441 fp
->f_cred
= crhold(td
->td_ucred
);
1442 fp
->f_ops
= &badfileops
;
1445 FILEDESC_XLOCK(p
->p_fd
);
1446 if ((error
= fdalloc(td
, 0, &i
))) {
1447 FILEDESC_XUNLOCK(p
->p_fd
);
1453 p
->p_fd
->fd_ofiles
[i
] = fp
;
1454 FILEDESC_XUNLOCK(p
->p_fd
);
1463 * Build a new filedesc structure from another.
1464 * Copy the current, root, and jail root vnode references.
1467 fdinit(struct filedesc
*fdp
)
1469 struct filedesc0
*newfdp
;
1471 newfdp
= malloc(sizeof *newfdp
, M_FILEDESC
, M_WAITOK
| M_ZERO
);
1472 FILEDESC_LOCK_INIT(&newfdp
->fd_fd
);
1474 FILEDESC_XLOCK(fdp
);
1475 newfdp
->fd_fd
.fd_cdir
= fdp
->fd_cdir
;
1476 if (newfdp
->fd_fd
.fd_cdir
)
1477 VREF(newfdp
->fd_fd
.fd_cdir
);
1478 newfdp
->fd_fd
.fd_rdir
= fdp
->fd_rdir
;
1479 if (newfdp
->fd_fd
.fd_rdir
)
1480 VREF(newfdp
->fd_fd
.fd_rdir
);
1481 newfdp
->fd_fd
.fd_jdir
= fdp
->fd_jdir
;
1482 if (newfdp
->fd_fd
.fd_jdir
)
1483 VREF(newfdp
->fd_fd
.fd_jdir
);
1484 FILEDESC_XUNLOCK(fdp
);
1487 /* Create the file descriptor table. */
1488 newfdp
->fd_fd
.fd_refcnt
= 1;
1489 newfdp
->fd_fd
.fd_holdcnt
= 1;
1490 newfdp
->fd_fd
.fd_cmask
= CMASK
;
1491 newfdp
->fd_fd
.fd_ofiles
= newfdp
->fd_dfiles
;
1492 newfdp
->fd_fd
.fd_ofileflags
= newfdp
->fd_dfileflags
;
1493 newfdp
->fd_fd
.fd_nfiles
= NDFILE
;
1494 newfdp
->fd_fd
.fd_map
= newfdp
->fd_dmap
;
1495 newfdp
->fd_fd
.fd_lastfile
= -1;
1496 return (&newfdp
->fd_fd
);
1499 static struct filedesc
*
1500 fdhold(struct proc
*p
)
1502 struct filedesc
*fdp
;
1504 mtx_lock(&fdesc_mtx
);
1508 mtx_unlock(&fdesc_mtx
);
1513 fddrop(struct filedesc
*fdp
)
1517 mtx_lock(&fdesc_mtx
);
1518 i
= --fdp
->fd_holdcnt
;
1519 mtx_unlock(&fdesc_mtx
);
1523 FILEDESC_LOCK_DESTROY(fdp
);
1524 FREE(fdp
, M_FILEDESC
);
1528 * Share a filedesc structure.
1531 fdshare(struct filedesc
*fdp
)
1534 FILEDESC_XLOCK(fdp
);
1536 FILEDESC_XUNLOCK(fdp
);
1541 * Unshare a filedesc structure, if necessary by making a copy
1544 fdunshare(struct proc
*p
, struct thread
*td
)
1547 FILEDESC_XLOCK(p
->p_fd
);
1548 if (p
->p_fd
->fd_refcnt
> 1) {
1549 struct filedesc
*tmp
;
1551 FILEDESC_XUNLOCK(p
->p_fd
);
1552 tmp
= fdcopy(p
->p_fd
);
1556 FILEDESC_XUNLOCK(p
->p_fd
);
1560 * Copy a filedesc structure. A NULL pointer in returns a NULL reference,
1561 * this is to ease callers, not catch errors.
1564 fdcopy(struct filedesc
*fdp
)
1566 struct filedesc
*newfdp
;
1569 /* Certain daemons might not have file descriptors. */
1573 newfdp
= fdinit(fdp
);
1574 FILEDESC_SLOCK(fdp
);
1575 while (fdp
->fd_lastfile
>= newfdp
->fd_nfiles
) {
1576 FILEDESC_SUNLOCK(fdp
);
1577 FILEDESC_XLOCK(newfdp
);
1578 fdgrowtable(newfdp
, fdp
->fd_lastfile
+ 1);
1579 FILEDESC_XUNLOCK(newfdp
);
1580 FILEDESC_SLOCK(fdp
);
1582 /* copy everything except kqueue descriptors */
1583 newfdp
->fd_freefile
= -1;
1584 for (i
= 0; i
<= fdp
->fd_lastfile
; ++i
) {
1585 if (fdisused(fdp
, i
) &&
1586 fdp
->fd_ofiles
[i
]->f_type
!= DTYPE_KQUEUE
) {
1587 newfdp
->fd_ofiles
[i
] = fdp
->fd_ofiles
[i
];
1588 newfdp
->fd_ofileflags
[i
] = fdp
->fd_ofileflags
[i
];
1589 fhold(newfdp
->fd_ofiles
[i
]);
1590 newfdp
->fd_lastfile
= i
;
1592 if (newfdp
->fd_freefile
== -1)
1593 newfdp
->fd_freefile
= i
;
1596 FILEDESC_SUNLOCK(fdp
);
1597 FILEDESC_XLOCK(newfdp
);
1598 for (i
= 0; i
<= newfdp
->fd_lastfile
; ++i
)
1599 if (newfdp
->fd_ofiles
[i
] != NULL
)
1601 FILEDESC_XUNLOCK(newfdp
);
1602 FILEDESC_SLOCK(fdp
);
1603 if (newfdp
->fd_freefile
== -1)
1604 newfdp
->fd_freefile
= i
;
1605 newfdp
->fd_cmask
= fdp
->fd_cmask
;
1606 FILEDESC_SUNLOCK(fdp
);
1611 * Release a filedesc structure.
1614 fdfree(struct thread
*td
)
1616 struct filedesc
*fdp
;
1619 struct filedesc_to_leader
*fdtol
;
1621 struct vnode
*cdir
, *jdir
, *rdir
, *vp
;
1624 /* Certain daemons might not have file descriptors. */
1625 fdp
= td
->td_proc
->p_fd
;
1629 /* Check for special need to clear POSIX style locks */
1630 fdtol
= td
->td_proc
->p_fdtol
;
1631 if (fdtol
!= NULL
) {
1632 FILEDESC_XLOCK(fdp
);
1633 KASSERT(fdtol
->fdl_refcount
> 0,
1634 ("filedesc_to_refcount botch: fdl_refcount=%d",
1635 fdtol
->fdl_refcount
));
1636 if (fdtol
->fdl_refcount
== 1 &&
1637 (td
->td_proc
->p_leader
->p_flag
& P_ADVLOCK
) != 0) {
1638 for (i
= 0, fpp
= fdp
->fd_ofiles
;
1639 i
<= fdp
->fd_lastfile
;
1642 (*fpp
)->f_type
!= DTYPE_VNODE
)
1646 FILEDESC_XUNLOCK(fdp
);
1647 lf
.l_whence
= SEEK_SET
;
1650 lf
.l_type
= F_UNLCK
;
1652 locked
= VFS_LOCK_GIANT(vp
->v_mount
);
1653 (void) VOP_ADVLOCK(vp
,
1654 (caddr_t
)td
->td_proc
->
1659 VFS_UNLOCK_GIANT(locked
);
1660 FILEDESC_XLOCK(fdp
);
1662 fpp
= fdp
->fd_ofiles
+ i
;
1666 if (fdtol
->fdl_refcount
== 1) {
1667 if (fdp
->fd_holdleaderscount
> 0 &&
1668 (td
->td_proc
->p_leader
->p_flag
& P_ADVLOCK
) != 0) {
1670 * close() or do_dup() has cleared a reference
1671 * in a shared file descriptor table.
1673 fdp
->fd_holdleaderswakeup
= 1;
1674 sx_sleep(&fdp
->fd_holdleaderscount
,
1675 FILEDESC_LOCK(fdp
), PLOCK
, "fdlhold", 0);
1678 if (fdtol
->fdl_holdcount
> 0) {
1680 * Ensure that fdtol->fdl_leader remains
1681 * valid in closef().
1683 fdtol
->fdl_wakeup
= 1;
1684 sx_sleep(fdtol
, FILEDESC_LOCK(fdp
), PLOCK
,
1689 fdtol
->fdl_refcount
--;
1690 if (fdtol
->fdl_refcount
== 0 &&
1691 fdtol
->fdl_holdcount
== 0) {
1692 fdtol
->fdl_next
->fdl_prev
= fdtol
->fdl_prev
;
1693 fdtol
->fdl_prev
->fdl_next
= fdtol
->fdl_next
;
1696 td
->td_proc
->p_fdtol
= NULL
;
1697 FILEDESC_XUNLOCK(fdp
);
1699 FREE(fdtol
, M_FILEDESC_TO_LEADER
);
1701 FILEDESC_XLOCK(fdp
);
1702 i
= --fdp
->fd_refcnt
;
1703 FILEDESC_XUNLOCK(fdp
);
1707 * We are the last reference to the structure, so we can
1708 * safely assume it will not change out from under us.
1710 fpp
= fdp
->fd_ofiles
;
1711 for (i
= fdp
->fd_lastfile
; i
-- >= 0; fpp
++) {
1713 (void) closef(*fpp
, td
);
1715 FILEDESC_XLOCK(fdp
);
1717 /* XXX This should happen earlier. */
1718 mtx_lock(&fdesc_mtx
);
1719 td
->td_proc
->p_fd
= NULL
;
1720 mtx_unlock(&fdesc_mtx
);
1722 if (fdp
->fd_nfiles
> NDFILE
)
1723 FREE(fdp
->fd_ofiles
, M_FILEDESC
);
1724 if (NDSLOTS(fdp
->fd_nfiles
) > NDSLOTS(NDFILE
))
1725 FREE(fdp
->fd_map
, M_FILEDESC
);
1729 cdir
= fdp
->fd_cdir
;
1730 fdp
->fd_cdir
= NULL
;
1731 rdir
= fdp
->fd_rdir
;
1732 fdp
->fd_rdir
= NULL
;
1733 jdir
= fdp
->fd_jdir
;
1734 fdp
->fd_jdir
= NULL
;
1735 FILEDESC_XUNLOCK(fdp
);
1738 locked
= VFS_LOCK_GIANT(cdir
->v_mount
);
1740 VFS_UNLOCK_GIANT(locked
);
1743 locked
= VFS_LOCK_GIANT(rdir
->v_mount
);
1745 VFS_UNLOCK_GIANT(locked
);
1748 locked
= VFS_LOCK_GIANT(jdir
->v_mount
);
1750 VFS_UNLOCK_GIANT(locked
);
1757 * For setugid programs, we don't want to people to use that setugidness
1758 * to generate error messages which write to a file which otherwise would
1759 * otherwise be off-limits to the process. We check for filesystems where
1760 * the vnode can change out from under us after execve (like [lin]procfs).
1762 * Since setugidsafety calls this only for fd 0, 1 and 2, this check is
1763 * sufficient. We also don't check for setugidness since we know we are.
1766 is_unsafe(struct file
*fp
)
1768 if (fp
->f_type
== DTYPE_VNODE
) {
1769 struct vnode
*vp
= fp
->f_vnode
;
1771 if ((vp
->v_vflag
& VV_PROCDEP
) != 0)
1778 * Make this setguid thing safe, if at all possible.
1781 setugidsafety(struct thread
*td
)
1783 struct filedesc
*fdp
;
1786 /* Certain daemons might not have file descriptors. */
1787 fdp
= td
->td_proc
->p_fd
;
1792 * Note: fdp->fd_ofiles may be reallocated out from under us while
1793 * we are blocked in a close. Be careful!
1795 FILEDESC_XLOCK(fdp
);
1796 for (i
= 0; i
<= fdp
->fd_lastfile
; i
++) {
1799 if (fdp
->fd_ofiles
[i
] && is_unsafe(fdp
->fd_ofiles
[i
])) {
1802 knote_fdclose(td
, i
);
1804 * NULL-out descriptor prior to close to avoid
1805 * a race while close blocks.
1807 fp
= fdp
->fd_ofiles
[i
];
1808 fdp
->fd_ofiles
[i
] = NULL
;
1809 fdp
->fd_ofileflags
[i
] = 0;
1811 FILEDESC_XUNLOCK(fdp
);
1812 (void) closef(fp
, td
);
1813 FILEDESC_XLOCK(fdp
);
1816 FILEDESC_XUNLOCK(fdp
);
1820 * If a specific file object occupies a specific file descriptor, close the
1821 * file descriptor entry and drop a reference on the file object. This is a
1822 * convenience function to handle a subsequent error in a function that calls
1823 * falloc() that handles the race that another thread might have closed the
1824 * file descriptor out from under the thread creating the file object.
1827 fdclose(struct filedesc
*fdp
, struct file
*fp
, int idx
, struct thread
*td
)
1830 FILEDESC_XLOCK(fdp
);
1831 if (fdp
->fd_ofiles
[idx
] == fp
) {
1832 fdp
->fd_ofiles
[idx
] = NULL
;
1834 FILEDESC_XUNLOCK(fdp
);
1837 FILEDESC_XUNLOCK(fdp
);
1841 * Close any files on exec?
1844 fdcloseexec(struct thread
*td
)
1846 struct filedesc
*fdp
;
1849 /* Certain daemons might not have file descriptors. */
1850 fdp
= td
->td_proc
->p_fd
;
1854 FILEDESC_XLOCK(fdp
);
1857 * We cannot cache fd_ofiles or fd_ofileflags since operations
1858 * may block and rip them out from under us.
1860 for (i
= 0; i
<= fdp
->fd_lastfile
; i
++) {
1861 if (fdp
->fd_ofiles
[i
] != NULL
&&
1862 (fdp
->fd_ofiles
[i
]->f_type
== DTYPE_MQUEUE
||
1863 (fdp
->fd_ofileflags
[i
] & UF_EXCLOSE
))) {
1866 knote_fdclose(td
, i
);
1868 * NULL-out descriptor prior to close to avoid
1869 * a race while close blocks.
1871 fp
= fdp
->fd_ofiles
[i
];
1872 fdp
->fd_ofiles
[i
] = NULL
;
1873 fdp
->fd_ofileflags
[i
] = 0;
1875 if (fp
->f_type
== DTYPE_MQUEUE
)
1876 mq_fdclose(td
, i
, fp
);
1877 FILEDESC_XUNLOCK(fdp
);
1878 (void) closef(fp
, td
);
1879 FILEDESC_XLOCK(fdp
);
1882 FILEDESC_XUNLOCK(fdp
);
1886 * It is unsafe for set[ug]id processes to be started with file
1887 * descriptors 0..2 closed, as these descriptors are given implicit
1888 * significance in the Standard C library. fdcheckstd() will create a
1889 * descriptor referencing /dev/null for each of stdin, stdout, and
1890 * stderr that is not already open.
1893 fdcheckstd(struct thread
*td
)
1895 struct filedesc
*fdp
;
1896 register_t retval
, save
;
1897 int i
, error
, devnull
;
1899 fdp
= td
->td_proc
->p_fd
;
1902 KASSERT(fdp
->fd_refcnt
== 1, ("the fdtable should not be shared"));
1905 for (i
= 0; i
< 3; i
++) {
1906 if (fdp
->fd_ofiles
[i
] != NULL
)
1909 save
= td
->td_retval
[0];
1910 error
= kern_open(td
, "/dev/null", UIO_SYSSPACE
,
1912 devnull
= td
->td_retval
[0];
1913 KASSERT(devnull
== i
, ("oof, we didn't get our fd"));
1914 td
->td_retval
[0] = save
;
1918 error
= do_dup(td
, DUP_FIXED
, devnull
, i
, &retval
);
1927 * Internal form of close. Decrement reference count on file structure.
1928 * Note: td may be NULL when closing a file that was being passed in a
1931 * XXXRW: Giant is not required for the caller, but often will be held; this
1932 * makes it moderately likely the Giant will be recursed in the VFS case.
1935 closef(struct file
*fp
, struct thread
*td
)
1939 struct filedesc_to_leader
*fdtol
;
1940 struct filedesc
*fdp
;
1943 * POSIX record locking dictates that any close releases ALL
1944 * locks owned by this process. This is handled by setting
1945 * a flag in the unlock to free ONLY locks obeying POSIX
1946 * semantics, and not to free BSD-style file locks.
1947 * If the descriptor was in a message, POSIX-style locks
1948 * aren't passed with the descriptor, and the thread pointer
1949 * will be NULL. Callers should be careful only to pass a
1950 * NULL thread pointer when there really is no owning
1951 * context that might have locks, or the locks will be
1954 if (fp
->f_type
== DTYPE_VNODE
&& td
!= NULL
) {
1958 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
1959 if ((td
->td_proc
->p_leader
->p_flag
& P_ADVLOCK
) != 0) {
1960 lf
.l_whence
= SEEK_SET
;
1963 lf
.l_type
= F_UNLCK
;
1964 (void) VOP_ADVLOCK(vp
, (caddr_t
)td
->td_proc
->p_leader
,
1965 F_UNLCK
, &lf
, F_POSIX
);
1967 fdtol
= td
->td_proc
->p_fdtol
;
1968 if (fdtol
!= NULL
) {
1970 * Handle special case where file descriptor table is
1971 * shared between multiple process leaders.
1973 fdp
= td
->td_proc
->p_fd
;
1974 FILEDESC_XLOCK(fdp
);
1975 for (fdtol
= fdtol
->fdl_next
;
1976 fdtol
!= td
->td_proc
->p_fdtol
;
1977 fdtol
= fdtol
->fdl_next
) {
1978 if ((fdtol
->fdl_leader
->p_flag
&
1981 fdtol
->fdl_holdcount
++;
1982 FILEDESC_XUNLOCK(fdp
);
1983 lf
.l_whence
= SEEK_SET
;
1986 lf
.l_type
= F_UNLCK
;
1988 (void) VOP_ADVLOCK(vp
,
1989 (caddr_t
)fdtol
->fdl_leader
,
1990 F_UNLCK
, &lf
, F_POSIX
);
1991 FILEDESC_XLOCK(fdp
);
1992 fdtol
->fdl_holdcount
--;
1993 if (fdtol
->fdl_holdcount
== 0 &&
1994 fdtol
->fdl_wakeup
!= 0) {
1995 fdtol
->fdl_wakeup
= 0;
1999 FILEDESC_XUNLOCK(fdp
);
2001 VFS_UNLOCK_GIANT(vfslocked
);
2003 return (fdrop(fp
, td
));
2007 * Initialize the file pointer with the specified properties.
2009 * The ops are set with release semantics to be certain that the flags, type,
2010 * and data are visible when ops is. This is to prevent ops methods from being
2011 * called with bad data.
2014 finit(struct file
*fp
, u_int flag
, short type
, void *data
, struct fileops
*ops
)
2019 atomic_store_rel_ptr((volatile uintptr_t *)&fp
->f_ops
, (uintptr_t)ops
);
2024 * Extract the file pointer associated with the specified descriptor for the
2025 * current user process.
2027 * If the descriptor doesn't exist, EBADF is returned.
2029 * If the descriptor exists but doesn't match 'flags' then return EBADF for
2030 * read attempts and EINVAL for write attempts.
2032 * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
2033 * It should be dropped with fdrop(). If it is not set, then the refcount
2034 * will not be bumped however the thread's filedesc struct will be returned
2035 * locked (for fgetsock).
2037 * If an error occured the non-zero error is returned and *fpp is set to
2038 * NULL. Otherwise *fpp is set and zero is returned.
2041 _fget(struct thread
*td
, int fd
, struct file
**fpp
, int flags
, int hold
)
2043 struct filedesc
*fdp
;
2047 if (td
== NULL
|| (fdp
= td
->td_proc
->p_fd
) == NULL
)
2049 FILEDESC_SLOCK(fdp
);
2050 if ((fp
= fget_locked(fdp
, fd
)) == NULL
|| fp
->f_ops
== &badfileops
) {
2051 FILEDESC_SUNLOCK(fdp
);
2056 * FREAD and FWRITE failure return EBADF as per POSIX.
2058 * Only one flag, or 0, may be specified.
2060 if (flags
== FREAD
&& (fp
->f_flag
& FREAD
) == 0) {
2061 FILEDESC_SUNLOCK(fdp
);
2064 if (flags
== FWRITE
&& (fp
->f_flag
& FWRITE
) == 0) {
2065 FILEDESC_SUNLOCK(fdp
);
2070 FILEDESC_SUNLOCK(fdp
);
2077 fget(struct thread
*td
, int fd
, struct file
**fpp
)
2080 return(_fget(td
, fd
, fpp
, 0, 1));
2084 fget_read(struct thread
*td
, int fd
, struct file
**fpp
)
2087 return(_fget(td
, fd
, fpp
, FREAD
, 1));
2091 fget_write(struct thread
*td
, int fd
, struct file
**fpp
)
2094 return(_fget(td
, fd
, fpp
, FWRITE
, 1));
2098 * Like fget() but loads the underlying vnode, or returns an error if the
2099 * descriptor does not represent a vnode. Note that pipes use vnodes but
2100 * never have VM objects. The returned vnode will be vref()'d.
2102 * XXX: what about the unused flags ?
2105 _fgetvp(struct thread
*td
, int fd
, struct vnode
**vpp
, int flags
)
2111 if ((error
= _fget(td
, fd
, &fp
, flags
, 0)) != 0)
2113 if (fp
->f_vnode
== NULL
) {
2119 FILEDESC_SUNLOCK(td
->td_proc
->p_fd
);
2124 fgetvp(struct thread
*td
, int fd
, struct vnode
**vpp
)
2127 return (_fgetvp(td
, fd
, vpp
, 0));
2131 fgetvp_read(struct thread
*td
, int fd
, struct vnode
**vpp
)
2134 return (_fgetvp(td
, fd
, vpp
, FREAD
));
2139 fgetvp_write(struct thread
*td
, int fd
, struct vnode
**vpp
)
2142 return (_fgetvp(td
, fd
, vpp
, FWRITE
));
2147 * Like fget() but loads the underlying socket, or returns an error if the
2148 * descriptor does not represent a socket.
2150 * We bump the ref count on the returned socket. XXX Also obtain the SX lock
2153 * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely
2154 * on their file descriptor reference to prevent the socket from being free'd
2158 fgetsock(struct thread
*td
, int fd
, struct socket
**spp
, u_int
*fflagp
)
2166 if ((error
= _fget(td
, fd
, &fp
, 0, 0)) != 0)
2168 if (fp
->f_type
!= DTYPE_SOCKET
) {
2173 *fflagp
= fp
->f_flag
;
2178 FILEDESC_SUNLOCK(td
->td_proc
->p_fd
);
2183 * Drop the reference count on the socket and XXX release the SX lock in the
2184 * future. The last reference closes the socket.
2186 * XXXRW: fputsock() is deprecated, see comment for fgetsock().
2189 fputsock(struct socket
*so
)
2198 * Handle the last reference to a file being closed.
2201 _fdrop(struct file
*fp
, struct thread
*td
)
2206 if (fp
->f_count
!= 0)
2207 panic("fdrop: count %d", fp
->f_count
);
2208 if (fp
->f_ops
!= &badfileops
)
2209 error
= fo_close(fp
, td
);
2211 * The f_cdevpriv cannot be assigned non-NULL value while we
2212 * are destroying the file.
2214 if (fp
->f_cdevpriv
!= NULL
)
2216 atomic_subtract_int(&openfiles
, 1);
2218 uma_zfree(file_zone
, fp
);
2224 * Apply an advisory lock on a file descriptor.
2226 * Just attempt to get a record lock of the requested type on the entire file
2227 * (l_whence = SEEK_SET, l_start = 0, l_len = 0).
2229 #ifndef _SYS_SYSPROTO_H_
2237 flock(struct thread
*td
, struct flock_args
*uap
)
2245 if ((error
= fget(td
, uap
->fd
, &fp
)) != 0)
2247 if (fp
->f_type
!= DTYPE_VNODE
) {
2249 return (EOPNOTSUPP
);
2253 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
2254 lf
.l_whence
= SEEK_SET
;
2257 if (uap
->how
& LOCK_UN
) {
2258 lf
.l_type
= F_UNLCK
;
2259 atomic_clear_int(&fp
->f_flag
, FHASLOCK
);
2260 error
= VOP_ADVLOCK(vp
, (caddr_t
)fp
, F_UNLCK
, &lf
, F_FLOCK
);
2263 if (uap
->how
& LOCK_EX
)
2264 lf
.l_type
= F_WRLCK
;
2265 else if (uap
->how
& LOCK_SH
)
2266 lf
.l_type
= F_RDLCK
;
2271 atomic_set_int(&fp
->f_flag
, FHASLOCK
);
2272 error
= VOP_ADVLOCK(vp
, (caddr_t
)fp
, F_SETLK
, &lf
,
2273 (uap
->how
& LOCK_NB
) ? F_FLOCK
: F_FLOCK
| F_WAIT
);
2276 VFS_UNLOCK_GIANT(vfslocked
);
2280 * Duplicate the specified descriptor to a free descriptor.
2283 dupfdopen(struct thread
*td
, struct filedesc
*fdp
, int indx
, int dfd
, int mode
, int error
)
2289 * If the to-be-dup'd fd number is greater than the allowed number
2290 * of file descriptors, or the fd to be dup'd has already been
2291 * closed, then reject.
2293 FILEDESC_XLOCK(fdp
);
2294 if (dfd
< 0 || dfd
>= fdp
->fd_nfiles
||
2295 (wfp
= fdp
->fd_ofiles
[dfd
]) == NULL
) {
2296 FILEDESC_XUNLOCK(fdp
);
2301 * There are two cases of interest here.
2303 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
2305 * For ENXIO steal away the file structure from (dfd) and store it in
2306 * (indx). (dfd) is effectively closed by this operation.
2308 * Any other error code is just returned.
2313 * Check that the mode the file is being opened for is a
2314 * subset of the mode of the existing descriptor.
2316 if (((mode
& (FREAD
|FWRITE
)) | wfp
->f_flag
) != wfp
->f_flag
) {
2317 FILEDESC_XUNLOCK(fdp
);
2320 fp
= fdp
->fd_ofiles
[indx
];
2321 fdp
->fd_ofiles
[indx
] = wfp
;
2322 fdp
->fd_ofileflags
[indx
] = fdp
->fd_ofileflags
[dfd
];
2326 FILEDESC_XUNLOCK(fdp
);
2329 * We now own the reference to fp that the ofiles[]
2330 * array used to own. Release it.
2337 * Steal away the file pointer from dfd and stuff it into indx.
2339 fp
= fdp
->fd_ofiles
[indx
];
2340 fdp
->fd_ofiles
[indx
] = fdp
->fd_ofiles
[dfd
];
2341 fdp
->fd_ofiles
[dfd
] = NULL
;
2342 fdp
->fd_ofileflags
[indx
] = fdp
->fd_ofileflags
[dfd
];
2343 fdp
->fd_ofileflags
[dfd
] = 0;
2347 FILEDESC_XUNLOCK(fdp
);
2350 * We now own the reference to fp that the ofiles[] array
2351 * used to own. Release it.
2358 FILEDESC_XUNLOCK(fdp
);
2365 * Scan all active processes to see if any of them have a current or root
2366 * directory of `olddp'. If so, replace them with the new mount point.
2369 mountcheckdirs(struct vnode
*olddp
, struct vnode
*newdp
)
2371 struct filedesc
*fdp
;
2375 if (vrefcnt(olddp
) == 1)
2377 sx_slock(&allproc_lock
);
2378 FOREACH_PROC_IN_SYSTEM(p
) {
2383 FILEDESC_XLOCK(fdp
);
2384 if (fdp
->fd_cdir
== olddp
) {
2386 fdp
->fd_cdir
= newdp
;
2389 if (fdp
->fd_rdir
== olddp
) {
2391 fdp
->fd_rdir
= newdp
;
2394 FILEDESC_XUNLOCK(fdp
);
2399 sx_sunlock(&allproc_lock
);
2400 if (rootvnode
== olddp
) {
2407 struct filedesc_to_leader
*
2408 filedesc_to_leader_alloc(struct filedesc_to_leader
*old
, struct filedesc
*fdp
, struct proc
*leader
)
2410 struct filedesc_to_leader
*fdtol
;
2412 MALLOC(fdtol
, struct filedesc_to_leader
*,
2413 sizeof(struct filedesc_to_leader
),
2414 M_FILEDESC_TO_LEADER
,
2416 fdtol
->fdl_refcount
= 1;
2417 fdtol
->fdl_holdcount
= 0;
2418 fdtol
->fdl_wakeup
= 0;
2419 fdtol
->fdl_leader
= leader
;
2421 FILEDESC_XLOCK(fdp
);
2422 fdtol
->fdl_next
= old
->fdl_next
;
2423 fdtol
->fdl_prev
= old
;
2424 old
->fdl_next
= fdtol
;
2425 fdtol
->fdl_next
->fdl_prev
= fdtol
;
2426 FILEDESC_XUNLOCK(fdp
);
2428 fdtol
->fdl_next
= fdtol
;
2429 fdtol
->fdl_prev
= fdtol
;
2435 * Get file structures globally.
2438 sysctl_kern_file(SYSCTL_HANDLER_ARGS
)
2441 struct filedesc
*fdp
;
2446 error
= sysctl_wire_old_buffer(req
, 0);
2449 if (req
->oldptr
== NULL
) {
2451 sx_slock(&allproc_lock
);
2452 FOREACH_PROC_IN_SYSTEM(p
) {
2453 if (p
->p_state
== PRS_NEW
)
2458 /* overestimates sparse tables. */
2459 if (fdp
->fd_lastfile
> 0)
2460 n
+= fdp
->fd_lastfile
;
2463 sx_sunlock(&allproc_lock
);
2464 return (SYSCTL_OUT(req
, 0, n
* sizeof(xf
)));
2467 bzero(&xf
, sizeof(xf
));
2468 xf
.xf_size
= sizeof(xf
);
2469 sx_slock(&allproc_lock
);
2470 FOREACH_PROC_IN_SYSTEM(p
) {
2471 if (p
->p_state
== PRS_NEW
)
2474 if (p_cansee(req
->td
, p
) != 0) {
2478 xf
.xf_pid
= p
->p_pid
;
2479 xf
.xf_uid
= p
->p_ucred
->cr_uid
;
2484 FILEDESC_SLOCK(fdp
);
2485 for (n
= 0; fdp
->fd_refcnt
> 0 && n
< fdp
->fd_nfiles
; ++n
) {
2486 if ((fp
= fdp
->fd_ofiles
[n
]) == NULL
)
2490 xf
.xf_data
= fp
->f_data
;
2491 xf
.xf_vnode
= fp
->f_vnode
;
2492 xf
.xf_type
= fp
->f_type
;
2493 xf
.xf_count
= fp
->f_count
;
2495 xf
.xf_offset
= fp
->f_offset
;
2496 xf
.xf_flag
= fp
->f_flag
;
2497 error
= SYSCTL_OUT(req
, &xf
, sizeof(xf
));
2501 FILEDESC_SUNLOCK(fdp
);
2506 sx_sunlock(&allproc_lock
);
2510 SYSCTL_PROC(_kern
, KERN_FILE
, file
, CTLTYPE_OPAQUE
|CTLFLAG_RD
,
2511 0, 0, sysctl_kern_file
, "S,xfile", "Entire file table");
2514 export_vnode_for_sysctl(struct vnode
*vp
, int type
,
2515 struct kinfo_file
*kif
, struct filedesc
*fdp
, struct sysctl_req
*req
)
2518 char *fullpath
, *freepath
;
2521 bzero(kif
, sizeof(*kif
));
2522 kif
->kf_structsize
= sizeof(*kif
);
2526 kif
->kf_type
= KF_TYPE_VNODE
;
2527 /* This function only handles directories. */
2528 KASSERT(vp
->v_type
== VDIR
, ("export_vnode_for_sysctl: vnode not directory"));
2529 kif
->kf_vnode_type
= KF_VTYPE_VDIR
;
2532 * This is not a true file descriptor, so we set a bogus refcount
2533 * and offset to indicate these fields should be ignored.
2535 kif
->kf_ref_count
= -1;
2536 kif
->kf_offset
= -1;
2540 FILEDESC_SUNLOCK(fdp
);
2541 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
2542 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
2543 vn_fullpath(curthread
, vp
, &fullpath
, &freepath
);
2545 VFS_UNLOCK_GIANT(vfslocked
);
2546 strlcpy(kif
->kf_path
, fullpath
, sizeof(kif
->kf_path
));
2547 if (freepath
!= NULL
)
2548 free(freepath
, M_TEMP
);
2549 error
= SYSCTL_OUT(req
, kif
, sizeof(*kif
));
2550 FILEDESC_SLOCK(fdp
);
2555 * Get per-process file descriptors for use by procstat(1), et al.
2558 sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS
)
2560 char *fullpath
, *freepath
;
2561 struct kinfo_file
*kif
;
2562 struct filedesc
*fdp
;
2563 int error
, i
, *name
;
2572 if ((p
= pfind((pid_t
)name
[0])) == NULL
)
2574 if ((error
= p_candebug(curthread
, p
))) {
2582 kif
= malloc(sizeof(*kif
), M_TEMP
, M_WAITOK
);
2583 FILEDESC_SLOCK(fdp
);
2584 if (fdp
->fd_cdir
!= NULL
)
2585 export_vnode_for_sysctl(fdp
->fd_cdir
, KF_FD_TYPE_CWD
, kif
,
2587 if (fdp
->fd_rdir
!= NULL
)
2588 export_vnode_for_sysctl(fdp
->fd_rdir
, KF_FD_TYPE_ROOT
, kif
,
2590 if (fdp
->fd_jdir
!= NULL
)
2591 export_vnode_for_sysctl(fdp
->fd_jdir
, KF_FD_TYPE_JAIL
, kif
,
2593 for (i
= 0; i
< fdp
->fd_nfiles
; i
++) {
2594 if ((fp
= fdp
->fd_ofiles
[i
]) == NULL
)
2596 bzero(kif
, sizeof(*kif
));
2597 kif
->kf_structsize
= sizeof(*kif
);
2602 switch (fp
->f_type
) {
2604 kif
->kf_type
= KF_TYPE_VNODE
;
2609 kif
->kf_type
= KF_TYPE_SOCKET
;
2614 kif
->kf_type
= KF_TYPE_PIPE
;
2618 kif
->kf_type
= KF_TYPE_FIFO
;
2624 kif
->kf_type
= KF_TYPE_KQUEUE
;
2628 kif
->kf_type
= KF_TYPE_CRYPTO
;
2632 kif
->kf_type
= KF_TYPE_MQUEUE
;
2636 kif
->kf_type
= KF_TYPE_SHM
;
2640 kif
->kf_type
= KF_TYPE_SEM
;
2644 kif
->kf_type
= KF_TYPE_PTS
;
2649 kif
->kf_type
= KF_TYPE_UNKNOWN
;
2652 kif
->kf_ref_count
= fp
->f_count
;
2653 if (fp
->f_flag
& FREAD
)
2654 kif
->kf_flags
|= KF_FLAG_READ
;
2655 if (fp
->f_flag
& FWRITE
)
2656 kif
->kf_flags
|= KF_FLAG_WRITE
;
2657 if (fp
->f_flag
& FAPPEND
)
2658 kif
->kf_flags
|= KF_FLAG_APPEND
;
2659 if (fp
->f_flag
& FASYNC
)
2660 kif
->kf_flags
|= KF_FLAG_ASYNC
;
2661 if (fp
->f_flag
& FFSYNC
)
2662 kif
->kf_flags
|= KF_FLAG_FSYNC
;
2663 if (fp
->f_flag
& FNONBLOCK
)
2664 kif
->kf_flags
|= KF_FLAG_NONBLOCK
;
2665 if (fp
->f_flag
& O_DIRECT
)
2666 kif
->kf_flags
|= KF_FLAG_DIRECT
;
2667 if (fp
->f_flag
& FHASLOCK
)
2668 kif
->kf_flags
|= KF_FLAG_HASLOCK
;
2669 kif
->kf_offset
= fp
->f_offset
;
2672 switch (vp
->v_type
) {
2674 kif
->kf_vnode_type
= KF_VTYPE_VNON
;
2677 kif
->kf_vnode_type
= KF_VTYPE_VREG
;
2680 kif
->kf_vnode_type
= KF_VTYPE_VDIR
;
2683 kif
->kf_vnode_type
= KF_VTYPE_VBLK
;
2686 kif
->kf_vnode_type
= KF_VTYPE_VCHR
;
2689 kif
->kf_vnode_type
= KF_VTYPE_VLNK
;
2692 kif
->kf_vnode_type
= KF_VTYPE_VSOCK
;
2695 kif
->kf_vnode_type
= KF_VTYPE_VFIFO
;
2698 kif
->kf_vnode_type
= KF_VTYPE_VBAD
;
2701 kif
->kf_vnode_type
= KF_VTYPE_UNKNOWN
;
2705 * It is OK to drop the filedesc lock here as we will
2706 * re-validate and re-evaluate its properties when
2707 * the loop continues.
2711 FILEDESC_SUNLOCK(fdp
);
2712 vfslocked
= VFS_LOCK_GIANT(vp
->v_mount
);
2713 vn_lock(vp
, LK_EXCLUSIVE
| LK_RETRY
);
2714 vn_fullpath(curthread
, vp
, &fullpath
, &freepath
);
2716 VFS_UNLOCK_GIANT(vfslocked
);
2717 strlcpy(kif
->kf_path
, fullpath
,
2718 sizeof(kif
->kf_path
));
2719 if (freepath
!= NULL
)
2720 free(freepath
, M_TEMP
);
2721 FILEDESC_SLOCK(fdp
);
2724 struct sockaddr
*sa
;
2726 if (so
->so_proto
->pr_usrreqs
->pru_sockaddr(so
, &sa
)
2727 == 0 && sa
->sa_len
<= sizeof(kif
->kf_sa_local
)) {
2728 bcopy(sa
, &kif
->kf_sa_local
, sa
->sa_len
);
2731 if (so
->so_proto
->pr_usrreqs
->pru_peeraddr(so
, &sa
)
2732 == 00 && sa
->sa_len
<= sizeof(kif
->kf_sa_peer
)) {
2733 bcopy(sa
, &kif
->kf_sa_peer
, sa
->sa_len
);
2736 kif
->kf_sock_domain
=
2737 so
->so_proto
->pr_domain
->dom_family
;
2738 kif
->kf_sock_type
= so
->so_type
;
2739 kif
->kf_sock_protocol
= so
->so_proto
->pr_protocol
;
2742 strlcpy(kif
->kf_path
, tty_devname(tp
),
2743 sizeof(kif
->kf_path
));
2745 error
= SYSCTL_OUT(req
, kif
, sizeof(*kif
));
2749 FILEDESC_SUNLOCK(fdp
);
2755 static SYSCTL_NODE(_kern_proc
, KERN_PROC_FILEDESC
, filedesc
, CTLFLAG_RD
,
2756 sysctl_kern_proc_filedesc
, "Process filedesc entries");
2760 * For the purposes of debugging, generate a human-readable string for the
2764 file_type_to_name(short type
)
2794 * For the purposes of debugging, identify a process (if any, perhaps one of
2795 * many) that references the passed file in its file descriptor array. Return
2798 static struct proc
*
2799 file_to_first_proc(struct file
*fp
)
2801 struct filedesc
*fdp
;
2805 FOREACH_PROC_IN_SYSTEM(p
) {
2806 if (p
->p_state
== PRS_NEW
)
2811 for (n
= 0; n
< fdp
->fd_nfiles
; n
++) {
2812 if (fp
== fdp
->fd_ofiles
[n
])
2820 db_print_file(struct file
*fp
, int header
)
2825 db_printf("%8s %4s %8s %8s %4s %5s %6s %8s %5s %12s\n",
2826 "File", "Type", "Data", "Flag", "GCFl", "Count",
2827 "MCount", "Vnode", "FPID", "FCmd");
2828 p
= file_to_first_proc(fp
);
2829 db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp
,
2830 file_type_to_name(fp
->f_type
), fp
->f_data
, fp
->f_flag
,
2831 0, fp
->f_count
, 0, fp
->f_vnode
,
2832 p
!= NULL
? p
->p_pid
: -1, p
!= NULL
? p
->p_comm
: "-");
2835 DB_SHOW_COMMAND(file
, db_show_file
)
2840 db_printf("usage: show file <addr>\n");
2843 fp
= (struct file
*)addr
;
2844 db_print_file(fp
, 1);
2847 DB_SHOW_COMMAND(files
, db_show_files
)
2849 struct filedesc
*fdp
;
2856 FOREACH_PROC_IN_SYSTEM(p
) {
2857 if (p
->p_state
== PRS_NEW
)
2859 if ((fdp
= p
->p_fd
) == NULL
)
2861 for (n
= 0; n
< fdp
->fd_nfiles
; ++n
) {
2862 if ((fp
= fdp
->fd_ofiles
[n
]) == NULL
)
2864 db_print_file(fp
, header
);
2871 SYSCTL_INT(_kern
, KERN_MAXFILESPERPROC
, maxfilesperproc
, CTLFLAG_RW
,
2872 &maxfilesperproc
, 0, "Maximum files allowed open per process");
2874 SYSCTL_INT(_kern
, KERN_MAXFILES
, maxfiles
, CTLFLAG_RW
,
2875 &maxfiles
, 0, "Maximum number of files");
2877 SYSCTL_INT(_kern
, OID_AUTO
, openfiles
, CTLFLAG_RD
,
2878 __DEVOLATILE(int *, &openfiles
), 0, "System-wide number of open files");
2882 filelistinit(void *dummy
)
2885 file_zone
= uma_zcreate("Files", sizeof(struct file
), NULL
, NULL
,
2886 NULL
, NULL
, UMA_ALIGN_PTR
, 0);
2887 mtx_init(&sigio_lock
, "sigio lock", NULL
, MTX_DEF
);
2888 mtx_init(&fdesc_mtx
, "fdesc", NULL
, MTX_DEF
);
2890 SYSINIT(select
, SI_SUB_LOCK
, SI_ORDER_FIRST
, filelistinit
, NULL
);
2892 /*-------------------------------------------------------------------*/
2895 badfo_readwrite(struct file
*fp
, struct uio
*uio
, struct ucred
*active_cred
, int flags
, struct thread
*td
)
2902 badfo_truncate(struct file
*fp
, off_t length
, struct ucred
*active_cred
, struct thread
*td
)
2909 badfo_ioctl(struct file
*fp
, u_long com
, void *data
, struct ucred
*active_cred
, struct thread
*td
)
2916 badfo_poll(struct file
*fp
, int events
, struct ucred
*active_cred
, struct thread
*td
)
2923 badfo_kqfilter(struct file
*fp
, struct knote
*kn
)
2930 badfo_stat(struct file
*fp
, struct stat
*sb
, struct ucred
*active_cred
, struct thread
*td
)
2937 badfo_close(struct file
*fp
, struct thread
*td
)
2943 struct fileops badfileops
= {
2944 .fo_read
= badfo_readwrite
,
2945 .fo_write
= badfo_readwrite
,
2946 .fo_truncate
= badfo_truncate
,
2947 .fo_ioctl
= badfo_ioctl
,
2948 .fo_poll
= badfo_poll
,
2949 .fo_kqfilter
= badfo_kqfilter
,
2950 .fo_stat
= badfo_stat
,
2951 .fo_close
= badfo_close
,
2955 /*-------------------------------------------------------------------*/
2958 * File Descriptor pseudo-device driver (/dev/fd/).
2960 * Opening minor device N dup()s the file (if any) connected to file
2961 * descriptor N belonging to the calling process. Note that this driver
2962 * consists of only the ``open()'' routine, because all subsequent
2963 * references to this file will be direct to the other driver.
2965 * XXX: we could give this one a cloning event handler if necessary.
2970 fdopen(struct cdev
*dev
, int mode
, int type
, struct thread
*td
)
2974 * XXX Kludge: set curthread->td_dupfd to contain the value of the
2975 * the file descriptor being sought for duplication. The error
2976 * return ensures that the vnode for this device will be released
2977 * by vn_open. Open will detect this special error and take the
2978 * actions in dupfdopen below. Other callers of vn_open or VOP_OPEN
2979 * will simply report the error.
2981 td
->td_dupfd
= dev2unit(dev
);
2985 static struct cdevsw fildesc_cdevsw
= {
2986 .d_version
= D_VERSION
,
2992 fildesc_drvinit(void *unused
)
2996 dev
= make_dev(&fildesc_cdevsw
, 0, UID_ROOT
, GID_WHEEL
, 0666, "fd/0");
2997 make_dev_alias(dev
, "stdin");
2998 dev
= make_dev(&fildesc_cdevsw
, 1, UID_ROOT
, GID_WHEEL
, 0666, "fd/1");
2999 make_dev_alias(dev
, "stdout");
3000 dev
= make_dev(&fildesc_cdevsw
, 2, UID_ROOT
, GID_WHEEL
, 0666, "fd/2");
3001 make_dev_alias(dev
, "stderr");
3004 SYSINIT(fildescdev
, SI_SUB_DRIVERS
, SI_ORDER_MIDDLE
, fildesc_drvinit
, NULL
);