kernel - remove FSMID support
sys/kern/vfs_vnops.c
1 /*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
38 * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94
39 * $FreeBSD: src/sys/kern/vfs_vnops.c,v 1.87.2.13 2002/12/29 18:19:53 dillon Exp $
40 * $DragonFly: src/sys/kern/vfs_vnops.c,v 1.58 2008/06/28 17:59:49 dillon Exp $
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/fcntl.h>
46 #include <sys/file.h>
47 #include <sys/stat.h>
48 #include <sys/proc.h>
49 #include <sys/priv.h>
50 #include <sys/mount.h>
51 #include <sys/nlookup.h>
52 #include <sys/vnode.h>
53 #include <sys/buf.h>
54 #include <sys/filio.h>
55 #include <sys/ttycom.h>
56 #include <sys/conf.h>
57 #include <sys/sysctl.h>
58 #include <sys/syslog.h>
60 #include <sys/thread2.h>
61 #include <sys/mplock2.h>
63 static int vn_closefile (struct file *fp);
64 static int vn_ioctl (struct file *fp, u_long com, caddr_t data,
65 struct ucred *cred, struct sysmsg *msg);
66 static int vn_read (struct file *fp, struct uio *uio,
67 struct ucred *cred, int flags);
68 static int vn_poll (struct file *fp, int events, struct ucred *cred);
69 static int vn_kqfilter (struct file *fp, struct knote *kn);
70 static int vn_statfile (struct file *fp, struct stat *sb, struct ucred *cred);
71 static int vn_write (struct file *fp, struct uio *uio,
72 struct ucred *cred, int flags);
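/*
 * The knobs below allow the read, write and getattr paths in this file
 * to call VOP_READ, VOP_WRITE and VOP_GETATTR without the MP lock, but
 * only for vnodes whose filesystem has flagged them VMP_READ, VMP_WRITE
 * or VMP_GETATTR.  They default to off and can be enabled at run time,
 * e.g. "sysctl vfs.read_mpsafe=1".  On non-SMP builds they are
 * hard-wired to 0 and the MP-locked path is always taken.
 */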
74 #ifdef SMP
75 static int read_mpsafe = 0;
76 SYSCTL_INT(_vfs, OID_AUTO, read_mpsafe, CTLFLAG_RW, &read_mpsafe, 0, "");
77 static int write_mpsafe = 0;
78 SYSCTL_INT(_vfs, OID_AUTO, write_mpsafe, CTLFLAG_RW, &write_mpsafe, 0, "");
79 static int getattr_mpsafe = 0;
80 SYSCTL_INT(_vfs, OID_AUTO, getattr_mpsafe, CTLFLAG_RW, &getattr_mpsafe, 0, "");
81 #else
82 #define read_mpsafe 0
83 #define write_mpsafe 0
84 #define getattr_mpsafe 0
85 #endif
87 struct fileops vnode_fileops = {
88 .fo_read = vn_read,
89 .fo_write = vn_write,
90 .fo_ioctl = vn_ioctl,
91 .fo_poll = vn_poll,
92 .fo_kqfilter = vn_kqfilter,
93 .fo_stat = vn_statfile,
94 .fo_close = vn_closefile,
95 .fo_shutdown = nofo_shutdown
99 * Common code for vnode open operations. Check permissions, and call
100 * the VOP_OPEN or VOP_NCREATE routine.
102 * The caller is responsible for setting up nd with nlookup_init() and
103 * for cleaning it up with nlookup_done(), whether we return an error
104 * or not.
106 * On success nd->nl_open_vp will hold a referenced and, if requested,
107 * locked vnode. A locked vnode is requested via NLC_LOCKVP. If fp
108 * is non-NULL the vnode will be installed in the file pointer.
110 * NOTE: The vnode is referenced just once on return whether or not it
111 * is also installed in the file pointer.
114 vn_open(struct nlookupdata *nd, struct file *fp, int fmode, int cmode)
116 struct vnode *vp;
117 struct ucred *cred = nd->nl_cred;
118 struct vattr vat;
119 struct vattr *vap = &vat;
120 int error;
123 * Certain combinations are illegal
125 if ((fmode & (FWRITE | O_TRUNC)) == O_TRUNC)
126 return(EACCES);
129 * Lookup the path and create or obtain the vnode. After a
130 * successful lookup a locked nd->nl_nch will be returned.
132 * The result of this section should be a locked vnode.
134 * XXX with only a little work we should be able to avoid locking
135 * the vnode if FWRITE, O_CREAT, and O_TRUNC are *not* set.
137 nd->nl_flags |= NLC_OPEN;
138 if (fmode & O_APPEND)
139 nd->nl_flags |= NLC_APPEND;
140 if (fmode & O_TRUNC)
141 nd->nl_flags |= NLC_TRUNCATE;
142 if (fmode & FREAD)
143 nd->nl_flags |= NLC_READ;
144 if (fmode & FWRITE)
145 nd->nl_flags |= NLC_WRITE;
146 if ((fmode & O_EXCL) == 0 && (fmode & O_NOFOLLOW) == 0)
147 nd->nl_flags |= NLC_FOLLOW;
149 if (fmode & O_CREAT) {
151 * CONDITIONAL CREATE FILE CASE
153 * Setting NLC_CREATE causes a negative hit to store
154 * the negative hit ncp and not return an error. Then
155 * nc_error or nc_vp may be checked to see if the ncp
156 * represents a negative hit. NLC_CREATE also requires
157 * write permission on the governing directory or EPERM
158 * is returned.
160 nd->nl_flags |= NLC_CREATE;
161 nd->nl_flags |= NLC_REFDVP;
162 bwillinode(1);
163 error = nlookup(nd);
164 } else {
166 * NORMAL OPEN FILE CASE
168 error = nlookup(nd);
171 if (error)
172 return (error);
175 * split case to allow us to re-resolve and retry the ncp in case
176 * we get ESTALE.
178 again:
179 if (fmode & O_CREAT) {
180 if (nd->nl_nch.ncp->nc_vp == NULL) {
181 if ((error = ncp_writechk(&nd->nl_nch)) != 0)
182 return (error);
183 VATTR_NULL(vap);
184 vap->va_type = VREG;
185 vap->va_mode = cmode;
186 if (fmode & O_EXCL)
187 vap->va_vaflags |= VA_EXCLUSIVE;
188 error = VOP_NCREATE(&nd->nl_nch, nd->nl_dvp, &vp,
189 nd->nl_cred, vap);
190 if (error)
191 return (error);
192 fmode &= ~O_TRUNC;
193 /* locked vnode is returned */
194 } else {
195 if (fmode & O_EXCL) {
196 error = EEXIST;
197 } else {
198 error = cache_vget(&nd->nl_nch, cred,
199 LK_EXCLUSIVE, &vp);
201 if (error)
202 return (error);
203 fmode &= ~O_CREAT;
205 } else {
206 error = cache_vget(&nd->nl_nch, cred, LK_EXCLUSIVE, &vp);
207 if (error)
208 return (error);
212 * We have a locked vnode and ncp now. Note that the ncp will
213 * be cleaned up by the caller if nd->nl_nch is left intact.
215 if (vp->v_type == VLNK) {
216 error = EMLINK;
217 goto bad;
219 if (vp->v_type == VSOCK) {
220 error = EOPNOTSUPP;
221 goto bad;
223 if ((fmode & O_CREAT) == 0) {
224 if (fmode & (FWRITE | O_TRUNC)) {
225 if (vp->v_type == VDIR) {
226 error = EISDIR;
227 goto bad;
229 error = vn_writechk(vp, &nd->nl_nch);
230 if (error) {
232 * Special stale handling, re-resolve the
233 * vnode.
235 if (error == ESTALE) {
236 vput(vp);
237 vp = NULL;
238 cache_setunresolved(&nd->nl_nch);
239 error = cache_resolve(&nd->nl_nch, cred);
240 if (error == 0)
241 goto again;
243 goto bad;
247 if (fmode & O_TRUNC) {
248 vn_unlock(vp); /* XXX */
249 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */
250 VATTR_NULL(vap);
251 vap->va_size = 0;
252 error = VOP_SETATTR(vp, vap, cred);
253 if (error)
254 goto bad;
258 * Setup the fp so VOP_OPEN can override it. No descriptor has been
259 * associated with the fp yet so we own it clean.
261 * f_nchandle inherits nl_nch. This used to be necessary only for
262 * directories but now we do it unconditionally so f*() ops
263 * such as fchmod() can access the actual namespace that was
264 * used to open the file.
266 if (fp) {
267 if (nd->nl_flags & NLC_APPENDONLY)
268 fmode |= FAPPENDONLY;
269 fp->f_nchandle = nd->nl_nch;
270 cache_zero(&nd->nl_nch);
271 cache_unlock(&fp->f_nchandle);
275 * Get rid of nl_nch. vn_open does not return it (it returns the
276 * vnode or the file pointer). Note: we can't leave nl_nch locked
277 * through the VOP_OPEN anyway since the VOP_OPEN may block, e.g.
278 * on /dev/ttyd0
280 if (nd->nl_nch.ncp)
281 cache_put(&nd->nl_nch);
283 error = VOP_OPEN(vp, fmode, cred, fp);
284 if (error) {
286 * setting f_ops to &badfileops will prevent the descriptor
287 * code from trying to close and release the vnode, since
288 * the open failed we do not want to call close.
290 if (fp) {
291 fp->f_data = NULL;
292 fp->f_ops = &badfileops;
294 goto bad;
297 #if 0
299 * Assert that VREG files have been setup for vmio.
301 KASSERT(vp->v_type != VREG || vp->v_object != NULL,
302 ("vn_open: regular file was not VMIO enabled!"));
303 #endif
306 * Return the vnode. XXX needs some cleaning up. The vnode is
307 * only returned in the fp == NULL case.
309 if (fp == NULL) {
310 nd->nl_open_vp = vp;
311 nd->nl_vp_fmode = fmode;
312 if ((nd->nl_flags & NLC_LOCKVP) == 0)
313 vn_unlock(vp);
314 } else {
315 vput(vp);
317 return (0);
318 bad:
319 if (vp)
320 vput(vp);
321 return (error);
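/*
 * Illustrative sketch only: the typical calling sequence from the
 * open(2) path (kern_open() is the assumed caller here) is roughly
 *
 *	error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW);
 *	if (error == 0)
 *		error = vn_open(&nd, fp, flags, cmode);
 *	nlookup_done(&nd);
 *
 * i.e. the caller always owns nd and must clean it up with
 * nlookup_done() whether or not vn_open() succeeds.
 */
/*
 * Open a disk device by name for kernel-internal consumers.  A leading
 * "/dev/" is stripped, the synthesized device vnode is obtained via
 * getsynthvnode(), and the device is opened with proc0's credentials.
 * On success *vpp holds a referenced, unlocked vnode; on failure *vpp
 * is set to NULL and an error (e.g. ENODEV) is returned.
 */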
325 vn_opendisk(const char *devname, int fmode, struct vnode **vpp)
327 struct vnode *vp;
328 int error;
330 if (strncmp(devname, "/dev/", 5) == 0)
331 devname += 5;
332 if ((vp = getsynthvnode(devname)) == NULL) {
333 error = ENODEV;
334 } else {
335 error = VOP_OPEN(vp, fmode, proc0.p_ucred, NULL);
336 vn_unlock(vp);
337 if (error) {
338 vrele(vp);
339 vp = NULL;
342 *vpp = vp;
343 return (error);
347 * Check for write permissions on the specified vnode. nch may be NULL.
350 vn_writechk(struct vnode *vp, struct nchandle *nch)
353 * If there's shared text associated with
354 * the vnode, try to free it up once. If
355 * we fail, we can't allow writing.
357 if (vp->v_flag & VTEXT)
358 return (ETXTBSY);
361 * If the vnode represents a regular file, check the mount
362 * point via the nch. This may be a different mount point
363 * than the one embedded in the vnode (e.g. nullfs).
365 * We can still write to non-regular files (e.g. devices)
366 * via read-only mounts.
368 if (nch && nch->ncp && vp->v_type == VREG)
369 return (ncp_writechk(nch));
370 return (0);
374 * Check whether the underlying mount is read-only. The mount point
375 * referenced by the namecache may be different from the mount point
376 * used by the underlying vnode in the case of NULLFS, so a separate
377 * check is needed.
380 ncp_writechk(struct nchandle *nch)
382 if (nch->mount && (nch->mount->mnt_flag & MNT_RDONLY))
383 return (EROFS);
384 return(0);
388 * Vnode close call
391 vn_close(struct vnode *vp, int flags)
393 int error;
395 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
396 if (error == 0) {
397 error = VOP_CLOSE(vp, flags);
398 vn_unlock(vp);
400 vrele(vp);
401 return (error);
404 static __inline
406 sequential_heuristic(struct uio *uio, struct file *fp)
409 * Sequential heuristic - detect sequential operation
411 * NOTE: SMP: We allow f_seqcount updates to race.
413 if ((uio->uio_offset == 0 && fp->f_seqcount > 0) ||
414 uio->uio_offset == fp->f_nextoff) {
415 int tmpseq = fp->f_seqcount;
417 * XXX we assume that the filesystem block size is
418 * the default. Not true, but still gives us a pretty
419 * good indicator of how sequential the read operations
420 * are.
422 tmpseq += (uio->uio_resid + BKVASIZE - 1) / BKVASIZE;
423 if (tmpseq > IO_SEQMAX)
424 tmpseq = IO_SEQMAX;
425 fp->f_seqcount = tmpseq;
426 return(fp->f_seqcount << IO_SEQSHIFT);
430 * Not sequential, quick draw-down of seqcount
432 * NOTE: SMP: We allow f_seqcount updates to race.
434 if (fp->f_seqcount > 1)
435 fp->f_seqcount = 1;
436 else
437 fp->f_seqcount = 0;
438 return(0);
442 * get - lock and return the f_offset field.
443 * set - set and unlock the f_offset field.
445 * These routines serve the dual purpose of serializing access to the
446 * f_offset field (at least on i386) and guaranteeing operational integrity
447 * when multiple read()ers and write()ers are present on the same fp.
449 static __inline off_t
450 vn_get_fpf_offset(struct file *fp)
452 u_int flags;
453 u_int nflags;
456 * Shortcut critical path.
458 flags = fp->f_flag & ~FOFFSETLOCK;
459 if (atomic_cmpset_int(&fp->f_flag, flags, flags | FOFFSETLOCK))
460 return(fp->f_offset);
463 * The hard way
465 for (;;) {
466 flags = fp->f_flag;
467 if (flags & FOFFSETLOCK) {
468 nflags = flags | FOFFSETWAKE;
469 tsleep_interlock(&fp->f_flag, 0);
470 if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
471 tsleep(&fp->f_flag, PINTERLOCKED, "fpoff", 0);
472 } else {
473 nflags = flags | FOFFSETLOCK;
474 if (atomic_cmpset_int(&fp->f_flag, flags, nflags))
475 break;
478 return(fp->f_offset);
481 static __inline void
482 vn_set_fpf_offset(struct file *fp, off_t offset)
484 u_int flags;
485 u_int nflags;
488 * We hold the lock so we can set the offset without interference.
490 fp->f_offset = offset;
493 * Normal release is already a reasonably critical path.
495 for (;;) {
496 flags = fp->f_flag;
497 nflags = flags & ~(FOFFSETLOCK | FOFFSETWAKE);
498 if (atomic_cmpset_int(&fp->f_flag, flags, nflags)) {
499 if (flags & FOFFSETWAKE)
500 wakeup(&fp->f_flag);
501 break;
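/*
 * Return a snapshot of f_offset without leaving the offset locked.
 * On x86_64, and on non-SMP builds, an aligned 64-bit load can be
 * assumed atomic so the field is read directly; otherwise the offset
 * lock is briefly acquired and released to obtain a consistent value.
 */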
506 static __inline off_t
507 vn_poll_fpf_offset(struct file *fp)
509 #if defined(__x86_64__) || !defined(SMP)
510 return(fp->f_offset);
511 #else
512 off_t off = vn_get_fpf_offset(fp);
513 vn_set_fpf_offset(fp, off);
514 return(off);
515 #endif
519 * Package up an I/O request on a vnode into a uio and do it.
522 vn_rdwr(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
523 off_t offset, enum uio_seg segflg, int ioflg,
524 struct ucred *cred, int *aresid)
526 struct uio auio;
527 struct iovec aiov;
528 struct ccms_lock ccms_lock;
529 int error;
531 if ((ioflg & IO_NODELOCKED) == 0)
532 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
533 auio.uio_iov = &aiov;
534 auio.uio_iovcnt = 1;
535 aiov.iov_base = base;
536 aiov.iov_len = len;
537 auio.uio_resid = len;
538 auio.uio_offset = offset;
539 auio.uio_segflg = segflg;
540 auio.uio_rw = rw;
541 auio.uio_td = curthread;
542 ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, &auio);
543 if (rw == UIO_READ) {
544 error = VOP_READ(vp, &auio, ioflg, cred);
545 } else {
546 error = VOP_WRITE(vp, &auio, ioflg, cred);
548 ccms_lock_put(&vp->v_ccms, &ccms_lock);
549 if (aresid)
550 *aresid = auio.uio_resid;
551 else
552 if (auio.uio_resid && error == 0)
553 error = EIO;
554 if ((ioflg & IO_NODELOCKED) == 0)
555 vn_unlock(vp);
556 return (error);
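/*
 * Illustrative sketch only (not part of this file): a caller that
 * already holds the vnode lock might read the first 512 bytes of a
 * file into a kernel buffer with
 *
 *	error = vn_rdwr(UIO_READ, vp, (caddr_t)buf, 512, 0, UIO_SYSSPACE,
 *			IO_NODELOCKED, cred, &resid);
 *
 * where buf, cred and resid are hypothetical caller-supplied variables.
 * IO_NODELOCKED indicates the caller holds the vnode lock; without it
 * vn_rdwr locks and unlocks the vnode around the operation.  If aresid
 * is NULL, a short transfer with no other error is converted to EIO.
 */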
560 * Package up an I/O request on a vnode into a uio and do it. The I/O
561 * request is split up into smaller chunks and we try to avoid saturating
562 * the buffer cache while potentially holding a vnode locked, so we
563 * check bwillwrite() before calling vn_rdwr(). We also call uio_yield()
564 * to give other processes a chance to lock the vnode (either other processes
565 * core'ing the same binary, or unrelated processes scanning the directory).
568 vn_rdwr_inchunks(enum uio_rw rw, struct vnode *vp, caddr_t base, int len,
569 off_t offset, enum uio_seg segflg, int ioflg,
570 struct ucred *cred, int *aresid)
572 int error = 0;
574 do {
575 int chunk;
578 * Force `offset' to a multiple of MAXBSIZE except possibly
579 * for the first chunk, so that filesystems only need to
580 * write full blocks except possibly for the first and last
581 * chunks.
583 chunk = MAXBSIZE - (uoff_t)offset % MAXBSIZE;
585 if (chunk > len)
586 chunk = len;
587 if (vp->v_type == VREG) {
588 switch(rw) {
589 case UIO_READ:
590 bwillread(chunk);
591 break;
592 case UIO_WRITE:
593 bwillwrite(chunk);
594 break;
597 error = vn_rdwr(rw, vp, base, chunk, offset, segflg,
598 ioflg, cred, aresid);
599 len -= chunk; /* aresid calc already includes length */
600 if (error)
601 break;
602 offset += chunk;
603 base += chunk;
604 uio_yield();
605 } while (len);
606 if (aresid)
607 *aresid += len;
608 return (error);
612 * MPALMOSTSAFE - acquires mplock
614 * File pointers can no longer get ripped up by revoke so
615 * we don't need to lock access to the vp.
617 * f_offset updates are not guaranteed against multiple readers
619 static int
620 vn_read(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
622 struct ccms_lock ccms_lock;
623 struct vnode *vp;
624 int error, ioflag;
626 KASSERT(uio->uio_td == curthread,
627 ("uio_td %p is not td %p", uio->uio_td, curthread));
628 vp = (struct vnode *)fp->f_data;
630 ioflag = 0;
631 if (flags & O_FBLOCKING) {
632 /* ioflag &= ~IO_NDELAY; */
633 } else if (flags & O_FNONBLOCKING) {
634 ioflag |= IO_NDELAY;
635 } else if (fp->f_flag & FNONBLOCK) {
636 ioflag |= IO_NDELAY;
638 if (flags & O_FBUFFERED) {
639 /* ioflag &= ~IO_DIRECT; */
640 } else if (flags & O_FUNBUFFERED) {
641 ioflag |= IO_DIRECT;
642 } else if (fp->f_flag & O_DIRECT) {
643 ioflag |= IO_DIRECT;
645 if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
646 uio->uio_offset = vn_get_fpf_offset(fp);
647 vn_lock(vp, LK_SHARED | LK_RETRY);
648 ioflag |= sequential_heuristic(uio, fp);
650 ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, uio);
651 if (read_mpsafe && (vp->v_flag & VMP_READ)) {
652 error = VOP_READ(vp, uio, ioflag, cred);
653 } else {
654 get_mplock();
655 error = VOP_READ(vp, uio, ioflag, cred);
656 rel_mplock();
658 ccms_lock_put(&vp->v_ccms, &ccms_lock);
659 fp->f_nextoff = uio->uio_offset;
660 vn_unlock(vp);
661 if ((flags & O_FOFFSET) == 0 && (vp->v_flag & VNOTSEEKABLE) == 0)
662 vn_set_fpf_offset(fp, uio->uio_offset);
663 return (error);
667 * MPALMOSTSAFE - acquires mplock
669 static int
670 vn_write(struct file *fp, struct uio *uio, struct ucred *cred, int flags)
672 struct ccms_lock ccms_lock;
673 struct vnode *vp;
674 int error, ioflag;
676 KASSERT(uio->uio_td == curthread,
677 ("uio_td %p is not td %p", uio->uio_td, curthread));
678 vp = (struct vnode *)fp->f_data;
680 ioflag = IO_UNIT;
681 if (vp->v_type == VREG &&
682 ((fp->f_flag & O_APPEND) || (flags & O_FAPPEND))) {
683 ioflag |= IO_APPEND;
686 if (flags & O_FBLOCKING) {
687 /* ioflag &= ~IO_NDELAY; */
688 } else if (flags & O_FNONBLOCKING) {
689 ioflag |= IO_NDELAY;
690 } else if (fp->f_flag & FNONBLOCK) {
691 ioflag |= IO_NDELAY;
693 if (flags & O_FBUFFERED) {
694 /* ioflag &= ~IO_DIRECT; */
695 } else if (flags & O_FUNBUFFERED) {
696 ioflag |= IO_DIRECT;
697 } else if (fp->f_flag & O_DIRECT) {
698 ioflag |= IO_DIRECT;
700 if (flags & O_FASYNCWRITE) {
701 /* ioflag &= ~IO_SYNC; */
702 } else if (flags & O_FSYNCWRITE) {
703 ioflag |= IO_SYNC;
704 } else if (fp->f_flag & O_FSYNC) {
705 ioflag |= IO_SYNC;
708 if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS))
709 ioflag |= IO_SYNC;
710 if ((flags & O_FOFFSET) == 0)
711 uio->uio_offset = vn_get_fpf_offset(fp);
712 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
713 ioflag |= sequential_heuristic(uio, fp);
714 ccms_lock_get_uio(&vp->v_ccms, &ccms_lock, uio);
715 if (write_mpsafe && (vp->v_flag & VMP_WRITE)) {
716 error = VOP_WRITE(vp, uio, ioflag, cred);
717 } else {
718 get_mplock();
719 error = VOP_WRITE(vp, uio, ioflag, cred);
720 rel_mplock();
722 ccms_lock_put(&vp->v_ccms, &ccms_lock);
723 fp->f_nextoff = uio->uio_offset;
724 vn_unlock(vp);
725 if ((flags & O_FOFFSET) == 0)
726 vn_set_fpf_offset(fp, uio->uio_offset);
727 return (error);
731 * MPSAFE
733 static int
734 vn_statfile(struct file *fp, struct stat *sb, struct ucred *cred)
736 struct vnode *vp;
737 int error;
739 vp = (struct vnode *)fp->f_data;
740 error = vn_stat(vp, sb, cred);
741 return (error);
745 * MPSAFE (if vnode has VMP_GETATTR)
748 vn_stat(struct vnode *vp, struct stat *sb, struct ucred *cred)
750 struct vattr vattr;
751 struct vattr *vap;
752 int error;
753 u_short mode;
754 cdev_t dev;
756 vap = &vattr;
757 if (getattr_mpsafe && (vp->v_flag & VMP_GETATTR)) {
758 error = VOP_GETATTR(vp, vap);
759 } else {
760 get_mplock();
761 error = VOP_GETATTR(vp, vap);
762 rel_mplock();
764 if (error)
765 return (error);
768 * Zero the spare stat fields
770 sb->st_lspare = 0;
771 sb->st_qspare1 = 0;
772 sb->st_qspare2 = 0;
775 * Copy from vattr table
777 if (vap->va_fsid != VNOVAL)
778 sb->st_dev = vap->va_fsid;
779 else
780 sb->st_dev = vp->v_mount->mnt_stat.f_fsid.val[0];
781 sb->st_ino = vap->va_fileid;
782 mode = vap->va_mode;
783 switch (vap->va_type) {
784 case VREG:
785 mode |= S_IFREG;
786 break;
787 case VDATABASE:
788 mode |= S_IFDB;
789 break;
790 case VDIR:
791 mode |= S_IFDIR;
792 break;
793 case VBLK:
794 mode |= S_IFBLK;
795 break;
796 case VCHR:
797 mode |= S_IFCHR;
798 break;
799 case VLNK:
800 mode |= S_IFLNK;
801 /* This is a cosmetic change, symlinks do not have a mode. */
802 if (vp->v_mount->mnt_flag & MNT_NOSYMFOLLOW)
803 mode &= ~ACCESSPERMS; /* 0000 */
804 else
805 mode |= ACCESSPERMS; /* 0777 */
806 break;
807 case VSOCK:
808 mode |= S_IFSOCK;
809 break;
810 case VFIFO:
811 mode |= S_IFIFO;
812 break;
813 default:
814 return (EBADF);
816 sb->st_mode = mode;
817 if (vap->va_nlink > (nlink_t)-1)
818 sb->st_nlink = (nlink_t)-1;
819 else
820 sb->st_nlink = vap->va_nlink;
821 sb->st_uid = vap->va_uid;
822 sb->st_gid = vap->va_gid;
823 sb->st_rdev = dev2udev(vp->v_rdev);
824 sb->st_size = vap->va_size;
825 sb->st_atimespec = vap->va_atime;
826 sb->st_mtimespec = vap->va_mtime;
827 sb->st_ctimespec = vap->va_ctime;
830 * A VCHR and VBLK device may track the last access and last modified
831 * time independently of the filesystem. This is particularly true
832 * because device read and write calls may bypass the filesystem.
834 if (vp->v_type == VCHR || vp->v_type == VBLK) {
835 dev = vp->v_rdev;
836 if (dev != NULL) {
837 if (dev->si_lastread) {
838 sb->st_atimespec.tv_sec = dev->si_lastread;
839 sb->st_atimespec.tv_nsec = 0;
841 if (dev->si_lastwrite) {
842 sb->st_mtimespec.tv_sec = dev->si_lastwrite;
843 sb->st_mtimespec.tv_nsec = 0;
849 * According to www.opengroup.org, the meaning of st_blksize is
850 * "a filesystem-specific preferred I/O block size for this
851 * object. In some filesystem types, this may vary from file
852 * to file"
853 * Default to PAGE_SIZE after much discussion.
856 if (vap->va_type == VREG) {
857 sb->st_blksize = vap->va_blocksize;
858 } else if (vn_isdisk(vp, NULL)) {
860 * XXX this is broken. If the device is not yet open (aka
861 * stat() call, aka v_rdev == NULL), how are we supposed
862 * to get a valid block size out of it?
864 dev = vp->v_rdev;
866 sb->st_blksize = dev->si_bsize_best;
867 if (sb->st_blksize < dev->si_bsize_phys)
868 sb->st_blksize = dev->si_bsize_phys;
869 if (sb->st_blksize < BLKDEV_IOSIZE)
870 sb->st_blksize = BLKDEV_IOSIZE;
871 } else {
872 sb->st_blksize = PAGE_SIZE;
875 sb->st_flags = vap->va_flags;
877 error = priv_check_cred(cred, PRIV_VFS_GENERATION, 0);
878 if (error)
879 sb->st_gen = 0;
880 else
881 sb->st_gen = (u_int32_t)vap->va_gen;
883 sb->st_blocks = vap->va_bytes / S_BLKSIZE;
884 return (0);
888 * MPALMOSTSAFE - acquires mplock
890 static int
891 vn_ioctl(struct file *fp, u_long com, caddr_t data, struct ucred *ucred,
892 struct sysmsg *msg)
894 struct vnode *vp = ((struct vnode *)fp->f_data);
895 struct vnode *ovp;
896 struct vattr vattr;
897 int error;
898 off_t size;
900 get_mplock();
902 switch (vp->v_type) {
903 case VREG:
904 case VDIR:
905 if (com == FIONREAD) {
906 error = VOP_GETATTR(vp, &vattr);
907 if (error)
908 break;
909 size = vattr.va_size;
910 if ((vp->v_flag & VNOTSEEKABLE) == 0)
911 size -= vn_poll_fpf_offset(fp);
912 if (size > 0x7FFFFFFF)
913 size = 0x7FFFFFFF;
914 *(int *)data = size;
915 error = 0;
916 break;
918 if (com == FIOASYNC) { /* XXX */
919 error = 0; /* XXX */
920 break;
922 /* fall into ... */
923 default:
924 #if 0
925 return (ENOTTY);
926 #endif
927 case VFIFO:
928 case VCHR:
929 case VBLK:
930 if (com == FIODTYPE) {
931 if (vp->v_type != VCHR && vp->v_type != VBLK) {
932 error = ENOTTY;
933 break;
935 *(int *)data = dev_dflags(vp->v_rdev) & D_TYPEMASK;
936 error = 0;
937 break;
939 error = VOP_IOCTL(vp, com, data, fp->f_flag, ucred, msg);
940 if (error == 0 && com == TIOCSCTTY) {
941 struct proc *p = curthread->td_proc;
942 struct session *sess;
944 if (p == NULL) {
945 error = ENOTTY;
946 break;
949 sess = p->p_session;
950 /* Do nothing if reassigning same control tty */
951 if (sess->s_ttyvp == vp) {
952 error = 0;
953 break;
956 /* Get rid of reference to old control tty */
957 ovp = sess->s_ttyvp;
958 vref(vp);
959 sess->s_ttyvp = vp;
960 if (ovp)
961 vrele(ovp);
963 break;
965 rel_mplock();
966 return (error);
970 * MPALMOSTSAFE - acquires mplock
972 static int
973 vn_poll(struct file *fp, int events, struct ucred *cred)
975 int error;
977 get_mplock();
978 error = VOP_POLL(((struct vnode *)fp->f_data), events, cred);
979 rel_mplock();
980 return (error);
984 * Check that the vnode is still valid, and if so
985 * acquire requested lock.
988 #ifndef DEBUG_LOCKS
989 vn_lock(struct vnode *vp, int flags)
990 #else
991 debug_vn_lock(struct vnode *vp, int flags, const char *filename, int line)
992 #endif
994 int error;
996 do {
997 #ifdef DEBUG_LOCKS
998 vp->filename = filename;
999 vp->line = line;
1000 error = debuglockmgr(&vp->v_lock, flags,
1001 "vn_lock", filename, line);
1002 #else
1003 error = lockmgr(&vp->v_lock, flags);
1004 #endif
1005 if (error == 0)
1006 break;
1007 } while (flags & LK_RETRY);
1010 * Because we (had better!) have a ref on the vnode, once it
1011 * goes to VRECLAIMED state it will not be recycled until all
1012 * refs go away. So we can just check the flag.
1014 if (error == 0 && (vp->v_flag & VRECLAIMED)) {
1015 lockmgr(&vp->v_lock, LK_RELEASE);
1016 error = ENOENT;
1018 return (error);
1021 void
1022 vn_unlock(struct vnode *vp)
1024 lockmgr(&vp->v_lock, LK_RELEASE);
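/*
 * Return the lockstatus() of the vnode lock relative to curthread:
 * zero when the vnode is unlocked, non-zero (e.g. LK_EXCLUSIVE or
 * LK_SHARED) when it is held.
 */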
1028 vn_islocked(struct vnode *vp)
1030 return (lockstatus(&vp->v_lock, curthread));
1034 * MPALMOSTSAFE - acquires mplock
1036 static int
1037 vn_closefile(struct file *fp)
1039 int error;
1041 get_mplock();
1042 fp->f_ops = &badfileops;
1043 error = vn_close(((struct vnode *)fp->f_data), fp->f_flag);
1044 rel_mplock();
1045 return (error);
1049 * MPALMOSTSAFE - acquires mplock
1051 static int
1052 vn_kqfilter(struct file *fp, struct knote *kn)
1054 int error;
1056 get_mplock();
1057 error = VOP_KQFILTER(((struct vnode *)fp->f_data), kn);
1058 rel_mplock();
1059 return (error);