kernel - Fix races created by a comedy of circumstances (3)
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vnops.c
blob048f9899a40cec88c3ba2e474eea2c413f4fa284
1 /*-
2 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
3 * All rights reserved.
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
7 * 2005 program.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
34 * tmpfs vnode interface.
37 #include <sys/kernel.h>
38 #include <sys/kern_syscall.h>
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/lockf.h>
42 #include <sys/priv.h>
43 #include <sys/proc.h>
44 #include <sys/resourcevar.h>
45 #include <sys/sched.h>
46 #include <sys/stat.h>
47 #include <sys/systm.h>
48 #include <sys/unistd.h>
49 #include <sys/vfsops.h>
50 #include <sys/vnode.h>
51 #include <sys/mountctl.h>
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/vm_pager.h>
59 #include <vm/swap_pager.h>
61 #include <sys/buf2.h>
62 #include <vm/vm_page2.h>
64 #include <vfs/fifofs/fifo.h>
65 #include <vfs/tmpfs/tmpfs_vnops.h>
66 #include "tmpfs.h"
68 static void tmpfs_strategy_done(struct bio *bio);
/*
 * Post kqueue note events (NOTE_WRITE, NOTE_ATTRIB, etc.) on the vnode's
 * knote list.  A no-op when flags is 0, so callers may pass an accumulated
 * (possibly empty) kflags mask unconditionally.
 */
70 static __inline
71 void
72 tmpfs_knote(struct vnode *vp, int flags)
74 if (flags)
75 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
79 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nresolve - resolve a namecache entry in directory dvp.
 *
 * Looks the name up in the tmpfs directory under a shared node lock and,
 * on a hit, allocates/locks a vnode for the target node.  The result
 * (vnode or negative hit) is recorded in the namecache.  Returns 0 on
 * success, ENOENT when the name does not exist, or a tmpfs_alloc_vp error.
 */
81 static int
82 tmpfs_nresolve(struct vop_nresolve_args *ap)
84 struct vnode *dvp = ap->a_dvp;
85 struct vnode *vp = NULL;
86 struct namecache *ncp = ap->a_nch->ncp;
87 struct tmpfs_node *tnode;
88 struct tmpfs_dirent *de;
89 struct tmpfs_node *dnode;
90 int error;
92 dnode = VP_TO_TMPFS_DIR(dvp);
94 TMPFS_NODE_LOCK_SH(dnode);
95 de = tmpfs_dir_lookup(dnode, NULL, ncp);
96 if (de == NULL) {
97 error = ENOENT;
98 } else {
100 * Allocate a vnode for the node we found.
102 tnode = de->td_node;
103 error = tmpfs_alloc_vp(dvp->v_mount, tnode,
104 LK_EXCLUSIVE | LK_RETRY, &vp);
105 if (error)
106 goto out;
107 KKASSERT(vp);
110 out:
111 TMPFS_NODE_UNLOCK(dnode);
/*
 * Mark the directory accessed.  The flag is tested unlocked first to
 * avoid taking the exclusive node lock on every lookup; the exclusive
 * lock is only acquired when the flag actually needs to be set.
 */
113 if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
114 TMPFS_NODE_LOCK(dnode);
115 dnode->tn_status |= TMPFS_NODE_ACCESSED;
116 TMPFS_NODE_UNLOCK(dnode);
120 * Store the result of this lookup in the cache. Avoid this if the
121 * request was for creation, as it does not improve timings on
122 * empirical tests.
124 if (vp) {
125 vn_unlock(vp);
126 cache_setvp(ap->a_nch, vp);
127 vrele(vp);
128 } else if (error == ENOENT) {
/* Record a negative cache entry for the missing name. */
129 cache_setvp(ap->a_nch, NULL);
131 return (error);
/*
 * tmpfs_nlookupdotdot - resolve ".." for directory dvp.
 *
 * Requires VEXEC permission on dvp.  Allocates a vnode for the parent
 * directory node (returned unlocked in *vpp).  Returns ENOENT when dvp
 * has no parent (e.g. the root, or a directory already detached) or when
 * vnode allocation yields no vnode; 0 otherwise.
 */
134 static int
135 tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
137 struct vnode *dvp = ap->a_dvp;
138 struct vnode **vpp = ap->a_vpp;
139 struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
140 struct ucred *cred = ap->a_cred;
141 int error;
143 *vpp = NULL;
145 /* Check accessibility of requested node as a first step. */
146 error = VOP_ACCESS(dvp, VEXEC, cred);
147 if (error != 0)
148 return error;
150 if (dnode->tn_dir.tn_parent != NULL) {
151 /* Allocate a new vnode on the matching entry. */
152 error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
153 LK_EXCLUSIVE | LK_RETRY, vpp);
/* Return the parent vnode unlocked, per this VOP's convention here. */
155 if (*vpp)
156 vn_unlock(*vpp);
158 return (*vpp == NULL) ? ENOENT : 0;
161 /* --------------------------------------------------------------------- */
/*
 * tmpfs_ncreate - create a regular file (or socket) named by the
 * namecache entry in directory dvp.
 *
 * Delegates node/dirent/vnode creation to tmpfs_alloc_file().  On success
 * the namecache entry is re-resolved to the new vnode and a NOTE_WRITE
 * kqueue event is posted on the directory.
 */
163 static int
164 tmpfs_ncreate(struct vop_ncreate_args *ap)
166 struct vnode *dvp = ap->a_dvp;
167 struct vnode **vpp = ap->a_vpp;
168 struct namecache *ncp = ap->a_nch->ncp;
169 struct vattr *vap = ap->a_vap;
170 struct ucred *cred = ap->a_cred;
171 int error;
173 KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
175 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
176 if (error == 0) {
177 cache_setunresolved(ap->a_nch);
178 cache_setvp(ap->a_nch, *vpp);
179 tmpfs_knote(dvp, NOTE_WRITE);
181 return (error);
183 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nmknod - create a device node or fifo in directory dvp.
 *
 * Only VBLK, VCHR and VFIFO types are accepted (EINVAL otherwise).
 * Mirrors tmpfs_ncreate: tmpfs_alloc_file() does the work, then the
 * namecache is updated and NOTE_WRITE posted on the directory.
 */
185 static int
186 tmpfs_nmknod(struct vop_nmknod_args *ap)
188 struct vnode *dvp = ap->a_dvp;
189 struct vnode **vpp = ap->a_vpp;
190 struct namecache *ncp = ap->a_nch->ncp;
191 struct vattr *vap = ap->a_vap;
192 struct ucred *cred = ap->a_cred;
193 int error;
195 if (vap->va_type != VBLK && vap->va_type != VCHR &&
196 vap->va_type != VFIFO) {
197 return (EINVAL);
200 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
201 if (error == 0) {
202 cache_setunresolved(ap->a_nch);
203 cache_setvp(ap->a_nch, *vpp);
204 tmpfs_knote(dvp, NOTE_WRITE);
206 return error;
209 /* --------------------------------------------------------------------- */
/*
 * tmpfs_open - open a tmpfs vnode.
 *
 * Enforces the append-only flag: a plain write open (FWRITE without
 * O_APPEND) on an APPEND-flagged node fails with EPERM.  Otherwise the
 * open is handled by vop_stdopen().
 */
211 static int
212 tmpfs_open(struct vop_open_args *ap)
214 struct vnode *vp = ap->a_vp;
215 int mode = ap->a_mode;
216 struct tmpfs_node *node;
217 int error;
219 node = VP_TO_TMPFS_NODE(vp);
221 #if 0
222 /* The file is still active but all its names have been removed
223 * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as
224 * it is about to die. */
225 if (node->tn_links < 1)
226 return (ENOENT);
227 #endif
229 /* If the file is marked append-only, deny write requests. */
230 if ((node->tn_flags & APPEND) &&
231 (mode & (FWRITE | O_APPEND)) == FWRITE) {
232 error = EPERM;
233 } else {
234 error = (vop_stdopen(ap));
237 return (error);
240 /* --------------------------------------------------------------------- */
/*
 * tmpfs_close - close a tmpfs vnode.
 *
 * Flushes pending timestamp updates via tmpfs_update() unless the node
 * has been fully unlinked (tn_links == 0), in which case it is about to
 * be destroyed and updating times would be pointless.
 */
242 static int
243 tmpfs_close(struct vop_close_args *ap)
245 struct vnode *vp = ap->a_vp;
246 struct tmpfs_node *node;
247 int error;
249 node = VP_TO_TMPFS_NODE(vp);
251 if (node->tn_links > 0) {
253 * Update node times. No need to do it if the node has
254 * been deleted, because it will vanish after we return.
256 tmpfs_update(vp);
259 error = vop_stdclose(ap);
261 return (error);
264 /* --------------------------------------------------------------------- */
/*
 * tmpfs_access - permission check for a tmpfs vnode.
 *
 * Write access is denied with EROFS on read-only mounts (for the vnode
 * types that carry file data: VDIR/VLNK/VREG) and with EPERM for nodes
 * flagged IMMUTABLE.  Everything else is delegated to the standard
 * uid/gid/mode helper vop_helper_access().
 */
267 tmpfs_access(struct vop_access_args *ap)
269 struct vnode *vp = ap->a_vp;
270 int error;
271 struct tmpfs_node *node;
273 node = VP_TO_TMPFS_NODE(vp);
275 switch (vp->v_type) {
276 case VDIR:
277 /* FALLTHROUGH */
278 case VLNK:
279 /* FALLTHROUGH */
280 case VREG:
281 if ((ap->a_mode & VWRITE) &&
282 (vp->v_mount->mnt_flag & MNT_RDONLY)) {
283 error = EROFS;
284 goto out;
286 break;
288 case VBLK:
289 /* FALLTHROUGH */
290 case VCHR:
291 /* FALLTHROUGH */
292 case VSOCK:
293 /* FALLTHROUGH */
294 case VFIFO:
/* Device/socket/fifo writes don't modify the filesystem itself. */
295 break;
297 default:
298 error = EINVAL;
299 goto out;
302 if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
303 error = EPERM;
304 goto out;
307 error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
308 node->tn_mode, 0);
309 out:
310 return error;
313 /* --------------------------------------------------------------------- */
/*
 * tmpfs_getattr - fill in a vattr from the tmpfs node.
 *
 * tmpfs_update() is called first to fold any pending timestamp status
 * bits into the node, then the attributes are copied out under a shared
 * node lock so a concurrent setattr cannot produce a torn snapshot.
 * Always returns 0.
 */
316 tmpfs_getattr(struct vop_getattr_args *ap)
318 struct vnode *vp = ap->a_vp;
319 struct vattr *vap = ap->a_vap;
320 struct tmpfs_node *node;
322 node = VP_TO_TMPFS_NODE(vp);
324 tmpfs_update(vp);
326 TMPFS_NODE_LOCK_SH(node);
327 vap->va_type = vp->v_type;
328 vap->va_mode = node->tn_mode;
329 vap->va_nlink = node->tn_links;
330 vap->va_uid = node->tn_uid;
331 vap->va_gid = node->tn_gid;
332 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
333 vap->va_fileid = node->tn_id;
334 vap->va_size = node->tn_size;
335 vap->va_blocksize = PAGE_SIZE;
336 vap->va_atime.tv_sec = node->tn_atime;
337 vap->va_atime.tv_nsec = node->tn_atimensec;
338 vap->va_mtime.tv_sec = node->tn_mtime;
339 vap->va_mtime.tv_nsec = node->tn_mtimensec;
340 vap->va_ctime.tv_sec = node->tn_ctime;
341 vap->va_ctime.tv_nsec = node->tn_ctimensec;
342 vap->va_gen = node->tn_gen;
343 vap->va_flags = node->tn_flags;
344 if (vp->v_type == VBLK || vp->v_type == VCHR) {
345 vap->va_rmajor = umajor(node->tn_rdev);
346 vap->va_rminor = uminor(node->tn_rdev);
/* Space consumed is the file size rounded up to whole pages. */
348 vap->va_bytes = round_page(node->tn_size);
349 vap->va_filerev = 0;
350 TMPFS_NODE_UNLOCK(node);
352 return 0;
355 /* --------------------------------------------------------------------- */
/*
 * tmpfs_setattr - apply the fields of a vattr that are not VNOVAL.
 *
 * Each attribute class (flags, size, ownership, mode, times) is handled
 * by its own tmpfs_ch*() helper; processing stops at the first error
 * (each subsequent step is gated on error == 0).  kqueue flags are
 * accumulated and posted once at the end.
 */
358 tmpfs_setattr(struct vop_setattr_args *ap)
360 struct vnode *vp = ap->a_vp;
361 struct vattr *vap = ap->a_vap;
362 struct ucred *cred = ap->a_cred;
363 struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
364 int error = 0;
365 int kflags = 0;
367 TMPFS_NODE_LOCK(node);
368 if (error == 0 && (vap->va_flags != VNOVAL)) {
369 error = tmpfs_chflags(vp, vap->va_flags, cred);
370 kflags |= NOTE_ATTRIB;
373 if (error == 0 && (vap->va_size != VNOVAL)) {
/* Growing the file is an EXTEND event, shrinking is just a write. */
374 if (vap->va_size > node->tn_size)
375 kflags |= NOTE_WRITE | NOTE_EXTEND;
376 else
377 kflags |= NOTE_WRITE;
378 error = tmpfs_chsize(vp, vap->va_size, cred);
381 if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
382 vap->va_gid != (gid_t)VNOVAL)) {
383 error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
384 kflags |= NOTE_ATTRIB;
387 if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
388 error = tmpfs_chmod(vp, vap->va_mode, cred);
389 kflags |= NOTE_ATTRIB;
392 if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
393 vap->va_atime.tv_nsec != VNOVAL) ||
394 (vap->va_mtime.tv_sec != VNOVAL &&
395 vap->va_mtime.tv_nsec != VNOVAL) )) {
396 error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
397 vap->va_vaflags, cred);
398 kflags |= NOTE_ATTRIB;
402 * Update the node times. We give preference to the error codes
403 * generated by this function rather than the ones that may arise
404 * from tmpfs_update.
406 tmpfs_update(vp);
407 TMPFS_NODE_UNLOCK(node);
408 tmpfs_knote(vp, kflags);
410 return (error);
413 /* --------------------------------------------------------------------- */
416 * fsync is usually a NOP, but we must take action when unmounting or
417 * when recycling.
419 static int
420 tmpfs_fsync(struct vop_fsync_args *ap)
422 struct tmpfs_node *node;
423 struct vnode *vp = ap->a_vp;
425 node = VP_TO_TMPFS_NODE(vp);
427 tmpfs_update(vp);
428 if (vp->v_type == VREG) {
429 if (vp->v_flag & VRECLAIMED) {
/*
 * Vnode is being reclaimed.  If the file has no remaining
 * links its data can simply be thrown away; otherwise the
 * dirty buffers must be synced so the data survives the
 * vnode (it lives on in the node's backing object).
 */
430 if (node->tn_links == 0)
431 tmpfs_truncate(vp, 0);
432 else
433 vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
436 return 0;
439 /* --------------------------------------------------------------------- */
/*
 * tmpfs_read - VOP_READ for regular tmpfs files.
 *
 * First attempts vop_helper_read_shortcut() to satisfy the read straight
 * from the VM page cache; any remainder is read through the buffer cache
 * in TMPFS_BLKSIZE chunks (backed by tmpfs_strategy).  Marks the node
 * ACCESSED on the way out.  Returns 0 or a bread/uiomove error.
 */
441 static int
442 tmpfs_read(struct vop_read_args *ap)
444 struct buf *bp;
445 struct vnode *vp = ap->a_vp;
446 struct uio *uio = ap->a_uio;
447 struct tmpfs_node *node;
448 off_t base_offset;
449 size_t offset;
450 size_t len;
451 size_t resid;
452 int error;
455 * Check the basics
457 if (uio->uio_offset < 0)
458 return (EINVAL);
459 if (vp->v_type != VREG)
460 return (EINVAL);
463 * Extract node, try to shortcut the operation through
464 * the VM page cache, allowing us to avoid buffer cache
465 * overheads.
467 node = VP_TO_TMPFS_NODE(vp);
468 resid = uio->uio_resid;
469 error = vop_helper_read_shortcut(ap);
470 if (error)
471 return error;
472 if (uio->uio_resid == 0) {
/* Shortcut consumed the whole request; still update atime if it
 * actually transferred anything (resid != 0 on entry). */
473 if (resid)
474 goto finished;
475 return error;
479 * Fall-through to our normal read code.
481 while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
483 * Use buffer cache I/O (via tmpfs_strategy)
485 offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
486 base_offset = (off_t)uio->uio_offset - offset;
487 bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, 0);
488 if (bp == NULL) {
489 error = bread(vp, base_offset, TMPFS_BLKSIZE, &bp);
490 if (error) {
491 brelse(bp);
492 kprintf("tmpfs_read bread error %d\n", error);
493 break;
497 * tmpfs pretty much fiddles directly with the VM
498 * system, don't let it exhaust it or we won't play
499 * nice with other processes.
501 * Only do this if the VOP is coming from a normal
502 * read/write. The VM system handles the case for
503 * UIO_NOCOPY.
505 if (uio->uio_segflg != UIO_NOCOPY)
506 vm_wait_nominal();
508 bp->b_flags |= B_CLUSTEROK;
511 * Figure out how many bytes we can actually copy this loop.
513 len = TMPFS_BLKSIZE - offset;
514 if (len > uio->uio_resid)
515 len = uio->uio_resid;
516 if (len > node->tn_size - uio->uio_offset)
517 len = (size_t)(node->tn_size - uio->uio_offset);
519 error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
520 bqrelse(bp);
521 if (error) {
522 kprintf("tmpfs_read uiomove error %d\n", error);
523 break;
527 finished:
/* Test-then-lock to avoid the exclusive lock on the common path. */
528 if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
529 TMPFS_NODE_LOCK(node);
530 node->tn_status |= TMPFS_NODE_ACCESSED;
531 TMPFS_NODE_UNLOCK(node);
533 return (error);
/*
 * tmpfs_write - VOP_WRITE for regular tmpfs files.
 *
 * Writes in TMPFS_BLKSIZE chunks through the buffer cache, extending the
 * file (tmpfs_reg_resize) as needed.  How each buffer is flushed depends
 * on context: pageout-daemon writes (UIO_NOCOPY) are pushed with
 * cluster_awrite() and released aggressively; normal writes use buwrite()
 * to dirty the backing VM pages directly, falling back to bdwrite() when
 * memory is low (see the long comment in the loop).  Enforces
 * tm_maxfilesize and the caller's RLIMIT_FSIZE, rolls the size back on
 * error, and clears setuid/setgid on write by unprivileged users.
 */
536 static int
537 tmpfs_write(struct vop_write_args *ap)
539 struct buf *bp;
540 struct vnode *vp = ap->a_vp;
541 struct uio *uio = ap->a_uio;
542 struct thread *td = uio->uio_td;
543 struct tmpfs_node *node;
544 boolean_t extended;
545 off_t oldsize;
546 int error;
547 off_t base_offset;
548 size_t offset;
549 size_t len;
550 struct rlimit limit;
551 int trivial = 0;
552 int kflags = 0;
553 int seqcount;
555 error = 0;
556 if (uio->uio_resid == 0) {
557 return error;
560 node = VP_TO_TMPFS_NODE(vp);
562 if (vp->v_type != VREG)
563 return (EINVAL);
564 seqcount = ap->a_ioflag >> 16;
566 TMPFS_NODE_LOCK(node);
/* Remember the original size so a failed write can be rolled back. */
568 oldsize = node->tn_size;
569 if (ap->a_ioflag & IO_APPEND)
570 uio->uio_offset = node->tn_size;
573 * Check for illegal write offsets.
575 if (uio->uio_offset + uio->uio_resid >
576 VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
577 error = EFBIG;
578 goto done;
582 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
584 if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
585 error = kern_getrlimit(RLIMIT_FSIZE, &limit);
586 if (error)
587 goto done;
588 if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
589 ksignal(td->td_proc, SIGXFSZ);
590 error = EFBIG;
591 goto done;
596 * Extend the file's size if necessary
598 extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
600 while (uio->uio_resid > 0) {
602 * Don't completely blow out running buffer I/O
603 * when being hit from the pageout daemon.
605 if (uio->uio_segflg == UIO_NOCOPY &&
606 (ap->a_ioflag & IO_RECURSE) == 0) {
607 bwillwrite(TMPFS_BLKSIZE);
611 * Use buffer cache I/O (via tmpfs_strategy)
613 offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
614 base_offset = (off_t)uio->uio_offset - offset;
615 len = TMPFS_BLKSIZE - offset;
616 if (len > uio->uio_resid)
617 len = uio->uio_resid;
619 if ((uio->uio_offset + len) > node->tn_size) {
/* "trivial" when no hole is created by the extension. */
620 trivial = (uio->uio_offset <= node->tn_size);
621 error = tmpfs_reg_resize(vp, uio->uio_offset + len,
622 trivial);
623 if (error)
624 break;
628 * Read to fill in any gaps. Theoretically we could
629 * optimize this if the write covers the entire buffer
630 * and is not a UIO_NOCOPY write, however this can lead
631 * to a security violation exposing random kernel memory
632 * (whatever junk was in the backing VM pages before).
634 * So just use bread() to do the right thing.
636 error = bread(vp, base_offset, TMPFS_BLKSIZE, &bp);
637 error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
638 if (error) {
639 kprintf("tmpfs_write uiomove error %d\n", error);
640 brelse(bp);
641 break;
644 if (uio->uio_offset > node->tn_size) {
645 node->tn_size = uio->uio_offset;
646 kflags |= NOTE_EXTEND;
648 kflags |= NOTE_WRITE;
651 * Always try to flush the page in the UIO_NOCOPY case. This
652 * can come from the pageout daemon or during vnode eviction.
653 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
655 * For the normal case we buwrite(), dirtying the underlying
656 * VM pages instead of dirtying the buffer and releasing the
657 * buffer as a clean buffer. This allows tmpfs to use
658 * essentially all available memory to cache file data.
659 * If we used bdwrite() the buffer cache would wind up
660 * flushing the data to swap too quickly.
662 * But because tmpfs can seriously load the VM system we
663 * fall-back to using bdwrite() when free memory starts
664 * to get low. This shifts the load away from the VM system
665 * and makes tmpfs act more like a normal filesystem with
666 * regards to disk activity.
668 * tmpfs pretty much fiddles directly with the VM
669 * system, don't let it exhaust it or we won't play
670 * nice with other processes. Only do this if the
671 * VOP is coming from a normal read/write. The VM system
672 * handles the case for UIO_NOCOPY.
674 bp->b_flags |= B_CLUSTEROK;
675 if (uio->uio_segflg == UIO_NOCOPY) {
677 * Flush from the pageout daemon, deal with
678 * potentially very heavy tmpfs write activity
679 * causing long stalls in the pageout daemon
680 * before pages get to free/cache.
682 * (a) Under severe pressure setting B_DIRECT will
683 * cause a buffer release to try to free the
684 * underlying pages.
686 * (b) Under modest memory pressure the B_RELBUF
687 * alone is sufficient to get the pages moved
688 * to the cache. We could also force this by
689 * setting B_NOTMETA but that might have other
690 * unintended side-effects (e.g. setting
691 * PG_NOTMETA on the VM page).
693 * Hopefully this will unblock the VM system more
694 * quickly under extreme tmpfs write load.
696 if (vm_page_count_min(vm_page_free_hysteresis))
697 bp->b_flags |= B_DIRECT;
698 bp->b_flags |= B_AGE | B_RELBUF;
699 bp->b_act_count = 0; /* buffer->deactivate pgs */
700 cluster_awrite(bp);
701 } else if (vm_page_count_target()) {
703 * Normal (userland) write but we are low on memory,
704 * run the buffer the buffer cache.
706 bp->b_act_count = 0; /* buffer->deactivate pgs */
707 bdwrite(bp);
708 } else {
710 * Otherwise run the buffer directly through to the
711 * backing VM store.
713 buwrite(bp);
714 /*vm_wait_nominal();*/
717 if (bp->b_error) {
718 kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
719 break;
723 if (error) {
/* Roll the file size back if we had extended it. */
724 if (extended) {
725 (void)tmpfs_reg_resize(vp, oldsize, trivial);
726 kflags &= ~NOTE_EXTEND;
728 goto done;
732 * Currently we don't set the mtime on files modified via mmap()
733 * because we can't tell the difference between those modifications
734 * and an attempt by the pageout daemon to flush tmpfs pages to
735 * swap.
737 * This is because in order to defer flushes as long as possible
738 * buwrite() works by marking the underlying VM pages dirty in
739 * order to be able to dispose of the buffer cache buffer without
740 * flushing it.
742 if (uio->uio_segflg != UIO_NOCOPY)
743 node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED;
744 if (extended)
745 node->tn_status |= TMPFS_NODE_CHANGED;
/* Clear setuid/setgid on write unless the writer holds the
 * PRIV_VFS_RETAINSUGID privilege. */
747 if (node->tn_mode & (S_ISUID | S_ISGID)) {
748 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
749 node->tn_mode &= ~(S_ISUID | S_ISGID);
751 done:
752 TMPFS_NODE_UNLOCK(node);
753 if (kflags)
754 tmpfs_knote(vp, kflags);
756 return(error);
/*
 * tmpfs_advlock - advisory record locking (fcntl/flock), delegated to the
 * generic lf_advlock() implementation using the node's lock list.
 */
759 static int
760 tmpfs_advlock(struct vop_advlock_args *ap)
762 struct tmpfs_node *node;
763 struct vnode *vp = ap->a_vp;
764 int error;
766 node = VP_TO_TMPFS_NODE(vp);
767 error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));
769 return (error);
773 * The strategy function is typically only called when memory pressure
774 * forces the system to attempt to pageout pages. It can also be called
775 * by [n]vtruncbuf() when a truncation cuts a page in half. Normal write
776 * operations go through the buffer cache and do not normally reach here.
778 static int
779 tmpfs_strategy(struct vop_strategy_args *ap)
781 struct bio *bio = ap->a_bio;
782 struct bio *nbio;
783 struct buf *bp = bio->bio_buf;
784 struct vnode *vp = ap->a_vp;
785 struct tmpfs_node *node;
786 vm_object_t uobj;
787 vm_page_t m;
788 int i;
790 if (vp->v_type != VREG) {
791 bp->b_resid = bp->b_bcount;
792 bp->b_flags |= B_ERROR | B_INVAL;
793 bp->b_error = EINVAL;
794 biodone(bio);
795 return(0);
798 node = VP_TO_TMPFS_NODE(vp);
800 uobj = node->tn_reg.tn_aobj;
803 * Don't bother flushing to swap if there is no swap, just
804 * ensure that the pages are marked as needing a commit (still).
806 if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
807 for (i = 0; i < bp->b_xio.xio_npages; ++i) {
808 m = bp->b_xio.xio_pages[i];
809 vm_page_need_commit(m);
/* Pretend success; the pages stay dirty-for-commit in memory. */
811 bp->b_resid = 0;
812 bp->b_error = 0;
813 biodone(bio);
814 } else {
/* Push the I/O down to the swap pager; completion is handled
 * by tmpfs_strategy_done(). */
815 nbio = push_bio(bio);
816 nbio->bio_done = tmpfs_strategy_done;
817 nbio->bio_offset = bio->bio_offset;
818 swap_pager_strategy(uobj, nbio);
820 return 0;
824 * If we were unable to commit the pages to swap make sure they are marked
825 * as needing a commit (again). If we were, clear the flag to allow the
826 * pages to be freed.
828 static void
829 tmpfs_strategy_done(struct bio *bio)
831 struct buf *bp;
832 vm_page_t m;
833 int i;
835 bp = bio->bio_buf;
837 if (bp->b_flags & B_ERROR) {
/* Swap write failed: absorb the error (the data is still safe in
 * the pages) and re-mark every page as needing a commit. */
838 bp->b_flags &= ~B_ERROR;
839 bp->b_error = 0;
840 bp->b_resid = 0;
841 for (i = 0; i < bp->b_xio.xio_npages; ++i) {
842 m = bp->b_xio.xio_pages[i];
843 vm_page_need_commit(m);
845 } else {
/* Commit succeeded: the pages may now be freed/recycled. */
846 for (i = 0; i < bp->b_xio.xio_npages; ++i) {
847 m = bp->b_xio.xio_pages[i];
848 vm_page_clear_commit(m);
851 bio = pop_bio(bio);
852 biodone(bio);
/*
 * tmpfs_bmap - trivial logical-to-"device" block mapping: the device
 * offset equals the logical offset and no read-ahead/read-behind runs
 * are reported.  Always succeeds.
 */
855 static int
856 tmpfs_bmap(struct vop_bmap_args *ap)
858 if (ap->a_doffsetp != NULL)
859 *ap->a_doffsetp = ap->a_loffset;
860 if (ap->a_runp != NULL)
861 *ap->a_runp = 0;
862 if (ap->a_runb != NULL)
863 *ap->a_runb = 0;
865 return 0;
868 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nremove - remove (unlink) the file named by the namecache entry
 * from directory dvp.
 *
 * Refuses directories (EISDIR) and nodes flagged IMMUTABLE/APPEND/
 * NOUNLINK or living in an APPEND directory (EPERM).  Detaches and frees
 * the directory entry; the node itself is destroyed later when its vnode
 * is reclaimed.  The target vnode is vget'd (not just vref'd) so the
 * final vrele triggers inactive/reclaim processing.
 */
870 static int
871 tmpfs_nremove(struct vop_nremove_args *ap)
873 struct vnode *dvp = ap->a_dvp;
874 struct namecache *ncp = ap->a_nch->ncp;
875 struct vnode *vp;
876 int error;
877 struct tmpfs_dirent *de;
878 struct tmpfs_mount *tmp;
879 struct tmpfs_node *dnode;
880 struct tmpfs_node *node;
883 * We have to acquire the vp from ap->a_nch because we will likely
884 * unresolve the namecache entry, and a vrele/vput is needed to
885 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
887 * We have to use vget to clear any inactive state on the vnode,
888 * otherwise the vnode may remain inactive and thus tmpfs_inactive
889 * will not get called when we release it.
891 error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
892 KKASSERT(vp->v_mount == dvp->v_mount);
893 KKASSERT(error == 0);
894 vn_unlock(vp);
896 if (vp->v_type == VDIR) {
897 error = EISDIR;
898 goto out2;
901 dnode = VP_TO_TMPFS_DIR(dvp);
902 node = VP_TO_TMPFS_NODE(vp);
903 tmp = VFS_TO_TMPFS(vp->v_mount);
905 TMPFS_NODE_LOCK(dnode);
906 de = tmpfs_dir_lookup(dnode, node, ncp);
907 if (de == NULL) {
908 error = ENOENT;
909 goto out;
912 /* Files marked as immutable or append-only cannot be deleted. */
913 if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
914 (dnode->tn_flags & APPEND)) {
915 error = EPERM;
916 goto out;
919 /* Remove the entry from the directory; as it is a file, we do not
920 * have to change the number of hard links of the directory. */
921 tmpfs_dir_detach(dnode, de);
923 /* Free the directory entry we just deleted. Note that the node
924 * referred by it will not be removed until the vnode is really
925 * reclaimed. */
926 tmpfs_free_dirent(tmp, de);
928 if (node->tn_links > 0) {
929 TMPFS_NODE_LOCK(node);
930 node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
931 TMPFS_NODE_MODIFIED;
932 TMPFS_NODE_UNLOCK(node);
935 cache_unlink(ap->a_nch);
936 tmpfs_knote(vp, NOTE_DELETE);
937 error = 0;
939 out:
940 TMPFS_NODE_UNLOCK(dnode);
941 if (error == 0)
942 tmpfs_knote(dvp, NOTE_WRITE);
943 out2:
944 vrele(vp);
946 return error;
949 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nlink - create a hard link to vp named by the namecache entry in
 * directory dvp.
 *
 * Rejects directories (EPERM), cross-mount links (EXDEV), link-count
 * overflow (EMLINK), and IMMUTABLE/APPEND sources (EPERM).  On success a
 * new dirent is attached, the node's ctime status is bumped, and the
 * namecache entry is resolved to vp.
 */
951 static int
952 tmpfs_nlink(struct vop_nlink_args *ap)
954 struct vnode *dvp = ap->a_dvp;
955 struct vnode *vp = ap->a_vp;
956 struct namecache *ncp = ap->a_nch->ncp;
957 struct tmpfs_dirent *de;
958 struct tmpfs_node *node;
959 struct tmpfs_node *dnode;
960 int error;
962 KKASSERT(dvp != vp); /* XXX When can this be false? */
964 node = VP_TO_TMPFS_NODE(vp);
965 dnode = VP_TO_TMPFS_NODE(dvp);
966 TMPFS_NODE_LOCK(dnode);
968 /* XXX: Why aren't the following two tests done by the caller? */
970 /* Hard links of directories are forbidden. */
971 if (vp->v_type == VDIR) {
972 error = EPERM;
973 goto out;
976 /* Cannot create cross-device links. */
977 if (dvp->v_mount != vp->v_mount) {
978 error = EXDEV;
979 goto out;
982 /* Ensure that we do not overflow the maximum number of links imposed
983 * by the system. */
984 KKASSERT(node->tn_links <= LINK_MAX);
985 if (node->tn_links >= LINK_MAX) {
986 error = EMLINK;
987 goto out;
990 /* We cannot create links of files marked immutable or append-only. */
991 if (node->tn_flags & (IMMUTABLE | APPEND)) {
992 error = EPERM;
993 goto out;
996 /* Allocate a new directory entry to represent the node. */
997 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
998 ncp->nc_name, ncp->nc_nlen, &de);
999 if (error != 0)
1000 goto out;
1002 /* Insert the new directory entry into the appropriate directory. */
1003 tmpfs_dir_attach(dnode, de);
1005 /* vp link count has changed, so update node times. */
1007 TMPFS_NODE_LOCK(node);
1008 node->tn_status |= TMPFS_NODE_CHANGED;
1009 TMPFS_NODE_UNLOCK(node);
1010 tmpfs_update(vp);
1012 tmpfs_knote(vp, NOTE_LINK);
1013 cache_setunresolved(ap->a_nch);
1014 cache_setvp(ap->a_nch, vp);
1015 error = 0;
1017 out:
1018 TMPFS_NODE_UNLOCK(dnode);
1019 if (error == 0)
1020 tmpfs_knote(dvp, NOTE_WRITE);
1021 return error;
1024 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nrename - rename fnch (in fdvp) to tnch (in tdvp).
 *
 * Handles the full rename matrix: same-directory renames (dirent is
 * re-keyed in place in the RB trees), cross-directory moves (detach from
 * the source, attach to the target), and replacement of an existing
 * target entry (which must be empty if it is a directory).  The target
 * vnode, if any, is vget'd so its reclaim sequence runs when released.
 * Immutable/append/nounlink flags on either node or directory yield
 * EPERM; cross-mount renames yield EXDEV.
 */
1026 static int
1027 tmpfs_nrename(struct vop_nrename_args *ap)
1029 struct vnode *fdvp = ap->a_fdvp;
1030 struct namecache *fncp = ap->a_fnch->ncp;
1031 struct vnode *fvp = fncp->nc_vp;
1032 struct vnode *tdvp = ap->a_tdvp;
1033 struct namecache *tncp = ap->a_tnch->ncp;
1034 struct vnode *tvp;
1035 struct tmpfs_dirent *de, *tde;
1036 struct tmpfs_mount *tmp;
1037 struct tmpfs_node *fdnode;
1038 struct tmpfs_node *fnode;
1039 struct tmpfs_node *tnode;
1040 struct tmpfs_node *tdnode;
1041 char *newname;
1042 char *oldname;
1043 int error;
1045 KKASSERT(fdvp->v_mount == fvp->v_mount);
1048 * Because tvp can get overwritten we have to vget it instead of
1049 * just vref or use it, otherwise it's VINACTIVE flag may not get
1050 * cleared and the node won't get destroyed.
1052 error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
1053 if (error == 0) {
1054 tnode = VP_TO_TMPFS_NODE(tvp);
1055 vn_unlock(tvp);
1056 } else {
/* No existing target entry; nothing will be replaced. */
1057 tnode = NULL;
1060 /* Disallow cross-device renames.
1061 * XXX Why isn't this done by the caller? */
1062 if (fvp->v_mount != tdvp->v_mount ||
1063 (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1064 error = EXDEV;
1065 goto out;
1068 tmp = VFS_TO_TMPFS(tdvp->v_mount);
1069 tdnode = VP_TO_TMPFS_DIR(tdvp);
1071 /* If source and target are the same file, there is nothing to do. */
1072 if (fvp == tvp) {
1073 error = 0;
1074 goto out;
1077 fdnode = VP_TO_TMPFS_DIR(fdvp);
1078 fnode = VP_TO_TMPFS_NODE(fvp);
1079 TMPFS_NODE_LOCK(fdnode);
1080 de = tmpfs_dir_lookup(fdnode, fnode, fncp);
1081 TMPFS_NODE_UNLOCK(fdnode); /* XXX depend on namecache lock */
1083 /* Avoid manipulating '.' and '..' entries. */
1084 if (de == NULL) {
1085 error = ENOENT;
1086 goto out_locked;
1088 KKASSERT(de->td_node == fnode);
1091 * If replacing an entry in the target directory and that entry
1092 * is a directory, it must be empty.
1094 * Kern_rename guarantees the destination to be a directory
1095 * if the source is one (it does?).
1097 if (tvp != NULL) {
1098 KKASSERT(tnode != NULL);
1100 if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1101 (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1102 error = EPERM;
1103 goto out_locked;
1106 if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1107 if (tnode->tn_size > 0) {
1108 error = ENOTEMPTY;
1109 goto out_locked;
1111 } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1112 error = ENOTDIR;
1113 goto out_locked;
1114 } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1115 error = EISDIR;
1116 goto out_locked;
1117 } else {
1118 KKASSERT(fnode->tn_type != VDIR &&
1119 tnode->tn_type != VDIR);
1123 if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1124 (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1125 error = EPERM;
1126 goto out_locked;
1130 * Ensure that we have enough memory to hold the new name, if it
1131 * has to be changed.
1133 if (fncp->nc_nlen != tncp->nc_nlen ||
1134 bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
1135 newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
1136 M_WAITOK | M_NULLOK);
1137 if (newname == NULL) {
1138 error = ENOSPC;
1139 goto out_locked;
1141 bcopy(tncp->nc_name, newname, tncp->nc_nlen);
1142 newname[tncp->nc_nlen] = '\0';
1143 } else {
1144 newname = NULL;
1148 * Unlink entry from source directory. Note that the kernel has
1149 * already checked for illegal recursion cases (renaming a directory
1150 * into a subdirectory of itself).
1152 if (fdnode != tdnode) {
1153 tmpfs_dir_detach(fdnode, de);
1154 } else {
/* Same-directory rename: pull the dirent out of both RB trees
 * directly so it can be re-inserted under its new name below. */
1155 /* XXX depend on namecache lock */
1156 TMPFS_NODE_LOCK(fdnode);
1157 KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
1158 RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
1159 RB_REMOVE(tmpfs_dirtree_cookie,
1160 &fdnode->tn_dir.tn_cookietree, de);
1161 TMPFS_NODE_UNLOCK(fdnode);
1165 * Handle any name change. Swap with newname, we will
1166 * deallocate it at the end.
1168 if (newname != NULL) {
1169 #if 0
1170 TMPFS_NODE_LOCK(fnode);
1171 fnode->tn_status |= TMPFS_NODE_CHANGED;
1172 TMPFS_NODE_UNLOCK(fnode);
1173 #endif
1174 oldname = de->td_name;
1175 de->td_name = newname;
1176 de->td_namelen = (uint16_t)tncp->nc_nlen;
/* newname now holds the old name, freed in the cleanup below. */
1177 newname = oldname;
1181 * If we are overwriting an entry, we have to remove the old one
1182 * from the target directory.
1184 if (tvp != NULL) {
1185 /* Remove the old entry from the target directory. */
1186 TMPFS_NODE_LOCK(tdnode);
1187 tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
1188 tmpfs_dir_detach(tdnode, tde);
1189 TMPFS_NODE_UNLOCK(tdnode);
1190 tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
1193 * Free the directory entry we just deleted. Note that the
1194 * node referred by it will not be removed until the vnode is
1195 * really reclaimed.
1197 tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1198 /*cache_inval_vp(tvp, CINV_DESTROY);*/
1202 * Link entry to target directory. If the entry
1203 * represents a directory move the parent linkage
1204 * as well.
1206 if (fdnode != tdnode) {
1207 if (de->td_node->tn_type == VDIR) {
1208 TMPFS_VALIDATE_DIR(fnode);
1210 tmpfs_dir_attach(tdnode, de);
1211 } else {
/* Same-directory rename: re-insert under the new name. */
1212 TMPFS_NODE_LOCK(tdnode);
1213 tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1214 RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
1215 RB_INSERT(tmpfs_dirtree_cookie,
1216 &tdnode->tn_dir.tn_cookietree, de);
1217 TMPFS_NODE_UNLOCK(tdnode);
1221 * Finish up
1223 if (newname) {
1224 kfree(newname, tmp->tm_name_zone);
1225 newname = NULL;
1227 cache_rename(ap->a_fnch, ap->a_tnch);
1228 tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
1229 tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
1230 if (fnode->tn_vnode)
1231 tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
1232 error = 0;
1234 out_locked:
1236 out:
1237 if (tvp)
1238 vrele(tvp);
1239 return error;
1242 /* --------------------------------------------------------------------- */
1244 static int
1245 tmpfs_nmkdir(struct vop_nmkdir_args *ap)
1247 struct vnode *dvp = ap->a_dvp;
1248 struct vnode **vpp = ap->a_vpp;
1249 struct namecache *ncp = ap->a_nch->ncp;
1250 struct vattr *vap = ap->a_vap;
1251 struct ucred *cred = ap->a_cred;
1252 int error;
1254 KKASSERT(vap->va_type == VDIR);
1256 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
1257 if (error == 0) {
1258 cache_setunresolved(ap->a_nch);
1259 cache_setvp(ap->a_nch, *vpp);
1260 tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1262 return error;
1265 /* --------------------------------------------------------------------- */
/*
 * Remove the directory named by ap->a_nch from its parent dvp.
 *
 * Returns 0 on success, or ENOTDIR / ENOTEMPTY / EPERM.  On success the
 * namecache entry is unlinked; the node itself is only destroyed later,
 * when the vnode goes through tmpfs_inactive/tmpfs_reclaim.
 */
static int
tmpfs_nrmdir(struct vop_nrmdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	int error;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(error == 0);
	vn_unlock(vp);

	/*
	 * Prevalidate so we don't hit an assertion later
	 */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/*
	 * Directories with more than two entries ('.' and '..') cannot
	 * be removed.
	 */
	if (node->tn_size > 0) {
		error = ENOTEMPTY;
		goto out;
	}

	/* Unlocked pre-check of the immutability flags (rechecked below). */
	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/*
	 * This invariant holds only if we are not trying to
	 * remove "..". We checked for that above so this is safe now.
	 */
	KKASSERT(node->tn_dir.tn_parent == dnode);

	/*
	 * Get the directory entry associated with node (vp). This
	 * was filled by tmpfs_lookup while looking up the entry.
	 */
	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));

	/*
	 * Check flags to see if we are allowed to remove the directory.
	 * This repeats the unlocked check above, now race-free under the
	 * dnode lock.
	 */
	if ((dnode->tn_flags & APPEND) ||
	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
		error = EPERM;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(dnode);
	TMPFS_ASSERT_ELOCKED(dnode);
	TMPFS_NODE_LOCK(node);
	TMPFS_ASSERT_ELOCKED(node);

	/*
	 * Must set parent linkage to NULL (tested by ncreate to disallow
	 * the creation of new files/dirs in a deleted directory)
	 */
	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
			   TMPFS_NODE_MODIFIED;
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
			    TMPFS_NODE_MODIFIED;

	TMPFS_NODE_UNLOCK(node);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted. Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */
	TMPFS_NODE_LOCK(dnode);
	dnode->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(dnode);
	tmpfs_update(dvp);

	cache_unlink(ap->a_nch);
	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
	error = 0;

out:
	vrele(vp);

	return error;
}
1387 /* --------------------------------------------------------------------- */
1389 static int
1390 tmpfs_nsymlink(struct vop_nsymlink_args *ap)
1392 struct vnode *dvp = ap->a_dvp;
1393 struct vnode **vpp = ap->a_vpp;
1394 struct namecache *ncp = ap->a_nch->ncp;
1395 struct vattr *vap = ap->a_vap;
1396 struct ucred *cred = ap->a_cred;
1397 char *target = ap->a_target;
1398 int error;
1400 vap->va_type = VLNK;
1401 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
1402 if (error == 0) {
1403 tmpfs_knote(*vpp, NOTE_WRITE);
1404 cache_setunresolved(ap->a_nch);
1405 cache_setvp(ap->a_nch, *vpp);
1407 return error;
1410 /* --------------------------------------------------------------------- */
/*
 * Read directory entries into uio, starting at uio->uio_offset, which
 * encodes a tmpfs directory cookie (DOT, DOTDOT, a per-entry cookie, or
 * EOF).  Optionally fills in the NFS cookie array (*cookies/*ncookies)
 * and the EOF flag.  Returns 0 or ENOTDIR.
 */
static int
tmpfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	struct tmpfs_mount *tmp;
	int error;
	off_t startoff;
	off_t cnt = 0;		/* number of entries emitted this call */
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	startoff = uio->uio_offset;	/* saved for cookie replay below */

	/*
	 * "." and ".." are emitted without the node lock held; the outok
	 * paths acquire the shared lock so the exit path can unlock
	 * unconditionally.
	 */
	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0) {
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		/* may lock parent, cannot hold node lock */
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
		if (error != 0) {
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	TMPFS_NODE_LOCK_SH(node);
	error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
	/* -1 is the in-band "hit EOF" indicator, not an error. */
	KKASSERT(error >= -1);

	if (error == -1)
		error = 0;

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/*
	 * Update NFS-related variables: replay the walk from startoff
	 * through the cookie tree to record one cookie per entry emitted.
	 * Still under the shared node lock, so the tree cannot change.
	 */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*ncookies = cnt;
		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					/* first real entry */
					de = RB_MIN(tmpfs_dirtree_cookie,
						&node->tn_dir.tn_cookietree);
				} else if (de != NULL) {
					/* continue from previous entry */
					de = RB_NEXT(tmpfs_dirtree_cookie,
					       &node->tn_dir.tn_cookietree, de);
				} else {
					/* resync from an arbitrary cookie */
					de = tmpfs_dir_lookupbycookie(node,
								      off);
					KKASSERT(de != NULL);
					de = RB_NEXT(tmpfs_dirtree_cookie,
					       &node->tn_dir.tn_cookietree, de);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}
			(*cookies)[i] = off;
		}
		KKASSERT(uio->uio_offset == off);
	}
	TMPFS_NODE_UNLOCK(node);

	/* Lazily mark the directory accessed (exclusive lock required). */
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return error;
}
1513 /* --------------------------------------------------------------------- */
1515 static int
1516 tmpfs_readlink(struct vop_readlink_args *ap)
1518 struct vnode *vp = ap->a_vp;
1519 struct uio *uio = ap->a_uio;
1520 int error;
1521 struct tmpfs_node *node;
1523 KKASSERT(uio->uio_offset == 0);
1524 KKASSERT(vp->v_type == VLNK);
1526 node = VP_TO_TMPFS_NODE(vp);
1527 TMPFS_NODE_LOCK_SH(node);
1528 error = uiomove(node->tn_link,
1529 MIN(node->tn_size, uio->uio_resid), uio);
1530 TMPFS_NODE_UNLOCK(node);
1531 if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
1532 TMPFS_NODE_LOCK(node);
1533 node->tn_status |= TMPFS_NODE_ACCESSED;
1534 TMPFS_NODE_UNLOCK(node);
1536 return error;
1539 /* --------------------------------------------------------------------- */
/*
 * Last close of a vnode.  If the underlying node has been fully
 * unlinked (tn_links == 0) and is not still being allocated, doom it
 * and recycle the vnode immediately so its memory is recovered now
 * rather than at some later reclaim.
 */
static int
tmpfs_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * Degenerate case: vnode already disassociated from its node.
	 */
	if (node == NULL) {
		vrecycle(vp);
		lwkt_reltoken(&mp->mnt_token);
		return(0);
	}

	/*
	 * Get rid of unreferenced deleted vnodes sooner rather than
	 * later so the data memory can be recovered immediately.
	 *
	 * We must truncate the vnode to prevent the normal reclamation
	 * path from flushing the data for the removed file to disk.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0)
	{
		/* Mark DOOMED before dropping the lock so no new vnode
		 * can be associated with this node. */
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		TMPFS_NODE_UNLOCK(node);
		if (node->tn_type == VREG)
			tmpfs_truncate(vp, 0);
		vrecycle(vp);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	return 0;
}
1585 /* --------------------------------------------------------------------- */
/*
 * Final disassociation of a vnode from its tmpfs node.  If the node was
 * deleted by the user (tn_links == 0), free the node and its data here;
 * otherwise it stays around for a future vnode re-association.
 */
static int
tmpfs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	KKASSERT(mp == tmp->tm_mount);

	/* Sever the vp <-> node association (clears vp->v_data). */
	tmpfs_free_vp(vp);

	/*
	 * If the node referenced by this vnode was deleted by the
	 * user, we must free its associated data structures now that
	 * the vnode is being reclaimed.
	 *
	 * Directories have an extra link ref.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		tmpfs_free_node(tmp, node);
		/* eats the lock */
	} else {
		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	KKASSERT(vp->v_data == NULL);
	return 0;
}
1626 /* --------------------------------------------------------------------- */
1628 static int
1629 tmpfs_mountctl(struct vop_mountctl_args *ap)
1631 struct tmpfs_mount *tmp;
1632 struct mount *mp;
1633 int rc;
1635 mp = ap->a_head.a_ops->head.vv_mount;
1636 lwkt_gettoken(&mp->mnt_token);
1638 switch (ap->a_op) {
1639 case (MOUNTCTL_SET_EXPORT):
1640 tmp = (struct tmpfs_mount *) mp->mnt_data;
1642 if (ap->a_ctllen != sizeof(struct export_args))
1643 rc = (EINVAL);
1644 else
1645 rc = vfs_export(mp, &tmp->tm_export,
1646 (const struct export_args *) ap->a_ctl);
1647 break;
1648 default:
1649 rc = vop_stdmountctl(ap);
1650 break;
1653 lwkt_reltoken(&mp->mnt_token);
1654 return (rc);
1657 /* --------------------------------------------------------------------- */
1659 static int
1660 tmpfs_print(struct vop_print_args *ap)
1662 struct vnode *vp = ap->a_vp;
1664 struct tmpfs_node *node;
1666 node = VP_TO_TMPFS_NODE(vp);
1668 kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1669 node, node->tn_flags, node->tn_links);
1670 kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1671 node->tn_mode, node->tn_uid, node->tn_gid,
1672 (uintmax_t)node->tn_size, node->tn_status);
1674 if (vp->v_type == VFIFO)
1675 fifo_printinfo(vp);
1677 kprintf("\n");
1679 return 0;
1682 /* --------------------------------------------------------------------- */
1684 static int
1685 tmpfs_pathconf(struct vop_pathconf_args *ap)
1687 struct vnode *vp = ap->a_vp;
1688 int name = ap->a_name;
1689 register_t *retval = ap->a_retval;
1690 struct tmpfs_mount *tmp;
1691 int error;
1693 error = 0;
1695 switch (name) {
1696 case _PC_CHOWN_RESTRICTED:
1697 *retval = 1;
1698 break;
1700 case _PC_FILESIZEBITS:
1701 tmp = VFS_TO_TMPFS(vp->v_mount);
1702 *retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1);
1703 break;
1705 case _PC_LINK_MAX:
1706 *retval = LINK_MAX;
1707 break;
1709 case _PC_NAME_MAX:
1710 *retval = NAME_MAX;
1711 break;
1713 case _PC_NO_TRUNC:
1714 *retval = 1;
1715 break;
1717 case _PC_PATH_MAX:
1718 *retval = PATH_MAX;
1719 break;
1721 case _PC_PIPE_BUF:
1722 *retval = PIPE_BUF;
1723 break;
1725 case _PC_SYNC_IO:
1726 *retval = 1;
1727 break;
1729 case _PC_2_SYMLINKS:
1730 *retval = 1;
1731 break;
1733 default:
1734 error = EINVAL;
1737 return error;
1740 /************************************************************************
1741 * KQFILTER OPS *
1742 ************************************************************************/
/* kqueue filter implementations, defined below. */
static void filt_tmpfsdetach(struct knote *kn);
static int filt_tmpfsread(struct knote *kn, long hint);
static int filt_tmpfswrite(struct knote *kn, long hint);
static int filt_tmpfsvnode(struct knote *kn, long hint);

/* EVFILT_READ: reports bytes available past the file position. */
static struct filterops tmpfsread_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsread };
/* EVFILT_WRITE: tmpfs files are always considered writable. */
static struct filterops tmpfswrite_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfswrite };
/* EVFILT_VNODE: vnode state-change (rename/delete/etc) notifications. */
static struct filterops tmpfsvnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsvnode };
1759 static int
1760 tmpfs_kqfilter (struct vop_kqfilter_args *ap)
1762 struct vnode *vp = ap->a_vp;
1763 struct knote *kn = ap->a_kn;
1765 switch (kn->kn_filter) {
1766 case EVFILT_READ:
1767 kn->kn_fop = &tmpfsread_filtops;
1768 break;
1769 case EVFILT_WRITE:
1770 kn->kn_fop = &tmpfswrite_filtops;
1771 break;
1772 case EVFILT_VNODE:
1773 kn->kn_fop = &tmpfsvnode_filtops;
1774 break;
1775 default:
1776 return (EOPNOTSUPP);
1779 kn->kn_hook = (caddr_t)vp;
1781 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1783 return(0);
1786 static void
1787 filt_tmpfsdetach(struct knote *kn)
1789 struct vnode *vp = (void *)kn->kn_hook;
1791 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1794 static int
1795 filt_tmpfsread(struct knote *kn, long hint)
1797 struct vnode *vp = (void *)kn->kn_hook;
1798 struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
1799 off_t off;
1801 if (hint == NOTE_REVOKE) {
1802 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1803 return(1);
1807 * Interlock against MP races when performing this function.
1809 TMPFS_NODE_LOCK_SH(node);
1810 off = node->tn_size - kn->kn_fp->f_offset;
1811 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1812 if (kn->kn_sfflags & NOTE_OLDAPI) {
1813 TMPFS_NODE_UNLOCK(node);
1814 return(1);
1816 if (kn->kn_data == 0) {
1817 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1819 TMPFS_NODE_UNLOCK(node);
1820 return (kn->kn_data != 0);
1823 static int
1824 filt_tmpfswrite(struct knote *kn, long hint)
1826 if (hint == NOTE_REVOKE)
1827 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1828 kn->kn_data = 0;
1829 return (1);
1832 static int
1833 filt_tmpfsvnode(struct knote *kn, long hint)
1835 if (kn->kn_sfflags & hint)
1836 kn->kn_fflags |= hint;
1837 if (hint == NOTE_REVOKE) {
1838 kn->kn_flags |= (EV_EOF | EV_NODATA);
1839 return (1);
1841 return (kn->kn_fflags != 0);
1845 /* --------------------------------------------------------------------- */
1848 * vnode operations vector used for files stored in a tmpfs file system.
1850 struct vop_ops tmpfs_vnode_vops = {
1851 .vop_default = vop_defaultop,
1852 .vop_getpages = vop_stdgetpages,
1853 .vop_putpages = vop_stdputpages,
1854 .vop_ncreate = tmpfs_ncreate,
1855 .vop_nresolve = tmpfs_nresolve,
1856 .vop_nlookupdotdot = tmpfs_nlookupdotdot,
1857 .vop_nmknod = tmpfs_nmknod,
1858 .vop_open = tmpfs_open,
1859 .vop_close = tmpfs_close,
1860 .vop_access = tmpfs_access,
1861 .vop_getattr = tmpfs_getattr,
1862 .vop_setattr = tmpfs_setattr,
1863 .vop_read = tmpfs_read,
1864 .vop_write = tmpfs_write,
1865 .vop_fsync = tmpfs_fsync,
1866 .vop_mountctl = tmpfs_mountctl,
1867 .vop_nremove = tmpfs_nremove,
1868 .vop_nlink = tmpfs_nlink,
1869 .vop_nrename = tmpfs_nrename,
1870 .vop_nmkdir = tmpfs_nmkdir,
1871 .vop_nrmdir = tmpfs_nrmdir,
1872 .vop_nsymlink = tmpfs_nsymlink,
1873 .vop_readdir = tmpfs_readdir,
1874 .vop_readlink = tmpfs_readlink,
1875 .vop_inactive = tmpfs_inactive,
1876 .vop_reclaim = tmpfs_reclaim,
1877 .vop_print = tmpfs_print,
1878 .vop_pathconf = tmpfs_pathconf,
1879 .vop_bmap = tmpfs_bmap,
1880 .vop_strategy = tmpfs_strategy,
1881 .vop_advlock = tmpfs_advlock,
1882 .vop_kqfilter = tmpfs_kqfilter