sys/vfs/tmpfs/tmpfs_vnops.c
/*-
 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
 */

/*
 * tmpfs vnode interface.
 */

#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vfsops.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/buf2.h>
#include <vm/vm_page2.h>

#include <vfs/fifofs/fifo.h>
#include <vfs/tmpfs/tmpfs_vnops.h>
#include "tmpfs.h"

static void tmpfs_strategy_done(struct bio *bio);
static void tmpfs_move_pages(vm_object_t src, vm_object_t dst);

static int tmpfs_cluster_enable = 1;
SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem");
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_enable, CTLFLAG_RW,
        &tmpfs_cluster_enable, 0, "");
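
/*
 * vfs.tmpfs.cluster_enable selects between clustered reads via
 * cluster_readx() and single-block bread_kvabio() calls in tmpfs_read()
 * below.  It defaults to on.
 */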

static __inline
void
tmpfs_knote(struct vnode *vp, int flags)
{
        if (flags)
                KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nresolve(struct vop_nresolve_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode *vp = NULL;
        struct namecache *ncp = ap->a_nch->ncp;
        struct tmpfs_node *tnode;
        struct tmpfs_dirent *de;
        struct tmpfs_node *dnode;
        int error;

        dnode = VP_TO_TMPFS_DIR(dvp);

        TMPFS_NODE_LOCK_SH(dnode);
loop:
        de = tmpfs_dir_lookup(dnode, NULL, ncp);
        if (de == NULL) {
                error = ENOENT;
        } else {
                /*
                 * Allocate a vnode for the node we found.  Use
                 * tmpfs_alloc_vp()'s deadlock handling mode.
                 */
                tnode = de->td_node;
                error = tmpfs_alloc_vp(dvp->v_mount, dnode, tnode,
                                       LK_EXCLUSIVE | LK_RETRY, &vp);
                if (error == EAGAIN)
                        goto loop;
                if (error)
                        goto out;
                KKASSERT(vp);
        }

out:
        TMPFS_NODE_UNLOCK(dnode);

        if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
                TMPFS_NODE_LOCK(dnode);
                dnode->tn_status |= TMPFS_NODE_ACCESSED;
                TMPFS_NODE_UNLOCK(dnode);
        }

        /*
         * Store the result of this lookup in the cache.  Avoid this if the
         * request was for creation, as it does not improve timings on
         * empirical tests.
         */
        if (vp) {
                vn_unlock(vp);
                cache_setvp(ap->a_nch, vp);
                vrele(vp);
        } else if (error == ENOENT) {
                cache_setvp(ap->a_nch, NULL);
        }
        return (error);
}
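
/*
 * Note on the pattern above, which recurs throughout this file: tn_status
 * is tested unlocked first and the node lock is taken only when the
 * ACCESSED bit actually needs to be set, avoiding an exclusive lock
 * acquisition on every access in the common case.
 */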

static int
tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode **vpp = ap->a_vpp;
        struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
        struct ucred *cred = ap->a_cred;
        int error;

        *vpp = NULL;

        /* Check accessibility of requested node as a first step. */
        error = VOP_ACCESS(dvp, VEXEC, cred);
        if (error != 0)
                return error;

        if (dnode->tn_dir.tn_parent != NULL) {
                /* Allocate a new vnode on the matching entry. */
                error = tmpfs_alloc_vp(dvp->v_mount,
                                       NULL, dnode->tn_dir.tn_parent,
                                       LK_EXCLUSIVE | LK_RETRY, vpp);

                if (*vpp)
                        vn_unlock(*vpp);
        }
        return (*vpp == NULL) ? ENOENT : 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_ncreate(struct vop_ncreate_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode **vpp = ap->a_vpp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct vattr *vap = ap->a_vap;
        struct ucred *cred = ap->a_cred;
        int error;

        KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);

        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, *vpp);
                tmpfs_knote(dvp, NOTE_WRITE);
        }
        return (error);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nmknod(struct vop_nmknod_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode **vpp = ap->a_vpp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct vattr *vap = ap->a_vap;
        struct ucred *cred = ap->a_cred;
        int error;

        if (vap->va_type != VBLK && vap->va_type != VCHR &&
            vap->va_type != VFIFO) {
                return (EINVAL);
        }

        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, *vpp);
                tmpfs_knote(dvp, NOTE_WRITE);
        }
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_open(struct vop_open_args *ap)
{
        struct vnode *vp = ap->a_vp;
        int mode = ap->a_mode;
        struct tmpfs_node *node;
        int error;

        node = VP_TO_TMPFS_NODE(vp);

#if 0
        /* The file is still active but all its names have been removed
         * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
         * it is about to die. */
        if (node->tn_links < 1)
                return (ENOENT);
#endif

        /* If the file is marked append-only, deny write requests. */
        if ((node->tn_flags & APPEND) &&
            (mode & (FWRITE | O_APPEND)) == FWRITE) {
                error = EPERM;
        } else {
                if (node->tn_reg.tn_pages_in_aobj) {
                        TMPFS_NODE_LOCK(node);
                        if (node->tn_reg.tn_pages_in_aobj) {
                                tmpfs_move_pages(node->tn_reg.tn_aobj,
                                                 vp->v_object);
                                node->tn_reg.tn_pages_in_aobj = 0;
                        }
                        TMPFS_NODE_UNLOCK(node);
                }
                error = vop_stdopen(ap);
        }

        return (error);
}
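
/*
 * tn_pages_in_aobj is non-zero when an inactive vnode's pages were moved
 * into the node's swap-backed anonymous object (see tmpfs_inactive()
 * below).  Reopening or otherwise touching the file moves the pages back
 * into the vnode's VM object so the buffer cache can see them again.
 */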

/* --------------------------------------------------------------------- */

static int
tmpfs_close(struct vop_close_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct tmpfs_node *node;
        int error;

        node = VP_TO_TMPFS_NODE(vp);

        if (node->tn_links > 0) {
                /*
                 * Update node times.  No need to do it if the node has
                 * been deleted, because it will vanish after we return.
                 */
                tmpfs_update(vp);
        }

        error = vop_stdclose(ap);

        return (error);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_access(struct vop_access_args *ap)
{
        struct vnode *vp = ap->a_vp;
        int error;
        struct tmpfs_node *node;

        node = VP_TO_TMPFS_NODE(vp);

        switch (vp->v_type) {
        case VDIR:
                /* FALLTHROUGH */
        case VLNK:
                /* FALLTHROUGH */
        case VREG:
                if ((ap->a_mode & VWRITE) &&
                    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
                        error = EROFS;
                        goto out;
                }
                break;

        case VBLK:
                /* FALLTHROUGH */
        case VCHR:
                /* FALLTHROUGH */
        case VSOCK:
                /* FALLTHROUGH */
        case VFIFO:
                break;

        default:
                error = EINVAL;
                goto out;
        }

        if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
                error = EPERM;
                goto out;
        }

        error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
                                  node->tn_mode, 0);
out:
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_getattr(struct vop_getattr_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct vattr *vap = ap->a_vap;
        struct tmpfs_node *node;

        node = VP_TO_TMPFS_NODE(vp);

        tmpfs_update(vp);

        TMPFS_NODE_LOCK_SH(node);
        vap->va_type = vp->v_type;
        vap->va_mode = node->tn_mode;
        vap->va_nlink = node->tn_links;
        vap->va_uid = node->tn_uid;
        vap->va_gid = node->tn_gid;
        vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
        vap->va_fileid = node->tn_id;
        vap->va_size = node->tn_size;
        vap->va_blocksize = PAGE_SIZE;
        vap->va_atime.tv_sec = node->tn_atime;
        vap->va_atime.tv_nsec = node->tn_atimensec;
        vap->va_mtime.tv_sec = node->tn_mtime;
        vap->va_mtime.tv_nsec = node->tn_mtimensec;
        vap->va_ctime.tv_sec = node->tn_ctime;
        vap->va_ctime.tv_nsec = node->tn_ctimensec;
        vap->va_gen = node->tn_gen;
        vap->va_flags = node->tn_flags;
        if (vp->v_type == VBLK || vp->v_type == VCHR) {
                vap->va_rmajor = umajor(node->tn_rdev);
                vap->va_rminor = uminor(node->tn_rdev);
        }
        vap->va_bytes = round_page(node->tn_size);
        vap->va_filerev = 0;
        TMPFS_NODE_UNLOCK(node);

        return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_setattr(struct vop_setattr_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct vattr *vap = ap->a_vap;
        struct ucred *cred = ap->a_cred;
        struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
        int error = 0;
        int kflags = 0;

        TMPFS_NODE_LOCK(node);
        if (error == 0 && (vap->va_flags != VNOVAL)) {
                error = tmpfs_chflags(vp, vap->va_flags, cred);
                kflags |= NOTE_ATTRIB;
        }

        if (error == 0 && (vap->va_size != VNOVAL)) {
                /* restore any saved pages before proceeding */
                if (node->tn_reg.tn_pages_in_aobj) {
                        tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
                        node->tn_reg.tn_pages_in_aobj = 0;
                }
                if (vap->va_size > node->tn_size)
                        kflags |= NOTE_WRITE | NOTE_EXTEND;
                else
                        kflags |= NOTE_WRITE;
                error = tmpfs_chsize(vp, vap->va_size, cred);
        }

        if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
                           vap->va_gid != (gid_t)VNOVAL)) {
                error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
                kflags |= NOTE_ATTRIB;
        }

        if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
                error = tmpfs_chmod(vp, vap->va_mode, cred);
                kflags |= NOTE_ATTRIB;
        }

        if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
                            vap->va_atime.tv_nsec != VNOVAL) ||
                           (vap->va_mtime.tv_sec != VNOVAL &&
                            vap->va_mtime.tv_nsec != VNOVAL))) {
                error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
                                      vap->va_vaflags, cred);
                kflags |= NOTE_ATTRIB;
        }

        /*
         * Update the node times.  We give preference to the error codes
         * generated by this function rather than the ones that may arise
         * from tmpfs_update.
         */
        tmpfs_update(vp);
        TMPFS_NODE_UNLOCK(node);
        tmpfs_knote(vp, kflags);

        return (error);
}

/* --------------------------------------------------------------------- */

/*
 * fsync is usually a NOP, but we must take action when unmounting or
 * when recycling.
 */
static int
tmpfs_fsync(struct vop_fsync_args *ap)
{
        struct tmpfs_node *node;
        struct vnode *vp = ap->a_vp;

        node = VP_TO_TMPFS_NODE(vp);

        /*
         * tmpfs vnodes typically remain dirty, avoid long syncer scans
         * by forcing removal from the syncer list.
         */
        vn_syncer_remove(vp, 1);

        tmpfs_update(vp);
        if (vp->v_type == VREG) {
                if (vp->v_flag & VRECLAIMED) {
                        if (node->tn_links == 0)
                                tmpfs_truncate(vp, 0);
                        else
                                vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
                }
        }

        return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_read(struct vop_read_args *ap)
{
        struct buf *bp;
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
        struct tmpfs_node *node;
        off_t base_offset;
        size_t offset;
        size_t len;
        size_t resid;
        int error;
        int seqcount;

        /*
         * Check the basics
         */
        if (uio->uio_offset < 0)
                return (EINVAL);
        if (vp->v_type != VREG)
                return (EINVAL);

        /*
         * Extract node, try to shortcut the operation through
         * the VM page cache, allowing us to avoid buffer cache
         * overheads.
         */
        node = VP_TO_TMPFS_NODE(vp);
        resid = uio->uio_resid;
        seqcount = ap->a_ioflag >> 16;
        error = vop_helper_read_shortcut(ap);
        if (error)
                return error;
        if (uio->uio_resid == 0) {
                if (resid)
                        goto finished;
                return error;
        }

        /*
         * restore any saved pages before proceeding
         */
        if (node->tn_reg.tn_pages_in_aobj) {
                TMPFS_NODE_LOCK(node);
                if (node->tn_reg.tn_pages_in_aobj) {
                        tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
                        node->tn_reg.tn_pages_in_aobj = 0;
                }
                TMPFS_NODE_UNLOCK(node);
        }

        /*
         * Fall-through to our normal read code.
         */
        while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
                /*
                 * Use buffer cache I/O (via tmpfs_strategy)
                 */
                offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
                base_offset = (off_t)uio->uio_offset - offset;
                bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, GETBLK_KVABIO);
                if (bp == NULL) {
                        if (tmpfs_cluster_enable) {
                                error = cluster_readx(vp, node->tn_size,
                                                      base_offset,
                                                      TMPFS_BLKSIZE,
                                                      B_NOTMETA | B_KVABIO,
                                                      uio->uio_resid,
                                                      seqcount * MAXBSIZE,
                                                      &bp);
                        } else {
                                error = bread_kvabio(vp, base_offset,
                                                     TMPFS_BLKSIZE, &bp);
                        }
                        if (error) {
                                brelse(bp);
                                kprintf("tmpfs_read bread error %d\n", error);
                                break;
                        }

                        /*
                         * tmpfs pretty much fiddles directly with the VM
                         * system, don't let it exhaust it or we won't play
                         * nice with other processes.
                         *
                         * Only do this if the VOP is coming from a normal
                         * read/write.  The VM system handles the case for
                         * UIO_NOCOPY.
                         */
                        if (uio->uio_segflg != UIO_NOCOPY)
                                vm_wait_nominal();
                }
                bp->b_flags |= B_CLUSTEROK;
                bkvasync(bp);

                /*
                 * Figure out how many bytes we can actually copy this loop.
                 */
                len = TMPFS_BLKSIZE - offset;
                if (len > uio->uio_resid)
                        len = uio->uio_resid;
                if (len > node->tn_size - uio->uio_offset)
                        len = (size_t)(node->tn_size - uio->uio_offset);

                error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
                bqrelse(bp);
                if (error) {
                        kprintf("tmpfs_read uiomove error %d\n", error);
                        break;
                }
        }

finished:
        if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
                TMPFS_NODE_LOCK(node);
                node->tn_status |= TMPFS_NODE_ACCESSED;
                TMPFS_NODE_UNLOCK(node);
        }
        return (error);
}
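
/*
 * The read and write loops above and below operate on fixed-size blocks:
 * TMPFS_BLKMASK64 masks off the intra-block offset, so base_offset is
 * always TMPFS_BLKSIZE-aligned and (offset, len) address exactly the
 * slice of the buffer the uio touches on each iteration.
 */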

static int
tmpfs_write(struct vop_write_args *ap)
{
        struct buf *bp;
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
        struct thread *td = uio->uio_td;
        struct tmpfs_node *node;
        boolean_t extended;
        off_t oldsize;
        int error;
        off_t base_offset;
        size_t offset;
        size_t len;
        struct rlimit limit;
        int trivial = 0;
        int kflags = 0;
        int seqcount;

        error = 0;
        if (uio->uio_resid == 0) {
                return error;
        }

        node = VP_TO_TMPFS_NODE(vp);

        if (vp->v_type != VREG)
                return (EINVAL);
        seqcount = ap->a_ioflag >> 16;

        TMPFS_NODE_LOCK(node);

        /*
         * restore any saved pages before proceeding
         */
        if (node->tn_reg.tn_pages_in_aobj) {
                tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
                node->tn_reg.tn_pages_in_aobj = 0;
        }

        oldsize = node->tn_size;
        if (ap->a_ioflag & IO_APPEND)
                uio->uio_offset = node->tn_size;

        /*
         * Check for illegal write offsets.
         */
        if (uio->uio_offset + uio->uio_resid >
            VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
                error = EFBIG;
                goto done;
        }

        /*
         * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
         */
        if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
                error = kern_getrlimit(RLIMIT_FSIZE, &limit);
                if (error)
                        goto done;
                if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
                        ksignal(td->td_proc, SIGXFSZ);
                        error = EFBIG;
                        goto done;
                }
        }

        /*
         * Extend the file's size if necessary
         */
        extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);

        while (uio->uio_resid > 0) {
                /*
                 * Don't completely blow out running buffer I/O
                 * when being hit from the pageout daemon.
                 */
                if (uio->uio_segflg == UIO_NOCOPY &&
                    (ap->a_ioflag & IO_RECURSE) == 0) {
                        bwillwrite(TMPFS_BLKSIZE);
                }

                /*
                 * Use buffer cache I/O (via tmpfs_strategy)
                 */
                offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
                base_offset = (off_t)uio->uio_offset - offset;
                len = TMPFS_BLKSIZE - offset;
                if (len > uio->uio_resid)
                        len = uio->uio_resid;

                if ((uio->uio_offset + len) > node->tn_size) {
                        trivial = (uio->uio_offset <= node->tn_size);
                        error = tmpfs_reg_resize(vp, uio->uio_offset + len,
                                                 trivial);
                        if (error)
                                break;
                }

                /*
                 * Read to fill in any gaps.  Theoretically we could
                 * optimize this if the write covers the entire buffer
                 * and is not a UIO_NOCOPY write, however this can lead
                 * to a security violation exposing random kernel memory
                 * (whatever junk was in the backing VM pages before).
                 *
                 * So just use bread() to do the right thing.
                 */
                error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp);
                bkvasync(bp);
                error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
                if (error) {
                        kprintf("tmpfs_write uiomove error %d\n", error);
                        brelse(bp);
                        break;
                }

                if (uio->uio_offset > node->tn_size) {
                        node->tn_size = uio->uio_offset;
                        kflags |= NOTE_EXTEND;
                }
                kflags |= NOTE_WRITE;

                /*
                 * Always try to flush the page in the UIO_NOCOPY case.  This
                 * can come from the pageout daemon or during vnode eviction.
                 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
                 *
                 * For the normal case we buwrite(), dirtying the underlying
                 * VM pages instead of dirtying the buffer and releasing the
                 * buffer as a clean buffer.  This allows tmpfs to use
                 * essentially all available memory to cache file data.
                 * If we used bdwrite() the buffer cache would wind up
                 * flushing the data to swap too quickly.
                 *
                 * But because tmpfs can seriously load the VM system we
                 * fall-back to using bdwrite() when free memory starts
                 * to get low.  This shifts the load away from the VM system
                 * and makes tmpfs act more like a normal filesystem with
                 * regards to disk activity.
                 *
                 * tmpfs pretty much fiddles directly with the VM
                 * system, don't let it exhaust it or we won't play
                 * nice with other processes.  Only do this if the
                 * VOP is coming from a normal read/write.  The VM system
                 * handles the case for UIO_NOCOPY.
                 */
                bp->b_flags |= B_CLUSTEROK;
                if (uio->uio_segflg == UIO_NOCOPY) {
                        /*
                         * Flush from the pageout daemon, deal with
                         * potentially very heavy tmpfs write activity
                         * causing long stalls in the pageout daemon
                         * before pages get to free/cache.
                         *
                         * (a) Under severe pressure setting B_DIRECT will
                         *     cause a buffer release to try to free the
                         *     underlying pages.
                         *
                         * (b) Under modest memory pressure the B_RELBUF
                         *     alone is sufficient to get the pages moved
                         *     to the cache.  We could also force this by
                         *     setting B_NOTMETA but that might have other
                         *     unintended side-effects (e.g. setting
                         *     PG_NOTMETA on the VM page).
                         *
                         * Hopefully this will unblock the VM system more
                         * quickly under extreme tmpfs write load.
                         */
                        if (vm_page_count_min(vm_page_free_hysteresis))
                                bp->b_flags |= B_DIRECT;
                        bp->b_flags |= B_AGE | B_RELBUF;
                        bp->b_act_count = 0;    /* buffer->deactivate pgs */
                        cluster_awrite(bp);
                } else if (vm_page_count_target()) {
                        /*
                         * Normal (userland) write but we are low on memory,
                         * run the buffer through the buffer cache.
                         */
                        bp->b_act_count = 0;    /* buffer->deactivate pgs */
                        bdwrite(bp);
                } else {
                        /*
                         * Otherwise run the buffer directly through to the
                         * backing VM store.
                         */
                        buwrite(bp);
                        /*vm_wait_nominal();*/
                }

                if (bp->b_error) {
                        kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
                        break;
                }
        }

        if (error) {
                if (extended) {
                        (void)tmpfs_reg_resize(vp, oldsize, trivial);
                        kflags &= ~NOTE_EXTEND;
                }
                goto done;
        }

        /*
         * Currently we don't set the mtime on files modified via mmap()
         * because we can't tell the difference between those modifications
         * and an attempt by the pageout daemon to flush tmpfs pages to
         * swap.
         *
         * This is because in order to defer flushes as long as possible
         * buwrite() works by marking the underlying VM pages dirty in
         * order to be able to dispose of the buffer cache buffer without
         * flushing it.
         */
        if (uio->uio_segflg == UIO_NOCOPY) {
                if (vp->v_flag & VLASTWRITETS) {
                        node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
                        node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
                }
        } else {
                node->tn_status |= TMPFS_NODE_MODIFIED;
                vclrflags(vp, VLASTWRITETS);
        }

        if (extended)
                node->tn_status |= TMPFS_NODE_CHANGED;

        if (node->tn_mode & (S_ISUID | S_ISGID)) {
                if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
                        node->tn_mode &= ~(S_ISUID | S_ISGID);
        }
done:
        TMPFS_NODE_UNLOCK(node);
        if (kflags)
                tmpfs_knote(vp, kflags);

        return(error);
}
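
/*
 * Summary of the buffer disposition logic above: UIO_NOCOPY writes
 * (pageout daemon, vnode eviction) are pushed out via cluster_awrite();
 * normal userland writes use buwrite() to dirty the backing VM pages and
 * keep the data cached, falling back to bdwrite() when free memory runs
 * low.
 */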

static int
tmpfs_advlock(struct vop_advlock_args *ap)
{
        struct tmpfs_node *node;
        struct vnode *vp = ap->a_vp;
        int error;

        node = VP_TO_TMPFS_NODE(vp);
        error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));

        return (error);
}

/*
 * The strategy function is typically only called when memory pressure
 * forces the system to attempt to pageout pages.  It can also be called
 * by [n]vtruncbuf() when a truncation cuts a page in half, and by normal
 * write operations when the buffer cache flushes dirty buffers.
 *
 * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
 * our cpu.  swap_pager_strategy() is all we really use, and it directly
 * supports this.
 */
static int
tmpfs_strategy(struct vop_strategy_args *ap)
{
        struct bio *bio = ap->a_bio;
        struct bio *nbio;
        struct buf *bp = bio->bio_buf;
        struct vnode *vp = ap->a_vp;
        struct tmpfs_node *node;
        vm_object_t uobj;
        vm_page_t m;
        int i;

        if (vp->v_type != VREG) {
                bp->b_resid = bp->b_bcount;
                bp->b_flags |= B_ERROR | B_INVAL;
                bp->b_error = EINVAL;
                biodone(bio);
                return(0);
        }

        node = VP_TO_TMPFS_NODE(vp);

        uobj = node->tn_reg.tn_aobj;

        /*
         * Don't bother flushing to swap if there is no swap, just
         * ensure that the pages are marked as needing a commit (still).
         */
        if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
                for (i = 0; i < bp->b_xio.xio_npages; ++i) {
                        m = bp->b_xio.xio_pages[i];
                        vm_page_need_commit(m);
                }
                bp->b_resid = 0;
                bp->b_error = 0;
                biodone(bio);
        } else {
                nbio = push_bio(bio);
                nbio->bio_done = tmpfs_strategy_done;
                nbio->bio_offset = bio->bio_offset;
                swap_pager_strategy(uobj, nbio);
        }
        return 0;
}

/*
 * If we were unable to commit the pages to swap make sure they are marked
 * as needing a commit (again).  If we were, clear the flag to allow the
 * pages to be freed.
 *
 * Do not error-out the buffer.  In particular, vinvalbuf() needs to
 * always work.
 */
static void
tmpfs_strategy_done(struct bio *bio)
{
        struct buf *bp;
        vm_page_t m;
        int i;

        bp = bio->bio_buf;

        if (bp->b_flags & B_ERROR) {
                bp->b_flags &= ~B_ERROR;
                bp->b_error = 0;
                bp->b_resid = 0;
                for (i = 0; i < bp->b_xio.xio_npages; ++i) {
                        m = bp->b_xio.xio_pages[i];
                        vm_page_need_commit(m);
                }
        } else {
                for (i = 0; i < bp->b_xio.xio_npages; ++i) {
                        m = bp->b_xio.xio_pages[i];
                        vm_page_clear_commit(m);
                }
        }
        bio = pop_bio(bio);
        biodone(bio);
}
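
/*
 * tmpfs_bmap() maps logical offsets 1:1 onto "device" offsets with zero
 * read-ahead/read-behind runs, since the backing store is the node's
 * swap-backed VM object rather than a real disk.
 */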
static int
tmpfs_bmap(struct vop_bmap_args *ap)
{
        if (ap->a_doffsetp != NULL)
                *ap->a_doffsetp = ap->a_loffset;
        if (ap->a_runp != NULL)
                *ap->a_runp = 0;
        if (ap->a_runb != NULL)
                *ap->a_runb = 0;

        return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nremove(struct vop_nremove_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct vnode *vp;
        int error;
        struct tmpfs_dirent *de;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *dnode;
        struct tmpfs_node *node;

        /*
         * We have to acquire the vp from ap->a_nch because we will likely
         * unresolve the namecache entry, and a vrele/vput is needed to
         * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
         *
         * We have to use vget to clear any inactive state on the vnode,
         * otherwise the vnode may remain inactive and thus tmpfs_inactive
         * will not get called when we release it.
         */
        error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
        KKASSERT(vp->v_mount == dvp->v_mount);
        KKASSERT(error == 0);
        vn_unlock(vp);

        if (vp->v_type == VDIR) {
                error = EISDIR;
                goto out2;
        }

        dnode = VP_TO_TMPFS_DIR(dvp);
        node = VP_TO_TMPFS_NODE(vp);
        tmp = VFS_TO_TMPFS(vp->v_mount);

        TMPFS_NODE_LOCK(dnode);
        de = tmpfs_dir_lookup(dnode, node, ncp);
        if (de == NULL) {
                error = ENOENT;
                TMPFS_NODE_UNLOCK(dnode);
                goto out;
        }

        /* Files marked as immutable or append-only cannot be deleted. */
        if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
            (dnode->tn_flags & APPEND)) {
                error = EPERM;
                TMPFS_NODE_UNLOCK(dnode);
                goto out;
        }

        /* Remove the entry from the directory; as it is a file, we do not
         * have to change the number of hard links of the directory. */
        tmpfs_dir_detach(dnode, de);
        TMPFS_NODE_UNLOCK(dnode);

        /* Free the directory entry we just deleted.  Note that the node
         * referred by it will not be removed until the vnode is really
         * reclaimed. */
        tmpfs_free_dirent(tmp, de);

        if (node->tn_links > 0) {
                TMPFS_NODE_LOCK(node);
                node->tn_status |= TMPFS_NODE_CHANGED;
                TMPFS_NODE_UNLOCK(node);
        }

        cache_unlink(ap->a_nch);
        tmpfs_knote(vp, NOTE_DELETE);
        error = 0;

out:
        if (error == 0)
                tmpfs_knote(dvp, NOTE_WRITE);
out2:
        vrele(vp);

        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nlink(struct vop_nlink_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode *vp = ap->a_vp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct tmpfs_dirent *de;
        struct tmpfs_node *node;
        struct tmpfs_node *dnode;
        int error;

        KKASSERT(dvp != vp); /* XXX When can this be false? */

        node = VP_TO_TMPFS_NODE(vp);
        dnode = VP_TO_TMPFS_NODE(dvp);
        TMPFS_NODE_LOCK(dnode);

        /* XXX: Why aren't the following two tests done by the caller? */

        /* Hard links of directories are forbidden. */
        if (vp->v_type == VDIR) {
                error = EPERM;
                goto out;
        }

        /* Cannot create cross-device links. */
        if (dvp->v_mount != vp->v_mount) {
                error = EXDEV;
                goto out;
        }

        /* Ensure that we do not overflow the maximum number of links imposed
         * by the system. */
        KKASSERT(node->tn_links <= LINK_MAX);
        if (node->tn_links >= LINK_MAX) {
                error = EMLINK;
                goto out;
        }

        /* We cannot create links of files marked immutable or append-only. */
        if (node->tn_flags & (IMMUTABLE | APPEND)) {
                error = EPERM;
                goto out;
        }

        /* Allocate a new directory entry to represent the node. */
        error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
                                   ncp->nc_name, ncp->nc_nlen, &de);
        if (error != 0)
                goto out;

        /* Insert the new directory entry into the appropriate directory. */
        tmpfs_dir_attach(dnode, de);

        /* vp link count has changed, so update node times. */

        TMPFS_NODE_LOCK(node);
        node->tn_status |= TMPFS_NODE_CHANGED;
        TMPFS_NODE_UNLOCK(node);
        tmpfs_update(vp);

        tmpfs_knote(vp, NOTE_LINK);
        cache_setunresolved(ap->a_nch);
        cache_setvp(ap->a_nch, vp);
        error = 0;

out:
        TMPFS_NODE_UNLOCK(dnode);
        if (error == 0)
                tmpfs_knote(dvp, NOTE_WRITE);
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nrename(struct vop_nrename_args *ap)
{
        struct vnode *fdvp = ap->a_fdvp;
        struct namecache *fncp = ap->a_fnch->ncp;
        struct vnode *fvp = fncp->nc_vp;
        struct vnode *tdvp = ap->a_tdvp;
        struct namecache *tncp = ap->a_tnch->ncp;
        struct vnode *tvp;
        struct tmpfs_dirent *de, *tde;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *fdnode;
        struct tmpfs_node *fnode;
        struct tmpfs_node *tnode;
        struct tmpfs_node *tdnode;
        char *newname;
        char *oldname;
        int error;

        KKASSERT(fdvp->v_mount == fvp->v_mount);

        /*
         * Because tvp can get overwritten we have to vget it instead of
         * just vref or use it, otherwise its VINACTIVE flag may not get
         * cleared and the node won't get destroyed.
         */
        error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
        if (error == 0) {
                tnode = VP_TO_TMPFS_NODE(tvp);
                vn_unlock(tvp);
        } else {
                tnode = NULL;
        }

        /* Disallow cross-device renames.
         * XXX Why isn't this done by the caller? */
        if (fvp->v_mount != tdvp->v_mount ||
            (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
                error = EXDEV;
                goto out;
        }

        tmp = VFS_TO_TMPFS(tdvp->v_mount);
        tdnode = VP_TO_TMPFS_DIR(tdvp);

        /* If source and target are the same file, there is nothing to do. */
        if (fvp == tvp) {
                error = 0;
                goto out;
        }

        fdnode = VP_TO_TMPFS_DIR(fdvp);
        fnode = VP_TO_TMPFS_NODE(fvp);
        TMPFS_NODE_LOCK(fdnode);
        de = tmpfs_dir_lookup(fdnode, fnode, fncp);
        TMPFS_NODE_UNLOCK(fdnode);      /* XXX depend on namecache lock */

        /* Avoid manipulating '.' and '..' entries. */
        if (de == NULL) {
                error = ENOENT;
                goto out_locked;
        }
        KKASSERT(de->td_node == fnode);

        /*
         * If replacing an entry in the target directory and that entry
         * is a directory, it must be empty.
         *
         * Kern_rename guarantees the destination to be a directory
         * if the source is one (it does?).
         */
        if (tvp != NULL) {
                KKASSERT(tnode != NULL);

                if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
                    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
                        error = EPERM;
                        goto out_locked;
                }

                if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
                        if (tnode->tn_size > 0) {
                                error = ENOTEMPTY;
                                goto out_locked;
                        }
                } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
                        error = ENOTDIR;
                        goto out_locked;
                } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
                        error = EISDIR;
                        goto out_locked;
                } else {
                        KKASSERT(fnode->tn_type != VDIR &&
                                 tnode->tn_type != VDIR);
                }
        }

        if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
            (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
                error = EPERM;
                goto out_locked;
        }

        /*
         * Ensure that we have enough memory to hold the new name, if it
         * has to be changed.
         */
        if (fncp->nc_nlen != tncp->nc_nlen ||
            bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
                newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
                                  M_WAITOK | M_NULLOK);
                if (newname == NULL) {
                        error = ENOSPC;
                        goto out_locked;
                }
                bcopy(tncp->nc_name, newname, tncp->nc_nlen);
                newname[tncp->nc_nlen] = '\0';
        } else {
                newname = NULL;
        }

        /*
         * Unlink entry from source directory.  Note that the kernel has
         * already checked for illegal recursion cases (renaming a directory
         * into a subdirectory of itself).
         */
        if (fdnode != tdnode) {
                tmpfs_dir_detach(fdnode, de);
        } else {
                /* XXX depend on namecache lock */
                TMPFS_NODE_LOCK(fdnode);
                KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
                RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
                RB_REMOVE(tmpfs_dirtree_cookie,
                          &fdnode->tn_dir.tn_cookietree, de);
                TMPFS_NODE_UNLOCK(fdnode);
        }

        /*
         * Handle any name change.  Swap with newname, we will
         * deallocate it at the end.
         */
        if (newname != NULL) {
#if 0
                TMPFS_NODE_LOCK(fnode);
                fnode->tn_status |= TMPFS_NODE_CHANGED;
                TMPFS_NODE_UNLOCK(fnode);
#endif
                oldname = de->td_name;
                de->td_name = newname;
                de->td_namelen = (uint16_t)tncp->nc_nlen;
                newname = oldname;
        }

        /*
         * If we are overwriting an entry, we have to remove the old one
         * from the target directory.
         */
        if (tvp != NULL) {
                /* Remove the old entry from the target directory. */
                TMPFS_NODE_LOCK(tdnode);
                tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
                tmpfs_dir_detach(tdnode, tde);
                TMPFS_NODE_UNLOCK(tdnode);
                tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);

                /*
                 * Free the directory entry we just deleted.  Note that the
                 * node referred by it will not be removed until the vnode is
                 * really reclaimed.
                 */
                tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
                /*cache_inval_vp(tvp, CINV_DESTROY);*/
        }

        /*
         * Link entry to target directory.  If the entry
         * represents a directory move the parent linkage
         * as well.
         */
        if (fdnode != tdnode) {
                if (de->td_node->tn_type == VDIR) {
                        TMPFS_VALIDATE_DIR(fnode);
                }
                tmpfs_dir_attach(tdnode, de);
        } else {
                TMPFS_NODE_LOCK(tdnode);
                tdnode->tn_status |= TMPFS_NODE_MODIFIED;
                RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
                RB_INSERT(tmpfs_dirtree_cookie,
                          &tdnode->tn_dir.tn_cookietree, de);
                TMPFS_NODE_UNLOCK(tdnode);
        }

        /*
         * Finish up
         */
        if (newname) {
                kfree(newname, tmp->tm_name_zone);
                newname = NULL;
        }
        cache_rename(ap->a_fnch, ap->a_tnch);
        tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
        tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
        if (fnode->tn_vnode)
                tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
        error = 0;

out_locked:
out:
        if (tvp)
                vrele(tvp);
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nmkdir(struct vop_nmkdir_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode **vpp = ap->a_vpp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct vattr *vap = ap->a_vap;
        struct ucred *cred = ap->a_cred;
        int error;

        KKASSERT(vap->va_type == VDIR);

        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
        if (error == 0) {
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, *vpp);
                tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
        }
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nrmdir(struct vop_nrmdir_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct vnode *vp;
        struct tmpfs_dirent *de;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *dnode;
        struct tmpfs_node *node;
        int error;

        /*
         * We have to acquire the vp from ap->a_nch because we will likely
         * unresolve the namecache entry, and a vrele/vput is needed to
         * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
         *
         * We have to use vget to clear any inactive state on the vnode,
         * otherwise the vnode may remain inactive and thus tmpfs_inactive
         * will not get called when we release it.
         */
        error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
        KKASSERT(error == 0);
        vn_unlock(vp);

        /*
         * Prevalidate so we don't hit an assertion later
         */
        if (vp->v_type != VDIR) {
                error = ENOTDIR;
                goto out;
        }

        tmp = VFS_TO_TMPFS(dvp->v_mount);
        dnode = VP_TO_TMPFS_DIR(dvp);
        node = VP_TO_TMPFS_DIR(vp);

        /*
         * Directories with more than two entries ('.' and '..') cannot
         * be removed.
         */
        if (node->tn_size > 0) {
                error = ENOTEMPTY;
                goto out;
        }

        if ((dnode->tn_flags & APPEND)
            || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
                error = EPERM;
                goto out;
        }

        /*
         * This invariant holds only if we are not trying to
         * remove "..".  We checked for that above so this is safe now.
         */
        KKASSERT(node->tn_dir.tn_parent == dnode);

        /*
         * Get the directory entry associated with node (vp).  This
         * was filled by tmpfs_lookup while looking up the entry.
         */
        TMPFS_NODE_LOCK(dnode);
        de = tmpfs_dir_lookup(dnode, node, ncp);
        KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));

        /* Check flags to see if we are allowed to remove the directory. */
        if ((dnode->tn_flags & APPEND) ||
            node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
                error = EPERM;
                TMPFS_NODE_UNLOCK(dnode);
                goto out;
        }

        /* Detach the directory entry from the directory (dnode). */
        tmpfs_dir_detach(dnode, de);
        TMPFS_NODE_UNLOCK(dnode);

        /* No vnode should be allocated for this entry from this point */
        TMPFS_NODE_LOCK(dnode);
        TMPFS_ASSERT_ELOCKED(dnode);
        TMPFS_NODE_LOCK(node);
        TMPFS_ASSERT_ELOCKED(node);

        /*
         * Must set parent linkage to NULL (tested by ncreate to disallow
         * the creation of new files/dirs in a deleted directory)
         */
        node->tn_status |= TMPFS_NODE_CHANGED;

        dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
                            TMPFS_NODE_MODIFIED;

        TMPFS_NODE_UNLOCK(node);
        TMPFS_NODE_UNLOCK(dnode);

        /* Free the directory entry we just deleted.  Note that the node
         * referred by it will not be removed until the vnode is really
         * reclaimed. */
        tmpfs_free_dirent(tmp, de);

        /* Release the deleted vnode (will destroy the node, notify
         * interested parties and clean it from the cache). */

        TMPFS_NODE_LOCK(dnode);
        dnode->tn_status |= TMPFS_NODE_CHANGED;
        TMPFS_NODE_UNLOCK(dnode);
        tmpfs_update(dvp);

        cache_unlink(ap->a_nch);
        tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
        error = 0;

out:
        vrele(vp);

        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nsymlink(struct vop_nsymlink_args *ap)
{
        struct vnode *dvp = ap->a_dvp;
        struct vnode **vpp = ap->a_vpp;
        struct namecache *ncp = ap->a_nch->ncp;
        struct vattr *vap = ap->a_vap;
        struct ucred *cred = ap->a_cred;
        char *target = ap->a_target;
        int error;

        vap->va_type = VLNK;
        error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
        if (error == 0) {
                tmpfs_knote(*vpp, NOTE_WRITE);
                cache_setunresolved(ap->a_nch);
                cache_setvp(ap->a_nch, *vpp);
        }
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_readdir(struct vop_readdir_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
        int *eofflag = ap->a_eofflag;
        off_t **cookies = ap->a_cookies;
        int *ncookies = ap->a_ncookies;
        struct tmpfs_mount *tmp;
        int error;
        off_t startoff;
        off_t cnt = 0;
        struct tmpfs_node *node;

        /* This operation only makes sense on directory nodes. */
        if (vp->v_type != VDIR) {
                return ENOTDIR;
        }

        tmp = VFS_TO_TMPFS(vp->v_mount);
        node = VP_TO_TMPFS_DIR(vp);
        startoff = uio->uio_offset;

        if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
                error = tmpfs_dir_getdotdent(node, uio);
                if (error != 0) {
                        TMPFS_NODE_LOCK_SH(node);
                        goto outok;
                }
                cnt++;
        }

        if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
                /* may lock parent, cannot hold node lock */
                error = tmpfs_dir_getdotdotdent(tmp, node, uio);
                if (error != 0) {
                        TMPFS_NODE_LOCK_SH(node);
                        goto outok;
                }
                cnt++;
        }

        TMPFS_NODE_LOCK_SH(node);
        error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
        KKASSERT(error >= -1);

        if (error == -1)
                error = 0;

        if (eofflag != NULL)
                *eofflag =
                    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

        /* Update NFS-related variables. */
        if (error == 0 && cookies != NULL && ncookies != NULL) {
                off_t i;
                off_t off = startoff;
                struct tmpfs_dirent *de = NULL;

                *ncookies = cnt;
                *cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

                for (i = 0; i < cnt; i++) {
                        KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
                        if (off == TMPFS_DIRCOOKIE_DOT) {
                                off = TMPFS_DIRCOOKIE_DOTDOT;
                        } else {
                                if (off == TMPFS_DIRCOOKIE_DOTDOT) {
                                        de = RB_MIN(tmpfs_dirtree_cookie,
                                                &node->tn_dir.tn_cookietree);
                                } else if (de != NULL) {
                                        de = RB_NEXT(tmpfs_dirtree_cookie,
                                                &node->tn_dir.tn_cookietree,
                                                de);
                                } else {
                                        de = tmpfs_dir_lookupbycookie(node,
                                                                      off);
                                        KKASSERT(de != NULL);
                                        de = RB_NEXT(tmpfs_dirtree_cookie,
                                                &node->tn_dir.tn_cookietree,
                                                de);
                                }
                                if (de == NULL)
                                        off = TMPFS_DIRCOOKIE_EOF;
                                else
                                        off = tmpfs_dircookie(de);
                        }
                        (*cookies)[i] = off;
                }
                KKASSERT(uio->uio_offset == off);
        }
        TMPFS_NODE_UNLOCK(node);

        if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
                TMPFS_NODE_LOCK(node);
                node->tn_status |= TMPFS_NODE_ACCESSED;
                TMPFS_NODE_UNLOCK(node);
        }
        return error;
}
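
/*
 * Directory offsets are cookies: TMPFS_DIRCOOKIE_DOT and
 * TMPFS_DIRCOOKIE_DOTDOT are synthetic positions for "." and "..",
 * each real entry's cookie comes from tmpfs_dircookie(de), and
 * TMPFS_DIRCOOKIE_EOF terminates the scan.  The cookie array handed
 * back for NFS is rebuilt above by replaying that sequence through the
 * cookie-sorted RB tree.
 */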

/* --------------------------------------------------------------------- */

static int
tmpfs_readlink(struct vop_readlink_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct uio *uio = ap->a_uio;
        int error;
        struct tmpfs_node *node;

        KKASSERT(uio->uio_offset == 0);
        KKASSERT(vp->v_type == VLNK);

        node = VP_TO_TMPFS_NODE(vp);
        TMPFS_NODE_LOCK_SH(node);
        error = uiomove(node->tn_link,
                        MIN(node->tn_size, uio->uio_resid), uio);
        TMPFS_NODE_UNLOCK(node);
        if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
                TMPFS_NODE_LOCK(node);
                node->tn_status |= TMPFS_NODE_ACCESSED;
                TMPFS_NODE_UNLOCK(node);
        }
        return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_inactive(struct vop_inactive_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct tmpfs_node *node;
        struct mount *mp;

        mp = vp->v_mount;
        lwkt_gettoken(&mp->mnt_token);
        node = VP_TO_TMPFS_NODE(vp);

        /*
         * Degenerate case
         */
        if (node == NULL) {
                vrecycle(vp);
                lwkt_reltoken(&mp->mnt_token);
                return(0);
        }

        /*
         * Get rid of unreferenced deleted vnodes sooner rather than
         * later so the data memory can be recovered immediately.
         *
         * We must truncate the vnode to prevent the normal reclamation
         * path from flushing the data for the removed file to disk.
         */
        TMPFS_NODE_LOCK(node);
        if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
            node->tn_links == 0) {
                node->tn_vpstate = TMPFS_VNODE_DOOMED;
                TMPFS_NODE_UNLOCK(node);
                if (node->tn_type == VREG)
                        tmpfs_truncate(vp, 0);
                vrecycle(vp);
        } else {
                /*
                 * We must retain any VM pages belonging to the vnode's
                 * object as the vnode will destroy the object during a
                 * later reclaim.  We call vinvalbuf(V_SAVE) to clean
                 * out the buffer cache.
                 *
                 * On DragonFlyBSD, vnodes are not immediately deactivated
                 * on the 1->0 refs, so this is a relatively optimal
                 * operation.  We have to do this in tmpfs_inactive()
                 * because the pages will have already been thrown away
                 * at the time tmpfs_reclaim() is called.
                 */
                if (node->tn_type == VREG &&
                    node->tn_reg.tn_pages_in_aobj == 0) {
                        vinvalbuf(vp, V_SAVE, 0, 0);
                        KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree));
                        KKASSERT(RB_EMPTY(&vp->v_rbclean_tree));
                        tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj);
                        node->tn_reg.tn_pages_in_aobj = 1;
                }
                TMPFS_NODE_UNLOCK(node);
        }
        lwkt_reltoken(&mp->mnt_token);

        return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_reclaim(struct vop_reclaim_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct tmpfs_mount *tmp;
        struct tmpfs_node *node;
        struct mount *mp;

        mp = vp->v_mount;
        lwkt_gettoken(&mp->mnt_token);

        node = VP_TO_TMPFS_NODE(vp);
        tmp = VFS_TO_TMPFS(vp->v_mount);
        KKASSERT(mp == tmp->tm_mount);

        tmpfs_free_vp(vp);

        /*
         * If the node referenced by this vnode was deleted by the
         * user, we must free its associated data structures now that
         * the vnode is being reclaimed.
         *
         * Directories have an extra link ref.
         */
        TMPFS_NODE_LOCK(node);
        if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
            node->tn_links == 0) {
                node->tn_vpstate = TMPFS_VNODE_DOOMED;
                tmpfs_free_node(tmp, node);
                /* eats the lock */
        } else {
                TMPFS_NODE_UNLOCK(node);
        }
        lwkt_reltoken(&mp->mnt_token);

        KKASSERT(vp->v_data == NULL);
        return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_mountctl(struct vop_mountctl_args *ap)
{
        struct tmpfs_mount *tmp;
        struct mount *mp;
        int rc;

        mp = ap->a_head.a_ops->head.vv_mount;
        lwkt_gettoken(&mp->mnt_token);

        switch (ap->a_op) {
        case (MOUNTCTL_SET_EXPORT):
                tmp = (struct tmpfs_mount *) mp->mnt_data;

                if (ap->a_ctllen != sizeof(struct export_args))
                        rc = (EINVAL);
                else
                        rc = vfs_export(mp, &tmp->tm_export,
                                        (const struct export_args *) ap->a_ctl);
                break;
        default:
                rc = vop_stdmountctl(ap);
                break;
        }

        lwkt_reltoken(&mp->mnt_token);
        return (rc);
}
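
/*
 * MOUNTCTL_SET_EXPORT is the path by which NFS export information
 * (a struct export_args) is pushed down to the filesystem from
 * userland; everything else falls through to vop_stdmountctl().
 */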

/* --------------------------------------------------------------------- */

static int
tmpfs_print(struct vop_print_args *ap)
{
        struct vnode *vp = ap->a_vp;

        struct tmpfs_node *node;

        node = VP_TO_TMPFS_NODE(vp);

        kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
            node, node->tn_flags, node->tn_links);
        kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
            node->tn_mode, node->tn_uid, node->tn_gid,
            (uintmax_t)node->tn_size, node->tn_status);

        if (vp->v_type == VFIFO)
                fifo_printinfo(vp);

        kprintf("\n");

        return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_pathconf(struct vop_pathconf_args *ap)
{
        struct vnode *vp = ap->a_vp;
        int name = ap->a_name;
        register_t *retval = ap->a_retval;
        struct tmpfs_mount *tmp;
        int error;

        error = 0;

        switch (name) {
        case _PC_CHOWN_RESTRICTED:
                *retval = 1;
                break;

        case _PC_FILESIZEBITS:
                tmp = VFS_TO_TMPFS(vp->v_mount);
                *retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1);
                break;

        case _PC_LINK_MAX:
                *retval = LINK_MAX;
                break;

        case _PC_NAME_MAX:
                *retval = NAME_MAX;
                break;

        case _PC_NO_TRUNC:
                *retval = 1;
                break;

        case _PC_PATH_MAX:
                *retval = PATH_MAX;
                break;

        case _PC_PIPE_BUF:
                *retval = PIPE_BUF;
                break;

        case _PC_SYNC_IO:
                *retval = 1;
                break;

        case _PC_2_SYMLINKS:
                *retval = 1;
                break;

        default:
                error = EINVAL;
        }

        return error;
}
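
/*
 * Illustrative note on _PC_FILESIZEBITS above: it reports the bit width
 * needed to represent the largest possible file.  For example, a tmpfs
 * capped at 1 GiB (tm_pages_max * PAGE_SIZE == 2^30) yields
 * flsll(2^30) + 1 == 31 + 1 == 32, which is then clamped to a minimum
 * of 32 bits.
 */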

/************************************************************************
 *                          KQFILTER OPS                                *
 ************************************************************************/

static void filt_tmpfsdetach(struct knote *kn);
static int filt_tmpfsread(struct knote *kn, long hint);
static int filt_tmpfswrite(struct knote *kn, long hint);
static int filt_tmpfsvnode(struct knote *kn, long hint);

static struct filterops tmpfsread_filtops =
        { FILTEROP_ISFD | FILTEROP_MPSAFE,
          NULL, filt_tmpfsdetach, filt_tmpfsread };
static struct filterops tmpfswrite_filtops =
        { FILTEROP_ISFD | FILTEROP_MPSAFE,
          NULL, filt_tmpfsdetach, filt_tmpfswrite };
static struct filterops tmpfsvnode_filtops =
        { FILTEROP_ISFD | FILTEROP_MPSAFE,
          NULL, filt_tmpfsdetach, filt_tmpfsvnode };
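
/*
 * Each filterops initializer above is { f_flags, f_attach, f_detach,
 * f_event }.  The NULL slot is the unused f_attach hook: no attach
 * callback is needed because tmpfs_kqfilter() inserts the knote itself.
 */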

static int
tmpfs_kqfilter (struct vop_kqfilter_args *ap)
{
        struct vnode *vp = ap->a_vp;
        struct knote *kn = ap->a_kn;

        switch (kn->kn_filter) {
        case EVFILT_READ:
                kn->kn_fop = &tmpfsread_filtops;
                break;
        case EVFILT_WRITE:
                kn->kn_fop = &tmpfswrite_filtops;
                break;
        case EVFILT_VNODE:
                kn->kn_fop = &tmpfsvnode_filtops;
                break;
        default:
                return (EOPNOTSUPP);
        }

        kn->kn_hook = (caddr_t)vp;

        knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

        return(0);
}

static void
filt_tmpfsdetach(struct knote *kn)
{
        struct vnode *vp = (void *)kn->kn_hook;

        knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}

static int
filt_tmpfsread(struct knote *kn, long hint)
{
        struct vnode *vp = (void *)kn->kn_hook;
        struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
        off_t off;

        if (hint == NOTE_REVOKE) {
                kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
                return(1);
        }

        /*
         * Interlock against MP races when performing this function.
         */
        TMPFS_NODE_LOCK_SH(node);
        off = node->tn_size - kn->kn_fp->f_offset;
        kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
        if (kn->kn_sfflags & NOTE_OLDAPI) {
                TMPFS_NODE_UNLOCK(node);
                return(1);
        }
        if (kn->kn_data == 0) {
                kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
        }
        TMPFS_NODE_UNLOCK(node);
        return (kn->kn_data != 0);
}

static int
filt_tmpfswrite(struct knote *kn, long hint)
{
        if (hint == NOTE_REVOKE)
                kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
        kn->kn_data = 0;
        return (1);
}

static int
filt_tmpfsvnode(struct knote *kn, long hint)
{
        if (kn->kn_sfflags & hint)
                kn->kn_fflags |= hint;
        if (hint == NOTE_REVOKE) {
                kn->kn_flags |= (EV_EOF | EV_NODATA);
                return (1);
        }
        return (kn->kn_fflags != 0);
}

/*
 * Helper to move VM pages between objects
 *
 * NOTE: The vm_page_rename() dirties the page, so we can clear the
 *       PG_NEED_COMMIT flag.  If the pages are being moved into tn_aobj,
 *       the pageout daemon will be able to page them out.
 */
static int
tmpfs_move_pages_callback(vm_page_t p, void *data)
{
        struct rb_vm_page_scan_info *info = data;
        vm_pindex_t pindex;

        pindex = p->pindex;
        if (vm_page_busy_try(p, TRUE)) {
                vm_page_sleep_busy(p, TRUE, "tpgmov");
                info->error = -1;
                return -1;
        }
        if (p->object != info->object || p->pindex != pindex) {
                vm_page_wakeup(p);
                info->error = -1;
                return -1;
        }
        vm_page_rename(p, info->dest_object, pindex);
        vm_page_clear_commit(p);
        vm_page_wakeup(p);
        /* page automatically made dirty */

        return 0;
}

static
void
tmpfs_move_pages(vm_object_t src, vm_object_t dst)
{
        struct rb_vm_page_scan_info info;

        vm_object_hold(src);
        vm_object_hold(dst);
        info.object = src;
        info.dest_object = dst;
        do {
                info.error = 1;
                vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
                                        tmpfs_move_pages_callback, &info);
        } while (info.error < 0);
        vm_object_drop(dst);
        vm_object_drop(src);
}
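
/*
 * Note on the retry loop above: the RB scan callback aborts (returning
 * -1 and setting info.error to -1) whenever a page is busy or was
 * repositioned while we slept, so the scan is simply restarted from
 * scratch until a full pass completes without interference.
 */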

/* --------------------------------------------------------------------- */

/*
 * vnode operations vector used for files stored in a tmpfs file system.
 */
struct vop_ops tmpfs_vnode_vops = {
        .vop_default =          vop_defaultop,
        .vop_getpages =         vop_stdgetpages,
        .vop_putpages =         vop_stdputpages,
        .vop_ncreate =          tmpfs_ncreate,
        .vop_nresolve =         tmpfs_nresolve,
        .vop_nlookupdotdot =    tmpfs_nlookupdotdot,
        .vop_nmknod =           tmpfs_nmknod,
        .vop_open =             tmpfs_open,
        .vop_close =            tmpfs_close,
        .vop_access =           tmpfs_access,
        .vop_getattr =          tmpfs_getattr,
        .vop_setattr =          tmpfs_setattr,
        .vop_read =             tmpfs_read,
        .vop_write =            tmpfs_write,
        .vop_fsync =            tmpfs_fsync,
        .vop_mountctl =         tmpfs_mountctl,
        .vop_nremove =          tmpfs_nremove,
        .vop_nlink =            tmpfs_nlink,
        .vop_nrename =          tmpfs_nrename,
        .vop_nmkdir =           tmpfs_nmkdir,
        .vop_nrmdir =           tmpfs_nrmdir,
        .vop_nsymlink =         tmpfs_nsymlink,
        .vop_readdir =          tmpfs_readdir,
        .vop_readlink =         tmpfs_readlink,
        .vop_inactive =         tmpfs_inactive,
        .vop_reclaim =          tmpfs_reclaim,
        .vop_print =            tmpfs_print,
        .vop_pathconf =         tmpfs_pathconf,
        .vop_bmap =             tmpfs_bmap,
        .vop_strategy =         tmpfs_strategy,
        .vop_advlock =          tmpfs_advlock,
        .vop_kqfilter =         tmpfs_kqfilter
};