2 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
5 * This code is derived from software contributed to The NetBSD Foundation
6 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
30 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
34 * tmpfs vnode interface.
37 #include <sys/kernel.h>
38 #include <sys/kern_syscall.h>
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/lockf.h>
44 #include <sys/resourcevar.h>
45 #include <sys/sched.h>
47 #include <sys/systm.h>
48 #include <sys/sysctl.h>
49 #include <sys/unistd.h>
50 #include <sys/vfsops.h>
51 #include <sys/vnode.h>
52 #include <sys/mountctl.h>
55 #include <vm/vm_extern.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_page.h>
58 #include <vm/vm_pageout.h>
59 #include <vm/vm_pager.h>
60 #include <vm/swap_pager.h>
63 #include <vm/vm_page2.h>
65 #include <vfs/fifofs/fifo.h>
66 #include <vfs/tmpfs/tmpfs_vnops.h>
69 static void tmpfs_strategy_done(struct bio
*bio
);
70 static void tmpfs_move_pages(vm_object_t src
, vm_object_t dst
);
72 static int tmpfs_cluster_enable
= 1;
73 SYSCTL_NODE(_vfs
, OID_AUTO
, tmpfs
, CTLFLAG_RW
, 0, "TMPFS filesystem");
74 SYSCTL_INT(_vfs_tmpfs
, OID_AUTO
, cluster_enable
, CTLFLAG_RW
,
75 &tmpfs_cluster_enable
, 0, "");
79 tmpfs_knote(struct vnode
*vp
, int flags
)
82 KNOTE(&vp
->v_pollinfo
.vpi_kqinfo
.ki_note
, flags
);
86 /* --------------------------------------------------------------------- */
89 tmpfs_nresolve(struct vop_nresolve_args
*ap
)
91 struct vnode
*dvp
= ap
->a_dvp
;
92 struct vnode
*vp
= NULL
;
93 struct namecache
*ncp
= ap
->a_nch
->ncp
;
94 struct tmpfs_node
*tnode
;
95 struct tmpfs_dirent
*de
;
96 struct tmpfs_node
*dnode
;
99 dnode
= VP_TO_TMPFS_DIR(dvp
);
101 TMPFS_NODE_LOCK_SH(dnode
);
103 de
= tmpfs_dir_lookup(dnode
, NULL
, ncp
);
108 * Allocate a vnode for the node we found. Use
109 * tmpfs_alloc_vp()'s deadlock handling mode.
112 error
= tmpfs_alloc_vp(dvp
->v_mount
, dnode
, tnode
,
113 LK_EXCLUSIVE
| LK_RETRY
, &vp
);
122 TMPFS_NODE_UNLOCK(dnode
);
124 if ((dnode
->tn_status
& TMPFS_NODE_ACCESSED
) == 0) {
125 TMPFS_NODE_LOCK(dnode
);
126 dnode
->tn_status
|= TMPFS_NODE_ACCESSED
;
127 TMPFS_NODE_UNLOCK(dnode
);
131 * Store the result of this lookup in the cache. Avoid this if the
132 * request was for creation, as it does not improve timings on
137 cache_setvp(ap
->a_nch
, vp
);
139 } else if (error
== ENOENT
) {
140 cache_setvp(ap
->a_nch
, NULL
);
146 tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args
*ap
)
148 struct vnode
*dvp
= ap
->a_dvp
;
149 struct vnode
**vpp
= ap
->a_vpp
;
150 struct tmpfs_node
*dnode
= VP_TO_TMPFS_NODE(dvp
);
151 struct ucred
*cred
= ap
->a_cred
;
156 /* Check accessibility of requested node as a first step. */
157 error
= VOP_ACCESS(dvp
, VEXEC
, cred
);
161 if (dnode
->tn_dir
.tn_parent
!= NULL
) {
162 /* Allocate a new vnode on the matching entry. */
163 error
= tmpfs_alloc_vp(dvp
->v_mount
,
164 NULL
, dnode
->tn_dir
.tn_parent
,
165 LK_EXCLUSIVE
| LK_RETRY
, vpp
);
170 return (*vpp
== NULL
) ? ENOENT
: 0;
173 /* --------------------------------------------------------------------- */
176 tmpfs_ncreate(struct vop_ncreate_args
*ap
)
178 struct vnode
*dvp
= ap
->a_dvp
;
179 struct vnode
**vpp
= ap
->a_vpp
;
180 struct namecache
*ncp
= ap
->a_nch
->ncp
;
181 struct vattr
*vap
= ap
->a_vap
;
182 struct ucred
*cred
= ap
->a_cred
;
185 KKASSERT(vap
->va_type
== VREG
|| vap
->va_type
== VSOCK
);
187 error
= tmpfs_alloc_file(dvp
, vpp
, vap
, ncp
, cred
, NULL
);
189 cache_setunresolved(ap
->a_nch
);
190 cache_setvp(ap
->a_nch
, *vpp
);
191 tmpfs_knote(dvp
, NOTE_WRITE
);
195 /* --------------------------------------------------------------------- */
198 tmpfs_nmknod(struct vop_nmknod_args
*ap
)
200 struct vnode
*dvp
= ap
->a_dvp
;
201 struct vnode
**vpp
= ap
->a_vpp
;
202 struct namecache
*ncp
= ap
->a_nch
->ncp
;
203 struct vattr
*vap
= ap
->a_vap
;
204 struct ucred
*cred
= ap
->a_cred
;
207 if (vap
->va_type
!= VBLK
&& vap
->va_type
!= VCHR
&&
208 vap
->va_type
!= VFIFO
) {
212 error
= tmpfs_alloc_file(dvp
, vpp
, vap
, ncp
, cred
, NULL
);
214 cache_setunresolved(ap
->a_nch
);
215 cache_setvp(ap
->a_nch
, *vpp
);
216 tmpfs_knote(dvp
, NOTE_WRITE
);
221 /* --------------------------------------------------------------------- */
224 tmpfs_open(struct vop_open_args
*ap
)
226 struct vnode
*vp
= ap
->a_vp
;
227 int mode
= ap
->a_mode
;
228 struct tmpfs_node
*node
;
231 node
= VP_TO_TMPFS_NODE(vp
);
234 /* The file is still active but all its names have been removed
235 * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as
236 * it is about to die. */
237 if (node
->tn_links
< 1)
241 /* If the file is marked append-only, deny write requests. */
242 if ((node
->tn_flags
& APPEND
) &&
243 (mode
& (FWRITE
| O_APPEND
)) == FWRITE
) {
246 if (node
->tn_reg
.tn_pages_in_aobj
) {
247 TMPFS_NODE_LOCK(node
);
248 if (node
->tn_reg
.tn_pages_in_aobj
) {
249 tmpfs_move_pages(node
->tn_reg
.tn_aobj
,
251 node
->tn_reg
.tn_pages_in_aobj
= 0;
253 TMPFS_NODE_UNLOCK(node
);
255 error
= vop_stdopen(ap
);
261 /* --------------------------------------------------------------------- */
264 tmpfs_close(struct vop_close_args
*ap
)
266 struct vnode
*vp
= ap
->a_vp
;
267 struct tmpfs_node
*node
;
270 node
= VP_TO_TMPFS_NODE(vp
);
272 if (node
->tn_links
> 0) {
274 * Update node times. No need to do it if the node has
275 * been deleted, because it will vanish after we return.
280 error
= vop_stdclose(ap
);
285 /* --------------------------------------------------------------------- */
288 tmpfs_access(struct vop_access_args
*ap
)
290 struct vnode
*vp
= ap
->a_vp
;
292 struct tmpfs_node
*node
;
294 node
= VP_TO_TMPFS_NODE(vp
);
296 switch (vp
->v_type
) {
302 if ((ap
->a_mode
& VWRITE
) &&
303 (vp
->v_mount
->mnt_flag
& MNT_RDONLY
)) {
323 if ((ap
->a_mode
& VWRITE
) && (node
->tn_flags
& IMMUTABLE
)) {
328 error
= vop_helper_access(ap
, node
->tn_uid
, node
->tn_gid
,
334 /* --------------------------------------------------------------------- */
337 tmpfs_getattr(struct vop_getattr_args
*ap
)
339 struct vnode
*vp
= ap
->a_vp
;
340 struct vattr
*vap
= ap
->a_vap
;
341 struct tmpfs_node
*node
;
343 node
= VP_TO_TMPFS_NODE(vp
);
347 TMPFS_NODE_LOCK_SH(node
);
348 vap
->va_type
= vp
->v_type
;
349 vap
->va_mode
= node
->tn_mode
;
350 vap
->va_nlink
= node
->tn_links
;
351 vap
->va_uid
= node
->tn_uid
;
352 vap
->va_gid
= node
->tn_gid
;
353 vap
->va_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
.val
[0];
354 vap
->va_fileid
= node
->tn_id
;
355 vap
->va_size
= node
->tn_size
;
356 vap
->va_blocksize
= PAGE_SIZE
;
357 vap
->va_atime
.tv_sec
= node
->tn_atime
;
358 vap
->va_atime
.tv_nsec
= node
->tn_atimensec
;
359 vap
->va_mtime
.tv_sec
= node
->tn_mtime
;
360 vap
->va_mtime
.tv_nsec
= node
->tn_mtimensec
;
361 vap
->va_ctime
.tv_sec
= node
->tn_ctime
;
362 vap
->va_ctime
.tv_nsec
= node
->tn_ctimensec
;
363 vap
->va_gen
= node
->tn_gen
;
364 vap
->va_flags
= node
->tn_flags
;
365 if (vp
->v_type
== VBLK
|| vp
->v_type
== VCHR
) {
366 vap
->va_rmajor
= umajor(node
->tn_rdev
);
367 vap
->va_rminor
= uminor(node
->tn_rdev
);
369 vap
->va_bytes
= round_page(node
->tn_size
);
371 TMPFS_NODE_UNLOCK(node
);
376 /* --------------------------------------------------------------------- */
379 tmpfs_setattr(struct vop_setattr_args
*ap
)
381 struct vnode
*vp
= ap
->a_vp
;
382 struct vattr
*vap
= ap
->a_vap
;
383 struct ucred
*cred
= ap
->a_cred
;
384 struct tmpfs_node
*node
= VP_TO_TMPFS_NODE(vp
);
388 TMPFS_NODE_LOCK(node
);
389 if (error
== 0 && (vap
->va_flags
!= VNOVAL
)) {
390 error
= tmpfs_chflags(vp
, vap
->va_flags
, cred
);
391 kflags
|= NOTE_ATTRIB
;
394 if (error
== 0 && (vap
->va_size
!= VNOVAL
)) {
395 /* restore any saved pages before proceeding */
396 if (node
->tn_reg
.tn_pages_in_aobj
) {
397 tmpfs_move_pages(node
->tn_reg
.tn_aobj
, vp
->v_object
);
398 node
->tn_reg
.tn_pages_in_aobj
= 0;
400 if (vap
->va_size
> node
->tn_size
)
401 kflags
|= NOTE_WRITE
| NOTE_EXTEND
;
403 kflags
|= NOTE_WRITE
;
404 error
= tmpfs_chsize(vp
, vap
->va_size
, cred
);
407 if (error
== 0 && (vap
->va_uid
!= (uid_t
)VNOVAL
||
408 vap
->va_gid
!= (gid_t
)VNOVAL
)) {
409 error
= tmpfs_chown(vp
, vap
->va_uid
, vap
->va_gid
, cred
);
410 kflags
|= NOTE_ATTRIB
;
413 if (error
== 0 && (vap
->va_mode
!= (mode_t
)VNOVAL
)) {
414 error
= tmpfs_chmod(vp
, vap
->va_mode
, cred
);
415 kflags
|= NOTE_ATTRIB
;
418 if (error
== 0 && ((vap
->va_atime
.tv_sec
!= VNOVAL
&&
419 vap
->va_atime
.tv_nsec
!= VNOVAL
) ||
420 (vap
->va_mtime
.tv_sec
!= VNOVAL
&&
421 vap
->va_mtime
.tv_nsec
!= VNOVAL
) )) {
422 error
= tmpfs_chtimes(vp
, &vap
->va_atime
, &vap
->va_mtime
,
423 vap
->va_vaflags
, cred
);
424 kflags
|= NOTE_ATTRIB
;
428 * Update the node times. We give preference to the error codes
429 * generated by this function rather than the ones that may arise
433 TMPFS_NODE_UNLOCK(node
);
434 tmpfs_knote(vp
, kflags
);
439 /* --------------------------------------------------------------------- */
442 * fsync is usually a NOP, but we must take action when unmounting or
446 tmpfs_fsync(struct vop_fsync_args
*ap
)
448 struct tmpfs_node
*node
;
449 struct vnode
*vp
= ap
->a_vp
;
451 node
= VP_TO_TMPFS_NODE(vp
);
454 * tmpfs vnodes typically remain dirty, avoid long syncer scans
455 * by forcing removal from the syncer list.
457 vn_syncer_remove(vp
, 1);
460 if (vp
->v_type
== VREG
) {
461 if (vp
->v_flag
& VRECLAIMED
) {
462 if (node
->tn_links
== 0)
463 tmpfs_truncate(vp
, 0);
465 vfsync(ap
->a_vp
, ap
->a_waitfor
, 1, NULL
, NULL
);
472 /* --------------------------------------------------------------------- */
475 tmpfs_read(struct vop_read_args
*ap
)
478 struct vnode
*vp
= ap
->a_vp
;
479 struct uio
*uio
= ap
->a_uio
;
480 struct tmpfs_node
*node
;
491 if (uio
->uio_offset
< 0)
493 if (vp
->v_type
!= VREG
)
497 * Extract node, try to shortcut the operation through
498 * the VM page cache, allowing us to avoid buffer cache
501 node
= VP_TO_TMPFS_NODE(vp
);
502 resid
= uio
->uio_resid
;
503 seqcount
= ap
->a_ioflag
>> 16;
504 error
= vop_helper_read_shortcut(ap
);
507 if (uio
->uio_resid
== 0) {
514 * restore any saved pages before proceeding
516 if (node
->tn_reg
.tn_pages_in_aobj
) {
517 TMPFS_NODE_LOCK(node
);
518 if (node
->tn_reg
.tn_pages_in_aobj
) {
519 tmpfs_move_pages(node
->tn_reg
.tn_aobj
, vp
->v_object
);
520 node
->tn_reg
.tn_pages_in_aobj
= 0;
522 TMPFS_NODE_UNLOCK(node
);
526 * Fall-through to our normal read code.
528 while (uio
->uio_resid
> 0 && uio
->uio_offset
< node
->tn_size
) {
530 * Use buffer cache I/O (via tmpfs_strategy)
532 offset
= (size_t)uio
->uio_offset
& TMPFS_BLKMASK64
;
533 base_offset
= (off_t
)uio
->uio_offset
- offset
;
534 bp
= getcacheblk(vp
, base_offset
, TMPFS_BLKSIZE
, GETBLK_KVABIO
);
536 if (tmpfs_cluster_enable
) {
537 error
= cluster_readx(vp
, node
->tn_size
,
540 B_NOTMETA
| B_KVABIO
,
545 error
= bread_kvabio(vp
, base_offset
,
550 kprintf("tmpfs_read bread error %d\n", error
);
555 * tmpfs pretty much fiddles directly with the VM
556 * system, don't let it exhaust it or we won't play
557 * nice with other processes.
559 * Only do this if the VOP is coming from a normal
560 * read/write. The VM system handles the case for
563 if (uio
->uio_segflg
!= UIO_NOCOPY
)
566 bp
->b_flags
|= B_CLUSTEROK
;
570 * Figure out how many bytes we can actually copy this loop.
572 len
= TMPFS_BLKSIZE
- offset
;
573 if (len
> uio
->uio_resid
)
574 len
= uio
->uio_resid
;
575 if (len
> node
->tn_size
- uio
->uio_offset
)
576 len
= (size_t)(node
->tn_size
- uio
->uio_offset
);
578 error
= uiomovebp(bp
, (char *)bp
->b_data
+ offset
, len
, uio
);
581 kprintf("tmpfs_read uiomove error %d\n", error
);
587 if ((node
->tn_status
& TMPFS_NODE_ACCESSED
) == 0) {
588 TMPFS_NODE_LOCK(node
);
589 node
->tn_status
|= TMPFS_NODE_ACCESSED
;
590 TMPFS_NODE_UNLOCK(node
);
596 tmpfs_write(struct vop_write_args
*ap
)
599 struct vnode
*vp
= ap
->a_vp
;
600 struct uio
*uio
= ap
->a_uio
;
601 struct thread
*td
= uio
->uio_td
;
602 struct tmpfs_node
*node
;
615 if (uio
->uio_resid
== 0) {
619 node
= VP_TO_TMPFS_NODE(vp
);
621 if (vp
->v_type
!= VREG
)
623 seqcount
= ap
->a_ioflag
>> 16;
625 TMPFS_NODE_LOCK(node
);
628 * restore any saved pages before proceeding
630 if (node
->tn_reg
.tn_pages_in_aobj
) {
631 tmpfs_move_pages(node
->tn_reg
.tn_aobj
, vp
->v_object
);
632 node
->tn_reg
.tn_pages_in_aobj
= 0;
635 oldsize
= node
->tn_size
;
636 if (ap
->a_ioflag
& IO_APPEND
)
637 uio
->uio_offset
= node
->tn_size
;
640 * Check for illegal write offsets.
642 if (uio
->uio_offset
+ uio
->uio_resid
>
643 VFS_TO_TMPFS(vp
->v_mount
)->tm_maxfilesize
) {
649 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
651 if (vp
->v_type
== VREG
&& td
!= NULL
&& td
->td_lwp
!= NULL
) {
652 error
= kern_getrlimit(RLIMIT_FSIZE
, &limit
);
655 if (uio
->uio_offset
+ uio
->uio_resid
> limit
.rlim_cur
) {
656 ksignal(td
->td_proc
, SIGXFSZ
);
663 * Extend the file's size if necessary
665 extended
= ((uio
->uio_offset
+ uio
->uio_resid
) > node
->tn_size
);
667 while (uio
->uio_resid
> 0) {
669 * Don't completely blow out running buffer I/O
670 * when being hit from the pageout daemon.
672 if (uio
->uio_segflg
== UIO_NOCOPY
&&
673 (ap
->a_ioflag
& IO_RECURSE
) == 0) {
674 bwillwrite(TMPFS_BLKSIZE
);
678 * Use buffer cache I/O (via tmpfs_strategy)
680 offset
= (size_t)uio
->uio_offset
& TMPFS_BLKMASK64
;
681 base_offset
= (off_t
)uio
->uio_offset
- offset
;
682 len
= TMPFS_BLKSIZE
- offset
;
683 if (len
> uio
->uio_resid
)
684 len
= uio
->uio_resid
;
686 if ((uio
->uio_offset
+ len
) > node
->tn_size
) {
687 trivial
= (uio
->uio_offset
<= node
->tn_size
);
688 error
= tmpfs_reg_resize(vp
, uio
->uio_offset
+ len
,
695 * Read to fill in any gaps. Theoretically we could
696 * optimize this if the write covers the entire buffer
697 * and is not a UIO_NOCOPY write, however this can lead
698 * to a security violation exposing random kernel memory
699 * (whatever junk was in the backing VM pages before).
701 * So just use bread() to do the right thing.
703 error
= bread_kvabio(vp
, base_offset
, TMPFS_BLKSIZE
, &bp
);
705 error
= uiomovebp(bp
, (char *)bp
->b_data
+ offset
, len
, uio
);
707 kprintf("tmpfs_write uiomove error %d\n", error
);
712 if (uio
->uio_offset
> node
->tn_size
) {
713 node
->tn_size
= uio
->uio_offset
;
714 kflags
|= NOTE_EXTEND
;
716 kflags
|= NOTE_WRITE
;
719 * Always try to flush the page in the UIO_NOCOPY case. This
720 * can come from the pageout daemon or during vnode eviction.
721 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
723 * For the normal case we buwrite(), dirtying the underlying
724 * VM pages instead of dirtying the buffer and releasing the
725 * buffer as a clean buffer. This allows tmpfs to use
726 * essentially all available memory to cache file data.
727 * If we used bdwrite() the buffer cache would wind up
728 * flushing the data to swap too quickly.
730 * But because tmpfs can seriously load the VM system we
731 * fall-back to using bdwrite() when free memory starts
732 * to get low. This shifts the load away from the VM system
733 * and makes tmpfs act more like a normal filesystem with
734 * regards to disk activity.
736 * tmpfs pretty much fiddles directly with the VM
737 * system, don't let it exhaust it or we won't play
738 * nice with other processes. Only do this if the
739 * VOP is coming from a normal read/write. The VM system
740 * handles the case for UIO_NOCOPY.
742 bp
->b_flags
|= B_CLUSTEROK
;
743 if (uio
->uio_segflg
== UIO_NOCOPY
) {
745 * Flush from the pageout daemon, deal with
746 * potentially very heavy tmpfs write activity
747 * causing long stalls in the pageout daemon
748 * before pages get to free/cache.
750 * (a) Under severe pressure setting B_DIRECT will
751 * cause a buffer release to try to free the
754 * (b) Under modest memory pressure the B_RELBUF
755 * alone is sufficient to get the pages moved
756 * to the cache. We could also force this by
757 * setting B_NOTMETA but that might have other
758 * unintended side-effects (e.g. setting
759 * PG_NOTMETA on the VM page).
761 * Hopefully this will unblock the VM system more
762 * quickly under extreme tmpfs write load.
764 if (vm_page_count_min(vm_page_free_hysteresis
))
765 bp
->b_flags
|= B_DIRECT
;
766 bp
->b_flags
|= B_AGE
| B_RELBUF
;
767 bp
->b_act_count
= 0; /* buffer->deactivate pgs */
769 } else if (vm_page_count_target()) {
771 * Normal (userland) write but we are low on memory,
772 * run the buffer through the buffer cache.
774 bp
->b_act_count
= 0; /* buffer->deactivate pgs */
778 * Otherwise run the buffer directly through to the
782 /*vm_wait_nominal();*/
786 kprintf("tmpfs_write bwrite error %d\n", bp
->b_error
);
793 (void)tmpfs_reg_resize(vp
, oldsize
, trivial
);
794 kflags
&= ~NOTE_EXTEND
;
800 * Currently we don't set the mtime on files modified via mmap()
801 * because we can't tell the difference between those modifications
802 * and an attempt by the pageout daemon to flush tmpfs pages to
805 * This is because in order to defer flushes as long as possible
806 * buwrite() works by marking the underlying VM pages dirty in
807 * order to be able to dispose of the buffer cache buffer without
810 if (uio
->uio_segflg
== UIO_NOCOPY
) {
811 if (vp
->v_flag
& VLASTWRITETS
) {
812 node
->tn_mtime
= vp
->v_lastwrite_ts
.tv_sec
;
813 node
->tn_mtimensec
= vp
->v_lastwrite_ts
.tv_nsec
;
816 node
->tn_status
|= TMPFS_NODE_MODIFIED
;
817 vclrflags(vp
, VLASTWRITETS
);
821 node
->tn_status
|= TMPFS_NODE_CHANGED
;
823 if (node
->tn_mode
& (S_ISUID
| S_ISGID
)) {
824 if (priv_check_cred(ap
->a_cred
, PRIV_VFS_RETAINSUGID
, 0))
825 node
->tn_mode
&= ~(S_ISUID
| S_ISGID
);
828 TMPFS_NODE_UNLOCK(node
);
830 tmpfs_knote(vp
, kflags
);
836 tmpfs_advlock(struct vop_advlock_args
*ap
)
838 struct tmpfs_node
*node
;
839 struct vnode
*vp
= ap
->a_vp
;
842 node
= VP_TO_TMPFS_NODE(vp
);
843 error
= (lf_advlock(ap
, &node
->tn_advlock
, node
->tn_size
));
849 * The strategy function is typically only called when memory pressure
850 * forces the system to attempt to pageout pages. It can also be called
851 * by [n]vtruncbuf() when a truncation cuts a page in half. Normal write
854 * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
855 * our cpu. swap_pager_strategy() is all we really use, and it directly
859 tmpfs_strategy(struct vop_strategy_args
*ap
)
861 struct bio
*bio
= ap
->a_bio
;
863 struct buf
*bp
= bio
->bio_buf
;
864 struct vnode
*vp
= ap
->a_vp
;
865 struct tmpfs_node
*node
;
870 if (vp
->v_type
!= VREG
) {
871 bp
->b_resid
= bp
->b_bcount
;
872 bp
->b_flags
|= B_ERROR
| B_INVAL
;
873 bp
->b_error
= EINVAL
;
878 node
= VP_TO_TMPFS_NODE(vp
);
880 uobj
= node
->tn_reg
.tn_aobj
;
883 * Don't bother flushing to swap if there is no swap, just
884 * ensure that the pages are marked as needing a commit (still).
886 if (bp
->b_cmd
== BUF_CMD_WRITE
&& vm_swap_size
== 0) {
887 for (i
= 0; i
< bp
->b_xio
.xio_npages
; ++i
) {
888 m
= bp
->b_xio
.xio_pages
[i
];
889 vm_page_need_commit(m
);
895 nbio
= push_bio(bio
);
896 nbio
->bio_done
= tmpfs_strategy_done
;
897 nbio
->bio_offset
= bio
->bio_offset
;
898 swap_pager_strategy(uobj
, nbio
);
904 * If we were unable to commit the pages to swap make sure they are marked
905 * as needing a commit (again). If we were, clear the flag to allow the
908 * Do not error-out the buffer. In particular, vinvalbuf() needs to
912 tmpfs_strategy_done(struct bio
*bio
)
920 if (bp
->b_flags
& B_ERROR
) {
921 bp
->b_flags
&= ~B_ERROR
;
924 for (i
= 0; i
< bp
->b_xio
.xio_npages
; ++i
) {
925 m
= bp
->b_xio
.xio_pages
[i
];
926 vm_page_need_commit(m
);
929 for (i
= 0; i
< bp
->b_xio
.xio_npages
; ++i
) {
930 m
= bp
->b_xio
.xio_pages
[i
];
931 vm_page_clear_commit(m
);
939 tmpfs_bmap(struct vop_bmap_args
*ap
)
941 if (ap
->a_doffsetp
!= NULL
)
942 *ap
->a_doffsetp
= ap
->a_loffset
;
943 if (ap
->a_runp
!= NULL
)
945 if (ap
->a_runb
!= NULL
)
951 /* --------------------------------------------------------------------- */
954 tmpfs_nremove(struct vop_nremove_args
*ap
)
956 struct vnode
*dvp
= ap
->a_dvp
;
957 struct namecache
*ncp
= ap
->a_nch
->ncp
;
960 struct tmpfs_dirent
*de
;
961 struct tmpfs_mount
*tmp
;
962 struct tmpfs_node
*dnode
;
963 struct tmpfs_node
*node
;
966 * We have to acquire the vp from ap->a_nch because we will likely
967 * unresolve the namecache entry, and a vrele/vput is needed to
968 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
970 * We have to use vget to clear any inactive state on the vnode,
971 * otherwise the vnode may remain inactive and thus tmpfs_inactive
972 * will not get called when we release it.
974 error
= cache_vget(ap
->a_nch
, ap
->a_cred
, LK_SHARED
, &vp
);
975 KKASSERT(vp
->v_mount
== dvp
->v_mount
);
976 KKASSERT(error
== 0);
979 if (vp
->v_type
== VDIR
) {
984 dnode
= VP_TO_TMPFS_DIR(dvp
);
985 node
= VP_TO_TMPFS_NODE(vp
);
986 tmp
= VFS_TO_TMPFS(vp
->v_mount
);
988 TMPFS_NODE_LOCK(dnode
);
989 de
= tmpfs_dir_lookup(dnode
, node
, ncp
);
992 TMPFS_NODE_UNLOCK(dnode
);
996 /* Files marked as immutable or append-only cannot be deleted. */
997 if ((node
->tn_flags
& (IMMUTABLE
| APPEND
| NOUNLINK
)) ||
998 (dnode
->tn_flags
& APPEND
)) {
1000 TMPFS_NODE_UNLOCK(dnode
);
1004 /* Remove the entry from the directory; as it is a file, we do not
1005 * have to change the number of hard links of the directory. */
1006 tmpfs_dir_detach(dnode
, de
);
1007 TMPFS_NODE_UNLOCK(dnode
);
1009 /* Free the directory entry we just deleted. Note that the node
1010 * referred to by it will not be removed until the vnode is really
1012 tmpfs_free_dirent(tmp
, de
);
1014 if (node
->tn_links
> 0) {
1015 TMPFS_NODE_LOCK(node
);
1016 node
->tn_status
|= TMPFS_NODE_CHANGED
;
1017 TMPFS_NODE_UNLOCK(node
);
1020 cache_unlink(ap
->a_nch
);
1021 tmpfs_knote(vp
, NOTE_DELETE
);
1026 tmpfs_knote(dvp
, NOTE_WRITE
);
1033 /* --------------------------------------------------------------------- */
1036 tmpfs_nlink(struct vop_nlink_args
*ap
)
1038 struct vnode
*dvp
= ap
->a_dvp
;
1039 struct vnode
*vp
= ap
->a_vp
;
1040 struct namecache
*ncp
= ap
->a_nch
->ncp
;
1041 struct tmpfs_dirent
*de
;
1042 struct tmpfs_node
*node
;
1043 struct tmpfs_node
*dnode
;
1046 KKASSERT(dvp
!= vp
); /* XXX When can this be false? */
1048 node
= VP_TO_TMPFS_NODE(vp
);
1049 dnode
= VP_TO_TMPFS_NODE(dvp
);
1050 TMPFS_NODE_LOCK(dnode
);
1052 /* XXX: Why aren't the following two tests done by the caller? */
1054 /* Hard links of directories are forbidden. */
1055 if (vp
->v_type
== VDIR
) {
1060 /* Cannot create cross-device links. */
1061 if (dvp
->v_mount
!= vp
->v_mount
) {
1066 /* Ensure that we do not overflow the maximum number of links imposed
1068 KKASSERT(node
->tn_links
<= LINK_MAX
);
1069 if (node
->tn_links
>= LINK_MAX
) {
1074 /* We cannot create links of files marked immutable or append-only. */
1075 if (node
->tn_flags
& (IMMUTABLE
| APPEND
)) {
1080 /* Allocate a new directory entry to represent the node. */
1081 error
= tmpfs_alloc_dirent(VFS_TO_TMPFS(vp
->v_mount
), node
,
1082 ncp
->nc_name
, ncp
->nc_nlen
, &de
);
1086 /* Insert the new directory entry into the appropriate directory. */
1087 tmpfs_dir_attach(dnode
, de
);
1089 /* vp link count has changed, so update node times. */
1091 TMPFS_NODE_LOCK(node
);
1092 node
->tn_status
|= TMPFS_NODE_CHANGED
;
1093 TMPFS_NODE_UNLOCK(node
);
1096 tmpfs_knote(vp
, NOTE_LINK
);
1097 cache_setunresolved(ap
->a_nch
);
1098 cache_setvp(ap
->a_nch
, vp
);
1102 TMPFS_NODE_UNLOCK(dnode
);
1104 tmpfs_knote(dvp
, NOTE_WRITE
);
1108 /* --------------------------------------------------------------------- */
1111 tmpfs_nrename(struct vop_nrename_args
*ap
)
1113 struct vnode
*fdvp
= ap
->a_fdvp
;
1114 struct namecache
*fncp
= ap
->a_fnch
->ncp
;
1115 struct vnode
*fvp
= fncp
->nc_vp
;
1116 struct vnode
*tdvp
= ap
->a_tdvp
;
1117 struct namecache
*tncp
= ap
->a_tnch
->ncp
;
1119 struct tmpfs_dirent
*de
, *tde
;
1120 struct tmpfs_mount
*tmp
;
1121 struct tmpfs_node
*fdnode
;
1122 struct tmpfs_node
*fnode
;
1123 struct tmpfs_node
*tnode
;
1124 struct tmpfs_node
*tdnode
;
1129 KKASSERT(fdvp
->v_mount
== fvp
->v_mount
);
1132 * Because tvp can get overwritten we have to vget it instead of
1133 * just vref or use it, otherwise its VINACTIVE flag may not get
1134 * cleared and the node won't get destroyed.
1136 error
= cache_vget(ap
->a_tnch
, ap
->a_cred
, LK_SHARED
, &tvp
);
1138 tnode
= VP_TO_TMPFS_NODE(tvp
);
1144 /* Disallow cross-device renames.
1145 * XXX Why isn't this done by the caller? */
1146 if (fvp
->v_mount
!= tdvp
->v_mount
||
1147 (tvp
!= NULL
&& fvp
->v_mount
!= tvp
->v_mount
)) {
1152 tmp
= VFS_TO_TMPFS(tdvp
->v_mount
);
1153 tdnode
= VP_TO_TMPFS_DIR(tdvp
);
1155 /* If source and target are the same file, there is nothing to do. */
1161 fdnode
= VP_TO_TMPFS_DIR(fdvp
);
1162 fnode
= VP_TO_TMPFS_NODE(fvp
);
1163 TMPFS_NODE_LOCK(fdnode
);
1164 de
= tmpfs_dir_lookup(fdnode
, fnode
, fncp
);
1165 TMPFS_NODE_UNLOCK(fdnode
); /* XXX depend on namecache lock */
1167 /* Avoid manipulating '.' and '..' entries. */
1172 KKASSERT(de
->td_node
== fnode
);
1175 * If replacing an entry in the target directory and that entry
1176 * is a directory, it must be empty.
1178 * Kern_rename guarantees the destination to be a directory
1179 * if the source is one (it does?).
1182 KKASSERT(tnode
!= NULL
);
1184 if ((tnode
->tn_flags
& (NOUNLINK
| IMMUTABLE
| APPEND
)) ||
1185 (tdnode
->tn_flags
& (APPEND
| IMMUTABLE
))) {
1190 if (fnode
->tn_type
== VDIR
&& tnode
->tn_type
== VDIR
) {
1191 if (tnode
->tn_size
> 0) {
1195 } else if (fnode
->tn_type
== VDIR
&& tnode
->tn_type
!= VDIR
) {
1198 } else if (fnode
->tn_type
!= VDIR
&& tnode
->tn_type
== VDIR
) {
1202 KKASSERT(fnode
->tn_type
!= VDIR
&&
1203 tnode
->tn_type
!= VDIR
);
1207 if ((fnode
->tn_flags
& (NOUNLINK
| IMMUTABLE
| APPEND
)) ||
1208 (fdnode
->tn_flags
& (APPEND
| IMMUTABLE
))) {
1214 * Ensure that we have enough memory to hold the new name, if it
1215 * has to be changed.
1217 if (fncp
->nc_nlen
!= tncp
->nc_nlen
||
1218 bcmp(fncp
->nc_name
, tncp
->nc_name
, fncp
->nc_nlen
) != 0) {
1219 newname
= kmalloc(tncp
->nc_nlen
+ 1, tmp
->tm_name_zone
,
1220 M_WAITOK
| M_NULLOK
);
1221 if (newname
== NULL
) {
1225 bcopy(tncp
->nc_name
, newname
, tncp
->nc_nlen
);
1226 newname
[tncp
->nc_nlen
] = '\0';
1232 * Unlink entry from source directory. Note that the kernel has
1233 * already checked for illegal recursion cases (renaming a directory
1234 * into a subdirectory of itself).
1236 if (fdnode
!= tdnode
) {
1237 tmpfs_dir_detach(fdnode
, de
);
1239 /* XXX depend on namecache lock */
1240 TMPFS_NODE_LOCK(fdnode
);
1241 KKASSERT(de
== tmpfs_dir_lookup(fdnode
, fnode
, fncp
));
1242 RB_REMOVE(tmpfs_dirtree
, &fdnode
->tn_dir
.tn_dirtree
, de
);
1243 RB_REMOVE(tmpfs_dirtree_cookie
,
1244 &fdnode
->tn_dir
.tn_cookietree
, de
);
1245 TMPFS_NODE_UNLOCK(fdnode
);
1249 * Handle any name change. Swap with newname, we will
1250 * deallocate it at the end.
1252 if (newname
!= NULL
) {
1254 TMPFS_NODE_LOCK(fnode
);
1255 fnode
->tn_status
|= TMPFS_NODE_CHANGED
;
1256 TMPFS_NODE_UNLOCK(fnode
);
1258 oldname
= de
->td_name
;
1259 de
->td_name
= newname
;
1260 de
->td_namelen
= (uint16_t)tncp
->nc_nlen
;
1265 * If we are overwriting an entry, we have to remove the old one
1266 * from the target directory.
1269 /* Remove the old entry from the target directory. */
1270 TMPFS_NODE_LOCK(tdnode
);
1271 tde
= tmpfs_dir_lookup(tdnode
, tnode
, tncp
);
1272 tmpfs_dir_detach(tdnode
, tde
);
1273 TMPFS_NODE_UNLOCK(tdnode
);
1274 tmpfs_knote(tdnode
->tn_vnode
, NOTE_DELETE
);
1277 * Free the directory entry we just deleted. Note that the
1278 * node referred to by it will not be removed until the vnode is
1281 tmpfs_free_dirent(VFS_TO_TMPFS(tvp
->v_mount
), tde
);
1282 /*cache_inval_vp(tvp, CINV_DESTROY);*/
1286 * Link entry to target directory. If the entry
1287 * represents a directory move the parent linkage
1290 if (fdnode
!= tdnode
) {
1291 if (de
->td_node
->tn_type
== VDIR
) {
1292 TMPFS_VALIDATE_DIR(fnode
);
1294 tmpfs_dir_attach(tdnode
, de
);
1296 TMPFS_NODE_LOCK(tdnode
);
1297 tdnode
->tn_status
|= TMPFS_NODE_MODIFIED
;
1298 RB_INSERT(tmpfs_dirtree
, &tdnode
->tn_dir
.tn_dirtree
, de
);
1299 RB_INSERT(tmpfs_dirtree_cookie
,
1300 &tdnode
->tn_dir
.tn_cookietree
, de
);
1301 TMPFS_NODE_UNLOCK(tdnode
);
1308 kfree(newname
, tmp
->tm_name_zone
);
1311 cache_rename(ap
->a_fnch
, ap
->a_tnch
);
1312 tmpfs_knote(ap
->a_fdvp
, NOTE_WRITE
);
1313 tmpfs_knote(ap
->a_tdvp
, NOTE_WRITE
);
1314 if (fnode
->tn_vnode
)
1315 tmpfs_knote(fnode
->tn_vnode
, NOTE_RENAME
);
1326 /* --------------------------------------------------------------------- */
1329 tmpfs_nmkdir(struct vop_nmkdir_args
*ap
)
1331 struct vnode
*dvp
= ap
->a_dvp
;
1332 struct vnode
**vpp
= ap
->a_vpp
;
1333 struct namecache
*ncp
= ap
->a_nch
->ncp
;
1334 struct vattr
*vap
= ap
->a_vap
;
1335 struct ucred
*cred
= ap
->a_cred
;
1338 KKASSERT(vap
->va_type
== VDIR
);
1340 error
= tmpfs_alloc_file(dvp
, vpp
, vap
, ncp
, cred
, NULL
);
1342 cache_setunresolved(ap
->a_nch
);
1343 cache_setvp(ap
->a_nch
, *vpp
);
1344 tmpfs_knote(dvp
, NOTE_WRITE
| NOTE_LINK
);
1349 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nrmdir: remove the directory named by the namecache entry
 * ap->a_nch from its parent directory ap->a_dvp.
 *
 * NOTE(review): this listing is missing lines (some local declarations,
 * braces, error assignments and the exit path), so only the steps that
 * are visible are summarized here; the error codes returned by the
 * bail-out checks cannot be confirmed from this view.
 *
 * Visible sequence:
 *  1. cache_vget() obtains vp for the entry with LK_SHARED; failure is
 *     asserted against with KKASSERT.
 *  2. Bail-out checks: vp is not a VDIR, the target still has entries
 *     (node->tn_size > 0), or the flags forbid removal (APPEND on the
 *     parent; NOUNLINK, IMMUTABLE or APPEND on the target).
 *  3. With dnode locked, the dirent is found with tmpfs_dir_lookup(),
 *     the flag check is repeated, and the dirent is detached with
 *     tmpfs_dir_detach().
 *  4. With dnode and node both locked, status bits are updated
 *     (CHANGED on node; ACCESSED, CHANGED and MODIFIED on dnode).
 *  5. The dirent is released with tmpfs_free_dirent(), dnode is marked
 *     CHANGED once more, the namecache entry is dropped with
 *     cache_unlink() and NOTE_WRITE | NOTE_LINK is posted on dvp.
 */
1352 tmpfs_nrmdir(struct vop_nrmdir_args
*ap
)
1354 struct vnode
*dvp
= ap
->a_dvp
;
1355 struct namecache
*ncp
= ap
->a_nch
->ncp
;
1357 struct tmpfs_dirent
*de
;
1358 struct tmpfs_mount
*tmp
;
1359 struct tmpfs_node
*dnode
;
1360 struct tmpfs_node
*node
;
1364 * We have to acquire the vp from ap->a_nch because we will likely
1365 * unresolve the namecache entry, and a vrele/vput is needed to
1366 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
1368 * We have to use vget to clear any inactive state on the vnode,
1369 * otherwise the vnode may remain inactive and thus tmpfs_inactive
1370 * will not get called when we release it.
1372 error
= cache_vget(ap
->a_nch
, ap
->a_cred
, LK_SHARED
, &vp
);
1373 KKASSERT(error
== 0);
1377 * Prevalidate so we don't hit an assertion later
1379 if (vp
->v_type
!= VDIR
) {
1384 tmp
= VFS_TO_TMPFS(dvp
->v_mount
);
1385 dnode
= VP_TO_TMPFS_DIR(dvp
);
1386 node
= VP_TO_TMPFS_DIR(vp
);
1389 * Directories with more than two entries ('.' and '..') cannot
1392 if (node
->tn_size
> 0) {
1397 if ((dnode
->tn_flags
& APPEND
)
1398 || (node
->tn_flags
& (NOUNLINK
| IMMUTABLE
| APPEND
))) {
1404 * This invariant holds only if we are not trying to
1405 * remove "..". We checked for that above so this is safe now.
1407 KKASSERT(node
->tn_dir
.tn_parent
== dnode
);
1410 * Get the directory entry associated with node (vp). This
1411 * was filled by tmpfs_lookup while looking up the entry.
1413 TMPFS_NODE_LOCK(dnode
);
1414 de
= tmpfs_dir_lookup(dnode
, node
, ncp
);
1415 KKASSERT(TMPFS_DIRENT_MATCHES(de
, ncp
->nc_name
, ncp
->nc_nlen
));
1417 /* Check flags to see if we are allowed to remove the directory. */
1418 if ((dnode
->tn_flags
& APPEND
) ||
1419 node
->tn_flags
& (NOUNLINK
| IMMUTABLE
| APPEND
)) {
1421 TMPFS_NODE_UNLOCK(dnode
);
1425 /* Detach the directory entry from the directory (dnode). */
1426 tmpfs_dir_detach(dnode
, de
);
1427 TMPFS_NODE_UNLOCK(dnode
);
1429 /* No vnode should be allocated for this entry from this point */
1430 TMPFS_NODE_LOCK(dnode
);
1431 TMPFS_ASSERT_ELOCKED(dnode
);
1432 TMPFS_NODE_LOCK(node
);
1433 TMPFS_ASSERT_ELOCKED(node
);
1436 * Must set parent linkage to NULL (tested by ncreate to disallow
1437 * the creation of new files/dirs in a deleted directory)
1439 node
->tn_status
|= TMPFS_NODE_CHANGED
;
1441 dnode
->tn_status
|= TMPFS_NODE_ACCESSED
| TMPFS_NODE_CHANGED
|
1442 TMPFS_NODE_MODIFIED
;
1444 TMPFS_NODE_UNLOCK(node
);
1445 TMPFS_NODE_UNLOCK(dnode
);
1447 /* Free the directory entry we just deleted. Note that the node
1448 * referred by it will not be removed until the vnode is really
1450 tmpfs_free_dirent(tmp
, de
);
1452 /* Release the deleted vnode (will destroy the node, notify
1453 * interested parties and clean it from the cache). */
1455 TMPFS_NODE_LOCK(dnode
);
1456 dnode
->tn_status
|= TMPFS_NODE_CHANGED
;
1457 TMPFS_NODE_UNLOCK(dnode
);
1460 cache_unlink(ap
->a_nch
);
1461 tmpfs_knote(dvp
, NOTE_WRITE
| NOTE_LINK
);
1470 /* --------------------------------------------------------------------- */
/*
 * tmpfs_nsymlink: create a symbolic link in directory ap->a_dvp.
 *
 * Forces vap->va_type to VLNK and calls tmpfs_alloc_file(), passing
 * ap->a_target as the final argument.  Afterwards NOTE_WRITE is posted
 * on *vpp and the namecache entry ap->a_nch is re-resolved to *vpp
 * through cache_setunresolved() followed by cache_setvp().
 *
 * NOTE(review): the condition guarding the post-allocation calls and the
 * return statement are not visible in this listing; presumably they run
 * only when tmpfs_alloc_file() reports success -- confirm.
 */
1473 tmpfs_nsymlink(struct vop_nsymlink_args
*ap
)
1475 struct vnode
*dvp
= ap
->a_dvp
;
1476 struct vnode
**vpp
= ap
->a_vpp
;
1477 struct namecache
*ncp
= ap
->a_nch
->ncp
;
1478 struct vattr
*vap
= ap
->a_vap
;
1479 struct ucred
*cred
= ap
->a_cred
;
1480 char *target
= ap
->a_target
;
1483 vap
->va_type
= VLNK
;
1484 error
= tmpfs_alloc_file(dvp
, vpp
, vap
, ncp
, cred
, target
);
1486 tmpfs_knote(*vpp
, NOTE_WRITE
);
1487 cache_setunresolved(ap
->a_nch
);
1488 cache_setvp(ap
->a_nch
, *vpp
);
1493 /* --------------------------------------------------------------------- */
/*
 * tmpfs_readdir: copy directory entries of ap->a_vp into ap->a_uio.
 *
 * Visible sequence:
 *  - Only VDIR vnodes are accepted (the rejection path is not visible).
 *  - The starting uio offset is saved in startoff.
 *  - If the offset is TMPFS_DIRCOOKIE_DOT the dot entry is produced by
 *    tmpfs_dir_getdotdent(); if it is TMPFS_DIRCOOKIE_DOTDOT the dotdot
 *    entry is produced by tmpfs_dir_getdotdotdent(), which is called
 *    without the node lock because it may lock the parent.
 *  - Remaining entries come from tmpfs_dir_getdents() under the shared
 *    node lock; it also fills cnt.
 *  - When eofflag is non-NULL a value is computed from
 *    (error == 0 && uio_offset == TMPFS_DIRCOOKIE_EOF); the line that
 *    stores it is not visible in this listing.
 *  - When both cookies and ncookies are supplied and error is 0, an
 *    array of cnt off_t values is allocated with kmalloc(M_TEMP,
 *    M_WAITOK) and filled by walking the cookie tree from startoff; the
 *    final cookie is asserted to equal uio->uio_offset.
 *  - After the shared lock is dropped, TMPFS_NODE_ACCESSED is set under
 *    the exclusive lock, but only if an unlocked pre-check finds it
 *    clear.
 *
 * NOTE(review): declarations of error, cnt, startoff and i, several
 * braces and the return are missing from this view.
 */
1496 tmpfs_readdir(struct vop_readdir_args
*ap
)
1498 struct vnode
*vp
= ap
->a_vp
;
1499 struct uio
*uio
= ap
->a_uio
;
1500 int *eofflag
= ap
->a_eofflag
;
1501 off_t
**cookies
= ap
->a_cookies
;
1502 int *ncookies
= ap
->a_ncookies
;
1503 struct tmpfs_mount
*tmp
;
1507 struct tmpfs_node
*node
;
1509 /* This operation only makes sense on directory nodes. */
1510 if (vp
->v_type
!= VDIR
) {
1514 tmp
= VFS_TO_TMPFS(vp
->v_mount
);
1515 node
= VP_TO_TMPFS_DIR(vp
);
1516 startoff
= uio
->uio_offset
;
1518 if (uio
->uio_offset
== TMPFS_DIRCOOKIE_DOT
) {
1519 error
= tmpfs_dir_getdotdent(node
, uio
);
1521 TMPFS_NODE_LOCK_SH(node
);
1527 if (uio
->uio_offset
== TMPFS_DIRCOOKIE_DOTDOT
) {
1528 /* may lock parent, cannot hold node lock */
1529 error
= tmpfs_dir_getdotdotdent(tmp
, node
, uio
);
1531 TMPFS_NODE_LOCK_SH(node
);
1537 TMPFS_NODE_LOCK_SH(node
);
1538 error
= tmpfs_dir_getdents(node
, uio
, &cnt
);
1541 KKASSERT(error
>= -1);
1546 if (eofflag
!= NULL
)
1548 (error
== 0 && uio
->uio_offset
== TMPFS_DIRCOOKIE_EOF
);
1550 /* Update NFS-related variables. */
1551 if (error
== 0 && cookies
!= NULL
&& ncookies
!= NULL
) {
1553 off_t off
= startoff
;
1554 struct tmpfs_dirent
*de
= NULL
;
1557 *cookies
= kmalloc(cnt
* sizeof(off_t
), M_TEMP
, M_WAITOK
);
1559 for (i
= 0; i
< cnt
; i
++) {
1560 KKASSERT(off
!= TMPFS_DIRCOOKIE_EOF
);
1561 if (off
== TMPFS_DIRCOOKIE_DOT
) {
1562 off
= TMPFS_DIRCOOKIE_DOTDOT
;
1564 if (off
== TMPFS_DIRCOOKIE_DOTDOT
) {
1565 de
= RB_MIN(tmpfs_dirtree_cookie
,
1566 &node
->tn_dir
.tn_cookietree
);
1567 } else if (de
!= NULL
) {
1568 de
= RB_NEXT(tmpfs_dirtree_cookie
,
1569 &node
->tn_dir
.tn_cookietree
, de
);
1571 de
= tmpfs_dir_lookupbycookie(node
,
1573 KKASSERT(de
!= NULL
);
1574 de
= RB_NEXT(tmpfs_dirtree_cookie
,
1575 &node
->tn_dir
.tn_cookietree
, de
);
1578 off
= TMPFS_DIRCOOKIE_EOF
;
1580 off
= tmpfs_dircookie(de
);
1582 (*cookies
)[i
] = off
;
1584 KKASSERT(uio
->uio_offset
== off
);
1586 TMPFS_NODE_UNLOCK(node
);
1588 if ((node
->tn_status
& TMPFS_NODE_ACCESSED
) == 0) {
1589 TMPFS_NODE_LOCK(node
);
1590 node
->tn_status
|= TMPFS_NODE_ACCESSED
;
1591 TMPFS_NODE_UNLOCK(node
);
1596 /* --------------------------------------------------------------------- */
/*
 * tmpfs_readlink: copy the target of symbolic link ap->a_vp into
 * ap->a_uio.
 *
 * Asserts that the uio offset is 0 and that the vnode is a VLNK.  Under
 * the shared node lock, uiomove() transfers at most
 * MIN(node->tn_size, uio->uio_resid) bytes from node->tn_link.  After
 * the shared lock is released, TMPFS_NODE_ACCESSED is set under the
 * exclusive lock if an unlocked pre-check finds it clear.
 *
 * NOTE(review): the declaration of error and the return statement are
 * not visible in this listing.
 */
1599 tmpfs_readlink(struct vop_readlink_args
*ap
)
1601 struct vnode
*vp
= ap
->a_vp
;
1602 struct uio
*uio
= ap
->a_uio
;
1604 struct tmpfs_node
*node
;
1606 KKASSERT(uio
->uio_offset
== 0);
1607 KKASSERT(vp
->v_type
== VLNK
);
1609 node
= VP_TO_TMPFS_NODE(vp
);
1610 TMPFS_NODE_LOCK_SH(node
);
1611 error
= uiomove(node
->tn_link
,
1612 MIN(node
->tn_size
, uio
->uio_resid
), uio
);
1613 TMPFS_NODE_UNLOCK(node
);
1614 if ((node
->tn_status
& TMPFS_NODE_ACCESSED
) == 0) {
1615 TMPFS_NODE_LOCK(node
);
1616 node
->tn_status
|= TMPFS_NODE_ACCESSED
;
1617 TMPFS_NODE_UNLOCK(node
);
1622 /* --------------------------------------------------------------------- */
/*
 * tmpfs_inactive: VOP_INACTIVE handler for tmpfs vnodes.
 *
 * Runs with mp->mnt_token held.  Visible sequence:
 *  - An early exit releases the token; its condition is not visible in
 *    this listing.
 *  - With the node locked: if the node is not in the
 *    TMPFS_VNODE_ALLOCATING state and has no links, tn_vpstate is set to
 *    TMPFS_VNODE_DOOMED, the node is unlocked and regular files are
 *    truncated to length 0.
 *  - For a regular file whose tn_pages_in_aobj is 0, vinvalbuf(V_SAVE)
 *    flushes the buffer cache, both buffer trees are asserted empty, the
 *    pages are moved from vp->v_object to node->tn_reg.tn_aobj with
 *    tmpfs_move_pages() and tn_pages_in_aobj is set to 1.
 *  - The node lock and the mount token are released.
 *
 * NOTE(review): the lines joining the two cases are missing; presumably
 * the page move is the else branch of the links == 0 test -- confirm
 * against the full file.
 */
1625 tmpfs_inactive(struct vop_inactive_args
*ap
)
1627 struct vnode
*vp
= ap
->a_vp
;
1628 struct tmpfs_node
*node
;
1632 lwkt_gettoken(&mp
->mnt_token
);
1633 node
= VP_TO_TMPFS_NODE(vp
);
1640 lwkt_reltoken(&mp
->mnt_token
);
1645 * Get rid of unreferenced deleted vnodes sooner rather than
1646 * later so the data memory can be recovered immediately.
1648 * We must truncate the vnode to prevent the normal reclamation
1649 * path from flushing the data for the removed file to disk.
1651 TMPFS_NODE_LOCK(node
);
1652 if ((node
->tn_vpstate
& TMPFS_VNODE_ALLOCATING
) == 0 &&
1653 node
->tn_links
== 0)
1655 node
->tn_vpstate
= TMPFS_VNODE_DOOMED
;
1656 TMPFS_NODE_UNLOCK(node
);
1657 if (node
->tn_type
== VREG
)
1658 tmpfs_truncate(vp
, 0);
1662 * We must retain any VM pages belonging to the vnode's
1663 * object as the vnode will destroy the object during a
1664 * later reclaim. We call vinvalbuf(V_SAVE) to clean
1665 * out the buffer cache.
1667 * On DragonFlyBSD, vnodes are not immediately deactivated
1668 * on the 1->0 refs, so this is a relatively optimal
1669 * operation. We have to do this in tmpfs_inactive()
1670 * because the pages will have already been thrown away
1671 * at the time tmpfs_reclaim() is called.
1673 if (node
->tn_type
== VREG
&&
1674 node
->tn_reg
.tn_pages_in_aobj
== 0) {
1675 vinvalbuf(vp
, V_SAVE
, 0, 0);
1676 KKASSERT(RB_EMPTY(&vp
->v_rbdirty_tree
));
1677 KKASSERT(RB_EMPTY(&vp
->v_rbclean_tree
));
1678 tmpfs_move_pages(vp
->v_object
, node
->tn_reg
.tn_aobj
);
1679 node
->tn_reg
.tn_pages_in_aobj
= 1;
1682 TMPFS_NODE_UNLOCK(node
);
1684 lwkt_reltoken(&mp
->mnt_token
);
1689 /* --------------------------------------------------------------------- */
/*
 * tmpfs_reclaim: VOP_RECLAIM handler for tmpfs vnodes.
 *
 * With mp->mnt_token held, resolves the tmpfs node and mount for
 * ap->a_vp and asserts that mp matches tmp->tm_mount.  Under the node
 * lock, a node that is not in the TMPFS_VNODE_ALLOCATING state and has
 * no links is marked TMPFS_VNODE_DOOMED and released with
 * tmpfs_free_node(); the other visible path just unlocks the node.
 * After the token is released the vnode is asserted to have
 * v_data == NULL.
 *
 * NOTE(review): the code that detaches the node from the vnode (making
 * v_data NULL) and the else keyword are not visible in this listing.
 */
1692 tmpfs_reclaim(struct vop_reclaim_args
*ap
)
1694 struct vnode
*vp
= ap
->a_vp
;
1695 struct tmpfs_mount
*tmp
;
1696 struct tmpfs_node
*node
;
1700 lwkt_gettoken(&mp
->mnt_token
);
1702 node
= VP_TO_TMPFS_NODE(vp
);
1703 tmp
= VFS_TO_TMPFS(vp
->v_mount
);
1704 KKASSERT(mp
== tmp
->tm_mount
);
1709 * If the node referenced by this vnode was deleted by the
1710 * user, we must free its associated data structures now that
1711 * the vnode is being reclaimed.
1713 * Directories have an extra link ref.
1715 TMPFS_NODE_LOCK(node
);
1716 if ((node
->tn_vpstate
& TMPFS_VNODE_ALLOCATING
) == 0 &&
1717 node
->tn_links
== 0) {
1718 node
->tn_vpstate
= TMPFS_VNODE_DOOMED
;
1719 tmpfs_free_node(tmp
, node
);
1722 TMPFS_NODE_UNLOCK(node
);
1724 lwkt_reltoken(&mp
->mnt_token
);
1726 KKASSERT(vp
->v_data
== NULL
);
1730 /* --------------------------------------------------------------------- */
/*
 * tmpfs_mountctl: mount control entry point for tmpfs.
 *
 * The mount is taken from ap->a_head.a_ops->head.vv_mount and its
 * mnt_token is held for the duration of the call.  For
 * MOUNTCTL_SET_EXPORT the control buffer length is compared against
 * sizeof(struct export_args) and the request is handed to vfs_export()
 * with the per-mount tm_export state.  The other visible path forwards
 * the request to vop_stdmountctl().
 *
 * NOTE(review): the switch header, the default label, the value assigned
 * on a length mismatch and the return are missing from this listing.
 */
1733 tmpfs_mountctl(struct vop_mountctl_args
*ap
)
1735 struct tmpfs_mount
*tmp
;
1739 mp
= ap
->a_head
.a_ops
->head
.vv_mount
;
1740 lwkt_gettoken(&mp
->mnt_token
);
1743 case (MOUNTCTL_SET_EXPORT
):
1744 tmp
= (struct tmpfs_mount
*) mp
->mnt_data
;
1746 if (ap
->a_ctllen
!= sizeof(struct export_args
))
1749 rc
= vfs_export(mp
, &tmp
->tm_export
,
1750 (const struct export_args
*) ap
->a_ctl
);
1753 rc
= vop_stdmountctl(ap
);
1757 lwkt_reltoken(&mp
->mnt_token
);
1761 /* --------------------------------------------------------------------- */
/*
 * tmpfs_print: dump a description of the tmpfs node behind ap->a_vp
 * using kprintf().
 *
 * The first line prints the node pointer, tn_flags and tn_links; the
 * second prints mode, owner, group, size (cast to uintmax_t) and
 * tn_status.  FIFO vnodes get additional handling whose body is not
 * visible in this listing.
 */
1764 tmpfs_print(struct vop_print_args
*ap
)
1766 struct vnode
*vp
= ap
->a_vp
;
1768 struct tmpfs_node
*node
;
1770 node
= VP_TO_TMPFS_NODE(vp
);
1772 kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1773 node
, node
->tn_flags
, node
->tn_links
);
1774 kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1775 node
->tn_mode
, node
->tn_uid
, node
->tn_gid
,
1776 (uintmax_t)node
->tn_size
, node
->tn_status
);
1778 if (vp
->v_type
== VFIFO
)
1786 /* --------------------------------------------------------------------- */
/*
 * tmpfs_pathconf: answer pathconf queries for a tmpfs vnode, storing the
 * result through ap->a_retval.
 *
 * Only part of the case list is visible here.  For _PC_FILESIZEBITS the
 * result is max(32, flsll(tm_pages_max * PAGE_SIZE) + 1), i.e. it is
 * derived from the page limit of the mount.  Labels for
 * _PC_CHOWN_RESTRICTED and _PC_2_SYMLINKS are present but their bodies
 * are not.
 *
 * NOTE(review): the switch header, most cases, the default handling and
 * the return are missing from this listing.
 */
1789 tmpfs_pathconf(struct vop_pathconf_args
*ap
)
1791 struct vnode
*vp
= ap
->a_vp
;
1792 int name
= ap
->a_name
;
1793 register_t
*retval
= ap
->a_retval
;
1794 struct tmpfs_mount
*tmp
;
1800 case _PC_CHOWN_RESTRICTED
:
1804 case _PC_FILESIZEBITS
:
1805 tmp
= VFS_TO_TMPFS(vp
->v_mount
);
1806 *retval
= max(32, flsll(tmp
->tm_pages_max
* PAGE_SIZE
) + 1);
1833 case _PC_2_SYMLINKS
:
1844 /************************************************************************
1846 ************************************************************************/
/*
 * Forward declarations of the kqueue filter callbacks, followed by the
 * three filterops tables installed by tmpfs_kqfilter().  Every table
 * carries FILTEROP_ISFD | FILTEROP_MPSAFE, a NULL second slot,
 * filt_tmpfsdetach as the third slot, and differs only in the final
 * event callback (read, write or vnode).
 */
1848 static void filt_tmpfsdetach(struct knote
*kn
);
1849 static int filt_tmpfsread(struct knote
*kn
, long hint
);
1850 static int filt_tmpfswrite(struct knote
*kn
, long hint
);
1851 static int filt_tmpfsvnode(struct knote
*kn
, long hint
);
1853 static struct filterops tmpfsread_filtops
=
1854 { FILTEROP_ISFD
| FILTEROP_MPSAFE
,
1855 NULL
, filt_tmpfsdetach
, filt_tmpfsread
};
1856 static struct filterops tmpfswrite_filtops
=
1857 { FILTEROP_ISFD
| FILTEROP_MPSAFE
,
1858 NULL
, filt_tmpfsdetach
, filt_tmpfswrite
};
1859 static struct filterops tmpfsvnode_filtops
=
1860 { FILTEROP_ISFD
| FILTEROP_MPSAFE
,
1861 NULL
, filt_tmpfsdetach
, filt_tmpfsvnode
};
/*
 * tmpfs_kqfilter: attach knote ap->a_kn to vnode ap->a_vp.
 *
 * Selects one of the three tmpfs filterops tables according to
 * kn->kn_filter and returns EOPNOTSUPP for any other filter.  On
 * success the vnode is stored in kn->kn_hook and the knote is inserted
 * into the vnode note list with knote_insert().
 *
 * NOTE(review): the case labels and the final return are not visible in
 * this listing; the table names suggest read, write and vnode filters.
 */
1864 tmpfs_kqfilter (struct vop_kqfilter_args
*ap
)
1866 struct vnode
*vp
= ap
->a_vp
;
1867 struct knote
*kn
= ap
->a_kn
;
1869 switch (kn
->kn_filter
) {
1871 kn
->kn_fop
= &tmpfsread_filtops
;
1874 kn
->kn_fop
= &tmpfswrite_filtops
;
1877 kn
->kn_fop
= &tmpfsvnode_filtops
;
1880 return (EOPNOTSUPP
);
1883 kn
->kn_hook
= (caddr_t
)vp
;
1885 knote_insert(&vp
->v_pollinfo
.vpi_kqinfo
.ki_note
, kn
);
/*
 * filt_tmpfsdetach: detach callback shared by all tmpfs filters.
 * Recovers the vnode from kn->kn_hook and removes the knote from the
 * vnode note list with knote_remove().
 */
1891 filt_tmpfsdetach(struct knote
*kn
)
1893 struct vnode
*vp
= (void *)kn
->kn_hook
;
1895 knote_remove(&vp
->v_pollinfo
.vpi_kqinfo
.ki_note
, kn
);
/*
 * filt_tmpfsread: read-filter event callback.
 *
 * On NOTE_REVOKE the knote is flagged EV_EOF | EV_NODATA | EV_ONESHOT.
 * Otherwise, under the shared node lock, the number of bytes between the
 * file offset (kn->kn_fp->f_offset) and node->tn_size is computed,
 * clamped to INTPTR_MAX and stored in kn->kn_data.  Knotes with
 * NOTE_OLDAPI take an early exit after unlocking.  The normal path
 * returns nonzero when kn_data is nonzero.
 *
 * NOTE(review): the declaration of off, the return values of the early
 * exits and any statement preceding the second kn_data assignment are
 * not visible in this listing.
 */
1899 filt_tmpfsread(struct knote
*kn
, long hint
)
1901 struct vnode
*vp
= (void *)kn
->kn_hook
;
1902 struct tmpfs_node
*node
= VP_TO_TMPFS_NODE(vp
);
1905 if (hint
== NOTE_REVOKE
) {
1906 kn
->kn_flags
|= (EV_EOF
| EV_NODATA
| EV_ONESHOT
);
1911 * Interlock against MP races when performing this function.
1913 TMPFS_NODE_LOCK_SH(node
);
1914 off
= node
->tn_size
- kn
->kn_fp
->f_offset
;
1915 kn
->kn_data
= (off
< INTPTR_MAX
) ? off
: INTPTR_MAX
;
1916 if (kn
->kn_sfflags
& NOTE_OLDAPI
) {
1917 TMPFS_NODE_UNLOCK(node
);
1920 if (kn
->kn_data
== 0) {
1921 kn
->kn_data
= (off
< INTPTR_MAX
) ? off
: INTPTR_MAX
;
1923 TMPFS_NODE_UNLOCK(node
);
1924 return (kn
->kn_data
!= 0);
/*
 * filt_tmpfswrite: write-filter event callback.
 * On NOTE_REVOKE the knote is flagged EV_EOF | EV_NODATA | EV_ONESHOT.
 *
 * NOTE(review): the remainder of the body, including the return value,
 * is not visible in this listing.
 */
1928 filt_tmpfswrite(struct knote
*kn
, long hint
)
1930 if (hint
== NOTE_REVOKE
)
1931 kn
->kn_flags
|= (EV_EOF
| EV_NODATA
| EV_ONESHOT
);
/*
 * filt_tmpfsvnode: vnode-filter event callback.
 *
 * If the hint matches a bit requested in kn->kn_sfflags it is recorded
 * in kn->kn_fflags.  On NOTE_REVOKE the knote is also flagged
 * EV_EOF | EV_NODATA.  The visible return reports whether any fflags
 * have been recorded.
 *
 * NOTE(review): the rest of the NOTE_REVOKE branch is not visible in
 * this listing.
 */
1937 filt_tmpfsvnode(struct knote
*kn
, long hint
)
1939 if (kn
->kn_sfflags
& hint
)
1940 kn
->kn_fflags
|= hint
;
1941 if (hint
== NOTE_REVOKE
) {
1942 kn
->kn_flags
|= (EV_EOF
| EV_NODATA
);
1945 return (kn
->kn_fflags
!= 0);
1949 * Helper to move VM pages between objects
1951 * NOTE: The vm_page_rename() dirties the page, so we can clear the
1952 * PG_NEED_COMMIT flag. If the pages are being moved into tn_aobj,
1953 * the pageout daemon will be able to page them out.
/*
 * tmpfs_move_pages_callback: per-page callback used by
 * tmpfs_move_pages(); data points at the rb_vm_page_scan_info of the
 * scan.
 *
 * Tries to busy the page and sleeps through vm_page_sleep_busy() when
 * it is already busy.  The page is then re-validated against
 * info->object and the saved pindex before being moved to
 * info->dest_object with vm_page_rename(), after which its commit flag
 * is cleared with vm_page_clear_commit().
 *
 * NOTE(review): the declaration and assignment of pindex, the error
 * bookkeeping in info, the page wakeup and the return values are not
 * visible in this listing.
 */
1956 tmpfs_move_pages_callback(vm_page_t p
, void *data
)
1958 struct rb_vm_page_scan_info
*info
= data
;
1962 if (vm_page_busy_try(p
, TRUE
)) {
1963 vm_page_sleep_busy(p
, TRUE
, "tpgmov");
1967 if (p
->object
!= info
->object
|| p
->pindex
!= pindex
) {
1972 vm_page_rename(p
, info
->dest_object
, pindex
);
1973 vm_page_clear_commit(p
);
1975 /* page automatically made dirty */
/*
 * tmpfs_move_pages: move the VM pages of object src into object dst.
 *
 * Both objects are held for the duration of the call.  The page tree of
 * src (src->rb_memq) is scanned with tmpfs_move_pages_callback, and the
 * scan is repeated for as long as info.error comes back negative.  The
 * objects are dropped in the reverse order of acquisition.
 *
 * NOTE(review): the loop header and the remaining initialization of
 * info are not visible in this listing.
 */
1982 tmpfs_move_pages(vm_object_t src
, vm_object_t dst
)
1984 struct rb_vm_page_scan_info info
;
1986 vm_object_hold(src
);
1987 vm_object_hold(dst
);
1989 info
.dest_object
= dst
;
1992 vm_page_rb_tree_RB_SCAN(&src
->rb_memq
, NULL
,
1993 tmpfs_move_pages_callback
, &info
);
1994 } while (info
.error
< 0);
1995 vm_object_drop(dst
);
1996 vm_object_drop(src
);
1999 /* --------------------------------------------------------------------- */
2002 * vnode operations vector used for files stored in a tmpfs file system.
2004 struct vop_ops tmpfs_vnode_vops
= {
2005 .vop_default
= vop_defaultop
,
2006 .vop_getpages
= vop_stdgetpages
,
2007 .vop_putpages
= vop_stdputpages
,
2008 .vop_ncreate
= tmpfs_ncreate
,
2009 .vop_nresolve
= tmpfs_nresolve
,
2010 .vop_nlookupdotdot
= tmpfs_nlookupdotdot
,
2011 .vop_nmknod
= tmpfs_nmknod
,
2012 .vop_open
= tmpfs_open
,
2013 .vop_close
= tmpfs_close
,
2014 .vop_access
= tmpfs_access
,
2015 .vop_getattr
= tmpfs_getattr
,
2016 .vop_setattr
= tmpfs_setattr
,
2017 .vop_read
= tmpfs_read
,
2018 .vop_write
= tmpfs_write
,
2019 .vop_fsync
= tmpfs_fsync
,
2020 .vop_mountctl
= tmpfs_mountctl
,
2021 .vop_nremove
= tmpfs_nremove
,
2022 .vop_nlink
= tmpfs_nlink
,
2023 .vop_nrename
= tmpfs_nrename
,
2024 .vop_nmkdir
= tmpfs_nmkdir
,
2025 .vop_nrmdir
= tmpfs_nrmdir
,
2026 .vop_nsymlink
= tmpfs_nsymlink
,
2027 .vop_readdir
= tmpfs_readdir
,
2028 .vop_readlink
= tmpfs_readlink
,
2029 .vop_inactive
= tmpfs_inactive
,
2030 .vop_reclaim
= tmpfs_reclaim
,
2031 .vop_print
= tmpfs_print
,
2032 .vop_pathconf
= tmpfs_pathconf
,
2033 .vop_bmap
= tmpfs_bmap
,
2034 .vop_strategy
= tmpfs_strategy
,
2035 .vop_advlock
= tmpfs_advlock
,
2036 .vop_kqfilter
= tmpfs_kqfilter