1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Efficient memory file system.
36 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
37 * (the well-known UVM) to store file data and metadata in an efficient
38 * way. This means that it does not follow the structure of an on-disk
39 * file system because it simply does not need to. Instead, it uses
40 * memory-specific data structures and algorithms to automatically
41 * allocate and release resources.
43 #include <sys/cdefs.h>
45 #include <sys/param.h>
46 #include <sys/limits.h>
48 #include <sys/mutex.h>
49 #include <sys/kernel.h>
51 #include <sys/systm.h>
52 #include <sys/sysctl.h>
53 #include <sys/objcache.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_param.h>
59 #include <vfs/tmpfs/tmpfs.h>
60 #include <vfs/tmpfs/tmpfs_vnops.h>
/*
 * Default permission bits for the root node: read/write/execute for the
 * owner, read/execute for group and others (octal 0755).
 */
#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
67 MALLOC_DEFINE(M_TMPFSMNT
, "tmpfs mount", "tmpfs mount structures");
68 MALLOC_DEFINE(M_TMPFSNAME
, "tmpfs name", "tmpfs file names");
69 MALLOC_DEFINE(M_TMPFS_DIRENT
, "tmpfs dirent", "tmpfs dirent structures");
70 MALLOC_DEFINE(M_TMPFS_NODE
, "tmpfs node", "tmpfs node structures");
72 /* --------------------------------------------------------------------- */
74 static int tmpfs_mount(struct mount
*, char *, caddr_t
, struct ucred
*);
75 static int tmpfs_unmount(struct mount
*, int);
76 static int tmpfs_root(struct mount
*, struct vnode
**);
77 static int tmpfs_fhtovp(struct mount
*, struct vnode
*, struct fid
*, struct vnode
**);
78 static int tmpfs_statfs(struct mount
*, struct statfs
*, struct ucred
*cred
);
80 /* --------------------------------------------------------------------- */
/*
 * Body of the swap-total helper.  The function header is not among the
 * visible lines; presumably this is get_swpgtotal(), which tmpfs_mount()
 * calls -- TODO confirm.
 *
 * Visible behavior:
 *  - reads the "vm.dmmax" and "vm.nswapdev" sysctls by name;
 *  - resolves the name "vm.swap_info" into the soid[] MIB with a
 *    two-element oid[] request;
 *  - for each device index below nswapdev, queries a struct swdevt through
 *    that MIB and, when the returned length matches the structure size,
 *    adds (sw_nblks - dmmax) to total.
 *
 * NOTE(review): the embedded listing numbers skip (e.g. 89 -> 95,
 * 100 -> 104, 117 -> 120), so the declarations of len, mibi and swinfo, the
 * initialization of total and oid[], the tails of the sysctl calls and every
 * return statement are not shown here.
 */
87 char *sname
= "vm.swap_info";
88 int soid
[SWI_MAXMIB
], oid
[2];
89 u_int unswdev
, total
, dmmax
, nswapdev
;
95 if (kernel_sysctlbyname("vm.dmmax", &dmmax
, &len
,
99 len
= sizeof(nswapdev
);
100 if (kernel_sysctlbyname("vm.nswapdev", &nswapdev
, &len
,
/* Size in bytes of the first SWI_MAXMIB - 1 slots of the MIB array. */
104 mibi
= (SWI_MAXMIB
- 1) * sizeof(int);
108 if (kernel_sysctl(oid
, 2,
109 soid
, &mibi
, (void *)sname
, strlen(sname
),
/* mibi is reused as the index of the last MIB slot, which receives the
 * device number on each iteration below. */
113 mibi
= (SWI_MAXMIB
- 1);
114 for (unswdev
= 0; unswdev
< nswapdev
; ++unswdev
) {
115 soid
[mibi
] = unswdev
;
116 len
= sizeof(struct swdevt
);
117 if (kernel_sysctl(soid
, mibi
+ 1, &swinfo
, &len
, NULL
, 0,
/* Only count the device when a complete struct swdevt came back. */
120 if (len
== sizeof(struct swdevt
))
121 total
+= (swinfo
.sw_nblks
- dmmax
);
127 /* --------------------------------------------------------------------- */
/*
 * Constructor callback for the tmpfs node object cache; tmpfs_mount()
 * passes it to objcache_create() for the "tmpfs node cache".
 *
 * obj is treated as a struct tmpfs_node.  In the visible lines the node's
 * vnode pointer is cleared and its vnode state is set to TMPFS_VNODE_WANT.
 * privdata and flags are not referenced in the visible lines.
 *
 * NOTE(review): the return type, the braces, original lines 132-137 and the
 * return statement are missing from this listing.
 */
129 tmpfs_node_ctor(void *obj
, void *privdata
, int flags
)
131 struct tmpfs_node
*node
= (struct tmpfs_node
*)obj
;
138 node
->tn_vnode
= NULL
;
139 node
->tn_vpstate
= TMPFS_VNODE_WANT
;
/*
 * Destructor callback for the tmpfs node object cache; tmpfs_mount()
 * passes it to objcache_create() for the "tmpfs node cache".
 *
 * Marks the node as typeless (VNON) and sets its vnode state to
 * TMPFS_VNODE_DOOMED.  privdata is not referenced.
 *
 * NOTE(review): the return type and the braces are missing from this
 * listing.
 */
145 tmpfs_node_dtor(void *obj
, void *privdata
)
147 struct tmpfs_node
*node
= (struct tmpfs_node
*)obj
;
148 node
->tn_type
= VNON
;
149 node
->tn_vpstate
= TMPFS_VNODE_DOOMED
;
/*
 * Allocation callback for the tmpfs node object cache; tmpfs_mount()
 * passes it to objcache_create() for the "tmpfs node cache".
 *
 * Obtains the backing memory from objcache_malloc_alloc(args, flags), then
 * initializes the node's interlock (recursion allowed) and seeds tn_gen
 * with a value from karc4random().
 *
 * NOTE(review): original lines 156-157 and 160-163 are missing from this
 * listing, so whether the allocation result is checked before lockinit()
 * and what is returned cannot be confirmed from here.
 */
153 tmpfs_node_init(void *args
, int flags
)
155 struct tmpfs_node
*node
= (struct tmpfs_node
*)objcache_malloc_alloc(args
, flags
);
158 lockinit(&node
->tn_interlock
, "tmpfs node interlock", 0, LK_CANRECURSE
);
159 node
->tn_gen
= karc4random();
/*
 * Release callback for the tmpfs node object cache; tmpfs_mount() passes
 * it to objcache_create() for the "tmpfs node cache".
 *
 * Tears down the node's interlock and then hands the memory back through
 * objcache_malloc_free(obj, args).
 *
 * NOTE(review): the return type and the braces are missing from this
 * listing.
 */
165 tmpfs_node_fini(void *obj
, void *args
)
167 struct tmpfs_node
*node
= (struct tmpfs_node
*)obj
;
/* The lock must be destroyed before the memory that holds it is freed. */
168 lockuninit(&node
->tn_interlock
);
169 objcache_malloc_free(obj
, args
);
172 struct objcache_malloc_args tmpfs_dirent_pool_malloc_args
=
173 { sizeof(struct tmpfs_dirent
), M_TMPFS_DIRENT
};
174 struct objcache_malloc_args tmpfs_node_pool_malloc_args
=
175 { sizeof(struct tmpfs_node
), M_TMPFS_NODE
};
/*
 * Mount entry point (installed as .vfs_mount in tmpfs_vfsops).
 *
 * mp	mount being set up; its flags, mnt_data, vnode operation vectors
 *	and mnt_stat are filled in here.
 * path	mount point name, copied into mnt_stat.f_mntonname with copyinstr().
 * data	not referenced in the visible lines.
 * cred	caller credentials; supply the root node's uid/gid and are passed
 *	on to tmpfs_statfs().
 *
 * Visible steps, in order: handle MNT_UPDATE, print an "experimental"
 * warning, check available memory against TMPFS_PAGES_RESERVED, derive the
 * page and node limits from size_max, allocate and fill the tmpfs_mount
 * structure, create the dirent and node object caches, allocate the root
 * node, publish the mount data and vnode operations, and fill in mnt_stat.
 *
 * NOTE(review): the embedded listing numbers skip in many places, so the
 * return type, the braces, the declarations of nodes/error/size/size_max,
 * how size_max is obtained, and every return statement are not shown here.
 */
178 tmpfs_mount(struct mount
*mp
, char *path
, caddr_t data
, struct ucred
*cred
)
180 struct tmpfs_mount
*tmp
;
181 struct tmpfs_node
*root
;
182 size_t pages
, mem_size
;
190 /* Root node attributes. */
191 uid_t root_uid
= cred
->cr_uid
;
192 gid_t root_gid
= cred
->cr_gid
;
/* NOTE(review): TMPFS_DEFAULT_ROOT_MODE is defined in this file, but the
 * visible lines initialize root_mode from VREAD | VWRITE -- confirm which
 * is intended. */
193 mode_t root_mode
= (VREAD
| VWRITE
);
195 if (mp
->mnt_flag
& MNT_UPDATE
) {
196 /* XXX: There is no support yet to update file system
197 * settings. Should be added. */
202 kprintf("WARNING: TMPFS is considered to be a highly experimental "
203 "feature in DragonFly.\n");
205 /* Do not allow mounts if we do not have enough memory to preserve
206 * the minimum reserved pages. */
207 mem_size
= vmstats
.v_free_count
+ vmstats
.v_inactive_count
+ get_swpgtotal();
/* Subtract the wired page count, but never more than mem_size itself, so
 * the unsigned value bottoms out at zero instead of wrapping. */
208 mem_size
-= mem_size
> vmstats
.v_wire_count
? vmstats
.v_wire_count
: mem_size
;
209 if (mem_size
< TMPFS_PAGES_RESERVED
)
213 * If mount by non-root, then verify that user has necessary
214 * permissions on the device.
216 if (cred
->cr_uid
!= 0) {
218 if ((mp
->mnt_flag
& MNT_RDONLY
) == 0)
222 /* Get the maximum number of memory pages this file system is
223 * allowed to use, based on the maximum size the user passed in
224 * the mount structure. A value of zero is treated as if the
225 * maximum available space was requested. */
226 if (size_max
< PAGE_SIZE
|| size_max
>= SIZE_MAX
)
229 pages
= howmany(size_max
, PAGE_SIZE
);
/* Node limit: 3 plus pages * PAGE_SIZE / 1024. */
233 nodes
= 3 + pages
* PAGE_SIZE
/ 1024;
236 KKASSERT(nodes
>= 3);
238 /* Allocate the tmpfs mount structure and fill it. */
239 tmp
= (struct tmpfs_mount
*)kmalloc(sizeof(struct tmpfs_mount
),
240 M_TMPFSMNT
, M_WAITOK
| M_ZERO
);
242 lockinit(&(tmp
->allnode_lock
), "tmpfs allnode lock", 0, LK_CANRECURSE
);
243 tmp
->tm_nodes_max
= nodes
;
244 tmp
->tm_nodes_inuse
= 0;
/* Per-file size cap: vmstats.v_page_count plus get_swpgtotal(), with the
 * sum converted to u_int64_t before it is multiplied by PAGE_SIZE. */
245 tmp
->tm_maxfilesize
= (u_int64_t
)(vmstats
.v_page_count
+ get_swpgtotal()) * PAGE_SIZE
;
246 LIST_INIT(&tmp
->tm_nodes_used
);
248 tmp
->tm_pages_max
= pages
;
249 tmp
->tm_pages_used
= 0;
/* Directory entries use the stock objcache malloc allocator. */
250 tmp
->tm_dirent_pool
= objcache_create( "tmpfs dirent cache",
253 objcache_malloc_alloc
, objcache_malloc_free
,
254 &tmpfs_dirent_pool_malloc_args
);
/* Nodes use the tmpfs-specific constructor, destructor, allocation and
 * release callbacks defined earlier in this file. */
255 tmp
->tm_node_pool
= objcache_create( "tmpfs node cache",
257 tmpfs_node_ctor
, tmpfs_node_dtor
, NULL
,
258 tmpfs_node_init
, tmpfs_node_fini
,
259 &tmpfs_node_pool_malloc_args
);
261 /* Allocate the root node. */
262 error
= tmpfs_alloc_node(tmp
, VDIR
, root_uid
,
263 root_gid
, root_mode
& ALLPERMS
, NULL
, NULL
,
264 VNOVAL
, VNOVAL
, &root
);
/* Root allocation failed: destroy both object caches and free the mount
 * structure. */
266 if (error
!= 0 || root
== NULL
) {
267 objcache_destroy(tmp
->tm_node_pool
);
268 objcache_destroy(tmp
->tm_dirent_pool
);
269 kfree(tmp
, M_TMPFSMNT
);
272 KASSERT(root
->tn_id
>= 0, ("tmpfs root with invalid ino: %d", (int)root
->tn_id
));
275 mp
->mnt_flag
|= MNT_LOCAL
;
276 mp
->mnt_kern_flag
|= MNTK_RD_MPSAFE
| MNTK_WR_MPSAFE
| MNTK_GA_MPSAFE
|
277 MNTK_IN_MPSAFE
| MNTK_SG_MPSAFE
;
/* Attach the tmpfs private data to the mount. */
278 mp
->mnt_data
= (qaddr_t
)tmp
;
/* Register the regular and FIFO vnode operation vectors for this mount. */
282 vfs_add_vnodeops(mp
, &tmpfs_vnode_vops
, &mp
->mnt_vn_norm_ops
);
283 vfs_add_vnodeops(mp
, &tmpfs_fifo_vops
, &mp
->mnt_vn_fifo_ops
);
/* Record "tmpfs" as the mounted-from name and zero the rest of the
 * buffer after the size copystr() reported. */
285 copystr("tmpfs", mp
->mnt_stat
.f_mntfromname
, MNAMELEN
- 1, &size
);
286 bzero(mp
->mnt_stat
.f_mntfromname
+size
, MNAMELEN
- size
);
/* Clear the mounted-on name, then copy path into it with copyinstr(),
 * leaving room for the terminator. */
287 bzero(mp
->mnt_stat
.f_mntonname
, sizeof(mp
->mnt_stat
.f_mntonname
));
288 copyinstr(path
, mp
->mnt_stat
.f_mntonname
,
289 sizeof(mp
->mnt_stat
.f_mntonname
) -1,
/* Fill mp->mnt_stat through this file system's own statfs routine. */
292 tmpfs_statfs(mp
, &mp
->mnt_stat
, cred
);
297 /* --------------------------------------------------------------------- */
/*
 * Unmount entry point (installed as .vfs_unmount in tmpfs_vfsops).
 *
 * mp		mount being torn down.
 * mntflags	unmount flags; MNT_FORCE is tested explicitly.
 *
 * Visible steps, in order: test MNT_FORCE, flush the mount's vnodes with
 * vflush(), walk tm_nodes_used freeing the directory entries of every
 * directory node and then the node itself, destroy both object caches,
 * destroy the allnode lock, assert that no pages or nodes remain accounted,
 * free the tmpfs_mount structure and clear MNT_LOCAL.
 *
 * NOTE(review): the embedded listing numbers skip, so the return type, the
 * braces, the declarations of error and flags, what the MNT_FORCE branch
 * does, the handling of a vflush() error, the loop header around the
 * directory entries, the statements that advance de and node, and the
 * return statement are not shown here.
 */
301 tmpfs_unmount(struct mount
*mp
, int mntflags
)
305 struct tmpfs_mount
*tmp
;
306 struct tmpfs_node
*node
;
309 /* Handle forced unmounts. */
310 if (mntflags
& MNT_FORCE
)
313 /* Finalize all pending I/O. */
314 error
= vflush(mp
, 0, flags
);
318 tmp
= VFS_TO_TMPFS(mp
);
320 /* Free all associated data. The loop iterates over the linked list
321 * we have containing all used nodes. For each of them that is
322 * a directory, we free all its directory entries. Note that after
323 * freeing a node, it will automatically go to the available list,
324 * so we will later have to iterate over it to release its items. */
325 node
= LIST_FIRST(&tmp
->tm_nodes_used
);
326 while (node
!= NULL
) {
327 struct tmpfs_node
*next
;
329 if (node
->tn_type
== VDIR
) {
330 struct tmpfs_dirent
*de
;
332 de
= TAILQ_FIRST(&node
->tn_dir
.tn_dirhead
);
334 struct tmpfs_dirent
*nde
;
/* Remember the following entry before this one is freed. */
336 nde
= TAILQ_NEXT(de
, td_entries
);
337 tmpfs_free_dirent(tmp
, de
, FALSE
);
/* Keep the directory's size in step with the entries removed. */
339 node
->tn_size
-= sizeof(struct tmpfs_dirent
);
/* Fetch the successor first; the node itself is released below. */
343 next
= LIST_NEXT(node
, tn_entries
);
348 node
->tn_vnode
= NULL
;
350 tmpfs_free_node(tmp
, node
);
354 objcache_destroy(tmp
->tm_dirent_pool
);
355 objcache_destroy(tmp
->tm_node_pool
);
357 lockuninit(&tmp
->allnode_lock
);
/* Everything should have been released by the loop above. */
358 KKASSERT(tmp
->tm_pages_used
== 0);
359 KKASSERT(tmp
->tm_nodes_inuse
== 0);
361 /* Throw away the tmpfs_mount structure. */
362 kfree(mp
->mnt_data
, M_TMPFSMNT
);
365 mp
->mnt_flag
&= ~MNT_LOCAL
;
369 /* --------------------------------------------------------------------- */
372 tmpfs_root(struct mount
*mp
, struct vnode
**vpp
)
375 error
= tmpfs_alloc_vp(mp
, VFS_TO_TMPFS(mp
)->tm_root
, LK_EXCLUSIVE
, vpp
);
376 (*vpp
)->v_flag
|= VROOT
;
377 (*vpp
)->v_type
= VDIR
;
382 /* --------------------------------------------------------------------- */
/*
 * File-handle-to-vnode entry point (installed as .vfs_fhtovp in
 * tmpfs_vfsops).
 *
 * mp		mount to search.
 * rootvp	not referenced in the visible lines.
 * fhp		file handle, reinterpreted as a struct tmpfs_fid.
 * vpp		out: vnode pointer filled in by tmpfs_alloc_vp().
 *
 * The handle is checked for the expected length and for an id below the
 * mount's node limit, then the list of used nodes is scanned for a node
 * whose id and generation both match the handle.  The visible return
 * statement hands a node to tmpfs_alloc_vp() with LK_EXCLUSIVE.
 *
 * NOTE(review): the embedded listing numbers skip, so the return type, the
 * braces, the values returned when a check fails or no node matches, any
 * locking around the list walk, and the statements between the match and
 * the final return are not shown here.
 */
385 tmpfs_fhtovp(struct mount
*mp
, struct vnode
*rootvp
, struct fid
*fhp
, struct vnode
**vpp
)
388 struct tmpfs_fid
*tfhp
;
389 struct tmpfs_mount
*tmp
;
390 struct tmpfs_node
*node
;
392 tmp
= VFS_TO_TMPFS(mp
);
394 tfhp
= (struct tmpfs_fid
*)fhp
;
/* Reject handles that are not exactly the size of a tmpfs file id. */
395 if (tfhp
->tf_len
!= sizeof(struct tmpfs_fid
))
/* Reject ids at or beyond the configured node limit. */
398 if (tfhp
->tf_id
>= tmp
->tm_nodes_max
)
/* Both the id and the generation number must match. */
404 LIST_FOREACH(node
, &tmp
->tm_nodes_used
, tn_entries
) {
405 if (node
->tn_id
== tfhp
->tf_id
&&
406 node
->tn_gen
== tfhp
->tf_gen
) {
414 return (tmpfs_alloc_vp(mp
, node
, LK_EXCLUSIVE
, vpp
));
419 /* --------------------------------------------------------------------- */
/*
 * File system statistics entry point (installed as .vfs_statfs in
 * tmpfs_vfsops; also called directly at the end of tmpfs_mount()).
 *
 * mp	mount to report on.
 * sbp	out: statfs structure to fill.
 * cred	not referenced in the visible lines.
 *
 * Reports PAGE_SIZE as both the I/O size and the block size, the page
 * limit as the block count and the available pages as both free counts.
 * The free node count is the smaller of the remaining node allowance and
 * the number of struct tmpfs_node objects that would fit in the available
 * pages; the total file count is that figure plus the nodes in use.
 *
 * NOTE(review): the return type, the braces and the return statement are
 * missing from this listing.
 */
423 tmpfs_statfs(struct mount
*mp
, struct statfs
*sbp
, struct ucred
*cred
)
425 fsfilcnt_t freenodes
;
426 struct tmpfs_mount
*tmp
;
428 tmp
= VFS_TO_TMPFS(mp
);
430 sbp
->f_iosize
= PAGE_SIZE
;
431 sbp
->f_bsize
= PAGE_SIZE
;
433 sbp
->f_blocks
= TMPFS_PAGES_MAX(tmp
);
434 sbp
->f_bavail
= sbp
->f_bfree
= TMPFS_PAGES_AVAIL(tmp
);
/* Free nodes are bounded both by the node limit and by available memory. */
436 freenodes
= MIN(tmp
->tm_nodes_max
- tmp
->tm_nodes_inuse
,
437 TMPFS_PAGES_AVAIL(tmp
) * PAGE_SIZE
/ sizeof(struct tmpfs_node
));
439 sbp
->f_files
= freenodes
+ tmp
->tm_nodes_inuse
;
440 sbp
->f_ffree
= freenodes
;
441 /* sbp->f_owner = tmp->tn_uid; */
446 /* --------------------------------------------------------------------- */
449 * tmpfs vfs operations.
452 static struct vfsops tmpfs_vfsops
= {
453 .vfs_mount
= tmpfs_mount
,
454 .vfs_unmount
= tmpfs_unmount
,
455 .vfs_root
= tmpfs_root
,
456 .vfs_statfs
= tmpfs_statfs
,
457 .vfs_fhtovp
= tmpfs_fhtovp
,
458 .vfs_sync
= vfs_stdsync
461 VFS_SET(tmpfs_vfsops
, tmpfs
, 0);