kernel - TMPFS - Initial port of NetBSD's tmpfs
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vfsops.c
blob3dde5a908d67e5b9123f580c4db50da839cccb12
1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */
3 /*-
4 * Copyright (c) 2005 The NetBSD Foundation, Inc.
5 * All rights reserved.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9 * 2005 program.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
34 * Efficient memory file system.
36 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
37 * (the well-known UVM) to store file data and metadata in an efficient
38 * way. This means that it does not follow the structure of an on-disk
39 * file system because it simply does not need to. Instead, it uses
40 * memory-specific data structures and algorithms to automatically
41 * allocate and release resources.
43 #include <sys/cdefs.h>
44 #include <sys/conf.h>
45 #include <sys/param.h>
46 #include <sys/limits.h>
47 #include <sys/lock.h>
48 #include <sys/mutex.h>
49 #include <sys/kernel.h>
50 #include <sys/stat.h>
51 #include <sys/systm.h>
52 #include <sys/sysctl.h>
53 #include <sys/objcache.h>
55 #include <vm/vm.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_param.h>
59 #include <vfs/tmpfs/tmpfs.h>
60 #include <vfs/tmpfs/tmpfs_vnops.h>
63 * Default permission for root node
65 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
67 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
68 MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names");
69 MALLOC_DEFINE(M_TMPFS_DIRENT, "tmpfs dirent", "tmpfs dirent structures");
70 MALLOC_DEFINE(M_TMPFS_NODE, "tmpfs node", "tmpfs node structures");
72 /* --------------------------------------------------------------------- */
74 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
75 static int tmpfs_unmount(struct mount *, int);
76 static int tmpfs_root(struct mount *, struct vnode **);
77 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
78 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
80 /* --------------------------------------------------------------------- */
82 #define SWI_MAXMIB 3
83 static u_int
84 get_swpgtotal(void)
86 struct swdevt swinfo;
87 char *sname = "vm.swap_info";
88 int soid[SWI_MAXMIB], oid[2];
89 u_int unswdev, total, dmmax, nswapdev;
90 size_t mibi, len;
92 total = 0;
94 len = sizeof(dmmax);
95 if (kernel_sysctlbyname("vm.dmmax", &dmmax, &len,
96 NULL, 0, NULL) != 0)
97 return total;
99 len = sizeof(nswapdev);
100 if (kernel_sysctlbyname("vm.nswapdev", &nswapdev, &len,
101 NULL, 0, NULL) != 0)
102 return total;
104 mibi = (SWI_MAXMIB - 1) * sizeof(int);
105 oid[0] = 0;
106 oid[1] = 3;
108 if (kernel_sysctl(oid, 2,
109 soid, &mibi, (void *)sname, strlen(sname),
110 NULL) != 0)
111 return total;
113 mibi = (SWI_MAXMIB - 1);
114 for (unswdev = 0; unswdev < nswapdev; ++unswdev) {
115 soid[mibi] = unswdev;
116 len = sizeof(struct swdevt);
117 if (kernel_sysctl(soid, mibi + 1, &swinfo, &len, NULL, 0,
118 NULL) != 0)
119 return total;
120 if (len == sizeof(struct swdevt))
121 total += (swinfo.sw_nblks - dmmax);
124 return total;
127 /* --------------------------------------------------------------------- */
128 static int
129 tmpfs_node_ctor(void *obj, void *privdata, int flags)
131 struct tmpfs_node *node = (struct tmpfs_node *)obj;
133 node->tn_gen++;
134 node->tn_size = 0;
135 node->tn_status = 0;
136 node->tn_flags = 0;
137 node->tn_links = 0;
138 node->tn_vnode = NULL;
139 node->tn_vpstate = TMPFS_VNODE_WANT;
141 return (1);
144 static void
145 tmpfs_node_dtor(void *obj, void *privdata)
147 struct tmpfs_node *node = (struct tmpfs_node *)obj;
148 node->tn_type = VNON;
149 node->tn_vpstate = TMPFS_VNODE_DOOMED;
152 static void*
153 tmpfs_node_init(void *args, int flags)
155 struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags);
156 node->tn_id = 0;
158 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
159 node->tn_gen = karc4random();
161 return node;
164 static void
165 tmpfs_node_fini(void *obj, void *args)
167 struct tmpfs_node *node = (struct tmpfs_node *)obj;
168 lockuninit(&node->tn_interlock);
169 objcache_malloc_free(obj, args);
172 struct objcache_malloc_args tmpfs_dirent_pool_malloc_args =
173 { sizeof(struct tmpfs_dirent), M_TMPFS_DIRENT };
174 struct objcache_malloc_args tmpfs_node_pool_malloc_args =
175 { sizeof(struct tmpfs_node), M_TMPFS_NODE };
177 static int
178 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
180 struct tmpfs_mount *tmp;
181 struct tmpfs_node *root;
182 size_t pages, mem_size;
183 ino_t nodes;
184 int error;
185 /* Size counters. */
186 ino_t nodes_max = 0;
187 size_t size_max = 0;
188 size_t size;
190 /* Root node attributes. */
191 uid_t root_uid = cred->cr_uid;
192 gid_t root_gid = cred->cr_gid;
193 mode_t root_mode = (VREAD | VWRITE);
195 if (mp->mnt_flag & MNT_UPDATE) {
196 /* XXX: There is no support yet to update file system
197 * settings. Should be added. */
199 return EOPNOTSUPP;
202 kprintf("WARNING: TMPFS is considered to be a highly experimental "
203 "feature in DragonFly.\n");
205 /* Do not allow mounts if we do not have enough memory to preserve
206 * the minimum reserved pages. */
207 mem_size = vmstats.v_free_count + vmstats.v_inactive_count + get_swpgtotal();
208 mem_size -= mem_size > vmstats.v_wire_count ? vmstats.v_wire_count : mem_size;
209 if (mem_size < TMPFS_PAGES_RESERVED)
210 return ENOSPC;
213 * If mount by non-root, then verify that user has necessary
214 * permissions on the device.
216 if (cred->cr_uid != 0) {
217 root_mode = VREAD;
218 if ((mp->mnt_flag & MNT_RDONLY) == 0)
219 root_mode |= VWRITE;
222 /* Get the maximum number of memory pages this file system is
223 * allowed to use, based on the maximum size the user passed in
224 * the mount structure. A value of zero is treated as if the
225 * maximum available space was requested. */
226 if (size_max < PAGE_SIZE || size_max >= SIZE_MAX)
227 pages = SIZE_MAX;
228 else
229 pages = howmany(size_max, PAGE_SIZE);
230 KKASSERT(pages > 0);
232 if (nodes_max <= 3)
233 nodes = 3 + pages * PAGE_SIZE / 1024;
234 else
235 nodes = nodes_max;
236 KKASSERT(nodes >= 3);
238 /* Allocate the tmpfs mount structure and fill it. */
239 tmp = (struct tmpfs_mount *)kmalloc(sizeof(struct tmpfs_mount),
240 M_TMPFSMNT, M_WAITOK | M_ZERO);
242 lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE);
243 tmp->tm_nodes_max = nodes;
244 tmp->tm_nodes_inuse = 0;
245 tmp->tm_maxfilesize = (u_int64_t)(vmstats.v_page_count + get_swpgtotal()) * PAGE_SIZE;
246 LIST_INIT(&tmp->tm_nodes_used);
248 tmp->tm_pages_max = pages;
249 tmp->tm_pages_used = 0;
250 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache",
251 0, 0,
252 NULL, NULL, NULL,
253 objcache_malloc_alloc, objcache_malloc_free,
254 &tmpfs_dirent_pool_malloc_args);
255 tmp->tm_node_pool = objcache_create( "tmpfs node cache",
256 0, 0,
257 tmpfs_node_ctor, tmpfs_node_dtor, NULL,
258 tmpfs_node_init, tmpfs_node_fini,
259 &tmpfs_node_pool_malloc_args);
261 /* Allocate the root node. */
262 error = tmpfs_alloc_node(tmp, VDIR, root_uid,
263 root_gid, root_mode & ALLPERMS, NULL, NULL,
264 VNOVAL, VNOVAL, &root);
266 if (error != 0 || root == NULL) {
267 objcache_destroy(tmp->tm_node_pool);
268 objcache_destroy(tmp->tm_dirent_pool);
269 kfree(tmp, M_TMPFSMNT);
270 return error;
272 KASSERT(root->tn_id >= 0, ("tmpfs root with invalid ino: %d", (int)root->tn_id));
273 tmp->tm_root = root;
275 mp->mnt_flag |= MNT_LOCAL;
276 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE |
277 MNTK_IN_MPSAFE | MNTK_SG_MPSAFE;
278 mp->mnt_data = (qaddr_t)tmp;
279 vfs_getnewfsid(mp);
282 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
283 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
285 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
286 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
287 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
288 copyinstr(path, mp->mnt_stat.f_mntonname,
289 sizeof(mp->mnt_stat.f_mntonname) -1,
290 &size);
292 tmpfs_statfs(mp, &mp->mnt_stat, cred);
294 return 0;
297 /* --------------------------------------------------------------------- */
299 /* ARGSUSED2 */
300 static int
301 tmpfs_unmount(struct mount *mp, int mntflags)
303 int error;
304 int flags = 0;
305 struct tmpfs_mount *tmp;
306 struct tmpfs_node *node;
307 struct vnode *vp;
309 /* Handle forced unmounts. */
310 if (mntflags & MNT_FORCE)
311 flags |= FORCECLOSE;
313 /* Finalize all pending I/O. */
314 error = vflush(mp, 0, flags);
315 if (error != 0)
316 return error;
318 tmp = VFS_TO_TMPFS(mp);
320 /* Free all associated data. The loop iterates over the linked list
321 * we have containing all used nodes. For each of them that is
322 * a directory, we free all its directory entries. Note that after
323 * freeing a node, it will automatically go to the available list,
324 * so we will later have to iterate over it to release its items. */
325 node = LIST_FIRST(&tmp->tm_nodes_used);
326 while (node != NULL) {
327 struct tmpfs_node *next;
329 if (node->tn_type == VDIR) {
330 struct tmpfs_dirent *de;
332 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
333 while (de != NULL) {
334 struct tmpfs_dirent *nde;
336 nde = TAILQ_NEXT(de, td_entries);
337 tmpfs_free_dirent(tmp, de, FALSE);
338 de = nde;
339 node->tn_size -= sizeof(struct tmpfs_dirent);
343 next = LIST_NEXT(node, tn_entries);
344 vp = node->tn_vnode;
345 if (vp != NULL) {
346 tmpfs_free_vp(vp);
347 vrecycle(vp);
348 node->tn_vnode = NULL;
350 tmpfs_free_node(tmp, node);
351 node = next;
354 objcache_destroy(tmp->tm_dirent_pool);
355 objcache_destroy(tmp->tm_node_pool);
357 lockuninit(&tmp->allnode_lock);
358 KKASSERT(tmp->tm_pages_used == 0);
359 KKASSERT(tmp->tm_nodes_inuse == 0);
361 /* Throw away the tmpfs_mount structure. */
362 kfree(mp->mnt_data, M_TMPFSMNT);
363 mp->mnt_data = NULL;
365 mp->mnt_flag &= ~MNT_LOCAL;
366 return 0;
369 /* --------------------------------------------------------------------- */
371 static int
372 tmpfs_root(struct mount *mp, struct vnode **vpp)
374 int error;
375 error = tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, LK_EXCLUSIVE, vpp);
376 (*vpp)->v_flag |= VROOT;
377 (*vpp)->v_type = VDIR;
379 return error;
382 /* --------------------------------------------------------------------- */
384 static int
385 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp)
387 boolean_t found;
388 struct tmpfs_fid *tfhp;
389 struct tmpfs_mount *tmp;
390 struct tmpfs_node *node;
392 tmp = VFS_TO_TMPFS(mp);
394 tfhp = (struct tmpfs_fid *)fhp;
395 if (tfhp->tf_len != sizeof(struct tmpfs_fid))
396 return EINVAL;
398 if (tfhp->tf_id >= tmp->tm_nodes_max)
399 return EINVAL;
401 found = FALSE;
403 TMPFS_LOCK(tmp);
404 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
405 if (node->tn_id == tfhp->tf_id &&
406 node->tn_gen == tfhp->tf_gen) {
407 found = TRUE;
408 break;
411 TMPFS_UNLOCK(tmp);
413 if (found)
414 return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp));
416 return (EINVAL);
419 /* --------------------------------------------------------------------- */
421 /* ARGSUSED2 */
422 static int
423 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
425 fsfilcnt_t freenodes;
426 struct tmpfs_mount *tmp;
428 tmp = VFS_TO_TMPFS(mp);
430 sbp->f_iosize = PAGE_SIZE;
431 sbp->f_bsize = PAGE_SIZE;
433 sbp->f_blocks = TMPFS_PAGES_MAX(tmp);
434 sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp);
436 freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse,
437 TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node));
439 sbp->f_files = freenodes + tmp->tm_nodes_inuse;
440 sbp->f_ffree = freenodes;
441 /* sbp->f_owner = tmp->tn_uid; */
443 return 0;
446 /* --------------------------------------------------------------------- */
449 * tmpfs vfs operations.
452 static struct vfsops tmpfs_vfsops = {
453 .vfs_mount = tmpfs_mount,
454 .vfs_unmount = tmpfs_unmount,
455 .vfs_root = tmpfs_root,
456 .vfs_statfs = tmpfs_statfs,
457 .vfs_fhtovp = tmpfs_fhtovp,
458 .vfs_sync = vfs_stdsync
461 VFS_SET(tmpfs_vfsops, tmpfs, 0);