kernel - TMPFS - Bug fixing pass - directory hierarchy
[dragonfly.git] / sys / vfs / tmpfs / tmpfs_vfsops.c
blobce0165da23e196b39e6424aabfd6e677a0d51e7f
/*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/

/*-
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Efficient memory file system.
 *
 * tmpfs is a file system that uses NetBSD's virtual memory sub-system
 * (the well-known UVM) to store file data and metadata in an efficient
 * way.  This means that it does not follow the structure of an on-disk
 * file system because it simply does not need to.  Instead, it uses
 * memory-specific data structures and algorithms to automatically
 * allocate and release resources.
 */
43 #include <sys/cdefs.h>
44 #include <sys/conf.h>
45 #include <sys/param.h>
46 #include <sys/limits.h>
47 #include <sys/lock.h>
48 #include <sys/mutex.h>
49 #include <sys/kernel.h>
50 #include <sys/stat.h>
51 #include <sys/systm.h>
52 #include <sys/sysctl.h>
53 #include <sys/objcache.h>
55 #include <vm/vm.h>
56 #include <vm/vm_object.h>
57 #include <vm/vm_param.h>
59 #include <vfs/tmpfs/tmpfs.h>
60 #include <vfs/tmpfs/tmpfs_vnops.h>
63 * Default permission for root node
65 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
67 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
68 MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names");
69 MALLOC_DEFINE(M_TMPFS_DIRENT, "tmpfs dirent", "tmpfs dirent structures");
70 MALLOC_DEFINE(M_TMPFS_NODE, "tmpfs node", "tmpfs node structures");
72 /* --------------------------------------------------------------------- */
74 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
75 static int tmpfs_unmount(struct mount *, int);
76 static int tmpfs_root(struct mount *, struct vnode **);
77 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
78 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
80 /* --------------------------------------------------------------------- */
82 #define SWI_MAXMIB 3
83 static u_int
84 get_swpgtotal(void)
86 struct swdevt swinfo;
87 char *sname = "vm.swap_info";
88 int soid[SWI_MAXMIB], oid[2];
89 u_int unswdev, total, dmmax, nswapdev;
90 size_t mibi, len;
92 total = 0;
94 len = sizeof(dmmax);
95 if (kernel_sysctlbyname("vm.dmmax", &dmmax, &len,
96 NULL, 0, NULL) != 0)
97 return total;
99 len = sizeof(nswapdev);
100 if (kernel_sysctlbyname("vm.nswapdev", &nswapdev, &len,
101 NULL, 0, NULL) != 0)
102 return total;
104 mibi = (SWI_MAXMIB - 1) * sizeof(int);
105 oid[0] = 0;
106 oid[1] = 3;
108 if (kernel_sysctl(oid, 2,
109 soid, &mibi, (void *)sname, strlen(sname),
110 NULL) != 0)
111 return total;
113 mibi = (SWI_MAXMIB - 1);
114 for (unswdev = 0; unswdev < nswapdev; ++unswdev) {
115 soid[mibi] = unswdev;
116 len = sizeof(struct swdevt);
117 if (kernel_sysctl(soid, mibi + 1, &swinfo, &len, NULL, 0,
118 NULL) != 0)
119 return total;
120 if (len == sizeof(struct swdevt))
121 total += (swinfo.sw_nblks - dmmax);
124 return total;
127 /* --------------------------------------------------------------------- */
129 tmpfs_node_ctor(void *obj, void *privdata, int flags)
131 struct tmpfs_node *node = (struct tmpfs_node *)obj;
133 node->tn_gen++;
134 node->tn_size = 0;
135 node->tn_status = 0;
136 node->tn_flags = 0;
137 node->tn_links = 0;
138 node->tn_vnode = NULL;
139 node->tn_vpstate = TMPFS_VNODE_WANT;
140 bzero(&node->tn_spec, sizeof(node->tn_spec));
142 return (1);
145 static void
146 tmpfs_node_dtor(void *obj, void *privdata)
148 struct tmpfs_node *node = (struct tmpfs_node *)obj;
149 node->tn_type = VNON;
150 node->tn_vpstate = TMPFS_VNODE_DOOMED;
153 static void*
154 tmpfs_node_init(void *args, int flags)
156 struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags);
157 node->tn_id = 0;
159 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
160 node->tn_gen = karc4random();
162 return node;
165 static void
166 tmpfs_node_fini(void *obj, void *args)
168 struct tmpfs_node *node = (struct tmpfs_node *)obj;
169 lockuninit(&node->tn_interlock);
170 objcache_malloc_free(obj, args);
173 struct objcache_malloc_args tmpfs_dirent_pool_malloc_args =
174 { sizeof(struct tmpfs_dirent), M_TMPFS_DIRENT };
175 struct objcache_malloc_args tmpfs_node_pool_malloc_args =
176 { sizeof(struct tmpfs_node), M_TMPFS_NODE };
178 static int
179 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
181 struct tmpfs_mount *tmp;
182 struct tmpfs_node *root;
183 size_t pages, mem_size;
184 ino_t nodes;
185 int error;
186 /* Size counters. */
187 ino_t nodes_max = 0;
188 size_t size_max = 0;
189 size_t size;
191 /* Root node attributes. */
192 uid_t root_uid = cred->cr_uid;
193 gid_t root_gid = cred->cr_gid;
194 mode_t root_mode = (VREAD | VWRITE);
196 if (mp->mnt_flag & MNT_UPDATE) {
197 /* XXX: There is no support yet to update file system
198 * settings. Should be added. */
200 return EOPNOTSUPP;
203 kprintf("WARNING: TMPFS is considered to be a highly experimental "
204 "feature in DragonFly.\n");
206 /* Do not allow mounts if we do not have enough memory to preserve
207 * the minimum reserved pages. */
208 mem_size = vmstats.v_free_count + vmstats.v_inactive_count + get_swpgtotal();
209 mem_size -= mem_size > vmstats.v_wire_count ? vmstats.v_wire_count : mem_size;
210 if (mem_size < TMPFS_PAGES_RESERVED)
211 return ENOSPC;
214 * If mount by non-root, then verify that user has necessary
215 * permissions on the device.
217 if (cred->cr_uid != 0) {
218 root_mode = VREAD;
219 if ((mp->mnt_flag & MNT_RDONLY) == 0)
220 root_mode |= VWRITE;
223 /* Get the maximum number of memory pages this file system is
224 * allowed to use, based on the maximum size the user passed in
225 * the mount structure. A value of zero is treated as if the
226 * maximum available space was requested. */
227 if (size_max < PAGE_SIZE || size_max >= SIZE_MAX)
228 pages = SIZE_MAX;
229 else
230 pages = howmany(size_max, PAGE_SIZE);
231 KKASSERT(pages > 0);
233 if (nodes_max <= 3)
234 nodes = 3 + pages * PAGE_SIZE / 1024;
235 else
236 nodes = nodes_max;
237 KKASSERT(nodes >= 3);
239 /* Allocate the tmpfs mount structure and fill it. */
240 tmp = (struct tmpfs_mount *)kmalloc(sizeof(struct tmpfs_mount),
241 M_TMPFSMNT, M_WAITOK | M_ZERO);
243 lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE);
244 tmp->tm_nodes_max = nodes;
245 tmp->tm_nodes_inuse = 0;
246 tmp->tm_maxfilesize = (u_int64_t)(vmstats.v_page_count + get_swpgtotal()) * PAGE_SIZE;
247 LIST_INIT(&tmp->tm_nodes_used);
249 tmp->tm_pages_max = pages;
250 tmp->tm_pages_used = 0;
251 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache",
252 0, 0,
253 NULL, NULL, NULL,
254 objcache_malloc_alloc, objcache_malloc_free,
255 &tmpfs_dirent_pool_malloc_args);
256 tmp->tm_node_pool = objcache_create( "tmpfs node cache",
257 0, 0,
258 tmpfs_node_ctor, tmpfs_node_dtor, NULL,
259 tmpfs_node_init, tmpfs_node_fini,
260 &tmpfs_node_pool_malloc_args);
262 /* Allocate the root node. */
263 error = tmpfs_alloc_node(tmp, VDIR, root_uid,
264 root_gid, root_mode & ALLPERMS, NULL, NULL,
265 VNOVAL, VNOVAL, &root);
267 if (error != 0 || root == NULL) {
268 objcache_destroy(tmp->tm_node_pool);
269 objcache_destroy(tmp->tm_dirent_pool);
270 kfree(tmp, M_TMPFSMNT);
271 return error;
273 KASSERT(root->tn_id >= 0, ("tmpfs root with invalid ino: %d", (int)root->tn_id));
274 tmp->tm_root = root;
276 mp->mnt_flag |= MNT_LOCAL;
277 #if 0
278 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE |
279 MNTK_IN_MPSAFE | MNTK_SG_MPSAFE;
280 #endif
281 mp->mnt_data = (qaddr_t)tmp;
282 vfs_getnewfsid(mp);
285 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
286 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
288 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
289 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
290 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
291 copyinstr(path, mp->mnt_stat.f_mntonname,
292 sizeof(mp->mnt_stat.f_mntonname) -1,
293 &size);
295 tmpfs_statfs(mp, &mp->mnt_stat, cred);
297 return 0;
300 /* --------------------------------------------------------------------- */
302 /* ARGSUSED2 */
303 static int
304 tmpfs_unmount(struct mount *mp, int mntflags)
306 int error;
307 int flags = 0;
308 int found;
309 struct tmpfs_mount *tmp;
310 struct tmpfs_node *node;
312 /* Handle forced unmounts. */
313 if (mntflags & MNT_FORCE)
314 flags |= FORCECLOSE;
316 /* Tell vflush->vinvalbuf->fsync to throw away data */
317 tmp = VFS_TO_TMPFS(mp);
318 tmp->tm_flags |= TMPFS_FLAG_UNMOUNTING;
320 /* Finalize all pending I/O. */
321 error = vflush(mp, 0, flags);
322 if (error != 0)
323 return error;
326 * First pass get rid of all the directory entries and
327 * vnode associations. The directory structure will
328 * remain via the extra link count representing tn_dir.tn_parent.
330 * No vnodes should remain after the vflush above.
332 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
333 ++node->tn_links;
334 TMPFS_NODE_LOCK(node);
335 if (node->tn_type == VDIR) {
336 struct tmpfs_dirent *de;
338 while (!TAILQ_EMPTY(&node->tn_dir.tn_dirhead)) {
339 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead);
340 tmpfs_free_dirent(tmp, de);
341 node->tn_size -= sizeof(struct tmpfs_dirent);
344 KKASSERT(node->tn_vnode == NULL);
345 #if 0
346 vp = node->tn_vnode;
347 if (vp != NULL) {
348 tmpfs_free_vp(vp);
349 vrecycle(vp);
350 node->tn_vnode = NULL;
352 #endif
353 TMPFS_NODE_UNLOCK(node);
354 --node->tn_links;
358 * Now get rid of all nodes. We can remove any node with a
359 * link count of 0 or any directory node with a link count of
360 * 1. The parents will not be destroyed until all their children
361 * have been destroyed.
363 * Recursion in tmpfs_free_node() can further modify the list so
364 * we cannot use a next pointer here.
366 * The root node will be destroyed by this loop (it will be last).
368 while (!LIST_EMPTY(&tmp->tm_nodes_used)) {
369 found = 0;
370 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
371 if (node->tn_links == 0 ||
372 (node->tn_links == 1 && node->tn_type == VDIR)) {
373 TMPFS_NODE_LOCK(node);
374 tmpfs_free_node(tmp, node);
375 /* eats lock */
376 found = 1;
377 break;
380 if (found == 0) {
381 kprintf("tmpfs: Cannot free entire node tree!");
382 break;
386 KKASSERT(tmp->tm_root == NULL);
388 objcache_destroy(tmp->tm_dirent_pool);
389 objcache_destroy(tmp->tm_node_pool);
391 lockuninit(&tmp->allnode_lock);
392 KKASSERT(tmp->tm_pages_used == 0);
393 KKASSERT(tmp->tm_nodes_inuse == 0);
395 /* Throw away the tmpfs_mount structure. */
396 kfree(tmp, M_TMPFSMNT);
397 mp->mnt_data = NULL;
399 mp->mnt_flag &= ~MNT_LOCAL;
400 return 0;
403 /* --------------------------------------------------------------------- */
405 static int
406 tmpfs_root(struct mount *mp, struct vnode **vpp)
408 struct tmpfs_mount *tmp;
409 int error;
411 tmp = VFS_TO_TMPFS(mp);
412 if (tmp->tm_root == NULL) {
413 kprintf("tmpfs_root: called without root node %p\n", mp);
414 print_backtrace();
415 *vpp = NULL;
416 error = EINVAL;
417 } else {
418 error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp);
419 (*vpp)->v_flag |= VROOT;
420 (*vpp)->v_type = VDIR;
422 return error;
425 /* --------------------------------------------------------------------- */
427 static int
428 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp)
430 boolean_t found;
431 struct tmpfs_fid *tfhp;
432 struct tmpfs_mount *tmp;
433 struct tmpfs_node *node;
435 tmp = VFS_TO_TMPFS(mp);
437 tfhp = (struct tmpfs_fid *)fhp;
438 if (tfhp->tf_len != sizeof(struct tmpfs_fid))
439 return EINVAL;
441 if (tfhp->tf_id >= tmp->tm_nodes_max)
442 return EINVAL;
444 found = FALSE;
446 TMPFS_LOCK(tmp);
447 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
448 if (node->tn_id == tfhp->tf_id &&
449 node->tn_gen == tfhp->tf_gen) {
450 found = TRUE;
451 break;
454 TMPFS_UNLOCK(tmp);
456 if (found)
457 return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp));
459 return (EINVAL);
462 /* --------------------------------------------------------------------- */
464 /* ARGSUSED2 */
465 static int
466 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
468 fsfilcnt_t freenodes;
469 struct tmpfs_mount *tmp;
471 tmp = VFS_TO_TMPFS(mp);
473 sbp->f_iosize = PAGE_SIZE;
474 sbp->f_bsize = PAGE_SIZE;
476 sbp->f_blocks = TMPFS_PAGES_MAX(tmp);
477 sbp->f_bavail = sbp->f_bfree = TMPFS_PAGES_AVAIL(tmp);
479 freenodes = MIN(tmp->tm_nodes_max - tmp->tm_nodes_inuse,
480 TMPFS_PAGES_AVAIL(tmp) * PAGE_SIZE / sizeof(struct tmpfs_node));
482 sbp->f_files = freenodes + tmp->tm_nodes_inuse;
483 sbp->f_ffree = freenodes;
484 /* sbp->f_owner = tmp->tn_uid; */
486 return 0;
489 /* --------------------------------------------------------------------- */
492 * tmpfs vfs operations.
495 static struct vfsops tmpfs_vfsops = {
496 .vfs_mount = tmpfs_mount,
497 .vfs_unmount = tmpfs_unmount,
498 .vfs_root = tmpfs_root,
499 .vfs_statfs = tmpfs_statfs,
500 .vfs_fhtovp = tmpfs_fhtovp,
501 .vfs_sync = vfs_stdsync
504 VFS_SET(tmpfs_vfsops, tmpfs, 0);