Import 2.3.99pre2-1
[davej-history.git] / fs / super.c
blobd54e850dfb83c8e9ba400317a81852a4df6f1327
1 /*
2 * linux/fs/super.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
6 * super.c contains code to handle: - mount structures
7 * - super-block tables.
8 * - mount system call
9 * - umount system call
11 * Added options to /proc/mounts
12 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
14 * GK 2/5/95 - Changed to support mounting the root fs via NFS
16 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
17 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
18 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
21 #include <linux/config.h>
22 #include <linux/string.h>
23 #include <linux/malloc.h>
24 #include <linux/locks.h>
25 #include <linux/smp_lock.h>
26 #include <linux/devfs_fs_kernel.h>
27 #include <linux/fd.h>
28 #include <linux/init.h>
29 #include <linux/quotaops.h>
30 #include <linux/acct.h>
32 #include <asm/uaccess.h>
34 #include <linux/nfs_fs.h>
35 #include <linux/nfs_fs_sb.h>
36 #include <linux/nfs_mount.h>
38 #include <linux/kmod.h>
39 #define __NO_VERSION__
40 #include <linux/module.h>
43 * We use a semaphore to synchronize all mount/umount
44 * activity - imagine the mess if we have a race between
45 * unmounting a filesystem and re-mounting it (or something
46 * else).
48 static DECLARE_MUTEX(mount_sem);
50 extern void wait_for_keypress(void);
52 extern int root_mountflags;
54 static int do_remount_sb(struct super_block *sb, int flags, char * data);
56 /* this is initialized in init/main.c */
57 kdev_t ROOT_DEV;
59 int nr_super_blocks = 0;
60 int max_super_blocks = NR_SUPER;
61 LIST_HEAD(super_blocks);
64 * Handling of filesystem drivers list.
65 * Rules:
66 * Inclusion to/removals from/scanning of list are protected by spinlock.
67 * During the unload module must call unregister_filesystem().
68 * We can access the fields of list element if:
69 * 1) spinlock is held or
70 * 2) we hold the reference to the module.
71 * The latter can be guaranteed by call of try_inc_mod_count(); if it
72 * returned 0 we must skip the element, otherwise we got the reference.
73 * Once the reference is obtained we can drop the spinlock.
76 static struct file_system_type *file_systems = NULL;
77 static spinlock_t file_systems_lock = SPIN_LOCK_UNLOCKED;
79 static void put_filesystem(struct file_system_type *fs)
81 if (fs->owner)
82 __MOD_DEC_USE_COUNT(fs->owner);
85 static struct file_system_type **find_filesystem(const char *name)
87 struct file_system_type **p;
88 for (p=&file_systems; *p; p=&(*p)->next)
89 if (strcmp((*p)->name,name) == 0)
90 break;
91 return p;
94 int register_filesystem(struct file_system_type * fs)
96 int res = 0;
97 struct file_system_type ** p;
99 if (!fs)
100 return -EINVAL;
101 if (fs->next)
102 return -EBUSY;
103 spin_lock(&file_systems_lock);
104 p = find_filesystem(fs->name);
105 if (*p)
106 res = -EBUSY;
107 else
108 *p = fs;
109 spin_unlock(&file_systems_lock);
110 return res;
113 int unregister_filesystem(struct file_system_type * fs)
115 struct file_system_type ** tmp;
117 spin_lock(&file_systems_lock);
118 tmp = &file_systems;
119 while (*tmp) {
120 if (fs == *tmp) {
121 *tmp = fs->next;
122 fs->next = NULL;
123 spin_unlock(&file_systems_lock);
124 return 0;
126 tmp = &(*tmp)->next;
128 spin_unlock(&file_systems_lock);
129 return -EINVAL;
132 static int fs_index(const char * __name)
134 struct file_system_type * tmp;
135 char * name;
136 int err, index;
138 name = getname(__name);
139 err = PTR_ERR(name);
140 if (IS_ERR(name))
141 return err;
143 err = -EINVAL;
144 spin_lock(&file_systems_lock);
145 for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
146 if (strcmp(tmp->name,name) == 0) {
147 err = index;
148 break;
150 index++;
152 spin_unlock(&file_systems_lock);
153 putname(name);
154 return err;
157 static int fs_name(unsigned int index, char * buf)
159 struct file_system_type * tmp;
160 int len, res;
162 spin_lock(&file_systems_lock);
163 for (tmp = file_systems; tmp; tmp = tmp->next, index--)
164 if (index <= 0 && try_inc_mod_count(tmp->owner))
165 break;
166 spin_unlock(&file_systems_lock);
167 if (!tmp)
168 return -EINVAL;
170 /* OK, we got the reference, so we can safely block */
171 len = strlen(tmp->name) + 1;
172 res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
173 put_filesystem(tmp);
174 return res;
177 static int fs_maxindex(void)
179 struct file_system_type * tmp;
180 int index;
182 spin_lock(&file_systems_lock);
183 for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
185 spin_unlock(&file_systems_lock);
186 return index;
190 * Whee.. Weird sysv syscall.
192 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
194 int retval = -EINVAL;
196 lock_kernel();
197 switch (option) {
198 case 1:
199 retval = fs_index((const char *) arg1);
200 break;
202 case 2:
203 retval = fs_name(arg1, (char *) arg2);
204 break;
206 case 3:
207 retval = fs_maxindex();
208 break;
210 unlock_kernel();
211 return retval;
214 int get_filesystem_list(char * buf)
216 int len = 0;
217 struct file_system_type * tmp;
219 spin_lock(&file_systems_lock);
220 tmp = file_systems;
221 while (tmp && len < PAGE_SIZE - 80) {
222 len += sprintf(buf+len, "%s\t%s\n",
223 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
224 tmp->name);
225 tmp = tmp->next;
227 spin_unlock(&file_systems_lock);
228 return len;
231 static struct file_system_type *get_fs_type(const char *name)
233 struct file_system_type *fs;
235 spin_lock(&file_systems_lock);
236 fs = *(find_filesystem(name));
237 if (fs && !try_inc_mod_count(fs->owner))
238 fs = NULL;
239 spin_unlock(&file_systems_lock);
240 if (!fs && (request_module(name) == 0)) {
241 spin_lock(&file_systems_lock);
242 fs = *(find_filesystem(name));
243 if (fs && !try_inc_mod_count(fs->owner))
244 fs = NULL;
245 spin_unlock(&file_systems_lock);
247 return fs;
251 struct vfsmount *vfsmntlist = NULL;
252 static struct vfsmount *vfsmnttail = NULL, *mru_vfsmnt = NULL;
255 * This part handles the management of the list of mounted filesystems.
257 struct vfsmount *lookup_vfsmnt(kdev_t dev)
259 struct vfsmount *lptr;
261 if (vfsmntlist == NULL)
262 return NULL;
264 if (mru_vfsmnt != NULL && mru_vfsmnt->mnt_dev == dev)
265 return (mru_vfsmnt);
267 for (lptr = vfsmntlist; lptr != NULL; lptr = lptr->mnt_next)
268 if (lptr->mnt_dev == dev) {
269 mru_vfsmnt = lptr;
270 return (lptr);
273 return NULL;
276 static struct vfsmount *add_vfsmnt(struct super_block *sb,
277 const char *dev_name, const char *dir_name)
279 struct vfsmount *lptr;
280 char *tmp, *name;
282 lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
283 if (!lptr)
284 goto out;
285 memset(lptr, 0, sizeof(struct vfsmount));
287 lptr->mnt_sb = sb;
288 lptr->mnt_dev = sb->s_dev;
289 lptr->mnt_flags = sb->s_flags;
291 sema_init(&lptr->mnt_dquot.dqio_sem, 1);
292 sema_init(&lptr->mnt_dquot.dqoff_sem, 1);
293 lptr->mnt_dquot.flags = 0;
295 /* N.B. Is it really OK to have a vfsmount without names? */
296 if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
297 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
298 if (name) {
299 strcpy(name, tmp);
300 lptr->mnt_devname = name;
302 putname(tmp);
304 if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
305 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
306 if (name) {
307 strcpy(name, tmp);
308 lptr->mnt_dirname = name;
310 putname(tmp);
313 if (vfsmntlist == (struct vfsmount *)NULL) {
314 vfsmntlist = vfsmnttail = lptr;
315 } else {
316 vfsmnttail->mnt_next = lptr;
317 vfsmnttail = lptr;
319 out:
320 return lptr;
323 void remove_vfsmnt(kdev_t dev)
325 struct vfsmount *lptr, *tofree;
327 if (vfsmntlist == NULL)
328 return;
329 lptr = vfsmntlist;
330 if (lptr->mnt_dev == dev) {
331 tofree = lptr;
332 vfsmntlist = lptr->mnt_next;
333 if (vfsmnttail->mnt_dev == dev)
334 vfsmnttail = vfsmntlist;
335 } else {
336 while (lptr->mnt_next != NULL) {
337 if (lptr->mnt_next->mnt_dev == dev)
338 break;
339 lptr = lptr->mnt_next;
341 tofree = lptr->mnt_next;
342 if (tofree == NULL)
343 return;
344 lptr->mnt_next = lptr->mnt_next->mnt_next;
345 if (vfsmnttail->mnt_dev == dev)
346 vfsmnttail = lptr;
348 if (tofree == mru_vfsmnt)
349 mru_vfsmnt = NULL;
350 kfree(tofree->mnt_devname);
351 kfree(tofree->mnt_dirname);
352 kfree_s(tofree, sizeof(struct vfsmount));
355 static struct proc_fs_info {
356 int flag;
357 char *str;
358 } fs_info[] = {
359 { MS_NOEXEC, ",noexec" },
360 { MS_NOSUID, ",nosuid" },
361 { MS_NODEV, ",nodev" },
362 { MS_SYNCHRONOUS, ",sync" },
363 { MS_MANDLOCK, ",mand" },
364 { MS_NOATIME, ",noatime" },
365 { MS_NODIRATIME, ",nodiratime" },
366 #ifdef MS_NOSUB /* Can't find this except in mount.c */
367 { MS_NOSUB, ",nosub" },
368 #endif
369 { 0, NULL }
372 static struct proc_nfs_info {
373 int flag;
374 char *str;
375 } nfs_info[] = {
376 { NFS_MOUNT_SOFT, ",soft" },
377 { NFS_MOUNT_INTR, ",intr" },
378 { NFS_MOUNT_POSIX, ",posix" },
379 { NFS_MOUNT_NOCTO, ",nocto" },
380 { NFS_MOUNT_NOAC, ",noac" },
381 { 0, NULL }
384 int get_filesystem_info( char *buf )
386 struct vfsmount *tmp;
387 struct proc_fs_info *fs_infop;
388 struct proc_nfs_info *nfs_infop;
389 struct nfs_server *nfss;
390 int len = 0;
391 char *path,*buffer = (char *) __get_free_page(GFP_KERNEL);
393 if (!buffer) return 0;
394 for (tmp = vfsmntlist; tmp && len < PAGE_SIZE - 160;
395 tmp = tmp->mnt_next) {
396 path = d_path(tmp->mnt_sb->s_root, buffer, PAGE_SIZE);
397 if (!path)
398 continue;
399 len += sprintf( buf + len, "%s %s %s %s",
400 tmp->mnt_devname, path,
401 tmp->mnt_sb->s_type->name,
402 tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
403 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
404 if (tmp->mnt_flags & fs_infop->flag) {
405 strcpy(buf + len, fs_infop->str);
406 len += strlen(fs_infop->str);
409 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
410 nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
411 if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
412 len += sprintf(buf+len, ",rsize=%d",
413 nfss->rsize);
415 if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
416 len += sprintf(buf+len, ",wsize=%d",
417 nfss->wsize);
419 #if 0
420 if (nfss->timeo != 7*HZ/10) {
421 len += sprintf(buf+len, ",timeo=%d",
422 nfss->timeo*10/HZ);
424 if (nfss->retrans != 3) {
425 len += sprintf(buf+len, ",retrans=%d",
426 nfss->retrans);
428 #endif
429 if (nfss->acregmin != 3*HZ) {
430 len += sprintf(buf+len, ",acregmin=%d",
431 nfss->acregmin/HZ);
433 if (nfss->acregmax != 60*HZ) {
434 len += sprintf(buf+len, ",acregmax=%d",
435 nfss->acregmax/HZ);
437 if (nfss->acdirmin != 30*HZ) {
438 len += sprintf(buf+len, ",acdirmin=%d",
439 nfss->acdirmin/HZ);
441 if (nfss->acdirmax != 60*HZ) {
442 len += sprintf(buf+len, ",acdirmax=%d",
443 nfss->acdirmax/HZ);
445 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
446 if (nfss->flags & nfs_infop->flag) {
447 strcpy(buf + len, nfs_infop->str);
448 len += strlen(nfs_infop->str);
451 len += sprintf(buf+len, ",addr=%s",
452 nfss->hostname);
454 len += sprintf( buf + len, " 0 0\n" );
457 free_page((unsigned long) buffer);
458 return len;
461 void __wait_on_super(struct super_block * sb)
463 DECLARE_WAITQUEUE(wait, current);
465 add_wait_queue(&sb->s_wait, &wait);
466 repeat:
467 set_current_state(TASK_UNINTERRUPTIBLE);
468 if (sb->s_lock) {
469 schedule();
470 goto repeat;
472 remove_wait_queue(&sb->s_wait, &wait);
473 current->state = TASK_RUNNING;
477 * Note: check the dirty flag before waiting, so we don't
478 * hold up the sync while mounting a device. (The newly
479 * mounted device won't need syncing.)
481 void sync_supers(kdev_t dev)
483 struct super_block * sb;
485 for (sb = sb_entry(super_blocks.next);
486 sb != sb_entry(&super_blocks);
487 sb = sb_entry(sb->s_list.next)) {
488 if (!sb->s_dev)
489 continue;
490 if (dev && sb->s_dev != dev)
491 continue;
492 if (!sb->s_dirt)
493 continue;
494 /* N.B. Should lock the superblock while writing */
495 wait_on_super(sb);
496 if (!sb->s_dev || !sb->s_dirt)
497 continue;
498 if (dev && (dev != sb->s_dev))
499 continue;
500 if (sb->s_op && sb->s_op->write_super)
501 sb->s_op->write_super(sb);
505 struct super_block * get_super(kdev_t dev)
507 struct super_block * s;
509 if (!dev)
510 return NULL;
511 restart:
512 s = sb_entry(super_blocks.next);
513 while (s != sb_entry(&super_blocks))
514 if (s->s_dev == dev) {
515 wait_on_super(s);
516 if (s->s_dev == dev)
517 return s;
518 goto restart;
519 } else
520 s = sb_entry(s->s_list.next);
521 return NULL;
524 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
526 struct super_block *s;
527 struct ustat tmp;
528 struct statfs sbuf;
529 int err = -EINVAL;
531 lock_kernel();
532 s = get_super(to_kdev_t(dev));
533 if (s == NULL)
534 goto out;
535 err = vfs_statfs(s, &sbuf);
536 if (err)
537 goto out;
539 memset(&tmp,0,sizeof(struct ustat));
540 tmp.f_tfree = sbuf.f_bfree;
541 tmp.f_tinode = sbuf.f_ffree;
543 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
544 out:
545 unlock_kernel();
546 return err;
550 * Find a super_block with no device assigned.
552 struct super_block *get_empty_super(void)
554 struct super_block *s;
556 for (s = sb_entry(super_blocks.next);
557 s != sb_entry(&super_blocks);
558 s = sb_entry(s->s_list.next)) {
559 if (s->s_dev)
560 continue;
561 if (!s->s_lock)
562 return s;
563 printk("VFS: empty superblock %p locked!\n", s);
565 /* Need a new one... */
566 if (nr_super_blocks >= max_super_blocks)
567 return NULL;
568 s = kmalloc(sizeof(struct super_block), GFP_USER);
569 if (s) {
570 nr_super_blocks++;
571 memset(s, 0, sizeof(struct super_block));
572 INIT_LIST_HEAD(&s->s_dirty);
573 list_add (&s->s_list, super_blocks.prev);
574 init_waitqueue_head(&s->s_wait);
575 INIT_LIST_HEAD(&s->s_files);
577 return s;
580 static struct super_block * read_super(kdev_t dev, struct block_device *bdev,
581 struct file_system_type *type, int flags,
582 void *data, int silent)
584 struct super_block * s;
585 s = get_empty_super();
586 if (!s)
587 goto out;
588 s->s_dev = dev;
589 s->s_bdev = bdev;
590 s->s_flags = flags;
591 s->s_dirt = 0;
592 sema_init(&s->s_vfs_rename_sem,1);
593 sema_init(&s->s_nfsd_free_path_sem,1);
594 s->s_type = type;
595 lock_super(s);
596 if (!type->read_super(s, data, silent))
597 goto out_fail;
598 unlock_super(s);
599 /* tell bdcache that we are going to keep this one */
600 if (bdev)
601 atomic_inc(&bdev->bd_count);
602 out:
603 return s;
605 out_fail:
606 s->s_dev = 0;
607 s->s_bdev = 0;
608 s->s_type = NULL;
609 put_filesystem(type);
610 unlock_super(s);
611 return NULL;
/*
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block-devices.  -- jrs
 *
 * One bit per minor number (256 bits); a set bit means "in use".
 */
static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
621 kdev_t get_unnamed_dev(void)
623 int i;
625 for (i = 1; i < 256; i++) {
626 if (!test_and_set_bit(i,unnamed_dev_in_use))
627 return MKDEV(UNNAMED_MAJOR, i);
629 return 0;
632 void put_unnamed_dev(kdev_t dev)
634 if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
635 return;
636 if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
637 return;
638 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
639 kdevname(dev));
642 static int d_umount(struct super_block * sb)
644 struct dentry * root = sb->s_root;
645 struct dentry * covered = root->d_covers;
647 if (root->d_count != 1)
648 return -EBUSY;
650 if (root->d_inode->i_state)
651 return -EBUSY;
653 sb->s_root = NULL;
655 if (covered != root) {
656 root->d_covers = root;
657 covered->d_mounts = covered;
658 dput(covered);
660 dput(root);
661 return 0;
664 static void d_mount(struct dentry *covered, struct dentry *dentry)
666 if (covered->d_mounts != covered) {
667 printk("VFS: mount - already mounted\n");
668 return;
670 covered->d_mounts = dentry;
671 dentry->d_covers = covered;
674 static struct block_device *do_umount(kdev_t dev, int unmount_root, int flags)
676 struct super_block * sb;
677 struct block_device *bdev;
678 int retval;
680 retval = -ENOENT;
681 sb = get_super(dev);
682 if (!sb || !sb->s_root)
683 goto out;
686 * Before checking whether the filesystem is still busy,
687 * make sure the kernel doesn't hold any quota files open
688 * on the device. If the umount fails, too bad -- there
689 * are no quotas running any more. Just turn them on again.
691 DQUOT_OFF(dev);
692 acct_auto_close(dev);
695 * If we may have to abort operations to get out of this
696 * mount, and they will themselves hold resources we must
697 * allow the fs to do things. In the Unix tradition of
698 * 'Gee thats tricky lets do it in userspace' the umount_begin
699 * might fail to complete on the first run through as other tasks
700 * must return, and the like. Thats for the mount program to worry
701 * about for the moment.
704 if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
705 sb->s_op->umount_begin(sb);
708 * Shrink dcache, then fsync. This guarantees that if the
709 * filesystem is quiescent at this point, then (a) only the
710 * root entry should be in use and (b) that root entry is
711 * clean.
713 shrink_dcache_sb(sb);
714 fsync_dev(dev);
716 if (sb == current->fs->root->d_sb && !unmount_root) {
718 * Special case for "unmounting" root ...
719 * we just try to remount it readonly.
721 retval = 0;
722 if (!(sb->s_flags & MS_RDONLY))
723 retval = do_remount_sb(sb, MS_RDONLY, 0);
724 return ERR_PTR(retval);
727 retval = d_umount(sb);
728 if (retval)
729 goto out;
731 if (sb->s_op) {
732 if (sb->s_op->write_super && sb->s_dirt)
733 sb->s_op->write_super(sb);
736 lock_super(sb);
737 if (sb->s_op) {
738 if (sb->s_op->put_super)
739 sb->s_op->put_super(sb);
742 /* Forget any remaining inodes */
743 if (invalidate_inodes(sb)) {
744 printk("VFS: Busy inodes after unmount. "
745 "Self-destruct in 5 seconds. Have a nice day...\n");
748 sb->s_dev = 0; /* Free the superblock */
749 bdev = sb->s_bdev;
750 sb->s_bdev = NULL;
751 put_filesystem(sb->s_type);
752 sb->s_type = NULL;
753 unlock_super(sb);
755 remove_vfsmnt(dev);
757 return bdev;
759 out:
760 return ERR_PTR(retval);
763 static int umount_dev(kdev_t dev, int flags)
765 int retval;
766 struct block_device *bdev;
768 retval = -ENXIO;
769 if (MAJOR(dev) >= MAX_BLKDEV)
770 goto out;
772 fsync_dev(dev);
774 down(&mount_sem);
776 bdev = do_umount(dev, 0, flags);
777 if (IS_ERR(bdev))
778 retval = PTR_ERR(bdev);
779 else {
780 retval = 0;
781 if (bdev) {
782 blkdev_put(bdev, BDEV_FS);
783 bdput(bdev);
784 } else {
785 put_unnamed_dev(dev);
788 up(&mount_sem);
789 out:
790 return retval;
794 * Now umount can handle mount points as well as block devices.
795 * This is important for filesystems which use unnamed block devices.
797 * We now support a flag for forced unmount like the other 'big iron'
798 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
801 asmlinkage long sys_umount(char * name, int flags)
803 struct dentry * dentry;
804 int retval;
806 if (!capable(CAP_SYS_ADMIN))
807 return -EPERM;
809 lock_kernel();
810 dentry = namei(name);
811 retval = PTR_ERR(dentry);
812 if (!IS_ERR(dentry)) {
813 struct inode * inode = dentry->d_inode;
814 kdev_t dev = inode->i_rdev;
816 retval = 0;
817 if (S_ISBLK(inode->i_mode)) {
818 if (IS_NODEV(inode))
819 retval = -EACCES;
820 } else {
821 struct super_block *sb = inode->i_sb;
822 retval = -EINVAL;
823 if (sb && inode == sb->s_root->d_inode) {
824 dev = sb->s_dev;
825 retval = 0;
828 dput(dentry);
830 if (!retval)
831 retval = umount_dev(dev, flags);
833 unlock_kernel();
834 return retval;
838 * The 2.0 compatible umount. No flags.
841 asmlinkage long sys_oldumount(char * name)
843 return sys_umount(name,0);
847 * Check whether we can mount the specified device.
849 int fs_may_mount(kdev_t dev)
851 struct super_block * sb = get_super(dev);
852 int busy;
854 busy = sb && sb->s_root &&
855 (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
856 return !busy;
860 * do_mount() does the actual mounting after sys_mount has done the ugly
861 * parameter parsing. When enough time has gone by, and everything uses the
862 * new mount() parameters, sys_mount() can then be cleaned up.
864 * We cannot mount a filesystem if it has active, used, or dirty inodes.
865 * We also have to flush all inode-data for this device, as the new mount
866 * might need new info.
868 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
869 * supplying a leading "!" before the dir_name, allowing "stacks" of
870 * mounted filesystems. The stacking will only influence any pathname lookups
871 * _after_ the mount, but open file descriptors or working directories that
872 * are now covered remain valid. For example, when you overmount /home, any
873 * process with old cwd /home/joe will continue to use the old versions,
874 * as long as relative paths are used, but absolute paths like /home/joe/xxx
875 * will go to the new "top of stack" version. In general, crossing a
876 * mount point will always go to the top of stack element.
877 * Anyone using this new feature must know what he/she is doing.
880 int do_mount(struct block_device *bdev, const char *dev_name,
881 const char *dir_name, const char * type, int flags, void * data)
883 kdev_t dev;
884 struct dentry * dir_d;
885 struct super_block * sb;
886 struct vfsmount *vfsmnt;
887 struct file_system_type *fs_type;
888 int error;
890 if (bdev) {
891 mode_t mode = FMODE_READ; /* we always need it ;-) */
892 if (!(flags & MS_RDONLY))
893 mode |= FMODE_WRITE;
894 dev = to_kdev_t(bdev->bd_dev);
895 error = blkdev_get(bdev, mode, 0, BDEV_FS);
896 if (error)
897 return error;
898 } else {
899 dev = get_unnamed_dev();
900 if (!dev)
901 return -EMFILE; /* huh? */
904 error = -EACCES;
905 if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
906 goto out;
909 * Do the lookup first to force automounting.
911 dir_d = namei(dir_name);
912 error = PTR_ERR(dir_d);
913 if (IS_ERR(dir_d))
914 goto out;
916 down(&mount_sem);
917 error = -ENOTDIR;
918 if (!S_ISDIR(dir_d->d_inode->i_mode))
919 goto dput_and_out;
921 error = -EBUSY;
922 if (dir_d->d_covers != dir_d)
923 goto dput_and_out;
925 error = -EINVAL;
926 if (!dev)
927 goto dput_and_out;
928 check_disk_change(dev);
929 sb = get_super(dev);
930 if (sb) {
931 /* Already mounted */
932 error = -EBUSY;
933 goto dput_and_out;
936 fs_type = get_fs_type(type);
937 if (!fs_type) {
938 printk("VFS: on device %s: get_fs_type(%s) failed\n",
939 kdevname(dev), type);
940 goto dput_and_out;
943 sb = read_super(dev, bdev, fs_type, flags, data, 0);
944 if (!sb)
945 goto fsput_and_out;
948 * We may have slept while reading the super block,
949 * so we check afterwards whether it's safe to mount.
951 error = -EBUSY;
952 if (!fs_may_mount(dev))
953 goto bdput_and_out;
955 error = -ENOMEM;
956 vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
957 if (vfsmnt) {
958 d_mount(dget(dir_d), sb->s_root);
959 dput(dir_d);
960 up(&mount_sem);
961 return 0;
964 bdput_and_out:
965 /* FIXME: ->put_super() is needed here */
966 sb->s_bdev = NULL;
967 sb->s_dev = 0;
968 sb->s_type = NULL;
969 if (bdev)
970 bdput(bdev);
971 fsput_and_out:
972 put_filesystem(fs_type);
973 dput_and_out:
974 dput(dir_d);
975 up(&mount_sem);
976 out:
977 if (bdev)
978 blkdev_put(bdev, BDEV_FS);
979 else
980 put_unnamed_dev(dev);
981 return error;
986 * Alters the mount flags of a mounted file system. Only the mount point
987 * is used as a reference - file system type and the device are ignored.
990 static int do_remount_sb(struct super_block *sb, int flags, char *data)
992 int retval;
993 struct vfsmount *vfsmnt;
995 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
996 return -EACCES;
997 /*flags |= MS_RDONLY;*/
998 /* If we are remounting RDONLY, make sure there are no rw files open */
999 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
1000 if (!fs_may_remount_ro(sb))
1001 return -EBUSY;
1002 if (sb->s_op && sb->s_op->remount_fs) {
1003 lock_super(sb);
1004 retval = sb->s_op->remount_fs(sb, &flags, data);
1005 unlock_super(sb);
1006 if (retval)
1007 return retval;
1009 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
1010 vfsmnt = lookup_vfsmnt(sb->s_dev);
1011 if (vfsmnt)
1012 vfsmnt->mnt_flags = sb->s_flags;
1015 * Invalidate the inodes, as some mount options may be changed.
1016 * N.B. If we are changing media, we should check the return
1017 * from invalidate_inodes ... can't allow _any_ open files.
1019 invalidate_inodes(sb);
1021 return 0;
1024 static int do_remount(const char *dir,int flags,char *data)
1026 struct dentry *dentry;
1027 int retval;
1029 dentry = namei(dir);
1030 retval = PTR_ERR(dentry);
1031 if (!IS_ERR(dentry)) {
1032 struct super_block * sb = dentry->d_inode->i_sb;
1034 retval = -ENODEV;
1035 if (sb) {
1036 retval = -EINVAL;
1037 if (dentry == sb->s_root) {
1039 * Shrink the dcache and sync the device.
1041 shrink_dcache_sb(sb);
1042 fsync_dev(sb->s_dev);
1043 if (flags & MS_RDONLY)
1044 acct_auto_close(sb->s_dev);
1045 retval = do_remount_sb(sb, flags, data);
1048 dput(dentry);
1050 return retval;
1053 static int copy_mount_options (const void * data, unsigned long *where)
1055 int i;
1056 unsigned long page;
1057 struct vm_area_struct * vma;
1059 *where = 0;
1060 if (!data)
1061 return 0;
1063 vma = find_vma(current->mm, (unsigned long) data);
1064 if (!vma || (unsigned long) data < vma->vm_start)
1065 return -EFAULT;
1066 if (!(vma->vm_flags & VM_READ))
1067 return -EFAULT;
1068 i = vma->vm_end - (unsigned long) data;
1069 if (PAGE_SIZE <= (unsigned long) i)
1070 i = PAGE_SIZE-1;
1071 if (!(page = __get_free_page(GFP_KERNEL))) {
1072 return -ENOMEM;
1074 if (copy_from_user((void *) page,data,i)) {
1075 free_page(page);
1076 return -EFAULT;
1078 *where = page;
1079 return 0;
1083 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1084 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1086 * data is a (void *) that can point to any structure up to
1087 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1088 * information (or be NULL).
1090 * NOTE! As old versions of mount() didn't use this setup, the flags
1091 * have to have a special 16-bit magic number in the high word:
1092 * 0xC0ED. If this magic word isn't present, the flags and data info
1093 * aren't used, as the syscall assumes we are talking to an older
1094 * version that didn't understand them.
1096 long do_sys_mount(char * dev_name, char * dir_name, unsigned long type_page,
1097 unsigned long new_flags, unsigned long data_page)
1099 struct file_system_type * fstype;
1100 struct dentry * dentry = NULL;
1101 struct inode * inode = NULL;
1102 struct block_device *bdev = NULL;
1103 int retval;
1104 unsigned long flags = 0;
1106 if (!capable(CAP_SYS_ADMIN))
1107 return -EPERM;
1109 if ((new_flags &
1110 (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1111 retval = do_remount(dir_name,
1112 new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1113 (char *) data_page);
1114 goto out;
1117 fstype = get_fs_type((char *) type_page);
1118 retval = -ENODEV;
1119 if (!fstype)
1120 goto out;
1122 if (fstype->fs_flags & FS_REQUIRES_DEV) {
1123 struct block_device_operations *bdops;
1125 dentry = namei(dev_name);
1126 retval = PTR_ERR(dentry);
1127 if (IS_ERR(dentry))
1128 goto fs_out;
1130 inode = dentry->d_inode;
1131 retval = -ENOTBLK;
1132 if (!S_ISBLK(inode->i_mode))
1133 goto dput_and_out;
1135 retval = -EACCES;
1136 if (IS_NODEV(inode))
1137 goto dput_and_out;
1139 bdev = inode->i_bdev;
1140 bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
1141 if (bdops) bdev->bd_op = bdops;
1144 if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL)
1145 flags = new_flags & ~MS_MGC_MSK;
1147 retval = do_mount(bdev, dev_name, dir_name, fstype->name, flags,
1148 (void *) data_page);
1150 dput_and_out:
1151 dput(dentry);
1152 fs_out:
1153 put_filesystem(fstype);
1154 out:
1155 return retval;
1158 asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
1159 unsigned long new_flags, void * data)
1161 int retval;
1162 unsigned long data_page = 0;
1163 unsigned long type_page = 0;
1165 lock_kernel();
1166 retval = copy_mount_options (type, &type_page);
1167 if (retval < 0)
1168 goto out;
1170 /* copy_mount_options allows a NULL user pointer,
1171 * and just returns zero in that case. But if we
1172 * allow the type to be NULL we will crash.
1173 * Previously we did not check this case.
1175 if (type_page == 0) {
1176 retval = -EINVAL;
1177 goto out;
1180 retval = copy_mount_options (data, &data_page);
1181 if (retval >= 0) {
1182 retval = do_sys_mount(dev_name, dir_name, type_page,
1183 new_flags, data_page);
1184 free_page(data_page);
1186 free_page(type_page);
1187 out:
1188 unlock_kernel();
1189 return retval;
1192 void __init mount_root(void)
1194 struct file_system_type * fs_type;
1195 struct super_block * sb;
1196 struct vfsmount *vfsmnt;
1197 struct block_device *bdev = NULL;
1198 mode_t mode;
1199 int retval;
1200 void *handle;
1201 char path[64];
1202 int path_start = -1;
1204 #ifdef CONFIG_ROOT_NFS
1205 if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1206 ROOT_DEV = 0;
1207 if ((fs_type = get_fs_type("nfs"))) {
1208 sb = get_empty_super(); /* "can't fail" */
1209 sb->s_dev = get_unnamed_dev();
1210 sb->s_bdev = NULL;
1211 sb->s_flags = root_mountflags;
1212 sema_init(&sb->s_vfs_rename_sem,1);
1213 sema_init(&sb->s_nfsd_free_path_sem,1);
1214 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1215 if (vfsmnt) {
1216 if (nfs_root_mount(sb) >= 0) {
1217 sb->s_dirt = 0;
1218 sb->s_type = fs_type;
1219 current->fs->root = dget(sb->s_root);
1220 current->fs->pwd = dget(sb->s_root);
1221 ROOT_DEV = sb->s_dev;
1222 printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1223 return;
1225 remove_vfsmnt(sb->s_dev);
1227 put_unnamed_dev(sb->s_dev);
1228 sb->s_dev = 0;
1229 put_filesystem(fs_type);
1231 if (!ROOT_DEV) {
1232 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1233 ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1236 #endif
1238 #ifdef CONFIG_BLK_DEV_FD
1239 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1240 #ifdef CONFIG_BLK_DEV_RAM
1241 extern int rd_doload;
1242 extern void rd_load_secondary(void);
1243 #endif
1244 floppy_eject();
1245 #ifndef CONFIG_BLK_DEV_RAM
1246 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1247 #else
1248 /* rd_doload is 2 for a dual initrd/ramload setup */
1249 if(rd_doload==2)
1250 rd_load_secondary();
1251 else
1252 #endif
1254 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1255 wait_for_keypress();
1258 #endif
1260 devfs_make_root (root_device_name);
1261 handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME, 0,
1262 MAJOR (ROOT_DEV), MINOR (ROOT_DEV),
1263 DEVFS_SPECIAL_BLK, 1);
1264 if (handle) /* Sigh: bd*() functions only paper over the cracks */
1266 unsigned major, minor;
1268 devfs_get_maj_min (handle, &major, &minor);
1269 ROOT_DEV = MKDEV (major, minor);
1273 * Probably pure paranoia, but I'm less than happy about delving into
1274 * devfs crap and checking it right now. Later.
1276 if (!ROOT_DEV)
1277 panic("I have no root and I want to sream");
1279 bdev = bdget(kdev_t_to_nr(ROOT_DEV));
1280 if (!bdev)
1281 panic(__FUNCTION__ ": unable to allocate root device");
1282 bdev->bd_op = devfs_get_ops (handle);
1283 path_start = devfs_generate_path (handle, path + 5, sizeof (path) - 5);
1284 mode = FMODE_READ;
1285 if (!(root_mountflags & MS_RDONLY))
1286 mode |= FMODE_WRITE;
1287 retval = blkdev_get(bdev, mode, 0, BDEV_FS);
1288 if (retval == -EROFS) {
1289 root_mountflags |= MS_RDONLY;
1290 retval = blkdev_get(bdev, FMODE_READ, 0, BDEV_FS);
1292 if (retval) {
1294 * Allow the user to distinguish between failed open
1295 * and bad superblock on root device.
1297 printk ("VFS: Cannot open root device \"%s\" or %s\n",
1298 root_device_name, kdevname (ROOT_DEV));
1299 printk ("Please append a correct \"root=\" boot option\n");
1300 panic("VFS: Unable to mount root fs on %s",
1301 kdevname(ROOT_DEV));
1304 check_disk_change(ROOT_DEV);
1306 spin_lock(&file_systems_lock);
1307 for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1308 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1309 continue;
1310 if (!try_inc_mod_count(fs_type->owner))
1311 continue;
1312 spin_unlock(&file_systems_lock);
1313 sb = get_super(ROOT_DEV);
1314 if (sb) {
1315 /* Shouldn't we fail here? Oh, well... */
1316 sb->s_bdev = bdev;
1317 goto mount_it;
1319 sb = read_super(ROOT_DEV,bdev,fs_type,root_mountflags,NULL,1);
1320 if (sb)
1321 goto mount_it;
1322 spin_lock(&file_systems_lock);
1323 put_filesystem(fs_type);
1325 spin_unlock(&file_systems_lock);
1326 panic("VFS: Unable to mount root fs on %s",
1327 kdevname(ROOT_DEV));
1329 mount_it:
1330 sb->s_flags = root_mountflags;
1331 current->fs->root = dget(sb->s_root);
1332 current->fs->pwd = dget(sb->s_root);
1333 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1334 fs_type->name,
1335 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1336 if (path_start >= 0) {
1337 devfs_mk_symlink (NULL,
1338 "root", 0, DEVFS_FL_DEFAULT,
1339 path + 5 + path_start, 0,
1340 NULL, NULL);
1341 memcpy (path + path_start, "/dev/", 5);
1342 vfsmnt = add_vfsmnt (sb, path + path_start,
1343 "/");
1345 else vfsmnt = add_vfsmnt (sb, "/dev/root", "/");
1346 if (vfsmnt) {
1347 bdput(bdev); /* sb holds a reference */
1348 return;
1350 panic("VFS: add_vfsmnt failed for root fs");
1354 static void chroot_fs_refs(struct dentry *old_root,
1355 struct dentry *new_root)
1357 struct task_struct *p;
1359 read_lock(&tasklist_lock);
1360 for_each_task(p) {
1361 if (!p->fs) continue;
1362 if (p->fs->root == old_root) {
1363 dput(old_root);
1364 p->fs->root = dget(new_root);
1365 printk(KERN_DEBUG "chroot_fs_refs: changed root of "
1366 "process %d\n",p->pid);
1368 if (p->fs->pwd == old_root) {
1369 dput(old_root);
1370 p->fs->pwd = dget(new_root);
1371 printk(KERN_DEBUG "chroot_fs_refs: changed cwd of "
1372 "process %d\n",p->pid);
1375 read_unlock(&tasklist_lock);
1380 * Moves the current root to put_root, and sets root/cwd of all processes
1381 * which had them on the old root to new_root.
1383 * Note:
1384 * - we don't move root/cwd if they are not at the root (reason: if something
1385 * cared enough to change them, it's probably wrong to force them elsewhere)
1386 * - it's okay to pick a root that isn't the root of a file system, e.g.
1387 * /nfs/my_root where /nfs is the mount point. Better avoid creating
1388 * unreachable mount points this way, though.
1391 asmlinkage long sys_pivot_root(const char *new_root, const char *put_old)
1393 struct dentry *root = current->fs->root;
1394 struct dentry *d_new_root, *d_put_old, *covered;
1395 struct dentry *root_dev_root, *new_root_dev_root;
1396 struct dentry *walk, *next;
1397 int error;
1399 if (!capable(CAP_SYS_ADMIN))
1400 return -EPERM;
1402 lock_kernel();
1403 d_new_root = namei(new_root);
1404 if (IS_ERR(d_new_root)) {
1405 error = PTR_ERR(d_new_root);
1406 goto out0;
1408 d_put_old = namei(put_old);
1409 if (IS_ERR(d_put_old)) {
1410 error = PTR_ERR(d_put_old);
1411 goto out1;
1413 down(&mount_sem);
1414 if (!d_new_root->d_inode || !d_put_old->d_inode) {
1415 error = -ENOENT;
1416 goto out2;
1418 if (!S_ISDIR(d_new_root->d_inode->i_mode) ||
1419 !S_ISDIR(d_put_old->d_inode->i_mode)) {
1420 error = -ENOTDIR;
1421 goto out2;
1423 error = -EBUSY;
1424 if (d_new_root->d_sb == root->d_sb || d_put_old->d_sb == root->d_sb)
1425 goto out2; /* loop */
1426 if (d_put_old != d_put_old->d_covers)
1427 goto out2; /* mount point is busy */
1428 error = -EINVAL;
1429 walk = d_put_old; /* make sure we can reach put_old from new_root */
1430 for (;;) {
1431 next = walk->d_covers->d_parent;
1432 if (next == walk)
1433 goto out2;
1434 if (next == d_new_root)
1435 break;
1436 walk = next;
1439 new_root_dev_root = d_new_root->d_sb->s_root;
1440 covered = new_root_dev_root->d_covers;
1441 new_root_dev_root->d_covers = new_root_dev_root;
1442 dput(covered);
1443 covered->d_mounts = covered;
1445 root_dev_root = root->d_sb->s_root;
1446 root_dev_root->d_covers = dget(d_put_old);
1447 d_put_old->d_mounts = root_dev_root;
1448 chroot_fs_refs(root,d_new_root);
1449 error = 0;
1450 out2:
1451 up(&mount_sem);
1452 dput(d_put_old);
1453 out1:
1454 dput(d_new_root);
1455 out0:
1456 unlock_kernel();
1457 return error;
1461 #ifdef CONFIG_BLK_DEV_INITRD
1463 int __init change_root(kdev_t new_root_dev,const char *put_old)
1465 kdev_t old_root_dev;
1466 struct vfsmount *vfsmnt;
1467 struct dentry *old_root,*old_pwd,*dir_d = NULL;
1468 int error;
1470 old_root = current->fs->root;
1471 old_pwd = current->fs->pwd;
1472 old_root_dev = ROOT_DEV;
1473 if (!fs_may_mount(new_root_dev)) {
1474 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1475 return -EBUSY;
1477 /* First unmount devfs if mounted */
1478 dir_d = lookup_dentry ("/dev", NULL, 1);
1479 if (!IS_ERR(dir_d)) {
1480 struct super_block *sb = dir_d->d_inode->i_sb;
1482 if (sb && (dir_d->d_inode == sb->s_root->d_inode) &&
1483 (sb->s_magic == DEVFS_SUPER_MAGIC)) {
1484 dput (dir_d);
1485 do_umount (sb->s_dev, 0, 0);
1487 else dput (dir_d);
1489 ROOT_DEV = new_root_dev;
1490 mount_root();
1491 dput(old_root);
1492 dput(old_pwd);
1493 #if 1
1494 shrink_dcache();
1495 printk("change_root: old root has d_count=%d\n", old_root->d_count);
1496 #endif
1497 mount_devfs_fs ();
1499 * Get the new mount directory
1501 dir_d = lookup_dentry(put_old, NULL, 1);
1502 if (IS_ERR(dir_d)) {
1503 error = PTR_ERR(dir_d);
1504 } else if (!dir_d->d_inode) {
1505 dput(dir_d);
1506 error = -ENOENT;
1507 } else {
1508 error = 0;
1510 if (!error && dir_d->d_covers != dir_d) {
1511 dput(dir_d);
1512 error = -EBUSY;
1514 if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1515 dput(dir_d);
1516 error = -ENOTDIR;
1518 if (error) {
1519 struct block_device *bdev;
1521 printk(KERN_NOTICE "Trying to unmount old root ... ");
1522 bdev = do_umount(old_root_dev,1, 0);
1523 if (!IS_ERR(bdev)) {
1524 printk("okay\n");
1525 /* special: the old device driver is going to be
1526 a ramdisk and the point of this call is to free its
1527 protected memory (even if dirty). */
1528 destroy_buffers(old_root_dev);
1529 if (bdev) {
1530 blkdev_put(bdev, BDEV_FS);
1531 bdput(bdev);
1533 return 0;
1535 printk(KERN_ERR "error %ld\n",PTR_ERR(bdev));
1536 return error;
1538 remove_vfsmnt(old_root_dev);
1539 vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1540 if (vfsmnt) {
1541 d_mount(dir_d,old_root);
1542 return 0;
1544 printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1545 return -ENOMEM;
1548 #endif