Import 2.3.28
[davej-history.git] / fs / super.c
blob7b1b15e3c63b02476141b62749fa2998ce01f8f0
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables.
 *                                   - mount system call
 *                                   - umount system call
 *
 *  Added options to /proc/mounts
 *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *
 *  GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 */
20 #include <linux/config.h>
21 #include <linux/malloc.h>
22 #include <linux/locks.h>
23 #include <linux/smp_lock.h>
24 #include <linux/fd.h>
25 #include <linux/init.h>
26 #include <linux/quotaops.h>
27 #include <linux/acct.h>
29 #include <asm/uaccess.h>
31 #include <linux/nfs_fs.h>
32 #include <linux/nfs_fs_sb.h>
33 #include <linux/nfs_mount.h>
35 #ifdef CONFIG_KMOD
36 #include <linux/kmod.h>
37 #endif
40 * We use a semaphore to synchronize all mount/umount
41 * activity - imagine the mess if we have a race between
42 * unmounting a filesystem and re-mounting it (or something
43 * else).
45 static DECLARE_MUTEX(mount_sem);
47 extern void wait_for_keypress(void);
48 extern struct file_operations * get_blkfops(unsigned int major);
50 extern int root_mountflags;
52 static int do_remount_sb(struct super_block *sb, int flags, char * data);
54 /* this is initialized in init/main.c */
55 kdev_t ROOT_DEV;
57 int nr_super_blocks = 0;
58 int max_super_blocks = NR_SUPER;
59 LIST_HEAD(super_blocks);
61 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
62 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
63 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
64 *mru_vfsmnt = (struct vfsmount *) NULL;
66 /*
67 * This part handles the management of the list of mounted filesystems.
69 struct vfsmount *lookup_vfsmnt(kdev_t dev)
71 struct vfsmount *lptr;
73 if (vfsmntlist == (struct vfsmount *)NULL)
74 return ((struct vfsmount *)NULL);
76 if (mru_vfsmnt != (struct vfsmount *)NULL &&
77 mru_vfsmnt->mnt_dev == dev)
78 return (mru_vfsmnt);
80 for (lptr = vfsmntlist;
81 lptr != (struct vfsmount *)NULL;
82 lptr = lptr->mnt_next)
83 if (lptr->mnt_dev == dev) {
84 mru_vfsmnt = lptr;
85 return (lptr);
88 return ((struct vfsmount *)NULL);
89 /* NOTREACHED */
92 static struct vfsmount *add_vfsmnt(struct super_block *sb,
93 const char *dev_name, const char *dir_name)
95 struct vfsmount *lptr;
96 char *tmp, *name;
98 lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
99 if (!lptr)
100 goto out;
101 memset(lptr, 0, sizeof(struct vfsmount));
103 lptr->mnt_sb = sb;
104 lptr->mnt_dev = sb->s_dev;
105 lptr->mnt_flags = sb->s_flags;
107 sema_init(&lptr->mnt_dquot.dqio_sem, 1);
108 sema_init(&lptr->mnt_dquot.dqoff_sem, 1);
109 lptr->mnt_dquot.flags = 0;
111 /* N.B. Is it really OK to have a vfsmount without names? */
112 if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
113 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
114 if (name) {
115 strcpy(name, tmp);
116 lptr->mnt_devname = name;
118 putname(tmp);
120 if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
121 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
122 if (name) {
123 strcpy(name, tmp);
124 lptr->mnt_dirname = name;
126 putname(tmp);
129 if (vfsmntlist == (struct vfsmount *)NULL) {
130 vfsmntlist = vfsmnttail = lptr;
131 } else {
132 vfsmnttail->mnt_next = lptr;
133 vfsmnttail = lptr;
135 out:
136 return lptr;
139 void remove_vfsmnt(kdev_t dev)
141 struct vfsmount *lptr, *tofree;
143 if (vfsmntlist == (struct vfsmount *)NULL)
144 return;
145 lptr = vfsmntlist;
146 if (lptr->mnt_dev == dev) {
147 tofree = lptr;
148 vfsmntlist = lptr->mnt_next;
149 if (vfsmnttail->mnt_dev == dev)
150 vfsmnttail = vfsmntlist;
151 } else {
152 while (lptr->mnt_next != (struct vfsmount *)NULL) {
153 if (lptr->mnt_next->mnt_dev == dev)
154 break;
155 lptr = lptr->mnt_next;
157 tofree = lptr->mnt_next;
158 if (tofree == (struct vfsmount *)NULL)
159 return;
160 lptr->mnt_next = lptr->mnt_next->mnt_next;
161 if (vfsmnttail->mnt_dev == dev)
162 vfsmnttail = lptr;
164 if (tofree == mru_vfsmnt)
165 mru_vfsmnt = NULL;
166 kfree(tofree->mnt_devname);
167 kfree(tofree->mnt_dirname);
168 kfree_s(tofree, sizeof(struct vfsmount));
171 int register_filesystem(struct file_system_type * fs)
173 struct file_system_type ** tmp;
175 if (!fs)
176 return -EINVAL;
177 if (fs->next)
178 return -EBUSY;
179 tmp = &file_systems;
180 while (*tmp) {
181 if (strcmp((*tmp)->name, fs->name) == 0)
182 return -EBUSY;
183 tmp = &(*tmp)->next;
185 *tmp = fs;
186 return 0;
#ifdef CONFIG_MODULES
/*
 * Remove @fs from the list of known filesystem types.
 *
 * The entry is matched by pointer identity.  Returns 0 when it was
 * found and unlinked (its ->next is reset to NULL), -EINVAL otherwise.
 */
int unregister_filesystem(struct file_system_type * fs)
{
	struct file_system_type **link;

	for (link = &file_systems; *link; link = &(*link)->next) {
		if (*link != fs)
			continue;
		*link = fs->next;
		fs->next = NULL;
		return 0;
	}
	return -EINVAL;
}
#endif
207 static int fs_index(const char * __name)
209 struct file_system_type * tmp;
210 char * name;
211 int err, index;
213 name = getname(__name);
214 err = PTR_ERR(name);
215 if (IS_ERR(name))
216 return err;
218 index = 0;
219 for (tmp = file_systems ; tmp ; tmp = tmp->next) {
220 if (strcmp(tmp->name, name) == 0) {
221 putname(name);
222 return index;
224 index++;
226 putname(name);
227 return -EINVAL;
230 static int fs_name(unsigned int index, char * buf)
232 struct file_system_type * tmp;
233 int len;
235 tmp = file_systems;
236 while (tmp && index > 0) {
237 tmp = tmp->next;
238 index--;
240 if (!tmp)
241 return -EINVAL;
242 len = strlen(tmp->name) + 1;
243 return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
246 static int fs_maxindex(void)
248 struct file_system_type * tmp;
249 int index;
251 index = 0;
252 for (tmp = file_systems ; tmp ; tmp = tmp->next)
253 index++;
254 return index;
258 * Whee.. Weird sysv syscall.
260 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
262 int retval = -EINVAL;
264 lock_kernel();
265 switch (option) {
266 case 1:
267 retval = fs_index((const char *) arg1);
268 break;
270 case 2:
271 retval = fs_name(arg1, (char *) arg2);
272 break;
274 case 3:
275 retval = fs_maxindex();
276 break;
278 unlock_kernel();
279 return retval;
282 static struct proc_fs_info {
283 int flag;
284 char *str;
285 } fs_info[] = {
286 { MS_NOEXEC, ",noexec" },
287 { MS_NOSUID, ",nosuid" },
288 { MS_NODEV, ",nodev" },
289 { MS_SYNCHRONOUS, ",sync" },
290 { MS_MANDLOCK, ",mand" },
291 { MS_NOATIME, ",noatime" },
292 { MS_NODIRATIME, ",nodiratime" },
293 #ifdef MS_NOSUB /* Can't find this except in mount.c */
294 { MS_NOSUB, ",nosub" },
295 #endif
296 { 0, NULL }
299 static struct proc_nfs_info {
300 int flag;
301 char *str;
302 } nfs_info[] = {
303 { NFS_MOUNT_SOFT, ",soft" },
304 { NFS_MOUNT_INTR, ",intr" },
305 { NFS_MOUNT_POSIX, ",posix" },
306 { NFS_MOUNT_NOCTO, ",nocto" },
307 { NFS_MOUNT_NOAC, ",noac" },
308 { 0, NULL }
311 int get_filesystem_info( char *buf )
313 struct vfsmount *tmp = vfsmntlist;
314 struct proc_fs_info *fs_infop;
315 struct proc_nfs_info *nfs_infop;
316 struct nfs_server *nfss;
317 int len = 0;
319 while ( tmp && len < PAGE_SIZE - 160)
321 len += sprintf( buf + len, "%s %s %s %s",
322 tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
323 tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
324 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
325 if (tmp->mnt_flags & fs_infop->flag) {
326 strcpy(buf + len, fs_infop->str);
327 len += strlen(fs_infop->str);
330 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
331 nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
332 if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
333 len += sprintf(buf+len, ",rsize=%d",
334 nfss->rsize);
336 if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
337 len += sprintf(buf+len, ",wsize=%d",
338 nfss->wsize);
340 #if 0
341 if (nfss->timeo != 7*HZ/10) {
342 len += sprintf(buf+len, ",timeo=%d",
343 nfss->timeo*10/HZ);
345 if (nfss->retrans != 3) {
346 len += sprintf(buf+len, ",retrans=%d",
347 nfss->retrans);
349 #endif
350 if (nfss->acregmin != 3*HZ) {
351 len += sprintf(buf+len, ",acregmin=%d",
352 nfss->acregmin/HZ);
354 if (nfss->acregmax != 60*HZ) {
355 len += sprintf(buf+len, ",acregmax=%d",
356 nfss->acregmax/HZ);
358 if (nfss->acdirmin != 30*HZ) {
359 len += sprintf(buf+len, ",acdirmin=%d",
360 nfss->acdirmin/HZ);
362 if (nfss->acdirmax != 60*HZ) {
363 len += sprintf(buf+len, ",acdirmax=%d",
364 nfss->acdirmax/HZ);
366 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
367 if (nfss->flags & nfs_infop->flag) {
368 strcpy(buf + len, nfs_infop->str);
369 len += strlen(nfs_infop->str);
372 len += sprintf(buf+len, ",addr=%s",
373 nfss->hostname);
375 len += sprintf( buf + len, " 0 0\n" );
376 tmp = tmp->mnt_next;
379 return len;
382 int get_filesystem_list(char * buf)
384 int len = 0;
385 struct file_system_type * tmp;
387 tmp = file_systems;
388 while (tmp && len < PAGE_SIZE - 80) {
389 len += sprintf(buf+len, "%s\t%s\n",
390 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
391 tmp->name);
392 tmp = tmp->next;
394 return len;
397 struct file_system_type *get_fs_type(const char *name)
399 struct file_system_type * fs = file_systems;
401 if (!name)
402 return fs;
403 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
405 #ifdef CONFIG_KMOD
406 if (!fs && (request_module(name) == 0)) {
407 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
410 #endif
412 return fs;
415 void __wait_on_super(struct super_block * sb)
417 DECLARE_WAITQUEUE(wait, current);
419 add_wait_queue(&sb->s_wait, &wait);
420 repeat:
421 set_current_state(TASK_UNINTERRUPTIBLE);
422 if (sb->s_lock) {
423 schedule();
424 goto repeat;
426 remove_wait_queue(&sb->s_wait, &wait);
427 current->state = TASK_RUNNING;
431 * Note: check the dirty flag before waiting, so we don't
432 * hold up the sync while mounting a device. (The newly
433 * mounted device won't need syncing.)
435 void sync_supers(kdev_t dev)
437 struct super_block * sb;
439 for (sb = sb_entry(super_blocks.next);
440 sb != sb_entry(&super_blocks);
441 sb = sb_entry(sb->s_list.next)) {
442 if (!sb->s_dev)
443 continue;
444 if (dev && sb->s_dev != dev)
445 continue;
446 if (!sb->s_dirt)
447 continue;
448 /* N.B. Should lock the superblock while writing */
449 wait_on_super(sb);
450 if (!sb->s_dev || !sb->s_dirt)
451 continue;
452 if (dev && (dev != sb->s_dev))
453 continue;
454 if (sb->s_op && sb->s_op->write_super)
455 sb->s_op->write_super(sb);
459 struct super_block * get_super(kdev_t dev)
461 struct super_block * s;
463 if (!dev)
464 return NULL;
465 restart:
466 s = sb_entry(super_blocks.next);
467 while (s != sb_entry(&super_blocks))
468 if (s->s_dev == dev) {
469 wait_on_super(s);
470 if (s->s_dev == dev)
471 return s;
472 goto restart;
473 } else
474 s = sb_entry(s->s_list.next);
475 return NULL;
478 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
480 struct super_block *s;
481 struct ustat tmp;
482 struct statfs sbuf;
483 mm_segment_t old_fs;
484 int err = -EINVAL;
486 lock_kernel();
487 s = get_super(to_kdev_t(dev));
488 if (s == NULL)
489 goto out;
490 err = -ENOSYS;
491 if (!(s->s_op->statfs))
492 goto out;
494 old_fs = get_fs();
495 set_fs(get_ds());
496 s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
497 set_fs(old_fs);
499 memset(&tmp,0,sizeof(struct ustat));
500 tmp.f_tfree = sbuf.f_bfree;
501 tmp.f_tinode = sbuf.f_ffree;
503 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
504 out:
505 unlock_kernel();
506 return err;
510 * Find a super_block with no device assigned.
512 struct super_block *get_empty_super(void)
514 struct super_block *s;
516 for (s = sb_entry(super_blocks.next);
517 s != sb_entry(&super_blocks);
518 s = sb_entry(s->s_list.next)) {
519 if (s->s_dev)
520 continue;
521 if (!s->s_lock)
522 return s;
523 printk("VFS: empty superblock %p locked!\n", s);
525 /* Need a new one... */
526 if (nr_super_blocks >= max_super_blocks)
527 return NULL;
528 s = kmalloc(sizeof(struct super_block), GFP_USER);
529 if (s) {
530 nr_super_blocks++;
531 memset(s, 0, sizeof(struct super_block));
532 INIT_LIST_HEAD(&s->s_dirty);
533 list_add (&s->s_list, super_blocks.prev);
534 init_waitqueue_head(&s->s_wait);
535 INIT_LIST_HEAD(&s->s_files);
537 return s;
540 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
541 void *data, int silent)
543 struct super_block * s;
544 struct file_system_type *type;
546 if (!dev)
547 goto out_null;
548 check_disk_change(dev);
549 s = get_super(dev);
550 if (s)
551 goto out;
553 type = get_fs_type(name);
554 if (!type) {
555 printk("VFS: on device %s: get_fs_type(%s) failed\n",
556 kdevname(dev), name);
557 goto out;
559 s = get_empty_super();
560 if (!s)
561 goto out;
562 s->s_dev = dev;
563 s->s_flags = flags;
564 s->s_dirt = 0;
565 sema_init(&s->s_vfs_rename_sem,1);
566 /* N.B. Should lock superblock now ... */
567 if (!type->read_super(s, data, silent))
568 goto out_fail;
569 s->s_dev = dev; /* N.B. why do this again?? */
570 s->s_rd_only = 0;
571 s->s_type = type;
572 out:
573 return s;
575 /* N.B. s_dev should be cleared in type->read_super */
576 out_fail:
577 s->s_dev = 0;
578 out_null:
579 s = NULL;
580 goto out;
584 * Unnamed block devices are dummy devices used by virtual
585 * filesystems which don't use real block-devices. -- jrs
588 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
590 kdev_t get_unnamed_dev(void)
592 int i;
594 for (i = 1; i < 256; i++) {
595 if (!test_and_set_bit(i,unnamed_dev_in_use))
596 return MKDEV(UNNAMED_MAJOR, i);
598 return 0;
601 void put_unnamed_dev(kdev_t dev)
603 if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
604 return;
605 if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
606 return;
607 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
608 kdevname(dev));
611 static int d_umount(struct super_block * sb)
613 struct dentry * root = sb->s_root;
614 struct dentry * covered = root->d_covers;
616 if (root->d_count != 1)
617 return -EBUSY;
619 if (root->d_inode->i_state)
620 return -EBUSY;
622 sb->s_root = NULL;
624 if (covered != root) {
625 root->d_covers = root;
626 covered->d_mounts = covered;
627 dput(covered);
629 dput(root);
630 return 0;
633 static void d_mount(struct dentry *covered, struct dentry *dentry)
635 if (covered->d_mounts != covered) {
636 printk("VFS: mount - already mounted\n");
637 return;
639 covered->d_mounts = dentry;
640 dentry->d_covers = covered;
643 static int do_umount(kdev_t dev, int unmount_root, int flags)
645 struct super_block * sb;
646 int retval;
648 retval = -ENOENT;
649 sb = get_super(dev);
650 if (!sb || !sb->s_root)
651 goto out;
654 * Before checking whether the filesystem is still busy,
655 * make sure the kernel doesn't hold any quota files open
656 * on the device. If the umount fails, too bad -- there
657 * are no quotas running any more. Just turn them on again.
659 DQUOT_OFF(dev);
660 acct_auto_close(dev);
663 * If we may have to abort operations to get out of this
664 * mount, and they will themselves hold resources we must
665 * allow the fs to do things. In the Unix tradition of
666 * 'Gee thats tricky lets do it in userspace' the umount_begin
667 * might fail to complete on the first run through as other tasks
668 * must return, and the like. Thats for the mount program to worry
669 * about for the moment.
672 if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
673 sb->s_op->umount_begin(sb);
676 * Shrink dcache, then fsync. This guarantees that if the
677 * filesystem is quiescent at this point, then (a) only the
678 * root entry should be in use and (b) that root entry is
679 * clean.
681 shrink_dcache_sb(sb);
682 fsync_dev(dev);
684 if (dev==ROOT_DEV && !unmount_root) {
686 * Special case for "unmounting" root ...
687 * we just try to remount it readonly.
689 retval = 0;
690 if (!(sb->s_flags & MS_RDONLY))
691 retval = do_remount_sb(sb, MS_RDONLY, 0);
692 return retval;
695 retval = d_umount(sb);
696 if (retval)
697 goto out;
699 if (sb->s_op) {
700 if (sb->s_op->write_super && sb->s_dirt)
701 sb->s_op->write_super(sb);
704 lock_super(sb);
705 if (sb->s_op) {
706 if (sb->s_op->put_super)
707 sb->s_op->put_super(sb);
710 /* Forget any remaining inodes */
711 if (invalidate_inodes(sb)) {
712 printk("VFS: Busy inodes after unmount. "
713 "Self-destruct in 5 seconds. Have a nice day...\n");
716 sb->s_dev = 0; /* Free the superblock */
717 unlock_super(sb);
719 remove_vfsmnt(dev);
720 out:
721 return retval;
724 static int umount_dev(kdev_t dev, int flags)
726 int retval;
727 struct inode * inode = get_empty_inode();
729 retval = -ENOMEM;
730 if (!inode)
731 goto out;
733 inode->i_rdev = dev;
734 retval = -ENXIO;
735 if (MAJOR(dev) >= MAX_BLKDEV)
736 goto out_iput;
738 fsync_dev(dev);
740 down(&mount_sem);
742 retval = do_umount(dev, 0, flags);
743 if (!retval) {
744 fsync_dev(dev);
745 if (dev != ROOT_DEV) {
746 blkdev_release(inode);
747 put_unnamed_dev(dev);
751 up(&mount_sem);
752 out_iput:
753 iput(inode);
754 out:
755 return retval;
759 * Now umount can handle mount points as well as block devices.
760 * This is important for filesystems which use unnamed block devices.
762 * There is a little kludge here with the dummy_inode. The current
763 * vfs release functions only use the r_dev field in the inode so
764 * we give them the info they need without using a real inode.
765 * If any other fields are ever needed by any block device release
766 * functions, they should be faked here. -- jrs
768 * We now support a flag for forced unmount like the other 'big iron'
769 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
772 asmlinkage long sys_umount(char * name, int flags)
774 struct dentry * dentry;
775 int retval;
777 if (!capable(CAP_SYS_ADMIN))
778 return -EPERM;
780 lock_kernel();
781 dentry = namei(name);
782 retval = PTR_ERR(dentry);
783 if (!IS_ERR(dentry)) {
784 struct inode * inode = dentry->d_inode;
785 kdev_t dev = inode->i_rdev;
787 retval = 0;
788 if (S_ISBLK(inode->i_mode)) {
789 if (IS_NODEV(inode))
790 retval = -EACCES;
791 } else {
792 struct super_block *sb = inode->i_sb;
793 retval = -EINVAL;
794 if (sb && inode == sb->s_root->d_inode) {
795 dev = sb->s_dev;
796 retval = 0;
799 dput(dentry);
801 if (!retval)
802 retval = umount_dev(dev, flags);
804 unlock_kernel();
805 return retval;
809 * The 2.0 compatible umount. No flags.
812 asmlinkage long sys_oldumount(char * name)
814 return sys_umount(name,0);
818 * Check whether we can mount the specified device.
820 int fs_may_mount(kdev_t dev)
822 struct super_block * sb = get_super(dev);
823 int busy;
825 busy = sb && sb->s_root &&
826 (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
827 return !busy;
831 * do_mount() does the actual mounting after sys_mount has done the ugly
832 * parameter parsing. When enough time has gone by, and everything uses the
833 * new mount() parameters, sys_mount() can then be cleaned up.
835 * We cannot mount a filesystem if it has active, used, or dirty inodes.
836 * We also have to flush all inode-data for this device, as the new mount
837 * might need new info.
839 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
840 * supplying a leading "!" before the dir_name, allowing "stacks" of
841 * mounted filesystems. The stacking will only influence any pathname lookups
842 * _after_ the mount, but open file descriptors or working directories that
843 * are now covered remain valid. For example, when you overmount /home, any
844 * process with old cwd /home/joe will continue to use the old versions,
845 * as long as relative paths are used, but absolute paths like /home/joe/xxx
846 * will go to the new "top of stack" version. In general, crossing a
847 * mount point will always go to the top of stack element.
848 * Anyone using this new feature must know what he/she is doing.
851 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name,
852 const char * type, int flags, void * data)
854 struct dentry * dir_d;
855 struct super_block * sb;
856 struct vfsmount *vfsmnt;
857 int error;
859 error = -EACCES;
860 if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
861 goto out;
864 * Do the lookup first to force automounting.
866 dir_d = namei(dir_name);
867 error = PTR_ERR(dir_d);
868 if (IS_ERR(dir_d))
869 goto out;
871 down(&mount_sem);
872 error = -ENOTDIR;
873 if (!S_ISDIR(dir_d->d_inode->i_mode))
874 goto dput_and_out;
876 error = -EBUSY;
877 if (dir_d->d_covers != dir_d)
878 goto dput_and_out;
881 * Note: If the superblock already exists,
882 * read_super just does a get_super().
884 error = -EINVAL;
885 sb = read_super(dev, type, flags, data, 0);
886 if (!sb)
887 goto dput_and_out;
890 * We may have slept while reading the super block,
891 * so we check afterwards whether it's safe to mount.
893 error = -EBUSY;
894 if (!fs_may_mount(dev))
895 goto dput_and_out;
897 error = -ENOMEM;
898 vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
899 if (vfsmnt) {
900 d_mount(dget(dir_d), sb->s_root);
901 error = 0;
904 dput_and_out:
905 dput(dir_d);
906 up(&mount_sem);
907 out:
908 return error;
913 * Alters the mount flags of a mounted file system. Only the mount point
914 * is used as a reference - file system type and the device are ignored.
915 * FS-specific mount options can't be altered by remounting.
918 static int do_remount_sb(struct super_block *sb, int flags, char *data)
920 int retval;
921 struct vfsmount *vfsmnt;
923 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
924 return -EACCES;
925 /*flags |= MS_RDONLY;*/
926 /* If we are remounting RDONLY, make sure there are no rw files open */
927 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
928 if (!fs_may_remount_ro(sb))
929 return -EBUSY;
930 if (sb->s_op && sb->s_op->remount_fs) {
931 retval = sb->s_op->remount_fs(sb, &flags, data);
932 if (retval)
933 return retval;
935 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
936 vfsmnt = lookup_vfsmnt(sb->s_dev);
937 if (vfsmnt)
938 vfsmnt->mnt_flags = sb->s_flags;
941 * Invalidate the inodes, as some mount options may be changed.
942 * N.B. If we are changing media, we should check the return
943 * from invalidate_inodes ... can't allow _any_ open files.
945 invalidate_inodes(sb);
947 return 0;
950 static int do_remount(const char *dir,int flags,char *data)
952 struct dentry *dentry;
953 int retval;
955 dentry = namei(dir);
956 retval = PTR_ERR(dentry);
957 if (!IS_ERR(dentry)) {
958 struct super_block * sb = dentry->d_inode->i_sb;
960 retval = -ENODEV;
961 if (sb) {
962 retval = -EINVAL;
963 if (dentry == sb->s_root) {
965 * Shrink the dcache and sync the device.
967 shrink_dcache_sb(sb);
968 fsync_dev(sb->s_dev);
969 if (flags & MS_RDONLY)
970 acct_auto_close(sb->s_dev);
971 retval = do_remount_sb(sb, flags, data);
974 dput(dentry);
976 return retval;
979 static int copy_mount_options (const void * data, unsigned long *where)
981 int i;
982 unsigned long page;
983 struct vm_area_struct * vma;
985 *where = 0;
986 if (!data)
987 return 0;
989 vma = find_vma(current->mm, (unsigned long) data);
990 if (!vma || (unsigned long) data < vma->vm_start)
991 return -EFAULT;
992 if (!(vma->vm_flags & VM_READ))
993 return -EFAULT;
994 i = vma->vm_end - (unsigned long) data;
995 if (PAGE_SIZE <= (unsigned long) i)
996 i = PAGE_SIZE-1;
997 if (!(page = __get_free_page(GFP_KERNEL))) {
998 return -ENOMEM;
1000 if (copy_from_user((void *) page,data,i)) {
1001 free_page(page);
1002 return -EFAULT;
1004 *where = page;
1005 return 0;
1009 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1010 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1012 * data is a (void *) that can point to any structure up to
1013 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1014 * information (or be NULL).
1016 * NOTE! As old versions of mount() didn't use this setup, the flags
1017 * have to have a special 16-bit magic number in the high word:
1018 * 0xC0ED. If this magic word isn't present, the flags and data info
1019 * aren't used, as the syscall assumes we are talking to an older
1020 * version that didn't understand them.
1022 asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
1023 unsigned long new_flags, void * data)
1025 struct file_system_type * fstype;
1026 struct dentry * dentry = NULL;
1027 struct inode * inode = NULL;
1028 kdev_t dev;
1029 int retval;
1030 unsigned long flags = 0;
1031 unsigned long page = 0;
1032 struct file dummy; /* allows read-write or read-only flag */
1034 if (!capable(CAP_SYS_ADMIN))
1035 return -EPERM;
1036 lock_kernel();
1037 if ((new_flags &
1038 (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1039 retval = copy_mount_options (data, &page);
1040 if (retval < 0)
1041 goto out;
1042 retval = do_remount(dir_name,
1043 new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1044 (char *) page);
1045 free_page(page);
1046 goto out;
1049 retval = copy_mount_options (type, &page);
1050 if (retval < 0)
1051 goto out;
1052 fstype = get_fs_type((char *) page);
1053 free_page(page);
1054 retval = -ENODEV;
1055 if (!fstype)
1056 goto out;
1058 memset(&dummy, 0, sizeof(dummy));
1059 if (fstype->fs_flags & FS_REQUIRES_DEV) {
1060 dentry = namei(dev_name);
1061 retval = PTR_ERR(dentry);
1062 if (IS_ERR(dentry))
1063 goto out;
1065 inode = dentry->d_inode;
1066 retval = -ENOTBLK;
1067 if (!S_ISBLK(inode->i_mode))
1068 goto dput_and_out;
1070 retval = -EACCES;
1071 if (IS_NODEV(inode))
1072 goto dput_and_out;
1074 dev = inode->i_rdev;
1075 retval = -ENXIO;
1076 if (MAJOR(dev) >= MAX_BLKDEV)
1077 goto dput_and_out;
1079 retval = -ENOTBLK;
1080 dummy.f_op = get_blkfops(MAJOR(dev));
1081 if (!dummy.f_op)
1082 goto dput_and_out;
1084 if (dummy.f_op->open) {
1085 dummy.f_dentry = dentry;
1086 dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1087 retval = dummy.f_op->open(inode, &dummy);
1088 if (retval)
1089 goto dput_and_out;
1092 } else {
1093 retval = -EMFILE;
1094 if (!(dev = get_unnamed_dev()))
1095 goto out;
1098 page = 0;
1099 if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1100 flags = new_flags & ~MS_MGC_MSK;
1101 retval = copy_mount_options(data, &page);
1102 if (retval < 0)
1103 goto clean_up;
1105 retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1106 (void *) page);
1107 free_page(page);
1108 if (retval)
1109 goto clean_up;
1111 dput_and_out:
1112 dput(dentry);
1113 out:
1114 unlock_kernel();
1115 return retval;
1117 clean_up:
1118 if (dummy.f_op) {
1119 if (dummy.f_op->release)
1120 dummy.f_op->release(inode, NULL);
1121 } else
1122 put_unnamed_dev(dev);
1123 goto dput_and_out;
1126 void __init mount_root(void)
1128 struct file_system_type * fs_type;
1129 struct super_block * sb;
1130 struct vfsmount *vfsmnt;
1131 struct inode * d_inode = NULL;
1132 struct file filp;
1133 int retval;
1135 #ifdef CONFIG_ROOT_NFS
1136 if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1137 ROOT_DEV = 0;
1138 if ((fs_type = get_fs_type("nfs"))) {
1139 sb = get_empty_super(); /* "can't fail" */
1140 sb->s_dev = get_unnamed_dev();
1141 sb->s_flags = root_mountflags;
1142 sema_init(&sb->s_vfs_rename_sem,1);
1143 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1144 if (vfsmnt) {
1145 if (nfs_root_mount(sb) >= 0) {
1146 sb->s_dirt = 0;
1147 sb->s_type = fs_type;
1148 current->fs->root = dget(sb->s_root);
1149 current->fs->pwd = dget(sb->s_root);
1150 ROOT_DEV = sb->s_dev;
1151 printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1152 return;
1154 remove_vfsmnt(sb->s_dev);
1156 put_unnamed_dev(sb->s_dev);
1157 sb->s_dev = 0;
1159 if (!ROOT_DEV) {
1160 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1161 ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1164 #endif
1166 #ifdef CONFIG_BLK_DEV_FD
1167 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1168 #ifdef CONFIG_BLK_DEV_RAM
1169 extern int rd_doload;
1170 extern void rd_load_secondary(void);
1171 #endif
1172 floppy_eject();
1173 #ifndef CONFIG_BLK_DEV_RAM
1174 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1175 #else
1176 /* rd_doload is 2 for a dual initrd/ramload setup */
1177 if(rd_doload==2)
1178 rd_load_secondary();
1179 else
1180 #endif
1182 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1183 wait_for_keypress();
1186 #endif
1188 memset(&filp, 0, sizeof(filp));
1189 d_inode = get_empty_inode();
1190 if (!d_inode)
1191 panic(__FUNCTION__ ": unable to allocate root inode");
1192 d_inode->i_rdev = ROOT_DEV;
1193 filp.f_dentry = NULL;
1194 if ( root_mountflags & MS_RDONLY)
1195 filp.f_mode = 1; /* read only */
1196 else
1197 filp.f_mode = 3; /* read write */
1198 retval = blkdev_open(d_inode, &filp);
1199 if (retval == -EROFS) {
1200 root_mountflags |= MS_RDONLY;
1201 filp.f_mode = 1;
1202 retval = blkdev_open(d_inode, &filp);
1204 iput(d_inode);
1205 if (retval)
1207 * Allow the user to distinguish between failed open
1208 * and bad superblock on root device.
1210 printk("VFS: Cannot open root device %s\n",
1211 kdevname(ROOT_DEV));
1212 else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1213 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1214 continue;
1215 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1216 if (sb) {
1217 sb->s_flags = root_mountflags;
1218 current->fs->root = dget(sb->s_root);
1219 current->fs->pwd = dget(sb->s_root);
1220 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1221 fs_type->name,
1222 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1223 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1224 if (vfsmnt)
1225 return;
1226 panic("VFS: add_vfsmnt failed for root fs");
1229 panic("VFS: Unable to mount root fs on %s",
1230 kdevname(ROOT_DEV));
#ifdef CONFIG_BLK_DEV_INITRD

/*
 * Switch the root filesystem from the current one (the initrd) to
 * @new_root_dev and try to re-attach the old root at @put_old.
 *
 * After mount_root() has installed the new root, @put_old is looked up.
 * If it is usable (exists, is a directory, nothing mounted on it) the
 * old root is mounted there as "/dev/root.old".  Otherwise the old root
 * is unmounted; if that works 0 is returned, else the lookup error.
 *
 * Returns 0 on success, -EBUSY if the new root device is busy, -ENOMEM
 * if the vfsmount entry for the old root cannot be added, or the
 * lookup error as described above.
 */
int __init change_root(kdev_t new_root_dev,const char *put_old)
{
	kdev_t old_root_dev = ROOT_DEV;
	struct dentry *old_root = current->fs->root;
	struct dentry *old_pwd = current->fs->pwd;
	struct dentry *dir_d = NULL;
	struct vfsmount *vfsmnt;
	int error;

	if (!fs_may_mount(new_root_dev)) {
		printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
		return -EBUSY;
	}
	ROOT_DEV = new_root_dev;
	mount_root();
	dput(old_root);
	dput(old_pwd);
#if 1
	shrink_dcache();
	printk("change_root: old root has d_count=%d\n", old_root->d_count);
#endif

	/*
	 * Get the new mount directory
	 */
	dir_d = lookup_dentry(put_old, NULL, 1);
	if (IS_ERR(dir_d)) {
		error = PTR_ERR(dir_d);
	} else if (!dir_d->d_inode) {
		dput(dir_d);
		error = -ENOENT;
	} else if (dir_d->d_covers != dir_d) {
		dput(dir_d);
		error = -EBUSY;
	} else if (!S_ISDIR(dir_d->d_inode->i_mode)) {
		dput(dir_d);
		error = -ENOTDIR;
	} else {
		error = 0;
	}

	if (error) {
		int umount_error;

		/* Nowhere to put the old root: try to get rid of it. */
		printk(KERN_NOTICE "Trying to unmount old root ... ");
		umount_error = do_umount(old_root_dev, 1, 0);
		if (umount_error) {
			printk(KERN_ERR "error %d\n", umount_error);
			return error;
		}
		printk("okay\n");
		invalidate_buffers(old_root_dev);
		return 0;
	}

	/* Re-register the old root under its new mount point. */
	remove_vfsmnt(old_root_dev);
	vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
	if (!vfsmnt) {
		printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
		return -ENOMEM;
	}
	d_mount(dir_d, old_root);
	return 0;
}

#endif