Linux 2.1.81pre1
[davej-history.git] / fs / super.c
blobcefd90c9bd23c17a40b735019b19e83e4130ed41
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables.
 *                                   - mount systemcall
 *                                   - umount systemcall
 *
 *  Added options to /proc/mounts
 *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *
 *  GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 */
20 #include <linux/config.h>
21 #include <linux/sched.h>
22 #include <linux/kernel.h>
23 #include <linux/mount.h>
24 #include <linux/malloc.h>
25 #include <linux/major.h>
26 #include <linux/stat.h>
27 #include <linux/errno.h>
28 #include <linux/string.h>
29 #include <linux/locks.h>
30 #include <linux/mm.h>
31 #include <linux/smp.h>
32 #include <linux/smp_lock.h>
33 #include <linux/fd.h>
34 #include <linux/init.h>
36 #include <asm/system.h>
37 #include <asm/uaccess.h>
38 #include <asm/bitops.h>
40 #ifdef CONFIG_KERNELD
41 #include <linux/kerneld.h>
42 #endif
44 #include <linux/nfs_fs.h>
45 #include <linux/nfs_fs_sb.h>
46 #include <linux/nfs_mount.h>
49 * We use a semaphore to synchronize all mount/umount
50 * activity - imagine the mess if we have a race between
51 * unmounting a filesystem and re-mounting it (or something
52 * else).
54 static struct semaphore mount_sem = MUTEX;
56 extern void wait_for_keypress(void);
57 extern struct file_operations * get_blkfops(unsigned int major);
59 extern int root_mountflags;
61 static int do_remount_sb(struct super_block *sb, int flags, char * data);
63 /* this is initialized in init/main.c */
64 kdev_t ROOT_DEV;
66 struct super_block super_blocks[NR_SUPER];
67 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
68 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
69 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
70 *mru_vfsmnt = (struct vfsmount *) NULL;
72 /*
73 * This part handles the management of the list of mounted filesystems.
75 struct vfsmount *lookup_vfsmnt(kdev_t dev)
77 struct vfsmount *lptr;
79 if (vfsmntlist == (struct vfsmount *)NULL)
80 return ((struct vfsmount *)NULL);
82 if (mru_vfsmnt != (struct vfsmount *)NULL &&
83 mru_vfsmnt->mnt_dev == dev)
84 return (mru_vfsmnt);
86 for (lptr = vfsmntlist;
87 lptr != (struct vfsmount *)NULL;
88 lptr = lptr->mnt_next)
89 if (lptr->mnt_dev == dev) {
90 mru_vfsmnt = lptr;
91 return (lptr);
94 return ((struct vfsmount *)NULL);
95 /* NOTREACHED */
98 static struct vfsmount *add_vfsmnt(struct super_block *sb,
99 const char *dev_name, const char *dir_name)
101 struct vfsmount *lptr;
102 char *tmp, *name;
104 lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
105 if (!lptr)
106 goto out;
107 memset(lptr, 0, sizeof(struct vfsmount));
109 lptr->mnt_sb = sb;
110 lptr->mnt_dev = sb->s_dev;
111 lptr->mnt_flags = sb->s_flags;
112 sema_init(&lptr->mnt_sem, 1);
114 /* N.B. Is it really OK to have a vfsmount without names? */
115 if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
116 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
117 if (name) {
118 strcpy(name, tmp);
119 lptr->mnt_devname = name;
121 putname(tmp);
123 if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
124 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
125 if (name) {
126 strcpy(name, tmp);
127 lptr->mnt_dirname = name;
129 putname(tmp);
132 if (vfsmntlist == (struct vfsmount *)NULL) {
133 vfsmntlist = vfsmnttail = lptr;
134 } else {
135 vfsmnttail->mnt_next = lptr;
136 vfsmnttail = lptr;
138 out:
139 return lptr;
142 static void remove_vfsmnt(kdev_t dev)
144 struct vfsmount *lptr, *tofree;
146 if (vfsmntlist == (struct vfsmount *)NULL)
147 return;
148 lptr = vfsmntlist;
149 if (lptr->mnt_dev == dev) {
150 tofree = lptr;
151 vfsmntlist = lptr->mnt_next;
152 if (vfsmnttail->mnt_dev == dev)
153 vfsmnttail = vfsmntlist;
154 } else {
155 while (lptr->mnt_next != (struct vfsmount *)NULL) {
156 if (lptr->mnt_next->mnt_dev == dev)
157 break;
158 lptr = lptr->mnt_next;
160 tofree = lptr->mnt_next;
161 if (tofree == (struct vfsmount *)NULL)
162 return;
163 lptr->mnt_next = lptr->mnt_next->mnt_next;
164 if (vfsmnttail->mnt_dev == dev)
165 vfsmnttail = lptr;
167 if (tofree == mru_vfsmnt)
168 mru_vfsmnt = NULL;
169 kfree(tofree->mnt_devname);
170 kfree(tofree->mnt_dirname);
171 kfree_s(tofree, sizeof(struct vfsmount));
174 int register_filesystem(struct file_system_type * fs)
176 struct file_system_type ** tmp;
178 if (!fs)
179 return -EINVAL;
180 if (fs->next)
181 return -EBUSY;
182 tmp = &file_systems;
183 while (*tmp) {
184 if (strcmp((*tmp)->name, fs->name) == 0)
185 return -EBUSY;
186 tmp = &(*tmp)->next;
188 *tmp = fs;
189 return 0;
#ifdef CONFIG_MODULES
/*
 * Remove a filesystem type from the list of registered types.
 *
 * The entry is matched by pointer.  On success its 'next' link is
 * cleared and 0 is returned; -EINVAL is returned if 'fs' is not on
 * the list.
 */
int unregister_filesystem(struct file_system_type * fs)
{
	struct file_system_type **slot;

	for (slot = &file_systems; *slot; slot = &(*slot)->next) {
		if (*slot != fs)
			continue;
		*slot = fs->next;
		fs->next = NULL;
		return 0;
	}
	return -EINVAL;
}
#endif
210 static int fs_index(const char * __name)
212 struct file_system_type * tmp;
213 char * name;
214 int err, index;
216 name = getname(__name);
217 err = PTR_ERR(name);
218 if (IS_ERR(name))
219 return err;
221 index = 0;
222 for (tmp = file_systems ; tmp ; tmp = tmp->next) {
223 if (strcmp(tmp->name, name) == 0) {
224 putname(name);
225 return index;
227 index++;
229 putname(name);
230 return -EINVAL;
233 static int fs_name(unsigned int index, char * buf)
235 struct file_system_type * tmp;
236 int len;
238 tmp = file_systems;
239 while (tmp && index > 0) {
240 tmp = tmp->next;
241 index--;
243 if (!tmp)
244 return -EINVAL;
245 len = strlen(tmp->name) + 1;
246 return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
249 static int fs_maxindex(void)
251 struct file_system_type * tmp;
252 int index;
254 index = 0;
255 for (tmp = file_systems ; tmp ; tmp = tmp->next)
256 index++;
257 return index;
261 * Whee.. Weird sysv syscall.
263 asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
265 int retval = -EINVAL;
267 lock_kernel();
268 switch (option) {
269 case 1:
270 retval = fs_index((const char *) arg1);
271 break;
273 case 2:
274 retval = fs_name(arg1, (char *) arg2);
275 break;
277 case 3:
278 retval = fs_maxindex();
279 break;
281 unlock_kernel();
282 return retval;
285 static struct proc_fs_info {
286 int flag;
287 char *str;
288 } fs_info[] = {
289 { MS_NOEXEC, ",noexec" },
290 { MS_NOSUID, ",nosuid" },
291 { MS_NODEV, ",nodev" },
292 { MS_SYNCHRONOUS, ",sync" },
293 { MS_MANDLOCK, ",mand" },
294 { MS_NOATIME, ",noatime" },
295 #ifdef MS_NOSUB /* Can't find this except in mount.c */
296 { MS_NOSUB, ",nosub" },
297 #endif
298 { 0, NULL }
301 static struct proc_nfs_info {
302 int flag;
303 char *str;
304 } nfs_info[] = {
305 { NFS_MOUNT_SOFT, ",soft" },
306 { NFS_MOUNT_INTR, ",intr" },
307 { NFS_MOUNT_POSIX, ",posix" },
308 { NFS_MOUNT_NOCTO, ",nocto" },
309 { NFS_MOUNT_NOAC, ",noac" },
310 { 0, NULL }
313 int get_filesystem_info( char *buf )
315 struct vfsmount *tmp = vfsmntlist;
316 struct proc_fs_info *fs_infop;
317 struct proc_nfs_info *nfs_infop;
318 struct nfs_server *nfss;
319 int len = 0;
321 while ( tmp && len < PAGE_SIZE - 160)
323 len += sprintf( buf + len, "%s %s %s %s",
324 tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
325 tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
326 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
327 if (tmp->mnt_flags & fs_infop->flag) {
328 strcpy(buf + len, fs_infop->str);
329 len += strlen(fs_infop->str);
332 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
333 nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
334 if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
335 len += sprintf(buf+len, ",rsize=%d",
336 nfss->rsize);
338 if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
339 len += sprintf(buf+len, ",wsize=%d",
340 nfss->wsize);
342 #if 0
343 if (nfss->timeo != 7*HZ/10) {
344 len += sprintf(buf+len, ",timeo=%d",
345 nfss->timeo*10/HZ);
347 if (nfss->retrans != 3) {
348 len += sprintf(buf+len, ",retrans=%d",
349 nfss->retrans);
351 #endif
352 if (nfss->acregmin != 3*HZ) {
353 len += sprintf(buf+len, ",acregmin=%d",
354 nfss->acregmin/HZ);
356 if (nfss->acregmax != 60*HZ) {
357 len += sprintf(buf+len, ",acregmax=%d",
358 nfss->acregmax/HZ);
360 if (nfss->acdirmin != 30*HZ) {
361 len += sprintf(buf+len, ",acdirmin=%d",
362 nfss->acdirmin/HZ);
364 if (nfss->acdirmax != 60*HZ) {
365 len += sprintf(buf+len, ",acdirmax=%d",
366 nfss->acdirmax/HZ);
368 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
369 if (nfss->flags & nfs_infop->flag) {
370 strcpy(buf + len, nfs_infop->str);
371 len += strlen(nfs_infop->str);
374 len += sprintf(buf+len, ",addr=%s",
375 nfss->hostname);
377 len += sprintf( buf + len, " 0 0\n" );
378 tmp = tmp->mnt_next;
381 return len;
384 int get_filesystem_list(char * buf)
386 int len = 0;
387 struct file_system_type * tmp;
389 tmp = file_systems;
390 while (tmp && len < PAGE_SIZE - 80) {
391 len += sprintf(buf+len, "%s\t%s\n",
392 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
393 tmp->name);
394 tmp = tmp->next;
396 return len;
399 struct file_system_type *get_fs_type(const char *name)
401 struct file_system_type * fs = file_systems;
403 if (!name)
404 return fs;
405 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
407 #ifdef CONFIG_KERNELD
408 if (!fs && (request_module(name) == 0)) {
409 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
412 #endif
414 return fs;
417 void __wait_on_super(struct super_block * sb)
419 struct wait_queue wait = { current, NULL };
421 add_wait_queue(&sb->s_wait, &wait);
422 repeat:
423 current->state = TASK_UNINTERRUPTIBLE;
424 if (sb->s_lock) {
425 schedule();
426 goto repeat;
428 remove_wait_queue(&sb->s_wait, &wait);
429 current->state = TASK_RUNNING;
433 * Note: check the dirty flag before waiting, so we don't
434 * hold up the sync while mounting a device. (The newly
435 * mounted device won't need syncing.)
437 void sync_supers(kdev_t dev)
439 struct super_block * sb;
441 for (sb = super_blocks + 0 ; sb < super_blocks + NR_SUPER ; sb++) {
442 if (!sb->s_dev)
443 continue;
444 if (dev && sb->s_dev != dev)
445 continue;
446 if (!sb->s_dirt)
447 continue;
448 /* N.B. Should lock the superblock while writing */
449 wait_on_super(sb);
450 if (!sb->s_dev || !sb->s_dirt)
451 continue;
452 if (dev && (dev != sb->s_dev))
453 continue;
454 if (sb->s_op && sb->s_op->write_super)
455 sb->s_op->write_super(sb);
459 struct super_block * get_super(kdev_t dev)
461 struct super_block * s;
463 if (!dev)
464 return NULL;
465 restart:
466 s = 0+super_blocks;
467 while (s < NR_SUPER+super_blocks)
468 if (s->s_dev == dev) {
469 wait_on_super(s);
470 if (s->s_dev == dev)
471 return s;
472 goto restart;
473 } else
474 s++;
475 return NULL;
478 asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
480 struct super_block *s;
481 struct ustat tmp;
482 struct statfs sbuf;
483 mm_segment_t old_fs;
484 int err = -EINVAL;
486 lock_kernel();
487 s = get_super(to_kdev_t(dev));
488 if (s == NULL)
489 goto out;
490 err = -ENOSYS;
491 if (!(s->s_op->statfs))
492 goto out;
494 old_fs = get_fs();
495 set_fs(get_ds());
496 s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
497 set_fs(old_fs);
499 memset(&tmp,0,sizeof(struct ustat));
500 tmp.f_tfree = sbuf.f_bfree;
501 tmp.f_tinode = sbuf.f_ffree;
503 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
504 out:
505 unlock_kernel();
506 return err;
510 * Find a super_block with no device assigned.
512 static struct super_block *get_empty_super(void)
514 struct super_block *s = 0+super_blocks;
516 for (; s < NR_SUPER+super_blocks; s++) {
517 if (s->s_dev)
518 continue;
519 if (!s->s_lock)
520 return s;
521 printk("VFS: empty superblock %p locked!\n", s);
523 return NULL;
526 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
527 void *data, int silent)
529 struct super_block * s;
530 struct file_system_type *type;
532 if (!dev)
533 goto out_null;
534 check_disk_change(dev);
535 s = get_super(dev);
536 if (s)
537 goto out;
539 type = get_fs_type(name);
540 if (!type) {
541 printk("VFS: on device %s: get_fs_type(%s) failed\n",
542 kdevname(dev), name);
543 goto out;
545 s = get_empty_super();
546 if (!s)
547 goto out;
548 s->s_dev = dev;
549 s->s_flags = flags;
550 s->s_dirt = 0;
551 /* N.B. Should lock superblock now ... */
552 if (!type->read_super(s, data, silent))
553 goto out_fail;
554 s->s_dev = dev; /* N.B. why do this again?? */
555 s->s_rd_only = 0;
556 s->s_type = type;
557 out:
558 return s;
560 /* N.B. s_dev should be cleared in type->read_super */
561 out_fail:
562 s->s_dev = 0;
563 out_null:
564 s = NULL;
565 goto out;
569 * Unnamed block devices are dummy devices used by virtual
570 * filesystems which don't use real block-devices. -- jrs
573 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
575 kdev_t get_unnamed_dev(void)
577 int i;
579 for (i = 1; i < 256; i++) {
580 if (!test_and_set_bit(i,unnamed_dev_in_use))
581 return MKDEV(UNNAMED_MAJOR, i);
583 return 0;
586 void put_unnamed_dev(kdev_t dev)
588 if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
589 return;
590 if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
591 return;
592 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
593 kdevname(dev));
596 static int d_umount(struct super_block * sb)
598 struct dentry * root = sb->s_root;
599 struct dentry * covered = root->d_covers;
601 if (root->d_count != 1)
602 return -EBUSY;
604 if (root->d_inode->i_state)
605 return -EBUSY;
607 sb->s_root = NULL;
609 if (covered != root) {
610 root->d_covers = root;
611 covered->d_mounts = covered;
612 dput(covered);
614 dput(root);
615 return 0;
618 static void d_mount(struct dentry *covered, struct dentry *dentry)
620 if (covered->d_mounts != covered) {
621 printk("VFS: mount - already mounted\n");
622 return;
624 covered->d_mounts = dentry;
625 dentry->d_covers = covered;
628 static int do_umount(kdev_t dev, int unmount_root)
630 struct super_block * sb;
631 int retval;
633 retval = -ENOENT;
634 sb = get_super(dev);
635 if (!sb || !sb->s_root)
636 goto out;
639 * Before checking whether the filesystem is still busy,
640 * make sure the kernel doesn't hold any quotafiles open
641 * on the device. If the umount fails, too bad -- there
642 * are no quotas running anymore. Just turn them on again.
644 quota_off(dev, -1);
647 * Shrink dcache, then fsync. This guarantees that if the
648 * filesystem is quiescent at this point, then (a) only the
649 * root entry should be in use and (b) that root entry is
650 * clean.
652 shrink_dcache_sb(sb);
653 fsync_dev(dev);
655 if (dev==ROOT_DEV && !unmount_root) {
657 * Special case for "unmounting" root ...
658 * we just try to remount it readonly.
660 retval = 0;
661 if (!(sb->s_flags & MS_RDONLY))
662 retval = do_remount_sb(sb, MS_RDONLY, 0);
663 return retval;
666 retval = d_umount(sb);
667 if (retval)
668 goto out;
670 /* Forget any inodes */
671 if (invalidate_inodes(sb)) {
672 printk("VFS: Busy inodes after unmount. "
673 "Self-destruct in 5 seconds. Bye-bye..\n");
676 if (sb->s_op) {
677 if (sb->s_op->write_super && sb->s_dirt)
678 sb->s_op->write_super(sb);
679 if (sb->s_op->put_super)
680 sb->s_op->put_super(sb);
682 remove_vfsmnt(dev);
683 out:
684 return retval;
687 static int umount_dev(kdev_t dev)
689 int retval;
690 struct inode * inode = get_empty_inode();
692 retval = -ENOMEM;
693 if (!inode)
694 goto out;
696 inode->i_rdev = dev;
697 retval = -ENXIO;
698 if (MAJOR(dev) >= MAX_BLKDEV)
699 goto out_iput;
701 fsync_dev(dev);
703 down(&mount_sem);
705 retval = do_umount(dev,0);
706 if (!retval) {
707 fsync_dev(dev);
708 if (dev != ROOT_DEV) {
709 blkdev_release(inode);
710 put_unnamed_dev(dev);
714 up(&mount_sem);
715 out_iput:
716 iput(inode);
717 out:
718 return retval;
722 * Now umount can handle mount points as well as block devices.
723 * This is important for filesystems which use unnamed block devices.
725 * There is a little kludge here with the dummy_inode. The current
726 * vfs release functions only use the r_dev field in the inode so
727 * we give them the info they need without using a real inode.
728 * If any other fields are ever needed by any block device release
729 * functions, they should be faked here. -- jrs
732 asmlinkage int sys_umount(char * name)
734 struct dentry * dentry;
735 int retval;
737 if (!suser())
738 return -EPERM;
740 lock_kernel();
741 dentry = namei(name);
742 retval = PTR_ERR(dentry);
743 if (!IS_ERR(dentry)) {
744 struct inode * inode = dentry->d_inode;
745 kdev_t dev = inode->i_rdev;
747 retval = 0;
748 if (S_ISBLK(inode->i_mode)) {
749 if (IS_NODEV(inode))
750 retval = -EACCES;
751 } else {
752 struct super_block *sb = inode->i_sb;
753 retval = -EINVAL;
754 if (sb && inode == sb->s_root->d_inode) {
755 dev = sb->s_dev;
756 retval = 0;
759 dput(dentry);
761 if (!retval)
762 retval = umount_dev(dev);
764 unlock_kernel();
765 return retval;
769 * Check whether we can mount the specified device.
771 int fs_may_mount(kdev_t dev)
773 struct super_block * sb = get_super(dev);
774 int busy;
776 busy = sb && sb->s_root &&
777 (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
778 return !busy;
782 * do_mount() does the actual mounting after sys_mount has done the ugly
783 * parameter parsing. When enough time has gone by, and everything uses the
784 * new mount() parameters, sys_mount() can then be cleaned up.
786 * We cannot mount a filesystem if it has active, used, or dirty inodes.
787 * We also have to flush all inode-data for this device, as the new mount
788 * might need new info.
790 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
791 * supplying a leading "!" before the dir_name, allowing "stacks" of
792 * mounted filesystems. The stacking will only influence any pathname lookups
793 * _after_ the mount, but open filedescriptors or working directories that
794 * are now covered remain valid. For example, when you overmount /home, any
795 * process with old cwd /home/joe will continue to use the old versions,
796 * as long as relative paths are used, but absolute paths like /home/joe/xxx
797 * will go to the new "top of stack" version. In general, crossing a
798 * mountpoint will always go to the top of stack element.
799 * Anyone using this new feature must know what he/she is doing.
802 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data)
804 struct dentry * dir_d;
805 struct super_block * sb;
806 struct vfsmount *vfsmnt;
807 int error;
809 down(&mount_sem);
810 error = -EACCES;
811 if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
812 goto out;
813 /*flags |= MS_RDONLY;*/
815 dir_d = namei(dir_name);
816 error = PTR_ERR(dir_d);
817 if (IS_ERR(dir_d))
818 goto out;
820 error = -ENOTDIR;
821 if (!S_ISDIR(dir_d->d_inode->i_mode))
822 goto dput_and_out;
824 error = -EBUSY;
825 if (dir_d->d_covers != dir_d)
826 goto dput_and_out;
829 * Note: If the superblock already exists,
830 * read_super just does a get_super().
832 error = -EINVAL;
833 sb = read_super(dev, type, flags, data, 0);
834 if (!sb)
835 goto dput_and_out;
838 * We may have slept while reading the super block,
839 * so we check afterwards whether it's safe to mount.
841 error = -EBUSY;
842 if (!fs_may_mount(dev))
843 goto dput_and_out;
845 error = -ENOMEM;
846 vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
847 if (!vfsmnt)
848 goto dput_and_out;
849 d_mount(dir_d, sb->s_root);
850 error = 0; /* we don't dput(dir_d) - see umount */
852 out:
853 up(&mount_sem);
854 return error;
856 dput_and_out:
857 dput(dir_d);
858 goto out;
863 * Alters the mount flags of a mounted file system. Only the mount point
864 * is used as a reference - file system type and the device are ignored.
865 * FS-specific mount options can't be altered by remounting.
868 static int do_remount_sb(struct super_block *sb, int flags, char *data)
870 int retval;
871 struct vfsmount *vfsmnt;
874 * Invalidate the inodes, as some mount options may be changed.
875 * N.B. If we are changing media, we should check the return
876 * from invalidate_inodes ... can't allow _any_ open files.
878 invalidate_inodes(sb);
880 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
881 return -EACCES;
882 /*flags |= MS_RDONLY;*/
883 /* If we are remounting RDONLY, make sure there are no rw files open */
884 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
885 if (!fs_may_remount_ro(sb))
886 return -EBUSY;
887 if (sb->s_op && sb->s_op->remount_fs) {
888 retval = sb->s_op->remount_fs(sb, &flags, data);
889 if (retval)
890 return retval;
892 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
893 vfsmnt = lookup_vfsmnt(sb->s_dev);
894 if (vfsmnt)
895 vfsmnt->mnt_flags = sb->s_flags;
896 return 0;
899 static int do_remount(const char *dir,int flags,char *data)
901 struct dentry *dentry;
902 int retval;
904 dentry = namei(dir);
905 retval = PTR_ERR(dentry);
906 if (!IS_ERR(dentry)) {
907 struct super_block * sb = dentry->d_inode->i_sb;
909 retval = -EINVAL;
910 if (dentry == sb->s_root) {
912 * Shrink the dcache and sync the device.
914 shrink_dcache_sb(sb);
915 fsync_dev(sb->s_dev);
916 retval = do_remount_sb(sb, flags, data);
918 dput(dentry);
920 return retval;
923 static int copy_mount_options (const void * data, unsigned long *where)
925 int i;
926 unsigned long page;
927 struct vm_area_struct * vma;
929 *where = 0;
930 if (!data)
931 return 0;
933 vma = find_vma(current->mm, (unsigned long) data);
934 if (!vma || (unsigned long) data < vma->vm_start)
935 return -EFAULT;
936 if (!(vma->vm_flags & VM_READ))
937 return -EFAULT;
938 i = vma->vm_end - (unsigned long) data;
939 if (PAGE_SIZE <= (unsigned long) i)
940 i = PAGE_SIZE-1;
941 if (!(page = __get_free_page(GFP_KERNEL))) {
942 return -ENOMEM;
944 if (copy_from_user((void *) page,data,i)) {
945 free_page(page);
946 return -EFAULT;
948 *where = page;
949 return 0;
953 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
954 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
956 * data is a (void *) that can point to any structure up to
957 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
958 * information (or be NULL).
960 * NOTE! As old versions of mount() didn't use this setup, the flags
961 * have to have a special 16-bit magic number in the high word:
962 * 0xC0ED. If this magic word isn't present, the flags and data info
963 * aren't used, as the syscall assumes we are talking to an older
964 * version that didn't understand them.
966 asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
967 unsigned long new_flags, void * data)
969 struct file_system_type * fstype;
970 struct dentry * dentry = NULL;
971 struct inode * inode = NULL;
972 kdev_t dev;
973 int retval = -EPERM;
974 unsigned long flags = 0;
975 unsigned long page = 0;
976 struct file dummy; /* allows read-write or read-only flag */
978 lock_kernel();
979 if (!suser())
980 goto out;
981 if ((new_flags &
982 (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
983 retval = copy_mount_options (data, &page);
984 if (retval < 0)
985 goto out;
986 retval = do_remount(dir_name,
987 new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
988 (char *) page);
989 free_page(page);
990 goto out;
993 retval = copy_mount_options (type, &page);
994 if (retval < 0)
995 goto out;
996 fstype = get_fs_type((char *) page);
997 free_page(page);
998 retval = -ENODEV;
999 if (!fstype)
1000 goto out;
1002 memset(&dummy, 0, sizeof(dummy));
1003 if (fstype->fs_flags & FS_REQUIRES_DEV) {
1004 dentry = namei(dev_name);
1005 retval = PTR_ERR(dentry);
1006 if (IS_ERR(dentry))
1007 goto out;
1009 inode = dentry->d_inode;
1010 retval = -ENOTBLK;
1011 if (!S_ISBLK(inode->i_mode))
1012 goto dput_and_out;
1014 retval = -EACCES;
1015 if (IS_NODEV(inode))
1016 goto dput_and_out;
1018 dev = inode->i_rdev;
1019 retval = -ENXIO;
1020 if (MAJOR(dev) >= MAX_BLKDEV)
1021 goto dput_and_out;
1023 retval = -ENOTBLK;
1024 dummy.f_op = get_blkfops(MAJOR(dev));
1025 if (!dummy.f_op)
1026 goto dput_and_out;
1028 if (dummy.f_op->open) {
1029 dummy.f_dentry = dentry;
1030 dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1031 retval = dummy.f_op->open(inode, &dummy);
1032 if (retval)
1033 goto dput_and_out;
1036 } else {
1037 retval = -EMFILE;
1038 if (!(dev = get_unnamed_dev()))
1039 goto out;
1042 page = 0;
1043 if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1044 flags = new_flags & ~MS_MGC_MSK;
1045 retval = copy_mount_options(data, &page);
1046 if (retval < 0)
1047 goto clean_up;
1049 retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1050 (void *) page);
1051 free_page(page);
1052 if (retval)
1053 goto clean_up;
1055 dput_and_out:
1056 dput(dentry);
1057 out:
1058 unlock_kernel();
1059 return retval;
1061 clean_up:
1062 if (dummy.f_op) {
1063 if (dummy.f_op->release)
1064 dummy.f_op->release(inode, NULL);
1065 } else
1066 put_unnamed_dev(dev);
1067 goto dput_and_out;
1070 __initfunc(static void do_mount_root(void))
1072 struct file_system_type * fs_type;
1073 struct super_block * sb;
1074 struct vfsmount *vfsmnt;
1075 struct inode * d_inode = NULL;
1076 struct file filp;
1077 int retval;
1079 #ifdef CONFIG_ROOT_NFS
1080 if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1081 ROOT_DEV = 0;
1082 if ((fs_type = get_fs_type("nfs"))) {
1083 sb = get_empty_super(); /* "can't fail" */
1084 sb->s_dev = get_unnamed_dev();
1085 sb->s_flags = root_mountflags & ~MS_RDONLY;
1086 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1087 if (vfsmnt) {
1088 if (nfs_root_mount(sb) >= 0) {
1089 sb->s_rd_only = 0;
1090 sb->s_dirt = 0;
1091 sb->s_type = fs_type;
1092 current->fs->root = dget(sb->s_root);
1093 current->fs->pwd = dget(sb->s_root);
1094 ROOT_DEV = sb->s_dev;
1095 printk (KERN_NOTICE "VFS: Mounted root (nfs filesystem).\n");
1096 return;
1098 remove_vfsmnt(sb->s_dev);
1100 put_unnamed_dev(sb->s_dev);
1101 sb->s_dev = 0;
1103 if (!ROOT_DEV) {
1104 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1105 ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1108 #endif
1110 #ifdef CONFIG_BLK_DEV_FD
1111 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1112 floppy_eject();
1113 #ifndef CONFIG_BLK_DEV_RAM
1114 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1115 #endif
1116 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1117 wait_for_keypress();
1119 #endif
1121 memset(&filp, 0, sizeof(filp));
1122 d_inode = get_empty_inode();
1123 d_inode->i_rdev = ROOT_DEV;
1124 filp.f_dentry = NULL;
1125 if ( root_mountflags & MS_RDONLY)
1126 filp.f_mode = 1; /* read only */
1127 else
1128 filp.f_mode = 3; /* read write */
1129 retval = blkdev_open(d_inode, &filp);
1130 if (retval == -EROFS) {
1131 root_mountflags |= MS_RDONLY;
1132 filp.f_mode = 1;
1133 retval = blkdev_open(d_inode, &filp);
1135 iput(d_inode);
1136 if (retval)
1138 * Allow the user to distinguish between failed open
1139 * and bad superblock on root device.
1141 printk("VFS: Cannot open root device %s\n",
1142 kdevname(ROOT_DEV));
1143 else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1144 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1145 continue;
1146 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1147 if (sb) {
1148 sb->s_flags = root_mountflags;
1149 current->fs->root = dget(sb->s_root);
1150 current->fs->pwd = dget(sb->s_root);
1151 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1152 fs_type->name,
1153 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1154 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1155 if (vfsmnt)
1156 return;
1157 panic("VFS: add_vfsmnt failed for root fs");
1160 panic("VFS: Unable to mount root fs on %s",
1161 kdevname(ROOT_DEV));
1165 __initfunc(void mount_root(void))
1167 struct super_block * sb = super_blocks;
1168 int i;
1170 memset(super_blocks, 0, sizeof(super_blocks));
1172 * Initialize the dirty inode list headers for the super blocks
1174 for (i = NR_SUPER ; i-- ; sb++)
1175 INIT_LIST_HEAD(&sb->s_dirty);
1177 do_mount_root();
1181 #ifdef CONFIG_BLK_DEV_INITRD
1183 extern int initmem_freed;
1185 __initfunc(static int do_change_root(kdev_t new_root_dev,const char *put_old))
1187 kdev_t old_root_dev;
1188 struct vfsmount *vfsmnt;
1189 struct dentry *old_root,*old_pwd,*dir_d = NULL;
1190 int error;
1192 old_root = current->fs->root;
1193 old_pwd = current->fs->pwd;
1194 old_root_dev = ROOT_DEV;
1195 if (!fs_may_mount(new_root_dev)) {
1196 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1197 return -EBUSY;
1199 ROOT_DEV = new_root_dev;
1200 do_mount_root();
1201 dput(old_root);
1202 dput(old_pwd);
1203 #if 1
1204 shrink_dcache();
1205 printk("do_change_root: old root has d_count=%d\n", old_root->d_count);
1206 #endif
1208 * Get the new mount directory
1210 dir_d = lookup_dentry(put_old, NULL, 1);
1211 if (IS_ERR(dir_d)) {
1212 error = PTR_ERR(dir_d);
1213 } else if (!dir_d->d_inode) {
1214 dput(dir_d);
1215 error = -ENOENT;
1216 } else {
1217 error = 0;
1219 if (!error && dir_d->d_covers != dir_d) {
1220 dput(dir_d);
1221 error = -EBUSY;
1223 if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1224 dput(dir_d);
1225 error = -ENOTDIR;
1227 if (error) {
1228 int umount_error;
1230 printk(KERN_NOTICE "Trying to unmount old root ... ");
1231 umount_error = do_umount(old_root_dev,1);
1232 if (!umount_error) {
1233 printk("okay\n");
1234 invalidate_buffers(old_root_dev);
1235 return 0;
1237 printk(KERN_ERR "error %d\n",umount_error);
1238 return error;
1240 remove_vfsmnt(old_root_dev);
1241 vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1242 if (vfsmnt) {
1243 d_mount(dir_d,old_root);
1244 return 0;
1246 printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1247 return -ENOMEM;
1250 int change_root(kdev_t new_root_dev,const char *put_old)
1252 if (initmem_freed) {
1253 printk (KERN_CRIT "Initmem has been already freed. Staying in initrd\n");
1254 return -EBUSY;
1256 return do_change_root(new_root_dev, put_old);
1259 #endif