Import 2.2.8pre2
[davej-history.git] / fs / super.c
blob08475c01706880a3a9e35e14102545ba0b5a8823
/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables.
 *                                   - mount system call
 *                                   - umount system call
 *
 *  Added options to /proc/mounts
 *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
 *
 *  GK 2/5/95  -  Changed to support mounting the root fs via NFS
 *
 *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
 *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
 */

20 #include <linux/config.h>
21 #include <linux/malloc.h>
22 #include <linux/locks.h>
23 #include <linux/smp_lock.h>
24 #include <linux/fd.h>
25 #include <linux/init.h>
26 #include <linux/quotaops.h>
27 #include <linux/acct.h>
29 #include <asm/uaccess.h>
31 #include <linux/nfs_fs.h>
32 #include <linux/nfs_fs_sb.h>
33 #include <linux/nfs_mount.h>
35 #ifdef CONFIG_KMOD
36 #include <linux/kmod.h>
37 #endif
40 * We use a semaphore to synchronize all mount/umount
41 * activity - imagine the mess if we have a race between
42 * unmounting a filesystem and re-mounting it (or something
43 * else).
45 static struct semaphore mount_sem = MUTEX;
47 extern void wait_for_keypress(void);
48 extern struct file_operations * get_blkfops(unsigned int major);
50 extern int root_mountflags;
52 static int do_remount_sb(struct super_block *sb, int flags, char * data);
54 /* this is initialized in init/main.c */
55 kdev_t ROOT_DEV;
57 int nr_super_blocks = 0;
58 int max_super_blocks = NR_SUPER;
59 LIST_HEAD(super_blocks);
61 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
62 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
63 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
64 *mru_vfsmnt = (struct vfsmount *) NULL;
66 /*
67 * This part handles the management of the list of mounted filesystems.
69 struct vfsmount *lookup_vfsmnt(kdev_t dev)
71 struct vfsmount *lptr;
73 if (vfsmntlist == (struct vfsmount *)NULL)
74 return ((struct vfsmount *)NULL);
76 if (mru_vfsmnt != (struct vfsmount *)NULL &&
77 mru_vfsmnt->mnt_dev == dev)
78 return (mru_vfsmnt);
80 for (lptr = vfsmntlist;
81 lptr != (struct vfsmount *)NULL;
82 lptr = lptr->mnt_next)
83 if (lptr->mnt_dev == dev) {
84 mru_vfsmnt = lptr;
85 return (lptr);
88 return ((struct vfsmount *)NULL);
89 /* NOTREACHED */
92 static struct vfsmount *add_vfsmnt(struct super_block *sb,
93 const char *dev_name, const char *dir_name)
95 struct vfsmount *lptr;
96 char *tmp, *name;
98 lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
99 if (!lptr)
100 goto out;
101 memset(lptr, 0, sizeof(struct vfsmount));
103 lptr->mnt_sb = sb;
104 lptr->mnt_dev = sb->s_dev;
105 lptr->mnt_flags = sb->s_flags;
107 sema_init(&lptr->mnt_dquot.semaphore, 1);
108 lptr->mnt_dquot.flags = 0;
110 /* N.B. Is it really OK to have a vfsmount without names? */
111 if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
112 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
113 if (name) {
114 strcpy(name, tmp);
115 lptr->mnt_devname = name;
117 putname(tmp);
119 if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
120 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
121 if (name) {
122 strcpy(name, tmp);
123 lptr->mnt_dirname = name;
125 putname(tmp);
128 if (vfsmntlist == (struct vfsmount *)NULL) {
129 vfsmntlist = vfsmnttail = lptr;
130 } else {
131 vfsmnttail->mnt_next = lptr;
132 vfsmnttail = lptr;
134 out:
135 return lptr;
138 static void remove_vfsmnt(kdev_t dev)
140 struct vfsmount *lptr, *tofree;
142 if (vfsmntlist == (struct vfsmount *)NULL)
143 return;
144 lptr = vfsmntlist;
145 if (lptr->mnt_dev == dev) {
146 tofree = lptr;
147 vfsmntlist = lptr->mnt_next;
148 if (vfsmnttail->mnt_dev == dev)
149 vfsmnttail = vfsmntlist;
150 } else {
151 while (lptr->mnt_next != (struct vfsmount *)NULL) {
152 if (lptr->mnt_next->mnt_dev == dev)
153 break;
154 lptr = lptr->mnt_next;
156 tofree = lptr->mnt_next;
157 if (tofree == (struct vfsmount *)NULL)
158 return;
159 lptr->mnt_next = lptr->mnt_next->mnt_next;
160 if (vfsmnttail->mnt_dev == dev)
161 vfsmnttail = lptr;
163 if (tofree == mru_vfsmnt)
164 mru_vfsmnt = NULL;
165 kfree(tofree->mnt_devname);
166 kfree(tofree->mnt_dirname);
167 kfree_s(tofree, sizeof(struct vfsmount));
170 int register_filesystem(struct file_system_type * fs)
172 struct file_system_type ** tmp;
174 if (!fs)
175 return -EINVAL;
176 if (fs->next)
177 return -EBUSY;
178 tmp = &file_systems;
179 while (*tmp) {
180 if (strcmp((*tmp)->name, fs->name) == 0)
181 return -EBUSY;
182 tmp = &(*tmp)->next;
184 *tmp = fs;
185 return 0;
#ifdef CONFIG_MODULES
/*
 * Remove 'fs' from the list of known filesystem types.
 * Returns 0 on success, or -EINVAL if 'fs' is not on the list.
 */
int unregister_filesystem(struct file_system_type * fs)
{
	struct file_system_type **slot;

	for (slot = &file_systems; *slot; slot = &(*slot)->next) {
		if (*slot != fs)
			continue;
		/* Unlink it and clear its link so it can be registered again. */
		*slot = fs->next;
		fs->next = NULL;
		return 0;
	}
	return -EINVAL;
}
#endif

206 static int fs_index(const char * __name)
208 struct file_system_type * tmp;
209 char * name;
210 int err, index;
212 name = getname(__name);
213 err = PTR_ERR(name);
214 if (IS_ERR(name))
215 return err;
217 index = 0;
218 for (tmp = file_systems ; tmp ; tmp = tmp->next) {
219 if (strcmp(tmp->name, name) == 0) {
220 putname(name);
221 return index;
223 index++;
225 putname(name);
226 return -EINVAL;
229 static int fs_name(unsigned int index, char * buf)
231 struct file_system_type * tmp;
232 int len;
234 tmp = file_systems;
235 while (tmp && index > 0) {
236 tmp = tmp->next;
237 index--;
239 if (!tmp)
240 return -EINVAL;
241 len = strlen(tmp->name) + 1;
242 return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
245 static int fs_maxindex(void)
247 struct file_system_type * tmp;
248 int index;
250 index = 0;
251 for (tmp = file_systems ; tmp ; tmp = tmp->next)
252 index++;
253 return index;
257 * Whee.. Weird sysv syscall.
259 asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
261 int retval = -EINVAL;
263 lock_kernel();
264 switch (option) {
265 case 1:
266 retval = fs_index((const char *) arg1);
267 break;
269 case 2:
270 retval = fs_name(arg1, (char *) arg2);
271 break;
273 case 3:
274 retval = fs_maxindex();
275 break;
277 unlock_kernel();
278 return retval;
281 static struct proc_fs_info {
282 int flag;
283 char *str;
284 } fs_info[] = {
285 { MS_NOEXEC, ",noexec" },
286 { MS_NOSUID, ",nosuid" },
287 { MS_NODEV, ",nodev" },
288 { MS_SYNCHRONOUS, ",sync" },
289 { MS_MANDLOCK, ",mand" },
290 { MS_NOATIME, ",noatime" },
291 { MS_NODIRATIME, ",nodiratime" },
292 #ifdef MS_NOSUB /* Can't find this except in mount.c */
293 { MS_NOSUB, ",nosub" },
294 #endif
295 { 0, NULL }
298 static struct proc_nfs_info {
299 int flag;
300 char *str;
301 } nfs_info[] = {
302 { NFS_MOUNT_SOFT, ",soft" },
303 { NFS_MOUNT_INTR, ",intr" },
304 { NFS_MOUNT_POSIX, ",posix" },
305 { NFS_MOUNT_NOCTO, ",nocto" },
306 { NFS_MOUNT_NOAC, ",noac" },
307 { 0, NULL }
310 int get_filesystem_info( char *buf )
312 struct vfsmount *tmp = vfsmntlist;
313 struct proc_fs_info *fs_infop;
314 struct proc_nfs_info *nfs_infop;
315 struct nfs_server *nfss;
316 int len = 0;
318 while ( tmp && len < PAGE_SIZE - 160)
320 len += sprintf( buf + len, "%s %s %s %s",
321 tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
322 tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
323 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
324 if (tmp->mnt_flags & fs_infop->flag) {
325 strcpy(buf + len, fs_infop->str);
326 len += strlen(fs_infop->str);
329 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
330 nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
331 if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
332 len += sprintf(buf+len, ",rsize=%d",
333 nfss->rsize);
335 if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
336 len += sprintf(buf+len, ",wsize=%d",
337 nfss->wsize);
339 #if 0
340 if (nfss->timeo != 7*HZ/10) {
341 len += sprintf(buf+len, ",timeo=%d",
342 nfss->timeo*10/HZ);
344 if (nfss->retrans != 3) {
345 len += sprintf(buf+len, ",retrans=%d",
346 nfss->retrans);
348 #endif
349 if (nfss->acregmin != 3*HZ) {
350 len += sprintf(buf+len, ",acregmin=%d",
351 nfss->acregmin/HZ);
353 if (nfss->acregmax != 60*HZ) {
354 len += sprintf(buf+len, ",acregmax=%d",
355 nfss->acregmax/HZ);
357 if (nfss->acdirmin != 30*HZ) {
358 len += sprintf(buf+len, ",acdirmin=%d",
359 nfss->acdirmin/HZ);
361 if (nfss->acdirmax != 60*HZ) {
362 len += sprintf(buf+len, ",acdirmax=%d",
363 nfss->acdirmax/HZ);
365 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
366 if (nfss->flags & nfs_infop->flag) {
367 strcpy(buf + len, nfs_infop->str);
368 len += strlen(nfs_infop->str);
371 len += sprintf(buf+len, ",addr=%s",
372 nfss->hostname);
374 len += sprintf( buf + len, " 0 0\n" );
375 tmp = tmp->mnt_next;
378 return len;
381 int get_filesystem_list(char * buf)
383 int len = 0;
384 struct file_system_type * tmp;
386 tmp = file_systems;
387 while (tmp && len < PAGE_SIZE - 80) {
388 len += sprintf(buf+len, "%s\t%s\n",
389 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
390 tmp->name);
391 tmp = tmp->next;
393 return len;
396 struct file_system_type *get_fs_type(const char *name)
398 struct file_system_type * fs = file_systems;
400 if (!name)
401 return fs;
402 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
404 #ifdef CONFIG_KMOD
405 if (!fs && (request_module(name) == 0)) {
406 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
409 #endif
411 return fs;
414 void __wait_on_super(struct super_block * sb)
416 struct wait_queue wait = { current, NULL };
418 add_wait_queue(&sb->s_wait, &wait);
419 repeat:
420 current->state = TASK_UNINTERRUPTIBLE;
421 if (sb->s_lock) {
422 schedule();
423 goto repeat;
425 remove_wait_queue(&sb->s_wait, &wait);
426 current->state = TASK_RUNNING;
430 * Note: check the dirty flag before waiting, so we don't
431 * hold up the sync while mounting a device. (The newly
432 * mounted device won't need syncing.)
434 void sync_supers(kdev_t dev)
436 struct super_block * sb;
438 for (sb = sb_entry(super_blocks.next);
439 sb != sb_entry(&super_blocks);
440 sb = sb_entry(sb->s_list.next)) {
441 if (!sb->s_dev)
442 continue;
443 if (dev && sb->s_dev != dev)
444 continue;
445 if (!sb->s_dirt)
446 continue;
447 /* N.B. Should lock the superblock while writing */
448 wait_on_super(sb);
449 if (!sb->s_dev || !sb->s_dirt)
450 continue;
451 if (dev && (dev != sb->s_dev))
452 continue;
453 if (sb->s_op && sb->s_op->write_super)
454 sb->s_op->write_super(sb);
458 struct super_block * get_super(kdev_t dev)
460 struct super_block * s;
462 if (!dev)
463 return NULL;
464 restart:
465 s = sb_entry(super_blocks.next);
466 while (s != sb_entry(&super_blocks))
467 if (s->s_dev == dev) {
468 wait_on_super(s);
469 if (s->s_dev == dev)
470 return s;
471 goto restart;
472 } else
473 s = sb_entry(s->s_list.next);
474 return NULL;
477 asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
479 struct super_block *s;
480 struct ustat tmp;
481 struct statfs sbuf;
482 mm_segment_t old_fs;
483 int err = -EINVAL;
485 lock_kernel();
486 s = get_super(to_kdev_t(dev));
487 if (s == NULL)
488 goto out;
489 err = -ENOSYS;
490 if (!(s->s_op->statfs))
491 goto out;
493 old_fs = get_fs();
494 set_fs(get_ds());
495 s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
496 set_fs(old_fs);
498 memset(&tmp,0,sizeof(struct ustat));
499 tmp.f_tfree = sbuf.f_bfree;
500 tmp.f_tinode = sbuf.f_ffree;
502 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
503 out:
504 unlock_kernel();
505 return err;
509 * Find a super_block with no device assigned.
511 static struct super_block *get_empty_super(void)
513 struct super_block *s;
515 for (s = sb_entry(super_blocks.next);
516 s != sb_entry(&super_blocks);
517 s = sb_entry(s->s_list.next)) {
518 if (s->s_dev)
519 continue;
520 if (!s->s_lock)
521 return s;
522 printk("VFS: empty superblock %p locked!\n", s);
524 /* Need a new one... */
525 if (nr_super_blocks >= max_super_blocks)
526 return NULL;
527 s = kmalloc(sizeof(struct super_block), GFP_USER);
528 if (s) {
529 nr_super_blocks++;
530 memset(s, 0, sizeof(struct super_block));
531 INIT_LIST_HEAD(&s->s_dirty);
532 list_add (&s->s_list, super_blocks.prev);
534 return s;
537 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
538 void *data, int silent)
540 struct super_block * s;
541 struct file_system_type *type;
543 if (!dev)
544 goto out_null;
545 check_disk_change(dev);
546 s = get_super(dev);
547 if (s)
548 goto out;
550 type = get_fs_type(name);
551 if (!type) {
552 printk("VFS: on device %s: get_fs_type(%s) failed\n",
553 kdevname(dev), name);
554 goto out;
556 s = get_empty_super();
557 if (!s)
558 goto out;
559 s->s_dev = dev;
560 s->s_flags = flags;
561 s->s_dirt = 0;
562 sema_init(&s->s_vfs_rename_sem,1);
563 /* N.B. Should lock superblock now ... */
564 if (!type->read_super(s, data, silent))
565 goto out_fail;
566 s->s_dev = dev; /* N.B. why do this again?? */
567 s->s_rd_only = 0;
568 s->s_type = type;
569 out:
570 return s;
572 /* N.B. s_dev should be cleared in type->read_super */
573 out_fail:
574 s->s_dev = 0;
575 out_null:
576 s = NULL;
577 goto out;
581 * Unnamed block devices are dummy devices used by virtual
582 * filesystems which don't use real block-devices. -- jrs
585 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
587 kdev_t get_unnamed_dev(void)
589 int i;
591 for (i = 1; i < 256; i++) {
592 if (!test_and_set_bit(i,unnamed_dev_in_use))
593 return MKDEV(UNNAMED_MAJOR, i);
595 return 0;
598 void put_unnamed_dev(kdev_t dev)
600 if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
601 return;
602 if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
603 return;
604 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
605 kdevname(dev));
608 static int d_umount(struct super_block * sb)
610 struct dentry * root = sb->s_root;
611 struct dentry * covered = root->d_covers;
613 if (root->d_count != 1)
614 return -EBUSY;
616 if (root->d_inode->i_state)
617 return -EBUSY;
619 sb->s_root = NULL;
621 if (covered != root) {
622 root->d_covers = root;
623 covered->d_mounts = covered;
624 dput(covered);
626 dput(root);
627 return 0;
630 static void d_mount(struct dentry *covered, struct dentry *dentry)
632 if (covered->d_mounts != covered) {
633 printk("VFS: mount - already mounted\n");
634 return;
636 covered->d_mounts = dentry;
637 dentry->d_covers = covered;
640 static int do_umount(kdev_t dev, int unmount_root, int flags)
642 struct super_block * sb;
643 int retval;
645 retval = -ENOENT;
646 sb = get_super(dev);
647 if (!sb || !sb->s_root)
648 goto out;
651 * Before checking whether the filesystem is still busy,
652 * make sure the kernel doesn't hold any quota files open
653 * on the device. If the umount fails, too bad -- there
654 * are no quotas running any more. Just turn them on again.
656 DQUOT_OFF(dev);
657 acct_auto_close(dev);
660 * If we may have to abort operations to get out of this
661 * mount, and they will themselves hold resources we must
662 * allow the fs to do things. In the Unix tradition of
663 * 'Gee thats tricky lets do it in userspace' the umount_begin
664 * might fail to complete on the first run through as other tasks
665 * must return, and the like. Thats for the mount program to worry
666 * about for the moment.
669 if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
670 sb->s_op->umount_begin(sb);
673 * Shrink dcache, then fsync. This guarantees that if the
674 * filesystem is quiescent at this point, then (a) only the
675 * root entry should be in use and (b) that root entry is
676 * clean.
678 shrink_dcache_sb(sb);
679 fsync_dev(dev);
681 if (dev==ROOT_DEV && !unmount_root) {
683 * Special case for "unmounting" root ...
684 * we just try to remount it readonly.
686 retval = 0;
687 if (!(sb->s_flags & MS_RDONLY))
688 retval = do_remount_sb(sb, MS_RDONLY, 0);
689 return retval;
692 retval = d_umount(sb);
693 if (retval)
694 goto out;
696 if (sb->s_op) {
697 if (sb->s_op->write_super && sb->s_dirt)
698 sb->s_op->write_super(sb);
701 lock_super(sb);
702 if (sb->s_op) {
703 if (sb->s_op->put_super)
704 sb->s_op->put_super(sb);
707 /* Forget any remaining inodes */
708 if (invalidate_inodes(sb)) {
709 printk("VFS: Busy inodes after unmount. "
710 "Self-destruct in 5 seconds. Have a nice day...\n");
713 sb->s_dev = 0; /* Free the superblock */
714 unlock_super(sb);
716 remove_vfsmnt(dev);
717 out:
718 return retval;
721 static int umount_dev(kdev_t dev, int flags)
723 int retval;
724 struct inode * inode = get_empty_inode();
726 retval = -ENOMEM;
727 if (!inode)
728 goto out;
730 inode->i_rdev = dev;
731 retval = -ENXIO;
732 if (MAJOR(dev) >= MAX_BLKDEV)
733 goto out_iput;
735 fsync_dev(dev);
737 down(&mount_sem);
739 retval = do_umount(dev, 0, flags);
740 if (!retval) {
741 fsync_dev(dev);
742 if (dev != ROOT_DEV) {
743 blkdev_release(inode);
744 put_unnamed_dev(dev);
748 up(&mount_sem);
749 out_iput:
750 iput(inode);
751 out:
752 return retval;
756 * Now umount can handle mount points as well as block devices.
757 * This is important for filesystems which use unnamed block devices.
759 * There is a little kludge here with the dummy_inode. The current
760 * vfs release functions only use the r_dev field in the inode so
761 * we give them the info they need without using a real inode.
762 * If any other fields are ever needed by any block device release
763 * functions, they should be faked here. -- jrs
765 * We now support a flag for forced unmount like the other 'big iron'
766 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
769 asmlinkage int sys_umount(char * name, int flags)
771 struct dentry * dentry;
772 int retval;
774 if (!capable(CAP_SYS_ADMIN))
775 return -EPERM;
777 lock_kernel();
778 dentry = namei(name);
779 retval = PTR_ERR(dentry);
780 if (!IS_ERR(dentry)) {
781 struct inode * inode = dentry->d_inode;
782 kdev_t dev = inode->i_rdev;
784 retval = 0;
785 if (S_ISBLK(inode->i_mode)) {
786 if (IS_NODEV(inode))
787 retval = -EACCES;
788 } else {
789 struct super_block *sb = inode->i_sb;
790 retval = -EINVAL;
791 if (sb && inode == sb->s_root->d_inode) {
792 dev = sb->s_dev;
793 retval = 0;
796 dput(dentry);
798 if (!retval)
799 retval = umount_dev(dev, flags);
801 unlock_kernel();
802 return retval;
806 * The 2.0 compatible umount. No flags.
809 asmlinkage int sys_oldumount(char * name)
811 return sys_umount(name,0);
815 * Check whether we can mount the specified device.
817 int fs_may_mount(kdev_t dev)
819 struct super_block * sb = get_super(dev);
820 int busy;
822 busy = sb && sb->s_root &&
823 (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
824 return !busy;
828 * do_mount() does the actual mounting after sys_mount has done the ugly
829 * parameter parsing. When enough time has gone by, and everything uses the
830 * new mount() parameters, sys_mount() can then be cleaned up.
832 * We cannot mount a filesystem if it has active, used, or dirty inodes.
833 * We also have to flush all inode-data for this device, as the new mount
834 * might need new info.
836 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
837 * supplying a leading "!" before the dir_name, allowing "stacks" of
838 * mounted filesystems. The stacking will only influence any pathname lookups
839 * _after_ the mount, but open file descriptors or working directories that
840 * are now covered remain valid. For example, when you overmount /home, any
841 * process with old cwd /home/joe will continue to use the old versions,
842 * as long as relative paths are used, but absolute paths like /home/joe/xxx
843 * will go to the new "top of stack" version. In general, crossing a
844 * mount point will always go to the top of stack element.
845 * Anyone using this new feature must know what he/she is doing.
848 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data)
850 struct dentry * dir_d;
851 struct super_block * sb;
852 struct vfsmount *vfsmnt;
853 int error;
855 error = -EACCES;
856 if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
857 goto out;
860 * Do the lookup first to force automounting.
862 dir_d = namei(dir_name);
863 error = PTR_ERR(dir_d);
864 if (IS_ERR(dir_d))
865 goto out;
867 down(&mount_sem);
868 error = -ENOTDIR;
869 if (!S_ISDIR(dir_d->d_inode->i_mode))
870 goto dput_and_out;
872 error = -EBUSY;
873 if (dir_d->d_covers != dir_d)
874 goto dput_and_out;
877 * Note: If the superblock already exists,
878 * read_super just does a get_super().
880 error = -EINVAL;
881 sb = read_super(dev, type, flags, data, 0);
882 if (!sb)
883 goto dput_and_out;
886 * We may have slept while reading the super block,
887 * so we check afterwards whether it's safe to mount.
889 error = -EBUSY;
890 if (!fs_may_mount(dev))
891 goto dput_and_out;
893 error = -ENOMEM;
894 vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
895 if (vfsmnt) {
896 d_mount(dget(dir_d), sb->s_root);
897 error = 0;
900 dput_and_out:
901 dput(dir_d);
902 up(&mount_sem);
903 out:
904 return error;
909 * Alters the mount flags of a mounted file system. Only the mount point
910 * is used as a reference - file system type and the device are ignored.
911 * FS-specific mount options can't be altered by remounting.
914 static int do_remount_sb(struct super_block *sb, int flags, char *data)
916 int retval;
917 struct vfsmount *vfsmnt;
920 * Invalidate the inodes, as some mount options may be changed.
921 * N.B. If we are changing media, we should check the return
922 * from invalidate_inodes ... can't allow _any_ open files.
924 invalidate_inodes(sb);
926 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
927 return -EACCES;
928 /*flags |= MS_RDONLY;*/
929 /* If we are remounting RDONLY, make sure there are no rw files open */
930 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
931 if (!fs_may_remount_ro(sb))
932 return -EBUSY;
933 if (sb->s_op && sb->s_op->remount_fs) {
934 retval = sb->s_op->remount_fs(sb, &flags, data);
935 if (retval)
936 return retval;
938 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
939 vfsmnt = lookup_vfsmnt(sb->s_dev);
940 if (vfsmnt)
941 vfsmnt->mnt_flags = sb->s_flags;
942 return 0;
945 static int do_remount(const char *dir,int flags,char *data)
947 struct dentry *dentry;
948 int retval;
950 dentry = namei(dir);
951 retval = PTR_ERR(dentry);
952 if (!IS_ERR(dentry)) {
953 struct super_block * sb = dentry->d_inode->i_sb;
955 retval = -EINVAL;
956 if (dentry == sb->s_root) {
958 * Shrink the dcache and sync the device.
960 shrink_dcache_sb(sb);
961 fsync_dev(sb->s_dev);
962 if (flags & MS_RDONLY)
963 acct_auto_close(sb->s_dev);
964 retval = do_remount_sb(sb, flags, data);
966 dput(dentry);
968 return retval;
971 static int copy_mount_options (const void * data, unsigned long *where)
973 int i;
974 unsigned long page;
975 struct vm_area_struct * vma;
977 *where = 0;
978 if (!data)
979 return 0;
981 vma = find_vma(current->mm, (unsigned long) data);
982 if (!vma || (unsigned long) data < vma->vm_start)
983 return -EFAULT;
984 if (!(vma->vm_flags & VM_READ))
985 return -EFAULT;
986 i = vma->vm_end - (unsigned long) data;
987 if (PAGE_SIZE <= (unsigned long) i)
988 i = PAGE_SIZE-1;
989 if (!(page = __get_free_page(GFP_KERNEL))) {
990 return -ENOMEM;
992 if (copy_from_user((void *) page,data,i)) {
993 free_page(page);
994 return -EFAULT;
996 *where = page;
997 return 0;
1001 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1002 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1004 * data is a (void *) that can point to any structure up to
1005 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1006 * information (or be NULL).
1008 * NOTE! As old versions of mount() didn't use this setup, the flags
1009 * have to have a special 16-bit magic number in the high word:
1010 * 0xC0ED. If this magic word isn't present, the flags and data info
1011 * aren't used, as the syscall assumes we are talking to an older
1012 * version that didn't understand them.
1014 asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
1015 unsigned long new_flags, void * data)
1017 struct file_system_type * fstype;
1018 struct dentry * dentry = NULL;
1019 struct inode * inode = NULL;
1020 kdev_t dev;
1021 int retval = -EPERM;
1022 unsigned long flags = 0;
1023 unsigned long page = 0;
1024 struct file dummy; /* allows read-write or read-only flag */
1026 lock_kernel();
1027 if (!capable(CAP_SYS_ADMIN))
1028 goto out;
1029 if ((new_flags &
1030 (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1031 retval = copy_mount_options (data, &page);
1032 if (retval < 0)
1033 goto out;
1034 retval = do_remount(dir_name,
1035 new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1036 (char *) page);
1037 free_page(page);
1038 goto out;
1041 retval = copy_mount_options (type, &page);
1042 if (retval < 0)
1043 goto out;
1044 fstype = get_fs_type((char *) page);
1045 free_page(page);
1046 retval = -ENODEV;
1047 if (!fstype)
1048 goto out;
1050 memset(&dummy, 0, sizeof(dummy));
1051 if (fstype->fs_flags & FS_REQUIRES_DEV) {
1052 dentry = namei(dev_name);
1053 retval = PTR_ERR(dentry);
1054 if (IS_ERR(dentry))
1055 goto out;
1057 inode = dentry->d_inode;
1058 retval = -ENOTBLK;
1059 if (!S_ISBLK(inode->i_mode))
1060 goto dput_and_out;
1062 retval = -EACCES;
1063 if (IS_NODEV(inode))
1064 goto dput_and_out;
1066 dev = inode->i_rdev;
1067 retval = -ENXIO;
1068 if (MAJOR(dev) >= MAX_BLKDEV)
1069 goto dput_and_out;
1071 retval = -ENOTBLK;
1072 dummy.f_op = get_blkfops(MAJOR(dev));
1073 if (!dummy.f_op)
1074 goto dput_and_out;
1076 if (dummy.f_op->open) {
1077 dummy.f_dentry = dentry;
1078 dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1079 retval = dummy.f_op->open(inode, &dummy);
1080 if (retval)
1081 goto dput_and_out;
1084 } else {
1085 retval = -EMFILE;
1086 if (!(dev = get_unnamed_dev()))
1087 goto out;
1090 page = 0;
1091 if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1092 flags = new_flags & ~MS_MGC_MSK;
1093 retval = copy_mount_options(data, &page);
1094 if (retval < 0)
1095 goto clean_up;
1097 retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1098 (void *) page);
1099 free_page(page);
1100 if (retval)
1101 goto clean_up;
1103 dput_and_out:
1104 dput(dentry);
1105 out:
1106 unlock_kernel();
1107 return retval;
1109 clean_up:
1110 if (dummy.f_op) {
1111 if (dummy.f_op->release)
1112 dummy.f_op->release(inode, NULL);
1113 } else
1114 put_unnamed_dev(dev);
1115 goto dput_and_out;
1118 void __init mount_root(void)
1120 struct file_system_type * fs_type;
1121 struct super_block * sb;
1122 struct vfsmount *vfsmnt;
1123 struct inode * d_inode = NULL;
1124 struct file filp;
1125 int retval;
1127 #ifdef CONFIG_ROOT_NFS
1128 if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1129 ROOT_DEV = 0;
1130 if ((fs_type = get_fs_type("nfs"))) {
1131 sb = get_empty_super(); /* "can't fail" */
1132 sb->s_dev = get_unnamed_dev();
1133 sb->s_flags = root_mountflags;
1134 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1135 if (vfsmnt) {
1136 if (nfs_root_mount(sb) >= 0) {
1137 sb->s_dirt = 0;
1138 sb->s_type = fs_type;
1139 current->fs->root = dget(sb->s_root);
1140 current->fs->pwd = dget(sb->s_root);
1141 ROOT_DEV = sb->s_dev;
1142 printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1143 return;
1145 remove_vfsmnt(sb->s_dev);
1147 put_unnamed_dev(sb->s_dev);
1148 sb->s_dev = 0;
1150 if (!ROOT_DEV) {
1151 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1152 ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1155 #endif
1157 #ifdef CONFIG_BLK_DEV_FD
1158 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1159 #ifdef CONFIG_BLK_DEV_RAM
1160 extern int rd_doload;
1161 #endif
1162 floppy_eject();
1163 #ifndef CONFIG_BLK_DEV_RAM
1164 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1165 #else
1166 /* rd_doload is 2 for a dual initrd/ramload setup */
1167 if(rd_doload==2)
1168 rd_load_secondary();
1169 else
1170 #endif
1172 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1173 wait_for_keypress();
1176 #endif
1178 memset(&filp, 0, sizeof(filp));
1179 d_inode = get_empty_inode();
1180 d_inode->i_rdev = ROOT_DEV;
1181 filp.f_dentry = NULL;
1182 if ( root_mountflags & MS_RDONLY)
1183 filp.f_mode = 1; /* read only */
1184 else
1185 filp.f_mode = 3; /* read write */
1186 retval = blkdev_open(d_inode, &filp);
1187 if (retval == -EROFS) {
1188 root_mountflags |= MS_RDONLY;
1189 filp.f_mode = 1;
1190 retval = blkdev_open(d_inode, &filp);
1192 iput(d_inode);
1193 if (retval)
1195 * Allow the user to distinguish between failed open
1196 * and bad superblock on root device.
1198 printk("VFS: Cannot open root device %s\n",
1199 kdevname(ROOT_DEV));
1200 else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1201 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1202 continue;
1203 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1204 if (sb) {
1205 sb->s_flags = root_mountflags;
1206 current->fs->root = dget(sb->s_root);
1207 current->fs->pwd = dget(sb->s_root);
1208 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1209 fs_type->name,
1210 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1211 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1212 if (vfsmnt)
1213 return;
1214 panic("VFS: add_vfsmnt failed for root fs");
1217 panic("VFS: Unable to mount root fs on %s",
1218 kdevname(ROOT_DEV));
#ifdef CONFIG_BLK_DEV_INITRD

/*
 * Switch the root filesystem to 'new_root_dev' and re-attach the old
 * root at directory 'put_old' of the new root.
 *
 * If 'put_old' cannot be used (lookup fails, no inode, already a mount
 * point, or not a directory) an unmount of the old root is attempted
 * instead: 0 is returned if that works, otherwise the error that made
 * 'put_old' unusable.  Returns -EBUSY without changing anything if the
 * new root device is busy, and -ENOMEM if the mount table entry for the
 * old root cannot be allocated.
 */
int __init change_root(kdev_t new_root_dev,const char *put_old)
{
	kdev_t old_root_dev = ROOT_DEV;
	struct dentry *old_root = current->fs->root;
	struct dentry *old_pwd = current->fs->pwd;
	struct dentry *dir_d = NULL;
	struct vfsmount *vfsmnt;
	int error, umount_error;

	if (!fs_may_mount(new_root_dev)) {
		printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
		return -EBUSY;
	}

	ROOT_DEV = new_root_dev;
	mount_root();
	dput(old_root);
	dput(old_pwd);
#if 1
	shrink_dcache();
	printk("change_root: old root has d_count=%d\n", old_root->d_count);
#endif

	/*
	 * Get the new mount directory
	 */
	dir_d = lookup_dentry(put_old, NULL, 1);
	if (IS_ERR(dir_d)) {
		error = PTR_ERR(dir_d);
	} else {
		if (!dir_d->d_inode)
			error = -ENOENT;
		else if (dir_d->d_covers != dir_d)
			error = -EBUSY;
		else if (!S_ISDIR(dir_d->d_inode->i_mode))
			error = -ENOTDIR;
		else
			error = 0;
		/* An unusable directory is released right away. */
		if (error)
			dput(dir_d);
	}

	if (error) {
		printk(KERN_NOTICE "Trying to unmount old root ... ");
		umount_error = do_umount(old_root_dev,1, 0);
		if (!umount_error) {
			printk("okay\n");
			invalidate_buffers(old_root_dev);
			return 0;
		}
		printk(KERN_ERR "error %d\n",umount_error);
		return error;
	}

	/* Re-register the old root under its new name and mount point. */
	remove_vfsmnt(old_root_dev);
	vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
	if (!vfsmnt) {
		printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
		return -ENOMEM;
	}
	d_mount(dir_d,old_root);
	return 0;
}

#endif