Import 2.1.118
[davej-history.git] / fs / super.c
blob843464421796c7d838d31aaec9c074d34b917a69
1 /*
2 * linux/fs/super.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
6 * super.c contains code to handle: - mount structures
7 * - super-block tables.
8 * - mount system call
9 * - umount system call
11 * Added options to /proc/mounts
12 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
14 * GK 2/5/95 - Changed to support mounting the root fs via NFS
16 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
17 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
20 #include <linux/config.h>
21 #include <linux/sched.h>
22 #include <linux/kernel.h>
23 #include <linux/mount.h>
24 #include <linux/malloc.h>
25 #include <linux/major.h>
26 #include <linux/stat.h>
27 #include <linux/errno.h>
28 #include <linux/string.h>
29 #include <linux/locks.h>
30 #include <linux/mm.h>
31 #include <linux/smp.h>
32 #include <linux/smp_lock.h>
33 #include <linux/fd.h>
34 #include <linux/init.h>
35 #include <linux/quotaops.h>
37 #include <asm/system.h>
38 #include <asm/uaccess.h>
39 #include <asm/bitops.h>
41 #include <linux/nfs_fs.h>
42 #include <linux/nfs_fs_sb.h>
43 #include <linux/nfs_mount.h>
45 #ifdef CONFIG_KMOD
46 #include <linux/kmod.h>
47 #endif
50 * We use a semaphore to synchronize all mount/umount
51 * activity - imagine the mess if we have a race between
52 * unmounting a filesystem and re-mounting it (or something
53 * else).
55 static struct semaphore mount_sem = MUTEX;
57 #ifdef CONFIG_BSD_PROCESS_ACCT
58 extern void acct_auto_close(kdev_t);
59 #endif
61 extern void wait_for_keypress(void);
62 extern struct file_operations * get_blkfops(unsigned int major);
64 extern int root_mountflags;
66 static int do_remount_sb(struct super_block *sb, int flags, char * data);
68 /* this is initialized in init/main.c */
69 kdev_t ROOT_DEV;
71 int nr_super_blocks = 0;
72 int max_super_blocks = NR_SUPER;
73 LIST_HEAD(super_blocks);
75 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
76 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
77 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
78 *mru_vfsmnt = (struct vfsmount *) NULL;
80 /*
81 * This part handles the management of the list of mounted filesystems.
83 struct vfsmount *lookup_vfsmnt(kdev_t dev)
85 struct vfsmount *lptr;
87 if (vfsmntlist == (struct vfsmount *)NULL)
88 return ((struct vfsmount *)NULL);
90 if (mru_vfsmnt != (struct vfsmount *)NULL &&
91 mru_vfsmnt->mnt_dev == dev)
92 return (mru_vfsmnt);
94 for (lptr = vfsmntlist;
95 lptr != (struct vfsmount *)NULL;
96 lptr = lptr->mnt_next)
97 if (lptr->mnt_dev == dev) {
98 mru_vfsmnt = lptr;
99 return (lptr);
102 return ((struct vfsmount *)NULL);
103 /* NOTREACHED */
106 static struct vfsmount *add_vfsmnt(struct super_block *sb,
107 const char *dev_name, const char *dir_name)
109 struct vfsmount *lptr;
110 char *tmp, *name;
112 lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
113 if (!lptr)
114 goto out;
115 memset(lptr, 0, sizeof(struct vfsmount));
117 lptr->mnt_sb = sb;
118 lptr->mnt_dev = sb->s_dev;
119 lptr->mnt_flags = sb->s_flags;
121 sema_init(&lptr->mnt_dquot.semaphore, 1);
122 lptr->mnt_dquot.flags = 0;
124 /* N.B. Is it really OK to have a vfsmount without names? */
125 if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
126 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
127 if (name) {
128 strcpy(name, tmp);
129 lptr->mnt_devname = name;
131 putname(tmp);
133 if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
134 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
135 if (name) {
136 strcpy(name, tmp);
137 lptr->mnt_dirname = name;
139 putname(tmp);
142 if (vfsmntlist == (struct vfsmount *)NULL) {
143 vfsmntlist = vfsmnttail = lptr;
144 } else {
145 vfsmnttail->mnt_next = lptr;
146 vfsmnttail = lptr;
148 out:
149 return lptr;
152 static void remove_vfsmnt(kdev_t dev)
154 struct vfsmount *lptr, *tofree;
156 if (vfsmntlist == (struct vfsmount *)NULL)
157 return;
158 lptr = vfsmntlist;
159 if (lptr->mnt_dev == dev) {
160 tofree = lptr;
161 vfsmntlist = lptr->mnt_next;
162 if (vfsmnttail->mnt_dev == dev)
163 vfsmnttail = vfsmntlist;
164 } else {
165 while (lptr->mnt_next != (struct vfsmount *)NULL) {
166 if (lptr->mnt_next->mnt_dev == dev)
167 break;
168 lptr = lptr->mnt_next;
170 tofree = lptr->mnt_next;
171 if (tofree == (struct vfsmount *)NULL)
172 return;
173 lptr->mnt_next = lptr->mnt_next->mnt_next;
174 if (vfsmnttail->mnt_dev == dev)
175 vfsmnttail = lptr;
177 if (tofree == mru_vfsmnt)
178 mru_vfsmnt = NULL;
179 kfree(tofree->mnt_devname);
180 kfree(tofree->mnt_dirname);
181 kfree_s(tofree, sizeof(struct vfsmount));
184 int register_filesystem(struct file_system_type * fs)
186 struct file_system_type ** tmp;
188 if (!fs)
189 return -EINVAL;
190 if (fs->next)
191 return -EBUSY;
192 tmp = &file_systems;
193 while (*tmp) {
194 if (strcmp((*tmp)->name, fs->name) == 0)
195 return -EBUSY;
196 tmp = &(*tmp)->next;
198 *tmp = fs;
199 return 0;
#ifdef CONFIG_MODULES
/*
 * Unlink filesystem type 'fs' from the list of registered types and
 * clear its 'next' pointer.  Returns 0 on success, or -EINVAL when
 * 'fs' is not on the list.
 */
int unregister_filesystem(struct file_system_type * fs)
{
	struct file_system_type **link;

	for (link = &file_systems; *link; link = &(*link)->next) {
		if (*link != fs)
			continue;
		*link = fs->next;
		fs->next = NULL;
		return 0;
	}
	return -EINVAL;
}
#endif
220 static int fs_index(const char * __name)
222 struct file_system_type * tmp;
223 char * name;
224 int err, index;
226 name = getname(__name);
227 err = PTR_ERR(name);
228 if (IS_ERR(name))
229 return err;
231 index = 0;
232 for (tmp = file_systems ; tmp ; tmp = tmp->next) {
233 if (strcmp(tmp->name, name) == 0) {
234 putname(name);
235 return index;
237 index++;
239 putname(name);
240 return -EINVAL;
243 static int fs_name(unsigned int index, char * buf)
245 struct file_system_type * tmp;
246 int len;
248 tmp = file_systems;
249 while (tmp && index > 0) {
250 tmp = tmp->next;
251 index--;
253 if (!tmp)
254 return -EINVAL;
255 len = strlen(tmp->name) + 1;
256 return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
259 static int fs_maxindex(void)
261 struct file_system_type * tmp;
262 int index;
264 index = 0;
265 for (tmp = file_systems ; tmp ; tmp = tmp->next)
266 index++;
267 return index;
271 * Whee.. Weird sysv syscall.
273 asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
275 int retval = -EINVAL;
277 lock_kernel();
278 switch (option) {
279 case 1:
280 retval = fs_index((const char *) arg1);
281 break;
283 case 2:
284 retval = fs_name(arg1, (char *) arg2);
285 break;
287 case 3:
288 retval = fs_maxindex();
289 break;
291 unlock_kernel();
292 return retval;
295 static struct proc_fs_info {
296 int flag;
297 char *str;
298 } fs_info[] = {
299 { MS_NOEXEC, ",noexec" },
300 { MS_NOSUID, ",nosuid" },
301 { MS_NODEV, ",nodev" },
302 { MS_SYNCHRONOUS, ",sync" },
303 { MS_MANDLOCK, ",mand" },
304 { MS_NOATIME, ",noatime" },
305 { MS_NODIRATIME, ",nodiratime" },
306 #ifdef MS_NOSUB /* Can't find this except in mount.c */
307 { MS_NOSUB, ",nosub" },
308 #endif
309 { 0, NULL }
312 static struct proc_nfs_info {
313 int flag;
314 char *str;
315 } nfs_info[] = {
316 { NFS_MOUNT_SOFT, ",soft" },
317 { NFS_MOUNT_INTR, ",intr" },
318 { NFS_MOUNT_POSIX, ",posix" },
319 { NFS_MOUNT_NOCTO, ",nocto" },
320 { NFS_MOUNT_NOAC, ",noac" },
321 { 0, NULL }
324 int get_filesystem_info( char *buf )
326 struct vfsmount *tmp = vfsmntlist;
327 struct proc_fs_info *fs_infop;
328 struct proc_nfs_info *nfs_infop;
329 struct nfs_server *nfss;
330 int len = 0;
332 while ( tmp && len < PAGE_SIZE - 160)
334 len += sprintf( buf + len, "%s %s %s %s",
335 tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
336 tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
337 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
338 if (tmp->mnt_flags & fs_infop->flag) {
339 strcpy(buf + len, fs_infop->str);
340 len += strlen(fs_infop->str);
343 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
344 nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
345 if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
346 len += sprintf(buf+len, ",rsize=%d",
347 nfss->rsize);
349 if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
350 len += sprintf(buf+len, ",wsize=%d",
351 nfss->wsize);
353 #if 0
354 if (nfss->timeo != 7*HZ/10) {
355 len += sprintf(buf+len, ",timeo=%d",
356 nfss->timeo*10/HZ);
358 if (nfss->retrans != 3) {
359 len += sprintf(buf+len, ",retrans=%d",
360 nfss->retrans);
362 #endif
363 if (nfss->acregmin != 3*HZ) {
364 len += sprintf(buf+len, ",acregmin=%d",
365 nfss->acregmin/HZ);
367 if (nfss->acregmax != 60*HZ) {
368 len += sprintf(buf+len, ",acregmax=%d",
369 nfss->acregmax/HZ);
371 if (nfss->acdirmin != 30*HZ) {
372 len += sprintf(buf+len, ",acdirmin=%d",
373 nfss->acdirmin/HZ);
375 if (nfss->acdirmax != 60*HZ) {
376 len += sprintf(buf+len, ",acdirmax=%d",
377 nfss->acdirmax/HZ);
379 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
380 if (nfss->flags & nfs_infop->flag) {
381 strcpy(buf + len, nfs_infop->str);
382 len += strlen(nfs_infop->str);
385 len += sprintf(buf+len, ",addr=%s",
386 nfss->hostname);
388 len += sprintf( buf + len, " 0 0\n" );
389 tmp = tmp->mnt_next;
392 return len;
395 int get_filesystem_list(char * buf)
397 int len = 0;
398 struct file_system_type * tmp;
400 tmp = file_systems;
401 while (tmp && len < PAGE_SIZE - 80) {
402 len += sprintf(buf+len, "%s\t%s\n",
403 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
404 tmp->name);
405 tmp = tmp->next;
407 return len;
410 struct file_system_type *get_fs_type(const char *name)
412 struct file_system_type * fs = file_systems;
414 if (!name)
415 return fs;
416 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
418 #ifdef CONFIG_KMOD
419 if (!fs && (request_module(name) == 0)) {
420 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
423 #endif
425 return fs;
428 void __wait_on_super(struct super_block * sb)
430 struct wait_queue wait = { current, NULL };
432 add_wait_queue(&sb->s_wait, &wait);
433 repeat:
434 current->state = TASK_UNINTERRUPTIBLE;
435 if (sb->s_lock) {
436 schedule();
437 goto repeat;
439 remove_wait_queue(&sb->s_wait, &wait);
440 current->state = TASK_RUNNING;
444 * Note: check the dirty flag before waiting, so we don't
445 * hold up the sync while mounting a device. (The newly
446 * mounted device won't need syncing.)
448 void sync_supers(kdev_t dev)
450 struct super_block * sb;
452 for (sb = sb_entry(super_blocks.next);
453 sb != sb_entry(&super_blocks);
454 sb = sb_entry(sb->s_list.next)) {
455 if (!sb->s_dev)
456 continue;
457 if (dev && sb->s_dev != dev)
458 continue;
459 if (!sb->s_dirt)
460 continue;
461 /* N.B. Should lock the superblock while writing */
462 wait_on_super(sb);
463 if (!sb->s_dev || !sb->s_dirt)
464 continue;
465 if (dev && (dev != sb->s_dev))
466 continue;
467 if (sb->s_op && sb->s_op->write_super)
468 sb->s_op->write_super(sb);
472 struct super_block * get_super(kdev_t dev)
474 struct super_block * s;
476 if (!dev)
477 return NULL;
478 restart:
479 s = sb_entry(super_blocks.next);
480 while (s != sb_entry(&super_blocks))
481 if (s->s_dev == dev) {
482 wait_on_super(s);
483 if (s->s_dev == dev)
484 return s;
485 goto restart;
486 } else
487 s = sb_entry(s->s_list.next);
488 return NULL;
491 asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
493 struct super_block *s;
494 struct ustat tmp;
495 struct statfs sbuf;
496 mm_segment_t old_fs;
497 int err = -EINVAL;
499 lock_kernel();
500 s = get_super(to_kdev_t(dev));
501 if (s == NULL)
502 goto out;
503 err = -ENOSYS;
504 if (!(s->s_op->statfs))
505 goto out;
507 old_fs = get_fs();
508 set_fs(get_ds());
509 s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
510 set_fs(old_fs);
512 memset(&tmp,0,sizeof(struct ustat));
513 tmp.f_tfree = sbuf.f_bfree;
514 tmp.f_tinode = sbuf.f_ffree;
516 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
517 out:
518 unlock_kernel();
519 return err;
523 * Find a super_block with no device assigned.
525 static struct super_block *get_empty_super(void)
527 struct super_block *s;
529 for (s = sb_entry(super_blocks.next);
530 s != sb_entry(&super_blocks);
531 s = sb_entry(s->s_list.next)) {
532 if (s->s_dev)
533 continue;
534 if (!s->s_lock)
535 return s;
536 printk("VFS: empty superblock %p locked!\n", s);
538 /* Need a new one... */
539 if (nr_super_blocks >= max_super_blocks)
540 return NULL;
541 s = kmalloc(sizeof(struct super_block), GFP_USER);
542 if (s) {
543 nr_super_blocks++;
544 memset(s, 0, sizeof(struct super_block));
545 INIT_LIST_HEAD(&s->s_dirty);
546 list_add (&s->s_list, super_blocks.prev);
548 return s;
551 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
552 void *data, int silent)
554 struct super_block * s;
555 struct file_system_type *type;
557 if (!dev)
558 goto out_null;
559 check_disk_change(dev);
560 s = get_super(dev);
561 if (s)
562 goto out;
564 type = get_fs_type(name);
565 if (!type) {
566 printk("VFS: on device %s: get_fs_type(%s) failed\n",
567 kdevname(dev), name);
568 goto out;
570 s = get_empty_super();
571 if (!s)
572 goto out;
573 s->s_dev = dev;
574 s->s_flags = flags;
575 s->s_dirt = 0;
576 /* N.B. Should lock superblock now ... */
577 if (!type->read_super(s, data, silent))
578 goto out_fail;
579 s->s_dev = dev; /* N.B. why do this again?? */
580 s->s_rd_only = 0;
581 s->s_type = type;
582 out:
583 return s;
585 /* N.B. s_dev should be cleared in type->read_super */
586 out_fail:
587 s->s_dev = 0;
588 out_null:
589 s = NULL;
590 goto out;
594 * Unnamed block devices are dummy devices used by virtual
595 * filesystems which don't use real block-devices. -- jrs
598 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
600 kdev_t get_unnamed_dev(void)
602 int i;
604 for (i = 1; i < 256; i++) {
605 if (!test_and_set_bit(i,unnamed_dev_in_use))
606 return MKDEV(UNNAMED_MAJOR, i);
608 return 0;
611 void put_unnamed_dev(kdev_t dev)
613 if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
614 return;
615 if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
616 return;
617 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
618 kdevname(dev));
621 static int d_umount(struct super_block * sb)
623 struct dentry * root = sb->s_root;
624 struct dentry * covered = root->d_covers;
626 if (root->d_count != 1)
627 return -EBUSY;
629 if (root->d_inode->i_state)
630 return -EBUSY;
632 sb->s_root = NULL;
634 if (covered != root) {
635 root->d_covers = root;
636 covered->d_mounts = covered;
637 dput(covered);
639 dput(root);
640 return 0;
643 static void d_mount(struct dentry *covered, struct dentry *dentry)
645 if (covered->d_mounts != covered) {
646 printk("VFS: mount - already mounted\n");
647 return;
649 covered->d_mounts = dentry;
650 dentry->d_covers = covered;
653 static int do_umount(kdev_t dev, int unmount_root, int flags)
655 struct super_block * sb;
656 int retval;
658 retval = -ENOENT;
659 sb = get_super(dev);
660 if (!sb || !sb->s_root)
661 goto out;
664 * Before checking whether the filesystem is still busy,
665 * make sure the kernel doesn't hold any quota files open
666 * on the device. If the umount fails, too bad -- there
667 * are no quotas running any more. Just turn them on again.
669 DQUOT_OFF(dev);
671 #ifdef CONFIG_BSD_PROCESS_ACCT
672 (void) acct_auto_close(dev);
673 #endif
676 * If we may have to abort operations to get out of this
677 * mount, and they will themselves hold resources we must
678 * allow the fs to do things. In the Unix tradition of
679 * 'Gee thats tricky lets do it in userspace' the umount_begin
680 * might fail to complete on the first run through as other tasks
681 * must return, and the like. Thats for the mount program to worry
682 * about for the moment.
685 if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
686 sb->s_op->umount_begin(sb);
689 * Shrink dcache, then fsync. This guarantees that if the
690 * filesystem is quiescent at this point, then (a) only the
691 * root entry should be in use and (b) that root entry is
692 * clean.
694 shrink_dcache_sb(sb);
695 fsync_dev(dev);
697 if (dev==ROOT_DEV && !unmount_root) {
699 * Special case for "unmounting" root ...
700 * we just try to remount it readonly.
702 retval = 0;
703 if (!(sb->s_flags & MS_RDONLY))
704 retval = do_remount_sb(sb, MS_RDONLY, 0);
705 return retval;
708 retval = d_umount(sb);
709 if (retval)
710 goto out;
712 if (sb->s_op) {
713 if (sb->s_op->write_super && sb->s_dirt)
714 sb->s_op->write_super(sb);
717 lock_super(sb);
718 if (sb->s_op) {
719 if (sb->s_op->put_super)
720 sb->s_op->put_super(sb);
723 /* Forget any remaining inodes */
724 if (invalidate_inodes(sb)) {
725 printk("VFS: Busy inodes after unmount. "
726 "Self-destruct in 5 seconds. Have a nice day...\n");
729 sb->s_dev = 0; /* Free the superblock */
730 unlock_super(sb);
732 remove_vfsmnt(dev);
733 out:
734 return retval;
737 static int umount_dev(kdev_t dev, int flags)
739 int retval;
740 struct inode * inode = get_empty_inode();
742 retval = -ENOMEM;
743 if (!inode)
744 goto out;
746 inode->i_rdev = dev;
747 retval = -ENXIO;
748 if (MAJOR(dev) >= MAX_BLKDEV)
749 goto out_iput;
751 fsync_dev(dev);
753 down(&mount_sem);
755 retval = do_umount(dev, 0, flags);
756 if (!retval) {
757 fsync_dev(dev);
758 if (dev != ROOT_DEV) {
759 blkdev_release(inode);
760 put_unnamed_dev(dev);
764 up(&mount_sem);
765 out_iput:
766 iput(inode);
767 out:
768 return retval;
772 * Now umount can handle mount points as well as block devices.
773 * This is important for filesystems which use unnamed block devices.
775 * There is a little kludge here with the dummy_inode. The current
776 * vfs release functions only use the r_dev field in the inode so
777 * we give them the info they need without using a real inode.
778 * If any other fields are ever needed by any block device release
779 * functions, they should be faked here. -- jrs
781 * We now support a flag for forced unmount like the other 'big iron'
782 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
785 asmlinkage int sys_umount(char * name, int flags)
787 struct dentry * dentry;
788 int retval;
790 if (!capable(CAP_SYS_ADMIN))
791 return -EPERM;
793 lock_kernel();
794 dentry = namei(name);
795 retval = PTR_ERR(dentry);
796 if (!IS_ERR(dentry)) {
797 struct inode * inode = dentry->d_inode;
798 kdev_t dev = inode->i_rdev;
800 retval = 0;
801 if (S_ISBLK(inode->i_mode)) {
802 if (IS_NODEV(inode))
803 retval = -EACCES;
804 } else {
805 struct super_block *sb = inode->i_sb;
806 retval = -EINVAL;
807 if (sb && inode == sb->s_root->d_inode) {
808 dev = sb->s_dev;
809 retval = 0;
812 dput(dentry);
814 if (!retval)
815 retval = umount_dev(dev, flags);
817 unlock_kernel();
818 return retval;
822 * The 2.0 compatible umount. No flags.
825 asmlinkage int sys_oldumount(char * name)
827 return sys_umount(name,0);
831 * Check whether we can mount the specified device.
833 int fs_may_mount(kdev_t dev)
835 struct super_block * sb = get_super(dev);
836 int busy;
838 busy = sb && sb->s_root &&
839 (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
840 return !busy;
844 * do_mount() does the actual mounting after sys_mount has done the ugly
845 * parameter parsing. When enough time has gone by, and everything uses the
846 * new mount() parameters, sys_mount() can then be cleaned up.
848 * We cannot mount a filesystem if it has active, used, or dirty inodes.
849 * We also have to flush all inode-data for this device, as the new mount
850 * might need new info.
852 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
853 * supplying a leading "!" before the dir_name, allowing "stacks" of
854 * mounted filesystems. The stacking will only influence any pathname lookups
855 * _after_ the mount, but open file descriptors or working directories that
856 * are now covered remain valid. For example, when you overmount /home, any
857 * process with old cwd /home/joe will continue to use the old versions,
858 * as long as relative paths are used, but absolute paths like /home/joe/xxx
859 * will go to the new "top of stack" version. In general, crossing a
860 * mount point will always go to the top of stack element.
861 * Anyone using this new feature must know what he/she is doing.
864 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data)
866 struct dentry * dir_d;
867 struct super_block * sb;
868 struct vfsmount *vfsmnt;
869 int error;
871 down(&mount_sem);
872 error = -EACCES;
873 if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
874 goto out;
875 /*flags |= MS_RDONLY;*/
877 dir_d = namei(dir_name);
878 error = PTR_ERR(dir_d);
879 if (IS_ERR(dir_d))
880 goto out;
882 error = -ENOTDIR;
883 if (!S_ISDIR(dir_d->d_inode->i_mode))
884 goto dput_and_out;
886 error = -EBUSY;
887 if (dir_d->d_covers != dir_d)
888 goto dput_and_out;
891 * Note: If the superblock already exists,
892 * read_super just does a get_super().
894 error = -EINVAL;
895 sb = read_super(dev, type, flags, data, 0);
896 if (!sb)
897 goto dput_and_out;
900 * We may have slept while reading the super block,
901 * so we check afterwards whether it's safe to mount.
903 error = -EBUSY;
904 if (!fs_may_mount(dev))
905 goto dput_and_out;
907 error = -ENOMEM;
908 vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
909 if (!vfsmnt)
910 goto dput_and_out;
911 d_mount(dir_d, sb->s_root);
912 error = 0; /* we don't dput(dir_d) - see umount */
914 out:
915 up(&mount_sem);
916 return error;
918 dput_and_out:
919 dput(dir_d);
920 goto out;
925 * Alters the mount flags of a mounted file system. Only the mount point
926 * is used as a reference - file system type and the device are ignored.
927 * FS-specific mount options can't be altered by remounting.
930 static int do_remount_sb(struct super_block *sb, int flags, char *data)
932 int retval;
933 struct vfsmount *vfsmnt;
936 * Invalidate the inodes, as some mount options may be changed.
937 * N.B. If we are changing media, we should check the return
938 * from invalidate_inodes ... can't allow _any_ open files.
940 invalidate_inodes(sb);
942 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
943 return -EACCES;
944 /*flags |= MS_RDONLY;*/
945 /* If we are remounting RDONLY, make sure there are no rw files open */
946 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
947 if (!fs_may_remount_ro(sb))
948 return -EBUSY;
949 if (sb->s_op && sb->s_op->remount_fs) {
950 retval = sb->s_op->remount_fs(sb, &flags, data);
951 if (retval)
952 return retval;
954 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
955 vfsmnt = lookup_vfsmnt(sb->s_dev);
956 if (vfsmnt)
957 vfsmnt->mnt_flags = sb->s_flags;
958 return 0;
961 static int do_remount(const char *dir,int flags,char *data)
963 struct dentry *dentry;
964 int retval;
966 dentry = namei(dir);
967 retval = PTR_ERR(dentry);
968 if (!IS_ERR(dentry)) {
969 struct super_block * sb = dentry->d_inode->i_sb;
971 retval = -EINVAL;
972 if (dentry == sb->s_root) {
974 * Shrink the dcache and sync the device.
976 shrink_dcache_sb(sb);
977 fsync_dev(sb->s_dev);
978 retval = do_remount_sb(sb, flags, data);
980 dput(dentry);
982 return retval;
985 static int copy_mount_options (const void * data, unsigned long *where)
987 int i;
988 unsigned long page;
989 struct vm_area_struct * vma;
991 *where = 0;
992 if (!data)
993 return 0;
995 vma = find_vma(current->mm, (unsigned long) data);
996 if (!vma || (unsigned long) data < vma->vm_start)
997 return -EFAULT;
998 if (!(vma->vm_flags & VM_READ))
999 return -EFAULT;
1000 i = vma->vm_end - (unsigned long) data;
1001 if (PAGE_SIZE <= (unsigned long) i)
1002 i = PAGE_SIZE-1;
1003 if (!(page = __get_free_page(GFP_KERNEL))) {
1004 return -ENOMEM;
1006 if (copy_from_user((void *) page,data,i)) {
1007 free_page(page);
1008 return -EFAULT;
1010 *where = page;
1011 return 0;
1015 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1016 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1018 * data is a (void *) that can point to any structure up to
1019 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1020 * information (or be NULL).
1022 * NOTE! As old versions of mount() didn't use this setup, the flags
1023 * have to have a special 16-bit magic number in the high word:
1024 * 0xC0ED. If this magic word isn't present, the flags and data info
1025 * aren't used, as the syscall assumes we are talking to an older
1026 * version that didn't understand them.
1028 asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
1029 unsigned long new_flags, void * data)
1031 struct file_system_type * fstype;
1032 struct dentry * dentry = NULL;
1033 struct inode * inode = NULL;
1034 kdev_t dev;
1035 int retval = -EPERM;
1036 unsigned long flags = 0;
1037 unsigned long page = 0;
1038 struct file dummy; /* allows read-write or read-only flag */
1040 lock_kernel();
1041 if (!capable(CAP_SYS_ADMIN))
1042 goto out;
1043 if ((new_flags &
1044 (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1045 retval = copy_mount_options (data, &page);
1046 if (retval < 0)
1047 goto out;
1048 retval = do_remount(dir_name,
1049 new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1050 (char *) page);
1051 free_page(page);
1052 goto out;
1055 retval = copy_mount_options (type, &page);
1056 if (retval < 0)
1057 goto out;
1058 fstype = get_fs_type((char *) page);
1059 free_page(page);
1060 retval = -ENODEV;
1061 if (!fstype)
1062 goto out;
1064 memset(&dummy, 0, sizeof(dummy));
1065 if (fstype->fs_flags & FS_REQUIRES_DEV) {
1066 dentry = namei(dev_name);
1067 retval = PTR_ERR(dentry);
1068 if (IS_ERR(dentry))
1069 goto out;
1071 inode = dentry->d_inode;
1072 retval = -ENOTBLK;
1073 if (!S_ISBLK(inode->i_mode))
1074 goto dput_and_out;
1076 retval = -EACCES;
1077 if (IS_NODEV(inode))
1078 goto dput_and_out;
1080 dev = inode->i_rdev;
1081 retval = -ENXIO;
1082 if (MAJOR(dev) >= MAX_BLKDEV)
1083 goto dput_and_out;
1085 retval = -ENOTBLK;
1086 dummy.f_op = get_blkfops(MAJOR(dev));
1087 if (!dummy.f_op)
1088 goto dput_and_out;
1090 if (dummy.f_op->open) {
1091 dummy.f_dentry = dentry;
1092 dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1093 retval = dummy.f_op->open(inode, &dummy);
1094 if (retval)
1095 goto dput_and_out;
1098 } else {
1099 retval = -EMFILE;
1100 if (!(dev = get_unnamed_dev()))
1101 goto out;
1104 page = 0;
1105 if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1106 flags = new_flags & ~MS_MGC_MSK;
1107 retval = copy_mount_options(data, &page);
1108 if (retval < 0)
1109 goto clean_up;
1111 retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1112 (void *) page);
1113 free_page(page);
1114 if (retval)
1115 goto clean_up;
1117 dput_and_out:
1118 dput(dentry);
1119 out:
1120 unlock_kernel();
1121 return retval;
1123 clean_up:
1124 if (dummy.f_op) {
1125 if (dummy.f_op->release)
1126 dummy.f_op->release(inode, NULL);
1127 } else
1128 put_unnamed_dev(dev);
1129 goto dput_and_out;
1132 void __init mount_root(void)
1134 struct file_system_type * fs_type;
1135 struct super_block * sb;
1136 struct vfsmount *vfsmnt;
1137 struct inode * d_inode = NULL;
1138 struct file filp;
1139 int retval;
1141 #ifdef CONFIG_ROOT_NFS
1142 if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1143 ROOT_DEV = 0;
1144 if ((fs_type = get_fs_type("nfs"))) {
1145 sb = get_empty_super(); /* "can't fail" */
1146 sb->s_dev = get_unnamed_dev();
1147 sb->s_flags = root_mountflags & ~MS_RDONLY;
1148 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1149 if (vfsmnt) {
1150 if (nfs_root_mount(sb) >= 0) {
1151 sb->s_rd_only = 0;
1152 sb->s_dirt = 0;
1153 sb->s_type = fs_type;
1154 current->fs->root = dget(sb->s_root);
1155 current->fs->pwd = dget(sb->s_root);
1156 ROOT_DEV = sb->s_dev;
1157 printk (KERN_NOTICE "VFS: Mounted root (nfs filesystem).\n");
1158 return;
1160 remove_vfsmnt(sb->s_dev);
1162 put_unnamed_dev(sb->s_dev);
1163 sb->s_dev = 0;
1165 if (!ROOT_DEV) {
1166 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1167 ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1170 #endif
1172 #ifdef CONFIG_BLK_DEV_FD
1173 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1174 floppy_eject();
1175 #ifndef CONFIG_BLK_DEV_RAM
1176 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1177 #endif
1178 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1179 wait_for_keypress();
1181 #endif
1183 memset(&filp, 0, sizeof(filp));
1184 d_inode = get_empty_inode();
1185 d_inode->i_rdev = ROOT_DEV;
1186 filp.f_dentry = NULL;
1187 if ( root_mountflags & MS_RDONLY)
1188 filp.f_mode = 1; /* read only */
1189 else
1190 filp.f_mode = 3; /* read write */
1191 retval = blkdev_open(d_inode, &filp);
1192 if (retval == -EROFS) {
1193 root_mountflags |= MS_RDONLY;
1194 filp.f_mode = 1;
1195 retval = blkdev_open(d_inode, &filp);
1197 iput(d_inode);
1198 if (retval)
1200 * Allow the user to distinguish between failed open
1201 * and bad superblock on root device.
1203 printk("VFS: Cannot open root device %s\n",
1204 kdevname(ROOT_DEV));
1205 else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1206 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1207 continue;
1208 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1209 if (sb) {
1210 sb->s_flags = root_mountflags;
1211 current->fs->root = dget(sb->s_root);
1212 current->fs->pwd = dget(sb->s_root);
1213 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1214 fs_type->name,
1215 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1216 vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1217 if (vfsmnt)
1218 return;
1219 panic("VFS: add_vfsmnt failed for root fs");
1222 panic("VFS: Unable to mount root fs on %s",
1223 kdevname(ROOT_DEV));
1227 #ifdef CONFIG_BLK_DEV_INITRD
1229 extern int initmem_freed;
1231 static int __init do_change_root(kdev_t new_root_dev,const char *put_old)
1233 kdev_t old_root_dev;
1234 struct vfsmount *vfsmnt;
1235 struct dentry *old_root,*old_pwd,*dir_d = NULL;
1236 int error;
1238 old_root = current->fs->root;
1239 old_pwd = current->fs->pwd;
1240 old_root_dev = ROOT_DEV;
1241 if (!fs_may_mount(new_root_dev)) {
1242 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1243 return -EBUSY;
1245 ROOT_DEV = new_root_dev;
1246 mount_root();
1247 dput(old_root);
1248 dput(old_pwd);
1249 #if 1
1250 shrink_dcache();
1251 printk("do_change_root: old root has d_count=%d\n", old_root->d_count);
1252 #endif
1254 * Get the new mount directory
1256 dir_d = lookup_dentry(put_old, NULL, 1);
1257 if (IS_ERR(dir_d)) {
1258 error = PTR_ERR(dir_d);
1259 } else if (!dir_d->d_inode) {
1260 dput(dir_d);
1261 error = -ENOENT;
1262 } else {
1263 error = 0;
1265 if (!error && dir_d->d_covers != dir_d) {
1266 dput(dir_d);
1267 error = -EBUSY;
1269 if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1270 dput(dir_d);
1271 error = -ENOTDIR;
1273 if (error) {
1274 int umount_error;
1276 printk(KERN_NOTICE "Trying to unmount old root ... ");
1277 umount_error = do_umount(old_root_dev,1, 0);
1278 if (!umount_error) {
1279 printk("okay\n");
1280 invalidate_buffers(old_root_dev);
1281 return 0;
1283 printk(KERN_ERR "error %d\n",umount_error);
1284 return error;
1286 remove_vfsmnt(old_root_dev);
1287 vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1288 if (vfsmnt) {
1289 d_mount(dir_d,old_root);
1290 return 0;
1292 printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1293 return -ENOMEM;
1296 int change_root(kdev_t new_root_dev,const char *put_old)
1298 if (initmem_freed) {
1299 printk (KERN_CRIT "Initmem has been already freed. Staying in initrd\n");
1300 return -EBUSY;
1302 return do_change_root(new_root_dev, put_old);
1305 #endif