4 * Copyright (C) 1991, 1992 Linus Torvalds
6 * super.c contains code to handle: - mount structures
7 * - super-block tables.
11 * Added options to /proc/mounts
12 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
14 * GK 2/5/95 - Changed to support mounting the root fs via NFS
16 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
17 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
18 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
21 #include <linux/config.h>
22 #include <linux/string.h>
23 #include <linux/malloc.h>
24 #include <linux/locks.h>
25 #include <linux/smp_lock.h>
26 #include <linux/devfs_fs_kernel.h>
28 #include <linux/init.h>
29 #include <linux/quotaops.h>
30 #include <linux/acct.h>
32 #include <asm/uaccess.h>
34 #include <linux/nfs_fs.h>
35 #include <linux/nfs_fs_sb.h>
36 #include <linux/nfs_mount.h>
38 #include <linux/kmod.h>
39 #define __NO_VERSION__
40 #include <linux/module.h>
43 * We use a semaphore to synchronize all mount/umount
44 * activity - imagine the mess if we have a race between
45 * unmounting a filesystem and re-mounting it (or something
48 static DECLARE_MUTEX(mount_sem
);
50 extern void wait_for_keypress(void);
52 extern int root_mountflags
;
54 static int do_remount_sb(struct super_block
*sb
, int flags
, char * data
);
56 /* this is initialized in init/main.c */
59 int nr_super_blocks
= 0;
60 int max_super_blocks
= NR_SUPER
;
61 LIST_HEAD(super_blocks
);
64 * Handling of filesystem drivers list.
66 * Insertion into, removal from, and scanning of the list are protected by a spinlock.
67 * During unload, a module must call unregister_filesystem().
68 * We can access the fields of a list element if:
69 * 1) the spinlock is held or
70 * 2) we hold a reference to the module.
71 * The latter can be guaranteed by calling try_inc_mod_count(); if it
72 * returns 0 we must skip the element, otherwise we have the reference.
73 * Once the reference is obtained we can drop the spinlock.
76 static struct file_system_type
*file_systems
= NULL
;
77 static spinlock_t file_systems_lock
= SPIN_LOCK_UNLOCKED
;
79 static void put_filesystem(struct file_system_type
*fs
)
82 __MOD_DEC_USE_COUNT(fs
->owner
);
85 static struct file_system_type
**find_filesystem(const char *name
)
87 struct file_system_type
**p
;
88 for (p
=&file_systems
; *p
; p
=&(*p
)->next
)
89 if (strcmp((*p
)->name
,name
) == 0)
94 int register_filesystem(struct file_system_type
* fs
)
97 struct file_system_type
** p
;
103 spin_lock(&file_systems_lock
);
104 p
= find_filesystem(fs
->name
);
109 spin_unlock(&file_systems_lock
);
113 int unregister_filesystem(struct file_system_type
* fs
)
115 struct file_system_type
** tmp
;
117 spin_lock(&file_systems_lock
);
123 spin_unlock(&file_systems_lock
);
128 spin_unlock(&file_systems_lock
);
132 static int fs_index(const char * __name
)
134 struct file_system_type
* tmp
;
138 name
= getname(__name
);
144 spin_lock(&file_systems_lock
);
145 for (tmp
=file_systems
, index
=0 ; tmp
; tmp
=tmp
->next
, index
++) {
146 if (strcmp(tmp
->name
,name
) == 0) {
151 spin_unlock(&file_systems_lock
);
156 static int fs_name(unsigned int index
, char * buf
)
158 struct file_system_type
* tmp
;
161 spin_lock(&file_systems_lock
);
162 for (tmp
= file_systems
; tmp
; tmp
= tmp
->next
, index
--)
163 if (index
<= 0 && try_inc_mod_count(tmp
->owner
))
165 spin_unlock(&file_systems_lock
);
169 /* OK, we got the reference, so we can safely block */
170 len
= strlen(tmp
->name
) + 1;
171 res
= copy_to_user(buf
, tmp
->name
, len
) ? -EFAULT
: 0;
176 static int fs_maxindex(void)
178 struct file_system_type
* tmp
;
181 spin_lock(&file_systems_lock
);
182 for (tmp
= file_systems
, index
= 0 ; tmp
; tmp
= tmp
->next
, index
++)
184 spin_unlock(&file_systems_lock
);
189 * Whee.. Weird sysv syscall.
191 asmlinkage
long sys_sysfs(int option
, unsigned long arg1
, unsigned long arg2
)
193 int retval
= -EINVAL
;
198 retval
= fs_index((const char *) arg1
);
202 retval
= fs_name(arg1
, (char *) arg2
);
206 retval
= fs_maxindex();
213 int get_filesystem_list(char * buf
)
216 struct file_system_type
* tmp
;
218 spin_lock(&file_systems_lock
);
220 while (tmp
&& len
< PAGE_SIZE
- 80) {
221 len
+= sprintf(buf
+len
, "%s\t%s\n",
222 (tmp
->fs_flags
& FS_REQUIRES_DEV
) ? "" : "nodev",
226 spin_unlock(&file_systems_lock
);
230 static struct file_system_type
*get_fs_type(const char *name
)
232 struct file_system_type
*fs
;
234 spin_lock(&file_systems_lock
);
235 fs
= *(find_filesystem(name
));
236 if (fs
&& !try_inc_mod_count(fs
->owner
))
238 spin_unlock(&file_systems_lock
);
239 if (!fs
&& (request_module(name
) == 0)) {
240 spin_lock(&file_systems_lock
);
241 fs
= *(find_filesystem(name
));
242 if (fs
&& !try_inc_mod_count(fs
->owner
))
244 spin_unlock(&file_systems_lock
);
249 struct vfsmount
*vfsmntlist
= NULL
;
250 static struct vfsmount
*vfsmnttail
= NULL
, *mru_vfsmnt
= NULL
;
252 static struct vfsmount
*add_vfsmnt(struct super_block
*sb
,
253 const char *dev_name
, const char *dir_name
)
255 struct vfsmount
*lptr
;
258 lptr
= (struct vfsmount
*)kmalloc(sizeof(struct vfsmount
), GFP_KERNEL
);
261 memset(lptr
, 0, sizeof(struct vfsmount
));
264 lptr
->mnt_dev
= sb
->s_dev
;
266 /* N.B. Is it really OK to have a vfsmount without names? */
267 if (dev_name
&& !IS_ERR(tmp
= getname(dev_name
))) {
268 name
= (char *) kmalloc(strlen(tmp
)+1, GFP_KERNEL
);
271 lptr
->mnt_devname
= name
;
275 if (dir_name
&& !IS_ERR(tmp
= getname(dir_name
))) {
276 name
= (char *) kmalloc(strlen(tmp
)+1, GFP_KERNEL
);
279 lptr
->mnt_dirname
= name
;
284 if (vfsmntlist
== (struct vfsmount
*)NULL
) {
285 vfsmntlist
= vfsmnttail
= lptr
;
287 vfsmnttail
->mnt_next
= lptr
;
294 void remove_vfsmnt(kdev_t dev
)
296 struct vfsmount
*lptr
, *tofree
;
298 if (vfsmntlist
== NULL
)
301 if (lptr
->mnt_dev
== dev
) {
303 vfsmntlist
= lptr
->mnt_next
;
304 if (vfsmnttail
->mnt_dev
== dev
)
305 vfsmnttail
= vfsmntlist
;
307 while (lptr
->mnt_next
!= NULL
) {
308 if (lptr
->mnt_next
->mnt_dev
== dev
)
310 lptr
= lptr
->mnt_next
;
312 tofree
= lptr
->mnt_next
;
315 lptr
->mnt_next
= lptr
->mnt_next
->mnt_next
;
316 if (vfsmnttail
->mnt_dev
== dev
)
319 if (tofree
== mru_vfsmnt
)
321 kfree(tofree
->mnt_devname
);
322 kfree(tofree
->mnt_dirname
);
323 kfree_s(tofree
, sizeof(struct vfsmount
));
326 static struct proc_fs_info
{
330 { MS_NOEXEC
, ",noexec" },
331 { MS_NOSUID
, ",nosuid" },
332 { MS_NODEV
, ",nodev" },
333 { MS_SYNCHRONOUS
, ",sync" },
334 { MS_MANDLOCK
, ",mand" },
335 { MS_NOATIME
, ",noatime" },
336 { MS_NODIRATIME
, ",nodiratime" },
337 #ifdef MS_NOSUB /* Can't find this except in mount.c */
338 { MS_NOSUB
, ",nosub" },
343 static struct proc_nfs_info
{
347 { NFS_MOUNT_SOFT
, ",soft" },
348 { NFS_MOUNT_INTR
, ",intr" },
349 { NFS_MOUNT_POSIX
, ",posix" },
350 { NFS_MOUNT_NOCTO
, ",nocto" },
351 { NFS_MOUNT_NOAC
, ",noac" },
355 int get_filesystem_info( char *buf
)
357 struct vfsmount
*tmp
;
358 struct proc_fs_info
*fs_infop
;
359 struct proc_nfs_info
*nfs_infop
;
360 struct nfs_server
*nfss
;
362 char *path
,*buffer
= (char *) __get_free_page(GFP_KERNEL
);
364 if (!buffer
) return 0;
365 for (tmp
= vfsmntlist
; tmp
&& len
< PAGE_SIZE
- 160;
366 tmp
= tmp
->mnt_next
) {
367 path
= d_path(tmp
->mnt_sb
->s_root
, buffer
, PAGE_SIZE
);
370 len
+= sprintf( buf
+ len
, "%s %s %s %s",
371 tmp
->mnt_devname
, path
,
372 tmp
->mnt_sb
->s_type
->name
,
373 tmp
->mnt_sb
->s_flags
& MS_RDONLY
? "ro" : "rw" );
374 for (fs_infop
= fs_info
; fs_infop
->flag
; fs_infop
++) {
375 if (tmp
->mnt_sb
->s_flags
& fs_infop
->flag
) {
376 strcpy(buf
+ len
, fs_infop
->str
);
377 len
+= strlen(fs_infop
->str
);
380 if (!strcmp("nfs", tmp
->mnt_sb
->s_type
->name
)) {
381 nfss
= &tmp
->mnt_sb
->u
.nfs_sb
.s_server
;
382 if (nfss
->rsize
!= NFS_DEF_FILE_IO_BUFFER_SIZE
) {
383 len
+= sprintf(buf
+len
, ",rsize=%d",
386 if (nfss
->wsize
!= NFS_DEF_FILE_IO_BUFFER_SIZE
) {
387 len
+= sprintf(buf
+len
, ",wsize=%d",
391 if (nfss
->timeo
!= 7*HZ
/10) {
392 len
+= sprintf(buf
+len
, ",timeo=%d",
395 if (nfss
->retrans
!= 3) {
396 len
+= sprintf(buf
+len
, ",retrans=%d",
400 if (nfss
->acregmin
!= 3*HZ
) {
401 len
+= sprintf(buf
+len
, ",acregmin=%d",
404 if (nfss
->acregmax
!= 60*HZ
) {
405 len
+= sprintf(buf
+len
, ",acregmax=%d",
408 if (nfss
->acdirmin
!= 30*HZ
) {
409 len
+= sprintf(buf
+len
, ",acdirmin=%d",
412 if (nfss
->acdirmax
!= 60*HZ
) {
413 len
+= sprintf(buf
+len
, ",acdirmax=%d",
416 for (nfs_infop
= nfs_info
; nfs_infop
->flag
; nfs_infop
++) {
417 if (nfss
->flags
& nfs_infop
->flag
) {
418 strcpy(buf
+ len
, nfs_infop
->str
);
419 len
+= strlen(nfs_infop
->str
);
422 len
+= sprintf(buf
+len
, ",addr=%s",
425 len
+= sprintf( buf
+ len
, " 0 0\n" );
428 free_page((unsigned long) buffer
);
432 void __wait_on_super(struct super_block
* sb
)
434 DECLARE_WAITQUEUE(wait
, current
);
436 add_wait_queue(&sb
->s_wait
, &wait
);
438 set_current_state(TASK_UNINTERRUPTIBLE
);
443 remove_wait_queue(&sb
->s_wait
, &wait
);
444 current
->state
= TASK_RUNNING
;
448 * Note: check the dirty flag before waiting, so we don't
449 * hold up the sync while mounting a device. (The newly
450 * mounted device won't need syncing.)
452 void sync_supers(kdev_t dev
)
454 struct super_block
* sb
;
456 for (sb
= sb_entry(super_blocks
.next
);
457 sb
!= sb_entry(&super_blocks
);
458 sb
= sb_entry(sb
->s_list
.next
)) {
461 if (dev
&& sb
->s_dev
!= dev
)
465 /* N.B. Should lock the superblock while writing */
467 if (!sb
->s_dev
|| !sb
->s_dirt
)
469 if (dev
&& (dev
!= sb
->s_dev
))
471 if (sb
->s_op
&& sb
->s_op
->write_super
)
472 sb
->s_op
->write_super(sb
);
476 struct super_block
* get_super(kdev_t dev
)
478 struct super_block
* s
;
483 s
= sb_entry(super_blocks
.next
);
484 while (s
!= sb_entry(&super_blocks
))
485 if (s
->s_dev
== dev
) {
491 s
= sb_entry(s
->s_list
.next
);
495 asmlinkage
long sys_ustat(dev_t dev
, struct ustat
* ubuf
)
497 struct super_block
*s
;
503 s
= get_super(to_kdev_t(dev
));
506 err
= vfs_statfs(s
, &sbuf
);
510 memset(&tmp
,0,sizeof(struct ustat
));
511 tmp
.f_tfree
= sbuf
.f_bfree
;
512 tmp
.f_tinode
= sbuf
.f_ffree
;
514 err
= copy_to_user(ubuf
,&tmp
,sizeof(struct ustat
)) ? -EFAULT
: 0;
521 * Find a super_block with no device assigned.
523 struct super_block
*get_empty_super(void)
525 struct super_block
*s
;
527 for (s
= sb_entry(super_blocks
.next
);
528 s
!= sb_entry(&super_blocks
);
529 s
= sb_entry(s
->s_list
.next
)) {
534 printk("VFS: empty superblock %p locked!\n", s
);
536 /* Need a new one... */
537 if (nr_super_blocks
>= max_super_blocks
)
539 s
= kmalloc(sizeof(struct super_block
), GFP_USER
);
542 memset(s
, 0, sizeof(struct super_block
));
543 INIT_LIST_HEAD(&s
->s_dirty
);
544 list_add (&s
->s_list
, super_blocks
.prev
);
545 init_waitqueue_head(&s
->s_wait
);
546 INIT_LIST_HEAD(&s
->s_files
);
551 static struct super_block
* read_super(kdev_t dev
, struct block_device
*bdev
,
552 struct file_system_type
*type
, int flags
,
553 void *data
, int silent
)
555 struct super_block
* s
;
556 s
= get_empty_super();
563 sema_init(&s
->s_vfs_rename_sem
,1);
564 sema_init(&s
->s_nfsd_free_path_sem
,1);
566 sema_init(&s
->s_dquot
.dqio_sem
, 1);
567 sema_init(&s
->s_dquot
.dqoff_sem
, 1);
568 s
->s_dquot
.flags
= 0;
570 if (!type
->read_super(s
, data
, silent
))
573 /* tell bdcache that we are going to keep this one */
575 atomic_inc(&bdev
->bd_count
);
588 * Unnamed block devices are dummy devices used by virtual
589 * filesystems which don't use real block-devices. -- jrs
592 static unsigned int unnamed_dev_in_use
[256/(8*sizeof(unsigned int))] = { 0, };
594 kdev_t
get_unnamed_dev(void)
598 for (i
= 1; i
< 256; i
++) {
599 if (!test_and_set_bit(i
,unnamed_dev_in_use
))
600 return MKDEV(UNNAMED_MAJOR
, i
);
605 void put_unnamed_dev(kdev_t dev
)
607 if (!dev
|| MAJOR(dev
) != UNNAMED_MAJOR
)
609 if (test_and_clear_bit(MINOR(dev
), unnamed_dev_in_use
))
611 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
615 static int d_umount(struct super_block
* sb
)
617 struct dentry
* root
= sb
->s_root
;
618 struct dentry
* covered
= root
->d_covers
;
620 if (root
->d_count
!= 1)
623 if (root
->d_inode
->i_state
)
628 if (covered
!= root
) {
629 root
->d_covers
= root
;
630 covered
->d_mounts
= covered
;
637 static void d_mount(struct dentry
*covered
, struct dentry
*dentry
)
639 if (covered
->d_mounts
!= covered
) {
640 printk("VFS: mount - already mounted\n");
643 covered
->d_mounts
= dentry
;
644 dentry
->d_covers
= covered
;
647 static struct block_device
*do_umount(kdev_t dev
, int unmount_root
, int flags
)
649 struct super_block
* sb
;
650 struct block_device
*bdev
;
655 if (!sb
|| !sb
->s_root
)
659 * Before checking whether the filesystem is still busy,
660 * make sure the kernel doesn't hold any quota files open
661 * on the device. If the umount fails, too bad -- there
662 * are no quotas running any more. Just turn them on again.
665 acct_auto_close(dev
);
668 * If we may have to abort operations to get out of this
669 * mount, and they will themselves hold resources we must
670 * allow the fs to do things. In the Unix tradition of
671 * 'Gee, that's tricky, let's do it in userspace' the umount_begin
672 * might fail to complete on the first run through as other tasks
673 * must return, and the like. That's for the mount program to worry
674 * about for the moment.
677 if( (flags
&MNT_FORCE
) && sb
->s_op
->umount_begin
)
678 sb
->s_op
->umount_begin(sb
);
681 * Shrink dcache, then fsync. This guarantees that if the
682 * filesystem is quiescent at this point, then (a) only the
683 * root entry should be in use and (b) that root entry is
686 shrink_dcache_sb(sb
);
689 if (sb
== current
->fs
->root
->d_sb
&& !unmount_root
) {
691 * Special case for "unmounting" root ...
692 * we just try to remount it readonly.
695 if (!(sb
->s_flags
& MS_RDONLY
))
696 retval
= do_remount_sb(sb
, MS_RDONLY
, 0);
697 return ERR_PTR(retval
);
700 retval
= d_umount(sb
);
705 if (sb
->s_op
->write_super
&& sb
->s_dirt
)
706 sb
->s_op
->write_super(sb
);
711 if (sb
->s_op
->put_super
)
712 sb
->s_op
->put_super(sb
);
715 /* Forget any remaining inodes */
716 if (invalidate_inodes(sb
)) {
717 printk("VFS: Busy inodes after unmount. "
718 "Self-destruct in 5 seconds. Have a nice day...\n");
721 sb
->s_dev
= 0; /* Free the superblock */
724 put_filesystem(sb
->s_type
);
733 return ERR_PTR(retval
);
736 static int umount_dev(kdev_t dev
, int flags
)
739 struct block_device
*bdev
;
742 if (MAJOR(dev
) >= MAX_BLKDEV
)
749 bdev
= do_umount(dev
, 0, flags
);
751 retval
= PTR_ERR(bdev
);
755 blkdev_put(bdev
, BDEV_FS
);
758 put_unnamed_dev(dev
);
767 * Now umount can handle mount points as well as block devices.
768 * This is important for filesystems which use unnamed block devices.
770 * We now support a flag for forced unmount like the other 'big iron'
771 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
774 asmlinkage
long sys_umount(char * name
, int flags
)
776 struct dentry
* dentry
;
779 if (!capable(CAP_SYS_ADMIN
))
783 dentry
= namei(name
);
784 retval
= PTR_ERR(dentry
);
785 if (!IS_ERR(dentry
)) {
786 struct inode
* inode
= dentry
->d_inode
;
787 kdev_t dev
= inode
->i_rdev
;
790 if (S_ISBLK(inode
->i_mode
)) {
794 struct super_block
*sb
= inode
->i_sb
;
796 if (sb
&& inode
== sb
->s_root
->d_inode
) {
804 retval
= umount_dev(dev
, flags
);
811 * The 2.0 compatible umount. No flags.
814 asmlinkage
long sys_oldumount(char * name
)
816 return sys_umount(name
,0);
820 * Check whether we can mount the specified device.
822 int fs_may_mount(kdev_t dev
)
824 struct super_block
* sb
= get_super(dev
);
827 busy
= sb
&& sb
->s_root
&&
828 (sb
->s_root
->d_count
!= 1 || sb
->s_root
->d_covers
!= sb
->s_root
);
833 * do_mount() does the actual mounting after sys_mount has done the ugly
834 * parameter parsing. When enough time has gone by, and everything uses the
835 * new mount() parameters, sys_mount() can then be cleaned up.
837 * We cannot mount a filesystem if it has active, used, or dirty inodes.
838 * We also have to flush all inode-data for this device, as the new mount
839 * might need new info.
841 * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
842 * supplying a leading "!" before the dir_name, allowing "stacks" of
843 * mounted filesystems. The stacking will only influence any pathname lookups
844 * _after_ the mount, but open file descriptors or working directories that
845 * are now covered remain valid. For example, when you overmount /home, any
846 * process with old cwd /home/joe will continue to use the old versions,
847 * as long as relative paths are used, but absolute paths like /home/joe/xxx
848 * will go to the new "top of stack" version. In general, crossing a
849 * mount point will always go to the top of stack element.
850 * Anyone using this new feature must know what he/she is doing.
853 int do_mount(struct block_device
*bdev
, const char *dev_name
,
854 const char *dir_name
, const char * type
, int flags
, void * data
)
857 struct dentry
* dir_d
;
858 struct super_block
* sb
;
859 struct vfsmount
*vfsmnt
;
860 struct file_system_type
*fs_type
;
864 mode_t mode
= FMODE_READ
; /* we always need it ;-) */
865 if (!(flags
& MS_RDONLY
))
867 dev
= to_kdev_t(bdev
->bd_dev
);
868 error
= blkdev_get(bdev
, mode
, 0, BDEV_FS
);
872 dev
= get_unnamed_dev();
874 return -EMFILE
; /* huh? */
878 if (!(flags
& MS_RDONLY
) && dev
&& is_read_only(dev
))
882 * Do the lookup first to force automounting.
884 dir_d
= namei(dir_name
);
885 error
= PTR_ERR(dir_d
);
891 if (!S_ISDIR(dir_d
->d_inode
->i_mode
))
895 if (dir_d
->d_covers
!= dir_d
)
901 check_disk_change(dev
);
904 /* Already mounted */
909 fs_type
= get_fs_type(type
);
911 printk("VFS: on device %s: get_fs_type(%s) failed\n",
912 kdevname(dev
), type
);
916 sb
= read_super(dev
, bdev
, fs_type
, flags
, data
, 0);
921 * We may have slept while reading the super block,
922 * so we check afterwards whether it's safe to mount.
925 if (!fs_may_mount(dev
))
929 vfsmnt
= add_vfsmnt(sb
, dev_name
, dir_name
);
931 d_mount(dget(dir_d
), sb
->s_root
);
938 /* FIXME: ->put_super() is needed here */
945 put_filesystem(fs_type
);
951 blkdev_put(bdev
, BDEV_FS
);
953 put_unnamed_dev(dev
);
959 * Alters the mount flags of a mounted file system. Only the mount point
960 * is used as a reference - file system type and the device are ignored.
963 static int do_remount_sb(struct super_block
*sb
, int flags
, char *data
)
967 if (!(flags
& MS_RDONLY
) && sb
->s_dev
&& is_read_only(sb
->s_dev
))
969 /*flags |= MS_RDONLY;*/
970 /* If we are remounting RDONLY, make sure there are no rw files open */
971 if ((flags
& MS_RDONLY
) && !(sb
->s_flags
& MS_RDONLY
))
972 if (!fs_may_remount_ro(sb
))
974 if (sb
->s_op
&& sb
->s_op
->remount_fs
) {
976 retval
= sb
->s_op
->remount_fs(sb
, &flags
, data
);
981 sb
->s_flags
= (sb
->s_flags
& ~MS_RMT_MASK
) | (flags
& MS_RMT_MASK
);
984 * Invalidate the inodes, as some mount options may be changed.
985 * N.B. If we are changing media, we should check the return
986 * from invalidate_inodes ... can't allow _any_ open files.
988 invalidate_inodes(sb
);
993 static int do_remount(const char *dir
,int flags
,char *data
)
995 struct dentry
*dentry
;
999 retval
= PTR_ERR(dentry
);
1000 if (!IS_ERR(dentry
)) {
1001 struct super_block
* sb
= dentry
->d_inode
->i_sb
;
1006 if (dentry
== sb
->s_root
) {
1008 * Shrink the dcache and sync the device.
1010 shrink_dcache_sb(sb
);
1011 fsync_dev(sb
->s_dev
);
1012 if (flags
& MS_RDONLY
)
1013 acct_auto_close(sb
->s_dev
);
1014 retval
= do_remount_sb(sb
, flags
, data
);
1022 static int copy_mount_options (const void * data
, unsigned long *where
)
1026 struct vm_area_struct
* vma
;
1032 vma
= find_vma(current
->mm
, (unsigned long) data
);
1033 if (!vma
|| (unsigned long) data
< vma
->vm_start
)
1035 if (!(vma
->vm_flags
& VM_READ
))
1037 i
= vma
->vm_end
- (unsigned long) data
;
1038 if (PAGE_SIZE
<= (unsigned long) i
)
1040 if (!(page
= __get_free_page(GFP_KERNEL
))) {
1043 if (copy_from_user((void *) page
,data
,i
)) {
1052 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1053 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1055 * data is a (void *) that can point to any structure up to
1056 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1057 * information (or be NULL).
1059 * NOTE! As old versions of mount() didn't use this setup, the flags
1060 * have to have a special 16-bit magic number in the high word:
1061 * 0xC0ED. If this magic word isn't present, the flags and data info
1062 * aren't used, as the syscall assumes we are talking to an older
1063 * version that didn't understand them.
1065 long do_sys_mount(char * dev_name
, char * dir_name
, unsigned long type_page
,
1066 unsigned long new_flags
, unsigned long data_page
)
1068 struct file_system_type
* fstype
;
1069 struct dentry
* dentry
= NULL
;
1070 struct inode
* inode
= NULL
;
1071 struct block_device
*bdev
= NULL
;
1073 unsigned long flags
= 0;
1075 if (!capable(CAP_SYS_ADMIN
))
1079 (MS_MGC_MSK
| MS_REMOUNT
)) == (MS_MGC_VAL
| MS_REMOUNT
)) {
1080 retval
= do_remount(dir_name
,
1081 new_flags
& ~MS_MGC_MSK
& ~MS_REMOUNT
,
1082 (char *) data_page
);
1086 fstype
= get_fs_type((char *) type_page
);
1091 if (fstype
->fs_flags
& FS_REQUIRES_DEV
) {
1092 struct block_device_operations
*bdops
;
1094 dentry
= namei(dev_name
);
1095 retval
= PTR_ERR(dentry
);
1099 inode
= dentry
->d_inode
;
1101 if (!S_ISBLK(inode
->i_mode
))
1105 if (IS_NODEV(inode
))
1108 bdev
= inode
->i_bdev
;
1109 bdops
= devfs_get_ops ( devfs_get_handle_from_inode (inode
) );
1110 if (bdops
) bdev
->bd_op
= bdops
;
1113 if ((new_flags
& MS_MGC_MSK
) == MS_MGC_VAL
)
1114 flags
= new_flags
& ~MS_MGC_MSK
;
1116 retval
= do_mount(bdev
, dev_name
, dir_name
, fstype
->name
, flags
,
1117 (void *) data_page
);
1122 put_filesystem(fstype
);
1127 asmlinkage
long sys_mount(char * dev_name
, char * dir_name
, char * type
,
1128 unsigned long new_flags
, void * data
)
1131 unsigned long data_page
= 0;
1132 unsigned long type_page
= 0;
1135 retval
= copy_mount_options (type
, &type_page
);
1139 /* copy_mount_options allows a NULL user pointer,
1140 * and just returns zero in that case. But if we
1141 * allow the type to be NULL we will crash.
1142 * Previously we did not check this case.
1144 if (type_page
== 0) {
1149 retval
= copy_mount_options (data
, &data_page
);
1151 retval
= do_sys_mount(dev_name
, dir_name
, type_page
,
1152 new_flags
, data_page
);
1153 free_page(data_page
);
1155 free_page(type_page
);
1161 void __init
mount_root(void)
1163 struct file_system_type
* fs_type
;
1164 struct super_block
* sb
;
1165 struct vfsmount
*vfsmnt
;
1166 struct block_device
*bdev
= NULL
;
1171 int path_start
= -1;
1173 #ifdef CONFIG_ROOT_NFS
1174 if (MAJOR(ROOT_DEV
) == UNNAMED_MAJOR
) {
1176 if ((fs_type
= get_fs_type("nfs"))) {
1177 sb
= get_empty_super(); /* "can't fail" */
1178 sb
->s_dev
= get_unnamed_dev();
1180 sb
->s_flags
= root_mountflags
;
1181 sema_init(&sb
->s_vfs_rename_sem
,1);
1182 sema_init(&sb
->s_nfsd_free_path_sem
,1);
1183 vfsmnt
= add_vfsmnt(sb
, "/dev/root", "/");
1185 if (nfs_root_mount(sb
) >= 0) {
1187 sb
->s_type
= fs_type
;
1188 current
->fs
->root
= dget(sb
->s_root
);
1189 current
->fs
->pwd
= dget(sb
->s_root
);
1190 ROOT_DEV
= sb
->s_dev
;
1191 printk (KERN_NOTICE
"VFS: Mounted root (NFS filesystem)%s.\n", (sb
->s_flags
& MS_RDONLY
) ? " readonly" : "");
1194 remove_vfsmnt(sb
->s_dev
);
1196 put_unnamed_dev(sb
->s_dev
);
1198 put_filesystem(fs_type
);
1201 printk(KERN_ERR
"VFS: Unable to mount root fs via NFS, trying floppy.\n");
1202 ROOT_DEV
= MKDEV(FLOPPY_MAJOR
, 0);
1207 #ifdef CONFIG_BLK_DEV_FD
1208 if (MAJOR(ROOT_DEV
) == FLOPPY_MAJOR
) {
1209 #ifdef CONFIG_BLK_DEV_RAM
1210 extern int rd_doload
;
1211 extern void rd_load_secondary(void);
1214 #ifndef CONFIG_BLK_DEV_RAM
1215 printk(KERN_NOTICE
"(Warning, this kernel has no ramdisk support)\n");
1217 /* rd_doload is 2 for a dual initrd/ramload setup */
1219 rd_load_secondary();
1223 printk(KERN_NOTICE
"VFS: Insert root floppy and press ENTER\n");
1224 wait_for_keypress();
1229 devfs_make_root (root_device_name
);
1230 handle
= devfs_find_handle (NULL
, ROOT_DEVICE_NAME
, 0,
1231 MAJOR (ROOT_DEV
), MINOR (ROOT_DEV
),
1232 DEVFS_SPECIAL_BLK
, 1);
1233 if (handle
) /* Sigh: bd*() functions only paper over the cracks */
1235 unsigned major
, minor
;
1237 devfs_get_maj_min (handle
, &major
, &minor
);
1238 ROOT_DEV
= MKDEV (major
, minor
);
1242 * Probably pure paranoia, but I'm less than happy about delving into
1243 * devfs crap and checking it right now. Later.
1246 panic("I have no root and I want to scream");
1248 bdev
= bdget(kdev_t_to_nr(ROOT_DEV
));
1250 panic(__FUNCTION__
": unable to allocate root device");
1251 bdev
->bd_op
= devfs_get_ops (handle
);
1252 path_start
= devfs_generate_path (handle
, path
+ 5, sizeof (path
) - 5);
1254 if (!(root_mountflags
& MS_RDONLY
))
1255 mode
|= FMODE_WRITE
;
1256 retval
= blkdev_get(bdev
, mode
, 0, BDEV_FS
);
1257 if (retval
== -EROFS
) {
1258 root_mountflags
|= MS_RDONLY
;
1259 retval
= blkdev_get(bdev
, FMODE_READ
, 0, BDEV_FS
);
1263 * Allow the user to distinguish between failed open
1264 * and bad superblock on root device.
1266 printk ("VFS: Cannot open root device \"%s\" or %s\n",
1267 root_device_name
, kdevname (ROOT_DEV
));
1268 printk ("Please append a correct \"root=\" boot option\n");
1269 panic("VFS: Unable to mount root fs on %s",
1270 kdevname(ROOT_DEV
));
1273 check_disk_change(ROOT_DEV
);
1275 spin_lock(&file_systems_lock
);
1276 for (fs_type
= file_systems
; fs_type
; fs_type
= fs_type
->next
) {
1277 if (!(fs_type
->fs_flags
& FS_REQUIRES_DEV
))
1279 if (!try_inc_mod_count(fs_type
->owner
))
1281 spin_unlock(&file_systems_lock
);
1282 sb
= get_super(ROOT_DEV
);
1284 /* Shouldn't we fail here? Oh, well... */
1288 sb
= read_super(ROOT_DEV
,bdev
,fs_type
,root_mountflags
,NULL
,1);
1291 spin_lock(&file_systems_lock
);
1292 put_filesystem(fs_type
);
1294 spin_unlock(&file_systems_lock
);
1295 panic("VFS: Unable to mount root fs on %s",
1296 kdevname(ROOT_DEV
));
1299 sb
->s_flags
= root_mountflags
;
1300 current
->fs
->root
= dget(sb
->s_root
);
1301 current
->fs
->pwd
= dget(sb
->s_root
);
1302 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1304 (sb
->s_flags
& MS_RDONLY
) ? " readonly" : "");
1305 if (path_start
>= 0) {
1306 devfs_mk_symlink (NULL
,
1307 "root", 0, DEVFS_FL_DEFAULT
,
1308 path
+ 5 + path_start
, 0,
1310 memcpy (path
+ path_start
, "/dev/", 5);
1311 vfsmnt
= add_vfsmnt (sb
, path
+ path_start
,
1314 else vfsmnt
= add_vfsmnt (sb
, "/dev/root", "/");
1316 bdput(bdev
); /* sb holds a reference */
1319 panic("VFS: add_vfsmnt failed for root fs");
1323 static void chroot_fs_refs(struct dentry
*old_root
,
1324 struct dentry
*new_root
)
1326 struct task_struct
*p
;
1328 read_lock(&tasklist_lock
);
1330 if (!p
->fs
) continue;
1331 if (p
->fs
->root
== old_root
) {
1333 p
->fs
->root
= dget(new_root
);
1334 printk(KERN_DEBUG
"chroot_fs_refs: changed root of "
1335 "process %d\n",p
->pid
);
1337 if (p
->fs
->pwd
== old_root
) {
1339 p
->fs
->pwd
= dget(new_root
);
1340 printk(KERN_DEBUG
"chroot_fs_refs: changed cwd of "
1341 "process %d\n",p
->pid
);
1344 read_unlock(&tasklist_lock
);
1349 * Moves the current root to put_root, and sets root/cwd of all processes
1350 * which had them on the old root to new_root.
1353 * - we don't move root/cwd if they are not at the root (reason: if something
1354 * cared enough to change them, it's probably wrong to force them elsewhere)
1355 * - it's okay to pick a root that isn't the root of a file system, e.g.
1356 * /nfs/my_root where /nfs is the mount point. Better avoid creating
1357 * unreachable mount points this way, though.
1360 asmlinkage
long sys_pivot_root(const char *new_root
, const char *put_old
)
1362 struct dentry
*root
= current
->fs
->root
;
1363 struct dentry
*d_new_root
, *d_put_old
, *covered
;
1364 struct dentry
*root_dev_root
, *new_root_dev_root
;
1365 struct dentry
*walk
, *next
;
1368 if (!capable(CAP_SYS_ADMIN
))
1372 d_new_root
= namei(new_root
);
1373 if (IS_ERR(d_new_root
)) {
1374 error
= PTR_ERR(d_new_root
);
1377 d_put_old
= namei(put_old
);
1378 if (IS_ERR(d_put_old
)) {
1379 error
= PTR_ERR(d_put_old
);
1383 if (!d_new_root
->d_inode
|| !d_put_old
->d_inode
) {
1387 if (!S_ISDIR(d_new_root
->d_inode
->i_mode
) ||
1388 !S_ISDIR(d_put_old
->d_inode
->i_mode
)) {
1393 if (d_new_root
->d_sb
== root
->d_sb
|| d_put_old
->d_sb
== root
->d_sb
)
1394 goto out2
; /* loop */
1395 if (d_put_old
!= d_put_old
->d_covers
)
1396 goto out2
; /* mount point is busy */
1398 walk
= d_put_old
; /* make sure we can reach put_old from new_root */
1400 next
= walk
->d_covers
->d_parent
;
1403 if (next
== d_new_root
)
1408 new_root_dev_root
= d_new_root
->d_sb
->s_root
;
1409 covered
= new_root_dev_root
->d_covers
;
1410 new_root_dev_root
->d_covers
= new_root_dev_root
;
1412 covered
->d_mounts
= covered
;
1414 root_dev_root
= root
->d_sb
->s_root
;
1415 root_dev_root
->d_covers
= dget(d_put_old
);
1416 d_put_old
->d_mounts
= root_dev_root
;
1417 chroot_fs_refs(root
,d_new_root
);
1430 #ifdef CONFIG_BLK_DEV_INITRD
1432 int __init
change_root(kdev_t new_root_dev
,const char *put_old
)
1434 kdev_t old_root_dev
;
1435 struct vfsmount
*vfsmnt
;
1436 struct dentry
*old_root
,*old_pwd
,*dir_d
= NULL
;
1439 old_root
= current
->fs
->root
;
1440 old_pwd
= current
->fs
->pwd
;
1441 old_root_dev
= ROOT_DEV
;
1442 if (!fs_may_mount(new_root_dev
)) {
1443 printk(KERN_CRIT
"New root is busy. Staying in initrd.\n");
1446 /* First unmount devfs if mounted */
1447 dir_d
= lookup_dentry ("/dev", NULL
, 1);
1448 if (!IS_ERR(dir_d
)) {
1449 struct super_block
*sb
= dir_d
->d_inode
->i_sb
;
1451 if (sb
&& (dir_d
->d_inode
== sb
->s_root
->d_inode
) &&
1452 (sb
->s_magic
== DEVFS_SUPER_MAGIC
)) {
1454 do_umount (sb
->s_dev
, 0, 0);
1458 ROOT_DEV
= new_root_dev
;
1464 printk("change_root: old root has d_count=%d\n", old_root
->d_count
);
1468 * Get the new mount directory
1470 dir_d
= lookup_dentry(put_old
, NULL
, 1);
1471 if (IS_ERR(dir_d
)) {
1472 error
= PTR_ERR(dir_d
);
1473 } else if (!dir_d
->d_inode
) {
1479 if (!error
&& dir_d
->d_covers
!= dir_d
) {
1483 if (!error
&& !S_ISDIR(dir_d
->d_inode
->i_mode
)) {
1488 struct block_device
*bdev
;
1490 printk(KERN_NOTICE
"Trying to unmount old root ... ");
1491 bdev
= do_umount(old_root_dev
,1, 0);
1492 if (!IS_ERR(bdev
)) {
1494 /* special: the old device driver is going to be
1495 a ramdisk and the point of this call is to free its
1496 protected memory (even if dirty). */
1497 destroy_buffers(old_root_dev
);
1499 blkdev_put(bdev
, BDEV_FS
);
1504 printk(KERN_ERR
"error %ld\n",PTR_ERR(bdev
));
1507 remove_vfsmnt(old_root_dev
);
1508 vfsmnt
= add_vfsmnt(old_root
->d_sb
, "/dev/root.old", put_old
);
1510 d_mount(dir_d
,old_root
);
1513 printk(KERN_CRIT
"Trouble: add_vfsmnt failed\n");