Import 2.3.99pre7-4
[davej-history.git] / fs / super.c
blob939ca9f368886e4a6d015bef2a47a8105aa8e9f7
1 /*
2 * linux/fs/super.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
6 * super.c contains code to handle: - mount structures
7 * - super-block tables
8 * - filesystem drivers list
9 * - mount system call
10 * - umount system call
11 * - ustat system call
13 * Added options to /proc/mounts
14 * Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
16 * GK 2/5/95 - Changed to support mounting the root fs via NFS
18 * Added kerneld support: Jacques Gelinas and Bjorn Ekwall
19 * Added change_root: Werner Almesberger & Hans Lermen, Feb '96
20 * Added devfs support: Richard Gooch <rgooch@atnf.csiro.au>, 13-JAN-1998
23 #include <linux/config.h>
24 #include <linux/string.h>
25 #include <linux/malloc.h>
26 #include <linux/locks.h>
27 #include <linux/smp_lock.h>
28 #include <linux/devfs_fs_kernel.h>
29 #include <linux/fd.h>
30 #include <linux/init.h>
31 #include <linux/quotaops.h>
32 #include <linux/acct.h>
34 #include <asm/uaccess.h>
36 #include <linux/nfs_fs.h>
37 #include <linux/nfs_fs_sb.h>
38 #include <linux/nfs_mount.h>
40 #include <linux/kmod.h>
41 #define __NO_VERSION__
42 #include <linux/module.h>
45 * We use a semaphore to synchronize all mount/umount
46 * activity - imagine the mess if we have a race between
47 * unmounting a filesystem and re-mounting it (or something
48 * else).
50 static DECLARE_MUTEX(mount_sem);
52 extern void wait_for_keypress(void);
54 extern int root_mountflags;
56 static int do_remount_sb(struct super_block *sb, int flags, char * data);
58 /* this is initialized in init/main.c */
59 kdev_t ROOT_DEV;
61 int nr_super_blocks = 0;
62 int max_super_blocks = NR_SUPER;
63 LIST_HEAD(super_blocks);
66 * Handling of filesystem drivers list.
67 * Rules:
68 * Inclusion to/removals from/scanning of list are protected by spinlock.
69 * During the unload module must call unregister_filesystem().
70 * We can access the fields of list element if:
71 * 1) spinlock is held or
72 * 2) we hold the reference to the module.
73 * The latter can be guaranteed by call of try_inc_mod_count(); if it
74 * returned 0 we must skip the element, otherwise we got the reference.
75 * Once the reference is obtained we can drop the spinlock.
78 static struct file_system_type *file_systems = NULL;
79 static rwlock_t file_systems_lock = RW_LOCK_UNLOCKED;
81 /* WARNING: This can be used only if we _already_ own a reference */
82 static void get_filesystem(struct file_system_type *fs)
84 if (fs->owner)
85 __MOD_INC_USE_COUNT(fs->owner);
88 static void put_filesystem(struct file_system_type *fs)
90 if (fs->owner)
91 __MOD_DEC_USE_COUNT(fs->owner);
94 static struct file_system_type **find_filesystem(const char *name)
96 struct file_system_type **p;
97 for (p=&file_systems; *p; p=&(*p)->next)
98 if (strcmp((*p)->name,name) == 0)
99 break;
100 return p;
104 * register_filesystem - register a new filesystem
105 * @fs: the file system structure
107 * Adds the file system passed to the list of file systems the kernel
108 * is aware of for mount and other syscalls. Returns 0 on success,
109 * or a negative errno code on an error.
111 * The &struct file_system_type that is passed is linked into the kernel
112 * structures and must not be freed until the file system has been
113 * unregistered.
116 int register_filesystem(struct file_system_type * fs)
118 int res = 0;
119 struct file_system_type ** p;
121 if (!fs)
122 return -EINVAL;
123 if (fs->next)
124 return -EBUSY;
125 write_lock(&file_systems_lock);
126 p = find_filesystem(fs->name);
127 if (*p)
128 res = -EBUSY;
129 else
130 *p = fs;
131 write_unlock(&file_systems_lock);
132 return res;
136 * unregister_filesystem - unregister a file system
137 * @fs: filesystem to unregister
139 * Remove a file system that was previously successfully registered
140 * with the kernel. An error is returned if the file system is not found.
141 * Zero is returned on a success.
143 * Once this function has returned the &struct file_system_type structure
144 * may be freed or reused.
147 int unregister_filesystem(struct file_system_type * fs)
149 struct file_system_type ** tmp;
151 write_lock(&file_systems_lock);
152 tmp = &file_systems;
153 while (*tmp) {
154 if (fs == *tmp) {
155 *tmp = fs->next;
156 fs->next = NULL;
157 write_unlock(&file_systems_lock);
158 return 0;
160 tmp = &(*tmp)->next;
162 write_unlock(&file_systems_lock);
163 return -EINVAL;
166 static int fs_index(const char * __name)
168 struct file_system_type * tmp;
169 char * name;
170 int err, index;
172 name = getname(__name);
173 err = PTR_ERR(name);
174 if (IS_ERR(name))
175 return err;
177 err = -EINVAL;
178 read_lock(&file_systems_lock);
179 for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) {
180 if (strcmp(tmp->name,name) == 0) {
181 err = index;
182 break;
185 read_unlock(&file_systems_lock);
186 putname(name);
187 return err;
190 static int fs_name(unsigned int index, char * buf)
192 struct file_system_type * tmp;
193 int len, res;
195 read_lock(&file_systems_lock);
196 for (tmp = file_systems; tmp; tmp = tmp->next, index--)
197 if (index <= 0 && try_inc_mod_count(tmp->owner))
198 break;
199 read_unlock(&file_systems_lock);
200 if (!tmp)
201 return -EINVAL;
203 /* OK, we got the reference, so we can safely block */
204 len = strlen(tmp->name) + 1;
205 res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
206 put_filesystem(tmp);
207 return res;
210 static int fs_maxindex(void)
212 struct file_system_type * tmp;
213 int index;
215 read_lock(&file_systems_lock);
216 for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++)
218 read_unlock(&file_systems_lock);
219 return index;
223 * Whee.. Weird sysv syscall.
225 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
227 int retval = -EINVAL;
229 switch (option) {
230 case 1:
231 retval = fs_index((const char *) arg1);
232 break;
234 case 2:
235 retval = fs_name(arg1, (char *) arg2);
236 break;
238 case 3:
239 retval = fs_maxindex();
240 break;
242 return retval;
245 int get_filesystem_list(char * buf)
247 int len = 0;
248 struct file_system_type * tmp;
250 read_lock(&file_systems_lock);
251 tmp = file_systems;
252 while (tmp && len < PAGE_SIZE - 80) {
253 len += sprintf(buf+len, "%s\t%s\n",
254 (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
255 tmp->name);
256 tmp = tmp->next;
258 read_unlock(&file_systems_lock);
259 return len;
262 static struct file_system_type *get_fs_type(const char *name)
264 struct file_system_type *fs;
266 read_lock(&file_systems_lock);
267 fs = *(find_filesystem(name));
268 if (fs && !try_inc_mod_count(fs->owner))
269 fs = NULL;
270 read_unlock(&file_systems_lock);
271 if (!fs && (request_module(name) == 0)) {
272 read_lock(&file_systems_lock);
273 fs = *(find_filesystem(name));
274 if (fs && !try_inc_mod_count(fs->owner))
275 fs = NULL;
276 read_unlock(&file_systems_lock);
278 return fs;
281 static LIST_HEAD(vfsmntlist);
283 static struct vfsmount *add_vfsmnt(struct super_block *sb,
284 struct dentry *mountpoint,
285 struct dentry *root,
286 struct vfsmount *parent,
287 const char *dev_name,
288 const char *dir_name)
290 struct vfsmount *mnt;
291 char *name;
293 mnt = kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
294 if (!mnt)
295 goto out;
296 memset(mnt, 0, sizeof(struct vfsmount));
298 atomic_set(&mnt->mnt_count,1);
299 mnt->mnt_sb = sb;
300 mnt->mnt_dev = sb->s_dev;
301 mnt->mnt_mountpoint = dget(mountpoint);
302 mnt->mnt_root = dget(root);
303 mnt->mnt_parent = parent ? mntget(parent) : mnt;
305 /* N.B. Is it really OK to have a vfsmount without names? */
306 if (dev_name) {
307 name = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
308 if (name) {
309 strcpy(name, dev_name);
310 mnt->mnt_devname = name;
313 name = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
314 if (name) {
315 strcpy(name, dir_name);
316 mnt->mnt_dirname = name;
319 list_add(&mnt->mnt_instances, &sb->s_mounts);
320 list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
321 list_add(&mnt->mnt_list, vfsmntlist.prev);
322 mountpoint->d_mounts = root;
323 root->d_covers = mountpoint;
324 out:
325 return mnt;
328 static void move_vfsmnt(struct vfsmount *mnt,
329 struct dentry *mountpoint,
330 struct vfsmount *parent,
331 const char *dev_name,
332 const char *dir_name)
334 struct dentry *old_mountpoint = mnt->mnt_mountpoint;
335 struct vfsmount *old_parent = mnt->mnt_parent;
336 char *new_devname = NULL, *new_dirname = NULL;
338 if (dev_name) {
339 new_devname = kmalloc(strlen(dev_name)+1, GFP_KERNEL);
340 if (new_devname)
341 strcpy(new_devname, dev_name);
343 if (dir_name) {
344 new_dirname = kmalloc(strlen(dir_name)+1, GFP_KERNEL);
345 if (new_dirname)
346 strcpy(new_dirname, dir_name);
349 /* flip names */
350 if (new_dirname) {
351 kfree(mnt->mnt_dirname);
352 mnt->mnt_dirname = new_dirname;
354 if (new_devname) {
355 kfree(mnt->mnt_devname);
356 mnt->mnt_devname = new_devname;
359 /* flip the linkage */
360 mnt->mnt_mountpoint = dget(mountpoint);
361 mnt->mnt_parent = parent ? mntget(parent) : mnt;
362 list_del(&mnt->mnt_clash);
363 list_add(&mnt->mnt_clash, &mountpoint->d_vfsmnt);
365 /* put the old stuff */
366 old_mountpoint->d_mounts = old_mountpoint;
367 mountpoint->d_mounts = mnt->mnt_sb->s_root;
368 mnt->mnt_sb->s_root->d_covers = mountpoint;
369 dput(old_mountpoint);
370 if (old_parent != mnt)
371 mntput(old_parent);
374 static void remove_vfsmnt(struct vfsmount *mnt)
376 struct dentry * root = mnt->mnt_sb->s_root;
377 struct dentry * covered = mnt->mnt_mountpoint;
378 /* First of all, remove it from all lists */
379 list_del(&mnt->mnt_instances);
380 list_del(&mnt->mnt_clash);
381 list_del(&mnt->mnt_list);
382 /* Now we can work safely */
383 if (mnt->mnt_parent != mnt)
384 mntput(mnt->mnt_parent);
386 root->d_covers = root;
387 covered->d_mounts = covered;
389 dput(mnt->mnt_mountpoint);
390 dput(mnt->mnt_root);
391 kfree(mnt->mnt_devname);
392 kfree(mnt->mnt_dirname);
393 kfree(mnt);
396 static struct proc_fs_info {
397 int flag;
398 char *str;
399 } fs_info[] = {
400 { MS_NOEXEC, ",noexec" },
401 { MS_NOSUID, ",nosuid" },
402 { MS_NODEV, ",nodev" },
403 { MS_SYNCHRONOUS, ",sync" },
404 { MS_MANDLOCK, ",mand" },
405 { MS_NOATIME, ",noatime" },
406 { MS_NODIRATIME, ",nodiratime" },
407 #ifdef MS_NOSUB /* Can't find this except in mount.c */
408 { MS_NOSUB, ",nosub" },
409 #endif
410 { 0, NULL }
413 static struct proc_nfs_info {
414 int flag;
415 char *str;
416 char *nostr;
417 } nfs_info[] = {
418 { NFS_MOUNT_SOFT, ",soft", ",hard" },
419 { NFS_MOUNT_INTR, ",intr", "" },
420 { NFS_MOUNT_POSIX, ",posix", "" },
421 { NFS_MOUNT_TCP, ",tcp", ",udp" },
422 { NFS_MOUNT_NOCTO, ",nocto", "" },
423 { NFS_MOUNT_NOAC, ",noac", "" },
424 { NFS_MOUNT_NONLM, ",nolock", ",lock" },
425 { 0, NULL, NULL }
428 int get_filesystem_info( char *buf )
430 struct list_head *p;
431 struct proc_fs_info *fs_infop;
432 struct proc_nfs_info *nfs_infop;
433 struct nfs_server *nfss;
434 int len = 0;
435 char *path,*buffer = (char *) __get_free_page(GFP_KERNEL);
437 if (!buffer) return 0;
438 for (p = vfsmntlist.next; p!=&vfsmntlist && len < PAGE_SIZE - 160;
439 p = p->next) {
440 struct vfsmount *tmp = list_entry(p, struct vfsmount, mnt_list);
441 path = d_path(tmp->mnt_root, tmp, buffer, PAGE_SIZE);
442 if (!path)
443 continue;
444 len += sprintf( buf + len, "%s %s %s %s",
445 tmp->mnt_devname, path,
446 tmp->mnt_sb->s_type->name,
447 tmp->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw" );
448 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
449 if (tmp->mnt_sb->s_flags & fs_infop->flag) {
450 strcpy(buf + len, fs_infop->str);
451 len += strlen(fs_infop->str);
454 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
455 nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
456 len += sprintf(buf+len, ",v%d", nfss->rpc_ops->version);
458 len += sprintf(buf+len, ",rsize=%d", nfss->rsize);
460 len += sprintf(buf+len, ",wsize=%d", nfss->wsize);
461 #if 0
462 if (nfss->timeo != 7*HZ/10) {
463 len += sprintf(buf+len, ",timeo=%d",
464 nfss->timeo*10/HZ);
466 if (nfss->retrans != 3) {
467 len += sprintf(buf+len, ",retrans=%d",
468 nfss->retrans);
470 #endif
471 if (nfss->acregmin != 3*HZ) {
472 len += sprintf(buf+len, ",acregmin=%d",
473 nfss->acregmin/HZ);
475 if (nfss->acregmax != 60*HZ) {
476 len += sprintf(buf+len, ",acregmax=%d",
477 nfss->acregmax/HZ);
479 if (nfss->acdirmin != 30*HZ) {
480 len += sprintf(buf+len, ",acdirmin=%d",
481 nfss->acdirmin/HZ);
483 if (nfss->acdirmax != 60*HZ) {
484 len += sprintf(buf+len, ",acdirmax=%d",
485 nfss->acdirmax/HZ);
487 for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
488 char *str;
489 if (nfss->flags & nfs_infop->flag)
490 str = nfs_infop->str;
491 else
492 str = nfs_infop->nostr;
493 strcpy(buf + len, str);
494 len += strlen(str);
496 len += sprintf(buf+len, ",addr=%s",
497 nfss->hostname);
499 len += sprintf( buf + len, " 0 0\n" );
502 free_page((unsigned long) buffer);
503 return len;
507 * __wait_on_super - wait on a superblock
508 * @sb: superblock to wait on
510 * Waits for a superblock to become unlocked and then returns. It does
511 * not take the lock. This is an internal function. See wait_on_super().
514 void __wait_on_super(struct super_block * sb)
516 DECLARE_WAITQUEUE(wait, current);
518 add_wait_queue(&sb->s_wait, &wait);
519 repeat:
520 set_current_state(TASK_UNINTERRUPTIBLE);
521 if (sb->s_lock) {
522 schedule();
523 goto repeat;
525 remove_wait_queue(&sb->s_wait, &wait);
526 current->state = TASK_RUNNING;
530 * Note: check the dirty flag before waiting, so we don't
531 * hold up the sync while mounting a device. (The newly
532 * mounted device won't need syncing.)
534 void sync_supers(kdev_t dev)
536 struct super_block * sb;
538 for (sb = sb_entry(super_blocks.next);
539 sb != sb_entry(&super_blocks);
540 sb = sb_entry(sb->s_list.next)) {
541 if (!sb->s_dev)
542 continue;
543 if (dev && sb->s_dev != dev)
544 continue;
545 if (!sb->s_dirt)
546 continue;
547 lock_super(sb);
548 if (sb->s_dev && sb->s_dirt && (!dev || dev == sb->s_dev))
549 if (sb->s_op && sb->s_op->write_super)
550 sb->s_op->write_super(sb);
551 unlock_super(sb);
556 * get_super - get the superblock of a device
557 * @dev: device to get the superblock for
559 * Scans the superblock list and finds the superblock of the file system
560 * mounted on the device given. %NULL is returned if no match is found.
563 struct super_block * get_super(kdev_t dev)
565 struct super_block * s;
567 if (!dev)
568 return NULL;
569 restart:
570 s = sb_entry(super_blocks.next);
571 while (s != sb_entry(&super_blocks))
572 if (s->s_dev == dev) {
573 wait_on_super(s);
574 if (s->s_dev == dev)
575 return s;
576 goto restart;
577 } else
578 s = sb_entry(s->s_list.next);
579 return NULL;
582 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
584 struct super_block *s;
585 struct ustat tmp;
586 struct statfs sbuf;
587 int err = -EINVAL;
589 lock_kernel();
590 s = get_super(to_kdev_t(dev));
591 if (s == NULL)
592 goto out;
593 err = vfs_statfs(s, &sbuf);
594 if (err)
595 goto out;
597 memset(&tmp,0,sizeof(struct ustat));
598 tmp.f_tfree = sbuf.f_bfree;
599 tmp.f_tinode = sbuf.f_ffree;
601 err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
602 out:
603 unlock_kernel();
604 return err;
608 * get_empty_super - find empty superblocks
610 * Find a superblock with no device assigned. A free superblock is
611 * found and returned. If neccessary new superblocks are allocated.
612 * %NULL is returned if there are insufficient resources to complete
613 * the request.
616 struct super_block *get_empty_super(void)
618 struct super_block *s;
620 for (s = sb_entry(super_blocks.next);
621 s != sb_entry(&super_blocks);
622 s = sb_entry(s->s_list.next)) {
623 if (s->s_dev)
624 continue;
625 if (!s->s_lock)
626 return s;
627 printk("VFS: empty superblock %p locked!\n", s);
629 /* Need a new one... */
630 if (nr_super_blocks >= max_super_blocks)
631 return NULL;
632 s = kmalloc(sizeof(struct super_block), GFP_USER);
633 if (s) {
634 nr_super_blocks++;
635 memset(s, 0, sizeof(struct super_block));
636 INIT_LIST_HEAD(&s->s_dirty);
637 list_add (&s->s_list, super_blocks.prev);
638 init_waitqueue_head(&s->s_wait);
639 INIT_LIST_HEAD(&s->s_files);
640 INIT_LIST_HEAD(&s->s_mounts);
642 return s;
645 static struct super_block * read_super(kdev_t dev, struct block_device *bdev,
646 struct file_system_type *type, int flags,
647 void *data, int silent)
649 struct super_block * s;
650 s = get_empty_super();
651 if (!s)
652 goto out;
653 s->s_dev = dev;
654 s->s_bdev = bdev;
655 s->s_flags = flags;
656 s->s_dirt = 0;
657 sema_init(&s->s_vfs_rename_sem,1);
658 sema_init(&s->s_nfsd_free_path_sem,1);
659 s->s_type = type;
660 sema_init(&s->s_dquot.dqio_sem, 1);
661 sema_init(&s->s_dquot.dqoff_sem, 1);
662 s->s_dquot.flags = 0;
663 lock_super(s);
664 if (!type->read_super(s, data, silent))
665 goto out_fail;
666 unlock_super(s);
667 /* tell bdcache that we are going to keep this one */
668 if (bdev)
669 atomic_inc(&bdev->bd_count);
670 out:
671 return s;
673 out_fail:
674 s->s_dev = 0;
675 s->s_bdev = 0;
676 s->s_type = NULL;
677 unlock_super(s);
678 return NULL;
682 * Unnamed block devices are dummy devices used by virtual
683 * filesystems which don't use real block-devices. -- jrs
686 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
688 kdev_t get_unnamed_dev(void)
690 int i;
692 for (i = 1; i < 256; i++) {
693 if (!test_and_set_bit(i,unnamed_dev_in_use))
694 return MKDEV(UNNAMED_MAJOR, i);
696 return 0;
699 void put_unnamed_dev(kdev_t dev)
701 if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
702 return;
703 if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
704 return;
705 printk("VFS: put_unnamed_dev: freeing unused device %s\n",
706 kdevname(dev));
709 static struct super_block *get_sb_bdev(struct file_system_type *fs_type,
710 char *dev_name, int flags, void * data)
712 struct dentry *dentry;
713 struct inode *inode;
714 struct block_device *bdev;
715 struct block_device_operations *bdops;
716 struct super_block * sb;
717 kdev_t dev;
718 int error;
719 /* What device it is? */
720 if (!dev_name || !*dev_name)
721 return ERR_PTR(-EINVAL);
722 dentry = lookup_dentry(dev_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE);
723 if (IS_ERR(dentry))
724 return (struct super_block *)dentry;
725 inode = dentry->d_inode;
726 error = -ENOTBLK;
727 if (!S_ISBLK(inode->i_mode))
728 goto out;
729 error = -EACCES;
730 if (IS_NODEV(inode))
731 goto out;
732 bdev = inode->i_bdev;
733 bdops = devfs_get_ops ( devfs_get_handle_from_inode (inode) );
734 if (bdops) bdev->bd_op = bdops;
735 /* Done with lookups, semaphore down */
736 down(&mount_sem);
737 dev = to_kdev_t(bdev->bd_dev);
738 check_disk_change(dev);
739 error = -EACCES;
740 if (!(flags & MS_RDONLY) && is_read_only(dev))
741 goto out;
742 sb = get_super(dev);
743 if (sb) {
744 error = -EBUSY;
745 goto out;
746 /* MOUNT_REWRITE: the following should be used
747 if (fs_type == sb->s_type) {
748 dput(dentry);
749 return sb;
752 } else {
753 mode_t mode = FMODE_READ; /* we always need it ;-) */
754 if (!(flags & MS_RDONLY))
755 mode |= FMODE_WRITE;
756 error = blkdev_get(bdev, mode, 0, BDEV_FS);
757 if (error)
758 goto out;
759 error = -EINVAL;
760 sb = read_super(dev, bdev, fs_type, flags, data, 0);
761 if (sb) {
762 get_filesystem(fs_type);
763 dput(dentry);
764 return sb;
766 blkdev_put(bdev, BDEV_FS);
768 out:
769 dput(dentry);
770 up(&mount_sem);
771 return ERR_PTR(error);
774 static struct super_block *get_sb_nodev(struct file_system_type *fs_type,
775 int flags, void * data)
777 kdev_t dev;
778 int error = -EMFILE;
779 down(&mount_sem);
780 dev = get_unnamed_dev();
781 if (dev) {
782 struct super_block * sb;
783 error = -EINVAL;
784 sb = read_super(dev, NULL, fs_type, flags, data, 0);
785 if (sb) {
786 get_filesystem(fs_type);
787 return sb;
789 put_unnamed_dev(dev);
791 up(&mount_sem);
792 return ERR_PTR(error);
795 static struct block_device *kill_super(struct super_block *sb, int umount_root)
797 struct block_device *bdev;
798 kdev_t dev;
799 dput(sb->s_root);
800 sb->s_root = NULL;
801 lock_super(sb);
802 if (sb->s_op) {
803 if (sb->s_op->write_super && sb->s_dirt)
804 sb->s_op->write_super(sb);
805 if (sb->s_op->put_super)
806 sb->s_op->put_super(sb);
809 /* Forget any remaining inodes */
810 if (invalidate_inodes(sb)) {
811 printk("VFS: Busy inodes after unmount. "
812 "Self-destruct in 5 seconds. Have a nice day...\n");
815 dev = sb->s_dev;
816 sb->s_dev = 0; /* Free the superblock */
817 bdev = sb->s_bdev;
818 sb->s_bdev = NULL;
819 put_filesystem(sb->s_type);
820 sb->s_type = NULL;
821 unlock_super(sb);
822 if (umount_root) {
823 /* special: the old device driver is going to be
824 a ramdisk and the point of this call is to free its
825 protected memory (even if dirty). */
826 destroy_buffers(dev);
828 if (bdev) {
829 blkdev_put(bdev, BDEV_FS);
830 bdput(bdev);
831 } else
832 put_unnamed_dev(dev);
833 return bdev;
837 * Alters the mount flags of a mounted file system. Only the mount point
838 * is used as a reference - file system type and the device are ignored.
841 static int do_remount_sb(struct super_block *sb, int flags, char *data)
843 int retval;
845 if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
846 return -EACCES;
847 /*flags |= MS_RDONLY;*/
848 /* If we are remounting RDONLY, make sure there are no rw files open */
849 if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
850 if (!fs_may_remount_ro(sb))
851 return -EBUSY;
852 if (sb->s_op && sb->s_op->remount_fs) {
853 lock_super(sb);
854 retval = sb->s_op->remount_fs(sb, &flags, data);
855 unlock_super(sb);
856 if (retval)
857 return retval;
859 sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
862 * We can't invalidate inodes as we can loose data when remounting
863 * (someone might manage to alter data while we are waiting in lock_super()
864 * or in foo_remount_fs()))
867 return 0;
871 * Doesn't take quota and stuff into account. IOW, in some cases it will
872 * give false negatives. The main reason why it's here is that we need
873 * a non-destructive way to look for easily umountable filesystems.
875 int may_umount(struct vfsmount *mnt)
877 struct super_block * sb = mnt->mnt_sb;
878 struct dentry * root;
879 int count;
881 if (atomic_read(&mnt->mnt_count) > 2)
882 return -EBUSY;
884 if (mnt->mnt_instances.next != mnt->mnt_instances.prev)
885 return 0;
888 * OK, at that point we have only one instance. We should have
889 * one active reference from ->s_root, one active reference
890 * from ->mnt_root (which may be different) and possibly one
891 * active reference from ->mnt_mountpoint (if mnt->mnt_parent == mnt).
892 * Anything above that means that tree is busy.
895 root = sb->s_root;
897 count = d_active_refs(root);
898 if (mnt->mnt_parent == mnt)
899 count--;
900 if (count != 2)
901 return -EBUSY;
903 return 0;
906 static int do_umount(struct vfsmount *mnt, int umount_root, int flags)
908 struct super_block * sb = mnt->mnt_sb;
909 int count;
911 if (mnt == current->fs->rootmnt && !umount_root) {
912 int retval = 0;
914 * Special case for "unmounting" root ...
915 * we just try to remount it readonly.
917 mntput(mnt);
918 if (!(sb->s_flags & MS_RDONLY))
919 retval = do_remount_sb(sb, MS_RDONLY, 0);
920 return retval;
923 if (atomic_read(&mnt->mnt_count) > 2) {
924 mntput(mnt);
925 return -EBUSY;
928 if (mnt->mnt_instances.next != mnt->mnt_instances.prev) {
929 mntput(mnt);
930 remove_vfsmnt(mnt);
931 return 0;
935 * Before checking whether the filesystem is still busy,
936 * make sure the kernel doesn't hold any quota files open
937 * on the device. If the umount fails, too bad -- there
938 * are no quotas running any more. Just turn them on again.
940 DQUOT_OFF(sb);
941 acct_auto_close(sb->s_dev);
944 * If we may have to abort operations to get out of this
945 * mount, and they will themselves hold resources we must
946 * allow the fs to do things. In the Unix tradition of
947 * 'Gee thats tricky lets do it in userspace' the umount_begin
948 * might fail to complete on the first run through as other tasks
949 * must return, and the like. Thats for the mount program to worry
950 * about for the moment.
953 if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
954 sb->s_op->umount_begin(sb);
957 * Shrink dcache, then fsync. This guarantees that if the
958 * filesystem is quiescent at this point, then (a) only the
959 * root entry should be in use and (b) that root entry is
960 * clean.
962 shrink_dcache_sb(sb);
963 fsync_dev(sb->s_dev);
965 /* Something might grab it again - redo checks */
967 if (atomic_read(&mnt->mnt_count) > 2) {
968 mntput(mnt);
969 return -EBUSY;
973 * OK, at that point we have only one instance. We should have
974 * one active reference from ->s_root, one active reference
975 * from ->mnt_root (which may be different) and possibly one
976 * active reference from ->mnt_mountpoint (if mnt->mnt_parent == mnt).
977 * Anything above that means that tree is busy.
980 count = d_active_refs(sb->s_root);
981 if (mnt->mnt_parent == mnt)
982 count--;
983 if (count != 2)
984 return -EBUSY;
986 if (sb->s_root->d_inode->i_state)
987 return -EBUSY;
989 /* OK, that's the point of no return */
990 mntput(mnt);
991 remove_vfsmnt(mnt);
993 kill_super(sb, umount_root);
994 return 0;
998 * Now umount can handle mount points as well as block devices.
999 * This is important for filesystems which use unnamed block devices.
1001 * We now support a flag for forced unmount like the other 'big iron'
1002 * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
1005 asmlinkage long sys_umount(char * name, int flags)
1007 struct nameidata nd;
1008 char *kname;
1009 int retval;
1010 struct super_block *sb;
1012 if (!capable(CAP_SYS_ADMIN))
1013 return -EPERM;
1015 lock_kernel();
1016 kname = getname(name);
1017 retval = PTR_ERR(kname);
1018 if (IS_ERR(kname))
1019 goto out;
1020 retval = 0;
1021 if (walk_init(kname, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
1022 retval = walk_name(kname, &nd);
1023 putname(kname);
1024 if (retval)
1025 goto out;
1026 sb = nd.dentry->d_inode->i_sb;
1027 retval = -EINVAL;
1028 if (nd.dentry!=nd.mnt->mnt_root)
1029 goto dput_and_out;
1030 dput(nd.dentry);
1031 /* puts nd.mnt */
1032 down(&mount_sem);
1033 retval = do_umount(nd.mnt, 0, flags);
1034 up(&mount_sem);
1035 goto out;
1036 dput_and_out:
1037 dput(nd.dentry);
1038 mntput(nd.mnt);
1039 out:
1040 unlock_kernel();
1041 return retval;
1045 * The 2.0 compatible umount. No flags.
1048 asmlinkage long sys_oldumount(char * name)
1050 return sys_umount(name,0);
1054 * change filesystem flags. dir should be a physical root of filesystem.
1055 * If you've mounted a non-root directory somewhere and want to do remount
1056 * on it - tough luck.
1059 static int do_remount(const char *dir,int flags,char *data)
1061 struct dentry *dentry;
1062 int retval;
1064 if (!capable(CAP_SYS_ADMIN))
1065 return -EPERM;
1067 dentry = lookup_dentry(dir, LOOKUP_FOLLOW|LOOKUP_POSITIVE);
1068 retval = PTR_ERR(dentry);
1069 if (!IS_ERR(dentry)) {
1070 struct super_block * sb = dentry->d_inode->i_sb;
1071 retval = -ENODEV;
1072 if (sb) {
1073 retval = -EINVAL;
1074 if (dentry == sb->s_root) {
1076 * Shrink the dcache and sync the device.
1078 shrink_dcache_sb(sb);
1079 fsync_dev(sb->s_dev);
1080 if (flags & MS_RDONLY)
1081 acct_auto_close(sb->s_dev);
1082 retval = do_remount_sb(sb, flags, data);
1085 dput(dentry);
1087 return retval;
1090 static int copy_mount_options (const void * data, unsigned long *where)
1092 int i;
1093 unsigned long page;
1094 struct vm_area_struct * vma;
1096 *where = 0;
1097 if (!data)
1098 return 0;
1100 vma = find_vma(current->mm, (unsigned long) data);
1101 if (!vma || (unsigned long) data < vma->vm_start)
1102 return -EFAULT;
1103 if (!(vma->vm_flags & VM_READ))
1104 return -EFAULT;
1105 i = vma->vm_end - (unsigned long) data;
1106 if (PAGE_SIZE <= (unsigned long) i)
1107 i = PAGE_SIZE-1;
1108 if (!(page = __get_free_page(GFP_KERNEL))) {
1109 return -ENOMEM;
1111 if (copy_from_user((void *) page,data,i)) {
1112 free_page(page);
1113 return -EFAULT;
1115 *where = page;
1116 return 0;
1120 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1121 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1123 * data is a (void *) that can point to any structure up to
1124 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1125 * information (or be NULL).
1127 * NOTE! As old versions of mount() didn't use this setup, the flags
1128 * have to have a special 16-bit magic number in the high word:
1129 * 0xC0ED. If this magic word isn't present, the flags and data info
1130 * aren't used, as the syscall assumes we are talking to an older
1131 * version that didn't understand them.
1133 long do_sys_mount(char * dev_name, char * dir_name, char *type_page,
1134 unsigned long new_flags, void *data_page)
1136 struct file_system_type * fstype;
1137 struct nameidata nd;
1138 struct vfsmount *mnt;
1139 struct super_block *sb;
1140 int retval = 0;
1141 unsigned long flags = 0;
1143 /* Basic sanity checks */
1145 if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE))
1146 return -EINVAL;
1147 if (!type_page || !memchr(type_page, 0, PAGE_SIZE))
1148 return -EINVAL;
1149 if (dev_name && !memchr(dev_name, 0, PAGE_SIZE))
1150 return -EINVAL;
1152 /* OK, looks good, now let's see what do they want */
1154 /* just change the flags? - capabilities are checked in do_remount() */
1155 if ((new_flags & (MS_MGC_MSK|MS_REMOUNT)) == (MS_MGC_VAL|MS_REMOUNT))
1156 return do_remount(dir_name, new_flags&~(MS_MGC_MSK|MS_REMOUNT),
1157 (char *) data_page);
1159 if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL)
1160 flags = new_flags & ~MS_MGC_MSK;
1162 /* loopback mount? This is special - requires fewer capabilities */
1163 /* MOUNT_REWRITE: ... and is yet to be merged */
1165 /* for the rest we _really_ need capabilities... */
1166 if (!capable(CAP_SYS_ADMIN))
1167 return -EPERM;
1169 /* ... filesystem driver... */
1170 fstype = get_fs_type(type_page);
1171 if (!fstype)
1172 return -ENODEV;
1174 /* ... and mountpoint. Do the lookup first to force automounting. */
1175 if (walk_init(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
1176 retval = walk_name(dir_name, &nd);
1177 if (retval)
1178 goto fs_out;
1180 /* get superblock, locks mount_sem on success */
1181 if (fstype->fs_flags & FS_REQUIRES_DEV)
1182 sb = get_sb_bdev(fstype, dev_name,flags, data_page);
1183 else
1184 sb = get_sb_nodev(fstype, flags, data_page);
1186 retval = PTR_ERR(sb);
1187 if (IS_ERR(sb))
1188 goto dput_out;
1190 retval = -ENOENT;
1191 if (d_unhashed(nd.dentry))
1192 goto fail;
1194 /* Something was mounted here while we slept */
1195 while(d_mountpoint(nd.dentry) && follow_down(&nd.mnt, &nd.dentry))
1198 retval = -ENOMEM;
1199 mnt = add_vfsmnt(sb, nd.dentry, sb->s_root, nd.mnt, dev_name, dir_name);
1200 if (!mnt)
1201 goto fail;
1202 retval = 0;
1203 unlock_out:
1204 up(&mount_sem);
1205 dput_out:
1206 dput(nd.dentry);
1207 mntput(nd.mnt);
1208 fs_out:
1209 put_filesystem(fstype);
1210 return retval;
1212 fail:
1213 if (list_empty(&sb->s_mounts))
1214 kill_super(sb, 0);
1215 goto unlock_out;
1218 asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
1219 unsigned long new_flags, void * data)
1221 int retval;
1222 unsigned long data_page = 0;
1223 unsigned long type_page = 0;
1224 unsigned long dev_page = 0;
1225 char *dir_page;
1227 lock_kernel();
1228 retval = copy_mount_options (type, &type_page);
1229 if (retval < 0)
1230 goto out;
1232 /* copy_mount_options allows a NULL user pointer,
1233 * and just returns zero in that case. But if we
1234 * allow the type to be NULL we will crash.
1235 * Previously we did not check this case.
1237 if (type_page == 0) {
1238 retval = -EINVAL;
1239 goto out;
1242 dir_page = getname(dir_name);
1243 retval = PTR_ERR(dir_page);
1244 if (IS_ERR(dir_page))
1245 goto out1;
1247 retval = copy_mount_options (dev_name, &dev_page);
1248 if (retval < 0)
1249 goto out2;
1250 retval = copy_mount_options (data, &data_page);
1251 if (retval >= 0) {
1252 retval = do_sys_mount((char*)dev_page,dir_page,(char*)type_page,
1253 new_flags, (void*)data_page);
1254 free_page(data_page);
1256 free_page(dev_page);
1257 out2:
1258 putname(dir_page);
1259 out1:
1260 free_page(type_page);
1261 out:
1262 unlock_kernel();
1263 return retval;
1266 void __init mount_root(void)
1268 struct file_system_type * fs_type;
1269 struct super_block * sb;
1270 struct vfsmount *vfsmnt;
1271 struct block_device *bdev = NULL;
1272 mode_t mode;
1273 int retval;
1274 void *handle;
1275 char path[64];
1276 int path_start = -1;
1278 #ifdef CONFIG_ROOT_NFS
1279 void *data;
1280 if (MAJOR(ROOT_DEV) != UNNAMED_MAJOR)
1281 goto skip_nfs;
1282 fs_type = get_fs_type("nfs");
1283 if (!fs_type)
1284 goto no_nfs;
1285 ROOT_DEV = get_unnamed_dev();
1286 if (!ROOT_DEV)
1288 * Your /linuxrc sucks worse than MSExchange - that's the
1289 * only way you could run out of anon devices at that point.
1291 goto no_anon;
1292 data = nfs_root_data();
1293 if (!data)
1294 goto no_server;
1295 sb = read_super(ROOT_DEV, NULL, fs_type, root_mountflags, data, 1);
1296 if (sb)
1298 * We _can_ fail there, but if that will happen we have no
1299 * chance anyway (no memory for vfsmnt and we _will_ need it,
1300 * no matter which fs we try to mount).
1302 goto mount_it;
1303 no_server:
1304 put_unnamed_dev(ROOT_DEV);
1305 no_anon:
1306 put_filesystem(fs_type);
1307 no_nfs:
1308 printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1309 ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1310 skip_nfs:
1311 #endif
1313 #ifdef CONFIG_BLK_DEV_FD
1314 if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1315 #ifdef CONFIG_BLK_DEV_RAM
1316 extern int rd_doload;
1317 extern void rd_load_secondary(void);
1318 #endif
1319 floppy_eject();
1320 #ifndef CONFIG_BLK_DEV_RAM
1321 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1322 #else
1323 /* rd_doload is 2 for a dual initrd/ramload setup */
1324 if(rd_doload==2)
1325 rd_load_secondary();
1326 else
1327 #endif
1329 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1330 wait_for_keypress();
1333 #endif
1335 devfs_make_root (root_device_name);
1336 handle = devfs_find_handle (NULL, ROOT_DEVICE_NAME, 0,
1337 MAJOR (ROOT_DEV), MINOR (ROOT_DEV),
1338 DEVFS_SPECIAL_BLK, 1);
1339 if (handle) /* Sigh: bd*() functions only paper over the cracks */
1341 unsigned major, minor;
1343 devfs_get_maj_min (handle, &major, &minor);
1344 ROOT_DEV = MKDEV (major, minor);
1348 * Probably pure paranoia, but I'm less than happy about delving into
1349 * devfs crap and checking it right now. Later.
1351 if (!ROOT_DEV)
1352 panic("I have no root and I want to scream");
1354 bdev = bdget(kdev_t_to_nr(ROOT_DEV));
1355 if (!bdev)
1356 panic(__FUNCTION__ ": unable to allocate root device");
1357 bdev->bd_op = devfs_get_ops (handle);
1358 path_start = devfs_generate_path (handle, path + 5, sizeof (path) - 5);
1359 mode = FMODE_READ;
1360 if (!(root_mountflags & MS_RDONLY))
1361 mode |= FMODE_WRITE;
1362 retval = blkdev_get(bdev, mode, 0, BDEV_FS);
1363 if (retval == -EROFS) {
1364 root_mountflags |= MS_RDONLY;
1365 retval = blkdev_get(bdev, FMODE_READ, 0, BDEV_FS);
1367 if (retval) {
1369 * Allow the user to distinguish between failed open
1370 * and bad superblock on root device.
1372 printk ("VFS: Cannot open root device \"%s\" or %s\n",
1373 root_device_name, kdevname (ROOT_DEV));
1374 printk ("Please append a correct \"root=\" boot option\n");
1375 panic("VFS: Unable to mount root fs on %s",
1376 kdevname(ROOT_DEV));
1379 check_disk_change(ROOT_DEV);
1380 sb = get_super(ROOT_DEV);
1381 if (sb) {
1382 fs_type = sb->s_type;
1383 goto mount_it;
1386 read_lock(&file_systems_lock);
1387 for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1388 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1389 continue;
1390 if (!try_inc_mod_count(fs_type->owner))
1391 continue;
1392 read_unlock(&file_systems_lock);
1393 sb = read_super(ROOT_DEV,bdev,fs_type,root_mountflags,NULL,1);
1394 if (sb)
1395 goto mount_it;
1396 read_lock(&file_systems_lock);
1397 put_filesystem(fs_type);
1399 read_unlock(&file_systems_lock);
1400 panic("VFS: Unable to mount root fs on %s", kdevname(ROOT_DEV));
1402 mount_it:
1403 printk ("VFS: Mounted root (%s filesystem)%s.\n",
1404 fs_type->name,
1405 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1406 if (path_start >= 0) {
1407 devfs_mk_symlink (NULL,
1408 "root", 0, DEVFS_FL_DEFAULT,
1409 path + 5 + path_start, 0,
1410 NULL, NULL);
1411 memcpy (path + path_start, "/dev/", 5);
1412 vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
1413 path + path_start, "/");
1415 else
1416 vfsmnt = add_vfsmnt (sb, sb->s_root, sb->s_root, NULL,
1417 "/dev/root", "/");
1418 if (vfsmnt) {
1419 set_fs_root(current->fs, vfsmnt, sb->s_root);
1420 set_fs_pwd(current->fs, vfsmnt, sb->s_root);
1421 if (bdev)
1422 bdput(bdev); /* sb holds a reference */
1423 return;
1425 panic("VFS: add_vfsmnt failed for root fs");
1429 static void chroot_fs_refs(struct dentry *old_root,
1430 struct vfsmount *old_rootmnt,
1431 struct dentry *new_root,
1432 struct vfsmount *new_rootmnt)
1434 struct task_struct *p;
1436 /* We can't afford dput() blocking under the tasklist_lock */
1437 mntget(old_rootmnt);
1438 dget(old_root);
1440 read_lock(&tasklist_lock);
1441 for_each_task(p) {
1442 if (!p->fs) continue;
1443 if (p->fs->root == old_root && p->fs->rootmnt == old_rootmnt)
1444 set_fs_root(p->fs, new_rootmnt, new_root);
1445 if (p->fs->pwd == old_root && p->fs->pwdmnt == old_rootmnt)
1446 set_fs_pwd(p->fs, new_rootmnt, new_root);
1448 read_unlock(&tasklist_lock);
1450 dput(old_root);
1451 mntput(old_rootmnt);
1455 * Moves the current root to put_root, and sets root/cwd of all processes
1456 * which had them on the old root to new_root.
1458 * Note:
1459 * - we don't move root/cwd if they are not at the root (reason: if something
1460 * cared enough to change them, it's probably wrong to force them elsewhere)
1461 * - it's okay to pick a root that isn't the root of a file system, e.g.
1462 * /nfs/my_root where /nfs is the mount point. Better avoid creating
1463 * unreachable mount points this way, though.
1466 asmlinkage long sys_pivot_root(const char *new_root, const char *put_old)
1468 struct dentry *root = current->fs->root;
1469 struct vfsmount *root_mnt = current->fs->rootmnt;
1470 struct vfsmount *tmp;
1471 struct nameidata new_nd, old_nd;
1472 char *name;
1473 int error;
1475 if (!capable(CAP_SYS_ADMIN))
1476 return -EPERM;
1478 lock_kernel();
1480 name = getname(new_root);
1481 error = PTR_ERR(name);
1482 if (IS_ERR(name))
1483 goto out0;
1484 error = 0;
1485 if (walk_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd))
1486 error = walk_name(name, &new_nd);
1487 putname(name);
1488 if (error)
1489 goto out0;
1491 name = getname(put_old);
1492 error = PTR_ERR(name);
1493 if (IS_ERR(name))
1494 goto out0;
1495 error = 0;
1496 if (walk_init(name, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd))
1497 error = walk_name(name, &old_nd);
1498 putname(name);
1499 if (error)
1500 goto out1;
1502 down(&mount_sem);
1503 error = -ENOENT;
1504 if (d_unhashed(new_nd.dentry) || d_unhashed(old_nd.dentry))
1505 goto out2;
1506 error = -EBUSY;
1507 if (new_nd.mnt == root_mnt || old_nd.mnt == root_mnt)
1508 goto out2; /* loop */
1509 error = -EINVAL;
1510 tmp = old_nd.mnt; /* make sure we can reach put_old from new_root */
1511 if (tmp != new_nd.mnt) {
1512 for (;;) {
1513 if (tmp->mnt_parent == tmp)
1514 goto out2;
1515 if (tmp->mnt_parent == new_nd.mnt)
1516 break;
1517 tmp = tmp->mnt_parent;
1519 if (!is_subdir(tmp->mnt_root, new_nd.dentry))
1520 goto out2;
1521 } else if (!is_subdir(old_nd.dentry, new_nd.dentry))
1522 goto out2;
1524 error = -ENOMEM;
1525 name = __getname();
1526 if (!name)
1527 goto out2;
1529 move_vfsmnt(new_nd.mnt, new_nd.dentry, NULL, NULL, "/");
1530 move_vfsmnt(root_mnt, old_nd.dentry, old_nd.mnt, NULL,
1531 __d_path(old_nd.dentry, old_nd.mnt, new_nd.dentry,
1532 new_nd.mnt, name, PAGE_SIZE));
1533 putname(name);
1534 chroot_fs_refs(root,root_mnt,new_nd.dentry,new_nd.mnt);
1535 error = 0;
1536 out2:
1537 up(&mount_sem);
1538 dput(old_nd.dentry);
1539 mntput(old_nd.mnt);
1540 out1:
1541 dput(new_nd.dentry);
1542 mntput(new_nd.mnt);
1543 out0:
1544 unlock_kernel();
1545 return error;
#ifdef CONFIG_BLK_DEV_INITRD

/*
 * Switch the root filesystem to new_root_dev (initrd hand-over).
 *
 * Unmounts devfs from /dev if it is mounted there, mounts the new root
 * via mount_root(), remounts devfs, and then moves the previous root
 * mount onto the directory put_old in the new root.  If put_old cannot
 * be looked up, an attempt is made to unmount the old root instead.
 *
 * Returns 0 when the old root was moved or successfully unmounted;
 * otherwise the error from the failed put_old lookup.
 */
int __init change_root(kdev_t new_root_dev,const char *put_old)
{
	struct vfsmount *old_rootmnt = mntget(current->fs->rootmnt);
	struct nameidata devfs_nd, nd;
	int error = 0;

	/* First unmount devfs if mounted */
	if (walk_init("/dev", LOOKUP_FOLLOW|LOOKUP_POSITIVE, &devfs_nd))
		error = walk_name("/dev", &devfs_nd);
	if (!error) {
		/* Only act if /dev is itself the root of a devfs mount. */
		if (devfs_nd.mnt->mnt_sb->s_magic == DEVFS_SUPER_MAGIC &&
		    devfs_nd.dentry == devfs_nd.mnt->mnt_root) {
			dput(devfs_nd.dentry);
			down(&mount_sem);
			/* puts devfs_nd.mnt */
			do_umount(devfs_nd.mnt, 0, 0);
			up(&mount_sem);
		} else {
			dput(devfs_nd.dentry);
			mntput(devfs_nd.mnt);
		}
	}
	ROOT_DEV = new_root_dev;
	mount_root();
#if 1
	shrink_dcache();
	printk("change_root: old root has d_count=%d\n",
	       old_rootmnt->mnt_root->d_count);
#endif
	mount_devfs_fs ();
	/*
	 * Get the new mount directory
	 */
	error = 0;
	if (walk_init(put_old, LOOKUP_FOLLOW|LOOKUP_POSITIVE|LOOKUP_DIRECTORY, &nd))
		error = walk_name(put_old, &nd);
	if (error) {
		int blivet;

		/* Nowhere to park the old root: try to get rid of it. */
		printk(KERN_NOTICE "Trying to unmount old root ... ");
		blivet = do_umount(old_rootmnt, 1, 0);
		if (!blivet) {
			printk("okay\n");
			return 0;
		}
		/* blivet is an int, so it must be printed with %d. */
		printk(KERN_ERR "error %d\n",blivet);
		return error;
	}
	move_vfsmnt(old_rootmnt, nd.dentry, nd.mnt, "/dev/root.old", put_old);
	mntput(old_rootmnt);
	dput(nd.dentry);
	mntput(nd.mnt);
	return 0;
}

#endif