fs/super.c

   1 /*
   2  *  linux/fs/super.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *
   6  *  super.c contains code to handle: - mount structures
   7  *                                   - super-block tables.
   8  *                                   - mount system call
   9  *                                   - umount system call
  10  *
  11  *  Added options to /proc/mounts
  12  *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
  13  *
  14  * GK 2/5/95  -  Changed to support mounting the root fs via NFS
  15  *
  16  *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
  17  *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
  18  */
  19
  20 #include <linux/config.h>
  21 #include <linux/malloc.h>
  22 #include <linux/locks.h>
  23 #include <linux/smp_lock.h>
  24 #include <linux/fd.h>
  25 #include <linux/init.h>
  26 #include <linux/quotaops.h>
  27 #include <linux/acct.h>
  28
  29 #include <asm/uaccess.h>
  30
  31 #include <linux/nfs_fs.h>
  32 #include <linux/nfs_fs_sb.h>
  33 #include <linux/nfs_mount.h>
  34
  35 #ifdef CONFIG_KMOD
  36 #include <linux/kmod.h>
  37 #endif
  38
  39 /*
  40  * We use a semaphore to synchronize all mount/umount
  41  * activity - imagine the mess if we have a race between
  42  * unmounting a filesystem and re-mounting it (or something
  43  * else).
  44  */
  45 static DECLARE_MUTEX(mount_sem);
  46
  47 extern void wait_for_keypress(void);
  48 extern struct file_operations * get_blkfops(unsigned int major);
  49
  50 extern int root_mountflags;
  51
  52 static int do_remount_sb(struct super_block *sb, int flags, char * data);
  53
  54 /* this is initialized in init/main.c */
  55 kdev_t ROOT_DEV;
  56
  57 int nr_super_blocks = 0;
  58 int max_super_blocks = NR_SUPER;
  59 LIST_HEAD(super_blocks);
  60
  61 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
  62 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
  63 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
  64                        *mru_vfsmnt = (struct vfsmount *) NULL;
  65
  66 /*
  67  * This part handles the management of the list of mounted filesystems.
  68  */
  69 struct vfsmount *lookup_vfsmnt(kdev_t dev)
  70 {
  71         struct vfsmount *lptr;
  72
  73         if (vfsmntlist == (struct vfsmount *)NULL)
  74                 return ((struct vfsmount *)NULL);
  75
  76         if (mru_vfsmnt != (struct vfsmount *)NULL &&
  77             mru_vfsmnt->mnt_dev == dev)
  78                 return (mru_vfsmnt);
  79
  80         for (lptr = vfsmntlist;
  81              lptr != (struct vfsmount *)NULL;
  82              lptr = lptr->mnt_next)
  83                 if (lptr->mnt_dev == dev) {
  84                         mru_vfsmnt = lptr;
  85                         return (lptr);
  86                 }
  87
  88         return ((struct vfsmount *)NULL);
  89         /* NOTREACHED */
  90 }
  91
  92 static struct vfsmount *add_vfsmnt(struct super_block *sb,
  93                         const char *dev_name, const char *dir_name)
  94 {
  95         struct vfsmount *lptr;
  96         char *tmp, *name;
  97
  98         lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
  99         if (!lptr)
 100                 goto out;
 101         memset(lptr, 0, sizeof(struct vfsmount));
 102
 103         lptr->mnt_sb = sb;
 104         lptr->mnt_dev = sb->s_dev;
 105         lptr->mnt_flags = sb->s_flags;
 106
 107         sema_init(&lptr->mnt_dquot.dqio_sem, 1);
 108         sema_init(&lptr->mnt_dquot.dqoff_sem, 1);
 109         lptr->mnt_dquot.flags = 0;
 110
 111         /* N.B. Is it really OK to have a vfsmount without names? */
 112         if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
 113                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 114                 if (name) {
 115                         strcpy(name, tmp);
 116                         lptr->mnt_devname = name;
 117                 }
 118                 putname(tmp);
 119         }
 120         if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
 121                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 122                 if (name) {
 123                         strcpy(name, tmp);
 124                         lptr->mnt_dirname = name;
 125                 }
 126                 putname(tmp);
 127         }
 128
 129         if (vfsmntlist == (struct vfsmount *)NULL) {
 130                 vfsmntlist = vfsmnttail = lptr;
 131         } else {
 132                 vfsmnttail->mnt_next = lptr;
 133                 vfsmnttail = lptr;
 134         }
 135 out:
 136         return lptr;
 137 }
 138
 139 void remove_vfsmnt(kdev_t dev)
 140 {
 141         struct vfsmount *lptr, *tofree;
 142
 143         if (vfsmntlist == (struct vfsmount *)NULL)
 144                 return;
 145         lptr = vfsmntlist;
 146         if (lptr->mnt_dev == dev) {
 147                 tofree = lptr;
 148                 vfsmntlist = lptr->mnt_next;
 149                 if (vfsmnttail->mnt_dev == dev)
 150                         vfsmnttail = vfsmntlist;
 151         } else {
 152                 while (lptr->mnt_next != (struct vfsmount *)NULL) {
 153                         if (lptr->mnt_next->mnt_dev == dev)
 154                                 break;
 155                         lptr = lptr->mnt_next;
 156                 }
 157                 tofree = lptr->mnt_next;
 158                 if (tofree == (struct vfsmount *)NULL)
 159                         return;
 160                 lptr->mnt_next = lptr->mnt_next->mnt_next;
 161                 if (vfsmnttail->mnt_dev == dev)
 162                         vfsmnttail = lptr;
 163         }
 164         if (tofree == mru_vfsmnt)
 165                 mru_vfsmnt = NULL;
 166         kfree(tofree->mnt_devname);
 167         kfree(tofree->mnt_dirname);
 168         kfree_s(tofree, sizeof(struct vfsmount));
 169 }
 170
 171 int register_filesystem(struct file_system_type * fs)
 172 {
 173         struct file_system_type ** tmp;
 174
 175         if (!fs)
 176                 return -EINVAL;
 177         if (fs->next)
 178                 return -EBUSY;
 179         tmp = &file_systems;
 180         while (*tmp) {
 181                 if (strcmp((*tmp)->name, fs->name) == 0)
 182                         return -EBUSY;
 183                 tmp = &(*tmp)->next;
 184         }
 185         *tmp = fs;
 186         return 0;
 187 }
 188
 189 #ifdef CONFIG_MODULES
 190 int unregister_filesystem(struct file_system_type * fs)
 191 {
 192         struct file_system_type ** tmp;
 193
 194         tmp = &file_systems;
 195         while (*tmp) {
 196                 if (fs == *tmp) {
 197                         *tmp = fs->next;
 198                         fs->next = NULL;
 199                         return 0;
 200                 }
 201                 tmp = &(*tmp)->next;
 202         }
 203         return -EINVAL;
 204 }
 205 #endif
 206
 207 static int fs_index(const char * __name)
 208 {
 209         struct file_system_type * tmp;
 210         char * name;
 211         int err, index;
 212
 213         name = getname(__name);
 214         err = PTR_ERR(name);
 215         if (IS_ERR(name))
 216                 return err;
 217
 218         index = 0;
 219         for (tmp = file_systems ; tmp ; tmp = tmp->next) {
 220                 if (strcmp(tmp->name, name) == 0) {
 221                         putname(name);
 222                         return index;
 223                 }
 224                 index++;
 225         }
 226         putname(name);
 227         return -EINVAL;
 228 }
 229
 230 static int fs_name(unsigned int index, char * buf)
 231 {
 232         struct file_system_type * tmp;
 233         int len;
 234
 235         tmp = file_systems;
 236         while (tmp && index > 0) {
 237                 tmp = tmp->next;
 238                 index--;
 239         }
 240         if (!tmp)
 241                 return -EINVAL;
 242         len = strlen(tmp->name) + 1;
 243         return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
 244 }
 245
 246 static int fs_maxindex(void)
 247 {
 248         struct file_system_type * tmp;
 249         int index;
 250
 251         index = 0;
 252         for (tmp = file_systems ; tmp ; tmp = tmp->next)
 253                 index++;
 254         return index;
 255 }
 256
 257 /*
 258  * Whee.. Weird sysv syscall.
 259  */
 260 asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
 261 {
 262         int retval = -EINVAL;
 263
 264         lock_kernel();
 265         switch (option) {
 266                 case 1:
 267                         retval = fs_index((const char *) arg1);
 268                         break;
 269
 270                 case 2:
 271                         retval = fs_name(arg1, (char *) arg2);
 272                         break;
 273
 274                 case 3:
 275                         retval = fs_maxindex();
 276                         break;
 277         }
 278         unlock_kernel();
 279         return retval;
 280 }
 281
 282 static struct proc_fs_info {
 283         int flag;
 284         char *str;
 285 } fs_info[] = {
 286         { MS_NOEXEC, ",noexec" },
 287         { MS_NOSUID, ",nosuid" },
 288         { MS_NODEV, ",nodev" },
 289         { MS_SYNCHRONOUS, ",sync" },
 290         { MS_MANDLOCK, ",mand" },
 291         { MS_NOATIME, ",noatime" },
 292         { MS_NODIRATIME, ",nodiratime" },
 293 #ifdef MS_NOSUB                 /* Can't find this except in mount.c */
 294         { MS_NOSUB, ",nosub" },
 295 #endif
 296         { 0, NULL }
 297 };
 298
 299 static struct proc_nfs_info {
 300         int flag;
 301         char *str;
 302 } nfs_info[] = {
 303         { NFS_MOUNT_SOFT, ",soft" },
 304         { NFS_MOUNT_INTR, ",intr" },
 305         { NFS_MOUNT_POSIX, ",posix" },
 306         { NFS_MOUNT_NOCTO, ",nocto" },
 307         { NFS_MOUNT_NOAC, ",noac" },
 308         { 0, NULL }
 309 };
 310
 311 int get_filesystem_info( char *buf )
 312 {
 313         struct vfsmount *tmp = vfsmntlist;
 314         struct proc_fs_info *fs_infop;
 315         struct proc_nfs_info *nfs_infop;
 316         struct nfs_server *nfss;
 317         int len = 0;
 318
 319         while ( tmp && len < PAGE_SIZE - 160)
 320         {
 321                 len += sprintf( buf + len, "%s %s %s %s",
 322                         tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
 323                         tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
 324                 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 325                   if (tmp->mnt_flags & fs_infop->flag) {
 326                     strcpy(buf + len, fs_infop->str);
 327                     len += strlen(fs_infop->str);
 328                   }
 329                 }
 330                 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
 331                         nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
 332                         if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 333                                 len += sprintf(buf+len, ",rsize=%d",
 334                                                nfss->rsize);
 335                         }
 336                         if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 337                                 len += sprintf(buf+len, ",wsize=%d",
 338                                                nfss->wsize);
 339                         }
 340 #if 0
 341                         if (nfss->timeo != 7*HZ/10) {
 342                                 len += sprintf(buf+len, ",timeo=%d",
 343                                                nfss->timeo*10/HZ);
 344                         }
 345                         if (nfss->retrans != 3) {
 346                                 len += sprintf(buf+len, ",retrans=%d",
 347                                                nfss->retrans);
 348                         }
 349 #endif
 350                         if (nfss->acregmin != 3*HZ) {
 351                                 len += sprintf(buf+len, ",acregmin=%d",
 352                                                nfss->acregmin/HZ);
 353                         }
 354                         if (nfss->acregmax != 60*HZ) {
 355                                 len += sprintf(buf+len, ",acregmax=%d",
 356                                                nfss->acregmax/HZ);
 357                         }
 358                         if (nfss->acdirmin != 30*HZ) {
 359                                 len += sprintf(buf+len, ",acdirmin=%d",
 360                                                nfss->acdirmin/HZ);
 361                         }
 362                         if (nfss->acdirmax != 60*HZ) {
 363                                 len += sprintf(buf+len, ",acdirmax=%d",
 364                                                nfss->acdirmax/HZ);
 365                         }
 366                         for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 367                                 if (nfss->flags & nfs_infop->flag) {
 368                                         strcpy(buf + len, nfs_infop->str);
 369                                         len += strlen(nfs_infop->str);
 370                                 }
 371                         }
 372                         len += sprintf(buf+len, ",addr=%s",
 373                                        nfss->hostname);
 374                 }
 375                 len += sprintf( buf + len, " 0 0\n" );
 376                 tmp = tmp->mnt_next;
 377         }
 378
 379         return len;
 380 }
 381
 382 int get_filesystem_list(char * buf)
 383 {
 384         int len = 0;
 385         struct file_system_type * tmp;
 386
 387         tmp = file_systems;
 388         while (tmp && len < PAGE_SIZE - 80) {
 389                 len += sprintf(buf+len, "%s\t%s\n",
 390                         (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
 391                         tmp->name);
 392                 tmp = tmp->next;
 393         }
 394         return len;
 395 }
 396
 397 struct file_system_type *get_fs_type(const char *name)
 398 {
 399         struct file_system_type * fs = file_systems;
 400
 401         if (!name)
 402                 return fs;
 403         for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 404                 ;
 405 #ifdef CONFIG_KMOD
 406         if (!fs && (request_module(name) == 0)) {
 407                 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 408                         ;
 409         }
 410 #endif
 411
 412         return fs;
 413 }
 414
 415 void __wait_on_super(struct super_block * sb)
 416 {
 417         DECLARE_WAITQUEUE(wait, current);
 418
 419         add_wait_queue(&sb->s_wait, &wait);
 420 repeat:
 421         set_current_state(TASK_UNINTERRUPTIBLE);
 422         if (sb->s_lock) {
 423                 schedule();
 424                 goto repeat;
 425         }
 426         remove_wait_queue(&sb->s_wait, &wait);
 427         current->state = TASK_RUNNING;
 428 }
 429
 430 /*
 431  * Note: check the dirty flag before waiting, so we don't
 432  * hold up the sync while mounting a device. (The newly
 433  * mounted device won't need syncing.)
 434  */
 435 void sync_supers(kdev_t dev)
 436 {
 437         struct super_block * sb;
 438
 439         for (sb = sb_entry(super_blocks.next);
 440              sb != sb_entry(&super_blocks);
 441              sb = sb_entry(sb->s_list.next)) {
 442                 if (!sb->s_dev)
 443                         continue;
 444                 if (dev && sb->s_dev != dev)
 445                         continue;
 446                 if (!sb->s_dirt)
 447                         continue;
 448                 /* N.B. Should lock the superblock while writing */
 449                 wait_on_super(sb);
 450                 if (!sb->s_dev || !sb->s_dirt)
 451                         continue;
 452                 if (dev && (dev != sb->s_dev))
 453                         continue;
 454                 if (sb->s_op && sb->s_op->write_super)
 455                         sb->s_op->write_super(sb);
 456         }
 457 }
 458
 459 struct super_block * get_super(kdev_t dev)
 460 {
 461         struct super_block * s;
 462
 463         if (!dev)
 464                 return NULL;
 465 restart:
 466         s = sb_entry(super_blocks.next);
 467         while (s != sb_entry(&super_blocks))
 468                 if (s->s_dev == dev) {
 469                         wait_on_super(s);
 470                         if (s->s_dev == dev)
 471                                 return s;
 472                         goto restart;
 473                 } else
 474                         s = sb_entry(s->s_list.next);
 475         return NULL;
 476 }
 477
 478 asmlinkage long sys_ustat(dev_t dev, struct ustat * ubuf)
 479 {
 480         struct super_block *s;
 481         struct ustat tmp;
 482         struct statfs sbuf;
 483         mm_segment_t old_fs;
 484         int err = -EINVAL;
 485
 486         lock_kernel();
 487         s = get_super(to_kdev_t(dev));
 488         if (s == NULL)
 489                 goto out;
 490         err = -ENOSYS;
 491         if (!(s->s_op->statfs))
 492                 goto out;
 493
 494         old_fs = get_fs();
 495         set_fs(get_ds());
 496         s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
 497         set_fs(old_fs);
 498
 499         memset(&tmp,0,sizeof(struct ustat));
 500         tmp.f_tfree = sbuf.f_bfree;
 501         tmp.f_tinode = sbuf.f_ffree;
 502
 503         err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
 504 out:
 505         unlock_kernel();
 506         return err;
 507 }
 508
 509 /*
 510  * Find a super_block with no device assigned.
 511  */
 512 struct super_block *get_empty_super(void)
 513 {
 514         struct super_block *s;
 515
 516         for (s  = sb_entry(super_blocks.next);
 517              s != sb_entry(&super_blocks);
 518              s  = sb_entry(s->s_list.next)) {
 519                 if (s->s_dev)
 520                         continue;
 521                 if (!s->s_lock)
 522                         return s;
 523                 printk("VFS: empty superblock %p locked!\n", s);
 524         }
 525         /* Need a new one... */
 526         if (nr_super_blocks >= max_super_blocks)
 527                 return NULL;
 528         s = kmalloc(sizeof(struct super_block),  GFP_USER);
 529         if (s) {
 530                 nr_super_blocks++;
 531                 memset(s, 0, sizeof(struct super_block));
 532                 INIT_LIST_HEAD(&s->s_dirty);
 533                 list_add (&s->s_list, super_blocks.prev);
 534                 init_waitqueue_head(&s->s_wait);
 535                 INIT_LIST_HEAD(&s->s_files);
 536         }
 537         return s;
 538 }
 539
 540 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
 541                                        void *data, int silent)
 542 {
 543         struct super_block * s;
 544         struct file_system_type *type;
 545
 546         if (!dev)
 547                 goto out_null;
 548         check_disk_change(dev);
 549         s = get_super(dev);
 550         if (s)
 551                 goto out;
 552
 553         type = get_fs_type(name);
 554         if (!type) {
 555                 printk("VFS: on device %s: get_fs_type(%s) failed\n",
 556                        kdevname(dev), name);
 557                 goto out;
 558         }
 559         s = get_empty_super();
 560         if (!s)
 561                 goto out;
 562         s->s_dev = dev;
 563         s->s_flags = flags;
 564         s->s_dirt = 0;
 565         sema_init(&s->s_vfs_rename_sem,1);
 566         /* N.B. Should lock superblock now ... */
 567         if (!type->read_super(s, data, silent))
 568                 goto out_fail;
 569         s->s_dev = dev; /* N.B. why do this again?? */
 570         s->s_rd_only = 0;
 571         s->s_type = type;
 572 out:
 573         return s;
 574
 575         /* N.B. s_dev should be cleared in type->read_super */
 576 out_fail:
 577         s->s_dev = 0;
 578 out_null:
 579         s = NULL;
 580         goto out;
 581 }
 582
 583 /*
 584  * Unnamed block devices are dummy devices used by virtual
 585  * filesystems which don't use real block-devices.  -- jrs
 586  */
 587
 588 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
 589
 590 kdev_t get_unnamed_dev(void)
 591 {
 592         int i;
 593
 594         for (i = 1; i < 256; i++) {
 595                 if (!test_and_set_bit(i,unnamed_dev_in_use))
 596                         return MKDEV(UNNAMED_MAJOR, i);
 597         }
 598         return 0;
 599 }
 600
 601 void put_unnamed_dev(kdev_t dev)
 602 {
 603         if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
 604                 return;
 605         if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
 606                 return;
 607         printk("VFS: put_unnamed_dev: freeing unused device %s\n",
 608                         kdevname(dev));
 609 }
 610
 611 static int d_umount(struct super_block * sb)
 612 {
 613         struct dentry * root = sb->s_root;
 614         struct dentry * covered = root->d_covers;
 615
 616         if (root->d_count != 1)
 617                 return -EBUSY;
 618
 619         if (root->d_inode->i_state)
 620                 return -EBUSY;
 621
 622         sb->s_root = NULL;
 623
 624         if (covered != root) {
 625                 root->d_covers = root;
 626                 covered->d_mounts = covered;
 627                 dput(covered);
 628         }
 629         dput(root);
 630         return 0;
 631 }
 632
 633 static void d_mount(struct dentry *covered, struct dentry *dentry)
 634 {
 635         if (covered->d_mounts != covered) {
 636                 printk("VFS: mount - already mounted\n");
 637                 return;
 638         }
 639         covered->d_mounts = dentry;
 640         dentry->d_covers = covered;
 641 }
 642
 643 static int do_umount(kdev_t dev, int unmount_root, int flags)
 644 {
 645         struct super_block * sb;
 646         int retval;
 647
 648         retval = -ENOENT;
 649         sb = get_super(dev);
 650         if (!sb || !sb->s_root)
 651                 goto out;
 652
 653         /*
 654          * Before checking whether the filesystem is still busy,
 655          * make sure the kernel doesn't hold any quota files open
 656          * on the device. If the umount fails, too bad -- there
 657          * are no quotas running any more. Just turn them on again.
 658          */
 659         DQUOT_OFF(dev);
 660         acct_auto_close(dev);
 661
 662         /*
 663          * If we may have to abort operations to get out of this
 664          * mount, and they will themselves hold resources we must
 665          * allow the fs to do things. In the Unix tradition of
 666          * 'Gee thats tricky lets do it in userspace' the umount_begin
 667          * might fail to complete on the first run through as other tasks
 668          * must return, and the like. Thats for the mount program to worry
 669          * about for the moment.
 670          */
 671
 672         if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
 673                 sb->s_op->umount_begin(sb);
 674
 675         /*
 676          * Shrink dcache, then fsync. This guarantees that if the
 677          * filesystem is quiescent at this point, then (a) only the
 678          * root entry should be in use and (b) that root entry is
 679          * clean.
 680          */
 681         shrink_dcache_sb(sb);
 682         fsync_dev(dev);
 683
 684         if (dev==ROOT_DEV && !unmount_root) {
 685                 /*
 686                  * Special case for "unmounting" root ...
 687                  * we just try to remount it readonly.
 688                  */
 689                 retval = 0;
 690                 if (!(sb->s_flags & MS_RDONLY))
 691                         retval = do_remount_sb(sb, MS_RDONLY, 0);
 692                 return retval;
 693         }
 694
 695         retval = d_umount(sb);
 696         if (retval)
 697                 goto out;
 698
 699         if (sb->s_op) {
 700                 if (sb->s_op->write_super && sb->s_dirt)
 701                         sb->s_op->write_super(sb);
 702         }
 703
 704         lock_super(sb);
 705         if (sb->s_op) {
 706                 if (sb->s_op->put_super)
 707                         sb->s_op->put_super(sb);
 708         }
 709
 710         /* Forget any remaining inodes */
 711         if (invalidate_inodes(sb)) {
 712                 printk("VFS: Busy inodes after unmount. "
 713                         "Self-destruct in 5 seconds.  Have a nice day...\n");
 714         }
 715
 716         sb->s_dev = 0;          /* Free the superblock */
 717         unlock_super(sb);
 718
 719         remove_vfsmnt(dev);
 720 out:
 721         return retval;
 722 }
 723
 724 static int umount_dev(kdev_t dev, int flags)
 725 {
 726         int retval;
 727         struct inode * inode = get_empty_inode();
 728
 729         retval = -ENOMEM;
 730         if (!inode)
 731                 goto out;
 732
 733         inode->i_rdev = dev;
 734         retval = -ENXIO;
 735         if (MAJOR(dev) >= MAX_BLKDEV)
 736                 goto out_iput;
 737
 738         fsync_dev(dev);
 739
 740         down(&mount_sem);
 741
 742         retval = do_umount(dev, 0, flags);
 743         if (!retval) {
 744                 fsync_dev(dev);
 745                 if (dev != ROOT_DEV) {
 746                         blkdev_release(inode);
 747                         put_unnamed_dev(dev);
 748                 }
 749         }
 750
 751         up(&mount_sem);
 752 out_iput:
 753         iput(inode);
 754 out:
 755         return retval;
 756 }
 757
 758 /*
 759  * Now umount can handle mount points as well as block devices.
 760  * This is important for filesystems which use unnamed block devices.
 761  *
 762  * There is a little kludge here with the dummy_inode.  The current
 763  * vfs release functions only use the r_dev field in the inode so
 764  * we give them the info they need without using a real inode.
 765  * If any other fields are ever needed by any block device release
 766  * functions, they should be faked here.  -- jrs
 767  *
 768  * We now support a flag for forced unmount like the other 'big iron'
 769  * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 770  */
 771
 772 asmlinkage long sys_umount(char * name, int flags)
 773 {
 774         struct dentry * dentry;
 775         int retval;
 776
 777         if (!capable(CAP_SYS_ADMIN))
 778                 return -EPERM;
 779
 780         lock_kernel();
 781         dentry = namei(name);
 782         retval = PTR_ERR(dentry);
 783         if (!IS_ERR(dentry)) {
 784                 struct inode * inode = dentry->d_inode;
 785                 kdev_t dev = inode->i_rdev;
 786
 787                 retval = 0;
 788                 if (S_ISBLK(inode->i_mode)) {
 789                         if (IS_NODEV(inode))
 790                                 retval = -EACCES;
 791                 } else {
 792                         struct super_block *sb = inode->i_sb;
 793                         retval = -EINVAL;
 794                         if (sb && inode == sb->s_root->d_inode) {
 795                                 dev = sb->s_dev;
 796                                 retval = 0;
 797                         }
 798                 }
 799                 dput(dentry);
 800
 801                 if (!retval)
 802                         retval = umount_dev(dev, flags);
 803         }
 804         unlock_kernel();
 805         return retval;
 806 }
 807
 808 /*
 809  *      The 2.0 compatible umount. No flags.
 810  */
 811
 812 asmlinkage long sys_oldumount(char * name)
 813 {
 814         return sys_umount(name,0);
 815 }
 816
 817 /*
 818  * Check whether we can mount the specified device.
 819  */
 820 int fs_may_mount(kdev_t dev)
 821 {
 822         struct super_block * sb = get_super(dev);
 823         int busy;
 824
 825         busy = sb && sb->s_root &&
 826                (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
 827         return !busy;
 828 }
 829
 830 /*
 831  * do_mount() does the actual mounting after sys_mount has done the ugly
 832  * parameter parsing. When enough time has gone by, and everything uses the
 833  * new mount() parameters, sys_mount() can then be cleaned up.
 834  *
 835  * We cannot mount a filesystem if it has active, used, or dirty inodes.
 836  * We also have to flush all inode-data for this device, as the new mount
 837  * might need new info.
 838  *
 839  * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
 840  * supplying a leading "!" before the dir_name, allowing "stacks" of
 841  * mounted filesystems. The stacking will only influence any pathname lookups
 842  * _after_ the mount, but open file descriptors or working directories that
 843  * are now covered remain valid. For example, when you overmount /home, any
 844  * process with old cwd /home/joe will continue to use the old versions,
 845  * as long as relative paths are used, but absolute paths like /home/joe/xxx
 846  * will go to the new "top of stack" version. In general, crossing a
 847  * mount point will always go to the top of stack element.
 848  * Anyone using this new feature must know what he/she is doing.
 849  */
 850
 851 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name,
 852              const char * type, int flags, void * data)
 853 {
 854         struct dentry * dir_d;
 855         struct super_block * sb;
 856         struct vfsmount *vfsmnt;
 857         int error;
 858
 859         error = -EACCES;
 860         if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
 861                 goto out;
 862
 863         /*
 864          * Do the lookup first to force automounting.
 865          */
 866         dir_d = namei(dir_name);
 867         error = PTR_ERR(dir_d);
 868         if (IS_ERR(dir_d))
 869                 goto out;
 870
 871         down(&mount_sem);
 872         error = -ENOTDIR;
 873         if (!S_ISDIR(dir_d->d_inode->i_mode))
 874                 goto dput_and_out;
 875
 876         error = -EBUSY;
 877         if (dir_d->d_covers != dir_d)
 878                 goto dput_and_out;
 879
 880         /*
 881          * Note: If the superblock already exists,
 882          * read_super just does a get_super().
 883          */
 884         error = -EINVAL;
 885         sb = read_super(dev, type, flags, data, 0);
 886         if (!sb)
 887                 goto dput_and_out;
 888
 889         /*
 890          * We may have slept while reading the super block,
 891          * so we check afterwards whether it's safe to mount.
 892          */
 893         error = -EBUSY;
 894         if (!fs_may_mount(dev))
 895                 goto dput_and_out;
 896
 897         error = -ENOMEM;
 898         vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
 899         if (vfsmnt) {
 900                 d_mount(dget(dir_d), sb->s_root);
 901                 error = 0;
 902         }
 903
 904 dput_and_out:
 905         dput(dir_d);
 906         up(&mount_sem);
 907 out:
 908         return error;
 909 }
 910
 911
 912 /*
 913  * Alters the mount flags of a mounted file system. Only the mount point
 914  * is used as a reference - file system type and the device are ignored.
 915  * FS-specific mount options can't be altered by remounting.
 916  */
 917
 918 static int do_remount_sb(struct super_block *sb, int flags, char *data)
 919 {
 920         int retval;
 921         struct vfsmount *vfsmnt;
 922
 923         if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
 924                 return -EACCES;
 925                 /*flags |= MS_RDONLY;*/
 926         /* If we are remounting RDONLY, make sure there are no rw files open */
 927         if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
 928                 if (!fs_may_remount_ro(sb))
 929                         return -EBUSY;
 930         if (sb->s_op && sb->s_op->remount_fs) {
 931                 retval = sb->s_op->remount_fs(sb, &flags, data);
 932                 if (retval)
 933                         return retval;
 934         }
 935         sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 936         vfsmnt = lookup_vfsmnt(sb->s_dev);
 937         if (vfsmnt)
 938                 vfsmnt->mnt_flags = sb->s_flags;
 939
 940         /*
 941          * Invalidate the inodes, as some mount options may be changed.
 942          * N.B. If we are changing media, we should check the return
 943          * from invalidate_inodes ... can't allow _any_ open files.
 944          */
 945         invalidate_inodes(sb);
 946
 947         return 0;
 948 }
 949
 950 static int do_remount(const char *dir,int flags,char *data)
 951 {
 952         struct dentry *dentry;
 953         int retval;
 954
 955         dentry = namei(dir);
 956         retval = PTR_ERR(dentry);
 957         if (!IS_ERR(dentry)) {
 958                 struct super_block * sb = dentry->d_inode->i_sb;
 959
 960                 retval = -ENODEV;
 961                 if (sb) {
 962                         retval = -EINVAL;
 963                         if (dentry == sb->s_root) {
 964                                 /*
 965                                  * Shrink the dcache and sync the device.
 966                                  */
 967                                 shrink_dcache_sb(sb);
 968                                 fsync_dev(sb->s_dev);
 969                                 if (flags & MS_RDONLY)
 970                                         acct_auto_close(sb->s_dev);
 971                                 retval = do_remount_sb(sb, flags, data);
 972                         }
 973                 }
 974                 dput(dentry);
 975         }
 976         return retval;
 977 }
 978
 979 static int copy_mount_options (const void * data, unsigned long *where)
 980 {
 981         int i;
 982         unsigned long page;
 983         struct vm_area_struct * vma;
 984
 985         *where = 0;
 986         if (!data)
 987                 return 0;
 988
 989         vma = find_vma(current->mm, (unsigned long) data);
 990         if (!vma || (unsigned long) data < vma->vm_start)
 991                 return -EFAULT;
 992         if (!(vma->vm_flags & VM_READ))
 993                 return -EFAULT;
 994         i = vma->vm_end - (unsigned long) data;
 995         if (PAGE_SIZE <= (unsigned long) i)
 996                 i = PAGE_SIZE-1;
 997         if (!(page = __get_free_page(GFP_KERNEL))) {
 998                 return -ENOMEM;
 999         }
1000         if (copy_from_user((void *) page,data,i)) {
1001                 free_page(page);
1002                 return -EFAULT;
1003         }
1004         *where = page;
1005         return 0;
1006 }
1007
1008 /*
1009  * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1010  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1011  *
1012  * data is a (void *) that can point to any structure up to
1013  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1014  * information (or be NULL).
1015  *
1016  * NOTE! As old versions of mount() didn't use this setup, the flags
1017  * have to have a special 16-bit magic number in the high word:
1018  * 0xC0ED. If this magic word isn't present, the flags and data info
1019  * aren't used, as the syscall assumes we are talking to an older
1020  * version that didn't understand them.
1021  */
1022 asmlinkage long sys_mount(char * dev_name, char * dir_name, char * type,
1023                           unsigned long new_flags, void * data)
1024 {
1025         struct file_system_type * fstype;
1026         struct dentry * dentry = NULL;
1027         struct inode * inode = NULL;
1028         kdev_t dev;
1029         int retval;
1030         unsigned long flags = 0;
1031         unsigned long page = 0;
1032         struct file dummy;      /* allows read-write or read-only flag */
1033
1034         if (!capable(CAP_SYS_ADMIN))
1035                 return -EPERM;
1036         lock_kernel();
1037         if ((new_flags &
1038              (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1039                 retval = copy_mount_options (data, &page);
1040                 if (retval < 0)
1041                         goto out;
1042                 retval = do_remount(dir_name,
1043                                     new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1044                                     (char *) page);
1045                 free_page(page);
1046                 goto out;
1047         }
1048
1049         retval = copy_mount_options (type, &page);
1050         if (retval < 0)
1051                 goto out;
1052         fstype = get_fs_type((char *) page);
1053         free_page(page);
1054         retval = -ENODEV;
1055         if (!fstype)
1056                 goto out;
1057
1058         memset(&dummy, 0, sizeof(dummy));
1059         if (fstype->fs_flags & FS_REQUIRES_DEV) {
1060                 dentry = namei(dev_name);
1061                 retval = PTR_ERR(dentry);
1062                 if (IS_ERR(dentry))
1063                         goto out;
1064
1065                 inode = dentry->d_inode;
1066                 retval = -ENOTBLK;
1067                 if (!S_ISBLK(inode->i_mode))
1068                         goto dput_and_out;
1069
1070                 retval = -EACCES;
1071                 if (IS_NODEV(inode))
1072                         goto dput_and_out;
1073
1074                 dev = inode->i_rdev;
1075                 retval = -ENXIO;
1076                 if (MAJOR(dev) >= MAX_BLKDEV)
1077                         goto dput_and_out;
1078
1079                 retval = -ENOTBLK;
1080                 dummy.f_op = get_blkfops(MAJOR(dev));
1081                 if (!dummy.f_op)
1082                         goto dput_and_out;
1083
1084                 if (dummy.f_op->open) {
1085                         dummy.f_dentry = dentry;
1086                         dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1087                         retval = dummy.f_op->open(inode, &dummy);
1088                         if (retval)
1089                                 goto dput_and_out;
1090                 }
1091
1092         } else {
1093                 retval = -EMFILE;
1094                 if (!(dev = get_unnamed_dev()))
1095                         goto out;
1096         }
1097
1098         page = 0;
1099         if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1100                 flags = new_flags & ~MS_MGC_MSK;
1101                 retval = copy_mount_options(data, &page);
1102                 if (retval < 0)
1103                         goto clean_up;
1104         }
1105         retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1106                                 (void *) page);
1107         free_page(page);
1108         if (retval)
1109                 goto clean_up;
1110
1111 dput_and_out:
1112         dput(dentry);
1113 out:
1114         unlock_kernel();
1115         return retval;
1116
1117 clean_up:
1118         if (dummy.f_op) {
1119                 if (dummy.f_op->release)
1120                         dummy.f_op->release(inode, NULL);
1121         } else
1122                 put_unnamed_dev(dev);
1123         goto dput_and_out;
1124 }
1125
1126 void __init mount_root(void)
1127 {
1128         struct file_system_type * fs_type;
1129         struct super_block * sb;
1130         struct vfsmount *vfsmnt;
1131         struct inode * d_inode = NULL;
1132         struct file filp;
1133         int retval;
1134
1135 #ifdef CONFIG_ROOT_NFS
1136         if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1137                 ROOT_DEV = 0;
1138                 if ((fs_type = get_fs_type("nfs"))) {
1139                         sb = get_empty_super(); /* "can't fail" */
1140                         sb->s_dev = get_unnamed_dev();
1141                         sb->s_flags = root_mountflags;
1142                         sema_init(&sb->s_vfs_rename_sem,1);
1143                         vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1144                         if (vfsmnt) {
1145                                 if (nfs_root_mount(sb) >= 0) {
1146                                         sb->s_dirt = 0;
1147                                         sb->s_type = fs_type;
1148                                         current->fs->root = dget(sb->s_root);
1149                                         current->fs->pwd = dget(sb->s_root);
1150                                         ROOT_DEV = sb->s_dev;
1151                                         printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1152                                         return;
1153                                 }
1154                                 remove_vfsmnt(sb->s_dev);
1155                         }
1156                         put_unnamed_dev(sb->s_dev);
1157                         sb->s_dev = 0;
1158                 }
1159                 if (!ROOT_DEV) {
1160                         printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1161                         ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1162                 }
1163         }
1164 #endif
1165
1166 #ifdef CONFIG_BLK_DEV_FD
1167         if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1168 #ifdef CONFIG_BLK_DEV_RAM
1169                 extern int rd_doload;
1170                 extern void rd_load_secondary(void);
1171 #endif
1172                 floppy_eject();
1173 #ifndef CONFIG_BLK_DEV_RAM
1174                 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1175 #else
1176                 /* rd_doload is 2 for a dual initrd/ramload setup */
1177                 if(rd_doload==2)
1178                         rd_load_secondary();
1179                 else
1180 #endif
1181                 {
1182                         printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1183                         wait_for_keypress();
1184                 }
1185         }
1186 #endif
1187
1188         memset(&filp, 0, sizeof(filp));
1189         d_inode = get_empty_inode();
1190         if (!d_inode)
1191                 panic(__FUNCTION__ ": unable to allocate root inode");
1192         d_inode->i_rdev = ROOT_DEV;
1193         filp.f_dentry = NULL;
1194         if ( root_mountflags & MS_RDONLY)
1195                 filp.f_mode = 1; /* read only */
1196         else
1197                 filp.f_mode = 3; /* read write */
1198         retval = blkdev_open(d_inode, &filp);
1199         if (retval == -EROFS) {
1200                 root_mountflags |= MS_RDONLY;
1201                 filp.f_mode = 1;
1202                 retval = blkdev_open(d_inode, &filp);
1203         }
1204         iput(d_inode);
1205         if (retval)
1206                 /*
1207                  * Allow the user to distinguish between failed open
1208                  * and bad superblock on root device.
1209                  */
1210                 printk("VFS: Cannot open root device %s\n",
1211                        kdevname(ROOT_DEV));
1212         else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1213                 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1214                         continue;
1215                 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1216                 if (sb) {
1217                         sb->s_flags = root_mountflags;
1218                         current->fs->root = dget(sb->s_root);
1219                         current->fs->pwd = dget(sb->s_root);
1220                         printk ("VFS: Mounted root (%s filesystem)%s.\n",
1221                                 fs_type->name,
1222                                 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1223                         vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1224                         if (vfsmnt)
1225                                 return;
1226                         panic("VFS: add_vfsmnt failed for root fs");
1227                 }
1228         }
1229         panic("VFS: Unable to mount root fs on %s",
1230                 kdevname(ROOT_DEV));
1231 }
1232
1233
1234 #ifdef CONFIG_BLK_DEV_INITRD
1235
1236 int __init change_root(kdev_t new_root_dev,const char *put_old)
1237 {
1238         kdev_t old_root_dev;
1239         struct vfsmount *vfsmnt;
1240         struct dentry *old_root,*old_pwd,*dir_d = NULL;
1241         int error;
1242
1243         old_root = current->fs->root;
1244         old_pwd = current->fs->pwd;
1245         old_root_dev = ROOT_DEV;
1246         if (!fs_may_mount(new_root_dev)) {
1247                 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1248                 return -EBUSY;
1249         }
1250         ROOT_DEV = new_root_dev;
1251         mount_root();
1252         dput(old_root);
1253         dput(old_pwd);
1254 #if 1
1255         shrink_dcache();
1256         printk("change_root: old root has d_count=%d\n", old_root->d_count);
1257 #endif
1258         /*
1259          * Get the new mount directory
1260          */
1261         dir_d = lookup_dentry(put_old, NULL, 1);
1262         if (IS_ERR(dir_d)) {
1263                 error = PTR_ERR(dir_d);
1264         } else if (!dir_d->d_inode) {
1265                 dput(dir_d);
1266                 error = -ENOENT;
1267         } else {
1268                 error = 0;
1269         }
1270         if (!error && dir_d->d_covers != dir_d) {
1271                 dput(dir_d);
1272                 error = -EBUSY;
1273         }
1274         if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1275                 dput(dir_d);
1276                 error = -ENOTDIR;
1277         }
1278         if (error) {
1279                 int umount_error;
1280
1281                 printk(KERN_NOTICE "Trying to unmount old root ... ");
1282                 umount_error = do_umount(old_root_dev,1, 0);
1283                 if (!umount_error) {
1284                         printk("okay\n");
1285                         invalidate_buffers(old_root_dev);
1286                         return 0;
1287                 }
1288                 printk(KERN_ERR "error %d\n",umount_error);
1289                 return error;
1290         }
1291         remove_vfsmnt(old_root_dev);
1292         vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1293         if (vfsmnt) {
1294                 d_mount(dir_d,old_root);
1295                 return 0;
1296         }
1297         printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1298         return -ENOMEM;
1299 }
1300
1301 #endif