fs/super.c

   1 /*
   2  *  linux/fs/super.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *
   6  *  super.c contains code to handle: - mount structures
   7  *                                   - super-block tables.
   8  *                                   - mount system call
   9  *                                   - umount system call
  10  *
  11  *  Added options to /proc/mounts
  12  *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
  13  *
  14  * GK 2/5/95  -  Changed to support mounting the root fs via NFS
  15  *
  16  *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
  17  *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
  18  */
  19
  20 #include <linux/config.h>
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/mount.h>
  24 #include <linux/malloc.h>
  25 #include <linux/major.h>
  26 #include <linux/stat.h>
  27 #include <linux/errno.h>
  28 #include <linux/string.h>
  29 #include <linux/locks.h>
  30 #include <linux/mm.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 #include <linux/fd.h>
  34 #include <linux/init.h>
  35 #include <linux/quotaops.h>
  36
  37 #include <asm/system.h>
  38 #include <asm/uaccess.h>
  39 #include <asm/bitops.h>
  40
  41 #include <linux/nfs_fs.h>
  42 #include <linux/nfs_fs_sb.h>
  43 #include <linux/nfs_mount.h>
  44
  45 #ifdef CONFIG_KMOD
  46 #include <linux/kmod.h>
  47 #endif
  48
  49 /*
  50  * We use a semaphore to synchronize all mount/umount
  51  * activity - imagine the mess if we have a race between
  52  * unmounting a filesystem and re-mounting it (or something
  53  * else).
  54  */
  55 static struct semaphore mount_sem = MUTEX;
  56
  57 #ifdef CONFIG_BSD_PROCESS_ACCT
  58 extern void acct_auto_close(kdev_t);
  59 #endif
  60
  61 extern void wait_for_keypress(void);
  62 extern struct file_operations * get_blkfops(unsigned int major);
  63
  64 extern int root_mountflags;
  65
  66 static int do_remount_sb(struct super_block *sb, int flags, char * data);
  67
  68 /* this is initialized in init/main.c */
  69 kdev_t ROOT_DEV;
  70
  71 int nr_super_blocks = 0;
  72 int max_super_blocks = NR_SUPER;
  73 LIST_HEAD(super_blocks);
  74
  75 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
  76 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
  77 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
  78                        *mru_vfsmnt = (struct vfsmount *) NULL;
  79
  80 /*
  81  * This part handles the management of the list of mounted filesystems.
  82  */
  83 struct vfsmount *lookup_vfsmnt(kdev_t dev)
  84 {
  85         struct vfsmount *lptr;
  86
  87         if (vfsmntlist == (struct vfsmount *)NULL)
  88                 return ((struct vfsmount *)NULL);
  89
  90         if (mru_vfsmnt != (struct vfsmount *)NULL &&
  91             mru_vfsmnt->mnt_dev == dev)
  92                 return (mru_vfsmnt);
  93
  94         for (lptr = vfsmntlist;
  95              lptr != (struct vfsmount *)NULL;
  96              lptr = lptr->mnt_next)
  97                 if (lptr->mnt_dev == dev) {
  98                         mru_vfsmnt = lptr;
  99                         return (lptr);
 100                 }
 101
 102         return ((struct vfsmount *)NULL);
 103         /* NOTREACHED */
 104 }
 105
 106 static struct vfsmount *add_vfsmnt(struct super_block *sb,
 107                         const char *dev_name, const char *dir_name)
 108 {
 109         struct vfsmount *lptr;
 110         char *tmp, *name;
 111
 112         lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
 113         if (!lptr)
 114                 goto out;
 115         memset(lptr, 0, sizeof(struct vfsmount));
 116
 117         lptr->mnt_sb = sb;
 118         lptr->mnt_dev = sb->s_dev;
 119         lptr->mnt_flags = sb->s_flags;
 120
 121         sema_init(&lptr->mnt_dquot.semaphore, 1);
 122         lptr->mnt_dquot.flags = 0;
 123
 124         /* N.B. Is it really OK to have a vfsmount without names? */
 125         if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
 126                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 127                 if (name) {
 128                         strcpy(name, tmp);
 129                         lptr->mnt_devname = name;
 130                 }
 131                 putname(tmp);
 132         }
 133         if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
 134                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 135                 if (name) {
 136                         strcpy(name, tmp);
 137                         lptr->mnt_dirname = name;
 138                 }
 139                 putname(tmp);
 140         }
 141
 142         if (vfsmntlist == (struct vfsmount *)NULL) {
 143                 vfsmntlist = vfsmnttail = lptr;
 144         } else {
 145                 vfsmnttail->mnt_next = lptr;
 146                 vfsmnttail = lptr;
 147         }
 148 out:
 149         return lptr;
 150 }
 151
 152 static void remove_vfsmnt(kdev_t dev)
 153 {
 154         struct vfsmount *lptr, *tofree;
 155
 156         if (vfsmntlist == (struct vfsmount *)NULL)
 157                 return;
 158         lptr = vfsmntlist;
 159         if (lptr->mnt_dev == dev) {
 160                 tofree = lptr;
 161                 vfsmntlist = lptr->mnt_next;
 162                 if (vfsmnttail->mnt_dev == dev)
 163                         vfsmnttail = vfsmntlist;
 164         } else {
 165                 while (lptr->mnt_next != (struct vfsmount *)NULL) {
 166                         if (lptr->mnt_next->mnt_dev == dev)
 167                                 break;
 168                         lptr = lptr->mnt_next;
 169                 }
 170                 tofree = lptr->mnt_next;
 171                 if (tofree == (struct vfsmount *)NULL)
 172                         return;
 173                 lptr->mnt_next = lptr->mnt_next->mnt_next;
 174                 if (vfsmnttail->mnt_dev == dev)
 175                         vfsmnttail = lptr;
 176         }
 177         if (tofree == mru_vfsmnt)
 178                 mru_vfsmnt = NULL;
 179         kfree(tofree->mnt_devname);
 180         kfree(tofree->mnt_dirname);
 181         kfree_s(tofree, sizeof(struct vfsmount));
 182 }
 183
 184 int register_filesystem(struct file_system_type * fs)
 185 {
 186         struct file_system_type ** tmp;
 187
 188         if (!fs)
 189                 return -EINVAL;
 190         if (fs->next)
 191                 return -EBUSY;
 192         tmp = &file_systems;
 193         while (*tmp) {
 194                 if (strcmp((*tmp)->name, fs->name) == 0)
 195                         return -EBUSY;
 196                 tmp = &(*tmp)->next;
 197         }
 198         *tmp = fs;
 199         return 0;
 200 }
 201
 202 #ifdef CONFIG_MODULES
 203 int unregister_filesystem(struct file_system_type * fs)
 204 {
 205         struct file_system_type ** tmp;
 206
 207         tmp = &file_systems;
 208         while (*tmp) {
 209                 if (fs == *tmp) {
 210                         *tmp = fs->next;
 211                         fs->next = NULL;
 212                         return 0;
 213                 }
 214                 tmp = &(*tmp)->next;
 215         }
 216         return -EINVAL;
 217 }
 218 #endif
 219
 220 static int fs_index(const char * __name)
 221 {
 222         struct file_system_type * tmp;
 223         char * name;
 224         int err, index;
 225
 226         name = getname(__name);
 227         err = PTR_ERR(name);
 228         if (IS_ERR(name))
 229                 return err;
 230
 231         index = 0;
 232         for (tmp = file_systems ; tmp ; tmp = tmp->next) {
 233                 if (strcmp(tmp->name, name) == 0) {
 234                         putname(name);
 235                         return index;
 236                 }
 237                 index++;
 238         }
 239         putname(name);
 240         return -EINVAL;
 241 }
 242
 243 static int fs_name(unsigned int index, char * buf)
 244 {
 245         struct file_system_type * tmp;
 246         int len;
 247
 248         tmp = file_systems;
 249         while (tmp && index > 0) {
 250                 tmp = tmp->next;
 251                 index--;
 252         }
 253         if (!tmp)
 254                 return -EINVAL;
 255         len = strlen(tmp->name) + 1;
 256         return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
 257 }
 258
 259 static int fs_maxindex(void)
 260 {
 261         struct file_system_type * tmp;
 262         int index;
 263
 264         index = 0;
 265         for (tmp = file_systems ; tmp ; tmp = tmp->next)
 266                 index++;
 267         return index;
 268 }
 269
 270 /*
 271  * Whee.. Weird sysv syscall.
 272  */
 273 asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
 274 {
 275         int retval = -EINVAL;
 276
 277         lock_kernel();
 278         switch (option) {
 279                 case 1:
 280                         retval = fs_index((const char *) arg1);
 281                         break;
 282
 283                 case 2:
 284                         retval = fs_name(arg1, (char *) arg2);
 285                         break;
 286
 287                 case 3:
 288                         retval = fs_maxindex();
 289                         break;
 290         }
 291         unlock_kernel();
 292         return retval;
 293 }
 294
 295 static struct proc_fs_info {
 296         int flag;
 297         char *str;
 298 } fs_info[] = {
 299         { MS_NOEXEC, ",noexec" },
 300         { MS_NOSUID, ",nosuid" },
 301         { MS_NODEV, ",nodev" },
 302         { MS_SYNCHRONOUS, ",sync" },
 303         { MS_MANDLOCK, ",mand" },
 304         { MS_NOATIME, ",noatime" },
 305         { MS_NODIRATIME, ",nodiratime" },
 306 #ifdef MS_NOSUB                 /* Can't find this except in mount.c */
 307         { MS_NOSUB, ",nosub" },
 308 #endif
 309         { 0, NULL }
 310 };
 311
 312 static struct proc_nfs_info {
 313         int flag;
 314         char *str;
 315 } nfs_info[] = {
 316         { NFS_MOUNT_SOFT, ",soft" },
 317         { NFS_MOUNT_INTR, ",intr" },
 318         { NFS_MOUNT_POSIX, ",posix" },
 319         { NFS_MOUNT_NOCTO, ",nocto" },
 320         { NFS_MOUNT_NOAC, ",noac" },
 321         { 0, NULL }
 322 };
 323
 324 int get_filesystem_info( char *buf )
 325 {
 326         struct vfsmount *tmp = vfsmntlist;
 327         struct proc_fs_info *fs_infop;
 328         struct proc_nfs_info *nfs_infop;
 329         struct nfs_server *nfss;
 330         int len = 0;
 331
 332         while ( tmp && len < PAGE_SIZE - 160)
 333         {
 334                 len += sprintf( buf + len, "%s %s %s %s",
 335                         tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
 336                         tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
 337                 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 338                   if (tmp->mnt_flags & fs_infop->flag) {
 339                     strcpy(buf + len, fs_infop->str);
 340                     len += strlen(fs_infop->str);
 341                   }
 342                 }
 343                 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
 344                         nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
 345                         if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 346                                 len += sprintf(buf+len, ",rsize=%d",
 347                                                nfss->rsize);
 348                         }
 349                         if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 350                                 len += sprintf(buf+len, ",wsize=%d",
 351                                                nfss->wsize);
 352                         }
 353 #if 0
 354                         if (nfss->timeo != 7*HZ/10) {
 355                                 len += sprintf(buf+len, ",timeo=%d",
 356                                                nfss->timeo*10/HZ);
 357                         }
 358                         if (nfss->retrans != 3) {
 359                                 len += sprintf(buf+len, ",retrans=%d",
 360                                                nfss->retrans);
 361                         }
 362 #endif
 363                         if (nfss->acregmin != 3*HZ) {
 364                                 len += sprintf(buf+len, ",acregmin=%d",
 365                                                nfss->acregmin/HZ);
 366                         }
 367                         if (nfss->acregmax != 60*HZ) {
 368                                 len += sprintf(buf+len, ",acregmax=%d",
 369                                                nfss->acregmax/HZ);
 370                         }
 371                         if (nfss->acdirmin != 30*HZ) {
 372                                 len += sprintf(buf+len, ",acdirmin=%d",
 373                                                nfss->acdirmin/HZ);
 374                         }
 375                         if (nfss->acdirmax != 60*HZ) {
 376                                 len += sprintf(buf+len, ",acdirmax=%d",
 377                                                nfss->acdirmax/HZ);
 378                         }
 379                         for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 380                                 if (nfss->flags & nfs_infop->flag) {
 381                                         strcpy(buf + len, nfs_infop->str);
 382                                         len += strlen(nfs_infop->str);
 383                                 }
 384                         }
 385                         len += sprintf(buf+len, ",addr=%s",
 386                                        nfss->hostname);
 387                 }
 388                 len += sprintf( buf + len, " 0 0\n" );
 389                 tmp = tmp->mnt_next;
 390         }
 391
 392         return len;
 393 }
 394
 395 int get_filesystem_list(char * buf)
 396 {
 397         int len = 0;
 398         struct file_system_type * tmp;
 399
 400         tmp = file_systems;
 401         while (tmp && len < PAGE_SIZE - 80) {
 402                 len += sprintf(buf+len, "%s\t%s\n",
 403                         (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
 404                         tmp->name);
 405                 tmp = tmp->next;
 406         }
 407         return len;
 408 }
 409
 410 struct file_system_type *get_fs_type(const char *name)
 411 {
 412         struct file_system_type * fs = file_systems;
 413
 414         if (!name)
 415                 return fs;
 416         for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 417                 ;
 418 #ifdef CONFIG_KMOD
 419         if (!fs && (request_module(name) == 0)) {
 420                 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 421                         ;
 422         }
 423 #endif
 424
 425         return fs;
 426 }
 427
 428 void __wait_on_super(struct super_block * sb)
 429 {
 430         struct wait_queue wait = { current, NULL };
 431
 432         add_wait_queue(&sb->s_wait, &wait);
 433 repeat:
 434         current->state = TASK_UNINTERRUPTIBLE;
 435         if (sb->s_lock) {
 436                 schedule();
 437                 goto repeat;
 438         }
 439         remove_wait_queue(&sb->s_wait, &wait);
 440         current->state = TASK_RUNNING;
 441 }
 442
 443 /*
 444  * Note: check the dirty flag before waiting, so we don't
 445  * hold up the sync while mounting a device. (The newly
 446  * mounted device won't need syncing.)
 447  */
 448 void sync_supers(kdev_t dev)
 449 {
 450         struct super_block * sb;
 451
 452         for (sb = sb_entry(super_blocks.next);
 453              sb != sb_entry(&super_blocks);
 454              sb = sb_entry(sb->s_list.next)) {
 455                 if (!sb->s_dev)
 456                         continue;
 457                 if (dev && sb->s_dev != dev)
 458                         continue;
 459                 if (!sb->s_dirt)
 460                         continue;
 461                 /* N.B. Should lock the superblock while writing */
 462                 wait_on_super(sb);
 463                 if (!sb->s_dev || !sb->s_dirt)
 464                         continue;
 465                 if (dev && (dev != sb->s_dev))
 466                         continue;
 467                 if (sb->s_op && sb->s_op->write_super)
 468                         sb->s_op->write_super(sb);
 469         }
 470 }
 471
 472 struct super_block * get_super(kdev_t dev)
 473 {
 474         struct super_block * s;
 475
 476         if (!dev)
 477                 return NULL;
 478 restart:
 479         s = sb_entry(super_blocks.next);
 480         while (s != sb_entry(&super_blocks))
 481                 if (s->s_dev == dev) {
 482                         wait_on_super(s);
 483                         if (s->s_dev == dev)
 484                                 return s;
 485                         goto restart;
 486                 } else
 487                         s = sb_entry(s->s_list.next);
 488         return NULL;
 489 }
 490
 491 asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
 492 {
 493         struct super_block *s;
 494         struct ustat tmp;
 495         struct statfs sbuf;
 496         mm_segment_t old_fs;
 497         int err = -EINVAL;
 498
 499         lock_kernel();
 500         s = get_super(to_kdev_t(dev));
 501         if (s == NULL)
 502                 goto out;
 503         err = -ENOSYS;
 504         if (!(s->s_op->statfs))
 505                 goto out;
 506
 507         old_fs = get_fs();
 508         set_fs(get_ds());
 509         s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
 510         set_fs(old_fs);
 511
 512         memset(&tmp,0,sizeof(struct ustat));
 513         tmp.f_tfree = sbuf.f_bfree;
 514         tmp.f_tinode = sbuf.f_ffree;
 515
 516         err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
 517 out:
 518         unlock_kernel();
 519         return err;
 520 }
 521
 522 /*
 523  * Find a super_block with no device assigned.
 524  */
 525 static struct super_block *get_empty_super(void)
 526 {
 527         struct super_block *s;
 528
 529         for (s  = sb_entry(super_blocks.next);
 530              s != sb_entry(&super_blocks);
 531              s  = sb_entry(s->s_list.next)) {
 532                 if (s->s_dev)
 533                         continue;
 534                 if (!s->s_lock)
 535                         return s;
 536                 printk("VFS: empty superblock %p locked!\n", s);
 537         }
 538         /* Need a new one... */
 539         if (nr_super_blocks >= max_super_blocks)
 540                 return NULL;
 541         s = kmalloc(sizeof(struct super_block),  GFP_USER);
 542         if (s) {
 543                 nr_super_blocks++;
 544                 memset(s, 0, sizeof(struct super_block));
 545                 INIT_LIST_HEAD(&s->s_dirty);
 546                 list_add (&s->s_list, super_blocks.prev);
 547         }
 548         return s;
 549 }
 550
 551 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
 552                                        void *data, int silent)
 553 {
 554         struct super_block * s;
 555         struct file_system_type *type;
 556
 557         if (!dev)
 558                 goto out_null;
 559         check_disk_change(dev);
 560         s = get_super(dev);
 561         if (s)
 562                 goto out;
 563
 564         type = get_fs_type(name);
 565         if (!type) {
 566                 printk("VFS: on device %s: get_fs_type(%s) failed\n",
 567                        kdevname(dev), name);
 568                 goto out;
 569         }
 570         s = get_empty_super();
 571         if (!s)
 572                 goto out;
 573         s->s_dev = dev;
 574         s->s_flags = flags;
 575         s->s_dirt = 0;
 576         /* N.B. Should lock superblock now ... */
 577         if (!type->read_super(s, data, silent))
 578                 goto out_fail;
 579         s->s_dev = dev; /* N.B. why do this again?? */
 580         s->s_rd_only = 0;
 581         s->s_type = type;
 582 out:
 583         return s;
 584
 585         /* N.B. s_dev should be cleared in type->read_super */
 586 out_fail:
 587         s->s_dev = 0;
 588 out_null:
 589         s = NULL;
 590         goto out;
 591 }
 592
 593 /*
 594  * Unnamed block devices are dummy devices used by virtual
 595  * filesystems which don't use real block-devices.  -- jrs
 596  */
 597
 598 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
 599
 600 kdev_t get_unnamed_dev(void)
 601 {
 602         int i;
 603
 604         for (i = 1; i < 256; i++) {
 605                 if (!test_and_set_bit(i,unnamed_dev_in_use))
 606                         return MKDEV(UNNAMED_MAJOR, i);
 607         }
 608         return 0;
 609 }
 610
 611 void put_unnamed_dev(kdev_t dev)
 612 {
 613         if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
 614                 return;
 615         if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
 616                 return;
 617         printk("VFS: put_unnamed_dev: freeing unused device %s\n",
 618                         kdevname(dev));
 619 }
 620
 621 static int d_umount(struct super_block * sb)
 622 {
 623         struct dentry * root = sb->s_root;
 624         struct dentry * covered = root->d_covers;
 625
 626         if (root->d_count != 1)
 627                 return -EBUSY;
 628
 629         if (root->d_inode->i_state)
 630                 return -EBUSY;
 631
 632         sb->s_root = NULL;
 633
 634         if (covered != root) {
 635                 root->d_covers = root;
 636                 covered->d_mounts = covered;
 637                 dput(covered);
 638         }
 639         dput(root);
 640         return 0;
 641 }
 642
 643 static void d_mount(struct dentry *covered, struct dentry *dentry)
 644 {
 645         if (covered->d_mounts != covered) {
 646                 printk("VFS: mount - already mounted\n");
 647                 return;
 648         }
 649         covered->d_mounts = dentry;
 650         dentry->d_covers = covered;
 651 }
 652
 653 static int do_umount(kdev_t dev, int unmount_root, int flags)
 654 {
 655         struct super_block * sb;
 656         int retval;
 657
 658         retval = -ENOENT;
 659         sb = get_super(dev);
 660         if (!sb || !sb->s_root)
 661                 goto out;
 662
 663         /*
 664          * Before checking whether the filesystem is still busy,
 665          * make sure the kernel doesn't hold any quota files open
 666          * on the device. If the umount fails, too bad -- there
 667          * are no quotas running any more. Just turn them on again.
 668          */
 669         DQUOT_OFF(dev);
 670
 671 #ifdef CONFIG_BSD_PROCESS_ACCT
 672         (void) acct_auto_close(dev);
 673 #endif
 674
 675         /*
 676          * If we may have to abort operations to get out of this
 677          * mount, and they will themselves hold resources we must
 678          * allow the fs to do things. In the Unix tradition of
 679          * 'Gee thats tricky lets do it in userspace' the umount_begin
 680          * might fail to complete on the first run through as other tasks
 681          * must return, and the like. Thats for the mount program to worry
 682          * about for the moment.
 683          */
 684
 685         if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
 686                 sb->s_op->umount_begin(sb);
 687
 688         /*
 689          * Shrink dcache, then fsync. This guarantees that if the
 690          * filesystem is quiescent at this point, then (a) only the
 691          * root entry should be in use and (b) that root entry is
 692          * clean.
 693          */
 694         shrink_dcache_sb(sb);
 695         fsync_dev(dev);
 696
 697         if (dev==ROOT_DEV && !unmount_root) {
 698                 /*
 699                  * Special case for "unmounting" root ...
 700                  * we just try to remount it readonly.
 701                  */
 702                 retval = 0;
 703                 if (!(sb->s_flags & MS_RDONLY))
 704                         retval = do_remount_sb(sb, MS_RDONLY, 0);
 705                 return retval;
 706         }
 707
 708         retval = d_umount(sb);
 709         if (retval)
 710                 goto out;
 711
 712         if (sb->s_op) {
 713                 if (sb->s_op->write_super && sb->s_dirt)
 714                         sb->s_op->write_super(sb);
 715         }
 716
 717         lock_super(sb);
 718         if (sb->s_op) {
 719                 if (sb->s_op->put_super)
 720                         sb->s_op->put_super(sb);
 721         }
 722
 723         /* Forget any remaining inodes */
 724         if (invalidate_inodes(sb)) {
 725                 printk("VFS: Busy inodes after unmount. "
 726                         "Self-destruct in 5 seconds.  Have a nice day...\n");
 727         }
 728
 729         sb->s_dev = 0;          /* Free the superblock */
 730         unlock_super(sb);
 731
 732         remove_vfsmnt(dev);
 733 out:
 734         return retval;
 735 }
 736
 737 static int umount_dev(kdev_t dev, int flags)
 738 {
 739         int retval;
 740         struct inode * inode = get_empty_inode();
 741
 742         retval = -ENOMEM;
 743         if (!inode)
 744                 goto out;
 745
 746         inode->i_rdev = dev;
 747         retval = -ENXIO;
 748         if (MAJOR(dev) >= MAX_BLKDEV)
 749                 goto out_iput;
 750
 751         fsync_dev(dev);
 752
 753         down(&mount_sem);
 754
 755         retval = do_umount(dev, 0, flags);
 756         if (!retval) {
 757                 fsync_dev(dev);
 758                 if (dev != ROOT_DEV) {
 759                         blkdev_release(inode);
 760                         put_unnamed_dev(dev);
 761                 }
 762         }
 763
 764         up(&mount_sem);
 765 out_iput:
 766         iput(inode);
 767 out:
 768         return retval;
 769 }
 770
 771 /*
 772  * Now umount can handle mount points as well as block devices.
 773  * This is important for filesystems which use unnamed block devices.
 774  *
 775  * There is a little kludge here with the dummy_inode.  The current
 776  * vfs release functions only use the r_dev field in the inode so
 777  * we give them the info they need without using a real inode.
 778  * If any other fields are ever needed by any block device release
 779  * functions, they should be faked here.  -- jrs
 780  *
 781  * We now support a flag for forced unmount like the other 'big iron'
 782  * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 783  */
 784
 785 asmlinkage int sys_umount(char * name, int flags)
 786 {
 787         struct dentry * dentry;
 788         int retval;
 789
 790         if (!capable(CAP_SYS_ADMIN))
 791                 return -EPERM;
 792
 793         lock_kernel();
 794         dentry = namei(name);
 795         retval = PTR_ERR(dentry);
 796         if (!IS_ERR(dentry)) {
 797                 struct inode * inode = dentry->d_inode;
 798                 kdev_t dev = inode->i_rdev;
 799
 800                 retval = 0;
 801                 if (S_ISBLK(inode->i_mode)) {
 802                         if (IS_NODEV(inode))
 803                                 retval = -EACCES;
 804                 } else {
 805                         struct super_block *sb = inode->i_sb;
 806                         retval = -EINVAL;
 807                         if (sb && inode == sb->s_root->d_inode) {
 808                                 dev = sb->s_dev;
 809                                 retval = 0;
 810                         }
 811                 }
 812                 dput(dentry);
 813
 814                 if (!retval)
 815                         retval = umount_dev(dev, flags);
 816         }
 817         unlock_kernel();
 818         return retval;
 819 }
 820
 821 /*
 822  *      The 2.0 compatible umount. No flags.
 823  */
 824
 825 asmlinkage int sys_oldumount(char * name)
 826 {
 827         return sys_umount(name,0);
 828 }
 829
 830 /*
 831  * Check whether we can mount the specified device.
 832  */
 833 int fs_may_mount(kdev_t dev)
 834 {
 835         struct super_block * sb = get_super(dev);
 836         int busy;
 837
 838         busy = sb && sb->s_root &&
 839                (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
 840         return !busy;
 841 }
 842
 843 /*
 844  * do_mount() does the actual mounting after sys_mount has done the ugly
 845  * parameter parsing. When enough time has gone by, and everything uses the
 846  * new mount() parameters, sys_mount() can then be cleaned up.
 847  *
 848  * We cannot mount a filesystem if it has active, used, or dirty inodes.
 849  * We also have to flush all inode-data for this device, as the new mount
 850  * might need new info.
 851  *
 852  * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
 853  * supplying a leading "!" before the dir_name, allowing "stacks" of
 854  * mounted filesystems. The stacking will only influence any pathname lookups
 855  * _after_ the mount, but open file descriptors or working directories that
 856  * are now covered remain valid. For example, when you overmount /home, any
 857  * process with old cwd /home/joe will continue to use the old versions,
 858  * as long as relative paths are used, but absolute paths like /home/joe/xxx
 859  * will go to the new "top of stack" version. In general, crossing a
 860  * mount point will always go to the top of stack element.
 861  * Anyone using this new feature must know what he/she is doing.
 862  */
 863
 864 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data)
 865 {
 866         struct dentry * dir_d;
 867         struct super_block * sb;
 868         struct vfsmount *vfsmnt;
 869         int error;
 870
 871         down(&mount_sem);
 872         error = -EACCES;
 873         if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
 874                 goto out;
 875                 /*flags |= MS_RDONLY;*/
 876
 877         dir_d = namei(dir_name);
 878         error = PTR_ERR(dir_d);
 879         if (IS_ERR(dir_d))
 880                 goto out;
 881
 882         error = -ENOTDIR;
 883         if (!S_ISDIR(dir_d->d_inode->i_mode))
 884                 goto dput_and_out;
 885
 886         error = -EBUSY;
 887         if (dir_d->d_covers != dir_d)
 888                 goto dput_and_out;
 889
 890         /*
 891          * Note: If the superblock already exists,
 892          * read_super just does a get_super().
 893          */
 894         error = -EINVAL;
 895         sb = read_super(dev, type, flags, data, 0);
 896         if (!sb)
 897                 goto dput_and_out;
 898
 899         /*
 900          * We may have slept while reading the super block,
 901          * so we check afterwards whether it's safe to mount.
 902          */
 903         error = -EBUSY;
 904         if (!fs_may_mount(dev))
 905                 goto dput_and_out;
 906
 907         error = -ENOMEM;
 908         vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
 909         if (!vfsmnt)
 910                 goto dput_and_out;
 911         d_mount(dir_d, sb->s_root);
 912         error = 0;      /* we don't dput(dir_d) - see umount */
 913
 914 out:
 915         up(&mount_sem);
 916         return error;
 917
 918 dput_and_out:
 919         dput(dir_d);
 920         goto out;
 921 }
 922
 923
 924 /*
 925  * Alters the mount flags of a mounted file system. Only the mount point
 926  * is used as a reference - file system type and the device are ignored.
 927  * FS-specific mount options can't be altered by remounting.
 928  */
 929
 930 static int do_remount_sb(struct super_block *sb, int flags, char *data)
 931 {
 932         int retval;
 933         struct vfsmount *vfsmnt;
 934
 935         /*
 936          * Invalidate the inodes, as some mount options may be changed.
 937          * N.B. If we are changing media, we should check the return
 938          * from invalidate_inodes ... can't allow _any_ open files.
 939          */
 940         invalidate_inodes(sb);
 941
 942         if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
 943                 return -EACCES;
 944                 /*flags |= MS_RDONLY;*/
 945         /* If we are remounting RDONLY, make sure there are no rw files open */
 946         if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
 947                 if (!fs_may_remount_ro(sb))
 948                         return -EBUSY;
 949         if (sb->s_op && sb->s_op->remount_fs) {
 950                 retval = sb->s_op->remount_fs(sb, &flags, data);
 951                 if (retval)
 952                         return retval;
 953         }
 954         sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 955         vfsmnt = lookup_vfsmnt(sb->s_dev);
 956         if (vfsmnt)
 957                 vfsmnt->mnt_flags = sb->s_flags;
 958         return 0;
 959 }
 960
 961 static int do_remount(const char *dir,int flags,char *data)
 962 {
 963         struct dentry *dentry;
 964         int retval;
 965
 966         dentry = namei(dir);
 967         retval = PTR_ERR(dentry);
 968         if (!IS_ERR(dentry)) {
 969                 struct super_block * sb = dentry->d_inode->i_sb;
 970
 971                 retval = -EINVAL;
 972                 if (dentry == sb->s_root) {
 973                         /*
 974                          * Shrink the dcache and sync the device.
 975                          */
 976                         shrink_dcache_sb(sb);
 977                         fsync_dev(sb->s_dev);
 978                         retval = do_remount_sb(sb, flags, data);
 979                 }
 980                 dput(dentry);
 981         }
 982         return retval;
 983 }
 984
 985 static int copy_mount_options (const void * data, unsigned long *where)
 986 {
 987         int i;
 988         unsigned long page;
 989         struct vm_area_struct * vma;
 990
 991         *where = 0;
 992         if (!data)
 993                 return 0;
 994
 995         vma = find_vma(current->mm, (unsigned long) data);
 996         if (!vma || (unsigned long) data < vma->vm_start)
 997                 return -EFAULT;
 998         if (!(vma->vm_flags & VM_READ))
 999                 return -EFAULT;
1000         i = vma->vm_end - (unsigned long) data;
1001         if (PAGE_SIZE <= (unsigned long) i)
1002                 i = PAGE_SIZE-1;
1003         if (!(page = __get_free_page(GFP_KERNEL))) {
1004                 return -ENOMEM;
1005         }
1006         if (copy_from_user((void *) page,data,i)) {
1007                 free_page(page);
1008                 return -EFAULT;
1009         }
1010         *where = page;
1011         return 0;
1012 }
1013
1014 /*
1015  * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1016  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1017  *
1018  * data is a (void *) that can point to any structure up to
1019  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1020  * information (or be NULL).
1021  *
1022  * NOTE! As old versions of mount() didn't use this setup, the flags
1023  * have to have a special 16-bit magic number in the high word:
1024  * 0xC0ED. If this magic word isn't present, the flags and data info
1025  * aren't used, as the syscall assumes we are talking to an older
1026  * version that didn't understand them.
1027  */
/*
 * The mount system call.
 *
 * Runs entirely under the big kernel lock and requires CAP_SYS_ADMIN
 * (-EPERM otherwise).  Three stages:
 *
 *  - If new_flags carries the magic value together with MS_REMOUNT,
 *    the request is passed to do_remount() and nothing else is done.
 *  - Otherwise the file system type is looked up (-ENODEV if unknown).
 *    For types flagged FS_REQUIRES_DEV, dev_name must name a block
 *    device, which is opened through its block file operations; other
 *    types get an unnamed device number.
 *  - Finally do_mount() performs the mount.  If it (or copying the
 *    option data) fails, the device is released again at clean_up.
 *
 * Returns 0 on success or a negative errno value.
 */
asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
        unsigned long new_flags, void * data)
{
        struct file_system_type * fstype;
        struct dentry * dentry = NULL;
        struct inode * inode = NULL;
        kdev_t dev;
        int retval = -EPERM;
        unsigned long flags = 0;
        unsigned long page = 0;
        struct file dummy;      /* allows read-write or read-only flag */

        lock_kernel();
        if (!capable(CAP_SYS_ADMIN))
                goto out;
        /* Remount request: magic value present and MS_REMOUNT set. */
        if ((new_flags &
             (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
                retval = copy_mount_options (data, &page);
                if (retval < 0)
                        goto out;
                /* The magic and MS_REMOUNT bits are stripped from the flags. */
                retval = do_remount(dir_name,
                                    new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
                                    (char *) page);
                free_page(page);
                goto out;
        }

        /*
         * The type name is copied in only for the lookup; the page is
         * freed again right away and fstype->name is used from here on.
         */
        retval = copy_mount_options (type, &page);
        if (retval < 0)
                goto out;
        fstype = get_fs_type((char *) page);
        free_page(page);
        retval = -ENODEV;
        if (!fstype)
                goto out;

        /* Zeroing dummy leaves dummy.f_op NULL; clean_up tests that. */
        memset(&dummy, 0, sizeof(dummy));
        if (fstype->fs_flags & FS_REQUIRES_DEV) {
                dentry = namei(dev_name);
                retval = PTR_ERR(dentry);
                if (IS_ERR(dentry))
                        goto out;

                inode = dentry->d_inode;
                retval = -ENOTBLK;
                if (!S_ISBLK(inode->i_mode))
                        goto dput_and_out;

                retval = -EACCES;
                if (IS_NODEV(inode))
                        goto dput_and_out;

                dev = inode->i_rdev;
                retval = -ENXIO;
                if (MAJOR(dev) >= MAX_BLKDEV)
                        goto dput_and_out;

                retval = -ENOTBLK;
                dummy.f_op = get_blkfops(MAJOR(dev));
                if (!dummy.f_op)
                        goto dput_and_out;

                if (dummy.f_op->open) {
                        dummy.f_dentry = dentry;
                        /* f_mode 1 for a read-only mount, 3 for read-write. */
                        dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
                        retval = dummy.f_op->open(inode, &dummy);
                        if (retval)
                                goto dput_and_out;
                }

        } else {
                retval = -EMFILE;
                if (!(dev = get_unnamed_dev()))
                        goto out;
        }

        /*
         * flags and the option page are only used when the magic value
         * is present; otherwise do_mount() gets flags 0 and no data.
         */
        page = 0;
        if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
                flags = new_flags & ~MS_MGC_MSK;
                retval = copy_mount_options(data, &page);
                if (retval < 0)
                        goto clean_up;
        }
        retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
                                (void *) page);
        free_page(page);
        if (retval)
                goto clean_up;

        /* Success falls through: the dentry from namei(dev_name) is dropped. */
dput_and_out:
        dput(dentry);
out:
        unlock_kernel();
        return retval;

        /*
         * Undo the device setup: a non-NULL dummy.f_op means the block
         * device branch was taken, otherwise dev is an unnamed device.
         */
clean_up:
        if (dummy.f_op) {
                if (dummy.f_op->release)
                        dummy.f_op->release(inode, NULL);
        } else
                put_unnamed_dev(dev);
        goto dput_and_out;
}
1131
/*
 * Mount the root file system on ROOT_DEV and make it the root and
 * working directory of the current task.
 *
 * With CONFIG_ROOT_NFS, a ROOT_DEV with major UNNAMED_MAJOR selects an
 * NFS root; if that fails the first floppy is tried instead.  Otherwise
 * the block device is opened and every registered file system type
 * that requires a device is tried in turn with read_super().
 *
 * Returns only on success; panics if no root file system can be
 * mounted.
 */
void __init mount_root(void)
{
        struct file_system_type * fs_type;
        struct super_block * sb;
        struct vfsmount *vfsmnt;
        struct inode * d_inode = NULL;
        struct file filp;
        int retval;

#ifdef CONFIG_ROOT_NFS
        if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
                /* ROOT_DEV stays 0 unless the NFS mount below succeeds. */
                ROOT_DEV = 0;
                if ((fs_type = get_fs_type("nfs"))) {
                        sb = get_empty_super(); /* "can't fail" */
                        sb->s_dev = get_unnamed_dev();
                        sb->s_flags = root_mountflags & ~MS_RDONLY;
                        vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
                        if (vfsmnt) {
                                if (nfs_root_mount(sb) >= 0) {
                                        sb->s_rd_only = 0;
                                        sb->s_dirt = 0;
                                        sb->s_type = fs_type;
                                        current->fs->root = dget(sb->s_root);
                                        current->fs->pwd = dget(sb->s_root);
                                        ROOT_DEV = sb->s_dev;
                                        printk (KERN_NOTICE "VFS: Mounted root (nfs filesystem).\n");
                                        return;
                                }
                                remove_vfsmnt(sb->s_dev);
                        }
                        /* NFS mount failed: give the unnamed device back. */
                        put_unnamed_dev(sb->s_dev);
                        sb->s_dev = 0;
                }
                if (!ROOT_DEV) {
                        printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
                        ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
                }
        }
#endif

#ifdef CONFIG_BLK_DEV_FD
        /* Root on floppy: eject, prompt for the root disk and wait. */
        if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
                floppy_eject();
#ifndef CONFIG_BLK_DEV_RAM
                printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
#endif
                printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
                wait_for_keypress();
        }
#endif

        /*
         * Open the root device through a temporary inode and a zeroed
         * struct file; the inode is dropped again once the open is done.
         */
        memset(&filp, 0, sizeof(filp));
        d_inode = get_empty_inode();
        d_inode->i_rdev = ROOT_DEV;
        filp.f_dentry = NULL;
        if ( root_mountflags & MS_RDONLY)
                filp.f_mode = 1; /* read only */
        else
                filp.f_mode = 3; /* read write */
        retval = blkdev_open(d_inode, &filp);
        /* -EROFS: force a read-only root mount and retry the open. */
        if (retval == -EROFS) {
                root_mountflags |= MS_RDONLY;
                filp.f_mode = 1;
                retval = blkdev_open(d_inode, &filp);
        }
        iput(d_inode);
        if (retval)
                /*
                 * Allow the user to distinguish between failed open
                 * and bad superblock on root device.
                 */
                printk("VFS: Cannot open root device %s\n",
                       kdevname(ROOT_DEV));
        else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
                /* Only file systems that live on a device are candidates. */
                if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
                        continue;
                /* NOTE(review): the final argument 1 is presumably "silent" - confirm against read_super(). */
                sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
                if (sb) {
                        sb->s_flags = root_mountflags;
                        current->fs->root = dget(sb->s_root);
                        current->fs->pwd = dget(sb->s_root);
                        printk ("VFS: Mounted root (%s filesystem)%s.\n",
                                fs_type->name,
                                (sb->s_flags & MS_RDONLY) ? " readonly" : "");
                        vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
                        if (vfsmnt)
                                return;
                        panic("VFS: add_vfsmnt failed for root fs");
                }
        }
        /* Reached when the open failed or no file system type accepted the device. */
        panic("VFS: Unable to mount root fs on %s",
                kdevname(ROOT_DEV));
}
1225
1226
1227 #ifdef CONFIG_BLK_DEV_INITRD
1228
1229 extern int initmem_freed;
1230
1231 static int __init do_change_root(kdev_t new_root_dev,const char *put_old)
1232 {
1233         kdev_t old_root_dev;
1234         struct vfsmount *vfsmnt;
1235         struct dentry *old_root,*old_pwd,*dir_d = NULL;
1236         int error;
1237
1238         old_root = current->fs->root;
1239         old_pwd = current->fs->pwd;
1240         old_root_dev = ROOT_DEV;
1241         if (!fs_may_mount(new_root_dev)) {
1242                 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1243                 return -EBUSY;
1244         }
1245         ROOT_DEV = new_root_dev;
1246         mount_root();
1247         dput(old_root);
1248         dput(old_pwd);
1249 #if 1
1250         shrink_dcache();
1251         printk("do_change_root: old root has d_count=%d\n", old_root->d_count);
1252 #endif
1253         /*
1254          * Get the new mount directory
1255          */
1256         dir_d = lookup_dentry(put_old, NULL, 1);
1257         if (IS_ERR(dir_d)) {
1258                 error = PTR_ERR(dir_d);
1259         } else if (!dir_d->d_inode) {
1260                 dput(dir_d);
1261                 error = -ENOENT;
1262         } else {
1263                 error = 0;
1264         }
1265         if (!error && dir_d->d_covers != dir_d) {
1266                 dput(dir_d);
1267                 error = -EBUSY;
1268         }
1269         if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1270                 dput(dir_d);
1271                 error = -ENOTDIR;
1272         }
1273         if (error) {
1274                 int umount_error;
1275
1276                 printk(KERN_NOTICE "Trying to unmount old root ... ");
1277                 umount_error = do_umount(old_root_dev,1, 0);
1278                 if (!umount_error) {
1279                         printk("okay\n");
1280                         invalidate_buffers(old_root_dev);
1281                         return 0;
1282                 }
1283                 printk(KERN_ERR "error %d\n",umount_error);
1284                 return error;
1285         }
1286         remove_vfsmnt(old_root_dev);
1287         vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1288         if (vfsmnt) {
1289                 d_mount(dir_d,old_root);
1290                 return 0;
1291         }
1292         printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1293         return -ENOMEM;
1294 }
1295
1296 int change_root(kdev_t new_root_dev,const char *put_old)
1297 {
1298         if (initmem_freed) {
1299                 printk (KERN_CRIT "Initmem has been already freed. Staying in initrd\n");
1300                 return -EBUSY;
1301         }
1302         return do_change_root(new_root_dev, put_old);
1303 }
1304
1305 #endif