fs/super.c

   1 /*
   2  *  linux/fs/super.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *
   6  *  super.c contains code to handle: - mount structures
   7  *                                   - super-block tables.
   8  *                                   - mount system call
   9  *                                   - umount system call
  10  *
  11  *  Added options to /proc/mounts
  12  *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
  13  *
  14  * GK 2/5/95  -  Changed to support mounting the root fs via NFS
  15  *
  16  *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
  17  *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
  18  */
  19
  20 #include <linux/config.h>
  21 #include <linux/malloc.h>
  22 #include <linux/locks.h>
  23 #include <linux/smp_lock.h>
  24 #include <linux/fd.h>
  25 #include <linux/init.h>
  26 #include <linux/quotaops.h>
  27 #include <linux/acct.h>
  28
  29 #include <asm/uaccess.h>
  30
  31 #include <linux/nfs_fs.h>
  32 #include <linux/nfs_fs_sb.h>
  33 #include <linux/nfs_mount.h>
  34
  35 #ifdef CONFIG_KMOD
  36 #include <linux/kmod.h>
  37 #endif
  38
  39 /*
  40  * We use a semaphore to synchronize all mount/umount
  41  * activity - imagine the mess if we have a race between
  42  * unmounting a filesystem and re-mounting it (or something
  43  * else).
  44  */
  45 static DECLARE_MUTEX(mount_sem);
  46
  47 extern void wait_for_keypress(void);
  48 extern struct file_operations * get_blkfops(unsigned int major);
  49
  50 extern int root_mountflags;
  51
  52 static int do_remount_sb(struct super_block *sb, int flags, char * data);
  53
  54 /* this is initialized in init/main.c */
  55 kdev_t ROOT_DEV;
  56
  57 int nr_super_blocks = 0;
  58 int max_super_blocks = NR_SUPER;
  59 LIST_HEAD(super_blocks);
  60
  61 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
  62 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
  63 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
  64                        *mru_vfsmnt = (struct vfsmount *) NULL;
  65
  66 /*
  67  * This part handles the management of the list of mounted filesystems.
  68  */
  69 struct vfsmount *lookup_vfsmnt(kdev_t dev)
  70 {
  71         struct vfsmount *lptr;
  72
  73         if (vfsmntlist == (struct vfsmount *)NULL)
  74                 return ((struct vfsmount *)NULL);
  75
  76         if (mru_vfsmnt != (struct vfsmount *)NULL &&
  77             mru_vfsmnt->mnt_dev == dev)
  78                 return (mru_vfsmnt);
  79
  80         for (lptr = vfsmntlist;
  81              lptr != (struct vfsmount *)NULL;
  82              lptr = lptr->mnt_next)
  83                 if (lptr->mnt_dev == dev) {
  84                         mru_vfsmnt = lptr;
  85                         return (lptr);
  86                 }
  87
  88         return ((struct vfsmount *)NULL);
  89         /* NOTREACHED */
  90 }
  91
  92 static struct vfsmount *add_vfsmnt(struct super_block *sb,
  93                         const char *dev_name, const char *dir_name)
  94 {
  95         struct vfsmount *lptr;
  96         char *tmp, *name;
  97
  98         lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
  99         if (!lptr)
 100                 goto out;
 101         memset(lptr, 0, sizeof(struct vfsmount));
 102
 103         lptr->mnt_sb = sb;
 104         lptr->mnt_dev = sb->s_dev;
 105         lptr->mnt_flags = sb->s_flags;
 106
 107         sema_init(&lptr->mnt_dquot.semaphore, 1);
 108         lptr->mnt_dquot.flags = 0;
 109
 110         /* N.B. Is it really OK to have a vfsmount without names? */
 111         if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
 112                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 113                 if (name) {
 114                         strcpy(name, tmp);
 115                         lptr->mnt_devname = name;
 116                 }
 117                 putname(tmp);
 118         }
 119         if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
 120                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 121                 if (name) {
 122                         strcpy(name, tmp);
 123                         lptr->mnt_dirname = name;
 124                 }
 125                 putname(tmp);
 126         }
 127
 128         if (vfsmntlist == (struct vfsmount *)NULL) {
 129                 vfsmntlist = vfsmnttail = lptr;
 130         } else {
 131                 vfsmnttail->mnt_next = lptr;
 132                 vfsmnttail = lptr;
 133         }
 134 out:
 135         return lptr;
 136 }
 137
 138 static void remove_vfsmnt(kdev_t dev)
 139 {
 140         struct vfsmount *lptr, *tofree;
 141
 142         if (vfsmntlist == (struct vfsmount *)NULL)
 143                 return;
 144         lptr = vfsmntlist;
 145         if (lptr->mnt_dev == dev) {
 146                 tofree = lptr;
 147                 vfsmntlist = lptr->mnt_next;
 148                 if (vfsmnttail->mnt_dev == dev)
 149                         vfsmnttail = vfsmntlist;
 150         } else {
 151                 while (lptr->mnt_next != (struct vfsmount *)NULL) {
 152                         if (lptr->mnt_next->mnt_dev == dev)
 153                                 break;
 154                         lptr = lptr->mnt_next;
 155                 }
 156                 tofree = lptr->mnt_next;
 157                 if (tofree == (struct vfsmount *)NULL)
 158                         return;
 159                 lptr->mnt_next = lptr->mnt_next->mnt_next;
 160                 if (vfsmnttail->mnt_dev == dev)
 161                         vfsmnttail = lptr;
 162         }
 163         if (tofree == mru_vfsmnt)
 164                 mru_vfsmnt = NULL;
 165         kfree(tofree->mnt_devname);
 166         kfree(tofree->mnt_dirname);
 167         kfree_s(tofree, sizeof(struct vfsmount));
 168 }
 169
 170 int register_filesystem(struct file_system_type * fs)
 171 {
 172         struct file_system_type ** tmp;
 173
 174         if (!fs)
 175                 return -EINVAL;
 176         if (fs->next)
 177                 return -EBUSY;
 178         tmp = &file_systems;
 179         while (*tmp) {
 180                 if (strcmp((*tmp)->name, fs->name) == 0)
 181                         return -EBUSY;
 182                 tmp = &(*tmp)->next;
 183         }
 184         *tmp = fs;
 185         return 0;
 186 }
 187
 188 #ifdef CONFIG_MODULES
 189 int unregister_filesystem(struct file_system_type * fs)
 190 {
 191         struct file_system_type ** tmp;
 192
 193         tmp = &file_systems;
 194         while (*tmp) {
 195                 if (fs == *tmp) {
 196                         *tmp = fs->next;
 197                         fs->next = NULL;
 198                         return 0;
 199                 }
 200                 tmp = &(*tmp)->next;
 201         }
 202         return -EINVAL;
 203 }
 204 #endif
 205
 206 static int fs_index(const char * __name)
 207 {
 208         struct file_system_type * tmp;
 209         char * name;
 210         int err, index;
 211
 212         name = getname(__name);
 213         err = PTR_ERR(name);
 214         if (IS_ERR(name))
 215                 return err;
 216
 217         index = 0;
 218         for (tmp = file_systems ; tmp ; tmp = tmp->next) {
 219                 if (strcmp(tmp->name, name) == 0) {
 220                         putname(name);
 221                         return index;
 222                 }
 223                 index++;
 224         }
 225         putname(name);
 226         return -EINVAL;
 227 }
 228
 229 static int fs_name(unsigned int index, char * buf)
 230 {
 231         struct file_system_type * tmp;
 232         int len;
 233
 234         tmp = file_systems;
 235         while (tmp && index > 0) {
 236                 tmp = tmp->next;
 237                 index--;
 238         }
 239         if (!tmp)
 240                 return -EINVAL;
 241         len = strlen(tmp->name) + 1;
 242         return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
 243 }
 244
 245 static int fs_maxindex(void)
 246 {
 247         struct file_system_type * tmp;
 248         int index;
 249
 250         index = 0;
 251         for (tmp = file_systems ; tmp ; tmp = tmp->next)
 252                 index++;
 253         return index;
 254 }
 255
 256 /*
 257  * Whee.. Weird sysv syscall.
 258  */
 259 asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
 260 {
 261         int retval = -EINVAL;
 262
 263         lock_kernel();
 264         switch (option) {
 265                 case 1:
 266                         retval = fs_index((const char *) arg1);
 267                         break;
 268
 269                 case 2:
 270                         retval = fs_name(arg1, (char *) arg2);
 271                         break;
 272
 273                 case 3:
 274                         retval = fs_maxindex();
 275                         break;
 276         }
 277         unlock_kernel();
 278         return retval;
 279 }
 280
 281 static struct proc_fs_info {
 282         int flag;
 283         char *str;
 284 } fs_info[] = {
 285         { MS_NOEXEC, ",noexec" },
 286         { MS_NOSUID, ",nosuid" },
 287         { MS_NODEV, ",nodev" },
 288         { MS_SYNCHRONOUS, ",sync" },
 289         { MS_MANDLOCK, ",mand" },
 290         { MS_NOATIME, ",noatime" },
 291         { MS_NODIRATIME, ",nodiratime" },
 292 #ifdef MS_NOSUB                 /* Can't find this except in mount.c */
 293         { MS_NOSUB, ",nosub" },
 294 #endif
 295         { 0, NULL }
 296 };
 297
 298 static struct proc_nfs_info {
 299         int flag;
 300         char *str;
 301 } nfs_info[] = {
 302         { NFS_MOUNT_SOFT, ",soft" },
 303         { NFS_MOUNT_INTR, ",intr" },
 304         { NFS_MOUNT_POSIX, ",posix" },
 305         { NFS_MOUNT_NOCTO, ",nocto" },
 306         { NFS_MOUNT_NOAC, ",noac" },
 307         { 0, NULL }
 308 };
 309
 310 int get_filesystem_info( char *buf )
 311 {
 312         struct vfsmount *tmp = vfsmntlist;
 313         struct proc_fs_info *fs_infop;
 314         struct proc_nfs_info *nfs_infop;
 315         struct nfs_server *nfss;
 316         int len = 0;
 317
 318         while ( tmp && len < PAGE_SIZE - 160)
 319         {
 320                 len += sprintf( buf + len, "%s %s %s %s",
 321                         tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
 322                         tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
 323                 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 324                   if (tmp->mnt_flags & fs_infop->flag) {
 325                     strcpy(buf + len, fs_infop->str);
 326                     len += strlen(fs_infop->str);
 327                   }
 328                 }
 329                 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
 330                         nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
 331                         if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 332                                 len += sprintf(buf+len, ",rsize=%d",
 333                                                nfss->rsize);
 334                         }
 335                         if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 336                                 len += sprintf(buf+len, ",wsize=%d",
 337                                                nfss->wsize);
 338                         }
 339 #if 0
 340                         if (nfss->timeo != 7*HZ/10) {
 341                                 len += sprintf(buf+len, ",timeo=%d",
 342                                                nfss->timeo*10/HZ);
 343                         }
 344                         if (nfss->retrans != 3) {
 345                                 len += sprintf(buf+len, ",retrans=%d",
 346                                                nfss->retrans);
 347                         }
 348 #endif
 349                         if (nfss->acregmin != 3*HZ) {
 350                                 len += sprintf(buf+len, ",acregmin=%d",
 351                                                nfss->acregmin/HZ);
 352                         }
 353                         if (nfss->acregmax != 60*HZ) {
 354                                 len += sprintf(buf+len, ",acregmax=%d",
 355                                                nfss->acregmax/HZ);
 356                         }
 357                         if (nfss->acdirmin != 30*HZ) {
 358                                 len += sprintf(buf+len, ",acdirmin=%d",
 359                                                nfss->acdirmin/HZ);
 360                         }
 361                         if (nfss->acdirmax != 60*HZ) {
 362                                 len += sprintf(buf+len, ",acdirmax=%d",
 363                                                nfss->acdirmax/HZ);
 364                         }
 365                         for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 366                                 if (nfss->flags & nfs_infop->flag) {
 367                                         strcpy(buf + len, nfs_infop->str);
 368                                         len += strlen(nfs_infop->str);
 369                                 }
 370                         }
 371                         len += sprintf(buf+len, ",addr=%s",
 372                                        nfss->hostname);
 373                 }
 374                 len += sprintf( buf + len, " 0 0\n" );
 375                 tmp = tmp->mnt_next;
 376         }
 377
 378         return len;
 379 }
 380
 381 int get_filesystem_list(char * buf)
 382 {
 383         int len = 0;
 384         struct file_system_type * tmp;
 385
 386         tmp = file_systems;
 387         while (tmp && len < PAGE_SIZE - 80) {
 388                 len += sprintf(buf+len, "%s\t%s\n",
 389                         (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
 390                         tmp->name);
 391                 tmp = tmp->next;
 392         }
 393         return len;
 394 }
 395
 396 struct file_system_type *get_fs_type(const char *name)
 397 {
 398         struct file_system_type * fs = file_systems;
 399
 400         if (!name)
 401                 return fs;
 402         for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 403                 ;
 404 #ifdef CONFIG_KMOD
 405         if (!fs && (request_module(name) == 0)) {
 406                 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 407                         ;
 408         }
 409 #endif
 410
 411         return fs;
 412 }
 413
 414 void __wait_on_super(struct super_block * sb)
 415 {
 416         DECLARE_WAITQUEUE(wait, current);
 417
 418         add_wait_queue(&sb->s_wait, &wait);
 419 repeat:
 420         current->state = TASK_UNINTERRUPTIBLE;
 421         if (sb->s_lock) {
 422                 schedule();
 423                 goto repeat;
 424         }
 425         remove_wait_queue(&sb->s_wait, &wait);
 426         current->state = TASK_RUNNING;
 427 }
 428
 429 /*
 430  * Note: check the dirty flag before waiting, so we don't
 431  * hold up the sync while mounting a device. (The newly
 432  * mounted device won't need syncing.)
 433  */
 434 void sync_supers(kdev_t dev)
 435 {
 436         struct super_block * sb;
 437
 438         for (sb = sb_entry(super_blocks.next);
 439              sb != sb_entry(&super_blocks);
 440              sb = sb_entry(sb->s_list.next)) {
 441                 if (!sb->s_dev)
 442                         continue;
 443                 if (dev && sb->s_dev != dev)
 444                         continue;
 445                 if (!sb->s_dirt)
 446                         continue;
 447                 /* N.B. Should lock the superblock while writing */
 448                 wait_on_super(sb);
 449                 if (!sb->s_dev || !sb->s_dirt)
 450                         continue;
 451                 if (dev && (dev != sb->s_dev))
 452                         continue;
 453                 if (sb->s_op && sb->s_op->write_super)
 454                         sb->s_op->write_super(sb);
 455         }
 456 }
 457
 458 struct super_block * get_super(kdev_t dev)
 459 {
 460         struct super_block * s;
 461
 462         if (!dev)
 463                 return NULL;
 464 restart:
 465         s = sb_entry(super_blocks.next);
 466         while (s != sb_entry(&super_blocks))
 467                 if (s->s_dev == dev) {
 468                         wait_on_super(s);
 469                         if (s->s_dev == dev)
 470                                 return s;
 471                         goto restart;
 472                 } else
 473                         s = sb_entry(s->s_list.next);
 474         return NULL;
 475 }
 476
 477 asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
 478 {
 479         struct super_block *s;
 480         struct ustat tmp;
 481         struct statfs sbuf;
 482         mm_segment_t old_fs;
 483         int err = -EINVAL;
 484
 485         lock_kernel();
 486         s = get_super(to_kdev_t(dev));
 487         if (s == NULL)
 488                 goto out;
 489         err = -ENOSYS;
 490         if (!(s->s_op->statfs))
 491                 goto out;
 492
 493         old_fs = get_fs();
 494         set_fs(get_ds());
 495         s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
 496         set_fs(old_fs);
 497
 498         memset(&tmp,0,sizeof(struct ustat));
 499         tmp.f_tfree = sbuf.f_bfree;
 500         tmp.f_tinode = sbuf.f_ffree;
 501
 502         err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
 503 out:
 504         unlock_kernel();
 505         return err;
 506 }
 507
 508 /*
 509  * Find a super_block with no device assigned.
 510  */
 511 static struct super_block *get_empty_super(void)
 512 {
 513         struct super_block *s;
 514
 515         for (s  = sb_entry(super_blocks.next);
 516              s != sb_entry(&super_blocks);
 517              s  = sb_entry(s->s_list.next)) {
 518                 if (s->s_dev)
 519                         continue;
 520                 if (!s->s_lock)
 521                         return s;
 522                 printk("VFS: empty superblock %p locked!\n", s);
 523         }
 524         /* Need a new one... */
 525         if (nr_super_blocks >= max_super_blocks)
 526                 return NULL;
 527         s = kmalloc(sizeof(struct super_block),  GFP_USER);
 528         if (s) {
 529                 nr_super_blocks++;
 530                 memset(s, 0, sizeof(struct super_block));
 531                 INIT_LIST_HEAD(&s->s_dirty);
 532                 list_add (&s->s_list, super_blocks.prev);
 533                 init_waitqueue_head(&s->s_wait);
 534                 INIT_LIST_HEAD(&s->s_files);
 535         }
 536         return s;
 537 }
 538
 539 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
 540                                        void *data, int silent)
 541 {
 542         struct super_block * s;
 543         struct file_system_type *type;
 544
 545         if (!dev)
 546                 goto out_null;
 547         check_disk_change(dev);
 548         s = get_super(dev);
 549         if (s)
 550                 goto out;
 551
 552         type = get_fs_type(name);
 553         if (!type) {
 554                 printk("VFS: on device %s: get_fs_type(%s) failed\n",
 555                        kdevname(dev), name);
 556                 goto out;
 557         }
 558         s = get_empty_super();
 559         if (!s)
 560                 goto out;
 561         s->s_dev = dev;
 562         s->s_flags = flags;
 563         s->s_dirt = 0;
 564         sema_init(&s->s_vfs_rename_sem,1);
 565         /* N.B. Should lock superblock now ... */
 566         if (!type->read_super(s, data, silent))
 567                 goto out_fail;
 568         s->s_dev = dev; /* N.B. why do this again?? */
 569         s->s_rd_only = 0;
 570         s->s_type = type;
 571 out:
 572         return s;
 573
 574         /* N.B. s_dev should be cleared in type->read_super */
 575 out_fail:
 576         s->s_dev = 0;
 577 out_null:
 578         s = NULL;
 579         goto out;
 580 }
 581
 582 /*
 583  * Unnamed block devices are dummy devices used by virtual
 584  * filesystems which don't use real block-devices.  -- jrs
 585  */
 586
 587 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
 588
 589 kdev_t get_unnamed_dev(void)
 590 {
 591         int i;
 592
 593         for (i = 1; i < 256; i++) {
 594                 if (!test_and_set_bit(i,unnamed_dev_in_use))
 595                         return MKDEV(UNNAMED_MAJOR, i);
 596         }
 597         return 0;
 598 }
 599
 600 void put_unnamed_dev(kdev_t dev)
 601 {
 602         if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
 603                 return;
 604         if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
 605                 return;
 606         printk("VFS: put_unnamed_dev: freeing unused device %s\n",
 607                         kdevname(dev));
 608 }
 609
 610 static int d_umount(struct super_block * sb)
 611 {
 612         struct dentry * root = sb->s_root;
 613         struct dentry * covered = root->d_covers;
 614
 615         if (root->d_count != 1)
 616                 return -EBUSY;
 617
 618         if (root->d_inode->i_state)
 619                 return -EBUSY;
 620
 621         sb->s_root = NULL;
 622
 623         if (covered != root) {
 624                 root->d_covers = root;
 625                 covered->d_mounts = covered;
 626                 dput(covered);
 627         }
 628         dput(root);
 629         return 0;
 630 }
 631
 632 static void d_mount(struct dentry *covered, struct dentry *dentry)
 633 {
 634         if (covered->d_mounts != covered) {
 635                 printk("VFS: mount - already mounted\n");
 636                 return;
 637         }
 638         covered->d_mounts = dentry;
 639         dentry->d_covers = covered;
 640 }
 641
 642 static int do_umount(kdev_t dev, int unmount_root, int flags)
 643 {
 644         struct super_block * sb;
 645         int retval;
 646
 647         retval = -ENOENT;
 648         sb = get_super(dev);
 649         if (!sb || !sb->s_root)
 650                 goto out;
 651
 652         /*
 653          * Before checking whether the filesystem is still busy,
 654          * make sure the kernel doesn't hold any quota files open
 655          * on the device. If the umount fails, too bad -- there
 656          * are no quotas running any more. Just turn them on again.
 657          */
 658         DQUOT_OFF(dev);
 659         acct_auto_close(dev);
 660
 661         /*
 662          * If we may have to abort operations to get out of this
 663          * mount, and they will themselves hold resources we must
 664          * allow the fs to do things. In the Unix tradition of
 665          * 'Gee thats tricky lets do it in userspace' the umount_begin
 666          * might fail to complete on the first run through as other tasks
 667          * must return, and the like. Thats for the mount program to worry
 668          * about for the moment.
 669          */
 670
 671         if( (flags&MNT_FORCE) && sb->s_op->umount_begin)
 672                 sb->s_op->umount_begin(sb);
 673
 674         /*
 675          * Shrink dcache, then fsync. This guarantees that if the
 676          * filesystem is quiescent at this point, then (a) only the
 677          * root entry should be in use and (b) that root entry is
 678          * clean.
 679          */
 680         shrink_dcache_sb(sb);
 681         fsync_dev(dev);
 682
 683         if (dev==ROOT_DEV && !unmount_root) {
 684                 /*
 685                  * Special case for "unmounting" root ...
 686                  * we just try to remount it readonly.
 687                  */
 688                 retval = 0;
 689                 if (!(sb->s_flags & MS_RDONLY))
 690                         retval = do_remount_sb(sb, MS_RDONLY, 0);
 691                 return retval;
 692         }
 693
 694         retval = d_umount(sb);
 695         if (retval)
 696                 goto out;
 697
 698         if (sb->s_op) {
 699                 if (sb->s_op->write_super && sb->s_dirt)
 700                         sb->s_op->write_super(sb);
 701         }
 702
 703         lock_super(sb);
 704         if (sb->s_op) {
 705                 if (sb->s_op->put_super)
 706                         sb->s_op->put_super(sb);
 707         }
 708
 709         /* Forget any remaining inodes */
 710         if (invalidate_inodes(sb)) {
 711                 printk("VFS: Busy inodes after unmount. "
 712                         "Self-destruct in 5 seconds.  Have a nice day...\n");
 713         }
 714
 715         sb->s_dev = 0;          /* Free the superblock */
 716         unlock_super(sb);
 717
 718         remove_vfsmnt(dev);
 719 out:
 720         return retval;
 721 }
 722
 723 static int umount_dev(kdev_t dev, int flags)
 724 {
 725         int retval;
 726         struct inode * inode = get_empty_inode();
 727
 728         retval = -ENOMEM;
 729         if (!inode)
 730                 goto out;
 731
 732         inode->i_rdev = dev;
 733         retval = -ENXIO;
 734         if (MAJOR(dev) >= MAX_BLKDEV)
 735                 goto out_iput;
 736
 737         fsync_dev(dev);
 738
 739         down(&mount_sem);
 740
 741         retval = do_umount(dev, 0, flags);
 742         if (!retval) {
 743                 fsync_dev(dev);
 744                 if (dev != ROOT_DEV) {
 745                         blkdev_release(inode);
 746                         put_unnamed_dev(dev);
 747                 }
 748         }
 749
 750         up(&mount_sem);
 751 out_iput:
 752         iput(inode);
 753 out:
 754         return retval;
 755 }
 756
 757 /*
 758  * Now umount can handle mount points as well as block devices.
 759  * This is important for filesystems which use unnamed block devices.
 760  *
 761  * There is a little kludge here with the dummy_inode.  The current
 762  * vfs release functions only use the r_dev field in the inode so
 763  * we give them the info they need without using a real inode.
 764  * If any other fields are ever needed by any block device release
 765  * functions, they should be faked here.  -- jrs
 766  *
 767  * We now support a flag for forced unmount like the other 'big iron'
 768  * unixes. Our API is identical to OSF/1 to avoid making a mess of AMD
 769  */
 770
 771 asmlinkage int sys_umount(char * name, int flags)
 772 {
 773         struct dentry * dentry;
 774         int retval;
 775
 776         if (!capable(CAP_SYS_ADMIN))
 777                 return -EPERM;
 778
 779         lock_kernel();
 780         dentry = namei(name);
 781         retval = PTR_ERR(dentry);
 782         if (!IS_ERR(dentry)) {
 783                 struct inode * inode = dentry->d_inode;
 784                 kdev_t dev = inode->i_rdev;
 785
 786                 retval = 0;
 787                 if (S_ISBLK(inode->i_mode)) {
 788                         if (IS_NODEV(inode))
 789                                 retval = -EACCES;
 790                 } else {
 791                         struct super_block *sb = inode->i_sb;
 792                         retval = -EINVAL;
 793                         if (sb && inode == sb->s_root->d_inode) {
 794                                 dev = sb->s_dev;
 795                                 retval = 0;
 796                         }
 797                 }
 798                 dput(dentry);
 799
 800                 if (!retval)
 801                         retval = umount_dev(dev, flags);
 802         }
 803         unlock_kernel();
 804         return retval;
 805 }
 806
 807 /*
 808  *      The 2.0 compatible umount. No flags.
 809  */
 810
 811 asmlinkage int sys_oldumount(char * name)
 812 {
 813         return sys_umount(name,0);
 814 }
 815
 816 /*
 817  * Check whether we can mount the specified device.
 818  */
 819 int fs_may_mount(kdev_t dev)
 820 {
 821         struct super_block * sb = get_super(dev);
 822         int busy;
 823
 824         busy = sb && sb->s_root &&
 825                (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
 826         return !busy;
 827 }
 828
 829 /*
 830  * do_mount() does the actual mounting after sys_mount has done the ugly
 831  * parameter parsing. When enough time has gone by, and everything uses the
 832  * new mount() parameters, sys_mount() can then be cleaned up.
 833  *
 834  * We cannot mount a filesystem if it has active, used, or dirty inodes.
 835  * We also have to flush all inode-data for this device, as the new mount
 836  * might need new info.
 837  *
 838  * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
 839  * supplying a leading "!" before the dir_name, allowing "stacks" of
 840  * mounted filesystems. The stacking will only influence any pathname lookups
 841  * _after_ the mount, but open file descriptors or working directories that
 842  * are now covered remain valid. For example, when you overmount /home, any
 843  * process with old cwd /home/joe will continue to use the old versions,
 844  * as long as relative paths are used, but absolute paths like /home/joe/xxx
 845  * will go to the new "top of stack" version. In general, crossing a
 846  * mount point will always go to the top of stack element.
 847  * Anyone using this new feature must know what he/she is doing.
 848  */
 849
 850 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name,
 851              const char * type, int flags, void * data)
 852 {
 853         struct dentry * dir_d;
 854         struct super_block * sb;
 855         struct vfsmount *vfsmnt;
 856         int error;
 857
 858         error = -EACCES;
 859         if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
 860                 goto out;
 861
 862         /*
 863          * Do the lookup first to force automounting.
 864          */
 865         dir_d = namei(dir_name);
 866         error = PTR_ERR(dir_d);
 867         if (IS_ERR(dir_d))
 868                 goto out;
 869
 870         down(&mount_sem);
 871         error = -ENOTDIR;
 872         if (!S_ISDIR(dir_d->d_inode->i_mode))
 873                 goto dput_and_out;
 874
 875         error = -EBUSY;
 876         if (dir_d->d_covers != dir_d)
 877                 goto dput_and_out;
 878
 879         /*
 880          * Note: If the superblock already exists,
 881          * read_super just does a get_super().
 882          */
 883         error = -EINVAL;
 884         sb = read_super(dev, type, flags, data, 0);
 885         if (!sb)
 886                 goto dput_and_out;
 887
 888         /*
 889          * We may have slept while reading the super block,
 890          * so we check afterwards whether it's safe to mount.
 891          */
 892         error = -EBUSY;
 893         if (!fs_may_mount(dev))
 894                 goto dput_and_out;
 895
 896         error = -ENOMEM;
 897         vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
 898         if (vfsmnt) {
 899                 d_mount(dget(dir_d), sb->s_root);
 900                 error = 0;
 901         }
 902
 903 dput_and_out:
 904         dput(dir_d);
 905         up(&mount_sem);
 906 out:
 907         return error;
 908 }
 909
 910
 911 /*
 912  * Alters the mount flags of a mounted file system. Only the mount point
 913  * is used as a reference - file system type and the device are ignored.
 914  * FS-specific mount options can't be altered by remounting.
 915  */
 916
 917 static int do_remount_sb(struct super_block *sb, int flags, char *data)
 918 {
 919         int retval;
 920         struct vfsmount *vfsmnt;
 921
 922         if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
 923                 return -EACCES;
 924                 /*flags |= MS_RDONLY;*/
 925         /* If we are remounting RDONLY, make sure there are no rw files open */
 926         if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
 927                 if (!fs_may_remount_ro(sb))
 928                         return -EBUSY;
 929         if (sb->s_op && sb->s_op->remount_fs) {
 930                 retval = sb->s_op->remount_fs(sb, &flags, data);
 931                 if (retval)
 932                         return retval;
 933         }
 934         sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 935         vfsmnt = lookup_vfsmnt(sb->s_dev);
 936         if (vfsmnt)
 937                 vfsmnt->mnt_flags = sb->s_flags;
 938
 939         /*
 940          * Invalidate the inodes, as some mount options may be changed.
 941          * N.B. If we are changing media, we should check the return
 942          * from invalidate_inodes ... can't allow _any_ open files.
 943          */
 944         invalidate_inodes(sb);
 945
 946         return 0;
 947 }
 948
 949 static int do_remount(const char *dir,int flags,char *data)
 950 {
 951         struct dentry *dentry;
 952         int retval;
 953
 954         dentry = namei(dir);
 955         retval = PTR_ERR(dentry);
 956         if (!IS_ERR(dentry)) {
 957                 struct super_block * sb = dentry->d_inode->i_sb;
 958
 959                 retval = -ENODEV;
 960                 if (sb) {
 961                         retval = -EINVAL;
 962                         if (dentry == sb->s_root) {
 963                                 /*
 964                                  * Shrink the dcache and sync the device.
 965                                  */
 966                                 shrink_dcache_sb(sb);
 967                                 fsync_dev(sb->s_dev);
 968                                 if (flags & MS_RDONLY)
 969                                         acct_auto_close(sb->s_dev);
 970                                 retval = do_remount_sb(sb, flags, data);
 971                         }
 972                 }
 973                 dput(dentry);
 974         }
 975         return retval;
 976 }
 977
 978 static int copy_mount_options (const void * data, unsigned long *where)
 979 {
 980         int i;
 981         unsigned long page;
 982         struct vm_area_struct * vma;
 983
 984         *where = 0;
 985         if (!data)
 986                 return 0;
 987
 988         vma = find_vma(current->mm, (unsigned long) data);
 989         if (!vma || (unsigned long) data < vma->vm_start)
 990                 return -EFAULT;
 991         if (!(vma->vm_flags & VM_READ))
 992                 return -EFAULT;
 993         i = vma->vm_end - (unsigned long) data;
 994         if (PAGE_SIZE <= (unsigned long) i)
 995                 i = PAGE_SIZE-1;
 996         if (!(page = __get_free_page(GFP_KERNEL))) {
 997                 return -ENOMEM;
 998         }
 999         if (copy_from_user((void *) page,data,i)) {
1000                 free_page(page);
1001                 return -EFAULT;
1002         }
1003         *where = page;
1004         return 0;
1005 }
1006
1007 /*
1008  * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
1009  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
1010  *
1011  * data is a (void *) that can point to any structure up to
1012  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
1013  * information (or be NULL).
1014  *
1015  * NOTE! As old versions of mount() didn't use this setup, the flags
1016  * have to have a special 16-bit magic number in the high word:
1017  * 0xC0ED. If this magic word isn't present, the flags and data info
1018  * aren't used, as the syscall assumes we are talking to an older
1019  * version that didn't understand them.
1020  */
1021 asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
1022         unsigned long new_flags, void * data)
1023 {
1024         struct file_system_type * fstype;
1025         struct dentry * dentry = NULL;
1026         struct inode * inode = NULL;
1027         kdev_t dev;
1028         int retval = -EPERM;
1029         unsigned long flags = 0;
1030         unsigned long page = 0;
1031         struct file dummy;      /* allows read-write or read-only flag */
1032
1033         lock_kernel();
1034         if (!capable(CAP_SYS_ADMIN))
1035                 goto out;
1036         if ((new_flags &
1037              (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
1038                 retval = copy_mount_options (data, &page);
1039                 if (retval < 0)
1040                         goto out;
1041                 retval = do_remount(dir_name,
1042                                     new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
1043                                     (char *) page);
1044                 free_page(page);
1045                 goto out;
1046         }
1047
1048         retval = copy_mount_options (type, &page);
1049         if (retval < 0)
1050                 goto out;
1051         fstype = get_fs_type((char *) page);
1052         free_page(page);
1053         retval = -ENODEV;
1054         if (!fstype)
1055                 goto out;
1056
1057         memset(&dummy, 0, sizeof(dummy));
1058         if (fstype->fs_flags & FS_REQUIRES_DEV) {
1059                 dentry = namei(dev_name);
1060                 retval = PTR_ERR(dentry);
1061                 if (IS_ERR(dentry))
1062                         goto out;
1063
1064                 inode = dentry->d_inode;
1065                 retval = -ENOTBLK;
1066                 if (!S_ISBLK(inode->i_mode))
1067                         goto dput_and_out;
1068
1069                 retval = -EACCES;
1070                 if (IS_NODEV(inode))
1071                         goto dput_and_out;
1072
1073                 dev = inode->i_rdev;
1074                 retval = -ENXIO;
1075                 if (MAJOR(dev) >= MAX_BLKDEV)
1076                         goto dput_and_out;
1077
1078                 retval = -ENOTBLK;
1079                 dummy.f_op = get_blkfops(MAJOR(dev));
1080                 if (!dummy.f_op)
1081                         goto dput_and_out;
1082
1083                 if (dummy.f_op->open) {
1084                         dummy.f_dentry = dentry;
1085                         dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1086                         retval = dummy.f_op->open(inode, &dummy);
1087                         if (retval)
1088                                 goto dput_and_out;
1089                 }
1090
1091         } else {
1092                 retval = -EMFILE;
1093                 if (!(dev = get_unnamed_dev()))
1094                         goto out;
1095         }
1096
1097         page = 0;
1098         if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1099                 flags = new_flags & ~MS_MGC_MSK;
1100                 retval = copy_mount_options(data, &page);
1101                 if (retval < 0)
1102                         goto clean_up;
1103         }
1104         retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1105                                 (void *) page);
1106         free_page(page);
1107         if (retval)
1108                 goto clean_up;
1109
1110 dput_and_out:
1111         dput(dentry);
1112 out:
1113         unlock_kernel();
1114         return retval;
1115
1116 clean_up:
1117         if (dummy.f_op) {
1118                 if (dummy.f_op->release)
1119                         dummy.f_op->release(inode, NULL);
1120         } else
1121                 put_unnamed_dev(dev);
1122         goto dput_and_out;
1123 }
1124
1125 void __init mount_root(void)
1126 {
1127         struct file_system_type * fs_type;
1128         struct super_block * sb;
1129         struct vfsmount *vfsmnt;
1130         struct inode * d_inode = NULL;
1131         struct file filp;
1132         int retval;
1133
1134 #ifdef CONFIG_ROOT_NFS
1135         if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1136                 ROOT_DEV = 0;
1137                 if ((fs_type = get_fs_type("nfs"))) {
1138                         sb = get_empty_super(); /* "can't fail" */
1139                         sb->s_dev = get_unnamed_dev();
1140                         sb->s_flags = root_mountflags;
1141                         sema_init(&sb->s_vfs_rename_sem,1);
1142                         vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1143                         if (vfsmnt) {
1144                                 if (nfs_root_mount(sb) >= 0) {
1145                                         sb->s_dirt = 0;
1146                                         sb->s_type = fs_type;
1147                                         current->fs->root = dget(sb->s_root);
1148                                         current->fs->pwd = dget(sb->s_root);
1149                                         ROOT_DEV = sb->s_dev;
1150                                         printk (KERN_NOTICE "VFS: Mounted root (NFS filesystem)%s.\n", (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1151                                         return;
1152                                 }
1153                                 remove_vfsmnt(sb->s_dev);
1154                         }
1155                         put_unnamed_dev(sb->s_dev);
1156                         sb->s_dev = 0;
1157                 }
1158                 if (!ROOT_DEV) {
1159                         printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1160                         ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1161                 }
1162         }
1163 #endif
1164
1165 #ifdef CONFIG_BLK_DEV_FD
1166         if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1167 #ifdef CONFIG_BLK_DEV_RAM
1168                 extern int rd_doload;
1169 #endif
1170                 floppy_eject();
1171 #ifndef CONFIG_BLK_DEV_RAM
1172                 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1173 #else
1174                 /* rd_doload is 2 for a dual initrd/ramload setup */
1175                 if(rd_doload==2)
1176                         rd_load_secondary();
1177                 else
1178 #endif
1179                 {
1180                         printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1181                         wait_for_keypress();
1182                 }
1183         }
1184 #endif
1185
1186         memset(&filp, 0, sizeof(filp));
1187         d_inode = get_empty_inode();
1188         d_inode->i_rdev = ROOT_DEV;
1189         filp.f_dentry = NULL;
1190         if ( root_mountflags & MS_RDONLY)
1191                 filp.f_mode = 1; /* read only */
1192         else
1193                 filp.f_mode = 3; /* read write */
1194         retval = blkdev_open(d_inode, &filp);
1195         if (retval == -EROFS) {
1196                 root_mountflags |= MS_RDONLY;
1197                 filp.f_mode = 1;
1198                 retval = blkdev_open(d_inode, &filp);
1199         }
1200         iput(d_inode);
1201         if (retval)
1202                 /*
1203                  * Allow the user to distinguish between failed open
1204                  * and bad superblock on root device.
1205                  */
1206                 printk("VFS: Cannot open root device %s\n",
1207                        kdevname(ROOT_DEV));
1208         else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1209                 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1210                         continue;
1211                 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1212                 if (sb) {
1213                         sb->s_flags = root_mountflags;
1214                         current->fs->root = dget(sb->s_root);
1215                         current->fs->pwd = dget(sb->s_root);
1216                         printk ("VFS: Mounted root (%s filesystem)%s.\n",
1217                                 fs_type->name,
1218                                 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1219                         vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1220                         if (vfsmnt)
1221                                 return;
1222                         panic("VFS: add_vfsmnt failed for root fs");
1223                 }
1224         }
1225         panic("VFS: Unable to mount root fs on %s",
1226                 kdevname(ROOT_DEV));
1227 }
1228
1229
1230 #ifdef CONFIG_BLK_DEV_INITRD
1231
1232 int __init change_root(kdev_t new_root_dev,const char *put_old)
1233 {
1234         kdev_t old_root_dev;
1235         struct vfsmount *vfsmnt;
1236         struct dentry *old_root,*old_pwd,*dir_d = NULL;
1237         int error;
1238
1239         old_root = current->fs->root;
1240         old_pwd = current->fs->pwd;
1241         old_root_dev = ROOT_DEV;
1242         if (!fs_may_mount(new_root_dev)) {
1243                 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1244                 return -EBUSY;
1245         }
1246         ROOT_DEV = new_root_dev;
1247         mount_root();
1248         dput(old_root);
1249         dput(old_pwd);
1250 #if 1
1251         shrink_dcache();
1252         printk("change_root: old root has d_count=%d\n", old_root->d_count);
1253 #endif
1254         /*
1255          * Get the new mount directory
1256          */
1257         dir_d = lookup_dentry(put_old, NULL, 1);
1258         if (IS_ERR(dir_d)) {
1259                 error = PTR_ERR(dir_d);
1260         } else if (!dir_d->d_inode) {
1261                 dput(dir_d);
1262                 error = -ENOENT;
1263         } else {
1264                 error = 0;
1265         }
1266         if (!error && dir_d->d_covers != dir_d) {
1267                 dput(dir_d);
1268                 error = -EBUSY;
1269         }
1270         if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1271                 dput(dir_d);
1272                 error = -ENOTDIR;
1273         }
1274         if (error) {
1275                 int umount_error;
1276
1277                 printk(KERN_NOTICE "Trying to unmount old root ... ");
1278                 umount_error = do_umount(old_root_dev,1, 0);
1279                 if (!umount_error) {
1280                         printk("okay\n");
1281                         invalidate_buffers(old_root_dev);
1282                         return 0;
1283                 }
1284                 printk(KERN_ERR "error %d\n",umount_error);
1285                 return error;
1286         }
1287         remove_vfsmnt(old_root_dev);
1288         vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1289         if (vfsmnt) {
1290                 d_mount(dir_d,old_root);
1291                 return 0;
1292         }
1293         printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1294         return -ENOMEM;
1295 }
1296
1297 #endif