fs/super.c

   1 /*
   2  *  linux/fs/super.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  *
   6  *  super.c contains code to handle: - mount structures
   7  *                                   - super-block tables.
   8  *                                   - mount systemcall
   9  *                                   - umount systemcall
  10  *
  11  *  Added options to /proc/mounts
  12  *  Torbjörn Lindh (torbjorn.lindh@gopta.se), April 14, 1996.
  13  *
  14  * GK 2/5/95  -  Changed to support mounting the root fs via NFS
  15  *
  16  *  Added kerneld support: Jacques Gelinas and Bjorn Ekwall
  17  *  Added change_root: Werner Almesberger & Hans Lermen, Feb '96
  18  */
  19
  20 #include <linux/config.h>
  21 #include <linux/sched.h>
  22 #include <linux/kernel.h>
  23 #include <linux/mount.h>
  24 #include <linux/malloc.h>
  25 #include <linux/major.h>
  26 #include <linux/stat.h>
  27 #include <linux/errno.h>
  28 #include <linux/string.h>
  29 #include <linux/locks.h>
  30 #include <linux/mm.h>
  31 #include <linux/smp.h>
  32 #include <linux/smp_lock.h>
  33 #include <linux/fd.h>
  34 #include <linux/init.h>
  35
  36 #include <asm/system.h>
  37 #include <asm/uaccess.h>
  38 #include <asm/bitops.h>
  39
  40 #ifdef CONFIG_KERNELD
  41 #include <linux/kerneld.h>
  42 #endif
  43
  44 #include <linux/nfs_fs.h>
  45 #include <linux/nfs_fs_sb.h>
  46 #include <linux/nfs_mount.h>
  47
  48 /*
  49  * We use a semaphore to synchronize all mount/umount
  50  * activity - imagine the mess if we have a race between
  51  * unmounting a filesystem and re-mounting it (or something
  52  * else).
  53  */
  54 static struct semaphore mount_sem = MUTEX;
  55
  56 extern void wait_for_keypress(void);
  57 extern struct file_operations * get_blkfops(unsigned int major);
  58
  59 extern int root_mountflags;
  60
  61 static int do_remount_sb(struct super_block *sb, int flags, char * data);
  62
  63 /* this is initialized in init/main.c */
  64 kdev_t ROOT_DEV;
  65
  66 struct super_block super_blocks[NR_SUPER];
  67 static struct file_system_type *file_systems = (struct file_system_type *) NULL;
  68 struct vfsmount *vfsmntlist = (struct vfsmount *) NULL;
  69 static struct vfsmount *vfsmnttail = (struct vfsmount *) NULL,
  70                        *mru_vfsmnt = (struct vfsmount *) NULL;
  71
  72 /*
  73  * This part handles the management of the list of mounted filesystems.
  74  */
  75 struct vfsmount *lookup_vfsmnt(kdev_t dev)
  76 {
  77         struct vfsmount *lptr;
  78
  79         if (vfsmntlist == (struct vfsmount *)NULL)
  80                 return ((struct vfsmount *)NULL);
  81
  82         if (mru_vfsmnt != (struct vfsmount *)NULL &&
  83             mru_vfsmnt->mnt_dev == dev)
  84                 return (mru_vfsmnt);
  85
  86         for (lptr = vfsmntlist;
  87              lptr != (struct vfsmount *)NULL;
  88              lptr = lptr->mnt_next)
  89                 if (lptr->mnt_dev == dev) {
  90                         mru_vfsmnt = lptr;
  91                         return (lptr);
  92                 }
  93
  94         return ((struct vfsmount *)NULL);
  95         /* NOTREACHED */
  96 }
  97
  98 static struct vfsmount *add_vfsmnt(struct super_block *sb,
  99                         const char *dev_name, const char *dir_name)
 100 {
 101         struct vfsmount *lptr;
 102         char *tmp, *name;
 103
 104         lptr = (struct vfsmount *)kmalloc(sizeof(struct vfsmount), GFP_KERNEL);
 105         if (!lptr)
 106                 goto out;
 107         memset(lptr, 0, sizeof(struct vfsmount));
 108
 109         lptr->mnt_sb = sb;
 110         lptr->mnt_dev = sb->s_dev;
 111         lptr->mnt_flags = sb->s_flags;
 112         sema_init(&lptr->mnt_sem, 1);
 113
 114         /* N.B. Is it really OK to have a vfsmount without names? */
 115         if (dev_name && !IS_ERR(tmp = getname(dev_name))) {
 116                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 117                 if (name) {
 118                         strcpy(name, tmp);
 119                         lptr->mnt_devname = name;
 120                 }
 121                 putname(tmp);
 122         }
 123         if (dir_name && !IS_ERR(tmp = getname(dir_name))) {
 124                 name = (char *) kmalloc(strlen(tmp)+1, GFP_KERNEL);
 125                 if (name) {
 126                         strcpy(name, tmp);
 127                         lptr->mnt_dirname = name;
 128                 }
 129                 putname(tmp);
 130         }
 131
 132         if (vfsmntlist == (struct vfsmount *)NULL) {
 133                 vfsmntlist = vfsmnttail = lptr;
 134         } else {
 135                 vfsmnttail->mnt_next = lptr;
 136                 vfsmnttail = lptr;
 137         }
 138 out:
 139         return lptr;
 140 }
 141
 142 static void remove_vfsmnt(kdev_t dev)
 143 {
 144         struct vfsmount *lptr, *tofree;
 145
 146         if (vfsmntlist == (struct vfsmount *)NULL)
 147                 return;
 148         lptr = vfsmntlist;
 149         if (lptr->mnt_dev == dev) {
 150                 tofree = lptr;
 151                 vfsmntlist = lptr->mnt_next;
 152                 if (vfsmnttail->mnt_dev == dev)
 153                         vfsmnttail = vfsmntlist;
 154         } else {
 155                 while (lptr->mnt_next != (struct vfsmount *)NULL) {
 156                         if (lptr->mnt_next->mnt_dev == dev)
 157                                 break;
 158                         lptr = lptr->mnt_next;
 159                 }
 160                 tofree = lptr->mnt_next;
 161                 if (tofree == (struct vfsmount *)NULL)
 162                         return;
 163                 lptr->mnt_next = lptr->mnt_next->mnt_next;
 164                 if (vfsmnttail->mnt_dev == dev)
 165                         vfsmnttail = lptr;
 166         }
 167         if (tofree == mru_vfsmnt)
 168                 mru_vfsmnt = NULL;
 169         kfree(tofree->mnt_devname);
 170         kfree(tofree->mnt_dirname);
 171         kfree_s(tofree, sizeof(struct vfsmount));
 172 }
 173
 174 int register_filesystem(struct file_system_type * fs)
 175 {
 176         struct file_system_type ** tmp;
 177
 178         if (!fs)
 179                 return -EINVAL;
 180         if (fs->next)
 181                 return -EBUSY;
 182         tmp = &file_systems;
 183         while (*tmp) {
 184                 if (strcmp((*tmp)->name, fs->name) == 0)
 185                         return -EBUSY;
 186                 tmp = &(*tmp)->next;
 187         }
 188         *tmp = fs;
 189         return 0;
 190 }
 191
 192 #ifdef CONFIG_MODULES
 193 int unregister_filesystem(struct file_system_type * fs)
 194 {
 195         struct file_system_type ** tmp;
 196
 197         tmp = &file_systems;
 198         while (*tmp) {
 199                 if (fs == *tmp) {
 200                         *tmp = fs->next;
 201                         fs->next = NULL;
 202                         return 0;
 203                 }
 204                 tmp = &(*tmp)->next;
 205         }
 206         return -EINVAL;
 207 }
 208 #endif
 209
 210 static int fs_index(const char * __name)
 211 {
 212         struct file_system_type * tmp;
 213         char * name;
 214         int err, index;
 215
 216         name = getname(__name);
 217         err = PTR_ERR(name);
 218         if (IS_ERR(name))
 219                 return err;
 220
 221         index = 0;
 222         for (tmp = file_systems ; tmp ; tmp = tmp->next) {
 223                 if (strcmp(tmp->name, name) == 0) {
 224                         putname(name);
 225                         return index;
 226                 }
 227                 index++;
 228         }
 229         putname(name);
 230         return -EINVAL;
 231 }
 232
 233 static int fs_name(unsigned int index, char * buf)
 234 {
 235         struct file_system_type * tmp;
 236         int len;
 237
 238         tmp = file_systems;
 239         while (tmp && index > 0) {
 240                 tmp = tmp->next;
 241                 index--;
 242         }
 243         if (!tmp)
 244                 return -EINVAL;
 245         len = strlen(tmp->name) + 1;
 246         return copy_to_user(buf, tmp->name, len) ? -EFAULT : 0;
 247 }
 248
 249 static int fs_maxindex(void)
 250 {
 251         struct file_system_type * tmp;
 252         int index;
 253
 254         index = 0;
 255         for (tmp = file_systems ; tmp ; tmp = tmp->next)
 256                 index++;
 257         return index;
 258 }
 259
 260 /*
 261  * Whee.. Weird sysv syscall.
 262  */
 263 asmlinkage int sys_sysfs(int option, unsigned long arg1, unsigned long arg2)
 264 {
 265         int retval = -EINVAL;
 266
 267         lock_kernel();
 268         switch (option) {
 269                 case 1:
 270                         retval = fs_index((const char *) arg1);
 271                         break;
 272
 273                 case 2:
 274                         retval = fs_name(arg1, (char *) arg2);
 275                         break;
 276
 277                 case 3:
 278                         retval = fs_maxindex();
 279                         break;
 280         }
 281         unlock_kernel();
 282         return retval;
 283 }
 284
 285 static struct proc_fs_info {
 286         int flag;
 287         char *str;
 288 } fs_info[] = {
 289         { MS_NOEXEC, ",noexec" },
 290         { MS_NOSUID, ",nosuid" },
 291         { MS_NODEV, ",nodev" },
 292         { MS_SYNCHRONOUS, ",sync" },
 293         { MS_MANDLOCK, ",mand" },
 294         { MS_NOATIME, ",noatime" },
 295 #ifdef MS_NOSUB                 /* Can't find this except in mount.c */
 296         { MS_NOSUB, ",nosub" },
 297 #endif
 298         { 0, NULL }
 299 };
 300
 301 static struct proc_nfs_info {
 302         int flag;
 303         char *str;
 304 } nfs_info[] = {
 305         { NFS_MOUNT_SOFT, ",soft" },
 306         { NFS_MOUNT_INTR, ",intr" },
 307         { NFS_MOUNT_POSIX, ",posix" },
 308         { NFS_MOUNT_NOCTO, ",nocto" },
 309         { NFS_MOUNT_NOAC, ",noac" },
 310         { 0, NULL }
 311 };
 312
 313 int get_filesystem_info( char *buf )
 314 {
 315         struct vfsmount *tmp = vfsmntlist;
 316         struct proc_fs_info *fs_infop;
 317         struct proc_nfs_info *nfs_infop;
 318         struct nfs_server *nfss;
 319         int len = 0;
 320
 321         while ( tmp && len < PAGE_SIZE - 160)
 322         {
 323                 len += sprintf( buf + len, "%s %s %s %s",
 324                         tmp->mnt_devname, tmp->mnt_dirname, tmp->mnt_sb->s_type->name,
 325                         tmp->mnt_flags & MS_RDONLY ? "ro" : "rw" );
 326                 for (fs_infop = fs_info; fs_infop->flag; fs_infop++) {
 327                   if (tmp->mnt_flags & fs_infop->flag) {
 328                     strcpy(buf + len, fs_infop->str);
 329                     len += strlen(fs_infop->str);
 330                   }
 331                 }
 332                 if (!strcmp("nfs", tmp->mnt_sb->s_type->name)) {
 333                         nfss = &tmp->mnt_sb->u.nfs_sb.s_server;
 334                         if (nfss->rsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 335                                 len += sprintf(buf+len, ",rsize=%d",
 336                                                nfss->rsize);
 337                         }
 338                         if (nfss->wsize != NFS_DEF_FILE_IO_BUFFER_SIZE) {
 339                                 len += sprintf(buf+len, ",wsize=%d",
 340                                                nfss->wsize);
 341                         }
 342 #if 0
 343                         if (nfss->timeo != 7*HZ/10) {
 344                                 len += sprintf(buf+len, ",timeo=%d",
 345                                                nfss->timeo*10/HZ);
 346                         }
 347                         if (nfss->retrans != 3) {
 348                                 len += sprintf(buf+len, ",retrans=%d",
 349                                                nfss->retrans);
 350                         }
 351 #endif
 352                         if (nfss->acregmin != 3*HZ) {
 353                                 len += sprintf(buf+len, ",acregmin=%d",
 354                                                nfss->acregmin/HZ);
 355                         }
 356                         if (nfss->acregmax != 60*HZ) {
 357                                 len += sprintf(buf+len, ",acregmax=%d",
 358                                                nfss->acregmax/HZ);
 359                         }
 360                         if (nfss->acdirmin != 30*HZ) {
 361                                 len += sprintf(buf+len, ",acdirmin=%d",
 362                                                nfss->acdirmin/HZ);
 363                         }
 364                         if (nfss->acdirmax != 60*HZ) {
 365                                 len += sprintf(buf+len, ",acdirmax=%d",
 366                                                nfss->acdirmax/HZ);
 367                         }
 368                         for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 369                                 if (nfss->flags & nfs_infop->flag) {
 370                                         strcpy(buf + len, nfs_infop->str);
 371                                         len += strlen(nfs_infop->str);
 372                                 }
 373                         }
 374                         len += sprintf(buf+len, ",addr=%s",
 375                                        nfss->hostname);
 376                 }
 377                 len += sprintf( buf + len, " 0 0\n" );
 378                 tmp = tmp->mnt_next;
 379         }
 380
 381         return len;
 382 }
 383
 384 int get_filesystem_list(char * buf)
 385 {
 386         int len = 0;
 387         struct file_system_type * tmp;
 388
 389         tmp = file_systems;
 390         while (tmp && len < PAGE_SIZE - 80) {
 391                 len += sprintf(buf+len, "%s\t%s\n",
 392                         (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
 393                         tmp->name);
 394                 tmp = tmp->next;
 395         }
 396         return len;
 397 }
 398
 399 struct file_system_type *get_fs_type(const char *name)
 400 {
 401         struct file_system_type * fs = file_systems;
 402
 403         if (!name)
 404                 return fs;
 405         for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 406                 ;
 407 #ifdef CONFIG_KERNELD
 408         if (!fs && (request_module(name) == 0)) {
 409                 for (fs = file_systems; fs && strcmp(fs->name, name); fs = fs->next)
 410                         ;
 411         }
 412 #endif
 413
 414         return fs;
 415 }
 416
 417 void __wait_on_super(struct super_block * sb)
 418 {
 419         struct wait_queue wait = { current, NULL };
 420
 421         add_wait_queue(&sb->s_wait, &wait);
 422 repeat:
 423         current->state = TASK_UNINTERRUPTIBLE;
 424         if (sb->s_lock) {
 425                 schedule();
 426                 goto repeat;
 427         }
 428         remove_wait_queue(&sb->s_wait, &wait);
 429         current->state = TASK_RUNNING;
 430 }
 431
 432 /*
 433  * Note: check the dirty flag before waiting, so we don't
 434  * hold up the sync while mounting a device. (The newly
 435  * mounted device won't need syncing.)
 436  */
 437 void sync_supers(kdev_t dev)
 438 {
 439         struct super_block * sb;
 440
 441         for (sb = super_blocks + 0 ; sb < super_blocks + NR_SUPER ; sb++) {
 442                 if (!sb->s_dev)
 443                         continue;
 444                 if (dev && sb->s_dev != dev)
 445                         continue;
 446                 if (!sb->s_dirt)
 447                         continue;
 448                 /* N.B. Should lock the superblock while writing */
 449                 wait_on_super(sb);
 450                 if (!sb->s_dev || !sb->s_dirt)
 451                         continue;
 452                 if (dev && (dev != sb->s_dev))
 453                         continue;
 454                 if (sb->s_op && sb->s_op->write_super)
 455                         sb->s_op->write_super(sb);
 456         }
 457 }
 458
 459 struct super_block * get_super(kdev_t dev)
 460 {
 461         struct super_block * s;
 462
 463         if (!dev)
 464                 return NULL;
 465 restart:
 466         s = 0+super_blocks;
 467         while (s < NR_SUPER+super_blocks)
 468                 if (s->s_dev == dev) {
 469                         wait_on_super(s);
 470                         if (s->s_dev == dev)
 471                                 return s;
 472                         goto restart;
 473                 } else
 474                         s++;
 475         return NULL;
 476 }
 477
 478 asmlinkage int sys_ustat(dev_t dev, struct ustat * ubuf)
 479 {
 480         struct super_block *s;
 481         struct ustat tmp;
 482         struct statfs sbuf;
 483         mm_segment_t old_fs;
 484         int err = -EINVAL;
 485
 486         lock_kernel();
 487         s = get_super(to_kdev_t(dev));
 488         if (s == NULL)
 489                 goto out;
 490         err = -ENOSYS;
 491         if (!(s->s_op->statfs))
 492                 goto out;
 493
 494         old_fs = get_fs();
 495         set_fs(get_ds());
 496         s->s_op->statfs(s,&sbuf,sizeof(struct statfs));
 497         set_fs(old_fs);
 498
 499         memset(&tmp,0,sizeof(struct ustat));
 500         tmp.f_tfree = sbuf.f_bfree;
 501         tmp.f_tinode = sbuf.f_ffree;
 502
 503         err = copy_to_user(ubuf,&tmp,sizeof(struct ustat)) ? -EFAULT : 0;
 504 out:
 505         unlock_kernel();
 506         return err;
 507 }
 508
 509 /*
 510  * Find a super_block with no device assigned.
 511  */
 512 static struct super_block *get_empty_super(void)
 513 {
 514         struct super_block *s = 0+super_blocks;
 515
 516         for (; s < NR_SUPER+super_blocks; s++) {
 517                 if (s->s_dev)
 518                         continue;
 519                 if (!s->s_lock)
 520                         return s;
 521                 printk("VFS: empty superblock %p locked!\n", s);
 522         }
 523         return NULL;
 524 }
 525
 526 static struct super_block * read_super(kdev_t dev,const char *name,int flags,
 527                                        void *data, int silent)
 528 {
 529         struct super_block * s;
 530         struct file_system_type *type;
 531
 532         if (!dev)
 533                 goto out_null;
 534         check_disk_change(dev);
 535         s = get_super(dev);
 536         if (s)
 537                 goto out;
 538
 539         type = get_fs_type(name);
 540         if (!type) {
 541                 printk("VFS: on device %s: get_fs_type(%s) failed\n",
 542                        kdevname(dev), name);
 543                 goto out;
 544         }
 545         s = get_empty_super();
 546         if (!s)
 547                 goto out;
 548         s->s_dev = dev;
 549         s->s_flags = flags;
 550         s->s_dirt = 0;
 551         /* N.B. Should lock superblock now ... */
 552         if (!type->read_super(s, data, silent))
 553                 goto out_fail;
 554         s->s_dev = dev; /* N.B. why do this again?? */
 555         s->s_rd_only = 0;
 556         s->s_type = type;
 557 out:
 558         return s;
 559
 560         /* N.B. s_dev should be cleared in type->read_super */
 561 out_fail:
 562         s->s_dev = 0;
 563 out_null:
 564         s = NULL;
 565         goto out;
 566 }
 567
 568 /*
 569  * Unnamed block devices are dummy devices used by virtual
 570  * filesystems which don't use real block-devices.  -- jrs
 571  */
 572
 573 static unsigned int unnamed_dev_in_use[256/(8*sizeof(unsigned int))] = { 0, };
 574
 575 kdev_t get_unnamed_dev(void)
 576 {
 577         int i;
 578
 579         for (i = 1; i < 256; i++) {
 580                 if (!test_and_set_bit(i,unnamed_dev_in_use))
 581                         return MKDEV(UNNAMED_MAJOR, i);
 582         }
 583         return 0;
 584 }
 585
 586 void put_unnamed_dev(kdev_t dev)
 587 {
 588         if (!dev || MAJOR(dev) != UNNAMED_MAJOR)
 589                 return;
 590         if (test_and_clear_bit(MINOR(dev), unnamed_dev_in_use))
 591                 return;
 592         printk("VFS: put_unnamed_dev: freeing unused device %s\n",
 593                         kdevname(dev));
 594 }
 595
 596 static int d_umount(struct super_block * sb)
 597 {
 598         struct dentry * root = sb->s_root;
 599         struct dentry * covered = root->d_covers;
 600
 601         if (root->d_count != 1)
 602                 return -EBUSY;
 603
 604         if (root->d_inode->i_state)
 605                 return -EBUSY;
 606
 607         sb->s_root = NULL;
 608
 609         if (covered != root) {
 610                 root->d_covers = root;
 611                 covered->d_mounts = covered;
 612                 dput(covered);
 613         }
 614         dput(root);
 615         return 0;
 616 }
 617
 618 static void d_mount(struct dentry *covered, struct dentry *dentry)
 619 {
 620         if (covered->d_mounts != covered) {
 621                 printk("VFS: mount - already mounted\n");
 622                 return;
 623         }
 624         covered->d_mounts = dentry;
 625         dentry->d_covers = covered;
 626 }
 627
 628 static int do_umount(kdev_t dev, int unmount_root)
 629 {
 630         struct super_block * sb;
 631         int retval;
 632
 633         retval = -ENOENT;
 634         sb = get_super(dev);
 635         if (!sb || !sb->s_root)
 636                 goto out;
 637
 638         /*
 639          * Before checking whether the filesystem is still busy,
 640          * make sure the kernel doesn't hold any quotafiles open
 641          * on the device. If the umount fails, too bad -- there
 642          * are no quotas running anymore. Just turn them on again.
 643          */
 644         quota_off(dev, -1);
 645
 646         /*
 647          * Shrink dcache, then fsync. This guarantees that if the
 648          * filesystem is quiescent at this point, then (a) only the
 649          * root entry should be in use and (b) that root entry is
 650          * clean.
 651          */
 652         shrink_dcache_sb(sb);
 653         fsync_dev(dev);
 654
 655         if (dev==ROOT_DEV && !unmount_root) {
 656                 /*
 657                  * Special case for "unmounting" root ...
 658                  * we just try to remount it readonly.
 659                  */
 660                 retval = 0;
 661                 if (!(sb->s_flags & MS_RDONLY))
 662                         retval = do_remount_sb(sb, MS_RDONLY, 0);
 663                 return retval;
 664         }
 665
 666         retval = d_umount(sb);
 667         if (retval)
 668                 goto out;
 669
 670         /* Forget any inodes */
 671         if (invalidate_inodes(sb)) {
 672                 printk("VFS: Busy inodes after unmount. "
 673                         "Self-destruct in 5 seconds. Bye-bye..\n");
 674         }
 675
 676         if (sb->s_op) {
 677                 if (sb->s_op->write_super && sb->s_dirt)
 678                         sb->s_op->write_super(sb);
 679                 if (sb->s_op->put_super)
 680                         sb->s_op->put_super(sb);
 681         }
 682         remove_vfsmnt(dev);
 683 out:
 684         return retval;
 685 }
 686
 687 static int umount_dev(kdev_t dev)
 688 {
 689         int retval;
 690         struct inode * inode = get_empty_inode();
 691
 692         retval = -ENOMEM;
 693         if (!inode)
 694                 goto out;
 695
 696         inode->i_rdev = dev;
 697         retval = -ENXIO;
 698         if (MAJOR(dev) >= MAX_BLKDEV)
 699                 goto out_iput;
 700
 701         fsync_dev(dev);
 702
 703         down(&mount_sem);
 704
 705         retval = do_umount(dev,0);
 706         if (!retval) {
 707                 fsync_dev(dev);
 708                 if (dev != ROOT_DEV) {
 709                         blkdev_release(inode);
 710                         put_unnamed_dev(dev);
 711                 }
 712         }
 713
 714         up(&mount_sem);
 715 out_iput:
 716         iput(inode);
 717 out:
 718         return retval;
 719 }
 720
 721 /*
 722  * Now umount can handle mount points as well as block devices.
 723  * This is important for filesystems which use unnamed block devices.
 724  *
 725  * There is a little kludge here with the dummy_inode.  The current
 726  * vfs release functions only use the r_dev field in the inode so
 727  * we give them the info they need without using a real inode.
 728  * If any other fields are ever needed by any block device release
 729  * functions, they should be faked here.  -- jrs
 730  */
 731
 732 asmlinkage int sys_umount(char * name)
 733 {
 734         struct dentry * dentry;
 735         int retval;
 736
 737         if (!suser())
 738                 return -EPERM;
 739
 740         lock_kernel();
 741         dentry = namei(name);
 742         retval = PTR_ERR(dentry);
 743         if (!IS_ERR(dentry)) {
 744                 struct inode * inode = dentry->d_inode;
 745                 kdev_t dev = inode->i_rdev;
 746
 747                 retval = 0;
 748                 if (S_ISBLK(inode->i_mode)) {
 749                         if (IS_NODEV(inode))
 750                                 retval = -EACCES;
 751                 } else {
 752                         struct super_block *sb = inode->i_sb;
 753                         retval = -EINVAL;
 754                         if (sb && inode == sb->s_root->d_inode) {
 755                                 dev = sb->s_dev;
 756                                 retval = 0;
 757                         }
 758                 }
 759                 dput(dentry);
 760
 761                 if (!retval)
 762                         retval = umount_dev(dev);
 763         }
 764         unlock_kernel();
 765         return retval;
 766 }
 767
 768 /*
 769  * Check whether we can mount the specified device.
 770  */
 771 int fs_may_mount(kdev_t dev)
 772 {
 773         struct super_block * sb = get_super(dev);
 774         int busy;
 775
 776         busy = sb && sb->s_root &&
 777                (sb->s_root->d_count != 1 || sb->s_root->d_covers != sb->s_root);
 778         return !busy;
 779 }
 780
 781 /*
 782  * do_mount() does the actual mounting after sys_mount has done the ugly
 783  * parameter parsing. When enough time has gone by, and everything uses the
 784  * new mount() parameters, sys_mount() can then be cleaned up.
 785  *
 786  * We cannot mount a filesystem if it has active, used, or dirty inodes.
 787  * We also have to flush all inode-data for this device, as the new mount
 788  * might need new info.
 789  *
 790  * [21-Mar-97] T.Schoebel-Theuer: Now this can be overridden when
 791  * supplying a leading "!" before the dir_name, allowing "stacks" of
 792  * mounted filesystems. The stacking will only influence any pathname lookups
 793  * _after_ the mount, but open filedescriptors or working directories that
 794  * are now covered remain valid. For example, when you overmount /home, any
 795  * process with old cwd /home/joe will continue to use the old versions,
 796  * as long as relative paths are used, but absolute paths like /home/joe/xxx
 797  * will go to the new "top of stack" version. In general, crossing a
 798  * mountpoint will always go to the top of stack element.
 799  * Anyone using this new feature must know what he/she is doing.
 800  */
 801
 802 int do_mount(kdev_t dev, const char * dev_name, const char * dir_name, const char * type, int flags, void * data)
 803 {
 804         struct dentry * dir_d;
 805         struct super_block * sb;
 806         struct vfsmount *vfsmnt;
 807         int error;
 808
 809         down(&mount_sem);
 810         error = -EACCES;
 811         if (!(flags & MS_RDONLY) && dev && is_read_only(dev))
 812                 goto out;
 813                 /*flags |= MS_RDONLY;*/
 814
 815         dir_d = namei(dir_name);
 816         error = PTR_ERR(dir_d);
 817         if (IS_ERR(dir_d))
 818                 goto out;
 819
 820         error = -ENOTDIR;
 821         if (!S_ISDIR(dir_d->d_inode->i_mode))
 822                 goto dput_and_out;
 823
 824         error = -EBUSY;
 825         if (dir_d->d_covers != dir_d)
 826                 goto dput_and_out;
 827
 828         /*
 829          * Note: If the superblock already exists,
 830          * read_super just does a get_super().
 831          */
 832         error = -EINVAL;
 833         sb = read_super(dev, type, flags, data, 0);
 834         if (!sb)
 835                 goto dput_and_out;
 836
 837         /*
 838          * We may have slept while reading the super block,
 839          * so we check afterwards whether it's safe to mount.
 840          */
 841         error = -EBUSY;
 842         if (!fs_may_mount(dev))
 843                 goto dput_and_out;
 844
 845         error = -ENOMEM;
 846         vfsmnt = add_vfsmnt(sb, dev_name, dir_name);
 847         if (!vfsmnt)
 848                 goto dput_and_out;
 849         d_mount(dir_d, sb->s_root);
 850         error = 0;      /* we don't dput(dir_d) - see umount */
 851
 852 out:
 853         up(&mount_sem);
 854         return error;
 855
 856 dput_and_out:
 857         dput(dir_d);
 858         goto out;
 859 }
 860
 861
 862 /*
 863  * Alters the mount flags of a mounted file system. Only the mount point
 864  * is used as a reference - file system type and the device are ignored.
 865  * FS-specific mount options can't be altered by remounting.
 866  */
 867
 868 static int do_remount_sb(struct super_block *sb, int flags, char *data)
 869 {
 870         int retval;
 871         struct vfsmount *vfsmnt;
 872
 873         /*
 874          * Invalidate the inodes, as some mount options may be changed.
 875          * N.B. If we are changing media, we should check the return
 876          * from invalidate_inodes ... can't allow _any_ open files.
 877          */
 878         invalidate_inodes(sb);
 879
 880         if (!(flags & MS_RDONLY) && sb->s_dev && is_read_only(sb->s_dev))
 881                 return -EACCES;
 882                 /*flags |= MS_RDONLY;*/
 883         /* If we are remounting RDONLY, make sure there are no rw files open */
 884         if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY))
 885                 if (!fs_may_remount_ro(sb))
 886                         return -EBUSY;
 887         if (sb->s_op && sb->s_op->remount_fs) {
 888                 retval = sb->s_op->remount_fs(sb, &flags, data);
 889                 if (retval)
 890                         return retval;
 891         }
 892         sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
 893         vfsmnt = lookup_vfsmnt(sb->s_dev);
 894         if (vfsmnt)
 895                 vfsmnt->mnt_flags = sb->s_flags;
 896         return 0;
 897 }
 898
 899 static int do_remount(const char *dir,int flags,char *data)
 900 {
 901         struct dentry *dentry;
 902         int retval;
 903
 904         dentry = namei(dir);
 905         retval = PTR_ERR(dentry);
 906         if (!IS_ERR(dentry)) {
 907                 struct super_block * sb = dentry->d_inode->i_sb;
 908
 909                 retval = -EINVAL;
 910                 if (dentry == sb->s_root) {
 911                         /*
 912                          * Shrink the dcache and sync the device.
 913                          */
 914                         shrink_dcache_sb(sb);
 915                         fsync_dev(sb->s_dev);
 916                         retval = do_remount_sb(sb, flags, data);
 917                 }
 918                 dput(dentry);
 919         }
 920         return retval;
 921 }
 922
 923 static int copy_mount_options (const void * data, unsigned long *where)
 924 {
 925         int i;
 926         unsigned long page;
 927         struct vm_area_struct * vma;
 928
 929         *where = 0;
 930         if (!data)
 931                 return 0;
 932
 933         vma = find_vma(current->mm, (unsigned long) data);
 934         if (!vma || (unsigned long) data < vma->vm_start)
 935                 return -EFAULT;
 936         if (!(vma->vm_flags & VM_READ))
 937                 return -EFAULT;
 938         i = vma->vm_end - (unsigned long) data;
 939         if (PAGE_SIZE <= (unsigned long) i)
 940                 i = PAGE_SIZE-1;
 941         if (!(page = __get_free_page(GFP_KERNEL))) {
 942                 return -ENOMEM;
 943         }
 944         if (copy_from_user((void *) page,data,i)) {
 945                 free_page(page);
 946                 return -EFAULT;
 947         }
 948         *where = page;
 949         return 0;
 950 }
 951
 952 /*
 953  * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
 954  * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 955  *
 956  * data is a (void *) that can point to any structure up to
 957  * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 958  * information (or be NULL).
 959  *
 960  * NOTE! As old versions of mount() didn't use this setup, the flags
 961  * have to have a special 16-bit magic number in the high word:
 962  * 0xC0ED. If this magic word isn't present, the flags and data info
 963  * aren't used, as the syscall assumes we are talking to an older
 964  * version that didn't understand them.
 965  */
 966 asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
 967         unsigned long new_flags, void * data)
 968 {
 969         struct file_system_type * fstype;
 970         struct dentry * dentry = NULL;
 971         struct inode * inode = NULL;
 972         kdev_t dev;
 973         int retval = -EPERM;
 974         unsigned long flags = 0;
 975         unsigned long page = 0;
 976         struct file dummy;      /* allows read-write or read-only flag */
 977
 978         lock_kernel();
 979         if (!suser())
 980                 goto out;
 981         if ((new_flags &
 982              (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
 983                 retval = copy_mount_options (data, &page);
 984                 if (retval < 0)
 985                         goto out;
 986                 retval = do_remount(dir_name,
 987                                     new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
 988                                     (char *) page);
 989                 free_page(page);
 990                 goto out;
 991         }
 992
 993         retval = copy_mount_options (type, &page);
 994         if (retval < 0)
 995                 goto out;
 996         fstype = get_fs_type((char *) page);
 997         free_page(page);
 998         retval = -ENODEV;
 999         if (!fstype)
1000                 goto out;
1001
1002         memset(&dummy, 0, sizeof(dummy));
1003         if (fstype->fs_flags & FS_REQUIRES_DEV) {
1004                 dentry = namei(dev_name);
1005                 retval = PTR_ERR(dentry);
1006                 if (IS_ERR(dentry))
1007                         goto out;
1008
1009                 inode = dentry->d_inode;
1010                 retval = -ENOTBLK;
1011                 if (!S_ISBLK(inode->i_mode))
1012                         goto dput_and_out;
1013
1014                 retval = -EACCES;
1015                 if (IS_NODEV(inode))
1016                         goto dput_and_out;
1017
1018                 dev = inode->i_rdev;
1019                 retval = -ENXIO;
1020                 if (MAJOR(dev) >= MAX_BLKDEV)
1021                         goto dput_and_out;
1022
1023                 retval = -ENOTBLK;
1024                 dummy.f_op = get_blkfops(MAJOR(dev));
1025                 if (!dummy.f_op)
1026                         goto dput_and_out;
1027
1028                 if (dummy.f_op->open) {
1029                         dummy.f_dentry = dentry;
1030                         dummy.f_mode = (new_flags & MS_RDONLY) ? 1 : 3;
1031                         retval = dummy.f_op->open(inode, &dummy);
1032                         if (retval)
1033                                 goto dput_and_out;
1034                 }
1035
1036         } else {
1037                 retval = -EMFILE;
1038                 if (!(dev = get_unnamed_dev()))
1039                         goto out;
1040         }
1041
1042         page = 0;
1043         if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
1044                 flags = new_flags & ~MS_MGC_MSK;
1045                 retval = copy_mount_options(data, &page);
1046                 if (retval < 0)
1047                         goto clean_up;
1048         }
1049         retval = do_mount(dev, dev_name, dir_name, fstype->name, flags,
1050                                 (void *) page);
1051         free_page(page);
1052         if (retval)
1053                 goto clean_up;
1054
1055 dput_and_out:
1056         dput(dentry);
1057 out:
1058         unlock_kernel();
1059         return retval;
1060
1061 clean_up:
1062         if (dummy.f_op) {
1063                 if (dummy.f_op->release)
1064                         dummy.f_op->release(inode, NULL);
1065         } else
1066                 put_unnamed_dev(dev);
1067         goto dput_and_out;
1068 }
1069
1070 __initfunc(static void do_mount_root(void))
1071 {
1072         struct file_system_type * fs_type;
1073         struct super_block * sb;
1074         struct vfsmount *vfsmnt;
1075         struct inode * d_inode = NULL;
1076         struct file filp;
1077         int retval;
1078
1079 #ifdef CONFIG_ROOT_NFS
1080         if (MAJOR(ROOT_DEV) == UNNAMED_MAJOR) {
1081                 ROOT_DEV = 0;
1082                 if ((fs_type = get_fs_type("nfs"))) {
1083                         sb = get_empty_super(); /* "can't fail" */
1084                         sb->s_dev = get_unnamed_dev();
1085                         sb->s_flags = root_mountflags & ~MS_RDONLY;
1086                         vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1087                         if (vfsmnt) {
1088                                 if (nfs_root_mount(sb) >= 0) {
1089                                         sb->s_rd_only = 0;
1090                                         sb->s_dirt = 0;
1091                                         sb->s_type = fs_type;
1092                                         current->fs->root = dget(sb->s_root);
1093                                         current->fs->pwd = dget(sb->s_root);
1094                                         ROOT_DEV = sb->s_dev;
1095                                         printk (KERN_NOTICE "VFS: Mounted root (nfs filesystem).\n");
1096                                         return;
1097                                 }
1098                                 remove_vfsmnt(sb->s_dev);
1099                         }
1100                         put_unnamed_dev(sb->s_dev);
1101                         sb->s_dev = 0;
1102                 }
1103                 if (!ROOT_DEV) {
1104                         printk(KERN_ERR "VFS: Unable to mount root fs via NFS, trying floppy.\n");
1105                         ROOT_DEV = MKDEV(FLOPPY_MAJOR, 0);
1106                 }
1107         }
1108 #endif
1109
1110 #ifdef CONFIG_BLK_DEV_FD
1111         if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
1112                 floppy_eject();
1113 #ifndef CONFIG_BLK_DEV_RAM
1114                 printk(KERN_NOTICE "(Warning, this kernel has no ramdisk support)\n");
1115 #endif
1116                 printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
1117                 wait_for_keypress();
1118         }
1119 #endif
1120
1121         memset(&filp, 0, sizeof(filp));
1122         d_inode = get_empty_inode();
1123         d_inode->i_rdev = ROOT_DEV;
1124         filp.f_dentry = NULL;
1125         if ( root_mountflags & MS_RDONLY)
1126                 filp.f_mode = 1; /* read only */
1127         else
1128                 filp.f_mode = 3; /* read write */
1129         retval = blkdev_open(d_inode, &filp);
1130         if (retval == -EROFS) {
1131                 root_mountflags |= MS_RDONLY;
1132                 filp.f_mode = 1;
1133                 retval = blkdev_open(d_inode, &filp);
1134         }
1135         iput(d_inode);
1136         if (retval)
1137                 /*
1138                  * Allow the user to distinguish between failed open
1139                  * and bad superblock on root device.
1140                  */
1141                 printk("VFS: Cannot open root device %s\n",
1142                        kdevname(ROOT_DEV));
1143         else for (fs_type = file_systems ; fs_type ; fs_type = fs_type->next) {
1144                 if (!(fs_type->fs_flags & FS_REQUIRES_DEV))
1145                         continue;
1146                 sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
1147                 if (sb) {
1148                         sb->s_flags = root_mountflags;
1149                         current->fs->root = dget(sb->s_root);
1150                         current->fs->pwd = dget(sb->s_root);
1151                         printk ("VFS: Mounted root (%s filesystem)%s.\n",
1152                                 fs_type->name,
1153                                 (sb->s_flags & MS_RDONLY) ? " readonly" : "");
1154                         vfsmnt = add_vfsmnt(sb, "/dev/root", "/");
1155                         if (vfsmnt)
1156                                 return;
1157                         panic("VFS: add_vfsmnt failed for root fs");
1158                 }
1159         }
1160         panic("VFS: Unable to mount root fs on %s",
1161                 kdevname(ROOT_DEV));
1162 }
1163
1164
1165 __initfunc(void mount_root(void))
1166 {
1167         struct super_block * sb = super_blocks;
1168         int i;
1169
1170         memset(super_blocks, 0, sizeof(super_blocks));
1171         /*
1172          * Initialize the dirty inode list headers for the super blocks
1173          */
1174         for (i = NR_SUPER ; i-- ; sb++)
1175                 INIT_LIST_HEAD(&sb->s_dirty);
1176
1177         do_mount_root();
1178 }
1179
1180
1181 #ifdef CONFIG_BLK_DEV_INITRD
1182
1183 extern int initmem_freed;
1184
1185 __initfunc(static int do_change_root(kdev_t new_root_dev,const char *put_old))
1186 {
1187         kdev_t old_root_dev;
1188         struct vfsmount *vfsmnt;
1189         struct dentry *old_root,*old_pwd,*dir_d = NULL;
1190         int error;
1191
1192         old_root = current->fs->root;
1193         old_pwd = current->fs->pwd;
1194         old_root_dev = ROOT_DEV;
1195         if (!fs_may_mount(new_root_dev)) {
1196                 printk(KERN_CRIT "New root is busy. Staying in initrd.\n");
1197                 return -EBUSY;
1198         }
1199         ROOT_DEV = new_root_dev;
1200         do_mount_root();
1201         dput(old_root);
1202         dput(old_pwd);
1203 #if 1
1204         shrink_dcache();
1205         printk("do_change_root: old root has d_count=%d\n", old_root->d_count);
1206 #endif
1207         /*
1208          * Get the new mount directory
1209          */
1210         dir_d = lookup_dentry(put_old, NULL, 1);
1211         if (IS_ERR(dir_d)) {
1212                 error = PTR_ERR(dir_d);
1213         } else if (!dir_d->d_inode) {
1214                 dput(dir_d);
1215                 error = -ENOENT;
1216         } else {
1217                 error = 0;
1218         }
1219         if (!error && dir_d->d_covers != dir_d) {
1220                 dput(dir_d);
1221                 error = -EBUSY;
1222         }
1223         if (!error && !S_ISDIR(dir_d->d_inode->i_mode)) {
1224                 dput(dir_d);
1225                 error = -ENOTDIR;
1226         }
1227         if (error) {
1228                 int umount_error;
1229
1230                 printk(KERN_NOTICE "Trying to unmount old root ... ");
1231                 umount_error = do_umount(old_root_dev,1);
1232                 if (!umount_error) {
1233                         printk("okay\n");
1234                         invalidate_buffers(old_root_dev);
1235                         return 0;
1236                 }
1237                 printk(KERN_ERR "error %d\n",umount_error);
1238                 return error;
1239         }
1240         remove_vfsmnt(old_root_dev);
1241         vfsmnt = add_vfsmnt(old_root->d_sb, "/dev/root.old", put_old);
1242         if (vfsmnt) {
1243                 d_mount(dir_d,old_root);
1244                 return 0;
1245         }
1246         printk(KERN_CRIT "Trouble: add_vfsmnt failed\n");
1247         return -ENOMEM;
1248 }
1249
1250 int change_root(kdev_t new_root_dev,const char *put_old)
1251 {
1252         if (initmem_freed) {
1253                 printk (KERN_CRIT "Initmem has been already freed. Staying in initrd\n");
1254                 return -EBUSY;
1255         }
1256         return do_change_root(new_root_dev, put_old);
1257 }
1258
1259 #endif