kernel/fs/devfs/devfs_vfsops.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26 /*
  27  * This is the device filesystem.
  28  *
  29  * It is a combination of a namer to drive autoconfiguration,
  30  * plus the access methods for the device drivers of the system.
  31  *
  32  * The prototype is fairly dependent on specfs for the latter part
  33  * of its implementation, though a final version would integrate the two.
  34  */
  35 #include <sys/types.h>
  36 #include <sys/param.h>
  37 #include <sys/sysmacros.h>
  38 #include <sys/systm.h>
  39 #include <sys/kmem.h>
  40 #include <sys/time.h>
  41 #include <sys/pathname.h>
  42 #include <sys/vfs.h>
  43 #include <sys/vnode.h>
  44 #include <sys/stat.h>
  45 #include <sys/uio.h>
  46 #include <sys/stat.h>
  47 #include <sys/errno.h>
  48 #include <sys/cmn_err.h>
  49 #include <sys/cred.h>
  50 #include <sys/statvfs.h>
  51 #include <sys/mount.h>
  52 #include <sys/debug.h>
  53 #include <sys/modctl.h>
  54 #include <sys/fs_subr.h>
  55 #include <sys/fs/dv_node.h>
  56 #include <sys/fs/snode.h>
  57 #include <sys/sunndi.h>
  58 #include <sys/policy.h>
  59 #include <sys/sunmdi.h>
  60
  61 /*
  62  * devfs vfs operations.
  63  */
  64 static int devfs_mount(struct vfs *, struct vnode *, struct mounta *,
  65     struct cred *);
  66 static int devfs_unmount(struct vfs *, int, struct cred *);
  67 static int devfs_root(struct vfs *, struct vnode **);
  68 static int devfs_statvfs(struct vfs *, struct statvfs64 *);
  69 static int devfs_mountroot(struct vfs *, enum whymountroot);
  70
  71 static int devfsinit(int, char *);
  72
  73 static vfsdef_t devfs_vfssw = {
  74         VFSDEF_VERSION,
  75         "devfs",        /* type name string */
  76         devfsinit,      /* init routine */
  77         0,              /* flags */
  78         NULL            /* mount options table prototype */
  79 };
  80
  81 static kmutex_t devfs_lock;     /* protects global data */
  82 static int devfstype;           /* fstype */
  83 static dev_t devfsdev;          /* the fictious 'device' we live on */
  84 static struct devfs_data *devfs_mntinfo;        /* linked list of instances */
  85
  86 /*
  87  * Module linkage information
  88  */
  89 static struct modlfs modlfs = {
  90         &mod_fsops, "devices filesystem", &devfs_vfssw
  91 };
  92
  93 static struct modlinkage modlinkage = {
  94         MODREV_1, (void *)&modlfs, NULL
  95 };
  96
  97 int
  98 _init(void)
  99 {
 100         int e;
 101
 102         mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL);
 103         dv_node_cache_init();
 104         if ((e = mod_install(&modlinkage)) != 0) {
 105                 dv_node_cache_fini();
 106                 mutex_destroy(&devfs_lock);
 107                 return (e);
 108         }
 109         dcmn_err(("devfs loaded\n"));
 110         return (0);
 111 }
 112
 113 int
 114 _fini(void)
 115 {
 116         return (EBUSY);
 117 }
 118
 119 int
 120 _info(struct modinfo *modinfop)
 121 {
 122         return (mod_info(&modlinkage, modinfop));
 123 }
 124
 125 static const struct vfsops devfs_vfsops = {
 126         .vfs_mount = devfs_mount,
 127         .vfs_unmount = devfs_unmount,
 128         .vfs_root = devfs_root,
 129         .vfs_statvfs = devfs_statvfs,
 130         .vfs_mountroot = devfs_mountroot,
 131 };
 132
 133 /*ARGSUSED1*/
 134 static int
 135 devfsinit(int fstype, char *name)
 136 {
 137         int error;
 138         int dev;
 139         extern major_t getudev(void);   /* gack - what a function */
 140
 141         devfstype = fstype;
 142         /*
 143          * Associate VFS ops vector with this fstype
 144          */
 145         error = vfs_setfsops(fstype, &devfs_vfsops);
 146         if (error != 0) {
 147                 cmn_err(CE_WARN, "devfsinit: bad fstype");
 148                 return (error);
 149         }
 150
 151         /*
 152          * Invent a dev_t (sigh).
 153          */
 154         if ((dev = getudev()) == DDI_MAJOR_T_NONE) {
 155                 cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name);
 156                 dev = 0;
 157         }
 158         devfsdev = makedevice(dev, 0);
 159
 160         return (0);
 161 }
 162
 163 /*
 164  * The name of the mount point and the name of the attribute
 165  * filesystem are passed down from userland for now.
 166  */
 167 static int
 168 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
 169     struct cred *cr)
 170 {
 171         struct devfs_data *devfs_data;
 172         struct vnode *avp;
 173         struct dv_node *dv;
 174         struct vattr va;
 175
 176         dcmn_err(("devfs_mount\n"));
 177
 178         if (secpolicy_fs_mount(cr, mvp, vfsp) != 0)
 179                 return (EPERM);
 180
 181         /*
 182          * check that the mount point is sane
 183          */
 184         if (mvp->v_type != VDIR)
 185                 return (ENOTDIR);
 186
 187         ASSERT(uap->flags & MS_SYSSPACE);
 188         /*
 189          * Devfs can only be mounted from kernel during boot.
 190          * avp is the existing /devices, the same as the mount point.
 191          */
 192         avp = mvp;
 193
 194         /*
 195          * Create and initialize the vfs-private data.
 196          * This includes a hand-crafted root vnode (we build
 197          * this here mostly so that traverse() doesn't sleep
 198          * in VFS_ROOT()).
 199          */
 200         mutex_enter(&devfs_lock);
 201         ASSERT(devfs_mntinfo == NULL);
 202         dv = dv_mkroot(vfsp, devfsdev);
 203         dv->dv_attrvp = avp;            /* attribute root vp */
 204
 205         ASSERT(dv == dv->dv_dotdot);
 206
 207         devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP);
 208         devfs_data->devfs_vfsp = vfsp;
 209         devfs_data->devfs_root = dv;
 210
 211         vfsp->vfs_data = (caddr_t)devfs_data;
 212         vfsp->vfs_fstype = devfstype;
 213         vfsp->vfs_dev = devfsdev;
 214         vfsp->vfs_bsize = DEV_BSIZE;
 215         vfsp->vfs_mtime = ddi_get_time();
 216         vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype);
 217
 218         /* We're there. */
 219         devfs_mntinfo = devfs_data;
 220         mutex_exit(&devfs_lock);
 221
 222         va.va_mask = VATTR_ATIME|VATTR_MTIME;
 223         gethrestime(&va.va_atime);
 224         gethrestime(&va.va_mtime);
 225         (void) fop_setattr(DVTOV(dv), &va, 0, cr, NULL);
 226         return (0);
 227 }
 228
 229
 230 /*
 231  * We never unmount devfs in a real production system.
 232  */
 233 /*ARGSUSED*/
 234 static int
 235 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr)
 236 {
 237         return (EBUSY);
 238 }
 239
 240 /*
 241  * return root vnode for given vfs
 242  */
 243 static int
 244 devfs_root(struct vfs *vfsp, struct vnode **vpp)
 245 {
 246         dcmn_err(("devfs_root\n"));
 247         *vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root);
 248         VN_HOLD(*vpp);
 249         return (0);
 250 }
 251
 252 /*
 253  * return 'generic superblock' information to userland.
 254  *
 255  * not much that we can usefully admit to here
 256  */
 257 static int
 258 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
 259 {
 260         extern kmem_cache_t *dv_node_cache;
 261
 262         dev32_t d32;
 263
 264         dcmn_err(("devfs_statvfs\n"));
 265         bzero(sbp, sizeof (*sbp));
 266         sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
 267         /*
 268          * We could compute the number of devfsnodes here .. but since
 269          * it's dynamic anyway, it's not clear how useful this is.
 270          */
 271         sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc");
 272
 273         /* no illusions that free/avail files is relevant to devfs */
 274         sbp->f_ffree = 0;
 275         sbp->f_favail = 0;
 276
 277         /* no illusions that blocks are relevant to devfs */
 278         sbp->f_bfree = 0;
 279         sbp->f_bavail = 0;
 280         sbp->f_blocks = 0;
 281
 282         (void) cmpldev(&d32, vfsp->vfs_dev);
 283         sbp->f_fsid = d32;
 284         (void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name);
 285         sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
 286         sbp->f_namemax = MAXNAMELEN - 1;
 287         (void) strcpy(sbp->f_fstr, "devices");
 288
 289         return (0);
 290 }
 291
 292 /*
 293  * devfs always mount after root is mounted, so this should never
 294  * be invoked.
 295  */
 296 /*ARGSUSED*/
 297 static int
 298 devfs_mountroot(struct vfs *vfsp, enum whymountroot why)
 299 {
 300         dcmn_err(("devfs_mountroot\n"));
 301
 302         return (EINVAL);
 303 }
 304
 305 struct dv_node *
 306 devfs_dip_to_dvnode(dev_info_t *dip)
 307 {
 308         char *dirpath;
 309         struct vnode *dirvp;
 310
 311         ASSERT(dip != NULL);
 312
 313         /* no-op if devfs not mounted yet */
 314         if (devfs_mntinfo == NULL)
 315                 return (NULL);
 316
 317         /*
 318          * The lookupname below only looks up cached dv_nodes
 319          * because devfs_clean_key is set in thread specific data.
 320          */
 321         dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 322         (void) ddi_pathname(dip, dirpath);
 323         if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) {
 324                 dcmn_err(("directory %s not found\n", dirpath));
 325                 kmem_free(dirpath, MAXPATHLEN);
 326                 return (NULL);
 327         }
 328
 329         kmem_free(dirpath, MAXPATHLEN);
 330         return (VTODV(dirvp));
 331 }
 332
 333 /*
 334  * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root
 335  * and not a vHCI we also need to clean any vHCI branches because they
 336  * may contain pHCI nodes. A detach_node() of a pHCI will fail if its
 337  * mdi_devi_offline() fails, and the mdi_devi_offline() of the last
 338  * pHCI will fail unless an ndi_devi_offline() of the Client nodes under
 339  * the vHCI is successful - which requires a clean vHCI branch to removed
 340  * the devi_refs associated with devfs vnodes.
 341  */
 342 static int
 343 devfs_clean_vhci(dev_info_t *dip, void *args)
 344 {
 345         struct dv_node  *dvp;
 346         uint_t          flags = (uint_t)(uintptr_t)args;
 347
 348         (void) tsd_set(devfs_clean_key, (void *)1);
 349         dvp = devfs_dip_to_dvnode(dip);
 350         if (dvp) {
 351                 (void) dv_cleandir(dvp, NULL, flags);
 352                 VN_RELE(DVTOV(dvp));
 353         }
 354         (void) tsd_set(devfs_clean_key, NULL);
 355         return (DDI_WALK_CONTINUE);
 356 }
 357
 358 /*
 359  * devfs_clean()
 360  *
 361  * Destroy unreferenced dv_node's and detach devices.
 362  *
 363  * devfs_clean will try its best to clean up unused nodes. It is
 364  * no longer valid to assume that just because devfs_clean fails,
 365  * the device is not removable. This is because device contracts
 366  * can result in userland processes releasing a device during the
 367  * device offline process in the kernel. Thus it is no longer
 368  * correct to fail an offline just because devfs_clean finds
 369  * referenced dv_nodes. To enforce this, devfs_clean() always
 370  * returns success i.e. 0.
 371  *
 372  * devfs_clean() may return before removing all possible nodes if
 373  * we cannot acquire locks in areas of the code where potential for
 374  * deadlock exists (see comments in dv_find() and dv_cleandir() for
 375  * examples of this).
 376  *
 377  * devfs caches unreferenced dv_node to speed by the performance
 378  * of ls, find, etc. devfs_clean() is invoked to cleanup cached
 379  * dv_nodes to reclaim memory as well as to facilitate device
 380  * removal (dv_node reference devinfo nodes, which prevents driver
 381  * detach).
 382  *
 383  * If a shell parks in a /devices directory, the dv_node will be
 384  * held, preventing the corresponding device to be detached.
 385  * This would be a denial of service against DR. To prevent this,
 386  * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
 387  * The dv_cleandir() implementation does the right thing to ensure
 388  * successful DR.
 389  */
 390 int
 391 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags)
 392 {
 393         struct dv_node          *dvp;
 394
 395         dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x",
 396             (void *)dip, flags));
 397
 398         /* avoid recursion back into the device tree */
 399         (void) tsd_set(devfs_clean_key, (void *)1);
 400         dvp = devfs_dip_to_dvnode(dip);
 401         if (dvp == NULL) {
 402                 (void) tsd_set(devfs_clean_key, NULL);
 403                 return (0);
 404         }
 405
 406         (void) dv_cleandir(dvp, devnm, flags);
 407         (void) tsd_set(devfs_clean_key, NULL);
 408         VN_RELE(DVTOV(dvp));
 409
 410         /*
 411          * If we are doing a DV_CLEAN_FORCE, and we did not start at the
 412          * root, and we did not start at a vHCI node then clean vHCI
 413          * branches too.  Failure to clean vHCI branch does not cause EBUSY.
 414          *
 415          * Also, to accommodate nexus callers that clean 'self' to DR 'child'
 416          * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch
 417          * above fails - this prevents a busy DR 'child' sibling from causing
 418          * the DR of 'child' to fail because a vHCI branch was not cleaned.
 419          */
 420         if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) &&
 421             (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) {
 422                 /*
 423                  * NOTE: for backport the following is recommended
 424                  *      (void) devfs_clean_vhci(scsi_vhci_dip,
 425                  *          (void *)(uintptr_t)flags);
 426                  */
 427                 mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags);
 428         }
 429
 430         return (0);
 431 }
 432
 433 /*
 434  * lookup a devfs relative pathname, returning held vnodes for the final
 435  * component and the containing directory (if requested).
 436  *
 437  * NOTE: We can't use lookupname because this would use the current
 438  *      processes credentials (CRED) in the call lookuppnvp instead
 439  *      of kcred.  It also does not give you the flexibility so
 440  *      specify the directory to start the resolution in (devicesdir).
 441  */
 442 int
 443 devfs_lookupname(
 444         char    *pathname,              /* user pathname */
 445         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
 446         vnode_t **compvpp)              /* ret for ptr to component vnode */
 447 {
 448         struct pathname pn;
 449         int             error;
 450
 451         ASSERT(devicesdir);             /* devfs must be initialized */
 452         ASSERT(pathname);               /* must have some path */
 453
 454         if (error = pn_get(pathname, UIO_SYSSPACE, &pn))
 455                 return (error);
 456
 457         /* make the path relative to /devices. */
 458         pn_skipslash(&pn);
 459         if (pn_pathleft(&pn) == 0) {
 460                 /* all we had was "\0" or "/" (which skipslash skiped) */
 461                 if (dirvpp)
 462                         *dirvpp = NULL;
 463                 if (compvpp) {
 464                         VN_HOLD(devicesdir);
 465                         *compvpp = devicesdir;
 466                 }
 467         } else {
 468                 /*
 469                  * Use devfs lookup to resolve pathname to the vnode for
 470                  * the device via relative lookup in devfs. Extra holds for
 471                  * using devicesdir as directory we are searching and for
 472                  * being our root without being == rootdir.
 473                  */
 474                 VN_HOLD(devicesdir);
 475                 VN_HOLD(devicesdir);
 476                 error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp,
 477                     devicesdir, devicesdir, kcred);
 478         }
 479         pn_free(&pn);
 480
 481         return (error);
 482 }
 483
 484 /*
 485  * Given a devfs path (without the /devices prefix), walk
 486  * the dv_node sub-tree rooted at the path.
 487  */
 488 int
 489 devfs_walk(
 490         char            *path,
 491         void            (*callback)(struct dv_node *, void *),
 492         void            *arg)
 493 {
 494         char *dirpath, *devnm;
 495         struct vnode    *dirvp;
 496
 497         ASSERT(path && callback);
 498
 499         if (*path != '/' || devfs_mntinfo == NULL)
 500                 return (ENXIO);
 501
 502         dcmn_err(("devfs_walk: path = %s", path));
 503
 504         dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 505
 506         (void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path);
 507
 508         devnm = strrchr(dirpath, '/');
 509
 510         ASSERT(devnm);
 511
 512         *devnm++ = '\0';
 513
 514         if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) {
 515                 dcmn_err(("directory %s not found\n", dirpath));
 516                 kmem_free(dirpath, MAXPATHLEN);
 517                 return (ENXIO);
 518         }
 519
 520         /*
 521          * if path == "/", visit the root dv_node
 522          */
 523         if (*devnm == '\0') {
 524                 callback(VTODV(dirvp), arg);
 525                 devnm = NULL;
 526         }
 527
 528         dv_walk(VTODV(dirvp), devnm, callback, arg);
 529
 530         VN_RELE(dirvp);
 531
 532         kmem_free(dirpath, MAXPATHLEN);
 533
 534         return (0);
 535 }
 536
 537 int
 538 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp)
 539 {
 540         struct vnode *rvp;
 541         struct dv_node *dvp;
 542         int rval = -1;
 543
 544         /* fail if devfs not mounted yet */
 545         if (devfs_mntinfo == NULL)
 546                 return (rval);
 547
 548         if (fop_realvp(vp, &rvp, NULL) == 0 &&
 549             vn_matchops(rvp, &dv_vnodeops)) {
 550                 dvp = VTODV(rvp);
 551                 rw_enter(&dvp->dv_contents, RW_READER);
 552                 if (dvp->dv_priv) {
 553                         dphold(dvp->dv_priv);
 554                         *dpp = dvp->dv_priv;
 555                         rval = 0;
 556                 }
 557                 rw_exit(&dvp->dv_contents);
 558         }
 559         return (rval);
 560 }