usr/src/uts/common/fs/lookup.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  24  * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
  25  * Copyright (c) 2015, Joyent, Inc.
  26  */
  27
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30
  31 /*
  32  * University Copyright- Copyright (c) 1982, 1986, 1988
  33  * The Regents of the University of California
  34  * All Rights Reserved
  35  *
  36  * University Acknowledgment- Portions of this document are derived from
  37  * software developed by the University of California, Berkeley, and its
  38  * contributors.
  39  */
  40
  41 #include <sys/types.h>
  42 #include <sys/param.h>
  43 #include <sys/systm.h>
  44 #include <sys/cpuvar.h>
  45 #include <sys/errno.h>
  46 #include <sys/cred.h>
  47 #include <sys/user.h>
  48 #include <sys/uio.h>
  49 #include <sys/vfs.h>
  50 #include <sys/vnode.h>
  51 #include <sys/pathname.h>
  52 #include <sys/proc.h>
  53 #include <sys/vtrace.h>
  54 #include <sys/sysmacros.h>
  55 #include <sys/debug.h>
  56 #include <sys/dirent.h>
  57 #include <c2/audit.h>
  58 #include <sys/zone.h>
  59 #include <sys/dnlc.h>
  60 #include <sys/fs/snode.h>
  61
/*
 * Controls whether paths are stored with vnodes.
 * Initialized to 1 here.
 */
int vfs_vnode_path = 1;
  64
  65 int
  66 lookupname(
  67         char *fnamep,
  68         enum uio_seg seg,
  69         int followlink,
  70         vnode_t **dirvpp,
  71         vnode_t **compvpp)
  72 {
  73         return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp, NULL,
  74             CRED()));
  75 }
  76
  77 /*
  78  * Lookup the user file name,
  79  * Handle allocation and freeing of pathname buffer, return error.
  80  */
  81 int
  82 lookupnameatcred(
  83         char *fnamep,                   /* user pathname */
  84         enum uio_seg seg,               /* addr space that name is in */
  85         int followlink,                 /* follow sym links */
  86         vnode_t **dirvpp,               /* ret for ptr to parent dir vnode */
  87         vnode_t **compvpp,              /* ret for ptr to component vnode */
  88         vnode_t *startvp,               /* start path search from vp */
  89         cred_t *cr)                     /* credential */
  90 {
  91         char namebuf[TYPICALMAXPATHLEN];
  92         struct pathname lookpn;
  93         int error;
  94
  95         error = pn_get_buf(fnamep, seg, &lookpn, namebuf, sizeof (namebuf));
  96         if (error == 0) {
  97                 error = lookuppnatcred(&lookpn, NULL, followlink,
  98                     dirvpp, compvpp, startvp, cr);
  99         }
 100         if (error == ENAMETOOLONG) {
 101                 /*
 102                  * This thread used a pathname > TYPICALMAXPATHLEN bytes long.
 103                  */
 104                 if (error = pn_get(fnamep, seg, &lookpn))
 105                         return (error);
 106                 error = lookuppnatcred(&lookpn, NULL, followlink,
 107                     dirvpp, compvpp, startvp, cr);
 108                 pn_free(&lookpn);
 109         }
 110
 111         return (error);
 112 }
 113
 114 int
 115 lookupnameat(char *fnamep, enum uio_seg seg, int followlink,
 116     vnode_t **dirvpp, vnode_t **compvpp, vnode_t *startvp)
 117 {
 118         return (lookupnameatcred(fnamep, seg, followlink, dirvpp, compvpp,
 119             startvp, CRED()));
 120 }
 121
 122 int
 123 lookuppn(
 124         struct pathname *pnp,
 125         struct pathname *rpnp,
 126         int followlink,
 127         vnode_t **dirvpp,
 128         vnode_t **compvpp)
 129 {
 130         return (lookuppnatcred(pnp, rpnp, followlink, dirvpp, compvpp, NULL,
 131             CRED()));
 132 }
 133
 134 /*
 135  * Lookup the user file name from a given vp, using a specific credential.
 136  */
 137 int
 138 lookuppnatcred(
 139         struct pathname *pnp,           /* pathname to lookup */
 140         struct pathname *rpnp,          /* if non-NULL, return resolved path */
 141         int followlink,                 /* (don't) follow sym links */
 142         vnode_t **dirvpp,               /* ptr for parent vnode */
 143         vnode_t **compvpp,              /* ptr for entry vnode */
 144         vnode_t *startvp,               /* start search from this vp */
 145         cred_t *cr)                     /* user credential */
 146 {
 147         vnode_t *vp;    /* current directory vp */
 148         vnode_t *rootvp;
 149         proc_t *p = curproc;
 150
 151         if (pnp->pn_pathlen == 0)
 152                 return (ENOENT);
 153
 154         mutex_enter(&p->p_lock);        /* for u_rdir and u_cdir */
 155         if ((rootvp = PTOU(p)->u_rdir) == NULL)
 156                 rootvp = rootdir;
 157         else if (rootvp != rootdir)     /* no need to VN_HOLD rootdir */
 158                 VN_HOLD(rootvp);
 159
 160         if (pnp->pn_path[0] == '/') {
 161                 vp = rootvp;
 162         } else {
 163                 vp = (startvp == NULL) ? PTOU(p)->u_cdir : startvp;
 164         }
 165         VN_HOLD(vp);
 166         mutex_exit(&p->p_lock);
 167
 168         /*
 169          * Skip over leading slashes
 170          */
 171         if (pnp->pn_path[0] == '/') {
 172                 do {
 173                         pnp->pn_path++;
 174                         pnp->pn_pathlen--;
 175                 } while (pnp->pn_path[0] == '/');
 176         }
 177
 178         return (lookuppnvp(pnp, rpnp, followlink, dirvpp,
 179             compvpp, rootvp, vp, cr));
 180 }
 181
 182 int
 183 lookuppnat(struct pathname *pnp, struct pathname *rpnp,
 184     int followlink, vnode_t **dirvpp, vnode_t **compvpp,
 185     vnode_t *startvp)
 186 {
 187         return (lookuppnatcred(pnp, rpnp, followlink, dirvpp, compvpp, startvp,
 188             CRED()));
 189 }
 190
/*
 * Private flag to do our getcwd() dirty work.  When it is set in the flags
 * handed to lookuppnvp(), VOP_ACCESS() with VREAD is checked on each
 * directory before it is searched.  LOOKUP_MASK is the bitwise complement
 * of LOOKUP_CHECKREAD.
 */
#define LOOKUP_CHECKREAD        0x10
#define LOOKUP_MASK             (~LOOKUP_CHECKREAD)
 194
 195 /*
 196  * Starting at current directory, translate pathname pnp to end.
 197  * Leave pathname of final component in pnp, return the vnode
 198  * for the final component in *compvpp, and return the vnode
 199  * for the parent of the final component in dirvpp.
 200  *
 201  * This is the central routine in pathname translation and handles
 202  * multiple components in pathnames, separating them at /'s.  It also
 203  * implements mounted file systems and processes symbolic links.
 204  *
 205  * vp is the vnode where the directory search should start.
 206  *
 207  * Reference counts: vp must be held prior to calling this function.  rootvp
 208  * should only be held if rootvp != rootdir.
 209  */
 210 int
 211 lookuppnvp(
 212         struct pathname *pnp,           /* pathname to lookup */
 213         struct pathname *rpnp,          /* if non-NULL, return resolved path */
 214         int flags,                      /* follow symlinks */
 215         vnode_t **dirvpp,               /* ptr for parent vnode */
 216         vnode_t **compvpp,              /* ptr for entry vnode */
 217         vnode_t *rootvp,                /* rootvp */
 218         vnode_t *vp,                    /* directory to start search at */
 219         cred_t *cr)                     /* user's credential */
 220 {
 221         vnode_t *cvp;   /* current component vp */
 222         char component[MAXNAMELEN];     /* buffer for component (incl null) */
 223         int error;
 224         int nlink;
 225         int lookup_flags;
 226         struct pathname presrvd; /* case preserved name */
 227         struct pathname *pp = NULL;
 228         vnode_t *startvp;
 229         vnode_t *zonevp = curproc->p_zone->zone_rootvp;         /* zone root */
 230         int must_be_directory = 0;
 231         boolean_t retry_with_kcred;
 232         uint32_t auditing = AU_AUDITING();
 233
 234         CPU_STATS_ADDQ(CPU, sys, namei, 1);
 235         nlink = 0;
 236         cvp = NULL;
 237         if (rpnp)
 238                 rpnp->pn_pathlen = 0;
 239
 240         lookup_flags = dirvpp ? LOOKUP_DIR : 0;
 241         if (flags & FIGNORECASE) {
 242                 lookup_flags |= FIGNORECASE;
 243                 pn_alloc(&presrvd);
 244                 pp = &presrvd;
 245         }
 246
 247         if (auditing)
 248                 audit_anchorpath(pnp, vp == rootvp);
 249
 250         /*
 251          * Eliminate any trailing slashes in the pathname.
 252          * If there are any, we must follow all symlinks.
 253          * Also, we must guarantee that the last component is a directory.
 254          */
 255         if (pn_fixslash(pnp)) {
 256                 flags |= FOLLOW;
 257                 must_be_directory = 1;
 258         }
 259
 260         startvp = vp;
 261 next:
 262         retry_with_kcred = B_FALSE;
 263
 264         /*
 265          * Make sure we have a directory.
 266          */
 267         if (vp->v_type != VDIR) {
 268                 error = ENOTDIR;
 269                 goto bad;
 270         }
 271
 272         if (rpnp && VN_CMP(vp, rootvp))
 273                 (void) pn_set(rpnp, "/");
 274
 275         /*
 276          * Process the next component of the pathname.
 277          */
 278         if (error = pn_getcomponent(pnp, component)) {
 279                 goto bad;
 280         }
 281
 282         /*
 283          * Handle "..": two special cases.
 284          * 1. If we're at the root directory (e.g. after chroot or
 285          *    zone_enter) then change ".." to "." so we can't get
 286          *    out of this subtree.
 287          * 2. If this vnode is the root of a mounted file system,
 288          *    then replace it with the vnode that was mounted on
 289          *    so that we take the ".." in the other file system.
 290          */
 291         if (component[0] == '.' && component[1] == '.' && component[2] == 0) {
 292 checkforroot:
 293                 if (VN_CMP(vp, rootvp) || VN_CMP(vp, zonevp)) {
 294                         component[1] = '\0';
 295                 } else if (vp->v_flag & VROOT) {
 296                         vfs_t *vfsp;
 297                         cvp = vp;
 298
 299                         /*
 300                          * While we deal with the vfs pointer from the vnode
 301                          * the filesystem could have been forcefully unmounted
 302                          * and the vnode's v_vfsp could have been invalidated
 303                          * by VFS_UNMOUNT. Hence, we cache v_vfsp and use it
 304                          * with vfs_rlock_wait/vfs_unlock.
 305                          * It is safe to use the v_vfsp even it is freed by
 306                          * VFS_UNMOUNT because vfs_rlock_wait/vfs_unlock
 307                          * do not dereference v_vfsp. It is just used as a
 308                          * magic cookie.
 309                          * One more corner case here is the memory getting
 310                          * reused for another vfs structure. In this case
 311                          * lookuppnvp's vfs_rlock_wait will succeed, domount's
 312                          * vfs_lock will fail and domount will bail out with an
 313                          * error (EBUSY).
 314                          */
 315                         vfsp = cvp->v_vfsp;
 316
 317                         /*
 318                          * This lock is used to synchronize
 319                          * mounts/unmounts and lookups.
 320                          * Threads doing mounts/unmounts hold the
 321                          * writers version vfs_lock_wait().
 322                          */
 323
 324                         vfs_rlock_wait(vfsp);
 325
 326                         /*
 327                          * If this vnode is on a file system that
 328                          * has been forcibly unmounted,
 329                          * we can't proceed. Cancel this operation
 330                          * and return EIO.
 331                          *
 332                          * vfs_vnodecovered is NULL if unmounted.
 333                          * Currently, nfs uses VFS_UNMOUNTED to
 334                          * check if it's a forced-umount. Keep the
 335                          * same checking here as well even though it
 336                          * may not be needed.
 337                          */
 338                         if (((vp = cvp->v_vfsp->vfs_vnodecovered) == NULL) ||
 339                             (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
 340                                 vfs_unlock(vfsp);
 341                                 VN_RELE(cvp);
 342                                 if (pp)
 343                                         pn_free(pp);
 344                                 return (EIO);
 345                         }
 346                         VN_HOLD(vp);
 347                         vfs_unlock(vfsp);
 348                         VN_RELE(cvp);
 349                         cvp = NULL;
 350                         /*
 351                          * Crossing mount points. For eg: We are doing
 352                          * a lookup of ".." for file systems root vnode
 353                          * mounted here, and VOP_LOOKUP() (with covered vnode)
 354                          * will be on underlying file systems mount point
 355                          * vnode. Set retry_with_kcred flag as we might end
 356                          * up doing VOP_LOOKUP() with kcred if required.
 357                          */
 358                         retry_with_kcred = B_TRUE;
 359                         goto checkforroot;
 360                 }
 361         }
 362
 363         /*
 364          * LOOKUP_CHECKREAD is a private flag used by vnodetopath() to indicate
 365          * that we need to have read permission on every directory in the entire
 366          * path.  This is used to ensure that a forward-lookup of a cached value
 367          * has the same effect as a reverse-lookup when the cached value cannot
 368          * be found.
 369          */
 370         if ((flags & LOOKUP_CHECKREAD) &&
 371             (error = VOP_ACCESS(vp, VREAD, 0, cr, NULL)) != 0)
 372                 goto bad;
 373
 374         /*
 375          * Perform a lookup in the current directory.
 376          */
 377         error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
 378             rootvp, cr, NULL, NULL, pp);
 379
 380         /*
 381          * Retry with kcred - If crossing mount points & error is EACCES.
 382          *
 383          * If we are crossing mount points here and doing ".." lookup,
 384          * VOP_LOOKUP() might fail if the underlying file systems
 385          * mount point has no execute permission. In cases like these,
 386          * we retry VOP_LOOKUP() by giving as much privilage as possible
 387          * by passing kcred credentials.
 388          *
 389          * In case of hierarchical file systems, passing kcred still may
 390          * or may not work.
 391          * For eg: UFS FS --> Mount NFS FS --> Again mount UFS on some
 392          *                      directory inside NFS FS.
 393          */
 394         if ((error == EACCES) && retry_with_kcred)
 395                 error = VOP_LOOKUP(vp, component, &cvp, pnp, lookup_flags,
 396                     rootvp, zone_kcred(), NULL, NULL, pp);
 397
 398         if (error) {
 399                 cvp = NULL;
 400                 /*
 401                  * On error, return hard error if
 402                  * (a) we're not at the end of the pathname yet, or
 403                  * (b) the caller didn't want the parent directory, or
 404                  * (c) we failed for some reason other than a missing entry.
 405                  */
 406                 if (pn_pathleft(pnp) || dirvpp == NULL || error != ENOENT)
 407                         goto bad;
 408                 if (auditing) { /* directory access */
 409                         if (error = audit_savepath(pnp, vp, vp, error, cr))
 410                                 goto bad_noaudit;
 411                 }
 412
 413                 pn_setlast(pnp);
 414                 /*
 415                  * We inform the caller that the desired entry must be
 416                  * a directory by adding a '/' to the component name.
 417                  */
 418                 if (must_be_directory && (error = pn_addslash(pnp)) != 0)
 419                         goto bad;
 420                 *dirvpp = vp;
 421                 if (compvpp != NULL)
 422                         *compvpp = NULL;
 423                 if (rootvp != rootdir)
 424                         VN_RELE(rootvp);
 425                 if (pp)
 426                         pn_free(pp);
 427                 return (0);
 428         }
 429
 430         /*
 431          * Traverse mount points.
 432          * XXX why don't we need to hold a read lock here (call vn_vfsrlock)?
 433          * What prevents a concurrent update to v_vfsmountedhere?
 434          *      Possible answer: if mounting, we might not see the mount
 435          *      if it is concurrently coming into existence, but that's
 436          *      really not much different from the thread running a bit slower.
 437          *      If unmounting, we may get into traverse() when we shouldn't,
 438          *      but traverse() will catch this case for us.
 439          *      (For this to work, fetching v_vfsmountedhere had better
 440          *      be atomic!)
 441          */
 442         if (vn_mountedvfs(cvp) != NULL) {
 443                 if ((error = traverse(&cvp)) != 0)
 444                         goto bad;
 445         }
 446
 447         /*
 448          * If we hit a symbolic link and there is more path to be
 449          * translated or this operation does not wish to apply
 450          * to a link, then place the contents of the link at the
 451          * front of the remaining pathname.
 452          */
 453         if (cvp->v_type == VLNK && ((flags & FOLLOW) || pn_pathleft(pnp))) {
 454                 struct pathname linkpath;
 455
 456                 if (++nlink > MAXSYMLINKS) {
 457                         error = ELOOP;
 458                         goto bad;
 459                 }
 460                 pn_alloc(&linkpath);
 461                 if (error = pn_getsymlink(cvp, &linkpath, cr)) {
 462                         pn_free(&linkpath);
 463                         goto bad;
 464                 }
 465
 466                 if (auditing)
 467                         audit_symlink(pnp, &linkpath);
 468
 469                 if (pn_pathleft(&linkpath) == 0)
 470                         (void) pn_set(&linkpath, ".");
 471                 error = pn_insert(pnp, &linkpath, strlen(component));
 472                 pn_free(&linkpath);
 473                 if (error)
 474                         goto bad;
 475                 VN_RELE(cvp);
 476                 cvp = NULL;
 477                 if (pnp->pn_pathlen == 0) {
 478                         error = ENOENT;
 479                         goto bad;
 480                 }
 481                 if (pnp->pn_path[0] == '/') {
 482                         do {
 483                                 pnp->pn_path++;
 484                                 pnp->pn_pathlen--;
 485                         } while (pnp->pn_path[0] == '/');
 486                         VN_RELE(vp);
 487                         vp = rootvp;
 488                         VN_HOLD(vp);
 489                 }
 490                 if (auditing)
 491                         audit_anchorpath(pnp, vp == rootvp);
 492                 if (pn_fixslash(pnp)) {
 493                         flags |= FOLLOW;
 494                         must_be_directory = 1;
 495                 }
 496                 goto next;
 497         }
 498
 499         /*
 500          * If rpnp is non-NULL, remember the resolved path name therein.
 501          * Do not include "." components.  Collapse occurrences of
 502          * "previous/..", so long as "previous" is not itself "..".
 503          * Exhausting rpnp results in error ENAMETOOLONG.
 504          */
 505         if (rpnp && strcmp(component, ".") != 0) {
 506                 size_t len;
 507
 508                 if (strcmp(component, "..") == 0 &&
 509                     rpnp->pn_pathlen != 0 &&
 510                     !((rpnp->pn_pathlen > 2 &&
 511                     strncmp(rpnp->pn_path+rpnp->pn_pathlen-3, "/..", 3) == 0) ||
 512                     (rpnp->pn_pathlen == 2 &&
 513                     strncmp(rpnp->pn_path, "..", 2) == 0))) {
 514                         while (rpnp->pn_pathlen &&
 515                             rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
 516                                 rpnp->pn_pathlen--;
 517                         if (rpnp->pn_pathlen > 1)
 518                                 rpnp->pn_pathlen--;
 519                         rpnp->pn_path[rpnp->pn_pathlen] = '\0';
 520                 } else {
 521                         if (rpnp->pn_pathlen != 0 &&
 522                             rpnp->pn_path[rpnp->pn_pathlen-1] != '/')
 523                                 rpnp->pn_path[rpnp->pn_pathlen++] = '/';
 524                         if (flags & FIGNORECASE) {
 525                                 /*
 526                                  * Return the case-preserved name
 527                                  * within the resolved path.
 528                                  */
 529                                 error = copystr(pp->pn_buf,
 530                                     rpnp->pn_path + rpnp->pn_pathlen,
 531                                     rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
 532                         } else {
 533                                 error = copystr(component,
 534                                     rpnp->pn_path + rpnp->pn_pathlen,
 535                                     rpnp->pn_bufsize - rpnp->pn_pathlen, &len);
 536                         }
 537                         if (error)      /* copystr() returns ENAMETOOLONG */
 538                                 goto bad;
 539                         rpnp->pn_pathlen += (len - 1);
 540                         ASSERT(rpnp->pn_bufsize > rpnp->pn_pathlen);
 541                 }
 542         }
 543
 544         /*
 545          * If no more components, return last directory (if wanted) and
 546          * last component (if wanted).
 547          */
 548         if (pn_pathleft(pnp) == 0) {
 549                 /*
 550                  * If there was a trailing slash in the pathname,
 551                  * make sure the last component is a directory.
 552                  */
 553                 if (must_be_directory && cvp->v_type != VDIR) {
 554                         error = ENOTDIR;
 555                         goto bad;
 556                 }
 557                 if (dirvpp != NULL) {
 558                         /*
 559                          * Check that we have the real parent and not
 560                          * an alias of the last component.
 561                          */
 562                         if (vn_compare(vp, cvp)) {
 563                                 if (auditing)
 564                                         (void) audit_savepath(pnp, cvp, vp,
 565                                             EINVAL, cr);
 566                                 pn_setlast(pnp);
 567                                 VN_RELE(vp);
 568                                 VN_RELE(cvp);
 569                                 if (rootvp != rootdir)
 570                                         VN_RELE(rootvp);
 571                                 if (pp)
 572                                         pn_free(pp);
 573                                 return (EINVAL);
 574                         }
 575                         *dirvpp = vp;
 576                 } else
 577                         VN_RELE(vp);
 578                 if (auditing)
 579                         (void) audit_savepath(pnp, cvp, vp, 0, cr);
 580                 if (pnp->pn_path == pnp->pn_buf)
 581                         (void) pn_set(pnp, ".");
 582                 else
 583                         pn_setlast(pnp);
 584                 if (rpnp) {
 585                         if (VN_CMP(cvp, rootvp))
 586                                 (void) pn_set(rpnp, "/");
 587                         else if (rpnp->pn_pathlen == 0)
 588                                 (void) pn_set(rpnp, ".");
 589                 }
 590
 591                 if (compvpp != NULL)
 592                         *compvpp = cvp;
 593                 else
 594                         VN_RELE(cvp);
 595                 if (rootvp != rootdir)
 596                         VN_RELE(rootvp);
 597                 if (pp)
 598                         pn_free(pp);
 599                 return (0);
 600         }
 601
 602         /*
 603          * Skip over slashes from end of last component.
 604          */
 605         while (pnp->pn_path[0] == '/') {
 606                 pnp->pn_path++;
 607                 pnp->pn_pathlen--;
 608         }
 609
 610         /*
 611          * Searched through another level of directory:
 612          * release previous directory handle and save new (result
 613          * of lookup) as current directory.
 614          */
 615         VN_RELE(vp);
 616         vp = cvp;
 617         cvp = NULL;
 618         goto next;
 619
 620 bad:
 621         if (auditing)   /* reached end of path */
 622                 (void) audit_savepath(pnp, cvp, vp, error, cr);
 623 bad_noaudit:
 624         /*
 625          * Error.  Release vnodes and return.
 626          */
 627         if (cvp)
 628                 VN_RELE(cvp);
 629         /*
 630          * If the error was ESTALE and the current directory to look in
 631          * was the root for this lookup, the root for a mounted file
 632          * system, or the starting directory for lookups, then
 633          * return ENOENT instead of ESTALE.  In this case, no recovery
 634          * is possible by the higher level.  If ESTALE was returned for
 635          * some intermediate directory along the path, then recovery
 636          * is potentially possible and retrying from the higher level
 637          * will either correct the situation by purging stale cache
 638          * entries or eventually get back to the point where no recovery
 639          * is possible.
 640          */
 641         if (error == ESTALE &&
 642             (VN_CMP(vp, rootvp) || (vp->v_flag & VROOT) || vp == startvp))
 643                 error = ENOENT;
 644         VN_RELE(vp);
 645         if (rootvp != rootdir)
 646                 VN_RELE(rootvp);
 647         if (pp)
 648                 pn_free(pp);
 649         return (error);
 650 }
 651
 652 /*
 653  * Traverse a mount point.  Routine accepts a vnode pointer as a reference
 654  * parameter and performs the indirection, releasing the original vnode.
 655  */
 656 int
 657 traverse(vnode_t **cvpp)
 658 {
 659         int error = 0;
 660         vnode_t *cvp;
 661         vnode_t *tvp;
 662         vfs_t *vfsp;
 663
 664         cvp = *cvpp;
 665
 666         /*
 667          * If this vnode is mounted on, then we transparently indirect
 668          * to the vnode which is the root of the mounted file system.
 669          * Before we do this we must check that an unmount is not in
 670          * progress on this vnode.
 671          */
 672
 673         for (;;) {
 674                 /*
 675                  * Try to read lock the vnode.  If this fails because
 676                  * the vnode is already write locked, then check to
 677                  * see whether it is the current thread which locked
 678                  * the vnode.  If it is not, then read lock the vnode
 679                  * by waiting to acquire the lock.
 680                  *
 681                  * The code path in domount() is an example of support
 682                  * which needs to look up two pathnames and locks one
 683                  * of them in between the two lookups.
 684                  */
 685                 error = vn_vfsrlock(cvp);
 686                 if (error) {
 687                         if (!vn_vfswlock_held(cvp))
 688                                 error = vn_vfsrlock_wait(cvp);
 689                         if (error != 0) {
 690                                 /*
 691                                  * lookuppn() expects a held vnode to be
 692                                  * returned because it promptly calls
 693                                  * VN_RELE after the error return
 694                                  */
 695                                 *cvpp = cvp;
 696                                 return (error);
 697                         }
 698                 }
 699
 700                 /*
 701                  * Reached the end of the mount chain?
 702                  */
 703                 vfsp = vn_mountedvfs(cvp);
 704                 if (vfsp == NULL) {
 705                         vn_vfsunlock(cvp);
 706                         break;
 707                 }
 708
 709                 /*
 710                  * The read lock must be held across the call to VFS_ROOT() to
 711                  * prevent a concurrent unmount from destroying the vfs.
 712                  */
 713                 error = VFS_ROOT(vfsp, &tvp);
 714                 vn_vfsunlock(cvp);
 715
 716                 if (error)
 717                         break;
 718
 719                 VN_RELE(cvp);
 720
 721                 cvp = tvp;
 722         }
 723
 724         *cvpp = cvp;
 725         return (error);
 726 }
 727
 728 /*
 729  * Return the lowermost vnode if this is a mountpoint.
 730  */
 731 static vnode_t *
 732 vn_under(vnode_t *vp)
 733 {
 734         vnode_t *uvp;
 735         vfs_t *vfsp;
 736
 737         while (vp->v_flag & VROOT) {
 738
 739                 vfsp = vp->v_vfsp;
 740                 vfs_rlock_wait(vfsp);
 741                 if ((uvp = vfsp->vfs_vnodecovered) == NULL ||
 742                     (vfsp->vfs_flag & VFS_UNMOUNTED)) {
 743                         vfs_unlock(vfsp);
 744                         break;
 745                 }
 746                 VN_HOLD(uvp);
 747                 vfs_unlock(vfsp);
 748                 VN_RELE(vp);
 749                 vp = uvp;
 750         }
 751
 752         return (vp);
 753 }
 754
 755 static int
 756 vnode_match(vnode_t *v1, vnode_t *v2, cred_t *cr)
 757 {
 758         vattr_t v1attr, v2attr;
 759
 760         /*
 761          * If we have a device file, check to see if is a cloned open of the
 762          * same device.  For self-cloning devices, the major numbers will match.
 763          * For devices cloned through the 'clone' driver, the minor number of
 764          * the source device will be the same as the major number of the cloned
 765          * device.
 766          */
 767         if ((v1->v_type == VCHR || v1->v_type == VBLK) &&
 768             v1->v_type == v2->v_type) {
 769                 if ((spec_is_selfclone(v1) || spec_is_selfclone(v2)) &&
 770                     getmajor(v1->v_rdev) == getmajor(v2->v_rdev))
 771                         return (1);
 772
 773                 if (spec_is_clone(v1) &&
 774                     getmajor(v1->v_rdev) == getminor(v2->v_rdev))
 775                         return (1);
 776
 777                 if (spec_is_clone(v2) &&
 778                     getmajor(v2->v_rdev) == getminor(v1->v_rdev))
 779                         return (1);
 780         }
 781
 782         v1attr.va_mask = v2attr.va_mask = AT_TYPE;
 783
 784         /*
 785          * This check for symbolic links handles the pseudo-symlinks in procfs.
 786          * These particular links have v_type of VDIR, but the attributes have a
 787          * type of VLNK.  We need to avoid these links because otherwise if we
 788          * are currently in '/proc/self/fd', then '/proc/self/cwd' will compare
 789          * as the same vnode.
 790          */
 791         if (VOP_GETATTR(v1, &v1attr, 0, cr, NULL) != 0 ||
 792             VOP_GETATTR(v2, &v2attr, 0, cr, NULL) != 0 ||
 793             v1attr.va_type == VLNK || v2attr.va_type == VLNK)
 794                 return (0);
 795
 796         v1attr.va_mask = v2attr.va_mask = AT_TYPE | AT_FSID | AT_NODEID;
 797
 798         if (VOP_GETATTR(v1, &v1attr, ATTR_REAL, cr, NULL) != 0 ||
 799             VOP_GETATTR(v2, &v2attr, ATTR_REAL, cr, NULL) != 0)
 800                 return (0);
 801
 802         return (v1attr.va_fsid == v2attr.va_fsid &&
 803             v1attr.va_nodeid == v2attr.va_nodeid);
 804 }
 805
 806
 807 /*
 808  * Find the entry in the directory corresponding to the target vnode.
 809  */
 810 int
 811 dirfindvp(vnode_t *vrootp, vnode_t *dvp, vnode_t *tvp, cred_t *cr, char *dbuf,
 812     size_t dlen, dirent64_t **rdp)
 813 {
 814         size_t dbuflen;
 815         struct iovec iov;
 816         struct uio uio;
 817         int error;
 818         int eof;
 819         vnode_t *cmpvp;
 820         struct dirent64 *dp;
 821         pathname_t pnp;
 822
 823         ASSERT(dvp->v_type == VDIR);
 824
 825         /*
 826          * This is necessary because of the strange semantics of VOP_LOOKUP().
 827          */
 828         bzero(&pnp, sizeof (pnp));
 829
 830         eof = 0;
 831
 832         uio.uio_iov = &iov;
 833         uio.uio_iovcnt = 1;
 834         uio.uio_segflg = UIO_SYSSPACE;
 835         uio.uio_fmode = 0;
 836         uio.uio_extflg = UIO_COPY_CACHED;
 837         uio.uio_loffset = 0;
 838
 839         if ((error = VOP_ACCESS(dvp, VREAD, 0, cr, NULL)) != 0)
 840                 return (error);
 841
 842         while (!eof) {
 843                 uio.uio_resid = dlen;
 844                 iov.iov_base = dbuf;
 845                 iov.iov_len = dlen;
 846
 847                 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
 848                 error = VOP_READDIR(dvp, &uio, cr, &eof, NULL, 0);
 849                 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
 850
 851                 dbuflen = dlen - uio.uio_resid;
 852
 853                 if (error || dbuflen == 0)
 854                         break;
 855
 856                 dp = (dirent64_t *)dbuf;
 857                 while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
 858                         /*
 859                          * Ignore '.' and '..' entries
 860                          */
 861                         if (strcmp(dp->d_name, ".") == 0 ||
 862                             strcmp(dp->d_name, "..") == 0) {
 863                                 dp = (dirent64_t *)((intptr_t)dp +
 864                                     dp->d_reclen);
 865                                 continue;
 866                         }
 867
 868                         error = VOP_LOOKUP(dvp, dp->d_name, &cmpvp, &pnp, 0,
 869                             vrootp, cr, NULL, NULL, NULL);
 870
 871                         /*
 872                          * We only want to bail out if there was an error other
 873                          * than ENOENT.  Otherwise, it could be that someone
 874                          * just removed an entry since the readdir() call, and
 875                          * the entry we want is further on in the directory.
 876                          */
 877                         if (error == 0) {
 878                                 if (vnode_match(tvp, cmpvp, cr)) {
 879                                         VN_RELE(cmpvp);
 880                                         *rdp = dp;
 881                                         return (0);
 882                                 }
 883
 884                                 VN_RELE(cmpvp);
 885                         } else if (error != ENOENT) {
 886                                 return (error);
 887                         }
 888
 889                         dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
 890                 }
 891         }
 892
 893         /*
 894          * Something strange has happened, this directory does not contain the
 895          * specified vnode.  This should never happen in the normal case, since
 896          * we ensured that dvp is the parent of vp.  This is possible in some
 897          * rare conditions (races and the special .zfs directory).
 898          */
 899         if (error == 0) {
 900                 error = VOP_LOOKUP(dvp, ".zfs", &cmpvp, &pnp, 0, vrootp, cr,
 901                     NULL, NULL, NULL);
 902                 if (error == 0) {
 903                         if (vnode_match(tvp, cmpvp, cr)) {
 904                                 (void) strcpy(dp->d_name, ".zfs");
 905                                 dp->d_reclen = strlen(".zfs");
 906                                 dp->d_off = 2;
 907                                 dp->d_ino = 1;
 908                                 *rdp = dp;
 909                         } else {
 910                                 error = ENOENT;
 911                         }
 912                         VN_RELE(cmpvp);
 913                 }
 914         }
 915
 916         return (error);
 917 }
 918
 919 /*
 920  * Given a global path (from rootdir), and a vnode that is the current root,
 921  * return the portion of the path that is beneath the current root or NULL on
 922  * failure.  The path MUST be a resolved path (no '..' entries or symlinks),
 923  * otherwise this function will fail.
 924  */
 925 static char *
 926 localpath(char *path, struct vnode *vrootp, cred_t *cr)
 927 {
 928         vnode_t *vp;
 929         vnode_t *cvp;
 930         char component[MAXNAMELEN];
 931         char *ret = NULL;
 932         pathname_t pn;
 933
 934         /*
 935          * We use vn_compare() instead of VN_CMP() in order to detect lofs
 936          * mounts and stacked vnodes.
 937          */
 938         if (vn_compare(vrootp, rootdir))
 939                 return (path);
 940
 941         if (pn_get(path, UIO_SYSSPACE, &pn) != 0)
 942                 return (NULL);
 943
 944         vp = rootdir;
 945         VN_HOLD(vp);
 946
 947         if (vn_ismntpt(vp) && traverse(&vp) != 0) {
 948                 VN_RELE(vp);
 949                 pn_free(&pn);
 950                 return (NULL);
 951         }
 952
 953         while (pn_pathleft(&pn)) {
 954                 pn_skipslash(&pn);
 955
 956                 if (pn_getcomponent(&pn, component) != 0)
 957                         break;
 958
 959                 if (VOP_LOOKUP(vp, component, &cvp, &pn, 0, rootdir, cr,
 960                     NULL, NULL, NULL) != 0)
 961                         break;
 962                 VN_RELE(vp);
 963                 vp = cvp;
 964
 965                 if (vn_ismntpt(vp) && traverse(&vp) != 0)
 966                         break;
 967
 968                 if (vn_compare(vp, vrootp)) {
 969                         ret = path + (pn.pn_path - pn.pn_buf);
 970                         break;
 971                 }
 972         }
 973
 974         VN_RELE(vp);
 975         pn_free(&pn);
 976
 977         return (ret);
 978 }
 979
 980 /*
 981  * Given a directory, return the full, resolved path.  This looks up "..",
 982  * searches for the given vnode in the parent, appends the component, etc.  It
 983  * is used to implement vnodetopath() and getcwd() when the cached path fails.
 984  */
 985 static int
 986 dirtopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, int flags,
 987     cred_t *cr)
 988 {
 989         pathname_t pn, rpn, emptypn;
 990         vnode_t *cmpvp, *pvp = NULL;
 991         vnode_t *startvp = vp;
 992         int err = 0, vprivs;
 993         size_t complen;
 994         char *dbuf;
 995         dirent64_t *dp;
 996         char            *bufloc;
 997         size_t          dlen = DIRENT64_RECLEN(MAXPATHLEN);
 998         refstr_t        *mntpt;
 999
1000         /* Operation only allowed on directories */
1001         ASSERT(vp->v_type == VDIR);
1002
1003         /* We must have at least enough space for "/" */
1004         if (buflen < 2)
1005                 return (ENAMETOOLONG);
1006
1007         /* Start at end of string with terminating null */
1008         bufloc = &buf[buflen - 1];
1009         *bufloc = '\0';
1010
1011         pn_alloc(&pn);
1012         pn_alloc(&rpn);
1013         dbuf = kmem_alloc(dlen, KM_SLEEP);
1014         bzero(&emptypn, sizeof (emptypn));
1015
1016         /*
1017          * Begin with an additional reference on vp.  This will be decremented
1018          * during the loop.
1019          */
1020         VN_HOLD(vp);
1021
1022         for (;;) {
1023                 /*
1024                  * Return if we've reached the root.  If the buffer is empty,
1025                  * return '/'.  We explicitly don't use vn_compare(), since it
1026                  * compares the real vnodes.  A lofs mount of '/' would produce
1027                  * incorrect results otherwise.
1028                  */
1029                 if (VN_CMP(vrootp, vp)) {
1030                         if (*bufloc == '\0')
1031                                 *--bufloc = '/';
1032                         break;
1033                 }
1034
1035                 /*
1036                  * If we've reached the VFS root, something has gone wrong.  We
1037                  * should have reached the root in the above check.  The only
1038                  * explantation is that 'vp' is not contained withing the given
1039                  * root, in which case we return EPERM.
1040                  */
1041                 if (VN_CMP(rootdir, vp)) {
1042                         err = EPERM;
1043                         goto out;
1044                 }
1045
1046                 /*
1047                  * Shortcut: see if this vnode is a mountpoint.  If so,
1048                  * grab the path information from the vfs_t.
1049                  */
1050                 if (vp->v_flag & VROOT) {
1051
1052                         mntpt = vfs_getmntpoint(vp->v_vfsp);
1053                         if ((err = pn_set(&pn, (char *)refstr_value(mntpt)))
1054                             == 0) {
1055                                 refstr_rele(mntpt);
1056                                 rpn.pn_path = rpn.pn_buf;
1057
1058                                 /*
1059                                  * Ensure the mountpoint still exists.
1060                                  */
1061                                 VN_HOLD(vrootp);
1062                                 if (vrootp != rootdir)
1063                                         VN_HOLD(vrootp);
1064                                 if (lookuppnvp(&pn, &rpn, flags, NULL,
1065                                     &cmpvp, vrootp, vrootp, cr) == 0) {
1066
1067                                         if (VN_CMP(vp, cmpvp)) {
1068                                                 VN_RELE(cmpvp);
1069
1070                                                 complen = strlen(rpn.pn_path);
1071                                                 bufloc -= complen;
1072                                                 if (bufloc < buf) {
1073                                                         err = ERANGE;
1074                                                         goto out;
1075                                                 }
1076                                                 bcopy(rpn.pn_path, bufloc,
1077                                                     complen);
1078                                                 break;
1079                                         } else {
1080                                                 VN_RELE(cmpvp);
1081                                         }
1082                                 }
1083                         } else {
1084                                 refstr_rele(mntpt);
1085                         }
1086                 }
1087
1088                 /*
1089                  * Shortcut: see if this vnode has correct v_path. If so,
1090                  * we have the work done.
1091                  */
1092                 mutex_enter(&vp->v_lock);
1093                 if (vp->v_path != NULL) {
1094
1095                         if ((err = pn_set(&pn, vp->v_path)) == 0) {
1096                                 mutex_exit(&vp->v_lock);
1097                                 rpn.pn_path = rpn.pn_buf;
1098
1099                                 /*
1100                                  * Ensure the v_path pointing to correct vnode
1101                                  */
1102                                 VN_HOLD(vrootp);
1103                                 if (vrootp != rootdir)
1104                                         VN_HOLD(vrootp);
1105                                 if (lookuppnvp(&pn, &rpn, flags, NULL,
1106                                     &cmpvp, vrootp, vrootp, cr) == 0) {
1107
1108                                         if (VN_CMP(vp, cmpvp)) {
1109                                                 VN_RELE(cmpvp);
1110
1111                                                 complen = strlen(rpn.pn_path);
1112                                                 bufloc -= complen;
1113                                                 if (bufloc < buf) {
1114                                                         err = ERANGE;
1115                                                         goto out;
1116                                                 }
1117                                                 bcopy(rpn.pn_path, bufloc,
1118                                                     complen);
1119                                                 break;
1120                                         } else {
1121                                                 VN_RELE(cmpvp);
1122                                         }
1123                                 }
1124                         } else {
1125                                 mutex_exit(&vp->v_lock);
1126                         }
1127                 } else {
1128                         mutex_exit(&vp->v_lock);
1129                 }
1130
1131                 /*
1132                  * Shortcuts failed, search for this vnode in its parent.  If
1133                  * this is a mountpoint, then get the vnode underneath.
1134                  */
1135                 if (vp->v_flag & VROOT)
1136                         vp = vn_under(vp);
1137                 if ((err = VOP_LOOKUP(vp, "..", &pvp, &emptypn, 0, vrootp, cr,
1138                     NULL, NULL, NULL)) != 0)
1139                         goto out;
1140
1141                 /*
1142                  * With extended attributes, it's possible for a directory to
1143                  * have a parent that is a regular file.  Check for that here.
1144                  */
1145                 if (pvp->v_type != VDIR) {
1146                         err = ENOTDIR;
1147                         goto out;
1148                 }
1149
1150                 /*
1151                  * If this is true, something strange has happened.  This is
1152                  * only true if we are the root of a filesystem, which should
1153                  * have been caught by the check above.
1154                  */
1155                 if (VN_CMP(pvp, vp)) {
1156                         err = ENOENT;
1157                         goto out;
1158                 }
1159
1160                 /*
1161                  * Check if we have read and search privilege so, that
1162                  * we can lookup the path in the directory
1163                  */
1164                 vprivs = (flags & LOOKUP_CHECKREAD) ? VREAD | VEXEC : VEXEC;
1165                 if ((err = VOP_ACCESS(pvp, vprivs, 0, cr, NULL)) != 0) {
1166                         goto out;
1167                 }
1168
1169                 /*
1170                  * Search the parent directory for the entry corresponding to
1171                  * this vnode.
1172                  */
1173                 if ((err = dirfindvp(vrootp, pvp, vp, cr, dbuf, dlen, &dp))
1174                     != 0)
1175                         goto out;
1176                 complen = strlen(dp->d_name);
1177                 bufloc -= complen;
1178                 if (bufloc <= buf) {
1179                         err = ENAMETOOLONG;
1180                         goto out;
1181                 }
1182                 bcopy(dp->d_name, bufloc, complen);
1183
1184                 /* Prepend a slash to the current path.  */
1185                 *--bufloc = '/';
1186
1187                 /* And continue with the next component */
1188                 VN_RELE(vp);
1189                 vp = pvp;
1190                 pvp = NULL;
1191         }
1192
1193         /*
1194          * Place the path at the beginning of the buffer.
1195          */
1196         if (bufloc != buf)
1197                 ovbcopy(bufloc, buf, buflen - (bufloc - buf));
1198
1199 out:
1200         /*
1201          * If the error was ESTALE and the current directory to look in
1202          * was the root for this lookup, the root for a mounted file
1203          * system, or the starting directory for lookups, then
1204          * return ENOENT instead of ESTALE.  In this case, no recovery
1205          * is possible by the higher level.  If ESTALE was returned for
1206          * some intermediate directory along the path, then recovery
1207          * is potentially possible and retrying from the higher level
1208          * will either correct the situation by purging stale cache
1209          * entries or eventually get back to the point where no recovery
1210          * is possible.
1211          */
1212         if (err == ESTALE &&
1213             (VN_CMP(vp, vrootp) || (vp->v_flag & VROOT) || vp == startvp))
1214                 err = ENOENT;
1215
1216         kmem_free(dbuf, dlen);
1217         VN_RELE(vp);
1218         if (pvp)
1219                 VN_RELE(pvp);
1220         pn_free(&pn);
1221         pn_free(&rpn);
1222
1223         return (err);
1224 }
1225
1226 /*
1227  * The additional flag, LOOKUP_CHECKREAD, is used to enforce artificial
1228  * constraints in order to be standards compliant.  For example, if we have
1229  * the cached path of '/foo/bar', and '/foo' has permissions 100 (execute
1230  * only), then we can legitimately look up the path to the current working
1231  * directory without needing read permission.  Existing standards tests,
1232  * however, assume that we are determining the path by repeatedly looking up
1233  * "..".  We need to keep this behavior in order to maintain backwards
1234  * compatibility.
1235  */
1236 static int
1237 vnodetopath_common(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen,
1238     cred_t *cr, int flags)
1239 {
1240         pathname_t pn, rpn;
1241         int ret;
1242         vnode_t *compvp, *realvp;
1243         proc_t *p = curproc;
1244         int doclose = 0;
1245
1246         /*
1247          * If vrootp is NULL, get the root for curproc.  Callers with any other
1248          * requirements should pass in a different vrootp.
1249          */
1250         if (vrootp == NULL) {
1251                 mutex_enter(&p->p_lock);
1252                 if ((vrootp = PTOU(p)->u_rdir) == NULL)
1253                         vrootp = rootdir;
1254                 VN_HOLD(vrootp);
1255                 mutex_exit(&p->p_lock);
1256         } else {
1257                 VN_HOLD(vrootp);
1258         }
1259
1260         /*
1261          * This is to get around an annoying artifact of the /proc filesystem,
1262          * which is the behavior of {cwd/root}.  Trying to resolve this path
1263          * will result in /proc/pid/cwd instead of whatever the real working
1264          * directory is.  We can't rely on VOP_REALVP(), since that will break
1265          * lofs.  The only difference between procfs and lofs is that opening
1266          * the file will return the underling vnode in the case of procfs.
1267          */
1268         if (vp->v_type == VDIR && VOP_REALVP(vp, &realvp, NULL) == 0 &&
1269             realvp != vp) {
1270                 VN_HOLD(vp);
1271                 if (VOP_OPEN(&vp, FREAD, cr, NULL) == 0)
1272                         doclose = 1;
1273                 else
1274                         VN_RELE(vp);
1275         }
1276
1277         pn_alloc(&pn);
1278
1279         /*
1280          * Check to see if we have a cached path in the vnode.
1281          */
1282         mutex_enter(&vp->v_lock);
1283         if (vp->v_path != NULL) {
1284                 (void) pn_set(&pn, vp->v_path);
1285                 mutex_exit(&vp->v_lock);
1286
1287                 pn_alloc(&rpn);
1288
1289                 /* We should only cache absolute paths */
1290                 ASSERT(pn.pn_buf[0] == '/');
1291
1292                 /*
1293                  * If we are in a zone or a chroot environment, then we have to
1294                  * take additional steps, since the path to the root might not
1295                  * be readable with the current credentials, even though the
1296                  * process can legitmately access the file.  In this case, we
1297                  * do the following:
1298                  *
1299                  * lookuppnvp() with all privileges to get the resolved path.
1300                  * call localpath() to get the local portion of the path, and
1301                  * continue as normal.
1302                  *
1303                  * If the the conversion to a local path fails, then we continue
1304                  * as normal.  This is a heuristic to make process object file
1305                  * paths available from within a zone.  Because lofs doesn't
1306                  * support page operations, the vnode stored in the seg_t is
1307                  * actually the underlying real vnode, not the lofs node itself.
1308                  * Most of the time, the lofs path is the same as the underlying
1309                  * vnode (for example, /usr/lib/libc.so.1).
1310                  */
1311                 if (vrootp != rootdir) {
1312                         char *local = NULL;
1313                         VN_HOLD(rootdir);
1314                         if (lookuppnvp(&pn, &rpn, FOLLOW,
1315                             NULL, &compvp, rootdir, rootdir, kcred) == 0) {
1316                                 local = localpath(rpn.pn_path, vrootp,
1317                                     kcred);
1318                                 VN_RELE(compvp);
1319                         }
1320
1321                         /*
1322                          * The original pn was changed through lookuppnvp().
1323                          * Set it to local for next validation attempt.
1324                          */
1325                         if (local) {
1326                                 (void) pn_set(&pn, local);
1327                         } else {
1328                                 goto notcached;
1329                         }
1330                 }
1331
1332                 /*
1333                  * We should have a local path at this point, so start the
1334                  * search from the root of the current process.
1335                  */
1336                 VN_HOLD(vrootp);
1337                 if (vrootp != rootdir)
1338                         VN_HOLD(vrootp);
1339                 ret = lookuppnvp(&pn, &rpn, FOLLOW | flags, NULL,
1340                     &compvp, vrootp, vrootp, cr);
1341                 if (ret == 0) {
1342                         /*
1343                          * Check to see if the returned vnode is the same as
1344                          * the one we expect.  If not, give up.
1345                          */
1346                         if (!vn_compare(vp, compvp) &&
1347                             !vnode_match(vp, compvp, cr)) {
1348                                 VN_RELE(compvp);
1349                                 goto notcached;
1350                         }
1351
1352                         VN_RELE(compvp);
1353
1354                         /*
1355                          * Return the result.
1356                          */
1357                         if (buflen <= rpn.pn_pathlen)
1358                                 goto notcached;
1359
1360                         bcopy(rpn.pn_path, buf, rpn.pn_pathlen + 1);
1361                         pn_free(&pn);
1362                         pn_free(&rpn);
1363                         VN_RELE(vrootp);
1364                         if (doclose) {
1365                                 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
1366                                 VN_RELE(vp);
1367                         }
1368                         return (0);
1369                 }
1370
1371 notcached:
1372                 pn_free(&rpn);
1373         } else {
1374                 mutex_exit(&vp->v_lock);
1375         }
1376
1377         pn_free(&pn);
1378
1379         if (vp->v_type != VDIR) {
1380                 ret = ENOENT;
1381         } else {
1382                 ret = dirtopath(vrootp, vp, buf, buflen, flags, cr);
1383         }
1384
1385         VN_RELE(vrootp);
1386         if (doclose) {
1387                 (void) VOP_CLOSE(vp, FREAD, 1, 0, cr, NULL);
1388                 VN_RELE(vp);
1389         }
1390
1391         return (ret);
1392 }
1393
1394 int
1395 vnodetopath(vnode_t *vrootp, vnode_t *vp, char *buf, size_t buflen, cred_t *cr)
1396 {
1397         return (vnodetopath_common(vrootp, vp, buf, buflen, cr, 0));
1398 }
1399
1400 int
1401 dogetcwd(char *buf, size_t buflen)
1402 {
1403         int ret;
1404         vnode_t *vp;
1405         vnode_t *compvp;
1406         refstr_t *cwd, *oldcwd;
1407         const char *value;
1408         pathname_t rpnp, pnp;
1409         proc_t *p = curproc;
1410
1411         /*
1412          * Check to see if there is a cached version of the cwd.  If so, lookup
1413          * the cached value and make sure it is the same vnode.
1414          */
1415         mutex_enter(&p->p_lock);
1416         if ((cwd = PTOU(p)->u_cwd) != NULL)
1417                 refstr_hold(cwd);
1418         vp = PTOU(p)->u_cdir;
1419         VN_HOLD(vp);
1420         mutex_exit(&p->p_lock);
1421
1422         /*
1423          * Make sure we have permission to access the current directory.
1424          */
1425         if ((ret = VOP_ACCESS(vp, VEXEC, 0, CRED(), NULL)) != 0) {
1426                 if (cwd != NULL)
1427                         refstr_rele(cwd);
1428                 VN_RELE(vp);
1429                 return (ret);
1430         }
1431
1432         if (cwd) {
1433                 value = refstr_value(cwd);
1434                 if ((ret = pn_get((char *)value, UIO_SYSSPACE, &pnp)) != 0) {
1435                         refstr_rele(cwd);
1436                         VN_RELE(vp);
1437                         return (ret);
1438                 }
1439
1440                 pn_alloc(&rpnp);
1441
1442                 if (lookuppn(&pnp, &rpnp, NO_FOLLOW, NULL, &compvp) == 0) {
1443
1444                         if (VN_CMP(vp, compvp) &&
1445                             strcmp(value, rpnp.pn_path) == 0) {
1446                                 VN_RELE(compvp);
1447                                 VN_RELE(vp);
1448                                 pn_free(&pnp);
1449                                 pn_free(&rpnp);
1450                                 if (strlen(value) + 1 > buflen) {
1451                                         refstr_rele(cwd);
1452                                         return (ENAMETOOLONG);
1453                                 }
1454                                 bcopy(value, buf, strlen(value) + 1);
1455                                 refstr_rele(cwd);
1456                                 return (0);
1457                         }
1458
1459                         VN_RELE(compvp);
1460                 }
1461
1462                 pn_free(&rpnp);
1463                 pn_free(&pnp);
1464
1465                 refstr_rele(cwd);
1466         }
1467
1468         ret = vnodetopath_common(NULL, vp, buf, buflen, CRED(),
1469             LOOKUP_CHECKREAD);
1470
1471         VN_RELE(vp);
1472
1473         /*
1474          * Store the new cwd and replace the existing cached copy.
1475          */
1476         if (ret == 0)
1477                 cwd = refstr_alloc(buf);
1478         else
1479                 cwd = NULL;
1480
1481         mutex_enter(&p->p_lock);
1482         oldcwd = PTOU(p)->u_cwd;
1483         PTOU(p)->u_cwd = cwd;
1484         mutex_exit(&p->p_lock);
1485
1486         if (oldcwd)
1487                 refstr_rele(oldcwd);
1488
1489         return (ret);
1490 }