2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
40 #include "opt_ktrace.h"
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/fcntl.h>
49 #include <sys/mutex.h>
50 #include <sys/namei.h>
51 #include <sys/vnode.h>
52 #include <sys/mount.h>
53 #include <sys/filedesc.h>
55 #include <sys/syscallsubr.h>
56 #include <sys/sysctl.h>
58 #include <sys/ktrace.h>
61 #include <security/audit/audit.h>
62 #include <security/mac/mac_framework.h>
66 #define NAMEI_DIAGNOSTIC 1
67 #undef NAMEI_DIAGNOSTIC
70 * Allocation zone for namei
72 uma_zone_t namei_zone
;
74 * Placeholder vnode for mp traversal
76 static struct vnode
*vp_crossmp
;
79 nameiinit(void *dummy __unused
)
83 namei_zone
= uma_zcreate("NAMEI", MAXPATHLEN
, NULL
, NULL
, NULL
, NULL
,
85 error
= getnewvnode("crossmp", NULL
, &dead_vnodeops
, &vp_crossmp
);
87 panic("nameiinit: getnewvnode");
88 VN_LOCK_ASHARE(vp_crossmp
);
90 SYSINIT(vfs
, SI_SUB_VFS
, SI_ORDER_SECOND
, nameiinit
, NULL
);
93 static int lookup_shared
= 1;
95 static int lookup_shared
= 0;
97 SYSCTL_INT(_vfs
, OID_AUTO
, lookup_shared
, CTLFLAG_RW
, &lookup_shared
, 0,
98 "Enables/Disables shared locks for path name translation");
101 * Convert a pathname into a pointer to a locked vnode.
103 * The FOLLOW flag is set when symbolic links are to be followed
104 * when they occur at the end of the name translation process.
105 * Symbolic links are always followed for all other pathname
106 * components other than the last.
108 * The segflg defines whether the name is to be copied from user
109 * space or kernel space.
111 * Overall outline of namei:
114 * get starting directory
115 * while (!done && !error) {
116 * call lookup to search path.
117 * if symbolic link, massage name in buffer and continue
121 namei(struct nameidata
*ndp
)
123 struct filedesc
*fdp
; /* pointer to file descriptor state */
124 char *cp
; /* pointer into pathname argument */
125 struct vnode
*dp
; /* the directory we are searching */
126 struct iovec aiov
; /* uio for reading symbolic links */
129 struct componentname
*cnp
= &ndp
->ni_cnd
;
130 struct thread
*td
= cnp
->cn_thread
;
131 struct proc
*p
= td
->td_proc
;
134 KASSERT((cnp
->cn_flags
& MPSAFE
) != 0 || mtx_owned(&Giant
) != 0,
135 ("NOT MPSAFE and Giant not held"));
136 ndp
->ni_cnd
.cn_cred
= ndp
->ni_cnd
.cn_thread
->td_ucred
;
137 KASSERT(cnp
->cn_cred
&& p
, ("namei: bad cred/proc"));
138 KASSERT((cnp
->cn_nameiop
& (~OPMASK
)) == 0,
139 ("namei: nameiop contaminated with flags"));
140 KASSERT((cnp
->cn_flags
& OPMASK
) == 0,
141 ("namei: flags contaminated with nameiops"));
143 cnp
->cn_flags
&= ~LOCKSHARED
;
147 * Get a buffer for the name to be translated, and copy the
148 * name into the buffer.
150 if ((cnp
->cn_flags
& HASBUF
) == 0)
151 cnp
->cn_pnbuf
= uma_zalloc(namei_zone
, M_WAITOK
);
152 if (ndp
->ni_segflg
== UIO_SYSSPACE
)
153 error
= copystr(ndp
->ni_dirp
, cnp
->cn_pnbuf
,
154 MAXPATHLEN
, (size_t *)&ndp
->ni_pathlen
);
156 error
= copyinstr(ndp
->ni_dirp
, cnp
->cn_pnbuf
,
157 MAXPATHLEN
, (size_t *)&ndp
->ni_pathlen
);
159 /* If we are auditing the kernel pathname, save the user pathname. */
160 if (cnp
->cn_flags
& AUDITVNODE1
)
161 AUDIT_ARG(upath
, td
, cnp
->cn_pnbuf
, ARG_UPATH1
);
162 if (cnp
->cn_flags
& AUDITVNODE2
)
163 AUDIT_ARG(upath
, td
, cnp
->cn_pnbuf
, ARG_UPATH2
);
166 * Don't allow empty pathnames.
168 if (!error
&& *cnp
->cn_pnbuf
== '\0')
172 uma_zfree(namei_zone
, cnp
->cn_pnbuf
);
174 cnp
->cn_pnbuf
= NULL
;
175 cnp
->cn_nameptr
= NULL
;
182 if (KTRPOINT(td
, KTR_NAMEI
)) {
183 KASSERT(cnp
->cn_thread
== curthread
,
184 ("namei not using curthread"));
185 ktrnamei(cnp
->cn_pnbuf
);
190 * Get starting point for the translation.
193 ndp
->ni_rootdir
= fdp
->fd_rdir
;
194 ndp
->ni_topdir
= fdp
->fd_jdir
;
196 if (cnp
->cn_pnbuf
[0] != '/' && ndp
->ni_dirfd
!= AT_FDCWD
) {
197 error
= fgetvp(td
, ndp
->ni_dirfd
, &dp
);
198 FILEDESC_SUNLOCK(fdp
);
199 if (error
== 0 && dp
->v_type
!= VDIR
) {
200 vfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
202 VFS_UNLOCK_GIANT(vfslocked
);
206 uma_zfree(namei_zone
, cnp
->cn_pnbuf
);
208 cnp
->cn_pnbuf
= NULL
;
209 cnp
->cn_nameptr
= NULL
;
216 FILEDESC_SUNLOCK(fdp
);
218 vfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
221 * Check if root directory should replace current directory.
222 * Done at start of translation and after symbolic link.
224 cnp
->cn_nameptr
= cnp
->cn_pnbuf
;
225 if (*(cnp
->cn_nameptr
) == '/') {
227 VFS_UNLOCK_GIANT(vfslocked
);
228 while (*(cnp
->cn_nameptr
) == '/') {
232 dp
= ndp
->ni_rootdir
;
233 vfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
237 ndp
->ni_cnd
.cn_flags
|= GIANTHELD
;
238 ndp
->ni_startdir
= dp
;
241 uma_zfree(namei_zone
, cnp
->cn_pnbuf
);
243 cnp
->cn_pnbuf
= NULL
;
244 cnp
->cn_nameptr
= NULL
;
248 vfslocked
= (ndp
->ni_cnd
.cn_flags
& GIANTHELD
) != 0;
249 ndp
->ni_cnd
.cn_flags
&= ~GIANTHELD
;
251 * Check for symbolic link
253 if ((cnp
->cn_flags
& ISSYMLINK
) == 0) {
254 if ((cnp
->cn_flags
& (SAVENAME
| SAVESTART
)) == 0) {
255 uma_zfree(namei_zone
, cnp
->cn_pnbuf
);
257 cnp
->cn_pnbuf
= NULL
;
258 cnp
->cn_nameptr
= NULL
;
261 cnp
->cn_flags
|= HASBUF
;
263 if ((cnp
->cn_flags
& MPSAFE
) == 0) {
264 VFS_UNLOCK_GIANT(vfslocked
);
265 } else if (vfslocked
)
266 ndp
->ni_cnd
.cn_flags
|= GIANTHELD
;
269 if (ndp
->ni_loopcnt
++ >= MAXSYMLINKS
) {
274 if ((cnp
->cn_flags
& NOMACCHECK
) == 0) {
275 error
= mac_vnode_check_readlink(td
->td_ucred
,
281 if (ndp
->ni_pathlen
> 1)
282 cp
= uma_zalloc(namei_zone
, M_WAITOK
);
286 aiov
.iov_len
= MAXPATHLEN
;
287 auio
.uio_iov
= &aiov
;
290 auio
.uio_rw
= UIO_READ
;
291 auio
.uio_segflg
= UIO_SYSSPACE
;
292 auio
.uio_td
= (struct thread
*)0;
293 auio
.uio_resid
= MAXPATHLEN
;
294 error
= VOP_READLINK(ndp
->ni_vp
, &auio
, cnp
->cn_cred
);
296 if (ndp
->ni_pathlen
> 1)
297 uma_zfree(namei_zone
, cp
);
300 linklen
= MAXPATHLEN
- auio
.uio_resid
;
302 if (ndp
->ni_pathlen
> 1)
303 uma_zfree(namei_zone
, cp
);
307 if (linklen
+ ndp
->ni_pathlen
>= MAXPATHLEN
) {
308 if (ndp
->ni_pathlen
> 1)
309 uma_zfree(namei_zone
, cp
);
310 error
= ENAMETOOLONG
;
313 if (ndp
->ni_pathlen
> 1) {
314 bcopy(ndp
->ni_next
, cp
+ linklen
, ndp
->ni_pathlen
);
315 uma_zfree(namei_zone
, cnp
->cn_pnbuf
);
318 cnp
->cn_pnbuf
[linklen
] = '\0';
319 ndp
->ni_pathlen
+= linklen
;
323 uma_zfree(namei_zone
, cnp
->cn_pnbuf
);
325 cnp
->cn_pnbuf
= NULL
;
326 cnp
->cn_nameptr
= NULL
;
331 VFS_UNLOCK_GIANT(vfslocked
);
336 compute_cn_lkflags(struct mount
*mp
, int lkflags
)
339 ((lkflags
& LK_SHARED
) && !(mp
->mnt_kern_flag
& MNTK_LOOKUP_SHARED
))) {
340 lkflags
&= ~LK_SHARED
;
341 lkflags
|= LK_EXCLUSIVE
;
348 * This is a very central and rather complicated routine.
350 * The pathname is pointed to by ni_ptr and is of length ni_pathlen.
351 * The starting directory is taken from ni_startdir. The pathname is
352 * descended until done, or a symbolic link is encountered. The variable
353 * ni_more is clear if the path is completed; it is set to one if a
354 * symbolic link needing interpretation is encountered.
356 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
357 * whether the name is to be looked up, created, renamed, or deleted.
358 * When CREATE, RENAME, or DELETE is specified, information usable in
359 * creating, renaming, or deleting a directory entry may be calculated.
360 * If flag has LOCKPARENT or'ed into it, the parent directory is returned
361 * locked. If flag has WANTPARENT or'ed into it, the parent directory is
362 * returned unlocked. Otherwise the parent directory is not returned. If
363 * the target of the pathname exists and LOCKLEAF is or'ed into the flag
364 * the target is returned locked, otherwise it is returned unlocked.
365 * When creating or renaming and LOCKPARENT is specified, the target may not
366 * be ".". When deleting and LOCKPARENT is specified, the target may be ".".
368 * Overall outline of lookup:
371 * identify next component of name at ndp->ni_ptr
372 * handle degenerate case where name is null string
373 * if .. and crossing mount points and on mounted filesys, find parent
374 * call VOP_LOOKUP routine for next component name
375 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set
376 * component vnode returned in ni_vp (if it exists), locked.
377 * if result vnode is mounted on and crossing mount points,
378 * find mounted on vnode
379 * if more components of name, do next level at dirloop
380 * return the answer in ni_vp, locked if LOCKLEAF set
381 * if LOCKPARENT set, return locked parent in ni_dvp
382 * if WANTPARENT set, return unlocked parent in ni_dvp
385 lookup(struct nameidata
*ndp
)
387 char *cp
; /* pointer into pathname argument */
388 struct vnode
*dp
= 0; /* the directory we are searching */
389 struct vnode
*tdp
; /* saved dp */
390 struct mount
*mp
; /* mount table entry */
391 int docache
; /* == 0 do not cache last component */
392 int wantparent
; /* 1 => wantparent or lockparent flag */
393 int rdonly
; /* lookup read-only flag bit */
396 int dpunlocked
= 0; /* dp has already been unlocked */
397 struct componentname
*cnp
= &ndp
->ni_cnd
;
398 struct thread
*td
= cnp
->cn_thread
;
399 int vfslocked
; /* VFS Giant state for child */
400 int dvfslocked
; /* VFS Giant state for parent */
405 * Setup: break out flag bits into variables.
407 dvfslocked
= (ndp
->ni_cnd
.cn_flags
& GIANTHELD
) != 0;
409 ndp
->ni_cnd
.cn_flags
&= ~GIANTHELD
;
410 wantparent
= cnp
->cn_flags
& (LOCKPARENT
| WANTPARENT
);
411 KASSERT(cnp
->cn_nameiop
== LOOKUP
|| wantparent
,
412 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT."));
413 docache
= (cnp
->cn_flags
& NOCACHE
) ^ NOCACHE
;
414 if (cnp
->cn_nameiop
== DELETE
||
415 (wantparent
&& cnp
->cn_nameiop
!= CREATE
&&
416 cnp
->cn_nameiop
!= LOOKUP
))
418 rdonly
= cnp
->cn_flags
& RDONLY
;
419 cnp
->cn_flags
&= ~ISSYMLINK
;
422 * We use shared locks until we hit the parent of the last cn then
423 * we adjust based on the requesting flags.
426 cnp
->cn_lkflags
= LK_SHARED
;
428 cnp
->cn_lkflags
= LK_EXCLUSIVE
;
429 dp
= ndp
->ni_startdir
;
430 ndp
->ni_startdir
= NULLVP
;
432 compute_cn_lkflags(dp
->v_mount
, cnp
->cn_lkflags
| LK_RETRY
));
436 * Search a new directory.
438 * The last component of the filename is left accessible via
439 * cnp->cn_nameptr for callers that need the name. Callers needing
440 * the name set the SAVENAME flag. When done, they assume
441 * responsibility for freeing the pathname buffer.
444 for (cp
= cnp
->cn_nameptr
; *cp
!= 0 && *cp
!= '/'; cp
++)
446 cnp
->cn_namelen
= cp
- cnp
->cn_nameptr
;
447 if (cnp
->cn_namelen
> NAME_MAX
) {
448 error
= ENAMETOOLONG
;
451 #ifdef NAMEI_DIAGNOSTIC
454 printf("{%s}: ", cnp
->cn_nameptr
);
457 ndp
->ni_pathlen
-= cnp
->cn_namelen
;
461 * Replace multiple slashes by a single slash and trailing slashes
462 * by a null. This must be done before VOP_LOOKUP() because some
463 * fs's don't know about trailing slashes. Remember if there were
464 * trailing slashes to handle symlinks, existing non-directories
465 * and non-existing files that won't be directories specially later.
468 while (*cp
== '/' && (cp
[1] == '/' || cp
[1] == '\0')) {
473 *ndp
->ni_next
= '\0'; /* XXX for direnter() ... */
478 cnp
->cn_flags
|= MAKEENTRY
;
479 if (*cp
== '\0' && docache
== 0)
480 cnp
->cn_flags
&= ~MAKEENTRY
;
481 if (cnp
->cn_namelen
== 2 &&
482 cnp
->cn_nameptr
[1] == '.' && cnp
->cn_nameptr
[0] == '.')
483 cnp
->cn_flags
|= ISDOTDOT
;
485 cnp
->cn_flags
&= ~ISDOTDOT
;
486 if (*ndp
->ni_next
== 0)
487 cnp
->cn_flags
|= ISLASTCN
;
489 cnp
->cn_flags
&= ~ISLASTCN
;
493 * Check for degenerate name (e.g. / or "")
494 * which is a way of talking about a directory,
495 * e.g. like "/." or ".".
497 if (cnp
->cn_nameptr
[0] == '\0') {
498 if (dp
->v_type
!= VDIR
) {
502 if (cnp
->cn_nameiop
!= LOOKUP
) {
512 if (cnp
->cn_flags
& AUDITVNODE1
)
513 AUDIT_ARG(vnode
, dp
, ARG_VNODE1
);
514 else if (cnp
->cn_flags
& AUDITVNODE2
)
515 AUDIT_ARG(vnode
, dp
, ARG_VNODE2
);
517 if (!(cnp
->cn_flags
& (LOCKPARENT
| LOCKLEAF
)))
519 /* XXX This should probably move to the top of function. */
520 if (cnp
->cn_flags
& SAVESTART
)
521 panic("lookup: SAVESTART");
526 * Handle "..": four special cases.
527 * 1. Return an error if this is the last component of
528 * the name and the operation is DELETE or RENAME.
529 * 2. If at root directory (e.g. after chroot)
530 * or at absolute root directory
531 * then ignore it so can't get out.
532 * 3. If this vnode is the root of a mounted
533 * filesystem, then replace it with the
534 * vnode which was mounted on so we take the
535 * .. in the other filesystem.
536 * 4. If the vnode is the top directory of
537 * the jail or chroot, don't let them out.
539 if (cnp
->cn_flags
& ISDOTDOT
) {
540 if ((cnp
->cn_flags
& ISLASTCN
) != 0 &&
541 (cnp
->cn_nameiop
== DELETE
|| cnp
->cn_nameiop
== RENAME
)) {
546 if (dp
== ndp
->ni_rootdir
||
547 dp
== ndp
->ni_topdir
||
549 ((dp
->v_vflag
& VV_ROOT
) != 0 &&
550 (cnp
->cn_flags
& NOCROSSMOUNT
) != 0)) {
553 vfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
557 if ((dp
->v_vflag
& VV_ROOT
) == 0)
559 if (dp
->v_iflag
& VI_DOOMED
) { /* forced unmount */
564 dp
= dp
->v_mount
->mnt_vnodecovered
;
565 tvfslocked
= dvfslocked
;
566 dvfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
569 VFS_UNLOCK_GIANT(tvfslocked
);
571 compute_cn_lkflags(dp
->v_mount
, cnp
->cn_lkflags
|
577 * We now have a segment name to search for, and a directory to search.
581 if ((cnp
->cn_flags
& NOMACCHECK
) == 0) {
582 error
= mac_vnode_check_lookup(td
->td_ucred
, dp
, cnp
);
589 ASSERT_VOP_LOCKED(dp
, "lookup");
590 VNASSERT(vfslocked
== 0, dp
, ("lookup: vfslocked %d", vfslocked
));
592 * If we have a shared lock we may need to upgrade the lock for the
595 if (dp
!= vp_crossmp
&&
596 VOP_ISLOCKED(dp
) == LK_SHARED
&&
597 (cnp
->cn_flags
& ISLASTCN
) && (cnp
->cn_flags
& LOCKPARENT
))
598 vn_lock(dp
, LK_UPGRADE
|LK_RETRY
);
600 * If we're looking up the last component and we need an exclusive
601 * lock, adjust our lkflags.
603 if ((cnp
->cn_flags
& (ISLASTCN
|LOCKSHARED
|LOCKLEAF
)) ==
605 cnp
->cn_lkflags
= LK_EXCLUSIVE
;
606 #ifdef NAMEI_DIAGNOSTIC
607 vprint("lookup in", dp
);
609 lkflags_save
= cnp
->cn_lkflags
;
610 cnp
->cn_lkflags
= compute_cn_lkflags(dp
->v_mount
, cnp
->cn_lkflags
);
611 if ((error
= VOP_LOOKUP(dp
, &ndp
->ni_vp
, cnp
)) != 0) {
612 cnp
->cn_lkflags
= lkflags_save
;
613 KASSERT(ndp
->ni_vp
== NULL
, ("leaf should be empty"));
614 #ifdef NAMEI_DIAGNOSTIC
615 printf("not found\n");
617 if ((error
== ENOENT
) &&
618 (dp
->v_vflag
& VV_ROOT
) && (dp
->v_mount
!= NULL
) &&
619 (dp
->v_mount
->mnt_flag
& MNT_UNION
)) {
621 dp
= dp
->v_mount
->mnt_vnodecovered
;
622 tvfslocked
= dvfslocked
;
623 dvfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
626 VFS_UNLOCK_GIANT(tvfslocked
);
628 compute_cn_lkflags(dp
->v_mount
, cnp
->cn_lkflags
|
633 if (error
!= EJUSTRETURN
)
636 * If creating and at end of pathname, then can consider
637 * allowing file to be created.
643 if (*cp
== '\0' && trailing_slash
&&
644 !(cnp
->cn_flags
& WILLBEDIR
)) {
648 if ((cnp
->cn_flags
& LOCKPARENT
) == 0)
651 * This is a temporary assert to make sure I know what the
654 KASSERT((cnp
->cn_flags
& (WANTPARENT
|LOCKPARENT
)) != 0,
655 ("lookup: Unhandled case."));
657 * We return with ni_vp NULL to indicate that the entry
658 * doesn't currently exist, leaving a pointer to the
659 * (possibly locked) directory vnode in ndp->ni_dvp.
661 if (cnp
->cn_flags
& SAVESTART
) {
662 ndp
->ni_startdir
= ndp
->ni_dvp
;
663 VREF(ndp
->ni_startdir
);
667 cnp
->cn_lkflags
= lkflags_save
;
668 #ifdef NAMEI_DIAGNOSTIC
672 * Take into account any additional components consumed by
673 * the underlying filesystem.
675 if (cnp
->cn_consume
> 0) {
676 cnp
->cn_nameptr
+= cnp
->cn_consume
;
677 ndp
->ni_next
+= cnp
->cn_consume
;
678 ndp
->ni_pathlen
-= cnp
->cn_consume
;
683 vfslocked
= VFS_LOCK_GIANT(dp
->v_mount
);
686 * Check to see if the vnode has been mounted on;
687 * if so find the root of the mounted filesystem.
689 while (dp
->v_type
== VDIR
&& (mp
= dp
->v_mountedhere
) &&
690 (cnp
->cn_flags
& NOCROSSMOUNT
) == 0) {
691 if (vfs_busy(mp
, 0, 0))
694 VFS_UNLOCK_GIANT(vfslocked
);
695 vfslocked
= VFS_LOCK_GIANT(mp
);
696 if (dp
!= ndp
->ni_dvp
)
700 VFS_UNLOCK_GIANT(dvfslocked
);
703 ndp
->ni_dvp
= vp_crossmp
;
704 error
= VFS_ROOT(mp
, compute_cn_lkflags(mp
, cnp
->cn_lkflags
), &tdp
, td
);
706 if (vn_lock(vp_crossmp
, LK_SHARED
| LK_NOWAIT
))
707 panic("vp_crossmp exclusively locked or reclaimed");
712 ndp
->ni_vp
= dp
= tdp
;
716 * Check for symbolic link
718 if ((dp
->v_type
== VLNK
) &&
719 ((cnp
->cn_flags
& FOLLOW
) || trailing_slash
||
720 *ndp
->ni_next
== '/')) {
721 cnp
->cn_flags
|= ISSYMLINK
;
722 if (dp
->v_iflag
& VI_DOOMED
) {
723 /* We can't know whether the directory was mounted with
724 * NOSYMFOLLOW, so we can't follow safely. */
728 if (dp
->v_mount
->mnt_flag
& MNT_NOSYMFOLLOW
) {
733 * Symlink code always expects an unlocked dvp.
735 if (ndp
->ni_dvp
!= ndp
->ni_vp
)
736 VOP_UNLOCK(ndp
->ni_dvp
, 0);
741 * Check for bogus trailing slashes.
743 if (trailing_slash
&& dp
->v_type
!= VDIR
) {
750 * Not a symbolic link. If more pathname,
751 * continue at next component, else return.
753 KASSERT((cnp
->cn_flags
& ISLASTCN
) || *ndp
->ni_next
== '/',
754 ("lookup: invalid path state."));
755 if (*ndp
->ni_next
== '/') {
756 cnp
->cn_nameptr
= ndp
->ni_next
;
757 while (*cnp
->cn_nameptr
== '/') {
761 if (ndp
->ni_dvp
!= dp
)
765 VFS_UNLOCK_GIANT(dvfslocked
);
766 dvfslocked
= vfslocked
; /* dp becomes dvp in dirloop */
771 * Disallow directory write attempts on read-only filesystems.
774 (cnp
->cn_nameiop
== DELETE
|| cnp
->cn_nameiop
== RENAME
)) {
778 if (cnp
->cn_flags
& SAVESTART
) {
779 ndp
->ni_startdir
= ndp
->ni_dvp
;
780 VREF(ndp
->ni_startdir
);
783 if (ndp
->ni_dvp
!= dp
)
787 VFS_UNLOCK_GIANT(dvfslocked
);
789 } else if ((cnp
->cn_flags
& LOCKPARENT
) == 0 && ndp
->ni_dvp
!= dp
)
790 VOP_UNLOCK(ndp
->ni_dvp
, 0);
792 if (cnp
->cn_flags
& AUDITVNODE1
)
793 AUDIT_ARG(vnode
, dp
, ARG_VNODE1
);
794 else if (cnp
->cn_flags
& AUDITVNODE2
)
795 AUDIT_ARG(vnode
, dp
, ARG_VNODE2
);
797 if ((cnp
->cn_flags
& LOCKLEAF
) == 0)
801 * Because of lookup_shared we may have the vnode shared locked, but
802 * the caller may want it to be exclusively locked.
804 if ((cnp
->cn_flags
& (ISLASTCN
| LOCKSHARED
| LOCKLEAF
)) ==
805 (ISLASTCN
| LOCKLEAF
) && VOP_ISLOCKED(dp
) != LK_EXCLUSIVE
) {
806 vn_lock(dp
, LK_UPGRADE
| LK_RETRY
);
808 if (vfslocked
&& dvfslocked
)
809 VFS_UNLOCK_GIANT(dvfslocked
); /* Only need one */
810 if (vfslocked
|| dvfslocked
)
811 ndp
->ni_cnd
.cn_flags
|= GIANTHELD
;
815 if (dp
!= ndp
->ni_dvp
)
822 VFS_UNLOCK_GIANT(vfslocked
);
823 VFS_UNLOCK_GIANT(dvfslocked
);
824 ndp
->ni_cnd
.cn_flags
&= ~GIANTHELD
;
830 * relookup - lookup a path name component
831 * Used by lookup to re-acquire things.
834 relookup(struct vnode
*dvp
, struct vnode
**vpp
, struct componentname
*cnp
)
836 struct vnode
*dp
= 0; /* the directory we are searching */
837 int wantparent
; /* 1 => wantparent or lockparent flag */
838 int rdonly
; /* lookup read-only flag bit */
841 KASSERT(cnp
->cn_flags
& ISLASTCN
,
842 ("relookup: Not given last component."));
844 * Setup: break out flag bits into variables.
846 wantparent
= cnp
->cn_flags
& (LOCKPARENT
|WANTPARENT
);
847 KASSERT(wantparent
, ("relookup: parent not wanted."));
848 rdonly
= cnp
->cn_flags
& RDONLY
;
849 cnp
->cn_flags
&= ~ISSYMLINK
;
851 cnp
->cn_lkflags
= LK_EXCLUSIVE
;
852 vn_lock(dp
, LK_EXCLUSIVE
| LK_RETRY
);
855 * Search a new directory.
857 * The last component of the filename is left accessible via
858 * cnp->cn_nameptr for callers that need the name. Callers needing
859 * the name set the SAVENAME flag. When done, they assume
860 * responsibility for freeing the pathname buffer.
862 #ifdef NAMEI_DIAGNOSTIC
863 printf("{%s}: ", cnp
->cn_nameptr
);
867 * Check for degenerate name (e.g. / or "")
868 * which is a way of talking about a directory,
869 * e.g. like "/." or ".".
871 if (cnp
->cn_nameptr
[0] == '\0') {
872 if (cnp
->cn_nameiop
!= LOOKUP
|| wantparent
) {
876 if (dp
->v_type
!= VDIR
) {
880 if (!(cnp
->cn_flags
& LOCKLEAF
))
883 /* XXX This should probably move to the top of function. */
884 if (cnp
->cn_flags
& SAVESTART
)
885 panic("lookup: SAVESTART");
889 if (cnp
->cn_flags
& ISDOTDOT
)
890 panic ("relookup: lookup on dot-dot");
893 * We now have a segment name to search for, and a directory to search.
895 #ifdef NAMEI_DIAGNOSTIC
896 vprint("search in:", dp
);
898 if ((error
= VOP_LOOKUP(dp
, vpp
, cnp
)) != 0) {
899 KASSERT(*vpp
== NULL
, ("leaf should be empty"));
900 if (error
!= EJUSTRETURN
)
903 * If creating and at end of pathname, then can consider
904 * allowing file to be created.
910 /* ASSERT(dvp == ndp->ni_startdir) */
911 if (cnp
->cn_flags
& SAVESTART
)
913 if ((cnp
->cn_flags
& LOCKPARENT
) == 0)
916 * This is a temporary assert to make sure I know what the
919 KASSERT((cnp
->cn_flags
& (WANTPARENT
|LOCKPARENT
)) != 0,
920 ("relookup: Unhandled case."));
922 * We return with ni_vp NULL to indicate that the entry
923 * doesn't currently exist, leaving a pointer to the
924 * (possibly locked) directory vnode in ndp->ni_dvp.
932 * Disallow directory write attempts on read-only filesystems.
935 (cnp
->cn_nameiop
== DELETE
|| cnp
->cn_nameiop
== RENAME
)) {
944 * Set the parent lock/ref state to the requested state.
946 if ((cnp
->cn_flags
& LOCKPARENT
) == 0 && dvp
!= dp
) {
951 } else if (!wantparent
)
954 * Check for symbolic link
956 KASSERT(dp
->v_type
!= VLNK
|| !(cnp
->cn_flags
& FOLLOW
),
957 ("relookup: symlink found.\n"));
959 /* ASSERT(dvp == ndp->ni_startdir) */
960 if (cnp
->cn_flags
& SAVESTART
)
963 if ((cnp
->cn_flags
& LOCKLEAF
) == 0)
973 * Free data allocated by namei(); see namei(9) for details.
976 NDFREE(struct nameidata
*ndp
, const u_int flags
)
984 if (!(flags
& NDF_NO_FREE_PNBUF
) &&
985 (ndp
->ni_cnd
.cn_flags
& HASBUF
)) {
986 uma_zfree(namei_zone
, ndp
->ni_cnd
.cn_pnbuf
);
987 ndp
->ni_cnd
.cn_flags
&= ~HASBUF
;
989 if (!(flags
& NDF_NO_VP_UNLOCK
) &&
990 (ndp
->ni_cnd
.cn_flags
& LOCKLEAF
) && ndp
->ni_vp
)
992 if (!(flags
& NDF_NO_VP_RELE
) && ndp
->ni_vp
) {
1001 VOP_UNLOCK(ndp
->ni_vp
, 0);
1002 if (!(flags
& NDF_NO_DVP_UNLOCK
) &&
1003 (ndp
->ni_cnd
.cn_flags
& LOCKPARENT
) &&
1004 ndp
->ni_dvp
!= ndp
->ni_vp
)
1006 if (!(flags
& NDF_NO_DVP_RELE
) &&
1007 (ndp
->ni_cnd
.cn_flags
& (LOCKPARENT
|WANTPARENT
))) {
1016 VOP_UNLOCK(ndp
->ni_dvp
, 0);
1017 if (!(flags
& NDF_NO_STARTDIR_RELE
) &&
1018 (ndp
->ni_cnd
.cn_flags
& SAVESTART
)) {
1019 vrele(ndp
->ni_startdir
);
1020 ndp
->ni_startdir
= NULL
;
1025 * Determine if there is a suitable alternate filename under the specified
1026 * prefix for the specified path. If the create flag is set, then the
1027 * alternate prefix will be used so long as the parent directory exists.
1028 * This is used by the various compatiblity ABIs so that Linux binaries prefer
1029 * files under /compat/linux for example. The chosen path (whether under
1030 * the prefix or under /) is returned in a kernel malloc'd buffer pointed
1031 * to by pathbuf. The caller is responsible for free'ing the buffer from
1032 * the M_TEMP bucket if one is returned.
1035 kern_alternate_path(struct thread
*td
, const char *prefix
, const char *path
,
1036 enum uio_seg pathseg
, char **pathbuf
, int create
, int dirfd
)
1038 struct nameidata nd
, ndroot
;
1039 char *ptr
, *buf
, *cp
;
1043 buf
= (char *) malloc(MAXPATHLEN
, M_TEMP
, M_WAITOK
);
1046 /* Copy the prefix into the new pathname as a starting point. */
1047 len
= strlcpy(buf
, prefix
, MAXPATHLEN
);
1048 if (len
>= MAXPATHLEN
) {
1053 sz
= MAXPATHLEN
- len
;
1056 /* Append the filename to the prefix. */
1057 if (pathseg
== UIO_SYSSPACE
)
1058 error
= copystr(path
, ptr
, sz
, &len
);
1060 error
= copyinstr(path
, ptr
, sz
, &len
);
1068 /* Only use a prefix with absolute pathnames. */
1074 if (dirfd
!= AT_FDCWD
) {
1076 * We want the original because the "prefix" is
1077 * included in the already opened dirfd.
1079 bcopy(ptr
, buf
, len
);
1084 * We know that there is a / somewhere in this pathname.
1085 * Search backwards for it, to find the file's parent dir
1086 * to see if it exists in the alternate tree. If it does,
1087 * and we want to create a file (cflag is set). We don't
1088 * need to worry about the root comparison in this case.
1092 for (cp
= &ptr
[len
] - 1; *cp
!= '/'; cp
--);
1095 NDINIT(&nd
, LOOKUP
, FOLLOW
| MPSAFE
, UIO_SYSSPACE
, buf
, td
);
1101 NDINIT(&nd
, LOOKUP
, FOLLOW
| MPSAFE
, UIO_SYSSPACE
, buf
, td
);
1108 * We now compare the vnode of the prefix to the one
1109 * vnode asked. If they resolve to be the same, then we
1110 * ignore the match so that the real root gets used.
1111 * This avoids the problem of traversing "../.." to find the
1112 * root directory and never finding it, because "/" resolves
1113 * to the emulation root directory. This is expensive :-(
1115 NDINIT(&ndroot
, LOOKUP
, FOLLOW
| MPSAFE
, UIO_SYSSPACE
, prefix
,
1118 /* We shouldn't ever get an error from this namei(). */
1119 error
= namei(&ndroot
);
1121 if (nd
.ni_vp
== ndroot
.ni_vp
)
1124 NDFREE(&ndroot
, NDF_ONLY_PNBUF
);
1125 vrele(ndroot
.ni_vp
);
1126 VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot
));
1130 NDFREE(&nd
, NDF_ONLY_PNBUF
);
1132 VFS_UNLOCK_GIANT(NDHASGIANT(&nd
));
1135 /* If there was an error, use the original path name. */
1137 bcopy(ptr
, buf
, len
);