2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
37 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_subs.c,v 1.128 2004/04/14 23:23:55 peadar Exp $
38 * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
42 * These functions support the macros and help fiddle mbuf chains for
43 * the nfs op functions. They do things like create the rpc header and
44 * copy data between mbuf chains and uio lists.
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/nlookup.h>
54 #include <sys/namei.h>
56 #include <sys/socket.h>
58 #include <sys/malloc.h>
59 #include <sys/sysent.h>
60 #include <sys/syscall.h>
62 #include <sys/objcache.h>
65 #include <vm/vm_object.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_zone.h>
77 #include "nfsm_subs.h"
80 #include <netinet/in.h>
83 * Data items converted to xdr at startup, since they are constant
84 * This is kinda hokey, but may save a little time doing byte swaps
86 u_int32_t nfs_xdrneg1
;
87 u_int32_t rpc_reply
, rpc_msgdenied
, rpc_mismatch
, rpc_vers
;
88 u_int32_t rpc_auth_unix
, rpc_msgaccepted
, rpc_call
, rpc_autherr
;
89 u_int32_t rpc_auth_kerb
;
90 u_int32_t nfs_prog
, nfs_true
, nfs_false
;
92 /* And other global data */
93 static enum vtype nv2tov_type
[8]= {
94 VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VNON
, VNON
96 enum vtype nv3tov_type
[8]= {
97 VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VSOCK
, VFIFO
102 static int nfs_pbuf_freecnt
= -1; /* start out unlimited */
104 struct nfsmount_head nfs_mountq
= TAILQ_HEAD_INITIALIZER(nfs_mountq
);
105 struct nfssvc_sockhead nfssvc_sockhead
;
106 int nfssvc_sockhead_flag
;
107 struct nfsd_head nfsd_head
;
109 struct nfs_bufq nfs_bufq
;
110 struct nqfhhashhead
*nqfhhashtbl
;
113 static int nfs_prev_nfssvc_sy_narg
;
114 static sy_call_t
*nfs_prev_nfssvc_sy_call
;
119 * Mapping of old NFS Version 2 RPC numbers to generic numbers.
121 int nfsv3_procid
[NFS_NPROCS
] = {
150 #endif /* NFS_NOSERVER */
152 * and the reverse mapping from generic to Version 2 procedure numbers
154 int nfsv2_procid
[NFS_NPROCS
] = {
185 * Maps errno values to nfs error numbers.
186 * Use NFSERR_IO as the catch all for ones not specifically defined in
189 static u_char nfsrv_v2errmap
[ELAST
] = {
190 NFSERR_PERM
, NFSERR_NOENT
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
191 NFSERR_NXIO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
192 NFSERR_IO
, NFSERR_IO
, NFSERR_ACCES
, NFSERR_IO
, NFSERR_IO
,
193 NFSERR_IO
, NFSERR_EXIST
, NFSERR_IO
, NFSERR_NODEV
, NFSERR_NOTDIR
,
194 NFSERR_ISDIR
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
195 NFSERR_IO
, NFSERR_FBIG
, NFSERR_NOSPC
, NFSERR_IO
, NFSERR_ROFS
,
196 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
197 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
198 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
199 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
200 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
201 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
202 NFSERR_IO
, NFSERR_IO
, NFSERR_NAMETOL
, NFSERR_IO
, NFSERR_IO
,
203 NFSERR_NOTEMPTY
, NFSERR_IO
, NFSERR_IO
, NFSERR_DQUOT
, NFSERR_STALE
,
204 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
205 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
206 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
207 NFSERR_IO
/* << Last is 86 */
211 * Maps errno values to nfs error numbers.
212 * Although it is not obvious whether or not NFS clients really care if
213 * a returned error value is in the specified list for the procedure, the
214 * safest thing to do is filter them appropriately. For Version 2, the
215 * X/Open XNFS document is the only specification that defines error values
216 * for each RPC (The RFC simply lists all possible error values for all RPCs),
217 * so I have decided to not do this for Version 2.
218 * The first entry is the default error return and the rest are the valid
219 * errors for that RPC in increasing numeric order.
221 static short nfsv3err_null
[] = {
226 static short nfsv3err_getattr
[] = {
235 static short nfsv3err_setattr
[] = {
251 static short nfsv3err_lookup
[] = {
264 static short nfsv3err_access
[] = {
273 static short nfsv3err_readlink
[] = {
285 static short nfsv3err_read
[] = {
297 static short nfsv3err_write
[] = {
312 static short nfsv3err_create
[] = {
329 static short nfsv3err_mkdir
[] = {
346 static short nfsv3err_symlink
[] = {
363 static short nfsv3err_mknod
[] = {
381 static short nfsv3err_remove
[] = {
395 static short nfsv3err_rmdir
[] = {
413 static short nfsv3err_rename
[] = {
436 static short nfsv3err_link
[] = {
456 static short nfsv3err_readdir
[] = {
469 static short nfsv3err_readdirplus
[] = {
483 static short nfsv3err_fsstat
[] = {
492 static short nfsv3err_fsinfo
[] = {
500 static short nfsv3err_pathconf
[] = {
508 static short nfsv3err_commit
[] = {
517 static short *nfsrv_v3errmap
[] = {
535 nfsv3err_readdirplus
,
542 #endif /* NFS_NOSERVER */
545 extern int sys_nfssvc(struct proc
*, struct nfssvc_args
*, int *);
548 * This needs to return a monotonically increasing or close to monotonically
549 * increasing result, otherwise the write gathering queues won't work
558 return ((u_quad_t
)tv
.tv_sec
* 1000000 + (u_quad_t
)tv
.tv_usec
);
562 * Called once to initialize data structures...
565 nfs_init(struct vfsconf
*vfsp
)
567 callout_init(&nfs_timer_handle
);
568 nfsmount_zone
= zinit("NFSMOUNT", sizeof(struct nfsmount
), 0, 0, 1);
570 nfs_mount_type
= vfsp
->vfc_typenum
;
572 rpc_vers
= txdr_unsigned(RPC_VER2
);
573 rpc_call
= txdr_unsigned(RPC_CALL
);
574 rpc_reply
= txdr_unsigned(RPC_REPLY
);
575 rpc_msgdenied
= txdr_unsigned(RPC_MSGDENIED
);
576 rpc_msgaccepted
= txdr_unsigned(RPC_MSGACCEPTED
);
577 rpc_mismatch
= txdr_unsigned(RPC_MISMATCH
);
578 rpc_autherr
= txdr_unsigned(RPC_AUTHERR
);
579 rpc_auth_unix
= txdr_unsigned(RPCAUTH_UNIX
);
580 rpc_auth_kerb
= txdr_unsigned(RPCAUTH_KERB4
);
581 nfs_prog
= txdr_unsigned(NFS_PROG
);
582 nfs_true
= txdr_unsigned(TRUE
);
583 nfs_false
= txdr_unsigned(FALSE
);
584 nfs_xdrneg1
= txdr_unsigned(-1);
585 nfs_ticks
= (hz
* NFS_TICKINTVL
+ 500) / 1000;
588 nfs_nhinit(); /* Init the nfsnode table */
590 nfsrv_init(0); /* Init server data structures */
591 nfsrv_initcache(); /* Init the server request cache */
595 * Initialize reply list and start timer
599 nfs_prev_nfssvc_sy_narg
= sysent
[SYS_nfssvc
].sy_narg
;
600 sysent
[SYS_nfssvc
].sy_narg
= 2;
601 nfs_prev_nfssvc_sy_call
= sysent
[SYS_nfssvc
].sy_call
;
602 sysent
[SYS_nfssvc
].sy_call
= (sy_call_t
*)sys_nfssvc
;
604 nfs_pbuf_freecnt
= nswbuf
/ 2 + 1;
610 nfs_uninit(struct vfsconf
*vfsp
)
612 callout_stop(&nfs_timer_handle
);
614 sysent
[SYS_nfssvc
].sy_narg
= nfs_prev_nfssvc_sy_narg
;
615 sysent
[SYS_nfssvc
].sy_call
= nfs_prev_nfssvc_sy_call
;
620 * Attribute cache routines.
621 * nfs_loadattrcache() - loads or updates the cache contents from attributes
622 * that are on the mbuf list
623 * nfs_getattrcache() - returns valid attributes if found in cache, returns
628 * Load the attribute cache (that lives in the nfsnode entry) with
629 * the values on the mbuf list. Load *vaper with the attributes. vaper
632 * As a side effect n_mtime, which we use to determine if the file was
633 * modified by some other host, is set to the attribute timestamp and
634 * NRMODIFIED is set if the two values differ.
636 * WARNING: the mtime loaded into vaper does not necessarily represent
637 * n_mtime or n_attr.mtime due to NACC and NUPD.
640 nfs_loadattrcache(struct vnode
*vp
, struct mbuf
**mdp
, caddr_t
*dposp
,
641 struct vattr
*vaper
, int lattr_flags
)
644 struct nfs_fattr
*fp
;
654 struct timespec mtime
;
655 int v3
= NFS_ISV3(vp
);
658 t1
= (mtod(md
, caddr_t
) + md
->m_len
) - *dposp
;
659 if ((error
= nfsm_disct(mdp
, dposp
, NFSX_FATTR(v3
), t1
, &cp2
)) != 0)
661 fp
= (struct nfs_fattr
*)cp2
;
663 vtyp
= nfsv3tov_type(fp
->fa_type
);
664 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
665 rmajor
= (int)fxdr_unsigned(int, fp
->fa3_rdev
.specdata1
);
666 rminor
= (int)fxdr_unsigned(int, fp
->fa3_rdev
.specdata2
);
667 fxdr_nfsv3time(&fp
->fa3_mtime
, &mtime
);
669 vtyp
= nfsv2tov_type(fp
->fa_type
);
670 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
674 * The duplicate information returned in fa_type and fa_mode
675 * is an ambiguity in the NFS version 2 protocol.
677 * VREG should be taken literally as a regular file. If a
678 * server intends to return some type information differently
679 * in the upper bits of the mode field (e.g. for sockets, or
680 * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
681 * leave the examination of the mode bits even in the VREG
682 * case to avoid breakage for bogus servers, but we make sure
683 * that there are actually type bits set in the upper part of
684 * fa_mode (and failing that, trust the va_type field).
686 * NFSv3 cleared the issue, and requires fa_mode to not
687 * contain any type information (while also introducing sockets
688 * and FIFOs for fa_type).
690 if (vtyp
== VNON
|| (vtyp
== VREG
&& (vmode
& S_IFMT
) != 0))
691 vtyp
= IFTOVT(vmode
);
692 rdev
= fxdr_unsigned(int32_t, fp
->fa2_rdev
);
693 rmajor
= umajor(rdev
);
694 rminor
= uminor(rdev
);
695 fxdr_nfsv2time(&fp
->fa2_mtime
, &mtime
);
698 * Really ugly NFSv2 kludge.
700 if (vtyp
== VCHR
&& rdev
== (udev_t
)0xffffffff)
705 * If v_type == VNON it is a new node, so fill in the v_type,
706 * n_mtime fields. Check to see if it represents a special
707 * device, and if so, check for a possible alias. Once the
708 * correct vnode has been obtained, fill in the rest of the
712 if (vp
->v_type
!= vtyp
) {
713 nfs_setvtype(vp
, vtyp
);
714 if (vp
->v_type
== VFIFO
) {
715 vp
->v_ops
= &vp
->v_mount
->mnt_vn_fifo_ops
;
716 } else if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
717 vp
->v_ops
= &vp
->v_mount
->mnt_vn_spec_ops
;
718 addaliasu(vp
, rmajor
, rminor
);
720 vp
->v_ops
= &vp
->v_mount
->mnt_vn_use_ops
;
722 np
->n_mtime
= mtime
.tv_sec
;
723 } else if (np
->n_mtime
!= mtime
.tv_sec
) {
725 * If we haven't modified the file locally and the server
726 * timestamp does not match, then the server probably
727 * modified the file. We must flag this condition so
728 * the proper synchronization can be done. We do not
729 * try to synchronize the state here because that
730 * could lead to an endless recursion.
732 * XXX loadattrcache can be set during the reply to a write,
733 * before the write timestamp is properly processed. To
734 * avoid unconditionally setting the rmodified bit (which
735 * has the effect of flushing the cache), we only do this
736 * check if the lmodified bit is not set.
738 np
->n_mtime
= mtime
.tv_sec
;
739 if ((lattr_flags
& NFS_LATTR_NOMTIMECHECK
) == 0)
740 np
->n_flag
|= NRMODIFIED
;
744 vap
->va_mode
= (vmode
& 07777);
745 vap
->va_rmajor
= rmajor
;
746 vap
->va_rminor
= rminor
;
747 vap
->va_mtime
= mtime
;
748 vap
->va_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
.val
[0];
750 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
751 vap
->va_uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
752 vap
->va_gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
753 vap
->va_size
= fxdr_hyper(&fp
->fa3_size
);
754 vap
->va_blocksize
= NFS_FABLKSIZE
;
755 vap
->va_bytes
= fxdr_hyper(&fp
->fa3_used
);
756 vap
->va_fileid
= fxdr_hyper(&fp
->fa3_fileid
);
757 fxdr_nfsv3time(&fp
->fa3_atime
, &vap
->va_atime
);
758 fxdr_nfsv3time(&fp
->fa3_ctime
, &vap
->va_ctime
);
762 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
763 vap
->va_uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
764 vap
->va_gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
765 vap
->va_size
= fxdr_unsigned(u_int32_t
, fp
->fa2_size
);
766 vap
->va_blocksize
= fxdr_unsigned(int32_t, fp
->fa2_blocksize
);
767 vap
->va_bytes
= (u_quad_t
)fxdr_unsigned(int32_t, fp
->fa2_blocks
)
769 vap
->va_fileid
= fxdr_unsigned(int32_t, fp
->fa2_fileid
);
770 fxdr_nfsv2time(&fp
->fa2_atime
, &vap
->va_atime
);
772 vap
->va_ctime
.tv_sec
= fxdr_unsigned(u_int32_t
,
773 fp
->fa2_ctime
.nfsv2_sec
);
774 vap
->va_ctime
.tv_nsec
= 0;
775 vap
->va_gen
= fxdr_unsigned(u_int32_t
,fp
->fa2_ctime
.nfsv2_usec
);
778 np
->n_attrstamp
= time_second
;
779 if (vap
->va_size
!= np
->n_size
) {
780 if (vap
->va_type
== VREG
) {
781 if ((lattr_flags
& NFS_LATTR_NOSHRINK
) &&
782 vap
->va_size
< np
->n_size
) {
784 * We've been told not to shrink the file;
785 * zero np->n_attrstamp to indicate that
786 * the attributes are stale.
788 * This occurs primarily due to recursive
789 * NFS ops that are executed during periods
790 * where we cannot safely reduce the size of
793 * Additionally, write rpcs are broken down
794 * into buffers and np->n_size is
795 * pre-extended. Setting NRMODIFIED here
796 * can result in n_size getting reset to a
797 * lower value, which is NOT what we want.
798 * XXX this needs to be cleaned up a lot
801 vap
->va_size
= np
->n_size
;
803 if ((np
->n_flag
& NLMODIFIED
) == 0)
804 np
->n_flag
|= NRMODIFIED
;
805 } else if (np
->n_flag
& NLMODIFIED
) {
807 * We've modified the file: Use the larger
808 * of our size, and the server's size. At
809 * this point the cache coherency is all
810 * shot to hell. To try to handle multiple
811 * clients appending to the file at the same
812 * time mark that the server has changed
813 * the file if the server's notion of the
814 * file size is larger than our notion.
816 * XXX this needs work.
818 if (vap
->va_size
< np
->n_size
) {
819 vap
->va_size
= np
->n_size
;
821 np
->n_size
= vap
->va_size
;
822 np
->n_flag
|= NRMODIFIED
;
826 * Someone changed the file's size on the
827 * server and there are no local changes
828 * to get in the way, set the size and mark
831 np
->n_size
= vap
->va_size
;
832 np
->n_flag
|= NRMODIFIED
;
834 vnode_pager_setsize(vp
, np
->n_size
);
836 np
->n_size
= vap
->va_size
;
840 bcopy((caddr_t
)vap
, (caddr_t
)vaper
, sizeof(*vap
));
841 if (np
->n_flag
& NCHG
) {
842 if (np
->n_flag
& NACC
)
843 vaper
->va_atime
= np
->n_atim
;
844 if (np
->n_flag
& NUPD
)
845 vaper
->va_mtime
= np
->n_mtim
;
852 #include <sys/sysctl.h>
853 SYSCTL_DECL(_vfs_nfs
);
854 static int nfs_acdebug
;
855 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, acdebug
, CTLFLAG_RW
, &nfs_acdebug
, 0, "");
859 * Check the time stamp
860 * If the cache is valid, copy contents to *vaper and return 0
861 * otherwise return an error
864 nfs_getattrcache(struct vnode
*vp
, struct vattr
*vaper
)
868 struct nfsmount
*nmp
;
873 nmp
= VFSTONFS(vp
->v_mount
);
876 * Dynamic timeout based on how recently the file was modified.
877 * n_mtime is always valid.
879 timeo
= (get_approximate_time_t() - np
->n_mtime
) / 60;
883 kprintf("nfs_getattrcache: initial timeo = %d\n", timeo
);
886 if (vap
->va_type
== VDIR
) {
887 if ((np
->n_flag
& NLMODIFIED
) || timeo
< nmp
->nm_acdirmin
)
888 timeo
= nmp
->nm_acdirmin
;
889 else if (timeo
> nmp
->nm_acdirmax
)
890 timeo
= nmp
->nm_acdirmax
;
892 if ((np
->n_flag
& NLMODIFIED
) || timeo
< nmp
->nm_acregmin
)
893 timeo
= nmp
->nm_acregmin
;
894 else if (timeo
> nmp
->nm_acregmax
)
895 timeo
= nmp
->nm_acregmax
;
900 kprintf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
901 nmp
->nm_acregmin
, nmp
->nm_acregmax
,
902 nmp
->nm_acdirmin
, nmp
->nm_acdirmax
);
905 kprintf("nfs_getattrcache: age = %d; final timeo = %d\n",
906 (int)(time_second
- np
->n_attrstamp
), timeo
);
909 if (np
->n_attrstamp
== 0 || (time_second
- np
->n_attrstamp
) >= timeo
) {
910 nfsstats
.attrcache_misses
++;
913 nfsstats
.attrcache_hits
++;
916 * Our attribute cache can be stale due to modifications made on
917 * this host. XXX this is a bad hack. We need a more deterministic
918 * means of finding out which np fields are valid versus attr cache
919 * fields. We really should update the vattr info on the fly when
920 * making local changes.
922 if (vap
->va_size
!= np
->n_size
) {
923 if (vap
->va_type
== VREG
) {
924 if (np
->n_flag
& NLMODIFIED
) {
925 if (vap
->va_size
< np
->n_size
)
926 vap
->va_size
= np
->n_size
;
928 np
->n_size
= vap
->va_size
;
930 np
->n_size
= vap
->va_size
;
932 vnode_pager_setsize(vp
, np
->n_size
);
934 np
->n_size
= vap
->va_size
;
937 bcopy((caddr_t
)vap
, (caddr_t
)vaper
, sizeof(struct vattr
));
938 if (np
->n_flag
& NCHG
) {
939 if (np
->n_flag
& NACC
)
940 vaper
->va_atime
= np
->n_atim
;
941 if (np
->n_flag
& NUPD
)
942 vaper
->va_mtime
= np
->n_mtim
;
950 * Set up nameidata for a lookup() call and do it.
952 * If pubflag is set, this call is done for a lookup operation on the
953 * public filehandle. In that case we allow crossing mountpoints and
954 * absolute pathnames. However, the caller is expected to check that
955 * the lookup result is within the public fs, and deny access if
958 * dirp may be set whether an error is returned or not, and must be
959 * released by the caller.
961 * On return nd->nl_nch usually points to the target ncp, which may represent
964 * NOTE: the caller must call nlookup_done(nd) unconditionally on return
968 nfs_namei(struct nlookupdata
*nd
, struct ucred
*cred
, int nflags
,
969 struct vnode
**dvpp
, struct vnode
**vpp
,
970 fhandle_t
*fhp
, int len
,
971 struct nfssvc_sock
*slp
, struct sockaddr
*nam
, struct mbuf
**mdp
,
972 caddr_t
*dposp
, struct vnode
**dirpp
, struct thread
*td
,
973 int kerbflag
, int pubflag
)
977 char *fromcp
, *tocp
, *cp
;
984 namebuf
= objcache_get(namei_oc
, M_WAITOK
);
988 * Copy the name from the mbuf list to namebuf.
993 rem
= mtod(md
, caddr_t
) + md
->m_len
- fromcp
;
994 for (i
= 0; i
< len
; i
++) {
1001 fromcp
= mtod(md
, caddr_t
);
1004 if (*fromcp
== '\0' || (!pubflag
&& *fromcp
== '/')) {
1008 *tocp
++ = *fromcp
++;
1014 len
= nfsm_rndup(len
)-len
;
1018 else if ((error
= nfs_adv(mdp
, dposp
, len
, rem
)) != 0)
1023 * Extract and set starting directory. The returned dp is refd
1026 error
= nfsrv_fhtovp(fhp
, FALSE
, &mp
, &dp
, cred
, slp
,
1027 nam
, &rdonly
, kerbflag
, pubflag
);
1030 if (dp
->v_type
!= VDIR
) {
1037 * Set return directory. Reference to dp is implicitly transferred
1038 * to the returned pointer. This must be set before we potentially
1044 * read-only - NLC_DELETE, NLC_RENAME_DST are disallowed. NLC_CREATE
1045 * is passed through to nlookup() and will be disallowed
1046 * if the file does not already exist.
1049 nflags
|= NLC_NFS_RDONLY
;
1050 if (nflags
& (NLC_DELETE
| NLC_RENAME_DST
)) {
1057 * Oh joy. For WebNFS, handle those pesky '%' escapes,
1058 * and the 'native path' indicator.
1061 cp
= objcache_get(namei_oc
, M_WAITOK
);
1064 if ((unsigned char)*fromcp
>= WEBNFS_SPECCHAR_START
) {
1065 switch ((unsigned char)*fromcp
) {
1066 case WEBNFS_NATIVE_CHAR
:
1068 * 'Native' path for us is the same
1069 * as a path according to the NFS spec,
1070 * just skip the escape char.
1075 * More may be added in the future, range 0x80-0xff
1079 objcache_put(namei_oc
, cp
);
1084 * Translate the '%' escapes, URL-style.
1086 while (*fromcp
!= '\0') {
1087 if (*fromcp
== WEBNFS_ESC_CHAR
) {
1088 if (fromcp
[1] != '\0' && fromcp
[2] != '\0') {
1090 *tocp
++ = HEXSTRTOI(fromcp
);
1095 objcache_put(namei_oc
, cp
);
1099 *tocp
++ = *fromcp
++;
1102 objcache_put(namei_oc
, namebuf
);
1107 * Setup for search. We need to get a start directory from dp. Note
1108 * that dp is ref'd, but we no longer 'own' the ref (*dirpp owns it).
1111 nflags
|= NLC_NFS_NOSOFTLINKTRAV
;
1112 nflags
|= NLC_NOCROSSMOUNT
;
1116 * We need a starting ncp from the directory vnode dp. dp must not
1117 * be locked. The returned ncp will be refd but not locked.
1119 * If no suitable ncp is found we instruct cache_fromdvp() to create
1120 * one. If this fails the directory has probably been removed while
1121 * the target was chdir'd into it and any further lookup will fail.
1123 if ((error
= cache_fromdvp(dp
, cred
, 1, &nch
)) != 0)
1125 nlookup_init_raw(nd
, namebuf
, UIO_SYSSPACE
, nflags
, cred
, &nch
);
1129 * Ok, do the lookup.
1131 error
= nlookup(nd
);
1134 * If no error occurred return the requested dvpp and vpp. If
1135 * NLC_CREATE was specified nd->nl_nch may represent a negative
1136 * cache hit in which case we do not attempt to obtain the vp.
1140 if (nd
->nl_nch
.ncp
->nc_parent
) {
1142 nch
.ncp
= nch
.ncp
->nc_parent
;
1143 error
= cache_vget(&nch
, nd
->nl_cred
,
1144 LK_EXCLUSIVE
, dvpp
);
1149 if (vpp
&& nd
->nl_nch
.ncp
->nc_vp
) {
1150 error
= cache_vget(&nd
->nl_nch
, nd
->nl_cred
, LK_EXCLUSIVE
, vpp
);
1153 if (dvpp
&& *dvpp
) {
1168 objcache_put(namei_oc
, namebuf
);
1173 * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
1174 * - look up fsid in mount list (if not found ret error)
1175 * - get vp and export rights by calling VFS_FHTOVP()
1176 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
1177 * - if not lockflag unlock it with vn_unlock()
1180 nfsrv_fhtovp(fhandle_t
*fhp
, int lockflag
,
1181 struct mount
**mpp
, struct vnode
**vpp
,
1182 struct ucred
*cred
, struct nfssvc_sock
*slp
, struct sockaddr
*nam
,
1183 int *rdonlyp
, int kerbflag
, int pubflag
)
1187 struct ucred
*credanon
;
1189 #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */
1190 struct sockaddr_int
*saddr
;
1196 if (nfs_ispublicfh(fhp
)) {
1197 if (!pubflag
|| !nfs_pub
.np_valid
)
1199 fhp
= &nfs_pub
.np_handle
;
1202 mp
= *mpp
= vfs_getvfs(&fhp
->fh_fsid
);
1205 error
= VFS_CHECKEXP(mp
, nam
, &exflags
, &credanon
);
1208 error
= VFS_FHTOVP(mp
, NULL
, &fhp
->fh_fid
, vpp
);
1211 #ifdef MNT_EXNORESPORT
1212 if (!(exflags
& (MNT_EXNORESPORT
|MNT_EXPUBLIC
))) {
1213 saddr
= (struct sockaddr_in
*)nam
;
1214 if (saddr
->sin_family
== AF_INET
&&
1215 ntohs(saddr
->sin_port
) >= IPPORT_RESERVED
) {
1218 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1223 * Check/setup credentials.
1225 if (exflags
& MNT_EXKERB
) {
1229 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1231 } else if (kerbflag
) {
1234 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1235 } else if (cred
->cr_uid
== 0 || (exflags
& MNT_EXPORTANON
)) {
1236 cred
->cr_uid
= credanon
->cr_uid
;
1237 for (i
= 0; i
< credanon
->cr_ngroups
&& i
< NGROUPS
; i
++)
1238 cred
->cr_groups
[i
] = credanon
->cr_groups
[i
];
1239 cred
->cr_ngroups
= i
;
1241 if (exflags
& MNT_EXRDONLY
)
1252 * WebNFS: check if a filehandle is a public filehandle. For v3, this
1253 * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
1254 * transformed this to all zeroes in both cases, so check for it.
1257 nfs_ispublicfh(fhandle_t
*fhp
)
1259 char *cp
= (char *)fhp
;
1262 for (i
= 0; i
< NFSX_V3FH
; i
++)
1268 #endif /* NFS_NOSERVER */
1270 * This function compares two net addresses by family and returns TRUE
1271 * if they are the same host.
1272 * If there is any doubt, return FALSE.
1273 * The AF_INET family is handled as a special case so that address mbufs
1274 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
1277 netaddr_match(int family
, union nethostaddr
*haddr
, struct sockaddr
*nam
)
1279 struct sockaddr_in
*inetaddr
;
1283 inetaddr
= (struct sockaddr_in
*)nam
;
1284 if (inetaddr
->sin_family
== AF_INET
&&
1285 inetaddr
->sin_addr
.s_addr
== haddr
->had_inetaddr
)
1294 static nfsuint64 nfs_nullcookie
= { { 0, 0 } };
1296 * This function finds the directory cookie that corresponds to the
1297 * logical byte offset given.
1300 nfs_getcookie(struct nfsnode
*np
, off_t off
, int add
)
1302 struct nfsdmap
*dp
, *dp2
;
1305 pos
= (uoff_t
)off
/ NFS_DIRBLKSIZ
;
1306 if (pos
== 0 || off
< 0) {
1309 panic("nfs getcookie add at <= 0");
1311 return (&nfs_nullcookie
);
1314 dp
= np
->n_cookies
.lh_first
;
1317 MALLOC(dp
, struct nfsdmap
*, sizeof (struct nfsdmap
),
1318 M_NFSDIROFF
, M_WAITOK
);
1319 dp
->ndm_eocookie
= 0;
1320 LIST_INSERT_HEAD(&np
->n_cookies
, dp
, ndm_list
);
1324 while (pos
>= NFSNUMCOOKIES
) {
1325 pos
-= NFSNUMCOOKIES
;
1326 if (dp
->ndm_list
.le_next
) {
1327 if (!add
&& dp
->ndm_eocookie
< NFSNUMCOOKIES
&&
1328 pos
>= dp
->ndm_eocookie
)
1330 dp
= dp
->ndm_list
.le_next
;
1332 MALLOC(dp2
, struct nfsdmap
*, sizeof (struct nfsdmap
),
1333 M_NFSDIROFF
, M_WAITOK
);
1334 dp2
->ndm_eocookie
= 0;
1335 LIST_INSERT_AFTER(dp
, dp2
, ndm_list
);
1340 if (pos
>= dp
->ndm_eocookie
) {
1342 dp
->ndm_eocookie
= pos
+ 1;
1346 return (&dp
->ndm_cookies
[pos
]);
1350 * Invalidate cached directory information, except for the actual directory
1351 * blocks (which are invalidated separately).
1352 * Done mainly to avoid the use of stale offset cookies.
1355 nfs_invaldir(struct vnode
*vp
)
1357 struct nfsnode
*np
= VTONFS(vp
);
1360 if (vp
->v_type
!= VDIR
)
1361 panic("nfs: invaldir not dir");
1363 np
->n_direofoffset
= 0;
1364 np
->n_cookieverf
.nfsuquad
[0] = 0;
1365 np
->n_cookieverf
.nfsuquad
[1] = 0;
1366 if (np
->n_cookies
.lh_first
)
1367 np
->n_cookies
.lh_first
->ndm_eocookie
= 0;
1371 * Set the v_type field for an NFS client's vnode and initialize for
1372 * buffer cache operations if necessary.
1375 nfs_setvtype(struct vnode
*vp
, enum vtype vtyp
)
1383 vinitvmio(vp
, 0); /* needs VMIO, size not yet known */
1391 * The write verifier has changed (probably due to a server reboot), so all
1392 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
1393 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
1394 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
1397 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
1398 * writes are not clusterable.
1401 static int nfs_clearcommit_bp(struct buf
*bp
, void *data __unused
);
1402 static int nfs_clearcommit_callback(struct mount
*mp
, struct vnode
*vp
,
1403 void *data __unused
);
1406 nfs_clearcommit(struct mount
*mp
)
1408 vmntvnodescan(mp
, VMSC_NOWAIT
, nfs_clearcommit_callback
, NULL
, NULL
);
1412 nfs_clearcommit_callback(struct mount
*mp
, struct vnode
*vp
,
1413 void *data __unused
)
1418 lwkt_gettoken(&vlock
, &vp
->v_token
);
1419 RB_SCAN(buf_rb_tree
, &vp
->v_rbdirty_tree
, NULL
,
1420 nfs_clearcommit_bp
, NULL
);
1421 lwkt_reltoken(&vlock
);
1427 nfs_clearcommit_bp(struct buf
*bp
, void *data __unused
)
1429 if (BUF_REFCNT(bp
) == 0 &&
1430 (bp
->b_flags
& (B_DELWRI
| B_NEEDCOMMIT
))
1431 == (B_DELWRI
| B_NEEDCOMMIT
)) {
1432 bp
->b_flags
&= ~(B_NEEDCOMMIT
| B_CLUSTEROK
);
1437 #ifndef NFS_NOSERVER
1439 * Map errnos to NFS error numbers. For Version 3 also filter out error
1440 * numbers not specified for the associated procedure.
1443 nfsrv_errmap(struct nfsrv_descript
*nd
, int err
)
1445 short *defaulterrp
, *errp
;
1447 if (nd
->nd_flag
& ND_NFSV3
) {
1448 if (nd
->nd_procnum
<= NFSPROC_COMMIT
) {
1449 errp
= defaulterrp
= nfsrv_v3errmap
[nd
->nd_procnum
];
1453 else if (*errp
> err
)
1456 return ((int)*defaulterrp
);
1458 return (err
& 0xffff);
1461 return ((int)nfsrv_v2errmap
[err
- 1]);
1466 * Sort the group list in increasing numerical order.
1467 * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
1468 * that used to be here.)
1471 nfsrvw_sort(gid_t
*list
, int num
)
1476 /* Insertion sort. */
1477 for (i
= 1; i
< num
; i
++) {
1479 /* find correct slot for value v, moving others up */
1480 for (j
= i
; --j
>= 0 && v
< list
[j
];)
1481 list
[j
+ 1] = list
[j
];
1487 * copy credentials making sure that the result can be compared with bcmp().
1490 nfsrv_setcred(struct ucred
*incred
, struct ucred
*outcred
)
1494 bzero((caddr_t
)outcred
, sizeof (struct ucred
));
1495 outcred
->cr_ref
= 1;
1496 outcred
->cr_uid
= incred
->cr_uid
;
1497 outcred
->cr_ngroups
= incred
->cr_ngroups
;
1498 for (i
= 0; i
< incred
->cr_ngroups
; i
++)
1499 outcred
->cr_groups
[i
] = incred
->cr_groups
[i
];
1500 nfsrvw_sort(outcred
->cr_groups
, outcred
->cr_ngroups
);
1502 #endif /* NFS_NOSERVER */