2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
37 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_subs.c,v 1.128 2004/04/14 23:23:55 peadar Exp $
38 * $DragonFly: src/sys/vfs/nfs/nfs_subs.c,v 1.48 2008/09/17 21:44:24 dillon Exp $
42 * These functions support the macros and help fiddle mbuf chains for
43 * the nfs op functions. They do things like create the rpc header and
44 * copy data between mbuf chains and uio lists.
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/kernel.h>
51 #include <sys/mount.h>
52 #include <sys/vnode.h>
53 #include <sys/nlookup.h>
54 #include <sys/namei.h>
56 #include <sys/socket.h>
58 #include <sys/malloc.h>
59 #include <sys/sysent.h>
60 #include <sys/syscall.h>
62 #include <sys/objcache.h>
65 #include <vm/vm_object.h>
66 #include <vm/vm_extern.h>
67 #include <vm/vm_zone.h>
77 #include "nfsm_subs.h"
80 #include <netinet/in.h>
83 * Data items converted to xdr at startup, since they are constant
84 * This is kinda hokey, but may save a little time doing byte swaps
86 u_int32_t nfs_xdrneg1
;
87 u_int32_t rpc_reply
, rpc_msgdenied
, rpc_mismatch
, rpc_vers
;
88 u_int32_t rpc_auth_unix
, rpc_msgaccepted
, rpc_call
, rpc_autherr
;
89 u_int32_t rpc_auth_kerb
;
90 u_int32_t nfs_prog
, nfs_true
, nfs_false
;
92 /* And other global data */
93 static enum vtype nv2tov_type
[8]= {
94 VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VNON
, VNON
96 enum vtype nv3tov_type
[8]= {
97 VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VSOCK
, VFIFO
102 static int nfs_pbuf_freecnt
= -1; /* start out unlimited */
104 struct nfsmount_head nfs_mountq
= TAILQ_HEAD_INITIALIZER(nfs_mountq
);
105 struct nfssvc_sockhead nfssvc_sockhead
;
106 int nfssvc_sockhead_flag
;
107 struct nfsd_head nfsd_head
;
109 struct nfs_bufq nfs_bufq
;
110 struct nqfhhashhead
*nqfhhashtbl
;
113 static int nfs_prev_nfssvc_sy_narg
;
114 static sy_call_t
*nfs_prev_nfssvc_sy_call
;
119 * Mapping of old NFS Version 2 RPC numbers to generic numbers.
121 int nfsv3_procid
[NFS_NPROCS
] = {
150 #endif /* NFS_NOSERVER */
152 * and the reverse mapping from generic to Version 2 procedure numbers
154 int nfsv2_procid
[NFS_NPROCS
] = {
185 * Maps errno values to nfs error numbers.
186 * Use NFSERR_IO as the catch all for ones not specifically defined in
189 static u_char nfsrv_v2errmap
[ELAST
] = {
190 NFSERR_PERM
, NFSERR_NOENT
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
191 NFSERR_NXIO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
192 NFSERR_IO
, NFSERR_IO
, NFSERR_ACCES
, NFSERR_IO
, NFSERR_IO
,
193 NFSERR_IO
, NFSERR_EXIST
, NFSERR_IO
, NFSERR_NODEV
, NFSERR_NOTDIR
,
194 NFSERR_ISDIR
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
195 NFSERR_IO
, NFSERR_FBIG
, NFSERR_NOSPC
, NFSERR_IO
, NFSERR_ROFS
,
196 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
197 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
198 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
199 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
200 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
201 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
202 NFSERR_IO
, NFSERR_IO
, NFSERR_NAMETOL
, NFSERR_IO
, NFSERR_IO
,
203 NFSERR_NOTEMPTY
, NFSERR_IO
, NFSERR_IO
, NFSERR_DQUOT
, NFSERR_STALE
,
204 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
205 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
206 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
207 NFSERR_IO
/* << Last is 86 */
211 * Maps errno values to nfs error numbers.
212 * Although it is not obvious whether or not NFS clients really care if
213 * a returned error value is in the specified list for the procedure, the
214 * safest thing to do is filter them appropriately. For Version 2, the
215 * X/Open XNFS document is the only specification that defines error values
216 * for each RPC (The RFC simply lists all possible error values for all RPCs),
217 * so I have decided to not do this for Version 2.
218 * The first entry is the default error return and the rest are the valid
219 * errors for that RPC in increasing numeric order.
221 static short nfsv3err_null
[] = {
226 static short nfsv3err_getattr
[] = {
235 static short nfsv3err_setattr
[] = {
251 static short nfsv3err_lookup
[] = {
264 static short nfsv3err_access
[] = {
273 static short nfsv3err_readlink
[] = {
285 static short nfsv3err_read
[] = {
297 static short nfsv3err_write
[] = {
312 static short nfsv3err_create
[] = {
329 static short nfsv3err_mkdir
[] = {
346 static short nfsv3err_symlink
[] = {
363 static short nfsv3err_mknod
[] = {
381 static short nfsv3err_remove
[] = {
395 static short nfsv3err_rmdir
[] = {
413 static short nfsv3err_rename
[] = {
436 static short nfsv3err_link
[] = {
456 static short nfsv3err_readdir
[] = {
469 static short nfsv3err_readdirplus
[] = {
483 static short nfsv3err_fsstat
[] = {
492 static short nfsv3err_fsinfo
[] = {
500 static short nfsv3err_pathconf
[] = {
508 static short nfsv3err_commit
[] = {
517 static short *nfsrv_v3errmap
[] = {
535 nfsv3err_readdirplus
,
542 #endif /* NFS_NOSERVER */
545 extern int sys_nfssvc(struct proc
*, struct nfssvc_args
*, int *);
548 * This needs to return a monotonically increasing or close to monotonically
549 * increasing result, otherwise the write gathering queues won't work
558 return ((u_quad_t
)tv
.tv_sec
* 1000000 + (u_quad_t
)tv
.tv_usec
);
562 * Called once to initialize data structures...
565 nfs_init(struct vfsconf
*vfsp
)
567 callout_init(&nfs_timer_handle
);
568 nfsmount_zone
= zinit("NFSMOUNT", sizeof(struct nfsmount
), 0, 0, 1);
570 nfs_mount_type
= vfsp
->vfc_typenum
;
572 rpc_vers
= txdr_unsigned(RPC_VER2
);
573 rpc_call
= txdr_unsigned(RPC_CALL
);
574 rpc_reply
= txdr_unsigned(RPC_REPLY
);
575 rpc_msgdenied
= txdr_unsigned(RPC_MSGDENIED
);
576 rpc_msgaccepted
= txdr_unsigned(RPC_MSGACCEPTED
);
577 rpc_mismatch
= txdr_unsigned(RPC_MISMATCH
);
578 rpc_autherr
= txdr_unsigned(RPC_AUTHERR
);
579 rpc_auth_unix
= txdr_unsigned(RPCAUTH_UNIX
);
580 rpc_auth_kerb
= txdr_unsigned(RPCAUTH_KERB4
);
581 nfs_prog
= txdr_unsigned(NFS_PROG
);
582 nfs_true
= txdr_unsigned(TRUE
);
583 nfs_false
= txdr_unsigned(FALSE
);
584 nfs_xdrneg1
= txdr_unsigned(-1);
585 nfs_ticks
= (hz
* NFS_TICKINTVL
+ 500) / 1000;
588 nfs_nhinit(); /* Init the nfsnode table */
590 nfsrv_init(0); /* Init server data structures */
591 nfsrv_initcache(); /* Init the server request cache */
595 * Mainly for vkernel operation. If memory is severely limited
597 if (nfs_maxasyncbio
> nmbclusters
* MCLBYTES
/ NFS_MAXDATA
/ 3)
598 nfs_maxasyncbio
= nmbclusters
* MCLBYTES
/ NFS_MAXDATA
/ 3;
599 if (nfs_maxasyncbio
< 4)
603 * Initialize reply list and start timer
607 nfs_prev_nfssvc_sy_narg
= sysent
[SYS_nfssvc
].sy_narg
;
608 sysent
[SYS_nfssvc
].sy_narg
= 2;
609 nfs_prev_nfssvc_sy_call
= sysent
[SYS_nfssvc
].sy_call
;
610 sysent
[SYS_nfssvc
].sy_call
= (sy_call_t
*)sys_nfssvc
;
612 nfs_pbuf_freecnt
= nswbuf
/ 2 + 1;
618 nfs_uninit(struct vfsconf
*vfsp
)
620 callout_stop(&nfs_timer_handle
);
622 sysent
[SYS_nfssvc
].sy_narg
= nfs_prev_nfssvc_sy_narg
;
623 sysent
[SYS_nfssvc
].sy_call
= nfs_prev_nfssvc_sy_call
;
628 * Attribute cache routines.
629 * nfs_loadattrcache() - loads or updates the cache contents from attributes
630 * that are on the mbuf list
631 * nfs_getattrcache() - returns valid attributes if found in cache, returns
636 * Load the attribute cache (that lives in the nfsnode entry) with
637 * the values on the mbuf list. Load *vaper with the attributes. vaper
640 * As a side effect n_mtime, which we use to determine if the file was
641 * modified by some other host, is set to the attribute timestamp and
642 * NRMODIFIED is set if the two values differ.
644 * WARNING: the mtime loaded into vaper does not necessarily represent
645 * n_mtime or n_attr.mtime due to NACC and NUPD.
648 nfs_loadattrcache(struct vnode
*vp
, struct mbuf
**mdp
, caddr_t
*dposp
,
649 struct vattr
*vaper
, int lattr_flags
)
652 struct nfs_fattr
*fp
;
662 struct timespec mtime
;
663 int v3
= NFS_ISV3(vp
);
666 t1
= (mtod(md
, caddr_t
) + md
->m_len
) - *dposp
;
667 if ((error
= nfsm_disct(mdp
, dposp
, NFSX_FATTR(v3
), t1
, &cp2
)) != 0)
669 fp
= (struct nfs_fattr
*)cp2
;
671 vtyp
= nfsv3tov_type(fp
->fa_type
);
672 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
673 rmajor
= (int)fxdr_unsigned(int, fp
->fa3_rdev
.specdata1
);
674 rminor
= (int)fxdr_unsigned(int, fp
->fa3_rdev
.specdata2
);
675 fxdr_nfsv3time(&fp
->fa3_mtime
, &mtime
);
677 vtyp
= nfsv2tov_type(fp
->fa_type
);
678 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
682 * The duplicate information returned in fa_type and fa_mode
683 * is an ambiguity in the NFS version 2 protocol.
685 * VREG should be taken literally as a regular file. If a
686 * server intends to return some type information differently
687 * in the upper bits of the mode field (e.g. for sockets, or
688 * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
689 * leave the examination of the mode bits even in the VREG
690 * case to avoid breakage for bogus servers, but we make sure
691 * that there are actually type bits set in the upper part of
692 * fa_mode (and failing that, trust the va_type field).
694 * NFSv3 cleared the issue, and requires fa_mode to not
695 * contain any type information (while also introducing sockets
696 * and FIFOs for fa_type).
698 if (vtyp
== VNON
|| (vtyp
== VREG
&& (vmode
& S_IFMT
) != 0))
699 vtyp
= IFTOVT(vmode
);
700 rdev
= fxdr_unsigned(int32_t, fp
->fa2_rdev
);
701 rmajor
= umajor(rdev
);
702 rminor
= uminor(rdev
);
703 fxdr_nfsv2time(&fp
->fa2_mtime
, &mtime
);
706 * Really ugly NFSv2 kludge.
708 if (vtyp
== VCHR
&& rdev
== (udev_t
)0xffffffff)
713 * If v_type == VNON it is a new node, so fill in the v_type,
714 * n_mtime fields. Check to see if it represents a special
715 * device, and if so, check for a possible alias. Once the
716 * correct vnode has been obtained, fill in the rest of the
720 if (vp
->v_type
!= vtyp
) {
721 nfs_setvtype(vp
, vtyp
);
722 if (vp
->v_type
== VFIFO
) {
723 vp
->v_ops
= &vp
->v_mount
->mnt_vn_fifo_ops
;
724 } else if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
725 vp
->v_ops
= &vp
->v_mount
->mnt_vn_spec_ops
;
726 addaliasu(vp
, rmajor
, rminor
);
728 vp
->v_ops
= &vp
->v_mount
->mnt_vn_use_ops
;
730 np
->n_mtime
= mtime
.tv_sec
;
731 } else if (np
->n_mtime
!= mtime
.tv_sec
) {
733 * If we haven't modified the file locally and the server
734 * timestamp does not match, then the server probably
735 * modified the file. We must flag this condition so
736 * the proper synchronization can be done. We do not
737 * try to synchronize the state here because that
738 * could lead to an endless recursion.
740 * XXX loadattrcache can be set during the reply to a write,
741 * before the write timestamp is properly processed. To
742 * avoid unconditionally setting the rmodified bit (which
743 * has the effect of flushing the cache), we only do this
744 * check if the lmodified bit is not set.
746 np
->n_mtime
= mtime
.tv_sec
;
747 if ((lattr_flags
& NFS_LATTR_NOMTIMECHECK
) == 0)
748 np
->n_flag
|= NRMODIFIED
;
752 vap
->va_mode
= (vmode
& 07777);
753 vap
->va_rmajor
= rmajor
;
754 vap
->va_rminor
= rminor
;
755 vap
->va_mtime
= mtime
;
756 vap
->va_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
.val
[0];
758 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
759 vap
->va_uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
760 vap
->va_gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
761 vap
->va_size
= fxdr_hyper(&fp
->fa3_size
);
762 vap
->va_blocksize
= NFS_FABLKSIZE
;
763 vap
->va_bytes
= fxdr_hyper(&fp
->fa3_used
);
764 vap
->va_fileid
= fxdr_hyper(&fp
->fa3_fileid
);
765 fxdr_nfsv3time(&fp
->fa3_atime
, &vap
->va_atime
);
766 fxdr_nfsv3time(&fp
->fa3_ctime
, &vap
->va_ctime
);
770 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
771 vap
->va_uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
772 vap
->va_gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
773 vap
->va_size
= fxdr_unsigned(u_int32_t
, fp
->fa2_size
);
774 vap
->va_blocksize
= fxdr_unsigned(int32_t, fp
->fa2_blocksize
);
775 vap
->va_bytes
= (u_quad_t
)fxdr_unsigned(int32_t, fp
->fa2_blocks
)
777 vap
->va_fileid
= fxdr_unsigned(int32_t, fp
->fa2_fileid
);
778 fxdr_nfsv2time(&fp
->fa2_atime
, &vap
->va_atime
);
780 vap
->va_ctime
.tv_sec
= fxdr_unsigned(u_int32_t
,
781 fp
->fa2_ctime
.nfsv2_sec
);
782 vap
->va_ctime
.tv_nsec
= 0;
783 vap
->va_gen
= fxdr_unsigned(u_int32_t
,fp
->fa2_ctime
.nfsv2_usec
);
786 np
->n_attrstamp
= time_second
;
787 if (vap
->va_size
!= np
->n_size
) {
788 if (vap
->va_type
== VREG
) {
789 if ((lattr_flags
& NFS_LATTR_NOSHRINK
) &&
790 vap
->va_size
< np
->n_size
) {
792 * We've been told not to shrink the file;
793 * zero np->n_attrstamp to indicate that
794 * the attributes are stale.
796 * This occurs primarily due to recursive
797 * NFS ops that are executed during periods
798 * where we cannot safely reduce the size of
801 * Additionally, write rpcs are broken down
802 * into buffers and np->n_size is
803 * pre-extended. Setting NRMODIFIED here
804 * can result in n_size getting reset to a
805 * lower value, which is NOT what we want.
806 * XXX this needs to be cleaned up a lot
809 vap
->va_size
= np
->n_size
;
811 if ((np
->n_flag
& NLMODIFIED
) == 0)
812 np
->n_flag
|= NRMODIFIED
;
813 } else if (np
->n_flag
& NLMODIFIED
) {
815 * We've modified the file: Use the larger
816 * of our size, and the server's size. At
817 * this point the cache coherency is all
818 * shot to hell. To try to handle multiple
819 * clients appending to the file at the same
820 * time mark that the server has changed
821 * the file if the server's notion of the
822 * file size is larger than our notion.
824 * XXX this needs work.
826 if (vap
->va_size
< np
->n_size
) {
827 vap
->va_size
= np
->n_size
;
829 np
->n_size
= vap
->va_size
;
830 np
->n_flag
|= NRMODIFIED
;
834 * Someone changed the file's size on the
835 * server and there are no local changes
836 * to get in the way, set the size and mark
839 np
->n_size
= vap
->va_size
;
840 np
->n_flag
|= NRMODIFIED
;
842 vnode_pager_setsize(vp
, np
->n_size
);
844 np
->n_size
= vap
->va_size
;
848 bcopy((caddr_t
)vap
, (caddr_t
)vaper
, sizeof(*vap
));
849 if (np
->n_flag
& NCHG
) {
850 if (np
->n_flag
& NACC
)
851 vaper
->va_atime
= np
->n_atim
;
852 if (np
->n_flag
& NUPD
)
853 vaper
->va_mtime
= np
->n_mtim
;
860 #include <sys/sysctl.h>
861 SYSCTL_DECL(_vfs_nfs
);
862 static int nfs_acdebug
;
863 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, acdebug
, CTLFLAG_RW
, &nfs_acdebug
, 0, "");
867 * Check the time stamp
868 * If the cache is valid, copy contents to *vap and return 0
869 * otherwise return an error
872 nfs_getattrcache(struct vnode
*vp
, struct vattr
*vaper
)
876 struct nfsmount
*nmp
;
881 nmp
= VFSTONFS(vp
->v_mount
);
884 * Dynamic timeout based on how recently the file was modified.
885 * n_mtime is always valid.
887 timeo
= (get_approximate_time_t() - np
->n_mtime
) / 60;
891 kprintf("nfs_getattrcache: initial timeo = %d\n", timeo
);
894 if (vap
->va_type
== VDIR
) {
895 if ((np
->n_flag
& NLMODIFIED
) || timeo
< nmp
->nm_acdirmin
)
896 timeo
= nmp
->nm_acdirmin
;
897 else if (timeo
> nmp
->nm_acdirmax
)
898 timeo
= nmp
->nm_acdirmax
;
900 if ((np
->n_flag
& NLMODIFIED
) || timeo
< nmp
->nm_acregmin
)
901 timeo
= nmp
->nm_acregmin
;
902 else if (timeo
> nmp
->nm_acregmax
)
903 timeo
= nmp
->nm_acregmax
;
908 kprintf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
909 nmp
->nm_acregmin
, nmp
->nm_acregmax
,
910 nmp
->nm_acdirmin
, nmp
->nm_acdirmax
);
913 kprintf("nfs_getattrcache: age = %d; final timeo = %d\n",
914 (int)(time_second
- np
->n_attrstamp
), timeo
);
917 if (np
->n_attrstamp
== 0 || (time_second
- np
->n_attrstamp
) >= timeo
) {
918 nfsstats
.attrcache_misses
++;
921 nfsstats
.attrcache_hits
++;
924 * Our attribute cache can be stale due to modifications made on
925 * this host. XXX this is a bad hack. We need a more deterministic
926 * means of finding out which np fields are valid versus attr cache
927 * fields. We really should update the vattr info on the fly when
928 * making local changes.
930 if (vap
->va_size
!= np
->n_size
) {
931 if (vap
->va_type
== VREG
) {
932 if (np
->n_flag
& NLMODIFIED
) {
933 if (vap
->va_size
< np
->n_size
)
934 vap
->va_size
= np
->n_size
;
936 np
->n_size
= vap
->va_size
;
938 np
->n_size
= vap
->va_size
;
940 vnode_pager_setsize(vp
, np
->n_size
);
942 np
->n_size
= vap
->va_size
;
945 bcopy((caddr_t
)vap
, (caddr_t
)vaper
, sizeof(struct vattr
));
946 if (np
->n_flag
& NCHG
) {
947 if (np
->n_flag
& NACC
)
948 vaper
->va_atime
= np
->n_atim
;
949 if (np
->n_flag
& NUPD
)
950 vaper
->va_mtime
= np
->n_mtim
;
958 * Set up nameidata for a lookup() call and do it.
960 * If pubflag is set, this call is done for a lookup operation on the
961 * public filehandle. In that case we allow crossing mountpoints and
962 * absolute pathnames. However, the caller is expected to check that
963 * the lookup result is within the public fs, and deny access if
966 * dirp may be set whether an error is returned or not, and must be
967 * released by the caller.
969 * On return nd->nl_nch usually points to the target ncp, which may represent
972 * NOTE: the caller must call nlookup_done(nd) unconditionally on return
976 nfs_namei(struct nlookupdata
*nd
, struct ucred
*cred
, int nflags
,
977 struct vnode
**dvpp
, struct vnode
**vpp
,
978 fhandle_t
*fhp
, int len
,
979 struct nfssvc_sock
*slp
, struct sockaddr
*nam
, struct mbuf
**mdp
,
980 caddr_t
*dposp
, struct vnode
**dirpp
, struct thread
*td
,
981 int kerbflag
, int pubflag
)
985 char *fromcp
, *tocp
, *cp
;
992 namebuf
= objcache_get(namei_oc
, M_WAITOK
);
996 * Copy the name from the mbuf list to namebuf.
1001 rem
= mtod(md
, caddr_t
) + md
->m_len
- fromcp
;
1002 for (i
= 0; i
< len
; i
++) {
1009 fromcp
= mtod(md
, caddr_t
);
1012 if (*fromcp
== '\0' || (!pubflag
&& *fromcp
== '/')) {
1016 *tocp
++ = *fromcp
++;
1022 len
= nfsm_rndup(len
)-len
;
1026 else if ((error
= nfs_adv(mdp
, dposp
, len
, rem
)) != 0)
1031 * Extract and set starting directory. The returned dp is refd
1034 error
= nfsrv_fhtovp(fhp
, FALSE
, &mp
, &dp
, cred
, slp
,
1035 nam
, &rdonly
, kerbflag
, pubflag
);
1038 if (dp
->v_type
!= VDIR
) {
1045 * Set return directory. Reference to dp is implicitly transferred
1046 * to the returned pointer. This must be set before we potentially
1052 * read-only - NLC_DELETE, NLC_RENAME_DST are disallowed. NLC_CREATE
1053 * is passed through to nlookup() and will be disallowed
1054 * if the file does not already exist.
1057 nflags
|= NLC_NFS_RDONLY
;
1058 if (nflags
& (NLC_DELETE
| NLC_RENAME_DST
)) {
1065 * Oh joy. For WebNFS, handle those pesky '%' escapes,
1066 * and the 'native path' indicator.
1069 cp
= objcache_get(namei_oc
, M_WAITOK
);
1072 if ((unsigned char)*fromcp
>= WEBNFS_SPECCHAR_START
) {
1073 switch ((unsigned char)*fromcp
) {
1074 case WEBNFS_NATIVE_CHAR
:
1076 * 'Native' path for us is the same
1077 * as a path according to the NFS spec,
1078 * just skip the escape char.
1083 * More may be added in the future, range 0x80-0xff
1087 objcache_put(namei_oc
, cp
);
1092 * Translate the '%' escapes, URL-style.
1094 while (*fromcp
!= '\0') {
1095 if (*fromcp
== WEBNFS_ESC_CHAR
) {
1096 if (fromcp
[1] != '\0' && fromcp
[2] != '\0') {
1098 *tocp
++ = HEXSTRTOI(fromcp
);
1103 objcache_put(namei_oc
, cp
);
1107 *tocp
++ = *fromcp
++;
1110 objcache_put(namei_oc
, namebuf
);
1115 * Setup for search. We need to get a start directory from dp. Note
1116 * that dp is ref'd, but we no longer 'own' the ref (*dirpp owns it).
1119 nflags
|= NLC_NFS_NOSOFTLINKTRAV
;
1120 nflags
|= NLC_NOCROSSMOUNT
;
1124 * We need a starting ncp from the directory vnode dp. dp must not
1125 * be locked. The returned ncp will be refd but not locked.
1127 * If no suitable ncp is found we instruct cache_fromdvp() to create
1128 * one. If this fails the directory has probably been removed while
1129 * the target was chdir'd into it and any further lookup will fail.
1131 if ((error
= cache_fromdvp(dp
, cred
, 1, &nch
)) != 0)
1133 nlookup_init_raw(nd
, namebuf
, UIO_SYSSPACE
, nflags
, cred
, &nch
);
1137 * Ok, do the lookup.
1139 error
= nlookup(nd
);
1142 * If no error occurred return the requested dvpp and vpp. If
1143 * NLC_CREATE was specified nd->nl_nch may represent a negative
1144 * cache hit in which case we do not attempt to obtain the vp.
1148 if (nd
->nl_nch
.ncp
->nc_parent
) {
1150 nch
.ncp
= nch
.ncp
->nc_parent
;
1151 error
= cache_vget(&nch
, nd
->nl_cred
,
1152 LK_EXCLUSIVE
, dvpp
);
1157 if (vpp
&& nd
->nl_nch
.ncp
->nc_vp
) {
1158 error
= cache_vget(&nd
->nl_nch
, nd
->nl_cred
, LK_EXCLUSIVE
, vpp
);
1161 if (dvpp
&& *dvpp
) {
1176 objcache_put(namei_oc
, namebuf
);
1181 * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
1182 * - look up fsid in mount list (if not found ret error)
1183 * - get vp and export rights by calling VFS_FHTOVP()
1184 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
1185 * - if not lockflag unlock it with vn_unlock()
1188 nfsrv_fhtovp(fhandle_t
*fhp
, int lockflag
,
1189 struct mount
**mpp
, struct vnode
**vpp
,
1190 struct ucred
*cred
, struct nfssvc_sock
*slp
, struct sockaddr
*nam
,
1191 int *rdonlyp
, int kerbflag
, int pubflag
)
1195 struct ucred
*credanon
;
1197 #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */
1198 struct sockaddr_int
*saddr
;
1204 if (nfs_ispublicfh(fhp
)) {
1205 if (!pubflag
|| !nfs_pub
.np_valid
)
1207 fhp
= &nfs_pub
.np_handle
;
1210 mp
= *mpp
= vfs_getvfs(&fhp
->fh_fsid
);
1213 error
= VFS_CHECKEXP(mp
, nam
, &exflags
, &credanon
);
1216 error
= VFS_FHTOVP(mp
, NULL
, &fhp
->fh_fid
, vpp
);
1219 #ifdef MNT_EXNORESPORT
1220 if (!(exflags
& (MNT_EXNORESPORT
|MNT_EXPUBLIC
))) {
1221 saddr
= (struct sockaddr_in
*)nam
;
1222 if (saddr
->sin_family
== AF_INET
&&
1223 ntohs(saddr
->sin_port
) >= IPPORT_RESERVED
) {
1226 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1231 * Check/setup credentials.
1233 if (exflags
& MNT_EXKERB
) {
1237 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1239 } else if (kerbflag
) {
1242 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1243 } else if (cred
->cr_uid
== 0 || (exflags
& MNT_EXPORTANON
)) {
1244 cred
->cr_uid
= credanon
->cr_uid
;
1245 for (i
= 0; i
< credanon
->cr_ngroups
&& i
< NGROUPS
; i
++)
1246 cred
->cr_groups
[i
] = credanon
->cr_groups
[i
];
1247 cred
->cr_ngroups
= i
;
1249 if (exflags
& MNT_EXRDONLY
)
1260 * WebNFS: check if a filehandle is a public filehandle. For v3, this
1261 * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
1262 * transformed this to all zeroes in both cases, so check for it.
1265 nfs_ispublicfh(fhandle_t
*fhp
)
1267 char *cp
= (char *)fhp
;
1270 for (i
= 0; i
< NFSX_V3FH
; i
++)
1276 #endif /* NFS_NOSERVER */
1278 * This function compares two net addresses by family and returns TRUE
1279 * if they are the same host.
1280 * If there is any doubt, return FALSE.
1281 * The AF_INET family is handled as a special case so that address mbufs
1282 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
1285 netaddr_match(int family
, union nethostaddr
*haddr
, struct sockaddr
*nam
)
1287 struct sockaddr_in
*inetaddr
;
1291 inetaddr
= (struct sockaddr_in
*)nam
;
1292 if (inetaddr
->sin_family
== AF_INET
&&
1293 inetaddr
->sin_addr
.s_addr
== haddr
->had_inetaddr
)
1302 static nfsuint64 nfs_nullcookie
= { { 0, 0 } };
1304 * This function finds the directory cookie that corresponds to the
1305 * logical byte offset given.
1308 nfs_getcookie(struct nfsnode
*np
, off_t off
, int add
)
1310 struct nfsdmap
*dp
, *dp2
;
1313 pos
= (uoff_t
)off
/ NFS_DIRBLKSIZ
;
1314 if (pos
== 0 || off
< 0) {
1317 panic("nfs getcookie add at <= 0");
1319 return (&nfs_nullcookie
);
1322 dp
= np
->n_cookies
.lh_first
;
1325 MALLOC(dp
, struct nfsdmap
*, sizeof (struct nfsdmap
),
1326 M_NFSDIROFF
, M_WAITOK
);
1327 dp
->ndm_eocookie
= 0;
1328 LIST_INSERT_HEAD(&np
->n_cookies
, dp
, ndm_list
);
1332 while (pos
>= NFSNUMCOOKIES
) {
1333 pos
-= NFSNUMCOOKIES
;
1334 if (dp
->ndm_list
.le_next
) {
1335 if (!add
&& dp
->ndm_eocookie
< NFSNUMCOOKIES
&&
1336 pos
>= dp
->ndm_eocookie
)
1338 dp
= dp
->ndm_list
.le_next
;
1340 MALLOC(dp2
, struct nfsdmap
*, sizeof (struct nfsdmap
),
1341 M_NFSDIROFF
, M_WAITOK
);
1342 dp2
->ndm_eocookie
= 0;
1343 LIST_INSERT_AFTER(dp
, dp2
, ndm_list
);
1348 if (pos
>= dp
->ndm_eocookie
) {
1350 dp
->ndm_eocookie
= pos
+ 1;
1354 return (&dp
->ndm_cookies
[pos
]);
1358 * Invalidate cached directory information, except for the actual directory
1359 * blocks (which are invalidated separately).
1360 * Done mainly to avoid the use of stale offset cookies.
1363 nfs_invaldir(struct vnode
*vp
)
1365 struct nfsnode
*np
= VTONFS(vp
);
1368 if (vp
->v_type
!= VDIR
)
1369 panic("nfs: invaldir not dir");
1371 np
->n_direofoffset
= 0;
1372 np
->n_cookieverf
.nfsuquad
[0] = 0;
1373 np
->n_cookieverf
.nfsuquad
[1] = 0;
1374 if (np
->n_cookies
.lh_first
)
1375 np
->n_cookies
.lh_first
->ndm_eocookie
= 0;
1379 * Set the v_type field for an NFS client's vnode and initialize for
1380 * buffer cache operations if necessary.
1383 nfs_setvtype(struct vnode
*vp
, enum vtype vtyp
)
1391 vinitvmio(vp
, 0); /* needs VMIO, size not yet known */
1399 * The write verifier has changed (probably due to a server reboot), so all
1400 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
1401 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
1402 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
1405 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
1406 * writes are not clusterable.
1409 static int nfs_clearcommit_bp(struct buf
*bp
, void *data __unused
);
1410 static int nfs_clearcommit_callback(struct mount
*mp
, struct vnode
*vp
,
1411 void *data __unused
);
1414 nfs_clearcommit(struct mount
*mp
)
1416 vmntvnodescan(mp
, VMSC_NOWAIT
, nfs_clearcommit_callback
, NULL
, NULL
);
1420 nfs_clearcommit_callback(struct mount
*mp
, struct vnode
*vp
,
1421 void *data __unused
)
1426 lwkt_gettoken(&vlock
, &vp
->v_token
);
1427 RB_SCAN(buf_rb_tree
, &vp
->v_rbdirty_tree
, NULL
,
1428 nfs_clearcommit_bp
, NULL
);
1429 lwkt_reltoken(&vlock
);
1435 nfs_clearcommit_bp(struct buf
*bp
, void *data __unused
)
1437 if (BUF_REFCNT(bp
) == 0 &&
1438 (bp
->b_flags
& (B_DELWRI
| B_NEEDCOMMIT
))
1439 == (B_DELWRI
| B_NEEDCOMMIT
)) {
1440 bp
->b_flags
&= ~(B_NEEDCOMMIT
| B_CLUSTEROK
);
1445 #ifndef NFS_NOSERVER
1447 * Map errnos to NFS error numbers. For Version 3 also filter out error
1448 * numbers not specified for the associated procedure.
1451 nfsrv_errmap(struct nfsrv_descript
*nd
, int err
)
1453 short *defaulterrp
, *errp
;
1455 if (nd
->nd_flag
& ND_NFSV3
) {
1456 if (nd
->nd_procnum
<= NFSPROC_COMMIT
) {
1457 errp
= defaulterrp
= nfsrv_v3errmap
[nd
->nd_procnum
];
1461 else if (*errp
> err
)
1464 return ((int)*defaulterrp
);
1466 return (err
& 0xffff);
1469 return ((int)nfsrv_v2errmap
[err
- 1]);
1474 * Sort the group list in increasing numerical order.
1475 * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
1476 * that used to be here.)
1479 nfsrvw_sort(gid_t
*list
, int num
)
1484 /* Insertion sort. */
1485 for (i
= 1; i
< num
; i
++) {
1487 /* find correct slot for value v, moving others up */
1488 for (j
= i
; --j
>= 0 && v
< list
[j
];)
1489 list
[j
+ 1] = list
[j
];
1495 * copy credentials making sure that the result can be compared with bcmp().
1498 nfsrv_setcred(struct ucred
*incred
, struct ucred
*outcred
)
1502 bzero((caddr_t
)outcred
, sizeof (struct ucred
));
1503 outcred
->cr_ref
= 1;
1504 outcred
->cr_uid
= incred
->cr_uid
;
1505 outcred
->cr_ngroups
= incred
->cr_ngroups
;
1506 for (i
= 0; i
< incred
->cr_ngroups
; i
++)
1507 outcred
->cr_groups
[i
] = incred
->cr_groups
[i
];
1508 nfsrvw_sort(outcred
->cr_groups
, outcred
->cr_ngroups
);
1510 #endif /* NFS_NOSERVER */