2 * Copyright (c) 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Rick Macklem at The University of Guelph.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95
33 * $FreeBSD: /repoman/r/ncvs/src/sys/nfsclient/nfs_subs.c,v 1.128 2004/04/14 23:23:55 peadar Exp $
37 * These functions support the macros and help fiddle mbuf chains for
38 * the nfs op functions. They do things like create the rpc header and
39 * copy data between mbuf chains and uio lists.
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/kernel.h>
46 #include <sys/mount.h>
47 #include <sys/vnode.h>
48 #include <sys/nlookup.h>
49 #include <sys/namei.h>
51 #include <sys/socket.h>
53 #include <sys/malloc.h>
54 #include <sys/sysent.h>
55 #include <sys/syscall.h>
57 #include <sys/objcache.h>
60 #include <vm/vm_object.h>
61 #include <vm/vm_extern.h>
71 #include "nfsm_subs.h"
74 #include <netinet/in.h>
76 MALLOC_DEFINE(M_NFSMOUNT
, "NFS mount", "NFS mount");
79 * Data items converted to xdr at startup, since they are constant
80 * This is kinda hokey, but may save a little time doing byte swaps
/* txdr_unsigned(-1), filled in by nfs_init(). */
u_int32_t nfs_xdrneg1;

/* RPC protocol version and message direction, filled in by nfs_init(). */
u_int32_t rpc_vers;
u_int32_t rpc_call;
u_int32_t rpc_reply;

/* RPC reply status values, filled in by nfs_init(). */
u_int32_t rpc_msgaccepted;
u_int32_t rpc_msgdenied;
u_int32_t rpc_mismatch;
u_int32_t rpc_autherr;

/* RPC authentication flavors, filled in by nfs_init(). */
u_int32_t rpc_auth_unix;
u_int32_t rpc_auth_kerb;

/* NFS program number and the XDR forms of TRUE and FALSE. */
u_int32_t nfs_prog;
u_int32_t nfs_true;
u_int32_t nfs_false;
88 /* And other global data */
89 static enum vtype nv2tov_type
[8]= {
90 VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VNON
, VNON
92 enum vtype nv3tov_type
[8]= {
93 VNON
, VREG
, VDIR
, VBLK
, VCHR
, VLNK
, VSOCK
, VFIFO
99 * Protect master lists only. Primary protection uses the per-mount
100 * and per nfssvc_sock tokens.
102 struct lwkt_token nfs_token
= LWKT_TOKEN_INITIALIZER(unp_token
);
/*
 * Free pbuf counter for NFS.  It starts out at -1 (unlimited) and
 * nfs_init() later sets it to nswbuf / 2 + 1.
 */
static long nfs_pbuf_freecnt = -1;
106 struct nfsmount_head nfs_mountq
= TAILQ_HEAD_INITIALIZER(nfs_mountq
);
107 struct nfssvc_sockhead nfssvc_sockhead
;
108 int nfssvc_sockhead_flag
;
109 struct nfsd_head nfsd_head
;
111 struct nfs_bufq nfs_bufq
;
112 struct nqfhhashhead
*nqfhhashtbl
;
115 static int nfs_prev_nfssvc_sy_narg
;
116 static sy_call_t
*nfs_prev_nfssvc_sy_call
;
121 * Mapping of old NFS Version 2 RPC numbers to generic numbers.
123 int nfsv3_procid
[NFS_NPROCS
] = {
152 #endif /* NFS_NOSERVER */
154 * and the reverse mapping from generic to Version 2 procedure numbers
156 int nfsv2_procid
[NFS_NPROCS
] = {
187 * Maps errno values to nfs error numbers.
188 * Use NFSERR_IO as the catch all for ones not specifically defined in
191 static u_char nfsrv_v2errmap
[ELAST
] = {
192 NFSERR_PERM
, NFSERR_NOENT
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
193 NFSERR_NXIO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
194 NFSERR_IO
, NFSERR_IO
, NFSERR_ACCES
, NFSERR_IO
, NFSERR_IO
,
195 NFSERR_IO
, NFSERR_EXIST
, NFSERR_IO
, NFSERR_NODEV
, NFSERR_NOTDIR
,
196 NFSERR_ISDIR
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
197 NFSERR_IO
, NFSERR_FBIG
, NFSERR_NOSPC
, NFSERR_IO
, NFSERR_ROFS
,
198 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
199 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
200 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
201 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
202 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
203 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
204 NFSERR_IO
, NFSERR_IO
, NFSERR_NAMETOL
, NFSERR_IO
, NFSERR_IO
,
205 NFSERR_NOTEMPTY
, NFSERR_IO
, NFSERR_IO
, NFSERR_DQUOT
, NFSERR_STALE
,
206 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
207 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
208 NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
, NFSERR_IO
,
209 NFSERR_IO
/* << Last is 86 */
213 * Maps errno values to nfs error numbers.
214 * Although it is not obvious whether or not NFS clients really care if
215 * a returned error value is in the specified list for the procedure, the
216 * safest thing to do is filter them appropriately. For Version 2, the
217 * X/Open XNFS document is the only specification that defines error values
218 * for each RPC (The RFC simply lists all possible error values for all RPCs),
219 * so I have decided to not do this for Version 2.
220 * The first entry is the default error return and the rest are the valid
221 * errors for that RPC in increasing numeric order.
223 static short nfsv3err_null
[] = {
228 static short nfsv3err_getattr
[] = {
237 static short nfsv3err_setattr
[] = {
253 static short nfsv3err_lookup
[] = {
266 static short nfsv3err_access
[] = {
275 static short nfsv3err_readlink
[] = {
287 static short nfsv3err_read
[] = {
299 static short nfsv3err_write
[] = {
314 static short nfsv3err_create
[] = {
331 static short nfsv3err_mkdir
[] = {
348 static short nfsv3err_symlink
[] = {
365 static short nfsv3err_mknod
[] = {
383 static short nfsv3err_remove
[] = {
397 static short nfsv3err_rmdir
[] = {
415 static short nfsv3err_rename
[] = {
438 static short nfsv3err_link
[] = {
458 static short nfsv3err_readdir
[] = {
471 static short nfsv3err_readdirplus
[] = {
485 static short nfsv3err_fsstat
[] = {
494 static short nfsv3err_fsinfo
[] = {
502 static short nfsv3err_pathconf
[] = {
510 static short nfsv3err_commit
[] = {
519 static short *nfsrv_v3errmap
[] = {
537 nfsv3err_readdirplus
,
544 #endif /* NFS_NOSERVER */
/*
 * Handler that nfs_init() installs as sysent[SYS_nfssvc].sy_call.
 */
extern int	sys_nfssvc(struct proc *, struct nfssvc_args *, int *);
550 * This needs to return a monotonically increasing or close to monotonically
551 * increasing result, otherwise the write gathering queues won't work
560 return ((u_quad_t
)tv
.tv_sec
* 1000000 + (u_quad_t
)tv
.tv_usec
);
564 * Called once to initialize data structures...
567 nfs_init(struct vfsconf
*vfsp
)
569 callout_init(&nfs_timer_handle
);
570 nfsmount_objcache
= objcache_create_simple(M_NFSMOUNT
, sizeof(struct nfsmount
));
572 nfs_mount_type
= vfsp
->vfc_typenum
;
574 rpc_vers
= txdr_unsigned(RPC_VER2
);
575 rpc_call
= txdr_unsigned(RPC_CALL
);
576 rpc_reply
= txdr_unsigned(RPC_REPLY
);
577 rpc_msgdenied
= txdr_unsigned(RPC_MSGDENIED
);
578 rpc_msgaccepted
= txdr_unsigned(RPC_MSGACCEPTED
);
579 rpc_mismatch
= txdr_unsigned(RPC_MISMATCH
);
580 rpc_autherr
= txdr_unsigned(RPC_AUTHERR
);
581 rpc_auth_unix
= txdr_unsigned(RPCAUTH_UNIX
);
582 rpc_auth_kerb
= txdr_unsigned(RPCAUTH_KERB4
);
583 nfs_prog
= txdr_unsigned(NFS_PROG
);
584 nfs_true
= txdr_unsigned(TRUE
);
585 nfs_false
= txdr_unsigned(FALSE
);
586 nfs_xdrneg1
= txdr_unsigned(-1);
587 nfs_ticks
= (hz
* NFS_TICKINTVL
+ 500) / 1000;
590 nfs_nhinit(); /* Init the nfsnode table */
592 nfsrv_init(0); /* Init server data structures */
593 nfsrv_initcache(); /* Init the server request cache */
597 * Mainly for vkernel operation. If memory is severely limited
599 if (nfs_maxasyncbio
> nmbclusters
* MCLBYTES
/ NFS_MAXDATA
/ 3)
600 nfs_maxasyncbio
= nmbclusters
* MCLBYTES
/ NFS_MAXDATA
/ 3;
601 if (nfs_maxasyncbio
< 4)
605 * Initialize reply list and start timer
607 nfs_timer_callout(0);
609 nfs_prev_nfssvc_sy_narg
= sysent
[SYS_nfssvc
].sy_narg
;
610 sysent
[SYS_nfssvc
].sy_narg
= 2;
611 nfs_prev_nfssvc_sy_call
= sysent
[SYS_nfssvc
].sy_call
;
612 sysent
[SYS_nfssvc
].sy_call
= (sy_call_t
*)sys_nfssvc
;
614 nfs_pbuf_freecnt
= nswbuf
/ 2 + 1;
620 nfs_uninit(struct vfsconf
*vfsp
)
622 callout_stop(&nfs_timer_handle
);
624 sysent
[SYS_nfssvc
].sy_narg
= nfs_prev_nfssvc_sy_narg
;
625 sysent
[SYS_nfssvc
].sy_call
= nfs_prev_nfssvc_sy_call
;
630 * Attribute cache routines.
631 * nfs_loadattrcache() - loads or updates the cache contents from attributes
632 * that are on the mbuf list
633 * nfs_getattrcache() - returns valid attributes if found in cache, returns
638 * Load the attribute cache (that lives in the nfsnode entry) with
639 * the values on the mbuf list. Load *vaper with the attributes. vaper
642 * As a side effect n_mtime, which we use to determine if the file was
643 * modified by some other host, is set to the attribute timestamp and
644 * NRMODIFIED is set if the two values differ.
646 * WARNING: the mtime loaded into vaper does not necessarily represent
647 * n_mtime or n_attr.mtime due to NACC and NUPD.
650 nfs_loadattrcache(struct vnode
*vp
, struct mbuf
**mdp
, caddr_t
*dposp
,
651 struct vattr
*vaper
, int lattr_flags
)
654 struct nfs_fattr
*fp
;
664 struct timespec mtime
;
665 int v3
= NFS_ISV3(vp
);
668 t1
= (mtod(md
, caddr_t
) + md
->m_len
) - *dposp
;
669 if ((error
= nfsm_disct(mdp
, dposp
, NFSX_FATTR(v3
), t1
, &cp2
)) != 0)
671 fp
= (struct nfs_fattr
*)cp2
;
673 vtyp
= nfsv3tov_type(fp
->fa_type
);
674 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
675 rmajor
= (int)fxdr_unsigned(int, fp
->fa3_rdev
.specdata1
);
676 rminor
= (int)fxdr_unsigned(int, fp
->fa3_rdev
.specdata2
);
677 fxdr_nfsv3time(&fp
->fa3_mtime
, &mtime
);
679 vtyp
= nfsv2tov_type(fp
->fa_type
);
680 vmode
= fxdr_unsigned(u_short
, fp
->fa_mode
);
684 * The duplicate information returned in fa_type and fa_mode
685 * is an ambiguity in the NFS version 2 protocol.
687 * VREG should be taken literally as a regular file. If a
688 * server intends to return some type information differently
689 * in the upper bits of the mode field (e.g. for sockets, or
690 * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we
691 * leave the examination of the mode bits even in the VREG
692 * case to avoid breakage for bogus servers, but we make sure
693 * that there are actually type bits set in the upper part of
694 * fa_mode (and failing that, trust the va_type field).
696 * NFSv3 cleared the issue, and requires fa_mode to not
697 * contain any type information (while also introducing sockets
698 * and FIFOs for fa_type).
700 if (vtyp
== VNON
|| (vtyp
== VREG
&& (vmode
& S_IFMT
) != 0))
701 vtyp
= IFTOVT(vmode
);
702 rdev
= fxdr_unsigned(int32_t, fp
->fa2_rdev
);
703 rmajor
= umajor(rdev
);
704 rminor
= uminor(rdev
);
705 fxdr_nfsv2time(&fp
->fa2_mtime
, &mtime
);
708 * Really ugly NFSv2 kludge.
710 if (vtyp
== VCHR
&& rdev
== (udev_t
)0xffffffff)
715 * If v_type == VNON it is a new node, so fill in the v_type,
716 * n_mtime fields. Check to see if it represents a special
717 * device, and if so, check for a possible alias. Once the
718 * correct vnode has been obtained, fill in the rest of the
722 if (vp
->v_type
!= vtyp
) {
723 nfs_setvtype(vp
, vtyp
);
724 if (vp
->v_type
== VFIFO
) {
725 vp
->v_ops
= &vp
->v_mount
->mnt_vn_fifo_ops
;
726 } else if (vp
->v_type
== VCHR
|| vp
->v_type
== VBLK
) {
727 vp
->v_ops
= &vp
->v_mount
->mnt_vn_spec_ops
;
728 addaliasu(vp
, rmajor
, rminor
);
730 vp
->v_ops
= &vp
->v_mount
->mnt_vn_use_ops
;
732 np
->n_mtime
= mtime
.tv_sec
;
733 } else if (np
->n_mtime
!= mtime
.tv_sec
) {
735 * If we haven't modified the file locally and the server
736 * timestamp does not match, then the server probably
737 * modified the file. We must flag this condition so
738 * the proper synchronization can be done. We do not
739 * try to synchronize the state here because that
740 * could lead to an endless recursion.
742 * XXX loadattrcache can be set during the reply to a write,
743 * before the write timestamp is properly processed. To
744 * avoid unconditionally setting the rmodified bit (which
745 * has the effect of flushing the cache), we only do this
746 * check if the lmodified bit is not set.
748 np
->n_mtime
= mtime
.tv_sec
;
749 if ((lattr_flags
& NFS_LATTR_NOMTIMECHECK
) == 0)
750 np
->n_flag
|= NRMODIFIED
;
754 vap
->va_mode
= (vmode
& 07777);
755 vap
->va_rmajor
= rmajor
;
756 vap
->va_rminor
= rminor
;
757 vap
->va_mtime
= mtime
;
758 vap
->va_fsid
= vp
->v_mount
->mnt_stat
.f_fsid
.val
[0];
760 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
761 vap
->va_uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
762 vap
->va_gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
763 vap
->va_size
= fxdr_hyper(&fp
->fa3_size
);
764 vap
->va_blocksize
= NFS_FABLKSIZE
;
765 vap
->va_bytes
= fxdr_hyper(&fp
->fa3_used
);
766 vap
->va_fileid
= fxdr_hyper(&fp
->fa3_fileid
);
767 fxdr_nfsv3time(&fp
->fa3_atime
, &vap
->va_atime
);
768 fxdr_nfsv3time(&fp
->fa3_ctime
, &vap
->va_ctime
);
772 vap
->va_nlink
= fxdr_unsigned(u_short
, fp
->fa_nlink
);
773 vap
->va_uid
= fxdr_unsigned(uid_t
, fp
->fa_uid
);
774 vap
->va_gid
= fxdr_unsigned(gid_t
, fp
->fa_gid
);
775 vap
->va_size
= fxdr_unsigned(u_int32_t
, fp
->fa2_size
);
776 vap
->va_blocksize
= fxdr_unsigned(int32_t, fp
->fa2_blocksize
);
777 vap
->va_bytes
= (u_quad_t
)fxdr_unsigned(int32_t, fp
->fa2_blocks
)
779 vap
->va_fileid
= fxdr_unsigned(int32_t, fp
->fa2_fileid
);
780 fxdr_nfsv2time(&fp
->fa2_atime
, &vap
->va_atime
);
782 vap
->va_ctime
.tv_sec
= fxdr_unsigned(u_int32_t
,
783 fp
->fa2_ctime
.nfsv2_sec
);
784 vap
->va_ctime
.tv_nsec
= 0;
785 vap
->va_gen
= fxdr_unsigned(u_int32_t
,fp
->fa2_ctime
.nfsv2_usec
);
788 np
->n_attrstamp
= time_second
;
789 if (vap
->va_size
!= np
->n_size
) {
790 if (vap
->va_type
== VREG
) {
792 * Get rid of all the junk we had before and just
793 * set NRMODIFIED if NLMODIFIED is 0. Depend on
794 * occasionally flushing our dirty buffers to
795 * clear both the NLMODIFIED and NRMODIFIED flags.
797 if ((np
->n_flag
& NLMODIFIED
) == 0)
798 np
->n_flag
|= NRMODIFIED
;
800 if ((lattr_flags
& NFS_LATTR_NOSHRINK
) &&
801 vap
->va_size
< np
->n_size
) {
803 * We've been told not to shrink the file;
804 * zero np->n_attrstamp to indicate that
805 * the attributes are stale.
807 * This occurs primarily due to recursive
808 * NFS ops that are executed during periods
809 * where we cannot safely reduce the size of
812 * Additionally, write rpcs are broken down
813 * into buffers and np->n_size is
814 * pre-extended. Setting NRMODIFIED here
815 * can result in n_size getting reset to a
816 * lower value, which is NOT what we want.
817 * XXX this needs to be cleaned up a lot
820 vap
->va_size
= np
->n_size
;
822 if ((np
->n_flag
& NLMODIFIED
) == 0)
823 np
->n_flag
|= NRMODIFIED
;
824 } else if (np
->n_flag
& NLMODIFIED
) {
826 * We've modified the file: Use the larger
827 * of our size, and the server's size. At
828 * this point the cache coherency is all
829 * shot to hell. To try to handle multiple
830 * clients appending to the file at the same
831 * time mark that the server has changed
832 * the file if the server's notion of the
833 * file size is larger than our notion.
835 * XXX this needs work.
837 if (vap
->va_size
< np
->n_size
) {
838 vap
->va_size
= np
->n_size
;
840 np
->n_size
= vap
->va_size
;
841 np
->n_flag
|= NRMODIFIED
;
845 * Someone changed the file's size on the
846 * server and there are no local changes
847 * to get in the way, set the size and mark
850 np
->n_size
= vap
->va_size
;
851 np
->n_flag
|= NRMODIFIED
;
853 nvnode_pager_setsize(vp
, np
->n_size
, XXX
);
856 np
->n_size
= vap
->va_size
;
860 bcopy((caddr_t
)vap
, (caddr_t
)vaper
, sizeof(*vap
));
861 if (np
->n_flag
& NCHG
) {
862 if (np
->n_flag
& NACC
)
863 vaper
->va_atime
= np
->n_atim
;
864 if (np
->n_flag
& NUPD
)
865 vaper
->va_mtime
= np
->n_mtim
;
872 #include <sys/sysctl.h>
873 SYSCTL_DECL(_vfs_nfs
);
874 static int nfs_acdebug
;
875 SYSCTL_INT(_vfs_nfs
, OID_AUTO
, acdebug
, CTLFLAG_RW
, &nfs_acdebug
, 0, "");
879 * Check the time stamp
880 * If the cache is valid, copy contents to *vap and return 0
881 * otherwise return an error
884 nfs_getattrcache(struct vnode
*vp
, struct vattr
*vaper
)
888 struct nfsmount
*nmp
;
893 nmp
= VFSTONFS(vp
->v_mount
);
896 * Dynamic timeout based on how recently the file was modified.
897 * n_mtime is always valid.
899 timeo
= (get_approximate_time_t() - np
->n_mtime
) / 60;
903 kprintf("nfs_getattrcache: initial timeo = %d\n", timeo
);
906 if (vap
->va_type
== VDIR
) {
907 if ((np
->n_flag
& NLMODIFIED
) || timeo
< nmp
->nm_acdirmin
)
908 timeo
= nmp
->nm_acdirmin
;
909 else if (timeo
> nmp
->nm_acdirmax
)
910 timeo
= nmp
->nm_acdirmax
;
912 if ((np
->n_flag
& NLMODIFIED
) || timeo
< nmp
->nm_acregmin
)
913 timeo
= nmp
->nm_acregmin
;
914 else if (timeo
> nmp
->nm_acregmax
)
915 timeo
= nmp
->nm_acregmax
;
920 kprintf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n",
921 nmp
->nm_acregmin
, nmp
->nm_acregmax
,
922 nmp
->nm_acdirmin
, nmp
->nm_acdirmax
);
925 kprintf("nfs_getattrcache: age = %d; final timeo = %d\n",
926 (int)(time_second
- np
->n_attrstamp
), timeo
);
929 if (np
->n_attrstamp
== 0 || (time_second
- np
->n_attrstamp
) >= timeo
) {
930 nfsstats
.attrcache_misses
++;
933 nfsstats
.attrcache_hits
++;
936 * Our attribute cache can be stale due to modifications made on
937 * this host. XXX this is a bad hack. We need a more deterministic
938 * means of finding out which np fields are valid versus attr cache
939 * fields. We really should update the vattr info on the fly when
940 * making local changes.
942 if (vap
->va_size
!= np
->n_size
) {
943 if (vap
->va_type
== VREG
) {
944 if (np
->n_flag
& NLMODIFIED
)
945 vap
->va_size
= np
->n_size
;
946 nfs_meta_setsize(vp
, curthread
, vap
->va_size
, 0);
948 np
->n_size
= vap
->va_size
;
951 bcopy((caddr_t
)vap
, (caddr_t
)vaper
, sizeof(struct vattr
));
952 if (np
->n_flag
& NCHG
) {
953 if (np
->n_flag
& NACC
)
954 vaper
->va_atime
= np
->n_atim
;
955 if (np
->n_flag
& NUPD
)
956 vaper
->va_mtime
= np
->n_mtim
;
964 * Set up nameidata for a lookup() call and do it.
966 * If pubflag is set, this call is done for a lookup operation on the
967 * public filehandle. In that case we allow crossing mountpoints and
968 * absolute pathnames. However, the caller is expected to check that
969 * the lookup result is within the public fs, and deny access if
972 * dirp may be set whether an error is returned or not, and must be
973 * released by the caller.
975 * On return nd->nl_nch usually points to the target ncp, which may represent
978 * NOTE: the caller must call nlookup_done(nd) unconditionally on return
982 nfs_namei(struct nlookupdata
*nd
, struct ucred
*cred
, int nflags
,
983 struct vnode
**dvpp
, struct vnode
**vpp
,
984 fhandle_t
*fhp
, int len
,
985 struct nfssvc_sock
*slp
, struct sockaddr
*nam
, struct mbuf
**mdp
,
986 caddr_t
*dposp
, struct vnode
**dirpp
, struct thread
*td
,
987 int kerbflag
, int pubflag
)
991 char *fromcp
, *tocp
, *cp
;
998 namebuf
= objcache_get(namei_oc
, M_WAITOK
);
1002 * Copy the name from the mbuf list to namebuf.
1007 rem
= mtod(md
, caddr_t
) + md
->m_len
- fromcp
;
1008 for (i
= 0; i
< len
; i
++) {
1015 fromcp
= mtod(md
, caddr_t
);
1018 if (*fromcp
== '\0' || (!pubflag
&& *fromcp
== '/')) {
1022 *tocp
++ = *fromcp
++;
1028 len
= nfsm_rndup(len
)-len
;
1032 else if ((error
= nfs_adv(mdp
, dposp
, len
, rem
)) != 0)
1037 * Extract and set starting directory. The returned dp is refd
1040 error
= nfsrv_fhtovp(fhp
, FALSE
, &mp
, &dp
, cred
, slp
,
1041 nam
, &rdonly
, kerbflag
, pubflag
);
1044 if (dp
->v_type
!= VDIR
) {
1051 * Set return directory. Reference to dp is implicitly transferred
1052 * to the returned pointer. This must be set before we potentially
1058 * read-only - NLC_DELETE, NLC_RENAME_DST are disallowed. NLC_CREATE
1059 * is passed through to nlookup() and will be disallowed
1060 * if the file does not already exist.
1063 nflags
|= NLC_NFS_RDONLY
;
1064 if (nflags
& (NLC_DELETE
| NLC_RENAME_DST
)) {
1071 * Oh joy. For WebNFS, handle those pesky '%' escapes,
1072 * and the 'native path' indicator.
1075 cp
= objcache_get(namei_oc
, M_WAITOK
);
1078 if ((unsigned char)*fromcp
>= WEBNFS_SPECCHAR_START
) {
1079 switch ((unsigned char)*fromcp
) {
1080 case WEBNFS_NATIVE_CHAR
:
1082 * 'Native' path for us is the same
1083 * as a path according to the NFS spec,
1084 * just skip the escape char.
1089 * More may be added in the future, range 0x80-0xff
1093 objcache_put(namei_oc
, cp
);
1098 * Translate the '%' escapes, URL-style.
1100 while (*fromcp
!= '\0') {
1101 if (*fromcp
== WEBNFS_ESC_CHAR
) {
1102 if (fromcp
[1] != '\0' && fromcp
[2] != '\0') {
1104 *tocp
++ = HEXSTRTOI(fromcp
);
1109 objcache_put(namei_oc
, cp
);
1113 *tocp
++ = *fromcp
++;
1116 objcache_put(namei_oc
, namebuf
);
1121 * Setup for search. We need to get a start directory from dp. Note
1122 * that dp is ref'd, but we no longer 'own' the ref (*dirpp owns it).
1125 nflags
|= NLC_NFS_NOSOFTLINKTRAV
;
1126 nflags
|= NLC_NOCROSSMOUNT
;
1130 * We need a starting ncp from the directory vnode dp. dp must not
1131 * be locked. The returned ncp will be refd but not locked.
1133 * If no suitable ncp is found we instruct cache_fromdvp() to create
1134 * one. If this fails the directory has probably been removed while
1135 * the target was chdir'd into it and any further lookup will fail.
1137 if ((error
= cache_fromdvp(dp
, cred
, 1, &nch
)) != 0)
1139 nlookup_init_raw(nd
, namebuf
, UIO_SYSSPACE
, nflags
, cred
, &nch
);
1143 * Ok, do the lookup.
1145 error
= nlookup(nd
);
1148 * If no error occurred return the requested dvpp and vpp. If
1149 * NLC_CREATE was specified nd->nl_nch may represent a negative
1150 * cache hit in which case we do not attempt to obtain the vp.
1154 if (nd
->nl_nch
.ncp
->nc_parent
) {
1156 nch
.ncp
= nch
.ncp
->nc_parent
;
1159 error
= cache_vget(&nch
, nd
->nl_cred
,
1160 LK_EXCLUSIVE
, dvpp
);
1166 if (vpp
&& nd
->nl_nch
.ncp
->nc_vp
) {
1167 error
= cache_vget(&nd
->nl_nch
, nd
->nl_cred
, LK_EXCLUSIVE
, vpp
);
1170 if (dvpp
&& *dvpp
) {
1185 objcache_put(namei_oc
, namebuf
);
1190 * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
1191 * - look up fsid in mount list (if not found ret error)
1192 * - get vp and export rights by calling VFS_FHTOVP()
1193 * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
1194 * - if not lockflag unlock it with vn_unlock()
1197 nfsrv_fhtovp(fhandle_t
*fhp
, int lockflag
,
1198 struct mount
**mpp
, struct vnode
**vpp
,
1199 struct ucred
*cred
, struct nfssvc_sock
*slp
, struct sockaddr
*nam
,
1200 int *rdonlyp
, int kerbflag
, int pubflag
)
1204 struct ucred
*credanon
;
1206 #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */
/*
 * Was declared as "struct sockaddr_int *", a misspelled type.  The
 * pointer is assigned from a (struct sockaddr_in *) cast of nam and is
 * read through sin_family and sin_port, so it must be a sockaddr_in.
 */
struct sockaddr_in *saddr;
1213 if (nfs_ispublicfh(fhp
)) {
1214 if (!pubflag
|| !nfs_pub
.np_valid
)
1216 fhp
= &nfs_pub
.np_handle
;
1219 mp
= *mpp
= vfs_getvfs(&fhp
->fh_fsid
);
1222 error
= VFS_CHECKEXP(mp
, nam
, &exflags
, &credanon
);
1225 error
= VFS_FHTOVP(mp
, NULL
, &fhp
->fh_fid
, vpp
);
1228 #ifdef MNT_EXNORESPORT
1229 if (!(exflags
& (MNT_EXNORESPORT
|MNT_EXPUBLIC
))) {
1230 saddr
= (struct sockaddr_in
*)nam
;
1231 if (saddr
->sin_family
== AF_INET
&&
1232 ntohs(saddr
->sin_port
) >= IPPORT_RESERVED
) {
1235 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1240 * Check/setup credentials.
1242 if (exflags
& MNT_EXKERB
) {
1246 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1248 } else if (kerbflag
) {
1251 return (NFSERR_AUTHERR
| AUTH_TOOWEAK
);
1252 } else if (cred
->cr_uid
== 0 || (exflags
& MNT_EXPORTANON
)) {
1253 cred
->cr_uid
= credanon
->cr_uid
;
1254 for (i
= 0; i
< credanon
->cr_ngroups
&& i
< NGROUPS
; i
++)
1255 cred
->cr_groups
[i
] = credanon
->cr_groups
[i
];
1256 cred
->cr_ngroups
= i
;
1258 if (exflags
& MNT_EXRDONLY
)
1269 * WebNFS: check if a filehandle is a public filehandle. For v3, this
1270 * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has
1271 * transformed this to all zeroes in both cases, so check for it.
1274 nfs_ispublicfh(fhandle_t
*fhp
)
1276 char *cp
= (char *)fhp
;
1279 for (i
= 0; i
< NFSX_V3FH
; i
++)
1285 #endif /* NFS_NOSERVER */
1287 * This function compares two net addresses by family and returns TRUE
1288 * if they are the same host.
1289 * If there is any doubt, return FALSE.
1290 * The AF_INET family is handled as a special case so that address mbufs
1291 * don't need to be saved to store "struct in_addr", which is only 4 bytes.
1294 netaddr_match(int family
, union nethostaddr
*haddr
, struct sockaddr
*nam
)
1296 struct sockaddr_in
*inetaddr
;
1300 inetaddr
= (struct sockaddr_in
*)nam
;
1301 if (inetaddr
->sin_family
== AF_INET
&&
1302 inetaddr
->sin_addr
.s_addr
== haddr
->had_inetaddr
)
1311 static nfsuint64 nfs_nullcookie
= { { 0, 0 } };
1313 * This function finds the directory cookie that corresponds to the
1314 * logical byte offset given.
1317 nfs_getcookie(struct nfsnode
*np
, off_t off
, int add
)
1319 struct nfsdmap
*dp
, *dp2
;
1322 pos
= (uoff_t
)off
/ NFS_DIRBLKSIZ
;
1323 if (pos
== 0 || off
< 0) {
1326 panic("nfs getcookie add at <= 0");
1328 return (&nfs_nullcookie
);
1331 dp
= np
->n_cookies
.lh_first
;
1334 dp
= kmalloc(sizeof(struct nfsdmap
), M_NFSDIROFF
,
1336 dp
->ndm_eocookie
= 0;
1337 LIST_INSERT_HEAD(&np
->n_cookies
, dp
, ndm_list
);
1341 while (pos
>= NFSNUMCOOKIES
) {
1342 pos
-= NFSNUMCOOKIES
;
1343 if (dp
->ndm_list
.le_next
) {
1344 if (!add
&& dp
->ndm_eocookie
< NFSNUMCOOKIES
&&
1345 pos
>= dp
->ndm_eocookie
)
1347 dp
= dp
->ndm_list
.le_next
;
1349 dp2
= kmalloc(sizeof(struct nfsdmap
), M_NFSDIROFF
,
1351 dp2
->ndm_eocookie
= 0;
1352 LIST_INSERT_AFTER(dp
, dp2
, ndm_list
);
1357 if (pos
>= dp
->ndm_eocookie
) {
1359 dp
->ndm_eocookie
= pos
+ 1;
1363 return (&dp
->ndm_cookies
[pos
]);
1367 * Invalidate cached directory information, except for the actual directory
1368 * blocks (which are invalidated separately).
1369 * Done mainly to avoid the use of stale offset cookies.
1372 nfs_invaldir(struct vnode
*vp
)
1374 struct nfsnode
*np
= VTONFS(vp
);
1377 if (vp
->v_type
!= VDIR
)
1378 panic("nfs: invaldir not dir");
1380 np
->n_direofoffset
= 0;
1381 np
->n_cookieverf
.nfsuquad
[0] = 0;
1382 np
->n_cookieverf
.nfsuquad
[1] = 0;
1383 if (np
->n_cookies
.lh_first
)
1384 np
->n_cookies
.lh_first
->ndm_eocookie
= 0;
1388 * Set the v_type field for an NFS client's vnode and initialize for
1389 * buffer cache operations if necessary.
1392 nfs_setvtype(struct vnode
*vp
, enum vtype vtyp
)
1401 * Needs VMIO, size not yet known, and blocksize
1402 * is not really relevant if we are passing a
1405 vinitvmio(vp
, 0, PAGE_SIZE
, -1);
1413 * The write verifier has changed (probably due to a server reboot), so all
1414 * B_NEEDCOMMIT blocks will have to be written again. Since they are on the
1415 * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT
1416 * and B_CLUSTEROK flags. Once done the new write verifier can be set for the
1419 * B_CLUSTEROK must be cleared along with B_NEEDCOMMIT because stage 1 data
1420 * writes are not clusterable.
1423 static int nfs_clearcommit_bp(struct buf
*bp
, void *data __unused
);
1424 static int nfs_clearcommit_callback(struct mount
*mp
, struct vnode
*vp
,
1425 void *data __unused
);
1428 nfs_clearcommit(struct mount
*mp
)
1430 vmntvnodescan(mp
, VMSC_NOWAIT
, nfs_clearcommit_callback
, NULL
, NULL
);
1434 nfs_clearcommit_callback(struct mount
*mp
, struct vnode
*vp
,
1435 void *data __unused
)
1438 lwkt_gettoken(&vp
->v_token
);
1439 RB_SCAN(buf_rb_tree
, &vp
->v_rbdirty_tree
, NULL
,
1440 nfs_clearcommit_bp
, NULL
);
1441 lwkt_reltoken(&vp
->v_token
);
1447 nfs_clearcommit_bp(struct buf
*bp
, void *data __unused
)
1449 if (BUF_REFCNT(bp
) == 0 &&
1450 (bp
->b_flags
& (B_DELWRI
| B_NEEDCOMMIT
))
1451 == (B_DELWRI
| B_NEEDCOMMIT
)) {
1452 bp
->b_flags
&= ~(B_NEEDCOMMIT
| B_CLUSTEROK
);
1457 #ifndef NFS_NOSERVER
1459 * Map errnos to NFS error numbers. For Version 3 also filter out error
1460 * numbers not specified for the associated procedure.
1463 nfsrv_errmap(struct nfsrv_descript
*nd
, int err
)
1465 short *defaulterrp
, *errp
;
1467 if (nd
->nd_flag
& ND_NFSV3
) {
1468 if (nd
->nd_procnum
<= NFSPROC_COMMIT
) {
1469 errp
= defaulterrp
= nfsrv_v3errmap
[nd
->nd_procnum
];
1473 else if (*errp
> err
)
1476 return ((int)*defaulterrp
);
1478 return (err
& 0xffff);
1481 return ((int)nfsrv_v2errmap
[err
- 1]);
1486 * Sort the group list in increasing numerical order.
1487 * (Insertion sort by Chris Torek, who was grossed out by the bubble sort
1488 * that used to be here.)
1491 nfsrvw_sort(gid_t
*list
, int num
)
1496 /* Insertion sort. */
1497 for (i
= 1; i
< num
; i
++) {
1499 /* find correct slot for value v, moving others up */
1500 for (j
= i
; --j
>= 0 && v
< list
[j
];)
1501 list
[j
+ 1] = list
[j
];
1507 * copy credentials making sure that the result can be compared with bcmp().
1510 nfsrv_setcred(struct ucred
*incred
, struct ucred
*outcred
)
1514 bzero((caddr_t
)outcred
, sizeof (struct ucred
));
1515 outcred
->cr_ref
= 1;
1516 outcred
->cr_uid
= incred
->cr_uid
;
1517 outcred
->cr_ngroups
= incred
->cr_ngroups
;
1518 for (i
= 0; i
< incred
->cr_ngroups
; i
++)
1519 outcred
->cr_groups
[i
] = incred
->cr_groups
[i
];
1520 nfsrvw_sort(outcred
->cr_groups
, outcred
->cr_ngroups
);
1522 #endif /* NFS_NOSERVER */