2986 nfs: exi refcounter leak at rfs3_lookup
[unleashed.git] / usr / src / uts / common / fs / nfs / nfs3_srv.c
blob4acbe92ad913c46ba295936073ba6971f7937ff2
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
27 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
28 /* All Rights Reserved */
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/uio.h>
38 #include <sys/errno.h>
39 #include <sys/sysmacros.h>
40 #include <sys/statvfs.h>
41 #include <sys/kmem.h>
42 #include <sys/dirent.h>
43 #include <sys/cmn_err.h>
44 #include <sys/debug.h>
45 #include <sys/systeminfo.h>
46 #include <sys/flock.h>
47 #include <sys/nbmlock.h>
48 #include <sys/policy.h>
49 #include <sys/sdt.h>
51 #include <rpc/types.h>
52 #include <rpc/auth.h>
53 #include <rpc/svc.h>
54 #include <rpc/rpc_rdma.h>
56 #include <nfs/nfs.h>
57 #include <nfs/export.h>
58 #include <nfs/nfs_cmd.h>
60 #include <sys/strsubr.h>
62 #include <sys/tsol/label.h>
63 #include <sys/tsol/tndb.h>
65 #include <sys/zone.h>
67 #include <inet/ip.h>
68 #include <inet/ip6.h>
/*
 * These are the interface routines for the server side of the
 * Network File System.  See the NFS version 3 protocol specification
 * for a description of this interface.
 */
76 static writeverf3 write3verf;
78 static int sattr3_to_vattr(sattr3 *, struct vattr *);
79 static int vattr_to_fattr3(struct vattr *, fattr3 *);
80 static int vattr_to_wcc_attr(struct vattr *, wcc_attr *);
81 static void vattr_to_pre_op_attr(struct vattr *, pre_op_attr *);
82 static void vattr_to_wcc_data(struct vattr *, struct vattr *, wcc_data *);
83 static int rdma_setup_read_data3(READ3args *, READ3resok *);
85 extern int nfs_loaned_buffers;
87 u_longlong_t nfs3_srv_caller_id;
89 /* ARGSUSED */
90 void
91 rfs3_getattr(GETATTR3args *args, GETATTR3res *resp, struct exportinfo *exi,
92 struct svc_req *req, cred_t *cr)
94 int error;
95 vnode_t *vp;
96 struct vattr va;
98 vp = nfs3_fhtovp(&args->object, exi);
100 DTRACE_NFSV3_4(op__getattr__start, struct svc_req *, req,
101 cred_t *, cr, vnode_t *, vp, GETATTR3args *, args);
103 if (vp == NULL) {
104 error = ESTALE;
105 goto out;
108 va.va_mask = AT_ALL;
109 error = rfs4_delegated_getattr(vp, &va, 0, cr);
111 if (!error) {
112 /* Lie about the object type for a referral */
113 if (vn_is_nfs_reparse(vp, cr))
114 va.va_type = VLNK;
116 /* overflow error if time or size is out of range */
117 error = vattr_to_fattr3(&va, &resp->resok.obj_attributes);
118 if (error)
119 goto out;
120 resp->status = NFS3_OK;
122 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
123 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
125 VN_RELE(vp);
127 return;
130 out:
131 if (curthread->t_flag & T_WOULDBLOCK) {
132 curthread->t_flag &= ~T_WOULDBLOCK;
133 resp->status = NFS3ERR_JUKEBOX;
134 } else
135 resp->status = puterrno3(error);
137 DTRACE_NFSV3_4(op__getattr__done, struct svc_req *, req,
138 cred_t *, cr, vnode_t *, vp, GETATTR3res *, resp);
140 if (vp != NULL)
141 VN_RELE(vp);
144 void *
145 rfs3_getattr_getfh(GETATTR3args *args)
148 return (&args->object);
151 void
152 rfs3_setattr(SETATTR3args *args, SETATTR3res *resp, struct exportinfo *exi,
153 struct svc_req *req, cred_t *cr)
155 int error;
156 vnode_t *vp;
157 struct vattr *bvap;
158 struct vattr bva;
159 struct vattr *avap;
160 struct vattr ava;
161 int flag;
162 int in_crit = 0;
163 struct flock64 bf;
164 caller_context_t ct;
166 bvap = NULL;
167 avap = NULL;
169 vp = nfs3_fhtovp(&args->object, exi);
171 DTRACE_NFSV3_4(op__setattr__start, struct svc_req *, req,
172 cred_t *, cr, vnode_t *, vp, SETATTR3args *, args);
174 if (vp == NULL) {
175 error = ESTALE;
176 goto out;
179 error = sattr3_to_vattr(&args->new_attributes, &ava);
180 if (error)
181 goto out;
183 if (is_system_labeled()) {
184 bslabel_t *clabel = req->rq_label;
186 ASSERT(clabel != NULL);
187 DTRACE_PROBE2(tx__rfs3__log__info__opsetattr__clabel, char *,
188 "got client label from request(1)", struct svc_req *, req);
190 if (!blequal(&l_admin_low->tsl_label, clabel)) {
191 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
192 exi)) {
193 resp->status = NFS3ERR_ACCES;
194 goto out1;
200 * We need to specially handle size changes because of
201 * possible conflicting NBMAND locks. Get into critical
202 * region before VOP_GETATTR, so the size attribute is
203 * valid when checking conflicts.
205 * Also, check to see if the v4 side of the server has
206 * delegated this file. If so, then we return JUKEBOX to
207 * allow the client to retrasmit its request.
209 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
210 if (nbl_need_check(vp)) {
211 nbl_start_crit(vp, RW_READER);
212 in_crit = 1;
216 bva.va_mask = AT_ALL;
217 error = rfs4_delegated_getattr(vp, &bva, 0, cr);
220 * If we can't get the attributes, then we can't do the
221 * right access checking. So, we'll fail the request.
223 if (error)
224 goto out;
226 bvap = &bva;
228 if (rdonly(exi, req) || vn_is_readonly(vp)) {
229 resp->status = NFS3ERR_ROFS;
230 goto out1;
233 if (args->guard.check &&
234 (args->guard.obj_ctime.seconds != bva.va_ctime.tv_sec ||
235 args->guard.obj_ctime.nseconds != bva.va_ctime.tv_nsec)) {
236 resp->status = NFS3ERR_NOT_SYNC;
237 goto out1;
240 if (args->new_attributes.mtime.set_it == SET_TO_CLIENT_TIME)
241 flag = ATTR_UTIME;
242 else
243 flag = 0;
246 * If the filesystem is exported with nosuid, then mask off
247 * the setuid and setgid bits.
249 if ((ava.va_mask & AT_MODE) && vp->v_type == VREG &&
250 (exi->exi_export.ex_flags & EX_NOSUID))
251 ava.va_mode &= ~(VSUID | VSGID);
253 ct.cc_sysid = 0;
254 ct.cc_pid = 0;
255 ct.cc_caller_id = nfs3_srv_caller_id;
256 ct.cc_flags = CC_DONTBLOCK;
259 * We need to specially handle size changes because it is
260 * possible for the client to create a file with modes
261 * which indicate read-only, but with the file opened for
262 * writing. If the client then tries to set the size of
263 * the file, then the normal access checking done in
264 * VOP_SETATTR would prevent the client from doing so,
265 * although it should be legal for it to do so. To get
266 * around this, we do the access checking for ourselves
267 * and then use VOP_SPACE which doesn't do the access
268 * checking which VOP_SETATTR does. VOP_SPACE can only
269 * operate on VREG files, let VOP_SETATTR handle the other
270 * extremely rare cases.
271 * Also the client should not be allowed to change the
272 * size of the file if there is a conflicting non-blocking
273 * mandatory lock in the region the change.
275 if (vp->v_type == VREG && (ava.va_mask & AT_SIZE)) {
276 if (in_crit) {
277 u_offset_t offset;
278 ssize_t length;
280 if (ava.va_size < bva.va_size) {
281 offset = ava.va_size;
282 length = bva.va_size - ava.va_size;
283 } else {
284 offset = bva.va_size;
285 length = ava.va_size - bva.va_size;
287 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
288 NULL)) {
289 error = EACCES;
290 goto out;
294 if (crgetuid(cr) == bva.va_uid && ava.va_size != bva.va_size) {
295 ava.va_mask &= ~AT_SIZE;
296 bf.l_type = F_WRLCK;
297 bf.l_whence = 0;
298 bf.l_start = (off64_t)ava.va_size;
299 bf.l_len = 0;
300 bf.l_sysid = 0;
301 bf.l_pid = 0;
302 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
303 (offset_t)ava.va_size, cr, &ct);
307 if (!error && ava.va_mask)
308 error = VOP_SETATTR(vp, &ava, flag, cr, &ct);
310 /* check if a monitor detected a delegation conflict */
311 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
312 resp->status = NFS3ERR_JUKEBOX;
313 goto out1;
316 ava.va_mask = AT_ALL;
317 avap = rfs4_delegated_getattr(vp, &ava, 0, cr) ? NULL : &ava;
320 * Force modified metadata out to stable storage.
322 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
324 if (error)
325 goto out;
327 if (in_crit)
328 nbl_end_crit(vp);
330 resp->status = NFS3_OK;
331 vattr_to_wcc_data(bvap, avap, &resp->resok.obj_wcc);
333 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
334 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
336 VN_RELE(vp);
338 return;
340 out:
341 if (curthread->t_flag & T_WOULDBLOCK) {
342 curthread->t_flag &= ~T_WOULDBLOCK;
343 resp->status = NFS3ERR_JUKEBOX;
344 } else
345 resp->status = puterrno3(error);
346 out1:
347 DTRACE_NFSV3_4(op__setattr__done, struct svc_req *, req,
348 cred_t *, cr, vnode_t *, vp, SETATTR3res *, resp);
350 if (vp != NULL) {
351 if (in_crit)
352 nbl_end_crit(vp);
353 VN_RELE(vp);
355 vattr_to_wcc_data(bvap, avap, &resp->resfail.obj_wcc);
358 void *
359 rfs3_setattr_getfh(SETATTR3args *args)
362 return (&args->object);
365 /* ARGSUSED */
366 void
367 rfs3_lookup(LOOKUP3args *args, LOOKUP3res *resp, struct exportinfo *exi,
368 struct svc_req *req, cred_t *cr)
370 int error;
371 vnode_t *vp;
372 vnode_t *dvp;
373 struct vattr *vap;
374 struct vattr va;
375 struct vattr *dvap;
376 struct vattr dva;
377 nfs_fh3 *fhp;
378 struct sec_ol sec = {0, 0};
379 bool_t publicfh_flag = FALSE, auth_weak = FALSE;
380 struct sockaddr *ca;
381 char *name = NULL;
383 dvap = NULL;
386 * Allow lookups from the root - the default
387 * location of the public filehandle.
389 if (exi != NULL && (exi->exi_export.ex_flags & EX_PUBLIC)) {
390 dvp = rootdir;
391 VN_HOLD(dvp);
393 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
394 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
395 } else {
396 dvp = nfs3_fhtovp(&args->what.dir, exi);
398 DTRACE_NFSV3_4(op__lookup__start, struct svc_req *, req,
399 cred_t *, cr, vnode_t *, dvp, LOOKUP3args *, args);
401 if (dvp == NULL) {
402 error = ESTALE;
403 goto out;
407 dva.va_mask = AT_ALL;
408 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
410 if (args->what.name == nfs3nametoolong) {
411 resp->status = NFS3ERR_NAMETOOLONG;
412 goto out1;
415 if (args->what.name == NULL || *(args->what.name) == '\0') {
416 resp->status = NFS3ERR_ACCES;
417 goto out1;
420 fhp = &args->what.dir;
421 if (strcmp(args->what.name, "..") == 0 &&
422 EQFID(&exi->exi_fid, FH3TOFIDP(fhp))) {
423 resp->status = NFS3ERR_NOENT;
424 goto out1;
427 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
428 name = nfscmd_convname(ca, exi, args->what.name,
429 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
431 if (name == NULL) {
432 resp->status = NFS3ERR_ACCES;
433 goto out1;
436 exi_hold(exi);
439 * If the public filehandle is used then allow
440 * a multi-component lookup
442 if (PUBLIC_FH3(&args->what.dir)) {
443 struct exportinfo *new;
445 publicfh_flag = TRUE;
447 error = rfs_publicfh_mclookup(name, dvp, cr, &vp,
448 &new, &sec);
450 if (error == 0) {
451 exi_rele(exi);
452 exi = new;
456 * Since WebNFS may bypass MOUNT, we need to ensure this
457 * request didn't come from an unlabeled admin_low client.
459 if (is_system_labeled() && error == 0) {
460 int addr_type;
461 void *ipaddr;
462 tsol_tpc_t *tp;
464 if (ca->sa_family == AF_INET) {
465 addr_type = IPV4_VERSION;
466 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
467 } else if (ca->sa_family == AF_INET6) {
468 addr_type = IPV6_VERSION;
469 ipaddr = &((struct sockaddr_in6 *)
470 ca)->sin6_addr;
472 tp = find_tpc(ipaddr, addr_type, B_FALSE);
473 if (tp == NULL || tp->tpc_tp.tp_doi !=
474 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
475 SUN_CIPSO) {
476 VN_RELE(vp);
477 resp->status = NFS3ERR_ACCES;
478 error = 1;
480 if (tp != NULL)
481 TPC_RELE(tp);
483 } else {
484 error = VOP_LOOKUP(dvp, name, &vp,
485 NULL, 0, NULL, cr, NULL, NULL, NULL);
488 if (name != args->what.name)
489 kmem_free(name, MAXPATHLEN + 1);
491 if (is_system_labeled() && error == 0) {
492 bslabel_t *clabel = req->rq_label;
494 ASSERT(clabel != NULL);
495 DTRACE_PROBE2(tx__rfs3__log__info__oplookup__clabel, char *,
496 "got client label from request(1)", struct svc_req *, req);
498 if (!blequal(&l_admin_low->tsl_label, clabel)) {
499 if (!do_rfs_label_check(clabel, dvp,
500 DOMINANCE_CHECK, exi)) {
501 VN_RELE(vp);
502 resp->status = NFS3ERR_ACCES;
503 error = 1;
508 dva.va_mask = AT_ALL;
509 dvap = VOP_GETATTR(dvp, &dva, 0, cr, NULL) ? NULL : &dva;
511 if (error)
512 goto out;
514 if (sec.sec_flags & SEC_QUERY) {
515 error = makefh3_ol(&resp->resok.object, exi, sec.sec_index);
516 } else {
517 error = makefh3(&resp->resok.object, vp, exi);
518 if (!error && publicfh_flag && !chk_clnt_sec(exi, req))
519 auth_weak = TRUE;
522 if (error) {
523 VN_RELE(vp);
524 goto out;
527 va.va_mask = AT_ALL;
528 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
530 exi_rele(exi);
531 VN_RELE(vp);
533 resp->status = NFS3_OK;
534 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
535 vattr_to_post_op_attr(dvap, &resp->resok.dir_attributes);
538 * If it's public fh, no 0x81, and client's flavor is
539 * invalid, set WebNFS status to WNFSERR_CLNT_FLAVOR now.
540 * Then set RPC status to AUTH_TOOWEAK in common_dispatch.
542 if (auth_weak)
543 resp->status = (enum nfsstat3)WNFSERR_CLNT_FLAVOR;
545 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
546 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
547 VN_RELE(dvp);
549 return;
551 out:
553 * The passed argument exportinfo is released by the
554 * caller, common_dispatch
556 exi_rele(exi);
558 if (curthread->t_flag & T_WOULDBLOCK) {
559 curthread->t_flag &= ~T_WOULDBLOCK;
560 resp->status = NFS3ERR_JUKEBOX;
561 } else
562 resp->status = puterrno3(error);
563 out1:
564 DTRACE_NFSV3_4(op__lookup__done, struct svc_req *, req,
565 cred_t *, cr, vnode_t *, dvp, LOOKUP3res *, resp);
567 if (dvp != NULL)
568 VN_RELE(dvp);
569 vattr_to_post_op_attr(dvap, &resp->resfail.dir_attributes);
573 void *
574 rfs3_lookup_getfh(LOOKUP3args *args)
577 return (&args->what.dir);
580 /* ARGSUSED */
581 void
582 rfs3_access(ACCESS3args *args, ACCESS3res *resp, struct exportinfo *exi,
583 struct svc_req *req, cred_t *cr)
585 int error;
586 vnode_t *vp;
587 struct vattr *vap;
588 struct vattr va;
589 int checkwriteperm;
590 boolean_t dominant_label = B_FALSE;
591 boolean_t equal_label = B_FALSE;
592 boolean_t admin_low_client;
594 vap = NULL;
596 vp = nfs3_fhtovp(&args->object, exi);
598 DTRACE_NFSV3_4(op__access__start, struct svc_req *, req,
599 cred_t *, cr, vnode_t *, vp, ACCESS3args *, args);
601 if (vp == NULL) {
602 error = ESTALE;
603 goto out;
607 * If the file system is exported read only, it is not appropriate
608 * to check write permissions for regular files and directories.
609 * Special files are interpreted by the client, so the underlying
610 * permissions are sent back to the client for interpretation.
612 if (rdonly(exi, req) && (vp->v_type == VREG || vp->v_type == VDIR))
613 checkwriteperm = 0;
614 else
615 checkwriteperm = 1;
618 * We need the mode so that we can correctly determine access
619 * permissions relative to a mandatory lock file. Access to
620 * mandatory lock files is denied on the server, so it might
621 * as well be reflected to the server during the open.
623 va.va_mask = AT_MODE;
624 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
625 if (error)
626 goto out;
628 vap = &va;
630 resp->resok.access = 0;
632 if (is_system_labeled()) {
633 bslabel_t *clabel = req->rq_label;
635 ASSERT(clabel != NULL);
636 DTRACE_PROBE2(tx__rfs3__log__info__opaccess__clabel, char *,
637 "got client label from request(1)", struct svc_req *, req);
639 if (!blequal(&l_admin_low->tsl_label, clabel)) {
640 if ((equal_label = do_rfs_label_check(clabel, vp,
641 EQUALITY_CHECK, exi)) == B_FALSE) {
642 dominant_label = do_rfs_label_check(clabel,
643 vp, DOMINANCE_CHECK, exi);
644 } else
645 dominant_label = B_TRUE;
646 admin_low_client = B_FALSE;
647 } else
648 admin_low_client = B_TRUE;
651 if (args->access & ACCESS3_READ) {
652 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
653 if (error) {
654 if (curthread->t_flag & T_WOULDBLOCK)
655 goto out;
656 } else if (!MANDLOCK(vp, va.va_mode) &&
657 (!is_system_labeled() || admin_low_client ||
658 dominant_label))
659 resp->resok.access |= ACCESS3_READ;
661 if ((args->access & ACCESS3_LOOKUP) && vp->v_type == VDIR) {
662 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
663 if (error) {
664 if (curthread->t_flag & T_WOULDBLOCK)
665 goto out;
666 } else if (!is_system_labeled() || admin_low_client ||
667 dominant_label)
668 resp->resok.access |= ACCESS3_LOOKUP;
670 if (checkwriteperm &&
671 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND))) {
672 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
673 if (error) {
674 if (curthread->t_flag & T_WOULDBLOCK)
675 goto out;
676 } else if (!MANDLOCK(vp, va.va_mode) &&
677 (!is_system_labeled() || admin_low_client || equal_label)) {
678 resp->resok.access |=
679 (args->access & (ACCESS3_MODIFY|ACCESS3_EXTEND));
682 if (checkwriteperm &&
683 (args->access & ACCESS3_DELETE) && vp->v_type == VDIR) {
684 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
685 if (error) {
686 if (curthread->t_flag & T_WOULDBLOCK)
687 goto out;
688 } else if (!is_system_labeled() || admin_low_client ||
689 equal_label)
690 resp->resok.access |= ACCESS3_DELETE;
692 if (args->access & ACCESS3_EXECUTE) {
693 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
694 if (error) {
695 if (curthread->t_flag & T_WOULDBLOCK)
696 goto out;
697 } else if (!MANDLOCK(vp, va.va_mode) &&
698 (!is_system_labeled() || admin_low_client ||
699 dominant_label))
700 resp->resok.access |= ACCESS3_EXECUTE;
703 va.va_mask = AT_ALL;
704 vap = rfs4_delegated_getattr(vp, &va, 0, cr) ? NULL : &va;
706 resp->status = NFS3_OK;
707 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
709 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
710 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
712 VN_RELE(vp);
714 return;
716 out:
717 if (curthread->t_flag & T_WOULDBLOCK) {
718 curthread->t_flag &= ~T_WOULDBLOCK;
719 resp->status = NFS3ERR_JUKEBOX;
720 } else
721 resp->status = puterrno3(error);
722 DTRACE_NFSV3_4(op__access__done, struct svc_req *, req,
723 cred_t *, cr, vnode_t *, vp, ACCESS3res *, resp);
724 if (vp != NULL)
725 VN_RELE(vp);
726 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
729 void *
730 rfs3_access_getfh(ACCESS3args *args)
733 return (&args->object);
736 /* ARGSUSED */
737 void
738 rfs3_readlink(READLINK3args *args, READLINK3res *resp, struct exportinfo *exi,
739 struct svc_req *req, cred_t *cr)
741 int error;
742 vnode_t *vp;
743 struct vattr *vap;
744 struct vattr va;
745 struct iovec iov;
746 struct uio uio;
747 char *data;
748 struct sockaddr *ca;
749 char *name = NULL;
750 int is_referral = 0;
752 vap = NULL;
754 vp = nfs3_fhtovp(&args->symlink, exi);
756 DTRACE_NFSV3_4(op__readlink__start, struct svc_req *, req,
757 cred_t *, cr, vnode_t *, vp, READLINK3args *, args);
759 if (vp == NULL) {
760 error = ESTALE;
761 goto out;
764 va.va_mask = AT_ALL;
765 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
766 if (error)
767 goto out;
769 vap = &va;
771 /* We lied about the object type for a referral */
772 if (vn_is_nfs_reparse(vp, cr))
773 is_referral = 1;
775 if (vp->v_type != VLNK && !is_referral) {
776 resp->status = NFS3ERR_INVAL;
777 goto out1;
780 if (MANDLOCK(vp, va.va_mode)) {
781 resp->status = NFS3ERR_ACCES;
782 goto out1;
785 if (is_system_labeled()) {
786 bslabel_t *clabel = req->rq_label;
788 ASSERT(clabel != NULL);
789 DTRACE_PROBE2(tx__rfs3__log__info__opreadlink__clabel, char *,
790 "got client label from request(1)", struct svc_req *, req);
792 if (!blequal(&l_admin_low->tsl_label, clabel)) {
793 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
794 exi)) {
795 resp->status = NFS3ERR_ACCES;
796 goto out1;
801 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
803 if (is_referral) {
804 char *s;
805 size_t strsz;
807 /* Get an artificial symlink based on a referral */
808 s = build_symlink(vp, cr, &strsz);
809 global_svstat_ptr[3][NFS_REFERLINKS].value.ui64++;
810 DTRACE_PROBE2(nfs3serv__func__referral__reflink,
811 vnode_t *, vp, char *, s);
812 if (s == NULL)
813 error = EINVAL;
814 else {
815 error = 0;
816 (void) strlcpy(data, s, MAXPATHLEN + 1);
817 kmem_free(s, strsz);
820 } else {
822 iov.iov_base = data;
823 iov.iov_len = MAXPATHLEN;
824 uio.uio_iov = &iov;
825 uio.uio_iovcnt = 1;
826 uio.uio_segflg = UIO_SYSSPACE;
827 uio.uio_extflg = UIO_COPY_CACHED;
828 uio.uio_loffset = 0;
829 uio.uio_resid = MAXPATHLEN;
831 error = VOP_READLINK(vp, &uio, cr, NULL);
833 if (!error)
834 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
837 va.va_mask = AT_ALL;
838 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
840 /* Lie about object type again just to be consistent */
841 if (is_referral && vap != NULL)
842 vap->va_type = VLNK;
844 #if 0 /* notyet */
846 * Don't do this. It causes local disk writes when just
847 * reading the file and the overhead is deemed larger
848 * than the benefit.
851 * Force modified metadata out to stable storage.
853 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
854 #endif
856 if (error) {
857 kmem_free(data, MAXPATHLEN + 1);
858 goto out;
861 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
862 name = nfscmd_convname(ca, exi, data, NFSCMD_CONV_OUTBOUND,
863 MAXPATHLEN + 1);
865 if (name == NULL) {
867 * Even though the conversion failed, we return
868 * something. We just don't translate it.
870 name = data;
873 resp->status = NFS3_OK;
874 vattr_to_post_op_attr(vap, &resp->resok.symlink_attributes);
875 resp->resok.data = name;
877 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
878 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
879 VN_RELE(vp);
881 if (name != data)
882 kmem_free(data, MAXPATHLEN + 1);
884 return;
886 out:
887 if (curthread->t_flag & T_WOULDBLOCK) {
888 curthread->t_flag &= ~T_WOULDBLOCK;
889 resp->status = NFS3ERR_JUKEBOX;
890 } else
891 resp->status = puterrno3(error);
892 out1:
893 DTRACE_NFSV3_4(op__readlink__done, struct svc_req *, req,
894 cred_t *, cr, vnode_t *, vp, READLINK3res *, resp);
895 if (vp != NULL)
896 VN_RELE(vp);
897 vattr_to_post_op_attr(vap, &resp->resfail.symlink_attributes);
900 void *
901 rfs3_readlink_getfh(READLINK3args *args)
904 return (&args->symlink);
907 void
908 rfs3_readlink_free(READLINK3res *resp)
911 if (resp->status == NFS3_OK)
912 kmem_free(resp->resok.data, MAXPATHLEN + 1);
916 * Server routine to handle read
917 * May handle RDMA data as well as mblks
919 /* ARGSUSED */
920 void
921 rfs3_read(READ3args *args, READ3res *resp, struct exportinfo *exi,
922 struct svc_req *req, cred_t *cr)
924 int error;
925 vnode_t *vp;
926 struct vattr *vap;
927 struct vattr va;
928 struct iovec iov;
929 struct uio uio;
930 u_offset_t offset;
931 mblk_t *mp = NULL;
932 int alloc_err = 0;
933 int in_crit = 0;
934 int need_rwunlock = 0;
935 caller_context_t ct;
936 int rdma_used = 0;
937 int loaned_buffers;
938 struct uio *uiop;
940 vap = NULL;
942 vp = nfs3_fhtovp(&args->file, exi);
944 DTRACE_NFSV3_4(op__read__start, struct svc_req *, req,
945 cred_t *, cr, vnode_t *, vp, READ3args *, args);
947 if (vp == NULL) {
948 error = ESTALE;
949 goto out;
952 if (args->wlist) {
953 if (args->count > clist_len(args->wlist)) {
954 error = EINVAL;
955 goto out;
957 rdma_used = 1;
960 /* use loaned buffers for TCP */
961 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
963 if (is_system_labeled()) {
964 bslabel_t *clabel = req->rq_label;
966 ASSERT(clabel != NULL);
967 DTRACE_PROBE2(tx__rfs3__log__info__opread__clabel, char *,
968 "got client label from request(1)", struct svc_req *, req);
970 if (!blequal(&l_admin_low->tsl_label, clabel)) {
971 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
972 exi)) {
973 resp->status = NFS3ERR_ACCES;
974 goto out1;
979 ct.cc_sysid = 0;
980 ct.cc_pid = 0;
981 ct.cc_caller_id = nfs3_srv_caller_id;
982 ct.cc_flags = CC_DONTBLOCK;
985 * Enter the critical region before calling VOP_RWLOCK
986 * to avoid a deadlock with write requests.
988 if (nbl_need_check(vp)) {
989 nbl_start_crit(vp, RW_READER);
990 in_crit = 1;
991 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
992 NULL)) {
993 error = EACCES;
994 goto out;
998 error = VOP_RWLOCK(vp, V_WRITELOCK_FALSE, &ct);
1000 /* check if a monitor detected a delegation conflict */
1001 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1002 resp->status = NFS3ERR_JUKEBOX;
1003 goto out1;
1006 need_rwunlock = 1;
1008 va.va_mask = AT_ALL;
1009 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1012 * If we can't get the attributes, then we can't do the
1013 * right access checking. So, we'll fail the request.
1015 if (error)
1016 goto out;
1018 vap = &va;
1020 if (vp->v_type != VREG) {
1021 resp->status = NFS3ERR_INVAL;
1022 goto out1;
1025 if (crgetuid(cr) != va.va_uid) {
1026 error = VOP_ACCESS(vp, VREAD, 0, cr, &ct);
1027 if (error) {
1028 if (curthread->t_flag & T_WOULDBLOCK)
1029 goto out;
1030 error = VOP_ACCESS(vp, VEXEC, 0, cr, &ct);
1031 if (error)
1032 goto out;
1036 if (MANDLOCK(vp, va.va_mode)) {
1037 resp->status = NFS3ERR_ACCES;
1038 goto out1;
1041 offset = args->offset;
1042 if (offset >= va.va_size) {
1043 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1044 if (in_crit)
1045 nbl_end_crit(vp);
1046 resp->status = NFS3_OK;
1047 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1048 resp->resok.count = 0;
1049 resp->resok.eof = TRUE;
1050 resp->resok.data.data_len = 0;
1051 resp->resok.data.data_val = NULL;
1052 resp->resok.data.mp = NULL;
1053 /* RDMA */
1054 resp->resok.wlist = args->wlist;
1055 resp->resok.wlist_len = resp->resok.count;
1056 if (resp->resok.wlist)
1057 clist_zero_len(resp->resok.wlist);
1058 goto done;
1061 if (args->count == 0) {
1062 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1063 if (in_crit)
1064 nbl_end_crit(vp);
1065 resp->status = NFS3_OK;
1066 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1067 resp->resok.count = 0;
1068 resp->resok.eof = FALSE;
1069 resp->resok.data.data_len = 0;
1070 resp->resok.data.data_val = NULL;
1071 resp->resok.data.mp = NULL;
1072 /* RDMA */
1073 resp->resok.wlist = args->wlist;
1074 resp->resok.wlist_len = resp->resok.count;
1075 if (resp->resok.wlist)
1076 clist_zero_len(resp->resok.wlist);
1077 goto done;
1081 * do not allocate memory more the max. allowed
1082 * transfer size
1084 if (args->count > rfs3_tsize(req))
1085 args->count = rfs3_tsize(req);
1087 if (loaned_buffers) {
1088 uiop = (uio_t *)rfs_setup_xuio(vp);
1089 ASSERT(uiop != NULL);
1090 uiop->uio_segflg = UIO_SYSSPACE;
1091 uiop->uio_loffset = args->offset;
1092 uiop->uio_resid = args->count;
1094 /* Jump to do the read if successful */
1095 if (VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cr, &ct) == 0) {
1097 * Need to hold the vnode until after VOP_RETZCBUF()
1098 * is called.
1100 VN_HOLD(vp);
1101 goto doio_read;
1104 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
1105 uiop->uio_loffset, int, uiop->uio_resid);
1107 uiop->uio_extflg = 0;
1108 /* failure to setup for zero copy */
1109 rfs_free_xuio((void *)uiop);
1110 loaned_buffers = 0;
1114 * If returning data via RDMA Write, then grab the chunk list.
1115 * If we aren't returning READ data w/RDMA_WRITE, then grab
1116 * a mblk.
1118 if (rdma_used) {
1119 (void) rdma_get_wchunk(req, &iov, args->wlist);
1120 } else {
1122 * mp will contain the data to be sent out in the read reply.
1123 * This will be freed after the reply has been sent out (by the
1124 * driver).
1125 * Let's roundup the data to a BYTES_PER_XDR_UNIT multiple, so
1126 * that the call to xdrmblk_putmblk() never fails.
1128 mp = allocb_wait(RNDUP(args->count), BPRI_MED, STR_NOSIG,
1129 &alloc_err);
1130 ASSERT(mp != NULL);
1131 ASSERT(alloc_err == 0);
1133 iov.iov_base = (caddr_t)mp->b_datap->db_base;
1134 iov.iov_len = args->count;
1137 uio.uio_iov = &iov;
1138 uio.uio_iovcnt = 1;
1139 uio.uio_segflg = UIO_SYSSPACE;
1140 uio.uio_extflg = UIO_COPY_CACHED;
1141 uio.uio_loffset = args->offset;
1142 uio.uio_resid = args->count;
1143 uiop = &uio;
1145 doio_read:
1146 error = VOP_READ(vp, uiop, 0, cr, &ct);
1148 if (error) {
1149 if (mp)
1150 freemsg(mp);
1151 /* check if a monitor detected a delegation conflict */
1152 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1153 resp->status = NFS3ERR_JUKEBOX;
1154 goto out1;
1156 goto out;
1159 /* make mblk using zc buffers */
1160 if (loaned_buffers) {
1161 mp = uio_to_mblk(uiop);
1162 ASSERT(mp != NULL);
1165 va.va_mask = AT_ALL;
1166 error = VOP_GETATTR(vp, &va, 0, cr, &ct);
1168 if (error)
1169 vap = NULL;
1170 else
1171 vap = &va;
1173 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1175 if (in_crit)
1176 nbl_end_crit(vp);
1178 resp->status = NFS3_OK;
1179 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
1180 resp->resok.count = args->count - uiop->uio_resid;
1181 if (!error && offset + resp->resok.count == va.va_size)
1182 resp->resok.eof = TRUE;
1183 else
1184 resp->resok.eof = FALSE;
1185 resp->resok.data.data_len = resp->resok.count;
1187 if (mp)
1188 rfs_rndup_mblks(mp, resp->resok.count, loaned_buffers);
1190 resp->resok.data.mp = mp;
1191 resp->resok.size = (uint_t)args->count;
1193 if (rdma_used) {
1194 resp->resok.data.data_val = (caddr_t)iov.iov_base;
1195 if (!rdma_setup_read_data3(args, &(resp->resok))) {
1196 resp->status = NFS3ERR_INVAL;
1198 } else {
1199 resp->resok.data.data_val = (caddr_t)mp->b_datap->db_base;
1200 (resp->resok).wlist = NULL;
1203 done:
1204 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1205 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1207 VN_RELE(vp);
1209 return;
1211 out:
1212 if (curthread->t_flag & T_WOULDBLOCK) {
1213 curthread->t_flag &= ~T_WOULDBLOCK;
1214 resp->status = NFS3ERR_JUKEBOX;
1215 } else
1216 resp->status = puterrno3(error);
1217 out1:
1218 DTRACE_NFSV3_4(op__read__done, struct svc_req *, req,
1219 cred_t *, cr, vnode_t *, vp, READ3res *, resp);
1221 if (vp != NULL) {
1222 if (need_rwunlock)
1223 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, &ct);
1224 if (in_crit)
1225 nbl_end_crit(vp);
1226 VN_RELE(vp);
1228 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
1231 void
1232 rfs3_read_free(READ3res *resp)
1234 mblk_t *mp;
1236 if (resp->status == NFS3_OK) {
1237 mp = resp->resok.data.mp;
1238 if (mp != NULL)
1239 freemsg(mp);
1243 void *
1244 rfs3_read_getfh(READ3args *args)
1247 return (&args->file);
/*
 * Number of iovec entries in the on-stack array used by rfs3_write();
 * an mblk chain with at most this many blocks uses that array.
 */
#define	MAX_IOVECS	12

#ifdef DEBUG
/*
 * Debug counters for rfs3_write(): "hits" counts requests whose mblk
 * chain fit in the on-stack iovec array.
 * NOTE(review): "misses" presumably counts the requests that did not
 * fit; the code that updates it is not visible here.
 */
static int rfs3_write_hits;
static int rfs3_write_misses;
#endif
/*
 * rfs3_write - service an NFSv3 WRITE request.
 *
 * Converts args->file to a vnode with nfs3_fhtovp(), validates the request
 * (label check on labeled systems, NBMAND conflict, count versus data
 * length, read-only export, regular file, write access, mandatory locking),
 * builds a uio from whichever of args->mblk, args->rlist or args->data
 * carries the payload, and calls VOP_WRITE() with the sync flag selected by
 * args->stable.
 *
 * On success resp->resok holds the wcc data, the number of bytes written,
 * the committed level and write3verf.  On failure resp->status is set
 * (NFS3ERR_JUKEBOX when the thread flag T_WOULDBLOCK or the caller context
 * flag CC_WOULDBLOCK is raised, otherwise puterrno3(error) or an explicit
 * NFS3ERR_* code) and resp->resfail.file_wcc is filled in.
 *
 * Every exit passes through "out", which fires the op__write__done probe
 * and drops the rwlock, the nbl critical region and the vnode as needed.
 */
1257 void
1258 rfs3_write(WRITE3args *args, WRITE3res *resp, struct exportinfo *exi,
1259 struct svc_req *req, cred_t *cr)
1261 int error;
1262 vnode_t *vp;
1263 struct vattr *bvap = NULL;
1264 struct vattr bva;
1265 struct vattr *avap = NULL;
1266 struct vattr ava;
1267 u_offset_t rlimit;
1268 struct uio uio;
1269 struct iovec iov[MAX_IOVECS];
1270 mblk_t *m;
1271 struct iovec *iovp;
1272 int iovcnt;
1273 int ioflag;
1274 cred_t *savecred;
1275 int in_crit = 0;
1276 int rwlock_ret = -1;
1277 caller_context_t ct;
/* rwlock_ret == -1 tells the exit path that no VOP_RWUNLOCK is needed. */
1279 vp = nfs3_fhtovp(&args->file, exi);
1281 DTRACE_NFSV3_4(op__write__start, struct svc_req *, req,
1282 cred_t *, cr, vnode_t *, vp, WRITE3args *, args);
/* No vnode for this file handle: report it as stale. */
1284 if (vp == NULL) {
1285 error = ESTALE;
1286 goto err;
1289 if (is_system_labeled()) {
1290 bslabel_t *clabel = req->rq_label;
1292 ASSERT(clabel != NULL);
1293 DTRACE_PROBE2(tx__rfs3__log__info__opwrite__clabel, char *,
1294 "got client label from request(1)", struct svc_req *, req);
1296 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1297 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
1298 exi)) {
1299 resp->status = NFS3ERR_ACCES;
1300 goto err1;
/*
 * Caller context handed to the VOP calls below.
 * NOTE(review): CC_DONTBLOCK presumably asks the file system to fail with
 * EAGAIN/CC_WOULDBLOCK instead of blocking -- confirm against the
 * caller_context_t definition.
 */
1305 ct.cc_sysid = 0;
1306 ct.cc_pid = 0;
1307 ct.cc_caller_id = nfs3_srv_caller_id;
1308 ct.cc_flags = CC_DONTBLOCK;
1311 * We have to enter the critical region before calling VOP_RWLOCK
1312 * to avoid a deadlock with ufs.
1314 if (nbl_need_check(vp)) {
1315 nbl_start_crit(vp, RW_READER);
1316 in_crit = 1;
1317 if (nbl_conflict(vp, NBL_WRITE, args->offset, args->count, 0,
1318 NULL)) {
1319 error = EACCES;
1320 goto err;
1324 rwlock_ret = VOP_RWLOCK(vp, V_WRITELOCK_TRUE, &ct);
1326 /* check if a monitor detected a delegation conflict */
1327 if (rwlock_ret == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1328 resp->status = NFS3ERR_JUKEBOX;
1329 rwlock_ret = -1;
1330 goto err1;
1334 bva.va_mask = AT_ALL;
1335 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
1338 * If we can't get the attributes, then we can't do the
1339 * right access checking. So, we'll fail the request.
1341 if (error)
1342 goto err;
1344 bvap = &bva;
1345 avap = bvap;
1347 if (args->count != args->data.data_len) {
1348 resp->status = NFS3ERR_INVAL;
1349 goto err1;
1352 if (rdonly(exi, req)) {
1353 resp->status = NFS3ERR_ROFS;
1354 goto err1;
1357 if (vp->v_type != VREG) {
1358 resp->status = NFS3ERR_INVAL;
1359 goto err1;
/* The access check is skipped when the caller's uid owns the file. */
1362 if (crgetuid(cr) != bva.va_uid &&
1363 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct)))
1364 goto err;
1366 if (MANDLOCK(vp, bva.va_mode)) {
1367 resp->status = NFS3ERR_ACCES;
1368 goto err1;
/* A zero-length write succeeds right away without calling VOP_WRITE. */
1371 if (args->count == 0) {
1372 resp->status = NFS3_OK;
1373 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1374 resp->resok.count = 0;
1375 resp->resok.committed = args->stable;
1376 resp->resok.verf = write3verf;
1377 goto out;
/*
 * Pick the payload source: an mblk chain (one iovec per mblk, using the
 * on-stack array when the chain has at most MAX_IOVECS segments), the
 * rlist chunk address, or the plain data buffer.
 */
1380 if (args->mblk != NULL) {
1381 iovcnt = 0;
1382 for (m = args->mblk; m != NULL; m = m->b_cont)
1383 iovcnt++;
1384 if (iovcnt <= MAX_IOVECS) {
1385 #ifdef DEBUG
1386 rfs3_write_hits++;
1387 #endif
1388 iovp = iov;
1389 } else {
1390 #ifdef DEBUG
1391 rfs3_write_misses++;
1392 #endif
1393 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
1395 mblk_to_iov(args->mblk, iovcnt, iovp);
1397 } else if (args->rlist != NULL) {
1398 iovcnt = 1;
1399 iovp = iov;
1400 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
1401 iovp->iov_len = args->count;
1402 } else {
1403 iovcnt = 1;
1404 iovp = iov;
1405 iovp->iov_base = args->data.data_val;
1406 iovp->iov_len = args->count;
1409 uio.uio_iov = iovp;
1410 uio.uio_iovcnt = iovcnt;
1412 uio.uio_segflg = UIO_SYSSPACE;
1413 uio.uio_extflg = UIO_COPY_DEFAULT;
1414 uio.uio_loffset = args->offset;
1415 uio.uio_resid = args->count;
/* Trim the transfer to the room left below the limit in uio_llimit. */
1416 uio.uio_llimit = curproc->p_fsz_ctl;
1417 rlimit = uio.uio_llimit - args->offset;
1418 if (rlimit < (u_offset_t)uio.uio_resid)
1419 uio.uio_resid = (int)rlimit;
/*
 * Translate the requested stability level into VOP_WRITE I/O flags; an
 * unknown level is rejected after freeing any allocated iovec array.
 */
1421 if (args->stable == UNSTABLE)
1422 ioflag = 0;
1423 else if (args->stable == FILE_SYNC)
1424 ioflag = FSYNC;
1425 else if (args->stable == DATA_SYNC)
1426 ioflag = FDSYNC;
1427 else {
1428 if (iovp != iov)
1429 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1430 resp->status = NFS3ERR_INVAL;
1431 goto err1;
1435 * We're changing creds because VM may fault and we need
1436 * the cred of the current thread to be used if quota
1437 * checking is enabled.
1439 savecred = curthread->t_cred;
1440 curthread->t_cred = cr;
1441 error = VOP_WRITE(vp, &uio, ioflag, cr, &ct);
1442 curthread->t_cred = savecred;
1444 if (iovp != iov)
1445 kmem_free(iovp, sizeof (*iovp) * iovcnt);
1447 /* check if a monitor detected a delegation conflict */
1448 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK)) {
1449 resp->status = NFS3ERR_JUKEBOX;
1450 goto err1;
/*
 * Fetch the post-write attributes before looking at the write error so
 * that the failure reply also carries them when they are available.
 */
1453 ava.va_mask = AT_ALL;
1454 avap = VOP_GETATTR(vp, &ava, 0, cr, &ct) ? NULL : &ava;
1456 if (error)
1457 goto err;
1460 * If we were unable to get the V_WRITELOCK_TRUE, then we
1461 * may not have accurate after attrs, so check if
1462 * we have both attributes, they have a non-zero va_seq, and
1463 * va_seq has changed by exactly one,
1464 * if not, turn off the before attr.
1466 if (rwlock_ret != V_WRITELOCK_TRUE) {
1467 if (bvap == NULL || avap == NULL ||
1468 bvap->va_seq == 0 || avap->va_seq == 0 ||
1469 avap->va_seq != (bvap->va_seq + 1)) {
1470 bvap = NULL;
1474 resp->status = NFS3_OK;
1475 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
1476 resp->resok.count = args->count - uio.uio_resid;
1477 resp->resok.committed = args->stable;
1478 resp->resok.verf = write3verf;
1479 goto out;
/* err: derive resp->status from error; err1: status is already set. */
1481 err:
1482 if (curthread->t_flag & T_WOULDBLOCK) {
1483 curthread->t_flag &= ~T_WOULDBLOCK;
1484 resp->status = NFS3ERR_JUKEBOX;
1485 } else
1486 resp->status = puterrno3(error);
1487 err1:
1488 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
1489 out:
1490 DTRACE_NFSV3_4(op__write__done, struct svc_req *, req,
1491 cred_t *, cr, vnode_t *, vp, WRITE3res *, resp);
/* Undo, in reverse order, whatever was acquired on the way in. */
1493 if (vp != NULL) {
1494 if (rwlock_ret != -1)
1495 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, &ct);
1496 if (in_crit)
1497 nbl_end_crit(vp);
1498 VN_RELE(vp);
1502 void *
1503 rfs3_write_getfh(WRITE3args *args)
1506 return (&args->file);
/*
 * rfs3_create - service an NFSv3 CREATE request.
 *
 * Resolves the parent directory from args->where.dir, validates the name
 * and the export, converts the name with nfscmd_convname() and creates a
 * regular file with VOP_CREATE().  args->how.mode selects the semantics:
 *
 *   EXCLUSIVE - the client verifier is stored in the mtime of a mode-0
 *               file; if the file already exists it is accepted only when
 *               its mtime matches that verifier.
 *   GUARDED   - exclusive create (EXCL) with the client's attributes; an
 *               EEXIST failure is returned as-is.
 *   otherwise - non-exclusive create (NONEXCL).
 *
 * For a non-exclusive create that sets the size, an existing target is
 * first checked for a v4 delegation and for NBMAND lock conflicts.
 *
 * On success resp->resok carries the file handle (when makefh3() works),
 * the object attributes and the directory wcc data.  On failure
 * resp->status is set and resp->resfail.dir_wcc is filled in.
 */
1509 void
1510 rfs3_create(CREATE3args *args, CREATE3res *resp, struct exportinfo *exi,
1511 struct svc_req *req, cred_t *cr)
1513 int error;
1514 int in_crit = 0;
1515 vnode_t *vp;
1516 vnode_t *tvp = NULL;
1517 vnode_t *dvp;
1518 struct vattr *vap;
1519 struct vattr va;
1520 struct vattr *dbvap;
1521 struct vattr dbva;
1522 struct vattr *davap;
1523 struct vattr dava;
1524 enum vcexcl excl;
1525 nfstime3 *mtime;
1526 len_t reqsize;
1527 bool_t trunc;
1528 struct sockaddr *ca;
1529 char *name = NULL;
1531 dbvap = NULL;
1532 davap = NULL;
1534 dvp = nfs3_fhtovp(&args->where.dir, exi);
1536 DTRACE_NFSV3_4(op__create__start, struct svc_req *, req,
1537 cred_t *, cr, vnode_t *, dvp, CREATE3args *, args);
1539 if (dvp == NULL) {
1540 error = ESTALE;
1541 goto out;
/* Directory attributes before the operation, for the wcc data. */
1544 dbva.va_mask = AT_ALL;
1545 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1546 davap = dbvap;
1548 if (args->where.name == nfs3nametoolong) {
1549 resp->status = NFS3ERR_NAMETOOLONG;
1550 goto out1;
1553 if (args->where.name == NULL || *(args->where.name) == '\0') {
1554 resp->status = NFS3ERR_ACCES;
1555 goto out1;
1558 if (rdonly(exi, req)) {
1559 resp->status = NFS3ERR_ROFS;
1560 goto out1;
1563 if (is_system_labeled()) {
1564 bslabel_t *clabel = req->rq_label;
1566 ASSERT(clabel != NULL);
1567 DTRACE_PROBE2(tx__rfs3__log__info__opcreate__clabel, char *,
1568 "got client label from request(1)", struct svc_req *, req);
1570 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1571 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1572 exi)) {
1573 resp->status = NFS3ERR_ACCES;
1574 goto out1;
/*
 * The converted name may or may not be the caller's own buffer; every
 * free below first compares it against args->where.name.
 */
1579 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1580 name = nfscmd_convname(ca, exi, args->where.name,
1581 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1583 if (name == NULL) {
1584 /* This is really a Solaris EILSEQ */
1585 resp->status = NFS3ERR_INVAL;
1586 goto out1;
/* EXCLUSIVE: mode-0 regular file whose mtime carries the verifier. */
1589 if (args->how.mode == EXCLUSIVE) {
1590 va.va_mask = AT_TYPE | AT_MODE | AT_MTIME;
1591 va.va_type = VREG;
1592 va.va_mode = (mode_t)0;
1594 * Ensure no time overflows and that types match
1596 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1597 va.va_mtime.tv_sec = mtime->seconds % INT32_MAX;
1598 va.va_mtime.tv_nsec = mtime->nseconds;
1599 excl = EXCL;
1600 } else {
1601 error = sattr3_to_vattr(&args->how.createhow3_u.obj_attributes,
1602 &va);
1603 if (error)
1604 goto out;
1605 va.va_mask |= AT_TYPE;
1606 va.va_type = VREG;
1607 if (args->how.mode == GUARDED)
1608 excl = EXCL;
1609 else {
1610 excl = NONEXCL;
1613 * During creation of file in non-exclusive mode
1614 * if size of file is being set then make sure
1615 * that if the file already exists that no conflicting
1616 * non-blocking mandatory locks exists in the region
1617 * being modified. If there are conflicting locks fail
1618 * the operation with EACCES.
1620 if (va.va_mask & AT_SIZE) {
1621 struct vattr tva;
1624 * Does file already exist?
1626 error = VOP_LOOKUP(dvp, name, &tvp,
1627 NULL, 0, NULL, cr, NULL, NULL, NULL);
1630 * Check to see if the file has been delegated
1631 * to a v4 client. If so, then begin recall of
1632 * the delegation and return JUKEBOX to allow
1633 * the client to retrasmit its request.
1636 trunc = va.va_size == 0;
1637 if (!error &&
1638 rfs4_check_delegated(FWRITE, tvp, trunc)) {
1639 resp->status = NFS3ERR_JUKEBOX;
1640 goto out1;
1644 * Check for NBMAND lock conflicts
1646 if (!error && nbl_need_check(tvp)) {
1647 u_offset_t offset;
1648 ssize_t len;
1650 nbl_start_crit(tvp, RW_READER);
1651 in_crit = 1;
1653 tva.va_mask = AT_SIZE;
1654 error = VOP_GETATTR(tvp, &tva, 0, cr,
1655 NULL);
1657 * Can't check for conflicts, so return
1658 * error.
1660 if (error)
1661 goto out;
/* The checked region lies between the current and the requested size. */
1663 offset = tva.va_size < va.va_size ?
1664 tva.va_size : va.va_size;
1665 len = tva.va_size < va.va_size ?
1666 va.va_size - tva.va_size :
1667 tva.va_size - va.va_size;
1668 if (nbl_conflict(tvp, NBL_WRITE,
1669 offset, len, 0, NULL)) {
1670 error = EACCES;
1671 goto out;
1673 } else if (tvp) {
1674 VN_RELE(tvp);
1675 tvp = NULL;
/* Remember the requested size for the post-create size check. */
1679 if (va.va_mask & AT_SIZE)
1680 reqsize = va.va_size;
1684 * Must specify the mode.
1686 if (!(va.va_mask & AT_MODE)) {
1687 resp->status = NFS3ERR_INVAL;
1688 goto out1;
1692 * If the filesystem is exported with nosuid, then mask off
1693 * the setuid and setgid bits.
1695 if (va.va_type == VREG && (exi->exi_export.ex_flags & EX_NOSUID))
1696 va.va_mode &= ~(VSUID | VSGID);
1698 tryagain:
1700 * The file open mode used is VWRITE. If the client needs
1701 * some other semantic, then it should do the access checking
1702 * itself. It would have been nice to have the file open mode
1703 * passed as part of the arguments.
1705 error = VOP_CREATE(dvp, name, &va, excl, VWRITE,
1706 &vp, cr, 0, NULL, NULL);
/* Directory attributes after the operation, for the wcc data. */
1708 dava.va_mask = AT_ALL;
1709 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1711 if (error) {
1713 * If we got something other than file already exists
1714 * then just return this error. Otherwise, we got
1715 * EEXIST. If we were doing a GUARDED create, then
1716 * just return this error. Otherwise, we need to
1717 * make sure that this wasn't a duplicate of an
1718 * exclusive create request.
1720 * The assumption is made that a non-exclusive create
1721 * request will never return EEXIST.
1723 if (error != EEXIST || args->how.mode == GUARDED)
1724 goto out;
1726 * Lookup the file so that we can get a vnode for it.
1728 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0,
1729 NULL, cr, NULL, NULL, NULL);
1730 if (error) {
1732 * We couldn't find the file that we thought that
1733 * we just created. So, we'll just try creating
1734 * it again.
1736 if (error == ENOENT)
1737 goto tryagain;
1738 goto out;
1742 * If the file is delegated to a v4 client, go ahead
1743 * and initiate recall, this create is a hint that a
1744 * conflicting v3 open has occurred.
1747 if (rfs4_check_delegated(FWRITE, vp, FALSE)) {
1748 VN_RELE(vp);
1749 resp->status = NFS3ERR_JUKEBOX;
1750 goto out1;
1753 va.va_mask = AT_ALL;
1754 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1756 mtime = (nfstime3 *)&args->how.createhow3_u.verf;
1757 /* % with INT32_MAX to prevent overflows */
1758 if (args->how.mode == EXCLUSIVE && (vap == NULL ||
1759 vap->va_mtime.tv_sec !=
1760 (mtime->seconds % INT32_MAX) ||
1761 vap->va_mtime.tv_nsec != mtime->nseconds)) {
1762 VN_RELE(vp);
1763 error = EEXIST;
1764 goto out;
1766 } else {
1768 if ((args->how.mode == UNCHECKED ||
1769 args->how.mode == GUARDED) &&
1770 args->how.createhow3_u.obj_attributes.size.set_it &&
1771 va.va_size == 0)
1772 trunc = TRUE;
1773 else
1774 trunc = FALSE;
1776 if (rfs4_check_delegated(FWRITE, vp, trunc)) {
1777 VN_RELE(vp);
1778 resp->status = NFS3ERR_JUKEBOX;
1779 goto out1;
1782 va.va_mask = AT_ALL;
1783 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1786 * We need to check to make sure that the file got
1787 * created to the indicated size. If not, we do a
1788 * setattr to try to change the size, but we don't
1789 * try too hard. This shouldn't a problem as most
1790 * clients will only specifiy a size of zero which
1791 * local file systems handle. However, even if
1792 * the client does specify a non-zero size, it can
1793 * still recover by checking the size of the file
1794 * after it has created it and then issue a setattr
1795 * request of its own to set the size of the file.
1797 if (vap != NULL &&
1798 (args->how.mode == UNCHECKED ||
1799 args->how.mode == GUARDED) &&
1800 args->how.createhow3_u.obj_attributes.size.set_it &&
1801 vap->va_size != reqsize) {
1802 va.va_mask = AT_SIZE;
1803 va.va_size = reqsize;
1804 (void) VOP_SETATTR(vp, &va, 0, cr, NULL);
1805 va.va_mask = AT_ALL;
1806 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
/* Success path from here on: the converted name is no longer used. */
1810 if (name != args->where.name)
1811 kmem_free(name, MAXPATHLEN + 1);
1813 error = makefh3(&resp->resok.obj.handle, vp, exi);
1814 if (error)
1815 resp->resok.obj.handle_follows = FALSE;
1816 else
1817 resp->resok.obj.handle_follows = TRUE;
1820 * Force modified data and metadata out to stable storage.
1822 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
1823 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1825 VN_RELE(vp);
1826 if (tvp != NULL) {
1827 if (in_crit)
1828 nbl_end_crit(tvp);
1829 VN_RELE(tvp);
1832 resp->status = NFS3_OK;
1833 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1834 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1836 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1837 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1839 VN_RELE(dvp);
1840 return;
/* out: derive resp->status from error; out1: status is already set. */
1842 out:
1843 if (curthread->t_flag & T_WOULDBLOCK) {
1844 curthread->t_flag &= ~T_WOULDBLOCK;
1845 resp->status = NFS3ERR_JUKEBOX;
1846 } else
1847 resp->status = puterrno3(error);
1848 out1:
1849 DTRACE_NFSV3_4(op__create__done, struct svc_req *, req,
1850 cred_t *, cr, vnode_t *, dvp, CREATE3res *, resp);
1852 if (name != NULL && name != args->where.name)
1853 kmem_free(name, MAXPATHLEN + 1);
1855 if (tvp != NULL) {
1856 if (in_crit)
1857 nbl_end_crit(tvp);
1858 VN_RELE(tvp);
1860 if (dvp != NULL)
1861 VN_RELE(dvp);
1862 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
1865 void *
1866 rfs3_create_getfh(CREATE3args *args)
1869 return (&args->where.dir);
/*
 * rfs3_mkdir - service an NFSv3 MKDIR request.
 *
 * Resolves the parent directory from args->where.dir, validates the name,
 * the export and (on labeled systems) the client label, converts the
 * attributes and the name, and creates the directory with VOP_MKDIR().
 * The parent is fsync'ed whether or not the mkdir worked; the new
 * directory is fsync'ed on success.
 *
 * On success resp->resok carries the file handle (when makefh3() works),
 * the object attributes and the parent's wcc data.  On failure
 * resp->status is set and resp->resfail.dir_wcc is filled in.
 */
1872 void
1873 rfs3_mkdir(MKDIR3args *args, MKDIR3res *resp, struct exportinfo *exi,
1874 struct svc_req *req, cred_t *cr)
1876 int error;
1877 vnode_t *vp = NULL;
1878 vnode_t *dvp;
1879 struct vattr *vap;
1880 struct vattr va;
1881 struct vattr *dbvap;
1882 struct vattr dbva;
1883 struct vattr *davap;
1884 struct vattr dava;
1885 struct sockaddr *ca;
1886 char *name = NULL;
1888 dbvap = NULL;
1889 davap = NULL;
1891 dvp = nfs3_fhtovp(&args->where.dir, exi);
1893 DTRACE_NFSV3_4(op__mkdir__start, struct svc_req *, req,
1894 cred_t *, cr, vnode_t *, dvp, MKDIR3args *, args);
1896 if (dvp == NULL) {
1897 error = ESTALE;
1898 goto out;
/* Parent attributes before the operation, for the wcc data. */
1901 dbva.va_mask = AT_ALL;
1902 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
1903 davap = dbvap;
1905 if (args->where.name == nfs3nametoolong) {
1906 resp->status = NFS3ERR_NAMETOOLONG;
1907 goto out1;
1910 if (args->where.name == NULL || *(args->where.name) == '\0') {
1911 resp->status = NFS3ERR_ACCES;
1912 goto out1;
1915 if (rdonly(exi, req)) {
1916 resp->status = NFS3ERR_ROFS;
1917 goto out1;
1920 if (is_system_labeled()) {
1921 bslabel_t *clabel = req->rq_label;
1923 ASSERT(clabel != NULL);
1924 DTRACE_PROBE2(tx__rfs3__log__info__opmkdir__clabel, char *,
1925 "got client label from request(1)", struct svc_req *, req);
1927 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1928 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
1929 exi)) {
1930 resp->status = NFS3ERR_ACCES;
1931 goto out1;
1936 error = sattr3_to_vattr(&args->attributes, &va);
1937 if (error)
1938 goto out;
/* The request must supply a mode. */
1940 if (!(va.va_mask & AT_MODE)) {
1941 resp->status = NFS3ERR_INVAL;
1942 goto out1;
1945 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1946 name = nfscmd_convname(ca, exi, args->where.name,
1947 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1949 if (name == NULL) {
1950 resp->status = NFS3ERR_INVAL;
1951 goto out1;
1954 va.va_mask |= AT_TYPE;
1955 va.va_type = VDIR;
1957 error = VOP_MKDIR(dvp, name, &va, &vp, cr, NULL, 0, NULL);
/*
 * The converted name is freed here, before any further goto; the exit
 * labels below do not free it.
 */
1959 if (name != args->where.name)
1960 kmem_free(name, MAXPATHLEN + 1);
1962 dava.va_mask = AT_ALL;
1963 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
1966 * Force modified data and metadata out to stable storage.
1968 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1970 if (error)
1971 goto out;
1973 error = makefh3(&resp->resok.obj.handle, vp, exi);
1974 if (error)
1975 resp->resok.obj.handle_follows = FALSE;
1976 else
1977 resp->resok.obj.handle_follows = TRUE;
1979 va.va_mask = AT_ALL;
1980 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
1983 * Force modified data and metadata out to stable storage.
1985 (void) VOP_FSYNC(vp, 0, cr, NULL);
1987 VN_RELE(vp);
1989 resp->status = NFS3_OK;
1990 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
1991 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
1993 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
1994 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
1995 VN_RELE(dvp);
1997 return;
/* out: derive resp->status from error; out1: status is already set. */
1999 out:
2000 if (curthread->t_flag & T_WOULDBLOCK) {
2001 curthread->t_flag &= ~T_WOULDBLOCK;
2002 resp->status = NFS3ERR_JUKEBOX;
2003 } else
2004 resp->status = puterrno3(error);
2005 out1:
2006 DTRACE_NFSV3_4(op__mkdir__done, struct svc_req *, req,
2007 cred_t *, cr, vnode_t *, dvp, MKDIR3res *, resp);
2008 if (dvp != NULL)
2009 VN_RELE(dvp);
2010 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2013 void *
2014 rfs3_mkdir_getfh(MKDIR3args *args)
2017 return (&args->where.dir);
/*
 * rfs3_symlink - service an NFSv3 SYMLINK request.
 *
 * Resolves the parent directory from args->where.dir, validates the name,
 * the export, the client label (on labeled systems) and the attributes,
 * converts both the link name and the link contents with
 * nfscmd_convname(), and creates the link with VOP_SYMLINK().  The new
 * link is then looked up to build its file handle and attributes; if that
 * lookup fails the reply is still NFS3_OK but carries no handle and no
 * object attributes.
 *
 * On failure resp->status is set and resp->resfail.dir_wcc is filled in.
 * All exits pass through "out", which frees the converted strings, fires
 * the op__symlink__done probe and releases the directory vnode.
 */
2020 void
2021 rfs3_symlink(SYMLINK3args *args, SYMLINK3res *resp, struct exportinfo *exi,
2022 struct svc_req *req, cred_t *cr)
2024 int error;
2025 vnode_t *vp;
2026 vnode_t *dvp;
2027 struct vattr *vap;
2028 struct vattr va;
2029 struct vattr *dbvap;
2030 struct vattr dbva;
2031 struct vattr *davap;
2032 struct vattr dava;
2033 struct sockaddr *ca;
2034 char *name = NULL;
2035 char *symdata = NULL;
2037 dbvap = NULL;
2038 davap = NULL;
2040 dvp = nfs3_fhtovp(&args->where.dir, exi);
2042 DTRACE_NFSV3_4(op__symlink__start, struct svc_req *, req,
2043 cred_t *, cr, vnode_t *, dvp, SYMLINK3args *, args);
2045 if (dvp == NULL) {
2046 error = ESTALE;
2047 goto err;
/* Parent attributes before the operation, for the wcc data. */
2050 dbva.va_mask = AT_ALL;
2051 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2052 davap = dbvap;
2054 if (args->where.name == nfs3nametoolong) {
2055 resp->status = NFS3ERR_NAMETOOLONG;
2056 goto err1;
2059 if (args->where.name == NULL || *(args->where.name) == '\0') {
2060 resp->status = NFS3ERR_ACCES;
2061 goto err1;
2064 if (rdonly(exi, req)) {
2065 resp->status = NFS3ERR_ROFS;
2066 goto err1;
2069 if (is_system_labeled()) {
2070 bslabel_t *clabel = req->rq_label;
2072 ASSERT(clabel != NULL);
2073 DTRACE_PROBE2(tx__rfs3__log__info__opsymlink__clabel, char *,
2074 "got client label from request(1)", struct svc_req *, req);
2076 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2077 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2078 exi)) {
2079 resp->status = NFS3ERR_ACCES;
2080 goto err1;
2085 error = sattr3_to_vattr(&args->symlink.symlink_attributes, &va);
2086 if (error)
2087 goto err;
/* The request must supply a mode. */
2089 if (!(va.va_mask & AT_MODE)) {
2090 resp->status = NFS3ERR_INVAL;
2091 goto err1;
2094 if (args->symlink.symlink_data == nfs3nametoolong) {
2095 resp->status = NFS3ERR_NAMETOOLONG;
2096 goto err1;
/*
 * Convert the link name and the link contents.  Either result may be the
 * caller's own buffer; "out" compares before freeing.
 */
2099 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2100 name = nfscmd_convname(ca, exi, args->where.name,
2101 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2103 if (name == NULL) {
2104 /* This is really a Solaris EILSEQ */
2105 resp->status = NFS3ERR_INVAL;
2106 goto err1;
2109 symdata = nfscmd_convname(ca, exi, args->symlink.symlink_data,
2110 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2111 if (symdata == NULL) {
2112 /* This is really a Solaris EILSEQ */
2113 resp->status = NFS3ERR_INVAL;
2114 goto err1;
2118 va.va_mask |= AT_TYPE;
2119 va.va_type = VLNK;
2121 error = VOP_SYMLINK(dvp, name, &va, symdata, cr, NULL, 0);
2123 dava.va_mask = AT_ALL;
2124 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2126 if (error)
2127 goto err;
/* Look the new link up to obtain a vnode for the handle and attributes. */
2129 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
2130 NULL, NULL, NULL);
2133 * Force modified data and metadata out to stable storage.
2135 (void) VOP_FSYNC(dvp, 0, cr, NULL);
/* The link exists at this point, so a failed lookup still replies OK. */
2138 resp->status = NFS3_OK;
2139 if (error) {
2140 resp->resok.obj.handle_follows = FALSE;
2141 vattr_to_post_op_attr(NULL, &resp->resok.obj_attributes);
2142 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2143 goto out;
2146 error = makefh3(&resp->resok.obj.handle, vp, exi);
2147 if (error)
2148 resp->resok.obj.handle_follows = FALSE;
2149 else
2150 resp->resok.obj.handle_follows = TRUE;
2152 va.va_mask = AT_ALL;
2153 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2156 * Force modified data and metadata out to stable storage.
2158 (void) VOP_FSYNC(vp, 0, cr, NULL);
2160 VN_RELE(vp);
2162 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2163 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2164 goto out;
/* err: derive resp->status from error; err1: status is already set. */
2166 err:
2167 if (curthread->t_flag & T_WOULDBLOCK) {
2168 curthread->t_flag &= ~T_WOULDBLOCK;
2169 resp->status = NFS3ERR_JUKEBOX;
2170 } else
2171 resp->status = puterrno3(error);
2172 err1:
2173 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2174 out:
2175 if (name != NULL && name != args->where.name)
2176 kmem_free(name, MAXPATHLEN + 1);
2177 if (symdata != NULL && symdata != args->symlink.symlink_data)
2178 kmem_free(symdata, MAXPATHLEN + 1);
2180 DTRACE_NFSV3_4(op__symlink__done, struct svc_req *, req,
2181 cred_t *, cr, vnode_t *, dvp, SYMLINK3res *, resp);
2183 if (dvp != NULL)
2184 VN_RELE(dvp);
2187 void *
2188 rfs3_symlink_getfh(SYMLINK3args *args)
2191 return (&args->where.dir);
/*
 * rfs3_mknod - service an NFSv3 MKNOD request.
 *
 * Creates a character device, block device, socket or FIFO named
 * args->where.name in the directory given by args->where.dir.  Device
 * nodes additionally require secpolicy_sys_devices() to succeed; any other
 * node type is answered with NFS3ERR_BADTYPE.  The node is created
 * exclusively (EXCL) through VOP_CREATE(); the directory is fsync'ed
 * whether or not the create worked, and the new node on success.
 *
 * On success resp->resok carries the file handle (when makefh3() works),
 * the object attributes and the directory wcc data.  On failure
 * resp->status is set and resp->resfail.dir_wcc is filled in.
 */
2194 void
2195 rfs3_mknod(MKNOD3args *args, MKNOD3res *resp, struct exportinfo *exi,
2196 struct svc_req *req, cred_t *cr)
2198 int error;
2199 vnode_t *vp;
2200 vnode_t *realvp;
2201 vnode_t *dvp;
2202 struct vattr *vap;
2203 struct vattr va;
2204 struct vattr *dbvap;
2205 struct vattr dbva;
2206 struct vattr *davap;
2207 struct vattr dava;
2208 int mode;
2209 enum vcexcl excl;
2210 struct sockaddr *ca;
2211 char *name = NULL;
2213 dbvap = NULL;
2214 davap = NULL;
2216 dvp = nfs3_fhtovp(&args->where.dir, exi);
2218 DTRACE_NFSV3_4(op__mknod__start, struct svc_req *, req,
2219 cred_t *, cr, vnode_t *, dvp, MKNOD3args *, args);
2221 if (dvp == NULL) {
2222 error = ESTALE;
2223 goto out;
/* Directory attributes before the operation, for the wcc data. */
2226 dbva.va_mask = AT_ALL;
2227 dbvap = VOP_GETATTR(dvp, &dbva, 0, cr, NULL) ? NULL : &dbva;
2228 davap = dbvap;
2230 if (args->where.name == nfs3nametoolong) {
2231 resp->status = NFS3ERR_NAMETOOLONG;
2232 goto out1;
2235 if (args->where.name == NULL || *(args->where.name) == '\0') {
2236 resp->status = NFS3ERR_ACCES;
2237 goto out1;
2240 if (rdonly(exi, req)) {
2241 resp->status = NFS3ERR_ROFS;
2242 goto out1;
2245 if (is_system_labeled()) {
2246 bslabel_t *clabel = req->rq_label;
2248 ASSERT(clabel != NULL);
2249 DTRACE_PROBE2(tx__rfs3__log__info__opmknod__clabel, char *,
2250 "got client label from request(1)", struct svc_req *, req);
2252 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2253 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2254 exi)) {
2255 resp->status = NFS3ERR_ACCES;
2256 goto out1;
/* Fill in va from the type-specific arm of the request. */
2261 switch (args->what.type) {
2262 case NF3CHR:
2263 case NF3BLK:
2264 error = sattr3_to_vattr(
2265 &args->what.mknoddata3_u.device.dev_attributes, &va);
2266 if (error)
2267 goto out;
2268 if (secpolicy_sys_devices(cr) != 0) {
2269 resp->status = NFS3ERR_PERM;
2270 goto out1;
2272 if (args->what.type == NF3CHR)
2273 va.va_type = VCHR;
2274 else
2275 va.va_type = VBLK;
2276 va.va_rdev = makedevice(
2277 args->what.mknoddata3_u.device.spec.specdata1,
2278 args->what.mknoddata3_u.device.spec.specdata2);
2279 va.va_mask |= AT_TYPE | AT_RDEV;
2280 break;
2281 case NF3SOCK:
2282 error = sattr3_to_vattr(
2283 &args->what.mknoddata3_u.pipe_attributes, &va);
2284 if (error)
2285 goto out;
2286 va.va_type = VSOCK;
2287 va.va_mask |= AT_TYPE;
2288 break;
2289 case NF3FIFO:
2290 error = sattr3_to_vattr(
2291 &args->what.mknoddata3_u.pipe_attributes, &va);
2292 if (error)
2293 goto out;
2294 va.va_type = VFIFO;
2295 va.va_mask |= AT_TYPE;
2296 break;
2297 default:
2298 resp->status = NFS3ERR_BADTYPE;
2299 goto out1;
2303 * Must specify the mode.
2305 if (!(va.va_mask & AT_MODE)) {
2306 resp->status = NFS3ERR_INVAL;
2307 goto out1;
2310 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2311 name = nfscmd_convname(ca, exi, args->where.name,
2312 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2314 if (name == NULL) {
2315 resp->status = NFS3ERR_INVAL;
2316 goto out1;
2319 excl = EXCL;
2321 mode = 0;
2323 error = VOP_CREATE(dvp, name, &va, excl, mode,
2324 &vp, cr, 0, NULL, NULL);
/*
 * The converted name is freed here, before any further goto; the exit
 * labels below do not free it.
 */
2326 if (name != args->where.name)
2327 kmem_free(name, MAXPATHLEN + 1);
2329 dava.va_mask = AT_ALL;
2330 davap = VOP_GETATTR(dvp, &dava, 0, cr, NULL) ? NULL : &dava;
2333 * Force modified data and metadata out to stable storage.
2335 (void) VOP_FSYNC(dvp, 0, cr, NULL);
2337 if (error)
2338 goto out;
2340 resp->status = NFS3_OK;
2342 error = makefh3(&resp->resok.obj.handle, vp, exi);
2343 if (error)
2344 resp->resok.obj.handle_follows = FALSE;
2345 else
2346 resp->resok.obj.handle_follows = TRUE;
2348 va.va_mask = AT_ALL;
2349 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2352 * Force modified metadata out to stable storage.
2354 * if a underlying vp exists, pass it to VOP_FSYNC
2356 if (VOP_REALVP(vp, &realvp, NULL) == 0)
2357 (void) VOP_FSYNC(realvp, FNODSYNC, cr, NULL);
2358 else
2359 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
2361 VN_RELE(vp);
2363 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
2364 vattr_to_wcc_data(dbvap, davap, &resp->resok.dir_wcc);
2365 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2366 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2367 VN_RELE(dvp);
2368 return;
/* out: derive resp->status from error; out1: status is already set. */
2370 out:
2371 if (curthread->t_flag & T_WOULDBLOCK) {
2372 curthread->t_flag &= ~T_WOULDBLOCK;
2373 resp->status = NFS3ERR_JUKEBOX;
2374 } else
2375 resp->status = puterrno3(error);
2376 out1:
2377 DTRACE_NFSV3_4(op__mknod__done, struct svc_req *, req,
2378 cred_t *, cr, vnode_t *, dvp, MKNOD3res *, resp);
2379 if (dvp != NULL)
2380 VN_RELE(dvp);
2381 vattr_to_wcc_data(dbvap, davap, &resp->resfail.dir_wcc);
2384 void *
2385 rfs3_mknod_getfh(MKNOD3args *args)
2388 return (&args->where.dir);
2391 void
2392 rfs3_remove(REMOVE3args *args, REMOVE3res *resp, struct exportinfo *exi,
2393 struct svc_req *req, cred_t *cr)
2395 int error = 0;
2396 vnode_t *vp;
2397 struct vattr *bvap;
2398 struct vattr bva;
2399 struct vattr *avap;
2400 struct vattr ava;
2401 vnode_t *targvp = NULL;
2402 struct sockaddr *ca;
2403 char *name = NULL;
2405 bvap = NULL;
2406 avap = NULL;
2408 vp = nfs3_fhtovp(&args->object.dir, exi);
2410 DTRACE_NFSV3_4(op__remove__start, struct svc_req *, req,
2411 cred_t *, cr, vnode_t *, vp, REMOVE3args *, args);
2413 if (vp == NULL) {
2414 error = ESTALE;
2415 goto err;
2418 bva.va_mask = AT_ALL;
2419 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2420 avap = bvap;
2422 if (vp->v_type != VDIR) {
2423 resp->status = NFS3ERR_NOTDIR;
2424 goto err1;
2427 if (args->object.name == nfs3nametoolong) {
2428 resp->status = NFS3ERR_NAMETOOLONG;
2429 goto err1;
2432 if (args->object.name == NULL || *(args->object.name) == '\0') {
2433 resp->status = NFS3ERR_ACCES;
2434 goto err1;
2437 if (rdonly(exi, req)) {
2438 resp->status = NFS3ERR_ROFS;
2439 goto err1;
2442 if (is_system_labeled()) {
2443 bslabel_t *clabel = req->rq_label;
2445 ASSERT(clabel != NULL);
2446 DTRACE_PROBE2(tx__rfs3__log__info__opremove__clabel, char *,
2447 "got client label from request(1)", struct svc_req *, req);
2449 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2450 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2451 exi)) {
2452 resp->status = NFS3ERR_ACCES;
2453 goto err1;
2458 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2459 name = nfscmd_convname(ca, exi, args->object.name,
2460 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2462 if (name == NULL) {
2463 resp->status = NFS3ERR_INVAL;
2464 goto err1;
2468 * Check for a conflict with a non-blocking mandatory share
2469 * reservation and V4 delegations
2471 error = VOP_LOOKUP(vp, name, &targvp, NULL, 0,
2472 NULL, cr, NULL, NULL, NULL);
2473 if (error != 0)
2474 goto err;
2476 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2477 resp->status = NFS3ERR_JUKEBOX;
2478 goto err1;
2481 if (!nbl_need_check(targvp)) {
2482 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2483 } else {
2484 nbl_start_crit(targvp, RW_READER);
2485 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
2486 error = EACCES;
2487 } else {
2488 error = VOP_REMOVE(vp, name, cr, NULL, 0);
2490 nbl_end_crit(targvp);
2492 VN_RELE(targvp);
2493 targvp = NULL;
2495 ava.va_mask = AT_ALL;
2496 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2499 * Force modified data and metadata out to stable storage.
2501 (void) VOP_FSYNC(vp, 0, cr, NULL);
2503 if (error)
2504 goto err;
2506 resp->status = NFS3_OK;
2507 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2508 goto out;
2510 err:
2511 if (curthread->t_flag & T_WOULDBLOCK) {
2512 curthread->t_flag &= ~T_WOULDBLOCK;
2513 resp->status = NFS3ERR_JUKEBOX;
2514 } else
2515 resp->status = puterrno3(error);
2516 err1:
2517 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2518 out:
2519 DTRACE_NFSV3_4(op__remove__done, struct svc_req *, req,
2520 cred_t *, cr, vnode_t *, vp, REMOVE3res *, resp);
2522 if (name != NULL && name != args->object.name)
2523 kmem_free(name, MAXPATHLEN + 1);
2525 if (vp != NULL)
2526 VN_RELE(vp);
2529 void *
2530 rfs3_remove_getfh(REMOVE3args *args)
2533 return (&args->object.dir);
/*
 * rfs3_rmdir - service an NFSv3 RMDIR request.
 *
 * Resolves the parent directory from args->object.dir, validates that it
 * is a directory, that the name is usable, that the export is writable and
 * (on labeled systems) that the client label matches, converts the name
 * with nfscmd_convname() and calls VOP_RMDIR().  The parent is fsync'ed
 * afterwards whether or not the removal worked.  An EEXIST result is
 * turned into ENOTEMPTY before being mapped to an NFS status.
 *
 * On success resp->status is NFS3_OK and resp->resok.dir_wcc holds the
 * parent's before/after attributes.  On failure resp->status is set and
 * resp->resfail.dir_wcc is filled in.
 */
2536 void
2537 rfs3_rmdir(RMDIR3args *args, RMDIR3res *resp, struct exportinfo *exi,
2538 struct svc_req *req, cred_t *cr)
2540 int error;
2541 vnode_t *vp;
2542 struct vattr *bvap;
2543 struct vattr bva;
2544 struct vattr *avap;
2545 struct vattr ava;
2546 struct sockaddr *ca;
2547 char *name = NULL;
2549 bvap = NULL;
2550 avap = NULL;
2552 vp = nfs3_fhtovp(&args->object.dir, exi);
2554 DTRACE_NFSV3_4(op__rmdir__start, struct svc_req *, req,
2555 cred_t *, cr, vnode_t *, vp, RMDIR3args *, args);
2557 if (vp == NULL) {
2558 error = ESTALE;
2559 goto err;
/* Parent attributes before the operation, for the wcc data. */
2562 bva.va_mask = AT_ALL;
2563 bvap = VOP_GETATTR(vp, &bva, 0, cr, NULL) ? NULL : &bva;
2564 avap = bvap;
2566 if (vp->v_type != VDIR) {
2567 resp->status = NFS3ERR_NOTDIR;
2568 goto err1;
2571 if (args->object.name == nfs3nametoolong) {
2572 resp->status = NFS3ERR_NAMETOOLONG;
2573 goto err1;
2576 if (args->object.name == NULL || *(args->object.name) == '\0') {
2577 resp->status = NFS3ERR_ACCES;
2578 goto err1;
2581 if (rdonly(exi, req)) {
2582 resp->status = NFS3ERR_ROFS;
2583 goto err1;
2586 if (is_system_labeled()) {
2587 bslabel_t *clabel = req->rq_label;
2589 ASSERT(clabel != NULL);
2590 DTRACE_PROBE2(tx__rfs3__log__info__opremovedir__clabel, char *,
2591 "got client label from request(1)", struct svc_req *, req);
2593 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2594 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
2595 exi)) {
2596 resp->status = NFS3ERR_ACCES;
2597 goto err1;
2602 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2603 name = nfscmd_convname(ca, exi, args->object.name,
2604 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2606 if (name == NULL) {
2607 resp->status = NFS3ERR_INVAL;
2608 goto err1;
2611 error = VOP_RMDIR(vp, name, rootdir, cr, NULL, 0);
/*
 * The converted name is freed here, before any further goto; the exit
 * labels below do not free it.
 */
2613 if (name != args->object.name)
2614 kmem_free(name, MAXPATHLEN + 1);
2616 ava.va_mask = AT_ALL;
2617 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
2620 * Force modified data and metadata out to stable storage.
2622 (void) VOP_FSYNC(vp, 0, cr, NULL);
2624 if (error) {
2626 * System V defines rmdir to return EEXIST, not ENOTEMPTY,
2627 * if the directory is not empty. A System V NFS server
2628 * needs to map NFS3ERR_EXIST to NFS3ERR_NOTEMPTY to transmit
2629 * over the wire.
2631 if (error == EEXIST)
2632 error = ENOTEMPTY;
2633 goto err;
2636 resp->status = NFS3_OK;
2637 vattr_to_wcc_data(bvap, avap, &resp->resok.dir_wcc);
2638 goto out;
/* err: derive resp->status from error; err1: status is already set. */
2640 err:
2641 if (curthread->t_flag & T_WOULDBLOCK) {
2642 curthread->t_flag &= ~T_WOULDBLOCK;
2643 resp->status = NFS3ERR_JUKEBOX;
2644 } else
2645 resp->status = puterrno3(error);
2646 err1:
2647 vattr_to_wcc_data(bvap, avap, &resp->resfail.dir_wcc);
2648 out:
2649 DTRACE_NFSV3_4(op__rmdir__done, struct svc_req *, req,
2650 cred_t *, cr, vnode_t *, vp, RMDIR3res *, resp);
2651 if (vp != NULL)
2652 VN_RELE(vp);
2656 void *
2657 rfs3_rmdir_getfh(RMDIR3args *args)
2660 return (&args->object.dir);
2663 void
2664 rfs3_rename(RENAME3args *args, RENAME3res *resp, struct exportinfo *exi,
2665 struct svc_req *req, cred_t *cr)
2667 int error = 0;
2668 vnode_t *fvp;
2669 vnode_t *tvp;
2670 vnode_t *targvp;
2671 struct vattr *fbvap;
2672 struct vattr fbva;
2673 struct vattr *favap;
2674 struct vattr fava;
2675 struct vattr *tbvap;
2676 struct vattr tbva;
2677 struct vattr *tavap;
2678 struct vattr tava;
2679 nfs_fh3 *fh3;
2680 struct exportinfo *to_exi;
2681 vnode_t *srcvp = NULL;
2682 bslabel_t *clabel;
2683 struct sockaddr *ca;
2684 char *name = NULL;
2685 char *toname = NULL;
2687 fbvap = NULL;
2688 favap = NULL;
2689 tbvap = NULL;
2690 tavap = NULL;
2691 tvp = NULL;
2693 fvp = nfs3_fhtovp(&args->from.dir, exi);
2695 DTRACE_NFSV3_4(op__rename__start, struct svc_req *, req,
2696 cred_t *, cr, vnode_t *, fvp, RENAME3args *, args);
2698 if (fvp == NULL) {
2699 error = ESTALE;
2700 goto err;
2703 if (is_system_labeled()) {
2704 clabel = req->rq_label;
2705 ASSERT(clabel != NULL);
2706 DTRACE_PROBE2(tx__rfs3__log__info__oprename__clabel, char *,
2707 "got client label from request(1)", struct svc_req *, req);
2709 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2710 if (!do_rfs_label_check(clabel, fvp, EQUALITY_CHECK,
2711 exi)) {
2712 resp->status = NFS3ERR_ACCES;
2713 goto err1;
2718 fbva.va_mask = AT_ALL;
2719 fbvap = VOP_GETATTR(fvp, &fbva, 0, cr, NULL) ? NULL : &fbva;
2720 favap = fbvap;
2722 fh3 = &args->to.dir;
2723 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2724 if (to_exi == NULL) {
2725 resp->status = NFS3ERR_ACCES;
2726 goto err1;
2728 exi_rele(to_exi);
2730 if (to_exi != exi) {
2731 resp->status = NFS3ERR_XDEV;
2732 goto err1;
2735 tvp = nfs3_fhtovp(&args->to.dir, exi);
2736 if (tvp == NULL) {
2737 error = ESTALE;
2738 goto err;
2741 tbva.va_mask = AT_ALL;
2742 tbvap = VOP_GETATTR(tvp, &tbva, 0, cr, NULL) ? NULL : &tbva;
2743 tavap = tbvap;
2745 if (fvp->v_type != VDIR || tvp->v_type != VDIR) {
2746 resp->status = NFS3ERR_NOTDIR;
2747 goto err1;
2750 if (args->from.name == nfs3nametoolong ||
2751 args->to.name == nfs3nametoolong) {
2752 resp->status = NFS3ERR_NAMETOOLONG;
2753 goto err1;
2755 if (args->from.name == NULL || *(args->from.name) == '\0' ||
2756 args->to.name == NULL || *(args->to.name) == '\0') {
2757 resp->status = NFS3ERR_ACCES;
2758 goto err1;
2761 if (rdonly(exi, req)) {
2762 resp->status = NFS3ERR_ROFS;
2763 goto err1;
2766 if (is_system_labeled()) {
2767 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2768 if (!do_rfs_label_check(clabel, tvp, EQUALITY_CHECK,
2769 exi)) {
2770 resp->status = NFS3ERR_ACCES;
2771 goto err1;
2776 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2777 name = nfscmd_convname(ca, exi, args->from.name,
2778 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2780 if (name == NULL) {
2781 resp->status = NFS3ERR_INVAL;
2782 goto err1;
2785 toname = nfscmd_convname(ca, exi, args->to.name,
2786 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
2788 if (toname == NULL) {
2789 resp->status = NFS3ERR_INVAL;
2790 goto err1;
2794 * Check for a conflict with a non-blocking mandatory share
2795 * reservation or V4 delegations.
2797 error = VOP_LOOKUP(fvp, name, &srcvp, NULL, 0,
2798 NULL, cr, NULL, NULL, NULL);
2799 if (error != 0)
2800 goto err;
2803 * If we rename a delegated file we should recall the
2804 * delegation, since future opens should fail or would
2805 * refer to a new file.
2807 if (rfs4_check_delegated(FWRITE, srcvp, FALSE)) {
2808 resp->status = NFS3ERR_JUKEBOX;
2809 goto err1;
2813 * Check for renaming over a delegated file. Check rfs4_deleg_policy
2814 * first to avoid VOP_LOOKUP if possible.
2816 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE &&
2817 VOP_LOOKUP(tvp, toname, &targvp, NULL, 0, NULL, cr,
2818 NULL, NULL, NULL) == 0) {
2820 if (rfs4_check_delegated(FWRITE, targvp, TRUE)) {
2821 VN_RELE(targvp);
2822 resp->status = NFS3ERR_JUKEBOX;
2823 goto err1;
2825 VN_RELE(targvp);
2828 if (!nbl_need_check(srcvp)) {
2829 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2830 } else {
2831 nbl_start_crit(srcvp, RW_READER);
2832 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL))
2833 error = EACCES;
2834 else
2835 error = VOP_RENAME(fvp, name, tvp, toname, cr, NULL, 0);
2836 nbl_end_crit(srcvp);
2838 if (error == 0)
2839 vn_renamepath(tvp, srcvp, args->to.name,
2840 strlen(args->to.name));
2841 VN_RELE(srcvp);
2842 srcvp = NULL;
2844 fava.va_mask = AT_ALL;
2845 favap = VOP_GETATTR(fvp, &fava, 0, cr, NULL) ? NULL : &fava;
2846 tava.va_mask = AT_ALL;
2847 tavap = VOP_GETATTR(tvp, &tava, 0, cr, NULL) ? NULL : &tava;
2850 * Force modified data and metadata out to stable storage.
2852 (void) VOP_FSYNC(fvp, 0, cr, NULL);
2853 (void) VOP_FSYNC(tvp, 0, cr, NULL);
2855 if (error)
2856 goto err;
2858 resp->status = NFS3_OK;
2859 vattr_to_wcc_data(fbvap, favap, &resp->resok.fromdir_wcc);
2860 vattr_to_wcc_data(tbvap, tavap, &resp->resok.todir_wcc);
2861 goto out;
2863 err:
2864 if (curthread->t_flag & T_WOULDBLOCK) {
2865 curthread->t_flag &= ~T_WOULDBLOCK;
2866 resp->status = NFS3ERR_JUKEBOX;
2867 } else {
2868 resp->status = puterrno3(error);
2870 err1:
2871 vattr_to_wcc_data(fbvap, favap, &resp->resfail.fromdir_wcc);
2872 vattr_to_wcc_data(tbvap, tavap, &resp->resfail.todir_wcc);
2874 out:
2875 if (name != NULL && name != args->from.name)
2876 kmem_free(name, MAXPATHLEN + 1);
2877 if (toname != NULL && toname != args->to.name)
2878 kmem_free(toname, MAXPATHLEN + 1);
2880 DTRACE_NFSV3_4(op__rename__done, struct svc_req *, req,
2881 cred_t *, cr, vnode_t *, fvp, RENAME3res *, resp);
2882 if (fvp != NULL)
2883 VN_RELE(fvp);
2884 if (tvp != NULL)
2885 VN_RELE(tvp);
2888 void *
2889 rfs3_rename_getfh(RENAME3args *args)
2892 return (&args->from.dir);
2895 void
2896 rfs3_link(LINK3args *args, LINK3res *resp, struct exportinfo *exi,
2897 struct svc_req *req, cred_t *cr)
2899 int error;
2900 vnode_t *vp;
2901 vnode_t *dvp;
2902 struct vattr *vap;
2903 struct vattr va;
2904 struct vattr *bvap;
2905 struct vattr bva;
2906 struct vattr *avap;
2907 struct vattr ava;
2908 nfs_fh3 *fh3;
2909 struct exportinfo *to_exi;
2910 bslabel_t *clabel;
2911 struct sockaddr *ca;
2912 char *name = NULL;
2914 vap = NULL;
2915 bvap = NULL;
2916 avap = NULL;
2917 dvp = NULL;
2919 vp = nfs3_fhtovp(&args->file, exi);
2921 DTRACE_NFSV3_4(op__link__start, struct svc_req *, req,
2922 cred_t *, cr, vnode_t *, vp, LINK3args *, args);
2924 if (vp == NULL) {
2925 error = ESTALE;
2926 goto out;
2929 va.va_mask = AT_ALL;
2930 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
2932 fh3 = &args->link.dir;
2933 to_exi = checkexport(&fh3->fh3_fsid, FH3TOXFIDP(fh3));
2934 if (to_exi == NULL) {
2935 resp->status = NFS3ERR_ACCES;
2936 goto out1;
2938 exi_rele(to_exi);
2940 if (to_exi != exi) {
2941 resp->status = NFS3ERR_XDEV;
2942 goto out1;
2945 if (is_system_labeled()) {
2946 clabel = req->rq_label;
2948 ASSERT(clabel != NULL);
2949 DTRACE_PROBE2(tx__rfs3__log__info__oplink__clabel, char *,
2950 "got client label from request(1)", struct svc_req *, req);
2952 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2953 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2954 exi)) {
2955 resp->status = NFS3ERR_ACCES;
2956 goto out1;
2961 dvp = nfs3_fhtovp(&args->link.dir, exi);
2962 if (dvp == NULL) {
2963 error = ESTALE;
2964 goto out;
2967 bva.va_mask = AT_ALL;
2968 bvap = VOP_GETATTR(dvp, &bva, 0, cr, NULL) ? NULL : &bva;
2970 if (dvp->v_type != VDIR) {
2971 resp->status = NFS3ERR_NOTDIR;
2972 goto out1;
2975 if (args->link.name == nfs3nametoolong) {
2976 resp->status = NFS3ERR_NAMETOOLONG;
2977 goto out1;
2980 if (args->link.name == NULL || *(args->link.name) == '\0') {
2981 resp->status = NFS3ERR_ACCES;
2982 goto out1;
2985 if (rdonly(exi, req)) {
2986 resp->status = NFS3ERR_ROFS;
2987 goto out1;
2990 if (is_system_labeled()) {
2991 DTRACE_PROBE2(tx__rfs3__log__info__oplinkdir__clabel, char *,
2992 "got client label from request(1)", struct svc_req *, req);
2994 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2995 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
2996 exi)) {
2997 resp->status = NFS3ERR_ACCES;
2998 goto out1;
3003 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3004 name = nfscmd_convname(ca, exi, args->link.name,
3005 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
3007 if (name == NULL) {
3008 resp->status = NFS3ERR_SERVERFAULT;
3009 goto out1;
3012 error = VOP_LINK(dvp, vp, name, cr, NULL, 0);
3014 va.va_mask = AT_ALL;
3015 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3016 ava.va_mask = AT_ALL;
3017 avap = VOP_GETATTR(dvp, &ava, 0, cr, NULL) ? NULL : &ava;
3020 * Force modified data and metadata out to stable storage.
3022 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3023 (void) VOP_FSYNC(dvp, 0, cr, NULL);
3025 if (error)
3026 goto out;
3028 VN_RELE(dvp);
3030 resp->status = NFS3_OK;
3031 vattr_to_post_op_attr(vap, &resp->resok.file_attributes);
3032 vattr_to_wcc_data(bvap, avap, &resp->resok.linkdir_wcc);
3034 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3035 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3037 VN_RELE(vp);
3039 return;
3041 out:
3042 if (curthread->t_flag & T_WOULDBLOCK) {
3043 curthread->t_flag &= ~T_WOULDBLOCK;
3044 resp->status = NFS3ERR_JUKEBOX;
3045 } else
3046 resp->status = puterrno3(error);
3047 out1:
3048 if (name != NULL && name != args->link.name)
3049 kmem_free(name, MAXPATHLEN + 1);
3051 DTRACE_NFSV3_4(op__link__done, struct svc_req *, req,
3052 cred_t *, cr, vnode_t *, vp, LINK3res *, resp);
3054 if (vp != NULL)
3055 VN_RELE(vp);
3056 if (dvp != NULL)
3057 VN_RELE(dvp);
3058 vattr_to_post_op_attr(vap, &resp->resfail.file_attributes);
3059 vattr_to_wcc_data(bvap, avap, &resp->resfail.linkdir_wcc);
3062 void *
3063 rfs3_link_getfh(LINK3args *args)
3066 return (&args->file);
/*
 * This macro defines the size of a response which contains attribute
 * information and one directory entry (whose length is specified by
 * the macro parameter).  If the incoming request is larger than this,
 * then we are guaranteed to be able to return at least one directory
 * entry if one exists.  Therefore, we do not need to check for
 * NFS3ERR_TOOSMALL if the requested size is larger than this.  If it
 * is not, then we need to check to make sure that this error does not
 * need to be returned.
 *
 * NFS3_READDIR_MIN_COUNT is comprised of the following:
 *
 * status - 1 * BYTES_PER_XDR_UNIT
 * attr. flag - 1 * BYTES_PER_XDR_UNIT
 * cookie verifier - 2 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * end of list - 1 * BYTES_PER_XDR_UNIT
 * end of file - 1 * BYTES_PER_XDR_UNIT
 * Name length of directory to the nearest byte
 */

#define	NFS3_READDIR_MIN_COUNT(length)					\
	(roundup((length), BYTES_PER_XDR_UNIT) +			\
	BYTES_PER_XDR_UNIT *						\
	(1 + 1 + 2 + NFS3_SIZEOF_FATTR3 + 1 + 2 + 1 + 2 + 1 + 1))
3098 /* ARGSUSED */
3099 void
3100 rfs3_readdir(READDIR3args *args, READDIR3res *resp, struct exportinfo *exi,
3101 struct svc_req *req, cred_t *cr)
3103 int error;
3104 vnode_t *vp;
3105 struct vattr *vap;
3106 struct vattr va;
3107 struct iovec iov;
3108 struct uio uio;
3109 char *data;
3110 int iseof;
3111 int bufsize;
3112 int namlen;
3113 uint_t count;
3114 struct sockaddr *ca;
3116 vap = NULL;
3118 vp = nfs3_fhtovp(&args->dir, exi);
3120 DTRACE_NFSV3_4(op__readdir__start, struct svc_req *, req,
3121 cred_t *, cr, vnode_t *, vp, READDIR3args *, args);
3123 if (vp == NULL) {
3124 error = ESTALE;
3125 goto out;
3128 if (is_system_labeled()) {
3129 bslabel_t *clabel = req->rq_label;
3131 ASSERT(clabel != NULL);
3132 DTRACE_PROBE2(tx__rfs3__log__info__opreaddir__clabel, char *,
3133 "got client label from request(1)", struct svc_req *, req);
3135 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3136 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3137 exi)) {
3138 resp->status = NFS3ERR_ACCES;
3139 goto out1;
3144 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3146 va.va_mask = AT_ALL;
3147 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3149 if (vp->v_type != VDIR) {
3150 resp->status = NFS3ERR_NOTDIR;
3151 goto out1;
3154 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3155 if (error)
3156 goto out;
3159 * Now don't allow arbitrary count to alloc;
3160 * allow the maximum not to exceed rfs3_tsize()
3162 if (args->count > rfs3_tsize(req))
3163 args->count = rfs3_tsize(req);
3166 * Make sure that there is room to read at least one entry
3167 * if any are available.
3169 if (args->count < DIRENT64_RECLEN(MAXNAMELEN))
3170 count = DIRENT64_RECLEN(MAXNAMELEN);
3171 else
3172 count = args->count;
3174 data = kmem_alloc(count, KM_SLEEP);
3176 iov.iov_base = data;
3177 iov.iov_len = count;
3178 uio.uio_iov = &iov;
3179 uio.uio_iovcnt = 1;
3180 uio.uio_segflg = UIO_SYSSPACE;
3181 uio.uio_extflg = UIO_COPY_CACHED;
3182 uio.uio_loffset = (offset_t)args->cookie;
3183 uio.uio_resid = count;
3185 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3187 va.va_mask = AT_ALL;
3188 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3190 if (error) {
3191 kmem_free(data, count);
3192 goto out;
3196 * If the count was not large enough to be able to guarantee
3197 * to be able to return at least one entry, then need to
3198 * check to see if NFS3ERR_TOOSMALL should be returned.
3200 if (args->count < NFS3_READDIR_MIN_COUNT(MAXNAMELEN)) {
3202 * bufsize is used to keep track of the size of the response.
3203 * It is primed with:
3204 * 1 for the status +
3205 * 1 for the dir_attributes.attributes boolean +
3206 * 2 for the cookie verifier
3207 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3208 * to bytes. If there are directory attributes to be
3209 * returned, then:
3210 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3211 * time BYTES_PER_XDR_UNIT is added to account for them.
3213 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3214 if (vap != NULL)
3215 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3217 * An entry is composed of:
3218 * 1 for the true/false list indicator +
3219 * 2 for the fileid +
3220 * 1 for the length of the name +
3221 * 2 for the cookie +
3222 * all times BYTES_PER_XDR_UNIT to convert from
3223 * XDR units to bytes, plus the length of the name
3224 * rounded up to the nearest BYTES_PER_XDR_UNIT.
3226 if (count != uio.uio_resid) {
3227 namlen = strlen(((struct dirent64 *)data)->d_name);
3228 bufsize += (1 + 2 + 1 + 2) * BYTES_PER_XDR_UNIT +
3229 roundup(namlen, BYTES_PER_XDR_UNIT);
3232 * We need to check to see if the number of bytes left
3233 * to go into the buffer will actually fit into the
3234 * buffer. This is calculated as the size of this
3235 * entry plus:
3236 * 1 for the true/false list indicator +
3237 * 1 for the eof indicator
3238 * times BYTES_PER_XDR_UNIT to convert from from
3239 * XDR units to bytes.
3241 bufsize += (1 + 1) * BYTES_PER_XDR_UNIT;
3242 if (bufsize > args->count) {
3243 kmem_free(data, count);
3244 resp->status = NFS3ERR_TOOSMALL;
3245 goto out1;
3250 * Have a valid readir buffer for the native character
3251 * set. Need to check if a conversion is necessary and
3252 * potentially rewrite the whole buffer. Note that if the
3253 * conversion expands names enough, the structure may not
3254 * fit. In this case, we need to drop entries until if fits
3255 * and patch the counts in order that the next readdir will
3256 * get the correct entries.
3258 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3259 data = nfscmd_convdirent(ca, exi, data, count, &resp->status);
3262 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3264 #if 0 /* notyet */
3266 * Don't do this. It causes local disk writes when just
3267 * reading the file and the overhead is deemed larger
3268 * than the benefit.
3271 * Force modified metadata out to stable storage.
3273 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3274 #endif
3276 resp->status = NFS3_OK;
3277 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3278 resp->resok.cookieverf = 0;
3279 resp->resok.reply.entries = (entry3 *)data;
3280 resp->resok.reply.eof = iseof;
3281 resp->resok.size = count - uio.uio_resid;
3282 resp->resok.count = args->count;
3283 resp->resok.freecount = count;
3285 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3286 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3288 VN_RELE(vp);
3290 return;
3292 out:
3293 if (curthread->t_flag & T_WOULDBLOCK) {
3294 curthread->t_flag &= ~T_WOULDBLOCK;
3295 resp->status = NFS3ERR_JUKEBOX;
3296 } else
3297 resp->status = puterrno3(error);
3298 out1:
3299 DTRACE_NFSV3_4(op__readdir__done, struct svc_req *, req,
3300 cred_t *, cr, vnode_t *, vp, READDIR3res *, resp);
3302 if (vp != NULL) {
3303 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3304 VN_RELE(vp);
3306 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3309 void *
3310 rfs3_readdir_getfh(READDIR3args *args)
3313 return (&args->dir);
3316 void
3317 rfs3_readdir_free(READDIR3res *resp)
3320 if (resp->status == NFS3_OK)
3321 kmem_free(resp->resok.reply.entries, resp->resok.freecount);
/*
 * Step from one dirent64 record to the record that follows it in the
 * same buffer, using the record's own d_reclen.  Any earlier definition
 * of nextdp is discarded first (#undef of an undefined name is a no-op).
 */
#undef nextdp
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
/*
 * This macro computes the size of a response which contains
 * one directory entry including the attributes as well as file handle.
 * If the incoming request is larger than this, then we are guaranteed to be
 * able to return at least one more directory entry if one exists.
 *
 * NFS3_READDIRPLUS_ENTRY is made up of the following:
 *
 * boolean - 1 * BYTES_PER_XDR_UNIT
 * file id - 2 * BYTES_PER_XDR_UNIT
 * directory name length - 1 * BYTES_PER_XDR_UNIT
 * cookie - 2 * BYTES_PER_XDR_UNIT
 * attribute flag - 1 * BYTES_PER_XDR_UNIT
 * attributes - NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT
 * status byte for file handle - 1 * BYTES_PER_XDR_UNIT
 * length of a file handle - 1 * BYTES_PER_XDR_UNIT
 * Maximum length of a file handle (NFS3_MAXFHSIZE)
 * name length of the entry to the nearest bytes
 */
#define	NFS3_READDIRPLUS_ENTRY(namelen)					\
	((1 + 2 + 1 + 2 + 1 + NFS3_SIZEOF_FATTR3 + 1 + 1) *		\
	BYTES_PER_XDR_UNIT +						\
	NFS3_MAXFHSIZE +						\
	roundup((namelen), BYTES_PER_XDR_UNIT))
3353 static int rfs3_readdir_unit = MAXBSIZE;
3355 /* ARGSUSED */
3356 void
3357 rfs3_readdirplus(READDIRPLUS3args *args, READDIRPLUS3res *resp,
3358 struct exportinfo *exi, struct svc_req *req, cred_t *cr)
3360 int error;
3361 vnode_t *vp;
3362 struct vattr *vap;
3363 struct vattr va;
3364 struct iovec iov;
3365 struct uio uio;
3366 char *data;
3367 int iseof;
3368 struct dirent64 *dp;
3369 vnode_t *nvp;
3370 struct vattr *nvap;
3371 struct vattr nva;
3372 entryplus3_info *infop = NULL;
3373 int size = 0;
3374 int nents = 0;
3375 int bufsize = 0;
3376 int entrysize = 0;
3377 int tofit = 0;
3378 int rd_unit = rfs3_readdir_unit;
3379 int prev_len;
3380 int space_left;
3381 int i;
3382 uint_t *namlen = NULL;
3383 char *ndata = NULL;
3384 struct sockaddr *ca;
3385 size_t ret;
3387 vap = NULL;
3389 vp = nfs3_fhtovp(&args->dir, exi);
3391 DTRACE_NFSV3_4(op__readdirplus__start, struct svc_req *, req,
3392 cred_t *, cr, vnode_t *, vp, READDIRPLUS3args *, args);
3394 if (vp == NULL) {
3395 error = ESTALE;
3396 goto out;
3399 if (is_system_labeled()) {
3400 bslabel_t *clabel = req->rq_label;
3402 ASSERT(clabel != NULL);
3403 DTRACE_PROBE2(tx__rfs3__log__info__opreaddirplus__clabel,
3404 char *, "got client label from request(1)",
3405 struct svc_req *, req);
3407 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3408 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3409 exi)) {
3410 resp->status = NFS3ERR_ACCES;
3411 goto out1;
3416 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
3418 va.va_mask = AT_ALL;
3419 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3421 if (vp->v_type != VDIR) {
3422 error = ENOTDIR;
3423 goto out;
3426 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
3427 if (error)
3428 goto out;
3431 * Don't allow arbitrary counts for allocation
3433 if (args->maxcount > rfs3_tsize(req))
3434 args->maxcount = rfs3_tsize(req);
3437 * Make sure that there is room to read at least one entry
3438 * if any are available
3440 args->dircount = MIN(args->dircount, args->maxcount);
3442 if (args->dircount < DIRENT64_RECLEN(MAXNAMELEN))
3443 args->dircount = DIRENT64_RECLEN(MAXNAMELEN);
3446 * This allocation relies on a minimum directory entry
3447 * being roughly 24 bytes. Therefore, the namlen array
3448 * will have enough space based on the maximum number of
3449 * entries to read.
3451 namlen = kmem_alloc(args->dircount, KM_SLEEP);
3453 space_left = args->dircount;
3454 data = kmem_alloc(args->dircount, KM_SLEEP);
3455 dp = (struct dirent64 *)data;
3456 uio.uio_iov = &iov;
3457 uio.uio_iovcnt = 1;
3458 uio.uio_segflg = UIO_SYSSPACE;
3459 uio.uio_extflg = UIO_COPY_CACHED;
3460 uio.uio_loffset = (offset_t)args->cookie;
3463 * bufsize is used to keep track of the size of the response as we
3464 * get post op attributes and filehandles for each entry. This is
3465 * an optimization as the server may have read more entries than will
3466 * fit in the buffer specified by maxcount. We stop calculating
3467 * post op attributes and filehandles once we have exceeded maxcount.
3468 * This will minimize the effect of truncation.
3470 * It is primed with:
3471 * 1 for the status +
3472 * 1 for the dir_attributes.attributes boolean +
3473 * 2 for the cookie verifier
3474 * all times BYTES_PER_XDR_UNIT to convert from XDR units
3475 * to bytes. If there are directory attributes to be
3476 * returned, then:
3477 * NFS3_SIZEOF_FATTR3 for the dir_attributes.attr fattr3
3478 * time BYTES_PER_XDR_UNIT is added to account for them.
3480 bufsize = (1 + 1 + 2) * BYTES_PER_XDR_UNIT;
3481 if (vap != NULL)
3482 bufsize += NFS3_SIZEOF_FATTR3 * BYTES_PER_XDR_UNIT;
3484 getmoredents:
3486 * Here we make a check so that our read unit is not larger than
3487 * the space left in the buffer.
3489 rd_unit = MIN(rd_unit, space_left);
3490 iov.iov_base = (char *)dp;
3491 iov.iov_len = rd_unit;
3492 uio.uio_resid = rd_unit;
3493 prev_len = rd_unit;
3495 error = VOP_READDIR(vp, &uio, cr, &iseof, NULL, 0);
3497 if (error) {
3498 kmem_free(data, args->dircount);
3499 goto out;
3502 if (uio.uio_resid == prev_len && !iseof) {
3503 if (nents == 0) {
3504 kmem_free(data, args->dircount);
3505 resp->status = NFS3ERR_TOOSMALL;
3506 goto out1;
3510 * We could not get any more entries, so get the attributes
3511 * and filehandle for the entries already obtained.
3513 goto good;
3517 * We estimate the size of the response by assuming the
3518 * entry exists and attributes and filehandle are also valid
3520 for (size = prev_len - uio.uio_resid;
3521 size > 0;
3522 size -= dp->d_reclen, dp = nextdp(dp)) {
3524 if (dp->d_ino == 0) {
3525 nents++;
3526 continue;
3529 namlen[nents] = strlen(dp->d_name);
3530 entrysize = NFS3_READDIRPLUS_ENTRY(namlen[nents]);
3533 * We need to check to see if the number of bytes left
3534 * to go into the buffer will actually fit into the
3535 * buffer. This is calculated as the size of this
3536 * entry plus:
3537 * 1 for the true/false list indicator +
3538 * 1 for the eof indicator
3539 * times BYTES_PER_XDR_UNIT to convert from XDR units
3540 * to bytes.
3542 * Also check the dircount limit against the first entry read
3545 tofit = entrysize + (1 + 1) * BYTES_PER_XDR_UNIT;
3546 if (bufsize + tofit > args->maxcount) {
3548 * We make a check here to see if this was the
3549 * first entry being measured. If so, then maxcount
3550 * was too small to begin with and so we need to
3551 * return with NFS3ERR_TOOSMALL.
3553 if (nents == 0) {
3554 kmem_free(data, args->dircount);
3555 resp->status = NFS3ERR_TOOSMALL;
3556 goto out1;
3558 iseof = FALSE;
3559 goto good;
3561 bufsize += entrysize;
3562 nents++;
3566 * If there is enough room to fit at least 1 more entry including
3567 * post op attributes and filehandle in the buffer AND that we haven't
3568 * exceeded dircount then go back and get some more.
3570 if (!iseof &&
3571 (args->maxcount - bufsize) >= NFS3_READDIRPLUS_ENTRY(MAXNAMELEN)) {
3572 space_left -= (prev_len - uio.uio_resid);
3573 if (space_left >= DIRENT64_RECLEN(MAXNAMELEN))
3574 goto getmoredents;
3576 /* else, fall through */
3578 good:
3579 va.va_mask = AT_ALL;
3580 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3582 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3584 infop = kmem_alloc(nents * sizeof (struct entryplus3_info), KM_SLEEP);
3585 resp->resok.infop = infop;
3587 dp = (struct dirent64 *)data;
3588 for (i = 0; i < nents; i++) {
3590 if (dp->d_ino == 0) {
3591 infop[i].attr.attributes = FALSE;
3592 infop[i].fh.handle_follows = FALSE;
3593 dp = nextdp(dp);
3594 continue;
3597 infop[i].namelen = namlen[i];
3599 error = VOP_LOOKUP(vp, dp->d_name, &nvp, NULL, 0, NULL, cr,
3600 NULL, NULL, NULL);
3601 if (error) {
3602 infop[i].attr.attributes = FALSE;
3603 infop[i].fh.handle_follows = FALSE;
3604 dp = nextdp(dp);
3605 continue;
3608 nva.va_mask = AT_ALL;
3609 nvap = rfs4_delegated_getattr(nvp, &nva, 0, cr) ? NULL : &nva;
3611 /* Lie about the object type for a referral */
3612 if (vn_is_nfs_reparse(nvp, cr))
3613 nvap->va_type = VLNK;
3615 vattr_to_post_op_attr(nvap, &infop[i].attr);
3617 error = makefh3(&infop[i].fh.handle, nvp, exi);
3618 if (!error)
3619 infop[i].fh.handle_follows = TRUE;
3620 else
3621 infop[i].fh.handle_follows = FALSE;
3623 VN_RELE(nvp);
3624 dp = nextdp(dp);
3627 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3628 ret = nfscmd_convdirplus(ca, exi, data, nents, args->dircount, &ndata);
3629 if (ndata == NULL)
3630 ndata = data;
3632 if (ret > 0) {
3634 * We had to drop one or more entries in order to fit
3635 * during the character conversion. We need to patch
3636 * up the size and eof info.
3638 if (iseof)
3639 iseof = FALSE;
3641 ret = nfscmd_dropped_entrysize((struct dirent64 *)data,
3642 nents, ret);
3646 #if 0 /* notyet */
3648 * Don't do this. It causes local disk writes when just
3649 * reading the file and the overhead is deemed larger
3650 * than the benefit.
3653 * Force modified metadata out to stable storage.
3655 (void) VOP_FSYNC(vp, FNODSYNC, cr, NULL);
3656 #endif
3658 kmem_free(namlen, args->dircount);
3660 resp->status = NFS3_OK;
3661 vattr_to_post_op_attr(vap, &resp->resok.dir_attributes);
3662 resp->resok.cookieverf = 0;
3663 resp->resok.reply.entries = (entryplus3 *)ndata;
3664 resp->resok.reply.eof = iseof;
3665 resp->resok.size = nents;
3666 resp->resok.count = args->dircount - ret;
3667 resp->resok.maxcount = args->maxcount;
3669 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3670 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3671 if (ndata != data)
3672 kmem_free(data, args->dircount);
3675 VN_RELE(vp);
3677 return;
3679 out:
3680 if (curthread->t_flag & T_WOULDBLOCK) {
3681 curthread->t_flag &= ~T_WOULDBLOCK;
3682 resp->status = NFS3ERR_JUKEBOX;
3683 } else {
3684 resp->status = puterrno3(error);
3686 out1:
3687 DTRACE_NFSV3_4(op__readdirplus__done, struct svc_req *, req,
3688 cred_t *, cr, vnode_t *, vp, READDIRPLUS3res *, resp);
3690 if (vp != NULL) {
3691 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
3692 VN_RELE(vp);
3695 if (namlen != NULL)
3696 kmem_free(namlen, args->dircount);
3698 vattr_to_post_op_attr(vap, &resp->resfail.dir_attributes);
3701 void *
3702 rfs3_readdirplus_getfh(READDIRPLUS3args *args)
3705 return (&args->dir);
3708 void
3709 rfs3_readdirplus_free(READDIRPLUS3res *resp)
3712 if (resp->status == NFS3_OK) {
3713 kmem_free(resp->resok.reply.entries, resp->resok.count);
3714 kmem_free(resp->resok.infop,
3715 resp->resok.size * sizeof (struct entryplus3_info));
3719 /* ARGSUSED */
3720 void
3721 rfs3_fsstat(FSSTAT3args *args, FSSTAT3res *resp, struct exportinfo *exi,
3722 struct svc_req *req, cred_t *cr)
3724 int error;
3725 vnode_t *vp;
3726 struct vattr *vap;
3727 struct vattr va;
3728 struct statvfs64 sb;
3730 vap = NULL;
3732 vp = nfs3_fhtovp(&args->fsroot, exi);
3734 DTRACE_NFSV3_4(op__fsstat__start, struct svc_req *, req,
3735 cred_t *, cr, vnode_t *, vp, FSSTAT3args *, args);
3737 if (vp == NULL) {
3738 error = ESTALE;
3739 goto out;
3742 if (is_system_labeled()) {
3743 bslabel_t *clabel = req->rq_label;
3745 ASSERT(clabel != NULL);
3746 DTRACE_PROBE2(tx__rfs3__log__info__opfsstat__clabel, char *,
3747 "got client label from request(1)", struct svc_req *, req);
3749 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3750 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3751 exi)) {
3752 resp->status = NFS3ERR_ACCES;
3753 goto out1;
3758 error = VFS_STATVFS(vp->v_vfsp, &sb);
3760 va.va_mask = AT_ALL;
3761 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3763 if (error)
3764 goto out;
3766 resp->status = NFS3_OK;
3767 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3768 if (sb.f_blocks != (fsblkcnt64_t)-1)
3769 resp->resok.tbytes = (size3)sb.f_frsize * (size3)sb.f_blocks;
3770 else
3771 resp->resok.tbytes = (size3)sb.f_blocks;
3772 if (sb.f_bfree != (fsblkcnt64_t)-1)
3773 resp->resok.fbytes = (size3)sb.f_frsize * (size3)sb.f_bfree;
3774 else
3775 resp->resok.fbytes = (size3)sb.f_bfree;
3776 if (sb.f_bavail != (fsblkcnt64_t)-1)
3777 resp->resok.abytes = (size3)sb.f_frsize * (size3)sb.f_bavail;
3778 else
3779 resp->resok.abytes = (size3)sb.f_bavail;
3780 resp->resok.tfiles = (size3)sb.f_files;
3781 resp->resok.ffiles = (size3)sb.f_ffree;
3782 resp->resok.afiles = (size3)sb.f_favail;
3783 resp->resok.invarsec = 0;
3785 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3786 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3787 VN_RELE(vp);
3789 return;
3791 out:
3792 if (curthread->t_flag & T_WOULDBLOCK) {
3793 curthread->t_flag &= ~T_WOULDBLOCK;
3794 resp->status = NFS3ERR_JUKEBOX;
3795 } else
3796 resp->status = puterrno3(error);
3797 out1:
3798 DTRACE_NFSV3_4(op__fsstat__done, struct svc_req *, req,
3799 cred_t *, cr, vnode_t *, vp, FSSTAT3res *, resp);
3801 if (vp != NULL)
3802 VN_RELE(vp);
3803 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
3806 void *
3807 rfs3_fsstat_getfh(FSSTAT3args *args)
3810 return (&args->fsroot);
3813 void
3814 rfs3_fsinfo(FSINFO3args *args, FSINFO3res *resp, struct exportinfo *exi,
3815 struct svc_req *req, cred_t *cr)
3817 vnode_t *vp;
3818 struct vattr *vap;
3819 struct vattr va;
3820 uint32_t xfer_size;
3821 ulong_t l = 0;
3822 int error;
3824 vp = nfs3_fhtovp(&args->fsroot, exi);
3826 DTRACE_NFSV3_4(op__fsinfo__start, struct svc_req *, req,
3827 cred_t *, cr, vnode_t *, vp, FSINFO3args *, args);
3829 if (vp == NULL) {
3830 if (curthread->t_flag & T_WOULDBLOCK) {
3831 curthread->t_flag &= ~T_WOULDBLOCK;
3832 resp->status = NFS3ERR_JUKEBOX;
3833 } else
3834 resp->status = NFS3ERR_STALE;
3835 vattr_to_post_op_attr(NULL, &resp->resfail.obj_attributes);
3836 goto out;
3839 if (is_system_labeled()) {
3840 bslabel_t *clabel = req->rq_label;
3842 ASSERT(clabel != NULL);
3843 DTRACE_PROBE2(tx__rfs3__log__info__opfsinfo__clabel, char *,
3844 "got client label from request(1)", struct svc_req *, req);
3846 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3847 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3848 exi)) {
3849 resp->status = NFS3ERR_STALE;
3850 vattr_to_post_op_attr(NULL,
3851 &resp->resfail.obj_attributes);
3852 goto out;
3857 va.va_mask = AT_ALL;
3858 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3860 resp->status = NFS3_OK;
3861 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3862 xfer_size = rfs3_tsize(req);
3863 resp->resok.rtmax = xfer_size;
3864 resp->resok.rtpref = xfer_size;
3865 resp->resok.rtmult = DEV_BSIZE;
3866 resp->resok.wtmax = xfer_size;
3867 resp->resok.wtpref = xfer_size;
3868 resp->resok.wtmult = DEV_BSIZE;
3869 resp->resok.dtpref = MAXBSIZE;
3872 * Large file spec: want maxfilesize based on limit of
3873 * underlying filesystem. We can guess 2^31-1 if need be.
3875 error = VOP_PATHCONF(vp, _PC_FILESIZEBITS, &l, cr, NULL);
3876 if (error) {
3877 resp->status = puterrno3(error);
3878 goto out;
3882 * If the underlying file system does not support _PC_FILESIZEBITS,
3883 * return a reasonable default. Note that error code on VOP_PATHCONF
3884 * will be 0, even if the underlying file system does not support
3885 * _PC_FILESIZEBITS.
3887 if (l == (ulong_t)-1) {
3888 resp->resok.maxfilesize = MAXOFF32_T;
3889 } else {
3890 if (l >= (sizeof (uint64_t) * 8))
3891 resp->resok.maxfilesize = INT64_MAX;
3892 else
3893 resp->resok.maxfilesize = (1LL << (l-1)) - 1;
3896 resp->resok.time_delta.seconds = 0;
3897 resp->resok.time_delta.nseconds = 1000;
3898 resp->resok.properties = FSF3_LINK | FSF3_SYMLINK |
3899 FSF3_HOMOGENEOUS | FSF3_CANSETTIME;
3901 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3902 cred_t *, cr, vnode_t *, vp, FSINFO3res *, resp);
3904 VN_RELE(vp);
3906 return;
3908 out:
3909 DTRACE_NFSV3_4(op__fsinfo__done, struct svc_req *, req,
3910 cred_t *, cr, vnode_t *, NULL, FSINFO3res *, resp);
3911 if (vp != NULL)
3912 VN_RELE(vp);
3915 void *
3916 rfs3_fsinfo_getfh(FSINFO3args *args)
3919 return (&args->fsroot);
3922 /* ARGSUSED */
3923 void
3924 rfs3_pathconf(PATHCONF3args *args, PATHCONF3res *resp, struct exportinfo *exi,
3925 struct svc_req *req, cred_t *cr)
3927 int error;
3928 vnode_t *vp;
3929 struct vattr *vap;
3930 struct vattr va;
3931 ulong_t val;
3933 vap = NULL;
3935 vp = nfs3_fhtovp(&args->object, exi);
3937 DTRACE_NFSV3_4(op__pathconf__start, struct svc_req *, req,
3938 cred_t *, cr, vnode_t *, vp, PATHCONF3args *, args);
3940 if (vp == NULL) {
3941 error = ESTALE;
3942 goto out;
3945 if (is_system_labeled()) {
3946 bslabel_t *clabel = req->rq_label;
3948 ASSERT(clabel != NULL);
3949 DTRACE_PROBE2(tx__rfs3__log__info__oppathconf__clabel, char *,
3950 "got client label from request(1)", struct svc_req *, req);
3952 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3953 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3954 exi)) {
3955 resp->status = NFS3ERR_ACCES;
3956 goto out1;
3961 va.va_mask = AT_ALL;
3962 vap = VOP_GETATTR(vp, &va, 0, cr, NULL) ? NULL : &va;
3964 error = VOP_PATHCONF(vp, _PC_LINK_MAX, &val, cr, NULL);
3965 if (error)
3966 goto out;
3967 resp->resok.info.link_max = (uint32)val;
3969 error = VOP_PATHCONF(vp, _PC_NAME_MAX, &val, cr, NULL);
3970 if (error)
3971 goto out;
3972 resp->resok.info.name_max = (uint32)val;
3974 error = VOP_PATHCONF(vp, _PC_NO_TRUNC, &val, cr, NULL);
3975 if (error)
3976 goto out;
3977 if (val == 1)
3978 resp->resok.info.no_trunc = TRUE;
3979 else
3980 resp->resok.info.no_trunc = FALSE;
3982 error = VOP_PATHCONF(vp, _PC_CHOWN_RESTRICTED, &val, cr, NULL);
3983 if (error)
3984 goto out;
3985 if (val == 1)
3986 resp->resok.info.chown_restricted = TRUE;
3987 else
3988 resp->resok.info.chown_restricted = FALSE;
3990 resp->status = NFS3_OK;
3991 vattr_to_post_op_attr(vap, &resp->resok.obj_attributes);
3992 resp->resok.info.case_insensitive = FALSE;
3993 resp->resok.info.case_preserving = TRUE;
3994 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
3995 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
3996 VN_RELE(vp);
3997 return;
3999 out:
4000 if (curthread->t_flag & T_WOULDBLOCK) {
4001 curthread->t_flag &= ~T_WOULDBLOCK;
4002 resp->status = NFS3ERR_JUKEBOX;
4003 } else
4004 resp->status = puterrno3(error);
4005 out1:
4006 DTRACE_NFSV3_4(op__pathconf__done, struct svc_req *, req,
4007 cred_t *, cr, vnode_t *, vp, PATHCONF3res *, resp);
4008 if (vp != NULL)
4009 VN_RELE(vp);
4010 vattr_to_post_op_attr(vap, &resp->resfail.obj_attributes);
4013 void *
4014 rfs3_pathconf_getfh(PATHCONF3args *args)
4017 return (&args->object);
4020 void
4021 rfs3_commit(COMMIT3args *args, COMMIT3res *resp, struct exportinfo *exi,
4022 struct svc_req *req, cred_t *cr)
4024 int error;
4025 vnode_t *vp;
4026 struct vattr *bvap;
4027 struct vattr bva;
4028 struct vattr *avap;
4029 struct vattr ava;
4031 bvap = NULL;
4032 avap = NULL;
4034 vp = nfs3_fhtovp(&args->file, exi);
4036 DTRACE_NFSV3_4(op__commit__start, struct svc_req *, req,
4037 cred_t *, cr, vnode_t *, vp, COMMIT3args *, args);
4039 if (vp == NULL) {
4040 error = ESTALE;
4041 goto out;
4044 bva.va_mask = AT_ALL;
4045 error = VOP_GETATTR(vp, &bva, 0, cr, NULL);
4048 * If we can't get the attributes, then we can't do the
4049 * right access checking. So, we'll fail the request.
4051 if (error)
4052 goto out;
4054 bvap = &bva;
4056 if (rdonly(exi, req)) {
4057 resp->status = NFS3ERR_ROFS;
4058 goto out1;
4061 if (vp->v_type != VREG) {
4062 resp->status = NFS3ERR_INVAL;
4063 goto out1;
4066 if (is_system_labeled()) {
4067 bslabel_t *clabel = req->rq_label;
4069 ASSERT(clabel != NULL);
4070 DTRACE_PROBE2(tx__rfs3__log__info__opcommit__clabel, char *,
4071 "got client label from request(1)", struct svc_req *, req);
4073 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4074 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4075 exi)) {
4076 resp->status = NFS3ERR_ACCES;
4077 goto out1;
4082 if (crgetuid(cr) != bva.va_uid &&
4083 (error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL)))
4084 goto out;
4086 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
4088 ava.va_mask = AT_ALL;
4089 avap = VOP_GETATTR(vp, &ava, 0, cr, NULL) ? NULL : &ava;
4091 if (error)
4092 goto out;
4094 resp->status = NFS3_OK;
4095 vattr_to_wcc_data(bvap, avap, &resp->resok.file_wcc);
4096 resp->resok.verf = write3verf;
4098 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4099 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4101 VN_RELE(vp);
4103 return;
4105 out:
4106 if (curthread->t_flag & T_WOULDBLOCK) {
4107 curthread->t_flag &= ~T_WOULDBLOCK;
4108 resp->status = NFS3ERR_JUKEBOX;
4109 } else
4110 resp->status = puterrno3(error);
4111 out1:
4112 DTRACE_NFSV3_4(op__commit__done, struct svc_req *, req,
4113 cred_t *, cr, vnode_t *, vp, COMMIT3res *, resp);
4115 if (vp != NULL)
4116 VN_RELE(vp);
4117 vattr_to_wcc_data(bvap, avap, &resp->resfail.file_wcc);
4120 void *
4121 rfs3_commit_getfh(COMMIT3args *args)
4124 return (&args->file);
4127 static int
4128 sattr3_to_vattr(sattr3 *sap, struct vattr *vap)
4131 vap->va_mask = 0;
4133 if (sap->mode.set_it) {
4134 vap->va_mode = (mode_t)sap->mode.mode;
4135 vap->va_mask |= AT_MODE;
4137 if (sap->uid.set_it) {
4138 vap->va_uid = (uid_t)sap->uid.uid;
4139 vap->va_mask |= AT_UID;
4141 if (sap->gid.set_it) {
4142 vap->va_gid = (gid_t)sap->gid.gid;
4143 vap->va_mask |= AT_GID;
4145 if (sap->size.set_it) {
4146 if (sap->size.size > (size3)((u_longlong_t)-1))
4147 return (EINVAL);
4148 vap->va_size = sap->size.size;
4149 vap->va_mask |= AT_SIZE;
4151 if (sap->atime.set_it == SET_TO_CLIENT_TIME) {
4152 #ifndef _LP64
4153 /* check time validity */
4154 if (!NFS3_TIME_OK(sap->atime.atime.seconds))
4155 return (EOVERFLOW);
4156 #endif
4158 * nfs protocol defines times as unsigned so don't extend sign,
4159 * unless sysadmin set nfs_allow_preepoch_time.
4161 NFS_TIME_T_CONVERT(vap->va_atime.tv_sec,
4162 sap->atime.atime.seconds);
4163 vap->va_atime.tv_nsec = (uint32_t)sap->atime.atime.nseconds;
4164 vap->va_mask |= AT_ATIME;
4165 } else if (sap->atime.set_it == SET_TO_SERVER_TIME) {
4166 gethrestime(&vap->va_atime);
4167 vap->va_mask |= AT_ATIME;
4169 if (sap->mtime.set_it == SET_TO_CLIENT_TIME) {
4170 #ifndef _LP64
4171 /* check time validity */
4172 if (!NFS3_TIME_OK(sap->mtime.mtime.seconds))
4173 return (EOVERFLOW);
4174 #endif
4176 * nfs protocol defines times as unsigned so don't extend sign,
4177 * unless sysadmin set nfs_allow_preepoch_time.
4179 NFS_TIME_T_CONVERT(vap->va_mtime.tv_sec,
4180 sap->mtime.mtime.seconds);
4181 vap->va_mtime.tv_nsec = (uint32_t)sap->mtime.mtime.nseconds;
4182 vap->va_mask |= AT_MTIME;
4183 } else if (sap->mtime.set_it == SET_TO_SERVER_TIME) {
4184 gethrestime(&vap->va_mtime);
4185 vap->va_mask |= AT_MTIME;
4188 return (0);
4191 static ftype3 vt_to_nf3[] = {
4192 0, NF3REG, NF3DIR, NF3BLK, NF3CHR, NF3LNK, NF3FIFO, 0, 0, NF3SOCK, 0
4195 static int
4196 vattr_to_fattr3(struct vattr *vap, fattr3 *fap)
4199 ASSERT(vap->va_type >= VNON && vap->va_type <= VBAD);
4200 /* Return error if time or size overflow */
4201 if (! (NFS_VAP_TIME_OK(vap) && NFS3_SIZE_OK(vap->va_size))) {
4202 return (EOVERFLOW);
4204 fap->type = vt_to_nf3[vap->va_type];
4205 fap->mode = (mode3)(vap->va_mode & MODEMASK);
4206 fap->nlink = (uint32)vap->va_nlink;
4207 if (vap->va_uid == UID_NOBODY)
4208 fap->uid = (uid3)NFS_UID_NOBODY;
4209 else
4210 fap->uid = (uid3)vap->va_uid;
4211 if (vap->va_gid == GID_NOBODY)
4212 fap->gid = (gid3)NFS_GID_NOBODY;
4213 else
4214 fap->gid = (gid3)vap->va_gid;
4215 fap->size = (size3)vap->va_size;
4216 fap->used = (size3)DEV_BSIZE * (size3)vap->va_nblocks;
4217 fap->rdev.specdata1 = (uint32)getmajor(vap->va_rdev);
4218 fap->rdev.specdata2 = (uint32)getminor(vap->va_rdev);
4219 fap->fsid = (uint64)vap->va_fsid;
4220 fap->fileid = (fileid3)vap->va_nodeid;
4221 fap->atime.seconds = vap->va_atime.tv_sec;
4222 fap->atime.nseconds = vap->va_atime.tv_nsec;
4223 fap->mtime.seconds = vap->va_mtime.tv_sec;
4224 fap->mtime.nseconds = vap->va_mtime.tv_nsec;
4225 fap->ctime.seconds = vap->va_ctime.tv_sec;
4226 fap->ctime.nseconds = vap->va_ctime.tv_nsec;
4227 return (0);
4230 static int
4231 vattr_to_wcc_attr(struct vattr *vap, wcc_attr *wccap)
4234 /* Return error if time or size overflow */
4235 if (!(NFS_TIME_T_OK(vap->va_mtime.tv_sec) &&
4236 NFS_TIME_T_OK(vap->va_ctime.tv_sec) &&
4237 NFS3_SIZE_OK(vap->va_size))) {
4238 return (EOVERFLOW);
4240 wccap->size = (size3)vap->va_size;
4241 wccap->mtime.seconds = vap->va_mtime.tv_sec;
4242 wccap->mtime.nseconds = vap->va_mtime.tv_nsec;
4243 wccap->ctime.seconds = vap->va_ctime.tv_sec;
4244 wccap->ctime.nseconds = vap->va_ctime.tv_nsec;
4245 return (0);
4248 static void
4249 vattr_to_pre_op_attr(struct vattr *vap, pre_op_attr *poap)
4252 /* don't return attrs if time overflow */
4253 if ((vap != NULL) && !vattr_to_wcc_attr(vap, &poap->attr)) {
4254 poap->attributes = TRUE;
4255 } else
4256 poap->attributes = FALSE;
4259 void
4260 vattr_to_post_op_attr(struct vattr *vap, post_op_attr *poap)
4263 /* don't return attrs if time overflow */
4264 if ((vap != NULL) && !vattr_to_fattr3(vap, &poap->attr)) {
4265 poap->attributes = TRUE;
4266 } else
4267 poap->attributes = FALSE;
4270 static void
4271 vattr_to_wcc_data(struct vattr *bvap, struct vattr *avap, wcc_data *wccp)
4274 vattr_to_pre_op_attr(bvap, &wccp->before);
4275 vattr_to_post_op_attr(avap, &wccp->after);
4278 void
4279 rfs3_srvrinit(void)
4281 struct rfs3_verf_overlay {
4282 uint_t id; /* a "unique" identifier */
4283 int ts; /* a unique timestamp */
4284 } *verfp;
4285 timestruc_t now;
4288 * The following algorithm attempts to find a unique verifier
4289 * to be used as the write verifier returned from the server
4290 * to the client. It is important that this verifier change
4291 * whenever the server reboots. Of secondary importance, it
4292 * is important for the verifier to be unique between two
4293 * different servers.
4295 * Thus, an attempt is made to use the system hostid and the
4296 * current time in seconds when the nfssrv kernel module is
4297 * loaded. It is assumed that an NFS server will not be able
4298 * to boot and then to reboot in less than a second. If the
4299 * hostid has not been set, then the current high resolution
4300 * time is used. This will ensure different verifiers each
4301 * time the server reboots and minimize the chances that two
4302 * different servers will have the same verifier.
4305 #ifndef lint
4307 * We ASSERT that this constant logic expression is
4308 * always true because in the past, it wasn't.
4310 ASSERT(sizeof (*verfp) <= sizeof (write3verf));
4311 #endif
4313 gethrestime(&now);
4314 verfp = (struct rfs3_verf_overlay *)&write3verf;
4315 verfp->ts = (int)now.tv_sec;
4316 verfp->id = zone_get_hostid(NULL);
4318 if (verfp->id == 0)
4319 verfp->id = (uint_t)now.tv_nsec;
4321 nfs3_srv_caller_id = fs_new_caller_id();
4325 static int
4326 rdma_setup_read_data3(READ3args *args, READ3resok *rok)
4328 struct clist *wcl;
4329 int wlist_len;
4330 count3 count = rok->count;
4332 wcl = args->wlist;
4333 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
4334 return (FALSE);
4337 wcl = args->wlist;
4338 rok->wlist_len = wlist_len;
4339 rok->wlist = wcl;
4340 return (TRUE);
/*
 * NFSv3 server teardown hook.  Intentionally empty.
 */
void
rfs3_srvrfini(void)
{
	/* Nothing to do */
}