/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	All Rights Reserved	*/
#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
#include <sys/sysmacros.h>
#include <sys/kstat.h>
#include <sys/mkdev.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <sys/errno.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/utsname.h>
#include <sys/bootconf.h>
#include <sys/modctl.h>
#include <sys/flock.h>
#include <sys/stropts.h>
#include <sys/strsubr.h>
#include <sys/atomic.h>
#include <sys/policy.h>
#include <rpc/types.h>
#include <rpc/rpcsec_gss.h>
#include <nfs/nfs_clnt.h>
#include <nfs/mount.h>
#include <nfs/nfs_acl.h>
#include <sys/fs_subr.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>
#include <nfs/nfssys.h>
/*
 * These are "special" state IDs and file handles that
 * match any delegation state ID or file handle.  This
 * is for testing purposes only.
 */
stateid4 nfs4_deleg_any = { 0x7FFFFFF0 };
char nfs4_deleg_fh[] = "\0377\0376\0375\0374";
nfs_fh4 nfs4_deleg_anyfh = { sizeof (nfs4_deleg_fh)-1, nfs4_deleg_fh };
nfsstat4 cb4_getattr_fail = NFS4_OK;
nfsstat4 cb4_recall_fail = NFS4_OK;

int nfs4_callback_debug;
int nfs4_recall_debug;
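/*
 * Note: cb4_getattr_fail and cb4_recall_fail are error-injection knobs;
 * setting either to an NFS4 protocol error (e.g. from a kernel debugger
 * on a test system) forces the corresponding callback to fail with that
 * status -- see the error injection hooks in cb_getattr() and
 * cb_recall() below.  nfs4_callback_debug and nfs4_recall_debug gate
 * the NFS4_DEBUG messages in this file.
 */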
#define	CB_NOTE(x)	NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE, x))
#define	CB_WARN(x)	NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x))
#define	CB_WARN1(x, y)	NFS4_DEBUG(nfs4_callback_debug, (CE_WARN, x, y))
enum nfs4_delegreturn_policy nfs4_delegreturn_policy = INACTIVE;

static zone_key_t nfs4_callback_zone_key;
/*
 * NFS4_MAPSIZE is the number of bytes we are willing to consume
 * for the block allocation map when the server grants a NFS_LIMIT_BLOCK
 * delegation.
 */
#define	NFS4_MAPSIZE	8192
#define	NFS4_MAPWORDS	NFS4_MAPSIZE/sizeof (uint_t)
#define	NbPW		(NBBY*sizeof (uint_t))
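/*
 * At one bit per block, an 8192-byte map covers up to
 * NFS4_MAPSIZE * NBBY (8192 * 8 = 65536) blocks of an
 * NFS_LIMIT_BLOCK-style space limit: NFS4_MAPWORDS is 2048 32-bit
 * words, and NbPW (32) bits per word gives the same 65536 total.
 */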
static int nfs4_num_prognums = 1024;
static SVC_CALLOUT_TABLE nfs4_cb_sct;

struct nfs4_dnode {
	list_node_t	linkage;
	rnode4_t	*rnodep;
	int		flags;		/* Flags for nfs4delegreturn_impl() */
};
static const struct nfs4_callback_stats nfs4_callback_stats_tmpl = {
	{ "delegations",	KSTAT_DATA_UINT64 },
	{ "cb_getattr",		KSTAT_DATA_UINT64 },
	{ "cb_recall",		KSTAT_DATA_UINT64 },
	{ "cb_null",		KSTAT_DATA_UINT64 },
	{ "cb_dispatch",	KSTAT_DATA_UINT64 },
	{ "delegaccept_r",	KSTAT_DATA_UINT64 },
	{ "delegaccept_rw",	KSTAT_DATA_UINT64 },
	{ "delegreturn",	KSTAT_DATA_UINT64 },
	{ "callbacks",		KSTAT_DATA_UINT64 },
	{ "claim_cur",		KSTAT_DATA_UINT64 },
	{ "claim_cur_ok",	KSTAT_DATA_UINT64 },
	{ "recall_trunc",	KSTAT_DATA_UINT64 },
	{ "recall_failed",	KSTAT_DATA_UINT64 },
	{ "return_limit_write",	KSTAT_DATA_UINT64 },
	{ "return_limit_addmap", KSTAT_DATA_UINT64 },
	{ "deleg_recover",	KSTAT_DATA_UINT64 },
	{ "cb_illegal",		KSTAT_DATA_UINT64 }
};
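/*
 * Each zone gets its own copy of these counters; they are registered
 * as the "nfs:0:nfs4_callback_stats" kstat in nfs4_callback_init_zone()
 * below, so they can be inspected with kstat(1M) from within the zone.
 */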
struct nfs4_cb_port {
	list_node_t	linkage;	/* linkage into per-zone port list */
	char		netid[KNC_STRSIZE];
	char		uaddr[KNC_STRSIZE];
	char		protofmly[KNC_STRSIZE];
	char		proto[KNC_STRSIZE];
};
static int cb_getattr_bytes;
struct cb_recall_pass {
	rnode4_t	*rp;
	int		flags;		/* Flags for nfs4delegreturn_impl() */
	bool_t		truncate;
};
static nfs4_open_stream_t *get_next_deleg_stream(rnode4_t *, int);
static void nfs4delegreturn_thread(struct cb_recall_pass *);
static int deleg_reopen(vnode_t *, bool_t *, struct nfs4_callback_globals *,
    int);
static void nfs4_dlistadd(rnode4_t *, struct nfs4_callback_globals *, int);
static void nfs4_dlistclean_impl(struct nfs4_callback_globals *, int);
static int nfs4delegreturn_impl(rnode4_t *, int,
    struct nfs4_callback_globals *);
static void nfs4delegreturn_cleanup_impl(rnode4_t *, nfs4_server_t *,
    struct nfs4_callback_globals *);
static void
cb_getattr(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
	CB_GETATTR4args *args = &argop->nfs_cb_argop4_u.opcbgetattr;
	CB_GETATTR4res *resp = &resop->nfs_cb_resop4_u.opcbgetattr;
	rnode4_t *rp;
	vnode_t *vp;
	bool_t found = FALSE;
	struct nfs4_server *sp;
	fattr4 *fap;
	rpc_inline_t *fdata;
	long mapcnt;
	fattr4_change change;
	fattr4_size size;
	uint_t rflag;

	ncg->nfs4_callback_stats.cb_getattr.value.ui64++;
	/*
	 * error injection hook: set cb_getattr_fail global to
	 * NFS4 protocol error to be returned
	 */
	if (cb4_getattr_fail != NFS4_OK) {
		*cs->statusp = resp->status = cb4_getattr_fail;
		return;
	}

	resp->obj_attributes.attrmask = 0;
	mutex_enter(&ncg->nfs4_cb_lock);
	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
	mutex_exit(&ncg->nfs4_cb_lock);

	if (nfs4_server_vlock(sp, 0) == FALSE) {

		CB_WARN("cb_getattr: cannot find server\n");

		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}
	/*
	 * In cb_compound, callback_ident was validated against rq_prog,
	 * but we couldn't verify that it was set to the value we provided
	 * at setclientid time (because we didn't have server struct yet).
	 * Now we have the server struct, but don't have callback_ident
	 * handy.  So, validate server struct program number against req
	 * RPC's prog number.  At this point, we know the RPC prog num
	 * is valid (else we wouldn't be here); however, we don't know
	 * that it was the prog number we supplied to this server at
	 * setclientid time.  If the prog numbers aren't equivalent, then
	 * log the problem and fail the request because either cbserv
	 * and/or cbclient are confused.  This will probably never happen.
	 */
	if (sp->s_program != req->rq_prog) {
#ifdef DEBUG
		zcmn_err(getzoneid(), CE_WARN,
		    "cb_getattr: wrong server program number srv=%d req=%d\n",
		    sp->s_program, req->rq_prog);
#else
		zcmn_err(getzoneid(), CE_WARN,
		    "cb_getattr: wrong server program number\n");
#endif
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}
	/*
	 * Search the delegation list for a matching file handle;
	 * mutex on sp prevents the list from changing.
	 */
	rp = list_head(&sp->s_deleg_list);
	for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
		nfs4_fhandle_t fhandle;

		sfh4_copyval(rp->r_fh, &fhandle);

		if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
		    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
		    fhandle.fh_len) == 0)) {

			found = TRUE;
			break;
		}
#ifdef DEBUG
		if (nfs4_deleg_anyfh.nfs_fh4_len == args->fh.nfs_fh4_len &&
		    bcmp(nfs4_deleg_anyfh.nfs_fh4_val, args->fh.nfs_fh4_val,
		    args->fh.nfs_fh4_len) == 0) {

			found = TRUE;
			break;
		}
#endif
	}
	/*
	 * VN_HOLD the vnode before releasing s_lock to guarantee
	 * we have a valid vnode reference.
	 */
	if (found == TRUE) {
		vp = RTOV4(rp);
		VN_HOLD(vp);
	}

	mutex_exit(&sp->s_lock);
	nfs4_server_rele(sp);

	if (found == FALSE) {

		CB_WARN("cb_getattr: bad fhandle\n");

		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}
	/*
	 * Figure out which attributes the server wants.  We only
	 * offer FATTR4_CHANGE & FATTR4_SIZE; ignore the rest.
	 */
	fdata = kmem_alloc(cb_getattr_bytes, KM_SLEEP);

	/*
	 * Don't actually need to create XDR to encode these
	 * simple data structures.
	 * xdrmem_create(&xdr, fdata, cb_getattr_bytes, XDR_ENCODE);
	 */
	fap = &resp->obj_attributes;

	fap->attrmask = 0;
	/* attrlist4_len starts at 0 and increases as attrs are processed */
	fap->attrlist4 = (char *)fdata;
	fap->attrlist4_len = 0;
	/* don't supply attrs if request was zero */
	if (args->attr_request != 0) {
		if (args->attr_request & FATTR4_CHANGE_MASK) {
			/*
			 * If the file is mmapped, then increment the change
			 * attribute and return it.  This will guarantee that
			 * the server will perceive that the file has changed
			 * if there is any chance that the client application
			 * has changed it.  Otherwise, just return the change
			 * attribute as it has been updated by nfs4write_deleg.
			 */
			mutex_enter(&rp->r_statelock);
			mapcnt = rp->r_mapcnt;
			rflag = rp->r_flags;
			mutex_exit(&rp->r_statelock);

			mutex_enter(&rp->r_statev4_lock);
			/*
			 * If object mapped, then always return new change.
			 * Otherwise, return change if object has dirty
			 * pages.  If object doesn't have any dirty pages,
			 * then all changes have been pushed to server, so
			 * reset change to grant change.
			 */
			if (mapcnt)
				rp->r_deleg_change++;
			else if (! (rflag & R4DIRTY))
				rp->r_deleg_change = rp->r_deleg_change_grant;
			change = rp->r_deleg_change;
			mutex_exit(&rp->r_statev4_lock);

			/*
			 * Use inline XDR code directly, we know that we
			 * are going to a memory buffer and it has enough
			 * space so it cannot fail.
			 */
			IXDR_PUT_U_HYPER(fdata, change);
			fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
			fap->attrmask |= FATTR4_CHANGE_MASK;
		}
		if (args->attr_request & FATTR4_SIZE_MASK) {
			/*
			 * Use an atomic add of 0 to fetch a consistent view
			 * of r_size; this avoids having to take rw_lock
			 * which could cause a deadlock.
			 */
			size = atomic_add_64_nv((uint64_t *)&rp->r_size, 0);

			/*
			 * Use inline XDR code directly, we know that we
			 * are going to a memory buffer and it has enough
			 * space so it cannot fail.
			 */
			IXDR_PUT_U_HYPER(fdata, size);
			fap->attrlist4_len += 2 * BYTES_PER_XDR_UNIT;
			fap->attrmask |= FATTR4_SIZE_MASK;
		}
	}
	VN_RELE(vp);

	*cs->statusp = resp->status = NFS4_OK;
}
static void
cb_getattr_free(nfs_cb_resop4 *resop)
{
	if (resop->nfs_cb_resop4_u.opcbgetattr.obj_attributes.attrlist4)
		kmem_free(resop->nfs_cb_resop4_u.opcbgetattr.
		    obj_attributes.attrlist4, cb_getattr_bytes);
}
static void
cb_recall(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
	CB_RECALL4args *args = &argop->nfs_cb_argop4_u.opcbrecall;
	CB_RECALL4res *resp = &resop->nfs_cb_resop4_u.opcbrecall;
	rnode4_t *rp;
	vnode_t *vp;
	struct nfs4_server *sp;
	bool_t found = FALSE;

	ncg->nfs4_callback_stats.cb_recall.value.ui64++;
	ASSERT(req->rq_prog >= NFS4_CALLBACK);
	ASSERT(req->rq_prog < NFS4_CALLBACK + nfs4_num_prognums);

	/*
	 * error injection hook: set cb_recall_fail global to
	 * NFS4 protocol error to be returned
	 */
	if (cb4_recall_fail != NFS4_OK) {
		*cs->statusp = resp->status = cb4_recall_fail;
		return;
	}
	mutex_enter(&ncg->nfs4_cb_lock);
	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
	mutex_exit(&ncg->nfs4_cb_lock);

	if (nfs4_server_vlock(sp, 0) == FALSE) {

		CB_WARN("cb_recall: cannot find server\n");

		*cs->statusp = resp->status = NFS4ERR_BADHANDLE;
		return;
	}
	/*
	 * Search the delegation list for a matching file handle
	 * AND stateid; mutex on sp prevents the list from changing.
	 */
	rp = list_head(&sp->s_deleg_list);
	for (; rp != NULL; rp = list_next(&sp->s_deleg_list, rp)) {
		mutex_enter(&rp->r_statev4_lock);

		/* check both state id and file handle! */

		if ((bcmp(&rp->r_deleg_stateid, &args->stateid,
		    sizeof (stateid4)) == 0)) {
			nfs4_fhandle_t fhandle;

			sfh4_copyval(rp->r_fh, &fhandle);
			if ((fhandle.fh_len == args->fh.nfs_fh4_len &&
			    bcmp(fhandle.fh_buf, args->fh.nfs_fh4_val,
			    fhandle.fh_len) == 0)) {

				found = TRUE;
				break;
			} else {

				CB_WARN("cb_recall: stateid OK, bad fh");
			}
		}
#ifdef DEBUG
		if (bcmp(&args->stateid, &nfs4_deleg_any,
		    sizeof (stateid4)) == 0) {

			found = TRUE;
			break;
		}
#endif
		mutex_exit(&rp->r_statev4_lock);
	}
	/*
	 * VN_HOLD the vnode before releasing s_lock to guarantee
	 * we have a valid vnode reference.  The async thread will
	 * release the hold when it's done.
	 */
	if (found == TRUE) {
		vp = RTOV4(rp);
		VN_HOLD(vp);
		mutex_exit(&rp->r_statev4_lock);
	}

	mutex_exit(&sp->s_lock);
	nfs4_server_rele(sp);

	if (found == FALSE) {

		CB_WARN("cb_recall: bad stateid\n");

		*cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
		return;
	}
	/* Fire up a thread to do the delegreturn */
	nfs4delegreturn_async(rp, NFS4_DR_RECALL|NFS4_DR_REOPEN,
	    args->truncate);

	*cs->statusp = resp->status = 0;
}
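/*
 * Note that cb_recall replies to the server before the delegation is
 * actually returned; the DELEGRETURN itself is driven asynchronously
 * by nfs4delegreturn_thread(), so the callback service thread is not
 * tied up waiting on locks or page pushes.
 */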
/* ARGSUSED */
static void
cb_recall_free(nfs_cb_resop4 *resop)
{
	/* nothing to do here, cb_recall doesn't kmem_alloc */
}
/*
 * This function handles the CB_NULL proc call from an NFSv4 Server.
 *
 * We take note that the server has sent a CB_NULL for later processing
 * in the recovery logic.  It is noted so we may pause slightly after the
 * setclientid and before reopening files.  The pause is to allow the
 * NFSv4 Server time to receive the CB_NULL reply and adjust any of
 * its internal structures such that it has the opportunity to grant
 * delegations to reopened files.
 */
/* ARGSUSED */
static void
cb_null(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
	struct nfs4_server *sp;

	ncg->nfs4_callback_stats.cb_null.value.ui64++;

	ASSERT(req->rq_prog >= NFS4_CALLBACK);
	ASSERT(req->rq_prog < NFS4_CALLBACK + nfs4_num_prognums);

	mutex_enter(&ncg->nfs4_cb_lock);
	sp = ncg->nfs4prog2server[req->rq_prog - NFS4_CALLBACK];
	mutex_exit(&ncg->nfs4_cb_lock);

	if (nfs4_server_vlock(sp, 0) != FALSE) {
		sp->s_flags |= N4S_CB_PINGED;
		cv_broadcast(&sp->wait_cb_null);
		mutex_exit(&sp->s_lock);
		nfs4_server_rele(sp);
	}
}
/*
 * cb_illegal	args: void
 *		res : status (NFS4ERR_OP_CB_ILLEGAL)
 */
/* ARGSUSED */
static void
cb_illegal(nfs_cb_argop4 *argop, nfs_cb_resop4 *resop, struct svc_req *req,
    struct compound_state *cs, struct nfs4_callback_globals *ncg)
{
	CB_ILLEGAL4res *resp = &resop->nfs_cb_resop4_u.opcbillegal;

	ncg->nfs4_callback_stats.cb_illegal.value.ui64++;
	resop->resop = OP_CB_ILLEGAL;
	*cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
}
static void
cb_compound(CB_COMPOUND4args *args, CB_COMPOUND4res *resp, struct svc_req *req,
    struct nfs4_callback_globals *ncg)
{
	uint_t i;
	struct compound_state cs;
	nfs_cb_argop4 *argop;
	nfs_cb_resop4 *resop, *new_res;
	uint_t op;

	bzero(&cs, sizeof (cs));
	cs.statusp = &resp->status;
	cs.cont = TRUE;
	/*
	 * Form a reply tag by copying over the request tag.
	 */
	resp->tag.utf8string_len = args->tag.utf8string_len;
	resp->tag.utf8string_val = kmem_alloc(resp->tag.utf8string_len,
	    KM_SLEEP);
	bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
	    args->tag.utf8string_len);
	/*
	 * XXX for now, minorversion should be zero
	 */
	if (args->minorversion != CB4_MINORVERSION) {
		resp->array_len = 0;
		resp->array = NULL;
		resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
		return;
	}
#ifdef DEBUG
	/*
	 * Verify callback_ident.  It doesn't really matter if it's wrong
	 * because we don't really use callback_ident -- we use prog number
	 * of the RPC request instead.  In this case, just print a DEBUG
	 * console message to reveal brokenness of cbclient (at bkoff/cthon).
	 */
	if (args->callback_ident != req->rq_prog)
		zcmn_err(getzoneid(), CE_WARN,
		    "cb_compound: cb_client using wrong "
		    "callback_ident(%d), should be %d",
		    args->callback_ident, req->rq_prog);
#endif
	resp->array_len = args->array_len;
	resp->array = kmem_zalloc(args->array_len * sizeof (nfs_cb_resop4),
	    KM_SLEEP);
= 0; i
< args
->array_len
&& cs
.cont
; i
++) {
601 argop
= &args
->array
[i
];
602 resop
= &resp
->array
[i
];
603 resop
->resop
= argop
->argop
;
604 op
= (uint_t
)resop
->resop
;
610 cb_getattr(argop
, resop
, req
, &cs
, ncg
);
615 cb_recall(argop
, resop
, req
, &cs
, ncg
);
624 * Handle OP_CB_ILLEGAL and any undefined opcode.
625 * Currently, the XDR code will return BADXDR
626 * if cb op doesn't decode to legal value, so
627 * it really only handles OP_CB_ILLEGAL.
630 cb_illegal(argop
, resop
, req
, &cs
, ncg
);
633 if (*cs
.statusp
!= NFS4_OK
)
		/*
		 * If not at last op, and if we are to stop, then
		 * compact the results array.
		 */
		if ((i + 1) < args->array_len && !cs.cont) {

			new_res = kmem_alloc(
			    (i+1) * sizeof (nfs_cb_resop4), KM_SLEEP);
			bcopy(resp->array,
			    new_res, (i+1) * sizeof (nfs_cb_resop4));
			kmem_free(resp->array,
			    args->array_len * sizeof (nfs_cb_resop4));

			resp->array_len = i + 1;
			resp->array = new_res;
		}
	}
}
static void
cb_compound_free(CB_COMPOUND4res *resp)
{
	uint_t i, op;
	nfs_cb_resop4 *resop;

	if (resp->tag.utf8string_val) {
		UTF8STRING_FREE(resp->tag)
	}

	for (i = 0; i < resp->array_len; i++) {

		resop = &resp->array[i];
		op = (uint_t)resop->resop;

		switch (op) {

		case OP_CB_GETATTR:

			cb_getattr_free(resop);
			break;

		case OP_CB_RECALL:

			cb_recall_free(resop);
			break;

		default:
			break;
		}
	}

	if (resp->array != NULL) {
		kmem_free(resp->array,
		    resp->array_len * sizeof (nfs_cb_resop4));
	}
}
static void
cb_dispatch(struct svc_req *req, SVCXPRT *xprt)
{
	CB_COMPOUND4args args;
	CB_COMPOUND4res res;
	struct nfs4_callback_globals *ncg;

	bool_t (*xdr_args)(), (*xdr_res)();
	void (*proc)(CB_COMPOUND4args *, CB_COMPOUND4res *, struct svc_req *,
	    struct nfs4_callback_globals *);
	void (*freeproc)(CB_COMPOUND4res *);

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	ncg->nfs4_callback_stats.cb_dispatch.value.ui64++;
	switch (req->rq_proc) {
	case CB_NULL:
		xdr_args = xdr_void;
		xdr_res = xdr_void;
		proc = cb_null;
		freeproc = NULL;
		break;

	case CB_COMPOUND:
		xdr_args = xdr_CB_COMPOUND4args_clnt;
		xdr_res = xdr_CB_COMPOUND4res;
		proc = cb_compound;
		freeproc = cb_compound_free;
		break;

	default:
		CB_WARN("cb_dispatch: no proc\n");
		svcerr_noproc(xprt);
		return;
	}
	args.tag.utf8string_val = NULL;

	if (!SVC_GETARGS(xprt, xdr_args, (caddr_t)&args)) {

		CB_WARN("cb_dispatch: cannot getargs\n");
		svcerr_decode(xprt);
		return;
	}
	(*proc)(&args, &res, req, ncg);

	if (svc_sendreply(xprt, xdr_res, (caddr_t)&res) == FALSE) {

		CB_WARN("cb_dispatch: bad sendreply\n");
		svcerr_systemerr(xprt);
	}

	if (freeproc)
		(*freeproc)(&res);

	if (!SVC_FREEARGS(xprt, xdr_args, (caddr_t)&args)) {

		CB_WARN("cb_dispatch: bad freeargs\n");
	}
}
static rpcprog_t
nfs4_getnextprogram(struct nfs4_callback_globals *ncg)
{
	int i, j;

	j = ncg->nfs4_program_hint;
	for (i = 0; i < nfs4_num_prognums; i++, j++) {

		if (j >= nfs4_num_prognums)
			j = 0;

		if (ncg->nfs4prog2server[j] == NULL) {
			ncg->nfs4_program_hint = j+1;
			return (j+NFS4_CALLBACK);
		}
	}

	return (0);
}
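/*
 * Each known server is assigned its own callback RPC program number in
 * the range [NFS4_CALLBACK, NFS4_CALLBACK + nfs4_num_prognums), so a
 * zone can have callbacks registered for at most nfs4_num_prognums
 * (by default 1024) distinct servers at once.
 */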
void
nfs4callback_destroy(nfs4_server_t *np)
{
	struct nfs4_callback_globals *ncg;
	int i;

	if (np->s_program == 0)
		return;

	ncg = np->zone_globals;
	i = np->s_program - NFS4_CALLBACK;

	mutex_enter(&ncg->nfs4_cb_lock);

	ASSERT(ncg->nfs4prog2server[i] == np);

	ncg->nfs4prog2server[i] = NULL;

	if (i < ncg->nfs4_program_hint)
		ncg->nfs4_program_hint = i;

	mutex_exit(&ncg->nfs4_cb_lock);
}
/*
 * nfs4_setport - This function saves a netid and universal address for
 * the callback program.  These values will be used during setclientid.
 */
static void
nfs4_setport(char *netid, char *uaddr, char *protofmly, char *proto,
    struct nfs4_callback_globals *ncg)
{
	struct nfs4_cb_port *p;
	bool_t found = FALSE;

	ASSERT(MUTEX_HELD(&ncg->nfs4_cb_lock));

	p = list_head(&ncg->nfs4_cb_ports);
	for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
		if (strcmp(p->netid, netid) == 0) {
			found = TRUE;
			break;
		}
	}
	if (found == TRUE)
		(void) strcpy(p->uaddr, uaddr);
	else {
		p = kmem_alloc(sizeof (*p), KM_SLEEP);

		(void) strcpy(p->uaddr, uaddr);
		(void) strcpy(p->netid, netid);
		(void) strcpy(p->protofmly, protofmly);
		(void) strcpy(p->proto, proto);
		list_insert_head(&ncg->nfs4_cb_ports, p);
	}
}
/*
 * nfs4_cb_args - This function is used to construct the callback
 * portion of the arguments needed for setclientid.
 */
void
nfs4_cb_args(nfs4_server_t *np, struct knetconfig *knc, SETCLIENTID4args *args)
{
	struct nfs4_cb_port *p;
	bool_t found = FALSE;
	rpcprog_t pgm;
	struct nfs4_callback_globals *ncg = np->zone_globals;

	/*
	 * This server structure may already have a program number
	 * assigned to it.  This happens when the client has to
	 * re-issue SETCLIENTID.  Just re-use the information.
	 */
	if (np->s_program >= NFS4_CALLBACK &&
	    np->s_program < NFS4_CALLBACK + nfs4_num_prognums)
		nfs4callback_destroy(np);

	mutex_enter(&ncg->nfs4_cb_lock);

	p = list_head(&ncg->nfs4_cb_ports);
	for (; p != NULL; p = list_next(&ncg->nfs4_cb_ports, p)) {
		if (strcmp(p->protofmly, knc->knc_protofmly) == 0 &&
		    strcmp(p->proto, knc->knc_proto) == 0) {
			found = TRUE;
			break;
		}
	}

	if (found == FALSE) {

		NFS4_DEBUG(nfs4_callback_debug,
		    (CE_WARN, "nfs4_cb_args: could not find netid for %s/%s\n",
		    knc->knc_protofmly, knc->knc_proto));

		args->callback.cb_program = 0;
		args->callback.cb_location.r_netid = NULL;
		args->callback.cb_location.r_addr = NULL;
		args->callback_ident = 0;
		mutex_exit(&ncg->nfs4_cb_lock);
		return;
	}

	if ((pgm = nfs4_getnextprogram(ncg)) == 0) {
		CB_WARN("nfs4_cb_args: out of program numbers\n");

		args->callback.cb_program = 0;
		args->callback.cb_location.r_netid = NULL;
		args->callback.cb_location.r_addr = NULL;
		args->callback_ident = 0;
		mutex_exit(&ncg->nfs4_cb_lock);
		return;
	}

	ncg->nfs4prog2server[pgm-NFS4_CALLBACK] = np;
	args->callback.cb_program = pgm;
	args->callback.cb_location.r_netid = p->netid;
	args->callback.cb_location.r_addr = p->uaddr;
	args->callback_ident = pgm;

	np->s_program = pgm;

	mutex_exit(&ncg->nfs4_cb_lock);
}
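/*
 * The r_addr handed to the server above is a standard RPC universal
 * address.  As an illustration (addresses here are made up), an IPv4
 * callback service listening on 192.0.2.5 port 1023 would be
 * advertised as "192.0.2.5.3.255", since 3 * 256 + 255 == 1023.
 */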
static int
nfs4_dquery(struct nfs4_svc_args *arg, model_t model)
{
	file_t *fp;
	vnode_t *vp;
	rnode4_t *rp;
	int error;
	STRUCT_HANDLE(nfs4_svc_args, uap);

	STRUCT_SET_HANDLE(uap, model, arg);

	if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
		return (EBADF);

	vp = fp->f_vnode;
	if (vp == NULL || vp->v_type != VREG ||
	    !vn_matchops(vp, &nfs4_vnodeops)) {
		releasef(STRUCT_FGET(uap, fd));
		return (EBADF);
	}

	rp = VTOR4(vp);

	/*
	 * I can't convince myself that we need locking here.  The
	 * rnode cannot disappear and the value returned is instantly
	 * stale anyway, so why bother?
	 */
	error = suword32(STRUCT_FGETP(uap, netid), rp->r_deleg_type);
	releasef(STRUCT_FGET(uap, fd));
	return (error);
}
/*
 * NFS4 client system call.  This service does the
 * necessary initialization for the callback program.
 * This is fashioned after the server side interaction
 * between nfsd and the kernel.  On the client, the
 * mount command forks and the child process does the
 * necessary interaction with the kernel.
 *
 * uap->fd is the fd of an open transport provider
 */
int
nfs4_svc(struct nfs4_svc_args *arg, model_t model)
{
	file_t *fp;
	int error;
	int readsize;
	char buf[KNC_STRSIZE], uaddr[KNC_STRSIZE];
	char protofmly[KNC_STRSIZE], proto[KNC_STRSIZE];
	size_t len;
	STRUCT_HANDLE(nfs4_svc_args, uap);
	struct netbuf addrmask;
	int cmd;
	SVCMASTERXPRT *cb_xprt;
	struct nfs4_callback_globals *ncg;

	STRUCT_SET_HANDLE(uap, model, arg);

	if (STRUCT_FGET(uap, cmd) == NFS4_DQUERY)
		return (nfs4_dquery(arg, model));

	if (secpolicy_nfs(CRED()) != 0)
		return (EPERM);

	if ((fp = getf(STRUCT_FGET(uap, fd))) == NULL)
		return (EBADF);

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	readsize = nfs3tsize() + (RPC_MAXDATASIZE - NFS_MAXDATA);
	if (readsize < RPC_MAXDATASIZE)
		readsize = RPC_MAXDATASIZE;

	error = copyinstr((const char *)STRUCT_FGETP(uap, netid), buf,
	    KNC_STRSIZE, &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		return (error);
	}

	cmd = STRUCT_FGET(uap, cmd);

	if (cmd & NFS4_KRPC_START) {
		addrmask.len = STRUCT_FGET(uap, addrmask.len);
		addrmask.maxlen = STRUCT_FGET(uap, addrmask.maxlen);
		addrmask.buf = kmem_alloc(addrmask.maxlen, KM_SLEEP);
		error = copyin(STRUCT_FGETP(uap, addrmask.buf), addrmask.buf,
		    addrmask.len);
		if (error) {
			releasef(STRUCT_FGET(uap, fd));
			kmem_free(addrmask.buf, addrmask.maxlen);
			return (error);
		}
	} else
		addrmask.buf = NULL;

	error = copyinstr((const char *)STRUCT_FGETP(uap, addr), uaddr,
	    sizeof (uaddr), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	error = copyinstr((const char *)STRUCT_FGETP(uap, protofmly), protofmly,
	    sizeof (protofmly), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	error = copyinstr((const char *)STRUCT_FGETP(uap, proto), proto,
	    sizeof (proto), &len);
	if (error) {
		releasef(STRUCT_FGET(uap, fd));
		if (addrmask.buf)
			kmem_free(addrmask.buf, addrmask.maxlen);
		return (error);
	}

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mutex_enter(&ncg->nfs4_cb_lock);
	if (cmd & NFS4_SETPORT)
		nfs4_setport(buf, uaddr, protofmly, proto, ncg);

	if (cmd & NFS4_KRPC_START) {
		error = svc_tli_kcreate(fp, readsize, buf, &addrmask, &cb_xprt,
		    &nfs4_cb_sct, NULL, NFS_CB_SVCPOOL_ID, FALSE);
		if (error) {
			CB_WARN1("nfs4_svc: svc_tli_kcreate failed %d\n",
			    error);
			kmem_free(addrmask.buf, addrmask.maxlen);
		}
	}

	mutex_exit(&ncg->nfs4_cb_lock);
	releasef(STRUCT_FGET(uap, fd));
	return (error);
}
*
1060 nfs4_get_callback_globals(void)
1062 return (zone_getspecific(nfs4_callback_zone_key
, nfs_zone()));
static void *
nfs4_callback_init_zone(zoneid_t zoneid)
{
	kstat_t *nfs4_callback_kstat;
	struct nfs4_callback_globals *ncg;

	ncg = kmem_zalloc(sizeof (*ncg), KM_SLEEP);

	ncg->nfs4prog2server = kmem_zalloc(nfs4_num_prognums *
	    sizeof (struct nfs4_server *), KM_SLEEP);

	/* initialize the dlist */
	mutex_init(&ncg->nfs4_dlist_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&ncg->nfs4_dlist, sizeof (struct nfs4_dnode),
	    offsetof(struct nfs4_dnode, linkage));

	/* initialize cb_port list */
	mutex_init(&ncg->nfs4_cb_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&ncg->nfs4_cb_ports, sizeof (struct nfs4_cb_port),
	    offsetof(struct nfs4_cb_port, linkage));

	/* get our own copy of the kstats */
	bcopy(&nfs4_callback_stats_tmpl, &ncg->nfs4_callback_stats,
	    sizeof (nfs4_callback_stats_tmpl));
	/* register "nfs:0:nfs4_callback_stats" for this zone */
	if ((nfs4_callback_kstat =
	    kstat_create_zone("nfs", 0, "nfs4_callback_stats", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (ncg->nfs4_callback_stats) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE,
	    zoneid)) != NULL) {
		nfs4_callback_kstat->ks_data = &ncg->nfs4_callback_stats;
		kstat_install(nfs4_callback_kstat);
	}
	return (ncg);
}
static void
nfs4_discard_delegations(struct nfs4_callback_globals *ncg)
{
	nfs4_server_t *sp;
	int i, num_removed;
	rnode4_t *rp;

	/*
	 * It's OK here to just run through the registered "programs", as
	 * servers without programs won't have any delegations to handle.
	 */
	for (i = 0; i < nfs4_num_prognums; i++) {

		mutex_enter(&ncg->nfs4_cb_lock);
		sp = ncg->nfs4prog2server[i];
		mutex_exit(&ncg->nfs4_cb_lock);

		if (nfs4_server_vlock(sp, 1) == FALSE)
			continue;
		num_removed = 0;
		while ((rp = list_head(&sp->s_deleg_list)) != NULL) {
			mutex_enter(&rp->r_statev4_lock);
			if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
				/*
				 * We need to take matters into our own hands,
				 * as nfs4delegreturn_cleanup_impl() won't
				 * remove this from the list.
				 */
				list_remove(&sp->s_deleg_list, rp);
				mutex_exit(&rp->r_statev4_lock);
				nfs4_dec_state_ref_count_nolock(sp,
				    VTOMI4(RTOV4(rp)));
				num_removed++;
				continue;
			}
			mutex_exit(&rp->r_statev4_lock);
			VN_HOLD(RTOV4(rp));
			mutex_exit(&sp->s_lock);
			/*
			 * The following will remove the node from the list.
			 */
			nfs4delegreturn_cleanup_impl(rp, sp, ncg);
			VN_RELE(RTOV4(rp));
			mutex_enter(&sp->s_lock);
		}
		mutex_exit(&sp->s_lock);
		/* each removed list node reles a reference */
		while (num_removed-- > 0)
			nfs4_server_rele(sp);
		/* remove our reference for nfs4_server_vlock */
		nfs4_server_rele(sp);
	}
}
/* ARGSUSED */
static void
nfs4_callback_shutdown_zone(zoneid_t zoneid, void *data)
{
	struct nfs4_callback_globals *ncg = data;

	/*
	 * Clean pending delegation return list.
	 */
	nfs4_dlistclean_impl(ncg, NFS4_DR_DISCARD);

	/*
	 * Discard all delegations.
	 */
	nfs4_discard_delegations(ncg);
}
static void
nfs4_callback_fini_zone(zoneid_t zoneid, void *data)
{
	struct nfs4_callback_globals *ncg = data;
	struct nfs4_cb_port *p;
	nfs4_server_t *sp, *next;
	nfs4_server_t freelist;
	int i;

	kstat_delete_byname_zone("nfs", 0, "nfs4_callback_stats", zoneid);

	/*
	 * Discard all delegations that may have crept in since we did the
	 * _shutdown.
	 */
	nfs4_discard_delegations(ncg);
	/*
	 * We're completely done with this zone and all associated
	 * nfs4_server_t's.  Any remaining nfs4_server_ts should only have one
	 * more reference outstanding -- the reference we didn't release in
	 * nfs4_renew_lease_thread().
	 *
	 * Here we need to run through the global nfs4_server_lst as we need to
	 * deal with nfs4_server_ts without programs, as they also have threads
	 * created for them, and so have outstanding references that we need to
	 * release.
	 */
	freelist.forw = &freelist;
	freelist.back = &freelist;
	mutex_enter(&nfs4_server_lst_lock);
	sp = nfs4_server_lst.forw;
	while (sp != &nfs4_server_lst) {
		next = sp->forw;
		if (sp->zoneid == zoneid) {
			remque(sp);
			insque(sp, &freelist);
		}
		sp = next;
	}
	mutex_exit(&nfs4_server_lst_lock);

	sp = freelist.forw;
	while (sp != &freelist) {
		next = sp->forw;
		nfs4_server_rele(sp);	/* free the list's reference */
		sp = next;
	}

	for (i = 0; i < nfs4_num_prognums; i++) {
		ASSERT(ncg->nfs4prog2server[i] == NULL);
	}

	kmem_free(ncg->nfs4prog2server, nfs4_num_prognums *
	    sizeof (struct nfs4_server *));

	mutex_enter(&ncg->nfs4_cb_lock);
	while ((p = list_head(&ncg->nfs4_cb_ports)) != NULL) {
		list_remove(&ncg->nfs4_cb_ports, p);
		kmem_free(p, sizeof (*p));
	}
	list_destroy(&ncg->nfs4_cb_ports);
	mutex_destroy(&ncg->nfs4_cb_lock);
	list_destroy(&ncg->nfs4_dlist);
	mutex_destroy(&ncg->nfs4_dlist_lock);
	kmem_free(ncg, sizeof (*ncg));
}
void
nfs4_callback_init(void)
{
	int i;
	SVC_CALLOUT *nfs4_cb_sc;

	/* initialize the callback table */
	nfs4_cb_sc = kmem_alloc(nfs4_num_prognums *
	    sizeof (SVC_CALLOUT), KM_SLEEP);

	for (i = 0; i < nfs4_num_prognums; i++) {
		nfs4_cb_sc[i].sc_prog = NFS4_CALLBACK+i;
		nfs4_cb_sc[i].sc_versmin = NFS_CB;
		nfs4_cb_sc[i].sc_versmax = NFS_CB;
		nfs4_cb_sc[i].sc_dispatch = cb_dispatch;
	}

	nfs4_cb_sct.sct_size = nfs4_num_prognums;
	nfs4_cb_sct.sct_free = FALSE;
	nfs4_cb_sct.sct_sc = nfs4_cb_sc;

	/*
	 * Compute max bytes required for dynamically allocated parts
	 * of cb_getattr reply.  Only size and change are supported now.
	 * If CB_GETATTR is changed to reply with additional attrs,
	 * additional sizes must be added below.
	 *
	 * fattr4_change + fattr4_size == uint64_t + uint64_t
	 */
	cb_getattr_bytes = 2 * BYTES_PER_XDR_UNIT + 2 * BYTES_PER_XDR_UNIT;

	zone_key_create(&nfs4_callback_zone_key, nfs4_callback_init_zone,
	    nfs4_callback_shutdown_zone, nfs4_callback_fini_zone);
}
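/*
 * With BYTES_PER_XDR_UNIT == 4, cb_getattr_bytes works out to 16 bytes:
 * two XDR units (8 bytes) for the 64-bit change attribute plus two more
 * for the 64-bit size.
 */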
void
nfs4_callback_fini(void)
{
}
/*
 * NB: This function can be called from the *wrong* zone (ie, the zone that
 * 'rp' belongs to and the caller's zone may not be the same).  This can happen
 * if the zone is going away and we get called from nfs4_async_inactive().  In
 * this case the globals will be NULL and we won't update the counters, which
 * doesn't matter as the zone is going away anyhow.
 */
static void
nfs4delegreturn_cleanup_impl(rnode4_t *rp, nfs4_server_t *np,
    struct nfs4_callback_globals *ncg)
{
	mntinfo4_t *mi = VTOMI4(RTOV4(rp));
	boolean_t need_rele = B_FALSE;

	/*
	 * Caller must be holding mi_recovlock in read mode
	 * to call here.  This is provided by start_op.
	 * Delegation management requires to grab s_lock
	 * first and then r_statev4_lock.
	 */

	if (np == NULL) {
		np = find_nfs4_server_all(mi, 1);
		if (np == NULL)
			return;
		need_rele = B_TRUE;
	} else {
		mutex_enter(&np->s_lock);
	}

	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		mutex_exit(&np->s_lock);
		if (need_rele)
			nfs4_server_rele(np);
		return;
	}

	/*
	 * Free the cred originally held when
	 * the delegation was granted.  Caller must
	 * hold this cred if it wants to use it after
	 * this call.
	 */
	crfree(rp->r_deleg_cred);
	rp->r_deleg_cred = NULL;
	rp->r_deleg_type = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	rp->r_deleg_needs_recall = FALSE;
	rp->r_deleg_return_pending = FALSE;

	/*
	 * Remove the rnode from the server's list and
	 * update the ref counts.
	 */
	list_remove(&np->s_deleg_list, rp);
	mutex_exit(&rp->r_statev4_lock);
	nfs4_dec_state_ref_count_nolock(np, mi);
	mutex_exit(&np->s_lock);
	/* removed list node removes a reference */
	nfs4_server_rele(np);
	if (need_rele)
		nfs4_server_rele(np);
	if (ncg != NULL)
		ncg->nfs4_callback_stats.delegations.value.ui64--;
}
static void
nfs4delegreturn_cleanup(rnode4_t *rp, nfs4_server_t *np)
{
	struct nfs4_callback_globals *ncg;

	if (np != NULL) {
		ncg = np->zone_globals;
	} else if (nfs_zone() == VTOMI4(RTOV4(rp))->mi_zone) {
		ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
		ASSERT(ncg != NULL);
	} else {
		/*
		 * Request coming from the wrong zone.
		 */
		ASSERT(getzoneid() == GLOBAL_ZONEID);
		ncg = NULL;
	}

	nfs4delegreturn_cleanup_impl(rp, np, ncg);
}
static void
nfs4delegreturn_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
    cred_t *cr, vnode_t *vp)
{
	if (error != ETIMEDOUT && error != EINTR &&
	    !NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
		lost_rqstp->lr_op = 0;
		return;
	}

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
	    "nfs4delegreturn_save_lost_rqst: error %d", error));

	lost_rqstp->lr_op = OP_DELEGRETURN;
	/*
	 * The vp is held and rele'd via the recovery code.
	 * See nfs4_save_lost_rqst.
	 */
	lost_rqstp->lr_vp = vp;
	lost_rqstp->lr_dvp = NULL;
	lost_rqstp->lr_oop = NULL;
	lost_rqstp->lr_osp = NULL;
	lost_rqstp->lr_lop = NULL;
	lost_rqstp->lr_cr = cr;
	lost_rqstp->lr_flk = NULL;
	lost_rqstp->lr_putfirst = FALSE;
}
static void
nfs4delegreturn_otw(rnode4_t *rp, cred_t *cr, nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 argops[3];
	nfs4_ga_res_t *garp = NULL;
	hrtime_t t;
	int numops;
	int doqueue = 1;

	args.ctag = TAG_DELEGRETURN;

	numops = 3;	/* PUTFH, GETATTR, DELEGRETURN */

	args.array = argops;
	args.array_len = numops;

	argops[0].argop = OP_CPUTFH;
	argops[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

	argops[1].argop = OP_GETATTR;
	argops[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argops[1].nfs_argop4_u.opgetattr.mi = VTOMI4(RTOV4(rp));

	argops[2].argop = OP_DELEGRETURN;
	argops[2].nfs_argop4_u.opdelegreturn.deleg_stateid =
	    rp->r_deleg_stateid;

	t = gethrtime();
	rfs4call(VTOMI4(RTOV4(rp)), &args, &res, cr, &doqueue, 0, ep);

	if (ep->error)
		return;

	if (res.status == NFS4_OK) {
		garp = &res.array[1].nfs_resop4_u.opgetattr.ga_res;
		nfs4_attr_cache(RTOV4(rp), garp, t, cr, TRUE, NULL);
	}
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}
static int
nfs4_do_delegreturn(rnode4_t *rp, int flags, cred_t *cr,
    struct nfs4_callback_globals *ncg)
{
	vnode_t *vp = RTOV4(rp);
	mntinfo4_t *mi = VTOMI4(vp);
	nfs4_lost_rqst_t lost_rqst;
	nfs4_recov_state_t recov_state;
	bool_t needrecov = FALSE, recovonly, done = FALSE;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

	ncg->nfs4_callback_stats.delegreturn.value.ui64++;

	while (!done) {
		e.error = nfs4_start_fop(mi, vp, NULL, OH_DELEGRETURN,
		    &recov_state, &recovonly);

		if (e.error) {
			if (flags & NFS4_DR_FORCE) {
				(void) nfs_rw_enter_sig(&mi->mi_recovlock,
				    RW_READER, 0);
				nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
				nfs_rw_exit(&mi->mi_recovlock);
			}
			break;
		}

		/*
		 * Check to see if the delegation has already been
		 * returned by the recovery thread.  The state of
		 * the delegation cannot change at this point due
		 * to start_fop and the r_deleg_recall_lock.
		 */
		if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
			e.error = 0;
			nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
			break;
		}

		if (recovonly) {
			/*
			 * Delegation will be returned via the
			 * recovery framework.  Build a lost request
			 * structure, start recovery and get out.
			 */
			nfs4_error_init(&e, EINTR);
			nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
			    cr, vp);
			(void) nfs4_start_recovery(&e, mi, vp,
			    NULL, &rp->r_deleg_stateid,
			    lost_rqst.lr_op == OP_DELEGRETURN ?
			    &lost_rqst : NULL, OP_DELEGRETURN, NULL,
			    NULL, NULL);
			nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
			break;
		}

		nfs4delegreturn_otw(rp, cr, &e);

		/*
		 * Ignore some errors on delegreturn; no point in marking
		 * the file dead on a state destroying operation.
		 */
		if (e.error == 0 && (nfs4_recov_marks_dead(e.stat) ||
		    e.stat == NFS4ERR_BADHANDLE ||
		    e.stat == NFS4ERR_STALE))
			needrecov = FALSE;
		else
			needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

		if (needrecov) {
			nfs4delegreturn_save_lost_rqst(e.error, &lost_rqst,
			    cr, vp);
			(void) nfs4_start_recovery(&e, mi, vp,
			    NULL, &rp->r_deleg_stateid,
			    lost_rqst.lr_op == OP_DELEGRETURN ?
			    &lost_rqst : NULL, OP_DELEGRETURN, NULL,
			    NULL, NULL);
		} else {
			nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
			done = TRUE;
		}

		nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);
	}
	return (e.error);
}
/*
 * nfs4_resend_delegreturn - used to drive the delegreturn
 * operation via the recovery thread.
 */
void
nfs4_resend_delegreturn(nfs4_lost_rqst_t *lorp, nfs4_error_t *ep,
    nfs4_server_t *np)
{
	rnode4_t *rp = VTOR4(lorp->lr_vp);

	/* If the file failed recovery, just quit. */
	mutex_enter(&rp->r_statelock);
	if (rp->r_flags & R4RECOVERR) {
		mutex_exit(&rp->r_statelock);
		return;
	}
	mutex_exit(&rp->r_statelock);

	nfs4delegreturn_otw(rp, lorp->lr_cr, ep);

	/*
	 * If recovery is now needed, then return the error
	 * and status and let the recovery thread handle it,
	 * including re-driving another delegreturn.  Otherwise,
	 * just give up and clean up the delegation.
	 */
	if (nfs4_needs_recovery(ep, TRUE, lorp->lr_vp->v_vfsp))
		return;

	if (rp->r_deleg_type != OPEN_DELEGATE_NONE)
		nfs4delegreturn_cleanup(rp, np);

	nfs4_error_zinit(ep);
}
/*
 * nfs4delegreturn - general function to return a delegation.
 *
 * NFS4_DR_FORCE - return the delegation even if start_op fails
 * NFS4_DR_PUSH - push modified data back to the server via fop_putpage
 * NFS4_DR_DISCARD - discard the delegation w/o delegreturn
 * NFS4_DR_DID_OP - calling function already did nfs4_start_op
 * NFS4_DR_RECALL - delegreturn initiated via CB_RECALL
 * NFS4_DR_REOPEN - do file reopens, if applicable
 */
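/*
 * For instance, a server-initiated recall uses
 * NFS4_DR_RECALL|NFS4_DR_REOPEN (see cb_recall()), while the recovery
 * thread may pass NFS4_DR_DISCARD alone to drop the delegation state
 * without an over-the-wire DELEGRETURN.
 */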
static int
nfs4delegreturn_impl(rnode4_t *rp, int flags, struct nfs4_callback_globals *ncg)
{
	int error = 0;
	cred_t *cr = NULL;
	vnode_t *vp;
	bool_t needrecov = FALSE;
	bool_t rw_entered = FALSE;
	bool_t do_reopen;

	/*
	 * If NFS4_DR_DISCARD is set by itself, take a short-cut and
	 * discard without doing an otw DELEGRETURN.  This may only be used
	 * by the recovery thread because it bypasses the synchronization
	 * with r_deleg_recall_lock and mi->mi_recovlock.
	 */
	if (flags == NFS4_DR_DISCARD) {
		nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
		return (0);
	}

	if (flags & NFS4_DR_DID_OP) {
		/*
		 * Caller had already done start_op, which means the
		 * r_deleg_recall_lock is already held in READ mode
		 * so we cannot take it in write mode.  Return the
		 * delegation asynchronously.
		 *
		 * Remove the NFS4_DR_DID_OP flag so we don't
		 * get stuck looping through here.
		 */
		VN_HOLD(RTOV4(rp));
		nfs4delegreturn_async(rp, (flags & ~NFS4_DR_DID_OP), FALSE);
		return (0);
	}

	/*
	 * Verify we still have a delegation and crhold the credential.
	 */
	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	vp = RTOV4(rp);

	/*
	 * Push the modified data back to the server synchronously
	 * before doing DELEGRETURN.
	 */
	if (flags & NFS4_DR_PUSH)
		(void) fop_putpage(vp, 0, 0, 0, cr, NULL);

	/*
	 * Take r_deleg_recall_lock in WRITE mode, this will prevent
	 * nfs4_is_otw_open_necessary from trying to use the delegation
	 * while the DELEGRETURN is in progress.
	 */
	(void) nfs_rw_enter_sig(&rp->r_deleg_recall_lock, RW_WRITER, FALSE);
	rw_entered = TRUE;

	if (rp->r_deleg_type == OPEN_DELEGATE_NONE)
		goto out;

	if (flags & NFS4_DR_REOPEN) {
		/*
		 * If R4RECOVERRP is already set, then skip re-opening
		 * the delegation open streams and go straight to doing
		 * delegreturn.  (XXX if the file has failed recovery, then the
		 * delegreturn attempt is likely to be futile.)
		 */
		mutex_enter(&rp->r_statelock);
		do_reopen = !(rp->r_flags & R4RECOVERRP);
		mutex_exit(&rp->r_statelock);

		if (do_reopen) {
			error = deleg_reopen(vp, &needrecov, ncg, flags);
			if (error != 0) {
				if ((flags & (NFS4_DR_FORCE | NFS4_DR_RECALL))
				    == 0)
					goto out;
			} else if (needrecov) {
				if ((flags & NFS4_DR_FORCE) == 0)
					goto out;
			}
		}
	}

	if (flags & NFS4_DR_DISCARD) {
		mntinfo4_t *mi = VTOMI4(RTOV4(rp));

		mutex_enter(&rp->r_statelock);
		/*
		 * deleg_return_pending is cleared inside of delegation_accept
		 * when a delegation is accepted.  if this flag has been
		 * cleared, then a new delegation has overwritten the one we
		 * were about to throw away.
		 */
		if (!rp->r_deleg_return_pending) {
			mutex_exit(&rp->r_statelock);
			goto out;
		}
		mutex_exit(&rp->r_statelock);
		(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, FALSE);
		nfs4delegreturn_cleanup_impl(rp, NULL, ncg);
		nfs_rw_exit(&mi->mi_recovlock);
	} else {
		error = nfs4_do_delegreturn(rp, flags, cr, ncg);
	}

out:
	if (cr)
		crfree(cr);
	if (rw_entered)
		nfs_rw_exit(&rp->r_deleg_recall_lock);
	return (error);
}
int
nfs4delegreturn(rnode4_t *rp, int flags)
{
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	return (nfs4delegreturn_impl(rp, flags, ncg));
}
void
nfs4delegreturn_async(rnode4_t *rp, int flags, bool_t trunc)
{
	struct cb_recall_pass *pp;

	pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
	pp->rp = rp;
	pp->flags = flags;
	pp->truncate = trunc;

	/*
	 * Fire up a thread to do the actual delegreturn
	 * Caller must guarantee that the rnode doesn't
	 * vanish (by calling VN_HOLD).
	 */

	(void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
	    minclsyspri);
}
static void
delegreturn_all_thread(rpcprog_t *pp)
{
	nfs4_server_t *np;
	bool_t found = FALSE;
	rpcprog_t prog;
	rnode4_t *rp;
	vnode_t *vp;
	zoneid_t zoneid = getzoneid();
	struct nfs4_callback_globals *ncg;

	NFS4_DEBUG(nfs4_drat_debug,
	    (CE_NOTE, "delegreturn_all_thread: prog %d\n", *pp));

	prog = *pp;
	kmem_free(pp, sizeof (*pp));
	pp = NULL;

	mutex_enter(&nfs4_server_lst_lock);
	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
		if (np->zoneid == zoneid && np->s_program == prog) {
			mutex_enter(&np->s_lock);
			found = TRUE;
			break;
		}
	}
	mutex_exit(&nfs4_server_lst_lock);

	/*
	 * It's possible that the nfs4_server which was using this
	 * program number has vanished since this thread is async.
	 * If so, just return.  Your work here is finished, my friend.
	 */
	if (!found)
		goto out;

	ncg = np->zone_globals;
	while ((rp = list_head(&np->s_deleg_list)) != NULL) {
		vp = RTOV4(rp);
		VN_HOLD(vp);
		mutex_exit(&np->s_lock);
		(void) nfs4delegreturn_impl(rp, NFS4_DR_PUSH|NFS4_DR_REOPEN,
		    ncg);
		VN_RELE(vp);

		/* retake the s_lock for next trip through the loop */
		mutex_enter(&np->s_lock);
	}
	mutex_exit(&np->s_lock);
out:
	NFS4_DEBUG(nfs4_drat_debug,
	    (CE_NOTE, "delegreturn_all_thread: complete\n"));
	zthread_exit();
}
void
nfs4_delegreturn_all(nfs4_server_t *sp)
{
	rpcprog_t pro, *pp;

	mutex_enter(&sp->s_lock);

	/* Check to see if the delegation list is empty */

	if (list_head(&sp->s_deleg_list) == NULL) {
		mutex_exit(&sp->s_lock);
		return;
	}
	/*
	 * Grab the program number; the async thread will use this
	 * to find the nfs4_server.
	 */
	pro = sp->s_program;
	mutex_exit(&sp->s_lock);
	pp = kmem_alloc(sizeof (rpcprog_t), KM_SLEEP);
	*pp = pro;
	(void) zthread_create(NULL, 0, delegreturn_all_thread, pp, 0,
	    minclsyspri);
}
/*
 * Discard any delegations
 *
 * Iterate over the server's s_deleg_list and
 * for matching mount-point rnodes discard
 * the delegation.
 */
void
nfs4_deleg_discard(mntinfo4_t *mi, nfs4_server_t *sp)
{
	rnode4_t *rp, *next;
	mntinfo4_t *r_mi;
	struct nfs4_callback_globals *ncg;

	ASSERT(mutex_owned(&sp->s_lock));
	ncg = sp->zone_globals;

	for (rp = list_head(&sp->s_deleg_list); rp != NULL; rp = next) {
		r_mi = VTOMI4(RTOV4(rp));
		next = list_next(&sp->s_deleg_list, rp);

		if (r_mi != mi) {
			/*
			 * Skip if this rnode is not on the
			 * same mount-point
			 */
			continue;
		}

		ASSERT(rp->r_deleg_type == OPEN_DELEGATE_READ);

#ifdef DEBUG
		if (nfs4_client_recov_debug) {
			zprintf(getzoneid(),
			    "nfs4_deleg_discard: matched rnode %p "
			    "-- discarding delegation\n", (void *)rp);
		}
#endif
		mutex_enter(&rp->r_statev4_lock);
		/*
		 * Free the cred originally held when the delegation
		 * was granted.  Also need to decrement the refcnt
		 * on this server for each delegation we discard
		 */
		if (rp->r_deleg_cred)
			crfree(rp->r_deleg_cred);
		rp->r_deleg_cred = NULL;
		rp->r_deleg_type = OPEN_DELEGATE_NONE;
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
		rp->r_deleg_needs_recall = FALSE;
		ASSERT(sp->s_refcnt > 1);
		sp->s_refcnt--;
		list_remove(&sp->s_deleg_list, rp);
		mutex_exit(&rp->r_statev4_lock);
		nfs4_dec_state_ref_count_nolock(sp, mi);
		ncg->nfs4_callback_stats.delegations.value.ui64--;
	}
}
/*
 * Reopen any open streams that were covered by the given file's
 * delegation.
 * Returns zero or an errno value.  If there was no error, *recovp
 * indicates whether recovery was initiated.
 */
static int
deleg_reopen(vnode_t *vp, bool_t *recovp, struct nfs4_callback_globals *ncg,
    int flags)
{
	nfs4_open_stream_t *osp;
	nfs4_recov_state_t recov_state;
	bool_t needrecov = FALSE;
	mntinfo4_t *mi;
	rnode4_t *rp;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	int claimnull;

	mi = VTOMI4(vp);
	rp = VTOR4(vp);

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

retry:
	if ((e.error = nfs4_start_op(mi, vp, NULL, &recov_state)) != 0) {
		return (e.error);
	}

	/*
	 * if we mean to discard the delegation, it must be BAD, so don't
	 * use it when doing the reopen or it will fail too.
	 */
	claimnull = (flags & NFS4_DR_DISCARD);
	/*
	 * Loop through the open streams for this rnode to find
	 * all of the ones created using the delegation state ID.
	 * Each of these needs to be re-opened.
	 */

	while ((osp = get_next_deleg_stream(rp, claimnull)) != NULL) {

		if (claimnull) {
			nfs4_reopen(vp, osp, &e, CLAIM_NULL, FALSE, FALSE);
		} else {
			ncg->nfs4_callback_stats.claim_cur.value.ui64++;

			nfs4_reopen(vp, osp, &e, CLAIM_DELEGATE_CUR, FALSE,
			    FALSE);
			if (e.error == 0 && e.stat == NFS4_OK)
				ncg->nfs4_callback_stats.
				    claim_cur_ok.value.ui64++;
		}

		if (e.error == EAGAIN) {
			open_stream_rele(osp, rp);
			nfs4_end_op(mi, vp, NULL, &recov_state, TRUE);
			goto retry;
		}

		/*
		 * if error is EINTR, ETIMEDOUT, or NFS4_FRC_UNMT_ERR, then
		 * recovery has already been started inside of nfs4_reopen.
		 */
		if (e.error == EINTR || e.error == ETIMEDOUT ||
		    NFS4_FRC_UNMT_ERR(e.error, vp->v_vfsp)) {
			open_stream_rele(osp, rp);
			break;
		}

		needrecov = nfs4_needs_recovery(&e, TRUE, vp->v_vfsp);

		if (e.error != 0 && !needrecov) {
			/*
			 * Recovery is not possible, but don't give up yet;
			 * we'd still like to do delegreturn after
			 * reopening as many streams as possible.
			 * Continue processing the open streams.
			 */

			ncg->nfs4_callback_stats.recall_failed.value.ui64++;

		} else if (needrecov) {
			/*
			 * Start recovery and bail out.  The recovery
			 * thread will take it from here.
			 */
			(void) nfs4_start_recovery(&e, mi, vp, NULL, NULL,
			    NULL, OP_OPEN, NULL, NULL, NULL);
			open_stream_rele(osp, rp);
			*recovp = TRUE;
			break;
		}

		open_stream_rele(osp, rp);
	}

	nfs4_end_op(mi, vp, NULL, &recov_state, needrecov);

	return (e.error);
}
/*
 * get_next_deleg_stream - returns the next open stream which
 * represents a delegation for this rnode.  In order to assure
 * forward progress, the caller must guarantee that each open
 * stream returned is changed so that a future call won't return
 * it again.
 *
 * There are several ways for the open stream to change.  If the open
 * stream is !os_delegation, then we aren't interested in it.  Also, if
 * either os_failed_reopen or !os_valid, then don't return the osp.
 *
 * If claimnull is false (doing reopen CLAIM_DELEGATE_CUR) then return
 * the osp if it is an os_delegation open stream.  Also, if the rnode still
 * has r_deleg_return_pending, then return the os_delegation osp.  Lastly,
 * if the rnode's r_deleg_stateid is different from the osp's open_stateid,
 * then return the osp.
 *
 * We have already taken the 'r_deleg_recall_lock' as WRITER, which
 * prevents new OPENs from going OTW (as start_fop takes this
 * lock in READ mode); thus, no new open streams can be created
 * (which inherently means no new delegation open streams are
 * being created).
 */
*
1998 get_next_deleg_stream(rnode4_t
*rp
, int claimnull
)
2000 nfs4_open_stream_t
*osp
;
2002 ASSERT(nfs_rw_lock_held(&rp
->r_deleg_recall_lock
, RW_WRITER
));
2005 * Search through the list of open streams looking for
2006 * one that was created while holding the delegation.
2008 mutex_enter(&rp
->r_os_lock
);
2009 for (osp
= list_head(&rp
->r_open_streams
); osp
!= NULL
;
2010 osp
= list_next(&rp
->r_open_streams
, osp
)) {
2011 mutex_enter(&osp
->os_sync_lock
);
2012 if (!osp
->os_delegation
|| osp
->os_failed_reopen
||
2014 mutex_exit(&osp
->os_sync_lock
);
2017 if (!claimnull
|| rp
->r_deleg_return_pending
||
2018 !stateid4_cmp(&osp
->open_stateid
, &rp
->r_deleg_stateid
)) {
2019 osp
->os_ref_count
++;
2020 mutex_exit(&osp
->os_sync_lock
);
2021 mutex_exit(&rp
->r_os_lock
);
2024 mutex_exit(&osp
->os_sync_lock
);
2026 mutex_exit(&rp
->r_os_lock
);
static void
nfs4delegreturn_thread(struct cb_recall_pass *args)
{
	rnode4_t *rp;
	vnode_t *vp;
	cred_t *cr;
	int dtype, error, flags;
	bool_t rdirty, rip;
	kmutex_t cpr_lock;
	callb_cpr_t cpr_info;
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);

	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr,
	    "nfsv4delegRtn");

	rp = args->rp;
	vp = RTOV4(rp);

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		goto out;
	}
	mutex_exit(&rp->r_statev4_lock);

	/*
	 * Take the read-write lock in read mode to prevent other
	 * threads from modifying the data during the recall.  This
	 * doesn't affect mmappers.
	 */
	(void) nfs_rw_enter_sig(&rp->r_rwlock, RW_READER, FALSE);

	/* Proceed with delegreturn */

	mutex_enter(&rp->r_statev4_lock);
	if (rp->r_deleg_type == OPEN_DELEGATE_NONE) {
		mutex_exit(&rp->r_statev4_lock);
		nfs_rw_exit(&rp->r_rwlock);
		goto out;
	}
	dtype = rp->r_deleg_type;
	cr = rp->r_deleg_cred;
	ASSERT(cr != NULL);
	crhold(cr);
	mutex_exit(&rp->r_statev4_lock);

	flags = args->flags;

	/*
	 * If the file is being truncated at the server, then throw
	 * away all of the pages, it doesn't matter what flavor of
	 * delegation we have.
	 */

	if (args->truncate) {
		ncg->nfs4_callback_stats.recall_trunc.value.ui64++;
		nfs4_invalidate_pages(vp, 0, cr);
	} else if (dtype == OPEN_DELEGATE_WRITE) {

		mutex_enter(&rp->r_statelock);
		rdirty = rp->r_flags & R4DIRTY;
		mutex_exit(&rp->r_statelock);

		if (rdirty) {
			error = fop_putpage(vp, 0, 0, 0, cr, NULL);

			if (error)
				CB_WARN1("nfs4delegreturn_thread:"
				" fop_putpage: %d\n", error);
		}
		/* turn off NFS4_DR_PUSH because we just did that above. */
		flags &= ~NFS4_DR_PUSH;
	}

	mutex_enter(&rp->r_statelock);
	rip = rp->r_flags & R4RECOVERRP;
	mutex_exit(&rp->r_statelock);

	/* If a failed recovery is indicated, discard the pages */

	if (rip) {

		error = fop_putpage(vp, 0, 0, B_INVAL, cr, NULL);

		if (error)
			CB_WARN1("nfs4delegreturn_thread: fop_putpage: %d\n",
			    error);
	}

	/*
	 * Pass the flags to nfs4delegreturn_impl, but be sure not to pass
	 * NFS4_DR_DID_OP, which just calls nfs4delegreturn_async again.
	 */
	flags &= ~NFS4_DR_DID_OP;

	(void) nfs4delegreturn_impl(rp, flags, ncg);

	nfs_rw_exit(&rp->r_rwlock);
	crfree(cr);
out:
	VN_RELE(vp);
	kmem_free(args, sizeof (struct cb_recall_pass));

	mutex_enter(&cpr_lock);
	CALLB_CPR_EXIT(&cpr_info);
	mutex_destroy(&cpr_lock);
	zthread_exit();
}
/*
 * This function has one assumption that the caller of this function is
 * either doing recovery (therefore cannot call nfs4_start_op) or has
 * already called nfs4_start_op().
 */
void
nfs4_delegation_accept(rnode4_t *rp, open_claim_type4 claim, OPEN4res *res,
    nfs4_ga_res_t *garp, cred_t *cr)
{
	open_read_delegation4 *orp;
	open_write_delegation4 *owp;
	nfs4_server_t *np;
	bool_t already = FALSE;
	bool_t recall = FALSE;
	bool_t valid_garp = TRUE;
	bool_t delegation_granted = FALSE;
	bool_t dr_needed = FALSE;
	bool_t recov;
	int dr_flags = 0;
	long mapcnt;
	uint_t rflag;
	mntinfo4_t *mi;
	struct nfs4_callback_globals *ncg;
	open_delegation_type4 odt;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	mi = VTOMI4(RTOV4(rp));

	/*
	 * Accept a delegation granted to the client via an OPEN.
	 * Set the delegation fields in the rnode and insert the
	 * rnode onto the list anchored in the nfs4_server_t.  The
	 * proper locking order requires the nfs4_server_t first,
	 * even though it may not be needed in all cases.
	 *
	 * NB: find_nfs4_server returns with s_lock held.
	 */

	if ((np = find_nfs4_server(mi)) == NULL)
		return;

	/* grab the statelock too, for examining r_mapcnt */
	mutex_enter(&rp->r_statelock);
	mutex_enter(&rp->r_statev4_lock);

	if (rp->r_deleg_type == OPEN_DELEGATE_READ ||
	    rp->r_deleg_type == OPEN_DELEGATE_WRITE)
		already = TRUE;

	odt = res->delegation.delegation_type;

	if (odt == OPEN_DELEGATE_READ) {

		rp->r_deleg_type = res->delegation.delegation_type;
		orp = &res->delegation.open_delegation4_u.read;
		rp->r_deleg_stateid = orp->stateid;
		rp->r_deleg_perms = orp->permissions;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = orp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_r.value.ui64++;

	} else if (odt == OPEN_DELEGATE_WRITE) {

		rp->r_deleg_type = res->delegation.delegation_type;
		owp = &res->delegation.open_delegation4_u.write;
		rp->r_deleg_stateid = owp->stateid;
		rp->r_deleg_perms = owp->permissions;
		rp->r_deleg_limit = owp->space_limit;
		if (claim == CLAIM_PREVIOUS)
			if ((recall = owp->recall) != 0)
				dr_needed = TRUE;

		delegation_granted = TRUE;

		if (garp == NULL || !garp->n4g_change_valid) {
			valid_garp = FALSE;
			rp->r_deleg_change = 0;
			rp->r_deleg_change_grant = 0;
		} else {
			rp->r_deleg_change = garp->n4g_change;
			rp->r_deleg_change_grant = garp->n4g_change;
		}
		mapcnt = rp->r_mapcnt;
		rflag = rp->r_flags;

		/*
		 * Update the delegation change attribute if
		 * there are mappers or the file is dirty.  This
		 * might be the case during recovery after server
		 * reboot.
		 */
		if (mapcnt > 0 || rflag & R4DIRTY)
			rp->r_deleg_change++;

		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_deleg_change: 0x%x\n",
		    (int)(rp->r_deleg_change >> 32)));
		NFS4_DEBUG(nfs4_callback_debug, (CE_NOTE,
		    "nfs4_delegation_accept: r_delg_change_grant: 0x%x\n",
		    (int)(rp->r_deleg_change_grant >> 32)));

		ncg->nfs4_callback_stats.delegations.value.ui64++;
		ncg->nfs4_callback_stats.delegaccept_rw.value.ui64++;
	} else if (already) {
		/*
		 * No delegation granted.  If the rnode currently
		 * has one, then consider it tainted and return it.
		 */
		dr_needed = TRUE;
	}

	if (delegation_granted) {
		/* Add the rnode to the list. */
		if (!already) {
			crhold(cr);
			rp->r_deleg_cred = cr;

			ASSERT(mutex_owned(&np->s_lock));
			list_insert_head(&np->s_deleg_list, rp);
			/* added list node gets a reference */
			np->s_refcnt++;
			nfs4_inc_state_ref_count_nolock(np, mi);
		}
		rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
	}

	/*
	 * We've now safely accepted the delegation, if any.  Drop the
	 * locks and figure out what post-processing is needed.  We'd
	 * like to retain r_statev4_lock, but nfs4_server_rele takes
	 * s_lock which would be a lock ordering violation.
	 */
	mutex_exit(&rp->r_statev4_lock);
	mutex_exit(&rp->r_statelock);
	mutex_exit(&np->s_lock);
	nfs4_server_rele(np);

	/*
	 * Check to see if we are in recovery.  Remember that
	 * this function is protected by start_op, so a recovery
	 * cannot begin until we are out of here.
	 */
	mutex_enter(&mi->mi_lock);
	recov = mi->mi_recovflags & MI4_RECOV_ACTIV;
	mutex_exit(&mi->mi_lock);

	mutex_enter(&rp->r_statev4_lock);

	if (nfs4_delegreturn_policy == IMMEDIATE || !valid_garp)
		dr_needed = TRUE;

	if (dr_needed && rp->r_deleg_return_pending == FALSE) {
		if (recov) {
			/*
			 * We cannot call delegreturn from inside
			 * of recovery or fop_putpage will hang
			 * due to nfs4_start_fop call in
			 * nfs4write.  Use dlistadd to add the
			 * rnode to the list of rnodes needing
			 * cleaning.  We do not need to do reopen
			 * here because recov_openfiles will do it.
			 * In the non-recall case, just discard the
			 * delegation as it is no longer valid.
			 */
			if (recall)
				dr_flags = NFS4_DR_PUSH;
			else
				dr_flags = NFS4_DR_PUSH|NFS4_DR_DISCARD;

			nfs4_dlistadd(rp, ncg, dr_flags);
			dr_flags = 0;
		} else {
			/*
			 * Push the modified data back to the server,
			 * reopen any delegation open streams, and return
			 * the delegation.  Drop the statev4_lock first!
			 */
			dr_flags = NFS4_DR_PUSH|NFS4_DR_DID_OP|NFS4_DR_REOPEN;
		}
	}
	mutex_exit(&rp->r_statev4_lock);

	if (dr_flags)
		(void) nfs4delegreturn_impl(rp, dr_flags, ncg);
}
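/*
 * nfs4_delegreturn_policy determines the post-accept behavior above:
 * under the default INACTIVE policy a granted delegation is kept until
 * some other event (recall, recovery, inactivity) forces its return,
 * while IMMEDIATE sets dr_needed so the delegation is returned as soon
 * as it has been accepted.
 */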
/*
 * nfs4delegabandon - Abandon the delegation on an rnode4.  This code
 * is called when the client receives EXPIRED, BAD_STATEID, OLD_STATEID
 * or BADSEQID and the recovery code is unable to recover.  Push any
 * dirty data back to the server and return the delegation (if any).
 */
void
nfs4delegabandon(rnode4_t *rp)
{
	vnode_t *vp;
	struct cb_recall_pass *pp;
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	if (dt == OPEN_DELEGATE_NONE)
		return;

	vp = RTOV4(rp);
	VN_HOLD(vp);

	pp = kmem_alloc(sizeof (struct cb_recall_pass), KM_SLEEP);
	pp->rp = rp;
	/*
	 * Recovery on the file has failed and we want to return
	 * the delegation.  We don't want to reopen files and
	 * nfs4delegreturn_thread() figures out what to do about
	 * the data.  The only thing to do is attempt to return
	 * the delegation.
	 */
	pp->flags = 0;
	pp->truncate = FALSE;

	/*
	 * Fire up a thread to do the delegreturn; this is
	 * necessary because we could be inside a GETPAGE or
	 * PUTPAGE and we cannot do another one.
	 */

	(void) zthread_create(NULL, 0, nfs4delegreturn_thread, pp, 0,
	    minclsyspri);
}
static int
wait_for_recall1(vnode_t *vp, nfs4_op_hint_t op, nfs4_recov_state_t *rsp,
    int flg)
{
	rnode4_t *rp;
	int error = 0;

	if (vp && vp->v_type == VREG) {
		rp = VTOR4(vp);

		/*
		 * Take r_deleg_recall_lock in read mode to synchronize
		 * with delegreturn.
		 */
		error = nfs_rw_enter_sig(&rp->r_deleg_recall_lock,
		    RW_READER, INTR4(vp));

		if (error == 0)
			rsp->rs_flags |= flg;
	}
	return (error);
}
void
nfs4_end_op_recall(vnode_t *vp1, vnode_t *vp2, nfs4_recov_state_t *rsp)
{
	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "nfs4_end_op_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	if (vp2 && rsp->rs_flags & NFS4_RS_RECALL_HELD2)
		nfs_rw_exit(&VTOR4(vp2)->r_deleg_recall_lock);
	if (vp1 && rsp->rs_flags & NFS4_RS_RECALL_HELD1)
		nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
}
int
wait_for_recall(vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op,
    nfs4_recov_state_t *rsp)
{
	int error;

	NFS4_DEBUG(nfs4_recall_debug,
	    (CE_NOTE, "wait_for_recall: 0x%p, 0x%p\n",
	    (void *)vp1, (void *)vp2));

	rsp->rs_flags &= ~(NFS4_RS_RECALL_HELD1|NFS4_RS_RECALL_HELD2);

	if ((error = wait_for_recall1(vp1, op, rsp, NFS4_RS_RECALL_HELD1)) != 0)
		return (error);

	if ((error = wait_for_recall1(vp2, op, rsp, NFS4_RS_RECALL_HELD2))
	    != 0) {
		if (rsp->rs_flags & NFS4_RS_RECALL_HELD1) {
			nfs_rw_exit(&VTOR4(vp1)->r_deleg_recall_lock);
			rsp->rs_flags &= ~NFS4_RS_RECALL_HELD1;
		}
		return (error);
	}

	return (0);
}
/*
 * nfs4_dlistadd - Add this rnode to a list of rnodes to be
 * DELEGRETURN'd at the end of recovery.
 */
static void
nfs4_dlistadd(rnode4_t *rp, struct nfs4_callback_globals *ncg, int flags)
{
	struct nfs4_dnode *dp;

	ASSERT(mutex_owned(&rp->r_statev4_lock));
	/*
	 * Mark the delegation as having a return pending.
	 * This will prevent the use of the delegation stateID
	 * by read, write, setattr and open.
	 */
	rp->r_deleg_return_pending = TRUE;
	dp = kmem_alloc(sizeof (*dp), KM_SLEEP);
	VN_HOLD(RTOV4(rp));
	dp->rnodep = rp;
	dp->flags = flags;
	mutex_enter(&ncg->nfs4_dlist_lock);
	list_insert_head(&ncg->nfs4_dlist, dp);
	ncg->nfs4_dlistadd_c++;
	mutex_exit(&ncg->nfs4_dlist_lock);
}
/*
 * nfs4_dlistclean_impl - Do DELEGRETURN for each rnode on the list
 * of files awaiting cleaning.  If the override_flags are non-zero
 * then use them rather than the flags that were set when the rnode
 * was added to the dlist.
 */
static void
nfs4_dlistclean_impl(struct nfs4_callback_globals *ncg, int override_flags)
{
	rnode4_t *rp;
	struct nfs4_dnode *dp;
	int flags;

	ASSERT(override_flags == 0 || override_flags == NFS4_DR_DISCARD);

	mutex_enter(&ncg->nfs4_dlist_lock);
	while ((dp = list_head(&ncg->nfs4_dlist)) != NULL) {
		ncg->nfs4_dlistclean_c++;
		list_remove(&ncg->nfs4_dlist, dp);
		mutex_exit(&ncg->nfs4_dlist_lock);
		rp = dp->rnodep;
		flags = (override_flags != 0) ? override_flags : dp->flags;
		kmem_free(dp, sizeof (*dp));
		(void) nfs4delegreturn_impl(rp, flags, ncg);
		VN_RELE(RTOV4(rp));
		mutex_enter(&ncg->nfs4_dlist_lock);
	}
	mutex_exit(&ncg->nfs4_dlist_lock);
}
void
nfs4_dlistclean(void)
{
	struct nfs4_callback_globals *ncg;

	ncg = zone_getspecific(nfs4_callback_zone_key, nfs_zone());
	ASSERT(ncg != NULL);

	nfs4_dlistclean_impl(ncg, 0);
}