3557 dumpvp_size is not updated correctly when a dump zvol's size is changed
[unleashed.git] / usr / src / uts / common / fs / nfs / nfs4_srv_deleg.c
blob83d96b441e92df8b709d0e2ef26e56082f35f06e
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/systm.h>
27 #include <rpc/auth.h>
28 #include <rpc/clnt.h>
29 #include <nfs/nfs4_kprot.h>
30 #include <nfs/nfs4.h>
31 #include <nfs/lm.h>
32 #include <sys/cmn_err.h>
33 #include <sys/disp.h>
34 #include <sys/sdt.h>
36 #include <sys/pathname.h>
38 #include <sys/strsubr.h>
39 #include <sys/ddi.h>
41 #include <sys/vnode.h>
42 #include <sys/sdt.h>
43 #include <inet/common.h>
44 #include <inet/ip.h>
45 #include <inet/ip6.h>
47 #define MAX_READ_DELEGATIONS 5
49 krwlock_t rfs4_deleg_policy_lock;
50 srv_deleg_policy_t rfs4_deleg_policy = SRV_NEVER_DELEGATE;
51 static int rfs4_deleg_wlp = 5;
52 kmutex_t rfs4_deleg_lock;
53 static int rfs4_deleg_disabled;
54 static int rfs4_max_setup_cb_tries = 5;
56 #ifdef DEBUG
58 static int rfs4_test_cbgetattr_fail = 0;
59 int rfs4_cb_null;
60 int rfs4_cb_debug;
61 int rfs4_deleg_debug;
63 #endif
65 static void rfs4_recall_file(rfs4_file_t *,
66 void (*recall)(rfs4_deleg_state_t *, bool_t),
67 bool_t, rfs4_client_t *);
68 static void rfs4_revoke_file(rfs4_file_t *);
69 static void rfs4_cb_chflush(rfs4_cbinfo_t *);
70 static CLIENT *rfs4_cb_getch(rfs4_cbinfo_t *);
71 static void rfs4_cb_freech(rfs4_cbinfo_t *, CLIENT *, bool_t);
72 static rfs4_deleg_state_t *rfs4_deleg_state(rfs4_state_t *,
73 open_delegation_type4, int *);
/*
 * Convert a universal address to a transport specific address using
 * inet_pton.
 *
 * The universal address is the textual host address followed by two
 * dot-separated decimal port octets ("<host>.<p1>.<p2>").  The string
 * is split at the second dot counted from the end; the host part is
 * handed to inet_pton() and the trailing octets form the port as
 * (p1 << 8) + p2.
 *
 *	af	address family handed to inet_pton(); AF_INET selects the
 *		32-bit swap below, anything else is treated as IPv6
 *	ua	universal address; temporarily modified in place while the
 *		host part is parsed, but always restored before returning
 *	ap	receives the converted address
 *	pp	receives the port, passed through htons()
 *
 * Returns 0 on success.  Returns EINVAL if the string has fewer than
 * two dots, the host part is rejected by inet_pton(), the port part
 * contains a character other than a digit or '.', or a port octet
 * exceeds 255.  (Previously an out-of-range octet silently wrapped in
 * an unsigned char and yielded a bogus port.)  On a port-parsing
 * failure *ap may already have been written.
 */
static int
uaddr2sockaddr(int af, char *ua, void *ap, in_port_t *pp)
{
	int dots = 0, i, j, len;
	unsigned int octet = 0;
	in_port_t port = 0;

	len = strlen(ua);

	/* Walk backwards to the dot that separates host from port. */
	for (i = len - 1; i >= 0; i--) {
		if (ua[i] == '.')
			dots++;
		if (dots == 2)
			break;
	}
	if (i < 0)
		return (EINVAL);

	/*
	 * Terminate the host part just long enough for inet_pton() to
	 * parse it, then put the '.' back since the caller owns ua.
	 */
	ua[i] = '\0';
	if (inet_pton(af, ua, ap) != 1) {
		ua[i] = '.';
		return (EINVAL);
	}
	ua[i] = '.';

	/* Parse "<p1>.<p2>" into a 16-bit port. */
	for (j = i + 1; j < len; j++) {
		if (ua[j] == '.') {
			port = octet << 8;
			octet = 0;
		} else if (ua[j] >= '0' && ua[j] <= '9') {
			octet = octet * 10 + (ua[j] - '0');
			/* each port octet must fit in eight bits */
			if (octet > 255)
				return (EINVAL);
		} else {
			return (EINVAL);
		}
	}
	port += octet;

	/*
	 * reset to network order
	 * NOTE(review): this presumes the inet_pton() in use hands back
	 * the address in host order -- confirm before altering the swap.
	 */
	if (af == AF_INET) {
		*(uint32_t *)ap = htonl(*(uint32_t *)ap);
		*pp = htons(port);
	} else {
		int ix;
		uint16_t *sap;

		for (sap = ap, ix = 0; ix <
		    sizeof (struct in6_addr) /
		    sizeof (uint16_t); ix++)
			sap[ix] = htons(sap[ix]);

		*pp = htons(port);
	}

	return (0);
}
151 * Update the delegation policy with the
152 * value of "new_policy"
154 void
155 rfs4_set_deleg_policy(srv_deleg_policy_t new_policy)
157 rw_enter(&rfs4_deleg_policy_lock, RW_WRITER);
158 rfs4_deleg_policy = new_policy;
159 rw_exit(&rfs4_deleg_policy_lock);
162 void
163 rfs4_hold_deleg_policy(void)
165 rw_enter(&rfs4_deleg_policy_lock, RW_READER);
168 void
169 rfs4_rele_deleg_policy(void)
171 rw_exit(&rfs4_deleg_policy_lock);
176 * This free function is to be used when the client struct is being
177 * released and nothing at all is needed of the callback info any
178 * longer.
180 void
181 rfs4_cbinfo_free(rfs4_cbinfo_t *cbp)
183 char *addr = cbp->cb_callback.cb_location.r_addr;
184 char *netid = cbp->cb_callback.cb_location.r_netid;
186 /* Free old address if any */
188 if (addr)
189 kmem_free(addr, strlen(addr) + 1);
190 if (netid)
191 kmem_free(netid, strlen(netid) + 1);
193 addr = cbp->cb_newer.cb_callback.cb_location.r_addr;
194 netid = cbp->cb_newer.cb_callback.cb_location.r_netid;
196 if (addr)
197 kmem_free(addr, strlen(addr) + 1);
198 if (netid)
199 kmem_free(netid, strlen(netid) + 1);
201 if (cbp->cb_chc_free) {
202 rfs4_cb_chflush(cbp);
207 * The server uses this to check the callback path supplied by the
208 * client. The callback connection is marked "in progress" while this
209 * work is going on and then eventually marked either OK or FAILED.
210 * This work can be done as part of a separate thread and at the end
211 * of this the thread will exit or it may be done such that the caller
212 * will continue with other work.
214 static void
215 rfs4_do_cb_null(rfs4_client_t *cp)
217 struct timeval tv;
218 CLIENT *ch;
219 rfs4_cbstate_t newstate;
220 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
222 mutex_enter(cbp->cb_lock);
223 /* If another thread is doing CB_NULL RPC then return */
224 if (cbp->cb_nullcaller == TRUE) {
225 mutex_exit(cbp->cb_lock);
226 rfs4_client_rele(cp);
227 return;
230 /* Mark the cbinfo as having a thread in the NULL callback */
231 cbp->cb_nullcaller = TRUE;
234 * Are there other threads still using the cbinfo client
235 * handles? If so, this thread must wait before going and
236 * mucking aroiund with the callback information
238 while (cbp->cb_refcnt != 0)
239 cv_wait(cbp->cb_cv_nullcaller, cbp->cb_lock);
242 * This thread itself may find that new callback info has
243 * arrived and is set up to handle this case and redrive the
244 * call to the client's callback server.
246 retry:
247 if (cbp->cb_newer.cb_new == TRUE &&
248 cbp->cb_newer.cb_confirmed == TRUE) {
249 char *addr = cbp->cb_callback.cb_location.r_addr;
250 char *netid = cbp->cb_callback.cb_location.r_netid;
253 * Free the old stuff if it exists; may be the first
254 * time through this path
256 if (addr)
257 kmem_free(addr, strlen(addr) + 1);
258 if (netid)
259 kmem_free(netid, strlen(netid) + 1);
261 /* Move over the addr/netid */
262 cbp->cb_callback.cb_location.r_addr =
263 cbp->cb_newer.cb_callback.cb_location.r_addr;
264 cbp->cb_newer.cb_callback.cb_location.r_addr = NULL;
265 cbp->cb_callback.cb_location.r_netid =
266 cbp->cb_newer.cb_callback.cb_location.r_netid;
267 cbp->cb_newer.cb_callback.cb_location.r_netid = NULL;
269 /* Get the program number */
270 cbp->cb_callback.cb_program =
271 cbp->cb_newer.cb_callback.cb_program;
272 cbp->cb_newer.cb_callback.cb_program = 0;
274 /* Don't forget the protocol's "cb_ident" field */
275 cbp->cb_ident = cbp->cb_newer.cb_ident;
276 cbp->cb_newer.cb_ident = 0;
278 /* no longer new */
279 cbp->cb_newer.cb_new = FALSE;
280 cbp->cb_newer.cb_confirmed = FALSE;
282 /* get rid of the old client handles that may exist */
283 rfs4_cb_chflush(cbp);
285 cbp->cb_state = CB_NONE;
286 cbp->cb_timefailed = 0; /* reset the clock */
287 cbp->cb_notified_of_cb_path_down = TRUE;
290 if (cbp->cb_state != CB_NONE) {
291 cv_broadcast(cbp->cb_cv); /* let the others know */
292 cbp->cb_nullcaller = FALSE;
293 mutex_exit(cbp->cb_lock);
294 rfs4_client_rele(cp);
295 return;
298 /* mark rfs4_client_t as CALLBACK NULL in progress */
299 cbp->cb_state = CB_INPROG;
300 mutex_exit(cbp->cb_lock);
302 /* get/generate a client handle */
303 if ((ch = rfs4_cb_getch(cbp)) == NULL) {
304 mutex_enter(cbp->cb_lock);
305 cbp->cb_state = CB_BAD;
306 cbp->cb_timefailed = gethrestime_sec(); /* observability */
307 goto retry;
311 tv.tv_sec = 30;
312 tv.tv_usec = 0;
313 if (clnt_call(ch, CB_NULL, xdr_void, NULL, xdr_void, NULL, tv) != 0) {
314 newstate = CB_BAD;
315 } else {
316 newstate = CB_OK;
317 #ifdef DEBUG
318 rfs4_cb_null++;
319 #endif
322 /* Check to see if the client has specified new callback info */
323 mutex_enter(cbp->cb_lock);
324 rfs4_cb_freech(cbp, ch, TRUE);
325 if (cbp->cb_newer.cb_new == TRUE &&
326 cbp->cb_newer.cb_confirmed == TRUE) {
327 goto retry; /* give the CB_NULL another chance */
330 cbp->cb_state = newstate;
331 if (cbp->cb_state == CB_BAD)
332 cbp->cb_timefailed = gethrestime_sec(); /* observability */
334 cv_broadcast(cbp->cb_cv); /* start up the other threads */
335 cbp->cb_nullcaller = FALSE;
336 mutex_exit(cbp->cb_lock);
338 rfs4_client_rele(cp);
342 * Given a client struct, inspect the callback info to see if the
343 * callback path is up and available.
345 * If new callback path is available and no one has set it up then
346 * try to set it up. If setup is not successful after 5 tries (5 secs)
347 * then gives up and returns NULL.
349 * If callback path is being initialized, then wait for the CB_NULL RPC
350 * call to occur.
352 static rfs4_cbinfo_t *
353 rfs4_cbinfo_hold(rfs4_client_t *cp)
355 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
356 int retries = 0;
358 mutex_enter(cbp->cb_lock);
360 while (cbp->cb_newer.cb_new == TRUE && cbp->cb_nullcaller == FALSE) {
362 * Looks like a new callback path may be available and
363 * noone has set it up.
365 mutex_exit(cbp->cb_lock);
366 rfs4_dbe_hold(cp->rc_dbe);
367 rfs4_do_cb_null(cp); /* caller will release client hold */
369 mutex_enter(cbp->cb_lock);
371 * If callback path is no longer new, or it's being setup
372 * then stop and wait for it to be done.
374 if (cbp->cb_newer.cb_new == FALSE || cbp->cb_nullcaller == TRUE)
375 break;
376 mutex_exit(cbp->cb_lock);
378 if (++retries >= rfs4_max_setup_cb_tries)
379 return (NULL);
380 delay(hz);
381 mutex_enter(cbp->cb_lock);
384 /* Is there a thread working on doing the CB_NULL RPC? */
385 if (cbp->cb_nullcaller == TRUE)
386 cv_wait(cbp->cb_cv, cbp->cb_lock); /* if so, wait on it */
388 /* If the callback path is not okay (up and running), just quit */
389 if (cbp->cb_state != CB_OK) {
390 mutex_exit(cbp->cb_lock);
391 return (NULL);
394 /* Let someone know we are using the current callback info */
395 cbp->cb_refcnt++;
396 mutex_exit(cbp->cb_lock);
397 return (cbp);
401 * The caller is done with the callback info. It may be that the
402 * caller's RPC failed and the NFSv4 client has actually provided new
403 * callback information. If so, let the caller know so they can
404 * advantage of this and maybe retry the RPC that originally failed.
406 static int
407 rfs4_cbinfo_rele(rfs4_cbinfo_t *cbp, rfs4_cbstate_t newstate)
409 int cb_new = FALSE;
411 mutex_enter(cbp->cb_lock);
413 /* The caller gets a chance to mark the callback info as bad */
414 if (newstate != CB_NOCHANGE)
415 cbp->cb_state = newstate;
416 if (newstate == CB_FAILED) {
417 cbp->cb_timefailed = gethrestime_sec(); /* observability */
418 cbp->cb_notified_of_cb_path_down = FALSE;
421 cbp->cb_refcnt--; /* no longer using the information */
424 * A thread may be waiting on this one to finish and if so,
425 * let it know that it is okay to do the CB_NULL to the
426 * client's callback server.
428 if (cbp->cb_refcnt == 0 && cbp->cb_nullcaller)
429 cv_broadcast(cbp->cb_cv_nullcaller);
432 * If this is the last thread to use the callback info and
433 * there is new callback information to try and no thread is
434 * there ready to do the CB_NULL, then return true to teh
435 * caller so they can do the CB_NULL
437 if (cbp->cb_refcnt == 0 &&
438 cbp->cb_nullcaller == FALSE &&
439 cbp->cb_newer.cb_new == TRUE &&
440 cbp->cb_newer.cb_confirmed == TRUE)
441 cb_new = TRUE;
443 mutex_exit(cbp->cb_lock);
445 return (cb_new);
449 * Given the information in the callback info struct, create a client
450 * handle that can be used by the server for its callback path.
452 static CLIENT *
453 rfs4_cbch_init(rfs4_cbinfo_t *cbp)
455 struct knetconfig knc;
456 vnode_t *vp;
457 struct sockaddr_in addr4;
458 struct sockaddr_in6 addr6;
459 void *addr, *taddr;
460 in_port_t *pp;
461 int af;
462 char *devnam;
463 struct netbuf nb;
464 int size;
465 CLIENT *ch = NULL;
466 int useresvport = 0;
468 mutex_enter(cbp->cb_lock);
470 if (cbp->cb_callback.cb_location.r_netid == NULL ||
471 cbp->cb_callback.cb_location.r_addr == NULL) {
472 goto cb_init_out;
475 if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp") == 0) {
476 knc.knc_semantics = NC_TPI_COTS;
477 knc.knc_protofmly = "inet";
478 knc.knc_proto = "tcp";
479 devnam = "/dev/tcp";
480 af = AF_INET;
481 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp")
482 == 0) {
483 knc.knc_semantics = NC_TPI_CLTS;
484 knc.knc_protofmly = "inet";
485 knc.knc_proto = "udp";
486 devnam = "/dev/udp";
487 af = AF_INET;
488 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "tcp6")
489 == 0) {
490 knc.knc_semantics = NC_TPI_COTS;
491 knc.knc_protofmly = "inet6";
492 knc.knc_proto = "tcp";
493 devnam = "/dev/tcp6";
494 af = AF_INET6;
495 } else if (strcmp(cbp->cb_callback.cb_location.r_netid, "udp6")
496 == 0) {
497 knc.knc_semantics = NC_TPI_CLTS;
498 knc.knc_protofmly = "inet6";
499 knc.knc_proto = "udp";
500 devnam = "/dev/udp6";
501 af = AF_INET6;
502 } else {
503 goto cb_init_out;
506 if (lookupname(devnam, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp) != 0) {
508 goto cb_init_out;
511 if (vp->v_type != VCHR) {
512 VN_RELE(vp);
513 goto cb_init_out;
516 knc.knc_rdev = vp->v_rdev;
518 VN_RELE(vp);
520 if (af == AF_INET) {
521 size = sizeof (addr4);
522 bzero(&addr4, size);
523 addr4.sin_family = (sa_family_t)af;
524 addr = &addr4.sin_addr;
525 pp = &addr4.sin_port;
526 taddr = &addr4;
527 } else /* AF_INET6 */ {
528 size = sizeof (addr6);
529 bzero(&addr6, size);
530 addr6.sin6_family = (sa_family_t)af;
531 addr = &addr6.sin6_addr;
532 pp = &addr6.sin6_port;
533 taddr = &addr6;
536 if (uaddr2sockaddr(af,
537 cbp->cb_callback.cb_location.r_addr, addr, pp)) {
539 goto cb_init_out;
543 nb.maxlen = nb.len = size;
544 nb.buf = (char *)taddr;
546 if (clnt_tli_kcreate(&knc, &nb, cbp->cb_callback.cb_program,
547 NFS_CB, 0, 0, curthread->t_cred, &ch)) {
549 ch = NULL;
552 /* turn off reserved port usage */
553 (void) CLNT_CONTROL(ch, CLSET_BINDRESVPORT, (char *)&useresvport);
555 cb_init_out:
556 mutex_exit(cbp->cb_lock);
557 return (ch);
561 * Iterate over the client handle cache and
562 * destroy it.
564 static void
565 rfs4_cb_chflush(rfs4_cbinfo_t *cbp)
567 CLIENT *ch;
569 while (cbp->cb_chc_free) {
570 cbp->cb_chc_free--;
571 ch = cbp->cb_chc[cbp->cb_chc_free];
572 cbp->cb_chc[cbp->cb_chc_free] = NULL;
573 if (ch) {
574 if (ch->cl_auth)
575 auth_destroy(ch->cl_auth);
576 clnt_destroy(ch);
582 * Return a client handle, either from a the small
583 * rfs4_client_t cache or one that we just created.
585 static CLIENT *
586 rfs4_cb_getch(rfs4_cbinfo_t *cbp)
588 CLIENT *cbch = NULL;
589 uint32_t zilch = 0;
591 mutex_enter(cbp->cb_lock);
593 if (cbp->cb_chc_free) {
594 cbp->cb_chc_free--;
595 cbch = cbp->cb_chc[ cbp->cb_chc_free ];
596 mutex_exit(cbp->cb_lock);
597 (void) CLNT_CONTROL(cbch, CLSET_XID, (char *)&zilch);
598 return (cbch);
601 mutex_exit(cbp->cb_lock);
603 /* none free so make it now */
604 cbch = rfs4_cbch_init(cbp);
606 return (cbch);
610 * Return the client handle to the small cache or
611 * destroy it.
613 static void
614 rfs4_cb_freech(rfs4_cbinfo_t *cbp, CLIENT *ch, bool_t lockheld)
616 if (lockheld == FALSE)
617 mutex_enter(cbp->cb_lock);
619 if (cbp->cb_chc_free < RFS4_CBCH_MAX) {
620 cbp->cb_chc[ cbp->cb_chc_free++ ] = ch;
621 if (lockheld == FALSE)
622 mutex_exit(cbp->cb_lock);
623 return;
625 if (lockheld == FALSE)
626 mutex_exit(cbp->cb_lock);
629 * cache maxed out of free entries, obliterate
630 * this client handle, destroy it, throw it away.
632 if (ch->cl_auth)
633 auth_destroy(ch->cl_auth);
634 clnt_destroy(ch);
638 * With the supplied callback information - initialize the client
639 * callback data. If there is a callback in progress, save the
640 * callback info so that a thread can pick it up in the future.
642 void
643 rfs4_client_setcb(rfs4_client_t *cp, cb_client4 *cb, uint32_t cb_ident)
645 char *addr = NULL;
646 char *netid = NULL;
647 rfs4_cbinfo_t *cbp = &cp->rc_cbinfo;
648 size_t len;
650 /* Set the call back for the client */
651 if (cb->cb_location.r_addr && cb->cb_location.r_addr[0] != '\0' &&
652 cb->cb_location.r_netid && cb->cb_location.r_netid[0] != '\0') {
653 len = strlen(cb->cb_location.r_addr) + 1;
654 addr = kmem_alloc(len, KM_SLEEP);
655 bcopy(cb->cb_location.r_addr, addr, len);
656 len = strlen(cb->cb_location.r_netid) + 1;
657 netid = kmem_alloc(len, KM_SLEEP);
658 bcopy(cb->cb_location.r_netid, netid, len);
660 /* ready to save the new information but first free old, if exists */
661 mutex_enter(cbp->cb_lock);
663 cbp->cb_newer.cb_callback.cb_program = cb->cb_program;
665 if (cbp->cb_newer.cb_callback.cb_location.r_addr != NULL)
666 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_addr,
667 strlen(cbp->cb_newer.cb_callback.cb_location.r_addr) + 1);
668 cbp->cb_newer.cb_callback.cb_location.r_addr = addr;
670 if (cbp->cb_newer.cb_callback.cb_location.r_netid != NULL)
671 kmem_free(cbp->cb_newer.cb_callback.cb_location.r_netid,
672 strlen(cbp->cb_newer.cb_callback.cb_location.r_netid) + 1);
673 cbp->cb_newer.cb_callback.cb_location.r_netid = netid;
675 cbp->cb_newer.cb_ident = cb_ident;
677 if (addr && *addr && netid && *netid) {
678 cbp->cb_newer.cb_new = TRUE;
679 cbp->cb_newer.cb_confirmed = FALSE;
680 } else {
681 cbp->cb_newer.cb_new = FALSE;
682 cbp->cb_newer.cb_confirmed = FALSE;
685 mutex_exit(cbp->cb_lock);
689 * The server uses this when processing SETCLIENTID_CONFIRM. Callback
690 * information may have been provided on SETCLIENTID and this call
691 * marks that information as confirmed and then starts a thread to
692 * test the callback path.
694 void
695 rfs4_deleg_cb_check(rfs4_client_t *cp)
697 if (cp->rc_cbinfo.cb_newer.cb_new == FALSE)
698 return;
700 cp->rc_cbinfo.cb_newer.cb_confirmed = TRUE;
702 rfs4_dbe_hold(cp->rc_dbe); /* hold the client struct for thread */
704 (void) thread_create(NULL, 0, rfs4_do_cb_null, cp, 0, &p0, TS_RUN,
705 minclsyspri);
708 static void
709 rfs4args_cb_recall_free(nfs_cb_argop4 *argop)
711 CB_RECALL4args *rec_argp;
713 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
714 if (rec_argp->fh.nfs_fh4_val)
715 kmem_free(rec_argp->fh.nfs_fh4_val, rec_argp->fh.nfs_fh4_len);
718 /* ARGSUSED */
719 static void
720 rfs4args_cb_getattr_free(nfs_cb_argop4 *argop)
722 CB_GETATTR4args *argp;
724 argp = &argop->nfs_cb_argop4_u.opcbgetattr;
725 if (argp->fh.nfs_fh4_val)
726 kmem_free(argp->fh.nfs_fh4_val, argp->fh.nfs_fh4_len);
729 static void
730 rfs4freeargres(CB_COMPOUND4args *args, CB_COMPOUND4res *resp)
732 int i, arglen;
733 nfs_cb_argop4 *argop;
736 * First free any special args alloc'd for specific ops.
738 arglen = args->array_len;
739 argop = args->array;
740 for (i = 0; i < arglen; i++, argop++) {
742 switch (argop->argop) {
743 case OP_CB_RECALL:
744 rfs4args_cb_recall_free(argop);
745 break;
747 case OP_CB_GETATTR:
748 rfs4args_cb_getattr_free(argop);
749 break;
751 default:
752 return;
756 if (args->tag.utf8string_len > 0)
757 UTF8STRING_FREE(args->tag)
759 kmem_free(args->array, arglen * sizeof (nfs_cb_argop4));
760 if (resp)
761 (void) xdr_free(xdr_CB_COMPOUND4res, (caddr_t)resp);
765 * General callback routine for the server to the client.
767 static enum clnt_stat
768 rfs4_do_callback(rfs4_client_t *cp, CB_COMPOUND4args *args,
769 CB_COMPOUND4res *res, struct timeval timeout)
771 rfs4_cbinfo_t *cbp;
772 CLIENT *ch;
773 /* start with this in case cb_getch() fails */
774 enum clnt_stat stat = RPC_FAILED;
776 res->tag.utf8string_val = NULL;
777 res->array = NULL;
779 retry:
780 cbp = rfs4_cbinfo_hold(cp);
781 if (cbp == NULL)
782 return (stat);
784 /* get a client handle */
785 if ((ch = rfs4_cb_getch(cbp)) != NULL) {
787 * reset the cb_ident since it may have changed in
788 * rfs4_cbinfo_hold()
790 args->callback_ident = cbp->cb_ident;
792 stat = clnt_call(ch, CB_COMPOUND, xdr_CB_COMPOUND4args_srv,
793 (caddr_t)args, xdr_CB_COMPOUND4res,
794 (caddr_t)res, timeout);
796 /* free client handle */
797 rfs4_cb_freech(cbp, ch, FALSE);
801 * If the rele says that there may be new callback info then
802 * retry this sequence and it may succeed as a result of the
803 * new callback path
805 if (rfs4_cbinfo_rele(cbp,
806 (stat == RPC_SUCCESS ? CB_NOCHANGE : CB_FAILED)) == TRUE)
807 goto retry;
809 return (stat);
813 * Used by the NFSv4 server to get attributes for a file while
814 * handling the case where a file has been write delegated. For the
815 * time being, VOP_GETATTR() is called and CB_GETATTR processing is
816 * not undertaken. This call site is maintained in case the server is
817 * updated in the future to handle write delegation space guarantees.
819 nfsstat4
820 rfs4_vop_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
823 int error;
825 error = VOP_GETATTR(vp, vap, flag, cr, NULL);
826 return (puterrno4(error));
830 * This is used everywhere in the v2/v3 server to allow the
831 * integration of all NFS versions and the support of delegation. For
832 * now, just call the VOP_GETATTR(). If the NFSv4 server is enhanced
833 * in the future to provide space guarantees for write delegations
834 * then this call site should be expanded to interact with the client.
837 rfs4_delegated_getattr(vnode_t *vp, vattr_t *vap, int flag, cred_t *cr)
839 return (VOP_GETATTR(vp, vap, flag, cr, NULL));
843 * Place the actual cb_recall otw call to client.
845 static void
846 rfs4_do_cb_recall(rfs4_deleg_state_t *dsp, bool_t trunc)
848 CB_COMPOUND4args cb4_args;
849 CB_COMPOUND4res cb4_res;
850 CB_RECALL4args *rec_argp;
851 CB_RECALL4res *rec_resp;
852 nfs_cb_argop4 *argop;
853 int numops;
854 int argoplist_size;
855 struct timeval timeout;
856 nfs_fh4 *fhp;
857 enum clnt_stat call_stat;
860 * set up the compound args
862 numops = 1; /* CB_RECALL only */
864 argoplist_size = numops * sizeof (nfs_cb_argop4);
865 argop = kmem_zalloc(argoplist_size, KM_SLEEP);
866 argop->argop = OP_CB_RECALL;
867 rec_argp = &argop->nfs_cb_argop4_u.opcbrecall;
869 (void) str_to_utf8("cb_recall", &cb4_args.tag);
870 cb4_args.minorversion = CB4_MINORVERSION;
871 /* cb4_args.callback_ident is set in rfs4_do_callback() */
872 cb4_args.array_len = numops;
873 cb4_args.array = argop;
876 * fill in the args struct
878 bcopy(&dsp->rds_delegid.stateid, &rec_argp->stateid, sizeof (stateid4));
879 rec_argp->truncate = trunc;
881 fhp = &dsp->rds_finfo->rf_filehandle;
882 rec_argp->fh.nfs_fh4_val = kmem_alloc(sizeof (char) *
883 fhp->nfs_fh4_len, KM_SLEEP);
884 nfs_fh4_copy(fhp, &rec_argp->fh);
886 /* Keep track of when we did this for observability */
887 dsp->rds_time_recalled = gethrestime_sec();
890 * Set up the timeout for the callback and make the actual call.
891 * Timeout will be 80% of the lease period for this server.
893 timeout.tv_sec = (rfs4_lease_time * 80) / 100;
894 timeout.tv_usec = 0;
896 DTRACE_NFSV4_3(cb__recall__start, rfs4_client_t *, dsp->rds_client,
897 rfs4_deleg_state_t *, dsp, CB_RECALL4args *, rec_argp);
899 call_stat = rfs4_do_callback(dsp->rds_client, &cb4_args, &cb4_res,
900 timeout);
902 rec_resp = (cb4_res.array_len == 0) ? NULL :
903 &cb4_res.array[0].nfs_cb_resop4_u.opcbrecall;
905 DTRACE_NFSV4_3(cb__recall__done, rfs4_client_t *, dsp->rds_client,
906 rfs4_deleg_state_t *, dsp, CB_RECALL4res *, rec_resp);
908 if (call_stat != RPC_SUCCESS || cb4_res.status != NFS4_OK) {
909 rfs4_return_deleg(dsp, TRUE);
912 rfs4freeargres(&cb4_args, &cb4_res);
915 struct recall_arg {
916 rfs4_deleg_state_t *dsp;
917 void (*recall)(rfs4_deleg_state_t *, bool_t trunc);
918 bool_t trunc;
921 static void
922 do_recall(struct recall_arg *arg)
924 rfs4_deleg_state_t *dsp = arg->dsp;
925 rfs4_file_t *fp = dsp->rds_finfo;
926 callb_cpr_t cpr_info;
927 kmutex_t cpr_lock;
929 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
930 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recall");
933 * It is possible that before this thread starts
934 * the client has send us a return_delegation, and
935 * if that is the case we do not need to send the
936 * recall callback.
938 if (dsp->rds_dtype != OPEN_DELEGATE_NONE) {
939 DTRACE_PROBE3(nfss__i__recall,
940 struct recall_arg *, arg,
941 struct rfs4_deleg_state_t *, dsp,
942 struct rfs4_file_t *, fp);
944 if (arg->recall)
945 (void) (*arg->recall)(dsp, arg->trunc);
948 mutex_enter(fp->rf_dinfo.rd_recall_lock);
950 * Recall count may go negative if the parent thread that is
951 * creating the individual callback threads does not modify
952 * the recall_count field before the callback thread actually
953 * gets a response from the CB_RECALL
955 fp->rf_dinfo.rd_recall_count--;
956 if (fp->rf_dinfo.rd_recall_count == 0)
957 cv_signal(fp->rf_dinfo.rd_recall_cv);
958 mutex_exit(fp->rf_dinfo.rd_recall_lock);
960 mutex_enter(&cpr_lock);
961 CALLB_CPR_EXIT(&cpr_info);
962 mutex_destroy(&cpr_lock);
964 rfs4_deleg_state_rele(dsp); /* release the hold for this thread */
966 kmem_free(arg, sizeof (struct recall_arg));
969 struct master_recall_args {
970 rfs4_file_t *fp;
971 void (*recall)(rfs4_deleg_state_t *, bool_t);
972 bool_t trunc;
975 static void
976 do_recall_file(struct master_recall_args *map)
978 rfs4_file_t *fp = map->fp;
979 rfs4_deleg_state_t *dsp;
980 struct recall_arg *arg;
981 callb_cpr_t cpr_info;
982 kmutex_t cpr_lock;
983 int32_t recall_count;
985 rfs4_dbe_lock(fp->rf_dbe);
987 /* Recall already in progress ? */
988 mutex_enter(fp->rf_dinfo.rd_recall_lock);
989 if (fp->rf_dinfo.rd_recall_count != 0) {
990 mutex_exit(fp->rf_dinfo.rd_recall_lock);
991 rfs4_dbe_rele_nolock(fp->rf_dbe);
992 rfs4_dbe_unlock(fp->rf_dbe);
993 kmem_free(map, sizeof (struct master_recall_args));
994 return;
997 mutex_exit(fp->rf_dinfo.rd_recall_lock);
999 mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
1000 CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "v4RecallFile");
1002 recall_count = 0;
1003 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1004 dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1006 rfs4_dbe_lock(dsp->rds_dbe);
1008 * if this delegation state
1009 * is being reaped skip it
1011 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) {
1012 rfs4_dbe_unlock(dsp->rds_dbe);
1013 continue;
1016 /* hold for receiving thread */
1017 rfs4_dbe_hold(dsp->rds_dbe);
1018 rfs4_dbe_unlock(dsp->rds_dbe);
1020 arg = kmem_alloc(sizeof (struct recall_arg), KM_SLEEP);
1021 arg->recall = map->recall;
1022 arg->trunc = map->trunc;
1023 arg->dsp = dsp;
1025 recall_count++;
1027 (void) thread_create(NULL, 0, do_recall, arg, 0, &p0, TS_RUN,
1028 minclsyspri);
1031 rfs4_dbe_unlock(fp->rf_dbe);
1033 mutex_enter(fp->rf_dinfo.rd_recall_lock);
1035 * Recall count may go negative if the parent thread that is
1036 * creating the individual callback threads does not modify
1037 * the recall_count field before the callback thread actually
1038 * gets a response from the CB_RECALL
1040 fp->rf_dinfo.rd_recall_count += recall_count;
1041 while (fp->rf_dinfo.rd_recall_count)
1042 cv_wait(fp->rf_dinfo.rd_recall_cv, fp->rf_dinfo.rd_recall_lock);
1044 mutex_exit(fp->rf_dinfo.rd_recall_lock);
1046 DTRACE_PROBE1(nfss__i__recall_done, rfs4_file_t *, fp);
1047 rfs4_file_rele(fp);
1048 kmem_free(map, sizeof (struct master_recall_args));
1049 mutex_enter(&cpr_lock);
1050 CALLB_CPR_EXIT(&cpr_info);
1051 mutex_destroy(&cpr_lock);
1054 static void
1055 rfs4_recall_file(rfs4_file_t *fp,
1056 void (*recall)(rfs4_deleg_state_t *, bool_t trunc),
1057 bool_t trunc, rfs4_client_t *cp)
1059 struct master_recall_args *args;
1061 rfs4_dbe_lock(fp->rf_dbe);
1062 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1063 rfs4_dbe_unlock(fp->rf_dbe);
1064 return;
1066 rfs4_dbe_hold(fp->rf_dbe); /* hold for new thread */
1069 * Mark the time we started the recall processing.
1070 * If it has been previously recalled, do not reset the
1071 * timer since this is used for the revocation decision.
1073 if (fp->rf_dinfo.rd_time_recalled == 0)
1074 fp->rf_dinfo.rd_time_recalled = gethrestime_sec();
1075 fp->rf_dinfo.rd_ever_recalled = TRUE; /* used for policy decision */
1076 /* Client causing recall not always available */
1077 if (cp)
1078 fp->rf_dinfo.rd_conflicted_client = cp->rc_clientid;
1080 rfs4_dbe_unlock(fp->rf_dbe);
1082 args = kmem_alloc(sizeof (struct master_recall_args), KM_SLEEP);
1083 args->fp = fp;
1084 args->recall = recall;
1085 args->trunc = trunc;
1087 (void) thread_create(NULL, 0, do_recall_file, args, 0, &p0, TS_RUN,
1088 minclsyspri);
1091 void
1092 rfs4_recall_deleg(rfs4_file_t *fp, bool_t trunc, rfs4_client_t *cp)
1094 time_t elapsed1, elapsed2;
1096 if (fp->rf_dinfo.rd_time_recalled != 0) {
1097 elapsed1 = gethrestime_sec() - fp->rf_dinfo.rd_time_recalled;
1098 elapsed2 = gethrestime_sec() - fp->rf_dinfo.rd_time_lastwrite;
1099 /* First check to see if a revocation should occur */
1100 if (elapsed1 > rfs4_lease_time &&
1101 elapsed2 > rfs4_lease_time) {
1102 rfs4_revoke_file(fp);
1103 return;
1106 * Next check to see if a recall should be done again
1107 * so quickly.
1109 if (elapsed1 <= ((rfs4_lease_time * 20) / 100))
1110 return;
1112 rfs4_recall_file(fp, rfs4_do_cb_recall, trunc, cp);
1116 * rfs4_check_recall is called from rfs4_do_open to determine if the current
1117 * open conflicts with the delegation.
1118 * Return true if we need recall otherwise false.
1119 * Assumes entry locks for sp and sp->rs_finfo are held.
1121 bool_t
1122 rfs4_check_recall(rfs4_state_t *sp, uint32_t access)
1124 open_delegation_type4 dtype = sp->rs_finfo->rf_dinfo.rd_dtype;
1126 switch (dtype) {
1127 case OPEN_DELEGATE_NONE:
1128 /* Not currently delegated so there is nothing to do */
1129 return (FALSE);
1130 case OPEN_DELEGATE_READ:
1132 * If the access is only asking for READ then there is
1133 * no conflict and nothing to do. If it is asking
1134 * for write, then there will be conflict and the read
1135 * delegation should be recalled.
1137 if (access == OPEN4_SHARE_ACCESS_READ)
1138 return (FALSE);
1139 else
1140 return (TRUE);
1141 case OPEN_DELEGATE_WRITE:
1142 /* Check to see if this client has the delegation */
1143 return (rfs4_is_deleg(sp));
1146 return (FALSE);
1150 * Return the "best" allowable delegation available given the current
1151 * delegation type and the desired access and deny modes on the file.
1152 * At the point that this routine is called we know that the access and
1153 * deny modes are consistent with the file modes.
1155 static open_delegation_type4
1156 rfs4_check_delegation(rfs4_state_t *sp, rfs4_file_t *fp)
1158 open_delegation_type4 dtype = fp->rf_dinfo.rd_dtype;
1159 uint32_t access = sp->rs_share_access;
1160 uint32_t deny = sp->rs_share_deny;
1161 int readcnt = 0;
1162 int writecnt = 0;
1164 switch (dtype) {
1165 case OPEN_DELEGATE_NONE:
1167 * Determine if more than just this OPEN have the file
1168 * open and if so, no delegation may be provided to
1169 * the client.
1171 if (access & OPEN4_SHARE_ACCESS_WRITE)
1172 writecnt++;
1173 if (access & OPEN4_SHARE_ACCESS_READ)
1174 readcnt++;
1176 if (fp->rf_access_read > readcnt ||
1177 fp->rf_access_write > writecnt)
1178 return (OPEN_DELEGATE_NONE);
1181 * If the client is going to write, or if the client
1182 * has exclusive access, return a write delegation.
1184 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1185 (deny & (OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE)))
1186 return (OPEN_DELEGATE_WRITE);
1188 * If we don't want to write or we've haven't denied read
1189 * access to others, return a read delegation.
1191 if ((access & ~OPEN4_SHARE_ACCESS_WRITE) ||
1192 (deny & ~OPEN4_SHARE_DENY_READ))
1193 return (OPEN_DELEGATE_READ);
1195 /* Shouldn't get here */
1196 return (OPEN_DELEGATE_NONE);
1198 case OPEN_DELEGATE_READ:
1200 * If the file is delegated for read but we wan't to
1201 * write or deny others to read then we can't delegate
1202 * the file. We shouldn't get here since the delegation should
1203 * have been recalled already.
1205 if ((access & OPEN4_SHARE_ACCESS_WRITE) ||
1206 (deny & OPEN4_SHARE_DENY_READ))
1207 return (OPEN_DELEGATE_NONE);
1208 return (OPEN_DELEGATE_READ);
1210 case OPEN_DELEGATE_WRITE:
1211 return (OPEN_DELEGATE_WRITE);
1214 /* Shouldn't get here */
1215 return (OPEN_DELEGATE_NONE);
1219 * Given the desired delegation type and the "history" of the file
1220 * determine the actual delegation type to return.
1222 static open_delegation_type4
1223 rfs4_delegation_policy(open_delegation_type4 dtype,
1224 rfs4_dinfo_t *dinfo, clientid4 cid)
1226 time_t elapsed;
1228 if (rfs4_deleg_policy != SRV_NORMAL_DELEGATE)
1229 return (OPEN_DELEGATE_NONE);
1232 * Has this file/delegation ever been recalled? If not then
1233 * no further checks for a delegation race need to be done.
1234 * However if a recall has occurred, then check to see if a
1235 * client has caused its own delegation recall to occur. If
1236 * not, then has a delegation for this file been returned
1237 * recently? If so, then do not assign a new delegation to
1238 * avoid a "delegation race" between the original client and
1239 * the new/conflicting client.
1241 if (dinfo->rd_ever_recalled == TRUE) {
1242 if (dinfo->rd_conflicted_client != cid) {
1243 elapsed = gethrestime_sec() - dinfo->rd_time_returned;
1244 if (elapsed < rfs4_lease_time)
1245 return (OPEN_DELEGATE_NONE);
1249 /* Limit the number of read grants */
1250 if (dtype == OPEN_DELEGATE_READ &&
1251 dinfo->rd_rdgrants > MAX_READ_DELEGATIONS)
1252 return (OPEN_DELEGATE_NONE);
1255 * Should consider limiting total number of read/write
1256 * delegations the server will permit.
1259 return (dtype);
1263 * Try and grant a delegation for an open give the state. The routine
1264 * returns the delegation type granted. This could be OPEN_DELEGATE_NONE.
1266 * The state and associate file entry must be locked
1268 rfs4_deleg_state_t *
1269 rfs4_grant_delegation(delegreq_t dreq, rfs4_state_t *sp, int *recall)
1271 rfs4_file_t *fp = sp->rs_finfo;
1272 open_delegation_type4 dtype;
1273 int no_delegation;
1275 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1276 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1278 /* Is the server even providing delegations? */
1279 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE || dreq == DELEG_NONE)
1280 return (NULL);
1282 /* Check to see if delegations have been temporarily disabled */
1283 mutex_enter(&rfs4_deleg_lock);
1284 no_delegation = rfs4_deleg_disabled;
1285 mutex_exit(&rfs4_deleg_lock);
1287 if (no_delegation)
1288 return (NULL);
1290 /* Don't grant a delegation if a deletion is impending. */
1291 if (fp->rf_dinfo.rd_hold_grant > 0) {
1292 return (NULL);
1296 * Don't grant a delegation if there are any lock manager
1297 * (NFSv2/v3) locks for the file. This is a bit of a hack (e.g.,
1298 * if there are only read locks we should be able to grant a
1299 * read-only delegation), but it's good enough for now.
1301 * MT safety: the lock manager checks for conflicting delegations
1302 * before processing a lock request. That check will block until
1303 * we are done here. So if the lock manager acquires a lock after
1304 * we decide to grant the delegation, the delegation will get
1305 * immediately recalled (if there's a conflict), so we're safe.
1307 if (lm_vp_active(fp->rf_vp)) {
1308 return (NULL);
1312 * Based on the type of delegation request passed in, take the
1313 * appropriate action (DELEG_NONE is handled above)
1315 switch (dreq) {
1317 case DELEG_READ:
1318 case DELEG_WRITE:
1320 * The server "must" grant the delegation in this case.
1321 * Client is using open previous
1323 dtype = (open_delegation_type4)dreq;
1324 *recall = 1;
1325 break;
1326 case DELEG_ANY:
1328 * If a valid callback path does not exist, no delegation may
1329 * be granted.
1331 if (sp->rs_owner->ro_client->rc_cbinfo.cb_state != CB_OK)
1332 return (NULL);
1335 * If the original operation which caused time_rm_delayed
1336 * to be set hasn't been retried and completed for one
1337 * full lease period, clear it and allow delegations to
1338 * get granted again.
1340 if (fp->rf_dinfo.rd_time_rm_delayed > 0 &&
1341 gethrestime_sec() >
1342 fp->rf_dinfo.rd_time_rm_delayed + rfs4_lease_time)
1343 fp->rf_dinfo.rd_time_rm_delayed = 0;
1346 * If we are waiting for a delegation to be returned then
1347 * don't delegate this file. We do this for correctness as
1348 * well as if the file is being recalled we would likely
1349 * recall this file again.
1352 if (fp->rf_dinfo.rd_time_recalled != 0 ||
1353 fp->rf_dinfo.rd_time_rm_delayed != 0)
1354 return (NULL);
1356 /* Get the "best" delegation candidate */
1357 dtype = rfs4_check_delegation(sp, fp);
1359 if (dtype == OPEN_DELEGATE_NONE)
1360 return (NULL);
1363 * Based on policy and the history of the file get the
1364 * actual delegation.
1366 dtype = rfs4_delegation_policy(dtype, &fp->rf_dinfo,
1367 sp->rs_owner->ro_client->rc_clientid);
1369 if (dtype == OPEN_DELEGATE_NONE)
1370 return (NULL);
1371 break;
1372 default:
1373 return (NULL);
1376 /* set the delegation for the state */
1377 return (rfs4_deleg_state(sp, dtype, recall));
1380 void
1381 rfs4_set_deleg_response(rfs4_deleg_state_t *dsp, open_delegation4 *dp,
1382 nfsace4 *ace, int recall)
1384 open_write_delegation4 *wp;
1385 open_read_delegation4 *rp;
1386 nfs_space_limit4 *spl;
1387 nfsace4 nace;
1390 * We need to allocate a new copy of the who string.
1391 * this string will be freed by the rfs4_op_open dis_resfree
1392 * routine. We need to do this allocation since replays will
1393 * be allocated and rfs4_compound can't tell the difference from
1394 * a replay and an inital open. N.B. if an ace is passed in, it
1395 * the caller's responsibility to free it.
1398 if (ace == NULL) {
1400 * Default is to deny all access, the client will have
1401 * to contact the server. XXX Do we want to actually
1402 * set a deny for every one, or do we simply want to
1403 * construct an entity that will match no one?
1405 nace.type = ACE4_ACCESS_DENIED_ACE_TYPE;
1406 nace.flag = 0;
1407 nace.access_mask = ACE4_VALID_MASK_BITS;
1408 (void) str_to_utf8(ACE4_WHO_EVERYONE, &nace.who);
1409 } else {
1410 nace.type = ace->type;
1411 nace.flag = ace->flag;
1412 nace.access_mask = ace->access_mask;
1413 (void) utf8_copy(&ace->who, &nace.who);
1416 dp->delegation_type = dsp->rds_dtype;
1418 switch (dsp->rds_dtype) {
1419 case OPEN_DELEGATE_NONE:
1420 break;
1421 case OPEN_DELEGATE_READ:
1422 rp = &dp->open_delegation4_u.read;
1423 rp->stateid = dsp->rds_delegid.stateid;
1424 rp->recall = (bool_t)recall;
1425 rp->permissions = nace;
1426 break;
1427 case OPEN_DELEGATE_WRITE:
1428 wp = &dp->open_delegation4_u.write;
1429 wp->stateid = dsp->rds_delegid.stateid;
1430 wp->recall = (bool_t)recall;
1431 spl = &wp->space_limit;
1432 spl->limitby = NFS_LIMIT_SIZE;
1433 spl->nfs_space_limit4_u.filesize = 0;
1434 wp->permissions = nace;
1435 break;
1440 * Check if the file is delegated via the provided file struct.
1441 * Return TRUE if it is delegated. This is intended for use by
1442 * the v4 server. The v2/v3 server code should use rfs4_check_delegated().
1444 * Note that if the file is found to have a delegation, it is
1445 * recalled, unless the clientid of the caller matches the clientid of the
1446 * delegation. If the caller has specified, there is a slight delay
1447 * inserted in the hopes that the delegation will be returned quickly.
1449 bool_t
1450 rfs4_check_delegated_byfp(int mode, rfs4_file_t *fp,
1451 bool_t trunc, bool_t do_delay, bool_t is_rm, clientid4 *cp)
1453 rfs4_deleg_state_t *dsp;
1455 /* Is delegation enabled? */
1456 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1457 return (FALSE);
1459 /* do we have a delegation on this file? */
1460 rfs4_dbe_lock(fp->rf_dbe);
1461 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
1462 if (is_rm)
1463 fp->rf_dinfo.rd_hold_grant++;
1464 rfs4_dbe_unlock(fp->rf_dbe);
1465 return (FALSE);
1468 * do we have a write delegation on this file or are we
1469 * requesting write access to a file with any type of existing
1470 * delegation?
1472 if (mode == FWRITE || fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
1473 if (cp != NULL) {
1474 dsp = list_head(&fp->rf_delegstatelist);
1475 if (dsp == NULL) {
1476 rfs4_dbe_unlock(fp->rf_dbe);
1477 return (FALSE);
1480 * Does the requestor already own the delegation?
1482 if (dsp->rds_client->rc_clientid == *(cp)) {
1483 rfs4_dbe_unlock(fp->rf_dbe);
1484 return (FALSE);
1488 rfs4_dbe_unlock(fp->rf_dbe);
1489 rfs4_recall_deleg(fp, trunc, NULL);
1491 if (!do_delay) {
1492 rfs4_dbe_lock(fp->rf_dbe);
1493 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1494 rfs4_dbe_unlock(fp->rf_dbe);
1495 return (TRUE);
1498 delay(NFS4_DELEGATION_CONFLICT_DELAY);
1500 rfs4_dbe_lock(fp->rf_dbe);
1501 if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE) {
1502 fp->rf_dinfo.rd_time_rm_delayed = gethrestime_sec();
1503 rfs4_dbe_unlock(fp->rf_dbe);
1504 return (TRUE);
1507 if (is_rm)
1508 fp->rf_dinfo.rd_hold_grant++;
1509 rfs4_dbe_unlock(fp->rf_dbe);
1510 return (FALSE);
1514 * Check if the file is delegated in the case of a v2 or v3 access.
1515 * Return TRUE if it is delegated which in turn means that v2 should
1516 * drop the request and in the case of v3 JUKEBOX should be returned.
1518 bool_t
1519 rfs4_check_delegated(int mode, vnode_t *vp, bool_t trunc)
1521 rfs4_file_t *fp;
1522 bool_t create = FALSE;
1523 bool_t rc = FALSE;
1525 rfs4_hold_deleg_policy();
1527 /* Is delegation enabled? */
1528 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) {
1529 fp = rfs4_findfile(vp, NULL, &create);
1530 if (fp != NULL) {
1531 if (rfs4_check_delegated_byfp(mode, fp, trunc,
1532 TRUE, FALSE, NULL)) {
1533 rc = TRUE;
1535 rfs4_file_rele(fp);
1538 rfs4_rele_deleg_policy();
1539 return (rc);
1543 * Release a hold on the hold_grant counter which
1544 * prevents delegation from being granted while a remove
1545 * or a rename is in progress.
1547 void
1548 rfs4_clear_dont_grant(rfs4_file_t *fp)
1550 if (rfs4_deleg_policy == SRV_NEVER_DELEGATE)
1551 return;
1552 rfs4_dbe_lock(fp->rf_dbe);
1553 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
1554 fp->rf_dinfo.rd_hold_grant--;
1555 fp->rf_dinfo.rd_time_rm_delayed = 0;
1556 rfs4_dbe_unlock(fp->rf_dbe);
1560 * State support for delegation.
1561 * Set the state delegation type for this state;
1562 * This routine is called from open via rfs4_grant_delegation and the entry
1563 * locks on sp and sp->rs_finfo are assumed.
1565 static rfs4_deleg_state_t *
1566 rfs4_deleg_state(rfs4_state_t *sp, open_delegation_type4 dtype, int *recall)
1568 rfs4_file_t *fp = sp->rs_finfo;
1569 bool_t create = TRUE;
1570 rfs4_deleg_state_t *dsp;
1571 vnode_t *vp;
1572 int open_prev = *recall;
1573 int ret;
1574 int fflags = 0;
1576 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
1577 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1579 /* Shouldn't happen */
1580 if (fp->rf_dinfo.rd_recall_count != 0 ||
1581 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1582 dtype != OPEN_DELEGATE_READ)) {
1583 return (NULL);
1586 /* Unlock to avoid deadlock */
1587 rfs4_dbe_unlock(fp->rf_dbe);
1588 rfs4_dbe_unlock(sp->rs_dbe);
1590 dsp = rfs4_finddeleg(sp, &create);
1592 rfs4_dbe_lock(sp->rs_dbe);
1593 rfs4_dbe_lock(fp->rf_dbe);
1595 if (dsp == NULL)
1596 return (NULL);
1599 * It is possible that since we dropped the lock
1600 * in order to call finddeleg, the rfs4_file_t
1601 * was marked such that we should not grant a
1602 * delegation, if so bail out.
1604 if (fp->rf_dinfo.rd_hold_grant > 0) {
1605 rfs4_deleg_state_rele(dsp);
1606 return (NULL);
1609 if (create == FALSE) {
1610 if (sp->rs_owner->ro_client == dsp->rds_client &&
1611 dsp->rds_dtype == dtype) {
1612 return (dsp);
1613 } else {
1614 rfs4_deleg_state_rele(dsp);
1615 return (NULL);
1620 * Check that this file has not been delegated to another
1621 * client
1623 if (fp->rf_dinfo.rd_recall_count != 0 ||
1624 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE ||
1625 (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ &&
1626 dtype != OPEN_DELEGATE_READ)) {
1627 rfs4_deleg_state_rele(dsp);
1628 return (NULL);
1631 vp = fp->rf_vp;
1632 /* vnevent_support returns 0 if file system supports vnevents */
1633 if (vnevent_support(vp, NULL)) {
1634 rfs4_deleg_state_rele(dsp);
1635 return (NULL);
1638 /* Calculate the fflags for this OPEN. */
1639 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)
1640 fflags |= FREAD;
1641 if (sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)
1642 fflags |= FWRITE;
1644 *recall = 0;
1646 * Before granting a delegation we need to know if anyone else has
1647 * opened the file in a conflicting mode. However, first we need to
1648 * know how we opened the file to check the counts properly.
1650 if (dtype == OPEN_DELEGATE_READ) {
1651 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1652 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1653 vn_is_mapped(vp, V_WRITE)) {
1654 if (open_prev) {
1655 *recall = 1;
1656 } else {
1657 rfs4_deleg_state_rele(dsp);
1658 return (NULL);
1661 ret = fem_install(vp, deleg_rdops, (void *)fp, OPUNIQ,
1662 rfs4_mon_hold, rfs4_mon_rele);
1663 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1664 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1665 vn_is_mapped(vp, V_WRITE)) {
1666 if (open_prev) {
1667 *recall = 1;
1668 } else {
1669 (void) fem_uninstall(vp, deleg_rdops,
1670 (void *)fp);
1671 rfs4_deleg_state_rele(dsp);
1672 return (NULL);
1676 * Because a client can hold onto a delegation after the
1677 * file has been closed, we need to keep track of the
1678 * access to this file. Otherwise the CIFS server would
1679 * not know about the client accessing the file and could
1680 * inappropriately grant an OPLOCK.
1681 * fem_install() returns EBUSY when asked to install a
1682 * OPUNIQ monitor more than once. Therefore, check the
1683 * return code because we only want this done once.
1685 if (ret == 0)
1686 vn_open_upgrade(vp, FREAD);
1687 } else { /* WRITE */
1688 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1689 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1690 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1691 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1692 vn_is_mapped(vp, V_RDORWR)) {
1693 if (open_prev) {
1694 *recall = 1;
1695 } else {
1696 rfs4_deleg_state_rele(dsp);
1697 return (NULL);
1700 ret = fem_install(vp, deleg_wrops, (void *)fp, OPUNIQ,
1701 rfs4_mon_hold, rfs4_mon_rele);
1702 if (((fflags & FWRITE) && vn_has_other_opens(vp, V_WRITE)) ||
1703 (((fflags & FWRITE) == 0) && vn_is_opened(vp, V_WRITE)) ||
1704 ((fflags & FREAD) && vn_has_other_opens(vp, V_READ)) ||
1705 (((fflags & FREAD) == 0) && vn_is_opened(vp, V_READ)) ||
1706 vn_is_mapped(vp, V_RDORWR)) {
1707 if (open_prev) {
1708 *recall = 1;
1709 } else {
1710 (void) fem_uninstall(vp, deleg_wrops,
1711 (void *)fp);
1712 rfs4_deleg_state_rele(dsp);
1713 return (NULL);
1717 * Because a client can hold onto a delegation after the
1718 * file has been closed, we need to keep track of the
1719 * access to this file. Otherwise the CIFS server would
1720 * not know about the client accessing the file and could
1721 * inappropriately grant an OPLOCK.
1722 * fem_install() returns EBUSY when asked to install a
1723 * OPUNIQ monitor more than once. Therefore, check the
1724 * return code because we only want this done once.
1726 if (ret == 0)
1727 vn_open_upgrade(vp, FREAD|FWRITE);
1729 /* Place on delegation list for file */
1730 ASSERT(!list_link_active(&dsp->rds_node));
1731 list_insert_tail(&fp->rf_delegstatelist, dsp);
1733 dsp->rds_dtype = fp->rf_dinfo.rd_dtype = dtype;
1735 /* Update delegation stats for this file */
1736 fp->rf_dinfo.rd_time_lastgrant = gethrestime_sec();
1738 /* reset since this is a new delegation */
1739 fp->rf_dinfo.rd_conflicted_client = 0;
1740 fp->rf_dinfo.rd_ever_recalled = FALSE;
1742 if (dtype == OPEN_DELEGATE_READ)
1743 fp->rf_dinfo.rd_rdgrants++;
1744 else
1745 fp->rf_dinfo.rd_wrgrants++;
1747 return (dsp);
1751 * State routine for the server when a delegation is returned.
1753 void
1754 rfs4_return_deleg(rfs4_deleg_state_t *dsp, bool_t revoked)
1756 rfs4_file_t *fp = dsp->rds_finfo;
1757 open_delegation_type4 dtypewas;
1759 rfs4_dbe_lock(fp->rf_dbe);
1761 /* nothing to do if no longer on list */
1762 if (!list_link_active(&dsp->rds_node)) {
1763 rfs4_dbe_unlock(fp->rf_dbe);
1764 return;
1767 /* Remove state from recall list */
1768 list_remove(&fp->rf_delegstatelist, dsp);
1770 if (list_is_empty(&fp->rf_delegstatelist)) {
1771 dtypewas = fp->rf_dinfo.rd_dtype;
1772 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
1773 rfs4_dbe_cv_broadcast(fp->rf_dbe);
1775 /* if file system was unshared, the vp will be NULL */
1776 if (fp->rf_vp != NULL) {
1778 * Once a delegation is no longer held by any client,
1779 * the monitor is uninstalled. At this point, the
1780 * client must send OPEN otw, so we don't need the
1781 * reference on the vnode anymore. The open
1782 * downgrade removes the reference put on earlier.
1784 if (dtypewas == OPEN_DELEGATE_READ) {
1785 (void) fem_uninstall(fp->rf_vp, deleg_rdops,
1786 (void *)fp);
1787 vn_open_downgrade(fp->rf_vp, FREAD);
1788 } else if (dtypewas == OPEN_DELEGATE_WRITE) {
1789 (void) fem_uninstall(fp->rf_vp, deleg_wrops,
1790 (void *)fp);
1791 vn_open_downgrade(fp->rf_vp, FREAD|FWRITE);
1796 switch (dsp->rds_dtype) {
1797 case OPEN_DELEGATE_READ:
1798 fp->rf_dinfo.rd_rdgrants--;
1799 break;
1800 case OPEN_DELEGATE_WRITE:
1801 fp->rf_dinfo.rd_wrgrants--;
1802 break;
1803 default:
1804 break;
1807 /* used in the policy decision */
1808 fp->rf_dinfo.rd_time_returned = gethrestime_sec();
1811 * reset the time_recalled field so future delegations are not
1812 * accidentally revoked
1814 if ((fp->rf_dinfo.rd_rdgrants + fp->rf_dinfo.rd_wrgrants) == 0)
1815 fp->rf_dinfo.rd_time_recalled = 0;
1817 rfs4_dbe_unlock(fp->rf_dbe);
1819 rfs4_dbe_lock(dsp->rds_dbe);
1821 dsp->rds_dtype = OPEN_DELEGATE_NONE;
1823 if (revoked == TRUE)
1824 dsp->rds_time_revoked = gethrestime_sec();
1826 rfs4_dbe_invalidate(dsp->rds_dbe);
1828 rfs4_dbe_unlock(dsp->rds_dbe);
1830 if (revoked == TRUE) {
1831 rfs4_dbe_lock(dsp->rds_client->rc_dbe);
1832 dsp->rds_client->rc_deleg_revoked++; /* observability */
1833 rfs4_dbe_unlock(dsp->rds_client->rc_dbe);
1837 static void
1838 rfs4_revoke_file(rfs4_file_t *fp)
1840 rfs4_deleg_state_t *dsp;
1843 * The lock for rfs4_file_t must be held when traversing the
1844 * delegation list but that lock needs to be released to call
1845 * rfs4_return_deleg()
1847 rfs4_dbe_lock(fp->rf_dbe);
1848 while (dsp = list_head(&fp->rf_delegstatelist)) {
1849 rfs4_dbe_hold(dsp->rds_dbe);
1850 rfs4_dbe_unlock(fp->rf_dbe);
1851 rfs4_return_deleg(dsp, TRUE);
1852 rfs4_deleg_state_rele(dsp);
1853 rfs4_dbe_lock(fp->rf_dbe);
1855 rfs4_dbe_unlock(fp->rf_dbe);
1859 * A delegation is assumed to be present on the file associated with
1860 * "sp". Check to see if the delegation matches is associated with
1861 * the same client as referenced by "sp". If it is not, TRUE is
1862 * returned. If the delegation DOES match the client (or no
1863 * delegation is present), return FALSE.
1864 * Assume the state entry and file entry are locked.
1866 bool_t
1867 rfs4_is_deleg(rfs4_state_t *sp)
1869 rfs4_deleg_state_t *dsp;
1870 rfs4_file_t *fp = sp->rs_finfo;
1871 rfs4_client_t *cp = sp->rs_owner->ro_client;
1873 ASSERT(rfs4_dbe_islocked(fp->rf_dbe));
1874 for (dsp = list_head(&fp->rf_delegstatelist); dsp != NULL;
1875 dsp = list_next(&fp->rf_delegstatelist, dsp)) {
1876 if (cp != dsp->rds_client) {
1877 return (TRUE);
1880 return (FALSE);
1883 void
1884 rfs4_disable_delegation(void)
1886 mutex_enter(&rfs4_deleg_lock);
1887 rfs4_deleg_disabled++;
1888 mutex_exit(&rfs4_deleg_lock);
1891 void
1892 rfs4_enable_delegation(void)
1894 mutex_enter(&rfs4_deleg_lock);
1895 ASSERT(rfs4_deleg_disabled > 0);
1896 rfs4_deleg_disabled--;
1897 mutex_exit(&rfs4_deleg_lock);
1900 void
1901 rfs4_mon_hold(void *arg)
1903 rfs4_file_t *fp = arg;
1905 rfs4_dbe_hold(fp->rf_dbe);
1908 void
1909 rfs4_mon_rele(void *arg)
1911 rfs4_file_t *fp = arg;
1913 rfs4_dbe_rele_nolock(fp->rf_dbe);