4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
26 #include <sys/systm.h>
28 #include <sys/cmn_err.h>
29 #include <sys/atomic.h>
30 #include <sys/flock.h>
31 #include <nfs/export.h>
34 #include <nfs/nfssys.h>
36 #include <sys/pathname.h>
38 #include <sys/nvpair.h>
40 extern u_longlong_t nfs4_srv_caller_id
;
42 extern time_t rfs4_start_time
;
43 extern uint_t nfs4_srv_vkey
;
47 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
53 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
54 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
55 (char)0xff, (char)0xff, (char)0xff, (char)0xff
/*
 * ISSPECIAL(id): evaluates to non-zero when stateid4_cmp() returns
 * non-zero for id against either special0 or special1.  The second
 * comparison is skipped when the first one already succeeds.
 */
60 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
61 stateid4_cmp(id, &special1))
63 /* For embedding the cluster nodeid into our clientid */
64 #define CLUSTER_NODEID_SHIFT 24
65 #define CLUSTER_MAX_NODEID 255
71 static uint32_t rfs4_database_debug
= 0x00;
73 static void rfs4_ss_clid_write(rfs4_client_t
*cp
, char *leaf
);
74 static void rfs4_ss_clid_write_one(rfs4_client_t
*cp
, char *dir
, char *leaf
);
75 static void rfs4_dss_clear_oldstate(rfs4_servinst_t
*sip
);
76 static void rfs4_ss_chkclid_sip(rfs4_client_t
*cp
, rfs4_servinst_t
*sip
);
79 * A couple of simple init/destroy functions for a general waiter
/*
 * rfs4_sw_init: prepare a state waiter for first use.
 *
 * Initializes the waiter's mutex (sw_cv_lock, MUTEX_DEFAULT) and its
 * condition variable (sw_cv, CV_DEFAULT), then marks the waiter as not
 * active with a wait count of zero.
 *
 * swp - the waiter to initialize.
 */
82 rfs4_sw_init(rfs4_state_wait_t
*swp
)
84 mutex_init(swp
->sw_cv_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
85 cv_init(swp
->sw_cv
, NULL
, CV_DEFAULT
, NULL
);
/* Start out idle: nobody is inside and nobody is queued. */
86 swp
->sw_active
= FALSE
;
87 swp
->sw_wait_count
= 0;
/*
 * rfs4_sw_destroy: tear down a state waiter.
 *
 * Destroys the mutex (sw_cv_lock) and the condition variable (sw_cv)
 * that rfs4_sw_init() set up.  The sw_active and sw_wait_count fields are
 * not touched here.
 *
 * swp - the waiter to destroy.
 */
91 rfs4_sw_destroy(rfs4_state_wait_t
*swp
)
93 mutex_destroy(swp
->sw_cv_lock
);
94 cv_destroy(swp
->sw_cv
);
/*
 * rfs4_sw_enter: claim the waiter, blocking while it is already claimed.
 *
 * Under sw_cv_lock, waits on sw_cv for as long as sw_active is set, then
 * sets sw_active to TRUE and drops the lock.  The matching release is
 * rfs4_sw_exit().
 *
 * swp - the waiter to enter.
 */
98 rfs4_sw_enter(rfs4_state_wait_t
*swp
)
100 mutex_enter(swp
->sw_cv_lock
);
101 while (swp
->sw_active
) {
/*
 * sw_wait_count is raised only for the duration of the cv_wait() so that
 * rfs4_sw_exit() can tell whether a broadcast is needed.
 */
102 swp
->sw_wait_count
++;
103 cv_wait(swp
->sw_cv
, swp
->sw_cv_lock
);
104 swp
->sw_wait_count
--;
/* The loop only ends with sw_active clear; take ownership. */
106 ASSERT(swp
->sw_active
== FALSE
);
107 swp
->sw_active
= TRUE
;
108 mutex_exit(swp
->sw_cv_lock
);
/*
 * rfs4_sw_exit: release a waiter previously claimed with rfs4_sw_enter().
 *
 * Under sw_cv_lock, asserts that the waiter is active, clears sw_active
 * and, when sw_wait_count is non-zero, broadcasts on sw_cv so that the
 * blocked threads re-check sw_active.
 *
 * swp - the waiter to leave.
 */
112 rfs4_sw_exit(rfs4_state_wait_t
*swp
)
114 mutex_enter(swp
->sw_cv_lock
);
115 ASSERT(swp
->sw_active
== TRUE
);
116 swp
->sw_active
= FALSE
;
/* Skip the wakeup entirely when no thread is parked in cv_wait(). */
117 if (swp
->sw_wait_count
!= 0)
118 cv_broadcast(swp
->sw_cv
);
119 mutex_exit(swp
->sw_cv_lock
);
123 * CPR callback id -- not related to v4 callbacks
125 static callb_id_t cpr_id
= 0;
/*
 * deep_lock_copy: duplicate the variable-length part of a LOCK result.
 *
 * Only acts when the source status is NFS4ERR_DENIED.  In that case a new
 * buffer of slo->owner_len bytes is allocated (KM_SLEEP) for the
 * destination's denied lock owner and the source owner bytes are copied
 * into it.
 *
 * dres - destination result; receives its own owner_val buffer.
 * sres - source result; read only.
 *
 * NOTE(review): dlo->owner_len is not assigned here, so this presumably
 * relies on the caller having already copied the fixed-size fields of
 * *sres into *dres -- confirm against the caller.
 */
128 deep_lock_copy(LOCK4res
*dres
, LOCK4res
*sres
)
130 lock_owner4
*slo
= &sres
->LOCK4res_u
.denied
.owner
;
131 lock_owner4
*dlo
= &dres
->LOCK4res_u
.denied
.owner
;
133 if (sres
->status
== NFS4ERR_DENIED
) {
134 dlo
->owner_val
= kmem_alloc(slo
->owner_len
, KM_SLEEP
);
135 bcopy(slo
->owner_val
, dlo
->owner_val
, slo
->owner_len
);
/*
 * deep_lock_free: release the owner buffer attached to a LOCK result.
 *
 * When the result status is NFS4ERR_DENIED, frees the denied lock owner's
 * owner_val buffer, using owner_len as the allocation size.  Any other
 * status leaves the result untouched.
 *
 * res - the LOCK result whose owner buffer is to be freed.
 */
140 deep_lock_free(LOCK4res
*res
)
142 lock_owner4
*lo
= &res
->LOCK4res_u
.denied
.owner
;
144 if (res
->status
== NFS4ERR_DENIED
)
145 kmem_free(lo
->owner_val
, lo
->owner_len
);
/*
 * deep_open_copy: duplicate the variable-length part of an OPEN result.
 *
 * Copies attrset from the source to the destination, selects the ACE
 * (permissions) of the read or write delegation according to the source's
 * delegation_type, and gives the destination ACE its own copy of the
 * "who" UTF-8 string: a fresh buffer of sacep->who.utf8string_len bytes
 * (KM_SLEEP) filled from the source.
 *
 * dres - destination result.
 * sres - source result; read only.
 *
 * NOTE(review): the statements executed for a non-OK status and for
 * OPEN_DELEGATE_NONE are not visible in this extract; presumably both
 * leave without copying -- confirm against the full source.
 */
149 deep_open_copy(OPEN4res
*dres
, OPEN4res
*sres
)
151 nfsace4
*sacep
, *dacep
;
153 if (sres
->status
!= NFS4_OK
) {
157 dres
->attrset
= sres
->attrset
;
159 switch (sres
->delegation
.delegation_type
) {
160 case OPEN_DELEGATE_NONE
:
162 case OPEN_DELEGATE_READ
:
163 sacep
= &sres
->delegation
.open_delegation4_u
.read
.permissions
;
164 dacep
= &dres
->delegation
.open_delegation4_u
.read
.permissions
;
166 case OPEN_DELEGATE_WRITE
:
167 sacep
= &sres
->delegation
.open_delegation4_u
.write
.permissions
;
168 dacep
= &dres
->delegation
.open_delegation4_u
.write
.permissions
;
/* Give the destination ACE a private copy of the "who" string. */
171 dacep
->who
.utf8string_val
=
172 kmem_alloc(sacep
->who
.utf8string_len
, KM_SLEEP
);
173 bcopy(sacep
->who
.utf8string_val
, dacep
->who
.utf8string_val
,
174 sacep
->who
.utf8string_len
);
/*
 * deep_open_free: release the "who" string attached to an OPEN result.
 *
 * Selects the ACE (permissions) of the read or write delegation according
 * to delegation_type.  When that ACE has a non-NULL who.utf8string_val,
 * the buffer is freed with who.utf8string_len as its size and the pointer
 * is reset to NULL.
 *
 * res - the OPEN result to clean up.
 *
 * NOTE(review): the declaration of acep and the statements executed for
 * a non-OK status and for OPEN_DELEGATE_NONE are not visible in this
 * extract; presumably those paths leave without freeing -- confirm.
 */
178 deep_open_free(OPEN4res
*res
)
181 if (res
->status
!= NFS4_OK
)
184 switch (res
->delegation
.delegation_type
) {
185 case OPEN_DELEGATE_NONE
:
187 case OPEN_DELEGATE_READ
:
188 acep
= &res
->delegation
.open_delegation4_u
.read
.permissions
;
190 case OPEN_DELEGATE_WRITE
:
191 acep
= &res
->delegation
.open_delegation4_u
.write
.permissions
;
195 if (acep
->who
.utf8string_val
) {
196 kmem_free(acep
->who
.utf8string_val
, acep
->who
.utf8string_len
);
/* Clear the pointer so the freed buffer cannot be referenced again. */
197 acep
->who
.utf8string_val
= NULL
;
/*
 * rfs4_free_reply: release deep-copied storage held by a result op.
 *
 * Hands the oplock member of rp->nfs_resop4_u to deep_lock_free() and the
 * opopen member to deep_open_free().
 *
 * rp - the result operation to clean up.
 *
 * NOTE(review): the dispatch that chooses between the two calls is not
 * visible in this extract; presumably it switches on the operation type
 * as rfs4_copy_reply() does -- confirm against the full source.
 */
202 rfs4_free_reply(nfs_resop4
*rp
)
206 deep_lock_free(&rp
->nfs_resop4_u
.oplock
);
209 deep_open_free(&rp
->nfs_resop4_u
.opopen
);
/*
 * rfs4_copy_reply: copy a result op, duplicating attached buffers.
 *
 * Switches on src->resop and calls deep_lock_copy() for the oplock member
 * or deep_open_copy() for the opopen member so that dst ends up owning
 * its own copies of the variable-length data.
 *
 * dst - destination result operation.
 * src - source result operation.
 *
 * NOTE(review): the case labels and any statement preceding the switch
 * are not visible in this extract.  Both deep copy helpers appear to
 * expect the fixed-size fields of dst to be populated already -- confirm.
 */
216 rfs4_copy_reply(nfs_resop4
*dst
, nfs_resop4
*src
)
220 /* Handle responses that need deep copy */
221 switch (src
->resop
) {
223 deep_lock_copy(&dst
->nfs_resop4_u
.oplock
,
224 &src
->nfs_resop4_u
.oplock
);
227 deep_open_copy(&dst
->nfs_resop4_u
.opopen
,
228 &src
->nfs_resop4_u
.opopen
);
236 * This is the implementation of the underlying state engine. The
237 * public interface to this engine is described by
238 * nfs4_state.h. Callers to the engine should hold no state engine
239 * locks when they call in to it. If the protocol needs to lock data
240 * structures it should do so after acquiring all references to them
241 * first and then follow the following lock order:
243 * client > openowner > state > lo_state > lockowner > file.
245 * Internally we only allow a thread to hold one hash bucket lock at a
246 * time and the lock is higher in the lock order (must be acquired
247 * first) than the data structure that is on that hash list.
249 * If a new reference was acquired by the caller, that reference needs
250 * to be released after releasing all acquired locks with the
251 * corresponding rfs4_*_rele routine.
255 * This code is somewhat prototypical for now. Its purpose currently is to
256 * implement the interfaces sufficiently to finish the higher protocol
257 * elements. This will be replaced by dynamically resizable tables
258 * backed by kmem_cache allocator. However synchronization is handled
259 * correctly (I hope) and will not change by much. The mutexes for
260 * the hash buckets that can be used to create new instances of data
261 * structures might be good candidates to evolve into reader writer
262 * locks. If it has to do a creation, it would be holding the
263 * mutex across a kmem_alloc with KM_SLEEP specified.
272 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
274 /* Used to serialize create/destroy of rfs4_server_state database */
275 kmutex_t rfs4_state_lock
;
276 static rfs4_database_t
*rfs4_server_state
= NULL
;
278 /* Used to serialize lookups of clientids */
279 static krwlock_t rfs4_findclient_lock
;
282 * For now this "table" is exposed so that the CPR callback
283 * function can tromp through it..
285 rfs4_table_t
*rfs4_client_tab
;
287 static rfs4_index_t
*rfs4_clientid_idx
;
288 static rfs4_index_t
*rfs4_nfsclnt_idx
;
289 static rfs4_table_t
*rfs4_clntip_tab
;
290 static rfs4_index_t
*rfs4_clntip_idx
;
291 static rfs4_table_t
*rfs4_openowner_tab
;
292 static rfs4_index_t
*rfs4_openowner_idx
;
293 static rfs4_table_t
*rfs4_state_tab
;
294 static rfs4_index_t
*rfs4_state_idx
;
295 static rfs4_index_t
*rfs4_state_owner_file_idx
;
296 static rfs4_index_t
*rfs4_state_file_idx
;
297 static rfs4_table_t
*rfs4_lo_state_tab
;
298 static rfs4_index_t
*rfs4_lo_state_idx
;
299 static rfs4_index_t
*rfs4_lo_state_owner_idx
;
300 static rfs4_table_t
*rfs4_lockowner_tab
;
301 static rfs4_index_t
*rfs4_lockowner_idx
;
302 static rfs4_index_t
*rfs4_lockowner_pid_idx
;
303 static rfs4_table_t
*rfs4_file_tab
;
304 static rfs4_index_t
*rfs4_file_idx
;
305 static rfs4_table_t
*rfs4_deleg_state_tab
;
306 static rfs4_index_t
*rfs4_deleg_idx
;
307 static rfs4_index_t
*rfs4_deleg_state_idx
;
/*
 * MAXTABSZ: 1024 * 1024 (1048576).
 *
 * The replacement list is parenthesized so that the product is evaluated
 * as a unit wherever the macro is expanded; without the parentheses an
 * expression such as (x / MAXTABSZ) or (MAXTABSZ % n) would bind to only
 * one of the two factors.
 */
#define MAXTABSZ (1024 * 1024)
311 /* The values below are rfs4_lease_time units */
/*
 * Default cache times for each state table.
 *
 * NOTE(review): two complete sets of values follow (a short set, then a
 * longer set).  The embedded line numbering skips between and around
 * them, so they are presumably selected by a preprocessor conditional
 * that is not visible in this extract -- confirm which set applies to
 * which build.
 */
314 #define CLIENT_CACHE_TIME 1
315 #define OPENOWNER_CACHE_TIME 1
316 #define STATE_CACHE_TIME 1
317 #define LO_STATE_CACHE_TIME 1
318 #define LOCKOWNER_CACHE_TIME 1
319 #define FILE_CACHE_TIME 3
320 #define DELEG_STATE_CACHE_TIME 1
/* Second set: longer client, openowner, lockowner and file times. */
322 #define CLIENT_CACHE_TIME 10
323 #define OPENOWNER_CACHE_TIME 5
324 #define STATE_CACHE_TIME 1
325 #define LO_STATE_CACHE_TIME 1
326 #define LOCKOWNER_CACHE_TIME 3
327 #define FILE_CACHE_TIME 40
328 #define DELEG_STATE_CACHE_TIME 1
332 static time_t rfs4_client_cache_time
= 0;
333 static time_t rfs4_clntip_cache_time
= 0;
334 static time_t rfs4_openowner_cache_time
= 0;
335 static time_t rfs4_state_cache_time
= 0;
336 static time_t rfs4_lo_state_cache_time
= 0;
337 static time_t rfs4_lockowner_cache_time
= 0;
338 static time_t rfs4_file_cache_time
= 0;
339 static time_t rfs4_deleg_state_cache_time
= 0;
341 static bool_t
rfs4_client_create(rfs4_entry_t
, void *);
342 static void rfs4_dss_remove_cpleaf(rfs4_client_t
*);
343 static void rfs4_dss_remove_leaf(rfs4_servinst_t
*, char *, char *);
344 static void rfs4_client_destroy(rfs4_entry_t
);
345 static bool_t
rfs4_client_expiry(rfs4_entry_t
);
346 static uint32_t clientid_hash(void *);
347 static bool_t
clientid_compare(rfs4_entry_t
, void *);
348 static void *clientid_mkkey(rfs4_entry_t
);
349 static uint32_t nfsclnt_hash(void *);
350 static bool_t
nfsclnt_compare(rfs4_entry_t
, void *);
351 static void *nfsclnt_mkkey(rfs4_entry_t
);
352 static bool_t
rfs4_clntip_expiry(rfs4_entry_t
);
353 static void rfs4_clntip_destroy(rfs4_entry_t
);
354 static bool_t
rfs4_clntip_create(rfs4_entry_t
, void *);
355 static uint32_t clntip_hash(void *);
356 static bool_t
clntip_compare(rfs4_entry_t
, void *);
357 static void *clntip_mkkey(rfs4_entry_t
);
358 static bool_t
rfs4_openowner_create(rfs4_entry_t
, void *);
359 static void rfs4_openowner_destroy(rfs4_entry_t
);
360 static bool_t
rfs4_openowner_expiry(rfs4_entry_t
);
361 static uint32_t openowner_hash(void *);
362 static bool_t
openowner_compare(rfs4_entry_t
, void *);
363 static void *openowner_mkkey(rfs4_entry_t
);
364 static bool_t
rfs4_state_create(rfs4_entry_t
, void *);
365 static void rfs4_state_destroy(rfs4_entry_t
);
366 static bool_t
rfs4_state_expiry(rfs4_entry_t
);
367 static uint32_t state_hash(void *);
368 static bool_t
state_compare(rfs4_entry_t
, void *);
369 static void *state_mkkey(rfs4_entry_t
);
370 static uint32_t state_owner_file_hash(void *);
371 static bool_t
state_owner_file_compare(rfs4_entry_t
, void *);
372 static void *state_owner_file_mkkey(rfs4_entry_t
);
373 static uint32_t state_file_hash(void *);
374 static bool_t
state_file_compare(rfs4_entry_t
, void *);
375 static void *state_file_mkkey(rfs4_entry_t
);
376 static bool_t
rfs4_lo_state_create(rfs4_entry_t
, void *);
377 static void rfs4_lo_state_destroy(rfs4_entry_t
);
378 static bool_t
rfs4_lo_state_expiry(rfs4_entry_t
);
379 static uint32_t lo_state_hash(void *);
380 static bool_t
lo_state_compare(rfs4_entry_t
, void *);
381 static void *lo_state_mkkey(rfs4_entry_t
);
382 static uint32_t lo_state_lo_hash(void *);
383 static bool_t
lo_state_lo_compare(rfs4_entry_t
, void *);
384 static void *lo_state_lo_mkkey(rfs4_entry_t
);
385 static bool_t
rfs4_lockowner_create(rfs4_entry_t
, void *);
386 static void rfs4_lockowner_destroy(rfs4_entry_t
);
387 static bool_t
rfs4_lockowner_expiry(rfs4_entry_t
);
388 static uint32_t lockowner_hash(void *);
389 static bool_t
lockowner_compare(rfs4_entry_t
, void *);
390 static void *lockowner_mkkey(rfs4_entry_t
);
391 static uint32_t pid_hash(void *);
392 static bool_t
pid_compare(rfs4_entry_t
, void *);
393 static void *pid_mkkey(rfs4_entry_t
);
394 static bool_t
rfs4_file_create(rfs4_entry_t
, void *);
395 static void rfs4_file_destroy(rfs4_entry_t
);
396 static uint32_t file_hash(void *);
397 static bool_t
file_compare(rfs4_entry_t
, void *);
398 static void *file_mkkey(rfs4_entry_t
);
399 static bool_t
rfs4_deleg_state_create(rfs4_entry_t
, void *);
400 static void rfs4_deleg_state_destroy(rfs4_entry_t
);
401 static bool_t
rfs4_deleg_state_expiry(rfs4_entry_t
);
402 static uint32_t deleg_hash(void *);
403 static bool_t
deleg_compare(rfs4_entry_t
, void *);
404 static void *deleg_mkkey(rfs4_entry_t
);
405 static uint32_t deleg_state_hash(void *);
406 static bool_t
deleg_state_compare(rfs4_entry_t
, void *);
407 static void *deleg_state_mkkey(rfs4_entry_t
);
409 static void rfs4_state_rele_nounlock(rfs4_state_t
*);
411 static int rfs4_ss_enabled
= 0;
413 extern void (*rfs4_client_clrst
)(struct nfs4clrst_args
*);
/*
 * rfs4_ss_pnfree: free a stable-storage pathname structure.
 *
 * Releases sizeof (rfs4_ss_pn_t) bytes at ss_pn with kmem_free().
 *
 * ss_pn - the structure to free.
 */
416 rfs4_ss_pnfree(rfs4_ss_pn_t
*ss_pn
)
418 kmem_free(ss_pn
, sizeof (rfs4_ss_pn_t
));
/*
 * rfs4_ss_pnalloc: build a "dir/leaf" stable-storage pathname.
 *
 * Allocates an rfs4_ss_pn_t (KM_SLEEP), formats "dir/leaf" into its pn
 * buffer (bounded by MAXPATHLEN) and points its leaf member at the leaf
 * portion inside that same buffer.
 *
 * The request is rejected when dir is longer than MAXPATHLEN, leaf is
 * longer than MAXNAMELEN, or the two lengths plus 2 exceed MAXPATHLEN.
 * The statement taken on rejection is not visible in this extract;
 * callers in this file compare the result against NULL.
 *
 * dir  - directory part of the path.
 * leaf - file name part of the path.
 */
421 static rfs4_ss_pn_t
*
422 rfs4_ss_pnalloc(char *dir
, char *leaf
)
425 int dir_len
, leaf_len
;
428 * validate we have a resonable path
429 * (account for the '/' and trailing null)
431 if ((dir_len
= strlen(dir
)) > MAXPATHLEN
||
432 (leaf_len
= strlen(leaf
)) > MAXNAMELEN
||
433 (dir_len
+ leaf_len
+ 2) > MAXPATHLEN
) {
437 ss_pn
= kmem_alloc(sizeof (rfs4_ss_pn_t
), KM_SLEEP
);
439 (void) snprintf(ss_pn
->pn
, MAXPATHLEN
, "%s/%s", dir
, leaf
);
440 /* Handy pointer to just the leaf name */
/* dir_len + 1 steps over the directory text and the '/' separator. */
441 ss_pn
->leaf
= ss_pn
->pn
+ dir_len
+ 1;
447 * Move the "leaf" filename from "sdir" directory
448 * to the "ddir" directory. Return the pathname of
449 * the destination unless the rename fails in which
450 * case we need to return the source pathname.
/*
 * rfs4_ss_movestate: rename sdir/leaf to ddir/leaf.
 *
 * Builds both pathnames with rfs4_ss_pnalloc() and calls vn_rename() on
 * them.  According to the comment retained in the body, a failed rename
 * yields the source pathname (the destination is freed), otherwise the
 * destination pathname (the source is freed).
 *
 * sdir - directory currently holding the file.
 * ddir - directory to move the file into.
 * leaf - file name, identical in both directories.
 *
 * NOTE(review): the return statements and the cleanup performed when
 * either rfs4_ss_pnalloc() call yields NULL are not visible in this
 * extract.
 */
452 static rfs4_ss_pn_t
*
453 rfs4_ss_movestate(char *sdir
, char *ddir
, char *leaf
)
455 rfs4_ss_pn_t
*src
, *dst
;
457 if ((src
= rfs4_ss_pnalloc(sdir
, leaf
)) == NULL
)
460 if ((dst
= rfs4_ss_pnalloc(ddir
, leaf
)) == NULL
) {
466 * If the rename fails we shall return the src
467 * pathname and free the dst. Otherwise we need
468 * to free the src and return the dst pathanme.
/* vn_rename() returning non-zero is treated as failure. */
470 if (vn_rename(src
->pn
, dst
->pn
, UIO_SYSSPACE
)) {
/*
 * rfs4_ss_getstate: read one client's stable-storage record from a file.
 *
 * Opens ss_pn->pn for reading, requires it to be a regular file that is
 * readable, and takes the vnode's rwlock as a reader.  The file size is
 * fetched for validation; a file that is empty, too small, has the wrong
 * version, has a zero id_len, or whose id_len cannot fit in the file is
 * closed and removed from dvp by its leaf name.
 *
 * For an acceptable file, an rfs4_oldstate_t is allocated and filled in
 * two reads: first the file version, the verifier and id_len, then id_len
 * bytes of client id into a freshly allocated id_val buffer.  On a read
 * error everything allocated so far is freed.
 *
 * dvp   - vnode of the directory holding the file; used for fop_remove().
 * ss_pn - full pathname (pn) and leaf name (leaf) of the state file.
 *
 * The return statements are not visible in this extract; the caller in
 * this file treats a non-NULL result as a populated record.
 *
 * NOTE(review): the first read consumes sizeof (int) for the version in
 * addition to the verifier and id_len, but neither size check below adds
 * sizeof (int) to its bound -- confirm whether that is intended.
 */
479 static rfs4_oldstate_t
*
480 rfs4_ss_getstate(vnode_t
*dvp
, rfs4_ss_pn_t
*ss_pn
)
485 rfs4_oldstate_t
*cl_ss
= NULL
;
489 int err
, kill_file
, file_vers
;
495 * open the state file.
497 if (vn_open(ss_pn
->pn
, UIO_SYSSPACE
, FREAD
, 0, &vp
, 0, 0) != 0) {
501 if (vp
->v_type
!= VREG
) {
502 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
507 err
= fop_access(vp
, VREAD
, 0, CRED(), NULL
);
510 * We don't have read access? better get the heck out.
512 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
517 (void) fop_rwlock(vp
, V_WRITELOCK_FALSE
, NULL
);
519 * get the file size to do some basic validation
521 va
.va_mask
= VATTR_SIZE
;
522 err
= fop_getattr(vp
, &va
, 0, CRED(), NULL
);
524 kill_file
= (va
.va_size
== 0 || va
.va_size
<
525 (NFS4_VERIFIER_SIZE
+ sizeof (uint_t
)+1));
527 if (err
|| kill_file
) {
528 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
529 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
532 (void) fop_remove(dvp
, ss_pn
->leaf
, CRED(), NULL
, 0);
537 cl_ss
= kmem_alloc(sizeof (rfs4_oldstate_t
), KM_SLEEP
);
540 * build iovecs to read in the file_version, verifier and id_len
542 iov
[0].iov_base
= (caddr_t
)&file_vers
;
543 iov
[0].iov_len
= sizeof (int);
544 iov
[1].iov_base
= (caddr_t
)&cl_ss
->cl_id4
.verifier
;
545 iov
[1].iov_len
= NFS4_VERIFIER_SIZE
;
546 iov
[2].iov_base
= (caddr_t
)&id_len
;
547 iov
[2].iov_len
= sizeof (uint_t
);
551 uio
.uio_segflg
= UIO_SYSSPACE
;
553 uio
.uio_resid
= sizeof (int) + NFS4_VERIFIER_SIZE
+ sizeof (uint_t
);
555 if (err
= fop_read(vp
, &uio
, FREAD
, CRED(), NULL
)) {
556 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
557 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
559 kmem_free(cl_ss
, sizeof (rfs4_oldstate_t
));
564 * if the file_version doesn't match or if the
565 * id_len is zero or the combination of the verifier,
566 * id_len and id_val is bigger than the file we have
567 * a problem. If so ditch the file.
569 kill_file
= (file_vers
!= NFS4_SS_VERSION
|| id_len
== 0 ||
570 (id_len
+ NFS4_VERIFIER_SIZE
+ sizeof (uint_t
)) > va
.va_size
);
572 if (err
|| kill_file
) {
573 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
574 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
576 kmem_free(cl_ss
, sizeof (rfs4_oldstate_t
));
578 (void) fop_remove(dvp
, ss_pn
->leaf
, CRED(), NULL
, 0);
584 * now get the client id value
586 cl_ss
->cl_id4
.id_val
= kmem_alloc(id_len
, KM_SLEEP
);
587 iov
[0].iov_base
= cl_ss
->cl_id4
.id_val
;
588 iov
[0].iov_len
= id_len
;
592 uio
.uio_segflg
= UIO_SYSSPACE
;
593 uio
.uio_resid
= cl_ss
->cl_id4
.id_len
= id_len
;
595 if (err
= fop_read(vp
, &uio
, FREAD
, CRED(), NULL
)) {
596 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
597 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
599 kmem_free(cl_ss
->cl_id4
.id_val
, id_len
);
600 kmem_free(cl_ss
, sizeof (rfs4_oldstate_t
));
604 fop_rwunlock(vp
, V_WRITELOCK_FALSE
, NULL
);
605 (void) fop_close(vp
, FREAD
, 1, 0, CRED(), NULL
);
613 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
616 * Add entries from statedir to supplied oldstate list.
617 * Optionally, move all entries from statedir -> destdir.
/*
 * rfs4_ss_oldstate: load every state file in a directory onto a list.
 *
 * Opens statedir for reading, requires it to be a readable directory, and
 * reads its entries in chunks of RFS4_SS_DIRSIZE bytes with fop_readdir()
 * (holding the directory's rwlock as a reader only around that call).
 * For each entry that is not a dot name, the pathname is built with
 * rfs4_ss_pnalloc() and parsed with rfs4_ss_getstate().  A successfully
 * parsed record is linked onto the oldstate list with insque(); when
 * destdir is non-NULL the file is first moved there with
 * rfs4_ss_movestate() and the record keeps the pathname that call yields.
 * Entries that do not parse only have their pathname freed.
 *
 * oldstate - list head the parsed records are inserted after.
 * statedir - directory to scan.
 * destdir  - directory to move parsed files into, or NULL to leave them.
 *
 * NOTE(review): the loop construct around the readdir call, the handling
 * of err/dir_eof, the action taken for dot names and the early exits are
 * not visible in this extract.
 */
620 rfs4_ss_oldstate(rfs4_oldstate_t
*oldstate
, char *statedir
, char *destdir
)
623 rfs4_oldstate_t
*cl_ss
= NULL
;
625 int err
, dir_eof
= 0, size
= 0;
629 struct dirent64
*dep
;
630 offset_t dirchunk_offset
= 0;
633 * open the state directory
635 if (vn_open(statedir
, UIO_SYSSPACE
, FREAD
, 0, &dvp
, 0, 0))
638 if (dvp
->v_type
!= VDIR
|| fop_access(dvp
, VREAD
, 0, CRED(), NULL
))
641 dirt
= kmem_alloc(RFS4_SS_DIRSIZE
, KM_SLEEP
);
644 * Get and process the directory entries
647 (void) fop_rwlock(dvp
, V_WRITELOCK_FALSE
, NULL
);
649 iov
.iov_len
= RFS4_SS_DIRSIZE
;
652 uio
.uio_segflg
= UIO_SYSSPACE
;
/* Resume the directory read where the previous chunk stopped. */
653 uio
.uio_loffset
= dirchunk_offset
;
654 uio
.uio_resid
= RFS4_SS_DIRSIZE
;
656 err
= fop_readdir(dvp
, &uio
, CRED(), &dir_eof
, NULL
, 0);
657 fop_rwunlock(dvp
, V_WRITELOCK_FALSE
, NULL
);
/* Number of bytes of directory data actually placed in the buffer. */
661 size
= RFS4_SS_DIRSIZE
- uio
.uio_resid
;
664 * Process all the directory entries in this
667 for (dep
= (struct dirent64
*)dirt
; size
> 0;
670 size
-= dep
->d_reclen
;
671 dirchunk_offset
= dep
->d_off
;
676 if (NFS_IS_DOTNAME(dep
->d_name
))
679 ss_pn
= rfs4_ss_pnalloc(statedir
, dep
->d_name
);
683 if (cl_ss
= rfs4_ss_getstate(dvp
, ss_pn
)) {
684 if (destdir
!= NULL
) {
/* The statedir pathname is replaced by whatever the move yields. */
685 rfs4_ss_pnfree(ss_pn
);
686 cl_ss
->ss_pn
= rfs4_ss_movestate(
687 statedir
, destdir
, dep
->d_name
);
689 cl_ss
->ss_pn
= ss_pn
;
691 insque(cl_ss
, oldstate
);
693 rfs4_ss_pnfree(ss_pn
);
699 (void) fop_close(dvp
, FREAD
, 1, 0, CRED(), NULL
);
702 kmem_free((caddr_t
)dirt
, RFS4_SS_DIRSIZE
);
709 char *default_dss_path
= NFS4_DSS_VAR_DIR
;
711 /* read the default stable storage state */
712 rfs4_dss_readstate(npaths
, &default_dss_path
);
720 rfs4_servinst_t
*sip
;
722 mutex_enter(&rfs4_servinst_lock
);
723 sip
= rfs4_cur_servinst
;
724 while (sip
!= NULL
) {
725 rfs4_dss_clear_oldstate(sip
);
728 mutex_exit(&rfs4_servinst_lock
);
732 * Remove all oldstate files referenced by this servinst.
/*
 * rfs4_dss_clear_oldstate: discard a server instance's oldstate records.
 *
 * Holding sip->oldstate_lock as a writer, walks the oldstate list until
 * the walk returns to the head.  For each record it removes the record's
 * leaf file via rfs4_dss_remove_leaf() with NFS4_DSS_OLDSTATE_LEAF, frees
 * the client id buffer when one is present, frees the pathname structure
 * and finally frees the record itself.
 *
 * sip - the server instance whose oldstate list is to be emptied.
 *
 * When sip->oldstate is NULL the lock is dropped straight away.
 *
 * NOTE(review): the initial assignment of osp, the list unlinking and
 * the advance to the next record are not visible in this extract.
 */
735 rfs4_dss_clear_oldstate(rfs4_servinst_t
*sip
)
737 rfs4_oldstate_t
*os_head
, *osp
;
739 rw_enter(&sip
->oldstate_lock
, RW_WRITER
);
740 os_head
= sip
->oldstate
;
742 if (os_head
== NULL
) {
743 rw_exit(&sip
->oldstate_lock
);
747 /* skip dummy entry */
749 while (osp
!= os_head
) {
750 char *leaf
= osp
->ss_pn
->leaf
;
751 rfs4_oldstate_t
*os_next
;
753 rfs4_dss_remove_leaf(sip
, NFS4_DSS_OLDSTATE_LEAF
, leaf
);
755 if (osp
->cl_id4
.id_val
)
756 kmem_free(osp
->cl_id4
.id_val
, osp
->cl_id4
.id_len
);
757 rfs4_ss_pnfree(osp
->ss_pn
);
761 kmem_free(osp
, sizeof (rfs4_oldstate_t
));
765 rw_exit(&sip
->oldstate_lock
);
769 * Form the state and oldstate paths, and read in the stable storage files.
/*
 * rfs4_dss_readstate: load stable-storage records from a set of paths.
 *
 * For each of the npaths entries in paths, forms "<path>/" followed by
 * NFS4_DSS_STATE_LEAF and by NFS4_DSS_OLDSTATE_LEAF in two MAXPATHLEN
 * scratch buffers, then calls rfs4_ss_oldstate() twice against the
 * current server instance's oldstate list: once on the oldstate directory
 * with no destination, and once on the state directory with the oldstate
 * directory as destination.  Both scratch buffers are freed at the end.
 *
 * npaths - number of entries in paths.
 * paths  - array of directory pathnames.
 *
 * NOTE(review): both sprintf() calls write into MAXPATHLEN buffers with
 * no length bound; a path close to MAXPATHLEN would overrun them unless
 * callers guarantee shorter paths -- confirm, or bound with snprintf().
 */
772 rfs4_dss_readstate(int npaths
, char **paths
)
775 char *state
, *oldstate
;
777 state
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
778 oldstate
= kmem_alloc(MAXPATHLEN
, KM_SLEEP
);
780 for (i
= 0; i
< npaths
; i
++) {
781 char *path
= paths
[i
];
783 (void) sprintf(state
, "%s/%s", path
, NFS4_DSS_STATE_LEAF
);
784 (void) sprintf(oldstate
, "%s/%s", path
, NFS4_DSS_OLDSTATE_LEAF
);
787 * Populate the current server instance's oldstate list.
789 * 1. Read stable storage data from old state directory,
790 * leaving its contents alone.
792 * 2. Read stable storage data from state directory,
793 * and move the latter's contents to old state
796 rfs4_ss_oldstate(rfs4_cur_servinst
->oldstate
, oldstate
, NULL
);
797 rfs4_ss_oldstate(rfs4_cur_servinst
->oldstate
, state
, oldstate
);
800 kmem_free(state
, MAXPATHLEN
);
801 kmem_free(oldstate
, MAXPATHLEN
);
806 * Check if we are still in grace and if the client can be
807 * granted permission to perform reclaims.
/*
 * rfs4_ss_chkclid: look for a client in every instance's oldstate data.
 *
 * Holding rfs4_servinst_lock, starts at rfs4_cur_servinst and follows the
 * prev links, calling rfs4_ss_chkclid_sip() on each server instance.  The
 * rc_can_reclaim flag of cp is tested after each call; per the comment in
 * the body, a set flag means the client was found and the search is done.
 *
 * cp - the client being checked; rc_can_reclaim may be set as a result.
 */
810 rfs4_ss_chkclid(rfs4_client_t
*cp
)
812 rfs4_servinst_t
*sip
;
815 * It should be sufficient to check the oldstate data for just
816 * this client's instance. However, since our per-instance
817 * client grouping is solely temporal, HA-NFSv4 RG failover
818 * might result in clients of the same RG being partitioned into
819 * separate instances.
821 * Until the client grouping is improved, we must check the
822 * oldstate data for all instances with an active grace period.
824 * This also serves as the mechanism to remove stale oldstate data.
825 * The first time we check an instance after its grace period has
826 * expired, the oldstate data should be cleared.
828 * Start at the current instance, and walk the list backwards
831 mutex_enter(&rfs4_servinst_lock
);
832 for (sip
= rfs4_cur_servinst
; sip
!= NULL
; sip
= sip
->prev
) {
833 rfs4_ss_chkclid_sip(cp
, sip
);
835 /* if the above check found this client, we're done */
836 if (cp
->rc_can_reclaim
)
839 mutex_exit(&rfs4_servinst_lock
);
/*
 * rfs4_ss_chkclid_sip: look for a client in one instance's oldstate data.
 *
 * Reads sip->oldstate under the reader lock, tests whether the instance
 * is still in its grace period with rfs4_servinst_in_grace(), and then,
 * holding the reader lock again, walks the oldstate list until the walk
 * returns to the head.  A record matches when its id_len equals the
 * client's and bcmp() finds the id bytes identical; a match sets
 * cp->rc_can_reclaim to 1.
 *
 * cp  - the client being looked for.
 * sip - the server instance whose oldstate list is searched.
 *
 * NOTE(review): the early exits (no oldstate, not in grace), the initial
 * assignment and advance of osp, and whatever follows a match are not
 * visible in this extract.  os_head is read, the lock dropped, and the
 * list head re-read under a second rw_enter(); confirm the list cannot
 * be torn down in between.
 */
843 rfs4_ss_chkclid_sip(rfs4_client_t
*cp
, rfs4_servinst_t
*sip
)
845 rfs4_oldstate_t
*osp
, *os_head
;
847 /* short circuit everything if this server instance has no oldstate */
848 rw_enter(&sip
->oldstate_lock
, RW_READER
);
849 os_head
= sip
->oldstate
;
850 rw_exit(&sip
->oldstate_lock
);
855 * If this server instance is no longer in a grace period then
856 * the client won't be able to reclaim. No further need for this
857 * instance's oldstate data, so it can be cleared.
859 if (!rfs4_servinst_in_grace(sip
))
862 /* this instance is still in grace; search for the clientid */
864 rw_enter(&sip
->oldstate_lock
, RW_READER
);
866 os_head
= sip
->oldstate
;
867 /* skip dummy entry */
869 while (osp
!= os_head
) {
870 if (osp
->cl_id4
.id_len
== cp
->rc_nfs_client
.id_len
) {
871 if (bcmp(osp
->cl_id4
.id_val
, cp
->rc_nfs_client
.id_val
,
872 osp
->cl_id4
.id_len
) == 0) {
873 cp
->rc_can_reclaim
= 1;
880 rw_exit(&sip
->oldstate_lock
);
884 * Place client information into stable storage: 1/3.
885 * First, generate the leaf filename, from the client's IP address and
886 * the server-generated short-hand clientid.
/*
 * rfs4_ss_clid: derive a client's stable-storage file name and write it.
 *
 * Converts the address held in cp->rc_addr to text: an AF_INET address
 * becomes four zero-padded three-digit decimal fields separated by dots,
 * an AF_INET6 address is formatted by kinet_ntop6().  The leaf name is
 * that text, a '-', and cp->rc_clientid in hexadecimal, bounded by
 * MAXNAMELEN.  The leaf is then handed to rfs4_ss_clid_write().
 *
 * cp - the client to record.
 *
 * The rfs4_ss_enabled == 0 case is tested first; its body is not visible
 * in this extract.
 *
 * NOTE(review): buf is filled only for AF_INET and AF_INET6 in the
 * visible lines; confirm that no other address family can reach the
 * snprintf() that reads it.
 */
889 rfs4_ss_clid(rfs4_client_t
*cp
)
891 const char *kinet_ntop6(uchar_t
*, char *, size_t);
892 char leaf
[MAXNAMELEN
], buf
[INET6_ADDRSTRLEN
];
896 if (rfs4_ss_enabled
== 0) {
902 ca
= (struct sockaddr
*)&cp
->rc_addr
;
905 * Convert the caller's IP address to a dotted string
907 if (ca
->sa_family
== AF_INET
) {
908 b
= (uchar_t
*)&((struct sockaddr_in
*)ca
)->sin_addr
;
909 (void) sprintf(buf
, "%03d.%03d.%03d.%03d", b
[0] & 0xFF,
910 b
[1] & 0xFF, b
[2] & 0xFF, b
[3] & 0xFF);
911 } else if (ca
->sa_family
== AF_INET6
) {
912 struct sockaddr_in6
*sin6
;
914 sin6
= (struct sockaddr_in6
*)ca
;
915 (void) kinet_ntop6((uchar_t
*)&sin6
->sin6_addr
,
916 buf
, INET6_ADDRSTRLEN
);
919 (void) snprintf(leaf
, MAXNAMELEN
, "%s-%llx", buf
,
920 (longlong_t
)cp
->rc_clientid
);
921 rfs4_ss_clid_write(cp
, leaf
);
925 * Place client information into stable storage: 2/3.
926 * DSS: distributed stable storage: the file may need to be written to
927 * multiple directories.
/*
 * rfs4_ss_clid_write: write a client's leaf file to every DSS path.
 *
 * Holding rfs4_servinst_lock, starts at rfs4_cur_servinst and follows the
 * prev links.  For each server instance it visits all dss_npaths entries
 * of dss_paths and calls rfs4_ss_clid_write_one() with the entry's path.
 * NULL entries are tested for before use.
 *
 * cp   - the client whose record is written.
 * leaf - file name to write under each path.
 *
 * NOTE(review): the statement executed for a NULL dss_path entry is not
 * visible in this extract; presumably the entry is skipped.  The file
 * writes happen with rfs4_servinst_lock held.
 */
930 rfs4_ss_clid_write(rfs4_client_t
*cp
, char *leaf
)
932 rfs4_servinst_t
*sip
;
935 * It should be sufficient to write the leaf file to (all) DSS paths
936 * associated with just this client's instance. However, since our
937 * per-instance client grouping is solely temporal, HA-NFSv4 RG
938 * failover might result in us losing DSS data.
940 * Until the client grouping is improved, we must write the DSS data
941 * to all instances' paths. Start at the current instance, and
942 * walk the list backwards to the first.
944 mutex_enter(&rfs4_servinst_lock
);
945 for (sip
= rfs4_cur_servinst
; sip
!= NULL
; sip
= sip
->prev
) {
946 int i
, npaths
= sip
->dss_npaths
;
948 /* write the leaf file to all DSS paths */
949 for (i
= 0; i
< npaths
; i
++) {
950 rfs4_dss_path_t
*dss_path
= sip
->dss_paths
[i
];
952 /* HA-NFSv4 path might have been failed-away from us */
953 if (dss_path
== NULL
)
956 rfs4_ss_clid_write_one(cp
, dss_path
->path
, leaf
);
959 mutex_exit(&rfs4_servinst_lock
);
963 * Place client information into stable storage: 3/3.
964 * Write the stable storage data to the requested file.
/*
 * rfs4_ss_clid_write_one: write a client's record into one DSS path.
 *
 * Forms "<dss_path>/" followed by NFS4_DSS_STATE_LEAF in an exactly sized
 * temporary buffer, builds the full pathname with rfs4_ss_pnalloc(), and
 * opens the file with FCREAT|FWRITE and mode 0600.  The pathname
 * structure is remembered in cp->rc_ss_pn: kept as is when the client
 * already records the same leaf, otherwise the old one is freed and
 * replaced.
 *
 * The record is written with a single fop_write() from four iovecs, in
 * this order: file version (NFS4_SS_VERSION), verifier, id_len, and
 * id_len bytes of id_val.  The write is issued at offset 0 with
 * FWRITE|FSYNC while holding the vnode's rwlock as a writer, after which
 * the file is closed.
 *
 * cp       - the client whose nfs_client_id4 is written.
 * dss_path - DSS directory the state directory lives under.
 * leaf     - file name to create.
 *
 * NOTE(review): the result of fop_write() is discarded, so a failed or
 * short write is not reported.  The handling of a NULL result from
 * rfs4_ss_pnalloc() and the remaining vn_open() arguments are not
 * visible in this extract.
 */
967 rfs4_ss_clid_write_one(rfs4_client_t
*cp
, char *dss_path
, char *leaf
)
970 int file_vers
= NFS4_SS_VERSION
;
977 nfs_client_id4
*cl_id4
= &(cp
->rc_nfs_client
);
979 /* allow 2 extra bytes for '/' & NUL */
980 dirlen
= strlen(dss_path
) + strlen(NFS4_DSS_STATE_LEAF
) + 2;
981 dir
= kmem_alloc(dirlen
, KM_SLEEP
);
982 (void) sprintf(dir
, "%s/%s", dss_path
, NFS4_DSS_STATE_LEAF
);
984 ss_pn
= rfs4_ss_pnalloc(dir
, leaf
);
985 /* rfs4_ss_pnalloc takes its own copy */
986 kmem_free(dir
, dirlen
);
990 if (vn_open(ss_pn
->pn
, UIO_SYSSPACE
, FCREAT
|FWRITE
, 0600, &vp
,
992 rfs4_ss_pnfree(ss_pn
);
997 * We need to record leaf - i.e. the filename - so that we know
998 * what to remove, in the future. However, the dir part of cp->ss_pn
999 * should never be referenced directly, since it's potentially only
1000 * one of several paths with this leaf in it.
1002 if (cp
->rc_ss_pn
!= NULL
) {
1003 if (strcmp(cp
->rc_ss_pn
->leaf
, leaf
) == 0) {
1004 /* we've already recorded *this* leaf */
1005 rfs4_ss_pnfree(ss_pn
);
1007 /* replace with this leaf */
1008 rfs4_ss_pnfree(cp
->rc_ss_pn
);
1009 cp
->rc_ss_pn
= ss_pn
;
1012 cp
->rc_ss_pn
= ss_pn
;
1016 * Build a scatter list that points to the nfs_client_id4
1018 iov
[0].iov_base
= (caddr_t
)&file_vers
;
1019 iov
[0].iov_len
= sizeof (int);
1020 iov
[1].iov_base
= (caddr_t
)&(cl_id4
->verifier
);
1021 iov
[1].iov_len
= NFS4_VERIFIER_SIZE
;
1022 iov
[2].iov_base
= (caddr_t
)&(cl_id4
->id_len
);
1023 iov
[2].iov_len
= sizeof (uint_t
);
1024 iov
[3].iov_base
= (caddr_t
)cl_id4
->id_val
;
1025 iov
[3].iov_len
= cl_id4
->id_len
;
1029 uio
.uio_loffset
= 0;
1030 uio
.uio_segflg
= UIO_SYSSPACE
;
1031 uio
.uio_llimit
= (rlim64_t
)MAXOFFSET_T
;
/* Total of the four iovec lengths set up above. */
1032 uio
.uio_resid
= cl_id4
->id_len
+ sizeof (int) +
1033 NFS4_VERIFIER_SIZE
+ sizeof (uint_t
);
1035 ioflag
= uio
.uio_fmode
= (FWRITE
|FSYNC
);
1036 uio
.uio_extflg
= UIO_COPY_DEFAULT
;
1038 (void) fop_rwlock(vp
, V_WRITELOCK_TRUE
, NULL
);
1039 /* write the full client id to the file. */
1040 (void) fop_write(vp
, &uio
, ioflag
, CRED(), NULL
);
1041 fop_rwunlock(vp
, V_WRITELOCK_TRUE
, NULL
);
1043 (void) fop_close(vp
, FWRITE
, 1, 0, CRED(), NULL
);
1048 * DSS: distributed stable storage.
1049 * Unpack the list of paths passed by nfsd.
1050 * Use nvlist_alloc(9F) to manage the data.
1051 * The caller is responsible for allocating and freeing the buffer.
/*
 * rfs4_dss_setpaths: install a new list of DSS paths from a packed nvlist.
 *
 * When a path list is already present (rfs4_dss_paths is non-NULL), the
 * list saved by the previous call (rfs4_dss_oldpaths) is freed and the
 * current list takes its place.  The buffer is then unpacked into
 * rfs4_dss_paths with nvlist_unpack(), and the string array stored under
 * NFS4_DSS_NVPAIR_NAME is looked up, leaving its location in
 * rfs4_dss_newpaths and its element count in rfs4_dss_numnewpaths.
 *
 * buf    - packed nvlist data.
 * buflen - size of buf in bytes.
 *
 * NOTE(review): the checks on error after each nvlist call and the
 * return statements are not visible in this extract.
 */
1054 rfs4_dss_setpaths(char *buf
, size_t buflen
)
1059 * If this is a "warm start", i.e. we previously had DSS paths,
1060 * preserve the old paths.
1062 if (rfs4_dss_paths
!= NULL
) {
1064 * Before we lose the ptr, destroy the nvlist and pathnames
1065 * array from the warm start before this one.
1067 nvlist_free(rfs4_dss_oldpaths
);
1068 rfs4_dss_oldpaths
= rfs4_dss_paths
;
1071 /* unpack the buffer into a searchable nvlist */
1072 error
= nvlist_unpack(buf
, buflen
, &rfs4_dss_paths
, KM_SLEEP
);
1077 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1078 * in the list, and record its location.
1080 error
= nvlist_lookup_string_array(rfs4_dss_paths
, NFS4_DSS_NVPAIR_NAME
,
1081 &rfs4_dss_newpaths
, &rfs4_dss_numnewpaths
);
1086 * Ultimately the nfssys() call NFS4_CLR_STATE ends up here
1087 * to find and mark the client for forced expire.
/*
 * rfs4_client_scrub: mark a client for forced expiry when its address
 * matches the one supplied in a clear-state request.
 *
 * The request's addr_type is first compared with the address family
 * stored for the client.  Depending on addr_type, an in6_addr or an
 * in_addr is fetched from clr->ap with copyin() and compared with the
 * client's stored address; an equal address sets cp->rc_forced_expire
 * to 1.  An unexpected addr_type reaches an ASSERT that can never hold.
 *
 * ent - the table entry, treated as an rfs4_client_t.
 * arg - the struct nfs4clrst_args describing the address to clear.
 *
 * This function is passed to rfs4_dbe_walk() by rfs4_clear_client_state().
 *
 * NOTE(review): the case labels, the statements taken on a family
 * mismatch or a copyin() failure, and the breaks are not visible in this
 * extract.  copyin() is repeated for every entry visited.
 */
1090 rfs4_client_scrub(rfs4_entry_t ent
, void *arg
)
1092 rfs4_client_t
*cp
= (rfs4_client_t
*)ent
;
1093 struct nfs4clrst_args
*clr
= arg
;
1094 struct sockaddr_in6
*ent_sin6
;
1095 struct in6_addr clr_in6
;
1096 struct sockaddr_in
*ent_sin
;
1097 struct in_addr clr_in
;
1099 if (clr
->addr_type
!= cp
->rc_addr
.ss_family
) {
1103 switch (clr
->addr_type
) {
1106 /* copyin the address from user space */
1107 if (copyin(clr
->ap
, &clr_in6
, sizeof (clr_in6
))) {
1111 ent_sin6
= (struct sockaddr_in6
*)&cp
->rc_addr
;
1114 * now compare, and if equivalent mark entry
1115 * for forced expiration
1117 if (IN6_ARE_ADDR_EQUAL(&ent_sin6
->sin6_addr
, &clr_in6
)) {
1118 cp
->rc_forced_expire
= 1;
1123 /* copyin the address from user space */
1124 if (copyin(clr
->ap
, &clr_in
, sizeof (clr_in
))) {
1128 ent_sin
= (struct sockaddr_in
*)&cp
->rc_addr
;
1131 * now compare, and if equivalent mark entry
1132 * for forced expiration
1134 if (ent_sin
->sin_addr
.s_addr
== clr_in
.s_addr
) {
1135 cp
->rc_forced_expire
= 1;
1140 /* force this assert to fail */
1141 ASSERT(clr
->addr_type
!= clr
->addr_type
);
1146 * This is called from nfssys() in order to clear server state
1147 * for the specified client IP Address.
/*
 * rfs4_clear_client_state: apply a clear-state request to the client table.
 *
 * Walks rfs4_client_tab with rfs4_dbe_walk(), passing rfs4_client_scrub
 * as the per-entry callback and clr as its argument.  The result of the
 * walk is discarded.
 *
 * clr - the clear-state arguments handed to each rfs4_client_scrub() call.
 */
1150 rfs4_clear_client_state(struct nfs4clrst_args
*clr
)
1152 (void) rfs4_dbe_walk(rfs4_client_tab
, rfs4_client_scrub
, clr
);
1156 * Used to initialize the NFSv4 server's state or database. All of
1157 * the tables are created and timers are set. Only called when NFSv4
1158 * service is provided.
1164 extern boolean_t
rfs4_cpr_callb(void *, int);
1165 char *dss_path
= NFS4_DSS_VAR_DIR
;
1168 mutex_enter(&rfs4_state_lock
);
1171 * If the server state database has already been initialized,
1174 if (rfs4_server_state
!= NULL
) {
1175 mutex_exit(&rfs4_state_lock
);
1179 rw_init(&rfs4_findclient_lock
, NULL
, RW_DEFAULT
, NULL
);
1182 * Set the boot time. If the server
1183 * has been restarted quickly and has had the opportunity to
1184 * service clients, then the start_time needs to be bumped
1185 * regardless. A small window but it exists...
1187 start_time
= gethrestime_sec();
1188 if (rfs4_start_time
< start_time
)
1189 rfs4_start_time
= start_time
;
1193 /* DSS: distributed stable storage: initialise served paths list */
1194 rfs4_dss_pathlist
= NULL
;
1197 * Create the first server instance, or a new one if the server has
1198 * been restarted; see above comments on rfs4_start_time. Don't
1199 * start its grace period; that will be done later, to maximise the
1200 * clients' recovery window.
1203 rfs4_servinst_create(start_grace
, 1, &dss_path
);
1205 /* reset the "first NFSv4 request" status */
1206 rfs4_seen_first_compound
= 0;
1209 * Add a CPR callback so that we can update client
1210 * access times to extend the lease after a suspend
1211 * and resume (using the same class as rpcmod/connmgr)
1213 cpr_id
= callb_add(rfs4_cpr_callb
, 0, CB_CL_CPR_RPC
, "rfs4");
1215 /* set the various cache timers for table creation */
1216 if (rfs4_client_cache_time
== 0)
1217 rfs4_client_cache_time
= CLIENT_CACHE_TIME
;
1218 if (rfs4_openowner_cache_time
== 0)
1219 rfs4_openowner_cache_time
= OPENOWNER_CACHE_TIME
;
1220 if (rfs4_state_cache_time
== 0)
1221 rfs4_state_cache_time
= STATE_CACHE_TIME
;
1222 if (rfs4_lo_state_cache_time
== 0)
1223 rfs4_lo_state_cache_time
= LO_STATE_CACHE_TIME
;
1224 if (rfs4_lockowner_cache_time
== 0)
1225 rfs4_lockowner_cache_time
= LOCKOWNER_CACHE_TIME
;
1226 if (rfs4_file_cache_time
== 0)
1227 rfs4_file_cache_time
= FILE_CACHE_TIME
;
1228 if (rfs4_deleg_state_cache_time
== 0)
1229 rfs4_deleg_state_cache_time
= DELEG_STATE_CACHE_TIME
;
1231 /* Create the overall database to hold all server state */
1232 rfs4_server_state
= rfs4_database_create(rfs4_database_debug
);
1234 /* Now create the individual tables */
1235 rfs4_client_cache_time
*= rfs4_lease_time
;
1236 rfs4_client_tab
= rfs4_table_create(rfs4_server_state
,
1238 rfs4_client_cache_time
,
1241 rfs4_client_destroy
,
1243 sizeof (rfs4_client_t
),
1246 rfs4_nfsclnt_idx
= rfs4_index_create(rfs4_client_tab
,
1247 "nfs_client_id4", nfsclnt_hash
,
1248 nfsclnt_compare
, nfsclnt_mkkey
,
1250 rfs4_clientid_idx
= rfs4_index_create(rfs4_client_tab
,
1251 "client_id", clientid_hash
,
1252 clientid_compare
, clientid_mkkey
,
1255 rfs4_clntip_cache_time
= 86400 * 365; /* about a year */
1256 rfs4_clntip_tab
= rfs4_table_create(rfs4_server_state
,
1258 rfs4_clntip_cache_time
,
1261 rfs4_clntip_destroy
,
1263 sizeof (rfs4_clntip_t
),
1266 rfs4_clntip_idx
= rfs4_index_create(rfs4_clntip_tab
,
1267 "client_ip", clntip_hash
,
1268 clntip_compare
, clntip_mkkey
,
1271 rfs4_openowner_cache_time
*= rfs4_lease_time
;
1272 rfs4_openowner_tab
= rfs4_table_create(rfs4_server_state
,
1274 rfs4_openowner_cache_time
,
1276 rfs4_openowner_create
,
1277 rfs4_openowner_destroy
,
1278 rfs4_openowner_expiry
,
1279 sizeof (rfs4_openowner_t
),
1282 rfs4_openowner_idx
= rfs4_index_create(rfs4_openowner_tab
,
1283 "open_owner4", openowner_hash
,
1285 openowner_mkkey
, TRUE
);
1287 rfs4_state_cache_time
*= rfs4_lease_time
;
1288 rfs4_state_tab
= rfs4_table_create(rfs4_server_state
,
1290 rfs4_state_cache_time
,
1295 sizeof (rfs4_state_t
),
1299 rfs4_state_owner_file_idx
= rfs4_index_create(rfs4_state_tab
,
1301 state_owner_file_hash
,
1302 state_owner_file_compare
,
1303 state_owner_file_mkkey
, TRUE
);
1305 rfs4_state_idx
= rfs4_index_create(rfs4_state_tab
,
1306 "State-id", state_hash
,
1307 state_compare
, state_mkkey
, FALSE
);
1309 rfs4_state_file_idx
= rfs4_index_create(rfs4_state_tab
,
1310 "File", state_file_hash
,
1311 state_file_compare
, state_file_mkkey
,
1314 rfs4_lo_state_cache_time
*= rfs4_lease_time
;
1315 rfs4_lo_state_tab
= rfs4_table_create(rfs4_server_state
,
1317 rfs4_lo_state_cache_time
,
1319 rfs4_lo_state_create
,
1320 rfs4_lo_state_destroy
,
1321 rfs4_lo_state_expiry
,
1322 sizeof (rfs4_lo_state_t
),
1326 rfs4_lo_state_owner_idx
= rfs4_index_create(rfs4_lo_state_tab
,
1329 lo_state_lo_compare
,
1330 lo_state_lo_mkkey
, TRUE
);
1332 rfs4_lo_state_idx
= rfs4_index_create(rfs4_lo_state_tab
,
1334 lo_state_hash
, lo_state_compare
,
1335 lo_state_mkkey
, FALSE
);
1337 rfs4_lockowner_cache_time
*= rfs4_lease_time
;
1339 rfs4_lockowner_tab
= rfs4_table_create(rfs4_server_state
,
1341 rfs4_lockowner_cache_time
,
1343 rfs4_lockowner_create
,
1344 rfs4_lockowner_destroy
,
1345 rfs4_lockowner_expiry
,
1346 sizeof (rfs4_lockowner_t
),
1350 rfs4_lockowner_idx
= rfs4_index_create(rfs4_lockowner_tab
,
1351 "lock_owner4", lockowner_hash
,
1353 lockowner_mkkey
, TRUE
);
1355 rfs4_lockowner_pid_idx
= rfs4_index_create(rfs4_lockowner_tab
,
1357 pid_compare
, pid_mkkey
,
1360 rfs4_file_cache_time
*= rfs4_lease_time
;
1361 rfs4_file_tab
= rfs4_table_create(rfs4_server_state
,
1363 rfs4_file_cache_time
,
1368 sizeof (rfs4_file_t
),
1372 rfs4_file_idx
= rfs4_index_create(rfs4_file_tab
,
1373 "Filehandle", file_hash
,
1374 file_compare
, file_mkkey
, TRUE
);
1376 rfs4_deleg_state_cache_time
*= rfs4_lease_time
;
1377 rfs4_deleg_state_tab
= rfs4_table_create(rfs4_server_state
,
1379 rfs4_deleg_state_cache_time
,
1381 rfs4_deleg_state_create
,
1382 rfs4_deleg_state_destroy
,
1383 rfs4_deleg_state_expiry
,
1384 sizeof (rfs4_deleg_state_t
),
1387 rfs4_deleg_idx
= rfs4_index_create(rfs4_deleg_state_tab
,
1388 "DelegByFileClient",
1393 rfs4_deleg_state_idx
= rfs4_index_create(rfs4_deleg_state_tab
,
1396 deleg_state_compare
,
1397 deleg_state_mkkey
, FALSE
);
1400 * Init the stable storage.
1404 rfs4_client_clrst
= rfs4_clear_client_state
;
1406 mutex_exit(&rfs4_state_lock
);
1411 * Used at server shutdown to clean up all of the NFSv4 server's structures
1417 rfs4_database_t
*dbp
;
1419 mutex_enter(&rfs4_state_lock
);
1421 if (rfs4_server_state
== NULL
) {
1422 mutex_exit(&rfs4_state_lock
);
1426 rfs4_client_clrst
= NULL
;
1428 rfs4_set_deleg_policy(SRV_NEVER_DELEGATE
);
1429 dbp
= rfs4_server_state
;
1430 rfs4_server_state
= NULL
;
1433 * Clean up the CPR callback.
1436 (void) callb_delete(cpr_id
);
1438 rw_destroy(&rfs4_findclient_lock
);
1440 /* First stop all of the reaper threads in the database */
1441 rfs4_database_shutdown(dbp
);
1442 /* clean up any dangling stable storage structures */
1444 /* Now actually destroy/release the database and its tables */
1445 rfs4_database_destroy(dbp
);
1447 /* Reset the cache timers for next time */
1448 rfs4_client_cache_time
= 0;
1449 rfs4_openowner_cache_time
= 0;
1450 rfs4_state_cache_time
= 0;
1451 rfs4_lo_state_cache_time
= 0;
1452 rfs4_lockowner_cache_time
= 0;
1453 rfs4_file_cache_time
= 0;
1454 rfs4_deleg_state_cache_time
= 0;
1456 mutex_exit(&rfs4_state_lock
);
1458 /* destroy server instances and current instance ptr */
1459 rfs4_servinst_destroy_all();
1461 /* reset the "first NFSv4 request" status */
1462 rfs4_seen_first_compound
= 0;
1464 /* DSS: distributed stable storage */
1465 nvlist_free(rfs4_dss_oldpaths
);
1466 nvlist_free(rfs4_dss_paths
);
1467 rfs4_dss_paths
= rfs4_dss_oldpaths
= NULL
;
1472 uint32_t start_time
;
1483 verifier4 confirm_verf
;
1484 } scid_confirm_verf
;
1487 clientid_hash(void *key
)
1491 return (idp
->impl_id
.c_id
);
1495 clientid_compare(rfs4_entry_t entry
, void *key
)
1497 rfs4_client_t
*cp
= (rfs4_client_t
*)entry
;
1498 clientid4
*idp
= key
;
1500 return (*idp
== cp
->rc_clientid
);
1504 clientid_mkkey(rfs4_entry_t entry
)
1506 rfs4_client_t
*cp
= (rfs4_client_t
*)entry
;
1508 return (&cp
->rc_clientid
);
1512 nfsclnt_hash(void *key
)
1514 nfs_client_id4
*client
= key
;
1518 for (i
= 0; i
< client
->id_len
; i
++) {
1520 hash
+= (uint_t
)client
->id_val
[i
];
1527 nfsclnt_compare(rfs4_entry_t entry
, void *key
)
1529 rfs4_client_t
*cp
= (rfs4_client_t
*)entry
;
1530 nfs_client_id4
*nfs_client
= key
;
1532 if (cp
->rc_nfs_client
.id_len
!= nfs_client
->id_len
)
1535 return (bcmp(cp
->rc_nfs_client
.id_val
, nfs_client
->id_val
,
1536 nfs_client
->id_len
) == 0);
1540 nfsclnt_mkkey(rfs4_entry_t entry
)
1542 rfs4_client_t
*cp
= (rfs4_client_t
*)entry
;
1544 return (&cp
->rc_nfs_client
);
1548 rfs4_client_expiry(rfs4_entry_t u_entry
)
1550 rfs4_client_t
*cp
= (rfs4_client_t
*)u_entry
;
1553 if (rfs4_dbe_is_invalid(cp
->rc_dbe
)) {
1554 cp
->rc_ss_remove
= 1;
1558 * If the sysadmin has used clear_locks for this
1559 * entry then forced_expire will be set and we
1560 * want this entry to be reaped. Or the entry
1561 * has exceeded its lease period.
1563 cp_expired
= (cp
->rc_forced_expire
||
1564 (gethrestime_sec() - cp
->rc_last_access
1565 > rfs4_lease_time
));
1567 if (!cp
->rc_ss_remove
&& cp_expired
)
1568 cp
->rc_ss_remove
= 1;
1569 return (cp_expired
);
1573 * Remove the leaf file from all distributed stable storage paths.
1576 rfs4_dss_remove_cpleaf(rfs4_client_t
*cp
)
1578 rfs4_servinst_t
*sip
;
1579 char *leaf
= cp
->rc_ss_pn
->leaf
;
1582 * since the state files are written to all DSS
1583 * paths we must remove this leaf file instance
1584 * from all server instances.
1587 mutex_enter(&rfs4_servinst_lock
);
1588 for (sip
= rfs4_cur_servinst
; sip
!= NULL
; sip
= sip
->prev
) {
1589 /* remove the leaf file associated with this server instance */
1590 rfs4_dss_remove_leaf(sip
, NFS4_DSS_STATE_LEAF
, leaf
);
1592 mutex_exit(&rfs4_servinst_lock
);
1596 rfs4_dss_remove_leaf(rfs4_servinst_t
*sip
, char *dir_leaf
, char *leaf
)
1598 int i
, npaths
= sip
->dss_npaths
;
1600 for (i
= 0; i
< npaths
; i
++) {
1601 rfs4_dss_path_t
*dss_path
= sip
->dss_paths
[i
];
1605 /* the HA-NFSv4 path might have been failed-over away from us */
1606 if (dss_path
== NULL
)
1609 dir
= dss_path
->path
;
1611 /* allow 3 extra bytes for two '/' & a NUL */
1612 pathlen
= strlen(dir
) + strlen(dir_leaf
) + strlen(leaf
) + 3;
1613 path
= kmem_alloc(pathlen
, KM_SLEEP
);
1614 (void) sprintf(path
, "%s/%s/%s", dir
, dir_leaf
, leaf
);
1616 (void) vn_remove(path
, UIO_SYSSPACE
, RMFILE
);
1618 kmem_free(path
, pathlen
);
1623 rfs4_client_destroy(rfs4_entry_t u_entry
)
1625 rfs4_client_t
*cp
= (rfs4_client_t
*)u_entry
;
1627 mutex_destroy(cp
->rc_cbinfo
.cb_lock
);
1628 cv_destroy(cp
->rc_cbinfo
.cb_cv
);
1629 cv_destroy(cp
->rc_cbinfo
.cb_cv_nullcaller
);
1630 list_destroy(&cp
->rc_openownerlist
);
1632 /* free callback info */
1633 rfs4_cbinfo_free(&cp
->rc_cbinfo
);
1635 if (cp
->rc_cp_confirmed
)
1636 rfs4_client_rele(cp
->rc_cp_confirmed
);
1639 /* check if the stable storage files need to be removed */
1640 if (cp
->rc_ss_remove
)
1641 rfs4_dss_remove_cpleaf(cp
);
1642 rfs4_ss_pnfree(cp
->rc_ss_pn
);
1645 /* Free the client supplied client id */
1646 kmem_free(cp
->rc_nfs_client
.id_val
, cp
->rc_nfs_client
.id_len
);
1648 if (cp
->rc_sysidt
!= LM_NOSYSID
)
1649 lm_free_sysidt(cp
->rc_sysidt
);
1653 rfs4_client_create(rfs4_entry_t u_entry
, void *arg
)
1655 rfs4_client_t
*cp
= (rfs4_client_t
*)u_entry
;
1656 nfs_client_id4
*client
= (nfs_client_id4
*)arg
;
1657 struct sockaddr
*ca
;
1659 scid_confirm_verf
*scvp
;
1661 /* Get a clientid to give to the client */
1662 cidp
= (cid
*)&cp
->rc_clientid
;
1663 cidp
->impl_id
.start_time
= rfs4_start_time
;
1664 cidp
->impl_id
.c_id
= (uint32_t)rfs4_dbe_getid(cp
->rc_dbe
);
1666 /* Allocate and copy client's client id value */
1667 cp
->rc_nfs_client
.id_val
= kmem_alloc(client
->id_len
, KM_SLEEP
);
1668 cp
->rc_nfs_client
.id_len
= client
->id_len
;
1669 bcopy(client
->id_val
, cp
->rc_nfs_client
.id_val
, client
->id_len
);
1670 cp
->rc_nfs_client
.verifier
= client
->verifier
;
1672 /* Copy client's IP address */
1673 ca
= client
->cl_addr
;
1674 if (ca
->sa_family
== AF_INET
)
1675 bcopy(ca
, &cp
->rc_addr
, sizeof (struct sockaddr_in
));
1676 else if (ca
->sa_family
== AF_INET6
)
1677 bcopy(ca
, &cp
->rc_addr
, sizeof (struct sockaddr_in6
));
1678 cp
->rc_nfs_client
.cl_addr
= (struct sockaddr
*)&cp
->rc_addr
;
1680 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1681 scvp
= (scid_confirm_verf
*)&cp
->rc_confirm_verf
;
1682 scvp
->cv_impl
.c_id
= cidp
->impl_id
.c_id
;
1683 scvp
->cv_impl
.gen_num
= 0;
1685 /* No F_UNLKSYS has been done for this client yet */
1686 cp
->rc_unlksys_completed
= FALSE
;
1688 /* We need the client to ack us */
1689 cp
->rc_need_confirm
= TRUE
;
1690 cp
->rc_cp_confirmed
= NULL
;
1692 /* TRUE all the time until the callback path actually fails */
1693 cp
->rc_cbinfo
.cb_notified_of_cb_path_down
= TRUE
;
1695 /* Initialize the access time to now */
1696 cp
->rc_last_access
= gethrestime_sec();
1698 cp
->rc_cr_set
= NULL
;
1700 cp
->rc_sysidt
= LM_NOSYSID
;
1702 list_create(&cp
->rc_openownerlist
, sizeof (rfs4_openowner_t
),
1703 offsetof(rfs4_openowner_t
, ro_node
));
1705 /* set up the callback control structure */
1706 cp
->rc_cbinfo
.cb_state
= CB_UNINIT
;
1707 mutex_init(cp
->rc_cbinfo
.cb_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1708 cv_init(cp
->rc_cbinfo
.cb_cv
, NULL
, CV_DEFAULT
, NULL
);
1709 cv_init(cp
->rc_cbinfo
.cb_cv_nullcaller
, NULL
, CV_DEFAULT
, NULL
);
1712 * Associate the client_t with the current server instance.
1713 * The hold is solely to satisfy the calling requirement of
1714 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1716 rfs4_dbe_hold(cp
->rc_dbe
);
1717 rfs4_servinst_assign(cp
, rfs4_cur_servinst
);
1718 rfs4_dbe_rele(cp
->rc_dbe
);
1724 * Caller wants to generate/update the setclientid_confirm verifier
1725 * associated with a client. This is done during the SETCLIENTID
1729 rfs4_client_scv_next(rfs4_client_t
*cp
)
1731 scid_confirm_verf
*scvp
;
1733 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1734 scvp
= (scid_confirm_verf
*)&cp
->rc_confirm_verf
;
1735 scvp
->cv_impl
.gen_num
++;
1739 rfs4_client_rele(rfs4_client_t
*cp
)
1741 rfs4_dbe_rele(cp
->rc_dbe
);
1745 rfs4_findclient(nfs_client_id4
*client
, bool_t
*create
, rfs4_client_t
*oldcp
)
1751 rw_enter(&rfs4_findclient_lock
, RW_WRITER
);
1752 rfs4_dbe_hide(oldcp
->rc_dbe
);
1754 rw_enter(&rfs4_findclient_lock
, RW_READER
);
1757 cp
= (rfs4_client_t
*)rfs4_dbsearch(rfs4_nfsclnt_idx
, client
,
1758 create
, (void *)client
, RFS4_DBS_VALID
);
1761 rfs4_dbe_unhide(oldcp
->rc_dbe
);
1763 rw_exit(&rfs4_findclient_lock
);
1769 rfs4_findclient_by_id(clientid4 clientid
, bool_t find_unconfirmed
)
1772 bool_t create
= FALSE
;
1773 cid
*cidp
= (cid
*)&clientid
;
1775 rw_enter(&rfs4_findclient_lock
, RW_READER
);
1777 cp
= (rfs4_client_t
*)rfs4_dbsearch(rfs4_clientid_idx
, &clientid
,
1778 &create
, NULL
, RFS4_DBS_VALID
);
1780 rw_exit(&rfs4_findclient_lock
);
1782 if (cp
&& cp
->rc_need_confirm
&& find_unconfirmed
== FALSE
) {
1783 rfs4_client_rele(cp
);
1791 clntip_hash(void *key
)
1793 struct sockaddr
*addr
= key
;
1798 if (addr
->sa_family
== AF_INET
) {
1799 struct sockaddr_in
*a
= (struct sockaddr_in
*)addr
;
1800 len
= sizeof (struct in_addr
);
1801 ptr
= (char *)&a
->sin_addr
;
1802 } else if (addr
->sa_family
== AF_INET6
) {
1803 struct sockaddr_in6
*a
= (struct sockaddr_in6
*)addr
;
1804 len
= sizeof (struct in6_addr
);
1805 ptr
= (char *)&a
->sin6_addr
;
1809 for (i
= 0; i
< len
; i
++) {
1811 hash
+= (uint_t
)ptr
[i
];
1817 clntip_compare(rfs4_entry_t entry
, void *key
)
1819 rfs4_clntip_t
*cp
= (rfs4_clntip_t
*)entry
;
1820 struct sockaddr
*addr
= key
;
1824 if (addr
->sa_family
== AF_INET
) {
1825 struct sockaddr_in
*a1
= (struct sockaddr_in
*)&cp
->ri_addr
;
1826 struct sockaddr_in
*a2
= (struct sockaddr_in
*)addr
;
1827 len
= sizeof (struct in_addr
);
1828 p1
= (char *)&a1
->sin_addr
;
1829 p2
= (char *)&a2
->sin_addr
;
1830 } else if (addr
->sa_family
== AF_INET6
) {
1831 struct sockaddr_in6
*a1
= (struct sockaddr_in6
*)&cp
->ri_addr
;
1832 struct sockaddr_in6
*a2
= (struct sockaddr_in6
*)addr
;
1833 len
= sizeof (struct in6_addr
);
1834 p1
= (char *)&a1
->sin6_addr
;
1835 p2
= (char *)&a2
->sin6_addr
;
1839 return (bcmp(p1
, p2
, len
) == 0);
1843 clntip_mkkey(rfs4_entry_t entry
)
1845 rfs4_clntip_t
*cp
= (rfs4_clntip_t
*)entry
;
1847 return (&cp
->ri_addr
);
1851 rfs4_clntip_expiry(rfs4_entry_t u_entry
)
1853 rfs4_clntip_t
*cp
= (rfs4_clntip_t
*)u_entry
;
1855 if (rfs4_dbe_is_invalid(cp
->ri_dbe
))
1862 rfs4_clntip_destroy(rfs4_entry_t u_entry
)
1867 rfs4_clntip_create(rfs4_entry_t u_entry
, void *arg
)
1869 rfs4_clntip_t
*cp
= (rfs4_clntip_t
*)u_entry
;
1870 struct sockaddr
*ca
= (struct sockaddr
*)arg
;
1872 /* Copy client's IP address */
1873 if (ca
->sa_family
== AF_INET
)
1874 bcopy(ca
, &cp
->ri_addr
, sizeof (struct sockaddr_in
));
1875 else if (ca
->sa_family
== AF_INET6
)
1876 bcopy(ca
, &cp
->ri_addr
, sizeof (struct sockaddr_in6
));
1879 cp
->ri_no_referrals
= 1;
1885 rfs4_find_clntip(struct sockaddr
*addr
, bool_t
*create
)
1889 rw_enter(&rfs4_findclient_lock
, RW_READER
);
1891 cp
= (rfs4_clntip_t
*)rfs4_dbsearch(rfs4_clntip_idx
, addr
,
1892 create
, addr
, RFS4_DBS_VALID
);
1894 rw_exit(&rfs4_findclient_lock
);
1900 rfs4_invalidate_clntip(struct sockaddr
*addr
)
1903 bool_t create
= FALSE
;
1905 rw_enter(&rfs4_findclient_lock
, RW_READER
);
1907 cp
= (rfs4_clntip_t
*)rfs4_dbsearch(rfs4_clntip_idx
, addr
,
1908 &create
, NULL
, RFS4_DBS_VALID
);
1910 rw_exit(&rfs4_findclient_lock
);
1913 rfs4_dbe_invalidate(cp
->ri_dbe
);
1914 rfs4_dbe_rele(cp
->ri_dbe
);
1916 rw_exit(&rfs4_findclient_lock
);
1920 rfs4_lease_expired(rfs4_client_t
*cp
)
1924 rfs4_dbe_lock(cp
->rc_dbe
);
1927 * If the admin has executed clear_locks for this
1928 * client id, force expire will be set, so no need
1929 * to calculate anything because it's "outa here".
1931 if (cp
->rc_forced_expire
) {
1934 rc
= (gethrestime_sec() - cp
->rc_last_access
> rfs4_lease_time
);
1938 * If the lease has expired we will also want
1939 * to remove any stable storage state data. So
1940 * mark the client id accordingly.
1942 if (!cp
->rc_ss_remove
)
1943 cp
->rc_ss_remove
= (rc
== TRUE
);
1945 rfs4_dbe_unlock(cp
->rc_dbe
);
1951 rfs4_update_lease(rfs4_client_t
*cp
)
1953 rfs4_dbe_lock(cp
->rc_dbe
);
1954 if (!cp
->rc_forced_expire
)
1955 cp
->rc_last_access
= gethrestime_sec();
1956 rfs4_dbe_unlock(cp
->rc_dbe
);
1961 EQOPENOWNER(open_owner4
*a
, open_owner4
*b
)
1965 if (a
->clientid
!= b
->clientid
)
1968 if (a
->owner_len
!= b
->owner_len
)
1971 rc
= (bcmp(a
->owner_val
, b
->owner_val
, a
->owner_len
) == 0);
1977 openowner_hash(void *key
)
1980 open_owner4
*openowner
= key
;
1983 for (i
= 0; i
< openowner
->owner_len
; i
++) {
1985 hash
+= (uint_t
)openowner
->owner_val
[i
];
1987 hash
+= (uint_t
)openowner
->clientid
;
1988 hash
|= (openowner
->clientid
>> 32);
1994 openowner_compare(rfs4_entry_t u_entry
, void *key
)
1996 rfs4_openowner_t
*oo
= (rfs4_openowner_t
*)u_entry
;
1997 open_owner4
*arg
= key
;
1999 return (EQOPENOWNER(&oo
->ro_owner
, arg
));
2003 openowner_mkkey(rfs4_entry_t u_entry
)
2005 rfs4_openowner_t
*oo
= (rfs4_openowner_t
*)u_entry
;
2007 return (&oo
->ro_owner
);
2012 rfs4_openowner_expiry(rfs4_entry_t u_entry
)
2014 /* openstateid held us and did all needed delay */
2019 rfs4_openowner_destroy(rfs4_entry_t u_entry
)
2021 rfs4_openowner_t
*oo
= (rfs4_openowner_t
*)u_entry
;
2023 /* Remove open owner from client's lists of open owners */
2024 rfs4_dbe_lock(oo
->ro_client
->rc_dbe
);
2025 list_remove(&oo
->ro_client
->rc_openownerlist
, oo
);
2026 rfs4_dbe_unlock(oo
->ro_client
->rc_dbe
);
2028 /* One less reference to the client */
2029 rfs4_client_rele(oo
->ro_client
);
2030 oo
->ro_client
= NULL
;
2032 /* Free the last reply for this open owner */
2033 rfs4_free_reply(&oo
->ro_reply
);
2035 if (oo
->ro_reply_fh
.nfs_fh4_val
) {
2036 kmem_free(oo
->ro_reply_fh
.nfs_fh4_val
,
2037 oo
->ro_reply_fh
.nfs_fh4_len
);
2038 oo
->ro_reply_fh
.nfs_fh4_val
= NULL
;
2039 oo
->ro_reply_fh
.nfs_fh4_len
= 0;
2042 rfs4_sw_destroy(&oo
->ro_sw
);
2043 list_destroy(&oo
->ro_statelist
);
2045 /* Free the open owner id */
2046 kmem_free(oo
->ro_owner
.owner_val
, oo
->ro_owner
.owner_len
);
2050 rfs4_openowner_rele(rfs4_openowner_t
*oo
)
2052 rfs4_dbe_rele(oo
->ro_dbe
);
2056 rfs4_openowner_create(rfs4_entry_t u_entry
, void *arg
)
2058 rfs4_openowner_t
*oo
= (rfs4_openowner_t
*)u_entry
;
2059 rfs4_openowner_t
*argp
= (rfs4_openowner_t
*)arg
;
2060 open_owner4
*openowner
= &argp
->ro_owner
;
2061 seqid4 seqid
= argp
->ro_open_seqid
;
2063 bool_t create
= FALSE
;
2065 rw_enter(&rfs4_findclient_lock
, RW_READER
);
2067 cp
= (rfs4_client_t
*)rfs4_dbsearch(rfs4_clientid_idx
,
2068 &openowner
->clientid
,
2069 &create
, NULL
, RFS4_DBS_VALID
);
2071 rw_exit(&rfs4_findclient_lock
);
2076 oo
->ro_reply_fh
.nfs_fh4_len
= 0;
2077 oo
->ro_reply_fh
.nfs_fh4_val
= NULL
;
2079 oo
->ro_owner
.clientid
= openowner
->clientid
;
2080 oo
->ro_owner
.owner_val
=
2081 kmem_alloc(openowner
->owner_len
, KM_SLEEP
);
2083 bcopy(openowner
->owner_val
,
2084 oo
->ro_owner
.owner_val
, openowner
->owner_len
);
2086 oo
->ro_owner
.owner_len
= openowner
->owner_len
;
2088 oo
->ro_need_confirm
= TRUE
;
2090 rfs4_sw_init(&oo
->ro_sw
);
2092 oo
->ro_open_seqid
= seqid
;
2093 bzero(&oo
->ro_reply
, sizeof (nfs_resop4
));
2095 oo
->ro_cr_set
= NULL
;
2097 list_create(&oo
->ro_statelist
, sizeof (rfs4_state_t
),
2098 offsetof(rfs4_state_t
, rs_node
));
2100 /* Insert openowner into client's open owner list */
2101 rfs4_dbe_lock(cp
->rc_dbe
);
2102 list_insert_tail(&cp
->rc_openownerlist
, oo
);
2103 rfs4_dbe_unlock(cp
->rc_dbe
);
2109 rfs4_findopenowner(open_owner4
*openowner
, bool_t
*create
, seqid4 seqid
)
2111 rfs4_openowner_t
*oo
;
2112 rfs4_openowner_t arg
;
2114 arg
.ro_owner
= *openowner
;
2115 arg
.ro_open_seqid
= seqid
;
2116 oo
= (rfs4_openowner_t
*)rfs4_dbsearch(rfs4_openowner_idx
, openowner
,
2117 create
, &arg
, RFS4_DBS_VALID
);
2123 rfs4_update_open_sequence(rfs4_openowner_t
*oo
)
2126 rfs4_dbe_lock(oo
->ro_dbe
);
2128 oo
->ro_open_seqid
++;
2130 rfs4_dbe_unlock(oo
->ro_dbe
);
2134 rfs4_update_open_resp(rfs4_openowner_t
*oo
, nfs_resop4
*resp
, nfs_fh4
*fh
)
2137 rfs4_dbe_lock(oo
->ro_dbe
);
2139 rfs4_free_reply(&oo
->ro_reply
);
2141 rfs4_copy_reply(&oo
->ro_reply
, resp
);
2143 /* Save the filehandle if provided and free if not used */
2144 if (resp
->nfs_resop4_u
.opopen
.status
== NFS4_OK
&&
2145 fh
&& fh
->nfs_fh4_len
) {
2146 if (oo
->ro_reply_fh
.nfs_fh4_val
== NULL
)
2147 oo
->ro_reply_fh
.nfs_fh4_val
=
2148 kmem_alloc(fh
->nfs_fh4_len
, KM_SLEEP
);
2149 nfs_fh4_copy(fh
, &oo
->ro_reply_fh
);
2151 if (oo
->ro_reply_fh
.nfs_fh4_val
) {
2152 kmem_free(oo
->ro_reply_fh
.nfs_fh4_val
,
2153 oo
->ro_reply_fh
.nfs_fh4_len
);
2154 oo
->ro_reply_fh
.nfs_fh4_val
= NULL
;
2155 oo
->ro_reply_fh
.nfs_fh4_len
= 0;
2159 rfs4_dbe_unlock(oo
->ro_dbe
);
2163 lockowner_compare(rfs4_entry_t u_entry
, void *key
)
2165 rfs4_lockowner_t
*lo
= (rfs4_lockowner_t
*)u_entry
;
2166 lock_owner4
*b
= (lock_owner4
*)key
;
2168 if (lo
->rl_owner
.clientid
!= b
->clientid
)
2171 if (lo
->rl_owner
.owner_len
!= b
->owner_len
)
2174 return (bcmp(lo
->rl_owner
.owner_val
, b
->owner_val
,
2175 lo
->rl_owner
.owner_len
) == 0);
2179 lockowner_mkkey(rfs4_entry_t u_entry
)
2181 rfs4_lockowner_t
*lo
= (rfs4_lockowner_t
*)u_entry
;
2183 return (&lo
->rl_owner
);
2187 lockowner_hash(void *key
)
2190 lock_owner4
*lockowner
= key
;
2193 for (i
= 0; i
< lockowner
->owner_len
; i
++) {
2195 hash
+= (uint_t
)lockowner
->owner_val
[i
];
2197 hash
+= (uint_t
)lockowner
->clientid
;
2198 hash
|= (lockowner
->clientid
>> 32);
2206 return ((uint32_t)(uintptr_t)key
);
2210 pid_mkkey(rfs4_entry_t u_entry
)
2212 rfs4_lockowner_t
*lo
= (rfs4_lockowner_t
*)u_entry
;
2214 return ((void *)(uintptr_t)lo
->rl_pid
);
2218 pid_compare(rfs4_entry_t u_entry
, void *key
)
2220 rfs4_lockowner_t
*lo
= (rfs4_lockowner_t
*)u_entry
;
2222 return (lo
->rl_pid
== (pid_t
)(uintptr_t)key
);
2226 rfs4_lockowner_destroy(rfs4_entry_t u_entry
)
2228 rfs4_lockowner_t
*lo
= (rfs4_lockowner_t
*)u_entry
;
2230 /* Free the lock owner id */
2231 kmem_free(lo
->rl_owner
.owner_val
, lo
->rl_owner
.owner_len
);
2232 rfs4_client_rele(lo
->rl_client
);
2236 rfs4_lockowner_rele(rfs4_lockowner_t
*lo
)
2238 rfs4_dbe_rele(lo
->rl_dbe
);
2243 rfs4_lockowner_expiry(rfs4_entry_t u_entry
)
2246 * Since expiry is called with no other references on
2247 * this struct, go ahead and have it removed.
2253 rfs4_lockowner_create(rfs4_entry_t u_entry
, void *arg
)
2255 rfs4_lockowner_t
*lo
= (rfs4_lockowner_t
*)u_entry
;
2256 lock_owner4
*lockowner
= (lock_owner4
*)arg
;
2258 bool_t create
= FALSE
;
2260 rw_enter(&rfs4_findclient_lock
, RW_READER
);
2262 cp
= (rfs4_client_t
*)rfs4_dbsearch(rfs4_clientid_idx
,
2263 &lockowner
->clientid
,
2264 &create
, NULL
, RFS4_DBS_VALID
);
2266 rw_exit(&rfs4_findclient_lock
);
2271 /* Reference client */
2273 lo
->rl_owner
.clientid
= lockowner
->clientid
;
2274 lo
->rl_owner
.owner_val
= kmem_alloc(lockowner
->owner_len
, KM_SLEEP
);
2275 bcopy(lockowner
->owner_val
, lo
->rl_owner
.owner_val
,
2276 lockowner
->owner_len
);
2277 lo
->rl_owner
.owner_len
= lockowner
->owner_len
;
2278 lo
->rl_pid
= rfs4_dbe_getid(lo
->rl_dbe
);
2284 rfs4_findlockowner(lock_owner4
*lockowner
, bool_t
*create
)
2286 rfs4_lockowner_t
*lo
;
2288 lo
= (rfs4_lockowner_t
*)rfs4_dbsearch(rfs4_lockowner_idx
, lockowner
,
2289 create
, lockowner
, RFS4_DBS_VALID
);
2295 rfs4_findlockowner_by_pid(pid_t pid
)
2297 rfs4_lockowner_t
*lo
;
2298 bool_t create
= FALSE
;
2300 lo
= (rfs4_lockowner_t
*)rfs4_dbsearch(rfs4_lockowner_pid_idx
,
2301 (void *)(uintptr_t)pid
, &create
, NULL
, RFS4_DBS_VALID
);
2308 file_hash(void *key
)
2310 return (ADDRHASH(key
));
2314 file_mkkey(rfs4_entry_t u_entry
)
2316 rfs4_file_t
*fp
= (rfs4_file_t
*)u_entry
;
2322 file_compare(rfs4_entry_t u_entry
, void *key
)
2324 rfs4_file_t
*fp
= (rfs4_file_t
*)u_entry
;
2326 return (fp
->rf_vp
== (vnode_t
*)key
);
2330 rfs4_file_destroy(rfs4_entry_t u_entry
)
2332 rfs4_file_t
*fp
= (rfs4_file_t
*)u_entry
;
2334 list_destroy(&fp
->rf_delegstatelist
);
2336 if (fp
->rf_filehandle
.nfs_fh4_val
)
2337 kmem_free(fp
->rf_filehandle
.nfs_fh4_val
,
2338 fp
->rf_filehandle
.nfs_fh4_len
);
2339 cv_destroy(fp
->rf_dinfo
.rd_recall_cv
);
2341 vnode_t
*vp
= fp
->rf_vp
;
2343 mutex_enter(&vp
->v_vsd_lock
);
2344 (void) vsd_set(vp
, nfs4_srv_vkey
, NULL
);
2345 mutex_exit(&vp
->v_vsd_lock
);
2349 rw_destroy(&fp
->rf_file_rwlock
);
2353 * Used to release the underlying dbe struct only
2356 rfs4_file_rele(rfs4_file_t
*fp
)
2358 rfs4_dbe_rele(fp
->rf_dbe
);
2367 rfs4_file_create(rfs4_entry_t u_entry
, void *arg
)
2369 rfs4_file_t
*fp
= (rfs4_file_t
*)u_entry
;
2370 rfs4_fcreate_arg
*ap
= (rfs4_fcreate_arg
*)arg
;
2371 vnode_t
*vp
= ap
->vp
;
2372 nfs_fh4
*fh
= ap
->fh
;
2376 fp
->rf_filehandle
.nfs_fh4_len
= 0;
2377 fp
->rf_filehandle
.nfs_fh4_val
= NULL
;
2378 ASSERT(fh
&& fh
->nfs_fh4_len
);
2379 if (fh
&& fh
->nfs_fh4_len
) {
2380 fp
->rf_filehandle
.nfs_fh4_val
=
2381 kmem_alloc(fh
->nfs_fh4_len
, KM_SLEEP
);
2382 nfs_fh4_copy(fh
, &fp
->rf_filehandle
);
2386 list_create(&fp
->rf_delegstatelist
, sizeof (rfs4_deleg_state_t
),
2387 offsetof(rfs4_deleg_state_t
, rds_node
));
2389 fp
->rf_share_deny
= fp
->rf_share_access
= fp
->rf_access_read
= 0;
2390 fp
->rf_access_write
= fp
->rf_deny_read
= fp
->rf_deny_write
= 0;
2392 mutex_init(fp
->rf_dinfo
.rd_recall_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2393 cv_init(fp
->rf_dinfo
.rd_recall_cv
, NULL
, CV_DEFAULT
, NULL
);
2395 fp
->rf_dinfo
.rd_dtype
= OPEN_DELEGATE_NONE
;
2397 rw_init(&fp
->rf_file_rwlock
, NULL
, RW_DEFAULT
, NULL
);
2399 mutex_enter(&vp
->v_vsd_lock
);
2400 VERIFY(vsd_set(vp
, nfs4_srv_vkey
, (void *)fp
) == 0);
2401 mutex_exit(&vp
->v_vsd_lock
);
2407 rfs4_findfile(vnode_t
*vp
, nfs_fh4
*fh
, bool_t
*create
)
2410 rfs4_fcreate_arg arg
;
2415 if (*create
== TRUE
)
2416 fp
= (rfs4_file_t
*)rfs4_dbsearch(rfs4_file_idx
, vp
, create
,
2417 &arg
, RFS4_DBS_VALID
);
2419 mutex_enter(&vp
->v_vsd_lock
);
2420 fp
= (rfs4_file_t
*)vsd_get(vp
, nfs4_srv_vkey
);
2422 rfs4_dbe_lock(fp
->rf_dbe
);
2423 if (rfs4_dbe_is_invalid(fp
->rf_dbe
) ||
2424 (rfs4_dbe_refcnt(fp
->rf_dbe
) == 0)) {
2425 rfs4_dbe_unlock(fp
->rf_dbe
);
2428 rfs4_dbe_hold(fp
->rf_dbe
);
2429 rfs4_dbe_unlock(fp
->rf_dbe
);
2432 mutex_exit(&vp
->v_vsd_lock
);
2438 * Find a file in the db and once it is located, take the rw lock.
2439 * Need to check the vnode pointer and if it does not exist (it was
2440 * removed between the db location and check) redo the find. This
2441 * assumes that a file struct that has a NULL vnode pointer is marked
2442 * as 'invalid' and will not be found in the db the second time
2446 rfs4_findfile_withlock(vnode_t
*vp
, nfs_fh4
*fh
, bool_t
*create
)
2449 rfs4_fcreate_arg arg
;
2450 bool_t screate
= *create
;
2452 if (screate
== FALSE
) {
2453 mutex_enter(&vp
->v_vsd_lock
);
2454 fp
= (rfs4_file_t
*)vsd_get(vp
, nfs4_srv_vkey
);
2456 rfs4_dbe_lock(fp
->rf_dbe
);
2457 if (rfs4_dbe_is_invalid(fp
->rf_dbe
) ||
2458 (rfs4_dbe_refcnt(fp
->rf_dbe
) == 0)) {
2459 rfs4_dbe_unlock(fp
->rf_dbe
);
2460 mutex_exit(&vp
->v_vsd_lock
);
2463 rfs4_dbe_hold(fp
->rf_dbe
);
2464 rfs4_dbe_unlock(fp
->rf_dbe
);
2465 mutex_exit(&vp
->v_vsd_lock
);
2466 rw_enter(&fp
->rf_file_rwlock
, RW_WRITER
);
2467 if (fp
->rf_vp
== NULL
) {
2468 rw_exit(&fp
->rf_file_rwlock
);
2474 mutex_exit(&vp
->v_vsd_lock
);
2481 fp
= (rfs4_file_t
*)rfs4_dbsearch(rfs4_file_idx
, vp
, create
,
2482 &arg
, RFS4_DBS_VALID
);
2484 rw_enter(&fp
->rf_file_rwlock
, RW_WRITER
);
2485 if (fp
->rf_vp
== NULL
) {
2486 rw_exit(&fp
->rf_file_rwlock
);
2498 lo_state_hash(void *key
)
2500 stateid_t
*id
= key
;
2502 return (id
->bits
.ident
+id
->bits
.pid
);
2506 lo_state_compare(rfs4_entry_t u_entry
, void *key
)
2508 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
2509 stateid_t
*id
= key
;
2512 rc
= (lsp
->rls_lockid
.bits
.boottime
== id
->bits
.boottime
&&
2513 lsp
->rls_lockid
.bits
.type
== id
->bits
.type
&&
2514 lsp
->rls_lockid
.bits
.ident
== id
->bits
.ident
&&
2515 lsp
->rls_lockid
.bits
.pid
== id
->bits
.pid
);
2521 lo_state_mkkey(rfs4_entry_t u_entry
)
2523 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
2525 return (&lsp
->rls_lockid
);
2529 rfs4_lo_state_expiry(rfs4_entry_t u_entry
)
2531 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
2533 if (rfs4_dbe_is_invalid(lsp
->rls_dbe
))
2535 if (lsp
->rls_state
->rs_closed
)
2537 return ((gethrestime_sec() -
2538 lsp
->rls_state
->rs_owner
->ro_client
->rc_last_access
2539 > rfs4_lease_time
));
2543 rfs4_lo_state_destroy(rfs4_entry_t u_entry
)
2545 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
2547 rfs4_dbe_lock(lsp
->rls_state
->rs_dbe
);
2548 list_remove(&lsp
->rls_state
->rs_lostatelist
, lsp
);
2549 rfs4_dbe_unlock(lsp
->rls_state
->rs_dbe
);
2551 rfs4_sw_destroy(&lsp
->rls_sw
);
2553 /* Make sure to release the file locks */
2554 if (lsp
->rls_locks_cleaned
== FALSE
) {
2555 lsp
->rls_locks_cleaned
= TRUE
;
2556 if (lsp
->rls_locker
->rl_client
->rc_sysidt
!= LM_NOSYSID
) {
2557 /* Is the PxFS kernel module loaded? */
2558 if (lm_remove_file_locks
!= NULL
) {
2561 /* Encode the cluster nodeid in new sysid */
2563 lsp
->rls_locker
->rl_client
->rc_sysidt
;
2564 lm_set_nlmid_flk(&new_sysid
);
2567 * This PxFS routine removes file locks for a
2568 * client over all nodes of a cluster.
2570 DTRACE_PROBE1(nfss_i_clust_rm_lck
,
2572 (*lm_remove_file_locks
)(new_sysid
);
2575 lsp
->rls_state
->rs_finfo
->rf_vp
,
2576 lsp
->rls_locker
->rl_pid
,
2577 lsp
->rls_locker
->rl_client
->rc_sysidt
);
2582 /* Free the last reply for this state */
2583 rfs4_free_reply(&lsp
->rls_reply
);
2585 rfs4_lockowner_rele(lsp
->rls_locker
);
2586 lsp
->rls_locker
= NULL
;
2588 rfs4_state_rele_nounlock(lsp
->rls_state
);
2589 lsp
->rls_state
= NULL
;
2593 rfs4_lo_state_create(rfs4_entry_t u_entry
, void *arg
)
2595 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
2596 rfs4_lo_state_t
*argp
= (rfs4_lo_state_t
*)arg
;
2597 rfs4_lockowner_t
*lo
= argp
->rls_locker
;
2598 rfs4_state_t
*sp
= argp
->rls_state
;
2600 lsp
->rls_state
= sp
;
2602 lsp
->rls_lockid
= sp
->rs_stateid
;
2603 lsp
->rls_lockid
.bits
.type
= LOCKID
;
2604 lsp
->rls_lockid
.bits
.chgseq
= 0;
2605 lsp
->rls_lockid
.bits
.pid
= lo
->rl_pid
;
2607 lsp
->rls_locks_cleaned
= FALSE
;
2608 lsp
->rls_lock_completed
= FALSE
;
2610 rfs4_sw_init(&lsp
->rls_sw
);
2612 /* Attach the supplied lock owner */
2613 rfs4_dbe_hold(lo
->rl_dbe
);
2614 lsp
->rls_locker
= lo
;
2616 rfs4_dbe_lock(sp
->rs_dbe
);
2617 list_insert_tail(&sp
->rs_lostatelist
, lsp
);
2618 rfs4_dbe_hold(sp
->rs_dbe
);
2619 rfs4_dbe_unlock(sp
->rs_dbe
);
2625 rfs4_lo_state_rele(rfs4_lo_state_t
*lsp
, bool_t unlock_fp
)
2627 if (unlock_fp
== TRUE
)
2628 rw_exit(&lsp
->rls_state
->rs_finfo
->rf_file_rwlock
);
2629 rfs4_dbe_rele(lsp
->rls_dbe
);
2632 static rfs4_lo_state_t
*
2633 rfs4_findlo_state(stateid_t
*id
, bool_t lock_fp
)
2635 rfs4_lo_state_t
*lsp
;
2636 bool_t create
= FALSE
;
2638 lsp
= (rfs4_lo_state_t
*)rfs4_dbsearch(rfs4_lo_state_idx
, id
,
2639 &create
, NULL
, RFS4_DBS_VALID
);
2640 if (lock_fp
== TRUE
&& lsp
!= NULL
)
2641 rw_enter(&lsp
->rls_state
->rs_finfo
->rf_file_rwlock
, RW_READER
);
2648 lo_state_lo_hash(void *key
)
2650 rfs4_lo_state_t
*lsp
= key
;
2652 return (ADDRHASH(lsp
->rls_locker
) ^ ADDRHASH(lsp
->rls_state
));
2656 lo_state_lo_compare(rfs4_entry_t u_entry
, void *key
)
2658 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
2659 rfs4_lo_state_t
*keyp
= key
;
2661 return (keyp
->rls_locker
== lsp
->rls_locker
&&
2662 keyp
->rls_state
== lsp
->rls_state
);
2666 lo_state_lo_mkkey(rfs4_entry_t u_entry
)
2672 rfs4_findlo_state_by_owner(rfs4_lockowner_t
*lo
, rfs4_state_t
*sp
,
2675 rfs4_lo_state_t
*lsp
;
2676 rfs4_lo_state_t arg
;
2678 arg
.rls_locker
= lo
;
2681 lsp
= (rfs4_lo_state_t
*)rfs4_dbsearch(rfs4_lo_state_owner_idx
, &arg
,
2682 create
, &arg
, RFS4_DBS_VALID
);
2688 get_stateid(id_t eid
)
2692 id
.bits
.boottime
= rfs4_start_time
;
2693 id
.bits
.ident
= eid
;
2697 id
.bits
.clnodeid
= 0;
2703 state_hash(void *key
)
2705 stateid_t
*ip
= (stateid_t
*)key
;
2707 return (ip
->bits
.ident
);
2711 state_compare(rfs4_entry_t u_entry
, void *key
)
2713 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
2714 stateid_t
*id
= (stateid_t
*)key
;
2717 rc
= (sp
->rs_stateid
.bits
.boottime
== id
->bits
.boottime
&&
2718 sp
->rs_stateid
.bits
.ident
== id
->bits
.ident
);
2724 state_mkkey(rfs4_entry_t u_entry
)
2726 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
2728 return (&sp
->rs_stateid
);
2732 rfs4_state_destroy(rfs4_entry_t u_entry
)
2734 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
2736 /* remove from openowner list */
2737 rfs4_dbe_lock(sp
->rs_owner
->ro_dbe
);
2738 list_remove(&sp
->rs_owner
->ro_statelist
, sp
);
2739 rfs4_dbe_unlock(sp
->rs_owner
->ro_dbe
);
2741 list_destroy(&sp
->rs_lostatelist
);
2743 /* release any share locks for this stateid if it's still open */
2744 if (!sp
->rs_closed
) {
2745 rfs4_dbe_lock(sp
->rs_dbe
);
2746 (void) rfs4_unshare(sp
);
2747 rfs4_dbe_unlock(sp
->rs_dbe
);
2750 /* Were done with the file */
2751 rfs4_file_rele(sp
->rs_finfo
);
2752 sp
->rs_finfo
= NULL
;
2754 /* And now with the openowner */
2755 rfs4_openowner_rele(sp
->rs_owner
);
2756 sp
->rs_owner
= NULL
;
2760 rfs4_state_rele_nounlock(rfs4_state_t
*sp
)
2762 rfs4_dbe_rele(sp
->rs_dbe
);
2766 rfs4_state_rele(rfs4_state_t
*sp
)
2768 rw_exit(&sp
->rs_finfo
->rf_file_rwlock
);
2769 rfs4_dbe_rele(sp
->rs_dbe
);
2773 deleg_hash(void *key
)
2775 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)key
;
2777 return (ADDRHASH(dsp
->rds_client
) ^ ADDRHASH(dsp
->rds_finfo
));
2781 deleg_compare(rfs4_entry_t u_entry
, void *key
)
2783 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
2784 rfs4_deleg_state_t
*kdsp
= (rfs4_deleg_state_t
*)key
;
2786 return (dsp
->rds_client
== kdsp
->rds_client
&&
2787 dsp
->rds_finfo
== kdsp
->rds_finfo
);
2791 deleg_mkkey(rfs4_entry_t u_entry
)
2797 deleg_state_hash(void *key
)
2799 stateid_t
*ip
= (stateid_t
*)key
;
2801 return (ip
->bits
.ident
);
2805 deleg_state_compare(rfs4_entry_t u_entry
, void *key
)
2807 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
2808 stateid_t
*id
= (stateid_t
*)key
;
2811 if (id
->bits
.type
!= DELEGID
)
2814 rc
= (dsp
->rds_delegid
.bits
.boottime
== id
->bits
.boottime
&&
2815 dsp
->rds_delegid
.bits
.ident
== id
->bits
.ident
);
2821 deleg_state_mkkey(rfs4_entry_t u_entry
)
2823 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
2825 return (&dsp
->rds_delegid
);
2829 rfs4_deleg_state_expiry(rfs4_entry_t u_entry
)
2831 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
2833 if (rfs4_dbe_is_invalid(dsp
->rds_dbe
))
2836 if (dsp
->rds_dtype
== OPEN_DELEGATE_NONE
)
2839 if ((gethrestime_sec() - dsp
->rds_client
->rc_last_access
2840 > rfs4_lease_time
)) {
2841 rfs4_dbe_invalidate(dsp
->rds_dbe
);
2849 rfs4_deleg_state_create(rfs4_entry_t u_entry
, void *argp
)
2851 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
2852 rfs4_file_t
*fp
= ((rfs4_deleg_state_t
*)argp
)->rds_finfo
;
2853 rfs4_client_t
*cp
= ((rfs4_deleg_state_t
*)argp
)->rds_client
;
2855 rfs4_dbe_hold(fp
->rf_dbe
);
2856 rfs4_dbe_hold(cp
->rc_dbe
);
2858 dsp
->rds_delegid
= get_stateid(rfs4_dbe_getid(dsp
->rds_dbe
));
2859 dsp
->rds_delegid
.bits
.type
= DELEGID
;
2860 dsp
->rds_finfo
= fp
;
2861 dsp
->rds_client
= cp
;
2862 dsp
->rds_dtype
= OPEN_DELEGATE_NONE
;
2864 dsp
->rds_time_granted
= gethrestime_sec(); /* observability */
2865 dsp
->rds_time_revoked
= 0;
2867 list_link_init(&dsp
->rds_node
);
2873 rfs4_deleg_state_destroy(rfs4_entry_t u_entry
)
2875 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
2877 /* return delegation if necessary */
2878 rfs4_return_deleg(dsp
, FALSE
);
2880 /* Were done with the file */
2881 rfs4_file_rele(dsp
->rds_finfo
);
2882 dsp
->rds_finfo
= NULL
;
2884 /* And now with the openowner */
2885 rfs4_client_rele(dsp
->rds_client
);
2886 dsp
->rds_client
= NULL
;
2889 rfs4_deleg_state_t
*
2890 rfs4_finddeleg(rfs4_state_t
*sp
, bool_t
*create
)
2892 rfs4_deleg_state_t ds
, *dsp
;
2894 ds
.rds_client
= sp
->rs_owner
->ro_client
;
2895 ds
.rds_finfo
= sp
->rs_finfo
;
2897 dsp
= (rfs4_deleg_state_t
*)rfs4_dbsearch(rfs4_deleg_idx
, &ds
,
2898 create
, &ds
, RFS4_DBS_VALID
);
2903 rfs4_deleg_state_t
*
2904 rfs4_finddelegstate(stateid_t
*id
)
2906 rfs4_deleg_state_t
*dsp
;
2907 bool_t create
= FALSE
;
2909 dsp
= (rfs4_deleg_state_t
*)rfs4_dbsearch(rfs4_deleg_state_idx
, id
,
2910 &create
, NULL
, RFS4_DBS_VALID
);
2916 rfs4_deleg_state_rele(rfs4_deleg_state_t
*dsp
)
2918 rfs4_dbe_rele(dsp
->rds_dbe
);
2922 rfs4_update_lock_sequence(rfs4_lo_state_t
*lsp
)
2925 rfs4_dbe_lock(lsp
->rls_dbe
);
2928 * If we are skipping sequence id checking, this means that
2929 * this is the first lock request and therefore the sequence
2930 * id does not need to be updated. This only happens on the
2931 * first lock request for a lockowner
2933 if (!lsp
->rls_skip_seqid_check
)
2936 rfs4_dbe_unlock(lsp
->rls_dbe
);
2940 rfs4_update_lock_resp(rfs4_lo_state_t
*lsp
, nfs_resop4
*resp
)
2943 rfs4_dbe_lock(lsp
->rls_dbe
);
2945 rfs4_free_reply(&lsp
->rls_reply
);
2947 rfs4_copy_reply(&lsp
->rls_reply
, resp
);
2949 rfs4_dbe_unlock(lsp
->rls_dbe
);
2953 rfs4_free_opens(rfs4_openowner_t
*oo
, bool_t invalidate
,
2954 bool_t close_of_client
)
2958 rfs4_dbe_lock(oo
->ro_dbe
);
2960 for (sp
= list_head(&oo
->ro_statelist
); sp
!= NULL
;
2961 sp
= list_next(&oo
->ro_statelist
, sp
)) {
2962 rfs4_state_close(sp
, FALSE
, close_of_client
, CRED());
2963 if (invalidate
== TRUE
)
2964 rfs4_dbe_invalidate(sp
->rs_dbe
);
2967 rfs4_dbe_invalidate(oo
->ro_dbe
);
2968 rfs4_dbe_unlock(oo
->ro_dbe
);
2972 state_owner_file_hash(void *key
)
2974 rfs4_state_t
*sp
= key
;
2976 return (ADDRHASH(sp
->rs_owner
) ^ ADDRHASH(sp
->rs_finfo
));
2980 state_owner_file_compare(rfs4_entry_t u_entry
, void *key
)
2982 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
2983 rfs4_state_t
*arg
= key
;
2985 if (sp
->rs_closed
== TRUE
)
2988 return (arg
->rs_owner
== sp
->rs_owner
&& arg
->rs_finfo
== sp
->rs_finfo
);
2992 state_owner_file_mkkey(rfs4_entry_t u_entry
)
2998 state_file_hash(void *key
)
3000 return (ADDRHASH(key
));
3004 state_file_compare(rfs4_entry_t u_entry
, void *key
)
3006 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
3007 rfs4_file_t
*fp
= key
;
3009 if (sp
->rs_closed
== TRUE
)
3012 return (fp
== sp
->rs_finfo
);
3016 state_file_mkkey(rfs4_entry_t u_entry
)
3018 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
3020 return (sp
->rs_finfo
);
3024 rfs4_findstate_by_owner_file(rfs4_openowner_t
*oo
, rfs4_file_t
*fp
,
3033 sp
= (rfs4_state_t
*)rfs4_dbsearch(rfs4_state_owner_file_idx
, &key
,
3034 create
, &key
, RFS4_DBS_VALID
);
3039 /* This returns ANY state struct that refers to this file */
3040 static rfs4_state_t
*
3041 rfs4_findstate_by_file(rfs4_file_t
*fp
)
3043 bool_t create
= FALSE
;
3045 return ((rfs4_state_t
*)rfs4_dbsearch(rfs4_state_file_idx
, fp
,
3046 &create
, fp
, RFS4_DBS_VALID
));
3050 rfs4_state_expiry(rfs4_entry_t u_entry
)
3052 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
3054 if (rfs4_dbe_is_invalid(sp
->rs_dbe
))
3057 if (sp
->rs_closed
== TRUE
&&
3058 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp
->rs_dbe
))
3062 return ((gethrestime_sec() - sp
->rs_owner
->ro_client
->rc_last_access
3063 > rfs4_lease_time
));
3067 rfs4_state_create(rfs4_entry_t u_entry
, void *argp
)
3069 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
3070 rfs4_file_t
*fp
= ((rfs4_state_t
*)argp
)->rs_finfo
;
3071 rfs4_openowner_t
*oo
= ((rfs4_state_t
*)argp
)->rs_owner
;
3073 rfs4_dbe_hold(fp
->rf_dbe
);
3074 rfs4_dbe_hold(oo
->ro_dbe
);
3075 sp
->rs_stateid
= get_stateid(rfs4_dbe_getid(sp
->rs_dbe
));
3076 sp
->rs_stateid
.bits
.type
= OPENID
;
3080 list_create(&sp
->rs_lostatelist
, sizeof (rfs4_lo_state_t
),
3081 offsetof(rfs4_lo_state_t
, rls_node
));
3083 /* Insert state on per open owner's list */
3084 rfs4_dbe_lock(oo
->ro_dbe
);
3085 list_insert_tail(&oo
->ro_statelist
, sp
);
3086 rfs4_dbe_unlock(oo
->ro_dbe
);
3091 static rfs4_state_t
*
3092 rfs4_findstate(stateid_t
*id
, rfs4_dbsearch_type_t find_invalid
, bool_t lock_fp
)
3095 bool_t create
= FALSE
;
3097 sp
= (rfs4_state_t
*)rfs4_dbsearch(rfs4_state_idx
, id
,
3098 &create
, NULL
, find_invalid
);
3099 if (lock_fp
== TRUE
&& sp
!= NULL
)
3100 rw_enter(&sp
->rs_finfo
->rf_file_rwlock
, RW_READER
);
3106 rfs4_state_close(rfs4_state_t
*sp
, bool_t lock_held
, bool_t close_of_client
,
3109 /* Remove the associated lo_state owners */
3111 rfs4_dbe_lock(sp
->rs_dbe
);
3114 * If refcnt == 0, the dbe is about to be destroyed.
3115 * lock state will be released by the reaper thread.
3118 if (rfs4_dbe_refcnt(sp
->rs_dbe
) > 0) {
3119 if (sp
->rs_closed
== FALSE
) {
3120 rfs4_release_share_lock_state(sp
, cr
, close_of_client
);
3121 sp
->rs_closed
= TRUE
;
3126 rfs4_dbe_unlock(sp
->rs_dbe
);
3130 * Remove all state associated with the given client.
3133 rfs4_client_state_remove(rfs4_client_t
*cp
)
3135 rfs4_openowner_t
*oo
;
3137 rfs4_dbe_lock(cp
->rc_dbe
);
3139 for (oo
= list_head(&cp
->rc_openownerlist
); oo
!= NULL
;
3140 oo
= list_next(&cp
->rc_openownerlist
, oo
)) {
3141 rfs4_free_opens(oo
, TRUE
, TRUE
);
3144 rfs4_dbe_unlock(cp
->rc_dbe
);
3148 rfs4_client_close(rfs4_client_t
*cp
)
3150 /* Mark client as going away. */
3151 rfs4_dbe_lock(cp
->rc_dbe
);
3152 rfs4_dbe_invalidate(cp
->rc_dbe
);
3153 rfs4_dbe_unlock(cp
->rc_dbe
);
3155 rfs4_client_state_remove(cp
);
3157 /* Release the client */
3158 rfs4_client_rele(cp
);
3162 rfs4_check_clientid(clientid4
*cp
, int setclid_confirm
)
3164 cid
*cidp
= (cid
*) cp
;
3167 * If the server start time matches the time provided
3168 * by the client (via the clientid) and this is NOT a
3169 * setclientid_confirm then return EXPIRED.
3171 if (!setclid_confirm
&& cidp
->impl_id
.start_time
== rfs4_start_time
)
3172 return (NFS4ERR_EXPIRED
);
3174 return (NFS4ERR_STALE_CLIENTID
);
3178 * This is used when a stateid has not been found amongst the
3179 * current server's state. Check the stateid to see if it
3180 * was from this server instantiation or not.
3183 what_stateid_error(stateid_t
*id
, stateid_type_t type
)
3185 /* If types don't match then no use checking further */
3186 if (type
!= id
->bits
.type
)
3187 return (NFS4ERR_BAD_STATEID
);
3189 /* From a different server instantiation, return STALE */
3190 if (id
->bits
.boottime
!= rfs4_start_time
)
3191 return (NFS4ERR_STALE_STATEID
);
3194 * From this server but the state is most likely beyond lease
3195 * timeout: return NFS4ERR_EXPIRED. However, there is the
3196 * case of a delegation stateid. For delegations, there is a
3197 * case where the state can be removed without the client's
3198 * knowledge/consent: revocation. In the case of delegation
3199 * revocation, the delegation state will be removed and will
3200 * not be found. If the client does something like a
3201 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3202 * that has been revoked, the server should return BAD_STATEID
3203 * instead of the more common EXPIRED error.
3205 if (id
->bits
.boottime
== rfs4_start_time
) {
3206 if (type
== DELEGID
)
3207 return (NFS4ERR_BAD_STATEID
);
3209 return (NFS4ERR_EXPIRED
);
3212 return (NFS4ERR_BAD_STATEID
);
3216 * Used later on to find the various state structs. When called from
3217 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3218 * taken (it is not needed) and helps on the read/write path with
3219 * respect to performance.
3222 rfs4_get_state_lockit(stateid4
*stateid
, rfs4_state_t
**spp
,
3223 rfs4_dbsearch_type_t find_invalid
, bool_t lock_fp
)
3225 stateid_t
*id
= (stateid_t
*)stateid
;
3230 sp
= rfs4_findstate(id
, find_invalid
, lock_fp
);
3232 return (what_stateid_error(id
, OPENID
));
3235 if (rfs4_lease_expired(sp
->rs_owner
->ro_client
)) {
3236 if (lock_fp
== TRUE
)
3237 rfs4_state_rele(sp
);
3239 rfs4_state_rele_nounlock(sp
);
3240 return (NFS4ERR_EXPIRED
);
3249 rfs4_get_state(stateid4
*stateid
, rfs4_state_t
**spp
,
3250 rfs4_dbsearch_type_t find_invalid
)
3252 return (rfs4_get_state_lockit(stateid
, spp
, find_invalid
, TRUE
));
3256 rfs4_check_stateid_seqid(rfs4_state_t
*sp
, stateid4
*stateid
)
3258 stateid_t
*id
= (stateid_t
*)stateid
;
3260 if (rfs4_lease_expired(sp
->rs_owner
->ro_client
))
3261 return (NFS4_CHECK_STATEID_EXPIRED
);
3263 /* Stateid is some time in the future - that's bad */
3264 if (sp
->rs_stateid
.bits
.chgseq
< id
->bits
.chgseq
)
3265 return (NFS4_CHECK_STATEID_BAD
);
3267 if (sp
->rs_stateid
.bits
.chgseq
== id
->bits
.chgseq
+ 1)
3268 return (NFS4_CHECK_STATEID_REPLAY
);
3270 /* Stateid is some time in the past - that's old */
3271 if (sp
->rs_stateid
.bits
.chgseq
> id
->bits
.chgseq
)
3272 return (NFS4_CHECK_STATEID_OLD
);
3274 /* Caller needs to know about confirmation before closure */
3275 if (sp
->rs_owner
->ro_need_confirm
)
3276 return (NFS4_CHECK_STATEID_UNCONFIRMED
);
3278 if (sp
->rs_closed
== TRUE
)
3279 return (NFS4_CHECK_STATEID_CLOSED
);
3281 return (NFS4_CHECK_STATEID_OKAY
);
3285 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t
*lsp
, stateid4
*stateid
)
3287 stateid_t
*id
= (stateid_t
*)stateid
;
3289 if (rfs4_lease_expired(lsp
->rls_state
->rs_owner
->ro_client
))
3290 return (NFS4_CHECK_STATEID_EXPIRED
);
3292 /* Stateid is some time in the future - that's bad */
3293 if (lsp
->rls_lockid
.bits
.chgseq
< id
->bits
.chgseq
)
3294 return (NFS4_CHECK_STATEID_BAD
);
3296 if (lsp
->rls_lockid
.bits
.chgseq
== id
->bits
.chgseq
+ 1)
3297 return (NFS4_CHECK_STATEID_REPLAY
);
3299 /* Stateid is some time in the past - that's old */
3300 if (lsp
->rls_lockid
.bits
.chgseq
> id
->bits
.chgseq
)
3301 return (NFS4_CHECK_STATEID_OLD
);
3303 if (lsp
->rls_state
->rs_closed
== TRUE
)
3304 return (NFS4_CHECK_STATEID_CLOSED
);
3306 return (NFS4_CHECK_STATEID_OKAY
);
3310 rfs4_get_deleg_state(stateid4
*stateid
, rfs4_deleg_state_t
**dspp
)
3312 stateid_t
*id
= (stateid_t
*)stateid
;
3313 rfs4_deleg_state_t
*dsp
;
3317 dsp
= rfs4_finddelegstate(id
);
3319 return (what_stateid_error(id
, DELEGID
));
3322 if (rfs4_lease_expired(dsp
->rds_client
)) {
3323 rfs4_deleg_state_rele(dsp
);
3324 return (NFS4ERR_EXPIRED
);
3333 rfs4_get_lo_state(stateid4
*stateid
, rfs4_lo_state_t
**lspp
, bool_t lock_fp
)
3335 stateid_t
*id
= (stateid_t
*)stateid
;
3336 rfs4_lo_state_t
*lsp
;
3340 lsp
= rfs4_findlo_state(id
, lock_fp
);
3342 return (what_stateid_error(id
, LOCKID
));
3345 if (rfs4_lease_expired(lsp
->rls_state
->rs_owner
->ro_client
)) {
3346 rfs4_lo_state_rele(lsp
, lock_fp
);
3347 return (NFS4ERR_EXPIRED
);
3356 rfs4_get_all_state(stateid4
*sid
, rfs4_state_t
**spp
,
3357 rfs4_deleg_state_t
**dspp
, rfs4_lo_state_t
**lspp
)
3359 rfs4_state_t
*sp
= NULL
;
3360 rfs4_deleg_state_t
*dsp
= NULL
;
3361 rfs4_lo_state_t
*lsp
= NULL
;
3365 *spp
= NULL
; *dspp
= NULL
; *lspp
= NULL
;
3367 id
= (stateid_t
*)sid
;
3368 switch (id
->bits
.type
) {
3370 status
= rfs4_get_state_lockit(sid
, &sp
, FALSE
, FALSE
);
3373 status
= rfs4_get_deleg_state(sid
, &dsp
);
3376 status
= rfs4_get_lo_state(sid
, &lsp
, FALSE
);
3377 if (status
== NFS4_OK
) {
3378 sp
= lsp
->rls_state
;
3379 rfs4_dbe_hold(sp
->rs_dbe
);
3383 status
= NFS4ERR_BAD_STATEID
;
3386 if (status
== NFS4_OK
) {
3396 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3397 * rfs4_state_t struct has access to do this operation and if so
3398 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3401 rfs4_state_has_access(rfs4_state_t
*sp
, int mode
, vnode_t
*vp
)
3403 nfsstat4 stat
= NFS4_OK
;
3405 bool_t create
= FALSE
;
3407 rfs4_dbe_lock(sp
->rs_dbe
);
3408 if (mode
== FWRITE
) {
3409 if (!(sp
->rs_share_access
& OPEN4_SHARE_ACCESS_WRITE
)) {
3410 stat
= NFS4ERR_OPENMODE
;
3412 } else if (mode
== FREAD
) {
3413 if (!(sp
->rs_share_access
& OPEN4_SHARE_ACCESS_READ
)) {
3415 * If we have OPENed the file with DENYing access
3416 * to both READ and WRITE then no one else could
3417 * have OPENed the file, hence no conflicting READ
3418 * deny. This check is merely an optimization.
3420 if (sp
->rs_share_deny
== OPEN4_SHARE_DENY_BOTH
)
3423 /* Check against file struct's DENY mode */
3424 fp
= rfs4_findfile(vp
, NULL
, &create
);
3427 rfs4_dbe_lock(fp
->rf_dbe
);
3429 * Check if any other open owner has the file
3430 * OPENed with deny READ.
3432 if (sp
->rs_share_deny
& OPEN4_SHARE_DENY_READ
)
3434 ASSERT(fp
->rf_deny_read
>= deny_read
);
3435 if (fp
->rf_deny_read
> deny_read
)
3436 stat
= NFS4ERR_OPENMODE
;
3437 rfs4_dbe_unlock(fp
->rf_dbe
);
3442 /* Illegal I/O mode */
3443 stat
= NFS4ERR_INVAL
;
3446 rfs4_dbe_unlock(sp
->rs_dbe
);
3451 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3452 * the file is being truncated, return NFS4_OK if allowed or appropriate
3453 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3454 * the associated file will be done if the I/O is not consistent with any
3455 * delegation in effect on the file. Should be holding fop_rwlock, either
3456 * as reader or writer as appropriate. rfs4_op_open will acquire the
3457 * fop_rwlock as writer when setting up delegation. If the stateid is bad
3458 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3459 * deleg parameter, we will return whether a write delegation is held by
3460 * the client associated with this stateid.
3461 * If the server instance associated with the relevant client is in its
3462 * grace period, return NFS4ERR_GRACE.
3466 rfs4_check_stateid(int mode
, vnode_t
*vp
,
3467 stateid4
*stateid
, bool_t trunc
, bool_t
*deleg
,
3468 bool_t do_access
, caller_context_t
*ct
)
3471 bool_t create
= FALSE
;
3473 rfs4_deleg_state_t
*dsp
;
3474 rfs4_lo_state_t
*lsp
;
3475 stateid_t
*id
= (stateid_t
*)stateid
;
3476 nfsstat4 stat
= NFS4_OK
;
3481 ct
->cc_caller_id
= nfs4_srv_caller_id
;
3482 ct
->cc_flags
= CC_DONTBLOCK
;
3485 if (ISSPECIAL(stateid
)) {
3486 fp
= rfs4_findfile(vp
, NULL
, &create
);
3489 if (fp
->rf_dinfo
.rd_dtype
== OPEN_DELEGATE_NONE
) {
3493 if (mode
== FWRITE
||
3494 fp
->rf_dinfo
.rd_dtype
== OPEN_DELEGATE_WRITE
) {
3495 rfs4_recall_deleg(fp
, trunc
, NULL
);
3497 return (NFS4ERR_DELAY
);
3502 stat
= rfs4_get_all_state(stateid
, &sp
, &dsp
, &lsp
);
3503 if (stat
!= NFS4_OK
)
3506 /* Is associated server instance in its grace period? */
3507 if (rfs4_clnt_in_grace(lsp
->rls_locker
->rl_client
)) {
3508 rfs4_lo_state_rele(lsp
, FALSE
);
3510 rfs4_state_rele_nounlock(sp
);
3511 return (NFS4ERR_GRACE
);
3513 if (id
->bits
.type
== LOCKID
) {
3514 /* Seqid in the future? - that's bad */
3515 if (lsp
->rls_lockid
.bits
.chgseq
<
3517 rfs4_lo_state_rele(lsp
, FALSE
);
3519 rfs4_state_rele_nounlock(sp
);
3520 return (NFS4ERR_BAD_STATEID
);
3522 /* Seqid in the past? - that's old */
3523 if (lsp
->rls_lockid
.bits
.chgseq
>
3525 rfs4_lo_state_rele(lsp
, FALSE
);
3527 rfs4_state_rele_nounlock(sp
);
3528 return (NFS4ERR_OLD_STATEID
);
3530 /* Ensure specified filehandle matches */
3531 if (lsp
->rls_state
->rs_finfo
->rf_vp
!= vp
) {
3532 rfs4_lo_state_rele(lsp
, FALSE
);
3534 rfs4_state_rele_nounlock(sp
);
3535 return (NFS4ERR_BAD_STATEID
);
3540 lsp
->rls_locker
->rl_client
->rc_sysidt
;
3541 ct
->cc_pid
= lsp
->rls_locker
->rl_pid
;
3543 rfs4_lo_state_rele(lsp
, FALSE
);
3546 /* Stateid provided was an "open" stateid */
3548 /* Is associated server instance in its grace period? */
3549 if (rfs4_clnt_in_grace(sp
->rs_owner
->ro_client
)) {
3550 rfs4_state_rele_nounlock(sp
);
3551 return (NFS4ERR_GRACE
);
3553 if (id
->bits
.type
== OPENID
) {
3554 /* Seqid in the future? - that's bad */
3555 if (sp
->rs_stateid
.bits
.chgseq
<
3557 rfs4_state_rele_nounlock(sp
);
3558 return (NFS4ERR_BAD_STATEID
);
3560 /* Seqid in the past - that's old */
3561 if (sp
->rs_stateid
.bits
.chgseq
>
3563 rfs4_state_rele_nounlock(sp
);
3564 return (NFS4ERR_OLD_STATEID
);
3567 /* Ensure specified filehandle matches */
3568 if (sp
->rs_finfo
->rf_vp
!= vp
) {
3569 rfs4_state_rele_nounlock(sp
);
3570 return (NFS4ERR_BAD_STATEID
);
3573 if (sp
->rs_owner
->ro_need_confirm
) {
3574 rfs4_state_rele_nounlock(sp
);
3575 return (NFS4ERR_BAD_STATEID
);
3578 if (sp
->rs_closed
== TRUE
) {
3579 rfs4_state_rele_nounlock(sp
);
3580 return (NFS4ERR_OLD_STATEID
);
3584 stat
= rfs4_state_has_access(sp
, mode
, vp
);
3589 * Return whether this state has write
3590 * delegation if desired
3592 if (deleg
&& (sp
->rs_finfo
->rf_dinfo
.rd_dtype
==
3593 OPEN_DELEGATE_WRITE
))
3597 * We got a valid stateid, so we update the
3598 * lease on the client. Ideally we would like
3599 * to do this after the calling op succeeds,
3600 * but for now this will be good
3601 * enough. Callers of this routine are
3602 * currently insulated from the state stuff.
3604 rfs4_update_lease(sp
->rs_owner
->ro_client
);
3607 * If a delegation is present on this file and
3608 * this is a WRITE, then update the lastwrite
3609 * time to indicate that activity is present.
3611 if (sp
->rs_finfo
->rf_dinfo
.rd_dtype
==
3612 OPEN_DELEGATE_WRITE
&&
3614 sp
->rs_finfo
->rf_dinfo
.rd_time_lastwrite
=
3618 rfs4_state_rele_nounlock(sp
);
3624 /* Is associated server instance in its grace period? */
3625 if (rfs4_clnt_in_grace(dsp
->rds_client
)) {
3626 rfs4_deleg_state_rele(dsp
);
3627 return (NFS4ERR_GRACE
);
3629 if (dsp
->rds_delegid
.bits
.chgseq
!= id
->bits
.chgseq
) {
3630 rfs4_deleg_state_rele(dsp
);
3631 return (NFS4ERR_BAD_STATEID
);
3634 /* Ensure specified filehandle matches */
3635 if (dsp
->rds_finfo
->rf_vp
!= vp
) {
3636 rfs4_deleg_state_rele(dsp
);
3637 return (NFS4ERR_BAD_STATEID
);
3640 * Return whether this state has write
3641 * delegation if desired
3643 if (deleg
&& (dsp
->rds_finfo
->rf_dinfo
.rd_dtype
==
3644 OPEN_DELEGATE_WRITE
))
3647 rfs4_update_lease(dsp
->rds_client
);
3650 * If a delegation is present on this file and
3651 * this is a WRITE, then update the lastwrite
3652 * time to indicate that activity is present.
3654 if (dsp
->rds_finfo
->rf_dinfo
.rd_dtype
==
3655 OPEN_DELEGATE_WRITE
&& mode
== FWRITE
) {
3656 dsp
->rds_finfo
->rf_dinfo
.rd_time_lastwrite
=
3661 * XXX - what happens if this is a WRITE and the
3662 * delegation type of for READ.
3664 rfs4_deleg_state_rele(dsp
);
3669 * If we got this far, something bad happened
3671 return (NFS4ERR_BAD_STATEID
);
3677 * This is a special function in that for the file struct provided the
3678 * server wants to remove/close all current state associated with the
3679 * file. The prime use of this would be with OP_REMOVE to force the
3680 * release of state and particularly of file locks.
3682 * There is an assumption that there is no delegations outstanding on
3683 * this file at this point. The caller should have waited for those
3684 * to be returned or revoked.
3687 rfs4_close_all_state(rfs4_file_t
*fp
)
3691 rfs4_dbe_lock(fp
->rf_dbe
);
3694 /* only applies when server is handing out delegations */
3695 if (rfs4_deleg_policy
!= SRV_NEVER_DELEGATE
)
3696 ASSERT(fp
->rf_dinfo
.rd_hold_grant
> 0);
3699 /* No delegations for this file */
3700 ASSERT(list_is_empty(&fp
->rf_delegstatelist
));
3702 /* Make sure that it can not be found */
3703 rfs4_dbe_invalidate(fp
->rf_dbe
);
3705 if (fp
->rf_vp
== NULL
) {
3706 rfs4_dbe_unlock(fp
->rf_dbe
);
3709 rfs4_dbe_unlock(fp
->rf_dbe
);
3712 * Hold as writer to prevent other server threads from
3713 * processing requests related to the file while all state is
3716 rw_enter(&fp
->rf_file_rwlock
, RW_WRITER
);
3718 /* Remove ALL state from the file */
3719 while (sp
= rfs4_findstate_by_file(fp
)) {
3720 rfs4_state_close(sp
, FALSE
, FALSE
, CRED());
3721 rfs4_state_rele_nounlock(sp
);
3725 * This is only safe since there are no further references to
3728 rfs4_dbe_lock(fp
->rf_dbe
);
3730 vnode_t
*vp
= fp
->rf_vp
;
3732 mutex_enter(&vp
->v_vsd_lock
);
3733 (void) vsd_set(vp
, nfs4_srv_vkey
, NULL
);
3734 mutex_exit(&vp
->v_vsd_lock
);
3738 rfs4_dbe_unlock(fp
->rf_dbe
);
3740 /* Finally let other references to proceed */
3741 rw_exit(&fp
->rf_file_rwlock
);
3745 * This function is used as a target for the rfs4_dbe_walk() call
3746 * below. The purpose of this function is to see if the
3747 * lockowner_state refers to a file that resides within the exportinfo
3748 * export. If so, then remove the lock_owner state (file locks and
3749 * share "locks") for this object since the intent is the server is
3750 * unexporting the specified directory. Be sure to invalidate the
3751 * object after the state has been released
3754 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry
, void *e
)
3756 rfs4_lo_state_t
*lsp
= (rfs4_lo_state_t
*)u_entry
;
3757 struct exportinfo
*exi
= (struct exportinfo
*)e
;
3758 nfs_fh4_fmt_t fhfmt4
, *exi_fhp
, *finfo_fhp
;
3761 efhp
= (fhandle_t
*)&exi
->exi_fh
;
3762 exi_fhp
= (nfs_fh4_fmt_t
*)&fhfmt4
;
3764 FH_TO_FMT4(efhp
, exi_fhp
);
3766 finfo_fhp
= (nfs_fh4_fmt_t
*)lsp
->rls_state
->rs_finfo
->
3767 rf_filehandle
.nfs_fh4_val
;
3769 if (EQFSID(&finfo_fhp
->fh4_fsid
, &exi_fhp
->fh4_fsid
) &&
3770 bcmp(&finfo_fhp
->fh4_xdata
, &exi_fhp
->fh4_xdata
,
3771 exi_fhp
->fh4_xlen
) == 0) {
3772 rfs4_state_close(lsp
->rls_state
, FALSE
, FALSE
, CRED());
3773 rfs4_dbe_invalidate(lsp
->rls_dbe
);
3774 rfs4_dbe_invalidate(lsp
->rls_state
->rs_dbe
);
3779 * This function is used as a target for the rfs4_dbe_walk() call
3780 * below. The purpose of this function is to see if the state refers
3781 * to a file that resides within the exportinfo export. If so, then
3782 * remove the open state for this object since the intent is the
3783 * server is unexporting the specified directory. The main result for
3784 * this type of entry is to invalidate it such it will not be found in
3788 rfs4_state_walk_callout(rfs4_entry_t u_entry
, void *e
)
3790 rfs4_state_t
*sp
= (rfs4_state_t
*)u_entry
;
3791 struct exportinfo
*exi
= (struct exportinfo
*)e
;
3792 nfs_fh4_fmt_t fhfmt4
, *exi_fhp
, *finfo_fhp
;
3795 efhp
= (fhandle_t
*)&exi
->exi_fh
;
3796 exi_fhp
= (nfs_fh4_fmt_t
*)&fhfmt4
;
3798 FH_TO_FMT4(efhp
, exi_fhp
);
3801 (nfs_fh4_fmt_t
*)sp
->rs_finfo
->rf_filehandle
.nfs_fh4_val
;
3803 if (EQFSID(&finfo_fhp
->fh4_fsid
, &exi_fhp
->fh4_fsid
) &&
3804 bcmp(&finfo_fhp
->fh4_xdata
, &exi_fhp
->fh4_xdata
,
3805 exi_fhp
->fh4_xlen
) == 0) {
3806 rfs4_state_close(sp
, TRUE
, FALSE
, CRED());
3807 rfs4_dbe_invalidate(sp
->rs_dbe
);
3812 * This function is used as a target for the rfs4_dbe_walk() call
3813 * below. The purpose of this function is to see if the state refers
3814 * to a file that resides within the exportinfo export. If so, then
3815 * remove the deleg state for this object since the intent is the
3816 * server is unexporting the specified directory. The main result for
3817 * this type of entry is to invalidate it such it will not be found in
3821 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry
, void *e
)
3823 rfs4_deleg_state_t
*dsp
= (rfs4_deleg_state_t
*)u_entry
;
3824 struct exportinfo
*exi
= (struct exportinfo
*)e
;
3825 nfs_fh4_fmt_t fhfmt4
, *exi_fhp
, *finfo_fhp
;
3828 efhp
= (fhandle_t
*)&exi
->exi_fh
;
3829 exi_fhp
= (nfs_fh4_fmt_t
*)&fhfmt4
;
3831 FH_TO_FMT4(efhp
, exi_fhp
);
3834 (nfs_fh4_fmt_t
*)dsp
->rds_finfo
->rf_filehandle
.nfs_fh4_val
;
3836 if (EQFSID(&finfo_fhp
->fh4_fsid
, &exi_fhp
->fh4_fsid
) &&
3837 bcmp(&finfo_fhp
->fh4_xdata
, &exi_fhp
->fh4_xdata
,
3838 exi_fhp
->fh4_xlen
) == 0) {
3839 rfs4_dbe_invalidate(dsp
->rds_dbe
);
3844 * This function is used as a target for the rfs4_dbe_walk() call
3845 * below. The purpose of this function is to see if the state refers
3846 * to a file that resides within the exportinfo export. If so, then
3847 * release vnode hold for this object since the intent is the server
3848 * is unexporting the specified directory. Invalidation will prevent
3849 * this struct from being found in the future.
3852 rfs4_file_walk_callout(rfs4_entry_t u_entry
, void *e
)
3854 rfs4_file_t
*fp
= (rfs4_file_t
*)u_entry
;
3855 struct exportinfo
*exi
= (struct exportinfo
*)e
;
3856 nfs_fh4_fmt_t fhfmt4
, *exi_fhp
, *finfo_fhp
;
3859 efhp
= (fhandle_t
*)&exi
->exi_fh
;
3860 exi_fhp
= (nfs_fh4_fmt_t
*)&fhfmt4
;
3862 FH_TO_FMT4(efhp
, exi_fhp
);
3864 finfo_fhp
= (nfs_fh4_fmt_t
*)fp
->rf_filehandle
.nfs_fh4_val
;
3866 if (EQFSID(&finfo_fhp
->fh4_fsid
, &exi_fhp
->fh4_fsid
) &&
3867 bcmp(&finfo_fhp
->fh4_xdata
, &exi_fhp
->fh4_xdata
,
3868 exi_fhp
->fh4_xlen
) == 0) {
3870 vnode_t
*vp
= fp
->rf_vp
;
3873 * don't leak monitors and remove the reference
3874 * put on the vnode when the delegation was granted.
3876 if (fp
->rf_dinfo
.rd_dtype
== OPEN_DELEGATE_READ
) {
3877 (void) fem_uninstall(vp
, &deleg_rdops
, fp
);
3878 vn_open_downgrade(vp
, FREAD
);
3879 } else if (fp
->rf_dinfo
.rd_dtype
==
3880 OPEN_DELEGATE_WRITE
) {
3881 (void) fem_uninstall(vp
, &deleg_wrops
, fp
);
3882 vn_open_downgrade(vp
, FREAD
|FWRITE
);
3884 mutex_enter(&vp
->v_vsd_lock
);
3885 (void) vsd_set(vp
, nfs4_srv_vkey
, NULL
);
3886 mutex_exit(&vp
->v_vsd_lock
);
3890 rfs4_dbe_invalidate(fp
->rf_dbe
);
3895 * Given a directory that is being unexported, cleanup/release all
3896 * state in the server that refers to objects residing underneath this
3897 * particular export. The ordering of the release is important.
3898 * Lock_owner, then state and then file.
3901 rfs4_clean_state_exi(struct exportinfo
*exi
)
3903 mutex_enter(&rfs4_state_lock
);
3905 if (rfs4_server_state
== NULL
) {
3906 mutex_exit(&rfs4_state_lock
);
3910 rfs4_dbe_walk(rfs4_lo_state_tab
, rfs4_lo_state_walk_callout
, exi
);
3911 rfs4_dbe_walk(rfs4_state_tab
, rfs4_state_walk_callout
, exi
);
3912 rfs4_dbe_walk(rfs4_deleg_state_tab
, rfs4_deleg_state_walk_callout
, exi
);
3913 rfs4_dbe_walk(rfs4_file_tab
, rfs4_file_walk_callout
, exi
);
3915 mutex_exit(&rfs4_state_lock
);