Merge commit 'ea01a15a654b9e1c7b37d958f4d1911882ed7781'
[unleashed.git] / kernel / fs / nfs / nfs4_dispatch.c
blobfbff936e09128f6dc0092c49de4f5a4c6a7d8188
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <sys/systm.h>
28 #include <sys/sdt.h>
29 #include <rpc/types.h>
30 #include <rpc/auth.h>
31 #include <rpc/auth_unix.h>
32 #include <rpc/auth_des.h>
33 #include <rpc/svc.h>
34 #include <rpc/xdr.h>
35 #include <nfs/nfs4.h>
36 #include <nfs/nfs_dispatch.h>
37 #include <nfs/nfs4_drc.h>
39 #define NFS4_MAX_MINOR_VERSION 0
42 * This is the duplicate request cache for NFSv4
44 rfs4_drc_t *nfs4_drc = NULL;
47 * The default size of the duplicate request cache
49 uint32_t nfs4_drc_max = 8 * 1024;
52 * The number of buckets we'd like to hash the
53 * replies into.. do not change this on the fly.
55 uint32_t nfs4_drc_hash = 541;
57 static void rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp);
60 * Initialize a duplicate request cache.
62 rfs4_drc_t *
63 rfs4_init_drc(uint32_t drc_size, uint32_t drc_hash_size)
65 rfs4_drc_t *drc;
66 uint32_t bki;
68 ASSERT(drc_size);
69 ASSERT(drc_hash_size);
71 drc = kmem_alloc(sizeof (rfs4_drc_t), KM_SLEEP);
73 drc->max_size = drc_size;
74 drc->in_use = 0;
76 mutex_init(&drc->lock, NULL, MUTEX_DEFAULT, NULL);
78 drc->dr_hash = drc_hash_size;
80 drc->dr_buckets = kmem_alloc(sizeof (list_t)*drc_hash_size, KM_SLEEP);
82 for (bki = 0; bki < drc_hash_size; bki++) {
83 list_create(&drc->dr_buckets[bki], sizeof (rfs4_dupreq_t),
84 offsetof(rfs4_dupreq_t, dr_bkt_next));
87 list_create(&(drc->dr_cache), sizeof (rfs4_dupreq_t),
88 offsetof(rfs4_dupreq_t, dr_next));
90 return (drc);
94 * Destroy a duplicate request cache.
96 void
97 rfs4_fini_drc(rfs4_drc_t *drc)
99 rfs4_dupreq_t *drp, *drp_next;
101 ASSERT(drc);
103 /* iterate over the dr_cache and free the enties */
104 for (drp = list_head(&(drc->dr_cache)); drp != NULL; drp = drp_next) {
106 if (drp->dr_state == NFS4_DUP_REPLAY)
107 rfs4_compound_free(&(drp->dr_res));
109 if (drp->dr_addr.buf != NULL)
110 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
112 drp_next = list_next(&(drc->dr_cache), drp);
114 kmem_free(drp, sizeof (rfs4_dupreq_t));
117 mutex_destroy(&drc->lock);
118 kmem_free(drc->dr_buckets,
119 sizeof (list_t)*drc->dr_hash);
120 kmem_free(drc, sizeof (rfs4_drc_t));
124 * rfs4_dr_chstate:
126 * Change the state of a rfs4_dupreq. If it's not in transition
127 * to the FREE state, return. If we are moving to the FREE state
128 * then we need to clean up the compound results and move the entry
129 * to the end of the list.
131 void
132 rfs4_dr_chstate(rfs4_dupreq_t *drp, int new_state)
134 rfs4_drc_t *drc;
136 ASSERT(drp);
137 ASSERT(drp->drc);
138 ASSERT(drp->dr_bkt);
139 ASSERT(MUTEX_HELD(&drp->drc->lock));
141 drp->dr_state = new_state;
143 if (new_state != NFS4_DUP_FREE)
144 return;
146 drc = drp->drc;
149 * Remove entry from the bucket and
150 * dr_cache list, free compound results.
152 list_remove(drp->dr_bkt, drp);
153 list_remove(&(drc->dr_cache), drp);
154 rfs4_compound_free(&(drp->dr_res));
158 * rfs4_alloc_dr:
160 * Malloc a new one if we have not reached our maximum cache
161 * limit, otherwise pick an entry off the tail -- Use if it
162 * is marked as NFS4_DUP_FREE, or is an entry in the
163 * NFS4_DUP_REPLAY state.
165 rfs4_dupreq_t *
166 rfs4_alloc_dr(rfs4_drc_t *drc)
168 rfs4_dupreq_t *drp_tail, *drp = NULL;
170 ASSERT(drc);
171 ASSERT(MUTEX_HELD(&drc->lock));
174 * Have we hit the cache limit yet ?
176 if (drc->in_use < drc->max_size) {
178 * nope, so let's malloc a new one
180 drp = kmem_zalloc(sizeof (rfs4_dupreq_t), KM_SLEEP);
181 drp->drc = drc;
182 drc->in_use++;
183 DTRACE_PROBE1(nfss__i__drc_new, rfs4_dupreq_t *, drp);
184 return (drp);
188 * Cache is all allocated now traverse the list
189 * backwards to find one we can reuse.
191 for (drp_tail = list_tail(&drc->dr_cache); drp_tail != NULL;
192 drp_tail = list_prev(&drc->dr_cache, drp_tail)) {
194 switch (drp_tail->dr_state) {
196 case NFS4_DUP_FREE:
197 list_remove(&(drc->dr_cache), drp_tail);
198 DTRACE_PROBE1(nfss__i__drc_freeclaim,
199 rfs4_dupreq_t *, drp_tail);
200 return (drp_tail);
201 /* NOTREACHED */
203 case NFS4_DUP_REPLAY:
204 /* grab it. */
205 rfs4_dr_chstate(drp_tail, NFS4_DUP_FREE);
206 DTRACE_PROBE1(nfss__i__drc_replayclaim,
207 rfs4_dupreq_t *, drp_tail);
208 return (drp_tail);
209 /* NOTREACHED */
212 DTRACE_PROBE1(nfss__i__drc_full, rfs4_drc_t *, drc);
213 return (NULL);
217 * rfs4_find_dr:
219 * Search for an entry in the duplicate request cache by
220 * calculating the hash index based on the XID, and examining
221 * the entries in the hash bucket. If we find a match, return.
222 * Once we have searched the bucket we call rfs4_alloc_dr() to
223 * allocate a new entry, or reuse one that is available.
226 rfs4_find_dr(struct svc_req *req, rfs4_drc_t *drc, rfs4_dupreq_t **dup)
229 uint32_t the_xid;
230 list_t *dr_bkt;
231 rfs4_dupreq_t *drp;
232 int bktdex;
235 * Get the XID, calculate the bucket and search to
236 * see if we need to replay from the cache.
238 the_xid = req->rq_xprt->xp_xid;
239 bktdex = the_xid % drc->dr_hash;
241 dr_bkt = (list_t *)
242 &(drc->dr_buckets[(the_xid % drc->dr_hash)]);
244 DTRACE_PROBE3(nfss__i__drc_bktdex,
245 int, bktdex,
246 uint32_t, the_xid,
247 list_t *, dr_bkt);
249 *dup = NULL;
251 mutex_enter(&drc->lock);
253 * Search the bucket for a matching xid and address.
255 for (drp = list_head(dr_bkt); drp != NULL;
256 drp = list_next(dr_bkt, drp)) {
258 if (drp->dr_xid == the_xid &&
259 drp->dr_addr.len == req->rq_xprt->xp_rtaddr.len &&
260 bcmp((caddr_t)drp->dr_addr.buf,
261 (caddr_t)req->rq_xprt->xp_rtaddr.buf,
262 drp->dr_addr.len) == 0) {
265 * Found a match so REPLAY the Reply
267 if (drp->dr_state == NFS4_DUP_REPLAY) {
268 rfs4_dr_chstate(drp, NFS4_DUP_INUSE);
269 mutex_exit(&drc->lock);
270 *dup = drp;
271 DTRACE_PROBE1(nfss__i__drc_replay,
272 rfs4_dupreq_t *, drp);
273 return (NFS4_DUP_REPLAY);
277 * This entry must be in transition, so return
278 * the 'pending' status.
280 mutex_exit(&drc->lock);
281 return (NFS4_DUP_PENDING);
285 drp = rfs4_alloc_dr(drc);
286 mutex_exit(&drc->lock);
289 * The DRC is full and all entries are in use. Upper function
290 * should error out this request and force the client to
291 * retransmit -- effectively this is a resource issue. NFSD
292 * threads tied up with native File System, or the cache size
293 * is too small for the server load.
295 if (drp == NULL)
296 return (NFS4_DUP_ERROR);
299 * Init the state to NEW.
301 drp->dr_state = NFS4_DUP_NEW;
304 * If needed, resize the address buffer
306 if (drp->dr_addr.maxlen < req->rq_xprt->xp_rtaddr.len) {
307 if (drp->dr_addr.buf != NULL)
308 kmem_free(drp->dr_addr.buf, drp->dr_addr.maxlen);
309 drp->dr_addr.maxlen = req->rq_xprt->xp_rtaddr.len;
310 drp->dr_addr.buf = kmem_alloc(drp->dr_addr.maxlen, KM_NOSLEEP);
311 if (drp->dr_addr.buf == NULL) {
313 * If the malloc fails, mark the entry
314 * as free and put on the tail.
316 drp->dr_addr.maxlen = 0;
317 drp->dr_state = NFS4_DUP_FREE;
318 mutex_enter(&drc->lock);
319 list_insert_tail(&(drc->dr_cache), drp);
320 mutex_exit(&drc->lock);
321 return (NFS4_DUP_ERROR);
327 * Copy the address.
329 drp->dr_addr.len = req->rq_xprt->xp_rtaddr.len;
331 bcopy((caddr_t)req->rq_xprt->xp_rtaddr.buf,
332 (caddr_t)drp->dr_addr.buf,
333 drp->dr_addr.len);
335 drp->dr_xid = the_xid;
336 drp->dr_bkt = dr_bkt;
339 * Insert at the head of the bucket and
340 * the drc lists..
342 mutex_enter(&drc->lock);
343 list_insert_head(&drc->dr_cache, drp);
344 list_insert_head(dr_bkt, drp);
345 mutex_exit(&drc->lock);
347 *dup = drp;
349 return (NFS4_DUP_NEW);
354 * This function handles the duplicate request cache,
355 * NULL_PROC and COMPOUND procedure calls for NFSv4;
357 * Passed into this function are:-
359 * disp A pointer to our dispatch table entry
360 * req The request to process
361 * xprt The server transport handle
362 * ap A pointer to the arguments
365 * When appropriate this function is responsible for inserting
366 * the reply into the duplicate cache or replaying an existing
367 * cached reply.
369 * dr_stat reflects the state of the duplicate request that
370 * has been inserted into or retrieved from the cache
372 * drp is the duplicate request entry
376 rfs4_dispatch(struct rpcdisp *disp, struct svc_req *req,
377 SVCXPRT *xprt, char *ap)
380 COMPOUND4res res_buf;
381 COMPOUND4res *rbp;
382 COMPOUND4args *cap;
383 cred_t *cr = NULL;
384 int error = 0;
385 int dis_flags = 0;
386 int dr_stat = NFS4_NOT_DUP;
387 rfs4_dupreq_t *drp = NULL;
388 int rv;
390 ASSERT(disp);
393 * Short circuit the RPC_NULL proc.
395 if (disp->dis_proc == rpc_null) {
396 DTRACE_NFSV4_1(null__start, struct svc_req *, req);
397 if (!svc_sendreply(xprt, xdr_void, NULL)) {
398 DTRACE_NFSV4_1(null__done, struct svc_req *, req);
399 svcerr_systemerr(xprt);
400 return (1);
402 DTRACE_NFSV4_1(null__done, struct svc_req *, req);
403 return (0);
406 /* Only NFSv4 Compounds from this point onward */
408 rbp = &res_buf;
409 cap = (COMPOUND4args *)ap;
412 * Figure out the disposition of the whole COMPOUND
413 * and record it's IDEMPOTENTCY.
415 rfs4_compound_flagproc(cap, &dis_flags);
418 * If NON-IDEMPOTENT then we need to figure out if this
419 * request can be replied from the duplicate cache.
421 * If this is a new request then we need to insert the
422 * reply into the duplicate cache.
424 if (!(dis_flags & RPC_IDEMPOTENT)) {
425 /* look for a replay from the cache or allocate */
426 dr_stat = rfs4_find_dr(req, nfs4_drc, &drp);
428 switch (dr_stat) {
430 case NFS4_DUP_ERROR:
431 rfs4_resource_err(req, cap);
432 return (1);
433 /* NOTREACHED */
435 case NFS4_DUP_PENDING:
437 * reply has previously been inserted into the
438 * duplicate cache, however the reply has
439 * not yet been sent via svc_sendreply()
441 return (1);
442 /* NOTREACHED */
444 case NFS4_DUP_NEW:
445 curthread->t_flag |= T_DONTPEND;
446 /* NON-IDEMPOTENT proc call */
447 rfs4_compound(cap, rbp, NULL, req, cr, &rv);
448 curthread->t_flag &= ~T_DONTPEND;
450 if (rv) /* short ckt sendreply on error */
451 return (rv);
454 * dr_res must be initialized before calling
455 * rfs4_dr_chstate (it frees the reply).
457 drp->dr_res = res_buf;
458 if (curthread->t_flag & T_WOULDBLOCK) {
459 curthread->t_flag &= ~T_WOULDBLOCK;
461 * mark this entry as FREE and plop
462 * on the end of the cache list
464 mutex_enter(&drp->drc->lock);
465 rfs4_dr_chstate(drp, NFS4_DUP_FREE);
466 list_insert_tail(&(drp->drc->dr_cache), drp);
467 mutex_exit(&drp->drc->lock);
468 return (1);
470 break;
472 case NFS4_DUP_REPLAY:
473 /* replay from the cache */
474 rbp = &(drp->dr_res);
475 break;
477 } else {
478 curthread->t_flag |= T_DONTPEND;
479 /* IDEMPOTENT proc call */
480 rfs4_compound(cap, rbp, NULL, req, cr, &rv);
481 curthread->t_flag &= ~T_DONTPEND;
483 if (rv) /* short ckt sendreply on error */
484 return (rv);
486 if (curthread->t_flag & T_WOULDBLOCK) {
487 curthread->t_flag &= ~T_WOULDBLOCK;
488 return (1);
493 * Send out the replayed reply or the 'real' one.
495 if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
496 DTRACE_PROBE2(nfss__e__dispatch_sendfail,
497 struct svc_req *, xprt,
498 char *, rbp);
499 svcerr_systemerr(xprt);
500 error++;
504 * If this reply was just inserted into the duplicate cache
505 * or it was replayed from the dup cache; (re)mark it as
506 * available for replay
508 * At first glance, this 'if' statement seems a little strange;
509 * testing for NFS4_DUP_REPLAY, and then calling...
511 * rfs4_dr_chatate(NFS4_DUP_REPLAY)
513 * ... but notice that we are checking dr_stat, and not the
514 * state of the entry itself, the entry will be NFS4_DUP_INUSE,
515 * we do that so that we know not to prematurely reap it whilst
516 * we resent it to the client.
519 if (dr_stat == NFS4_DUP_NEW || dr_stat == NFS4_DUP_REPLAY) {
520 mutex_enter(&drp->drc->lock);
521 rfs4_dr_chstate(drp, NFS4_DUP_REPLAY);
522 mutex_exit(&drp->drc->lock);
523 } else if (dr_stat == NFS4_NOT_DUP) {
524 rfs4_compound_free(rbp);
527 return (error);
530 bool_t
531 rfs4_minorvers_mismatch(struct svc_req *req, SVCXPRT *xprt, void *args)
533 COMPOUND4args *argsp;
534 COMPOUND4res res_buf, *resp;
536 if (req->rq_vers != 4)
537 return (FALSE);
539 argsp = (COMPOUND4args *)args;
541 if (argsp->minorversion <= NFS4_MAX_MINOR_VERSION)
542 return (FALSE);
544 resp = &res_buf;
547 * Form a reply tag by copying over the reqeuest tag.
549 resp->tag.utf8string_val =
550 kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
551 resp->tag.utf8string_len = argsp->tag.utf8string_len;
552 bcopy(argsp->tag.utf8string_val, resp->tag.utf8string_val,
553 resp->tag.utf8string_len);
554 resp->array_len = 0;
555 resp->array = NULL;
556 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
557 if (!svc_sendreply(xprt, xdr_COMPOUND4res_srv, (char *)resp)) {
558 DTRACE_PROBE2(nfss__e__minorvers_mismatch,
559 SVCXPRT *, xprt, char *, resp);
560 svcerr_systemerr(xprt);
562 rfs4_compound_free(resp);
563 return (TRUE);
566 void
567 rfs4_resource_err(struct svc_req *req, COMPOUND4args *argsp)
569 COMPOUND4res res_buf, *rbp;
570 nfs_resop4 *resop;
571 PUTFH4res *resp;
573 rbp = &res_buf;
576 * Form a reply tag by copying over the request tag.
578 rbp->tag.utf8string_val =
579 kmem_alloc(argsp->tag.utf8string_len, KM_SLEEP);
580 rbp->tag.utf8string_len = argsp->tag.utf8string_len;
581 bcopy(argsp->tag.utf8string_val, rbp->tag.utf8string_val,
582 rbp->tag.utf8string_len);
584 rbp->array_len = 1;
585 rbp->array = kmem_zalloc(rbp->array_len * sizeof (nfs_resop4),
586 KM_SLEEP);
587 resop = &rbp->array[0];
588 resop->resop = argsp->array[0].argop; /* copy first op over */
590 /* Any op will do, just need to access status field */
591 resp = &resop->nfs_resop4_u.opputfh;
594 * NFS4ERR_RESOURCE is allowed for all ops, except OP_ILLEGAL.
595 * Note that all op numbers in the compound array were already
596 * validated by the XDR decoder (xdr_COMPOUND4args_srv()).
598 resp->status = (resop->resop == OP_ILLEGAL ?
599 NFS4ERR_OP_ILLEGAL : NFS4ERR_RESOURCE);
601 /* compound status is same as first op status */
602 rbp->status = resp->status;
604 if (!svc_sendreply(req->rq_xprt, xdr_COMPOUND4res_srv, (char *)rbp)) {
605 DTRACE_PROBE2(nfss__rsrc_err__sendfail,
606 struct svc_req *, req->rq_xprt, char *, rbp);
607 svcerr_systemerr(req->rq_xprt);
610 UTF8STRING_FREE(rbp->tag);
611 kmem_free(rbp->array, rbp->array_len * sizeof (nfs_resop4));