4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
38 * Implements a kernel based, client side RPC.
41 #include <sys/param.h>
42 #include <sys/types.h>
43 #include <sys/systm.h>
44 #include <sys/sysmacros.h>
45 #include <sys/stream.h>
46 #include <sys/strsubr.h>
48 #include <sys/tiuser.h>
49 #include <sys/tihdr.h>
50 #include <sys/t_kuser.h>
51 #include <sys/errno.h>
53 #include <sys/debug.h>
54 #include <sys/kstat.h>
55 #include <sys/t_lock.h>
56 #include <sys/cmn_err.h>
59 #include <sys/taskq.h>
61 #include <sys/atomic.h>
63 #include <netinet/in.h>
64 #include <rpc/types.h>
68 #include <rpc/rpc_msg.h>
72 static enum clnt_stat
clnt_clts_kcallit(CLIENT
*, rpcproc_t
, xdrproc_t
,
73 caddr_t
, xdrproc_t
, caddr_t
, struct timeval
);
74 static void clnt_clts_kabort(CLIENT
*);
75 static void clnt_clts_kerror(CLIENT
*, struct rpc_err
*);
76 static bool_t
clnt_clts_kfreeres(CLIENT
*, xdrproc_t
, caddr_t
);
77 static bool_t
clnt_clts_kcontrol(CLIENT
*, int, char *);
78 static void clnt_clts_kdestroy(CLIENT
*);
79 static int clnt_clts_ksettimers(CLIENT
*, struct rpc_timers
*,
80 struct rpc_timers
*, int, void (*)(), caddr_t
, uint32_t);
83 * Operations vector for CLTS based RPC
85 static struct clnt_ops clts_ops
= {
86 clnt_clts_kcallit
, /* do rpc call */
87 clnt_clts_kabort
, /* abort call */
88 clnt_clts_kerror
, /* return error status */
89 clnt_clts_kfreeres
, /* free results */
90 clnt_clts_kdestroy
, /* destroy rpc handle */
91 clnt_clts_kcontrol
, /* the ioctl() of rpc */
92 clnt_clts_ksettimers
/* set retry timers */
96 * Endpoint for CLTS (INET, INET6, loopback, etc.)
98 typedef struct endpnt_type
{
99 struct endpnt_type
*e_next
; /* pointer to next endpoint type */
100 list_t e_pool
; /* list of available endpoints */
101 list_t e_ilist
; /* list of idle endpoints */
102 struct endpnt
*e_pcurr
; /* pointer to current endpoint */
103 char e_protofmly
[KNC_STRSIZE
]; /* protocol family */
104 dev_t e_rdev
; /* device */
105 kmutex_t e_plock
; /* pool lock */
106 kmutex_t e_ilock
; /* idle list lock */
107 timeout_id_t e_itimer
; /* timer to dispatch the taskq */
108 uint_t e_cnt
; /* number of endpoints in the pool */
109 zoneid_t e_zoneid
; /* zoneid of endpoint type */
110 kcondvar_t e_async_cv
; /* cv for asynchronous reap threads */
111 uint_t e_async_count
; /* count of asynchronous reap threads */
114 typedef struct endpnt
{
115 list_node_t e_node
; /* link to the pool */
116 list_node_t e_idle
; /* link to the idle list */
117 endpnt_type_t
*e_type
; /* back pointer to endpoint type */
118 TIUSER
*e_tiptr
; /* pointer to transport endpoint */
119 queue_t
*e_wq
; /* write queue */
120 uint_t e_flags
; /* endpoint flags */
121 uint_t e_ref
; /* ref count on endpoint */
122 kcondvar_t e_cv
; /* condition variable */
123 kmutex_t e_lock
; /* protects cv and flags */
124 time_t e_itime
; /* time when rele'd */
127 #define ENDPNT_ESTABLISHED 0x1 /* endpoint is established */
128 #define ENDPNT_WAITING 0x2 /* thread waiting for endpoint */
129 #define ENDPNT_BOUND 0x4 /* endpoint is bound */
130 #define ENDPNT_STALE 0x8 /* endpoint is dead */
131 #define ENDPNT_ONIDLE 0x10 /* endpoint is on the idle list */
133 static krwlock_t endpnt_type_lock
; /* protects endpnt_type_list */
134 static endpnt_type_t
*endpnt_type_list
= NULL
; /* list of CLTS endpoints */
135 static struct kmem_cache
*endpnt_cache
; /* cache of endpnt_t's */
136 static taskq_t
*endpnt_taskq
; /* endpnt_t reaper thread */
137 static bool_t taskq_created
; /* flag for endpnt_taskq */
138 static kmutex_t endpnt_taskq_lock
; /* taskq lock */
139 static zone_key_t endpnt_destructor_key
;
141 #define DEFAULT_ENDPOINT_REAP_INTERVAL 60 /* 1 minute */
142 #define DEFAULT_INTERVAL_SHIFT 30 /* 30 seconds */
147 static int clnt_clts_max_endpoints
= -1;
148 static int clnt_clts_hash_size
= DEFAULT_HASH_SIZE
;
149 static time_t clnt_clts_endpoint_reap_interval
= -1;
150 static clock_t clnt_clts_taskq_dispatch_interval
;
153 * Response completion hash queue
155 static call_table_t
*clts_call_ht
;
158 * Routines for the endpoint manager
160 static struct endpnt_type
*endpnt_type_create(struct knetconfig
*);
161 static void endpnt_type_free(struct endpnt_type
*);
162 static int check_endpnt(struct endpnt
*, struct endpnt
**);
163 static struct endpnt
*endpnt_get(struct knetconfig
*, int);
164 static void endpnt_rele(struct endpnt
*);
165 static void endpnt_reap_settimer(endpnt_type_t
*);
166 static void endpnt_reap(endpnt_type_t
*);
167 static void endpnt_reap_dispatch(void *);
168 static void endpnt_reclaim(zoneid_t
);
172 * Request dipatching function.
174 static int clnt_clts_dispatch_send(queue_t
*q
, mblk_t
*, struct netbuf
*addr
,
175 calllist_t
*, uint_t
, cred_t
*);
178 * The size of the preserialized RPC header information.
180 #define CKU_HDRSIZE 20
182 * The initial allocation size. It is small to reduce space requirements.
184 #define CKU_INITSIZE 2048
186 * The size of additional allocations, if required. It is larger to
187 * reduce the number of actual allocations.
189 #define CKU_ALLOCSIZE 8192
192 * Private data per rpc handle. This structure is allocated by
193 * clnt_clts_kcreate, and freed by clnt_clts_kdestroy.
196 CLIENT cku_client
; /* client handle */
197 int cku_retrys
; /* request retrys */
199 struct endpnt
*cku_endpnt
; /* open end point */
200 struct knetconfig cku_config
;
201 struct netbuf cku_addr
; /* remote address */
202 struct rpc_err cku_err
; /* error status */
203 XDR cku_outxdr
; /* xdr stream for output */
204 XDR cku_inxdr
; /* xdr stream for input */
205 char cku_rpchdr
[CKU_HDRSIZE
+ 4]; /* rpc header */
206 struct cred
*cku_cred
; /* credentials */
207 struct rpc_timers
*cku_timers
; /* for estimating RTT */
208 struct rpc_timers
*cku_timeall
; /* for estimating RTT */
209 void (*cku_feedback
)(int, int, caddr_t
);
210 /* ptr to feedback rtn */
211 caddr_t cku_feedarg
; /* argument for feedback func */
212 uint32_t cku_xid
; /* current XID */
213 bool_t cku_bcast
; /* RPC broadcast hint */
214 int cku_useresvport
; /* Use reserved port */
215 struct rpc_clts_client
*cku_stats
; /* counters for the zone */
218 static const struct rpc_clts_client
{
219 kstat_named_t rccalls
;
220 kstat_named_t rcbadcalls
;
221 kstat_named_t rcretrans
;
222 kstat_named_t rcbadxids
;
223 kstat_named_t rctimeouts
;
224 kstat_named_t rcnewcreds
;
225 kstat_named_t rcbadverfs
;
226 kstat_named_t rctimers
;
227 kstat_named_t rcnomem
;
228 kstat_named_t rccantsend
;
229 } clts_rcstat_tmpl
= {
230 { "calls", KSTAT_DATA_UINT64
},
231 { "badcalls", KSTAT_DATA_UINT64
},
232 { "retrans", KSTAT_DATA_UINT64
},
233 { "badxids", KSTAT_DATA_UINT64
},
234 { "timeouts", KSTAT_DATA_UINT64
},
235 { "newcreds", KSTAT_DATA_UINT64
},
236 { "badverfs", KSTAT_DATA_UINT64
},
237 { "timers", KSTAT_DATA_UINT64
},
238 { "nomem", KSTAT_DATA_UINT64
},
239 { "cantsend", KSTAT_DATA_UINT64
},
242 static uint_t clts_rcstat_ndata
=
243 sizeof (clts_rcstat_tmpl
) / sizeof (kstat_named_t
);
245 #define RCSTAT_INCR(s, x) \
246 atomic_add_64(&(s)->x.value.ui64, 1)
248 #define ptoh(p) (&((p)->cku_client))
249 #define htop(h) ((struct cku_private *)((h)->cl_private))
255 #define REFRESHES 2 /* authentication refreshes */
258 * The following is used to determine the global default behavior for
259 * CLTS when binding to a local port.
261 * If the value is set to 1 the default will be to select a reserved
262 * (aka privileged) port, if the value is zero the default will be to
263 * use non-reserved ports. Users of kRPC may override this by using
264 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
266 static int clnt_clts_do_bindresvport
= 1;
268 #define BINDRESVPORT_RETRIES 5
271 clnt_clts_stats_init(zoneid_t zoneid
, struct rpc_clts_client
**statsp
)
276 knp
= rpcstat_zone_init_common(zoneid
, "unix", "rpc_clts_client",
277 (const kstat_named_t
*)&clts_rcstat_tmpl
,
278 sizeof (clts_rcstat_tmpl
));
280 * Backwards compatibility for old kstat clients
282 ksp
= kstat_create_zone("unix", 0, "rpc_client", "rpc",
283 KSTAT_TYPE_NAMED
, clts_rcstat_ndata
,
284 KSTAT_FLAG_VIRTUAL
| KSTAT_FLAG_WRITABLE
, zoneid
);
289 *statsp
= (struct rpc_clts_client
*)knp
;
293 clnt_clts_stats_fini(zoneid_t zoneid
, struct rpc_clts_client
**statsp
)
295 rpcstat_zone_fini_common(zoneid
, "unix", "rpc_clts_client");
296 kstat_delete_byname_zone("unix", 0, "rpc_client", zoneid
);
297 kmem_free(*statsp
, sizeof (clts_rcstat_tmpl
));
301 * Create an rpc handle for a clts rpc connection.
302 * Allocates space for the handle structure and the private data.
306 clnt_clts_kcreate(struct knetconfig
*config
, struct netbuf
*addr
,
307 rpcprog_t pgm
, rpcvers_t vers
, int retrys
, struct cred
*cred
,
311 struct cku_private
*p
;
312 struct rpc_msg call_msg
;
322 p
= kmem_zalloc(sizeof (*p
), KM_SLEEP
);
327 h
->cl_ops
= &clts_ops
;
328 h
->cl_private
= (caddr_t
)p
;
329 h
->cl_auth
= authkern_create();
331 /* call message, just used to pre-serialize below */
333 call_msg
.rm_direction
= CALL
;
334 call_msg
.rm_call
.cb_rpcvers
= RPC_MSG_VERSION
;
335 call_msg
.rm_call
.cb_prog
= pgm
;
336 call_msg
.rm_call
.cb_vers
= vers
;
339 clnt_clts_kinit(h
, addr
, retrys
, cred
);
341 xdrmem_create(&p
->cku_outxdr
, p
->cku_rpchdr
, CKU_HDRSIZE
, XDR_ENCODE
);
343 /* pre-serialize call message header */
344 if (!xdr_callhdr(&p
->cku_outxdr
, &call_msg
)) {
345 error
= EINVAL
; /* XXX */
349 p
->cku_config
.knc_rdev
= config
->knc_rdev
;
350 p
->cku_config
.knc_semantics
= config
->knc_semantics
;
351 plen
= strlen(config
->knc_protofmly
) + 1;
352 p
->cku_config
.knc_protofmly
= kmem_alloc(plen
, KM_SLEEP
);
353 bcopy(config
->knc_protofmly
, p
->cku_config
.knc_protofmly
, plen
);
354 p
->cku_useresvport
= -1; /* value is has not been set */
356 cv_init(&p
->cku_call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
357 mutex_init(&p
->cku_call
.call_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
363 auth_destroy(h
->cl_auth
);
364 kmem_free(p
->cku_addr
.buf
, addr
->maxlen
);
365 kmem_free(p
, sizeof (struct cku_private
));
371 clnt_clts_kinit(CLIENT
*h
, struct netbuf
*addr
, int retrys
, cred_t
*cred
)
373 /* LINTED pointer alignment */
374 struct cku_private
*p
= htop(h
);
377 rsp
= zone_getspecific(rpcstat_zone_key
, rpc_zone());
380 p
->cku_retrys
= retrys
;
382 if (p
->cku_addr
.maxlen
< addr
->len
) {
383 if (p
->cku_addr
.maxlen
!= 0 && p
->cku_addr
.buf
!= NULL
)
384 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
386 p
->cku_addr
.buf
= kmem_zalloc(addr
->maxlen
, KM_SLEEP
);
387 p
->cku_addr
.maxlen
= addr
->maxlen
;
390 p
->cku_addr
.len
= addr
->len
;
391 bcopy(addr
->buf
, p
->cku_addr
.buf
, addr
->len
);
395 p
->cku_timers
= NULL
;
396 p
->cku_timeall
= NULL
;
397 p
->cku_feedback
= NULL
;
398 p
->cku_bcast
= FALSE
;
399 p
->cku_call
.call_xid
= 0;
400 p
->cku_call
.call_hash
= 0;
401 p
->cku_call
.call_notified
= FALSE
;
402 p
->cku_call
.call_next
= NULL
;
403 p
->cku_call
.call_prev
= NULL
;
404 p
->cku_call
.call_reply
= NULL
;
405 p
->cku_call
.call_wq
= NULL
;
406 p
->cku_stats
= rsp
->rpc_clts_client
;
410 * set the timers. Return current retransmission timeout.
413 clnt_clts_ksettimers(CLIENT
*h
, struct rpc_timers
*t
, struct rpc_timers
*all
,
414 int minimum
, void (*feedback
)(int, int, caddr_t
), caddr_t arg
,
417 /* LINTED pointer alignment */
418 struct cku_private
*p
= htop(h
);
421 p
->cku_feedback
= feedback
;
422 p
->cku_feedarg
= arg
;
424 p
->cku_timeall
= all
;
427 value
= all
->rt_rtxcur
;
428 value
+= t
->rt_rtxcur
;
431 RCSTAT_INCR(p
->cku_stats
, rctimers
);
436 * Time out back off function. tim is in HZ
438 #define MAXTIMO (20 * hz)
439 #define backoff(tim) (((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
440 #define dobackoff(tim) ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))
442 #define RETRY_POLL_TIMO 30
445 * Call remote procedure.
446 * Most of the work of rpc is done here. We serialize what is left
447 * of the header (some was pre-serialized in the handle), serialize
448 * the arguments, and send it off. We wait for a reply or a time out.
449 * Timeout causes an immediate return, other packet problems may cause
450 * a retry on the receive. When a good packet is received we deserialize
451 * it, and check verification. A bad reply code will cause one retry
452 * with full (longhand) credentials.
455 clnt_clts_kcallit_addr(CLIENT
*h
, rpcproc_t procnum
, xdrproc_t xdr_args
,
456 caddr_t argsp
, xdrproc_t xdr_results
, caddr_t resultsp
,
457 struct timeval wait
, struct netbuf
*sin
)
459 /* LINTED pointer alignment */
460 struct cku_private
*p
= htop(h
);
462 int stries
= p
->cku_retrys
;
463 int refreshes
= REFRESHES
; /* number of times to refresh cred */
464 int round_trip
; /* time the RPC */
470 calllist_t
*call
= &p
->cku_call
;
471 clock_t ori_timout
, timout
;
473 enum clnt_stat status
;
474 struct rpc_msg reply_msg
;
475 enum clnt_stat re_status
;
478 RCSTAT_INCR(p
->cku_stats
, rccalls
);
480 RPCLOG(2, "clnt_clts_kcallit_addr: wait.tv_sec: %ld\n", wait
.tv_sec
);
481 RPCLOG(2, "clnt_clts_kcallit_addr: wait.tv_usec: %ld\n", wait
.tv_usec
);
483 timout
= TIMEVAL_TO_TICK(&wait
);
486 if (p
->cku_xid
== 0) {
487 p
->cku_xid
= alloc_xid();
488 if (p
->cku_endpnt
!= NULL
)
489 endpnt_rele(p
->cku_endpnt
);
490 p
->cku_endpnt
= NULL
;
492 call
->call_zoneid
= rpc_zoneid();
499 while ((mp
= allocb(CKU_INITSIZE
, BPRI_LO
)) == NULL
) {
500 if (strwaitbuf(CKU_INITSIZE
, BPRI_LO
)) {
501 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
502 p
->cku_err
.re_errno
= ENOSR
;
507 xdrs
= &p
->cku_outxdr
;
508 xdrmblk_init(xdrs
, mp
, XDR_ENCODE
, CKU_ALLOCSIZE
);
510 if (h
->cl_auth
->ah_cred
.oa_flavor
!= RPCSEC_GSS
) {
512 * Copy in the preserialized RPC header
515 bcopy(p
->cku_rpchdr
, mp
->b_rptr
, CKU_HDRSIZE
);
518 * transaction id is the 1st thing in the output
521 /* LINTED pointer alignment */
522 (*(uint32_t *)(mp
->b_rptr
)) = p
->cku_xid
;
524 /* Skip the preserialized stuff. */
525 XDR_SETPOS(xdrs
, CKU_HDRSIZE
);
527 /* Serialize dynamic stuff into the output buffer. */
528 if ((!XDR_PUTINT32(xdrs
, (int32_t *)&procnum
)) ||
529 (!AUTH_MARSHALL(h
->cl_auth
, xdrs
, p
->cku_cred
)) ||
530 (!(*xdr_args
)(xdrs
, argsp
))) {
532 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
533 p
->cku_err
.re_errno
= EIO
;
537 uint32_t *uproc
= (uint32_t *)
538 &p
->cku_rpchdr
[CKU_HDRSIZE
];
539 IXDR_PUT_U_INT32(uproc
, procnum
);
541 (*(uint32_t *)(&p
->cku_rpchdr
[0])) = p
->cku_xid
;
544 /* Serialize the procedure number and the arguments. */
545 if (!AUTH_WRAP(h
->cl_auth
, (caddr_t
)p
->cku_rpchdr
,
546 CKU_HDRSIZE
+4, xdrs
, xdr_args
, argsp
)) {
548 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
549 p
->cku_err
.re_errno
= EIO
;
559 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
560 p
->cku_err
.re_errno
= ENOSR
;
565 * Grab an endpnt only if the endpoint is NULL. We could be retrying
566 * the request and in this case we want to go through the same
567 * source port, so that the duplicate request cache may detect a
571 if (p
->cku_endpnt
== NULL
)
572 p
->cku_endpnt
= endpnt_get(&p
->cku_config
, p
->cku_useresvport
);
574 if (p
->cku_endpnt
== NULL
) {
576 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
577 p
->cku_err
.re_errno
= ENOSR
;
581 round_trip
= ddi_get_lbolt();
583 error
= clnt_clts_dispatch_send(p
->cku_endpnt
->e_wq
, mp
,
584 &p
->cku_addr
, call
, p
->cku_xid
, p
->cku_cred
);
588 p
->cku_err
.re_status
= RPC_CANTSEND
;
589 p
->cku_err
.re_errno
= error
;
590 RCSTAT_INCR(p
->cku_stats
, rccantsend
);
594 RPCLOG(64, "clnt_clts_kcallit_addr: sent call for xid 0x%x\n",
598 * There are two reasons for which we go back to to tryread.
600 * a) In case the status is RPC_PROCUNAVAIL and we sent out a
601 * broadcast we should not get any invalid messages with the
602 * RPC_PROCUNAVAIL error back. Some broken RPC implementations
603 * send them and for this we have to ignore them ( as we would
604 * have never received them ) and look for another message
605 * which might contain the valid response because we don't know
606 * how many broken implementations are in the network. So we are
607 * going to loop until
608 * - we received a valid response
609 * - we have processed all invalid responses and
610 * got a time out when we try to receive again a
613 * b) We will jump back to tryread also in case we failed
614 * within the AUTH_VALIDATE. In this case we should move
615 * on and loop until we received a valid response or we
616 * have processed all responses with broken authentication
617 * and we got a time out when we try to receive a message.
620 mutex_enter(&call
->call_lock
);
622 if (call
->call_notified
== FALSE
) {
623 klwp_t
*lwp
= ttolwp(curthread
);
624 clock_t cv_wait_ret
= 1; /* init to > 0 */
625 clock_t cv_timout
= timout
;
630 cv_timout
+= ddi_get_lbolt();
633 while ((cv_wait_ret
=
634 cv_timedwait(&call
->call_cv
,
635 &call
->call_lock
, cv_timout
)) > 0 &&
636 call
->call_notified
== FALSE
)
639 while ((cv_wait_ret
=
640 cv_timedwait_sig(&call
->call_cv
,
641 &call
->call_lock
, cv_timout
)) > 0 &&
642 call
->call_notified
== FALSE
)
645 if (cv_wait_ret
== 0)
651 resp
= call
->call_reply
;
652 call
->call_reply
= NULL
;
653 status
= call
->call_status
;
655 * We have to reset the call_notified here. In case we have
656 * to do a retry ( e.g. in case we got a RPC_PROCUNAVAIL
657 * error ) we need to set this to false to ensure that
658 * we will wait for the next message. When the next message
659 * is going to arrive the function clnt_clts_dispatch_notify
660 * will set this to true again.
662 call
->call_notified
= FALSE
;
663 call
->call_status
= RPC_TIMEDOUT
;
664 mutex_exit(&call
->call_lock
);
666 if (status
== RPC_TIMEDOUT
) {
669 * We got interrupted, bail out
671 p
->cku_err
.re_status
= RPC_INTR
;
672 p
->cku_err
.re_errno
= EINTR
;
675 RPCLOG(8, "clnt_clts_kcallit_addr: "
676 "request w/xid 0x%x timedout "
677 "waiting for reply\n", p
->cku_xid
);
678 #if 0 /* XXX not yet */
680 * Timeout may be due to a dead gateway. Send
681 * an ioctl downstream advising deletion of
682 * route when we reach the half-way point to
685 if (stries
== p
->cku_retrys
/2) {
686 t_kadvise(p
->cku_endpnt
->e_tiptr
,
687 (uchar_t
*)p
->cku_addr
.buf
,
691 p
->cku_err
.re_status
= RPC_TIMEDOUT
;
692 p
->cku_err
.re_errno
= ETIMEDOUT
;
693 RCSTAT_INCR(p
->cku_stats
, rctimeouts
);
698 ASSERT(resp
!= NULL
);
701 * Prepare the message for further processing. We need to remove
702 * the datagram header and copy the source address if necessary. No
703 * need to verify the header since rpcmod took care of that.
706 * Copy the source address if the caller has supplied a netbuf.
709 union T_primitives
*pptr
;
711 pptr
= (union T_primitives
*)resp
->b_rptr
;
712 bcopy(resp
->b_rptr
+ pptr
->unitdata_ind
.SRC_offset
, sin
->buf
,
713 pptr
->unitdata_ind
.SRC_length
);
714 sin
->len
= pptr
->unitdata_ind
.SRC_length
;
718 * Pop off the datagram header.
719 * It was retained in rpcmodrput().
726 round_trip
= ddi_get_lbolt() - round_trip
;
728 * Van Jacobson timer algorithm here, only if NOT a retransmission.
730 if (p
->cku_timers
!= NULL
&& stries
== p
->cku_retrys
) {
734 rt
-= (p
->cku_timers
->rt_srtt
>> 3);
735 p
->cku_timers
->rt_srtt
+= rt
;
738 rt
-= (p
->cku_timers
->rt_deviate
>> 2);
739 p
->cku_timers
->rt_deviate
+= rt
;
740 p
->cku_timers
->rt_rtxcur
=
741 (clock_t)((p
->cku_timers
->rt_srtt
>> 2) +
742 p
->cku_timers
->rt_deviate
) >> 1;
745 rt
-= (p
->cku_timeall
->rt_srtt
>> 3);
746 p
->cku_timeall
->rt_srtt
+= rt
;
749 rt
-= (p
->cku_timeall
->rt_deviate
>> 2);
750 p
->cku_timeall
->rt_deviate
+= rt
;
751 p
->cku_timeall
->rt_rtxcur
=
752 (clock_t)((p
->cku_timeall
->rt_srtt
>> 2) +
753 p
->cku_timeall
->rt_deviate
) >> 1;
754 if (p
->cku_feedback
!= NULL
) {
755 (*p
->cku_feedback
)(FEEDBACK_OK
, procnum
,
763 xdrs
= &(p
->cku_inxdr
);
764 xdrmblk_init(xdrs
, resp
, XDR_DECODE
, 0);
766 reply_msg
.rm_direction
= REPLY
;
767 reply_msg
.rm_reply
.rp_stat
= MSG_ACCEPTED
;
768 reply_msg
.acpted_rply
.ar_stat
= SUCCESS
;
769 reply_msg
.acpted_rply
.ar_verf
= _null_auth
;
771 * xdr_results will be done in AUTH_UNWRAP.
773 reply_msg
.acpted_rply
.ar_results
.where
= NULL
;
774 reply_msg
.acpted_rply
.ar_results
.proc
= xdr_void
;
777 * Decode and validate the response.
779 if (!xdr_replymsg(xdrs
, &reply_msg
)) {
780 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
781 p
->cku_err
.re_errno
= EIO
;
782 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
786 _seterr_reply(&reply_msg
, &(p
->cku_err
));
788 re_status
= p
->cku_err
.re_status
;
789 if (re_status
== RPC_SUCCESS
) {
791 * Reply is good, check auth.
793 if (!AUTH_VALIDATE(h
->cl_auth
,
794 &reply_msg
.acpted_rply
.ar_verf
)) {
795 p
->cku_err
.re_status
= RPC_AUTHERROR
;
796 p
->cku_err
.re_why
= AUTH_INVALIDRESP
;
797 RCSTAT_INCR(p
->cku_stats
, rcbadverfs
);
798 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
801 if (!AUTH_UNWRAP(h
->cl_auth
, xdrs
, xdr_results
, resultsp
)) {
802 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
803 p
->cku_err
.re_errno
= EIO
;
805 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
808 /* set errno in case we can't recover */
809 if (re_status
!= RPC_VERSMISMATCH
&&
810 re_status
!= RPC_AUTHERROR
&& re_status
!= RPC_PROGVERSMISMATCH
)
811 p
->cku_err
.re_errno
= EIO
;
813 * Determine whether or not we're doing an RPC
814 * broadcast. Some server implementations don't
815 * follow RFC 1050, section 7.4.2 in that they
816 * don't remain silent when they see a proc
817 * they don't support. Therefore we keep trying
818 * to receive on RPC_PROCUNAVAIL, hoping to get
819 * a valid response from a compliant server.
821 if (re_status
== RPC_PROCUNAVAIL
&& p
->cku_bcast
) {
822 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
825 if (re_status
== RPC_AUTHERROR
) {
827 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
828 call_table_remove(call
);
829 if (call
->call_reply
!= NULL
) {
830 freemsg(call
->call_reply
);
831 call
->call_reply
= NULL
;
835 * Maybe our credential need to be refreshed
838 AUTH_REFRESH(h
->cl_auth
, &reply_msg
, p
->cku_cred
)) {
840 * The credential is refreshed. Try the request again.
841 * Even if stries == 0, we still retry as long as
842 * refreshes > 0. This prevents a soft authentication
843 * error turning into a hard one at an upper level.
846 RCSTAT_INCR(p
->cku_stats
, rcbadcalls
);
847 RCSTAT_INCR(p
->cku_stats
, rcnewcreds
);
856 * We have used the client handle to do an AUTH_REFRESH
857 * and the RPC status may be set to RPC_SUCCESS;
858 * Let's make sure to set it to RPC_AUTHERROR.
860 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
863 * Map recoverable and unrecoverable
864 * authentication errors to appropriate errno
866 switch (p
->cku_err
.re_why
) {
869 * Could be an nfsportmon failure, set
870 * useresvport and try again.
872 if (p
->cku_useresvport
!= 1) {
873 p
->cku_useresvport
= 1;
880 endpt
= p
->cku_endpnt
;
881 if (endpt
->e_tiptr
!= NULL
) {
882 mutex_enter(&endpt
->e_lock
);
883 endpt
->e_flags
&= ~ENDPNT_BOUND
;
884 (void) t_kclose(endpt
->e_tiptr
, 1);
885 endpt
->e_tiptr
= NULL
;
886 mutex_exit(&endpt
->e_lock
);
890 p
->cku_xid
= alloc_xid();
891 endpnt_rele(p
->cku_endpnt
);
892 p
->cku_endpnt
= NULL
;
898 case AUTH_INVALIDRESP
:
900 case RPCSEC_GSS_NOCRED
:
901 case RPCSEC_GSS_FAILED
:
902 p
->cku_err
.re_errno
= EACCES
;
904 case AUTH_REJECTEDCRED
:
905 case AUTH_REJECTEDVERF
:
907 p
->cku_err
.re_errno
= EIO
;
910 RPCLOG(1, "clnt_clts_kcallit : authentication failed "
911 "with RPC_AUTHERROR of type %d\n",
916 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
919 call_table_remove(call
);
920 if (call
->call_reply
!= NULL
) {
921 freemsg(call
->call_reply
);
922 call
->call_reply
= NULL
;
924 RPCLOG(64, "clnt_clts_kcallit_addr: xid 0x%x taken off dispatch list",
933 if ((p
->cku_err
.re_status
!= RPC_SUCCESS
) &&
934 (p
->cku_err
.re_status
!= RPC_INTR
) &&
935 (p
->cku_err
.re_status
!= RPC_UDERROR
) &&
936 !IS_UNRECOVERABLE_RPC(p
->cku_err
.re_status
)) {
937 if (p
->cku_feedback
!= NULL
&& stries
== p
->cku_retrys
) {
938 (*p
->cku_feedback
)(FEEDBACK_REXMIT1
, procnum
,
942 timout
= backoff(timout
);
943 if (p
->cku_timeall
!= (struct rpc_timers
*)0)
944 p
->cku_timeall
->rt_rtxcur
= timout
;
946 if (p
->cku_err
.re_status
== RPC_SYSTEMERROR
||
947 p
->cku_err
.re_status
== RPC_CANTSEND
) {
949 * Errors due to lack of resources, wait a bit
955 RCSTAT_INCR(p
->cku_stats
, rcretrans
);
963 if (p
->cku_err
.re_status
!= RPC_SUCCESS
) {
964 RCSTAT_INCR(p
->cku_stats
, rcbadcalls
);
968 * Allow the endpoint to be held by the client handle in case this
969 * RPC was not successful. A retry may occur at a higher level and
970 * in this case we may want to send the request over the same
972 * Endpoint is also released for one-way RPC: no reply, nor retransmit
975 if ((p
->cku_err
.re_status
== RPC_SUCCESS
||
976 (p
->cku_err
.re_status
== RPC_TIMEDOUT
&& ori_timout
== 0)) &&
977 p
->cku_endpnt
!= NULL
) {
978 endpnt_rele(p
->cku_endpnt
);
979 p
->cku_endpnt
= NULL
;
981 DTRACE_PROBE2(clnt_clts_kcallit_done
, int, p
->cku_err
.re_status
,
982 struct endpnt
*, p
->cku_endpnt
);
985 return (p
->cku_err
.re_status
);
988 static enum clnt_stat
989 clnt_clts_kcallit(CLIENT
*h
, rpcproc_t procnum
, xdrproc_t xdr_args
,
990 caddr_t argsp
, xdrproc_t xdr_results
, caddr_t resultsp
,
993 return (clnt_clts_kcallit_addr(h
, procnum
, xdr_args
, argsp
,
994 xdr_results
, resultsp
, wait
, NULL
));
998 * Return error info on this handle.
1001 clnt_clts_kerror(CLIENT
*h
, struct rpc_err
*err
)
1003 /* LINTED pointer alignment */
1004 struct cku_private
*p
= htop(h
);
1010 clnt_clts_kfreeres(CLIENT
*h
, xdrproc_t xdr_res
, caddr_t res_ptr
)
1012 /* LINTED pointer alignment */
1013 struct cku_private
*p
= htop(h
);
1016 xdrs
= &(p
->cku_outxdr
);
1017 xdrs
->x_op
= XDR_FREE
;
1018 return ((*xdr_res
)(xdrs
, res_ptr
));
1023 clnt_clts_kabort(CLIENT
*h
)
1028 clnt_clts_kcontrol(CLIENT
*h
, int cmd
, char *arg
)
1030 /* LINTED pointer alignment */
1031 struct cku_private
*p
= htop(h
);
1035 p
->cku_xid
= *((uint32_t *)arg
);
1039 *((uint32_t *)arg
) = p
->cku_xid
;
1043 p
->cku_bcast
= *((uint32_t *)arg
);
1047 *((uint32_t *)arg
) = p
->cku_bcast
;
1049 case CLSET_BINDRESVPORT
:
1053 if (*(int *)arg
!= 1 && *(int *)arg
!= 0)
1056 p
->cku_useresvport
= *(int *)arg
;
1060 case CLGET_BINDRESVPORT
:
1064 *(int *)arg
= p
->cku_useresvport
;
1074 * Destroy rpc handle.
1075 * Frees the space used for output buffer, private data, and handle
1076 * structure, and the file pointer/TLI data on last reference.
1079 clnt_clts_kdestroy(CLIENT
*h
)
1081 /* LINTED pointer alignment */
1082 struct cku_private
*p
= htop(h
);
1083 calllist_t
*call
= &p
->cku_call
;
1087 RPCLOG(8, "clnt_clts_kdestroy h: %p\n", (void *)h
);
1088 RPCLOG(8, "clnt_clts_kdestroy h: xid=0x%x\n", p
->cku_xid
);
1090 if (p
->cku_endpnt
!= NULL
)
1091 endpnt_rele(p
->cku_endpnt
);
1093 cv_destroy(&call
->call_cv
);
1094 mutex_destroy(&call
->call_lock
);
1096 plen
= strlen(p
->cku_config
.knc_protofmly
) + 1;
1097 kmem_free(p
->cku_config
.knc_protofmly
, plen
);
1098 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
1099 kmem_free(p
, sizeof (*p
));
1103 * The connectionless (CLTS) kRPC endpoint management subsystem.
1105 * Because endpoints are potentially shared among threads making RPC calls,
1106 * they are managed in a pool according to type (endpnt_type_t). Each
1107 * endpnt_type_t points to a list of usable endpoints through the e_pool
1108 * field, which is of type list_t. list_t is a doubly-linked list.
1109 * The number of endpoints in the pool is stored in the e_cnt field of
1110 * endpnt_type_t and the endpoints are reference counted using the e_ref field
1111 * in the endpnt_t structure.
1113 * As an optimization, endpoints that have no references are also linked
1114 * to an idle list via e_ilist which is also of type list_t. When a thread
1115 * calls endpnt_get() to obtain a transport endpoint, the idle list is first
1116 * consulted and if such an endpoint exists, it is removed from the idle list
1117 * and returned to the caller.
1119 * If the idle list is empty, then a check is made to see if more endpoints
1120 * can be created. If so, we proceed and create a new endpoint which is added
1121 * to the pool and returned to the caller. If we have reached the limit and
1122 * cannot make a new endpoint then one is returned to the caller via round-
1125 * When an endpoint is placed on the idle list by a thread calling
1126 * endpnt_rele(), it is timestamped and then a reaper taskq is scheduled to
1127 * be dispatched if one hasn't already been. When the timer fires, the
1128 * taskq traverses the idle list and checks to see which endpoints are
1129 * eligible to be closed. It determines this by checking if the timestamp
1130 * when the endpoint was released has exceeded the the threshold for how long
1131 * it should stay alive.
1133 * endpnt_t structures remain persistent until the memory reclaim callback,
1134 * endpnt_reclaim(), is invoked.
1136 * Here is an example of how the data structures would be laid out by the
1142 * _______________ ______________
1143 * | e_next |----------------------->| e_next |---->>
1144 * | e_pool |<---+ | e_pool |<----+
1145 * | e_ilist |<---+--+ | e_ilist |<----+--+
1146 * +->| e_pcurr |----+--+--+ +->| e_pcurr |-----+--+--+
1147 * | | ... | | | | | | ... | | | |
1148 * | | e_itimer (90) | | | | | | e_itimer (0) | | | |
1149 * | | e_cnt (1) | | | | | | e_cnt (3) | | | |
1150 * | +---------------+ | | | | +--------------+ | | |
1152 * | endpnt_t | | | | | | |
1153 * | ____________ | | | | ____________ | | |
1154 * | | e_node |<------+ | | | | e_node |<------+ | |
1155 * | | e_idle |<---------+ | | | e_idle | | | |
1156 * +--| e_type |<------------+ +--| e_type | | | |
1157 * | e_tiptr | | | e_tiptr | | | |
1158 * | ... | | | ... | | | |
1159 * | e_lock | | | e_lock | | | |
1160 * | ... | | | ... | | | |
1161 * | e_ref (0) | | | e_ref (2) | | | |
1162 * | e_itime | | | e_itime | | | |
1163 * +------------+ | +------------+ | | |
1166 * | ____________ | | |
1167 * | | e_node |<------+ | |
1168 * | | e_idle |<------+--+ |
1174 * | | e_ref (0) | | |
1176 * | +------------+ | |
1179 * | ____________ | |
1180 * | | e_node |<------+ |
1182 * +--| e_type |<------------+
1191 * Endpoint locking strategy:
1193 * The following functions manipulate lists which hold the endpoint and the
1194 * endpoints themselves:
1196 * endpnt_get()/check_endpnt()/endpnt_rele()/endpnt_reap()/do_endpnt_reclaim()
1198 * Lock description follows:
1200 * endpnt_type_lock: Global reader/writer lock which protects accesses to the
1203 * e_plock: Lock defined in the endpnt_type_t. It is intended to
1204 * protect accesses to the pool of endopints (e_pool) for a given
1207 * e_ilock: Lock defined in endpnt_type_t. It is intended to protect accesses
1208 * to the idle list (e_ilist) of available endpoints for a given
1209 * endpnt_type_t. It also protects access to the e_itimer, e_async_cv,
1210 * and e_async_count fields in endpnt_type_t.
1212 * e_lock: Lock defined in the endpnt structure. It is intended to protect
1213 * flags, cv, and ref count.
1215 * The order goes as follows so as not to induce deadlock.
1217 * endpnt_type_lock -> e_plock -> e_ilock -> e_lock
1219 * Interaction with Zones and shutting down:
1221 * endpnt_type_ts are uniquely identified by the (e_zoneid, e_rdev, e_protofmly)
1222 * tuple, which means that a zone may not reuse another zone's idle endpoints
1223 * without first doing a t_kclose().
1225 * A zone's endpnt_type_ts are destroyed when a zone is shut down; e_async_cv
1226 * and e_async_count are used to keep track of the threads in endpnt_taskq
1227 * trying to reap endpnt_ts in the endpnt_type_t.
1231 * Allocate and initialize an endpnt_type_t
1233 static struct endpnt_type
*
1234 endpnt_type_create(struct knetconfig
*config
)
1236 struct endpnt_type
*etype
;
1239 * Allocate a new endpoint type to hang a list of
1240 * endpoints off of it.
1242 etype
= kmem_alloc(sizeof (struct endpnt_type
), KM_SLEEP
);
1243 etype
->e_next
= NULL
;
1244 etype
->e_pcurr
= NULL
;
1245 etype
->e_itimer
= 0;
1248 (void) strncpy(etype
->e_protofmly
, config
->knc_protofmly
, KNC_STRSIZE
);
1249 mutex_init(&etype
->e_plock
, NULL
, MUTEX_DEFAULT
, NULL
);
1250 mutex_init(&etype
->e_ilock
, NULL
, MUTEX_DEFAULT
, NULL
);
1251 etype
->e_rdev
= config
->knc_rdev
;
1252 etype
->e_zoneid
= rpc_zoneid();
1253 etype
->e_async_count
= 0;
1254 cv_init(&etype
->e_async_cv
, NULL
, CV_DEFAULT
, NULL
);
1256 list_create(&etype
->e_pool
, sizeof (endpnt_t
),
1257 offsetof(endpnt_t
, e_node
));
1258 list_create(&etype
->e_ilist
, sizeof (endpnt_t
),
1259 offsetof(endpnt_t
, e_idle
));
1262 * Check to see if we need to create a taskq for endpoint
1265 mutex_enter(&endpnt_taskq_lock
);
1266 if (taskq_created
== FALSE
) {
1267 taskq_created
= TRUE
;
1268 mutex_exit(&endpnt_taskq_lock
);
1269 ASSERT(endpnt_taskq
== NULL
);
1270 endpnt_taskq
= taskq_create("clts_endpnt_taskq", 1,
1271 minclsyspri
, 200, INT_MAX
, 0);
1273 mutex_exit(&endpnt_taskq_lock
);
1279 * Free an endpnt_type_t
1282 endpnt_type_free(struct endpnt_type
*etype
)
1284 mutex_destroy(&etype
->e_plock
);
1285 mutex_destroy(&etype
->e_ilock
);
1286 list_destroy(&etype
->e_pool
);
1287 list_destroy(&etype
->e_ilist
);
1288 kmem_free(etype
, sizeof (endpnt_type_t
));
1292 * Check the endpoint to ensure that it is suitable for use.
1294 * Possible return values:
1296 * return (1) - Endpoint is established, but needs to be re-opened.
1297 * return (0) && *newp == NULL - Endpoint is established, but unusable.
1298 * return (0) && *newp != NULL - Endpoint is established and usable.
1301 check_endpnt(struct endpnt
*endp
, struct endpnt
**newp
)
1305 mutex_enter(&endp
->e_lock
);
1306 ASSERT(endp
->e_ref
>= 1);
1309 * The first condition we check for is if the endpoint has been
1310 * allocated, but is unusable either because it has been closed or
1311 * has been marked stale. Only *one* thread will be allowed to
1312 * execute the then clause. This is enforced because the first thread
1313 * to check this condition will clear the flags, so that subsequent
1314 * thread(s) checking this endpoint will move on.
1316 if ((endp
->e_flags
& ENDPNT_ESTABLISHED
) &&
1317 (!(endp
->e_flags
& ENDPNT_BOUND
) ||
1318 (endp
->e_flags
& ENDPNT_STALE
))) {
1320 * Clear the flags here since they will be
1321 * set again by this thread. They need to be
1322 * individually cleared because we want to maintain
1323 * the state for ENDPNT_ONIDLE.
1325 endp
->e_flags
&= ~(ENDPNT_ESTABLISHED
|
1326 ENDPNT_WAITING
| ENDPNT_BOUND
| ENDPNT_STALE
);
1327 mutex_exit(&endp
->e_lock
);
1332 * The second condition is meant for any thread that is waiting for
1333 * an endpoint to become established. It will cv_wait() until
1334 * the condition for the endpoint has been changed to ENDPNT_BOUND or
1337 while (!(endp
->e_flags
& ENDPNT_BOUND
) &&
1338 !(endp
->e_flags
& ENDPNT_STALE
)) {
1339 endp
->e_flags
|= ENDPNT_WAITING
;
1340 cv_wait(&endp
->e_cv
, &endp
->e_lock
);
1343 ASSERT(endp
->e_flags
& ENDPNT_ESTABLISHED
);
1346 * The last case we check for is if the endpoint has been marked stale.
1347 * If this is the case then set *newp to NULL and return, so that the
1348 * caller is notified of the error and can take appropriate action.
1350 if (endp
->e_flags
& ENDPNT_STALE
) {
1354 mutex_exit(&endp
->e_lock
);
/*
 * Provide a fault injection setting to test error conditions.
 * When non-zero, endpnt_get() pretends it found a stale endpoint and
 * returns NULL, decrementing this counter each time.
 */
static int endpnt_get_return_null = 0;
1366 * Returns a handle (struct endpnt *) to an open and bound endpoint
1367 * specified by the knetconfig passed in. Returns NULL if no valid endpoint
1370 static struct endpnt
*
1371 endpnt_get(struct knetconfig
*config
, int useresvport
)
1373 struct endpnt_type
*n_etype
= NULL
;
1374 struct endpnt_type
*np
= NULL
;
1375 struct endpnt
*new = NULL
;
1376 struct endpnt
*endp
= NULL
;
1377 struct endpnt
*next
= NULL
;
1378 TIUSER
*tiptr
= NULL
;
1379 int rtries
= BINDRESVPORT_RETRIES
;
1383 zoneid_t zoneid
= rpc_zoneid();
1386 RPCLOG(1, "endpnt_get: protofmly %s, ", config
->knc_protofmly
);
1387 RPCLOG(1, "rdev %ld\n", config
->knc_rdev
);
1391 * Inject fault if desired. Pretend we have a stale endpoint
1394 if (endpnt_get_return_null
> 0) {
1395 endpnt_get_return_null
--;
1399 rw_enter(&endpnt_type_lock
, RW_READER
);
1402 for (np
= endpnt_type_list
; np
!= NULL
; np
= np
->e_next
)
1403 if ((np
->e_zoneid
== zoneid
) &&
1404 (np
->e_rdev
== config
->knc_rdev
) &&
1405 (strcmp(np
->e_protofmly
,
1406 config
->knc_protofmly
) == 0))
1409 if (np
== NULL
&& n_etype
!= NULL
) {
1410 ASSERT(rw_write_held(&endpnt_type_lock
));
1413 * Link the endpoint type onto the list
1415 n_etype
->e_next
= endpnt_type_list
;
1416 endpnt_type_list
= n_etype
;
1423 * The logic here is that we were unable to find an
1424 * endpnt_type_t that matched our criteria, so we allocate a
1425 * new one. Because kmem_alloc() needs to be called with
1426 * KM_SLEEP, we drop our locks so that we don't induce
1427 * deadlock. After allocating and initializing the
1428 * endpnt_type_t, we reaquire the lock and go back to check
1429 * if this entry needs to be added to the list. Since we do
1430 * some operations without any locking other threads may
1431 * have been looking for the same endpnt_type_t and gone
1432 * through this code path. We check for this case and allow
1433 * one thread to link its endpnt_type_t to the list and the
1434 * other threads will simply free theirs.
1436 rw_exit(&endpnt_type_lock
);
1437 n_etype
= endpnt_type_create(config
);
1440 * We need to reaquire the lock with RW_WRITER here so that
1441 * we can safely link the new endpoint type onto the list.
1443 rw_enter(&endpnt_type_lock
, RW_WRITER
);
1447 rw_exit(&endpnt_type_lock
);
1449 * If n_etype is not NULL, then another thread was able to
1450 * insert an endpnt_type_t of this type onto the list before
1451 * we did. Go ahead and free ours.
1453 if (n_etype
!= NULL
)
1454 endpnt_type_free(n_etype
);
1456 mutex_enter(&np
->e_ilock
);
1458 * The algorithm to hand out endpoints is to first
1459 * give out those that are idle if such endpoints
1460 * exist. Otherwise, create a new one if we haven't
1461 * reached the max threshold. Finally, we give out
1462 * endpoints in a pseudo LRU fashion (round-robin).
1464 * Note: The idle list is merely a hint of those endpoints
1465 * that should be idle. There exists a window after the
1466 * endpoint is released and before it is linked back onto the
1467 * idle list where a thread could get a reference to it and
1468 * use it. This is okay, since the reference counts will
1469 * still be consistent.
1471 if ((endp
= (endpnt_t
*)list_head(&np
->e_ilist
)) != NULL
) {
1472 timeout_id_t t_id
= 0;
1474 mutex_enter(&endp
->e_lock
);
1477 endp
->e_flags
&= ~ENDPNT_ONIDLE
;
1478 mutex_exit(&endp
->e_lock
);
1481 * Pop the endpoint off the idle list and hand it off
1483 list_remove(&np
->e_ilist
, endp
);
1485 if (np
->e_itimer
!= 0) {
1486 t_id
= np
->e_itimer
;
1489 mutex_exit(&np
->e_ilock
);
1491 * Reset the idle timer if it has been set
1493 if (t_id
!= (timeout_id_t
)0)
1494 (void) untimeout(t_id
);
1496 if (check_endpnt(endp
, &new) == 0)
1498 } else if (np
->e_cnt
>= clnt_clts_max_endpoints
) {
1500 * There are no idle endpoints currently, so
1501 * create a new one if we have not reached the maximum or
1502 * hand one out in round-robin.
1504 mutex_exit(&np
->e_ilock
);
1505 mutex_enter(&np
->e_plock
);
1507 mutex_enter(&endp
->e_lock
);
1509 mutex_exit(&endp
->e_lock
);
1511 ASSERT(endp
!= NULL
);
1513 * Advance the pointer to the next eligible endpoint, if
1516 if (np
->e_cnt
> 1) {
1517 next
= (endpnt_t
*)list_next(&np
->e_pool
, np
->e_pcurr
);
1519 next
= (endpnt_t
*)list_head(&np
->e_pool
);
1523 mutex_exit(&np
->e_plock
);
1526 * We need to check to see if this endpoint is bound or
1527 * not. If it is in progress then just wait until
1528 * the set up is complete
1530 if (check_endpnt(endp
, &new) == 0)
1533 mutex_exit(&np
->e_ilock
);
1534 mutex_enter(&np
->e_plock
);
1537 * Allocate a new endpoint to use. If we can't allocate any
1538 * more memory then use one that is already established if any
1539 * such endpoints exist.
1541 new = kmem_cache_alloc(endpnt_cache
, KM_NOSLEEP
);
1543 RPCLOG0(1, "endpnt_get: kmem_cache_alloc failed\n");
1545 * Try to recover by using an existing endpoint.
1547 if (np
->e_cnt
<= 0) {
1548 mutex_exit(&np
->e_plock
);
1552 if ((next
= list_next(&np
->e_pool
, np
->e_pcurr
)) !=
1555 ASSERT(endp
!= NULL
);
1556 mutex_enter(&endp
->e_lock
);
1558 mutex_exit(&endp
->e_lock
);
1559 mutex_exit(&np
->e_plock
);
1561 if (check_endpnt(endp
, &new) == 0)
1565 * Partially init an endpoint structure and put
1566 * it on the list, so that other interested threads
1567 * know that one is being created
1569 bzero(new, sizeof (struct endpnt
));
1571 cv_init(&new->e_cv
, NULL
, CV_DEFAULT
, NULL
);
1572 mutex_init(&new->e_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1577 * Link the endpoint into the pool.
1579 list_insert_head(&np
->e_pool
, new);
1581 if (np
->e_pcurr
== NULL
)
1583 mutex_exit(&np
->e_plock
);
1588 * The transport should be opened with sufficient privs
1591 error
= t_kopen(NULL
, config
->knc_rdev
, FREAD
|FWRITE
|FNDELAY
, &tiptr
,
1594 RPCLOG(1, "endpnt_get: t_kopen: %d\n", error
);
1598 new->e_tiptr
= tiptr
;
1599 rpc_poptimod(tiptr
->fp
->f_vnode
);
1602 * Allow the kernel to push the module on behalf of the user.
1604 error
= strioctl(tiptr
->fp
->f_vnode
, I_PUSH
, (intptr_t)"rpcmod", 0,
1605 K_TO_K
, cr
, &retval
);
1607 RPCLOG(1, "endpnt_get: kstr_push on rpcmod failed %d\n", error
);
1611 error
= strioctl(tiptr
->fp
->f_vnode
, RPC_CLIENT
, 0, 0, K_TO_K
,
1614 RPCLOG(1, "endpnt_get: strioctl failed %d\n", error
);
1619 * Connectionless data flow should bypass the stream head.
1621 new->e_wq
= tiptr
->fp
->f_vnode
->v_stream
->sd_wrq
->q_next
;
1623 error
= strioctl(tiptr
->fp
->f_vnode
, I_PUSH
, (intptr_t)"timod", 0,
1624 K_TO_K
, cr
, &retval
);
1626 RPCLOG(1, "endpnt_get: kstr_push on timod failed %d\n", error
);
1631 * Attempt to bind the endpoint. If we fail then propogate
1632 * error back to calling subsystem, so that it can be handled
1634 * If the caller has not specified reserved port usage then
1635 * take the system default.
1637 if (useresvport
== -1)
1638 useresvport
= clnt_clts_do_bindresvport
;
1641 (strcmp(config
->knc_protofmly
, NC_INET
) == 0 ||
1642 strcmp(config
->knc_protofmly
, NC_INET6
) == 0)) {
1645 bindresvport(new->e_tiptr
, NULL
, NULL
, FALSE
)) != 0) {
1647 "endpnt_get: bindresvport error %d\n", error
);
1648 if (error
!= EPROTO
) {
1656 (void) t_kclose(new->e_tiptr
, 1);
1658 * reopen with all privileges
1660 error
= t_kopen(NULL
, config
->knc_rdev
,
1661 FREAD
|FWRITE
|FNDELAY
,
1664 RPCLOG(1, "endpnt_get: t_kopen: %d\n", error
);
1665 new->e_tiptr
= NULL
;
1669 } else if ((error
= t_kbind(new->e_tiptr
, NULL
, NULL
)) != 0) {
1670 RPCLOG(1, "endpnt_get: t_kbind failed: %d\n", error
);
1675 * Set the flags and notify and waiters that we have an established
1678 mutex_enter(&new->e_lock
);
1679 new->e_flags
|= ENDPNT_ESTABLISHED
;
1680 new->e_flags
|= ENDPNT_BOUND
;
1681 if (new->e_flags
& ENDPNT_WAITING
) {
1682 cv_broadcast(&new->e_cv
);
1683 new->e_flags
&= ~ENDPNT_WAITING
;
1685 mutex_exit(&new->e_lock
);
1690 ASSERT(new != NULL
);
1692 * mark this endpoint as stale and notify any threads waiting
1693 * on this endpoint that it will be going away.
1695 mutex_enter(&new->e_lock
);
1696 if (new->e_ref
> 0) {
1697 new->e_flags
|= ENDPNT_ESTABLISHED
;
1698 new->e_flags
|= ENDPNT_STALE
;
1699 if (new->e_flags
& ENDPNT_WAITING
) {
1700 cv_broadcast(&new->e_cv
);
1701 new->e_flags
&= ~ENDPNT_WAITING
;
1705 new->e_tiptr
= NULL
;
1706 mutex_exit(&new->e_lock
);
1709 * If there was a transport endopoint opened, then close it.
1712 (void) t_kclose(tiptr
, 1);
1718 * Release a referece to the endpoint
1721 endpnt_rele(struct endpnt
*sp
)
1723 mutex_enter(&sp
->e_lock
);
1724 ASSERT(sp
->e_ref
> 0);
1727 * If the ref count is zero, then start the idle timer and link
1728 * the endpoint onto the idle list.
1730 if (sp
->e_ref
== 0) {
1731 sp
->e_itime
= gethrestime_sec();
1734 * Check to see if the endpoint is already linked to the idle
1735 * list, so that we don't try to reinsert it.
1737 if (sp
->e_flags
& ENDPNT_ONIDLE
) {
1738 mutex_exit(&sp
->e_lock
);
1739 mutex_enter(&sp
->e_type
->e_ilock
);
1740 endpnt_reap_settimer(sp
->e_type
);
1741 mutex_exit(&sp
->e_type
->e_ilock
);
1745 sp
->e_flags
|= ENDPNT_ONIDLE
;
1746 mutex_exit(&sp
->e_lock
);
1747 mutex_enter(&sp
->e_type
->e_ilock
);
1748 list_insert_tail(&sp
->e_type
->e_ilist
, sp
);
1749 endpnt_reap_settimer(sp
->e_type
);
1750 mutex_exit(&sp
->e_type
->e_ilock
);
1752 mutex_exit(&sp
->e_lock
);
1756 endpnt_reap_settimer(endpnt_type_t
*etp
)
1758 if (etp
->e_itimer
== (timeout_id_t
)0)
1759 etp
->e_itimer
= timeout(endpnt_reap_dispatch
, (void *)etp
,
1760 clnt_clts_taskq_dispatch_interval
);
1764 endpnt_reap_dispatch(void *a
)
1766 endpnt_type_t
*etp
= a
;
1769 * The idle timer has fired, so dispatch the taskq to close the
1772 if (taskq_dispatch(endpnt_taskq
, (task_func_t
*)endpnt_reap
, etp
,
1773 TQ_NOSLEEP
) == NULL
)
1775 mutex_enter(&etp
->e_ilock
);
1776 etp
->e_async_count
++;
1777 mutex_exit(&etp
->e_ilock
);
1781 * Traverse the idle list and close those endpoints that have reached their
1785 endpnt_reap(endpnt_type_t
*etp
)
1788 struct endpnt
*next_node
= NULL
;
1790 mutex_enter(&etp
->e_ilock
);
1791 e
= list_head(&etp
->e_ilist
);
1793 next_node
= list_next(&etp
->e_ilist
, e
);
1795 mutex_enter(&e
->e_lock
);
1797 mutex_exit(&e
->e_lock
);
1802 ASSERT(e
->e_ref
== 0);
1803 if (e
->e_itime
> 0 &&
1804 (e
->e_itime
+ clnt_clts_endpoint_reap_interval
) <
1805 gethrestime_sec()) {
1806 e
->e_flags
&= ~ENDPNT_BOUND
;
1807 (void) t_kclose(e
->e_tiptr
, 1);
1811 mutex_exit(&e
->e_lock
);
1815 if (--etp
->e_async_count
== 0)
1816 cv_signal(&etp
->e_async_cv
);
1817 mutex_exit(&etp
->e_ilock
);
1821 endpnt_reclaim(zoneid_t zoneid
)
1823 struct endpnt_type
*np
;
1825 struct endpnt
*next_node
= NULL
;
1829 list_create(&free_list
, sizeof (endpnt_t
), offsetof(endpnt_t
, e_node
));
1831 RPCLOG0(1, "endpnt_reclaim: reclaim callback started\n");
1832 rw_enter(&endpnt_type_lock
, RW_READER
);
1833 for (np
= endpnt_type_list
; np
!= NULL
; np
= np
->e_next
) {
1834 if (zoneid
!= ALL_ZONES
&& zoneid
!= np
->e_zoneid
)
1837 mutex_enter(&np
->e_plock
);
1838 RPCLOG(1, "endpnt_reclaim: protofmly %s, ",
1840 RPCLOG(1, "rdev %ld\n", np
->e_rdev
);
1841 RPCLOG(1, "endpnt_reclaim: found %d endpoint(s)\n",
1844 if (np
->e_cnt
== 0) {
1845 mutex_exit(&np
->e_plock
);
1850 * The nice thing about maintaining an idle list is that if
1851 * there are any endpoints to reclaim, they are going to be
1852 * on this list. Just go through and reap the one's that
1853 * have ref counts of zero.
1855 mutex_enter(&np
->e_ilock
);
1856 e
= list_head(&np
->e_ilist
);
1858 next_node
= list_next(&np
->e_ilist
, e
);
1859 mutex_enter(&e
->e_lock
);
1861 mutex_exit(&e
->e_lock
);
1865 ASSERT(e
->e_ref
== 0);
1866 mutex_exit(&e
->e_lock
);
1868 list_remove(&np
->e_ilist
, e
);
1869 list_remove(&np
->e_pool
, e
);
1870 list_insert_head(&free_list
, e
);
1876 mutex_exit(&np
->e_ilock
);
1878 * Reset the current pointer to be safe
1880 if ((e
= (struct endpnt
*)list_head(&np
->e_pool
)) != NULL
)
1883 ASSERT(np
->e_cnt
== 0);
1887 mutex_exit(&np
->e_plock
);
1889 rw_exit(&endpnt_type_lock
);
1891 while ((e
= list_head(&free_list
)) != NULL
) {
1892 list_remove(&free_list
, e
);
1893 if (e
->e_tiptr
!= NULL
)
1894 (void) t_kclose(e
->e_tiptr
, 1);
1896 cv_destroy(&e
->e_cv
);
1897 mutex_destroy(&e
->e_lock
);
1898 kmem_cache_free(endpnt_cache
, e
);
1900 list_destroy(&free_list
);
1901 RPCLOG(1, "endpnt_reclaim: reclaimed %d endpoint(s)\n", rcnt
);
1905 * Endpoint reclaim zones destructor callback routine.
1907 * After reclaiming any cached entries, we basically go through the endpnt_type
1908 * list, canceling outstanding timeouts and free'ing data structures.
1912 endpnt_destructor(zoneid_t zoneid
, void *a
)
1914 struct endpnt_type
**npp
;
1915 struct endpnt_type
*np
;
1916 struct endpnt_type
*free_list
= NULL
;
1917 timeout_id_t t_id
= 0;
1918 extern void clcleanup_zone(zoneid_t
);
1919 extern void clcleanup4_zone(zoneid_t
);
1921 /* Make sure NFS client handles are released. */
1922 clcleanup_zone(zoneid
);
1923 clcleanup4_zone(zoneid
);
1925 endpnt_reclaim(zoneid
);
1927 * We don't need to be holding on to any locks across the call to
1928 * endpnt_reclaim() and the code below; we know that no-one can
1929 * be holding open connections for this zone (all processes and kernel
1930 * threads are gone), so nothing could be adding anything to the list.
1932 rw_enter(&endpnt_type_lock
, RW_WRITER
);
1933 npp
= &endpnt_type_list
;
1934 while ((np
= *npp
) != NULL
) {
1935 if (np
->e_zoneid
!= zoneid
) {
1939 mutex_enter(&np
->e_plock
);
1940 mutex_enter(&np
->e_ilock
);
1941 if (np
->e_itimer
!= 0) {
1942 t_id
= np
->e_itimer
;
1945 ASSERT(np
->e_cnt
== 0);
1946 ASSERT(list_head(&np
->e_pool
) == NULL
);
1947 ASSERT(list_head(&np
->e_ilist
) == NULL
);
1949 mutex_exit(&np
->e_ilock
);
1950 mutex_exit(&np
->e_plock
);
1953 * untimeout() any outstanding timers that have not yet fired.
1955 if (t_id
!= (timeout_id_t
)0)
1956 (void) untimeout(t_id
);
1958 np
->e_next
= free_list
;
1961 rw_exit(&endpnt_type_lock
);
1963 while (free_list
!= NULL
) {
1965 free_list
= free_list
->e_next
;
1967 * Wait for threads in endpnt_taskq trying to reap endpnt_ts in
1968 * the endpnt_type_t.
1970 mutex_enter(&np
->e_ilock
);
1971 while (np
->e_async_count
> 0)
1972 cv_wait(&np
->e_async_cv
, &np
->e_ilock
);
1973 cv_destroy(&np
->e_async_cv
);
1974 mutex_destroy(&np
->e_plock
);
1975 mutex_destroy(&np
->e_ilock
);
1976 list_destroy(&np
->e_pool
);
1977 list_destroy(&np
->e_ilist
);
1978 kmem_free(np
, sizeof (endpnt_type_t
));
1983 * Endpoint reclaim kmem callback routine.
1987 endpnt_repossess(void *a
)
1990 * Reclaim idle endpnt's from all zones.
1992 if (endpnt_taskq
!= NULL
)
1993 (void) taskq_dispatch(endpnt_taskq
,
1994 (task_func_t
*)endpnt_reclaim
, (void *)ALL_ZONES
,
1999 * RPC request dispatch routine. Constructs a datagram message and wraps it
2000 * around the RPC request to pass downstream.
2003 clnt_clts_dispatch_send(queue_t
*q
, mblk_t
*mp
, struct netbuf
*addr
,
2004 calllist_t
*cp
, uint_t xid
, cred_t
*cr
)
2008 struct T_unitdata_req
*udreq
;
2011 * Set up the call record.
2015 cp
->call_status
= RPC_TIMEDOUT
;
2016 cp
->call_notified
= FALSE
;
2018 "clnt_clts_dispatch_send: putting xid 0x%x on "
2019 "dispatch list\n", xid
);
2020 cp
->call_hash
= call_hash(xid
, clnt_clts_hash_size
);
2021 cp
->call_bucket
= &clts_call_ht
[cp
->call_hash
];
2022 call_table_enter(cp
);
2025 * Construct the datagram
2027 msgsz
= (int)TUNITDATAREQSZ
;
2029 * Note: if the receiver uses SCM_UCRED/getpeerucred the pid will
2032 while (!(bp
= allocb_cred(msgsz
+ addr
->len
, cr
, NOPID
))) {
2033 if (strwaitbuf(msgsz
+ addr
->len
, BPRI_LO
))
2037 udreq
= (struct T_unitdata_req
*)bp
->b_wptr
;
2038 udreq
->PRIM_type
= T_UNITDATA_REQ
;
2039 udreq
->DEST_length
= addr
->len
;
2042 bcopy(addr
->buf
, bp
->b_wptr
+ msgsz
, addr
->len
);
2043 udreq
->DEST_offset
= (t_scalar_t
)msgsz
;
2046 udreq
->DEST_offset
= 0;
2047 udreq
->OPT_length
= 0;
2048 udreq
->OPT_offset
= 0;
2050 bp
->b_datap
->db_type
= M_PROTO
;
2051 bp
->b_wptr
+= msgsz
;
2054 * Link the datagram header with the actual data
2061 if (canput(cp
->call_wq
)) {
2062 put(cp
->call_wq
, bp
);
2070 * RPC response delivery routine. Deliver the response to the waiting
2071 * thread by matching the xid.
2074 clnt_clts_dispatch_notify(mblk_t
*mp
, int resp_off
, zoneid_t zoneid
)
2076 calllist_t
*e
= NULL
;
2080 unsigned char *hdr_offset
;
2084 * If the RPC response is not contained in the same mblk as the
2085 * datagram header, then move to the next mblk.
2087 hdr_offset
= mp
->b_rptr
;
2089 if ((mp
->b_wptr
- (mp
->b_rptr
+ resp_off
)) == 0)
2092 resp
->b_rptr
+= resp_off
;
2094 ASSERT(resp
!= NULL
);
2096 if ((IS_P2ALIGNED(resp
->b_rptr
, sizeof (uint32_t))) &&
2097 (resp
->b_wptr
- resp
->b_rptr
) >= sizeof (xid
))
2098 xid
= *((uint32_t *)resp
->b_rptr
);
2101 unsigned char *p
= (unsigned char *)&xid
;
2102 unsigned char *rptr
;
2106 * Copy the xid, byte-by-byte into xid.
2110 while (rptr
< tmp
->b_wptr
) {
2112 if (++i
>= sizeof (xid
))
2119 * If we got here, we ran out of mblk space before the
2120 * xid could be copied.
2122 ASSERT(tmp
== NULL
&& i
< sizeof (xid
));
2125 "clnt_dispatch_notify(clts): message less than "
2135 * Reset the read pointer back to the beginning of the protocol
2136 * header if we moved it.
2138 if (mp
->b_rptr
!= hdr_offset
)
2139 mp
->b_rptr
= hdr_offset
;
2141 hash
= call_hash(xid
, clnt_clts_hash_size
);
2142 chtp
= &clts_call_ht
[hash
];
2143 /* call_table_find returns with the hash bucket locked */
2144 call_table_find(chtp
, xid
, e
);
2147 mutex_enter(&e
->call_lock
);
2150 * verify that the reply is coming in on
2151 * the same zone that it was sent from.
2153 if (e
->call_zoneid
!= zoneid
) {
2154 mutex_exit(&e
->call_lock
);
2155 mutex_exit(&chtp
->ct_lock
);
2156 RPCLOG0(8, "clnt_dispatch_notify (clts): incorrect "
2163 * found thread waiting for this reply.
2165 if (e
->call_reply
) {
2167 "clnt_dispatch_notify (clts): discarding old "
2168 "reply for xid 0x%x\n",
2170 freemsg(e
->call_reply
);
2172 e
->call_notified
= TRUE
;
2174 e
->call_status
= RPC_SUCCESS
;
2175 cv_signal(&e
->call_cv
);
2176 mutex_exit(&e
->call_lock
);
2177 mutex_exit(&chtp
->ct_lock
);
2180 struct rpcstat
*rpcstat
;
2182 mutex_exit(&chtp
->ct_lock
);
2183 RPCLOG(8, "clnt_dispatch_notify (clts): no caller for reply "
2187 * This is unfortunate, but we need to lookup the zone so we
2188 * can increment its "rcbadxids" counter.
2190 zone
= zone_find_by_id(zoneid
);
2193 * The zone went away...
2197 rpcstat
= zone_getspecific(rpcstat_zone_key
, zone
);
2198 if (zone_status_get(zone
) >= ZONE_IS_SHUTTING_DOWN
) {
2205 RCSTAT_INCR(rpcstat
->rpc_clts_client
, rcbadxids
);
2211 * Init routine. Called when rpcmod is loaded.
2214 clnt_clts_init(void)
2216 endpnt_cache
= kmem_cache_create("clnt_clts_endpnt_cache",
2217 sizeof (struct endpnt
), 0, NULL
, NULL
, endpnt_repossess
, NULL
,
2220 rw_init(&endpnt_type_lock
, NULL
, RW_DEFAULT
, NULL
);
2223 * Perform simple bounds checking to make sure that the setting is
2226 if (clnt_clts_max_endpoints
<= 0) {
2227 if (clnt_clts_do_bindresvport
)
2228 clnt_clts_max_endpoints
= RESERVED_PORTSPACE
;
2230 clnt_clts_max_endpoints
= NONRESERVED_PORTSPACE
;
2233 if (clnt_clts_do_bindresvport
&&
2234 clnt_clts_max_endpoints
> RESERVED_PORTSPACE
)
2235 clnt_clts_max_endpoints
= RESERVED_PORTSPACE
;
2236 else if (clnt_clts_max_endpoints
> NONRESERVED_PORTSPACE
)
2237 clnt_clts_max_endpoints
= NONRESERVED_PORTSPACE
;
2239 if (clnt_clts_hash_size
< DEFAULT_MIN_HASH_SIZE
)
2240 clnt_clts_hash_size
= DEFAULT_MIN_HASH_SIZE
;
2243 * Defer creating the taskq until rpcmod gets pushed. If we are
2244 * in diskless boot mode, rpcmod will get loaded early even before
2245 * thread_create() is available.
2247 endpnt_taskq
= NULL
;
2248 taskq_created
= FALSE
;
2249 mutex_init(&endpnt_taskq_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2251 if (clnt_clts_endpoint_reap_interval
< DEFAULT_ENDPOINT_REAP_INTERVAL
)
2252 clnt_clts_endpoint_reap_interval
=
2253 DEFAULT_ENDPOINT_REAP_INTERVAL
;
2256 * Dispatch the taskq at an interval which is offset from the
2257 * interval that the endpoints should be reaped.
2259 clnt_clts_taskq_dispatch_interval
=
2260 (clnt_clts_endpoint_reap_interval
+ DEFAULT_INTERVAL_SHIFT
) * hz
;
2263 * Initialize the completion queue
2265 clts_call_ht
= call_table_init(clnt_clts_hash_size
);
2267 * Initialize the zone destructor callback.
2269 zone_key_create(&endpnt_destructor_key
, NULL
, NULL
, endpnt_destructor
);
2273 clnt_clts_fini(void)
2275 (void) zone_key_delete(endpnt_destructor_key
);