4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
32 * Portions of this source code were derived from Berkeley 4.3 BSD
33 * under license from the Regents of the University of California.
38 * Implements a kernel based, client side RPC over Connection Oriented
43 * Much of this file has been re-written to let NFS work better over slow
44 * transports. A description follows.
46 * One of the annoying things about kRPC/COTS is that it will temporarily
47 * create more than one connection between a client and server. This
48 * happens because when a connection is made, the end-points entry in the
49 * linked list of connections (headed by cm_hd), is removed so that other
50 * threads don't mess with it. Went ahead and bit the bullet by keeping
51 * the endpoint on the connection list and introducing state bits,
52 * condition variables etc. to the connection entry data structure (struct
55 * Here is a summary of the changes to cm-xprt:
57 * x_ctime is the timestamp of when the endpoint was last
58 * connected or disconnected. If an end-point is ever disconnected
59 * or re-connected, then any outstanding RPC request is presumed
60 * lost, telling clnt_cots_kcallit that it needs to re-send the
61 * request, not just wait for the original request's reply to
64 * x_thread flag which tells us if a thread is doing a connection attempt.
66 * x_waitdis flag which tells us we are waiting for a disconnect ACK.
68 * x_needdis flag which tells us we need to send a T_DISCONN_REQ
69 * to kill the connection.
71 * x_needrel flag which tells us we need to send a T_ORDREL_REQ to
72 * gracefully close the connection.
74 * #defined bitmasks for the all the b_* bits so that more
75 * efficient (and at times less clumsy) masks can be used to
76 * manipulate state in cases where multiple bits have to be
77 * set/cleared/checked in the same critical section.
79 * x_conn_cv and x_dis_cv are new condition variables to let
80 * threads know when the connection attempt is done, and to let
81 * the connecting thread know when the disconnect handshake is
84 * Added the CONN_HOLD() macro so that all reference holds have the same
87 * In the private (cku_private) portion of the client handle,
89 * cku_flags replaces the cku_sent a boolean. cku_flags keeps
90 * track of whether a request has been sent, and whether the
91 * client's handles call record is on the dispatch list (so that
92 * the reply can be matched by XID to the right client handle).
93 * The idea of CKU_ONQUEUE is that we can exit clnt_cots_kcallit()
94 * and still have the response find the right client handle so
95 * that the retry of CLNT_CALL() gets the result. Testing, found
96 * situations where if the timeout was increased, performance
97 * degraded. This was due to us hitting a window where the thread
98 * was back in rfscall() (probably printing server not responding)
99 * while the response came back but no place to put it.
101 * cku_ctime is just a cache of x_ctime. If they match,
102 * clnt_cots_kcallit() won't send a retry (unless the maximum
103 * receive count limit has been reached). If they don't match, then
104 * we assume the request has been lost, and a retry of the request
107 * cku_recv_attempts counts the number of receive count attempts
108 * after one try is sent on the wire.
110 * Added the clnt_delay() routine so that interruptible and
111 * noninterruptible delays are possible.
113 * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3. This is used to
114 * control how long the client delays before returning after getting
115 * ECONNREFUSED. At 3 seconds, 8 client threads per mount really does bash
116 * a server that may be booting and not yet started nfsd.
118 * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3) (with a tunable)
119 * Why don't we just wait forever (receive an infinite # of times)?
120 * Because the server may have rebooted. More insidious is that some
121 * servers (ours) will drop NFS/TCP requests in some cases. This is bad,
122 * but it is a reality.
124 * The case of a server doing orderly release really messes up the
125 * client's recovery, especially if the server's TCP implementation is
126 * buggy. It was found that the kRPC/COTS client was breaking some
127 * TPI rules, such as not waiting for the acknowledgement of a
128 * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
129 * T_DISCON_REQ in clnt_dispatch_notifyall()).
131 * One of things that we've seen is that a kRPC TCP endpoint goes into
132 * TIMEWAIT and thus a reconnect takes a long time to satisfy because
133 * that the TIMEWAIT state takes a while to finish. If a server sends a
134 * T_ORDREL_IND, there is little point in an RPC client doing a
135 * T_ORDREL_REQ, because the RPC request isn't going to make it (the
136 * server is saying that it won't accept any more data). So kRPC was
137 * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND. So now the
138 * connection skips the TIMEWAIT state and goes straight to a bound state
139 * that kRPC can quickly switch to connected.
141 * Code that issues TPI request must use waitforack() to wait for the
142 * corresponding ack (assuming there is one) in any future modifications.
143 * This works around problems that may be introduced by breaking TPI rules
144 * (by submitting new calls before earlier requests have been acked) in the
145 * case of a signal or other early return. waitforack() depends on
146 * clnt_dispatch_notifyconn() to issue the wakeup when the ack
147 * arrives, so adding new TPI calls may require corresponding changes
148 * to clnt_dispatch_notifyconn(). Presently, the timeout period is based on
149 * CLNT_MIN_TIMEOUT which is 10 seconds. If you modify this value, be sure
150 * not to set it too low or TPI ACKS will be lost.
153 #include <sys/param.h>
154 #include <sys/types.h>
155 #include <sys/user.h>
156 #include <sys/systm.h>
157 #include <sys/sysmacros.h>
158 #include <sys/proc.h>
159 #include <sys/socket.h>
160 #include <sys/file.h>
161 #include <sys/stream.h>
162 #include <sys/strsubr.h>
163 #include <sys/stropts.h>
164 #include <sys/strsun.h>
165 #include <sys/timod.h>
166 #include <sys/tiuser.h>
167 #include <sys/tihdr.h>
168 #include <sys/t_kuser.h>
169 #include <sys/fcntl.h>
170 #include <sys/errno.h>
171 #include <sys/kmem.h>
172 #include <sys/debug.h>
173 #include <sys/systm.h>
174 #include <sys/kstat.h>
175 #include <sys/t_lock.h>
177 #include <sys/cmn_err.h>
178 #include <sys/time.h>
179 #include <sys/isa_defs.h>
180 #include <sys/callb.h>
181 #include <sys/sunddi.h>
182 #include <sys/atomic.h>
185 #include <netinet/in.h>
186 #include <netinet/tcp.h>
188 #include <rpc/types.h>
190 #include <rpc/auth.h>
191 #include <rpc/clnt.h>
192 #include <rpc/rpc_msg.h>
194 #define COTS_DEFAULT_ALLOCSIZE 2048
196 #define WIRE_HDR_SIZE 20 /* serialized call header, sans proc number */
197 #define MSG_OFFSET 128 /* offset of call into the mblk */
199 const char *kinet_ntop6(uchar_t
*, char *, size_t);
201 static int clnt_cots_ksettimers(CLIENT
*, struct rpc_timers
*,
202 struct rpc_timers
*, int, void(*)(int, int, caddr_t
), caddr_t
, uint32_t);
203 static enum clnt_stat
clnt_cots_kcallit(CLIENT
*, rpcproc_t
, xdrproc_t
,
204 caddr_t
, xdrproc_t
, caddr_t
, struct timeval
);
205 static void clnt_cots_kabort(CLIENT
*);
206 static void clnt_cots_kerror(CLIENT
*, struct rpc_err
*);
207 static bool_t
clnt_cots_kfreeres(CLIENT
*, xdrproc_t
, caddr_t
);
208 static void clnt_cots_kdestroy(CLIENT
*);
209 static bool_t
clnt_cots_kcontrol(CLIENT
*, int, char *);
212 /* List of transports managed by the connection manager. */
214 TIUSER
*x_tiptr
; /* transport handle */
215 queue_t
*x_wq
; /* send queue */
216 clock_t x_time
; /* last time we handed this xprt out */
217 clock_t x_ctime
; /* time we went to CONNECTED */
218 int x_tidu_size
; /* TIDU size of this transport */
222 #ifdef _BIT_FIELDS_HTOL
223 b_closing
: 1, /* we've sent a ord rel on this conn */
224 b_dead
: 1, /* transport is closed or disconn */
225 b_doomed
: 1, /* too many conns, let this go idle */
226 b_connected
: 1, /* this connection is connected */
228 b_ordrel
: 1, /* do an orderly release? */
229 b_thread
: 1, /* thread doing connect */
230 b_waitdis
: 1, /* waiting for disconnect ACK */
231 b_needdis
: 1, /* need T_DISCON_REQ */
233 b_needrel
: 1, /* need T_ORDREL_REQ */
234 b_early_disc
: 1, /* got a T_ORDREL_IND or T_DISCON_IND */
235 /* disconnect during connect */
241 #ifdef _BIT_FIELDS_LTOH
244 b_early_disc
: 1, /* got a T_ORDREL_IND or T_DISCON_IND */
245 /* disconnect during connect */
246 b_needrel
: 1, /* need T_ORDREL_REQ */
248 b_needdis
: 1, /* need T_DISCON_REQ */
249 b_waitdis
: 1, /* waiting for disconnect ACK */
250 b_thread
: 1, /* thread doing connect */
251 b_ordrel
: 1, /* do an orderly release? */
253 b_connected
: 1, /* this connection is connected */
254 b_doomed
: 1, /* too many conns, let this go idle */
255 b_dead
: 1, /* transport is closed or disconn */
256 b_closing
: 1; /* we've sent a ord rel on this conn */
258 } bit
; unsigned int word
;
/*
 * Convenience accessors for the connection-state bits of struct cm_xprt.
 * Each short x_* name maps onto the corresponding b_* bit-field member
 * of the x_state union; x_state_flags views all of the bits at once as
 * a single word so that several bits can be set/cleared/tested together
 * using the X_* masks.
 */
#define	x_closing	x_state.bit.b_closing		/* sent orderly release */
#define	x_dead		x_state.bit.b_dead		/* closed or disconnected */
#define	x_doomed	x_state.bit.b_doomed		/* excess conn, let it idle out */
#define	x_connected	x_state.bit.b_connected		/* connection is up */
#define	x_ordrel	x_state.bit.b_ordrel		/* do an orderly release? */
#define	x_thread	x_state.bit.b_thread		/* a thread is connecting */
#define	x_waitdis	x_state.bit.b_waitdis		/* waiting for disconnect ACK */
#define	x_needdis	x_state.bit.b_needdis		/* need to send T_DISCON_REQ */
#define	x_needrel	x_state.bit.b_needrel		/* need to send T_ORDREL_REQ */
#define	x_early_disc	x_state.bit.b_early_disc	/* disconnect during connect */
#define	x_state_flags	x_state.word			/* all state bits as one word */
/*
 * Bitmask forms of the b_* state bits, for use against x_state_flags
 * when multiple bits must be set/cleared/checked in the same critical
 * section (see the summary comment at the top of this file).  The mask
 * values follow the _BIT_FIELDS_HTOL declaration order: b_closing is
 * the most significant bit.
 */
#define	X_CLOSING	0x80000000
#define	X_DEAD		0x40000000
#define	X_DOOMED	0x20000000
#define	X_CONNECTED	0x10000000
#define	X_ORDREL	0x08000000
#define	X_THREAD	0x04000000
#define	X_WAITDIS	0x02000000
#define	X_NEEDDIS	0x01000000
#define	X_NEEDREL	0x00800000
#define	X_EARLYDISC	0x00400000

/* Any of these means the transport is unusable for new traffic. */
#define	X_BADSTATES	(X_CLOSING | X_DEAD | X_DOOMED)
291 int x_ref
; /* number of users of this xprt */
292 int x_family
; /* address family of transport */
293 dev_t x_rdev
; /* device number of transport */
294 struct cm_xprt
*x_next
;
296 struct netbuf x_server
; /* destination address */
297 struct netbuf x_src
; /* src address (for retries) */
298 kmutex_t x_lock
; /* lock on this entry */
299 kcondvar_t x_cv
; /* to signal when can be closed */
300 kcondvar_t x_conn_cv
; /* to signal when connection attempt */
304 kcondvar_t x_dis_cv
; /* to signal when disconnect attempt */
306 zoneid_t x_zoneid
; /* zone this xprt belongs to */
309 typedef struct cm_kstat_xprt
{
311 kstat_named_t x_server
;
312 kstat_named_t x_family
;
313 kstat_named_t x_rdev
;
314 kstat_named_t x_time
;
315 kstat_named_t x_state
;
317 kstat_named_t x_port
;
320 static cm_kstat_xprt_t cm_kstat_template
= {
321 { "write_queue", KSTAT_DATA_UINT32
},
322 { "server", KSTAT_DATA_STRING
},
323 { "addr_family", KSTAT_DATA_UINT32
},
324 { "device", KSTAT_DATA_UINT32
},
325 { "time_stamp", KSTAT_DATA_UINT32
},
326 { "status", KSTAT_DATA_UINT32
},
327 { "ref_count", KSTAT_DATA_INT32
},
328 { "port", KSTAT_DATA_UINT32
},
332 * The inverse of this is connmgr_release().
334 #define CONN_HOLD(Cm_entry) {\
335 mutex_enter(&(Cm_entry)->x_lock); \
336 (Cm_entry)->x_ref++; \
337 mutex_exit(&(Cm_entry)->x_lock); \
342 * Private data per rpc handle. This structure is allocated by
343 * clnt_cots_kcreate, and freed by clnt_cots_kdestroy.
345 typedef struct cku_private_s
{
346 CLIENT cku_client
; /* client handle */
347 calllist_t cku_call
; /* for dispatching calls */
348 struct rpc_err cku_err
; /* error status */
350 struct netbuf cku_srcaddr
; /* source address for retries */
351 int cku_addrfmly
; /* for binding port */
352 struct netbuf cku_addr
; /* remote address */
353 dev_t cku_device
; /* device to use */
355 #define CKU_ONQUEUE 0x1
358 bool_t cku_progress
; /* for CLSET_PROGRESS */
359 uint32_t cku_xid
; /* current XID */
360 clock_t cku_ctime
; /* time stamp of when */
361 /* connection was created */
362 uint_t cku_recv_attempts
;
363 XDR cku_outxdr
; /* xdr routine for output */
364 XDR cku_inxdr
; /* xdr routine for input */
365 char cku_rpchdr
[WIRE_HDR_SIZE
+ 4];
366 /* pre-serialized rpc header */
368 uint_t cku_outbuflen
; /* default output mblk length */
369 struct cred
*cku_cred
; /* credentials */
370 bool_t cku_nodelayonerr
;
371 /* for CLSET_NODELAYONERR */
372 int cku_useresvport
; /* Use reserved port */
373 struct rpc_cots_client
*cku_stats
; /* stats for zone */
376 static struct cm_xprt
*connmgr_wrapconnect(struct cm_xprt
*,
377 const struct timeval
*, struct netbuf
*, int, struct netbuf
*,
378 struct rpc_err
*, bool_t
, bool_t
, cred_t
*);
380 static bool_t
connmgr_connect(struct cm_xprt
*, queue_t
*, struct netbuf
*,
381 int, calllist_t
*, int *, bool_t reconnect
,
382 const struct timeval
*, bool_t
, cred_t
*);
384 static void *connmgr_opt_getoff(mblk_t
*mp
, t_uscalar_t offset
,
385 t_uscalar_t length
, uint_t align_size
);
386 static bool_t
connmgr_setbufsz(calllist_t
*e
, queue_t
*wq
, cred_t
*cr
);
387 static bool_t
connmgr_getopt_int(queue_t
*wq
, int level
, int name
, int *val
,
388 calllist_t
*e
, cred_t
*cr
);
389 static bool_t
connmgr_setopt_int(queue_t
*wq
, int level
, int name
, int val
,
390 calllist_t
*e
, cred_t
*cr
);
391 static bool_t
connmgr_setopt(queue_t
*, int, int, calllist_t
*, cred_t
*cr
);
392 static void connmgr_sndrel(struct cm_xprt
*);
393 static void connmgr_snddis(struct cm_xprt
*);
394 static void connmgr_close(struct cm_xprt
*);
395 static void connmgr_release(struct cm_xprt
*);
396 static struct cm_xprt
*connmgr_wrapget(struct netbuf
*, const struct timeval
*,
399 static struct cm_xprt
*connmgr_get(struct netbuf
*, const struct timeval
*,
400 struct netbuf
*, int, struct netbuf
*, struct rpc_err
*, dev_t
,
401 bool_t
, int, cred_t
*);
403 static void connmgr_cancelconn(struct cm_xprt
*);
404 static enum clnt_stat
connmgr_cwait(struct cm_xprt
*, const struct timeval
*,
406 static void connmgr_dis_and_wait(struct cm_xprt
*);
408 static int clnt_dispatch_send(queue_t
*, mblk_t
*, calllist_t
*, uint_t
,
411 static int clnt_delay(clock_t, bool_t
);
413 static int waitforack(calllist_t
*, t_scalar_t
, const struct timeval
*, bool_t
);
416 * Operations vector for TCP/IP based RPC
418 static struct clnt_ops tcp_ops
= {
419 clnt_cots_kcallit
, /* do rpc call */
420 clnt_cots_kabort
, /* abort call */
421 clnt_cots_kerror
, /* return error status */
422 clnt_cots_kfreeres
, /* free results */
423 clnt_cots_kdestroy
, /* destroy rpc handle */
424 clnt_cots_kcontrol
, /* the ioctl() of rpc */
425 clnt_cots_ksettimers
, /* set retry timers */
428 static int rpc_kstat_instance
= 0; /* keeps the current instance */
429 /* number for the next kstat_create */
431 static struct cm_xprt
*cm_hd
= NULL
;
432 static kmutex_t connmgr_lock
; /* for connection mngr's list of transports */
434 extern kmutex_t clnt_max_msg_lock
;
436 static calllist_t
*clnt_pending
= NULL
;
437 extern kmutex_t clnt_pending_lock
;
439 static int clnt_cots_hash_size
= DEFAULT_HASH_SIZE
;
441 static call_table_t
*cots_call_ht
;
443 static const struct rpc_cots_client
{
444 kstat_named_t rccalls
;
445 kstat_named_t rcbadcalls
;
446 kstat_named_t rcbadxids
;
447 kstat_named_t rctimeouts
;
448 kstat_named_t rcnewcreds
;
449 kstat_named_t rcbadverfs
;
450 kstat_named_t rctimers
;
451 kstat_named_t rccantconn
;
452 kstat_named_t rcnomem
;
453 kstat_named_t rcintrs
;
454 } cots_rcstat_tmpl
= {
455 { "calls", KSTAT_DATA_UINT64
},
456 { "badcalls", KSTAT_DATA_UINT64
},
457 { "badxids", KSTAT_DATA_UINT64
},
458 { "timeouts", KSTAT_DATA_UINT64
},
459 { "newcreds", KSTAT_DATA_UINT64
},
460 { "badverfs", KSTAT_DATA_UINT64
},
461 { "timers", KSTAT_DATA_UINT64
},
462 { "cantconn", KSTAT_DATA_UINT64
},
463 { "nomem", KSTAT_DATA_UINT64
},
464 { "interrupts", KSTAT_DATA_UINT64
}
/*
 * Atomically increment the named 64-bit kstat counter `x' (e.g. rccalls,
 * rcbadcalls) in the per-zone COTS client statistics structure `p'
 * (a struct rpc_cots_client *).
 */
#define	COTSRCSTAT_INCR(p, x)	\
	atomic_inc_64(&(p)->x.value.ui64)
470 #define CLNT_MAX_CONNS 1 /* concurrent connections between clnt/srvr */
471 int clnt_max_conns
= CLNT_MAX_CONNS
;
473 #define CLNT_MIN_TIMEOUT 10 /* seconds to wait after we get a */
474 /* connection reset */
475 #define CLNT_MIN_CONNTIMEOUT 5 /* seconds to wait for a connection */
478 int clnt_cots_min_tout
= CLNT_MIN_TIMEOUT
;
479 int clnt_cots_min_conntout
= CLNT_MIN_CONNTIMEOUT
;
482 * Limit the number of times we will attempt to receive a reply without
483 * re-sending a response.
485 #define CLNT_MAXRECV_WITHOUT_RETRY 3
486 uint_t clnt_cots_maxrecv
= CLNT_MAXRECV_WITHOUT_RETRY
;
488 uint_t
*clnt_max_msg_sizep
;
489 void (*clnt_stop_idle
)(queue_t
*wq
);
/*
 * Convert between a cku_private_t and its embedded CLIENT handle.
 * ptoh: private -> handle, by taking the address of the embedded
 * cku_client member.  htop: handle -> private, via the cl_private
 * back-pointer stored at clnt_cots_kcreate() time.
 */
#define	ptoh(p)	(&((p)->cku_client))
#define	htop(h)	((cku_private_t *)((h)->cl_private))
497 #define REFRESHES 2 /* authentication refreshes */
500 * The following is used to determine the global default behavior for
501 * COTS when binding to a local port.
503 * If the value is set to 1 the default will be to select a reserved
504 * (aka privileged) port, if the value is zero the default will be to
505 * use non-reserved ports. Users of kRPC may override this by using
506 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
508 int clnt_cots_do_bindresvport
= 1;
510 static zone_key_t zone_cots_key
;
513 * Defaults TCP send and receive buffer size for RPC connections.
514 * These values can be tuned by /etc/system.
516 int rpc_send_bufsz
= 1024*1024;
517 int rpc_recv_bufsz
= 1024*1024;
519 * To use system-wide default for TCP send and receive buffer size,
520 * use /etc/system to set rpc_default_tcp_bufsz to 1:
522 * set rpcmod:rpc_default_tcp_bufsz=1
524 int rpc_default_tcp_bufsz
= 0;
527 * We need to do this after all kernel threads in the zone have exited.
531 clnt_zone_destroy(zoneid_t zoneid
, void *unused
)
533 struct cm_xprt
**cmp
;
534 struct cm_xprt
*cm_entry
;
535 struct cm_xprt
*freelist
= NULL
;
537 mutex_enter(&connmgr_lock
);
539 while ((cm_entry
= *cmp
) != NULL
) {
540 if (cm_entry
->x_zoneid
== zoneid
) {
541 *cmp
= cm_entry
->x_next
;
542 cm_entry
->x_next
= freelist
;
545 cmp
= &cm_entry
->x_next
;
548 mutex_exit(&connmgr_lock
);
549 while ((cm_entry
= freelist
) != NULL
) {
550 freelist
= cm_entry
->x_next
;
551 connmgr_close(cm_entry
);
556 clnt_cots_kcreate(dev_t dev
, struct netbuf
*addr
, int family
, rpcprog_t prog
,
557 rpcvers_t vers
, uint_t max_msgsize
, cred_t
*cred
, CLIENT
**ncl
)
561 struct rpc_msg call_msg
;
562 struct rpcstat
*rpcstat
;
564 RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog
);
566 rpcstat
= zone_getspecific(rpcstat_zone_key
, rpc_zone());
567 ASSERT(rpcstat
!= NULL
);
569 /* Allocate and initialize the client handle. */
570 p
= kmem_zalloc(sizeof (*p
), KM_SLEEP
);
574 h
->cl_private
= (caddr_t
)p
;
575 h
->cl_auth
= authkern_create();
576 h
->cl_ops
= &tcp_ops
;
578 cv_init(&p
->cku_call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
579 mutex_init(&p
->cku_call
.call_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
582 * If the current sanity check size in rpcmod is smaller
583 * than the size needed, then increase the sanity check.
585 if (max_msgsize
!= 0 && clnt_max_msg_sizep
!= NULL
&&
586 max_msgsize
> *clnt_max_msg_sizep
) {
587 mutex_enter(&clnt_max_msg_lock
);
588 if (max_msgsize
> *clnt_max_msg_sizep
)
589 *clnt_max_msg_sizep
= max_msgsize
;
590 mutex_exit(&clnt_max_msg_lock
);
593 p
->cku_outbuflen
= COTS_DEFAULT_ALLOCSIZE
;
595 /* Preserialize the call message header */
598 call_msg
.rm_direction
= CALL
;
599 call_msg
.rm_call
.cb_rpcvers
= RPC_MSG_VERSION
;
600 call_msg
.rm_call
.cb_prog
= prog
;
601 call_msg
.rm_call
.cb_vers
= vers
;
603 xdrmem_create(&p
->cku_outxdr
, p
->cku_rpchdr
, WIRE_HDR_SIZE
, XDR_ENCODE
);
605 if (!xdr_callhdr(&p
->cku_outxdr
, &call_msg
)) {
606 RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization "
608 auth_destroy(h
->cl_auth
);
609 kmem_free(p
, sizeof (cku_private_t
));
610 RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n");
611 return (EINVAL
); /* XXX */
615 * The zalloc initialized the fields below.
618 * p->cku_srcaddr.len = 0;
619 * p->cku_srcaddr.maxlen = 0;
624 p
->cku_addrfmly
= family
;
625 p
->cku_addr
.buf
= kmem_zalloc(addr
->maxlen
, KM_SLEEP
);
626 p
->cku_addr
.maxlen
= addr
->maxlen
;
627 p
->cku_addr
.len
= addr
->len
;
628 bcopy(addr
->buf
, p
->cku_addr
.buf
, addr
->len
);
629 p
->cku_stats
= rpcstat
->rpc_cots_client
;
630 p
->cku_useresvport
= -1; /* value is has not been set */
638 clnt_cots_kabort(CLIENT
*h
)
643 * Return error info on this handle.
646 clnt_cots_kerror(CLIENT
*h
, struct rpc_err
*err
)
648 /* LINTED pointer alignment */
649 cku_private_t
*p
= htop(h
);
655 clnt_cots_kfreeres(CLIENT
*h
, xdrproc_t xdr_res
, caddr_t res_ptr
)
657 /* LINTED pointer alignment */
658 cku_private_t
*p
= htop(h
);
661 xdrs
= &(p
->cku_outxdr
);
662 xdrs
->x_op
= XDR_FREE
;
663 return ((*xdr_res
)(xdrs
, res_ptr
));
667 clnt_cots_kcontrol(CLIENT
*h
, int cmd
, char *arg
)
669 cku_private_t
*p
= htop(h
);
673 p
->cku_progress
= TRUE
;
680 p
->cku_xid
= *((uint32_t *)arg
);
687 *((uint32_t *)arg
) = p
->cku_xid
;
690 case CLSET_NODELAYONERR
:
694 if (*((bool_t
*)arg
) == TRUE
) {
695 p
->cku_nodelayonerr
= TRUE
;
698 if (*((bool_t
*)arg
) == FALSE
) {
699 p
->cku_nodelayonerr
= FALSE
;
704 case CLGET_NODELAYONERR
:
708 *((bool_t
*)arg
) = p
->cku_nodelayonerr
;
711 case CLSET_BINDRESVPORT
:
715 if (*(int *)arg
!= 1 && *(int *)arg
!= 0)
718 p
->cku_useresvport
= *(int *)arg
;
722 case CLGET_BINDRESVPORT
:
726 *(int *)arg
= p
->cku_useresvport
;
736 * Destroy rpc handle. Frees the space used for output buffer,
737 * private data, and handle structure.
740 clnt_cots_kdestroy(CLIENT
*h
)
742 /* LINTED pointer alignment */
743 cku_private_t
*p
= htop(h
);
744 calllist_t
*call
= &p
->cku_call
;
746 RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h
);
747 RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p
->cku_xid
);
749 if (p
->cku_flags
& CKU_ONQUEUE
) {
750 RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x "
751 "from dispatch list\n", p
->cku_xid
);
752 call_table_remove(call
);
755 if (call
->call_reply
)
756 freemsg(call
->call_reply
);
757 cv_destroy(&call
->call_cv
);
758 mutex_destroy(&call
->call_lock
);
760 kmem_free(p
->cku_srcaddr
.buf
, p
->cku_srcaddr
.maxlen
);
761 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
762 kmem_free(p
, sizeof (*p
));
765 static int clnt_cots_pulls
;
766 #define RM_HDR_SIZE 4 /* record mark header size */
769 * Call remote procedure.
771 static enum clnt_stat
772 clnt_cots_kcallit(CLIENT
*h
, rpcproc_t procnum
, xdrproc_t xdr_args
,
773 caddr_t argsp
, xdrproc_t xdr_results
, caddr_t resultsp
, struct timeval wait
)
775 /* LINTED pointer alignment */
776 cku_private_t
*p
= htop(h
);
777 calllist_t
*call
= &p
->cku_call
;
779 struct rpc_msg reply_msg
;
784 struct netbuf
*retryaddr
;
785 struct cm_xprt
*cm_entry
= NULL
;
787 int len
, waitsecs
, max_waitsecs
;
789 int refreshes
= REFRESHES
;
792 enum clnt_stat status
;
793 struct timeval cwait
;
794 bool_t delay_first
= FALSE
;
797 RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum
);
798 COTSRCSTAT_INCR(p
->cku_stats
, rccalls
);
800 RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait
.tv_sec
);
801 RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait
.tv_usec
);
804 * Look out for zero length timeouts. We don't want to
805 * wait zero seconds for a connection to be established.
807 if (wait
.tv_sec
< clnt_cots_min_conntout
) {
808 cwait
.tv_sec
= clnt_cots_min_conntout
;
810 RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,",
812 RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout
);
819 connmgr_release(cm_entry
);
826 * If the call is not a retry, allocate a new xid and cache it
827 * for future retries.
829 * Treat call as a retry for purposes of binding the source
830 * port only if we actually attempted to send anything on
833 if (p
->cku_xid
== 0) {
834 p
->cku_xid
= alloc_xid();
835 call
->call_zoneid
= rpc_zoneid();
838 * We need to ASSERT here that our xid != 0 because this
839 * determines whether or not our call record gets placed on
840 * the hash table or the linked list. By design, we mandate
841 * that RPC calls over cots must have xid's != 0, so we can
842 * ensure proper management of the hash table.
844 ASSERT(p
->cku_xid
!= 0);
847 p
->cku_flags
&= ~CKU_SENT
;
849 if (p
->cku_flags
& CKU_ONQUEUE
) {
850 RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old"
851 " one (%p)\n", (void *)call
);
852 call_table_remove(call
);
853 p
->cku_flags
&= ~CKU_ONQUEUE
;
854 RPCLOG(64, "clnt_cots_kcallit: removing call from "
855 "dispatch list because xid was zero (now 0x%x)\n",
859 if (call
->call_reply
!= NULL
) {
860 freemsg(call
->call_reply
);
861 call
->call_reply
= NULL
;
863 } else if (p
->cku_srcaddr
.buf
== NULL
|| p
->cku_srcaddr
.len
== 0) {
866 } else if (p
->cku_flags
& CKU_SENT
) {
867 retryaddr
= &p
->cku_srcaddr
;
871 * Bug ID 1246045: Nothing was sent, so set retryaddr to
872 * NULL and let connmgr_get() bind to any source port it
878 RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p
->cku_xid
);
879 RPCLOG(64, " flags = 0x%x\n", p
->cku_flags
);
881 p
->cku_err
.re_status
= RPC_TIMEDOUT
;
882 p
->cku_err
.re_errno
= p
->cku_err
.re_terrno
= 0;
884 cm_entry
= connmgr_wrapget(retryaddr
, &cwait
, p
);
886 if (cm_entry
== NULL
) {
887 RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n",
888 clnt_sperrno(p
->cku_err
.re_status
));
891 * The reasons why we fail to create a connection are
892 * varied. In most cases we don't want the caller to
893 * immediately retry. This could have one or more
894 * bad effects. This includes flooding the net with
895 * connect requests to ports with no listener; a hard
896 * kernel loop due to all the "reserved" TCP ports being
902 * Even if we end up returning EINTR, we still count
903 * a "can't connect", because the connection manager
904 * might have been committed to waiting for or timing out on
907 COTSRCSTAT_INCR(p
->cku_stats
, rccantconn
);
908 switch (p
->cku_err
.re_status
) {
910 p
->cku_err
.re_errno
= EINTR
;
913 * No need to delay because a UNIX signal(2)
914 * interrupted us. The caller likely won't
915 * retry the CLNT_CALL() and even if it does,
916 * we assume the caller knows what it is doing.
922 p
->cku_err
.re_errno
= ETIMEDOUT
;
925 * No need to delay because timed out already
926 * on the connection request and assume that the
927 * transport time out is longer than our minimum
928 * timeout, or least not too much smaller.
933 case RPC_SYSTEMERROR
:
936 * We want to delay here because a transient
937 * system error has a better chance of going away
938 * if we delay a bit. If it's not transient, then
939 * we don't want end up in a hard kernel loop
942 ASSERT(p
->cku_err
.re_errno
!= 0);
946 case RPC_CANTCONNECT
:
948 * RPC_CANTCONNECT is set on T_ERROR_ACK which
949 * implies some error down in the TCP layer or
950 * below. If cku_nodelayonerr is set then we
951 * assume the caller knows not to try too hard.
953 RPCLOG0(8, "clnt_cots_kcallit: connection failed,");
954 RPCLOG0(8, " re_status=RPC_CANTCONNECT,");
955 RPCLOG(8, " re_errno=%d,", p
->cku_err
.re_errno
);
956 RPCLOG(8, " cku_nodelayonerr=%d", p
->cku_nodelayonerr
);
957 if (p
->cku_nodelayonerr
== TRUE
)
960 p
->cku_err
.re_errno
= EIO
;
966 * We want to delay here because we likely
967 * got a refused connection.
969 if (p
->cku_err
.re_errno
== 0)
970 p
->cku_err
.re_errno
= EIO
;
972 RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
973 p
->cku_err
.re_errno
);
979 * We delay here because it is better to err
980 * on the side of caution. If we got here then
981 * status could have been RPC_SUCCESS, but we
982 * know that we did not get a connection, so
983 * force the rpc status to RPC_CANTCONNECT.
985 p
->cku_err
.re_status
= RPC_CANTCONNECT
;
986 p
->cku_err
.re_errno
= EIO
;
989 if (delay_first
== TRUE
)
990 ticks
= clnt_cots_min_tout
* drv_usectohz(1000000);
995 * If we've never sent any request on this connection (send count
996 * is zero, or the connection has been reset), cache the
997 * the connection's create time and send a request (possibly a retry)
999 if ((p
->cku_flags
& CKU_SENT
) == 0 ||
1000 p
->cku_ctime
!= cm_entry
->x_ctime
) {
1001 p
->cku_ctime
= cm_entry
->x_ctime
;
1003 } else if ((p
->cku_flags
& CKU_SENT
) && (p
->cku_flags
& CKU_ONQUEUE
) &&
1004 (call
->call_reply
!= NULL
||
1005 p
->cku_recv_attempts
< clnt_cots_maxrecv
)) {
1008 * If we've sent a request and our call is on the dispatch
1009 * queue and we haven't made too many receive attempts, then
1010 * don't re-send, just receive.
1012 p
->cku_recv_attempts
++;
1017 * Now we create the RPC request in a STREAMS message. We have to do
1018 * this after the call to connmgr_get so that we have the correct
1019 * TIDU size for the transport.
1021 tidu_size
= cm_entry
->x_tidu_size
;
1022 len
= MSG_OFFSET
+ MAX(tidu_size
, RM_HDR_SIZE
+ WIRE_HDR_SIZE
);
1024 while ((mp
= allocb(len
, BPRI_MED
)) == NULL
) {
1025 if (strwaitbuf(len
, BPRI_MED
)) {
1026 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
1027 p
->cku_err
.re_errno
= ENOSR
;
1028 COTSRCSTAT_INCR(p
->cku_stats
, rcnomem
);
1032 xdrs
= &p
->cku_outxdr
;
1033 xdrmblk_init(xdrs
, mp
, XDR_ENCODE
, tidu_size
);
1034 mpsize
= MBLKSIZE(mp
);
1035 ASSERT(mpsize
>= len
);
1036 ASSERT(mp
->b_rptr
== mp
->b_datap
->db_base
);
1039 * If the size of mblk is not appreciably larger than what we
1040 * asked, then resize the mblk to exactly len bytes. The reason for
1041 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
1042 * (from TCP over ethernet), and the arguments to the RPC require
1043 * 2800 bytes. Ideally we want the protocol to render two
1044 * ~1400 byte segments over the wire. However if allocb() gives us a 2k
1045 * mblk, and we allocate a second mblk for the remainder, the protocol
1046 * module may generate 3 segments over the wire:
1047 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and
1048 * 892 for the third. If we "waste" 448 bytes in the first mblk,
1049 * the XDR encoding will generate two ~1400 byte mblks, and the
1050 * protocol module is more likely to produce properly sized segments.
1052 if ((mpsize
>> 1) <= len
)
1053 mp
->b_rptr
+= (mpsize
- len
);
1056 * Adjust b_rptr to reserve space for the non-data protocol headers
1057 * any downstream modules might like to add, and for the
1058 * record marking header.
1060 mp
->b_rptr
+= (MSG_OFFSET
+ RM_HDR_SIZE
);
1062 if (h
->cl_auth
->ah_cred
.oa_flavor
!= RPCSEC_GSS
) {
1063 /* Copy in the preserialized RPC header information. */
1064 bcopy(p
->cku_rpchdr
, mp
->b_rptr
, WIRE_HDR_SIZE
);
1066 /* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */
1067 XDR_SETPOS(xdrs
, (uint_t
)(mp
->b_rptr
- mp
->b_datap
->db_base
+
1070 ASSERT((mp
->b_wptr
- mp
->b_rptr
) == WIRE_HDR_SIZE
);
1072 /* Serialize the procedure number and the arguments. */
1073 if ((!XDR_PUTINT32(xdrs
, (int32_t *)&procnum
)) ||
1074 (!AUTH_MARSHALL(h
->cl_auth
, xdrs
, p
->cku_cred
)) ||
1075 (!(*xdr_args
)(xdrs
, argsp
))) {
1076 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
1077 p
->cku_err
.re_errno
= EIO
;
1081 (*(uint32_t *)(mp
->b_rptr
)) = p
->cku_xid
;
1083 uint32_t *uproc
= (uint32_t *)&p
->cku_rpchdr
[WIRE_HDR_SIZE
];
1084 IXDR_PUT_U_INT32(uproc
, procnum
);
1086 (*(uint32_t *)(&p
->cku_rpchdr
[0])) = p
->cku_xid
;
1088 /* Use XDR_SETPOS() to set the b_wptr. */
1089 XDR_SETPOS(xdrs
, (uint_t
)(mp
->b_rptr
- mp
->b_datap
->db_base
));
1091 /* Serialize the procedure number and the arguments. */
1092 if (!AUTH_WRAP(h
->cl_auth
, p
->cku_rpchdr
, WIRE_HDR_SIZE
+4,
1093 xdrs
, xdr_args
, argsp
)) {
1094 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
1095 p
->cku_err
.re_errno
= EIO
;
1100 RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n",
1103 wq
= cm_entry
->x_wq
;
1107 status
= clnt_dispatch_send(wq
, mp
, call
, p
->cku_xid
,
1108 (p
->cku_flags
& CKU_ONQUEUE
));
1110 if ((status
== RPC_CANTSEND
) && (call
->call_reason
== ENOBUFS
)) {
1112 * QFULL condition, allow some time for queue to drain
1113 * and try again. Give up after waiting for all timeout
1114 * specified for the call, or zone is going away.
1116 max_waitsecs
= wait
.tv_sec
? wait
.tv_sec
: clnt_cots_min_tout
;
1117 if ((waitsecs
++ < max_waitsecs
) &&
1118 !(zone_status_get(curproc
->p_zone
) >=
1119 ZONE_IS_SHUTTING_DOWN
)) {
1121 /* wait 1 sec for queue to drain */
1122 if (clnt_delay(drv_usectohz(1000000),
1123 h
->cl_nosignal
) == EINTR
) {
1124 p
->cku_err
.re_errno
= EINTR
;
1125 p
->cku_err
.re_status
= RPC_INTR
;
1131 goto dispatch_again
;
1133 p
->cku_err
.re_status
= status
;
1134 p
->cku_err
.re_errno
= call
->call_reason
;
1135 DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend
);
1141 /* adjust timeout to account for time wait to send */
1142 wait
.tv_sec
-= waitsecs
;
1143 if (wait
.tv_sec
< 0) {
1144 /* pick up reply on next retry */
1147 DTRACE_PROBE2(clnt_cots__sendwait
, CLIENT
*, h
,
1151 RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
1152 (uint_t
)p
->cku_xid
);
1153 p
->cku_flags
= (CKU_ONQUEUE
|CKU_SENT
);
1154 p
->cku_recv_attempts
= 1;
1157 time_sent
= ddi_get_lbolt();
1161 * Wait for a reply or a timeout. If there is no error or timeout,
1162 * (both indicated by call_status), call->call_reply will contain
1163 * the RPC reply message.
1166 mutex_enter(&call
->call_lock
);
1168 if (call
->call_status
== RPC_TIMEDOUT
) {
1170 * Indicate that the lwp is not to be stopped while waiting
1171 * for this network traffic. This is to avoid deadlock while
1172 * debugging a process via /proc and also to avoid recursive
1173 * mutex_enter()s due to NFS page faults while stopping
1174 * (NFS holds locks when it calls here).
1176 clock_t cv_wait_ret
;
1180 klwp_t
*lwp
= ttolwp(curthread
);
1185 oldlbolt
= ddi_get_lbolt();
1186 timout
= wait
.tv_sec
* drv_usectohz(1000000) +
1187 drv_usectohz(wait
.tv_usec
) + oldlbolt
;
1189 * Iterate until the call_status is changed to something
1190 * other that RPC_TIMEDOUT, or if cv_timedwait_sig() returns
1191 * something <=0 zero. The latter means that we timed
1195 while ((cv_wait_ret
= cv_timedwait(&call
->call_cv
,
1196 &call
->call_lock
, timout
)) > 0 &&
1197 call
->call_status
== RPC_TIMEDOUT
)
1200 while ((cv_wait_ret
= cv_timedwait_sig(
1202 &call
->call_lock
, timout
)) > 0 &&
1203 call
->call_status
== RPC_TIMEDOUT
)
1206 switch (cv_wait_ret
) {
1209 * If we got out of the above loop with
1210 * cv_timedwait_sig() returning 0, then we were
1211 * interrupted regardless what call_status is.
1216 /* cv_timedwait_sig() timed out */
1221 * We were cv_signaled(). If we didn't
1222 * get a successful call_status and returned
1223 * before time expired, delay up to clnt_cots_min_tout
1224 * seconds so that the caller doesn't immediately
1225 * try to call us again and thus force the
1226 * same condition that got us here (such
1227 * as a RPC_XPRTFAILED due to the server not
1228 * listening on the end-point.
1230 if (call
->call_status
!= RPC_SUCCESS
) {
1234 curlbolt
= ddi_get_lbolt();
1235 ticks
= clnt_cots_min_tout
*
1236 drv_usectohz(1000000);
1237 diff
= curlbolt
- oldlbolt
;
1251 * Get the reply message, if any. This will be freed at the end
1252 * whether or not an error occurred.
1254 mp
= call
->call_reply
;
1255 call
->call_reply
= NULL
;
1258 * call_err is the error info when the call is on dispatch queue.
1259 * cku_err is the error info returned to the caller.
1260 * Sync cku_err with call_err for local message processing.
1263 status
= call
->call_status
;
1264 p
->cku_err
= call
->call_err
;
1265 mutex_exit(&call
->call_lock
);
1267 if (status
!= RPC_SUCCESS
) {
1270 now
= ddi_get_lbolt();
1272 COTSRCSTAT_INCR(p
->cku_stats
, rcintrs
);
1273 p
->cku_err
.re_status
= RPC_INTR
;
1274 p
->cku_err
.re_errno
= EINTR
;
1275 RPCLOG(1, "clnt_cots_kcallit: xid 0x%x",
1277 RPCLOG(1, "signal interrupted at %ld", now
);
1278 RPCLOG(1, ", was sent at %ld\n", time_sent
);
1280 COTSRCSTAT_INCR(p
->cku_stats
, rctimeouts
);
1281 p
->cku_err
.re_errno
= ETIMEDOUT
;
1282 RPCLOG(1, "clnt_cots_kcallit: timed out at %ld",
1284 RPCLOG(1, ", was sent at %ld\n", time_sent
);
1288 case RPC_XPRTFAILED
:
1289 if (p
->cku_err
.re_errno
== 0)
1290 p
->cku_err
.re_errno
= EIO
;
1292 RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
1293 p
->cku_err
.re_errno
);
1296 case RPC_SYSTEMERROR
:
1297 ASSERT(p
->cku_err
.re_errno
);
1298 RPCLOG(1, "clnt_cots_kcallit: system error: %d\n",
1299 p
->cku_err
.re_errno
);
1303 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
1304 p
->cku_err
.re_errno
= EIO
;
1305 RPCLOG(1, "clnt_cots_kcallit: error: %s\n",
1306 clnt_sperrno(status
));
1309 if (p
->cku_err
.re_status
!= RPC_TIMEDOUT
) {
1311 if (p
->cku_flags
& CKU_ONQUEUE
) {
1312 call_table_remove(call
);
1313 p
->cku_flags
&= ~CKU_ONQUEUE
;
1316 RPCLOG(64, "clnt_cots_kcallit: non TIMEOUT so xid 0x%x "
1317 "taken off dispatch list\n", p
->cku_xid
);
1318 if (call
->call_reply
) {
1319 freemsg(call
->call_reply
);
1320 call
->call_reply
= NULL
;
1322 } else if (wait
.tv_sec
!= 0) {
1324 * We've sent the request over TCP and so we have
1325 * every reason to believe it will get
1326 * delivered. In which case returning a timeout is not
1329 if (p
->cku_progress
== TRUE
&&
1330 p
->cku_recv_attempts
< clnt_cots_maxrecv
) {
1331 p
->cku_err
.re_status
= RPC_INPROGRESS
;
1337 xdrs
= &p
->cku_inxdr
;
1338 xdrmblk_init(xdrs
, mp
, XDR_DECODE
, 0);
1340 reply_msg
.rm_direction
= REPLY
;
1341 reply_msg
.rm_reply
.rp_stat
= MSG_ACCEPTED
;
1342 reply_msg
.acpted_rply
.ar_stat
= SUCCESS
;
1344 reply_msg
.acpted_rply
.ar_verf
= _null_auth
;
1346 * xdr_results will be done in AUTH_UNWRAP.
1348 reply_msg
.acpted_rply
.ar_results
.where
= NULL
;
1349 reply_msg
.acpted_rply
.ar_results
.proc
= xdr_void
;
1351 if (xdr_replymsg(xdrs
, &reply_msg
)) {
1352 enum clnt_stat re_status
;
1354 _seterr_reply(&reply_msg
, &p
->cku_err
);
1356 re_status
= p
->cku_err
.re_status
;
1357 if (re_status
== RPC_SUCCESS
) {
1359 * Reply is good, check auth.
1361 if (!AUTH_VALIDATE(h
->cl_auth
,
1362 &reply_msg
.acpted_rply
.ar_verf
)) {
1363 COTSRCSTAT_INCR(p
->cku_stats
, rcbadverfs
);
1364 RPCLOG0(1, "clnt_cots_kcallit: validation "
1367 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
1368 mutex_enter(&call
->call_lock
);
1369 if (call
->call_reply
== NULL
)
1370 call
->call_status
= RPC_TIMEDOUT
;
1371 mutex_exit(&call
->call_lock
);
1373 } else if (!AUTH_UNWRAP(h
->cl_auth
, xdrs
,
1374 xdr_results
, resultsp
)) {
1375 RPCLOG0(1, "clnt_cots_kcallit: validation "
1376 "failure (unwrap)\n");
1377 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
1378 p
->cku_err
.re_errno
= EIO
;
1381 /* set errno in case we can't recover */
1382 if (re_status
!= RPC_VERSMISMATCH
&&
1383 re_status
!= RPC_AUTHERROR
&&
1384 re_status
!= RPC_PROGVERSMISMATCH
)
1385 p
->cku_err
.re_errno
= EIO
;
1387 if (re_status
== RPC_AUTHERROR
) {
1389 * Maybe our credential need to be refreshed
1393 * There is the potential that the
1394 * cm_entry has/will be marked dead,
1395 * so drop the connection altogether,
1396 * force REFRESH to establish new
1399 connmgr_cancelconn(cm_entry
);
1403 (void) xdr_rpc_free_verifier(xdrs
,
1406 if (p
->cku_flags
& CKU_ONQUEUE
) {
1407 call_table_remove(call
);
1408 p
->cku_flags
&= ~CKU_ONQUEUE
;
1411 "clnt_cots_kcallit: AUTH_ERROR, xid"
1412 " 0x%x removed off dispatch list\n",
1414 if (call
->call_reply
) {
1415 freemsg(call
->call_reply
);
1416 call
->call_reply
= NULL
;
1419 if ((refreshes
> 0) &&
1420 AUTH_REFRESH(h
->cl_auth
, &reply_msg
,
1426 COTSRCSTAT_INCR(p
->cku_stats
,
1428 COTSRCSTAT_INCR(p
->cku_stats
,
1434 * We have used the client handle to
1435 * do an AUTH_REFRESH and the RPC status may
1436 * be set to RPC_SUCCESS; Let's make sure to
1437 * set it to RPC_AUTHERROR.
1439 p
->cku_err
.re_status
= RPC_AUTHERROR
;
1442 * Map recoverable and unrecoverable
1443 * authentication errors to appropriate errno
1445 switch (p
->cku_err
.re_why
) {
1448 * This could be a failure where the
1449 * server requires use of a reserved
1450 * port, check and optionally set the
1451 * client handle useresvport trying
1452 * one more time. Next go round we
1453 * fall out with the tooweak error.
1455 if (p
->cku_useresvport
!= 1) {
1456 p
->cku_useresvport
= 1;
1465 case AUTH_INVALIDRESP
:
1467 case RPCSEC_GSS_NOCRED
:
1468 case RPCSEC_GSS_FAILED
:
1469 p
->cku_err
.re_errno
= EACCES
;
1471 case AUTH_REJECTEDCRED
:
1472 case AUTH_REJECTEDVERF
:
1473 default: p
->cku_err
.re_errno
= EIO
;
1476 RPCLOG(1, "clnt_cots_kcallit : authentication"
1477 " failed with RPC_AUTHERROR of type %d\n",
1478 (int)p
->cku_err
.re_why
);
1483 /* reply didn't decode properly. */
1484 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
1485 p
->cku_err
.re_errno
= EIO
;
1486 RPCLOG0(1, "clnt_cots_kcallit: decode failure\n");
1489 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
1491 if (p
->cku_flags
& CKU_ONQUEUE
) {
1492 call_table_remove(call
);
1493 p
->cku_flags
&= ~CKU_ONQUEUE
;
1496 RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list",
1498 RPCLOG(64, " status is %s\n", clnt_sperrno(p
->cku_err
.re_status
));
1501 connmgr_release(cm_entry
);
1505 if ((p
->cku_flags
& CKU_ONQUEUE
) == 0 && call
->call_reply
) {
1506 freemsg(call
->call_reply
);
1507 call
->call_reply
= NULL
;
1509 if (p
->cku_err
.re_status
!= RPC_SUCCESS
) {
1510 RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n");
1511 COTSRCSTAT_INCR(p
->cku_stats
, rcbadcalls
);
1515 * No point in delaying if the zone is going away.
1517 if (delay_first
== TRUE
&&
1518 !(zone_status_get(curproc
->p_zone
) >= ZONE_IS_SHUTTING_DOWN
)) {
1519 if (clnt_delay(ticks
, h
->cl_nosignal
) == EINTR
) {
1520 p
->cku_err
.re_errno
= EINTR
;
1521 p
->cku_err
.re_status
= RPC_INTR
;
1524 return (p
->cku_err
.re_status
);
1528 * Kinit routine for cots. This sets up the correct operations in
1529 * the client handle, as the handle may have previously been a clts
1530 * handle, and clears the xid field so there is no way a new call
1531 * could be mistaken for a retry. It also sets in the handle the
1532 * information that is passed at create/kinit time but needed at
1533 * call time, as cots creates the transport at call time - device,
1534 * address of the server, protocol family.
1537 clnt_cots_kinit(CLIENT
*h
, dev_t dev
, int family
, struct netbuf
*addr
,
1538 int max_msgsize
, cred_t
*cred
)
1540 /* LINTED pointer alignment */
1541 cku_private_t
*p
= htop(h
);
1542 calllist_t
*call
= &p
->cku_call
;
1544 h
->cl_ops
= &tcp_ops
;
1545 if (p
->cku_flags
& CKU_ONQUEUE
) {
1546 call_table_remove(call
);
1547 p
->cku_flags
&= ~CKU_ONQUEUE
;
1548 RPCLOG(64, "clnt_cots_kinit: removing call for xid 0x%x from"
1549 " dispatch list\n", p
->cku_xid
);
1552 if (call
->call_reply
!= NULL
) {
1553 freemsg(call
->call_reply
);
1554 call
->call_reply
= NULL
;
1557 call
->call_bucket
= NULL
;
1558 call
->call_hash
= 0;
1561 * We don't clear cku_flags here, because clnt_cots_kcallit()
1562 * takes care of handling the cku_flags reset.
1565 p
->cku_device
= dev
;
1566 p
->cku_addrfmly
= family
;
1569 if (p
->cku_addr
.maxlen
< addr
->len
) {
1570 if (p
->cku_addr
.maxlen
!= 0 && p
->cku_addr
.buf
!= NULL
)
1571 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
1572 p
->cku_addr
.buf
= kmem_zalloc(addr
->maxlen
, KM_SLEEP
);
1573 p
->cku_addr
.maxlen
= addr
->maxlen
;
1576 p
->cku_addr
.len
= addr
->len
;
1577 bcopy(addr
->buf
, p
->cku_addr
.buf
, addr
->len
);
1580 * If the current sanity check size in rpcmod is smaller
1581 * than the size needed, then increase the sanity check.
1583 if (max_msgsize
!= 0 && clnt_max_msg_sizep
!= NULL
&&
1584 max_msgsize
> *clnt_max_msg_sizep
) {
1585 mutex_enter(&clnt_max_msg_lock
);
1586 if (max_msgsize
> *clnt_max_msg_sizep
)
1587 *clnt_max_msg_sizep
= max_msgsize
;
1588 mutex_exit(&clnt_max_msg_lock
);
1593 * ksettimers is a no-op for cots, with the exception of setting the xid.
1597 clnt_cots_ksettimers(CLIENT
*h
, struct rpc_timers
*t
, struct rpc_timers
*all
,
1598 int minimum
, void (*feedback
)(int, int, caddr_t
), caddr_t arg
,
1601 /* LINTED pointer alignment */
1602 cku_private_t
*p
= htop(h
);
1606 COTSRCSTAT_INCR(p
->cku_stats
, rctimers
);
1610 extern void rpc_poptimod(struct vnode
*);
1611 extern int kstr_push(struct vnode
*, char *);
1614 conn_kstat_update(kstat_t
*ksp
, int rw
)
1616 struct cm_xprt
*cm_entry
;
1617 struct cm_kstat_xprt
*cm_ksp_data
;
1621 if (rw
== KSTAT_WRITE
)
1623 if (ksp
== NULL
|| ksp
->ks_private
== NULL
)
1625 cm_entry
= (struct cm_xprt
*)ksp
->ks_private
;
1626 cm_ksp_data
= (struct cm_kstat_xprt
*)ksp
->ks_data
;
1628 cm_ksp_data
->x_wq
.value
.ui32
= (uint32_t)(uintptr_t)cm_entry
->x_wq
;
1629 cm_ksp_data
->x_family
.value
.ui32
= cm_entry
->x_family
;
1630 cm_ksp_data
->x_rdev
.value
.ui32
= (uint32_t)cm_entry
->x_rdev
;
1631 cm_ksp_data
->x_time
.value
.ui32
= cm_entry
->x_time
;
1632 cm_ksp_data
->x_ref
.value
.ui32
= cm_entry
->x_ref
;
1633 cm_ksp_data
->x_state
.value
.ui32
= cm_entry
->x_state_flags
;
1635 if (cm_entry
->x_server
.buf
) {
1636 fbuf
= cm_ksp_data
->x_server
.value
.str
.addr
.ptr
;
1637 if (cm_entry
->x_family
== AF_INET
&&
1638 cm_entry
->x_server
.len
==
1639 sizeof (struct sockaddr_in
)) {
1640 struct sockaddr_in
*sa
;
1641 sa
= (struct sockaddr_in
*)
1642 cm_entry
->x_server
.buf
;
1643 b
= (uchar_t
*)&sa
->sin_addr
;
1644 (void) sprintf(fbuf
,
1645 "%03d.%03d.%03d.%03d", b
[0] & 0xFF, b
[1] & 0xFF,
1646 b
[2] & 0xFF, b
[3] & 0xFF);
1647 cm_ksp_data
->x_port
.value
.ui32
=
1648 (uint32_t)sa
->sin_port
;
1649 } else if (cm_entry
->x_family
== AF_INET6
&&
1650 cm_entry
->x_server
.len
>=
1651 sizeof (struct sockaddr_in6
)) {
1652 /* extract server IP address & port */
1653 struct sockaddr_in6
*sin6
;
1654 sin6
= (struct sockaddr_in6
*)cm_entry
->x_server
.buf
;
1655 (void) kinet_ntop6((uchar_t
*)&sin6
->sin6_addr
, fbuf
,
1657 cm_ksp_data
->x_port
.value
.ui32
= sin6
->sin6_port
;
1659 struct sockaddr_in
*sa
;
1661 sa
= (struct sockaddr_in
*)cm_entry
->x_server
.buf
;
1662 b
= (uchar_t
*)&sa
->sin_addr
;
1663 (void) sprintf(fbuf
,
1664 "%03d.%03d.%03d.%03d", b
[0] & 0xFF, b
[1] & 0xFF,
1665 b
[2] & 0xFF, b
[3] & 0xFF);
1667 KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data
->x_server
) =
1676 * We want a version of delay which is interruptible by a UNIX signal
1677 * Return EINTR if an interrupt occured.
1680 clnt_delay(clock_t ticks
, bool_t nosignal
)
1682 if (nosignal
== TRUE
) {
1686 return (delay_sig(ticks
));
1690 * Wait for a connection until a timeout, or until we are
1691 * signalled that there has been a connection state change.
1693 static enum clnt_stat
1694 connmgr_cwait(struct cm_xprt
*cm_entry
, const struct timeval
*waitp
,
1698 clock_t timout
, cv_stat
;
1699 enum clnt_stat clstat
;
1700 unsigned int old_state
;
1702 ASSERT(MUTEX_HELD(&connmgr_lock
));
1704 * We wait for the transport connection to be made, or an
1705 * indication that it could not be made.
1707 clstat
= RPC_TIMEDOUT
;
1708 interrupted
= FALSE
;
1710 old_state
= cm_entry
->x_state_flags
;
1712 * Now loop until cv_timedwait{_sig} returns because of
1713 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be
1714 * cv_signalled for various other reasons too. So loop
1715 * until there is a state change on the connection.
1718 timout
= waitp
->tv_sec
* drv_usectohz(1000000) +
1719 drv_usectohz(waitp
->tv_usec
) + ddi_get_lbolt();
1722 while ((cv_stat
= cv_timedwait(&cm_entry
->x_conn_cv
,
1723 &connmgr_lock
, timout
)) > 0 &&
1724 cm_entry
->x_state_flags
== old_state
)
1727 while ((cv_stat
= cv_timedwait_sig(&cm_entry
->x_conn_cv
,
1728 &connmgr_lock
, timout
)) > 0 &&
1729 cm_entry
->x_state_flags
== old_state
)
1732 if (cv_stat
== 0) /* got intr signal? */
1736 if ((cm_entry
->x_state_flags
& (X_BADSTATES
|X_CONNECTED
)) ==
1738 clstat
= RPC_SUCCESS
;
1740 if (interrupted
== TRUE
)
1742 RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n",
1743 clnt_sperrno(clstat
));
1750 * Primary interface for how RPC grabs a connection.
1752 static struct cm_xprt
*
1754 struct netbuf
*retryaddr
,
1755 const struct timeval
*waitp
,
1758 struct cm_xprt
*cm_entry
;
1760 cm_entry
= connmgr_get(retryaddr
, waitp
, &p
->cku_addr
, p
->cku_addrfmly
,
1761 &p
->cku_srcaddr
, &p
->cku_err
, p
->cku_device
,
1762 p
->cku_client
.cl_nosignal
, p
->cku_useresvport
, p
->cku_cred
);
1764 if (cm_entry
== NULL
) {
1766 * Re-map the call status to RPC_INTR if the err code is
1767 * EINTR. This can happen if calls status is RPC_TLIERROR.
1768 * However, don't re-map if signalling has been turned off.
1769 * XXX Really need to create a separate thread whenever
1770 * there isn't an existing connection.
1772 if (p
->cku_err
.re_errno
== EINTR
) {
1773 if (p
->cku_client
.cl_nosignal
== TRUE
)
1774 p
->cku_err
.re_errno
= EIO
;
1776 p
->cku_err
.re_status
= RPC_INTR
;
1784 * Obtains a transport to the server specified in addr. If a suitable transport
1785 * does not already exist in the list of cached transports, a new connection
1786 * is created, connected, and added to the list. The connection is for sending
1787 * only - the reply message may come back on another transport connection.
1789 * To implement round-robin load balancing with multiple client connections,
1790 * the last entry on the list is always selected. Once the entry is selected
1791 * it's re-inserted to the head of the list.
1793 static struct cm_xprt
*
1795 struct netbuf
*retryaddr
,
1796 const struct timeval
*waitp
, /* changed to a ptr to converse stack */
1797 struct netbuf
*destaddr
,
1799 struct netbuf
*srcaddr
,
1800 struct rpc_err
*rpcerr
,
1806 struct cm_xprt
*cm_entry
;
1807 struct cm_xprt
*lru_entry
;
1808 struct cm_xprt
**cmp
, **prev
;
1815 zoneid_t zoneid
= rpc_zoneid();
1818 * If the call is not a retry, look for a transport entry that
1819 * goes to the server of interest.
1821 mutex_enter(&connmgr_lock
);
1823 if (retryaddr
== NULL
) {
1826 cm_entry
= lru_entry
= NULL
;
1828 prev
= cmp
= &cm_hd
;
1829 while ((cm_entry
= *cmp
) != NULL
) {
1830 ASSERT(cm_entry
!= cm_entry
->x_next
);
1832 * Garbage collect conections that are marked
1833 * for needs disconnect.
1835 if (cm_entry
->x_needdis
) {
1836 CONN_HOLD(cm_entry
);
1837 connmgr_dis_and_wait(cm_entry
);
1838 connmgr_release(cm_entry
);
1840 * connmgr_lock could have been
1841 * dropped for the disconnect
1842 * processing so start over.
1848 * Garbage collect the dead connections that have
1849 * no threads working on them.
1851 if ((cm_entry
->x_state_flags
& (X_DEAD
|X_THREAD
)) ==
1853 mutex_enter(&cm_entry
->x_lock
);
1854 if (cm_entry
->x_ref
!= 0) {
1859 cmp
= &cm_entry
->x_next
;
1860 mutex_exit(&cm_entry
->x_lock
);
1863 mutex_exit(&cm_entry
->x_lock
);
1864 *cmp
= cm_entry
->x_next
;
1865 mutex_exit(&connmgr_lock
);
1866 connmgr_close(cm_entry
);
1867 mutex_enter(&connmgr_lock
);
1872 if ((cm_entry
->x_state_flags
& X_BADSTATES
) == 0 &&
1873 cm_entry
->x_zoneid
== zoneid
&&
1874 cm_entry
->x_rdev
== device
&&
1875 destaddr
->len
== cm_entry
->x_server
.len
&&
1876 bcmp(destaddr
->buf
, cm_entry
->x_server
.buf
,
1877 destaddr
->len
) == 0) {
1879 * If the matching entry isn't connected,
1880 * attempt to reconnect it.
1882 if (cm_entry
->x_connected
== FALSE
) {
1884 * We don't go through trying
1885 * to find the least recently
1886 * used connected because
1887 * connmgr_reconnect() briefly
1888 * dropped the connmgr_lock,
1889 * allowing a window for our
1890 * accounting to be messed up.
1891 * In any case, a re-connected
1892 * connection is as good as
1895 return (connmgr_wrapconnect(cm_entry
,
1896 waitp
, destaddr
, addrfmly
, srcaddr
,
1897 rpcerr
, TRUE
, nosignal
, cr
));
1901 /* keep track of the last entry */
1902 lru_entry
= cm_entry
;
1905 cmp
= &cm_entry
->x_next
;
1908 if (i
> clnt_max_conns
) {
1909 RPCLOG(8, "connmgr_get: too many conns, dooming entry"
1910 " %p\n", (void *)lru_entry
->x_tiptr
);
1911 lru_entry
->x_doomed
= TRUE
;
1916 * If we are at the maximum number of connections to
1917 * the server, hand back the least recently used one.
1919 if (i
== clnt_max_conns
) {
1921 * Copy into the handle the source address of
1922 * the connection, which we will use in case of
1925 if (srcaddr
->len
!= lru_entry
->x_src
.len
) {
1926 if (srcaddr
->len
> 0)
1927 kmem_free(srcaddr
->buf
,
1929 srcaddr
->buf
= kmem_zalloc(
1930 lru_entry
->x_src
.len
, KM_SLEEP
);
1931 srcaddr
->maxlen
= srcaddr
->len
=
1932 lru_entry
->x_src
.len
;
1934 bcopy(lru_entry
->x_src
.buf
, srcaddr
->buf
, srcaddr
->len
);
1935 RPCLOG(2, "connmgr_get: call going out on %p\n",
1937 lru_entry
->x_time
= ddi_get_lbolt();
1938 CONN_HOLD(lru_entry
);
1940 if ((i
> 1) && (prev
!= &cm_hd
)) {
1942 * remove and re-insert entry at head of list.
1944 *prev
= lru_entry
->x_next
;
1945 lru_entry
->x_next
= cm_hd
;
1949 mutex_exit(&connmgr_lock
);
1955 * This is the retry case (retryaddr != NULL). Retries must
1956 * be sent on the same source port as the original call.
1960 * Walk the list looking for a connection with a source address
1961 * that matches the retry address.
1965 while ((cm_entry
= *cmp
) != NULL
) {
1966 ASSERT(cm_entry
!= cm_entry
->x_next
);
1969 * determine if this connection matches the passed
1970 * in retry address. If it does not match, advance
1971 * to the next element on the list.
1973 if (zoneid
!= cm_entry
->x_zoneid
||
1974 device
!= cm_entry
->x_rdev
||
1975 retryaddr
->len
!= cm_entry
->x_src
.len
||
1976 bcmp(retryaddr
->buf
, cm_entry
->x_src
.buf
,
1977 retryaddr
->len
) != 0) {
1978 cmp
= &cm_entry
->x_next
;
1982 * Garbage collect conections that are marked
1983 * for needs disconnect.
1985 if (cm_entry
->x_needdis
) {
1986 CONN_HOLD(cm_entry
);
1987 connmgr_dis_and_wait(cm_entry
);
1988 connmgr_release(cm_entry
);
1990 * connmgr_lock could have been
1991 * dropped for the disconnect
1992 * processing so start over.
1994 goto start_retry_loop
;
1997 * Garbage collect the dead connections that have
1998 * no threads working on them.
2000 if ((cm_entry
->x_state_flags
& (X_DEAD
|X_THREAD
)) ==
2002 mutex_enter(&cm_entry
->x_lock
);
2003 if (cm_entry
->x_ref
!= 0) {
2008 cmp
= &cm_entry
->x_next
;
2009 mutex_exit(&cm_entry
->x_lock
);
2012 mutex_exit(&cm_entry
->x_lock
);
2013 *cmp
= cm_entry
->x_next
;
2014 mutex_exit(&connmgr_lock
);
2015 connmgr_close(cm_entry
);
2016 mutex_enter(&connmgr_lock
);
2017 goto start_retry_loop
;
2021 * Sanity check: if the connection with our source
2022 * port is going to some other server, something went
2023 * wrong, as we never delete connections (i.e. release
2024 * ports) unless they have been idle. In this case,
2025 * it is probably better to send the call out using
2026 * a new source address than to fail it altogether,
2027 * since that port may never be released.
2029 if (destaddr
->len
!= cm_entry
->x_server
.len
||
2030 bcmp(destaddr
->buf
, cm_entry
->x_server
.buf
,
2031 destaddr
->len
) != 0) {
2032 RPCLOG(1, "connmgr_get: tiptr %p"
2033 " is going to a different server"
2034 " with the port that belongs"
2035 " to us!\n", (void *)cm_entry
->x_tiptr
);
2041 * If the connection of interest is not connected and we
2042 * can't reconnect it, then the server is probably
2043 * still down. Return NULL to the caller and let it
2044 * retry later if it wants to. We have a delay so the
2045 * machine doesn't go into a tight retry loop. If the
2046 * entry was already connected, or the reconnected was
2047 * successful, return this entry.
2049 if (cm_entry
->x_connected
== FALSE
) {
2050 return (connmgr_wrapconnect(cm_entry
,
2051 waitp
, destaddr
, addrfmly
, NULL
,
2052 rpcerr
, TRUE
, nosignal
, cr
));
2054 CONN_HOLD(cm_entry
);
2056 cm_entry
->x_time
= ddi_get_lbolt();
2057 mutex_exit(&connmgr_lock
);
2058 RPCLOG(2, "connmgr_get: found old "
2059 "transport %p for retry\n",
2066 * We cannot find an entry in the list for this retry.
2067 * Either the entry has been removed temporarily to be
2068 * reconnected by another thread, or the original call
2069 * got a port but never got connected,
2070 * and hence the transport never got put in the
2071 * list. Fall through to the "create new connection" code -
2072 * the former case will fail there trying to rebind the port,
2073 * and the later case (and any other pathological cases) will
2074 * rebind and reconnect and not hang the client machine.
2076 RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
2079 * Set up a transport entry in the connection manager's list.
2081 cm_entry
= (struct cm_xprt
*)
2082 kmem_zalloc(sizeof (struct cm_xprt
), KM_SLEEP
);
2084 cm_entry
->x_server
.buf
= kmem_zalloc(destaddr
->len
, KM_SLEEP
);
2085 bcopy(destaddr
->buf
, cm_entry
->x_server
.buf
, destaddr
->len
);
2086 cm_entry
->x_server
.len
= cm_entry
->x_server
.maxlen
= destaddr
->len
;
2088 cm_entry
->x_state_flags
= X_THREAD
;
2089 cm_entry
->x_ref
= 1;
2090 cm_entry
->x_family
= addrfmly
;
2091 cm_entry
->x_rdev
= device
;
2092 cm_entry
->x_zoneid
= zoneid
;
2093 mutex_init(&cm_entry
->x_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
2094 cv_init(&cm_entry
->x_cv
, NULL
, CV_DEFAULT
, NULL
);
2095 cv_init(&cm_entry
->x_conn_cv
, NULL
, CV_DEFAULT
, NULL
);
2096 cv_init(&cm_entry
->x_dis_cv
, NULL
, CV_DEFAULT
, NULL
);
2099 * Note that we add this partially initialized entry to the
2100 * connection list. This is so that we don't have connections to
2103 * Note that x_src is not initialized at this point. This is because
2104 * retryaddr might be NULL in which case x_src is whatever
2105 * t_kbind/bindresvport gives us. If another thread wants a
2106 * connection to the same server, seemingly we have an issue, but we
2107 * don't. If the other thread comes in with retryaddr == NULL, then it
2108 * will never look at x_src, and it will end up waiting in
2109 * connmgr_cwait() for the first thread to finish the connection
2110 * attempt. If the other thread comes in with retryaddr != NULL, then
2111 * that means there was a request sent on a connection, in which case
2112 * the the connection should already exist. Thus the first thread
2113 * never gets here ... it finds the connection it its server in the
2116 * But even if theory is wrong, in the retryaddr != NULL case, the 2nd
2117 * thread will skip us because x_src.len == 0.
2119 cm_entry
->x_next
= cm_hd
;
2121 mutex_exit(&connmgr_lock
);
2124 * Either we didn't find an entry to the server of interest, or we
2125 * don't have the maximum number of connections to that server -
2126 * create a new connection.
2128 RPCLOG0(8, "connmgr_get: creating new connection\n");
2129 rpcerr
->re_status
= RPC_TLIERROR
;
2131 i
= t_kopen(NULL
, device
, FREAD
|FWRITE
|FNDELAY
, &tiptr
, zone_kcred());
2133 RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i
);
2134 rpcerr
->re_errno
= i
;
2135 connmgr_cancelconn(cm_entry
);
2138 rpc_poptimod(tiptr
->fp
->f_vnode
);
2140 if (i
= strioctl(tiptr
->fp
->f_vnode
, I_PUSH
, (intptr_t)"rpcmod", 0,
2141 K_TO_K
, kcred
, &retval
)) {
2142 RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i
);
2143 (void) t_kclose(tiptr
, 1);
2144 rpcerr
->re_errno
= i
;
2145 connmgr_cancelconn(cm_entry
);
2149 if (i
= strioctl(tiptr
->fp
->f_vnode
, RPC_CLIENT
, 0, 0, K_TO_K
,
2151 RPCLOG(1, "connmgr_get: can't set client status with cots "
2153 (void) t_kclose(tiptr
, 1);
2154 rpcerr
->re_errno
= i
;
2155 connmgr_cancelconn(cm_entry
);
2159 mutex_enter(&connmgr_lock
);
2161 wq
= tiptr
->fp
->f_vnode
->v_stream
->sd_wrq
->q_next
;
2162 cm_entry
->x_wq
= wq
;
2164 mutex_exit(&connmgr_lock
);
2166 if (i
= strioctl(tiptr
->fp
->f_vnode
, I_PUSH
, (intptr_t)"timod", 0,
2167 K_TO_K
, kcred
, &retval
)) {
2168 RPCLOG(1, "connmgr_get: can't push timod, %d\n", i
);
2169 (void) t_kclose(tiptr
, 1);
2170 rpcerr
->re_errno
= i
;
2171 connmgr_cancelconn(cm_entry
);
2176 * If the caller has not specified reserved port usage then
2177 * take the system default.
2179 if (useresvport
== -1)
2180 useresvport
= clnt_cots_do_bindresvport
;
2182 if ((useresvport
|| retryaddr
!= NULL
) &&
2183 (addrfmly
== AF_INET
|| addrfmly
== AF_INET6
)) {
2184 bool_t alloc_src
= FALSE
;
2186 if (srcaddr
->len
!= destaddr
->len
) {
2187 kmem_free(srcaddr
->buf
, srcaddr
->maxlen
);
2188 srcaddr
->buf
= kmem_zalloc(destaddr
->len
, KM_SLEEP
);
2189 srcaddr
->maxlen
= destaddr
->len
;
2190 srcaddr
->len
= destaddr
->len
;
2194 if ((i
= bindresvport(tiptr
, retryaddr
, srcaddr
, TRUE
)) != 0) {
2195 (void) t_kclose(tiptr
, 1);
2196 RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
2197 "%p\n", (void *)retryaddr
);
2200 * 1225408: If we allocated a source address, then it
2201 * is either garbage or all zeroes. In that case
2202 * we need to clear srcaddr.
2204 if (alloc_src
== TRUE
) {
2205 kmem_free(srcaddr
->buf
, srcaddr
->maxlen
);
2206 srcaddr
->maxlen
= srcaddr
->len
= 0;
2207 srcaddr
->buf
= NULL
;
2209 rpcerr
->re_errno
= i
;
2210 connmgr_cancelconn(cm_entry
);
2214 if ((i
= t_kbind(tiptr
, NULL
, NULL
)) != 0) {
2215 RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i
);
2216 (void) t_kclose(tiptr
, 1);
2217 rpcerr
->re_errno
= i
;
2218 connmgr_cancelconn(cm_entry
);
2225 * Keep the kernel stack lean. Don't move this call
2226 * declaration to the top of this function because a
2227 * call is declared in connmgr_wrapconnect()
2231 bzero(&call
, sizeof (call
));
2232 cv_init(&call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
2235 * This is a bound end-point so don't close it's stream.
2237 connected
= connmgr_connect(cm_entry
, wq
, destaddr
, addrfmly
,
2238 &call
, &tidu_size
, FALSE
, waitp
, nosignal
, cr
);
2239 *rpcerr
= call
.call_err
;
2240 cv_destroy(&call
.call_cv
);
2244 mutex_enter(&connmgr_lock
);
2247 * Set up a transport entry in the connection manager's list.
2249 cm_entry
->x_src
.buf
= kmem_zalloc(srcaddr
->len
, KM_SLEEP
);
2250 bcopy(srcaddr
->buf
, cm_entry
->x_src
.buf
, srcaddr
->len
);
2251 cm_entry
->x_src
.len
= cm_entry
->x_src
.maxlen
= srcaddr
->len
;
2253 cm_entry
->x_tiptr
= tiptr
;
2254 cm_entry
->x_time
= ddi_get_lbolt();
2256 if (tiptr
->tp_info
.servtype
== T_COTS_ORD
)
2257 cm_entry
->x_ordrel
= TRUE
;
2259 cm_entry
->x_ordrel
= FALSE
;
2261 cm_entry
->x_tidu_size
= tidu_size
;
2263 if (cm_entry
->x_early_disc
) {
2265 * We need to check if a disconnect request has come
2266 * while we are connected, if so, then we need to
2267 * set rpcerr->re_status appropriately before returning
2270 if (rpcerr
->re_status
== RPC_SUCCESS
)
2271 rpcerr
->re_status
= RPC_XPRTFAILED
;
2272 cm_entry
->x_connected
= FALSE
;
2274 cm_entry
->x_connected
= connected
;
2277 * There could be a discrepancy here such that
2278 * x_early_disc is TRUE yet connected is TRUE as well
2279 * and the connection is actually connected. In that case
2280 * lets be conservative and declare the connection as not
2283 cm_entry
->x_early_disc
= FALSE
;
2284 cm_entry
->x_needdis
= (cm_entry
->x_connected
== FALSE
);
2285 cm_entry
->x_ctime
= ddi_get_lbolt();
2288 * Notify any threads waiting that the connection attempt is done.
2290 cm_entry
->x_thread
= FALSE
;
2291 cv_broadcast(&cm_entry
->x_conn_cv
);
2293 if (cm_entry
->x_connected
== FALSE
) {
2294 mutex_exit(&connmgr_lock
);
2295 connmgr_release(cm_entry
);
2299 mutex_exit(&connmgr_lock
);
2305 * Keep the cm_xprt entry on the connecton list when making a connection. This
2306 * is to prevent multiple connections to a slow server from appearing.
2307 * We use the bit field x_thread to tell if a thread is doing a connection
2308 * which keeps other interested threads from messing with connection.
2309 * Those other threads just wait if x_thread is set.
2311 * If x_thread is not set, then we do the actual work of connecting via
2312 * connmgr_connect().
2314 * mutex convention: called with connmgr_lock held, returns with it released.
2316 static struct cm_xprt
*
2317 connmgr_wrapconnect(
2318 struct cm_xprt
*cm_entry
,
2319 const struct timeval
*waitp
,
2320 struct netbuf
*destaddr
,
2322 struct netbuf
*srcaddr
,
2323 struct rpc_err
*rpcerr
,
2328 ASSERT(MUTEX_HELD(&connmgr_lock
));
2330 * Hold this entry as we are about to drop connmgr_lock.
2332 CONN_HOLD(cm_entry
);
2335 * If there is a thread already making a connection for us, then
2336 * wait for it to complete the connection.
2338 if (cm_entry
->x_thread
== TRUE
) {
2339 rpcerr
->re_status
= connmgr_cwait(cm_entry
, waitp
, nosignal
);
2341 if (rpcerr
->re_status
!= RPC_SUCCESS
) {
2342 mutex_exit(&connmgr_lock
);
2343 connmgr_release(cm_entry
);
2350 cm_entry
->x_thread
= TRUE
;
2352 while (cm_entry
->x_needrel
== TRUE
) {
2353 cm_entry
->x_needrel
= FALSE
;
2355 connmgr_sndrel(cm_entry
);
2356 delay(drv_usectohz(1000000));
2358 mutex_enter(&connmgr_lock
);
2362 * If we need to send a T_DISCON_REQ, send one.
2364 connmgr_dis_and_wait(cm_entry
);
2366 mutex_exit(&connmgr_lock
);
2368 bzero(&call
, sizeof (call
));
2369 cv_init(&call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
2371 connected
= connmgr_connect(cm_entry
, cm_entry
->x_wq
,
2372 destaddr
, addrfmly
, &call
, &cm_entry
->x_tidu_size
,
2373 reconnect
, waitp
, nosignal
, cr
);
2375 *rpcerr
= call
.call_err
;
2376 cv_destroy(&call
.call_cv
);
2378 mutex_enter(&connmgr_lock
);
2381 if (cm_entry
->x_early_disc
) {
2383 * We need to check if a disconnect request has come
2384 * while we are connected, if so, then we need to
2385 * set rpcerr->re_status appropriately before returning
2388 if (rpcerr
->re_status
== RPC_SUCCESS
)
2389 rpcerr
->re_status
= RPC_XPRTFAILED
;
2390 cm_entry
->x_connected
= FALSE
;
2392 cm_entry
->x_connected
= connected
;
2395 * There could be a discrepancy here such that
2396 * x_early_disc is TRUE yet connected is TRUE as well
2397 * and the connection is actually connected. In that case
2398 * lets be conservative and declare the connection as not
2402 cm_entry
->x_early_disc
= FALSE
;
2403 cm_entry
->x_needdis
= (cm_entry
->x_connected
== FALSE
);
2407 * connmgr_connect() may have given up before the connection
2408 * actually timed out. So ensure that before the next
2409 * connection attempt we do a disconnect.
2411 cm_entry
->x_ctime
= ddi_get_lbolt();
2412 cm_entry
->x_thread
= FALSE
;
2414 cv_broadcast(&cm_entry
->x_conn_cv
);
2416 if (cm_entry
->x_connected
== FALSE
) {
2417 mutex_exit(&connmgr_lock
);
2418 connmgr_release(cm_entry
);
2423 if (srcaddr
!= NULL
) {
2425 * Copy into the handle the
2426 * source address of the
2427 * connection, which we will use
2428 * in case of a later retry.
2430 if (srcaddr
->len
!= cm_entry
->x_src
.len
) {
2431 if (srcaddr
->maxlen
> 0)
2432 kmem_free(srcaddr
->buf
, srcaddr
->maxlen
);
2433 srcaddr
->buf
= kmem_zalloc(cm_entry
->x_src
.len
,
2435 srcaddr
->maxlen
= srcaddr
->len
=
2436 cm_entry
->x_src
.len
;
2438 bcopy(cm_entry
->x_src
.buf
, srcaddr
->buf
, srcaddr
->len
);
2440 cm_entry
->x_time
= ddi_get_lbolt();
2441 mutex_exit(&connmgr_lock
);
2446 * If we need to send a T_DISCON_REQ, send one.
2449 connmgr_dis_and_wait(struct cm_xprt
*cm_entry
)
2451 ASSERT(MUTEX_HELD(&connmgr_lock
));
2453 while (cm_entry
->x_needdis
== TRUE
) {
2454 RPCLOG(8, "connmgr_dis_and_wait: need "
2455 "T_DISCON_REQ for connection 0x%p\n",
2457 cm_entry
->x_needdis
= FALSE
;
2458 cm_entry
->x_waitdis
= TRUE
;
2460 connmgr_snddis(cm_entry
);
2462 mutex_enter(&connmgr_lock
);
2465 if (cm_entry
->x_waitdis
== TRUE
) {
2468 RPCLOG(8, "connmgr_dis_and_wait waiting for "
2469 "T_DISCON_REQ's ACK for connection %p\n",
2472 timout
= clnt_cots_min_conntout
* drv_usectohz(1000000);
2475 * The TPI spec says that the T_DISCON_REQ
2476 * will get acknowledged, but in practice
2477 * the ACK may never get sent. So don't
2480 (void) cv_reltimedwait(&cm_entry
->x_dis_cv
,
2481 &connmgr_lock
, timout
, TR_CLOCK_TICK
);
2484 * If we got the ACK, break. If we didn't,
2485 * then send another T_DISCON_REQ.
2487 if (cm_entry
->x_waitdis
== FALSE
) {
2490 RPCLOG(8, "connmgr_dis_and_wait: did"
2491 "not get T_DISCON_REQ's ACK for "
2492 "connection %p\n", (void *)cm_entry
);
2493 cm_entry
->x_needdis
= TRUE
;
2499 connmgr_cancelconn(struct cm_xprt
*cm_entry
)
2502 * Mark the connection table entry as dead; the next thread that
2503 * goes through connmgr_release() will notice this and deal with it.
2505 mutex_enter(&connmgr_lock
);
2506 cm_entry
->x_dead
= TRUE
;
2509 * Notify any threads waiting for the connection that it isn't
2512 cm_entry
->x_thread
= FALSE
;
2513 cv_broadcast(&cm_entry
->x_conn_cv
);
2514 mutex_exit(&connmgr_lock
);
2516 connmgr_release(cm_entry
);
2520 connmgr_close(struct cm_xprt
*cm_entry
)
2522 mutex_enter(&cm_entry
->x_lock
);
2523 while (cm_entry
->x_ref
!= 0) {
2525 * Must be a noninterruptible wait.
2527 cv_wait(&cm_entry
->x_cv
, &cm_entry
->x_lock
);
2530 if (cm_entry
->x_tiptr
!= NULL
)
2531 (void) t_kclose(cm_entry
->x_tiptr
, 1);
2533 mutex_exit(&cm_entry
->x_lock
);
2534 if (cm_entry
->x_ksp
!= NULL
) {
2535 mutex_enter(&connmgr_lock
);
2536 cm_entry
->x_ksp
->ks_private
= NULL
;
2537 mutex_exit(&connmgr_lock
);
2540 * Must free the buffer we allocated for the
2541 * server address in the update function
2543 if (((struct cm_kstat_xprt
*)(cm_entry
->x_ksp
->ks_data
))->
2544 x_server
.value
.str
.addr
.ptr
!= NULL
)
2545 kmem_free(((struct cm_kstat_xprt
*)(cm_entry
->x_ksp
->
2546 ks_data
))->x_server
.value
.str
.addr
.ptr
,
2548 kmem_free(cm_entry
->x_ksp
->ks_data
,
2549 cm_entry
->x_ksp
->ks_data_size
);
2550 kstat_delete(cm_entry
->x_ksp
);
2553 mutex_destroy(&cm_entry
->x_lock
);
2554 cv_destroy(&cm_entry
->x_cv
);
2555 cv_destroy(&cm_entry
->x_conn_cv
);
2556 cv_destroy(&cm_entry
->x_dis_cv
);
2558 if (cm_entry
->x_server
.buf
!= NULL
)
2559 kmem_free(cm_entry
->x_server
.buf
, cm_entry
->x_server
.maxlen
);
2560 if (cm_entry
->x_src
.buf
!= NULL
)
2561 kmem_free(cm_entry
->x_src
.buf
, cm_entry
->x_src
.maxlen
);
2562 kmem_free(cm_entry
, sizeof (struct cm_xprt
));
2566 * Called by KRPC after sending the call message to release the connection
2570 connmgr_release(struct cm_xprt
*cm_entry
)
2572 mutex_enter(&cm_entry
->x_lock
);
2574 if (cm_entry
->x_ref
== 0)
2575 cv_signal(&cm_entry
->x_cv
);
2576 mutex_exit(&cm_entry
->x_lock
);
2580 * Set TCP receive and xmit buffer size for RPC connections.
2583 connmgr_setbufsz(calllist_t
*e
, queue_t
*wq
, cred_t
*cr
)
2588 if (rpc_default_tcp_bufsz
)
2592 * Only set new buffer size if it's larger than the system
2593 * default buffer size. If smaller buffer size is needed
2594 * then use /etc/system to set rpc_default_tcp_bufsz to 1.
2596 ok
= connmgr_getopt_int(wq
, SOL_SOCKET
, SO_RCVBUF
, &val
, e
, cr
);
2597 if ((ok
== TRUE
) && (val
< rpc_send_bufsz
)) {
2598 ok
= connmgr_setopt_int(wq
, SOL_SOCKET
, SO_RCVBUF
,
2599 rpc_send_bufsz
, e
, cr
);
2600 DTRACE_PROBE2(krpc__i__connmgr_rcvbufsz
,
2601 int, ok
, calllist_t
*, e
);
2604 ok
= connmgr_getopt_int(wq
, SOL_SOCKET
, SO_SNDBUF
, &val
, e
, cr
);
2605 if ((ok
== TRUE
) && (val
< rpc_recv_bufsz
)) {
2606 ok
= connmgr_setopt_int(wq
, SOL_SOCKET
, SO_SNDBUF
,
2607 rpc_recv_bufsz
, e
, cr
);
2608 DTRACE_PROBE2(krpc__i__connmgr_sndbufsz
,
2609 int, ok
, calllist_t
*, e
);
2615 * Given an open stream, connect to the remote. Returns true if connected,
2620 struct cm_xprt
*cm_entry
,
2622 struct netbuf
*addr
,
2627 const struct timeval
*waitp
,
2632 struct T_conn_req
*tcr
;
2633 struct T_info_ack
*tinfo
;
2634 int interrupted
, error
;
2635 int tidu_size
, kstat_instance
;
2637 /* if it's a reconnect, flush any lingering data messages */
2639 (void) putctl1(wq
, M_FLUSH
, FLUSHRW
);
2642 * Note: if the receiver uses SCM_UCRED/getpeerucred the pid will
2645 mp
= allocb_cred(sizeof (*tcr
) + addr
->len
, cr
, NOPID
);
2648 * This is unfortunate, but we need to look up the stats for
2649 * this zone to increment the "memory allocation failed"
2650 * counter. curproc->p_zone is safe since we're initiating a
2651 * connection and not in some strange streams context.
2653 struct rpcstat
*rpcstat
;
2655 rpcstat
= zone_getspecific(rpcstat_zone_key
, rpc_zone());
2656 ASSERT(rpcstat
!= NULL
);
2658 RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
2659 "sending conn request\n");
2660 COTSRCSTAT_INCR(rpcstat
->rpc_cots_client
, rcnomem
);
2661 e
->call_status
= RPC_SYSTEMERROR
;
2662 e
->call_reason
= ENOSR
;
2666 /* Set TCP buffer size for RPC connections if needed */
2667 if (addrfmly
== AF_INET
|| addrfmly
== AF_INET6
)
2668 (void) connmgr_setbufsz(e
, wq
, cr
);
2670 mp
->b_datap
->db_type
= M_PROTO
;
2671 tcr
= (struct T_conn_req
*)mp
->b_rptr
;
2672 bzero(tcr
, sizeof (*tcr
));
2673 tcr
->PRIM_type
= T_CONN_REQ
;
2674 tcr
->DEST_length
= addr
->len
;
2675 tcr
->DEST_offset
= sizeof (struct T_conn_req
);
2676 mp
->b_wptr
= mp
->b_rptr
+ sizeof (*tcr
);
2678 bcopy(addr
->buf
, mp
->b_wptr
, tcr
->DEST_length
);
2679 mp
->b_wptr
+= tcr
->DEST_length
;
2681 RPCLOG(8, "connmgr_connect: sending conn request on queue "
2683 RPCLOG(8, " call %p\n", (void *)wq
);
2685 * We use the entry in the handle that is normally used for
2686 * waiting for RPC replies to wait for the connection accept.
2688 if (clnt_dispatch_send(wq
, mp
, e
, 0, 0) != RPC_SUCCESS
) {
2689 DTRACE_PROBE(krpc__e__connmgr__connect__cantsend
);
2694 mutex_enter(&clnt_pending_lock
);
2697 * We wait for the transport connection to be made, or an
2698 * indication that it could not be made.
2703 * waitforack should have been called with T_OK_ACK, but the
2704 * present implementation needs to be passed T_INFO_ACK to
2707 error
= waitforack(e
, T_INFO_ACK
, waitp
, nosignal
);
2710 if (zone_status_get(curproc
->p_zone
) >= ZONE_IS_EMPTY
) {
2712 * No time to lose; we essentially have been signaled to
2719 RPCLOG0(8, "connmgr_connect: giving up "
2720 "on connection attempt; "
2721 "clnt_dispatch notifyconn "
2722 "diagnostic 'no one waiting for "
2723 "connection' should not be "
2727 e
->call_prev
->call_next
= e
->call_next
;
2729 clnt_pending
= e
->call_next
;
2731 e
->call_next
->call_prev
= e
->call_prev
;
2732 mutex_exit(&clnt_pending_lock
);
2734 if (e
->call_status
!= RPC_SUCCESS
|| error
!= 0) {
2736 e
->call_status
= RPC_INTR
;
2737 else if (error
== ETIME
)
2738 e
->call_status
= RPC_TIMEDOUT
;
2739 else if (error
== EPROTO
) {
2740 e
->call_status
= RPC_SYSTEMERROR
;
2741 e
->call_reason
= EPROTO
;
2744 RPCLOG(8, "connmgr_connect: can't connect, status: "
2745 "%s\n", clnt_sperrno(e
->call_status
));
2747 if (e
->call_reply
) {
2748 freemsg(e
->call_reply
);
2749 e
->call_reply
= NULL
;
2755 * The result of the "connection accept" is a T_info_ack
2756 * in the call_reply field.
2758 ASSERT(e
->call_reply
!= NULL
);
2760 e
->call_reply
= NULL
;
2761 tinfo
= (struct T_info_ack
*)mp
->b_rptr
;
2763 tidu_size
= tinfo
->TIDU_size
;
2764 tidu_size
-= (tidu_size
% BYTES_PER_XDR_UNIT
);
2765 if (tidu_size
> COTS_DEFAULT_ALLOCSIZE
|| (tidu_size
<= 0))
2766 tidu_size
= COTS_DEFAULT_ALLOCSIZE
;
2767 *tidu_ptr
= tidu_size
;
2772 * Set up the pertinent options. NODELAY is so the transport doesn't
2773 * buffer up RPC messages on either end. This may not be valid for
2774 * all transports. Failure to set this option is not cause to
2775 * bail out so we return success anyway. Note that lack of NODELAY
2776 * or some other way to flush the message on both ends will cause
2777 * lots of retries and terrible performance.
2779 if (addrfmly
== AF_INET
|| addrfmly
== AF_INET6
) {
2780 (void) connmgr_setopt(wq
, IPPROTO_TCP
, TCP_NODELAY
, e
, cr
);
2781 if (e
->call_status
== RPC_XPRTFAILED
)
2786 * Since we have a connection, we now need to figure out if
2787 * we need to create a kstat. If x_ksp is not NULL then we
2788 * are reusing a connection and so we do not need to create
2789 * another kstat -- lets just return.
2791 if (cm_entry
->x_ksp
!= NULL
)
2795 * We need to increment rpc_kstat_instance atomically to prevent
2796 * two kstats being created with the same instance.
2798 kstat_instance
= atomic_inc_32_nv((uint32_t *)&rpc_kstat_instance
);
2800 if ((cm_entry
->x_ksp
= kstat_create_zone("unix", kstat_instance
,
2801 "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED
,
2802 (uint_t
)(sizeof (cm_kstat_xprt_t
) / sizeof (kstat_named_t
)),
2803 KSTAT_FLAG_VIRTUAL
, cm_entry
->x_zoneid
)) == NULL
) {
2807 cm_entry
->x_ksp
->ks_lock
= &connmgr_lock
;
2808 cm_entry
->x_ksp
->ks_private
= cm_entry
;
2809 cm_entry
->x_ksp
->ks_data_size
= ((INET6_ADDRSTRLEN
* sizeof (char))
2810 + sizeof (cm_kstat_template
));
2811 cm_entry
->x_ksp
->ks_data
= kmem_alloc(cm_entry
->x_ksp
->ks_data_size
,
2813 bcopy(&cm_kstat_template
, cm_entry
->x_ksp
->ks_data
,
2814 cm_entry
->x_ksp
->ks_data_size
);
2815 ((struct cm_kstat_xprt
*)(cm_entry
->x_ksp
->ks_data
))->
2816 x_server
.value
.str
.addr
.ptr
=
2817 kmem_alloc(INET6_ADDRSTRLEN
, KM_SLEEP
);
2819 cm_entry
->x_ksp
->ks_update
= conn_kstat_update
;
2820 kstat_install(cm_entry
->x_ksp
);
2825 * Verify that the specified offset falls within the mblk and
2826 * that the resulting pointer is aligned.
2827 * Returns NULL if not.
2829 * code from fs/sockfs/socksubr.c
2832 connmgr_opt_getoff(mblk_t
*mp
, t_uscalar_t offset
,
2833 t_uscalar_t length
, uint_t align_size
)
2835 uintptr_t ptr1
, ptr2
;
2837 ASSERT(mp
&& mp
->b_wptr
>= mp
->b_rptr
);
2838 ptr1
= (uintptr_t)mp
->b_rptr
+ offset
;
2839 ptr2
= (uintptr_t)ptr1
+ length
;
2840 if (ptr1
< (uintptr_t)mp
->b_rptr
|| ptr2
> (uintptr_t)mp
->b_wptr
) {
2843 if ((ptr1
& (align_size
- 1)) != 0) {
2846 return ((void *)ptr1
);
2850 connmgr_getopt_int(queue_t
*wq
, int level
, int name
, int *val
,
2851 calllist_t
*e
, cred_t
*cr
)
2854 struct opthdr
*opt
, *opt_res
;
2855 struct T_optmgmt_req
*tor
;
2856 struct T_optmgmt_ack
*opt_ack
;
2857 struct timeval waitp
;
2860 mp
= allocb_cred(sizeof (struct T_optmgmt_req
) +
2861 sizeof (struct opthdr
) + sizeof (int), cr
, NOPID
);
2865 mp
->b_datap
->db_type
= M_PROTO
;
2866 tor
= (struct T_optmgmt_req
*)(mp
->b_rptr
);
2867 tor
->PRIM_type
= T_SVR4_OPTMGMT_REQ
;
2868 tor
->MGMT_flags
= T_CURRENT
;
2869 tor
->OPT_length
= sizeof (struct opthdr
) + sizeof (int);
2870 tor
->OPT_offset
= sizeof (struct T_optmgmt_req
);
2872 opt
= (struct opthdr
*)(mp
->b_rptr
+ sizeof (struct T_optmgmt_req
));
2875 opt
->len
= sizeof (int);
2876 mp
->b_wptr
+= sizeof (struct T_optmgmt_req
) + sizeof (struct opthdr
) +
2880 * We will use this connection regardless
2881 * of whether or not the option is readable.
2883 if (clnt_dispatch_send(wq
, mp
, e
, 0, 0) != RPC_SUCCESS
) {
2884 DTRACE_PROBE(krpc__e__connmgr__getopt__cantsend
);
2889 mutex_enter(&clnt_pending_lock
);
2891 waitp
.tv_sec
= clnt_cots_min_conntout
;
2893 error
= waitforack(e
, T_OPTMGMT_ACK
, &waitp
, 1);
2896 e
->call_prev
->call_next
= e
->call_next
;
2898 clnt_pending
= e
->call_next
;
2900 e
->call_next
->call_prev
= e
->call_prev
;
2901 mutex_exit(&clnt_pending_lock
);
2903 /* get reply message */
2905 e
->call_reply
= NULL
;
2907 if ((!mp
) || (e
->call_status
!= RPC_SUCCESS
) || (error
!= 0)) {
2909 DTRACE_PROBE4(krpc__e__connmgr_getopt
, int, name
,
2910 int, e
->call_status
, int, error
, mblk_t
*, mp
);
2917 opt_ack
= (struct T_optmgmt_ack
*)mp
->b_rptr
;
2918 opt_res
= (struct opthdr
*)connmgr_opt_getoff(mp
, opt_ack
->OPT_offset
,
2919 opt_ack
->OPT_length
, __TPI_ALIGN_SIZE
);
2922 DTRACE_PROBE4(krpc__e__connmgr_optres
, mblk_t
*, mp
, int, name
,
2923 int, opt_ack
->OPT_offset
, int, opt_ack
->OPT_length
);
2927 *val
= *(int *)&opt_res
[1];
2929 DTRACE_PROBE2(connmgr_getopt__ok
, int, name
, int, *val
);
2936 * Called by connmgr_connect to set an option on the new stream.
2939 connmgr_setopt_int(queue_t
*wq
, int level
, int name
, int val
,
2940 calllist_t
*e
, cred_t
*cr
)
2944 struct T_optmgmt_req
*tor
;
2945 struct timeval waitp
;
2948 mp
= allocb_cred(sizeof (struct T_optmgmt_req
) +
2949 sizeof (struct opthdr
) + sizeof (int), cr
, NOPID
);
2951 RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option "
2956 mp
->b_datap
->db_type
= M_PROTO
;
2957 tor
= (struct T_optmgmt_req
*)(mp
->b_rptr
);
2958 tor
->PRIM_type
= T_SVR4_OPTMGMT_REQ
;
2959 tor
->MGMT_flags
= T_NEGOTIATE
;
2960 tor
->OPT_length
= sizeof (struct opthdr
) + sizeof (int);
2961 tor
->OPT_offset
= sizeof (struct T_optmgmt_req
);
2963 opt
= (struct opthdr
*)(mp
->b_rptr
+ sizeof (struct T_optmgmt_req
));
2966 opt
->len
= sizeof (int);
2967 *(int *)((char *)opt
+ sizeof (*opt
)) = val
;
2968 mp
->b_wptr
+= sizeof (struct T_optmgmt_req
) + sizeof (struct opthdr
) +
2972 * We will use this connection regardless
2973 * of whether or not the option is settable.
2975 if (clnt_dispatch_send(wq
, mp
, e
, 0, 0) != RPC_SUCCESS
) {
2976 DTRACE_PROBE(krpc__e__connmgr__setopt__cantsend
);
2981 mutex_enter(&clnt_pending_lock
);
2983 waitp
.tv_sec
= clnt_cots_min_conntout
;
2985 error
= waitforack(e
, T_OPTMGMT_ACK
, &waitp
, 1);
2988 e
->call_prev
->call_next
= e
->call_next
;
2990 clnt_pending
= e
->call_next
;
2992 e
->call_next
->call_prev
= e
->call_prev
;
2993 mutex_exit(&clnt_pending_lock
);
2995 if (e
->call_reply
!= NULL
) {
2996 freemsg(e
->call_reply
);
2997 e
->call_reply
= NULL
;
3000 if (e
->call_status
!= RPC_SUCCESS
|| error
!= 0) {
3001 RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name
);
3004 RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name
);
3009 connmgr_setopt(queue_t
*wq
, int level
, int name
, calllist_t
*e
, cred_t
*cr
)
3011 return (connmgr_setopt_int(wq
, level
, name
, 1, e
, cr
));
3017 * This is a knob to let us force code coverage in allocation failure
3020 static int connmgr_failsnd
;
3021 #define CONN_SND_ALLOC(Size, Pri) \
3022 ((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))
3026 #define CONN_SND_ALLOC(Size, Pri) allocb(Size, Pri)
3031 * Sends an orderly release on the specified queue.
3032 * Entered with connmgr_lock. Exited without connmgr_lock
3035 connmgr_sndrel(struct cm_xprt
*cm_entry
)
3037 struct T_ordrel_req
*torr
;
3039 queue_t
*q
= cm_entry
->x_wq
;
3040 ASSERT(MUTEX_HELD(&connmgr_lock
));
3041 mp
= CONN_SND_ALLOC(sizeof (struct T_ordrel_req
), BPRI_LO
);
3043 cm_entry
->x_needrel
= TRUE
;
3044 mutex_exit(&connmgr_lock
);
3045 RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
3046 "to queue %p\n", (void *)q
);
3049 mutex_exit(&connmgr_lock
);
3051 mp
->b_datap
->db_type
= M_PROTO
;
3052 torr
= (struct T_ordrel_req
*)(mp
->b_rptr
);
3053 torr
->PRIM_type
= T_ORDREL_REQ
;
3054 mp
->b_wptr
= mp
->b_rptr
+ sizeof (struct T_ordrel_req
);
3056 RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q
);
3061 * Sends an disconnect on the specified queue.
3062 * Entered with connmgr_lock. Exited without connmgr_lock
3065 connmgr_snddis(struct cm_xprt
*cm_entry
)
3067 struct T_discon_req
*tdis
;
3069 queue_t
*q
= cm_entry
->x_wq
;
3071 ASSERT(MUTEX_HELD(&connmgr_lock
));
3072 mp
= CONN_SND_ALLOC(sizeof (*tdis
), BPRI_LO
);
3074 cm_entry
->x_needdis
= TRUE
;
3075 mutex_exit(&connmgr_lock
);
3076 RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
3077 "to queue %p\n", (void *)q
);
3080 mutex_exit(&connmgr_lock
);
3082 mp
->b_datap
->db_type
= M_PROTO
;
3083 tdis
= (struct T_discon_req
*)mp
->b_rptr
;
3084 tdis
->PRIM_type
= T_DISCON_REQ
;
3085 mp
->b_wptr
= mp
->b_rptr
+ sizeof (*tdis
);
3087 RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q
);
3092 * Sets up the entry for receiving replies, and calls rpcmod's write put proc
3093 * (through put) to send the call.
3096 clnt_dispatch_send(queue_t
*q
, mblk_t
*mp
, calllist_t
*e
, uint_t xid
,
3101 e
->call_status
= RPC_TIMEDOUT
; /* optimistic, eh? */
3105 e
->call_notified
= FALSE
;
3108 e
->call_status
= RPC_CANTSEND
;
3109 e
->call_reason
= ENOBUFS
;
3110 return (RPC_CANTSEND
);
3114 * If queue_flag is set then the calllist_t is already on the hash
3115 * queue. In this case just send the message and return.
3119 return (RPC_SUCCESS
);
3124 * Set up calls for RPC requests (with XID != 0) on the hash
3125 * queue for fast lookups and place other calls (i.e.
3126 * connection management) on the linked list.
3129 RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
3130 "dispatch list\n", xid
);
3131 e
->call_hash
= call_hash(xid
, clnt_cots_hash_size
);
3132 e
->call_bucket
= &cots_call_ht
[e
->call_hash
];
3133 call_table_enter(e
);
3135 mutex_enter(&clnt_pending_lock
);
3137 clnt_pending
->call_prev
= e
;
3138 e
->call_next
= clnt_pending
;
3139 e
->call_prev
= NULL
;
3141 mutex_exit(&clnt_pending_lock
);
3145 return (RPC_SUCCESS
);
3149 * Called by rpcmod to notify a client with a clnt_pending call that its reply
3150 * has arrived. If we can't find a client waiting for this reply, we log
3151 * the error and return.
3154 clnt_dispatch_notify(mblk_t
*mp
, zoneid_t zoneid
)
3156 calllist_t
*e
= NULL
;
3161 if ((IS_P2ALIGNED(mp
->b_rptr
, sizeof (uint32_t))) &&
3162 (mp
->b_wptr
- mp
->b_rptr
) >= sizeof (xid
))
3163 xid
= *((uint32_t *)mp
->b_rptr
);
3166 unsigned char *p
= (unsigned char *)&xid
;
3167 unsigned char *rptr
;
3171 * Copy the xid, byte-by-byte into xid.
3175 while (rptr
< tmp
->b_wptr
) {
3177 if (++i
>= sizeof (xid
))
3184 * If we got here, we ran out of mblk space before the
3185 * xid could be copied.
3187 ASSERT(tmp
== NULL
&& i
< sizeof (xid
));
3190 "clnt_dispatch_notify: message less than size of xid\n");
3196 hash
= call_hash(xid
, clnt_cots_hash_size
);
3197 chtp
= &cots_call_ht
[hash
];
3198 /* call_table_find returns with the hash bucket locked */
3199 call_table_find(chtp
, xid
, e
);
3203 * Found thread waiting for this reply
3205 mutex_enter(&e
->call_lock
);
3208 * verify that the reply is coming in on
3209 * the same zone that it was sent from.
3211 if (e
->call_zoneid
!= zoneid
) {
3212 mutex_exit(&e
->call_lock
);
3213 mutex_exit(&chtp
->ct_lock
);
3214 RPCLOG0(1, "clnt_dispatch_notify: incorrect zoneid\n");
3220 * This can happen under the following scenario:
3221 * clnt_cots_kcallit() times out on the response,
3222 * rfscall() repeats the CLNT_CALL() with
3223 * the same xid, clnt_cots_kcallit() sends the retry,
3224 * thereby putting the clnt handle on the pending list,
3225 * the first response arrives, signalling the thread
3226 * in clnt_cots_kcallit(). Before that thread is
3227 * dispatched, the second response arrives as well,
3228 * and clnt_dispatch_notify still finds the handle on
3229 * the pending list, with call_reply set. So free the
3232 * It is also possible for a response intended for
3233 * an RPC call with a different xid to reside here.
3234 * This can happen if the thread that owned this
3235 * client handle prior to the current owner bailed
3236 * out and left its call record on the dispatch
3237 * queue. A window exists where the response can
3238 * arrive before the current owner dispatches its
3241 * In any case, this is the very last point where we
3242 * can safely check the call_reply field before
3243 * placing the new response there.
3245 freemsg(e
->call_reply
);
3247 e
->call_status
= RPC_SUCCESS
;
3248 e
->call_notified
= TRUE
;
3249 cv_signal(&e
->call_cv
);
3250 mutex_exit(&e
->call_lock
);
3251 mutex_exit(&chtp
->ct_lock
);
3255 struct rpcstat
*rpcstat
;
3257 mutex_exit(&chtp
->ct_lock
);
3258 RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n",
3261 * This is unfortunate, but we need to lookup the zone so we
3262 * can increment its "rcbadxids" counter.
3264 zone
= zone_find_by_id(zoneid
);
3267 * The zone went away...
3271 rpcstat
= zone_getspecific(rpcstat_zone_key
, zone
);
3272 if (zone_status_get(zone
) >= ZONE_IS_SHUTTING_DOWN
) {
3279 COTSRCSTAT_INCR(rpcstat
->rpc_cots_client
, rcbadxids
);
3286 * Called by rpcmod when a non-data indication arrives. The ones in which we
3287 * are interested are connection indications and options acks. We dispatch
3288 * based on the queue the indication came in on. If we are not interested in
3289 * what came in, we return false to rpcmod, who will then pass it upstream.
3292 clnt_dispatch_notifyconn(queue_t
*q
, mblk_t
*mp
)
3297 ASSERT((q
->q_flag
& QREADR
) == 0);
3299 type
= ((union T_primitives
*)mp
->b_rptr
)->type
;
3300 RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
3301 rpc_tpiprim2name(type
));
3302 mutex_enter(&clnt_pending_lock
);
3303 for (e
= clnt_pending
; /* NO CONDITION */; e
= e
->call_next
) {
3305 mutex_exit(&clnt_pending_lock
);
3306 RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
3307 "for connection on queue 0x%p\n", (void *)q
);
3310 if (e
->call_wq
== q
)
3317 * The transport is now connected, send a T_INFO_REQ to get
3320 mutex_exit(&clnt_pending_lock
);
3321 ASSERT(mp
->b_datap
->db_lim
- mp
->b_datap
->db_base
>=
3322 sizeof (struct T_info_req
));
3323 mp
->b_rptr
= mp
->b_datap
->db_base
;
3324 ((union T_primitives
*)mp
->b_rptr
)->type
= T_INFO_REQ
;
3325 mp
->b_wptr
= mp
->b_rptr
+ sizeof (struct T_info_req
);
3326 mp
->b_datap
->db_type
= M_PCPROTO
;
3331 e
->call_status
= RPC_SUCCESS
;
3333 e
->call_notified
= TRUE
;
3334 cv_signal(&e
->call_cv
);
3337 e
->call_status
= RPC_CANTCONNECT
;
3339 e
->call_notified
= TRUE
;
3340 cv_signal(&e
->call_cv
);
3344 * Great, but we are really waiting for a T_CONN_CON
3349 mutex_exit(&clnt_pending_lock
);
3350 RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type
);
3354 mutex_exit(&clnt_pending_lock
);
3359 * Called by rpcmod when the transport is (or should be) going away. Informs
3360 * all callers waiting for replies and marks the entry in the connection
3361 * manager's list as unconnected, and either closing (close handshake in
3362 * progress) or dead.
3365 clnt_dispatch_notifyall(queue_t
*q
, int32_t msg_type
, int32_t reason
)
3369 struct cm_xprt
*cm_entry
;
3370 int have_connmgr_lock
;
3373 ASSERT((q
->q_flag
& QREADR
) == 0);
3375 RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q
);
3376 RPCLOG(1, " received a notifcation prim type [%s]",
3377 rpc_tpiprim2name(msg_type
));
3378 RPCLOG(1, " and reason %d\n", reason
);
3381 * Find the transport entry in the connection manager's list, close
3382 * the transport and delete the entry. In the case where rpcmod's
3383 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
3384 * should gracefully close the connection.
3386 have_connmgr_lock
= 1;
3387 mutex_enter(&connmgr_lock
);
3388 for (cm_entry
= cm_hd
; cm_entry
; cm_entry
= cm_entry
->x_next
) {
3389 ASSERT(cm_entry
!= cm_entry
->x_next
);
3390 if (cm_entry
->x_wq
== q
) {
3391 ASSERT(MUTEX_HELD(&connmgr_lock
));
3392 ASSERT(have_connmgr_lock
== 1);
3396 if (cm_entry
->x_dead
) {
3397 RPCLOG(1, "idle timeout on dead "
3400 if (clnt_stop_idle
!= NULL
)
3401 (*clnt_stop_idle
)(q
);
3406 * Only mark the connection as dead if it is
3407 * connected and idle.
3408 * An unconnected connection has probably
3409 * gone idle because the server is down,
3410 * and when it comes back up there will be
3411 * retries that need to use that connection.
3413 if (cm_entry
->x_connected
||
3414 cm_entry
->x_doomed
) {
3415 if (cm_entry
->x_ordrel
) {
3416 if (cm_entry
->x_closing
==
3420 * obviously wedged due
3421 * to a bug or problem
3422 * with the transport.
3427 cm_entry
->x_dead
= TRUE
;
3430 have_connmgr_lock
= 0;
3431 if (clnt_stop_idle
!=
3433 (*clnt_stop_idle
)(q
);
3436 cm_entry
->x_closing
= TRUE
;
3437 connmgr_sndrel(cm_entry
);
3438 have_connmgr_lock
= 0;
3440 cm_entry
->x_dead
= TRUE
;
3441 mutex_exit(&connmgr_lock
);
3442 have_connmgr_lock
= 0;
3443 if (clnt_stop_idle
!= NULL
)
3444 (*clnt_stop_idle
)(q
);
3448 * We don't mark the connection
3449 * as dead, but we turn off the
3452 mutex_exit(&connmgr_lock
);
3453 have_connmgr_lock
= 0;
3454 if (clnt_stop_idle
!= NULL
)
3455 (*clnt_stop_idle
)(q
);
3456 RPCLOG(1, "clnt_dispatch_notifyall:"
3457 " ignoring timeout from rpcmod"
3458 " (q %p) because we are not "
3459 " connected\n", (void *)q
);
3464 * If this entry is marked closing, then we are
3465 * completing a close handshake, and the
3466 * connection is dead. Otherwise, the server is
3467 * trying to close. Since the server will not
3468 * be sending any more RPC replies, we abort
3469 * the connection, including flushing
3470 * any RPC requests that are in-transit.
3471 * In either case, mark the entry as dead so
3472 * that it can be closed by the connection
3473 * manager's garbage collector.
3475 cm_entry
->x_dead
= TRUE
;
3476 if (cm_entry
->x_closing
) {
3477 mutex_exit(&connmgr_lock
);
3478 have_connmgr_lock
= 0;
3479 if (clnt_stop_idle
!= NULL
)
3480 (*clnt_stop_idle
)(q
);
3483 * if we're getting a disconnect
3484 * before we've finished our
3485 * connect attempt, mark it for
3488 if (cm_entry
->x_thread
)
3489 cm_entry
->x_early_disc
= TRUE
;
3491 cm_entry
->x_connected
= FALSE
;
3492 cm_entry
->x_waitdis
= TRUE
;
3493 connmgr_snddis(cm_entry
);
3494 have_connmgr_lock
= 0;
3500 cm_entry
->x_waitdis
= FALSE
;
3501 cv_signal(&cm_entry
->x_dis_cv
);
3502 mutex_exit(&connmgr_lock
);
3506 if (cm_entry
->x_thread
)
3507 cm_entry
->x_early_disc
= TRUE
;
3509 cm_entry
->x_connected
= FALSE
;
3510 cm_entry
->x_waitdis
= TRUE
;
3512 connmgr_snddis(cm_entry
);
3513 have_connmgr_lock
= 0;
3519 * if we're getting a disconnect before
3520 * we've finished our connect attempt,
3521 * mark it for later processing
3523 if (cm_entry
->x_closing
) {
3524 cm_entry
->x_dead
= TRUE
;
3525 mutex_exit(&connmgr_lock
);
3526 have_connmgr_lock
= 0;
3527 if (clnt_stop_idle
!= NULL
)
3528 (*clnt_stop_idle
)(q
);
3530 if (cm_entry
->x_thread
) {
3531 cm_entry
->x_early_disc
= TRUE
;
3533 cm_entry
->x_dead
= TRUE
;
3534 cm_entry
->x_connected
= FALSE
;
3543 if (have_connmgr_lock
)
3544 mutex_exit(&connmgr_lock
);
3546 if (msg_type
== T_ERROR_ACK
|| msg_type
== T_OK_ACK
) {
3547 RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
3548 "connmgr entry for discon ack\n", (void *)q
);
3553 * Then kick all the clnt_pending calls out of their wait. There
3554 * should be no clnt_pending calls in the case of rpcmod's idle
3557 for (i
= 0; i
< clnt_cots_hash_size
; i
++) {
3558 ctp
= &cots_call_ht
[i
];
3559 mutex_enter(&ctp
->ct_lock
);
3560 for (e
= ctp
->ct_call_next
;
3561 e
!= (calllist_t
*)ctp
;
3563 if (e
->call_wq
== q
&& e
->call_notified
== FALSE
) {
3565 "clnt_dispatch_notifyall for queue %p ",
3567 RPCLOG(1, "aborting clnt_pending call %p\n",
3570 if (msg_type
== T_DISCON_IND
)
3571 e
->call_reason
= reason
;
3572 e
->call_notified
= TRUE
;
3573 e
->call_status
= RPC_XPRTFAILED
;
3574 cv_signal(&e
->call_cv
);
3577 mutex_exit(&ctp
->ct_lock
);
3580 mutex_enter(&clnt_pending_lock
);
3581 for (e
= clnt_pending
; e
; e
= e
->call_next
) {
3583 * Only signal those RPC handles that haven't been
3584 * signalled yet. Otherwise we can get a bogus call_reason.
3585 * This can happen if thread A is making a call over a
3586 * connection. If the server is killed, it will cause
3587 * reset, and reason will default to EIO as a result of
3588 * a T_ORDREL_IND. Thread B then attempts to recreate
3589 * the connection but gets a T_DISCON_IND. If we set the
3590 * call_reason code for all threads, then if thread A
3591 * hasn't been dispatched yet, it will get the wrong
3592 * reason. The bogus call_reason can make it harder to
3593 * discriminate between calls that fail because the
3594 * connection attempt failed versus those where the call
3595 * may have been executed on the server.
3597 if (e
->call_wq
== q
&& e
->call_notified
== FALSE
) {
3598 RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
3600 RPCLOG(1, " aborting clnt_pending call %p\n",
3603 if (msg_type
== T_DISCON_IND
)
3604 e
->call_reason
= reason
;
3605 e
->call_notified
= TRUE
;
3607 * Let the caller timeout, else he will retry
3610 e
->call_status
= RPC_XPRTFAILED
;
3613 * We used to just signal those threads
3614 * waiting for a connection, (call_xid = 0).
3615 * That meant that threads waiting for a response
3616 * waited till their timeout expired. This
3617 * could be a long time if they've specified a
3618 * maximum timeout. (2^31 - 1). So we
3619 * Signal all threads now.
3621 cv_signal(&e
->call_cv
);
3624 mutex_exit(&clnt_pending_lock
);
3630 * after resuming a system that's been suspended for longer than the
3631 * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
3632 * generates "NFS server X not responding" and "NFS server X ok" messages;
3633 * here we reset inet connections to cause a re-connect and avoid those
3634 * NFS messages. see 4045054
3637 connmgr_cpr_reset(void *arg
, int code
)
3639 struct cm_xprt
*cxp
;
3641 if (code
== CB_CODE_CPR_CHKPT
)
3644 if (mutex_tryenter(&connmgr_lock
) == 0)
3646 for (cxp
= cm_hd
; cxp
; cxp
= cxp
->x_next
) {
3647 if ((cxp
->x_family
== AF_INET
|| cxp
->x_family
== AF_INET6
) &&
3648 cxp
->x_connected
== TRUE
) {
3650 cxp
->x_early_disc
= TRUE
;
3652 cxp
->x_connected
= FALSE
;
3653 cxp
->x_needdis
= TRUE
;
3656 mutex_exit(&connmgr_lock
);
3661 clnt_cots_stats_init(zoneid_t zoneid
, struct rpc_cots_client
**statsp
)
3664 *statsp
= (struct rpc_cots_client
*)rpcstat_zone_init_common(zoneid
,
3665 "unix", "rpc_cots_client", (const kstat_named_t
*)&cots_rcstat_tmpl
,
3666 sizeof (cots_rcstat_tmpl
));
3670 clnt_cots_stats_fini(zoneid_t zoneid
, struct rpc_cots_client
**statsp
)
3672 rpcstat_zone_fini_common(zoneid
, "unix", "rpc_cots_client");
3673 kmem_free(*statsp
, sizeof (cots_rcstat_tmpl
));
3677 clnt_cots_init(void)
3679 mutex_init(&connmgr_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3680 mutex_init(&clnt_pending_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3682 if (clnt_cots_hash_size
< DEFAULT_MIN_HASH_SIZE
)
3683 clnt_cots_hash_size
= DEFAULT_MIN_HASH_SIZE
;
3685 cots_call_ht
= call_table_init(clnt_cots_hash_size
);
3686 zone_key_create(&zone_cots_key
, NULL
, NULL
, clnt_zone_destroy
);
3690 clnt_cots_fini(void)
3692 (void) zone_key_delete(zone_cots_key
);
3696 * Wait for TPI ack, returns success only if expected ack is received
3697 * within timeout period.
3701 waitforack(calllist_t
*e
, t_scalar_t ack_prim
, const struct timeval
*waitp
,
3704 union T_primitives
*tpr
;
3708 ASSERT(MUTEX_HELD(&clnt_pending_lock
));
3709 while (e
->call_reply
== NULL
) {
3710 if (waitp
!= NULL
) {
3711 timout
= waitp
->tv_sec
* drv_usectohz(MICROSEC
) +
3712 drv_usectohz(waitp
->tv_usec
);
3714 cv_stat
= cv_reltimedwait(&e
->call_cv
,
3715 &clnt_pending_lock
, timout
, TR_CLOCK_TICK
);
3717 cv_stat
= cv_reltimedwait_sig(&e
->call_cv
,
3718 &clnt_pending_lock
, timout
, TR_CLOCK_TICK
);
3721 cv_wait(&e
->call_cv
, &clnt_pending_lock
);
3723 cv_stat
= cv_wait_sig(&e
->call_cv
,
3724 &clnt_pending_lock
);
3731 * if we received an error from the server and we know a reply
3732 * is not going to be sent, do not wait for the full timeout,
3735 if (e
->call_status
== RPC_XPRTFAILED
)
3736 return (e
->call_reason
);
3738 tpr
= (union T_primitives
*)e
->call_reply
->b_rptr
;
3739 if (tpr
->type
== ack_prim
)
3740 return (0); /* Success */
3742 if (tpr
->type
== T_ERROR_ACK
) {
3743 if (tpr
->error_ack
.TLI_error
== TSYSERR
)
3744 return (tpr
->error_ack
.UNIX_error
);
3746 return (t_tlitosyserr(tpr
->error_ack
.TLI_error
));
3749 return (EPROTO
); /* unknown or unexpected primitive */