4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
27 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
32 * Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T
37 * Portions of this source code were derived from Berkeley 4.3 BSD
38 * under license from the Regents of the University of California.
43 * Implements a kernel based, client side RPC over Connection Oriented
48 * Much of this file has been re-written to let NFS work better over slow
49 * transports. A description follows.
51 * One of the annoying things about kRPC/COTS is that it will temporarily
52 * create more than one connection between a client and server. This
53 * happens because when a connection is made, the end-points entry in the
54 * linked list of connections (headed by cm_hd), is removed so that other
55 * threads don't mess with it. Went ahead and bit the bullet by keeping
56 * the endpoint on the connection list and introducing state bits,
57 * condition variables etc. to the connection entry data structure (struct
60 * Here is a summary of the changes to cm-xprt:
62 * x_ctime is the timestamp of when the endpoint was last
63 * connected or disconnected. If an end-point is ever disconnected
64 * or re-connected, then any outstanding RPC request is presumed
65 * lost, telling clnt_cots_kcallit that it needs to re-send the
66 * request, not just wait for the original request's reply to
69 * x_thread flag which tells us if a thread is doing a connection attempt.
71 * x_waitdis flag which tells us we are waiting for a disconnect ACK.
73 * x_needdis flag which tells us we need to send a T_DISCONN_REQ
74 * to kill the connection.
76 * x_needrel flag which tells us we need to send a T_ORDREL_REQ to
77 * gracefully close the connection.
79 * #defined bitmasks for all the b_* bits so that more
80 * efficient (and at times less clumsy) masks can be used to
81 * manipulate state in cases where multiple bits have to be
82 * set/cleared/checked in the same critical section.
84 * x_conn_cv and x_dis_cv are new condition variables to let
85 * threads know when the connection attempt is done, and to let
86 * the connecting thread know when the disconnect handshake is
89 * Added the CONN_HOLD() macro so that all reference holds have the same
92 * In the private (cku_private) portion of the client handle,
94 * cku_flags replaces the cku_sent boolean. cku_flags keeps
95 * track of whether a request has been sent, and whether the
96 * client handle's call record is on the dispatch list (so that
97 * the reply can be matched by XID to the right client handle).
98 * The idea of CKU_ONQUEUE is that we can exit clnt_cots_kcallit()
99 * and still have the response find the right client handle so
100 * that the retry of CLNT_CALL() gets the result. Testing, found
101 * situations where if the timeout was increased, performance
102 * degraded. This was due to us hitting a window where the thread
103 * was back in rfscall() (probably printing server not responding)
104 * while the response came back but no place to put it.
106 * cku_ctime is just a cache of x_ctime. If they match,
107 * clnt_cots_kcallit() won't send a retry (unless the maximum
108 * receive count limit has been reached). If they don't match,
109 * then we assume the request has been lost, and a retry of the
112 * cku_recv_attempts counts the number of receive count attempts
113 * after one try is sent on the wire.
115 * Added the clnt_delay() routine so that interruptible and
116 * noninterruptible delays are possible.
118 * CLNT_MIN_TIMEOUT has been bumped to 10 seconds from 3. This is used to
119 * control how long the client delays before returning after getting
120 * ECONNREFUSED. At 3 seconds, 8 client threads per mount really does bash
121 * a server that may be booting and not yet started nfsd.
123 * CLNT_MAXRECV_WITHOUT_RETRY is a new macro (value of 3) (with a tunable)
124 * Why don't we just wait forever (receive an infinite # of times)?
125 * Because the server may have rebooted. More insidious is that some
126 * servers (ours) will drop NFS/TCP requests in some cases. This is bad,
127 * but it is a reality.
129 * The case of a server doing orderly release really messes up the
130 * client's recovery, especially if the server's TCP implementation is
131 * buggy. It was found that the kRPC/COTS client was breaking some
132 * TPI rules, such as not waiting for the acknowledgement of a
133 * T_DISCON_REQ (hence the added case statements T_ERROR_ACK, T_OK_ACK and
134 * T_DISCON_REQ in clnt_dispatch_notifyall()).
136 * One of things that we've seen is that a kRPC TCP endpoint goes into
137 * TIMEWAIT and thus a reconnect takes a long time to satisfy because
138 * the TIMEWAIT state takes a while to finish. If a server sends a
139 * T_ORDREL_IND, there is little point in an RPC client doing a
140 * T_ORDREL_REQ, because the RPC request isn't going to make it (the
141 * server is saying that it won't accept any more data). So kRPC was
142 * changed to send a T_DISCON_REQ when we get a T_ORDREL_IND. So now the
143 * connection skips the TIMEWAIT state and goes straight to a bound state
144 * that kRPC can quickly switch to connected.
146 * Code that issues TPI request must use waitforack() to wait for the
147 * corresponding ack (assuming there is one) in any future modifications.
148 * This works around problems that may be introduced by breaking TPI rules
149 * (by submitting new calls before earlier requests have been acked) in the
150 * case of a signal or other early return. waitforack() depends on
151 * clnt_dispatch_notifyconn() to issue the wakeup when the ack
152 * arrives, so adding new TPI calls may require corresponding changes
153 * to clnt_dispatch_notifyconn(). Presently, the timeout period is based on
154 * CLNT_MIN_TIMEOUT which is 10 seconds. If you modify this value, be sure
155 * not to set it too low or TPI ACKS will be lost.
158 #include <sys/param.h>
159 #include <sys/types.h>
160 #include <sys/user.h>
161 #include <sys/systm.h>
162 #include <sys/sysmacros.h>
163 #include <sys/proc.h>
164 #include <sys/socket.h>
165 #include <sys/file.h>
166 #include <sys/stream.h>
167 #include <sys/strsubr.h>
168 #include <sys/stropts.h>
169 #include <sys/strsun.h>
170 #include <sys/timod.h>
171 #include <sys/tiuser.h>
172 #include <sys/tihdr.h>
173 #include <sys/t_kuser.h>
174 #include <sys/fcntl.h>
175 #include <sys/errno.h>
176 #include <sys/kmem.h>
177 #include <sys/debug.h>
178 #include <sys/systm.h>
179 #include <sys/kstat.h>
180 #include <sys/t_lock.h>
182 #include <sys/cmn_err.h>
183 #include <sys/time.h>
184 #include <sys/isa_defs.h>
185 #include <sys/callb.h>
186 #include <sys/sunddi.h>
187 #include <sys/atomic.h>
190 #include <netinet/in.h>
191 #include <netinet/tcp.h>
193 #include <rpc/types.h>
195 #include <rpc/auth.h>
196 #include <rpc/clnt.h>
197 #include <rpc/rpc_msg.h>
199 #define COTS_DEFAULT_ALLOCSIZE 2048
201 #define WIRE_HDR_SIZE 20 /* serialized call header, sans proc number */
202 #define MSG_OFFSET 128 /* offset of call into the mblk */
204 const char *kinet_ntop6(uchar_t
*, char *, size_t);
206 static int clnt_cots_ksettimers(CLIENT
*, struct rpc_timers
*,
207 struct rpc_timers
*, int, void(*)(int, int, caddr_t
), caddr_t
, uint32_t);
208 static enum clnt_stat
clnt_cots_kcallit(CLIENT
*, rpcproc_t
, xdrproc_t
,
209 caddr_t
, xdrproc_t
, caddr_t
, struct timeval
);
210 static void clnt_cots_kabort(CLIENT
*);
211 static void clnt_cots_kerror(CLIENT
*, struct rpc_err
*);
212 static bool_t
clnt_cots_kfreeres(CLIENT
*, xdrproc_t
, caddr_t
);
213 static void clnt_cots_kdestroy(CLIENT
*);
214 static bool_t
clnt_cots_kcontrol(CLIENT
*, int, char *);
217 /* List of transports managed by the connection manager. */
219 TIUSER
*x_tiptr
; /* transport handle */
220 queue_t
*x_wq
; /* send queue */
221 clock_t x_time
; /* last time we handed this xprt out */
222 clock_t x_ctime
; /* time we went to CONNECTED */
223 int x_tidu_size
; /* TIDU size of this transport */
227 #ifdef _BIT_FIELDS_HTOL
228 b_closing
: 1, /* we've sent a ord rel on this conn */
229 b_dead
: 1, /* transport is closed or disconn */
230 b_doomed
: 1, /* too many conns, let this go idle */
231 b_connected
: 1, /* this connection is connected */
233 b_ordrel
: 1, /* do an orderly release? */
234 b_thread
: 1, /* thread doing connect */
235 b_waitdis
: 1, /* waiting for disconnect ACK */
236 b_needdis
: 1, /* need T_DISCON_REQ */
238 b_needrel
: 1, /* need T_ORDREL_REQ */
239 b_early_disc
: 1, /* got a T_ORDREL_IND or T_DISCON_IND */
240 /* disconnect during connect */
246 #ifdef _BIT_FIELDS_LTOH
249 b_early_disc
: 1, /* got a T_ORDREL_IND or T_DISCON_IND */
250 /* disconnect during connect */
251 b_needrel
: 1, /* need T_ORDREL_REQ */
253 b_needdis
: 1, /* need T_DISCON_REQ */
254 b_waitdis
: 1, /* waiting for disconnect ACK */
255 b_thread
: 1, /* thread doing connect */
256 b_ordrel
: 1, /* do an orderly release? */
258 b_connected
: 1, /* this connection is connected */
259 b_doomed
: 1, /* too many conns, let this go idle */
260 b_dead
: 1, /* transport is closed or disconn */
261 b_closing
: 1; /* we've sent a ord rel on this conn */
263 } bit
; unsigned int word
;
265 #define x_closing x_state.bit.b_closing
266 #define x_dead x_state.bit.b_dead
267 #define x_doomed x_state.bit.b_doomed
268 #define x_connected x_state.bit.b_connected
270 #define x_ordrel x_state.bit.b_ordrel
271 #define x_thread x_state.bit.b_thread
272 #define x_waitdis x_state.bit.b_waitdis
273 #define x_needdis x_state.bit.b_needdis
275 #define x_needrel x_state.bit.b_needrel
276 #define x_early_disc x_state.bit.b_early_disc
278 #define x_state_flags x_state.word
280 #define X_CLOSING 0x80000000
281 #define X_DEAD 0x40000000
282 #define X_DOOMED 0x20000000
283 #define X_CONNECTED 0x10000000
285 #define X_ORDREL 0x08000000
286 #define X_THREAD 0x04000000
287 #define X_WAITDIS 0x02000000
288 #define X_NEEDDIS 0x01000000
290 #define X_NEEDREL 0x00800000
291 #define X_EARLYDISC 0x00400000
293 #define X_BADSTATES (X_CLOSING | X_DEAD | X_DOOMED)
296 int x_ref
; /* number of users of this xprt */
297 int x_family
; /* address family of transport */
298 dev_t x_rdev
; /* device number of transport */
299 struct cm_xprt
*x_next
;
301 struct netbuf x_server
; /* destination address */
302 struct netbuf x_src
; /* src address (for retries) */
303 kmutex_t x_lock
; /* lock on this entry */
304 kcondvar_t x_cv
; /* to signal when can be closed */
305 kcondvar_t x_conn_cv
; /* to signal when connection attempt */
309 kcondvar_t x_dis_cv
; /* to signal when disconnect attempt */
311 zoneid_t x_zoneid
; /* zone this xprt belongs to */
314 typedef struct cm_kstat_xprt
{
316 kstat_named_t x_server
;
317 kstat_named_t x_family
;
318 kstat_named_t x_rdev
;
319 kstat_named_t x_time
;
320 kstat_named_t x_state
;
322 kstat_named_t x_port
;
325 static cm_kstat_xprt_t cm_kstat_template
= {
326 { "write_queue", KSTAT_DATA_UINT32
},
327 { "server", KSTAT_DATA_STRING
},
328 { "addr_family", KSTAT_DATA_UINT32
},
329 { "device", KSTAT_DATA_UINT32
},
330 { "time_stamp", KSTAT_DATA_UINT32
},
331 { "status", KSTAT_DATA_UINT32
},
332 { "ref_count", KSTAT_DATA_INT32
},
333 { "port", KSTAT_DATA_UINT32
},
337 * The inverse of this is connmgr_release().
339 #define CONN_HOLD(Cm_entry) {\
340 mutex_enter(&(Cm_entry)->x_lock); \
341 (Cm_entry)->x_ref++; \
342 mutex_exit(&(Cm_entry)->x_lock); \
347 * Private data per rpc handle. This structure is allocated by
348 * clnt_cots_kcreate, and freed by clnt_cots_kdestroy.
350 typedef struct cku_private_s
{
351 CLIENT cku_client
; /* client handle */
352 calllist_t cku_call
; /* for dispatching calls */
353 struct rpc_err cku_err
; /* error status */
355 struct netbuf cku_srcaddr
; /* source address for retries */
356 int cku_addrfmly
; /* for binding port */
357 struct netbuf cku_addr
; /* remote address */
358 dev_t cku_device
; /* device to use */
360 #define CKU_ONQUEUE 0x1
363 bool_t cku_progress
; /* for CLSET_PROGRESS */
364 uint32_t cku_xid
; /* current XID */
365 clock_t cku_ctime
; /* time stamp of when */
366 /* connection was created */
367 uint_t cku_recv_attempts
;
368 XDR cku_outxdr
; /* xdr routine for output */
369 XDR cku_inxdr
; /* xdr routine for input */
370 char cku_rpchdr
[WIRE_HDR_SIZE
+ 4];
371 /* pre-serialized rpc header */
373 uint_t cku_outbuflen
; /* default output mblk length */
374 struct cred
*cku_cred
; /* credentials */
375 bool_t cku_nodelayonerr
;
376 /* for CLSET_NODELAYONERR */
377 int cku_useresvport
; /* Use reserved port */
378 struct rpc_cots_client
*cku_stats
; /* stats for zone */
381 static struct cm_xprt
*connmgr_wrapconnect(struct cm_xprt
*,
382 const struct timeval
*, struct netbuf
*, int, struct netbuf
*,
383 struct rpc_err
*, bool_t
, bool_t
, cred_t
*);
385 static bool_t
connmgr_connect(struct cm_xprt
*, queue_t
*, struct netbuf
*,
386 int, calllist_t
*, int *, bool_t reconnect
,
387 const struct timeval
*, bool_t
, cred_t
*);
389 static void *connmgr_opt_getoff(mblk_t
*mp
, t_uscalar_t offset
,
390 t_uscalar_t length
, uint_t align_size
);
391 static bool_t
connmgr_setbufsz(calllist_t
*e
, queue_t
*wq
, cred_t
*cr
);
392 static bool_t
connmgr_getopt_int(queue_t
*wq
, int level
, int name
, int *val
,
393 calllist_t
*e
, cred_t
*cr
);
394 static bool_t
connmgr_setopt_int(queue_t
*wq
, int level
, int name
, int val
,
395 calllist_t
*e
, cred_t
*cr
);
396 static bool_t
connmgr_setopt(queue_t
*, int, int, calllist_t
*, cred_t
*cr
);
397 static void connmgr_sndrel(struct cm_xprt
*);
398 static void connmgr_snddis(struct cm_xprt
*);
399 static void connmgr_close(struct cm_xprt
*);
400 static void connmgr_release(struct cm_xprt
*);
401 static struct cm_xprt
*connmgr_wrapget(struct netbuf
*, const struct timeval
*,
404 static struct cm_xprt
*connmgr_get(struct netbuf
*, const struct timeval
*,
405 struct netbuf
*, int, struct netbuf
*, struct rpc_err
*, dev_t
,
406 bool_t
, int, cred_t
*);
408 static void connmgr_cancelconn(struct cm_xprt
*);
409 static enum clnt_stat
connmgr_cwait(struct cm_xprt
*, const struct timeval
*,
411 static void connmgr_dis_and_wait(struct cm_xprt
*);
413 static int clnt_dispatch_send(queue_t
*, mblk_t
*, calllist_t
*, uint_t
,
416 static int clnt_delay(clock_t, bool_t
);
418 static int waitforack(calllist_t
*, t_scalar_t
, const struct timeval
*, bool_t
);
421 * Operations vector for TCP/IP based RPC
423 static struct clnt_ops tcp_ops
= {
424 clnt_cots_kcallit
, /* do rpc call */
425 clnt_cots_kabort
, /* abort call */
426 clnt_cots_kerror
, /* return error status */
427 clnt_cots_kfreeres
, /* free results */
428 clnt_cots_kdestroy
, /* destroy rpc handle */
429 clnt_cots_kcontrol
, /* the ioctl() of rpc */
430 clnt_cots_ksettimers
, /* set retry timers */
433 static int rpc_kstat_instance
= 0; /* keeps the current instance */
434 /* number for the next kstat_create */
436 static struct cm_xprt
*cm_hd
= NULL
;
437 static kmutex_t connmgr_lock
; /* for connection mngr's list of transports */
439 extern kmutex_t clnt_max_msg_lock
;
441 static calllist_t
*clnt_pending
= NULL
;
442 extern kmutex_t clnt_pending_lock
;
444 static int clnt_cots_hash_size
= DEFAULT_HASH_SIZE
;
446 static call_table_t
*cots_call_ht
;
448 static const struct rpc_cots_client
{
449 kstat_named_t rccalls
;
450 kstat_named_t rcbadcalls
;
451 kstat_named_t rcbadxids
;
452 kstat_named_t rctimeouts
;
453 kstat_named_t rcnewcreds
;
454 kstat_named_t rcbadverfs
;
455 kstat_named_t rctimers
;
456 kstat_named_t rccantconn
;
457 kstat_named_t rcnomem
;
458 kstat_named_t rcintrs
;
459 } cots_rcstat_tmpl
= {
460 { "calls", KSTAT_DATA_UINT64
},
461 { "badcalls", KSTAT_DATA_UINT64
},
462 { "badxids", KSTAT_DATA_UINT64
},
463 { "timeouts", KSTAT_DATA_UINT64
},
464 { "newcreds", KSTAT_DATA_UINT64
},
465 { "badverfs", KSTAT_DATA_UINT64
},
466 { "timers", KSTAT_DATA_UINT64
},
467 { "cantconn", KSTAT_DATA_UINT64
},
468 { "nomem", KSTAT_DATA_UINT64
},
469 { "interrupts", KSTAT_DATA_UINT64
}
472 #define COTSRCSTAT_INCR(p, x) \
473 atomic_inc_64(&(p)->x.value.ui64)
475 #define CLNT_MAX_CONNS 1 /* concurrent connections between clnt/srvr */
476 int clnt_max_conns
= CLNT_MAX_CONNS
;
478 #define CLNT_MIN_TIMEOUT 10 /* seconds to wait after we get a */
479 /* connection reset */
480 #define CLNT_MIN_CONNTIMEOUT 5 /* seconds to wait for a connection */
483 int clnt_cots_min_tout
= CLNT_MIN_TIMEOUT
;
484 int clnt_cots_min_conntout
= CLNT_MIN_CONNTIMEOUT
;
487 * Limit the number of times we will attempt to receive a reply without
488 * re-sending a response.
490 #define CLNT_MAXRECV_WITHOUT_RETRY 3
491 uint_t clnt_cots_maxrecv
= CLNT_MAXRECV_WITHOUT_RETRY
;
493 uint_t
*clnt_max_msg_sizep
;
494 void (*clnt_stop_idle
)(queue_t
*wq
);
496 #define ptoh(p) (&((p)->cku_client))
497 #define htop(h) ((cku_private_t *)((h)->cl_private))
502 #define REFRESHES 2 /* authentication refreshes */
505 * The following is used to determine the global default behavior for
506 * COTS when binding to a local port.
508 * If the value is set to 1 the default will be to select a reserved
509 * (aka privileged) port, if the value is zero the default will be to
510 * use non-reserved ports. Users of kRPC may override this by using
511 * CLNT_CONTROL() and CLSET_BINDRESVPORT.
513 int clnt_cots_do_bindresvport
= 1;
515 static zone_key_t zone_cots_key
;
518 * Defaults TCP send and receive buffer size for RPC connections.
519 * These values can be tuned by /etc/system.
521 int rpc_send_bufsz
= 1024*1024;
522 int rpc_recv_bufsz
= 1024*1024;
524 * To use system-wide default for TCP send and receive buffer size,
525 * use /etc/system to set rpc_default_tcp_bufsz to 1:
527 * set rpcmod:rpc_default_tcp_bufsz=1
529 int rpc_default_tcp_bufsz
= 0;
532 * We need to do this after all kernel threads in the zone have exited.
536 clnt_zone_destroy(zoneid_t zoneid
, void *unused
)
538 struct cm_xprt
**cmp
;
539 struct cm_xprt
*cm_entry
;
540 struct cm_xprt
*freelist
= NULL
;
542 mutex_enter(&connmgr_lock
);
544 while ((cm_entry
= *cmp
) != NULL
) {
545 if (cm_entry
->x_zoneid
== zoneid
) {
546 *cmp
= cm_entry
->x_next
;
547 cm_entry
->x_next
= freelist
;
550 cmp
= &cm_entry
->x_next
;
553 mutex_exit(&connmgr_lock
);
554 while ((cm_entry
= freelist
) != NULL
) {
555 freelist
= cm_entry
->x_next
;
556 connmgr_close(cm_entry
);
561 clnt_cots_kcreate(dev_t dev
, struct netbuf
*addr
, int family
, rpcprog_t prog
,
562 rpcvers_t vers
, uint_t max_msgsize
, cred_t
*cred
, CLIENT
**ncl
)
566 struct rpc_msg call_msg
;
567 struct rpcstat
*rpcstat
;
569 RPCLOG(8, "clnt_cots_kcreate: prog %u\n", prog
);
571 rpcstat
= zone_getspecific(rpcstat_zone_key
, rpc_zone());
572 ASSERT(rpcstat
!= NULL
);
574 /* Allocate and intialize the client handle. */
575 p
= kmem_zalloc(sizeof (*p
), KM_SLEEP
);
579 h
->cl_private
= (caddr_t
)p
;
580 h
->cl_auth
= authkern_create();
581 h
->cl_ops
= &tcp_ops
;
583 cv_init(&p
->cku_call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
584 mutex_init(&p
->cku_call
.call_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
587 * If the current sanity check size in rpcmod is smaller
588 * than the size needed, then increase the sanity check.
590 if (max_msgsize
!= 0 && clnt_max_msg_sizep
!= NULL
&&
591 max_msgsize
> *clnt_max_msg_sizep
) {
592 mutex_enter(&clnt_max_msg_lock
);
593 if (max_msgsize
> *clnt_max_msg_sizep
)
594 *clnt_max_msg_sizep
= max_msgsize
;
595 mutex_exit(&clnt_max_msg_lock
);
598 p
->cku_outbuflen
= COTS_DEFAULT_ALLOCSIZE
;
600 /* Preserialize the call message header */
603 call_msg
.rm_direction
= CALL
;
604 call_msg
.rm_call
.cb_rpcvers
= RPC_MSG_VERSION
;
605 call_msg
.rm_call
.cb_prog
= prog
;
606 call_msg
.rm_call
.cb_vers
= vers
;
608 xdrmem_create(&p
->cku_outxdr
, p
->cku_rpchdr
, WIRE_HDR_SIZE
, XDR_ENCODE
);
610 if (!xdr_callhdr(&p
->cku_outxdr
, &call_msg
)) {
611 XDR_DESTROY(&p
->cku_outxdr
);
612 RPCLOG0(1, "clnt_cots_kcreate - Fatal header serialization "
614 auth_destroy(h
->cl_auth
);
615 kmem_free(p
, sizeof (cku_private_t
));
616 RPCLOG0(1, "clnt_cots_kcreate: create failed error EINVAL\n");
617 return (EINVAL
); /* XXX */
619 XDR_DESTROY(&p
->cku_outxdr
);
622 * The zalloc initialized the fields below.
625 * p->cku_srcaddr.len = 0;
626 * p->cku_srcaddr.maxlen = 0;
631 p
->cku_addrfmly
= family
;
632 p
->cku_addr
.buf
= kmem_zalloc(addr
->maxlen
, KM_SLEEP
);
633 p
->cku_addr
.maxlen
= addr
->maxlen
;
634 p
->cku_addr
.len
= addr
->len
;
635 bcopy(addr
->buf
, p
->cku_addr
.buf
, addr
->len
);
636 p
->cku_stats
= rpcstat
->rpc_cots_client
;
637 p
->cku_useresvport
= -1; /* value is has not been set */
645 clnt_cots_kabort(CLIENT
*h
)
650 * Return error info on this handle.
653 clnt_cots_kerror(CLIENT
*h
, struct rpc_err
*err
)
655 /* LINTED pointer alignment */
656 cku_private_t
*p
= htop(h
);
663 clnt_cots_kfreeres(CLIENT
*h
, xdrproc_t xdr_res
, caddr_t res_ptr
)
665 xdr_free(xdr_res
, res_ptr
);
671 clnt_cots_kcontrol(CLIENT
*h
, int cmd
, char *arg
)
673 cku_private_t
*p
= htop(h
);
677 p
->cku_progress
= TRUE
;
684 p
->cku_xid
= *((uint32_t *)arg
);
691 *((uint32_t *)arg
) = p
->cku_xid
;
694 case CLSET_NODELAYONERR
:
698 if (*((bool_t
*)arg
) == TRUE
) {
699 p
->cku_nodelayonerr
= TRUE
;
702 if (*((bool_t
*)arg
) == FALSE
) {
703 p
->cku_nodelayonerr
= FALSE
;
708 case CLGET_NODELAYONERR
:
712 *((bool_t
*)arg
) = p
->cku_nodelayonerr
;
715 case CLSET_BINDRESVPORT
:
719 if (*(int *)arg
!= 1 && *(int *)arg
!= 0)
722 p
->cku_useresvport
= *(int *)arg
;
726 case CLGET_BINDRESVPORT
:
730 *(int *)arg
= p
->cku_useresvport
;
740 * Destroy rpc handle. Frees the space used for output buffer,
741 * private data, and handle structure.
744 clnt_cots_kdestroy(CLIENT
*h
)
746 /* LINTED pointer alignment */
747 cku_private_t
*p
= htop(h
);
748 calllist_t
*call
= &p
->cku_call
;
750 RPCLOG(8, "clnt_cots_kdestroy h: %p\n", (void *)h
);
751 RPCLOG(8, "clnt_cots_kdestroy h: xid=0x%x\n", p
->cku_xid
);
753 if (p
->cku_flags
& CKU_ONQUEUE
) {
754 RPCLOG(64, "clnt_cots_kdestroy h: removing call for xid 0x%x "
755 "from dispatch list\n", p
->cku_xid
);
756 call_table_remove(call
);
759 if (call
->call_reply
)
760 freemsg(call
->call_reply
);
761 cv_destroy(&call
->call_cv
);
762 mutex_destroy(&call
->call_lock
);
764 kmem_free(p
->cku_srcaddr
.buf
, p
->cku_srcaddr
.maxlen
);
765 kmem_free(p
->cku_addr
.buf
, p
->cku_addr
.maxlen
);
766 kmem_free(p
, sizeof (*p
));
769 static int clnt_cots_pulls
;
770 #define RM_HDR_SIZE 4 /* record mark header size */
773 * Call remote procedure.
775 static enum clnt_stat
776 clnt_cots_kcallit(CLIENT
*h
, rpcproc_t procnum
, xdrproc_t xdr_args
,
777 caddr_t argsp
, xdrproc_t xdr_results
, caddr_t resultsp
, struct timeval wait
)
779 /* LINTED pointer alignment */
780 cku_private_t
*p
= htop(h
);
781 calllist_t
*call
= &p
->cku_call
;
783 struct rpc_msg reply_msg
;
788 struct netbuf
*retryaddr
;
789 struct cm_xprt
*cm_entry
= NULL
;
791 int len
, waitsecs
, max_waitsecs
;
793 int refreshes
= REFRESHES
;
796 enum clnt_stat status
;
797 struct timeval cwait
;
798 bool_t delay_first
= FALSE
;
801 RPCLOG(2, "clnt_cots_kcallit, procnum %u\n", procnum
);
802 COTSRCSTAT_INCR(p
->cku_stats
, rccalls
);
804 RPCLOG(2, "clnt_cots_kcallit: wait.tv_sec: %ld\n", wait
.tv_sec
);
805 RPCLOG(2, "clnt_cots_kcallit: wait.tv_usec: %ld\n", wait
.tv_usec
);
808 * Look out for zero length timeouts. We don't want to
809 * wait zero seconds for a connection to be established.
811 if (wait
.tv_sec
< clnt_cots_min_conntout
) {
812 cwait
.tv_sec
= clnt_cots_min_conntout
;
814 RPCLOG(8, "clnt_cots_kcallit: wait.tv_sec (%ld) too low,",
816 RPCLOG(8, " setting to: %d\n", clnt_cots_min_conntout
);
823 connmgr_release(cm_entry
);
830 * If the call is not a retry, allocate a new xid and cache it
831 * for future retries.
833 * Treat call as a retry for purposes of binding the source
834 * port only if we actually attempted to send anything on
837 if (p
->cku_xid
== 0) {
838 p
->cku_xid
= alloc_xid();
839 call
->call_zoneid
= rpc_zoneid();
842 * We need to ASSERT here that our xid != 0 because this
843 * determines whether or not our call record gets placed on
844 * the hash table or the linked list. By design, we mandate
845 * that RPC calls over cots must have xid's != 0, so we can
846 * ensure proper management of the hash table.
848 ASSERT(p
->cku_xid
!= 0);
851 p
->cku_flags
&= ~CKU_SENT
;
853 if (p
->cku_flags
& CKU_ONQUEUE
) {
854 RPCLOG(8, "clnt_cots_kcallit: new call, dequeuing old"
855 " one (%p)\n", (void *)call
);
856 call_table_remove(call
);
857 p
->cku_flags
&= ~CKU_ONQUEUE
;
858 RPCLOG(64, "clnt_cots_kcallit: removing call from "
859 "dispatch list because xid was zero (now 0x%x)\n",
863 if (call
->call_reply
!= NULL
) {
864 freemsg(call
->call_reply
);
865 call
->call_reply
= NULL
;
867 } else if (p
->cku_srcaddr
.buf
== NULL
|| p
->cku_srcaddr
.len
== 0) {
870 } else if (p
->cku_flags
& CKU_SENT
) {
871 retryaddr
= &p
->cku_srcaddr
;
875 * Bug ID 1246045: Nothing was sent, so set retryaddr to
876 * NULL and let connmgr_get() bind to any source port it
882 RPCLOG(64, "clnt_cots_kcallit: xid = 0x%x", p
->cku_xid
);
883 RPCLOG(64, " flags = 0x%x\n", p
->cku_flags
);
885 p
->cku_err
.re_status
= RPC_TIMEDOUT
;
886 p
->cku_err
.re_errno
= p
->cku_err
.re_terrno
= 0;
888 cm_entry
= connmgr_wrapget(retryaddr
, &cwait
, p
);
890 if (cm_entry
== NULL
) {
891 RPCLOG(1, "clnt_cots_kcallit: can't connect status %s\n",
892 clnt_sperrno(p
->cku_err
.re_status
));
895 * The reasons why we fail to create a connection are
896 * varied. In most cases we don't want the caller to
897 * immediately retry. This could have one or more
898 * bad effects. This includes flooding the net with
899 * connect requests to ports with no listener; a hard
900 * kernel loop due to all the "reserved" TCP ports being
906 * Even if we end up returning EINTR, we still count a
907 * a "can't connect", because the connection manager
908 * might have been committed to waiting for or timing out on
911 COTSRCSTAT_INCR(p
->cku_stats
, rccantconn
);
912 switch (p
->cku_err
.re_status
) {
914 p
->cku_err
.re_errno
= EINTR
;
917 * No need to delay because a UNIX signal(2)
918 * interrupted us. The caller likely won't
919 * retry the CLNT_CALL() and even if it does,
920 * we assume the caller knows what it is doing.
926 p
->cku_err
.re_errno
= ETIMEDOUT
;
929 * No need to delay because timed out already
930 * on the connection request and assume that the
931 * transport time out is longer than our minimum
932 * timeout, or least not too much smaller.
937 case RPC_SYSTEMERROR
:
940 * We want to delay here because a transient
941 * system error has a better chance of going away
942 * if we delay a bit. If it's not transient, then
943 * we don't want end up in a hard kernel loop
946 ASSERT(p
->cku_err
.re_errno
!= 0);
950 case RPC_CANTCONNECT
:
952 * RPC_CANTCONNECT is set on T_ERROR_ACK which
953 * implies some error down in the TCP layer or
954 * below. If cku_nodelayonerror is set then we
955 * assume the caller knows not to try too hard.
957 RPCLOG0(8, "clnt_cots_kcallit: connection failed,");
958 RPCLOG0(8, " re_status=RPC_CANTCONNECT,");
959 RPCLOG(8, " re_errno=%d,", p
->cku_err
.re_errno
);
960 RPCLOG(8, " cku_nodelayonerr=%d", p
->cku_nodelayonerr
);
961 if (p
->cku_nodelayonerr
== TRUE
)
964 p
->cku_err
.re_errno
= EIO
;
970 * We want to delay here because we likely
971 * got a refused connection.
973 if (p
->cku_err
.re_errno
== 0)
974 p
->cku_err
.re_errno
= EIO
;
976 RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
977 p
->cku_err
.re_errno
);
983 * We delay here because it is better to err
984 * on the side of caution. If we got here then
985 * status could have been RPC_SUCCESS, but we
986 * know that we did not get a connection, so
987 * force the rpc status to RPC_CANTCONNECT.
989 p
->cku_err
.re_status
= RPC_CANTCONNECT
;
990 p
->cku_err
.re_errno
= EIO
;
993 if (delay_first
== TRUE
)
994 ticks
= clnt_cots_min_tout
* drv_usectohz(1000000);
999 * If we've never sent any request on this connection (send count
1000 * is zero, or the connection has been reset), cache the
1001 * the connection's create time and send a request (possibly a retry)
1003 if ((p
->cku_flags
& CKU_SENT
) == 0 ||
1004 p
->cku_ctime
!= cm_entry
->x_ctime
) {
1005 p
->cku_ctime
= cm_entry
->x_ctime
;
1007 } else if ((p
->cku_flags
& CKU_SENT
) && (p
->cku_flags
& CKU_ONQUEUE
) &&
1008 (call
->call_reply
!= NULL
||
1009 p
->cku_recv_attempts
< clnt_cots_maxrecv
)) {
1012 * If we've sent a request and our call is on the dispatch
1013 * queue and we haven't made too many receive attempts, then
1014 * don't re-send, just receive.
1016 p
->cku_recv_attempts
++;
1021 * Now we create the RPC request in a STREAMS message. We have to do
1022 * this after the call to connmgr_get so that we have the correct
1023 * TIDU size for the transport.
1025 tidu_size
= cm_entry
->x_tidu_size
;
1026 len
= MSG_OFFSET
+ MAX(tidu_size
, RM_HDR_SIZE
+ WIRE_HDR_SIZE
);
1028 while ((mp
= allocb(len
, BPRI_MED
)) == NULL
) {
1029 if (strwaitbuf(len
, BPRI_MED
)) {
1030 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
1031 p
->cku_err
.re_errno
= ENOSR
;
1032 COTSRCSTAT_INCR(p
->cku_stats
, rcnomem
);
1036 xdrs
= &p
->cku_outxdr
;
1037 xdrmblk_init(xdrs
, mp
, XDR_ENCODE
, tidu_size
);
1038 mpsize
= MBLKSIZE(mp
);
1039 ASSERT(mpsize
>= len
);
1040 ASSERT(mp
->b_rptr
== mp
->b_datap
->db_base
);
1043 * If the size of mblk is not appreciably larger than what we
1044 * asked, then resize the mblk to exactly len bytes. The reason for
1045 * this: suppose len is 1600 bytes, the tidu is 1460 bytes
1046 * (from TCP over ethernet), and the arguments to the RPC require
1047 * 2800 bytes. Ideally we want the protocol to render two
1048 * ~1400 byte segments over the wire. However if allocb() gives us a 2k
1049 * mblk, and we allocate a second mblk for the remainder, the protocol
1050 * module may generate 3 segments over the wire:
1051 * 1460 bytes for the first, 448 (2048 - 1600) for the second, and
1052 * 892 for the third. If we "waste" 448 bytes in the first mblk,
1053 * the XDR encoding will generate two ~1400 byte mblks, and the
1054 * protocol module is more likely to produce properly sized segments.
1056 if ((mpsize
>> 1) <= len
)
1057 mp
->b_rptr
+= (mpsize
- len
);
1060 * Adjust b_rptr to reserve space for the non-data protocol headers
1061 * any downstream modules might like to add, and for the
1062 * record marking header.
1064 mp
->b_rptr
+= (MSG_OFFSET
+ RM_HDR_SIZE
);
1066 if (h
->cl_auth
->ah_cred
.oa_flavor
!= RPCSEC_GSS
) {
1067 /* Copy in the preserialized RPC header information. */
1068 bcopy(p
->cku_rpchdr
, mp
->b_rptr
, WIRE_HDR_SIZE
);
1070 /* Use XDR_SETPOS() to set the b_wptr to past the RPC header. */
1071 XDR_SETPOS(xdrs
, (uint_t
)(mp
->b_rptr
- mp
->b_datap
->db_base
+
1074 ASSERT((mp
->b_wptr
- mp
->b_rptr
) == WIRE_HDR_SIZE
);
1076 /* Serialize the procedure number and the arguments. */
1077 if ((!XDR_PUTINT32(xdrs
, (int32_t *)&procnum
)) ||
1078 (!AUTH_MARSHALL(h
->cl_auth
, xdrs
, p
->cku_cred
)) ||
1079 (!(*xdr_args
)(xdrs
, argsp
))) {
1081 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
1082 p
->cku_err
.re_errno
= EIO
;
1086 (*(uint32_t *)(mp
->b_rptr
)) = p
->cku_xid
;
1088 uint32_t *uproc
= (uint32_t *)&p
->cku_rpchdr
[WIRE_HDR_SIZE
];
1089 IXDR_PUT_U_INT32(uproc
, procnum
);
1091 (*(uint32_t *)(&p
->cku_rpchdr
[0])) = p
->cku_xid
;
1093 /* Use XDR_SETPOS() to set the b_wptr. */
1094 XDR_SETPOS(xdrs
, (uint_t
)(mp
->b_rptr
- mp
->b_datap
->db_base
));
1096 /* Serialize the procedure number and the arguments. */
1097 if (!AUTH_WRAP(h
->cl_auth
, p
->cku_rpchdr
, WIRE_HDR_SIZE
+4,
1098 xdrs
, xdr_args
, argsp
)) {
1100 p
->cku_err
.re_status
= RPC_CANTENCODEARGS
;
1101 p
->cku_err
.re_errno
= EIO
;
1108 RPCLOG(2, "clnt_cots_kcallit: connected, sending call, tidu_size %d\n",
1111 wq
= cm_entry
->x_wq
;
1115 status
= clnt_dispatch_send(wq
, mp
, call
, p
->cku_xid
,
1116 (p
->cku_flags
& CKU_ONQUEUE
));
1118 if ((status
== RPC_CANTSEND
) && (call
->call_reason
== ENOBUFS
)) {
1120 * QFULL condition, allow some time for queue to drain
1121 * and try again. Give up after waiting for all timeout
1122 * specified for the call, or zone is going away.
1124 max_waitsecs
= wait
.tv_sec
? wait
.tv_sec
: clnt_cots_min_tout
;
1125 if ((waitsecs
++ < max_waitsecs
) &&
1126 !(zone_status_get(curproc
->p_zone
) >=
1127 ZONE_IS_SHUTTING_DOWN
)) {
1129 /* wait 1 sec for queue to drain */
1130 if (clnt_delay(drv_usectohz(1000000),
1131 h
->cl_nosignal
) == EINTR
) {
1132 p
->cku_err
.re_errno
= EINTR
;
1133 p
->cku_err
.re_status
= RPC_INTR
;
1139 goto dispatch_again
;
1141 p
->cku_err
.re_status
= status
;
1142 p
->cku_err
.re_errno
= call
->call_reason
;
1143 DTRACE_PROBE(krpc__e__clntcots__kcallit__cantsend
);
1149 /* adjust timeout to account for time wait to send */
1150 wait
.tv_sec
-= waitsecs
;
1151 if (wait
.tv_sec
< 0) {
1152 /* pick up reply on next retry */
1155 DTRACE_PROBE2(clnt_cots__sendwait
, CLIENT
*, h
,
1159 RPCLOG(64, "clnt_cots_kcallit: sent call for xid 0x%x\n",
1160 (uint_t
)p
->cku_xid
);
1161 p
->cku_flags
= (CKU_ONQUEUE
|CKU_SENT
);
1162 p
->cku_recv_attempts
= 1;
1165 time_sent
= ddi_get_lbolt();
1169 * Wait for a reply or a timeout. If there is no error or timeout,
1170 * (both indicated by call_status), call->call_reply will contain
1171 * the RPC reply message.
1174 mutex_enter(&call
->call_lock
);
1176 if (call
->call_status
== RPC_TIMEDOUT
) {
1178 * Indicate that the lwp is not to be stopped while waiting
1179 * for this network traffic. This is to avoid deadlock while
1180 * debugging a process via /proc and also to avoid recursive
1181 * mutex_enter()s due to NFS page faults while stopping
1182 * (NFS holds locks when it calls here).
1184 clock_t cv_wait_ret
;
1188 klwp_t
*lwp
= ttolwp(curthread
);
1193 oldlbolt
= ddi_get_lbolt();
1194 timout
= wait
.tv_sec
* drv_usectohz(1000000) +
1195 drv_usectohz(wait
.tv_usec
) + oldlbolt
;
1197 * Iterate until the call_status is changed to something
1198 * other that RPC_TIMEDOUT, or if cv_timedwait_sig() returns
1199 * something <=0 zero. The latter means that we timed
1203 while ((cv_wait_ret
= cv_timedwait(&call
->call_cv
,
1204 &call
->call_lock
, timout
)) > 0 &&
1205 call
->call_status
== RPC_TIMEDOUT
)
1208 while ((cv_wait_ret
= cv_timedwait_sig(
1210 &call
->call_lock
, timout
)) > 0 &&
1211 call
->call_status
== RPC_TIMEDOUT
)
1214 switch (cv_wait_ret
) {
1217 * If we got out of the above loop with
1218 * cv_timedwait_sig() returning 0, then we were
1219 * interrupted regardless what call_status is.
1224 /* cv_timedwait_sig() timed out */
1229 * We were cv_signaled(). If we didn't
1230 * get a successful call_status and returned
1231 * before time expired, delay up to clnt_cots_min_tout
1232 * seconds so that the caller doesn't immediately
1233 * try to call us again and thus force the
1234 * same condition that got us here (such
1235 * as a RPC_XPRTFAILED due to the server not
1236 * listening on the end-point.
1238 if (call
->call_status
!= RPC_SUCCESS
) {
1242 curlbolt
= ddi_get_lbolt();
1243 ticks
= clnt_cots_min_tout
*
1244 drv_usectohz(1000000);
1245 diff
= curlbolt
- oldlbolt
;
1259 * Get the reply message, if any. This will be freed at the end
1260 * whether or not an error occurred.
1262 mp
= call
->call_reply
;
1263 call
->call_reply
= NULL
;
1266 * call_err is the error info when the call is on dispatch queue.
1267 * cku_err is the error info returned to the caller.
1268 * Sync cku_err with call_err for local message processing.
1271 status
= call
->call_status
;
1272 p
->cku_err
= call
->call_err
;
1273 mutex_exit(&call
->call_lock
);
1275 if (status
!= RPC_SUCCESS
) {
1278 now
= ddi_get_lbolt();
1280 COTSRCSTAT_INCR(p
->cku_stats
, rcintrs
);
1281 p
->cku_err
.re_status
= RPC_INTR
;
1282 p
->cku_err
.re_errno
= EINTR
;
1283 RPCLOG(1, "clnt_cots_kcallit: xid 0x%x",
1285 RPCLOG(1, "signal interrupted at %ld", now
);
1286 RPCLOG(1, ", was sent at %ld\n", time_sent
);
1288 COTSRCSTAT_INCR(p
->cku_stats
, rctimeouts
);
1289 p
->cku_err
.re_errno
= ETIMEDOUT
;
1290 RPCLOG(1, "clnt_cots_kcallit: timed out at %ld",
1292 RPCLOG(1, ", was sent at %ld\n", time_sent
);
1296 case RPC_XPRTFAILED
:
1297 if (p
->cku_err
.re_errno
== 0)
1298 p
->cku_err
.re_errno
= EIO
;
1300 RPCLOG(1, "clnt_cots_kcallit: transport failed: %d\n",
1301 p
->cku_err
.re_errno
);
1304 case RPC_SYSTEMERROR
:
1305 ASSERT(p
->cku_err
.re_errno
);
1306 RPCLOG(1, "clnt_cots_kcallit: system error: %d\n",
1307 p
->cku_err
.re_errno
);
1311 p
->cku_err
.re_status
= RPC_SYSTEMERROR
;
1312 p
->cku_err
.re_errno
= EIO
;
1313 RPCLOG(1, "clnt_cots_kcallit: error: %s\n",
1314 clnt_sperrno(status
));
1317 if (p
->cku_err
.re_status
!= RPC_TIMEDOUT
) {
1319 if (p
->cku_flags
& CKU_ONQUEUE
) {
1320 call_table_remove(call
);
1321 p
->cku_flags
&= ~CKU_ONQUEUE
;
1324 RPCLOG(64, "clnt_cots_kcallit: non TIMEOUT so xid 0x%x "
1325 "taken off dispatch list\n", p
->cku_xid
);
1326 if (call
->call_reply
) {
1327 freemsg(call
->call_reply
);
1328 call
->call_reply
= NULL
;
1330 } else if (wait
.tv_sec
!= 0) {
1332 * We've sent the request over TCP and so we have
1333 * every reason to believe it will get
1334 * delivered. In which case returning a timeout is not
1337 if (p
->cku_progress
== TRUE
&&
1338 p
->cku_recv_attempts
< clnt_cots_maxrecv
) {
1339 p
->cku_err
.re_status
= RPC_INPROGRESS
;
1345 xdrs
= &p
->cku_inxdr
;
1346 xdrmblk_init(xdrs
, mp
, XDR_DECODE
, 0);
1348 reply_msg
.rm_direction
= REPLY
;
1349 reply_msg
.rm_reply
.rp_stat
= MSG_ACCEPTED
;
1350 reply_msg
.acpted_rply
.ar_stat
= SUCCESS
;
1352 reply_msg
.acpted_rply
.ar_verf
= _null_auth
;
1354 * xdr_results will be done in AUTH_UNWRAP.
1356 reply_msg
.acpted_rply
.ar_results
.where
= NULL
;
1357 reply_msg
.acpted_rply
.ar_results
.proc
= xdr_void
;
1359 if (xdr_replymsg(xdrs
, &reply_msg
)) {
1360 enum clnt_stat re_status
;
1362 _seterr_reply(&reply_msg
, &p
->cku_err
);
1364 re_status
= p
->cku_err
.re_status
;
1365 if (re_status
== RPC_SUCCESS
) {
1367 * Reply is good, check auth.
1369 if (!AUTH_VALIDATE(h
->cl_auth
,
1370 &reply_msg
.acpted_rply
.ar_verf
)) {
1371 COTSRCSTAT_INCR(p
->cku_stats
, rcbadverfs
);
1372 RPCLOG0(1, "clnt_cots_kcallit: validation "
1375 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
1377 mutex_enter(&call
->call_lock
);
1378 if (call
->call_reply
== NULL
)
1379 call
->call_status
= RPC_TIMEDOUT
;
1380 mutex_exit(&call
->call_lock
);
1382 } else if (!AUTH_UNWRAP(h
->cl_auth
, xdrs
,
1383 xdr_results
, resultsp
)) {
1384 RPCLOG0(1, "clnt_cots_kcallit: validation "
1385 "failure (unwrap)\n");
1386 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
1387 p
->cku_err
.re_errno
= EIO
;
1390 /* set errno in case we can't recover */
1391 if (re_status
!= RPC_VERSMISMATCH
&&
1392 re_status
!= RPC_AUTHERROR
&&
1393 re_status
!= RPC_PROGVERSMISMATCH
)
1394 p
->cku_err
.re_errno
= EIO
;
1396 if (re_status
== RPC_AUTHERROR
) {
1398 * Maybe our credential need to be refreshed
1402 * There is the potential that the
1403 * cm_entry has/will be marked dead,
1404 * so drop the connection altogether,
1405 * force REFRESH to establish new
1408 connmgr_cancelconn(cm_entry
);
1412 (void) xdr_rpc_free_verifier(xdrs
,
1416 if (p
->cku_flags
& CKU_ONQUEUE
) {
1417 call_table_remove(call
);
1418 p
->cku_flags
&= ~CKU_ONQUEUE
;
1421 "clnt_cots_kcallit: AUTH_ERROR, xid"
1422 " 0x%x removed off dispatch list\n",
1424 if (call
->call_reply
) {
1425 freemsg(call
->call_reply
);
1426 call
->call_reply
= NULL
;
1429 if ((refreshes
> 0) &&
1430 AUTH_REFRESH(h
->cl_auth
, &reply_msg
,
1436 COTSRCSTAT_INCR(p
->cku_stats
,
1438 COTSRCSTAT_INCR(p
->cku_stats
,
1444 * We have used the client handle to
1445 * do an AUTH_REFRESH and the RPC status may
1446 * be set to RPC_SUCCESS; Let's make sure to
1447 * set it to RPC_AUTHERROR.
1449 p
->cku_err
.re_status
= RPC_AUTHERROR
;
1452 * Map recoverable and unrecoverable
1453 * authentication errors to appropriate errno
1455 switch (p
->cku_err
.re_why
) {
1458 * This could be a failure where the
1459 * server requires use of a reserved
1460 * port, check and optionally set the
1461 * client handle useresvport trying
1462 * one more time. Next go round we
1463 * fall out with the tooweak error.
1465 if (p
->cku_useresvport
!= 1) {
1466 p
->cku_useresvport
= 1;
1475 case AUTH_INVALIDRESP
:
1477 case RPCSEC_GSS_NOCRED
:
1478 case RPCSEC_GSS_FAILED
:
1479 p
->cku_err
.re_errno
= EACCES
;
1481 case AUTH_REJECTEDCRED
:
1482 case AUTH_REJECTEDVERF
:
1483 default: p
->cku_err
.re_errno
= EIO
;
1486 RPCLOG(1, "clnt_cots_kcallit : authentication"
1487 " failed with RPC_AUTHERROR of type %d\n",
1488 (int)p
->cku_err
.re_why
);
1493 /* reply didn't decode properly. */
1494 p
->cku_err
.re_status
= RPC_CANTDECODERES
;
1495 p
->cku_err
.re_errno
= EIO
;
1496 RPCLOG0(1, "clnt_cots_kcallit: decode failure\n");
1499 (void) xdr_rpc_free_verifier(xdrs
, &reply_msg
);
1502 if (p
->cku_flags
& CKU_ONQUEUE
) {
1503 call_table_remove(call
);
1504 p
->cku_flags
&= ~CKU_ONQUEUE
;
1507 RPCLOG(64, "clnt_cots_kcallit: xid 0x%x taken off dispatch list",
1509 RPCLOG(64, " status is %s\n", clnt_sperrno(p
->cku_err
.re_status
));
1512 connmgr_release(cm_entry
);
1516 if ((p
->cku_flags
& CKU_ONQUEUE
) == 0 && call
->call_reply
) {
1517 freemsg(call
->call_reply
);
1518 call
->call_reply
= NULL
;
1520 if (p
->cku_err
.re_status
!= RPC_SUCCESS
) {
1521 RPCLOG0(1, "clnt_cots_kcallit: tail-end failure\n");
1522 COTSRCSTAT_INCR(p
->cku_stats
, rcbadcalls
);
1526 * No point in delaying if the zone is going away.
1528 if (delay_first
== TRUE
&&
1529 !(zone_status_get(curproc
->p_zone
) >= ZONE_IS_SHUTTING_DOWN
)) {
1530 if (clnt_delay(ticks
, h
->cl_nosignal
) == EINTR
) {
1531 p
->cku_err
.re_errno
= EINTR
;
1532 p
->cku_err
.re_status
= RPC_INTR
;
1535 return (p
->cku_err
.re_status
);
/*
 * Kinit routine for cots.  This sets up the correct operations in
 * the client handle, as the handle may have previously been a clts
 * handle, and clears the xid field so there is no way a new call
 * could be mistaken for a retry.  It also sets in the handle the
 * information that is passed at create/kinit time but needed at
 * call time, as cots creates the transport at call time - device,
 * address of the server, protocol family.
 */
void
clnt_cots_kinit(CLIENT *h, dev_t dev, int family, struct netbuf *addr,
    int max_msgsize, cred_t *cred)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);
	calllist_t *call = &p->cku_call;

	h->cl_ops = &tcp_ops;

	/*
	 * If a previous call on this handle is still on the dispatch
	 * queue, take it off so a reply to the old xid cannot be
	 * confused with a reply to a new call.
	 */
	if (p->cku_flags & CKU_ONQUEUE) {
		call_table_remove(call);
		p->cku_flags &= ~CKU_ONQUEUE;
		RPCLOG(64, "clnt_cots_kinit: removing call for xid 0x%x from"
		    " dispatch list\n", p->cku_xid);
	}

	/* Discard any reply left over from a previous call. */
	if (call->call_reply != NULL) {
		freemsg(call->call_reply);
		call->call_reply = NULL;
	}

	call->call_bucket = NULL;
	call->call_hash = 0;

	/*
	 * We don't clear cku_flags here, because clnt_cots_kcallit()
	 * takes care of handling the cku_flags reset.
	 */
	p->cku_device = dev;
	p->cku_addrfmly = family;
	p->cku_cred = cred;	/* NOTE(review): reconstructed line — confirm */

	/*
	 * Grow the cached server-address buffer if the new address
	 * does not fit; the buffer is sized to addr->maxlen.
	 */
	if (p->cku_addr.maxlen < addr->len) {
		if (p->cku_addr.maxlen != 0 && p->cku_addr.buf != NULL)
			kmem_free(p->cku_addr.buf, p->cku_addr.maxlen);
		p->cku_addr.buf = kmem_zalloc(addr->maxlen, KM_SLEEP);
		p->cku_addr.maxlen = addr->maxlen;
	}

	p->cku_addr.len = addr->len;
	bcopy(addr->buf, p->cku_addr.buf, addr->len);

	/*
	 * If the current sanity check size in rpcmod is smaller
	 * than the size needed, then increase the sanity check.
	 * Double-checked under clnt_max_msg_lock so concurrent
	 * callers cannot shrink the value.
	 */
	if (max_msgsize != 0 && clnt_max_msg_sizep != NULL &&
	    max_msgsize > *clnt_max_msg_sizep) {
		mutex_enter(&clnt_max_msg_lock);
		if (max_msgsize > *clnt_max_msg_sizep)
			*clnt_max_msg_sizep = max_msgsize;
		mutex_exit(&clnt_max_msg_lock);
	}
}
/*
 * ksettimers is a no-op for cots, with the exception of setting the xid.
 * (Retransmit timers are meaningless on a connection-oriented transport;
 * only the per-call statistics counter is bumped.)
 */
/* ARGSUSED */
static int
clnt_cots_ksettimers(CLIENT *h, struct rpc_timers *t, struct rpc_timers *all,
    int minimum, void (*feedback)(int, int, caddr_t), caddr_t arg,
    uint32_t xid)
{
	/* LINTED pointer alignment */
	cku_private_t *p = htop(h);

	/* NOTE(review): xid-set and return lines reconstructed — confirm */
	if (xid)
		p->cku_xid = xid;
	COTSRCSTAT_INCR(p->cku_stats, rctimers);
	return (0);
}
1621 extern void rpc_poptimod(struct vnode
*);
1622 extern int kstr_push(struct vnode
*, char *);
/*
 * kstat update routine for a connection entry: snapshots the state of
 * one cm_xprt (ksp->ks_private) into the named-kstat record
 * (ksp->ks_data), rendering the server address as a printable string
 * and exporting the port number.  Read-only: returns EACCES on
 * KSTAT_WRITE, EIO when the kstat has no backing entry, 0 on success.
 */
static int
conn_kstat_update(kstat_t *ksp, int rw)
{
	struct cm_xprt *cm_entry;
	struct cm_kstat_xprt *cm_ksp_data;
	uchar_t *b;
	char *fbuf;

	if (rw == KSTAT_WRITE)
		return (EACCES);
	if (ksp == NULL || ksp->ks_private == NULL)
		return (EIO);
	cm_entry = (struct cm_xprt *)ksp->ks_private;
	cm_ksp_data = (struct cm_kstat_xprt *)ksp->ks_data;

	cm_ksp_data->x_wq.value.ui32 = (uint32_t)(uintptr_t)cm_entry->x_wq;
	cm_ksp_data->x_family.value.ui32 = cm_entry->x_family;
	cm_ksp_data->x_rdev.value.ui32 = (uint32_t)cm_entry->x_rdev;
	cm_ksp_data->x_time.value.ui32 = cm_entry->x_time;
	cm_ksp_data->x_ref.value.ui32 = cm_entry->x_ref;
	cm_ksp_data->x_state.value.ui32 = cm_entry->x_state_flags;

	if (cm_entry->x_server.buf) {
		/* fbuf points at the kstat's named-string storage. */
		fbuf = cm_ksp_data->x_server.value.str.addr.ptr;
		if (cm_entry->x_family == AF_INET &&
		    cm_entry->x_server.len ==
		    sizeof (struct sockaddr_in)) {
			struct sockaddr_in  *sa;
			sa = (struct sockaddr_in *)
			    cm_entry->x_server.buf;
			b = (uchar_t *)&sa->sin_addr;
			(void) sprintf(fbuf,
			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
			    b[2] & 0xFF, b[3] & 0xFF);
			cm_ksp_data->x_port.value.ui32 =
			    (uint32_t)sa->sin_port;
		} else if (cm_entry->x_family == AF_INET6 &&
		    cm_entry->x_server.len >=
		    sizeof (struct sockaddr_in6)) {
			/* extract server IP address & port */
			struct sockaddr_in6 *sin6;
			sin6 = (struct sockaddr_in6 *)cm_entry->x_server.buf;
			(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, fbuf,
			    INET6_ADDRSTRLEN);
			cm_ksp_data->x_port.value.ui32 = sin6->sin6_port;
		} else {
			struct sockaddr_in  *sa;

			/*
			 * Fallback for any other family: interpret the
			 * buffer as sockaddr_in for display purposes only.
			 */
			sa = (struct sockaddr_in *)cm_entry->x_server.buf;
			b = (uchar_t *)&sa->sin_addr;
			(void) sprintf(fbuf,
			    "%03d.%03d.%03d.%03d", b[0] & 0xFF, b[1] & 0xFF,
			    b[2] & 0xFF, b[3] & 0xFF);
		}
		KSTAT_NAMED_STR_BUFLEN(&cm_ksp_data->x_server) =
		    strlen(fbuf) + 1;
	}

	return (0);
}
1687 * We want a version of delay which is interruptible by a UNIX signal
1688 * Return EINTR if an interrupt occured.
1691 clnt_delay(clock_t ticks
, bool_t nosignal
)
1693 if (nosignal
== TRUE
) {
1697 return (delay_sig(ticks
));
/*
 * Wait for a connection until a timeout, or until we are
 * signalled that there has been a connection state change.
 *
 * Called (and returns) with connmgr_lock held; the lock is dropped
 * inside cv_timedwait{_sig}() and reacquired before return.
 *
 * Returns RPC_SUCCESS when the endpoint ends up connected and in no
 * bad state, RPC_INTR when a signal interrupted the wait (only
 * possible when nosignal is FALSE), RPC_TIMEDOUT otherwise.
 */
static enum clnt_stat
connmgr_cwait(struct cm_xprt *cm_entry, const struct timeval *waitp,
    bool_t nosignal)
{
	bool_t interrupted;
	clock_t timout, cv_stat;
	enum clnt_stat clstat;
	unsigned int old_state;

	ASSERT(MUTEX_HELD(&connmgr_lock));
	/*
	 * We wait for the transport connection to be made, or an
	 * indication that it could not be made.
	 */
	clstat = RPC_TIMEDOUT;
	interrupted = FALSE;

	old_state = cm_entry->x_state_flags;
	/*
	 * Now loop until cv_timedwait{_sig} returns because of
	 * a signal(0) or timeout(-1) or cv_signal(>0). But it may be
	 * cv_signalled for various other reasons too. So loop
	 * until there is a state change on the connection.
	 */

	/* Absolute (lbolt-based) deadline for the cv wait. */
	timout = waitp->tv_sec * drv_usectohz(1000000) +
	    drv_usectohz(waitp->tv_usec) + ddi_get_lbolt();

	if (nosignal) {
		while ((cv_stat = cv_timedwait(&cm_entry->x_conn_cv,
		    &connmgr_lock, timout)) > 0 &&
		    cm_entry->x_state_flags == old_state)
			;
	} else {
		while ((cv_stat = cv_timedwait_sig(&cm_entry->x_conn_cv,
		    &connmgr_lock, timout)) > 0 &&
		    cm_entry->x_state_flags == old_state)
			;

		if (cv_stat == 0) /* got intr signal? */
			interrupted = TRUE;
	}

	/* Success only if connected and none of the bad-state bits set. */
	if ((cm_entry->x_state_flags & (X_BADSTATES|X_CONNECTED)) ==
	    X_CONNECTED) {
		clstat = RPC_SUCCESS;
	} else {
		if (interrupted == TRUE)
			clstat = RPC_INTR;
		RPCLOG(1, "connmgr_cwait: can't connect, error: %s\n",
		    clnt_sperrno(clstat));
	}

	return (clstat);
}
/*
 * Primary interface for how RPC grabs a connection.
 *
 * Thin wrapper around connmgr_get() that feeds it the pieces of the
 * client handle's private data (destination/source address, device,
 * signal disposition, reserved-port preference, credentials) and
 * normalizes the error left in p->cku_err on failure.
 *
 * Returns the held cm_xprt entry, or NULL with p->cku_err set.
 */
static struct cm_xprt *
connmgr_wrapget(
	struct netbuf *retryaddr,
	const struct timeval *waitp,
	cku_private_t *p)
{
	struct cm_xprt *cm_entry;

	cm_entry = connmgr_get(retryaddr, waitp, &p->cku_addr, p->cku_addrfmly,
	    &p->cku_srcaddr, &p->cku_err, p->cku_device,
	    p->cku_client.cl_nosignal, p->cku_useresvport, p->cku_cred);

	if (cm_entry == NULL) {
		/*
		 * Re-map the call status to RPC_INTR if the err code is
		 * EINTR. This can happen if calls status is RPC_TLIERROR.
		 * However, don't re-map if signalling has been turned off.
		 * XXX Really need to create a separate thread whenever
		 * there isn't an existing connection.
		 */
		if (p->cku_err.re_errno == EINTR) {
			if (p->cku_client.cl_nosignal == TRUE)
				p->cku_err.re_errno = EIO;
			else
				p->cku_err.re_status = RPC_INTR;
		}
	}

	return (cm_entry);
}
/*
 * Obtains a transport to the server specified in addr. If a suitable transport
 * does not already exist in the list of cached transports, a new connection
 * is created, connected, and added to the list. The connection is for sending
 * only - the reply message may come back on another transport connection.
 *
 * To implement round-robin load balancing with multiple client connections,
 * the last entry on the list is always selected. Once the entry is selected
 * it's re-inserted to the head of the list.
 *
 * On failure, returns NULL with rpcerr filled in.  On success, returns a
 * cm_xprt entry with a hold (CONN_HOLD) that the caller must release via
 * connmgr_release().
 */
static struct cm_xprt *
connmgr_get(
	struct netbuf	*retryaddr,
	const struct timeval	*waitp,	/* changed to a ptr to converse stack */
	struct netbuf	*destaddr,
	int		addrfmly,
	struct netbuf	*srcaddr,	/* return source address */
	struct rpc_err	*rpcerr,
	dev_t		device,
	bool_t		nosignal,
	int		useresvport,
	cred_t		*cr)
{
	struct cm_xprt *cm_entry;
	struct cm_xprt *lru_entry;
	struct cm_xprt **cmp, **prev;
	queue_t *wq;
	TIUSER *tiptr;
	int i;
	int retval;
	int tidu_size;
	bool_t	connected;
	zoneid_t zoneid = rpc_zoneid();

	/*
	 * If the call is not a retry, look for a transport entry that
	 * goes to the server of interest.
	 */
	mutex_enter(&connmgr_lock);

	if (retryaddr == NULL) {
use_new_conn:
		i = 0;
		cm_entry = lru_entry = NULL;

		prev = cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);
			/*
			 * Garbage collect conections that are marked
			 * for needs disconnect.
			 */
			if (cm_entry->x_needdis) {
				CONN_HOLD(cm_entry);
				connmgr_dis_and_wait(cm_entry);
				connmgr_release(cm_entry);
				/*
				 * connmgr_lock could have been
				 * dropped for the disconnect
				 * processing so start over.
				 */
				goto use_new_conn;
			}

			/*
			 * Garbage collect the dead connections that have
			 * no threads working on them.
			 */
			if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) ==
			    X_DEAD) {
				mutex_enter(&cm_entry->x_lock);
				if (cm_entry->x_ref != 0) {
					/*
					 * Currently in use.
					 * Cleanup later.
					 */
					cmp = &cm_entry->x_next;
					mutex_exit(&cm_entry->x_lock);
					continue;
				}
				mutex_exit(&cm_entry->x_lock);
				*cmp = cm_entry->x_next;
				mutex_exit(&connmgr_lock);
				connmgr_close(cm_entry);
				mutex_enter(&connmgr_lock);
				goto use_new_conn;
			}

			/*
			 * Candidate match: healthy, same zone, same device,
			 * same server address.
			 */
			if ((cm_entry->x_state_flags & X_BADSTATES) == 0 &&
			    cm_entry->x_zoneid == zoneid &&
			    cm_entry->x_rdev == device &&
			    destaddr->len == cm_entry->x_server.len &&
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) == 0) {
				/*
				 * If the matching entry isn't connected,
				 * attempt to reconnect it.
				 */
				if (cm_entry->x_connected == FALSE) {
					/*
					 * We don't go through trying
					 * to find the least recently
					 * used connected because
					 * connmgr_reconnect() briefly
					 * dropped the connmgr_lock,
					 * allowing a window for our
					 * accounting to be messed up.
					 * In any case, a re-connected
					 * connection is as good as
					 * a new one.
					 */
					return (connmgr_wrapconnect(cm_entry,
					    waitp, destaddr, addrfmly, srcaddr,
					    rpcerr, TRUE, nosignal, cr));
				}
				i++;

				/* keep track of the last entry */
				lru_entry = cm_entry;
				prev = cmp;
			}
			cmp = &cm_entry->x_next;
		}

		if (i > clnt_max_conns) {
			RPCLOG(8, "connmgr_get: too many conns, dooming entry"
			    " %p\n", (void *)lru_entry->x_tiptr);
			lru_entry->x_doomed = TRUE;
			goto use_new_conn;
		}

		/*
		 * If we are at the maximum number of connections to
		 * the server, hand back the least recently used one.
		 */
		if (i == clnt_max_conns) {
			/*
			 * Copy into the handle the source address of
			 * the connection, which we will use in case of
			 * a later retry.
			 */
			if (srcaddr->len != lru_entry->x_src.len) {
				if (srcaddr->len > 0)
					kmem_free(srcaddr->buf,
					    srcaddr->maxlen);
				srcaddr->buf = kmem_zalloc(
				    lru_entry->x_src.len, KM_SLEEP);
				srcaddr->maxlen = srcaddr->len =
				    lru_entry->x_src.len;
			}
			bcopy(lru_entry->x_src.buf, srcaddr->buf, srcaddr->len);
			RPCLOG(2, "connmgr_get: call going out on %p\n",
			    (void *)lru_entry);
			lru_entry->x_time = ddi_get_lbolt();
			CONN_HOLD(lru_entry);

			if ((i > 1) && (prev != &cm_hd)) {
				/*
				 * remove and re-insert entry at head of list.
				 */
				*prev = lru_entry->x_next;
				lru_entry->x_next = cm_hd;
				cm_hd = lru_entry;
			}

			mutex_exit(&connmgr_lock);
			return (lru_entry);
		}
	} else {
		/*
		 * This is the retry case (retryaddr != NULL). Retries must
		 * be sent on the same source port as the original call.
		 */

		/*
		 * Walk the list looking for a connection with a source address
		 * that matches the retry address.
		 */
start_retry_loop:
		cmp = &cm_hd;
		while ((cm_entry = *cmp) != NULL) {
			ASSERT(cm_entry != cm_entry->x_next);

			/*
			 * determine if this connection matches the passed
			 * in retry address. If it does not match, advance
			 * to the next element on the list.
			 */
			if (zoneid != cm_entry->x_zoneid ||
			    device != cm_entry->x_rdev ||
			    retryaddr->len != cm_entry->x_src.len ||
			    bcmp(retryaddr->buf, cm_entry->x_src.buf,
			    retryaddr->len) != 0) {
				cmp = &cm_entry->x_next;
				continue;
			}
			/*
			 * Garbage collect conections that are marked
			 * for needs disconnect.
			 */
			if (cm_entry->x_needdis) {
				CONN_HOLD(cm_entry);
				connmgr_dis_and_wait(cm_entry);
				connmgr_release(cm_entry);
				/*
				 * connmgr_lock could have been
				 * dropped for the disconnect
				 * processing so start over.
				 */
				goto start_retry_loop;
			}
			/*
			 * Garbage collect the dead connections that have
			 * no threads working on them.
			 */
			if ((cm_entry->x_state_flags & (X_DEAD|X_THREAD)) ==
			    X_DEAD) {
				mutex_enter(&cm_entry->x_lock);
				if (cm_entry->x_ref != 0) {
					/*
					 * Currently in use.
					 * Cleanup later.
					 */
					cmp = &cm_entry->x_next;
					mutex_exit(&cm_entry->x_lock);
					continue;
				}
				mutex_exit(&cm_entry->x_lock);
				*cmp = cm_entry->x_next;
				mutex_exit(&connmgr_lock);
				connmgr_close(cm_entry);
				mutex_enter(&connmgr_lock);
				goto start_retry_loop;
			}

			/*
			 * Sanity check: if the connection with our source
			 * port is going to some other server, something went
			 * wrong, as we never delete connections (i.e. release
			 * ports) unless they have been idle. In this case,
			 * it is probably better to send the call out using
			 * a new source address than to fail it altogether,
			 * since that port may never be released.
			 */
			if (destaddr->len != cm_entry->x_server.len ||
			    bcmp(destaddr->buf, cm_entry->x_server.buf,
			    destaddr->len) != 0) {
				RPCLOG(1, "connmgr_get: tiptr %p"
				    " is going to a different server"
				    " with the port that belongs"
				    " to us!\n", (void *)cm_entry->x_tiptr);
				retryaddr = NULL;
				goto use_new_conn;
			}

			/*
			 * If the connection of interest is not connected and we
			 * can't reconnect it, then the server is probably
			 * still down. Return NULL to the caller and let it
			 * retry later if it wants to. We have a delay so the
			 * machine doesn't go into a tight retry loop. If the
			 * entry was already connected, or the reconnected was
			 * successful, return this entry.
			 */
			if (cm_entry->x_connected == FALSE) {
				return (connmgr_wrapconnect(cm_entry,
				    waitp, destaddr, addrfmly, NULL,
				    rpcerr, TRUE, nosignal, cr));
			} else {
				CONN_HOLD(cm_entry);

				cm_entry->x_time = ddi_get_lbolt();
				mutex_exit(&connmgr_lock);
				RPCLOG(2, "connmgr_get: found old "
				    "transport %p for retry\n",
				    (void *)cm_entry);
				return (cm_entry);
			}
		}

		/*
		 * We cannot find an entry in the list for this retry.
		 * Either the entry has been removed temporarily to be
		 * reconnected by another thread, or the original call
		 * got a port but never got connected,
		 * and hence the transport never got put in the
		 * list. Fall through to the "create new connection" code -
		 * the former case will fail there trying to rebind the port,
		 * and the later case (and any other pathological cases) will
		 * rebind and reconnect and not hang the client machine.
		 */
		RPCLOG0(8, "connmgr_get: no entry in list for retry\n");
	}
	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry = (struct cm_xprt *)
	    kmem_zalloc(sizeof (struct cm_xprt), KM_SLEEP);

	cm_entry->x_server.buf = kmem_zalloc(destaddr->len, KM_SLEEP);
	bcopy(destaddr->buf, cm_entry->x_server.buf, destaddr->len);
	cm_entry->x_server.len = cm_entry->x_server.maxlen = destaddr->len;

	cm_entry->x_state_flags = X_THREAD;
	cm_entry->x_ref = 1;
	cm_entry->x_family = addrfmly;
	cm_entry->x_rdev = device;
	cm_entry->x_zoneid = zoneid;
	mutex_init(&cm_entry->x_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&cm_entry->x_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_conn_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&cm_entry->x_dis_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Note that we add this partially initialized entry to the
	 * connection list. This is so that we don't have connections to
	 * the same server being set up concurrently by multiple threads.
	 *
	 * Note that x_src is not initialized at this point. This is because
	 * retryaddr might be NULL in which case x_src is whatever
	 * t_kbind/bindresvport gives us. If another thread wants a
	 * connection to the same server, seemingly we have an issue, but we
	 * don't. If the other thread comes in with retryaddr == NULL, then it
	 * will never look at x_src, and it will end up waiting in
	 * connmgr_cwait() for the first thread to finish the connection
	 * attempt. If the other thread comes in with retryaddr != NULL, then
	 * that means there was a request sent on a connection, in which case
	 * the the connection should already exist. Thus the first thread
	 * never gets here ... it finds the connection it its server in the
	 * connection list.
	 *
	 * But even if theory is wrong, in the retryaddr != NULL case, the 2nd
	 * thread will skip us because x_src.len == 0.
	 */
	cm_entry->x_next = cm_hd;
	cm_hd = cm_entry;
	mutex_exit(&connmgr_lock);

	/*
	 * Either we didn't find an entry to the server of interest, or we
	 * don't have the maximum number of connections to that server -
	 * create a new connection.
	 */
	RPCLOG0(8, "connmgr_get: creating new connection\n");
	rpcerr->re_status = RPC_TLIERROR;

	i = t_kopen(NULL, device, FREAD|FWRITE|FNDELAY, &tiptr, zone_kcred());
	if (i) {
		RPCLOG(1, "connmgr_get: can't open cots device, error %d\n", i);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}
	rpc_poptimod(tiptr->fp->f_vnode);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"rpcmod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push cots module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	if (i = strioctl(tiptr->fp->f_vnode, RPC_CLIENT, 0, 0, K_TO_K,
	    kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't set client status with cots "
		    "module, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	mutex_enter(&connmgr_lock);

	/*
	 * Record the write queue below the stream head (above rpcmod)
	 * so calls can be put directly on the transport.
	 */
	wq = tiptr->fp->f_vnode->v_stream->sd_wrq->q_next;
	cm_entry->x_wq = wq;

	mutex_exit(&connmgr_lock);

	if (i = strioctl(tiptr->fp->f_vnode, I_PUSH, (intptr_t)"timod", 0,
	    K_TO_K, kcred, &retval)) {
		RPCLOG(1, "connmgr_get: can't push timod, %d\n", i);
		(void) t_kclose(tiptr, 1);
		rpcerr->re_errno = i;
		connmgr_cancelconn(cm_entry);
		return (NULL);
	}

	/*
	 * If the caller has not specified reserved port usage then
	 * take the system default.
	 */
	if (useresvport == -1)
		useresvport = clnt_cots_do_bindresvport;

	if ((useresvport || retryaddr != NULL) &&
	    (addrfmly == AF_INET || addrfmly == AF_INET6)) {
		bool_t alloc_src = FALSE;

		if (srcaddr->len != destaddr->len) {
			kmem_free(srcaddr->buf, srcaddr->maxlen);
			srcaddr->buf = kmem_zalloc(destaddr->len, KM_SLEEP);
			srcaddr->maxlen = destaddr->len;
			srcaddr->len = destaddr->len;
			alloc_src = TRUE;
		}

		if ((i = bindresvport(tiptr, retryaddr, srcaddr, TRUE)) != 0) {
			(void) t_kclose(tiptr, 1);
			RPCLOG(1, "connmgr_get: couldn't bind, retryaddr: "
			    "%p\n", (void *)retryaddr);

			/*
			 * 1225408: If we allocated a source address, then it
			 * is either garbage or all zeroes. In that case
			 * we need to clear srcaddr.
			 */
			if (alloc_src == TRUE) {
				kmem_free(srcaddr->buf, srcaddr->maxlen);
				srcaddr->maxlen = srcaddr->len = 0;
				srcaddr->buf = NULL;
			}
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	} else {
		if ((i = t_kbind(tiptr, NULL, NULL)) != 0) {
			RPCLOG(1, "clnt_cots_kcreate: t_kbind: %d\n", i);
			(void) t_kclose(tiptr, 1);
			rpcerr->re_errno = i;
			connmgr_cancelconn(cm_entry);
			return (NULL);
		}
	}

	{
		/*
		 * Keep the kernel stack lean. Don't move this call
		 * declaration to the top of this function because a
		 * call is declared in connmgr_wrapconnect()
		 */
		calllist_t call;

		bzero(&call, sizeof (call));
		cv_init(&call.call_cv, NULL, CV_DEFAULT, NULL);

		/*
		 * This is a bound end-point so don't close it's stream.
		 */
		connected = connmgr_connect(cm_entry, wq, destaddr, addrfmly,
		    &call, &tidu_size, FALSE, waitp, nosignal, cr);
		*rpcerr = call.call_err;
		cv_destroy(&call.call_cv);
	}

	mutex_enter(&connmgr_lock);

	/*
	 * Set up a transport entry in the connection manager's list.
	 */
	cm_entry->x_src.buf = kmem_zalloc(srcaddr->len, KM_SLEEP);
	bcopy(srcaddr->buf, cm_entry->x_src.buf, srcaddr->len);
	cm_entry->x_src.len = cm_entry->x_src.maxlen = srcaddr->len;

	cm_entry->x_tiptr = tiptr;
	cm_entry->x_time = ddi_get_lbolt();

	if (tiptr->tp_info.servtype == T_COTS_ORD)
		cm_entry->x_ordrel = TRUE;
	else
		cm_entry->x_ordrel = FALSE;

	cm_entry->x_tidu_size = tidu_size;

	if (cm_entry->x_early_disc) {
		/*
		 * We need to check if a disconnect request has come
		 * while we are connected, if so, then we need to
		 * set rpcerr->re_status appropriately before returning
		 * NULL to caller.
		 */
		if (rpcerr->re_status == RPC_SUCCESS)
			rpcerr->re_status = RPC_XPRTFAILED;
		cm_entry->x_connected = FALSE;
	} else
		cm_entry->x_connected = connected;

	/*
	 * There could be a discrepancy here such that
	 * x_early_disc is TRUE yet connected is TRUE as well
	 * and the connection is actually connected. In that case
	 * lets be conservative and declare the connection as not
	 * connected.
	 */
	cm_entry->x_early_disc = FALSE;
	cm_entry->x_needdis = (cm_entry->x_connected == FALSE);
	cm_entry->x_ctime = ddi_get_lbolt();

	/*
	 * Notify any threads waiting that the connection attempt is done.
	 */
	cm_entry->x_thread = FALSE;
	cv_broadcast(&cm_entry->x_conn_cv);

	if (cm_entry->x_connected == FALSE) {
		mutex_exit(&connmgr_lock);
		connmgr_release(cm_entry);
		return (NULL);
	}

	mutex_exit(&connmgr_lock);

	return (cm_entry);
}
2316 * Keep the cm_xprt entry on the connecton list when making a connection. This
2317 * is to prevent multiple connections to a slow server from appearing.
2318 * We use the bit field x_thread to tell if a thread is doing a connection
2319 * which keeps other interested threads from messing with connection.
2320 * Those other threads just wait if x_thread is set.
2322 * If x_thread is not set, then we do the actual work of connecting via
2323 * connmgr_connect().
2325 * mutex convention: called with connmgr_lock held, returns with it released.
2327 static struct cm_xprt
*
2328 connmgr_wrapconnect(
2329 struct cm_xprt
*cm_entry
,
2330 const struct timeval
*waitp
,
2331 struct netbuf
*destaddr
,
2333 struct netbuf
*srcaddr
,
2334 struct rpc_err
*rpcerr
,
2339 ASSERT(MUTEX_HELD(&connmgr_lock
));
2341 * Hold this entry as we are about to drop connmgr_lock.
2343 CONN_HOLD(cm_entry
);
2346 * If there is a thread already making a connection for us, then
2347 * wait for it to complete the connection.
2349 if (cm_entry
->x_thread
== TRUE
) {
2350 rpcerr
->re_status
= connmgr_cwait(cm_entry
, waitp
, nosignal
);
2352 if (rpcerr
->re_status
!= RPC_SUCCESS
) {
2353 mutex_exit(&connmgr_lock
);
2354 connmgr_release(cm_entry
);
2361 cm_entry
->x_thread
= TRUE
;
2363 while (cm_entry
->x_needrel
== TRUE
) {
2364 cm_entry
->x_needrel
= FALSE
;
2366 connmgr_sndrel(cm_entry
);
2367 delay(drv_usectohz(1000000));
2369 mutex_enter(&connmgr_lock
);
2373 * If we need to send a T_DISCON_REQ, send one.
2375 connmgr_dis_and_wait(cm_entry
);
2377 mutex_exit(&connmgr_lock
);
2379 bzero(&call
, sizeof (call
));
2380 cv_init(&call
.call_cv
, NULL
, CV_DEFAULT
, NULL
);
2382 connected
= connmgr_connect(cm_entry
, cm_entry
->x_wq
,
2383 destaddr
, addrfmly
, &call
, &cm_entry
->x_tidu_size
,
2384 reconnect
, waitp
, nosignal
, cr
);
2386 *rpcerr
= call
.call_err
;
2387 cv_destroy(&call
.call_cv
);
2389 mutex_enter(&connmgr_lock
);
2392 if (cm_entry
->x_early_disc
) {
2394 * We need to check if a disconnect request has come
2395 * while we are connected, if so, then we need to
2396 * set rpcerr->re_status appropriately before returning
2399 if (rpcerr
->re_status
== RPC_SUCCESS
)
2400 rpcerr
->re_status
= RPC_XPRTFAILED
;
2401 cm_entry
->x_connected
= FALSE
;
2403 cm_entry
->x_connected
= connected
;
2406 * There could be a discrepancy here such that
2407 * x_early_disc is TRUE yet connected is TRUE as well
2408 * and the connection is actually connected. In that case
2409 * lets be conservative and declare the connection as not
2413 cm_entry
->x_early_disc
= FALSE
;
2414 cm_entry
->x_needdis
= (cm_entry
->x_connected
== FALSE
);
2418 * connmgr_connect() may have given up before the connection
2419 * actually timed out. So ensure that before the next
2420 * connection attempt we do a disconnect.
2422 cm_entry
->x_ctime
= ddi_get_lbolt();
2423 cm_entry
->x_thread
= FALSE
;
2425 cv_broadcast(&cm_entry
->x_conn_cv
);
2427 if (cm_entry
->x_connected
== FALSE
) {
2428 mutex_exit(&connmgr_lock
);
2429 connmgr_release(cm_entry
);
2434 if (srcaddr
!= NULL
) {
2436 * Copy into the handle the
2437 * source address of the
2438 * connection, which we will use
2439 * in case of a later retry.
2441 if (srcaddr
->len
!= cm_entry
->x_src
.len
) {
2442 if (srcaddr
->maxlen
> 0)
2443 kmem_free(srcaddr
->buf
, srcaddr
->maxlen
);
2444 srcaddr
->buf
= kmem_zalloc(cm_entry
->x_src
.len
,
2446 srcaddr
->maxlen
= srcaddr
->len
=
2447 cm_entry
->x_src
.len
;
2449 bcopy(cm_entry
->x_src
.buf
, srcaddr
->buf
, srcaddr
->len
);
2451 cm_entry
->x_time
= ddi_get_lbolt();
2452 mutex_exit(&connmgr_lock
);
2457 * If we need to send a T_DISCON_REQ, send one.
2460 connmgr_dis_and_wait(struct cm_xprt
*cm_entry
)
2462 ASSERT(MUTEX_HELD(&connmgr_lock
));
2464 while (cm_entry
->x_needdis
== TRUE
) {
2465 RPCLOG(8, "connmgr_dis_and_wait: need "
2466 "T_DISCON_REQ for connection 0x%p\n",
2468 cm_entry
->x_needdis
= FALSE
;
2469 cm_entry
->x_waitdis
= TRUE
;
2471 connmgr_snddis(cm_entry
);
2473 mutex_enter(&connmgr_lock
);
2476 if (cm_entry
->x_waitdis
== TRUE
) {
2479 RPCLOG(8, "connmgr_dis_and_wait waiting for "
2480 "T_DISCON_REQ's ACK for connection %p\n",
2483 timout
= clnt_cots_min_conntout
* drv_usectohz(1000000);
2486 * The TPI spec says that the T_DISCON_REQ
2487 * will get acknowledged, but in practice
2488 * the ACK may never get sent. So don't
2491 (void) cv_reltimedwait(&cm_entry
->x_dis_cv
,
2492 &connmgr_lock
, timout
, TR_CLOCK_TICK
);
2495 * If we got the ACK, break. If we didn't,
2496 * then send another T_DISCON_REQ.
2498 if (cm_entry
->x_waitdis
== FALSE
) {
2501 RPCLOG(8, "connmgr_dis_and_wait: did"
2502 "not get T_DISCON_REQ's ACK for "
2503 "connection %p\n", (void *)cm_entry
);
2504 cm_entry
->x_needdis
= TRUE
;
2510 connmgr_cancelconn(struct cm_xprt
*cm_entry
)
2513 * Mark the connection table entry as dead; the next thread that
2514 * goes through connmgr_release() will notice this and deal with it.
2516 mutex_enter(&connmgr_lock
);
2517 cm_entry
->x_dead
= TRUE
;
2520 * Notify any threads waiting for the connection that it isn't
2523 cm_entry
->x_thread
= FALSE
;
2524 cv_broadcast(&cm_entry
->x_conn_cv
);
2525 mutex_exit(&connmgr_lock
);
2527 connmgr_release(cm_entry
);
2531 connmgr_close(struct cm_xprt
*cm_entry
)
2533 mutex_enter(&cm_entry
->x_lock
);
2534 while (cm_entry
->x_ref
!= 0) {
2536 * Must be a noninterruptible wait.
2538 cv_wait(&cm_entry
->x_cv
, &cm_entry
->x_lock
);
2541 if (cm_entry
->x_tiptr
!= NULL
)
2542 (void) t_kclose(cm_entry
->x_tiptr
, 1);
2544 mutex_exit(&cm_entry
->x_lock
);
2545 if (cm_entry
->x_ksp
!= NULL
) {
2546 mutex_enter(&connmgr_lock
);
2547 cm_entry
->x_ksp
->ks_private
= NULL
;
2548 mutex_exit(&connmgr_lock
);
2551 * Must free the buffer we allocated for the
2552 * server address in the update function
2554 if (((struct cm_kstat_xprt
*)(cm_entry
->x_ksp
->ks_data
))->
2555 x_server
.value
.str
.addr
.ptr
!= NULL
)
2556 kmem_free(((struct cm_kstat_xprt
*)(cm_entry
->x_ksp
->
2557 ks_data
))->x_server
.value
.str
.addr
.ptr
,
2559 kmem_free(cm_entry
->x_ksp
->ks_data
,
2560 cm_entry
->x_ksp
->ks_data_size
);
2561 kstat_delete(cm_entry
->x_ksp
);
2564 mutex_destroy(&cm_entry
->x_lock
);
2565 cv_destroy(&cm_entry
->x_cv
);
2566 cv_destroy(&cm_entry
->x_conn_cv
);
2567 cv_destroy(&cm_entry
->x_dis_cv
);
2569 if (cm_entry
->x_server
.buf
!= NULL
)
2570 kmem_free(cm_entry
->x_server
.buf
, cm_entry
->x_server
.maxlen
);
2571 if (cm_entry
->x_src
.buf
!= NULL
)
2572 kmem_free(cm_entry
->x_src
.buf
, cm_entry
->x_src
.maxlen
);
2573 kmem_free(cm_entry
, sizeof (struct cm_xprt
));
2577 * Called by KRPC after sending the call message to release the connection
2581 connmgr_release(struct cm_xprt
*cm_entry
)
2583 mutex_enter(&cm_entry
->x_lock
);
2585 if (cm_entry
->x_ref
== 0)
2586 cv_signal(&cm_entry
->x_cv
);
2587 mutex_exit(&cm_entry
->x_lock
);
2591 * Set TCP receive and xmit buffer size for RPC connections.
2594 connmgr_setbufsz(calllist_t
*e
, queue_t
*wq
, cred_t
*cr
)
2599 if (rpc_default_tcp_bufsz
)
2603 * Only set new buffer size if it's larger than the system
2604 * default buffer size. If smaller buffer size is needed
2605 * then use /etc/system to set rpc_default_tcp_bufsz to 1.
2607 ok
= connmgr_getopt_int(wq
, SOL_SOCKET
, SO_RCVBUF
, &val
, e
, cr
);
2608 if ((ok
== TRUE
) && (val
< rpc_send_bufsz
)) {
2609 ok
= connmgr_setopt_int(wq
, SOL_SOCKET
, SO_RCVBUF
,
2610 rpc_send_bufsz
, e
, cr
);
2611 DTRACE_PROBE2(krpc__i__connmgr_rcvbufsz
,
2612 int, ok
, calllist_t
*, e
);
2615 ok
= connmgr_getopt_int(wq
, SOL_SOCKET
, SO_SNDBUF
, &val
, e
, cr
);
2616 if ((ok
== TRUE
) && (val
< rpc_recv_bufsz
)) {
2617 ok
= connmgr_setopt_int(wq
, SOL_SOCKET
, SO_SNDBUF
,
2618 rpc_recv_bufsz
, e
, cr
);
2619 DTRACE_PROBE2(krpc__i__connmgr_sndbufsz
,
2620 int, ok
, calllist_t
*, e
);
2626 * Given an open stream, connect to the remote. Returns true if connected,
2631 struct cm_xprt
*cm_entry
,
2633 struct netbuf
*addr
,
2638 const struct timeval
*waitp
,
2643 struct T_conn_req
*tcr
;
2644 struct T_info_ack
*tinfo
;
2645 int interrupted
, error
;
2646 int tidu_size
, kstat_instance
;
2648 /* if it's a reconnect, flush any lingering data messages */
2650 (void) putctl1(wq
, M_FLUSH
, FLUSHRW
);
2653 * Note: if the receiver uses SCM_UCRED/getpeerucred the pid will
2656 mp
= allocb_cred(sizeof (*tcr
) + addr
->len
, cr
, NOPID
);
2659 * This is unfortunate, but we need to look up the stats for
2660 * this zone to increment the "memory allocation failed"
2661 * counter. curproc->p_zone is safe since we're initiating a
2662 * connection and not in some strange streams context.
2664 struct rpcstat
*rpcstat
;
2666 rpcstat
= zone_getspecific(rpcstat_zone_key
, rpc_zone());
2667 ASSERT(rpcstat
!= NULL
);
2669 RPCLOG0(1, "connmgr_connect: cannot alloc mp for "
2670 "sending conn request\n");
2671 COTSRCSTAT_INCR(rpcstat
->rpc_cots_client
, rcnomem
);
2672 e
->call_status
= RPC_SYSTEMERROR
;
2673 e
->call_reason
= ENOSR
;
2677 /* Set TCP buffer size for RPC connections if needed */
2678 if (addrfmly
== AF_INET
|| addrfmly
== AF_INET6
)
2679 (void) connmgr_setbufsz(e
, wq
, cr
);
2681 mp
->b_datap
->db_type
= M_PROTO
;
2682 tcr
= (struct T_conn_req
*)mp
->b_rptr
;
2683 bzero(tcr
, sizeof (*tcr
));
2684 tcr
->PRIM_type
= T_CONN_REQ
;
2685 tcr
->DEST_length
= addr
->len
;
2686 tcr
->DEST_offset
= sizeof (struct T_conn_req
);
2687 mp
->b_wptr
= mp
->b_rptr
+ sizeof (*tcr
);
2689 bcopy(addr
->buf
, mp
->b_wptr
, tcr
->DEST_length
);
2690 mp
->b_wptr
+= tcr
->DEST_length
;
2692 RPCLOG(8, "connmgr_connect: sending conn request on queue "
2694 RPCLOG(8, " call %p\n", (void *)wq
);
2696 * We use the entry in the handle that is normally used for
2697 * waiting for RPC replies to wait for the connection accept.
2699 if (clnt_dispatch_send(wq
, mp
, e
, 0, 0) != RPC_SUCCESS
) {
2700 DTRACE_PROBE(krpc__e__connmgr__connect__cantsend
);
2705 mutex_enter(&clnt_pending_lock
);
2708 * We wait for the transport connection to be made, or an
2709 * indication that it could not be made.
2714 * waitforack should have been called with T_OK_ACK, but the
2715 * present implementation needs to be passed T_INFO_ACK to
2718 error
= waitforack(e
, T_INFO_ACK
, waitp
, nosignal
);
2721 if (zone_status_get(curproc
->p_zone
) >= ZONE_IS_EMPTY
) {
2723 * No time to lose; we essentially have been signaled to
2730 RPCLOG0(8, "connmgr_connect: giving up "
2731 "on connection attempt; "
2732 "clnt_dispatch notifyconn "
2733 "diagnostic 'no one waiting for "
2734 "connection' should not be "
2738 e
->call_prev
->call_next
= e
->call_next
;
2740 clnt_pending
= e
->call_next
;
2742 e
->call_next
->call_prev
= e
->call_prev
;
2743 mutex_exit(&clnt_pending_lock
);
2745 if (e
->call_status
!= RPC_SUCCESS
|| error
!= 0) {
2747 e
->call_status
= RPC_INTR
;
2748 else if (error
== ETIME
)
2749 e
->call_status
= RPC_TIMEDOUT
;
2750 else if (error
== EPROTO
) {
2751 e
->call_status
= RPC_SYSTEMERROR
;
2752 e
->call_reason
= EPROTO
;
2755 RPCLOG(8, "connmgr_connect: can't connect, status: "
2756 "%s\n", clnt_sperrno(e
->call_status
));
2758 if (e
->call_reply
) {
2759 freemsg(e
->call_reply
);
2760 e
->call_reply
= NULL
;
2766 * The result of the "connection accept" is a T_info_ack
2767 * in the call_reply field.
2769 ASSERT(e
->call_reply
!= NULL
);
2771 e
->call_reply
= NULL
;
2772 tinfo
= (struct T_info_ack
*)mp
->b_rptr
;
2774 tidu_size
= tinfo
->TIDU_size
;
2775 tidu_size
-= (tidu_size
% BYTES_PER_XDR_UNIT
);
2776 if (tidu_size
> COTS_DEFAULT_ALLOCSIZE
|| (tidu_size
<= 0))
2777 tidu_size
= COTS_DEFAULT_ALLOCSIZE
;
2778 *tidu_ptr
= tidu_size
;
2783 * Set up the pertinent options. NODELAY is so the transport doesn't
2784 * buffer up RPC messages on either end. This may not be valid for
2785 * all transports. Failure to set this option is not cause to
2786 * bail out so we return success anyway. Note that lack of NODELAY
2787 * or some other way to flush the message on both ends will cause
2788 * lots of retries and terrible performance.
2790 if (addrfmly
== AF_INET
|| addrfmly
== AF_INET6
) {
2791 (void) connmgr_setopt(wq
, IPPROTO_TCP
, TCP_NODELAY
, e
, cr
);
2792 if (e
->call_status
== RPC_XPRTFAILED
)
2797 * Since we have a connection, we now need to figure out if
2798 * we need to create a kstat. If x_ksp is not NULL then we
2799 * are reusing a connection and so we do not need to create
2800 * another kstat -- lets just return.
2802 if (cm_entry
->x_ksp
!= NULL
)
2806 * We need to increment rpc_kstat_instance atomically to prevent
2807 * two kstats being created with the same instance.
2809 kstat_instance
= atomic_inc_32_nv((uint32_t *)&rpc_kstat_instance
);
2811 if ((cm_entry
->x_ksp
= kstat_create_zone("unix", kstat_instance
,
2812 "rpc_cots_connections", "rpc", KSTAT_TYPE_NAMED
,
2813 (uint_t
)(sizeof (cm_kstat_xprt_t
) / sizeof (kstat_named_t
)),
2814 KSTAT_FLAG_VIRTUAL
, cm_entry
->x_zoneid
)) == NULL
) {
2818 cm_entry
->x_ksp
->ks_lock
= &connmgr_lock
;
2819 cm_entry
->x_ksp
->ks_private
= cm_entry
;
2820 cm_entry
->x_ksp
->ks_data_size
= ((INET6_ADDRSTRLEN
* sizeof (char))
2821 + sizeof (cm_kstat_template
));
2822 cm_entry
->x_ksp
->ks_data
= kmem_alloc(cm_entry
->x_ksp
->ks_data_size
,
2824 bcopy(&cm_kstat_template
, cm_entry
->x_ksp
->ks_data
,
2825 cm_entry
->x_ksp
->ks_data_size
);
2826 ((struct cm_kstat_xprt
*)(cm_entry
->x_ksp
->ks_data
))->
2827 x_server
.value
.str
.addr
.ptr
=
2828 kmem_alloc(INET6_ADDRSTRLEN
, KM_SLEEP
);
2830 cm_entry
->x_ksp
->ks_update
= conn_kstat_update
;
2831 kstat_install(cm_entry
->x_ksp
);
2836 * Verify that the specified offset falls within the mblk and
2837 * that the resulting pointer is aligned.
2838 * Returns NULL if not.
2840 * code from fs/sockfs/socksubr.c
2843 connmgr_opt_getoff(mblk_t
*mp
, t_uscalar_t offset
,
2844 t_uscalar_t length
, uint_t align_size
)
2846 uintptr_t ptr1
, ptr2
;
2848 ASSERT(mp
&& mp
->b_wptr
>= mp
->b_rptr
);
2849 ptr1
= (uintptr_t)mp
->b_rptr
+ offset
;
2850 ptr2
= (uintptr_t)ptr1
+ length
;
2851 if (ptr1
< (uintptr_t)mp
->b_rptr
|| ptr2
> (uintptr_t)mp
->b_wptr
) {
2854 if ((ptr1
& (align_size
- 1)) != 0) {
2857 return ((void *)ptr1
);
2861 connmgr_getopt_int(queue_t
*wq
, int level
, int name
, int *val
,
2862 calllist_t
*e
, cred_t
*cr
)
2865 struct opthdr
*opt
, *opt_res
;
2866 struct T_optmgmt_req
*tor
;
2867 struct T_optmgmt_ack
*opt_ack
;
2868 struct timeval waitp
;
2871 mp
= allocb_cred(sizeof (struct T_optmgmt_req
) +
2872 sizeof (struct opthdr
) + sizeof (int), cr
, NOPID
);
2876 mp
->b_datap
->db_type
= M_PROTO
;
2877 tor
= (struct T_optmgmt_req
*)(mp
->b_rptr
);
2878 tor
->PRIM_type
= T_SVR4_OPTMGMT_REQ
;
2879 tor
->MGMT_flags
= T_CURRENT
;
2880 tor
->OPT_length
= sizeof (struct opthdr
) + sizeof (int);
2881 tor
->OPT_offset
= sizeof (struct T_optmgmt_req
);
2883 opt
= (struct opthdr
*)(mp
->b_rptr
+ sizeof (struct T_optmgmt_req
));
2886 opt
->len
= sizeof (int);
2887 mp
->b_wptr
+= sizeof (struct T_optmgmt_req
) + sizeof (struct opthdr
) +
2891 * We will use this connection regardless
2892 * of whether or not the option is readable.
2894 if (clnt_dispatch_send(wq
, mp
, e
, 0, 0) != RPC_SUCCESS
) {
2895 DTRACE_PROBE(krpc__e__connmgr__getopt__cantsend
);
2900 mutex_enter(&clnt_pending_lock
);
2902 waitp
.tv_sec
= clnt_cots_min_conntout
;
2904 error
= waitforack(e
, T_OPTMGMT_ACK
, &waitp
, 1);
2907 e
->call_prev
->call_next
= e
->call_next
;
2909 clnt_pending
= e
->call_next
;
2911 e
->call_next
->call_prev
= e
->call_prev
;
2912 mutex_exit(&clnt_pending_lock
);
2914 /* get reply message */
2916 e
->call_reply
= NULL
;
2918 if ((!mp
) || (e
->call_status
!= RPC_SUCCESS
) || (error
!= 0)) {
2920 DTRACE_PROBE4(krpc__e__connmgr_getopt
, int, name
,
2921 int, e
->call_status
, int, error
, mblk_t
*, mp
);
2928 opt_ack
= (struct T_optmgmt_ack
*)mp
->b_rptr
;
2929 opt_res
= (struct opthdr
*)connmgr_opt_getoff(mp
, opt_ack
->OPT_offset
,
2930 opt_ack
->OPT_length
, __TPI_ALIGN_SIZE
);
2933 DTRACE_PROBE4(krpc__e__connmgr_optres
, mblk_t
*, mp
, int, name
,
2934 int, opt_ack
->OPT_offset
, int, opt_ack
->OPT_length
);
2938 *val
= *(int *)&opt_res
[1];
2940 DTRACE_PROBE2(connmgr_getopt__ok
, int, name
, int, *val
);
2947 * Called by connmgr_connect to set an option on the new stream.
2950 connmgr_setopt_int(queue_t
*wq
, int level
, int name
, int val
,
2951 calllist_t
*e
, cred_t
*cr
)
2955 struct T_optmgmt_req
*tor
;
2956 struct timeval waitp
;
2959 mp
= allocb_cred(sizeof (struct T_optmgmt_req
) +
2960 sizeof (struct opthdr
) + sizeof (int), cr
, NOPID
);
2962 RPCLOG0(1, "connmgr_setopt: cannot alloc mp for option "
2967 mp
->b_datap
->db_type
= M_PROTO
;
2968 tor
= (struct T_optmgmt_req
*)(mp
->b_rptr
);
2969 tor
->PRIM_type
= T_SVR4_OPTMGMT_REQ
;
2970 tor
->MGMT_flags
= T_NEGOTIATE
;
2971 tor
->OPT_length
= sizeof (struct opthdr
) + sizeof (int);
2972 tor
->OPT_offset
= sizeof (struct T_optmgmt_req
);
2974 opt
= (struct opthdr
*)(mp
->b_rptr
+ sizeof (struct T_optmgmt_req
));
2977 opt
->len
= sizeof (int);
2978 *(int *)((char *)opt
+ sizeof (*opt
)) = val
;
2979 mp
->b_wptr
+= sizeof (struct T_optmgmt_req
) + sizeof (struct opthdr
) +
2983 * We will use this connection regardless
2984 * of whether or not the option is settable.
2986 if (clnt_dispatch_send(wq
, mp
, e
, 0, 0) != RPC_SUCCESS
) {
2987 DTRACE_PROBE(krpc__e__connmgr__setopt__cantsend
);
2992 mutex_enter(&clnt_pending_lock
);
2994 waitp
.tv_sec
= clnt_cots_min_conntout
;
2996 error
= waitforack(e
, T_OPTMGMT_ACK
, &waitp
, 1);
2999 e
->call_prev
->call_next
= e
->call_next
;
3001 clnt_pending
= e
->call_next
;
3003 e
->call_next
->call_prev
= e
->call_prev
;
3004 mutex_exit(&clnt_pending_lock
);
3006 if (e
->call_reply
!= NULL
) {
3007 freemsg(e
->call_reply
);
3008 e
->call_reply
= NULL
;
3011 if (e
->call_status
!= RPC_SUCCESS
|| error
!= 0) {
3012 RPCLOG(1, "connmgr_setopt: can't set option: %d\n", name
);
3015 RPCLOG(8, "connmgr_setopt: successfully set option: %d\n", name
);
3020 connmgr_setopt(queue_t
*wq
, int level
, int name
, calllist_t
*e
, cred_t
*cr
)
3022 return (connmgr_setopt_int(wq
, level
, name
, 1, e
, cr
));
#ifdef	DEBUG

/*
 * This is a knob to let us force code coverage in allocation failure
 * case.
 */
static int	connmgr_failsnd;
#define	CONN_SND_ALLOC(Size, Pri)	\
	((connmgr_failsnd-- > 0) ? NULL : allocb(Size, Pri))

#else

#define	CONN_SND_ALLOC(Size, Pri)	allocb(Size, Pri)

#endif
3042 * Sends an orderly release on the specified queue.
3043 * Entered with connmgr_lock. Exited without connmgr_lock
3046 connmgr_sndrel(struct cm_xprt
*cm_entry
)
3048 struct T_ordrel_req
*torr
;
3050 queue_t
*q
= cm_entry
->x_wq
;
3051 ASSERT(MUTEX_HELD(&connmgr_lock
));
3052 mp
= CONN_SND_ALLOC(sizeof (struct T_ordrel_req
), BPRI_LO
);
3054 cm_entry
->x_needrel
= TRUE
;
3055 mutex_exit(&connmgr_lock
);
3056 RPCLOG(1, "connmgr_sndrel: cannot alloc mp for sending ordrel "
3057 "to queue %p\n", (void *)q
);
3060 mutex_exit(&connmgr_lock
);
3062 mp
->b_datap
->db_type
= M_PROTO
;
3063 torr
= (struct T_ordrel_req
*)(mp
->b_rptr
);
3064 torr
->PRIM_type
= T_ORDREL_REQ
;
3065 mp
->b_wptr
= mp
->b_rptr
+ sizeof (struct T_ordrel_req
);
3067 RPCLOG(8, "connmgr_sndrel: sending ordrel to queue %p\n", (void *)q
);
3072 * Sends an disconnect on the specified queue.
3073 * Entered with connmgr_lock. Exited without connmgr_lock
3076 connmgr_snddis(struct cm_xprt
*cm_entry
)
3078 struct T_discon_req
*tdis
;
3080 queue_t
*q
= cm_entry
->x_wq
;
3082 ASSERT(MUTEX_HELD(&connmgr_lock
));
3083 mp
= CONN_SND_ALLOC(sizeof (*tdis
), BPRI_LO
);
3085 cm_entry
->x_needdis
= TRUE
;
3086 mutex_exit(&connmgr_lock
);
3087 RPCLOG(1, "connmgr_snddis: cannot alloc mp for sending discon "
3088 "to queue %p\n", (void *)q
);
3091 mutex_exit(&connmgr_lock
);
3093 mp
->b_datap
->db_type
= M_PROTO
;
3094 tdis
= (struct T_discon_req
*)mp
->b_rptr
;
3095 tdis
->PRIM_type
= T_DISCON_REQ
;
3096 mp
->b_wptr
= mp
->b_rptr
+ sizeof (*tdis
);
3098 RPCLOG(8, "connmgr_snddis: sending discon to queue %p\n", (void *)q
);
3103 * Sets up the entry for receiving replies, and calls rpcmod's write put proc
3104 * (through put) to send the call.
3107 clnt_dispatch_send(queue_t
*q
, mblk_t
*mp
, calllist_t
*e
, uint_t xid
,
3112 e
->call_status
= RPC_TIMEDOUT
; /* optimistic, eh? */
3116 e
->call_notified
= FALSE
;
3119 e
->call_status
= RPC_CANTSEND
;
3120 e
->call_reason
= ENOBUFS
;
3121 return (RPC_CANTSEND
);
3125 * If queue_flag is set then the calllist_t is already on the hash
3126 * queue. In this case just send the message and return.
3130 return (RPC_SUCCESS
);
3135 * Set up calls for RPC requests (with XID != 0) on the hash
3136 * queue for fast lookups and place other calls (i.e.
3137 * connection management) on the linked list.
3140 RPCLOG(64, "clnt_dispatch_send: putting xid 0x%x on "
3141 "dispatch list\n", xid
);
3142 e
->call_hash
= call_hash(xid
, clnt_cots_hash_size
);
3143 e
->call_bucket
= &cots_call_ht
[e
->call_hash
];
3144 call_table_enter(e
);
3146 mutex_enter(&clnt_pending_lock
);
3148 clnt_pending
->call_prev
= e
;
3149 e
->call_next
= clnt_pending
;
3150 e
->call_prev
= NULL
;
3152 mutex_exit(&clnt_pending_lock
);
3156 return (RPC_SUCCESS
);
3160 * Called by rpcmod to notify a client with a clnt_pending call that its reply
3161 * has arrived. If we can't find a client waiting for this reply, we log
3162 * the error and return.
3165 clnt_dispatch_notify(mblk_t
*mp
, zoneid_t zoneid
)
3167 calllist_t
*e
= NULL
;
3172 if ((IS_P2ALIGNED(mp
->b_rptr
, sizeof (uint32_t))) &&
3173 (mp
->b_wptr
- mp
->b_rptr
) >= sizeof (xid
))
3174 xid
= *((uint32_t *)mp
->b_rptr
);
3177 unsigned char *p
= (unsigned char *)&xid
;
3178 unsigned char *rptr
;
3182 * Copy the xid, byte-by-byte into xid.
3186 while (rptr
< tmp
->b_wptr
) {
3188 if (++i
>= sizeof (xid
))
3195 * If we got here, we ran out of mblk space before the
3196 * xid could be copied.
3198 ASSERT(tmp
== NULL
&& i
< sizeof (xid
));
3201 "clnt_dispatch_notify: message less than size of xid\n");
3207 hash
= call_hash(xid
, clnt_cots_hash_size
);
3208 chtp
= &cots_call_ht
[hash
];
3209 /* call_table_find returns with the hash bucket locked */
3210 call_table_find(chtp
, xid
, e
);
3214 * Found thread waiting for this reply
3216 mutex_enter(&e
->call_lock
);
3219 * verify that the reply is coming in on
3220 * the same zone that it was sent from.
3222 if (e
->call_zoneid
!= zoneid
) {
3223 mutex_exit(&e
->call_lock
);
3224 mutex_exit(&chtp
->ct_lock
);
3225 RPCLOG0(1, "clnt_dispatch_notify: incorrect zoneid\n");
3231 * This can happen under the following scenario:
3232 * clnt_cots_kcallit() times out on the response,
3233 * rfscall() repeats the CLNT_CALL() with
3234 * the same xid, clnt_cots_kcallit() sends the retry,
3235 * thereby putting the clnt handle on the pending list,
3236 * the first response arrives, signalling the thread
3237 * in clnt_cots_kcallit(). Before that thread is
3238 * dispatched, the second response arrives as well,
3239 * and clnt_dispatch_notify still finds the handle on
3240 * the pending list, with call_reply set. So free the
3243 * It is also possible for a response intended for
3244 * an RPC call with a different xid to reside here.
3245 * This can happen if the thread that owned this
3246 * client handle prior to the current owner bailed
3247 * out and left its call record on the dispatch
3248 * queue. A window exists where the response can
3249 * arrive before the current owner dispatches its
3252 * In any case, this is the very last point where we
3253 * can safely check the call_reply field before
3254 * placing the new response there.
3256 freemsg(e
->call_reply
);
3258 e
->call_status
= RPC_SUCCESS
;
3259 e
->call_notified
= TRUE
;
3260 cv_signal(&e
->call_cv
);
3261 mutex_exit(&e
->call_lock
);
3262 mutex_exit(&chtp
->ct_lock
);
3266 struct rpcstat
*rpcstat
;
3268 mutex_exit(&chtp
->ct_lock
);
3269 RPCLOG(65, "clnt_dispatch_notify: no caller for reply 0x%x\n",
3272 * This is unfortunate, but we need to lookup the zone so we
3273 * can increment its "rcbadxids" counter.
3275 zone
= zone_find_by_id(zoneid
);
3278 * The zone went away...
3282 rpcstat
= zone_getspecific(rpcstat_zone_key
, zone
);
3283 if (zone_status_get(zone
) >= ZONE_IS_SHUTTING_DOWN
) {
3290 COTSRCSTAT_INCR(rpcstat
->rpc_cots_client
, rcbadxids
);
3297 * Called by rpcmod when a non-data indication arrives. The ones in which we
3298 * are interested are connection indications and options acks. We dispatch
3299 * based on the queue the indication came in on. If we are not interested in
3300 * what came in, we return false to rpcmod, who will then pass it upstream.
3303 clnt_dispatch_notifyconn(queue_t
*q
, mblk_t
*mp
)
3308 ASSERT((q
->q_flag
& QREADR
) == 0);
3310 type
= ((union T_primitives
*)mp
->b_rptr
)->type
;
3311 RPCLOG(8, "clnt_dispatch_notifyconn: prim type: [%s]\n",
3312 rpc_tpiprim2name(type
));
3313 mutex_enter(&clnt_pending_lock
);
3314 for (e
= clnt_pending
; /* NO CONDITION */; e
= e
->call_next
) {
3316 mutex_exit(&clnt_pending_lock
);
3317 RPCLOG(1, "clnt_dispatch_notifyconn: no one waiting "
3318 "for connection on queue 0x%p\n", (void *)q
);
3321 if (e
->call_wq
== q
)
3328 * The transport is now connected, send a T_INFO_REQ to get
3331 mutex_exit(&clnt_pending_lock
);
3332 ASSERT(mp
->b_datap
->db_lim
- mp
->b_datap
->db_base
>=
3333 sizeof (struct T_info_req
));
3334 mp
->b_rptr
= mp
->b_datap
->db_base
;
3335 ((union T_primitives
*)mp
->b_rptr
)->type
= T_INFO_REQ
;
3336 mp
->b_wptr
= mp
->b_rptr
+ sizeof (struct T_info_req
);
3337 mp
->b_datap
->db_type
= M_PCPROTO
;
3342 e
->call_status
= RPC_SUCCESS
;
3344 e
->call_notified
= TRUE
;
3345 cv_signal(&e
->call_cv
);
3348 e
->call_status
= RPC_CANTCONNECT
;
3350 e
->call_notified
= TRUE
;
3351 cv_signal(&e
->call_cv
);
3355 * Great, but we are really waiting for a T_CONN_CON
3360 mutex_exit(&clnt_pending_lock
);
3361 RPCLOG(1, "clnt_dispatch_notifyconn: bad type %d\n", type
);
3365 mutex_exit(&clnt_pending_lock
);
3370 * Called by rpcmod when the transport is (or should be) going away. Informs
3371 * all callers waiting for replies and marks the entry in the connection
3372 * manager's list as unconnected, and either closing (close handshake in
3373 * progress) or dead.
3376 clnt_dispatch_notifyall(queue_t
*q
, int32_t msg_type
, int32_t reason
)
3380 struct cm_xprt
*cm_entry
;
3381 int have_connmgr_lock
;
3384 ASSERT((q
->q_flag
& QREADR
) == 0);
3386 RPCLOG(1, "clnt_dispatch_notifyall on queue %p", (void *)q
);
3387 RPCLOG(1, " received a notifcation prim type [%s]",
3388 rpc_tpiprim2name(msg_type
));
3389 RPCLOG(1, " and reason %d\n", reason
);
3392 * Find the transport entry in the connection manager's list, close
3393 * the transport and delete the entry. In the case where rpcmod's
3394 * idle timer goes off, it sends us a T_ORDREL_REQ, indicating we
3395 * should gracefully close the connection.
3397 have_connmgr_lock
= 1;
3398 mutex_enter(&connmgr_lock
);
3399 for (cm_entry
= cm_hd
; cm_entry
; cm_entry
= cm_entry
->x_next
) {
3400 ASSERT(cm_entry
!= cm_entry
->x_next
);
3401 if (cm_entry
->x_wq
== q
) {
3402 ASSERT(MUTEX_HELD(&connmgr_lock
));
3403 ASSERT(have_connmgr_lock
== 1);
3407 if (cm_entry
->x_dead
) {
3408 RPCLOG(1, "idle timeout on dead "
3411 if (clnt_stop_idle
!= NULL
)
3412 (*clnt_stop_idle
)(q
);
3417 * Only mark the connection as dead if it is
3418 * connected and idle.
3419 * An unconnected connection has probably
3420 * gone idle because the server is down,
3421 * and when it comes back up there will be
3422 * retries that need to use that connection.
3424 if (cm_entry
->x_connected
||
3425 cm_entry
->x_doomed
) {
3426 if (cm_entry
->x_ordrel
) {
3427 if (cm_entry
->x_closing
==
3431 * obviously wedged due
3432 * to a bug or problem
3433 * with the transport.
3438 cm_entry
->x_dead
= TRUE
;
3441 have_connmgr_lock
= 0;
3442 if (clnt_stop_idle
!=
3444 (*clnt_stop_idle
)(q
);
3447 cm_entry
->x_closing
= TRUE
;
3448 connmgr_sndrel(cm_entry
);
3449 have_connmgr_lock
= 0;
3451 cm_entry
->x_dead
= TRUE
;
3452 mutex_exit(&connmgr_lock
);
3453 have_connmgr_lock
= 0;
3454 if (clnt_stop_idle
!= NULL
)
3455 (*clnt_stop_idle
)(q
);
3459 * We don't mark the connection
3460 * as dead, but we turn off the
3463 mutex_exit(&connmgr_lock
);
3464 have_connmgr_lock
= 0;
3465 if (clnt_stop_idle
!= NULL
)
3466 (*clnt_stop_idle
)(q
);
3467 RPCLOG(1, "clnt_dispatch_notifyall:"
3468 " ignoring timeout from rpcmod"
3469 " (q %p) because we are not "
3470 " connected\n", (void *)q
);
3475 * If this entry is marked closing, then we are
3476 * completing a close handshake, and the
3477 * connection is dead. Otherwise, the server is
3478 * trying to close. Since the server will not
3479 * be sending any more RPC replies, we abort
3480 * the connection, including flushing
3481 * any RPC requests that are in-transit.
3482 * In either case, mark the entry as dead so
3483 * that it can be closed by the connection
3484 * manager's garbage collector.
3486 cm_entry
->x_dead
= TRUE
;
3487 if (cm_entry
->x_closing
) {
3488 mutex_exit(&connmgr_lock
);
3489 have_connmgr_lock
= 0;
3490 if (clnt_stop_idle
!= NULL
)
3491 (*clnt_stop_idle
)(q
);
3494 * if we're getting a disconnect
3495 * before we've finished our
3496 * connect attempt, mark it for
3499 if (cm_entry
->x_thread
)
3500 cm_entry
->x_early_disc
= TRUE
;
3502 cm_entry
->x_connected
= FALSE
;
3503 cm_entry
->x_waitdis
= TRUE
;
3504 connmgr_snddis(cm_entry
);
3505 have_connmgr_lock
= 0;
3511 cm_entry
->x_waitdis
= FALSE
;
3512 cv_signal(&cm_entry
->x_dis_cv
);
3513 mutex_exit(&connmgr_lock
);
3517 if (cm_entry
->x_thread
)
3518 cm_entry
->x_early_disc
= TRUE
;
3520 cm_entry
->x_connected
= FALSE
;
3521 cm_entry
->x_waitdis
= TRUE
;
3523 connmgr_snddis(cm_entry
);
3524 have_connmgr_lock
= 0;
3530 * if we're getting a disconnect before
3531 * we've finished our connect attempt,
3532 * mark it for later processing
3534 if (cm_entry
->x_closing
) {
3535 cm_entry
->x_dead
= TRUE
;
3536 mutex_exit(&connmgr_lock
);
3537 have_connmgr_lock
= 0;
3538 if (clnt_stop_idle
!= NULL
)
3539 (*clnt_stop_idle
)(q
);
3541 if (cm_entry
->x_thread
) {
3542 cm_entry
->x_early_disc
= TRUE
;
3544 cm_entry
->x_dead
= TRUE
;
3545 cm_entry
->x_connected
= FALSE
;
3554 if (have_connmgr_lock
)
3555 mutex_exit(&connmgr_lock
);
3557 if (msg_type
== T_ERROR_ACK
|| msg_type
== T_OK_ACK
) {
3558 RPCLOG(1, "clnt_dispatch_notifyall: (wq %p) could not find "
3559 "connmgr entry for discon ack\n", (void *)q
);
3564 * Then kick all the clnt_pending calls out of their wait. There
3565 * should be no clnt_pending calls in the case of rpcmod's idle
3568 for (i
= 0; i
< clnt_cots_hash_size
; i
++) {
3569 ctp
= &cots_call_ht
[i
];
3570 mutex_enter(&ctp
->ct_lock
);
3571 for (e
= ctp
->ct_call_next
;
3572 e
!= (calllist_t
*)ctp
;
3574 if (e
->call_wq
== q
&& e
->call_notified
== FALSE
) {
3576 "clnt_dispatch_notifyall for queue %p ",
3578 RPCLOG(1, "aborting clnt_pending call %p\n",
3581 if (msg_type
== T_DISCON_IND
)
3582 e
->call_reason
= reason
;
3583 e
->call_notified
= TRUE
;
3584 e
->call_status
= RPC_XPRTFAILED
;
3585 cv_signal(&e
->call_cv
);
3588 mutex_exit(&ctp
->ct_lock
);
3591 mutex_enter(&clnt_pending_lock
);
3592 for (e
= clnt_pending
; e
; e
= e
->call_next
) {
3594 * Only signal those RPC handles that haven't been
3595 * signalled yet. Otherwise we can get a bogus call_reason.
3596 * This can happen if thread A is making a call over a
3597 * connection. If the server is killed, it will cause
3598 * reset, and reason will default to EIO as a result of
3599 * a T_ORDREL_IND. Thread B then attempts to recreate
3600 * the connection but gets a T_DISCON_IND. If we set the
3601 * call_reason code for all threads, then if thread A
3602 * hasn't been dispatched yet, it will get the wrong
3603 * reason. The bogus call_reason can make it harder to
3604 * discriminate between calls that fail because the
3605 * connection attempt failed versus those where the call
3606 * may have been executed on the server.
3608 if (e
->call_wq
== q
&& e
->call_notified
== FALSE
) {
3609 RPCLOG(1, "clnt_dispatch_notifyall for queue %p ",
3611 RPCLOG(1, " aborting clnt_pending call %p\n",
3614 if (msg_type
== T_DISCON_IND
)
3615 e
->call_reason
= reason
;
3616 e
->call_notified
= TRUE
;
3618 * Let the caller timeout, else he will retry
3621 e
->call_status
= RPC_XPRTFAILED
;
3624 * We used to just signal those threads
3625 * waiting for a connection, (call_xid = 0).
3626 * That meant that threads waiting for a response
3627 * waited till their timeout expired. This
3628 * could be a long time if they've specified a
3629 * maximum timeout. (2^31 - 1). So we
3630 * Signal all threads now.
3632 cv_signal(&e
->call_cv
);
3635 mutex_exit(&clnt_pending_lock
);
3641 * after resuming a system that's been suspended for longer than the
3642 * NFS server's idle timeout (svc_idle_timeout for Solaris 2), rfscall()
3643 * generates "NFS server X not responding" and "NFS server X ok" messages;
3644 * here we reset inet connections to cause a re-connect and avoid those
3645 * NFS messages. see 4045054
3648 connmgr_cpr_reset(void *arg
, int code
)
3650 struct cm_xprt
*cxp
;
3652 if (code
== CB_CODE_CPR_CHKPT
)
3655 if (mutex_tryenter(&connmgr_lock
) == 0)
3657 for (cxp
= cm_hd
; cxp
; cxp
= cxp
->x_next
) {
3658 if ((cxp
->x_family
== AF_INET
|| cxp
->x_family
== AF_INET6
) &&
3659 cxp
->x_connected
== TRUE
) {
3661 cxp
->x_early_disc
= TRUE
;
3663 cxp
->x_connected
= FALSE
;
3664 cxp
->x_needdis
= TRUE
;
3667 mutex_exit(&connmgr_lock
);
3672 clnt_cots_stats_init(zoneid_t zoneid
, struct rpc_cots_client
**statsp
)
3675 *statsp
= (struct rpc_cots_client
*)rpcstat_zone_init_common(zoneid
,
3676 "unix", "rpc_cots_client", (const kstat_named_t
*)&cots_rcstat_tmpl
,
3677 sizeof (cots_rcstat_tmpl
));
3681 clnt_cots_stats_fini(zoneid_t zoneid
, struct rpc_cots_client
**statsp
)
3683 rpcstat_zone_fini_common(zoneid
, "unix", "rpc_cots_client");
3684 kmem_free(*statsp
, sizeof (cots_rcstat_tmpl
));
3688 clnt_cots_init(void)
3690 mutex_init(&connmgr_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3691 mutex_init(&clnt_pending_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
3693 if (clnt_cots_hash_size
< DEFAULT_MIN_HASH_SIZE
)
3694 clnt_cots_hash_size
= DEFAULT_MIN_HASH_SIZE
;
3696 cots_call_ht
= call_table_init(clnt_cots_hash_size
);
3697 zone_key_create(&zone_cots_key
, NULL
, NULL
, clnt_zone_destroy
);
3701 clnt_cots_fini(void)
3703 (void) zone_key_delete(zone_cots_key
);
3707 * Wait for TPI ack, returns success only if expected ack is received
3708 * within timeout period.
3712 waitforack(calllist_t
*e
, t_scalar_t ack_prim
, const struct timeval
*waitp
,
3715 union T_primitives
*tpr
;
3719 ASSERT(MUTEX_HELD(&clnt_pending_lock
));
3720 while (e
->call_reply
== NULL
) {
3721 if (waitp
!= NULL
) {
3722 timout
= waitp
->tv_sec
* drv_usectohz(MICROSEC
) +
3723 drv_usectohz(waitp
->tv_usec
);
3725 cv_stat
= cv_reltimedwait(&e
->call_cv
,
3726 &clnt_pending_lock
, timout
, TR_CLOCK_TICK
);
3728 cv_stat
= cv_reltimedwait_sig(&e
->call_cv
,
3729 &clnt_pending_lock
, timout
, TR_CLOCK_TICK
);
3732 cv_wait(&e
->call_cv
, &clnt_pending_lock
);
3734 cv_stat
= cv_wait_sig(&e
->call_cv
,
3735 &clnt_pending_lock
);
3742 * if we received an error from the server and we know a reply
3743 * is not going to be sent, do not wait for the full timeout,
3746 if (e
->call_status
== RPC_XPRTFAILED
)
3747 return (e
->call_reason
);
3749 tpr
= (union T_primitives
*)e
->call_reply
->b_rptr
;
3750 if (tpr
->type
== ack_prim
)
3751 return (0); /* Success */
3753 if (tpr
->type
== T_ERROR_ACK
) {
3754 if (tpr
->error_ack
.TLI_error
== TSYSERR
)
3755 return (tpr
->error_ack
.UNIX_error
);
3757 return (t_tlitosyserr(tpr
->error_ack
.TLI_error
));
3760 return (EPROTO
); /* unknown or unexpected primitive */