/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Multithreaded STREAMS Local Transport Provider.
 *
 * OVERVIEW
 * ========
 *
 * This driver provides TLI as well as socket semantics. It provides
 * connectionless, connection oriented, and connection oriented with orderly
 * release transports for TLI and sockets. Each transport type has separate
 * name spaces (i.e. it is not possible to connect from a socket to a TLI
 * endpoint) - this removes any name space conflicts when binding to socket
 * style transport addresses.
 *
 * NOTE: There is one exception: Socket ticots and ticotsord transports share
 * the same namespace. In fact, sockets always use ticotsord type transport.
 *
 * The driver mode is specified during open() by the minor number used for
 * open.
 *
 * The sockets in addition have the following semantic differences:
 * No support for passing up credentials (TL_SET[U]CRED).
 *
 *	Options are passed through transparently on T_CONN_REQ to T_CONN_IND,
 *	from T_UNITDATA_REQ to T_UNITDATA_IND, and from T_OPTDATA_REQ to
 *	T_OPTDATA_IND.
 *
 *	The T_CONN_CON is generated when processing the T_CONN_REQ, i.e.
 *	before a T_CONN_RES is received from the acceptor. This means that a
 *	socket connect will complete before the peer has called accept.
 *
 * MULTITHREADING
 * ==============
 *
 * The driver does not use STREAMS protection mechanisms. Instead it uses a
 * generic "serializer" abstraction. Most of the operations are executed
 * behind the serializer and are, essentially, single-threaded. All functions
 * executed behind the same serializer are strictly serialized. So if one
 * thread calls serializer_enter(serializer, foo, mp1, arg1); and another
 * thread calls serializer_enter(serializer, bar, mp2, arg2); then (depending
 * on which one was called first) the actual sequence will be foo(mp1, arg1);
 * bar(mp2, arg2) or bar(mp2, arg2); foo(mp1, arg1). But foo() and bar() will
 * never run at the same time.
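 *
 * For example (an illustrative sketch, not driver code; foo() and bar()
 * stand for any two callbacks with the srproc_t-style signature used below):
 *
 *	serializer_enter(ser, foo, mp1, arg1);		(thread A)
 *	serializer_enter(ser, bar, mp2, arg2);		(thread B)
 *
 * The serializer runs foo() and bar() back to back, in either order, in
 * whatever thread happens to drain the serializer - never concurrently.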
 *
 * Connectionless transports use a single serializer per transport type (one
 * for TLI and one for sockets). Connection-oriented transports use
 * finer-grained serializers.
 *
 * All COTS-type endpoints start their life with private serializers. During
 * connection request processing the endpoint serializer is switched to the
 * listener's serializer and the rest of T_CONN_REQ processing is done on the
 * listener serializer. During T_CONN_RES processing the eager serializer is
 * switched from listener to acceptor serializer and after that point all
 * processing for eager and acceptor happens on this serializer. To avoid
 * races with endpoint closes while its serializer may be changing, closes are
 * blocked while serializers are manipulated.
 *
 * References accounting
 * ---------------------
 *
 * Endpoints are reference counted and freed when the last reference is
 * dropped. Functions within the serializer may access an endpoint state even
 * after an endpoint closed. The te_closing flag being set on the endpoint
 * indicates that the endpoint entered its close routine.
 *
 * One reference is held for each opened endpoint instance. The reference
 * counter is incremented when the endpoint is linked to another endpoint and
 * decremented when the link disappears. It is also incremented when the
 * endpoint is found by the hash table lookup. This increment is atomic with
 * the lookup itself and happens while the hash table read lock is held.
 *
 * Close synchronization
 * ---------------------
 *
 * During close the endpoint is marked as closing using the te_closing flag.
 * It is usually enough to check for te_closing since all other state changes
 * happen after this flag is set and the close entered the serializer.
 * Immediately after setting te_closing, tl_close() enters the serializer and
 * waits until the callback finishes. This allows all functions called within
 * the serializer to simply check te_closing without any locks.
 *
 * Serializer management
 * ---------------------
 *
 * For COTS transports serializers are created when the endpoint is
 * constructed and destroyed when the endpoint is destructed. CLTS transports
 * use global serializers - one for sockets and one for TLI.
 *
 * COTS serializers have separate reference counts to deal with several
 * endpoints sharing the same serializer. There is a subtle problem related to
 * serializer destruction. The serializer should never be destroyed by any
 * function executed inside the serializer. This means that close has to wait
 * till all serializer activity for this endpoint is finished before it can
 * drop the last reference on the endpoint (which may as well free the
 * serializer). This is only relevant for COTS transports, which manage
 * serializers dynamically. For CLTS transports close may complete without
 * waiting for all serializer activity to finish since the serializer is only
 * destroyed at driver detach time.
 *
 * COTS endpoints keep track of the number of outstanding requests on the
 * serializer for the endpoint. The code handling accept() avoids changing the
 * client serializer if it has any pending messages on the serializer and
 * instead moves the acceptor to the listener's serializer.
 *
 * Use of hash tables
 * ------------------
 *
 * The driver uses the modhash hash table implementation. Each transport uses
 * two hash tables - one for finding endpoints by acceptor ID and another one
 * for finding endpoints by address. For sockets TICOTS and TICOTSORD share
 * the same pair of hash tables since sockets only use TICOTSORD.
 *
 * All hash table lookups increment a reference count for returned endpoints,
 * so we may safely check the endpoint state even when the endpoint is removed
 * from the hash by another thread immediately after it is found.
 *
 * CLOSE processing
 * ================
 *
 * The driver enters the serializer twice on close(). The close sequence is
 * the following:
 *
 * 1) Wait until closing is safe (te_closewait becomes zero).
 *	This step is needed to prevent close during serializer switches. In
 *	most cases (close happening after connection establishment)
 *	te_closewait is zero.
 * 2) Set te_closing.
 * 3) Call tl_close_ser() within the serializer and wait for it to complete.
 *
 *	tl_close_ser() simply marks the endpoint and wakes up the waiting
 *	tl_close(). It also needs to clear write-side q_next pointers - this
 *	should be done before qprocsoff().
 *
 *	This synchronous serializer entry during close is needed to ensure
 *	that the queue is valid everywhere inside the serializer.
 *
 *	Note that in many cases close will execute tl_close_ser()
 *	synchronously, so it will not wait at all.
 *
 * 4) Call qprocsoff().
 * 5) Call tl_close_finish_ser() within the serializer and wait for it to
 *    complete (for COTS transports). For CLTS transports there is no wait.
 *
 *	tl_close_finish_ser() finishes the close process and wakes up the
 *	waiting close if there is any.
 *
 *	Note that in most cases close will enter tl_close_finish_ser()
 *	synchronously and will not wait at all.
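 *
 * In outline, the caller side looks like this (a condensed, illustrative
 * rendering of tl_close() below, with the locking elided):
 *
 *	tep->te_closing = B_TRUE;
 *	tep->te_closewait = 1;
 *	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);
 *	... wait until te_closewait drops to zero ...
 *	qprocsoff(rq);
 *	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);
 *	... for COTS, wait again until te_closewait drops to zero ...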
 *
 * Flow Control
 * ============
 *
 * The driver implements both read and write side service routines. No one
 * calls putq() on the read queue. The read side service routine tl_rsrv() is
 * called when the read side stream is back-enabled. It enters the serializer
 * synchronously (waits till serializer processing is complete). Within the
 * serializer it back-enables all endpoints blocked by the queue for
 * connection-less transports and enables write side service processing for
 * the peer for connection-oriented transports.
 *
 * Read and write side service routines use special mblk-sized space
 * (te_rsrvmp and te_wsrvmp) in the endpoint structure to enter the perimeter.
 *
 * Write-side flow control
 * -----------------------
 *
 * Write side flow control is a bit tricky. The driver needs to deal with two
 * message queues - the explicit STREAMS message queue maintained by
 * putq()/getq()/putbq() and the implicit queue within the serializer. These
 * two queues should be synchronized to preserve message ordering and should
 * maintain a single order determined by the order in which messages enter
 * tl_wput(). In order to maintain the ordering between these two queues the
 * STREAMS queue is only manipulated within the serializer, so the ordering is
 * provided by the serializer.
 *
 * Functions called from tl_wsrv() sometimes may call putbq(). To immediately
 * stop any further processing of the STREAMS message queues the code calling
 * putbq() also sets the te_nowsrv flag in the endpoint. The write side
 * service processing stops when the flag is set.
 *
 * The tl_wsrv() function enters the serializer synchronously and waits for it
 * to complete. The serializer call-back tl_wsrv_ser() either drains all
 * messages on the STREAMS queue or terminates when it notices the te_nowsrv
 * flag set. Note that the maximum amount of messages processed by
 * tl_wsrv_ser() is always bounded by the amount of messages on the STREAMS
 * queue at the time tl_wsrv_ser() is entered. Any new messages may only
 * appear on the STREAMS queue from another serialized entry, which can't
 * happen in parallel. This guarantees that tl_wsrv_ser() is complete in
 * bounded time (there is no risk of it draining forever while a writer places
 * new messages on the STREAMS queue).
 *
 * Note that a closing endpoint never sets te_nowsrv and never calls putbq().
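 *
 * The pattern used by serialized functions that hit flow control is, in
 * sketch form (the driver wraps it in the TL_PUTBQ() macro defined below):
 *
 *	if (!canputnext(peer_rq)) {
 *		tep->te_nowsrv = B_TRUE;	stop the tl_wsrv_ser() drain
 *		(void) putbq(tep->te_wq, mp);	retry from this message later
 *		return;
 *	}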
 *
 * Unix Domain Sockets
 * ===================
 *
 * The driver knows the structure of Unix Domain socket addresses and treats
 * them differently from generic TLI addresses. For sockets implicit binds are
 * requested by setting SOU_MAGIC_IMPLICIT in the soua_magic part of the
 * address instead of using an address length of zero. Explicit binds specify
 * SOU_MAGIC_EXPLICIT as magic.
 *
 * For implicit binds we always use the minor number as the soua_vp part of
 * the address and avoid any hash table lookups. This saves two hash table
 * lookups per anonymous bind.
 *
 * For explicit addresses we hash the vnode pointer instead of hashing the
 * full-scale address+zone+length. Hashing by pointer is more efficient than
 * hashing by the full address.
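 *
 * In sketch form (illustrative only; struct so_ux_addr provides the
 * soua_vp/soua_magic fields used here):
 *
 *	implicit bind:	soua_magic = SOU_MAGIC_IMPLICIT;
 *			soua_vp = (void *)(uintptr_t)te_minor;
 *	explicit bind:	soua_magic = SOU_MAGIC_EXPLICIT;
 *			soua_vp = vnode pointer supplied by sockfs;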
 *
 * For Unix domain sockets the te_ap always points to the te_uxaddr part of
 * the tep structure, so it should never be freed.
 *
 * Also for sockets the driver always uses the minor number as the acceptor
 * id.
 *
 * TPI VIOLATIONS
 * --------------
 *
 * This driver violates TPI in several respects for Unix Domain Sockets:
 *
 * 1) It treats O_T_BIND_REQ as T_BIND_REQ and refuses bind if an explicit
 *    bind is requested and the endpoint is already in use. There is no point
 *    in generating an unused address since this address will be rejected by
 *    sockfs anyway. For implicit binds it always generates a new address
 *    (sets soua_vp to its minor number).
 *
 * 2) It always uses the minor number as the acceptor ID and never uses the
 *    queue pointer. This is ok since sockets get the acceptor ID from the
 *    T_CAPABILITY_REQ message and they do not use the queue pointer.
 *
 * 3) For listener sockets the usual sequence is to issue bind() with zero
 *    backlog followed by listen(). The listen() should be issued with
 *    non-zero backlog, so sotpi_listen() issues an unbind request followed by
 *    a bind request to the same address but with a non-zero qlen value. Both
 *    tl_bind() and tl_unbind() require a write lock on the hash table to
 *    insert/remove the address. The driver does not remove the address from
 *    the hash for endpoints that are bound to the explicit address and have a
 *    backlog of zero. During T_BIND_REQ processing, if the address requested
 *    is equal to the address the endpoint already has, it updates the backlog
 *    without reinserting the address in the hash table. This optimization
 *    avoids two hash table updates for each listener created. It also avoids
 *    the problem of a "stolen" address when another listener may use the same
 *    address between the unbind and bind and suddenly listen() fails because
 *    the address is in use even though the bind() succeeded.
 *
 * CONNECTIONLESS TRANSPORTS
 * =========================
 *
 * Connectionless transports all share the same serializer (one for TLI and
 * one for sockets). Functions executing behind the serializer can check or
 * modify the state of any endpoint.
 *
 * When endpoint X talks to another endpoint Y it caches the pointer to Y in
 * the te_lastep field. The next time X talks to some address A it checks
 * whether A is the same as Y's address and, if it is, there is no need to
 * look up Y. If the address is different or the state of Y is not appropriate
 * (e.g. closed or not idle) X does a lookup using tl_find_peer() and caches
 * the new address.
 * NOTE: tl_find_peer() never returns a closing endpoint and it places a
 * refhold on the endpoint found.
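 *
 * The caching check is roughly (an illustrative sketch of the logic, not a
 * verbatim excerpt):
 *
 *	if (((peer = tep->te_lastep) != NULL) && !peer->te_closing &&
 *	    tl_eqaddr(&peer->te_ap, ap))
 *		reuse the cached peer;
 *	else
 *		peer = tl_find_peer(tep, ap);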
 *
 * During close of endpoint Y it doesn't try to remove itself from other
 * endpoints' caches. They will detect that Y is gone and will search for the
 * peer endpoint again.
 *
 * Flow Control Handling
 * ---------------------
 *
 * Each connectionless endpoint keeps a list of endpoints which are
 * flow-controlled by its queue. It also keeps a pointer to the queue which
 * flow-controls itself. Whenever flow control releases for endpoint X it
 * enables all queues from the list. During close it also back-enables
 * everyone in the list. If X is flow-controlled when it is closing, it
 * removes itself from its peer's list.
 *
 * DATA STRUCTURES
 * ===============
 *
 * Each endpoint is represented by the tl_endpt_t structure which keeps all
 * the endpoint state. For connection-oriented transports it keeps a list of
 * pending connections (tl_icon_t). For connectionless transports it keeps a
 * list of endpoints flow controlled by this one.
 *
 * Each transport type is represented by a per-transport data structure
 * tl_transport_state_t. It contains a pointer to an acceptor ID hash and the
 * endpoint address hash tables for each transport. It also contains a
 * pointer to the transport serializer for connectionless transports.
 *
 * Each endpoint keeps a link to its transport structure, so the code can find
 * all per-transport information quickly.
 */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/strlog.h>
#include <sys/debug.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/kmem.h>
#include <sys/id_space.h>
#include <sys/modhash.h>
#include <sys/mkdev.h>
#include <sys/tl.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/modctl.h>
#include <sys/strsun.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysmacros.h>
#include <sys/xti_xtiopt.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/zone.h>
#include <inet/common.h>	/* typedef int (*pfi_t)() for inet/optcom.h */
#include <inet/optcom.h>
#include <sys/strsubr.h>
#include <sys/ucred.h>
#include <sys/suntpi.h>
#include <sys/list.h>
#include <sys/serializer.h>

/*
 * TBD List
 * 14. Eliminate state changes through table
 * 16. AF_UNIX socket options
 * 17. connect() for ticlts
 * 18. support for "netstat" to show AF_UNIX plus TLI local
 *	transport connections
 * 21. sanity check to flushing on sending M_ERROR
 */

/*
 * CONSTANT DECLARATIONS
 * --------------------
 */

/*
 * Local declarations
 */
#define	NEXTSTATE(EV, ST)	ti_statetbl[EV][ST]

#define	BADSEQNUM	(-1)	/* initial seq number used by T_DISCON_IND */
#define	TL_BUFWAIT	(10000)	/* usecs to wait for allocb buffer timeout */
#define	TL_TIDUSZ (64*1024)	/* tidu size when "strmsgsz" is unlimited (0) */

/*
 * Hash tables size.
 */
#define	TL_HASH_SIZE 311

/*
 * Definitions for module_info
 */
#define	TL_ID		(104)		/* module ID number */
#define	TL_NAME		"tl"		/* module name */
#define	TL_MINPSZ	(0)		/* min packet size */
#define	TL_MAXPSZ	INFPSZ		/* max packet size ZZZ */
#define	TL_HIWAT	(16*1024)	/* hi water mark */
#define	TL_LOWAT	(256)		/* lo water mark */

/*
 * Definition of minor numbers/modes for new transport provider modes.
 * We view the socket use as a separate mode to get a separate name space.
 */
#define	TL_TICOTS	0	/* connection oriented transport */
#define	TL_TICOTSORD	1	/* COTS w/ orderly release */
#define	TL_TICLTS	2	/* connectionless transport */
#define	TL_UNUSED	3
#define	TL_SOCKET	4	/* Socket */
#define	TL_SOCK_COTS	(TL_SOCKET|TL_TICOTS)
#define	TL_SOCK_COTSORD	(TL_SOCKET|TL_TICOTSORD)
#define	TL_SOCK_CLTS	(TL_SOCKET|TL_TICLTS)

#define	TL_MINOR_MASK	0x7
#define	TL_MINOR_START	(TL_TICLTS + 1)

/*
 * LOCAL MACROS
 */
#define	T_ALIGN(p)	P2ROUNDUP((p), sizeof (t_scalar_t))
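
/*
 * For example, with a 4-byte t_scalar_t, T_ALIGN(5) == 8 while
 * T_ALIGN(8) == 8: P2ROUNDUP() rounds its first argument up to the next
 * multiple of the second.
 */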

/*
 * EXTERNAL VARIABLE DECLARATIONS
 * -----------------------------
 */
/*
 * state table defined in the OS space.c
 */
extern char ti_statetbl[TE_NOEVENTS][TS_NOSTATES];

/*
 * STREAMS DRIVER ENTRY POINTS PROTOTYPES
 */
static int tl_open(queue_t *, dev_t *, int, int, cred_t *);
static int tl_close(queue_t *, int, cred_t *);
static void tl_wput(queue_t *, mblk_t *);
static void tl_wsrv(queue_t *);
static void tl_rsrv(queue_t *);

static int tl_attach(dev_info_t *, ddi_attach_cmd_t);
static int tl_detach(dev_info_t *, ddi_detach_cmd_t);
static int tl_info(dev_info_t *, ddi_info_cmd_t, void *, void **);

/*
 * GLOBAL DATA STRUCTURES AND VARIABLES
 * -----------------------------------
 */

/*
 * Table representing database of all options managed by T_SVR4_OPTMGMT_REQ.
 * For now, we only manage the SO_RECVUCRED option but we also have
 * harmless dummy options to make things work with some common code we access.
 */
opdes_t	tl_opt_arr[] = {
	/* The SO_TYPE is needed for the hack below */
	{
		SO_TYPE,
		SOL_SOCKET,
		OA_R,
		OA_R,
		OP_NP,
		0,
		sizeof (t_scalar_t),
		0
	},
	{
		SO_RECVUCRED,
		SOL_SOCKET,
		OA_RW,
		OA_RW,
		OP_NP,
		0,
		sizeof (int),
		0
	}
};

/*
 * Table of all supported levels
 * Note: Some levels (e.g. XTI_GENERIC) may be valid but may not have
 * any supported options so we need this info separately.
 *
 * This is needed only for topmost tpi providers.
 */
optlevel_t	tl_valid_levels_arr[] = {
	XTI_GENERIC,
	SOL_SOCKET,
	TL_PROT_LEVEL
};

#define	TL_VALID_LEVELS_CNT	A_CNT(tl_valid_levels_arr)
/*
 * Current upper bound on the amount of space needed to return all options.
 * Additional options with data size of sizeof(long) are handled
 * automatically. Others need to be handled by hand.
 */
#define	TL_MAX_OPT_BUF_LEN						\
		((A_CNT(tl_opt_arr) << 2) +				\
		(A_CNT(tl_opt_arr) * sizeof (struct opthdr)) +		\
		+ 64 + sizeof (struct T_optmgmt_ack))

#define	TL_OPT_ARR_CNT	A_CNT(tl_opt_arr)

/*
 *	transport addr structure
 */
typedef struct tl_addr {
	zoneid_t	ta_zoneid;	/* Zone scope of address */
	t_scalar_t	ta_alen;	/* length of abuf */
	void		*ta_abuf;	/* the addr itself */
} tl_addr_t;

/*
 * Refcounted version of serializer.
 */
typedef struct tl_serializer {
	uint_t		ts_refcnt;
	serializer_t	*ts_serializer;
} tl_serializer_t;

/*
 * Each transport type has a separate per-transport state.
 */
typedef struct tl_transport_state {
	char		*tr_name;
	minor_t		tr_minor;
	uint32_t	tr_defaddr;
	mod_hash_t	*tr_ai_hash;
	mod_hash_t	*tr_addr_hash;
	tl_serializer_t	*tr_serializer;
} tl_transport_state_t;

#define	TL_DFADDR 0x1000

static tl_transport_state_t tl_transports[] = {
	{ "ticots", TL_TICOTS, TL_DFADDR, NULL, NULL, NULL },
	{ "ticotsord", TL_TICOTSORD, TL_DFADDR, NULL, NULL, NULL },
	{ "ticlts", TL_TICLTS, TL_DFADDR, NULL, NULL, NULL },
	{ "undefined", TL_UNUSED, TL_DFADDR, NULL, NULL, NULL },
	{ "sticots", TL_SOCK_COTS, TL_DFADDR, NULL, NULL, NULL },
	{ "sticotsord", TL_SOCK_COTSORD, TL_DFADDR, NULL, NULL, NULL },
	{ "sticlts", TL_SOCK_CLTS, TL_DFADDR, NULL, NULL, NULL }
};

#define	TL_MAXTRANSPORT A_CNT(tl_transports)

struct tl_endpt;
typedef struct tl_endpt tl_endpt_t;

typedef void (tlproc_t)(mblk_t *, tl_endpt_t *);

/*
 * Data structure used to represent pending connects.
 * Records enough information so that the connecting peer can close
 * before the connection gets accepted.
 */
typedef struct tl_icon {
	list_node_t	ti_node;
	struct tl_endpt	*ti_tep;	/* NULL if peer has already closed */
	mblk_t		*ti_mp;		/* b_next list of data + ordrel_ind */
	t_scalar_t	ti_seqno;	/* Sequence number */
} tl_icon_t;

typedef struct so_ux_addr soux_addr_t;
#define	TL_SOUX_ADDRLEN sizeof (soux_addr_t)

/*
 * Maximum number of unaccepted connection indications allowed per listener.
 */
#define	TL_MAXQLEN	4096
int tl_maxqlen = TL_MAXQLEN;

/*
 *	transport endpoint structure
 */
struct tl_endpt {
	queue_t		*te_rq;		/* stream read queue */
	queue_t		*te_wq;		/* stream write queue */
	uint32_t	te_refcnt;
	int32_t		te_state;	/* TPI state of endpoint */
	minor_t		te_minor;	/* minor number */
#define	te_seqno	te_minor
	uint_t		te_flag;	/* flag field */
	boolean_t	te_nowsrv;
	tl_serializer_t	*te_ser;	/* Serializer to use */
#define	te_serializer	te_ser->ts_serializer

	soux_addr_t	te_uxaddr;	/* Socket address */
#define	te_magic	te_uxaddr.soua_magic
#define	te_vp		te_uxaddr.soua_vp
	tl_addr_t	te_ap;		/* addr bound to this endpt */
#define	te_zoneid	te_ap.ta_zoneid
#define	te_alen		te_ap.ta_alen
#define	te_abuf		te_ap.ta_abuf

	tl_transport_state_t *te_transport;
#define	te_addrhash	te_transport->tr_addr_hash
#define	te_aihash	te_transport->tr_ai_hash
#define	te_defaddr	te_transport->tr_defaddr
	cred_t		*te_credp;	/* endpoint user credentials */
	mod_hash_hndl_t	te_hash_hndl;	/* Handle for address hash */

	/*
	 * State specific for connection-oriented and connectionless
	 * transports.
	 */
	union {
		/* Connection-oriented state. */
		struct {
			t_uscalar_t _te_nicon;	/* count of conn requests */
			t_uscalar_t _te_qlen;	/* max conn requests */
			tl_endpt_t  *_te_oconp;	/* conn request pending */
			tl_endpt_t  *_te_conp;	/* connected endpt */
#ifndef _ILP32
			void	    *_te_pad;
#endif
			list_t	_te_iconp;	/* list of conn ind. pending */
		} _te_cots_state;
		/* Connection-less state. */
		struct {
			tl_endpt_t *_te_lastep;	/* last dest. endpoint */
			tl_endpt_t *_te_flowq;	/* flow controlled on whom */
			list_node_t _te_flows;	/* lists of connections */
			list_t	_te_flowlist;	/* Who flowcontrols on me */
		} _te_clts_state;
	} _te_transport_state;
#define	te_nicon	_te_transport_state._te_cots_state._te_nicon
#define	te_qlen		_te_transport_state._te_cots_state._te_qlen
#define	te_oconp	_te_transport_state._te_cots_state._te_oconp
#define	te_conp		_te_transport_state._te_cots_state._te_conp
#define	te_iconp	_te_transport_state._te_cots_state._te_iconp
#define	te_lastep	_te_transport_state._te_clts_state._te_lastep
#define	te_flowq	_te_transport_state._te_clts_state._te_flowq
#define	te_flowlist	_te_transport_state._te_clts_state._te_flowlist
#define	te_flows	_te_transport_state._te_clts_state._te_flows

	bufcall_id_t	te_bufcid;	/* outstanding bufcall id */
	timeout_id_t	te_timoutid;	/* outstanding timeout id */
	pid_t		te_cpid;	/* cached pid of endpoint */
	t_uscalar_t	te_acceptor_id;	/* acceptor id for T_CONN_RES */

	/*
	 * Pieces of the endpoint state needed for closing.
	 */
	kmutex_t	te_closelock;
	kcondvar_t	te_closecv;
	uint8_t		te_closing;	/* The endpoint started closing */
	uint8_t		te_closewait;	/* Wait in close until zero */
	mblk_t		te_closemp;	/* for entering serializer on close */
	mblk_t		te_rsrvmp;	/* for entering serializer on rsrv */
	mblk_t		te_wsrvmp;	/* for entering serializer on wsrv */

	kmutex_t	te_srv_lock;
	kcondvar_t	te_srv_cv;
	uint8_t		te_rsrv_active;	/* Running in tl_rsrv() */
	uint8_t		te_wsrv_active;	/* Running in tl_wsrv() */

	/*
	 * Pieces of the endpoint state needed for serializer transitions.
	 */
	kmutex_t	te_ser_lock;	/* Protects the count below */
	uint_t		te_ser_count;	/* Number of messages on serializer */
};

/*
 * Flag values. The lower 4 bits specify the transport used.
 * TL_LISTENER, TL_ACCEPTOR, TL_ACCEPTED and TL_EAGER are for debugging only;
 * they make it easier to identify the endpoint.
 */
#define	TL_LISTENER	0x00010	/* the listener endpoint */
#define	TL_ACCEPTOR	0x00020	/* the accepting endpoint */
#define	TL_EAGER	0x00040	/* connecting endpoint */
#define	TL_ACCEPTED	0x00080	/* accepted connection */
#define	TL_SETCRED	0x00100	/* flag to indicate sending of credentials */
#define	TL_SETUCRED	0x00200	/* flag to indicate sending of ucred */
#define	TL_SOCKUCRED	0x00400	/* flag to indicate sending of SCM_UCRED */
#define	TL_ADDRHASHED	0x01000	/* Endpoint address is stored in te_addrhash */
#define	TL_CLOSE_SER	0x10000	/* Endpoint close has entered the serializer */
/*
 * Boolean checks for the endpoint type.
 */
#define	IS_CLTS(x)	(((x)->te_flag & TL_TICLTS) != 0)
#define	IS_COTS(x)	(((x)->te_flag & TL_TICLTS) == 0)
#define	IS_COTSORD(x)	(((x)->te_flag & TL_TICOTSORD) != 0)
#define	IS_SOCKET(x)	(((x)->te_flag & TL_SOCKET) != 0)

/*
 * Certain operations are always used together. These macros reduce the chance
 * of missing a part of a combination.
 */
#define	TL_UNCONNECT(x) { tl_refrele(x); x = NULL; }
#define	TL_REMOVE_PEER(x) { if ((x) != NULL) TL_UNCONNECT(x) }

#define	TL_PUTBQ(x, mp) {					\
	ASSERT(!((x)->te_flag & TL_CLOSE_SER));			\
	(x)->te_nowsrv = B_TRUE;				\
	(void) putbq((x)->te_wq, mp);				\
}

#define	TL_QENABLE(x) { (x)->te_nowsrv = B_FALSE; qenable((x)->te_wq); }
#define	TL_PUTQ(x, mp) { (x)->te_nowsrv = B_FALSE; (void)putq((x)->te_wq, mp); }

/*
 * STREAMS driver glue data structures.
 */
static	struct	module_info	tl_minfo = {
	TL_ID,			/* mi_idnum */
	TL_NAME,		/* mi_idname */
	TL_MINPSZ,		/* mi_minpsz */
	TL_MAXPSZ,		/* mi_maxpsz */
	TL_HIWAT,		/* mi_hiwat */
	TL_LOWAT		/* mi_lowat */
};

static	struct	qinit	tl_rinit = {
	NULL,			/* qi_putp */
	(int (*)())tl_rsrv,	/* qi_srvp */
	tl_open,		/* qi_qopen */
	tl_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static	struct	qinit	tl_winit = {
	(int (*)())tl_wput,	/* qi_putp */
	(int (*)())tl_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&tl_minfo,		/* qi_minfo */
	NULL			/* qi_mstat */
};

static	struct streamtab	tlinfo = {
	&tl_rinit,		/* st_rdinit */
	&tl_winit,		/* st_wrinit */
	NULL,			/* st_muxrinit */
	NULL			/* st_muxwrinit */
};

DDI_DEFINE_STREAM_OPS(tl_devops, nulldev, nulldev, tl_attach, tl_detach,
    nulldev, tl_info, D_MP, &tlinfo, ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module -- pseudo driver here */
	"TPI Local Transport (tl)",
	&tl_devops,		/* driver ops */
};

/*
 * Module linkage information for the kernel.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/*
 * Templates for response to info request
 * Check sanity of unlimited connect data etc.
 */

#define	TL_CLTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)
#define	TL_COTS_PROVIDER_FLAG	(XPG4_1|SENDZERO)

static struct T_info_ack tl_cots_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		T_INFINITE,	/* TSDU size */
		T_INFINITE,	/* ETSDU size */
		T_INFINITE,	/* CDATA_size */
		T_INFINITE,	/* DDATA_size */
		T_INFINITE,	/* ADDR_size */
		T_INFINITE,	/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_COTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_COTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

static struct T_info_ack tl_clts_info_ack =
	{
		T_INFO_ACK,	/* PRIM_type - always T_INFO_ACK */
		0,		/* TSDU_size - fill at run time */
		-2,		/* ETSDU_size -2 => not supported */
		-2,		/* CDATA_size -2 => not supported */
		-2,		/* DDATA_size -2 => not supported */
		-1,		/* ADDR_size -1 => infinite */
		-1,		/* OPT_size */
		0,		/* TIDU_size - fill at run time */
		T_CLTS,		/* SERV_type */
		-1,		/* CURRENT_state */
		TL_CLTS_PROVIDER_FLAG	/* PROVIDER_flag */
	};

/*
 * private copy of devinfo pointer used in tl_info
 */
static dev_info_t *tl_dip;

/*
 * Endpoints cache.
 */
static kmem_cache_t *tl_cache;
/*
 * Minor number space.
 */
static id_space_t *tl_minors;

/*
 * Default Data Unit size.
 */
static t_scalar_t tl_tidusz;

/*
 * Size of hash tables.
 */
static size_t tl_hash_size = TL_HASH_SIZE;

/*
 * Debug and test variable ONLY. Turn off T_CONN_IND queueing
 * for sockets.
 */
static int tl_disable_early_connect = 0;
static int tl_client_closing_when_accepting;

static int tl_serializer_noswitch;

/*
 * LOCAL FUNCTION PROTOTYPES
 * -------------------------
 */
static boolean_t tl_eqaddr(tl_addr_t *, tl_addr_t *);
static void tl_do_proto(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl(mblk_t *, tl_endpt_t *);
static void tl_do_ioctl_ser(mblk_t *, tl_endpt_t *);
static void tl_error_ack(queue_t *, mblk_t *, t_scalar_t, t_scalar_t,
    t_scalar_t);
static void tl_bind(mblk_t *, tl_endpt_t *);
static void tl_bind_ser(mblk_t *, tl_endpt_t *);
static void tl_ok_ack(queue_t *, mblk_t *mp, t_scalar_t);
static void tl_unbind(mblk_t *, tl_endpt_t *);
static void tl_optmgmt(queue_t *, mblk_t *);
static void tl_conn_req(queue_t *, mblk_t *);
static void tl_conn_req_ser(mblk_t *, tl_endpt_t *);
static void tl_conn_res(mblk_t *, tl_endpt_t *);
static void tl_discon_req(mblk_t *, tl_endpt_t *);
static void tl_capability_req(mblk_t *, tl_endpt_t *);
static void tl_info_req_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_req_ser(mblk_t *, tl_endpt_t *);
static void tl_info_req(mblk_t *, tl_endpt_t *);
static void tl_addr_req(mblk_t *, tl_endpt_t *);
static void tl_connected_cots_addr_req(mblk_t *, tl_endpt_t *);
static void tl_data(mblk_t *, tl_endpt_t *);
static void tl_exdata(mblk_t *, tl_endpt_t *);
static void tl_ordrel(mblk_t *, tl_endpt_t *);
static void tl_unitdata(mblk_t *, tl_endpt_t *);
static void tl_unitdata_ser(mblk_t *, tl_endpt_t *);
static void tl_uderr(queue_t *, mblk_t *, t_scalar_t);
static tl_endpt_t *tl_find_peer(tl_endpt_t *, tl_addr_t *);
static tl_endpt_t *tl_sock_find_peer(tl_endpt_t *, struct so_ux_addr *);
static boolean_t tl_get_any_addr(tl_endpt_t *, tl_addr_t *);
static void tl_cl_backenable(tl_endpt_t *);
static void tl_co_unconnect(tl_endpt_t *);
static mblk_t *tl_resizemp(mblk_t *, ssize_t);
static void tl_discon_ind(tl_endpt_t *, uint32_t);
static mblk_t *tl_discon_ind_alloc(uint32_t, t_scalar_t);
static mblk_t *tl_ordrel_ind_alloc(void);
static tl_icon_t *tl_icon_find(tl_endpt_t *, t_scalar_t);
static void tl_icon_queuemsg(tl_endpt_t *, t_scalar_t, mblk_t *);
static boolean_t tl_icon_hasprim(tl_endpt_t *, t_scalar_t, t_scalar_t);
static void tl_icon_sendmsgs(tl_endpt_t *, mblk_t **);
static void tl_icon_freemsgs(mblk_t **);
static void tl_merror(queue_t *, mblk_t *, int);
static void tl_fill_option(uchar_t *, cred_t *, pid_t, int, cred_t *);
static int tl_default_opt(queue_t *, int, int, uchar_t *);
static int tl_get_opt(queue_t *, int, int, uchar_t *);
static int tl_set_opt(queue_t *, uint_t, int, int, uint_t, uchar_t *, uint_t *,
    uchar_t *, void *, cred_t *);
static void tl_memrecover(queue_t *, mblk_t *, size_t);
static void tl_freetip(tl_endpt_t *, tl_icon_t *);
static void tl_free(tl_endpt_t *);
static int  tl_constructor(void *, void *, int);
static void tl_destructor(void *, void *);
static void tl_find_callback(mod_hash_key_t, mod_hash_val_t);
static tl_serializer_t *tl_serializer_alloc(int);
static void tl_serializer_refhold(tl_serializer_t *);
static void tl_serializer_refrele(tl_serializer_t *);
static void tl_serializer_enter(tl_endpt_t *, tlproc_t, mblk_t *);
static void tl_serializer_exit(tl_endpt_t *);
static boolean_t tl_noclose(tl_endpt_t *);
static void tl_closeok(tl_endpt_t *);
static void tl_refhold(tl_endpt_t *);
static void tl_refrele(tl_endpt_t *);
static int tl_hash_cmp_addr(mod_hash_key_t, mod_hash_key_t);
static uint_t tl_hash_by_addr(void *, mod_hash_key_t);
static void tl_close_ser(mblk_t *, tl_endpt_t *);
static void tl_close_finish_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_data_ser(mblk_t *, tl_endpt_t *);
static void tl_proto_ser(mblk_t *, tl_endpt_t *);
static void tl_putq_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_common_ser(mblk_t *, tl_endpt_t *);
static void tl_wput_ser(mblk_t *, tl_endpt_t *);
static void tl_wsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_rsrv_ser(mblk_t *, tl_endpt_t *);
static void tl_addr_unbind(tl_endpt_t *);

/*
 * Initialize option database object for TL
 */

optdb_obj_t tl_opt_obj = {
	tl_default_opt,		/* TL default value function pointer */
	tl_get_opt,		/* TL get function pointer */
	tl_set_opt,		/* TL set function pointer */
	TL_OPT_ARR_CNT,		/* TL option database count of entries */
	tl_opt_arr,		/* TL option database */
	TL_VALID_LEVELS_CNT,	/* TL valid level count of entries */
	tl_valid_levels_arr	/* TL valid level array */
};

/*
 * LOCAL FUNCTIONS AND DRIVER ENTRY POINTS
 * ---------------------------------------
 */

/*
 * Loadable module routines
 */
int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * Driver Entry Points and Other routines
 */
static int
tl_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	int i;
	char name[32];

	/*
	 * Resume from a checkpoint state.
	 */
	if (cmd == DDI_RESUME)
		return (DDI_SUCCESS);

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	/*
	 * Deduce TIDU size to use. Note: "strmsgsz" being 0 has semantics
	 * that streams message sizes can be unlimited. We use a defined
	 * constant instead.
	 */
	tl_tidusz = strmsgsz != 0 ? (t_scalar_t)strmsgsz : TL_TIDUSZ;

	/*
	 * Create subdevices for each transport.
	 */
	for (i = 0; i < TL_UNUSED; i++) {
		if (ddi_create_minor_node(devi,
		    tl_transports[i].tr_name,
		    S_IFCHR, tl_transports[i].tr_minor,
		    DDI_PSEUDO, 0) == DDI_FAILURE) {
			ddi_remove_minor_node(devi, NULL);
			return (DDI_FAILURE);
		}
	}

	tl_cache = kmem_cache_create("tl_cache", sizeof (tl_endpt_t),
	    0, tl_constructor, tl_destructor, NULL, NULL, NULL, 0);

	if (tl_cache == NULL) {
		ddi_remove_minor_node(devi, NULL);
		return (DDI_FAILURE);
	}

	/* Create ID space for minor numbers */
	tl_minors = id_space_create("tl_minor_space",
	    TL_MINOR_START, MAXMIN32 - TL_MINOR_START + 1);

	/*
	 * Initialize the per-transport state: hash tables and, for
	 * connectionless transports, shared serializers.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if (i == TL_UNUSED)
			continue;

		/* Socket COTSORD shares namespace with COTS */
		if (i == TL_SOCK_COTSORD) {
			t->tr_ai_hash =
			    tl_transports[TL_SOCK_COTS].tr_ai_hash;
			ASSERT(t->tr_ai_hash != NULL);
			t->tr_addr_hash =
			    tl_transports[TL_SOCK_COTS].tr_addr_hash;
			ASSERT(t->tr_addr_hash != NULL);
			continue;
		}

		/*
		 * Create hash tables.
		 */
		(void) snprintf(name, sizeof (name), "%s_ai_hash",
		    t->tr_name);
#ifdef _ILP32
		if (i & TL_SOCKET)
			t->tr_ai_hash =
			    mod_hash_create_idhash(name, tl_hash_size - 1,
			    mod_hash_null_valdtor);
		else
			t->tr_ai_hash =
			    mod_hash_create_ptrhash(name, tl_hash_size,
			    mod_hash_null_valdtor, sizeof (queue_t));
#else
		t->tr_ai_hash =
		    mod_hash_create_idhash(name, tl_hash_size - 1,
		    mod_hash_null_valdtor);
#endif /* _ILP32 */

		if (i & TL_SOCKET) {
			(void) snprintf(name, sizeof (name),
			    "%s_sockaddr_hash", t->tr_name);
			t->tr_addr_hash = mod_hash_create_ptrhash(name,
			    tl_hash_size, mod_hash_null_valdtor,
			    sizeof (uintptr_t));
		} else {
			(void) snprintf(name, sizeof (name), "%s_addr_hash",
			    t->tr_name);
			t->tr_addr_hash = mod_hash_create_extended(name,
			    tl_hash_size, mod_hash_null_keydtor,
			    mod_hash_null_valdtor,
			    tl_hash_by_addr, NULL, tl_hash_cmp_addr,
			    KM_SLEEP);
		}

		/* Create serializer for connectionless transports. */
		if (i & TL_TICLTS)
			t->tr_serializer = tl_serializer_alloc(KM_SLEEP);
	}

	tl_dip = devi;

	return (DDI_SUCCESS);
}

static int
tl_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
{
	int i;

	if (cmd == DDI_SUSPEND)
		return (DDI_SUCCESS);

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	/*
	 * Destroy arenas and hash tables.
	 */
	for (i = 0; i < TL_MAXTRANSPORT; i++) {
		tl_transport_state_t *t = &tl_transports[i];

		if ((i == TL_UNUSED) || (i == TL_SOCK_COTSORD))
			continue;

		EQUIV(i & TL_TICLTS, t->tr_serializer != NULL);
		if (t->tr_serializer != NULL) {
			tl_serializer_refrele(t->tr_serializer);
			t->tr_serializer = NULL;
		}

#ifdef _ILP32
		if (i & TL_SOCKET)
			mod_hash_destroy_idhash(t->tr_ai_hash);
		else
			mod_hash_destroy_ptrhash(t->tr_ai_hash);
#else
		mod_hash_destroy_idhash(t->tr_ai_hash);
#endif /* _ILP32 */
		t->tr_ai_hash = NULL;
		if (i & TL_SOCKET)
			mod_hash_destroy_ptrhash(t->tr_addr_hash);
		else
			mod_hash_destroy_hash(t->tr_addr_hash);
		t->tr_addr_hash = NULL;
	}

	kmem_cache_destroy(tl_cache);
	tl_cache = NULL;
	id_space_destroy(tl_minors);
	tl_minors = NULL;
	ddi_remove_minor_node(devi, NULL);
	return (DDI_SUCCESS);
}

/* ARGSUSED */
static int
tl_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int retcode = DDI_FAILURE;

	switch (infocmd) {

	case DDI_INFO_DEVT2DEVINFO:
		if (tl_dip != NULL) {
			*result = (void *)tl_dip;
			retcode = DDI_SUCCESS;
		}
		break;

	case DDI_INFO_DEVT2INSTANCE:
		*result = NULL;
		retcode = DDI_SUCCESS;
		break;

	default:
		break;
	}
	return (retcode);
}

/*
 * Endpoint reference management.
 */
static void
tl_refhold(tl_endpt_t *tep)
{
	atomic_inc_32(&tep->te_refcnt);
}

static void
tl_refrele(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt != 0);

	if (atomic_dec_32_nv(&tep->te_refcnt) == 0)
		tl_free(tep);
}

/*ARGSUSED*/
static int
tl_constructor(void *buf, void *cdrarg, int kmflags)
{
	tl_endpt_t *tep = buf;

	bzero(tep, sizeof (tl_endpt_t));
	mutex_init(&tep->te_closelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_closecv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_srv_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&tep->te_srv_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&tep->te_ser_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
tl_destructor(void *buf, void *cdrarg)
{
	tl_endpt_t *tep = buf;

	mutex_destroy(&tep->te_closelock);
	cv_destroy(&tep->te_closecv);
	mutex_destroy(&tep->te_srv_lock);
	cv_destroy(&tep->te_srv_cv);
	mutex_destroy(&tep->te_ser_lock);
}

static void
tl_free(tl_endpt_t *tep)
{
	ASSERT(tep->te_refcnt == 0);
	ASSERT(tep->te_transport != NULL);
	ASSERT(tep->te_rq == NULL);
	ASSERT(tep->te_wq == NULL);
	ASSERT(tep->te_ser != NULL);
	ASSERT(tep->te_ser_count == 0);
	ASSERT(!(tep->te_flag & TL_ADDRHASHED));

	if (IS_SOCKET(tep)) {
		ASSERT(tep->te_alen == TL_SOUX_ADDRLEN);
		ASSERT(tep->te_abuf == &tep->te_uxaddr);
		ASSERT(tep->te_vp == (void *)(uintptr_t)tep->te_minor);
		ASSERT(tep->te_magic == SOU_MAGIC_IMPLICIT);
	} else if (tep->te_abuf != NULL) {
		kmem_free(tep->te_abuf, tep->te_alen);
		tep->te_alen = -1; /* uninitialized */
		tep->te_abuf = NULL;
	} else {
		ASSERT(tep->te_alen == -1);
	}

	id_free(tl_minors, tep->te_minor);
	ASSERT(tep->te_credp == NULL);

	if (tep->te_hash_hndl != NULL)
		mod_hash_cancel(tep->te_addrhash, &tep->te_hash_hndl);

	if (IS_COTS(tep)) {
		TL_REMOVE_PEER(tep->te_conp);
		TL_REMOVE_PEER(tep->te_oconp);
		tl_serializer_refrele(tep->te_ser);
		tep->te_ser = NULL;
		ASSERT(tep->te_nicon == 0);
		ASSERT(list_head(&tep->te_iconp) == NULL);
	} else {
		ASSERT(tep->te_lastep == NULL);
		ASSERT(list_head(&tep->te_flowlist) == NULL);
		ASSERT(tep->te_flowq == NULL);
	}

	ASSERT(tep->te_bufcid == 0);
	ASSERT(tep->te_timoutid == 0);
	bzero(&tep->te_ap, sizeof (tep->te_ap));
	tep->te_acceptor_id = 0;

	ASSERT(tep->te_closewait == 0);
	ASSERT(!tep->te_rsrv_active);
	ASSERT(!tep->te_wsrv_active);
	tep->te_closing = 0;
	tep->te_nowsrv = B_FALSE;
	tep->te_flag = 0;

	kmem_cache_free(tl_cache, tep);
}

/*
 * Allocate/free reference-counted wrappers for serializers.
 */
static tl_serializer_t *
tl_serializer_alloc(int flags)
{
	tl_serializer_t *s = kmem_alloc(sizeof (tl_serializer_t), flags);
	serializer_t *ser;

	if (s == NULL)
		return (NULL);

	ser = serializer_create(flags);

	if (ser == NULL) {
		kmem_free(s, sizeof (tl_serializer_t));
		return (NULL);
	}

	s->ts_refcnt = 1;
	s->ts_serializer = ser;
	return (s);
}

static void
tl_serializer_refhold(tl_serializer_t *s)
{
	atomic_inc_32(&s->ts_refcnt);
}

static void
tl_serializer_refrele(tl_serializer_t *s)
{
	if (atomic_dec_32_nv(&s->ts_refcnt) == 0) {
		serializer_destroy(s->ts_serializer);
		kmem_free(s, sizeof (tl_serializer_t));
	}
}

/*
 * Post a request on the endpoint serializer. For COTS transports keep track
 * of the number of pending requests.
 */
static void
tl_serializer_enter(tl_endpt_t *tep, tlproc_t tlproc, mblk_t *mp)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		tep->te_ser_count++;
		mutex_exit(&tep->te_ser_lock);
	}

	serializer_enter(tep->te_serializer, (srproc_t *)tlproc, mp, tep);
}

/*
 * Complete processing the request on the serializer. Decrement the counter
 * for pending requests for COTS transports.
 */
static void
tl_serializer_exit(tl_endpt_t *tep)
{
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_ser_lock);
		ASSERT(tep->te_ser_count != 0);
		tep->te_ser_count--;
		mutex_exit(&tep->te_ser_lock);
	}
}
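
/*
 * The convention throughout the driver (shown here as an illustrative
 * sketch): the caller takes a reference and posts the callback,
 *
 *	tl_refhold(tep);
 *	tl_serializer_enter(tep, tl_proc, mp);
 *
 * and the callback tl_proc() finishes with
 *
 *	tl_serializer_exit(tep);
 *	tl_refrele(tep);
 *
 * Close is the exception: tl_close() relies on its own te_closewait
 * handshake instead of an extra reference.
 */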

/*
 * Hash management functions.
 */

/*
 * Return TRUE if two addresses are equal, false otherwise.
 */
static boolean_t
tl_eqaddr(tl_addr_t *ap1, tl_addr_t *ap2)
{
	return ((ap1->ta_alen > 0) &&
	    (ap1->ta_alen == ap2->ta_alen) &&
	    (ap1->ta_zoneid == ap2->ta_zoneid) &&
	    (bcmp(ap1->ta_abuf, ap2->ta_abuf, ap1->ta_alen) == 0));
}

/*
 * This function is called whenever an endpoint is found in the hash table.
 */
/* ARGSUSED0 */
static void
tl_find_callback(mod_hash_key_t key, mod_hash_val_t val)
{
	tl_refhold((tl_endpt_t *)val);
}

/*
 * Address hash function: a PJW-style string hash seeded with the zone ID, so
 * that equal addresses in different zones hash differently.
 */
/* ARGSUSED */
static uint_t
tl_hash_by_addr(void *hash_data, mod_hash_key_t key)
{
	tl_addr_t *ap = (tl_addr_t *)key;
	size_t	len = ap->ta_alen;
	uchar_t	*p = ap->ta_abuf;
	uint_t	i, g;

	ASSERT((len > 0) && (p != NULL));

	for (i = ap->ta_zoneid; len-- != 0; p++) {
		i = (i << 4) + (*p);
		if ((g = (i & 0xf0000000U)) != 0) {
			i ^= (g >> 24);
			i ^= g;
		}
	}
	return (i);
}

/*
 * This function is used by hash lookups. It compares two generic addresses.
 */
static int
tl_hash_cmp_addr(mod_hash_key_t key1, mod_hash_key_t key2)
{
#ifdef	DEBUG
	tl_addr_t *ap1 = (tl_addr_t *)key1;
	tl_addr_t *ap2 = (tl_addr_t *)key2;

	ASSERT(key1 != NULL);
	ASSERT(key2 != NULL);

	ASSERT(ap1->ta_abuf != NULL);
	ASSERT(ap2->ta_abuf != NULL);
	ASSERT(ap1->ta_alen > 0);
	ASSERT(ap2->ta_alen > 0);
#endif

	return (!tl_eqaddr((tl_addr_t *)key1, (tl_addr_t *)key2));
}

/*
 * Prevent endpoint from closing if possible.
 * Return B_TRUE on success, B_FALSE on failure.
 */
static boolean_t
tl_noclose(tl_endpt_t *tep)
{
	boolean_t rc = B_FALSE;

	mutex_enter(&tep->te_closelock);
	if (!tep->te_closing) {
		ASSERT(tep->te_closewait == 0);
		tep->te_closewait++;
		rc = B_TRUE;
	}
	mutex_exit(&tep->te_closelock);
	return (rc);
}

/*
 * Allow endpoint to close if needed.
 */
static void
tl_closeok(tl_endpt_t *tep)
{
	ASSERT(tep->te_closewait > 0);
	mutex_enter(&tep->te_closelock);
	ASSERT(tep->te_closewait == 1);
	tep->te_closewait--;
	cv_signal(&tep->te_closecv);
	mutex_exit(&tep->te_closelock);
}

/*
 * STREAMS open entry point.
 */
/* ARGSUSED */
static int
tl_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	tl_endpt_t *tep;
	minor_t	    minor = getminor(*devp);

	/*
	 * Driver is called directly. Both CLONEOPEN and MODOPEN
	 * are illegal
	 */
	if ((sflag == CLONEOPEN) || (sflag == MODOPEN))
		return (ENXIO);

	if (rq->q_ptr != NULL)
		return (0);

	/* Minor number should specify the mode used for the driver. */
	if ((minor >= TL_UNUSED))
		return (ENXIO);

	if (oflag & SO_SOCKSTR) {
		minor |= TL_SOCKET;
	}

	tep = kmem_cache_alloc(tl_cache, KM_SLEEP);
	tep->te_refcnt = 1;
	tep->te_cpid = curproc->p_pid;
	rq->q_ptr = WR(rq)->q_ptr = tep;
	tep->te_state = TS_UNBND;
	tep->te_credp = credp;
	crhold(credp);
	tep->te_zoneid = getzoneid();

	tep->te_flag = minor & TL_MINOR_MASK;
	tep->te_transport = &tl_transports[minor];

	/* Allocate a unique minor number for this instance. */
	tep->te_minor = (minor_t)id_alloc(tl_minors);

	/* Reserve hash handle for bind(). */
	(void) mod_hash_reserve(tep->te_addrhash, &tep->te_hash_hndl);

	/* Transport-specific initialization */
	if (IS_COTS(tep)) {
		/* Use private serializer */
		tep->te_ser = tl_serializer_alloc(KM_SLEEP);

		/* Create list for pending connections */
		list_create(&tep->te_iconp, sizeof (tl_icon_t),
		    offsetof(tl_icon_t, ti_node));
		tep->te_qlen = 0;
		tep->te_nicon = 0;
		tep->te_oconp = NULL;
		tep->te_conp = NULL;
	} else {
		/* Use shared serializer */
		tep->te_ser = tep->te_transport->tr_serializer;
		bzero(&tep->te_flows, sizeof (list_node_t));
		/* Create list for flow control */
		list_create(&tep->te_flowlist, sizeof (tl_endpt_t),
		    offsetof(tl_endpt_t, te_flows));
		tep->te_flowq = NULL;
		tep->te_lastep = NULL;
	}

	/* Initialize endpoint address */
	if (IS_SOCKET(tep)) {
		/* Socket-specific address handling. */
		tep->te_alen = TL_SOUX_ADDRLEN;
		tep->te_abuf = &tep->te_uxaddr;
		tep->te_vp = (void *)(uintptr_t)tep->te_minor;
		tep->te_magic = SOU_MAGIC_IMPLICIT;
	} else {
		tep->te_alen = -1;
		tep->te_abuf = NULL;
	}

	/* clone the driver */
	*devp = makedevice(getmajor(*devp), tep->te_minor);

	tep->te_rq = rq;
	tep->te_wq = WR(rq);

#ifdef	_ILP32
	if (IS_SOCKET(tep))
		tep->te_acceptor_id = tep->te_minor;
	else
		tep->te_acceptor_id = (t_uscalar_t)rq;
#else
	tep->te_acceptor_id = tep->te_minor;
#endif	/* _ILP32 */

	qprocson(rq);

	/*
	 * Insert acceptor ID in the hash. The AI hash always sleeps on
	 * insertion so insertion can't fail.
	 */
	(void) mod_hash_insert(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t)tep);

	return (0);
}

/* ARGSUSED1 */
static int
tl_close(queue_t *rq, int flag, cred_t *credp)
{
	tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
	tl_endpt_t *elp = NULL;
	queue_t	*wq = tep->te_wq;
	int rc;

	ASSERT(wq == WR(rq));

	/*
	 * Remove the endpoint from acceptor hash.
	 */
	rc = mod_hash_remove(tep->te_transport->tr_ai_hash,
	    (mod_hash_key_t)(uintptr_t)tep->te_acceptor_id,
	    (mod_hash_val_t *)&elp);
	ASSERT(rc == 0 && tep == elp);
	if ((rc != 0) || (tep != elp)) {
		(void) (STRLOG(TL_ID, tep->te_minor, 1,
		    SL_TRACE|SL_ERROR,
		    "tl_close:inconsistency in AI hash"));
	}

	/*
	 * Wait till close is safe, then mark endpoint as closing.
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	tep->te_closing = B_TRUE;
	/*
	 * Will wait for the serializer part of the close to finish, so set
	 * te_closewait now.
	 */
	tep->te_closewait = 1;
	tep->te_nowsrv = B_FALSE;
	mutex_exit(&tep->te_closelock);

	/*
	 * tl_close_ser doesn't drop reference, so no need to tl_refhold.
	 * It is safe because close will wait for tl_close_ser to finish.
	 */
	tl_serializer_enter(tep, tl_close_ser, &tep->te_closemp);

	/*
	 * Wait for the first phase of close to complete before qprocsoff().
	 */
	mutex_enter(&tep->te_closelock);
	while (tep->te_closewait)
		cv_wait(&tep->te_closecv, &tep->te_closelock);
	mutex_exit(&tep->te_closelock);

	qprocsoff(rq);

	if (tep->te_bufcid) {
		qunbufcall(rq, tep->te_bufcid);
		tep->te_bufcid = 0;
	}
	if (tep->te_timoutid) {
		(void) quntimeout(rq, tep->te_timoutid);
		tep->te_timoutid = 0;
	}

	/*
	 * Finish close behind serializer.
	 *
	 * For a CLTS endpoint increase a refcount and continue close
	 * processing with serializer protection. This processing may happen
	 * asynchronously with the completion of tl_close().
	 *
	 * For a COTS endpoint wait before destroying tep since the serializer
	 * may go away together with tep and we need to destroy serializer
	 * outside of serializer context.
	 */
	ASSERT(tep->te_closewait == 0);
	if (IS_COTS(tep))
		tep->te_closewait = 1;
	else
		tl_refhold(tep);

	tl_serializer_enter(tep, tl_close_finish_ser, &tep->te_closemp);

	/*
	 * For connection-oriented transports wait for all serializer activity
	 * to settle down.
	 */
	if (IS_COTS(tep)) {
		mutex_enter(&tep->te_closelock);
		while (tep->te_closewait)
			cv_wait(&tep->te_closecv, &tep->te_closelock);
		mutex_exit(&tep->te_closelock);
	}

	crfree(tep->te_credp);
	tep->te_credp = NULL;
	tep->te_wq = NULL;
	tl_refrele(tep);
	/*
	 * tep is likely to be destroyed now, so can't reference it any more.
	 */

	rq->q_ptr = wq->q_ptr = NULL;
	return (0);
}

/*
 * First phase of close processing done behind the serializer.
 *
 * Do not drop the reference in the end - tl_close() wants this reference to
 * stay.
 */
/* ARGSUSED0 */
static void
tl_close_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	ASSERT(tep->te_closewait == 1);
	ASSERT(!(tep->te_flag & TL_CLOSE_SER));

	tep->te_flag |= TL_CLOSE_SER;

	/*
	 * Drain out all messages on queue except for TL_TICOTS where the
	 * abortive release semantics permit discarding of data on close
	 */
	if (tep->te_wq->q_first && (IS_CLTS(tep) || IS_COTSORD(tep))) {
		tl_wsrv_ser(NULL, tep);
	}

	/* Remove address from hash table. */
	tl_addr_unbind(tep);
	/*
	 * qprocsoff() gets confused when q->q_next is not NULL on the write
	 * queue of the driver, so clear these before qprocsoff() is called.
	 * Also clear q_next for the peer since this queue is going away.
	 */
	if (IS_COTS(tep) && !IS_SOCKET(tep)) {
		tl_endpt_t *peer_tep = tep->te_conp;

		tep->te_wq->q_next = NULL;
		if ((peer_tep != NULL) && !peer_tep->te_closing)
			peer_tep->te_wq->q_next = NULL;
	}

	tep->te_rq = NULL;

	/* wake up tl_close() */
	tl_closeok(tep);
	tl_serializer_exit(tep);
}

/*
 * Second phase of tl_close(). Should wake up tl_close() for COTS mode and
 * drop the reference for CLTS.
 *
 * Called from serializer. Should drop reference count for CLTS only.
 */
/* ARGSUSED0 */
static void
tl_close_finish_ser(mblk_t *mp, tl_endpt_t *tep)
{
	ASSERT(tep->te_closing);
	IMPLY(IS_CLTS(tep), tep->te_closewait == 0);
	IMPLY(IS_COTS(tep), tep->te_closewait == 1);

	tep->te_state = -1;	/* Uninitialized */
	if (IS_COTS(tep)) {
		tl_co_unconnect(tep);
	} else {
		/* Connectionless specific cleanup */
		TL_REMOVE_PEER(tep->te_lastep);
		/*
		 * Backenable anybody that is flow controlled waiting for
		 * this endpoint.
		 */
		tl_cl_backenable(tep);
		if (tep->te_flowq != NULL) {
			list_remove(&(tep->te_flowq->te_flowlist), tep);
			tep->te_flowq = NULL;
		}
	}

	tl_serializer_exit(tep);
	if (IS_COTS(tep))
		tl_closeok(tep);
	else
		tl_refrele(tep);
}
1717 * STREAMS write-side put procedure.
1718 * Enter serializer for most of the processing.
1720 * The T_CONN_REQ is processed outside of serializer.
1722 static void
1723 tl_wput(queue_t *wq, mblk_t *mp)
1725 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1726 ssize_t msz = MBLKL(mp);
1727 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
1728 tlproc_t *tl_proc = NULL;
1730 switch (DB_TYPE(mp)) {
1731 case M_DATA:
1732 /* Only valid for connection-oriented transports */
1733 if (IS_CLTS(tep)) {
1734 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1735 SL_TRACE|SL_ERROR,
1736 "tl_wput:M_DATA invalid for ticlts driver"));
1737 tl_merror(wq, mp, EPROTO);
1738 return;
1740 tl_proc = tl_wput_data_ser;
1741 break;
1743 case M_IOCTL:
1744 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
1745 case TL_IOC_CREDOPT:
1746 /* FALLTHROUGH */
1747 case TL_IOC_UCREDOPT:
1749 * Serialize endpoint state change.
1751 tl_proc = tl_do_ioctl_ser;
1752 break;
1754 default:
1755 miocnak(wq, mp, 0, EINVAL);
1756 return;
1758 break;
1760 case M_FLUSH:
1762 * do canonical M_FLUSH processing
1764 if (*mp->b_rptr & FLUSHW) {
1765 flushq(wq, FLUSHALL);
1766 *mp->b_rptr &= ~FLUSHW;
1768 if (*mp->b_rptr & FLUSHR) {
1769 flushq(RD(wq), FLUSHALL);
1770 qreply(wq, mp);
1771 } else {
1772 freemsg(mp);
1774 return;
1776 case M_PROTO:
1777 if (msz < sizeof (prim->type)) {
1778 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1779 SL_TRACE|SL_ERROR,
1780 "tl_wput:M_PROTO data too short"));
1781 tl_merror(wq, mp, EPROTO);
1782 return;
1784 switch (prim->type) {
1785 case T_OPTMGMT_REQ:
1786 case T_SVR4_OPTMGMT_REQ:
1788 * Process TPI option management requests immediately
1789 * in put procedure regardless of in-order processing
1790 * of already queued messages.
1791 * (Note: This driver supports AF_UNIX socket
1792 * implementation. Unless we implement this processing,
1793 * setsockopt() on socket endpoint will block on flow
1794 * controlled endpoints which it should not. That is
1795 * required for successful execution of VSU socket tests
1796 * and is consistent with BSD socket behavior).
1798 tl_optmgmt(wq, mp);
1799 return;
1800 case O_T_BIND_REQ:
1801 case T_BIND_REQ:
1802 tl_proc = tl_bind_ser;
1803 break;
1804 case T_CONN_REQ:
1805 if (IS_CLTS(tep)) {
1806 tl_merror(wq, mp, EPROTO);
1807 return;
1809 tl_conn_req(wq, mp);
1810 return;
1811 case T_DATA_REQ:
1812 case T_OPTDATA_REQ:
1813 case T_EXDATA_REQ:
1814 case T_ORDREL_REQ:
1815 tl_proc = tl_putq_ser;
1816 break;
1817 case T_UNITDATA_REQ:
1818 if (IS_COTS(tep) ||
1819 (msz < sizeof (struct T_unitdata_req))) {
1820 tl_merror(wq, mp, EPROTO);
1821 return;
1823 if ((tep->te_state == TS_IDLE) && !wq->q_first) {
1824 tl_proc = tl_unitdata_ser;
1825 } else {
1826 tl_proc = tl_putq_ser;
1828 break;
1829 default:
1831 * process in service procedure if message already
1832 * queued (maintain in-order processing)
1834 if (wq->q_first != NULL) {
1835 tl_proc = tl_putq_ser;
1836 } else {
1837 tl_proc = tl_wput_ser;
1839 break;
1841 break;
1843 case M_PCPROTO:
1845 * Check that the message has enough data to figure out TPI
1846 * primitive.
1848 if (msz < sizeof (prim->type)) {
1849 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1850 SL_TRACE|SL_ERROR,
1851 "tl_wput:M_PCROTO data too short"));
1852 tl_merror(wq, mp, EPROTO);
1853 return;
1855 switch (prim->type) {
1856 case T_CAPABILITY_REQ:
1857 tl_capability_req(mp, tep);
1858 return;
1859 case T_INFO_REQ:
1860 tl_proc = tl_info_req_ser;
1861 break;
1862 case T_ADDR_REQ:
1863 tl_proc = tl_addr_req_ser;
1864 break;
1866 default:
1867 (void) (STRLOG(TL_ID, tep->te_minor, 1,
1868 SL_TRACE|SL_ERROR,
1869 "tl_wput:unknown TPI msg primitive"));
1870 tl_merror(wq, mp, EPROTO);
1871 return;
1873 break;
1874 default:
1875 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
1876 "tl_wput:default:unexpected Streams message"));
1877 freemsg(mp);
1878 return;
1882 * Continue processing via serializer.
1884 ASSERT(tl_proc != NULL);
1885 tl_refhold(tep);
1886 tl_serializer_enter(tep, tl_proc, mp);
1887 }
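/*
 * Illustrative example (added commentary, not original driver code): every
 * tl_proc callback dispatched above via tl_refhold() + tl_serializer_enter()
 * must balance those calls with tl_serializer_exit() + tl_refrele(). A
 * minimal sketch of that contract, modeled on tl_bind_ser() and
 * tl_do_ioctl_ser() below; tl_example() is hypothetical:
 *
 *	static void
 *	tl_example_ser(mblk_t *mp, tl_endpt_t *tep)
 *	{
 *		if (!tep->te_closing)
 *			tl_example(mp, tep);
 *		else
 *			freemsg(mp);
 *		tl_serializer_exit(tep);
 *		tl_refrele(tep);
 *	}
 *
 * The exit/refrele pair is what keeps the endpoint reference count balanced.
 */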
1889 /*
1890 * Place message on the queue while preserving order.
1891 */
1892 static void
1893 tl_putq_ser(mblk_t *mp, tl_endpt_t *tep)
1895 if (tep->te_closing) {
1896 tl_wput_ser(mp, tep);
1897 } else {
1898 TL_PUTQ(tep, mp);
1899 tl_serializer_exit(tep);
1900 tl_refrele(tep);
1905 static void
1906 tl_wput_common_ser(mblk_t *mp, tl_endpt_t *tep)
1908 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
1910 switch (DB_TYPE(mp)) {
1911 case M_DATA:
1912 tl_data(mp, tep);
1913 break;
1914 case M_PROTO:
1915 tl_do_proto(mp, tep);
1916 break;
1917 default:
1918 freemsg(mp);
1919 break;
1923 /*
1924 * Write side put procedure called from serializer.
1925 */
1926 static void
1927 tl_wput_ser(mblk_t *mp, tl_endpt_t *tep)
1929 tl_wput_common_ser(mp, tep);
1930 tl_serializer_exit(tep);
1931 tl_refrele(tep);
1934 /*
1935 * M_DATA processing. Called from serializer.
1936 */
1937 static void
1938 tl_wput_data_ser(mblk_t *mp, tl_endpt_t *tep)
1940 tl_endpt_t *peer_tep = tep->te_conp;
1941 queue_t *peer_rq;
1943 ASSERT(DB_TYPE(mp) == M_DATA);
1944 ASSERT(IS_COTS(tep));
1946 IMPLY(peer_tep, tep->te_serializer == peer_tep->te_serializer);
1949 * fastpath for data. Ignore flow control if tep is closing.
1951 if ((peer_tep != NULL) &&
1952 !peer_tep->te_closing &&
1953 ((tep->te_state == TS_DATA_XFER) ||
1954 (tep->te_state == TS_WREQ_ORDREL)) &&
1955 (tep->te_wq != NULL) &&
1956 (tep->te_wq->q_first == NULL) &&
1957 ((peer_tep->te_state == TS_DATA_XFER) ||
1958 (peer_tep->te_state == TS_WREQ_ORDREL)) &&
1959 ((peer_rq = peer_tep->te_rq) != NULL) &&
1960 (canputnext(peer_rq) || tep->te_closing)) {
1961 putnext(peer_rq, mp);
1962 } else if (tep->te_closing) {
1964 * It is possible that by the time we got here tep started to
1965 * close. If the write queue is not empty, and the state is
1966 * TS_DATA_XFER the data should be delivered in order, so we
1967 * call putq() instead of freeing the data.
1969 if ((tep->te_wq != NULL) &&
1970 ((tep->te_state == TS_DATA_XFER) ||
1971 (tep->te_state == TS_WREQ_ORDREL))) {
1972 TL_PUTQ(tep, mp);
1973 } else {
1974 freemsg(mp);
1976 } else {
1977 TL_PUTQ(tep, mp);
1980 tl_serializer_exit(tep);
1981 tl_refrele(tep);
1982 }
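/*
 * Illustrative example (added commentary, not original driver code): the
 * fastpath test above can be read as one predicate. A hypothetical helper
 * restating it, assuming the same field semantics as the code above:
 *
 *	static boolean_t
 *	tl_data_fastpath_ok(tl_endpt_t *tep, tl_endpt_t *peer)
 *	{
 *		return (peer != NULL && !peer->te_closing &&
 *		    (tep->te_state == TS_DATA_XFER ||
 *		    tep->te_state == TS_WREQ_ORDREL) &&
 *		    tep->te_wq != NULL && tep->te_wq->q_first == NULL &&
 *		    (peer->te_state == TS_DATA_XFER ||
 *		    peer->te_state == TS_WREQ_ORDREL) &&
 *		    peer->te_rq != NULL &&
 *		    (canputnext(peer->te_rq) || tep->te_closing));
 *	}
 *
 * When it holds, the mblk goes straight to the peer's read queue with
 * putnext(); otherwise it is queued, or freed if the endpoint is going away.
 */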
1984 /*
1985 * Write side service routine.
1986 *
1987 * All actual processing happens within the serializer, which is entered
1988 * synchronously. It is possible that by the time tl_wsrv() wakes up, some new
1989 * messages that need processing may have arrived, so tl_wsrv repeats until
1990 * the queue is empty or te_nowsrv is set.
1991 */
1992 static void
1993 tl_wsrv(queue_t *wq)
1995 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
1997 while ((wq->q_first != NULL) && !tep->te_nowsrv) {
1998 mutex_enter(&tep->te_srv_lock);
1999 ASSERT(tep->te_wsrv_active == B_FALSE);
2000 tep->te_wsrv_active = B_TRUE;
2001 mutex_exit(&tep->te_srv_lock);
2003 tl_serializer_enter(tep, tl_wsrv_ser, &tep->te_wsrvmp);
2006 * Wait for serializer job to complete.
2008 mutex_enter(&tep->te_srv_lock);
2009 while (tep->te_wsrv_active) {
2010 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2012 cv_signal(&tep->te_srv_cv);
2013 mutex_exit(&tep->te_srv_lock);
2017 /*
2018 * Serialized write side processing of the STREAMS queue.
2019 * May be called either from tl_wsrv() or from tl_close(), in which case
2020 * ser_mp is NULL.
2021 */
2022 static void
2023 tl_wsrv_ser(mblk_t *ser_mp, tl_endpt_t *tep)
2025 mblk_t *mp;
2026 queue_t *wq = tep->te_wq;
2028 ASSERT(wq != NULL);
2029 while (!tep->te_nowsrv && (mp = getq(wq)) != NULL) {
2030 tl_wput_common_ser(mp, tep);
2034 * Wakeup service routine unless called from close.
2035 * If ser_mp is specified, the caller is tl_wsrv().
2036 * Otherwise, the caller is tl_close_ser(). Since tl_close_ser() doesn't
2037 * call tl_serializer_enter() before calling tl_wsrv_ser(), there should
2038 * be no matching tl_serializer_exit() in this case.
2039 * Also, there is no need to wakeup anyone since tl_close_ser() is not
2040 * waiting on te_srv_cv.
2042 if (ser_mp != NULL) {
2044 * We are called from tl_wsrv.
2046 mutex_enter(&tep->te_srv_lock);
2047 ASSERT(tep->te_wsrv_active);
2048 tep->te_wsrv_active = B_FALSE;
2049 cv_signal(&tep->te_srv_cv);
2050 mutex_exit(&tep->te_srv_lock);
2051 tl_serializer_exit(tep);
2055 /*
2056 * Called when the stream is backenabled. Enter serializer and qenable everyone
2057 * flow controlled by tep.
2058 *
2059 * NOTE: The service routine should enter the serializer synchronously.
2060 * Otherwise it is possible that two instances of tl_rsrv will be running,
2061 * reusing the same rsrv mblk.
2062 */
2063 static void
2064 tl_rsrv(queue_t *rq)
2066 tl_endpt_t *tep = (tl_endpt_t *)rq->q_ptr;
2068 ASSERT(rq->q_first == NULL);
2069 ASSERT(tep->te_rsrv_active == 0);
2071 tep->te_rsrv_active = B_TRUE;
2072 tl_serializer_enter(tep, tl_rsrv_ser, &tep->te_rsrvmp);
2074 * Wait for serializer job to complete.
2076 mutex_enter(&tep->te_srv_lock);
2077 while (tep->te_rsrv_active) {
2078 cv_wait(&tep->te_srv_cv, &tep->te_srv_lock);
2080 cv_signal(&tep->te_srv_cv);
2081 mutex_exit(&tep->te_srv_lock);
2082 }
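/*
 * Illustrative sketch (added commentary, not original driver code): the
 * backenable machinery works off two fields. A flow-controlled sender
 * records the endpoint it is waiting on in te_flowq and is linked onto that
 * endpoint's te_flowlist; the backenable path later qenables each waiter.
 * Roughly:
 *
 *	blocked_tep->te_flowq = busy_tep;
 *	list_insert_tail(&busy_tep->te_flowlist, blocked_tep);
 *
 * and on backenable, something of this shape runs:
 *
 *	while ((t = list_head(&busy_tep->te_flowlist)) != NULL) {
 *		list_remove(&busy_tep->te_flowlist, t);
 *		t->te_flowq = NULL;
 *		qenable(t->te_wq);
 *	}
 *
 * The exact list handling lives in tl_cl_backenable()/TL_QENABLE; this is
 * only the relationship, inferred from the cleanup code earlier in this file.
 */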
2084 /* ARGSUSED */
2085 static void
2086 tl_rsrv_ser(mblk_t *mp, tl_endpt_t *tep)
2088 tl_endpt_t *peer_tep;
2090 if (IS_CLTS(tep) && tep->te_state == TS_IDLE) {
2091 tl_cl_backenable(tep);
2092 } else if (
2093 IS_COTS(tep) &&
2094 ((peer_tep = tep->te_conp) != NULL) &&
2095 !peer_tep->te_closing &&
2096 ((tep->te_state == TS_DATA_XFER) ||
2097 (tep->te_state == TS_WIND_ORDREL)||
2098 (tep->te_state == TS_WREQ_ORDREL))) {
2099 TL_QENABLE(peer_tep);
2103 * Wakeup read side service routine.
2105 mutex_enter(&tep->te_srv_lock);
2106 ASSERT(tep->te_rsrv_active);
2107 tep->te_rsrv_active = B_FALSE;
2108 cv_signal(&tep->te_srv_cv);
2109 mutex_exit(&tep->te_srv_lock);
2110 tl_serializer_exit(tep);
2113 /*
2114 * process M_PROTO messages. Always called from serializer.
2115 */
2116 static void
2117 tl_do_proto(mblk_t *mp, tl_endpt_t *tep)
2119 ssize_t msz = MBLKL(mp);
2120 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
2122 /* Message size was validated by tl_wput(). */
2123 ASSERT(msz >= sizeof (prim->type));
2125 switch (prim->type) {
2126 case T_UNBIND_REQ:
2127 tl_unbind(mp, tep);
2128 break;
2130 case T_ADDR_REQ:
2131 tl_addr_req(mp, tep);
2132 break;
2134 case O_T_CONN_RES:
2135 case T_CONN_RES:
2136 if (IS_CLTS(tep)) {
2137 tl_merror(tep->te_wq, mp, EPROTO);
2138 break;
2140 tl_conn_res(mp, tep);
2141 break;
2143 case T_DISCON_REQ:
2144 if (IS_CLTS(tep)) {
2145 tl_merror(tep->te_wq, mp, EPROTO);
2146 break;
2148 tl_discon_req(mp, tep);
2149 break;
2151 case T_DATA_REQ:
2152 if (IS_CLTS(tep)) {
2153 tl_merror(tep->te_wq, mp, EPROTO);
2154 break;
2156 tl_data(mp, tep);
2157 break;
2159 case T_OPTDATA_REQ:
2160 if (IS_CLTS(tep)) {
2161 tl_merror(tep->te_wq, mp, EPROTO);
2162 break;
2164 tl_data(mp, tep);
2165 break;
2167 case T_EXDATA_REQ:
2168 if (IS_CLTS(tep)) {
2169 tl_merror(tep->te_wq, mp, EPROTO);
2170 break;
2172 tl_exdata(mp, tep);
2173 break;
2175 case T_ORDREL_REQ:
2176 if (! IS_COTSORD(tep)) {
2177 tl_merror(tep->te_wq, mp, EPROTO);
2178 break;
2180 tl_ordrel(mp, tep);
2181 break;
2183 case T_UNITDATA_REQ:
2184 if (IS_COTS(tep)) {
2185 tl_merror(tep->te_wq, mp, EPROTO);
2186 break;
2188 tl_unitdata(mp, tep);
2189 break;
2191 default:
2192 tl_merror(tep->te_wq, mp, EPROTO);
2193 break;
2194 }
2195 }
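/*
 * Illustrative example (added commentary, not original driver code): the
 * M_PROTO messages dispatched above are normally generated by timod/libnsl,
 * but a user process can hand-craft one with putmsg(). A sketch of a
 * T_DATA_REQ, assuming fd is a connected endpoint on this driver:
 *
 *	#include <stropts.h>
 *	#include <sys/tihdr.h>
 *
 *	int
 *	send_data_req(int fd, void *buf, int len)
 *	{
 *		struct T_data_req req;
 *		struct strbuf ctl, dat;
 *
 *		req.PRIM_type = T_DATA_REQ;
 *		req.MORE_flag = 0;
 *		ctl.len = sizeof (req);
 *		ctl.buf = (char *)&req;
 *		dat.len = len;
 *		dat.buf = (char *)buf;
 *		return (putmsg(fd, &ctl, &dat, 0));
 *	}
 */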
2197 /*
2198 * Process ioctl from serializer.
2199 * This is a wrapper around tl_do_ioctl().
2200 */
2201 static void
2202 tl_do_ioctl_ser(mblk_t *mp, tl_endpt_t *tep)
2204 if (! tep->te_closing)
2205 tl_do_ioctl(mp, tep);
2206 else
2207 freemsg(mp);
2209 tl_serializer_exit(tep);
2210 tl_refrele(tep);
2213 static void
2214 tl_do_ioctl(mblk_t *mp, tl_endpt_t *tep)
2216 struct iocblk *iocbp = (struct iocblk *)mp->b_rptr;
2217 int cmd = iocbp->ioc_cmd;
2218 queue_t *wq = tep->te_wq;
2219 int error;
2220 int thisopt, otheropt;
2222 ASSERT((cmd == TL_IOC_CREDOPT) || (cmd == TL_IOC_UCREDOPT));
2224 switch (cmd) {
2225 case TL_IOC_CREDOPT:
2226 if (cmd == TL_IOC_CREDOPT) {
2227 thisopt = TL_SETCRED;
2228 otheropt = TL_SETUCRED;
2229 } else {
2230 /* FALLTHROUGH */
2231 case TL_IOC_UCREDOPT:
2232 thisopt = TL_SETUCRED;
2233 otheropt = TL_SETCRED;
2236 * The credentials passing does not apply to sockets.
2237 * Only one of the cred options can be set at a given time.
2239 if (IS_SOCKET(tep) || (tep->te_flag & otheropt)) {
2240 miocnak(wq, mp, 0, EINVAL);
2241 return;
2244 /*
2245 * Turn on generation of credential options for
2246 * T_conn_req, T_conn_con, T_unitdata_ind.
2247 */
2248 error = miocpullup(mp, sizeof (uint32_t));
2249 if (error != 0) {
2250 miocnak(wq, mp, 0, error);
2251 return;
2253 if (!IS_P2ALIGNED(mp->b_cont->b_rptr, sizeof (uint32_t))) {
2254 miocnak(wq, mp, 0, EINVAL);
2255 return;
2258 if (*(uint32_t *)mp->b_cont->b_rptr)
2259 tep->te_flag |= thisopt;
2260 else
2261 tep->te_flag &= ~thisopt;
2263 miocack(wq, mp, 0, 0);
2264 break;
2266 default:
2267 /* Should not be here */
2268 miocnak(wq, mp, 0, EINVAL);
2269 break;
2270 }
2271 }
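/*
 * Illustrative example (added commentary, not original driver code): the two
 * ioctls handled above take a uint32_t payload that turns the option on or
 * off. A user-level caller might look like this, assuming <sys/tl.h>
 * provides the ioctl value and fd is a TLI endpoint on this driver:
 *
 *	#include <stropts.h>
 *	#include <sys/tl.h>
 *
 *	int
 *	enable_ucred(int fd)
 *	{
 *		struct strioctl ic;
 *		uint32_t on = 1;
 *
 *		ic.ic_cmd = TL_IOC_UCREDOPT;
 *		ic.ic_timout = 0;
 *		ic.ic_len = sizeof (on);
 *		ic.ic_dp = (char *)&on;
 *		return (ioctl(fd, I_STR, &ic));
 *	}
 *
 * Note that tl_do_ioctl() above rejects this for sockets and when the
 * opposite cred option is already set.
 */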
2274 /*
2275 * send T_ERROR_ACK
2276 * Note: assumes enough memory or caller passed big enough mp
2277 * - no recovery from allocb failures
2278 */
2280 static void
2281 tl_error_ack(queue_t *wq, mblk_t *mp, t_scalar_t tli_err,
2282 t_scalar_t unix_err, t_scalar_t type)
2284 struct T_error_ack *err_ack;
2285 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
2286 M_PCPROTO, T_ERROR_ACK);
2288 if (ackmp == NULL) {
2289 (void) (STRLOG(TL_ID, 0, 1, SL_TRACE|SL_ERROR,
2290 "tl_error_ack:out of mblk memory"));
2291 tl_merror(wq, NULL, ENOSR);
2292 return;
2294 err_ack = (struct T_error_ack *)ackmp->b_rptr;
2295 err_ack->ERROR_prim = type;
2296 err_ack->TLI_error = tli_err;
2297 err_ack->UNIX_error = unix_err;
2300 * send error ack message
2302 qreply(wq, ackmp);
2307 /*
2308 * send T_OK_ACK
2309 * Note: assumes enough memory or caller passed big enough mp
2310 * - no recovery from allocb failures
2311 */
2312 static void
2313 tl_ok_ack(queue_t *wq, mblk_t *mp, t_scalar_t type)
2315 struct T_ok_ack *ok_ack;
2316 mblk_t *ackmp = tpi_ack_alloc(mp, sizeof (struct T_ok_ack),
2317 M_PCPROTO, T_OK_ACK);
2319 if (ackmp == NULL) {
2320 tl_merror(wq, NULL, ENOMEM);
2321 return;
2324 ok_ack = (struct T_ok_ack *)ackmp->b_rptr;
2325 ok_ack->CORRECT_prim = type;
2327 (void) qreply(wq, ackmp);
2330 /*
2331 * Process T_BIND_REQ and O_T_BIND_REQ from serializer.
2332 * This is a wrapper around tl_bind().
2333 */
2334 static void
2335 tl_bind_ser(mblk_t *mp, tl_endpt_t *tep)
2337 if (! tep->te_closing)
2338 tl_bind(mp, tep);
2339 else
2340 freemsg(mp);
2342 tl_serializer_exit(tep);
2343 tl_refrele(tep);
2346 /*
2347 * Process T_BIND_REQ and O_T_BIND_REQ TPI requests.
2348 * Assumes that the endpoint is in the unbound state.
2349 */
2350 static void
2351 tl_bind(mblk_t *mp, tl_endpt_t *tep)
2353 queue_t *wq = tep->te_wq;
2354 struct T_bind_ack *b_ack;
2355 struct T_bind_req *bind = (struct T_bind_req *)mp->b_rptr;
2356 mblk_t *ackmp, *bamp;
2357 soux_addr_t ux_addr;
2358 t_uscalar_t qlen = 0;
2359 t_scalar_t alen, aoff;
2360 tl_addr_t addr_req;
2361 void *addr_startp;
2362 ssize_t msz = MBLKL(mp), basize;
2363 t_scalar_t tli_err = 0, unix_err = 0;
2364 t_scalar_t save_prim_type = bind->PRIM_type;
2365 t_scalar_t save_state = tep->te_state;
2367 if (tep->te_state != TS_UNBND) {
2368 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2369 SL_TRACE|SL_ERROR,
2370 "tl_wput:bind_request:out of state, state=%d",
2371 tep->te_state));
2372 tli_err = TOUTSTATE;
2373 goto error;
2376 if (msz < sizeof (struct T_bind_req)) {
2377 tli_err = TSYSERR; unix_err = EINVAL;
2378 goto error;
2381 tep->te_state = NEXTSTATE(TE_BIND_REQ, tep->te_state);
2383 ASSERT((bind->PRIM_type == O_T_BIND_REQ) ||
2384 (bind->PRIM_type == T_BIND_REQ));
2386 alen = bind->ADDR_length;
2387 aoff = bind->ADDR_offset;
2389 /* negotiate max conn req pending */
2390 if (IS_COTS(tep)) {
2391 qlen = bind->CONIND_number;
2392 if (qlen > tl_maxqlen)
2393 qlen = tl_maxqlen;
2396 /*
2397 * Reserve hash handle. It can only be NULL if the endpoint was unbound
2398 * and is being bound again.
2399 */
2400 if ((tep->te_hash_hndl == NULL) &&
2401 ((tep->te_flag & TL_ADDRHASHED) == 0) &&
2402 mod_hash_reserve_nosleep(tep->te_addrhash,
2403 &tep->te_hash_hndl) != 0) {
2404 tli_err = TSYSERR; unix_err = ENOSR;
2405 goto error;
2409 * Verify address correctness.
2411 if (IS_SOCKET(tep)) {
2412 ASSERT(bind->PRIM_type == O_T_BIND_REQ);
2414 if ((alen != TL_SOUX_ADDRLEN) ||
2415 (aoff < 0) ||
2416 (aoff + alen > msz)) {
2417 (void) (STRLOG(TL_ID, tep->te_minor,
2418 1, SL_TRACE|SL_ERROR,
2419 "tl_bind: invalid socket addr"));
2420 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2421 tli_err = TSYSERR; unix_err = EINVAL;
2422 goto error;
2424 /* Copy address from message to local buffer. */
2425 bcopy(mp->b_rptr + aoff, &ux_addr, sizeof (ux_addr));
2427 * Check that we got correct address from sockets
2429 if ((ux_addr.soua_magic != SOU_MAGIC_EXPLICIT) &&
2430 (ux_addr.soua_magic != SOU_MAGIC_IMPLICIT)) {
2431 (void) (STRLOG(TL_ID, tep->te_minor,
2432 1, SL_TRACE|SL_ERROR,
2433 "tl_bind: invalid socket magic"));
2434 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2435 tli_err = TSYSERR; unix_err = EINVAL;
2436 goto error;
2438 if ((ux_addr.soua_magic == SOU_MAGIC_IMPLICIT) &&
2439 (ux_addr.soua_vp != NULL)) {
2440 (void) (STRLOG(TL_ID, tep->te_minor,
2441 1, SL_TRACE|SL_ERROR,
2442 "tl_bind: implicit addr non-empty"));
2443 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2444 tli_err = TSYSERR; unix_err = EINVAL;
2445 goto error;
2447 if ((ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) &&
2448 (ux_addr.soua_vp == NULL)) {
2449 (void) (STRLOG(TL_ID, tep->te_minor,
2450 1, SL_TRACE|SL_ERROR,
2451 "tl_bind: explicit addr empty"));
2452 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2453 tli_err = TSYSERR; unix_err = EINVAL;
2454 goto error;
2456 } else {
2457 if ((alen > 0) && ((aoff < 0) ||
2458 ((ssize_t)(aoff + alen) > msz) ||
2459 ((aoff + alen) < 0))) {
2460 (void) (STRLOG(TL_ID, tep->te_minor,
2461 1, SL_TRACE|SL_ERROR,
2462 "tl_bind: invalid message"));
2463 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2464 tli_err = TSYSERR; unix_err = EINVAL;
2465 goto error;
2467 if ((alen < 0) || (alen > (msz - sizeof (struct T_bind_req)))) {
2468 (void) (STRLOG(TL_ID, tep->te_minor,
2469 1, SL_TRACE|SL_ERROR,
2470 "tl_bind: bad addr in message"));
2471 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2472 tli_err = TBADADDR;
2473 goto error;
2475 #ifdef DEBUG
2477 * Mild form of ASSERT()ion to detect broken TPI apps.
2478 * if (! assertion)
2479 * log warning;
2481 if (! ((alen == 0 && aoff == 0) ||
2482 (aoff >= (t_scalar_t)(sizeof (struct T_bind_req))))) {
2483 (void) (STRLOG(TL_ID, tep->te_minor,
2484 3, SL_TRACE|SL_ERROR,
2485 "tl_bind: addr overlaps TPI message"));
2487 #endif
2491 * Bind the address provided or allocate one if requested.
2492 * Allow rebinds with a new qlen value.
2494 if (IS_SOCKET(tep)) {
2496 * For anonymous requests the te_ap is already set up properly
2497 * so use minor number as an address.
2498 * For explicit requests need to check whether the address is
2499 * already in use.
2501 if (ux_addr.soua_magic == SOU_MAGIC_EXPLICIT) {
2502 int rc;
2504 if (tep->te_flag & TL_ADDRHASHED) {
2505 ASSERT(IS_COTS(tep) && tep->te_qlen == 0);
2506 if (tep->te_vp == ux_addr.soua_vp)
2507 goto skip_addr_bind;
2508 else /* Rebind to a new address. */
2509 tl_addr_unbind(tep);
2512 * Insert address in the hash if it is not already
2513 * there. Since we use preallocated handle, the insert
2514 * can fail only if the key is already present.
2516 rc = mod_hash_insert_reserve(tep->te_addrhash,
2517 (mod_hash_key_t)ux_addr.soua_vp,
2518 (mod_hash_val_t)tep, tep->te_hash_hndl);
2520 if (rc != 0) {
2521 ASSERT(rc == MH_ERR_DUPLICATE);
2523 * Violate O_T_BIND_REQ semantics and fail with
2524 * TADDRBUSY - sockets will not use any address
2525 * other than supplied one for explicit binds.
2527 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2528 SL_TRACE|SL_ERROR,
2529 "tl_bind:requested addr %p is busy",
2530 ux_addr.soua_vp));
2531 tli_err = TADDRBUSY; unix_err = 0;
2532 goto error;
2534 tep->te_uxaddr = ux_addr;
2535 tep->te_flag |= TL_ADDRHASHED;
2536 tep->te_hash_hndl = NULL;
2538 } else if (alen == 0) {
2540 * assign any free address
2542 if (! tl_get_any_addr(tep, NULL)) {
2543 (void) (STRLOG(TL_ID, tep->te_minor,
2544 1, SL_TRACE|SL_ERROR,
2545 "tl_bind:failed to get buffer for any "
2546 "address"));
2547 tli_err = TSYSERR; unix_err = ENOSR;
2548 goto error;
2550 } else {
2551 addr_req.ta_alen = alen;
2552 addr_req.ta_abuf = (mp->b_rptr + aoff);
2553 addr_req.ta_zoneid = tep->te_zoneid;
2555 tep->te_abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
2556 if (tep->te_abuf == NULL) {
2557 tli_err = TSYSERR; unix_err = ENOSR;
2558 goto error;
2560 bcopy(addr_req.ta_abuf, tep->te_abuf, addr_req.ta_alen);
2561 tep->te_alen = alen;
2563 if (mod_hash_insert_reserve(tep->te_addrhash,
2564 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
2565 tep->te_hash_hndl) != 0) {
2566 if (save_prim_type == T_BIND_REQ) {
2568 * The bind semantics for this primitive
2569 * require a failure if the exact address
2570 * requested is busy
2572 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2573 SL_TRACE|SL_ERROR,
2574 "tl_bind:requested addr is busy"));
2575 tli_err = TADDRBUSY; unix_err = 0;
2576 goto error;
2579 /*
2580 * O_T_BIND_REQ semantics say that if the requested
2581 * address is busy, bind to any available free address.
2582 */
2583 if (! tl_get_any_addr(tep, &addr_req)) {
2584 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2585 SL_TRACE|SL_ERROR,
2586 "tl_bind:unable to get any addr buf"));
2587 tli_err = TSYSERR; unix_err = ENOMEM;
2588 goto error;
2590 } else {
2591 tep->te_flag |= TL_ADDRHASHED;
2592 tep->te_hash_hndl = NULL;
2596 ASSERT(tep->te_alen >= 0);
2598 skip_addr_bind:
2600 * prepare T_BIND_ACK TPI message
2602 basize = sizeof (struct T_bind_ack) + tep->te_alen;
2603 bamp = reallocb(mp, basize, 0);
2604 if (bamp == NULL) {
2605 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2606 "tl_wput:tl_bind: allocb failed"));
2608 * roll back state changes
2610 tl_addr_unbind(tep);
2611 tep->te_state = TS_UNBND;
2612 tl_memrecover(wq, mp, basize);
2613 return;
2616 DB_TYPE(bamp) = M_PCPROTO;
2617 bamp->b_wptr = bamp->b_rptr + basize;
2618 b_ack = (struct T_bind_ack *)bamp->b_rptr;
2619 b_ack->PRIM_type = T_BIND_ACK;
2620 b_ack->CONIND_number = qlen;
2621 b_ack->ADDR_length = tep->te_alen;
2622 b_ack->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_ack);
2623 addr_startp = bamp->b_rptr + b_ack->ADDR_offset;
2624 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
2626 if (IS_COTS(tep)) {
2627 tep->te_qlen = qlen;
2628 if (qlen > 0)
2629 tep->te_flag |= TL_LISTENER;
2632 tep->te_state = NEXTSTATE(TE_BIND_ACK, tep->te_state);
2634 * send T_BIND_ACK message
2636 (void) qreply(wq, bamp);
2637 return;
2639 error:
2640 ackmp = reallocb(mp, sizeof (struct T_error_ack), 0);
2641 if (ackmp == NULL) {
2643 * roll back state changes
2645 tep->te_state = save_state;
2646 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2647 return;
2649 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
2650 tl_error_ack(wq, ackmp, tli_err, unix_err, save_prim_type);
2651 }
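/*
 * Illustrative example (added commentary, not original driver code): a
 * T_BIND_REQ that passes the validation above carries its address right
 * after the fixed header. A sketch of building one in an mblk, with
 * abuf/alen/qlen as hypothetical inputs and error handling omitted:
 *
 *	struct T_bind_req *br;
 *	mblk_t *bmp = allocb(sizeof (*br) + alen, BPRI_MED);
 *
 *	DB_TYPE(bmp) = M_PROTO;
 *	br = (struct T_bind_req *)bmp->b_rptr;
 *	br->PRIM_type = T_BIND_REQ;
 *	br->ADDR_length = alen;
 *	br->ADDR_offset = (t_scalar_t)sizeof (*br);
 *	br->CONIND_number = qlen;
 *	bcopy(abuf, bmp->b_rptr + br->ADDR_offset, alen);
 *	bmp->b_wptr = bmp->b_rptr + sizeof (*br) + alen;
 *
 * The T_BIND_ACK built above mirrors this layout, with ADDR_offset set to
 * sizeof (struct T_bind_ack).
 */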
2653 /*
2654 * Process T_UNBIND_REQ.
2655 * Called from serializer.
2656 */
2657 static void
2658 tl_unbind(mblk_t *mp, tl_endpt_t *tep)
2660 queue_t *wq;
2661 mblk_t *ackmp;
2663 if (tep->te_closing) {
2664 freemsg(mp);
2665 return;
2668 wq = tep->te_wq;
2671 * preallocate memory for max of T_OK_ACK and T_ERROR_ACK
2672 * ==> allocate for T_ERROR_ACK (known max)
2674 if ((ackmp = reallocb(mp, sizeof (struct T_error_ack), 0)) == NULL) {
2675 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2676 return;
2678 /*
2679 * memory resources committed
2680 * Note: no message validation. The T_UNBIND_REQ message is the
2681 * same size as the PRIM_type field, so it was already verified earlier.
2682 */
2685 * validate state
2687 if (tep->te_state != TS_IDLE) {
2688 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2689 SL_TRACE|SL_ERROR,
2690 "tl_wput:T_UNBIND_REQ:out of state, state=%d",
2691 tep->te_state));
2692 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_UNBIND_REQ);
2693 return;
2695 tep->te_state = NEXTSTATE(TE_UNBIND_REQ, tep->te_state);
2698 * TPI says on T_UNBIND_REQ:
2699 * send up a M_FLUSH to flush both
2700 * read and write queues
2702 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
2704 if (! IS_SOCKET(tep) || !IS_CLTS(tep) || tep->te_qlen != 0 ||
2705 tep->te_magic != SOU_MAGIC_EXPLICIT) {
2708 * Sockets use bind with qlen==0 followed by bind() to
2709 * the same address with qlen > 0 for listeners.
2710 * We allow rebind with a new qlen value.
2712 tl_addr_unbind(tep);
2715 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2717 * send T_OK_ACK
2719 tl_ok_ack(wq, ackmp, T_UNBIND_REQ);
2723 /*
2724 * Option management code from drv/ip is used here.
2725 * Note: the TL_PROT_LEVEL/TL_IOC_CREDOPT option is not part of the tl_opt_arr
2726 * database of options, so optcom_req() will fail T_SVR4_OPTMGMT_REQ.
2727 * However, that is what we want as that option is 'unorthodox'
2728 * and only valid in T_CONN_IND, T_CONN_CON and T_UNITDATA_IND,
2729 * and not in T_SVR4_OPTMGMT_REQ/ACK.
2730 * Note2: use of optcom_req means this routine is an exception to
2731 * recovery from allocb() failures.
2732 */
2734 static void
2735 tl_optmgmt(queue_t *wq, mblk_t *mp)
2737 tl_endpt_t *tep;
2738 mblk_t *ackmp;
2739 union T_primitives *prim;
2740 cred_t *cr;
2742 tep = (tl_endpt_t *)wq->q_ptr;
2743 prim = (union T_primitives *)mp->b_rptr;
2746 * All Solaris components should pass a db_credp
2747 * for this TPI message, hence we ASSERT.
2748 * But in case there is some other M_PROTO that looks
2749 * like a TPI message sent by some other kernel
2750 * component, we check and return an error.
2752 cr = msg_getcred(mp, NULL);
2753 ASSERT(cr != NULL);
2754 if (cr == NULL) {
2755 tl_error_ack(wq, mp, TSYSERR, EINVAL, prim->type);
2756 return;
2759 /* all states OK for AF_UNIX options ? */
2760 if (!IS_SOCKET(tep) && tep->te_state != TS_IDLE &&
2761 prim->type == T_SVR4_OPTMGMT_REQ) {
2763 * Broken TLI semantics that options can only be managed
2764 * in TS_IDLE state. Needed for Sparc ABI test suite that
2765 * tests this TLI (mis)feature using this device driver.
2767 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2768 SL_TRACE|SL_ERROR,
2769 "tl_wput:T_SVR4_OPTMGMT_REQ:out of state, state=%d",
2770 tep->te_state));
2772 * preallocate memory for T_ERROR_ACK
2774 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2775 if (! ackmp) {
2776 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2777 return;
2780 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_SVR4_OPTMGMT_REQ);
2781 freemsg(mp);
2782 return;
2786 * call common option management routine from drv/ip
2788 if (prim->type == T_SVR4_OPTMGMT_REQ) {
2789 svr4_optcom_req(wq, mp, cr, &tl_opt_obj);
2790 } else {
2791 ASSERT(prim->type == T_OPTMGMT_REQ);
2792 tpi_optcom_req(wq, mp, cr, &tl_opt_obj);
2793 }
2794 }
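/*
 * Illustrative example (added commentary, not original driver code): at user
 * level these requests typically originate from t_optmgmt(3NSL). A sketch,
 * assuming fd is a TLI endpoint and optbuf/optlen describe a well-formed
 * option buffer:
 *
 *	#include <tiuser.h>
 *
 *	int
 *	negotiate_opts(int fd, char *optbuf, unsigned int optlen)
 *	{
 *		struct t_optmgmt req, ret;
 *
 *		req.opt.buf = optbuf;
 *		req.opt.len = optlen;
 *		req.opt.maxlen = optlen;
 *		req.flags = T_NEGOTIATE;
 *		ret = req;
 *		return (t_optmgmt(fd, &req, &ret));
 *	}
 */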
2796 /*
2797 * Handle T_conn_req - the driver part of accept().
2798 * If TL_SET[U]CRED, generate the credentials options.
2799 * If this is a socket, pass through options unmodified.
2800 * For sockets, generate the T_CONN_CON here instead of
2801 * waiting for the T_CONN_RES.
2802 */
2803 static void
2804 tl_conn_req(queue_t *wq, mblk_t *mp)
2806 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
2807 struct T_conn_req *creq = (struct T_conn_req *)mp->b_rptr;
2808 ssize_t msz = MBLKL(mp);
2809 t_scalar_t alen, aoff, olen, ooff, err = 0;
2810 tl_endpt_t *peer_tep = NULL;
2811 mblk_t *ackmp;
2812 mblk_t *dimp;
2813 struct T_discon_ind *di;
2814 soux_addr_t ux_addr;
2815 tl_addr_t dst;
2817 ASSERT(IS_COTS(tep));
2819 if (tep->te_closing) {
2820 freemsg(mp);
2821 return;
2825 * preallocate memory for:
2826 * 1. max of T_ERROR_ACK and T_OK_ACK
2827 * ==> known max T_ERROR_ACK
2828 * 2. max of T_DISCON_IND and T_CONN_IND
2830 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
2831 if (! ackmp) {
2832 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
2833 return;
2836 * memory committed for T_OK_ACK/T_ERROR_ACK now
2837 * will be committed for T_DISCON_IND/T_CONN_IND later
2840 if (tep->te_state != TS_IDLE) {
2841 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2842 SL_TRACE|SL_ERROR,
2843 "tl_wput:T_CONN_REQ:out of state, state=%d",
2844 tep->te_state));
2845 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2846 freemsg(mp);
2847 return;
2851 * validate the message
2852 * Note: dereference fields in struct inside message only
2853 * after validating the message length.
2855 if (msz < sizeof (struct T_conn_req)) {
2856 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2857 "tl_conn_req:invalid message length"));
2858 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2859 freemsg(mp);
2860 return;
2862 alen = creq->DEST_length;
2863 aoff = creq->DEST_offset;
2864 olen = creq->OPT_length;
2865 ooff = creq->OPT_offset;
2866 if (olen == 0)
2867 ooff = 0;
2869 if (IS_SOCKET(tep)) {
2870 if ((alen != TL_SOUX_ADDRLEN) ||
2871 (aoff < 0) ||
2872 (aoff + alen > msz) ||
2873 (alen > msz - sizeof (struct T_conn_req))) {
2874 (void) (STRLOG(TL_ID, tep->te_minor,
2875 1, SL_TRACE|SL_ERROR,
2876 "tl_conn_req: invalid socket addr"));
2877 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2878 freemsg(mp);
2879 return;
2881 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
2882 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
2883 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
2884 (void) (STRLOG(TL_ID, tep->te_minor,
2885 1, SL_TRACE|SL_ERROR,
2886 "tl_conn_req: invalid socket magic"));
2887 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2888 freemsg(mp);
2889 return;
2891 } else {
2892 if ((alen > 0 && ((aoff + alen) > msz || aoff + alen < 0)) ||
2893 (olen > 0 && ((ssize_t)(ooff + olen) > msz ||
2894 ooff + olen < 0)) ||
2895 olen < 0 || ooff < 0) {
2896 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2897 SL_TRACE|SL_ERROR,
2898 "tl_conn_req:invalid message"));
2899 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_CONN_REQ);
2900 freemsg(mp);
2901 return;
2904 if (alen <= 0 || aoff < 0 ||
2905 (ssize_t)alen > msz - sizeof (struct T_conn_req)) {
2906 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2907 SL_TRACE|SL_ERROR,
2908 "tl_conn_req:bad addr in message, "
2909 "alen=%d, msz=%ld",
2910 alen, msz));
2911 tl_error_ack(wq, ackmp, TBADADDR, 0, T_CONN_REQ);
2912 freemsg(mp);
2913 return;
2915 #ifdef DEBUG
2917 * Mild form of ASSERT()ion to detect broken TPI apps.
2918 * if (! assertion)
2919 * log warning;
2921 if (! (aoff >= (t_scalar_t)sizeof (struct T_conn_req))) {
2922 (void) (STRLOG(TL_ID, tep->te_minor, 3,
2923 SL_TRACE|SL_ERROR,
2924 "tl_conn_req: addr overlaps TPI message"));
2926 #endif
2927 if (olen) {
2929 * no opts in connect req
2930 * supported in this provider except for sockets.
2932 (void) (STRLOG(TL_ID, tep->te_minor, 1,
2933 SL_TRACE|SL_ERROR,
2934 "tl_conn_req:options not supported "
2935 "in message"));
2936 tl_error_ack(wq, ackmp, TBADOPT, 0, T_CONN_REQ);
2937 freemsg(mp);
2938 return;
2943 * Prevent tep from closing on us.
2945 if (! tl_noclose(tep)) {
2946 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2947 "tl_conn_req:endpoint is closing"));
2948 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_CONN_REQ);
2949 freemsg(mp);
2950 return;
2953 tep->te_state = NEXTSTATE(TE_CONN_REQ, tep->te_state);
2955 * get endpoint to connect to
2956 * check that peer with DEST addr is bound to addr
2957 * and has CONIND_number > 0
2959 dst.ta_alen = alen;
2960 dst.ta_abuf = mp->b_rptr + aoff;
2961 dst.ta_zoneid = tep->te_zoneid;
2964 * Verify if remote addr is in use
2966 peer_tep = (IS_SOCKET(tep) ?
2967 tl_sock_find_peer(tep, &ux_addr) :
2968 tl_find_peer(tep, &dst));
2970 if (peer_tep == NULL) {
2971 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
2972 "tl_conn_req:no one at connect address"));
2973 err = ECONNREFUSED;
2974 } else if (peer_tep->te_nicon >= peer_tep->te_qlen) {
2975 /*
2976 * validate that the number of incoming connections is
2977 * not at capacity on the destination endpoint
2978 */
2979 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
2980 "tl_conn_req: qlen overflow connection refused"));
2981 err = ECONNREFUSED;
2985 * Send T_DISCON_IND in case of error
2987 if (err != 0) {
2988 if (peer_tep != NULL)
2989 tl_refrele(peer_tep);
2990 /* We are still expected to send T_OK_ACK */
2991 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
2992 tl_ok_ack(tep->te_wq, ackmp, T_CONN_REQ);
2993 tl_closeok(tep);
2994 dimp = tpi_ack_alloc(mp, sizeof (struct T_discon_ind),
2995 M_PROTO, T_DISCON_IND);
2996 if (dimp == NULL) {
2997 tl_merror(wq, NULL, ENOSR);
2998 return;
3000 di = (struct T_discon_ind *)dimp->b_rptr;
3001 di->DISCON_reason = err;
3002 di->SEQ_number = BADSEQNUM;
3004 tep->te_state = TS_IDLE;
3006 * send T_DISCON_IND message
3008 putnext(tep->te_rq, dimp);
3009 return;
3012 ASSERT(IS_COTS(peer_tep));
3015 * Found the listener. At this point processing will continue on
3016 * listener serializer. Close of the endpoint should be blocked while we
3017 * switch serializers.
3019 tl_serializer_refhold(peer_tep->te_ser);
3020 tl_serializer_refrele(tep->te_ser);
3021 tep->te_ser = peer_tep->te_ser;
3022 ASSERT(tep->te_oconp == NULL);
3023 tep->te_oconp = peer_tep;
3026 * It is safe to close now. Close may continue on listener serializer.
3028 tl_closeok(tep);
3031 * Pass ackmp to tl_conn_req_ser. Note that mp->b_cont may contain user
3032 * data, so we link mp to ackmp.
3034 ackmp->b_cont = mp;
3035 mp = ackmp;
3037 tl_refhold(tep);
3038 tl_serializer_enter(tep, tl_conn_req_ser, mp);
3039 }
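/*
 * Illustrative example (added commentary, not original driver code): the
 * T_CONN_REQ handled above is what t_connect(3NSL) generates. A sketch of a
 * client, with abuf/alen as a hypothetical destination address:
 *
 *	#include <tiuser.h>
 *	#include <string.h>
 *
 *	int
 *	connect_to(int fd, char *abuf, unsigned int alen)
 *	{
 *		struct t_call *call;
 *
 *		call = (struct t_call *)t_alloc(fd, T_CALL, T_ADDR);
 *		if (call == NULL)
 *			return (-1);
 *		(void) memcpy(call->addr.buf, abuf, alen);
 *		call->addr.len = alen;
 *		return (t_connect(fd, call, NULL));
 *	}
 *
 * For sockets on this driver the connect completes early: the T_CONN_CON is
 * generated while processing the T_CONN_REQ, before the peer accepts.
 */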
3041 /*
3042 * Finish T_CONN_REQ processing on listener serializer.
3043 */
3044 static void
3045 tl_conn_req_ser(mblk_t *mp, tl_endpt_t *tep)
3047 queue_t *wq;
3048 tl_endpt_t *peer_tep = tep->te_oconp;
3049 mblk_t *confmp, *cimp, *indmp;
3050 void *opts = NULL;
3051 mblk_t *ackmp = mp;
3052 struct T_conn_req *creq = (struct T_conn_req *)mp->b_cont->b_rptr;
3053 struct T_conn_ind *ci;
3054 tl_icon_t *tip;
3055 void *addr_startp;
3056 t_scalar_t olen = creq->OPT_length;
3057 t_scalar_t ooff = creq->OPT_offset;
3058 size_t ci_msz;
3059 size_t size;
3060 cred_t *cr = NULL;
3061 pid_t cpid;
3063 if (tep->te_closing) {
3064 TL_UNCONNECT(tep->te_oconp);
3065 tl_serializer_exit(tep);
3066 tl_refrele(tep);
3067 freemsg(mp);
3068 return;
3071 wq = tep->te_wq;
3072 tep->te_flag |= TL_EAGER;
3075 * Extract preallocated ackmp from mp.
3077 mp = mp->b_cont;
3078 ackmp->b_cont = NULL;
3080 if (olen == 0)
3081 ooff = 0;
3083 if (peer_tep->te_closing ||
3084 !((peer_tep->te_state == TS_IDLE) ||
3085 (peer_tep->te_state == TS_WRES_CIND))) {
3086 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE | SL_ERROR,
3087 "tl_conn_req:peer in bad state (%d)",
3088 peer_tep->te_state));
3089 TL_UNCONNECT(tep->te_oconp);
3090 tl_error_ack(wq, mp, TSYSERR, ECONNREFUSED, T_CONN_REQ);
3091 freemsg(ackmp);
3092 tl_serializer_exit(tep);
3093 tl_refrele(tep);
3094 return;
3098 * preallocate now for T_DISCON_IND or T_CONN_IND
3101 * calculate length of T_CONN_IND message
3103 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3104 cr = msg_getcred(mp, &cpid);
3105 ASSERT(cr != NULL);
3106 if (peer_tep->te_flag & TL_SETCRED) {
3107 ooff = 0;
3108 olen = (t_scalar_t) sizeof (struct opthdr) +
3109 OPTLEN(sizeof (tl_credopt_t));
3110 /* 1 option only */
3111 } else {
3112 ooff = 0;
3113 olen = (t_scalar_t)sizeof (struct opthdr) +
3114 OPTLEN(ucredminsize(cr));
3115 /* 1 option only */
3118 ci_msz = sizeof (struct T_conn_ind) + tep->te_alen;
3119 ci_msz = T_ALIGN(ci_msz) + olen;
3120 size = max(ci_msz, sizeof (struct T_discon_ind));
3123 * Save options from mp - we'll need them for T_CONN_IND.
3125 if (ooff != 0) {
3126 opts = kmem_alloc(olen, KM_NOSLEEP);
3127 if (opts == NULL) {
3129 * roll back state changes
3131 tep->te_state = TS_IDLE;
3132 tl_memrecover(wq, mp, size);
3133 freemsg(ackmp);
3134 TL_UNCONNECT(tep->te_oconp);
3135 tl_serializer_exit(tep);
3136 tl_refrele(tep);
3137 return;
3139 /* Copy options to a temp buffer */
3140 bcopy(mp->b_rptr + ooff, opts, olen);
3143 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
3145 * Generate a T_CONN_CON that has the identical address
3146 * (and options) as the T_CONN_REQ.
3147 * NOTE: assumes that the T_conn_req and T_conn_con structures
3148 * are isomorphic.
3150 confmp = copyb(mp);
3151 if (! confmp) {
3153 * roll back state changes
3155 tep->te_state = TS_IDLE;
3156 tl_memrecover(wq, mp, mp->b_wptr - mp->b_rptr);
3157 freemsg(ackmp);
3158 if (opts != NULL)
3159 kmem_free(opts, olen);
3160 TL_UNCONNECT(tep->te_oconp);
3161 tl_serializer_exit(tep);
3162 tl_refrele(tep);
3163 return;
3165 ((struct T_conn_con *)(confmp->b_rptr))->PRIM_type =
3166 T_CONN_CON;
3167 } else {
3168 confmp = NULL;
3170 if ((indmp = reallocb(mp, size, 0)) == NULL) {
3172 * roll back state changes
3174 tep->te_state = TS_IDLE;
3175 tl_memrecover(wq, mp, size);
3176 freemsg(ackmp);
3177 if (opts != NULL)
3178 kmem_free(opts, olen);
3179 freemsg(confmp);
3180 TL_UNCONNECT(tep->te_oconp);
3181 tl_serializer_exit(tep);
3182 tl_refrele(tep);
3183 return;
3186 tip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
3187 if (tip == NULL) {
3189 * roll back state changes
3191 tep->te_state = TS_IDLE;
3192 tl_memrecover(wq, indmp, sizeof (*tip));
3193 freemsg(ackmp);
3194 if (opts != NULL)
3195 kmem_free(opts, olen);
3196 freemsg(confmp);
3197 TL_UNCONNECT(tep->te_oconp);
3198 tl_serializer_exit(tep);
3199 tl_refrele(tep);
3200 return;
3202 tip->ti_mp = NULL;
3205 * memory is now committed for T_DISCON_IND/T_CONN_IND/T_CONN_CON
3206 * and tl_icon_t cell.
3210 * ack validity of request and send the peer credential in the ACK.
3212 tep->te_state = NEXTSTATE(TE_OK_ACK1, tep->te_state);
3214 if (peer_tep != NULL && peer_tep->te_credp != NULL &&
3215 confmp != NULL) {
3216 mblk_setcred(confmp, peer_tep->te_credp, peer_tep->te_cpid);
3219 tl_ok_ack(wq, ackmp, T_CONN_REQ);
3222 * prepare message to send T_CONN_IND
3225 * allocate the message - original data blocks retained
3226 * in the returned mblk
3228 cimp = tl_resizemp(indmp, size);
3229 if (! cimp) {
3230 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3231 "tl_conn_req:con_ind:allocb failure"));
3232 tl_merror(wq, indmp, ENOMEM);
3233 TL_UNCONNECT(tep->te_oconp);
3234 tl_serializer_exit(tep);
3235 tl_refrele(tep);
3236 if (opts != NULL)
3237 kmem_free(opts, olen);
3238 freemsg(confmp);
3239 ASSERT(tip->ti_mp == NULL);
3240 kmem_free(tip, sizeof (*tip));
3241 return;
3244 DB_TYPE(cimp) = M_PROTO;
3245 ci = (struct T_conn_ind *)cimp->b_rptr;
3246 ci->PRIM_type = T_CONN_IND;
3247 ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
3248 ci->SRC_length = tep->te_alen;
3249 ci->SEQ_number = tep->te_seqno;
3251 addr_startp = cimp->b_rptr + ci->SRC_offset;
3252 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
3253 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3255 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3256 ci->SRC_length);
3257 ci->OPT_length = olen; /* because only 1 option */
3258 tl_fill_option(cimp->b_rptr + ci->OPT_offset,
3259 cr, cpid,
3260 peer_tep->te_flag, peer_tep->te_credp);
3261 } else if (ooff != 0) {
3262 /* Copy option from T_CONN_REQ */
3263 ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset +
3264 ci->SRC_length);
3265 ci->OPT_length = olen;
3266 ASSERT(opts != NULL);
3267 bcopy(opts, (void *)((uintptr_t)ci + ci->OPT_offset), olen);
3268 } else {
3269 ci->OPT_offset = 0;
3270 ci->OPT_length = 0;
3272 if (opts != NULL)
3273 kmem_free(opts, olen);
3276 * register connection request with server peer
3277 * append to list of incoming connections
3278 * increment references for both peer_tep and tep: peer_tep is placed on
3279 * te_oconp and tep is placed on listeners queue.
3281 tip->ti_tep = tep;
3282 tip->ti_seqno = tep->te_seqno;
3283 list_insert_tail(&peer_tep->te_iconp, tip);
3284 peer_tep->te_nicon++;
3286 peer_tep->te_state = NEXTSTATE(TE_CONN_IND, peer_tep->te_state);
3288 * send the T_CONN_IND message
3290 putnext(peer_tep->te_rq, cimp);
3293 * Send a T_CONN_CON message for sockets.
3294 * Disable the queues until we have reached the correct state!
3296 if (confmp != NULL) {
3297 tep->te_state = NEXTSTATE(TE_CONN_CON, tep->te_state);
3298 noenable(wq);
3299 putnext(tep->te_rq, confmp);
3301 /*
3302 * Now we need to increment the tep reference because tep is referenced by
3303 * the server's list of pending connections. We also need to decrement the
3304 * reference before exiting the serializer. The two operations cancel
3305 * each other out, so we don't modify the reference count at all.
3306 */
3307 ASSERT(tep->te_refcnt >= 2);
3308 ASSERT(peer_tep->te_refcnt >= 2);
3309 tl_serializer_exit(tep);
3310 }
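/*
 * Illustrative worked example (added commentary, not original driver code):
 * the T_CONN_IND size computed above places the options at the first
 * properly aligned offset past the header and source address. For instance,
 * with a 12-byte source address and olen bytes of options:
 *
 *	ci_msz = sizeof (struct T_conn_ind) + 12;
 *	size = T_ALIGN(ci_msz) + olen;
 *
 * and the offsets filled in later follow the same rule:
 *
 *	ci->SRC_offset = (t_scalar_t)sizeof (struct T_conn_ind);
 *	ci->OPT_offset = (t_scalar_t)T_ALIGN(ci->SRC_offset + ci->SRC_length);
 *
 * so the option header never lands on a misaligned boundary.
 */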
3314 /*
3315 * Handle T_conn_res on listener stream. Called on listener serializer.
3316 * tl_conn_req has already generated the T_CONN_CON for sockets, so nothing
3317 * more is sent to the client here.
3318 * No one accesses acceptor at this point, so it is safe to modify acceptor.
3319 * Switch eager serializer to acceptor's.
3320 *
3321 * If TL_SET[U]CRED, generate the credentials options.
3322 */
3324 static void
3325 tl_conn_res(mblk_t *mp, tl_endpt_t *tep)
3327 queue_t *wq;
3328 struct T_conn_res *cres = (struct T_conn_res *)mp->b_rptr;
3329 ssize_t msz = MBLKL(mp);
3330 t_scalar_t olen, ooff, err = 0;
3331 t_scalar_t prim = cres->PRIM_type;
3332 uchar_t *addr_startp;
3333 tl_endpt_t *acc_ep = NULL, *cl_ep = NULL;
3334 tl_icon_t *tip;
3335 size_t size;
3336 mblk_t *ackmp, *respmp;
3337 mblk_t *dimp, *ccmp = NULL;
3338 struct T_discon_ind *di;
3339 struct T_conn_con *cc;
3340 boolean_t client_noclose_set = B_FALSE;
3341 boolean_t switch_client_serializer = B_TRUE;
3343 ASSERT(IS_COTS(tep));
3345 if (tep->te_closing) {
3346 freemsg(mp);
3347 return;
3350 wq = tep->te_wq;
3353 * preallocate memory for:
3354 * 1. max of T_ERROR_ACK and T_OK_ACK
3355 * ==> known max T_ERROR_ACK
3356 * 2. max of T_DISCON_IND and T_CONN_CON
3358 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3359 if (! ackmp) {
3360 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3361 return;
3364 * memory committed for T_OK_ACK/T_ERROR_ACK now
3365 * will be committed for T_DISCON_IND/T_CONN_CON later
3369 ASSERT(prim == T_CONN_RES || prim == O_T_CONN_RES);
3372 * validate state
3374 if (tep->te_state != TS_WRES_CIND) {
3375 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3376 SL_TRACE|SL_ERROR,
3377 "tl_wput:T_CONN_RES:out of state, state=%d",
3378 tep->te_state));
3379 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3380 freemsg(mp);
3381 return;
3385 * validate the message
3386 * Note: dereference fields in struct inside message only
3387 * after validating the message length.
3389 if (msz < sizeof (struct T_conn_res)) {
3390 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3391 "tl_conn_res:invalid message length"));
3392 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3393 freemsg(mp);
3394 return;
3396 olen = cres->OPT_length;
3397 ooff = cres->OPT_offset;
3398 if (((olen > 0) && ((ooff + olen) > msz))) {
3399 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3400 "tl_conn_res:invalid message"));
3401 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, prim);
3402 freemsg(mp);
3403 return;
3405 if (olen) {
3407 * no opts in connect res
3408 * supported in this provider
3410 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3411 "tl_conn_res:options not supported in message"));
3412 tl_error_ack(wq, ackmp, TBADOPT, 0, prim);
3413 freemsg(mp);
3414 return;
3417 tep->te_state = NEXTSTATE(TE_CONN_RES, tep->te_state);
3418 ASSERT(tep->te_state == TS_WACK_CRES);
3420 if (cres->SEQ_number < TL_MINOR_START &&
3421 cres->SEQ_number >= BADSEQNUM) {
3422 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3423 "tl_conn_res:remote endpoint sequence number bad"));
3424 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3425 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3426 freemsg(mp);
3427 return;
3431 * find accepting endpoint. Will have extra reference if found.
3433 if (mod_hash_find_cb(tep->te_transport->tr_ai_hash,
3434 (mod_hash_key_t)(uintptr_t)cres->ACCEPTOR_id,
3435 (mod_hash_val_t *)&acc_ep, tl_find_callback) != 0) {
3436 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3437 "tl_conn_res:bad accepting endpoint"));
3438 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3439 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3440 freemsg(mp);
3441 return;
3445 * Prevent acceptor from closing.
3447 if (! tl_noclose(acc_ep)) {
3448 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3449 "tl_conn_res:bad accepting endpoint"));
3450 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3451 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3452 tl_refrele(acc_ep);
3453 freemsg(mp);
3454 return;
3457 acc_ep->te_flag |= TL_ACCEPTOR;
3459 /*
3460 * validate that the accepting endpoint, if different from the
3461 * listener, has an address bound => state is TS_IDLE
3462 * TROUBLE in XPG4 !!?
3463 */
3464 if ((tep != acc_ep) && (acc_ep->te_state != TS_IDLE)) {
3465 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3466 "tl_conn_res:accepting endpoint has no address bound,"
3467 "state=%d", acc_ep->te_state));
3468 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3469 tl_error_ack(wq, ackmp, TOUTSTATE, 0, prim);
3470 freemsg(mp);
3471 tl_closeok(acc_ep);
3472 tl_refrele(acc_ep);
3473 return;
3477 * validate if accepting endpt same as listening, then
3478 * no other incoming connection should be on the queue
3481 if ((tep == acc_ep) && (tep->te_nicon > 1)) {
3482 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
3483 "tl_conn_res: > 1 conn_ind on listener-acceptor"));
3484 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3485 tl_error_ack(wq, ackmp, TBADF, 0, prim);
3486 freemsg(mp);
3487 tl_closeok(acc_ep);
3488 tl_refrele(acc_ep);
3489 return;
3492 /*
3493 * Mark for deletion the entry corresponding to the client
3494 * on the list of pending connections made by the listener:
3495 * search the list to see if the client is one of those
3496 * recorded by the listener.
3497 */
3498 tip = tl_icon_find(tep, cres->SEQ_number);
3499 if (tip == NULL) {
3500 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE|SL_ERROR,
3501 "tl_conn_res:no client in listener list"));
3502 tep->te_state = NEXTSTATE(TE_ERROR_ACK, tep->te_state);
3503 tl_error_ack(wq, ackmp, TBADSEQ, 0, prim);
3504 freemsg(mp);
3505 tl_closeok(acc_ep);
3506 tl_refrele(acc_ep);
3507 return;
3511 * If ti_tep is NULL the client has already closed. In this case
3512 * the code below will avoid any action on the client side
3513 * but complete the server and acceptor state transitions.
3515 ASSERT(tip->ti_tep == NULL ||
3516 tip->ti_tep->te_seqno == cres->SEQ_number);
3517 cl_ep = tip->ti_tep;
3520 * If the client is present it is switched from listener's to acceptor's
3521 * serializer. We should block client closes while serializers are
3522 * being switched.
3524 * It is possible that the client is present but is currently being
3525 * closed. There are two possible cases:
3527 * 1) The client has already entered tl_close_finish_ser() and sent
3528 * T_ORDREL_IND. In this case we can just ignore the client (but we
3529 * still need to send all messages from tip->ti_mp to the acceptor).
3531 * 2) The client started the close but has not entered
3532 * tl_close_finish_ser() yet. In this case, the client is already
3533 * proceeding asynchronously on the listener's serializer, so we're
3534 * forced to change the acceptor to use the listener's serializer to
3535 * ensure that any operations on the acceptor are serialized with
3536 * respect to the close that's in-progress.
3538 if (cl_ep != NULL) {
3539 if (tl_noclose(cl_ep)) {
3540 client_noclose_set = B_TRUE;
3541 } else {
3542 /*
3543 * Client is closing. If it has sent the
3544 * T_ORDREL_IND, we can simply ignore it - otherwise,
3545 * we have to let the client continue until it is
3546 * sent.
3547 *
3548 * If we do continue using the client, the acceptor will
3549 * switch to the client's serializer, which is used by the
3550 * client for its close.
3551 */
3552 tl_client_closing_when_accepting++;
3553 switch_client_serializer = B_FALSE;
3554 if (!IS_SOCKET(cl_ep) || tl_disable_early_connect ||
3555 cl_ep->te_state == -1)
3556 cl_ep = NULL;
3560 if (cl_ep != NULL) {
3562 * validate client state to be TS_WCON_CREQ or TS_DATA_XFER
3563 * (latter for sockets only)
3565 if (cl_ep->te_state != TS_WCON_CREQ &&
3566 (cl_ep->te_state != TS_DATA_XFER &&
3567 IS_SOCKET(cl_ep))) {
3568 err = ECONNREFUSED;
3570 * T_DISCON_IND sent later after committing memory
3571 * and acking validity of request
3573 (void) (STRLOG(TL_ID, tep->te_minor, 2, SL_TRACE,
3574 "tl_conn_res:peer in bad state"));
3577 /*
3578 * preallocate now for T_DISCON_IND or T_CONN_CON
3579 * ack validity of request (T_OK_ACK) after memory committed
3580 */
3582 if (err)
3583 size = sizeof (struct T_discon_ind);
3584 else {
3586 * calculate length of T_CONN_CON message
3588 olen = 0;
3589 if (cl_ep->te_flag & TL_SETCRED) {
3590 olen = (t_scalar_t)sizeof (struct opthdr) +
3591 OPTLEN(sizeof (tl_credopt_t));
3592 } else if (cl_ep->te_flag & TL_SETUCRED) {
3593 olen = (t_scalar_t)sizeof (struct opthdr) +
3594 OPTLEN(ucredminsize(acc_ep->te_credp));
3596 size = T_ALIGN(sizeof (struct T_conn_con) +
3597 acc_ep->te_alen) + olen;
3599 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3601 * roll back state changes
3603 tep->te_state = TS_WRES_CIND;
3604 tl_memrecover(wq, mp, size);
3605 freemsg(ackmp);
3606 if (client_noclose_set)
3607 tl_closeok(cl_ep);
3608 tl_closeok(acc_ep);
3609 tl_refrele(acc_ep);
3610 return;
3612 mp = NULL;
3616 * Now ack validity of request
3618 if (tep->te_nicon == 1) {
3619 if (tep == acc_ep)
3620 tep->te_state = NEXTSTATE(TE_OK_ACK2, tep->te_state);
3621 else
3622 tep->te_state = NEXTSTATE(TE_OK_ACK3, tep->te_state);
3623 } else
3624 tep->te_state = NEXTSTATE(TE_OK_ACK4, tep->te_state);
3627 * send T_DISCON_IND now if client state validation failed earlier
3629 if (err) {
3630 tl_ok_ack(wq, ackmp, prim);
3632 * flush the queues - why always ?
3634 (void) putnextctl1(acc_ep->te_rq, M_FLUSH, FLUSHR);
3636 dimp = tl_resizemp(respmp, size);
3637 if (! dimp) {
3638 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3639 SL_TRACE|SL_ERROR,
3640 "tl_conn_res:con_ind:allocb failure"));
3641 tl_merror(wq, respmp, ENOMEM);
3642 tl_closeok(acc_ep);
3643 if (client_noclose_set)
3644 tl_closeok(cl_ep);
3645 tl_refrele(acc_ep);
3646 return;
3648 if (dimp->b_cont) {
3649 /* no user data in provider generated discon ind */
3650 freemsg(dimp->b_cont);
3651 dimp->b_cont = NULL;
3654 DB_TYPE(dimp) = M_PROTO;
3655 di = (struct T_discon_ind *)dimp->b_rptr;
3656 di->PRIM_type = T_DISCON_IND;
3657 di->DISCON_reason = err;
3658 di->SEQ_number = BADSEQNUM;
3660 tep->te_state = TS_IDLE;
3662 * send T_DISCON_IND message
3664 putnext(acc_ep->te_rq, dimp);
3665 if (client_noclose_set)
3666 tl_closeok(cl_ep);
3667 tl_closeok(acc_ep);
3668 tl_refrele(acc_ep);
3669 return;
3673 * now start connecting the accepting endpoint
3675 if (tep != acc_ep)
3676 acc_ep->te_state = NEXTSTATE(TE_PASS_CONN, acc_ep->te_state);
3678 if (cl_ep == NULL) {
3680 * The client has already closed. Send up any queued messages
3681 * and change the state accordingly.
3683 tl_ok_ack(wq, ackmp, prim);
3684 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3687 * remove endpoint from incoming connection
3688 * delete client from list of incoming connections
3690 tl_freetip(tep, tip);
3691 freemsg(mp);
3692 tl_closeok(acc_ep);
3693 tl_refrele(acc_ep);
3694 return;
3695 } else if (tip->ti_mp != NULL) {
3696 /*
3697 * The client could have queued a T_DISCON_IND which needs
3698 * to be sent up.
3699 * Note that t_discon_req cannot operate the same way as
3700 * t_data_req, since it is not possible for it to putbq
3701 * the message and return -1 due to the use of qwriter.
3702 */
3703 tl_icon_sendmsgs(acc_ep, &tip->ti_mp);
3707 * prepare connect confirm T_CONN_CON message
3711 * allocate the message - original data blocks
3712 * retained in the returned mblk
3714 if (! IS_SOCKET(cl_ep) || tl_disable_early_connect) {
3715 ccmp = tl_resizemp(respmp, size);
3716 if (ccmp == NULL) {
3717 tl_ok_ack(wq, ackmp, prim);
3718 (void) (STRLOG(TL_ID, tep->te_minor, 3,
3719 SL_TRACE|SL_ERROR,
3720 "tl_conn_res:conn_con:allocb failure"));
3721 tl_merror(wq, respmp, ENOMEM);
3722 tl_closeok(acc_ep);
3723 if (client_noclose_set)
3724 tl_closeok(cl_ep);
3725 tl_refrele(acc_ep);
3726 return;
3729 DB_TYPE(ccmp) = M_PROTO;
3730 cc = (struct T_conn_con *)ccmp->b_rptr;
3731 cc->PRIM_type = T_CONN_CON;
3732 cc->RES_offset = (t_scalar_t)sizeof (struct T_conn_con);
3733 cc->RES_length = acc_ep->te_alen;
3734 addr_startp = ccmp->b_rptr + cc->RES_offset;
3735 bcopy(acc_ep->te_abuf, addr_startp, acc_ep->te_alen);
3736 if (cl_ep->te_flag & (TL_SETCRED|TL_SETUCRED)) {
3737 cc->OPT_offset = (t_scalar_t)T_ALIGN(cc->RES_offset +
3738 cc->RES_length);
3739 cc->OPT_length = olen;
3740 tl_fill_option(ccmp->b_rptr + cc->OPT_offset,
3741 acc_ep->te_credp, acc_ep->te_cpid, cl_ep->te_flag,
3742 cl_ep->te_credp);
3743 } else {
3744 cc->OPT_offset = 0;
3745 cc->OPT_length = 0;
3748 * Forward the credential in the packet so it can be picked up
3749 * at the higher layers for more complete credential processing
3751 mblk_setcred(ccmp, acc_ep->te_credp, acc_ep->te_cpid);
3752 } else {
3753 freemsg(respmp);
3754 respmp = NULL;
3757 /*
3758 * make connection linking
3759 * accepting and client endpoints
3760 * No need to increment references:
3761 * on client: it should already have one from tip->ti_tep linkage.
3762 * on acceptor: it should already have one from the table lookup.
3763 *
3764 * At this point both client and acceptor can't close. Set client
3765 * serializer to acceptor's.
3766 */
3767 ASSERT(cl_ep->te_refcnt >= 2);
3768 ASSERT(acc_ep->te_refcnt >= 2);
3769 ASSERT(cl_ep->te_conp == NULL);
3770 ASSERT(acc_ep->te_conp == NULL);
3771 cl_ep->te_conp = acc_ep;
3772 acc_ep->te_conp = cl_ep;
3773 ASSERT(cl_ep->te_ser == tep->te_ser);
3774 if (switch_client_serializer) {
3775 mutex_enter(&cl_ep->te_ser_lock);
3776 if (cl_ep->te_ser_count > 0) {
3777 switch_client_serializer = B_FALSE;
3778 tl_serializer_noswitch++;
3779 } else {
3781 * Move client to the acceptor's serializer.
3783 tl_serializer_refhold(acc_ep->te_ser);
3784 tl_serializer_refrele(cl_ep->te_ser);
3785 cl_ep->te_ser = acc_ep->te_ser;
3787 mutex_exit(&cl_ep->te_ser_lock);
3789 if (!switch_client_serializer) {
3791 * It is not possible to switch client to use acceptor's.
3792 * Move acceptor to client's serializer (which is the same as
3793 * listener's).
3795 tl_serializer_refhold(cl_ep->te_ser);
3796 tl_serializer_refrele(acc_ep->te_ser);
3797 acc_ep->te_ser = cl_ep->te_ser;
3800 TL_REMOVE_PEER(cl_ep->te_oconp);
3801 TL_REMOVE_PEER(acc_ep->te_oconp);
3804 * remove endpoint from incoming connection
3805 * delete client from list of incoming connections
3807 tip->ti_tep = NULL;
3808 tl_freetip(tep, tip);
3809 tl_ok_ack(wq, ackmp, prim);
3812 * data blocks already linked in reallocb()
3816 * link queues so that I_SENDFD will work
3818 if (! IS_SOCKET(tep)) {
3819 acc_ep->te_wq->q_next = cl_ep->te_rq;
3820 cl_ep->te_wq->q_next = acc_ep->te_rq;
3823 /*
3824 * send T_CONN_CON up on client side unless it was already
3825 * done (for a socket). In case any data or ordrel req has been
3826 * queued, make sure that the service procedure runs.
3827 */
3828 if (IS_SOCKET(cl_ep) && !tl_disable_early_connect) {
3829 enableok(cl_ep->te_wq);
3830 TL_QENABLE(cl_ep);
3831 if (ccmp != NULL)
3832 freemsg(ccmp);
3833 } else {
3835 * change client state on TE_CONN_CON event
3837 cl_ep->te_state = NEXTSTATE(TE_CONN_CON, cl_ep->te_state);
3838 putnext(cl_ep->te_rq, ccmp);
3841 /* Mark the both endpoints as accepted */
3842 cl_ep->te_flag |= TL_ACCEPTED;
3843 acc_ep->te_flag |= TL_ACCEPTED;
3846 * Allow client and acceptor to close.
3848 tl_closeok(acc_ep);
3849 if (client_noclose_set)
3850 tl_closeok(cl_ep);
3851 }
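/*
 * Illustrative example (added commentary, not original driver code): the
 * T_CONN_RES handled above is what t_accept(3NSL) generates, with
 * ACCEPTOR_id naming the accepting endpoint. A sketch of a server's accept
 * step, where listen_fd and acc_fd are bound TLI endpoints on this driver:
 *
 *	#include <tiuser.h>
 *
 *	int
 *	accept_one(int listen_fd, int acc_fd)
 *	{
 *		struct t_call *call;
 *		int rc;
 *
 *		call = (struct t_call *)t_alloc(listen_fd, T_CALL, T_ALL);
 *		if (call == NULL)
 *			return (-1);
 *		if (t_listen(listen_fd, call) < 0) {
 *			(void) t_free((char *)call, T_CALL);
 *			return (-1);
 *		}
 *		rc = t_accept(listen_fd, acc_fd, call);
 *		(void) t_free((char *)call, T_CALL);
 *		return (rc);
 *	}
 */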
3856 static void
3857 tl_discon_req(mblk_t *mp, tl_endpt_t *tep)
3859 queue_t *wq;
3860 struct T_discon_req *dr;
3861 ssize_t msz;
3862 tl_endpt_t *peer_tep = tep->te_conp;
3863 tl_endpt_t *srv_tep = tep->te_oconp;
3864 tl_icon_t *tip;
3865 size_t size;
3866 mblk_t *ackmp, *dimp, *respmp;
3867 struct T_discon_ind *di;
3868 t_scalar_t save_state, new_state;
3870 if (tep->te_closing) {
3871 freemsg(mp);
3872 return;
3875 if ((peer_tep != NULL) && peer_tep->te_closing) {
3876 TL_UNCONNECT(tep->te_conp);
3877 peer_tep = NULL;
3879 if ((srv_tep != NULL) && srv_tep->te_closing) {
3880 TL_UNCONNECT(tep->te_oconp);
3881 srv_tep = NULL;
3884 wq = tep->te_wq;
3887 * preallocate memory for:
3888 * 1. max of T_ERROR_ACK and T_OK_ACK
3889 * ==> known max T_ERROR_ACK
3890 * 2. for T_DISCON_IND
3892 ackmp = allocb(sizeof (struct T_error_ack), BPRI_MED);
3893 if (! ackmp) {
3894 tl_memrecover(wq, mp, sizeof (struct T_error_ack));
3895 return;
3898 * memory committed for T_OK_ACK/T_ERROR_ACK now
3899 * will be committed for T_DISCON_IND later
3902 dr = (struct T_discon_req *)mp->b_rptr;
3903 msz = MBLKL(mp);
3906 * validate the state
3908 save_state = new_state = tep->te_state;
3909 if (! (save_state >= TS_WCON_CREQ && save_state <= TS_WRES_CIND) &&
3910 ! (save_state >= TS_DATA_XFER && save_state <= TS_WREQ_ORDREL)) {
3911 (void) (STRLOG(TL_ID, tep->te_minor, 1,
3912 SL_TRACE|SL_ERROR,
3913 "tl_wput:T_DISCON_REQ:out of state, state=%d",
3914 tep->te_state));
3915 tl_error_ack(wq, ackmp, TOUTSTATE, 0, T_DISCON_REQ);
3916 freemsg(mp);
3917 return;
3920 * Defer committing the state change until it is determined if
3921 * the message will be queued with the tl_icon or not.
3923 new_state = NEXTSTATE(TE_DISCON_REQ, tep->te_state);
3925 /* validate the message */
3926 if (msz < sizeof (struct T_discon_req)) {
3927 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
3928 "tl_discon_req:invalid message"));
3929 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3930 tl_error_ack(wq, ackmp, TSYSERR, EINVAL, T_DISCON_REQ);
3931 freemsg(mp);
3932 return;
3936 * if server, then validate that client exists
3937 * by connection sequence number etc.
3939 if (tep->te_nicon > 0) { /* server */
3942 * search server list for disconnect client
3944 tip = tl_icon_find(tep, dr->SEQ_number);
3945 if (tip == NULL) {
3946 (void) (STRLOG(TL_ID, tep->te_minor, 2,
3947 SL_TRACE|SL_ERROR,
3948 "tl_discon_req:no disconnect endpoint"));
3949 tep->te_state = NEXTSTATE(TE_ERROR_ACK, new_state);
3950 tl_error_ack(wq, ackmp, TBADSEQ, 0, T_DISCON_REQ);
3951 freemsg(mp);
3952 return;
3955 * If ti_tep is NULL the client has already closed. In this case
3956 * the code below will avoid any action on the client side.
3959 IMPLY(tip->ti_tep != NULL,
3960 tip->ti_tep->te_seqno == dr->SEQ_number);
3961 peer_tep = tip->ti_tep;
3965 * preallocate now for T_DISCON_IND
3966 * ack validity of request (T_OK_ACK) after memory committed
3968 size = sizeof (struct T_discon_ind);
3969 if ((respmp = reallocb(mp, size, 0)) == NULL) {
3970 tl_memrecover(wq, mp, size);
3971 freemsg(ackmp);
3972 return;
3976 * prepare message to ack validity of request
3978 if (tep->te_nicon == 0)
3979 new_state = NEXTSTATE(TE_OK_ACK1, new_state);
3980 else
3981 if (tep->te_nicon == 1)
3982 new_state = NEXTSTATE(TE_OK_ACK2, new_state);
3983 else
3984 new_state = NEXTSTATE(TE_OK_ACK4, new_state);
3987 * Flushing queues according to TPI. Using the old state.
3989 if ((tep->te_nicon <= 1) &&
3990 ((save_state == TS_DATA_XFER) ||
3991 (save_state == TS_WIND_ORDREL) ||
3992 (save_state == TS_WREQ_ORDREL)))
3993 (void) putnextctl1(RD(wq), M_FLUSH, FLUSHRW);
3995 /* send T_OK_ACK up */
3996 tl_ok_ack(wq, ackmp, T_DISCON_REQ);
3999 * now do disconnect business
4001 if (tep->te_nicon > 0) { /* listener */
4002 if (peer_tep != NULL && !peer_tep->te_closing) {
4004 * disconnect incoming connect request pending to tep
4006 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4007 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4008 SL_TRACE|SL_ERROR,
4009 "tl_discon_req: reallocb failed"));
4010 tep->te_state = new_state;
4011 tl_merror(wq, respmp, ENOMEM);
4012 return;
4014 di = (struct T_discon_ind *)dimp->b_rptr;
4015 di->SEQ_number = BADSEQNUM;
4016 save_state = peer_tep->te_state;
4017 peer_tep->te_state = TS_IDLE;
4019 TL_REMOVE_PEER(peer_tep->te_oconp);
4020 enableok(peer_tep->te_wq);
4021 TL_QENABLE(peer_tep);
4022 } else {
4023 freemsg(respmp);
4024 dimp = NULL;
4028 * remove endpoint from incoming connection list
4029 * - remove disconnect client from list on server
4031 tl_freetip(tep, tip);
4032 } else if ((peer_tep = tep->te_oconp) != NULL) { /* client */
4034 * disconnect an outgoing request pending from tep
4037 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4038 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4039 SL_TRACE|SL_ERROR,
4040 "tl_discon_req: reallocb failed"));
4041 tep->te_state = new_state;
4042 tl_merror(wq, respmp, ENOMEM);
4043 return;
4045 di = (struct T_discon_ind *)dimp->b_rptr;
4046 DB_TYPE(dimp) = M_PROTO;
4047 di->PRIM_type = T_DISCON_IND;
4048 di->DISCON_reason = ECONNRESET;
4049 di->SEQ_number = tep->te_seqno;
4052 * If this is a socket the T_DISCON_IND is queued with
4053 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
4054 * from the list of pending connections.
4055 * Note that when te_oconp is set the peer must have
4056 * a t_connind_t for the client.
4058 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
4060 * No need to check that
4061 * ti_tep == NULL since the T_DISCON_IND
4062 * takes precedence over other queued
4063 * messages.
4065 tl_icon_queuemsg(peer_tep, tep->te_seqno, dimp);
4066 peer_tep = NULL;
4067 dimp = NULL;
4069 * Can't clear te_oconp since tl_co_unconnect needs
4070 * it as a hint not to free the tep.
4071 * Keep the state unchanged since tl_conn_res inspects
4072 * it.
4074 new_state = tep->te_state;
4075 } else {
4076 /* Found - delete it */
4077 tip = tl_icon_find(peer_tep, tep->te_seqno);
4078 if (tip != NULL) {
4079 ASSERT(tep == tip->ti_tep);
4080 save_state = peer_tep->te_state;
4081 if (peer_tep->te_nicon == 1)
4082 peer_tep->te_state =
4083 NEXTSTATE(TE_DISCON_IND2,
4084 peer_tep->te_state);
4085 else
4086 peer_tep->te_state =
4087 NEXTSTATE(TE_DISCON_IND3,
4088 peer_tep->te_state);
4089 tl_freetip(peer_tep, tip);
4091 ASSERT(tep->te_oconp != NULL);
4092 TL_UNCONNECT(tep->te_oconp);
4094 } else if ((peer_tep = tep->te_conp) != NULL) { /* connected! */
4095 if ((dimp = tl_resizemp(respmp, size)) == NULL) {
4096 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4097 SL_TRACE|SL_ERROR,
4098 "tl_discon_req: reallocb failed"));
4099 tep->te_state = new_state;
4100 tl_merror(wq, respmp, ENOMEM);
4101 return;
4103 di = (struct T_discon_ind *)dimp->b_rptr;
4104 di->SEQ_number = BADSEQNUM;
4106 save_state = peer_tep->te_state;
4107 peer_tep->te_state = TS_IDLE;
4108 } else {
4109 /* Not connected */
4110 tep->te_state = new_state;
4111 freemsg(respmp);
4112 return;
4115 /* Commit state changes */
4116 tep->te_state = new_state;
4118 if (peer_tep == NULL) {
4119 ASSERT(dimp == NULL);
4120 goto done;
4123 * Flush queues on peer before sending up
4124 * T_DISCON_IND according to TPI
4127 if ((save_state == TS_DATA_XFER) ||
4128 (save_state == TS_WIND_ORDREL) ||
4129 (save_state == TS_WREQ_ORDREL))
4130 (void) putnextctl1(peer_tep->te_rq, M_FLUSH, FLUSHRW);
4132 DB_TYPE(dimp) = M_PROTO;
4133 di->PRIM_type = T_DISCON_IND;
4134 di->DISCON_reason = ECONNRESET;
4137 * data blocks already linked into dimp by reallocb()
4140 * send indication message to peer user module
4142 ASSERT(dimp != NULL);
4143 putnext(peer_tep->te_rq, dimp);
4144 done:
4145 if (tep->te_conp) { /* disconnect pointers if connected */
4146 ASSERT(! peer_tep->te_closing);
4149 * Messages may be queued on peer's write queue
4150 * waiting to be processed by its write service
4151 * procedure. Before the pointer to the peer transport
4152 * structure is set to NULL, qenable the peer's write
4153 * queue so that the queued up messages are processed.
4155 if ((save_state == TS_DATA_XFER) ||
4156 (save_state == TS_WIND_ORDREL) ||
4157 (save_state == TS_WREQ_ORDREL))
4158 TL_QENABLE(peer_tep);
4159 ASSERT(peer_tep != NULL && peer_tep->te_conp != NULL);
4160 TL_UNCONNECT(peer_tep->te_conp);
4161 if (! IS_SOCKET(tep)) {
4163 * unlink the streams
4165 tep->te_wq->q_next = NULL;
4166 peer_tep->te_wq->q_next = NULL;
4168 TL_UNCONNECT(tep->te_conp);
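/*
 * Editor's note -- illustrative sketch only, not part of tl.c. A
 * user-level TLI client reaches the T_DISCON_REQ path above through
 * t_snddis(3NSL); the device path below is an assumption based on the
 * loopback transport names:
 *
 *	int fd = t_open("/dev/ticots", O_RDWR, NULL);
 *	...
 *	if (t_snddis(fd, NULL) < 0)
 *		t_error("t_snddis");
 *
 * The library turns the call into the T_DISCON_REQ M_PROTO message that
 * is validated and acknowledged with T_OK_ACK above before a
 * T_DISCON_IND is generated for the peer.
 */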
4172 static void
4173 tl_addr_req_ser(mblk_t *mp, tl_endpt_t *tep)
4175 if (!tep->te_closing)
4176 tl_addr_req(mp, tep);
4177 else
4178 freemsg(mp);
4180 tl_serializer_exit(tep);
4181 tl_refrele(tep);
4184 static void
4185 tl_addr_req(mblk_t *mp, tl_endpt_t *tep)
4187 queue_t *wq;
4188 size_t ack_sz;
4189 mblk_t *ackmp;
4190 struct T_addr_ack *taa;
4192 if (tep->te_closing) {
4193 freemsg(mp);
4194 return;
4197 wq = tep->te_wq;
4200 * Note: the T_ADDR_REQ message has only the PRIM_type field,
4201 * so it was already validated earlier.
4204 if (IS_CLTS(tep) ||
4205 (tep->te_state > TS_WREQ_ORDREL) ||
4206 (tep->te_state < TS_DATA_XFER)) {
4208 * Either connectionless, or connection oriented but not
4209 * in a connected data transfer state or the half-closed states.
4211 ack_sz = sizeof (struct T_addr_ack);
4212 if (tep->te_state >= TS_IDLE)
4213 /* is bound */
4214 ack_sz += tep->te_alen;
4215 ackmp = reallocb(mp, ack_sz, 0);
4216 if (ackmp == NULL) {
4217 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4218 SL_TRACE|SL_ERROR,
4219 "tl_addr_req: reallocb failed"));
4220 tl_memrecover(wq, mp, ack_sz);
4221 return;
4224 taa = (struct T_addr_ack *)ackmp->b_rptr;
4226 bzero(taa, sizeof (struct T_addr_ack));
4228 taa->PRIM_type = T_ADDR_ACK;
4229 ackmp->b_datap->db_type = M_PCPROTO;
4230 ackmp->b_wptr = (uchar_t *)&taa[1];
4232 if (tep->te_state >= TS_IDLE) {
4233 /* endpoint is bound */
4234 taa->LOCADDR_length = tep->te_alen;
4235 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4237 bcopy(tep->te_abuf, ackmp->b_wptr,
4238 tep->te_alen);
4239 ackmp->b_wptr += tep->te_alen;
4240 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4243 (void) qreply(wq, ackmp);
4244 } else {
4245 ASSERT(tep->te_state == TS_DATA_XFER ||
4246 tep->te_state == TS_WIND_ORDREL ||
4247 tep->te_state == TS_WREQ_ORDREL);
4248 /* connection oriented in data transfer */
4249 tl_connected_cots_addr_req(mp, tep);
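/*
 * Editor's note -- illustrative sketch only, not part of tl.c.
 * T_ADDR_REQ is typically issued on behalf of t_getprotaddr(3NSL);
 * a hedged user-level example against a bound loopback endpoint:
 *
 *	struct t_bind *loc = (struct t_bind *)t_alloc(fd, T_BIND, T_ADDR);
 *	struct t_bind *rem = (struct t_bind *)t_alloc(fd, T_BIND, T_ADDR);
 *
 *	if (t_getprotaddr(fd, loc, rem) == 0) {
 *		... loc->addr now holds the address that tl_addr_req()
 *		... returned in LOCADDR of the T_ADDR_ACK
 *	}
 */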
4254 static void
4255 tl_connected_cots_addr_req(mblk_t *mp, tl_endpt_t *tep)
4257 tl_endpt_t *peer_tep = tep->te_conp;
4258 size_t ack_sz;
4259 mblk_t *ackmp;
4260 struct T_addr_ack *taa;
4261 uchar_t *addr_startp;
4263 if (tep->te_closing) {
4264 freemsg(mp);
4265 return;
4268 if (peer_tep == NULL || peer_tep->te_closing) {
4269 tl_error_ack(tep->te_wq, mp, TSYSERR, ECONNRESET, T_ADDR_REQ);
4270 return;
4273 ASSERT(tep->te_state >= TS_IDLE);
4275 ack_sz = sizeof (struct T_addr_ack);
4276 ack_sz += T_ALIGN(tep->te_alen);
4277 ack_sz += peer_tep->te_alen;
4279 ackmp = tpi_ack_alloc(mp, ack_sz, M_PCPROTO, T_ADDR_ACK);
4280 if (ackmp == NULL) {
4281 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4282 "tl_connected_cots_addr_req: reallocb failed"));
4283 tl_memrecover(tep->te_wq, mp, ack_sz);
4284 return;
4287 taa = (struct T_addr_ack *)ackmp->b_rptr;
4289 /* endpoint is bound */
4290 taa->LOCADDR_length = tep->te_alen;
4291 taa->LOCADDR_offset = (t_scalar_t)sizeof (*taa);
4293 addr_startp = (uchar_t *)&taa[1];
4295 bcopy(tep->te_abuf, addr_startp,
4296 tep->te_alen);
4298 taa->REMADDR_length = peer_tep->te_alen;
4299 taa->REMADDR_offset = (t_scalar_t)T_ALIGN(taa->LOCADDR_offset +
4300 taa->LOCADDR_length);
4301 addr_startp = ackmp->b_rptr + taa->REMADDR_offset;
4302 bcopy(peer_tep->te_abuf, addr_startp,
4303 peer_tep->te_alen);
4304 ackmp->b_wptr = (uchar_t *)ackmp->b_rptr +
4305 taa->REMADDR_offset + peer_tep->te_alen;
4306 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
4308 putnext(tep->te_rq, ackmp);
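/*
 * Editor's note -- layout sketch (illustration only; the offsets follow
 * directly from the code above). The single T_ADDR_ACK mblk is:
 *
 *	b_rptr:			struct T_addr_ack
 *	LOCADDR_offset:		sizeof (*taa), LOCADDR_length = te_alen
 *	REMADDR_offset:		T_ALIGN(LOCADDR_offset + LOCADDR_length),
 *				REMADDR_length = peer_tep->te_alen
 *	b_wptr:			b_rptr + REMADDR_offset + peer_tep->te_alen
 */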
4311 static void
4312 tl_copy_info(struct T_info_ack *ia, tl_endpt_t *tep)
4314 if (IS_CLTS(tep)) {
4315 *ia = tl_clts_info_ack;
4316 ia->TSDU_size = tl_tidusz; /* TSDU and TIDU size are same */
4317 } else {
4318 *ia = tl_cots_info_ack;
4319 if (IS_COTSORD(tep))
4320 ia->SERV_type = T_COTS_ORD;
4322 ia->TIDU_size = tl_tidusz;
4323 ia->CURRENT_state = tep->te_state;
4327 * This routine responds to T_CAPABILITY_REQ messages. It is called by
4328 * tl_wput.
4330 static void
4331 tl_capability_req(mblk_t *mp, tl_endpt_t *tep)
4333 mblk_t *ackmp;
4334 t_uscalar_t cap_bits1;
4335 struct T_capability_ack *tcap;
4337 if (tep->te_closing) {
4338 freemsg(mp);
4339 return;
4342 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
4344 ackmp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
4345 M_PCPROTO, T_CAPABILITY_ACK);
4346 if (ackmp == NULL) {
4347 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4348 "tl_capability_req: reallocb failed"));
4349 tl_memrecover(tep->te_wq, mp,
4350 sizeof (struct T_capability_ack));
4351 return;
4354 tcap = (struct T_capability_ack *)ackmp->b_rptr;
4355 tcap->CAP_bits1 = 0;
4357 if (cap_bits1 & TC1_INFO) {
4358 tl_copy_info(&tcap->INFO_ack, tep);
4359 tcap->CAP_bits1 |= TC1_INFO;
4362 if (cap_bits1 & TC1_ACCEPTOR_ID) {
4363 tcap->ACCEPTOR_id = tep->te_acceptor_id;
4364 tcap->CAP_bits1 |= TC1_ACCEPTOR_ID;
4367 putnext(tep->te_rq, ackmp);
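/*
 * Editor's note -- illustrative sketch only, not part of tl.c. An
 * upstream module could query these capabilities with something like
 * the following (wq is assumed to be its write queue):
 *
 *	mblk_t *mp = allocb(sizeof (struct T_capability_req), BPRI_MED);
 *
 *	if (mp != NULL) {
 *		struct T_capability_req *tcr;
 *
 *		DB_TYPE(mp) = M_PCPROTO;
 *		tcr = (struct T_capability_req *)mp->b_wptr;
 *		tcr->PRIM_type = T_CAPABILITY_REQ;
 *		tcr->CAP_bits1 = TC1_INFO | TC1_ACCEPTOR_ID;
 *		mp->b_wptr += sizeof (*tcr);
 *		putnext(wq, mp);
 *	}
 */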
4370 static void
4371 tl_info_req_ser(mblk_t *mp, tl_endpt_t *tep)
4373 if (! tep->te_closing)
4374 tl_info_req(mp, tep);
4375 else
4376 freemsg(mp);
4378 tl_serializer_exit(tep);
4379 tl_refrele(tep);
4382 static void
4383 tl_info_req(mblk_t *mp, tl_endpt_t *tep)
4385 mblk_t *ackmp;
4387 ackmp = tpi_ack_alloc(mp, sizeof (struct T_info_ack),
4388 M_PCPROTO, T_INFO_ACK);
4389 if (ackmp == NULL) {
4390 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4391 "tl_info_req: reallocb failed"));
4392 tl_memrecover(tep->te_wq, mp, sizeof (struct T_info_ack));
4393 return;
4397 * fill in T_INFO_ACK contents
4399 tl_copy_info((struct T_info_ack *)ackmp->b_rptr, tep);
4402 * send ack message
4404 putnext(tep->te_rq, ackmp);
4408 * Handle M_DATA, T_data_req and T_optdata_req.
4409 * If this is a socket, pass through T_optdata_req options unmodified.
4411 static void
4412 tl_data(mblk_t *mp, tl_endpt_t *tep)
4414 queue_t *wq = tep->te_wq;
4415 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4416 ssize_t msz = MBLKL(mp);
4417 tl_endpt_t *peer_tep;
4418 queue_t *peer_rq;
4419 boolean_t closing = tep->te_closing;
4421 if (IS_CLTS(tep)) {
4422 (void) (STRLOG(TL_ID, tep->te_minor, 2,
4423 SL_TRACE|SL_ERROR,
4424 "tl_wput:clts:unattached M_DATA"));
4425 if (!closing) {
4426 tl_merror(wq, mp, EPROTO);
4427 } else {
4428 freemsg(mp);
4430 return;
4434 * If the endpoint is closing it should still forward any data to the
4435 * peer (if it has one). If it is not allowed to forward it can just
4436 * free the message.
4438 if (closing &&
4439 (tep->te_state != TS_DATA_XFER) &&
4440 (tep->te_state != TS_WREQ_ORDREL)) {
4441 freemsg(mp);
4442 return;
4445 if (DB_TYPE(mp) == M_PROTO) {
4446 if (prim->type == T_DATA_REQ &&
4447 msz < sizeof (struct T_data_req)) {
4448 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4449 SL_TRACE|SL_ERROR,
4450 "tl_data:T_DATA_REQ:invalid message"));
4451 if (!closing) {
4452 tl_merror(wq, mp, EPROTO);
4453 } else {
4454 freemsg(mp);
4456 return;
4457 } else if (prim->type == T_OPTDATA_REQ &&
4458 (msz < sizeof (struct T_optdata_req) || !IS_SOCKET(tep))) {
4459 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4460 SL_TRACE|SL_ERROR,
4461 "tl_data:T_OPTDATA_REQ:invalid message"));
4462 if (!closing) {
4463 tl_merror(wq, mp, EPROTO);
4464 } else {
4465 freemsg(mp);
4467 return;
4472 * connection oriented provider
4474 switch (tep->te_state) {
4475 case TS_IDLE:
4477 * Other end not here - do nothing.
4479 freemsg(mp);
4480 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4481 "tl_data:cots with endpoint idle"));
4482 return;
4484 case TS_DATA_XFER:
4485 /* valid states */
4486 if (tep->te_conp != NULL)
4487 break;
4489 if (tep->te_oconp == NULL) {
4490 if (!closing) {
4491 tl_merror(wq, mp, EPROTO);
4492 } else {
4493 freemsg(mp);
4495 return;
4498 * For a socket the T_CONN_CON is sent early, thus
4499 * the peer might not yet have accepted the connection.
4500 * If we are closing, queue the packet with the T_CONN_IND.
4501 * Otherwise defer processing the packet until the peer
4502 * accepts the connection.
4503 * Note that the queue is noenabled when we go into this
4504 * state.
4506 if (!closing) {
4507 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4508 SL_TRACE|SL_ERROR,
4509 "tl_data: ocon"));
4510 TL_PUTBQ(tep, mp);
4511 return;
4513 if (DB_TYPE(mp) == M_PROTO) {
4514 if (msz < sizeof (t_scalar_t)) {
4515 freemsg(mp);
4516 return;
4518 /* reuse message block - just change REQ to IND */
4519 if (prim->type == T_DATA_REQ)
4520 prim->type = T_DATA_IND;
4521 else
4522 prim->type = T_OPTDATA_IND;
4524 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4525 return;
4527 case TS_WREQ_ORDREL:
4528 if (tep->te_conp == NULL) {
4530 * Other end closed - generate discon_ind
4531 * with reason 0 to cause an EPIPE but no
4532 * read side error on AF_UNIX sockets.
4534 freemsg(mp);
4535 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4536 SL_TRACE|SL_ERROR,
4537 "tl_data: WREQ_ORDREL and no peer"));
4538 tl_discon_ind(tep, 0);
4539 return;
4541 break;
4543 default:
4544 /* invalid state for event TE_DATA_REQ */
4545 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4546 "tl_data:cots:out of state"));
4547 tl_merror(wq, mp, EPROTO);
4548 return;
4551 * tep->te_state = NEXTSTATE(TE_DATA_REQ, tep->te_state);
4552 * (State stays same on this event)
4556 * get connected endpoint
4558 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4559 freemsg(mp);
4560 /* Peer closed */
4561 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4562 "tl_data: peer gone"));
4563 return;
4566 ASSERT(tep->te_serializer == peer_tep->te_serializer);
4567 peer_rq = peer_tep->te_rq;
4570 * Put it back if flow controlled
4571 * Note: Messages already on the queue when we are closing are bounded
4572 * so we can ignore flow control.
4574 if (!canputnext(peer_rq) && !closing) {
4575 TL_PUTBQ(tep, mp);
4576 return;
4580 * validate peer state
4582 switch (peer_tep->te_state) {
4583 case TS_DATA_XFER:
4584 case TS_WIND_ORDREL:
4585 /* valid states */
4586 break;
4587 default:
4588 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4589 "tl_data:rx side:invalid state"));
4590 tl_merror(peer_tep->te_wq, mp, EPROTO);
4591 return;
4593 if (DB_TYPE(mp) == M_PROTO) {
4594 /* reuse message block - just change REQ to IND */
4595 if (prim->type == T_DATA_REQ)
4596 prim->type = T_DATA_IND;
4597 else
4598 prim->type = T_OPTDATA_IND;
4601 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4602 * (peer state stays same on this event)
4605 * send data to connected peer
4607 putnext(peer_rq, mp);
4612 static void
4613 tl_exdata(mblk_t *mp, tl_endpt_t *tep)
4615 queue_t *wq = tep->te_wq;
4616 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4617 ssize_t msz = MBLKL(mp);
4618 tl_endpt_t *peer_tep;
4619 queue_t *peer_rq;
4620 boolean_t closing = tep->te_closing;
4622 if (msz < sizeof (struct T_exdata_req)) {
4623 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4624 "tl_exdata:invalid message"));
4625 if (!closing) {
4626 tl_merror(wq, mp, EPROTO);
4627 } else {
4628 freemsg(mp);
4630 return;
4634 * If the endpoint is closing it should still forward any data to the
4635 * peer (if it has one). If it is not allowed to forward it can just
4636 * free the message.
4638 if (closing &&
4639 (tep->te_state != TS_DATA_XFER) &&
4640 (tep->te_state != TS_WREQ_ORDREL)) {
4641 freemsg(mp);
4642 return;
4646 * validate state
4648 switch (tep->te_state) {
4649 case TS_IDLE:
4651 * Other end not here - do nothing.
4653 freemsg(mp);
4654 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4655 "tl_exdata:cots with endpoint idle"));
4656 return;
4658 case TS_DATA_XFER:
4659 /* valid states */
4660 if (tep->te_conp != NULL)
4661 break;
4663 if (tep->te_oconp == NULL) {
4664 if (!closing) {
4665 tl_merror(wq, mp, EPROTO);
4666 } else {
4667 freemsg(mp);
4669 return;
4672 * For a socket the T_CONN_CON is sent early, thus
4673 * the peer might not yet have accepted the connection.
4674 * If we are closing, queue the packet with the T_CONN_IND.
4675 * Otherwise defer processing the packet until the peer
4676 * accepts the connection.
4677 * Note that the queue is noenabled when we go into this
4678 * state.
4680 if (!closing) {
4681 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4682 SL_TRACE|SL_ERROR,
4683 "tl_exdata: ocon"));
4684 TL_PUTBQ(tep, mp);
4685 return;
4687 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4688 "tl_exdata: closing socket ocon"));
4689 prim->type = T_EXDATA_IND;
4690 tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4691 return;
4693 case TS_WREQ_ORDREL:
4694 if (tep->te_conp == NULL) {
4696 * Other end closed - generate discon_ind
4697 * with reason 0 to cause an EPIPE but no
4698 * read side error on AF_UNIX sockets.
4700 freemsg(mp);
4701 (void) (STRLOG(TL_ID, tep->te_minor, 3,
4702 SL_TRACE|SL_ERROR,
4703 "tl_exdata: WREQ_ORDREL and no peer"));
4704 tl_discon_ind(tep, 0);
4705 return;
4707 break;
4709 default:
4710 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4711 SL_TRACE|SL_ERROR,
4712 "tl_wput:T_EXDATA_REQ:out of state, state=%d",
4713 tep->te_state));
4714 tl_merror(wq, mp, EPROTO);
4715 return;
4718 * tep->te_state = NEXTSTATE(TE_EXDATA_REQ, tep->te_state);
4719 * (state stays same on this event)
4723 * get connected endpoint
4725 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4726 freemsg(mp);
4727 /* Peer closed */
4728 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4729 "tl_exdata: peer gone"));
4730 return;
4733 peer_rq = peer_tep->te_rq;
4736 * Put it back if flow controlled
4737 * Note: Messages already on the queue when we are closing are bounded
4738 * so we can ignore flow control.
4740 if (!canputnext(peer_rq) && !closing) {
4741 TL_PUTBQ(tep, mp);
4742 return;
4746 * validate state on peer
4748 switch (peer_tep->te_state) {
4749 case TS_DATA_XFER:
4750 case TS_WIND_ORDREL:
4751 /* valid states */
4752 break;
4753 default:
4754 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4755 "tl_exdata:rx side:invalid state"));
4756 tl_merror(peer_tep->te_wq, mp, EPROTO);
4757 return;
4760 * peer_tep->te_state = NEXTSTATE(TE_DATA_IND, peer_tep->te_state);
4761 * (peer state stays same on this event)
4764 * reuse message block
4766 prim->type = T_EXDATA_IND;
4769 * send data to connected peer
4771 putnext(peer_rq, mp);
4776 static void
4777 tl_ordrel(mblk_t *mp, tl_endpt_t *tep)
4779 queue_t *wq = tep->te_wq;
4780 union T_primitives *prim = (union T_primitives *)mp->b_rptr;
4781 ssize_t msz = MBLKL(mp);
4782 tl_endpt_t *peer_tep;
4783 queue_t *peer_rq;
4784 boolean_t closing = tep->te_closing;
4786 if (msz < sizeof (struct T_ordrel_req)) {
4787 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4788 "tl_ordrel:invalid message"));
4789 if (!closing) {
4790 tl_merror(wq, mp, EPROTO);
4791 } else {
4792 freemsg(mp);
4794 return;
4798 * validate state
4800 switch (tep->te_state) {
4801 case TS_DATA_XFER:
4802 case TS_WREQ_ORDREL:
4803 /* valid states */
4804 if (tep->te_conp != NULL)
4805 break;
4807 if (tep->te_oconp == NULL)
4808 break;
4811 * For a socket the T_CONN_CON is sent early, thus
4812 * the peer might not yet have accepted the connection.
4813 * If we are closing, queue the packet with the T_CONN_IND.
4814 * Otherwise defer processing the packet until the peer
4815 * accepts the connection.
4816 * Note that the queue is noenabled when we go into this
4817 * state.
4819 if (!closing) {
4820 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4821 SL_TRACE|SL_ERROR,
4822 "tl_ordlrel: ocon"));
4823 TL_PUTBQ(tep, mp);
4824 return;
4826 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4827 "tl_ordlrel: closing socket ocon"));
4828 prim->type = T_ORDREL_IND;
4829 (void) tl_icon_queuemsg(tep->te_oconp, tep->te_seqno, mp);
4830 return;
4832 default:
4833 (void) (STRLOG(TL_ID, tep->te_minor, 1,
4834 SL_TRACE|SL_ERROR,
4835 "tl_wput:T_ORDREL_REQ:out of state, state=%d",
4836 tep->te_state));
4837 if (!closing) {
4838 tl_merror(wq, mp, EPROTO);
4839 } else {
4840 freemsg(mp);
4842 return;
4844 tep->te_state = NEXTSTATE(TE_ORDREL_REQ, tep->te_state);
4847 * get connected endpoint
4849 if (((peer_tep = tep->te_conp) == NULL) || peer_tep->te_closing) {
4850 /* Peer closed */
4851 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4852 "tl_ordrel: peer gone"));
4853 freemsg(mp);
4854 return;
4857 peer_rq = peer_tep->te_rq;
4860 * Put it back if flow controlled except when we are closing.
4861 * Note: Messages already on the queue when we are closing are bounded
4862 * so we can ignore flow control.
4864 if (! canputnext(peer_rq) && !closing) {
4865 TL_PUTBQ(tep, mp);
4866 return;
4870 * validate state on peer
4872 switch (peer_tep->te_state) {
4873 case TS_DATA_XFER:
4874 case TS_WIND_ORDREL:
4875 /* valid states */
4876 break;
4877 default:
4878 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
4879 "tl_ordrel:rx side:invalid state"));
4880 tl_merror(peer_tep->te_wq, mp, EPROTO);
4881 return;
4883 peer_tep->te_state = NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
4886 * reuse message block
4888 prim->type = T_ORDREL_IND;
4889 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
4890 "tl_ordrel: send ordrel_ind"));
4893 * send data to connected peer
4895 putnext(peer_rq, mp);
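/*
 * Editor's note -- illustrative sketch only, not part of tl.c. On a
 * ticotsord endpoint the orderly release handled above is driven from
 * user level by t_sndrel(3NSL) and consumed with t_rcvrel(3NSL):
 *
 *	if (t_sndrel(fd) < 0)
 *		t_error("t_sndrel");
 *	...
 *	if (t_rcvrel(peerfd) < 0)
 *		t_error("t_rcvrel");
 *
 * t_sndrel() generates the T_ORDREL_REQ; t_rcvrel() consumes the
 * T_ORDREL_IND sent to the connected peer above.
 */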
4900 * Send T_UDERROR_IND. The error should be from the <sys/errno.h> space.
4902 static void
4903 tl_uderr(queue_t *wq, mblk_t *mp, t_scalar_t err)
4905 size_t err_sz;
4906 tl_endpt_t *tep;
4907 struct T_unitdata_req *udreq;
4908 mblk_t *err_mp;
4909 t_scalar_t alen;
4910 t_scalar_t olen;
4911 struct T_uderror_ind *uderr;
4912 uchar_t *addr_startp;
4914 err_sz = sizeof (struct T_uderror_ind);
4915 tep = (tl_endpt_t *)wq->q_ptr;
4916 udreq = (struct T_unitdata_req *)mp->b_rptr;
4917 alen = udreq->DEST_length;
4918 olen = udreq->OPT_length;
4920 if (alen > 0)
4921 err_sz = T_ALIGN(err_sz + alen);
4922 if (olen > 0)
4923 err_sz += olen;
4925 err_mp = allocb(err_sz, BPRI_MED);
4926 if (! err_mp) {
4927 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
4928 "tl_uderr:allocb failure"));
4930 * Note: no rollback of state is needed as it does
4931 * not change in a connectionless transport
4933 tl_memrecover(wq, mp, err_sz);
4934 return;
4937 DB_TYPE(err_mp) = M_PROTO;
4938 err_mp->b_wptr = err_mp->b_rptr + err_sz;
4939 uderr = (struct T_uderror_ind *)err_mp->b_rptr;
4940 uderr->PRIM_type = T_UDERROR_IND;
4941 uderr->ERROR_type = err;
4942 uderr->DEST_length = alen;
4943 uderr->OPT_length = olen;
4944 if (alen <= 0) {
4945 uderr->DEST_offset = 0;
4946 } else {
4947 uderr->DEST_offset =
4948 (t_scalar_t)sizeof (struct T_uderror_ind);
4949 addr_startp = mp->b_rptr + udreq->DEST_offset;
4950 bcopy(addr_startp, err_mp->b_rptr + uderr->DEST_offset,
4951 (size_t)alen);
4953 if (olen <= 0) {
4954 uderr->OPT_offset = 0;
4955 } else {
4956 uderr->OPT_offset =
4957 (t_scalar_t)T_ALIGN(sizeof (struct T_uderror_ind) +
4958 uderr->DEST_length);
4959 addr_startp = mp->b_rptr + udreq->OPT_offset;
4960 bcopy(addr_startp, err_mp->b_rptr+uderr->OPT_offset,
4961 (size_t)olen);
4963 freemsg(mp);
4966 * send indication message
4968 tep->te_state = NEXTSTATE(TE_UDERROR_IND, tep->te_state);
4970 qreply(wq, err_mp);
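/*
 * Editor's note -- layout sketch (illustration only; the offsets follow
 * directly from the code above). The T_UDERROR_IND mblk is:
 *
 *	b_rptr:		struct T_uderror_ind
 *	DEST_offset:	sizeof (struct T_uderror_ind), DEST_length bytes
 *	OPT_offset:	T_ALIGN(sizeof (struct T_uderror_ind) +
 *			DEST_length), OPT_length bytes
 *	b_wptr:		b_rptr + err_sz
 */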
4973 static void
4974 tl_unitdata_ser(mblk_t *mp, tl_endpt_t *tep)
4976 queue_t *wq = tep->te_wq;
4978 if (!tep->te_closing && (wq->q_first != NULL)) {
4979 TL_PUTQ(tep, mp);
4980 } else if (tep->te_rq != NULL)
4981 tl_unitdata(mp, tep);
4982 else
4983 freemsg(mp);
4985 tl_serializer_exit(tep);
4986 tl_refrele(tep);
4990 * Handle T_unitdata_req.
4991 * If TL_SET[U]CRED or TL_SOCKUCRED is set, generate the credentials options.
4992 * If this is a socket, pass through options unmodified.
4994 static void
4995 tl_unitdata(mblk_t *mp, tl_endpt_t *tep)
4997 queue_t *wq = tep->te_wq;
4998 soux_addr_t ux_addr;
4999 tl_addr_t destaddr;
5000 uchar_t *addr_startp;
5001 tl_endpt_t *peer_tep;
5002 struct T_unitdata_ind *udind;
5003 struct T_unitdata_req *udreq;
5004 ssize_t msz, ui_sz, reuse_mb_sz;
5005 t_scalar_t alen, aoff, olen, ooff;
5006 t_scalar_t oldolen = 0;
5007 cred_t *cr = NULL;
5008 pid_t cpid;
5010 udreq = (struct T_unitdata_req *)mp->b_rptr;
5011 msz = MBLKL(mp);
5014 * validate the state
5016 if (tep->te_state != TS_IDLE) {
5017 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5018 SL_TRACE|SL_ERROR,
5019 "tl_wput:T_CONN_REQ:out of state"));
5020 tl_merror(wq, mp, EPROTO);
5021 return;
5024 * tep->te_state = NEXTSTATE(TE_UNITDATA_REQ, tep->te_state);
5025 * (state does not change on this event)
5029 * validate the message
5030 * Note: dereference fields in struct inside message only
5031 * after validating the message length.
5033 if (msz < sizeof (struct T_unitdata_req)) {
5034 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5035 "tl_unitdata:invalid message length"));
5036 tl_merror(wq, mp, EINVAL);
5037 return;
5039 alen = udreq->DEST_length;
5040 aoff = udreq->DEST_offset;
5041 oldolen = olen = udreq->OPT_length;
5042 ooff = udreq->OPT_offset;
5043 if (olen == 0)
5044 ooff = 0;
5046 if (IS_SOCKET(tep)) {
5047 if ((alen != TL_SOUX_ADDRLEN) ||
5048 (aoff < 0) ||
5049 (aoff + alen > msz) ||
5050 (olen < 0) || (ooff < 0) ||
5051 ((olen > 0) && ((ooff + olen) > msz))) {
5052 (void) (STRLOG(TL_ID, tep->te_minor,
5053 1, SL_TRACE|SL_ERROR,
5054 "tl_unitdata_req: invalid socket addr "
5055 "(msz=%d, al=%d, ao=%d, ol=%d, oo = %d)",
5056 (int)msz, alen, aoff, olen, ooff));
5057 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5058 return;
5060 bcopy(mp->b_rptr + aoff, &ux_addr, TL_SOUX_ADDRLEN);
5062 if ((ux_addr.soua_magic != SOU_MAGIC_IMPLICIT) &&
5063 (ux_addr.soua_magic != SOU_MAGIC_EXPLICIT)) {
5064 (void) (STRLOG(TL_ID, tep->te_minor,
5065 1, SL_TRACE|SL_ERROR,
5066 "tl_conn_req: invalid socket magic"));
5067 tl_error_ack(wq, mp, TSYSERR, EINVAL, T_UNITDATA_REQ);
5068 return;
5070 } else {
5071 if ((alen < 0) ||
5072 (aoff < 0) ||
5073 ((alen > 0) && ((aoff + alen) > msz)) ||
5074 ((ssize_t)alen > (msz - sizeof (struct T_unitdata_req))) ||
5075 ((aoff + alen) < 0) ||
5076 ((olen > 0) && ((ooff + olen) > msz)) ||
5077 (olen < 0) ||
5078 (ooff < 0) ||
5079 ((ssize_t)olen > (msz - sizeof (struct T_unitdata_req)))) {
5080 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5081 SL_TRACE|SL_ERROR,
5082 "tl_unitdata:invalid unit data message"));
5083 tl_merror(wq, mp, EINVAL);
5084 return;
5088 /* Options not supported unless it's a socket */
5089 if (alen == 0 || (olen != 0 && !IS_SOCKET(tep))) {
5090 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5091 "tl_unitdata:option use(unsupported) or zero len addr"));
5092 tl_uderr(wq, mp, EPROTO);
5093 return;
5095 #ifdef DEBUG
5097 * Mild form of ASSERT()ion to detect broken TPI apps.
5098 * if (! assertion)
5099 * log warning;
5101 if (! (aoff >= (t_scalar_t)sizeof (struct T_unitdata_req))) {
5102 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5103 "tl_unitdata:addr overlaps TPI message"));
5105 #endif
5107 * get destination endpoint
5109 destaddr.ta_alen = alen;
5110 destaddr.ta_abuf = mp->b_rptr + aoff;
5111 destaddr.ta_zoneid = tep->te_zoneid;
5114 * Check whether the destination is the same one that was used previously
5115 * and the destination endpoint is in the right state. If something is
5116 * wrong, find the destination again and cache it.
5118 peer_tep = tep->te_lastep;
5120 if ((peer_tep == NULL) || peer_tep->te_closing ||
5121 (peer_tep->te_state != TS_IDLE) ||
5122 !tl_eqaddr(&destaddr, &peer_tep->te_ap)) {
5124 * Not the same as the cached destination; need to find the right
5125 * destination.
5127 peer_tep = (IS_SOCKET(tep) ?
5128 tl_sock_find_peer(tep, &ux_addr) :
5129 tl_find_peer(tep, &destaddr));
5131 if (peer_tep == NULL) {
5132 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5133 SL_TRACE|SL_ERROR,
5134 "tl_unitdata:no one at destination address"));
5135 tl_uderr(wq, mp, ECONNRESET);
5136 return;
5140 * Cache the new peer.
5142 if (tep->te_lastep != NULL)
5143 tl_refrele(tep->te_lastep);
5145 tep->te_lastep = peer_tep;
5148 if (peer_tep->te_state != TS_IDLE) {
5149 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
5150 "tl_unitdata:provider in invalid state"));
5151 tl_uderr(wq, mp, EPROTO);
5152 return;
5155 ASSERT(peer_tep->te_rq != NULL);
5158 * Put it back if flow controlled except when we are closing.
5159 * Note: Messages already on the queue when we are closing are bounded
5160 * so we can ignore flow control.
5162 if (!canputnext(peer_tep->te_rq) && !(tep->te_closing)) {
5163 /* record what we are flow controlled on */
5164 if (tep->te_flowq != NULL) {
5165 list_remove(&tep->te_flowq->te_flowlist, tep);
5167 list_insert_head(&peer_tep->te_flowlist, tep);
5168 tep->te_flowq = peer_tep;
5169 TL_PUTBQ(tep, mp);
5170 return;
5173 * prepare indication message
5177 * calculate length of message
5179 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5180 cr = msg_getcred(mp, &cpid);
5181 ASSERT(cr != NULL);
5183 if (peer_tep->te_flag & TL_SETCRED) {
5184 ASSERT(olen == 0);
5185 olen = (t_scalar_t)sizeof (struct opthdr) +
5186 OPTLEN(sizeof (tl_credopt_t));
5187 /* 1 option only */
5188 } else if (peer_tep->te_flag & TL_SETUCRED) {
5189 ASSERT(olen == 0);
5190 olen = (t_scalar_t)sizeof (struct opthdr) +
5191 OPTLEN(ucredminsize(cr));
5192 /* 1 option only */
5193 } else {
5194 /* Possibly more than one option */
5195 olen += (t_scalar_t)sizeof (struct T_opthdr) +
5196 OPTLEN(ucredminsize(cr));
5200 ui_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + tep->te_alen) + olen;
5201 reuse_mb_sz = T_ALIGN(sizeof (struct T_unitdata_ind) + alen) + olen;
5204 * If the unitdata_ind fits and we are not adding options
5205 * reuse the udreq mblk.
5207 * Otherwise, it is possible we need to append an option if one of the
5208 * te_flag bits is set. This requires extra space in the data block for
5209 * the additional option but the traditional technique used below to
5210 * allocate a new block and copy into it will not work when there is a
5211 * message block with a free pointer (since we don't know anything
5212 * about the layout of the data, pointers referencing or within the
5213 * data, etc.). To handle this possibility the upper layers may have
5214 * preallocated some space to use for appending an option. We check the
5215 * overall mblk size against the size we need ('reuse_mb_sz' with the
5216 * original address length [alen] to ensure we won't overrun the
5217 * current mblk data size) to see if there is free space and thus
5218 * avoid allocating a new message block.
5220 if (msz >= ui_sz && alen >= tep->te_alen &&
5221 !(peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED))) {
5223 * Reuse the original mblk. Leave options in place.
5225 udind = (struct T_unitdata_ind *)mp->b_rptr;
5226 udind->PRIM_type = T_UNITDATA_IND;
5227 udind->SRC_length = tep->te_alen;
5228 addr_startp = mp->b_rptr + udind->SRC_offset;
5229 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5231 } else if (MBLKSIZE(mp) >= reuse_mb_sz && alen >= tep->te_alen &&
5232 mp->b_datap->db_frtnp != NULL) {
5234 * We have a message block with a free pointer, but extra space
5235 * has been pre-allocated for us in case we need to append an
5236 * option. Reuse the original mblk, leaving existing options in
5237 * place.
5239 udind = (struct T_unitdata_ind *)mp->b_rptr;
5240 udind->PRIM_type = T_UNITDATA_IND;
5241 udind->SRC_length = tep->te_alen;
5242 addr_startp = mp->b_rptr + udind->SRC_offset;
5243 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5245 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5246 ASSERT(cr != NULL);
5248 * We're appending one new option here after the
5249 * original ones.
5251 tl_fill_option(mp->b_rptr + udind->OPT_offset + oldolen,
5252 cr, cpid, peer_tep->te_flag, peer_tep->te_credp);
5255 } else if (mp->b_datap->db_frtnp != NULL) {
5257 * The next block creates a new mp and tries to copy the data
5258 * block into it, but that cannot handle a message with a free
5259 * pointer (for more details see the comment in kstrputmsg()
5260 * where dupmsg() is called). Since we can never properly
5261 * duplicate the mp while also extending the data, just error
5262 * out now.
5264 tl_uderr(wq, mp, EPROTO);
5265 return;
5266 } else {
5267 /* Allocate a new T_unitdata_ind message */
5268 mblk_t *ui_mp;
5270 ui_mp = allocb(ui_sz, BPRI_MED);
5271 if (! ui_mp) {
5272 (void) (STRLOG(TL_ID, tep->te_minor, 4, SL_TRACE,
5273 "tl_unitdata:allocb failure:message queued"));
5274 tl_memrecover(wq, mp, ui_sz);
5275 return;
5279 * fill in T_UNITDATA_IND contents
5281 DB_TYPE(ui_mp) = M_PROTO;
5282 ui_mp->b_wptr = ui_mp->b_rptr + ui_sz;
5283 udind = (struct T_unitdata_ind *)ui_mp->b_rptr;
5284 udind->PRIM_type = T_UNITDATA_IND;
5285 udind->SRC_offset = (t_scalar_t)sizeof (struct T_unitdata_ind);
5286 udind->SRC_length = tep->te_alen;
5287 addr_startp = ui_mp->b_rptr + udind->SRC_offset;
5288 bcopy(tep->te_abuf, addr_startp, tep->te_alen);
5289 udind->OPT_offset =
5290 (t_scalar_t)T_ALIGN(udind->SRC_offset + udind->SRC_length);
5291 udind->OPT_length = olen;
5292 if (peer_tep->te_flag & (TL_SETCRED|TL_SETUCRED|TL_SOCKUCRED)) {
5294 if (oldolen != 0) {
5295 bcopy((void *)((uintptr_t)udreq + ooff),
5296 (void *)((uintptr_t)udind +
5297 udind->OPT_offset),
5298 oldolen);
5300 ASSERT(cr != NULL);
5302 tl_fill_option(ui_mp->b_rptr + udind->OPT_offset +
5303 oldolen, cr, cpid,
5304 peer_tep->te_flag, peer_tep->te_credp);
5305 } else {
5306 bcopy((void *)((uintptr_t)udreq + ooff),
5307 (void *)((uintptr_t)udind + udind->OPT_offset),
5308 olen);
5312 * relink data blocks from mp to ui_mp
5314 ui_mp->b_cont = mp->b_cont;
5315 freeb(mp);
5316 mp = ui_mp;
5319 * send indication message
5321 peer_tep->te_state = NEXTSTATE(TE_UNITDATA_IND, peer_tep->te_state);
5322 putnext(peer_tep->te_rq, mp);
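/*
 * Editor's note -- illustrative sketch only, not part of tl.c; the path
 * name is hypothetical. The common consumer of this routine is an
 * AF_UNIX datagram socket:
 *
 *	int fd = socket(AF_UNIX, SOCK_DGRAM, 0);
 *	struct sockaddr_un sun;
 *
 *	bzero(&sun, sizeof (sun));
 *	sun.sun_family = AF_UNIX;
 *	(void) strlcpy(sun.sun_path, "/tmp/srv", sizeof (sun.sun_path));
 *	(void) sendto(fd, buf, len, 0, (struct sockaddr *)&sun,
 *	    sizeof (sun));
 *
 * sendto() arrives here as a T_UNITDATA_REQ and the bound peer receives
 * the T_UNITDATA_IND constructed above.
 */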
5328 * Check if a given addr is in use.
5329 * Endpoint ptr returned or NULL if not found.
5330 * The name space is separate for each mode. This implies that
5331 * sockets get their own name space.
5333 static tl_endpt_t *
5334 tl_find_peer(tl_endpt_t *tep, tl_addr_t *ap)
5336 tl_endpt_t *peer_tep = NULL;
5337 int rc = mod_hash_find_cb(tep->te_addrhash, (mod_hash_key_t)ap,
5338 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5340 ASSERT(! IS_SOCKET(tep));
5342 ASSERT(ap != NULL && ap->ta_alen > 0);
5343 ASSERT(ap->ta_zoneid == tep->te_zoneid);
5344 ASSERT(ap->ta_abuf != NULL);
5345 EQUIV(rc == 0, peer_tep != NULL);
5346 IMPLY(rc == 0,
5347 (tep->te_zoneid == peer_tep->te_zoneid) &&
5348 (tep->te_transport == peer_tep->te_transport));
5350 if ((rc == 0) && (peer_tep->te_closing)) {
5351 tl_refrele(peer_tep);
5352 peer_tep = NULL;
5355 return (peer_tep);
5359 * Find peer for a socket based on unix domain address.
5360 * For implicit addresses our peer can be found by minor number in the ai
5361 * hash. For explicit binds we look up the vnode address in the addr hash.
5363 static tl_endpt_t *
5364 tl_sock_find_peer(tl_endpt_t *tep, soux_addr_t *ux_addr)
5366 tl_endpt_t *peer_tep = NULL;
5367 mod_hash_t *hash = ux_addr->soua_magic == SOU_MAGIC_IMPLICIT ?
5368 tep->te_aihash : tep->te_addrhash;
5369 int rc = mod_hash_find_cb(hash, (mod_hash_key_t)ux_addr->soua_vp,
5370 (mod_hash_val_t *)&peer_tep, tl_find_callback);
5372 ASSERT(IS_SOCKET(tep));
5373 EQUIV(rc == 0, peer_tep != NULL);
5374 IMPLY(rc == 0, (tep->te_transport == peer_tep->te_transport));
5376 if (peer_tep != NULL) {
5377 /* Don't attempt to use closing peer. */
5378 if (peer_tep->te_closing)
5379 goto errout;
5382 return (peer_tep);
5384 errout:
5385 tl_refrele(peer_tep);
5386 return (NULL);
5390 * Generate a free addr and return it in the struct pointed to by req,
5391 * allocating space for the address buffer.
5392 * The generated address will be at least 4 bytes long and, if req->ta_alen
5393 * exceeds 4 bytes, be req->ta_alen bytes long.
5395 * If address is found it will be inserted in the hash.
5397 * If req->ta_alen is larger than the default alen (4 bytes) the last
5398 * alen-4 bytes will always be the same as in req.
5400 * Return 0 for failure.
5401 * Return non-zero for success.
5403 static boolean_t
5404 tl_get_any_addr(tl_endpt_t *tep, tl_addr_t *req)
5406 t_scalar_t alen;
5407 uint32_t loopcnt; /* Limit loop to 2^32 */
5409 ASSERT(tep->te_hash_hndl != NULL);
5410 ASSERT(! IS_SOCKET(tep));
5412 if (tep->te_hash_hndl == NULL)
5413 return (B_FALSE);
5416 * check if default addr is in use
5417 * if it is - bump it and try again
5419 if (req == NULL) {
5420 alen = sizeof (uint32_t);
5421 } else {
5422 alen = max(req->ta_alen, sizeof (uint32_t));
5423 ASSERT(tep->te_zoneid == req->ta_zoneid);
5426 if (tep->te_alen < alen) {
5427 void *abuf = kmem_zalloc((size_t)alen, KM_NOSLEEP);
5430 * Not enough space in tep->ta_ap to hold the address,
5431 * allocate a bigger space.
5433 if (abuf == NULL)
5434 return (B_FALSE);
5436 if (tep->te_alen > 0)
5437 kmem_free(tep->te_abuf, tep->te_alen);
5439 tep->te_alen = alen;
5440 tep->te_abuf = abuf;
5443 /* Copy in the address in req */
5444 if (req != NULL) {
5445 ASSERT(alen >= req->ta_alen);
5446 bcopy(req->ta_abuf, tep->te_abuf, (size_t)req->ta_alen);
5450 * First try the minor number, then try the default addresses.
5452 bcopy(&tep->te_minor, tep->te_abuf, sizeof (uint32_t));
5454 for (loopcnt = 0; loopcnt < UINT32_MAX; loopcnt++) {
5455 if (mod_hash_insert_reserve(tep->te_addrhash,
5456 (mod_hash_key_t)&tep->te_ap, (mod_hash_val_t)tep,
5457 tep->te_hash_hndl) == 0) {
5459 * found free address
5461 tep->te_flag |= TL_ADDRHASHED;
5462 tep->te_hash_hndl = NULL;
5464 return (B_TRUE); /* successful return */
5467 * Use default address.
5469 bcopy(&tep->te_defaddr, tep->te_abuf, sizeof (uint32_t));
5470 atomic_inc_32(&tep->te_defaddr);
5474 * Failed to find anything.
5476 (void) (STRLOG(TL_ID, -1, 1, SL_ERROR,
5477 "tl_get_any_addr:looped 2^32 times"));
5478 return (B_FALSE);
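/*
 * Editor's note -- worked example (illustration only, minor number
 * hypothetical). For an endpoint with minor number 42 and no 'req'
 * address, the first candidate address is the four bytes of te_minor,
 * i.e. 42, copied into te_abuf. If that address is already hashed, each
 * retry copies in te_defaddr and bumps it atomically, so the successive
 * candidates are defaddr, defaddr + 1, ... until
 * mod_hash_insert_reserve() succeeds or 2^32 attempts have failed.
 */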
5482 * reallocb + set r/w ptrs to reflect size.
5484 static mblk_t *
5485 tl_resizemp(mblk_t *mp, ssize_t new_size)
5487 if ((mp = reallocb(mp, new_size, 0)) == NULL)
5488 return (NULL);
5490 mp->b_rptr = DB_BASE(mp);
5491 mp->b_wptr = mp->b_rptr + new_size;
5492 return (mp);
5495 static void
5496 tl_cl_backenable(tl_endpt_t *tep)
5498 list_t *l = &tep->te_flowlist;
5499 tl_endpt_t *elp;
5501 ASSERT(IS_CLTS(tep));
5503 for (elp = list_head(l); elp != NULL; elp = list_head(l)) {
5504 ASSERT(tep->te_ser == elp->te_ser);
5505 ASSERT(elp->te_flowq == tep);
5506 if (! elp->te_closing)
5507 TL_QENABLE(elp);
5508 elp->te_flowq = NULL;
5509 list_remove(l, elp);
5514 * Unconnect endpoints.
5516 static void
5517 tl_co_unconnect(tl_endpt_t *tep)
5519 tl_endpt_t *peer_tep = tep->te_conp;
5520 tl_endpt_t *srv_tep = tep->te_oconp;
5521 list_t *l;
5522 tl_icon_t *tip;
5523 tl_endpt_t *cl_tep;
5524 mblk_t *d_mp;
5526 ASSERT(IS_COTS(tep));
5528 * If our peer is closing, don't use it.
5530 if ((peer_tep != NULL) && peer_tep->te_closing) {
5531 TL_UNCONNECT(tep->te_conp);
5532 peer_tep = NULL;
5534 if ((srv_tep != NULL) && srv_tep->te_closing) {
5535 TL_UNCONNECT(tep->te_oconp);
5536 srv_tep = NULL;
5539 if (tep->te_nicon > 0) {
5540 l = &tep->te_iconp;
5542 * If incoming requests are pending, change the state
5543 * of the clients on the disconnect ind event and send a
5544 * discon_ind pdu to the modules above them
5545 * (for a server: all clients get disconnected)
5548 while (tep->te_nicon > 0) {
5549 tip = list_head(l);
5550 cl_tep = tip->ti_tep;
5552 if (cl_tep == NULL) {
5553 tl_freetip(tep, tip);
5554 continue;
5557 if (cl_tep->te_oconp != NULL) {
5558 ASSERT(cl_tep != cl_tep->te_oconp);
5559 TL_UNCONNECT(cl_tep->te_oconp);
5562 if (cl_tep->te_closing) {
5563 tl_freetip(tep, tip);
5564 continue;
5567 enableok(cl_tep->te_wq);
5568 TL_QENABLE(cl_tep);
5569 d_mp = tl_discon_ind_alloc(ECONNREFUSED, BADSEQNUM);
5570 if (d_mp != NULL) {
5571 cl_tep->te_state = TS_IDLE;
5572 putnext(cl_tep->te_rq, d_mp);
5573 } else {
5574 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5575 SL_TRACE|SL_ERROR,
5576 "tl_co_unconnect:icmng: "
5577 "allocb failure"));
5579 tl_freetip(tep, tip);
5581 } else if (srv_tep != NULL) {
5583 * If an outgoing request is pending, change the state
5584 * of the server on the discon ind event
5587 if (IS_SOCKET(tep) && !tl_disable_early_connect &&
5588 IS_COTSORD(srv_tep) &&
5589 !tl_icon_hasprim(srv_tep, tep->te_seqno, T_ORDREL_IND)) {
5591 * Queue ordrel_ind for server to be picked up
5592 * when the connection is accepted.
5594 d_mp = tl_ordrel_ind_alloc();
5595 } else {
5597 * send discon_ind to server
5599 d_mp = tl_discon_ind_alloc(ECONNRESET, tep->te_seqno);
5601 if (d_mp == NULL) {
5602 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5603 SL_TRACE|SL_ERROR,
5604 "tl_co_unconnect:outgoing:allocb failure"));
5605 TL_UNCONNECT(tep->te_oconp);
5606 goto discon_peer;
5610 * If this is a socket the T_DISCON_IND is queued with
5611 * the T_CONN_IND. Otherwise the T_CONN_IND is removed
5612 * from the list of pending connections.
5613 * Note that when te_oconp is set the peer must have
5614 * a t_connind_t for the client.
5616 if (IS_SOCKET(tep) && !tl_disable_early_connect) {
5618 * Queue the disconnection message.
5620 tl_icon_queuemsg(srv_tep, tep->te_seqno, d_mp);
5621 } else {
5622 tip = tl_icon_find(srv_tep, tep->te_seqno);
5623 if (tip == NULL) {
5624 freemsg(d_mp);
5625 } else {
5626 ASSERT(tep == tip->ti_tep);
5627 ASSERT(tep->te_ser == srv_tep->te_ser);
5629 * Delete tip from the server list.
5631 if (srv_tep->te_nicon == 1) {
5632 srv_tep->te_state =
5633 NEXTSTATE(TE_DISCON_IND2,
5634 srv_tep->te_state);
5635 } else {
5636 srv_tep->te_state =
5637 NEXTSTATE(TE_DISCON_IND3,
5638 srv_tep->te_state);
5640 ASSERT(*(uint32_t *)(d_mp->b_rptr) ==
5641 T_DISCON_IND);
5642 putnext(srv_tep->te_rq, d_mp);
5643 tl_freetip(srv_tep, tip);
5645 TL_UNCONNECT(tep->te_oconp);
5646 srv_tep = NULL;
5648 } else if (peer_tep != NULL) {
5650 * unconnect existing connection
5651 * If connected, change state of peer on
5652 * discon ind event and send discon ind pdu
5653 * to module above it
5656 ASSERT(tep->te_ser == peer_tep->te_ser);
5657 if (IS_COTSORD(peer_tep) &&
5658 (peer_tep->te_state == TS_WIND_ORDREL ||
5659 peer_tep->te_state == TS_DATA_XFER)) {
5661 * send ordrel ind
5663 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE,
5664 "tl_co_unconnect:connected: ordrel_ind state %d->%d",
5665 peer_tep->te_state,
5666 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state)));
5667 d_mp = tl_ordrel_ind_alloc();
5668 if (! d_mp) {
5669 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5670 SL_TRACE|SL_ERROR,
5671 "tl_co_unconnect:connected:"
5672 "allocb failure"));
5674 * Continue with cleaning up peer as
5675 * this side may go away with the close
5677 TL_QENABLE(peer_tep);
5678 goto discon_peer;
5680 peer_tep->te_state =
5681 NEXTSTATE(TE_ORDREL_IND, peer_tep->te_state);
5683 putnext(peer_tep->te_rq, d_mp);
5685 * Handle flow control case. This will generate
5686 * a t_discon_ind message with reason 0 if there
5687 * is data queued on the write side.
5689 TL_QENABLE(peer_tep);
5690 } else if (IS_COTSORD(peer_tep) &&
5691 peer_tep->te_state == TS_WREQ_ORDREL) {
5693 * Sent an ordrel_ind. We send a discon with
5694 * error 0 to inform that the peer is gone.
5696 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5697 SL_TRACE|SL_ERROR,
5698 "tl_co_unconnect: discon in state %d",
5699 tep->te_state));
5700 tl_discon_ind(peer_tep, 0);
5701 } else {
5702 (void) (STRLOG(TL_ID, tep->te_minor, 3,
5703 SL_TRACE|SL_ERROR,
5704 "tl_co_unconnect: state %d", tep->te_state));
5705 tl_discon_ind(peer_tep, ECONNRESET);
5708 discon_peer:
5710 * Disconnect cross-pointers only for close
5712 if (tep->te_closing) {
5713 peer_tep = tep->te_conp;
5714 TL_REMOVE_PEER(peer_tep->te_conp);
5715 TL_REMOVE_PEER(tep->te_conp);
5721 * Note: The following routine does not recover from allocb()
5722 * failures
5723 * The reason should be from the <sys/errno.h> space.
5725 static void
5726 tl_discon_ind(tl_endpt_t *tep, uint32_t reason)
5728 mblk_t *d_mp;
5730 if (tep->te_closing)
5731 return;
5734 * flush the queues.
5736 flushq(tep->te_rq, FLUSHDATA);
5737 (void) putnextctl1(tep->te_rq, M_FLUSH, FLUSHRW);
5740 * send discon ind
5742 d_mp = tl_discon_ind_alloc(reason, tep->te_seqno);
5743 if (! d_mp) {
5744 (void) (STRLOG(TL_ID, tep->te_minor, 3, SL_TRACE|SL_ERROR,
5745 "tl_discon_ind:allocb failure"));
5746 return;
5748 tep->te_state = TS_IDLE;
5749 putnext(tep->te_rq, d_mp);
5753 * Note: The following routine does not recover from allocb()
5754 * failures
5755 * The reason should be from the <sys/errno.h> space.
5757 static mblk_t *
5758 tl_discon_ind_alloc(uint32_t reason, t_scalar_t seqnum)
5760 mblk_t *mp;
5761 struct T_discon_ind *tdi;
5763 if (mp = allocb(sizeof (struct T_discon_ind), BPRI_MED)) {
5764 DB_TYPE(mp) = M_PROTO;
5765 mp->b_wptr = mp->b_rptr + sizeof (struct T_discon_ind);
5766 tdi = (struct T_discon_ind *)mp->b_rptr;
5767 tdi->PRIM_type = T_DISCON_IND;
5768 tdi->DISCON_reason = reason;
5769 tdi->SEQ_number = seqnum;
5771 return (mp);
5776 * Note: The following routine does not recover from allocb()
5777 * failures
5779 static mblk_t *
5780 tl_ordrel_ind_alloc(void)
5782 mblk_t *mp;
5783 struct T_ordrel_ind *toi;
5785 if (mp = allocb(sizeof (struct T_ordrel_ind), BPRI_MED)) {
5786 DB_TYPE(mp) = M_PROTO;
5787 mp->b_wptr = mp->b_rptr + sizeof (struct T_ordrel_ind);
5788 toi = (struct T_ordrel_ind *)mp->b_rptr;
5789 toi->PRIM_type = T_ORDREL_IND;
5791 return (mp);
5796 * Look up the seqno in the list of queued connections.
5798 static tl_icon_t *
5799 tl_icon_find(tl_endpt_t *tep, t_scalar_t seqno)
5801 list_t *l = &tep->te_iconp;
5802 tl_icon_t *tip = list_head(l);
5804 ASSERT(seqno != 0);
5806 for (; tip != NULL && (tip->ti_seqno != seqno); tip = list_next(l, tip))
5809 return (tip);
5813 * Queue data for a given T_CONN_IND while verifying that redundant
5814 * messages, such as a T_ORDREL_IND after a T_DISCON_IND, are not queued.
5815 * Used when the originator of the connection closes.
5817 static void
5818 tl_icon_queuemsg(tl_endpt_t *tep, t_scalar_t seqno, mblk_t *nmp)
5820 tl_icon_t *tip;
5821 mblk_t **mpp, *mp;
5822 int prim, nprim;
5824 if (nmp->b_datap->db_type == M_PROTO)
5825 nprim = ((union T_primitives *)nmp->b_rptr)->type;
5826 else
5827 nprim = -1; /* M_DATA */
5829 tip = tl_icon_find(tep, seqno);
5830 if (tip == NULL) {
5831 freemsg(nmp);
5832 return;
5835 ASSERT(tip->ti_seqno != 0);
5836 mpp = &tip->ti_mp;
5837 while (*mpp != NULL) {
5838 mp = *mpp;
5840 if (mp->b_datap->db_type == M_PROTO)
5841 prim = ((union T_primitives *)mp->b_rptr)->type;
5842 else
5843 prim = -1; /* M_DATA */
5846 * Allow nothing after a T_DISCON_IND
5848 if (prim == T_DISCON_IND) {
5849 freemsg(nmp);
5850 return;
5853 * Only allow a T_DISCON_IND after a T_ORDREL_IND
5855 if (prim == T_ORDREL_IND && nprim != T_DISCON_IND) {
5856 freemsg(nmp);
5857 return;
5859 mpp = &(mp->b_next);
5861 *mpp = nmp;
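/*
 * Editor's note -- the two checks above enforce a simple grammar on the
 * queued chain (illustration only):
 *
 *	data* [ T_ORDREL_IND [ T_DISCON_IND ] ]
 *
 * i.e. any data or data indications may precede a T_ORDREL_IND, only a
 * T_DISCON_IND may follow it, and nothing ever follows a T_DISCON_IND.
 */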
5865 * Verify if a certain TPI primitive exists on the connind queue.
5866 * Use prim -1 for M_DATA.
5867 * Return non-zero if found.
5869 static boolean_t
5870 tl_icon_hasprim(tl_endpt_t *tep, t_scalar_t seqno, t_scalar_t prim)
5872 tl_icon_t *tip = tl_icon_find(tep, seqno);
5873 boolean_t found = B_FALSE;
5875 if (tip != NULL) {
5876 mblk_t *mp;
5877 for (mp = tip->ti_mp; !found && mp != NULL; mp = mp->b_next) {
5878 found = (DB_TYPE(mp) == M_PROTO &&
5879 ((union T_primitives *)mp->b_rptr)->type == prim);
5882 return (found);
5886 * Send the b_next mblk chain that has accumulated before the connection
5887 * was accepted. Perform the necessary state transitions.
5889 static void
5890 tl_icon_sendmsgs(tl_endpt_t *tep, mblk_t **mpp)
5892 mblk_t *mp;
5893 union T_primitives *primp;
5895 if (tep->te_closing) {
5896 tl_icon_freemsgs(mpp);
5897 return;
5900 ASSERT(tep->te_state == TS_DATA_XFER);
5901 ASSERT(tep->te_rq->q_first == NULL);
5903 while ((mp = *mpp) != NULL) {
5904 *mpp = mp->b_next;
5905 mp->b_next = NULL;
5907 ASSERT((DB_TYPE(mp) == M_DATA) || (DB_TYPE(mp) == M_PROTO));
5908 switch (DB_TYPE(mp)) {
5909 default:
5910 freemsg(mp);
5911 break;
5912 case M_DATA:
5913 putnext(tep->te_rq, mp);
5914 break;
5915 case M_PROTO:
5916 primp = (union T_primitives *)mp->b_rptr;
5917 switch (primp->type) {
5918 case T_UNITDATA_IND:
5919 case T_DATA_IND:
5920 case T_OPTDATA_IND:
5921 case T_EXDATA_IND:
5922 putnext(tep->te_rq, mp);
5923 break;
5924 case T_ORDREL_IND:
5925 tep->te_state = NEXTSTATE(TE_ORDREL_IND,
5926 tep->te_state);
5927 putnext(tep->te_rq, mp);
5928 break;
5929 case T_DISCON_IND:
5930 tep->te_state = TS_IDLE;
5931 putnext(tep->te_rq, mp);
5932 break;
5933 default:
5934 #ifdef DEBUG
5935 cmn_err(CE_PANIC,
5936 "tl_icon_sendmsgs: unknown primitive");
5937 #endif /* DEBUG */
5938 freemsg(mp);
5939 break;
5941 break;
5947 * Free the b_next mblk chain that has accumulated before the connection
5948 * was accepted.
5950 static void
5951 tl_icon_freemsgs(mblk_t **mpp)
5953 mblk_t *mp;
5955 while ((mp = *mpp) != NULL) {
5956 *mpp = mp->b_next;
5957 mp->b_next = NULL;
5958 freemsg(mp);
5963 * Send M_ERROR
5964 * Note: assumes caller ensured enough space in mp or enough
5965 * memory available. Does not attempt recovery from allocb()
5966 * failures
5969 static void
5970 tl_merror(queue_t *wq, mblk_t *mp, int error)
5972 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
5974 if (tep->te_closing) {
5975 freemsg(mp);
5976 return;
5979 (void) (STRLOG(TL_ID, tep->te_minor, 1,
5980 SL_TRACE|SL_ERROR,
5981 "tl_merror: tep=%p, err=%d", (void *)tep, error));
5984 * flush all messages on the queue. We are shutting
5985 * the stream down on a fatal error
5987 flushq(wq, FLUSHALL);
5988 if (IS_COTS(tep)) {
5989 /* connection oriented - unconnect endpoints */
5990 tl_co_unconnect(tep);
5992 if (mp->b_cont) {
5993 freemsg(mp->b_cont);
5994 mp->b_cont = NULL;
5997 if ((MBLKSIZE(mp) < 1) || (DB_REF(mp) > 1)) {
5998 freemsg(mp);
5999 mp = allocb(1, BPRI_HI);
6000 if (!mp) {
6001 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6002 SL_TRACE|SL_ERROR,
6003 "tl_merror:M_PROTO: out of memory"));
6004 return;
6007 if (mp) {
6008 DB_TYPE(mp) = M_ERROR;
6009 mp->b_rptr = DB_BASE(mp);
6010 *mp->b_rptr = (char)error;
6011 mp->b_wptr = mp->b_rptr + sizeof (char);
6012 qreply(wq, mp);
6013 } else {
6014 (void) putnextctl1(tep->te_rq, M_ERROR, error);
6018 static void
6019 tl_fill_option(uchar_t *buf, cred_t *cr, pid_t cpid, int flag, cred_t *pcr)
6021 ASSERT(cr != NULL);
6023 if (flag & TL_SETCRED) {
6024 struct opthdr *opt = (struct opthdr *)buf;
6025 tl_credopt_t *tlcred;
6027 opt->level = TL_PROT_LEVEL;
6028 opt->name = TL_OPT_PEER_CRED;
6029 opt->len = (t_uscalar_t)OPTLEN(sizeof (tl_credopt_t));
6031 tlcred = (tl_credopt_t *)(opt + 1);
6032 tlcred->tc_uid = crgetuid(cr);
6033 tlcred->tc_gid = crgetgid(cr);
6034 tlcred->tc_ruid = crgetruid(cr);
6035 tlcred->tc_rgid = crgetrgid(cr);
6036 tlcred->tc_suid = crgetsuid(cr);
6037 tlcred->tc_sgid = crgetsgid(cr);
6038 tlcred->tc_ngroups = crgetngroups(cr);
6039 } else if (flag & TL_SETUCRED) {
6040 struct opthdr *opt = (struct opthdr *)buf;
6042 opt->level = TL_PROT_LEVEL;
6043 opt->name = TL_OPT_PEER_UCRED;
6044 opt->len = (t_uscalar_t)OPTLEN(ucredminsize(cr));
6046 (void) cred2ucred(cr, cpid, (void *)(opt + 1), pcr);
6047 } else {
6048 struct T_opthdr *topt = (struct T_opthdr *)buf;
6049 ASSERT(flag & TL_SOCKUCRED);
6051 topt->level = SOL_SOCKET;
6052 topt->name = SCM_UCRED;
6053 topt->len = ucredminsize(cr) + sizeof (*topt);
6054 topt->status = 0;
6055 (void) cred2ucred(cr, cpid, (void *)(topt + 1), pcr);
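/*
 * Editor's note -- illustrative sketch only, not part of tl.c. The
 * TL_SOCKUCRED branch above is what a datagram socket sees after
 * enabling SO_RECVUCRED; a hedged user-level consumer:
 *
 *	int on = 1;
 *	(void) setsockopt(fd, SOL_SOCKET, SO_RECVUCRED, &on, sizeof (on));
 *	... recvmsg(fd, &msg, 0) ...
 *	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
 *	    cmsg = CMSG_NXTHDR(&msg, cmsg)) {
 *		if (cmsg->cmsg_level == SOL_SOCKET &&
 *		    cmsg->cmsg_type == SCM_UCRED) {
 *			ucred_t *uc = (ucred_t *)CMSG_DATA(cmsg);
 *			uid_t uid = ucred_geteuid(uc);
 *		}
 *	}
 */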
6059 /* ARGSUSED */
6060 static int
6061 tl_default_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6063 /* no default value processed in protocol specific code currently */
6064 return (-1);
6067 /* ARGSUSED */
6068 static int
6069 tl_get_opt(queue_t *wq, int level, int name, uchar_t *ptr)
6071 int len;
6072 tl_endpt_t *tep;
6073 int *valp;
6075 tep = (tl_endpt_t *)wq->q_ptr;
6077 len = 0;
6080 * Assumes: option level and name sanity check done elsewhere
6083 switch (level) {
6084 case SOL_SOCKET:
6085 if (! IS_SOCKET(tep))
6086 break;
6087 switch (name) {
6088 case SO_RECVUCRED:
6089 len = sizeof (int);
6090 valp = (int *)ptr;
6091 *valp = (tep->te_flag & TL_SOCKUCRED) != 0;
6092 break;
6093 default:
6094 break;
6096 break;
6097 case TL_PROT_LEVEL:
6098 switch (name) {
6099 case TL_OPT_PEER_CRED:
6100 case TL_OPT_PEER_UCRED:
6102 * option not supposed to be retrieved directly
6103 * Only sent in T_CONN_{IND,CON}, T_UNITDATA_IND
6104 * when some internal flags are set by other options.
6105 * Direct retrieval is always designed to fail (ignored)
6106 * for this option.
6108 break;
6111 return (len);
6114 /* ARGSUSED */
6115 static int
6116 tl_set_opt(
6117 queue_t *wq,
6118 uint_t mgmt_flags,
6119 int level,
6120 int name,
6121 uint_t inlen,
6122 uchar_t *invalp,
6123 uint_t *outlenp,
6124 uchar_t *outvalp,
6125 void *thisdg_attrs,
6126 cred_t *cr)
6128 int error;
6129 tl_endpt_t *tep;
6131 tep = (tl_endpt_t *)wq->q_ptr;
6133 error = 0; /* NOERROR */
6136 * Assumes: option level and name sanity checks done elsewhere
6139 switch (level) {
6140 case SOL_SOCKET:
6141 if (! IS_SOCKET(tep)) {
6142 error = EINVAL;
6143 break;
6146 * TBD: fill in other AF_UNIX socket options and then stop
6147 * returning error.
6149 switch (name) {
6150 case SO_RECVUCRED:
6152 * We only support this for datagram sockets;
6153 * getpeerucred handles the connection oriented
6154 * transports.
6156 if (! IS_CLTS(tep)) {
6157 error = EINVAL;
6158 break;
6160 if (*(int *)invalp == 0)
6161 tep->te_flag &= ~TL_SOCKUCRED;
6162 else
6163 tep->te_flag |= TL_SOCKUCRED;
6164 break;
6165 default:
6166 error = EINVAL;
6167 break;
6169 break;
6170 case TL_PROT_LEVEL:
6171 switch (name) {
6172 case TL_OPT_PEER_CRED:
6173 case TL_OPT_PEER_UCRED:
6175 * option not supposed to be set directly.
6176 * Its value is initialized for each endpoint at
6177 * driver open time.
6178 * Direct setting is always designed to fail for this
6179 * option.
6181 (void) (STRLOG(TL_ID, tep->te_minor, 1,
6182 SL_TRACE|SL_ERROR,
6183 "tl_set_opt: option is not supported"));
6184 error = EPROTO;
6185 break;
6188 return (error);
6192 static void
6193 tl_timer(void *arg)
6195 queue_t *wq = arg;
6196 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6198 ASSERT(tep);
6200 tep->te_timoutid = 0;
6202 enableok(wq);
6204 * Note: we could call wsrv directly here and save a context switch.
6205 * Consider that change when qtimeout (not timeout) is active.
6207 qenable(wq);
6210 static void
6211 tl_buffer(void *arg)
6213 queue_t *wq = arg;
6214 tl_endpt_t *tep = (tl_endpt_t *)wq->q_ptr;
6216 ASSERT(tep);
6218 tep->te_bufcid = 0;
6219 tep->te_nowsrv = B_FALSE;
6221 enableok(wq);
6223 * Note: we could call wsrv directly here and save a context switch.
6224 * Consider that change when qbufcall (not bufcall) is active.
6226 qenable(wq);
6229 static void
6230 tl_memrecover(queue_t *wq, mblk_t *mp, size_t size)
6232 tl_endpt_t *tep;
6234 tep = (tl_endpt_t *)wq->q_ptr;
6236 if (tep->te_closing) {
6237 freemsg(mp);
6238 return;
6240 noenable(wq);
6242 (void) insq(wq, wq->q_first, mp);
6244 if (tep->te_bufcid || tep->te_timoutid) {
6245 (void) (STRLOG(TL_ID, tep->te_minor, 1, SL_TRACE|SL_ERROR,
6246 "tl_memrecover:recover %p pending", (void *)wq));
6247 return;
6250 if (!(tep->te_bufcid = qbufcall(wq, size, BPRI_MED, tl_buffer, wq))) {
6251 tep->te_timoutid = qtimeout(wq, tl_timer, wq,
6252 drv_usectohz(TL_BUFWAIT));
6256 static void
6257 tl_freetip(tl_endpt_t *tep, tl_icon_t *tip)
6259 ASSERT(tip->ti_seqno != 0);
6261 if (tip->ti_mp != NULL) {
6262 tl_icon_freemsgs(&tip->ti_mp);
6263 tip->ti_mp = NULL;
6265 if (tip->ti_tep != NULL) {
6266 tl_refrele(tip->ti_tep);
6267 tip->ti_tep = NULL;
6269 list_remove(&tep->te_iconp, tip);
6270 kmem_free(tip, sizeof (tl_icon_t));
6271 tep->te_nicon--;
6275 * Remove address from address hash.
6277 static void
6278 tl_addr_unbind(tl_endpt_t *tep)
6280 tl_endpt_t *elp;
6282 if (tep->te_flag & TL_ADDRHASHED) {
6283 if (IS_SOCKET(tep)) {
6284 (void) mod_hash_remove(tep->te_addrhash,
6285 (mod_hash_key_t)tep->te_vp,
6286 (mod_hash_val_t *)&elp);
6287 tep->te_vp = (void *)(uintptr_t)tep->te_minor;
6288 tep->te_magic = SOU_MAGIC_IMPLICIT;
6289 } else {
6290 (void) mod_hash_remove(tep->te_addrhash,
6291 (mod_hash_key_t)&tep->te_ap,
6292 (mod_hash_val_t *)&elp);
6293 (void) kmem_free(tep->te_abuf, tep->te_alen);
6294 tep->te_alen = -1;
6295 tep->te_abuf = NULL;
6297 tep->te_flag &= ~TL_ADDRHASHED;