7888 installboot: print version info of the file
[unleashed.git] / kernel / net / ip / rts.c
blobb87695017e357b1cc2b8e773980468ef4f1db2c9
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/strsubr.h>
29 #include <sys/stropts.h>
30 #include <sys/strsun.h>
31 #include <sys/strlog.h>
32 #define _SUN_TPI_VERSION 2
33 #include <sys/tihdr.h>
34 #include <sys/timod.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/proc.h>
39 #include <sys/suntpi.h>
40 #include <sys/policy.h>
41 #include <sys/zone.h>
42 #include <sys/disp.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <netinet/in.h>
48 #include <inet/common.h>
49 #include <netinet/ip6.h>
50 #include <inet/ip.h>
51 #include <inet/ipclassifier.h>
52 #include <inet/proto_set.h>
53 #include <inet/nd.h>
54 #include <inet/optcom.h>
55 #include <netinet/ip_mroute.h>
56 #include <sys/isa_defs.h>
57 #include <net/route.h>
59 #include <inet/rts_impl.h>
60 #include <inet/ip_rts.h>
63 * This is a transport provider for routing sockets. Downstream messages are
64 * wrapped with an IP_IOCTL header, and ip_wput_ioctl calls the appropriate entry
65 * in the ip_ioctl_ftbl callout table to pass the routing socket data into IP.
66 * Upstream messages are generated for listeners of the routing socket as well
67 * as the message sender (unless they have turned off their end using
68 * SO_USELOOPBACK or shutdown(3n)). Upstream messages may also be generated
69 * asynchronously when:
71 * Interfaces are brought up or down.
72 * Addresses are assigned to interfaces.
73 * ICMP redirects are processed and an IRE_HOST/RTF_DYNAMIC is installed.
74 * No route is found while sending a packet.
76 * Since all we do is reformat the messages between routing socket and
77 * ioctl forms, no synchronization is necessary in this module; all
78 * the dirty work is done down in ip.
81 /* Default structure copied into T_INFO_ACK messages */
82 static struct T_info_ack rts_g_t_info_ack = {
83 T_INFO_ACK,
84 T_INFINITE, /* TSDU_size. Maximum size messages. */
85 T_INVALID, /* ETSDU_size. No expedited data. */
86 T_INVALID, /* CDATA_size. No connect data. */
87 T_INVALID, /* DDATA_size. No disconnect data. */
88 0, /* ADDR_size. */
89 0, /* OPT_size - not initialized here */
90 64 * 1024, /* TIDU_size. rts allows maximum size messages. */
91 T_COTS, /* SERV_type. rts supports connection oriented. */
92 TS_UNBND, /* CURRENT_state. This is set from rts_state. */
93 (XPG4_1) /* PROVIDER_flag */
97 * Table of ND variables supported by rts. These are loaded into rts_g_nd
98 * in rts_open.
99 * All of these are alterable, within the min/max values given, at run time.
101 static rtsparam_t lcl_param_arr[] = {
102 /* min max value name */
103 { 4096, 65536, 8192, "rts_xmit_hiwat"},
104 { 0, 65536, 1024, "rts_xmit_lowat"},
105 { 4096, 65536, 8192, "rts_recv_hiwat"},
106 { 65536, 1024*1024*1024, 256*1024, "rts_max_buf"},
/*
 * Shorthand for the per-stack tunables.  The indices must match the order
 * of the entries in lcl_param_arr[].
 */
#define	rtss_xmit_hiwat		rtss_params[0].rts_param_value
#define	rtss_xmit_lowat		rtss_params[1].rts_param_value
#define	rtss_recv_hiwat		rtss_params[2].rts_param_value
#define	rtss_max_buf		rtss_params[3].rts_param_value
113 static void rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
114 int sys_error);
115 static void rts_input(void *, mblk_t *, void *, ip_recv_attr_t *);
116 static void rts_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
117 static mblk_t *rts_ioctl_alloc(mblk_t *data);
118 static int rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
119 static boolean_t rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt);
120 static int rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
121 cred_t *cr);
122 static void rts_rsrv(queue_t *q);
123 static void *rts_stack_init(netstackid_t stackid, netstack_t *ns);
124 static void rts_stack_fini(netstackid_t stackid, void *arg);
125 static void rts_wput(queue_t *q, mblk_t *mp);
126 static void rts_wput_iocdata(queue_t *q, mblk_t *mp);
127 static void rts_wput_other(queue_t *q, mblk_t *mp);
128 static int rts_wrw(queue_t *q, struiod_t *dp);
130 static int rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag,
131 cred_t *credp);
132 static conn_t *rts_open(int flag, cred_t *credp);
134 static int rts_stream_close(queue_t *q);
135 static int rts_close(sock_lower_handle_t proto_handle, int flags,
136 cred_t *cr);
138 static struct module_info rts_mod_info = {
139 129, "rts", 1, INFPSZ, 512, 128
142 static struct qinit rtsrinit = {
143 NULL, (pfi_t)rts_rsrv, rts_stream_open, rts_stream_close, NULL,
144 &rts_mod_info
147 static struct qinit rtswinit = {
148 (pfi_t)rts_wput, NULL, NULL, NULL, NULL, &rts_mod_info,
149 NULL, (pfi_t)rts_wrw, NULL, STRUIOT_STANDARD
152 struct streamtab rtsinfo = {
153 &rtsrinit, &rtswinit
157 * This routine allocates the necessary
158 * message blocks for IOCTL wrapping the
159 * user data.
161 static mblk_t *
162 rts_ioctl_alloc(mblk_t *data)
164 mblk_t *mp = NULL;
165 mblk_t *mp1 = NULL;
166 ipllc_t *ipllc;
167 struct iocblk *ioc;
169 mp = allocb_tmpl(sizeof (ipllc_t), data);
170 if (mp == NULL)
171 return (NULL);
172 mp1 = allocb_tmpl(sizeof (struct iocblk), data);
173 if (mp1 == NULL) {
174 freeb(mp);
175 return (NULL);
178 ipllc = (ipllc_t *)mp->b_rptr;
179 ipllc->ipllc_cmd = IP_IOC_RTS_REQUEST;
180 ipllc->ipllc_name_offset = 0;
181 ipllc->ipllc_name_length = 0;
182 mp->b_wptr += sizeof (ipllc_t);
183 mp->b_cont = data;
185 ioc = (struct iocblk *)mp1->b_rptr;
186 ioc->ioc_cmd = IP_IOCTL;
187 ioc->ioc_error = 0;
188 ioc->ioc_cr = NULL;
189 ioc->ioc_count = msgdsize(mp);
190 mp1->b_wptr += sizeof (struct iocblk);
191 mp1->b_datap->db_type = M_IOCTL;
192 mp1->b_cont = mp;
194 return (mp1);
198 * This routine closes rts stream, by disabling
199 * put/srv routines and freeing the this module
200 * internal datastructure.
202 static int
203 rts_common_close(queue_t *q, conn_t *connp)
206 ASSERT(connp != NULL && IPCL_IS_RTS(connp));
208 ip_rts_unregister(connp);
210 ip_quiesce_conn(connp);
212 if (!IPCL_IS_NONSTR(connp)) {
213 qprocsoff(q);
217 * Now we are truly single threaded on this stream, and can
218 * delete the things hanging off the connp, and finally the connp.
219 * We removed this connp from the fanout list, it cannot be
220 * accessed thru the fanouts, and we already waited for the
221 * conn_ref to drop to 0. We are already in close, so
222 * there cannot be any other thread from the top. qprocsoff
223 * has completed, and service has completed or won't run in
224 * future.
226 ASSERT(connp->conn_ref == 1);
228 if (!IPCL_IS_NONSTR(connp)) {
229 inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
230 } else {
231 ip_free_helper_stream(connp);
234 connp->conn_ref--;
235 ipcl_conn_destroy(connp);
236 return (0);
239 static int
240 rts_stream_close(queue_t *q)
242 conn_t *connp = Q_TO_CONN(q);
244 (void) rts_common_close(q, connp);
245 q->q_ptr = WR(q)->q_ptr = NULL;
246 return (0);
250 * This is the open routine for routing socket. It allocates
251 * rts_t structure for the stream and tells IP that it is a routing socket.
253 /* ARGSUSED */
254 static int
255 rts_stream_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
257 conn_t *connp;
258 dev_t conn_dev;
259 rts_t *rts;
261 /* If the stream is already open, return immediately. */
262 if (q->q_ptr != NULL)
263 return (0);
265 if (sflag == MODOPEN)
266 return (EINVAL);
269 * Since RTS is not used so heavily, allocating from the small
270 * arena should be sufficient.
272 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) {
273 return (EBUSY);
276 connp = rts_open(flag, credp);
277 ASSERT(connp != NULL);
279 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
281 rts = connp->conn_rts;
282 rw_enter(&rts->rts_rwlock, RW_WRITER);
283 connp->conn_dev = conn_dev;
284 connp->conn_minor_arena = ip_minor_arena_sa;
286 q->q_ptr = connp;
287 WR(q)->q_ptr = connp;
288 connp->conn_rq = q;
289 connp->conn_wq = WR(q);
291 WR(q)->q_hiwat = connp->conn_sndbuf;
292 WR(q)->q_lowat = connp->conn_sndlowat;
294 mutex_enter(&connp->conn_lock);
295 connp->conn_state_flags &= ~CONN_INCIPIENT;
296 mutex_exit(&connp->conn_lock);
297 rw_exit(&rts->rts_rwlock);
299 /* Indicate to IP that this is a routing socket client */
300 ip_rts_register(connp);
302 qprocson(q);
304 return (0);
307 /* ARGSUSED */
308 static conn_t *
309 rts_open(int flag, cred_t *credp)
311 netstack_t *ns;
312 rts_stack_t *rtss;
313 rts_t *rts;
314 conn_t *connp;
315 zoneid_t zoneid;
317 ns = netstack_find_by_cred(credp);
318 ASSERT(ns != NULL);
319 rtss = ns->netstack_rts;
320 ASSERT(rtss != NULL);
323 * For exclusive stacks we set the zoneid to zero
324 * to make RTS operate as if in the global zone.
326 if (ns->netstack_stackid != GLOBAL_NETSTACKID)
327 zoneid = GLOBAL_ZONEID;
328 else
329 zoneid = crgetzoneid(credp);
331 connp = ipcl_conn_create(IPCL_RTSCONN, KM_SLEEP, ns);
332 rts = connp->conn_rts;
335 * ipcl_conn_create did a netstack_hold. Undo the hold that was
336 * done by netstack_find_by_cred()
338 netstack_rele(ns);
340 rw_enter(&rts->rts_rwlock, RW_WRITER);
341 ASSERT(connp->conn_rts == rts);
342 ASSERT(rts->rts_connp == connp);
344 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
345 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
346 connp->conn_ixa->ixa_zoneid = zoneid;
347 connp->conn_zoneid = zoneid;
348 connp->conn_flow_cntrld = B_FALSE;
350 rts->rts_rtss = rtss;
352 connp->conn_rcvbuf = rtss->rtss_recv_hiwat;
353 connp->conn_sndbuf = rtss->rtss_xmit_hiwat;
354 connp->conn_sndlowat = rtss->rtss_xmit_lowat;
355 connp->conn_rcvlowat = rts_mod_info.mi_lowat;
357 connp->conn_family = PF_ROUTE;
358 connp->conn_so_type = SOCK_RAW;
359 /* SO_PROTOTYPE is always sent down by sockfs setting conn_proto */
361 connp->conn_recv = rts_input;
362 connp->conn_recvicmp = rts_icmp_input;
364 crhold(credp);
365 connp->conn_cred = credp;
366 connp->conn_cpid = curproc->p_pid;
367 /* Cache things in ixa without an extra refhold */
368 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
369 connp->conn_ixa->ixa_cred = connp->conn_cred;
370 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
373 * rts sockets start out as bound and connected
374 * For streams based sockets, socket state is set to
375 * SS_ISBOUND | SS_ISCONNECTED in so_strinit.
377 rts->rts_state = TS_DATA_XFER;
378 rw_exit(&rts->rts_rwlock);
380 return (connp);
384 * This routine creates a T_ERROR_ACK message and passes it upstream.
386 static void
387 rts_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
389 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
390 qreply(q, mp);
394 * This routine creates a T_OK_ACK message and passes it upstream.
396 static void
397 rts_ok_ack(queue_t *q, mblk_t *mp)
399 if ((mp = mi_tpi_ok_ack_alloc(mp)) != NULL)
400 qreply(q, mp);
404 * This routine is called by rts_wput to handle T_UNBIND_REQ messages.
406 static void
407 rts_tpi_unbind(queue_t *q, mblk_t *mp)
409 conn_t *connp = Q_TO_CONN(q);
410 rts_t *rts = connp->conn_rts;
412 /* If a bind has not been done, we can't unbind. */
413 if (rts->rts_state != TS_IDLE) {
414 rts_err_ack(q, mp, TOUTSTATE, 0);
415 return;
417 rts->rts_state = TS_UNBND;
418 rts_ok_ack(q, mp);
422 * This routine is called to handle each
423 * O_T_BIND_REQ/T_BIND_REQ message passed to
424 * rts_wput. Note: This routine works with both
425 * O_T_BIND_REQ and T_BIND_REQ semantics.
427 static void
428 rts_tpi_bind(queue_t *q, mblk_t *mp)
430 conn_t *connp = Q_TO_CONN(q);
431 rts_t *rts = connp->conn_rts;
432 struct T_bind_req *tbr;
434 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
435 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
436 "rts_tpi_bind: bad data, %d", rts->rts_state);
437 rts_err_ack(q, mp, TBADADDR, 0);
438 return;
440 if (rts->rts_state != TS_UNBND) {
441 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
442 "rts_tpi_bind: bad state, %d", rts->rts_state);
443 rts_err_ack(q, mp, TOUTSTATE, 0);
444 return;
446 tbr = (struct T_bind_req *)mp->b_rptr;
447 if (tbr->ADDR_length != 0) {
448 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
449 "rts_tpi_bind: bad ADDR_length %d", tbr->ADDR_length);
450 rts_err_ack(q, mp, TBADADDR, 0);
451 return;
453 /* Generic request */
454 tbr->ADDR_offset = (t_scalar_t)sizeof (struct T_bind_req);
455 tbr->ADDR_length = 0;
456 tbr->PRIM_type = T_BIND_ACK;
457 mp->b_datap->db_type = M_PCPROTO;
458 rts->rts_state = TS_IDLE;
459 qreply(q, mp);
462 static void
463 rts_copy_info(struct T_info_ack *tap, rts_t *rts)
465 *tap = rts_g_t_info_ack;
466 tap->CURRENT_state = rts->rts_state;
467 tap->OPT_size = rts_max_optsize;
471 * This routine responds to T_CAPABILITY_REQ messages. It is called by
472 * rts_wput. Much of the T_CAPABILITY_ACK information is copied from
473 * rts_g_t_info_ack. The current state of the stream is copied from
474 * rts_state.
476 static void
477 rts_capability_req(queue_t *q, mblk_t *mp)
479 conn_t *connp = Q_TO_CONN(q);
480 rts_t *rts = connp->conn_rts;
481 t_uscalar_t cap_bits1;
482 struct T_capability_ack *tcap;
484 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
486 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
487 mp->b_datap->db_type, T_CAPABILITY_ACK);
488 if (mp == NULL)
489 return;
491 tcap = (struct T_capability_ack *)mp->b_rptr;
492 tcap->CAP_bits1 = 0;
494 if (cap_bits1 & TC1_INFO) {
495 rts_copy_info(&tcap->INFO_ack, rts);
496 tcap->CAP_bits1 |= TC1_INFO;
499 qreply(q, mp);
503 * This routine responds to T_INFO_REQ messages. It is called by rts_wput.
504 * Most of the T_INFO_ACK information is copied from rts_g_t_info_ack.
505 * The current state of the stream is copied from rts_state.
507 static void
508 rts_info_req(queue_t *q, mblk_t *mp)
510 conn_t *connp = Q_TO_CONN(q);
511 rts_t *rts = connp->conn_rts;
513 mp = tpi_ack_alloc(mp, sizeof (rts_g_t_info_ack), M_PCPROTO,
514 T_INFO_ACK);
515 if (mp == NULL)
516 return;
517 rts_copy_info((struct T_info_ack *)mp->b_rptr, rts);
518 qreply(q, mp);
522 * This routine gets default values of certain options whose default
523 * values are maintained by protcol specific code
525 /* ARGSUSED */
527 rts_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
529 /* no default value processed by protocol specific code currently */
530 return (-1);
534 static int
535 rts_opt_get(conn_t *connp, int level, int name, uchar_t *ptr)
537 rts_t *rts = connp->conn_rts;
538 conn_opt_arg_t coas;
539 int retval;
541 ASSERT(RW_READ_HELD(&rts->rts_rwlock));
543 switch (level) {
544 /* do this in conn_opt_get? */
545 case SOL_ROUTE:
546 switch (name) {
547 case RT_AWARE:
548 mutex_enter(&connp->conn_lock);
549 *(int *)ptr = connp->conn_rtaware;
550 mutex_exit(&connp->conn_lock);
551 return (0);
553 break;
555 coas.coa_connp = connp;
556 coas.coa_ixa = connp->conn_ixa;
557 coas.coa_ipp = &connp->conn_xmit_ipp;
558 mutex_enter(&connp->conn_lock);
559 retval = conn_opt_get(&coas, level, name, ptr);
560 mutex_exit(&connp->conn_lock);
561 return (retval);
564 /* ARGSUSED */
565 static int
566 rts_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
567 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
568 void *thisdg_attrs, boolean_t checkonly)
570 int *i1 = (int *)invalp;
571 rts_t *rts = connp->conn_rts;
572 rts_stack_t *rtss = rts->rts_rtss;
573 int error;
574 conn_opt_arg_t coas;
576 coas.coa_connp = connp;
577 coas.coa_ixa = connp->conn_ixa;
578 coas.coa_ipp = &connp->conn_xmit_ipp;
580 ASSERT(RW_WRITE_HELD(&rts->rts_rwlock));
583 * For rts, we should have no ancillary data sent down
584 * (rts_wput doesn't handle options).
586 ASSERT(thisdg_attrs == NULL);
589 * For fixed length options, no sanity check
590 * of passed in length is done. It is assumed *_optcom_req()
591 * routines do the right thing.
594 switch (level) {
595 case SOL_SOCKET:
596 switch (name) {
597 case SO_PROTOTYPE:
599 * Routing socket applications that call socket() with
600 * a third argument can filter which messages will be
601 * sent upstream thanks to sockfs. so_socket() sends
602 * down the SO_PROTOTYPE and rts_queue_input()
603 * implements the filtering.
605 if (*i1 != AF_INET && *i1 != AF_INET6) {
606 *outlenp = 0;
607 return (EPROTONOSUPPORT);
609 if (!checkonly)
610 connp->conn_proto = *i1;
611 *outlenp = inlen;
612 return (0);
615 * The following two items can be manipulated,
616 * but changing them should do nothing.
618 case SO_SNDBUF:
619 if (*i1 > rtss->rtss_max_buf) {
620 *outlenp = 0;
621 return (ENOBUFS);
623 break; /* goto sizeof (int) option return */
624 case SO_RCVBUF:
625 if (*i1 > rtss->rtss_max_buf) {
626 *outlenp = 0;
627 return (ENOBUFS);
629 break; /* goto sizeof (int) option return */
631 break;
632 case SOL_ROUTE:
633 switch (name) {
634 case RT_AWARE:
635 if (!checkonly) {
636 mutex_enter(&connp->conn_lock);
637 connp->conn_rtaware = *i1;
638 mutex_exit(&connp->conn_lock);
640 *outlenp = inlen;
641 return (0);
643 break;
645 /* Serialized setsockopt since we are D_MTQPAIR */
646 error = conn_opt_set(&coas, level, name, inlen, invalp,
647 checkonly, cr);
648 if (error != 0) {
649 *outlenp = 0;
650 return (error);
653 * Common case of return from an option that is sizeof (int)
655 if (invalp != outvalp) {
656 /* don't trust bcopy for identical src/dst */
657 (void) bcopy(invalp, outvalp, inlen);
659 *outlenp = (t_uscalar_t)sizeof (int);
660 return (0);
663 static int
664 rts_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
665 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
666 void *thisdg_attrs, cred_t *cr)
668 boolean_t checkonly = B_FALSE;
670 if (optset_context) {
671 switch (optset_context) {
672 case SETFN_OPTCOM_CHECKONLY:
673 checkonly = B_TRUE;
675 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
676 * inlen != 0 implies value supplied and
677 * we have to "pretend" to set it.
678 * inlen == 0 implies that there is no value part
679 * in T_CHECK request and just validation
680 * done elsewhere should be enough, we just return here.
682 if (inlen == 0) {
683 *outlenp = 0;
684 return (0);
686 break;
687 case SETFN_OPTCOM_NEGOTIATE:
688 checkonly = B_FALSE;
689 break;
690 case SETFN_UD_NEGOTIATE:
691 case SETFN_CONN_NEGOTIATE:
692 checkonly = B_FALSE;
694 * Negotiating local and "association-related" options
695 * through T_UNITDATA_REQ or T_CONN_{REQ,CON}
696 * Not allowed in this module.
698 return (EINVAL);
699 default:
701 * We should never get here
703 *outlenp = 0;
704 return (EINVAL);
707 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
708 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
711 return (rts_do_opt_set(connp, level, name, inlen, invalp, outlenp,
712 outvalp, cr, thisdg_attrs, checkonly));
717 * This routine retrieves the current status of socket options.
718 * It returns the size of the option retrieved.
721 rts_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
723 rts_t *rts;
724 int err;
726 rts = Q_TO_RTS(q);
727 rw_enter(&rts->rts_rwlock, RW_READER);
728 err = rts_opt_get(Q_TO_CONN(q), level, name, ptr);
729 rw_exit(&rts->rts_rwlock);
730 return (err);
734 * This routine sets socket options.
736 /*ARGSUSED*/
738 rts_tpi_opt_set(queue_t *q, uint_t optset_context, int level,
739 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
740 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
742 conn_t *connp = Q_TO_CONN(q);
743 int error;
744 rts_t *rts = connp->conn_rts;
747 rw_enter(&rts->rts_rwlock, RW_WRITER);
748 error = rts_opt_set(connp, optset_context, level, name, inlen, invalp,
749 outlenp, outvalp, thisdg_attrs, cr);
750 rw_exit(&rts->rts_rwlock);
751 return (error);
755 * This routine retrieves the value of an ND variable in a rtsparam_t
756 * structure. It is called through nd_getset when a user reads the
757 * variable.
759 /* ARGSUSED */
760 static int
761 rts_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
763 rtsparam_t *rtspa = (rtsparam_t *)cp;
765 (void) mi_mpprintf(mp, "%u", rtspa->rts_param_value);
766 return (0);
770 * Walk through the param array specified registering each element with the
771 * named dispatch (ND) handler.
773 static boolean_t
774 rts_param_register(IDP *ndp, rtsparam_t *rtspa, int cnt)
776 for (; cnt-- > 0; rtspa++) {
777 if (rtspa->rts_param_name != NULL && rtspa->rts_param_name[0]) {
778 if (!nd_load(ndp, rtspa->rts_param_name,
779 rts_param_get, rts_param_set, (caddr_t)rtspa)) {
780 nd_free(ndp);
781 return (B_FALSE);
785 return (B_TRUE);
788 /* This routine sets an ND variable in a rtsparam_t structure. */
789 /* ARGSUSED */
790 static int
791 rts_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
793 ulong_t new_value;
794 rtsparam_t *rtspa = (rtsparam_t *)cp;
797 * Fail the request if the new value does not lie within the
798 * required bounds.
800 if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
801 new_value < rtspa->rts_param_min ||
802 new_value > rtspa->rts_param_max) {
803 return (EINVAL);
806 /* Set the new value */
807 rtspa->rts_param_value = new_value;
808 return (0);
812 * Empty rsrv routine which is used by rts_input to cause a wakeup
813 * of a thread in qwait.
815 /*ARGSUSED*/
816 static void
817 rts_rsrv(queue_t *q)
822 * This routine handles synchronous messages passed downstream. It either
823 * consumes the message or passes it downstream; it never queues a
824 * a message. The data messages that go down are wrapped in an IOCTL
825 * message.
827 * Since it is synchronous, it waits for the M_IOCACK/M_IOCNAK so that
828 * it can return an immediate error (such as ENETUNREACH when adding a route).
829 * It uses the RTS_WRW_PENDING to ensure that each rts instance has only
830 * one M_IOCTL outstanding at any given time.
832 static int
833 rts_wrw(queue_t *q, struiod_t *dp)
835 mblk_t *mp = dp->d_mp;
836 mblk_t *mp1;
837 int error;
838 rt_msghdr_t *rtm;
839 conn_t *connp = Q_TO_CONN(q);
840 rts_t *rts = connp->conn_rts;
842 while (rts->rts_flag & RTS_WRW_PENDING) {
843 if (qwait_rw(q)) {
844 rts->rts_error = EINTR;
845 goto err_ret;
848 rts->rts_flag |= RTS_WRW_PENDING;
850 if (isuioq(q) && (error = struioget(q, mp, dp, 0))) {
852 * Uio error of some sort, so just return the error.
854 rts->rts_error = error;
855 goto err_ret;
858 * Pass the mblk (chain) onto wput().
860 dp->d_mp = 0;
862 switch (mp->b_datap->db_type) {
863 case M_PROTO:
864 case M_PCPROTO:
865 /* Expedite other than T_DATA_REQ to below the switch */
866 if (((mp->b_wptr - mp->b_rptr) !=
867 sizeof (struct T_data_req)) ||
868 (((union T_primitives *)mp->b_rptr)->type != T_DATA_REQ))
869 break;
870 if ((mp1 = mp->b_cont) == NULL) {
871 rts->rts_error = EINVAL;
872 freemsg(mp);
873 goto err_ret;
875 freeb(mp);
876 mp = mp1;
877 /* FALLTHRU */
878 case M_DATA:
880 * The semantics of the routing socket is such that the rtm_pid
881 * field is automatically filled in during requests with the
882 * current process' pid. We do this here (where we still have
883 * user context) after checking we have at least a message the
884 * size of a routing message header.
886 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
887 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) {
888 rts->rts_error = EINVAL;
889 freemsg(mp);
890 goto err_ret;
893 rtm = (rt_msghdr_t *)mp->b_rptr;
894 rtm->rtm_pid = curproc->p_pid;
895 break;
896 default:
897 break;
899 rts->rts_flag |= RTS_WPUT_PENDING;
900 rts_wput(q, mp);
901 while (rts->rts_flag & RTS_WPUT_PENDING)
902 if (qwait_rw(q)) {
903 /* RTS_WPUT_PENDING will be cleared below */
904 rts->rts_error = EINTR;
905 break;
907 err_ret:
908 rts->rts_flag &= ~(RTS_WPUT_PENDING | RTS_WRW_PENDING);
909 return (rts->rts_error);
913 * This routine handles all messages passed downstream. It either
914 * consumes the message or passes it downstream; it never queues a
915 * a message. The data messages that go down are wrapped in an IOCTL
916 * message.
918 static void
919 rts_wput(queue_t *q, mblk_t *mp)
921 uchar_t *rptr = mp->b_rptr;
922 mblk_t *mp1;
923 conn_t *connp = Q_TO_CONN(q);
924 rts_t *rts = connp->conn_rts;
926 switch (mp->b_datap->db_type) {
927 case M_DATA:
928 break;
929 case M_PROTO:
930 case M_PCPROTO:
931 if ((mp->b_wptr - rptr) == sizeof (struct T_data_req)) {
932 /* Expedite valid T_DATA_REQ to below the switch */
933 if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
934 mp1 = mp->b_cont;
935 freeb(mp);
936 if (mp1 == NULL)
937 return;
938 mp = mp1;
939 break;
942 /* FALLTHRU */
943 default:
944 rts_wput_other(q, mp);
945 return;
949 ASSERT(msg_getcred(mp, NULL) != NULL);
951 mp1 = rts_ioctl_alloc(mp);
952 if (mp1 == NULL) {
953 ASSERT(rts != NULL);
954 freemsg(mp);
955 if (rts->rts_flag & RTS_WPUT_PENDING) {
956 rts->rts_error = ENOMEM;
957 rts->rts_flag &= ~RTS_WPUT_PENDING;
959 return;
961 ip_wput_nondata(q, mp1);
966 * Handles all the control message, if it
967 * can not understand it, it will
968 * pass down stream.
970 static void
971 rts_wput_other(queue_t *q, mblk_t *mp)
973 conn_t *connp = Q_TO_CONN(q);
974 rts_t *rts = connp->conn_rts;
975 uchar_t *rptr = mp->b_rptr;
976 struct iocblk *iocp;
977 cred_t *cr;
978 rts_stack_t *rtss;
980 rtss = rts->rts_rtss;
982 switch (mp->b_datap->db_type) {
983 case M_PROTO:
984 case M_PCPROTO:
985 if ((mp->b_wptr - rptr) < sizeof (t_scalar_t)) {
987 * If the message does not contain a PRIM_type,
988 * throw it away.
990 freemsg(mp);
991 return;
993 switch (((union T_primitives *)rptr)->type) {
994 case T_BIND_REQ:
995 case O_T_BIND_REQ:
996 rts_tpi_bind(q, mp);
997 return;
998 case T_UNBIND_REQ:
999 rts_tpi_unbind(q, mp);
1000 return;
1001 case T_CAPABILITY_REQ:
1002 rts_capability_req(q, mp);
1003 return;
1004 case T_INFO_REQ:
1005 rts_info_req(q, mp);
1006 return;
1007 case T_SVR4_OPTMGMT_REQ:
1008 case T_OPTMGMT_REQ:
1010 * All Solaris components should pass a db_credp
1011 * for this TPI message, hence we ASSERT.
1012 * But in case there is some other M_PROTO that looks
1013 * like a TPI message sent by some other kernel
1014 * component, we check and return an error.
1016 cr = msg_getcred(mp, NULL);
1017 ASSERT(cr != NULL);
1018 if (cr == NULL) {
1019 rts_err_ack(q, mp, TSYSERR, EINVAL);
1020 return;
1022 if (((union T_primitives *)rptr)->type ==
1023 T_SVR4_OPTMGMT_REQ) {
1024 svr4_optcom_req(q, mp, cr, &rts_opt_obj);
1025 } else {
1026 tpi_optcom_req(q, mp, cr, &rts_opt_obj);
1028 return;
1029 case O_T_CONN_RES:
1030 case T_CONN_RES:
1031 case T_DISCON_REQ:
1032 /* Not supported by rts. */
1033 rts_err_ack(q, mp, TNOTSUPPORT, 0);
1034 return;
1035 case T_DATA_REQ:
1036 case T_EXDATA_REQ:
1037 case T_ORDREL_REQ:
1038 /* Illegal for rts. */
1039 freemsg(mp);
1040 (void) putnextctl1(RD(q), M_ERROR, EPROTO);
1041 return;
1043 default:
1044 break;
1046 break;
1047 case M_IOCTL:
1048 iocp = (struct iocblk *)mp->b_rptr;
1049 switch (iocp->ioc_cmd) {
1050 case ND_SET:
1051 case ND_GET:
1052 if (nd_getset(q, rtss->rtss_g_nd, mp)) {
1053 qreply(q, mp);
1054 return;
1056 break;
1057 case TI_GETPEERNAME:
1058 mi_copyin(q, mp, NULL,
1059 SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
1060 return;
1061 default:
1062 break;
1064 case M_IOCDATA:
1065 rts_wput_iocdata(q, mp);
1066 return;
1067 default:
1068 break;
1070 ip_wput_nondata(q, mp);
1074 * Called by rts_wput_other to handle all M_IOCDATA messages.
1076 static void
1077 rts_wput_iocdata(queue_t *q, mblk_t *mp)
1079 struct sockaddr *rtsaddr;
1080 mblk_t *mp1;
1081 STRUCT_HANDLE(strbuf, sb);
1082 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
1084 /* Make sure it is one of ours. */
1085 switch (iocp->ioc_cmd) {
1086 case TI_GETPEERNAME:
1087 break;
1088 default:
1089 ip_wput_nondata(q, mp);
1090 return;
1092 switch (mi_copy_state(q, mp, &mp1)) {
1093 case -1:
1094 return;
1095 case MI_COPY_CASE(MI_COPY_IN, 1):
1096 break;
1097 case MI_COPY_CASE(MI_COPY_OUT, 1):
1098 /* Copy out the strbuf. */
1099 mi_copyout(q, mp);
1100 return;
1101 case MI_COPY_CASE(MI_COPY_OUT, 2):
1102 /* All done. */
1103 mi_copy_done(q, mp, 0);
1104 return;
1105 default:
1106 mi_copy_done(q, mp, EPROTO);
1107 return;
1109 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
1110 if (STRUCT_FGET(sb, maxlen) < (int)sizeof (sin_t)) {
1111 mi_copy_done(q, mp, EINVAL);
1112 return;
1114 switch (iocp->ioc_cmd) {
1115 case TI_GETPEERNAME:
1116 break;
1117 default:
1118 mi_copy_done(q, mp, EPROTO);
1119 return;
1121 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), sizeof (sin_t),
1122 B_TRUE);
1123 if (mp1 == NULL)
1124 return;
1125 STRUCT_FSET(sb, len, (int)sizeof (sin_t));
1126 rtsaddr = (struct sockaddr *)mp1->b_rptr;
1127 mp1->b_wptr = (uchar_t *)&rtsaddr[1];
1128 bzero(rtsaddr, sizeof (struct sockaddr));
1129 rtsaddr->sa_family = AF_ROUTE;
1130 /* Copy out the address */
1131 mi_copyout(q, mp);
1135 * IP passes up a NULL ira.
1137 /*ARGSUSED2*/
1138 static void
1139 rts_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
1141 conn_t *connp = (conn_t *)arg1;
1142 rts_t *rts = connp->conn_rts;
1143 struct iocblk *iocp;
1144 mblk_t *mp1;
1145 struct T_data_ind *tdi;
1146 int error;
1148 switch (mp->b_datap->db_type) {
1149 case M_IOCACK:
1150 case M_IOCNAK:
1151 iocp = (struct iocblk *)mp->b_rptr;
1152 ASSERT(!IPCL_IS_NONSTR(connp));
1153 if (rts->rts_flag & (RTS_WPUT_PENDING)) {
1154 rts->rts_flag &= ~RTS_WPUT_PENDING;
1155 rts->rts_error = iocp->ioc_error;
1157 * Tell rts_wvw/qwait that we are done.
1158 * Note: there is no qwait_wakeup() we can use.
1160 qenable(connp->conn_rq);
1161 freemsg(mp);
1162 return;
1164 break;
1165 case M_DATA:
1167 * Prepend T_DATA_IND to prevent the stream head from
1168 * consolidating multiple messages together.
1169 * If the allocation fails just send up the M_DATA.
1171 mp1 = allocb(sizeof (*tdi), BPRI_MED);
1172 if (mp1 != NULL) {
1173 mp1->b_cont = mp;
1174 mp = mp1;
1176 mp->b_datap->db_type = M_PROTO;
1177 mp->b_wptr += sizeof (*tdi);
1178 tdi = (struct T_data_ind *)mp->b_rptr;
1179 tdi->PRIM_type = T_DATA_IND;
1180 tdi->MORE_flag = 0;
1182 break;
1183 default:
1184 break;
1187 if (IPCL_IS_NONSTR(connp)) {
1188 if ((*connp->conn_upcalls->su_recv)
1189 (connp->conn_upper_handle, mp, msgdsize(mp), 0,
1190 &error, NULL) < 0) {
1191 ASSERT(error == ENOSPC);
1193 * Let's confirm hoding the lock that
1194 * we are out of recv space.
1196 mutex_enter(&rts->rts_recv_mutex);
1197 if ((*connp->conn_upcalls->su_recv)
1198 (connp->conn_upper_handle, NULL, 0, 0,
1199 &error, NULL) < 0) {
1200 ASSERT(error == ENOSPC);
1201 connp->conn_flow_cntrld = B_TRUE;
1203 mutex_exit(&rts->rts_recv_mutex);
1205 } else {
1206 putnext(connp->conn_rq, mp);
1210 /*ARGSUSED*/
1211 static void
1212 rts_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
1214 freemsg(mp);
1217 void
1218 rts_ddi_g_init(void)
1220 rts_max_optsize = optcom_max_optsize(rts_opt_obj.odb_opt_des_arr,
1221 rts_opt_obj.odb_opt_arr_cnt);
1224 * We want to be informed each time a stack is created or
1225 * destroyed in the kernel, so we can maintain the
1226 * set of rts_stack_t's.
1228 netstack_register(NS_RTS, rts_stack_init, NULL, rts_stack_fini);
1231 void
1232 rts_ddi_g_destroy(void)
1234 netstack_unregister(NS_RTS);
1237 #define INET_NAME "ip"
1240 * Initialize the RTS stack instance.
1242 /* ARGSUSED */
1243 static void *
1244 rts_stack_init(netstackid_t stackid, netstack_t *ns)
1246 rts_stack_t *rtss;
1247 rtsparam_t *pa;
1248 int error = 0;
1249 major_t major;
1251 rtss = (rts_stack_t *)kmem_zalloc(sizeof (*rtss), KM_SLEEP);
1252 rtss->rtss_netstack = ns;
1254 pa = (rtsparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
1255 rtss->rtss_params = pa;
1256 bcopy(lcl_param_arr, rtss->rtss_params, sizeof (lcl_param_arr));
1258 (void) rts_param_register(&rtss->rtss_g_nd,
1259 rtss->rtss_params, A_CNT(lcl_param_arr));
1261 major = mod_name_to_major(INET_NAME);
1262 error = ldi_ident_from_major(major, &rtss->rtss_ldi_ident);
1263 ASSERT(error == 0);
1264 return (rtss);
1268 * Free the RTS stack instance.
1270 /* ARGSUSED */
1271 static void
1272 rts_stack_fini(netstackid_t stackid, void *arg)
1274 rts_stack_t *rtss = (rts_stack_t *)arg;
1276 nd_free(&rtss->rtss_g_nd);
1277 kmem_free(rtss->rtss_params, sizeof (lcl_param_arr));
1278 rtss->rtss_params = NULL;
1279 ldi_ident_release(rtss->rtss_ldi_ident);
1280 kmem_free(rtss, sizeof (*rtss));
1283 /* ARGSUSED */
1285 rts_accept(sock_lower_handle_t lproto_handle,
1286 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
1287 cred_t *cr)
1289 return (EINVAL);
1292 /* ARGSUSED */
1293 static int
1294 rts_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
1295 socklen_t len, cred_t *cr)
1298 * rebind not allowed
1300 return (EINVAL);
1303 /* ARGSUSED */
1305 rts_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
1307 return (EINVAL);
1310 /* ARGSUSED */
1312 rts_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
1313 socklen_t len, sock_connid_t *id, cred_t *cr)
1316 * rts sockets start out as bound and connected
1318 *id = 0;
1319 return (EISCONN);
1322 /* ARGSUSED */
1324 rts_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
1325 socklen_t *addrlen, cred_t *cr)
1327 bzero(addr, sizeof (struct sockaddr));
1328 addr->sa_family = AF_ROUTE;
1329 *addrlen = sizeof (struct sockaddr);
1331 return (0);
1334 /* ARGSUSED */
1336 rts_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
1337 socklen_t *addrlen, cred_t *cr)
1339 bzero(addr, sizeof (struct sockaddr));
1340 addr->sa_family = AF_ROUTE;
1341 *addrlen = sizeof (struct sockaddr);
1343 return (0);
1346 static int
1347 rts_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
1348 void *optvalp, socklen_t *optlen, cred_t *cr)
1350 conn_t *connp = (conn_t *)proto_handle;
1351 rts_t *rts = connp->conn_rts;
1352 int error;
1353 t_uscalar_t max_optbuf_len;
1354 void *optvalp_buf;
1355 int len;
1357 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
1358 rts_opt_obj.odb_opt_des_arr,
1359 rts_opt_obj.odb_opt_arr_cnt,
1360 B_FALSE, B_TRUE, cr);
1361 if (error != 0) {
1362 if (error < 0)
1363 error = proto_tlitosyserr(-error);
1364 return (error);
1367 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
1368 rw_enter(&rts->rts_rwlock, RW_READER);
1369 len = rts_opt_get(connp, level, option_name, optvalp_buf);
1370 rw_exit(&rts->rts_rwlock);
1371 if (len == -1) {
1372 kmem_free(optvalp_buf, max_optbuf_len);
1373 return (EINVAL);
1377 * update optlen and copy option value
1379 t_uscalar_t size = MIN(len, *optlen);
1381 bcopy(optvalp_buf, optvalp, size);
1382 bcopy(&size, optlen, sizeof (size));
1383 kmem_free(optvalp_buf, max_optbuf_len);
1384 return (0);
1387 static int
1388 rts_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
1389 const void *optvalp, socklen_t optlen, cred_t *cr)
1391 conn_t *connp = (conn_t *)proto_handle;
1392 rts_t *rts = connp->conn_rts;
1393 int error;
1395 error = proto_opt_check(level, option_name, optlen, NULL,
1396 rts_opt_obj.odb_opt_des_arr,
1397 rts_opt_obj.odb_opt_arr_cnt,
1398 B_TRUE, B_FALSE, cr);
1400 if (error != 0) {
1401 if (error < 0)
1402 error = proto_tlitosyserr(-error);
1403 return (error);
1406 rw_enter(&rts->rts_rwlock, RW_WRITER);
1407 error = rts_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
1408 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
1409 NULL, cr);
1410 rw_exit(&rts->rts_rwlock);
1412 ASSERT(error >= 0);
1414 return (error);
1417 /* ARGSUSED */
1418 static int
1419 rts_send(sock_lower_handle_t proto_handle, mblk_t *mp,
1420 struct msghdr *msg, cred_t *cr)
1422 conn_t *connp = (conn_t *)proto_handle;
1423 rt_msghdr_t *rtm;
1424 int error;
1426 ASSERT(DB_TYPE(mp) == M_DATA);
1428 * The semantics of the routing socket is such that the rtm_pid
1429 * field is automatically filled in during requests with the
1430 * current process' pid. We do this here (where we still have
1431 * user context) after checking we have at least a message the
1432 * size of a routing message header.
1434 if ((mp->b_wptr - mp->b_rptr) < sizeof (rt_msghdr_t)) {
1435 if (!pullupmsg(mp, sizeof (rt_msghdr_t))) {
1436 freemsg(mp);
1437 return (EINVAL);
1440 rtm = (rt_msghdr_t *)mp->b_rptr;
1441 rtm->rtm_pid = curproc->p_pid;
1444 * We are not constrained by the ioctl interface and
1445 * ip_rts_request_common processing requests synchronously hence
1446 * we can send them down concurrently.
1448 error = ip_rts_request_common(mp, connp, cr);
1449 return (error);
1452 /* ARGSUSED */
1453 sock_lower_handle_t
1454 rts_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
1455 uint_t *smodep, int *errorp, int flags, cred_t *credp)
1457 conn_t *connp;
1459 if (family != AF_ROUTE || type != SOCK_RAW ||
1460 (proto != 0 && proto != AF_INET && proto != AF_INET6)) {
1461 *errorp = EPROTONOSUPPORT;
1462 return (NULL);
1465 connp = rts_open(flags, credp);
1466 ASSERT(connp != NULL);
1467 connp->conn_flags |= IPCL_NONSTR;
1469 connp->conn_proto = proto;
1471 mutex_enter(&connp->conn_lock);
1472 connp->conn_state_flags &= ~CONN_INCIPIENT;
1473 mutex_exit(&connp->conn_lock);
1475 *errorp = 0;
1476 *smodep = SM_ATOMIC;
1477 *sock_downcalls = &sock_rts_downcalls;
1478 return ((sock_lower_handle_t)connp);
1481 /* ARGSUSED */
1482 void
1483 rts_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
1484 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
1486 conn_t *connp = (conn_t *)proto_handle;
1487 struct sock_proto_props sopp;
1489 connp->conn_upcalls = sock_upcalls;
1490 connp->conn_upper_handle = sock_handle;
1492 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
1493 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
1494 sopp.sopp_wroff = 0;
1495 sopp.sopp_rxhiwat = connp->conn_rcvbuf;
1496 sopp.sopp_rxlowat = connp->conn_rcvlowat;
1497 sopp.sopp_maxblk = INFPSZ;
1498 sopp.sopp_maxpsz = rts_mod_info.mi_maxpsz;
1499 sopp.sopp_minpsz = (rts_mod_info.mi_minpsz == 1) ? 0 :
1500 rts_mod_info.mi_minpsz;
1502 (*connp->conn_upcalls->su_set_proto_props)
1503 (connp->conn_upper_handle, &sopp);
1506 * We treat it as already connected for routing socket.
1508 (*connp->conn_upcalls->su_connected)
1509 (connp->conn_upper_handle, 0, NULL, -1);
1511 /* Indicate to IP that this is a routing socket client */
1512 ip_rts_register(connp);
1515 /* ARGSUSED */
1517 rts_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
1519 conn_t *connp = (conn_t *)proto_handle;
1521 ASSERT(connp != NULL && IPCL_IS_RTS(connp));
1522 return (rts_common_close(NULL, connp));
1525 /* ARGSUSED */
1527 rts_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
1529 conn_t *connp = (conn_t *)proto_handle;
1531 /* shut down the send side */
1532 if (how != SHUT_RD)
1533 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
1534 SOCK_OPCTL_SHUT_SEND, 0);
1535 /* shut down the recv side */
1536 if (how != SHUT_WR)
1537 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
1538 SOCK_OPCTL_SHUT_RECV, 0);
1539 return (0);
1542 void
1543 rts_clr_flowctrl(sock_lower_handle_t proto_handle)
1545 conn_t *connp = (conn_t *)proto_handle;
1546 rts_t *rts = connp->conn_rts;
1548 mutex_enter(&rts->rts_recv_mutex);
1549 connp->conn_flow_cntrld = B_FALSE;
1550 mutex_exit(&rts->rts_recv_mutex);
1554 rts_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
1555 int mode, int32_t *rvalp, cred_t *cr)
1557 conn_t *connp = (conn_t *)proto_handle;
1558 int error;
1561 * If we don't have a helper stream then create one.
1562 * ip_create_helper_stream takes care of locking the conn_t,
1563 * so this check for NULL is just a performance optimization.
1565 if (connp->conn_helper_info == NULL) {
1566 rts_stack_t *rtss = connp->conn_rts->rts_rtss;
1568 ASSERT(rtss->rtss_ldi_ident != NULL);
1571 * Create a helper stream for non-STREAMS socket.
1573 error = ip_create_helper_stream(connp, rtss->rtss_ldi_ident);
1574 if (error != 0) {
1575 ip0dbg(("rts_ioctl: create of IP helper stream "
1576 "failed %d\n", error));
1577 return (error);
1581 switch (cmd) {
1582 case ND_SET:
1583 case ND_GET:
1584 case TI_GETPEERNAME:
1585 case TI_GETMYNAME:
1586 #ifdef DEUG
1587 cmn_err(CE_CONT, "rts_ioctl cmd 0x%x on non sreams"
1588 " socket", cmd);
1589 #endif
1590 error = EINVAL;
1591 break;
1592 default:
1594 * Pass on to IP using helper stream
1596 error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
1597 cmd, arg, mode, cr, rvalp);
1598 break;
1601 return (error);
1604 sock_downcalls_t sock_rts_downcalls = {
1605 rts_activate,
1606 rts_accept,
1607 rts_bind,
1608 rts_listen,
1609 rts_connect,
1610 rts_getpeername,
1611 rts_getsockname,
1612 rts_getsockopt,
1613 rts_setsockopt,
1614 rts_send,
1615 NULL,
1616 NULL,
1617 NULL,
1618 rts_shutdown,
1619 rts_clr_flowctrl,
1620 rts_ioctl,
1621 rts_close