3882 Remove xmod & friends
[illumos-gate.git] / usr / src / uts / common / inet / ip / conn_opt.c
blobbcbc1c49497585ff4bbbbcaa425e4bd2be704c4d
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1990 Mentat Inc. */
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #define _SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/xti_inet.h>
33 #include <sys/ucred.h>
34 #include <sys/zone.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/kmem.h>
45 #include <sys/sdt.h>
46 #include <sys/socket.h>
47 #include <sys/ethernet.h>
48 #include <sys/mac.h>
49 #include <net/if.h>
50 #include <net/if_types.h>
51 #include <net/if_arp.h>
52 #include <net/route.h>
53 #include <sys/sockio.h>
54 #include <netinet/in.h>
55 #include <net/if_dl.h>
57 #include <inet/common.h>
58 #include <inet/mi.h>
59 #include <inet/mib2.h>
60 #include <inet/nd.h>
61 #include <inet/arp.h>
62 #include <inet/snmpcom.h>
63 #include <inet/kstatcom.h>
65 #include <netinet/igmp_var.h>
66 #include <netinet/ip6.h>
67 #include <netinet/icmp6.h>
68 #include <netinet/sctp.h>
70 #include <inet/ip.h>
71 #include <inet/ip_impl.h>
72 #include <inet/ip6.h>
73 #include <inet/ip6_asp.h>
74 #include <inet/tcp.h>
75 #include <inet/ip_multi.h>
76 #include <inet/ip_if.h>
77 #include <inet/ip_ire.h>
78 #include <inet/ip_ftable.h>
79 #include <inet/ip_rts.h>
80 #include <inet/optcom.h>
81 #include <inet/ip_ndp.h>
82 #include <inet/ip_listutils.h>
83 #include <netinet/igmp.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/udp.h>
86 #include <inet/ipp_common.h>
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/ipdrop.h>
92 #include <inet/ip_netinfo.h>
94 #include <inet/ipclassifier.h>
95 #include <inet/sctp_ip.h>
96 #include <inet/sctp/sctp_impl.h>
97 #include <inet/udp_impl.h>
98 #include <sys/sunddi.h>
100 #include <sys/tsol/label.h>
101 #include <sys/tsol/tnet.h>
104 * Return how much size is needed for the different ancillary data items
106 uint_t
107 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
108 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
110 uint_t ancil_size;
111 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
114 * If IP_RECVDSTADDR is set we include the destination IP
115 * address as an option. With IP_RECVOPTS we include all
116 * the IP options.
118 ancil_size = 0;
119 if (recv_ancillary.crb_recvdstaddr &&
120 (ira->ira_flags & IRAF_IS_IPV4)) {
121 ancil_size += sizeof (struct T_opthdr) +
122 sizeof (struct in_addr);
123 IP_STAT(ipst, conn_in_recvdstaddr);
127 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
128 * are different
130 if (recv_ancillary.crb_ip_recvpktinfo &&
131 connp->conn_family == AF_INET) {
132 ancil_size += sizeof (struct T_opthdr) +
133 sizeof (struct in_pktinfo);
134 IP_STAT(ipst, conn_in_recvpktinfo);
137 if ((recv_ancillary.crb_recvopts) &&
138 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
139 ancil_size += sizeof (struct T_opthdr) +
140 ipp->ipp_ipv4_options_len;
141 IP_STAT(ipst, conn_in_recvopts);
144 if (recv_ancillary.crb_recvslla) {
145 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
146 ill_t *ill;
148 /* Make sure ira_l2src is setup if not already */
149 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
150 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
151 ipst);
152 if (ill != NULL) {
153 ip_setl2src(mp, ira, ill);
154 ill_refrele(ill);
157 ancil_size += sizeof (struct T_opthdr) +
158 sizeof (struct sockaddr_dl);
159 IP_STAT(ipst, conn_in_recvslla);
162 if (recv_ancillary.crb_recvif) {
163 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
164 IP_STAT(ipst, conn_in_recvif);
168 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
169 * are different
171 if (recv_ancillary.crb_ip_recvpktinfo &&
172 connp->conn_family == AF_INET6) {
173 ancil_size += sizeof (struct T_opthdr) +
174 sizeof (struct in6_pktinfo);
175 IP_STAT(ipst, conn_in_recvpktinfo);
178 if (recv_ancillary.crb_ipv6_recvhoplimit) {
179 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
180 IP_STAT(ipst, conn_in_recvhoplimit);
183 if (recv_ancillary.crb_ipv6_recvtclass) {
184 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
185 IP_STAT(ipst, conn_in_recvtclass);
188 if (recv_ancillary.crb_ipv6_recvhopopts &&
189 (ipp->ipp_fields & IPPF_HOPOPTS)) {
190 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
191 IP_STAT(ipst, conn_in_recvhopopts);
194 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
195 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
196 * options that appear before a routing header.
197 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
199 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
200 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
201 (recv_ancillary.crb_ipv6_recvdstopts &&
202 recv_ancillary.crb_ipv6_recvrthdr)) {
203 ancil_size += sizeof (struct T_opthdr) +
204 ipp->ipp_rthdrdstoptslen;
205 IP_STAT(ipst, conn_in_recvrthdrdstopts);
208 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
209 (ipp->ipp_fields & IPPF_RTHDR)) {
210 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
211 IP_STAT(ipst, conn_in_recvrthdr);
213 if ((recv_ancillary.crb_ipv6_recvdstopts ||
214 recv_ancillary.crb_old_ipv6_recvdstopts) &&
215 (ipp->ipp_fields & IPPF_DSTOPTS)) {
216 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
217 IP_STAT(ipst, conn_in_recvdstopts);
219 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
220 ancil_size += sizeof (struct T_opthdr) +
221 ucredminsize(ira->ira_cred);
222 IP_STAT(ipst, conn_in_recvucred);
226 * If SO_TIMESTAMP is set allocate the appropriate sized
227 * buffer. Since gethrestime() expects a pointer aligned
228 * argument, we allocate space necessary for extra
229 * alignment (even though it might not be used).
231 if (recv_ancillary.crb_timestamp) {
232 ancil_size += sizeof (struct T_opthdr) +
233 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
234 IP_STAT(ipst, conn_in_timestamp);
238 * If IP_RECVTTL is set allocate the appropriate sized buffer
240 if (recv_ancillary.crb_recvttl &&
241 (ira->ira_flags & IRAF_IS_IPV4)) {
242 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
243 IP_STAT(ipst, conn_in_recvttl);
246 return (ancil_size);
250 * Lay down the ancillary data items at "ancil_buf".
251 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
252 * large buffer - ancil_size.
254 void
255 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
256 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
259 * Copy in destination address before options to avoid
260 * any padding issues.
262 if (recv_ancillary.crb_recvdstaddr &&
263 (ira->ira_flags & IRAF_IS_IPV4)) {
264 struct T_opthdr *toh;
265 ipaddr_t *dstptr;
267 toh = (struct T_opthdr *)ancil_buf;
268 toh->level = IPPROTO_IP;
269 toh->name = IP_RECVDSTADDR;
270 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
271 toh->status = 0;
272 ancil_buf += sizeof (struct T_opthdr);
273 dstptr = (ipaddr_t *)ancil_buf;
274 *dstptr = ipp->ipp_addr_v4;
275 ancil_buf += sizeof (ipaddr_t);
276 ancil_size -= toh->len;
280 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
281 * are different
283 if (recv_ancillary.crb_ip_recvpktinfo &&
284 connp->conn_family == AF_INET) {
285 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
286 struct T_opthdr *toh;
287 struct in_pktinfo *pktinfop;
288 ill_t *ill;
289 ipif_t *ipif;
291 toh = (struct T_opthdr *)ancil_buf;
292 toh->level = IPPROTO_IP;
293 toh->name = IP_PKTINFO;
294 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
295 toh->status = 0;
296 ancil_buf += sizeof (struct T_opthdr);
297 pktinfop = (struct in_pktinfo *)ancil_buf;
299 pktinfop->ipi_ifindex = ira->ira_ruifindex;
300 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
302 /* Find a good address to report */
303 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
304 if (ill != NULL) {
305 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
306 if (ipif != NULL) {
307 pktinfop->ipi_spec_dst.s_addr =
308 ipif->ipif_lcl_addr;
309 ipif_refrele(ipif);
311 ill_refrele(ill);
313 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
314 ancil_buf += sizeof (struct in_pktinfo);
315 ancil_size -= toh->len;
318 if ((recv_ancillary.crb_recvopts) &&
319 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
320 struct T_opthdr *toh;
322 toh = (struct T_opthdr *)ancil_buf;
323 toh->level = IPPROTO_IP;
324 toh->name = IP_RECVOPTS;
325 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
326 toh->status = 0;
327 ancil_buf += sizeof (struct T_opthdr);
328 bcopy(ipp->ipp_ipv4_options, ancil_buf,
329 ipp->ipp_ipv4_options_len);
330 ancil_buf += ipp->ipp_ipv4_options_len;
331 ancil_size -= toh->len;
334 if (recv_ancillary.crb_recvslla) {
335 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
336 struct T_opthdr *toh;
337 struct sockaddr_dl *dstptr;
338 ill_t *ill;
339 int alen = 0;
341 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
342 if (ill != NULL)
343 alen = ill->ill_phys_addr_length;
346 * For loopback multicast and broadcast the packet arrives
347 * with ira_ruifdex being the physical interface, but
348 * ira_l2src is all zero since ip_postfrag_loopback doesn't
349 * know our l2src. We don't report the address in that case.
351 if (ira->ira_flags & IRAF_LOOPBACK)
352 alen = 0;
354 toh = (struct T_opthdr *)ancil_buf;
355 toh->level = IPPROTO_IP;
356 toh->name = IP_RECVSLLA;
357 toh->len = sizeof (struct T_opthdr) +
358 sizeof (struct sockaddr_dl);
359 toh->status = 0;
360 ancil_buf += sizeof (struct T_opthdr);
361 dstptr = (struct sockaddr_dl *)ancil_buf;
362 dstptr->sdl_family = AF_LINK;
363 dstptr->sdl_index = ira->ira_ruifindex;
364 if (ill != NULL)
365 dstptr->sdl_type = ill->ill_type;
366 else
367 dstptr->sdl_type = 0;
368 dstptr->sdl_nlen = 0;
369 dstptr->sdl_alen = alen;
370 dstptr->sdl_slen = 0;
371 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
372 ancil_buf += sizeof (struct sockaddr_dl);
373 ancil_size -= toh->len;
374 if (ill != NULL)
375 ill_refrele(ill);
378 if (recv_ancillary.crb_recvif) {
379 struct T_opthdr *toh;
380 uint_t *dstptr;
382 toh = (struct T_opthdr *)ancil_buf;
383 toh->level = IPPROTO_IP;
384 toh->name = IP_RECVIF;
385 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
386 toh->status = 0;
387 ancil_buf += sizeof (struct T_opthdr);
388 dstptr = (uint_t *)ancil_buf;
389 *dstptr = ira->ira_ruifindex;
390 ancil_buf += sizeof (uint_t);
391 ancil_size -= toh->len;
395 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
396 * are different
398 if (recv_ancillary.crb_ip_recvpktinfo &&
399 connp->conn_family == AF_INET6) {
400 struct T_opthdr *toh;
401 struct in6_pktinfo *pkti;
403 toh = (struct T_opthdr *)ancil_buf;
404 toh->level = IPPROTO_IPV6;
405 toh->name = IPV6_PKTINFO;
406 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
407 toh->status = 0;
408 ancil_buf += sizeof (struct T_opthdr);
409 pkti = (struct in6_pktinfo *)ancil_buf;
410 if (ira->ira_flags & IRAF_IS_IPV4) {
411 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
412 &pkti->ipi6_addr);
413 } else {
414 pkti->ipi6_addr = ipp->ipp_addr;
416 pkti->ipi6_ifindex = ira->ira_ruifindex;
418 ancil_buf += sizeof (*pkti);
419 ancil_size -= toh->len;
421 if (recv_ancillary.crb_ipv6_recvhoplimit) {
422 struct T_opthdr *toh;
424 toh = (struct T_opthdr *)ancil_buf;
425 toh->level = IPPROTO_IPV6;
426 toh->name = IPV6_HOPLIMIT;
427 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
428 toh->status = 0;
429 ancil_buf += sizeof (struct T_opthdr);
430 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
431 ancil_buf += sizeof (uint_t);
432 ancil_size -= toh->len;
434 if (recv_ancillary.crb_ipv6_recvtclass) {
435 struct T_opthdr *toh;
437 toh = (struct T_opthdr *)ancil_buf;
438 toh->level = IPPROTO_IPV6;
439 toh->name = IPV6_TCLASS;
440 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
441 toh->status = 0;
442 ancil_buf += sizeof (struct T_opthdr);
444 if (ira->ira_flags & IRAF_IS_IPV4)
445 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
446 else
447 *(uint_t *)ancil_buf = ipp->ipp_tclass;
448 ancil_buf += sizeof (uint_t);
449 ancil_size -= toh->len;
451 if (recv_ancillary.crb_ipv6_recvhopopts &&
452 (ipp->ipp_fields & IPPF_HOPOPTS)) {
453 struct T_opthdr *toh;
455 toh = (struct T_opthdr *)ancil_buf;
456 toh->level = IPPROTO_IPV6;
457 toh->name = IPV6_HOPOPTS;
458 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
459 toh->status = 0;
460 ancil_buf += sizeof (struct T_opthdr);
461 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
462 ancil_buf += ipp->ipp_hopoptslen;
463 ancil_size -= toh->len;
466 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
467 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
468 * options that appear before a routing header.
469 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
471 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
472 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
473 (recv_ancillary.crb_ipv6_recvdstopts &&
474 recv_ancillary.crb_ipv6_recvrthdr)) {
475 struct T_opthdr *toh;
477 toh = (struct T_opthdr *)ancil_buf;
478 toh->level = IPPROTO_IPV6;
479 toh->name = IPV6_DSTOPTS;
480 toh->len = sizeof (struct T_opthdr) +
481 ipp->ipp_rthdrdstoptslen;
482 toh->status = 0;
483 ancil_buf += sizeof (struct T_opthdr);
484 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
485 ipp->ipp_rthdrdstoptslen);
486 ancil_buf += ipp->ipp_rthdrdstoptslen;
487 ancil_size -= toh->len;
490 if (recv_ancillary.crb_ipv6_recvrthdr &&
491 (ipp->ipp_fields & IPPF_RTHDR)) {
492 struct T_opthdr *toh;
494 toh = (struct T_opthdr *)ancil_buf;
495 toh->level = IPPROTO_IPV6;
496 toh->name = IPV6_RTHDR;
497 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
498 toh->status = 0;
499 ancil_buf += sizeof (struct T_opthdr);
500 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
501 ancil_buf += ipp->ipp_rthdrlen;
502 ancil_size -= toh->len;
504 if ((recv_ancillary.crb_ipv6_recvdstopts ||
505 recv_ancillary.crb_old_ipv6_recvdstopts) &&
506 (ipp->ipp_fields & IPPF_DSTOPTS)) {
507 struct T_opthdr *toh;
509 toh = (struct T_opthdr *)ancil_buf;
510 toh->level = IPPROTO_IPV6;
511 toh->name = IPV6_DSTOPTS;
512 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
513 toh->status = 0;
514 ancil_buf += sizeof (struct T_opthdr);
515 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
516 ancil_buf += ipp->ipp_dstoptslen;
517 ancil_size -= toh->len;
520 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
521 struct T_opthdr *toh;
522 cred_t *rcr = connp->conn_cred;
524 toh = (struct T_opthdr *)ancil_buf;
525 toh->level = SOL_SOCKET;
526 toh->name = SCM_UCRED;
527 toh->len = sizeof (struct T_opthdr) +
528 ucredminsize(ira->ira_cred);
529 toh->status = 0;
530 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
531 ancil_buf += toh->len;
532 ancil_size -= toh->len;
534 if (recv_ancillary.crb_timestamp) {
535 struct T_opthdr *toh;
537 toh = (struct T_opthdr *)ancil_buf;
538 toh->level = SOL_SOCKET;
539 toh->name = SCM_TIMESTAMP;
540 toh->len = sizeof (struct T_opthdr) +
541 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
542 toh->status = 0;
543 ancil_buf += sizeof (struct T_opthdr);
544 /* Align for gethrestime() */
545 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
546 sizeof (intptr_t));
547 gethrestime((timestruc_t *)ancil_buf);
548 ancil_buf = (uchar_t *)toh + toh->len;
549 ancil_size -= toh->len;
553 * CAUTION:
554 * Due to aligment issues
555 * Processing of IP_RECVTTL option
556 * should always be the last. Adding
557 * any option processing after this will
558 * cause alignment panic.
560 if (recv_ancillary.crb_recvttl &&
561 (ira->ira_flags & IRAF_IS_IPV4)) {
562 struct T_opthdr *toh;
563 uint8_t *dstptr;
565 toh = (struct T_opthdr *)ancil_buf;
566 toh->level = IPPROTO_IP;
567 toh->name = IP_RECVTTL;
568 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
569 toh->status = 0;
570 ancil_buf += sizeof (struct T_opthdr);
571 dstptr = (uint8_t *)ancil_buf;
572 *dstptr = ipp->ipp_hoplimit;
573 ancil_buf += sizeof (uint8_t);
574 ancil_size -= toh->len;
577 /* Consumed all of allocated space */
578 ASSERT(ancil_size == 0);
583 * This routine retrieves the current status of socket options.
584 * It returns the size of the option retrieved, or -1.
587 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
588 uchar_t *ptr)
590 int *i1 = (int *)ptr;
591 conn_t *connp = coa->coa_connp;
592 ip_xmit_attr_t *ixa = coa->coa_ixa;
593 ip_pkt_t *ipp = coa->coa_ipp;
594 ip_stack_t *ipst = ixa->ixa_ipst;
595 uint_t len;
597 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
599 switch (level) {
600 case SOL_SOCKET:
601 switch (name) {
602 case SO_DEBUG:
603 *i1 = connp->conn_debug ? SO_DEBUG : 0;
604 break; /* goto sizeof (int) option return */
605 case SO_KEEPALIVE:
606 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
607 break;
608 case SO_LINGER: {
609 struct linger *lgr = (struct linger *)ptr;
611 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
612 lgr->l_linger = connp->conn_lingertime;
614 return (sizeof (struct linger));
616 case SO_OOBINLINE:
617 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
618 break;
619 case SO_REUSEADDR:
620 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
621 break; /* goto sizeof (int) option return */
622 case SO_TYPE:
623 *i1 = connp->conn_so_type;
624 break; /* goto sizeof (int) option return */
625 case SO_DONTROUTE:
626 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
627 SO_DONTROUTE : 0;
628 break; /* goto sizeof (int) option return */
629 case SO_USELOOPBACK:
630 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
631 break; /* goto sizeof (int) option return */
632 case SO_BROADCAST:
633 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
634 break; /* goto sizeof (int) option return */
636 case SO_SNDBUF:
637 *i1 = connp->conn_sndbuf;
638 break; /* goto sizeof (int) option return */
639 case SO_RCVBUF:
640 *i1 = connp->conn_rcvbuf;
641 break; /* goto sizeof (int) option return */
642 case SO_RCVTIMEO:
643 case SO_SNDTIMEO:
645 * Pass these two options in order for third part
646 * protocol usage. Here just return directly.
648 *i1 = 0;
649 break;
650 case SO_DGRAM_ERRIND:
651 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
652 break; /* goto sizeof (int) option return */
653 case SO_RECVUCRED:
654 *i1 = connp->conn_recv_ancillary.crb_recvucred;
655 break; /* goto sizeof (int) option return */
656 case SO_TIMESTAMP:
657 *i1 = connp->conn_recv_ancillary.crb_timestamp;
658 break; /* goto sizeof (int) option return */
659 case SO_VRRP:
660 *i1 = connp->conn_isvrrp;
661 break; /* goto sizeof (int) option return */
662 case SO_ANON_MLP:
663 *i1 = connp->conn_anon_mlp;
664 break; /* goto sizeof (int) option return */
665 case SO_MAC_EXEMPT:
666 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE);
667 break; /* goto sizeof (int) option return */
668 case SO_MAC_IMPLICIT:
669 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT);
670 break; /* goto sizeof (int) option return */
671 case SO_ALLZONES:
672 *i1 = connp->conn_allzones;
673 break; /* goto sizeof (int) option return */
674 case SO_EXCLBIND:
675 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
676 break;
677 case SO_PROTOTYPE:
678 *i1 = connp->conn_proto;
679 break;
681 case SO_DOMAIN:
682 *i1 = connp->conn_family;
683 break;
684 default:
685 return (-1);
687 break;
688 case IPPROTO_IP:
689 if (connp->conn_family != AF_INET)
690 return (-1);
691 switch (name) {
692 case IP_OPTIONS:
693 case T_IP_OPTIONS:
694 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
695 return (0);
697 len = ipp->ipp_ipv4_options_len;
698 if (len > 0) {
699 bcopy(ipp->ipp_ipv4_options, ptr, len);
701 return (len);
703 case IP_PKTINFO: {
705 * This also handles IP_RECVPKTINFO.
706 * IP_PKTINFO and IP_RECVPKTINFO have same value.
707 * Differentiation is based on the size of the
708 * argument passed in.
710 struct in_pktinfo *pktinfo;
712 #ifdef notdef
713 /* optcom doesn't provide a length with "get" */
714 if (inlen == sizeof (int)) {
715 /* This is IP_RECVPKTINFO option. */
716 *i1 = connp->conn_recv_ancillary.
717 crb_ip_recvpktinfo;
718 return (sizeof (int));
720 #endif
721 /* XXX assumes that caller has room for max size! */
723 pktinfo = (struct in_pktinfo *)ptr;
724 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
725 if (ipp->ipp_fields & IPPF_ADDR)
726 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
727 else
728 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
729 return (sizeof (struct in_pktinfo));
731 case IP_DONTFRAG:
732 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
733 return (sizeof (int));
734 case IP_TOS:
735 case T_IP_TOS:
736 *i1 = (int)ipp->ipp_type_of_service;
737 break; /* goto sizeof (int) option return */
738 case IP_TTL:
739 *i1 = (int)ipp->ipp_unicast_hops;
740 break; /* goto sizeof (int) option return */
741 case IP_DHCPINIT_IF:
742 return (-1);
743 case IP_NEXTHOP:
744 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
745 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
746 return (sizeof (ipaddr_t));
747 } else {
748 return (0);
751 case IP_MULTICAST_IF:
752 /* 0 address if not set */
753 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
754 return (sizeof (ipaddr_t));
755 case IP_MULTICAST_TTL:
756 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
757 return (sizeof (uchar_t));
758 case IP_MULTICAST_LOOP:
759 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
760 return (sizeof (uint8_t));
761 case IP_RECVOPTS:
762 *i1 = connp->conn_recv_ancillary.crb_recvopts;
763 break; /* goto sizeof (int) option return */
764 case IP_RECVDSTADDR:
765 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
766 break; /* goto sizeof (int) option return */
767 case IP_RECVIF:
768 *i1 = connp->conn_recv_ancillary.crb_recvif;
769 break; /* goto sizeof (int) option return */
770 case IP_RECVSLLA:
771 *i1 = connp->conn_recv_ancillary.crb_recvslla;
772 break; /* goto sizeof (int) option return */
773 case IP_RECVTTL:
774 *i1 = connp->conn_recv_ancillary.crb_recvttl;
775 break; /* goto sizeof (int) option return */
776 case IP_ADD_MEMBERSHIP:
777 case IP_DROP_MEMBERSHIP:
778 case MCAST_JOIN_GROUP:
779 case MCAST_LEAVE_GROUP:
780 case IP_BLOCK_SOURCE:
781 case IP_UNBLOCK_SOURCE:
782 case IP_ADD_SOURCE_MEMBERSHIP:
783 case IP_DROP_SOURCE_MEMBERSHIP:
784 case MCAST_BLOCK_SOURCE:
785 case MCAST_UNBLOCK_SOURCE:
786 case MCAST_JOIN_SOURCE_GROUP:
787 case MCAST_LEAVE_SOURCE_GROUP:
788 case MRT_INIT:
789 case MRT_DONE:
790 case MRT_ADD_VIF:
791 case MRT_DEL_VIF:
792 case MRT_ADD_MFC:
793 case MRT_DEL_MFC:
794 /* cannot "get" the value for these */
795 return (-1);
796 case MRT_VERSION:
797 case MRT_ASSERT:
798 (void) ip_mrouter_get(name, connp, ptr);
799 return (sizeof (int));
800 case IP_SEC_OPT:
801 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
802 IPSEC_AF_V4));
803 case IP_BOUND_IF:
804 /* Zero if not set */
805 *i1 = connp->conn_bound_if;
806 break; /* goto sizeof (int) option return */
807 case IP_UNSPEC_SRC:
808 *i1 = connp->conn_unspec_src;
809 break; /* goto sizeof (int) option return */
810 case IP_BROADCAST_TTL:
811 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
812 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
813 else
814 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
815 return (sizeof (uchar_t));
816 default:
817 return (-1);
819 break;
820 case IPPROTO_IPV6:
821 if (connp->conn_family != AF_INET6)
822 return (-1);
823 switch (name) {
824 case IPV6_UNICAST_HOPS:
825 *i1 = (int)ipp->ipp_unicast_hops;
826 break; /* goto sizeof (int) option return */
827 case IPV6_MULTICAST_IF:
828 /* 0 index if not set */
829 *i1 = ixa->ixa_multicast_ifindex;
830 break; /* goto sizeof (int) option return */
831 case IPV6_MULTICAST_HOPS:
832 *i1 = ixa->ixa_multicast_ttl;
833 break; /* goto sizeof (int) option return */
834 case IPV6_MULTICAST_LOOP:
835 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
836 break; /* goto sizeof (int) option return */
837 case IPV6_JOIN_GROUP:
838 case IPV6_LEAVE_GROUP:
839 case MCAST_JOIN_GROUP:
840 case MCAST_LEAVE_GROUP:
841 case MCAST_BLOCK_SOURCE:
842 case MCAST_UNBLOCK_SOURCE:
843 case MCAST_JOIN_SOURCE_GROUP:
844 case MCAST_LEAVE_SOURCE_GROUP:
845 /* cannot "get" the value for these */
846 return (-1);
847 case IPV6_BOUND_IF:
848 /* Zero if not set */
849 *i1 = connp->conn_bound_if;
850 break; /* goto sizeof (int) option return */
851 case IPV6_UNSPEC_SRC:
852 *i1 = connp->conn_unspec_src;
853 break; /* goto sizeof (int) option return */
854 case IPV6_RECVPKTINFO:
855 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
856 break; /* goto sizeof (int) option return */
857 case IPV6_RECVTCLASS:
858 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
859 break; /* goto sizeof (int) option return */
860 case IPV6_RECVPATHMTU:
861 *i1 = connp->conn_ipv6_recvpathmtu;
862 break; /* goto sizeof (int) option return */
863 case IPV6_RECVHOPLIMIT:
864 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
865 break; /* goto sizeof (int) option return */
866 case IPV6_RECVHOPOPTS:
867 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
868 break; /* goto sizeof (int) option return */
869 case IPV6_RECVDSTOPTS:
870 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
871 break; /* goto sizeof (int) option return */
872 case _OLD_IPV6_RECVDSTOPTS:
873 *i1 =
874 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
875 break; /* goto sizeof (int) option return */
876 case IPV6_RECVRTHDRDSTOPTS:
877 *i1 = connp->conn_recv_ancillary.
878 crb_ipv6_recvrthdrdstopts;
879 break; /* goto sizeof (int) option return */
880 case IPV6_RECVRTHDR:
881 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
882 break; /* goto sizeof (int) option return */
883 case IPV6_PKTINFO: {
884 /* XXX assumes that caller has room for max size! */
885 struct in6_pktinfo *pkti;
887 pkti = (struct in6_pktinfo *)ptr;
888 pkti->ipi6_ifindex = ixa->ixa_ifindex;
889 if (ipp->ipp_fields & IPPF_ADDR)
890 pkti->ipi6_addr = ipp->ipp_addr;
891 else
892 pkti->ipi6_addr = ipv6_all_zeros;
893 return (sizeof (struct in6_pktinfo));
895 case IPV6_TCLASS:
896 *i1 = ipp->ipp_tclass;
897 break; /* goto sizeof (int) option return */
898 case IPV6_NEXTHOP: {
899 sin6_t *sin6 = (sin6_t *)ptr;
901 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
902 return (0);
904 *sin6 = sin6_null;
905 sin6->sin6_family = AF_INET6;
906 sin6->sin6_addr = ixa->ixa_nexthop_v6;
908 return (sizeof (sin6_t));
910 case IPV6_HOPOPTS:
911 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
912 return (0);
913 bcopy(ipp->ipp_hopopts, ptr,
914 ipp->ipp_hopoptslen);
915 return (ipp->ipp_hopoptslen);
916 case IPV6_RTHDRDSTOPTS:
917 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
918 return (0);
919 bcopy(ipp->ipp_rthdrdstopts, ptr,
920 ipp->ipp_rthdrdstoptslen);
921 return (ipp->ipp_rthdrdstoptslen);
922 case IPV6_RTHDR:
923 if (!(ipp->ipp_fields & IPPF_RTHDR))
924 return (0);
925 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
926 return (ipp->ipp_rthdrlen);
927 case IPV6_DSTOPTS:
928 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
929 return (0);
930 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
931 return (ipp->ipp_dstoptslen);
932 case IPV6_PATHMTU:
933 return (ip_fill_mtuinfo(connp, ixa,
934 (struct ip6_mtuinfo *)ptr));
935 case IPV6_SEC_OPT:
936 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
937 IPSEC_AF_V6));
938 case IPV6_SRC_PREFERENCES:
939 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
940 case IPV6_DONTFRAG:
941 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
942 return (sizeof (int));
943 case IPV6_USE_MIN_MTU:
944 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
945 *i1 = ixa->ixa_use_min_mtu;
946 else
947 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
948 break;
949 case IPV6_V6ONLY:
950 *i1 = connp->conn_ipv6_v6only;
951 return (sizeof (int));
952 default:
953 return (-1);
955 break;
956 case IPPROTO_UDP:
957 switch (name) {
958 case UDP_ANONPRIVBIND:
959 *i1 = connp->conn_anon_priv_bind;
960 break;
961 case UDP_EXCLBIND:
962 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
963 break;
964 default:
965 return (-1);
967 break;
968 case IPPROTO_TCP:
969 switch (name) {
970 case TCP_RECVDSTADDR:
971 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
972 break;
973 case TCP_ANONPRIVBIND:
974 *i1 = connp->conn_anon_priv_bind;
975 break;
976 case TCP_EXCLBIND:
977 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
978 break;
979 default:
980 return (-1);
982 break;
983 default:
984 return (-1);
986 return (sizeof (int));
989 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
990 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
991 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
992 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
993 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
994 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
995 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
996 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
997 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
998 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
1001 * This routine sets the most common socket options including some
1002 * that are transport/ULP specific.
1003 * It returns errno or zero.
1005 * For fixed length options, there is no sanity check
1006 * of passed in length is done. It is assumed *_optcom_req()
1007 * routines do the right thing.
1010 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
1011 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1013 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1015 /* We have different functions for different levels */
1016 switch (level) {
1017 case SOL_SOCKET:
1018 return (conn_opt_set_socket(coa, name, inlen, invalp,
1019 checkonly, cr));
1020 case IPPROTO_IP:
1021 return (conn_opt_set_ip(coa, name, inlen, invalp,
1022 checkonly, cr));
1023 case IPPROTO_IPV6:
1024 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1025 checkonly, cr));
1026 case IPPROTO_UDP:
1027 return (conn_opt_set_udp(coa, name, inlen, invalp,
1028 checkonly, cr));
1029 case IPPROTO_TCP:
1030 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1031 checkonly, cr));
1032 default:
1033 return (0);
1038 * Handle SOL_SOCKET
1039 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1040 * it implement their own checks and setting of conn_proto.
1042 /* ARGSUSED1 */
1043 static int
1044 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1045 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1047 conn_t *connp = coa->coa_connp;
1048 ip_xmit_attr_t *ixa = coa->coa_ixa;
1049 int *i1 = (int *)invalp;
1050 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1052 switch (name) {
1053 case SO_ALLZONES:
1054 if (IPCL_IS_BOUND(connp))
1055 return (EINVAL);
1056 break;
1057 case SO_VRRP:
1058 if (secpolicy_ip_config(cr, checkonly) != 0)
1059 return (EACCES);
1060 break;
1061 case SO_MAC_EXEMPT:
1062 if (secpolicy_net_mac_aware(cr) != 0)
1063 return (EACCES);
1064 if (IPCL_IS_BOUND(connp))
1065 return (EINVAL);
1066 break;
1067 case SO_MAC_IMPLICIT:
1068 if (secpolicy_net_mac_implicit(cr) != 0)
1069 return (EACCES);
1070 break;
1072 if (checkonly)
1073 return (0);
1075 mutex_enter(&connp->conn_lock);
1076 /* Here we set the actual option value */
1077 switch (name) {
1078 case SO_DEBUG:
1079 connp->conn_debug = onoff;
1080 break;
1081 case SO_KEEPALIVE:
1082 connp->conn_keepalive = onoff;
1083 break;
1084 case SO_LINGER: {
1085 struct linger *lgr = (struct linger *)invalp;
1087 if (lgr->l_onoff) {
1088 connp->conn_linger = 1;
1089 connp->conn_lingertime = lgr->l_linger;
1090 } else {
1091 connp->conn_linger = 0;
1092 connp->conn_lingertime = 0;
1094 break;
1096 case SO_OOBINLINE:
1097 connp->conn_oobinline = onoff;
1098 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1099 break;
1100 case SO_REUSEADDR:
1101 connp->conn_reuseaddr = onoff;
1102 break;
1103 case SO_DONTROUTE:
1104 if (onoff)
1105 ixa->ixa_flags |= IXAF_DONTROUTE;
1106 else
1107 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1108 coa->coa_changed |= COA_ROUTE_CHANGED;
1109 break;
1110 case SO_USELOOPBACK:
1111 connp->conn_useloopback = onoff;
1112 break;
1113 case SO_BROADCAST:
1114 connp->conn_broadcast = onoff;
1115 break;
1116 case SO_SNDBUF:
1117 /* ULP has range checked the value */
1118 connp->conn_sndbuf = *i1;
1119 coa->coa_changed |= COA_SNDBUF_CHANGED;
1120 break;
1121 case SO_RCVBUF:
1122 /* ULP has range checked the value */
1123 connp->conn_rcvbuf = *i1;
1124 coa->coa_changed |= COA_RCVBUF_CHANGED;
1125 break;
1126 case SO_RCVTIMEO:
1127 case SO_SNDTIMEO:
1129 * Pass these two options in order for third part
1130 * protocol usage.
1132 break;
1133 case SO_DGRAM_ERRIND:
1134 connp->conn_dgram_errind = onoff;
1135 break;
1136 case SO_RECVUCRED:
1137 connp->conn_recv_ancillary.crb_recvucred = onoff;
1138 break;
1139 case SO_ALLZONES:
1140 connp->conn_allzones = onoff;
1141 coa->coa_changed |= COA_ROUTE_CHANGED;
1142 if (onoff)
1143 ixa->ixa_zoneid = ALL_ZONES;
1144 else
1145 ixa->ixa_zoneid = connp->conn_zoneid;
1146 break;
1147 case SO_TIMESTAMP:
1148 connp->conn_recv_ancillary.crb_timestamp = onoff;
1149 break;
1150 case SO_VRRP:
1151 connp->conn_isvrrp = onoff;
1152 break;
1153 case SO_ANON_MLP:
1154 connp->conn_anon_mlp = onoff;
1155 break;
1156 case SO_MAC_EXEMPT:
1157 connp->conn_mac_mode = onoff ?
1158 CONN_MAC_AWARE : CONN_MAC_DEFAULT;
1159 break;
1160 case SO_MAC_IMPLICIT:
1161 connp->conn_mac_mode = onoff ?
1162 CONN_MAC_IMPLICIT : CONN_MAC_DEFAULT;
1163 break;
1164 case SO_EXCLBIND:
1165 connp->conn_exclbind = onoff;
1166 break;
1168 mutex_exit(&connp->conn_lock);
1169 return (0);
1172 /* Handle IPPROTO_IP */
1173 static int
1174 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1175 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1177 conn_t *connp = coa->coa_connp;
1178 ip_xmit_attr_t *ixa = coa->coa_ixa;
1179 ip_pkt_t *ipp = coa->coa_ipp;
1180 int *i1 = (int *)invalp;
1181 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1182 ipaddr_t addr = (ipaddr_t)*i1;
1183 uint_t ifindex;
1184 zoneid_t zoneid = IPCL_ZONEID(connp);
1185 ipif_t *ipif;
1186 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1187 int error;
1189 if (connp->conn_family != AF_INET)
1190 return (EINVAL);
1192 switch (name) {
1193 case IP_TTL:
1194 /* Don't allow zero */
1195 if (*i1 < 1 || *i1 > 255)
1196 return (EINVAL);
1197 break;
1198 case IP_MULTICAST_IF:
1199 if (addr == INADDR_ANY) {
1200 /* Clear */
1201 ifindex = 0;
1202 break;
1204 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1205 if (ipif == NULL)
1206 return (EHOSTUNREACH);
1207 /* not supported by the virtual network iface */
1208 if (IS_VNI(ipif->ipif_ill)) {
1209 ipif_refrele(ipif);
1210 return (EINVAL);
1212 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1213 ipif_refrele(ipif);
1214 break;
1215 case IP_NEXTHOP: {
1216 ire_t *ire;
1218 if (addr == INADDR_ANY) {
1219 /* Clear */
1220 break;
1222 /* Verify that the next-hop is on-link */
1223 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1224 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1225 if (ire == NULL)
1226 return (EHOSTUNREACH);
1227 ire_refrele(ire);
1228 break;
1230 case IP_OPTIONS:
1231 case T_IP_OPTIONS: {
1232 uint_t newlen;
1234 if (ipp->ipp_fields & IPPF_LABEL_V4)
1235 newlen = inlen + (ipp->ipp_label_len_v4 + 3) & ~3;
1236 else
1237 newlen = inlen;
1238 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
1239 return (EINVAL);
1241 break;
1243 case IP_PKTINFO: {
1244 struct in_pktinfo *pktinfo;
1246 /* Two different valid lengths */
1247 if (inlen != sizeof (int) &&
1248 inlen != sizeof (struct in_pktinfo))
1249 return (EINVAL);
1250 if (inlen == sizeof (int))
1251 break;
1253 pktinfo = (struct in_pktinfo *)invalp;
1254 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1255 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1256 zoneid, ipst, B_FALSE)) {
1257 case IPVL_UNICAST_UP:
1258 case IPVL_UNICAST_DOWN:
1259 break;
1260 default:
1261 return (EADDRNOTAVAIL);
1264 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1265 B_FALSE, ipst))
1266 return (ENXIO);
1267 break;
1269 case IP_BOUND_IF:
1270 ifindex = *(uint_t *)i1;
1272 /* Just check it is ok. */
1273 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1274 return (ENXIO);
1275 break;
1277 if (checkonly)
1278 return (0);
1280 /* Here we set the actual option value */
1282 * conn_lock protects the bitfields, and is used to
1283 * set the fields atomically. Not needed for ixa settings since
1284 * the caller has an exclusive copy of the ixa.
1285 * We can not hold conn_lock across the multicast options though.
1287 switch (name) {
1288 case IP_OPTIONS:
1289 case T_IP_OPTIONS:
1290 /* Save options for use by IP. */
1291 mutex_enter(&connp->conn_lock);
1292 error = optcom_pkt_set(invalp, inlen,
1293 (uchar_t **)&ipp->ipp_ipv4_options,
1294 &ipp->ipp_ipv4_options_len);
1295 if (error != 0) {
1296 mutex_exit(&connp->conn_lock);
1297 return (error);
1299 if (ipp->ipp_ipv4_options_len == 0) {
1300 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1301 } else {
1302 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1304 mutex_exit(&connp->conn_lock);
1305 coa->coa_changed |= COA_HEADER_CHANGED;
1306 coa->coa_changed |= COA_WROFF_CHANGED;
1307 break;
1309 case IP_TTL:
1310 mutex_enter(&connp->conn_lock);
1311 ipp->ipp_unicast_hops = *i1;
1312 mutex_exit(&connp->conn_lock);
1313 coa->coa_changed |= COA_HEADER_CHANGED;
1314 break;
1315 case IP_TOS:
1316 case T_IP_TOS:
1317 mutex_enter(&connp->conn_lock);
1318 if (*i1 == -1) {
1319 ipp->ipp_type_of_service = 0;
1320 } else {
1321 ipp->ipp_type_of_service = *i1;
1323 mutex_exit(&connp->conn_lock);
1324 coa->coa_changed |= COA_HEADER_CHANGED;
1325 break;
1326 case IP_MULTICAST_IF:
1327 ixa->ixa_multicast_ifindex = ifindex;
1328 ixa->ixa_multicast_ifaddr = addr;
1329 coa->coa_changed |= COA_ROUTE_CHANGED;
1330 break;
1331 case IP_MULTICAST_TTL:
1332 ixa->ixa_multicast_ttl = *invalp;
1333 /* Handled automatically by ip_output */
1334 break;
1335 case IP_MULTICAST_LOOP:
1336 if (*invalp != 0)
1337 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1338 else
1339 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1340 /* Handled automatically by ip_output */
1341 break;
1342 case IP_RECVOPTS:
1343 mutex_enter(&connp->conn_lock);
1344 connp->conn_recv_ancillary.crb_recvopts = onoff;
1345 mutex_exit(&connp->conn_lock);
1346 break;
1347 case IP_RECVDSTADDR:
1348 mutex_enter(&connp->conn_lock);
1349 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1350 mutex_exit(&connp->conn_lock);
1351 break;
1352 case IP_RECVIF:
1353 mutex_enter(&connp->conn_lock);
1354 connp->conn_recv_ancillary.crb_recvif = onoff;
1355 mutex_exit(&connp->conn_lock);
1356 break;
1357 case IP_RECVSLLA:
1358 mutex_enter(&connp->conn_lock);
1359 connp->conn_recv_ancillary.crb_recvslla = onoff;
1360 mutex_exit(&connp->conn_lock);
1361 break;
1362 case IP_RECVTTL:
1363 mutex_enter(&connp->conn_lock);
1364 connp->conn_recv_ancillary.crb_recvttl = onoff;
1365 mutex_exit(&connp->conn_lock);
1366 break;
1367 case IP_PKTINFO: {
1369 * This also handles IP_RECVPKTINFO.
1370 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1371 * Differentiation is based on the size of the
1372 * argument passed in.
1374 struct in_pktinfo *pktinfo;
1376 if (inlen == sizeof (int)) {
1377 /* This is IP_RECVPKTINFO option. */
1378 mutex_enter(&connp->conn_lock);
1379 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1380 onoff;
1381 mutex_exit(&connp->conn_lock);
1382 break;
1385 /* This is IP_PKTINFO option. */
1386 mutex_enter(&connp->conn_lock);
1387 pktinfo = (struct in_pktinfo *)invalp;
1388 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1389 ipp->ipp_fields |= IPPF_ADDR;
1390 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1391 &ipp->ipp_addr);
1392 } else {
1393 ipp->ipp_fields &= ~IPPF_ADDR;
1394 ipp->ipp_addr = ipv6_all_zeros;
1396 mutex_exit(&connp->conn_lock);
1397 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1398 coa->coa_changed |= COA_ROUTE_CHANGED;
1399 coa->coa_changed |= COA_HEADER_CHANGED;
1400 break;
1402 case IP_DONTFRAG:
1403 if (onoff) {
1404 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1405 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1406 } else {
1407 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1408 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1410 /* Need to redo ip_attr_connect */
1411 coa->coa_changed |= COA_ROUTE_CHANGED;
1412 break;
1413 case IP_ADD_MEMBERSHIP:
1414 case IP_DROP_MEMBERSHIP:
1415 case MCAST_JOIN_GROUP:
1416 case MCAST_LEAVE_GROUP:
1417 return (ip_opt_set_multicast_group(connp, name,
1418 invalp, B_FALSE, checkonly));
1420 case IP_BLOCK_SOURCE:
1421 case IP_UNBLOCK_SOURCE:
1422 case IP_ADD_SOURCE_MEMBERSHIP:
1423 case IP_DROP_SOURCE_MEMBERSHIP:
1424 case MCAST_BLOCK_SOURCE:
1425 case MCAST_UNBLOCK_SOURCE:
1426 case MCAST_JOIN_SOURCE_GROUP:
1427 case MCAST_LEAVE_SOURCE_GROUP:
1428 return (ip_opt_set_multicast_sources(connp, name,
1429 invalp, B_FALSE, checkonly));
1431 case IP_SEC_OPT:
1432 mutex_enter(&connp->conn_lock);
1433 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1434 mutex_exit(&connp->conn_lock);
1435 if (error != 0) {
1436 return (error);
1438 /* This is an IPsec policy change - redo ip_attr_connect */
1439 coa->coa_changed |= COA_ROUTE_CHANGED;
1440 break;
1441 case IP_NEXTHOP:
1442 ixa->ixa_nexthop_v4 = addr;
1443 if (addr != INADDR_ANY)
1444 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1445 else
1446 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1447 coa->coa_changed |= COA_ROUTE_CHANGED;
1448 break;
1450 case IP_BOUND_IF:
1451 ixa->ixa_ifindex = ifindex; /* Send */
1452 mutex_enter(&connp->conn_lock);
1453 connp->conn_incoming_ifindex = ifindex; /* Receive */
1454 connp->conn_bound_if = ifindex; /* getsockopt */
1455 mutex_exit(&connp->conn_lock);
1456 coa->coa_changed |= COA_ROUTE_CHANGED;
1457 break;
1458 case IP_UNSPEC_SRC:
1459 mutex_enter(&connp->conn_lock);
1460 connp->conn_unspec_src = onoff;
1461 if (onoff)
1462 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1463 else
1464 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1466 mutex_exit(&connp->conn_lock);
1467 break;
1468 case IP_BROADCAST_TTL:
1469 ixa->ixa_broadcast_ttl = *invalp;
1470 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1471 /* Handled automatically by ip_output */
1472 break;
1473 case MRT_INIT:
1474 case MRT_DONE:
1475 case MRT_ADD_VIF:
1476 case MRT_DEL_VIF:
1477 case MRT_ADD_MFC:
1478 case MRT_DEL_MFC:
1479 case MRT_ASSERT:
1480 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1481 return (error);
1483 error = ip_mrouter_set((int)name, connp, checkonly,
1484 (uchar_t *)invalp, inlen);
1485 if (error) {
1486 return (error);
1488 return (0);
1491 return (0);
1494 /* Handle IPPROTO_IPV6 */
1495 static int
1496 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1497 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1499 conn_t *connp = coa->coa_connp;
1500 ip_xmit_attr_t *ixa = coa->coa_ixa;
1501 ip_pkt_t *ipp = coa->coa_ipp;
1502 int *i1 = (int *)invalp;
1503 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1504 uint_t ifindex;
1505 zoneid_t zoneid = IPCL_ZONEID(connp);
1506 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1507 int error;
1509 if (connp->conn_family != AF_INET6)
1510 return (EINVAL);
1512 switch (name) {
1513 case IPV6_MULTICAST_IF:
1515 * The only possible error is EINVAL.
1516 * We call this option on both V4 and V6
1517 * If both fail, then this call returns
1518 * EINVAL. If at least one of them succeeds we
1519 * return success.
1521 ifindex = *(uint_t *)i1;
1523 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1524 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1525 return (EINVAL);
1526 break;
1527 case IPV6_UNICAST_HOPS:
1528 /* Don't allow zero. -1 means to use default */
1529 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1530 return (EINVAL);
1531 break;
1532 case IPV6_MULTICAST_HOPS:
1533 /* -1 means use default */
1534 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1535 return (EINVAL);
1536 break;
1537 case IPV6_MULTICAST_LOOP:
1538 if (*i1 != 0 && *i1 != 1)
1539 return (EINVAL);
1540 break;
1541 case IPV6_BOUND_IF:
1542 ifindex = *(uint_t *)i1;
1544 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1545 return (ENXIO);
1546 break;
1547 case IPV6_PKTINFO: {
1548 struct in6_pktinfo *pkti;
1549 boolean_t isv6;
1551 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1552 return (EINVAL);
1553 if (inlen == 0)
1554 break; /* Clear values below */
1557 * Verify the source address and ifindex. Privileged users
1558 * can use any source address.
1560 pkti = (struct in6_pktinfo *)invalp;
1563 * For link-local addresses we use the ipi6_ifindex when
1564 * we verify the local address.
1565 * If net_rawaccess then any source address can be used.
1567 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1568 secpolicy_net_rawaccess(cr) != 0) {
1569 uint_t scopeid = 0;
1570 in6_addr_t *v6src = &pkti->ipi6_addr;
1571 ipaddr_t v4src;
1572 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1574 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1575 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1576 if (v4src != INADDR_ANY) {
1577 laddr_type = ip_laddr_verify_v4(v4src,
1578 zoneid, ipst, B_FALSE);
1580 } else {
1581 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1582 scopeid = pkti->ipi6_ifindex;
1584 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1585 ipst, B_FALSE, scopeid);
1587 switch (laddr_type) {
1588 case IPVL_UNICAST_UP:
1589 case IPVL_UNICAST_DOWN:
1590 break;
1591 default:
1592 return (EADDRNOTAVAIL);
1594 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1595 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1596 /* Allow any source */
1597 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1599 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1600 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1601 ipst))
1602 return (ENXIO);
1603 break;
1605 case IPV6_HOPLIMIT:
1606 /* It is only allowed as ancilary data */
1607 if (!coa->coa_ancillary)
1608 return (EINVAL);
1610 if (inlen != 0 && inlen != sizeof (int))
1611 return (EINVAL);
1612 if (inlen == sizeof (int)) {
1613 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1614 return (EINVAL);
1616 break;
1617 case IPV6_TCLASS:
1618 if (inlen != 0 && inlen != sizeof (int))
1619 return (EINVAL);
1620 if (inlen == sizeof (int)) {
1621 if (*i1 > 255 || *i1 < -1)
1622 return (EINVAL);
1624 break;
1625 case IPV6_NEXTHOP:
1626 if (inlen != 0 && inlen != sizeof (sin6_t))
1627 return (EINVAL);
1628 if (inlen == sizeof (sin6_t)) {
1629 sin6_t *sin6 = (sin6_t *)invalp;
1630 ire_t *ire;
1632 if (sin6->sin6_family != AF_INET6)
1633 return (EAFNOSUPPORT);
1634 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1635 return (EADDRNOTAVAIL);
1637 /* Verify that the next-hop is on-link */
1638 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1639 0, 0, IRE_ONLINK, NULL, zoneid,
1640 NULL, MATCH_IRE_TYPE, 0, ipst, NULL);
1641 if (ire == NULL)
1642 return (EHOSTUNREACH);
1643 ire_refrele(ire);
1644 break;
1646 break;
1647 case IPV6_RTHDR:
1648 case IPV6_DSTOPTS:
1649 case IPV6_RTHDRDSTOPTS:
1650 case IPV6_HOPOPTS: {
1651 /* All have the length field in the same place */
1652 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1654 * Sanity checks - minimum size, size a multiple of
1655 * eight bytes, and matching size passed in.
1657 if (inlen != 0 &&
1658 inlen != (8 * (hopts->ip6h_len + 1)))
1659 return (EINVAL);
1660 break;
1662 case IPV6_PATHMTU:
1663 /* Can't be set */
1664 return (EINVAL);
1666 case IPV6_USE_MIN_MTU:
1667 if (inlen != sizeof (int))
1668 return (EINVAL);
1669 if (*i1 < -1 || *i1 > 1)
1670 return (EINVAL);
1671 break;
1672 case IPV6_SRC_PREFERENCES:
1673 if (inlen != sizeof (uint32_t))
1674 return (EINVAL);
1675 break;
1676 case IPV6_V6ONLY:
1677 if (*i1 < 0 || *i1 > 1) {
1678 return (EINVAL);
1680 break;
1682 if (checkonly)
1683 return (0);
1685 /* Here we set the actual option value */
1687 * conn_lock protects the bitfields, and is used to
1688 * set the fields atomically. Not needed for ixa settings since
1689 * the caller has an exclusive copy of the ixa.
1690 * We can not hold conn_lock across the multicast options though.
1692 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1693 switch (name) {
1694 case IPV6_MULTICAST_IF:
1695 ixa->ixa_multicast_ifindex = ifindex;
1696 /* Need to redo ip_attr_connect */
1697 coa->coa_changed |= COA_ROUTE_CHANGED;
1698 break;
1699 case IPV6_UNICAST_HOPS:
1700 /* -1 means use default */
1701 mutex_enter(&connp->conn_lock);
1702 if (*i1 == -1) {
1703 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1704 } else {
1705 ipp->ipp_unicast_hops = (uint8_t)*i1;
1707 mutex_exit(&connp->conn_lock);
1708 coa->coa_changed |= COA_HEADER_CHANGED;
1709 break;
1710 case IPV6_MULTICAST_HOPS:
1711 /* -1 means use default */
1712 if (*i1 == -1) {
1713 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1714 } else {
1715 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1717 /* Handled automatically by ip_output */
1718 break;
1719 case IPV6_MULTICAST_LOOP:
1720 if (*i1 != 0)
1721 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1722 else
1723 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1724 /* Handled automatically by ip_output */
1725 break;
1726 case IPV6_JOIN_GROUP:
1727 case IPV6_LEAVE_GROUP:
1728 case MCAST_JOIN_GROUP:
1729 case MCAST_LEAVE_GROUP:
1730 return (ip_opt_set_multicast_group(connp, name,
1731 invalp, B_TRUE, checkonly));
1733 case MCAST_BLOCK_SOURCE:
1734 case MCAST_UNBLOCK_SOURCE:
1735 case MCAST_JOIN_SOURCE_GROUP:
1736 case MCAST_LEAVE_SOURCE_GROUP:
1737 return (ip_opt_set_multicast_sources(connp, name,
1738 invalp, B_TRUE, checkonly));
1740 case IPV6_BOUND_IF:
1741 ixa->ixa_ifindex = ifindex; /* Send */
1742 mutex_enter(&connp->conn_lock);
1743 connp->conn_incoming_ifindex = ifindex; /* Receive */
1744 connp->conn_bound_if = ifindex; /* getsockopt */
1745 mutex_exit(&connp->conn_lock);
1746 coa->coa_changed |= COA_ROUTE_CHANGED;
1747 break;
1748 case IPV6_UNSPEC_SRC:
1749 mutex_enter(&connp->conn_lock);
1750 connp->conn_unspec_src = onoff;
1751 if (onoff)
1752 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1753 else
1754 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1755 mutex_exit(&connp->conn_lock);
1756 break;
1757 case IPV6_RECVPKTINFO:
1758 mutex_enter(&connp->conn_lock);
1759 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1760 mutex_exit(&connp->conn_lock);
1761 break;
1762 case IPV6_RECVTCLASS:
1763 mutex_enter(&connp->conn_lock);
1764 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1765 mutex_exit(&connp->conn_lock);
1766 break;
1767 case IPV6_RECVPATHMTU:
1768 mutex_enter(&connp->conn_lock);
1769 connp->conn_ipv6_recvpathmtu = onoff;
1770 mutex_exit(&connp->conn_lock);
1771 break;
1772 case IPV6_RECVHOPLIMIT:
1773 mutex_enter(&connp->conn_lock);
1774 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1775 onoff;
1776 mutex_exit(&connp->conn_lock);
1777 break;
1778 case IPV6_RECVHOPOPTS:
1779 mutex_enter(&connp->conn_lock);
1780 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1781 mutex_exit(&connp->conn_lock);
1782 break;
1783 case IPV6_RECVDSTOPTS:
1784 mutex_enter(&connp->conn_lock);
1785 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1786 mutex_exit(&connp->conn_lock);
1787 break;
1788 case _OLD_IPV6_RECVDSTOPTS:
1789 mutex_enter(&connp->conn_lock);
1790 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1791 onoff;
1792 mutex_exit(&connp->conn_lock);
1793 break;
1794 case IPV6_RECVRTHDRDSTOPTS:
1795 mutex_enter(&connp->conn_lock);
1796 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1797 onoff;
1798 mutex_exit(&connp->conn_lock);
1799 break;
1800 case IPV6_RECVRTHDR:
1801 mutex_enter(&connp->conn_lock);
1802 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1803 mutex_exit(&connp->conn_lock);
1804 break;
1805 case IPV6_PKTINFO:
1806 mutex_enter(&connp->conn_lock);
1807 if (inlen == 0) {
1808 ipp->ipp_fields &= ~IPPF_ADDR;
1809 ipp->ipp_addr = ipv6_all_zeros;
1810 ixa->ixa_ifindex = 0;
1811 } else {
1812 struct in6_pktinfo *pkti;
1814 pkti = (struct in6_pktinfo *)invalp;
1815 ipp->ipp_addr = pkti->ipi6_addr;
1816 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1817 ipp->ipp_fields |= IPPF_ADDR;
1818 else
1819 ipp->ipp_fields &= ~IPPF_ADDR;
1820 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1822 mutex_exit(&connp->conn_lock);
1823 /* Source and ifindex might have changed */
1824 coa->coa_changed |= COA_HEADER_CHANGED;
1825 coa->coa_changed |= COA_ROUTE_CHANGED;
1826 break;
1827 case IPV6_HOPLIMIT:
1828 mutex_enter(&connp->conn_lock);
1829 if (inlen == 0 || *i1 == -1) {
1830 /* Revert to default */
1831 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1832 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1833 } else {
1834 ipp->ipp_hoplimit = *i1;
1835 ipp->ipp_fields |= IPPF_HOPLIMIT;
1836 /* Ensure that it sticks for multicast packets */
1837 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1839 mutex_exit(&connp->conn_lock);
1840 coa->coa_changed |= COA_HEADER_CHANGED;
1841 break;
1842 case IPV6_TCLASS:
1844 * IPV6_TCLASS accepts -1 as use kernel default
1845 * and [0, 255] as the actualy traffic class.
1847 mutex_enter(&connp->conn_lock);
1848 if (inlen == 0 || *i1 == -1) {
1849 ipp->ipp_tclass = 0;
1850 ipp->ipp_fields &= ~IPPF_TCLASS;
1851 } else {
1852 ipp->ipp_tclass = *i1;
1853 ipp->ipp_fields |= IPPF_TCLASS;
1855 mutex_exit(&connp->conn_lock);
1856 coa->coa_changed |= COA_HEADER_CHANGED;
1857 break;
1858 case IPV6_NEXTHOP:
1859 if (inlen == 0) {
1860 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1861 } else {
1862 sin6_t *sin6 = (sin6_t *)invalp;
1864 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1865 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1866 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1867 else
1868 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1870 coa->coa_changed |= COA_ROUTE_CHANGED;
1871 break;
1872 case IPV6_HOPOPTS:
1873 mutex_enter(&connp->conn_lock);
1874 error = optcom_pkt_set(invalp, inlen,
1875 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1876 if (error != 0) {
1877 mutex_exit(&connp->conn_lock);
1878 return (error);
1880 if (ipp->ipp_hopoptslen == 0) {
1881 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1882 } else {
1883 ipp->ipp_fields |= IPPF_HOPOPTS;
1885 mutex_exit(&connp->conn_lock);
1886 coa->coa_changed |= COA_HEADER_CHANGED;
1887 coa->coa_changed |= COA_WROFF_CHANGED;
1888 break;
1889 case IPV6_RTHDRDSTOPTS:
1890 mutex_enter(&connp->conn_lock);
1891 error = optcom_pkt_set(invalp, inlen,
1892 (uchar_t **)&ipp->ipp_rthdrdstopts,
1893 &ipp->ipp_rthdrdstoptslen);
1894 if (error != 0) {
1895 mutex_exit(&connp->conn_lock);
1896 return (error);
1898 if (ipp->ipp_rthdrdstoptslen == 0) {
1899 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1900 } else {
1901 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1903 mutex_exit(&connp->conn_lock);
1904 coa->coa_changed |= COA_HEADER_CHANGED;
1905 coa->coa_changed |= COA_WROFF_CHANGED;
1906 break;
1907 case IPV6_DSTOPTS:
1908 mutex_enter(&connp->conn_lock);
1909 error = optcom_pkt_set(invalp, inlen,
1910 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1911 if (error != 0) {
1912 mutex_exit(&connp->conn_lock);
1913 return (error);
1915 if (ipp->ipp_dstoptslen == 0) {
1916 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1917 } else {
1918 ipp->ipp_fields |= IPPF_DSTOPTS;
1920 mutex_exit(&connp->conn_lock);
1921 coa->coa_changed |= COA_HEADER_CHANGED;
1922 coa->coa_changed |= COA_WROFF_CHANGED;
1923 break;
1924 case IPV6_RTHDR:
1925 mutex_enter(&connp->conn_lock);
1926 error = optcom_pkt_set(invalp, inlen,
1927 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1928 if (error != 0) {
1929 mutex_exit(&connp->conn_lock);
1930 return (error);
1932 if (ipp->ipp_rthdrlen == 0) {
1933 ipp->ipp_fields &= ~IPPF_RTHDR;
1934 } else {
1935 ipp->ipp_fields |= IPPF_RTHDR;
1937 mutex_exit(&connp->conn_lock);
1938 coa->coa_changed |= COA_HEADER_CHANGED;
1939 coa->coa_changed |= COA_WROFF_CHANGED;
1940 break;
1942 case IPV6_DONTFRAG:
1943 if (onoff) {
1944 ixa->ixa_flags |= IXAF_DONTFRAG;
1945 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1946 } else {
1947 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1948 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1950 /* Need to redo ip_attr_connect */
1951 coa->coa_changed |= COA_ROUTE_CHANGED;
1952 break;
1954 case IPV6_USE_MIN_MTU:
1955 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1956 ixa->ixa_use_min_mtu = *i1;
1957 /* Need to redo ip_attr_connect */
1958 coa->coa_changed |= COA_ROUTE_CHANGED;
1959 break;
1961 case IPV6_SEC_OPT:
1962 mutex_enter(&connp->conn_lock);
1963 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1964 mutex_exit(&connp->conn_lock);
1965 if (error != 0) {
1966 return (error);
1968 /* This is an IPsec policy change - redo ip_attr_connect */
1969 coa->coa_changed |= COA_ROUTE_CHANGED;
1970 break;
1971 case IPV6_SRC_PREFERENCES:
1973 * This socket option only affects connected
1974 * sockets that haven't already bound to a specific
1975 * IPv6 address. In other words, sockets that
1976 * don't call bind() with an address other than the
1977 * unspecified address and that call connect().
1978 * ip_set_destination_v6() passes these preferences
1979 * to the ipif_select_source_v6() function.
1981 mutex_enter(&connp->conn_lock);
1982 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1983 mutex_exit(&connp->conn_lock);
1984 if (error != 0) {
1985 return (error);
1987 break;
1988 case IPV6_V6ONLY:
1989 mutex_enter(&connp->conn_lock);
1990 connp->conn_ipv6_v6only = onoff;
1991 mutex_exit(&connp->conn_lock);
1992 break;
1994 return (0);
1997 /* Handle IPPROTO_UDP */
1998 /* ARGSUSED1 */
1999 static int
2000 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2001 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2003 conn_t *connp = coa->coa_connp;
2004 int *i1 = (int *)invalp;
2005 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2006 int error;
2008 switch (name) {
2009 case UDP_ANONPRIVBIND:
2010 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
2011 return (error);
2013 break;
2015 if (checkonly)
2016 return (0);
2018 /* Here we set the actual option value */
2019 mutex_enter(&connp->conn_lock);
2020 switch (name) {
2021 case UDP_ANONPRIVBIND:
2022 connp->conn_anon_priv_bind = onoff;
2023 break;
2024 case UDP_EXCLBIND:
2025 connp->conn_exclbind = onoff;
2026 break;
2028 mutex_exit(&connp->conn_lock);
2029 return (0);
2032 /* Handle IPPROTO_TCP */
2033 /* ARGSUSED1 */
2034 static int
2035 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
2036 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
2038 conn_t *connp = coa->coa_connp;
2039 int *i1 = (int *)invalp;
2040 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2041 int error;
2043 switch (name) {
2044 case TCP_ANONPRIVBIND:
2045 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2046 return (error);
2048 break;
2050 if (checkonly)
2051 return (0);
2053 /* Here we set the actual option value */
2054 mutex_enter(&connp->conn_lock);
2055 switch (name) {
2056 case TCP_ANONPRIVBIND:
2057 connp->conn_anon_priv_bind = onoff;
2058 break;
2059 case TCP_EXCLBIND:
2060 connp->conn_exclbind = onoff;
2061 break;
2062 case TCP_RECVDSTADDR:
2063 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2064 break;
2066 mutex_exit(&connp->conn_lock);
2067 return (0);
2071 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2073 sin_t *sin;
2074 sin6_t *sin6;
2076 if (connp->conn_family == AF_INET) {
2077 if (*salenp < sizeof (sin_t))
2078 return (EINVAL);
2080 *salenp = sizeof (sin_t);
2081 /* Fill zeroes and then initialize non-zero fields */
2082 sin = (sin_t *)sa;
2083 *sin = sin_null;
2084 sin->sin_family = AF_INET;
2085 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2086 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2087 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2088 } else {
2090 * INADDR_ANY
2091 * conn_saddr is not set, we might be bound to
2092 * broadcast/multicast. Use conn_bound_addr as
2093 * local address instead (that could
2094 * also still be INADDR_ANY)
2096 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2098 sin->sin_port = connp->conn_lport;
2099 } else {
2100 if (*salenp < sizeof (sin6_t))
2101 return (EINVAL);
2103 *salenp = sizeof (sin6_t);
2104 /* Fill zeroes and then initialize non-zero fields */
2105 sin6 = (sin6_t *)sa;
2106 *sin6 = sin6_null;
2107 sin6->sin6_family = AF_INET6;
2108 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2109 sin6->sin6_addr = connp->conn_saddr_v6;
2110 } else {
2112 * conn_saddr is not set, we might be bound to
2113 * broadcast/multicast. Use conn_bound_addr as
2114 * local address instead (which could
2115 * also still be unspecified)
2117 sin6->sin6_addr = connp->conn_bound_addr_v6;
2119 sin6->sin6_port = connp->conn_lport;
2120 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2121 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2122 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2124 return (0);
2128 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2130 struct sockaddr_in *sin;
2131 struct sockaddr_in6 *sin6;
2133 if (connp->conn_family == AF_INET) {
2134 if (*salenp < sizeof (sin_t))
2135 return (EINVAL);
2137 *salenp = sizeof (sin_t);
2138 /* initialize */
2139 sin = (sin_t *)sa;
2140 *sin = sin_null;
2141 sin->sin_family = AF_INET;
2142 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2143 sin->sin_port = connp->conn_fport;
2144 } else {
2145 if (*salenp < sizeof (sin6_t))
2146 return (EINVAL);
2148 *salenp = sizeof (sin6_t);
2149 /* initialize */
2150 sin6 = (sin6_t *)sa;
2151 *sin6 = sin6_null;
2152 sin6->sin6_family = AF_INET6;
2153 sin6->sin6_addr = connp->conn_faddr_v6;
2154 sin6->sin6_port = connp->conn_fport;
2155 sin6->sin6_flowinfo = connp->conn_flowinfo;
2156 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2157 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2158 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2160 return (0);
2163 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2164 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2167 * Allocate and fill in conn_ht_iphc based on the current information
2168 * in the conn.
2169 * Normally used when we bind() and connect().
2170 * Returns failure if can't allocate memory, or if there is a problem
2171 * with a routing header/option.
2173 * We allocate space for the transport header (ulp_hdr_len + extra) and
2174 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2175 * The extra is there for transports that want some spare room for future
2176 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2177 * excludes the extra part.
2179 * We massage a routing option/header and store the checksum difference
2180 * in conn_sum.
2182 * Caller needs to update conn_wroff if desired.
2185 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2186 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2188 ip_xmit_attr_t *ixa = connp->conn_ixa;
2189 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2190 uint_t ip_hdr_length;
2191 uchar_t *hdrs;
2192 uint_t hdrs_len;
2194 ASSERT(MUTEX_HELD(&connp->conn_lock));
2196 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2197 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2198 /* In case of TX label and IP options it can be too much */
2199 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2200 /* Preserves existing TX errno for this */
2201 return (EHOSTUNREACH);
2203 } else {
2204 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2206 ixa->ixa_ip_hdr_length = ip_hdr_length;
2207 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2208 ASSERT(hdrs_len != 0);
2210 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2211 /* Allocate new before we free any old */
2212 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2213 if (hdrs == NULL)
2214 return (ENOMEM);
2216 if (connp->conn_ht_iphc != NULL) {
2217 kmem_free(connp->conn_ht_iphc,
2218 connp->conn_ht_iphc_allocated);
2220 connp->conn_ht_iphc = hdrs;
2221 connp->conn_ht_iphc_allocated = hdrs_len;
2222 } else {
2223 hdrs = connp->conn_ht_iphc;
2225 hdrs_len -= extra;
2226 connp->conn_ht_iphc_len = hdrs_len;
2228 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2229 connp->conn_ht_ulp_len = ulp_hdr_length;
2231 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2232 ipha_t *ipha = (ipha_t *)hdrs;
2234 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2235 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2236 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2237 ipha->ipha_length = htons(hdrs_len);
2238 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2239 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2240 else
2241 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2243 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2244 connp->conn_sum = cksum_massage_options_v4(ipha,
2245 connp->conn_netstack);
2246 } else {
2247 connp->conn_sum = 0;
2249 } else {
2250 ip6_t *ip6h = (ip6_t *)hdrs;
2252 ip6h->ip6_src = *v6src;
2253 ip6h->ip6_dst = *v6dst;
2254 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2255 flowinfo);
2256 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2258 if (ipp->ipp_fields & IPPF_RTHDR) {
2259 connp->conn_sum = cksum_massage_options_v6(ip6h,
2260 ip_hdr_length, connp->conn_netstack);
2263 * Verify that the first hop isn't a mapped address.
2264 * Routers along the path need to do this verification
2265 * for subsequent hops.
2267 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2268 return (EADDRNOTAVAIL);
2270 } else {
2271 connp->conn_sum = 0;
2274 return (0);
2278 * Prepend a header template to data_mp based on the ip_pkt_t
2279 * and the passed in source, destination and protocol.
2281 * Returns failure if can't allocate memory, in which case data_mp is freed.
2282 * We allocate space for the transport header (ulp_hdr_len) and
2283 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2285 * We massage an routing option/header and return the ckecksum difference
2286 * in *sump. This is in host byte order.
2288 * Caller needs to update conn_wroff if desired.
2290 mblk_t *
2291 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2292 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2293 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2294 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2296 uint_t ip_hdr_length;
2297 uchar_t *hdrs;
2298 uint_t hdrs_len;
2299 mblk_t *mp;
2301 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2302 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2303 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2304 } else {
2305 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2307 hdrs_len = ip_hdr_length + ulp_hdr_length;
2308 ASSERT(hdrs_len != 0);
2310 ixa->ixa_ip_hdr_length = ip_hdr_length;
2312 /* Can we prepend to data_mp? */
2313 if (data_mp != NULL &&
2314 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2315 data_mp->b_datap->db_ref == 1) {
2316 hdrs = data_mp->b_rptr - hdrs_len;
2317 data_mp->b_rptr = hdrs;
2318 mp = data_mp;
2319 } else {
2320 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2321 if (mp == NULL) {
2322 freemsg(data_mp);
2323 *errorp = ENOMEM;
2324 return (NULL);
2326 mp->b_wptr = mp->b_datap->db_lim;
2327 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2328 mp->b_cont = data_mp;
2332 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2333 * if PKTINFO (aka IPPF_ADDR) was set.
2335 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2336 ipha_t *ipha = (ipha_t *)hdrs;
2338 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2339 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2340 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2341 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2342 ipha->ipha_length = htons(hdrs_len + data_length);
2343 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2344 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2345 else
2346 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2348 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2349 *sump = cksum_massage_options_v4(ipha,
2350 ixa->ixa_ipst->ips_netstack);
2351 } else {
2352 *sump = 0;
2354 } else {
2355 ip6_t *ip6h = (ip6_t *)hdrs;
2357 ip6h->ip6_src = *v6src;
2358 ip6h->ip6_dst = *v6dst;
2359 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2360 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2362 if (ipp->ipp_fields & IPPF_RTHDR) {
2363 *sump = cksum_massage_options_v6(ip6h,
2364 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2367 * Verify that the first hop isn't a mapped address.
2368 * Routers along the path need to do this verification
2369 * for subsequent hops.
2371 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2372 *errorp = EADDRNOTAVAIL;
2373 freemsg(mp);
2374 return (NULL);
2376 } else {
2377 *sump = 0;
2380 return (mp);
2384 * Massage a source route if any putting the first hop
2385 * in ipha_dst. Compute a starting value for the checksum which
2386 * takes into account that the original ipha_dst should be
2387 * included in the checksum but that IP will include the
2388 * first hop from the source route in the tcp checksum.
2390 static uint32_t
2391 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2393 in_addr_t dst;
2394 uint32_t cksum;
2396 /* Get last hop then diff against first hop */
2397 cksum = ip_massage_options(ipha, ns);
2398 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2399 dst = ipha->ipha_dst;
2400 cksum -= ((dst >> 16) + (dst & 0xffff));
2401 if ((int)cksum < 0)
2402 cksum--;
2403 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2404 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2405 ASSERT(cksum < 0x10000);
2406 return (ntohs(cksum));
2409 static uint32_t
2410 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2412 uint8_t *end;
2413 ip6_rthdr_t *rth;
2414 uint32_t cksum;
2416 end = (uint8_t *)ip6h + ip_hdr_len;
2417 rth = ip_find_rthdr_v6(ip6h, end);
2418 if (rth == NULL)
2419 return (0);
2421 cksum = ip_massage_options_v6(ip6h, rth, ns);
2422 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2423 ASSERT(cksum < 0x10000);
2424 return (ntohs(cksum));
2428 * ULPs that change the destination address need to call this for each
2429 * change to discard any state about a previous destination that might
2430 * have been multicast or multirt.
2432 void
2433 ip_attr_newdst(ip_xmit_attr_t *ixa)
2435 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2436 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2437 IXAF_NO_LOOP_ZONEID_SET);
2441 * Determine the nexthop which will be used.
2442 * Normally this is just the destination, but if a IPv4 source route, or
2443 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2444 * there.
2446 void
2447 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2448 const in6_addr_t *dst, in6_addr_t *nexthop)
2450 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2451 *nexthop = *dst;
2452 return;
2454 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2455 ipaddr_t v4dst;
2456 ipaddr_t v4nexthop;
2458 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2459 v4nexthop = ip_pkt_source_route_v4(ipp);
2460 if (v4nexthop == INADDR_ANY)
2461 v4nexthop = v4dst;
2463 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2464 } else {
2465 const in6_addr_t *v6nexthop;
2467 v6nexthop = ip_pkt_source_route_v6(ipp);
2468 if (v6nexthop == NULL)
2469 v6nexthop = dst;
2471 *nexthop = *v6nexthop;
2476 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2477 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2478 * case (connected latching is done in conn_connect).
2479 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2480 * set, but doesn't otherwise use the conn_t.
2482 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2483 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2485 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2486 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2488 * Updates laddrp and uinfo if they are non-NULL.
2490 * TSOL notes: The callers if ip_attr_connect must check if the destination
2491 * is different than before and in that case redo conn_update_label.
2492 * The callers of conn_connect do not need that since conn_connect
2493 * performs the conn_update_label.
2496 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2497 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2498 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2499 iulp_t *uinfo, uint32_t flags)
2501 in6_addr_t laddr = *v6src;
2502 int error;
2504 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2506 if (connp->conn_zone_is_global)
2507 flags |= IPDF_ZONE_IS_GLOBAL;
2508 else
2509 flags &= ~IPDF_ZONE_IS_GLOBAL;
2512 * Lookup the route to determine a source address and the uinfo.
2513 * If the ULP has a source route option then the caller will
2514 * have set v6nexthop to be the first hop.
2516 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2517 ipaddr_t v4dst;
2518 ipaddr_t v4src, v4nexthop;
2520 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2521 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2522 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2524 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2525 flags &= ~IPDF_SELECT_SRC;
2526 else
2527 flags |= IPDF_SELECT_SRC;
2529 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2530 uinfo, flags, connp->conn_mac_mode);
2531 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2532 } else {
2533 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2534 flags &= ~IPDF_SELECT_SRC;
2535 else
2536 flags |= IPDF_SELECT_SRC;
2538 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2539 uinfo, flags, connp->conn_mac_mode);
2541 /* Pass out some address even if we hit a RTF_REJECT etc */
2542 if (laddrp != NULL)
2543 *laddrp = laddr;
2545 if (error != 0)
2546 return (error);
2548 if (flags & IPDF_IPSEC) {
2550 * Set any IPsec policy in ixa. Routine also looks at ULP
2551 * ports.
2553 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2555 return (0);
2559 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2560 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2561 * usable for SCTP, since SCTP has multiple faddrs.
2563 * Caller must hold conn_lock to provide atomic constency between the
2564 * conn_t's addresses and the ixa.
2565 * NOTE: this function drops and reaquires conn_lock since it can't be
2566 * held across ip_attr_connect/ip_set_destination.
2568 * The caller needs to handle inserting in the receive-side fanout when
2569 * appropriate after conn_connect returns.
2572 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2574 ip_xmit_attr_t *ixa = connp->conn_ixa;
2575 in6_addr_t nexthop;
2576 in6_addr_t saddr, faddr;
2577 in_port_t fport;
2578 int error;
2580 ASSERT(MUTEX_HELD(&connp->conn_lock));
2582 if (connp->conn_ipversion == IPV4_VERSION)
2583 ixa->ixa_flags |= IXAF_IS_IPV4;
2584 else
2585 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2587 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2588 flags &= ~IPDF_IPSEC;
2590 /* In case we had previously done an ip_attr_connect */
2591 ip_attr_newdst(ixa);
2594 * Determine the nexthop and copy the addresses before dropping
2595 * conn_lock.
2597 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2598 &connp->conn_faddr_v6, &nexthop);
2599 saddr = connp->conn_saddr_v6;
2600 faddr = connp->conn_faddr_v6;
2601 fport = connp->conn_fport;
2603 mutex_exit(&connp->conn_lock);
2604 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2605 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2606 mutex_enter(&connp->conn_lock);
2608 /* Could have changed even if an error */
2609 connp->conn_saddr_v6 = saddr;
2610 if (error != 0)
2611 return (error);
2614 * Check whether Trusted Solaris policy allows communication with this
2615 * host, and pretend that the destination is unreachable if not.
2616 * Compute any needed label and place it in ipp_label_v4/v6.
2618 * Later conn_build_hdr_template() takes ipp_label_v4/v6 to form
2619 * the packet.
2621 * TSOL Note: Any concurrent threads would pick a different ixa
2622 * (and ipp if they are to change the ipp) so we
2623 * don't have to worry about concurrent threads.
2625 if (is_system_labeled()) {
2626 if (connp->conn_mlp_type != mlptSingle)
2627 return (ECONNREFUSED);
2630 * conn_update_label will set ipp_label* which will later
2631 * be used by conn_build_hdr_template.
2633 error = conn_update_label(connp, ixa,
2634 &connp->conn_faddr_v6, &connp->conn_xmit_ipp);
2635 if (error != 0)
2636 return (error);
2640 * Ensure that we match on the selected local address.
2641 * This overrides conn_laddr in the case we had earlier bound to a
2642 * multicast or broadcast address.
2644 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2647 * Allow setting new policies.
2648 * The addresses/ports are already set, thus the IPsec policy calls
2649 * can handle their passed-in conn's.
2651 connp->conn_policy_cached = B_FALSE;
2654 * Cache IPsec policy in this conn. If we have per-socket policy,
2655 * we'll cache that. If we don't, we'll inherit global policy.
2657 * This is done before the caller inserts in the receive-side fanout.
2658 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2659 * for connections where we don't have a policy. This is to prevent
2660 * global policy lookups in the inbound path.
2662 * If we insert before we set conn_policy_cached,
2663 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2664 * because global policy cound be non-empty. We normally call
2665 * ipsec_check_policy() for conn_policy_cached connections only if
2666 * conn_in_enforce_policy is set. But in this case,
2667 * conn_policy_cached can get set anytime since we made the
2668 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2669 * called, which will make the above assumption false. Thus, we
2670 * need to insert after we set conn_policy_cached.
2672 error = ipsec_conn_cache_policy(connp,
2673 connp->conn_ipversion == IPV4_VERSION);
2674 if (error != 0)
2675 return (error);
2678 * We defer to do LSO check until here since now we have better idea
2679 * whether IPsec is present. If the underlying ill is LSO capable,
2680 * copy its capability in so the ULP can decide whether to enable LSO
2681 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2682 * claim LSO for IPv6.
2684 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2685 * the receiver can not handle it. Also not to enable LSO for MULTIRT.
2687 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2689 ASSERT(ixa->ixa_ire != NULL);
2690 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2691 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2692 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2693 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2694 (ixa->ixa_nce != NULL) &&
2695 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2696 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2697 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2698 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2699 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2702 /* Check whether ZEROCOPY capability is usable for this connection. */
2703 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2705 if ((flags & IPDF_ZCOPY) &&
2706 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2707 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2708 !(ixa->ixa_ire->ire_flags & RTF_MULTIRT) &&
2709 (ixa->ixa_nce != NULL) &&
2710 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2711 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2713 return (0);
2717 * Predicates to check if the addresses match conn_last*
2721 * Compare the conn against an address.
2722 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2724 boolean_t
2725 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2727 ASSERT(connp->conn_family == AF_INET);
2728 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2729 sin->sin_port == connp->conn_lastdstport);
2733 * Compare, including for mapped addresses
2735 boolean_t
2736 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2738 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2739 sin6->sin6_port == connp->conn_lastdstport &&
2740 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2741 sin6->sin6_scope_id == connp->conn_lastscopeid);
2745 * Compute a label and place it in the ip_packet_t.
2746 * Handles IPv4 and IPv6.
2747 * The caller should have a correct ixa_tsl and ixa_zoneid and have
2748 * already called conn_connect or ip_attr_connect to ensure that tsol_check_dest
2749 * has been called.
2752 conn_update_label(const conn_t *connp, const ip_xmit_attr_t *ixa,
2753 const in6_addr_t *v6dst, ip_pkt_t *ipp)
2755 int err;
2756 ipaddr_t v4dst;
2758 if (IN6_IS_ADDR_V4MAPPED(v6dst)) {
2759 uchar_t opt_storage[IP_MAX_OPT_LENGTH];
2761 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2763 err = tsol_compute_label_v4(ixa->ixa_tsl, ixa->ixa_zoneid,
2764 v4dst, opt_storage, ixa->ixa_ipst);
2765 if (err == 0) {
2766 /* Length contained in opt_storage[IPOPT_OLEN] */
2767 err = optcom_pkt_set(opt_storage,
2768 opt_storage[IPOPT_OLEN],
2769 (uchar_t **)&ipp->ipp_label_v4,
2770 &ipp->ipp_label_len_v4);
2772 if (err != 0) {
2773 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2774 char *, "conn(1) failed to update options(2) "
2775 "on ixa(3)",
2776 conn_t *, connp, char *, opt_storage,
2777 ip_xmit_attr_t *, ixa);
2779 if (ipp->ipp_label_len_v4 != 0)
2780 ipp->ipp_fields |= IPPF_LABEL_V4;
2781 else
2782 ipp->ipp_fields &= ~IPPF_LABEL_V4;
2783 } else {
2784 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION];
2785 uint_t optlen;
2787 err = tsol_compute_label_v6(ixa->ixa_tsl, ixa->ixa_zoneid,
2788 v6dst, opt_storage, ixa->ixa_ipst);
2789 if (err == 0) {
2791 * Note that ipp_label_v6 is just the option - not
2792 * the hopopts extension header.
2794 * Length contained in opt_storage[IPOPT_OLEN], but
2795 * that doesn't include the two byte options header.
2797 optlen = opt_storage[IPOPT_OLEN];
2798 if (optlen != 0)
2799 optlen += 2;
2801 err = optcom_pkt_set(opt_storage, optlen,
2802 (uchar_t **)&ipp->ipp_label_v6,
2803 &ipp->ipp_label_len_v6);
2805 if (err != 0) {
2806 DTRACE_PROBE4(tx__ip__log__info__updatelabel,
2807 char *, "conn(1) failed to update options(2) "
2808 "on ixa(3)",
2809 conn_t *, connp, char *, opt_storage,
2810 ip_xmit_attr_t *, ixa);
2812 if (ipp->ipp_label_len_v6 != 0)
2813 ipp->ipp_fields |= IPPF_LABEL_V6;
2814 else
2815 ipp->ipp_fields &= ~IPPF_LABEL_V6;
2817 return (err);
2821 * Inherit all options settings from the parent/listener to the eager.
2822 * Returns zero on success; ENOMEM if memory allocation failed.
2824 * We assume that the eager has not had any work done i.e., the conn_ixa
2825 * and conn_xmit_ipp are all zero.
2826 * Furthermore we assume that no other thread can access the eager (because
2827 * it isn't inserted in any fanout list).
2830 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2832 cred_t *credp;
2833 int err;
2834 void *notify_cookie;
2835 uint32_t xmit_hint;
2837 econnp->conn_family = lconnp->conn_family;
2838 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2839 econnp->conn_wq = lconnp->conn_wq;
2840 econnp->conn_rq = lconnp->conn_rq;
2843 * Make a safe copy of the transmit attributes.
2844 * conn_connect will later be used by the caller to setup the ire etc.
2846 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2847 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2848 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2849 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2851 /* Preserve ixa_notify_cookie and xmit_hint */
2852 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2853 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2854 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2855 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2856 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2858 econnp->conn_bound_if = lconnp->conn_bound_if;
2859 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2861 /* Inherit all RECV options */
2862 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2864 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2865 KM_NOSLEEP);
2866 if (err != 0)
2867 return (err);
2869 econnp->conn_zoneid = lconnp->conn_zoneid;
2870 econnp->conn_allzones = lconnp->conn_allzones;
2872 /* This is odd. Pick a flowlabel for each connection instead? */
2873 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2875 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2878 * TSOL: tsol_input_proc() needs the eager's cred before the
2879 * eager is accepted
2881 ASSERT(lconnp->conn_cred != NULL);
2882 econnp->conn_cred = credp = lconnp->conn_cred;
2883 crhold(credp);
2884 econnp->conn_cpid = lconnp->conn_cpid;
2885 econnp->conn_open_time = ddi_get_lbolt64();
2888 * Cache things in the ixa without any refhold.
2889 * Listener might not have set up ixa_cred
2891 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2892 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2893 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2894 if (is_system_labeled())
2895 econnp->conn_ixa->ixa_tsl = crgetlabel(econnp->conn_cred);
2898 * If the caller has the process-wide flag set, then default to MAC
2899 * exempt mode. This allows read-down to unlabeled hosts.
2901 if (getpflags(NET_MAC_AWARE, credp) != 0)
2902 econnp->conn_mac_mode = CONN_MAC_AWARE;
2904 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2907 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2908 * via soaccept()->soinheritoptions() which essentially applies
2909 * all the listener options to the new connection. The options that we
2910 * need to take care of are:
2911 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2912 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2913 * SO_SNDBUF, SO_RCVBUF.
2915 * SO_RCVBUF: conn_rcvbuf is set.
2916 * SO_SNDBUF: conn_sndbuf is set.
2919 /* Could we define a struct and use a struct copy for this? */
2920 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2921 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2922 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2923 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2924 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2925 econnp->conn_oobinline = lconnp->conn_oobinline;
2926 econnp->conn_debug = lconnp->conn_debug;
2927 econnp->conn_keepalive = lconnp->conn_keepalive;
2928 econnp->conn_linger = lconnp->conn_linger;
2929 econnp->conn_lingertime = lconnp->conn_lingertime;
2931 /* Set the IP options */
2932 econnp->conn_broadcast = lconnp->conn_broadcast;
2933 econnp->conn_useloopback = lconnp->conn_useloopback;
2934 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2935 return (0);