Merge commit '80d5689f5d4588adc071138e25e9d0d5252d9b55'
[unleashed.git] / kernel / net / ip / conn_opt.c
blobf6baadf502cac3933c83a52dd4b5371942cd3c4c
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1990 Mentat Inc. */
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsun.h>
30 #define _SUN_TPI_VERSION 2
31 #include <sys/tihdr.h>
32 #include <sys/xti_inet.h>
33 #include <sys/ucred.h>
34 #include <sys/zone.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/atomic.h>
40 #include <sys/policy.h>
42 #include <sys/systm.h>
43 #include <sys/param.h>
44 #include <sys/kmem.h>
45 #include <sys/sdt.h>
46 #include <sys/socket.h>
47 #include <sys/ethernet.h>
48 #include <sys/mac.h>
49 #include <net/if.h>
50 #include <net/if_types.h>
51 #include <net/if_arp.h>
52 #include <net/route.h>
53 #include <sys/sockio.h>
54 #include <netinet/in.h>
55 #include <net/if_dl.h>
57 #include <inet/common.h>
58 #include <inet/mi.h>
59 #include <inet/mib2.h>
60 #include <inet/nd.h>
61 #include <inet/arp.h>
62 #include <inet/snmpcom.h>
63 #include <inet/kstatcom.h>
65 #include <netinet/igmp_var.h>
66 #include <netinet/ip6.h>
67 #include <netinet/icmp6.h>
68 #include <netinet/sctp.h>
70 #include <inet/ip.h>
71 #include <inet/ip_impl.h>
72 #include <inet/ip6.h>
73 #include <inet/ip6_asp.h>
74 #include <inet/tcp.h>
75 #include <inet/ip_multi.h>
76 #include <inet/ip_if.h>
77 #include <inet/ip_ire.h>
78 #include <inet/ip_ftable.h>
79 #include <inet/ip_rts.h>
80 #include <inet/optcom.h>
81 #include <inet/ip_ndp.h>
82 #include <inet/ip_listutils.h>
83 #include <netinet/igmp.h>
84 #include <netinet/ip_mroute.h>
85 #include <netinet/udp.h>
86 #include <inet/ipp_common.h>
88 #include <net/pfkeyv2.h>
89 #include <inet/sadb.h>
90 #include <inet/ipsec_impl.h>
91 #include <inet/ipdrop.h>
92 #include <inet/ip_netinfo.h>
94 #include <inet/ipclassifier.h>
95 #include <inet/sctp_ip.h>
96 #include <inet/sctp/sctp_impl.h>
97 #include <inet/udp_impl.h>
98 #include <sys/sunddi.h>
101 * Return how much size is needed for the different ancillary data items
103 uint_t
104 conn_recvancillary_size(conn_t *connp, crb_t recv_ancillary,
105 ip_recv_attr_t *ira, mblk_t *mp, ip_pkt_t *ipp)
107 uint_t ancil_size;
108 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
111 * If IP_RECVDSTADDR is set we include the destination IP
112 * address as an option. With IP_RECVOPTS we include all
113 * the IP options.
115 ancil_size = 0;
116 if (recv_ancillary.crb_recvdstaddr &&
117 (ira->ira_flags & IRAF_IS_IPV4)) {
118 ancil_size += sizeof (struct T_opthdr) +
119 sizeof (struct in_addr);
120 IP_STAT(ipst, conn_in_recvdstaddr);
124 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
125 * are different
127 if (recv_ancillary.crb_ip_recvpktinfo &&
128 connp->conn_family == AF_INET) {
129 ancil_size += sizeof (struct T_opthdr) +
130 sizeof (struct in_pktinfo);
131 IP_STAT(ipst, conn_in_recvpktinfo);
134 if ((recv_ancillary.crb_recvopts) &&
135 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
136 ancil_size += sizeof (struct T_opthdr) +
137 ipp->ipp_ipv4_options_len;
138 IP_STAT(ipst, conn_in_recvopts);
141 if (recv_ancillary.crb_recvslla) {
142 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
143 ill_t *ill;
145 /* Make sure ira_l2src is setup if not already */
146 if (!(ira->ira_flags & IRAF_L2SRC_SET)) {
147 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE,
148 ipst);
149 if (ill != NULL) {
150 ip_setl2src(mp, ira, ill);
151 ill_refrele(ill);
154 ancil_size += sizeof (struct T_opthdr) +
155 sizeof (struct sockaddr_dl);
156 IP_STAT(ipst, conn_in_recvslla);
159 if (recv_ancillary.crb_recvif) {
160 ancil_size += sizeof (struct T_opthdr) + sizeof (uint_t);
161 IP_STAT(ipst, conn_in_recvif);
165 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
166 * are different
168 if (recv_ancillary.crb_ip_recvpktinfo &&
169 connp->conn_family == AF_INET6) {
170 ancil_size += sizeof (struct T_opthdr) +
171 sizeof (struct in6_pktinfo);
172 IP_STAT(ipst, conn_in_recvpktinfo);
175 if (recv_ancillary.crb_ipv6_recvhoplimit) {
176 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
177 IP_STAT(ipst, conn_in_recvhoplimit);
180 if (recv_ancillary.crb_ipv6_recvtclass) {
181 ancil_size += sizeof (struct T_opthdr) + sizeof (int);
182 IP_STAT(ipst, conn_in_recvtclass);
185 if (recv_ancillary.crb_ipv6_recvhopopts &&
186 (ipp->ipp_fields & IPPF_HOPOPTS)) {
187 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
188 IP_STAT(ipst, conn_in_recvhopopts);
191 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
192 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
193 * options that appear before a routing header.
194 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
196 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
197 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
198 (recv_ancillary.crb_ipv6_recvdstopts &&
199 recv_ancillary.crb_ipv6_recvrthdr)) {
200 ancil_size += sizeof (struct T_opthdr) +
201 ipp->ipp_rthdrdstoptslen;
202 IP_STAT(ipst, conn_in_recvrthdrdstopts);
205 if ((recv_ancillary.crb_ipv6_recvrthdr) &&
206 (ipp->ipp_fields & IPPF_RTHDR)) {
207 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
208 IP_STAT(ipst, conn_in_recvrthdr);
210 if ((recv_ancillary.crb_ipv6_recvdstopts ||
211 recv_ancillary.crb_old_ipv6_recvdstopts) &&
212 (ipp->ipp_fields & IPPF_DSTOPTS)) {
213 ancil_size += sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
214 IP_STAT(ipst, conn_in_recvdstopts);
216 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
217 ancil_size += sizeof (struct T_opthdr) +
218 ucredminsize(ira->ira_cred);
219 IP_STAT(ipst, conn_in_recvucred);
223 * If SO_TIMESTAMP is set allocate the appropriate sized
224 * buffer. Since gethrestime() expects a pointer aligned
225 * argument, we allocate space necessary for extra
226 * alignment (even though it might not be used).
228 if (recv_ancillary.crb_timestamp) {
229 ancil_size += sizeof (struct T_opthdr) +
230 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
231 IP_STAT(ipst, conn_in_timestamp);
235 * If IP_RECVTTL is set allocate the appropriate sized buffer
237 if (recv_ancillary.crb_recvttl &&
238 (ira->ira_flags & IRAF_IS_IPV4)) {
239 ancil_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
240 IP_STAT(ipst, conn_in_recvttl);
243 return (ancil_size);
247 * Lay down the ancillary data items at "ancil_buf".
248 * Assumes caller has used conn_recvancillary_size to allocate a sufficiently
249 * large buffer - ancil_size.
251 void
252 conn_recvancillary_add(conn_t *connp, crb_t recv_ancillary,
253 ip_recv_attr_t *ira, ip_pkt_t *ipp, uchar_t *ancil_buf, uint_t ancil_size)
256 * Copy in destination address before options to avoid
257 * any padding issues.
259 if (recv_ancillary.crb_recvdstaddr &&
260 (ira->ira_flags & IRAF_IS_IPV4)) {
261 struct T_opthdr *toh;
262 ipaddr_t *dstptr;
264 toh = (struct T_opthdr *)ancil_buf;
265 toh->level = IPPROTO_IP;
266 toh->name = IP_RECVDSTADDR;
267 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t);
268 toh->status = 0;
269 ancil_buf += sizeof (struct T_opthdr);
270 dstptr = (ipaddr_t *)ancil_buf;
271 *dstptr = ipp->ipp_addr_v4;
272 ancil_buf += sizeof (ipaddr_t);
273 ancil_size -= toh->len;
277 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
278 * are different
280 if (recv_ancillary.crb_ip_recvpktinfo &&
281 connp->conn_family == AF_INET) {
282 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
283 struct T_opthdr *toh;
284 struct in_pktinfo *pktinfop;
285 ill_t *ill;
286 ipif_t *ipif;
288 toh = (struct T_opthdr *)ancil_buf;
289 toh->level = IPPROTO_IP;
290 toh->name = IP_PKTINFO;
291 toh->len = sizeof (struct T_opthdr) + sizeof (*pktinfop);
292 toh->status = 0;
293 ancil_buf += sizeof (struct T_opthdr);
294 pktinfop = (struct in_pktinfo *)ancil_buf;
296 pktinfop->ipi_ifindex = ira->ira_ruifindex;
297 pktinfop->ipi_spec_dst.s_addr = INADDR_ANY;
299 /* Find a good address to report */
300 ill = ill_lookup_on_ifindex(ira->ira_ruifindex, B_FALSE, ipst);
301 if (ill != NULL) {
302 ipif = ipif_good_addr(ill, IPCL_ZONEID(connp));
303 if (ipif != NULL) {
304 pktinfop->ipi_spec_dst.s_addr =
305 ipif->ipif_lcl_addr;
306 ipif_refrele(ipif);
308 ill_refrele(ill);
310 pktinfop->ipi_addr.s_addr = ipp->ipp_addr_v4;
311 ancil_buf += sizeof (struct in_pktinfo);
312 ancil_size -= toh->len;
315 if ((recv_ancillary.crb_recvopts) &&
316 (ipp->ipp_fields & IPPF_IPV4_OPTIONS)) {
317 struct T_opthdr *toh;
319 toh = (struct T_opthdr *)ancil_buf;
320 toh->level = IPPROTO_IP;
321 toh->name = IP_RECVOPTS;
322 toh->len = sizeof (struct T_opthdr) + ipp->ipp_ipv4_options_len;
323 toh->status = 0;
324 ancil_buf += sizeof (struct T_opthdr);
325 bcopy(ipp->ipp_ipv4_options, ancil_buf,
326 ipp->ipp_ipv4_options_len);
327 ancil_buf += ipp->ipp_ipv4_options_len;
328 ancil_size -= toh->len;
331 if (recv_ancillary.crb_recvslla) {
332 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
333 struct T_opthdr *toh;
334 struct sockaddr_dl *dstptr;
335 ill_t *ill;
336 int alen = 0;
338 ill = ill_lookup_on_ifindex(ira->ira_rifindex, B_FALSE, ipst);
339 if (ill != NULL)
340 alen = ill->ill_phys_addr_length;
343 * For loopback multicast and broadcast the packet arrives
344 * with ira_ruifdex being the physical interface, but
345 * ira_l2src is all zero since ip_postfrag_loopback doesn't
346 * know our l2src. We don't report the address in that case.
348 if (ira->ira_flags & IRAF_LOOPBACK)
349 alen = 0;
351 toh = (struct T_opthdr *)ancil_buf;
352 toh->level = IPPROTO_IP;
353 toh->name = IP_RECVSLLA;
354 toh->len = sizeof (struct T_opthdr) +
355 sizeof (struct sockaddr_dl);
356 toh->status = 0;
357 ancil_buf += sizeof (struct T_opthdr);
358 dstptr = (struct sockaddr_dl *)ancil_buf;
359 dstptr->sdl_family = AF_LINK;
360 dstptr->sdl_index = ira->ira_ruifindex;
361 if (ill != NULL)
362 dstptr->sdl_type = ill->ill_type;
363 else
364 dstptr->sdl_type = 0;
365 dstptr->sdl_nlen = 0;
366 dstptr->sdl_alen = alen;
367 dstptr->sdl_slen = 0;
368 bcopy(ira->ira_l2src, dstptr->sdl_data, alen);
369 ancil_buf += sizeof (struct sockaddr_dl);
370 ancil_size -= toh->len;
371 if (ill != NULL)
372 ill_refrele(ill);
375 if (recv_ancillary.crb_recvif) {
376 struct T_opthdr *toh;
377 uint_t *dstptr;
379 toh = (struct T_opthdr *)ancil_buf;
380 toh->level = IPPROTO_IP;
381 toh->name = IP_RECVIF;
382 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
383 toh->status = 0;
384 ancil_buf += sizeof (struct T_opthdr);
385 dstptr = (uint_t *)ancil_buf;
386 *dstptr = ira->ira_ruifindex;
387 ancil_buf += sizeof (uint_t);
388 ancil_size -= toh->len;
392 * ip_recvpktinfo is used for both AF_INET and AF_INET6 but
393 * are different
395 if (recv_ancillary.crb_ip_recvpktinfo &&
396 connp->conn_family == AF_INET6) {
397 struct T_opthdr *toh;
398 struct in6_pktinfo *pkti;
400 toh = (struct T_opthdr *)ancil_buf;
401 toh->level = IPPROTO_IPV6;
402 toh->name = IPV6_PKTINFO;
403 toh->len = sizeof (struct T_opthdr) + sizeof (*pkti);
404 toh->status = 0;
405 ancil_buf += sizeof (struct T_opthdr);
406 pkti = (struct in6_pktinfo *)ancil_buf;
407 if (ira->ira_flags & IRAF_IS_IPV4) {
408 IN6_IPADDR_TO_V4MAPPED(ipp->ipp_addr_v4,
409 &pkti->ipi6_addr);
410 } else {
411 pkti->ipi6_addr = ipp->ipp_addr;
413 pkti->ipi6_ifindex = ira->ira_ruifindex;
415 ancil_buf += sizeof (*pkti);
416 ancil_size -= toh->len;
418 if (recv_ancillary.crb_ipv6_recvhoplimit) {
419 struct T_opthdr *toh;
421 toh = (struct T_opthdr *)ancil_buf;
422 toh->level = IPPROTO_IPV6;
423 toh->name = IPV6_HOPLIMIT;
424 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
425 toh->status = 0;
426 ancil_buf += sizeof (struct T_opthdr);
427 *(uint_t *)ancil_buf = ipp->ipp_hoplimit;
428 ancil_buf += sizeof (uint_t);
429 ancil_size -= toh->len;
431 if (recv_ancillary.crb_ipv6_recvtclass) {
432 struct T_opthdr *toh;
434 toh = (struct T_opthdr *)ancil_buf;
435 toh->level = IPPROTO_IPV6;
436 toh->name = IPV6_TCLASS;
437 toh->len = sizeof (struct T_opthdr) + sizeof (uint_t);
438 toh->status = 0;
439 ancil_buf += sizeof (struct T_opthdr);
441 if (ira->ira_flags & IRAF_IS_IPV4)
442 *(uint_t *)ancil_buf = ipp->ipp_type_of_service;
443 else
444 *(uint_t *)ancil_buf = ipp->ipp_tclass;
445 ancil_buf += sizeof (uint_t);
446 ancil_size -= toh->len;
448 if (recv_ancillary.crb_ipv6_recvhopopts &&
449 (ipp->ipp_fields & IPPF_HOPOPTS)) {
450 struct T_opthdr *toh;
452 toh = (struct T_opthdr *)ancil_buf;
453 toh->level = IPPROTO_IPV6;
454 toh->name = IPV6_HOPOPTS;
455 toh->len = sizeof (struct T_opthdr) + ipp->ipp_hopoptslen;
456 toh->status = 0;
457 ancil_buf += sizeof (struct T_opthdr);
458 bcopy(ipp->ipp_hopopts, ancil_buf, ipp->ipp_hopoptslen);
459 ancil_buf += ipp->ipp_hopoptslen;
460 ancil_size -= toh->len;
463 * To honor RFC3542 when an application asks for both IPV6_RECVDSTOPTS
464 * and IPV6_RECVRTHDR, we pass up the item rthdrdstopts (the destination
465 * options that appear before a routing header.
466 * We also pass them up if IPV6_RECVRTHDRDSTOPTS is set.
468 if (ipp->ipp_fields & IPPF_RTHDRDSTOPTS) {
469 if (recv_ancillary.crb_ipv6_recvrthdrdstopts ||
470 (recv_ancillary.crb_ipv6_recvdstopts &&
471 recv_ancillary.crb_ipv6_recvrthdr)) {
472 struct T_opthdr *toh;
474 toh = (struct T_opthdr *)ancil_buf;
475 toh->level = IPPROTO_IPV6;
476 toh->name = IPV6_DSTOPTS;
477 toh->len = sizeof (struct T_opthdr) +
478 ipp->ipp_rthdrdstoptslen;
479 toh->status = 0;
480 ancil_buf += sizeof (struct T_opthdr);
481 bcopy(ipp->ipp_rthdrdstopts, ancil_buf,
482 ipp->ipp_rthdrdstoptslen);
483 ancil_buf += ipp->ipp_rthdrdstoptslen;
484 ancil_size -= toh->len;
487 if (recv_ancillary.crb_ipv6_recvrthdr &&
488 (ipp->ipp_fields & IPPF_RTHDR)) {
489 struct T_opthdr *toh;
491 toh = (struct T_opthdr *)ancil_buf;
492 toh->level = IPPROTO_IPV6;
493 toh->name = IPV6_RTHDR;
494 toh->len = sizeof (struct T_opthdr) + ipp->ipp_rthdrlen;
495 toh->status = 0;
496 ancil_buf += sizeof (struct T_opthdr);
497 bcopy(ipp->ipp_rthdr, ancil_buf, ipp->ipp_rthdrlen);
498 ancil_buf += ipp->ipp_rthdrlen;
499 ancil_size -= toh->len;
501 if ((recv_ancillary.crb_ipv6_recvdstopts ||
502 recv_ancillary.crb_old_ipv6_recvdstopts) &&
503 (ipp->ipp_fields & IPPF_DSTOPTS)) {
504 struct T_opthdr *toh;
506 toh = (struct T_opthdr *)ancil_buf;
507 toh->level = IPPROTO_IPV6;
508 toh->name = IPV6_DSTOPTS;
509 toh->len = sizeof (struct T_opthdr) + ipp->ipp_dstoptslen;
510 toh->status = 0;
511 ancil_buf += sizeof (struct T_opthdr);
512 bcopy(ipp->ipp_dstopts, ancil_buf, ipp->ipp_dstoptslen);
513 ancil_buf += ipp->ipp_dstoptslen;
514 ancil_size -= toh->len;
517 if (recv_ancillary.crb_recvucred && ira->ira_cred != NULL) {
518 struct T_opthdr *toh;
519 cred_t *rcr = connp->conn_cred;
521 toh = (struct T_opthdr *)ancil_buf;
522 toh->level = SOL_SOCKET;
523 toh->name = SCM_UCRED;
524 toh->len = sizeof (struct T_opthdr) +
525 ucredminsize(ira->ira_cred);
526 toh->status = 0;
527 (void) cred2ucred(ira->ira_cred, ira->ira_cpid, &toh[1], rcr);
528 ancil_buf += toh->len;
529 ancil_size -= toh->len;
531 if (recv_ancillary.crb_timestamp) {
532 struct T_opthdr *toh;
534 toh = (struct T_opthdr *)ancil_buf;
535 toh->level = SOL_SOCKET;
536 toh->name = SCM_TIMESTAMP;
537 toh->len = sizeof (struct T_opthdr) +
538 sizeof (timestruc_t) + _POINTER_ALIGNMENT;
539 toh->status = 0;
540 ancil_buf += sizeof (struct T_opthdr);
541 /* Align for gethrestime() */
542 ancil_buf = (uchar_t *)P2ROUNDUP((intptr_t)ancil_buf,
543 sizeof (intptr_t));
544 gethrestime((timestruc_t *)ancil_buf);
545 ancil_buf = (uchar_t *)toh + toh->len;
546 ancil_size -= toh->len;
550 * CAUTION:
551 * Due to aligment issues
552 * Processing of IP_RECVTTL option
553 * should always be the last. Adding
554 * any option processing after this will
555 * cause alignment panic.
557 if (recv_ancillary.crb_recvttl &&
558 (ira->ira_flags & IRAF_IS_IPV4)) {
559 struct T_opthdr *toh;
560 uint8_t *dstptr;
562 toh = (struct T_opthdr *)ancil_buf;
563 toh->level = IPPROTO_IP;
564 toh->name = IP_RECVTTL;
565 toh->len = sizeof (struct T_opthdr) + sizeof (uint8_t);
566 toh->status = 0;
567 ancil_buf += sizeof (struct T_opthdr);
568 dstptr = (uint8_t *)ancil_buf;
569 *dstptr = ipp->ipp_hoplimit;
570 ancil_buf += sizeof (uint8_t);
571 ancil_size -= toh->len;
574 /* Consumed all of allocated space */
575 ASSERT(ancil_size == 0);
580 * This routine retrieves the current status of socket options.
581 * It returns the size of the option retrieved, or -1.
584 conn_opt_get(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
585 uchar_t *ptr)
587 int *i1 = (int *)ptr;
588 conn_t *connp = coa->coa_connp;
589 ip_xmit_attr_t *ixa = coa->coa_ixa;
590 ip_pkt_t *ipp = coa->coa_ipp;
591 ip_stack_t *ipst = ixa->ixa_ipst;
592 uint_t len;
594 ASSERT(MUTEX_HELD(&coa->coa_connp->conn_lock));
596 switch (level) {
597 case SOL_SOCKET:
598 switch (name) {
599 case SO_DEBUG:
600 *i1 = connp->conn_debug ? SO_DEBUG : 0;
601 break; /* goto sizeof (int) option return */
602 case SO_KEEPALIVE:
603 *i1 = connp->conn_keepalive ? SO_KEEPALIVE : 0;
604 break;
605 case SO_LINGER: {
606 struct linger *lgr = (struct linger *)ptr;
608 lgr->l_onoff = connp->conn_linger ? SO_LINGER : 0;
609 lgr->l_linger = connp->conn_lingertime;
611 return (sizeof (struct linger));
613 case SO_OOBINLINE:
614 *i1 = connp->conn_oobinline ? SO_OOBINLINE : 0;
615 break;
616 case SO_REUSEADDR:
617 *i1 = connp->conn_reuseaddr ? SO_REUSEADDR : 0;
618 break; /* goto sizeof (int) option return */
619 case SO_TYPE:
620 *i1 = connp->conn_so_type;
621 break; /* goto sizeof (int) option return */
622 case SO_DONTROUTE:
623 *i1 = (ixa->ixa_flags & IXAF_DONTROUTE) ?
624 SO_DONTROUTE : 0;
625 break; /* goto sizeof (int) option return */
626 case SO_USELOOPBACK:
627 *i1 = connp->conn_useloopback ? SO_USELOOPBACK : 0;
628 break; /* goto sizeof (int) option return */
629 case SO_BROADCAST:
630 *i1 = connp->conn_broadcast ? SO_BROADCAST : 0;
631 break; /* goto sizeof (int) option return */
633 case SO_SNDBUF:
634 *i1 = connp->conn_sndbuf;
635 break; /* goto sizeof (int) option return */
636 case SO_RCVBUF:
637 *i1 = connp->conn_rcvbuf;
638 break; /* goto sizeof (int) option return */
639 case SO_RCVTIMEO:
640 case SO_SNDTIMEO:
642 * Pass these two options in order for third part
643 * protocol usage. Here just return directly.
645 *i1 = 0;
646 break;
647 case SO_DGRAM_ERRIND:
648 *i1 = connp->conn_dgram_errind ? SO_DGRAM_ERRIND : 0;
649 break; /* goto sizeof (int) option return */
650 case SO_RECVUCRED:
651 *i1 = connp->conn_recv_ancillary.crb_recvucred;
652 break; /* goto sizeof (int) option return */
653 case SO_TIMESTAMP:
654 *i1 = connp->conn_recv_ancillary.crb_timestamp;
655 break; /* goto sizeof (int) option return */
656 case SO_VRRP:
657 *i1 = connp->conn_isvrrp;
658 break; /* goto sizeof (int) option return */
659 case SO_ALLZONES:
660 *i1 = connp->conn_allzones;
661 break; /* goto sizeof (int) option return */
662 case SO_EXCLBIND:
663 *i1 = connp->conn_exclbind ? SO_EXCLBIND : 0;
664 break;
665 case SO_PROTOTYPE:
666 *i1 = connp->conn_proto;
667 break;
669 case SO_DOMAIN:
670 *i1 = connp->conn_family;
671 break;
672 default:
673 return (-1);
675 break;
676 case IPPROTO_IP:
677 if (connp->conn_family != AF_INET)
678 return (-1);
679 switch (name) {
680 case IP_OPTIONS:
681 case T_IP_OPTIONS:
682 if (!(ipp->ipp_fields & IPPF_IPV4_OPTIONS))
683 return (0);
685 len = ipp->ipp_ipv4_options_len;
686 if (len > 0) {
687 bcopy(ipp->ipp_ipv4_options, ptr, len);
689 return (len);
691 case IP_PKTINFO: {
693 * This also handles IP_RECVPKTINFO.
694 * IP_PKTINFO and IP_RECVPKTINFO have same value.
695 * Differentiation is based on the size of the
696 * argument passed in.
698 struct in_pktinfo *pktinfo;
700 #ifdef notdef
701 /* optcom doesn't provide a length with "get" */
702 if (inlen == sizeof (int)) {
703 /* This is IP_RECVPKTINFO option. */
704 *i1 = connp->conn_recv_ancillary.
705 crb_ip_recvpktinfo;
706 return (sizeof (int));
708 #endif
709 /* XXX assumes that caller has room for max size! */
711 pktinfo = (struct in_pktinfo *)ptr;
712 pktinfo->ipi_ifindex = ixa->ixa_ifindex;
713 if (ipp->ipp_fields & IPPF_ADDR)
714 pktinfo->ipi_spec_dst.s_addr = ipp->ipp_addr_v4;
715 else
716 pktinfo->ipi_spec_dst.s_addr = INADDR_ANY;
717 return (sizeof (struct in_pktinfo));
719 case IP_DONTFRAG:
720 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
721 return (sizeof (int));
722 case IP_TOS:
723 case T_IP_TOS:
724 *i1 = (int)ipp->ipp_type_of_service;
725 break; /* goto sizeof (int) option return */
726 case IP_TTL:
727 *i1 = (int)ipp->ipp_unicast_hops;
728 break; /* goto sizeof (int) option return */
729 case IP_DHCPINIT_IF:
730 return (-1);
731 case IP_NEXTHOP:
732 if (ixa->ixa_flags & IXAF_NEXTHOP_SET) {
733 *(ipaddr_t *)ptr = ixa->ixa_nexthop_v4;
734 return (sizeof (ipaddr_t));
735 } else {
736 return (0);
739 case IP_MULTICAST_IF:
740 /* 0 address if not set */
741 *(ipaddr_t *)ptr = ixa->ixa_multicast_ifaddr;
742 return (sizeof (ipaddr_t));
743 case IP_MULTICAST_TTL:
744 *(uchar_t *)ptr = ixa->ixa_multicast_ttl;
745 return (sizeof (uchar_t));
746 case IP_MULTICAST_LOOP:
747 *ptr = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
748 return (sizeof (uint8_t));
749 case IP_RECVOPTS:
750 *i1 = connp->conn_recv_ancillary.crb_recvopts;
751 break; /* goto sizeof (int) option return */
752 case IP_RECVDSTADDR:
753 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
754 break; /* goto sizeof (int) option return */
755 case IP_RECVIF:
756 *i1 = connp->conn_recv_ancillary.crb_recvif;
757 break; /* goto sizeof (int) option return */
758 case IP_RECVSLLA:
759 *i1 = connp->conn_recv_ancillary.crb_recvslla;
760 break; /* goto sizeof (int) option return */
761 case IP_RECVTTL:
762 *i1 = connp->conn_recv_ancillary.crb_recvttl;
763 break; /* goto sizeof (int) option return */
764 case IP_ADD_MEMBERSHIP:
765 case IP_DROP_MEMBERSHIP:
766 case MCAST_JOIN_GROUP:
767 case MCAST_LEAVE_GROUP:
768 case IP_BLOCK_SOURCE:
769 case IP_UNBLOCK_SOURCE:
770 case IP_ADD_SOURCE_MEMBERSHIP:
771 case IP_DROP_SOURCE_MEMBERSHIP:
772 case MCAST_BLOCK_SOURCE:
773 case MCAST_UNBLOCK_SOURCE:
774 case MCAST_JOIN_SOURCE_GROUP:
775 case MCAST_LEAVE_SOURCE_GROUP:
776 case MRT_INIT:
777 case MRT_DONE:
778 case MRT_ADD_VIF:
779 case MRT_DEL_VIF:
780 case MRT_ADD_MFC:
781 case MRT_DEL_MFC:
782 /* cannot "get" the value for these */
783 return (-1);
784 case MRT_VERSION:
785 case MRT_ASSERT:
786 (void) ip_mrouter_get(name, connp, ptr);
787 return (sizeof (int));
788 case IP_SEC_OPT:
789 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
790 IPSEC_AF_V4));
791 case IP_BOUND_IF:
792 /* Zero if not set */
793 *i1 = connp->conn_bound_if;
794 break; /* goto sizeof (int) option return */
795 case IP_UNSPEC_SRC:
796 *i1 = connp->conn_unspec_src;
797 break; /* goto sizeof (int) option return */
798 case IP_BROADCAST_TTL:
799 if (ixa->ixa_flags & IXAF_BROADCAST_TTL_SET)
800 *(uchar_t *)ptr = ixa->ixa_broadcast_ttl;
801 else
802 *(uchar_t *)ptr = ipst->ips_ip_broadcast_ttl;
803 return (sizeof (uchar_t));
804 default:
805 return (-1);
807 break;
808 case IPPROTO_IPV6:
809 if (connp->conn_family != AF_INET6)
810 return (-1);
811 switch (name) {
812 case IPV6_UNICAST_HOPS:
813 *i1 = (int)ipp->ipp_unicast_hops;
814 break; /* goto sizeof (int) option return */
815 case IPV6_MULTICAST_IF:
816 /* 0 index if not set */
817 *i1 = ixa->ixa_multicast_ifindex;
818 break; /* goto sizeof (int) option return */
819 case IPV6_MULTICAST_HOPS:
820 *i1 = ixa->ixa_multicast_ttl;
821 break; /* goto sizeof (int) option return */
822 case IPV6_MULTICAST_LOOP:
823 *i1 = (ixa->ixa_flags & IXAF_MULTICAST_LOOP) ? 1 : 0;
824 break; /* goto sizeof (int) option return */
825 case IPV6_JOIN_GROUP:
826 case IPV6_LEAVE_GROUP:
827 case MCAST_JOIN_GROUP:
828 case MCAST_LEAVE_GROUP:
829 case MCAST_BLOCK_SOURCE:
830 case MCAST_UNBLOCK_SOURCE:
831 case MCAST_JOIN_SOURCE_GROUP:
832 case MCAST_LEAVE_SOURCE_GROUP:
833 /* cannot "get" the value for these */
834 return (-1);
835 case IPV6_BOUND_IF:
836 /* Zero if not set */
837 *i1 = connp->conn_bound_if;
838 break; /* goto sizeof (int) option return */
839 case IPV6_UNSPEC_SRC:
840 *i1 = connp->conn_unspec_src;
841 break; /* goto sizeof (int) option return */
842 case IPV6_RECVPKTINFO:
843 *i1 = connp->conn_recv_ancillary.crb_ip_recvpktinfo;
844 break; /* goto sizeof (int) option return */
845 case IPV6_RECVTCLASS:
846 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvtclass;
847 break; /* goto sizeof (int) option return */
848 case IPV6_RECVPATHMTU:
849 *i1 = connp->conn_ipv6_recvpathmtu;
850 break; /* goto sizeof (int) option return */
851 case IPV6_RECVHOPLIMIT:
852 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhoplimit;
853 break; /* goto sizeof (int) option return */
854 case IPV6_RECVHOPOPTS:
855 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvhopopts;
856 break; /* goto sizeof (int) option return */
857 case IPV6_RECVDSTOPTS:
858 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvdstopts;
859 break; /* goto sizeof (int) option return */
860 case _OLD_IPV6_RECVDSTOPTS:
861 *i1 =
862 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts;
863 break; /* goto sizeof (int) option return */
864 case IPV6_RECVRTHDRDSTOPTS:
865 *i1 = connp->conn_recv_ancillary.
866 crb_ipv6_recvrthdrdstopts;
867 break; /* goto sizeof (int) option return */
868 case IPV6_RECVRTHDR:
869 *i1 = connp->conn_recv_ancillary.crb_ipv6_recvrthdr;
870 break; /* goto sizeof (int) option return */
871 case IPV6_PKTINFO: {
872 /* XXX assumes that caller has room for max size! */
873 struct in6_pktinfo *pkti;
875 pkti = (struct in6_pktinfo *)ptr;
876 pkti->ipi6_ifindex = ixa->ixa_ifindex;
877 if (ipp->ipp_fields & IPPF_ADDR)
878 pkti->ipi6_addr = ipp->ipp_addr;
879 else
880 pkti->ipi6_addr = ipv6_all_zeros;
881 return (sizeof (struct in6_pktinfo));
883 case IPV6_TCLASS:
884 *i1 = ipp->ipp_tclass;
885 break; /* goto sizeof (int) option return */
886 case IPV6_NEXTHOP: {
887 sin6_t *sin6 = (sin6_t *)ptr;
889 if (ixa->ixa_flags & IXAF_NEXTHOP_SET)
890 return (0);
892 *sin6 = sin6_null;
893 sin6->sin6_family = AF_INET6;
894 sin6->sin6_addr = ixa->ixa_nexthop_v6;
896 return (sizeof (sin6_t));
898 case IPV6_HOPOPTS:
899 if (!(ipp->ipp_fields & IPPF_HOPOPTS))
900 return (0);
901 bcopy(ipp->ipp_hopopts, ptr,
902 ipp->ipp_hopoptslen);
903 return (ipp->ipp_hopoptslen);
904 case IPV6_RTHDRDSTOPTS:
905 if (!(ipp->ipp_fields & IPPF_RTHDRDSTOPTS))
906 return (0);
907 bcopy(ipp->ipp_rthdrdstopts, ptr,
908 ipp->ipp_rthdrdstoptslen);
909 return (ipp->ipp_rthdrdstoptslen);
910 case IPV6_RTHDR:
911 if (!(ipp->ipp_fields & IPPF_RTHDR))
912 return (0);
913 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
914 return (ipp->ipp_rthdrlen);
915 case IPV6_DSTOPTS:
916 if (!(ipp->ipp_fields & IPPF_DSTOPTS))
917 return (0);
918 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
919 return (ipp->ipp_dstoptslen);
920 case IPV6_PATHMTU:
921 return (ip_fill_mtuinfo(connp, ixa,
922 (struct ip6_mtuinfo *)ptr));
923 case IPV6_SEC_OPT:
924 return (ipsec_req_from_conn(connp, (ipsec_req_t *)ptr,
925 IPSEC_AF_V6));
926 case IPV6_SRC_PREFERENCES:
927 return (ip6_get_src_preferences(ixa, (uint32_t *)ptr));
928 case IPV6_DONTFRAG:
929 *i1 = (ixa->ixa_flags & IXAF_DONTFRAG) != 0;
930 return (sizeof (int));
931 case IPV6_USE_MIN_MTU:
932 if (ixa->ixa_flags & IXAF_USE_MIN_MTU)
933 *i1 = ixa->ixa_use_min_mtu;
934 else
935 *i1 = IPV6_USE_MIN_MTU_MULTICAST;
936 break;
937 case IPV6_V6ONLY:
938 *i1 = connp->conn_ipv6_v6only;
939 return (sizeof (int));
940 default:
941 return (-1);
943 break;
944 case IPPROTO_UDP:
945 switch (name) {
946 case UDP_ANONPRIVBIND:
947 *i1 = connp->conn_anon_priv_bind;
948 break;
949 case UDP_EXCLBIND:
950 *i1 = connp->conn_exclbind ? UDP_EXCLBIND : 0;
951 break;
952 default:
953 return (-1);
955 break;
956 case IPPROTO_TCP:
957 switch (name) {
958 case TCP_RECVDSTADDR:
959 *i1 = connp->conn_recv_ancillary.crb_recvdstaddr;
960 break;
961 case TCP_ANONPRIVBIND:
962 *i1 = connp->conn_anon_priv_bind;
963 break;
964 case TCP_EXCLBIND:
965 *i1 = connp->conn_exclbind ? TCP_EXCLBIND : 0;
966 break;
967 default:
968 return (-1);
970 break;
971 default:
972 return (-1);
974 return (sizeof (int));
977 static int conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name,
978 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
979 static int conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name,
980 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
981 static int conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name,
982 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
983 static int conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name,
984 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
985 static int conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name,
986 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr);
989 * This routine sets the most common socket options including some
990 * that are transport/ULP specific.
991 * It returns errno or zero.
993 * For fixed length options, there is no sanity check
994 * of passed in length is done. It is assumed *_optcom_req()
995 * routines do the right thing.
998 conn_opt_set(conn_opt_arg_t *coa, t_scalar_t level, t_scalar_t name,
999 uint_t inlen, uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1001 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1003 /* We have different functions for different levels */
1004 switch (level) {
1005 case SOL_SOCKET:
1006 return (conn_opt_set_socket(coa, name, inlen, invalp,
1007 checkonly, cr));
1008 case IPPROTO_IP:
1009 return (conn_opt_set_ip(coa, name, inlen, invalp,
1010 checkonly, cr));
1011 case IPPROTO_IPV6:
1012 return (conn_opt_set_ipv6(coa, name, inlen, invalp,
1013 checkonly, cr));
1014 case IPPROTO_UDP:
1015 return (conn_opt_set_udp(coa, name, inlen, invalp,
1016 checkonly, cr));
1017 case IPPROTO_TCP:
1018 return (conn_opt_set_tcp(coa, name, inlen, invalp,
1019 checkonly, cr));
1020 default:
1021 return (0);
1026 * Handle SOL_SOCKET
1027 * Note that we do not handle SO_PROTOTYPE here. The ULPs that support
1028 * it implement their own checks and setting of conn_proto.
1030 /* ARGSUSED1 */
1031 static int
1032 conn_opt_set_socket(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1033 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1035 conn_t *connp = coa->coa_connp;
1036 ip_xmit_attr_t *ixa = coa->coa_ixa;
1037 int *i1 = (int *)invalp;
1038 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1040 switch (name) {
1041 case SO_ALLZONES:
1042 if (IPCL_IS_BOUND(connp))
1043 return (EINVAL);
1044 break;
1045 case SO_VRRP:
1046 if (secpolicy_ip_config(cr, checkonly) != 0)
1047 return (EACCES);
1048 break;
1050 if (checkonly)
1051 return (0);
1053 mutex_enter(&connp->conn_lock);
1054 /* Here we set the actual option value */
1055 switch (name) {
1056 case SO_DEBUG:
1057 connp->conn_debug = onoff;
1058 break;
1059 case SO_KEEPALIVE:
1060 connp->conn_keepalive = onoff;
1061 break;
1062 case SO_LINGER: {
1063 struct linger *lgr = (struct linger *)invalp;
1065 if (lgr->l_onoff) {
1066 connp->conn_linger = 1;
1067 connp->conn_lingertime = lgr->l_linger;
1068 } else {
1069 connp->conn_linger = 0;
1070 connp->conn_lingertime = 0;
1072 break;
1074 case SO_OOBINLINE:
1075 connp->conn_oobinline = onoff;
1076 coa->coa_changed |= COA_OOBINLINE_CHANGED;
1077 break;
1078 case SO_REUSEADDR:
1079 connp->conn_reuseaddr = onoff;
1080 break;
1081 case SO_DONTROUTE:
1082 if (onoff)
1083 ixa->ixa_flags |= IXAF_DONTROUTE;
1084 else
1085 ixa->ixa_flags &= ~IXAF_DONTROUTE;
1086 coa->coa_changed |= COA_ROUTE_CHANGED;
1087 break;
1088 case SO_USELOOPBACK:
1089 connp->conn_useloopback = onoff;
1090 break;
1091 case SO_BROADCAST:
1092 connp->conn_broadcast = onoff;
1093 break;
1094 case SO_SNDBUF:
1095 /* ULP has range checked the value */
1096 connp->conn_sndbuf = *i1;
1097 coa->coa_changed |= COA_SNDBUF_CHANGED;
1098 break;
1099 case SO_RCVBUF:
1100 /* ULP has range checked the value */
1101 connp->conn_rcvbuf = *i1;
1102 coa->coa_changed |= COA_RCVBUF_CHANGED;
1103 break;
1104 case SO_RCVTIMEO:
1105 case SO_SNDTIMEO:
1107 * Pass these two options in order for third part
1108 * protocol usage.
1110 break;
1111 case SO_DGRAM_ERRIND:
1112 connp->conn_dgram_errind = onoff;
1113 break;
1114 case SO_RECVUCRED:
1115 connp->conn_recv_ancillary.crb_recvucred = onoff;
1116 break;
1117 case SO_ALLZONES:
1118 connp->conn_allzones = onoff;
1119 coa->coa_changed |= COA_ROUTE_CHANGED;
1120 if (onoff)
1121 ixa->ixa_zoneid = ALL_ZONES;
1122 else
1123 ixa->ixa_zoneid = connp->conn_zoneid;
1124 break;
1125 case SO_TIMESTAMP:
1126 connp->conn_recv_ancillary.crb_timestamp = onoff;
1127 break;
1128 case SO_VRRP:
1129 connp->conn_isvrrp = onoff;
1130 break;
1131 case SO_EXCLBIND:
1132 connp->conn_exclbind = onoff;
1133 break;
1135 mutex_exit(&connp->conn_lock);
1136 return (0);
1139 /* Handle IPPROTO_IP */
1140 static int
1141 conn_opt_set_ip(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1142 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1144 conn_t *connp = coa->coa_connp;
1145 ip_xmit_attr_t *ixa = coa->coa_ixa;
1146 ip_pkt_t *ipp = coa->coa_ipp;
1147 int *i1 = (int *)invalp;
1148 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1149 ipaddr_t addr = (ipaddr_t)*i1;
1150 uint_t ifindex;
1151 zoneid_t zoneid = IPCL_ZONEID(connp);
1152 ipif_t *ipif;
1153 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1154 int error;
1156 if (connp->conn_family != AF_INET)
1157 return (EINVAL);
1159 switch (name) {
1160 case IP_TTL:
1161 /* Don't allow zero */
1162 if (*i1 < 1 || *i1 > 255)
1163 return (EINVAL);
1164 break;
1165 case IP_MULTICAST_IF:
1166 if (addr == INADDR_ANY) {
1167 /* Clear */
1168 ifindex = 0;
1169 break;
1171 ipif = ipif_lookup_addr(addr, NULL, zoneid, ipst);
1172 if (ipif == NULL)
1173 return (EHOSTUNREACH);
1174 /* not supported by the virtual network iface */
1175 if (IS_VNI(ipif->ipif_ill)) {
1176 ipif_refrele(ipif);
1177 return (EINVAL);
1179 ifindex = ipif->ipif_ill->ill_phyint->phyint_ifindex;
1180 ipif_refrele(ipif);
1181 break;
1182 case IP_NEXTHOP: {
1183 ire_t *ire;
1185 if (addr == INADDR_ANY) {
1186 /* Clear */
1187 break;
1189 /* Verify that the next-hop is on-link */
1190 ire = ire_ftable_lookup_v4(addr, 0, 0, IRE_ONLINK, NULL, zoneid,
1191 MATCH_IRE_TYPE, 0, ipst, NULL);
1192 if (ire == NULL)
1193 return (EHOSTUNREACH);
1194 ire_refrele(ire);
1195 break;
1197 case IP_OPTIONS:
1198 case T_IP_OPTIONS:
1199 if ((inlen & 0x3) || inlen > IP_MAX_OPT_LENGTH)
1200 return (EINVAL);
1201 break;
1202 case IP_PKTINFO: {
1203 struct in_pktinfo *pktinfo;
1205 /* Two different valid lengths */
1206 if (inlen != sizeof (int) &&
1207 inlen != sizeof (struct in_pktinfo))
1208 return (EINVAL);
1209 if (inlen == sizeof (int))
1210 break;
1212 pktinfo = (struct in_pktinfo *)invalp;
1213 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1214 switch (ip_laddr_verify_v4(pktinfo->ipi_spec_dst.s_addr,
1215 zoneid, ipst, B_FALSE)) {
1216 case IPVL_UNICAST_UP:
1217 case IPVL_UNICAST_DOWN:
1218 break;
1219 default:
1220 return (EADDRNOTAVAIL);
1223 if (!ip_xmit_ifindex_valid(pktinfo->ipi_ifindex, zoneid,
1224 B_FALSE, ipst))
1225 return (ENXIO);
1226 break;
1228 case IP_BOUND_IF:
1229 ifindex = *(uint_t *)i1;
1231 /* Just check it is ok. */
1232 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1233 return (ENXIO);
1234 break;
1236 if (checkonly)
1237 return (0);
1239 /* Here we set the actual option value */
1241 * conn_lock protects the bitfields, and is used to
1242 * set the fields atomically. Not needed for ixa settings since
1243 * the caller has an exclusive copy of the ixa.
1244 * We can not hold conn_lock across the multicast options though.
1246 switch (name) {
1247 case IP_OPTIONS:
1248 case T_IP_OPTIONS:
1249 /* Save options for use by IP. */
1250 mutex_enter(&connp->conn_lock);
1251 error = optcom_pkt_set(invalp, inlen,
1252 (uchar_t **)&ipp->ipp_ipv4_options,
1253 &ipp->ipp_ipv4_options_len);
1254 if (error != 0) {
1255 mutex_exit(&connp->conn_lock);
1256 return (error);
1258 if (ipp->ipp_ipv4_options_len == 0) {
1259 ipp->ipp_fields &= ~IPPF_IPV4_OPTIONS;
1260 } else {
1261 ipp->ipp_fields |= IPPF_IPV4_OPTIONS;
1263 mutex_exit(&connp->conn_lock);
1264 coa->coa_changed |= COA_HEADER_CHANGED;
1265 coa->coa_changed |= COA_WROFF_CHANGED;
1266 break;
1268 case IP_TTL:
1269 mutex_enter(&connp->conn_lock);
1270 ipp->ipp_unicast_hops = *i1;
1271 mutex_exit(&connp->conn_lock);
1272 coa->coa_changed |= COA_HEADER_CHANGED;
1273 break;
1274 case IP_TOS:
1275 case T_IP_TOS:
1276 mutex_enter(&connp->conn_lock);
1277 if (*i1 == -1) {
1278 ipp->ipp_type_of_service = 0;
1279 } else {
1280 ipp->ipp_type_of_service = *i1;
1282 mutex_exit(&connp->conn_lock);
1283 coa->coa_changed |= COA_HEADER_CHANGED;
1284 break;
1285 case IP_MULTICAST_IF:
1286 ixa->ixa_multicast_ifindex = ifindex;
1287 ixa->ixa_multicast_ifaddr = addr;
1288 coa->coa_changed |= COA_ROUTE_CHANGED;
1289 break;
1290 case IP_MULTICAST_TTL:
1291 ixa->ixa_multicast_ttl = *invalp;
1292 /* Handled automatically by ip_output */
1293 break;
1294 case IP_MULTICAST_LOOP:
1295 if (*invalp != 0)
1296 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1297 else
1298 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1299 /* Handled automatically by ip_output */
1300 break;
1301 case IP_RECVOPTS:
1302 mutex_enter(&connp->conn_lock);
1303 connp->conn_recv_ancillary.crb_recvopts = onoff;
1304 mutex_exit(&connp->conn_lock);
1305 break;
1306 case IP_RECVDSTADDR:
1307 mutex_enter(&connp->conn_lock);
1308 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
1309 mutex_exit(&connp->conn_lock);
1310 break;
1311 case IP_RECVIF:
1312 mutex_enter(&connp->conn_lock);
1313 connp->conn_recv_ancillary.crb_recvif = onoff;
1314 mutex_exit(&connp->conn_lock);
1315 break;
1316 case IP_RECVSLLA:
1317 mutex_enter(&connp->conn_lock);
1318 connp->conn_recv_ancillary.crb_recvslla = onoff;
1319 mutex_exit(&connp->conn_lock);
1320 break;
1321 case IP_RECVTTL:
1322 mutex_enter(&connp->conn_lock);
1323 connp->conn_recv_ancillary.crb_recvttl = onoff;
1324 mutex_exit(&connp->conn_lock);
1325 break;
1326 case IP_PKTINFO: {
1328 * This also handles IP_RECVPKTINFO.
1329 * IP_PKTINFO and IP_RECVPKTINFO have same value.
1330 * Differentiation is based on the size of the
1331 * argument passed in.
1333 struct in_pktinfo *pktinfo;
1335 if (inlen == sizeof (int)) {
1336 /* This is IP_RECVPKTINFO option. */
1337 mutex_enter(&connp->conn_lock);
1338 connp->conn_recv_ancillary.crb_ip_recvpktinfo =
1339 onoff;
1340 mutex_exit(&connp->conn_lock);
1341 break;
1344 /* This is IP_PKTINFO option. */
1345 mutex_enter(&connp->conn_lock);
1346 pktinfo = (struct in_pktinfo *)invalp;
1347 if (pktinfo->ipi_spec_dst.s_addr != INADDR_ANY) {
1348 ipp->ipp_fields |= IPPF_ADDR;
1349 IN6_INADDR_TO_V4MAPPED(&pktinfo->ipi_spec_dst,
1350 &ipp->ipp_addr);
1351 } else {
1352 ipp->ipp_fields &= ~IPPF_ADDR;
1353 ipp->ipp_addr = ipv6_all_zeros;
1355 mutex_exit(&connp->conn_lock);
1356 ixa->ixa_ifindex = pktinfo->ipi_ifindex;
1357 coa->coa_changed |= COA_ROUTE_CHANGED;
1358 coa->coa_changed |= COA_HEADER_CHANGED;
1359 break;
1361 case IP_DONTFRAG:
1362 if (onoff) {
1363 ixa->ixa_flags |= (IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1364 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1365 } else {
1366 ixa->ixa_flags &= ~(IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF);
1367 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1369 /* Need to redo ip_attr_connect */
1370 coa->coa_changed |= COA_ROUTE_CHANGED;
1371 break;
1372 case IP_ADD_MEMBERSHIP:
1373 case IP_DROP_MEMBERSHIP:
1374 case MCAST_JOIN_GROUP:
1375 case MCAST_LEAVE_GROUP:
1376 return (ip_opt_set_multicast_group(connp, name,
1377 invalp, B_FALSE, checkonly));
1379 case IP_BLOCK_SOURCE:
1380 case IP_UNBLOCK_SOURCE:
1381 case IP_ADD_SOURCE_MEMBERSHIP:
1382 case IP_DROP_SOURCE_MEMBERSHIP:
1383 case MCAST_BLOCK_SOURCE:
1384 case MCAST_UNBLOCK_SOURCE:
1385 case MCAST_JOIN_SOURCE_GROUP:
1386 case MCAST_LEAVE_SOURCE_GROUP:
1387 return (ip_opt_set_multicast_sources(connp, name,
1388 invalp, B_FALSE, checkonly));
1390 case IP_SEC_OPT:
1391 mutex_enter(&connp->conn_lock);
1392 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1393 mutex_exit(&connp->conn_lock);
1394 if (error != 0) {
1395 return (error);
1397 /* This is an IPsec policy change - redo ip_attr_connect */
1398 coa->coa_changed |= COA_ROUTE_CHANGED;
1399 break;
1400 case IP_NEXTHOP:
1401 ixa->ixa_nexthop_v4 = addr;
1402 if (addr != INADDR_ANY)
1403 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1404 else
1405 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1406 coa->coa_changed |= COA_ROUTE_CHANGED;
1407 break;
1409 case IP_BOUND_IF:
1410 ixa->ixa_ifindex = ifindex; /* Send */
1411 mutex_enter(&connp->conn_lock);
1412 connp->conn_incoming_ifindex = ifindex; /* Receive */
1413 connp->conn_bound_if = ifindex; /* getsockopt */
1414 mutex_exit(&connp->conn_lock);
1415 coa->coa_changed |= COA_ROUTE_CHANGED;
1416 break;
1417 case IP_UNSPEC_SRC:
1418 mutex_enter(&connp->conn_lock);
1419 connp->conn_unspec_src = onoff;
1420 if (onoff)
1421 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1422 else
1423 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1425 mutex_exit(&connp->conn_lock);
1426 break;
1427 case IP_BROADCAST_TTL:
1428 ixa->ixa_broadcast_ttl = *invalp;
1429 ixa->ixa_flags |= IXAF_BROADCAST_TTL_SET;
1430 /* Handled automatically by ip_output */
1431 break;
1432 case MRT_INIT:
1433 case MRT_DONE:
1434 case MRT_ADD_VIF:
1435 case MRT_DEL_VIF:
1436 case MRT_ADD_MFC:
1437 case MRT_DEL_MFC:
1438 case MRT_ASSERT:
1439 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
1440 return (error);
1442 error = ip_mrouter_set((int)name, connp, checkonly,
1443 (uchar_t *)invalp, inlen);
1444 if (error) {
1445 return (error);
1447 return (0);
1450 return (0);
1453 /* Handle IPPROTO_IPV6 */
1454 static int
1455 conn_opt_set_ipv6(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1456 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1458 conn_t *connp = coa->coa_connp;
1459 ip_xmit_attr_t *ixa = coa->coa_ixa;
1460 ip_pkt_t *ipp = coa->coa_ipp;
1461 int *i1 = (int *)invalp;
1462 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1463 uint_t ifindex;
1464 zoneid_t zoneid = IPCL_ZONEID(connp);
1465 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1466 int error;
1468 if (connp->conn_family != AF_INET6)
1469 return (EINVAL);
1471 switch (name) {
1472 case IPV6_MULTICAST_IF:
1474 * The only possible error is EINVAL.
1475 * We call this option on both V4 and V6
1476 * If both fail, then this call returns
1477 * EINVAL. If at least one of them succeeds we
1478 * return success.
1480 ifindex = *(uint_t *)i1;
1482 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst) &&
1483 !ip_xmit_ifindex_valid(ifindex, zoneid, B_FALSE, ipst))
1484 return (EINVAL);
1485 break;
1486 case IPV6_UNICAST_HOPS:
1487 /* Don't allow zero. -1 means to use default */
1488 if (*i1 < -1 || *i1 == 0 || *i1 > IPV6_MAX_HOPS)
1489 return (EINVAL);
1490 break;
1491 case IPV6_MULTICAST_HOPS:
1492 /* -1 means use default */
1493 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS)
1494 return (EINVAL);
1495 break;
1496 case IPV6_MULTICAST_LOOP:
1497 if (*i1 != 0 && *i1 != 1)
1498 return (EINVAL);
1499 break;
1500 case IPV6_BOUND_IF:
1501 ifindex = *(uint_t *)i1;
1503 if (!ip_xmit_ifindex_valid(ifindex, zoneid, B_TRUE, ipst))
1504 return (ENXIO);
1505 break;
1506 case IPV6_PKTINFO: {
1507 struct in6_pktinfo *pkti;
1508 boolean_t isv6;
1510 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
1511 return (EINVAL);
1512 if (inlen == 0)
1513 break; /* Clear values below */
1516 * Verify the source address and ifindex. Privileged users
1517 * can use any source address.
1519 pkti = (struct in6_pktinfo *)invalp;
1522 * For link-local addresses we use the ipi6_ifindex when
1523 * we verify the local address.
1524 * If net_rawaccess then any source address can be used.
1526 if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr) &&
1527 secpolicy_net_rawaccess(cr) != 0) {
1528 uint_t scopeid = 0;
1529 in6_addr_t *v6src = &pkti->ipi6_addr;
1530 ipaddr_t v4src;
1531 ip_laddr_t laddr_type = IPVL_UNICAST_UP;
1533 if (IN6_IS_ADDR_V4MAPPED(v6src)) {
1534 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1535 if (v4src != INADDR_ANY) {
1536 laddr_type = ip_laddr_verify_v4(v4src,
1537 zoneid, ipst, B_FALSE);
1539 } else {
1540 if (IN6_IS_ADDR_LINKSCOPE(v6src))
1541 scopeid = pkti->ipi6_ifindex;
1543 laddr_type = ip_laddr_verify_v6(v6src, zoneid,
1544 ipst, B_FALSE, scopeid);
1546 switch (laddr_type) {
1547 case IPVL_UNICAST_UP:
1548 case IPVL_UNICAST_DOWN:
1549 break;
1550 default:
1551 return (EADDRNOTAVAIL);
1553 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1554 } else if (!IN6_IS_ADDR_UNSPECIFIED(&pkti->ipi6_addr)) {
1555 /* Allow any source */
1556 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1558 isv6 = !(IN6_IS_ADDR_V4MAPPED(&pkti->ipi6_addr));
1559 if (!ip_xmit_ifindex_valid(pkti->ipi6_ifindex, zoneid, isv6,
1560 ipst))
1561 return (ENXIO);
1562 break;
1564 case IPV6_HOPLIMIT:
1565 /* It is only allowed as ancilary data */
1566 if (!coa->coa_ancillary)
1567 return (EINVAL);
1569 if (inlen != 0 && inlen != sizeof (int))
1570 return (EINVAL);
1571 if (inlen == sizeof (int)) {
1572 if (*i1 > 255 || *i1 < -1 || *i1 == 0)
1573 return (EINVAL);
1575 break;
1576 case IPV6_TCLASS:
1577 if (inlen != 0 && inlen != sizeof (int))
1578 return (EINVAL);
1579 if (inlen == sizeof (int)) {
1580 if (*i1 > 255 || *i1 < -1)
1581 return (EINVAL);
1583 break;
1584 case IPV6_NEXTHOP:
1585 if (inlen != 0 && inlen != sizeof (sin6_t))
1586 return (EINVAL);
1587 if (inlen == sizeof (sin6_t)) {
1588 sin6_t *sin6 = (sin6_t *)invalp;
1589 ire_t *ire;
1591 if (sin6->sin6_family != AF_INET6)
1592 return (EAFNOSUPPORT);
1593 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr))
1594 return (EADDRNOTAVAIL);
1596 /* Verify that the next-hop is on-link */
1597 ire = ire_ftable_lookup_v6(&sin6->sin6_addr,
1598 0, 0, IRE_ONLINK, NULL, zoneid,
1599 MATCH_IRE_TYPE, 0, ipst, NULL);
1600 if (ire == NULL)
1601 return (EHOSTUNREACH);
1602 ire_refrele(ire);
1603 break;
1605 break;
1606 case IPV6_RTHDR:
1607 case IPV6_DSTOPTS:
1608 case IPV6_RTHDRDSTOPTS:
1609 case IPV6_HOPOPTS: {
1610 /* All have the length field in the same place */
1611 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
1613 * Sanity checks - minimum size, size a multiple of
1614 * eight bytes, and matching size passed in.
1616 if (inlen != 0 &&
1617 inlen != (8 * (hopts->ip6h_len + 1)))
1618 return (EINVAL);
1619 break;
1621 case IPV6_PATHMTU:
1622 /* Can't be set */
1623 return (EINVAL);
1625 case IPV6_USE_MIN_MTU:
1626 if (inlen != sizeof (int))
1627 return (EINVAL);
1628 if (*i1 < -1 || *i1 > 1)
1629 return (EINVAL);
1630 break;
1631 case IPV6_SRC_PREFERENCES:
1632 if (inlen != sizeof (uint32_t))
1633 return (EINVAL);
1634 break;
1635 case IPV6_V6ONLY:
1636 if (*i1 < 0 || *i1 > 1) {
1637 return (EINVAL);
1639 break;
1641 if (checkonly)
1642 return (0);
1644 /* Here we set the actual option value */
1646 * conn_lock protects the bitfields, and is used to
1647 * set the fields atomically. Not needed for ixa settings since
1648 * the caller has an exclusive copy of the ixa.
1649 * We can not hold conn_lock across the multicast options though.
1651 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
1652 switch (name) {
1653 case IPV6_MULTICAST_IF:
1654 ixa->ixa_multicast_ifindex = ifindex;
1655 /* Need to redo ip_attr_connect */
1656 coa->coa_changed |= COA_ROUTE_CHANGED;
1657 break;
1658 case IPV6_UNICAST_HOPS:
1659 /* -1 means use default */
1660 mutex_enter(&connp->conn_lock);
1661 if (*i1 == -1) {
1662 ipp->ipp_unicast_hops = connp->conn_default_ttl;
1663 } else {
1664 ipp->ipp_unicast_hops = (uint8_t)*i1;
1666 mutex_exit(&connp->conn_lock);
1667 coa->coa_changed |= COA_HEADER_CHANGED;
1668 break;
1669 case IPV6_MULTICAST_HOPS:
1670 /* -1 means use default */
1671 if (*i1 == -1) {
1672 ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1673 } else {
1674 ixa->ixa_multicast_ttl = (uint8_t)*i1;
1676 /* Handled automatically by ip_output */
1677 break;
1678 case IPV6_MULTICAST_LOOP:
1679 if (*i1 != 0)
1680 ixa->ixa_flags |= IXAF_MULTICAST_LOOP;
1681 else
1682 ixa->ixa_flags &= ~IXAF_MULTICAST_LOOP;
1683 /* Handled automatically by ip_output */
1684 break;
1685 case IPV6_JOIN_GROUP:
1686 case IPV6_LEAVE_GROUP:
1687 case MCAST_JOIN_GROUP:
1688 case MCAST_LEAVE_GROUP:
1689 return (ip_opt_set_multicast_group(connp, name,
1690 invalp, B_TRUE, checkonly));
1692 case MCAST_BLOCK_SOURCE:
1693 case MCAST_UNBLOCK_SOURCE:
1694 case MCAST_JOIN_SOURCE_GROUP:
1695 case MCAST_LEAVE_SOURCE_GROUP:
1696 return (ip_opt_set_multicast_sources(connp, name,
1697 invalp, B_TRUE, checkonly));
1699 case IPV6_BOUND_IF:
1700 ixa->ixa_ifindex = ifindex; /* Send */
1701 mutex_enter(&connp->conn_lock);
1702 connp->conn_incoming_ifindex = ifindex; /* Receive */
1703 connp->conn_bound_if = ifindex; /* getsockopt */
1704 mutex_exit(&connp->conn_lock);
1705 coa->coa_changed |= COA_ROUTE_CHANGED;
1706 break;
1707 case IPV6_UNSPEC_SRC:
1708 mutex_enter(&connp->conn_lock);
1709 connp->conn_unspec_src = onoff;
1710 if (onoff)
1711 ixa->ixa_flags &= ~IXAF_VERIFY_SOURCE;
1712 else
1713 ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1714 mutex_exit(&connp->conn_lock);
1715 break;
1716 case IPV6_RECVPKTINFO:
1717 mutex_enter(&connp->conn_lock);
1718 connp->conn_recv_ancillary.crb_ip_recvpktinfo = onoff;
1719 mutex_exit(&connp->conn_lock);
1720 break;
1721 case IPV6_RECVTCLASS:
1722 mutex_enter(&connp->conn_lock);
1723 connp->conn_recv_ancillary.crb_ipv6_recvtclass = onoff;
1724 mutex_exit(&connp->conn_lock);
1725 break;
1726 case IPV6_RECVPATHMTU:
1727 mutex_enter(&connp->conn_lock);
1728 connp->conn_ipv6_recvpathmtu = onoff;
1729 mutex_exit(&connp->conn_lock);
1730 break;
1731 case IPV6_RECVHOPLIMIT:
1732 mutex_enter(&connp->conn_lock);
1733 connp->conn_recv_ancillary.crb_ipv6_recvhoplimit =
1734 onoff;
1735 mutex_exit(&connp->conn_lock);
1736 break;
1737 case IPV6_RECVHOPOPTS:
1738 mutex_enter(&connp->conn_lock);
1739 connp->conn_recv_ancillary.crb_ipv6_recvhopopts = onoff;
1740 mutex_exit(&connp->conn_lock);
1741 break;
1742 case IPV6_RECVDSTOPTS:
1743 mutex_enter(&connp->conn_lock);
1744 connp->conn_recv_ancillary.crb_ipv6_recvdstopts = onoff;
1745 mutex_exit(&connp->conn_lock);
1746 break;
1747 case _OLD_IPV6_RECVDSTOPTS:
1748 mutex_enter(&connp->conn_lock);
1749 connp->conn_recv_ancillary.crb_old_ipv6_recvdstopts =
1750 onoff;
1751 mutex_exit(&connp->conn_lock);
1752 break;
1753 case IPV6_RECVRTHDRDSTOPTS:
1754 mutex_enter(&connp->conn_lock);
1755 connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts =
1756 onoff;
1757 mutex_exit(&connp->conn_lock);
1758 break;
1759 case IPV6_RECVRTHDR:
1760 mutex_enter(&connp->conn_lock);
1761 connp->conn_recv_ancillary.crb_ipv6_recvrthdr = onoff;
1762 mutex_exit(&connp->conn_lock);
1763 break;
1764 case IPV6_PKTINFO:
1765 mutex_enter(&connp->conn_lock);
1766 if (inlen == 0) {
1767 ipp->ipp_fields &= ~IPPF_ADDR;
1768 ipp->ipp_addr = ipv6_all_zeros;
1769 ixa->ixa_ifindex = 0;
1770 } else {
1771 struct in6_pktinfo *pkti;
1773 pkti = (struct in6_pktinfo *)invalp;
1774 ipp->ipp_addr = pkti->ipi6_addr;
1775 if (!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr))
1776 ipp->ipp_fields |= IPPF_ADDR;
1777 else
1778 ipp->ipp_fields &= ~IPPF_ADDR;
1779 ixa->ixa_ifindex = pkti->ipi6_ifindex;
1781 mutex_exit(&connp->conn_lock);
1782 /* Source and ifindex might have changed */
1783 coa->coa_changed |= COA_HEADER_CHANGED;
1784 coa->coa_changed |= COA_ROUTE_CHANGED;
1785 break;
1786 case IPV6_HOPLIMIT:
1787 mutex_enter(&connp->conn_lock);
1788 if (inlen == 0 || *i1 == -1) {
1789 /* Revert to default */
1790 ipp->ipp_fields &= ~IPPF_HOPLIMIT;
1791 ixa->ixa_flags &= ~IXAF_NO_TTL_CHANGE;
1792 } else {
1793 ipp->ipp_hoplimit = *i1;
1794 ipp->ipp_fields |= IPPF_HOPLIMIT;
1795 /* Ensure that it sticks for multicast packets */
1796 ixa->ixa_flags |= IXAF_NO_TTL_CHANGE;
1798 mutex_exit(&connp->conn_lock);
1799 coa->coa_changed |= COA_HEADER_CHANGED;
1800 break;
1801 case IPV6_TCLASS:
1803 * IPV6_TCLASS accepts -1 as use kernel default
1804 * and [0, 255] as the actualy traffic class.
1806 mutex_enter(&connp->conn_lock);
1807 if (inlen == 0 || *i1 == -1) {
1808 ipp->ipp_tclass = 0;
1809 ipp->ipp_fields &= ~IPPF_TCLASS;
1810 } else {
1811 ipp->ipp_tclass = *i1;
1812 ipp->ipp_fields |= IPPF_TCLASS;
1814 mutex_exit(&connp->conn_lock);
1815 coa->coa_changed |= COA_HEADER_CHANGED;
1816 break;
1817 case IPV6_NEXTHOP:
1818 if (inlen == 0) {
1819 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1820 } else {
1821 sin6_t *sin6 = (sin6_t *)invalp;
1823 ixa->ixa_nexthop_v6 = sin6->sin6_addr;
1824 if (!IN6_IS_ADDR_UNSPECIFIED(&ixa->ixa_nexthop_v6))
1825 ixa->ixa_flags |= IXAF_NEXTHOP_SET;
1826 else
1827 ixa->ixa_flags &= ~IXAF_NEXTHOP_SET;
1829 coa->coa_changed |= COA_ROUTE_CHANGED;
1830 break;
1831 case IPV6_HOPOPTS:
1832 mutex_enter(&connp->conn_lock);
1833 error = optcom_pkt_set(invalp, inlen,
1834 (uchar_t **)&ipp->ipp_hopopts, &ipp->ipp_hopoptslen);
1835 if (error != 0) {
1836 mutex_exit(&connp->conn_lock);
1837 return (error);
1839 if (ipp->ipp_hopoptslen == 0) {
1840 ipp->ipp_fields &= ~IPPF_HOPOPTS;
1841 } else {
1842 ipp->ipp_fields |= IPPF_HOPOPTS;
1844 mutex_exit(&connp->conn_lock);
1845 coa->coa_changed |= COA_HEADER_CHANGED;
1846 coa->coa_changed |= COA_WROFF_CHANGED;
1847 break;
1848 case IPV6_RTHDRDSTOPTS:
1849 mutex_enter(&connp->conn_lock);
1850 error = optcom_pkt_set(invalp, inlen,
1851 (uchar_t **)&ipp->ipp_rthdrdstopts,
1852 &ipp->ipp_rthdrdstoptslen);
1853 if (error != 0) {
1854 mutex_exit(&connp->conn_lock);
1855 return (error);
1857 if (ipp->ipp_rthdrdstoptslen == 0) {
1858 ipp->ipp_fields &= ~IPPF_RTHDRDSTOPTS;
1859 } else {
1860 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
1862 mutex_exit(&connp->conn_lock);
1863 coa->coa_changed |= COA_HEADER_CHANGED;
1864 coa->coa_changed |= COA_WROFF_CHANGED;
1865 break;
1866 case IPV6_DSTOPTS:
1867 mutex_enter(&connp->conn_lock);
1868 error = optcom_pkt_set(invalp, inlen,
1869 (uchar_t **)&ipp->ipp_dstopts, &ipp->ipp_dstoptslen);
1870 if (error != 0) {
1871 mutex_exit(&connp->conn_lock);
1872 return (error);
1874 if (ipp->ipp_dstoptslen == 0) {
1875 ipp->ipp_fields &= ~IPPF_DSTOPTS;
1876 } else {
1877 ipp->ipp_fields |= IPPF_DSTOPTS;
1879 mutex_exit(&connp->conn_lock);
1880 coa->coa_changed |= COA_HEADER_CHANGED;
1881 coa->coa_changed |= COA_WROFF_CHANGED;
1882 break;
1883 case IPV6_RTHDR:
1884 mutex_enter(&connp->conn_lock);
1885 error = optcom_pkt_set(invalp, inlen,
1886 (uchar_t **)&ipp->ipp_rthdr, &ipp->ipp_rthdrlen);
1887 if (error != 0) {
1888 mutex_exit(&connp->conn_lock);
1889 return (error);
1891 if (ipp->ipp_rthdrlen == 0) {
1892 ipp->ipp_fields &= ~IPPF_RTHDR;
1893 } else {
1894 ipp->ipp_fields |= IPPF_RTHDR;
1896 mutex_exit(&connp->conn_lock);
1897 coa->coa_changed |= COA_HEADER_CHANGED;
1898 coa->coa_changed |= COA_WROFF_CHANGED;
1899 break;
1901 case IPV6_DONTFRAG:
1902 if (onoff) {
1903 ixa->ixa_flags |= IXAF_DONTFRAG;
1904 ixa->ixa_flags &= ~IXAF_PMTU_DISCOVERY;
1905 } else {
1906 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1907 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
1909 /* Need to redo ip_attr_connect */
1910 coa->coa_changed |= COA_ROUTE_CHANGED;
1911 break;
1913 case IPV6_USE_MIN_MTU:
1914 ixa->ixa_flags |= IXAF_USE_MIN_MTU;
1915 ixa->ixa_use_min_mtu = *i1;
1916 /* Need to redo ip_attr_connect */
1917 coa->coa_changed |= COA_ROUTE_CHANGED;
1918 break;
1920 case IPV6_SEC_OPT:
1921 mutex_enter(&connp->conn_lock);
1922 error = ipsec_set_req(cr, connp, (ipsec_req_t *)invalp);
1923 mutex_exit(&connp->conn_lock);
1924 if (error != 0) {
1925 return (error);
1927 /* This is an IPsec policy change - redo ip_attr_connect */
1928 coa->coa_changed |= COA_ROUTE_CHANGED;
1929 break;
1930 case IPV6_SRC_PREFERENCES:
1932 * This socket option only affects connected
1933 * sockets that haven't already bound to a specific
1934 * IPv6 address. In other words, sockets that
1935 * don't call bind() with an address other than the
1936 * unspecified address and that call connect().
1937 * ip_set_destination_v6() passes these preferences
1938 * to the ipif_select_source_v6() function.
1940 mutex_enter(&connp->conn_lock);
1941 error = ip6_set_src_preferences(ixa, *(uint32_t *)invalp);
1942 mutex_exit(&connp->conn_lock);
1943 if (error != 0) {
1944 return (error);
1946 break;
1947 case IPV6_V6ONLY:
1948 mutex_enter(&connp->conn_lock);
1949 connp->conn_ipv6_v6only = onoff;
1950 mutex_exit(&connp->conn_lock);
1951 break;
1953 return (0);
1956 /* Handle IPPROTO_UDP */
1957 /* ARGSUSED1 */
1958 static int
1959 conn_opt_set_udp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1960 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1962 conn_t *connp = coa->coa_connp;
1963 int *i1 = (int *)invalp;
1964 boolean_t onoff = (*i1 == 0) ? 0 : 1;
1965 int error;
1967 switch (name) {
1968 case UDP_ANONPRIVBIND:
1969 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_UDP)) != 0) {
1970 return (error);
1972 break;
1974 if (checkonly)
1975 return (0);
1977 /* Here we set the actual option value */
1978 mutex_enter(&connp->conn_lock);
1979 switch (name) {
1980 case UDP_ANONPRIVBIND:
1981 connp->conn_anon_priv_bind = onoff;
1982 break;
1983 case UDP_EXCLBIND:
1984 connp->conn_exclbind = onoff;
1985 break;
1987 mutex_exit(&connp->conn_lock);
1988 return (0);
1991 /* Handle IPPROTO_TCP */
1992 /* ARGSUSED1 */
1993 static int
1994 conn_opt_set_tcp(conn_opt_arg_t *coa, t_scalar_t name, uint_t inlen,
1995 uchar_t *invalp, boolean_t checkonly, cred_t *cr)
1997 conn_t *connp = coa->coa_connp;
1998 int *i1 = (int *)invalp;
1999 boolean_t onoff = (*i1 == 0) ? 0 : 1;
2000 int error;
2002 switch (name) {
2003 case TCP_ANONPRIVBIND:
2004 if ((error = secpolicy_net_privaddr(cr, 0, IPPROTO_TCP)) != 0) {
2005 return (error);
2007 break;
2009 if (checkonly)
2010 return (0);
2012 /* Here we set the actual option value */
2013 mutex_enter(&connp->conn_lock);
2014 switch (name) {
2015 case TCP_ANONPRIVBIND:
2016 connp->conn_anon_priv_bind = onoff;
2017 break;
2018 case TCP_EXCLBIND:
2019 connp->conn_exclbind = onoff;
2020 break;
2021 case TCP_RECVDSTADDR:
2022 connp->conn_recv_ancillary.crb_recvdstaddr = onoff;
2023 break;
2025 mutex_exit(&connp->conn_lock);
2026 return (0);
2030 conn_getsockname(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2032 sin_t *sin;
2033 sin6_t *sin6;
2035 if (connp->conn_family == AF_INET) {
2036 if (*salenp < sizeof (sin_t))
2037 return (EINVAL);
2039 *salenp = sizeof (sin_t);
2040 /* Fill zeroes and then initialize non-zero fields */
2041 sin = (sin_t *)sa;
2042 *sin = sin_null;
2043 sin->sin_family = AF_INET;
2044 if (!IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_saddr_v6) &&
2045 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2046 sin->sin_addr.s_addr = connp->conn_saddr_v4;
2047 } else {
2049 * INADDR_ANY
2050 * conn_saddr is not set, we might be bound to
2051 * broadcast/multicast. Use conn_bound_addr as
2052 * local address instead (that could
2053 * also still be INADDR_ANY)
2055 sin->sin_addr.s_addr = connp->conn_bound_addr_v4;
2057 sin->sin_port = connp->conn_lport;
2058 } else {
2059 if (*salenp < sizeof (sin6_t))
2060 return (EINVAL);
2062 *salenp = sizeof (sin6_t);
2063 /* Fill zeroes and then initialize non-zero fields */
2064 sin6 = (sin6_t *)sa;
2065 *sin6 = sin6_null;
2066 sin6->sin6_family = AF_INET6;
2067 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
2068 sin6->sin6_addr = connp->conn_saddr_v6;
2069 } else {
2071 * conn_saddr is not set, we might be bound to
2072 * broadcast/multicast. Use conn_bound_addr as
2073 * local address instead (which could
2074 * also still be unspecified)
2076 sin6->sin6_addr = connp->conn_bound_addr_v6;
2078 sin6->sin6_port = connp->conn_lport;
2079 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2080 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2081 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2083 return (0);
2087 conn_getpeername(conn_t *connp, struct sockaddr *sa, uint_t *salenp)
2089 struct sockaddr_in *sin;
2090 struct sockaddr_in6 *sin6;
2092 if (connp->conn_family == AF_INET) {
2093 if (*salenp < sizeof (sin_t))
2094 return (EINVAL);
2096 *salenp = sizeof (sin_t);
2097 /* initialize */
2098 sin = (sin_t *)sa;
2099 *sin = sin_null;
2100 sin->sin_family = AF_INET;
2101 sin->sin_addr.s_addr = connp->conn_faddr_v4;
2102 sin->sin_port = connp->conn_fport;
2103 } else {
2104 if (*salenp < sizeof (sin6_t))
2105 return (EINVAL);
2107 *salenp = sizeof (sin6_t);
2108 /* initialize */
2109 sin6 = (sin6_t *)sa;
2110 *sin6 = sin6_null;
2111 sin6->sin6_family = AF_INET6;
2112 sin6->sin6_addr = connp->conn_faddr_v6;
2113 sin6->sin6_port = connp->conn_fport;
2114 sin6->sin6_flowinfo = connp->conn_flowinfo;
2115 if (IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr) &&
2116 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET))
2117 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
2119 return (0);
2122 static uint32_t cksum_massage_options_v4(ipha_t *, netstack_t *);
2123 static uint32_t cksum_massage_options_v6(ip6_t *, uint_t, netstack_t *);
2126 * Allocate and fill in conn_ht_iphc based on the current information
2127 * in the conn.
2128 * Normally used when we bind() and connect().
2129 * Returns failure if can't allocate memory, or if there is a problem
2130 * with a routing header/option.
2132 * We allocate space for the transport header (ulp_hdr_len + extra) and
2133 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2134 * The extra is there for transports that want some spare room for future
2135 * options. conn_ht_iphc_allocated is what was allocated; conn_ht_iphc_len
2136 * excludes the extra part.
2138 * We massage an routing option/header and store the ckecksum difference
2139 * in conn_sum.
2141 * Caller needs to update conn_wroff if desired.
2144 conn_build_hdr_template(conn_t *connp, uint_t ulp_hdr_length, uint_t extra,
2145 const in6_addr_t *v6src, const in6_addr_t *v6dst, uint32_t flowinfo)
2147 ip_xmit_attr_t *ixa = connp->conn_ixa;
2148 ip_pkt_t *ipp = &connp->conn_xmit_ipp;
2149 uint_t ip_hdr_length;
2150 uchar_t *hdrs;
2151 uint_t hdrs_len;
2153 ASSERT(MUTEX_HELD(&connp->conn_lock));
2155 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2156 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2157 /* In case of TX label and IP options it can be too much */
2158 if (ip_hdr_length > IP_MAX_HDR_LENGTH) {
2159 /* Preserves existing TX errno for this */
2160 return (EHOSTUNREACH);
2162 } else {
2163 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2165 ixa->ixa_ip_hdr_length = ip_hdr_length;
2166 hdrs_len = ip_hdr_length + ulp_hdr_length + extra;
2167 ASSERT(hdrs_len != 0);
2169 if (hdrs_len != connp->conn_ht_iphc_allocated) {
2170 /* Allocate new before we free any old */
2171 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
2172 if (hdrs == NULL)
2173 return (ENOMEM);
2175 if (connp->conn_ht_iphc != NULL) {
2176 kmem_free(connp->conn_ht_iphc,
2177 connp->conn_ht_iphc_allocated);
2179 connp->conn_ht_iphc = hdrs;
2180 connp->conn_ht_iphc_allocated = hdrs_len;
2181 } else {
2182 hdrs = connp->conn_ht_iphc;
2184 hdrs_len -= extra;
2185 connp->conn_ht_iphc_len = hdrs_len;
2187 connp->conn_ht_ulp = hdrs + ip_hdr_length;
2188 connp->conn_ht_ulp_len = ulp_hdr_length;
2190 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2191 ipha_t *ipha = (ipha_t *)hdrs;
2193 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2194 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2195 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, connp->conn_proto);
2196 ipha->ipha_length = htons(hdrs_len);
2197 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2198 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2199 else
2200 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2202 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2203 connp->conn_sum = cksum_massage_options_v4(ipha,
2204 connp->conn_netstack);
2205 } else {
2206 connp->conn_sum = 0;
2208 } else {
2209 ip6_t *ip6h = (ip6_t *)hdrs;
2211 ip6h->ip6_src = *v6src;
2212 ip6h->ip6_dst = *v6dst;
2213 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, connp->conn_proto,
2214 flowinfo);
2215 ip6h->ip6_plen = htons(hdrs_len - IPV6_HDR_LEN);
2217 if (ipp->ipp_fields & IPPF_RTHDR) {
2218 connp->conn_sum = cksum_massage_options_v6(ip6h,
2219 ip_hdr_length, connp->conn_netstack);
2222 * Verify that the first hop isn't a mapped address.
2223 * Routers along the path need to do this verification
2224 * for subsequent hops.
2226 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst))
2227 return (EADDRNOTAVAIL);
2229 } else {
2230 connp->conn_sum = 0;
2233 return (0);
2237 * Prepend a header template to data_mp based on the ip_pkt_t
2238 * and the passed in source, destination and protocol.
2240 * Returns failure if can't allocate memory, in which case data_mp is freed.
2241 * We allocate space for the transport header (ulp_hdr_len) and
2242 * indicate the offset of the ulp header by setting ixa_ip_hdr_length.
2244 * We massage an routing option/header and return the ckecksum difference
2245 * in *sump. This is in host byte order.
2247 * Caller needs to update conn_wroff if desired.
2249 mblk_t *
2250 conn_prepend_hdr(ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
2251 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2252 uint8_t protocol, uint32_t flowinfo, uint_t ulp_hdr_length, mblk_t *data_mp,
2253 uint_t data_length, uint_t wroff_extra, uint32_t *sump, int *errorp)
2255 uint_t ip_hdr_length;
2256 uchar_t *hdrs;
2257 uint_t hdrs_len;
2258 mblk_t *mp;
2260 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2261 ip_hdr_length = ip_total_hdrs_len_v4(ipp);
2262 ASSERT(ip_hdr_length <= IP_MAX_HDR_LENGTH);
2263 } else {
2264 ip_hdr_length = ip_total_hdrs_len_v6(ipp);
2266 hdrs_len = ip_hdr_length + ulp_hdr_length;
2267 ASSERT(hdrs_len != 0);
2269 ixa->ixa_ip_hdr_length = ip_hdr_length;
2271 /* Can we prepend to data_mp? */
2272 if (data_mp != NULL &&
2273 data_mp->b_rptr - data_mp->b_datap->db_base >= hdrs_len &&
2274 data_mp->b_datap->db_ref == 1) {
2275 hdrs = data_mp->b_rptr - hdrs_len;
2276 data_mp->b_rptr = hdrs;
2277 mp = data_mp;
2278 } else {
2279 mp = allocb(hdrs_len + wroff_extra, BPRI_MED);
2280 if (mp == NULL) {
2281 freemsg(data_mp);
2282 *errorp = ENOMEM;
2283 return (NULL);
2285 mp->b_wptr = mp->b_datap->db_lim;
2286 hdrs = mp->b_rptr = mp->b_wptr - hdrs_len;
2287 mp->b_cont = data_mp;
2291 * Set the source in the header. ip_build_hdrs_v4/v6 will overwrite it
2292 * if PKTINFO (aka IPPF_ADDR) was set.
2294 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2295 ipha_t *ipha = (ipha_t *)hdrs;
2297 ASSERT(IN6_IS_ADDR_V4MAPPED(v6dst));
2298 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
2299 IN6_V4MAPPED_TO_IPADDR(v6dst, ipha->ipha_dst);
2300 ip_build_hdrs_v4(hdrs, ip_hdr_length, ipp, protocol);
2301 ipha->ipha_length = htons(hdrs_len + data_length);
2302 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2303 ipha->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2304 else
2305 ipha->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2307 if (ipp->ipp_fields & IPPF_IPV4_OPTIONS) {
2308 *sump = cksum_massage_options_v4(ipha,
2309 ixa->ixa_ipst->ips_netstack);
2310 } else {
2311 *sump = 0;
2313 } else {
2314 ip6_t *ip6h = (ip6_t *)hdrs;
2316 ip6h->ip6_src = *v6src;
2317 ip6h->ip6_dst = *v6dst;
2318 ip_build_hdrs_v6(hdrs, ip_hdr_length, ipp, protocol, flowinfo);
2319 ip6h->ip6_plen = htons(hdrs_len + data_length - IPV6_HDR_LEN);
2321 if (ipp->ipp_fields & IPPF_RTHDR) {
2322 *sump = cksum_massage_options_v6(ip6h,
2323 ip_hdr_length, ixa->ixa_ipst->ips_netstack);
2326 * Verify that the first hop isn't a mapped address.
2327 * Routers along the path need to do this verification
2328 * for subsequent hops.
2330 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
2331 *errorp = EADDRNOTAVAIL;
2332 freemsg(mp);
2333 return (NULL);
2335 } else {
2336 *sump = 0;
2339 return (mp);
2343 * Massage a source route if any putting the first hop
2344 * in ipha_dst. Compute a starting value for the checksum which
2345 * takes into account that the original ipha_dst should be
2346 * included in the checksum but that IP will include the
2347 * first hop from the source route in the tcp checksum.
2349 static uint32_t
2350 cksum_massage_options_v4(ipha_t *ipha, netstack_t *ns)
2352 in_addr_t dst;
2353 uint32_t cksum;
2355 /* Get last hop then diff against first hop */
2356 cksum = ip_massage_options(ipha, ns);
2357 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2358 dst = ipha->ipha_dst;
2359 cksum -= ((dst >> 16) + (dst & 0xffff));
2360 if ((int)cksum < 0)
2361 cksum--;
2362 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2363 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2364 ASSERT(cksum < 0x10000);
2365 return (ntohs(cksum));
2368 static uint32_t
2369 cksum_massage_options_v6(ip6_t *ip6h, uint_t ip_hdr_len, netstack_t *ns)
2371 uint8_t *end;
2372 ip6_rthdr_t *rth;
2373 uint32_t cksum;
2375 end = (uint8_t *)ip6h + ip_hdr_len;
2376 rth = ip_find_rthdr_v6(ip6h, end);
2377 if (rth == NULL)
2378 return (0);
2380 cksum = ip_massage_options_v6(ip6h, rth, ns);
2381 cksum = (cksum & 0xFFFF) + (cksum >> 16);
2382 ASSERT(cksum < 0x10000);
2383 return (ntohs(cksum));
2387 * ULPs that change the destination address need to call this for each
2388 * change to discard any state about a previous destination that might
2389 * have been multicast.
2391 void
2392 ip_attr_newdst(ip_xmit_attr_t *ixa)
2394 ixa->ixa_flags &= ~(IXAF_LOOPBACK_COPY | IXAF_NO_HW_CKSUM |
2395 IXAF_NO_TTL_CHANGE | IXAF_IPV6_ADD_FRAGHDR |
2396 IXAF_NO_LOOP_ZONEID_SET);
2400 * Determine the nexthop which will be used.
2401 * Normally this is just the destination, but if a IPv4 source route, or
2402 * IPv6 routing header, is in the ip_pkt_t then we extract the nexthop from
2403 * there.
2405 void
2406 ip_attr_nexthop(const ip_pkt_t *ipp, const ip_xmit_attr_t *ixa,
2407 const in6_addr_t *dst, in6_addr_t *nexthop)
2409 if (!(ipp->ipp_fields & (IPPF_IPV4_OPTIONS|IPPF_RTHDR))) {
2410 *nexthop = *dst;
2411 return;
2413 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2414 ipaddr_t v4dst;
2415 ipaddr_t v4nexthop;
2417 IN6_V4MAPPED_TO_IPADDR(dst, v4dst);
2418 v4nexthop = ip_pkt_source_route_v4(ipp);
2419 if (v4nexthop == INADDR_ANY)
2420 v4nexthop = v4dst;
2422 IN6_IPADDR_TO_V4MAPPED(v4nexthop, nexthop);
2423 } else {
2424 const in6_addr_t *v6nexthop;
2426 v6nexthop = ip_pkt_source_route_v6(ipp);
2427 if (v6nexthop == NULL)
2428 v6nexthop = dst;
2430 *nexthop = *v6nexthop;
2435 * Update the ip_xmit_attr_t based the addresses, conn_xmit_ipp and conn_ixa.
2436 * If IPDF_IPSEC is set we cache the IPsec policy to handle the unconnected
2437 * case (connected latching is done in conn_connect).
2438 * Note that IPsec policy lookup requires conn_proto and conn_laddr to be
2439 * set, but doesn't otherwise use the conn_t.
2441 * Caller must set/clear IXAF_IS_IPV4 as appropriately.
2442 * Caller must use ip_attr_nexthop() to determine the nexthop argument.
2444 * The caller must NOT hold conn_lock (to avoid problems with ill_refrele
2445 * causing the squeue to run doing ipcl_walk grabbing conn_lock.)
2447 * Updates laddrp and uinfo if they are non-NULL.
2450 ip_attr_connect(const conn_t *connp, ip_xmit_attr_t *ixa,
2451 const in6_addr_t *v6src, const in6_addr_t *v6dst,
2452 const in6_addr_t *v6nexthop, in_port_t dstport, in6_addr_t *laddrp,
2453 iulp_t *uinfo, uint32_t flags)
2455 in6_addr_t laddr = *v6src;
2456 int error;
2458 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
2460 if (connp->conn_zone_is_global)
2461 flags |= IPDF_ZONE_IS_GLOBAL;
2462 else
2463 flags &= ~IPDF_ZONE_IS_GLOBAL;
2466 * Lookup the route to determine a source address and the uinfo.
2467 * If the ULP has a source route option then the caller will
2468 * have set v6nexthop to be the first hop.
2470 if (ixa->ixa_flags & IXAF_IS_IPV4) {
2471 ipaddr_t v4dst;
2472 ipaddr_t v4src, v4nexthop;
2474 IN6_V4MAPPED_TO_IPADDR(v6dst, v4dst);
2475 IN6_V4MAPPED_TO_IPADDR(v6nexthop, v4nexthop);
2476 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
2478 if (connp->conn_unspec_src || v4src != INADDR_ANY)
2479 flags &= ~IPDF_SELECT_SRC;
2480 else
2481 flags |= IPDF_SELECT_SRC;
2483 error = ip_set_destination_v4(&v4src, v4dst, v4nexthop, ixa,
2484 uinfo, flags);
2485 IN6_IPADDR_TO_V4MAPPED(v4src, &laddr);
2486 } else {
2487 if (connp->conn_unspec_src || !IN6_IS_ADDR_UNSPECIFIED(v6src))
2488 flags &= ~IPDF_SELECT_SRC;
2489 else
2490 flags |= IPDF_SELECT_SRC;
2492 error = ip_set_destination_v6(&laddr, v6dst, v6nexthop, ixa,
2493 uinfo, flags);
2495 /* Pass out some address even if we hit a RTF_REJECT etc */
2496 if (laddrp != NULL)
2497 *laddrp = laddr;
2499 if (error != 0)
2500 return (error);
2502 if (flags & IPDF_IPSEC) {
2504 * Set any IPsec policy in ixa. Routine also looks at ULP
2505 * ports.
2507 ipsec_cache_outbound_policy(connp, v6src, v6dst, dstport, ixa);
2509 return (0);
2513 * Connect the conn based on the addresses, conn_xmit_ipp and conn_ixa.
2514 * Assumes that conn_faddr and conn_fport are already set. As such it is not
2515 * usable for SCTP, since SCTP has multiple faddrs.
2517 * Caller must hold conn_lock to provide atomic constency between the
2518 * conn_t's addresses and the ixa.
2519 * NOTE: this function drops and reaquires conn_lock since it can't be
2520 * held across ip_attr_connect/ip_set_destination.
2522 * The caller needs to handle inserting in the receive-side fanout when
2523 * appropriate after conn_connect returns.
2526 conn_connect(conn_t *connp, iulp_t *uinfo, uint32_t flags)
2528 ip_xmit_attr_t *ixa = connp->conn_ixa;
2529 in6_addr_t nexthop;
2530 in6_addr_t saddr, faddr;
2531 in_port_t fport;
2532 int error;
2534 ASSERT(MUTEX_HELD(&connp->conn_lock));
2536 if (connp->conn_ipversion == IPV4_VERSION)
2537 ixa->ixa_flags |= IXAF_IS_IPV4;
2538 else
2539 ixa->ixa_flags &= ~IXAF_IS_IPV4;
2541 /* We do IPsec latching below - hence no caching in ip_attr_connect */
2542 flags &= ~IPDF_IPSEC;
2544 /* In case we had previously done an ip_attr_connect */
2545 ip_attr_newdst(ixa);
2548 * Determine the nexthop and copy the addresses before dropping
2549 * conn_lock.
2551 ip_attr_nexthop(&connp->conn_xmit_ipp, connp->conn_ixa,
2552 &connp->conn_faddr_v6, &nexthop);
2553 saddr = connp->conn_saddr_v6;
2554 faddr = connp->conn_faddr_v6;
2555 fport = connp->conn_fport;
2557 mutex_exit(&connp->conn_lock);
2558 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, fport,
2559 &saddr, uinfo, flags | IPDF_VERIFY_DST);
2560 mutex_enter(&connp->conn_lock);
2562 /* Could have changed even if an error */
2563 connp->conn_saddr_v6 = saddr;
2564 if (error != 0)
2565 return (error);
2568 * Ensure that we match on the selected local address.
2569 * This overrides conn_laddr in the case we had earlier bound to a
2570 * multicast or broadcast address.
2572 connp->conn_laddr_v6 = connp->conn_saddr_v6;
2575 * Allow setting new policies.
2576 * The addresses/ports are already set, thus the IPsec policy calls
2577 * can handle their passed-in conn's.
2579 connp->conn_policy_cached = B_FALSE;
2582 * Cache IPsec policy in this conn. If we have per-socket policy,
2583 * we'll cache that. If we don't, we'll inherit global policy.
2585 * This is done before the caller inserts in the receive-side fanout.
2586 * Note that conn_policy_cached is set by ipsec_conn_cache_policy() even
2587 * for connections where we don't have a policy. This is to prevent
2588 * global policy lookups in the inbound path.
2590 * If we insert before we set conn_policy_cached,
2591 * CONN_INBOUND_POLICY_PRESENT() check can still evaluate true
2592 * because global policy cound be non-empty. We normally call
2593 * ipsec_check_policy() for conn_policy_cached connections only if
2594 * conn_in_enforce_policy is set. But in this case,
2595 * conn_policy_cached can get set anytime since we made the
2596 * CONN_INBOUND_POLICY_PRESENT() check and ipsec_check_policy() is
2597 * called, which will make the above assumption false. Thus, we
2598 * need to insert after we set conn_policy_cached.
2600 error = ipsec_conn_cache_policy(connp,
2601 connp->conn_ipversion == IPV4_VERSION);
2602 if (error != 0)
2603 return (error);
2606 * We defer to do LSO check until here since now we have better idea
2607 * whether IPsec is present. If the underlying ill is LSO capable,
2608 * copy its capability in so the ULP can decide whether to enable LSO
2609 * on this connection. So far, only TCP/IPv4 is implemented, so won't
2610 * claim LSO for IPv6.
2612 * Currently, won't enable LSO for IRE_LOOPBACK or IRE_LOCAL, because
2613 * the receiver can not handle it.
2615 ixa->ixa_flags &= ~IXAF_LSO_CAPAB;
2617 ASSERT(ixa->ixa_ire != NULL);
2618 if (ixa->ixa_ipst->ips_ip_lso_outbound && (flags & IPDF_LSO) &&
2619 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2620 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2621 (ixa->ixa_nce != NULL) &&
2622 ((ixa->ixa_flags & IXAF_IS_IPV4) ?
2623 ILL_LSO_TCP_IPV4_USABLE(ixa->ixa_nce->nce_ill) :
2624 ILL_LSO_TCP_IPV6_USABLE(ixa->ixa_nce->nce_ill))) {
2625 ixa->ixa_lso_capab = *ixa->ixa_nce->nce_ill->ill_lso_capab;
2626 ixa->ixa_flags |= IXAF_LSO_CAPAB;
2629 /* Check whether ZEROCOPY capability is usable for this connection. */
2630 ixa->ixa_flags &= ~IXAF_ZCOPY_CAPAB;
2632 if ((flags & IPDF_ZCOPY) &&
2633 !(ixa->ixa_flags & IXAF_IPSEC_SECURE) &&
2634 !(ixa->ixa_ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK)) &&
2635 (ixa->ixa_nce != NULL) &&
2636 ILL_ZCOPY_USABLE(ixa->ixa_nce->nce_ill)) {
2637 ixa->ixa_flags |= IXAF_ZCOPY_CAPAB;
2639 return (0);
2643 * Predicates to check if the addresses match conn_last*
2647 * Compare the conn against an address.
2648 * If using mapped addresses on AF_INET6 sockets, use the _v6 function
2650 boolean_t
2651 conn_same_as_last_v4(conn_t *connp, sin_t *sin)
2653 ASSERT(connp->conn_family == AF_INET);
2654 return (sin->sin_addr.s_addr == connp->conn_v4lastdst &&
2655 sin->sin_port == connp->conn_lastdstport);
2659 * Compare, including for mapped addresses
2661 boolean_t
2662 conn_same_as_last_v6(conn_t *connp, sin6_t *sin6)
2664 return (IN6_ARE_ADDR_EQUAL(&connp->conn_v6lastdst, &sin6->sin6_addr) &&
2665 sin6->sin6_port == connp->conn_lastdstport &&
2666 sin6->sin6_flowinfo == connp->conn_lastflowinfo &&
2667 sin6->sin6_scope_id == connp->conn_lastscopeid);
2671 * Inherit all options settings from the parent/listener to the eager.
2672 * Returns zero on success; ENOMEM if memory allocation failed.
2674 * We assume that the eager has not had any work done i.e., the conn_ixa
2675 * and conn_xmit_ipp are all zero.
2676 * Furthermore we assume that no other thread can access the eager (because
2677 * it isn't inserted in any fanout list).
2680 conn_inherit_parent(conn_t *lconnp, conn_t *econnp)
2682 cred_t *credp;
2683 int err;
2684 void *notify_cookie;
2685 uint32_t xmit_hint;
2687 econnp->conn_family = lconnp->conn_family;
2688 econnp->conn_ipv6_v6only = lconnp->conn_ipv6_v6only;
2689 econnp->conn_wq = lconnp->conn_wq;
2690 econnp->conn_rq = lconnp->conn_rq;
2693 * Make a safe copy of the transmit attributes.
2694 * conn_connect will later be used by the caller to setup the ire etc.
2696 ASSERT(econnp->conn_ixa->ixa_refcnt == 1);
2697 ASSERT(econnp->conn_ixa->ixa_ire == NULL);
2698 ASSERT(econnp->conn_ixa->ixa_dce == NULL);
2699 ASSERT(econnp->conn_ixa->ixa_nce == NULL);
2701 /* Preserve ixa_notify_cookie and xmit_hint */
2702 notify_cookie = econnp->conn_ixa->ixa_notify_cookie;
2703 xmit_hint = econnp->conn_ixa->ixa_xmit_hint;
2704 ixa_safe_copy(lconnp->conn_ixa, econnp->conn_ixa);
2705 econnp->conn_ixa->ixa_notify_cookie = notify_cookie;
2706 econnp->conn_ixa->ixa_xmit_hint = xmit_hint;
2708 econnp->conn_bound_if = lconnp->conn_bound_if;
2709 econnp->conn_incoming_ifindex = lconnp->conn_incoming_ifindex;
2711 /* Inherit all RECV options */
2712 econnp->conn_recv_ancillary = lconnp->conn_recv_ancillary;
2714 err = ip_pkt_copy(&lconnp->conn_xmit_ipp, &econnp->conn_xmit_ipp,
2715 KM_NOSLEEP);
2716 if (err != 0)
2717 return (err);
2719 econnp->conn_zoneid = lconnp->conn_zoneid;
2720 econnp->conn_allzones = lconnp->conn_allzones;
2722 /* This is odd. Pick a flowlabel for each connection instead? */
2723 econnp->conn_flowinfo = lconnp->conn_flowinfo;
2725 econnp->conn_default_ttl = lconnp->conn_default_ttl;
2727 ASSERT(lconnp->conn_cred != NULL);
2728 econnp->conn_cred = credp = lconnp->conn_cred;
2729 crhold(credp);
2730 econnp->conn_cpid = lconnp->conn_cpid;
2731 econnp->conn_open_time = ddi_get_lbolt64();
2734 * Cache things in the ixa without any refhold.
2735 * Listener might not have set up ixa_cred
2737 ASSERT(!(econnp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
2738 econnp->conn_ixa->ixa_cred = econnp->conn_cred;
2739 econnp->conn_ixa->ixa_cpid = econnp->conn_cpid;
2741 econnp->conn_zone_is_global = lconnp->conn_zone_is_global;
2744 * We eliminate the need for sockfs to send down a T_SVR4_OPTMGMT_REQ
2745 * via soaccept()->soinheritoptions() which essentially applies
2746 * all the listener options to the new connection. The options that we
2747 * need to take care of are:
2748 * SO_DEBUG, SO_REUSEADDR, SO_KEEPALIVE, SO_DONTROUTE, SO_BROADCAST,
2749 * SO_USELOOPBACK, SO_OOBINLINE, SO_DGRAM_ERRIND, SO_LINGER,
2750 * SO_SNDBUF, SO_RCVBUF.
2752 * SO_RCVBUF: conn_rcvbuf is set.
2753 * SO_SNDBUF: conn_sndbuf is set.
2756 /* Could we define a struct and use a struct copy for this? */
2757 econnp->conn_sndbuf = lconnp->conn_sndbuf;
2758 econnp->conn_rcvbuf = lconnp->conn_rcvbuf;
2759 econnp->conn_sndlowat = lconnp->conn_sndlowat;
2760 econnp->conn_rcvlowat = lconnp->conn_rcvlowat;
2761 econnp->conn_dgram_errind = lconnp->conn_dgram_errind;
2762 econnp->conn_oobinline = lconnp->conn_oobinline;
2763 econnp->conn_debug = lconnp->conn_debug;
2764 econnp->conn_keepalive = lconnp->conn_keepalive;
2765 econnp->conn_linger = lconnp->conn_linger;
2766 econnp->conn_lingertime = lconnp->conn_lingertime;
2768 /* Set the IP options */
2769 econnp->conn_broadcast = lconnp->conn_broadcast;
2770 econnp->conn_useloopback = lconnp->conn_useloopback;
2771 econnp->conn_reuseaddr = lconnp->conn_reuseaddr;
2772 return (0);