kill tsol ("Trusted Solaris") aka TX ("Trusted Extensions")
[unleashed.git] / kernel / net / ip / ip6.c
blob984768c11f8647c5bb9df0652eec112e56ac69be
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/sysmacros.h>
31 #include <sys/strsun.h>
32 #include <sys/strlog.h>
33 #include <sys/strsubr.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/ddi.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
40 #include <sys/sdt.h>
41 #include <sys/kobj.h>
42 #include <sys/zone.h>
43 #include <sys/neti.h>
44 #include <sys/hook.h>
46 #include <sys/kmem.h>
47 #include <sys/systm.h>
48 #include <sys/param.h>
49 #include <sys/socket.h>
50 #include <sys/vtrace.h>
51 #include <sys/isa_defs.h>
52 #include <sys/atomic.h>
53 #include <sys/policy.h>
54 #include <sys/mac.h>
55 #include <net/if.h>
56 #include <net/if_types.h>
57 #include <net/route.h>
58 #include <net/if_dl.h>
59 #include <sys/sockio.h>
60 #include <netinet/in.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/sctp.h>
65 #include <inet/common.h>
66 #include <inet/mi.h>
67 #include <inet/optcom.h>
68 #include <inet/mib2.h>
69 #include <inet/nd.h>
70 #include <inet/arp.h>
72 #include <inet/ip.h>
73 #include <inet/ip_impl.h>
74 #include <inet/ip6.h>
75 #include <inet/ip6_asp.h>
76 #include <inet/tcp.h>
77 #include <inet/tcp_impl.h>
78 #include <inet/udp_impl.h>
79 #include <inet/ipp_common.h>
81 #include <inet/ip_multi.h>
82 #include <inet/ip_if.h>
83 #include <inet/ip_ire.h>
84 #include <inet/ip_rts.h>
85 #include <inet/ip_ndp.h>
86 #include <net/pfkeyv2.h>
87 #include <inet/sadb.h>
88 #include <inet/ipsec_impl.h>
89 #include <inet/iptun/iptun_impl.h>
90 #include <inet/sctp_ip.h>
91 #include <sys/pattr.h>
92 #include <inet/ipclassifier.h>
93 #include <inet/ipsecah.h>
94 #include <inet/rawip_impl.h>
95 #include <inet/rts_impl.h>
96 #include <sys/squeue_impl.h>
97 #include <sys/squeue.h>
99 /* Temporary; for CR 6451644 work-around */
100 #include <sys/ethernet.h>
103 * Naming conventions:
104 * These rules should be judiciously applied
105 * if there is a need to identify something as IPv6 versus IPv4
106 * IPv6 functions will end with _v6 in the ip module.
107 * IPv6 functions will end with _ipv6 in the transport modules.
108 * IPv6 macros:
109 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
110 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
111 * And then there are ..V4_PART_OF_V6.
112 * The intent is that macros in the ip module end with _V6.
113 * IPv6 global variables will start with ipv6_
114 * IPv6 structures will start with ipv6
115 * IPv6 defined constants should start with IPV6_
116 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
119 const in6_addr_t ipv6_all_ones =
120 { 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU };
121 const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
123 #ifdef _BIG_ENDIAN
124 const in6_addr_t ipv6_unspecified_group = { 0xff000000U, 0, 0, 0 };
125 #else /* _BIG_ENDIAN */
126 const in6_addr_t ipv6_unspecified_group = { 0x000000ffU, 0, 0, 0 };
127 #endif /* _BIG_ENDIAN */
129 #ifdef _BIG_ENDIAN
130 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x00000001U };
131 #else /* _BIG_ENDIAN */
132 const in6_addr_t ipv6_loopback = { 0, 0, 0, 0x01000000U };
133 #endif /* _BIG_ENDIAN */
135 #ifdef _BIG_ENDIAN
136 const in6_addr_t ipv6_all_hosts_mcast = { 0xff020000U, 0, 0, 0x00000001U };
137 #else /* _BIG_ENDIAN */
138 const in6_addr_t ipv6_all_hosts_mcast = { 0x000002ffU, 0, 0, 0x01000000U };
139 #endif /* _BIG_ENDIAN */
141 #ifdef _BIG_ENDIAN
142 const in6_addr_t ipv6_all_rtrs_mcast = { 0xff020000U, 0, 0, 0x00000002U };
143 #else /* _BIG_ENDIAN */
144 const in6_addr_t ipv6_all_rtrs_mcast = { 0x000002ffU, 0, 0, 0x02000000U };
145 #endif /* _BIG_ENDIAN */
147 #ifdef _BIG_ENDIAN
148 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0xff020000U, 0, 0, 0x00000016U };
149 #else /* _BIG_ENDIAN */
150 const in6_addr_t ipv6_all_v2rtrs_mcast = { 0x000002ffU, 0, 0, 0x16000000U };
151 #endif /* _BIG_ENDIAN */
153 #ifdef _BIG_ENDIAN
154 const in6_addr_t ipv6_solicited_node_mcast =
155 { 0xff020000U, 0, 0x00000001U, 0xff000000U };
156 #else /* _BIG_ENDIAN */
157 const in6_addr_t ipv6_solicited_node_mcast =
158 { 0x000002ffU, 0, 0x01000000U, 0x000000ffU };
159 #endif /* _BIG_ENDIAN */
161 static boolean_t icmp_inbound_verify_v6(mblk_t *, icmp6_t *, ip_recv_attr_t *);
162 static void icmp_inbound_too_big_v6(icmp6_t *, ip_recv_attr_t *);
163 static void icmp_pkt_v6(mblk_t *, void *, size_t, const in6_addr_t *,
164 ip_recv_attr_t *);
165 static void icmp_redirect_v6(mblk_t *, ip6_t *, nd_redirect_t *,
166 ip_recv_attr_t *);
167 static void icmp_send_redirect_v6(mblk_t *, in6_addr_t *,
168 in6_addr_t *, ip_recv_attr_t *);
169 static void icmp_send_reply_v6(mblk_t *, ip6_t *, icmp6_t *,
170 ip_recv_attr_t *);
171 static boolean_t ip_source_routed_v6(ip6_t *, mblk_t *, ip_stack_t *);
174 * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
175 * If the ICMP message is consumed by IP, i.e., it should not be delivered
176 * to any IPPROTO_ICMP raw sockets, then it returns NULL.
177 * Likewise, if the ICMP error is misformed (too short, etc), then it
178 * returns NULL. The caller uses this to determine whether or not to send
179 * to raw sockets.
181 * All error messages are passed to the matching transport stream.
183 * See comment for icmp_inbound_v4() on how IPsec is handled.
185 mblk_t *
186 icmp_inbound_v6(mblk_t *mp, ip_recv_attr_t *ira)
188 icmp6_t *icmp6;
189 ip6_t *ip6h; /* Outer header */
190 int ip_hdr_length; /* Outer header length */
191 boolean_t interested;
192 ill_t *ill = ira->ira_ill;
193 ip_stack_t *ipst = ill->ill_ipst;
194 mblk_t *mp_ret = NULL;
196 ip6h = (ip6_t *)mp->b_rptr;
198 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInMsgs);
200 /* Check for Martian packets */
201 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src)) {
202 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
203 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp, ill);
204 freemsg(mp);
205 return (NULL);
208 /* Make sure ira_l2src is set for ndp_input */
209 if (!(ira->ira_flags & IRAF_L2SRC_SET))
210 ip_setl2src(mp, ira, ira->ira_rill);
212 ip_hdr_length = ira->ira_ip_hdr_length;
213 if ((mp->b_wptr - mp->b_rptr) < (ip_hdr_length + ICMP6_MINLEN)) {
214 if (ira->ira_pktlen < (ip_hdr_length + ICMP6_MINLEN)) {
215 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInTruncatedPkts);
216 ip_drop_input("ipIfStatsInTruncatedPkts", mp, ill);
217 freemsg(mp);
218 return (NULL);
220 ip6h = ip_pullup(mp, ip_hdr_length + ICMP6_MINLEN, ira);
221 if (ip6h == NULL) {
222 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
223 freemsg(mp);
224 return (NULL);
228 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
229 DTRACE_PROBE2(icmp__inbound__v6, ip6_t *, ip6h, icmp6_t *, icmp6);
230 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6->icmp6_type,
231 icmp6->icmp6_code));
234 * We will set "interested" to "true" if we should pass a copy to
235 * the transport i.e., if it is an error message.
237 interested = !(icmp6->icmp6_type & ICMP6_INFOMSG_MASK);
239 switch (icmp6->icmp6_type) {
240 case ICMP6_DST_UNREACH:
241 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInDestUnreachs);
242 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
243 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInAdminProhibs);
244 break;
246 case ICMP6_TIME_EXCEEDED:
247 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInTimeExcds);
248 break;
250 case ICMP6_PARAM_PROB:
251 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInParmProblems);
252 break;
254 case ICMP6_PACKET_TOO_BIG:
255 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInPktTooBigs);
256 break;
258 case ICMP6_ECHO_REQUEST:
259 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchos);
260 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst) &&
261 !ipst->ips_ipv6_resp_echo_mcast)
262 break;
265 * We must have exclusive use of the mblk to convert it to
266 * a response.
267 * If not, we copy it.
269 if (mp->b_datap->db_ref > 1) {
270 mblk_t *mp1;
272 mp1 = copymsg(mp);
273 if (mp1 == NULL) {
274 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
275 ip_drop_input("ipIfStatsInDiscards - copymsg",
276 mp, ill);
277 freemsg(mp);
278 return (NULL);
280 freemsg(mp);
281 mp = mp1;
282 ip6h = (ip6_t *)mp->b_rptr;
283 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
286 icmp6->icmp6_type = ICMP6_ECHO_REPLY;
287 icmp_send_reply_v6(mp, ip6h, icmp6, ira);
288 return (NULL);
290 case ICMP6_ECHO_REPLY:
291 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInEchoReplies);
292 break;
294 case ND_ROUTER_SOLICIT:
295 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterSolicits);
296 break;
298 case ND_ROUTER_ADVERT:
299 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRouterAdvertisements);
300 break;
302 case ND_NEIGHBOR_SOLICIT:
303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInNeighborSolicits);
304 ndp_input(mp, ira);
305 return (NULL);
307 case ND_NEIGHBOR_ADVERT:
308 BUMP_MIB(ill->ill_icmp6_mib,
309 ipv6IfIcmpInNeighborAdvertisements);
310 ndp_input(mp, ira);
311 return (NULL);
313 case ND_REDIRECT:
314 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInRedirects);
316 if (ipst->ips_ipv6_ignore_redirect)
317 break;
319 /* We now allow a RAW socket to receive this. */
320 interested = B_TRUE;
321 break;
324 * The next three icmp messages will be handled by MLD.
325 * Pass all valid MLD packets up to any process(es)
326 * listening on a raw ICMP socket.
328 case MLD_LISTENER_QUERY:
329 case MLD_LISTENER_REPORT:
330 case MLD_LISTENER_REDUCTION:
331 mp = mld_input(mp, ira);
332 return (mp);
333 default:
334 break;
337 * See if there is an ICMP client to avoid an extra copymsg/freemsg
338 * if there isn't one.
340 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_ICMPV6].connf_head != NULL) {
341 /* If there is an ICMP client and we want one too, copy it. */
343 if (!interested) {
344 /* Caller will deliver to RAW sockets */
345 return (mp);
347 mp_ret = copymsg(mp);
348 if (mp_ret == NULL) {
349 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
350 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
352 } else if (!interested) {
353 /* Neither we nor raw sockets are interested. Drop packet now */
354 freemsg(mp);
355 return (NULL);
359 * ICMP error or redirect packet. Make sure we have enough of
360 * the header and that db_ref == 1 since we might end up modifying
361 * the packet.
363 if (mp->b_cont != NULL) {
364 if (ip_pullup(mp, -1, ira) == NULL) {
365 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
366 ip_drop_input("ipIfStatsInDiscards - ip_pullup",
367 mp, ill);
368 freemsg(mp);
369 return (mp_ret);
373 if (mp->b_datap->db_ref > 1) {
374 mblk_t *mp1;
376 mp1 = copymsg(mp);
377 if (mp1 == NULL) {
378 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
379 ip_drop_input("ipIfStatsInDiscards - copymsg", mp, ill);
380 freemsg(mp);
381 return (mp_ret);
383 freemsg(mp);
384 mp = mp1;
388 * In case mp has changed, verify the message before any further
389 * processes.
391 ip6h = (ip6_t *)mp->b_rptr;
392 icmp6 = (icmp6_t *)(&mp->b_rptr[ip_hdr_length]);
393 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
394 freemsg(mp);
395 return (mp_ret);
398 switch (icmp6->icmp6_type) {
399 case ND_REDIRECT:
400 icmp_redirect_v6(mp, ip6h, (nd_redirect_t *)icmp6, ira);
401 break;
402 case ICMP6_PACKET_TOO_BIG:
403 /* Update DCE and adjust MTU is icmp header if needed */
404 icmp_inbound_too_big_v6(icmp6, ira);
405 /* FALLTHRU */
406 default:
407 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
408 break;
411 return (mp_ret);
415 * Send an ICMP echo reply.
416 * The caller has already updated the payload part of the packet.
417 * We handle the ICMP checksum, IP source address selection and feed
418 * the packet into ip_output_simple.
420 static void
421 icmp_send_reply_v6(mblk_t *mp, ip6_t *ip6h, icmp6_t *icmp6,
422 ip_recv_attr_t *ira)
424 uint_t ip_hdr_length = ira->ira_ip_hdr_length;
425 ill_t *ill = ira->ira_ill;
426 ip_stack_t *ipst = ill->ill_ipst;
427 ip_xmit_attr_t ixas;
428 in6_addr_t origsrc;
431 * Remove any extension headers (do not reverse a source route)
432 * and clear the flow id (keep traffic class for now).
434 if (ip_hdr_length != IPV6_HDR_LEN) {
435 int i;
437 for (i = 0; i < IPV6_HDR_LEN; i++) {
438 mp->b_rptr[ip_hdr_length - i - 1] =
439 mp->b_rptr[IPV6_HDR_LEN - i - 1];
441 mp->b_rptr += (ip_hdr_length - IPV6_HDR_LEN);
442 ip6h = (ip6_t *)mp->b_rptr;
443 ip6h->ip6_nxt = IPPROTO_ICMPV6;
444 i = ntohs(ip6h->ip6_plen);
445 i -= (ip_hdr_length - IPV6_HDR_LEN);
446 ip6h->ip6_plen = htons(i);
447 ip_hdr_length = IPV6_HDR_LEN;
448 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == msgdsize(mp));
450 ip6h->ip6_vcf &= ~IPV6_FLOWINFO_FLOWLABEL;
452 /* Reverse the source and destination addresses. */
453 origsrc = ip6h->ip6_src;
454 ip6h->ip6_src = ip6h->ip6_dst;
455 ip6h->ip6_dst = origsrc;
457 /* set the hop limit */
458 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
461 * Prepare for checksum by putting icmp length in the icmp
462 * checksum field. The checksum is calculated in ip_output
464 icmp6->icmp6_cksum = ip6h->ip6_plen;
466 bzero(&ixas, sizeof (ixas));
467 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
468 ixas.ixa_zoneid = ira->ira_zoneid;
469 ixas.ixa_cred = kcred;
470 ixas.ixa_cpid = NOPID;
471 ixas.ixa_ifindex = 0;
472 ixas.ixa_ipst = ipst;
473 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
475 if (!(ira->ira_flags & IRAF_IPSEC_SECURE)) {
477 * This packet should go out the same way as it
478 * came in i.e in clear, independent of the IPsec
479 * policy for transmitting packets.
481 ixas.ixa_flags |= IXAF_NO_IPSEC;
482 } else {
483 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
484 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
485 /* Note: mp already consumed and ip_drop_packet done */
486 return;
490 /* Was the destination (now source) link-local? Send out same group */
491 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
492 ixas.ixa_flags |= IXAF_SCOPEID_SET;
493 if (IS_UNDER_IPMP(ill))
494 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
495 else
496 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
499 if (ira->ira_flags & IRAF_MULTIBROADCAST) {
501 * Not one or our addresses (IRE_LOCALs), thus we let
502 * ip_output_simple pick the source.
504 ip6h->ip6_src = ipv6_all_zeros;
505 ixas.ixa_flags |= IXAF_SET_SOURCE;
508 /* Should we send using dce_pmtu? */
509 if (ipst->ips_ipv6_icmp_return_pmtu)
510 ixas.ixa_flags |= IXAF_PMTU_DISCOVERY;
512 (void) ip_output_simple(mp, &ixas);
513 ixa_cleanup(&ixas);
518 * Verify the ICMP messages for either for ICMP error or redirect packet.
519 * The caller should have fully pulled up the message. If it's a redirect
520 * packet, only basic checks on IP header will be done; otherwise, verify
521 * the packet by looking at the included ULP header.
523 * Called before icmp_inbound_error_fanout_v6 is called.
525 static boolean_t
526 icmp_inbound_verify_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
528 ill_t *ill = ira->ira_ill;
529 uint16_t hdr_length;
530 uint8_t *nexthdrp;
531 uint8_t nexthdr;
532 ip_stack_t *ipst = ill->ill_ipst;
533 conn_t *connp;
534 ip6_t *ip6h; /* Inner header */
536 ip6h = (ip6_t *)&icmp6[1];
537 if ((uchar_t *)ip6h + IPV6_HDR_LEN > mp->b_wptr)
538 goto truncated;
540 if (icmp6->icmp6_type == ND_REDIRECT) {
541 hdr_length = sizeof (nd_redirect_t);
542 } else {
543 if ((IPH_HDR_VERSION(ip6h) != IPV6_VERSION))
544 goto discard_pkt;
545 hdr_length = IPV6_HDR_LEN;
548 if ((uchar_t *)ip6h + hdr_length > mp->b_wptr)
549 goto truncated;
552 * Stop here for ICMP_REDIRECT.
554 if (icmp6->icmp6_type == ND_REDIRECT)
555 return (B_TRUE);
558 * ICMP errors only.
560 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
561 goto discard_pkt;
562 nexthdr = *nexthdrp;
564 /* Try to pass the ICMP message to clients who need it */
565 switch (nexthdr) {
566 case IPPROTO_UDP:
568 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
569 * transport header.
571 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
572 mp->b_wptr)
573 goto truncated;
574 break;
575 case IPPROTO_TCP: {
576 tcpha_t *tcpha;
579 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
580 * transport header.
582 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
583 mp->b_wptr)
584 goto truncated;
586 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
588 * With IPMP we need to match across group, which we do
589 * since we have the upper ill from ira_ill.
591 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha, TCPS_LISTEN,
592 ill->ill_phyint->phyint_ifindex, ipst);
593 if (connp == NULL)
594 goto discard_pkt;
596 if ((connp->conn_verifyicmp != NULL) &&
597 !connp->conn_verifyicmp(connp, tcpha, NULL, icmp6, ira)) {
598 CONN_DEC_REF(connp);
599 goto discard_pkt;
601 CONN_DEC_REF(connp);
602 break;
604 case IPPROTO_SCTP:
606 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
607 * transport header.
609 if ((uchar_t *)ip6h + hdr_length + ICMP_MIN_TP_HDR_LEN >
610 mp->b_wptr)
611 goto truncated;
612 break;
613 case IPPROTO_ESP:
614 case IPPROTO_AH:
615 break;
616 case IPPROTO_ENCAP:
617 case IPPROTO_IPV6: {
618 /* Look for self-encapsulated packets that caused an error */
619 ip6_t *in_ip6h;
621 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
622 if ((uint8_t *)in_ip6h + (nexthdr == IPPROTO_ENCAP ?
623 sizeof (ipha_t) : sizeof (ip6_t)) > mp->b_wptr)
624 goto truncated;
625 break;
627 default:
628 break;
631 return (B_TRUE);
633 discard_pkt:
634 /* Bogus ICMP error. */
635 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
636 return (B_FALSE);
638 truncated:
639 /* We pulled up everthing already. Must be truncated */
640 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
641 return (B_FALSE);
645 * Process received IPv6 ICMP Packet too big.
646 * The caller is responsible for validating the packet before passing it in
647 * and also to fanout the ICMP error to any matching transport conns. Assumes
648 * the message has been fully pulled up.
650 * Before getting here, the caller has called icmp_inbound_verify_v6()
651 * that should have verified with ULP to prevent undoing the changes we're
652 * going to make to DCE. For example, TCP might have verified that the packet
653 * which generated error is in the send window.
655 * In some cases modified this MTU in the ICMP header packet; the caller
656 * should pass to the matching ULP after this returns.
658 static void
659 icmp_inbound_too_big_v6(icmp6_t *icmp6, ip_recv_attr_t *ira)
661 uint32_t mtu;
662 dce_t *dce;
663 ill_t *ill = ira->ira_ill; /* Upper ill if IPMP */
664 ip_stack_t *ipst = ill->ill_ipst;
665 int old_max_frag;
666 in6_addr_t final_dst;
667 ip6_t *ip6h; /* Inner IP header */
669 /* Caller has already pulled up everything. */
670 ip6h = (ip6_t *)&icmp6[1];
671 final_dst = ip_get_dst_v6(ip6h, NULL, NULL);
674 * For link local destinations matching simply on address is not
675 * sufficient. Same link local addresses for different ILL's is
676 * possible.
678 if (IN6_IS_ADDR_LINKSCOPE(&final_dst)) {
679 dce = dce_lookup_and_add_v6(&final_dst,
680 ill->ill_phyint->phyint_ifindex, ipst);
681 } else {
682 dce = dce_lookup_and_add_v6(&final_dst, 0, ipst);
684 if (dce == NULL) {
685 /* Couldn't add a unique one - ENOMEM */
686 if (ip_debug > 2) {
687 /* ip1dbg */
688 pr_addr_dbg("icmp_inbound_too_big_v6:"
689 "no dce for dst %s\n", AF_INET6,
690 &final_dst);
692 return;
695 mtu = ntohl(icmp6->icmp6_mtu);
697 mutex_enter(&dce->dce_lock);
698 if (dce->dce_flags & DCEF_PMTU)
699 old_max_frag = dce->dce_pmtu;
700 else if (IN6_IS_ADDR_MULTICAST(&final_dst))
701 old_max_frag = ill->ill_mc_mtu;
702 else
703 old_max_frag = ill->ill_mtu;
705 if (mtu < IPV6_MIN_MTU) {
706 ip1dbg(("Received mtu less than IPv6 "
707 "min mtu %d: %d\n", IPV6_MIN_MTU, mtu));
708 mtu = IPV6_MIN_MTU;
710 * If an mtu less than IPv6 min mtu is received,
711 * we must include a fragment header in
712 * subsequent packets.
714 dce->dce_flags |= DCEF_TOO_SMALL_PMTU;
715 } else {
716 dce->dce_flags &= ~DCEF_TOO_SMALL_PMTU;
718 ip1dbg(("Received mtu from router: %d\n", mtu));
719 dce->dce_pmtu = MIN(old_max_frag, mtu);
721 /* Prepare to send the new max frag size for the ULP. */
722 if (dce->dce_flags & DCEF_TOO_SMALL_PMTU) {
724 * If we need a fragment header in every packet
725 * (above case or multirouting), make sure the
726 * ULP takes it into account when computing the
727 * payload size.
729 icmp6->icmp6_mtu = htonl(dce->dce_pmtu - sizeof (ip6_frag_t));
730 } else {
731 icmp6->icmp6_mtu = htonl(dce->dce_pmtu);
733 /* We now have a PMTU for sure */
734 dce->dce_flags |= DCEF_PMTU;
735 dce->dce_last_change_time = TICK_TO_SEC(ddi_get_lbolt64());
736 mutex_exit(&dce->dce_lock);
738 * After dropping the lock the new value is visible to everyone.
739 * Then we bump the generation number so any cached values reinspect
740 * the dce_t.
742 dce_increment_generation(dce);
743 dce_refrele(dce);
747 * Fanout received ICMPv6 error packets to the transports.
748 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
750 * The caller must have called icmp_inbound_verify_v6.
752 void
753 icmp_inbound_error_fanout_v6(mblk_t *mp, icmp6_t *icmp6, ip_recv_attr_t *ira)
755 uint16_t *up; /* Pointer to ports in ULP header */
756 uint32_t ports; /* reversed ports for fanout */
757 ip6_t rip6h; /* With reversed addresses */
758 ip6_t *ip6h; /* Inner IP header */
759 uint16_t hdr_length; /* Inner IP header length */
760 uint8_t *nexthdrp;
761 uint8_t nexthdr;
762 tcpha_t *tcpha;
763 conn_t *connp;
764 ill_t *ill = ira->ira_ill; /* Upper in the case of IPMP */
765 ip_stack_t *ipst = ill->ill_ipst;
766 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
768 /* Caller has already pulled up everything. */
769 ip6h = (ip6_t *)&icmp6[1];
770 ASSERT(mp->b_cont == NULL);
771 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);
773 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
774 goto drop_pkt;
775 nexthdr = *nexthdrp;
776 ira->ira_protocol = nexthdr;
779 * We need a separate IP header with the source and destination
780 * addresses reversed to do fanout/classification because the ip6h in
781 * the ICMPv6 error is in the form we sent it out.
783 rip6h.ip6_src = ip6h->ip6_dst;
784 rip6h.ip6_dst = ip6h->ip6_src;
785 rip6h.ip6_nxt = nexthdr;
787 /* Try to pass the ICMP message to clients who need it */
788 switch (nexthdr) {
789 case IPPROTO_UDP: {
790 /* Attempt to find a client stream based on port. */
791 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
793 /* Note that we send error to all matches. */
794 ira->ira_flags |= IRAF_ICMP_ERROR;
795 ip_fanout_udp_multi_v6(mp, &rip6h, up[0], up[1], ira);
796 ira->ira_flags &= ~IRAF_ICMP_ERROR;
797 return;
799 case IPPROTO_TCP: {
801 * Attempt to find a client stream based on port.
802 * Note that we do a reverse lookup since the header is
803 * in the form we sent it out.
805 tcpha = (tcpha_t *)((uchar_t *)ip6h + hdr_length);
807 * With IPMP we need to match across group, which we do
808 * since we have the upper ill from ira_ill.
810 connp = ipcl_tcp_lookup_reversed_ipv6(ip6h, tcpha,
811 TCPS_LISTEN, ill->ill_phyint->phyint_ifindex, ipst);
812 if (connp == NULL) {
813 goto drop_pkt;
816 if (CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss) ||
817 (ira->ira_flags & IRAF_IPSEC_SECURE)) {
818 mp = ipsec_check_inbound_policy(mp, connp,
819 NULL, ip6h, ira);
820 if (mp == NULL) {
821 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
822 /* Note that mp is NULL */
823 ip_drop_input("ipIfStatsInDiscards", mp, ill);
824 CONN_DEC_REF(connp);
825 return;
829 ira->ira_flags |= IRAF_ICMP_ERROR;
830 if (IPCL_IS_TCP(connp)) {
831 SQUEUE_ENTER_ONE(connp->conn_sqp, mp,
832 connp->conn_recvicmp, connp, ira, SQ_FILL,
833 SQTAG_TCP6_INPUT_ICMP_ERR);
834 } else {
835 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
836 ill_t *rill = ira->ira_rill;
838 ira->ira_ill = ira->ira_rill = NULL;
839 (connp->conn_recv)(connp, mp, NULL, ira);
840 CONN_DEC_REF(connp);
841 ira->ira_ill = ill;
842 ira->ira_rill = rill;
844 ira->ira_flags &= ~IRAF_ICMP_ERROR;
845 return;
848 case IPPROTO_SCTP:
849 up = (uint16_t *)((uchar_t *)ip6h + hdr_length);
850 /* Find a SCTP client stream for this packet. */
851 ((uint16_t *)&ports)[0] = up[1];
852 ((uint16_t *)&ports)[1] = up[0];
854 ira->ira_flags |= IRAF_ICMP_ERROR;
855 ip_fanout_sctp(mp, NULL, &rip6h, ports, ira);
856 ira->ira_flags &= ~IRAF_ICMP_ERROR;
857 return;
859 case IPPROTO_ESP:
860 case IPPROTO_AH:
861 if (!ipsec_loaded(ipss)) {
862 ip_proto_not_sup(mp, ira);
863 return;
866 if (nexthdr == IPPROTO_ESP)
867 mp = ipsecesp_icmp_error(mp, ira);
868 else
869 mp = ipsecah_icmp_error(mp, ira);
870 if (mp == NULL)
871 return;
873 /* Just in case ipsec didn't preserve the NULL b_cont */
874 if (mp->b_cont != NULL) {
875 if (!pullupmsg(mp, -1))
876 goto drop_pkt;
880 * If succesful, the mp has been modified to not include
881 * the ESP/AH header so we can fanout to the ULP's icmp
882 * error handler.
884 if (mp->b_wptr - mp->b_rptr < IPV6_HDR_LEN)
885 goto drop_pkt;
887 ip6h = (ip6_t *)mp->b_rptr;
888 /* Don't call hdr_length_v6() unless you have to. */
889 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
890 hdr_length = ip_hdr_length_v6(mp, ip6h);
891 else
892 hdr_length = IPV6_HDR_LEN;
894 /* Verify the modified message before any further processes. */
895 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
896 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
897 freemsg(mp);
898 return;
901 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
902 return;
904 case IPPROTO_IPV6: {
905 /* Look for self-encapsulated packets that caused an error */
906 ip6_t *in_ip6h;
908 in_ip6h = (ip6_t *)((uint8_t *)ip6h + hdr_length);
910 if (IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_src, &ip6h->ip6_src) &&
911 IN6_ARE_ADDR_EQUAL(&in_ip6h->ip6_dst, &ip6h->ip6_dst)) {
913 * Self-encapsulated case. As in the ipv4 case,
914 * we need to strip the 2nd IP header. Since mp
915 * is already pulled-up, we can simply bcopy
916 * the 3rd header + data over the 2nd header.
918 uint16_t unused_len;
921 * Make sure we don't do recursion more than once.
923 if (!ip_hdr_length_nexthdr_v6(mp, in_ip6h,
924 &unused_len, &nexthdrp) ||
925 *nexthdrp == IPPROTO_IPV6) {
926 goto drop_pkt;
930 * Copy the 3rd header + remaining data on top
931 * of the 2nd header.
933 bcopy(in_ip6h, ip6h, mp->b_wptr - (uchar_t *)in_ip6h);
936 * Subtract length of the 2nd header.
938 mp->b_wptr -= hdr_length;
940 ip6h = (ip6_t *)mp->b_rptr;
941 /* Don't call hdr_length_v6() unless you have to. */
942 if (ip6h->ip6_nxt != IPPROTO_ICMPV6)
943 hdr_length = ip_hdr_length_v6(mp, ip6h);
944 else
945 hdr_length = IPV6_HDR_LEN;
948 * Verify the modified message before any further
949 * processes.
951 icmp6 = (icmp6_t *)(&mp->b_rptr[hdr_length]);
952 if (!icmp_inbound_verify_v6(mp, icmp6, ira)) {
953 freemsg(mp);
954 return;
958 * Now recurse, and see what I _really_ should be
959 * doing here.
961 icmp_inbound_error_fanout_v6(mp, icmp6, ira);
962 return;
964 /* FALLTHRU */
966 case IPPROTO_ENCAP:
967 if ((connp = ipcl_iptun_classify_v6(&rip6h.ip6_src,
968 &rip6h.ip6_dst, ipst)) != NULL) {
969 ira->ira_flags |= IRAF_ICMP_ERROR;
970 connp->conn_recvicmp(connp, mp, NULL, ira);
971 CONN_DEC_REF(connp);
972 ira->ira_flags &= ~IRAF_ICMP_ERROR;
973 return;
976 * No IP tunnel is interested, fallthrough and see
977 * if a raw socket will want it.
979 /* FALLTHRU */
980 default:
981 ira->ira_flags |= IRAF_ICMP_ERROR;
982 ASSERT(ira->ira_protocol == nexthdr);
983 ip_fanout_proto_v6(mp, &rip6h, ira);
984 ira->ira_flags &= ~IRAF_ICMP_ERROR;
985 return;
987 /* NOTREACHED */
988 drop_pkt:
989 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
990 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
991 freemsg(mp);
995 * Process received IPv6 ICMP Redirect messages.
996 * Assumes the caller has verified that the headers are in the pulled up mblk.
997 * Consumes mp.
999 /* ARGSUSED */
1000 static void
1001 icmp_redirect_v6(mblk_t *mp, ip6_t *ip6h, nd_redirect_t *rd,
1002 ip_recv_attr_t *ira)
1004 ire_t *ire, *nire;
1005 ire_t *prev_ire = NULL;
1006 ire_t *redir_ire;
1007 in6_addr_t *src, *dst, *gateway;
1008 nd_opt_hdr_t *opt;
1009 nce_t *nce;
1010 int ncec_flags = 0;
1011 int err = 0;
1012 boolean_t redirect_to_router = B_FALSE;
1013 int len;
1014 int optlen;
1015 ill_t *ill = ira->ira_rill;
1016 ill_t *rill = ira->ira_rill;
1017 ip_stack_t *ipst = ill->ill_ipst;
1020 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1021 * and make it be the IPMP upper so avoid being confused by a packet
1022 * addressed to a unicast address on a different ill.
1024 if (IS_UNDER_IPMP(rill)) {
1025 rill = ipmp_ill_hold_ipmp_ill(rill);
1026 if (rill == NULL) {
1027 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1028 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1029 mp, ill);
1030 freemsg(mp);
1031 return;
1033 ASSERT(rill != ira->ira_rill);
1036 len = mp->b_wptr - (uchar_t *)rd;
1037 src = &ip6h->ip6_src;
1038 dst = &rd->nd_rd_dst;
1039 gateway = &rd->nd_rd_target;
1041 /* Verify if it is a valid redirect */
1042 if (!IN6_IS_ADDR_LINKLOCAL(src) ||
1043 (ip6h->ip6_hops != IPV6_MAX_HOPS) ||
1044 (rd->nd_rd_code != 0) ||
1045 (len < sizeof (nd_redirect_t)) ||
1046 (IN6_IS_ADDR_V4MAPPED(dst)) ||
1047 (IN6_IS_ADDR_MULTICAST(dst))) {
1048 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1049 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp, ill);
1050 goto fail_redirect;
1053 if (!(IN6_IS_ADDR_LINKLOCAL(gateway) ||
1054 IN6_ARE_ADDR_EQUAL(gateway, dst))) {
1055 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1056 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1057 mp, ill);
1058 goto fail_redirect;
1061 optlen = len - sizeof (nd_redirect_t);
1062 if (optlen != 0) {
1063 if (!ndp_verify_optlen((nd_opt_hdr_t *)&rd[1], optlen)) {
1064 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1065 ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1066 mp, ill);
1067 goto fail_redirect;
1071 if (!IN6_ARE_ADDR_EQUAL(gateway, dst)) {
1072 redirect_to_router = B_TRUE;
1073 ncec_flags |= NCE_F_ISROUTER;
1074 } else {
1075 gateway = dst; /* Add nce for dst */
1080 * Verify that the IP source address of the redirect is
1081 * the same as the current first-hop router for the specified
1082 * ICMP destination address.
1083 * Also, Make sure we had a route for the dest in question and
1084 * that route was pointing to the old gateway (the source of the
1085 * redirect packet.)
1086 * We do longest match and then compare ire_gateway_addr_v6 below.
1088 prev_ire = ire_ftable_lookup_v6(dst, 0, 0, 0, rill,
1089 ALL_ZONES, MATCH_IRE_ILL, 0, ipst, NULL);
1092 * Check that
1093 * the redirect was not from ourselves
1094 * old gateway is still directly reachable
1096 if (prev_ire == NULL ||
1097 (prev_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK)) ||
1098 (prev_ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) ||
1099 !IN6_ARE_ADDR_EQUAL(src, &prev_ire->ire_gateway_addr_v6)) {
1100 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1101 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp, ill);
1102 goto fail_redirect;
1105 ASSERT(prev_ire->ire_ill != NULL);
1106 if (prev_ire->ire_ill->ill_flags & ILLF_NONUD)
1107 ncec_flags |= NCE_F_NONUD;
1109 opt = (nd_opt_hdr_t *)&rd[1];
1110 opt = ndp_get_option(opt, optlen, ND_OPT_TARGET_LINKADDR);
1111 if (opt != NULL) {
1112 err = nce_lookup_then_add_v6(rill,
1113 (uchar_t *)&opt[1], /* Link layer address */
1114 rill->ill_phys_addr_length,
1115 gateway, ncec_flags, ND_STALE, &nce);
1116 switch (err) {
1117 case 0:
1118 nce_refrele(nce);
1119 break;
1120 case EEXIST:
1122 * Check to see if link layer address has changed and
1123 * process the ncec_state accordingly.
1125 nce_process(nce->nce_common,
1126 (uchar_t *)&opt[1], 0, B_FALSE);
1127 nce_refrele(nce);
1128 break;
1129 default:
1130 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1131 err));
1132 goto fail_redirect;
1135 if (redirect_to_router) {
1136 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway));
1139 * Create a Route Association. This will allow us to remember
1140 * a router told us to use the particular gateway.
1142 ire = ire_create_v6(
1143 dst,
1144 &ipv6_all_ones, /* mask */
1145 gateway, /* gateway addr */
1146 IRE_HOST,
1147 prev_ire->ire_ill,
1148 ALL_ZONES,
1149 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST),
1150 ipst);
1151 } else {
1152 ipif_t *ipif;
1153 in6_addr_t gw;
1156 * Just create an on link entry, i.e. interface route.
1157 * The gateway field is our link-local on the ill.
1159 mutex_enter(&rill->ill_lock);
1160 for (ipif = rill->ill_ipif; ipif != NULL;
1161 ipif = ipif->ipif_next) {
1162 if (!(ipif->ipif_state_flags & IPIF_CONDEMNED) &&
1163 IN6_IS_ADDR_LINKLOCAL(&ipif->ipif_v6lcl_addr))
1164 break;
1166 if (ipif == NULL) {
1167 /* We have no link-local address! */
1168 mutex_exit(&rill->ill_lock);
1169 goto fail_redirect;
1171 gw = ipif->ipif_v6lcl_addr;
1172 mutex_exit(&rill->ill_lock);
1174 ire = ire_create_v6(
1175 dst, /* gateway == dst */
1176 &ipv6_all_ones, /* mask */
1177 &gw, /* gateway addr */
1178 rill->ill_net_type, /* IF_[NO]RESOLVER */
1179 prev_ire->ire_ill,
1180 ALL_ZONES,
1181 (RTF_DYNAMIC | RTF_HOST),
1182 ipst);
1185 if (ire == NULL)
1186 goto fail_redirect;
1188 nire = ire_add(ire);
1189 /* Check if it was a duplicate entry */
1190 if (nire != NULL && nire != ire) {
1191 ASSERT(nire->ire_identical_ref > 1);
1192 ire_delete(nire);
1193 ire_refrele(nire);
1194 nire = NULL;
1196 ire = nire;
1197 if (ire != NULL) {
1198 ire_refrele(ire); /* Held in ire_add */
1200 /* tell routing sockets that we received a redirect */
1201 ip_rts_change_v6(RTM_REDIRECT,
1202 &rd->nd_rd_dst,
1203 &rd->nd_rd_target,
1204 &ipv6_all_ones, 0, src,
1205 (RTF_DYNAMIC | RTF_GATEWAY | RTF_HOST), 0,
1206 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_AUTHOR), ipst);
1209 * Delete any existing IRE_HOST type ires for this destination.
1210 * This together with the added IRE has the effect of
1211 * modifying an existing redirect.
1213 redir_ire = ire_ftable_lookup_v6(dst, 0, src, IRE_HOST,
1214 prev_ire->ire_ill, ALL_ZONES,
1215 (MATCH_IRE_GW | MATCH_IRE_TYPE | MATCH_IRE_ILL), 0, ipst,
1216 NULL);
1218 if (redir_ire != NULL) {
1219 if (redir_ire->ire_flags & RTF_DYNAMIC)
1220 ire_delete(redir_ire);
1221 ire_refrele(redir_ire);
1225 ire_refrele(prev_ire);
1226 prev_ire = NULL;
1228 fail_redirect:
1229 if (prev_ire != NULL)
1230 ire_refrele(prev_ire);
1231 freemsg(mp);
1232 if (rill != ira->ira_rill)
1233 ill_refrele(rill);
1237 * Build and ship an IPv6 ICMP message using the packet data in mp,
1238 * and the ICMP header pointed to by "stuff". (May be called as
1239 * writer.)
1240 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1241 * verify that an icmp error packet can be sent.
1243 * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1244 * source address (see above function).
1246 static void
1247 icmp_pkt_v6(mblk_t *mp, void *stuff, size_t len,
1248 const in6_addr_t *v6src_ptr, ip_recv_attr_t *ira)
1250 ip6_t *ip6h;
1251 in6_addr_t v6dst;
1252 size_t len_needed;
1253 size_t msg_len;
1254 mblk_t *mp1;
1255 icmp6_t *icmp6;
1256 in6_addr_t v6src;
1257 ill_t *ill = ira->ira_ill;
1258 ip_stack_t *ipst = ill->ill_ipst;
1259 ip_xmit_attr_t ixas;
1261 ip6h = (ip6_t *)mp->b_rptr;
1263 bzero(&ixas, sizeof (ixas));
1264 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
1265 ixas.ixa_zoneid = ira->ira_zoneid;
1266 ixas.ixa_ifindex = 0;
1267 ixas.ixa_ipst = ipst;
1268 ixas.ixa_cred = kcred;
1269 ixas.ixa_cpid = NOPID;
1270 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1273 * If the source of the original packet was link-local, then
1274 * make sure we send on the same ill (group) as we received it on.
1276 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) {
1277 ixas.ixa_flags |= IXAF_SCOPEID_SET;
1278 if (IS_UNDER_IPMP(ill))
1279 ixas.ixa_scopeid = ill_get_upper_ifindex(ill);
1280 else
1281 ixas.ixa_scopeid = ill->ill_phyint->phyint_ifindex;
1284 if (ira->ira_flags & IRAF_IPSEC_SECURE) {
1286 * Apply IPsec based on how IPsec was applied to
1287 * the packet that had the error.
1289 * If it was an outbound packet that caused the ICMP
1290 * error, then the caller will have setup the IRA
1291 * appropriately.
1293 if (!ipsec_in_to_out(ira, &ixas, mp, NULL, ip6h)) {
1294 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
1295 /* Note: mp already consumed and ip_drop_packet done */
1296 return;
1298 } else {
1300 * This is in clear. The icmp message we are building
1301 * here should go out in clear, independent of our policy.
1303 ixas.ixa_flags |= IXAF_NO_IPSEC;
1307 * If the caller specified the source we use that.
1308 * Otherwise, if the packet was for one of our unicast addresses, make
1309 * sure we respond with that as the source. Otherwise
1310 * have ip_output_simple pick the source address.
1312 if (v6src_ptr != NULL) {
1313 v6src = *v6src_ptr;
1314 } else {
1315 ire_t *ire;
1316 uint_t match_flags = MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY;
1318 if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src) ||
1319 IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_dst))
1320 match_flags |= MATCH_IRE_ILL;
1322 ire = ire_ftable_lookup_v6(&ip6h->ip6_dst, 0, 0,
1323 (IRE_LOCAL|IRE_LOOPBACK), ill, ira->ira_zoneid,
1324 match_flags, 0, ipst, NULL);
1325 if (ire != NULL) {
1326 v6src = ip6h->ip6_dst;
1327 ire_refrele(ire);
1328 } else {
1329 v6src = ipv6_all_zeros;
1330 ixas.ixa_flags |= IXAF_SET_SOURCE;
1333 v6dst = ip6h->ip6_src;
1334 len_needed = ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len;
1335 msg_len = msgdsize(mp);
1336 if (msg_len > len_needed) {
1337 if (!adjmsg(mp, len_needed - msg_len)) {
1338 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1339 freemsg(mp);
1340 return;
1342 msg_len = len_needed;
1344 mp1 = allocb(IPV6_HDR_LEN + len, BPRI_MED);
1345 if (mp1 == NULL) {
1346 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutErrors);
1347 freemsg(mp);
1348 return;
1350 mp1->b_cont = mp;
1351 mp = mp1;
1354 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1355 * node generates be accepted in peace by all on-host destinations.
1356 * If we do NOT assume that all on-host destinations trust
1357 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1358 * (Look for IXAF_TRUSTED_ICMP).
1360 ixas.ixa_flags |= IXAF_TRUSTED_ICMP;
1362 ip6h = (ip6_t *)mp->b_rptr;
1363 mp1->b_wptr = (uchar_t *)ip6h + (IPV6_HDR_LEN + len);
1365 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
1366 ip6h->ip6_nxt = IPPROTO_ICMPV6;
1367 ip6h->ip6_hops = ipst->ips_ipv6_def_hops;
1368 ip6h->ip6_dst = v6dst;
1369 ip6h->ip6_src = v6src;
1370 msg_len += IPV6_HDR_LEN + len;
1371 if (msg_len > IP_MAXPACKET + IPV6_HDR_LEN) {
1372 (void) adjmsg(mp, IP_MAXPACKET + IPV6_HDR_LEN - msg_len);
1373 msg_len = IP_MAXPACKET + IPV6_HDR_LEN;
1375 ip6h->ip6_plen = htons((uint16_t)(msgdsize(mp) - IPV6_HDR_LEN));
1376 icmp6 = (icmp6_t *)&ip6h[1];
1377 bcopy(stuff, (char *)icmp6, len);
1379 * Prepare for checksum by putting icmp length in the icmp
1380 * checksum field. The checksum is calculated in ip_output_wire_v6.
1382 icmp6->icmp6_cksum = ip6h->ip6_plen;
1383 if (icmp6->icmp6_type == ND_REDIRECT) {
1384 ip6h->ip6_hops = IPV6_MAX_HOPS;
1387 (void) ip_output_simple(mp, &ixas);
1388 ixa_cleanup(&ixas);
1392 * Update the output mib when ICMPv6 packets are sent.
1394 void
1395 icmp_update_out_mib_v6(ill_t *ill, icmp6_t *icmp6)
1397 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutMsgs);
1399 switch (icmp6->icmp6_type) {
1400 case ICMP6_DST_UNREACH:
1401 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutDestUnreachs);
1402 if (icmp6->icmp6_code == ICMP6_DST_UNREACH_ADMIN)
1403 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutAdminProhibs);
1404 break;
1406 case ICMP6_TIME_EXCEEDED:
1407 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutTimeExcds);
1408 break;
1410 case ICMP6_PARAM_PROB:
1411 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutParmProblems);
1412 break;
1414 case ICMP6_PACKET_TOO_BIG:
1415 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutPktTooBigs);
1416 break;
1418 case ICMP6_ECHO_REQUEST:
1419 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchos);
1420 break;
1422 case ICMP6_ECHO_REPLY:
1423 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutEchoReplies);
1424 break;
1426 case ND_ROUTER_SOLICIT:
1427 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterSolicits);
1428 break;
1430 case ND_ROUTER_ADVERT:
1431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRouterAdvertisements);
1432 break;
1434 case ND_NEIGHBOR_SOLICIT:
1435 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutNeighborSolicits);
1436 break;
1438 case ND_NEIGHBOR_ADVERT:
1439 BUMP_MIB(ill->ill_icmp6_mib,
1440 ipv6IfIcmpOutNeighborAdvertisements);
1441 break;
1443 case ND_REDIRECT:
1444 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutRedirects);
1445 break;
1447 case MLD_LISTENER_QUERY:
1448 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembQueries);
1449 break;
1451 case MLD_LISTENER_REPORT:
1452 case MLD_V2_LISTENER_REPORT:
1453 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembResponses);
1454 break;
1456 case MLD_LISTENER_REDUCTION:
1457 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpOutGroupMembReductions);
1458 break;
1463 * Check if it is ok to send an ICMPv6 error packet in
1464 * response to the IP packet in mp.
1465 * Free the message and return null if no
1466 * ICMP error packet should be sent.
1468 static mblk_t *
1469 icmp_pkt_err_ok_v6(mblk_t *mp, boolean_t mcast_ok, ip_recv_attr_t *ira)
1471 ill_t *ill = ira->ira_ill;
1472 ip_stack_t *ipst = ill->ill_ipst;
1473 boolean_t llbcast;
1474 ip6_t *ip6h;
1476 if (!mp)
1477 return (NULL);
1479 /* We view multicast and broadcast as the same.. */
1480 llbcast = (ira->ira_flags &
1481 (IRAF_L2DST_MULTICAST|IRAF_L2DST_BROADCAST)) != 0;
1482 ip6h = (ip6_t *)mp->b_rptr;
1484 /* Check if source address uniquely identifies the host */
1486 if (IN6_IS_ADDR_MULTICAST(&ip6h->ip6_src) ||
1487 IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_src) ||
1488 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
1489 freemsg(mp);
1490 return (NULL);
1493 if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
1494 size_t len_needed = IPV6_HDR_LEN + ICMP6_MINLEN;
1495 icmp6_t *icmp6;
1497 if (mp->b_wptr - mp->b_rptr < len_needed) {
1498 if (!pullupmsg(mp, len_needed)) {
1499 BUMP_MIB(ill->ill_icmp6_mib,
1500 ipv6IfIcmpInErrors);
1501 freemsg(mp);
1502 return (NULL);
1504 ip6h = (ip6_t *)mp->b_rptr;
1506 icmp6 = (icmp6_t *)&ip6h[1];
1507 /* Explicitly do not generate errors in response to redirects */
1508 if (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
1509 icmp6->icmp6_type == ND_REDIRECT) {
1510 freemsg(mp);
1511 return (NULL);
1515 * Check that the destination is not multicast and that the packet
1516 * was not sent on link layer broadcast or multicast. (Exception
1517 * is Packet too big message as per the draft - when mcast_ok is set.)
1519 if (!mcast_ok &&
1520 (llbcast || IN6_IS_ADDR_MULTICAST(&ip6h->ip6_dst))) {
1521 freemsg(mp);
1522 return (NULL);
1525 if (icmp_err_rate_limit(ipst)) {
1527 * Only send ICMP error packets every so often.
1528 * This should be done on a per port/source basis,
1529 * but for now this will suffice.
1531 freemsg(mp);
1532 return (NULL);
1534 return (mp);
1538 * Called when a packet was sent out the same link that it arrived on.
1539 * Check if it is ok to send a redirect and then send it.
1541 void
1542 ip_send_potential_redirect_v6(mblk_t *mp, ip6_t *ip6h, ire_t *ire,
1543 ip_recv_attr_t *ira)
1545 ill_t *ill = ira->ira_ill;
1546 ip_stack_t *ipst = ill->ill_ipst;
1547 in6_addr_t *v6targ;
1548 ire_t *src_ire_v6 = NULL;
1549 mblk_t *mp1;
1550 ire_t *nhop_ire = NULL;
1553 * Don't send a redirect when forwarding a source
1554 * routed packet.
1556 if (ip_source_routed_v6(ip6h, mp, ipst))
1557 return;
1559 if (ire->ire_type & IRE_ONLINK) {
1560 /* Target is directly connected */
1561 v6targ = &ip6h->ip6_dst;
1562 } else {
1563 /* Determine the most specific IRE used to send the packets */
1564 nhop_ire = ire_nexthop(ire);
1565 if (nhop_ire == NULL)
1566 return;
1569 * We won't send redirects to a router
1570 * that doesn't have a link local
1571 * address, but will forward.
1573 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire->ire_addr_v6)) {
1574 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
1575 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
1576 ire_refrele(nhop_ire);
1577 return;
1579 v6targ = &nhop_ire->ire_addr_v6;
1581 src_ire_v6 = ire_ftable_lookup_v6(&ip6h->ip6_src,
1582 NULL, NULL, IRE_INTERFACE, ire->ire_ill, ALL_ZONES,
1583 MATCH_IRE_ILL | MATCH_IRE_TYPE, 0, ipst, NULL);
1585 if (src_ire_v6 == NULL) {
1586 if (nhop_ire != NULL)
1587 ire_refrele(nhop_ire);
1588 return;
1592 * The source is directly connected.
1594 mp1 = copymsg(mp);
1595 if (mp1 != NULL)
1596 icmp_send_redirect_v6(mp1, v6targ, &ip6h->ip6_dst, ira);
1598 if (nhop_ire != NULL)
1599 ire_refrele(nhop_ire);
1600 ire_refrele(src_ire_v6);
1604 * Generate an ICMPv6 redirect message.
1605 * Include target link layer address option if it exits.
1606 * Always include redirect header.
1608 static void
1609 icmp_send_redirect_v6(mblk_t *mp, in6_addr_t *targetp, in6_addr_t *dest,
1610 ip_recv_attr_t *ira)
1612 nd_redirect_t *rd;
1613 nd_opt_rd_hdr_t *rdh;
1614 uchar_t *buf;
1615 ncec_t *ncec = NULL;
1616 nd_opt_hdr_t *opt;
1617 int len;
1618 int ll_opt_len = 0;
1619 int max_redir_hdr_data_len;
1620 int pkt_len;
1621 in6_addr_t *srcp;
1622 ill_t *ill;
1623 boolean_t need_refrele;
1624 ip_stack_t *ipst = ira->ira_ill->ill_ipst;
1626 mp = icmp_pkt_err_ok_v6(mp, B_FALSE, ira);
1627 if (mp == NULL)
1628 return;
1630 if (IS_UNDER_IPMP(ira->ira_ill)) {
1631 ill = ipmp_ill_hold_ipmp_ill(ira->ira_ill);
1632 if (ill == NULL) {
1633 ill = ira->ira_ill;
1634 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInBadRedirects);
1635 ip_drop_output("no IPMP ill for sending redirect",
1636 mp, ill);
1637 freemsg(mp);
1638 return;
1640 need_refrele = B_TRUE;
1641 } else {
1642 ill = ira->ira_ill;
1643 need_refrele = B_FALSE;
1646 ncec = ncec_lookup_illgrp_v6(ill, targetp);
1647 if (ncec != NULL && ncec->ncec_state != ND_INCOMPLETE &&
1648 ncec->ncec_lladdr != NULL) {
1649 ll_opt_len = (sizeof (nd_opt_hdr_t) +
1650 ill->ill_phys_addr_length + 7)/8 * 8;
1652 len = sizeof (nd_redirect_t) + sizeof (nd_opt_rd_hdr_t) + ll_opt_len;
1653 ASSERT(len % 4 == 0);
1654 buf = kmem_alloc(len, KM_NOSLEEP);
1655 if (buf == NULL) {
1656 if (ncec != NULL)
1657 ncec_refrele(ncec);
1658 if (need_refrele)
1659 ill_refrele(ill);
1660 freemsg(mp);
1661 return;
1664 rd = (nd_redirect_t *)buf;
1665 rd->nd_rd_type = (uint8_t)ND_REDIRECT;
1666 rd->nd_rd_code = 0;
1667 rd->nd_rd_reserved = 0;
1668 rd->nd_rd_target = *targetp;
1669 rd->nd_rd_dst = *dest;
1671 opt = (nd_opt_hdr_t *)(buf + sizeof (nd_redirect_t));
1672 if (ncec != NULL && ll_opt_len != 0) {
1673 opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
1674 opt->nd_opt_len = ll_opt_len/8;
1675 bcopy((char *)ncec->ncec_lladdr, &opt[1],
1676 ill->ill_phys_addr_length);
1678 if (ncec != NULL)
1679 ncec_refrele(ncec);
1680 rdh = (nd_opt_rd_hdr_t *)(buf + sizeof (nd_redirect_t) + ll_opt_len);
1681 rdh->nd_opt_rh_type = (uint8_t)ND_OPT_REDIRECTED_HEADER;
1682 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1683 max_redir_hdr_data_len =
1684 (ipst->ips_ipv6_icmp_return - IPV6_HDR_LEN - len)/8*8;
1685 pkt_len = msgdsize(mp);
1686 /* Make sure mp is 8 byte aligned */
1687 if (pkt_len > max_redir_hdr_data_len) {
1688 rdh->nd_opt_rh_len = (max_redir_hdr_data_len +
1689 sizeof (nd_opt_rd_hdr_t))/8;
1690 (void) adjmsg(mp, max_redir_hdr_data_len - pkt_len);
1691 } else {
1692 rdh->nd_opt_rh_len = (pkt_len + sizeof (nd_opt_rd_hdr_t))/8;
1693 (void) adjmsg(mp, -(pkt_len % 8));
1695 rdh->nd_opt_rh_reserved1 = 0;
1696 rdh->nd_opt_rh_reserved2 = 0;
1697 /* ipif_v6lcl_addr contains the link-local source address */
1698 srcp = &ill->ill_ipif->ipif_v6lcl_addr;
1700 /* Redirects sent by router, and router is global zone */
1701 ASSERT(ira->ira_zoneid == ALL_ZONES);
1702 ira->ira_zoneid = GLOBAL_ZONEID;
1703 icmp_pkt_v6(mp, buf, len, srcp, ira);
1704 kmem_free(buf, len);
1705 if (need_refrele)
1706 ill_refrele(ill);
1710 /* Generate an ICMP time exceeded message. (May be called as writer.) */
1711 void
1712 icmp_time_exceeded_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1713 ip_recv_attr_t *ira)
1715 icmp6_t icmp6;
1717 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1718 if (mp == NULL)
1719 return;
1721 bzero(&icmp6, sizeof (icmp6_t));
1722 icmp6.icmp6_type = ICMP6_TIME_EXCEEDED;
1723 icmp6.icmp6_code = code;
1724 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1728 * Generate an ICMP unreachable message.
1729 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1730 * constructed by the caller.
1732 void
1733 icmp_unreachable_v6(mblk_t *mp, uint8_t code, boolean_t mcast_ok,
1734 ip_recv_attr_t *ira)
1736 icmp6_t icmp6;
1738 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1739 if (mp == NULL)
1740 return;
1742 bzero(&icmp6, sizeof (icmp6_t));
1743 icmp6.icmp6_type = ICMP6_DST_UNREACH;
1744 icmp6.icmp6_code = code;
1745 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1749 * Generate an ICMP pkt too big message.
1750 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1751 * constructed by the caller.
1753 void
1754 icmp_pkt2big_v6(mblk_t *mp, uint32_t mtu, boolean_t mcast_ok,
1755 ip_recv_attr_t *ira)
1757 icmp6_t icmp6;
1759 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1760 if (mp == NULL)
1761 return;
1763 bzero(&icmp6, sizeof (icmp6_t));
1764 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
1765 icmp6.icmp6_code = 0;
1766 icmp6.icmp6_mtu = htonl(mtu);
1768 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1772 * Generate an ICMP parameter problem message. (May be called as writer.)
1773 * 'offset' is the offset from the beginning of the packet in error.
1774 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1775 * constructed by the caller.
1777 static void
1778 icmp_param_problem_v6(mblk_t *mp, uint8_t code, uint32_t offset,
1779 boolean_t mcast_ok, ip_recv_attr_t *ira)
1781 icmp6_t icmp6;
1783 mp = icmp_pkt_err_ok_v6(mp, mcast_ok, ira);
1784 if (mp == NULL)
1785 return;
1787 bzero((char *)&icmp6, sizeof (icmp6_t));
1788 icmp6.icmp6_type = ICMP6_PARAM_PROB;
1789 icmp6.icmp6_code = code;
1790 icmp6.icmp6_pptr = htonl(offset);
1791 icmp_pkt_v6(mp, &icmp6, sizeof (icmp6_t), NULL, ira);
1794 void
1795 icmp_param_problem_nexthdr_v6(mblk_t *mp, boolean_t mcast_ok,
1796 ip_recv_attr_t *ira)
1798 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1799 uint16_t hdr_length;
1800 uint8_t *nexthdrp;
1801 uint32_t offset;
1802 ill_t *ill = ira->ira_ill;
1804 /* Determine the offset of the bad nexthdr value */
1805 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) {
1806 /* Malformed packet */
1807 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
1808 ip_drop_input("ipIfStatsInDiscards", mp, ill);
1809 freemsg(mp);
1810 return;
1813 offset = nexthdrp - mp->b_rptr;
1814 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_NEXTHEADER, offset,
1815 mcast_ok, ira);
1819 * Verify whether or not the IP address is a valid local address.
1820 * Could be a unicast, including one for a down interface.
1821 * If allow_mcbc then a multicast or broadcast address is also
1822 * acceptable.
1824 * In the case of a multicast address, however, the
1825 * upper protocol is expected to reset the src address
1826 * to zero when we return IPVL_MCAST so that
1827 * no packets are emitted with multicast address as
1828 * source address.
1829 * The addresses valid for bind are:
1830 * (1) - in6addr_any
1831 * (2) - IP address of an UP interface
1832 * (3) - IP address of a DOWN interface
1833 * (4) - a multicast address. In this case
1834 * the conn will only receive packets destined to
1835 * the specified multicast address. Note: the
1836 * application still has to issue an
1837 * IPV6_JOIN_GROUP socket option.
1839 * In all the above cases, the bound address must be valid in the current zone.
1840 * When the address is loopback or multicast, there might be many matching IREs
1841 * so bind has to look up based on the zone.
1843 ip_laddr_t
1844 ip_laddr_verify_v6(const in6_addr_t *v6src, zoneid_t zoneid,
1845 ip_stack_t *ipst, boolean_t allow_mcbc, uint_t scopeid)
1847 ire_t *src_ire;
1848 uint_t match_flags;
1849 ill_t *ill = NULL;
1851 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src));
1852 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src));
1854 match_flags = MATCH_IRE_ZONEONLY;
1855 if (scopeid != 0) {
1856 ill = ill_lookup_on_ifindex(scopeid, B_TRUE, ipst);
1857 if (ill == NULL)
1858 return (IPVL_BAD);
1859 match_flags |= MATCH_IRE_ILL;
1862 src_ire = ire_ftable_lookup_v6(v6src, NULL, NULL, 0,
1863 ill, zoneid, match_flags, 0, ipst, NULL);
1864 if (ill != NULL)
1865 ill_refrele(ill);
1868 * If an address other than in6addr_any is requested,
1869 * we verify that it is a valid address for bind
1870 * Note: Following code is in if-else-if form for
1871 * readability compared to a condition check.
1873 if (src_ire != NULL && (src_ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK))) {
1875 * (2) Bind to address of local UP interface
1877 ire_refrele(src_ire);
1878 return (IPVL_UNICAST_UP);
1879 } else if (IN6_IS_ADDR_MULTICAST(v6src)) {
1880 /* (4) bind to multicast address. */
1881 if (src_ire != NULL)
1882 ire_refrele(src_ire);
1885 * Note: caller should take IPV6_MULTICAST_IF
1886 * into account when selecting a real source address.
1888 if (allow_mcbc)
1889 return (IPVL_MCAST);
1890 else
1891 return (IPVL_BAD);
1892 } else {
1893 ipif_t *ipif;
1896 * (3) Bind to address of local DOWN interface?
1897 * (ipif_lookup_addr() looks up all interfaces
1898 * but we do not get here for UP interfaces
1899 * - case (2) above)
1901 if (src_ire != NULL)
1902 ire_refrele(src_ire);
1904 ipif = ipif_lookup_addr_v6(v6src, NULL, zoneid, ipst);
1905 if (ipif == NULL)
1906 return (IPVL_BAD);
1908 /* Not a useful source? */
1909 if (ipif->ipif_flags & (IPIF_NOLOCAL | IPIF_ANYCAST)) {
1910 ipif_refrele(ipif);
1911 return (IPVL_BAD);
1913 ipif_refrele(ipif);
1914 return (IPVL_UNICAST_DOWN);
1919 * Verify that both the source and destination addresses are valid. If
1920 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1921 * i.e. have no route to it. Protocols like TCP want to verify destination
1922 * reachability, while tunnels do not.
1924 * Determine the route, the interface, and (optionally) the source address
1925 * to use to reach a given destination.
1926 * Note that we allow connect to broadcast and multicast addresses when
1927 * IPDF_ALLOW_MCBC is set.
1928 * first_hop and dst_addr are normally the same, but if source routing
1929 * they will differ; in that case the first_hop is what we'll use for the
1930 * routing lookup but the dce checks will be done on dst_addr,
1932 * If uinfo is set, then we fill in the best available information
1933 * we have for the destination. This is based on (in priority order) any
1934 * metrics and path MTU stored in a dce_t, route metrics, and finally the
1935 * ill_mtu/ill_mc_mtu.
1937 * Assumes that the caller has set ixa_scopeid for link-local communication.
1940 ip_set_destination_v6(in6_addr_t *src_addrp, const in6_addr_t *dst_addr,
1941 const in6_addr_t *firsthop, ip_xmit_attr_t *ixa, iulp_t *uinfo,
1942 uint32_t flags)
1944 ire_t *ire;
1945 int error = 0;
1946 in6_addr_t setsrc; /* RTF_SETSRC */
1947 zoneid_t zoneid = ixa->ixa_zoneid; /* Honors SO_ALLZONES */
1948 ip_stack_t *ipst = ixa->ixa_ipst;
1949 dce_t *dce;
1950 uint_t pmtu;
1951 uint_t ifindex;
1952 uint_t generation;
1953 nce_t *nce;
1954 ill_t *ill = NULL;
1955 boolean_t multirt = B_FALSE;
1957 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr));
1959 ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
1962 * We never send to zero; the ULPs map it to the loopback address.
1963 * We can't allow it since we use zero to mean unitialized in some
1964 * places.
1966 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr));
1968 setsrc = ipv6_all_zeros;
1970 * Select a route; For IPMP interfaces, we would only select
1971 * a "hidden" route (i.e., going through a specific under_ill)
1972 * if ixa_ifindex has been specified.
1974 ire = ip_select_route_v6(firsthop, *src_addrp, ixa, &generation,
1975 &setsrc, &error, &multirt);
1976 ASSERT(ire != NULL); /* IRE_NOROUTE if none found */
1977 if (error != 0)
1978 goto bad_addr;
1981 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
1982 * If IPDF_VERIFY_DST is set, the destination must be reachable.
1983 * Otherwise the destination needn't be reachable.
1985 * If we match on a reject or black hole, then we've got a
1986 * local failure. May as well fail out the connect() attempt,
1987 * since it's never going to succeed.
1989 if (ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE)) {
1991 * If we're verifying destination reachability, we always want
1992 * to complain here.
1994 * If we're not verifying destination reachability but the
1995 * destination has a route, we still want to fail on the
1996 * temporary address and broadcast address tests.
1998 * In both cases do we let the code continue so some reasonable
1999 * information is returned to the caller. That enables the
2000 * caller to use (and even cache) the IRE. conn_ip_ouput will
2001 * use the generation mismatch path to check for the unreachable
2002 * case thereby avoiding any specific check in the main path.
2004 ASSERT(generation == IRE_GENERATION_VERIFY);
2005 if (flags & IPDF_VERIFY_DST) {
2007 * Set errno but continue to set up ixa_ire to be
2008 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2009 * That allows callers to use ip_output to get an
2010 * ICMP error back.
2012 if (!(ire->ire_type & IRE_HOST))
2013 error = ENETUNREACH;
2014 else
2015 error = EHOSTUNREACH;
2019 if ((ire->ire_type & (IRE_BROADCAST|IRE_MULTICAST)) &&
2020 !(flags & IPDF_ALLOW_MCBC)) {
2021 ire_refrele(ire);
2022 ire = ire_reject(ipst, B_FALSE);
2023 generation = IRE_GENERATION_VERIFY;
2024 error = ENETUNREACH;
2027 /* Cache things */
2028 if (ixa->ixa_ire != NULL)
2029 ire_refrele_notr(ixa->ixa_ire);
2030 #ifdef DEBUG
2031 ire_refhold_notr(ire);
2032 ire_refrele(ire);
2033 #endif
2034 ixa->ixa_ire = ire;
2035 ixa->ixa_ire_generation = generation;
2038 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2039 * since some callers will send a packet to conn_ip_output() even if
2040 * there's an error.
2042 ifindex = 0;
2043 if (IN6_IS_ADDR_LINKSCOPE(dst_addr)) {
2044 /* If we are creating a DCE we'd better have an ifindex */
2045 if (ill != NULL)
2046 ifindex = ill->ill_phyint->phyint_ifindex;
2047 else
2048 flags &= ~IPDF_UNIQUE_DCE;
2051 if (flags & IPDF_UNIQUE_DCE) {
2052 /* Fallback to the default dce if allocation fails */
2053 dce = dce_lookup_and_add_v6(dst_addr, ifindex, ipst);
2054 if (dce != NULL) {
2055 generation = dce->dce_generation;
2056 } else {
2057 dce = dce_lookup_v6(dst_addr, ifindex, ipst,
2058 &generation);
2060 } else {
2061 dce = dce_lookup_v6(dst_addr, ifindex, ipst, &generation);
2063 ASSERT(dce != NULL);
2064 if (ixa->ixa_dce != NULL)
2065 dce_refrele_notr(ixa->ixa_dce);
2066 #ifdef DEBUG
2067 dce_refhold_notr(dce);
2068 dce_refrele(dce);
2069 #endif
2070 ixa->ixa_dce = dce;
2071 ixa->ixa_dce_generation = generation;
2075 * For multicast with multirt we have a flag passed back from
2076 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2077 * possible multicast address.
2078 * We also need a flag for multicast since we can't check
2079 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2081 if (multirt) {
2082 ixa->ixa_postfragfn = ip_postfrag_multirt_v6;
2083 ixa->ixa_flags |= IXAF_MULTIRT_MULTICAST;
2084 } else {
2085 ixa->ixa_postfragfn = ire->ire_postfragfn;
2086 ixa->ixa_flags &= ~IXAF_MULTIRT_MULTICAST;
2088 if (!(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2089 /* Get an nce to cache. */
2090 nce = ire_to_nce(ire, NULL, firsthop);
2091 if (nce == NULL) {
2092 /* Allocation failure? */
2093 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2094 } else {
2095 if (ixa->ixa_nce != NULL)
2096 nce_refrele(ixa->ixa_nce);
2097 ixa->ixa_nce = nce;
2102 * If the source address is a loopback address, the
2103 * destination had best be local or multicast.
2104 * If we are sending to an IRE_LOCAL using a loopback source then
2105 * it had better be the same zoneid.
2107 if (IN6_IS_ADDR_LOOPBACK(src_addrp)) {
2108 if ((ire->ire_type & IRE_LOCAL) && ire->ire_zoneid != zoneid) {
2109 ire = NULL; /* Stored in ixa_ire */
2110 error = EADDRNOTAVAIL;
2111 goto bad_addr;
2113 if (!(ire->ire_type & (IRE_LOOPBACK|IRE_LOCAL|IRE_MULTICAST))) {
2114 ire = NULL; /* Stored in ixa_ire */
2115 error = EADDRNOTAVAIL;
2116 goto bad_addr;
2121 * Does the caller want us to pick a source address?
2123 if (flags & IPDF_SELECT_SRC) {
2124 in6_addr_t src_addr;
2127 * We use use ire_nexthop_ill to avoid the under ipmp
2128 * interface for source address selection. Note that for ipmp
2129 * probe packets, ixa_ifindex would have been specified, and
2130 * the ip_select_route() invocation would have picked an ire
2131 * will ire_ill pointing at an under interface.
2133 ill = ire_nexthop_ill(ire);
2135 /* If unreachable we have no ill but need some source */
2136 if (ill == NULL) {
2137 src_addr = ipv6_loopback;
2138 /* Make sure we look for a better source address */
2139 generation = SRC_GENERATION_VERIFY;
2140 } else {
2141 error = ip_select_source_v6(ill, &setsrc, dst_addr,
2142 zoneid, ipst, B_FALSE, ixa->ixa_src_preferences,
2143 &src_addr, &generation, NULL);
2144 if (error != 0) {
2145 ire = NULL; /* Stored in ixa_ire */
2146 goto bad_addr;
2151 * We allow the source address to to down.
2152 * However, we check that we don't use the loopback address
2153 * as a source when sending out on the wire.
2155 if (IN6_IS_ADDR_LOOPBACK(&src_addr) &&
2156 !(ire->ire_type & (IRE_LOCAL|IRE_LOOPBACK|IRE_MULTICAST)) &&
2157 !(ire->ire_flags & (RTF_REJECT|RTF_BLACKHOLE))) {
2158 ire = NULL; /* Stored in ixa_ire */
2159 error = EADDRNOTAVAIL;
2160 goto bad_addr;
2163 *src_addrp = src_addr;
2164 ixa->ixa_src_generation = generation;
2168 * Make sure we don't leave an unreachable ixa_nce in place
2169 * since ip_select_route is used when we unplumb i.e., remove
2170 * references on ixa_ire, ixa_nce, and ixa_dce.
2172 nce = ixa->ixa_nce;
2173 if (nce != NULL && nce->nce_is_condemned) {
2174 nce_refrele(nce);
2175 ixa->ixa_nce = NULL;
2176 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2180 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2181 * multicast. But pmtu discovery is only enabled for connected
2182 * sockets in general.
2186 * Set initial value for fragmentation limit. Either conn_ip_output
2187 * or ULP might updates it when there are routing changes.
2188 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2190 pmtu = ip_get_pmtu(ixa);
2191 ixa->ixa_fragsize = pmtu;
2192 /* Make sure ixa_fragsize and ixa_pmtu remain identical */
2193 if (ixa->ixa_flags & IXAF_VERIFY_PMTU)
2194 ixa->ixa_pmtu = pmtu;
2197 * Extract information useful for some transports.
2198 * First we look for DCE metrics. Then we take what we have in
2199 * the metrics in the route, where the offlink is used if we have
2200 * one.
2202 if (uinfo != NULL) {
2203 bzero(uinfo, sizeof (*uinfo));
2205 if (dce->dce_flags & DCEF_UINFO)
2206 *uinfo = dce->dce_uinfo;
2208 rts_merge_metrics(uinfo, &ire->ire_metrics);
2210 /* Allow ire_metrics to decrease the path MTU from above */
2211 if (uinfo->iulp_mtu == 0 || uinfo->iulp_mtu > pmtu)
2212 uinfo->iulp_mtu = pmtu;
2214 uinfo->iulp_localnet = (ire->ire_type & IRE_ONLINK) != 0;
2215 uinfo->iulp_loopback = (ire->ire_type & IRE_LOOPBACK) != 0;
2216 uinfo->iulp_local = (ire->ire_type & IRE_LOCAL) != 0;
2219 if (ill != NULL)
2220 ill_refrele(ill);
2222 return (error);
2224 bad_addr:
2225 if (ire != NULL)
2226 ire_refrele(ire);
2228 if (ill != NULL)
2229 ill_refrele(ill);
2232 * Make sure we don't leave an unreachable ixa_nce in place
2233 * since ip_select_route is used when we unplumb i.e., remove
2234 * references on ixa_ire, ixa_nce, and ixa_dce.
2236 nce = ixa->ixa_nce;
2237 if (nce != NULL && nce->nce_is_condemned) {
2238 nce_refrele(nce);
2239 ixa->ixa_nce = NULL;
2240 ixa->ixa_ire_generation = IRE_GENERATION_VERIFY;
2243 return (error);
2247 * Handle protocols with which IP is less intimate. There
2248 * can be more than one stream bound to a particular
2249 * protocol. When this is the case, normally each one gets a copy
2250 * of any incoming packets.
2252 * Zones notes:
2253 * Packets will be distributed to conns in all zones. This is really only
2254 * useful for ICMPv6 as only applications in the global zone can create raw
2255 * sockets for other protocols.
2257 void
2258 ip_fanout_proto_v6(mblk_t *mp, ip6_t *ip6h, ip_recv_attr_t *ira)
2260 mblk_t *mp1;
2261 in6_addr_t laddr = ip6h->ip6_dst;
2262 conn_t *connp, *first_connp, *next_connp;
2263 connf_t *connfp;
2264 ill_t *ill = ira->ira_ill;
2265 ip_stack_t *ipst = ill->ill_ipst;
2267 connfp = &ipst->ips_ipcl_proto_fanout_v6[ira->ira_protocol];
2268 mutex_enter(&connfp->connf_lock);
2269 connp = connfp->connf_head;
2270 for (connp = connfp->connf_head; connp != NULL;
2271 connp = connp->conn_next) {
2272 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2273 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h))
2274 break;
2277 if (connp == NULL) {
2279 * No one bound to this port. Is
2280 * there a client that wants all
2281 * unclaimed datagrams?
2283 mutex_exit(&connfp->connf_lock);
2284 ip_fanout_send_icmp_v6(mp, ICMP6_PARAM_PROB,
2285 ICMP6_PARAMPROB_NEXTHEADER, ira);
2286 return;
2289 ASSERT(IPCL_IS_NONSTR(connp) || connp->conn_rq != NULL);
2291 CONN_INC_REF(connp);
2292 first_connp = connp;
2295 * XXX: Fix the multiple protocol listeners case. We should not
2296 * be walking the conn->conn_next list here.
2298 connp = connp->conn_next;
2299 for (;;) {
2300 while (connp != NULL) {
2301 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2302 if (IPCL_PROTO_MATCH_V6(connp, ira, ip6h))
2303 break;
2304 connp = connp->conn_next;
2307 if (connp == NULL) {
2308 /* No more interested clients */
2309 connp = first_connp;
2310 break;
2312 if (((mp1 = dupmsg(mp)) == NULL) &&
2313 ((mp1 = copymsg(mp)) == NULL)) {
2314 /* Memory allocation failed */
2315 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2316 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2317 connp = first_connp;
2318 break;
2321 CONN_INC_REF(connp);
2322 mutex_exit(&connfp->connf_lock);
2324 ip_fanout_proto_conn(connp, mp1, NULL, (ip6_t *)mp1->b_rptr,
2325 ira);
2327 mutex_enter(&connfp->connf_lock);
2328 /* Follow the next pointer before releasing the conn. */
2329 next_connp = connp->conn_next;
2330 CONN_DEC_REF(connp);
2331 connp = next_connp;
2334 /* Last one. Send it upstream. */
2335 mutex_exit(&connfp->connf_lock);
2337 ip_fanout_proto_conn(connp, mp, NULL, ip6h, ira);
2339 CONN_DEC_REF(connp);
2343 * Called when it is conceptually a ULP that would sent the packet
2344 * e.g., port unreachable and nexthdr unknown. Check that the packet
2345 * would have passed the IPsec global policy before sending the error.
2347 * Send an ICMP error after patching up the packet appropriately.
2348 * Uses ip_drop_input and bumps the appropriate MIB.
2349 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2351 void
2352 ip_fanout_send_icmp_v6(mblk_t *mp, uint_t icmp_type, uint8_t icmp_code,
2353 ip_recv_attr_t *ira)
2355 ip6_t *ip6h;
2356 boolean_t secure;
2357 ill_t *ill = ira->ira_ill;
2358 ip_stack_t *ipst = ill->ill_ipst;
2359 netstack_t *ns = ipst->ips_netstack;
2360 ipsec_stack_t *ipss = ns->netstack_ipsec;
2362 secure = ira->ira_flags & IRAF_IPSEC_SECURE;
2365 * We are generating an icmp error for some inbound packet.
2366 * Called from all ip_fanout_(udp, tcp, proto) functions.
2367 * Before we generate an error, check with global policy
2368 * to see whether this is allowed to enter the system. As
2369 * there is no "conn", we are checking with global policy.
2371 ip6h = (ip6_t *)mp->b_rptr;
2372 if (secure || ipss->ipsec_inbound_v6_policy_present) {
2373 mp = ipsec_check_global_policy(mp, NULL, NULL, ip6h, ira, ns);
2374 if (mp == NULL)
2375 return;
2378 /* We never send errors for protocols that we do implement */
2379 if (ira->ira_protocol == IPPROTO_ICMPV6) {
2380 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2381 ip_drop_input("ip_fanout_send_icmp_v6", mp, ill);
2382 freemsg(mp);
2383 return;
2386 switch (icmp_type) {
2387 case ICMP6_DST_UNREACH:
2388 ASSERT(icmp_code == ICMP6_DST_UNREACH_NOPORT);
2390 BUMP_MIB(ill->ill_ip_mib, udpIfStatsNoPorts);
2391 ip_drop_input("ipIfStatsNoPorts", mp, ill);
2393 icmp_unreachable_v6(mp, icmp_code, B_FALSE, ira);
2394 break;
2395 case ICMP6_PARAM_PROB:
2396 ASSERT(icmp_code == ICMP6_PARAMPROB_NEXTHEADER);
2398 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInUnknownProtos);
2399 ip_drop_input("ipIfStatsInUnknownProtos", mp, ill);
2401 /* Let the system determine the offset for this one */
2402 icmp_param_problem_nexthdr_v6(mp, B_FALSE, ira);
2403 break;
2404 default:
2405 #ifdef DEBUG
2406 panic("ip_fanout_send_icmp_v6: wrong type");
2407 /*NOTREACHED*/
2408 #else
2409 freemsg(mp);
2410 break;
2411 #endif
2416 * Fanout for UDP packets that are multicast or ICMP errors.
2417 * (Unicast fanout is handled in ip_input_v6.)
2419 * If SO_REUSEADDR is set all multicast packets
2420 * will be delivered to all conns bound to the same port.
2422 * Fanout for UDP packets.
2423 * The caller puts <fport, lport> in the ports parameter.
2424 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2426 * If SO_REUSEADDR is set all multicast and broadcast packets
2427 * will be delivered to all conns bound to the same port.
2429 * Zones notes:
2430 * Earlier in ip_input on a system with multiple shared-IP zones we
2431 * duplicate the multicast and broadcast packets and send them up
2432 * with each explicit zoneid that exists on that ill.
2433 * This means that here we can match the zoneid with SO_ALLZONES being special.
2435 void
2436 ip_fanout_udp_multi_v6(mblk_t *mp, ip6_t *ip6h, uint16_t lport, uint16_t fport,
2437 ip_recv_attr_t *ira)
2439 in6_addr_t laddr;
2440 conn_t *connp;
2441 connf_t *connfp;
2442 in6_addr_t faddr;
2443 ill_t *ill = ira->ira_ill;
2444 ip_stack_t *ipst = ill->ill_ipst;
2446 ASSERT(ira->ira_flags & (IRAF_MULTIBROADCAST|IRAF_ICMP_ERROR));
2448 laddr = ip6h->ip6_dst;
2449 faddr = ip6h->ip6_src;
2451 /* Attempt to find a client stream based on destination port. */
2452 connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
2453 mutex_enter(&connfp->connf_lock);
2454 connp = connfp->connf_head;
2455 while (connp != NULL) {
2456 if ((IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)) &&
2457 conn_wantpacket_v6(connp, ira, ip6h))
2458 break;
2459 connp = connp->conn_next;
2462 if (connp == NULL)
2463 goto notfound;
2465 CONN_INC_REF(connp);
2467 if (connp->conn_reuseaddr) {
2468 conn_t *first_connp = connp;
2469 conn_t *next_connp;
2470 mblk_t *mp1;
2472 connp = connp->conn_next;
2473 for (;;) {
2474 while (connp != NULL) {
2475 if (IPCL_UDP_MATCH_V6(connp, lport, laddr,
2476 fport, faddr) &&
2477 conn_wantpacket_v6(connp, ira, ip6h))
2478 break;
2479 connp = connp->conn_next;
2481 if (connp == NULL) {
2482 /* No more interested clients */
2483 connp = first_connp;
2484 break;
2486 if (((mp1 = dupmsg(mp)) == NULL) &&
2487 ((mp1 = copymsg(mp)) == NULL)) {
2488 /* Memory allocation failed */
2489 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
2490 ip_drop_input("ipIfStatsInDiscards", mp, ill);
2491 connp = first_connp;
2492 break;
2495 CONN_INC_REF(connp);
2496 mutex_exit(&connfp->connf_lock);
2498 IP6_STAT(ipst, ip6_udp_fanmb);
2499 ip_fanout_udp_conn(connp, mp1, NULL,
2500 (ip6_t *)mp1->b_rptr, ira);
2502 mutex_enter(&connfp->connf_lock);
2503 /* Follow the next pointer before releasing the conn. */
2504 next_connp = connp->conn_next;
2505 IP6_STAT(ipst, ip6_udp_fanmb);
2506 CONN_DEC_REF(connp);
2507 connp = next_connp;
2511 /* Last one. Send it upstream. */
2512 mutex_exit(&connfp->connf_lock);
2514 IP6_STAT(ipst, ip6_udp_fanmb);
2515 ip_fanout_udp_conn(connp, mp, NULL, ip6h, ira);
2516 CONN_DEC_REF(connp);
2517 return;
2519 notfound:
2520 mutex_exit(&connfp->connf_lock);
2522 * No one bound to this port. Is
2523 * there a client that wants all
2524 * unclaimed datagrams?
2526 if (ipst->ips_ipcl_proto_fanout_v6[IPPROTO_UDP].connf_head != NULL) {
2527 ASSERT(ira->ira_protocol == IPPROTO_UDP);
2528 ip_fanout_proto_v6(mp, ip6h, ira);
2529 } else {
2530 ip_fanout_send_icmp_v6(mp, ICMP6_DST_UNREACH,
2531 ICMP6_DST_UNREACH_NOPORT, ira);
2536 * int ip_find_hdr_v6()
2538 * This routine is used by the upper layer protocols, iptun, and IPsec:
2539 * - Set extension header pointers to appropriate locations
2540 * - Determine IPv6 header length and return it
2541 * - Return a pointer to the last nexthdr value
2543 * The caller must initialize ipp_fields.
2545 * NOTE: If multiple extension headers of the same type are present,
2546 * ip_find_hdr_v6() will set the respective extension header pointers
2547 * to the first one that it encounters in the IPv6 header. It also
2548 * skips fragment headers. This routine deals with malformed packets
2549 * of various sorts in which case the returned length is up to the
2550 * malformed part.
2553 ip_find_hdr_v6(mblk_t *mp, ip6_t *ip6h, ip_pkt_t *ipp, uint8_t *nexthdrp)
2555 uint_t length, ehdrlen;
2556 uint8_t nexthdr;
2557 uint8_t *whereptr, *endptr;
2558 ip6_dest_t *tmpdstopts;
2559 ip6_rthdr_t *tmprthdr;
2560 ip6_hbh_t *tmphopopts;
2561 ip6_frag_t *tmpfraghdr;
2563 ipp->ipp_fields |= IPPF_HOPLIMIT | IPPF_TCLASS | IPPF_ADDR;
2564 ipp->ipp_hoplimit = ip6h->ip6_hops;
2565 ipp->ipp_tclass = IPV6_FLOW_TCLASS(ip6h->ip6_flow);
2566 ipp->ipp_addr = ip6h->ip6_dst;
2568 length = IPV6_HDR_LEN;
2569 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2570 endptr = mp->b_wptr;
2572 nexthdr = ip6h->ip6_nxt;
2573 while (whereptr < endptr) {
2574 /* Is there enough left for len + nexthdr? */
2575 if (whereptr + MIN_EHDR_LEN > endptr)
2576 goto done;
2578 switch (nexthdr) {
2579 case IPPROTO_HOPOPTS: {
2580 tmphopopts = (ip6_hbh_t *)whereptr;
2581 ehdrlen = 8 * (tmphopopts->ip6h_len + 1);
2582 if ((uchar_t *)tmphopopts + ehdrlen > endptr)
2583 goto done;
2584 nexthdr = tmphopopts->ip6h_nxt;
2586 /* return only 1st hbh */
2587 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) {
2588 ipp->ipp_fields |= IPPF_HOPOPTS;
2589 ipp->ipp_hopopts = (ip6_hbh_t *)whereptr;
2590 ipp->ipp_hopoptslen = ehdrlen;
2592 break;
2594 case IPPROTO_DSTOPTS:
2595 tmpdstopts = (ip6_dest_t *)whereptr;
2596 ehdrlen = 8 * (tmpdstopts->ip6d_len + 1);
2597 if ((uchar_t *)tmpdstopts + ehdrlen > endptr)
2598 goto done;
2599 nexthdr = tmpdstopts->ip6d_nxt;
2601 * ipp_dstopts is set to the destination header after a
2602 * routing header.
2603 * Assume it is a post-rthdr destination header
2604 * and adjust when we find an rthdr.
2606 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) {
2607 ipp->ipp_fields |= IPPF_DSTOPTS;
2608 ipp->ipp_dstopts = tmpdstopts;
2609 ipp->ipp_dstoptslen = ehdrlen;
2611 break;
2612 case IPPROTO_ROUTING:
2613 tmprthdr = (ip6_rthdr_t *)whereptr;
2614 ehdrlen = 8 * (tmprthdr->ip6r_len + 1);
2615 if ((uchar_t *)tmprthdr + ehdrlen > endptr)
2616 goto done;
2617 nexthdr = tmprthdr->ip6r_nxt;
2618 /* return only 1st rthdr */
2619 if (!(ipp->ipp_fields & IPPF_RTHDR)) {
2620 ipp->ipp_fields |= IPPF_RTHDR;
2621 ipp->ipp_rthdr = tmprthdr;
2622 ipp->ipp_rthdrlen = ehdrlen;
2625 * Make any destination header we've seen be a
2626 * pre-rthdr destination header.
2628 if (ipp->ipp_fields & IPPF_DSTOPTS) {
2629 ipp->ipp_fields &= ~IPPF_DSTOPTS;
2630 ipp->ipp_fields |= IPPF_RTHDRDSTOPTS;
2631 ipp->ipp_rthdrdstopts = ipp->ipp_dstopts;
2632 ipp->ipp_dstopts = NULL;
2633 ipp->ipp_rthdrdstoptslen = ipp->ipp_dstoptslen;
2634 ipp->ipp_dstoptslen = 0;
2636 break;
2637 case IPPROTO_FRAGMENT:
2638 tmpfraghdr = (ip6_frag_t *)whereptr;
2639 ehdrlen = sizeof (ip6_frag_t);
2640 if ((uchar_t *)tmpfraghdr + ehdrlen > endptr)
2641 goto done;
2642 nexthdr = tmpfraghdr->ip6f_nxt;
2643 if (!(ipp->ipp_fields & IPPF_FRAGHDR)) {
2644 ipp->ipp_fields |= IPPF_FRAGHDR;
2645 ipp->ipp_fraghdr = tmpfraghdr;
2646 ipp->ipp_fraghdrlen = ehdrlen;
2648 break;
2649 case IPPROTO_NONE:
2650 default:
2651 goto done;
2653 length += ehdrlen;
2654 whereptr += ehdrlen;
2656 done:
2657 if (nexthdrp != NULL)
2658 *nexthdrp = nexthdr;
2659 return (length);
2663 * Try to determine where and what are the IPv6 header length and
2664 * pointer to nexthdr value for the upper layer protocol (or an
2665 * unknown next hdr).
2667 * Parameters returns a pointer to the nexthdr value;
2668 * Must handle malformed packets of various sorts.
2669 * Function returns failure for malformed cases.
2671 boolean_t
2672 ip_hdr_length_nexthdr_v6(mblk_t *mp, ip6_t *ip6h, uint16_t *hdr_length_ptr,
2673 uint8_t **nexthdrpp)
2675 uint16_t length;
2676 uint_t ehdrlen;
2677 uint8_t *nexthdrp;
2678 uint8_t *whereptr;
2679 uint8_t *endptr;
2680 ip6_dest_t *desthdr;
2681 ip6_rthdr_t *rthdr;
2682 ip6_frag_t *fraghdr;
2684 ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
2685 length = IPV6_HDR_LEN;
2686 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
2687 endptr = mp->b_wptr;
2689 nexthdrp = &ip6h->ip6_nxt;
2690 while (whereptr < endptr) {
2691 /* Is there enough left for len + nexthdr? */
2692 if (whereptr + MIN_EHDR_LEN > endptr)
2693 break;
2695 switch (*nexthdrp) {
2696 case IPPROTO_HOPOPTS:
2697 case IPPROTO_DSTOPTS:
2698 /* Assumes the headers are identical for hbh and dst */
2699 desthdr = (ip6_dest_t *)whereptr;
2700 ehdrlen = 8 * (desthdr->ip6d_len + 1);
2701 if ((uchar_t *)desthdr + ehdrlen > endptr)
2702 return (B_FALSE);
2703 nexthdrp = &desthdr->ip6d_nxt;
2704 break;
2705 case IPPROTO_ROUTING:
2706 rthdr = (ip6_rthdr_t *)whereptr;
2707 ehdrlen = 8 * (rthdr->ip6r_len + 1);
2708 if ((uchar_t *)rthdr + ehdrlen > endptr)
2709 return (B_FALSE);
2710 nexthdrp = &rthdr->ip6r_nxt;
2711 break;
2712 case IPPROTO_FRAGMENT:
2713 fraghdr = (ip6_frag_t *)whereptr;
2714 ehdrlen = sizeof (ip6_frag_t);
2715 if ((uchar_t *)&fraghdr[1] > endptr)
2716 return (B_FALSE);
2717 nexthdrp = &fraghdr->ip6f_nxt;
2718 break;
2719 case IPPROTO_NONE:
2720 /* No next header means we're finished */
2721 default:
2722 *hdr_length_ptr = length;
2723 *nexthdrpp = nexthdrp;
2724 return (B_TRUE);
2726 length += ehdrlen;
2727 whereptr += ehdrlen;
2728 *hdr_length_ptr = length;
2729 *nexthdrpp = nexthdrp;
2731 switch (*nexthdrp) {
2732 case IPPROTO_HOPOPTS:
2733 case IPPROTO_DSTOPTS:
2734 case IPPROTO_ROUTING:
2735 case IPPROTO_FRAGMENT:
2737 * If any know extension headers are still to be processed,
2738 * the packet's malformed (or at least all the IP header(s) are
2739 * not in the same mblk - and that should never happen.
2741 return (B_FALSE);
2743 default:
2745 * If we get here, we know that all of the IP headers were in
2746 * the same mblk, even if the ULP header is in the next mblk.
2748 *hdr_length_ptr = length;
2749 *nexthdrpp = nexthdrp;
2750 return (B_TRUE);
2755 * Return the length of the IPv6 related headers (including extension headers)
2756 * Returns a length even if the packet is malformed.
2759 ip_hdr_length_v6(mblk_t *mp, ip6_t *ip6h)
2761 uint16_t hdr_len;
2762 uint8_t *nexthdrp;
2764 (void) ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len, &nexthdrp);
2765 return (hdr_len);
2769 * Parse and process any hop-by-hop or destination options.
2771 * Assumes that q is an ill read queue so that ICMP errors for link-local
2772 * destinations are sent out the correct interface.
2774 * Returns -1 if there was an error and mp has been consumed.
2775 * Returns 0 if no special action is needed.
2776 * Returns 1 if the packet contained a router alert option for this node
2777 * which is verified to be "interesting/known" for our implementation.
2779 * XXX Note: In future as more hbh or dest options are defined,
2780 * it may be better to have different routines for hbh and dest
2781 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2782 * may have same value in different namespaces. Or is it same namespace ??
2783 * Current code checks for each opt_type (other than pads) if it is in
2784 * the expected nexthdr (hbh or dest)
2787 ip_process_options_v6(mblk_t *mp, ip6_t *ip6h,
2788 uint8_t *optptr, uint_t optlen, uint8_t hdr_type, ip_recv_attr_t *ira)
2790 uint8_t opt_type;
2791 uint_t optused;
2792 int ret = 0;
2793 const char *errtype;
2794 ill_t *ill = ira->ira_ill;
2795 ip_stack_t *ipst = ill->ill_ipst;
2797 while (optlen != 0) {
2798 opt_type = *optptr;
2799 if (opt_type == IP6OPT_PAD1) {
2800 optused = 1;
2801 } else {
2802 if (optlen < 2)
2803 goto bad_opt;
2804 errtype = "malformed";
2805 switch (opt_type) {
2806 case IP6OPT_PADN:
2808 * Note:We don't verify that (N-2) pad octets
2809 * are zero as required by spec. Adhere to
2810 * "be liberal in what you accept..." part of
2811 * implementation philosophy (RFC791,RFC1122)
2813 optused = 2 + optptr[1];
2814 if (optused > optlen)
2815 goto bad_opt;
2816 break;
2818 case IP6OPT_JUMBO:
2819 if (hdr_type != IPPROTO_HOPOPTS)
2820 goto opt_error;
2821 goto opt_error; /* XXX Not implemented! */
2823 case IP6OPT_ROUTER_ALERT: {
2824 struct ip6_opt_router *or;
2826 if (hdr_type != IPPROTO_HOPOPTS)
2827 goto opt_error;
2828 optused = 2 + optptr[1];
2829 if (optused > optlen)
2830 goto bad_opt;
2831 or = (struct ip6_opt_router *)optptr;
2832 /* Check total length and alignment */
2833 if (optused != sizeof (*or) ||
2834 ((uintptr_t)or->ip6or_value & 0x1) != 0)
2835 goto opt_error;
2836 /* Check value */
2837 switch (*((uint16_t *)or->ip6or_value)) {
2838 case IP6_ALERT_MLD:
2839 case IP6_ALERT_RSVP:
2840 ret = 1;
2842 break;
2844 case IP6OPT_HOME_ADDRESS: {
2846 * Minimal support for the home address option
2847 * (which is required by all IPv6 nodes).
2848 * Implement by just swapping the home address
2849 * and source address.
2850 * XXX Note: this has IPsec implications since
2851 * AH needs to take this into account.
2852 * Also, when IPsec is used we need to ensure
2853 * that this is only processed once
2854 * in the received packet (to avoid swapping
2855 * back and forth).
2856 * NOTE:This option processing is considered
2857 * to be unsafe and prone to a denial of
2858 * service attack.
2859 * The current processing is not safe even with
2860 * IPsec secured IP packets. Since the home
2861 * address option processing requirement still
2862 * is in the IETF draft and in the process of
2863 * being redefined for its usage, it has been
2864 * decided to turn off the option by default.
2865 * If this section of code needs to be executed,
2866 * ndd variable ip6_ignore_home_address_opt
2867 * should be set to 0 at the user's own risk.
2869 struct ip6_opt_home_address *oh;
2870 in6_addr_t tmp;
2872 if (ipst->ips_ipv6_ignore_home_address_opt)
2873 goto opt_error;
2875 if (hdr_type != IPPROTO_DSTOPTS)
2876 goto opt_error;
2877 optused = 2 + optptr[1];
2878 if (optused > optlen)
2879 goto bad_opt;
2882 * We did this dest. opt the first time
2883 * around (i.e. before AH processing).
2884 * If we've done AH... stop now.
2886 if ((ira->ira_flags & IRAF_IPSEC_SECURE) &&
2887 ira->ira_ipsec_ah_sa != NULL)
2888 break;
2890 oh = (struct ip6_opt_home_address *)optptr;
2891 /* Check total length and alignment */
2892 if (optused < sizeof (*oh) ||
2893 ((uintptr_t)oh->ip6oh_addr & 0x7) != 0)
2894 goto opt_error;
2895 /* Swap ip6_src and the home address */
2896 tmp = ip6h->ip6_src;
2897 /* XXX Note: only 8 byte alignment option */
2898 ip6h->ip6_src = *(in6_addr_t *)oh->ip6oh_addr;
2899 *(in6_addr_t *)oh->ip6oh_addr = tmp;
2900 break;
2903 case IP6OPT_TUNNEL_LIMIT:
2904 if (hdr_type != IPPROTO_DSTOPTS) {
2905 goto opt_error;
2907 optused = 2 + optptr[1];
2908 if (optused > optlen) {
2909 goto bad_opt;
2911 if (optused != 3) {
2912 goto opt_error;
2914 break;
2916 default:
2917 errtype = "unknown";
2918 /* FALLTHROUGH */
2919 opt_error:
2920 /* Determine which zone should send error */
2921 switch (IP6OPT_TYPE(opt_type)) {
2922 case IP6OPT_TYPE_SKIP:
2923 optused = 2 + optptr[1];
2924 if (optused > optlen)
2925 goto bad_opt;
2926 ip1dbg(("ip_process_options_v6: %s "
2927 "opt 0x%x skipped\n",
2928 errtype, opt_type));
2929 break;
2930 case IP6OPT_TYPE_DISCARD:
2931 ip1dbg(("ip_process_options_v6: %s "
2932 "opt 0x%x; packet dropped\n",
2933 errtype, opt_type));
2934 BUMP_MIB(ill->ill_ip_mib,
2935 ipIfStatsInHdrErrors);
2936 ip_drop_input("ipIfStatsInHdrErrors",
2937 mp, ill);
2938 freemsg(mp);
2939 return (-1);
2940 case IP6OPT_TYPE_ICMP:
2941 BUMP_MIB(ill->ill_ip_mib,
2942 ipIfStatsInHdrErrors);
2943 ip_drop_input("ipIfStatsInHdrErrors",
2944 mp, ill);
2945 icmp_param_problem_v6(mp,
2946 ICMP6_PARAMPROB_OPTION,
2947 (uint32_t)(optptr -
2948 (uint8_t *)ip6h),
2949 B_FALSE, ira);
2950 return (-1);
2951 case IP6OPT_TYPE_FORCEICMP:
2952 BUMP_MIB(ill->ill_ip_mib,
2953 ipIfStatsInHdrErrors);
2954 ip_drop_input("ipIfStatsInHdrErrors",
2955 mp, ill);
2956 icmp_param_problem_v6(mp,
2957 ICMP6_PARAMPROB_OPTION,
2958 (uint32_t)(optptr -
2959 (uint8_t *)ip6h),
2960 B_TRUE, ira);
2961 return (-1);
2962 default:
2963 ASSERT(0);
2967 optlen -= optused;
2968 optptr += optused;
2970 return (ret);
2972 bad_opt:
2973 /* Determine which zone should send error */
2974 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
2975 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_OPTION,
2976 (uint32_t)(optptr - (uint8_t *)ip6h),
2977 B_FALSE, ira);
2978 return (-1);
2982 * Process a routing header that is not yet empty.
2983 * Because of RFC 5095, we now reject all route headers.
2985 void
2986 ip_process_rthdr(mblk_t *mp, ip6_t *ip6h, ip6_rthdr_t *rth,
2987 ip_recv_attr_t *ira)
2989 ill_t *ill = ira->ira_ill;
2990 ip_stack_t *ipst = ill->ill_ipst;
2992 ASSERT(rth->ip6r_segleft != 0);
2994 if (!ipst->ips_ipv6_forward_src_routed) {
2995 /* XXX Check for source routed out same interface? */
2996 BUMP_MIB(ill->ill_ip_mib, ipIfStatsForwProhibits);
2997 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInAddrErrors);
2998 ip_drop_input("ipIfStatsInAddrErrors", mp, ill);
2999 freemsg(mp);
3000 return;
3003 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3004 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3005 (uint32_t)((uchar_t *)&rth->ip6r_type - (uchar_t *)ip6h),
3006 B_FALSE, ira);
3010 * Read side put procedure for IPv6 module.
3012 void
3013 ip_rput_v6(queue_t *q, mblk_t *mp)
3015 ill_t *ill;
3017 ill = (ill_t *)q->q_ptr;
3018 if (ill->ill_state_flags & (ILL_CONDEMNED | ILL_LL_SUBNET_PENDING)) {
3019 union DL_primitives *dl;
3021 dl = (union DL_primitives *)mp->b_rptr;
3023 * Things are opening or closing - only accept DLPI
3024 * ack messages. If the stream is closing and ip_wsrv
3025 * has completed, ip_close is out of the qwait, but has
3026 * not yet completed qprocsoff. Don't proceed any further
3027 * because the ill has been cleaned up and things hanging
3028 * off the ill have been freed.
3030 if ((mp->b_datap->db_type != M_PCPROTO) ||
3031 (dl->dl_primitive == DL_UNITDATA_IND)) {
3032 inet_freemsg(mp);
3033 return;
3036 if (DB_TYPE(mp) == M_DATA) {
3037 struct mac_header_info_s mhi;
3039 ip_mdata_to_mhi(ill, mp, &mhi);
3040 ip_input_v6(ill, NULL, mp, &mhi);
3041 } else {
3042 ip_rput_notdata(ill, mp);
3047 * Walk through the IPv6 packet in mp and see if there's an AH header
3048 * in it. See if the AH header needs to get done before other headers in
3049 * the packet. (Worker function for ipsec_early_ah_v6().)
3051 #define IPSEC_HDR_DONT_PROCESS 0
3052 #define IPSEC_HDR_PROCESS 1
3053 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */
3054 static int
3055 ipsec_needs_processing_v6(mblk_t *mp, uint8_t *nexthdr)
3057 uint_t length;
3058 uint_t ehdrlen;
3059 uint8_t *whereptr;
3060 uint8_t *endptr;
3061 uint8_t *nexthdrp;
3062 ip6_dest_t *desthdr;
3063 ip6_rthdr_t *rthdr;
3064 ip6_t *ip6h;
3067 * For now just pullup everything. In general, the less pullups,
3068 * the better, but there's so much squirrelling through anyway,
3069 * it's just easier this way.
3071 if (!pullupmsg(mp, -1)) {
3072 return (IPSEC_MEMORY_ERROR);
3075 ip6h = (ip6_t *)mp->b_rptr;
3076 length = IPV6_HDR_LEN;
3077 whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
3078 endptr = mp->b_wptr;
3081 * We can't just use the argument nexthdr in the place
3082 * of nexthdrp becaue we don't dereference nexthdrp
3083 * till we confirm whether it is a valid address.
3085 nexthdrp = &ip6h->ip6_nxt;
3086 while (whereptr < endptr) {
3087 /* Is there enough left for len + nexthdr? */
3088 if (whereptr + MIN_EHDR_LEN > endptr)
3089 return (IPSEC_MEMORY_ERROR);
3091 switch (*nexthdrp) {
3092 case IPPROTO_HOPOPTS:
3093 case IPPROTO_DSTOPTS:
3094 /* Assumes the headers are identical for hbh and dst */
3095 desthdr = (ip6_dest_t *)whereptr;
3096 ehdrlen = 8 * (desthdr->ip6d_len + 1);
3097 if ((uchar_t *)desthdr + ehdrlen > endptr)
3098 return (IPSEC_MEMORY_ERROR);
3100 * Return DONT_PROCESS because the destination
3101 * options header may be for each hop in a
3102 * routing-header, and we only want AH if we're
3103 * finished with routing headers.
3105 if (*nexthdrp == IPPROTO_DSTOPTS)
3106 return (IPSEC_HDR_DONT_PROCESS);
3107 nexthdrp = &desthdr->ip6d_nxt;
3108 break;
3109 case IPPROTO_ROUTING:
3110 rthdr = (ip6_rthdr_t *)whereptr;
3113 * If there's more hops left on the routing header,
3114 * return now with DON'T PROCESS.
3116 if (rthdr->ip6r_segleft > 0)
3117 return (IPSEC_HDR_DONT_PROCESS);
3119 ehdrlen = 8 * (rthdr->ip6r_len + 1);
3120 if ((uchar_t *)rthdr + ehdrlen > endptr)
3121 return (IPSEC_MEMORY_ERROR);
3122 nexthdrp = &rthdr->ip6r_nxt;
3123 break;
3124 case IPPROTO_FRAGMENT:
3125 /* Wait for reassembly */
3126 return (IPSEC_HDR_DONT_PROCESS);
3127 case IPPROTO_AH:
3128 *nexthdr = IPPROTO_AH;
3129 return (IPSEC_HDR_PROCESS);
3130 case IPPROTO_NONE:
3131 /* No next header means we're finished */
3132 default:
3133 return (IPSEC_HDR_DONT_PROCESS);
3135 length += ehdrlen;
3136 whereptr += ehdrlen;
3139 * Malformed/truncated packet.
3141 return (IPSEC_MEMORY_ERROR);
3145 * Path for AH if options are present.
3146 * Returns NULL if the mblk was consumed.
3148 * Sometimes AH needs to be done before other IPv6 headers for security
3149 * reasons. This function (and its ipsec_needs_processing_v6() above)
3150 * indicates if that is so, and fans out to the appropriate IPsec protocol
3151 * for the datagram passed in.
3153 mblk_t *
3154 ipsec_early_ah_v6(mblk_t *mp, ip_recv_attr_t *ira)
3156 uint8_t nexthdr;
3157 ah_t *ah;
3158 ill_t *ill = ira->ira_ill;
3159 ip_stack_t *ipst = ill->ill_ipst;
3160 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec;
3162 switch (ipsec_needs_processing_v6(mp, &nexthdr)) {
3163 case IPSEC_MEMORY_ERROR:
3164 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3165 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3166 freemsg(mp);
3167 return (NULL);
3168 case IPSEC_HDR_DONT_PROCESS:
3169 return (mp);
3172 /* Default means send it to AH! */
3173 ASSERT(nexthdr == IPPROTO_AH);
3175 if (!ipsec_loaded(ipss)) {
3176 ip_proto_not_sup(mp, ira);
3177 return (NULL);
3180 mp = ipsec_inbound_ah_sa(mp, ira, &ah);
3181 if (mp == NULL)
3182 return (NULL);
3183 ASSERT(ah != NULL);
3184 ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
3185 ASSERT(ira->ira_ipsec_ah_sa != NULL);
3186 ASSERT(ira->ira_ipsec_ah_sa->ipsa_input_func != NULL);
3187 mp = ira->ira_ipsec_ah_sa->ipsa_input_func(mp, ah, ira);
3189 if (mp == NULL) {
3191 * Either it failed or is pending. In the former case
3192 * ipIfStatsInDiscards was increased.
3194 return (NULL);
3197 /* we're done with IPsec processing, send it up */
3198 ip_input_post_ipsec(mp, ira);
3199 return (NULL);
3203 * Reassemble fragment.
3204 * When it returns a completed message the first mblk will only contain
3205 * the headers prior to the fragment header, with the nexthdr value updated
3206 * to be the header after the fragment header.
3208 mblk_t *
3209 ip_input_fragment_v6(mblk_t *mp, ip6_t *ip6h,
3210 ip6_frag_t *fraghdr, uint_t remlen, ip_recv_attr_t *ira)
3212 uint32_t ident = ntohl(fraghdr->ip6f_ident);
3213 uint16_t offset;
3214 boolean_t more_frags;
3215 uint8_t nexthdr = fraghdr->ip6f_nxt;
3216 in6_addr_t *v6dst_ptr;
3217 in6_addr_t *v6src_ptr;
3218 uint_t end;
3219 uint_t hdr_length;
3220 size_t count;
3221 ipf_t *ipf;
3222 ipf_t **ipfp;
3223 ipfb_t *ipfb;
3224 mblk_t *mp1;
3225 uint8_t ecn_info = 0;
3226 size_t msg_len;
3227 mblk_t *tail_mp;
3228 mblk_t *t_mp;
3229 boolean_t pruned = B_FALSE;
3230 uint32_t sum_val;
3231 uint16_t sum_flags;
3232 ill_t *ill = ira->ira_ill;
3233 ip_stack_t *ipst = ill->ill_ipst;
3234 uint_t prev_nexthdr_offset;
3235 uint8_t prev_nexthdr;
3236 uint8_t *ptr;
3237 uint32_t packet_size;
3240 * We utilize hardware computed checksum info only for UDP since
3241 * IP fragmentation is a normal occurrence for the protocol. In
3242 * addition, checksum offload support for IP fragments carrying
3243 * UDP payload is commonly implemented across network adapters.
3245 ASSERT(ira->ira_rill != NULL);
3246 if (nexthdr == IPPROTO_UDP && dohwcksum &&
3247 ILL_HCKSUM_CAPABLE(ira->ira_rill) &&
3248 (DB_CKSUMFLAGS(mp) & (HCK_FULLCKSUM | HCK_PARTIALCKSUM))) {
3249 mblk_t *mp1 = mp->b_cont;
3250 int32_t len;
3252 /* Record checksum information from the packet */
3253 sum_val = (uint32_t)DB_CKSUM16(mp);
3254 sum_flags = DB_CKSUMFLAGS(mp);
3256 /* fragmented payload offset from beginning of mblk */
3257 offset = (uint16_t)((uchar_t *)&fraghdr[1] - mp->b_rptr);
3259 if ((sum_flags & HCK_PARTIALCKSUM) &&
3260 (mp1 == NULL || mp1->b_cont == NULL) &&
3261 offset >= DB_CKSUMSTART(mp) &&
3262 ((len = offset - DB_CKSUMSTART(mp)) & 1) == 0) {
3263 uint32_t adj;
3265 * Partial checksum has been calculated by hardware
3266 * and attached to the packet; in addition, any
3267 * prepended extraneous data is even byte aligned.
3268 * If any such data exists, we adjust the checksum;
3269 * this would also handle any postpended data.
3271 IP_ADJCKSUM_PARTIAL(mp->b_rptr + DB_CKSUMSTART(mp),
3272 mp, mp1, len, adj);
3274 /* One's complement subtract extraneous checksum */
3275 if (adj >= sum_val)
3276 sum_val = ~(adj - sum_val) & 0xFFFF;
3277 else
3278 sum_val -= adj;
3280 } else {
3281 sum_val = 0;
3282 sum_flags = 0;
3285 /* Clear hardware checksumming flag */
3286 DB_CKSUMFLAGS(mp) = 0;
3289 * Determine the offset (from the beginning of the IP header)
3290 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3291 * this when removing the fragment header from the packet.
3292 * This packet consists of the IPv6 header, a potential
3293 * hop-by-hop options header, a potential pre-routing-header
3294 * destination options header, and a potential routing header.
3296 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
3297 prev_nexthdr = ip6h->ip6_nxt;
3298 ptr = (uint8_t *)&ip6h[1];
3300 if (prev_nexthdr == IPPROTO_HOPOPTS) {
3301 ip6_hbh_t *hbh_hdr;
3302 uint_t hdr_len;
3304 hbh_hdr = (ip6_hbh_t *)ptr;
3305 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
3306 prev_nexthdr = hbh_hdr->ip6h_nxt;
3307 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
3308 - (uint8_t *)ip6h;
3309 ptr += hdr_len;
3311 if (prev_nexthdr == IPPROTO_DSTOPTS) {
3312 ip6_dest_t *dest_hdr;
3313 uint_t hdr_len;
3315 dest_hdr = (ip6_dest_t *)ptr;
3316 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
3317 prev_nexthdr = dest_hdr->ip6d_nxt;
3318 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
3319 - (uint8_t *)ip6h;
3320 ptr += hdr_len;
3322 if (prev_nexthdr == IPPROTO_ROUTING) {
3323 ip6_rthdr_t *rthdr;
3324 uint_t hdr_len;
3326 rthdr = (ip6_rthdr_t *)ptr;
3327 prev_nexthdr = rthdr->ip6r_nxt;
3328 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
3329 - (uint8_t *)ip6h;
3330 hdr_len = 8 * (rthdr->ip6r_len + 1);
3331 ptr += hdr_len;
3333 if (prev_nexthdr != IPPROTO_FRAGMENT) {
3334 /* Can't handle other headers before the fragment header */
3335 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3336 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3337 freemsg(mp);
3338 return (NULL);
3342 * Note: Fragment offset in header is in 8-octet units.
3343 * Clearing least significant 3 bits not only extracts
3344 * it but also gets it in units of octets.
3346 offset = ntohs(fraghdr->ip6f_offlg) & ~7;
3347 more_frags = (fraghdr->ip6f_offlg & IP6F_MORE_FRAG);
3350 * Is the more frags flag on and the payload length not a multiple
3351 * of eight?
3353 if (more_frags && (ntohs(ip6h->ip6_plen) & 7)) {
3354 ip_drop_input("ICMP_PARAM_PROBLEM", mp, ill);
3355 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3356 (uint32_t)((char *)&ip6h->ip6_plen -
3357 (char *)ip6h), B_FALSE, ira);
3358 return (NULL);
3361 v6src_ptr = &ip6h->ip6_src;
3362 v6dst_ptr = &ip6h->ip6_dst;
3363 end = remlen;
3365 hdr_length = (uint_t)((char *)&fraghdr[1] - (char *)ip6h);
3366 end += offset;
3369 * Would fragment cause reassembled packet to have a payload length
3370 * greater than IP_MAXPACKET - the max payload size?
3372 if (end > IP_MAXPACKET) {
3373 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3374 ip_drop_input("Reassembled packet too large", mp, ill);
3375 icmp_param_problem_v6(mp, ICMP6_PARAMPROB_HEADER,
3376 (uint32_t)((char *)&fraghdr->ip6f_offlg -
3377 (char *)ip6h), B_FALSE, ira);
3378 return (NULL);
3382 * This packet just has one fragment. Reassembly not
3383 * needed.
3385 if (!more_frags && offset == 0) {
3386 goto reass_done;
3390 * Drop the fragmented as early as possible, if
3391 * we don't have resource(s) to re-assemble.
3393 if (ipst->ips_ip_reass_queue_bytes == 0) {
3394 freemsg(mp);
3395 return (NULL);
3398 /* Record the ECN field info. */
3399 ecn_info = (uint8_t)(ntohl(ip6h->ip6_vcf & htonl(~0xFFCFFFFF)) >> 20);
3401 * If this is not the first fragment, dump the unfragmentable
3402 * portion of the packet.
3404 if (offset)
3405 mp->b_rptr = (uchar_t *)&fraghdr[1];
3408 * Fragmentation reassembly. Each ILL has a hash table for
3409 * queueing packets undergoing reassembly for all IPIFs
3410 * associated with the ILL. The hash is based on the packet
3411 * IP ident field. The ILL frag hash table was allocated
3412 * as a timer block at the time the ILL was created. Whenever
3413 * there is anything on the reassembly queue, the timer will
3414 * be running.
3416 /* Handle vnic loopback of fragments */
3417 if (mp->b_datap->db_ref > 2)
3418 msg_len = 0;
3419 else
3420 msg_len = MBLKSIZE(mp);
3422 tail_mp = mp;
3423 while (tail_mp->b_cont != NULL) {
3424 tail_mp = tail_mp->b_cont;
3425 if (tail_mp->b_datap->db_ref <= 2)
3426 msg_len += MBLKSIZE(tail_mp);
3429 * If the reassembly list for this ILL will get too big
3430 * prune it.
3433 if ((msg_len + sizeof (*ipf) + ill->ill_frag_count) >=
3434 ipst->ips_ip_reass_queue_bytes) {
3435 DTRACE_PROBE3(ip_reass_queue_bytes, uint_t, msg_len,
3436 uint_t, ill->ill_frag_count,
3437 uint_t, ipst->ips_ip_reass_queue_bytes);
3438 ill_frag_prune(ill,
3439 (ipst->ips_ip_reass_queue_bytes < msg_len) ? 0 :
3440 (ipst->ips_ip_reass_queue_bytes - msg_len));
3441 pruned = B_TRUE;
3444 ipfb = &ill->ill_frag_hash_tbl[ILL_FRAG_HASH_V6(*v6src_ptr, ident)];
3445 mutex_enter(&ipfb->ipfb_lock);
3447 ipfp = &ipfb->ipfb_ipf;
3448 /* Try to find an existing fragment queue for this packet. */
3449 for (;;) {
3450 ipf = ipfp[0];
3451 if (ipf) {
3453 * It has to match on ident, source address, and
3454 * dest address.
3456 if (ipf->ipf_ident == ident &&
3457 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6src, v6src_ptr) &&
3458 IN6_ARE_ADDR_EQUAL(&ipf->ipf_v6dst, v6dst_ptr)) {
3461 * If we have received too many
3462 * duplicate fragments for this packet
3463 * free it.
3465 if (ipf->ipf_num_dups > ip_max_frag_dups) {
3466 ill_frag_free_pkts(ill, ipfb, ipf, 1);
3467 freemsg(mp);
3468 mutex_exit(&ipfb->ipfb_lock);
3469 return (NULL);
3472 break;
3474 ipfp = &ipf->ipf_hash_next;
3475 continue;
3480 * If we pruned the list, do we want to store this new
3481 * fragment?. We apply an optimization here based on the
3482 * fact that most fragments will be received in order.
3483 * So if the offset of this incoming fragment is zero,
3484 * it is the first fragment of a new packet. We will
3485 * keep it. Otherwise drop the fragment, as we have
3486 * probably pruned the packet already (since the
3487 * packet cannot be found).
3490 if (pruned && offset != 0) {
3491 mutex_exit(&ipfb->ipfb_lock);
3492 freemsg(mp);
3493 return (NULL);
3496 /* New guy. Allocate a frag message. */
3497 mp1 = allocb(sizeof (*ipf), BPRI_MED);
3498 if (!mp1) {
3499 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3500 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3501 freemsg(mp);
3502 partial_reass_done:
3503 mutex_exit(&ipfb->ipfb_lock);
3504 return (NULL);
3507 if (ipfb->ipfb_frag_pkts >= MAX_FRAG_PKTS(ipst)) {
3509 * Too many fragmented packets in this hash bucket.
3510 * Free the oldest.
3512 ill_frag_free_pkts(ill, ipfb, ipfb->ipfb_ipf, 1);
3515 mp1->b_cont = mp;
3517 /* Initialize the fragment header. */
3518 ipf = (ipf_t *)mp1->b_rptr;
3519 ipf->ipf_mp = mp1;
3520 ipf->ipf_ptphn = ipfp;
3521 ipfp[0] = ipf;
3522 ipf->ipf_hash_next = NULL;
3523 ipf->ipf_ident = ident;
3524 ipf->ipf_v6src = *v6src_ptr;
3525 ipf->ipf_v6dst = *v6dst_ptr;
3526 /* Record reassembly start time. */
3527 ipf->ipf_timestamp = gethrestime_sec();
3528 /* Record ipf generation and account for frag header */
3529 ipf->ipf_gen = ill->ill_ipf_gen++;
3530 ipf->ipf_count = MBLKSIZE(mp1);
3531 ipf->ipf_protocol = nexthdr;
3532 ipf->ipf_nf_hdr_len = 0;
3533 ipf->ipf_prev_nexthdr_offset = 0;
3534 ipf->ipf_last_frag_seen = B_FALSE;
3535 ipf->ipf_ecn = ecn_info;
3536 ipf->ipf_num_dups = 0;
3537 ipfb->ipfb_frag_pkts++;
3538 ipf->ipf_checksum = 0;
3539 ipf->ipf_checksum_flags = 0;
3541 /* Store checksum value in fragment header */
3542 if (sum_flags != 0) {
3543 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3544 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3545 ipf->ipf_checksum = sum_val;
3546 ipf->ipf_checksum_flags = sum_flags;
3550 * We handle reassembly two ways. In the easy case,
3551 * where all the fragments show up in order, we do
3552 * minimal bookkeeping, and just clip new pieces on
3553 * the end. If we ever see a hole, then we go off
3554 * to ip_reassemble which has to mark the pieces and
3555 * keep track of the number of holes, etc. Obviously,
3556 * the point of having both mechanisms is so we can
3557 * handle the easy case as efficiently as possible.
3559 if (offset == 0) {
3560 /* Easy case, in-order reassembly so far. */
3561 /* Update the byte count */
3562 ipf->ipf_count += msg_len;
3563 ipf->ipf_tail_mp = tail_mp;
3565 * Keep track of next expected offset in
3566 * ipf_end.
3568 ipf->ipf_end = end;
3569 ipf->ipf_nf_hdr_len = hdr_length;
3570 ipf->ipf_prev_nexthdr_offset = prev_nexthdr_offset;
3571 } else {
3572 /* Hard case, hole at the beginning. */
3573 ipf->ipf_tail_mp = NULL;
3575 * ipf_end == 0 means that we have given up
3576 * on easy reassembly.
3578 ipf->ipf_end = 0;
3580 /* Forget checksum offload from now on */
3581 ipf->ipf_checksum_flags = 0;
3584 * ipf_hole_cnt is set by ip_reassemble.
3585 * ipf_count is updated by ip_reassemble.
3586 * No need to check for return value here
3587 * as we don't expect reassembly to complete or
3588 * fail for the first fragment itself.
3590 (void) ip_reassemble(mp, ipf, offset, more_frags, ill,
3591 msg_len);
3593 /* Update per ipfb and ill byte counts */
3594 ipfb->ipfb_count += ipf->ipf_count;
3595 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
3596 atomic_add_32(&ill->ill_frag_count, ipf->ipf_count);
3597 /* If the frag timer wasn't already going, start it. */
3598 mutex_enter(&ill->ill_lock);
3599 ill_frag_timer_start(ill);
3600 mutex_exit(&ill->ill_lock);
3601 goto partial_reass_done;
3605 * If the packet's flag has changed (it could be coming up
3606 * from an interface different than the previous, therefore
3607 * possibly different checksum capability), then forget about
3608 * any stored checksum states. Otherwise add the value to
3609 * the existing one stored in the fragment header.
3611 if (sum_flags != 0 && sum_flags == ipf->ipf_checksum_flags) {
3612 sum_val += ipf->ipf_checksum;
3613 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3614 sum_val = (sum_val & 0xFFFF) + (sum_val >> 16);
3615 ipf->ipf_checksum = sum_val;
3616 } else if (ipf->ipf_checksum_flags != 0) {
3617 /* Forget checksum offload from now on */
3618 ipf->ipf_checksum_flags = 0;
3622 * We have a new piece of a datagram which is already being
3623 * reassembled. Update the ECN info if all IP fragments
3624 * are ECN capable. If there is one which is not, clear
3625 * all the info. If there is at least one which has CE
3626 * code point, IP needs to report that up to transport.
3628 if (ecn_info != IPH_ECN_NECT && ipf->ipf_ecn != IPH_ECN_NECT) {
3629 if (ecn_info == IPH_ECN_CE)
3630 ipf->ipf_ecn = IPH_ECN_CE;
3631 } else {
3632 ipf->ipf_ecn = IPH_ECN_NECT;
3635 if (offset && ipf->ipf_end == offset) {
3636 /* The new fragment fits at the end */
3637 ipf->ipf_tail_mp->b_cont = mp;
3638 /* Update the byte count */
3639 ipf->ipf_count += msg_len;
3640 /* Update per ipfb and ill byte counts */
3641 ipfb->ipfb_count += msg_len;
3642 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
3643 atomic_add_32(&ill->ill_frag_count, msg_len);
3644 if (more_frags) {
3645 /* More to come. */
3646 ipf->ipf_end = end;
3647 ipf->ipf_tail_mp = tail_mp;
3648 goto partial_reass_done;
3650 } else {
3652 * Go do the hard cases.
3653 * Call ip_reassemble().
3655 int ret;
3657 if (offset == 0) {
3658 if (ipf->ipf_prev_nexthdr_offset == 0) {
3659 ipf->ipf_nf_hdr_len = hdr_length;
3660 ipf->ipf_prev_nexthdr_offset =
3661 prev_nexthdr_offset;
3664 /* Save current byte count */
3665 count = ipf->ipf_count;
3666 ret = ip_reassemble(mp, ipf, offset, more_frags, ill, msg_len);
3668 /* Count of bytes added and subtracted (freeb()ed) */
3669 count = ipf->ipf_count - count;
3670 if (count) {
3671 /* Update per ipfb and ill byte counts */
3672 ipfb->ipfb_count += count;
3673 ASSERT(ipfb->ipfb_count > 0); /* Wraparound */
3674 atomic_add_32(&ill->ill_frag_count, count);
3676 if (ret == IP_REASS_PARTIAL) {
3677 goto partial_reass_done;
3678 } else if (ret == IP_REASS_FAILED) {
3679 /* Reassembly failed. Free up all resources */
3680 ill_frag_free_pkts(ill, ipfb, ipf, 1);
3681 for (t_mp = mp; t_mp != NULL; t_mp = t_mp->b_cont) {
3682 IP_REASS_SET_START(t_mp, 0);
3683 IP_REASS_SET_END(t_mp, 0);
3685 freemsg(mp);
3686 goto partial_reass_done;
3689 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3692 * We have completed reassembly. Unhook the frag header from
3693 * the reassembly list.
3695 * Grab the unfragmentable header length next header value out
3696 * of the first fragment
3698 ASSERT(ipf->ipf_nf_hdr_len != 0);
3699 hdr_length = ipf->ipf_nf_hdr_len;
3702 * Before we free the frag header, record the ECN info
3703 * to report back to the transport.
3705 ecn_info = ipf->ipf_ecn;
3708 * Store the nextheader field in the header preceding the fragment
3709 * header
3711 nexthdr = ipf->ipf_protocol;
3712 prev_nexthdr_offset = ipf->ipf_prev_nexthdr_offset;
3713 ipfp = ipf->ipf_ptphn;
3715 /* We need to supply these to caller */
3716 if ((sum_flags = ipf->ipf_checksum_flags) != 0)
3717 sum_val = ipf->ipf_checksum;
3718 else
3719 sum_val = 0;
3721 mp1 = ipf->ipf_mp;
3722 count = ipf->ipf_count;
3723 ipf = ipf->ipf_hash_next;
3724 if (ipf)
3725 ipf->ipf_ptphn = ipfp;
3726 ipfp[0] = ipf;
3727 atomic_add_32(&ill->ill_frag_count, -count);
3728 ASSERT(ipfb->ipfb_count >= count);
3729 ipfb->ipfb_count -= count;
3730 ipfb->ipfb_frag_pkts--;
3731 mutex_exit(&ipfb->ipfb_lock);
3732 /* Ditch the frag header. */
3733 mp = mp1->b_cont;
3734 freeb(mp1);
3737 * Make sure the packet is good by doing some sanity
3738 * check. If bad we can silently drop the packet.
3740 reass_done:
3741 if (hdr_length < sizeof (ip6_frag_t)) {
3742 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInHdrErrors);
3743 ip_drop_input("ipIfStatsInHdrErrors", mp, ill);
3744 ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3745 freemsg(mp);
3746 return (NULL);
3750 * Remove the fragment header from the initial header by
3751 * splitting the mblk into the non-fragmentable header and
3752 * everything after the fragment extension header. This has the
3753 * side effect of putting all the headers that need destination
3754 * processing into the b_cont block-- on return this fact is
3755 * used in order to avoid having to look at the extensions
3756 * already processed.
3758 * Note that this code assumes that the unfragmentable portion
3759 * of the header is in the first mblk and increments
3760 * the read pointer past it. If this assumption is broken
3761 * this code fails badly.
3763 if (mp->b_rptr + hdr_length != mp->b_wptr) {
3764 mblk_t *nmp;
3766 if (!(nmp = dupb(mp))) {
3767 ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3768 BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3769 ip_drop_input("ipIfStatsInDiscards", mp, ill);
3770 freemsg(mp);
3771 return (NULL);
3773 nmp->b_cont = mp->b_cont;
3774 mp->b_cont = nmp;
3775 nmp->b_rptr += hdr_length;
3777 mp->b_wptr = mp->b_rptr + hdr_length - sizeof (ip6_frag_t);
3779 ip6h = (ip6_t *)mp->b_rptr;
3780 ((char *)ip6h)[prev_nexthdr_offset] = nexthdr;
3782 /* Restore original IP length in header. */
3783 packet_size = msgdsize(mp);
3784 ip6h->ip6_plen = htons((uint16_t)(packet_size - IPV6_HDR_LEN));
3785 /* Record the ECN info. */
3786 ip6h->ip6_vcf &= htonl(0xFFCFFFFF);
3787 ip6h->ip6_vcf |= htonl(ecn_info << 20);
3789 /* Update the receive attributes */
3790 ira->ira_pktlen = packet_size;
3791 ira->ira_ip_hdr_length = hdr_length - sizeof (ip6_frag_t);
3792 ira->ira_protocol = nexthdr;
3794 /* Reassembly is successful; set checksum information in packet */
3795 DB_CKSUM16(mp) = (uint16_t)sum_val;
3796 DB_CKSUMFLAGS(mp) = sum_flags;
3797 DB_CKSUMSTART(mp) = ira->ira_ip_hdr_length;
3799 return (mp);
3803 * Given an mblk and a ptr, find the destination address in an IPv6 routing
3804 * header.
3806 static in6_addr_t
3807 pluck_out_dst(const mblk_t *mp, uint8_t *whereptr, in6_addr_t oldrv)
3809 ip6_rthdr0_t *rt0;
3810 int segleft, numaddr;
3811 in6_addr_t *ap, rv = oldrv;
3813 rt0 = (ip6_rthdr0_t *)whereptr;
3814 if (rt0->ip6r0_type != 0 && rt0->ip6r0_type != 2) {
3815 DTRACE_PROBE2(pluck_out_dst_unknown_type, mblk_t *, mp,
3816 uint8_t *, whereptr);
3817 return (rv);
3819 segleft = rt0->ip6r0_segleft;
3820 numaddr = rt0->ip6r0_len / 2;
3822 if ((rt0->ip6r0_len & 0x1) ||
3823 (mp != NULL && whereptr + (rt0->ip6r0_len + 1) * 8 > mp->b_wptr) ||
3824 (segleft > rt0->ip6r0_len / 2)) {
3826 * Corrupt packet. Either the routing header length is odd
3827 * (can't happen) or mismatched compared to the packet, or the
3828 * number of addresses is. Return what we can. This will
3829 * only be a problem on forwarded packets that get squeezed
3830 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3832 DTRACE_PROBE2(pluck_out_dst_badpkt, mblk_t *, mp, uint8_t *,
3833 whereptr);
3834 return (rv);
3837 if (segleft != 0) {
3838 ap = (in6_addr_t *)((char *)rt0 + sizeof (*rt0));
3839 rv = ap[numaddr - 1];
3842 return (rv);
3846 * Walk through the options to see if there is a routing header.
3847 * If present get the destination which is the last address of
3848 * the option.
3849 * mp needs to be provided in cases when the extension headers might span
3850 * b_cont; mp is never modified by this function.
3852 in6_addr_t
3853 ip_get_dst_v6(ip6_t *ip6h, const mblk_t *mp, boolean_t *is_fragment)
3855 const mblk_t *current_mp = mp;
3856 uint8_t nexthdr;
3857 uint8_t *whereptr;
3858 int ehdrlen;
3859 in6_addr_t rv;
3861 whereptr = (uint8_t *)ip6h;
3862 ehdrlen = sizeof (ip6_t);
3864 /* We assume at least the IPv6 base header is within one mblk. */
3865 ASSERT(mp == NULL ||
3866 (mp->b_rptr <= whereptr && mp->b_wptr >= whereptr + ehdrlen));
3868 rv = ip6h->ip6_dst;
3869 nexthdr = ip6h->ip6_nxt;
3870 if (is_fragment != NULL)
3871 *is_fragment = B_FALSE;
3874 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3875 * no extension headers will be split across mblks.
3878 while (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS ||
3879 nexthdr == IPPROTO_ROUTING) {
3880 if (nexthdr == IPPROTO_ROUTING)
3881 rv = pluck_out_dst(current_mp, whereptr, rv);
3884 * All IPv6 extension headers have the next-header in byte
3885 * 0, and the (length - 8) in 8-byte-words.
3887 while (current_mp != NULL &&
3888 whereptr + ehdrlen >= current_mp->b_wptr) {
3889 ehdrlen -= (current_mp->b_wptr - whereptr);
3890 current_mp = current_mp->b_cont;
3891 if (current_mp == NULL) {
3892 /* Bad packet. Return what we can. */
3893 DTRACE_PROBE3(ip_get_dst_v6_badpkt, mblk_t *,
3894 mp, mblk_t *, current_mp, ip6_t *, ip6h);
3895 goto done;
3897 whereptr = current_mp->b_rptr;
3899 whereptr += ehdrlen;
3901 nexthdr = *whereptr;
3902 ASSERT(current_mp == NULL || whereptr + 1 < current_mp->b_wptr);
3903 ehdrlen = (*(whereptr + 1) + 1) * 8;
3906 done:
3907 if (nexthdr == IPPROTO_FRAGMENT && is_fragment != NULL)
3908 *is_fragment = B_TRUE;
3909 return (rv);
3913 * ip_source_routed_v6:
3914 * This function is called by redirect code (called from ip_input_v6) to
3915 * know whether this packet is source routed through this node i.e
3916 * whether this node (router) is part of the journey. This
3917 * function is called under two cases :
3919 * case 1 : Routing header was processed by this node and
3920 * ip_process_rthdr replaced ip6_dst with the next hop
3921 * and we are forwarding the packet to the next hop.
3923 * case 2 : Routing header was not processed by this node and we
3924 * are just forwarding the packet.
3926 * For case (1) we don't want to send redirects. For case(2) we
3927 * want to send redirects.
3929 static boolean_t
3930 ip_source_routed_v6(ip6_t *ip6h, mblk_t *mp, ip_stack_t *ipst)
3932 uint8_t nexthdr;
3933 in6_addr_t *addrptr;
3934 ip6_rthdr0_t *rthdr;
3935 uint8_t numaddr;
3936 ip6_hbh_t *hbhhdr;
3937 uint_t ehdrlen;
3938 uint8_t *byteptr;
3940 ip2dbg(("ip_source_routed_v6\n"));
3941 nexthdr = ip6h->ip6_nxt;
3942 ehdrlen = IPV6_HDR_LEN;
3944 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */
3945 while (nexthdr == IPPROTO_HOPOPTS ||
3946 nexthdr == IPPROTO_DSTOPTS) {
3947 byteptr = (uint8_t *)ip6h + ehdrlen;
3949 * Check if we have already processed
3950 * packets or we are just a forwarding
3951 * router which only pulled up msgs up
3952 * to IPV6HDR and one HBH ext header
3954 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
3955 ip2dbg(("ip_source_routed_v6: Extension"
3956 " headers not processed\n"));
3957 return (B_FALSE);
3959 hbhhdr = (ip6_hbh_t *)byteptr;
3960 nexthdr = hbhhdr->ip6h_nxt;
3961 ehdrlen = ehdrlen + 8 * (hbhhdr->ip6h_len + 1);
3963 switch (nexthdr) {
3964 case IPPROTO_ROUTING:
3965 byteptr = (uint8_t *)ip6h + ehdrlen;
3967 * If for some reason, we haven't pulled up
3968 * the routing hdr data mblk, then we must
3969 * not have processed it at all. So for sure
3970 * we are not part of the source routed journey.
3972 if (byteptr + MIN_EHDR_LEN > mp->b_wptr) {
3973 ip2dbg(("ip_source_routed_v6: Routing"
3974 " header not processed\n"));
3975 return (B_FALSE);
3977 rthdr = (ip6_rthdr0_t *)byteptr;
3979 * Either we are an intermediate router or the
3980 * last hop before destination and we have
3981 * already processed the routing header.
3982 * If segment_left is greater than or equal to zero,
3983 * then we must be the (numaddr - segleft) entry
3984 * of the routing header. Although ip6r0_segleft
3985 * is a unit8_t variable, we still check for zero
3986 * or greater value, if in case the data type
3987 * is changed someday in future.
3989 if (rthdr->ip6r0_segleft > 0 ||
3990 rthdr->ip6r0_segleft == 0) {
3991 numaddr = rthdr->ip6r0_len / 2;
3992 addrptr = (in6_addr_t *)((char *)rthdr +
3993 sizeof (*rthdr));
3994 addrptr += (numaddr - (rthdr->ip6r0_segleft + 1));
3995 if (addrptr != NULL) {
3996 if (ip_type_v6(addrptr, ipst) == IRE_LOCAL)
3997 return (B_TRUE);
3998 ip1dbg(("ip_source_routed_v6: Not local\n"));
4001 /* FALLTHRU */
4002 default:
4003 ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4004 return (B_FALSE);
4009 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
4010 * We have not optimized this in terms of number of mblks
4011 * allocated. For instance, for each fragment sent we always allocate a
4012 * mblk to hold the IPv6 header and fragment header.
4014 * Assumes that all the extension headers are contained in the first mblk
4015 * and that the fragment header has has already been added by calling
4016 * ip_fraghdr_add_v6.
4019 ip_fragment_v6(mblk_t *mp, nce_t *nce, iaflags_t ixaflags, uint_t pkt_len,
4020 uint32_t max_frag, uint32_t xmit_hint, zoneid_t szone, zoneid_t nolzid,
4021 pfirepostfrag_t postfragfn, uintptr_t *ixa_cookie)
4023 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4024 ip6_t *fip6h;
4025 mblk_t *hmp;
4026 mblk_t *hmp0;
4027 mblk_t *dmp;
4028 ip6_frag_t *fraghdr;
4029 size_t unfragmentable_len;
4030 size_t mlen;
4031 size_t max_chunk;
4032 uint16_t off_flags;
4033 uint16_t offset = 0;
4034 ill_t *ill = nce->nce_ill;
4035 uint8_t nexthdr;
4036 uint8_t *ptr;
4037 ip_stack_t *ipst = ill->ill_ipst;
4038 uint_t priority = mp->b_band;
4039 int error = 0;
4041 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragReqds);
4042 if (max_frag == 0) {
4043 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4044 ip_drop_output("FragFails: zero max_frag", mp, ill);
4045 freemsg(mp);
4046 return (EINVAL);
4050 * Caller should have added fraghdr_t to pkt_len, and also
4051 * updated ip6_plen.
4053 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == pkt_len);
4054 ASSERT(msgdsize(mp) == pkt_len);
4057 * Determine the length of the unfragmentable portion of this
4058 * datagram. This consists of the IPv6 header, a potential
4059 * hop-by-hop options header, a potential pre-routing-header
4060 * destination options header, and a potential routing header.
4062 nexthdr = ip6h->ip6_nxt;
4063 ptr = (uint8_t *)&ip6h[1];
4065 if (nexthdr == IPPROTO_HOPOPTS) {
4066 ip6_hbh_t *hbh_hdr;
4067 uint_t hdr_len;
4069 hbh_hdr = (ip6_hbh_t *)ptr;
4070 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4071 nexthdr = hbh_hdr->ip6h_nxt;
4072 ptr += hdr_len;
4074 if (nexthdr == IPPROTO_DSTOPTS) {
4075 ip6_dest_t *dest_hdr;
4076 uint_t hdr_len;
4078 dest_hdr = (ip6_dest_t *)ptr;
4079 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4080 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4081 nexthdr = dest_hdr->ip6d_nxt;
4082 ptr += hdr_len;
4085 if (nexthdr == IPPROTO_ROUTING) {
4086 ip6_rthdr_t *rthdr;
4087 uint_t hdr_len;
4089 rthdr = (ip6_rthdr_t *)ptr;
4090 nexthdr = rthdr->ip6r_nxt;
4091 hdr_len = 8 * (rthdr->ip6r_len + 1);
4092 ptr += hdr_len;
4094 if (nexthdr != IPPROTO_FRAGMENT) {
4095 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4096 ip_drop_output("FragFails: bad nexthdr", mp, ill);
4097 freemsg(mp);
4098 return (EINVAL);
4100 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4101 unfragmentable_len += sizeof (ip6_frag_t);
4103 max_chunk = (max_frag - unfragmentable_len) & ~7;
4106 * Allocate an mblk with enough room for the link-layer
4107 * header and the unfragmentable part of the datagram, which includes
4108 * the fragment header. This (or a copy) will be used as the
4109 * first mblk for each fragment we send.
4111 hmp = allocb_tmpl(unfragmentable_len + ipst->ips_ip_wroff_extra, mp);
4112 if (hmp == NULL) {
4113 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4114 ip_drop_output("FragFails: no hmp", mp, ill);
4115 freemsg(mp);
4116 return (ENOBUFS);
4118 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4119 hmp->b_wptr = hmp->b_rptr + unfragmentable_len;
4121 fip6h = (ip6_t *)hmp->b_rptr;
4122 bcopy(ip6h, fip6h, unfragmentable_len);
4125 * pkt_len is set to the total length of the fragmentable data in this
4126 * datagram. For each fragment sent, we will decrement pkt_len
4127 * by the amount of fragmentable data sent in that fragment
4128 * until len reaches zero.
4130 pkt_len -= unfragmentable_len;
4133 * Move read ptr past unfragmentable portion, we don't want this part
4134 * of the data in our fragments.
4136 mp->b_rptr += unfragmentable_len;
4137 if (mp->b_rptr == mp->b_wptr) {
4138 mblk_t *mp1 = mp->b_cont;
4139 freeb(mp);
4140 mp = mp1;
4143 while (pkt_len != 0) {
4144 mlen = MIN(pkt_len, max_chunk);
4145 pkt_len -= mlen;
4146 if (pkt_len != 0) {
4147 /* Not last */
4148 hmp0 = copyb(hmp);
4149 if (hmp0 == NULL) {
4150 BUMP_MIB(ill->ill_ip_mib,
4151 ipIfStatsOutFragFails);
4152 ip_drop_output("FragFails: copyb failed",
4153 mp, ill);
4154 freeb(hmp);
4155 freemsg(mp);
4156 ip1dbg(("ip_fragment_v6: copyb failed\n"));
4157 return (ENOBUFS);
4159 off_flags = IP6F_MORE_FRAG;
4160 } else {
4161 /* Last fragment */
4162 hmp0 = hmp;
4163 hmp = NULL;
4164 off_flags = 0;
4166 fip6h = (ip6_t *)(hmp0->b_rptr);
4167 fraghdr = (ip6_frag_t *)(hmp0->b_rptr + unfragmentable_len -
4168 sizeof (ip6_frag_t));
4170 fip6h->ip6_plen = htons((uint16_t)(mlen +
4171 unfragmentable_len - IPV6_HDR_LEN));
4173 * Note: Optimization alert.
4174 * In IPv6 (and IPv4) protocol header, Fragment Offset
4175 * ("offset") is 13 bits wide and in 8-octet units.
4176 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4177 * it occupies the most significant 13 bits.
4178 * (least significant 13 bits in IPv4).
4179 * We do not do any shifts here. Not shifting is same effect
4180 * as taking offset value in octet units, dividing by 8 and
4181 * then shifting 3 bits left to line it up in place in proper
4182 * place protocol header.
4184 fraghdr->ip6f_offlg = htons(offset) | off_flags;
4186 if (!(dmp = ip_carve_mp(&mp, mlen))) {
4187 /* mp has already been freed by ip_carve_mp() */
4188 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4189 ip_drop_output("FragFails: could not carve mp",
4190 hmp0, ill);
4191 if (hmp != NULL)
4192 freeb(hmp);
4193 freeb(hmp0);
4194 ip1dbg(("ip_carve_mp: failed\n"));
4195 return (ENOBUFS);
4197 hmp0->b_cont = dmp;
4198 /* Get the priority marking, if any */
4199 hmp0->b_band = priority;
4201 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragCreates);
4203 error = postfragfn(hmp0, nce, ixaflags,
4204 mlen + unfragmentable_len, xmit_hint, szone, nolzid,
4205 ixa_cookie);
4206 if (error != 0 && error != EWOULDBLOCK && hmp != NULL) {
4207 /* No point in sending the other fragments */
4208 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragFails);
4209 ip_drop_output("FragFails: postfragfn failed",
4210 hmp, ill);
4211 freeb(hmp);
4212 freemsg(mp);
4213 return (error);
4215 /* No need to redo state machine in loop */
4216 ixaflags &= ~IXAF_REACH_CONF;
4218 offset += mlen;
4220 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutFragOKs);
4221 return (error);
4225 * Add a fragment header to an IPv6 packet.
4226 * Assumes that all the extension headers are contained in the first mblk.
4228 * The fragment header is inserted after an hop-by-hop options header
4229 * and after [an optional destinations header followed by] a routing header.
4231 mblk_t *
4232 ip_fraghdr_add_v6(mblk_t *mp, uint32_t ident, ip_xmit_attr_t *ixa)
4234 ip6_t *ip6h = (ip6_t *)mp->b_rptr;
4235 ip6_t *fip6h;
4236 mblk_t *hmp;
4237 ip6_frag_t *fraghdr;
4238 size_t unfragmentable_len;
4239 uint8_t nexthdr;
4240 uint_t prev_nexthdr_offset;
4241 uint8_t *ptr;
4242 uint_t priority = mp->b_band;
4243 ip_stack_t *ipst = ixa->ixa_ipst;
4246 * Determine the length of the unfragmentable portion of this
4247 * datagram. This consists of the IPv6 header, a potential
4248 * hop-by-hop options header, a potential pre-routing-header
4249 * destination options header, and a potential routing header.
4251 nexthdr = ip6h->ip6_nxt;
4252 prev_nexthdr_offset = (uint8_t *)&ip6h->ip6_nxt - (uint8_t *)ip6h;
4253 ptr = (uint8_t *)&ip6h[1];
4255 if (nexthdr == IPPROTO_HOPOPTS) {
4256 ip6_hbh_t *hbh_hdr;
4257 uint_t hdr_len;
4259 hbh_hdr = (ip6_hbh_t *)ptr;
4260 hdr_len = 8 * (hbh_hdr->ip6h_len + 1);
4261 nexthdr = hbh_hdr->ip6h_nxt;
4262 prev_nexthdr_offset = (uint8_t *)&hbh_hdr->ip6h_nxt
4263 - (uint8_t *)ip6h;
4264 ptr += hdr_len;
4266 if (nexthdr == IPPROTO_DSTOPTS) {
4267 ip6_dest_t *dest_hdr;
4268 uint_t hdr_len;
4270 dest_hdr = (ip6_dest_t *)ptr;
4271 if (dest_hdr->ip6d_nxt == IPPROTO_ROUTING) {
4272 hdr_len = 8 * (dest_hdr->ip6d_len + 1);
4273 nexthdr = dest_hdr->ip6d_nxt;
4274 prev_nexthdr_offset = (uint8_t *)&dest_hdr->ip6d_nxt
4275 - (uint8_t *)ip6h;
4276 ptr += hdr_len;
4279 if (nexthdr == IPPROTO_ROUTING) {
4280 ip6_rthdr_t *rthdr;
4281 uint_t hdr_len;
4283 rthdr = (ip6_rthdr_t *)ptr;
4284 nexthdr = rthdr->ip6r_nxt;
4285 prev_nexthdr_offset = (uint8_t *)&rthdr->ip6r_nxt
4286 - (uint8_t *)ip6h;
4287 hdr_len = 8 * (rthdr->ip6r_len + 1);
4288 ptr += hdr_len;
4290 unfragmentable_len = (uint_t)(ptr - (uint8_t *)ip6h);
4293 * Allocate an mblk with enough room for the link-layer
4294 * header, the unfragmentable part of the datagram, and the
4295 * fragment header.
4297 hmp = allocb_tmpl(unfragmentable_len + sizeof (ip6_frag_t) +
4298 ipst->ips_ip_wroff_extra, mp);
4299 if (hmp == NULL) {
4300 ill_t *ill = ixa->ixa_nce->nce_ill;
4302 BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
4303 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp, ill);
4304 freemsg(mp);
4305 return (NULL);
4307 hmp->b_rptr += ipst->ips_ip_wroff_extra;
4308 hmp->b_wptr = hmp->b_rptr + unfragmentable_len + sizeof (ip6_frag_t);
4310 fip6h = (ip6_t *)hmp->b_rptr;
4311 fraghdr = (ip6_frag_t *)(hmp->b_rptr + unfragmentable_len);
4313 bcopy(ip6h, fip6h, unfragmentable_len);
4314 fip6h->ip6_plen = htons(ntohs(fip6h->ip6_plen) + sizeof (ip6_frag_t));
4315 hmp->b_rptr[prev_nexthdr_offset] = IPPROTO_FRAGMENT;
4317 fraghdr->ip6f_nxt = nexthdr;
4318 fraghdr->ip6f_reserved = 0;
4319 fraghdr->ip6f_offlg = 0;
4320 fraghdr->ip6f_ident = htonl(ident);
4322 /* Get the priority marking, if any */
4323 hmp->b_band = priority;
4326 * Move read ptr past unfragmentable portion, we don't want this part
4327 * of the data in our fragments.
4329 mp->b_rptr += unfragmentable_len;
4330 hmp->b_cont = mp;
4331 return (hmp);
4335 * Determine if the ill and multicast aspects of that packets
4336 * "matches" the conn.
4338 boolean_t
4339 conn_wantpacket_v6(conn_t *connp, ip_recv_attr_t *ira, ip6_t *ip6h)
4341 ill_t *ill = ira->ira_rill;
4342 zoneid_t zoneid = ira->ira_zoneid;
4343 uint_t in_ifindex;
4344 in6_addr_t *v6dst_ptr = &ip6h->ip6_dst;
4345 in6_addr_t *v6src_ptr = &ip6h->ip6_src;
4348 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4349 * scopeid. This is used to limit
4350 * unicast and multicast reception to conn_incoming_ifindex.
4351 * conn_wantpacket_v6 is called both for unicast and
4352 * multicast packets.
4354 in_ifindex = connp->conn_incoming_ifindex;
4356 /* mpathd can bind to the under IPMP interface, which we allow */
4357 if (in_ifindex != 0 && in_ifindex != ill->ill_phyint->phyint_ifindex) {
4358 if (!IS_UNDER_IPMP(ill))
4359 return (B_FALSE);
4361 if (in_ifindex != ipmp_ill_get_ipmp_ifindex(ill))
4362 return (B_FALSE);
4365 if (!IPCL_ZONE_MATCH(connp, zoneid))
4366 return (B_FALSE);
4368 if (!(ira->ira_flags & IRAF_MULTICAST))
4369 return (B_TRUE);
4371 if (connp->conn_multi_router)
4372 return (B_TRUE);
4374 if (ira->ira_protocol == IPPROTO_RSVP)
4375 return (B_TRUE);
4377 return (conn_hasmembers_ill_withsrc_v6(connp, v6dst_ptr, v6src_ptr,
4378 ira->ira_ill));
4382 * pr_addr_dbg function provides the needed buffer space to call
4383 * inet_ntop() function's 3rd argument. This function should be
4384 * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4385 * stack buffer space in it's own stack frame. This function uses
4386 * a buffer from it's own stack and prints the information.
4387 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4389 * Note: This function can call inet_ntop() once.
4391 void
4392 pr_addr_dbg(char *fmt1, int af, const void *addr)
4394 char buf[INET6_ADDRSTRLEN];
4396 if (fmt1 == NULL) {
4397 ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4398 return;
4402 * This does not compare debug level and just prints
4403 * out. Thus it is the responsibility of the caller
4404 * to check the appropriate debug-level before calling
4405 * this function.
4407 if (ip_debug > 0) {
4408 printf(fmt1, inet_ntop(af, addr, buf, sizeof (buf)));
4416 * Return the length in bytes of the IPv6 headers (base header
4417 * extension headers) that will be needed based on the
4418 * ip_pkt_t structure passed by the caller.
4420 * The returned length does not include the length of the upper level
4421 * protocol (ULP) header.
4424 ip_total_hdrs_len_v6(const ip_pkt_t *ipp)
4426 int len;
4428 len = IPV6_HDR_LEN;
4430 if (ipp->ipp_fields & IPPF_HOPOPTS) {
4431 ASSERT(ipp->ipp_hopoptslen != 0);
4432 len += ipp->ipp_hopoptslen;
4436 * En-route destination options
4437 * Only do them if there's a routing header as well
4439 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4440 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4441 ASSERT(ipp->ipp_rthdrdstoptslen != 0);
4442 len += ipp->ipp_rthdrdstoptslen;
4444 if (ipp->ipp_fields & IPPF_RTHDR) {
4445 ASSERT(ipp->ipp_rthdrlen != 0);
4446 len += ipp->ipp_rthdrlen;
4448 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4449 ASSERT(ipp->ipp_dstoptslen != 0);
4450 len += ipp->ipp_dstoptslen;
4452 return (len);
4456 * All-purpose routine to build a header chain of an IPv6 header
4457 * followed by any required extension headers and a proto header.
4459 * The caller has to set the source and destination address as well as
4460 * ip6_plen. The caller has to massage any routing header and compensate
4461 * for the ULP pseudo-header checksum due to the source route.
4463 * The extension headers will all be fully filled in.
4465 void
4466 ip_build_hdrs_v6(uchar_t *buf, uint_t buf_len, const ip_pkt_t *ipp,
4467 uint8_t protocol, uint32_t flowinfo)
4469 uint8_t *nxthdr_ptr;
4470 uint8_t *cp;
4471 ip6_t *ip6h = (ip6_t *)buf;
4473 /* Initialize IPv6 header */
4474 ip6h->ip6_vcf =
4475 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
4476 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
4478 if (ipp->ipp_fields & IPPF_TCLASS) {
4479 /* Overrides the class part of flowinfo */
4480 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
4481 ipp->ipp_tclass);
4484 if (ipp->ipp_fields & IPPF_HOPLIMIT)
4485 ip6h->ip6_hops = ipp->ipp_hoplimit;
4486 else
4487 ip6h->ip6_hops = ipp->ipp_unicast_hops;
4489 if ((ipp->ipp_fields & IPPF_ADDR) &&
4490 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
4491 ip6h->ip6_src = ipp->ipp_addr;
4493 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
4494 cp = (uint8_t *)&ip6h[1];
4496 * Here's where we have to start stringing together
4497 * any extension headers in the right order:
4498 * Hop-by-hop, destination, routing, and final destination opts.
4500 if (ipp->ipp_fields & IPPF_HOPOPTS) {
4501 /* Hop-by-hop options */
4502 ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
4504 *nxthdr_ptr = IPPROTO_HOPOPTS;
4505 nxthdr_ptr = &hbh->ip6h_nxt;
4507 bcopy(ipp->ipp_hopopts, cp, ipp->ipp_hopoptslen);
4508 cp += ipp->ipp_hopoptslen;
4511 * En-route destination options
4512 * Only do them if there's a routing header as well
4514 if ((ipp->ipp_fields & (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) ==
4515 (IPPF_RTHDRDSTOPTS|IPPF_RTHDR)) {
4516 ip6_dest_t *dst = (ip6_dest_t *)cp;
4518 *nxthdr_ptr = IPPROTO_DSTOPTS;
4519 nxthdr_ptr = &dst->ip6d_nxt;
4521 bcopy(ipp->ipp_rthdrdstopts, cp, ipp->ipp_rthdrdstoptslen);
4522 cp += ipp->ipp_rthdrdstoptslen;
4525 * Routing header next
4527 if (ipp->ipp_fields & IPPF_RTHDR) {
4528 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
4530 *nxthdr_ptr = IPPROTO_ROUTING;
4531 nxthdr_ptr = &rt->ip6r_nxt;
4533 bcopy(ipp->ipp_rthdr, cp, ipp->ipp_rthdrlen);
4534 cp += ipp->ipp_rthdrlen;
4537 * Do ultimate destination options
4539 if (ipp->ipp_fields & IPPF_DSTOPTS) {
4540 ip6_dest_t *dest = (ip6_dest_t *)cp;
4542 *nxthdr_ptr = IPPROTO_DSTOPTS;
4543 nxthdr_ptr = &dest->ip6d_nxt;
4545 bcopy(ipp->ipp_dstopts, cp, ipp->ipp_dstoptslen);
4546 cp += ipp->ipp_dstoptslen;
4549 * Now set the last header pointer to the proto passed in
4551 *nxthdr_ptr = protocol;
4552 ASSERT((int)(cp - buf) == buf_len);
4556 * Return a pointer to the routing header extension header
4557 * in the IPv6 header(s) chain passed in.
4558 * If none found, return NULL
4559 * Assumes that all extension headers are in same mblk as the v6 header
4561 ip6_rthdr_t *
4562 ip_find_rthdr_v6(ip6_t *ip6h, uint8_t *endptr)
4564 ip6_dest_t *desthdr;
4565 ip6_frag_t *fraghdr;
4566 uint_t hdrlen;
4567 uint8_t nexthdr;
4568 uint8_t *ptr = (uint8_t *)&ip6h[1];
4570 if (ip6h->ip6_nxt == IPPROTO_ROUTING)
4571 return ((ip6_rthdr_t *)ptr);
4574 * The routing header will precede all extension headers
4575 * other than the hop-by-hop and destination options
4576 * extension headers, so if we see anything other than those,
4577 * we're done and didn't find it.
4578 * We could see a destination options header alone but no
4579 * routing header, in which case we'll return NULL as soon as
4580 * we see anything after that.
4581 * Hop-by-hop and destination option headers are identical,
4582 * so we can use either one we want as a template.
4584 nexthdr = ip6h->ip6_nxt;
4585 while (ptr < endptr) {
4586 /* Is there enough left for len + nexthdr? */
4587 if (ptr + MIN_EHDR_LEN > endptr)
4588 return (NULL);
4590 switch (nexthdr) {
4591 case IPPROTO_HOPOPTS:
4592 case IPPROTO_DSTOPTS:
4593 /* Assumes the headers are identical for hbh and dst */
4594 desthdr = (ip6_dest_t *)ptr;
4595 hdrlen = 8 * (desthdr->ip6d_len + 1);
4596 nexthdr = desthdr->ip6d_nxt;
4597 break;
4599 case IPPROTO_ROUTING:
4600 return ((ip6_rthdr_t *)ptr);
4602 case IPPROTO_FRAGMENT:
4603 fraghdr = (ip6_frag_t *)ptr;
4604 hdrlen = sizeof (ip6_frag_t);
4605 nexthdr = fraghdr->ip6f_nxt;
4606 break;
4608 default:
4609 return (NULL);
4611 ptr += hdrlen;
4613 return (NULL);
4617 * Called for source-routed packets originating on this node.
4618 * Manipulates the original routing header by moving every entry up
4619 * one slot, placing the first entry in the v6 header's v6_dst field,
4620 * and placing the ultimate destination in the routing header's last
4621 * slot.
4623 * Returns the checksum diference between the ultimate destination
4624 * (last hop in the routing header when the packet is sent) and
4625 * the first hop (ip6_dst when the packet is sent)
4627 /* ARGSUSED2 */
4628 uint32_t
4629 ip_massage_options_v6(ip6_t *ip6h, ip6_rthdr_t *rth, netstack_t *ns)
4631 uint_t numaddr;
4632 uint_t i;
4633 in6_addr_t *addrptr;
4634 in6_addr_t tmp;
4635 ip6_rthdr0_t *rthdr = (ip6_rthdr0_t *)rth;
4636 uint32_t cksm;
4637 uint32_t addrsum = 0;
4638 uint16_t *ptr;
4641 * Perform any processing needed for source routing.
4642 * We know that all extension headers will be in the same mblk
4643 * as the IPv6 header.
4647 * If no segments left in header, or the header length field is zero,
4648 * don't move hop addresses around;
4649 * Checksum difference is zero.
4651 if ((rthdr->ip6r0_segleft == 0) || (rthdr->ip6r0_len == 0))
4652 return (0);
4654 ptr = (uint16_t *)&ip6h->ip6_dst;
4655 cksm = 0;
4656 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4657 cksm += ptr[i];
4659 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4662 * Here's where the fun begins - we have to
4663 * move all addresses up one spot, take the
4664 * first hop and make it our first ip6_dst,
4665 * and place the ultimate destination in the
4666 * newly-opened last slot.
4668 addrptr = (in6_addr_t *)((char *)rthdr + sizeof (*rthdr));
4669 numaddr = rthdr->ip6r0_len / 2;
4670 tmp = *addrptr;
4671 for (i = 0; i < (numaddr - 1); addrptr++, i++) {
4672 *addrptr = addrptr[1];
4674 *addrptr = ip6h->ip6_dst;
4675 ip6h->ip6_dst = tmp;
4678 * From the checksummed ultimate destination subtract the checksummed
4679 * current ip6_dst (the first hop address). Return that number.
4680 * (In the v4 case, the second part of this is done in each routine
4681 * that calls ip_massage_options(). We do it all in this one place
4682 * for v6).
4684 ptr = (uint16_t *)&ip6h->ip6_dst;
4685 for (i = 0; i < (sizeof (in6_addr_t) / sizeof (uint16_t)); i++) {
4686 addrsum += ptr[i];
4688 cksm -= ((addrsum >> 16) + (addrsum & 0xFFFF));
4689 if ((int)cksm < 0)
4690 cksm--;
4691 cksm = (cksm & 0xFFFF) + (cksm >> 16);
4693 return (cksm);
4696 void
4697 *ip6_kstat_init(netstackid_t stackid, ip6_stat_t *ip6_statisticsp)
4699 kstat_t *ksp;
4701 ip6_stat_t template = {
4702 { "ip6_udp_fannorm", KSTAT_DATA_UINT64 },
4703 { "ip6_udp_fanmb", KSTAT_DATA_UINT64 },
4704 { "ip6_recv_pullup", KSTAT_DATA_UINT64 },
4705 { "ip6_db_ref", KSTAT_DATA_UINT64 },
4706 { "ip6_notaligned", KSTAT_DATA_UINT64 },
4707 { "ip6_multimblk", KSTAT_DATA_UINT64 },
4708 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64 },
4709 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64 },
4710 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64 },
4711 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64 },
4712 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4713 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4714 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4715 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64 },
4716 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64 },
4717 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64 },
4719 ksp = kstat_create_netstack("ip", 0, "ip6stat", "net",
4720 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
4721 KSTAT_FLAG_VIRTUAL, stackid);
4723 if (ksp == NULL)
4724 return (NULL);
4726 bcopy(&template, ip6_statisticsp, sizeof (template));
4727 ksp->ks_data = (void *)ip6_statisticsp;
4728 ksp->ks_private = (void *)(uintptr_t)stackid;
4730 kstat_install(ksp);
4731 return (ksp);
4734 void
4735 ip6_kstat_fini(netstackid_t stackid, kstat_t *ksp)
4737 if (ksp != NULL) {
4738 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
4739 kstat_delete_netstack(ksp, stackid);
4744 * The following two functions set and get the value for the
4745 * IPV6_SRC_PREFERENCES socket option.
4748 ip6_set_src_preferences(ip_xmit_attr_t *ixa, uint32_t prefs)
4751 * We only support preferences that are covered by
4752 * IPV6_PREFER_SRC_MASK.
4754 if (prefs & ~IPV6_PREFER_SRC_MASK)
4755 return (EINVAL);
4758 * Look for conflicting preferences or default preferences. If
4759 * both bits of a related pair are clear, the application wants the
4760 * system's default value for that pair. Both bits in a pair can't
4761 * be set.
4763 if ((prefs & IPV6_PREFER_SRC_MIPMASK) == 0) {
4764 prefs |= IPV6_PREFER_SRC_MIPDEFAULT;
4765 } else if ((prefs & IPV6_PREFER_SRC_MIPMASK) ==
4766 IPV6_PREFER_SRC_MIPMASK) {
4767 return (EINVAL);
4769 if ((prefs & IPV6_PREFER_SRC_TMPMASK) == 0) {
4770 prefs |= IPV6_PREFER_SRC_TMPDEFAULT;
4771 } else if ((prefs & IPV6_PREFER_SRC_TMPMASK) ==
4772 IPV6_PREFER_SRC_TMPMASK) {
4773 return (EINVAL);
4775 if ((prefs & IPV6_PREFER_SRC_CGAMASK) == 0) {
4776 prefs |= IPV6_PREFER_SRC_CGADEFAULT;
4777 } else if ((prefs & IPV6_PREFER_SRC_CGAMASK) ==
4778 IPV6_PREFER_SRC_CGAMASK) {
4779 return (EINVAL);
4782 ixa->ixa_src_preferences = prefs;
4783 return (0);
4786 size_t
4787 ip6_get_src_preferences(ip_xmit_attr_t *ixa, uint32_t *val)
4789 *val = ixa->ixa_src_preferences;
4790 return (sizeof (ixa->ixa_src_preferences));
4794 * Get the size of the IP options (including the IP headers size)
4795 * without including the AH header's size. If till_ah is B_FALSE,
4796 * and if AH header is present, dest options beyond AH header will
4797 * also be included in the returned size.
4800 ipsec_ah_get_hdr_size_v6(mblk_t *mp, boolean_t till_ah)
4802 ip6_t *ip6h;
4803 uint8_t nexthdr;
4804 uint8_t *whereptr;
4805 ip6_hbh_t *hbhhdr;
4806 ip6_dest_t *dsthdr;
4807 ip6_rthdr_t *rthdr;
4808 int ehdrlen;
4809 int size;
4810 ah_t *ah;
4812 ip6h = (ip6_t *)mp->b_rptr;
4813 size = IPV6_HDR_LEN;
4814 nexthdr = ip6h->ip6_nxt;
4815 whereptr = (uint8_t *)&ip6h[1];
4816 for (;;) {
4817 /* Assume IP has already stripped it */
4818 ASSERT(nexthdr != IPPROTO_FRAGMENT);
4819 switch (nexthdr) {
4820 case IPPROTO_HOPOPTS:
4821 hbhhdr = (ip6_hbh_t *)whereptr;
4822 nexthdr = hbhhdr->ip6h_nxt;
4823 ehdrlen = 8 * (hbhhdr->ip6h_len + 1);
4824 break;
4825 case IPPROTO_DSTOPTS:
4826 dsthdr = (ip6_dest_t *)whereptr;
4827 nexthdr = dsthdr->ip6d_nxt;
4828 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4829 break;
4830 case IPPROTO_ROUTING:
4831 rthdr = (ip6_rthdr_t *)whereptr;
4832 nexthdr = rthdr->ip6r_nxt;
4833 ehdrlen = 8 * (rthdr->ip6r_len + 1);
4834 break;
4835 default :
4836 if (till_ah) {
4837 ASSERT(nexthdr == IPPROTO_AH);
4838 return (size);
4841 * If we don't have a AH header to traverse,
4842 * return now. This happens normally for
4843 * outbound datagrams where we have not inserted
4844 * the AH header.
4846 if (nexthdr != IPPROTO_AH) {
4847 return (size);
4851 * We don't include the AH header's size
4852 * to be symmetrical with other cases where
4853 * we either don't have a AH header (outbound)
4854 * or peek into the AH header yet (inbound and
4855 * not pulled up yet).
4857 ah = (ah_t *)whereptr;
4858 nexthdr = ah->ah_nexthdr;
4859 ehdrlen = (ah->ah_length << 2) + 8;
4861 if (nexthdr == IPPROTO_DSTOPTS) {
4862 if (whereptr + ehdrlen >= mp->b_wptr) {
4864 * The destination options header
4865 * is not part of the first mblk.
4867 whereptr = mp->b_cont->b_rptr;
4868 } else {
4869 whereptr += ehdrlen;
4872 dsthdr = (ip6_dest_t *)whereptr;
4873 ehdrlen = 8 * (dsthdr->ip6d_len + 1);
4874 size += ehdrlen;
4876 return (size);
4878 whereptr += ehdrlen;
4879 size += ehdrlen;
4884 * Utility routine that checks if `v6srcp' is a valid address on underlying
4885 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
4886 * associated with `v6srcp' on success. NOTE: if this is not called from
4887 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
4888 * group during or after this lookup.
4890 boolean_t
4891 ipif_lookup_testaddr_v6(ill_t *ill, const in6_addr_t *v6srcp, ipif_t **ipifp)
4893 ipif_t *ipif;
4896 ipif = ipif_lookup_addr_exact_v6(v6srcp, ill, ill->ill_ipst);
4897 if (ipif != NULL) {
4898 if (ipifp != NULL)
4899 *ipifp = ipif;
4900 else
4901 ipif_refrele(ipif);
4902 return (B_TRUE);
4905 if (ip_debug > 2) {
4906 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
4907 "src %s\n", AF_INET6, v6srcp);
4909 return (B_FALSE);