4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
26 #include <sys/types.h>
27 #include <sys/stream.h>
29 #include <sys/stropts.h>
30 #include <sys/sysmacros.h>
31 #include <sys/strsun.h>
32 #include <sys/strlog.h>
33 #include <sys/strsubr.h>
34 #define _SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
37 #include <sys/sunddi.h>
38 #include <sys/cmn_err.h>
39 #include <sys/debug.h>
47 #include <sys/systm.h>
48 #include <sys/param.h>
49 #include <sys/socket.h>
50 #include <sys/vtrace.h>
51 #include <sys/isa_defs.h>
52 #include <sys/atomic.h>
53 #include <sys/policy.h>
56 #include <net/if_types.h>
57 #include <net/route.h>
58 #include <net/if_dl.h>
59 #include <sys/sockio.h>
60 #include <netinet/in.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/sctp.h>
65 #include <inet/common.h>
67 #include <inet/optcom.h>
68 #include <inet/mib2.h>
73 #include <inet/ip_impl.h>
75 #include <inet/ip6_asp.h>
77 #include <inet/tcp_impl.h>
78 #include <inet/udp_impl.h>
79 #include <inet/ipp_common.h>
81 #include <inet/ip_multi.h>
82 #include <inet/ip_if.h>
83 #include <inet/ip_ire.h>
84 #include <inet/ip_rts.h>
85 #include <inet/ip_ndp.h>
86 #include <net/pfkeyv2.h>
87 #include <inet/sadb.h>
88 #include <inet/ipsec_impl.h>
89 #include <inet/iptun/iptun_impl.h>
90 #include <inet/sctp_ip.h>
91 #include <sys/pattr.h>
92 #include <inet/ipclassifier.h>
93 #include <inet/ipsecah.h>
94 #include <inet/rawip_impl.h>
95 #include <inet/rts_impl.h>
96 #include <sys/squeue_impl.h>
97 #include <sys/squeue.h>
99 /* Temporary; for CR 6451644 work-around */
100 #include <sys/ethernet.h>
103 * Naming conventions:
104 * These rules should be judiciously applied
105 * if there is a need to identify something as IPv6 versus IPv4
106 * IPv6 functions will end with _v6 in the ip module.
107 * IPv6 functions will end with _ipv6 in the transport modules.
109 * Some macros end with _V6; e.g. ILL_FRAG_HASH_V6
110 * Some macros start with V6_; e.g. V6_OR_V4_INADDR_ANY
111 * And then there are ..V4_PART_OF_V6.
112 * The intent is that macros in the ip module end with _V6.
113 * IPv6 global variables will start with ipv6_
114 * IPv6 structures will start with ipv6
115 * IPv6 defined constants should start with IPV6_
116 * (but then there are NDP_DEFAULT_VERS_PRI_AND_FLOW, etc)
119 const in6_addr_t ipv6_all_ones
=
120 { 0xffffffffU
, 0xffffffffU
, 0xffffffffU
, 0xffffffffU
};
121 const in6_addr_t ipv6_all_zeros
= { 0, 0, 0, 0 };
124 const in6_addr_t ipv6_unspecified_group
= { 0xff000000U
, 0, 0, 0 };
125 #else /* _BIG_ENDIAN */
126 const in6_addr_t ipv6_unspecified_group
= { 0x000000ffU
, 0, 0, 0 };
127 #endif /* _BIG_ENDIAN */
130 const in6_addr_t ipv6_loopback
= { 0, 0, 0, 0x00000001U
};
131 #else /* _BIG_ENDIAN */
132 const in6_addr_t ipv6_loopback
= { 0, 0, 0, 0x01000000U
};
133 #endif /* _BIG_ENDIAN */
136 const in6_addr_t ipv6_all_hosts_mcast
= { 0xff020000U
, 0, 0, 0x00000001U
};
137 #else /* _BIG_ENDIAN */
138 const in6_addr_t ipv6_all_hosts_mcast
= { 0x000002ffU
, 0, 0, 0x01000000U
};
139 #endif /* _BIG_ENDIAN */
142 const in6_addr_t ipv6_all_rtrs_mcast
= { 0xff020000U
, 0, 0, 0x00000002U
};
143 #else /* _BIG_ENDIAN */
144 const in6_addr_t ipv6_all_rtrs_mcast
= { 0x000002ffU
, 0, 0, 0x02000000U
};
145 #endif /* _BIG_ENDIAN */
148 const in6_addr_t ipv6_all_v2rtrs_mcast
= { 0xff020000U
, 0, 0, 0x00000016U
};
149 #else /* _BIG_ENDIAN */
150 const in6_addr_t ipv6_all_v2rtrs_mcast
= { 0x000002ffU
, 0, 0, 0x16000000U
};
151 #endif /* _BIG_ENDIAN */
154 const in6_addr_t ipv6_solicited_node_mcast
=
155 { 0xff020000U
, 0, 0x00000001U
, 0xff000000U
};
156 #else /* _BIG_ENDIAN */
157 const in6_addr_t ipv6_solicited_node_mcast
=
158 { 0x000002ffU
, 0, 0x01000000U
, 0x000000ffU
};
159 #endif /* _BIG_ENDIAN */
161 static boolean_t
icmp_inbound_verify_v6(mblk_t
*, icmp6_t
*, ip_recv_attr_t
*);
162 static void icmp_inbound_too_big_v6(icmp6_t
*, ip_recv_attr_t
*);
163 static void icmp_pkt_v6(mblk_t
*, void *, size_t, const in6_addr_t
*,
165 static void icmp_redirect_v6(mblk_t
*, ip6_t
*, nd_redirect_t
*,
167 static void icmp_send_redirect_v6(mblk_t
*, in6_addr_t
*,
168 in6_addr_t
*, ip_recv_attr_t
*);
169 static void icmp_send_reply_v6(mblk_t
*, ip6_t
*, icmp6_t
*,
171 static boolean_t
ip_source_routed_v6(ip6_t
*, mblk_t
*, ip_stack_t
*);
174 * icmp_inbound_v6 deals with ICMP messages that are handled by IP.
175 * If the ICMP message is consumed by IP, i.e., it should not be delivered
176 * to any IPPROTO_ICMP raw sockets, then it returns NULL.
177 * Likewise, if the ICMP error is malformed (too short, etc), then it
178 * returns NULL. The caller uses this to determine whether or not to send
181 * All error messages are passed to the matching transport stream.
183 * See comment for icmp_inbound_v4() on how IPsec is handled.
186 icmp_inbound_v6(mblk_t
*mp
, ip_recv_attr_t
*ira
)
189 ip6_t
*ip6h
; /* Outer header */
190 int ip_hdr_length
; /* Outer header length */
191 boolean_t interested
;
192 ill_t
*ill
= ira
->ira_ill
;
193 ip_stack_t
*ipst
= ill
->ill_ipst
;
194 mblk_t
*mp_ret
= NULL
;
196 ip6h
= (ip6_t
*)mp
->b_rptr
;
198 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInMsgs
);
200 /* Check for Martian packets */
201 if (IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_src
)) {
202 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInAddrErrors
);
203 ip_drop_input("ipIfStatsInAddrErrors: mcast src", mp
, ill
);
208 /* Make sure ira_l2src is set for ndp_input */
209 if (!(ira
->ira_flags
& IRAF_L2SRC_SET
))
210 ip_setl2src(mp
, ira
, ira
->ira_rill
);
212 ip_hdr_length
= ira
->ira_ip_hdr_length
;
213 if ((mp
->b_wptr
- mp
->b_rptr
) < (ip_hdr_length
+ ICMP6_MINLEN
)) {
214 if (ira
->ira_pktlen
< (ip_hdr_length
+ ICMP6_MINLEN
)) {
215 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInTruncatedPkts
);
216 ip_drop_input("ipIfStatsInTruncatedPkts", mp
, ill
);
220 ip6h
= ip_pullup(mp
, ip_hdr_length
+ ICMP6_MINLEN
, ira
);
222 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInErrors
);
228 icmp6
= (icmp6_t
*)(&mp
->b_rptr
[ip_hdr_length
]);
229 DTRACE_PROBE2(icmp__inbound__v6
, ip6_t
*, ip6h
, icmp6_t
*, icmp6
);
230 ip2dbg(("icmp_inbound_v6: type %d code %d\n", icmp6
->icmp6_type
,
234 * We will set "interested" to "true" if we should pass a copy to
235 * the transport i.e., if it is an error message.
237 interested
= !(icmp6
->icmp6_type
& ICMP6_INFOMSG_MASK
);
239 switch (icmp6
->icmp6_type
) {
240 case ICMP6_DST_UNREACH
:
241 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInDestUnreachs
);
242 if (icmp6
->icmp6_code
== ICMP6_DST_UNREACH_ADMIN
)
243 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInAdminProhibs
);
246 case ICMP6_TIME_EXCEEDED
:
247 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInTimeExcds
);
250 case ICMP6_PARAM_PROB
:
251 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInParmProblems
);
254 case ICMP6_PACKET_TOO_BIG
:
255 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInPktTooBigs
);
258 case ICMP6_ECHO_REQUEST
:
259 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInEchos
);
260 if (IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
) &&
261 !ipst
->ips_ipv6_resp_echo_mcast
)
265 * We must have exclusive use of the mblk to convert it to
267 * If not, we copy it.
269 if (mp
->b_datap
->db_ref
> 1) {
274 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
275 ip_drop_input("ipIfStatsInDiscards - copymsg",
282 ip6h
= (ip6_t
*)mp
->b_rptr
;
283 icmp6
= (icmp6_t
*)(&mp
->b_rptr
[ip_hdr_length
]);
286 icmp6
->icmp6_type
= ICMP6_ECHO_REPLY
;
287 icmp_send_reply_v6(mp
, ip6h
, icmp6
, ira
);
290 case ICMP6_ECHO_REPLY
:
291 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInEchoReplies
);
294 case ND_ROUTER_SOLICIT
:
295 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInRouterSolicits
);
298 case ND_ROUTER_ADVERT
:
299 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInRouterAdvertisements
);
302 case ND_NEIGHBOR_SOLICIT
:
303 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInNeighborSolicits
);
307 case ND_NEIGHBOR_ADVERT
:
308 BUMP_MIB(ill
->ill_icmp6_mib
,
309 ipv6IfIcmpInNeighborAdvertisements
);
314 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInRedirects
);
316 if (ipst
->ips_ipv6_ignore_redirect
)
319 /* We now allow a RAW socket to receive this. */
324 * The next three icmp messages will be handled by MLD.
325 * Pass all valid MLD packets up to any process(es)
326 * listening on a raw ICMP socket.
328 case MLD_LISTENER_QUERY
:
329 case MLD_LISTENER_REPORT
:
330 case MLD_LISTENER_REDUCTION
:
331 mp
= mld_input(mp
, ira
);
337 * See if there is an ICMP client to avoid an extra copymsg/freemsg
338 * if there isn't one.
340 if (ipst
->ips_ipcl_proto_fanout_v6
[IPPROTO_ICMPV6
].connf_head
!= NULL
) {
341 /* If there is an ICMP client and we want one too, copy it. */
344 /* Caller will deliver to RAW sockets */
347 mp_ret
= copymsg(mp
);
348 if (mp_ret
== NULL
) {
349 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
350 ip_drop_input("ipIfStatsInDiscards - copymsg", mp
, ill
);
352 } else if (!interested
) {
353 /* Neither we nor raw sockets are interested. Drop packet now */
359 * ICMP error or redirect packet. Make sure we have enough of
360 * the header and that db_ref == 1 since we might end up modifying
363 if (mp
->b_cont
!= NULL
) {
364 if (ip_pullup(mp
, -1, ira
) == NULL
) {
365 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
366 ip_drop_input("ipIfStatsInDiscards - ip_pullup",
373 if (mp
->b_datap
->db_ref
> 1) {
378 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
379 ip_drop_input("ipIfStatsInDiscards - copymsg", mp
, ill
);
388 * In case mp has changed, verify the message before any further
391 ip6h
= (ip6_t
*)mp
->b_rptr
;
392 icmp6
= (icmp6_t
*)(&mp
->b_rptr
[ip_hdr_length
]);
393 if (!icmp_inbound_verify_v6(mp
, icmp6
, ira
)) {
398 switch (icmp6
->icmp6_type
) {
400 icmp_redirect_v6(mp
, ip6h
, (nd_redirect_t
*)icmp6
, ira
);
402 case ICMP6_PACKET_TOO_BIG
:
403 /* Update DCE and adjust MTU in icmp header if needed */
404 icmp_inbound_too_big_v6(icmp6
, ira
);
407 icmp_inbound_error_fanout_v6(mp
, icmp6
, ira
);
415 * Send an ICMP echo reply.
416 * The caller has already updated the payload part of the packet.
417 * We handle the ICMP checksum, IP source address selection and feed
418 * the packet into ip_output_simple.
421 icmp_send_reply_v6(mblk_t
*mp
, ip6_t
*ip6h
, icmp6_t
*icmp6
,
424 uint_t ip_hdr_length
= ira
->ira_ip_hdr_length
;
425 ill_t
*ill
= ira
->ira_ill
;
426 ip_stack_t
*ipst
= ill
->ill_ipst
;
431 * Remove any extension headers (do not reverse a source route)
432 * and clear the flow id (keep traffic class for now).
434 if (ip_hdr_length
!= IPV6_HDR_LEN
) {
437 for (i
= 0; i
< IPV6_HDR_LEN
; i
++) {
438 mp
->b_rptr
[ip_hdr_length
- i
- 1] =
439 mp
->b_rptr
[IPV6_HDR_LEN
- i
- 1];
441 mp
->b_rptr
+= (ip_hdr_length
- IPV6_HDR_LEN
);
442 ip6h
= (ip6_t
*)mp
->b_rptr
;
443 ip6h
->ip6_nxt
= IPPROTO_ICMPV6
;
444 i
= ntohs(ip6h
->ip6_plen
);
445 i
-= (ip_hdr_length
- IPV6_HDR_LEN
);
446 ip6h
->ip6_plen
= htons(i
);
447 ip_hdr_length
= IPV6_HDR_LEN
;
448 ASSERT(ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
== msgdsize(mp
));
450 ip6h
->ip6_vcf
&= ~IPV6_FLOWINFO_FLOWLABEL
;
452 /* Reverse the source and destination addresses. */
453 origsrc
= ip6h
->ip6_src
;
454 ip6h
->ip6_src
= ip6h
->ip6_dst
;
455 ip6h
->ip6_dst
= origsrc
;
457 /* set the hop limit */
458 ip6h
->ip6_hops
= ipst
->ips_ipv6_def_hops
;
461 * Prepare for checksum by putting icmp length in the icmp
462 * checksum field. The checksum is calculated in ip_output
464 icmp6
->icmp6_cksum
= ip6h
->ip6_plen
;
466 bzero(&ixas
, sizeof (ixas
));
467 ixas
.ixa_flags
= IXAF_BASIC_SIMPLE_V6
;
468 ixas
.ixa_zoneid
= ira
->ira_zoneid
;
469 ixas
.ixa_cred
= kcred
;
470 ixas
.ixa_cpid
= NOPID
;
471 ixas
.ixa_ifindex
= 0;
472 ixas
.ixa_ipst
= ipst
;
473 ixas
.ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
475 if (!(ira
->ira_flags
& IRAF_IPSEC_SECURE
)) {
477 * This packet should go out the same way as it
478 * came in i.e in clear, independent of the IPsec
479 * policy for transmitting packets.
481 ixas
.ixa_flags
|= IXAF_NO_IPSEC
;
483 if (!ipsec_in_to_out(ira
, &ixas
, mp
, NULL
, ip6h
)) {
484 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
485 /* Note: mp already consumed and ip_drop_packet done */
490 /* Was the destination (now source) link-local? Send out same group */
491 if (IN6_IS_ADDR_LINKSCOPE(&ip6h
->ip6_src
)) {
492 ixas
.ixa_flags
|= IXAF_SCOPEID_SET
;
493 if (IS_UNDER_IPMP(ill
))
494 ixas
.ixa_scopeid
= ill_get_upper_ifindex(ill
);
496 ixas
.ixa_scopeid
= ill
->ill_phyint
->phyint_ifindex
;
499 if (ira
->ira_flags
& IRAF_MULTIBROADCAST
) {
501 * Not one of our addresses (IRE_LOCALs), thus we let
502 * ip_output_simple pick the source.
504 ip6h
->ip6_src
= ipv6_all_zeros
;
505 ixas
.ixa_flags
|= IXAF_SET_SOURCE
;
508 /* Should we send using dce_pmtu? */
509 if (ipst
->ips_ipv6_icmp_return_pmtu
)
510 ixas
.ixa_flags
|= IXAF_PMTU_DISCOVERY
;
512 (void) ip_output_simple(mp
, &ixas
);
518 * Verify the ICMP message for either an ICMP error or a redirect packet.
519 * The caller should have fully pulled up the message. If it's a redirect
520 * packet, only basic checks on IP header will be done; otherwise, verify
521 * the packet by looking at the included ULP header.
523 * Called before icmp_inbound_error_fanout_v6 is called.
526 icmp_inbound_verify_v6(mblk_t
*mp
, icmp6_t
*icmp6
, ip_recv_attr_t
*ira
)
528 ill_t
*ill
= ira
->ira_ill
;
532 ip_stack_t
*ipst
= ill
->ill_ipst
;
534 ip6_t
*ip6h
; /* Inner header */
536 ip6h
= (ip6_t
*)&icmp6
[1];
537 if ((uchar_t
*)ip6h
+ IPV6_HDR_LEN
> mp
->b_wptr
)
540 if (icmp6
->icmp6_type
== ND_REDIRECT
) {
541 hdr_length
= sizeof (nd_redirect_t
);
543 if ((IPH_HDR_VERSION(ip6h
) != IPV6_VERSION
))
545 hdr_length
= IPV6_HDR_LEN
;
548 if ((uchar_t
*)ip6h
+ hdr_length
> mp
->b_wptr
)
552 * Stop here for ICMP_REDIRECT.
554 if (icmp6
->icmp6_type
== ND_REDIRECT
)
560 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &hdr_length
, &nexthdrp
))
564 /* Try to pass the ICMP message to clients who need it */
568 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
571 if ((uchar_t
*)ip6h
+ hdr_length
+ ICMP_MIN_TP_HDR_LEN
>
579 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
582 if ((uchar_t
*)ip6h
+ hdr_length
+ ICMP_MIN_TP_HDR_LEN
>
586 tcpha
= (tcpha_t
*)((uchar_t
*)ip6h
+ hdr_length
);
588 * With IPMP we need to match across group, which we do
589 * since we have the upper ill from ira_ill.
591 connp
= ipcl_tcp_lookup_reversed_ipv6(ip6h
, tcpha
, TCPS_LISTEN
,
592 ill
->ill_phyint
->phyint_ifindex
, ipst
);
596 if ((connp
->conn_verifyicmp
!= NULL
) &&
597 !connp
->conn_verifyicmp(connp
, tcpha
, NULL
, icmp6
, ira
)) {
606 * Verify we have at least ICMP_MIN_TP_HDR_LEN bytes of
609 if ((uchar_t
*)ip6h
+ hdr_length
+ ICMP_MIN_TP_HDR_LEN
>
618 /* Look for self-encapsulated packets that caused an error */
621 in_ip6h
= (ip6_t
*)((uint8_t *)ip6h
+ hdr_length
);
622 if ((uint8_t *)in_ip6h
+ (nexthdr
== IPPROTO_ENCAP
?
623 sizeof (ipha_t
) : sizeof (ip6_t
)) > mp
->b_wptr
)
634 /* Bogus ICMP error. */
635 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
639 /* We pulled up everything already. Must be truncated */
640 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInErrors
);
645 * Process received IPv6 ICMP Packet too big.
646 * The caller is responsible for validating the packet before passing it in
647 * and also to fanout the ICMP error to any matching transport conns. Assumes
648 * the message has been fully pulled up.
650 * Before getting here, the caller has called icmp_inbound_verify_v6()
651 * that should have verified with ULP to prevent undoing the changes we're
652 * going to make to DCE. For example, TCP might have verified that the packet
653 * which generated error is in the send window.
655 * In some cases this modifies the MTU in the ICMP header packet; the caller
656 * should pass to the matching ULP after this returns.
659 icmp_inbound_too_big_v6(icmp6_t
*icmp6
, ip_recv_attr_t
*ira
)
663 ill_t
*ill
= ira
->ira_ill
; /* Upper ill if IPMP */
664 ip_stack_t
*ipst
= ill
->ill_ipst
;
666 in6_addr_t final_dst
;
667 ip6_t
*ip6h
; /* Inner IP header */
669 /* Caller has already pulled up everything. */
670 ip6h
= (ip6_t
*)&icmp6
[1];
671 final_dst
= ip_get_dst_v6(ip6h
, NULL
, NULL
);
674 * For link local destinations matching simply on address is not
675 * sufficient. Same link local addresses for different ILL's is
678 if (IN6_IS_ADDR_LINKSCOPE(&final_dst
)) {
679 dce
= dce_lookup_and_add_v6(&final_dst
,
680 ill
->ill_phyint
->phyint_ifindex
, ipst
);
682 dce
= dce_lookup_and_add_v6(&final_dst
, 0, ipst
);
685 /* Couldn't add a unique one - ENOMEM */
688 pr_addr_dbg("icmp_inbound_too_big_v6:"
689 "no dce for dst %s\n", AF_INET6
,
695 mtu
= ntohl(icmp6
->icmp6_mtu
);
697 mutex_enter(&dce
->dce_lock
);
698 if (dce
->dce_flags
& DCEF_PMTU
)
699 old_max_frag
= dce
->dce_pmtu
;
700 else if (IN6_IS_ADDR_MULTICAST(&final_dst
))
701 old_max_frag
= ill
->ill_mc_mtu
;
703 old_max_frag
= ill
->ill_mtu
;
705 if (mtu
< IPV6_MIN_MTU
) {
706 ip1dbg(("Received mtu less than IPv6 "
707 "min mtu %d: %d\n", IPV6_MIN_MTU
, mtu
));
710 * If an mtu less than IPv6 min mtu is received,
711 * we must include a fragment header in
712 * subsequent packets.
714 dce
->dce_flags
|= DCEF_TOO_SMALL_PMTU
;
716 dce
->dce_flags
&= ~DCEF_TOO_SMALL_PMTU
;
718 ip1dbg(("Received mtu from router: %d\n", mtu
));
719 dce
->dce_pmtu
= MIN(old_max_frag
, mtu
);
721 /* Prepare to send the new max frag size for the ULP. */
722 if (dce
->dce_flags
& DCEF_TOO_SMALL_PMTU
) {
724 * If we need a fragment header in every packet
725 * (above case or multirouting), make sure the
726 * ULP takes it into account when computing the
729 icmp6
->icmp6_mtu
= htonl(dce
->dce_pmtu
- sizeof (ip6_frag_t
));
731 icmp6
->icmp6_mtu
= htonl(dce
->dce_pmtu
);
733 /* We now have a PMTU for sure */
734 dce
->dce_flags
|= DCEF_PMTU
;
735 dce
->dce_last_change_time
= TICK_TO_SEC(ddi_get_lbolt64());
736 mutex_exit(&dce
->dce_lock
);
738 * After dropping the lock the new value is visible to everyone.
739 * Then we bump the generation number so any cached values reinspect
742 dce_increment_generation(dce
);
747 * Fanout received ICMPv6 error packets to the transports.
748 * Assumes the IPv6 plus ICMPv6 headers have been pulled up but nothing else.
750 * The caller must have called icmp_inbound_verify_v6.
753 icmp_inbound_error_fanout_v6(mblk_t
*mp
, icmp6_t
*icmp6
, ip_recv_attr_t
*ira
)
755 uint16_t *up
; /* Pointer to ports in ULP header */
756 uint32_t ports
; /* reversed ports for fanout */
757 ip6_t rip6h
; /* With reversed addresses */
758 ip6_t
*ip6h
; /* Inner IP header */
759 uint16_t hdr_length
; /* Inner IP header length */
764 ill_t
*ill
= ira
->ira_ill
; /* Upper in the case of IPMP */
765 ip_stack_t
*ipst
= ill
->ill_ipst
;
766 ipsec_stack_t
*ipss
= ipst
->ips_netstack
->netstack_ipsec
;
768 /* Caller has already pulled up everything. */
769 ip6h
= (ip6_t
*)&icmp6
[1];
770 ASSERT(mp
->b_cont
== NULL
);
771 ASSERT((uchar_t
*)&ip6h
[1] <= mp
->b_wptr
);
773 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &hdr_length
, &nexthdrp
))
776 ira
->ira_protocol
= nexthdr
;
779 * We need a separate IP header with the source and destination
780 * addresses reversed to do fanout/classification because the ip6h in
781 * the ICMPv6 error is in the form we sent it out.
783 rip6h
.ip6_src
= ip6h
->ip6_dst
;
784 rip6h
.ip6_dst
= ip6h
->ip6_src
;
785 rip6h
.ip6_nxt
= nexthdr
;
787 /* Try to pass the ICMP message to clients who need it */
790 /* Attempt to find a client stream based on port. */
791 up
= (uint16_t *)((uchar_t
*)ip6h
+ hdr_length
);
793 /* Note that we send error to all matches. */
794 ira
->ira_flags
|= IRAF_ICMP_ERROR
;
795 ip_fanout_udp_multi_v6(mp
, &rip6h
, up
[0], up
[1], ira
);
796 ira
->ira_flags
&= ~IRAF_ICMP_ERROR
;
801 * Attempt to find a client stream based on port.
802 * Note that we do a reverse lookup since the header is
803 * in the form we sent it out.
805 tcpha
= (tcpha_t
*)((uchar_t
*)ip6h
+ hdr_length
);
807 * With IPMP we need to match across group, which we do
808 * since we have the upper ill from ira_ill.
810 connp
= ipcl_tcp_lookup_reversed_ipv6(ip6h
, tcpha
,
811 TCPS_LISTEN
, ill
->ill_phyint
->phyint_ifindex
, ipst
);
816 if (CONN_INBOUND_POLICY_PRESENT_V6(connp
, ipss
) ||
817 (ira
->ira_flags
& IRAF_IPSEC_SECURE
)) {
818 mp
= ipsec_check_inbound_policy(mp
, connp
,
821 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
822 /* Note that mp is NULL */
823 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
829 ira
->ira_flags
|= IRAF_ICMP_ERROR
;
830 if (IPCL_IS_TCP(connp
)) {
831 SQUEUE_ENTER_ONE(connp
->conn_sqp
, mp
,
832 connp
->conn_recvicmp
, connp
, ira
, SQ_FILL
,
833 SQTAG_TCP6_INPUT_ICMP_ERR
);
835 /* Not TCP; must be SOCK_RAW, IPPROTO_TCP */
836 ill_t
*rill
= ira
->ira_rill
;
838 ira
->ira_ill
= ira
->ira_rill
= NULL
;
839 (connp
->conn_recv
)(connp
, mp
, NULL
, ira
);
842 ira
->ira_rill
= rill
;
844 ira
->ira_flags
&= ~IRAF_ICMP_ERROR
;
849 up
= (uint16_t *)((uchar_t
*)ip6h
+ hdr_length
);
850 /* Find a SCTP client stream for this packet. */
851 ((uint16_t *)&ports
)[0] = up
[1];
852 ((uint16_t *)&ports
)[1] = up
[0];
854 ira
->ira_flags
|= IRAF_ICMP_ERROR
;
855 ip_fanout_sctp(mp
, NULL
, &rip6h
, ports
, ira
);
856 ira
->ira_flags
&= ~IRAF_ICMP_ERROR
;
861 if (!ipsec_loaded(ipss
)) {
862 ip_proto_not_sup(mp
, ira
);
866 if (nexthdr
== IPPROTO_ESP
)
867 mp
= ipsecesp_icmp_error(mp
, ira
);
869 mp
= ipsecah_icmp_error(mp
, ira
);
873 /* Just in case ipsec didn't preserve the NULL b_cont */
874 if (mp
->b_cont
!= NULL
) {
875 if (!pullupmsg(mp
, -1))
880 * If successful, the mp has been modified to not include
881 * the ESP/AH header so we can fanout to the ULP's icmp
884 if (mp
->b_wptr
- mp
->b_rptr
< IPV6_HDR_LEN
)
887 ip6h
= (ip6_t
*)mp
->b_rptr
;
888 /* Don't call hdr_length_v6() unless you have to. */
889 if (ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
)
890 hdr_length
= ip_hdr_length_v6(mp
, ip6h
);
892 hdr_length
= IPV6_HDR_LEN
;
894 /* Verify the modified message before any further processing. */
895 icmp6
= (icmp6_t
*)(&mp
->b_rptr
[hdr_length
]);
896 if (!icmp_inbound_verify_v6(mp
, icmp6
, ira
)) {
901 icmp_inbound_error_fanout_v6(mp
, icmp6
, ira
);
905 /* Look for self-encapsulated packets that caused an error */
908 in_ip6h
= (ip6_t
*)((uint8_t *)ip6h
+ hdr_length
);
910 if (IN6_ARE_ADDR_EQUAL(&in_ip6h
->ip6_src
, &ip6h
->ip6_src
) &&
911 IN6_ARE_ADDR_EQUAL(&in_ip6h
->ip6_dst
, &ip6h
->ip6_dst
)) {
913 * Self-encapsulated case. As in the ipv4 case,
914 * we need to strip the 2nd IP header. Since mp
915 * is already pulled-up, we can simply bcopy
916 * the 3rd header + data over the 2nd header.
921 * Make sure we don't do recursion more than once.
923 if (!ip_hdr_length_nexthdr_v6(mp
, in_ip6h
,
924 &unused_len
, &nexthdrp
) ||
925 *nexthdrp
== IPPROTO_IPV6
) {
930 * Copy the 3rd header + remaining data on top
933 bcopy(in_ip6h
, ip6h
, mp
->b_wptr
- (uchar_t
*)in_ip6h
);
936 * Subtract length of the 2nd header.
938 mp
->b_wptr
-= hdr_length
;
940 ip6h
= (ip6_t
*)mp
->b_rptr
;
941 /* Don't call hdr_length_v6() unless you have to. */
942 if (ip6h
->ip6_nxt
!= IPPROTO_ICMPV6
)
943 hdr_length
= ip_hdr_length_v6(mp
, ip6h
);
945 hdr_length
= IPV6_HDR_LEN
;
948 * Verify the modified message before any further
951 icmp6
= (icmp6_t
*)(&mp
->b_rptr
[hdr_length
]);
952 if (!icmp_inbound_verify_v6(mp
, icmp6
, ira
)) {
958 * Now recurse, and see what I _really_ should be
961 icmp_inbound_error_fanout_v6(mp
, icmp6
, ira
);
967 if ((connp
= ipcl_iptun_classify_v6(&rip6h
.ip6_src
,
968 &rip6h
.ip6_dst
, ipst
)) != NULL
) {
969 ira
->ira_flags
|= IRAF_ICMP_ERROR
;
970 connp
->conn_recvicmp(connp
, mp
, NULL
, ira
);
972 ira
->ira_flags
&= ~IRAF_ICMP_ERROR
;
976 * No IP tunnel is interested, fallthrough and see
977 * if a raw socket will want it.
981 ira
->ira_flags
|= IRAF_ICMP_ERROR
;
982 ASSERT(ira
->ira_protocol
== nexthdr
);
983 ip_fanout_proto_v6(mp
, &rip6h
, ira
);
984 ira
->ira_flags
&= ~IRAF_ICMP_ERROR
;
989 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInErrors
);
990 ip1dbg(("icmp_inbound_error_fanout_v6: drop pkt\n"));
995 * Process received IPv6 ICMP Redirect messages.
996 * Assumes the caller has verified that the headers are in the pulled up mblk.
1001 icmp_redirect_v6(mblk_t
*mp
, ip6_t
*ip6h
, nd_redirect_t
*rd
,
1002 ip_recv_attr_t
*ira
)
1005 ire_t
*prev_ire
= NULL
;
1007 in6_addr_t
*src
, *dst
, *gateway
;
1012 boolean_t redirect_to_router
= B_FALSE
;
1015 ill_t
*ill
= ira
->ira_rill
;
1016 ill_t
*rill
= ira
->ira_rill
;
1017 ip_stack_t
*ipst
= ill
->ill_ipst
;
1020 * Since ira_ill is where the IRE_LOCAL was hosted we use ira_rill
1021 * and make it be the IPMP upper so as to avoid being confused by a packet
1022 * addressed to a unicast address on a different ill.
1024 if (IS_UNDER_IPMP(rill
)) {
1025 rill
= ipmp_ill_hold_ipmp_ill(rill
);
1027 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInBadRedirects
);
1028 ip_drop_input("ipv6IfIcmpInBadRedirects - IPMP ill",
1033 ASSERT(rill
!= ira
->ira_rill
);
1036 len
= mp
->b_wptr
- (uchar_t
*)rd
;
1037 src
= &ip6h
->ip6_src
;
1038 dst
= &rd
->nd_rd_dst
;
1039 gateway
= &rd
->nd_rd_target
;
1041 /* Verify if it is a valid redirect */
1042 if (!IN6_IS_ADDR_LINKLOCAL(src
) ||
1043 (ip6h
->ip6_hops
!= IPV6_MAX_HOPS
) ||
1044 (rd
->nd_rd_code
!= 0) ||
1045 (len
< sizeof (nd_redirect_t
)) ||
1046 (IN6_IS_ADDR_V4MAPPED(dst
)) ||
1047 (IN6_IS_ADDR_MULTICAST(dst
))) {
1048 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInBadRedirects
);
1049 ip_drop_input("ipv6IfIcmpInBadRedirects - addr/len", mp
, ill
);
1053 if (!(IN6_IS_ADDR_LINKLOCAL(gateway
) ||
1054 IN6_ARE_ADDR_EQUAL(gateway
, dst
))) {
1055 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInBadRedirects
);
1056 ip_drop_input("ipv6IfIcmpInBadRedirects - bad gateway",
1061 optlen
= len
- sizeof (nd_redirect_t
);
1063 if (!ndp_verify_optlen((nd_opt_hdr_t
*)&rd
[1], optlen
)) {
1064 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInBadRedirects
);
1065 ip_drop_input("ipv6IfIcmpInBadRedirects - options",
1071 if (!IN6_ARE_ADDR_EQUAL(gateway
, dst
)) {
1072 redirect_to_router
= B_TRUE
;
1073 ncec_flags
|= NCE_F_ISROUTER
;
1075 gateway
= dst
; /* Add nce for dst */
1080 * Verify that the IP source address of the redirect is
1081 * the same as the current first-hop router for the specified
1082 * ICMP destination address.
1083 * Also, make sure we had a route for the dest in question and
1084 * that route was pointing to the old gateway (the source of the
1086 * We do longest match and then compare ire_gateway_addr_v6 below.
1088 prev_ire
= ire_ftable_lookup_v6(dst
, 0, 0, 0, rill
,
1089 ALL_ZONES
, MATCH_IRE_ILL
, 0, ipst
, NULL
);
1093 * the redirect was not from ourselves
1094 * old gateway is still directly reachable
1096 if (prev_ire
== NULL
||
1097 (prev_ire
->ire_type
& (IRE_LOCAL
|IRE_LOOPBACK
)) ||
1098 (prev_ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) ||
1099 !IN6_ARE_ADDR_EQUAL(src
, &prev_ire
->ire_gateway_addr_v6
)) {
1100 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInBadRedirects
);
1101 ip_drop_input("ipv6IfIcmpInBadRedirects - ire", mp
, ill
);
1105 ASSERT(prev_ire
->ire_ill
!= NULL
);
1106 if (prev_ire
->ire_ill
->ill_flags
& ILLF_NONUD
)
1107 ncec_flags
|= NCE_F_NONUD
;
1109 opt
= (nd_opt_hdr_t
*)&rd
[1];
1110 opt
= ndp_get_option(opt
, optlen
, ND_OPT_TARGET_LINKADDR
);
1112 err
= nce_lookup_then_add_v6(rill
,
1113 (uchar_t
*)&opt
[1], /* Link layer address */
1114 rill
->ill_phys_addr_length
,
1115 gateway
, ncec_flags
, ND_STALE
, &nce
);
1122 * Check to see if link layer address has changed and
1123 * process the ncec_state accordingly.
1125 nce_process(nce
->nce_common
,
1126 (uchar_t
*)&opt
[1], 0, B_FALSE
);
1130 ip1dbg(("icmp_redirect_v6: NCE create failed %d\n",
1135 if (redirect_to_router
) {
1136 ASSERT(IN6_IS_ADDR_LINKLOCAL(gateway
));
1139 * Create a Route Association. This will allow us to remember
1140 * a router told us to use the particular gateway.
1142 ire
= ire_create_v6(
1144 &ipv6_all_ones
, /* mask */
1145 gateway
, /* gateway addr */
1149 (RTF_DYNAMIC
| RTF_GATEWAY
| RTF_HOST
),
1156 * Just create an on link entry, i.e. interface route.
1157 * The gateway field is our link-local on the ill.
1159 mutex_enter(&rill
->ill_lock
);
1160 for (ipif
= rill
->ill_ipif
; ipif
!= NULL
;
1161 ipif
= ipif
->ipif_next
) {
1162 if (!(ipif
->ipif_state_flags
& IPIF_CONDEMNED
) &&
1163 IN6_IS_ADDR_LINKLOCAL(&ipif
->ipif_v6lcl_addr
))
1167 /* We have no link-local address! */
1168 mutex_exit(&rill
->ill_lock
);
1171 gw
= ipif
->ipif_v6lcl_addr
;
1172 mutex_exit(&rill
->ill_lock
);
1174 ire
= ire_create_v6(
1175 dst
, /* gateway == dst */
1176 &ipv6_all_ones
, /* mask */
1177 &gw
, /* gateway addr */
1178 rill
->ill_net_type
, /* IF_[NO]RESOLVER */
1181 (RTF_DYNAMIC
| RTF_HOST
),
1188 nire
= ire_add(ire
);
1189 /* Check if it was a duplicate entry */
1190 if (nire
!= NULL
&& nire
!= ire
) {
1191 ASSERT(nire
->ire_identical_ref
> 1);
1198 ire_refrele(ire
); /* Held in ire_add */
1200 /* tell routing sockets that we received a redirect */
1201 ip_rts_change_v6(RTM_REDIRECT
,
1204 &ipv6_all_ones
, 0, src
,
1205 (RTF_DYNAMIC
| RTF_GATEWAY
| RTF_HOST
), 0,
1206 (RTA_DST
| RTA_GATEWAY
| RTA_NETMASK
| RTA_AUTHOR
), ipst
);
1209 * Delete any existing IRE_HOST type ires for this destination.
1210 * This together with the added IRE has the effect of
1211 * modifying an existing redirect.
1213 redir_ire
= ire_ftable_lookup_v6(dst
, 0, src
, IRE_HOST
,
1214 prev_ire
->ire_ill
, ALL_ZONES
,
1215 (MATCH_IRE_GW
| MATCH_IRE_TYPE
| MATCH_IRE_ILL
), 0, ipst
,
1218 if (redir_ire
!= NULL
) {
1219 if (redir_ire
->ire_flags
& RTF_DYNAMIC
)
1220 ire_delete(redir_ire
);
1221 ire_refrele(redir_ire
);
1225 ire_refrele(prev_ire
);
1229 if (prev_ire
!= NULL
)
1230 ire_refrele(prev_ire
);
1232 if (rill
!= ira
->ira_rill
)
1237 * Build and ship an IPv6 ICMP message using the packet data in mp,
1238 * and the ICMP header pointed to by "stuff". (May be called as
1240 * Note: assumes that icmp_pkt_err_ok_v6 has been called to
1241 * verify that an icmp error packet can be sent.
1243 * If v6src_ptr is set use it as a source. Otherwise select a reasonable
1244 * source address (see above function).
1247 icmp_pkt_v6(mblk_t
*mp
, void *stuff
, size_t len
,
1248 const in6_addr_t
*v6src_ptr
, ip_recv_attr_t
*ira
)
1257 ill_t
*ill
= ira
->ira_ill
;
1258 ip_stack_t
*ipst
= ill
->ill_ipst
;
1259 ip_xmit_attr_t ixas
;
1261 ip6h
= (ip6_t
*)mp
->b_rptr
;
1263 bzero(&ixas
, sizeof (ixas
));
1264 ixas
.ixa_flags
= IXAF_BASIC_SIMPLE_V6
;
1265 ixas
.ixa_zoneid
= ira
->ira_zoneid
;
1266 ixas
.ixa_ifindex
= 0;
1267 ixas
.ixa_ipst
= ipst
;
1268 ixas
.ixa_cred
= kcred
;
1269 ixas
.ixa_cpid
= NOPID
;
1270 ixas
.ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
1273 * If the source of the original packet was link-local, then
1274 * make sure we send on the same ill (group) as we received it on.
1276 if (IN6_IS_ADDR_LINKSCOPE(&ip6h
->ip6_src
)) {
1277 ixas
.ixa_flags
|= IXAF_SCOPEID_SET
;
1278 if (IS_UNDER_IPMP(ill
))
1279 ixas
.ixa_scopeid
= ill_get_upper_ifindex(ill
);
1281 ixas
.ixa_scopeid
= ill
->ill_phyint
->phyint_ifindex
;
1284 if (ira
->ira_flags
& IRAF_IPSEC_SECURE
) {
1286 * Apply IPsec based on how IPsec was applied to
1287 * the packet that had the error.
1289 * If it was an outbound packet that caused the ICMP
1290 * error, then the caller will have setup the IRA
1293 if (!ipsec_in_to_out(ira
, &ixas
, mp
, NULL
, ip6h
)) {
1294 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
1295 /* Note: mp already consumed and ip_drop_packet done */
1300 * This is in clear. The icmp message we are building
1301 * here should go out in clear, independent of our policy.
1303 ixas
.ixa_flags
|= IXAF_NO_IPSEC
;
1307 * If the caller specified the source we use that.
1308 * Otherwise, if the packet was for one of our unicast addresses, make
1309 * sure we respond with that as the source. Otherwise
1310 * have ip_output_simple pick the source address.
1312 if (v6src_ptr
!= NULL
) {
1316 uint_t match_flags
= MATCH_IRE_TYPE
| MATCH_IRE_ZONEONLY
;
1318 if (IN6_IS_ADDR_LINKLOCAL(&ip6h
->ip6_src
) ||
1319 IN6_IS_ADDR_LINKLOCAL(&ip6h
->ip6_dst
))
1320 match_flags
|= MATCH_IRE_ILL
;
1322 ire
= ire_ftable_lookup_v6(&ip6h
->ip6_dst
, 0, 0,
1323 (IRE_LOCAL
|IRE_LOOPBACK
), ill
, ira
->ira_zoneid
,
1324 match_flags
, 0, ipst
, NULL
);
1326 v6src
= ip6h
->ip6_dst
;
1329 v6src
= ipv6_all_zeros
;
1330 ixas
.ixa_flags
|= IXAF_SET_SOURCE
;
1333 v6dst
= ip6h
->ip6_src
;
1334 len_needed
= ipst
->ips_ipv6_icmp_return
- IPV6_HDR_LEN
- len
;
1335 msg_len
= msgdsize(mp
);
1336 if (msg_len
> len_needed
) {
1337 if (!adjmsg(mp
, len_needed
- msg_len
)) {
1338 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutErrors
);
1342 msg_len
= len_needed
;
1344 mp1
= allocb(IPV6_HDR_LEN
+ len
, BPRI_MED
);
1346 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutErrors
);
1354 * Set IXAF_TRUSTED_ICMP so we can let the ICMP messages this
1355 * node generates be accepted in peace by all on-host destinations.
1356 * If we do NOT assume that all on-host destinations trust
1357 * self-generated ICMP messages, then rework here, ip6.c, and spd.c.
1358 * (Look for IXAF_TRUSTED_ICMP).
1360 ixas
.ixa_flags
|= IXAF_TRUSTED_ICMP
;
1362 ip6h
= (ip6_t
*)mp
->b_rptr
;
1363 mp1
->b_wptr
= (uchar_t
*)ip6h
+ (IPV6_HDR_LEN
+ len
);
1365 ip6h
->ip6_vcf
= IPV6_DEFAULT_VERS_AND_FLOW
;
1366 ip6h
->ip6_nxt
= IPPROTO_ICMPV6
;
1367 ip6h
->ip6_hops
= ipst
->ips_ipv6_def_hops
;
1368 ip6h
->ip6_dst
= v6dst
;
1369 ip6h
->ip6_src
= v6src
;
1370 msg_len
+= IPV6_HDR_LEN
+ len
;
1371 if (msg_len
> IP_MAXPACKET
+ IPV6_HDR_LEN
) {
1372 (void) adjmsg(mp
, IP_MAXPACKET
+ IPV6_HDR_LEN
- msg_len
);
1373 msg_len
= IP_MAXPACKET
+ IPV6_HDR_LEN
;
1375 ip6h
->ip6_plen
= htons((uint16_t)(msgdsize(mp
) - IPV6_HDR_LEN
));
1376 icmp6
= (icmp6_t
*)&ip6h
[1];
1377 bcopy(stuff
, (char *)icmp6
, len
);
1379 * Prepare for checksum by putting icmp length in the icmp
1380 * checksum field. The checksum is calculated in ip_output_wire_v6.
1382 icmp6
->icmp6_cksum
= ip6h
->ip6_plen
;
1383 if (icmp6
->icmp6_type
== ND_REDIRECT
) {
1384 ip6h
->ip6_hops
= IPV6_MAX_HOPS
;
1387 (void) ip_output_simple(mp
, &ixas
);
1392 * Update the output mib when ICMPv6 packets are sent.
1395 icmp_update_out_mib_v6(ill_t
*ill
, icmp6_t
*icmp6
)
1397 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutMsgs
);
1399 switch (icmp6
->icmp6_type
) {
1400 case ICMP6_DST_UNREACH
:
1401 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutDestUnreachs
);
1402 if (icmp6
->icmp6_code
== ICMP6_DST_UNREACH_ADMIN
)
1403 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutAdminProhibs
);
1406 case ICMP6_TIME_EXCEEDED
:
1407 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutTimeExcds
);
1410 case ICMP6_PARAM_PROB
:
1411 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutParmProblems
);
1414 case ICMP6_PACKET_TOO_BIG
:
1415 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutPktTooBigs
);
1418 case ICMP6_ECHO_REQUEST
:
1419 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutEchos
);
1422 case ICMP6_ECHO_REPLY
:
1423 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutEchoReplies
);
1426 case ND_ROUTER_SOLICIT
:
1427 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutRouterSolicits
);
1430 case ND_ROUTER_ADVERT
:
1431 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutRouterAdvertisements
);
1434 case ND_NEIGHBOR_SOLICIT
:
1435 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutNeighborSolicits
);
1438 case ND_NEIGHBOR_ADVERT
:
1439 BUMP_MIB(ill
->ill_icmp6_mib
,
1440 ipv6IfIcmpOutNeighborAdvertisements
);
1444 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutRedirects
);
1447 case MLD_LISTENER_QUERY
:
1448 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutGroupMembQueries
);
1451 case MLD_LISTENER_REPORT
:
1452 case MLD_V2_LISTENER_REPORT
:
1453 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutGroupMembResponses
);
1456 case MLD_LISTENER_REDUCTION
:
1457 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpOutGroupMembReductions
);
1463 * Check if it is ok to send an ICMPv6 error packet in
1464 * response to the IP packet in mp.
1465 * Free the message and return null if no
1466 * ICMP error packet should be sent.
1469 icmp_pkt_err_ok_v6(mblk_t
*mp
, boolean_t mcast_ok
, ip_recv_attr_t
*ira
)
1471 ill_t
*ill
= ira
->ira_ill
;
1472 ip_stack_t
*ipst
= ill
->ill_ipst
;
1479 /* We view multicast and broadcast as the same. */
1480 llbcast
= (ira
->ira_flags
&
1481 (IRAF_L2DST_MULTICAST
|IRAF_L2DST_BROADCAST
)) != 0;
1482 ip6h
= (ip6_t
*)mp
->b_rptr
;
1484 /* Check if source address uniquely identifies the host */
1486 if (IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_src
) ||
1487 IN6_IS_ADDR_V4MAPPED(&ip6h
->ip6_src
) ||
1488 IN6_IS_ADDR_UNSPECIFIED(&ip6h
->ip6_src
)) {
1493 if (ip6h
->ip6_nxt
== IPPROTO_ICMPV6
) {
1494 size_t len_needed
= IPV6_HDR_LEN
+ ICMP6_MINLEN
;
1497 if (mp
->b_wptr
- mp
->b_rptr
< len_needed
) {
1498 if (!pullupmsg(mp
, len_needed
)) {
1499 BUMP_MIB(ill
->ill_icmp6_mib
,
1500 ipv6IfIcmpInErrors
);
1504 ip6h
= (ip6_t
*)mp
->b_rptr
;
1506 icmp6
= (icmp6_t
*)&ip6h
[1];
1507 /* Explicitly do not generate errors in response to redirects */
1508 if (ICMP6_IS_ERROR(icmp6
->icmp6_type
) ||
1509 icmp6
->icmp6_type
== ND_REDIRECT
) {
1515 * Check that the destination is not multicast and that the packet
1516 * was not sent on link layer broadcast or multicast. (Exception
1517 * is Packet too big message as per the draft - when mcast_ok is set.)
1520 (llbcast
|| IN6_IS_ADDR_MULTICAST(&ip6h
->ip6_dst
))) {
1525 if (icmp_err_rate_limit(ipst
)) {
1527 * Only send ICMP error packets every so often.
1528 * This should be done on a per port/source basis,
1529 * but for now this will suffice.
1538 * Called when a packet was sent out the same link that it arrived on.
1539 * Check if it is ok to send a redirect and then send it.
1542 ip_send_potential_redirect_v6(mblk_t
*mp
, ip6_t
*ip6h
, ire_t
*ire
,
1543 ip_recv_attr_t
*ira
)
1545 ill_t
*ill
= ira
->ira_ill
;
1546 ip_stack_t
*ipst
= ill
->ill_ipst
;
1548 ire_t
*src_ire_v6
= NULL
;
1550 ire_t
*nhop_ire
= NULL
;
1553 * Don't send a redirect when forwarding a source
1556 if (ip_source_routed_v6(ip6h
, mp
, ipst
))
1559 if (ire
->ire_type
& IRE_ONLINK
) {
1560 /* Target is directly connected */
1561 v6targ
= &ip6h
->ip6_dst
;
1563 /* Determine the most specific IRE used to send the packets */
1564 nhop_ire
= ire_nexthop(ire
);
1565 if (nhop_ire
== NULL
)
1569 * We won't send redirects to a router
1570 * that doesn't have a link local
1571 * address, but will forward.
1573 if (!IN6_IS_ADDR_LINKLOCAL(&nhop_ire
->ire_addr_v6
)) {
1574 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInAddrErrors
);
1575 ip_drop_input("ipIfStatsInAddrErrors", mp
, ill
);
1576 ire_refrele(nhop_ire
);
1579 v6targ
= &nhop_ire
->ire_addr_v6
;
1581 src_ire_v6
= ire_ftable_lookup_v6(&ip6h
->ip6_src
,
1582 NULL
, NULL
, IRE_INTERFACE
, ire
->ire_ill
, ALL_ZONES
,
1583 MATCH_IRE_ILL
| MATCH_IRE_TYPE
, 0, ipst
, NULL
);
1585 if (src_ire_v6
== NULL
) {
1586 if (nhop_ire
!= NULL
)
1587 ire_refrele(nhop_ire
);
1592 * The source is directly connected.
1596 icmp_send_redirect_v6(mp1
, v6targ
, &ip6h
->ip6_dst
, ira
);
1598 if (nhop_ire
!= NULL
)
1599 ire_refrele(nhop_ire
);
1600 ire_refrele(src_ire_v6
);
1604 * Generate an ICMPv6 redirect message.
1605 * Include target link layer address option if it exists.
1606 * Always include redirect header.
1609 icmp_send_redirect_v6(mblk_t
*mp
, in6_addr_t
*targetp
, in6_addr_t
*dest
,
1610 ip_recv_attr_t
*ira
)
1613 nd_opt_rd_hdr_t
*rdh
;
1615 ncec_t
*ncec
= NULL
;
1619 int max_redir_hdr_data_len
;
1623 boolean_t need_refrele
;
1624 ip_stack_t
*ipst
= ira
->ira_ill
->ill_ipst
;
1626 mp
= icmp_pkt_err_ok_v6(mp
, B_FALSE
, ira
);
1630 if (IS_UNDER_IPMP(ira
->ira_ill
)) {
1631 ill
= ipmp_ill_hold_ipmp_ill(ira
->ira_ill
);
1634 BUMP_MIB(ill
->ill_icmp6_mib
, ipv6IfIcmpInBadRedirects
);
1635 ip_drop_output("no IPMP ill for sending redirect",
1640 need_refrele
= B_TRUE
;
1643 need_refrele
= B_FALSE
;
1646 ncec
= ncec_lookup_illgrp_v6(ill
, targetp
);
1647 if (ncec
!= NULL
&& ncec
->ncec_state
!= ND_INCOMPLETE
&&
1648 ncec
->ncec_lladdr
!= NULL
) {
1649 ll_opt_len
= (sizeof (nd_opt_hdr_t
) +
1650 ill
->ill_phys_addr_length
+ 7)/8 * 8;
1652 len
= sizeof (nd_redirect_t
) + sizeof (nd_opt_rd_hdr_t
) + ll_opt_len
;
1653 ASSERT(len
% 4 == 0);
1654 buf
= kmem_alloc(len
, KM_NOSLEEP
);
1664 rd
= (nd_redirect_t
*)buf
;
1665 rd
->nd_rd_type
= (uint8_t)ND_REDIRECT
;
1667 rd
->nd_rd_reserved
= 0;
1668 rd
->nd_rd_target
= *targetp
;
1669 rd
->nd_rd_dst
= *dest
;
1671 opt
= (nd_opt_hdr_t
*)(buf
+ sizeof (nd_redirect_t
));
1672 if (ncec
!= NULL
&& ll_opt_len
!= 0) {
1673 opt
->nd_opt_type
= ND_OPT_TARGET_LINKADDR
;
1674 opt
->nd_opt_len
= ll_opt_len
/8;
1675 bcopy((char *)ncec
->ncec_lladdr
, &opt
[1],
1676 ill
->ill_phys_addr_length
);
1680 rdh
= (nd_opt_rd_hdr_t
*)(buf
+ sizeof (nd_redirect_t
) + ll_opt_len
);
1681 rdh
->nd_opt_rh_type
= (uint8_t)ND_OPT_REDIRECTED_HEADER
;
1682 /* max_redir_hdr_data_len and nd_opt_rh_len must be multiple of 8 */
1683 max_redir_hdr_data_len
=
1684 (ipst
->ips_ipv6_icmp_return
- IPV6_HDR_LEN
- len
)/8*8;
1685 pkt_len
= msgdsize(mp
);
1686 /* Make sure mp is 8 byte aligned */
1687 if (pkt_len
> max_redir_hdr_data_len
) {
1688 rdh
->nd_opt_rh_len
= (max_redir_hdr_data_len
+
1689 sizeof (nd_opt_rd_hdr_t
))/8;
1690 (void) adjmsg(mp
, max_redir_hdr_data_len
- pkt_len
);
1692 rdh
->nd_opt_rh_len
= (pkt_len
+ sizeof (nd_opt_rd_hdr_t
))/8;
1693 (void) adjmsg(mp
, -(pkt_len
% 8));
1695 rdh
->nd_opt_rh_reserved1
= 0;
1696 rdh
->nd_opt_rh_reserved2
= 0;
1697 /* ipif_v6lcl_addr contains the link-local source address */
1698 srcp
= &ill
->ill_ipif
->ipif_v6lcl_addr
;
1700 /* Redirects sent by router, and router is global zone */
1701 ASSERT(ira
->ira_zoneid
== ALL_ZONES
);
1702 ira
->ira_zoneid
= GLOBAL_ZONEID
;
1703 icmp_pkt_v6(mp
, buf
, len
, srcp
, ira
);
1704 kmem_free(buf
, len
);
1710 /* Generate an ICMP time exceeded message. (May be called as writer.) */
1712 icmp_time_exceeded_v6(mblk_t
*mp
, uint8_t code
, boolean_t mcast_ok
,
1713 ip_recv_attr_t
*ira
)
1717 mp
= icmp_pkt_err_ok_v6(mp
, mcast_ok
, ira
);
1721 bzero(&icmp6
, sizeof (icmp6_t
));
1722 icmp6
.icmp6_type
= ICMP6_TIME_EXCEEDED
;
1723 icmp6
.icmp6_code
= code
;
1724 icmp_pkt_v6(mp
, &icmp6
, sizeof (icmp6_t
), NULL
, ira
);
1728 * Generate an ICMP unreachable message.
1729 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1730 * constructed by the caller.
1733 icmp_unreachable_v6(mblk_t
*mp
, uint8_t code
, boolean_t mcast_ok
,
1734 ip_recv_attr_t
*ira
)
1738 mp
= icmp_pkt_err_ok_v6(mp
, mcast_ok
, ira
);
1742 bzero(&icmp6
, sizeof (icmp6_t
));
1743 icmp6
.icmp6_type
= ICMP6_DST_UNREACH
;
1744 icmp6
.icmp6_code
= code
;
1745 icmp_pkt_v6(mp
, &icmp6
, sizeof (icmp6_t
), NULL
, ira
);
1749 * Generate an ICMP pkt too big message.
1750 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1751 * constructed by the caller.
1754 icmp_pkt2big_v6(mblk_t
*mp
, uint32_t mtu
, boolean_t mcast_ok
,
1755 ip_recv_attr_t
*ira
)
1759 mp
= icmp_pkt_err_ok_v6(mp
, mcast_ok
, ira
);
1763 bzero(&icmp6
, sizeof (icmp6_t
));
1764 icmp6
.icmp6_type
= ICMP6_PACKET_TOO_BIG
;
1765 icmp6
.icmp6_code
= 0;
1766 icmp6
.icmp6_mtu
= htonl(mtu
);
1768 icmp_pkt_v6(mp
, &icmp6
, sizeof (icmp6_t
), NULL
, ira
);
1772 * Generate an ICMP parameter problem message. (May be called as writer.)
1773 * 'offset' is the offset from the beginning of the packet in error.
1774 * When called from ip_output side a minimal ip_recv_attr_t needs to be
1775 * constructed by the caller.
1778 icmp_param_problem_v6(mblk_t
*mp
, uint8_t code
, uint32_t offset
,
1779 boolean_t mcast_ok
, ip_recv_attr_t
*ira
)
1783 mp
= icmp_pkt_err_ok_v6(mp
, mcast_ok
, ira
);
1787 bzero((char *)&icmp6
, sizeof (icmp6_t
));
1788 icmp6
.icmp6_type
= ICMP6_PARAM_PROB
;
1789 icmp6
.icmp6_code
= code
;
1790 icmp6
.icmp6_pptr
= htonl(offset
);
1791 icmp_pkt_v6(mp
, &icmp6
, sizeof (icmp6_t
), NULL
, ira
);
1795 icmp_param_problem_nexthdr_v6(mblk_t
*mp
, boolean_t mcast_ok
,
1796 ip_recv_attr_t
*ira
)
1798 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
1799 uint16_t hdr_length
;
1802 ill_t
*ill
= ira
->ira_ill
;
1804 /* Determine the offset of the bad nexthdr value */
1805 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &hdr_length
, &nexthdrp
)) {
1806 /* Malformed packet */
1807 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
1808 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
1813 offset
= nexthdrp
- mp
->b_rptr
;
1814 icmp_param_problem_v6(mp
, ICMP6_PARAMPROB_NEXTHEADER
, offset
,
1819 * Verify whether or not the IP address is a valid local address.
1820 * Could be a unicast, including one for a down interface.
1821 * If allow_mcbc then a multicast or broadcast address is also
1824 * In the case of a multicast address, however, the
1825 * upper protocol is expected to reset the src address
1826 * to zero when we return IPVL_MCAST so that
1827 * no packets are emitted with multicast address as
1829 * The addresses valid for bind are:
1831 * (2) - IP address of an UP interface
1832 * (3) - IP address of a DOWN interface
1833 * (4) - a multicast address. In this case
1834 * the conn will only receive packets destined to
1835 * the specified multicast address. Note: the
1836 * application still has to issue an
1837 * IPV6_JOIN_GROUP socket option.
1839 * In all the above cases, the bound address must be valid in the current zone.
1840 * When the address is loopback or multicast, there might be many matching IREs
1841 * so bind has to look up based on the zone.
1844 ip_laddr_verify_v6(const in6_addr_t
*v6src
, zoneid_t zoneid
,
1845 ip_stack_t
*ipst
, boolean_t allow_mcbc
, uint_t scopeid
)
1851 ASSERT(!IN6_IS_ADDR_V4MAPPED(v6src
));
1852 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(v6src
));
1854 match_flags
= MATCH_IRE_ZONEONLY
;
1856 ill
= ill_lookup_on_ifindex(scopeid
, B_TRUE
, ipst
);
1859 match_flags
|= MATCH_IRE_ILL
;
1862 src_ire
= ire_ftable_lookup_v6(v6src
, NULL
, NULL
, 0,
1863 ill
, zoneid
, match_flags
, 0, ipst
, NULL
);
1868 * If an address other than in6addr_any is requested,
1869 * we verify that it is a valid address for bind
1870 * Note: Following code is in if-else-if form for
1871 * readability compared to a condition check.
1873 if (src_ire
!= NULL
&& (src_ire
->ire_type
& (IRE_LOCAL
|IRE_LOOPBACK
))) {
1875 * (2) Bind to address of local UP interface
1877 ire_refrele(src_ire
);
1878 return (IPVL_UNICAST_UP
);
1879 } else if (IN6_IS_ADDR_MULTICAST(v6src
)) {
1880 /* (4) bind to multicast address. */
1881 if (src_ire
!= NULL
)
1882 ire_refrele(src_ire
);
1885 * Note: caller should take IPV6_MULTICAST_IF
1886 * into account when selecting a real source address.
1889 return (IPVL_MCAST
);
1896 * (3) Bind to address of local DOWN interface?
1897 * (ipif_lookup_addr() looks up all interfaces
1898 * but we do not get here for UP interfaces
1901 if (src_ire
!= NULL
)
1902 ire_refrele(src_ire
);
1904 ipif
= ipif_lookup_addr_v6(v6src
, NULL
, zoneid
, ipst
);
1908 /* Not a useful source? */
1909 if (ipif
->ipif_flags
& (IPIF_NOLOCAL
| IPIF_ANYCAST
)) {
1914 return (IPVL_UNICAST_DOWN
);
1919 * Verify that both the source and destination addresses are valid. If
1920 * IPDF_VERIFY_DST is not set, then the destination address may be unreachable,
1921 * i.e. have no route to it. Protocols like TCP want to verify destination
1922 * reachability, while tunnels do not.
1924 * Determine the route, the interface, and (optionally) the source address
1925 * to use to reach a given destination.
1926 * Note that we allow connect to broadcast and multicast addresses when
1927 * IPDF_ALLOW_MCBC is set.
1928 * first_hop and dst_addr are normally the same, but if source routing
1929 * they will differ; in that case the first_hop is what we'll use for the
1930 * routing lookup but the dce checks will be done on dst_addr,
1932 * If uinfo is set, then we fill in the best available information
1933 * we have for the destination. This is based on (in priority order) any
1934 * metrics and path MTU stored in a dce_t, route metrics, and finally the
1935 * ill_mtu/ill_mc_mtu.
1937 * Assumes that the caller has set ixa_scopeid for link-local communication.
1940 ip_set_destination_v6(in6_addr_t
*src_addrp
, const in6_addr_t
*dst_addr
,
1941 const in6_addr_t
*firsthop
, ip_xmit_attr_t
*ixa
, iulp_t
*uinfo
,
1946 in6_addr_t setsrc
; /* RTF_SETSRC */
1947 zoneid_t zoneid
= ixa
->ixa_zoneid
; /* Honors SO_ALLZONES */
1948 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1955 boolean_t multirt
= B_FALSE
;
1957 ASSERT(!IN6_IS_ADDR_V4MAPPED(dst_addr
));
1959 ASSERT(!(ixa
->ixa_flags
& IXAF_IS_IPV4
));
1962 * We never send to zero; the ULPs map it to the loopback address.
1963 * We can't allow it since we use zero to mean uninitialized in some
1966 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(dst_addr
));
1968 setsrc
= ipv6_all_zeros
;
1970 * Select a route; For IPMP interfaces, we would only select
1971 * a "hidden" route (i.e., going through a specific under_ill)
1972 * if ixa_ifindex has been specified.
1974 ire
= ip_select_route_v6(firsthop
, *src_addrp
, ixa
, &generation
,
1975 &setsrc
, &error
, &multirt
);
1976 ASSERT(ire
!= NULL
); /* IRE_NOROUTE if none found */
1981 * ire can't be a broadcast or multicast unless IPDF_ALLOW_MCBC is set.
1982 * If IPDF_VERIFY_DST is set, the destination must be reachable.
1983 * Otherwise the destination needn't be reachable.
1985 * If we match on a reject or black hole, then we've got a
1986 * local failure. May as well fail out the connect() attempt,
1987 * since it's never going to succeed.
1989 if (ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
1991 * If we're verifying destination reachability, we always want
1994 * If we're not verifying destination reachability but the
1995 * destination has a route, we still want to fail on the
1996 * temporary address and broadcast address tests.
1998 * In both cases we let the code continue so some reasonable
1999 * information is returned to the caller. That enables the
2000 * caller to use (and even cache) the IRE. conn_ip_output will
2001 * use the generation mismatch path to check for the unreachable
2002 * case thereby avoiding any specific check in the main path.
2004 ASSERT(generation
== IRE_GENERATION_VERIFY
);
2005 if (flags
& IPDF_VERIFY_DST
) {
2007 * Set errno but continue to set up ixa_ire to be
2008 * the RTF_REJECT|RTF_BLACKHOLE IRE.
2009 * That allows callers to use ip_output to get an
2012 if (!(ire
->ire_type
& IRE_HOST
))
2013 error
= ENETUNREACH
;
2015 error
= EHOSTUNREACH
;
2019 if ((ire
->ire_type
& (IRE_BROADCAST
|IRE_MULTICAST
)) &&
2020 !(flags
& IPDF_ALLOW_MCBC
)) {
2022 ire
= ire_reject(ipst
, B_FALSE
);
2023 generation
= IRE_GENERATION_VERIFY
;
2024 error
= ENETUNREACH
;
2028 if (ixa
->ixa_ire
!= NULL
)
2029 ire_refrele_notr(ixa
->ixa_ire
);
2031 ire_refhold_notr(ire
);
2035 ixa
->ixa_ire_generation
= generation
;
2038 * Ensure that ixa_dce is always set any time that ixa_ire is set,
2039 * since some callers will send a packet to conn_ip_output() even if
2043 if (IN6_IS_ADDR_LINKSCOPE(dst_addr
)) {
2044 /* If we are creating a DCE we'd better have an ifindex */
2046 ifindex
= ill
->ill_phyint
->phyint_ifindex
;
2048 flags
&= ~IPDF_UNIQUE_DCE
;
2051 if (flags
& IPDF_UNIQUE_DCE
) {
2052 /* Fallback to the default dce if allocation fails */
2053 dce
= dce_lookup_and_add_v6(dst_addr
, ifindex
, ipst
);
2055 generation
= dce
->dce_generation
;
2057 dce
= dce_lookup_v6(dst_addr
, ifindex
, ipst
,
2061 dce
= dce_lookup_v6(dst_addr
, ifindex
, ipst
, &generation
);
2063 ASSERT(dce
!= NULL
);
2064 if (ixa
->ixa_dce
!= NULL
)
2065 dce_refrele_notr(ixa
->ixa_dce
);
2067 dce_refhold_notr(dce
);
2071 ixa
->ixa_dce_generation
= generation
;
2075 * For multicast with multirt we have a flag passed back from
2076 * ire_lookup_multi_ill_v6 since we don't have an IRE for each
2077 * possible multicast address.
2078 * We also need a flag for multicast since we can't check
2079 * whether RTF_MULTIRT is set in ixa_ire for multicast.
2082 ixa
->ixa_postfragfn
= ip_postfrag_multirt_v6
;
2083 ixa
->ixa_flags
|= IXAF_MULTIRT_MULTICAST
;
2085 ixa
->ixa_postfragfn
= ire
->ire_postfragfn
;
2086 ixa
->ixa_flags
&= ~IXAF_MULTIRT_MULTICAST
;
2088 if (!(ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
))) {
2089 /* Get an nce to cache. */
2090 nce
= ire_to_nce(ire
, NULL
, firsthop
);
2092 /* Allocation failure? */
2093 ixa
->ixa_ire_generation
= IRE_GENERATION_VERIFY
;
2095 if (ixa
->ixa_nce
!= NULL
)
2096 nce_refrele(ixa
->ixa_nce
);
2102 * If the source address is a loopback address, the
2103 * destination had best be local or multicast.
2104 * If we are sending to an IRE_LOCAL using a loopback source then
2105 * it had better be the same zoneid.
2107 if (IN6_IS_ADDR_LOOPBACK(src_addrp
)) {
2108 if ((ire
->ire_type
& IRE_LOCAL
) && ire
->ire_zoneid
!= zoneid
) {
2109 ire
= NULL
; /* Stored in ixa_ire */
2110 error
= EADDRNOTAVAIL
;
2113 if (!(ire
->ire_type
& (IRE_LOOPBACK
|IRE_LOCAL
|IRE_MULTICAST
))) {
2114 ire
= NULL
; /* Stored in ixa_ire */
2115 error
= EADDRNOTAVAIL
;
2121 * Does the caller want us to pick a source address?
2123 if (flags
& IPDF_SELECT_SRC
) {
2124 in6_addr_t src_addr
;
2127 * We use ire_nexthop_ill to avoid the under ipmp
2128 * interface for source address selection. Note that for ipmp
2129 * probe packets, ixa_ifindex would have been specified, and
2130 * the ip_select_route() invocation would have picked an ire
2131 * with ire_ill pointing at an under interface.
2133 ill
= ire_nexthop_ill(ire
);
2135 /* If unreachable we have no ill but need some source */
2137 src_addr
= ipv6_loopback
;
2138 /* Make sure we look for a better source address */
2139 generation
= SRC_GENERATION_VERIFY
;
2141 error
= ip_select_source_v6(ill
, &setsrc
, dst_addr
,
2142 zoneid
, ipst
, B_FALSE
, ixa
->ixa_src_preferences
,
2143 &src_addr
, &generation
, NULL
);
2145 ire
= NULL
; /* Stored in ixa_ire */
2151 * We allow the source address to be down.
2152 * However, we check that we don't use the loopback address
2153 * as a source when sending out on the wire.
2155 if (IN6_IS_ADDR_LOOPBACK(&src_addr
) &&
2156 !(ire
->ire_type
& (IRE_LOCAL
|IRE_LOOPBACK
|IRE_MULTICAST
)) &&
2157 !(ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
))) {
2158 ire
= NULL
; /* Stored in ixa_ire */
2159 error
= EADDRNOTAVAIL
;
2163 *src_addrp
= src_addr
;
2164 ixa
->ixa_src_generation
= generation
;
2168 * Make sure we don't leave an unreachable ixa_nce in place
2169 * since ip_select_route is used when we unplumb i.e., remove
2170 * references on ixa_ire, ixa_nce, and ixa_dce.
2173 if (nce
!= NULL
&& nce
->nce_is_condemned
) {
2175 ixa
->ixa_nce
= NULL
;
2176 ixa
->ixa_ire_generation
= IRE_GENERATION_VERIFY
;
2180 * Note that IPv6 multicast supports PMTU discovery unlike IPv4
2181 * multicast. But pmtu discovery is only enabled for connected
2182 * sockets in general.
2186 * Set initial value for fragmentation limit. Either conn_ip_output
2187 * or ULP might update it when there are routing changes.
2188 * Handles a NULL ixa_ire->ire_ill or a NULL ixa_nce for RTF_REJECT.
2190 pmtu
= ip_get_pmtu(ixa
);
2191 ixa
->ixa_fragsize
= pmtu
;
2192 /* Make sure ixa_fragsize and ixa_pmtu remain identical */
2193 if (ixa
->ixa_flags
& IXAF_VERIFY_PMTU
)
2194 ixa
->ixa_pmtu
= pmtu
;
2197 * Extract information useful for some transports.
2198 * First we look for DCE metrics. Then we take what we have in
2199 * the metrics in the route, where the offlink is used if we have
2202 if (uinfo
!= NULL
) {
2203 bzero(uinfo
, sizeof (*uinfo
));
2205 if (dce
->dce_flags
& DCEF_UINFO
)
2206 *uinfo
= dce
->dce_uinfo
;
2208 rts_merge_metrics(uinfo
, &ire
->ire_metrics
);
2210 /* Allow ire_metrics to decrease the path MTU from above */
2211 if (uinfo
->iulp_mtu
== 0 || uinfo
->iulp_mtu
> pmtu
)
2212 uinfo
->iulp_mtu
= pmtu
;
2214 uinfo
->iulp_localnet
= (ire
->ire_type
& IRE_ONLINK
) != 0;
2215 uinfo
->iulp_loopback
= (ire
->ire_type
& IRE_LOOPBACK
) != 0;
2216 uinfo
->iulp_local
= (ire
->ire_type
& IRE_LOCAL
) != 0;
2232 * Make sure we don't leave an unreachable ixa_nce in place
2233 * since ip_select_route is used when we unplumb i.e., remove
2234 * references on ixa_ire, ixa_nce, and ixa_dce.
2237 if (nce
!= NULL
&& nce
->nce_is_condemned
) {
2239 ixa
->ixa_nce
= NULL
;
2240 ixa
->ixa_ire_generation
= IRE_GENERATION_VERIFY
;
2247 * Handle protocols with which IP is less intimate. There
2248 * can be more than one stream bound to a particular
2249 * protocol. When this is the case, normally each one gets a copy
2250 * of any incoming packets.
2253 * Packets will be distributed to conns in all zones. This is really only
2254 * useful for ICMPv6 as only applications in the global zone can create raw
2255 * sockets for other protocols.
2258 ip_fanout_proto_v6(mblk_t
*mp
, ip6_t
*ip6h
, ip_recv_attr_t
*ira
)
2261 in6_addr_t laddr
= ip6h
->ip6_dst
;
2262 conn_t
*connp
, *first_connp
, *next_connp
;
2264 ill_t
*ill
= ira
->ira_ill
;
2265 ip_stack_t
*ipst
= ill
->ill_ipst
;
2267 connfp
= &ipst
->ips_ipcl_proto_fanout_v6
[ira
->ira_protocol
];
2268 mutex_enter(&connfp
->connf_lock
);
2269 connp
= connfp
->connf_head
;
2270 for (connp
= connfp
->connf_head
; connp
!= NULL
;
2271 connp
= connp
->conn_next
) {
2272 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2273 if (IPCL_PROTO_MATCH_V6(connp
, ira
, ip6h
))
2277 if (connp
== NULL
) {
2279 * No one bound to this port. Is
2280 * there a client that wants all
2281 * unclaimed datagrams?
2283 mutex_exit(&connfp
->connf_lock
);
2284 ip_fanout_send_icmp_v6(mp
, ICMP6_PARAM_PROB
,
2285 ICMP6_PARAMPROB_NEXTHEADER
, ira
);
2289 ASSERT(IPCL_IS_NONSTR(connp
) || connp
->conn_rq
!= NULL
);
2291 CONN_INC_REF(connp
);
2292 first_connp
= connp
;
2295 * XXX: Fix the multiple protocol listeners case. We should not
2296 * be walking the conn->conn_next list here.
2298 connp
= connp
->conn_next
;
2300 while (connp
!= NULL
) {
2301 /* Note: IPCL_PROTO_MATCH_V6 includes conn_wantpacket */
2302 if (IPCL_PROTO_MATCH_V6(connp
, ira
, ip6h
))
2304 connp
= connp
->conn_next
;
2307 if (connp
== NULL
) {
2308 /* No more interested clients */
2309 connp
= first_connp
;
2312 if (((mp1
= dupmsg(mp
)) == NULL
) &&
2313 ((mp1
= copymsg(mp
)) == NULL
)) {
2314 /* Memory allocation failed */
2315 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2316 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
2317 connp
= first_connp
;
2321 CONN_INC_REF(connp
);
2322 mutex_exit(&connfp
->connf_lock
);
2324 ip_fanout_proto_conn(connp
, mp1
, NULL
, (ip6_t
*)mp1
->b_rptr
,
2327 mutex_enter(&connfp
->connf_lock
);
2328 /* Follow the next pointer before releasing the conn. */
2329 next_connp
= connp
->conn_next
;
2330 CONN_DEC_REF(connp
);
2334 /* Last one. Send it upstream. */
2335 mutex_exit(&connfp
->connf_lock
);
2337 ip_fanout_proto_conn(connp
, mp
, NULL
, ip6h
, ira
);
2339 CONN_DEC_REF(connp
);
2343 * Called when it is conceptually a ULP that would send the packet
2344 * e.g., port unreachable and nexthdr unknown. Check that the packet
2345 * would have passed the IPsec global policy before sending the error.
2347 * Send an ICMP error after patching up the packet appropriately.
2348 * Uses ip_drop_input and bumps the appropriate MIB.
2349 * For ICMP6_PARAMPROB_NEXTHEADER we determine the offset to use.
2352 ip_fanout_send_icmp_v6(mblk_t
*mp
, uint_t icmp_type
, uint8_t icmp_code
,
2353 ip_recv_attr_t
*ira
)
2357 ill_t
*ill
= ira
->ira_ill
;
2358 ip_stack_t
*ipst
= ill
->ill_ipst
;
2359 netstack_t
*ns
= ipst
->ips_netstack
;
2360 ipsec_stack_t
*ipss
= ns
->netstack_ipsec
;
2362 secure
= ira
->ira_flags
& IRAF_IPSEC_SECURE
;
2365 * We are generating an icmp error for some inbound packet.
2366 * Called from all ip_fanout_(udp, tcp, proto) functions.
2367 * Before we generate an error, check with global policy
2368 * to see whether this is allowed to enter the system. As
2369 * there is no "conn", we are checking with global policy.
2371 ip6h
= (ip6_t
*)mp
->b_rptr
;
2372 if (secure
|| ipss
->ipsec_inbound_v6_policy_present
) {
2373 mp
= ipsec_check_global_policy(mp
, NULL
, NULL
, ip6h
, ira
, ns
);
2378 /* We never send errors for protocols that we do implement */
2379 if (ira
->ira_protocol
== IPPROTO_ICMPV6
) {
2380 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2381 ip_drop_input("ip_fanout_send_icmp_v6", mp
, ill
);
2386 switch (icmp_type
) {
2387 case ICMP6_DST_UNREACH
:
2388 ASSERT(icmp_code
== ICMP6_DST_UNREACH_NOPORT
);
2390 BUMP_MIB(ill
->ill_ip_mib
, udpIfStatsNoPorts
);
2391 ip_drop_input("ipIfStatsNoPorts", mp
, ill
);
2393 icmp_unreachable_v6(mp
, icmp_code
, B_FALSE
, ira
);
2395 case ICMP6_PARAM_PROB
:
2396 ASSERT(icmp_code
== ICMP6_PARAMPROB_NEXTHEADER
);
2398 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInUnknownProtos
);
2399 ip_drop_input("ipIfStatsInUnknownProtos", mp
, ill
);
2401 /* Let the system determine the offset for this one */
2402 icmp_param_problem_nexthdr_v6(mp
, B_FALSE
, ira
);
2406 panic("ip_fanout_send_icmp_v6: wrong type");
2416 * Fanout for UDP packets that are multicast or ICMP errors.
2417 * (Unicast fanout is handled in ip_input_v6.)
2419 * If SO_REUSEADDR is set all multicast packets
2420 * will be delivered to all conns bound to the same port.
2422 * Fanout for UDP packets.
2423 * The caller puts <fport, lport> in the ports parameter.
2424 * ire_type must be IRE_BROADCAST for multicast and broadcast packets.
2426 * If SO_REUSEADDR is set all multicast and broadcast packets
2427 * will be delivered to all conns bound to the same port.
2430 * Earlier in ip_input on a system with multiple shared-IP zones we
2431 * duplicate the multicast and broadcast packets and send them up
2432 * with each explicit zoneid that exists on that ill.
2433 * This means that here we can match the zoneid with SO_ALLZONES being special.
/*
 * ip_fanout_udp_multi_v6: hand an inbound IPv6 UDP datagram to the conns
 * that are hashed on the local port and want it.
 *
 *   mp    - the packet
 *   ip6h  - its IPv6 header; ip6_dst is used as the local address and
 *           ip6_src as the foreign address for matching
 *   lport - local (destination) port, also the hash key
 *   fport - foreign (source) port
 *   ira   - receive attributes; supplies the ill and thus the ip_stack_t
 *
 * The ASSERT below shows the function expects IRAF_MULTIBROADCAST or
 * IRAF_ICMP_ERROR to be set in ira_flags.
 */
2436 ip_fanout_udp_multi_v6(mblk_t
*mp
, ip6_t
*ip6h
, uint16_t lport
, uint16_t fport
,
2437 ip_recv_attr_t
*ira
)
2443 ill_t
*ill
= ira
->ira_ill
;
2444 ip_stack_t
*ipst
= ill
->ill_ipst
;
2446 ASSERT(ira
->ira_flags
& (IRAF_MULTIBROADCAST
|IRAF_ICMP_ERROR
));
2448 laddr
= ip6h
->ip6_dst
;
2449 faddr
= ip6h
->ip6_src
;
2451 /* Attempt to find a client stream based on destination port. */
2452 connfp
= &ipst
->ips_ipcl_udp_fanout
[IPCL_UDP_HASH(lport
, ipst
)];
/* The bucket's conn list is walked with connf_lock held. */
2453 mutex_enter(&connfp
->connf_lock
);
2454 connp
= connfp
->connf_head
;
2455 while (connp
!= NULL
) {
/* A conn qualifies only if both the address/port match and the policy check pass. */
2456 if ((IPCL_UDP_MATCH_V6(connp
, lport
, laddr
, fport
, faddr
)) &&
2457 conn_wantpacket_v6(connp
, ira
, ip6h
))
2459 connp
= connp
->conn_next
;
/*
 * Take a reference on the first matching conn; it is released with
 * CONN_DEC_REF after delivery further down.
 */
2465 CONN_INC_REF(connp
);
/*
 * With conn_reuseaddr set, further matching conns are searched for and
 * each additional receiver gets its own copy of the message.
 */
2467 if (connp
->conn_reuseaddr
) {
2468 conn_t
*first_connp
= connp
;
2472 connp
= connp
->conn_next
;
2474 while (connp
!= NULL
) {
2475 if (IPCL_UDP_MATCH_V6(connp
, lport
, laddr
,
2477 conn_wantpacket_v6(connp
, ira
, ip6h
))
2479 connp
= connp
->conn_next
;
2481 if (connp
== NULL
) {
2482 /* No more interested clients */
2483 connp
= first_connp
;
/* Try dupmsg() first and fall back to copymsg() if that fails. */
2486 if (((mp1
= dupmsg(mp
)) == NULL
) &&
2487 ((mp1
= copymsg(mp
)) == NULL
)) {
2488 /* Memory allocation failed */
2489 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2490 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
2491 connp
= first_connp
;
/* Hold this conn, then drop the bucket lock for the duration of the delivery. */
2495 CONN_INC_REF(connp
);
2496 mutex_exit(&connfp
->connf_lock
);
2498 IP6_STAT(ipst
, ip6_udp_fanmb
);
/* The copy is delivered with the IPv6 header pointer taken from the copy itself. */
2499 ip_fanout_udp_conn(connp
, mp1
, NULL
,
2500 (ip6_t
*)mp1
->b_rptr
, ira
);
2502 mutex_enter(&connfp
->connf_lock
);
2503 /* Follow the next pointer before releasing the conn. */
2504 next_connp
= connp
->conn_next
;
2505 IP6_STAT(ipst
, ip6_udp_fanmb
);
2506 CONN_DEC_REF(connp
);
2511 /* Last one. Send it upstream. */
2512 mutex_exit(&connfp
->connf_lock
);
2514 IP6_STAT(ipst
, ip6_udp_fanmb
);
/* The final receiver gets the original mp rather than a copy. */
2515 ip_fanout_udp_conn(connp
, mp
, NULL
, ip6h
, ira
);
2516 CONN_DEC_REF(connp
);
2520 mutex_exit(&connfp
->connf_lock
);
2522 * No one bound to this port. Is
2523 * there a client that wants all
2524 * unclaimed datagrams?
/*
 * If the IPPROTO_UDP protocol fanout bucket is non-empty the packet goes
 * to ip_fanout_proto_v6(); the ICMPv6 "port unreachable" call below is
 * presumably the alternative branch (the branching is not visible here).
 */
2526 if (ipst
->ips_ipcl_proto_fanout_v6
[IPPROTO_UDP
].connf_head
!= NULL
) {
2527 ASSERT(ira
->ira_protocol
== IPPROTO_UDP
);
2528 ip_fanout_proto_v6(mp
, ip6h
, ira
);
2530 ip_fanout_send_icmp_v6(mp
, ICMP6_DST_UNREACH
,
2531 ICMP6_DST_UNREACH_NOPORT
, ira
);
2536 * int ip_find_hdr_v6()
2538 * This routine is used by the upper layer protocols, iptun, and IPsec:
2539 * - Set extension header pointers to appropriate locations
2540 * - Determine IPv6 header length and return it
2541 * - Return a pointer to the last nexthdr value
2543 * The caller must initialize ipp_fields.
2545 * NOTE: If multiple extension headers of the same type are present,
2546 * ip_find_hdr_v6() will set the respective extension header pointers
2547 * to the first one that it encounters in the IPv6 header. It also
2548 * skips fragment headers. This routine deals with malformed packets
2549 * of various sorts in which case the returned length is up to the
/*
 * ip_find_hdr_v6: walk the IPv6 extension headers that follow ip6h inside
 * the first mblk (up to mp->b_wptr) and record them in *ipp.
 *
 *   mp       - message holding the header; only b_wptr is used as a bound
 *   ip6h     - start of the IPv6 header
 *   ipp      - packet-info structure updated in place; ipp_fields bits are
 *              OR-ed in, so the caller must have initialized it
 *   nexthdrp - optional (may be NULL); receives the last next-header value
 *
 * For each header type only the first occurrence is stored.
 */
2553 ip_find_hdr_v6(mblk_t
*mp
, ip6_t
*ip6h
, ip_pkt_t
*ipp
, uint8_t *nexthdrp
)
2555 uint_t length
, ehdrlen
;
2557 uint8_t *whereptr
, *endptr
;
2558 ip6_dest_t
*tmpdstopts
;
2559 ip6_rthdr_t
*tmprthdr
;
2560 ip6_hbh_t
*tmphopopts
;
2561 ip6_frag_t
*tmpfraghdr
;
/* Hop limit, traffic class and destination address are always recorded. */
2563 ipp
->ipp_fields
|= IPPF_HOPLIMIT
| IPPF_TCLASS
| IPPF_ADDR
;
2564 ipp
->ipp_hoplimit
= ip6h
->ip6_hops
;
2565 ipp
->ipp_tclass
= IPV6_FLOW_TCLASS(ip6h
->ip6_flow
);
2566 ipp
->ipp_addr
= ip6h
->ip6_dst
;
2568 length
= IPV6_HDR_LEN
;
2569 whereptr
= ((uint8_t *)&ip6h
[1]); /* point to next hdr */
2570 endptr
= mp
->b_wptr
;
2572 nexthdr
= ip6h
->ip6_nxt
;
2573 while (whereptr
< endptr
) {
2574 /* Is there enough left for len + nexthdr? */
2575 if (whereptr
+ MIN_EHDR_LEN
> endptr
)
2579 case IPPROTO_HOPOPTS
: {
2580 tmphopopts
= (ip6_hbh_t
*)whereptr
;
/* The length field counts 8-octet units beyond the first 8 octets, hence the + 1. */
2581 ehdrlen
= 8 * (tmphopopts
->ip6h_len
+ 1);
/* Bounds check: the whole extension header must lie before endptr. */
2582 if ((uchar_t
*)tmphopopts
+ ehdrlen
> endptr
)
2584 nexthdr
= tmphopopts
->ip6h_nxt
;
2586 /* return only 1st hbh */
2587 if (!(ipp
->ipp_fields
& IPPF_HOPOPTS
)) {
2588 ipp
->ipp_fields
|= IPPF_HOPOPTS
;
2589 ipp
->ipp_hopopts
= (ip6_hbh_t
*)whereptr
;
2590 ipp
->ipp_hopoptslen
= ehdrlen
;
2594 case IPPROTO_DSTOPTS
:
2595 tmpdstopts
= (ip6_dest_t
*)whereptr
;
2596 ehdrlen
= 8 * (tmpdstopts
->ip6d_len
+ 1);
2597 if ((uchar_t
*)tmpdstopts
+ ehdrlen
> endptr
)
2599 nexthdr
= tmpdstopts
->ip6d_nxt
;
2601 * ipp_dstopts is set to the destination header after a
2603 * Assume it is a post-rthdr destination header
2604 * and adjust when we find an rthdr.
2606 if (!(ipp
->ipp_fields
& IPPF_DSTOPTS
)) {
2607 ipp
->ipp_fields
|= IPPF_DSTOPTS
;
2608 ipp
->ipp_dstopts
= tmpdstopts
;
2609 ipp
->ipp_dstoptslen
= ehdrlen
;
2612 case IPPROTO_ROUTING
:
2613 tmprthdr
= (ip6_rthdr_t
*)whereptr
;
2614 ehdrlen
= 8 * (tmprthdr
->ip6r_len
+ 1);
2615 if ((uchar_t
*)tmprthdr
+ ehdrlen
> endptr
)
2617 nexthdr
= tmprthdr
->ip6r_nxt
;
2618 /* return only 1st rthdr */
2619 if (!(ipp
->ipp_fields
& IPPF_RTHDR
)) {
2620 ipp
->ipp_fields
|= IPPF_RTHDR
;
2621 ipp
->ipp_rthdr
= tmprthdr
;
2622 ipp
->ipp_rthdrlen
= ehdrlen
;
2625 * Make any destination header we've seen be a
2626 * pre-rthdr destination header.
/*
 * Move the recorded dstopts pointer/length into the rthdrdstopts slots
 * and clear the dstopts slots and flag.
 */
2628 if (ipp
->ipp_fields
& IPPF_DSTOPTS
) {
2629 ipp
->ipp_fields
&= ~IPPF_DSTOPTS
;
2630 ipp
->ipp_fields
|= IPPF_RTHDRDSTOPTS
;
2631 ipp
->ipp_rthdrdstopts
= ipp
->ipp_dstopts
;
2632 ipp
->ipp_dstopts
= NULL
;
2633 ipp
->ipp_rthdrdstoptslen
= ipp
->ipp_dstoptslen
;
2634 ipp
->ipp_dstoptslen
= 0;
2637 case IPPROTO_FRAGMENT
:
2638 tmpfraghdr
= (ip6_frag_t
*)whereptr
;
/* The fragment header has a fixed size, unlike the headers above. */
2639 ehdrlen
= sizeof (ip6_frag_t
);
2640 if ((uchar_t
*)tmpfraghdr
+ ehdrlen
> endptr
)
2642 nexthdr
= tmpfraghdr
->ip6f_nxt
;
2643 if (!(ipp
->ipp_fields
& IPPF_FRAGHDR
)) {
2644 ipp
->ipp_fields
|= IPPF_FRAGHDR
;
2645 ipp
->ipp_fraghdr
= tmpfraghdr
;
2646 ipp
->ipp_fraghdrlen
= ehdrlen
;
/* Advance past the extension header just examined. */
2654 whereptr
+= ehdrlen
;
/* nexthdrp is an optional out-parameter. */
2657 if (nexthdrp
!= NULL
)
2658 *nexthdrp
= nexthdr
;
2663 * Try to determine where and what are the IPv6 header length and
2664 * pointer to nexthdr value for the upper layer protocol (or an
2665 * unknown next hdr).
2667 * The nexthdrpp parameter returns a pointer to the nexthdr value;
2668 * Must handle malformed packets of various sorts.
2669 * Function returns failure for malformed cases.
/*
 * ip_hdr_length_nexthdr_v6: compute the combined length of the IPv6 header
 * and its extension headers, and locate the next-header byte that names the
 * first non-extension protocol.
 *
 *   mp             - message holding the headers; b_wptr bounds the walk
 *   ip6h           - start of the IPv6 header (version is ASSERTed)
 *   hdr_length_ptr - out: header length accumulated so far
 *   nexthdrpp      - out: pointer to the governing next-header byte
 *
 * Both out-parameters are written on every visible exit path, so they hold
 * a value even when the walk stops early.
 */
2672 ip_hdr_length_nexthdr_v6(mblk_t
*mp
, ip6_t
*ip6h
, uint16_t *hdr_length_ptr
,
2673 uint8_t **nexthdrpp
)
2680 ip6_dest_t
*desthdr
;
2682 ip6_frag_t
*fraghdr
;
2684 ASSERT(IPH_HDR_VERSION(ip6h
) == IPV6_VERSION
);
2685 length
= IPV6_HDR_LEN
;
2686 whereptr
= ((uint8_t *)&ip6h
[1]); /* point to next hdr */
2687 endptr
= mp
->b_wptr
;
/* nexthdrp always points at the next-header byte describing whereptr. */
2689 nexthdrp
= &ip6h
->ip6_nxt
;
2690 while (whereptr
< endptr
) {
2691 /* Is there enough left for len + nexthdr? */
2692 if (whereptr
+ MIN_EHDR_LEN
> endptr
)
2695 switch (*nexthdrp
) {
2696 case IPPROTO_HOPOPTS
:
2697 case IPPROTO_DSTOPTS
:
2698 /* Assumes the headers are identical for hbh and dst */
2699 desthdr
= (ip6_dest_t
*)whereptr
;
/* Length field is in 8-octet units, excluding the first 8 octets. */
2700 ehdrlen
= 8 * (desthdr
->ip6d_len
+ 1);
2701 if ((uchar_t
*)desthdr
+ ehdrlen
> endptr
)
2703 nexthdrp
= &desthdr
->ip6d_nxt
;
2705 case IPPROTO_ROUTING
:
2706 rthdr
= (ip6_rthdr_t
*)whereptr
;
2707 ehdrlen
= 8 * (rthdr
->ip6r_len
+ 1);
2708 if ((uchar_t
*)rthdr
+ ehdrlen
> endptr
)
2710 nexthdrp
= &rthdr
->ip6r_nxt
;
2712 case IPPROTO_FRAGMENT
:
2713 fraghdr
= (ip6_frag_t
*)whereptr
;
2714 ehdrlen
= sizeof (ip6_frag_t
);
/* Same bounds check as above, written as "one past the fragment header". */
2715 if ((uchar_t
*)&fraghdr
[1] > endptr
)
2717 nexthdrp
= &fraghdr
->ip6f_nxt
;
2720 /* No next header means we're finished */
2722 *hdr_length_ptr
= length
;
2723 *nexthdrpp
= nexthdrp
;
2727 whereptr
+= ehdrlen
;
2728 *hdr_length_ptr
= length
;
2729 *nexthdrpp
= nexthdrp
;
/*
 * After the walk, check whether the governing next-header value still
 * names an extension header that was not consumed.
 */
2731 switch (*nexthdrp
) {
2732 case IPPROTO_HOPOPTS
:
2733 case IPPROTO_DSTOPTS
:
2734 case IPPROTO_ROUTING
:
2735 case IPPROTO_FRAGMENT
:
2737 * If any know extension headers are still to be processed,
2738 * the packet's malformed (or at least all the IP header(s) are
2739 * not in the same mblk - and that should never happen.
2745 * If we get here, we know that all of the IP headers were in
2746 * the same mblk, even if the ULP header is in the next mblk.
2748 *hdr_length_ptr
= length
;
2749 *nexthdrpp
= nexthdrp
;
2755 * Return the length of the IPv6 related headers (including extension headers)
2756 * Returns a length even if the packet is malformed.
/*
 * ip_hdr_length_v6: convenience wrapper around ip_hdr_length_nexthdr_v6().
 * The callee's return value is deliberately discarded via the (void) cast;
 * hdr_len and nexthdrp are the out-parameters it fills in.
 */
2759 ip_hdr_length_v6(mblk_t
*mp
, ip6_t
*ip6h
)
2764 (void) ip_hdr_length_nexthdr_v6(mp
, ip6h
, &hdr_len
, &nexthdrp
);
2769 * Parse and process any hop-by-hop or destination options.
2771 * Assumes that q is an ill read queue so that ICMP errors for link-local
2772 * destinations are sent out the correct interface.
2774 * Returns -1 if there was an error and mp has been consumed.
2775 * Returns 0 if no special action is needed.
2776 * Returns 1 if the packet contained a router alert option for this node
2777 * which is verified to be "interesting/known" for our implementation.
2779 * XXX Note: In future as more hbh or dest options are defined,
2780 * it may be better to have different routines for hbh and dest
2781 * options as opt_type fields other than IP6OPT_PAD1 and IP6OPT_PADN
2782 * may have same value in different namespaces. Or is it same namespace ??
2783 * Current code checks for each opt_type (other than pads) if it is in
2784 * the expected nexthdr (hbh or dest)
/*
 * ip_process_options_v6: iterate over the TLV-encoded options of one
 * hop-by-hop or destination options header.
 *
 *   mp       - the packet (passed to the drop/ICMP routines on error)
 *   ip6h     - IPv6 header; ip6_src may be rewritten by the home address
 *              option, and it is the base for ICMP error offsets
 *   optptr   - first option byte
 *   optlen   - number of option bytes remaining
 *   hdr_type - IPPROTO_HOPOPTS or IPPROTO_DSTOPTS; used to reject options
 *              that appear in the wrong kind of header
 *   ira      - receive attributes (ill, IPsec state)
 *
 * Options other than Pad1 occupy 2 + optptr[1] bytes (type byte, length
 * byte, then data); each case checks that this does not exceed optlen.
 */
2787 ip_process_options_v6(mblk_t
*mp
, ip6_t
*ip6h
,
2788 uint8_t *optptr
, uint_t optlen
, uint8_t hdr_type
, ip_recv_attr_t
*ira
)
2793 const char *errtype
;
2794 ill_t
*ill
= ira
->ira_ill
;
2795 ip_stack_t
*ipst
= ill
->ill_ipst
;
2797 while (optlen
!= 0) {
2799 if (opt_type
== IP6OPT_PAD1
) {
/* errtype is only used in the debug messages emitted further down. */
2804 errtype
= "malformed";
2808 * Note:We don't verify that (N-2) pad octets
2809 * are zero as required by spec. Adhere to
2810 * "be liberal in what you accept..." part of
2811 * implementation philosophy (RFC791,RFC1122)
2813 optused
= 2 + optptr
[1];
2814 if (optused
> optlen
)
2819 if (hdr_type
!= IPPROTO_HOPOPTS
)
2821 goto opt_error
; /* XXX Not implemented! */
2823 case IP6OPT_ROUTER_ALERT
: {
2824 struct ip6_opt_router
*or;
/* Router alert is only accepted inside a hop-by-hop header. */
2826 if (hdr_type
!= IPPROTO_HOPOPTS
)
2828 optused
= 2 + optptr
[1];
2829 if (optused
> optlen
)
2831 or = (struct ip6_opt_router
*)optptr
;
2832 /* Check total length and alignment */
/* The value is read through a uint16_t pointer below, so it must be 2-byte aligned. */
2833 if (optused
!= sizeof (*or) ||
2834 ((uintptr_t)or->ip6or_value
& 0x1) != 0)
2837 switch (*((uint16_t *)or->ip6or_value
)) {
2839 case IP6_ALERT_RSVP
:
2844 case IP6OPT_HOME_ADDRESS
: {
2846 * Minimal support for the home address option
2847 * (which is required by all IPv6 nodes).
2848 * Implement by just swapping the home address
2849 * and source address.
2850 * XXX Note: this has IPsec implications since
2851 * AH needs to take this into account.
2852 * Also, when IPsec is used we need to ensure
2853 * that this is only processed once
2854 * in the received packet (to avoid swapping
2856 * NOTE:This option processing is considered
2857 * to be unsafe and prone to a denial of
2859 * The current processing is not safe even with
2860 * IPsec secured IP packets. Since the home
2861 * address option processing requirement still
2862 * is in the IETF draft and in the process of
2863 * being redefined for its usage, it has been
2864 * decided to turn off the option by default.
2865 * If this section of code needs to be executed,
2866 * ndd variable ip6_ignore_home_address_opt
2867 * should be set to 0 at the user's own risk.
2869 struct ip6_opt_home_address
*oh
;
/* Per-stack tunable that gates home address option processing. */
2872 if (ipst
->ips_ipv6_ignore_home_address_opt
)
/* Home address is only accepted inside a destination options header. */
2875 if (hdr_type
!= IPPROTO_DSTOPTS
)
2877 optused
= 2 + optptr
[1];
2878 if (optused
> optlen
)
2882 * We did this dest. opt the first time
2883 * around (i.e. before AH processing).
2884 * If we've done AH... stop now.
2886 if ((ira
->ira_flags
& IRAF_IPSEC_SECURE
) &&
2887 ira
->ira_ipsec_ah_sa
!= NULL
)
2890 oh
= (struct ip6_opt_home_address
*)optptr
;
2891 /* Check total length and alignment */
/* The address is accessed as an in6_addr_t below; 8-byte alignment is required. */
2892 if (optused
< sizeof (*oh
) ||
2893 ((uintptr_t)oh
->ip6oh_addr
& 0x7) != 0)
2895 /* Swap ip6_src and the home address */
2896 tmp
= ip6h
->ip6_src
;
2897 /* XXX Note: only 8 byte alignment option */
2898 ip6h
->ip6_src
= *(in6_addr_t
*)oh
->ip6oh_addr
;
2899 *(in6_addr_t
*)oh
->ip6oh_addr
= tmp
;
2903 case IP6OPT_TUNNEL_LIMIT
:
2904 if (hdr_type
!= IPPROTO_DSTOPTS
) {
2907 optused
= 2 + optptr
[1];
2908 if (optused
> optlen
) {
2917 errtype
= "unknown";
2920 /* Determine which zone should send error */
/*
 * IP6OPT_TYPE() selects the action for an option that was not handled
 * above: skip it, drop the packet, or drop and send an ICMPv6
 * parameter problem.
 */
2921 switch (IP6OPT_TYPE(opt_type
)) {
2922 case IP6OPT_TYPE_SKIP
:
2923 optused
= 2 + optptr
[1];
2924 if (optused
> optlen
)
2926 ip1dbg(("ip_process_options_v6: %s "
2927 "opt 0x%x skipped\n",
2928 errtype
, opt_type
));
2930 case IP6OPT_TYPE_DISCARD
:
2931 ip1dbg(("ip_process_options_v6: %s "
2932 "opt 0x%x; packet dropped\n",
2933 errtype
, opt_type
));
2934 BUMP_MIB(ill
->ill_ip_mib
,
2935 ipIfStatsInHdrErrors
);
2936 ip_drop_input("ipIfStatsInHdrErrors",
2940 case IP6OPT_TYPE_ICMP
:
2941 BUMP_MIB(ill
->ill_ip_mib
,
2942 ipIfStatsInHdrErrors
);
2943 ip_drop_input("ipIfStatsInHdrErrors",
2945 icmp_param_problem_v6(mp
,
2946 ICMP6_PARAMPROB_OPTION
,
2951 case IP6OPT_TYPE_FORCEICMP
:
2952 BUMP_MIB(ill
->ill_ip_mib
,
2953 ipIfStatsInHdrErrors
);
2954 ip_drop_input("ipIfStatsInHdrErrors",
2956 icmp_param_problem_v6(mp
,
2957 ICMP6_PARAMPROB_OPTION
,
2973 /* Determine which zone should send error */
/* The ICMP pointer value is the offset of the offending option from the IPv6 header. */
2974 ip_drop_input("ICMP_PARAM_PROBLEM", mp
, ill
);
2975 icmp_param_problem_v6(mp
, ICMP6_PARAMPROB_OPTION
,
2976 (uint32_t)(optptr
- (uint8_t *)ip6h
),
2982 * Process a routing header that is not yet empty.
2983 * Because of RFC 5095, we now reject all route headers.
/*
 * ip_process_rthdr: handle a routing header whose segments-left field is
 * non-zero (ASSERTed below).
 *
 *   mp   - the packet
 *   ip6h - IPv6 header, used as the base for the ICMP error offset
 *   rth  - the routing header
 *   ira  - receive attributes; supplies the ill and ip_stack_t
 *
 * Both visible paths drop the packet: one only counts it, the other also
 * issues an ICMPv6 parameter problem.
 */
2986 ip_process_rthdr(mblk_t
*mp
, ip6_t
*ip6h
, ip6_rthdr_t
*rth
,
2987 ip_recv_attr_t
*ira
)
2989 ill_t
*ill
= ira
->ira_ill
;
2990 ip_stack_t
*ipst
= ill
->ill_ipst
;
2992 ASSERT(rth
->ip6r_segleft
!= 0);
/* With forwarding of source-routed packets disabled, count and drop. */
2994 if (!ipst
->ips_ipv6_forward_src_routed
) {
2995 /* XXX Check for source routed out same interface? */
2996 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsForwProhibits
);
2997 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInAddrErrors
);
2998 ip_drop_input("ipIfStatsInAddrErrors", mp
, ill
);
/* The ICMP pointer is the offset of the routing type field from the IPv6 header. */
3003 ip_drop_input("ICMP_PARAM_PROBLEM", mp
, ill
);
3004 icmp_param_problem_v6(mp
, ICMP6_PARAMPROB_HEADER
,
3005 (uint32_t)((uchar_t
*)&rth
->ip6r_type
- (uchar_t
*)ip6h
),
3010 * Read side put procedure for IPv6 module.
/*
 * ip_rput_v6: STREAMS read-side put procedure.
 *
 *   q  - read queue; q_ptr holds the ill_t for this stream
 *   mp - inbound message
 *
 * M_DATA messages are passed to ip_input_v6() after a mac header info
 * structure is built from the message; everything else goes to
 * ip_rput_notdata().
 */
3013 ip_rput_v6(queue_t
*q
, mblk_t
*mp
)
3017 ill
= (ill_t
*)q
->q_ptr
;
/* Special handling while the ill is condemned or its link-local subnet is pending. */
3018 if (ill
->ill_state_flags
& (ILL_CONDEMNED
| ILL_LL_SUBNET_PENDING
)) {
3019 union DL_primitives
*dl
;
3021 dl
= (union DL_primitives
*)mp
->b_rptr
;
3023 * Things are opening or closing - only accept DLPI
3024 * ack messages. If the stream is closing and ip_wsrv
3025 * has completed, ip_close is out of the qwait, but has
3026 * not yet completed qprocsoff. Don't proceed any further
3027 * because the ill has been cleaned up and things hanging
3028 * off the ill have been freed.
/* Anything that is not M_PCPROTO, and any DL_UNITDATA_IND, is filtered here. */
3030 if ((mp
->b_datap
->db_type
!= M_PCPROTO
) ||
3031 (dl
->dl_primitive
== DL_UNITDATA_IND
)) {
3036 if (DB_TYPE(mp
) == M_DATA
) {
3037 struct mac_header_info_s mhi
;
3039 ip_mdata_to_mhi(ill
, mp
, &mhi
);
3040 ip_input_v6(ill
, NULL
, mp
, &mhi
);
3042 ip_rput_notdata(ill
, mp
);
3047 * Walk through the IPv6 packet in mp and see if there's an AH header
3048 * in it. See if the AH header needs to get done before other headers in
3049 * the packet. (Worker function for ipsec_early_ah_v6().)
3051 #define IPSEC_HDR_DONT_PROCESS 0
3052 #define IPSEC_HDR_PROCESS 1
3053 #define IPSEC_MEMORY_ERROR 2 /* or malformed packet */
/*
 * ipsec_needs_processing_v6: scan the extension header chain and decide
 * whether AH must be handled now.
 *
 *   mp      - the packet; pulled up into one contiguous mblk first
 *   nexthdr - out: set to IPPROTO_AH when IPSEC_HDR_PROCESS is returned
 *
 * Returns one of IPSEC_HDR_DONT_PROCESS, IPSEC_HDR_PROCESS or
 * IPSEC_MEMORY_ERROR (pullup failure, or a header running past b_wptr).
 */
3055 ipsec_needs_processing_v6(mblk_t
*mp
, uint8_t *nexthdr
)
3062 ip6_dest_t
*desthdr
;
3067 * For now just pullup everything. In general, the less pullups,
3068 * the better, but there's so much squirrelling through anyway,
3069 * it's just easier this way.
3071 if (!pullupmsg(mp
, -1)) {
3072 return (IPSEC_MEMORY_ERROR
);
/* b_rptr is re-read after the pullup rather than reusing an earlier header pointer. */
3075 ip6h
= (ip6_t
*)mp
->b_rptr
;
3076 length
= IPV6_HDR_LEN
;
3077 whereptr
= ((uint8_t *)&ip6h
[1]); /* point to next hdr */
3078 endptr
= mp
->b_wptr
;
3081 * We can't just use the argument nexthdr in the place
3082 * of nexthdrp becaue we don't dereference nexthdrp
3083 * till we confirm whether it is a valid address.
3085 nexthdrp
= &ip6h
->ip6_nxt
;
3086 while (whereptr
< endptr
) {
3087 /* Is there enough left for len + nexthdr? */
3088 if (whereptr
+ MIN_EHDR_LEN
> endptr
)
3089 return (IPSEC_MEMORY_ERROR
);
3091 switch (*nexthdrp
) {
3092 case IPPROTO_HOPOPTS
:
3093 case IPPROTO_DSTOPTS
:
3094 /* Assumes the headers are identical for hbh and dst */
3095 desthdr
= (ip6_dest_t
*)whereptr
;
3096 ehdrlen
= 8 * (desthdr
->ip6d_len
+ 1);
3097 if ((uchar_t
*)desthdr
+ ehdrlen
> endptr
)
3098 return (IPSEC_MEMORY_ERROR
);
3100 * Return DONT_PROCESS because the destination
3101 * options header may be for each hop in a
3102 * routing-header, and we only want AH if we're
3103 * finished with routing headers.
/* Only destination options stop the scan; hop-by-hop headers are stepped over. */
3105 if (*nexthdrp
== IPPROTO_DSTOPTS
)
3106 return (IPSEC_HDR_DONT_PROCESS
);
3107 nexthdrp
= &desthdr
->ip6d_nxt
;
3109 case IPPROTO_ROUTING
:
3110 rthdr
= (ip6_rthdr_t
*)whereptr
;
3113 * If there's more hops left on the routing header,
3114 * return now with DON'T PROCESS.
3116 if (rthdr
->ip6r_segleft
> 0)
3117 return (IPSEC_HDR_DONT_PROCESS
);
3119 ehdrlen
= 8 * (rthdr
->ip6r_len
+ 1);
3120 if ((uchar_t
*)rthdr
+ ehdrlen
> endptr
)
3121 return (IPSEC_MEMORY_ERROR
);
3122 nexthdrp
= &rthdr
->ip6r_nxt
;
3124 case IPPROTO_FRAGMENT
:
3125 /* Wait for reassembly */
3126 return (IPSEC_HDR_DONT_PROCESS
);
/* Report AH to the caller through the out-parameter. */
3128 *nexthdr
= IPPROTO_AH
;
3129 return (IPSEC_HDR_PROCESS
);
3131 /* No next header means we're finished */
3133 return (IPSEC_HDR_DONT_PROCESS
);
3136 whereptr
+= ehdrlen
;
3139 * Malformed/truncated packet.
3141 return (IPSEC_MEMORY_ERROR
);
3145 * Path for AH if options are present.
3146 * Returns NULL if the mblk was consumed.
3148 * Sometimes AH needs to be done before other IPv6 headers for security
3149 * reasons. This function (and its ipsec_needs_processing_v6() above)
3150 * indicates if that is so, and fans out to the appropriate IPsec protocol
3151 * for the datagram passed in.
/*
 * ipsec_early_ah_v6: act on the verdict of ipsec_needs_processing_v6().
 *
 *   mp  - the packet
 *   ira - receive attributes; supplies the ill, ip_stack_t and IPsec stack
 *
 * IPSEC_MEMORY_ERROR is counted as an input discard. When AH processing is
 * required, the AH SA is looked up and its input function is invoked, after
 * which the packet continues via ip_input_post_ipsec().
 */
3154 ipsec_early_ah_v6(mblk_t
*mp
, ip_recv_attr_t
*ira
)
3158 ill_t
*ill
= ira
->ira_ill
;
3159 ip_stack_t
*ipst
= ill
->ill_ipst
;
3160 ipsec_stack_t
*ipss
= ipst
->ips_netstack
->netstack_ipsec
;
3162 switch (ipsec_needs_processing_v6(mp
, &nexthdr
)) {
3163 case IPSEC_MEMORY_ERROR
:
3164 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
3165 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
3168 case IPSEC_HDR_DONT_PROCESS
:
3172 /* Default means send it to AH! */
3173 ASSERT(nexthdr
== IPPROTO_AH
);
/* If IPsec is not loaded the packet is handed to ip_proto_not_sup(). */
3175 if (!ipsec_loaded(ipss
)) {
3176 ip_proto_not_sup(mp
, ira
);
/* mp is reassigned from the lookup; ah is filled in for the input function below. */
3180 mp
= ipsec_inbound_ah_sa(mp
, ira
, &ah
);
3184 ASSERT(ira
->ira_flags
& IRAF_IPSEC_SECURE
);
3185 ASSERT(ira
->ira_ipsec_ah_sa
!= NULL
);
3186 ASSERT(ira
->ira_ipsec_ah_sa
->ipsa_input_func
!= NULL
);
/* Dispatch through the SA's input function pointer. */
3187 mp
= ira
->ira_ipsec_ah_sa
->ipsa_input_func(mp
, ah
, ira
);
3191 * Either it failed or is pending. In the former case
3192 * ipIfStatsInDiscards was increased.
3197 /* we're done with IPsec processing, send it up */
3198 ip_input_post_ipsec(mp
, ira
);
3203 * Reassemble fragment.
3204 * When it returns a completed message the first mblk will only contain
3205 * the headers prior to the fragment header, with the nexthdr value updated
3206 * to be the header after the fragment header.
/*
 * ip_input_fragment_v6: IPv6 fragment reassembly for one inbound fragment.
 *
 *   mp      - the fragment
 *   ip6h    - its IPv6 header
 *   fraghdr - its fragment header
 *   remlen  - remaining length (its use is not visible in this listing)
 *   ira     - receive attributes; supplies ill/ipst and is updated with
 *             the packet length, header length and protocol on completion
 *
 * Fragments are queued on a per-ill hash bucket chosen from the source
 * address and the fragment ident, under that bucket's ipfb_lock. Partial
 * UDP checksum information supplied by hardware is accumulated across
 * fragments when possible.
 */
3209 ip_input_fragment_v6(mblk_t
*mp
, ip6_t
*ip6h
,
3210 ip6_frag_t
*fraghdr
, uint_t remlen
, ip_recv_attr_t
*ira
)
3212 uint32_t ident
= ntohl(fraghdr
->ip6f_ident
);
3214 boolean_t more_frags
;
3215 uint8_t nexthdr
= fraghdr
->ip6f_nxt
;
3216 in6_addr_t
*v6dst_ptr
;
3217 in6_addr_t
*v6src_ptr
;
3225 uint8_t ecn_info
= 0;
3229 boolean_t pruned
= B_FALSE
;
3232 ill_t
*ill
= ira
->ira_ill
;
3233 ip_stack_t
*ipst
= ill
->ill_ipst
;
3234 uint_t prev_nexthdr_offset
;
3235 uint8_t prev_nexthdr
;
3237 uint32_t packet_size
;
3240 * We utilize hardware computed checksum info only for UDP since
3241 * IP fragmentation is a normal occurence for the protocol. In
3242 * addition, checksum offload support for IP fragments carrying
3243 * UDP payload is commonly implemented across network adapters.
3245 ASSERT(ira
->ira_rill
!= NULL
);
3246 if (nexthdr
== IPPROTO_UDP
&& dohwcksum
&&
3247 ILL_HCKSUM_CAPABLE(ira
->ira_rill
) &&
3248 (DB_CKSUMFLAGS(mp
) & (HCK_FULLCKSUM
| HCK_PARTIALCKSUM
))) {
3249 mblk_t
*mp1
= mp
->b_cont
;
3252 /* Record checksum information from the packet */
3253 sum_val
= (uint32_t)DB_CKSUM16(mp
);
3254 sum_flags
= DB_CKSUMFLAGS(mp
);
3256 /* fragmented payload offset from beginning of mblk */
3257 offset
= (uint16_t)((uchar_t
*)&fraghdr
[1] - mp
->b_rptr
);
/*
 * Partial-checksum adjustment is attempted only when the message has at
 * most two mblks, the payload starts at or after the checksum start,
 * and the gap between them is an even number of bytes.
 */
3259 if ((sum_flags
& HCK_PARTIALCKSUM
) &&
3260 (mp1
== NULL
|| mp1
->b_cont
== NULL
) &&
3261 offset
>= DB_CKSUMSTART(mp
) &&
3262 ((len
= offset
- DB_CKSUMSTART(mp
)) & 1) == 0) {
3265 * Partial checksum has been calculated by hardware
3266 * and attached to the packet; in addition, any
3267 * prepended extraneous data is even byte aligned.
3268 * If any such data exists, we adjust the checksum;
3269 * this would also handle any postpended data.
3271 IP_ADJCKSUM_PARTIAL(mp
->b_rptr
+ DB_CKSUMSTART(mp
),
3274 /* One's complement subtract extraneous checksum */
3276 sum_val
= ~(adj
- sum_val
) & 0xFFFF;
3285 /* Clear hardware checksumming flag */
3286 DB_CKSUMFLAGS(mp
) = 0;
3289 * Determine the offset (from the begining of the IP header)
3290 * of the nexthdr value which has IPPROTO_FRAGMENT. We use
3291 * this when removing the fragment header from the packet.
3292 * This packet consists of the IPv6 header, a potential
3293 * hop-by-hop options header, a potential pre-routing-header
3294 * destination options header, and a potential routing header.
3296 prev_nexthdr_offset
= (uint8_t *)&ip6h
->ip6_nxt
- (uint8_t *)ip6h
;
3297 prev_nexthdr
= ip6h
->ip6_nxt
;
3298 ptr
= (uint8_t *)&ip6h
[1];
/*
 * Each of the three checks below steps over at most one header of the
 * named type, in the fixed order hop-by-hop, destination options,
 * routing.
 */
3300 if (prev_nexthdr
== IPPROTO_HOPOPTS
) {
3304 hbh_hdr
= (ip6_hbh_t
*)ptr
;
3305 hdr_len
= 8 * (hbh_hdr
->ip6h_len
+ 1);
3306 prev_nexthdr
= hbh_hdr
->ip6h_nxt
;
3307 prev_nexthdr_offset
= (uint8_t *)&hbh_hdr
->ip6h_nxt
3311 if (prev_nexthdr
== IPPROTO_DSTOPTS
) {
3312 ip6_dest_t
*dest_hdr
;
3315 dest_hdr
= (ip6_dest_t
*)ptr
;
3316 hdr_len
= 8 * (dest_hdr
->ip6d_len
+ 1);
3317 prev_nexthdr
= dest_hdr
->ip6d_nxt
;
3318 prev_nexthdr_offset
= (uint8_t *)&dest_hdr
->ip6d_nxt
3322 if (prev_nexthdr
== IPPROTO_ROUTING
) {
3326 rthdr
= (ip6_rthdr_t
*)ptr
;
3327 prev_nexthdr
= rthdr
->ip6r_nxt
;
3328 prev_nexthdr_offset
= (uint8_t *)&rthdr
->ip6r_nxt
3330 hdr_len
= 8 * (rthdr
->ip6r_len
+ 1);
3333 if (prev_nexthdr
!= IPPROTO_FRAGMENT
) {
3334 /* Can't handle other headers before the fragment header */
3335 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInHdrErrors
);
3336 ip_drop_input("ipIfStatsInHdrErrors", mp
, ill
);
3342 * Note: Fragment offset in header is in 8-octet units.
3343 * Clearing least significant 3 bits not only extracts
3344 * it but also gets it in units of octets.
3346 offset
= ntohs(fraghdr
->ip6f_offlg
) & ~7;
/* The more-fragments flag is tested on the field in network byte order. */
3347 more_frags
= (fraghdr
->ip6f_offlg
& IP6F_MORE_FRAG
);
3350 * Is the more frags flag on and the payload length not a multiple
3353 if (more_frags
&& (ntohs(ip6h
->ip6_plen
) & 7)) {
3354 ip_drop_input("ICMP_PARAM_PROBLEM", mp
, ill
);
3355 icmp_param_problem_v6(mp
, ICMP6_PARAMPROB_HEADER
,
3356 (uint32_t)((char *)&ip6h
->ip6_plen
-
3357 (char *)ip6h
), B_FALSE
, ira
);
3361 v6src_ptr
= &ip6h
->ip6_src
;
3362 v6dst_ptr
= &ip6h
->ip6_dst
;
/* hdr_length spans everything from the IPv6 header through the end of the fragment header. */
3365 hdr_length
= (uint_t
)((char *)&fraghdr
[1] - (char *)ip6h
);
3369 * Would fragment cause reassembled packet to have a payload length
3370 * greater than IP_MAXPACKET - the max payload size?
3372 if (end
> IP_MAXPACKET
) {
3373 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInHdrErrors
);
3374 ip_drop_input("Reassembled packet too large", mp
, ill
);
3375 icmp_param_problem_v6(mp
, ICMP6_PARAMPROB_HEADER
,
3376 (uint32_t)((char *)&fraghdr
->ip6f_offlg
-
3377 (char *)ip6h
), B_FALSE
, ira
);
3382 * This packet just has one fragment. Reassembly not
3385 if (!more_frags
&& offset
== 0) {
3390 * Drop the fragmented as early as possible, if
3391 * we don't have resource(s) to re-assemble.
3393 if (ipst
->ips_ip_reass_queue_bytes
== 0) {
3398 /* Record the ECN field info. */
/* ~0xFFCFFFFF is 0x00300000: bits 20-21 of the host-order ip6_vcf word. */
3399 ecn_info
= (uint8_t)(ntohl(ip6h
->ip6_vcf
& htonl(~0xFFCFFFFF)) >> 20);
3401 * If this is not the first fragment, dump the unfragmentable
3402 * portion of the packet.
3405 mp
->b_rptr
= (uchar_t
*)&fraghdr
[1];
3408 * Fragmentation reassembly. Each ILL has a hash table for
3409 * queueing packets undergoing reassembly for all IPIFs
3410 * associated with the ILL. The hash is based on the packet
3411 * IP ident field. The ILL frag hash table was allocated
3412 * as a timer block at the time the ILL was created. Whenever
3413 * there is anything on the reassembly queue, the timer will
3416 /* Handle vnic loopback of fragments */
3417 if (mp
->b_datap
->db_ref
> 2)
3420 msg_len
= MBLKSIZE(mp
);
/* Walk to the last mblk, adding sizes of blocks whose db_ref is at most 2. */
3423 while (tail_mp
->b_cont
!= NULL
) {
3424 tail_mp
= tail_mp
->b_cont
;
3425 if (tail_mp
->b_datap
->db_ref
<= 2)
3426 msg_len
+= MBLKSIZE(tail_mp
);
3429 * If the reassembly list for this ILL will get too big
3433 if ((msg_len
+ sizeof (*ipf
) + ill
->ill_frag_count
) >=
3434 ipst
->ips_ip_reass_queue_bytes
) {
3435 DTRACE_PROBE3(ip_reass_queue_bytes
, uint_t
, msg_len
,
3436 uint_t
, ill
->ill_frag_count
,
3437 uint_t
, ipst
->ips_ip_reass_queue_bytes
);
3439 (ipst
->ips_ip_reass_queue_bytes
< msg_len
) ? 0 :
3440 (ipst
->ips_ip_reass_queue_bytes
- msg_len
));
/* Bucket selection uses the source address and ident; the bucket lock is held from here. */
3444 ipfb
= &ill
->ill_frag_hash_tbl
[ILL_FRAG_HASH_V6(*v6src_ptr
, ident
)];
3445 mutex_enter(&ipfb
->ipfb_lock
);
3447 ipfp
= &ipfb
->ipfb_ipf
;
3448 /* Try to find an existing fragment queue for this packet. */
3453 * It has to match on ident, source address, and
3456 if (ipf
->ipf_ident
== ident
&&
3457 IN6_ARE_ADDR_EQUAL(&ipf
->ipf_v6src
, v6src_ptr
) &&
3458 IN6_ARE_ADDR_EQUAL(&ipf
->ipf_v6dst
, v6dst_ptr
)) {
3461 * If we have received too many
3462 * duplicate fragments for this packet
3465 if (ipf
->ipf_num_dups
> ip_max_frag_dups
) {
3466 ill_frag_free_pkts(ill
, ipfb
, ipf
, 1);
3468 mutex_exit(&ipfb
->ipfb_lock
);
3474 ipfp
= &ipf
->ipf_hash_next
;
3480 * If we pruned the list, do we want to store this new
3481 * fragment?. We apply an optimization here based on the
3482 * fact that most fragments will be received in order.
3483 * So if the offset of this incoming fragment is zero,
3484 * it is the first fragment of a new packet. We will
3485 * keep it. Otherwise drop the fragment, as we have
3486 * probably pruned the packet already (since the
3487 * packet cannot be found).
3490 if (pruned
&& offset
!= 0) {
3491 mutex_exit(&ipfb
->ipfb_lock
);
3496 /* New guy. Allocate a frag message. */
/* The ipf_t bookkeeping structure lives in the data area of its own mblk. */
3497 mp1
= allocb(sizeof (*ipf
), BPRI_MED
);
3499 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
3500 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
3503 mutex_exit(&ipfb
->ipfb_lock
);
3507 if (ipfb
->ipfb_frag_pkts
>= MAX_FRAG_PKTS(ipst
)) {
3509 * Too many fragmented packets in this hash bucket.
3512 ill_frag_free_pkts(ill
, ipfb
, ipfb
->ipfb_ipf
, 1);
3517 /* Initialize the fragment header. */
3518 ipf
= (ipf_t
*)mp1
->b_rptr
;
3520 ipf
->ipf_ptphn
= ipfp
;
3522 ipf
->ipf_hash_next
= NULL
;
3523 ipf
->ipf_ident
= ident
;
3524 ipf
->ipf_v6src
= *v6src_ptr
;
3525 ipf
->ipf_v6dst
= *v6dst_ptr
;
3526 /* Record reassembly start time. */
3527 ipf
->ipf_timestamp
= gethrestime_sec();
3528 /* Record ipf generation and account for frag header */
3529 ipf
->ipf_gen
= ill
->ill_ipf_gen
++;
3530 ipf
->ipf_count
= MBLKSIZE(mp1
);
3531 ipf
->ipf_protocol
= nexthdr
;
3532 ipf
->ipf_nf_hdr_len
= 0;
3533 ipf
->ipf_prev_nexthdr_offset
= 0;
3534 ipf
->ipf_last_frag_seen
= B_FALSE
;
3535 ipf
->ipf_ecn
= ecn_info
;
3536 ipf
->ipf_num_dups
= 0;
3537 ipfb
->ipfb_frag_pkts
++;
3538 ipf
->ipf_checksum
= 0;
3539 ipf
->ipf_checksum_flags
= 0;
3541 /* Store checksum value in fragment header */
3542 if (sum_flags
!= 0) {
/* Fold the 32-bit sum to 16 bits; the second fold absorbs a carry from the first. */
3543 sum_val
= (sum_val
& 0xFFFF) + (sum_val
>> 16);
3544 sum_val
= (sum_val
& 0xFFFF) + (sum_val
>> 16);
3545 ipf
->ipf_checksum
= sum_val
;
3546 ipf
->ipf_checksum_flags
= sum_flags
;
3550 * We handle reassembly two ways. In the easy case,
3551 * where all the fragments show up in order, we do
3552 * minimal bookkeeping, and just clip new pieces on
3553 * the end. If we ever see a hole, then we go off
3554 * to ip_reassemble which has to mark the pieces and
3555 * keep track of the number of holes, etc. Obviously,
3556 * the point of having both mechanisms is so we can
3557 * handle the easy case as efficiently as possible.
3560 /* Easy case, in-order reassembly so far. */
3561 /* Update the byte count */
3562 ipf
->ipf_count
+= msg_len
;
3563 ipf
->ipf_tail_mp
= tail_mp
;
3565 * Keep track of next expected offset in
3569 ipf
->ipf_nf_hdr_len
= hdr_length
;
3570 ipf
->ipf_prev_nexthdr_offset
= prev_nexthdr_offset
;
3572 /* Hard case, hole at the beginning. */
3573 ipf
->ipf_tail_mp
= NULL
;
3575 * ipf_end == 0 means that we have given up
3576 * on easy reassembly.
3580 /* Forget checksum offload from now on */
3581 ipf
->ipf_checksum_flags
= 0;
3584 * ipf_hole_cnt is set by ip_reassemble.
3585 * ipf_count is updated by ip_reassemble.
3586 * No need to check for return value here
3587 * as we don't expect reassembly to complete or
3588 * fail for the first fragment itself.
3590 (void) ip_reassemble(mp
, ipf
, offset
, more_frags
, ill
,
3593 /* Update per ipfb and ill byte counts */
3594 ipfb
->ipfb_count
+= ipf
->ipf_count
;
3595 ASSERT(ipfb
->ipfb_count
> 0); /* Wraparound */
3596 atomic_add_32(&ill
->ill_frag_count
, ipf
->ipf_count
);
3597 /* If the frag timer wasn't already going, start it. */
3598 mutex_enter(&ill
->ill_lock
);
3599 ill_frag_timer_start(ill
);
3600 mutex_exit(&ill
->ill_lock
);
3601 goto partial_reass_done
;
3605 * If the packet's flag has changed (it could be coming up
3606 * from an interface different than the previous, therefore
3607 * possibly different checksum capability), then forget about
3608 * any stored checksum states. Otherwise add the value to
3609 * the existing one stored in the fragment header.
3611 if (sum_flags
!= 0 && sum_flags
== ipf
->ipf_checksum_flags
) {
3612 sum_val
+= ipf
->ipf_checksum
;
3613 sum_val
= (sum_val
& 0xFFFF) + (sum_val
>> 16);
3614 sum_val
= (sum_val
& 0xFFFF) + (sum_val
>> 16);
3615 ipf
->ipf_checksum
= sum_val
;
3616 } else if (ipf
->ipf_checksum_flags
!= 0) {
3617 /* Forget checksum offload from now on */
3618 ipf
->ipf_checksum_flags
= 0;
3622 * We have a new piece of a datagram which is already being
3623 * reassembled. Update the ECN info if all IP fragments
3624 * are ECN capable. If there is one which is not, clear
3625 * all the info. If there is at least one which has CE
3626 * code point, IP needs to report that up to transport.
3628 if (ecn_info
!= IPH_ECN_NECT
&& ipf
->ipf_ecn
!= IPH_ECN_NECT
) {
3629 if (ecn_info
== IPH_ECN_CE
)
3630 ipf
->ipf_ecn
= IPH_ECN_CE
;
3632 ipf
->ipf_ecn
= IPH_ECN_NECT
;
/* Fast path: a non-first fragment that starts exactly where the queued data ends. */
3635 if (offset
&& ipf
->ipf_end
== offset
) {
3636 /* The new fragment fits at the end */
3637 ipf
->ipf_tail_mp
->b_cont
= mp
;
3638 /* Update the byte count */
3639 ipf
->ipf_count
+= msg_len
;
3640 /* Update per ipfb and ill byte counts */
3641 ipfb
->ipfb_count
+= msg_len
;
3642 ASSERT(ipfb
->ipfb_count
> 0); /* Wraparound */
3643 atomic_add_32(&ill
->ill_frag_count
, msg_len
);
3647 ipf
->ipf_tail_mp
= tail_mp
;
3648 goto partial_reass_done
;
3652 * Go do the hard cases.
3653 * Call ip_reassemble().
/* A zero ipf_prev_nexthdr_offset means the unfragmentable header info has not been recorded yet. */
3658 if (ipf
->ipf_prev_nexthdr_offset
== 0) {
3659 ipf
->ipf_nf_hdr_len
= hdr_length
;
3660 ipf
->ipf_prev_nexthdr_offset
=
3661 prev_nexthdr_offset
;
3664 /* Save current byte count */
3665 count
= ipf
->ipf_count
;
3666 ret
= ip_reassemble(mp
, ipf
, offset
, more_frags
, ill
, msg_len
);
3668 /* Count of bytes added and subtracted (freeb()ed) */
3669 count
= ipf
->ipf_count
- count
;
3671 /* Update per ipfb and ill byte counts */
3672 ipfb
->ipfb_count
+= count
;
3673 ASSERT(ipfb
->ipfb_count
> 0); /* Wraparound */
3674 atomic_add_32(&ill
->ill_frag_count
, count
);
3676 if (ret
== IP_REASS_PARTIAL
) {
3677 goto partial_reass_done
;
3678 } else if (ret
== IP_REASS_FAILED
) {
3679 /* Reassembly failed. Free up all resources */
3680 ill_frag_free_pkts(ill
, ipfb
, ipf
, 1);
/* Reset the reassembly start/end markers on every mblk of this fragment. */
3681 for (t_mp
= mp
; t_mp
!= NULL
; t_mp
= t_mp
->b_cont
) {
3682 IP_REASS_SET_START(t_mp
, 0);
3683 IP_REASS_SET_END(t_mp
, 0);
3686 goto partial_reass_done
;
3689 /* We will reach here iff 'ret' is IP_REASS_COMPLETE */
3692 * We have completed reassembly. Unhook the frag header from
3693 * the reassembly list.
3695 * Grab the unfragmentable header length next header value out
3696 * of the first fragment
3698 ASSERT(ipf
->ipf_nf_hdr_len
!= 0);
3699 hdr_length
= ipf
->ipf_nf_hdr_len
;
3702 * Before we free the frag header, record the ECN info
3703 * to report back to the transport.
3705 ecn_info
= ipf
->ipf_ecn
;
3708 * Store the nextheader field in the header preceding the fragment
3711 nexthdr
= ipf
->ipf_protocol
;
3712 prev_nexthdr_offset
= ipf
->ipf_prev_nexthdr_offset
;
3713 ipfp
= ipf
->ipf_ptphn
;
3715 /* We need to supply these to caller */
3716 if ((sum_flags
= ipf
->ipf_checksum_flags
) != 0)
3717 sum_val
= ipf
->ipf_checksum
;
3722 count
= ipf
->ipf_count
;
3723 ipf
= ipf
->ipf_hash_next
;
3725 ipf
->ipf_ptphn
= ipfp
;
/* Subtract this packet's bytes from the ill-wide and per-bucket totals. */
3727 atomic_add_32(&ill
->ill_frag_count
, -count
);
3728 ASSERT(ipfb
->ipfb_count
>= count
);
3729 ipfb
->ipfb_count
-= count
;
3730 ipfb
->ipfb_frag_pkts
--;
3731 mutex_exit(&ipfb
->ipfb_lock
);
3732 /* Ditch the frag header. */
3737 * Make sure the packet is good by doing some sanity
3738 * check. If bad we can silentely drop the packet.
3741 if (hdr_length
< sizeof (ip6_frag_t
)) {
3742 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInHdrErrors
);
3743 ip_drop_input("ipIfStatsInHdrErrors", mp
, ill
);
3744 ip1dbg(("ip_input_fragment_v6: bad packet\n"));
3750 * Remove the fragment header from the initial header by
3751 * splitting the mblk into the non-fragmentable header and
3752 * everthing after the fragment extension header. This has the
3753 * side effect of putting all the headers that need destination
3754 * processing into the b_cont block-- on return this fact is
3755 * used in order to avoid having to look at the extensions
3756 * already processed.
3758 * Note that this code assumes that the unfragmentable portion
3759 * of the header is in the first mblk and increments
3760 * the read pointer past it. If this assumption is broken
3761 * this code fails badly.
3763 if (mp
->b_rptr
+ hdr_length
!= mp
->b_wptr
) {
3766 if (!(nmp
= dupb(mp
))) {
3767 ip1dbg(("ip_input_fragment_v6: dupb failed\n"));
3768 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
3769 ip_drop_input("ipIfStatsInDiscards", mp
, ill
);
3773 nmp
->b_cont
= mp
->b_cont
;
3775 nmp
->b_rptr
+= hdr_length
;
/* Trim the first mblk so it ends just before the fragment header. */
3777 mp
->b_wptr
= mp
->b_rptr
+ hdr_length
- sizeof (ip6_frag_t
);
3779 ip6h
= (ip6_t
*)mp
->b_rptr
;
/* Overwrite the next-header byte that used to say IPPROTO_FRAGMENT. */
3780 ((char *)ip6h
)[prev_nexthdr_offset
] = nexthdr
;
3782 /* Restore original IP length in header. */
3783 packet_size
= msgdsize(mp
);
3784 ip6h
->ip6_plen
= htons((uint16_t)(packet_size
- IPV6_HDR_LEN
));
3785 /* Record the ECN info. */
/* Clear bits 20-21 of ip6_vcf, then store the accumulated ECN value there. */
3786 ip6h
->ip6_vcf
&= htonl(0xFFCFFFFF);
3787 ip6h
->ip6_vcf
|= htonl(ecn_info
<< 20);
3789 /* Update the receive attributes */
3790 ira
->ira_pktlen
= packet_size
;
3791 ira
->ira_ip_hdr_length
= hdr_length
- sizeof (ip6_frag_t
);
3792 ira
->ira_protocol
= nexthdr
;
3794 /* Reassembly is successful; set checksum information in packet */
3795 DB_CKSUM16(mp
) = (uint16_t)sum_val
;
3796 DB_CKSUMFLAGS(mp
) = sum_flags
;
3797 DB_CKSUMSTART(mp
) = ira
->ira_ip_hdr_length
;
3803 * Given an mblk and a ptr, find the destination address in an IPv6 routing
3807 pluck_out_dst(const mblk_t
*mp
, uint8_t *whereptr
, in6_addr_t oldrv
)
3810 int segleft
, numaddr
;
3811 in6_addr_t
*ap
, rv
= oldrv
;
3813 rt0
= (ip6_rthdr0_t
*)whereptr
;
3814 if (rt0
->ip6r0_type
!= 0 && rt0
->ip6r0_type
!= 2) {
3815 DTRACE_PROBE2(pluck_out_dst_unknown_type
, mblk_t
*, mp
,
3816 uint8_t *, whereptr
);
3819 segleft
= rt0
->ip6r0_segleft
;
3820 numaddr
= rt0
->ip6r0_len
/ 2;
3822 if ((rt0
->ip6r0_len
& 0x1) ||
3823 (mp
!= NULL
&& whereptr
+ (rt0
->ip6r0_len
+ 1) * 8 > mp
->b_wptr
) ||
3824 (segleft
> rt0
->ip6r0_len
/ 2)) {
3826 * Corrupt packet. Either the routing header length is odd
3827 * (can't happen) or mismatched compared to the packet, or the
3828 * number of addresses is. Return what we can. This will
3829 * only be a problem on forwarded packets that get squeezed
3830 * through an outbound tunnel enforcing IPsec Tunnel Mode.
3832 DTRACE_PROBE2(pluck_out_dst_badpkt
, mblk_t
*, mp
, uint8_t *,
3838 ap
= (in6_addr_t
*)((char *)rt0
+ sizeof (*rt0
));
3839 rv
= ap
[numaddr
- 1];
3846 * Walk through the options to see if there is a routing header.
3847 * If present get the destination which is the last address of
3849 * mp needs to be provided in cases when the extension headers might span
3850 * b_cont; mp is never modified by this function.
3853 ip_get_dst_v6(ip6_t
*ip6h
, const mblk_t
*mp
, boolean_t
*is_fragment
)
3855 const mblk_t
*current_mp
= mp
;
3861 whereptr
= (uint8_t *)ip6h
;
3862 ehdrlen
= sizeof (ip6_t
);
3864 /* We assume at least the IPv6 base header is within one mblk. */
3865 ASSERT(mp
== NULL
||
3866 (mp
->b_rptr
<= whereptr
&& mp
->b_wptr
>= whereptr
+ ehdrlen
));
3869 nexthdr
= ip6h
->ip6_nxt
;
3870 if (is_fragment
!= NULL
)
3871 *is_fragment
= B_FALSE
;
3874 * We also assume (thanks to ipsec_tun_outbound()'s pullup) that
3875 * no extension headers will be split across mblks.
3878 while (nexthdr
== IPPROTO_HOPOPTS
|| nexthdr
== IPPROTO_DSTOPTS
||
3879 nexthdr
== IPPROTO_ROUTING
) {
3880 if (nexthdr
== IPPROTO_ROUTING
)
3881 rv
= pluck_out_dst(current_mp
, whereptr
, rv
);
3884 * All IPv6 extension headers have the next-header in byte
3885 * 0, and the (length - 8) in 8-byte-words.
3887 while (current_mp
!= NULL
&&
3888 whereptr
+ ehdrlen
>= current_mp
->b_wptr
) {
3889 ehdrlen
-= (current_mp
->b_wptr
- whereptr
);
3890 current_mp
= current_mp
->b_cont
;
3891 if (current_mp
== NULL
) {
3892 /* Bad packet. Return what we can. */
3893 DTRACE_PROBE3(ip_get_dst_v6_badpkt
, mblk_t
*,
3894 mp
, mblk_t
*, current_mp
, ip6_t
*, ip6h
);
3897 whereptr
= current_mp
->b_rptr
;
3899 whereptr
+= ehdrlen
;
3901 nexthdr
= *whereptr
;
3902 ASSERT(current_mp
== NULL
|| whereptr
+ 1 < current_mp
->b_wptr
);
3903 ehdrlen
= (*(whereptr
+ 1) + 1) * 8;
3907 if (nexthdr
== IPPROTO_FRAGMENT
&& is_fragment
!= NULL
)
3908 *is_fragment
= B_TRUE
;
3913 * ip_source_routed_v6:
3914 * This function is called by redirect code (called from ip_input_v6) to
3915 * know whether this packet is source routed through this node i.e
3916 * whether this node (router) is part of the journey. This
3917 * function is called under two cases :
3919 * case 1 : Routing header was processed by this node and
3920 * ip_process_rthdr replaced ip6_dst with the next hop
3921 * and we are forwarding the packet to the next hop.
3923 * case 2 : Routing header was not processed by this node and we
3924 * are just forwarding the packet.
3926 * For case (1) we don't want to send redirects. For case(2) we
3927 * want to send redirects.
3930 ip_source_routed_v6(ip6_t
*ip6h
, mblk_t
*mp
, ip_stack_t
*ipst
)
3933 in6_addr_t
*addrptr
;
3934 ip6_rthdr0_t
*rthdr
;
3940 ip2dbg(("ip_source_routed_v6\n"));
3941 nexthdr
= ip6h
->ip6_nxt
;
3942 ehdrlen
= IPV6_HDR_LEN
;
3944 /* if a routing hdr is preceeded by HOPOPT or DSTOPT */
3945 while (nexthdr
== IPPROTO_HOPOPTS
||
3946 nexthdr
== IPPROTO_DSTOPTS
) {
3947 byteptr
= (uint8_t *)ip6h
+ ehdrlen
;
3949 * Check if we have already processed
3950 * packets or we are just a forwarding
3951 * router which only pulled up msgs up
3952 * to IPV6HDR and one HBH ext header
3954 if (byteptr
+ MIN_EHDR_LEN
> mp
->b_wptr
) {
3955 ip2dbg(("ip_source_routed_v6: Extension"
3956 " headers not processed\n"));
3959 hbhhdr
= (ip6_hbh_t
*)byteptr
;
3960 nexthdr
= hbhhdr
->ip6h_nxt
;
3961 ehdrlen
= ehdrlen
+ 8 * (hbhhdr
->ip6h_len
+ 1);
3964 case IPPROTO_ROUTING
:
3965 byteptr
= (uint8_t *)ip6h
+ ehdrlen
;
3967 * If for some reason, we haven't pulled up
3968 * the routing hdr data mblk, then we must
3969 * not have processed it at all. So for sure
3970 * we are not part of the source routed journey.
3972 if (byteptr
+ MIN_EHDR_LEN
> mp
->b_wptr
) {
3973 ip2dbg(("ip_source_routed_v6: Routing"
3974 " header not processed\n"));
3977 rthdr
= (ip6_rthdr0_t
*)byteptr
;
3979 * Either we are an intermediate router or the
3980 * last hop before destination and we have
3981 * already processed the routing header.
3982 * If segment_left is greater than or equal to zero,
3983 * then we must be the (numaddr - segleft) entry
3984 * of the routing header. Although ip6r0_segleft
3985 * is a unit8_t variable, we still check for zero
3986 * or greater value, if in case the data type
3987 * is changed someday in future.
3989 if (rthdr
->ip6r0_segleft
> 0 ||
3990 rthdr
->ip6r0_segleft
== 0) {
3991 numaddr
= rthdr
->ip6r0_len
/ 2;
3992 addrptr
= (in6_addr_t
*)((char *)rthdr
+
3994 addrptr
+= (numaddr
- (rthdr
->ip6r0_segleft
+ 1));
3995 if (addrptr
!= NULL
) {
3996 if (ip_type_v6(addrptr
, ipst
) == IRE_LOCAL
)
3998 ip1dbg(("ip_source_routed_v6: Not local\n"));
4003 ip2dbg(("ip_source_routed_v6: Not source routed here\n"));
4009 * IPv6 fragmentation. Essentially the same as IPv4 fragmentation.
4010 * We have not optimized this in terms of number of mblks
4011 * allocated. For instance, for each fragment sent we always allocate a
4012 * mblk to hold the IPv6 header and fragment header.
4014 * Assumes that all the extension headers are contained in the first mblk
4015 * and that the fragment header has has already been added by calling
4016 * ip_fraghdr_add_v6.
4019 ip_fragment_v6(mblk_t
*mp
, nce_t
*nce
, iaflags_t ixaflags
, uint_t pkt_len
,
4020 uint32_t max_frag
, uint32_t xmit_hint
, zoneid_t szone
, zoneid_t nolzid
,
4021 pfirepostfrag_t postfragfn
, uintptr_t *ixa_cookie
)
4023 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4028 ip6_frag_t
*fraghdr
;
4029 size_t unfragmentable_len
;
4033 uint16_t offset
= 0;
4034 ill_t
*ill
= nce
->nce_ill
;
4037 ip_stack_t
*ipst
= ill
->ill_ipst
;
4038 uint_t priority
= mp
->b_band
;
4041 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragReqds
);
4042 if (max_frag
== 0) {
4043 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragFails
);
4044 ip_drop_output("FragFails: zero max_frag", mp
, ill
);
4050 * Caller should have added fraghdr_t to pkt_len, and also
4053 ASSERT(ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
== pkt_len
);
4054 ASSERT(msgdsize(mp
) == pkt_len
);
4057 * Determine the length of the unfragmentable portion of this
4058 * datagram. This consists of the IPv6 header, a potential
4059 * hop-by-hop options header, a potential pre-routing-header
4060 * destination options header, and a potential routing header.
4062 nexthdr
= ip6h
->ip6_nxt
;
4063 ptr
= (uint8_t *)&ip6h
[1];
4065 if (nexthdr
== IPPROTO_HOPOPTS
) {
4069 hbh_hdr
= (ip6_hbh_t
*)ptr
;
4070 hdr_len
= 8 * (hbh_hdr
->ip6h_len
+ 1);
4071 nexthdr
= hbh_hdr
->ip6h_nxt
;
4074 if (nexthdr
== IPPROTO_DSTOPTS
) {
4075 ip6_dest_t
*dest_hdr
;
4078 dest_hdr
= (ip6_dest_t
*)ptr
;
4079 if (dest_hdr
->ip6d_nxt
== IPPROTO_ROUTING
) {
4080 hdr_len
= 8 * (dest_hdr
->ip6d_len
+ 1);
4081 nexthdr
= dest_hdr
->ip6d_nxt
;
4085 if (nexthdr
== IPPROTO_ROUTING
) {
4089 rthdr
= (ip6_rthdr_t
*)ptr
;
4090 nexthdr
= rthdr
->ip6r_nxt
;
4091 hdr_len
= 8 * (rthdr
->ip6r_len
+ 1);
4094 if (nexthdr
!= IPPROTO_FRAGMENT
) {
4095 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragFails
);
4096 ip_drop_output("FragFails: bad nexthdr", mp
, ill
);
4100 unfragmentable_len
= (uint_t
)(ptr
- (uint8_t *)ip6h
);
4101 unfragmentable_len
+= sizeof (ip6_frag_t
);
4103 max_chunk
= (max_frag
- unfragmentable_len
) & ~7;
4106 * Allocate an mblk with enough room for the link-layer
4107 * header and the unfragmentable part of the datagram, which includes
4108 * the fragment header. This (or a copy) will be used as the
4109 * first mblk for each fragment we send.
4111 hmp
= allocb_tmpl(unfragmentable_len
+ ipst
->ips_ip_wroff_extra
, mp
);
4113 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragFails
);
4114 ip_drop_output("FragFails: no hmp", mp
, ill
);
4118 hmp
->b_rptr
+= ipst
->ips_ip_wroff_extra
;
4119 hmp
->b_wptr
= hmp
->b_rptr
+ unfragmentable_len
;
4121 fip6h
= (ip6_t
*)hmp
->b_rptr
;
4122 bcopy(ip6h
, fip6h
, unfragmentable_len
);
4125 * pkt_len is set to the total length of the fragmentable data in this
4126 * datagram. For each fragment sent, we will decrement pkt_len
4127 * by the amount of fragmentable data sent in that fragment
4128 * until len reaches zero.
4130 pkt_len
-= unfragmentable_len
;
4133 * Move read ptr past unfragmentable portion, we don't want this part
4134 * of the data in our fragments.
4136 mp
->b_rptr
+= unfragmentable_len
;
4137 if (mp
->b_rptr
== mp
->b_wptr
) {
4138 mblk_t
*mp1
= mp
->b_cont
;
4143 while (pkt_len
!= 0) {
4144 mlen
= MIN(pkt_len
, max_chunk
);
4150 BUMP_MIB(ill
->ill_ip_mib
,
4151 ipIfStatsOutFragFails
);
4152 ip_drop_output("FragFails: copyb failed",
4156 ip1dbg(("ip_fragment_v6: copyb failed\n"));
4159 off_flags
= IP6F_MORE_FRAG
;
4166 fip6h
= (ip6_t
*)(hmp0
->b_rptr
);
4167 fraghdr
= (ip6_frag_t
*)(hmp0
->b_rptr
+ unfragmentable_len
-
4168 sizeof (ip6_frag_t
));
4170 fip6h
->ip6_plen
= htons((uint16_t)(mlen
+
4171 unfragmentable_len
- IPV6_HDR_LEN
));
4173 * Note: Optimization alert.
4174 * In IPv6 (and IPv4) protocol header, Fragment Offset
4175 * ("offset") is 13 bits wide and in 8-octet units.
4176 * In IPv6 protocol header (unlike IPv4) in a 16 bit field,
4177 * it occupies the most significant 13 bits.
4178 * (least significant 13 bits in IPv4).
4179 * We do not do any shifts here. Not shifting is same effect
4180 * as taking offset value in octet units, dividing by 8 and
4181 * then shifting 3 bits left to line it up in place in proper
4182 * place protocol header.
4184 fraghdr
->ip6f_offlg
= htons(offset
) | off_flags
;
4186 if (!(dmp
= ip_carve_mp(&mp
, mlen
))) {
4187 /* mp has already been freed by ip_carve_mp() */
4188 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragFails
);
4189 ip_drop_output("FragFails: could not carve mp",
4194 ip1dbg(("ip_carve_mp: failed\n"));
4198 /* Get the priority marking, if any */
4199 hmp0
->b_band
= priority
;
4201 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragCreates
);
4203 error
= postfragfn(hmp0
, nce
, ixaflags
,
4204 mlen
+ unfragmentable_len
, xmit_hint
, szone
, nolzid
,
4206 if (error
!= 0 && error
!= EWOULDBLOCK
&& hmp
!= NULL
) {
4207 /* No point in sending the other fragments */
4208 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragFails
);
4209 ip_drop_output("FragFails: postfragfn failed",
4215 /* No need to redo state machine in loop */
4216 ixaflags
&= ~IXAF_REACH_CONF
;
4220 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutFragOKs
);
4225 * Add a fragment header to an IPv6 packet.
4226 * Assumes that all the extension headers are contained in the first mblk.
4228 * The fragment header is inserted after an hop-by-hop options header
4229 * and after [an optional destinations header followed by] a routing header.
4232 ip_fraghdr_add_v6(mblk_t
*mp
, uint32_t ident
, ip_xmit_attr_t
*ixa
)
4234 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
4237 ip6_frag_t
*fraghdr
;
4238 size_t unfragmentable_len
;
4240 uint_t prev_nexthdr_offset
;
4242 uint_t priority
= mp
->b_band
;
4243 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
4246 * Determine the length of the unfragmentable portion of this
4247 * datagram. This consists of the IPv6 header, a potential
4248 * hop-by-hop options header, a potential pre-routing-header
4249 * destination options header, and a potential routing header.
4251 nexthdr
= ip6h
->ip6_nxt
;
4252 prev_nexthdr_offset
= (uint8_t *)&ip6h
->ip6_nxt
- (uint8_t *)ip6h
;
4253 ptr
= (uint8_t *)&ip6h
[1];
4255 if (nexthdr
== IPPROTO_HOPOPTS
) {
4259 hbh_hdr
= (ip6_hbh_t
*)ptr
;
4260 hdr_len
= 8 * (hbh_hdr
->ip6h_len
+ 1);
4261 nexthdr
= hbh_hdr
->ip6h_nxt
;
4262 prev_nexthdr_offset
= (uint8_t *)&hbh_hdr
->ip6h_nxt
4266 if (nexthdr
== IPPROTO_DSTOPTS
) {
4267 ip6_dest_t
*dest_hdr
;
4270 dest_hdr
= (ip6_dest_t
*)ptr
;
4271 if (dest_hdr
->ip6d_nxt
== IPPROTO_ROUTING
) {
4272 hdr_len
= 8 * (dest_hdr
->ip6d_len
+ 1);
4273 nexthdr
= dest_hdr
->ip6d_nxt
;
4274 prev_nexthdr_offset
= (uint8_t *)&dest_hdr
->ip6d_nxt
4279 if (nexthdr
== IPPROTO_ROUTING
) {
4283 rthdr
= (ip6_rthdr_t
*)ptr
;
4284 nexthdr
= rthdr
->ip6r_nxt
;
4285 prev_nexthdr_offset
= (uint8_t *)&rthdr
->ip6r_nxt
4287 hdr_len
= 8 * (rthdr
->ip6r_len
+ 1);
4290 unfragmentable_len
= (uint_t
)(ptr
- (uint8_t *)ip6h
);
4293 * Allocate an mblk with enough room for the link-layer
4294 * header, the unfragmentable part of the datagram, and the
4297 hmp
= allocb_tmpl(unfragmentable_len
+ sizeof (ip6_frag_t
) +
4298 ipst
->ips_ip_wroff_extra
, mp
);
4300 ill_t
*ill
= ixa
->ixa_nce
->nce_ill
;
4302 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
4303 ip_drop_output("ipIfStatsOutDiscards: allocb failure", mp
, ill
);
4307 hmp
->b_rptr
+= ipst
->ips_ip_wroff_extra
;
4308 hmp
->b_wptr
= hmp
->b_rptr
+ unfragmentable_len
+ sizeof (ip6_frag_t
);
4310 fip6h
= (ip6_t
*)hmp
->b_rptr
;
4311 fraghdr
= (ip6_frag_t
*)(hmp
->b_rptr
+ unfragmentable_len
);
4313 bcopy(ip6h
, fip6h
, unfragmentable_len
);
4314 fip6h
->ip6_plen
= htons(ntohs(fip6h
->ip6_plen
) + sizeof (ip6_frag_t
));
4315 hmp
->b_rptr
[prev_nexthdr_offset
] = IPPROTO_FRAGMENT
;
4317 fraghdr
->ip6f_nxt
= nexthdr
;
4318 fraghdr
->ip6f_reserved
= 0;
4319 fraghdr
->ip6f_offlg
= 0;
4320 fraghdr
->ip6f_ident
= htonl(ident
);
4322 /* Get the priority marking, if any */
4323 hmp
->b_band
= priority
;
4326 * Move read ptr past unfragmentable portion, we don't want this part
4327 * of the data in our fragments.
4329 mp
->b_rptr
+= unfragmentable_len
;
4335 * Determine if the ill and multicast aspects of that packets
4336 * "matches" the conn.
4339 conn_wantpacket_v6(conn_t
*connp
, ip_recv_attr_t
*ira
, ip6_t
*ip6h
)
4341 ill_t
*ill
= ira
->ira_rill
;
4342 zoneid_t zoneid
= ira
->ira_zoneid
;
4344 in6_addr_t
*v6dst_ptr
= &ip6h
->ip6_dst
;
4345 in6_addr_t
*v6src_ptr
= &ip6h
->ip6_src
;
4348 * conn_incoming_ifindex is set by IPV6_BOUND_IF and as link-local
4349 * scopeid. This is used to limit
4350 * unicast and multicast reception to conn_incoming_ifindex.
4351 * conn_wantpacket_v6 is called both for unicast and
4352 * multicast packets.
4354 in_ifindex
= connp
->conn_incoming_ifindex
;
4356 /* mpathd can bind to the under IPMP interface, which we allow */
4357 if (in_ifindex
!= 0 && in_ifindex
!= ill
->ill_phyint
->phyint_ifindex
) {
4358 if (!IS_UNDER_IPMP(ill
))
4361 if (in_ifindex
!= ipmp_ill_get_ipmp_ifindex(ill
))
4365 if (!IPCL_ZONE_MATCH(connp
, zoneid
))
4368 if (!(ira
->ira_flags
& IRAF_MULTICAST
))
4371 if (connp
->conn_multi_router
)
4374 if (ira
->ira_protocol
== IPPROTO_RSVP
)
4377 return (conn_hasmembers_ill_withsrc_v6(connp
, v6dst_ptr
, v6src_ptr
,
4382 * pr_addr_dbg function provides the needed buffer space to call
4383 * inet_ntop() function's 3rd argument. This function should be
4384 * used by any kernel routine which wants to save INET6_ADDRSTRLEN
4385 * stack buffer space in it's own stack frame. This function uses
4386 * a buffer from it's own stack and prints the information.
4387 * Example: pr_addr_dbg("func: no route for %s\n ", AF_INET, addr)
4389 * Note: This function can call inet_ntop() once.
4392 pr_addr_dbg(char *fmt1
, int af
, const void *addr
)
4394 char buf
[INET6_ADDRSTRLEN
];
4397 ip0dbg(("pr_addr_dbg: Wrong arguments\n"));
4402 * This does not compare debug level and just prints
4403 * out. Thus it is the responsibility of the caller
4404 * to check the appropriate debug-level before calling
4408 printf(fmt1
, inet_ntop(af
, addr
, buf
, sizeof (buf
)));
4416 * Return the length in bytes of the IPv6 headers (base header
4417 * extension headers) that will be needed based on the
4418 * ip_pkt_t structure passed by the caller.
4420 * The returned length does not include the length of the upper level
4421 * protocol (ULP) header.
4424 ip_total_hdrs_len_v6(const ip_pkt_t
*ipp
)
4430 if (ipp
->ipp_fields
& IPPF_HOPOPTS
) {
4431 ASSERT(ipp
->ipp_hopoptslen
!= 0);
4432 len
+= ipp
->ipp_hopoptslen
;
4436 * En-route destination options
4437 * Only do them if there's a routing header as well
4439 if ((ipp
->ipp_fields
& (IPPF_RTHDRDSTOPTS
|IPPF_RTHDR
)) ==
4440 (IPPF_RTHDRDSTOPTS
|IPPF_RTHDR
)) {
4441 ASSERT(ipp
->ipp_rthdrdstoptslen
!= 0);
4442 len
+= ipp
->ipp_rthdrdstoptslen
;
4444 if (ipp
->ipp_fields
& IPPF_RTHDR
) {
4445 ASSERT(ipp
->ipp_rthdrlen
!= 0);
4446 len
+= ipp
->ipp_rthdrlen
;
4448 if (ipp
->ipp_fields
& IPPF_DSTOPTS
) {
4449 ASSERT(ipp
->ipp_dstoptslen
!= 0);
4450 len
+= ipp
->ipp_dstoptslen
;
4456 * All-purpose routine to build a header chain of an IPv6 header
4457 * followed by any required extension headers and a proto header.
4459 * The caller has to set the source and destination address as well as
4460 * ip6_plen. The caller has to massage any routing header and compensate
4461 * for the ULP pseudo-header checksum due to the source route.
4463 * The extension headers will all be fully filled in.
4466 ip_build_hdrs_v6(uchar_t
*buf
, uint_t buf_len
, const ip_pkt_t
*ipp
,
4467 uint8_t protocol
, uint32_t flowinfo
)
4469 uint8_t *nxthdr_ptr
;
4471 ip6_t
*ip6h
= (ip6_t
*)buf
;
4473 /* Initialize IPv6 header */
4475 (IPV6_DEFAULT_VERS_AND_FLOW
& IPV6_VERS_AND_FLOW_MASK
) |
4476 (flowinfo
& ~IPV6_VERS_AND_FLOW_MASK
);
4478 if (ipp
->ipp_fields
& IPPF_TCLASS
) {
4479 /* Overrides the class part of flowinfo */
4480 ip6h
->ip6_vcf
= IPV6_TCLASS_FLOW(ip6h
->ip6_vcf
,
4484 if (ipp
->ipp_fields
& IPPF_HOPLIMIT
)
4485 ip6h
->ip6_hops
= ipp
->ipp_hoplimit
;
4487 ip6h
->ip6_hops
= ipp
->ipp_unicast_hops
;
4489 if ((ipp
->ipp_fields
& IPPF_ADDR
) &&
4490 !IN6_IS_ADDR_V4MAPPED(&ipp
->ipp_addr
))
4491 ip6h
->ip6_src
= ipp
->ipp_addr
;
4493 nxthdr_ptr
= (uint8_t *)&ip6h
->ip6_nxt
;
4494 cp
= (uint8_t *)&ip6h
[1];
4496 * Here's where we have to start stringing together
4497 * any extension headers in the right order:
4498 * Hop-by-hop, destination, routing, and final destination opts.
4500 if (ipp
->ipp_fields
& IPPF_HOPOPTS
) {
4501 /* Hop-by-hop options */
4502 ip6_hbh_t
*hbh
= (ip6_hbh_t
*)cp
;
4504 *nxthdr_ptr
= IPPROTO_HOPOPTS
;
4505 nxthdr_ptr
= &hbh
->ip6h_nxt
;
4507 bcopy(ipp
->ipp_hopopts
, cp
, ipp
->ipp_hopoptslen
);
4508 cp
+= ipp
->ipp_hopoptslen
;
4511 * En-route destination options
4512 * Only do them if there's a routing header as well
4514 if ((ipp
->ipp_fields
& (IPPF_RTHDRDSTOPTS
|IPPF_RTHDR
)) ==
4515 (IPPF_RTHDRDSTOPTS
|IPPF_RTHDR
)) {
4516 ip6_dest_t
*dst
= (ip6_dest_t
*)cp
;
4518 *nxthdr_ptr
= IPPROTO_DSTOPTS
;
4519 nxthdr_ptr
= &dst
->ip6d_nxt
;
4521 bcopy(ipp
->ipp_rthdrdstopts
, cp
, ipp
->ipp_rthdrdstoptslen
);
4522 cp
+= ipp
->ipp_rthdrdstoptslen
;
4525 * Routing header next
4527 if (ipp
->ipp_fields
& IPPF_RTHDR
) {
4528 ip6_rthdr_t
*rt
= (ip6_rthdr_t
*)cp
;
4530 *nxthdr_ptr
= IPPROTO_ROUTING
;
4531 nxthdr_ptr
= &rt
->ip6r_nxt
;
4533 bcopy(ipp
->ipp_rthdr
, cp
, ipp
->ipp_rthdrlen
);
4534 cp
+= ipp
->ipp_rthdrlen
;
4537 * Do ultimate destination options
4539 if (ipp
->ipp_fields
& IPPF_DSTOPTS
) {
4540 ip6_dest_t
*dest
= (ip6_dest_t
*)cp
;
4542 *nxthdr_ptr
= IPPROTO_DSTOPTS
;
4543 nxthdr_ptr
= &dest
->ip6d_nxt
;
4545 bcopy(ipp
->ipp_dstopts
, cp
, ipp
->ipp_dstoptslen
);
4546 cp
+= ipp
->ipp_dstoptslen
;
4549 * Now set the last header pointer to the proto passed in
4551 *nxthdr_ptr
= protocol
;
4552 ASSERT((int)(cp
- buf
) == buf_len
);
4556 * Return a pointer to the routing header extension header
4557 * in the IPv6 header(s) chain passed in.
4558 * If none found, return NULL
4559 * Assumes that all extension headers are in same mblk as the v6 header
4562 ip_find_rthdr_v6(ip6_t
*ip6h
, uint8_t *endptr
)
4564 ip6_dest_t
*desthdr
;
4565 ip6_frag_t
*fraghdr
;
4568 uint8_t *ptr
= (uint8_t *)&ip6h
[1];
4570 if (ip6h
->ip6_nxt
== IPPROTO_ROUTING
)
4571 return ((ip6_rthdr_t
*)ptr
);
4574 * The routing header will precede all extension headers
4575 * other than the hop-by-hop and destination options
4576 * extension headers, so if we see anything other than those,
4577 * we're done and didn't find it.
4578 * We could see a destination options header alone but no
4579 * routing header, in which case we'll return NULL as soon as
4580 * we see anything after that.
4581 * Hop-by-hop and destination option headers are identical,
4582 * so we can use either one we want as a template.
4584 nexthdr
= ip6h
->ip6_nxt
;
4585 while (ptr
< endptr
) {
4586 /* Is there enough left for len + nexthdr? */
4587 if (ptr
+ MIN_EHDR_LEN
> endptr
)
4591 case IPPROTO_HOPOPTS
:
4592 case IPPROTO_DSTOPTS
:
4593 /* Assumes the headers are identical for hbh and dst */
4594 desthdr
= (ip6_dest_t
*)ptr
;
4595 hdrlen
= 8 * (desthdr
->ip6d_len
+ 1);
4596 nexthdr
= desthdr
->ip6d_nxt
;
4599 case IPPROTO_ROUTING
:
4600 return ((ip6_rthdr_t
*)ptr
);
4602 case IPPROTO_FRAGMENT
:
4603 fraghdr
= (ip6_frag_t
*)ptr
;
4604 hdrlen
= sizeof (ip6_frag_t
);
4605 nexthdr
= fraghdr
->ip6f_nxt
;
4617 * Called for source-routed packets originating on this node.
4618 * Manipulates the original routing header by moving every entry up
4619 * one slot, placing the first entry in the v6 header's v6_dst field,
4620 * and placing the ultimate destination in the routing header's last
4623 * Returns the checksum diference between the ultimate destination
4624 * (last hop in the routing header when the packet is sent) and
4625 * the first hop (ip6_dst when the packet is sent)
4629 ip_massage_options_v6(ip6_t
*ip6h
, ip6_rthdr_t
*rth
, netstack_t
*ns
)
4633 in6_addr_t
*addrptr
;
4635 ip6_rthdr0_t
*rthdr
= (ip6_rthdr0_t
*)rth
;
4637 uint32_t addrsum
= 0;
4641 * Perform any processing needed for source routing.
4642 * We know that all extension headers will be in the same mblk
4643 * as the IPv6 header.
4647 * If no segments left in header, or the header length field is zero,
4648 * don't move hop addresses around;
4649 * Checksum difference is zero.
4651 if ((rthdr
->ip6r0_segleft
== 0) || (rthdr
->ip6r0_len
== 0))
4654 ptr
= (uint16_t *)&ip6h
->ip6_dst
;
4656 for (i
= 0; i
< (sizeof (in6_addr_t
) / sizeof (uint16_t)); i
++) {
4659 cksm
= (cksm
& 0xFFFF) + (cksm
>> 16);
4662 * Here's where the fun begins - we have to
4663 * move all addresses up one spot, take the
4664 * first hop and make it our first ip6_dst,
4665 * and place the ultimate destination in the
4666 * newly-opened last slot.
4668 addrptr
= (in6_addr_t
*)((char *)rthdr
+ sizeof (*rthdr
));
4669 numaddr
= rthdr
->ip6r0_len
/ 2;
4671 for (i
= 0; i
< (numaddr
- 1); addrptr
++, i
++) {
4672 *addrptr
= addrptr
[1];
4674 *addrptr
= ip6h
->ip6_dst
;
4675 ip6h
->ip6_dst
= tmp
;
4678 * From the checksummed ultimate destination subtract the checksummed
4679 * current ip6_dst (the first hop address). Return that number.
4680 * (In the v4 case, the second part of this is done in each routine
4681 * that calls ip_massage_options(). We do it all in this one place
4684 ptr
= (uint16_t *)&ip6h
->ip6_dst
;
4685 for (i
= 0; i
< (sizeof (in6_addr_t
) / sizeof (uint16_t)); i
++) {
4688 cksm
-= ((addrsum
>> 16) + (addrsum
& 0xFFFF));
4691 cksm
= (cksm
& 0xFFFF) + (cksm
>> 16);
4697 *ip6_kstat_init(netstackid_t stackid
, ip6_stat_t
*ip6_statisticsp
)
4701 ip6_stat_t
template = {
4702 { "ip6_udp_fannorm", KSTAT_DATA_UINT64
},
4703 { "ip6_udp_fanmb", KSTAT_DATA_UINT64
},
4704 { "ip6_recv_pullup", KSTAT_DATA_UINT64
},
4705 { "ip6_db_ref", KSTAT_DATA_UINT64
},
4706 { "ip6_notaligned", KSTAT_DATA_UINT64
},
4707 { "ip6_multimblk", KSTAT_DATA_UINT64
},
4708 { "ipsec_proto_ahesp", KSTAT_DATA_UINT64
},
4709 { "ip6_out_sw_cksum", KSTAT_DATA_UINT64
},
4710 { "ip6_out_sw_cksum_bytes", KSTAT_DATA_UINT64
},
4711 { "ip6_in_sw_cksum", KSTAT_DATA_UINT64
},
4712 { "ip6_tcp_in_full_hw_cksum_err", KSTAT_DATA_UINT64
},
4713 { "ip6_tcp_in_part_hw_cksum_err", KSTAT_DATA_UINT64
},
4714 { "ip6_tcp_in_sw_cksum_err", KSTAT_DATA_UINT64
},
4715 { "ip6_udp_in_full_hw_cksum_err", KSTAT_DATA_UINT64
},
4716 { "ip6_udp_in_part_hw_cksum_err", KSTAT_DATA_UINT64
},
4717 { "ip6_udp_in_sw_cksum_err", KSTAT_DATA_UINT64
},
4719 ksp
= kstat_create_netstack("ip", 0, "ip6stat", "net",
4720 KSTAT_TYPE_NAMED
, sizeof (template) / sizeof (kstat_named_t
),
4721 KSTAT_FLAG_VIRTUAL
, stackid
);
4726 bcopy(&template, ip6_statisticsp
, sizeof (template));
4727 ksp
->ks_data
= (void *)ip6_statisticsp
;
4728 ksp
->ks_private
= (void *)(uintptr_t)stackid
;
4735 ip6_kstat_fini(netstackid_t stackid
, kstat_t
*ksp
)
4738 ASSERT(stackid
== (netstackid_t
)(uintptr_t)ksp
->ks_private
);
4739 kstat_delete_netstack(ksp
, stackid
);
4744 * The following two functions set and get the value for the
4745 * IPV6_SRC_PREFERENCES socket option.
4748 ip6_set_src_preferences(ip_xmit_attr_t
*ixa
, uint32_t prefs
)
4751 * We only support preferences that are covered by
4752 * IPV6_PREFER_SRC_MASK.
4754 if (prefs
& ~IPV6_PREFER_SRC_MASK
)
4758 * Look for conflicting preferences or default preferences. If
4759 * both bits of a related pair are clear, the application wants the
4760 * system's default value for that pair. Both bits in a pair can't
4763 if ((prefs
& IPV6_PREFER_SRC_MIPMASK
) == 0) {
4764 prefs
|= IPV6_PREFER_SRC_MIPDEFAULT
;
4765 } else if ((prefs
& IPV6_PREFER_SRC_MIPMASK
) ==
4766 IPV6_PREFER_SRC_MIPMASK
) {
4769 if ((prefs
& IPV6_PREFER_SRC_TMPMASK
) == 0) {
4770 prefs
|= IPV6_PREFER_SRC_TMPDEFAULT
;
4771 } else if ((prefs
& IPV6_PREFER_SRC_TMPMASK
) ==
4772 IPV6_PREFER_SRC_TMPMASK
) {
4775 if ((prefs
& IPV6_PREFER_SRC_CGAMASK
) == 0) {
4776 prefs
|= IPV6_PREFER_SRC_CGADEFAULT
;
4777 } else if ((prefs
& IPV6_PREFER_SRC_CGAMASK
) ==
4778 IPV6_PREFER_SRC_CGAMASK
) {
4782 ixa
->ixa_src_preferences
= prefs
;
4787 ip6_get_src_preferences(ip_xmit_attr_t
*ixa
, uint32_t *val
)
4789 *val
= ixa
->ixa_src_preferences
;
4790 return (sizeof (ixa
->ixa_src_preferences
));
4794 * Get the size of the IP options (including the IP headers size)
4795 * without including the AH header's size. If till_ah is B_FALSE,
4796 * and if AH header is present, dest options beyond AH header will
4797 * also be included in the returned size.
4800 ipsec_ah_get_hdr_size_v6(mblk_t
*mp
, boolean_t till_ah
)
4812 ip6h
= (ip6_t
*)mp
->b_rptr
;
4813 size
= IPV6_HDR_LEN
;
4814 nexthdr
= ip6h
->ip6_nxt
;
4815 whereptr
= (uint8_t *)&ip6h
[1];
4817 /* Assume IP has already stripped it */
4818 ASSERT(nexthdr
!= IPPROTO_FRAGMENT
);
4820 case IPPROTO_HOPOPTS
:
4821 hbhhdr
= (ip6_hbh_t
*)whereptr
;
4822 nexthdr
= hbhhdr
->ip6h_nxt
;
4823 ehdrlen
= 8 * (hbhhdr
->ip6h_len
+ 1);
4825 case IPPROTO_DSTOPTS
:
4826 dsthdr
= (ip6_dest_t
*)whereptr
;
4827 nexthdr
= dsthdr
->ip6d_nxt
;
4828 ehdrlen
= 8 * (dsthdr
->ip6d_len
+ 1);
4830 case IPPROTO_ROUTING
:
4831 rthdr
= (ip6_rthdr_t
*)whereptr
;
4832 nexthdr
= rthdr
->ip6r_nxt
;
4833 ehdrlen
= 8 * (rthdr
->ip6r_len
+ 1);
4837 ASSERT(nexthdr
== IPPROTO_AH
);
4841 * If we don't have a AH header to traverse,
4842 * return now. This happens normally for
4843 * outbound datagrams where we have not inserted
4846 if (nexthdr
!= IPPROTO_AH
) {
4851 * We don't include the AH header's size
4852 * to be symmetrical with other cases where
4853 * we either don't have a AH header (outbound)
4854 * or peek into the AH header yet (inbound and
4855 * not pulled up yet).
4857 ah
= (ah_t
*)whereptr
;
4858 nexthdr
= ah
->ah_nexthdr
;
4859 ehdrlen
= (ah
->ah_length
<< 2) + 8;
4861 if (nexthdr
== IPPROTO_DSTOPTS
) {
4862 if (whereptr
+ ehdrlen
>= mp
->b_wptr
) {
4864 * The destination options header
4865 * is not part of the first mblk.
4867 whereptr
= mp
->b_cont
->b_rptr
;
4869 whereptr
+= ehdrlen
;
4872 dsthdr
= (ip6_dest_t
*)whereptr
;
4873 ehdrlen
= 8 * (dsthdr
->ip6d_len
+ 1);
4878 whereptr
+= ehdrlen
;
4884 * Utility routine that checks if `v6srcp' is a valid address on underlying
4885 * interface `ill'. If `ipifp' is non-NULL, it's set to a held ipif
4886 * associated with `v6srcp' on success. NOTE: if this is not called from
4887 * inside the IPSQ (ill_g_lock is not held), `ill' may be removed from the
4888 * group during or after this lookup.
4891 ipif_lookup_testaddr_v6(ill_t
*ill
, const in6_addr_t
*v6srcp
, ipif_t
**ipifp
)
4896 ipif
= ipif_lookup_addr_exact_v6(v6srcp
, ill
, ill
->ill_ipst
);
4906 pr_addr_dbg("ipif_lookup_testaddr_v6: cannot find ipif for "
4907 "src %s\n", AF_INET6
, v6srcp
);