4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved.
27 /* Copyright (c) 1990 Mentat Inc. */
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/strsubr.h>
33 #include <sys/strsun.h>
36 #include <sys/sunddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/debug.h>
39 #include <sys/atomic.h>
41 #include <sys/systm.h>
42 #include <sys/param.h>
45 #include <sys/socket.h>
48 #include <net/if_arp.h>
49 #include <net/route.h>
50 #include <sys/sockio.h>
51 #include <netinet/in.h>
52 #include <net/if_dl.h>
54 #include <inet/common.h>
56 #include <inet/mib2.h>
59 #include <inet/snmpcom.h>
60 #include <inet/kstatcom.h>
62 #include <netinet/igmp_var.h>
63 #include <netinet/ip6.h>
64 #include <netinet/icmp6.h>
65 #include <netinet/sctp.h>
68 #include <inet/ip_impl.h>
70 #include <inet/ip6_asp.h>
72 #include <inet/ip_multi.h>
73 #include <inet/ip_if.h>
74 #include <inet/ip_ire.h>
75 #include <inet/ip_ftable.h>
76 #include <inet/ip_rts.h>
77 #include <inet/optcom.h>
78 #include <inet/ip_ndp.h>
79 #include <inet/ip_listutils.h>
80 #include <netinet/igmp.h>
81 #include <netinet/ip_mroute.h>
82 #include <inet/ipp_common.h>
84 #include <net/pfkeyv2.h>
85 #include <inet/sadb.h>
86 #include <inet/ipsec_impl.h>
87 #include <inet/ipdrop.h>
88 #include <inet/ip_netinfo.h>
90 #include <sys/pattr.h>
91 #include <inet/ipclassifier.h>
92 #include <inet/sctp_ip.h>
93 #include <inet/sctp/sctp_impl.h>
94 #include <inet/udp_impl.h>
95 #include <sys/sunddi.h>
/*
 * Declared extern here; the definition is not part of this view.
 * The SCTP branches of ip_output_sw_cksum_v6() and ip_output_cksum_v6()
 * zero sh_chksum and then test this flag before storing the result of
 * sctp_cksum() into the SCTP header.
 */
98 extern boolean_t skip_sctp_cksum
;
102 ip_output_simple_v6(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
105 in6_addr_t firsthop
; /* In IP header */
106 in6_addr_t dst
; /* End of source route, or ip6_dst if none */
113 iaflags_t ixaflags
= ixa
->ixa_flags
;
114 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
116 boolean_t repeat
= B_FALSE
;
120 ip6h
= (ip6_t
*)mp
->b_rptr
;
121 ASSERT(IPH_HDR_VERSION(ip6h
) == IPV6_VERSION
);
123 ASSERT(ixa
->ixa_nce
== NULL
);
125 ixa
->ixa_pktlen
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
126 ASSERT(ixa
->ixa_pktlen
== msgdsize(mp
));
127 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &ixa
->ixa_ip_hdr_length
,
129 /* Malformed packet */
130 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
131 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
132 ip_drop_output("ipIfStatsOutDiscards", mp
, NULL
);
136 ixa
->ixa_protocol
= *nexthdrp
;
139 * Assumes that source routed packets have already been massaged by
140 * the ULP (ip_massage_options_v6) and as a result ip6_dst is the next
141 * hop in the source route. The final destination is used for IPsec
142 * policy and DCE lookup.
144 firsthop
= ip6h
->ip6_dst
;
145 dst
= ip_get_dst_v6(ip6h
, mp
, NULL
);
149 setsrc
= ipv6_all_zeros
;
150 ire
= ip_select_route_v6(&firsthop
, ip6h
->ip6_src
, ixa
, NULL
, &setsrc
,
152 ASSERT(ire
!= NULL
); /* IRE_NOROUTE if none found */
154 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
155 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
156 ip_drop_output("ipIfStatsOutDiscards", mp
, NULL
);
161 if (ire
->ire_flags
& (RTF_BLACKHOLE
|RTF_REJECT
)) {
162 /* ire_ill might be NULL hence need to skip some code */
163 if (ixaflags
& IXAF_SET_SOURCE
)
164 ip6h
->ip6_src
= ipv6_loopback
;
165 ixa
->ixa_fragsize
= IP_MAXPACKET
;
166 ire
->ire_ob_pkt_count
++;
167 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
168 /* No dce yet; use default one */
169 error
= (ire
->ire_sendfn
)(ire
, mp
, ip6h
, ixa
,
170 &ipst
->ips_dce_default
->dce_ident
);
174 /* Note that ip6_dst is only used for IRE_MULTICAST */
175 nce
= ire_to_nce(ire
, INADDR_ANY
, &ip6h
->ip6_dst
);
177 /* Allocation failure? */
178 ip_drop_output("ire_to_nce", mp
, ill
);
183 if (nce
->nce_is_condemned
) {
186 nce1
= ire_handle_condemned_nce(nce
, ire
, NULL
, ip6h
, B_TRUE
);
190 /* Try finding a better IRE */
195 /* Tried twice - drop packet */
196 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
197 ip_drop_output("No nce", mp
, ill
);
205 ixa
->ixa_postfragfn
= ire
->ire_postfragfn
;
207 ASSERT(ixa
->ixa_nce
== NULL
);
211 * Check for a dce_t with a path mtu.
214 if (IN6_IS_ADDR_LINKSCOPE(&dst
))
215 ifindex
= nce
->nce_common
->ncec_ill
->ill_phyint
->phyint_ifindex
;
217 dce
= dce_lookup_v6(&dst
, ifindex
, ipst
, NULL
);
220 if (!(ixaflags
& IXAF_PMTU_DISCOVERY
)) {
221 ixa
->ixa_fragsize
= IPV6_MIN_MTU
;
222 } else if (dce
->dce_flags
& DCEF_PMTU
) {
224 * To avoid a periodic timer to increase the path MTU we
225 * look at dce_last_change_time each time we send a packet.
227 now
= ddi_get_lbolt64();
228 if (TICK_TO_SEC(now
) - dce
->dce_last_change_time
>
229 ipst
->ips_ip_pathmtu_interval
) {
231 * Older than 20 minutes. Drop the path MTU information.
233 mutex_enter(&dce
->dce_lock
);
234 dce
->dce_flags
&= ~DCEF_PMTU
;
235 dce
->dce_last_change_time
= TICK_TO_SEC(now
);
236 mutex_exit(&dce
->dce_lock
);
237 dce_increment_generation(dce
);
238 ixa
->ixa_fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
242 fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
243 if (fragsize
> dce
->dce_pmtu
)
244 fragsize
= dce
->dce_pmtu
;
245 ixa
->ixa_fragsize
= fragsize
;
248 ixa
->ixa_fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
252 * We use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp
253 * interface for source address selection.
255 ill
= ire_nexthop_ill(ire
);
257 if (ixaflags
& IXAF_SET_SOURCE
) {
261 * We use the final destination to get
262 * correct selection for source routed packets
265 /* If unreachable we have no ill but need some source */
270 error
= ip_select_source_v6(ill
, &setsrc
, &dst
,
271 ixa
->ixa_zoneid
, ipst
, B_FALSE
,
272 ixa
->ixa_src_preferences
, &src
, NULL
, NULL
);
275 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCOutRequests
);
276 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
277 ip_drop_output("ipIfStatsOutDiscards - no source",
283 } else if (ixaflags
& IXAF_VERIFY_SOURCE
) {
284 /* Check if the IP source is assigned to the host. */
285 if (!ip_verify_src(mp
, ixa
, NULL
)) {
286 /* Don't send a packet with a source that isn't ours */
287 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
288 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
289 ip_drop_output("ipIfStatsOutDiscards - invalid source",
292 error
= EADDRNOTAVAIL
;
298 * Check against global IPsec policy to set the AH/ESP attributes.
299 * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate.
301 if (!(ixaflags
& (IXAF_NO_IPSEC
|IXAF_IPSEC_SECURE
))) {
302 ASSERT(ixa
->ixa_ipsec_policy
== NULL
);
303 mp
= ip_output_attach_policy(mp
, NULL
, ip6h
, NULL
, ixa
);
305 /* MIB and ip_drop_packet already done */
306 return (EHOSTUNREACH
); /* IPsec policy failure */
311 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCOutRequests
);
313 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
317 * We update the statistics on the most specific IRE i.e., the first
319 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
320 * can only count the use prior to fragmentation. However the MIB
321 * counters on the ill will be incremented in post fragmentation.
323 ire
->ire_ob_pkt_count
++;
326 * Based on ire_type and ire_flags call one of:
327 * ire_send_local_v6 - for IRE_LOCAL and IRE_LOOPBACK
328 * ire_send_noroute_v6 - if RTF_REJECT or RTF_BLACKHOLE
329 * ire_send_multicast_v6 - for IRE_MULTICAST
330 * ire_send_wire_v6 - for the rest.
332 error
= (ire
->ire_sendfn
)(ire
, mp
, ip6h
, ixa
, &dce
->dce_ident
);
339 if (ixa
->ixa_nce
!= NULL
)
340 nce_refrele(ixa
->ixa_nce
);
346 * ire_sendfn() functions.
347 * These functions use the following xmit_attr:
348 * - ixa_fragsize - read to determine whether or not to fragment
349 * - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec
350 * - ixa_ipsec_* are used inside IPsec
351 * - IXAF_LOOPBACK_COPY - for multicast
356 * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK
358 * The checks for restrict_interzone_loopback are done in ire_route_recursive.
362 ire_send_local_v6(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
363 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
365 ip6_t
*ip6h
= (ip6_t
*)iph_arg
;
366 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
367 ill_t
*ill
= ire
->ire_ill
;
368 ip_recv_attr_t iras
; /* NOTE: No bzero for performance */
369 uint_t pktlen
= ixa
->ixa_pktlen
;
372 * No fragmentation, no nce, and no application of IPsec.
375 * Note different order between IP provider and FW_HOOKS than in
380 * DTrace this as ip:::send. A packet blocked by FW_HOOKS will fire the
381 * send probe, but not the receive probe.
383 DTRACE_IP7(send
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
384 ip6h
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, NULL
, ip6_t
*, ip6h
,
387 DTRACE_PROBE4(ip6__loopback__out__start
,
388 ill_t
*, NULL
, ill_t
*, ill
,
389 ip6_t
*, ip6h
, mblk_t
*, mp
);
391 if (HOOKS6_INTERESTED_LOOPBACK_OUT(ipst
)) {
394 FW_HOOKS(ipst
->ips_ip6_loopback_out_event
,
395 ipst
->ips_ipv6firewall_loopback_out
,
396 NULL
, ill
, ip6h
, mp
, mp
, 0, ipst
, error
);
398 DTRACE_PROBE1(ip6__loopback__out__end
, mblk_t
*, mp
);
403 * Even if the destination was changed by the filter we use the
404 * forwarding decision that was made based on the address
405 * in ip_output/ip_set_destination.
407 /* Length could be different */
408 ip6h
= (ip6_t
*)mp
->b_rptr
;
409 pktlen
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
413 * If a callback is enabled then we need to know the
414 * source and destination zoneids for the packet. We already
417 if (ipst
->ips_ip6_observe
.he_interested
) {
418 zoneid_t szone
, dzone
;
419 zoneid_t stackzoneid
;
421 stackzoneid
= netstackid_to_zoneid(
422 ipst
->ips_netstack
->netstack_stackid
);
424 if (stackzoneid
== GLOBAL_ZONEID
) {
426 dzone
= ire
->ire_zoneid
;
427 szone
= ixa
->ixa_zoneid
;
429 szone
= dzone
= stackzoneid
;
431 ipobs_hook(mp
, IPOBS_HOOK_LOCAL
, szone
, dzone
, ill
, ipst
);
434 /* Handle lo0 stats */
435 ipst
->ips_loopback_packets
++;
438 * Update output mib stats. Note that we can't move into the icmp
439 * sender (icmp_output etc) since they don't know the ill and the
442 if (ixa
->ixa_protocol
== IPPROTO_ICMPV6
) {
445 icmp6
= (icmp6_t
*)((uchar_t
*)ip6h
+ ixa
->ixa_ip_hdr_length
);
446 icmp_update_out_mib_v6(ill
, icmp6
);
449 DTRACE_PROBE4(ip6__loopback__in__start
,
450 ill_t
*, ill
, ill_t
*, NULL
,
451 ip6_t
*, ip6h
, mblk_t
*, mp
);
453 if (HOOKS6_INTERESTED_LOOPBACK_IN(ipst
)) {
456 FW_HOOKS(ipst
->ips_ip6_loopback_in_event
,
457 ipst
->ips_ipv6firewall_loopback_in
,
458 ill
, NULL
, ip6h
, mp
, mp
, 0, ipst
, error
);
460 DTRACE_PROBE1(ip6__loopback__in__end
, mblk_t
*, mp
);
465 * Even if the destination was changed by the filter we use the
466 * forwarding decision that was made based on the address
467 * in ip_output/ip_set_destination.
469 /* Length could be different */
470 ip6h
= (ip6_t
*)mp
->b_rptr
;
471 pktlen
= ntohs(ip6h
->ip6_plen
) + IPV6_HDR_LEN
;
474 DTRACE_IP7(receive
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
475 ip6h
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, NULL
, ip6_t
*, ip6h
,
478 /* Map ixa to ira including IPsec policies */
479 ipsec_out_to_in(ixa
, ill
, &iras
);
480 iras
.ira_pktlen
= pktlen
;
482 ire
->ire_ib_pkt_count
++;
483 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCInReceives
);
484 UPDATE_MIB(ill
->ill_ip_mib
, ipIfStatsHCInOctets
, pktlen
);
486 /* Destined to ire_zoneid - use that for fanout */
487 iras
.ira_zoneid
= ire
->ire_zoneid
;
489 ip_fanout_v6(mp
, ip6h
, &iras
);
491 /* We moved any IPsec refs from ixa to iras */
492 ira_cleanup(&iras
, B_FALSE
);
497 * ire_sendfn for IRE_MULTICAST
499 * Note that we do path MTU discovery by default for IPv6 multicast. But
500 * since unconnected UDP and RAW sockets don't set IXAF_PMTU_DISCOVERY
501 * only connected sockets get this by default.
504 ire_send_multicast_v6(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
505 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
507 ip6_t
*ip6h
= (ip6_t
*)iph_arg
;
508 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
509 ill_t
*ill
= ire
->ire_ill
;
510 iaflags_t ixaflags
= ixa
->ixa_flags
;
513 * Check if anything in ip_input_v6 wants a copy of the transmitted
514 * packet (after IPsec and fragmentation)
516 * 1. Multicast routers always need a copy unless SO_DONTROUTE is set
517 * RSVP and the rsvp daemon is an example of a
518 * protocol and user level process that
519 * handles its own routing. Hence, it uses the
520 * SO_DONTROUTE option to accomplish this.
521 * 2. If the sender has set IP_MULTICAST_LOOP, then we just
522 * check whether there are any receivers for the group on the ill
523 * (ignoring the zoneid).
524 * 3. If IP_MULTICAST_LOOP is not set, then we check if there are
525 * any members in other shared-IP zones.
526 * If such members exist, then we indicate that the sending zone
527 * shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP
530 * When we loopback we skip hardware checksum to make sure loopback
531 * copy is checksummed.
533 * Note that ire_ill is the upper in the case of IPMP.
535 ixa
->ixa_flags
&= ~(IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
);
536 if (ipst
->ips_ip_g_mrouter
&& ill
->ill_mrouter_cnt
> 0 &&
537 !(ixaflags
& IXAF_DONTROUTE
)) {
538 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
539 } else if (ixaflags
& IXAF_MULTICAST_LOOP
) {
541 * If this zone or any other zone has members then loopback
544 if (ill_hasmembers_v6(ill
, &ip6h
->ip6_dst
))
545 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
546 } else if (ipst
->ips_netstack
->netstack_numzones
> 1) {
548 * This zone should not have a copy. But there are some other
549 * zones which might have members.
551 if (ill_hasmembers_otherzones_v6(ill
, &ip6h
->ip6_dst
,
553 ixa
->ixa_flags
|= IXAF_NO_LOOP_ZONEID_SET
;
554 ixa
->ixa_no_loop_zoneid
= ixa
->ixa_zoneid
;
555 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
560 * Unless IPV6_HOPLIMIT already set a ttl, force the ttl to the
561 * IP_MULTICAST_TTL value
563 if (!(ixaflags
& IXAF_NO_TTL_CHANGE
)) {
564 ip6h
->ip6_hops
= ixa
->ixa_multicast_ttl
;
567 return (ire_send_wire_v6(ire
, mp
, ip6h
, ixa
, identp
));
571 * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE
575 ire_send_noroute_v6(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
576 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
578 ip6_t
*ip6h
= (ip6_t
*)iph_arg
;
579 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
584 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutNoRoutes
);
586 if (ire
->ire_type
& IRE_NOROUTE
) {
587 /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */
588 ip_rts_change_v6(RTM_MISS
, &ip6h
->ip6_dst
, 0, 0, 0, 0, 0, 0,
592 if (ire
->ire_flags
& RTF_BLACKHOLE
) {
593 ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp
, NULL
);
595 /* No error even for local senders - silent blackhole */
598 ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp
, NULL
);
601 * We need an ill_t for the ip_recv_attr_t even though this packet
602 * was never received and icmp_unreachable doesn't currently use
605 ill
= ill_lookup_on_name("lo0", B_FALSE
,
606 !(ixa
->ixa_flags
& IRAF_IS_IPV4
), &dummy
, ipst
);
609 return (EHOSTUNREACH
);
612 bzero(&iras
, sizeof (iras
));
613 /* Map ixa to ira including IPsec policies */
614 ipsec_out_to_in(ixa
, ill
, &iras
);
616 icmp_unreachable_v6(mp
, ICMP6_DST_UNREACH_NOROUTE
, B_FALSE
, &iras
);
617 /* We moved any IPsec refs from ixa to iras */
618 ira_cleanup(&iras
, B_FALSE
);
621 return (EHOSTUNREACH
);
625 * Calculate a checksum ignoring any hardware capabilities
627 * Returns B_FALSE if the packet was too short for the checksum. Caller
628 * should free and do stats.
631 ip_output_sw_cksum_v6(mblk_t
*mp
, ip6_t
*ip6h
, ip_xmit_attr_t
*ixa
)
633 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
634 uint_t pktlen
= ixa
->ixa_pktlen
;
637 uint8_t protocol
= ixa
->ixa_protocol
;
638 uint16_t ip_hdr_length
= ixa
->ixa_ip_hdr_length
;
640 #define iphs ((uint16_t *)ip6h)
642 /* Just in case it contained garbage */
643 DB_CKSUMFLAGS(mp
) &= ~HCK_FLAGS
;
646 * Calculate ULP checksum
648 if (protocol
== IPPROTO_TCP
) {
649 cksump
= IPH_TCPH_CHECKSUMP(ip6h
, ip_hdr_length
);
650 cksum
= IP_TCP_CSUM_COMP
;
651 } else if (protocol
== IPPROTO_UDP
) {
652 cksump
= IPH_UDPH_CHECKSUMP(ip6h
, ip_hdr_length
);
653 cksum
= IP_UDP_CSUM_COMP
;
654 } else if (protocol
== IPPROTO_SCTP
) {
657 ASSERT(MBLKL(mp
) >= (ip_hdr_length
+ sizeof (*sctph
)));
658 sctph
= (sctp_hdr_t
*)(mp
->b_rptr
+ ip_hdr_length
);
660 * Zero out the checksum field to ensure proper
661 * checksum calculation.
663 sctph
->sh_chksum
= 0;
665 if (!skip_sctp_cksum
)
667 sctph
->sh_chksum
= sctp_cksum(mp
, ip_hdr_length
);
669 } else if (ixa
->ixa_flags
& IXAF_SET_RAW_CKSUM
) {
671 * icmp has placed length and routing
672 * header adjustment in the checksum field.
674 cksump
= (uint16_t *)(((uint8_t *)ip6h
) + ip_hdr_length
+
675 ixa
->ixa_raw_cksum_offset
);
676 cksum
= htons(protocol
);
677 } else if (protocol
== IPPROTO_ICMPV6
) {
678 cksump
= IPH_ICMPV6_CHECKSUMP(ip6h
, ip_hdr_length
);
679 cksum
= IP_ICMPV6_CSUM_COMP
; /* Pseudo-header cksum */
684 /* ULP puts the checksum field in the first mblk */
685 ASSERT(((uchar_t
*)cksump
) + sizeof (uint16_t) <= mp
->b_wptr
);
688 * We accumulate the pseudo header checksum in cksum.
689 * This is pretty hairy code, so watch close. One
690 * thing to keep in mind is that UDP and TCP have
691 * stored their respective datagram lengths in their
692 * checksum fields. This lines things up real nice.
694 cksum
+= iphs
[4] + iphs
[5] + iphs
[6] + iphs
[7] +
695 iphs
[8] + iphs
[9] + iphs
[10] + iphs
[11] +
696 iphs
[12] + iphs
[13] + iphs
[14] + iphs
[15] +
697 iphs
[16] + iphs
[17] + iphs
[18] + iphs
[19];
698 cksum
= IP_CSUM(mp
, ip_hdr_length
, cksum
);
701 * For UDP/IPv6 a zero UDP checksum is not allowed.
704 if (protocol
== IPPROTO_UDP
&& cksum
== 0)
709 IP6_STAT(ipst
, ip6_out_sw_cksum
);
710 IP6_STAT_UPDATE(ipst
, ip6_out_sw_cksum_bytes
, pktlen
);
712 /* No IP header checksum for IPv6 */
718 /* There are drivers that can't do partial checksum for ICMPv6 */
/*
 * Consulted by ip_output_cksum_v6(): while this is nonzero, the
 * HCKSUM_INET_PARTIAL offload path is not taken for packets whose
 * protocol is IPPROTO_ICMPV6.  Initialized to 1 (workaround enabled).
 */
719 int nxge_cksum_workaround
= 1;
722 * Calculate the ULP checksum - try to use hardware.
723 * In the case of multicast the IXAF_NO_HW_CKSUM is set in which case we use
726 * Returns B_FALSE if the packet was too short for the checksum. Caller
727 * should free and do stats.
730 ip_output_cksum_v6(iaflags_t ixaflags
, mblk_t
*mp
, ip6_t
*ip6h
,
731 ip_xmit_attr_t
*ixa
, ill_t
*ill
)
733 uint_t pktlen
= ixa
->ixa_pktlen
;
737 uint8_t protocol
= ixa
->ixa_protocol
;
738 uint16_t ip_hdr_length
= ixa
->ixa_ip_hdr_length
;
740 #define iphs ((uint16_t *)ip6h)
742 if ((ixaflags
& IXAF_NO_HW_CKSUM
) || !ILL_HCKSUM_CAPABLE(ill
) ||
744 return (ip_output_sw_cksum_v6(mp
, ip6h
, ixa
));
748 * Calculate ULP checksum. Note that we don't use cksump and cksum
749 * if the ill has FULL support.
751 if (protocol
== IPPROTO_TCP
) {
752 cksump
= IPH_TCPH_CHECKSUMP(ip6h
, ip_hdr_length
);
753 cksum
= IP_TCP_CSUM_COMP
; /* Pseudo-header cksum */
754 } else if (protocol
== IPPROTO_UDP
) {
755 cksump
= IPH_UDPH_CHECKSUMP(ip6h
, ip_hdr_length
);
756 cksum
= IP_UDP_CSUM_COMP
; /* Pseudo-header cksum */
757 } else if (protocol
== IPPROTO_SCTP
) {
760 ASSERT(MBLKL(mp
) >= (ip_hdr_length
+ sizeof (*sctph
)));
761 sctph
= (sctp_hdr_t
*)(mp
->b_rptr
+ ip_hdr_length
);
763 * Zero out the checksum field to ensure proper
764 * checksum calculation.
766 sctph
->sh_chksum
= 0;
768 if (!skip_sctp_cksum
)
770 sctph
->sh_chksum
= sctp_cksum(mp
, ip_hdr_length
);
772 } else if (ixa
->ixa_flags
& IXAF_SET_RAW_CKSUM
) {
774 * icmp has placed length and routing
775 * header adjustment in the checksum field.
777 cksump
= (uint16_t *)(((uint8_t *)ip6h
) + ip_hdr_length
+
778 ixa
->ixa_raw_cksum_offset
);
779 cksum
= htons(protocol
);
780 } else if (protocol
== IPPROTO_ICMPV6
) {
781 cksump
= IPH_ICMPV6_CHECKSUMP(ip6h
, ip_hdr_length
);
782 cksum
= IP_ICMPV6_CSUM_COMP
; /* Pseudo-header cksum */
785 /* No IP header checksum for IPv6 */
789 /* ULP puts the checksum field in the first mblk */
790 ASSERT(((uchar_t
*)cksump
) + sizeof (uint16_t) <= mp
->b_wptr
);
793 * Underlying interface supports hardware checksum offload for
794 * the payload; leave the payload checksum for the hardware to
795 * calculate. N.B: We only need to set up checksum info on the
798 hck_flags
= ill
->ill_hcksum_capab
->ill_hcksum_txflags
;
800 DB_CKSUMFLAGS(mp
) &= ~HCK_FLAGS
;
801 if (hck_flags
& HCKSUM_INET_FULL_V6
) {
803 * Hardware calculates pseudo-header, header and the
804 * payload checksums, so clear the checksum field in
805 * the protocol header.
808 DB_CKSUMFLAGS(mp
) |= HCK_FULLCKSUM
;
811 if (((hck_flags
) & HCKSUM_INET_PARTIAL
) &&
812 (protocol
!= IPPROTO_ICMPV6
|| !nxge_cksum_workaround
)) {
814 * Partial checksum offload has been enabled. Fill
815 * the checksum field in the protocol header with the
816 * pseudo-header checksum value.
818 * We accumulate the pseudo header checksum in cksum.
819 * This is pretty hairy code, so watch close. One
820 * thing to keep in mind is that UDP and TCP have
821 * stored their respective datagram lengths in their
822 * checksum fields. This lines things up real nice.
824 cksum
+= iphs
[4] + iphs
[5] + iphs
[6] + iphs
[7] +
825 iphs
[8] + iphs
[9] + iphs
[10] + iphs
[11] +
826 iphs
[12] + iphs
[13] + iphs
[14] + iphs
[15] +
827 iphs
[16] + iphs
[17] + iphs
[18] + iphs
[19];
829 cksum
= (cksum
& 0xFFFF) + (cksum
>> 16);
830 *(cksump
) = (cksum
& 0xFFFF) + (cksum
>> 16);
833 * Offsets are relative to beginning of IP header.
835 DB_CKSUMSTART(mp
) = ip_hdr_length
;
836 DB_CKSUMSTUFF(mp
) = (uint8_t *)cksump
- (uint8_t *)ip6h
;
837 DB_CKSUMEND(mp
) = pktlen
;
838 DB_CKSUMFLAGS(mp
) |= HCK_PARTIALCKSUM
;
841 /* Hardware capabilities include neither full nor partial IPv6 */
842 return (ip_output_sw_cksum_v6(mp
, ip6h
, ixa
));
847 * ire_sendfn for offlink and onlink destinations.
848 * Also called from the multicast send function.
850 * Assumes that the caller has a hold on the ire.
852 * This function doesn't care if the IRE just became condemned since that
853 * can happen at any time.
857 ire_send_wire_v6(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
858 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
860 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
861 ip6_t
*ip6h
= (ip6_t
*)iph_arg
;
862 iaflags_t ixaflags
= ixa
->ixa_flags
;
864 uint32_t pktlen
= ixa
->ixa_pktlen
;
866 ASSERT(ixa
->ixa_nce
!= NULL
);
867 ill
= ixa
->ixa_nce
->nce_ill
;
870 * Update output mib stats. Note that we can't move into the icmp
871 * sender (icmp_output etc) since they don't know the ill and the
874 * With IPMP we record the stats on the upper ill.
876 if (ixa
->ixa_protocol
== IPPROTO_ICMPV6
) {
879 icmp6
= (icmp6_t
*)((uchar_t
*)ip6h
+ ixa
->ixa_ip_hdr_length
);
880 icmp_update_out_mib_v6(ixa
->ixa_nce
->nce_common
->ncec_ill
,
884 if (ixaflags
& IXAF_DONTROUTE
)
888 * This might set b_band, thus the IPsec and fragmentation
889 * code in IP ensures that b_band is updated in the first mblk.
891 if (IPP_ENABLED(IPP_LOCAL_OUT
, ipst
)) {
892 /* ip_process translates an IS_UNDER_IPMP */
893 mp
= ip_process(IPP_LOCAL_OUT
, mp
, ill
, ill
);
895 /* ip_drop_packet and MIB done */
896 return (0); /* Might just be delayed */
900 if (pktlen
> ixa
->ixa_fragsize
||
901 (ixaflags
& (IXAF_IPSEC_SECURE
|IXAF_IPV6_ADD_FRAGHDR
))) {
904 if (ixaflags
& IXAF_IPSEC_SECURE
)
905 pktlen
+= ipsec_out_extra_length(ixa
);
907 if (pktlen
> IP_MAXPACKET
)
910 if (ixaflags
& IXAF_SET_ULP_CKSUM
) {
912 * Compute ULP checksum using software
914 if (!ip_output_sw_cksum_v6(mp
, ip6h
, ixa
)) {
915 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
916 ip_drop_output("ipIfStatsOutDiscards", mp
, ill
);
920 /* Avoid checksum again below if we only add fraghdr */
921 ixaflags
&= ~IXAF_SET_ULP_CKSUM
;
925 * If we need a fragment header, pick the ident and insert
926 * the header before IPsec so we have a place to store
929 if ((ixaflags
& IXAF_IPV6_ADD_FRAGHDR
) ||
930 pktlen
> ixa
->ixa_fragsize
) {
932 * If this packet would generate an icmp_frag_needed
933 * message, we need to handle it before we do the IPsec
934 * processing. Otherwise, we need to strip the IPsec
935 * headers before we send up the message to the ULPs
936 * which becomes messy and difficult.
938 if ((pktlen
> ixa
->ixa_fragsize
) &&
939 (ixaflags
& IXAF_DONTFRAG
)) {
940 /* Generate ICMP and return error */
943 DTRACE_PROBE4(ip6__fragsize__fail
,
944 uint_t
, pktlen
, uint_t
, ixa
->ixa_fragsize
,
945 uint_t
, ixa
->ixa_pktlen
,
946 uint_t
, ixa
->ixa_pmtu
);
948 bzero(&iras
, sizeof (iras
));
949 /* Map ixa to ira including IPsec policies */
950 ipsec_out_to_in(ixa
, ill
, &iras
);
952 ip_drop_output("ICMP6_PKT_TOO_BIG", mp
, ill
);
953 icmp_pkt2big_v6(mp
, ixa
->ixa_fragsize
, B_TRUE
,
955 /* We moved any IPsec refs from ixa to iras */
956 ira_cleanup(&iras
, B_FALSE
);
959 DTRACE_PROBE4(ip6__fragsize__ok
, uint_t
, pktlen
,
960 uint_t
, ixa
->ixa_fragsize
, uint_t
, ixa
->ixa_pktlen
,
961 uint_t
, ixa
->ixa_pmtu
);
963 * Assign an ident value for this packet. There could
964 * be other threads targeting the same destination, so
965 * we have to arrange for an atomic increment.
966 * Normally ixa_extra_ident is 0, but in the case of
967 * LSO it will be the number of TCP segments that the
968 * driver/hardware will additionally construct.
970 ident
= atomic_add_32_nv(identp
, ixa
->ixa_extra_ident
+
972 ixa
->ixa_ident
= ident
; /* In case we do IPsec */
974 if (ixaflags
& IXAF_IPSEC_SECURE
) {
976 * Pass in sufficient information so that
977 * IPsec can determine whether to fragment, and
978 * which function to call after fragmentation.
980 return (ipsec_out_process(mp
, ixa
));
983 mp
= ip_fraghdr_add_v6(mp
, ident
, ixa
);
985 /* MIB and ip_drop_output already done */
988 ASSERT(pktlen
== ixa
->ixa_pktlen
);
989 pktlen
+= sizeof (ip6_frag_t
);
991 if (pktlen
> ixa
->ixa_fragsize
) {
992 return (ip_fragment_v6(mp
, ixa
->ixa_nce
, ixaflags
,
993 pktlen
, ixa
->ixa_fragsize
,
994 ixa
->ixa_xmit_hint
, ixa
->ixa_zoneid
,
995 ixa
->ixa_no_loop_zoneid
, ixa
->ixa_postfragfn
,
999 if (ixaflags
& IXAF_SET_ULP_CKSUM
) {
1000 /* Compute ULP checksum; IPv6 has no IP header checksum */
1001 /* An IS_UNDER_IPMP ill is ok here */
1002 if (!ip_output_cksum_v6(ixaflags
, mp
, ip6h
, ixa
, ill
)) {
1003 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
1004 ip_drop_output("ipIfStatsOutDiscards", mp
, ill
);
1009 return ((ixa
->ixa_postfragfn
)(mp
, ixa
->ixa_nce
, ixaflags
,
1010 pktlen
, ixa
->ixa_xmit_hint
, ixa
->ixa_zoneid
,
1011 ixa
->ixa_no_loop_zoneid
, &ixa
->ixa_cookie
));