4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1990 Mentat Inc. */
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/strsubr.h>
31 #include <sys/strsun.h>
34 #include <sys/sunddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/debug.h>
37 #include <sys/atomic.h>
39 #include <sys/systm.h>
40 #include <sys/param.h>
43 #include <sys/socket.h>
46 #include <net/if_arp.h>
47 #include <net/route.h>
48 #include <sys/sockio.h>
49 #include <netinet/in.h>
50 #include <net/if_dl.h>
52 #include <inet/common.h>
54 #include <inet/mib2.h>
57 #include <inet/snmpcom.h>
58 #include <inet/kstatcom.h>
60 #include <netinet/igmp_var.h>
61 #include <netinet/ip6.h>
62 #include <netinet/icmp6.h>
63 #include <netinet/sctp.h>
66 #include <inet/ip_impl.h>
68 #include <inet/ip6_asp.h>
70 #include <inet/ip_multi.h>
71 #include <inet/ip_if.h>
72 #include <inet/ip_ire.h>
73 #include <inet/ip_ftable.h>
74 #include <inet/ip_rts.h>
75 #include <inet/optcom.h>
76 #include <inet/ip_ndp.h>
77 #include <inet/ip_listutils.h>
78 #include <netinet/igmp.h>
79 #include <netinet/ip_mroute.h>
80 #include <inet/ipp_common.h>
82 #include <net/pfkeyv2.h>
83 #include <inet/sadb.h>
84 #include <inet/ipsec_impl.h>
85 #include <inet/ipdrop.h>
86 #include <inet/ip_netinfo.h>
88 #include <sys/pattr.h>
89 #include <inet/ipclassifier.h>
90 #include <inet/sctp_ip.h>
91 #include <inet/sctp/sctp_impl.h>
92 #include <inet/udp_impl.h>
93 #include <sys/sunddi.h>
95 #include <sys/clock_impl.h> /* For LBOLT_FASTPATH{,64} */
98 extern boolean_t skip_sctp_cksum
;
101 static int ip_verify_nce(mblk_t
*, ip_xmit_attr_t
*);
102 static int ip_verify_dce(mblk_t
*, ip_xmit_attr_t
*);
103 static boolean_t
ip_verify_lso(ill_t
*, ip_xmit_attr_t
*);
104 static boolean_t
ip_verify_zcopy(ill_t
*, ip_xmit_attr_t
*);
105 static void ip_output_simple_broadcast(ip_xmit_attr_t
*, mblk_t
*);
108 * There are two types of output functions for IP used for different
110 * - ip_output_simple() is when sending ICMP errors, TCP resets, etc when there
111 * is no context in the form of a conn_t. However, there is a
112 * ip_xmit_attr_t that the callers use to influence interface selection
113 * (needed for ICMP echo as well as IPv6 link-locals) and IPsec.
115 * - conn_ip_output() is used when sending packets with a conn_t and
116 * ip_set_destination has been called to cache information. In that case
117 * various socket options are recorded in the ip_xmit_attr_t and should
118 * be taken into account.
122 * The caller *must* have called conn_connect() or ip_attr_connect()
123 * before calling conn_ip_output(). The caller needs to redo that each time
124 * the destination IP address or port changes, as well as each time there is
125 * a change to any socket option that would modify how packets are routed out
126 * of the box (e.g., SO_DONTROUTE, IP_NEXTHOP, IP_BOUND_IF).
128 * The ULP caller has to serialize the use of a single ip_xmit_attr_t.
129 * We assert for that here.
132 conn_ip_output(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
134 iaflags_t ixaflags
= ixa
->ixa_flags
;
139 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
142 /* We defer ipIfStatsHCOutRequests until an error or we have an ill */
144 ASSERT(ixa
->ixa_ire
!= NULL
);
145 /* Note there is no ixa_nce when reject and blackhole routes */
146 ASSERT(ixa
->ixa_dce
!= NULL
); /* Could be default dce */
149 ASSERT(ixa
->ixa_curthread
== NULL
);
150 ixa
->ixa_curthread
= curthread
;
156 * If the ULP says the (old) IRE resulted in reachability we
157 * record this before determining whether to use a new IRE.
158 * No locking for performance reasons.
160 if (ixaflags
& IXAF_REACH_CONF
)
164 * Has routing changed since we cached the results of the lookup?
166 * This check captures all of:
167 * - the cached ire being deleted (by means of the special
168 * IRE_GENERATION_CONDEMNED)
169 * - A potentially better ire being added (ire_generation being
171 * - A deletion of the nexthop ire that was used when we did the
173 * - An addition of a potentially better nexthop ire.
174 * The last two are handled by walking and increasing the generation
175 * number on all dependant IREs in ire_flush_cache().
177 * The check also handles all cases of RTF_REJECT and RTF_BLACKHOLE
178 * since we ensure that each time we set ixa_ire to such an IRE we
179 * make sure the ixa_ire_generation does not match (by using
180 * IRE_GENERATION_VERIFY).
182 if (ire
->ire_generation
!= ixa
->ixa_ire_generation
) {
183 error
= ip_verify_ire(mp
, ixa
);
185 ip_drop_output("ipIfStatsOutDiscards - verify ire",
191 if (ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
193 ASSERT(ixa
->ixa_curthread
== curthread
);
194 ixa
->ixa_curthread
= NULL
;
196 ire
->ire_ob_pkt_count
++;
197 /* ixa_dce might be condemned; use default one */
198 return ((ire
->ire_sendfn
)(ire
, mp
, mp
->b_rptr
, ixa
,
199 &ipst
->ips_dce_default
->dce_ident
));
202 * If the ncec changed then ip_verify_ire already set
203 * ixa->ixa_dce_generation = DCE_GENERATION_VERIFY;
204 * so we can recheck the interface mtu.
208 * Note that ire->ire_generation could already have changed.
209 * We catch that next time we send a packet.
214 * No need to lock access to ixa_nce since the ip_xmit_attr usage
215 * is single threaded.
217 ASSERT(ixa
->ixa_nce
!= NULL
);
219 if (nce
->nce_is_condemned
) {
220 error
= ip_verify_nce(mp
, ixa
);
222 * In case ZEROCOPY capability becomes unavailable, we
223 * copy the message and free the original one. We might
224 * be copying more data than needed but it doesn't hurt
225 * since such change rarely happens.
230 case ENOTSUP
: { /* ZEROCOPY */
233 if ((nmp
= copymsg(mp
)) != NULL
) {
242 ip_drop_output("ipIfStatsOutDiscards - verify nce",
248 if (ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
250 ASSERT(ixa
->ixa_curthread
== curthread
);
251 ixa
->ixa_curthread
= NULL
;
253 ire
->ire_ob_pkt_count
++;
254 /* ixa_dce might be condemned; use default one */
255 return ((ire
->ire_sendfn
)(ire
, mp
, mp
->b_rptr
,
256 ixa
, &ipst
->ips_dce_default
->dce_ident
));
258 ASSERT(ixa
->ixa_nce
!= NULL
);
262 * Note that some other event could already have made
263 * the new nce condemned. We catch that next time we
264 * try to send a packet.
268 * If there is no per-destination dce_t then we have a reference to
269 * the default dce_t (which merely contains the dce_ipid).
270 * The generation check captures both the introduction of a
271 * per-destination dce_t (e.g., due to ICMP packet too big) and
272 * any change to the per-destination dce (including it becoming
273 * condemned by use of the special DCE_GENERATION_CONDEMNED).
278 * To avoid a periodic timer to increase the path MTU we
279 * look at dce_last_change_time each time we send a packet.
281 if (dce
->dce_flags
& DCEF_PMTU
) {
282 int64_t now
= LBOLT_FASTPATH64
;
284 if ((TICK_TO_SEC(now
) - dce
->dce_last_change_time
>
285 ipst
->ips_ip_pathmtu_interval
)) {
287 * Older than 20 minutes. Drop the path MTU information.
288 * Since the path MTU changes as a result of this,
289 * twiddle ixa_dce_generation to make us go through the
290 * dce verification code in conn_ip_output.
292 mutex_enter(&dce
->dce_lock
);
293 dce
->dce_flags
&= ~(DCEF_PMTU
|DCEF_TOO_SMALL_PMTU
);
294 dce
->dce_last_change_time
= TICK_TO_SEC(now
);
295 mutex_exit(&dce
->dce_lock
);
296 dce_increment_generation(dce
);
300 if (dce
->dce_generation
!= ixa
->ixa_dce_generation
) {
301 error
= ip_verify_dce(mp
, ixa
);
303 ip_drop_output("ipIfStatsOutDiscards - verify dce",
310 * Note that some other event could already have made the
311 * new dce's generation number change.
312 * We catch that next time we try to send a packet.
319 * An initial ixa_fragsize was set in ip_set_destination
320 * and we update it if any routing changes above.
321 * A change to ill_mtu with ifconfig will increase all dce_generation
322 * so that we will detect that with the generation check. Ditto for
327 * Caller needs to make sure IXAF_VERIFY_SOURCE is not set if
330 if ((ixaflags
& IXAF_VERIFY_SOURCE
) &&
331 ixa
->ixa_src_generation
!= ipst
->ips_src_generation
) {
332 /* Check if the IP source is still assigned to the host. */
335 if (!ip_verify_src(mp
, ixa
, &gen
)) {
336 /* Don't send a packet with a source that isn't ours */
337 error
= EADDRNOTAVAIL
;
338 ip_drop_output("ipIfStatsOutDiscards - invalid src",
342 /* The source is still valid - update the generation number */
343 ixa
->ixa_src_generation
= gen
;
347 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
348 * can only count the use prior to fragmentation. However the MIB
349 * counters on the ill will be incremented in post fragmentation.
351 ire
->ire_ob_pkt_count
++;
352 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCOutRequests
);
355 * Based on ire_type and ire_flags call one of:
356 * ire_send_local_v* - for IRE_LOCAL and IRE_LOOPBACK
357 * ire_send_noroute_v* - if RTF_REJECT or RTF_BLACKHOLE
358 * ire_send_multicast_v* - for IRE_MULTICAST
359 * ire_send_broadcast_v4 - for IRE_BROADCAST
360 * ire_send_wire_v* - for the rest.
363 ASSERT(ixa
->ixa_curthread
== curthread
);
364 ixa
->ixa_curthread
= NULL
;
366 return ((ire
->ire_sendfn
)(ire
, mp
, mp
->b_rptr
, ixa
, &dce
->dce_ident
));
369 if (ixaflags
& IXAF_IS_IPV4
) {
370 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
371 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
373 BUMP_MIB(&ipst
->ips_ip6_mib
, ipIfStatsHCOutRequests
);
374 BUMP_MIB(&ipst
->ips_ip6_mib
, ipIfStatsOutDiscards
);
378 ASSERT(ixa
->ixa_curthread
== curthread
);
379 ixa
->ixa_curthread
= NULL
;
385 * Handle both IPv4 and IPv6. Sets the generation number
386 * to allow the caller to know when to call us again.
387 * Returns true if the source address in the packet is a valid source.
388 * We handle callers which try to send with a zero address (since we only
389 * get here if UNSPEC_SRC is not set).
392 ip_verify_src(mblk_t
*mp
, ip_xmit_attr_t
*ixa
, uint_t
*generationp
)
394 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
397 * Need to grab the generation number before we check to
398 * avoid a race with a change to the set of local addresses.
399 * No lock needed since the thread which updates the set of local
400 * addresses use ipif/ill locks and exit those (hence a store memory
401 * barrier) before doing the atomic increase of ips_src_generation.
403 if (generationp
!= NULL
)
404 *generationp
= ipst
->ips_src_generation
;
406 if (ixa
->ixa_flags
& IXAF_IS_IPV4
) {
407 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
409 if (ipha
->ipha_src
== INADDR_ANY
)
412 return (ip_laddr_verify_v4(ipha
->ipha_src
, ixa
->ixa_zoneid
,
413 ipst
, B_FALSE
) != IPVL_BAD
);
415 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
418 if (IN6_IS_ADDR_UNSPECIFIED(&ip6h
->ip6_src
))
421 if (ixa
->ixa_flags
& IXAF_SCOPEID_SET
)
422 scopeid
= ixa
->ixa_scopeid
;
426 return (ip_laddr_verify_v6(&ip6h
->ip6_src
, ixa
->ixa_zoneid
,
427 ipst
, B_FALSE
, scopeid
) != IPVL_BAD
);
432 * Handle both IPv4 and IPv6. Reverify/recalculate the IRE to use.
435 ip_verify_ire(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
443 * Redo ip_select_route.
444 * Need to grab generation number as part of the lookup to
448 ire
= ip_select_route_pkt(mp
, ixa
, &gen
, &error
);
449 ASSERT(ire
!= NULL
); /* IRE_NOROUTE if none found */
455 if (ixa
->ixa_ire
!= NULL
)
456 ire_refrele_notr(ixa
->ixa_ire
);
458 ire_refhold_notr(ire
);
462 ixa
->ixa_ire_generation
= gen
;
463 ixa
->ixa_postfragfn
= ire
->ire_postfragfn
;
466 * Don't look for an nce for reject or blackhole.
467 * They have ire_generation set to IRE_GENERATION_VERIFY which
468 * makes conn_ip_output avoid references to ixa_nce.
470 if (ire
->ire_flags
& (RTF_REJECT
|RTF_BLACKHOLE
)) {
471 ASSERT(ixa
->ixa_ire_generation
== IRE_GENERATION_VERIFY
);
472 ixa
->ixa_dce_generation
= DCE_GENERATION_VERIFY
;
476 /* The NCE could now be different */
477 nce
= ire_to_nce_pkt(ire
, mp
);
480 * Allocation failure. Make sure we redo ire/nce selection
483 ixa
->ixa_ire_generation
= IRE_GENERATION_VERIFY
;
484 ixa
->ixa_dce_generation
= DCE_GENERATION_VERIFY
;
487 if (nce
== ixa
->ixa_nce
) {
494 * Since the path MTU might change as a result of this
495 * route change, we twiddle ixa_dce_generation to
496 * make conn_ip_output go through the ip_verify_dce code.
498 ixa
->ixa_dce_generation
= DCE_GENERATION_VERIFY
;
500 if (ixa
->ixa_nce
!= NULL
)
501 nce_refrele(ixa
->ixa_nce
);
507 * Handle both IPv4 and IPv6. Reverify/recalculate the NCE to use.
510 ip_verify_nce(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
512 ire_t
*ire
= ixa
->ixa_ire
;
518 if (ire
->ire_ipversion
== IPV4_VERSION
)
519 ipha
= (ipha_t
*)mp
->b_rptr
;
521 ip6h
= (ip6_t
*)mp
->b_rptr
;
523 nce
= ire_handle_condemned_nce(ixa
->ixa_nce
, ire
, ipha
, ip6h
, B_TRUE
);
525 /* Try to find a better ire */
526 return (ip_verify_ire(mp
, ixa
));
530 * The hardware offloading capabilities, for example LSO, of the
531 * interface might have changed, so do sanity verification here.
533 if (ixa
->ixa_flags
& IXAF_VERIFY_LSO
) {
534 if (!ip_verify_lso(nce
->nce_ill
, ixa
)) {
535 ASSERT(ixa
->ixa_notify
!= NULL
);
536 ixa
->ixa_notify(ixa
->ixa_notify_cookie
, ixa
,
543 * Verify ZEROCOPY capability of underlying ill. Notify the ULP with
544 * any ZEROCOPY changes. In case ZEROCOPY capability is not available
545 * any more, return error so that conn_ip_output() can take care of
546 * the ZEROCOPY message properly. It's safe to continue sending the
547 * message when ZEROCOPY newly becomes available.
549 if (ixa
->ixa_flags
& IXAF_VERIFY_ZCOPY
) {
550 if (!ip_verify_zcopy(nce
->nce_ill
, ixa
)) {
551 ASSERT(ixa
->ixa_notify
!= NULL
);
552 ixa
->ixa_notify(ixa
->ixa_notify_cookie
, ixa
,
554 if ((ixa
->ixa_flags
& IXAF_ZCOPY_CAPAB
) == 0)
560 * Since the path MTU might change as a result of this
561 * change, we twiddle ixa_dce_generation to
562 * make conn_ip_output go through the ip_verify_dce code.
564 ixa
->ixa_dce_generation
= DCE_GENERATION_VERIFY
;
566 nce_refrele(ixa
->ixa_nce
);
572 * Handle both IPv4 and IPv6. Reverify/recalculate the DCE to use.
575 ip_verify_dce(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
581 dce
= dce_lookup_pkt(mp
, ixa
, &gen
);
584 dce_refrele_notr(ixa
->ixa_dce
);
586 dce_refhold_notr(dce
);
590 ixa
->ixa_dce_generation
= gen
;
592 /* Extract the (path) mtu from the dce, ncec_ill etc */
593 pmtu
= ip_get_pmtu(ixa
);
596 * Tell ULP about PMTU changes - increase or decrease - by returning
597 * an error if IXAF_VERIFY_PMTU is set. In such case, ULP should update
598 * both ixa_pmtu and ixa_fragsize appropriately.
600 * If ULP doesn't set that flag then we need to update ixa_fragsize
601 * since routing could have changed the ill after ixa_fragsize
602 * was set previously in the conn_ip_output path or in
603 * ip_set_destination.
605 * In case of LSO, ixa_fragsize might be greater than ixa_pmtu.
607 * In the case of a path MTU increase we send the packet after the
610 if (ixa
->ixa_flags
& IXAF_VERIFY_PMTU
) {
611 if (ixa
->ixa_pmtu
!= pmtu
) {
612 uint_t oldmtu
= ixa
->ixa_pmtu
;
614 DTRACE_PROBE2(verify_pmtu
, uint32_t, pmtu
,
615 uint32_t, ixa
->ixa_pmtu
);
616 ASSERT(ixa
->ixa_notify
!= NULL
);
617 ixa
->ixa_notify(ixa
->ixa_notify_cookie
, ixa
,
623 ixa
->ixa_fragsize
= pmtu
;
629 * Verify LSO usability. Keep the return value simple to indicate whether
630 * the LSO capability has changed. Handle both IPv4 and IPv6.
633 ip_verify_lso(ill_t
*ill
, ip_xmit_attr_t
*ixa
)
635 ill_lso_capab_t
*lsoc
= &ixa
->ixa_lso_capab
;
636 ill_lso_capab_t
*new_lsoc
= ill
->ill_lso_capab
;
638 if (ixa
->ixa_flags
& IXAF_LSO_CAPAB
) {
640 * Not usable any more.
642 if ((ixa
->ixa_flags
& IXAF_IPSEC_SECURE
) ||
643 (ixa
->ixa_ire
->ire_type
& (IRE_LOCAL
| IRE_LOOPBACK
)) ||
644 ((ixa
->ixa_flags
& IXAF_IS_IPV4
) ?
645 !ILL_LSO_TCP_IPV4_USABLE(ill
) :
646 !ILL_LSO_TCP_IPV6_USABLE(ill
))) {
647 ixa
->ixa_flags
&= ~IXAF_LSO_CAPAB
;
653 * Capability has changed, refresh the copy in ixa.
655 if (lsoc
->ill_lso_max
!= new_lsoc
->ill_lso_max
) {
660 } else { /* Was not usable */
661 if (!(ixa
->ixa_flags
& IXAF_IPSEC_SECURE
) &&
662 !(ixa
->ixa_ire
->ire_type
& (IRE_LOCAL
| IRE_LOOPBACK
)) &&
663 ((ixa
->ixa_flags
& IXAF_IS_IPV4
) ?
664 ILL_LSO_TCP_IPV4_USABLE(ill
) :
665 ILL_LSO_TCP_IPV6_USABLE(ill
))) {
667 ixa
->ixa_flags
|= IXAF_LSO_CAPAB
;
677 * Verify ZEROCOPY usability. Keep the return value simple to indicate whether
678 * the ZEROCOPY capability has changed. Handle both IPv4 and IPv6.
681 ip_verify_zcopy(ill_t
*ill
, ip_xmit_attr_t
*ixa
)
683 if (ixa
->ixa_flags
& IXAF_ZCOPY_CAPAB
) {
685 * Not usable any more.
687 if ((ixa
->ixa_flags
& IXAF_IPSEC_SECURE
) ||
688 (ixa
->ixa_ire
->ire_type
& (IRE_LOCAL
| IRE_LOOPBACK
)) ||
689 !ILL_ZCOPY_USABLE(ill
)) {
690 ixa
->ixa_flags
&= ~IXAF_ZCOPY_CAPAB
;
694 } else { /* Was not usable */
695 if (!(ixa
->ixa_flags
& IXAF_IPSEC_SECURE
) &&
696 !(ixa
->ixa_ire
->ire_type
& (IRE_LOCAL
| IRE_LOOPBACK
)) &&
697 ILL_ZCOPY_USABLE(ill
)) {
698 ixa
->ixa_flags
|= IXAF_ZCOPY_CAPAB
;
709 * When there is no conn_t context, this will send a packet.
710 * The caller must *not* have called conn_connect() or ip_attr_connect()
711 * before calling ip_output_simple().
712 * Handles IPv4 and IPv6. Returns zero or an errno such as ENETUNREACH.
713 * Honors IXAF_SET_SOURCE.
715 * We acquire the ire and after calling ire_sendfn we release
716 * the hold on the ire. Ditto for the nce and dce.
718 * This assumes that the caller has set the following in ip_xmit_attr_t:
719 * ixa_zoneid, and ixa_ipst must always be set.
720 * If ixa_ifindex is non-zero it means send out that ill. (If it is
721 * an upper IPMP ill we load balance across the group; if a lower we send
722 * on that lower ill without load balancing.)
723 * IXAF_IS_IPV4 must be set correctly.
724 * If IXAF_IPSEC_SECURE is set then the ixa_ipsec_* fields must be set.
725 * If IXAF_NO_IPSEC is set we'd skip IPsec policy lookup.
726 * If neither of those two are set we do an IPsec policy lookup.
728 * We handle setting things like
733 * The caller may set ixa_xmit_hint, which is used for ECMP selection and
734 * transmit ring selection in GLD.
736 * The caller must do an ixa_cleanup() to release any IPsec references
740 ip_output_simple(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
744 ASSERT(ixa
->ixa_ipst
!= NULL
);
746 if (ixa
->ixa_flags
& IXAF_IS_IPV4
)
747 return (ip_output_simple_v4(mp
, ixa
));
749 return (ip_output_simple_v6(mp
, ixa
));
753 ip_output_simple_v4(mblk_t
*mp
, ip_xmit_attr_t
*ixa
)
756 ipaddr_t firsthop
; /* In IP header */
757 ipaddr_t dst
; /* End of source route, or ipha_dst if none */
759 ipaddr_t setsrc
; /* RTF_SETSRC */
764 iaflags_t ixaflags
= ixa
->ixa_flags
;
765 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
766 boolean_t repeat
= B_FALSE
;
769 ipha
= (ipha_t
*)mp
->b_rptr
;
770 ASSERT(IPH_HDR_VERSION(ipha
) == IPV4_VERSION
);
772 /* Caller already set flags */
773 ASSERT(ixa
->ixa_flags
& IXAF_IS_IPV4
);
775 ASSERT(ixa
->ixa_nce
== NULL
);
777 ixa
->ixa_pktlen
= ntohs(ipha
->ipha_length
);
778 ASSERT(ixa
->ixa_pktlen
== msgdsize(mp
));
779 ixa
->ixa_ip_hdr_length
= IPH_HDR_LENGTH(ipha
);
780 ixa
->ixa_protocol
= ipha
->ipha_protocol
;
783 * Assumes that source routed packets have already been massaged by
784 * the ULP (ip_massage_options) and as a result ipha_dst is the next
785 * hop in the source route. The final destination is used for IPsec
786 * policy and DCE lookup.
788 firsthop
= ipha
->ipha_dst
;
789 dst
= ip_get_dst(ipha
);
794 ire
= ip_select_route_v4(firsthop
, ipha
->ipha_src
, ixa
, NULL
,
796 ASSERT(ire
!= NULL
); /* IRE_NOROUTE if none found */
798 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
799 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
800 ip_drop_output("ipIfStatsOutDiscards - select route", mp
, NULL
);
805 if (ire
->ire_flags
& (RTF_BLACKHOLE
|RTF_REJECT
)) {
806 /* ire_ill might be NULL hence need to skip some code */
807 if (ixaflags
& IXAF_SET_SOURCE
)
808 ipha
->ipha_src
= htonl(INADDR_LOOPBACK
);
809 ixa
->ixa_fragsize
= IP_MAXPACKET
;
812 ire
->ire_ob_pkt_count
++;
813 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
814 /* No dce yet; use default one */
815 error
= (ire
->ire_sendfn
)(ire
, mp
, ipha
, ixa
,
816 &ipst
->ips_dce_default
->dce_ident
);
820 /* Note that ipha_dst is only used for IRE_MULTICAST */
821 nce
= ire_to_nce(ire
, ipha
->ipha_dst
, NULL
);
823 /* Allocation failure? */
824 ip_drop_output("ire_to_nce", mp
, ill
);
829 if (nce
->nce_is_condemned
) {
832 nce1
= ire_handle_condemned_nce(nce
, ire
, ipha
, NULL
, B_TRUE
);
836 /* Try finding a better IRE */
841 /* Tried twice - drop packet */
842 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
843 ip_drop_output("No nce", mp
, ill
);
851 ixa
->ixa_postfragfn
= ire
->ire_postfragfn
;
853 ASSERT(ixa
->ixa_nce
== NULL
);
857 * Check for a dce_t with a path mtu.
859 dce
= dce_lookup_v4(dst
, ipst
, NULL
);
862 if (!(ixaflags
& IXAF_PMTU_DISCOVERY
)) {
863 ixa
->ixa_fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
864 } else if (dce
->dce_flags
& DCEF_PMTU
) {
866 * To avoid a periodic timer to increase the path MTU we
867 * look at dce_last_change_time each time we send a packet.
869 now
= ddi_get_lbolt64();
870 if (TICK_TO_SEC(now
) - dce
->dce_last_change_time
>
871 ipst
->ips_ip_pathmtu_interval
) {
873 * Older than 20 minutes. Drop the path MTU information.
875 mutex_enter(&dce
->dce_lock
);
876 dce
->dce_flags
&= ~(DCEF_PMTU
|DCEF_TOO_SMALL_PMTU
);
877 dce
->dce_last_change_time
= TICK_TO_SEC(now
);
878 mutex_exit(&dce
->dce_lock
);
879 dce_increment_generation(dce
);
880 ixa
->ixa_fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
884 fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
885 if (fragsize
> dce
->dce_pmtu
)
886 fragsize
= dce
->dce_pmtu
;
887 ixa
->ixa_fragsize
= fragsize
;
890 ixa
->ixa_fragsize
= ip_get_base_mtu(nce
->nce_ill
, ire
);
894 * We use ire_nexthop_ill (and not ncec_ill) to avoid the under ipmp
895 * interface for source address selection.
897 ill
= ire_nexthop_ill(ire
);
899 if (ixaflags
& IXAF_SET_SOURCE
) {
903 * We use the final destination to get
904 * correct selection for source routed packets
907 /* If unreachable we have no ill but need some source */
909 src
= htonl(INADDR_LOOPBACK
);
912 error
= ip_select_source_v4(ill
, setsrc
, dst
,
913 ixa
->ixa_multicast_ifaddr
, ixa
->ixa_zoneid
, ipst
,
917 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCOutRequests
);
918 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
919 ip_drop_output("ipIfStatsOutDiscards - no source",
924 ipha
->ipha_src
= src
;
925 } else if (ixaflags
& IXAF_VERIFY_SOURCE
) {
926 /* Check if the IP source is assigned to the host. */
927 if (!ip_verify_src(mp
, ixa
, NULL
)) {
928 /* Don't send a packet with a source that isn't ours */
929 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
930 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutDiscards
);
931 ip_drop_output("ipIfStatsOutDiscards - invalid source",
934 error
= EADDRNOTAVAIL
;
941 * Check against global IPsec policy to set the AH/ESP attributes.
942 * IPsec will set IXAF_IPSEC_* and ixa_ipsec_* as appropriate.
944 if (!(ixaflags
& (IXAF_NO_IPSEC
|IXAF_IPSEC_SECURE
))) {
945 ASSERT(ixa
->ixa_ipsec_policy
== NULL
);
946 mp
= ip_output_attach_policy(mp
, ipha
, NULL
, NULL
, ixa
);
948 /* MIB and ip_drop_packet already done */
949 return (EHOSTUNREACH
); /* IPsec policy failure */
954 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCOutRequests
);
956 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsHCOutRequests
);
960 * We update the statistics on the most specific IRE i.e., the first
962 * We don't have an IRE when we fragment, hence ire_ob_pkt_count
963 * can only count the use prior to fragmentation. However the MIB
964 * counters on the ill will be incremented in post fragmentation.
966 ire
->ire_ob_pkt_count
++;
969 * Based on ire_type and ire_flags call one of:
970 * ire_send_local_v4 - for IRE_LOCAL and IRE_LOOPBACK
971 * ire_send_noroute_v4 - if RTF_REJECT or RTF_BLACKHOLE
972 * ire_send_multicast_v4 - for IRE_MULTICAST
973 * ire_send_broadcast_v4 - for IRE_BROADCAST
974 * ire_send_wire_v4 - for the rest.
976 error
= (ire
->ire_sendfn
)(ire
, mp
, ipha
, ixa
, &dce
->dce_ident
);
983 if (ixa
->ixa_nce
!= NULL
)
984 nce_refrele(ixa
->ixa_nce
);
990 * ire_sendfn() functions.
991 * These functions use the following xmit_attr:
992 * - ixa_fragsize - read to determine whether or not to fragment
993 * - IXAF_IPSEC_SECURE - to determine whether or not to invoke IPsec
994 * - ixa_ipsec_* are used inside IPsec
995 * - IXAF_SET_SOURCE - replace IP source in broadcast case.
996 * - IXAF_LOOPBACK_COPY - for multicast and broadcast
1001 * ire_sendfn for IRE_LOCAL and IRE_LOOPBACK
1003 * The checks for restrict_interzone_loopback are done in ire_route_recursive.
1007 ire_send_local_v4(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
1008 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
1010 ipha_t
*ipha
= (ipha_t
*)iph_arg
;
1011 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1012 ill_t
*ill
= ire
->ire_ill
;
1013 ip_recv_attr_t iras
; /* NOTE: No bzero for performance */
1014 uint_t pktlen
= ixa
->ixa_pktlen
;
1017 * No fragmentation, no nce, no application of IPsec,
1018 * and no ipha_ident assignment.
1020 * Note different order between IP provider and FW_HOOKS than in
1025 * DTrace this as ip:::send. A packet blocked by FW_HOOKS will fire the
1026 * send probe, but not the receive probe.
1028 DTRACE_IP7(send
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
1029 ipha
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, ipha
, ip6_t
*, NULL
,
1032 if (HOOKS4_INTERESTED_LOOPBACK_OUT(ipst
)) {
1035 DTRACE_PROBE4(ip4__loopback__out__start
, ill_t
*, NULL
,
1036 ill_t
*, ill
, ipha_t
*, ipha
, mblk_t
*, mp
);
1037 FW_HOOKS(ipst
->ips_ip4_loopback_out_event
,
1038 ipst
->ips_ipv4firewall_loopback_out
,
1039 NULL
, ill
, ipha
, mp
, mp
, 0, ipst
, error
);
1040 DTRACE_PROBE1(ip4__loopback__out__end
, mblk_t
*, mp
);
1045 * Even if the destination was changed by the filter we use the
1046 * forwarding decision that was made based on the address
1047 * in ip_output/ip_set_destination.
1049 /* Length could be different */
1050 ipha
= (ipha_t
*)mp
->b_rptr
;
1051 pktlen
= ntohs(ipha
->ipha_length
);
1055 * If a callback is enabled then we need to know the
1056 * source and destination zoneids for the packet. We already
1059 if (ipst
->ips_ip4_observe
.he_interested
) {
1060 zoneid_t szone
, dzone
;
1061 zoneid_t stackzoneid
;
1063 stackzoneid
= netstackid_to_zoneid(
1064 ipst
->ips_netstack
->netstack_stackid
);
1066 if (stackzoneid
== GLOBAL_ZONEID
) {
1067 /* Shared-IP zone */
1068 dzone
= ire
->ire_zoneid
;
1069 szone
= ixa
->ixa_zoneid
;
1071 szone
= dzone
= stackzoneid
;
1073 ipobs_hook(mp
, IPOBS_HOOK_LOCAL
, szone
, dzone
, ill
, ipst
);
1076 /* Handle lo0 stats */
1077 ipst
->ips_loopback_packets
++;
1079 /* Map ixa to ira including IPsec policies */
1080 ipsec_out_to_in(ixa
, ill
, &iras
);
1081 iras
.ira_pktlen
= pktlen
;
1083 if (!IS_SIMPLE_IPH(ipha
)) {
1084 ip_output_local_options(ipha
, ipst
);
1085 iras
.ira_flags
|= IRAF_IPV4_OPTIONS
;
1088 if (HOOKS4_INTERESTED_LOOPBACK_IN(ipst
)) {
1091 DTRACE_PROBE4(ip4__loopback__in__start
, ill_t
*, ill
,
1092 ill_t
*, NULL
, ipha_t
*, ipha
, mblk_t
*, mp
);
1093 FW_HOOKS(ipst
->ips_ip4_loopback_in_event
,
1094 ipst
->ips_ipv4firewall_loopback_in
,
1095 ill
, NULL
, ipha
, mp
, mp
, 0, ipst
, error
);
1097 DTRACE_PROBE1(ip4__loopback__in__end
, mblk_t
*, mp
);
1099 ira_cleanup(&iras
, B_FALSE
);
1103 * Even if the destination was changed by the filter we use the
1104 * forwarding decision that was made based on the address
1105 * in ip_output/ip_set_destination.
1107 /* Length could be different */
1108 ipha
= (ipha_t
*)mp
->b_rptr
;
1109 pktlen
= iras
.ira_pktlen
= ntohs(ipha
->ipha_length
);
1112 DTRACE_IP7(receive
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
1113 ipha
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, ipha
, ip6_t
*, NULL
,
1116 ire
->ire_ib_pkt_count
++;
1117 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCInReceives
);
1118 UPDATE_MIB(ill
->ill_ip_mib
, ipIfStatsHCInOctets
, pktlen
);
1120 /* Destined to ire_zoneid - use that for fanout */
1121 iras
.ira_zoneid
= ire
->ire_zoneid
;
1123 ip_fanout_v4(mp
, ipha
, &iras
);
1125 /* We moved any IPsec refs from ixa to iras */
1126 ira_cleanup(&iras
, B_FALSE
);
1131 * ire_sendfn for IRE_BROADCAST
1132 * If the broadcast address is present on multiple ills and ixa_ifindex
1133 * isn't set, then we generate
1134 * a separate datagram (potentially with different source address) for
1135 * those ills. In any case, only one copy is looped back to ip_input_v4.
1138 ire_send_broadcast_v4(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
1139 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
1141 ipha_t
*ipha
= (ipha_t
*)iph_arg
;
1142 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1143 irb_t
*irb
= ire
->ire_bucket
;
1147 iaflags_t ixaflags
= ixa
->ixa_flags
;
1148 nce_t
*nce1
, *nce_orig
;
1151 * Unless someone already set a ttl, force the ttl to a smallish
1154 if (!(ixa
->ixa_flags
& IXAF_NO_TTL_CHANGE
)) {
1156 * To avoid broadcast storms, we usually set the TTL to 1 for
1157 * broadcasts. This can
1158 * be overridden stack-wide through the ip_broadcast_ttl
1159 * ndd tunable, or on a per-connection basis through the
1160 * IP_BROADCAST_TTL socket option.
1162 * If SO_DONTROUTE/IXAF_DONTROUTE is set, then ire_send_wire_v4
1163 * will force ttl to one after we've set this.
1165 if (ixaflags
& IXAF_BROADCAST_TTL_SET
)
1166 ipha
->ipha_ttl
= ixa
->ixa_broadcast_ttl
;
1168 ipha
->ipha_ttl
= ipst
->ips_ip_broadcast_ttl
;
1171 * Make sure we get a loopback copy (after IPsec and frag)
1172 * Skip hardware checksum so that loopback copy is checksummed.
1174 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
1176 /* Do we need to potentially generate multiple copies? */
1177 if (irb
->irb_ire_cnt
== 1 || ixa
->ixa_ifindex
!= 0)
1178 return (ire_send_wire_v4(ire
, mp
, ipha
, ixa
, identp
));
1181 * Loop over all IRE_BROADCAST in the bucket (might only be one).
1182 * Note that everything in the bucket has the same destination address.
1185 for (ire1
= irb
->irb_ire
; ire1
!= NULL
; ire1
= ire1
->ire_next
) {
1186 /* We do the main IRE after the end of the loop */
1191 * Only IREs for the same IP address should be in the same
1194 ASSERT(ire1
->ire_addr
== ire
->ire_addr
);
1195 if (!(ire1
->ire_type
& IRE_BROADCAST
))
1198 if (IRE_IS_CONDEMNED(ire1
))
1201 if (ixa
->ixa_zoneid
!= ALL_ZONES
&&
1202 ire
->ire_zoneid
!= ire1
->ire_zoneid
)
1205 ASSERT(ire
->ire_ill
!= ire1
->ire_ill
&& ire1
->ire_ill
!= NULL
);
1208 * For IPMP we only send for the ipmp_ill. arp_nce_init() will
1209 * ensure that this goes out on the cast_ill.
1211 if (IS_UNDER_IPMP(ire1
->ire_ill
))
1216 BUMP_MIB(ire1
->ire_ill
->ill_ip_mib
,
1217 ipIfStatsOutDiscards
);
1218 ip_drop_output("ipIfStatsOutDiscards",
1223 ipha1
= (ipha_t
*)mp1
->b_rptr
;
1224 if (ixa
->ixa_flags
& IXAF_SET_SOURCE
) {
1226 * Need to pick a different source address for each
1227 * interface. If we have a global IPsec policy and
1228 * no per-socket policy then we punt to
1229 * ip_output_simple_v4 using a separate ip_xmit_attr_t.
1231 if (ixaflags
& IXAF_IPSEC_GLOBAL_POLICY
) {
1232 ip_output_simple_broadcast(ixa
, mp1
);
1235 /* Pick a new source address for each interface */
1236 if (ip_select_source_v4(ire1
->ire_ill
, INADDR_ANY
,
1237 ipha1
->ipha_dst
, INADDR_ANY
, ixa
->ixa_zoneid
, ipst
,
1238 &ipha1
->ipha_src
, NULL
, NULL
) != 0) {
1239 BUMP_MIB(ire1
->ire_ill
->ill_ip_mib
,
1240 ipIfStatsOutDiscards
);
1241 ip_drop_output("ipIfStatsOutDiscards - select "
1242 "broadcast source", mp1
, ire1
->ire_ill
);
1247 * Check against global IPsec policy to set the AH/ESP
1248 * attributes. IPsec will set IXAF_IPSEC_* and
1249 * ixa_ipsec_* as appropriate.
1251 if (!(ixaflags
& (IXAF_NO_IPSEC
|IXAF_IPSEC_SECURE
))) {
1252 ASSERT(ixa
->ixa_ipsec_policy
== NULL
);
1253 mp1
= ip_output_attach_policy(mp1
, ipha
, NULL
,
1257 * MIB and ip_drop_packet already
1264 /* Make sure we have an NCE on this ill */
1265 nce1
= arp_nce_init(ire1
->ire_ill
, ire1
->ire_addr
,
1268 BUMP_MIB(ire1
->ire_ill
->ill_ip_mib
,
1269 ipIfStatsOutDiscards
);
1270 ip_drop_output("ipIfStatsOutDiscards - broadcast nce",
1271 mp1
, ire1
->ire_ill
);
1275 nce_orig
= ixa
->ixa_nce
;
1276 ixa
->ixa_nce
= nce1
;
1280 * Ignore any errors here. We just collect the errno for
1281 * the main ire below
1283 (void) ire_send_wire_v4(ire1
, mp1
, ipha1
, ixa
, identp
);
1286 ixa
->ixa_nce
= nce_orig
;
1289 ixa
->ixa_flags
&= ~IXAF_LOOPBACK_COPY
;
1292 /* Finally, the main one */
1295 * For IPMP we only send broadcasts on the ipmp_ill.
1297 if (IS_UNDER_IPMP(ire
->ire_ill
)) {
1302 return (ire_send_wire_v4(ire
, mp
, ipha
, ixa
, identp
));
1306 * Send a packet using a different source address and different
1310 ip_output_simple_broadcast(ip_xmit_attr_t
*ixa
, mblk_t
*mp
)
1312 ip_xmit_attr_t ixas
;
1314 bzero(&ixas
, sizeof (ixas
));
1315 ixas
.ixa_flags
= IXAF_BASIC_SIMPLE_V4
;
1316 ixas
.ixa_zoneid
= ixa
->ixa_zoneid
;
1317 ixas
.ixa_ifindex
= 0;
1318 ixas
.ixa_ipst
= ixa
->ixa_ipst
;
1319 ixas
.ixa_cred
= ixa
->ixa_cred
;
1320 ixas
.ixa_cpid
= ixa
->ixa_cpid
;
1321 ixas
.ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
1323 (void) ip_output_simple(mp
, &ixas
);
1328 * ire_sendfn for IRE_MULTICAST
1331 ire_send_multicast_v4(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
1332 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
1334 ipha_t
*ipha
= (ipha_t
*)iph_arg
;
1335 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1336 ill_t
*ill
= ire
->ire_ill
;
1337 iaflags_t ixaflags
= ixa
->ixa_flags
;
1340 * Check if anything in ip_input_v4 wants a copy of the transmitted
1341 * packet (after IPsec and fragmentation)
1343 * 1. Multicast routers always need a copy unless SO_DONTROUTE is set
1344 * RSVP and the rsvp daemon is an example of a
1345 * protocol and user level process that
1346 * handles its own routing. Hence, it uses the
1347 * SO_DONTROUTE option to accomplish this.
1348 * 2. If the sender has set IP_MULTICAST_LOOP, then we just
1349 * check whether there are any receivers for the group on the ill
1350 * (ignoring the zoneid).
1351 * 3. If IP_MULTICAST_LOOP is not set, then we check if there are
1352 * any members in other shared-IP zones.
1353 * If such members exist, then we indicate that the sending zone
1354 * shouldn't get a loopback copy to preserve the IP_MULTICAST_LOOP
1357 * When we loopback we skip hardware checksum to make sure loopback
1358 * copy is checksummed.
1360 * Note that ire_ill is the upper in the case of IPMP.
1362 ixa
->ixa_flags
&= ~(IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
);
1363 if (ipst
->ips_ip_g_mrouter
&& ill
->ill_mrouter_cnt
> 0 &&
1364 !(ixaflags
& IXAF_DONTROUTE
)) {
1365 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
1366 } else if (ixaflags
& IXAF_MULTICAST_LOOP
) {
1368 * If this zone or any other zone has members then loopback
1371 if (ill_hasmembers_v4(ill
, ipha
->ipha_dst
))
1372 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
1373 } else if (ipst
->ips_netstack
->netstack_numzones
> 1) {
1375 * This zone should not have a copy. But there are some other
1376 * zones which might have members.
1378 if (ill_hasmembers_otherzones_v4(ill
, ipha
->ipha_dst
,
1380 ixa
->ixa_flags
|= IXAF_NO_LOOP_ZONEID_SET
;
1381 ixa
->ixa_no_loop_zoneid
= ixa
->ixa_zoneid
;
1382 ixa
->ixa_flags
|= IXAF_LOOPBACK_COPY
| IXAF_NO_HW_CKSUM
;
1387 * Unless icmp_output_hdrincl already set a ttl, force the ttl to
1388 * the IP_MULTICAST_TTL value
1390 if (!(ixaflags
& IXAF_NO_TTL_CHANGE
)) {
1391 ipha
->ipha_ttl
= ixa
->ixa_multicast_ttl
;
1394 return (ire_send_wire_v4(ire
, mp
, ipha
, ixa
, identp
));
1398 * ire_sendfn for IREs with RTF_REJECT/RTF_BLACKHOLE, including IRE_NOROUTE
1401 ire_send_noroute_v4(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
1402 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
1404 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1405 ipha_t
*ipha
= (ipha_t
*)iph_arg
;
1407 ip_recv_attr_t iras
;
1410 /* We assign an IP ident for nice errors */
1411 ipha
->ipha_ident
= atomic_inc_32_nv(identp
);
1413 BUMP_MIB(&ipst
->ips_ip_mib
, ipIfStatsOutNoRoutes
);
1415 if (ire
->ire_type
& IRE_NOROUTE
) {
1416 /* A lack of a route as opposed to RTF_REJECT|BLACKHOLE */
1417 ip_rts_change(RTM_MISS
, ipha
->ipha_dst
, 0, 0, 0, 0, 0, 0,
1421 if (ire
->ire_flags
& RTF_BLACKHOLE
) {
1422 ip_drop_output("ipIfStatsOutNoRoutes RTF_BLACKHOLE", mp
, NULL
);
1424 /* No error even for local senders - silent blackhole */
1427 ip_drop_output("ipIfStatsOutNoRoutes RTF_REJECT", mp
, NULL
);
1430 * We need an ill_t for the ip_recv_attr_t even though this packet
1431 * was never received and icmp_unreachable doesn't currently use
1434 ill
= ill_lookup_on_name("lo0", B_FALSE
,
1435 !(ixa
->ixa_flags
& IRAF_IS_IPV4
), &dummy
, ipst
);
1438 return (EHOSTUNREACH
);
1441 bzero(&iras
, sizeof (iras
));
1442 /* Map ixa to ira including IPsec policies */
1443 ipsec_out_to_in(ixa
, ill
, &iras
);
1445 if (ip_source_routed(ipha
, ipst
)) {
1446 icmp_unreachable(mp
, ICMP_SOURCE_ROUTE_FAILED
, &iras
);
1448 icmp_unreachable(mp
, ICMP_HOST_UNREACHABLE
, &iras
);
1450 /* We moved any IPsec refs from ixa to iras */
1451 ira_cleanup(&iras
, B_FALSE
);
1453 return (EHOSTUNREACH
);
1457 * Calculate a checksum ignoring any hardware capabilities
1459 * Returns B_FALSE if the packet was too short for the checksum. Caller
1460 * should free and do stats.
1463 ip_output_sw_cksum_v4(mblk_t
*mp
, ipha_t
*ipha
, ip_xmit_attr_t
*ixa
)
1465 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1466 uint_t pktlen
= ixa
->ixa_pktlen
;
1469 uint8_t protocol
= ixa
->ixa_protocol
;
1470 uint16_t ip_hdr_length
= ixa
->ixa_ip_hdr_length
;
1471 ipaddr_t dst
= ipha
->ipha_dst
;
1472 ipaddr_t src
= ipha
->ipha_src
;
1474 /* Just in case it contained garbage */
1475 DB_CKSUMFLAGS(mp
) &= ~HCK_FLAGS
;
1478 * Calculate ULP checksum
1480 if (protocol
== IPPROTO_TCP
) {
1481 cksump
= IPH_TCPH_CHECKSUMP(ipha
, ip_hdr_length
);
1482 cksum
= IP_TCP_CSUM_COMP
;
1483 } else if (protocol
== IPPROTO_UDP
) {
1484 cksump
= IPH_UDPH_CHECKSUMP(ipha
, ip_hdr_length
);
1485 cksum
= IP_UDP_CSUM_COMP
;
1486 } else if (protocol
== IPPROTO_SCTP
) {
1489 ASSERT(MBLKL(mp
) >= (ip_hdr_length
+ sizeof (*sctph
)));
1490 sctph
= (sctp_hdr_t
*)(mp
->b_rptr
+ ip_hdr_length
);
1492 * Zero out the checksum field to ensure proper
1493 * checksum calculation.
1495 sctph
->sh_chksum
= 0;
1497 if (!skip_sctp_cksum
)
1499 sctph
->sh_chksum
= sctp_cksum(mp
, ip_hdr_length
);
1505 /* ULP puts the checksum field in the first mblk */
1506 ASSERT(((uchar_t
*)cksump
) + sizeof (uint16_t) <= mp
->b_wptr
);
1509 * We accumulate the pseudo header checksum in cksum.
1510 * This is pretty hairy code, so watch close. One
1511 * thing to keep in mind is that UDP and TCP have
1512 * stored their respective datagram lengths in their
1513 * checksum fields. This lines things up real nice.
1515 cksum
+= (dst
>> 16) + (dst
& 0xFFFF) + (src
>> 16) + (src
& 0xFFFF);
1517 cksum
= IP_CSUM(mp
, ip_hdr_length
, cksum
);
1519 * For UDP/IPv4 a zero means that the packet wasn't checksummed.
1522 if (protocol
== IPPROTO_UDP
&& cksum
== 0)
1527 IP_STAT(ipst
, ip_out_sw_cksum
);
1528 IP_STAT_UPDATE(ipst
, ip_out_sw_cksum_bytes
, pktlen
);
1531 /* Calculate IPv4 header checksum */
1532 ipha
->ipha_hdr_checksum
= 0;
1533 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
1538 * Calculate the ULP checksum - try to use hardware.
1540 * If the hardware supports IP header checksum offload; then clear the
1541 * contents of IP header checksum field as expected by NIC.
1542 * Do this only if we offloaded either full or partial sum.
1544 * Returns B_FALSE if the packet was too short for the checksum. Caller
1545 * should free and do stats.
1548 ip_output_cksum_v4(iaflags_t ixaflags
, mblk_t
*mp
, ipha_t
*ipha
,
1549 ip_xmit_attr_t
*ixa
, ill_t
*ill
)
1551 uint_t pktlen
= ixa
->ixa_pktlen
;
1555 uint8_t protocol
= ixa
->ixa_protocol
;
1556 uint16_t ip_hdr_length
= ixa
->ixa_ip_hdr_length
;
1558 if ((ixaflags
& IXAF_NO_HW_CKSUM
) || !ILL_HCKSUM_CAPABLE(ill
) ||
1560 return (ip_output_sw_cksum_v4(mp
, ipha
, ixa
));
1564 * Calculate ULP checksum. Note that we don't use cksump and cksum
1565 * if the ill has FULL support.
1567 if (protocol
== IPPROTO_TCP
) {
1568 cksump
= IPH_TCPH_CHECKSUMP(ipha
, ip_hdr_length
);
1569 cksum
= IP_TCP_CSUM_COMP
; /* Pseudo-header cksum */
1570 } else if (protocol
== IPPROTO_UDP
) {
1571 cksump
= IPH_UDPH_CHECKSUMP(ipha
, ip_hdr_length
);
1572 cksum
= IP_UDP_CSUM_COMP
; /* Pseudo-header cksum */
1573 } else if (protocol
== IPPROTO_SCTP
) {
1576 ASSERT(MBLKL(mp
) >= (ip_hdr_length
+ sizeof (*sctph
)));
1577 sctph
= (sctp_hdr_t
*)(mp
->b_rptr
+ ip_hdr_length
);
1579 * Zero out the checksum field to ensure proper
1580 * checksum calculation.
1582 sctph
->sh_chksum
= 0;
1584 if (!skip_sctp_cksum
)
1586 sctph
->sh_chksum
= sctp_cksum(mp
, ip_hdr_length
);
1590 /* Calculate IPv4 header checksum */
1591 ipha
->ipha_hdr_checksum
= 0;
1592 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
1596 /* ULP puts the checksum field in the first mblk */
1597 ASSERT(((uchar_t
*)cksump
) + sizeof (uint16_t) <= mp
->b_wptr
);
1600 * Underlying interface supports hardware checksum offload for
1601 * the payload; leave the payload checksum for the hardware to
1602 * calculate. N.B: We only need to set up checksum info on the
1605 hck_flags
= ill
->ill_hcksum_capab
->ill_hcksum_txflags
;
1607 DB_CKSUMFLAGS(mp
) &= ~HCK_FLAGS
;
1608 if (hck_flags
& HCKSUM_INET_FULL_V4
) {
1610 * Hardware calculates pseudo-header, header and the
1611 * payload checksums, so clear the checksum field in
1612 * the protocol header.
1615 DB_CKSUMFLAGS(mp
) |= HCK_FULLCKSUM
;
1617 ipha
->ipha_hdr_checksum
= 0;
1618 if (hck_flags
& HCKSUM_IPHDRCKSUM
) {
1619 DB_CKSUMFLAGS(mp
) |= HCK_IPV4_HDRCKSUM
;
1621 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
1625 if ((hck_flags
) & HCKSUM_INET_PARTIAL
) {
1626 ipaddr_t dst
= ipha
->ipha_dst
;
1627 ipaddr_t src
= ipha
->ipha_src
;
1629 * Partial checksum offload has been enabled. Fill
1630 * the checksum field in the protocol header with the
1631 * pseudo-header checksum value.
1633 * We accumulate the pseudo header checksum in cksum.
1634 * This is pretty hairy code, so watch close. One
1635 * thing to keep in mind is that UDP and TCP have
1636 * stored their respective datagram lengths in their
1637 * checksum fields. This lines things up real nice.
1639 cksum
+= (dst
>> 16) + (dst
& 0xFFFF) +
1640 (src
>> 16) + (src
& 0xFFFF);
1642 cksum
= (cksum
& 0xFFFF) + (cksum
>> 16);
1643 *(cksump
) = (cksum
& 0xFFFF) + (cksum
>> 16);
1646 * Offsets are relative to beginning of IP header.
1648 DB_CKSUMSTART(mp
) = ip_hdr_length
;
1649 DB_CKSUMSTUFF(mp
) = (uint8_t *)cksump
- (uint8_t *)ipha
;
1650 DB_CKSUMEND(mp
) = pktlen
;
1651 DB_CKSUMFLAGS(mp
) |= HCK_PARTIALCKSUM
;
1653 ipha
->ipha_hdr_checksum
= 0;
1654 if (hck_flags
& HCKSUM_IPHDRCKSUM
) {
1655 DB_CKSUMFLAGS(mp
) |= HCK_IPV4_HDRCKSUM
;
1657 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
1661 /* Hardware capabilities include neither full nor partial IPv4 */
1662 return (ip_output_sw_cksum_v4(mp
, ipha
, ixa
));
1666 * ire_sendfn for offlink and onlink destinations.
1667 * Also called from the multicast and broadcast send functions.
1669 * Assumes that the caller has a hold on the ire.
1671 * This function doesn't care if the IRE just became condemned since that
1672 * can happen at any time.
1676 ire_send_wire_v4(ire_t
*ire
, mblk_t
*mp
, void *iph_arg
,
1677 ip_xmit_attr_t
*ixa
, uint32_t *identp
)
1679 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
1680 ipha_t
*ipha
= (ipha_t
*)iph_arg
;
1681 iaflags_t ixaflags
= ixa
->ixa_flags
;
1684 ASSERT(ixa
->ixa_nce
!= NULL
);
1685 ill
= ixa
->ixa_nce
->nce_ill
;
1687 if (ixaflags
& IXAF_DONTROUTE
)
1691 * Assign an ident value for this packet. There could be other
1692 * threads targeting the same destination, so we have to arrange
1693 * for an atomic increment. Note that we use a 32-bit atomic add
1694 * because it has better performance than its 16-bit sibling.
1696 * Normally ixa_extra_ident is 0, but in the case of LSO it will
1697 * be the number of TCP segments that the driver/hardware will
1698 * additionally construct.
1700 ipha
->ipha_ident
= atomic_add_32_nv(identp
,
1701 ixa
->ixa_extra_ident
+ 1);
1703 ipha
->ipha_ident
= htons(ipha
->ipha_ident
);
1707 * This might set b_band, thus the IPsec and fragmentation
1708 * code in IP ensures that b_band is updated in the first mblk.
1710 if (IPP_ENABLED(IPP_LOCAL_OUT
, ipst
)) {
1711 /* ip_process translates an IS_UNDER_IPMP */
1712 mp
= ip_process(IPP_LOCAL_OUT
, mp
, ill
, ill
);
1714 /* ip_drop_packet and MIB done */
1715 return (0); /* Might just be delayed */
1720 * Verify any IPv4 options.
1722 * The presence of IP options also forces the network stack to
1723 * calculate the checksum in software. This is because:
1725 * Wrap around: certain partial-checksum NICs (eri, ce) limit
1726 * the size of "start offset" width to 6-bit. This effectively
1727 * sets the largest value of the offset to 64-bytes, starting
1728 * from the MAC header. When the cumulative MAC and IP headers
1729 * exceed such limit, the offset will wrap around. This causes
1730 * the checksum to be calculated at the wrong place.
1732 * IPv4 source routing: none of the full-checksum capable NICs
1733 * is capable of correctly handling the IPv4 source-routing
1734 * option for purposes of calculating the pseudo-header; the
1735 * actual destination is different from the destination in the
1736 * header which is that of the next-hop. (This case may not be
1737 * true for NICs which can parse IPv6 extension headers, but
1738 * we choose to simplify the implementation by not offloading
1739 * checksum when they are present.)
1741 if (!IS_SIMPLE_IPH(ipha
)) {
1742 ixaflags
= ixa
->ixa_flags
|= IXAF_NO_HW_CKSUM
;
1743 /* An IS_UNDER_IPMP ill is ok here */
1744 if (ip_output_options(mp
, ipha
, ixa
, ill
)) {
1745 /* Packet has been consumed and ICMP error sent */
1746 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
1751 if (ixa
->ixa_pktlen
> ixa
->ixa_fragsize
||
1752 (ixaflags
& IXAF_IPSEC_SECURE
)) {
1755 pktlen
= ixa
->ixa_pktlen
;
1756 if (ixaflags
& IXAF_IPSEC_SECURE
)
1757 pktlen
+= ipsec_out_extra_length(ixa
);
1759 if (pktlen
> IP_MAXPACKET
)
1762 if (ixaflags
& IXAF_SET_ULP_CKSUM
) {
1764 * Compute ULP checksum and IP header checksum
1767 if (!ip_output_sw_cksum_v4(mp
, ipha
, ixa
)) {
1768 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
1769 ip_drop_output("ipIfStatsOutDiscards", mp
, ill
);
1774 /* Calculate IPv4 header checksum */
1775 ipha
->ipha_hdr_checksum
= 0;
1776 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
1780 * If this packet would generate an icmp_frag_needed
1781 * message, we need to handle it before we do the IPsec
1782 * processing. Otherwise, we need to strip the IPsec
1783 * headers before we send up the message to the ULPs
1784 * which becomes messy and difficult.
1786 * We check using IXAF_DONTFRAG. The DF bit in the header
1787 * is not inspected - it will be copied to any generated
1790 if ((pktlen
> ixa
->ixa_fragsize
) &&
1791 (ixaflags
& IXAF_DONTFRAG
)) {
1792 /* Generate ICMP and return error */
1793 ip_recv_attr_t iras
;
1795 DTRACE_PROBE4(ip4__fragsize__fail
, uint_t
, pktlen
,
1796 uint_t
, ixa
->ixa_fragsize
, uint_t
, ixa
->ixa_pktlen
,
1797 uint_t
, ixa
->ixa_pmtu
);
1799 bzero(&iras
, sizeof (iras
));
1800 /* Map ixa to ira including IPsec policies */
1801 ipsec_out_to_in(ixa
, ill
, &iras
);
1803 ip_drop_output("ICMP_FRAG_NEEDED", mp
, ill
);
1804 icmp_frag_needed(mp
, ixa
->ixa_fragsize
, &iras
);
1805 /* We moved any IPsec refs from ixa to iras */
1806 ira_cleanup(&iras
, B_FALSE
);
1809 DTRACE_PROBE4(ip4__fragsize__ok
, uint_t
, pktlen
,
1810 uint_t
, ixa
->ixa_fragsize
, uint_t
, ixa
->ixa_pktlen
,
1811 uint_t
, ixa
->ixa_pmtu
);
1813 if (ixaflags
& IXAF_IPSEC_SECURE
) {
1815 * Pass in sufficient information so that
1816 * IPsec can determine whether to fragment, and
1817 * which function to call after fragmentation.
1819 return (ipsec_out_process(mp
, ixa
));
1821 return (ip_fragment_v4(mp
, ixa
->ixa_nce
, ixaflags
,
1822 ixa
->ixa_pktlen
, ixa
->ixa_fragsize
, ixa
->ixa_xmit_hint
,
1823 ixa
->ixa_zoneid
, ixa
->ixa_no_loop_zoneid
,
1824 ixa
->ixa_postfragfn
, &ixa
->ixa_cookie
));
1826 if (ixaflags
& IXAF_SET_ULP_CKSUM
) {
1827 /* Compute ULP checksum and IP header checksum */
1828 /* An IS_UNDER_IPMP ill is ok here */
1829 if (!ip_output_cksum_v4(ixaflags
, mp
, ipha
, ixa
, ill
)) {
1830 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
1831 ip_drop_output("ipIfStatsOutDiscards", mp
, ill
);
1836 /* Calculate IPv4 header checksum */
1837 ipha
->ipha_hdr_checksum
= 0;
1838 ipha
->ipha_hdr_checksum
= ip_csum_hdr(ipha
);
1840 return ((ixa
->ixa_postfragfn
)(mp
, ixa
->ixa_nce
, ixaflags
,
1841 ixa
->ixa_pktlen
, ixa
->ixa_xmit_hint
, ixa
->ixa_zoneid
,
1842 ixa
->ixa_no_loop_zoneid
, &ixa
->ixa_cookie
));
1846 * Send mp into ip_input
1847 * Common for IPv4 and IPv6
1850 ip_postfrag_loopback(mblk_t
*mp
, nce_t
*nce
, iaflags_t ixaflags
,
1851 uint_t pkt_len
, zoneid_t nolzid
)
1854 ill_t
*ill
= nce
->nce_ill
;
1855 ip_recv_attr_t iras
; /* NOTE: No bzero for performance */
1858 ncec
= nce
->nce_common
;
1859 iras
.ira_flags
= IRAF_VERIFY_IP_CKSUM
| IRAF_VERIFY_ULP_CKSUM
|
1860 IRAF_LOOPBACK
| IRAF_L2SRC_LOOPBACK
;
1861 if (ncec
->ncec_flags
& NCE_F_BCAST
)
1862 iras
.ira_flags
|= IRAF_L2DST_BROADCAST
;
1863 else if (ncec
->ncec_flags
& NCE_F_MCAST
)
1864 iras
.ira_flags
|= IRAF_L2DST_MULTICAST
;
1866 iras
.ira_free_flags
= 0;
1867 iras
.ira_cred
= NULL
;
1868 iras
.ira_cpid
= NOPID
;
1869 iras
.ira_zoneid
= ALL_ZONES
;
1870 iras
.ira_pktlen
= pkt_len
;
1871 UPDATE_MIB(ill
->ill_ip_mib
, ipIfStatsHCInOctets
, iras
.ira_pktlen
);
1872 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsHCInReceives
);
1874 if (ixaflags
& IXAF_IS_IPV4
)
1875 iras
.ira_flags
|= IRAF_IS_IPV4
;
1877 iras
.ira_ill
= iras
.ira_rill
= ill
;
1878 iras
.ira_ruifindex
= ill
->ill_phyint
->phyint_ifindex
;
1879 iras
.ira_rifindex
= iras
.ira_ruifindex
;
1880 iras
.ira_mhip
= NULL
;
1882 iras
.ira_flags
|= ixaflags
& IAF_MASK
;
1883 iras
.ira_no_loop_zoneid
= nolzid
;
1885 /* Broadcast and multicast don't care about the squeue */
1886 iras
.ira_sqp
= NULL
;
1889 if (ixaflags
& IXAF_IS_IPV4
) {
1890 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
1892 rtc
.rtc_ipaddr
= INADDR_ANY
;
1894 (*ill
->ill_inputfn
)(mp
, ipha
, &ipha
->ipha_dst
, &iras
, &rtc
);
1895 if (rtc
.rtc_ire
!= NULL
) {
1896 ASSERT(rtc
.rtc_ipaddr
!= INADDR_ANY
);
1897 ire_refrele(rtc
.rtc_ire
);
1900 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
1902 rtc
.rtc_ip6addr
= ipv6_all_zeros
;
1904 (*ill
->ill_inputfn
)(mp
, ip6h
, &ip6h
->ip6_dst
, &iras
, &rtc
);
1905 if (rtc
.rtc_ire
!= NULL
) {
1906 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&rtc
.rtc_ip6addr
));
1907 ire_refrele(rtc
.rtc_ire
);
1910 /* Any references to clean up? No hold on ira */
1911 if (iras
.ira_flags
& (IRAF_IPSEC_SECURE
))
1912 ira_cleanup(&iras
, B_FALSE
);
1916 * Post fragmentation function for IRE_MULTICAST and IRE_BROADCAST which
1917 * looks at the IXAF_LOOPBACK_COPY flag.
1918 * Common for IPv4 and IPv6.
1920 * If the loopback copy fails (due to no memory) but we send the packet out
1921 * on the wire we return no failure. Only in the case we suppress the wire
1922 * sending do we take the loopback failure into account.
1924 * Note that we do not perform DTRACE_IP7 and FW_HOOKS for the looped back copy.
1925 * Those operations are performed on this packet in ip_xmit() and it would
1926 * be odd to do it twice for the same packet.
1929 ip_postfrag_loopcheck(mblk_t
*mp
, nce_t
*nce
, iaflags_t ixaflags
,
1930 uint_t pkt_len
, uint32_t xmit_hint
, zoneid_t szone
, zoneid_t nolzid
,
1931 uintptr_t *ixacookie
)
1933 ill_t
*ill
= nce
->nce_ill
;
1937 * Check for IXAF_LOOPBACK_COPY - send a copy to ip as if the driver
1938 * had looped it back
1940 if (ixaflags
& IXAF_LOOPBACK_COPY
) {
1945 /* Failed to deliver the loopback copy. */
1946 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsOutDiscards
);
1947 ip_drop_output("ipIfStatsOutDiscards", mp
, ill
);
1950 ip_postfrag_loopback(mp1
, nce
, ixaflags
, pkt_len
,
1956 * If TTL = 0 then only do the loopback to this host i.e. we are
1957 * done. We are also done if this was the
1958 * loopback interface since it is sufficient
1959 * to loopback one copy of a multicast packet.
1961 if (ixaflags
& IXAF_IS_IPV4
) {
1962 ipha_t
*ipha
= (ipha_t
*)mp
->b_rptr
;
1964 if (ipha
->ipha_ttl
== 0) {
1965 ip_drop_output("multicast ipha_ttl not sent to wire",
1971 ip6_t
*ip6h
= (ip6_t
*)mp
->b_rptr
;
1973 if (ip6h
->ip6_hops
== 0) {
1974 ip_drop_output("multicast ipha_ttl not sent to wire",
1980 if (nce
->nce_ill
->ill_wq
== NULL
) {
1981 /* Loopback interface */
1982 ip_drop_output("multicast on lo0 not sent to wire", mp
, ill
);
1987 return (ip_xmit(mp
, nce
, ixaflags
, pkt_len
, xmit_hint
, szone
, 0,
1992 * Verify local connectivity. This check is called by ULP fusion code.
1993 * The generation number on an IRE_LOCAL or IRE_LOOPBACK only changes if
1994 * the interface is brought down and back up. So we simply fail the local
1995 * process. The caller, TCP Fusion, should unfuse the connection.
1998 ip_output_verify_local(ip_xmit_attr_t
*ixa
)
2000 ire_t
*ire
= ixa
->ixa_ire
;
2002 if (!(ire
->ire_type
& (IRE_LOCAL
| IRE_LOOPBACK
)))
2005 return (ixa
->ixa_ire
->ire_generation
== ixa
->ixa_ire_generation
);
2009 * Local process for ULP loopback, TCP Fusion. Handle both IPv4 and IPv6.
2011 * The caller must call ip_output_verify_local() first. This function handles
2012 * IPobs, FW_HOOKS, and/or IPsec cases sequentially.
2015 ip_output_process_local(mblk_t
*mp
, ip_xmit_attr_t
*ixa
, boolean_t hooks_out
,
2016 boolean_t hooks_in
, conn_t
*peer_connp
)
2018 ill_t
*ill
= ixa
->ixa_ire
->ire_ill
;
2019 ipha_t
*ipha
= NULL
;
2021 ip_stack_t
*ipst
= ixa
->ixa_ipst
;
2022 iaflags_t ixaflags
= ixa
->ixa_flags
;
2023 ip_recv_attr_t iras
;
2028 if (ixaflags
& IXAF_IS_IPV4
) {
2029 ipha
= (ipha_t
*)mp
->b_rptr
;
2032 * If a callback is enabled then we need to know the
2033 * source and destination zoneids for the packet. We already
2036 if (ipst
->ips_ip4_observe
.he_interested
) {
2037 zoneid_t szone
, dzone
;
2038 zoneid_t stackzoneid
;
2040 stackzoneid
= netstackid_to_zoneid(
2041 ipst
->ips_netstack
->netstack_stackid
);
2043 if (stackzoneid
== GLOBAL_ZONEID
) {
2044 /* Shared-IP zone */
2045 dzone
= ixa
->ixa_ire
->ire_zoneid
;
2046 szone
= ixa
->ixa_zoneid
;
2048 szone
= dzone
= stackzoneid
;
2050 ipobs_hook(mp
, IPOBS_HOOK_LOCAL
, szone
, dzone
, ill
,
2053 DTRACE_IP7(send
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
2054 ipha
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, ipha
, ip6_t
*,
2057 /* FW_HOOKS: LOOPBACK_OUT */
2059 DTRACE_PROBE4(ip4__loopback__out__start
, ill_t
*, NULL
,
2060 ill_t
*, ill
, ipha_t
*, ipha
, mblk_t
*, mp
);
2061 FW_HOOKS(ipst
->ips_ip4_loopback_out_event
,
2062 ipst
->ips_ipv4firewall_loopback_out
,
2063 NULL
, ill
, ipha
, mp
, mp
, 0, ipst
, error
);
2064 DTRACE_PROBE1(ip4__loopback__out__end
, mblk_t
*, mp
);
2069 /* FW_HOOKS: LOOPBACK_IN */
2071 DTRACE_PROBE4(ip4__loopback__in__start
, ill_t
*, ill
,
2072 ill_t
*, NULL
, ipha_t
*, ipha
, mblk_t
*, mp
);
2073 FW_HOOKS(ipst
->ips_ip4_loopback_in_event
,
2074 ipst
->ips_ipv4firewall_loopback_in
,
2075 ill
, NULL
, ipha
, mp
, mp
, 0, ipst
, error
);
2076 DTRACE_PROBE1(ip4__loopback__in__end
, mblk_t
*, mp
);
2081 DTRACE_IP7(receive
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
2082 ipha
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, ipha
, ip6_t
*,
2085 /* Inbound IPsec policies */
2086 if (peer_connp
!= NULL
) {
2087 /* Map ixa to ira including IPsec policies. */
2088 ipsec_out_to_in(ixa
, ill
, &iras
);
2089 mp
= ipsec_check_inbound_policy(mp
, peer_connp
, ipha
,
2093 ip6h
= (ip6_t
*)mp
->b_rptr
;
2096 * If a callback is enabled then we need to know the
2097 * source and destination zoneids for the packet. We already
2100 if (ipst
->ips_ip6_observe
.he_interested
) {
2101 zoneid_t szone
, dzone
;
2102 zoneid_t stackzoneid
;
2104 stackzoneid
= netstackid_to_zoneid(
2105 ipst
->ips_netstack
->netstack_stackid
);
2107 if (stackzoneid
== GLOBAL_ZONEID
) {
2108 /* Shared-IP zone */
2109 dzone
= ixa
->ixa_ire
->ire_zoneid
;
2110 szone
= ixa
->ixa_zoneid
;
2112 szone
= dzone
= stackzoneid
;
2114 ipobs_hook(mp
, IPOBS_HOOK_LOCAL
, szone
, dzone
, ill
,
2117 DTRACE_IP7(send
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
2118 ip6h
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, NULL
, ip6_t
*,
2121 /* FW_HOOKS: LOOPBACK_OUT */
2123 DTRACE_PROBE4(ip6__loopback__out__start
, ill_t
*, NULL
,
2124 ill_t
*, ill
, ip6_t
*, ip6h
, mblk_t
*, mp
);
2125 FW_HOOKS6(ipst
->ips_ip6_loopback_out_event
,
2126 ipst
->ips_ipv6firewall_loopback_out
,
2127 NULL
, ill
, ip6h
, mp
, mp
, 0, ipst
, error
);
2128 DTRACE_PROBE1(ip6__loopback__out__end
, mblk_t
*, mp
);
2133 /* FW_HOOKS: LOOPBACK_IN */
2135 DTRACE_PROBE4(ip6__loopback__in__start
, ill_t
*, ill
,
2136 ill_t
*, NULL
, ip6_t
*, ip6h
, mblk_t
*, mp
);
2137 FW_HOOKS6(ipst
->ips_ip6_loopback_in_event
,
2138 ipst
->ips_ipv6firewall_loopback_in
,
2139 ill
, NULL
, ip6h
, mp
, mp
, 0, ipst
, error
);
2140 DTRACE_PROBE1(ip6__loopback__in__end
, mblk_t
*, mp
);
2145 DTRACE_IP7(receive
, mblk_t
*, mp
, conn_t
*, NULL
, void_ip_t
*,
2146 ip6h
, __dtrace_ipsr_ill_t
*, ill
, ipha_t
*, NULL
, ip6_t
*,
2149 /* Inbound IPsec policies */
2150 if (peer_connp
!= NULL
) {
2151 /* Map ixa to ira including IPsec policies. */
2152 ipsec_out_to_in(ixa
, ill
, &iras
);
2153 mp
= ipsec_check_inbound_policy(mp
, peer_connp
, NULL
,
2159 BUMP_MIB(ill
->ill_ip_mib
, ipIfStatsInDiscards
);
2160 ip_drop_input("ipIfStatsInDiscards", NULL
, ill
);