/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * iptun - IP Tunneling Driver
 *
 * This module is a GLDv3 driver that implements virtual datalinks over IP
 * (a.k.a., IP tunneling).  The datalinks are managed through a dld ioctl
 * interface (see iptun_ctl.c), and registered with GLDv3 using
 * mac_register().  It implements the logic for various forms of IP (IPv4 or
 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
 * module below it.  Each virtual IP tunnel datalink has a conn_t associated
 * with it representing the "outer" IP connection.
 *
 * The module implements the following locking semantics:
 *
 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
 * See comments above iptun_hash_lock for details.
 *
 * No locks are ever held while calling up to GLDv3.  The general architecture
 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
 * given link will be held while making downcalls (iptun_m_*() callbacks).
 * Because we need to hold locks while handling downcalls, holding these locks
 * while issuing upcalls results in deadlock scenarios.  See the block comment
 * above iptun_task_cb() for details on how we safely issue upcalls without
 * holding any locks.
 *
 * The contents of each iptun_t are protected by its iptun_lock mutex, which
 * is acquired in iptun_enter() (called by iptun_enter_by_linkid()) and
 * released in iptun_exit().
 *
 * See comments in iptun_delete() and iptun_free() for details on how the
 * iptun_t is deleted safely.
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/modhash.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/tihdr.h>
#include <sys/param.h>
#include <sys/mac_provider.h>
#include <sys/mac_ipv4.h>
#include <sys/mac_ipv6.h>
#include <sys/mac_6to4.h>
#include <sys/tsol/tnet.h>
#include <sys/sunldi.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ipsec_impl.h>
#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <inet/iptun.h>
#include "iptun_impl.h"
/* Do the tunnel type and address family match? */
#define IPTUN_ADDR_MATCH(iptun_type, family) \
        ((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) || \
        (iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) || \
        (iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET))
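/*
 * Note that IPTUN_TYPE_6TO4 pairs with AF_INET: a 6to4 tunnel's outer
 * (tunnel source) address is an IPv4 address even though the datalink
 * itself carries IPv6.
 */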
#define IPTUN_HASH_KEY(key)     ((mod_hash_key_t)(uintptr_t)(key))

#define IPTUN_MIN_IPV4_MTU      576     /* ip.h still uses 68 (!) */
#define IPTUN_MIN_IPV6_MTU      IPV6_MIN_MTU
#define IPTUN_MAX_IPV4_MTU      (IP_MAXPACKET - sizeof (ipha_t))
#define IPTUN_MAX_IPV6_MTU      (IP_MAXPACKET - sizeof (ip6_t) - \
                                    sizeof (iptun_encaplim_t))
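/*
 * For example, IPTUN_MAX_IPV4_MTU works out to IP_MAXPACKET (65535) minus
 * the 20-byte base IPv4 header; the IPv6 maximum additionally subtracts the
 * destination options header used to carry the tunnel encapsulation limit.
 */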
#define IPTUN_MIN_HOPLIMIT      1
#define IPTUN_MAX_HOPLIMIT      UINT8_MAX

#define IPTUN_MIN_ENCAPLIMIT    0
#define IPTUN_MAX_ENCAPLIMIT    UINT8_MAX

#define IPTUN_IPSEC_REQ_MASK    (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
static iptun_encaplim_t iptun_encaplim_init = {
        IPTUN_DEFAULT_ENCAPLIMIT,       /* filled in with actual value later */
/*
 * Table containing per-iptun-type information.
 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU.
 */
static iptun_typeinfo_t iptun_type_table[] = {
        { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION,
            IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE },
        { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION,
            IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE },
        { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION,
            IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE },
        { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE }
};
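/*
 * The IPTUN_TYPE_UNKNOWN entry terminates the table; iptun_gettypeinfo()
 * returns that entry when asked about a type that is not listed above.
 */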
/*
 * iptun_hash is an iptun_t lookup table by link ID protected by
 * iptun_hash_lock.  While the hash table's integrity is maintained via
 * internal locking in the mod_hash_*() functions, we need additional locking
 * so that an iptun_t cannot be deleted after a hash lookup has returned an
 * iptun_t and before iptun_lock has been entered.  As such, we use
 * iptun_hash_lock when doing lookups and removals from iptun_hash.
 */
mod_hash_t      *iptun_hash;
static kmutex_t iptun_hash_lock;
static uint_t   iptun_tunnelcount;      /* total for all stacks */
kmem_cache_t    *iptun_cache;
ddi_taskq_t     *iptun_taskq;
typedef enum {
        IPTUN_TASK_MTU_UPDATE,          /* tell mac about new tunnel link MTU */
        IPTUN_TASK_LADDR_UPDATE,        /* tell mac about new local address */
        IPTUN_TASK_RADDR_UPDATE,        /* tell mac about new remote address */
        IPTUN_TASK_LINK_UPDATE,         /* tell mac about new link state */
        IPTUN_TASK_PDATA_UPDATE         /* tell mac about updated plugin data */
} iptun_task_t;
typedef struct iptun_task_data_s {
        iptun_task_t    itd_task;
        datalink_id_t   itd_linkid;
} iptun_task_data_t;
static void     iptun_task_dispatch(iptun_t *, iptun_task_t);
static int      iptun_enter(iptun_t *);
static void     iptun_exit(iptun_t *);
static void     iptun_headergen(iptun_t *, boolean_t);
static void     iptun_drop_pkt(mblk_t *, uint64_t *);
static void     iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *);
static void     iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *);
static void     iptun_output(iptun_t *, mblk_t *);
static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static void     iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
static int      iptun_setladdr(iptun_t *, const struct sockaddr_storage *);
static void     iptun_output_6to4(iptun_t *, mblk_t *);
static void     iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *);
static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
    ip_recv_attr_t *);
static void     iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
    ixa_notify_arg_t);

static mac_callbacks_t iptun_m_callbacks;
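/*
 * Forward declaration of the GLDv3 callback vector; the iptun_m_*() entry
 * points defined below are reached through it, and iptun_register() hands
 * it to mac_register() via mac->m_callbacks.
 */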
static int
iptun_m_getstat(void *arg, uint_t stat, uint64_t *val)
{
        iptun_t *iptun = arg;

        switch (stat) {
        case MAC_STAT_IERRORS:
                *val = iptun->iptun_ierrors;
                break;
        case MAC_STAT_OERRORS:
                *val = iptun->iptun_oerrors;
                break;
        case MAC_STAT_RBYTES:
                *val = iptun->iptun_rbytes;
                break;
        case MAC_STAT_IPACKETS:
                *val = iptun->iptun_ipackets;
                break;
        case MAC_STAT_OBYTES:
                *val = iptun->iptun_obytes;
                break;
        case MAC_STAT_OPACKETS:
                *val = iptun->iptun_opackets;
                break;
        case MAC_STAT_NORCVBUF:
                *val = iptun->iptun_norcvbuf;
                break;
        case MAC_STAT_NOXMTBUF:
                *val = iptun->iptun_noxmtbuf;
                break;
221 iptun_m_start(void *arg
)
223 iptun_t
*iptun
= arg
;
226 if ((err
= iptun_enter(iptun
)) == 0) {
227 iptun
->iptun_flags
|= IPTUN_MAC_STARTED
;
228 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
235 iptun_m_stop(void *arg
)
237 iptun_t
*iptun
= arg
;
239 if (iptun_enter(iptun
) == 0) {
240 iptun
->iptun_flags
&= ~IPTUN_MAC_STARTED
;
241 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
247 * iptun_m_setpromisc() does nothing and always succeeds. This is because a
248 * tunnel data-link only ever receives packets that are destined exclusively
249 * for the local address of the tunnel.
253 iptun_m_setpromisc(void *arg
, boolean_t on
)
260 iptun_m_multicst(void *arg
, boolean_t add
, const uint8_t *addrp
)
266 * iptun_m_unicst() sets the local address.
270 iptun_m_unicst(void *arg
, const uint8_t *addrp
)
272 iptun_t
*iptun
= arg
;
274 struct sockaddr_storage ss
;
275 struct sockaddr_in
*sin
;
276 struct sockaddr_in6
*sin6
;
278 if ((err
= iptun_enter(iptun
)) == 0) {
279 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
281 sin
= (struct sockaddr_in
*)&ss
;
282 sin
->sin_family
= AF_INET
;
283 bcopy(addrp
, &sin
->sin_addr
, sizeof (in_addr_t
));
286 sin6
= (struct sockaddr_in6
*)&ss
;
287 sin6
->sin6_family
= AF_INET6
;
288 bcopy(addrp
, &sin6
->sin6_addr
, sizeof (in6_addr_t
));
293 err
= iptun_setladdr(iptun
, &ss
);
300 iptun_m_tx(void *arg
, mblk_t
*mpchain
)
303 iptun_t
*iptun
= arg
;
305 if (!IS_IPTUN_RUNNING(iptun
)) {
306 iptun_drop_pkt(mpchain
, &iptun
->iptun_noxmtbuf
);
310 for (mp
= mpchain
; mp
!= NULL
; mp
= nmp
) {
313 iptun_output(iptun
, mp
);
321 iptun_m_setprop(void *barg
, const char *pr_name
, mac_prop_id_t pr_num
,
322 uint_t pr_valsize
, const void *pr_val
)
324 iptun_t
*iptun
= barg
;
325 uint32_t value
= *(uint32_t *)pr_val
;
        /*
         * We need to enter this iptun_t since we'll be modifying the outer
         * header.
         */
332 if ((err
= iptun_enter(iptun
)) != 0)
336 case MAC_PROP_IPTUN_HOPLIMIT
:
337 if (value
< IPTUN_MIN_HOPLIMIT
|| value
> IPTUN_MAX_HOPLIMIT
) {
341 if (value
!= iptun
->iptun_hoplimit
) {
342 iptun
->iptun_hoplimit
= (uint8_t)value
;
343 iptun_headergen(iptun
, B_TRUE
);
346 case MAC_PROP_IPTUN_ENCAPLIMIT
:
347 if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_IPV6
||
348 value
> IPTUN_MAX_ENCAPLIMIT
) {
352 if (value
!= iptun
->iptun_encaplimit
) {
353 iptun
->iptun_encaplimit
= (uint8_t)value
;
354 iptun_headergen(iptun
, B_TRUE
);
358 uint32_t maxmtu
= iptun_get_maxmtu(iptun
, NULL
, 0);
360 if (value
< iptun
->iptun_typeinfo
->iti_minmtu
||
365 iptun
->iptun_flags
|= IPTUN_FIXED_MTU
;
366 if (value
!= iptun
->iptun_mtu
) {
367 iptun
->iptun_mtu
= value
;
368 iptun_task_dispatch(iptun
, IPTUN_TASK_MTU_UPDATE
);
381 iptun_m_getprop(void *barg
, const char *pr_name
, mac_prop_id_t pr_num
,
382 uint_t pr_flags
, uint_t pr_valsize
, void *pr_val
, uint_t
*perm
)
384 iptun_t
*iptun
= barg
;
385 mac_propval_range_t range
;
386 boolean_t is_default
= (pr_flags
& MAC_PROP_DEFAULT
);
387 boolean_t is_possible
= (pr_flags
& MAC_PROP_POSSIBLE
);
390 if ((err
= iptun_enter(iptun
)) != 0)
393 if ((pr_flags
& ~(MAC_PROP_DEFAULT
| MAC_PROP_POSSIBLE
)) != 0) {
397 if (is_default
&& is_possible
) {
402 *perm
= MAC_PROP_PERM_RW
;
405 if (pr_valsize
< sizeof (mac_propval_range_t
)) {
410 range
.mpr_type
= MAC_PROPVAL_UINT32
;
411 } else if (pr_valsize
< sizeof (uint32_t)) {
417 case MAC_PROP_IPTUN_HOPLIMIT
:
419 range
.range_uint32
[0].mpur_min
= IPTUN_MIN_HOPLIMIT
;
420 range
.range_uint32
[0].mpur_max
= IPTUN_MAX_HOPLIMIT
;
421 } else if (is_default
) {
422 *(uint32_t *)pr_val
= IPTUN_DEFAULT_HOPLIMIT
;
424 *(uint32_t *)pr_val
= iptun
->iptun_hoplimit
;
427 case MAC_PROP_IPTUN_ENCAPLIMIT
:
428 if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_IPV6
) {
433 range
.range_uint32
[0].mpur_min
= IPTUN_MIN_ENCAPLIMIT
;
434 range
.range_uint32
[0].mpur_max
= IPTUN_MAX_ENCAPLIMIT
;
435 } else if (is_default
) {
436 *(uint32_t *)pr_val
= IPTUN_DEFAULT_ENCAPLIMIT
;
438 *(uint32_t *)pr_val
= iptun
->iptun_encaplimit
;
442 uint32_t maxmtu
= iptun_get_maxmtu(iptun
, NULL
, 0);
445 range
.range_uint32
[0].mpur_min
=
446 iptun
->iptun_typeinfo
->iti_minmtu
;
447 range
.range_uint32
[0].mpur_max
= maxmtu
;
450 * The MAC module knows the current value and should
451 * never call us for it. There is also no default
452 * MTU, as by default, it is a dynamic property.
464 bcopy(&range
, pr_val
, sizeof (range
));
473 return (iptun_tunnelcount
);
/*
 * Enter an iptun_t exclusively.  This is essentially just a mutex, but we
 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of
 * being deleted.
 */
482 iptun_enter(iptun_t
*iptun
)
484 mutex_enter(&iptun
->iptun_lock
);
485 while (iptun
->iptun_flags
& IPTUN_DELETE_PENDING
)
486 cv_wait(&iptun
->iptun_enter_cv
, &iptun
->iptun_lock
);
487 if (iptun
->iptun_flags
& IPTUN_CONDEMNED
) {
488 mutex_exit(&iptun
->iptun_lock
);
495 * Exit the tunnel entered in iptun_enter().
498 iptun_exit(iptun_t
*iptun
)
500 mutex_exit(&iptun
->iptun_lock
);
504 * Enter the IP tunnel instance by datalink ID.
507 iptun_enter_by_linkid(datalink_id_t linkid
, iptun_t
**iptun
)
511 mutex_enter(&iptun_hash_lock
);
512 if (mod_hash_find(iptun_hash
, IPTUN_HASH_KEY(linkid
),
513 (mod_hash_val_t
*)iptun
) == 0)
514 err
= iptun_enter(*iptun
);
519 mutex_exit(&iptun_hash_lock
);
/*
 * Handle tasks that were deferred through the iptun_taskq because they require
 * calling up to the mac module, and we can't call up to the mac module while
 * holding locks.
 *
 * This is tricky to get right without introducing race conditions and
 * deadlocks with the mac module, as we cannot issue an upcall while in the
 * iptun_t.  The reason is that upcalls may try and enter the mac perimeter,
 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
 * module will already have the perimeter held, and will then try and enter
 * the iptun_t.  You can see the lock ordering problem with this; this will
 * deadlock.
 *
 * The safe way to do this is to enter the iptun_t in question and copy the
 * information we need out of it so that we can exit it and know that the
 * information being passed up to the upcalls won't be subject to modification
 * by other threads.  The problem now is that we need to exit it prior to
 * issuing the upcall, but once we do this, a thread could come along and
 * delete the iptun_t and thus the mac handle required to issue the upcall.
 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
 * iptun_t.  This flag is the condition associated with iptun_upcall_cv, which
 * iptun_delete() will cv_wait() on.  When the upcall completes, we clear
 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
 * iptun_delete().  We can thus still safely use iptun->iptun_mh after having
 * exited the iptun_t.
 */
550 iptun_task_cb(void *arg
)
552 iptun_task_data_t
*itd
= arg
;
553 iptun_task_t task
= itd
->itd_task
;
554 datalink_id_t linkid
= itd
->itd_linkid
;
558 link_state_t linkstate
;
560 iptun_header_t header
;
562 kmem_free(itd
, sizeof (*itd
));
        /*
         * Note that if the lookup fails, it's because the tunnel was deleted
         * between the time the task was dispatched and now.  That isn't an
         * error.
         */
569 if (iptun_enter_by_linkid(linkid
, &iptun
) != 0)
572 iptun
->iptun_flags
|= IPTUN_UPCALL_PENDING
;
575 case IPTUN_TASK_MTU_UPDATE
:
576 mtu
= iptun
->iptun_mtu
;
578 case IPTUN_TASK_LADDR_UPDATE
:
579 addr
= iptun
->iptun_laddr
;
581 case IPTUN_TASK_RADDR_UPDATE
:
582 addr
= iptun
->iptun_raddr
;
584 case IPTUN_TASK_LINK_UPDATE
:
585 linkstate
= IS_IPTUN_RUNNING(iptun
) ?
586 LINK_STATE_UP
: LINK_STATE_DOWN
;
588 case IPTUN_TASK_PDATA_UPDATE
:
589 header_size
= iptun
->iptun_header_size
;
590 header
= iptun
->iptun_header
;
599 case IPTUN_TASK_MTU_UPDATE
:
600 (void) mac_maxsdu_update(iptun
->iptun_mh
, mtu
);
602 case IPTUN_TASK_LADDR_UPDATE
:
603 mac_unicst_update(iptun
->iptun_mh
, (uint8_t *)&addr
.ia_addr
);
605 case IPTUN_TASK_RADDR_UPDATE
:
606 mac_dst_update(iptun
->iptun_mh
, (uint8_t *)&addr
.ia_addr
);
608 case IPTUN_TASK_LINK_UPDATE
:
609 mac_link_update(iptun
->iptun_mh
, linkstate
);
611 case IPTUN_TASK_PDATA_UPDATE
:
612 if (mac_pdata_update(iptun
->iptun_mh
,
613 header_size
== 0 ? NULL
: &header
, header_size
) != 0)
614 atomic_inc_64(&iptun
->iptun_taskq_fail
);
618 mutex_enter(&iptun
->iptun_lock
);
619 iptun
->iptun_flags
&= ~IPTUN_UPCALL_PENDING
;
620 cv_signal(&iptun
->iptun_upcall_cv
);
621 mutex_exit(&iptun
->iptun_lock
);
625 iptun_task_dispatch(iptun_t
*iptun
, iptun_task_t iptun_task
)
627 iptun_task_data_t
*itd
;
629 itd
= kmem_alloc(sizeof (*itd
), KM_NOSLEEP
);
631 atomic_inc_64(&iptun
->iptun_taskq_fail
);
634 itd
->itd_task
= iptun_task
;
635 itd
->itd_linkid
= iptun
->iptun_linkid
;
636 if (ddi_taskq_dispatch(iptun_taskq
, iptun_task_cb
, itd
, DDI_NOSLEEP
)) {
637 atomic_inc_64(&iptun
->iptun_taskq_fail
);
638 kmem_free(itd
, sizeof (*itd
));
643 * Convert an iptun_addr_t to sockaddr_storage.
646 iptun_getaddr(iptun_addr_t
*iptun_addr
, struct sockaddr_storage
*ss
)
648 struct sockaddr_in
*sin
;
649 struct sockaddr_in6
*sin6
;
651 bzero(ss
, sizeof (*ss
));
652 switch (iptun_addr
->ia_family
) {
654 sin
= (struct sockaddr_in
*)ss
;
655 sin
->sin_addr
.s_addr
= iptun_addr
->ia_addr
.iau_addr4
;
658 sin6
= (struct sockaddr_in6
*)ss
;
659 sin6
->sin6_addr
= iptun_addr
->ia_addr
.iau_addr6
;
664 ss
->ss_family
= iptun_addr
->ia_family
;
668 * General purpose function to set an IP tunnel source or destination address.
671 iptun_setaddr(iptun_type_t iptun_type
, iptun_addr_t
*iptun_addr
,
672 const struct sockaddr_storage
*ss
)
674 if (!IPTUN_ADDR_MATCH(iptun_type
, ss
->ss_family
))
677 switch (ss
->ss_family
) {
679 struct sockaddr_in
*sin
= (struct sockaddr_in
*)ss
;
681 if ((sin
->sin_addr
.s_addr
== INADDR_ANY
) ||
682 (sin
->sin_addr
.s_addr
== INADDR_BROADCAST
) ||
683 CLASSD(sin
->sin_addr
.s_addr
)) {
684 return (EADDRNOTAVAIL
);
686 iptun_addr
->ia_addr
.iau_addr4
= sin
->sin_addr
.s_addr
;
690 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)ss
;
692 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
) ||
693 IN6_IS_ADDR_MULTICAST(&sin6
->sin6_addr
) ||
694 IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
695 return (EADDRNOTAVAIL
);
697 iptun_addr
->ia_addr
.iau_addr6
= sin6
->sin6_addr
;
701 return (EAFNOSUPPORT
);
703 iptun_addr
->ia_family
= ss
->ss_family
;
708 iptun_setladdr(iptun_t
*iptun
, const struct sockaddr_storage
*laddr
)
710 return (iptun_setaddr(iptun
->iptun_typeinfo
->iti_type
,
711 &iptun
->iptun_laddr
, laddr
));
715 iptun_setraddr(iptun_t
*iptun
, const struct sockaddr_storage
*raddr
)
717 if (!(iptun
->iptun_typeinfo
->iti_hasraddr
))
719 return (iptun_setaddr(iptun
->iptun_typeinfo
->iti_type
,
720 &iptun
->iptun_raddr
, raddr
));
724 iptun_canbind(iptun_t
*iptun
)
727 * A tunnel may bind when its source address has been set, and if its
728 * tunnel type requires one, also its destination address.
730 return ((iptun
->iptun_flags
& IPTUN_LADDR
) &&
731 ((iptun
->iptun_flags
& IPTUN_RADDR
) ||
732 !(iptun
->iptun_typeinfo
->iti_hasraddr
)));
736 * Verify that the local address is valid, and insert in the fanout
739 iptun_bind(iptun_t
*iptun
)
741 conn_t
*connp
= iptun
->iptun_connp
;
745 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
747 /* Get an exclusive ixa for this thread, and replace conn_ixa */
748 ixa
= conn_get_ixa(connp
, B_TRUE
);
751 ASSERT(ixa
->ixa_refcnt
>= 2);
752 ASSERT(ixa
== connp
->conn_ixa
);
754 /* We create PMTU state including for 6to4 */
755 ixa
->ixa_flags
|= IXAF_PMTU_DISCOVERY
;
757 ASSERT(iptun_canbind(iptun
));
759 mutex_enter(&connp
->conn_lock
);
761 * Note that conn_proto can't be set since the upper protocol
762 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
763 * ipcl_iptun_classify doesn't use conn_proto.
765 connp
->conn_ipversion
= iptun
->iptun_typeinfo
->iti_ipvers
;
767 switch (iptun
->iptun_typeinfo
->iti_type
) {
768 case IPTUN_TYPE_IPV4
:
769 IN6_IPADDR_TO_V4MAPPED(iptun
->iptun_laddr4
,
770 &connp
->conn_laddr_v6
);
771 IN6_IPADDR_TO_V4MAPPED(iptun
->iptun_raddr4
,
772 &connp
->conn_faddr_v6
);
773 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
774 if (ip_laddr_verify_v4(iptun
->iptun_laddr4
, IPCL_ZONEID(connp
),
775 ipst
, B_FALSE
) != IPVL_UNICAST_UP
) {
776 mutex_exit(&connp
->conn_lock
);
777 error
= EADDRNOTAVAIL
;
781 case IPTUN_TYPE_IPV6
:
782 connp
->conn_laddr_v6
= iptun
->iptun_laddr6
;
783 connp
->conn_faddr_v6
= iptun
->iptun_raddr6
;
784 ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
785 /* We use a zero scopeid for now */
786 if (ip_laddr_verify_v6(&iptun
->iptun_laddr6
, IPCL_ZONEID(connp
),
787 ipst
, B_FALSE
, 0) != IPVL_UNICAST_UP
) {
788 mutex_exit(&connp
->conn_lock
);
789 error
= EADDRNOTAVAIL
;
793 case IPTUN_TYPE_6TO4
:
794 IN6_IPADDR_TO_V4MAPPED(iptun
->iptun_laddr4
,
795 &connp
->conn_laddr_v6
);
796 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY
, &connp
->conn_faddr_v6
);
797 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
798 mutex_exit(&connp
->conn_lock
);
800 switch (ip_laddr_verify_v4(iptun
->iptun_laddr4
,
801 IPCL_ZONEID(connp
), ipst
, B_FALSE
)) {
802 case IPVL_UNICAST_UP
:
803 case IPVL_UNICAST_DOWN
:
806 error
= EADDRNOTAVAIL
;
812 /* In case previous destination was multirt */
816 * When we set a tunnel's destination address, we do not
817 * care if the destination is reachable. Transient routing
818 * issues should not inhibit the creation of a tunnel
819 * interface, for example. Thus we pass B_FALSE here.
821 connp
->conn_saddr_v6
= connp
->conn_laddr_v6
;
822 mutex_exit(&connp
->conn_lock
);
824 /* As long as the MTU is large we avoid fragmentation */
825 ixa
->ixa_flags
|= IXAF_DONTFRAG
| IXAF_PMTU_IPV4_DF
;
827 /* We handle IPsec in iptun_output_common */
828 error
= ip_attr_connect(connp
, ixa
, &connp
->conn_saddr_v6
,
829 &connp
->conn_faddr_v6
, &connp
->conn_faddr_v6
, 0,
830 &connp
->conn_saddr_v6
, &uinfo
, 0);
835 /* saddr shouldn't change since it was already set */
836 ASSERT(IN6_ARE_ADDR_EQUAL(&connp
->conn_laddr_v6
,
837 &connp
->conn_saddr_v6
));
839 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */
840 ixa
->ixa_flags
|= IXAF_VERIFY_PMTU
;
841 ASSERT(uinfo
.iulp_mtu
!= 0);
844 * Allow setting new policies.
845 * The addresses/ports are already set, thus the IPsec policy calls
846 * can handle their passed-in conn's.
848 connp
->conn_policy_cached
= B_FALSE
;
851 error
= ipcl_conn_insert(connp
);
855 /* Record this as the "last" send even though we haven't sent any */
856 connp
->conn_v6lastdst
= connp
->conn_faddr_v6
;
858 iptun
->iptun_flags
|= IPTUN_BOUND
;
860 * Now that we're bound with ip below us, this is a good
861 * time to initialize the destination path MTU and to
862 * re-calculate the tunnel's link MTU.
864 (void) iptun_update_mtu(iptun
, ixa
, 0);
866 if (IS_IPTUN_RUNNING(iptun
))
867 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
875 iptun_unbind(iptun_t
*iptun
)
877 ASSERT(iptun
->iptun_flags
& IPTUN_BOUND
);
878 ASSERT(mutex_owned(&iptun
->iptun_lock
) ||
879 (iptun
->iptun_flags
& IPTUN_CONDEMNED
));
880 ip_unbind(iptun
->iptun_connp
);
881 iptun
->iptun_flags
&= ~IPTUN_BOUND
;
882 if (!(iptun
->iptun_flags
& IPTUN_CONDEMNED
))
883 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
887 * Re-generate the template data-link header for a given IP tunnel given the
888 * tunnel's current parameters.
891 iptun_headergen(iptun_t
*iptun
, boolean_t update_mac
)
893 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
896 * We only need to use a custom IP header if the administrator
897 * has supplied a non-default hoplimit.
899 if (iptun
->iptun_hoplimit
== IPTUN_DEFAULT_HOPLIMIT
) {
900 iptun
->iptun_header_size
= 0;
903 iptun
->iptun_header_size
= sizeof (ipha_t
);
904 iptun
->iptun_header4
.ipha_version_and_hdr_length
=
905 IP_SIMPLE_HDR_VERSION
;
906 iptun
->iptun_header4
.ipha_fragment_offset_and_flags
=
908 iptun
->iptun_header4
.ipha_ttl
= iptun
->iptun_hoplimit
;
911 ip6_t
*ip6hp
= &iptun
->iptun_header6
.it6h_ip6h
;
                /*
                 * We only need to use a custom IPv6 header if either the
                 * administrator has supplied a non-default hoplimit, or we
                 * need to include an encapsulation limit option in the outer
                 * header.
                 */
919 if (iptun
->iptun_hoplimit
== IPTUN_DEFAULT_HOPLIMIT
&&
920 iptun
->iptun_encaplimit
== 0) {
921 iptun
->iptun_header_size
= 0;
925 (void) memset(ip6hp
, 0, sizeof (*ip6hp
));
926 if (iptun
->iptun_encaplimit
== 0) {
927 iptun
->iptun_header_size
= sizeof (ip6_t
);
928 ip6hp
->ip6_nxt
= IPPROTO_NONE
;
930 iptun_encaplim_t
*iel
;
932 iptun
->iptun_header_size
= sizeof (iptun_ipv6hdrs_t
);
                        /*
                         * The mac_ipv6 plugin requires ip6_plen to be in host
                         * byte order and reflect the extension headers
                         * present in the template.  The actual network byte
                         * order ip6_plen will be set on a per-packet basis on
                         * transmit.
                         */
940 ip6hp
->ip6_plen
= sizeof (*iel
);
941 ip6hp
->ip6_nxt
= IPPROTO_DSTOPTS
;
942 iel
= &iptun
->iptun_header6
.it6h_encaplim
;
943 *iel
= iptun_encaplim_init
;
944 iel
->iel_telopt
.ip6ot_encap_limit
=
945 iptun
->iptun_encaplimit
;
948 ip6hp
->ip6_hlim
= iptun
->iptun_hoplimit
;
954 iptun_task_dispatch(iptun
, IPTUN_TASK_PDATA_UPDATE
);
/*
 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy
 * head.
 */
962 iptun_insert_simple_policies(ipsec_policy_head_t
*ph
, ipsec_act_t
*actp
,
963 uint_t n
, netstack_t
*ns
)
967 if (!ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_INBOUND
, ns
) ||
968 !ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_OUTBOUND
, ns
))
972 return (ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_INBOUND
, ns
) &&
973 ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_OUTBOUND
, ns
));
977 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or
978 * IPTUN_MODIFY ioctls.
981 iptun_set_sec_simple(iptun_t
*iptun
, const ipsec_req_t
*ipsr
)
985 ipsec_act_t
*actp
= NULL
;
986 boolean_t clear_all
, old_policy
= B_FALSE
;
987 ipsec_tun_pol_t
*itp
;
988 char name
[MAXLINKNAMELEN
];
990 netstack_t
*ns
= iptun
->iptun_ns
;
992 /* Can't specify self-encap on a tunnel. */
993 if (ipsr
->ipsr_self_encap_req
!= 0)
997 * If it's a "clear-all" entry, unset the security flags and resume
998 * normal cleartext (or inherit-from-global) policy.
1000 clear_all
= ((ipsr
->ipsr_ah_req
& IPTUN_IPSEC_REQ_MASK
) == 0 &&
1001 (ipsr
->ipsr_esp_req
& IPTUN_IPSEC_REQ_MASK
) == 0);
1003 ASSERT(mutex_owned(&iptun
->iptun_lock
));
1004 itp
= iptun
->iptun_itp
;
1008 if ((rc
= dls_mgmt_get_linkinfo(iptun
->iptun_linkid
, name
, NULL
,
1011 ASSERT(name
[0] != '\0');
1012 if ((itp
= create_tunnel_policy(name
, &rc
, &gen
, ns
)) == NULL
)
1014 iptun
->iptun_itp
= itp
;
1017 /* Allocate the actvec now, before holding itp or polhead locks. */
1018 ipsec_actvec_from_req(ipsr
, &actp
, &nact
, ns
);
1025 * Just write on the active polhead. Save the primary/secondary stuff
1026 * for spdsock operations.
1028 * Mutex because we need to write to the polhead AND flags atomically.
1029 * Other threads will acquire the polhead lock as a reader if the
1030 * (unprotected) flag is set.
1032 mutex_enter(&itp
->itp_lock
);
1033 if (itp
->itp_flags
& ITPF_P_TUNNEL
) {
1034 /* Oops, we lost a race. Let's get out of here. */
1038 old_policy
= ((itp
->itp_flags
& ITPF_P_ACTIVE
) != 0);
1041 ITPF_CLONE(itp
->itp_flags
);
1042 rc
= ipsec_copy_polhead(itp
->itp_policy
, itp
->itp_inactive
, ns
);
1044 /* inactive has already been cleared. */
1045 itp
->itp_flags
&= ~ITPF_IFLAGS
;
1048 rw_enter(&itp
->itp_policy
->iph_lock
, RW_WRITER
);
1049 ipsec_polhead_flush(itp
->itp_policy
, ns
);
1051 /* Else assume itp->itp_policy is already flushed. */
1052 rw_enter(&itp
->itp_policy
->iph_lock
, RW_WRITER
);
1056 ASSERT(avl_numnodes(&itp
->itp_policy
->iph_rulebyid
) == 0);
1057 itp
->itp_flags
&= ~ITPF_PFLAGS
;
1058 rw_exit(&itp
->itp_policy
->iph_lock
);
1059 old_policy
= B_FALSE
; /* Clear out the inactive one too. */
1063 if (iptun_insert_simple_policies(itp
->itp_policy
, actp
, nact
, ns
)) {
1064 rw_exit(&itp
->itp_policy
->iph_lock
);
1066 * Adjust MTU and make sure the DL side knows what's up.
1068 itp
->itp_flags
= ITPF_P_ACTIVE
;
1069 (void) iptun_update_mtu(iptun
, NULL
, 0);
1070 old_policy
= B_FALSE
; /* Blank out inactive - we succeeded */
1072 rw_exit(&itp
->itp_policy
->iph_lock
);
                /* Recover policy in inactive polhead. */
1079 ipsec_swap_policy(itp
->itp_policy
, itp
->itp_inactive
, ns
);
1080 ITPF_SWAP(itp
->itp_flags
);
1083 /* Clear policy in inactive polhead. */
1084 itp
->itp_flags
&= ~ITPF_IFLAGS
;
1085 rw_enter(&itp
->itp_inactive
->iph_lock
, RW_WRITER
);
1086 ipsec_polhead_flush(itp
->itp_inactive
, ns
);
1087 rw_exit(&itp
->itp_inactive
->iph_lock
);
1090 mutex_exit(&itp
->itp_lock
);
1094 ipsec_actvec_free(actp
, nact
);
1099 static iptun_typeinfo_t
*
1100 iptun_gettypeinfo(iptun_type_t type
)
1104 for (i
= 0; iptun_type_table
[i
].iti_type
!= IPTUN_TYPE_UNKNOWN
; i
++) {
1105 if (iptun_type_table
[i
].iti_type
== type
)
1108 return (&iptun_type_table
[i
]);
1112 * Set the parameters included in ik on the tunnel iptun. Parameters that can
1113 * only be set at creation time are set in iptun_create().
1116 iptun_setparams(iptun_t
*iptun
, const iptun_kparams_t
*ik
)
1119 netstack_t
*ns
= iptun
->iptun_ns
;
1120 iptun_addr_t orig_laddr
, orig_raddr
;
1121 uint_t orig_flags
= iptun
->iptun_flags
;
1123 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_LADDR
) {
1124 if (orig_flags
& IPTUN_LADDR
)
1125 orig_laddr
= iptun
->iptun_laddr
;
1126 if ((err
= iptun_setladdr(iptun
, &ik
->iptun_kparam_laddr
)) != 0)
1128 iptun
->iptun_flags
|= IPTUN_LADDR
;
1131 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_RADDR
) {
1132 if (orig_flags
& IPTUN_RADDR
)
1133 orig_raddr
= iptun
->iptun_raddr
;
1134 if ((err
= iptun_setraddr(iptun
, &ik
->iptun_kparam_raddr
)) != 0)
1136 iptun
->iptun_flags
|= IPTUN_RADDR
;
1139 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_SECINFO
) {
1141 * Set IPsec policy originating from the ifconfig(1M) command
1142 * line. This is traditionally called "simple" policy because
1143 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
1144 * simple policy of "do ESP on everything" and/or "do AH on
1145 * everything" (as opposed to the rich policy that can be
1146 * defined with ipsecconf(1M)).
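                 * For instance, an ipsec_req_t with IPSEC_PREF_REQUIRED set
                 * in ipsr_esp_req simply requests ESP protection on all of
                 * the tunnel's traffic.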
1148 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_6TO4
) {
                        /*
                         * Can't set security properties for automatic
                         * tunnels.
                         */
1157 if (!ipsec_loaded(ns
->netstack_ipsec
)) {
1158 /* If IPsec can be loaded, try and load it now. */
1159 if (ipsec_failed(ns
->netstack_ipsec
)) {
1160 err
= EPROTONOSUPPORT
;
1163 ipsec_loader_loadnow(ns
->netstack_ipsec
);
                        /*
                         * ipsec_loader_loadnow() returns while IPsec is still
                         * being loaded asynchronously.  While a method exists
                         * to wait for IPsec to load (ipsec_loader_wait()), it
                         * requires use of a STREAMS queue to do a qwait().
                         * We're not in STREAMS context here, and so we can't
                         * use it.  This is not a problem in practice because
                         * in the vast majority of cases, key management and
                         * global policy will have loaded before any tunnels
                         * are plumbed, and so IPsec will already have been
                         * loaded.
                         */
= iptun_set_sec_simple(iptun
, &ik
->iptun_kparam_secinfo
);
1182 iptun
->iptun_flags
|= IPTUN_SIMPLE_POLICY
;
1183 iptun
->iptun_simple_policy
= ik
->iptun_kparam_secinfo
;
1188 /* Restore original source and destination. */
1189 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_LADDR
&&
1190 (orig_flags
& IPTUN_LADDR
))
1191 iptun
->iptun_laddr
= orig_laddr
;
1192 if ((ik
->iptun_kparam_flags
& IPTUN_KPARAM_RADDR
) &&
1193 (orig_flags
& IPTUN_RADDR
))
1194 iptun
->iptun_raddr
= orig_raddr
;
1195 iptun
->iptun_flags
= orig_flags
;
1201 iptun_register(iptun_t
*iptun
)
1203 mac_register_t
*mac
;
1206 ASSERT(!(iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
));
1208 if ((mac
= mac_alloc(MAC_VERSION
)) == NULL
)
1211 mac
->m_type_ident
= iptun
->iptun_typeinfo
->iti_ident
;
1212 mac
->m_driver
= iptun
;
1213 mac
->m_dip
= iptun_dip
;
1214 mac
->m_instance
= (uint_t
)-1;
1215 mac
->m_src_addr
= (uint8_t *)&iptun
->iptun_laddr
.ia_addr
;
1216 mac
->m_dst_addr
= iptun
->iptun_typeinfo
->iti_hasraddr
?
1217 (uint8_t *)&iptun
->iptun_raddr
.ia_addr
: NULL
;
1218 mac
->m_callbacks
= &iptun_m_callbacks
;
1219 mac
->m_min_sdu
= iptun
->iptun_typeinfo
->iti_minmtu
;
1220 mac
->m_max_sdu
= iptun
->iptun_mtu
;
1221 if (iptun
->iptun_header_size
!= 0) {
1222 mac
->m_pdata
= &iptun
->iptun_header
;
1223 mac
->m_pdata_size
= iptun
->iptun_header_size
;
1225 if ((err
= mac_register(mac
, &iptun
->iptun_mh
)) == 0)
1226 iptun
->iptun_flags
|= IPTUN_MAC_REGISTERED
;
1232 iptun_unregister(iptun_t
*iptun
)
1236 ASSERT(iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
);
1237 if ((err
= mac_unregister(iptun
->iptun_mh
)) == 0)
1238 iptun
->iptun_flags
&= ~IPTUN_MAC_REGISTERED
;
1243 iptun_conn_create(iptun_t
*iptun
, netstack_t
*ns
, cred_t
*credp
)
1247 if ((connp
= ipcl_conn_create(IPCL_IPCCONN
, KM_NOSLEEP
, ns
)) == NULL
)
1250 connp
->conn_flags
|= IPCL_IPTUN
;
1251 connp
->conn_iptun
= iptun
;
1252 connp
->conn_recv
= iptun_input
;
1253 connp
->conn_recvicmp
= iptun_input_icmp
;
1254 connp
->conn_verifyicmp
= iptun_verifyicmp
;
1257 * Register iptun_notify to listen to capability changes detected by IP.
1258 * This upcall is made in the context of the call to conn_ip_output.
1260 connp
->conn_ixa
->ixa_notify
= iptun_notify
;
1261 connp
->conn_ixa
->ixa_notify_cookie
= iptun
;
1264 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
1265 * for all other conn_t's.
1267 * Note that there's an important distinction between iptun_zoneid and
1268 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global
1269 * exclusive stack zones to make the ip module believe that the
1270 * non-global zone is actually a global zone. Therefore, when
1271 * interacting with the ip module, we must always use conn_zoneid.
1273 connp
->conn_zoneid
= (ns
->netstack_stackid
== GLOBAL_NETSTACKID
) ?
1274 crgetzoneid(credp
) : GLOBAL_ZONEID
;
1275 connp
->conn_cred
= credp
;
1276 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
1277 crhold(connp
->conn_cred
);
1278 connp
->conn_cpid
= NOPID
;
1280 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1281 connp
->conn_ixa
->ixa_zoneid
= connp
->conn_zoneid
;
1282 ASSERT(connp
->conn_ref
== 1);
1284 /* Cache things in ixa without an extra refhold */
1285 connp
->conn_ixa
->ixa_cred
= connp
->conn_cred
;
1286 connp
->conn_ixa
->ixa_cpid
= connp
->conn_cpid
;
1287 if (is_system_labeled())
1288 connp
->conn_ixa
->ixa_tsl
= crgetlabel(connp
->conn_cred
);
        /*
         * Have conn_ip_output drop packets should our outer source
         * address go away.
         */
1294 connp
->conn_ixa
->ixa_flags
|= IXAF_VERIFY_SOURCE
;
1296 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
1298 connp
->conn_family
= AF_INET6
;
1301 connp
->conn_family
= AF_INET
;
1304 mutex_enter(&connp
->conn_lock
);
1305 connp
->conn_state_flags
&= ~CONN_INCIPIENT
;
1306 mutex_exit(&connp
->conn_lock
);
1311 iptun_conn_destroy(conn_t
*connp
)
1313 ip_quiesce_conn(connp
);
1314 connp
->conn_iptun
= NULL
;
1315 ASSERT(connp
->conn_ref
== 1);
1316 CONN_DEC_REF(connp
);
1324 if ((iptun
= kmem_cache_alloc(iptun_cache
, KM_NOSLEEP
)) != NULL
) {
1325 bzero(iptun
, sizeof (*iptun
));
1326 atomic_inc_32(&iptun_tunnelcount
);
1332 iptun_free(iptun_t
*iptun
)
1334 ASSERT(iptun
->iptun_flags
& IPTUN_CONDEMNED
);
1336 if (iptun
->iptun_flags
& IPTUN_HASH_INSERTED
) {
1337 iptun_stack_t
*iptuns
= iptun
->iptun_iptuns
;
1339 mutex_enter(&iptun_hash_lock
);
1340 VERIFY(mod_hash_remove(iptun_hash
,
1341 IPTUN_HASH_KEY(iptun
->iptun_linkid
),
1342 (mod_hash_val_t
*)&iptun
) == 0);
1343 mutex_exit(&iptun_hash_lock
);
1344 iptun
->iptun_flags
&= ~IPTUN_HASH_INSERTED
;
1345 mutex_enter(&iptuns
->iptuns_lock
);
1346 list_remove(&iptuns
->iptuns_iptunlist
, iptun
);
1347 mutex_exit(&iptuns
->iptuns_lock
);
1350 if (iptun
->iptun_flags
& IPTUN_BOUND
)
1351 iptun_unbind(iptun
);
1354 * After iptun_unregister(), there will be no threads executing a
1355 * downcall from the mac module, including in the tx datapath.
1357 if (iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
)
1358 VERIFY(iptun_unregister(iptun
) == 0);
1360 if (iptun
->iptun_itp
!= NULL
) {
1362 * Remove from the AVL tree, AND release the reference iptun_t
1363 * itself holds on the ITP.
1365 itp_unlink(iptun
->iptun_itp
, iptun
->iptun_ns
);
1366 ITP_REFRELE(iptun
->iptun_itp
, iptun
->iptun_ns
);
1367 iptun
->iptun_itp
= NULL
;
1368 iptun
->iptun_flags
&= ~IPTUN_SIMPLE_POLICY
;
1372 * After ipcl_conn_destroy(), there will be no threads executing an
1373 * upcall from ip (i.e., iptun_input()), and it is then safe to free
1376 if (iptun
->iptun_connp
!= NULL
) {
1377 iptun_conn_destroy(iptun
->iptun_connp
);
1378 iptun
->iptun_connp
= NULL
;
1381 kmem_cache_free(iptun_cache
, iptun
);
1382 atomic_dec_32(&iptun_tunnelcount
);
1386 iptun_create(iptun_kparams_t
*ik
, cred_t
*credp
)
1388 iptun_t
*iptun
= NULL
;
1390 char linkname
[MAXLINKNAMELEN
];
1391 ipsec_tun_pol_t
*itp
;
1392 netstack_t
*ns
= NULL
;
1393 iptun_stack_t
*iptuns
;
1394 datalink_id_t tmpid
;
1395 zoneid_t zoneid
= crgetzoneid(credp
);
1396 boolean_t link_created
= B_FALSE
;
1398 /* The tunnel type is mandatory */
1399 if (!(ik
->iptun_kparam_flags
& IPTUN_KPARAM_TYPE
))
1403 * Is the linkid that the caller wishes to associate with this new
1404 * tunnel assigned to this zone?
1406 if (zone_check_datalink(&zoneid
, ik
->iptun_kparam_linkid
) != 0) {
1407 if (zoneid
!= GLOBAL_ZONEID
)
1409 } else if (zoneid
== GLOBAL_ZONEID
) {
        /*
         * Make sure that we're not trying to create a tunnel that has already
         * been created.
         */
1417 if (iptun_enter_by_linkid(ik
->iptun_kparam_linkid
, &iptun
) == 0) {
1424 ns
= netstack_find_by_cred(credp
);
1425 iptuns
= ns
->netstack_iptun
;
1427 if ((iptun
= iptun_alloc()) == NULL
) {
1432 iptun
->iptun_linkid
= ik
->iptun_kparam_linkid
;
1433 iptun
->iptun_zoneid
= zoneid
;
1434 iptun
->iptun_ns
= ns
;
1436 iptun
->iptun_typeinfo
= iptun_gettypeinfo(ik
->iptun_kparam_type
);
1437 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_UNKNOWN
) {
1442 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_IMPLICIT
)
1443 iptun
->iptun_flags
|= IPTUN_IMPLICIT
;
1445 if ((err
= iptun_setparams(iptun
, ik
)) != 0)
1448 iptun
->iptun_hoplimit
= IPTUN_DEFAULT_HOPLIMIT
;
1449 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_IPV6
)
1450 iptun
->iptun_encaplimit
= IPTUN_DEFAULT_ENCAPLIMIT
;
1452 iptun_headergen(iptun
, B_FALSE
);
1454 iptun
->iptun_connp
= iptun_conn_create(iptun
, ns
, credp
);
1455 if (iptun
->iptun_connp
== NULL
) {
1460 iptun
->iptun_mtu
= iptun
->iptun_typeinfo
->iti_maxmtu
;
1461 iptun
->iptun_dpmtu
= iptun
->iptun_mtu
;
1464 * Find an ITP based on linkname. If we have parms already set via
1465 * the iptun_setparams() call above, it may have created an ITP for
1466 * us. We always try get_tunnel_policy() for DEBUG correctness
1467 * checks, and we may wish to refactor this to only check when
1468 * iptun_itp is NULL.
1470 if ((err
= dls_mgmt_get_linkinfo(iptun
->iptun_linkid
, linkname
, NULL
,
1473 if ((itp
= get_tunnel_policy(linkname
, ns
)) != NULL
)
1474 iptun
->iptun_itp
= itp
;
        /*
         * See if we have the necessary IP addresses assigned to this tunnel
         * to try and bind them with ip underneath us.  If we're not ready to
         * bind yet, then we'll defer the bind operation until the addresses
         * are set.
         */
1482 if (iptun_canbind(iptun
) && ((err
= iptun_bind(iptun
)) != 0))
1485 if ((err
= iptun_register(iptun
)) != 0)
1488 err
= dls_devnet_create(iptun
->iptun_mh
, iptun
->iptun_linkid
,
1489 iptun
->iptun_zoneid
);
1492 link_created
= B_TRUE
;
1495 * We hash by link-id as that is the key used by all other iptun
1496 * interfaces (modify, delete, etc.).
1498 if ((mherr
= mod_hash_insert(iptun_hash
,
1499 IPTUN_HASH_KEY(iptun
->iptun_linkid
), (mod_hash_val_t
)iptun
)) == 0) {
1500 mutex_enter(&iptuns
->iptuns_lock
);
1501 list_insert_head(&iptuns
->iptuns_iptunlist
, iptun
);
1502 mutex_exit(&iptuns
->iptuns_lock
);
1503 iptun
->iptun_flags
|= IPTUN_HASH_INSERTED
;
1504 } else if (mherr
== MH_ERR_NOMEM
) {
1506 } else if (mherr
== MH_ERR_DUPLICATE
) {
1513 if (iptun
== NULL
&& ns
!= NULL
)
1515 if (err
!= 0 && iptun
!= NULL
) {
1517 (void) dls_devnet_destroy(iptun
->iptun_mh
, &tmpid
,
1520 iptun
->iptun_flags
|= IPTUN_CONDEMNED
;
1527 iptun_delete(datalink_id_t linkid
, cred_t
*credp
)
1530 iptun_t
*iptun
= NULL
;
1532 if ((err
= iptun_enter_by_linkid(linkid
, &iptun
)) != 0)
1535 /* One cannot delete a tunnel that belongs to another zone. */
1536 if (iptun
->iptun_zoneid
!= crgetzoneid(credp
)) {
        /*
         * We need to exit iptun in order to issue calls up the stack such as
         * dls_devnet_destroy().  If we call up while still in iptun, deadlock
         * with calls coming down the stack is possible.  We prevent other
         * threads from entering this iptun after we've exited it by setting
         * the IPTUN_DELETE_PENDING flag.  This will cause callers of
         * iptun_enter() to block waiting on iptun_enter_cv.  The assumption
         * here is that the functions we're calling while IPTUN_DELETE_PENDING
         * is set don't result in an iptun_enter() call, as that would result
         * in deadlock.
         */
1552 iptun
->iptun_flags
|= IPTUN_DELETE_PENDING
;
1554 /* Wait for any pending upcall to the mac module to complete. */
1555 while (iptun
->iptun_flags
& IPTUN_UPCALL_PENDING
)
1556 cv_wait(&iptun
->iptun_upcall_cv
, &iptun
->iptun_lock
);
1560 if ((err
= dls_devnet_destroy(iptun
->iptun_mh
, &linkid
, B_TRUE
)) == 0) {
                /*
                 * mac_disable() will fail with EBUSY if there are references
                 * to the iptun MAC.  If there are none, then mac_disable()
                 * will assure that none can be acquired until the MAC is
                 * unregistered.
                 *
                 * XXX CR 6791335 prevents us from calling mac_disable() prior
                 * to dls_devnet_destroy(), so we unfortunately need to
                 * attempt to re-create the devnet node if mac_disable()
                 * fails.
                 */
1572 if ((err
= mac_disable(iptun
->iptun_mh
)) != 0) {
1573 (void) dls_devnet_create(iptun
->iptun_mh
, linkid
,
1574 iptun
->iptun_zoneid
);
1579 * Now that we know the fate of this iptun_t, we need to clear
1580 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
1581 * slated to be freed. Either way, we need to signal the threads
1582 * waiting in iptun_enter() so that they can either fail if
1583 * IPTUN_CONDEMNED is set, or continue if it's not.
1585 mutex_enter(&iptun
->iptun_lock
);
1586 iptun
->iptun_flags
&= ~IPTUN_DELETE_PENDING
;
1588 iptun
->iptun_flags
|= IPTUN_CONDEMNED
;
1589 cv_broadcast(&iptun
->iptun_enter_cv
);
1590 mutex_exit(&iptun
->iptun_lock
);
1593 * Note that there is no danger in calling iptun_free() after having
1594 * dropped the iptun_lock since callers of iptun_enter() at this point
1595 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
1596 * threads entering from mac callbacks which call iptun_enter()
1597 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
1598 * lock in order to remove the iptun_t from the hash table.
1607 iptun_modify(const iptun_kparams_t
*ik
, cred_t
*credp
)
1610 boolean_t laddr_change
= B_FALSE
, raddr_change
= B_FALSE
;
1613 if ((err
= iptun_enter_by_linkid(ik
->iptun_kparam_linkid
, &iptun
)) != 0)
1616 /* One cannot modify a tunnel that belongs to another zone. */
1617 if (iptun
->iptun_zoneid
!= crgetzoneid(credp
)) {
1622 /* The tunnel type cannot be changed */
1623 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_TYPE
) {
1628 if ((err
= iptun_setparams(iptun
, ik
)) != 0)
1630 iptun_headergen(iptun
, B_FALSE
);
1633 * If any of the tunnel's addresses has been modified and the tunnel
1634 * has the necessary addresses assigned to it, we need to try to bind
1635 * with ip underneath us. If we're not ready to bind yet, then we'll
1636 * try again when the addresses are modified later.
1638 laddr_change
= (ik
->iptun_kparam_flags
& IPTUN_KPARAM_LADDR
);
1639 raddr_change
= (ik
->iptun_kparam_flags
& IPTUN_KPARAM_RADDR
);
1640 if (laddr_change
|| raddr_change
) {
1641 if (iptun
->iptun_flags
& IPTUN_BOUND
)
1642 iptun_unbind(iptun
);
1643 if (iptun_canbind(iptun
) && (err
= iptun_bind(iptun
)) != 0) {
1645 iptun
->iptun_flags
&= ~IPTUN_LADDR
;
1647 iptun
->iptun_flags
&= ~IPTUN_RADDR
;
1653 iptun_task_dispatch(iptun
, IPTUN_TASK_LADDR_UPDATE
);
1655 iptun_task_dispatch(iptun
, IPTUN_TASK_RADDR_UPDATE
);
1662 /* Given an IP tunnel's datalink id, fill in its parameters. */
1664 iptun_info(iptun_kparams_t
*ik
, cred_t
*credp
)
1669 /* Is the tunnel link visible from the caller's zone? */
1670 if (!dls_devnet_islinkvisible(ik
->iptun_kparam_linkid
,
1671 crgetzoneid(credp
)))
1674 if ((err
= iptun_enter_by_linkid(ik
->iptun_kparam_linkid
, &iptun
)) != 0)
1677 bzero(ik
, sizeof (iptun_kparams_t
));
1679 ik
->iptun_kparam_linkid
= iptun
->iptun_linkid
;
1680 ik
->iptun_kparam_type
= iptun
->iptun_typeinfo
->iti_type
;
1681 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_TYPE
;
1683 if (iptun
->iptun_flags
& IPTUN_LADDR
) {
1684 iptun_getaddr(&iptun
->iptun_laddr
, &ik
->iptun_kparam_laddr
);
1685 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_LADDR
;
1687 if (iptun
->iptun_flags
& IPTUN_RADDR
) {
1688 iptun_getaddr(&iptun
->iptun_raddr
, &ik
->iptun_kparam_raddr
);
1689 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_RADDR
;
1692 if (iptun
->iptun_flags
& IPTUN_IMPLICIT
)
1693 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_IMPLICIT
;
1695 if (iptun
->iptun_itp
!= NULL
) {
1696 mutex_enter(&iptun
->iptun_itp
->itp_lock
);
1697 if (iptun
->iptun_itp
->itp_flags
& ITPF_P_ACTIVE
) {
1698 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_IPSECPOL
;
1699 if (iptun
->iptun_flags
& IPTUN_SIMPLE_POLICY
) {
1700 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_SECINFO
;
1701 ik
->iptun_kparam_secinfo
=
1702 iptun
->iptun_simple_policy
;
1705 mutex_exit(&iptun
->iptun_itp
->itp_lock
);
1714 iptun_set_6to4relay(netstack_t
*ns
, ipaddr_t relay_addr
)
1716 if (relay_addr
== INADDR_BROADCAST
|| CLASSD(relay_addr
))
1717 return (EADDRNOTAVAIL
);
1718 ns
->netstack_iptun
->iptuns_relay_rtr_addr
= relay_addr
;
1723 iptun_get_6to4relay(netstack_t
*ns
, ipaddr_t
*relay_addr
)
1725 *relay_addr
= ns
->netstack_iptun
->iptuns_relay_rtr_addr
;
1729 iptun_set_policy(datalink_id_t linkid
, ipsec_tun_pol_t
*itp
)
1733 if (iptun_enter_by_linkid(linkid
, &iptun
) != 0)
1735 if (iptun
->iptun_itp
!= itp
) {
1736 ASSERT(iptun
->iptun_itp
== NULL
);
1738 iptun
->iptun_itp
= itp
;
1741 * IPsec policy means IPsec overhead, which means lower MTU.
1742 * Refresh the MTU for this tunnel.
1744 (void) iptun_update_mtu(iptun
, NULL
, 0);
1749 * Obtain the path MTU to the tunnel destination.
1750 * Can return zero in some cases.
1753 iptun_get_dst_pmtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
)
1756 conn_t
*connp
= iptun
->iptun_connp
;
1757 boolean_t need_rele
= B_FALSE
;
        /*
         * We only obtain the pmtu for tunnels that have a remote tunnel
         * address.
         */
1763 if (!(iptun
->iptun_flags
& IPTUN_RADDR
))
1767 ixa
= conn_get_ixa(connp
, B_FALSE
);
        /*
         * Guard against ICMP errors before we have sent, as well as against
         * a thread which held conn_ixa.
         */
1776 if (ixa
->ixa_ire
!= NULL
) {
1777 pmtu
= ip_get_pmtu(ixa
);
1780 * For both IPv4 and IPv6 we can have indication that the outer
1781 * header needs fragmentation.
1783 if (ixa
->ixa_flags
& IXAF_PMTU_TOO_SMALL
) {
1784 /* Must allow fragmentation in ip_output */
1785 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
1786 } else if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_6TO4
) {
1787 ixa
->ixa_flags
|= IXAF_DONTFRAG
;
1789 /* ip_get_pmtu might have set this - we don't want it */
1790 ixa
->ixa_flags
&= ~IXAF_PMTU_IPV4_DF
;
/*
 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
 * by ip.
 */
1804 iptun_update_dst_pmtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
)
1807 conn_t
*connp
= iptun
->iptun_connp
;
1808 boolean_t need_rele
= B_FALSE
;
1810 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
1811 if (!(iptun
->iptun_flags
& IPTUN_RADDR
))
1815 ixa
= conn_get_ixa(connp
, B_FALSE
);
        /*
         * Guard against ICMP errors before we have sent, as well as against
         * a thread which held conn_ixa.
         */
1824 if (ixa
->ixa_ire
!= NULL
) {
1825 pmtu
= ip_get_pmtu(ixa
);
1827 * Update ixa_fragsize and ixa_pmtu.
1829 ixa
->ixa_fragsize
= ixa
->ixa_pmtu
= pmtu
;
1832 * For both IPv4 and IPv6 we can have indication that the outer
1833 * header needs fragmentation.
1835 if (ixa
->ixa_flags
& IXAF_PMTU_TOO_SMALL
) {
1836 /* Must allow fragmentation in ip_output */
1837 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
1838 } else if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_6TO4
) {
1839 ixa
->ixa_flags
|= IXAF_DONTFRAG
;
1841 /* ip_get_pmtu might have set this - we don't want it */
1842 ixa
->ixa_flags
&= ~IXAF_PMTU_IPV4_DF
;
1851 * There is nothing that iptun can verify in addition to IP having
1852 * verified the IP addresses in the fanout.
1856 iptun_verifyicmp(conn_t
*connp
, void *arg2
, icmph_t
*icmph
, icmp6_t
*icmp6
,
1857 ip_recv_attr_t
*ira
)
1863 * Notify function registered with ip_xmit_attr_t.
1866 iptun_notify(void *arg
, ip_xmit_attr_t
*ixa
, ixa_notify_type_t ntype
,
1867 ixa_notify_arg_t narg
)
1869 iptun_t
*iptun
= (iptun_t
*)arg
;
1873 (void) iptun_update_mtu(iptun
, ixa
, narg
);
1879 * Returns the max of old_ovhd and the overhead associated with pol.
1882 iptun_max_policy_overhead(ipsec_policy_t
*pol
, uint32_t old_ovhd
)
1884 uint32_t new_ovhd
= old_ovhd
;
1886 while (pol
!= NULL
) {
1887 new_ovhd
= max(new_ovhd
,
1888 ipsec_act_ovhd(&pol
->ipsp_act
->ipa_act
));
1889 pol
= pol
->ipsp_hash
.hash_next
;
1895 iptun_get_ipsec_overhead(iptun_t
*iptun
)
1897 ipsec_policy_root_t
*ipr
;
1898 ipsec_policy_head_t
*iph
;
1899 ipsec_policy_t
*pol
;
1900 ipsec_selector_t sel
;
1902 uint32_t ipsec_ovhd
= 0;
1903 ipsec_tun_pol_t
*itp
= iptun
->iptun_itp
;
1904 netstack_t
*ns
= iptun
->iptun_ns
;
1906 if (itp
== NULL
|| !(itp
->itp_flags
& ITPF_P_ACTIVE
)) {
                /*
                 * Consult global policy, just in case.  This will only work
                 * if we have both source and destination addresses to work
                 * with.
                 */
1912 if ((iptun
->iptun_flags
& (IPTUN_LADDR
|IPTUN_RADDR
)) !=
1913 (IPTUN_LADDR
|IPTUN_RADDR
))
1916 iph
= ipsec_system_policy(ns
);
1917 bzero(&sel
, sizeof (sel
));
1919 (iptun
->iptun_typeinfo
->iti_ipvers
== IPV4_VERSION
);
1920 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
1922 sel
.ips_local_addr_v4
= iptun
->iptun_laddr4
;
1923 sel
.ips_remote_addr_v4
= iptun
->iptun_raddr4
;
1926 sel
.ips_local_addr_v6
= iptun
->iptun_laddr6
;
1927 sel
.ips_remote_addr_v6
= iptun
->iptun_raddr6
;
1930 /* Check for both IPv4 and IPv6. */
1931 sel
.ips_protocol
= IPPROTO_ENCAP
;
1932 pol
= ipsec_find_policy_head(NULL
, iph
, IPSEC_TYPE_OUTBOUND
,
1935 ipsec_ovhd
= ipsec_act_ovhd(&pol
->ipsp_act
->ipa_act
);
1938 sel
.ips_protocol
= IPPROTO_IPV6
;
1939 pol
= ipsec_find_policy_head(NULL
, iph
, IPSEC_TYPE_OUTBOUND
,
1942 ipsec_ovhd
= max(ipsec_ovhd
,
1943 ipsec_act_ovhd(&pol
->ipsp_act
->ipa_act
));
1946 IPPH_REFRELE(iph
, ns
);
1949 * Look through all of the possible IPsec actions for the
1950 * tunnel, and find the largest potential IPsec overhead.
1952 iph
= itp
->itp_policy
;
1953 rw_enter(&iph
->iph_lock
, RW_READER
);
1954 ipr
= &(iph
->iph_root
[IPSEC_TYPE_OUTBOUND
]);
1955 ipsec_ovhd
= iptun_max_policy_overhead(
1956 ipr
->ipr_nonhash
[IPSEC_AF_V4
], 0);
1957 ipsec_ovhd
= iptun_max_policy_overhead(
1958 ipr
->ipr_nonhash
[IPSEC_AF_V6
], ipsec_ovhd
);
1959 for (i
= 0; i
< ipr
->ipr_nchains
; i
++) {
1960 ipsec_ovhd
= iptun_max_policy_overhead(
1961 ipr
->ipr_hash
[i
].hash_head
, ipsec_ovhd
);
1963 rw_exit(&iph
->iph_lock
);
1966 return (ipsec_ovhd
);
1970 * Calculate and return the maximum possible upper MTU for the given tunnel.
1972 * If new_pmtu is set then we also need to update the lower path MTU information
1973 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that
1974 * we are notified by conn_ip_output() when the path MTU increases.
1977 iptun_get_maxmtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
, uint32_t new_pmtu
)
1979 size_t header_size
, ipsec_overhead
;
1980 uint32_t maxmtu
, pmtu
;
1983 * Start with the path-MTU to the remote address, which is either
1984 * provided as the new_pmtu argument, or obtained using
1985 * iptun_get_dst_pmtu().
1987 if (new_pmtu
!= 0) {
1988 if (iptun
->iptun_flags
& IPTUN_RADDR
)
1989 iptun
->iptun_dpmtu
= new_pmtu
;
1991 } else if (iptun
->iptun_flags
& IPTUN_RADDR
) {
1992 if ((pmtu
= iptun_get_dst_pmtu(iptun
, ixa
)) == 0) {
1994 * We weren't able to obtain the path-MTU of the
1995 * destination. Use the previous value.
1997 pmtu
= iptun
->iptun_dpmtu
;
1999 iptun
->iptun_dpmtu
= pmtu
;
                /*
                 * We have no path-MTU information to go on, use the maximum
                 * possible value.
                 */
2006 pmtu
= iptun
->iptun_typeinfo
->iti_maxmtu
;
2010 * Now calculate tunneling overhead and subtract that from the
2011 * path-MTU information obtained above.
2013 if (iptun
->iptun_header_size
!= 0) {
2014 header_size
= iptun
->iptun_header_size
;
2016 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
2018 header_size
= sizeof (ipha_t
);
2019 if (is_system_labeled())
2020 header_size
+= IP_MAX_OPT_LENGTH
;
2023 header_size
= sizeof (iptun_ipv6hdrs_t
);
2028 ipsec_overhead
= iptun_get_ipsec_overhead(iptun
);
2030 maxmtu
= pmtu
- (header_size
+ ipsec_overhead
);
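        /*
         * For example, a 1500-byte path MTU over an IPv4 outer header
         * (20 bytes) with no IPsec overhead yields a 1480-byte tunnel MTU.
         */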
2031 return (max(maxmtu
, iptun
->iptun_typeinfo
->iti_minmtu
));
/*
 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer
 * of any change in MTU.  The new_pmtu argument is the new lower path MTU to
 * the tunnel destination to be used in the tunnel MTU calculation.  Passing
 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using
 * the current path-MTU information from ip.
 *
 * If the calculated tunnel MTU is different than its previous value, then we
 * notify the MAC layer above us of this change using mac_maxsdu_update().
 */
2045 iptun_update_mtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
, uint32_t new_pmtu
)
2049 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */
2050 iptun_update_dst_pmtu(iptun
, ixa
);
2053 * We return the current MTU without updating it if it was pegged to a
2054 * static value using the MAC_PROP_MTU link property.
2056 if (iptun
->iptun_flags
& IPTUN_FIXED_MTU
)
2057 return (iptun
->iptun_mtu
);
2059 /* If the MTU isn't fixed, then use the maximum possible value. */
2060 newmtu
= iptun_get_maxmtu(iptun
, ixa
, new_pmtu
);
2062 * We only dynamically adjust the tunnel MTU for tunnels with
2063 * destinations because dynamic MTU calculations are based on the
2064 * destination path-MTU.
2066 if ((iptun
->iptun_flags
& IPTUN_RADDR
) && newmtu
!= iptun
->iptun_mtu
) {
2067 iptun
->iptun_mtu
= newmtu
;
2068 if (iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
)
2069 iptun_task_dispatch(iptun
, IPTUN_TASK_MTU_UPDATE
);
2076 * Frees a packet or packet chain and bumps stat for each freed packet.
2079 iptun_drop_pkt(mblk_t
*mp
, uint64_t *stat
)
2083 for (pktmp
= mp
; pktmp
!= NULL
; pktmp
= mp
) {
2085 pktmp
->b_next
= NULL
;
2087 atomic_inc_64(stat
);
2093 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
2094 * original packet to its b_cont. Returns NULL on failure.
2097 iptun_build_icmperr(size_t hdrs_size
, mblk_t
*orig_pkt
)
2101 if ((icmperr_mp
= allocb(hdrs_size
, BPRI_MED
)) != NULL
) {
2102 icmperr_mp
->b_wptr
+= hdrs_size
;
2103 /* tack on the offending packet */
2104 icmperr_mp
->b_cont
= orig_pkt
;
2106 return (icmperr_mp
);
2110 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in
2114 iptun_sendicmp_v4(iptun_t
*iptun
, icmph_t
*icmp
, ipha_t
*orig_ipha
, mblk_t
*mp
,
2117 size_t orig_pktsize
, hdrs_size
;
2121 ip_xmit_attr_t ixas
;
2122 conn_t
*connp
= iptun
->iptun_connp
;
2124 orig_pktsize
= msgdsize(mp
);
2125 hdrs_size
= sizeof (ipha_t
) + sizeof (icmph_t
);
2126 if ((icmperr_mp
= iptun_build_icmperr(hdrs_size
, mp
)) == NULL
) {
2127 iptun_drop_pkt(mp
, &iptun
->iptun_noxmtbuf
);
2131 new_ipha
= (ipha_t
*)icmperr_mp
->b_rptr
;
2132 new_icmp
= (icmph_t
*)(new_ipha
+ 1);
2134 new_ipha
->ipha_version_and_hdr_length
= IP_SIMPLE_HDR_VERSION
;
2135 new_ipha
->ipha_type_of_service
= 0;
2136 new_ipha
->ipha_ident
= 0;
2137 new_ipha
->ipha_fragment_offset_and_flags
= 0;
2138 new_ipha
->ipha_ttl
= orig_ipha
->ipha_ttl
;
2139 new_ipha
->ipha_protocol
= IPPROTO_ICMP
;
2140 new_ipha
->ipha_src
= orig_ipha
->ipha_dst
;
2141 new_ipha
->ipha_dst
= orig_ipha
->ipha_src
;
2142 new_ipha
->ipha_hdr_checksum
= 0; /* will be computed by ip */
2143 new_ipha
->ipha_length
= htons(hdrs_size
+ orig_pktsize
);
2146 new_icmp
->icmph_checksum
= 0;
2147 new_icmp
->icmph_checksum
= IP_CSUM(icmperr_mp
, sizeof (ipha_t
), 0);
2149 bzero(&ixas
, sizeof (ixas
));
2150 ixas
.ixa_flags
= IXAF_BASIC_SIMPLE_V4
;
2151 if (new_ipha
->ipha_src
== INADDR_ANY
)
2152 ixas
.ixa_flags
|= IXAF_SET_SOURCE
;
2154 ixas
.ixa_zoneid
= IPCL_ZONEID(connp
);
2155 ixas
.ixa_ipst
= connp
->conn_netstack
->netstack_ip
;
2156 ixas
.ixa_cred
= connp
->conn_cred
;
2157 ixas
.ixa_cpid
= NOPID
;
2158 if (is_system_labeled())
2161 ixas
.ixa_ifindex
= 0;
2162 ixas
.ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
2164 (void) ip_output_simple(icmperr_mp
, &ixas
);

static void
iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp,
    ts_label_t *tsl)
{
	size_t		orig_pktsize, hdrs_size;
	mblk_t		*icmp6err_mp;
	ip6_t		*new_ip6h;
	icmp6_t		*new_icmp6;
	ip_xmit_attr_t	ixas;
	conn_t		*connp = iptun->iptun_connp;

	orig_pktsize = msgdsize(mp);
	hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t);
	if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return;
	}

	new_ip6h = (ip6_t *)icmp6err_mp->b_rptr;
	new_icmp6 = (icmp6_t *)(new_ip6h + 1);

	new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf;
	new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize);
	new_ip6h->ip6_hops = orig_ip6h->ip6_hops;
	new_ip6h->ip6_nxt = IPPROTO_ICMPV6;
	new_ip6h->ip6_src = orig_ip6h->ip6_dst;
	new_ip6h->ip6_dst = orig_ip6h->ip6_src;

	*new_icmp6 = *icmp6;
	/* The checksum is calculated in ip_output_simple and friends. */
	new_icmp6->icmp6_cksum = new_ip6h->ip6_plen;

	bzero(&ixas, sizeof (ixas));
	ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
	if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src))
		ixas.ixa_flags |= IXAF_SET_SOURCE;

	ixas.ixa_zoneid = IPCL_ZONEID(connp);
	ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
	ixas.ixa_cred = connp->conn_cred;
	ixas.ixa_cpid = NOPID;
	if (is_system_labeled())
		ixas.ixa_tsl = tsl;

	ixas.ixa_ifindex = 0;
	ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;

	(void) ip_output_simple(icmp6err_mp, &ixas);
	ixa_cleanup(&ixas);
}

static void
iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp,
    uint8_t type, uint8_t code, ts_label_t *tsl)
{
	icmph_t	icmp;

	bzero(&icmp, sizeof (icmp));
	icmp.icmph_type = type;
	icmp.icmph_code = code;

	iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
}

static void
iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha,
    mblk_t *mp, ts_label_t *tsl)
{
	icmph_t	icmp;

	icmp.icmph_type = ICMP_DEST_UNREACHABLE;
	icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
	icmp.icmph_du_zero = 0;
	icmp.icmph_du_mtu = htons(newmtu);

	iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp, tsl);
}

static void
iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp,
    uint8_t type, uint8_t code, uint32_t offset, ts_label_t *tsl)
{
	icmp6_t	icmp6;

	bzero(&icmp6, sizeof (icmp6));
	icmp6.icmp6_type = type;
	icmp6.icmp6_code = code;
	if (type == ICMP6_PARAM_PROB)
		icmp6.icmp6_pptr = htonl(offset);

	iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
}

static void
iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h,
    mblk_t *mp, ts_label_t *tsl)
{
	icmp6_t	icmp6;

	icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
	icmp6.icmp6_code = 0;
	icmp6.icmp6_mtu = htonl(newmtu);

	iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp, tsl);
}

/*
 * Determines if the packet pointed to by ipha or ip6h is an ICMP error.  The
 * mp argument is only used to do bounds checking.
 */
static boolean_t
is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
{
	uint16_t hlen;

	if (ipha != NULL) {
		icmph_t	*icmph;

		ASSERT(ip6h == NULL);
		if (ipha->ipha_protocol != IPPROTO_ICMP)
			return (B_FALSE);

		hlen = IPH_HDR_LENGTH(ipha);
		icmph = (icmph_t *)((uint8_t *)ipha + hlen);
		return (ICMP_IS_ERROR(icmph->icmph_type) ||
		    icmph->icmph_type == ICMP_REDIRECT);
	} else {
		icmp6_t	*icmp6;
		uint8_t	*nexthdrp;

		ASSERT(ip6h != NULL);
		if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) ||
		    *nexthdrp != IPPROTO_ICMPV6) {
			return (B_FALSE);
		}

		icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen);
		return (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
		    icmp6->icmp6_type == ND_REDIRECT);
	}
}

/*
 * Find inner and outer IP headers from a tunneled packet as setup for calls
 * into ipsec_tun_{in,out}bound().
 * Note that we need to allow the outer header to be in a separate mblk from
 * the inner header.
 * If the caller knows the outer_hlen, the caller passes it in.  Otherwise zero.
 */
static size_t
iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4,
    ipha_t **inner4, ip6_t **outer6, ip6_t **inner6)
{
	ipha_t	*ipha;
	size_t	first_mblkl = MBLKL(mp);
	mblk_t	*inner_mp;

	/*
	 * Don't bother handling packets that don't have a full IP header in
	 * the first mblk.  For the input path, the ip module ensures that this
	 * won't happen, and on the output path, the IP tunneling MAC-type
	 * plugins ensure that this also won't happen.
	 */
	if (first_mblkl < sizeof (ipha_t))
		return (0);
	ipha = (ipha_t *)(mp->b_rptr);
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV4_VERSION:
		*outer4 = ipha;
		*outer6 = NULL;
		if (outer_hlen == 0)
			outer_hlen = IPH_HDR_LENGTH(ipha);
		break;
	case IPV6_VERSION:
		*outer4 = NULL;
		*outer6 = (ip6_t *)ipha;
		if (outer_hlen == 0)
			outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha);
		break;
	default:
		return (0);
	}

	if (first_mblkl < outer_hlen ||
	    (first_mblkl == outer_hlen && mp->b_cont == NULL))
		return (0);

	/*
	 * We don't bother doing a pullup here since the outer header will
	 * just get stripped off soon on input anyway.  We just want to ensure
	 * that the inner* pointer points to a full header.
	 */
	if (first_mblkl == outer_hlen) {
		inner_mp = mp->b_cont;
		ipha = (ipha_t *)inner_mp->b_rptr;
	} else {
		inner_mp = mp;
		ipha = (ipha_t *)(mp->b_rptr + outer_hlen);
	}

	switch (IPH_HDR_VERSION(ipha)) {
	case IPV4_VERSION:
		if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t))
			return (0);
		*inner4 = ipha;
		*inner6 = NULL;
		break;
	case IPV6_VERSION:
		if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t))
			return (0);
		*inner4 = NULL;
		*inner6 = (ip6_t *)ipha;
		break;
	default:
		return (0);
	}

	return (outer_hlen);
}
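
/*
 * Sketch of the two layouts accepted above (lengths are illustrative):
 *
 *   single mblk:    [ outer IP (outer_hlen) | inner IP | ULP ... ]
 *   split headers:  [ outer IP (outer_hlen) ] -> b_cont -> [ inner IP | ULP ]
 *
 * In both cases the first mblk must hold the complete outer header, and the
 * mblk holding the inner header must hold at least a full fixed IPv4 or IPv6
 * header; anything else makes iptun_find_headers() return 0.
 */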

/*
 * Received ICMP error in response to an X over IPv4 packet that we
 * transmitted.
 *
 * NOTE: "outer" refers to what's inside the ICMP payload.  We will get one of
 * the following:
 *
 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
 *
 * or
 *
 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
 *
 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
 * whatever the very-inner packet is (IPv4(2) or IPv6).
 */
static void
iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph,
    ip_recv_attr_t *ira)
{
	uint8_t	*orig;
	ipha_t	*outer4, *inner4;
	ip6_t	*outer6, *inner6;
	int	outer_hlen;
	uint8_t	type, code;

	ASSERT(data_mp->b_cont == NULL);
	/*
	 * Temporarily move b_rptr forward so that iptun_find_headers() can
	 * find headers in the ICMP packet payload.
	 */
	orig = data_mp->b_rptr;
	data_mp->b_rptr = (uint8_t *)(icmph + 1);
	/*
	 * The ip module ensures that ICMP errors contain at least the
	 * original IP header (otherwise, the error would never have made it
	 * here).
	 */
	ASSERT(MBLKL(data_mp) >= 0);
	outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
	    &inner6);
	ASSERT(outer6 == NULL);
	data_mp->b_rptr = orig;
	if (outer_hlen == 0) {
		iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
		return;
	}

	/* Only ICMP errors due to tunneled packets should reach here. */
	ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP ||
	    outer4->ipha_protocol == IPPROTO_IPV6);

	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		atomic_inc_64(&iptun->iptun_ierrors);
		return;
	}
	/* We should never see reassembled fragment here. */
	ASSERT(data_mp->b_next == NULL);

	data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen;

	/*
	 * If the original packet being transmitted was itself an ICMP error,
	 * then drop this packet.  We don't want to generate an ICMP error in
	 * response to an ICMP error.
	 */
	if (is_icmp_error(data_mp, inner4, inner6)) {
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH);
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED: {
			uint32_t newmtu;

			/*
			 * We reconcile this with the fact that the tunnel may
			 * also have IPsec policy by letting iptun_update_mtu
			 * take care of it.
			 */
			newmtu = iptun_update_mtu(iptun, NULL,
			    ntohs(icmph->icmph_du_mtu));

			if (inner4 != NULL) {
				iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
				    data_mp, ira->ira_tsl);
			} else {
				iptun_icmp_toobig_v6(iptun, newmtu, inner6,
				    data_mp, ira->ira_tsl);
			}
			return;
		}
		case ICMP_DEST_NET_UNREACH_ADMIN:
		case ICMP_DEST_HOST_UNREACH_ADMIN:
			code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN :
			    ICMP6_DST_UNREACH_ADMIN);
			break;
		default:
			code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
			    ICMP6_DST_UNREACH_ADDR);
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (inner6 != NULL) {
			type = ICMP6_TIME_EXCEEDED;
			code = 0;
		} /* else we're already set. */
		break;
	case ICMP_PARAM_PROBLEM:
		/*
		 * This is a problem with the outer header we transmitted.
		 * Treat this as an output error.
		 */
		iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
		return;
	default:
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	if (inner4 != NULL) {
		iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
		    ira->ira_tsl);
	} else {
		iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
		    ira->ira_tsl);
	}
}
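
/*
 * For illustration (hypothetical numbers): if a router on the IPv4 path
 * reports "fragmentation needed, MTU 1400" for a tunneled packet, the code
 * above feeds 1400 into iptun_update_mtu(), which recomputes the tunnel MTU
 * (also reconciling any IPsec overhead), and then relays a corresponding
 * "fragmentation needed" or "packet too big" error carrying the new, smaller
 * MTU back to the sender of the inner packet.
 */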

/*
 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
 * Encapsulation Limit destination option.  If there is one, set encaplim_ptr
 * to point to the option value.
 */
static boolean_t
iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr)
{
	ip_pkt_t	pkt;
	uint8_t		*endptr;
	ip6_dest_t	*destp;
	struct ip6_opt	*optp;

	pkt.ipp_fields = 0; /* must be initialized */
	(void) ip_find_hdr_v6(mp, ip6h, B_FALSE, &pkt, NULL);
	if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
		destp = pkt.ipp_dstopts;
	} else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) {
		destp = pkt.ipp_rthdrdstopts;
	} else {
		return (B_FALSE);
	}

	endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1);
	optp = (struct ip6_opt *)(destp + 1);
	while (endptr - (uint8_t *)optp > sizeof (*optp)) {
		if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
			if ((uint8_t *)(optp + 1) >= endptr)
				return (B_FALSE);
			*encaplim_ptr = (uint8_t *)&optp[1];
			return (B_TRUE);
		}
		optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2);
	}
	return (B_FALSE);
}
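
/*
 * Sketch of the destination options header walked above (one 8-octet unit):
 *
 *   [ ip6d_nxt | ip6d_len | opt type IP6OPT_TUNNEL_LIMIT | opt len 1 |
 *     limit octet | padding ... ]
 *
 * ip6d_len counts additional 8-octet units beyond the first, hence the
 * "8 * (destp->ip6d_len + 1)" bound, and the tunnel encapsulation limit
 * option (RFC 2473) carries a single octet: the number of further
 * encapsulations the packet is still allowed to undergo.
 */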

/*
 * Received ICMPv6 error in response to an X over IPv6 packet that we
 * transmitted.
 *
 * NOTE: "outer" refers to what's inside the ICMP payload.  We will get one of
 * the following:
 *
 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
 *
 * or
 *
 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
 *
 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
 * whatever the very-inner packet is (IPv4 or IPv6(2)).
 */
static void
iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h,
    ip_recv_attr_t *ira)
{
	uint8_t	*orig;
	ipha_t	*outer4, *inner4;
	ip6_t	*outer6, *inner6;
	int	outer_hlen;
	uint8_t	type, code;

	ASSERT(data_mp->b_cont == NULL);

	/*
	 * Temporarily move b_rptr forward so that iptun_find_headers() can
	 * find IP headers in the ICMP packet payload.
	 */
	orig = data_mp->b_rptr;
	data_mp->b_rptr = (uint8_t *)(icmp6h + 1);
	/*
	 * The ip module ensures that ICMP errors contain at least the
	 * original IP header (otherwise, the error would never have made it
	 * here).
	 */
	ASSERT(MBLKL(data_mp) >= 0);
	outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
	    &inner6);
	ASSERT(outer4 == NULL);
	data_mp->b_rptr = orig;	/* Restore r_ptr */
	if (outer_hlen == 0) {
		iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
		return;
	}

	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		atomic_inc_64(&iptun->iptun_ierrors);
		return;
	}
	/* We should never see reassembled fragment here. */
	ASSERT(data_mp->b_next == NULL);

	data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen;

	/*
	 * If the original packet being transmitted was itself an ICMP error,
	 * then drop this packet.  We don't want to generate an ICMP error in
	 * response to an ICMP error.
	 */
	if (is_icmp_error(data_mp, inner4, inner6)) {
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	switch (icmp6h->icmp6_type) {
	case ICMP6_PARAM_PROB: {
		uint8_t *encaplim_ptr;

		/*
		 * If the ICMPv6 error points to a valid Tunnel Encapsulation
		 * Limit option and the limit value is 0, then fall through
		 * and send a host unreachable message.  Otherwise, treat the
		 * error as an output error, as there must have been a problem
		 * with a packet we sent.
		 */
		if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) ||
		    (icmp6h->icmp6_pptr !=
		    ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) ||
		    *encaplim_ptr != 0) {
			iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
			return;
		}
		/* FALLTHRU */
	}
	case ICMP6_TIME_EXCEEDED:
	case ICMP6_DST_UNREACH:
		type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE :
		    ICMP6_DST_UNREACH);
		code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
		    ICMP6_DST_UNREACH_ADDR);
		break;
	case ICMP6_PACKET_TOO_BIG: {
		uint32_t newmtu;

		/*
		 * We reconcile this with the fact that the tunnel may also
		 * have IPsec policy by letting iptun_update_mtu take care of
		 * it.
		 */
		newmtu = iptun_update_mtu(iptun, NULL,
		    ntohl(icmp6h->icmp6_mtu));

		if (inner4 != NULL) {
			iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
			    data_mp, ira->ira_tsl);
		} else {
			iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp,
			    ira->ira_tsl);
		}
		return;
	}
	default:
		iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
		return;
	}

	if (inner4 != NULL) {
		iptun_icmp_error_v4(iptun, inner4, data_mp, type, code,
		    ira->ira_tsl);
	} else {
		iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0,
		    ira->ira_tsl);
	}
}

/*
 * Called as conn_recvicmp from IP for ICMP errors.
 */
static void
iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t	*connp = arg;
	iptun_t	*iptun = connp->conn_iptun;
	mblk_t	*tmpmp;
	size_t	hlen;

	ASSERT(IPCL_IS_IPTUN(connp));

	if (mp->b_cont != NULL) {
		/*
		 * Since ICMP error processing necessitates access to bits
		 * that are within the ICMP error payload (the original packet
		 * that caused the error), pull everything up into a single
		 * block for convenience.
		 */
		if ((tmpmp = msgpullup(mp, -1)) == NULL) {
			iptun_drop_pkt(mp, &iptun->iptun_norcvbuf);
			return;
		}
		freemsg(mp);
		mp = tmpmp;
	}

	hlen = ira->ira_ip_hdr_length;
	switch (iptun->iptun_typeinfo->iti_ipvers) {
	case IPV4_VERSION:
		/*
		 * The outer IP header coming up from IP is always ipha_t
		 * aligned (otherwise, we would have crashed in ip).
		 */
		iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen),
		    ira);
		break;
	case IPV6_VERSION:
		iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen),
		    ira);
		break;
	}
}

static boolean_t
iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
{
	ipaddr_t v4addr;

	/*
	 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
	 * IPv4 address of a 6to4 tunnel as the destination.
	 */
	if (inner6 == NULL)
		return (B_FALSE);

	/*
	 * Make sure that the IPv6 destination is within the site that this
	 * 6to4 tunnel is routing for.  We don't want people bouncing random
	 * tunneled IPv6 packets through this 6to4 router.
	 */
	IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr);
	if (outer4->ipha_dst != v4addr)
		return (B_FALSE);

	if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) {
		/*
		 * Section 9 of RFC 3056 (security considerations) suggests
		 * that when a packet is from a 6to4 site (i.e., it's not a
		 * global address being forwarded from a relay router), make
		 * sure that the packet was tunneled by that site's 6to4
		 * router.
		 */
		IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
		if (outer4->ipha_src != v4addr)
			return (B_FALSE);
	} else {
		/*
		 * Only accept packets from a relay router if we've configured
		 * outbound relay router functionality.
		 */
		if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
			return (B_FALSE);
	}

	return (B_TRUE);
}
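
/*
 * 6to4 addressing example (RFC 3056, illustrative values): the prefix
 * 2002:c000:0201::/48 embeds the IPv4 address 192.0.2.1 (0xc0000201) in bits
 * 16-47, which is exactly what IN6_6TO4_TO_V4ADDR() extracts above.  A packet
 * whose inner IPv6 destination is, say, 2002:c000:0201::1 is therefore only
 * accepted when the outer IPv4 destination is 192.0.2.1, and the analogous
 * check is applied to the source addresses for packets from 6to4 sites.
 */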

/*
 * Input function for everything that comes up from the ip module below us.
 * This is called directly from the ip module via connp->conn_recv().
 *
 * We receive M_DATA messages with IP-in-IP tunneled packets.
 */
static void
iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t	*connp = arg;
	iptun_t	*iptun = connp->conn_iptun;
	int	outer_hlen;
	ipha_t	*outer4, *inner4;
	ip6_t	*outer6, *inner6;

	ASSERT(IPCL_IS_IPTUN(connp));
	ASSERT(DB_TYPE(data_mp) == M_DATA);

	outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length,
	    &outer4, &inner4, &outer6, &inner6);
	if (outer_hlen == 0)
		goto drop;

	/*
	 * If the system is labeled, we call tsol_check_dest() on the packet
	 * destination (our local tunnel address) to ensure that the packet as
	 * labeled should be allowed to be sent to us.  We don't need to call
	 * the more involved tsol_receive_local() since the tunnel link itself
	 * cannot be assigned to shared-stack non-global zones.
	 */
	if (ira->ira_flags & IRAF_SYSTEM_LABELED) {
		if (ira->ira_tsl == NULL)
			goto drop;
		if (tsol_check_dest(ira->ira_tsl, (outer4 != NULL ?
		    (void *)&outer4->ipha_dst : (void *)&outer6->ip6_dst),
		    (outer4 != NULL ? IPV4_VERSION : IPV6_VERSION),
		    CONN_MAC_DEFAULT, B_FALSE, NULL) != 0)
			goto drop;
	}

	data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
	    inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns);
	if (data_mp == NULL) {
		/* Callee did all of the freeing. */
		return;
	}

	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 &&
	    !iptun_in_6to4_ok(iptun, outer4, inner6))
		goto drop;

	/*
	 * We need to statistically account for each packet individually, so
	 * we might as well split up any b_next chains here.
	 */
	do {
		mblk_t	*mp;

		mp = data_mp->b_next;
		data_mp->b_next = NULL;

		atomic_inc_64(&iptun->iptun_ipackets);
		atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp));
		mac_rx(iptun->iptun_mh, NULL, data_mp);

		data_mp = mp;
	} while (data_mp != NULL);

	return;
drop:
	iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
}

/*
 * Do 6to4-specific header-processing on output.  Return B_TRUE if the packet
 * was processed without issue, or B_FALSE if the packet had issues and should
 * be dropped.
 */
static boolean_t
iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
{
	ipaddr_t v4addr;

	/*
	 * IPv6 source must be a 6to4 address.  This is because a conscious
	 * decision was made to not allow a Solaris system to be used as a
	 * relay router (for security reasons) when 6to4 was initially
	 * integrated.  If this decision is ever reversed, the following check
	 * can be removed.
	 */
	if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src))
		return (B_FALSE);

	/*
	 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
	 * portion of the 6to4 IPv6 source address.  In other words, make sure
	 * that we're tunneling packets from our own 6to4 site.
	 */
	IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
	if (outer4->ipha_src != v4addr)
		return (B_FALSE);

	/*
	 * Automatically set the destination of the outer IPv4 header as
	 * described in RFC3056.  There are two possibilities:
	 *
	 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address
	 *    to the IPv4 portion of the 6to4 address.
	 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
	 *    destination to the address of a relay router.
	 *
	 * Design Note: b shouldn't be necessary here, and this is a flaw in
	 * the design of the 6to4relay command.  Instead of setting a 6to4
	 * relay address in this module via an ioctl, the 6to4relay command
	 * could simply add an IPv6 route for native IPv6 addresses (such as a
	 * default route) in the forwarding table that uses a 6to4 destination
	 * as its next hop, and the IPv4 portion of that address could be a
	 * 6to4 relay address.  In order for this to work, IP would have to
	 * resolve the next hop address, which would necessitate a link-layer
	 * address resolver for 6to4 links, which doesn't exist today.
	 *
	 * In fact, if a resolver existed for 6to4 links, then setting the
	 * IPv4 destination in the outer header could be done as part of
	 * link-layer address resolution and fast-path header generation, and
	 * not here.
	 */
	if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) {
		/* destination is a 6to4 router */
		IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst,
		    (struct in_addr *)&outer4->ipha_dst);

		/* Reject attempts to send to INADDR_ANY */
		if (outer4->ipha_dst == INADDR_ANY)
			return (B_FALSE);
	} else {
		/*
		 * The destination is a native IPv6 address.  If output to a
		 * relay-router is enabled, use the relay-router's IPv4
		 * address as the destination.
		 */
		if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
			return (B_FALSE);
		outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr;
	}

	/*
	 * If the outer source and destination are equal, this means that the
	 * 6to4 router somehow forwarded an IPv6 packet destined for its own
	 * 6to4 site to its 6to4 tunnel interface, which will result in this
	 * packet infinitely bouncing between ip and iptun.
	 */
	return (outer4->ipha_src != outer4->ipha_dst);
}

/*
 * Process output packets with outer IPv4 headers.  Frees mp and bumps stat on
 * failure.
 */
static mblk_t *
iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4,
    ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
{
	uint8_t	*innerptr = (inner4 != NULL ?
	    (uint8_t *)inner4 : (uint8_t *)inner6);
	size_t	minmtu = iptun->iptun_typeinfo->iti_minmtu;

	if (inner4 != NULL) {
		ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP);
		/*
		 * Copy the tos from the inner IPv4 header. We mask off ECN
		 * bits (bits 6 and 7) because there is currently no
		 * tunnel-tunnel communication to determine if both sides
		 * support ECN.  We opt for the safe choice: don't copy the
		 * ECN bits when doing encapsulation.
		 */
		outer4->ipha_type_of_service =
		    inner4->ipha_type_of_service & ~0x03;
	} else {
		ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 &&
		    inner6 != NULL);
	}
	if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
		outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
	else
		outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;

	/*
	 * As described in section 3.2.2 of RFC4213, if the packet payload is
	 * less than or equal to the minimum MTU size, then we need to allow
	 * IPv4 to fragment the packet.  The reason is that even if we end up
	 * receiving an ICMP frag-needed, the interface above this tunnel
	 * won't be allowed to drop its MTU as a result, since the packet was
	 * already smaller than the smallest allowable MTU for that interface.
	 */
	if (mp->b_wptr - innerptr <= minmtu) {
		outer4->ipha_fragment_offset_and_flags = 0;
		ixa->ixa_flags &= ~IXAF_DONTFRAG;
	} else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) &&
	    (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) {
		ixa->ixa_flags |= IXAF_DONTFRAG;
	}

	ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4);
	ixa->ixa_pktlen = msgdsize(mp);
	ixa->ixa_protocol = outer4->ipha_protocol;

	outer4->ipha_length = htons(ixa->ixa_pktlen);
	return (mp);
}
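
/*
 * For illustration (hypothetical numbers): if the encapsulated payload is no
 * larger than the tunnel type's minimum MTU, the code above clears DF and
 * lets IPv4 fragment the outer packet, because a resulting ICMP
 * "fragmentation needed" could not lower the upper interface's MTU any
 * further anyway (RFC 4213, section 3.2.2).  Larger packets on non-6to4
 * tunnels get DF set so that path-MTU discovery keeps working.
 */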

/*
 * Insert an encapsulation limit destination option in the packet provided.
 * Always consumes the mp argument and returns a new mblk pointer.
 */
static mblk_t *
iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
    uint8_t limit)
{
	mblk_t			*newmp;
	iptun_ipv6hdrs_t	*newouter6;

	ASSERT(outer6->ip6_nxt == IPPROTO_IPV6);
	ASSERT(mp->b_cont == NULL);

	mp->b_rptr += sizeof (ip6_t);
	newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED);
	if (newmp == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
		return (NULL);
	}
	newmp->b_wptr += sizeof (iptun_ipv6hdrs_t);
	/* Copy the payload (Starting with the inner IPv6 header). */
	bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp));
	newmp->b_wptr += MBLKL(mp);
	newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr;
	/* Now copy the outer IPv6 header. */
	bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t));
	newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
	newouter6->it6h_encaplim = iptun_encaplim_init;
	newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt;
	newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit;

	/*
	 * The payload length will be set at the end of
	 * iptun_out_process_ipv6().
	 */

	freemsg(mp);
	return (newmp);
}
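
/*
 * Resulting header layout after the insertion above (sketch):
 *
 *   [ outer ip6_t, ip6_nxt = IPPROTO_DSTOPTS ]
 *   [ destination options header carrying the tunnel encapsulation limit ]
 *   [ inner IPv6 packet, copied unchanged ]
 *
 * i.e. an iptun_ipv6hdrs_t followed by the original payload; the outer
 * ip6_plen is filled in later by iptun_out_process_ipv6().
 */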

/*
 * Process output packets with outer IPv6 headers.  Frees mp and bumps stats
 * on failure.
 */
static mblk_t *
iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
    ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
{
	uint8_t	*innerptr = (inner4 != NULL ?
	    (uint8_t *)inner4 : (uint8_t *)inner6);
	size_t	minmtu = iptun->iptun_typeinfo->iti_minmtu;
	uint8_t	*limit, *configlimit;
	uint32_t offset;
	iptun_ipv6hdrs_t *v6hdrs;

	if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) {
		/*
		 * The inner packet is an IPv6 packet which itself contains an
		 * encapsulation limit option.  The limit variable points to
		 * the value in the embedded option.  Process the
		 * encapsulation limit option as specified in RFC 2473.
		 *
		 * If limit is 0, then we've exceeded the limit and we need to
		 * send back an ICMPv6 parameter problem message.
		 *
		 * If limit is > 0, then we decrement it by 1 and make sure
		 * that the encapsulation limit option in the outer header
		 * reflects that (adding an option if one isn't already
		 * there).
		 */
		ASSERT(limit > mp->b_rptr && limit < mp->b_wptr);
		if (*limit == 0) {
			mp->b_rptr = (uint8_t *)inner6;
			offset = limit - mp->b_rptr;
			iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB,
			    0, offset, ixa->ixa_tsl);
			atomic_inc_64(&iptun->iptun_noxmtbuf);
			return (NULL);
		}

		/*
		 * The outer header requires an encapsulation limit option.
		 * If there isn't one already, add one.
		 */
		if (iptun->iptun_encaplimit == 0) {
			if ((mp = iptun_insert_encaplimit(iptun, mp, outer6,
			    (*limit - 1))) == NULL)
				return (NULL);
			v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
		} else {
			/*
			 * There is an existing encapsulation limit option in
			 * the outer header.  If the inner encapsulation limit
			 * is less than the configured encapsulation limit,
			 * update the outer encapsulation limit to reflect
			 * this lesser value.
			 */
			v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
			configlimit =
			    &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit;
			if ((*limit - 1) < *configlimit)
				*configlimit = (*limit - 1);
		}
		ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t);
		ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt;
	} else {
		ixa->ixa_ip_hdr_length = sizeof (ip6_t);
		ixa->ixa_protocol = outer6->ip6_nxt;
	}
	/*
	 * See iptun_output_process_ipv4() why we allow fragmentation for
	 * small packets
	 */
	if (mp->b_wptr - innerptr <= minmtu)
		ixa->ixa_flags &= ~IXAF_DONTFRAG;
	else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL))
		ixa->ixa_flags |= IXAF_DONTFRAG;

	ixa->ixa_pktlen = msgdsize(mp);
	outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t));
	return (mp);
}
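
/*
 * Encapsulation limit example (RFC 2473, hypothetical values): if the inner
 * IPv6 packet carries a tunnel encapsulation limit of 4, the outer header
 * sent by this tunnel carries a limit of 3, either by inserting a new option
 * or by lowering an already-configured outer limit to the smaller value.  If
 * the inner limit is already 0, the packet is not tunneled at all; an ICMPv6
 * parameter problem pointing at the limit octet is returned to the sender
 * instead.
 */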

/*
 * The IP tunneling MAC-type plugins have already done most of the header
 * processing and validity checks.  We are simply responsible for multiplexing
 * down to the ip module below us.
 */
static void
iptun_output(iptun_t *iptun, mblk_t *mp)
{
	conn_t	*connp = iptun->iptun_connp;
	mblk_t	*newmp;
	int	error;
	ip_xmit_attr_t	*ixa;

	ASSERT(mp->b_datap->db_type == M_DATA);

	if (mp->b_cont != NULL) {
		if ((newmp = msgpullup(mp, -1)) == NULL) {
			iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
			return;
		}
		freemsg(mp);
		mp = newmp;
	}

	if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
		iptun_output_6to4(iptun, mp);
		return;
	}

	if (is_system_labeled()) {
		/*
		 * Since the label can be different, meaning a potentially
		 * different IRE, we always use a unique ip_xmit_attr_t.
		 */
		ixa = conn_get_ixa_exclusive(connp);
	} else {
		/*
		 * If no other thread is using conn_ixa this just gets a
		 * reference to conn_ixa. Otherwise we get a safe copy of
		 * conn_ixa.
		 */
		ixa = conn_get_ixa(connp, B_FALSE);
	}
	if (ixa == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	/*
	 * In case we got a safe copy of conn_ixa, then we need
	 * to fill in any pointers in it.
	 */
	if (ixa->ixa_ire == NULL) {
		error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
		    &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
		    NULL, NULL, 0);
		if (error != 0) {
			if (ixa->ixa_ire != NULL &&
			    (error == EHOSTUNREACH || error == ENETUNREACH)) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
			} else {
				ixa_refrele(ixa);
				iptun_drop_pkt(mp, &iptun->iptun_oerrors);
				return;
			}
		}
	}

	iptun_output_common(iptun, ixa, mp);
	ixa_refrele(ixa);
}

/*
 * We use an ixa based on the last destination.
 */
static void
iptun_output_6to4(iptun_t *iptun, mblk_t *mp)
{
	conn_t		*connp = iptun->iptun_connp;
	ipha_t		*outer4, *inner4;
	ip6_t		*outer6, *inner6;
	ip_xmit_attr_t	*ixa;
	ip_xmit_attr_t	*oldixa;
	int		error;
	boolean_t	need_connect;
	in6_addr_t	v6dst;

	ASSERT(mp->b_cont == NULL);	/* Verified by iptun_output */

	/* Make sure we set ipha_dst before we look at ipha_dst */

	(void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6);
	ASSERT(outer4 != NULL);
	if (!iptun_out_process_6to4(iptun, outer4, inner6)) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	if (is_system_labeled()) {
		/*
		 * Since the label can be different, meaning a potentially
		 * different IRE, we always use a unique ip_xmit_attr_t.
		 */
		ixa = conn_get_ixa_exclusive(connp);
	} else {
		/*
		 * If no other thread is using conn_ixa this just gets a
		 * reference to conn_ixa. Otherwise we get a safe copy of
		 * conn_ixa.
		 */
		ixa = conn_get_ixa(connp, B_FALSE);
	}
	if (ixa == NULL) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	mutex_enter(&connp->conn_lock);
	if (connp->conn_v4lastdst == outer4->ipha_dst) {
		need_connect = (ixa->ixa_ire == NULL);
	} else {
		/* In case previous destination was multirt */
		ip_attr_newdst(ixa);

		/*
		 * We later update conn_ixa when we update conn_v4lastdst
		 * which enables subsequent packets to avoid redoing
		 * ip_attr_connect().
		 */
		need_connect = B_TRUE;
	}
	mutex_exit(&connp->conn_lock);

	/*
	 * In case we got a safe copy of conn_ixa, or otherwise we don't
	 * have a current ixa_ire, then we need to fill in any pointers in
	 * the ixa.
	 */
	if (need_connect) {
		IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst);

		/* We handle IPsec in iptun_output_common */
		error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
		    &v6dst, &v6dst, 0, NULL, NULL, 0);
		if (error != 0) {
			if (ixa->ixa_ire != NULL &&
			    (error == EHOSTUNREACH || error == ENETUNREACH)) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
			} else {
				ixa_refrele(ixa);
				iptun_drop_pkt(mp, &iptun->iptun_oerrors);
				return;
			}
		}
	}

	iptun_output_common(iptun, ixa, mp);

	/* Atomically replace conn_ixa and conn_v4lastdst */
	mutex_enter(&connp->conn_lock);
	if (connp->conn_v4lastdst != outer4->ipha_dst) {
		/* Remember the dst which corresponds to conn_ixa */
		connp->conn_v6lastdst = v6dst;
		oldixa = conn_replace_ixa(connp, ixa);
	} else {
		oldixa = NULL;
	}
	mutex_exit(&connp->conn_lock);
	ixa_refrele(ixa);

	if (oldixa != NULL)
		ixa_refrele(oldixa);
}
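
/*
 * The conn_v4lastdst/conn_ixa caching above means that, for example, a burst
 * of packets tunneled to the same 6to4 destination performs ip_attr_connect()
 * only once; a new ixa is connected and atomically swapped in under conn_lock
 * only when the embedded IPv4 destination changes.
 */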

/*
 * Check the destination/label. Modifies *mpp by adding/removing CIPSO.
 *
 * We get the label from the message in order to honor the
 * ULPs/IPs choice of label. This will be NULL for forwarded
 * packets, neighbor discovery packets and some others.
 */
static int
iptun_output_check_label(mblk_t **mpp, ip_xmit_attr_t *ixa)
{
	cred_t	*cr;
	int	adjust;
	int	iplen;
	int	err;
	ts_label_t *effective_tsl = NULL;

	ASSERT(is_system_labeled());

	cr = msg_getcred(*mpp, NULL);
	if (cr == NULL)
		return (0);

	/*
	 * We need to start with a label based on the IP/ULP above us
	 */
	ip_xmit_attr_restore_tsl(ixa, cr);

	/*
	 * Need to update packet with any CIPSO option since
	 * conn_ip_output doesn't do that.
	 */
	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t *ipha;

		ipha = (ipha_t *)(*mpp)->b_rptr;
		iplen = ntohs(ipha->ipha_length);
		err = tsol_check_label_v4(ixa->ixa_tsl,
		    ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE,
		    ixa->ixa_ipst, &effective_tsl);
		if (err != 0)
			return (err);

		ipha = (ipha_t *)(*mpp)->b_rptr;
		adjust = (int)ntohs(ipha->ipha_length) - iplen;
	} else {
		ip6_t *ip6h;

		ip6h = (ip6_t *)(*mpp)->b_rptr;
		iplen = ntohs(ip6h->ip6_plen);

		err = tsol_check_label_v6(ixa->ixa_tsl,
		    ixa->ixa_zoneid, mpp, CONN_MAC_DEFAULT, B_FALSE,
		    ixa->ixa_ipst, &effective_tsl);
		if (err != 0)
			return (err);

		ip6h = (ip6_t *)(*mpp)->b_rptr;
		adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
	}

	if (effective_tsl != NULL) {
		/* Update the label */
		ip_xmit_attr_replace_tsl(ixa, effective_tsl);
	}
	ixa->ixa_pktlen += adjust;
	ixa->ixa_ip_hdr_length += adjust;
	return (0);
}

static void
iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp)
{
	ipsec_tun_pol_t	*itp = iptun->iptun_itp;
	int		outer_hlen;
	mblk_t		*newmp;
	ipha_t		*outer4, *inner4;
	ip6_t		*outer6, *inner6;
	int		error;
	boolean_t	update_pktlen;

	ASSERT(ixa->ixa_ire != NULL);

	outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6,
	    &inner6);
	if (outer_hlen == 0) {
		iptun_drop_pkt(mp, &iptun->iptun_oerrors);
		return;
	}

	/* Save IXAF_DONTFRAG value */
	iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG;

	/* Perform header processing. */
	if (outer4 != NULL) {
		mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6,
		    ixa);
	} else {
		mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6,
		    ixa);
	}
	if (mp == NULL)
		return;

	/*
	 * Let's hope the compiler optimizes this with "branch taken".
	 */
	if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
		/* This updates the ip_xmit_attr_t */
		mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4,
		    outer6, outer_hlen, ixa);
		if (mp == NULL) {
			atomic_inc_64(&iptun->iptun_oerrors);
			return;
		}
		if (is_system_labeled()) {
			/*
			 * Might change the packet by adding/removing CIPSO.
			 * After this call, inner* and outer* and outer_hlen
			 * might be invalid.
			 */
			error = iptun_output_check_label(&mp, ixa);
			if (error != 0) {
				ip2dbg(("label check failed (%d)\n", error));
				iptun_drop_pkt(mp, &iptun->iptun_oerrors);
				return;
			}
		}

		/*
		 * ipsec_tun_outbound() returns a chain of tunneled IP
		 * fragments linked with b_next (or a single message if the
		 * tunneled packet wasn't a fragment).
		 * If fragcache returned a list then we need to update
		 * ixa_pktlen for all packets in the list.
		 */
		update_pktlen = (mp->b_next != NULL);

		/*
		 * Otherwise, we're good to go.  The ixa has been updated with
		 * instructions for outbound IPsec processing.
		 */
		for (newmp = mp; newmp != NULL; newmp = mp) {
			size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;

			atomic_inc_64(&iptun->iptun_opackets);
			atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
			mp = mp->b_next;
			newmp->b_next = NULL;

			/*
			 * The IXAF_DONTFRAG flag is global, but there is
			 * a chain here.  Check if we're really already
			 * smaller than the minimum allowed MTU and reset here
			 * appropriately.  Otherwise one small packet can kill
			 * the whole chain's path mtu discovery.
			 * In addition, update the pktlen to the length of
			 * the actual packet being processed.
			 */
			if (update_pktlen) {
				ixa->ixa_pktlen = msgdsize(newmp);
				if (ixa->ixa_pktlen <= minmtu)
					ixa->ixa_flags &= ~IXAF_DONTFRAG;
			}

			atomic_inc_64(&iptun->iptun_opackets);
			atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);

			error = conn_ip_output(newmp, ixa);

			/* Restore IXAF_DONTFRAG value */
			ixa->ixa_flags |= dontfrag;

			if (error == EMSGSIZE) {
				/* IPsec policy might have changed */
				(void) iptun_update_mtu(iptun, ixa, 0);
			}
		}
	} else {
		/*
		 * The ip module will potentially apply global policy to the
		 * packet in its output path if there's no active tunnel
		 * policy.
		 */
		ASSERT(ixa->ixa_ipsec_policy == NULL);
		mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa);
		if (mp == NULL) {
			atomic_inc_64(&iptun->iptun_oerrors);
			return;
		}
		if (is_system_labeled()) {
			/*
			 * Might change the packet by adding/removing CIPSO.
			 * After this call, inner* and outer* and outer_hlen
			 * might be invalid.
			 */
			error = iptun_output_check_label(&mp, ixa);
			if (error != 0) {
				ip2dbg(("label check failed (%d)\n", error));
				iptun_drop_pkt(mp, &iptun->iptun_oerrors);
				return;
			}
		}

		atomic_inc_64(&iptun->iptun_opackets);
		atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);

		error = conn_ip_output(mp, ixa);
		if (error == EMSGSIZE) {
			/* IPsec policy might have changed */
			(void) iptun_update_mtu(iptun, ixa, 0);
		}
	}
	if (ixa->ixa_flags & IXAF_IPSEC_SECURE)
		ipsec_out_release_refs(ixa);
}

static mac_callbacks_t iptun_m_callbacks = {
	.mc_callbacks	= (MC_SETPROP | MC_GETPROP),
	.mc_getstat	= iptun_m_getstat,
	.mc_start	= iptun_m_start,
	.mc_stop	= iptun_m_stop,
	.mc_setpromisc	= iptun_m_setpromisc,
	.mc_multicst	= iptun_m_multicst,
	.mc_unicst	= iptun_m_unicst,
	.mc_tx		= iptun_m_tx,
	.mc_setprop	= iptun_m_setprop,
	.mc_getprop	= iptun_m_getprop