4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
27 * iptun - IP Tunneling Driver
29 * This module is a GLDv3 driver that implements virtual datalinks over IP
30 * (a.k.a, IP tunneling). The datalinks are managed through a dld ioctl
31 * interface (see iptun_ctl.c), and registered with GLDv3 using
32 * mac_register(). It implements the logic for various forms of IP (IPv4 or
33 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
34 * module below it. Each virtual IP tunnel datalink has a conn_t associated
35 * with it representing the "outer" IP connection.
37 * The module implements the following locking semantics:
39 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
40 * See comments above iptun_hash_lock for details.
42 * No locks are ever held while calling up to GLDv3. The general architecture
43 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
44 * given link will be held while making downcalls (iptun_m_*() callbacks).
45 * Because we need to hold locks while handling downcalls, holding these locks
46 * while issuing upcalls results in deadlock scenarios. See the block comment
47 * above iptun_task_cb() for details on how we safely issue upcalls without
50 * The contents of each iptun_t is protected by an iptun_mutex which is held
51 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in
54 * See comments in iptun_delete() and iptun_free() for details on how the
55 * iptun_t is deleted safely.
58 #include <sys/types.h>
60 #include <sys/errno.h>
61 #include <sys/modhash.h>
63 #include <sys/strsun.h>
65 #include <sys/systm.h>
66 #include <sys/tihdr.h>
67 #include <sys/param.h>
68 #include <sys/mac_provider.h>
69 #include <sys/mac_ipv4.h>
70 #include <sys/mac_ipv6.h>
71 #include <sys/mac_6to4.h>
72 #include <sys/sunldi.h>
73 #include <netinet/in.h>
74 #include <netinet/ip6.h>
76 #include <inet/ip_ire.h>
77 #include <inet/ipsec_impl.h>
78 #include <inet/iptun.h>
79 #include <inet/iptun/iptun_impl.h>
81 /* Do the tunnel type and address family match? */
/*
 * Do the tunnel type and address family match?
 * Parameters are parenthesized so the macro expands correctly even when
 * callers pass compound expressions (e.g. a ternary) as arguments.
 */
#define	IPTUN_ADDR_MATCH(iptun_type, family)				\
	((((iptun_type) == IPTUN_TYPE_IPV4) && ((family) == AF_INET)) || \
	(((iptun_type) == IPTUN_TYPE_IPV6) && ((family) == AF_INET6)) || \
	(((iptun_type) == IPTUN_TYPE_6TO4) && ((family) == AF_INET)))
/* Hash key for iptun_hash: the datalink ID widened to a mod_hash key. */
87 #define IPTUN_HASH_KEY(key) ((mod_hash_key_t)(uintptr_t)(key))
/* MTU and tunable bounds enforced by the property-set paths below. */
89 #define IPTUN_MIN_IPV4_MTU 576 /* ip.h still uses 68 (!) */
90 #define IPTUN_MIN_IPV6_MTU IPV6_MIN_MTU
91 #define IPTUN_MAX_IPV4_MTU (IP_MAXPACKET - sizeof (ipha_t))
92 #define IPTUN_MAX_IPV6_MTU (IP_MAXPACKET - sizeof (ip6_t) - \
93 sizeof (iptun_encaplim_t))
95 #define IPTUN_MIN_HOPLIMIT 1
96 #define IPTUN_MAX_HOPLIMIT UINT8_MAX
98 #define IPTUN_MIN_ENCAPLIMIT 0
99 #define IPTUN_MAX_ENCAPLIMIT UINT8_MAX
/* Bits of an ipsec_req_t field that indicate IPsec was explicitly requested. */
101 #define IPTUN_IPSEC_REQ_MASK (IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
/*
 * Template IPv6 encapsulation-limit option; copied into the tunnel header
 * in iptun_headergen(). NOTE(review): part of this initializer is missing
 * from this extract.
 */
103 static iptun_encaplim_t iptun_encaplim_init
= {
107 IPTUN_DEFAULT_ENCAPLIMIT
, /* filled in with actual value later */
114 * Table containing per-iptun-type information.
115 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU.
/*
 * Columns (per visible entries): type, mac plugin ident, outer IP version,
 * min MTU, max MTU, whether the type has a remote address.
 * Terminated by the IPTUN_TYPE_UNKNOWN sentinel used by iptun_gettypeinfo().
 */
117 static iptun_typeinfo_t iptun_type_table
[] = {
118 { IPTUN_TYPE_IPV4
, MAC_PLUGIN_IDENT_IPV4
, IPV4_VERSION
,
119 IPTUN_MIN_IPV6_MTU
, IPTUN_MAX_IPV4_MTU
, B_TRUE
},
120 { IPTUN_TYPE_IPV6
, MAC_PLUGIN_IDENT_IPV6
, IPV6_VERSION
,
121 IPTUN_MIN_IPV6_MTU
, IPTUN_MAX_IPV6_MTU
, B_TRUE
},
122 { IPTUN_TYPE_6TO4
, MAC_PLUGIN_IDENT_6TO4
, IPV4_VERSION
,
123 IPTUN_MIN_IPV6_MTU
, IPTUN_MAX_IPV4_MTU
, B_FALSE
},
124 { IPTUN_TYPE_UNKNOWN
, NULL
, 0, 0, 0, B_FALSE
}
128 * iptun_hash is an iptun_t lookup table by link ID protected by
129 * iptun_hash_lock. While the hash table's integrity is maintained via
130 * internal locking in the mod_hash_*() functions, we need additional locking
131 * so that an iptun_t cannot be deleted after a hash lookup has returned an
132 * iptun_t and before iptun_lock has been entered. As such, we use
133 * iptun_hash_lock when doing lookups and removals from iptun_hash.
/* Global lookup table: datalink ID -> iptun_t (see block comment above). */
135 mod_hash_t
*iptun_hash
;
/* Serializes lookups/removals in iptun_hash (see block comment above). */
136 static kmutex_t iptun_hash_lock
;
138 static uint_t iptun_tunnelcount
; /* total for all stacks */
/* kmem cache for iptun_t allocations. */
139 kmem_cache_t
*iptun_cache
;
/* taskq used to defer mac upcalls; see iptun_task_cb(). */
140 ddi_taskq_t
*iptun_taskq
;
/*
 * Deferred-upcall task types handled by iptun_task_cb().
 * NOTE(review): the enclosing typedef/enum header is missing from this
 * extract.
 */
143 IPTUN_TASK_MTU_UPDATE
, /* tell mac about new tunnel link MTU */
144 IPTUN_TASK_LADDR_UPDATE
, /* tell mac about new local address */
145 IPTUN_TASK_RADDR_UPDATE
, /* tell mac about new remote address */
146 IPTUN_TASK_LINK_UPDATE
, /* tell mac about new link state */
147 IPTUN_TASK_PDATA_UPDATE
/* tell mac about updated plugin data */
/* Argument passed from iptun_task_dispatch() to iptun_task_cb(). */
150 typedef struct iptun_task_data_s
{
151 iptun_task_t itd_task
;
152 datalink_id_t itd_linkid
;
/* Forward declarations for the static functions defined in this file. */
155 static void iptun_task_dispatch(iptun_t
*, iptun_task_t
);
156 static int iptun_enter(iptun_t
*);
157 static void iptun_exit(iptun_t
*);
158 static void iptun_headergen(iptun_t
*, boolean_t
);
159 static void iptun_drop_pkt(mblk_t
*, uint64_t *);
160 static void iptun_input(void *, mblk_t
*, void *, ip_recv_attr_t
*);
161 static void iptun_input_icmp(void *, mblk_t
*, void *, ip_recv_attr_t
*);
162 static void iptun_output(iptun_t
*, mblk_t
*);
163 static uint32_t iptun_get_maxmtu(iptun_t
*, ip_xmit_attr_t
*, uint32_t);
164 static uint32_t iptun_update_mtu(iptun_t
*, ip_xmit_attr_t
*, uint32_t);
165 static uint32_t iptun_get_dst_pmtu(iptun_t
*, ip_xmit_attr_t
*);
166 static void iptun_update_dst_pmtu(iptun_t
*, ip_xmit_attr_t
*);
167 static int iptun_setladdr(iptun_t
*, const struct sockaddr_storage
*);
169 static void iptun_output_6to4(iptun_t
*, mblk_t
*);
170 static void iptun_output_common(iptun_t
*, ip_xmit_attr_t
*, mblk_t
*);
171 static boolean_t
iptun_verifyicmp(conn_t
*, void *, icmph_t
*, icmp6_t
*,
174 static void iptun_notify(void *, ip_xmit_attr_t
*, ixa_notify_type_t
,
/* GLDv3 callback vector registered in iptun_register(). */
177 static mac_callbacks_t iptun_m_callbacks
;
/*
 * GLDv3 m_getstat entry point: copy the counter selected by `stat` out of
 * the iptun_t into *val. NOTE(review): the enclosing switch statement,
 * break statements, and function braces are missing from this extract;
 * text is kept verbatim with comments only added.
 */
180 iptun_m_getstat(void *arg
, uint_t stat
, uint64_t *val
)
182 iptun_t
*iptun
= arg
;
186 case MAC_STAT_IERRORS
:
187 *val
= iptun
->iptun_ierrors
;
189 case MAC_STAT_OERRORS
:
190 *val
= iptun
->iptun_oerrors
;
192 case MAC_STAT_RBYTES
:
193 *val
= iptun
->iptun_rbytes
;
195 case MAC_STAT_IPACKETS
:
196 *val
= iptun
->iptun_ipackets
;
198 case MAC_STAT_OBYTES
:
199 *val
= iptun
->iptun_obytes
;
201 case MAC_STAT_OPACKETS
:
202 *val
= iptun
->iptun_opackets
;
204 case MAC_STAT_NORCVBUF
:
205 *val
= iptun
->iptun_norcvbuf
;
207 case MAC_STAT_NOXMTBUF
:
208 *val
= iptun
->iptun_noxmtbuf
;
/*
 * GLDv3 m_start entry point: mark the tunnel as started and queue a link
 * state update for mac. No locks are held across the upcall; the update is
 * deferred through iptun_task_dispatch(). NOTE(review): trailing lines
 * (iptun_exit()/return) are missing from this extract.
 */
218 iptun_m_start(void *arg
)
220 iptun_t
*iptun
= arg
;
223 if ((err
= iptun_enter(iptun
)) == 0) {
224 iptun
->iptun_flags
|= IPTUN_MAC_STARTED
;
225 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
/*
 * GLDv3 m_stop entry point: clear the started flag and queue a link state
 * update. Mirror image of iptun_m_start(). NOTE(review): trailing lines
 * are missing from this extract.
 */
232 iptun_m_stop(void *arg
)
234 iptun_t
*iptun
= arg
;
236 if (iptun_enter(iptun
) == 0) {
237 iptun
->iptun_flags
&= ~IPTUN_MAC_STARTED
;
238 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
244 * iptun_m_setpromisc() does nothing and always succeeds. This is because a
245 * tunnel data-link only ever receives packets that are destined exclusively
246 * for the local address of the tunnel.
/*
 * GLDv3 m_setpromisc callback; per the comment above, a no-op that always
 * succeeds. NOTE(review): the function bodies are missing from this extract.
 */
250 iptun_m_setpromisc(void *arg
, boolean_t on
)
/* GLDv3 m_multicst callback. NOTE(review): body missing from this extract. */
257 iptun_m_multicst(void *arg
, boolean_t add
, const uint8_t *addrp
)
263 * iptun_m_unicst() sets the local address.
/*
 * GLDv3 m_unicst entry point: interpret the raw address bytes according to
 * the tunnel's outer IP version, build a sockaddr_storage, and set it as
 * the tunnel's local address via iptun_setladdr(). NOTE(review): switch
 * case labels, breaks, and trailing lines are missing from this extract.
 */
267 iptun_m_unicst(void *arg
, const uint8_t *addrp
)
269 iptun_t
*iptun
= arg
;
271 struct sockaddr_storage ss
;
272 struct sockaddr_in
*sin
;
273 struct sockaddr_in6
*sin6
;
275 if ((err
= iptun_enter(iptun
)) == 0) {
276 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
/* IPv4 outer header: addrp holds an in_addr_t. */
278 sin
= (struct sockaddr_in
*)&ss
;
279 sin
->sin_family
= AF_INET
;
280 bcopy(addrp
, &sin
->sin_addr
, sizeof (in_addr_t
));
/* IPv6 outer header: addrp holds an in6_addr_t. */
283 sin6
= (struct sockaddr_in6
*)&ss
;
284 sin6
->sin6_family
= AF_INET6
;
285 bcopy(addrp
, &sin6
->sin6_addr
, sizeof (in6_addr_t
));
290 err
= iptun_setladdr(iptun
, &ss
);
/*
 * GLDv3 m_tx entry point: if the tunnel is not running, drop the whole
 * chain (counting against iptun_noxmtbuf); otherwise walk the mblk chain
 * and transmit each packet via iptun_output(). NOTE(review): lines that
 * unlink mp from the chain (setting nmp) and the return are missing from
 * this extract.
 */
297 iptun_m_tx(void *arg
, mblk_t
*mpchain
)
300 iptun_t
*iptun
= arg
;
302 if (!IS_IPTUN_RUNNING(iptun
)) {
303 iptun_drop_pkt(mpchain
, &iptun
->iptun_noxmtbuf
);
307 for (mp
= mpchain
; mp
!= NULL
; mp
= nmp
) {
310 iptun_output(iptun
, mp
);
/*
 * GLDv3 m_setprop entry point: validate and apply a link property
 * (hoplimit, encapsulation limit, or MTU). Changing hoplimit/encaplimit
 * regenerates the template header; an MTU change pins the MTU
 * (IPTUN_FIXED_MTU) and defers the mac notification through the taskq.
 * NOTE(review): error-return branches, breaks, and the MTU case label are
 * missing from this extract.
 */
318 iptun_m_setprop(void *barg
, const char *pr_name
, mac_prop_id_t pr_num
,
319 uint_t pr_valsize
, const void *pr_val
)
321 iptun_t
*iptun
= barg
;
322 uint32_t value
= *(uint32_t *)pr_val
;
326 * We need to enter this iptun_t since we'll be modifying the outer
329 if ((err
= iptun_enter(iptun
)) != 0)
333 case MAC_PROP_IPTUN_HOPLIMIT
:
334 if (value
< IPTUN_MIN_HOPLIMIT
|| value
> IPTUN_MAX_HOPLIMIT
) {
338 if (value
!= iptun
->iptun_hoplimit
) {
339 iptun
->iptun_hoplimit
= (uint8_t)value
;
340 iptun_headergen(iptun
, B_TRUE
);
/* Encapsulation limit is only meaningful for IPv6 tunnels. */
343 case MAC_PROP_IPTUN_ENCAPLIMIT
:
344 if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_IPV6
||
345 value
> IPTUN_MAX_ENCAPLIMIT
) {
349 if (value
!= iptun
->iptun_encaplimit
) {
350 iptun
->iptun_encaplimit
= (uint8_t)value
;
351 iptun_headergen(iptun
, B_TRUE
);
/* Presumably the MAC_PROP_MTU case — its label is not visible here. */
355 uint32_t maxmtu
= iptun_get_maxmtu(iptun
, NULL
, 0);
357 if (value
< iptun
->iptun_typeinfo
->iti_minmtu
||
362 iptun
->iptun_flags
|= IPTUN_FIXED_MTU
;
363 if (value
!= iptun
->iptun_mtu
) {
364 iptun
->iptun_mtu
= value
;
365 iptun_task_dispatch(iptun
, IPTUN_TASK_MTU_UPDATE
);
/*
 * GLDv3 m_getprop entry point: copy the current hoplimit or encapsulation
 * limit into the caller's uint32_t buffer. NOTE(review): switch header,
 * breaks, default case, and trailing lines are missing from this extract.
 */
378 iptun_m_getprop(void *barg
, const char *pr_name
, mac_prop_id_t pr_num
,
379 uint_t pr_valsize
, void *pr_val
)
381 iptun_t
*iptun
= barg
;
384 if ((err
= iptun_enter(iptun
)) != 0)
388 case MAC_PROP_IPTUN_HOPLIMIT
:
389 ASSERT(pr_valsize
>= sizeof (uint32_t));
390 *(uint32_t *)pr_val
= iptun
->iptun_hoplimit
;
393 case MAC_PROP_IPTUN_ENCAPLIMIT
:
394 *(uint32_t *)pr_val
= iptun
->iptun_encaplimit
;
/*
 * GLDv3 m_propinfo entry point: report valid ranges and defaults for the
 * tunnel link properties (hoplimit, encaplimit, and — presumably under a
 * MAC_PROP_MTU case not visible here — the MTU range). NOTE(review):
 * switch header, breaks, and trailing lines are missing from this extract.
 */
406 iptun_m_propinfo(void *barg
, const char *pr_name
, mac_prop_id_t pr_num
,
407 mac_prop_info_handle_t prh
)
409 iptun_t
*iptun
= barg
;
412 case MAC_PROP_IPTUN_HOPLIMIT
:
413 mac_prop_info_set_range_uint32(prh
,
414 IPTUN_MIN_HOPLIMIT
, IPTUN_MAX_HOPLIMIT
);
415 mac_prop_info_set_default_uint32(prh
, IPTUN_DEFAULT_HOPLIMIT
);
/* Encap limit is IPv6-only; skip the info for other tunnel types. */
418 case MAC_PROP_IPTUN_ENCAPLIMIT
:
419 if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_IPV6
)
421 mac_prop_info_set_range_uint32(prh
,
422 IPTUN_MIN_ENCAPLIMIT
, IPTUN_MAX_ENCAPLIMIT
);
423 mac_prop_info_set_default_uint32(prh
, IPTUN_DEFAULT_ENCAPLIMIT
);
/* MTU range: type minimum up to the current computed maximum. */
426 mac_prop_info_set_range_uint32(prh
,
427 iptun
->iptun_typeinfo
->iti_minmtu
,
428 iptun_get_maxmtu(iptun
, NULL
, 0));
/*
 * Returns the total tunnel count across all stacks. NOTE(review): the
 * function header is missing from this extract; presumably this is the
 * body of an iptun_count()-style accessor.
 */
436 return (iptun_tunnelcount
);
440 * Enter an iptun_t exclusively. This is essentially just a mutex, but we
441 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of
/*
 * Enter an iptun_t exclusively (see comment above): take iptun_lock,
 * wait out any pending delete, and fail if the tunnel has been condemned.
 * NOTE(review): the error/success return statements are missing from this
 * extract.
 */
445 iptun_enter(iptun_t
*iptun
)
447 mutex_enter(&iptun
->iptun_lock
);
448 while (iptun
->iptun_flags
& IPTUN_DELETE_PENDING
)
449 cv_wait(&iptun
->iptun_enter_cv
, &iptun
->iptun_lock
);
450 if (iptun
->iptun_flags
& IPTUN_CONDEMNED
) {
451 mutex_exit(&iptun
->iptun_lock
);
458 * Exit the tunnel entered in iptun_enter().
/* Exit the tunnel entered in iptun_enter() by dropping iptun_lock. */
461 iptun_exit(iptun_t
*iptun
)
463 mutex_exit(&iptun
->iptun_lock
);
467 * Enter the IP tunnel instance by datalink ID.
/*
 * Look up the tunnel by datalink ID under iptun_hash_lock and, on a hit,
 * enter it via iptun_enter(). Holding iptun_hash_lock across the enter
 * prevents deletion between lookup and entry (see iptun_hash comment).
 * NOTE(review): the lookup-failure branch and return are missing from
 * this extract.
 */
470 iptun_enter_by_linkid(datalink_id_t linkid
, iptun_t
**iptun
)
474 mutex_enter(&iptun_hash_lock
);
475 if (mod_hash_find(iptun_hash
, IPTUN_HASH_KEY(linkid
),
476 (mod_hash_val_t
*)iptun
) == 0)
477 err
= iptun_enter(*iptun
);
482 mutex_exit(&iptun_hash_lock
);
487 * Handle tasks that were deferred through the iptun_taskq because they require
488 * calling up to the mac module, and we can't call up to the mac module while
491 * This is tricky to get right without introducing race conditions and
492 * deadlocks with the mac module, as we cannot issue an upcall while in the
493 * iptun_t. The reason is that upcalls may try and enter the mac perimeter,
494 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
495 * module will already have the perimeter held, and will then try and enter
496 * the iptun_t. You can see the lock ordering problem with this; this will
499 * The safe way to do this is to enter the iptun_t in question and copy the
500 * information we need out of it so that we can exit it and know that the
501 * information being passed up to the upcalls won't be subject to modification
502 * by other threads. The problem now is that we need to exit it prior to
503 * issuing the upcall, but once we do this, a thread could come along and
504 * delete the iptun_t and thus the mac handle required to issue the upcall.
505 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
506 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which
507 * iptun_delete() will cv_wait() on. When the upcall completes, we clear
508 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
509 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having
510 * exited the iptun_t.
/*
 * Taskq callback (see block comment above): re-enter the tunnel by link
 * ID, snapshot the data the upcall needs, set IPTUN_UPCALL_PENDING, exit
 * the tunnel, perform the mac upcall, then clear the flag and signal any
 * waiting iptun_delete(). NOTE(review): switch headers, breaks, and the
 * iptun_exit() between the two switches are missing from this extract.
 */
513 iptun_task_cb(void *arg
)
515 iptun_task_data_t
*itd
= arg
;
516 iptun_task_t task
= itd
->itd_task
;
517 datalink_id_t linkid
= itd
->itd_linkid
;
521 link_state_t linkstate
;
523 iptun_header_t header
;
/* The task data was allocated in iptun_task_dispatch(); free it now. */
525 kmem_free(itd
, sizeof (*itd
));
528 * Note that if the lookup fails, it's because the tunnel was deleted
529 * between the time the task was dispatched and now. That isn't an
532 if (iptun_enter_by_linkid(linkid
, &iptun
) != 0)
535 iptun
->iptun_flags
|= IPTUN_UPCALL_PENDING
;
/* First pass: copy out the data needed by the upcall while entered. */
538 case IPTUN_TASK_MTU_UPDATE
:
539 mtu
= iptun
->iptun_mtu
;
541 case IPTUN_TASK_LADDR_UPDATE
:
542 addr
= iptun
->iptun_laddr
;
544 case IPTUN_TASK_RADDR_UPDATE
:
545 addr
= iptun
->iptun_raddr
;
547 case IPTUN_TASK_LINK_UPDATE
:
548 linkstate
= IS_IPTUN_RUNNING(iptun
) ?
549 LINK_STATE_UP
: LINK_STATE_DOWN
;
551 case IPTUN_TASK_PDATA_UPDATE
:
552 header_size
= iptun
->iptun_header_size
;
553 header
= iptun
->iptun_header
;
/* Second pass: make the mac upcall using only the copied data. */
562 case IPTUN_TASK_MTU_UPDATE
:
563 (void) mac_maxsdu_update(iptun
->iptun_mh
, mtu
);
565 case IPTUN_TASK_LADDR_UPDATE
:
566 mac_unicst_update(iptun
->iptun_mh
, (uint8_t *)&addr
.ia_addr
);
568 case IPTUN_TASK_RADDR_UPDATE
:
569 mac_dst_update(iptun
->iptun_mh
, (uint8_t *)&addr
.ia_addr
);
571 case IPTUN_TASK_LINK_UPDATE
:
572 mac_link_update(iptun
->iptun_mh
, linkstate
);
574 case IPTUN_TASK_PDATA_UPDATE
:
575 if (mac_pdata_update(iptun
->iptun_mh
,
576 header_size
== 0 ? NULL
: &header
, header_size
) != 0)
577 atomic_inc_64(&iptun
->iptun_taskq_fail
);
/* Upcall done: clear the pending flag and wake any iptun_delete(). */
581 mutex_enter(&iptun
->iptun_lock
);
582 iptun
->iptun_flags
&= ~IPTUN_UPCALL_PENDING
;
583 cv_signal(&iptun
->iptun_upcall_cv
);
584 mutex_exit(&iptun
->iptun_lock
);
/*
 * Queue an iptun_task_cb() invocation on iptun_taskq. Allocation and
 * dispatch are both non-blocking; failures are counted in
 * iptun_taskq_fail rather than retried. NOTE(review): the NULL-check
 * branch after kmem_alloc() and the returns are missing from this extract.
 */
588 iptun_task_dispatch(iptun_t
*iptun
, iptun_task_t iptun_task
)
590 iptun_task_data_t
*itd
;
592 itd
= kmem_alloc(sizeof (*itd
), KM_NOSLEEP
);
594 atomic_inc_64(&iptun
->iptun_taskq_fail
);
/* Record only the link ID; iptun_task_cb() re-looks-up the iptun_t. */
597 itd
->itd_task
= iptun_task
;
598 itd
->itd_linkid
= iptun
->iptun_linkid
;
599 if (ddi_taskq_dispatch(iptun_taskq
, iptun_task_cb
, itd
, DDI_NOSLEEP
)) {
600 atomic_inc_64(&iptun
->iptun_taskq_fail
);
601 kmem_free(itd
, sizeof (*itd
));
606 * Convert an iptun_addr_t to sockaddr_storage.
/*
 * Convert an iptun_addr_t to a sockaddr_storage: zero the output, copy
 * the v4 or v6 address depending on ia_family, and set ss_family.
 * NOTE(review): switch case labels, breaks, and the return are missing
 * from this extract.
 */
609 iptun_getaddr(iptun_addr_t
*iptun_addr
, struct sockaddr_storage
*ss
)
611 struct sockaddr_in
*sin
;
612 struct sockaddr_in6
*sin6
;
614 bzero(ss
, sizeof (*ss
));
615 switch (iptun_addr
->ia_family
) {
617 sin
= (struct sockaddr_in
*)ss
;
618 sin
->sin_addr
.s_addr
= iptun_addr
->ia_addr
.iau_addr4
;
621 sin6
= (struct sockaddr_in6
*)ss
;
622 sin6
->sin6_addr
= iptun_addr
->ia_addr
.iau_addr6
;
627 ss
->ss_family
= iptun_addr
->ia_family
;
631 * General purpose function to set an IP tunnel source or destination address.
/*
 * General purpose function to set an IP tunnel source or destination
 * address: verify the family matches the tunnel type, reject unusable
 * addresses (any/broadcast/multicast for v4; unspecified/multicast/
 * v4-mapped for v6), then store the address and family. Returns
 * EADDRNOTAVAIL or EAFNOSUPPORT on rejection. NOTE(review): case labels,
 * breaks, and the success return are missing from this extract.
 */
634 iptun_setaddr(iptun_type_t iptun_type
, iptun_addr_t
*iptun_addr
,
635 const struct sockaddr_storage
*ss
)
637 if (!IPTUN_ADDR_MATCH(iptun_type
, ss
->ss_family
))
640 switch (ss
->ss_family
) {
642 struct sockaddr_in
*sin
= (struct sockaddr_in
*)ss
;
644 if ((sin
->sin_addr
.s_addr
== INADDR_ANY
) ||
645 (sin
->sin_addr
.s_addr
== INADDR_BROADCAST
) ||
646 CLASSD(sin
->sin_addr
.s_addr
)) {
647 return (EADDRNOTAVAIL
);
649 iptun_addr
->ia_addr
.iau_addr4
= sin
->sin_addr
.s_addr
;
653 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)ss
;
655 if (IN6_IS_ADDR_UNSPECIFIED(&sin6
->sin6_addr
) ||
656 IN6_IS_ADDR_MULTICAST(&sin6
->sin6_addr
) ||
657 IN6_IS_ADDR_V4MAPPED(&sin6
->sin6_addr
)) {
658 return (EADDRNOTAVAIL
);
660 iptun_addr
->ia_addr
.iau_addr6
= sin6
->sin6_addr
;
664 return (EAFNOSUPPORT
);
666 iptun_addr
->ia_family
= ss
->ss_family
;
/* Set the tunnel's local (source) address via iptun_setaddr(). */
671 iptun_setladdr(iptun_t
*iptun
, const struct sockaddr_storage
*laddr
)
673 return (iptun_setaddr(iptun
->iptun_typeinfo
->iti_type
,
674 &iptun
->iptun_laddr
, laddr
));
/*
 * Set the tunnel's remote (destination) address; rejected for tunnel
 * types without a remote address (e.g. 6to4 per iptun_type_table).
 * NOTE(review): the error return for the no-raddr case is missing from
 * this extract.
 */
678 iptun_setraddr(iptun_t
*iptun
, const struct sockaddr_storage
*raddr
)
680 if (!(iptun
->iptun_typeinfo
->iti_hasraddr
))
682 return (iptun_setaddr(iptun
->iptun_typeinfo
->iti_type
,
683 &iptun
->iptun_raddr
, raddr
));
/*
 * Predicate: the tunnel can bind once its local address is set, plus its
 * remote address if the tunnel type requires one.
 */
687 iptun_canbind(iptun_t
*iptun
)
690 * A tunnel may bind when its source address has been set, and if its
691 * tunnel type requires one, also its destination address.
693 return ((iptun
->iptun_flags
& IPTUN_LADDR
) &&
694 ((iptun
->iptun_flags
& IPTUN_RADDR
) ||
695 !(iptun
->iptun_typeinfo
->iti_hasraddr
)));
699 * Verify that the local address is valid, and insert in the fanout
/*
 * Bind the tunnel's conn_t into ip: verify the local address for the
 * tunnel type, set the conn's addresses/IP version, connect the transmit
 * attributes, insert into the classifier fanout, and update the link MTU.
 * NOTE(review): several lines (error paths, ixa_refrele/cleanup, returns,
 * and some case-ending breaks) are missing from this extract; text is
 * kept verbatim with comments only added.
 */
702 iptun_bind(iptun_t
*iptun
)
704 conn_t
*connp
= iptun
->iptun_connp
;
707 ip_xmit_attr_t
*oldixa
;
709 ip_stack_t
*ipst
= connp
->conn_netstack
->netstack_ip
;
712 * Get an exclusive ixa for this thread.
713 * We defer updating conn_ixa until later to handle any concurrent
714 * conn_ixa_cleanup thread.
716 ixa
= conn_get_ixa(connp
, B_FALSE
);
720 /* We create PMTU state including for 6to4 */
721 ixa
->ixa_flags
|= IXAF_PMTU_DISCOVERY
;
723 ASSERT(iptun_canbind(iptun
));
725 mutex_enter(&connp
->conn_lock
);
727 * Note that conn_proto can't be set since the upper protocol
728 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
729 * ipcl_iptun_classify doesn't use conn_proto.
731 connp
->conn_ipversion
= iptun
->iptun_typeinfo
->iti_ipvers
;
733 switch (iptun
->iptun_typeinfo
->iti_type
) {
734 case IPTUN_TYPE_IPV4
:
735 IN6_IPADDR_TO_V4MAPPED(iptun
->iptun_laddr4
,
736 &connp
->conn_laddr_v6
);
737 IN6_IPADDR_TO_V4MAPPED(iptun
->iptun_raddr4
,
738 &connp
->conn_faddr_v6
);
739 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
/* Local v4 address must be an up unicast address. */
740 if (ip_laddr_verify_v4(iptun
->iptun_laddr4
, IPCL_ZONEID(connp
),
741 ipst
, B_FALSE
) != IPVL_UNICAST_UP
) {
742 mutex_exit(&connp
->conn_lock
);
743 error
= EADDRNOTAVAIL
;
747 case IPTUN_TYPE_IPV6
:
748 connp
->conn_laddr_v6
= iptun
->iptun_laddr6
;
749 connp
->conn_faddr_v6
= iptun
->iptun_raddr6
;
750 ixa
->ixa_flags
&= ~IXAF_IS_IPV4
;
751 /* We use a zero scopeid for now */
752 if (ip_laddr_verify_v6(&iptun
->iptun_laddr6
, IPCL_ZONEID(connp
),
753 ipst
, B_FALSE
, 0) != IPVL_UNICAST_UP
) {
754 mutex_exit(&connp
->conn_lock
);
755 error
= EADDRNOTAVAIL
;
/* 6to4: no remote address; faddr is INADDR_ANY. */
759 case IPTUN_TYPE_6TO4
:
760 IN6_IPADDR_TO_V4MAPPED(iptun
->iptun_laddr4
,
761 &connp
->conn_laddr_v6
);
762 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY
, &connp
->conn_faddr_v6
);
763 ixa
->ixa_flags
|= IXAF_IS_IPV4
;
764 mutex_exit(&connp
->conn_lock
);
/* For 6to4, an up or down unicast local address is acceptable. */
766 switch (ip_laddr_verify_v4(iptun
->iptun_laddr4
,
767 IPCL_ZONEID(connp
), ipst
, B_FALSE
)) {
768 case IPVL_UNICAST_UP
:
769 case IPVL_UNICAST_DOWN
:
772 error
= EADDRNOTAVAIL
;
778 /* TODO: do we need to do this? */
782 * When we set a tunnel's destination address, we do not
783 * care if the destination is reachable. Transient routing
784 * issues should not inhibit the creation of a tunnel
785 * interface, for example. Thus we pass B_FALSE here.
787 connp
->conn_saddr_v6
= connp
->conn_laddr_v6
;
788 mutex_exit(&connp
->conn_lock
);
790 /* As long as the MTU is large we avoid fragmentation */
791 ixa
->ixa_flags
|= IXAF_DONTFRAG
| IXAF_PMTU_IPV4_DF
;
793 /* We handle IPsec in iptun_output_common */
794 error
= ip_attr_connect(connp
, ixa
, &connp
->conn_saddr_v6
,
795 &connp
->conn_faddr_v6
, &connp
->conn_faddr_v6
, 0,
796 &connp
->conn_saddr_v6
, &uinfo
, 0);
801 /* saddr shouldn't change since it was already set */
802 ASSERT(IN6_ARE_ADDR_EQUAL(&connp
->conn_laddr_v6
,
803 &connp
->conn_saddr_v6
));
805 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */
806 ixa
->ixa_flags
|= IXAF_VERIFY_PMTU
;
807 ASSERT(uinfo
.iulp_mtu
!= 0);
810 * Allow setting new policies.
811 * The addresses/ports are already set, thus the IPsec policy calls
812 * can handle their passed-in conn's.
814 connp
->conn_policy_cached
= B_FALSE
;
817 error
= ipcl_conn_insert(connp
);
821 /* Atomically update v6lastdst and conn_ixa */
822 mutex_enter(&connp
->conn_lock
);
823 /* Record this as the "last" send even though we haven't sent any */
824 connp
->conn_v6lastdst
= connp
->conn_faddr_v6
;
826 iptun
->iptun_flags
|= IPTUN_BOUND
;
828 oldixa
= conn_replace_ixa(connp
, ixa
);
829 /* Done with conn_t */
830 mutex_exit(&connp
->conn_lock
);
834 * Now that we're bound with ip below us, this is a good
835 * time to initialize the destination path MTU and to
836 * re-calculate the tunnel's link MTU.
838 (void) iptun_update_mtu(iptun
, ixa
, 0);
840 if (IS_IPTUN_RUNNING(iptun
))
841 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
/*
 * Undo iptun_bind(): unbind the conn from ip, clear IPTUN_BOUND, and —
 * unless the tunnel is being torn down (IPTUN_CONDEMNED) — queue a link
 * state update for mac.
 */
849 iptun_unbind(iptun_t
*iptun
)
851 ASSERT(iptun
->iptun_flags
& IPTUN_BOUND
);
/* Caller either holds iptun_lock or the tunnel is already condemned. */
852 ASSERT(mutex_owned(&iptun
->iptun_lock
) ||
853 (iptun
->iptun_flags
& IPTUN_CONDEMNED
));
854 ip_unbind(iptun
->iptun_connp
);
855 iptun
->iptun_flags
&= ~IPTUN_BOUND
;
856 if (!(iptun
->iptun_flags
& IPTUN_CONDEMNED
))
857 iptun_task_dispatch(iptun
, IPTUN_TASK_LINK_UPDATE
);
861 * Re-generate the template data-link header for a given IP tunnel given the
862 * tunnel's current parameters.
/*
 * Re-generate the template data-link header from the tunnel's current
 * hoplimit/encaplimit. A zero iptun_header_size means "no custom header
 * needed". When update_mac is set, the new plugin data is (presumably via
 * the dispatch at the end) pushed to mac. NOTE(review): case labels,
 * breaks, and some assignments' right-hand lines are missing from this
 * extract.
 */
865 iptun_headergen(iptun_t
*iptun
, boolean_t update_mac
)
867 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
870 * We only need to use a custom IP header if the administrator
871 * has supplied a non-default hoplimit.
873 if (iptun
->iptun_hoplimit
== IPTUN_DEFAULT_HOPLIMIT
) {
874 iptun
->iptun_header_size
= 0;
877 iptun
->iptun_header_size
= sizeof (ipha_t
);
878 iptun
->iptun_header4
.ipha_version_and_hdr_length
=
879 IP_SIMPLE_HDR_VERSION
;
880 iptun
->iptun_header4
.ipha_fragment_offset_and_flags
=
882 iptun
->iptun_header4
.ipha_ttl
= iptun
->iptun_hoplimit
;
885 ip6_t
*ip6hp
= &iptun
->iptun_header6
.it6h_ip6h
;
888 * We only need to use a custom IPv6 header if either the
889 * administrator has supplied a non-default hoplimit, or we
890 * need to include an encapsulation limit option in the outer
893 if (iptun
->iptun_hoplimit
== IPTUN_DEFAULT_HOPLIMIT
&&
894 iptun
->iptun_encaplimit
== 0) {
895 iptun
->iptun_header_size
= 0;
899 (void) memset(ip6hp
, 0, sizeof (*ip6hp
));
900 if (iptun
->iptun_encaplimit
== 0) {
901 iptun
->iptun_header_size
= sizeof (ip6_t
);
902 ip6hp
->ip6_nxt
= IPPROTO_NONE
;
/* Encap limit requested: append the dst-opts encap-limit option. */
904 iptun_encaplim_t
*iel
;
906 iptun
->iptun_header_size
= sizeof (iptun_ipv6hdrs_t
);
908 * The mac_ipv6 plugin requires ip6_plen to be in host
909 * byte order and reflect the extension headers
910 * present in the template. The actual network byte
911 * order ip6_plen will be set on a per-packet basis on
914 ip6hp
->ip6_plen
= sizeof (*iel
);
915 ip6hp
->ip6_nxt
= IPPROTO_DSTOPTS
;
916 iel
= &iptun
->iptun_header6
.it6h_encaplim
;
917 *iel
= iptun_encaplim_init
;
918 iel
->iel_telopt
.ip6ot_encap_limit
=
919 iptun
->iptun_encaplimit
;
922 ip6hp
->ip6_hlim
= iptun
->iptun_hoplimit
;
928 iptun_task_dispatch(iptun
, IPTUN_TASK_PDATA_UPDATE
);
932 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy
/*
 * Insert inbound and outbound policy entries into the given policy head.
 * The code inserts an inbound/outbound pair twice — presumably once per
 * address family, with the lines that set the family selector `f` missing
 * from this extract — failing fast if the first pair cannot be inserted.
 */
936 iptun_insert_simple_policies(ipsec_policy_head_t
*ph
, ipsec_act_t
*actp
,
937 uint_t n
, netstack_t
*ns
)
941 if (!ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_INBOUND
, ns
) ||
942 !ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_OUTBOUND
, ns
))
946 return (ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_INBOUND
, ns
) &&
947 ipsec_polhead_insert(ph
, actp
, n
, f
, IPSEC_TYPE_OUTBOUND
, ns
));
951 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or
952 * IPTUN_MODIFY ioctls.
/*
 * Apply "simple" (ifconfig-style) IPsec policy to the tunnel: create the
 * per-tunnel policy (itp) on first use, clone active policy to the
 * inactive head as a backup, flush and rewrite the active head, and roll
 * back from the backup on failure. Caller must hold iptun_lock (asserted
 * below). NOTE(review): several branch/return/error-path lines are
 * missing from this extract; text is kept verbatim with comments only
 * added.
 */
955 iptun_set_sec_simple(iptun_t
*iptun
, const ipsec_req_t
*ipsr
)
959 ipsec_act_t
*actp
= NULL
;
960 boolean_t clear_all
, old_policy
= B_FALSE
;
961 ipsec_tun_pol_t
*itp
;
962 char name
[MAXLINKNAMELEN
];
964 netstack_t
*ns
= iptun
->iptun_ns
;
966 /* Can't specify self-encap on a tunnel. */
967 if (ipsr
->ipsr_self_encap_req
!= 0)
971 * If it's a "clear-all" entry, unset the security flags and resume
972 * normal cleartext (or inherit-from-global) policy.
974 clear_all
= ((ipsr
->ipsr_ah_req
& IPTUN_IPSEC_REQ_MASK
) == 0 &&
975 (ipsr
->ipsr_esp_req
& IPTUN_IPSEC_REQ_MASK
) == 0);
977 ASSERT(mutex_owned(&iptun
->iptun_lock
));
978 itp
= iptun
->iptun_itp
;
/* No tunnel policy yet: create one named after the datalink. */
982 if ((rc
= dls_mgmt_get_linkinfo(iptun
->iptun_linkid
, name
, NULL
,
985 ASSERT(name
[0] != '\0');
986 if ((itp
= create_tunnel_policy(name
, &rc
, &gen
, ns
)) == NULL
)
988 iptun
->iptun_itp
= itp
;
991 /* Allocate the actvec now, before holding itp or polhead locks. */
992 ipsec_actvec_from_req(ipsr
, &actp
, &nact
, ns
);
999 * Just write on the active polhead. Save the primary/secondary stuff
1000 * for spdsock operations.
1002 * Mutex because we need to write to the polhead AND flags atomically.
1003 * Other threads will acquire the polhead lock as a reader if the
1004 * (unprotected) flag is set.
1006 mutex_enter(&itp
->itp_lock
);
1007 if (itp
->itp_flags
& ITPF_P_TUNNEL
) {
1008 /* Oops, we lost a race. Let's get out of here. */
1012 old_policy
= ((itp
->itp_flags
& ITPF_P_ACTIVE
) != 0);
/* Back up active policy into the inactive head before rewriting. */
1015 ITPF_CLONE(itp
->itp_flags
);
1016 rc
= ipsec_copy_polhead(itp
->itp_policy
, itp
->itp_inactive
, ns
);
1018 /* inactive has already been cleared. */
1019 itp
->itp_flags
&= ~ITPF_IFLAGS
;
1022 rw_enter(&itp
->itp_policy
->iph_lock
, RW_WRITER
);
1023 ipsec_polhead_flush(itp
->itp_policy
, ns
);
1025 /* Else assume itp->itp_policy is already flushed. */
1026 rw_enter(&itp
->itp_policy
->iph_lock
, RW_WRITER
);
/* clear_all: leave the active head empty and drop the policy flags. */
1030 ASSERT(avl_numnodes(&itp
->itp_policy
->iph_rulebyid
) == 0);
1031 itp
->itp_flags
&= ~ITPF_PFLAGS
;
1032 rw_exit(&itp
->itp_policy
->iph_lock
);
1033 old_policy
= B_FALSE
; /* Clear out the inactive one too. */
1037 if (iptun_insert_simple_policies(itp
->itp_policy
, actp
, nact
, ns
)) {
1038 rw_exit(&itp
->itp_policy
->iph_lock
);
1040 * Adjust MTU and make sure the DL side knows what's up.
1042 itp
->itp_flags
= ITPF_P_ACTIVE
;
1043 (void) iptun_update_mtu(iptun
, NULL
, 0);
1044 old_policy
= B_FALSE
; /* Blank out inactive - we succeeded */
1046 rw_exit(&itp
->itp_policy
->iph_lock
);
1052 /* Recover policy in active polhead. */
1053 ipsec_swap_policy(itp
->itp_policy
, itp
->itp_inactive
, ns
);
1054 ITPF_SWAP(itp
->itp_flags
);
1057 /* Clear policy in inactive polhead. */
1058 itp
->itp_flags
&= ~ITPF_IFLAGS
;
1059 rw_enter(&itp
->itp_inactive
->iph_lock
, RW_WRITER
);
1060 ipsec_polhead_flush(itp
->itp_inactive
, ns
);
1061 rw_exit(&itp
->itp_inactive
->iph_lock
);
1064 mutex_exit(&itp
->itp_lock
);
1068 ipsec_actvec_free(actp
, nact
);
/*
 * Linear search of iptun_type_table for the given tunnel type. If no
 * entry matches, the loop stops on the IPTUN_TYPE_UNKNOWN sentinel and a
 * pointer to that sentinel entry is returned.
 */
1073 static iptun_typeinfo_t
*
1074 iptun_gettypeinfo(iptun_type_t type
)
1078 for (i
= 0; iptun_type_table
[i
].iti_type
!= IPTUN_TYPE_UNKNOWN
; i
++) {
1079 if (iptun_type_table
[i
].iti_type
== type
)
1082 return (&iptun_type_table
[i
]);
1086 * Set the parameters included in ik on the tunnel iptun. Parameters that can
1087 * only be set at creation time are set in iptun_create().
/*
 * Apply runtime-settable parameters from ik to the tunnel: local address,
 * remote address, and (non-6to4 only) simple IPsec policy. On any
 * failure, the original addresses and flags saved at entry are restored
 * (all-or-nothing semantics). NOTE(review): some error-path/return lines
 * and the label for the restore section are missing from this extract.
 */
1090 iptun_setparams(iptun_t
*iptun
, const iptun_kparams_t
*ik
)
1093 netstack_t
*ns
= iptun
->iptun_ns
;
1094 iptun_addr_t orig_laddr
, orig_raddr
;
1095 uint_t orig_flags
= iptun
->iptun_flags
;
1097 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_LADDR
) {
1098 if (orig_flags
& IPTUN_LADDR
)
1099 orig_laddr
= iptun
->iptun_laddr
;
1100 if ((err
= iptun_setladdr(iptun
, &ik
->iptun_kparam_laddr
)) != 0)
1102 iptun
->iptun_flags
|= IPTUN_LADDR
;
1105 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_RADDR
) {
1106 if (orig_flags
& IPTUN_RADDR
)
1107 orig_raddr
= iptun
->iptun_raddr
;
1108 if ((err
= iptun_setraddr(iptun
, &ik
->iptun_kparam_raddr
)) != 0)
1110 iptun
->iptun_flags
|= IPTUN_RADDR
;
1113 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_SECINFO
) {
1115 * Set IPsec policy originating from the ifconfig(1M) command
1116 * line. This is traditionally called "simple" policy because
1117 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
1118 * simple policy of "do ESP on everything" and/or "do AH on
1119 * everything" (as opposed to the rich policy that can be
1120 * defined with ipsecconf(1M)).
1122 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_6TO4
) {
1124 * Can't set security properties for automatic
1131 if (!ipsec_loaded(ns
->netstack_ipsec
)) {
1132 /* If IPsec can be loaded, try and load it now. */
1133 if (ipsec_failed(ns
->netstack_ipsec
)) {
1134 err
= EPROTONOSUPPORT
;
1137 ipsec_loader_loadnow(ns
->netstack_ipsec
);
1139 * ipsec_loader_loadnow() returns while IPsec is
1140 * loaded asynchronously. While a method exists to
1141 * wait for IPsec to load (ipsec_loader_wait()), it
1142 * requires use of a STREAMS queue to do a qwait().
1143 * We're not in STREAMS context here, and so we can't
1144 * use it. This is not a problem in practice because
1145 * in the vast majority of cases, key management and
1146 * global policy will have loaded before any tunnels
1147 * are plumbed, and so IPsec will already have been
1154 err
= iptun_set_sec_simple(iptun
, &ik
->iptun_kparam_secinfo
);
1156 iptun
->iptun_flags
|= IPTUN_SIMPLE_POLICY
;
1157 iptun
->iptun_simple_policy
= ik
->iptun_kparam_secinfo
;
1162 /* Restore original source and destination. */
1163 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_LADDR
&&
1164 (orig_flags
& IPTUN_LADDR
))
1165 iptun
->iptun_laddr
= orig_laddr
;
1166 if ((ik
->iptun_kparam_flags
& IPTUN_KPARAM_RADDR
) &&
1167 (orig_flags
& IPTUN_RADDR
))
1168 iptun
->iptun_raddr
= orig_raddr
;
1169 iptun
->iptun_flags
= orig_flags
;
/*
 * Register the tunnel with GLDv3: allocate a mac_register_t, fill in the
 * plugin ident, callbacks, src/dst addresses, SDU bounds, and (if a
 * custom template header exists) the plugin data, then call
 * mac_register(). Sets IPTUN_MAC_REGISTERED on success. NOTE(review):
 * mac_free()/return lines are missing from this extract.
 */
1175 iptun_register(iptun_t
*iptun
)
1177 mac_register_t
*mac
;
1180 ASSERT(!(iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
));
1182 if ((mac
= mac_alloc(MAC_VERSION
)) == NULL
)
1185 mac
->m_type_ident
= iptun
->iptun_typeinfo
->iti_ident
;
1186 mac
->m_driver
= iptun
;
1187 mac
->m_dip
= iptun_dip
;
1188 mac
->m_instance
= (uint_t
)-1;
1189 mac
->m_src_addr
= (uint8_t *)&iptun
->iptun_laddr
.ia_addr
;
/* Types without a remote address (6to4) register no destination. */
1190 mac
->m_dst_addr
= iptun
->iptun_typeinfo
->iti_hasraddr
?
1191 (uint8_t *)&iptun
->iptun_raddr
.ia_addr
: NULL
;
1192 mac
->m_callbacks
= &iptun_m_callbacks
;
1193 mac
->m_min_sdu
= iptun
->iptun_typeinfo
->iti_minmtu
;
1194 mac
->m_max_sdu
= iptun
->iptun_mtu
;
1195 if (iptun
->iptun_header_size
!= 0) {
1196 mac
->m_pdata
= &iptun
->iptun_header
;
1197 mac
->m_pdata_size
= iptun
->iptun_header_size
;
1199 if ((err
= mac_register(mac
, &iptun
->iptun_mh
)) == 0)
1200 iptun
->iptun_flags
|= IPTUN_MAC_REGISTERED
;
/*
 * Unregister the tunnel from GLDv3; clears IPTUN_MAC_REGISTERED only if
 * mac_unregister() succeeds. NOTE(review): the return is missing from
 * this extract.
 */
1206 iptun_unregister(iptun_t
*iptun
)
1210 ASSERT(iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
);
1211 if ((err
= mac_unregister(iptun
->iptun_mh
)) == 0)
1212 iptun
->iptun_flags
&= ~IPTUN_MAC_REGISTERED
;
/*
 * Create and initialize the tunnel's "outer" conn_t: wire up the receive
 * and ICMP callbacks, the ixa notify hook, zone/cred/cpid state, and the
 * address family matching the outer IP version, then clear
 * CONN_INCIPIENT. NOTE(review): case labels, breaks, and the return of
 * connp are missing from this extract.
 */
1217 iptun_conn_create(iptun_t
*iptun
, netstack_t
*ns
, cred_t
*credp
)
1221 if ((connp
= ipcl_conn_create(IPCL_IPCCONN
, KM_NOSLEEP
, ns
)) == NULL
)
1224 connp
->conn_flags
|= IPCL_IPTUN
;
1225 connp
->conn_iptun
= iptun
;
1226 connp
->conn_recv
= iptun_input
;
1227 connp
->conn_recvicmp
= iptun_input_icmp
;
1228 connp
->conn_verifyicmp
= iptun_verifyicmp
;
1231 * Register iptun_notify to listen to capability changes detected by IP.
1232 * This upcall is made in the context of the call to conn_ip_output.
1234 connp
->conn_ixa
->ixa_notify
= iptun_notify
;
1235 connp
->conn_ixa
->ixa_notify_cookie
= iptun
;
1238 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
1239 * for all other conn_t's.
1241 * Note that there's an important distinction between iptun_zoneid and
1242 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global
1243 * exclusive stack zones to make the ip module believe that the
1244 * non-global zone is actually a global zone. Therefore, when
1245 * interacting with the ip module, we must always use conn_zoneid.
1247 connp
->conn_zoneid
= (ns
->netstack_stackid
== GLOBAL_NETSTACKID
) ?
1248 crgetzoneid(credp
) : GLOBAL_ZONEID
;
1249 connp
->conn_cred
= credp
;
1250 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
1251 crhold(connp
->conn_cred
);
1252 connp
->conn_cpid
= NOPID
;
1254 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1255 connp
->conn_ixa
->ixa_zoneid
= connp
->conn_zoneid
;
1256 ASSERT(connp
->conn_ref
== 1);
1258 /* Cache things in ixa without an extra refhold */
1259 ASSERT(!(connp
->conn_ixa
->ixa_free_flags
& IXA_FREE_CRED
));
1260 connp
->conn_ixa
->ixa_cred
= connp
->conn_cred
;
1261 connp
->conn_ixa
->ixa_cpid
= connp
->conn_cpid
;
1264 * Have conn_ip_output drop packets should our outer source
1267 connp
->conn_ixa
->ixa_flags
|= IXAF_VERIFY_SOURCE
;
/* conn_family follows the OUTER IP version of the tunnel type. */
1269 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
1271 connp
->conn_family
= AF_INET6
;
1274 connp
->conn_family
= AF_INET
;
1277 mutex_enter(&connp
->conn_lock
);
1278 connp
->conn_state_flags
&= ~CONN_INCIPIENT
;
1279 mutex_exit(&connp
->conn_lock
);
1284 iptun_conn_destroy(conn_t
*connp
)
1286 ip_quiesce_conn(connp
);
1287 connp
->conn_iptun
= NULL
;
1288 ASSERT(connp
->conn_ref
== 1);
1289 CONN_DEC_REF(connp
);
1297 if ((iptun
= kmem_cache_alloc(iptun_cache
, KM_NOSLEEP
)) != NULL
) {
1298 bzero(iptun
, sizeof (*iptun
));
1299 atomic_inc_32(&iptun_tunnelcount
);
1305 iptun_free(iptun_t
*iptun
)
1307 ASSERT(iptun
->iptun_flags
& IPTUN_CONDEMNED
);
1309 if (iptun
->iptun_flags
& IPTUN_HASH_INSERTED
) {
1310 iptun_stack_t
*iptuns
= iptun
->iptun_iptuns
;
1312 mutex_enter(&iptun_hash_lock
);
1313 VERIFY(mod_hash_remove(iptun_hash
,
1314 IPTUN_HASH_KEY(iptun
->iptun_linkid
),
1315 (mod_hash_val_t
*)&iptun
) == 0);
1316 mutex_exit(&iptun_hash_lock
);
1317 iptun
->iptun_flags
&= ~IPTUN_HASH_INSERTED
;
1318 mutex_enter(&iptuns
->iptuns_lock
);
1319 list_remove(&iptuns
->iptuns_iptunlist
, iptun
);
1320 mutex_exit(&iptuns
->iptuns_lock
);
1323 if (iptun
->iptun_flags
& IPTUN_BOUND
)
1324 iptun_unbind(iptun
);
1327 * After iptun_unregister(), there will be no threads executing a
1328 * downcall from the mac module, including in the tx datapath.
1330 if (iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
)
1331 VERIFY(iptun_unregister(iptun
) == 0);
1333 if (iptun
->iptun_itp
!= NULL
) {
1335 * Remove from the AVL tree, AND release the reference iptun_t
1336 * itself holds on the ITP.
1338 itp_unlink(iptun
->iptun_itp
, iptun
->iptun_ns
);
1339 ITP_REFRELE(iptun
->iptun_itp
, iptun
->iptun_ns
);
1340 iptun
->iptun_itp
= NULL
;
1341 iptun
->iptun_flags
&= ~IPTUN_SIMPLE_POLICY
;
1345 * After ipcl_conn_destroy(), there will be no threads executing an
1346 * upcall from ip (i.e., iptun_input()), and it is then safe to free
1349 if (iptun
->iptun_connp
!= NULL
) {
1350 iptun_conn_destroy(iptun
->iptun_connp
);
1351 iptun
->iptun_connp
= NULL
;
1354 netstack_rele(iptun
->iptun_ns
);
1355 kmem_cache_free(iptun_cache
, iptun
);
1356 atomic_dec_32(&iptun_tunnelcount
);
1360 iptun_create(iptun_kparams_t
*ik
, cred_t
*credp
)
1362 iptun_t
*iptun
= NULL
;
1364 char linkname
[MAXLINKNAMELEN
];
1365 ipsec_tun_pol_t
*itp
;
1366 netstack_t
*ns
= NULL
;
1367 iptun_stack_t
*iptuns
;
1368 datalink_id_t tmpid
;
1369 zoneid_t zoneid
= crgetzoneid(credp
);
1370 boolean_t link_created
= B_FALSE
;
1372 /* The tunnel type is mandatory */
1373 if (!(ik
->iptun_kparam_flags
& IPTUN_KPARAM_TYPE
))
1377 * Is the linkid that the caller wishes to associate with this new
1378 * tunnel assigned to this zone?
1380 if (zone_check_datalink(&zoneid
, ik
->iptun_kparam_linkid
) != 0) {
1381 if (zoneid
!= GLOBAL_ZONEID
)
1383 } else if (zoneid
== GLOBAL_ZONEID
) {
1388 * Make sure that we're not trying to create a tunnel that has already
1391 if (iptun_enter_by_linkid(ik
->iptun_kparam_linkid
, &iptun
) == 0) {
1398 ns
= netstack_find_by_cred(credp
);
1399 iptuns
= ns
->netstack_iptun
;
1401 if ((iptun
= iptun_alloc()) == NULL
) {
1406 iptun
->iptun_linkid
= ik
->iptun_kparam_linkid
;
1407 iptun
->iptun_zoneid
= zoneid
;
1408 iptun
->iptun_ns
= ns
;
1410 iptun
->iptun_typeinfo
= iptun_gettypeinfo(ik
->iptun_kparam_type
);
1411 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_UNKNOWN
) {
1416 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_IMPLICIT
)
1417 iptun
->iptun_flags
|= IPTUN_IMPLICIT
;
1419 if ((err
= iptun_setparams(iptun
, ik
)) != 0)
1422 iptun
->iptun_hoplimit
= IPTUN_DEFAULT_HOPLIMIT
;
1423 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_IPV6
)
1424 iptun
->iptun_encaplimit
= IPTUN_DEFAULT_ENCAPLIMIT
;
1426 iptun_headergen(iptun
, B_FALSE
);
1428 iptun
->iptun_connp
= iptun_conn_create(iptun
, ns
, credp
);
1429 if (iptun
->iptun_connp
== NULL
) {
1434 iptun
->iptun_mtu
= iptun
->iptun_typeinfo
->iti_maxmtu
;
1435 iptun
->iptun_dpmtu
= iptun
->iptun_mtu
;
1438 * Find an ITP based on linkname. If we have parms already set via
1439 * the iptun_setparams() call above, it may have created an ITP for
1440 * us. We always try get_tunnel_policy() for DEBUG correctness
1441 * checks, and we may wish to refactor this to only check when
1442 * iptun_itp is NULL.
1444 if ((err
= dls_mgmt_get_linkinfo(iptun
->iptun_linkid
, linkname
, NULL
,
1447 if ((itp
= get_tunnel_policy(linkname
, ns
)) != NULL
)
1448 iptun
->iptun_itp
= itp
;
1451 * See if we have the necessary IP addresses assigned to this tunnel
1452 * to try and bind them with ip underneath us. If we're not ready to
1453 * bind yet, then we'll defer the bind operation until the addresses
1456 if (iptun_canbind(iptun
) && ((err
= iptun_bind(iptun
)) != 0))
1459 if ((err
= iptun_register(iptun
)) != 0)
1462 err
= dls_devnet_create(iptun
->iptun_mh
, iptun
->iptun_linkid
,
1463 iptun
->iptun_zoneid
);
1466 link_created
= B_TRUE
;
1469 * We hash by link-id as that is the key used by all other iptun
1470 * interfaces (modify, delete, etc.).
1472 if ((mherr
= mod_hash_insert(iptun_hash
,
1473 IPTUN_HASH_KEY(iptun
->iptun_linkid
), (mod_hash_val_t
)iptun
)) == 0) {
1474 mutex_enter(&iptuns
->iptuns_lock
);
1475 list_insert_head(&iptuns
->iptuns_iptunlist
, iptun
);
1476 mutex_exit(&iptuns
->iptuns_lock
);
1477 iptun
->iptun_flags
|= IPTUN_HASH_INSERTED
;
1478 } else if (mherr
== MH_ERR_NOMEM
) {
1480 } else if (mherr
== MH_ERR_DUPLICATE
) {
1487 if (iptun
== NULL
&& ns
!= NULL
)
1489 if (err
!= 0 && iptun
!= NULL
) {
1491 (void) dls_devnet_destroy(iptun
->iptun_mh
, &tmpid
,
1494 iptun
->iptun_flags
|= IPTUN_CONDEMNED
;
1501 iptun_delete(datalink_id_t linkid
, cred_t
*credp
)
1504 iptun_t
*iptun
= NULL
;
1506 if ((err
= iptun_enter_by_linkid(linkid
, &iptun
)) != 0)
1509 /* One cannot delete a tunnel that belongs to another zone. */
1510 if (iptun
->iptun_zoneid
!= crgetzoneid(credp
)) {
1516 * We need to exit iptun in order to issue calls up the stack such as
1517 * dls_devnet_destroy(). If we call up while still in iptun, deadlock
1518 * with calls coming down the stack is possible. We prevent other
1519 * threads from entering this iptun after we've exited it by setting
1520 * the IPTUN_DELETE_PENDING flag. This will cause callers of
1521 * iptun_enter() to block waiting on iptun_enter_cv. The assumption
1522 * here is that the functions we're calling while IPTUN_DELETE_PENDING
1523 * is set dont resuult in an iptun_enter() call, as that would result
1526 iptun
->iptun_flags
|= IPTUN_DELETE_PENDING
;
1528 /* Wait for any pending upcall to the mac module to complete. */
1529 while (iptun
->iptun_flags
& IPTUN_UPCALL_PENDING
)
1530 cv_wait(&iptun
->iptun_upcall_cv
, &iptun
->iptun_lock
);
1534 if ((err
= dls_devnet_destroy(iptun
->iptun_mh
, &linkid
, B_TRUE
)) == 0) {
1536 * mac_disable() will fail with EBUSY if there are references
1537 * to the iptun MAC. If there are none, then mac_disable()
1538 * will assure that none can be acquired until the MAC is
1541 * XXX CR 6791335 prevents us from calling mac_disable() prior
1542 * to dls_devnet_destroy(), so we unfortunately need to
1543 * attempt to re-create the devnet node if mac_disable()
1546 if ((err
= mac_disable(iptun
->iptun_mh
)) != 0) {
1547 (void) dls_devnet_create(iptun
->iptun_mh
, linkid
,
1548 iptun
->iptun_zoneid
);
1553 * Now that we know the fate of this iptun_t, we need to clear
1554 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
1555 * slated to be freed. Either way, we need to signal the threads
1556 * waiting in iptun_enter() so that they can either fail if
1557 * IPTUN_CONDEMNED is set, or continue if it's not.
1559 mutex_enter(&iptun
->iptun_lock
);
1560 iptun
->iptun_flags
&= ~IPTUN_DELETE_PENDING
;
1562 iptun
->iptun_flags
|= IPTUN_CONDEMNED
;
1563 cv_broadcast(&iptun
->iptun_enter_cv
);
1564 mutex_exit(&iptun
->iptun_lock
);
1567 * Note that there is no danger in calling iptun_free() after having
1568 * dropped the iptun_lock since callers of iptun_enter() at this point
1569 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
1570 * threads entering from mac callbacks which call iptun_enter()
1571 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
1572 * lock in order to remove the iptun_t from the hash table.
1581 iptun_modify(const iptun_kparams_t
*ik
, cred_t
*credp
)
1584 boolean_t laddr_change
= B_FALSE
, raddr_change
= B_FALSE
;
1587 if ((err
= iptun_enter_by_linkid(ik
->iptun_kparam_linkid
, &iptun
)) != 0)
1590 /* One cannot modify a tunnel that belongs to another zone. */
1591 if (iptun
->iptun_zoneid
!= crgetzoneid(credp
)) {
1596 /* The tunnel type cannot be changed */
1597 if (ik
->iptun_kparam_flags
& IPTUN_KPARAM_TYPE
) {
1602 if ((err
= iptun_setparams(iptun
, ik
)) != 0)
1604 iptun_headergen(iptun
, B_FALSE
);
1607 * If any of the tunnel's addresses has been modified and the tunnel
1608 * has the necessary addresses assigned to it, we need to try to bind
1609 * with ip underneath us. If we're not ready to bind yet, then we'll
1610 * try again when the addresses are modified later.
1612 laddr_change
= (ik
->iptun_kparam_flags
& IPTUN_KPARAM_LADDR
);
1613 raddr_change
= (ik
->iptun_kparam_flags
& IPTUN_KPARAM_RADDR
);
1614 if (laddr_change
|| raddr_change
) {
1615 if (iptun
->iptun_flags
& IPTUN_BOUND
)
1616 iptun_unbind(iptun
);
1617 if (iptun_canbind(iptun
) && (err
= iptun_bind(iptun
)) != 0) {
1619 iptun
->iptun_flags
&= ~IPTUN_LADDR
;
1621 iptun
->iptun_flags
&= ~IPTUN_RADDR
;
1627 iptun_task_dispatch(iptun
, IPTUN_TASK_LADDR_UPDATE
);
1629 iptun_task_dispatch(iptun
, IPTUN_TASK_RADDR_UPDATE
);
1636 /* Given an IP tunnel's datalink id, fill in its parameters. */
1638 iptun_info(iptun_kparams_t
*ik
, cred_t
*credp
)
1643 /* Is the tunnel link visible from the caller's zone? */
1644 if (!dls_devnet_islinkvisible(ik
->iptun_kparam_linkid
,
1645 crgetzoneid(credp
)))
1648 if ((err
= iptun_enter_by_linkid(ik
->iptun_kparam_linkid
, &iptun
)) != 0)
1651 bzero(ik
, sizeof (iptun_kparams_t
));
1653 ik
->iptun_kparam_linkid
= iptun
->iptun_linkid
;
1654 ik
->iptun_kparam_type
= iptun
->iptun_typeinfo
->iti_type
;
1655 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_TYPE
;
1657 if (iptun
->iptun_flags
& IPTUN_LADDR
) {
1658 iptun_getaddr(&iptun
->iptun_laddr
, &ik
->iptun_kparam_laddr
);
1659 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_LADDR
;
1661 if (iptun
->iptun_flags
& IPTUN_RADDR
) {
1662 iptun_getaddr(&iptun
->iptun_raddr
, &ik
->iptun_kparam_raddr
);
1663 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_RADDR
;
1666 if (iptun
->iptun_flags
& IPTUN_IMPLICIT
)
1667 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_IMPLICIT
;
1669 if (iptun
->iptun_itp
!= NULL
) {
1670 mutex_enter(&iptun
->iptun_itp
->itp_lock
);
1671 if (iptun
->iptun_itp
->itp_flags
& ITPF_P_ACTIVE
) {
1672 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_IPSECPOL
;
1673 if (iptun
->iptun_flags
& IPTUN_SIMPLE_POLICY
) {
1674 ik
->iptun_kparam_flags
|= IPTUN_KPARAM_SECINFO
;
1675 ik
->iptun_kparam_secinfo
=
1676 iptun
->iptun_simple_policy
;
1679 mutex_exit(&iptun
->iptun_itp
->itp_lock
);
1688 iptun_set_6to4relay(netstack_t
*ns
, ipaddr_t relay_addr
)
1690 if (relay_addr
== INADDR_BROADCAST
|| CLASSD(relay_addr
))
1691 return (EADDRNOTAVAIL
);
1692 ns
->netstack_iptun
->iptuns_relay_rtr_addr
= relay_addr
;
1697 iptun_get_6to4relay(netstack_t
*ns
, ipaddr_t
*relay_addr
)
1699 *relay_addr
= ns
->netstack_iptun
->iptuns_relay_rtr_addr
;
1703 iptun_set_policy(datalink_id_t linkid
, ipsec_tun_pol_t
*itp
)
1707 if (iptun_enter_by_linkid(linkid
, &iptun
) != 0)
1709 if (iptun
->iptun_itp
!= itp
) {
1710 ASSERT(iptun
->iptun_itp
== NULL
);
1712 iptun
->iptun_itp
= itp
;
1715 * IPsec policy means IPsec overhead, which means lower MTU.
1716 * Refresh the MTU for this tunnel.
1718 (void) iptun_update_mtu(iptun
, NULL
, 0);
1723 * Obtain the path MTU to the tunnel destination.
1724 * Can return zero in some cases.
1727 iptun_get_dst_pmtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
)
1730 conn_t
*connp
= iptun
->iptun_connp
;
1731 boolean_t need_rele
= B_FALSE
;
1734 * We only obtain the pmtu for tunnels that have a remote tunnel
1737 if (!(iptun
->iptun_flags
& IPTUN_RADDR
))
1741 ixa
= conn_get_ixa(connp
, B_FALSE
);
1747 * Guard against ICMP errors before we have sent, as well as against
1748 * and a thread which held conn_ixa.
1750 if (ixa
->ixa_ire
!= NULL
) {
1751 pmtu
= ip_get_pmtu(ixa
);
1754 * For both IPv4 and IPv6 we can have indication that the outer
1755 * header needs fragmentation.
1757 if (ixa
->ixa_flags
& IXAF_PMTU_TOO_SMALL
) {
1758 /* Must allow fragmentation in ip_output */
1759 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
1760 } else if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_6TO4
) {
1761 ixa
->ixa_flags
|= IXAF_DONTFRAG
;
1763 /* ip_get_pmtu might have set this - we don't want it */
1764 ixa
->ixa_flags
&= ~IXAF_PMTU_IPV4_DF
;
1774 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
1778 iptun_update_dst_pmtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
)
1781 conn_t
*connp
= iptun
->iptun_connp
;
1782 boolean_t need_rele
= B_FALSE
;
1784 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
1785 if (!(iptun
->iptun_flags
& IPTUN_RADDR
))
1789 ixa
= conn_get_ixa(connp
, B_FALSE
);
1795 * Guard against ICMP errors before we have sent, as well as against
1796 * and a thread which held conn_ixa.
1798 if (ixa
->ixa_ire
!= NULL
) {
1799 pmtu
= ip_get_pmtu(ixa
);
1801 * Update ixa_fragsize and ixa_pmtu.
1803 ixa
->ixa_fragsize
= ixa
->ixa_pmtu
= pmtu
;
1806 * For both IPv4 and IPv6 we can have indication that the outer
1807 * header needs fragmentation.
1809 if (ixa
->ixa_flags
& IXAF_PMTU_TOO_SMALL
) {
1810 /* Must allow fragmentation in ip_output */
1811 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
1812 } else if (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_6TO4
) {
1813 ixa
->ixa_flags
|= IXAF_DONTFRAG
;
1815 /* ip_get_pmtu might have set this - we don't want it */
1816 ixa
->ixa_flags
&= ~IXAF_PMTU_IPV4_DF
;
1825 * There is nothing that iptun can verify in addition to IP having
1826 * verified the IP addresses in the fanout.
1830 iptun_verifyicmp(conn_t
*connp
, void *arg2
, icmph_t
*icmph
, icmp6_t
*icmp6
,
1831 ip_recv_attr_t
*ira
)
1837 * Notify function registered with ip_xmit_attr_t.
1840 iptun_notify(void *arg
, ip_xmit_attr_t
*ixa
, ixa_notify_type_t ntype
,
1841 ixa_notify_arg_t narg
)
1843 iptun_t
*iptun
= (iptun_t
*)arg
;
1847 (void) iptun_update_mtu(iptun
, ixa
, narg
);
1853 * Returns the max of old_ovhd and the overhead associated with pol.
1856 iptun_max_policy_overhead(ipsec_policy_t
*pol
, uint32_t old_ovhd
)
1858 uint32_t new_ovhd
= old_ovhd
;
1860 while (pol
!= NULL
) {
1861 new_ovhd
= max(new_ovhd
,
1862 ipsec_act_ovhd(&pol
->ipsp_act
->ipa_act
));
1863 pol
= pol
->ipsp_hash
.hash_next
;
1869 iptun_get_ipsec_overhead(iptun_t
*iptun
)
1871 ipsec_policy_root_t
*ipr
;
1872 ipsec_policy_head_t
*iph
;
1873 ipsec_policy_t
*pol
;
1874 ipsec_selector_t sel
;
1876 uint32_t ipsec_ovhd
= 0;
1877 ipsec_tun_pol_t
*itp
= iptun
->iptun_itp
;
1878 netstack_t
*ns
= iptun
->iptun_ns
;
1880 if (itp
== NULL
|| !(itp
->itp_flags
& ITPF_P_ACTIVE
)) {
1882 * Consult global policy, just in case. This will only work
1883 * if we have both source and destination addresses to work
1886 if ((iptun
->iptun_flags
& (IPTUN_LADDR
|IPTUN_RADDR
)) !=
1887 (IPTUN_LADDR
|IPTUN_RADDR
))
1890 iph
= ipsec_system_policy(ns
);
1891 bzero(&sel
, sizeof (sel
));
1893 (iptun
->iptun_typeinfo
->iti_ipvers
== IPV4_VERSION
);
1894 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
1896 sel
.ips_local_addr_v4
= iptun
->iptun_laddr4
;
1897 sel
.ips_remote_addr_v4
= iptun
->iptun_raddr4
;
1900 sel
.ips_local_addr_v6
= iptun
->iptun_laddr6
;
1901 sel
.ips_remote_addr_v6
= iptun
->iptun_raddr6
;
1904 /* Check for both IPv4 and IPv6. */
1905 sel
.ips_protocol
= IPPROTO_ENCAP
;
1906 pol
= ipsec_find_policy_head(NULL
, iph
, IPSEC_TYPE_OUTBOUND
,
1909 ipsec_ovhd
= ipsec_act_ovhd(&pol
->ipsp_act
->ipa_act
);
1912 sel
.ips_protocol
= IPPROTO_IPV6
;
1913 pol
= ipsec_find_policy_head(NULL
, iph
, IPSEC_TYPE_OUTBOUND
,
1916 ipsec_ovhd
= max(ipsec_ovhd
,
1917 ipsec_act_ovhd(&pol
->ipsp_act
->ipa_act
));
1920 IPPH_REFRELE(iph
, ns
);
1923 * Look through all of the possible IPsec actions for the
1924 * tunnel, and find the largest potential IPsec overhead.
1926 iph
= itp
->itp_policy
;
1927 rw_enter(&iph
->iph_lock
, RW_READER
);
1928 ipr
= &(iph
->iph_root
[IPSEC_TYPE_OUTBOUND
]);
1929 ipsec_ovhd
= iptun_max_policy_overhead(
1930 ipr
->ipr_nonhash
[IPSEC_AF_V4
], 0);
1931 ipsec_ovhd
= iptun_max_policy_overhead(
1932 ipr
->ipr_nonhash
[IPSEC_AF_V6
], ipsec_ovhd
);
1933 for (i
= 0; i
< ipr
->ipr_nchains
; i
++) {
1934 ipsec_ovhd
= iptun_max_policy_overhead(
1935 ipr
->ipr_hash
[i
].hash_head
, ipsec_ovhd
);
1937 rw_exit(&iph
->iph_lock
);
1940 return (ipsec_ovhd
);
1944 * Calculate and return the maximum possible upper MTU for the given tunnel.
1946 * If new_pmtu is set then we also need to update the lower path MTU information
1947 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that
1948 * we are notified by conn_ip_output() when the path MTU increases.
1951 iptun_get_maxmtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
, uint32_t new_pmtu
)
1953 size_t header_size
, ipsec_overhead
;
1954 uint32_t maxmtu
, pmtu
;
1957 * Start with the path-MTU to the remote address, which is either
1958 * provided as the new_pmtu argument, or obtained using
1959 * iptun_get_dst_pmtu().
1961 if (new_pmtu
!= 0) {
1962 if (iptun
->iptun_flags
& IPTUN_RADDR
)
1963 iptun
->iptun_dpmtu
= new_pmtu
;
1965 } else if (iptun
->iptun_flags
& IPTUN_RADDR
) {
1966 if ((pmtu
= iptun_get_dst_pmtu(iptun
, ixa
)) == 0) {
1968 * We weren't able to obtain the path-MTU of the
1969 * destination. Use the previous value.
1971 pmtu
= iptun
->iptun_dpmtu
;
1973 iptun
->iptun_dpmtu
= pmtu
;
1977 * We have no path-MTU information to go on, use the maximum
1980 pmtu
= iptun
->iptun_typeinfo
->iti_maxmtu
;
1984 * Now calculate tunneling overhead and subtract that from the
1985 * path-MTU information obtained above.
1987 if (iptun
->iptun_header_size
!= 0) {
1988 header_size
= iptun
->iptun_header_size
;
1990 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
1992 header_size
= sizeof (ipha_t
);
1995 header_size
= sizeof (iptun_ipv6hdrs_t
);
2000 ipsec_overhead
= iptun_get_ipsec_overhead(iptun
);
2002 maxmtu
= pmtu
- (header_size
+ ipsec_overhead
);
2003 return (max(maxmtu
, iptun
->iptun_typeinfo
->iti_minmtu
));
2007 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer
2008 * of any change in MTU. The new_pmtu argument is the new lower path MTU to
2009 * the tunnel destination to be used in the tunnel MTU calculation. Passing
2010 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using
2013 * If the calculated tunnel MTU is different than its previous value, then we
2014 * notify the MAC layer above us of this change using mac_maxsdu_update().
2017 iptun_update_mtu(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
, uint32_t new_pmtu
)
2021 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */
2022 iptun_update_dst_pmtu(iptun
, ixa
);
2025 * We return the current MTU without updating it if it was pegged to a
2026 * static value using the MAC_PROP_MTU link property.
2028 if (iptun
->iptun_flags
& IPTUN_FIXED_MTU
)
2029 return (iptun
->iptun_mtu
);
2031 /* If the MTU isn't fixed, then use the maximum possible value. */
2032 newmtu
= iptun_get_maxmtu(iptun
, ixa
, new_pmtu
);
2034 * We only dynamically adjust the tunnel MTU for tunnels with
2035 * destinations because dynamic MTU calculations are based on the
2036 * destination path-MTU.
2038 if ((iptun
->iptun_flags
& IPTUN_RADDR
) && newmtu
!= iptun
->iptun_mtu
) {
2039 iptun
->iptun_mtu
= newmtu
;
2040 if (iptun
->iptun_flags
& IPTUN_MAC_REGISTERED
)
2041 iptun_task_dispatch(iptun
, IPTUN_TASK_MTU_UPDATE
);
2048 * Frees a packet or packet chain and bumps stat for each freed packet.
2051 iptun_drop_pkt(mblk_t
*mp
, uint64_t *stat
)
2055 for (pktmp
= mp
; pktmp
!= NULL
; pktmp
= mp
) {
2057 pktmp
->b_next
= NULL
;
2059 atomic_inc_64(stat
);
2065 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
2066 * original packet to its b_cont. Returns NULL on failure.
2069 iptun_build_icmperr(size_t hdrs_size
, mblk_t
*orig_pkt
)
2073 if ((icmperr_mp
= allocb(hdrs_size
, BPRI_MED
)) != NULL
) {
2074 icmperr_mp
->b_wptr
+= hdrs_size
;
2075 /* tack on the offending packet */
2076 icmperr_mp
->b_cont
= orig_pkt
;
2078 return (icmperr_mp
);
2082 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in
2086 iptun_sendicmp_v4(iptun_t
*iptun
, icmph_t
*icmp
, ipha_t
*orig_ipha
, mblk_t
*mp
)
2088 size_t orig_pktsize
, hdrs_size
;
2092 ip_xmit_attr_t ixas
;
2093 conn_t
*connp
= iptun
->iptun_connp
;
2095 orig_pktsize
= msgdsize(mp
);
2096 hdrs_size
= sizeof (ipha_t
) + sizeof (icmph_t
);
2097 if ((icmperr_mp
= iptun_build_icmperr(hdrs_size
, mp
)) == NULL
) {
2098 iptun_drop_pkt(mp
, &iptun
->iptun_noxmtbuf
);
2102 new_ipha
= (ipha_t
*)icmperr_mp
->b_rptr
;
2103 new_icmp
= (icmph_t
*)(new_ipha
+ 1);
2105 new_ipha
->ipha_version_and_hdr_length
= IP_SIMPLE_HDR_VERSION
;
2106 new_ipha
->ipha_type_of_service
= 0;
2107 new_ipha
->ipha_ident
= 0;
2108 new_ipha
->ipha_fragment_offset_and_flags
= 0;
2109 new_ipha
->ipha_ttl
= orig_ipha
->ipha_ttl
;
2110 new_ipha
->ipha_protocol
= IPPROTO_ICMP
;
2111 new_ipha
->ipha_src
= orig_ipha
->ipha_dst
;
2112 new_ipha
->ipha_dst
= orig_ipha
->ipha_src
;
2113 new_ipha
->ipha_hdr_checksum
= 0; /* will be computed by ip */
2114 new_ipha
->ipha_length
= htons(hdrs_size
+ orig_pktsize
);
2117 new_icmp
->icmph_checksum
= 0;
2118 new_icmp
->icmph_checksum
= IP_CSUM(icmperr_mp
, sizeof (ipha_t
), 0);
2120 bzero(&ixas
, sizeof (ixas
));
2121 ixas
.ixa_flags
= IXAF_BASIC_SIMPLE_V4
;
2122 if (new_ipha
->ipha_src
== INADDR_ANY
) {
2123 ixas
.ixa_flags
&= ~IXAF_VERIFY_SOURCE
;
2124 ixas
.ixa_flags
|= IXAF_SET_SOURCE
;
2127 ixas
.ixa_zoneid
= IPCL_ZONEID(connp
);
2128 ixas
.ixa_ipst
= connp
->conn_netstack
->netstack_ip
;
2129 ixas
.ixa_cred
= connp
->conn_cred
;
2130 ixas
.ixa_cpid
= NOPID
;
2132 ixas
.ixa_ifindex
= 0;
2133 ixas
.ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
2135 (void) ip_output_simple(icmperr_mp
, &ixas
);
2140 iptun_sendicmp_v6(iptun_t
*iptun
, icmp6_t
*icmp6
, ip6_t
*orig_ip6h
, mblk_t
*mp
)
2142 size_t orig_pktsize
, hdrs_size
;
2143 mblk_t
*icmp6err_mp
;
2146 ip_xmit_attr_t ixas
;
2147 conn_t
*connp
= iptun
->iptun_connp
;
2149 orig_pktsize
= msgdsize(mp
);
2150 hdrs_size
= sizeof (ip6_t
) + sizeof (icmp6_t
);
2151 if ((icmp6err_mp
= iptun_build_icmperr(hdrs_size
, mp
)) == NULL
) {
2152 iptun_drop_pkt(mp
, &iptun
->iptun_noxmtbuf
);
2156 new_ip6h
= (ip6_t
*)icmp6err_mp
->b_rptr
;
2157 new_icmp6
= (icmp6_t
*)(new_ip6h
+ 1);
2159 new_ip6h
->ip6_vcf
= orig_ip6h
->ip6_vcf
;
2160 new_ip6h
->ip6_plen
= htons(sizeof (icmp6_t
) + orig_pktsize
);
2161 new_ip6h
->ip6_hops
= orig_ip6h
->ip6_hops
;
2162 new_ip6h
->ip6_nxt
= IPPROTO_ICMPV6
;
2163 new_ip6h
->ip6_src
= orig_ip6h
->ip6_dst
;
2164 new_ip6h
->ip6_dst
= orig_ip6h
->ip6_src
;
2166 *new_icmp6
= *icmp6
;
2167 /* The checksum is calculated in ip_output_simple and friends. */
2168 new_icmp6
->icmp6_cksum
= new_ip6h
->ip6_plen
;
2170 bzero(&ixas
, sizeof (ixas
));
2171 ixas
.ixa_flags
= IXAF_BASIC_SIMPLE_V6
;
2172 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h
->ip6_src
)) {
2173 ixas
.ixa_flags
&= ~IXAF_VERIFY_SOURCE
;
2174 ixas
.ixa_flags
|= IXAF_SET_SOURCE
;
2177 ixas
.ixa_zoneid
= IPCL_ZONEID(connp
);
2178 ixas
.ixa_ipst
= connp
->conn_netstack
->netstack_ip
;
2179 ixas
.ixa_cred
= connp
->conn_cred
;
2180 ixas
.ixa_cpid
= NOPID
;
2182 ixas
.ixa_ifindex
= 0;
2183 ixas
.ixa_multicast_ttl
= IP_DEFAULT_MULTICAST_TTL
;
2185 (void) ip_output_simple(icmp6err_mp
, &ixas
);
2190 iptun_icmp_error_v4(iptun_t
*iptun
, ipha_t
*orig_ipha
, mblk_t
*mp
,
2191 uint8_t type
, uint8_t code
)
2195 bzero(&icmp
, sizeof (icmp
));
2196 icmp
.icmph_type
= type
;
2197 icmp
.icmph_code
= code
;
2199 iptun_sendicmp_v4(iptun
, &icmp
, orig_ipha
, mp
);
2203 iptun_icmp_fragneeded_v4(iptun_t
*iptun
, uint32_t newmtu
, ipha_t
*orig_ipha
,
2208 icmp
.icmph_type
= ICMP_DEST_UNREACHABLE
;
2209 icmp
.icmph_code
= ICMP_FRAGMENTATION_NEEDED
;
2210 icmp
.icmph_du_zero
= 0;
2211 icmp
.icmph_du_mtu
= htons(newmtu
);
2213 iptun_sendicmp_v4(iptun
, &icmp
, orig_ipha
, mp
);
2217 iptun_icmp_error_v6(iptun_t
*iptun
, ip6_t
*orig_ip6h
, mblk_t
*mp
,
2218 uint8_t type
, uint8_t code
, uint32_t offset
)
2222 bzero(&icmp6
, sizeof (icmp6
));
2223 icmp6
.icmp6_type
= type
;
2224 icmp6
.icmp6_code
= code
;
2225 if (type
== ICMP6_PARAM_PROB
)
2226 icmp6
.icmp6_pptr
= htonl(offset
);
2228 iptun_sendicmp_v6(iptun
, &icmp6
, orig_ip6h
, mp
);
2232 iptun_icmp_toobig_v6(iptun_t
*iptun
, uint32_t newmtu
, ip6_t
*orig_ip6h
,
2237 icmp6
.icmp6_type
= ICMP6_PACKET_TOO_BIG
;
2238 icmp6
.icmp6_code
= 0;
2239 icmp6
.icmp6_mtu
= htonl(newmtu
);
2241 iptun_sendicmp_v6(iptun
, &icmp6
, orig_ip6h
, mp
);
2245 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The
2246 * mp argument is only used to do bounds checking.
2249 is_icmp_error(mblk_t
*mp
, ipha_t
*ipha
, ip6_t
*ip6h
)
2256 ASSERT(ip6h
== NULL
);
2257 if (ipha
->ipha_protocol
!= IPPROTO_ICMP
)
2260 hlen
= IPH_HDR_LENGTH(ipha
);
2261 icmph
= (icmph_t
*)((uint8_t *)ipha
+ hlen
);
2262 return (ICMP_IS_ERROR(icmph
->icmph_type
) ||
2263 icmph
->icmph_type
== ICMP_REDIRECT
);
2268 ASSERT(ip6h
!= NULL
);
2269 if (!ip_hdr_length_nexthdr_v6(mp
, ip6h
, &hlen
, &nexthdrp
) ||
2270 *nexthdrp
!= IPPROTO_ICMPV6
) {
2274 icmp6
= (icmp6_t
*)((uint8_t *)ip6h
+ hlen
);
2275 return (ICMP6_IS_ERROR(icmp6
->icmp6_type
) ||
2276 icmp6
->icmp6_type
== ND_REDIRECT
);
2281 * Find inner and outer IP headers from a tunneled packet as setup for calls
2282 * into ipsec_tun_{in,out}bound().
2283 * Note that we need to allow the outer header to be in a separate mblk from
2285 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero.
2288 iptun_find_headers(mblk_t
*mp
, size_t outer_hlen
, ipha_t
**outer4
,
2289 ipha_t
**inner4
, ip6_t
**outer6
, ip6_t
**inner6
)
2292 size_t first_mblkl
= MBLKL(mp
);
2296 * Don't bother handling packets that don't have a full IP header in
2297 * the fist mblk. For the input path, the ip module ensures that this
2298 * won't happen, and on the output path, the IP tunneling MAC-type
2299 * plugins ensure that this also won't happen.
2301 if (first_mblkl
< sizeof (ipha_t
))
2303 ipha
= (ipha_t
*)(mp
->b_rptr
);
2304 switch (IPH_HDR_VERSION(ipha
)) {
2308 if (outer_hlen
== 0)
2309 outer_hlen
= IPH_HDR_LENGTH(ipha
);
2313 *outer6
= (ip6_t
*)ipha
;
2314 if (outer_hlen
== 0)
2315 outer_hlen
= ip_hdr_length_v6(mp
, (ip6_t
*)ipha
);
2321 if (first_mblkl
< outer_hlen
||
2322 (first_mblkl
== outer_hlen
&& mp
->b_cont
== NULL
))
2326 * We don't bother doing a pullup here since the outer header will
2327 * just get stripped off soon on input anyway. We just want to ensure
2328 * that the inner* pointer points to a full header.
2330 if (first_mblkl
== outer_hlen
) {
2331 inner_mp
= mp
->b_cont
;
2332 ipha
= (ipha_t
*)inner_mp
->b_rptr
;
2335 ipha
= (ipha_t
*)(mp
->b_rptr
+ outer_hlen
);
2337 switch (IPH_HDR_VERSION(ipha
)) {
2339 if (inner_mp
->b_wptr
- (uint8_t *)ipha
< sizeof (ipha_t
))
2345 if (inner_mp
->b_wptr
- (uint8_t *)ipha
< sizeof (ip6_t
))
2348 *inner6
= (ip6_t
*)ipha
;
2354 return (outer_hlen
);
2358 * Received ICMP error in response to an X over IPv4 packet that we
2361 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2364 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
2368 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
2370 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
2371 * whatever the very-inner packet is (IPv4(2) or IPv6).
2374 iptun_input_icmp_v4(iptun_t
*iptun
, mblk_t
*data_mp
, icmph_t
*icmph
,
2375 ip_recv_attr_t
*ira
)
2378 ipha_t
*outer4
, *inner4
;
2379 ip6_t
*outer6
, *inner6
;
2383 ASSERT(data_mp
->b_cont
== NULL
);
2385 * Temporarily move b_rptr forward so that iptun_find_headers() can
2386 * find headers in the ICMP packet payload.
2388 orig
= data_mp
->b_rptr
;
2389 data_mp
->b_rptr
= (uint8_t *)(icmph
+ 1);
2391 * The ip module ensures that ICMP errors contain at least the
2392 * original IP header (otherwise, the error would never have made it
2395 ASSERT(MBLKL(data_mp
) >= 0);
2396 outer_hlen
= iptun_find_headers(data_mp
, 0, &outer4
, &inner4
, &outer6
,
2398 ASSERT(outer6
== NULL
);
2399 data_mp
->b_rptr
= orig
;
2400 if (outer_hlen
== 0) {
2401 iptun_drop_pkt(data_mp
, &iptun
->iptun_ierrors
);
2405 /* Only ICMP errors due to tunneled packets should reach here. */
2406 ASSERT(outer4
->ipha_protocol
== IPPROTO_ENCAP
||
2407 outer4
->ipha_protocol
== IPPROTO_IPV6
);
2409 data_mp
= ipsec_tun_inbound(ira
, data_mp
, iptun
->iptun_itp
,
2410 inner4
, inner6
, outer4
, outer6
, -outer_hlen
, iptun
->iptun_ns
);
2411 if (data_mp
== NULL
) {
2412 /* Callee did all of the freeing. */
2413 atomic_inc_64(&iptun
->iptun_ierrors
);
2416 /* We should never see reassembled fragment here. */
2417 ASSERT(data_mp
->b_next
== NULL
);
2419 data_mp
->b_rptr
= (uint8_t *)outer4
+ outer_hlen
;
2422 * If the original packet being transmitted was itself an ICMP error,
2423 * then drop this packet. We don't want to generate an ICMP error in
2424 * response to an ICMP error.
2426 if (is_icmp_error(data_mp
, inner4
, inner6
)) {
2427 iptun_drop_pkt(data_mp
, &iptun
->iptun_norcvbuf
);
2431 switch (icmph
->icmph_type
) {
2432 case ICMP_DEST_UNREACHABLE
:
2433 type
= (inner4
!= NULL
? icmph
->icmph_type
: ICMP6_DST_UNREACH
);
2434 switch (icmph
->icmph_code
) {
2435 case ICMP_FRAGMENTATION_NEEDED
: {
2439 * We reconcile this with the fact that the tunnel may
2440 * also have IPsec policy by letting iptun_update_mtu
2443 newmtu
= iptun_update_mtu(iptun
, NULL
,
2444 ntohs(icmph
->icmph_du_mtu
));
2446 if (inner4
!= NULL
) {
2447 iptun_icmp_fragneeded_v4(iptun
, newmtu
, inner4
,
2450 iptun_icmp_toobig_v6(iptun
, newmtu
, inner6
,
2455 case ICMP_DEST_NET_UNREACH_ADMIN
:
2456 case ICMP_DEST_HOST_UNREACH_ADMIN
:
2457 code
= (inner4
!= NULL
? ICMP_DEST_NET_UNREACH_ADMIN
:
2458 ICMP6_DST_UNREACH_ADMIN
);
2461 code
= (inner4
!= NULL
? ICMP_HOST_UNREACHABLE
:
2462 ICMP6_DST_UNREACH_ADDR
);
2466 case ICMP_TIME_EXCEEDED
:
2467 if (inner6
!= NULL
) {
2468 type
= ICMP6_TIME_EXCEEDED
;
2470 } /* else we're already set. */
2472 case ICMP_PARAM_PROBLEM
:
2474 * This is a problem with the outer header we transmitted.
2475 * Treat this as an output error.
2477 iptun_drop_pkt(data_mp
, &iptun
->iptun_oerrors
);
2480 iptun_drop_pkt(data_mp
, &iptun
->iptun_norcvbuf
);
2484 if (inner4
!= NULL
) {
2485 iptun_icmp_error_v4(iptun
, inner4
, data_mp
, type
, code
);
2487 iptun_icmp_error_v6(iptun
, inner6
, data_mp
, type
, code
, 0);
2492 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
2493 * Encapsulation Limit destination option. If there is one, set encaplim_ptr
2494 * to point to the option value.
2497 iptun_find_encaplimit(mblk_t
*mp
, ip6_t
*ip6h
, uint8_t **encaplim_ptr
)
2502 struct ip6_opt
*optp
;
2504 pkt
.ipp_fields
= 0; /* must be initialized */
2505 (void) ip_find_hdr_v6(mp
, ip6h
, &pkt
, NULL
);
2506 if ((pkt
.ipp_fields
& IPPF_DSTOPTS
) != 0) {
2507 destp
= pkt
.ipp_dstopts
;
2508 } else if ((pkt
.ipp_fields
& IPPF_RTHDRDSTOPTS
) != 0) {
2509 destp
= pkt
.ipp_rthdrdstopts
;
2514 endptr
= (uint8_t *)destp
+ 8 * (destp
->ip6d_len
+ 1);
2515 optp
= (struct ip6_opt
*)(destp
+ 1);
2516 while (endptr
- (uint8_t *)optp
> sizeof (*optp
)) {
2517 if (optp
->ip6o_type
== IP6OPT_TUNNEL_LIMIT
) {
2518 if ((uint8_t *)(optp
+ 1) >= endptr
)
2520 *encaplim_ptr
= (uint8_t *)&optp
[1];
2523 optp
= (struct ip6_opt
*)((uint8_t *)optp
+ optp
->ip6o_len
+ 2);
2529 * Received ICMPv6 error in response to an X over IPv6 packet that we
2532 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2535 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
2539 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
2541 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
2542 * whatever the very-inner packet is (IPv4 or IPv6(2)).
2545 iptun_input_icmp_v6(iptun_t
*iptun
, mblk_t
*data_mp
, icmp6_t
*icmp6h
,
2546 ip_recv_attr_t
*ira
)
2549 ipha_t
*outer4
, *inner4
;
2550 ip6_t
*outer6
, *inner6
;
2554 ASSERT(data_mp
->b_cont
== NULL
);
2557 * Temporarily move b_rptr forward so that iptun_find_headers() can
2558 * find IP headers in the ICMP packet payload.
2560 orig
= data_mp
->b_rptr
;
2561 data_mp
->b_rptr
= (uint8_t *)(icmp6h
+ 1);
2563 * The ip module ensures that ICMP errors contain at least the
2564 * original IP header (otherwise, the error would never have made it
2567 ASSERT(MBLKL(data_mp
) >= 0);
2568 outer_hlen
= iptun_find_headers(data_mp
, 0, &outer4
, &inner4
, &outer6
,
2570 ASSERT(outer4
== NULL
);
2571 data_mp
->b_rptr
= orig
; /* Restore r_ptr */
2572 if (outer_hlen
== 0) {
2573 iptun_drop_pkt(data_mp
, &iptun
->iptun_ierrors
);
2577 data_mp
= ipsec_tun_inbound(ira
, data_mp
, iptun
->iptun_itp
,
2578 inner4
, inner6
, outer4
, outer6
, -outer_hlen
, iptun
->iptun_ns
);
2579 if (data_mp
== NULL
) {
2580 /* Callee did all of the freeing. */
2581 atomic_inc_64(&iptun
->iptun_ierrors
);
2584 /* We should never see reassembled fragment here. */
2585 ASSERT(data_mp
->b_next
== NULL
);
2587 data_mp
->b_rptr
= (uint8_t *)outer6
+ outer_hlen
;
2590 * If the original packet being transmitted was itself an ICMP error,
2591 * then drop this packet. We don't want to generate an ICMP error in
2592 * response to an ICMP error.
2594 if (is_icmp_error(data_mp
, inner4
, inner6
)) {
2595 iptun_drop_pkt(data_mp
, &iptun
->iptun_norcvbuf
);
2599 switch (icmp6h
->icmp6_type
) {
2600 case ICMP6_PARAM_PROB
: {
2601 uint8_t *encaplim_ptr
;
2604 * If the ICMPv6 error points to a valid Tunnel Encapsulation
2605 * Limit option and the limit value is 0, then fall through
2606 * and send a host unreachable message. Otherwise, treat the
2607 * error as an output error, as there must have been a problem
2608 * with a packet we sent.
2610 if (!iptun_find_encaplimit(data_mp
, outer6
, &encaplim_ptr
) ||
2611 (icmp6h
->icmp6_pptr
!=
2612 ((ptrdiff_t)encaplim_ptr
- (ptrdiff_t)outer6
)) ||
2613 *encaplim_ptr
!= 0) {
2614 iptun_drop_pkt(data_mp
, &iptun
->iptun_oerrors
);
2619 case ICMP6_TIME_EXCEEDED
:
2620 case ICMP6_DST_UNREACH
:
2621 type
= (inner4
!= NULL
? ICMP_DEST_UNREACHABLE
:
2623 code
= (inner4
!= NULL
? ICMP_HOST_UNREACHABLE
:
2624 ICMP6_DST_UNREACH_ADDR
);
2626 case ICMP6_PACKET_TOO_BIG
: {
2630 * We reconcile this with the fact that the tunnel may also
2631 * have IPsec policy by letting iptun_update_mtu take care of
2634 newmtu
= iptun_update_mtu(iptun
, NULL
,
2635 ntohl(icmp6h
->icmp6_mtu
));
2637 if (inner4
!= NULL
) {
2638 iptun_icmp_fragneeded_v4(iptun
, newmtu
, inner4
,
2641 iptun_icmp_toobig_v6(iptun
, newmtu
, inner6
, data_mp
);
2646 iptun_drop_pkt(data_mp
, &iptun
->iptun_norcvbuf
);
2650 if (inner4
!= NULL
) {
2651 iptun_icmp_error_v4(iptun
, inner4
, data_mp
, type
, code
);
2653 iptun_icmp_error_v6(iptun
, inner6
, data_mp
, type
, code
, 0);
2658 * Called as conn_recvicmp from IP for ICMP errors.
2662 iptun_input_icmp(void *arg
, mblk_t
*mp
, void *arg2
, ip_recv_attr_t
*ira
)
2664 conn_t
*connp
= arg
;
2665 iptun_t
*iptun
= connp
->conn_iptun
;
2669 ASSERT(IPCL_IS_IPTUN(connp
));
2671 if (mp
->b_cont
!= NULL
) {
2673 * Since ICMP error processing necessitates access to bits
2674 * that are within the ICMP error payload (the original packet
2675 * that caused the error), pull everything up into a single
2676 * block for convenience.
2678 if ((tmpmp
= msgpullup(mp
, -1)) == NULL
) {
2679 iptun_drop_pkt(mp
, &iptun
->iptun_norcvbuf
);
2686 hlen
= ira
->ira_ip_hdr_length
;
2687 switch (iptun
->iptun_typeinfo
->iti_ipvers
) {
2690 * The outer IP header coming up from IP is always ipha_t
2691 * alligned (otherwise, we would have crashed in ip).
2693 iptun_input_icmp_v4(iptun
, mp
, (icmph_t
*)(mp
->b_rptr
+ hlen
),
2697 iptun_input_icmp_v6(iptun
, mp
, (icmp6_t
*)(mp
->b_rptr
+ hlen
),
2704 iptun_in_6to4_ok(iptun_t
*iptun
, ipha_t
*outer4
, ip6_t
*inner6
)
2709 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
2710 * IPv4 address of a 6to4 tunnel as the destination.
2716 * Make sure that the IPv6 destination is within the site that this
2717 * 6to4 tunnel is routing for. We don't want people bouncing random
2718 * tunneled IPv6 packets through this 6to4 router.
2720 IN6_6TO4_TO_V4ADDR(&inner6
->ip6_dst
, (struct in_addr
*)&v4addr
);
2721 if (outer4
->ipha_dst
!= v4addr
)
2724 if (IN6_IS_ADDR_6TO4(&inner6
->ip6_src
)) {
2726 * Section 9 of RFC 3056 (security considerations) suggests
2727 * that when a packet is from a 6to4 site (i.e., it's not a
2728 * global address being forwarded froma relay router), make
2729 * sure that the packet was tunneled by that site's 6to4
2732 IN6_6TO4_TO_V4ADDR(&inner6
->ip6_src
, (struct in_addr
*)&v4addr
);
2733 if (outer4
->ipha_src
!= v4addr
)
2737 * Only accept packets from a relay router if we've configured
2738 * outbound relay router functionality.
2740 if (iptun
->iptun_iptuns
->iptuns_relay_rtr_addr
== INADDR_ANY
)
2748 * Input function for everything that comes up from the ip module below us.
2749 * This is called directly from the ip module via connp->conn_recv().
2751 * We receive M_DATA messages with IP-in-IP tunneled packets.
2755 iptun_input(void *arg
, mblk_t
*data_mp
, void *arg2
, ip_recv_attr_t
*ira
)
2757 conn_t
*connp
= arg
;
2758 iptun_t
*iptun
= connp
->conn_iptun
;
2760 ipha_t
*outer4
, *inner4
;
2761 ip6_t
*outer6
, *inner6
;
2763 ASSERT(IPCL_IS_IPTUN(connp
));
2764 ASSERT(DB_TYPE(data_mp
) == M_DATA
);
2766 outer_hlen
= iptun_find_headers(data_mp
, ira
->ira_ip_hdr_length
,
2767 &outer4
, &inner4
, &outer6
, &inner6
);
2768 if (outer_hlen
== 0)
2771 data_mp
= ipsec_tun_inbound(ira
, data_mp
, iptun
->iptun_itp
,
2772 inner4
, inner6
, outer4
, outer6
, outer_hlen
, iptun
->iptun_ns
);
2773 if (data_mp
== NULL
) {
2774 /* Callee did all of the freeing. */
2778 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_6TO4
&&
2779 !iptun_in_6to4_ok(iptun
, outer4
, inner6
))
2783 * We need to statistically account for each packet individually, so
2784 * we might as well split up any b_next chains here.
2789 mp
= data_mp
->b_next
;
2790 data_mp
->b_next
= NULL
;
2792 atomic_inc_64(&iptun
->iptun_ipackets
);
2793 atomic_add_64(&iptun
->iptun_rbytes
, msgdsize(data_mp
));
2794 mac_rx(iptun
->iptun_mh
, NULL
, data_mp
);
2797 } while (data_mp
!= NULL
);
2800 iptun_drop_pkt(data_mp
, &iptun
->iptun_ierrors
);
2804 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet
2805 * was processed without issue, or B_FALSE if the packet had issues and should
2809 iptun_out_process_6to4(iptun_t
*iptun
, ipha_t
*outer4
, ip6_t
*inner6
)
2814 * IPv6 source must be a 6to4 address. This is because a conscious
2815 * decision was made to not allow a Solaris system to be used as a
2816 * relay router (for security reasons) when 6to4 was initially
2817 * integrated. If this decision is ever reversed, the following check
2820 if (!IN6_IS_ADDR_6TO4(&inner6
->ip6_src
))
2824 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
2825 * portion of the 6to4 IPv6 source address. In other words, make sure
2826 * that we're tunneling packets from our own 6to4 site.
2828 IN6_6TO4_TO_V4ADDR(&inner6
->ip6_src
, (struct in_addr
*)&v4addr
);
2829 if (outer4
->ipha_src
!= v4addr
)
2833 * Automatically set the destination of the outer IPv4 header as
2834 * described in RFC3056. There are two possibilities:
2836 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address
2837 * to the IPv4 portion of the 6to4 address.
2838 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
2839 * destination to the address of a relay router.
2841 * Design Note: b shouldn't be necessary here, and this is a flaw in
2842 * the design of the 6to4relay command. Instead of setting a 6to4
2843 * relay address in this module via an ioctl, the 6to4relay command
2844 * could simply add a IPv6 route for native IPv6 addresses (such as a
2845 * default route) in the forwarding table that uses a 6to4 destination
2846 * as its next hop, and the IPv4 portion of that address could be a
2847 * 6to4 relay address. In order for this to work, IP would have to
2848 * resolve the next hop address, which would necessitate a link-layer
2849 * address resolver for 6to4 links, which doesn't exist today.
2851 * In fact, if a resolver existed for 6to4 links, then setting the
2852 * IPv4 destination in the outer header could be done as part of
2853 * link-layer address resolution and fast-path header generation, and
2856 if (IN6_IS_ADDR_6TO4(&inner6
->ip6_dst
)) {
2857 /* destination is a 6to4 router */
2858 IN6_6TO4_TO_V4ADDR(&inner6
->ip6_dst
,
2859 (struct in_addr
*)&outer4
->ipha_dst
);
2861 /* Reject attempts to send to INADDR_ANY */
2862 if (outer4
->ipha_dst
== INADDR_ANY
)
2866 * The destination is a native IPv6 address. If output to a
2867 * relay-router is enabled, use the relay-router's IPv4
2868 * address as the destination.
2870 if (iptun
->iptun_iptuns
->iptuns_relay_rtr_addr
== INADDR_ANY
)
2872 outer4
->ipha_dst
= iptun
->iptun_iptuns
->iptuns_relay_rtr_addr
;
2876 * If the outer source and destination are equal, this means that the
2877 * 6to4 router somehow forwarded an IPv6 packet destined for its own
2878 * 6to4 site to its 6to4 tunnel interface, which will result in this
2879 * packet infinitely bouncing between ip and iptun.
2881 return (outer4
->ipha_src
!= outer4
->ipha_dst
);
2885 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on
2889 iptun_out_process_ipv4(iptun_t
*iptun
, mblk_t
*mp
, ipha_t
*outer4
,
2890 ipha_t
*inner4
, ip6_t
*inner6
, ip_xmit_attr_t
*ixa
)
2892 uint8_t *innerptr
= (inner4
!= NULL
?
2893 (uint8_t *)inner4
: (uint8_t *)inner6
);
2894 size_t minmtu
= iptun
->iptun_typeinfo
->iti_minmtu
;
2896 if (inner4
!= NULL
) {
2897 ASSERT(outer4
->ipha_protocol
== IPPROTO_ENCAP
);
2899 * Copy the tos from the inner IPv4 header. We mask off ECN
2900 * bits (bits 6 and 7) because there is currently no
2901 * tunnel-tunnel communication to determine if both sides
2902 * support ECN. We opt for the safe choice: don't copy the
2903 * ECN bits when doing encapsulation.
2905 outer4
->ipha_type_of_service
=
2906 inner4
->ipha_type_of_service
& ~0x03;
2908 ASSERT(outer4
->ipha_protocol
== IPPROTO_IPV6
&&
2911 if (ixa
->ixa_flags
& IXAF_PMTU_IPV4_DF
)
2912 outer4
->ipha_fragment_offset_and_flags
|= IPH_DF_HTONS
;
2914 outer4
->ipha_fragment_offset_and_flags
&= ~IPH_DF_HTONS
;
2917 * As described in section 3.2.2 of RFC4213, if the packet payload is
2918 * less than or equal to the minimum MTU size, then we need to allow
2919 * IPv4 to fragment the packet. The reason is that even if we end up
2920 * receiving an ICMP frag-needed, the interface above this tunnel
2921 * won't be allowed to drop its MTU as a result, since the packet was
2922 * already smaller than the smallest allowable MTU for that interface.
2924 if (mp
->b_wptr
- innerptr
<= minmtu
) {
2925 outer4
->ipha_fragment_offset_and_flags
= 0;
2926 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
2927 } else if (!(ixa
->ixa_flags
& IXAF_PMTU_TOO_SMALL
) &&
2928 (iptun
->iptun_typeinfo
->iti_type
!= IPTUN_TYPE_6TO4
)) {
2929 ixa
->ixa_flags
|= IXAF_DONTFRAG
;
2932 ixa
->ixa_ip_hdr_length
= IPH_HDR_LENGTH(outer4
);
2933 ixa
->ixa_pktlen
= msgdsize(mp
);
2934 ixa
->ixa_protocol
= outer4
->ipha_protocol
;
2936 outer4
->ipha_length
= htons(ixa
->ixa_pktlen
);
2941 * Insert an encapsulation limit destination option in the packet provided.
2942 * Always consumes the mp argument and returns a new mblk pointer.
2945 iptun_insert_encaplimit(iptun_t
*iptun
, mblk_t
*mp
, ip6_t
*outer6
,
2949 iptun_ipv6hdrs_t
*newouter6
;
2951 ASSERT(outer6
->ip6_nxt
== IPPROTO_IPV6
);
2952 ASSERT(mp
->b_cont
== NULL
);
2954 mp
->b_rptr
+= sizeof (ip6_t
);
2955 newmp
= allocb(sizeof (iptun_ipv6hdrs_t
) + MBLKL(mp
), BPRI_MED
);
2956 if (newmp
== NULL
) {
2957 iptun_drop_pkt(mp
, &iptun
->iptun_noxmtbuf
);
2960 newmp
->b_wptr
+= sizeof (iptun_ipv6hdrs_t
);
2961 /* Copy the payload (Starting with the inner IPv6 header). */
2962 bcopy(mp
->b_rptr
, newmp
->b_wptr
, MBLKL(mp
));
2963 newmp
->b_wptr
+= MBLKL(mp
);
2964 newouter6
= (iptun_ipv6hdrs_t
*)newmp
->b_rptr
;
2965 /* Now copy the outer IPv6 header. */
2966 bcopy(outer6
, &newouter6
->it6h_ip6h
, sizeof (ip6_t
));
2967 newouter6
->it6h_ip6h
.ip6_nxt
= IPPROTO_DSTOPTS
;
2968 newouter6
->it6h_encaplim
= iptun_encaplim_init
;
2969 newouter6
->it6h_encaplim
.iel_destopt
.ip6d_nxt
= outer6
->ip6_nxt
;
2970 newouter6
->it6h_encaplim
.iel_telopt
.ip6ot_encap_limit
= limit
;
2973 * The payload length will be set at the end of
2974 * iptun_out_process_ipv6().
2982 * Process output packets with outer IPv6 headers. Frees mp and bumps stats
2986 iptun_out_process_ipv6(iptun_t
*iptun
, mblk_t
*mp
, ip6_t
*outer6
,
2987 ipha_t
*inner4
, ip6_t
*inner6
, ip_xmit_attr_t
*ixa
)
2989 uint8_t *innerptr
= (inner4
!= NULL
?
2990 (uint8_t *)inner4
: (uint8_t *)inner6
);
2991 size_t minmtu
= iptun
->iptun_typeinfo
->iti_minmtu
;
2992 uint8_t *limit
, *configlimit
;
2994 iptun_ipv6hdrs_t
*v6hdrs
;
2996 if (inner6
!= NULL
&& iptun_find_encaplimit(mp
, inner6
, &limit
)) {
2998 * The inner packet is an IPv6 packet which itself contains an
2999 * encapsulation limit option. The limit variable points to
3000 * the value in the embedded option. Process the
3001 * encapsulation limit option as specified in RFC 2473.
3003 * If limit is 0, then we've exceeded the limit and we need to
3004 * send back an ICMPv6 parameter problem message.
3006 * If limit is > 0, then we decrement it by 1 and make sure
3007 * that the encapsulation limit option in the outer header
3008 * reflects that (adding an option if one isn't already
3011 ASSERT(limit
> mp
->b_rptr
&& limit
< mp
->b_wptr
);
3013 mp
->b_rptr
= (uint8_t *)inner6
;
3014 offset
= limit
- mp
->b_rptr
;
3015 iptun_icmp_error_v6(iptun
, inner6
, mp
, ICMP6_PARAM_PROB
,
3017 atomic_inc_64(&iptun
->iptun_noxmtbuf
);
3022 * The outer header requires an encapsulation limit option.
3023 * If there isn't one already, add one.
3025 if (iptun
->iptun_encaplimit
== 0) {
3026 if ((mp
= iptun_insert_encaplimit(iptun
, mp
, outer6
,
3027 (*limit
- 1))) == NULL
)
3029 v6hdrs
= (iptun_ipv6hdrs_t
*)mp
->b_rptr
;
3032 * There is an existing encapsulation limit option in
3033 * the outer header. If the inner encapsulation limit
3034 * is less than the configured encapsulation limit,
3035 * update the outer encapsulation limit to reflect
3036 * this lesser value.
3038 v6hdrs
= (iptun_ipv6hdrs_t
*)mp
->b_rptr
;
3040 &v6hdrs
->it6h_encaplim
.iel_telopt
.ip6ot_encap_limit
;
3041 if ((*limit
- 1) < *configlimit
)
3042 *configlimit
= (*limit
- 1);
3044 ixa
->ixa_ip_hdr_length
= sizeof (iptun_ipv6hdrs_t
);
3045 ixa
->ixa_protocol
= v6hdrs
->it6h_encaplim
.iel_destopt
.ip6d_nxt
;
3047 ixa
->ixa_ip_hdr_length
= sizeof (ip6_t
);
3048 ixa
->ixa_protocol
= outer6
->ip6_nxt
;
3051 * See iptun_output_process_ipv4() why we allow fragmentation for
3054 if (mp
->b_wptr
- innerptr
<= minmtu
)
3055 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
3056 else if (!(ixa
->ixa_flags
& IXAF_PMTU_TOO_SMALL
))
3057 ixa
->ixa_flags
|= IXAF_DONTFRAG
;
3059 ixa
->ixa_pktlen
= msgdsize(mp
);
3060 outer6
->ip6_plen
= htons(ixa
->ixa_pktlen
- sizeof (ip6_t
));
3065 * The IP tunneling MAC-type plugins have already done most of the header
3066 * processing and validity checks. We are simply responsible for multiplexing
3067 * down to the ip module below us.
3070 iptun_output(iptun_t
*iptun
, mblk_t
*mp
)
3072 conn_t
*connp
= iptun
->iptun_connp
;
3075 ip_xmit_attr_t
*ixa
;
3077 ASSERT(mp
->b_datap
->db_type
== M_DATA
);
3079 if (mp
->b_cont
!= NULL
) {
3080 if ((newmp
= msgpullup(mp
, -1)) == NULL
) {
3081 iptun_drop_pkt(mp
, &iptun
->iptun_noxmtbuf
);
3088 if (iptun
->iptun_typeinfo
->iti_type
== IPTUN_TYPE_6TO4
) {
3089 iptun_output_6to4(iptun
, mp
);
3094 * If no other thread is using conn_ixa this just gets a
3095 * reference to conn_ixa. Otherwise we get a safe copy of
3098 ixa
= conn_get_ixa(connp
, B_FALSE
);
3100 iptun_drop_pkt(mp
, &iptun
->iptun_oerrors
);
3105 * In case we got a safe copy of conn_ixa, then we need
3106 * to fill in any pointers in it.
3108 if (ixa
->ixa_ire
== NULL
) {
3109 error
= ip_attr_connect(connp
, ixa
, &connp
->conn_saddr_v6
,
3110 &connp
->conn_faddr_v6
, &connp
->conn_faddr_v6
, 0,
3113 if (ixa
->ixa_ire
!= NULL
&&
3114 (error
== EHOSTUNREACH
|| error
== ENETUNREACH
)) {
3116 * Let conn_ip_output/ire_send_noroute return
3117 * the error and send any local ICMP error.
3122 iptun_drop_pkt(mp
, &iptun
->iptun_oerrors
);
3128 iptun_output_common(iptun
, ixa
, mp
);
3133 * We use an ixa based on the last destination.
3136 iptun_output_6to4(iptun_t
*iptun
, mblk_t
*mp
)
3138 conn_t
*connp
= iptun
->iptun_connp
;
3139 ipha_t
*outer4
, *inner4
;
3140 ip6_t
*outer6
, *inner6
;
3141 ip_xmit_attr_t
*ixa
;
3142 ip_xmit_attr_t
*oldixa
;
3144 boolean_t need_connect
;
3147 ASSERT(mp
->b_cont
== NULL
); /* Verified by iptun_output */
3149 /* Make sure we set ipha_dst before we look at ipha_dst */
3151 (void) iptun_find_headers(mp
, 0, &outer4
, &inner4
, &outer6
, &inner6
);
3152 ASSERT(outer4
!= NULL
);
3153 if (!iptun_out_process_6to4(iptun
, outer4
, inner6
)) {
3154 iptun_drop_pkt(mp
, &iptun
->iptun_oerrors
);
3159 * If no other thread is using conn_ixa this just gets a
3160 * reference to conn_ixa. Otherwise we get a safe copy of
3163 ixa
= conn_get_ixa(connp
, B_FALSE
);
3165 iptun_drop_pkt(mp
, &iptun
->iptun_oerrors
);
3169 mutex_enter(&connp
->conn_lock
);
3170 if (connp
->conn_v4lastdst
== outer4
->ipha_dst
) {
3171 need_connect
= (ixa
->ixa_ire
== NULL
);
3173 /* TODO: do we need to do this? */
3174 ip_attr_newdst(ixa
);
3177 * We later update conn_ixa when we update conn_v4lastdst
3178 * which enables subsequent packets to avoid redoing
3181 need_connect
= B_TRUE
;
3183 mutex_exit(&connp
->conn_lock
);
3186 * In case we got a safe copy of conn_ixa, or otherwise we don't
3187 * have a current ixa_ire, then we need to fill in any pointers in
3191 IN6_IPADDR_TO_V4MAPPED(outer4
->ipha_dst
, &v6dst
);
3193 /* We handle IPsec in iptun_output_common */
3194 error
= ip_attr_connect(connp
, ixa
, &connp
->conn_saddr_v6
,
3195 &v6dst
, &v6dst
, 0, NULL
, NULL
, 0);
3197 if (ixa
->ixa_ire
!= NULL
&&
3198 (error
== EHOSTUNREACH
|| error
== ENETUNREACH
)) {
3200 * Let conn_ip_output/ire_send_noroute return
3201 * the error and send any local ICMP error.
3206 iptun_drop_pkt(mp
, &iptun
->iptun_oerrors
);
3212 iptun_output_common(iptun
, ixa
, mp
);
3214 /* Atomically replace conn_ixa and conn_v4lastdst */
3215 mutex_enter(&connp
->conn_lock
);
3216 if (connp
->conn_v4lastdst
!= outer4
->ipha_dst
) {
3217 /* Remember the dst which corresponds to conn_ixa */
3218 connp
->conn_v6lastdst
= v6dst
;
3219 oldixa
= conn_replace_ixa(connp
, ixa
);
3223 mutex_exit(&connp
->conn_lock
);
3226 ixa_refrele(oldixa
);
3230 iptun_output_common(iptun_t
*iptun
, ip_xmit_attr_t
*ixa
, mblk_t
*mp
)
3232 ipsec_tun_pol_t
*itp
= iptun
->iptun_itp
;
3235 ipha_t
*outer4
, *inner4
;
3236 ip6_t
*outer6
, *inner6
;
3238 boolean_t update_pktlen
;
3240 ASSERT(ixa
->ixa_ire
!= NULL
);
3242 outer_hlen
= iptun_find_headers(mp
, 0, &outer4
, &inner4
, &outer6
,
3244 if (outer_hlen
== 0) {
3245 iptun_drop_pkt(mp
, &iptun
->iptun_oerrors
);
3249 /* Save IXAF_DONTFRAG value */
3250 iaflags_t dontfrag
= ixa
->ixa_flags
& IXAF_DONTFRAG
;
3252 /* Perform header processing. */
3253 if (outer4
!= NULL
) {
3254 mp
= iptun_out_process_ipv4(iptun
, mp
, outer4
, inner4
, inner6
,
3257 mp
= iptun_out_process_ipv6(iptun
, mp
, outer6
, inner4
, inner6
,
3264 * Let's hope the compiler optimizes this with "branch taken".
3266 if (itp
!= NULL
&& (itp
->itp_flags
& ITPF_P_ACTIVE
)) {
3267 /* This updates the ip_xmit_attr_t */
3268 mp
= ipsec_tun_outbound(mp
, iptun
, inner4
, inner6
, outer4
,
3269 outer6
, outer_hlen
, ixa
);
3271 atomic_inc_64(&iptun
->iptun_oerrors
);
3276 * ipsec_tun_outbound() returns a chain of tunneled IP
3277 * fragments linked with b_next (or a single message if the
3278 * tunneled packet wasn't a fragment).
3279 * If fragcache returned a list then we need to update
3280 * ixa_pktlen for all packets in the list.
3282 update_pktlen
= (mp
->b_next
!= NULL
);
3285 * Otherwise, we're good to go. The ixa has been updated with
3286 * instructions for outbound IPsec processing.
3288 for (newmp
= mp
; newmp
!= NULL
; newmp
= mp
) {
3289 size_t minmtu
= iptun
->iptun_typeinfo
->iti_minmtu
;
3291 atomic_inc_64(&iptun
->iptun_opackets
);
3292 atomic_add_64(&iptun
->iptun_obytes
, ixa
->ixa_pktlen
);
3294 newmp
->b_next
= NULL
;
3297 * The IXAF_DONTFRAG flag is global, but there is
3298 * a chain here. Check if we're really already
3299 * smaller than the minimum allowed MTU and reset here
3300 * appropriately. Otherwise one small packet can kill
3301 * the whole chain's path mtu discovery.
3302 * In addition, update the pktlen to the length of
3303 * the actual packet being processed.
3305 if (update_pktlen
) {
3306 ixa
->ixa_pktlen
= msgdsize(newmp
);
3307 if (ixa
->ixa_pktlen
<= minmtu
)
3308 ixa
->ixa_flags
&= ~IXAF_DONTFRAG
;
3311 atomic_inc_64(&iptun
->iptun_opackets
);
3312 atomic_add_64(&iptun
->iptun_obytes
, ixa
->ixa_pktlen
);
3314 error
= conn_ip_output(newmp
, ixa
);
3316 /* Restore IXAF_DONTFRAG value */
3317 ixa
->ixa_flags
|= dontfrag
;
3319 if (error
== EMSGSIZE
) {
3320 /* IPsec policy might have changed */
3321 (void) iptun_update_mtu(iptun
, ixa
, 0);
3326 * The ip module will potentially apply global policy to the
3327 * packet in its output path if there's no active tunnel
3330 ASSERT(ixa
->ixa_ipsec_policy
== NULL
);
3331 mp
= ip_output_attach_policy(mp
, outer4
, outer6
, NULL
, ixa
);
3333 atomic_inc_64(&iptun
->iptun_oerrors
);
3337 atomic_inc_64(&iptun
->iptun_opackets
);
3338 atomic_add_64(&iptun
->iptun_obytes
, ixa
->ixa_pktlen
);
3340 error
= conn_ip_output(mp
, ixa
);
3341 if (error
== EMSGSIZE
) {
3342 /* IPsec policy might have changed */
3343 (void) iptun_update_mtu(iptun
, ixa
, 0);
3346 if (ixa
->ixa_flags
& IXAF_IPSEC_SECURE
)
3347 ipsec_out_release_refs(ixa
);
3350 static mac_callbacks_t iptun_m_callbacks
= {
3351 .mc_callbacks
= (MC_SETPROP
| MC_GETPROP
| MC_PROPINFO
),
3352 .mc_getstat
= iptun_m_getstat
,
3353 .mc_start
= iptun_m_start
,
3354 .mc_stop
= iptun_m_stop
,
3355 .mc_setpromisc
= iptun_m_setpromisc
,
3356 .mc_multicst
= iptun_m_multicst
,
3357 .mc_unicst
= iptun_m_unicst
,
3358 .mc_tx
= iptun_m_tx
,
3359 .mc_reserved
= NULL
,
3360 .mc_setprop
= iptun_m_setprop
,
3361 .mc_getprop
= iptun_m_getprop
,
3362 .mc_propinfo
= iptun_m_propinfo