/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
 */

/*
 * iptun - IP Tunneling Driver
 *
 * This module is a GLDv3 driver that implements virtual datalinks over IP
 * (a.k.a, IP tunneling).  The datalinks are managed through a dld ioctl
 * interface (see iptun_ctl.c), and registered with GLDv3 using
 * mac_register().  It implements the logic for various forms of IP (IPv4 or
 * IPv6) encapsulation within IP (IPv4 or IPv6) by interacting with the ip
 * module below it.  Each virtual IP tunnel datalink has a conn_t associated
 * with it representing the "outer" IP connection.
 *
 * The module implements the following locking semantics:
 *
 * Lookups and deletions in iptun_hash are synchronized using iptun_hash_lock.
 * See comments above iptun_hash_lock for details.
 *
 * No locks are ever held while calling up to GLDv3.  The general architecture
 * of GLDv3 requires this, as the mac perimeter (essentially a lock) for a
 * given link will be held while making downcalls (iptun_m_*() callbacks).
 * Because we need to hold locks while handling downcalls, holding these locks
 * while issuing upcalls results in deadlock scenarios.  See the block comment
 * above iptun_task_cb() for details on how we safely issue upcalls without
 * holding any locks.
 *
 * The contents of each iptun_t is protected by an iptun_mutex which is held
 * in iptun_enter() (called by iptun_enter_by_linkid()), and exited in
 * iptun_exit().
 *
 * See comments in iptun_delete() and iptun_free() for details on how the
 * iptun_t is deleted safely.
 */
#include <sys/types.h>
#include <sys/kmem.h>
#include <sys/errno.h>
#include <sys/modhash.h>
#include <sys/list.h>
#include <sys/strsun.h>
#include <sys/file.h>
#include <sys/systm.h>
#include <sys/tihdr.h>
#include <sys/param.h>
#include <sys/mac_provider.h>
#include <sys/mac_ipv4.h>
#include <sys/mac_ipv6.h>
#include <sys/mac_6to4.h>
#include <sys/sunldi.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <inet/ip.h>
#include <inet/ip_ire.h>
#include <inet/ipsec_impl.h>
#include <inet/iptun.h>
#include <inet/iptun/iptun_impl.h>
/* Do the tunnel type and address family match? */
#define	IPTUN_ADDR_MATCH(iptun_type, family)				\
	((iptun_type == IPTUN_TYPE_IPV4 && family == AF_INET) ||	\
	(iptun_type == IPTUN_TYPE_IPV6 && family == AF_INET6) ||	\
	(iptun_type == IPTUN_TYPE_6TO4 && family == AF_INET))

/* Widen a datalink ID into a mod_hash key. */
#define	IPTUN_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))

/* MTU bounds for each tunnel flavor. */
#define	IPTUN_MIN_IPV4_MTU	576		/* ip.h still uses 68 (!) */
#define	IPTUN_MIN_IPV6_MTU	IPV6_MIN_MTU
#define	IPTUN_MAX_IPV4_MTU	(IP_MAXPACKET - sizeof (ipha_t))
#define	IPTUN_MAX_IPV6_MTU	(IP_MAXPACKET - sizeof (ip6_t) -	\
				    sizeof (iptun_encaplim_t))

#define	IPTUN_MIN_HOPLIMIT	1
#define	IPTUN_MAX_HOPLIMIT	UINT8_MAX

#define	IPTUN_MIN_ENCAPLIMIT	0
#define	IPTUN_MAX_ENCAPLIMIT	UINT8_MAX

/* IPsec preference bits that constitute a "real" policy request. */
#define	IPTUN_IPSEC_REQ_MASK	(IPSEC_PREF_REQUIRED | IPSEC_PREF_NEVER)
103 static iptun_encaplim_t iptun_encaplim_init = {
104 { IPPROTO_NONE, 0 },
105 IP6OPT_TUNNEL_LIMIT,
107 IPTUN_DEFAULT_ENCAPLIMIT, /* filled in with actual value later */
108 IP6OPT_PADN,
114 * Table containing per-iptun-type information.
115 * Since IPv6 can run over all of these we have the IPv6 min as the min MTU.
117 static iptun_typeinfo_t iptun_type_table[] = {
118 { IPTUN_TYPE_IPV4, MAC_PLUGIN_IDENT_IPV4, IPV4_VERSION,
119 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_TRUE },
120 { IPTUN_TYPE_IPV6, MAC_PLUGIN_IDENT_IPV6, IPV6_VERSION,
121 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV6_MTU, B_TRUE },
122 { IPTUN_TYPE_6TO4, MAC_PLUGIN_IDENT_6TO4, IPV4_VERSION,
123 IPTUN_MIN_IPV6_MTU, IPTUN_MAX_IPV4_MTU, B_FALSE },
124 { IPTUN_TYPE_UNKNOWN, NULL, 0, 0, 0, B_FALSE }
128 * iptun_hash is an iptun_t lookup table by link ID protected by
129 * iptun_hash_lock. While the hash table's integrity is maintained via
130 * internal locking in the mod_hash_*() functions, we need additional locking
131 * so that an iptun_t cannot be deleted after a hash lookup has returned an
132 * iptun_t and before iptun_lock has been entered. As such, we use
133 * iptun_hash_lock when doing lookups and removals from iptun_hash.
135 mod_hash_t *iptun_hash;
136 static kmutex_t iptun_hash_lock;
138 static uint_t iptun_tunnelcount; /* total for all stacks */
139 kmem_cache_t *iptun_cache;
140 ddi_taskq_t *iptun_taskq;
142 typedef enum {
143 IPTUN_TASK_MTU_UPDATE, /* tell mac about new tunnel link MTU */
144 IPTUN_TASK_LADDR_UPDATE, /* tell mac about new local address */
145 IPTUN_TASK_RADDR_UPDATE, /* tell mac about new remote address */
146 IPTUN_TASK_LINK_UPDATE, /* tell mac about new link state */
147 IPTUN_TASK_PDATA_UPDATE /* tell mac about updated plugin data */
148 } iptun_task_t;
150 typedef struct iptun_task_data_s {
151 iptun_task_t itd_task;
152 datalink_id_t itd_linkid;
153 } iptun_task_data_t;
155 static void iptun_task_dispatch(iptun_t *, iptun_task_t);
156 static int iptun_enter(iptun_t *);
157 static void iptun_exit(iptun_t *);
158 static void iptun_headergen(iptun_t *, boolean_t);
159 static void iptun_drop_pkt(mblk_t *, uint64_t *);
160 static void iptun_input(void *, mblk_t *, void *, ip_recv_attr_t *);
161 static void iptun_input_icmp(void *, mblk_t *, void *, ip_recv_attr_t *);
162 static void iptun_output(iptun_t *, mblk_t *);
163 static uint32_t iptun_get_maxmtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
164 static uint32_t iptun_update_mtu(iptun_t *, ip_xmit_attr_t *, uint32_t);
165 static uint32_t iptun_get_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
166 static void iptun_update_dst_pmtu(iptun_t *, ip_xmit_attr_t *);
167 static int iptun_setladdr(iptun_t *, const struct sockaddr_storage *);
169 static void iptun_output_6to4(iptun_t *, mblk_t *);
170 static void iptun_output_common(iptun_t *, ip_xmit_attr_t *, mblk_t *);
171 static boolean_t iptun_verifyicmp(conn_t *, void *, icmph_t *, icmp6_t *,
172 ip_recv_attr_t *);
174 static void iptun_notify(void *, ip_xmit_attr_t *, ixa_notify_type_t,
175 ixa_notify_arg_t);
177 static mac_callbacks_t iptun_m_callbacks;
179 static int
180 iptun_m_getstat(void *arg, uint_t stat, uint64_t *val)
182 iptun_t *iptun = arg;
183 int err = 0;
185 switch (stat) {
186 case MAC_STAT_IERRORS:
187 *val = iptun->iptun_ierrors;
188 break;
189 case MAC_STAT_OERRORS:
190 *val = iptun->iptun_oerrors;
191 break;
192 case MAC_STAT_RBYTES:
193 *val = iptun->iptun_rbytes;
194 break;
195 case MAC_STAT_IPACKETS:
196 *val = iptun->iptun_ipackets;
197 break;
198 case MAC_STAT_OBYTES:
199 *val = iptun->iptun_obytes;
200 break;
201 case MAC_STAT_OPACKETS:
202 *val = iptun->iptun_opackets;
203 break;
204 case MAC_STAT_NORCVBUF:
205 *val = iptun->iptun_norcvbuf;
206 break;
207 case MAC_STAT_NOXMTBUF:
208 *val = iptun->iptun_noxmtbuf;
209 break;
210 default:
211 err = ENOTSUP;
214 return (err);
217 static int
218 iptun_m_start(void *arg)
220 iptun_t *iptun = arg;
221 int err;
223 if ((err = iptun_enter(iptun)) == 0) {
224 iptun->iptun_flags |= IPTUN_MAC_STARTED;
225 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
226 iptun_exit(iptun);
228 return (err);
231 static void
232 iptun_m_stop(void *arg)
234 iptun_t *iptun = arg;
236 if (iptun_enter(iptun) == 0) {
237 iptun->iptun_flags &= ~IPTUN_MAC_STARTED;
238 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
239 iptun_exit(iptun);
244 * iptun_m_setpromisc() does nothing and always succeeds. This is because a
245 * tunnel data-link only ever receives packets that are destined exclusively
246 * for the local address of the tunnel.
248 /* ARGSUSED */
249 static int
250 iptun_m_setpromisc(void *arg, boolean_t on)
252 return (0);
255 /* ARGSUSED */
256 static int
257 iptun_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
259 return (ENOTSUP);
263 * iptun_m_unicst() sets the local address.
265 /* ARGSUSED */
266 static int
267 iptun_m_unicst(void *arg, const uint8_t *addrp)
269 iptun_t *iptun = arg;
270 int err;
271 struct sockaddr_storage ss;
272 struct sockaddr_in *sin;
273 struct sockaddr_in6 *sin6;
275 if ((err = iptun_enter(iptun)) == 0) {
276 switch (iptun->iptun_typeinfo->iti_ipvers) {
277 case IPV4_VERSION:
278 sin = (struct sockaddr_in *)&ss;
279 sin->sin_family = AF_INET;
280 bcopy(addrp, &sin->sin_addr, sizeof (in_addr_t));
281 break;
282 case IPV6_VERSION:
283 sin6 = (struct sockaddr_in6 *)&ss;
284 sin6->sin6_family = AF_INET6;
285 bcopy(addrp, &sin6->sin6_addr, sizeof (in6_addr_t));
286 break;
287 default:
288 ASSERT(0);
290 err = iptun_setladdr(iptun, &ss);
291 iptun_exit(iptun);
293 return (err);
296 static mblk_t *
297 iptun_m_tx(void *arg, mblk_t *mpchain)
299 mblk_t *mp, *nmp;
300 iptun_t *iptun = arg;
302 if (!IS_IPTUN_RUNNING(iptun)) {
303 iptun_drop_pkt(mpchain, &iptun->iptun_noxmtbuf);
304 return (NULL);
307 for (mp = mpchain; mp != NULL; mp = nmp) {
308 nmp = mp->b_next;
309 mp->b_next = NULL;
310 iptun_output(iptun, mp);
313 return (NULL);
316 /* ARGSUSED */
317 static int
318 iptun_m_setprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
319 uint_t pr_valsize, const void *pr_val)
321 iptun_t *iptun = barg;
322 uint32_t value = *(uint32_t *)pr_val;
323 int err;
326 * We need to enter this iptun_t since we'll be modifying the outer
327 * header.
329 if ((err = iptun_enter(iptun)) != 0)
330 return (err);
332 switch (pr_num) {
333 case MAC_PROP_IPTUN_HOPLIMIT:
334 if (value < IPTUN_MIN_HOPLIMIT || value > IPTUN_MAX_HOPLIMIT) {
335 err = EINVAL;
336 break;
338 if (value != iptun->iptun_hoplimit) {
339 iptun->iptun_hoplimit = (uint8_t)value;
340 iptun_headergen(iptun, B_TRUE);
342 break;
343 case MAC_PROP_IPTUN_ENCAPLIMIT:
344 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6 ||
345 value > IPTUN_MAX_ENCAPLIMIT) {
346 err = EINVAL;
347 break;
349 if (value != iptun->iptun_encaplimit) {
350 iptun->iptun_encaplimit = (uint8_t)value;
351 iptun_headergen(iptun, B_TRUE);
353 break;
354 case MAC_PROP_MTU: {
355 uint32_t maxmtu = iptun_get_maxmtu(iptun, NULL, 0);
357 if (value < iptun->iptun_typeinfo->iti_minmtu ||
358 value > maxmtu) {
359 err = EINVAL;
360 break;
362 iptun->iptun_flags |= IPTUN_FIXED_MTU;
363 if (value != iptun->iptun_mtu) {
364 iptun->iptun_mtu = value;
365 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
367 break;
369 default:
370 err = EINVAL;
372 iptun_exit(iptun);
373 return (err);
376 /* ARGSUSED */
377 static int
378 iptun_m_getprop(void *barg, const char *pr_name, mac_prop_id_t pr_num,
379 uint_t pr_valsize, void *pr_val)
381 iptun_t *iptun = barg;
382 int err;
384 if ((err = iptun_enter(iptun)) != 0)
385 return (err);
387 switch (pr_num) {
388 case MAC_PROP_IPTUN_HOPLIMIT:
389 ASSERT(pr_valsize >= sizeof (uint32_t));
390 *(uint32_t *)pr_val = iptun->iptun_hoplimit;
391 break;
393 case MAC_PROP_IPTUN_ENCAPLIMIT:
394 *(uint32_t *)pr_val = iptun->iptun_encaplimit;
395 break;
396 default:
397 err = ENOTSUP;
399 done:
400 iptun_exit(iptun);
401 return (err);
404 /* ARGSUSED */
405 static void
406 iptun_m_propinfo(void *barg, const char *pr_name, mac_prop_id_t pr_num,
407 mac_prop_info_handle_t prh)
409 iptun_t *iptun = barg;
411 switch (pr_num) {
412 case MAC_PROP_IPTUN_HOPLIMIT:
413 mac_prop_info_set_range_uint32(prh,
414 IPTUN_MIN_HOPLIMIT, IPTUN_MAX_HOPLIMIT);
415 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_HOPLIMIT);
416 break;
418 case MAC_PROP_IPTUN_ENCAPLIMIT:
419 if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_IPV6)
420 break;
421 mac_prop_info_set_range_uint32(prh,
422 IPTUN_MIN_ENCAPLIMIT, IPTUN_MAX_ENCAPLIMIT);
423 mac_prop_info_set_default_uint32(prh, IPTUN_DEFAULT_ENCAPLIMIT);
424 break;
425 case MAC_PROP_MTU:
426 mac_prop_info_set_range_uint32(prh,
427 iptun->iptun_typeinfo->iti_minmtu,
428 iptun_get_maxmtu(iptun, NULL, 0));
429 break;
433 uint_t
434 iptun_count(void)
436 return (iptun_tunnelcount);
440 * Enter an iptun_t exclusively. This is essentially just a mutex, but we
441 * don't allow iptun_enter() to succeed on a tunnel if it's in the process of
442 * being deleted.
444 static int
445 iptun_enter(iptun_t *iptun)
447 mutex_enter(&iptun->iptun_lock);
448 while (iptun->iptun_flags & IPTUN_DELETE_PENDING)
449 cv_wait(&iptun->iptun_enter_cv, &iptun->iptun_lock);
450 if (iptun->iptun_flags & IPTUN_CONDEMNED) {
451 mutex_exit(&iptun->iptun_lock);
452 return (ENOENT);
454 return (0);
458 * Exit the tunnel entered in iptun_enter().
460 static void
461 iptun_exit(iptun_t *iptun)
463 mutex_exit(&iptun->iptun_lock);
467 * Enter the IP tunnel instance by datalink ID.
469 static int
470 iptun_enter_by_linkid(datalink_id_t linkid, iptun_t **iptun)
472 int err;
474 mutex_enter(&iptun_hash_lock);
475 if (mod_hash_find(iptun_hash, IPTUN_HASH_KEY(linkid),
476 (mod_hash_val_t *)iptun) == 0)
477 err = iptun_enter(*iptun);
478 else
479 err = ENOENT;
480 if (err != 0)
481 *iptun = NULL;
482 mutex_exit(&iptun_hash_lock);
483 return (err);
487 * Handle tasks that were deferred through the iptun_taskq because they require
488 * calling up to the mac module, and we can't call up to the mac module while
489 * holding locks.
491 * This is tricky to get right without introducing race conditions and
492 * deadlocks with the mac module, as we cannot issue an upcall while in the
493 * iptun_t. The reason is that upcalls may try and enter the mac perimeter,
494 * while iptun callbacks (such as iptun_m_setprop()) called from the mac
495 * module will already have the perimeter held, and will then try and enter
496 * the iptun_t. You can see the lock ordering problem with this; this will
497 * deadlock.
499 * The safe way to do this is to enter the iptun_t in question and copy the
500 * information we need out of it so that we can exit it and know that the
501 * information being passed up to the upcalls won't be subject to modification
502 * by other threads. The problem now is that we need to exit it prior to
503 * issuing the upcall, but once we do this, a thread could come along and
504 * delete the iptun_t and thus the mac handle required to issue the upcall.
505 * To prevent this, we set the IPTUN_UPCALL_PENDING flag prior to exiting the
506 * iptun_t. This flag is the condition associated with iptun_upcall_cv, which
507 * iptun_delete() will cv_wait() on. When the upcall completes, we clear
508 * IPTUN_UPCALL_PENDING and cv_signal() any potentially waiting
509 * iptun_delete(). We can thus still safely use iptun->iptun_mh after having
510 * exited the iptun_t.
512 static void
513 iptun_task_cb(void *arg)
515 iptun_task_data_t *itd = arg;
516 iptun_task_t task = itd->itd_task;
517 datalink_id_t linkid = itd->itd_linkid;
518 iptun_t *iptun;
519 uint32_t mtu;
520 iptun_addr_t addr;
521 link_state_t linkstate;
522 size_t header_size;
523 iptun_header_t header;
525 kmem_free(itd, sizeof (*itd));
528 * Note that if the lookup fails, it's because the tunnel was deleted
529 * between the time the task was dispatched and now. That isn't an
530 * error.
532 if (iptun_enter_by_linkid(linkid, &iptun) != 0)
533 return;
535 iptun->iptun_flags |= IPTUN_UPCALL_PENDING;
537 switch (task) {
538 case IPTUN_TASK_MTU_UPDATE:
539 mtu = iptun->iptun_mtu;
540 break;
541 case IPTUN_TASK_LADDR_UPDATE:
542 addr = iptun->iptun_laddr;
543 break;
544 case IPTUN_TASK_RADDR_UPDATE:
545 addr = iptun->iptun_raddr;
546 break;
547 case IPTUN_TASK_LINK_UPDATE:
548 linkstate = IS_IPTUN_RUNNING(iptun) ?
549 LINK_STATE_UP : LINK_STATE_DOWN;
550 break;
551 case IPTUN_TASK_PDATA_UPDATE:
552 header_size = iptun->iptun_header_size;
553 header = iptun->iptun_header;
554 break;
555 default:
556 ASSERT(0);
559 iptun_exit(iptun);
561 switch (task) {
562 case IPTUN_TASK_MTU_UPDATE:
563 (void) mac_maxsdu_update(iptun->iptun_mh, mtu);
564 break;
565 case IPTUN_TASK_LADDR_UPDATE:
566 mac_unicst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
567 break;
568 case IPTUN_TASK_RADDR_UPDATE:
569 mac_dst_update(iptun->iptun_mh, (uint8_t *)&addr.ia_addr);
570 break;
571 case IPTUN_TASK_LINK_UPDATE:
572 mac_link_update(iptun->iptun_mh, linkstate);
573 break;
574 case IPTUN_TASK_PDATA_UPDATE:
575 if (mac_pdata_update(iptun->iptun_mh,
576 header_size == 0 ? NULL : &header, header_size) != 0)
577 atomic_inc_64(&iptun->iptun_taskq_fail);
578 break;
581 mutex_enter(&iptun->iptun_lock);
582 iptun->iptun_flags &= ~IPTUN_UPCALL_PENDING;
583 cv_signal(&iptun->iptun_upcall_cv);
584 mutex_exit(&iptun->iptun_lock);
587 static void
588 iptun_task_dispatch(iptun_t *iptun, iptun_task_t iptun_task)
590 iptun_task_data_t *itd;
592 itd = kmem_alloc(sizeof (*itd), KM_NOSLEEP);
593 if (itd == NULL) {
594 atomic_inc_64(&iptun->iptun_taskq_fail);
595 return;
597 itd->itd_task = iptun_task;
598 itd->itd_linkid = iptun->iptun_linkid;
599 if (ddi_taskq_dispatch(iptun_taskq, iptun_task_cb, itd, DDI_NOSLEEP)) {
600 atomic_inc_64(&iptun->iptun_taskq_fail);
601 kmem_free(itd, sizeof (*itd));
606 * Convert an iptun_addr_t to sockaddr_storage.
608 static void
609 iptun_getaddr(iptun_addr_t *iptun_addr, struct sockaddr_storage *ss)
611 struct sockaddr_in *sin;
612 struct sockaddr_in6 *sin6;
614 bzero(ss, sizeof (*ss));
615 switch (iptun_addr->ia_family) {
616 case AF_INET:
617 sin = (struct sockaddr_in *)ss;
618 sin->sin_addr.s_addr = iptun_addr->ia_addr.iau_addr4;
619 break;
620 case AF_INET6:
621 sin6 = (struct sockaddr_in6 *)ss;
622 sin6->sin6_addr = iptun_addr->ia_addr.iau_addr6;
623 break;
624 default:
625 ASSERT(0);
627 ss->ss_family = iptun_addr->ia_family;
631 * General purpose function to set an IP tunnel source or destination address.
633 static int
634 iptun_setaddr(iptun_type_t iptun_type, iptun_addr_t *iptun_addr,
635 const struct sockaddr_storage *ss)
637 if (!IPTUN_ADDR_MATCH(iptun_type, ss->ss_family))
638 return (EINVAL);
640 switch (ss->ss_family) {
641 case AF_INET: {
642 struct sockaddr_in *sin = (struct sockaddr_in *)ss;
644 if ((sin->sin_addr.s_addr == INADDR_ANY) ||
645 (sin->sin_addr.s_addr == INADDR_BROADCAST) ||
646 CLASSD(sin->sin_addr.s_addr)) {
647 return (EADDRNOTAVAIL);
649 iptun_addr->ia_addr.iau_addr4 = sin->sin_addr.s_addr;
650 break;
652 case AF_INET6: {
653 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)ss;
655 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) ||
656 IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) ||
657 IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
658 return (EADDRNOTAVAIL);
660 iptun_addr->ia_addr.iau_addr6 = sin6->sin6_addr;
661 break;
663 default:
664 return (EAFNOSUPPORT);
666 iptun_addr->ia_family = ss->ss_family;
667 return (0);
670 static int
671 iptun_setladdr(iptun_t *iptun, const struct sockaddr_storage *laddr)
673 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
674 &iptun->iptun_laddr, laddr));
677 static int
678 iptun_setraddr(iptun_t *iptun, const struct sockaddr_storage *raddr)
680 if (!(iptun->iptun_typeinfo->iti_hasraddr))
681 return (EINVAL);
682 return (iptun_setaddr(iptun->iptun_typeinfo->iti_type,
683 &iptun->iptun_raddr, raddr));
686 static boolean_t
687 iptun_canbind(iptun_t *iptun)
690 * A tunnel may bind when its source address has been set, and if its
691 * tunnel type requires one, also its destination address.
693 return ((iptun->iptun_flags & IPTUN_LADDR) &&
694 ((iptun->iptun_flags & IPTUN_RADDR) ||
695 !(iptun->iptun_typeinfo->iti_hasraddr)));
699 * Verify that the local address is valid, and insert in the fanout
701 static int
702 iptun_bind(iptun_t *iptun)
704 conn_t *connp = iptun->iptun_connp;
705 int error = 0;
706 ip_xmit_attr_t *ixa;
707 ip_xmit_attr_t *oldixa;
708 iulp_t uinfo;
709 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
712 * Get an exclusive ixa for this thread.
713 * We defer updating conn_ixa until later to handle any concurrent
714 * conn_ixa_cleanup thread.
716 ixa = conn_get_ixa(connp, B_FALSE);
717 if (ixa == NULL)
718 return (ENOMEM);
720 /* We create PMTU state including for 6to4 */
721 ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
723 ASSERT(iptun_canbind(iptun));
725 mutex_enter(&connp->conn_lock);
727 * Note that conn_proto can't be set since the upper protocol
728 * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
729 * ipcl_iptun_classify doesn't use conn_proto.
731 connp->conn_ipversion = iptun->iptun_typeinfo->iti_ipvers;
733 switch (iptun->iptun_typeinfo->iti_type) {
734 case IPTUN_TYPE_IPV4:
735 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
736 &connp->conn_laddr_v6);
737 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_raddr4,
738 &connp->conn_faddr_v6);
739 ixa->ixa_flags |= IXAF_IS_IPV4;
740 if (ip_laddr_verify_v4(iptun->iptun_laddr4, IPCL_ZONEID(connp),
741 ipst, B_FALSE) != IPVL_UNICAST_UP) {
742 mutex_exit(&connp->conn_lock);
743 error = EADDRNOTAVAIL;
744 goto done;
746 break;
747 case IPTUN_TYPE_IPV6:
748 connp->conn_laddr_v6 = iptun->iptun_laddr6;
749 connp->conn_faddr_v6 = iptun->iptun_raddr6;
750 ixa->ixa_flags &= ~IXAF_IS_IPV4;
751 /* We use a zero scopeid for now */
752 if (ip_laddr_verify_v6(&iptun->iptun_laddr6, IPCL_ZONEID(connp),
753 ipst, B_FALSE, 0) != IPVL_UNICAST_UP) {
754 mutex_exit(&connp->conn_lock);
755 error = EADDRNOTAVAIL;
756 goto done;
758 break;
759 case IPTUN_TYPE_6TO4:
760 IN6_IPADDR_TO_V4MAPPED(iptun->iptun_laddr4,
761 &connp->conn_laddr_v6);
762 IN6_IPADDR_TO_V4MAPPED(INADDR_ANY, &connp->conn_faddr_v6);
763 ixa->ixa_flags |= IXAF_IS_IPV4;
764 mutex_exit(&connp->conn_lock);
766 switch (ip_laddr_verify_v4(iptun->iptun_laddr4,
767 IPCL_ZONEID(connp), ipst, B_FALSE)) {
768 case IPVL_UNICAST_UP:
769 case IPVL_UNICAST_DOWN:
770 break;
771 default:
772 error = EADDRNOTAVAIL;
773 goto done;
775 goto insert;
778 /* TODO: do we need to do this? */
779 ip_attr_newdst(ixa);
782 * When we set a tunnel's destination address, we do not
783 * care if the destination is reachable. Transient routing
784 * issues should not inhibit the creation of a tunnel
785 * interface, for example. Thus we pass B_FALSE here.
787 connp->conn_saddr_v6 = connp->conn_laddr_v6;
788 mutex_exit(&connp->conn_lock);
790 /* As long as the MTU is large we avoid fragmentation */
791 ixa->ixa_flags |= IXAF_DONTFRAG | IXAF_PMTU_IPV4_DF;
793 /* We handle IPsec in iptun_output_common */
794 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
795 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
796 &connp->conn_saddr_v6, &uinfo, 0);
798 if (error != 0)
799 goto done;
801 /* saddr shouldn't change since it was already set */
802 ASSERT(IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
803 &connp->conn_saddr_v6));
805 /* We set IXAF_VERIFY_PMTU to catch PMTU increases */
806 ixa->ixa_flags |= IXAF_VERIFY_PMTU;
807 ASSERT(uinfo.iulp_mtu != 0);
810 * Allow setting new policies.
811 * The addresses/ports are already set, thus the IPsec policy calls
812 * can handle their passed-in conn's.
814 connp->conn_policy_cached = B_FALSE;
816 insert:
817 error = ipcl_conn_insert(connp);
818 if (error != 0)
819 goto done;
821 /* Atomically update v6lastdst and conn_ixa */
822 mutex_enter(&connp->conn_lock);
823 /* Record this as the "last" send even though we haven't sent any */
824 connp->conn_v6lastdst = connp->conn_faddr_v6;
826 iptun->iptun_flags |= IPTUN_BOUND;
828 oldixa = conn_replace_ixa(connp, ixa);
829 /* Done with conn_t */
830 mutex_exit(&connp->conn_lock);
831 ixa_refrele(oldixa);
834 * Now that we're bound with ip below us, this is a good
835 * time to initialize the destination path MTU and to
836 * re-calculate the tunnel's link MTU.
838 (void) iptun_update_mtu(iptun, ixa, 0);
840 if (IS_IPTUN_RUNNING(iptun))
841 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
843 done:
844 ixa_refrele(ixa);
845 return (error);
848 static void
849 iptun_unbind(iptun_t *iptun)
851 ASSERT(iptun->iptun_flags & IPTUN_BOUND);
852 ASSERT(mutex_owned(&iptun->iptun_lock) ||
853 (iptun->iptun_flags & IPTUN_CONDEMNED));
854 ip_unbind(iptun->iptun_connp);
855 iptun->iptun_flags &= ~IPTUN_BOUND;
856 if (!(iptun->iptun_flags & IPTUN_CONDEMNED))
857 iptun_task_dispatch(iptun, IPTUN_TASK_LINK_UPDATE);
861 * Re-generate the template data-link header for a given IP tunnel given the
862 * tunnel's current parameters.
864 static void
865 iptun_headergen(iptun_t *iptun, boolean_t update_mac)
867 switch (iptun->iptun_typeinfo->iti_ipvers) {
868 case IPV4_VERSION:
870 * We only need to use a custom IP header if the administrator
871 * has supplied a non-default hoplimit.
873 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT) {
874 iptun->iptun_header_size = 0;
875 break;
877 iptun->iptun_header_size = sizeof (ipha_t);
878 iptun->iptun_header4.ipha_version_and_hdr_length =
879 IP_SIMPLE_HDR_VERSION;
880 iptun->iptun_header4.ipha_fragment_offset_and_flags =
881 htons(IPH_DF);
882 iptun->iptun_header4.ipha_ttl = iptun->iptun_hoplimit;
883 break;
884 case IPV6_VERSION: {
885 ip6_t *ip6hp = &iptun->iptun_header6.it6h_ip6h;
888 * We only need to use a custom IPv6 header if either the
889 * administrator has supplied a non-default hoplimit, or we
890 * need to include an encapsulation limit option in the outer
891 * header.
893 if (iptun->iptun_hoplimit == IPTUN_DEFAULT_HOPLIMIT &&
894 iptun->iptun_encaplimit == 0) {
895 iptun->iptun_header_size = 0;
896 break;
899 (void) memset(ip6hp, 0, sizeof (*ip6hp));
900 if (iptun->iptun_encaplimit == 0) {
901 iptun->iptun_header_size = sizeof (ip6_t);
902 ip6hp->ip6_nxt = IPPROTO_NONE;
903 } else {
904 iptun_encaplim_t *iel;
906 iptun->iptun_header_size = sizeof (iptun_ipv6hdrs_t);
908 * The mac_ipv6 plugin requires ip6_plen to be in host
909 * byte order and reflect the extension headers
910 * present in the template. The actual network byte
911 * order ip6_plen will be set on a per-packet basis on
912 * transmit.
914 ip6hp->ip6_plen = sizeof (*iel);
915 ip6hp->ip6_nxt = IPPROTO_DSTOPTS;
916 iel = &iptun->iptun_header6.it6h_encaplim;
917 *iel = iptun_encaplim_init;
918 iel->iel_telopt.ip6ot_encap_limit =
919 iptun->iptun_encaplimit;
922 ip6hp->ip6_hlim = iptun->iptun_hoplimit;
923 break;
927 if (update_mac)
928 iptun_task_dispatch(iptun, IPTUN_TASK_PDATA_UPDATE);
932 * Insert inbound and outbound IPv4 and IPv6 policy into the given policy
933 * head.
935 static boolean_t
936 iptun_insert_simple_policies(ipsec_policy_head_t *ph, ipsec_act_t *actp,
937 uint_t n, netstack_t *ns)
939 int f = IPSEC_AF_V4;
941 if (!ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) ||
942 !ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns))
943 return (B_FALSE);
945 f = IPSEC_AF_V6;
946 return (ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_INBOUND, ns) &&
947 ipsec_polhead_insert(ph, actp, n, f, IPSEC_TYPE_OUTBOUND, ns));
951 * Used to set IPsec policy when policy is set through the IPTUN_CREATE or
952 * IPTUN_MODIFY ioctls.
954 static int
955 iptun_set_sec_simple(iptun_t *iptun, const ipsec_req_t *ipsr)
957 int rc = 0;
958 uint_t nact;
959 ipsec_act_t *actp = NULL;
960 boolean_t clear_all, old_policy = B_FALSE;
961 ipsec_tun_pol_t *itp;
962 char name[MAXLINKNAMELEN];
963 uint64_t gen;
964 netstack_t *ns = iptun->iptun_ns;
966 /* Can't specify self-encap on a tunnel. */
967 if (ipsr->ipsr_self_encap_req != 0)
968 return (EINVAL);
971 * If it's a "clear-all" entry, unset the security flags and resume
972 * normal cleartext (or inherit-from-global) policy.
974 clear_all = ((ipsr->ipsr_ah_req & IPTUN_IPSEC_REQ_MASK) == 0 &&
975 (ipsr->ipsr_esp_req & IPTUN_IPSEC_REQ_MASK) == 0);
977 ASSERT(mutex_owned(&iptun->iptun_lock));
978 itp = iptun->iptun_itp;
979 if (itp == NULL) {
980 if (clear_all)
981 goto bail;
982 if ((rc = dls_mgmt_get_linkinfo(iptun->iptun_linkid, name, NULL,
983 NULL, NULL)) != 0)
984 goto bail;
985 ASSERT(name[0] != '\0');
986 if ((itp = create_tunnel_policy(name, &rc, &gen, ns)) == NULL)
987 goto bail;
988 iptun->iptun_itp = itp;
991 /* Allocate the actvec now, before holding itp or polhead locks. */
992 ipsec_actvec_from_req(ipsr, &actp, &nact, ns);
993 if (actp == NULL) {
994 rc = ENOMEM;
995 goto bail;
999 * Just write on the active polhead. Save the primary/secondary stuff
1000 * for spdsock operations.
1002 * Mutex because we need to write to the polhead AND flags atomically.
1003 * Other threads will acquire the polhead lock as a reader if the
1004 * (unprotected) flag is set.
1006 mutex_enter(&itp->itp_lock);
1007 if (itp->itp_flags & ITPF_P_TUNNEL) {
1008 /* Oops, we lost a race. Let's get out of here. */
1009 rc = EBUSY;
1010 goto mutex_bail;
1012 old_policy = ((itp->itp_flags & ITPF_P_ACTIVE) != 0);
1014 if (old_policy) {
1015 ITPF_CLONE(itp->itp_flags);
1016 rc = ipsec_copy_polhead(itp->itp_policy, itp->itp_inactive, ns);
1017 if (rc != 0) {
1018 /* inactive has already been cleared. */
1019 itp->itp_flags &= ~ITPF_IFLAGS;
1020 goto mutex_bail;
1022 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
1023 ipsec_polhead_flush(itp->itp_policy, ns);
1024 } else {
1025 /* Else assume itp->itp_policy is already flushed. */
1026 rw_enter(&itp->itp_policy->iph_lock, RW_WRITER);
1029 if (clear_all) {
1030 ASSERT(avl_numnodes(&itp->itp_policy->iph_rulebyid) == 0);
1031 itp->itp_flags &= ~ITPF_PFLAGS;
1032 rw_exit(&itp->itp_policy->iph_lock);
1033 old_policy = B_FALSE; /* Clear out the inactive one too. */
1034 goto recover_bail;
1037 if (iptun_insert_simple_policies(itp->itp_policy, actp, nact, ns)) {
1038 rw_exit(&itp->itp_policy->iph_lock);
1040 * Adjust MTU and make sure the DL side knows what's up.
1042 itp->itp_flags = ITPF_P_ACTIVE;
1043 (void) iptun_update_mtu(iptun, NULL, 0);
1044 old_policy = B_FALSE; /* Blank out inactive - we succeeded */
1045 } else {
1046 rw_exit(&itp->itp_policy->iph_lock);
1047 rc = ENOMEM;
1050 recover_bail:
1051 if (old_policy) {
1052 /* Recover policy in in active polhead. */
1053 ipsec_swap_policy(itp->itp_policy, itp->itp_inactive, ns);
1054 ITPF_SWAP(itp->itp_flags);
1057 /* Clear policy in inactive polhead. */
1058 itp->itp_flags &= ~ITPF_IFLAGS;
1059 rw_enter(&itp->itp_inactive->iph_lock, RW_WRITER);
1060 ipsec_polhead_flush(itp->itp_inactive, ns);
1061 rw_exit(&itp->itp_inactive->iph_lock);
1063 mutex_bail:
1064 mutex_exit(&itp->itp_lock);
1066 bail:
1067 if (actp != NULL)
1068 ipsec_actvec_free(actp, nact);
1070 return (rc);
1073 static iptun_typeinfo_t *
1074 iptun_gettypeinfo(iptun_type_t type)
1076 int i;
1078 for (i = 0; iptun_type_table[i].iti_type != IPTUN_TYPE_UNKNOWN; i++) {
1079 if (iptun_type_table[i].iti_type == type)
1080 break;
1082 return (&iptun_type_table[i]);
1086 * Set the parameters included in ik on the tunnel iptun. Parameters that can
1087 * only be set at creation time are set in iptun_create().
1089 static int
1090 iptun_setparams(iptun_t *iptun, const iptun_kparams_t *ik)
1092 int err = 0;
1093 netstack_t *ns = iptun->iptun_ns;
1094 iptun_addr_t orig_laddr, orig_raddr;
1095 uint_t orig_flags = iptun->iptun_flags;
1097 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR) {
1098 if (orig_flags & IPTUN_LADDR)
1099 orig_laddr = iptun->iptun_laddr;
1100 if ((err = iptun_setladdr(iptun, &ik->iptun_kparam_laddr)) != 0)
1101 return (err);
1102 iptun->iptun_flags |= IPTUN_LADDR;
1105 if (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) {
1106 if (orig_flags & IPTUN_RADDR)
1107 orig_raddr = iptun->iptun_raddr;
1108 if ((err = iptun_setraddr(iptun, &ik->iptun_kparam_raddr)) != 0)
1109 goto done;
1110 iptun->iptun_flags |= IPTUN_RADDR;
1113 if (ik->iptun_kparam_flags & IPTUN_KPARAM_SECINFO) {
1115 * Set IPsec policy originating from the ifconfig(1M) command
1116 * line. This is traditionally called "simple" policy because
1117 * the ipsec_req_t (iptun_kparam_secinfo) can only describe a
1118 * simple policy of "do ESP on everything" and/or "do AH on
1119 * everything" (as opposed to the rich policy that can be
1120 * defined with ipsecconf(1M)).
1122 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
1124 * Can't set security properties for automatic
1125 * tunnels.
1127 err = EINVAL;
1128 goto done;
1131 if (!ipsec_loaded(ns->netstack_ipsec)) {
1132 /* If IPsec can be loaded, try and load it now. */
1133 if (ipsec_failed(ns->netstack_ipsec)) {
1134 err = EPROTONOSUPPORT;
1135 goto done;
1137 ipsec_loader_loadnow(ns->netstack_ipsec);
1139 * ipsec_loader_loadnow() returns while IPsec is
1140 * loaded asynchronously. While a method exists to
1141 * wait for IPsec to load (ipsec_loader_wait()), it
1142 * requires use of a STREAMS queue to do a qwait().
1143 * We're not in STREAMS context here, and so we can't
1144 * use it. This is not a problem in practice because
1145 * in the vast majority of cases, key management and
1146 * global policy will have loaded before any tunnels
1147 * are plumbed, and so IPsec will already have been
1148 * loaded.
1150 err = EAGAIN;
1151 goto done;
1154 err = iptun_set_sec_simple(iptun, &ik->iptun_kparam_secinfo);
1155 if (err == 0) {
1156 iptun->iptun_flags |= IPTUN_SIMPLE_POLICY;
1157 iptun->iptun_simple_policy = ik->iptun_kparam_secinfo;
1160 done:
1161 if (err != 0) {
1162 /* Restore original source and destination. */
1163 if (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR &&
1164 (orig_flags & IPTUN_LADDR))
1165 iptun->iptun_laddr = orig_laddr;
1166 if ((ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR) &&
1167 (orig_flags & IPTUN_RADDR))
1168 iptun->iptun_raddr = orig_raddr;
1169 iptun->iptun_flags = orig_flags;
1171 return (err);
1174 static int
1175 iptun_register(iptun_t *iptun)
1177 mac_register_t *mac;
1178 int err;
1180 ASSERT(!(iptun->iptun_flags & IPTUN_MAC_REGISTERED));
1182 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
1183 return (EINVAL);
1185 mac->m_type_ident = iptun->iptun_typeinfo->iti_ident;
1186 mac->m_driver = iptun;
1187 mac->m_dip = iptun_dip;
1188 mac->m_instance = (uint_t)-1;
1189 mac->m_src_addr = (uint8_t *)&iptun->iptun_laddr.ia_addr;
1190 mac->m_dst_addr = iptun->iptun_typeinfo->iti_hasraddr ?
1191 (uint8_t *)&iptun->iptun_raddr.ia_addr : NULL;
1192 mac->m_callbacks = &iptun_m_callbacks;
1193 mac->m_min_sdu = iptun->iptun_typeinfo->iti_minmtu;
1194 mac->m_max_sdu = iptun->iptun_mtu;
1195 if (iptun->iptun_header_size != 0) {
1196 mac->m_pdata = &iptun->iptun_header;
1197 mac->m_pdata_size = iptun->iptun_header_size;
1199 if ((err = mac_register(mac, &iptun->iptun_mh)) == 0)
1200 iptun->iptun_flags |= IPTUN_MAC_REGISTERED;
1201 mac_free(mac);
1202 return (err);
1205 static int
1206 iptun_unregister(iptun_t *iptun)
1208 int err;
1210 ASSERT(iptun->iptun_flags & IPTUN_MAC_REGISTERED);
1211 if ((err = mac_unregister(iptun->iptun_mh)) == 0)
1212 iptun->iptun_flags &= ~IPTUN_MAC_REGISTERED;
1213 return (err);
1216 static conn_t *
1217 iptun_conn_create(iptun_t *iptun, netstack_t *ns, cred_t *credp)
1219 conn_t *connp;
1221 if ((connp = ipcl_conn_create(IPCL_IPCCONN, KM_NOSLEEP, ns)) == NULL)
1222 return (NULL);
1224 connp->conn_flags |= IPCL_IPTUN;
1225 connp->conn_iptun = iptun;
1226 connp->conn_recv = iptun_input;
1227 connp->conn_recvicmp = iptun_input_icmp;
1228 connp->conn_verifyicmp = iptun_verifyicmp;
1231 * Register iptun_notify to listen to capability changes detected by IP.
1232 * This upcall is made in the context of the call to conn_ip_output.
1234 connp->conn_ixa->ixa_notify = iptun_notify;
1235 connp->conn_ixa->ixa_notify_cookie = iptun;
1238 * For exclusive stacks we set conn_zoneid to GLOBAL_ZONEID as is done
1239 * for all other conn_t's.
1241 * Note that there's an important distinction between iptun_zoneid and
1242 * conn_zoneid. The conn_zoneid is set to GLOBAL_ZONEID in non-global
1243 * exclusive stack zones to make the ip module believe that the
1244 * non-global zone is actually a global zone. Therefore, when
1245 * interacting with the ip module, we must always use conn_zoneid.
1247 connp->conn_zoneid = (ns->netstack_stackid == GLOBAL_NETSTACKID) ?
1248 crgetzoneid(credp) : GLOBAL_ZONEID;
1249 connp->conn_cred = credp;
1250 /* crfree() is done in ipcl_conn_destroy(), called by CONN_DEC_REF() */
1251 crhold(connp->conn_cred);
1252 connp->conn_cpid = NOPID;
1254 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */
1255 connp->conn_ixa->ixa_zoneid = connp->conn_zoneid;
1256 ASSERT(connp->conn_ref == 1);
1258 /* Cache things in ixa without an extra refhold */
1259 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
1260 connp->conn_ixa->ixa_cred = connp->conn_cred;
1261 connp->conn_ixa->ixa_cpid = connp->conn_cpid;
1264 * Have conn_ip_output drop packets should our outer source
1265 * go invalid
1267 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
1269 switch (iptun->iptun_typeinfo->iti_ipvers) {
1270 case IPV4_VERSION:
1271 connp->conn_family = AF_INET6;
1272 break;
1273 case IPV6_VERSION:
1274 connp->conn_family = AF_INET;
1275 break;
1277 mutex_enter(&connp->conn_lock);
1278 connp->conn_state_flags &= ~CONN_INCIPIENT;
1279 mutex_exit(&connp->conn_lock);
1280 return (connp);
1283 static void
1284 iptun_conn_destroy(conn_t *connp)
1286 ip_quiesce_conn(connp);
1287 connp->conn_iptun = NULL;
1288 ASSERT(connp->conn_ref == 1);
1289 CONN_DEC_REF(connp);
1292 static iptun_t *
1293 iptun_alloc(void)
1295 iptun_t *iptun;
1297 if ((iptun = kmem_cache_alloc(iptun_cache, KM_NOSLEEP)) != NULL) {
1298 bzero(iptun, sizeof (*iptun));
1299 atomic_inc_32(&iptun_tunnelcount);
1301 return (iptun);
1304 static void
1305 iptun_free(iptun_t *iptun)
1307 ASSERT(iptun->iptun_flags & IPTUN_CONDEMNED);
1309 if (iptun->iptun_flags & IPTUN_HASH_INSERTED) {
1310 iptun_stack_t *iptuns = iptun->iptun_iptuns;
1312 mutex_enter(&iptun_hash_lock);
1313 VERIFY(mod_hash_remove(iptun_hash,
1314 IPTUN_HASH_KEY(iptun->iptun_linkid),
1315 (mod_hash_val_t *)&iptun) == 0);
1316 mutex_exit(&iptun_hash_lock);
1317 iptun->iptun_flags &= ~IPTUN_HASH_INSERTED;
1318 mutex_enter(&iptuns->iptuns_lock);
1319 list_remove(&iptuns->iptuns_iptunlist, iptun);
1320 mutex_exit(&iptuns->iptuns_lock);
1323 if (iptun->iptun_flags & IPTUN_BOUND)
1324 iptun_unbind(iptun);
1327 * After iptun_unregister(), there will be no threads executing a
1328 * downcall from the mac module, including in the tx datapath.
1330 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
1331 VERIFY(iptun_unregister(iptun) == 0);
1333 if (iptun->iptun_itp != NULL) {
1335 * Remove from the AVL tree, AND release the reference iptun_t
1336 * itself holds on the ITP.
1338 itp_unlink(iptun->iptun_itp, iptun->iptun_ns);
1339 ITP_REFRELE(iptun->iptun_itp, iptun->iptun_ns);
1340 iptun->iptun_itp = NULL;
1341 iptun->iptun_flags &= ~IPTUN_SIMPLE_POLICY;
1345 * After ipcl_conn_destroy(), there will be no threads executing an
1346 * upcall from ip (i.e., iptun_input()), and it is then safe to free
1347 * the iptun_t.
1349 if (iptun->iptun_connp != NULL) {
1350 iptun_conn_destroy(iptun->iptun_connp);
1351 iptun->iptun_connp = NULL;
1354 netstack_rele(iptun->iptun_ns);
1355 kmem_cache_free(iptun_cache, iptun);
1356 atomic_dec_32(&iptun_tunnelcount);
1360 iptun_create(iptun_kparams_t *ik, cred_t *credp)
1362 iptun_t *iptun = NULL;
1363 int err = 0, mherr;
1364 char linkname[MAXLINKNAMELEN];
1365 ipsec_tun_pol_t *itp;
1366 netstack_t *ns = NULL;
1367 iptun_stack_t *iptuns;
1368 datalink_id_t tmpid;
1369 zoneid_t zoneid = crgetzoneid(credp);
1370 boolean_t link_created = B_FALSE;
1372 /* The tunnel type is mandatory */
1373 if (!(ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE))
1374 return (EINVAL);
1377 * Is the linkid that the caller wishes to associate with this new
1378 * tunnel assigned to this zone?
1380 if (zone_check_datalink(&zoneid, ik->iptun_kparam_linkid) != 0) {
1381 if (zoneid != GLOBAL_ZONEID)
1382 return (EINVAL);
1383 } else if (zoneid == GLOBAL_ZONEID) {
1384 return (EINVAL);
1388 * Make sure that we're not trying to create a tunnel that has already
1389 * been created.
1391 if (iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun) == 0) {
1392 iptun_exit(iptun);
1393 iptun = NULL;
1394 err = EEXIST;
1395 goto done;
1398 ns = netstack_find_by_cred(credp);
1399 iptuns = ns->netstack_iptun;
1401 if ((iptun = iptun_alloc()) == NULL) {
1402 err = ENOMEM;
1403 goto done;
1406 iptun->iptun_linkid = ik->iptun_kparam_linkid;
1407 iptun->iptun_zoneid = zoneid;
1408 iptun->iptun_ns = ns;
1410 iptun->iptun_typeinfo = iptun_gettypeinfo(ik->iptun_kparam_type);
1411 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_UNKNOWN) {
1412 err = EINVAL;
1413 goto done;
1416 if (ik->iptun_kparam_flags & IPTUN_KPARAM_IMPLICIT)
1417 iptun->iptun_flags |= IPTUN_IMPLICIT;
1419 if ((err = iptun_setparams(iptun, ik)) != 0)
1420 goto done;
1422 iptun->iptun_hoplimit = IPTUN_DEFAULT_HOPLIMIT;
1423 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_IPV6)
1424 iptun->iptun_encaplimit = IPTUN_DEFAULT_ENCAPLIMIT;
1426 iptun_headergen(iptun, B_FALSE);
1428 iptun->iptun_connp = iptun_conn_create(iptun, ns, credp);
1429 if (iptun->iptun_connp == NULL) {
1430 err = ENOMEM;
1431 goto done;
1434 iptun->iptun_mtu = iptun->iptun_typeinfo->iti_maxmtu;
1435 iptun->iptun_dpmtu = iptun->iptun_mtu;
1438 * Find an ITP based on linkname. If we have parms already set via
1439 * the iptun_setparams() call above, it may have created an ITP for
1440 * us. We always try get_tunnel_policy() for DEBUG correctness
1441 * checks, and we may wish to refactor this to only check when
1442 * iptun_itp is NULL.
1444 if ((err = dls_mgmt_get_linkinfo(iptun->iptun_linkid, linkname, NULL,
1445 NULL, NULL)) != 0)
1446 goto done;
1447 if ((itp = get_tunnel_policy(linkname, ns)) != NULL)
1448 iptun->iptun_itp = itp;
1451 * See if we have the necessary IP addresses assigned to this tunnel
1452 * to try and bind them with ip underneath us. If we're not ready to
1453 * bind yet, then we'll defer the bind operation until the addresses
1454 * are modified.
1456 if (iptun_canbind(iptun) && ((err = iptun_bind(iptun)) != 0))
1457 goto done;
1459 if ((err = iptun_register(iptun)) != 0)
1460 goto done;
1462 err = dls_devnet_create(iptun->iptun_mh, iptun->iptun_linkid,
1463 iptun->iptun_zoneid);
1464 if (err != 0)
1465 goto done;
1466 link_created = B_TRUE;
1469 * We hash by link-id as that is the key used by all other iptun
1470 * interfaces (modify, delete, etc.).
1472 if ((mherr = mod_hash_insert(iptun_hash,
1473 IPTUN_HASH_KEY(iptun->iptun_linkid), (mod_hash_val_t)iptun)) == 0) {
1474 mutex_enter(&iptuns->iptuns_lock);
1475 list_insert_head(&iptuns->iptuns_iptunlist, iptun);
1476 mutex_exit(&iptuns->iptuns_lock);
1477 iptun->iptun_flags |= IPTUN_HASH_INSERTED;
1478 } else if (mherr == MH_ERR_NOMEM) {
1479 err = ENOMEM;
1480 } else if (mherr == MH_ERR_DUPLICATE) {
1481 err = EEXIST;
1482 } else {
1483 err = EINVAL;
1486 done:
1487 if (iptun == NULL && ns != NULL)
1488 netstack_rele(ns);
1489 if (err != 0 && iptun != NULL) {
1490 if (link_created) {
1491 (void) dls_devnet_destroy(iptun->iptun_mh, &tmpid,
1492 B_TRUE);
1494 iptun->iptun_flags |= IPTUN_CONDEMNED;
1495 iptun_free(iptun);
1497 return (err);
1501 iptun_delete(datalink_id_t linkid, cred_t *credp)
1503 int err;
1504 iptun_t *iptun = NULL;
1506 if ((err = iptun_enter_by_linkid(linkid, &iptun)) != 0)
1507 return (err);
1509 /* One cannot delete a tunnel that belongs to another zone. */
1510 if (iptun->iptun_zoneid != crgetzoneid(credp)) {
1511 iptun_exit(iptun);
1512 return (EACCES);
1516 * We need to exit iptun in order to issue calls up the stack such as
1517 * dls_devnet_destroy(). If we call up while still in iptun, deadlock
1518 * with calls coming down the stack is possible. We prevent other
1519 * threads from entering this iptun after we've exited it by setting
1520 * the IPTUN_DELETE_PENDING flag. This will cause callers of
1521 * iptun_enter() to block waiting on iptun_enter_cv. The assumption
1522 * here is that the functions we're calling while IPTUN_DELETE_PENDING
1523 * is set dont resuult in an iptun_enter() call, as that would result
1524 * in deadlock.
1526 iptun->iptun_flags |= IPTUN_DELETE_PENDING;
1528 /* Wait for any pending upcall to the mac module to complete. */
1529 while (iptun->iptun_flags & IPTUN_UPCALL_PENDING)
1530 cv_wait(&iptun->iptun_upcall_cv, &iptun->iptun_lock);
1532 iptun_exit(iptun);
1534 if ((err = dls_devnet_destroy(iptun->iptun_mh, &linkid, B_TRUE)) == 0) {
1536 * mac_disable() will fail with EBUSY if there are references
1537 * to the iptun MAC. If there are none, then mac_disable()
1538 * will assure that none can be acquired until the MAC is
1539 * unregistered.
1541 * XXX CR 6791335 prevents us from calling mac_disable() prior
1542 * to dls_devnet_destroy(), so we unfortunately need to
1543 * attempt to re-create the devnet node if mac_disable()
1544 * fails.
1546 if ((err = mac_disable(iptun->iptun_mh)) != 0) {
1547 (void) dls_devnet_create(iptun->iptun_mh, linkid,
1548 iptun->iptun_zoneid);
1553 * Now that we know the fate of this iptun_t, we need to clear
1554 * IPTUN_DELETE_PENDING, and set IPTUN_CONDEMNED if the iptun_t is
1555 * slated to be freed. Either way, we need to signal the threads
1556 * waiting in iptun_enter() so that they can either fail if
1557 * IPTUN_CONDEMNED is set, or continue if it's not.
1559 mutex_enter(&iptun->iptun_lock);
1560 iptun->iptun_flags &= ~IPTUN_DELETE_PENDING;
1561 if (err == 0)
1562 iptun->iptun_flags |= IPTUN_CONDEMNED;
1563 cv_broadcast(&iptun->iptun_enter_cv);
1564 mutex_exit(&iptun->iptun_lock);
1567 * Note that there is no danger in calling iptun_free() after having
1568 * dropped the iptun_lock since callers of iptun_enter() at this point
1569 * are doing so from iptun_enter_by_linkid() (mac_disable() got rid of
1570 * threads entering from mac callbacks which call iptun_enter()
1571 * directly) which holds iptun_hash_lock, and iptun_free() grabs this
1572 * lock in order to remove the iptun_t from the hash table.
1574 if (err == 0)
1575 iptun_free(iptun);
1577 return (err);
1581 iptun_modify(const iptun_kparams_t *ik, cred_t *credp)
1583 iptun_t *iptun;
1584 boolean_t laddr_change = B_FALSE, raddr_change = B_FALSE;
1585 int err;
1587 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
1588 return (err);
1590 /* One cannot modify a tunnel that belongs to another zone. */
1591 if (iptun->iptun_zoneid != crgetzoneid(credp)) {
1592 err = EACCES;
1593 goto done;
1596 /* The tunnel type cannot be changed */
1597 if (ik->iptun_kparam_flags & IPTUN_KPARAM_TYPE) {
1598 err = EINVAL;
1599 goto done;
1602 if ((err = iptun_setparams(iptun, ik)) != 0)
1603 goto done;
1604 iptun_headergen(iptun, B_FALSE);
1607 * If any of the tunnel's addresses has been modified and the tunnel
1608 * has the necessary addresses assigned to it, we need to try to bind
1609 * with ip underneath us. If we're not ready to bind yet, then we'll
1610 * try again when the addresses are modified later.
1612 laddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_LADDR);
1613 raddr_change = (ik->iptun_kparam_flags & IPTUN_KPARAM_RADDR);
1614 if (laddr_change || raddr_change) {
1615 if (iptun->iptun_flags & IPTUN_BOUND)
1616 iptun_unbind(iptun);
1617 if (iptun_canbind(iptun) && (err = iptun_bind(iptun)) != 0) {
1618 if (laddr_change)
1619 iptun->iptun_flags &= ~IPTUN_LADDR;
1620 if (raddr_change)
1621 iptun->iptun_flags &= ~IPTUN_RADDR;
1622 goto done;
1626 if (laddr_change)
1627 iptun_task_dispatch(iptun, IPTUN_TASK_LADDR_UPDATE);
1628 if (raddr_change)
1629 iptun_task_dispatch(iptun, IPTUN_TASK_RADDR_UPDATE);
1631 done:
1632 iptun_exit(iptun);
1633 return (err);
1636 /* Given an IP tunnel's datalink id, fill in its parameters. */
1638 iptun_info(iptun_kparams_t *ik, cred_t *credp)
1640 iptun_t *iptun;
1641 int err;
1643 /* Is the tunnel link visible from the caller's zone? */
1644 if (!dls_devnet_islinkvisible(ik->iptun_kparam_linkid,
1645 crgetzoneid(credp)))
1646 return (ENOENT);
1648 if ((err = iptun_enter_by_linkid(ik->iptun_kparam_linkid, &iptun)) != 0)
1649 return (err);
1651 bzero(ik, sizeof (iptun_kparams_t));
1653 ik->iptun_kparam_linkid = iptun->iptun_linkid;
1654 ik->iptun_kparam_type = iptun->iptun_typeinfo->iti_type;
1655 ik->iptun_kparam_flags |= IPTUN_KPARAM_TYPE;
1657 if (iptun->iptun_flags & IPTUN_LADDR) {
1658 iptun_getaddr(&iptun->iptun_laddr, &ik->iptun_kparam_laddr);
1659 ik->iptun_kparam_flags |= IPTUN_KPARAM_LADDR;
1661 if (iptun->iptun_flags & IPTUN_RADDR) {
1662 iptun_getaddr(&iptun->iptun_raddr, &ik->iptun_kparam_raddr);
1663 ik->iptun_kparam_flags |= IPTUN_KPARAM_RADDR;
1666 if (iptun->iptun_flags & IPTUN_IMPLICIT)
1667 ik->iptun_kparam_flags |= IPTUN_KPARAM_IMPLICIT;
1669 if (iptun->iptun_itp != NULL) {
1670 mutex_enter(&iptun->iptun_itp->itp_lock);
1671 if (iptun->iptun_itp->itp_flags & ITPF_P_ACTIVE) {
1672 ik->iptun_kparam_flags |= IPTUN_KPARAM_IPSECPOL;
1673 if (iptun->iptun_flags & IPTUN_SIMPLE_POLICY) {
1674 ik->iptun_kparam_flags |= IPTUN_KPARAM_SECINFO;
1675 ik->iptun_kparam_secinfo =
1676 iptun->iptun_simple_policy;
1679 mutex_exit(&iptun->iptun_itp->itp_lock);
1682 done:
1683 iptun_exit(iptun);
1684 return (err);
1688 iptun_set_6to4relay(netstack_t *ns, ipaddr_t relay_addr)
1690 if (relay_addr == INADDR_BROADCAST || CLASSD(relay_addr))
1691 return (EADDRNOTAVAIL);
1692 ns->netstack_iptun->iptuns_relay_rtr_addr = relay_addr;
1693 return (0);
1696 void
1697 iptun_get_6to4relay(netstack_t *ns, ipaddr_t *relay_addr)
1699 *relay_addr = ns->netstack_iptun->iptuns_relay_rtr_addr;
1702 void
1703 iptun_set_policy(datalink_id_t linkid, ipsec_tun_pol_t *itp)
1705 iptun_t *iptun;
1707 if (iptun_enter_by_linkid(linkid, &iptun) != 0)
1708 return;
1709 if (iptun->iptun_itp != itp) {
1710 ASSERT(iptun->iptun_itp == NULL);
1711 ITP_REFHOLD(itp);
1712 iptun->iptun_itp = itp;
1715 * IPsec policy means IPsec overhead, which means lower MTU.
1716 * Refresh the MTU for this tunnel.
1718 (void) iptun_update_mtu(iptun, NULL, 0);
1719 iptun_exit(iptun);
1723 * Obtain the path MTU to the tunnel destination.
1724 * Can return zero in some cases.
1726 static uint32_t
1727 iptun_get_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
1729 uint32_t pmtu = 0;
1730 conn_t *connp = iptun->iptun_connp;
1731 boolean_t need_rele = B_FALSE;
1734 * We only obtain the pmtu for tunnels that have a remote tunnel
1735 * address.
1737 if (!(iptun->iptun_flags & IPTUN_RADDR))
1738 return (0);
1740 if (ixa == NULL) {
1741 ixa = conn_get_ixa(connp, B_FALSE);
1742 if (ixa == NULL)
1743 return (0);
1744 need_rele = B_TRUE;
1747 * Guard against ICMP errors before we have sent, as well as against
1748 * and a thread which held conn_ixa.
1750 if (ixa->ixa_ire != NULL) {
1751 pmtu = ip_get_pmtu(ixa);
1754 * For both IPv4 and IPv6 we can have indication that the outer
1755 * header needs fragmentation.
1757 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
1758 /* Must allow fragmentation in ip_output */
1759 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1760 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
1761 ixa->ixa_flags |= IXAF_DONTFRAG;
1762 } else {
1763 /* ip_get_pmtu might have set this - we don't want it */
1764 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
1768 if (need_rele)
1769 ixa_refrele(ixa);
1770 return (pmtu);
1774 * Update the ip_xmit_attr_t to capture the current lower path mtu as known
1775 * by ip.
1777 static void
1778 iptun_update_dst_pmtu(iptun_t *iptun, ip_xmit_attr_t *ixa)
1780 uint32_t pmtu;
1781 conn_t *connp = iptun->iptun_connp;
1782 boolean_t need_rele = B_FALSE;
1784 /* IXAF_VERIFY_PMTU is not set if we don't have a fixed destination */
1785 if (!(iptun->iptun_flags & IPTUN_RADDR))
1786 return;
1788 if (ixa == NULL) {
1789 ixa = conn_get_ixa(connp, B_FALSE);
1790 if (ixa == NULL)
1791 return;
1792 need_rele = B_TRUE;
1795 * Guard against ICMP errors before we have sent, as well as against
1796 * and a thread which held conn_ixa.
1798 if (ixa->ixa_ire != NULL) {
1799 pmtu = ip_get_pmtu(ixa);
1801 * Update ixa_fragsize and ixa_pmtu.
1803 ixa->ixa_fragsize = ixa->ixa_pmtu = pmtu;
1806 * For both IPv4 and IPv6 we can have indication that the outer
1807 * header needs fragmentation.
1809 if (ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) {
1810 /* Must allow fragmentation in ip_output */
1811 ixa->ixa_flags &= ~IXAF_DONTFRAG;
1812 } else if (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4) {
1813 ixa->ixa_flags |= IXAF_DONTFRAG;
1814 } else {
1815 /* ip_get_pmtu might have set this - we don't want it */
1816 ixa->ixa_flags &= ~IXAF_PMTU_IPV4_DF;
1820 if (need_rele)
1821 ixa_refrele(ixa);
1825 * There is nothing that iptun can verify in addition to IP having
1826 * verified the IP addresses in the fanout.
1828 /* ARGSUSED */
1829 static boolean_t
1830 iptun_verifyicmp(conn_t *connp, void *arg2, icmph_t *icmph, icmp6_t *icmp6,
1831 ip_recv_attr_t *ira)
1833 return (B_TRUE);
1837 * Notify function registered with ip_xmit_attr_t.
1839 static void
1840 iptun_notify(void *arg, ip_xmit_attr_t *ixa, ixa_notify_type_t ntype,
1841 ixa_notify_arg_t narg)
1843 iptun_t *iptun = (iptun_t *)arg;
1845 switch (ntype) {
1846 case IXAN_PMTU:
1847 (void) iptun_update_mtu(iptun, ixa, narg);
1848 break;
1853 * Returns the max of old_ovhd and the overhead associated with pol.
1855 static uint32_t
1856 iptun_max_policy_overhead(ipsec_policy_t *pol, uint32_t old_ovhd)
1858 uint32_t new_ovhd = old_ovhd;
1860 while (pol != NULL) {
1861 new_ovhd = max(new_ovhd,
1862 ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
1863 pol = pol->ipsp_hash.hash_next;
1865 return (new_ovhd);
1868 static uint32_t
1869 iptun_get_ipsec_overhead(iptun_t *iptun)
1871 ipsec_policy_root_t *ipr;
1872 ipsec_policy_head_t *iph;
1873 ipsec_policy_t *pol;
1874 ipsec_selector_t sel;
1875 int i;
1876 uint32_t ipsec_ovhd = 0;
1877 ipsec_tun_pol_t *itp = iptun->iptun_itp;
1878 netstack_t *ns = iptun->iptun_ns;
1880 if (itp == NULL || !(itp->itp_flags & ITPF_P_ACTIVE)) {
1882 * Consult global policy, just in case. This will only work
1883 * if we have both source and destination addresses to work
1884 * with.
1886 if ((iptun->iptun_flags & (IPTUN_LADDR|IPTUN_RADDR)) !=
1887 (IPTUN_LADDR|IPTUN_RADDR))
1888 return (0);
1890 iph = ipsec_system_policy(ns);
1891 bzero(&sel, sizeof (sel));
1892 sel.ips_isv4 =
1893 (iptun->iptun_typeinfo->iti_ipvers == IPV4_VERSION);
1894 switch (iptun->iptun_typeinfo->iti_ipvers) {
1895 case IPV4_VERSION:
1896 sel.ips_local_addr_v4 = iptun->iptun_laddr4;
1897 sel.ips_remote_addr_v4 = iptun->iptun_raddr4;
1898 break;
1899 case IPV6_VERSION:
1900 sel.ips_local_addr_v6 = iptun->iptun_laddr6;
1901 sel.ips_remote_addr_v6 = iptun->iptun_raddr6;
1902 break;
1904 /* Check for both IPv4 and IPv6. */
1905 sel.ips_protocol = IPPROTO_ENCAP;
1906 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
1907 &sel);
1908 if (pol != NULL) {
1909 ipsec_ovhd = ipsec_act_ovhd(&pol->ipsp_act->ipa_act);
1910 IPPOL_REFRELE(pol);
1912 sel.ips_protocol = IPPROTO_IPV6;
1913 pol = ipsec_find_policy_head(NULL, iph, IPSEC_TYPE_OUTBOUND,
1914 &sel);
1915 if (pol != NULL) {
1916 ipsec_ovhd = max(ipsec_ovhd,
1917 ipsec_act_ovhd(&pol->ipsp_act->ipa_act));
1918 IPPOL_REFRELE(pol);
1920 IPPH_REFRELE(iph, ns);
1921 } else {
1923 * Look through all of the possible IPsec actions for the
1924 * tunnel, and find the largest potential IPsec overhead.
1926 iph = itp->itp_policy;
1927 rw_enter(&iph->iph_lock, RW_READER);
1928 ipr = &(iph->iph_root[IPSEC_TYPE_OUTBOUND]);
1929 ipsec_ovhd = iptun_max_policy_overhead(
1930 ipr->ipr_nonhash[IPSEC_AF_V4], 0);
1931 ipsec_ovhd = iptun_max_policy_overhead(
1932 ipr->ipr_nonhash[IPSEC_AF_V6], ipsec_ovhd);
1933 for (i = 0; i < ipr->ipr_nchains; i++) {
1934 ipsec_ovhd = iptun_max_policy_overhead(
1935 ipr->ipr_hash[i].hash_head, ipsec_ovhd);
1937 rw_exit(&iph->iph_lock);
1940 return (ipsec_ovhd);
1944 * Calculate and return the maximum possible upper MTU for the given tunnel.
1946 * If new_pmtu is set then we also need to update the lower path MTU information
1947 * in the ip_xmit_attr_t. That is needed since we set IXAF_VERIFY_PMTU so that
1948 * we are notified by conn_ip_output() when the path MTU increases.
1950 static uint32_t
1951 iptun_get_maxmtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
1953 size_t header_size, ipsec_overhead;
1954 uint32_t maxmtu, pmtu;
1957 * Start with the path-MTU to the remote address, which is either
1958 * provided as the new_pmtu argument, or obtained using
1959 * iptun_get_dst_pmtu().
1961 if (new_pmtu != 0) {
1962 if (iptun->iptun_flags & IPTUN_RADDR)
1963 iptun->iptun_dpmtu = new_pmtu;
1964 pmtu = new_pmtu;
1965 } else if (iptun->iptun_flags & IPTUN_RADDR) {
1966 if ((pmtu = iptun_get_dst_pmtu(iptun, ixa)) == 0) {
1968 * We weren't able to obtain the path-MTU of the
1969 * destination. Use the previous value.
1971 pmtu = iptun->iptun_dpmtu;
1972 } else {
1973 iptun->iptun_dpmtu = pmtu;
1975 } else {
1977 * We have no path-MTU information to go on, use the maximum
1978 * possible value.
1980 pmtu = iptun->iptun_typeinfo->iti_maxmtu;
1984 * Now calculate tunneling overhead and subtract that from the
1985 * path-MTU information obtained above.
1987 if (iptun->iptun_header_size != 0) {
1988 header_size = iptun->iptun_header_size;
1989 } else {
1990 switch (iptun->iptun_typeinfo->iti_ipvers) {
1991 case IPV4_VERSION:
1992 header_size = sizeof (ipha_t);
1993 break;
1994 case IPV6_VERSION:
1995 header_size = sizeof (iptun_ipv6hdrs_t);
1996 break;
2000 ipsec_overhead = iptun_get_ipsec_overhead(iptun);
2002 maxmtu = pmtu - (header_size + ipsec_overhead);
2003 return (max(maxmtu, iptun->iptun_typeinfo->iti_minmtu));
2007 * Re-calculate the tunnel's MTU as seen from above and notify the MAC layer
2008 * of any change in MTU. The new_pmtu argument is the new lower path MTU to
2009 * the tunnel destination to be used in the tunnel MTU calculation. Passing
2010 * in 0 for new_pmtu causes the lower path MTU to be dynamically updated using
2011 * ip_get_pmtu().
2013 * If the calculated tunnel MTU is different than its previous value, then we
2014 * notify the MAC layer above us of this change using mac_maxsdu_update().
2016 static uint32_t
2017 iptun_update_mtu(iptun_t *iptun, ip_xmit_attr_t *ixa, uint32_t new_pmtu)
2019 uint32_t newmtu;
2021 /* We always update the ixa since we might have set IXAF_VERIFY_PMTU */
2022 iptun_update_dst_pmtu(iptun, ixa);
2025 * We return the current MTU without updating it if it was pegged to a
2026 * static value using the MAC_PROP_MTU link property.
2028 if (iptun->iptun_flags & IPTUN_FIXED_MTU)
2029 return (iptun->iptun_mtu);
2031 /* If the MTU isn't fixed, then use the maximum possible value. */
2032 newmtu = iptun_get_maxmtu(iptun, ixa, new_pmtu);
2034 * We only dynamically adjust the tunnel MTU for tunnels with
2035 * destinations because dynamic MTU calculations are based on the
2036 * destination path-MTU.
2038 if ((iptun->iptun_flags & IPTUN_RADDR) && newmtu != iptun->iptun_mtu) {
2039 iptun->iptun_mtu = newmtu;
2040 if (iptun->iptun_flags & IPTUN_MAC_REGISTERED)
2041 iptun_task_dispatch(iptun, IPTUN_TASK_MTU_UPDATE);
2044 return (newmtu);
2048 * Frees a packet or packet chain and bumps stat for each freed packet.
2050 static void
2051 iptun_drop_pkt(mblk_t *mp, uint64_t *stat)
2053 mblk_t *pktmp;
2055 for (pktmp = mp; pktmp != NULL; pktmp = mp) {
2056 mp = mp->b_next;
2057 pktmp->b_next = NULL;
2058 if (stat != NULL)
2059 atomic_inc_64(stat);
2060 freemsg(pktmp);
2065 * Allocate and return a new mblk to hold an IP and ICMP header, and chain the
2066 * original packet to its b_cont. Returns NULL on failure.
2068 static mblk_t *
2069 iptun_build_icmperr(size_t hdrs_size, mblk_t *orig_pkt)
2071 mblk_t *icmperr_mp;
2073 if ((icmperr_mp = allocb(hdrs_size, BPRI_MED)) != NULL) {
2074 icmperr_mp->b_wptr += hdrs_size;
2075 /* tack on the offending packet */
2076 icmperr_mp->b_cont = orig_pkt;
2078 return (icmperr_mp);
2082 * Transmit an ICMP error. mp->b_rptr points at the packet to be included in
2083 * the ICMP error.
2085 static void
2086 iptun_sendicmp_v4(iptun_t *iptun, icmph_t *icmp, ipha_t *orig_ipha, mblk_t *mp)
2088 size_t orig_pktsize, hdrs_size;
2089 mblk_t *icmperr_mp;
2090 ipha_t *new_ipha;
2091 icmph_t *new_icmp;
2092 ip_xmit_attr_t ixas;
2093 conn_t *connp = iptun->iptun_connp;
2095 orig_pktsize = msgdsize(mp);
2096 hdrs_size = sizeof (ipha_t) + sizeof (icmph_t);
2097 if ((icmperr_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
2098 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2099 return;
2102 new_ipha = (ipha_t *)icmperr_mp->b_rptr;
2103 new_icmp = (icmph_t *)(new_ipha + 1);
2105 new_ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
2106 new_ipha->ipha_type_of_service = 0;
2107 new_ipha->ipha_ident = 0;
2108 new_ipha->ipha_fragment_offset_and_flags = 0;
2109 new_ipha->ipha_ttl = orig_ipha->ipha_ttl;
2110 new_ipha->ipha_protocol = IPPROTO_ICMP;
2111 new_ipha->ipha_src = orig_ipha->ipha_dst;
2112 new_ipha->ipha_dst = orig_ipha->ipha_src;
2113 new_ipha->ipha_hdr_checksum = 0; /* will be computed by ip */
2114 new_ipha->ipha_length = htons(hdrs_size + orig_pktsize);
2116 *new_icmp = *icmp;
2117 new_icmp->icmph_checksum = 0;
2118 new_icmp->icmph_checksum = IP_CSUM(icmperr_mp, sizeof (ipha_t), 0);
2120 bzero(&ixas, sizeof (ixas));
2121 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V4;
2122 if (new_ipha->ipha_src == INADDR_ANY) {
2123 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
2124 ixas.ixa_flags |= IXAF_SET_SOURCE;
2127 ixas.ixa_zoneid = IPCL_ZONEID(connp);
2128 ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
2129 ixas.ixa_cred = connp->conn_cred;
2130 ixas.ixa_cpid = NOPID;
2132 ixas.ixa_ifindex = 0;
2133 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2135 (void) ip_output_simple(icmperr_mp, &ixas);
2136 ixa_cleanup(&ixas);
2139 static void
2140 iptun_sendicmp_v6(iptun_t *iptun, icmp6_t *icmp6, ip6_t *orig_ip6h, mblk_t *mp)
2142 size_t orig_pktsize, hdrs_size;
2143 mblk_t *icmp6err_mp;
2144 ip6_t *new_ip6h;
2145 icmp6_t *new_icmp6;
2146 ip_xmit_attr_t ixas;
2147 conn_t *connp = iptun->iptun_connp;
2149 orig_pktsize = msgdsize(mp);
2150 hdrs_size = sizeof (ip6_t) + sizeof (icmp6_t);
2151 if ((icmp6err_mp = iptun_build_icmperr(hdrs_size, mp)) == NULL) {
2152 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2153 return;
2156 new_ip6h = (ip6_t *)icmp6err_mp->b_rptr;
2157 new_icmp6 = (icmp6_t *)(new_ip6h + 1);
2159 new_ip6h->ip6_vcf = orig_ip6h->ip6_vcf;
2160 new_ip6h->ip6_plen = htons(sizeof (icmp6_t) + orig_pktsize);
2161 new_ip6h->ip6_hops = orig_ip6h->ip6_hops;
2162 new_ip6h->ip6_nxt = IPPROTO_ICMPV6;
2163 new_ip6h->ip6_src = orig_ip6h->ip6_dst;
2164 new_ip6h->ip6_dst = orig_ip6h->ip6_src;
2166 *new_icmp6 = *icmp6;
2167 /* The checksum is calculated in ip_output_simple and friends. */
2168 new_icmp6->icmp6_cksum = new_ip6h->ip6_plen;
2170 bzero(&ixas, sizeof (ixas));
2171 ixas.ixa_flags = IXAF_BASIC_SIMPLE_V6;
2172 if (IN6_IS_ADDR_UNSPECIFIED(&new_ip6h->ip6_src)) {
2173 ixas.ixa_flags &= ~IXAF_VERIFY_SOURCE;
2174 ixas.ixa_flags |= IXAF_SET_SOURCE;
2177 ixas.ixa_zoneid = IPCL_ZONEID(connp);
2178 ixas.ixa_ipst = connp->conn_netstack->netstack_ip;
2179 ixas.ixa_cred = connp->conn_cred;
2180 ixas.ixa_cpid = NOPID;
2182 ixas.ixa_ifindex = 0;
2183 ixas.ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
2185 (void) ip_output_simple(icmp6err_mp, &ixas);
2186 ixa_cleanup(&ixas);
2189 static void
2190 iptun_icmp_error_v4(iptun_t *iptun, ipha_t *orig_ipha, mblk_t *mp,
2191 uint8_t type, uint8_t code)
2193 icmph_t icmp;
2195 bzero(&icmp, sizeof (icmp));
2196 icmp.icmph_type = type;
2197 icmp.icmph_code = code;
2199 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp);
2202 static void
2203 iptun_icmp_fragneeded_v4(iptun_t *iptun, uint32_t newmtu, ipha_t *orig_ipha,
2204 mblk_t *mp)
2206 icmph_t icmp;
2208 icmp.icmph_type = ICMP_DEST_UNREACHABLE;
2209 icmp.icmph_code = ICMP_FRAGMENTATION_NEEDED;
2210 icmp.icmph_du_zero = 0;
2211 icmp.icmph_du_mtu = htons(newmtu);
2213 iptun_sendicmp_v4(iptun, &icmp, orig_ipha, mp);
2216 static void
2217 iptun_icmp_error_v6(iptun_t *iptun, ip6_t *orig_ip6h, mblk_t *mp,
2218 uint8_t type, uint8_t code, uint32_t offset)
2220 icmp6_t icmp6;
2222 bzero(&icmp6, sizeof (icmp6));
2223 icmp6.icmp6_type = type;
2224 icmp6.icmp6_code = code;
2225 if (type == ICMP6_PARAM_PROB)
2226 icmp6.icmp6_pptr = htonl(offset);
2228 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp);
2231 static void
2232 iptun_icmp_toobig_v6(iptun_t *iptun, uint32_t newmtu, ip6_t *orig_ip6h,
2233 mblk_t *mp)
2235 icmp6_t icmp6;
2237 icmp6.icmp6_type = ICMP6_PACKET_TOO_BIG;
2238 icmp6.icmp6_code = 0;
2239 icmp6.icmp6_mtu = htonl(newmtu);
2241 iptun_sendicmp_v6(iptun, &icmp6, orig_ip6h, mp);
2245 * Determines if the packet pointed to by ipha or ip6h is an ICMP error. The
2246 * mp argument is only used to do bounds checking.
2248 static boolean_t
2249 is_icmp_error(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h)
2251 uint16_t hlen;
2253 if (ipha != NULL) {
2254 icmph_t *icmph;
2256 ASSERT(ip6h == NULL);
2257 if (ipha->ipha_protocol != IPPROTO_ICMP)
2258 return (B_FALSE);
2260 hlen = IPH_HDR_LENGTH(ipha);
2261 icmph = (icmph_t *)((uint8_t *)ipha + hlen);
2262 return (ICMP_IS_ERROR(icmph->icmph_type) ||
2263 icmph->icmph_type == ICMP_REDIRECT);
2264 } else {
2265 icmp6_t *icmp6;
2266 uint8_t *nexthdrp;
2268 ASSERT(ip6h != NULL);
2269 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hlen, &nexthdrp) ||
2270 *nexthdrp != IPPROTO_ICMPV6) {
2271 return (B_FALSE);
2274 icmp6 = (icmp6_t *)((uint8_t *)ip6h + hlen);
2275 return (ICMP6_IS_ERROR(icmp6->icmp6_type) ||
2276 icmp6->icmp6_type == ND_REDIRECT);
2281 * Find inner and outer IP headers from a tunneled packet as setup for calls
2282 * into ipsec_tun_{in,out}bound().
2283 * Note that we need to allow the outer header to be in a separate mblk from
2284 * the inner header.
2285 * If the caller knows the outer_hlen, the caller passes it in. Otherwise zero.
/*
 * Returns the outer header length on success, or 0 if the packet is not
 * usable (truncated headers, unknown IP version, or no inner payload).
 * On success, exactly one of *outer4/*outer6 and one of *inner4/*inner6
 * is set; the others are NULL.
 */
2287 static size_t
2288 iptun_find_headers(mblk_t *mp, size_t outer_hlen, ipha_t **outer4,
2289 ipha_t **inner4, ip6_t **outer6, ip6_t **inner6)
2291 ipha_t *ipha;
2292 size_t first_mblkl = MBLKL(mp);
2293 mblk_t *inner_mp;
2296 * Don't bother handling packets that don't have a full IP header in
2297 * the fist mblk. For the input path, the ip module ensures that this
2298 * won't happen, and on the output path, the IP tunneling MAC-type
2299 * plugins ensure that this also won't happen.
2301 if (first_mblkl < sizeof (ipha_t))
2302 return (0);
2303 ipha = (ipha_t *)(mp->b_rptr);
/* Classify the outer header by IP version nibble. */
2304 switch (IPH_HDR_VERSION(ipha)) {
2305 case IPV4_VERSION:
2306 *outer4 = ipha;
2307 *outer6 = NULL;
2308 if (outer_hlen == 0)
2309 outer_hlen = IPH_HDR_LENGTH(ipha);
2310 break;
2311 case IPV6_VERSION:
2312 *outer4 = NULL;
2313 *outer6 = (ip6_t *)ipha;
2314 if (outer_hlen == 0)
2315 outer_hlen = ip_hdr_length_v6(mp, (ip6_t *)ipha);
2316 break;
2317 default:
2318 return (0);
/*
 * The whole outer header must sit in the first mblk, and there must be
 * inner data beyond it -- either in this mblk or in a continuation.
 */
2321 if (first_mblkl < outer_hlen ||
2322 (first_mblkl == outer_hlen && mp->b_cont == NULL))
2323 return (0);
2326 * We don't bother doing a pullup here since the outer header will
2327 * just get stripped off soon on input anyway. We just want to ensure
2328 * that the inner* pointer points to a full header.
2330 if (first_mblkl == outer_hlen) {
/* Inner header starts at the top of the next mblk. */
2331 inner_mp = mp->b_cont;
2332 ipha = (ipha_t *)inner_mp->b_rptr;
2333 } else {
/* Inner header follows the outer header in the same mblk. */
2334 inner_mp = mp;
2335 ipha = (ipha_t *)(mp->b_rptr + outer_hlen);
2337 switch (IPH_HDR_VERSION(ipha)) {
2338 case IPV4_VERSION:
/* Require the full fixed inner header to be present. */
2339 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ipha_t))
2340 return (0);
2341 *inner4 = ipha;
2342 *inner6 = NULL;
2343 break;
2344 case IPV6_VERSION:
2345 if (inner_mp->b_wptr - (uint8_t *)ipha < sizeof (ip6_t))
2346 return (0);
2347 *inner4 = NULL;
2348 *inner6 = (ip6_t *)ipha;
2349 break;
2350 default:
2351 return (0);
2354 return (outer_hlen);
2358 * Received ICMP error in response to an X over IPv4 packet that we
2359 * transmitted.
2361 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2362 * the following:
2364 * [IPv4(0)][ICMPv4][IPv4(1)][IPv4(2)][ULP]
2366 * or
2368 * [IPv4(0)][ICMPv4][IPv4(1)][IPv6][ULP]
2370 * And "outer4" will get set to IPv4(1), and inner[46] will correspond to
2371 * whatever the very-inner packet is (IPv4(2) or IPv6).
/*
 * Translates the error into the appropriate ICMP/ICMPv6 error for the
 * inner packet's sender and relays it.  Consumes data_mp on all paths.
 */
2373 static void
2374 iptun_input_icmp_v4(iptun_t *iptun, mblk_t *data_mp, icmph_t *icmph,
2375 ip_recv_attr_t *ira)
2377 uint8_t *orig;
2378 ipha_t *outer4, *inner4;
2379 ip6_t *outer6, *inner6;
2380 int outer_hlen;
2381 uint8_t type, code;
2383 ASSERT(data_mp->b_cont == NULL);
2385 * Temporarily move b_rptr forward so that iptun_find_headers() can
2386 * find headers in the ICMP packet payload.
2388 orig = data_mp->b_rptr;
2389 data_mp->b_rptr = (uint8_t *)(icmph + 1);
2391 * The ip module ensures that ICMP errors contain at least the
2392 * original IP header (otherwise, the error would never have made it
2393 * here).
2395 ASSERT(MBLKL(data_mp) >= 0);
2396 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
2397 &inner6);
2398 ASSERT(outer6 == NULL);
2399 data_mp->b_rptr = orig;
2400 if (outer_hlen == 0) {
2401 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2402 return;
2405 /* Only ICMP errors due to tunneled packets should reach here. */
2406 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP ||
2407 outer4->ipha_protocol == IPPROTO_IPV6);
/*
 * Give IPsec a chance to process/strip policy state.  The negated
 * outer_hlen apparently distinguishes this ICMP-error path from the
 * normal inbound path (see iptun_input(), which passes it positive).
 */
2409 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2410 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
2411 if (data_mp == NULL) {
2412 /* Callee did all of the freeing. */
2413 atomic_inc_64(&iptun->iptun_ierrors);
2414 return;
2416 /* We should never see reassembled fragment here. */
2417 ASSERT(data_mp->b_next == NULL);
/* Step past the embedded outer header; what remains is the inner packet. */
2419 data_mp->b_rptr = (uint8_t *)outer4 + outer_hlen;
2422 * If the original packet being transmitted was itself an ICMP error,
2423 * then drop this packet. We don't want to generate an ICMP error in
2424 * response to an ICMP error.
2426 if (is_icmp_error(data_mp, inner4, inner6)) {
2427 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2428 return;
/* Map the received ICMPv4 error to one we can relay to the inner sender. */
2431 switch (icmph->icmph_type) {
2432 case ICMP_DEST_UNREACHABLE:
2433 type = (inner4 != NULL ? icmph->icmph_type : ICMP6_DST_UNREACH);
2434 switch (icmph->icmph_code) {
2435 case ICMP_FRAGMENTATION_NEEDED: {
2436 uint32_t newmtu;
2439 * We reconcile this with the fact that the tunnel may
2440 * also have IPsec policy by letting iptun_update_mtu
2441 * take care of it.
2443 newmtu = iptun_update_mtu(iptun, NULL,
2444 ntohs(icmph->icmph_du_mtu));
2446 if (inner4 != NULL) {
2447 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
2448 data_mp);
2449 } else {
2450 iptun_icmp_toobig_v6(iptun, newmtu, inner6,
2451 data_mp);
2453 return;
2455 case ICMP_DEST_NET_UNREACH_ADMIN:
2456 case ICMP_DEST_HOST_UNREACH_ADMIN:
2457 code = (inner4 != NULL ? ICMP_DEST_NET_UNREACH_ADMIN :
2458 ICMP6_DST_UNREACH_ADMIN);
2459 break;
2460 default:
2461 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
2462 ICMP6_DST_UNREACH_ADDR);
2463 break;
2465 break;
2466 case ICMP_TIME_EXCEEDED:
/*
 * NOTE(review): for the inner-IPv4 case, no assignment to type/code
 * is visible before this point in this path, despite the comment
 * below -- verify they are initialized somewhere, otherwise this
 * relays uninitialized stack values to iptun_icmp_error_v4().
 */
2467 if (inner6 != NULL) {
2468 type = ICMP6_TIME_EXCEEDED;
2469 code = 0;
2470 } /* else we're already set. */
2471 break;
2472 case ICMP_PARAM_PROBLEM:
2474 * This is a problem with the outer header we transmitted.
2475 * Treat this as an output error.
2477 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
2478 return;
2479 default:
2480 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2481 return;
/* Relay the translated error in the inner packet's address family. */
2484 if (inner4 != NULL) {
2485 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code);
2486 } else {
2487 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0);
2492 * Return B_TRUE if the IPv6 packet pointed to by ip6h contains a Tunnel
2493 * Encapsulation Limit destination option. If there is one, set encaplim_ptr
2494 * to point to the option value.
2496 static boolean_t
2497 iptun_find_encaplimit(mblk_t *mp, ip6_t *ip6h, uint8_t **encaplim_ptr)
2499 ip_pkt_t pkt;
2500 uint8_t *endptr;
2501 ip6_dest_t *destp;
2502 struct ip6_opt *optp;
2504 pkt.ipp_fields = 0; /* must be initialized */
2505 (void) ip_find_hdr_v6(mp, ip6h, &pkt, NULL);
/*
 * The encapsulation limit may live in a plain destination options
 * header or in one that precedes a routing header; check both.
 */
2506 if ((pkt.ipp_fields & IPPF_DSTOPTS) != 0) {
2507 destp = pkt.ipp_dstopts;
2508 } else if ((pkt.ipp_fields & IPPF_RTHDRDSTOPTS) != 0) {
2509 destp = pkt.ipp_rthdrdstopts;
2510 } else {
2511 return (B_FALSE);
/* ip6d_len counts 8-octet units beyond the first 8 octets. */
2514 endptr = (uint8_t *)destp + 8 * (destp->ip6d_len + 1);
2515 optp = (struct ip6_opt *)(destp + 1);
/*
 * TLV walk over the options area.
 * NOTE(review): the advance below assumes every option has a length
 * octet; an IP6OPT_PAD1 option (a single octet with no length field)
 * would desynchronize this walk -- confirm whether such headers can
 * reach here.
 */
2516 while (endptr - (uint8_t *)optp > sizeof (*optp)) {
2517 if (optp->ip6o_type == IP6OPT_TUNNEL_LIMIT) {
/* Bounds-check the one-octet option value before exposing it. */
2518 if ((uint8_t *)(optp + 1) >= endptr)
2519 return (B_FALSE);
2520 *encaplim_ptr = (uint8_t *)&optp[1];
2521 return (B_TRUE);
2523 optp = (struct ip6_opt *)((uint8_t *)optp + optp->ip6o_len + 2);
2525 return (B_FALSE);
2529 * Received ICMPv6 error in response to an X over IPv6 packet that we
2530 * transmitted.
2532 * NOTE: "outer" refers to what's inside the ICMP payload. We will get one of
2533 * the following:
2535 * [IPv6(0)][ICMPv6][IPv6(1)][IPv4][ULP]
2537 * or
2539 * [IPv6(0)][ICMPv6][IPv6(1)][IPv6(2)][ULP]
2541 * And "outer6" will get set to IPv6(1), and inner[46] will correspond to
2542 * whatever the very-inner packet is (IPv4 or IPv6(2)).
/*
 * Translates the error into the appropriate ICMP/ICMPv6 error for the
 * inner packet's sender and relays it.  Consumes data_mp on all paths.
 */
2544 static void
2545 iptun_input_icmp_v6(iptun_t *iptun, mblk_t *data_mp, icmp6_t *icmp6h,
2546 ip_recv_attr_t *ira)
2548 uint8_t *orig;
2549 ipha_t *outer4, *inner4;
2550 ip6_t *outer6, *inner6;
2551 int outer_hlen;
2552 uint8_t type, code;
2554 ASSERT(data_mp->b_cont == NULL);
2557 * Temporarily move b_rptr forward so that iptun_find_headers() can
2558 * find IP headers in the ICMP packet payload.
2560 orig = data_mp->b_rptr;
2561 data_mp->b_rptr = (uint8_t *)(icmp6h + 1);
2563 * The ip module ensures that ICMP errors contain at least the
2564 * original IP header (otherwise, the error would never have made it
2565 * here).
2567 ASSERT(MBLKL(data_mp) >= 0);
2568 outer_hlen = iptun_find_headers(data_mp, 0, &outer4, &inner4, &outer6,
2569 &inner6);
2570 ASSERT(outer4 == NULL);
2571 data_mp->b_rptr = orig; /* Restore r_ptr */
2572 if (outer_hlen == 0) {
2573 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2574 return;
/* Negative outer_hlen marks the ICMP-error case; see iptun_input(). */
2577 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2578 inner4, inner6, outer4, outer6, -outer_hlen, iptun->iptun_ns);
2579 if (data_mp == NULL) {
2580 /* Callee did all of the freeing. */
2581 atomic_inc_64(&iptun->iptun_ierrors);
2582 return;
2584 /* We should never see reassembled fragment here. */
2585 ASSERT(data_mp->b_next == NULL);
/* Step past the embedded outer header; what remains is the inner packet. */
2587 data_mp->b_rptr = (uint8_t *)outer6 + outer_hlen;
2590 * If the original packet being transmitted was itself an ICMP error,
2591 * then drop this packet. We don't want to generate an ICMP error in
2592 * response to an ICMP error.
2594 if (is_icmp_error(data_mp, inner4, inner6)) {
2595 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2596 return;
2599 switch (icmp6h->icmp6_type) {
2600 case ICMP6_PARAM_PROB: {
2601 uint8_t *encaplim_ptr;
2604 * If the ICMPv6 error points to a valid Tunnel Encapsulation
2605 * Limit option and the limit value is 0, then fall through
2606 * and send a host unreachable message. Otherwise, treat the
2607 * error as an output error, as there must have been a problem
2608 * with a packet we sent.
/*
 * NOTE(review): icmp6_pptr is compared against a host-order offset
 * without ntohl(); verify the field has been byte-order normalized
 * by the time it reaches here.
 */
2610 if (!iptun_find_encaplimit(data_mp, outer6, &encaplim_ptr) ||
2611 (icmp6h->icmp6_pptr !=
2612 ((ptrdiff_t)encaplim_ptr - (ptrdiff_t)outer6)) ||
2613 *encaplim_ptr != 0) {
2614 iptun_drop_pkt(data_mp, &iptun->iptun_oerrors);
2615 return;
2617 /* FALLTHRU */
2619 case ICMP6_TIME_EXCEEDED:
2620 case ICMP6_DST_UNREACH:
/* Relay these as host-unreachable in the inner address family. */
2621 type = (inner4 != NULL ? ICMP_DEST_UNREACHABLE :
2622 ICMP6_DST_UNREACH);
2623 code = (inner4 != NULL ? ICMP_HOST_UNREACHABLE :
2624 ICMP6_DST_UNREACH_ADDR);
2625 break;
2626 case ICMP6_PACKET_TOO_BIG: {
2627 uint32_t newmtu;
2630 * We reconcile this with the fact that the tunnel may also
2631 * have IPsec policy by letting iptun_update_mtu take care of
2632 * it.
2634 newmtu = iptun_update_mtu(iptun, NULL,
2635 ntohl(icmp6h->icmp6_mtu));
2637 if (inner4 != NULL) {
2638 iptun_icmp_fragneeded_v4(iptun, newmtu, inner4,
2639 data_mp);
2640 } else {
2641 iptun_icmp_toobig_v6(iptun, newmtu, inner6, data_mp);
2643 return;
2645 default:
2646 iptun_drop_pkt(data_mp, &iptun->iptun_norcvbuf);
2647 return;
/* Relay the translated error in the inner packet's address family. */
2650 if (inner4 != NULL) {
2651 iptun_icmp_error_v4(iptun, inner4, data_mp, type, code);
2652 } else {
2653 iptun_icmp_error_v6(iptun, inner6, data_mp, type, code, 0);
2658 * Called as conn_recvicmp from IP for ICMP errors.
2660 /* ARGSUSED2 */
2661 static void
2662 iptun_input_icmp(void *arg, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
2664 conn_t *connp = arg;
2665 iptun_t *iptun = connp->conn_iptun;
2666 mblk_t *tmpmp;
2667 size_t hlen;
2669 ASSERT(IPCL_IS_IPTUN(connp));
2671 if (mp->b_cont != NULL) {
2673 * Since ICMP error processing necessitates access to bits
2674 * that are within the ICMP error payload (the original packet
2675 * that caused the error), pull everything up into a single
2676 * block for convenience.
2678 if ((tmpmp = msgpullup(mp, -1)) == NULL) {
2679 iptun_drop_pkt(mp, &iptun->iptun_norcvbuf);
2680 return;
2682 freemsg(mp);
2683 mp = tmpmp;
2686 hlen = ira->ira_ip_hdr_length;
2687 switch (iptun->iptun_typeinfo->iti_ipvers) {
2688 case IPV4_VERSION:
2690 * The outer IP header coming up from IP is always ipha_t
2691 * alligned (otherwise, we would have crashed in ip).
2693 iptun_input_icmp_v4(iptun, mp, (icmph_t *)(mp->b_rptr + hlen),
2694 ira);
2695 break;
2696 case IPV6_VERSION:
2697 iptun_input_icmp_v6(iptun, mp, (icmp6_t *)(mp->b_rptr + hlen),
2698 ira);
2699 break;
2703 static boolean_t
2704 iptun_in_6to4_ok(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
2706 ipaddr_t v4addr;
2709 * It's possible that someone sent us an IPv4-in-IPv4 packet with the
2710 * IPv4 address of a 6to4 tunnel as the destination.
2712 if (inner6 == NULL)
2713 return (B_FALSE);
2716 * Make sure that the IPv6 destination is within the site that this
2717 * 6to4 tunnel is routing for. We don't want people bouncing random
2718 * tunneled IPv6 packets through this 6to4 router.
2720 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst, (struct in_addr *)&v4addr);
2721 if (outer4->ipha_dst != v4addr)
2722 return (B_FALSE);
2724 if (IN6_IS_ADDR_6TO4(&inner6->ip6_src)) {
2726 * Section 9 of RFC 3056 (security considerations) suggests
2727 * that when a packet is from a 6to4 site (i.e., it's not a
2728 * global address being forwarded froma relay router), make
2729 * sure that the packet was tunneled by that site's 6to4
2730 * router.
2732 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
2733 if (outer4->ipha_src != v4addr)
2734 return (B_FALSE);
2735 } else {
2737 * Only accept packets from a relay router if we've configured
2738 * outbound relay router functionality.
2740 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
2741 return (B_FALSE);
2744 return (B_TRUE);
2748 * Input function for everything that comes up from the ip module below us.
2749 * This is called directly from the ip module via connp->conn_recv().
2751 * We receive M_DATA messages with IP-in-IP tunneled packets.
/* Consumes data_mp on all paths. */
2753 /* ARGSUSED2 */
2754 static void
2755 iptun_input(void *arg, mblk_t *data_mp, void *arg2, ip_recv_attr_t *ira)
2757 conn_t *connp = arg;
2758 iptun_t *iptun = connp->conn_iptun;
2759 int outer_hlen;
2760 ipha_t *outer4, *inner4;
2761 ip6_t *outer6, *inner6;
2763 ASSERT(IPCL_IS_IPTUN(connp));
2764 ASSERT(DB_TYPE(data_mp) == M_DATA);
/* Locate the outer and inner IP headers; 0 means a malformed packet. */
2766 outer_hlen = iptun_find_headers(data_mp, ira->ira_ip_hdr_length,
2767 &outer4, &inner4, &outer6, &inner6);
2768 if (outer_hlen == 0)
2769 goto drop;
/* Apply inbound tunnel IPsec policy; may replace or free data_mp. */
2771 data_mp = ipsec_tun_inbound(ira, data_mp, iptun->iptun_itp,
2772 inner4, inner6, outer4, outer6, outer_hlen, iptun->iptun_ns);
2773 if (data_mp == NULL) {
2774 /* Callee did all of the freeing. */
2775 return;
/* 6to4 tunnels get extra RFC 3056 validity checks. */
2778 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4 &&
2779 !iptun_in_6to4_ok(iptun, outer4, inner6))
2780 goto drop;
2783 * We need to statistically account for each packet individually, so
2784 * we might as well split up any b_next chains here.
2786 do {
2787 mblk_t *mp;
2789 mp = data_mp->b_next;
2790 data_mp->b_next = NULL;
2792 atomic_inc_64(&iptun->iptun_ipackets);
2793 atomic_add_64(&iptun->iptun_rbytes, msgdsize(data_mp));
/* Deliver each packet up to GLDv3 with no locks held. */
2794 mac_rx(iptun->iptun_mh, NULL, data_mp);
2796 data_mp = mp;
2797 } while (data_mp != NULL);
2798 return;
2799 drop:
2800 iptun_drop_pkt(data_mp, &iptun->iptun_ierrors);
2804 * Do 6to4-specific header-processing on output. Return B_TRUE if the packet
2805 * was processed without issue, or B_FALSE if the packet had issues and should
2806 * be dropped.
2808 static boolean_t
2809 iptun_out_process_6to4(iptun_t *iptun, ipha_t *outer4, ip6_t *inner6)
2811 ipaddr_t v4addr;
2814 * IPv6 source must be a 6to4 address. This is because a conscious
2815 * decision was made to not allow a Solaris system to be used as a
2816 * relay router (for security reasons) when 6to4 was initially
2817 * integrated. If this decision is ever reversed, the following check
2818 * can be removed.
2820 if (!IN6_IS_ADDR_6TO4(&inner6->ip6_src))
2821 return (B_FALSE);
2824 * RFC3056 mandates that the IPv4 source MUST be set to the IPv4
2825 * portion of the 6to4 IPv6 source address. In other words, make sure
2826 * that we're tunneling packets from our own 6to4 site.
2828 IN6_6TO4_TO_V4ADDR(&inner6->ip6_src, (struct in_addr *)&v4addr);
2829 if (outer4->ipha_src != v4addr)
2830 return (B_FALSE);
2833 * Automatically set the destination of the outer IPv4 header as
2834 * described in RFC3056. There are two possibilities:
2836 * a. If the IPv6 destination is a 6to4 address, set the IPv4 address
2837 * to the IPv4 portion of the 6to4 address.
2838 * b. If the IPv6 destination is a native IPv6 address, set the IPv4
2839 * destination to the address of a relay router.
2841 * Design Note: b shouldn't be necessary here, and this is a flaw in
2842 * the design of the 6to4relay command. Instead of setting a 6to4
2843 * relay address in this module via an ioctl, the 6to4relay command
2844 * could simply add a IPv6 route for native IPv6 addresses (such as a
2845 * default route) in the forwarding table that uses a 6to4 destination
2846 * as its next hop, and the IPv4 portion of that address could be a
2847 * 6to4 relay address. In order for this to work, IP would have to
2848 * resolve the next hop address, which would necessitate a link-layer
2849 * address resolver for 6to4 links, which doesn't exist today.
2851 * In fact, if a resolver existed for 6to4 links, then setting the
2852 * IPv4 destination in the outer header could be done as part of
2853 * link-layer address resolution and fast-path header generation, and
2854 * not here.
2856 if (IN6_IS_ADDR_6TO4(&inner6->ip6_dst)) {
2857 /* destination is a 6to4 router */
2858 IN6_6TO4_TO_V4ADDR(&inner6->ip6_dst,
2859 (struct in_addr *)&outer4->ipha_dst);
2861 /* Reject attempts to send to INADDR_ANY */
2862 if (outer4->ipha_dst == INADDR_ANY)
2863 return (B_FALSE);
2864 } else {
2866 * The destination is a native IPv6 address. If output to a
2867 * relay-router is enabled, use the relay-router's IPv4
2868 * address as the destination.
2870 if (iptun->iptun_iptuns->iptuns_relay_rtr_addr == INADDR_ANY)
2871 return (B_FALSE);
2872 outer4->ipha_dst = iptun->iptun_iptuns->iptuns_relay_rtr_addr;
2876 * If the outer source and destination are equal, this means that the
2877 * 6to4 router somehow forwarded an IPv6 packet destined for its own
2878 * 6to4 site to its 6to4 tunnel interface, which will result in this
2879 * packet infinitely bouncing between ip and iptun.
2881 return (outer4->ipha_src != outer4->ipha_dst);
2885 * Process output packets with outer IPv4 headers. Frees mp and bumps stat on
2886 * error.
/*
 * Fills in the outer TOS/DF/length fields and the transmit attributes
 * (ixa_ip_hdr_length, ixa_pktlen, ixa_protocol, IXAF_DONTFRAG).
 */
2888 static mblk_t *
2889 iptun_out_process_ipv4(iptun_t *iptun, mblk_t *mp, ipha_t *outer4,
2890 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
2892 uint8_t *innerptr = (inner4 != NULL ?
2893 (uint8_t *)inner4 : (uint8_t *)inner6);
2894 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
2896 if (inner4 != NULL) {
2897 ASSERT(outer4->ipha_protocol == IPPROTO_ENCAP);
2899 * Copy the tos from the inner IPv4 header. We mask off ECN
2900 * bits (bits 6 and 7) because there is currently no
2901 * tunnel-tunnel communication to determine if both sides
2902 * support ECN. We opt for the safe choice: don't copy the
2903 * ECN bits when doing encapsulation.
/*
 * ("bits 6 and 7" above uses RFC 791 MSB-first numbering; ~0x03
 * clears the two low-order bits of the TOS octet, the ECN field.)
 */
2905 outer4->ipha_type_of_service =
2906 inner4->ipha_type_of_service & ~0x03;
2907 } else {
2908 ASSERT(outer4->ipha_protocol == IPPROTO_IPV6 &&
2909 inner6 != NULL);
/* Mirror the path-MTU discovery state into the outer DF bit. */
2911 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF)
2912 outer4->ipha_fragment_offset_and_flags |= IPH_DF_HTONS;
2913 else
2914 outer4->ipha_fragment_offset_and_flags &= ~IPH_DF_HTONS;
2917 * As described in section 3.2.2 of RFC4213, if the packet payload is
2918 * less than or equal to the minimum MTU size, then we need to allow
2919 * IPv4 to fragment the packet. The reason is that even if we end up
2920 * receiving an ICMP frag-needed, the interface above this tunnel
2921 * won't be allowed to drop its MTU as a result, since the packet was
2922 * already smaller than the smallest allowable MTU for that interface.
2924 if (mp->b_wptr - innerptr <= minmtu) {
2925 outer4->ipha_fragment_offset_and_flags = 0;
2926 ixa->ixa_flags &= ~IXAF_DONTFRAG;
2927 } else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL) &&
2928 (iptun->iptun_typeinfo->iti_type != IPTUN_TYPE_6TO4)) {
2929 ixa->ixa_flags |= IXAF_DONTFRAG;
/* Describe the finished packet to the transmit path. */
2932 ixa->ixa_ip_hdr_length = IPH_HDR_LENGTH(outer4);
2933 ixa->ixa_pktlen = msgdsize(mp);
2934 ixa->ixa_protocol = outer4->ipha_protocol;
2936 outer4->ipha_length = htons(ixa->ixa_pktlen);
2937 return (mp);
2941 * Insert an encapsulation limit destination option in the packet provided.
2942 * Always consumes the mp argument and returns a new mblk pointer.
/*
 * On allocation failure mp is dropped (noxmtbuf bumped) and NULL is
 * returned.  The new message is laid out as outer IPv6 header +
 * destination options (encap limit) + original payload; the caller is
 * responsible for setting the new header's payload length.
 */
2944 static mblk_t *
2945 iptun_insert_encaplimit(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
2946 uint8_t limit)
2948 mblk_t *newmp;
2949 iptun_ipv6hdrs_t *newouter6;
2951 ASSERT(outer6->ip6_nxt == IPPROTO_IPV6);
2952 ASSERT(mp->b_cont == NULL);
/* Skip the old outer header; only the payload gets copied below. */
2954 mp->b_rptr += sizeof (ip6_t);
2955 newmp = allocb(sizeof (iptun_ipv6hdrs_t) + MBLKL(mp), BPRI_MED);
2956 if (newmp == NULL) {
2957 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
2958 return (NULL);
/* Reserve room for the new outer IPv6 + dst-options headers. */
2960 newmp->b_wptr += sizeof (iptun_ipv6hdrs_t);
2961 /* Copy the payload (Starting with the inner IPv6 header). */
2962 bcopy(mp->b_rptr, newmp->b_wptr, MBLKL(mp));
2963 newmp->b_wptr += MBLKL(mp);
2964 newouter6 = (iptun_ipv6hdrs_t *)newmp->b_rptr;
2965 /* Now copy the outer IPv6 header. */
2966 bcopy(outer6, &newouter6->it6h_ip6h, sizeof (ip6_t));
/* Splice the dst-options header into the next-header chain. */
2967 newouter6->it6h_ip6h.ip6_nxt = IPPROTO_DSTOPTS;
2968 newouter6->it6h_encaplim = iptun_encaplim_init;
2969 newouter6->it6h_encaplim.iel_destopt.ip6d_nxt = outer6->ip6_nxt;
2970 newouter6->it6h_encaplim.iel_telopt.ip6ot_encap_limit = limit;
2973 * The payload length will be set at the end of
2974 * iptun_out_process_ipv6().
2977 freemsg(mp);
2978 return (newmp);
2982 * Process output packets with outer IPv6 headers. Frees mp and bumps stats
2983 * on error.
2985 static mblk_t *
2986 iptun_out_process_ipv6(iptun_t *iptun, mblk_t *mp, ip6_t *outer6,
2987 ipha_t *inner4, ip6_t *inner6, ip_xmit_attr_t *ixa)
2989 uint8_t *innerptr = (inner4 != NULL ?
2990 (uint8_t *)inner4 : (uint8_t *)inner6);
2991 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
2992 uint8_t *limit, *configlimit;
2993 uint32_t offset;
2994 iptun_ipv6hdrs_t *v6hdrs;
2996 if (inner6 != NULL && iptun_find_encaplimit(mp, inner6, &limit)) {
2998 * The inner packet is an IPv6 packet which itself contains an
2999 * encapsulation limit option. The limit variable points to
3000 * the value in the embedded option. Process the
3001 * encapsulation limit option as specified in RFC 2473.
3003 * If limit is 0, then we've exceeded the limit and we need to
3004 * send back an ICMPv6 parameter problem message.
3006 * If limit is > 0, then we decrement it by 1 and make sure
3007 * that the encapsulation limit option in the outer header
3008 * reflects that (adding an option if one isn't already
3009 * there).
3011 ASSERT(limit > mp->b_rptr && limit < mp->b_wptr);
3012 if (*limit == 0) {
3013 mp->b_rptr = (uint8_t *)inner6;
3014 offset = limit - mp->b_rptr;
3015 iptun_icmp_error_v6(iptun, inner6, mp, ICMP6_PARAM_PROB,
3016 0, offset);
3017 atomic_inc_64(&iptun->iptun_noxmtbuf);
3018 return (NULL);
3022 * The outer header requires an encapsulation limit option.
3023 * If there isn't one already, add one.
3025 if (iptun->iptun_encaplimit == 0) {
3026 if ((mp = iptun_insert_encaplimit(iptun, mp, outer6,
3027 (*limit - 1))) == NULL)
3028 return (NULL);
3029 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
3030 } else {
3032 * There is an existing encapsulation limit option in
3033 * the outer header. If the inner encapsulation limit
3034 * is less than the configured encapsulation limit,
3035 * update the outer encapsulation limit to reflect
3036 * this lesser value.
3038 v6hdrs = (iptun_ipv6hdrs_t *)mp->b_rptr;
3039 configlimit =
3040 &v6hdrs->it6h_encaplim.iel_telopt.ip6ot_encap_limit;
3041 if ((*limit - 1) < *configlimit)
3042 *configlimit = (*limit - 1);
3044 ixa->ixa_ip_hdr_length = sizeof (iptun_ipv6hdrs_t);
3045 ixa->ixa_protocol = v6hdrs->it6h_encaplim.iel_destopt.ip6d_nxt;
3046 } else {
3047 ixa->ixa_ip_hdr_length = sizeof (ip6_t);
3048 ixa->ixa_protocol = outer6->ip6_nxt;
3051 * See iptun_output_process_ipv4() why we allow fragmentation for
3052 * small packets
3054 if (mp->b_wptr - innerptr <= minmtu)
3055 ixa->ixa_flags &= ~IXAF_DONTFRAG;
3056 else if (!(ixa->ixa_flags & IXAF_PMTU_TOO_SMALL))
3057 ixa->ixa_flags |= IXAF_DONTFRAG;
3059 ixa->ixa_pktlen = msgdsize(mp);
3060 outer6->ip6_plen = htons(ixa->ixa_pktlen - sizeof (ip6_t));
3061 return (mp);
3065 * The IP tunneling MAC-type plugins have already done most of the header
3066 * processing and validity checks. We are simply responsible for multiplexing
3067 * down to the ip module below us.
3069 static void
3070 iptun_output(iptun_t *iptun, mblk_t *mp)
3072 conn_t *connp = iptun->iptun_connp;
3073 mblk_t *newmp;
3074 int error;
3075 ip_xmit_attr_t *ixa;
3077 ASSERT(mp->b_datap->db_type == M_DATA);
3079 if (mp->b_cont != NULL) {
3080 if ((newmp = msgpullup(mp, -1)) == NULL) {
3081 iptun_drop_pkt(mp, &iptun->iptun_noxmtbuf);
3082 return;
3084 freemsg(mp);
3085 mp = newmp;
3088 if (iptun->iptun_typeinfo->iti_type == IPTUN_TYPE_6TO4) {
3089 iptun_output_6to4(iptun, mp);
3090 return;
3094 * If no other thread is using conn_ixa this just gets a
3095 * reference to conn_ixa. Otherwise we get a safe copy of
3096 * conn_ixa.
3098 ixa = conn_get_ixa(connp, B_FALSE);
3099 if (ixa == NULL) {
3100 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3101 return;
3105 * In case we got a safe copy of conn_ixa, then we need
3106 * to fill in any pointers in it.
3108 if (ixa->ixa_ire == NULL) {
3109 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
3110 &connp->conn_faddr_v6, &connp->conn_faddr_v6, 0,
3111 NULL, NULL, 0);
3112 if (error != 0) {
3113 if (ixa->ixa_ire != NULL &&
3114 (error == EHOSTUNREACH || error == ENETUNREACH)) {
3116 * Let conn_ip_output/ire_send_noroute return
3117 * the error and send any local ICMP error.
3119 error = 0;
3120 } else {
3121 ixa_refrele(ixa);
3122 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3123 return;
3128 iptun_output_common(iptun, ixa, mp);
3129 ixa_refrele(ixa);
3133 * We use an ixa based on the last destination.
3135 static void
3136 iptun_output_6to4(iptun_t *iptun, mblk_t *mp)
3138 conn_t *connp = iptun->iptun_connp;
3139 ipha_t *outer4, *inner4;
3140 ip6_t *outer6, *inner6;
3141 ip_xmit_attr_t *ixa;
3142 ip_xmit_attr_t *oldixa;
3143 int error;
3144 boolean_t need_connect;
3145 in6_addr_t v6dst;
3147 ASSERT(mp->b_cont == NULL); /* Verified by iptun_output */
3149 /* Make sure we set ipha_dst before we look at ipha_dst */
3151 (void) iptun_find_headers(mp, 0, &outer4, &inner4, &outer6, &inner6);
3152 ASSERT(outer4 != NULL);
3153 if (!iptun_out_process_6to4(iptun, outer4, inner6)) {
3154 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3155 return;
3159 * If no other thread is using conn_ixa this just gets a
3160 * reference to conn_ixa. Otherwise we get a safe copy of
3161 * conn_ixa.
3163 ixa = conn_get_ixa(connp, B_FALSE);
3164 if (ixa == NULL) {
3165 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3166 return;
3169 mutex_enter(&connp->conn_lock);
3170 if (connp->conn_v4lastdst == outer4->ipha_dst) {
3171 need_connect = (ixa->ixa_ire == NULL);
3172 } else {
3173 /* TODO: do we need to do this? */
3174 ip_attr_newdst(ixa);
3177 * We later update conn_ixa when we update conn_v4lastdst
3178 * which enables subsequent packets to avoid redoing
3179 * ip_attr_connect
3181 need_connect = B_TRUE;
3183 mutex_exit(&connp->conn_lock);
3186 * In case we got a safe copy of conn_ixa, or otherwise we don't
3187 * have a current ixa_ire, then we need to fill in any pointers in
3188 * the ixa.
3190 if (need_connect) {
3191 IN6_IPADDR_TO_V4MAPPED(outer4->ipha_dst, &v6dst);
3193 /* We handle IPsec in iptun_output_common */
3194 error = ip_attr_connect(connp, ixa, &connp->conn_saddr_v6,
3195 &v6dst, &v6dst, 0, NULL, NULL, 0);
3196 if (error != 0) {
3197 if (ixa->ixa_ire != NULL &&
3198 (error == EHOSTUNREACH || error == ENETUNREACH)) {
3200 * Let conn_ip_output/ire_send_noroute return
3201 * the error and send any local ICMP error.
3203 error = 0;
3204 } else {
3205 ixa_refrele(ixa);
3206 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3207 return;
3212 iptun_output_common(iptun, ixa, mp);
3214 /* Atomically replace conn_ixa and conn_v4lastdst */
3215 mutex_enter(&connp->conn_lock);
3216 if (connp->conn_v4lastdst != outer4->ipha_dst) {
3217 /* Remember the dst which corresponds to conn_ixa */
3218 connp->conn_v6lastdst = v6dst;
3219 oldixa = conn_replace_ixa(connp, ixa);
3220 } else {
3221 oldixa = NULL;
3223 mutex_exit(&connp->conn_lock);
3224 ixa_refrele(ixa);
3225 if (oldixa != NULL)
3226 ixa_refrele(oldixa);
3229 static void
3230 iptun_output_common(iptun_t *iptun, ip_xmit_attr_t *ixa, mblk_t *mp)
3232 ipsec_tun_pol_t *itp = iptun->iptun_itp;
3233 int outer_hlen;
3234 mblk_t *newmp;
3235 ipha_t *outer4, *inner4;
3236 ip6_t *outer6, *inner6;
3237 int error;
3238 boolean_t update_pktlen;
3240 ASSERT(ixa->ixa_ire != NULL);
3242 outer_hlen = iptun_find_headers(mp, 0, &outer4, &inner4, &outer6,
3243 &inner6);
3244 if (outer_hlen == 0) {
3245 iptun_drop_pkt(mp, &iptun->iptun_oerrors);
3246 return;
3249 /* Save IXAF_DONTFRAG value */
3250 iaflags_t dontfrag = ixa->ixa_flags & IXAF_DONTFRAG;
3252 /* Perform header processing. */
3253 if (outer4 != NULL) {
3254 mp = iptun_out_process_ipv4(iptun, mp, outer4, inner4, inner6,
3255 ixa);
3256 } else {
3257 mp = iptun_out_process_ipv6(iptun, mp, outer6, inner4, inner6,
3258 ixa);
3260 if (mp == NULL)
3261 return;
3264 * Let's hope the compiler optimizes this with "branch taken".
3266 if (itp != NULL && (itp->itp_flags & ITPF_P_ACTIVE)) {
3267 /* This updates the ip_xmit_attr_t */
3268 mp = ipsec_tun_outbound(mp, iptun, inner4, inner6, outer4,
3269 outer6, outer_hlen, ixa);
3270 if (mp == NULL) {
3271 atomic_inc_64(&iptun->iptun_oerrors);
3272 return;
3276 * ipsec_tun_outbound() returns a chain of tunneled IP
3277 * fragments linked with b_next (or a single message if the
3278 * tunneled packet wasn't a fragment).
3279 * If fragcache returned a list then we need to update
3280 * ixa_pktlen for all packets in the list.
3282 update_pktlen = (mp->b_next != NULL);
3285 * Otherwise, we're good to go. The ixa has been updated with
3286 * instructions for outbound IPsec processing.
3288 for (newmp = mp; newmp != NULL; newmp = mp) {
3289 size_t minmtu = iptun->iptun_typeinfo->iti_minmtu;
3291 atomic_inc_64(&iptun->iptun_opackets);
3292 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3293 mp = mp->b_next;
3294 newmp->b_next = NULL;
3297 * The IXAF_DONTFRAG flag is global, but there is
3298 * a chain here. Check if we're really already
3299 * smaller than the minimum allowed MTU and reset here
3300 * appropriately. Otherwise one small packet can kill
3301 * the whole chain's path mtu discovery.
3302 * In addition, update the pktlen to the length of
3303 * the actual packet being processed.
3305 if (update_pktlen) {
3306 ixa->ixa_pktlen = msgdsize(newmp);
3307 if (ixa->ixa_pktlen <= minmtu)
3308 ixa->ixa_flags &= ~IXAF_DONTFRAG;
3311 atomic_inc_64(&iptun->iptun_opackets);
3312 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3314 error = conn_ip_output(newmp, ixa);
3316 /* Restore IXAF_DONTFRAG value */
3317 ixa->ixa_flags |= dontfrag;
3319 if (error == EMSGSIZE) {
3320 /* IPsec policy might have changed */
3321 (void) iptun_update_mtu(iptun, ixa, 0);
3324 } else {
3326 * The ip module will potentially apply global policy to the
3327 * packet in its output path if there's no active tunnel
3328 * policy.
3330 ASSERT(ixa->ixa_ipsec_policy == NULL);
3331 mp = ip_output_attach_policy(mp, outer4, outer6, NULL, ixa);
3332 if (mp == NULL) {
3333 atomic_inc_64(&iptun->iptun_oerrors);
3334 return;
3337 atomic_inc_64(&iptun->iptun_opackets);
3338 atomic_add_64(&iptun->iptun_obytes, ixa->ixa_pktlen);
3340 error = conn_ip_output(mp, ixa);
3341 if (error == EMSGSIZE) {
3342 /* IPsec policy might have changed */
3343 (void) iptun_update_mtu(iptun, ixa, 0);
3346 if (ixa->ixa_flags & IXAF_IPSEC_SECURE)
3347 ipsec_out_release_refs(ixa);
3350 static mac_callbacks_t iptun_m_callbacks = {
3351 .mc_callbacks = (MC_SETPROP | MC_GETPROP | MC_PROPINFO),
3352 .mc_getstat = iptun_m_getstat,
3353 .mc_start = iptun_m_start,
3354 .mc_stop = iptun_m_stop,
3355 .mc_setpromisc = iptun_m_setpromisc,
3356 .mc_multicst = iptun_m_multicst,
3357 .mc_unicst = iptun_m_unicst,
3358 .mc_tx = iptun_m_tx,
3359 .mc_reserved = NULL,
3360 .mc_setprop = iptun_m_setprop,
3361 .mc_getprop = iptun_m_getprop,
3362 .mc_propinfo = iptun_m_propinfo