4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
29 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
33 * The ipnet device defined here provides access to packets at the IP layer. To
34 * provide access to packets at this layer it registers a callback function in
35 * the ip module and when there are open instances of the device ip will pass
36 * packets into the device. Packets from ip are passed on the input, output and
37 * loopback paths. Internally the module returns to ip as soon as possible by
38 * deferring processing using a taskq.
40 * Management of the devices in /dev/ipnet/ is handled by the devname
41 * filesystem and use of the neti interfaces. This module registers for NIC
42 * events using the neti framework so that when IP interfaces are brought up,
43 * taken down etc. the ipnet module is notified and its view of the interfaces
44 * configured on the system adjusted. On attach, the module gets an initial
45 * view of the system again using the neti framework but as it has already
46 * registered for IP interface events, it is still up-to-date with any changes.
49 #include <sys/types.h>
54 #include <sys/sunddi.h>
55 #include <sys/modctl.h>
57 #include <sys/strsun.h>
58 #include <sys/id_space.h>
60 #include <sys/mkdev.h>
63 #include <sys/errno.h>
65 #include <sys/ksynch.h>
66 #include <sys/hook_event.h>
68 #include <sys/stropts.h>
69 #include <sys/sysmacros.h>
71 #include <inet/ip_if.h>
72 #include <inet/ip_multi.h>
74 #include <inet/ipnet.h>
76 #include <net/bpfdesc.h>
79 static struct module_info ipnet_minfo
= {
81 "ipnet", /* mi_idname */
83 INFPSZ
, /* mi_maxpsz */
89 * List to hold static view of ipnetif_t's on the system. This is needed to
90 * avoid holding the lock protecting the avl tree of ipnetif's over the
91 * callback into the dev filesystem.
93 typedef struct ipnetif_cbdata
{
94 char ic_ifname
[LIFNAMSIZ
];
100 * Convenience enumerated type for ipnet_accept(). It describes the
101 * properties of a given ipnet_addrp_t relative to a single ipnet_t
102 * client stream. The values represent whether the address is ...
105 IPNETADDR_MYADDR
, /* an address on my ipnetif_t. */
106 IPNETADDR_MBCAST
, /* a multicast or broadcast address. */
107 IPNETADDR_UNKNOWN
/* none of the above. */
110 /* Argument used for the ipnet_nicevent_taskq callback. */
111 typedef struct ipnet_nicevent_s
{
112 nic_event_t ipne_event
;
113 net_handle_t ipne_protocol
;
114 netstackid_t ipne_stackid
;
115 uint64_t ipne_ifindex
;
116 uint64_t ipne_lifindex
;
117 char ipne_ifname
[LIFNAMSIZ
];
120 static dev_info_t
*ipnet_dip
;
121 static major_t ipnet_major
;
122 static ddi_taskq_t
*ipnet_taskq
; /* taskq for packets */
123 static ddi_taskq_t
*ipnet_nicevent_taskq
; /* taskq for NIC events */
124 static id_space_t
*ipnet_minor_space
;
125 static const int IPNET_MINOR_LO
= 1; /* minor number for /dev/lo0 */
126 static const int IPNET_MINOR_MIN
= 2; /* start of dynamic minors */
127 static dl_info_ack_t ipnet_infoack
= IPNET_INFO_ACK_INIT
;
128 static ipnet_acceptfn_t ipnet_accept
, ipnet_loaccept
;
129 static bpf_itap_fn_t ipnet_itap
;
131 static void ipnet_input(mblk_t
*);
132 static int ipnet_wput(queue_t
*, mblk_t
*);
133 static int ipnet_rsrv(queue_t
*);
134 static int ipnet_open(queue_t
*, dev_t
*, int, int, cred_t
*);
135 static int ipnet_close(queue_t
*);
136 static void ipnet_ioctl(queue_t
*, mblk_t
*);
137 static void ipnet_iocdata(queue_t
*, mblk_t
*);
138 static void ipnet_wputnondata(queue_t
*, mblk_t
*);
139 static int ipnet_attach(dev_info_t
*, ddi_attach_cmd_t
);
140 static int ipnet_detach(dev_info_t
*, ddi_detach_cmd_t
);
141 static int ipnet_devinfo(dev_info_t
*, ddi_info_cmd_t
, void *, void **);
142 static void ipnet_inforeq(queue_t
*q
, mblk_t
*mp
);
143 static void ipnet_bindreq(queue_t
*q
, mblk_t
*mp
);
144 static void ipnet_unbindreq(queue_t
*q
, mblk_t
*mp
);
145 static void ipnet_dlpromisconreq(queue_t
*q
, mblk_t
*mp
);
146 static void ipnet_dlpromiscoffreq(queue_t
*q
, mblk_t
*mp
);
147 static int ipnet_join_allmulti(ipnetif_t
*, ipnet_stack_t
*);
148 static void ipnet_leave_allmulti(ipnetif_t
*, ipnet_stack_t
*);
149 static int ipnet_nicevent_cb(hook_event_token_t
, hook_data_t
, void *);
150 static void ipnet_nicevent_task(void *);
151 static ipnetif_t
*ipnetif_create(const char *, uint64_t, ipnet_stack_t
*,
153 static void ipnetif_remove(ipnetif_t
*, ipnet_stack_t
*);
154 static ipnetif_addr_t
*ipnet_match_lif(ipnetif_t
*, lif_if_t
, boolean_t
);
155 static ipnetif_t
*ipnetif_getby_index(uint64_t, ipnet_stack_t
*);
156 static ipnetif_t
*ipnetif_getby_dev(dev_t
, ipnet_stack_t
*);
157 static boolean_t
ipnetif_in_zone(ipnetif_t
*, zoneid_t
, ipnet_stack_t
*);
158 static void ipnetif_zonecheck(ipnetif_t
*, ipnet_stack_t
*);
159 static int ipnet_populate_if(net_handle_t
, ipnet_stack_t
*, boolean_t
);
160 static int ipnetif_compare_name(const void *, const void *);
161 static int ipnetif_compare_name_zone(const void *, const void *);
162 static int ipnetif_compare_index(const void *, const void *);
163 static void ipnet_add_ifaddr(uint64_t, ipnetif_t
*, net_handle_t
);
164 static void ipnet_delete_ifaddr(ipnetif_addr_t
*, ipnetif_t
*, boolean_t
);
165 static void ipnetif_refhold(ipnetif_t
*);
166 static void ipnetif_refrele(ipnetif_t
*);
167 static void ipnet_walkers_inc(ipnet_stack_t
*);
168 static void ipnet_walkers_dec(ipnet_stack_t
*);
169 static void ipnet_register_netihook(ipnet_stack_t
*);
170 static void *ipnet_stack_init(netstackid_t
, netstack_t
*);
171 static void ipnet_stack_fini(netstackid_t
, void *);
172 static void ipnet_dispatch(void *);
173 static int ipobs_bounce_func(hook_event_token_t
, hook_data_t
, void *);
174 static int ipnet_bpf_bounce(hook_event_token_t
, hook_data_t
, void *);
175 static ipnetif_t
*ipnetif_clone_create(ipnetif_t
*, zoneid_t
);
176 static void ipnetif_clone_release(ipnetif_t
*);
178 static struct qinit ipnet_rinit
= {
180 ipnet_rsrv
, /* qi_srvp */
181 ipnet_open
, /* qi_qopen */
182 ipnet_close
, /* qi_qclose */
183 NULL
, /* qi_qadmin */
184 &ipnet_minfo
, /* qi_minfo */
187 static struct qinit ipnet_winit
= {
188 ipnet_wput
, /* qi_putp */
191 NULL
, /* qi_qclose */
192 NULL
, /* qi_qadmin */
193 &ipnet_minfo
, /* qi_minfo */
196 static struct streamtab ipnet_info
= {
197 &ipnet_rinit
, &ipnet_winit
200 DDI_DEFINE_STREAM_OPS(ipnet_ops
, nulldev
, nulldev
, ipnet_attach
,
201 ipnet_detach
, nodev
, ipnet_devinfo
, D_MP
| D_MTPERMOD
, &ipnet_info
,
202 ddi_quiesce_not_supported
);
204 static struct modldrv modldrv
= {
206 "STREAMS ipnet driver",
210 static struct modlinkage modlinkage
= {
211 MODREV_1
, &modldrv
, NULL
215 * This structure contains the template data (names and type) that is
216 * copied, in bulk, into the new kstats structure created by net_kstat_create.
217 * No actual statistical information is stored in this instance of the
218 * ipnet_kstats_t structure.
220 static ipnet_kstats_t stats_template
= {
221 { "duplicationFail", KSTAT_DATA_UINT64
},
222 { "dispatchOk", KSTAT_DATA_UINT64
},
223 { "dispatchFail", KSTAT_DATA_UINT64
},
224 { "dispatchHeaderDrop", KSTAT_DATA_UINT64
},
225 { "dispatchDupDrop", KSTAT_DATA_UINT64
},
226 { "dispatchDeliver", KSTAT_DATA_UINT64
},
227 { "acceptOk", KSTAT_DATA_UINT64
},
228 { "acceptFail", KSTAT_DATA_UINT64
}
232 * Walk the list of physical interfaces on the machine; for each
233 * interface create a new ipnetif_t and add any addresses to it. We
234 * need to do the walk twice, once for IPv4 and once for IPv6.
236 * The interfaces are destroyed as part of ipnet_stack_fini() for each
237 * stack. Note that we cannot do this initialization in
238 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
243 netstack_handle_t nh
;
248 netstack_next_init(&nh
);
249 while ((ns
= netstack_next(&nh
)) != NULL
) {
250 ips
= ns
->netstack_ipnet
;
251 if ((ret
= ipnet_populate_if(ips
->ips_ndv4
, ips
, B_FALSE
)) == 0)
252 ret
= ipnet_populate_if(ips
->ips_ndv6
, ips
, B_TRUE
);
257 netstack_next_fini(&nh
);
262 * Standard module entry points.
268 boolean_t netstack_registered
= B_FALSE
;
270 if ((ipnet_major
= ddi_name_to_major("ipnet")) == (major_t
)-1)
272 ipnet_minor_space
= id_space_create("ipnet_minor_space",
273 IPNET_MINOR_MIN
, MAXMIN32
);
276 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
277 * delivery of packets to clients. Note that we need to create the
278 * taskqs before calling netstack_register() since ipnet_stack_init()
279 * registers callbacks that use them.
281 ipnet_taskq
= ddi_taskq_create(NULL
, "ipnet", 1, TASKQ_DEFAULTPRI
, 0);
282 ipnet_nicevent_taskq
= ddi_taskq_create(NULL
, "ipnet_nic_event_queue",
283 1, TASKQ_DEFAULTPRI
, 0);
284 if (ipnet_taskq
== NULL
|| ipnet_nicevent_taskq
== NULL
) {
289 netstack_register(NS_IPNET
, ipnet_stack_init
, NULL
, ipnet_stack_fini
);
290 netstack_registered
= B_TRUE
;
292 if ((ret
= ipnetif_init()) == 0)
293 ret
= mod_install(&modlinkage
);
296 if (ipnet_taskq
!= NULL
)
297 ddi_taskq_destroy(ipnet_taskq
);
298 if (ipnet_nicevent_taskq
!= NULL
)
299 ddi_taskq_destroy(ipnet_nicevent_taskq
);
300 if (netstack_registered
)
301 netstack_unregister(NS_IPNET
);
302 id_space_destroy(ipnet_minor_space
);
312 if ((err
= mod_remove(&modlinkage
)) != 0)
315 netstack_unregister(NS_IPNET
);
316 ddi_taskq_destroy(ipnet_nicevent_taskq
);
317 ddi_taskq_destroy(ipnet_taskq
);
318 id_space_destroy(ipnet_minor_space
);
323 _info(struct modinfo
*modinfop
)
325 return (mod_info(&modlinkage
, modinfop
));
329 ipnet_register_netihook(ipnet_stack_t
*ips
)
335 HOOK_INIT(ips
->ips_nicevents
, ipnet_nicevent_cb
, "ipnet_nicevents",
339 * It is possible for an exclusive stack to be in the process of
340 * shutting down here, and the netid and protocol lookups could fail
343 zoneid
= netstackid_to_zoneid(ips
->ips_netstack
->netstack_stackid
);
344 if ((netid
= net_zoneidtonetid(zoneid
)) == -1)
347 if ((ips
->ips_ndv4
= net_protocol_lookup(netid
, NHF_INET
)) != NULL
) {
348 if ((ret
= net_hook_register(ips
->ips_ndv4
, NH_NIC_EVENTS
,
349 ips
->ips_nicevents
)) != 0) {
350 VERIFY(net_protocol_release(ips
->ips_ndv4
) == 0);
351 ips
->ips_ndv4
= NULL
;
352 cmn_err(CE_WARN
, "unable to register IPv4 netinfo hooks"
353 " in zone %d: %d", zoneid
, ret
);
356 if ((ips
->ips_ndv6
= net_protocol_lookup(netid
, NHF_INET6
)) != NULL
) {
357 if ((ret
= net_hook_register(ips
->ips_ndv6
, NH_NIC_EVENTS
,
358 ips
->ips_nicevents
)) != 0) {
359 VERIFY(net_protocol_release(ips
->ips_ndv6
) == 0);
360 ips
->ips_ndv6
= NULL
;
361 cmn_err(CE_WARN
, "unable to register IPv6 netinfo hooks"
362 " in zone %d: %d", zoneid
, ret
);
367 * Create a local set of kstats for each zone.
369 ips
->ips_kstatp
= net_kstat_create(netid
, "ipnet", 0, "ipnet_stats",
370 "misc", KSTAT_TYPE_NAMED
,
371 sizeof (ipnet_kstats_t
) / sizeof (kstat_named_t
), 0);
372 if (ips
->ips_kstatp
!= NULL
) {
373 bcopy(&stats_template
, &ips
->ips_stats
,
374 sizeof (ips
->ips_stats
));
375 ips
->ips_kstatp
->ks_data
= &ips
->ips_stats
;
376 ips
->ips_kstatp
->ks_private
=
377 (void *)(uintptr_t)ips
->ips_netstack
->netstack_stackid
;
378 kstat_install(ips
->ips_kstatp
);
380 cmn_err(CE_WARN
, "net_kstat_create(%s,%s,%s) failed",
381 "ipnet", "ipnet_stats", "misc");
386 * This function is called on attach to build an initial view of the
387 * interfaces on the system. It will be called once for IPv4 and once
388 * for IPv6: although there is only one ipnet interface for both IPv4
389 * and IPv6, there are separate address lists.
392 ipnet_populate_if(net_handle_t nd
, ipnet_stack_t
*ips
, boolean_t isv6
)
397 char name
[LIFNAMSIZ
];
398 boolean_t new_if
= B_FALSE
;
403 * If ipnet_register_netihook() was unable to initialize this
404 * stack's net_handle_t, then we cannot populate any interface
405 * information. This usually happens when we attempted to
406 * grab a net_handle_t as a stack was shutting down. We don't
407 * want to fail the entire _init() operation because of a
408 * stack shutdown (other stacks will continue to work just
409 * fine), so we silently return success here.
415 * Make sure we're not processing NIC events during the
416 * population of our interfaces and address lists.
418 mutex_enter(&ips
->ips_event_lock
);
420 for (phyif
= net_phygetnext(nd
, 0); phyif
!= 0;
421 phyif
= net_phygetnext(nd
, phyif
)) {
422 if (net_getifname(nd
, phyif
, name
, LIFNAMSIZ
) != 0)
425 (void) net_getlifflags(nd
, phyif
, 0, &ifflags
);
426 if ((ipnetif
= ipnetif_getby_index(phyif
, ips
)) == NULL
) {
427 ipnetif
= ipnetif_create(name
, phyif
, ips
, ifflags
);
428 if (ipnetif
== NULL
) {
435 isv6
? IPNETIF_IPV6PLUMBED
: IPNETIF_IPV4PLUMBED
;
437 for (lif
= net_lifgetnext(nd
, phyif
, 0); lif
!= 0;
438 lif
= net_lifgetnext(nd
, phyif
, lif
)) {
440 * Skip addresses that aren't up. We'll add
441 * them when we receive an NE_LIF_UP event.
443 if (net_getlifflags(nd
, phyif
, lif
, &ifflags
) != 0 ||
446 /* Don't add it if we already have it. */
447 if (ipnet_match_lif(ipnetif
, lif
, isv6
) != NULL
)
449 ipnet_add_ifaddr(lif
, ipnetif
, nd
);
452 ipnetif_refrele(ipnetif
);
456 mutex_exit(&ips
->ips_event_lock
);
461 ipnet_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
463 if (cmd
!= DDI_ATTACH
)
464 return (DDI_FAILURE
);
466 if (ddi_create_minor_node(dip
, "lo0", S_IFCHR
, IPNET_MINOR_LO
,
467 DDI_PSEUDO
, 0) == DDI_FAILURE
)
468 return (DDI_FAILURE
);
471 return (DDI_SUCCESS
);
475 ipnet_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
477 if (cmd
!= DDI_DETACH
)
478 return (DDI_FAILURE
);
480 ASSERT(dip
== ipnet_dip
);
481 ddi_remove_minor_node(ipnet_dip
, NULL
);
483 return (DDI_SUCCESS
);
488 ipnet_devinfo(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
490 int error
= DDI_FAILURE
;
493 case DDI_INFO_DEVT2INSTANCE
:
497 case DDI_INFO_DEVT2DEVINFO
:
498 if (ipnet_dip
!= NULL
) {
509 ipnet_open(queue_t
*rq
, dev_t
*dev
, int oflag
, int sflag
, cred_t
*crp
)
512 netstack_t
*ns
= NULL
;
515 zoneid_t zoneid
= crgetzoneid(crp
);
517 /* We don't support open as a module */
521 /* This driver is self-cloning, we don't support re-open. */
522 if (rq
->q_ptr
!= NULL
)
525 if ((ipnet
= kmem_zalloc(sizeof (*ipnet
), KM_NOSLEEP
)) == NULL
)
528 VERIFY((ns
= netstack_find_by_cred(crp
)) != NULL
);
529 ips
= ns
->netstack_ipnet
;
531 rq
->q_ptr
= WR(rq
)->q_ptr
= ipnet
;
532 ipnet
->ipnet_rq
= rq
;
533 ipnet
->ipnet_minor
= (minor_t
)id_alloc(ipnet_minor_space
);
534 ipnet
->ipnet_zoneid
= zoneid
;
535 ipnet
->ipnet_dlstate
= DL_UNBOUND
;
536 ipnet
->ipnet_ns
= ns
;
539 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
540 * to be processed after ipnet_if is set and the ipnet_t has been
541 * inserted in the ips_str_list.
543 mutex_enter(&ips
->ips_event_lock
);
544 if (getminor(*dev
) == IPNET_MINOR_LO
) {
545 ipnet
->ipnet_flags
|= IPNET_LOMODE
;
546 ipnet
->ipnet_acceptfn
= ipnet_loaccept
;
548 ipnet
->ipnet_acceptfn
= ipnet_accept
;
549 ipnet
->ipnet_if
= ipnetif_getby_dev(*dev
, ips
);
550 if (ipnet
->ipnet_if
== NULL
||
551 !ipnetif_in_zone(ipnet
->ipnet_if
, zoneid
, ips
)) {
557 mutex_enter(&ips
->ips_walkers_lock
);
558 while (ips
->ips_walkers_cnt
!= 0)
559 cv_wait(&ips
->ips_walkers_cv
, &ips
->ips_walkers_lock
);
560 list_insert_head(&ips
->ips_str_list
, ipnet
);
561 *dev
= makedevice(getmajor(*dev
), ipnet
->ipnet_minor
);
565 * Only register our callback if we're the first open client; we call
566 * unregister in close() for the last open client.
568 if (list_head(&ips
->ips_str_list
) == list_tail(&ips
->ips_str_list
))
569 ips
->ips_hook
= ipobs_register_hook(ns
, ipnet_input
);
570 mutex_exit(&ips
->ips_walkers_lock
);
573 mutex_exit(&ips
->ips_event_lock
);
576 id_free(ipnet_minor_space
, ipnet
->ipnet_minor
);
577 if (ipnet
->ipnet_if
!= NULL
)
578 ipnetif_refrele(ipnet
->ipnet_if
);
579 kmem_free(ipnet
, sizeof (*ipnet
));
585 ipnet_close(queue_t
*rq
)
587 ipnet_t
*ipnet
= rq
->q_ptr
;
588 ipnet_stack_t
*ips
= ipnet
->ipnet_ns
->netstack_ipnet
;
590 if (ipnet
->ipnet_flags
& IPNET_PROMISC_PHYS
)
591 ipnet_leave_allmulti(ipnet
->ipnet_if
, ips
);
592 if (ipnet
->ipnet_flags
& IPNET_PROMISC_MULTI
)
593 ipnet_leave_allmulti(ipnet
->ipnet_if
, ips
);
595 mutex_enter(&ips
->ips_walkers_lock
);
596 while (ips
->ips_walkers_cnt
!= 0)
597 cv_wait(&ips
->ips_walkers_cv
, &ips
->ips_walkers_lock
);
601 list_remove(&ips
->ips_str_list
, ipnet
);
602 if (ipnet
->ipnet_if
!= NULL
)
603 ipnetif_refrele(ipnet
->ipnet_if
);
604 id_free(ipnet_minor_space
, ipnet
->ipnet_minor
);
606 if (list_is_empty(&ips
->ips_str_list
)) {
607 ipobs_unregister_hook(ips
->ips_netstack
, ips
->ips_hook
);
608 ips
->ips_hook
= NULL
;
611 kmem_free(ipnet
, sizeof (*ipnet
));
613 mutex_exit(&ips
->ips_walkers_lock
);
614 netstack_rele(ips
->ips_netstack
);
619 ipnet_wput(queue_t
*q
, mblk_t
*mp
)
621 switch (mp
->b_datap
->db_type
) {
623 if (*mp
->b_rptr
& FLUSHW
) {
624 flushq(q
, FLUSHDATA
);
625 *mp
->b_rptr
&= ~FLUSHW
;
627 if (*mp
->b_rptr
& FLUSHR
)
634 ipnet_wputnondata(q
, mp
);
640 ipnet_iocdata(q
, mp
);
650 ipnet_rsrv(queue_t
*q
)
654 while ((mp
= getq(q
)) != NULL
) {
655 ASSERT(DB_TYPE(mp
) == M_DATA
);
667 ipnet_ioctl(queue_t
*q
, mblk_t
*mp
)
669 struct iocblk
*iocp
= (struct iocblk
*)mp
->b_rptr
;
671 switch (iocp
->ioc_cmd
) {
673 miocack(q
, mp
, 0, 0);
676 if (iocp
->ioc_count
== TRANSPARENT
) {
677 mcopyin(mp
, NULL
, sizeof (uint_t
), NULL
);
681 /* We don't support I_STR with DLIOCIPNETINFO. */
684 miocnak(q
, mp
, 0, EINVAL
);
690 ipnet_iocdata(queue_t
*q
, mblk_t
*mp
)
692 struct iocblk
*iocp
= (struct iocblk
*)mp
->b_rptr
;
693 ipnet_t
*ipnet
= q
->q_ptr
;
695 switch (iocp
->ioc_cmd
) {
697 if (*(int *)mp
->b_cont
->b_rptr
== 1)
698 ipnet
->ipnet_flags
|= IPNET_INFO
;
699 else if (*(int *)mp
->b_cont
->b_rptr
== 0)
700 ipnet
->ipnet_flags
&= ~IPNET_INFO
;
703 miocack(q
, mp
, 0, DL_IPNETINFO_VERSION
);
707 miocnak(q
, mp
, 0, EINVAL
);
713 ipnet_wputnondata(queue_t
*q
, mblk_t
*mp
)
715 union DL_primitives
*dlp
= (union DL_primitives
*)mp
->b_rptr
;
716 t_uscalar_t prim
= dlp
->dl_primitive
;
720 ipnet_inforeq(q
, mp
);
723 ipnet_unbindreq(q
, mp
);
726 ipnet_bindreq(q
, mp
);
728 case DL_PROMISCON_REQ
:
729 ipnet_dlpromisconreq(q
, mp
);
731 case DL_PROMISCOFF_REQ
:
732 ipnet_dlpromiscoffreq(q
, mp
);
734 case DL_UNITDATA_REQ
:
736 case DL_PHYS_ADDR_REQ
:
737 case DL_SET_PHYS_ADDR_REQ
:
738 case DL_ENABMULTI_REQ
:
739 case DL_DISABMULTI_REQ
:
741 dlerrorack(q
, mp
, prim
, DL_UNSUPPORTED
, 0);
744 dlerrorack(q
, mp
, prim
, DL_BADPRIM
, 0);
750 ipnet_inforeq(queue_t
*q
, mblk_t
*mp
)
753 size_t size
= sizeof (dl_info_ack_t
) + sizeof (ushort_t
);
755 if (MBLKL(mp
) < DL_INFO_REQ_SIZE
) {
756 dlerrorack(q
, mp
, DL_INFO_REQ
, DL_BADPRIM
, 0);
760 if ((mp
= mexchange(q
, mp
, size
, M_PCPROTO
, DL_INFO_ACK
)) == NULL
)
763 dlip
= (dl_info_ack_t
*)mp
->b_rptr
;
764 *dlip
= ipnet_infoack
;
769 ipnet_bindreq(queue_t
*q
, mblk_t
*mp
)
771 union DL_primitives
*dlp
= (union DL_primitives
*)mp
->b_rptr
;
772 ipnet_t
*ipnet
= q
->q_ptr
;
774 if (MBLKL(mp
) < DL_BIND_REQ_SIZE
) {
775 dlerrorack(q
, mp
, DL_BIND_REQ
, DL_BADPRIM
, 0);
779 switch (dlp
->bind_req
.dl_sap
) {
781 ipnet
->ipnet_family
= AF_UNSPEC
;
784 ipnet
->ipnet_family
= AF_INET
;
787 ipnet
->ipnet_family
= AF_INET6
;
790 dlerrorack(q
, mp
, DL_BIND_REQ
, DL_BADSAP
, 0);
795 ipnet
->ipnet_dlstate
= DL_IDLE
;
796 dlbindack(q
, mp
, dlp
->bind_req
.dl_sap
, 0, 0, 0, 0);
800 ipnet_unbindreq(queue_t
*q
, mblk_t
*mp
)
802 ipnet_t
*ipnet
= q
->q_ptr
;
804 if (MBLKL(mp
) < DL_UNBIND_REQ_SIZE
) {
805 dlerrorack(q
, mp
, DL_UNBIND_REQ
, DL_BADPRIM
, 0);
809 if (ipnet
->ipnet_dlstate
!= DL_IDLE
) {
810 dlerrorack(q
, mp
, DL_UNBIND_REQ
, DL_OUTSTATE
, 0);
812 ipnet
->ipnet_dlstate
= DL_UNBOUND
;
813 ipnet
->ipnet_family
= AF_UNSPEC
;
814 dlokack(q
, mp
, DL_UNBIND_REQ
);
819 ipnet_dlpromisconreq(queue_t
*q
, mblk_t
*mp
)
821 ipnet_t
*ipnet
= q
->q_ptr
;
825 if (MBLKL(mp
) < DL_PROMISCON_REQ_SIZE
) {
826 dlerrorack(q
, mp
, DL_PROMISCON_REQ
, DL_BADPRIM
, 0);
830 if (ipnet
->ipnet_flags
& IPNET_LOMODE
) {
831 dlokack(q
, mp
, DL_PROMISCON_REQ
);
835 level
= ((dl_promiscon_req_t
*)mp
->b_rptr
)->dl_level
;
836 if (level
== DL_PROMISC_PHYS
|| level
== DL_PROMISC_MULTI
) {
837 if ((err
= ipnet_join_allmulti(ipnet
->ipnet_if
,
838 ipnet
->ipnet_ns
->netstack_ipnet
)) != 0) {
839 dlerrorack(q
, mp
, DL_PROMISCON_REQ
, DL_SYSERR
, err
);
845 case DL_PROMISC_PHYS
:
846 ipnet
->ipnet_flags
|= IPNET_PROMISC_PHYS
;
849 ipnet
->ipnet_flags
|= IPNET_PROMISC_SAP
;
851 case DL_PROMISC_MULTI
:
852 ipnet
->ipnet_flags
|= IPNET_PROMISC_MULTI
;
855 dlerrorack(q
, mp
, DL_PROMISCON_REQ
, DL_BADPRIM
, 0);
859 dlokack(q
, mp
, DL_PROMISCON_REQ
);
863 ipnet_dlpromiscoffreq(queue_t
*q
, mblk_t
*mp
)
865 ipnet_t
*ipnet
= q
->q_ptr
;
867 uint16_t orig_ipnet_flags
= ipnet
->ipnet_flags
;
869 if (MBLKL(mp
) < DL_PROMISCOFF_REQ_SIZE
) {
870 dlerrorack(q
, mp
, DL_PROMISCOFF_REQ
, DL_BADPRIM
, 0);
874 if (ipnet
->ipnet_flags
& IPNET_LOMODE
) {
875 dlokack(q
, mp
, DL_PROMISCOFF_REQ
);
879 level
= ((dl_promiscon_req_t
*)mp
->b_rptr
)->dl_level
;
881 case DL_PROMISC_PHYS
:
882 if (ipnet
->ipnet_flags
& IPNET_PROMISC_PHYS
)
883 ipnet
->ipnet_flags
&= ~IPNET_PROMISC_PHYS
;
886 if (ipnet
->ipnet_flags
& IPNET_PROMISC_SAP
)
887 ipnet
->ipnet_flags
&= ~IPNET_PROMISC_SAP
;
889 case DL_PROMISC_MULTI
:
890 if (ipnet
->ipnet_flags
& IPNET_PROMISC_MULTI
)
891 ipnet
->ipnet_flags
&= ~IPNET_PROMISC_MULTI
;
894 dlerrorack(q
, mp
, DL_PROMISCOFF_REQ
, DL_BADPRIM
, 0);
898 if (orig_ipnet_flags
== ipnet
->ipnet_flags
) {
899 dlerrorack(q
, mp
, DL_PROMISCOFF_REQ
, DL_NOTENAB
, 0);
903 if (level
== DL_PROMISC_PHYS
|| level
== DL_PROMISC_MULTI
) {
904 ipnet_leave_allmulti(ipnet
->ipnet_if
,
905 ipnet
->ipnet_ns
->netstack_ipnet
);
908 dlokack(q
, mp
, DL_PROMISCOFF_REQ
);
912 ipnet_join_allmulti(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
915 ip_stack_t
*ipst
= ips
->ips_netstack
->netstack_ip
;
916 uint64_t index
= ipnetif
->if_index
;
918 mutex_enter(&ips
->ips_event_lock
);
919 if (ipnetif
->if_multicnt
== 0) {
920 ASSERT((ipnetif
->if_flags
&
921 (IPNETIF_IPV4ALLMULTI
| IPNETIF_IPV6ALLMULTI
)) == 0);
922 if (ipnetif
->if_flags
& IPNETIF_IPV4PLUMBED
) {
923 err
= ip_join_allmulti(index
, B_FALSE
, ipst
);
926 ipnetif
->if_flags
|= IPNETIF_IPV4ALLMULTI
;
928 if (ipnetif
->if_flags
& IPNETIF_IPV6PLUMBED
) {
929 err
= ip_join_allmulti(index
, B_TRUE
, ipst
);
931 (ipnetif
->if_flags
& IPNETIF_IPV4ALLMULTI
)) {
932 (void) ip_leave_allmulti(index
, B_FALSE
, ipst
);
933 ipnetif
->if_flags
&= ~IPNETIF_IPV4ALLMULTI
;
936 ipnetif
->if_flags
|= IPNETIF_IPV6ALLMULTI
;
939 ipnetif
->if_multicnt
++;
942 mutex_exit(&ips
->ips_event_lock
);
947 ipnet_leave_allmulti(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
950 ip_stack_t
*ipst
= ips
->ips_netstack
->netstack_ip
;
951 uint64_t index
= ipnetif
->if_index
;
953 mutex_enter(&ips
->ips_event_lock
);
954 ASSERT(ipnetif
->if_multicnt
!= 0);
955 if (--ipnetif
->if_multicnt
== 0) {
956 if (ipnetif
->if_flags
& IPNETIF_IPV4ALLMULTI
) {
957 err
= ip_leave_allmulti(index
, B_FALSE
, ipst
);
958 ASSERT(err
== 0 || err
== ENODEV
);
959 ipnetif
->if_flags
&= ~IPNETIF_IPV4ALLMULTI
;
961 if (ipnetif
->if_flags
& IPNETIF_IPV6ALLMULTI
) {
962 err
= ip_leave_allmulti(index
, B_TRUE
, ipst
);
963 ASSERT(err
== 0 || err
== ENODEV
);
964 ipnetif
->if_flags
&= ~IPNETIF_IPV6ALLMULTI
;
967 mutex_exit(&ips
->ips_event_lock
);
971 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
972 * The structure it copies the header information from,
973 * hook_pkt_observe_t, is constructed using network byte
974 * order in ipobs_hook(), so there is no conversion here.
977 ipnet_addheader(hook_pkt_observe_t
*hdr
, mblk_t
*mp
)
982 if ((dlhdr
= allocb(sizeof (dl_ipnetinfo_t
), BPRI_HI
)) == NULL
) {
986 dl
= (dl_ipnetinfo_t
*)dlhdr
->b_rptr
;
987 dl
->dli_version
= DL_IPNETINFO_VERSION
;
988 dl
->dli_family
= hdr
->hpo_family
;
989 dl
->dli_htype
= hdr
->hpo_htype
;
990 dl
->dli_pktlen
= hdr
->hpo_pktlen
;
991 dl
->dli_ifindex
= hdr
->hpo_ifindex
;
992 dl
->dli_grifindex
= hdr
->hpo_grifindex
;
993 dl
->dli_zsrc
= hdr
->hpo_zsrc
;
994 dl
->dli_zdst
= hdr
->hpo_zdst
;
995 dlhdr
->b_wptr
+= sizeof (*dl
);
1001 static ipnet_addrtype_t
1002 ipnet_get_addrtype(ipnet_t
*ipnet
, ipnet_addrp_t
*addr
)
1005 ipnetif_t
*ipnetif
= ipnet
->ipnet_if
;
1006 ipnetif_addr_t
*ifaddr
;
1007 ipnet_addrtype_t addrtype
= IPNETADDR_UNKNOWN
;
1009 /* First check if the address is multicast or limited broadcast. */
1010 switch (addr
->iap_family
) {
1012 if (CLASSD(*(addr
->iap_addr4
)) ||
1013 *(addr
->iap_addr4
) == INADDR_BROADCAST
)
1014 return (IPNETADDR_MBCAST
);
1017 if (IN6_IS_ADDR_MULTICAST(addr
->iap_addr6
))
1018 return (IPNETADDR_MBCAST
);
1023 * Walk the address list to see if the address belongs to our
1024 * interface or is one of our subnet broadcast addresses.
1026 mutex_enter(&ipnetif
->if_addr_lock
);
1027 list
= (addr
->iap_family
== AF_INET
) ?
1028 &ipnetif
->if_ip4addr_list
: &ipnetif
->if_ip6addr_list
;
1029 for (ifaddr
= list_head(list
);
1030 ifaddr
!= NULL
&& addrtype
== IPNETADDR_UNKNOWN
;
1031 ifaddr
= list_next(list
, ifaddr
)) {
1033 * If we're not in the global zone, then only look at
1034 * addresses in our zone.
1036 if (ipnet
->ipnet_zoneid
!= GLOBAL_ZONEID
&&
1037 ipnet
->ipnet_zoneid
!= ifaddr
->ifa_zone
)
1039 switch (addr
->iap_family
) {
1041 if (ifaddr
->ifa_ip4addr
!= INADDR_ANY
&&
1042 *(addr
->iap_addr4
) == ifaddr
->ifa_ip4addr
)
1043 addrtype
= IPNETADDR_MYADDR
;
1044 else if (ifaddr
->ifa_brdaddr
!= INADDR_ANY
&&
1045 *(addr
->iap_addr4
) == ifaddr
->ifa_brdaddr
)
1046 addrtype
= IPNETADDR_MBCAST
;
1049 if (IN6_ARE_ADDR_EQUAL(addr
->iap_addr6
,
1050 &ifaddr
->ifa_ip6addr
))
1051 addrtype
= IPNETADDR_MYADDR
;
1055 mutex_exit(&ipnetif
->if_addr_lock
);
1061 * Verify if the packet contained in hdr should be passed up to the
1062 * ipnet client stream.
1065 ipnet_accept(ipnet_t
*ipnet
, hook_pkt_observe_t
*hdr
, ipnet_addrp_t
*src
,
1069 uint64_t ifindex
= ipnet
->ipnet_if
->if_index
;
1070 ipnet_addrtype_t srctype
;
1071 ipnet_addrtype_t dsttype
;
1073 srctype
= ipnet_get_addrtype(ipnet
, src
);
1074 dsttype
= ipnet_get_addrtype(ipnet
, dst
);
1077 * If the packet's ifindex matches ours, or the packet's group ifindex
1078 * matches ours, it's on the interface we're observing. (Thus,
1079 * observing on the group ifindex matches all ifindexes in the group.)
1081 obsif
= (ntohl(hdr
->hpo_ifindex
) == ifindex
||
1082 ntohl(hdr
->hpo_grifindex
) == ifindex
);
1084 DTRACE_PROBE5(ipnet_accept__addr
,
1085 ipnet_addrtype_t
, srctype
, ipnet_addrp_t
*, src
,
1086 ipnet_addrtype_t
, dsttype
, ipnet_addrp_t
*, dst
,
1090 * Do not allow an ipnet stream to see packets that are not from or to
1091 * its zone. The exception is when zones are using the shared stack
1092 * model. In this case, streams in the global zone have visibility
1093 * into other shared-stack zones, and broadcast and multicast traffic
1094 * is visible by all zones in the stack.
1096 if (ipnet
->ipnet_zoneid
!= GLOBAL_ZONEID
&&
1097 dsttype
!= IPNETADDR_MBCAST
) {
1098 if (ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zsrc
) &&
1099 ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zdst
))
1104 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1105 * packet's IP version.
1107 if (!(ipnet
->ipnet_flags
& IPNET_PROMISC_SAP
) &&
1108 ipnet
->ipnet_family
!= hdr
->hpo_family
)
1111 /* If the destination address is ours, then accept the packet. */
1112 if (dsttype
== IPNETADDR_MYADDR
)
1116 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1117 * sent or received on the interface we're observing, or packets that
1118 * have our source address (this allows us to see packets we send).
1120 if (ipnet
->ipnet_flags
& IPNET_PROMISC_PHYS
) {
1121 if (srctype
== IPNETADDR_MYADDR
|| obsif
)
1126 * We accept multicast and broadcast packets transmitted or received
1127 * on the interface we're observing.
1129 if (dsttype
== IPNETADDR_MBCAST
&& obsif
)
1136 * Verify if the packet contained in hdr should be passed up to the ipnet
1137 * client stream that's in IPNET_LOMODE.
1141 ipnet_loaccept(ipnet_t
*ipnet
, hook_pkt_observe_t
*hdr
, ipnet_addrp_t
*src
,
1144 if (hdr
->hpo_htype
!= htons(IPOBS_HOOK_LOCAL
)) {
1146 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
1148 if (ipnet
->ipnet_if
== NULL
)
1153 * An ipnet stream must not see packets that are not from/to its zone.
1155 if (ipnet
->ipnet_zoneid
!= GLOBAL_ZONEID
) {
1156 if (ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zsrc
) &&
1157 ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zdst
))
1161 return (ipnet
->ipnet_family
== AF_UNSPEC
||
1162 ipnet
->ipnet_family
== hdr
->hpo_family
);
1166 ipnet_dispatch(void *arg
)
1169 hook_pkt_observe_t
*hdr
= (hook_pkt_observe_t
*)mp
->b_rptr
;
1177 ips
= ((netstack_t
*)hdr
->hpo_ctx
)->netstack_ipnet
;
1179 netmp
= hdr
->hpo_pkt
->b_cont
;
1180 src
.iap_family
= hdr
->hpo_family
;
1181 dst
.iap_family
= hdr
->hpo_family
;
1183 if (hdr
->hpo_family
== AF_INET
) {
1184 src
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_src
;
1185 dst
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_dst
;
1187 src
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_src
;
1188 dst
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_dst
;
1191 ipnet_walkers_inc(ips
);
1193 list
= &ips
->ips_str_list
;
1194 for (ipnet
= list_head(list
); ipnet
!= NULL
;
1195 ipnet
= list_next(list
, ipnet
)) {
1196 if (!(*ipnet
->ipnet_acceptfn
)(ipnet
, hdr
, &src
, &dst
)) {
1197 IPSK_BUMP(ips
, ik_acceptFail
);
1200 IPSK_BUMP(ips
, ik_acceptOk
);
1202 if (list_next(list
, ipnet
) == NULL
) {
1203 netmp
= hdr
->hpo_pkt
->b_cont
;
1204 hdr
->hpo_pkt
->b_cont
= NULL
;
1206 if ((netmp
= dupmsg(hdr
->hpo_pkt
->b_cont
)) == NULL
&&
1207 (netmp
= copymsg(hdr
->hpo_pkt
->b_cont
)) == NULL
) {
1208 IPSK_BUMP(ips
, ik_duplicationFail
);
1213 if (ipnet
->ipnet_flags
& IPNET_INFO
) {
1214 if ((netmp
= ipnet_addheader(hdr
, netmp
)) == NULL
) {
1215 IPSK_BUMP(ips
, ik_dispatchHeaderDrop
);
1220 if (ipnet
->ipnet_rq
->q_first
== NULL
&&
1221 canputnext(ipnet
->ipnet_rq
)) {
1222 putnext(ipnet
->ipnet_rq
, netmp
);
1223 IPSK_BUMP(ips
, ik_dispatchDeliver
);
1224 } else if (canput(ipnet
->ipnet_rq
)) {
1225 (void) putq(ipnet
->ipnet_rq
, netmp
);
1226 IPSK_BUMP(ips
, ik_dispatchDeliver
);
1229 IPSK_BUMP(ips
, ik_dispatchPutDrop
);
1233 ipnet_walkers_dec(ips
);
1239 ipnet_input(mblk_t
*mp
)
1241 hook_pkt_observe_t
*hdr
= (hook_pkt_observe_t
*)mp
->b_rptr
;
1244 ips
= ((netstack_t
*)hdr
->hpo_ctx
)->netstack_ipnet
;
1246 if (ddi_taskq_dispatch(ipnet_taskq
, ipnet_dispatch
, mp
, DDI_NOSLEEP
) !=
1248 IPSK_BUMP(ips
, ik_dispatchFail
);
1251 IPSK_BUMP(ips
, ik_dispatchOk
);
1256 ipnet_alloc_if(ipnet_stack_t
*ips
)
1260 if ((ipnetif
= kmem_zalloc(sizeof (*ipnetif
), KM_NOSLEEP
)) == NULL
)
1263 mutex_init(&ipnetif
->if_addr_lock
, NULL
, MUTEX_DEFAULT
, 0);
1264 list_create(&ipnetif
->if_ip4addr_list
, sizeof (ipnetif_addr_t
),
1265 offsetof(ipnetif_addr_t
, ifa_link
));
1266 list_create(&ipnetif
->if_ip6addr_list
, sizeof (ipnetif_addr_t
),
1267 offsetof(ipnetif_addr_t
, ifa_link
));
1268 mutex_init(&ipnetif
->if_reflock
, NULL
, MUTEX_DEFAULT
, 0);
1270 ipnetif
->if_stackp
= ips
;
1276 * Create a new ipnetif_t and new minor node for it. If creation is
1277 * successful the new ipnetif_t is inserted into an avl_tree
1278 * containing ipnetif's for this stack instance.
1281 ipnetif_create(const char *name
, uint64_t index
, ipnet_stack_t
*ips
,
1285 avl_index_t where
= 0;
1289 * Because ipnetif_create() can be called from a NIC event
1290 * callback, it should not block.
1292 ifminor
= (minor_t
)id_alloc_nosleep(ipnet_minor_space
);
1293 if (ifminor
== (minor_t
)-1)
1295 if ((ipnetif
= ipnet_alloc_if(ips
)) == NULL
) {
1296 id_free(ipnet_minor_space
, ifminor
);
1300 (void) strlcpy(ipnetif
->if_name
, name
, LIFNAMSIZ
);
1301 ipnetif
->if_index
= (uint_t
)index
;
1302 ipnetif
->if_zoneid
= netstack_get_zoneid(ips
->ips_netstack
);
1303 ipnetif
->if_dev
= makedevice(ipnet_major
, ifminor
);
1305 ipnetif
->if_refcnt
= 1;
1306 if ((ifflags
& IFF_LOOPBACK
) != 0)
1307 ipnetif
->if_flags
= IPNETIF_LOOPBACK
;
1309 mutex_enter(&ips
->ips_avl_lock
);
1310 VERIFY(avl_find(&ips
->ips_avl_by_index
, &index
, &where
) == NULL
);
1311 avl_insert(&ips
->ips_avl_by_index
, ipnetif
, where
);
1312 VERIFY(avl_find(&ips
->ips_avl_by_name
, (void *)name
, &where
) == NULL
);
1313 avl_insert(&ips
->ips_avl_by_name
, ipnetif
, where
);
1314 mutex_exit(&ips
->ips_avl_lock
);
1320 ipnetif_remove(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
1324 ipnet_walkers_inc(ips
);
1325 /* Send a SIGHUP to all open streams associated with this ipnetif. */
1326 for (ipnet
= list_head(&ips
->ips_str_list
); ipnet
!= NULL
;
1327 ipnet
= list_next(&ips
->ips_str_list
, ipnet
)) {
1328 if (ipnet
->ipnet_if
== ipnetif
)
1329 (void) putnextctl(ipnet
->ipnet_rq
, M_HANGUP
);
1331 ipnet_walkers_dec(ips
);
1332 mutex_enter(&ips
->ips_avl_lock
);
1333 avl_remove(&ips
->ips_avl_by_index
, ipnetif
);
1334 avl_remove(&ips
->ips_avl_by_name
, ipnetif
);
1335 mutex_exit(&ips
->ips_avl_lock
);
1337 * Release the reference we implicitly held in ipnetif_create().
1339 ipnetif_refrele(ipnetif
);
1343 ipnet_purge_addrlist(list_t
*addrlist
)
1345 ipnetif_addr_t
*ifa
;
1347 while ((ifa
= list_head(addrlist
)) != NULL
) {
1348 list_remove(addrlist
, ifa
);
1349 if (ifa
->ifa_shared
!= NULL
)
1350 ipnetif_clone_release(ifa
->ifa_shared
);
1351 kmem_free(ifa
, sizeof (*ifa
));
1356 ipnetif_free(ipnetif_t
*ipnetif
)
1358 ASSERT(ipnetif
->if_refcnt
== 0);
1359 ASSERT(ipnetif
->if_sharecnt
== 0);
1361 /* Remove IPv4/v6 address lists from the ipnetif */
1362 ipnet_purge_addrlist(&ipnetif
->if_ip4addr_list
);
1363 list_destroy(&ipnetif
->if_ip4addr_list
);
1364 ipnet_purge_addrlist(&ipnetif
->if_ip6addr_list
);
1365 list_destroy(&ipnetif
->if_ip6addr_list
);
1366 mutex_destroy(&ipnetif
->if_addr_lock
);
1367 mutex_destroy(&ipnetif
->if_reflock
);
1368 if (ipnetif
->if_dev
!= 0)
1369 id_free(ipnet_minor_space
, getminor(ipnetif
->if_dev
));
1370 kmem_free(ipnetif
, sizeof (*ipnetif
));
1374 * Create an ipnetif_addr_t with the given logical interface id (lif)
1375 * and add it to the supplied ipnetif. The lif is the netinfo
1376 * representation of logical interface id, and we use this id to match
1377 * incoming netinfo events against our lists of addresses.
1380 ipnet_add_ifaddr(uint64_t lif
, ipnetif_t
*ipnetif
, net_handle_t nd
)
1382 ipnetif_addr_t
*ifaddr
;
1384 struct sockaddr_in bcast
;
1385 struct sockaddr_storage addr
;
1386 net_ifaddr_t type
= NA_ADDRESS
;
1387 uint64_t phyif
= ipnetif
->if_index
;
1389 if (net_getlifaddr(nd
, phyif
, lif
, 1, &type
, &addr
) != 0 ||
1390 net_getlifzone(nd
, phyif
, lif
, &zoneid
) != 0)
1393 if ((ifaddr
= kmem_alloc(sizeof (*ifaddr
), KM_NOSLEEP
)) == NULL
)
1395 ifaddr
->ifa_zone
= zoneid
;
1396 ifaddr
->ifa_id
= lif
;
1397 ifaddr
->ifa_shared
= NULL
;
1399 switch (addr
.ss_family
) {
1401 ifaddr
->ifa_ip4addr
=
1402 ((struct sockaddr_in
*)&addr
)->sin_addr
.s_addr
;
1404 * Try and get the broadcast address. Note that it's okay for
1405 * an interface to not have a broadcast address, so we don't
1406 * fail the entire operation if net_getlifaddr() fails here.
1408 type
= NA_BROADCAST
;
1409 if (net_getlifaddr(nd
, phyif
, lif
, 1, &type
, &bcast
) == 0)
1410 ifaddr
->ifa_brdaddr
= bcast
.sin_addr
.s_addr
;
1413 ifaddr
->ifa_ip6addr
= ((struct sockaddr_in6
*)&addr
)->sin6_addr
;
1418 * The zoneid stored in ipnetif_t needs to correspond to the actual
1419 * zone the address is being used in. This facilitates finding the
1420 * correct netstack_t pointer, amongst other things, later.
1422 if (zoneid
== ALL_ZONES
)
1423 zoneid
= GLOBAL_ZONEID
;
1425 mutex_enter(&ipnetif
->if_addr_lock
);
1426 if (zoneid
!= ipnetif
->if_zoneid
) {
1429 ifp2
= ipnetif_clone_create(ipnetif
, zoneid
);
1430 ifaddr
->ifa_shared
= ifp2
;
1432 list_insert_tail(addr
.ss_family
== AF_INET
?
1433 &ipnetif
->if_ip4addr_list
: &ipnetif
->if_ip6addr_list
, ifaddr
);
1434 mutex_exit(&ipnetif
->if_addr_lock
);
1438 ipnet_delete_ifaddr(ipnetif_addr_t
*ifaddr
, ipnetif_t
*ipnetif
, boolean_t isv6
)
1440 mutex_enter(&ipnetif
->if_addr_lock
);
1441 if (ifaddr
->ifa_shared
!= NULL
)
1442 ipnetif_clone_release(ifaddr
->ifa_shared
);
1445 &ipnetif
->if_ip6addr_list
: &ipnetif
->if_ip4addr_list
, ifaddr
);
1446 mutex_exit(&ipnetif
->if_addr_lock
);
1447 kmem_free(ifaddr
, sizeof (*ifaddr
));
1451 ipnet_plumb_ev(ipnet_nicevent_t
*ipne
, ipnet_stack_t
*ips
, boolean_t isv6
)
1454 boolean_t refrele_needed
= B_TRUE
;
1460 ifname
= ipne
->ipne_ifname
;
1461 ifindex
= ipne
->ipne_ifindex
;
1463 (void) net_getlifflags(ipne
->ipne_protocol
, ifindex
, 0, &ifflags
);
1465 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
) {
1466 ipnetif
= ipnetif_create(ifname
, ifindex
, ips
, ifflags
);
1467 refrele_needed
= B_FALSE
;
1469 if (ipnetif
!= NULL
) {
1470 ipnetif
->if_flags
|=
1471 isv6
? IPNETIF_IPV6PLUMBED
: IPNETIF_IPV4PLUMBED
;
1474 if (ipnetif
->if_multicnt
!= 0) {
1475 if (ip_join_allmulti(ifindex
, isv6
,
1476 ips
->ips_netstack
->netstack_ip
) == 0) {
1477 ipnetif
->if_flags
|=
1478 isv6
? IPNETIF_IPV6ALLMULTI
: IPNETIF_IPV4ALLMULTI
;
1483 ipnetif_refrele(ipnetif
);
1487 ipnet_unplumb_ev(uint64_t ifindex
, ipnet_stack_t
*ips
, boolean_t isv6
)
1491 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
)
1494 mutex_enter(&ipnetif
->if_addr_lock
);
1495 ipnet_purge_addrlist(isv6
?
1496 &ipnetif
->if_ip6addr_list
: &ipnetif
->if_ip4addr_list
);
1497 mutex_exit(&ipnetif
->if_addr_lock
);
1500 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1501 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif
1502 * if both IPv4 and IPv6 interfaces have been unplumbed.
1504 ipnetif
->if_flags
&= isv6
? ~IPNETIF_IPV6PLUMBED
: ~IPNETIF_IPV4PLUMBED
;
1505 if (!(ipnetif
->if_flags
& (IPNETIF_IPV4PLUMBED
| IPNETIF_IPV6PLUMBED
)))
1506 ipnetif_remove(ipnetif
, ips
);
1507 ipnetif_refrele(ipnetif
);
1511 ipnet_lifup_ev(uint64_t ifindex
, uint64_t lifindex
, net_handle_t nd
,
1512 ipnet_stack_t
*ips
, boolean_t isv6
)
1515 ipnetif_addr_t
*ifaddr
;
1517 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
)
1519 if ((ifaddr
= ipnet_match_lif(ipnetif
, lifindex
, isv6
)) != NULL
) {
1521 * We must have missed a NE_LIF_DOWN event. Delete this
1522 * ifaddr and re-create it.
1524 ipnet_delete_ifaddr(ifaddr
, ipnetif
, isv6
);
1527 ipnet_add_ifaddr(lifindex
, ipnetif
, nd
);
1528 ipnetif_refrele(ipnetif
);
1532 ipnet_lifdown_ev(uint64_t ifindex
, uint64_t lifindex
, ipnet_stack_t
*ips
,
1536 ipnetif_addr_t
*ifaddr
;
1538 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
)
1540 if ((ifaddr
= ipnet_match_lif(ipnetif
, lifindex
, isv6
)) != NULL
)
1541 ipnet_delete_ifaddr(ifaddr
, ipnetif
, isv6
);
1542 ipnetif_refrele(ipnetif
);
1544 * Make sure that open streams on this ipnetif are still allowed to
1547 ipnetif_zonecheck(ipnetif
, ips
);
1551 * This callback from the NIC event framework dispatches a taskq as the event
1552 * handlers may block.
1556 ipnet_nicevent_cb(hook_event_token_t token
, hook_data_t info
, void *arg
)
1558 ipnet_stack_t
*ips
= arg
;
1559 hook_nic_event_t
*hn
= (hook_nic_event_t
*)info
;
1560 ipnet_nicevent_t
*ipne
;
1562 if ((ipne
= kmem_alloc(sizeof (ipnet_nicevent_t
), KM_NOSLEEP
)) == NULL
)
1564 ipne
->ipne_event
= hn
->hne_event
;
1565 ipne
->ipne_protocol
= hn
->hne_protocol
;
1566 ipne
->ipne_stackid
= ips
->ips_netstack
->netstack_stackid
;
1567 ipne
->ipne_ifindex
= hn
->hne_nic
;
1568 ipne
->ipne_lifindex
= hn
->hne_lif
;
1569 if (hn
->hne_datalen
!= 0) {
1570 (void) strlcpy(ipne
->ipne_ifname
, hn
->hne_data
,
1571 sizeof (ipne
->ipne_ifname
));
1573 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq
, ipnet_nicevent_task
,
1579 ipnet_nicevent_task(void *arg
)
1581 ipnet_nicevent_t
*ipne
= arg
;
1586 if ((ns
= netstack_find_by_stackid(ipne
->ipne_stackid
)) == NULL
)
1588 ips
= ns
->netstack_ipnet
;
1589 isv6
= (ipne
->ipne_protocol
== ips
->ips_ndv6
);
1591 mutex_enter(&ips
->ips_event_lock
);
1592 switch (ipne
->ipne_event
) {
1594 ipnet_plumb_ev(ipne
, ips
, isv6
);
1597 ipnet_unplumb_ev(ipne
->ipne_ifindex
, ips
, isv6
);
1600 ipnet_lifup_ev(ipne
->ipne_ifindex
, ipne
->ipne_lifindex
,
1601 ipne
->ipne_protocol
, ips
, isv6
);
1604 ipnet_lifdown_ev(ipne
->ipne_ifindex
, ipne
->ipne_lifindex
, ips
,
1610 mutex_exit(&ips
->ips_event_lock
);
1614 kmem_free(ipne
, sizeof (ipnet_nicevent_t
));
1618 ipnet_if_getdev(char *name
, zoneid_t zoneid
)
1623 dev_t dev
= (dev_t
)-1;
1625 if ((ns
= netstack_find_by_zoneid(zoneid
)) == NULL
)
1628 ips
= ns
->netstack_ipnet
;
1629 mutex_enter(&ips
->ips_avl_lock
);
1630 if ((ipnetif
= avl_find(&ips
->ips_avl_by_name
, name
, NULL
)) != NULL
) {
1631 if (ipnetif_in_zone(ipnetif
, zoneid
, ips
))
1632 dev
= ipnetif
->if_dev
;
1634 mutex_exit(&ips
->ips_avl_lock
);
1641 ipnetif_getby_index(uint64_t id
, ipnet_stack_t
*ips
)
1645 mutex_enter(&ips
->ips_avl_lock
);
1646 if ((ipnetif
= avl_find(&ips
->ips_avl_by_index
, &id
, NULL
)) != NULL
)
1647 ipnetif_refhold(ipnetif
);
1648 mutex_exit(&ips
->ips_avl_lock
);
1653 ipnetif_getby_dev(dev_t dev
, ipnet_stack_t
*ips
)
1658 mutex_enter(&ips
->ips_avl_lock
);
1659 tree
= &ips
->ips_avl_by_index
;
1660 for (ipnetif
= avl_first(tree
); ipnetif
!= NULL
;
1661 ipnetif
= avl_walk(tree
, ipnetif
, AVL_AFTER
)) {
1662 if (ipnetif
->if_dev
== dev
) {
1663 ipnetif_refhold(ipnetif
);
1667 mutex_exit(&ips
->ips_avl_lock
);
1671 static ipnetif_addr_t
*
1672 ipnet_match_lif(ipnetif_t
*ipnetif
, lif_if_t lid
, boolean_t isv6
)
1674 ipnetif_addr_t
*ifaddr
;
1677 mutex_enter(&ipnetif
->if_addr_lock
);
1678 list
= isv6
? &ipnetif
->if_ip6addr_list
: &ipnetif
->if_ip4addr_list
;
1679 for (ifaddr
= list_head(list
); ifaddr
!= NULL
;
1680 ifaddr
= list_next(list
, ifaddr
)) {
1681 if (lid
== ifaddr
->ifa_id
)
1684 mutex_exit(&ipnetif
->if_addr_lock
);
1690 ipnet_stack_init(netstackid_t stackid
, netstack_t
*ns
)
1694 ips
= kmem_zalloc(sizeof (*ips
), KM_SLEEP
);
1695 ips
->ips_netstack
= ns
;
1696 mutex_init(&ips
->ips_avl_lock
, NULL
, MUTEX_DEFAULT
, 0);
1697 avl_create(&ips
->ips_avl_by_index
, ipnetif_compare_index
,
1698 sizeof (ipnetif_t
), offsetof(ipnetif_t
, if_avl_by_index
));
1699 avl_create(&ips
->ips_avl_by_name
, ipnetif_compare_name
,
1700 sizeof (ipnetif_t
), offsetof(ipnetif_t
, if_avl_by_name
));
1701 avl_create(&ips
->ips_avl_by_shared
, ipnetif_compare_name_zone
,
1702 sizeof (ipnetif_t
), offsetof(ipnetif_t
, if_avl_by_shared
));
1703 mutex_init(&ips
->ips_walkers_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1704 cv_init(&ips
->ips_walkers_cv
, NULL
, CV_DRIVER
, NULL
);
1705 list_create(&ips
->ips_str_list
, sizeof (ipnet_t
),
1706 offsetof(ipnet_t
, ipnet_next
));
1707 ipnet_register_netihook(ips
);
1713 ipnet_stack_fini(netstackid_t stackid
, void *arg
)
1715 ipnet_stack_t
*ips
= arg
;
1716 ipnetif_t
*ipnetif
, *nipnetif
;
1718 if (ips
->ips_kstatp
!= NULL
) {
1721 zoneid
= netstackid_to_zoneid(stackid
);
1722 net_kstat_delete(net_zoneidtonetid(zoneid
), ips
->ips_kstatp
);
1724 if (ips
->ips_ndv4
!= NULL
) {
1725 VERIFY(net_hook_unregister(ips
->ips_ndv4
, NH_NIC_EVENTS
,
1726 ips
->ips_nicevents
) == 0);
1727 VERIFY(net_protocol_release(ips
->ips_ndv4
) == 0);
1729 if (ips
->ips_ndv6
!= NULL
) {
1730 VERIFY(net_hook_unregister(ips
->ips_ndv6
, NH_NIC_EVENTS
,
1731 ips
->ips_nicevents
) == 0);
1732 VERIFY(net_protocol_release(ips
->ips_ndv6
) == 0);
1734 hook_free(ips
->ips_nicevents
);
1736 for (ipnetif
= avl_first(&ips
->ips_avl_by_index
); ipnetif
!= NULL
;
1737 ipnetif
= nipnetif
) {
1738 nipnetif
= AVL_NEXT(&ips
->ips_avl_by_index
, ipnetif
);
1739 ipnetif_remove(ipnetif
, ips
);
1741 avl_destroy(&ips
->ips_avl_by_shared
);
1742 avl_destroy(&ips
->ips_avl_by_index
);
1743 avl_destroy(&ips
->ips_avl_by_name
);
1744 mutex_destroy(&ips
->ips_avl_lock
);
1745 mutex_destroy(&ips
->ips_walkers_lock
);
1746 cv_destroy(&ips
->ips_walkers_cv
);
1747 list_destroy(&ips
->ips_str_list
);
1748 kmem_free(ips
, sizeof (*ips
));
1751 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1753 ipnet_addrs_in_zone(list_t
*addrlist
, zoneid_t zoneid
)
1755 ipnetif_addr_t
*ifa
;
1757 for (ifa
= list_head(addrlist
); ifa
!= NULL
;
1758 ifa
= list_next(addrlist
, ifa
)) {
1759 if (ifa
->ifa_zone
== zoneid
)
1765 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1767 ipnetif_in_zone(ipnetif_t
*ipnetif
, zoneid_t zoneid
, ipnet_stack_t
*ips
)
1772 * The global zone has visibility into all interfaces in the global
1773 * stack, and exclusive stack zones have visibility into all
1774 * interfaces in their stack.
1776 if (zoneid
== GLOBAL_ZONEID
||
1777 ips
->ips_netstack
->netstack_stackid
!= GLOBAL_NETSTACKID
)
1781 * Shared-stack zones only have visibility for interfaces that have
1782 * addresses in their zone.
1784 mutex_enter(&ipnetif
->if_addr_lock
);
1785 ret
= ipnet_addrs_in_zone(&ipnetif
->if_ip4addr_list
, zoneid
) ||
1786 ipnet_addrs_in_zone(&ipnetif
->if_ip6addr_list
, zoneid
);
1787 mutex_exit(&ipnetif
->if_addr_lock
);
1792 * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1793 * still be allowed to have it open. A given ipnet_t may no longer be allowed
1794 * to have an ipnetif open if there are no longer any addresses that belong to
1795 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the
1796 * case, send the ipnet_t an M_HANGUP.
1799 ipnetif_zonecheck(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
1801 list_t
*strlist
= &ips
->ips_str_list
;
1804 ipnet_walkers_inc(ips
);
1805 for (ipnet
= list_head(strlist
); ipnet
!= NULL
;
1806 ipnet
= list_next(strlist
, ipnet
)) {
1807 if (ipnet
->ipnet_if
!= ipnetif
)
1809 if (!ipnetif_in_zone(ipnetif
, ipnet
->ipnet_zoneid
, ips
))
1810 (void) putnextctl(ipnet
->ipnet_rq
, M_HANGUP
);
1812 ipnet_walkers_dec(ips
);
1816 ipnet_walk_if(ipnet_walkfunc_t
*cb
, void *arg
, zoneid_t zoneid
)
1820 ipnetif_cbdata_t
*cbnode
;
1824 if ((ns
= netstack_find_by_zoneid(zoneid
)) == NULL
)
1827 ips
= ns
->netstack_ipnet
;
1828 list_create(&cbdata
, sizeof (ipnetif_cbdata_t
),
1829 offsetof(ipnetif_cbdata_t
, ic_next
));
1831 mutex_enter(&ips
->ips_avl_lock
);
1832 for (ipnetif
= avl_first(&ips
->ips_avl_by_index
); ipnetif
!= NULL
;
1833 ipnetif
= avl_walk(&ips
->ips_avl_by_index
, ipnetif
, AVL_AFTER
)) {
1834 if (!ipnetif_in_zone(ipnetif
, zoneid
, ips
))
1836 cbnode
= kmem_zalloc(sizeof (ipnetif_cbdata_t
), KM_SLEEP
);
1837 (void) strlcpy(cbnode
->ic_ifname
, ipnetif
->if_name
, LIFNAMSIZ
);
1838 cbnode
->ic_dev
= ipnetif
->if_dev
;
1839 list_insert_head(&cbdata
, cbnode
);
1841 mutex_exit(&ips
->ips_avl_lock
);
1843 while ((cbnode
= list_head(&cbdata
)) != NULL
) {
1844 cb(cbnode
->ic_ifname
, arg
, cbnode
->ic_dev
);
1845 list_remove(&cbdata
, cbnode
);
1846 kmem_free(cbnode
, sizeof (ipnetif_cbdata_t
));
1848 list_destroy(&cbdata
);
1853 ipnetif_compare_index(const void *index_ptr
, const void *ipnetifp
)
1855 int64_t index1
= *((int64_t *)index_ptr
);
1856 int64_t index2
= (int64_t)((ipnetif_t
*)ipnetifp
)->if_index
;
1858 return (SIGNOF(index2
- index1
));
1862 ipnetif_compare_name(const void *name_ptr
, const void *ipnetifp
)
1866 res
= strcmp(((ipnetif_t
*)ipnetifp
)->if_name
, name_ptr
);
1867 return (SIGNOF(res
));
1871 ipnetif_compare_name_zone(const void *key_ptr
, const void *ipnetifp
)
1873 const uintptr_t *ptr
= key_ptr
;
1874 const ipnetif_t
*ifp
;
1878 res
= ifp
->if_zoneid
- ptr
[0];
1880 return (SIGNOF(res
));
1881 res
= strcmp(ifp
->if_name
, (char *)ptr
[1]);
1882 return (SIGNOF(res
));
1886 ipnetif_refhold(ipnetif_t
*ipnetif
)
1888 mutex_enter(&ipnetif
->if_reflock
);
1889 ipnetif
->if_refcnt
++;
1890 mutex_exit(&ipnetif
->if_reflock
);
1894 ipnetif_refrele(ipnetif_t
*ipnetif
)
1896 mutex_enter(&ipnetif
->if_reflock
);
1897 ASSERT(ipnetif
->if_refcnt
> 0);
1898 if (--ipnetif
->if_refcnt
== 0)
1899 ipnetif_free(ipnetif
);
1901 mutex_exit(&ipnetif
->if_reflock
);
1905 ipnet_walkers_inc(ipnet_stack_t
*ips
)
1907 mutex_enter(&ips
->ips_walkers_lock
);
1908 ips
->ips_walkers_cnt
++;
1909 mutex_exit(&ips
->ips_walkers_lock
);
1913 ipnet_walkers_dec(ipnet_stack_t
*ips
)
1915 mutex_enter(&ips
->ips_walkers_lock
);
1916 ASSERT(ips
->ips_walkers_cnt
!= 0);
1917 if (--ips
->ips_walkers_cnt
== 0)
1918 cv_broadcast(&ips
->ips_walkers_cv
);
1919 mutex_exit(&ips
->ips_walkers_lock
);
1924 ipobs_bounce_func(hook_event_token_t token
, hook_data_t info
, void *arg
)
1926 hook_pkt_observe_t
*hdr
;
1927 pfv_t func
= (pfv_t
)arg
;
1930 hdr
= (hook_pkt_observe_t
*)info
;
1932 * Code in ip_input() expects that it is the only one accessing the
1935 mp
= copymsg(hdr
->hpo_pkt
);
1937 netstack_t
*ns
= hdr
->hpo_ctx
;
1938 ipnet_stack_t
*ips
= ns
->netstack_ipnet
;
1940 IPSK_BUMP(ips
, ik_dispatchDupDrop
);
1944 hdr
= (hook_pkt_observe_t
*)mp
->b_rptr
;
1953 ipobs_register_hook(netstack_t
*ns
, pfv_t func
)
1955 ip_stack_t
*ipst
= ns
->netstack_ip
;
1959 HOOK_INIT(hook
, ipobs_bounce_func
, "", (void *)func
);
1960 VERIFY(hook
!= NULL
);
1963 * To register multiple hooks with the same callback function,
1964 * a unique name is needed.
1966 (void) snprintf(name
, sizeof (name
), "ipobserve_%p", (void *)hook
);
1967 hook
->h_name
= strdup(name
);
1969 (void) net_hook_register(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
, hook
);
1970 (void) net_hook_register(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
, hook
);
1976 ipobs_unregister_hook(netstack_t
*ns
, hook_t
*hook
)
1978 ip_stack_t
*ipst
= ns
->netstack_ip
;
1980 (void) net_hook_unregister(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
, hook
);
1982 (void) net_hook_unregister(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
, hook
);
1984 strfree(hook
->h_name
);
1989 /* ******************************************************************** */
1990 /* BPF Functions below */
1991 /* ******************************************************************** */
1994 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
1997 ipnet_find_by_zoneid(zoneid_t zoneid
)
2001 VERIFY((ns
= netstack_find_by_zoneid(zoneid
)) != NULL
);
2002 return (ns
->netstack_ipnet
);
2006 * Functions, such as the above ipnet_find_by_zoneid(), will return a
2007 * pointer to ipnet_stack_t by calling a netstack lookup function.
2008 * The netstack_find_*() functions return a pointer after doing a "hold"
2009 * on the data structure and thereby require a "release" when the caller
2010 * is finished with it. We need to mirror that API here and thus a caller
2011 * of ipnet_find_by_zoneid() is required to call ipnet_rele().
2014 ipnet_rele(ipnet_stack_t
*ips
)
2016 netstack_rele(ips
->ips_netstack
);
2022 ipnet_set_itap(bpf_itap_fn_t tapfunc
)
2024 ipnet_itap
= tapfunc
;
2028 * The list of interfaces available via ipnet is private for each zone,
2029 * so the AVL tree of each zone must be searched for a given name, even
2030 * if all names are unique.
2033 ipnet_open_byname(const char *name
, ipnetif_t
**ptr
, zoneid_t zoneid
)
2038 ASSERT(ptr
!= NULL
);
2039 VERIFY((ips
= ipnet_find_by_zoneid(zoneid
)) != NULL
);
2041 mutex_enter(&ips
->ips_avl_lock
);
2044 * Shared instance zone?
2046 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid
)) != zoneid
) {
2047 uintptr_t key
[2] = { zoneid
, (uintptr_t)name
};
2049 ipnetif
= avl_find(&ips
->ips_avl_by_shared
, (void *)key
, NULL
);
2051 ipnetif
= avl_find(&ips
->ips_avl_by_name
, (void *)name
, NULL
);
2053 if (ipnetif
!= NULL
)
2054 ipnetif_refhold(ipnetif
);
2055 mutex_exit(&ips
->ips_avl_lock
);
2060 if (ipnetif
== NULL
)
2066 ipnet_close_byhandle(ipnetif_t
*ifp
)
2068 ASSERT(ifp
!= NULL
);
2069 ipnetif_refrele(ifp
);
2073 ipnet_name(ipnetif_t
*ifp
)
2075 ASSERT(ifp
!= NULL
);
2076 return (ifp
->if_name
);
2080 * To find the linkid for a given name, it is necessary to know which zone
2081 * the interface name belongs to and to search the avl tree for that zone
2082 * as there is no master list of all interfaces and which zone they belong
2083 * to. It is assumed that the caller of this function is somehow already
2084 * working with the ipnet interfaces and hence the ips_event_lock is held.
2085 * When BPF calls into this function, it is doing so because of an event
2086 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
2087 * value returned has meaning without the need for grabbing a hold on the
2091 ipnet_get_linkid_byname(const char *name
, uint_t
*idp
, zoneid_t zoneid
)
2096 VERIFY((ips
= ipnet_find_by_zoneid(zoneid
)) != NULL
);
2097 ASSERT(mutex_owned(&ips
->ips_event_lock
));
2099 mutex_enter(&ips
->ips_avl_lock
);
2100 ifp
= avl_find(&ips
->ips_avl_by_name
, (void *)name
, NULL
);
2102 *idp
= (uint_t
)ifp
->if_index
;
2105 * Shared instance zone?
2107 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid
)) != zoneid
) {
2108 uintptr_t key
[2] = { zoneid
, (uintptr_t)name
};
2110 ifp
= avl_find(&ips
->ips_avl_by_shared
, (void *)key
, NULL
);
2112 *idp
= (uint_t
)ifp
->if_index
;
2115 mutex_exit(&ips
->ips_avl_lock
);
2124 * Strictly speaking, there is no such thing as a "client" in ipnet, like
2125 * there is in mac. BPF only needs to have this because it is required as
2126 * part of interfacing correctly with mac. The reuse of the original
2127 * ipnetif_t as a client poses no danger, so long as it is done with its
2128 * own ref-count'd hold that is given up on close.
2131 ipnet_client_open(ipnetif_t
*ptr
, ipnetif_t
**result
)
2133 ASSERT(ptr
!= NULL
);
2134 ASSERT(result
!= NULL
);
2135 ipnetif_refhold(ptr
);
2142 ipnet_client_close(ipnetif_t
*ptr
)
2144 ASSERT(ptr
!= NULL
);
2145 ipnetif_refrele(ptr
);
2149 * This is called from BPF when it needs to start receiving packets
2152 * The use of the ipnet_t structure here is somewhat lightweight when
2153 * compared to how it is used elsewhere but it already has all of the
2154 * right fields in it, so reuse here doesn't seem out of order. Its
2155 * primary purpose here is to provide the means to store pointers for
2156 * use when ipnet_promisc_remove() needs to be called.
2158 * This should never be called for the IPNET_MINOR_LO device as it is
2159 * never created via ipnetif_create.
2163 ipnet_promisc_add(void *handle
, uint_t how
, void *data
, uintptr_t *mhandle
,
2173 ifp
= (ipnetif_t
*)handle
;
2175 if (how
!= DL_PROMISC_PHYS
&& how
!= DL_PROMISC_MULTI
)
2178 ns
= netstack_find_by_zoneid(ifp
->if_zoneid
);
2180 if ((error
= ipnet_join_allmulti(ifp
, ns
->netstack_ipnet
)) != 0) {
2185 ipnet
= kmem_zalloc(sizeof (*ipnet
), KM_SLEEP
);
2186 ipnet
->ipnet_if
= ifp
;
2187 ipnet
->ipnet_ns
= ns
;
2188 ipnet
->ipnet_flags
= flags
;
2190 if ((ifp
->if_flags
& IPNETIF_LOOPBACK
) != 0) {
2191 ipnet
->ipnet_acceptfn
= ipnet_loaccept
;
2193 ipnet
->ipnet_acceptfn
= ipnet_accept
;
2197 * To register multiple hooks with the same callback function,
2198 * a unique name is needed.
2200 HOOK_INIT(ipnet
->ipnet_hook
, ipnet_bpf_bounce
, "", ipnet
);
2201 (void) snprintf(name
, sizeof (name
), "ipnet_promisc_%p",
2202 (void *)ipnet
->ipnet_hook
);
2203 ipnet
->ipnet_hook
->h_name
= strdup(name
);
2204 ipnet
->ipnet_data
= data
;
2205 ipnet
->ipnet_zoneid
= ifp
->if_zoneid
;
2207 ipst
= ns
->netstack_ip
;
2209 error
= net_hook_register(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
,
2214 error
= net_hook_register(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
,
2217 (void) net_hook_unregister(ipst
->ips_ip4_observe_pr
,
2218 NH_OBSERVE
, ipnet
->ipnet_hook
);
2222 *mhandle
= (uintptr_t)ipnet
;
2228 cmn_err(CE_WARN
, "net_hook_register failed: %d", error
);
2229 strfree(ipnet
->ipnet_hook
->h_name
);
2230 hook_free(ipnet
->ipnet_hook
);
2236 ipnet_promisc_remove(void *data
)
2243 ipst
= ipnet
->ipnet_ns
->netstack_ip
;
2244 hook
= ipnet
->ipnet_hook
;
2246 VERIFY(net_hook_unregister(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
,
2249 VERIFY(net_hook_unregister(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
,
2252 strfree(hook
->h_name
);
2256 kmem_free(ipnet
, sizeof (*ipnet
));
2260 * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
2261 * An important field from that structure is "ipnet_data" that
2262 * contains the "data" pointer passed into ipnet_promisc_add: it needs
2263 * to be passed back to bpf when we call into ipnet_itap.
2265 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
2270 ipnet_bpf_bounce(hook_event_token_t token
, hook_data_t info
, void *arg
)
2272 hook_pkt_observe_t
*hdr
;
2280 hdr
= (hook_pkt_observe_t
*)info
;
2282 ipnet
= (ipnet_t
*)arg
;
2283 ips
= ((netstack_t
*)hdr
->hpo_ctx
)->netstack_ipnet
;
2285 netmp
= hdr
->hpo_pkt
->b_cont
;
2286 src
.iap_family
= hdr
->hpo_family
;
2287 dst
.iap_family
= hdr
->hpo_family
;
2289 if (hdr
->hpo_family
== AF_INET
) {
2290 src
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_src
;
2291 dst
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_dst
;
2293 src
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_src
;
2294 dst
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_dst
;
2297 if (!(*ipnet
->ipnet_acceptfn
)(ipnet
, hdr
, &src
, &dst
)) {
2298 IPSK_BUMP(ips
, ik_acceptFail
);
2301 IPSK_BUMP(ips
, ik_acceptOk
);
2303 ipnet_itap(ipnet
->ipnet_data
, mp
,
2304 hdr
->hpo_htype
== htons(IPOBS_HOOK_OUTBOUND
),
2305 ntohl(hdr
->hpo_pktlen
) + MBLKL(mp
));
2311 * clone'd ipnetif_t's are created when a shared IP instance zone comes
2312 * to life and configures an IP address. The model that BPF uses is that
2313 * each interface must have a unique pointer and each interface must be
2314 * representative of what it can capture. They are limited to one DLT
2315 * per interface and one zone per interface. Thus every interface that
2316 * can be seen in a zone must be announced via an attach to bpf. For
2317 * shared instance zones, this means the ipnet driver needs to detect
2318 * when an address is added to an interface in a zone for the first
2319 * time (and also when the last address is removed.)
2322 ipnetif_clone_create(ipnetif_t
*ifp
, zoneid_t zoneid
)
2324 uintptr_t key
[2] = { zoneid
, (uintptr_t)ifp
->if_name
};
2325 ipnet_stack_t
*ips
= ifp
->if_stackp
;
2326 avl_index_t where
= 0;
2329 mutex_enter(&ips
->ips_avl_lock
);
2330 newif
= avl_find(&ips
->ips_avl_by_shared
, (void *)key
, &where
);
2331 if (newif
!= NULL
) {
2332 ipnetif_refhold(newif
);
2333 newif
->if_sharecnt
++;
2334 mutex_exit(&ips
->ips_avl_lock
);
2338 newif
= ipnet_alloc_if(ips
);
2339 if (newif
== NULL
) {
2340 mutex_exit(&ips
->ips_avl_lock
);
2344 newif
->if_refcnt
= 1;
2345 newif
->if_sharecnt
= 1;
2346 newif
->if_zoneid
= zoneid
;
2347 (void) strlcpy(newif
->if_name
, ifp
->if_name
, LIFNAMSIZ
);
2348 newif
->if_flags
= ifp
->if_flags
& IPNETIF_LOOPBACK
;
2349 newif
->if_index
= ifp
->if_index
;
2351 avl_insert(&ips
->ips_avl_by_shared
, newif
, where
);
2352 mutex_exit(&ips
->ips_avl_lock
);
2358 ipnetif_clone_release(ipnetif_t
*ipnetif
)
2360 boolean_t dofree
= B_FALSE
;
2361 boolean_t doremove
= B_FALSE
;
2362 ipnet_stack_t
*ips
= ipnetif
->if_stackp
;
2364 mutex_enter(&ipnetif
->if_reflock
);
2365 ASSERT(ipnetif
->if_refcnt
> 0);
2366 if (--ipnetif
->if_refcnt
== 0)
2368 ASSERT(ipnetif
->if_sharecnt
> 0);
2369 if (--ipnetif
->if_sharecnt
== 0)
2371 mutex_exit(&ipnetif
->if_reflock
);
2373 mutex_enter(&ips
->ips_avl_lock
);
2374 avl_remove(&ips
->ips_avl_by_shared
, ipnetif
);
2375 mutex_exit(&ips
->ips_avl_lock
);
2378 ASSERT(ipnetif
->if_sharecnt
== 0);
2379 ipnetif_free(ipnetif
);