4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
29 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
33 * The ipnet device defined here provides access to packets at the IP layer. To
34 * provide access to packets at this layer it registers a callback function in
35 * the ip module and when there are open instances of the device ip will pass
36 * packets into the device. Packets from ip are passed on the input, output and
37 * loopback paths. Internally the module returns to ip as soon as possible by
38 * deferring processing using a taskq.
40 * Management of the devices in /dev/ipnet/ is handled by the devname
41 * filesystem and use of the neti interfaces. This module registers for NIC
42 * events using the neti framework so that when IP interfaces are brought up,
43 * taken down etc. the ipnet module is notified and its view of the interfaces
44 * configured on the system adjusted. On attach, the module gets an initial
45 * view of the system again using the neti framework but as it has already
46 * registered for IP interface events, it is still up-to-date with any changes.
49 #include <sys/types.h>
54 #include <sys/sunddi.h>
55 #include <sys/modctl.h>
57 #include <sys/strsun.h>
58 #include <sys/id_space.h>
60 #include <sys/mkdev.h>
63 #include <sys/errno.h>
65 #include <sys/ksynch.h>
66 #include <sys/hook_event.h>
68 #include <sys/stropts.h>
69 #include <sys/sysmacros.h>
71 #include <inet/ip_if.h>
72 #include <inet/ip_multi.h>
74 #include <inet/ipnet.h>
76 #include <net/bpfdesc.h>
/*
 * STREAMS module_info for the driver.  Both qinit tables in this file
 * (ipnet_rinit and ipnet_winit) point their qi_minfo slot at this one
 * structure.  Only the mi_idname and mi_maxpsz initializers are visible
 * in this view; the remaining members are not shown.
 */
79 static struct module_info ipnet_minfo
= {
81 "ipnet", /* mi_idname */
83 INFPSZ
, /* mi_maxpsz */
89 * List to hold static view of ipnetif_t's on the system. This is needed to
90 * avoid holding the lock protecting the avl tree of ipnetif's over the
91 * callback into the dev filesystem.
/*
 * One element of the static interface list described above.  ic_ifname
 * is a fixed buffer of LIFNAMSIZ bytes for an interface name.  Any other
 * members of the structure are not visible in this view.
 */
93 typedef struct ipnetif_cbdata
{
94 char ic_ifname
[LIFNAMSIZ
];
100 * Convenience enumerated type for ipnet_accept(). It describes the
101 * properties of a given ipnet_addrp_t relative to a single ipnet_t
102 * client stream. The values represent whether the address is ...
/*
 * Enumerators of the address classification.  ipnet_get_addrtype()
 * returns one of these values and ipnet_accept() compares the source
 * and destination classification against them.
 */
105 IPNETADDR_MYADDR
, /* an address on my ipnetif_t. */
106 IPNETADDR_MBCAST
, /* a multicast or broadcast address. */
107 IPNETADDR_UNKNOWN
/* none of the above. */
110 /* Argument used for the ipnet_nicevent_taskq callback. */
/*
 * Members: the NIC event, the protocol handle, the netstack id, the
 * interface index, the logical interface index and the interface name
 * (LIFNAMSIZ bytes).  The roles are inferred from member names and
 * types only; the code that fills and consumes this structure is not
 * visible here - TODO confirm.
 */
111 typedef struct ipnet_nicevent_s
{
112 nic_event_t ipne_event
;
113 net_handle_t ipne_protocol
;
114 netstackid_t ipne_stackid
;
115 uint64_t ipne_ifindex
;
116 uint64_t ipne_lifindex
;
117 char ipne_ifname
[LIFNAMSIZ
];
/*
 * Module-wide state.
 *
 * ipnet_major is resolved from the driver name, and ipnet_minor_space
 * plus both taskqs are created, in the module load path; the unload
 * path destroys the taskqs and the id space.  ipnet_dip is the devinfo
 * node that ipnet_detach() asserts against.  ipnet_infoack is the
 * template that ipnet_inforeq() copies into each DL_INFO_ACK reply.
 * ipnet_accept and ipnet_loaccept are the two per-stream filter
 * functions selected in ipnet_open().
 */
120 static dev_info_t
*ipnet_dip
;
121 static major_t ipnet_major
;
122 static ddi_taskq_t
*ipnet_taskq
; /* taskq for packets */
123 static ddi_taskq_t
*ipnet_nicevent_taskq
; /* taskq for NIC events */
124 static id_space_t
*ipnet_minor_space
;
125 static const int IPNET_MINOR_LO
= 1; /* minor number for /dev/lo0 */
126 static const int IPNET_MINOR_MIN
= 2; /* start of dynamic minors */
127 static dl_info_ack_t ipnet_infoack
= IPNET_INFO_ACK_INIT
;
128 static ipnet_acceptfn_t ipnet_accept
, ipnet_loaccept
;
129 static bpf_itap_fn_t ipnet_itap
;
/*
 * Forward declarations for the file-local functions: STREAMS entry
 * points, DDI attach/detach/getinfo, DLPI request handlers, NIC event
 * handling, ipnetif_t management, netstack init/fini and the packet
 * dispatch path.
 *
 * NOTE(review): the ipnetif_create() declaration below ends on a
 * trailing comma in this view; its final parameter line is not visible.
 */
131 static void ipnet_input(mblk_t
*);
132 static int ipnet_wput(queue_t
*, mblk_t
*);
133 static int ipnet_rsrv(queue_t
*);
134 static int ipnet_open(queue_t
*, dev_t
*, int, int, cred_t
*);
135 static int ipnet_close(queue_t
*, int, cred_t
*);
136 static void ipnet_ioctl(queue_t
*, mblk_t
*);
137 static void ipnet_iocdata(queue_t
*, mblk_t
*);
138 static void ipnet_wputnondata(queue_t
*, mblk_t
*);
139 static int ipnet_attach(dev_info_t
*, ddi_attach_cmd_t
);
140 static int ipnet_detach(dev_info_t
*, ddi_detach_cmd_t
);
141 static int ipnet_devinfo(dev_info_t
*, ddi_info_cmd_t
, void *, void **);
142 static void ipnet_inforeq(queue_t
*q
, mblk_t
*mp
);
143 static void ipnet_bindreq(queue_t
*q
, mblk_t
*mp
);
144 static void ipnet_unbindreq(queue_t
*q
, mblk_t
*mp
);
145 static void ipnet_dlpromisconreq(queue_t
*q
, mblk_t
*mp
);
146 static void ipnet_dlpromiscoffreq(queue_t
*q
, mblk_t
*mp
);
147 static int ipnet_join_allmulti(ipnetif_t
*, ipnet_stack_t
*);
148 static void ipnet_leave_allmulti(ipnetif_t
*, ipnet_stack_t
*);
149 static int ipnet_nicevent_cb(hook_event_token_t
, hook_data_t
, void *);
150 static void ipnet_nicevent_task(void *);
151 static ipnetif_t
*ipnetif_create(const char *, uint64_t, ipnet_stack_t
*,
153 static void ipnetif_remove(ipnetif_t
*, ipnet_stack_t
*);
154 static ipnetif_addr_t
*ipnet_match_lif(ipnetif_t
*, lif_if_t
, boolean_t
);
155 static ipnetif_t
*ipnetif_getby_index(uint64_t, ipnet_stack_t
*);
156 static ipnetif_t
*ipnetif_getby_dev(dev_t
, ipnet_stack_t
*);
157 static boolean_t
ipnetif_in_zone(ipnetif_t
*, zoneid_t
, ipnet_stack_t
*);
158 static void ipnetif_zonecheck(ipnetif_t
*, ipnet_stack_t
*);
159 static int ipnet_populate_if(net_handle_t
, ipnet_stack_t
*, boolean_t
);
160 static int ipnetif_compare_name(const void *, const void *);
161 static int ipnetif_compare_name_zone(const void *, const void *);
162 static int ipnetif_compare_index(const void *, const void *);
163 static void ipnet_add_ifaddr(uint64_t, ipnetif_t
*, net_handle_t
);
164 static void ipnet_delete_ifaddr(ipnetif_addr_t
*, ipnetif_t
*, boolean_t
);
165 static void ipnetif_refhold(ipnetif_t
*);
166 static void ipnetif_refrele(ipnetif_t
*);
167 static void ipnet_walkers_inc(ipnet_stack_t
*);
168 static void ipnet_walkers_dec(ipnet_stack_t
*);
169 static void ipnet_register_netihook(ipnet_stack_t
*);
170 static void *ipnet_stack_init(netstackid_t
, netstack_t
*);
171 static void ipnet_stack_fini(netstackid_t
, void *);
172 static void ipnet_dispatch(void *);
173 static int ipobs_bounce_func(hook_event_token_t
, hook_data_t
, void *);
174 static int ipnet_bpf_bounce(hook_event_token_t
, hook_data_t
, void *);
175 static ipnetif_t
*ipnetif_clone_create(ipnetif_t
*, zoneid_t
);
176 static void ipnetif_clone_release(ipnetif_t
*);
/*
 * Read-side queue table: service procedure ipnet_rsrv(), open routine
 * ipnet_open(), close routine ipnet_close(), no admin routine, and the
 * shared ipnet_minfo.  The qi_putp slot and any members after qi_minfo
 * are not visible in this view.
 */
178 static struct qinit ipnet_rinit
= {
180 ipnet_rsrv
, /* qi_srvp */
181 ipnet_open
, /* qi_qopen */
182 ipnet_close
, /* qi_qclose */
183 NULL
, /* qi_qadmin */
184 &ipnet_minfo
, /* qi_minfo */
/*
 * Write-side queue table: put procedure ipnet_wput(), no close or admin
 * routine, and the shared ipnet_minfo.  The service and open slots are
 * not visible in this view.
 */
187 static struct qinit ipnet_winit
= {
188 ipnet_wput
, /* qi_putp */
191 NULL
, /* qi_qclose */
192 NULL
, /* qi_qadmin */
193 &ipnet_minfo
, /* qi_minfo */
/*
 * streamtab tying the read and write qinit tables together; it is the
 * table handed to DDI_DEFINE_STREAM_OPS for ipnet_ops.
 */
196 static struct streamtab ipnet_info
= {
197 &ipnet_rinit
, &ipnet_winit
/*
 * Device operations for the driver: ipnet_attach(), ipnet_detach() and
 * ipnet_devinfo() are the attach, detach and getinfo handlers, the
 * stream table is ipnet_info, the flags are D_MP | D_MTPERMOD, and
 * quiesce is declared unsupported via ddi_quiesce_not_supported.
 */
200 DDI_DEFINE_STREAM_OPS(ipnet_ops
, nulldev
, nulldev
, ipnet_attach
,
201 ipnet_detach
, nodev
, ipnet_devinfo
, D_MP
| D_MTPERMOD
, &ipnet_info
,
202 ddi_quiesce_not_supported
);
/*
 * Driver linkage record referenced by modlinkage.  Only the description
 * string is visible in this view; the other initializers are not shown.
 */
204 static struct modldrv modldrv
= {
206 "STREAMS ipnet driver",
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info():
 * revision MODREV_1 with modldrv as its single, NULL-terminated entry.
 */
210 static struct modlinkage modlinkage
= {
211 MODREV_1
, &modldrv
, NULL
215 * This structure contains the template data (names and type) that is
216 * copied, in bulk, into the new kstats structure created by net_kstat_create.
217 * No actual statistical information is stored in this instance of the
218 * ipnet_kstats_t structure.
/*
 * Name/type template copied with bcopy() over ips_stats by
 * ipnet_register_netihook() when the per-zone kstat is created.  All
 * eight visible entries are 64-bit unsigned counters.
 *
 * NOTE(review): ipnet_dispatch() bumps a counter named
 * ik_dispatchPutDrop, but no dispatchPutDrop entry appears below.
 * Because the template is copied positionally, confirm that the entry
 * count and order still match the members of ipnet_kstats_t.
 */
220 static ipnet_kstats_t stats_template
= {
221 { "duplicationFail", KSTAT_DATA_UINT64
},
222 { "dispatchOk", KSTAT_DATA_UINT64
},
223 { "dispatchFail", KSTAT_DATA_UINT64
},
224 { "dispatchHeaderDrop", KSTAT_DATA_UINT64
},
225 { "dispatchDupDrop", KSTAT_DATA_UINT64
},
226 { "dispatchDeliver", KSTAT_DATA_UINT64
},
227 { "acceptOk", KSTAT_DATA_UINT64
},
228 { "acceptFail", KSTAT_DATA_UINT64
}
232 * Walk the list of physical interfaces on the machine, for each
233 * interface create a new ipnetif_t and add any addresses to it. We
234 * need to do the walk twice, once for IPv4 and once for IPv6.
236 * The interfaces are destroyed as part of ipnet_stack_fini() for each
237 * stack. Note that we cannot do this initialization in
238 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
/*
 * Body of the interface walk described above (presumably ipnetif_init();
 * the signature line is not visible in this view).
 *
 * Iterates over every netstack with netstack_next().  For each stack it
 * populates the IPv4 view first and, only when that call returned 0,
 * the IPv6 view.  The iteration handle is finished with
 * netstack_next_fini().
 *
 * NOTE(review): the lines between the IPv6 call and netstack_next_fini()
 * are not visible; confirm that each stack returned by netstack_next()
 * is released and how a non-zero ret affects the loop.
 */
243 netstack_handle_t nh
;
248 netstack_next_init(&nh
);
249 while ((ns
= netstack_next(&nh
)) != NULL
) {
250 ips
= ns
->netstack_ipnet
;
251 if ((ret
= ipnet_populate_if(ips
->ips_ndv4
, ips
, B_FALSE
)) == 0)
252 ret
= ipnet_populate_if(ips
->ips_ndv6
, ips
, B_TRUE
);
257 netstack_next_fini(&nh
);
262 * Standard module entry points.
/*
 * Module load path (presumably _init(); the signature line is not
 * visible in this view).
 *
 * Visible order of operations:
 *   1. resolve the ipnet major number by driver name;
 *   2. create the minor-number id space covering IPNET_MINOR_MIN up to
 *      MAXMIN32;
 *   3. create the packet taskq and the NIC event taskq, one thread each;
 *   4. register with the netstack framework and remember that in
 *      netstack_registered;
 *   5. call ipnetif_init() and, if it returned 0, mod_install().
 *
 * The tail is the cleanup sequence: destroy whichever taskqs were
 * created, unregister from the netstack framework only if registration
 * happened, and destroy the id space.  The conditions that route
 * control into the cleanup sequence are not visible in this view.
 */
268 boolean_t netstack_registered
= B_FALSE
;
270 if ((ipnet_major
= ddi_name_to_major("ipnet")) == (major_t
)-1)
272 ipnet_minor_space
= id_space_create("ipnet_minor_space",
273 IPNET_MINOR_MIN
, MAXMIN32
);
276 * We call ddi_taskq_create() with nthread == 1 to ensure in-order
277 * delivery of packets to clients. Note that we need to create the
278 * taskqs before calling netstack_register() since ipnet_stack_init()
279 * registers callbacks that use 'em.
281 ipnet_taskq
= ddi_taskq_create(NULL
, "ipnet", 1, TASKQ_DEFAULTPRI
, 0);
282 ipnet_nicevent_taskq
= ddi_taskq_create(NULL
, "ipnet_nic_event_queue",
283 1, TASKQ_DEFAULTPRI
, 0);
284 if (ipnet_taskq
== NULL
|| ipnet_nicevent_taskq
== NULL
) {
289 netstack_register(NS_IPNET
, ipnet_stack_init
, NULL
, ipnet_stack_fini
);
290 netstack_registered
= B_TRUE
;
292 if ((ret
= ipnetif_init()) == 0)
293 ret
= mod_install(&modlinkage
);
296 if (ipnet_taskq
!= NULL
)
297 ddi_taskq_destroy(ipnet_taskq
);
298 if (ipnet_nicevent_taskq
!= NULL
)
299 ddi_taskq_destroy(ipnet_nicevent_taskq
);
300 if (netstack_registered
)
301 netstack_unregister(NS_IPNET
);
302 id_space_destroy(ipnet_minor_space
);
/*
 * Module unload path (presumably _fini(); the signature line is not
 * visible in this view).
 *
 * mod_remove() is attempted first and its result is tested (the action
 * taken on a non-zero result is not visible).  The teardown that
 * follows is the reverse of the load path: netstack unregistration, the
 * NIC event taskq, the packet taskq, then the minor-number id space.
 */
312 if ((err
= mod_remove(&modlinkage
)) != 0)
315 netstack_unregister(NS_IPNET
);
316 ddi_taskq_destroy(ipnet_nicevent_taskq
);
317 ddi_taskq_destroy(ipnet_taskq
);
318 id_space_destroy(ipnet_minor_space
);
/*
 * Module information entry point.  Delegates to mod_info() with this
 * module linkage and the caller-supplied modinfop, and returns its
 * result unchanged.
 */
323 _info(struct modinfo
*modinfop
)
325 return (mod_info(&modlinkage
, modinfop
));
/*
 * Per-stack setup of the NIC event hook and the kstats.
 *
 * Steps visible in this view:
 *   - initialize ips_nicevents with ipnet_nicevent_cb as the callback;
 *   - map the stack id to a zone id and then to a net instance id; a
 *     result of -1 from net_zoneidtonetid() is tested (the action taken
 *     is not visible);
 *   - for IPv4 and then IPv6: look up the protocol handle and register
 *     the hook for NH_NIC_EVENTS.  If registration fails, the handle is
 *     released (VERIFY that the release returns 0), the stored handle
 *     is reset to NULL and a warning naming the zone and error is
 *     logged;
 *   - create the named kstat.  On success the template is copied into
 *     ips_stats, ks_data is pointed at ips_stats, the stack id is stored
 *     in ks_private and the kstat is installed.  A warning is logged
 *     when creation fails.
 *
 * A failed lookup leaves the corresponding ips_ndv4 / ips_ndv6 handle
 * NULL, since the lookup result is assigned directly to it.
 */
329 ipnet_register_netihook(ipnet_stack_t
*ips
)
335 HOOK_INIT(ips
->ips_nicevents
, ipnet_nicevent_cb
, "ipnet_nicevents",
339 * It is possible for an exclusive stack to be in the process of
340 * shutting down here, and the netid and protocol lookups could fail
343 zoneid
= netstackid_to_zoneid(ips
->ips_netstack
->netstack_stackid
);
344 if ((netid
= net_zoneidtonetid(zoneid
)) == -1)
347 if ((ips
->ips_ndv4
= net_protocol_lookup(netid
, NHF_INET
)) != NULL
) {
348 if ((ret
= net_hook_register(ips
->ips_ndv4
, NH_NIC_EVENTS
,
349 ips
->ips_nicevents
)) != 0) {
350 VERIFY(net_protocol_release(ips
->ips_ndv4
) == 0);
351 ips
->ips_ndv4
= NULL
;
352 cmn_err(CE_WARN
, "unable to register IPv4 netinfo hooks"
353 " in zone %d: %d", zoneid
, ret
);
356 if ((ips
->ips_ndv6
= net_protocol_lookup(netid
, NHF_INET6
)) != NULL
) {
357 if ((ret
= net_hook_register(ips
->ips_ndv6
, NH_NIC_EVENTS
,
358 ips
->ips_nicevents
)) != 0) {
359 VERIFY(net_protocol_release(ips
->ips_ndv6
) == 0);
360 ips
->ips_ndv6
= NULL
;
361 cmn_err(CE_WARN
, "unable to register IPv6 netinfo hooks"
362 " in zone %d: %d", zoneid
, ret
);
367 * Create a local set of kstats for each zone.
369 ips
->ips_kstatp
= net_kstat_create(netid
, "ipnet", 0, "ipnet_stats",
370 "misc", KSTAT_TYPE_NAMED
,
371 sizeof (ipnet_kstats_t
) / sizeof (kstat_named_t
), 0);
372 if (ips
->ips_kstatp
!= NULL
) {
373 bcopy(&stats_template
, &ips
->ips_stats
,
374 sizeof (ips
->ips_stats
));
375 ips
->ips_kstatp
->ks_data
= &ips
->ips_stats
;
376 ips
->ips_kstatp
->ks_private
=
377 (void *)(uintptr_t)ips
->ips_netstack
->netstack_stackid
;
378 kstat_install(ips
->ips_kstatp
);
380 cmn_err(CE_WARN
, "net_kstat_create(%s,%s,%s) failed",
381 "ipnet", "ipnet_stats", "misc");
386 * This function is called on attach to build an initial view of the
387 * interfaces on the system. It will be called once for IPv4 and once
388 * for IPv6, although there is only one ipnet interface for both IPv4
389 * and IPv6 there are separate address lists.
/*
 * Build the interface and address view for one address family of one
 * stack.
 *
 * nd   - protocol handle to enumerate (IPv4 or IPv6)
 * ips  - per-stack ipnet state
 * isv6 - selects which plumbed flag is applied and is passed to
 *        ipnet_match_lif()
 *
 * With ips_event_lock held, every physical interface reported by
 * net_phygetnext() is visited.  The interface name and flags are
 * fetched, an existing ipnetif_t is looked up by index and one is
 * created when none exists.  Each logical interface from
 * net_lifgetnext() is then considered: entries whose flags cannot be
 * read are skipped (the second half of that test is not visible), as
 * are addresses ipnet_match_lif() already finds; the rest are added
 * with ipnet_add_ifaddr().  The interface reference is dropped with
 * ipnetif_refrele() afterwards.
 *
 * NOTE(review): the result of the first net_getlifflags() call is
 * discarded and ifflags is then passed to ipnetif_create(); confirm
 * that ifflags is initialized on a line not visible here.
 */
392 ipnet_populate_if(net_handle_t nd
, ipnet_stack_t
*ips
, boolean_t isv6
)
397 char name
[LIFNAMSIZ
];
398 boolean_t new_if
= B_FALSE
;
403 * If ipnet_register_netihook() was unable to initialize this
404 * stack's net_handle_t, then we cannot populate any interface
405 * information. This usually happens when we attempted to
406 * grab a net_handle_t as a stack was shutting down. We don't
407 * want to fail the entire _init() operation because of a
408 * stack shutdown (other stacks will continue to work just
409 * fine), so we silently return success here.
415 * Make sure we're not processing NIC events during the
416 * population of our interfaces and address lists.
418 mutex_enter(&ips
->ips_event_lock
);
420 for (phyif
= net_phygetnext(nd
, 0); phyif
!= 0;
421 phyif
= net_phygetnext(nd
, phyif
)) {
422 if (net_getifname(nd
, phyif
, name
, LIFNAMSIZ
) != 0)
425 (void) net_getlifflags(nd
, phyif
, 0, &ifflags
);
426 if ((ipnetif
= ipnetif_getby_index(phyif
, ips
)) == NULL
) {
427 ipnetif
= ipnetif_create(name
, phyif
, ips
, ifflags
);
428 if (ipnetif
== NULL
) {
435 isv6
? IPNETIF_IPV6PLUMBED
: IPNETIF_IPV4PLUMBED
;
437 for (lif
= net_lifgetnext(nd
, phyif
, 0); lif
!= 0;
438 lif
= net_lifgetnext(nd
, phyif
, lif
)) {
440 * Skip addresses that aren't up. We'll add
441 * them when we receive an NE_LIF_UP event.
443 if (net_getlifflags(nd
, phyif
, lif
, &ifflags
) != 0 ||
446 /* Don't add it if we already have it. */
447 if (ipnet_match_lif(ipnetif
, lif
, isv6
) != NULL
)
449 ipnet_add_ifaddr(lif
, ipnetif
, nd
);
452 ipnetif_refrele(ipnetif
);
456 mutex_exit(&ips
->ips_event_lock
);
/*
 * DDI attach entry point.
 *
 * Only DDI_ATTACH is accepted; any other command returns DDI_FAILURE.
 * A character minor node named lo0 is created with minor number
 * IPNET_MINOR_LO and node type DDI_PSEUDO; failure to create it returns
 * DDI_FAILURE, otherwise DDI_SUCCESS is returned.
 *
 * NOTE(review): ipnet_detach() asserts that its dip equals ipnet_dip,
 * but no assignment to ipnet_dip is visible in this view of attach.
 */
461 ipnet_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
463 if (cmd
!= DDI_ATTACH
)
464 return (DDI_FAILURE
);
466 if (ddi_create_minor_node(dip
, "lo0", S_IFCHR
, IPNET_MINOR_LO
,
467 DDI_PSEUDO
, 0) == DDI_FAILURE
)
468 return (DDI_FAILURE
);
471 return (DDI_SUCCESS
);
/*
 * DDI detach entry point.
 *
 * Only DDI_DETACH is accepted; any other command returns DDI_FAILURE.
 * Asserts that dip is the recorded ipnet_dip, removes every minor node
 * of that devinfo node (a NULL name is passed to
 * ddi_remove_minor_node()) and returns DDI_SUCCESS.
 */
475 ipnet_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
477 if (cmd
!= DDI_DETACH
)
478 return (DDI_FAILURE
);
480 ASSERT(dip
== ipnet_dip
);
481 ddi_remove_minor_node(ipnet_dip
, NULL
);
483 return (DDI_SUCCESS
);
/*
 * DDI getinfo entry point.
 *
 * The result code starts out as DDI_FAILURE.  Two commands are handled:
 * DDI_INFO_DEVT2INSTANCE and DDI_INFO_DEVT2DEVINFO, the latter only
 * acting when ipnet_dip is non-NULL.  What is stored through result and
 * the final return statement are not visible in this view.
 */
488 ipnet_devinfo(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
490 int error
= DDI_FAILURE
;
493 case DDI_INFO_DEVT2INSTANCE
:
497 case DDI_INFO_DEVT2DEVINFO
:
498 if (ipnet_dip
!= NULL
) {
/*
 * STREAMS open routine; creates one ipnet_t per open.
 *
 * Visible checks before any allocation: on a labeled system only the
 * global zone may open the device, and a queue whose q_ptr is already
 * set is rejected (the error values returned are not visible).
 *
 * The ipnet_t is zero-allocated with KM_NOSLEEP, the netstack of the
 * caller is looked up from the credential (VERIFY that it exists), and
 * the new state is attached to both the read and write queue.  A fresh
 * minor number is allocated and the zone id, DLPI state DL_UNBOUND and
 * netstack pointer are recorded.
 *
 * With ips_event_lock held, the accept function is chosen: an open of
 * minor IPNET_MINOR_LO sets IPNET_LOMODE and uses ipnet_loaccept;
 * otherwise ipnet_accept is used and the interface is looked up from
 * the device number, with a missing interface or one outside the zone
 * of the caller treated as failure.
 *
 * Under ips_walkers_lock the routine waits until no walker is active,
 * inserts the stream at the head of ips_str_list and rewrites *dev
 * with the newly allocated minor.  The observability hook is
 * registered when head and tail of the list are the same element, that
 * is, when this stream is the only one.
 *
 * The tail is the failure cleanup: free the minor, drop the interface
 * reference if one was taken, free the ipnet_t.
 *
 * NOTE(review): id_alloc() is used here while ipnetif_create() uses
 * id_alloc_nosleep(); the result is stored without a visible check.
 * NOTE(review): q_ptr of both queues is set before the failure paths
 * and no reset is visible before kmem_free(); confirm nothing can
 * observe the stale pointer after a failed open.
 */
509 ipnet_open(queue_t
*rq
, dev_t
*dev
, int oflag
, int sflag
, cred_t
*crp
)
512 netstack_t
*ns
= NULL
;
515 zoneid_t zoneid
= crgetzoneid(crp
);
518 * If the system is labeled, only the global zone is allowed to open
519 * IP observability nodes.
521 if (is_system_labeled() && zoneid
!= GLOBAL_ZONEID
)
524 /* We don't support open as a module */
528 /* This driver is self-cloning, we don't support re-open. */
529 if (rq
->q_ptr
!= NULL
)
532 if ((ipnet
= kmem_zalloc(sizeof (*ipnet
), KM_NOSLEEP
)) == NULL
)
535 VERIFY((ns
= netstack_find_by_cred(crp
)) != NULL
);
536 ips
= ns
->netstack_ipnet
;
538 rq
->q_ptr
= WR(rq
)->q_ptr
= ipnet
;
539 ipnet
->ipnet_rq
= rq
;
540 ipnet
->ipnet_minor
= (minor_t
)id_alloc(ipnet_minor_space
);
541 ipnet
->ipnet_zoneid
= zoneid
;
542 ipnet
->ipnet_dlstate
= DL_UNBOUND
;
543 ipnet
->ipnet_ns
= ns
;
546 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need
547 * to be processed after ipnet_if is set and the ipnet_t has been
548 * inserted in the ips_str_list.
550 mutex_enter(&ips
->ips_event_lock
);
551 if (getminor(*dev
) == IPNET_MINOR_LO
) {
552 ipnet
->ipnet_flags
|= IPNET_LOMODE
;
553 ipnet
->ipnet_acceptfn
= ipnet_loaccept
;
555 ipnet
->ipnet_acceptfn
= ipnet_accept
;
556 ipnet
->ipnet_if
= ipnetif_getby_dev(*dev
, ips
);
557 if (ipnet
->ipnet_if
== NULL
||
558 !ipnetif_in_zone(ipnet
->ipnet_if
, zoneid
, ips
)) {
564 mutex_enter(&ips
->ips_walkers_lock
);
565 while (ips
->ips_walkers_cnt
!= 0)
566 cv_wait(&ips
->ips_walkers_cv
, &ips
->ips_walkers_lock
);
567 list_insert_head(&ips
->ips_str_list
, ipnet
);
568 *dev
= makedevice(getmajor(*dev
), ipnet
->ipnet_minor
);
572 * Only register our callback if we're the first open client; we call
573 * unregister in close() for the last open client.
575 if (list_head(&ips
->ips_str_list
) == list_tail(&ips
->ips_str_list
))
576 ips
->ips_hook
= ipobs_register_hook(ns
, ipnet_input
);
577 mutex_exit(&ips
->ips_walkers_lock
);
580 mutex_exit(&ips
->ips_event_lock
);
583 id_free(ipnet_minor_space
, ipnet
->ipnet_minor
);
584 if (ipnet
->ipnet_if
!= NULL
)
585 ipnetif_refrele(ipnet
->ipnet_if
);
586 kmem_free(ipnet
, sizeof (*ipnet
));
/*
 * STREAMS close routine; undoes ipnet_open() for one stream.
 *
 * rq    - read queue whose q_ptr holds the ipnet_t
 * flags - unused
 * credp - unused
 *
 * All-multicast membership is released once for each of the
 * IPNET_PROMISC_PHYS and IPNET_PROMISC_MULTI flags that is still set.
 * Then, under ips_walkers_lock and once no walker is active, the stream
 * is removed from ips_str_list, its interface reference (if any) is
 * dropped and its minor number is freed.  When the list becomes empty
 * the observability hook is unregistered and ips_hook reset to NULL.
 * The ipnet_t is freed and finally the netstack hold is released.
 */
593 ipnet_close(queue_t
*rq
, int flags __unused
, cred_t
*credp __unused
)
595 ipnet_t
*ipnet
= rq
->q_ptr
;
/* ips is captured up front; it is still used after ipnet is freed. */
596 ipnet_stack_t
*ips
= ipnet
->ipnet_ns
->netstack_ipnet
;
598 if (ipnet
->ipnet_flags
& IPNET_PROMISC_PHYS
)
599 ipnet_leave_allmulti(ipnet
->ipnet_if
, ips
);
600 if (ipnet
->ipnet_flags
& IPNET_PROMISC_MULTI
)
601 ipnet_leave_allmulti(ipnet
->ipnet_if
, ips
);
/* Do not modify the stream list while any walker is iterating it. */
603 mutex_enter(&ips
->ips_walkers_lock
);
604 while (ips
->ips_walkers_cnt
!= 0)
605 cv_wait(&ips
->ips_walkers_cv
, &ips
->ips_walkers_lock
);
609 list_remove(&ips
->ips_str_list
, ipnet
);
610 if (ipnet
->ipnet_if
!= NULL
)
611 ipnetif_refrele(ipnet
->ipnet_if
);
612 id_free(ipnet_minor_space
, ipnet
->ipnet_minor
);
/* Last stream gone: stop receiving packets from ip for this stack. */
614 if (list_is_empty(&ips
->ips_str_list
)) {
615 ipobs_unregister_hook(ips
->ips_netstack
, ips
->ips_hook
);
616 ips
->ips_hook
= NULL
;
619 kmem_free(ipnet
, sizeof (*ipnet
));
621 mutex_exit(&ips
->ips_walkers_lock
);
622 netstack_rele(ips
->ips_netstack
);
/*
 * Write-side put procedure; dispatches on the message type found in
 * mp->b_datap->db_type.
 *
 * Visible handling (the case labels themselves are not visible in this
 * view):
 *   - flush handling: when FLUSHW is set the data on q is flushed and
 *     FLUSHW is cleared in the message; FLUSHR is then tested (the
 *     action taken is not visible);
 *   - one message class is handed to ipnet_wputnondata();
 *   - one message class is handed to ipnet_iocdata().
 */
627 ipnet_wput(queue_t
*q
, mblk_t
*mp
)
629 switch (mp
->b_datap
->db_type
) {
631 if (*mp
->b_rptr
& FLUSHW
) {
632 flushq(q
, FLUSHDATA
);
633 *mp
->b_rptr
&= ~FLUSHW
;
635 if (*mp
->b_rptr
& FLUSHR
)
642 ipnet_wputnondata(q
, mp
);
648 ipnet_iocdata(q
, mp
);
/*
 * Read-side service procedure.  Drains q with getq() until it is empty
 * and asserts that every queued message is M_DATA.  What is done with
 * each message after the assertion is not visible in this view.
 */
658 ipnet_rsrv(queue_t
*q
)
662 while ((mp
= getq(q
)) != NULL
) {
663 ASSERT(DB_TYPE(mp
) == M_DATA
);
/*
 * Handle an ioctl message, dispatching on ioc_cmd (the case labels are
 * not visible in this view).
 *
 * Visible behavior: one command is acknowledged immediately with no
 * data and a zero return value; for a transparent ioctl a copy-in of
 * sizeof (uint_t) bytes is set up with mcopyin(); everything else is
 * rejected with EINVAL.  Per the existing comment, the I_STR form of
 * DLIOCIPNETINFO is not supported.
 */
675 ipnet_ioctl(queue_t
*q
, mblk_t
*mp
)
677 struct iocblk
*iocp
= (struct iocblk
*)mp
->b_rptr
;
679 switch (iocp
->ioc_cmd
) {
681 miocack(q
, mp
, 0, 0);
684 if (iocp
->ioc_count
== TRANSPARENT
) {
685 mcopyin(mp
, NULL
, sizeof (uint_t
), NULL
);
689 /* We don't support I_STR with DLIOCIPNETINFO. */
692 miocnak(q
, mp
, 0, EINVAL
);
/*
 * Handle the data phase of a copied-in ioctl, dispatching on ioc_cmd
 * (the case labels are not visible in this view).
 *
 * The integer at the start of mp->b_cont selects the action: 1 sets
 * IPNET_INFO on the stream and 0 clears it.  The request is then
 * acknowledged with DL_IPNETINFO_VERSION as the final miocack()
 * argument.  Other input is rejected with EINVAL.
 *
 * NOTE(review): mp->b_cont is dereferenced with no visible NULL check
 * and no visible check of the copy result or of the data length;
 * confirm that a failed or short copy-in cannot reach these lines.
 */
698 ipnet_iocdata(queue_t
*q
, mblk_t
*mp
)
700 struct iocblk
*iocp
= (struct iocblk
*)mp
->b_rptr
;
701 ipnet_t
*ipnet
= q
->q_ptr
;
703 switch (iocp
->ioc_cmd
) {
705 if (*(int *)mp
->b_cont
->b_rptr
== 1)
706 ipnet
->ipnet_flags
|= IPNET_INFO
;
707 else if (*(int *)mp
->b_cont
->b_rptr
== 0)
708 ipnet
->ipnet_flags
&= ~IPNET_INFO
;
711 miocack(q
, mp
, 0, DL_IPNETINFO_VERSION
);
715 miocnak(q
, mp
, 0, EINVAL
);
/*
 * Dispatch a DLPI primitive received on the write side.
 *
 * Info, unbind, bind, promiscuous-on and promiscuous-off requests go to
 * their dedicated handlers.  DL_UNITDATA_REQ, DL_PHYS_ADDR_REQ,
 * DL_SET_PHYS_ADDR_REQ, DL_ENABMULTI_REQ and DL_DISABMULTI_REQ are
 * listed together ahead of a DL_UNSUPPORTED error ack (other labels in
 * that group may not be visible).  A DL_BADPRIM error ack is also
 * present, presumably for unrecognized primitives.
 *
 * NOTE(review): dl_primitive is read from mp->b_rptr in the
 * declarations, with no visible check that the message is long enough
 * to hold it; the per-primitive handlers check only their own sizes.
 */
721 ipnet_wputnondata(queue_t
*q
, mblk_t
*mp
)
723 union DL_primitives
*dlp
= (union DL_primitives
*)mp
->b_rptr
;
724 t_uscalar_t prim
= dlp
->dl_primitive
;
728 ipnet_inforeq(q
, mp
);
731 ipnet_unbindreq(q
, mp
);
734 ipnet_bindreq(q
, mp
);
736 case DL_PROMISCON_REQ
:
737 ipnet_dlpromisconreq(q
, mp
);
739 case DL_PROMISCOFF_REQ
:
740 ipnet_dlpromiscoffreq(q
, mp
);
742 case DL_UNITDATA_REQ
:
744 case DL_PHYS_ADDR_REQ
:
745 case DL_SET_PHYS_ADDR_REQ
:
746 case DL_ENABMULTI_REQ
:
747 case DL_DISABMULTI_REQ
:
749 dlerrorack(q
, mp
, prim
, DL_UNSUPPORTED
, 0);
752 dlerrorack(q
, mp
, prim
, DL_BADPRIM
, 0);
/*
 * Answer a DL_INFO_REQ.
 *
 * A request shorter than DL_INFO_REQ_SIZE is answered with a DL_BADPRIM
 * error ack.  Otherwise the message is exchanged for an M_PCPROTO
 * DL_INFO_ACK of sizeof (dl_info_ack_t) + sizeof (ushort_t) bytes and
 * the ipnet_infoack template is copied over its start.  Any further
 * field updates and the reply itself are not visible in this view.
 */
758 ipnet_inforeq(queue_t
*q
, mblk_t
*mp
)
761 size_t size
= sizeof (dl_info_ack_t
) + sizeof (ushort_t
);
763 if (MBLKL(mp
) < DL_INFO_REQ_SIZE
) {
764 dlerrorack(q
, mp
, DL_INFO_REQ
, DL_BADPRIM
, 0);
768 if ((mp
= mexchange(q
, mp
, size
, M_PCPROTO
, DL_INFO_ACK
)) == NULL
)
771 dlip
= (dl_info_ack_t
*)mp
->b_rptr
;
772 *dlip
= ipnet_infoack
;
/*
 * Handle DL_BIND_REQ.
 *
 * A request shorter than DL_BIND_REQ_SIZE gets a DL_BADPRIM error ack.
 * The requested SAP selects the address family recorded on the stream:
 * AF_UNSPEC, AF_INET or AF_INET6 (the SAP values in the case labels are
 * not visible in this view); any other SAP gets a DL_BADSAP error ack.
 * On success the DLPI state becomes DL_IDLE and a bind ack echoing the
 * requested SAP is sent.
 */
777 ipnet_bindreq(queue_t
*q
, mblk_t
*mp
)
779 union DL_primitives
*dlp
= (union DL_primitives
*)mp
->b_rptr
;
780 ipnet_t
*ipnet
= q
->q_ptr
;
782 if (MBLKL(mp
) < DL_BIND_REQ_SIZE
) {
783 dlerrorack(q
, mp
, DL_BIND_REQ
, DL_BADPRIM
, 0);
787 switch (dlp
->bind_req
.dl_sap
) {
789 ipnet
->ipnet_family
= AF_UNSPEC
;
792 ipnet
->ipnet_family
= AF_INET
;
795 ipnet
->ipnet_family
= AF_INET6
;
798 dlerrorack(q
, mp
, DL_BIND_REQ
, DL_BADSAP
, 0);
803 ipnet
->ipnet_dlstate
= DL_IDLE
;
804 dlbindack(q
, mp
, dlp
->bind_req
.dl_sap
, 0, 0, 0, 0);
/*
 * Handle DL_UNBIND_REQ.
 *
 * A request shorter than DL_UNBIND_REQ_SIZE gets a DL_BADPRIM error
 * ack, and a stream that is not in DL_IDLE gets DL_OUTSTATE.  Otherwise
 * the stream returns to DL_UNBOUND with family AF_UNSPEC and the
 * request is acknowledged.
 */
808 ipnet_unbindreq(queue_t
*q
, mblk_t
*mp
)
810 ipnet_t
*ipnet
= q
->q_ptr
;
812 if (MBLKL(mp
) < DL_UNBIND_REQ_SIZE
) {
813 dlerrorack(q
, mp
, DL_UNBIND_REQ
, DL_BADPRIM
, 0);
817 if (ipnet
->ipnet_dlstate
!= DL_IDLE
) {
818 dlerrorack(q
, mp
, DL_UNBIND_REQ
, DL_OUTSTATE
, 0);
820 ipnet
->ipnet_dlstate
= DL_UNBOUND
;
821 ipnet
->ipnet_family
= AF_UNSPEC
;
822 dlokack(q
, mp
, DL_UNBIND_REQ
);
/*
 * Handle DL_PROMISCON_REQ.
 *
 * A request shorter than DL_PROMISCON_REQ_SIZE gets a DL_BADPRIM error
 * ack.  Streams in IPNET_LOMODE are acknowledged without further work.
 * For levels DL_PROMISC_PHYS and DL_PROMISC_MULTI all-multicast
 * membership is joined on the interface first; a join failure is
 * reported as DL_SYSERR carrying the error.  The level then selects
 * which flag is set on the stream: IPNET_PROMISC_PHYS,
 * IPNET_PROMISC_SAP or IPNET_PROMISC_MULTI.  An unknown level gets
 * DL_BADPRIM; success is acknowledged with dlokack().
 *
 * NOTE(review): no check that the flag is already set is visible before
 * the join, while ipnet_close() leaves only once per flag; confirm that
 * repeating the same request cannot leave extra joins outstanding.
 */
827 ipnet_dlpromisconreq(queue_t
*q
, mblk_t
*mp
)
829 ipnet_t
*ipnet
= q
->q_ptr
;
833 if (MBLKL(mp
) < DL_PROMISCON_REQ_SIZE
) {
834 dlerrorack(q
, mp
, DL_PROMISCON_REQ
, DL_BADPRIM
, 0);
838 if (ipnet
->ipnet_flags
& IPNET_LOMODE
) {
839 dlokack(q
, mp
, DL_PROMISCON_REQ
);
843 level
= ((dl_promiscon_req_t
*)mp
->b_rptr
)->dl_level
;
844 if (level
== DL_PROMISC_PHYS
|| level
== DL_PROMISC_MULTI
) {
845 if ((err
= ipnet_join_allmulti(ipnet
->ipnet_if
,
846 ipnet
->ipnet_ns
->netstack_ipnet
)) != 0) {
847 dlerrorack(q
, mp
, DL_PROMISCON_REQ
, DL_SYSERR
, err
);
853 case DL_PROMISC_PHYS
:
854 ipnet
->ipnet_flags
|= IPNET_PROMISC_PHYS
;
857 ipnet
->ipnet_flags
|= IPNET_PROMISC_SAP
;
859 case DL_PROMISC_MULTI
:
860 ipnet
->ipnet_flags
|= IPNET_PROMISC_MULTI
;
863 dlerrorack(q
, mp
, DL_PROMISCON_REQ
, DL_BADPRIM
, 0);
867 dlokack(q
, mp
, DL_PROMISCON_REQ
);
/*
 * Handle DL_PROMISCOFF_REQ.
 *
 * The stream flags are captured on entry so that a request which
 * changes nothing can be detected.  A request shorter than
 * DL_PROMISCOFF_REQ_SIZE gets a DL_BADPRIM error ack.  Streams in
 * IPNET_LOMODE are acknowledged without further work.  The level
 * selects which of IPNET_PROMISC_PHYS, IPNET_PROMISC_SAP or
 * IPNET_PROMISC_MULTI is cleared if set; an unknown level gets
 * DL_BADPRIM.  If the flags are unchanged the reply is DL_NOTENAB.
 * For DL_PROMISC_PHYS and DL_PROMISC_MULTI the all-multicast
 * membership is released before the final dlokack().
 *
 * NOTE(review): the level is read through a dl_promiscon_req_t cast
 * although this is the promiscuous-off handler and the length check
 * uses DL_PROMISCOFF_REQ_SIZE; confirm the two request layouts agree
 * or switch to the promiscoff request type.
 */
871 ipnet_dlpromiscoffreq(queue_t
*q
, mblk_t
*mp
)
873 ipnet_t
*ipnet
= q
->q_ptr
;
875 uint16_t orig_ipnet_flags
= ipnet
->ipnet_flags
;
877 if (MBLKL(mp
) < DL_PROMISCOFF_REQ_SIZE
) {
878 dlerrorack(q
, mp
, DL_PROMISCOFF_REQ
, DL_BADPRIM
, 0);
882 if (ipnet
->ipnet_flags
& IPNET_LOMODE
) {
883 dlokack(q
, mp
, DL_PROMISCOFF_REQ
);
887 level
= ((dl_promiscon_req_t
*)mp
->b_rptr
)->dl_level
;
889 case DL_PROMISC_PHYS
:
890 if (ipnet
->ipnet_flags
& IPNET_PROMISC_PHYS
)
891 ipnet
->ipnet_flags
&= ~IPNET_PROMISC_PHYS
;
894 if (ipnet
->ipnet_flags
& IPNET_PROMISC_SAP
)
895 ipnet
->ipnet_flags
&= ~IPNET_PROMISC_SAP
;
897 case DL_PROMISC_MULTI
:
898 if (ipnet
->ipnet_flags
& IPNET_PROMISC_MULTI
)
899 ipnet
->ipnet_flags
&= ~IPNET_PROMISC_MULTI
;
902 dlerrorack(q
, mp
, DL_PROMISCOFF_REQ
, DL_BADPRIM
, 0);
906 if (orig_ipnet_flags
== ipnet
->ipnet_flags
) {
907 dlerrorack(q
, mp
, DL_PROMISCOFF_REQ
, DL_NOTENAB
, 0);
911 if (level
== DL_PROMISC_PHYS
|| level
== DL_PROMISC_MULTI
) {
912 ipnet_leave_allmulti(ipnet
->ipnet_if
,
913 ipnet
->ipnet_ns
->netstack_ipnet
);
916 dlokack(q
, mp
, DL_PROMISCOFF_REQ
);
/*
 * Take one reference on all-multicast membership for an interface.
 *
 * ipnetif - interface to join on
 * ips     - per-stack ipnet state; its ips_event_lock is held throughout
 *
 * Membership is counted in if_multicnt.  Only the first reference (a
 * count of zero) talks to ip: IPv4 is joined when the interface is
 * IPv4-plumbed and IPv6 when it is IPv6-plumbed, and the matching
 * ALLMULTI flag is recorded for each.  If IPv4 was joined and the IPv6
 * join then hits the (partly visible) failure test, the IPv4 join is
 * undone and its flag cleared.  The count is then incremented; the
 * conditions around the increment and the return value are not visible
 * in this view.
 */
920 ipnet_join_allmulti(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
923 ip_stack_t
*ipst
= ips
->ips_netstack
->netstack_ip
;
924 uint64_t index
= ipnetif
->if_index
;
926 mutex_enter(&ips
->ips_event_lock
);
927 if (ipnetif
->if_multicnt
== 0) {
/* With no references held, neither family may be marked as joined. */
928 ASSERT((ipnetif
->if_flags
&
929 (IPNETIF_IPV4ALLMULTI
| IPNETIF_IPV6ALLMULTI
)) == 0);
930 if (ipnetif
->if_flags
& IPNETIF_IPV4PLUMBED
) {
931 err
= ip_join_allmulti(index
, B_FALSE
, ipst
);
934 ipnetif
->if_flags
|= IPNETIF_IPV4ALLMULTI
;
936 if (ipnetif
->if_flags
& IPNETIF_IPV6PLUMBED
) {
937 err
= ip_join_allmulti(index
, B_TRUE
, ipst
);
939 (ipnetif
->if_flags
& IPNETIF_IPV4ALLMULTI
)) {
940 (void) ip_leave_allmulti(index
, B_FALSE
, ipst
);
941 ipnetif
->if_flags
&= ~IPNETIF_IPV4ALLMULTI
;
944 ipnetif
->if_flags
|= IPNETIF_IPV6ALLMULTI
;
947 ipnetif
->if_multicnt
++;
950 mutex_exit(&ips
->ips_event_lock
);
/*
 * Drop one reference on all-multicast membership for an interface.
 *
 * ipnetif - interface to leave on
 * ips     - per-stack ipnet state; its ips_event_lock is held throughout
 *
 * Asserts that at least one reference is held.  When the count reaches
 * zero, each family whose ALLMULTI flag is set is left and its flag
 * cleared.  The only results tolerated from ip_leave_allmulti() are 0
 * and ENODEV, and that is enforced with ASSERT only.
 */
955 ipnet_leave_allmulti(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
958 ip_stack_t
*ipst
= ips
->ips_netstack
->netstack_ip
;
959 uint64_t index
= ipnetif
->if_index
;
961 mutex_enter(&ips
->ips_event_lock
);
962 ASSERT(ipnetif
->if_multicnt
!= 0);
963 if (--ipnetif
->if_multicnt
== 0) {
964 if (ipnetif
->if_flags
& IPNETIF_IPV4ALLMULTI
) {
965 err
= ip_leave_allmulti(index
, B_FALSE
, ipst
);
966 ASSERT(err
== 0 || err
== ENODEV
);
967 ipnetif
->if_flags
&= ~IPNETIF_IPV4ALLMULTI
;
969 if (ipnetif
->if_flags
& IPNETIF_IPV6ALLMULTI
) {
970 err
= ip_leave_allmulti(index
, B_TRUE
, ipst
);
971 ASSERT(err
== 0 || err
== ENODEV
);
972 ipnetif
->if_flags
&= ~IPNETIF_IPV6ALLMULTI
;
975 mutex_exit(&ips
->ips_event_lock
);
979 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it.
980 * The structure it copies the header information from,
981 * hook_pkt_observe_t, is constructed using network byte
982 * order in ipobs_hook(), so there is no conversion here.
/*
 * Build a dl_ipnetinfo_t header block for a packet.
 *
 * hdr - observation header the field values are taken from
 * mp  - packet the header is meant for
 *
 * A block of sizeof (dl_ipnetinfo_t) is allocated at BPRI_HI.  The
 * version is set to DL_IPNETINFO_VERSION and the family, hook type,
 * packet length, interface index, group interface index and source and
 * destination zone are copied from hdr without conversion.  b_wptr is
 * advanced past the structure.  The allocation-failure handling, the
 * linking of mp behind the header and the return value are not visible
 * in this view.
 */
985 ipnet_addheader(hook_pkt_observe_t
*hdr
, mblk_t
*mp
)
990 if ((dlhdr
= allocb(sizeof (dl_ipnetinfo_t
), BPRI_HI
)) == NULL
) {
994 dl
= (dl_ipnetinfo_t
*)dlhdr
->b_rptr
;
995 dl
->dli_version
= DL_IPNETINFO_VERSION
;
996 dl
->dli_family
= hdr
->hpo_family
;
997 dl
->dli_htype
= hdr
->hpo_htype
;
998 dl
->dli_pktlen
= hdr
->hpo_pktlen
;
999 dl
->dli_ifindex
= hdr
->hpo_ifindex
;
1000 dl
->dli_grifindex
= hdr
->hpo_grifindex
;
1001 dl
->dli_zsrc
= hdr
->hpo_zsrc
;
1002 dl
->dli_zdst
= hdr
->hpo_zdst
;
1003 dlhdr
->b_wptr
+= sizeof (*dl
);
/*
 * Classify an address relative to one client stream.
 *
 * ipnet - client stream; supplies the interface and the zone id
 * addr  - address to classify, tagged with its family
 *
 * Returns IPNETADDR_MBCAST immediately for an IPv4 class D or limited
 * broadcast address and for an IPv6 multicast address.  Otherwise the
 * per-family address list of the interface is walked under
 * if_addr_lock until a match is found:
 *   - an IPv4 address equal to a non-zero interface address is
 *     IPNETADDR_MYADDR, and one equal to a non-zero interface broadcast
 *     address is IPNETADDR_MBCAST;
 *   - an IPv6 address equal to an interface address is IPNETADDR_MYADDR.
 * For a stream outside the global zone, only list entries belonging to
 * the zone of the stream are considered.  The result stays
 * IPNETADDR_UNKNOWN when nothing matches.
 */
1009 static ipnet_addrtype_t
1010 ipnet_get_addrtype(ipnet_t
*ipnet
, ipnet_addrp_t
*addr
)
1013 ipnetif_t
*ipnetif
= ipnet
->ipnet_if
;
1014 ipnetif_addr_t
*ifaddr
;
1015 ipnet_addrtype_t addrtype
= IPNETADDR_UNKNOWN
;
1017 /* First check if the address is multicast or limited broadcast. */
1018 switch (addr
->iap_family
) {
1020 if (CLASSD(*(addr
->iap_addr4
)) ||
1021 *(addr
->iap_addr4
) == INADDR_BROADCAST
)
1022 return (IPNETADDR_MBCAST
);
1025 if (IN6_IS_ADDR_MULTICAST(addr
->iap_addr6
))
1026 return (IPNETADDR_MBCAST
);
1031 * Walk the address list to see if the address belongs to our
1032 * interface or is one of our subnet broadcast addresses.
1034 mutex_enter(&ipnetif
->if_addr_lock
);
1035 list
= (addr
->iap_family
== AF_INET
) ?
1036 &ipnetif
->if_ip4addr_list
: &ipnetif
->if_ip6addr_list
;
1037 for (ifaddr
= list_head(list
);
1038 ifaddr
!= NULL
&& addrtype
== IPNETADDR_UNKNOWN
;
1039 ifaddr
= list_next(list
, ifaddr
)) {
1041 * If we're not in the global zone, then only look at
1042 * addresses in our zone.
1044 if (ipnet
->ipnet_zoneid
!= GLOBAL_ZONEID
&&
1045 ipnet
->ipnet_zoneid
!= ifaddr
->ifa_zone
)
1047 switch (addr
->iap_family
) {
1049 if (ifaddr
->ifa_ip4addr
!= INADDR_ANY
&&
1050 *(addr
->iap_addr4
) == ifaddr
->ifa_ip4addr
)
1051 addrtype
= IPNETADDR_MYADDR
;
1052 else if (ifaddr
->ifa_brdaddr
!= INADDR_ANY
&&
1053 *(addr
->iap_addr4
) == ifaddr
->ifa_brdaddr
)
1054 addrtype
= IPNETADDR_MBCAST
;
1057 if (IN6_ARE_ADDR_EQUAL(addr
->iap_addr6
,
1058 &ifaddr
->ifa_ip6addr
))
1059 addrtype
= IPNETADDR_MYADDR
;
1063 mutex_exit(&ipnetif
->if_addr_lock
);
1069 * Verify if the packet contained in hdr should be passed up to the
1070 * ipnet client stream.
/*
 * Accept filter for streams opened on a real interface (installed by
 * ipnet_open() for every minor other than IPNET_MINOR_LO).
 *
 * Source and destination are classified with ipnet_get_addrtype(), and
 * obsif records whether the interface index or group interface index of
 * the packet, after ntohl(), equals the index of the observed
 * interface.
 *
 * Checks, in the order they appear:
 *   1. zone isolation - for a stream outside the global zone and a
 *      destination that is not multicast/broadcast, the zone of the
 *      stream is compared with both the source and destination zone of
 *      the packet;
 *   2. SAP match - unless IPNET_PROMISC_SAP is set, the bound family is
 *      compared with the family of the packet;
 *   3. the destination is tested for being an address of the
 *      interface;
 *   4. with IPNET_PROMISC_PHYS, a source that is an address of the
 *      interface, or obsif being true, is tested;
 *   5. a multicast/broadcast destination combined with obsif is tested.
 * The value returned at each step is not visible in this view; per the
 * existing comments, steps 1 and 2 reject and steps 3 to 5 accept.
 *
 * The final parameter line of the signature (dst) is not visible here.
 */
1073 ipnet_accept(ipnet_t
*ipnet
, hook_pkt_observe_t
*hdr
, ipnet_addrp_t
*src
,
1077 uint64_t ifindex
= ipnet
->ipnet_if
->if_index
;
1078 ipnet_addrtype_t srctype
;
1079 ipnet_addrtype_t dsttype
;
1081 srctype
= ipnet_get_addrtype(ipnet
, src
);
1082 dsttype
= ipnet_get_addrtype(ipnet
, dst
);
1085 * If the packet's ifindex matches ours, or the packet's group ifindex
1086 * matches ours, it's on the interface we're observing. (Thus,
1087 * observing on the group ifindex matches all ifindexes in the group.)
1089 obsif
= (ntohl(hdr
->hpo_ifindex
) == ifindex
||
1090 ntohl(hdr
->hpo_grifindex
) == ifindex
);
1092 DTRACE_PROBE5(ipnet_accept__addr
,
1093 ipnet_addrtype_t
, srctype
, ipnet_addrp_t
*, src
,
1094 ipnet_addrtype_t
, dsttype
, ipnet_addrp_t
*, dst
,
1098 * Do not allow an ipnet stream to see packets that are not from or to
1099 * its zone. The exception is when zones are using the shared stack
1100 * model. In this case, streams in the global zone have visibility
1101 * into other shared-stack zones, and broadcast and multicast traffic
1102 * is visible by all zones in the stack.
1104 if (ipnet
->ipnet_zoneid
!= GLOBAL_ZONEID
&&
1105 dsttype
!= IPNETADDR_MBCAST
) {
1106 if (ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zsrc
) &&
1107 ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zdst
))
1112 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the
1113 * packet's IP version.
1115 if (!(ipnet
->ipnet_flags
& IPNET_PROMISC_SAP
) &&
1116 ipnet
->ipnet_family
!= hdr
->hpo_family
)
1119 /* If the destination address is ours, then accept the packet. */
1120 if (dsttype
== IPNETADDR_MYADDR
)
1124 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
1125 * sent or received on the interface we're observing, or packets that
1126 * have our source address (this allows us to see packets we send).
1128 if (ipnet
->ipnet_flags
& IPNET_PROMISC_PHYS
) {
1129 if (srctype
== IPNETADDR_MYADDR
|| obsif
)
1134 * We accept multicast and broadcast packets transmitted or received
1135 * on the interface we're observing.
1137 if (dsttype
== IPNETADDR_MBCAST
&& obsif
)
1144 * Verify if the packet contained in hdr should be passed up to the ipnet
1145 * client stream that's in IPNET_LOMODE.
/*
 * Accept filter for streams in IPNET_LOMODE (installed by ipnet_open()
 * for minor IPNET_MINOR_LO).
 *
 * For a packet whose hook type is not IPOBS_HOOK_LOCAL, the stream is
 * tested for having no interface (the value returned in that case is
 * not visible in this view).  A stream outside the global zone has its
 * zone compared with the source and destination zone of the packet.
 * The final result is true when the stream is bound to AF_UNSPEC or to
 * the family of the packet.
 *
 * The final parameter line of the signature is not visible here.
 */
1149 ipnet_loaccept(ipnet_t
*ipnet
, hook_pkt_observe_t
*hdr
, ipnet_addrp_t
*src
,
1152 if (hdr
->hpo_htype
!= htons(IPOBS_HOOK_LOCAL
)) {
1154 * ipnet_if is only NULL for IPNET_MINOR_LO devices.
1156 if (ipnet
->ipnet_if
== NULL
)
1161 * An ipnet stream must not see packets that are not from/to its zone.
1163 if (ipnet
->ipnet_zoneid
!= GLOBAL_ZONEID
) {
1164 if (ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zsrc
) &&
1165 ipnet
->ipnet_zoneid
!= ntohl(hdr
->hpo_zdst
))
1169 return (ipnet
->ipnet_family
== AF_UNSPEC
||
1170 ipnet
->ipnet_family
== hdr
->hpo_family
);
/*
 * Taskq callback that delivers one observed packet to every interested
 * client stream of its stack.  ipnet_input() schedules it with the
 * observation message as the argument.
 *
 * The observation header sits at the start of the message.  The stack
 * is found through hpo_ctx and the packet itself hangs off
 * hpo_pkt->b_cont.  Pointers to the source and destination addresses
 * inside the IP header are prepared according to hpo_family.
 *
 * The stream list is walked between ipnet_walkers_inc() and
 * ipnet_walkers_dec().  For each stream whose accept function passes:
 *   - the last stream in the list takes the original packet, which is
 *     detached from the observation message; every other stream gets a
 *     dupmsg() copy with copymsg() as the fallback, and a failure of
 *     both is counted in ik_duplicationFail;
 *   - streams with IPNET_INFO set get a dl_ipnetinfo_t header from
 *     ipnet_addheader(); a failure is counted in ik_dispatchHeaderDrop;
 *   - the message is passed upstream directly when the read queue is
 *     empty and canputnext() allows it, queued with putq() when
 *     canput() allows it, and otherwise counted in ik_dispatchPutDrop.
 * Accept decisions are counted in ik_acceptOk and ik_acceptFail.
 */
1174 ipnet_dispatch(void *arg
)
1177 hook_pkt_observe_t
*hdr
= (hook_pkt_observe_t
*)mp
->b_rptr
;
1185 ips
= ((netstack_t
*)hdr
->hpo_ctx
)->netstack_ipnet
;
1187 netmp
= hdr
->hpo_pkt
->b_cont
;
1188 src
.iap_family
= hdr
->hpo_family
;
1189 dst
.iap_family
= hdr
->hpo_family
;
1191 if (hdr
->hpo_family
== AF_INET
) {
1192 src
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_src
;
1193 dst
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_dst
;
1195 src
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_src
;
1196 dst
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_dst
;
/* Open and close wait for the walker count to reach zero. */
1199 ipnet_walkers_inc(ips
);
1201 list
= &ips
->ips_str_list
;
1202 for (ipnet
= list_head(list
); ipnet
!= NULL
;
1203 ipnet
= list_next(list
, ipnet
)) {
1204 if (!(*ipnet
->ipnet_acceptfn
)(ipnet
, hdr
, &src
, &dst
)) {
1205 IPSK_BUMP(ips
, ik_acceptFail
);
1208 IPSK_BUMP(ips
, ik_acceptOk
);
/* The final stream can consume the original instead of a copy. */
1210 if (list_next(list
, ipnet
) == NULL
) {
1211 netmp
= hdr
->hpo_pkt
->b_cont
;
1212 hdr
->hpo_pkt
->b_cont
= NULL
;
1214 if ((netmp
= dupmsg(hdr
->hpo_pkt
->b_cont
)) == NULL
&&
1215 (netmp
= copymsg(hdr
->hpo_pkt
->b_cont
)) == NULL
) {
1216 IPSK_BUMP(ips
, ik_duplicationFail
);
1221 if (ipnet
->ipnet_flags
& IPNET_INFO
) {
1222 if ((netmp
= ipnet_addheader(hdr
, netmp
)) == NULL
) {
1223 IPSK_BUMP(ips
, ik_dispatchHeaderDrop
);
1228 if (ipnet
->ipnet_rq
->q_first
== NULL
&&
1229 canputnext(ipnet
->ipnet_rq
)) {
1230 putnext(ipnet
->ipnet_rq
, netmp
);
1231 IPSK_BUMP(ips
, ik_dispatchDeliver
);
1232 } else if (canput(ipnet
->ipnet_rq
)) {
1233 (void) putq(ipnet
->ipnet_rq
, netmp
);
1234 IPSK_BUMP(ips
, ik_dispatchDeliver
);
1237 IPSK_BUMP(ips
, ik_dispatchPutDrop
);
1241 ipnet_walkers_dec(ips
);
/*
 * Packet callback registered with ipobs_register_hook() in ipnet_open().
 *
 * mp - observation message; it starts with a hook_pkt_observe_t whose
 *      hpo_ctx identifies the netstack
 *
 * The message is handed to ipnet_dispatch() on the packet taskq without
 * sleeping (DDI_NOSLEEP).  The outcome is counted in ik_dispatchFail or
 * ik_dispatchOk.  What happens to mp when the dispatch fails is not
 * visible in this view.
 */
1247 ipnet_input(mblk_t
*mp
)
1249 hook_pkt_observe_t
*hdr
= (hook_pkt_observe_t
*)mp
->b_rptr
;
1252 ips
= ((netstack_t
*)hdr
->hpo_ctx
)->netstack_ipnet
;
1254 if (ddi_taskq_dispatch(ipnet_taskq
, ipnet_dispatch
, mp
, DDI_NOSLEEP
) !=
1256 IPSK_BUMP(ips
, ik_dispatchFail
);
1259 IPSK_BUMP(ips
, ik_dispatchOk
);
/*
 * Allocate and minimally initialize an ipnetif_t for a stack.
 *
 * ips - owning per-stack state, recorded in if_stackp
 *
 * The structure is zero-allocated with KM_NOSLEEP, so the allocation
 * can fail (the value returned on failure is not visible in this view).
 * The address lock, the IPv4 and IPv6 address lists (linked through
 * ifa_link of ipnetif_addr_t) and the reference lock are initialized.
 */
1264 ipnet_alloc_if(ipnet_stack_t
*ips
)
1268 if ((ipnetif
= kmem_zalloc(sizeof (*ipnetif
), KM_NOSLEEP
)) == NULL
)
1271 mutex_init(&ipnetif
->if_addr_lock
, NULL
, MUTEX_DEFAULT
, 0);
1272 list_create(&ipnetif
->if_ip4addr_list
, sizeof (ipnetif_addr_t
),
1273 offsetof(ipnetif_addr_t
, ifa_link
));
1274 list_create(&ipnetif
->if_ip6addr_list
, sizeof (ipnetif_addr_t
),
1275 offsetof(ipnetif_addr_t
, ifa_link
));
1276 mutex_init(&ipnetif
->if_reflock
, NULL
, MUTEX_DEFAULT
, 0);
1278 ipnetif
->if_stackp
= ips
;
1284 * Create a new ipnetif_t and new minor node for it. If creation is
1285 * successful the new ipnetif_t is inserted into an avl_tree
1286 * containing ipnetif's for this stack instance.
1289 ipnetif_create(const char *name
, uint64_t index
, ipnet_stack_t
*ips
,
1293 avl_index_t where
= 0;
1297 * Because ipnetif_create() can be called from a NIC event
1298 * callback, it should not block.
1300 ifminor
= (minor_t
)id_alloc_nosleep(ipnet_minor_space
);
1301 if (ifminor
== (minor_t
)-1)
1303 if ((ipnetif
= ipnet_alloc_if(ips
)) == NULL
) {
1304 id_free(ipnet_minor_space
, ifminor
);
1308 (void) strlcpy(ipnetif
->if_name
, name
, LIFNAMSIZ
);
1309 ipnetif
->if_index
= (uint_t
)index
;
1310 ipnetif
->if_zoneid
= netstack_get_zoneid(ips
->ips_netstack
);
1311 ipnetif
->if_dev
= makedevice(ipnet_major
, ifminor
);
1313 ipnetif
->if_refcnt
= 1;
1314 if ((ifflags
& IFF_LOOPBACK
) != 0)
1315 ipnetif
->if_flags
= IPNETIF_LOOPBACK
;
1317 mutex_enter(&ips
->ips_avl_lock
);
1318 VERIFY(avl_find(&ips
->ips_avl_by_index
, &index
, &where
) == NULL
);
1319 avl_insert(&ips
->ips_avl_by_index
, ipnetif
, where
);
1320 VERIFY(avl_find(&ips
->ips_avl_by_name
, (void *)name
, &where
) == NULL
);
1321 avl_insert(&ips
->ips_avl_by_name
, ipnetif
, where
);
1322 mutex_exit(&ips
->ips_avl_lock
);
1328 ipnetif_remove(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
1332 ipnet_walkers_inc(ips
);
1333 /* Send a SIGHUP to all open streams associated with this ipnetif. */
1334 for (ipnet
= list_head(&ips
->ips_str_list
); ipnet
!= NULL
;
1335 ipnet
= list_next(&ips
->ips_str_list
, ipnet
)) {
1336 if (ipnet
->ipnet_if
== ipnetif
)
1337 (void) putnextctl(ipnet
->ipnet_rq
, M_HANGUP
);
1339 ipnet_walkers_dec(ips
);
1340 mutex_enter(&ips
->ips_avl_lock
);
1341 avl_remove(&ips
->ips_avl_by_index
, ipnetif
);
1342 avl_remove(&ips
->ips_avl_by_name
, ipnetif
);
1343 mutex_exit(&ips
->ips_avl_lock
);
1345 * Release the reference we implicitly held in ipnetif_create().
1347 ipnetif_refrele(ipnetif
);
1351 ipnet_purge_addrlist(list_t
*addrlist
)
1353 ipnetif_addr_t
*ifa
;
1355 while ((ifa
= list_head(addrlist
)) != NULL
) {
1356 list_remove(addrlist
, ifa
);
1357 if (ifa
->ifa_shared
!= NULL
)
1358 ipnetif_clone_release(ifa
->ifa_shared
);
1359 kmem_free(ifa
, sizeof (*ifa
));
1364 ipnetif_free(ipnetif_t
*ipnetif
)
1366 ASSERT(ipnetif
->if_refcnt
== 0);
1367 ASSERT(ipnetif
->if_sharecnt
== 0);
1369 /* Remove IPv4/v6 address lists from the ipnetif */
1370 ipnet_purge_addrlist(&ipnetif
->if_ip4addr_list
);
1371 list_destroy(&ipnetif
->if_ip4addr_list
);
1372 ipnet_purge_addrlist(&ipnetif
->if_ip6addr_list
);
1373 list_destroy(&ipnetif
->if_ip6addr_list
);
1374 mutex_destroy(&ipnetif
->if_addr_lock
);
1375 mutex_destroy(&ipnetif
->if_reflock
);
1376 if (ipnetif
->if_dev
!= 0)
1377 id_free(ipnet_minor_space
, getminor(ipnetif
->if_dev
));
1378 kmem_free(ipnetif
, sizeof (*ipnetif
));
1382 * Create an ipnetif_addr_t with the given logical interface id (lif)
1383 * and add it to the supplied ipnetif. The lif is the netinfo
1384 * representation of logical interface id, and we use this id to match
1385 * incoming netinfo events against our lists of addresses.
1388 ipnet_add_ifaddr(uint64_t lif
, ipnetif_t
*ipnetif
, net_handle_t nd
)
1390 ipnetif_addr_t
*ifaddr
;
1392 struct sockaddr_in bcast
;
1393 struct sockaddr_storage addr
;
1394 net_ifaddr_t type
= NA_ADDRESS
;
1395 uint64_t phyif
= ipnetif
->if_index
;
1397 if (net_getlifaddr(nd
, phyif
, lif
, 1, &type
, &addr
) != 0 ||
1398 net_getlifzone(nd
, phyif
, lif
, &zoneid
) != 0)
1401 if ((ifaddr
= kmem_alloc(sizeof (*ifaddr
), KM_NOSLEEP
)) == NULL
)
1403 ifaddr
->ifa_zone
= zoneid
;
1404 ifaddr
->ifa_id
= lif
;
1405 ifaddr
->ifa_shared
= NULL
;
1407 switch (addr
.ss_family
) {
1409 ifaddr
->ifa_ip4addr
=
1410 ((struct sockaddr_in
*)&addr
)->sin_addr
.s_addr
;
1412 * Try and get the broadcast address. Note that it's okay for
1413 * an interface to not have a broadcast address, so we don't
1414 * fail the entire operation if net_getlifaddr() fails here.
1416 type
= NA_BROADCAST
;
1417 if (net_getlifaddr(nd
, phyif
, lif
, 1, &type
, &bcast
) == 0)
1418 ifaddr
->ifa_brdaddr
= bcast
.sin_addr
.s_addr
;
1421 ifaddr
->ifa_ip6addr
= ((struct sockaddr_in6
*)&addr
)->sin6_addr
;
1426 * The zoneid stored in ipnetif_t needs to correspond to the actual
1427 * zone the address is being used in. This facilitates finding the
1428 * correct netstack_t pointer, amongst other things, later.
1430 if (zoneid
== ALL_ZONES
)
1431 zoneid
= GLOBAL_ZONEID
;
1433 mutex_enter(&ipnetif
->if_addr_lock
);
1434 if (zoneid
!= ipnetif
->if_zoneid
) {
1437 ifp2
= ipnetif_clone_create(ipnetif
, zoneid
);
1438 ifaddr
->ifa_shared
= ifp2
;
1440 list_insert_tail(addr
.ss_family
== AF_INET
?
1441 &ipnetif
->if_ip4addr_list
: &ipnetif
->if_ip6addr_list
, ifaddr
);
1442 mutex_exit(&ipnetif
->if_addr_lock
);
1446 ipnet_delete_ifaddr(ipnetif_addr_t
*ifaddr
, ipnetif_t
*ipnetif
, boolean_t isv6
)
1448 mutex_enter(&ipnetif
->if_addr_lock
);
1449 if (ifaddr
->ifa_shared
!= NULL
)
1450 ipnetif_clone_release(ifaddr
->ifa_shared
);
1453 &ipnetif
->if_ip6addr_list
: &ipnetif
->if_ip4addr_list
, ifaddr
);
1454 mutex_exit(&ipnetif
->if_addr_lock
);
1455 kmem_free(ifaddr
, sizeof (*ifaddr
));
1459 ipnet_plumb_ev(ipnet_nicevent_t
*ipne
, ipnet_stack_t
*ips
, boolean_t isv6
)
1462 boolean_t refrele_needed
= B_TRUE
;
1468 ifname
= ipne
->ipne_ifname
;
1469 ifindex
= ipne
->ipne_ifindex
;
1471 (void) net_getlifflags(ipne
->ipne_protocol
, ifindex
, 0, &ifflags
);
1473 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
) {
1474 ipnetif
= ipnetif_create(ifname
, ifindex
, ips
, ifflags
);
1475 refrele_needed
= B_FALSE
;
1477 if (ipnetif
!= NULL
) {
1478 ipnetif
->if_flags
|=
1479 isv6
? IPNETIF_IPV6PLUMBED
: IPNETIF_IPV4PLUMBED
;
1482 if (ipnetif
->if_multicnt
!= 0) {
1483 if (ip_join_allmulti(ifindex
, isv6
,
1484 ips
->ips_netstack
->netstack_ip
) == 0) {
1485 ipnetif
->if_flags
|=
1486 isv6
? IPNETIF_IPV6ALLMULTI
: IPNETIF_IPV4ALLMULTI
;
1491 ipnetif_refrele(ipnetif
);
1495 ipnet_unplumb_ev(uint64_t ifindex
, ipnet_stack_t
*ips
, boolean_t isv6
)
1499 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
)
1502 mutex_enter(&ipnetif
->if_addr_lock
);
1503 ipnet_purge_addrlist(isv6
?
1504 &ipnetif
->if_ip6addr_list
: &ipnetif
->if_ip4addr_list
);
1505 mutex_exit(&ipnetif
->if_addr_lock
);
1508 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive
1509 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif
1510 * if both IPv4 and IPv6 interfaces have been unplumbed.
1512 ipnetif
->if_flags
&= isv6
? ~IPNETIF_IPV6PLUMBED
: ~IPNETIF_IPV4PLUMBED
;
1513 if (!(ipnetif
->if_flags
& (IPNETIF_IPV4PLUMBED
| IPNETIF_IPV6PLUMBED
)))
1514 ipnetif_remove(ipnetif
, ips
);
1515 ipnetif_refrele(ipnetif
);
1519 ipnet_lifup_ev(uint64_t ifindex
, uint64_t lifindex
, net_handle_t nd
,
1520 ipnet_stack_t
*ips
, boolean_t isv6
)
1523 ipnetif_addr_t
*ifaddr
;
1525 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
)
1527 if ((ifaddr
= ipnet_match_lif(ipnetif
, lifindex
, isv6
)) != NULL
) {
1529 * We must have missed a NE_LIF_DOWN event. Delete this
1530 * ifaddr and re-create it.
1532 ipnet_delete_ifaddr(ifaddr
, ipnetif
, isv6
);
1535 ipnet_add_ifaddr(lifindex
, ipnetif
, nd
);
1536 ipnetif_refrele(ipnetif
);
1540 ipnet_lifdown_ev(uint64_t ifindex
, uint64_t lifindex
, ipnet_stack_t
*ips
,
1544 ipnetif_addr_t
*ifaddr
;
1546 if ((ipnetif
= ipnetif_getby_index(ifindex
, ips
)) == NULL
)
1548 if ((ifaddr
= ipnet_match_lif(ipnetif
, lifindex
, isv6
)) != NULL
)
1549 ipnet_delete_ifaddr(ifaddr
, ipnetif
, isv6
);
1550 ipnetif_refrele(ipnetif
);
1552 * Make sure that open streams on this ipnetif are still allowed to
1555 ipnetif_zonecheck(ipnetif
, ips
);
1559 * This callback from the NIC event framework dispatches a taskq as the event
1560 * handlers may block.
1564 ipnet_nicevent_cb(hook_event_token_t token
, hook_data_t info
, void *arg
)
1566 ipnet_stack_t
*ips
= arg
;
1567 hook_nic_event_t
*hn
= (hook_nic_event_t
*)info
;
1568 ipnet_nicevent_t
*ipne
;
1570 if ((ipne
= kmem_alloc(sizeof (ipnet_nicevent_t
), KM_NOSLEEP
)) == NULL
)
1572 ipne
->ipne_event
= hn
->hne_event
;
1573 ipne
->ipne_protocol
= hn
->hne_protocol
;
1574 ipne
->ipne_stackid
= ips
->ips_netstack
->netstack_stackid
;
1575 ipne
->ipne_ifindex
= hn
->hne_nic
;
1576 ipne
->ipne_lifindex
= hn
->hne_lif
;
1577 if (hn
->hne_datalen
!= 0) {
1578 (void) strlcpy(ipne
->ipne_ifname
, hn
->hne_data
,
1579 sizeof (ipne
->ipne_ifname
));
1581 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq
, ipnet_nicevent_task
,
1587 ipnet_nicevent_task(void *arg
)
1589 ipnet_nicevent_t
*ipne
= arg
;
1594 if ((ns
= netstack_find_by_stackid(ipne
->ipne_stackid
)) == NULL
)
1596 ips
= ns
->netstack_ipnet
;
1597 isv6
= (ipne
->ipne_protocol
== ips
->ips_ndv6
);
1599 mutex_enter(&ips
->ips_event_lock
);
1600 switch (ipne
->ipne_event
) {
1602 ipnet_plumb_ev(ipne
, ips
, isv6
);
1605 ipnet_unplumb_ev(ipne
->ipne_ifindex
, ips
, isv6
);
1608 ipnet_lifup_ev(ipne
->ipne_ifindex
, ipne
->ipne_lifindex
,
1609 ipne
->ipne_protocol
, ips
, isv6
);
1612 ipnet_lifdown_ev(ipne
->ipne_ifindex
, ipne
->ipne_lifindex
, ips
,
1618 mutex_exit(&ips
->ips_event_lock
);
1622 kmem_free(ipne
, sizeof (ipnet_nicevent_t
));
1626 ipnet_if_getdev(char *name
, zoneid_t zoneid
)
1631 dev_t dev
= (dev_t
)-1;
1633 if (is_system_labeled() && zoneid
!= GLOBAL_ZONEID
)
1635 if ((ns
= netstack_find_by_zoneid(zoneid
)) == NULL
)
1638 ips
= ns
->netstack_ipnet
;
1639 mutex_enter(&ips
->ips_avl_lock
);
1640 if ((ipnetif
= avl_find(&ips
->ips_avl_by_name
, name
, NULL
)) != NULL
) {
1641 if (ipnetif_in_zone(ipnetif
, zoneid
, ips
))
1642 dev
= ipnetif
->if_dev
;
1644 mutex_exit(&ips
->ips_avl_lock
);
1651 ipnetif_getby_index(uint64_t id
, ipnet_stack_t
*ips
)
1655 mutex_enter(&ips
->ips_avl_lock
);
1656 if ((ipnetif
= avl_find(&ips
->ips_avl_by_index
, &id
, NULL
)) != NULL
)
1657 ipnetif_refhold(ipnetif
);
1658 mutex_exit(&ips
->ips_avl_lock
);
1663 ipnetif_getby_dev(dev_t dev
, ipnet_stack_t
*ips
)
1668 mutex_enter(&ips
->ips_avl_lock
);
1669 tree
= &ips
->ips_avl_by_index
;
1670 for (ipnetif
= avl_first(tree
); ipnetif
!= NULL
;
1671 ipnetif
= avl_walk(tree
, ipnetif
, AVL_AFTER
)) {
1672 if (ipnetif
->if_dev
== dev
) {
1673 ipnetif_refhold(ipnetif
);
1677 mutex_exit(&ips
->ips_avl_lock
);
1681 static ipnetif_addr_t
*
1682 ipnet_match_lif(ipnetif_t
*ipnetif
, lif_if_t lid
, boolean_t isv6
)
1684 ipnetif_addr_t
*ifaddr
;
1687 mutex_enter(&ipnetif
->if_addr_lock
);
1688 list
= isv6
? &ipnetif
->if_ip6addr_list
: &ipnetif
->if_ip4addr_list
;
1689 for (ifaddr
= list_head(list
); ifaddr
!= NULL
;
1690 ifaddr
= list_next(list
, ifaddr
)) {
1691 if (lid
== ifaddr
->ifa_id
)
1694 mutex_exit(&ipnetif
->if_addr_lock
);
1700 ipnet_stack_init(netstackid_t stackid
, netstack_t
*ns
)
1704 ips
= kmem_zalloc(sizeof (*ips
), KM_SLEEP
);
1705 ips
->ips_netstack
= ns
;
1706 mutex_init(&ips
->ips_avl_lock
, NULL
, MUTEX_DEFAULT
, 0);
1707 avl_create(&ips
->ips_avl_by_index
, ipnetif_compare_index
,
1708 sizeof (ipnetif_t
), offsetof(ipnetif_t
, if_avl_by_index
));
1709 avl_create(&ips
->ips_avl_by_name
, ipnetif_compare_name
,
1710 sizeof (ipnetif_t
), offsetof(ipnetif_t
, if_avl_by_name
));
1711 avl_create(&ips
->ips_avl_by_shared
, ipnetif_compare_name_zone
,
1712 sizeof (ipnetif_t
), offsetof(ipnetif_t
, if_avl_by_shared
));
1713 mutex_init(&ips
->ips_walkers_lock
, NULL
, MUTEX_DEFAULT
, NULL
);
1714 cv_init(&ips
->ips_walkers_cv
, NULL
, CV_DRIVER
, NULL
);
1715 list_create(&ips
->ips_str_list
, sizeof (ipnet_t
),
1716 offsetof(ipnet_t
, ipnet_next
));
1717 ipnet_register_netihook(ips
);
1723 ipnet_stack_fini(netstackid_t stackid
, void *arg
)
1725 ipnet_stack_t
*ips
= arg
;
1726 ipnetif_t
*ipnetif
, *nipnetif
;
1728 if (ips
->ips_kstatp
!= NULL
) {
1731 zoneid
= netstackid_to_zoneid(stackid
);
1732 net_kstat_delete(net_zoneidtonetid(zoneid
), ips
->ips_kstatp
);
1734 if (ips
->ips_ndv4
!= NULL
) {
1735 VERIFY(net_hook_unregister(ips
->ips_ndv4
, NH_NIC_EVENTS
,
1736 ips
->ips_nicevents
) == 0);
1737 VERIFY(net_protocol_release(ips
->ips_ndv4
) == 0);
1739 if (ips
->ips_ndv6
!= NULL
) {
1740 VERIFY(net_hook_unregister(ips
->ips_ndv6
, NH_NIC_EVENTS
,
1741 ips
->ips_nicevents
) == 0);
1742 VERIFY(net_protocol_release(ips
->ips_ndv6
) == 0);
1744 hook_free(ips
->ips_nicevents
);
1746 for (ipnetif
= avl_first(&ips
->ips_avl_by_index
); ipnetif
!= NULL
;
1747 ipnetif
= nipnetif
) {
1748 nipnetif
= AVL_NEXT(&ips
->ips_avl_by_index
, ipnetif
);
1749 ipnetif_remove(ipnetif
, ips
);
1751 avl_destroy(&ips
->ips_avl_by_shared
);
1752 avl_destroy(&ips
->ips_avl_by_index
);
1753 avl_destroy(&ips
->ips_avl_by_name
);
1754 mutex_destroy(&ips
->ips_avl_lock
);
1755 mutex_destroy(&ips
->ips_walkers_lock
);
1756 cv_destroy(&ips
->ips_walkers_cv
);
1757 list_destroy(&ips
->ips_str_list
);
1758 kmem_free(ips
, sizeof (*ips
));
1761 /* Do any of the addresses in addrlist belong the supplied zoneid? */
1763 ipnet_addrs_in_zone(list_t
*addrlist
, zoneid_t zoneid
)
1765 ipnetif_addr_t
*ifa
;
1767 for (ifa
= list_head(addrlist
); ifa
!= NULL
;
1768 ifa
= list_next(addrlist
, ifa
)) {
1769 if (ifa
->ifa_zone
== zoneid
)
1775 /* Should the supplied ipnetif be visible from the supplied zoneid? */
1777 ipnetif_in_zone(ipnetif_t
*ipnetif
, zoneid_t zoneid
, ipnet_stack_t
*ips
)
1782 * The global zone has visibility into all interfaces in the global
1783 * stack, and exclusive stack zones have visibility into all
1784 * interfaces in their stack.
1786 if (zoneid
== GLOBAL_ZONEID
||
1787 ips
->ips_netstack
->netstack_stackid
!= GLOBAL_NETSTACKID
)
1791 * Shared-stack zones only have visibility for interfaces that have
1792 * addresses in their zone.
1794 mutex_enter(&ipnetif
->if_addr_lock
);
1795 ret
= ipnet_addrs_in_zone(&ipnetif
->if_ip4addr_list
, zoneid
) ||
1796 ipnet_addrs_in_zone(&ipnetif
->if_ip6addr_list
, zoneid
);
1797 mutex_exit(&ipnetif
->if_addr_lock
);
1802 * Verify that any ipnet_t that has a reference to the supplied ipnetif should
1803 * still be allowed to have it open. A given ipnet_t may no longer be allowed
1804 * to have an ipnetif open if there are no longer any addresses that belong to
1805 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the
1806 * case, send the ipnet_t an M_HANGUP.
1809 ipnetif_zonecheck(ipnetif_t
*ipnetif
, ipnet_stack_t
*ips
)
1811 list_t
*strlist
= &ips
->ips_str_list
;
1814 ipnet_walkers_inc(ips
);
1815 for (ipnet
= list_head(strlist
); ipnet
!= NULL
;
1816 ipnet
= list_next(strlist
, ipnet
)) {
1817 if (ipnet
->ipnet_if
!= ipnetif
)
1819 if (!ipnetif_in_zone(ipnetif
, ipnet
->ipnet_zoneid
, ips
))
1820 (void) putnextctl(ipnet
->ipnet_rq
, M_HANGUP
);
1822 ipnet_walkers_dec(ips
);
1826 ipnet_walk_if(ipnet_walkfunc_t
*cb
, void *arg
, zoneid_t zoneid
)
1830 ipnetif_cbdata_t
*cbnode
;
1835 * On labeled systems, non-global zones shouldn't see anything
1838 if (is_system_labeled() && zoneid
!= GLOBAL_ZONEID
)
1841 if ((ns
= netstack_find_by_zoneid(zoneid
)) == NULL
)
1844 ips
= ns
->netstack_ipnet
;
1845 list_create(&cbdata
, sizeof (ipnetif_cbdata_t
),
1846 offsetof(ipnetif_cbdata_t
, ic_next
));
1848 mutex_enter(&ips
->ips_avl_lock
);
1849 for (ipnetif
= avl_first(&ips
->ips_avl_by_index
); ipnetif
!= NULL
;
1850 ipnetif
= avl_walk(&ips
->ips_avl_by_index
, ipnetif
, AVL_AFTER
)) {
1851 if (!ipnetif_in_zone(ipnetif
, zoneid
, ips
))
1853 cbnode
= kmem_zalloc(sizeof (ipnetif_cbdata_t
), KM_SLEEP
);
1854 (void) strlcpy(cbnode
->ic_ifname
, ipnetif
->if_name
, LIFNAMSIZ
);
1855 cbnode
->ic_dev
= ipnetif
->if_dev
;
1856 list_insert_head(&cbdata
, cbnode
);
1858 mutex_exit(&ips
->ips_avl_lock
);
1860 while ((cbnode
= list_head(&cbdata
)) != NULL
) {
1861 cb(cbnode
->ic_ifname
, arg
, cbnode
->ic_dev
);
1862 list_remove(&cbdata
, cbnode
);
1863 kmem_free(cbnode
, sizeof (ipnetif_cbdata_t
));
1865 list_destroy(&cbdata
);
1870 ipnetif_compare_index(const void *index_ptr
, const void *ipnetifp
)
1872 int64_t index1
= *((int64_t *)index_ptr
);
1873 int64_t index2
= (int64_t)((ipnetif_t
*)ipnetifp
)->if_index
;
1875 return (SIGNOF(index2
- index1
));
1879 ipnetif_compare_name(const void *name_ptr
, const void *ipnetifp
)
1883 res
= strcmp(((ipnetif_t
*)ipnetifp
)->if_name
, name_ptr
);
1884 return (SIGNOF(res
));
1888 ipnetif_compare_name_zone(const void *key_ptr
, const void *ipnetifp
)
1890 const uintptr_t *ptr
= key_ptr
;
1891 const ipnetif_t
*ifp
;
1895 res
= ifp
->if_zoneid
- ptr
[0];
1897 return (SIGNOF(res
));
1898 res
= strcmp(ifp
->if_name
, (char *)ptr
[1]);
1899 return (SIGNOF(res
));
1903 ipnetif_refhold(ipnetif_t
*ipnetif
)
1905 mutex_enter(&ipnetif
->if_reflock
);
1906 ipnetif
->if_refcnt
++;
1907 mutex_exit(&ipnetif
->if_reflock
);
1911 ipnetif_refrele(ipnetif_t
*ipnetif
)
1913 mutex_enter(&ipnetif
->if_reflock
);
1914 ASSERT(ipnetif
->if_refcnt
> 0);
1915 if (--ipnetif
->if_refcnt
== 0)
1916 ipnetif_free(ipnetif
);
1918 mutex_exit(&ipnetif
->if_reflock
);
1922 ipnet_walkers_inc(ipnet_stack_t
*ips
)
1924 mutex_enter(&ips
->ips_walkers_lock
);
1925 ips
->ips_walkers_cnt
++;
1926 mutex_exit(&ips
->ips_walkers_lock
);
1930 ipnet_walkers_dec(ipnet_stack_t
*ips
)
1932 mutex_enter(&ips
->ips_walkers_lock
);
1933 ASSERT(ips
->ips_walkers_cnt
!= 0);
1934 if (--ips
->ips_walkers_cnt
== 0)
1935 cv_broadcast(&ips
->ips_walkers_cv
);
1936 mutex_exit(&ips
->ips_walkers_lock
);
1941 ipobs_bounce_func(hook_event_token_t token
, hook_data_t info
, void *arg
)
1943 hook_pkt_observe_t
*hdr
;
1944 pfv_t func
= (pfv_t
)arg
;
1947 hdr
= (hook_pkt_observe_t
*)info
;
1949 * Code in ip_input() expects that it is the only one accessing the
1952 mp
= copymsg(hdr
->hpo_pkt
);
1954 netstack_t
*ns
= hdr
->hpo_ctx
;
1955 ipnet_stack_t
*ips
= ns
->netstack_ipnet
;
1957 IPSK_BUMP(ips
, ik_dispatchDupDrop
);
1961 hdr
= (hook_pkt_observe_t
*)mp
->b_rptr
;
1970 ipobs_register_hook(netstack_t
*ns
, pfv_t func
)
1972 ip_stack_t
*ipst
= ns
->netstack_ip
;
1976 HOOK_INIT(hook
, ipobs_bounce_func
, "", (void *)func
);
1977 VERIFY(hook
!= NULL
);
1980 * To register multiple hooks with the same callback function,
1981 * a unique name is needed.
1983 (void) snprintf(name
, sizeof (name
), "ipobserve_%p", (void *)hook
);
1984 hook
->h_name
= strdup(name
);
1986 (void) net_hook_register(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
, hook
);
1987 (void) net_hook_register(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
, hook
);
1993 ipobs_unregister_hook(netstack_t
*ns
, hook_t
*hook
)
1995 ip_stack_t
*ipst
= ns
->netstack_ip
;
1997 (void) net_hook_unregister(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
, hook
);
1999 (void) net_hook_unregister(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
, hook
);
2001 strfree(hook
->h_name
);
2006 /* ******************************************************************** */
2007 /* BPF Functions below */
2008 /* ******************************************************************** */
2011 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy.
2014 ipnet_find_by_zoneid(zoneid_t zoneid
)
2018 VERIFY((ns
= netstack_find_by_zoneid(zoneid
)) != NULL
);
2019 return (ns
->netstack_ipnet
);
2023 * Functions, such as the above ipnet_find_by_zoneid(), will return a
2024 * pointer to ipnet_stack_t by calling a netstack lookup function.
2025 * The netstack_find_*() functions return a pointer after doing a "hold"
2026 * on the data structure and thereby require a "release" when the caller
2027 * is finished with it. We need to mirror that API here and thus a caller
2028 * of ipnet_find_by_zoneid() is required to call ipnet_rele().
2031 ipnet_rele(ipnet_stack_t
*ips
)
2033 netstack_rele(ips
->ips_netstack
);
2039 ipnet_set_itap(bpf_itap_fn_t tapfunc
)
2041 ipnet_itap
= tapfunc
;
2045 * The list of interfaces available via ipnet is private for each zone,
2046 * so the AVL tree of each zone must be searched for a given name, even
2047 * if all names are unique.
2050 ipnet_open_byname(const char *name
, ipnetif_t
**ptr
, zoneid_t zoneid
)
2055 ASSERT(ptr
!= NULL
);
2056 VERIFY((ips
= ipnet_find_by_zoneid(zoneid
)) != NULL
);
2058 mutex_enter(&ips
->ips_avl_lock
);
2061 * Shared instance zone?
2063 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid
)) != zoneid
) {
2064 uintptr_t key
[2] = { zoneid
, (uintptr_t)name
};
2066 ipnetif
= avl_find(&ips
->ips_avl_by_shared
, (void *)key
, NULL
);
2068 ipnetif
= avl_find(&ips
->ips_avl_by_name
, (void *)name
, NULL
);
2070 if (ipnetif
!= NULL
)
2071 ipnetif_refhold(ipnetif
);
2072 mutex_exit(&ips
->ips_avl_lock
);
2077 if (ipnetif
== NULL
)
2083 ipnet_close_byhandle(ipnetif_t
*ifp
)
2085 ASSERT(ifp
!= NULL
);
2086 ipnetif_refrele(ifp
);
2090 ipnet_name(ipnetif_t
*ifp
)
2092 ASSERT(ifp
!= NULL
);
2093 return (ifp
->if_name
);
2097 * To find the linkid for a given name, it is necessary to know which zone
2098 * the interface name belongs to and to search the avl tree for that zone
2099 * as there is no master list of all interfaces and which zone they belong
2100 * to. It is assumed that the caller of this function is somehow already
2101 * working with the ipnet interfaces and hence the ips_event_lock is held.
2102 * When BPF calls into this function, it is doing so because of an event
2103 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id
2104 * value returned has meaning without the need for grabbing a hold on the
2108 ipnet_get_linkid_byname(const char *name
, uint_t
*idp
, zoneid_t zoneid
)
2113 VERIFY((ips
= ipnet_find_by_zoneid(zoneid
)) != NULL
);
2114 ASSERT(mutex_owned(&ips
->ips_event_lock
));
2116 mutex_enter(&ips
->ips_avl_lock
);
2117 ifp
= avl_find(&ips
->ips_avl_by_name
, (void *)name
, NULL
);
2119 *idp
= (uint_t
)ifp
->if_index
;
2122 * Shared instance zone?
2124 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid
)) != zoneid
) {
2125 uintptr_t key
[2] = { zoneid
, (uintptr_t)name
};
2127 ifp
= avl_find(&ips
->ips_avl_by_shared
, (void *)key
, NULL
);
2129 *idp
= (uint_t
)ifp
->if_index
;
2132 mutex_exit(&ips
->ips_avl_lock
);
2141 * Strictly speaking, there is no such thing as a "client" in ipnet, like
2142 * there is in mac. BPF only needs to have this because it is required as
2143 * part of interfacing correctly with mac. The reuse of the original
2144 * ipnetif_t as a client poses no danger, so long as it is done with its
2145 * own ref-count'd hold that is given up on close.
2148 ipnet_client_open(ipnetif_t
*ptr
, ipnetif_t
**result
)
2150 ASSERT(ptr
!= NULL
);
2151 ASSERT(result
!= NULL
);
2152 ipnetif_refhold(ptr
);
2159 ipnet_client_close(ipnetif_t
*ptr
)
2161 ASSERT(ptr
!= NULL
);
2162 ipnetif_refrele(ptr
);
2166 * This is called from BPF when it needs to start receiving packets
2169 * The use of the ipnet_t structure here is somewhat lightweight when
2170 * compared to how it is used elsewhere but it already has all of the
2171 * right fields in it, so reuse here doesn't seem out of order. Its
2172 * primary purpose here is to provide the means to store pointers for
2173 * use when ipnet_promisc_remove() needs to be called.
2175 * This should never be called for the IPNET_MINOR_LO device as it is
2176 * never created via ipnetif_create.
2180 ipnet_promisc_add(void *handle
, uint_t how
, void *data
, uintptr_t *mhandle
,
2190 ifp
= (ipnetif_t
*)handle
;
2192 if (how
!= DL_PROMISC_PHYS
&& how
!= DL_PROMISC_MULTI
)
2195 ns
= netstack_find_by_zoneid(ifp
->if_zoneid
);
2197 if ((error
= ipnet_join_allmulti(ifp
, ns
->netstack_ipnet
)) != 0) {
2202 ipnet
= kmem_zalloc(sizeof (*ipnet
), KM_SLEEP
);
2203 ipnet
->ipnet_if
= ifp
;
2204 ipnet
->ipnet_ns
= ns
;
2205 ipnet
->ipnet_flags
= flags
;
2207 if ((ifp
->if_flags
& IPNETIF_LOOPBACK
) != 0) {
2208 ipnet
->ipnet_acceptfn
= ipnet_loaccept
;
2210 ipnet
->ipnet_acceptfn
= ipnet_accept
;
2214 * To register multiple hooks with the same callback function,
2215 * a unique name is needed.
2217 HOOK_INIT(ipnet
->ipnet_hook
, ipnet_bpf_bounce
, "", ipnet
);
2218 (void) snprintf(name
, sizeof (name
), "ipnet_promisc_%p",
2219 (void *)ipnet
->ipnet_hook
);
2220 ipnet
->ipnet_hook
->h_name
= strdup(name
);
2221 ipnet
->ipnet_data
= data
;
2222 ipnet
->ipnet_zoneid
= ifp
->if_zoneid
;
2224 ipst
= ns
->netstack_ip
;
2226 error
= net_hook_register(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
,
2231 error
= net_hook_register(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
,
2234 (void) net_hook_unregister(ipst
->ips_ip4_observe_pr
,
2235 NH_OBSERVE
, ipnet
->ipnet_hook
);
2239 *mhandle
= (uintptr_t)ipnet
;
2245 cmn_err(CE_WARN
, "net_hook_register failed: %d", error
);
2246 strfree(ipnet
->ipnet_hook
->h_name
);
2247 hook_free(ipnet
->ipnet_hook
);
2253 ipnet_promisc_remove(void *data
)
2260 ipst
= ipnet
->ipnet_ns
->netstack_ip
;
2261 hook
= ipnet
->ipnet_hook
;
2263 VERIFY(net_hook_unregister(ipst
->ips_ip4_observe_pr
, NH_OBSERVE
,
2266 VERIFY(net_hook_unregister(ipst
->ips_ip6_observe_pr
, NH_OBSERVE
,
2269 strfree(hook
->h_name
);
2273 kmem_free(ipnet
, sizeof (*ipnet
));
2277 * arg here comes from the ipnet_t allocated in ipnet_promisc_add.
2278 * An important field from that structure is "ipnet_data" that
2279 * contains the "data" pointer passed into ipnet_promisc_add: it needs
2280 * to be passed back to bpf when we call into ipnet_itap.
2282 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called
2287 ipnet_bpf_bounce(hook_event_token_t token
, hook_data_t info
, void *arg
)
2289 hook_pkt_observe_t
*hdr
;
2297 hdr
= (hook_pkt_observe_t
*)info
;
2299 ipnet
= (ipnet_t
*)arg
;
2300 ips
= ((netstack_t
*)hdr
->hpo_ctx
)->netstack_ipnet
;
2302 netmp
= hdr
->hpo_pkt
->b_cont
;
2303 src
.iap_family
= hdr
->hpo_family
;
2304 dst
.iap_family
= hdr
->hpo_family
;
2306 if (hdr
->hpo_family
== AF_INET
) {
2307 src
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_src
;
2308 dst
.iap_addr4
= &((ipha_t
*)(netmp
->b_rptr
))->ipha_dst
;
2310 src
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_src
;
2311 dst
.iap_addr6
= &((ip6_t
*)(netmp
->b_rptr
))->ip6_dst
;
2314 if (!(*ipnet
->ipnet_acceptfn
)(ipnet
, hdr
, &src
, &dst
)) {
2315 IPSK_BUMP(ips
, ik_acceptFail
);
2318 IPSK_BUMP(ips
, ik_acceptOk
);
2320 ipnet_itap(ipnet
->ipnet_data
, mp
,
2321 hdr
->hpo_htype
== htons(IPOBS_HOOK_OUTBOUND
),
2322 ntohl(hdr
->hpo_pktlen
) + MBLKL(mp
));
2328 * clone'd ipnetif_t's are created when a shared IP instance zone comes
2329 * to life and configures an IP address. The model that BPF uses is that
2330 * each interface must have a unique pointer and each interface must be
2331 * representative of what it can capture. They are limited to one DLT
2332 * per interface and one zone per interface. Thus every interface that
2333 * can be seen in a zone must be announced via an attach to bpf. For
2334 * shared instance zones, this means the ipnet driver needs to detect
2335 * when an address is added to an interface in a zone for the first
2336 * time (and also when the last address is removed.)
2339 ipnetif_clone_create(ipnetif_t
*ifp
, zoneid_t zoneid
)
2341 uintptr_t key
[2] = { zoneid
, (uintptr_t)ifp
->if_name
};
2342 ipnet_stack_t
*ips
= ifp
->if_stackp
;
2343 avl_index_t where
= 0;
2346 mutex_enter(&ips
->ips_avl_lock
);
2347 newif
= avl_find(&ips
->ips_avl_by_shared
, (void *)key
, &where
);
2348 if (newif
!= NULL
) {
2349 ipnetif_refhold(newif
);
2350 newif
->if_sharecnt
++;
2351 mutex_exit(&ips
->ips_avl_lock
);
2355 newif
= ipnet_alloc_if(ips
);
2356 if (newif
== NULL
) {
2357 mutex_exit(&ips
->ips_avl_lock
);
2361 newif
->if_refcnt
= 1;
2362 newif
->if_sharecnt
= 1;
2363 newif
->if_zoneid
= zoneid
;
2364 (void) strlcpy(newif
->if_name
, ifp
->if_name
, LIFNAMSIZ
);
2365 newif
->if_flags
= ifp
->if_flags
& IPNETIF_LOOPBACK
;
2366 newif
->if_index
= ifp
->if_index
;
2368 avl_insert(&ips
->ips_avl_by_shared
, newif
, where
);
2369 mutex_exit(&ips
->ips_avl_lock
);
2375 ipnetif_clone_release(ipnetif_t
*ipnetif
)
2377 boolean_t dofree
= B_FALSE
;
2378 boolean_t doremove
= B_FALSE
;
2379 ipnet_stack_t
*ips
= ipnetif
->if_stackp
;
2381 mutex_enter(&ipnetif
->if_reflock
);
2382 ASSERT(ipnetif
->if_refcnt
> 0);
2383 if (--ipnetif
->if_refcnt
== 0)
2385 ASSERT(ipnetif
->if_sharecnt
> 0);
2386 if (--ipnetif
->if_sharecnt
== 0)
2388 mutex_exit(&ipnetif
->if_reflock
);
2390 mutex_enter(&ips
->ips_avl_lock
);
2391 avl_remove(&ips
->ips_avl_by_shared
, ipnetif
);
2392 mutex_exit(&ips
->ips_avl_lock
);
2395 ASSERT(ipnetif
->if_sharecnt
== 0);
2396 ipnetif_free(ipnetif
);