/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
 */
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>
/*
 * What we use so that the zones framework can tell us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int	netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;
/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry	ns_reg[NS_MAX];
/*
 * Global list of existing stacks. We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;
/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			*sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;
static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void	apply_all_netstacks(int, applyfn_t *);
static void	apply_all_modules(netstack_t *, applyfn_t *);
static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);
void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}
/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present, thus functions
 * like zone_find_by_* will fail, hence the create function can not
 * use many zones kernel functions including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
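
/*
 * Example usage (an illustrative sketch, not part of the framework itself;
 * the NS_FOO moduleid and the foo_* functions are hypothetical). A module
 * typically registers from its _init() routine and unregisters from _fini():
 *
 *	static void *
 *	foo_stack_create(netstackid_t stackid, netstack_t *ns)
 *	{
 *		foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
 *
 *		fs->fs_stackid = stackid;
 *		return (fs);
 *	}
 *
 *	static void
 *	foo_stack_destroy(netstackid_t stackid, void *arg)
 *	{
 *		kmem_free(arg, sizeof (foo_stack_t));
 *	}
 *
 *	netstack_register(NS_FOO, foo_stack_create, NULL, foo_stack_destroy);
 *	...
 *	netstack_unregister(NS_FOO);
 */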
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. We could actually have a
		 * netstack being created as we speak waiting for us to let
		 * go of this lock to proceed. It may have set
		 * NSS_CREATE_NEEDED, but not have gotten to the point of
		 * completing it yet. If NSS_CREATE_NEEDED, we can safely
		 * just remove it here and never create the module. However,
		 * if NSS_CREATE_INPROGRESS is set, we need to still flag this
		 * module for shutdown and deletion, just as though it had
		 * reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else
		 * is in progress in the system for this netstack and wait
		 * for it to complete. Secondly, because the zone is being
		 * created, we know that the following call to
		 * apply_all_netstacks will block on the zone finishing its
		 * initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one in netstack_zone_create()
	 */
	netstack_rele(ns);
}
/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shutdown,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next,
 * even if it is possible to do so without any hazards. This is
 * because we want the design to allow for the list of netstacks threaded
 * by netstack_next to change in any arbitrary way during the time the
 * lock was dropped.
 *
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will have no effect in applyfn, hence the loop will terminate
 * in at worst O(N^2).
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	while (ns != NULL) {
		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else {
			ns = ns->netstack_next;
		}
	}
	mutex_exit(&netstack_g_lock);
}
/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = 0; i < NS_MAX; i++) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}
/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = NS_MAX - 1; i >= 0; i--) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}
/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}
/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}
/*
 * Get the stack instance used in caller's zone.
 * Increases the reference count, caller must do a netstack_rele.
 * It can't be called after zone_destroy() has started.
 */
netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		return (NULL);

	netstack_hold(ns);

	return (ns);
}
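
/*
 * Example usage (an illustrative sketch): every successful lookup must be
 * balanced with a netstack_rele(), and a NULL return (the stack is
 * uninitialized or closing) must be handled:
 *
 *	netstack_t *ns = netstack_get_current();
 *
 *	if (ns == NULL)
 *		return (ENXIO);
 *	... use ns ...
 *	netstack_rele(ns);
 */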
/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}
/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	zone_rele(zone);
	return (ns);
}
/*
 * Find a stack instance given the zoneid. Can only be called from
 * the create callback. See the comments in zone_find_by_id_nolock why
 * that limitation exists.
 *
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id_nolock(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);

	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	/* zone_find_by_id_nolock does not have a hold on the zone */
	return (ns);
}
/*
 * Find a stack instance given the stackid - exact match only.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			mutex_exit(&ns->netstack_lock);
			netstack_hold(ns);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}
boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;
	boolean_t rval = B_FALSE;

	mutex_enter(&netstack_g_lock);

	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		if (ns->netstack_stackid == stackid) {
			rval = B_TRUE;
			break;
		}
	}

	mutex_exit(&netstack_g_lock);

	return (rval);
}
void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;
	int i;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;

	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		ASSERT(ns->netstack_flags & NSF_CLOSING);

		for (i = 0; i < NS_MAX; i++) {
			nm_state_t *nms = &ns->netstack_m_state[i];

			cv_destroy(&nms->nms_cv);
		}
		mutex_destroy(&ns->netstack_lock);
		cv_destroy(&ns->netstack_cv);
		kmem_free(ns, sizeof (*ns));
	}
}
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}
void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}
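
/*
 * Example usage (an illustrative sketch; the "foo" module and kstat names
 * are hypothetical). A per-stack module would typically create its kstats
 * from its create callback using the stackid it was handed, and delete them
 * from its destroy callback:
 *
 *	ksp = kstat_create_netstack("foo", 0, "foo_stats", "net",
 *	    KSTAT_TYPE_NAMED, 2, KSTAT_FLAG_VIRTUAL, stackid);
 *	...
 *	kstat_delete_netstack(ksp, stackid);
 */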
static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}
static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}
static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}
static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* Must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sk, sizeof (*sk));
}
/*
 * If a zoneid is part of the shared zone, return true
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}
/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We currently do not check that the stackid/zoneids are valid, since there
 * is no need for that. But this should only be done for ids that are
 * valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_ZONEID);
	else
		return (zoneid);
}

zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}
/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* skip those that aren't really here */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}