Unleashed v1.4
[unleashed.git] / usr / src / uts / common / io / bridge.c
blob28f16175ee21e372b3095c29055c1c20d63bae88
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
29 * This module implements a STREAMS driver that provides layer-two (Ethernet)
30 * bridging functionality. The STREAMS interface is used to provide
31 * observability (snoop/wireshark) and control, but not for interface plumbing.
34 #include <sys/types.h>
35 #include <sys/bitmap.h>
36 #include <sys/cmn_err.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/errno.h>
40 #include <sys/kstat.h>
41 #include <sys/modctl.h>
42 #include <sys/note.h>
43 #include <sys/param.h>
44 #include <sys/policy.h>
45 #include <sys/sdt.h>
46 #include <sys/stat.h>
47 #include <sys/stream.h>
48 #include <sys/stropts.h>
49 #include <sys/strsun.h>
50 #include <sys/sunddi.h>
51 #include <sys/sysmacros.h>
52 #include <sys/systm.h>
53 #include <sys/time.h>
54 #include <sys/dlpi.h>
55 #include <sys/dls.h>
56 #include <sys/mac_ether.h>
57 #include <sys/mac_provider.h>
58 #include <sys/mac_client_priv.h>
59 #include <sys/mac_impl.h>
60 #include <sys/vlan.h>
61 #include <net/bridge.h>
62 #include <net/bridge_impl.h>
63 #include <net/trill.h>
64 #include <sys/dld_ioc.h>
67 * Locks and reference counts: object lifetime and design.
69 * bridge_mac_t
70 * Bridge mac (snoop) instances are in bmac_list, which is protected by
71 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer().
72 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes
73 * away, the bridge_mac_t remains until either all of the users go away
74 * (detected by a timer) or until the instance is picked up again by the same
75 * bridge starting back up.
77 * bridge_inst_t
78 * Bridge instances are in inst_list, which is protected by inst_lock.
79 * They're allocated by inst_alloc() and freed by inst_free(). After
80 * allocation, an instance is placed in inst_list, and the reference count is
81 * incremented to represent this. That reference is decremented when the
82 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last
83 * reference is freed, the instance is removed from the list.
85 * Bridge instances have lists of links and an AVL tree of forwarding
86 * entries. Each of these structures holds one reference on the bridge
87 * instance. These lists and tree are protected by bi_rwlock.
89 * bridge_stream_t
90 * Bridge streams are allocated by stream_alloc() and freed by stream_free().
91 * These streams are created when "bridged" opens /dev/bridgectl, and are
92 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the
93 * links on the bridge. When a stream closes, the bridge instance created is
94 * destroyed. There's at most one bridge instance for a given control
95 * stream.
97 * bridge_link_t
98 * Links are allocated by bridge_add_link() and freed by link_free(). The
99 * bi_links list holds a reference to the link. When the BLF_DELETED flag is
100 * set, that reference is dropped. The link isn't removed from the list
101 * until the last reference drops. Each forwarding entry that uses a given
102 * link holds a reference, as does each thread transmitting a packet via the
103 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on
104 * a link when transmitting.
106 * It's important that once BLF_DELETED is set, there's no way for the
107 * reference count to increase again. If it can, then the link may be
108 * double-freed. The BLF_FREED flag is intended for use with assertions to
109 * guard against this in testing.
111 * bridge_fwd_t
112 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by
113 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike
114 * other data structures, the reference is dropped when the entry is removed
115 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each
116 * thread that's forwarding a packet to a known destination holds a reference
117 * to a forwarding entry.
119 * TRILL notes:
121 * The TRILL module does all of its I/O through bridging. It uses references
122 * on the bridge_inst_t and bridge_link_t structures, and has seven entry
123 * points and four callbacks. One entry point is for setting the callbacks
124 * (bridge_trill_register_cb). There are four entry points for taking bridge
125 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two
126 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps)
127 * that need to be bridged locally, and for TRILL-encapsulated output packets
128 * (bridge_trill_output).
130 * The four callbacks comprise two notification functions for bridges and
131 * links being deleted, one function for raw received TRILL packets, and one
132 * for bridge output to non-local TRILL destinations (tunnel entry).
136 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module.
138 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES;
139 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES;
140 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS;
142 static const char *inst_kstats_list[] = { KSINST_NAMES };
143 static const char *link_kstats_list[] = { KSLINK_NAMES };
145 #define KREF(p, m, vn) p->m.vn.value.ui64
146 #define KINCR(p, m, vn) ++KREF(p, m, vn)
147 #define KDECR(p, m, vn) --KREF(p, m, vn)
149 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn)
150 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn)
151 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn)
153 #define KIINCR(vn) KIPINCR(bip, vn)
154 #define KIDECR(vn) KIPDECR(bip, vn)
155 #define KLINCR(vn) KLPINCR(blp, vn)
157 #define Dim(x) (sizeof (x) / sizeof (*(x)))
159 /* Amount of overhead added when encapsulating with VLAN headers */
160 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \
161 sizeof (struct ether_header))
163 static dev_info_t *bridge_dev_info;
164 static major_t bridge_major;
165 static ddi_taskq_t *bridge_taskq;
168 * These are the bridge instance management data structures. The mutex lock
169 * protects the list of bridge instances. A reference count is then used on
170 * each instance to determine when to free it. We use mac_minor_hold() to
171 * allocate minor_t values, which are used both for self-cloning /dev/net/
172 * device nodes as well as client streams. Minor node 0 is reserved for the
173 * allocation control node.
175 static list_t inst_list;
176 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */
177 static kmutex_t inst_lock;
179 static krwlock_t bmac_rwlock;
180 static list_t bmac_list;
182 /* Wait for taskq entries that use STREAMS */
183 static kcondvar_t stream_ref_cv;
184 static kmutex_t stream_ref_lock;
186 static timeout_id_t bridge_timerid;
187 static clock_t bridge_scan_interval;
188 static clock_t bridge_fwd_age;
190 static bridge_inst_t *bridge_find_name(const char *);
191 static void bridge_timer(void *);
192 static void bridge_unref(bridge_inst_t *);
194 static const uint8_t zero_addr[ETHERADDRL] = { 0 };
196 /* Global TRILL linkage */
197 static trill_recv_pkt_t trill_recv_fn;
198 static trill_encap_pkt_t trill_encap_fn;
199 static trill_br_dstr_t trill_brdstr_fn;
200 static trill_ln_dstr_t trill_lndstr_fn;
202 /* special settings to accommodate DLD flow control; see dld_str.c */
203 static struct module_info bridge_dld_modinfo = {
204 0, /* mi_idnum */
205 BRIDGE_DEV_NAME, /* mi_idname */
206 0, /* mi_minpsz */
207 INFPSZ, /* mi_maxpsz */
208 1, /* mi_hiwat */
209 0 /* mi_lowat */
212 static struct qinit bridge_dld_rinit = {
213 NULL, /* qi_putp */
214 NULL, /* qi_srvp */
215 dld_open, /* qi_qopen */
216 dld_close, /* qi_qclose */
217 NULL, /* qi_qadmin */
218 &bridge_dld_modinfo, /* qi_minfo */
219 NULL /* qi_mstat */
222 static struct qinit bridge_dld_winit = {
223 (int (*)())dld_wput, /* qi_putp */
224 (int (*)())dld_wsrv, /* qi_srvp */
225 NULL, /* qi_qopen */
226 NULL, /* qi_qclose */
227 NULL, /* qi_qadmin */
228 &bridge_dld_modinfo, /* qi_minfo */
229 NULL /* qi_mstat */
232 static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *);
234 /* GLDv3 control ioctls used by Bridging */
235 static dld_ioc_info_t bridge_ioc_list[] = {
236 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t),
237 bridge_ioc_listfwd, NULL},
241 * Given a bridge mac pointer, get a ref-held pointer to the corresponding
242 * bridge instance, if any. We must hold the global bmac_rwlock so that
243 * bm_inst doesn't slide out from under us.
245 static bridge_inst_t *
246 mac_to_inst(const bridge_mac_t *bmp)
248 bridge_inst_t *bip;
250 rw_enter(&bmac_rwlock, RW_READER);
251 if ((bip = bmp->bm_inst) != NULL)
252 atomic_inc_uint(&bip->bi_refs);
253 rw_exit(&bmac_rwlock);
254 return (bip);
257 static void
258 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist)
260 mblk_t *mp;
261 bridge_ctl_t *bcp;
262 bridge_link_t *blcmp;
263 bridge_inst_t *bip;
264 bridge_mac_t *bmp;
266 if (failed) {
267 if (blp->bl_flags & BLF_SDUFAIL)
268 return;
269 blp->bl_flags |= BLF_SDUFAIL;
270 } else {
271 if (!(blp->bl_flags & BLF_SDUFAIL))
272 return;
273 blp->bl_flags &= ~BLF_SDUFAIL;
277 * If this link is otherwise up, then check if there are any other
278 * non-failed non-down links. If not, then we control the state of the
279 * whole bridge.
281 bip = blp->bl_inst;
282 bmp = bip->bi_mac;
283 if (blp->bl_linkstate != LINK_STATE_DOWN) {
284 for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
285 blcmp = list_next(&bip->bi_links, blcmp)) {
286 if (blp != blcmp &&
287 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
288 blcmp->bl_linkstate != LINK_STATE_DOWN)
289 break;
291 if (blcmp == NULL) {
292 bmp->bm_linkstate = failed ? LINK_STATE_DOWN :
293 LINK_STATE_UP;
294 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
299 * If we're becoming failed, then the link's current true state needs
300 * to be reflected upwards to this link's clients. If we're becoming
301 * unfailed, then we get the state of the bridge instead on all
302 * clients.
304 if (failed) {
305 if (bmp->bm_linkstate != blp->bl_linkstate)
306 mac_link_redo(blp->bl_mh, blp->bl_linkstate);
307 } else {
308 mac_link_redo(blp->bl_mh, bmp->bm_linkstate);
311 /* get the current mblk we're going to send up */
312 if ((mp = blp->bl_lfailmp) == NULL &&
313 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL)
314 return;
316 /* get a new one for next time */
317 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);
319 /* if none for next time, then report only failures */
320 if (blp->bl_lfailmp == NULL && !failed) {
321 blp->bl_lfailmp = mp;
322 return;
325 /* LINTED: alignment */
326 bcp = (bridge_ctl_t *)mp->b_rptr;
327 bcp->bc_linkid = blp->bl_linkid;
328 bcp->bc_failed = failed;
329 mp->b_wptr = (uchar_t *)(bcp + 1);
330 mp->b_next = *mlist;
331 *mlist = mp;
335 * Send control messages (link SDU changes) using the stream to the
336 * bridge instance daemon.
338 static void
339 send_up_messages(bridge_inst_t *bip, mblk_t *mp)
341 mblk_t *mnext;
342 queue_t *rq;
344 rq = bip->bi_control->bs_wq;
345 rq = OTHERQ(rq);
346 while (mp != NULL) {
347 mnext = mp->b_next;
348 mp->b_next = NULL;
349 putnext(rq, mp);
350 mp = mnext;
354 /* ARGSUSED */
355 static int
356 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val)
358 return (ENOTSUP);
361 static int
362 bridge_m_start(void *arg)
364 bridge_mac_t *bmp = arg;
366 bmp->bm_flags |= BMF_STARTED;
367 return (0);
370 static void
371 bridge_m_stop(void *arg)
373 bridge_mac_t *bmp = arg;
375 bmp->bm_flags &= ~BMF_STARTED;
378 /* ARGSUSED */
379 static int
380 bridge_m_setpromisc(void *arg, boolean_t on)
382 return (0);
385 /* ARGSUSED */
386 static int
387 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
389 return (0);
392 /* ARGSUSED */
393 static int
394 bridge_m_unicst(void *arg, const uint8_t *macaddr)
396 return (ENOTSUP);
399 static mblk_t *
400 bridge_m_tx(void *arg, mblk_t *mp)
402 _NOTE(ARGUNUSED(arg));
403 freemsgchain(mp);
404 return (NULL);
407 /* ARGSUSED */
408 static int
409 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
411 bridge_listfwd_t *blf = karg;
412 bridge_inst_t *bip;
413 bridge_fwd_t *bfp, match;
414 avl_index_t where;
416 bip = bridge_find_name(blf->blf_name);
417 if (bip == NULL)
418 return (ENOENT);
420 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL);
421 match.bf_flags |= BFF_VLANLOCAL;
422 rw_enter(&bip->bi_rwlock, RW_READER);
423 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL)
424 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER);
425 else
426 bfp = AVL_NEXT(&bip->bi_fwd, bfp);
427 if (bfp == NULL) {
428 bzero(blf, sizeof (*blf));
429 } else {
430 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL);
431 blf->blf_trill_nick = bfp->bf_trill_nick;
432 blf->blf_ms_age =
433 drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000;
434 blf->blf_is_local =
435 (bfp->bf_flags & BFF_LOCALADDR) != 0;
436 blf->blf_linkid = bfp->bf_links[0]->bl_linkid;
438 rw_exit(&bip->bi_rwlock);
439 bridge_unref(bip);
440 return (0);
443 static int
444 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
445 uint_t pr_valsize, const void *pr_val)
447 bridge_mac_t *bmp = arg;
448 bridge_inst_t *bip;
449 bridge_link_t *blp;
450 int err;
451 uint_t maxsdu;
452 mblk_t *mlist;
454 _NOTE(ARGUNUSED(pr_name));
455 switch (pr_num) {
456 case MAC_PROP_MTU:
457 if (pr_valsize < sizeof (bmp->bm_maxsdu)) {
458 err = EINVAL;
459 break;
461 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu));
462 if (maxsdu == bmp->bm_maxsdu) {
463 err = 0;
464 } else if ((bip = mac_to_inst(bmp)) == NULL) {
465 err = ENXIO;
466 } else {
467 rw_enter(&bip->bi_rwlock, RW_WRITER);
468 mlist = NULL;
469 for (blp = list_head(&bip->bi_links); blp != NULL;
470 blp = list_next(&bip->bi_links, blp)) {
471 if (blp->bl_flags & BLF_DELETED)
472 continue;
473 if (blp->bl_maxsdu == maxsdu)
474 link_sdu_fail(blp, B_FALSE, &mlist);
475 else if (blp->bl_maxsdu == bmp->bm_maxsdu)
476 link_sdu_fail(blp, B_TRUE, &mlist);
478 rw_exit(&bip->bi_rwlock);
479 bmp->bm_maxsdu = maxsdu;
480 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
481 send_up_messages(bip, mlist);
482 bridge_unref(bip);
483 err = 0;
485 break;
487 default:
488 err = ENOTSUP;
489 break;
491 return (err);
494 static int
495 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
496 uint_t pr_valsize, void *pr_val)
498 bridge_mac_t *bmp = arg;
499 int err = 0;
501 _NOTE(ARGUNUSED(pr_name));
502 switch (pr_num) {
503 case MAC_PROP_STATUS:
504 ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate));
505 bcopy(&bmp->bm_linkstate, pr_val, sizeof (&bmp->bm_linkstate));
506 break;
508 default:
509 err = ENOTSUP;
510 break;
512 return (err);
515 static void
516 bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
517 mac_prop_info_handle_t prh)
519 bridge_mac_t *bmp = arg;
521 _NOTE(ARGUNUSED(pr_name));
523 switch (pr_num) {
524 case MAC_PROP_MTU:
525 mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu,
526 bmp->bm_maxsdu);
527 break;
528 case MAC_PROP_STATUS:
529 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
530 break;
534 static mac_callbacks_t bridge_m_callbacks = {
535 MC_SETPROP | MC_GETPROP | MC_PROPINFO,
536 bridge_m_getstat,
537 bridge_m_start,
538 bridge_m_stop,
539 bridge_m_setpromisc,
540 bridge_m_multicst,
541 bridge_m_unicst,
542 bridge_m_tx,
543 NULL, /* reserved */
544 NULL, /* ioctl */
545 NULL, /* getcapab */
546 NULL, /* open */
547 NULL, /* close */
548 bridge_m_setprop,
549 bridge_m_getprop,
550 bridge_m_propinfo
554 * Create kstats from a list.
556 static kstat_t *
557 kstat_setup(kstat_named_t *knt, const char **names, int nstat,
558 const char *unitname)
560 kstat_t *ksp;
561 int i;
563 for (i = 0; i < nstat; i++)
564 kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64);
566 ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net",
567 KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
568 if (ksp != NULL) {
569 ksp->ks_data = knt;
570 kstat_install(ksp);
572 return (ksp);
576 * Find an existing bridge_mac_t structure or allocate a new one for the given
577 * bridge instance. This creates the mac driver instance that snoop can use.
579 static int
580 bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
582 bridge_mac_t *bmp, *bnew;
583 mac_register_t *mac;
584 int err;
586 *bmacp = NULL;
587 if ((mac = mac_alloc(MAC_VERSION)) == NULL)
588 return (EINVAL);
590 bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);
592 rw_enter(&bmac_rwlock, RW_WRITER);
593 for (bmp = list_head(&bmac_list); bmp != NULL;
594 bmp = list_next(&bmac_list, bmp)) {
595 if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
596 ASSERT(bmp->bm_inst == NULL);
597 bmp->bm_inst = bip;
598 rw_exit(&bmac_rwlock);
599 kmem_free(bnew, sizeof (*bnew));
600 mac_free(mac);
601 *bmacp = bmp;
602 return (0);
606 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
607 mac->m_driver = bnew;
608 mac->m_dip = bridge_dev_info;
609 mac->m_instance = (uint_t)-1;
610 mac->m_src_addr = (uint8_t *)zero_addr;
611 mac->m_callbacks = &bridge_m_callbacks;
614 * Note that the SDU limits are irrelevant, as nobody transmits on the
615 * bridge node itself. It's mainly for monitoring but we allow
616 * setting the bridge MTU for quick transition of all links part of the
617 * bridge to a new MTU.
619 mac->m_min_sdu = 1;
620 mac->m_max_sdu = 1500;
621 err = mac_register(mac, &bnew->bm_mh);
622 mac_free(mac);
623 if (err != 0) {
624 rw_exit(&bmac_rwlock);
625 kmem_free(bnew, sizeof (*bnew));
626 return (err);
629 bnew->bm_inst = bip;
630 (void) strcpy(bnew->bm_name, bip->bi_name);
631 if (list_is_empty(&bmac_list)) {
632 bridge_timerid = timeout(bridge_timer, NULL,
633 bridge_scan_interval);
635 list_insert_tail(&bmac_list, bnew);
636 rw_exit(&bmac_rwlock);
639 * Mark the MAC as unable to go "active" so that only passive clients
640 * (such as snoop) can bind to it.
642 mac_no_active(bnew->bm_mh);
643 *bmacp = bnew;
644 return (0);
648 * Disconnect the given bridge_mac_t from its bridge instance. The bridge
649 * instance is going away. The mac instance can't go away until the clients
650 * are gone (see bridge_timer).
652 static void
653 bmac_disconnect(bridge_mac_t *bmp)
655 bridge_inst_t *bip;
657 bmp->bm_linkstate = LINK_STATE_DOWN;
658 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
660 rw_enter(&bmac_rwlock, RW_READER);
661 bip = bmp->bm_inst;
662 bip->bi_mac = NULL;
663 bmp->bm_inst = NULL;
664 rw_exit(&bmac_rwlock);
667 /* This is used by the avl trees to sort forwarding table entries */
668 static int
669 fwd_compare(const void *addr1, const void *addr2)
671 const bridge_fwd_t *fwd1 = addr1;
672 const bridge_fwd_t *fwd2 = addr2;
673 int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL);
675 if (diff != 0)
676 return (diff > 0 ? 1 : -1);
678 if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) {
679 if (fwd1->bf_vlanid > fwd2->bf_vlanid)
680 return (1);
681 else if (fwd1->bf_vlanid < fwd2->bf_vlanid)
682 return (-1);
684 return (0);
687 static void
688 inst_free(bridge_inst_t *bip)
690 ASSERT(bip->bi_mac == NULL);
691 rw_destroy(&bip->bi_rwlock);
692 list_destroy(&bip->bi_links);
693 cv_destroy(&bip->bi_linkwait);
694 avl_destroy(&bip->bi_fwd);
695 if (bip->bi_ksp != NULL)
696 kstat_delete(bip->bi_ksp);
697 kmem_free(bip, sizeof (*bip));
700 static bridge_inst_t *
701 inst_alloc(const char *bridge)
703 bridge_inst_t *bip;
705 bip = kmem_zalloc(sizeof (*bip), KM_SLEEP);
706 bip->bi_refs = 1;
707 (void) strcpy(bip->bi_name, bridge);
708 rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL);
709 list_create(&bip->bi_links, sizeof (bridge_link_t),
710 offsetof(bridge_link_t, bl_node));
711 cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL);
712 avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t),
713 offsetof(bridge_fwd_t, bf_node));
714 return (bip);
717 static bridge_inst_t *
718 bridge_find_name(const char *bridge)
720 bridge_inst_t *bip;
722 mutex_enter(&inst_lock);
723 for (bip = list_head(&inst_list); bip != NULL;
724 bip = list_next(&inst_list, bip)) {
725 if (!(bip->bi_flags & BIF_SHUTDOWN) &&
726 strcmp(bridge, bip->bi_name) == 0) {
727 atomic_inc_uint(&bip->bi_refs);
728 break;
731 mutex_exit(&inst_lock);
733 return (bip);
736 static int
737 bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc,
738 cred_t *cred)
740 bridge_inst_t *bip, *bipnew;
741 bridge_mac_t *bmp = NULL;
742 int err;
744 *bipc = NULL;
745 bipnew = inst_alloc(bridge);
747 mutex_enter(&inst_lock);
748 lookup_retry:
749 for (bip = list_head(&inst_list); bip != NULL;
750 bip = list_next(&inst_list, bip)) {
751 if (strcmp(bridge, bip->bi_name) == 0)
752 break;
755 /* This should not take long; if it does, we've got a design problem */
756 if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) {
757 cv_wait(&inst_cv, &inst_lock);
758 goto lookup_retry;
761 if (bip == NULL) {
762 bip = bipnew;
763 bipnew = NULL;
764 list_insert_tail(&inst_list, bip);
767 mutex_exit(&inst_lock);
768 if (bipnew != NULL) {
769 inst_free(bipnew);
770 return (EEXIST);
773 bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
774 inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);
776 err = bmac_alloc(bip, &bmp);
777 if ((bip->bi_mac = bmp) == NULL)
778 goto fail_create;
781 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
782 * No extra locking is needed here.
784 if (!(bmp->bm_flags & BMF_DLS)) {
785 err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred));
786 if (err != 0)
787 goto fail_create;
788 bmp->bm_flags |= BMF_DLS;
791 bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh));
792 *bipc = bip;
793 return (0);
795 fail_create:
796 ASSERT(bip->bi_trilldata == NULL);
797 bip->bi_flags |= BIF_SHUTDOWN;
798 bridge_unref(bip);
799 return (err);
802 static void
803 bridge_unref(bridge_inst_t *bip)
805 if (atomic_dec_uint_nv(&bip->bi_refs) == 0) {
806 ASSERT(bip->bi_flags & BIF_SHUTDOWN);
807 /* free up mac for reuse before leaving global list */
808 if (bip->bi_mac != NULL)
809 bmac_disconnect(bip->bi_mac);
810 mutex_enter(&inst_lock);
811 list_remove(&inst_list, bip);
812 cv_broadcast(&inst_cv);
813 mutex_exit(&inst_lock);
814 inst_free(bip);
819 * Stream instances are used only for allocating bridges and serving as a
820 * control node. They serve no data-handling function.
822 static bridge_stream_t *
823 stream_alloc(void)
825 bridge_stream_t *bsp;
826 minor_t mn;
828 if ((mn = mac_minor_hold(B_FALSE)) == 0)
829 return (NULL);
830 bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP);
831 bsp->bs_minor = mn;
832 return (bsp);
835 static void
836 stream_free(bridge_stream_t *bsp)
838 mac_minor_rele(bsp->bs_minor);
839 kmem_free(bsp, sizeof (*bsp));
842 /* Reference hold/release functions for STREAMS-related taskq */
843 static void
844 stream_ref(bridge_stream_t *bsp)
846 mutex_enter(&stream_ref_lock);
847 bsp->bs_taskq_cnt++;
848 mutex_exit(&stream_ref_lock);
851 static void
852 stream_unref(bridge_stream_t *bsp)
854 mutex_enter(&stream_ref_lock);
855 if (--bsp->bs_taskq_cnt == 0)
856 cv_broadcast(&stream_ref_cv);
857 mutex_exit(&stream_ref_lock);
860 static void
861 link_free(bridge_link_t *blp)
863 bridge_inst_t *bip = blp->bl_inst;
865 ASSERT(!(blp->bl_flags & BLF_FREED));
866 blp->bl_flags |= BLF_FREED;
867 if (blp->bl_ksp != NULL)
868 kstat_delete(blp->bl_ksp);
869 if (blp->bl_lfailmp != NULL)
870 freeb(blp->bl_lfailmp);
871 cv_destroy(&blp->bl_trillwait);
872 mutex_destroy(&blp->bl_trilllock);
873 kmem_free(blp, sizeof (*blp));
874 /* Don't unreference the bridge until the MAC is closed */
875 bridge_unref(bip);
878 static void
879 link_unref(bridge_link_t *blp)
881 if (atomic_dec_uint_nv(&blp->bl_refs) == 0) {
882 bridge_inst_t *bip = blp->bl_inst;
884 ASSERT(blp->bl_flags & BLF_DELETED);
885 rw_enter(&bip->bi_rwlock, RW_WRITER);
886 if (blp->bl_flags & BLF_LINK_ADDED)
887 list_remove(&bip->bi_links, blp);
888 rw_exit(&bip->bi_rwlock);
889 if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links))
890 cv_broadcast(&bip->bi_linkwait);
891 link_free(blp);
895 static bridge_fwd_t *
896 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick)
898 bridge_fwd_t *bfp;
900 bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)),
901 KM_NOSLEEP);
902 if (bfp != NULL) {
903 bcopy(addr, bfp->bf_dest, ETHERADDRL);
904 bfp->bf_lastheard = ddi_get_lbolt();
905 bfp->bf_maxlinks = nlinks;
906 bfp->bf_links = (bridge_link_t **)(bfp + 1);
907 bfp->bf_trill_nick = nick;
909 return (bfp);
912 static bridge_fwd_t *
913 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid)
915 bridge_fwd_t *bfp, *vbfp;
916 bridge_fwd_t match;
918 bcopy(addr, match.bf_dest, ETHERADDRL);
919 match.bf_flags = 0;
920 rw_enter(&bip->bi_rwlock, RW_READER);
921 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
922 if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) {
923 match.bf_vlanid = vlanid;
924 match.bf_flags = BFF_VLANLOCAL;
925 vbfp = avl_find(&bip->bi_fwd, &match, NULL);
926 if (vbfp != NULL)
927 bfp = vbfp;
929 atomic_inc_uint(&bfp->bf_refs);
931 rw_exit(&bip->bi_rwlock);
932 return (bfp);
935 static void
936 fwd_free(bridge_fwd_t *bfp)
938 uint_t i;
939 bridge_inst_t *bip = bfp->bf_links[0]->bl_inst;
941 KIDECR(bki_count);
942 for (i = 0; i < bfp->bf_nlinks; i++)
943 link_unref(bfp->bf_links[i]);
944 kmem_free(bfp,
945 sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *));
948 static void
949 fwd_unref(bridge_fwd_t *bfp)
951 if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) {
952 ASSERT(!(bfp->bf_flags & BFF_INTREE));
953 fwd_free(bfp);
957 static void
958 fwd_delete(bridge_fwd_t *bfp)
960 bridge_inst_t *bip;
961 bridge_fwd_t *bfpzero;
963 if (bfp->bf_flags & BFF_INTREE) {
964 ASSERT(bfp->bf_nlinks > 0);
965 bip = bfp->bf_links[0]->bl_inst;
966 rw_enter(&bip->bi_rwlock, RW_WRITER);
967 /* Another thread could beat us to this */
968 if (bfp->bf_flags & BFF_INTREE) {
969 avl_remove(&bip->bi_fwd, bfp);
970 bfp->bf_flags &= ~BFF_INTREE;
971 if (bfp->bf_flags & BFF_VLANLOCAL) {
972 bfp->bf_flags &= ~BFF_VLANLOCAL;
973 bfpzero = avl_find(&bip->bi_fwd, bfp, NULL);
974 if (bfpzero != NULL && bfpzero->bf_vcnt > 0)
975 bfpzero->bf_vcnt--;
977 rw_exit(&bip->bi_rwlock);
978 fwd_unref(bfp); /* no longer in avl tree */
979 } else {
980 rw_exit(&bip->bi_rwlock);
985 static boolean_t
986 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp)
988 avl_index_t idx;
989 boolean_t retv;
991 rw_enter(&bip->bi_rwlock, RW_WRITER);
992 if (!(bip->bi_flags & BIF_SHUTDOWN) &&
993 avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax &&
994 avl_find(&bip->bi_fwd, bfp, &idx) == NULL) {
995 avl_insert(&bip->bi_fwd, bfp, idx);
996 bfp->bf_flags |= BFF_INTREE;
997 atomic_inc_uint(&bfp->bf_refs); /* avl entry */
998 retv = B_TRUE;
999 } else {
1000 retv = B_FALSE;
1002 rw_exit(&bip->bi_rwlock);
1003 return (retv);
1006 static void
1007 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr,
1008 const uint8_t *newaddr)
1010 bridge_inst_t *bip = blp->bl_inst;
1011 bridge_fwd_t *bfp, *bfnew;
1012 bridge_fwd_t match;
1013 avl_index_t idx;
1014 boolean_t drop_ref = B_FALSE;
1016 if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0)
1017 return;
1019 if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0)
1020 goto no_old_addr;
1023 * Find the previous entry, and remove our link from it.
1025 bcopy(oldaddr, match.bf_dest, ETHERADDRL);
1026 rw_enter(&bip->bi_rwlock, RW_WRITER);
1027 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
1028 int i;
1031 * See if we're in the list, and remove if so.
1033 for (i = 0; i < bfp->bf_nlinks; i++) {
1034 if (bfp->bf_links[i] == blp) {
1036 * We assume writes are atomic, so no special
1037 * MT handling is needed. The list length is
1038 * decremented first, and then we remove
1039 * entries.
1041 bfp->bf_nlinks--;
1042 for (; i < bfp->bf_nlinks; i++)
1043 bfp->bf_links[i] = bfp->bf_links[i + 1];
1044 drop_ref = B_TRUE;
1045 break;
1048 /* If no more links, then remove and free up */
1049 if (bfp->bf_nlinks == 0) {
1050 avl_remove(&bip->bi_fwd, bfp);
1051 bfp->bf_flags &= ~BFF_INTREE;
1052 } else {
1053 bfp = NULL;
1056 rw_exit(&bip->bi_rwlock);
1057 if (bfp != NULL)
1058 fwd_unref(bfp); /* no longer in avl tree */
1061 * Now get the new link address and add this link to the list. The
1062 * list should be of length 1 unless the user has configured multiple
1063 * NICs with the same address. (That's an incorrect configuration, but
1064 * we support it anyway.)
1066 no_old_addr:
1067 bfp = NULL;
1068 if ((bip->bi_flags & BIF_SHUTDOWN) ||
1069 bcmp(newaddr, zero_addr, ETHERADDRL) == 0)
1070 goto no_new_addr;
1072 bcopy(newaddr, match.bf_dest, ETHERADDRL);
1073 rw_enter(&bip->bi_rwlock, RW_WRITER);
1074 if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) {
1075 bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE);
1076 if (bfnew != NULL)
1077 KIINCR(bki_count);
1078 } else if (bfp->bf_nlinks < bfp->bf_maxlinks) {
1079 /* special case: link fits in existing entry */
1080 bfnew = bfp;
1081 } else {
1082 bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1,
1083 RBRIDGE_NICKNAME_NONE);
1084 if (bfnew != NULL) {
1085 KIINCR(bki_count);
1086 avl_remove(&bip->bi_fwd, bfp);
1087 bfp->bf_flags &= ~BFF_INTREE;
1088 bfnew->bf_nlinks = bfp->bf_nlinks;
1089 bcopy(bfp->bf_links, bfnew->bf_links,
1090 bfp->bf_nlinks * sizeof (bfp));
1091 /* reset the idx value due to removal above */
1092 (void) avl_find(&bip->bi_fwd, &match, &idx);
1096 if (bfnew != NULL) {
1097 bfnew->bf_links[bfnew->bf_nlinks++] = blp;
1098 if (drop_ref)
1099 drop_ref = B_FALSE;
1100 else
1101 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */
1103 if (bfnew != bfp) {
1104 /* local addresses are not subject to table limits */
1105 avl_insert(&bip->bi_fwd, bfnew, idx);
1106 bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR);
1107 atomic_inc_uint(&bfnew->bf_refs); /* avl entry */
1110 rw_exit(&bip->bi_rwlock);
1112 no_new_addr:
1114 * If we found an existing entry and we replaced it with a new one,
1115 * then drop the table reference from the old one. We removed it from
1116 * the AVL tree above.
1118 if (bfnew != NULL && bfp != NULL && bfnew != bfp)
1119 fwd_unref(bfp);
1121 /* Account for removed entry. */
1122 if (drop_ref)
1123 link_unref(blp);
1126 static void
1127 bridge_new_unicst(bridge_link_t *blp)
1129 uint8_t new_mac[ETHERADDRL];
1131 mac_unicast_primary_get(blp->bl_mh, new_mac);
1132 fwd_update_local(blp, blp->bl_local_mac, new_mac);
1133 bcopy(new_mac, blp->bl_local_mac, ETHERADDRL);
1137 * We must shut down a link prior to freeing it, and doing that requires
1138 * blocking to wait for running MAC threads while holding a reference. This is
1139 * run from a taskq to accomplish proper link shutdown followed by reference
1140 * drop.
1142 static void
1143 link_shutdown(void *arg)
1145 bridge_link_t *blp = arg;
1146 mac_handle_t mh = blp->bl_mh;
1147 bridge_inst_t *bip;
1148 bridge_fwd_t *bfp, *bfnext;
1149 avl_tree_t fwd_scavenge;
1150 int i;
1153 * This link is being destroyed. Notify TRILL now that it's no longer
1154 * possible to send packets. Data packets may still arrive until TRILL
1155 * calls bridge_trill_lnunref.
1157 if (blp->bl_trilldata != NULL)
1158 trill_lndstr_fn(blp->bl_trilldata, blp);
1160 if (blp->bl_flags & BLF_PROM_ADDED)
1161 (void) mac_promisc_remove(blp->bl_mphp);
1163 if (blp->bl_flags & BLF_SET_BRIDGE)
1164 mac_bridge_clear(mh, (mac_handle_t)blp);
1166 if (blp->bl_flags & BLF_MARGIN_ADDED) {
1167 (void) mac_notify_remove(blp->bl_mnh, B_TRUE);
1168 (void) mac_margin_remove(mh, blp->bl_margin);
1171 /* Tell the clients the real link state when we leave */
1172 mac_link_redo(blp->bl_mh,
1173 mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));
1175 /* Destroy all of the forwarding entries related to this link */
1176 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
1177 offsetof(bridge_fwd_t, bf_node));
1178 bip = blp->bl_inst;
1179 rw_enter(&bip->bi_rwlock, RW_WRITER);
1180 bfnext = avl_first(&bip->bi_fwd);
1181 while ((bfp = bfnext) != NULL) {
1182 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
1183 for (i = 0; i < bfp->bf_nlinks; i++) {
1184 if (bfp->bf_links[i] == blp)
1185 break;
1187 if (i >= bfp->bf_nlinks)
1188 continue;
1189 if (bfp->bf_nlinks > 1) {
1190 /* note that this can't be the last reference */
1191 link_unref(blp);
1192 bfp->bf_nlinks--;
1193 for (; i < bfp->bf_nlinks; i++)
1194 bfp->bf_links[i] = bfp->bf_links[i + 1];
1195 } else {
1196 ASSERT(bfp->bf_flags & BFF_INTREE);
1197 avl_remove(&bip->bi_fwd, bfp);
1198 bfp->bf_flags &= ~BFF_INTREE;
1199 avl_add(&fwd_scavenge, bfp);
1202 rw_exit(&bip->bi_rwlock);
1203 bfnext = avl_first(&fwd_scavenge);
1204 while ((bfp = bfnext) != NULL) {
1205 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
1206 avl_remove(&fwd_scavenge, bfp);
1207 fwd_unref(bfp);
1209 avl_destroy(&fwd_scavenge);
1211 if (blp->bl_flags & BLF_CLIENT_OPEN)
1212 mac_client_close(blp->bl_mch, 0);
1214 mac_close(mh);
1217 * We are now completely removed from the active list, so drop the
1218 * reference (see bridge_add_link).
1220 link_unref(blp);
1223 static void
1224 shutdown_inst(bridge_inst_t *bip)
1226 bridge_link_t *blp, *blnext;
1227 bridge_fwd_t *bfp;
1229 mutex_enter(&inst_lock);
1230 if (bip->bi_flags & BIF_SHUTDOWN) {
1231 mutex_exit(&inst_lock);
1232 return;
1236 * Once on the inst_list, the bridge instance must not leave that list
1237 * without having the shutdown flag set first. When the shutdown flag
1238 * is set, we own the list reference, so we must drop it before
1239 * returning.
1241 bip->bi_flags |= BIF_SHUTDOWN;
1242 mutex_exit(&inst_lock);
1244 bip->bi_control = NULL;
1246 rw_enter(&bip->bi_rwlock, RW_READER);
1247 blnext = list_head(&bip->bi_links);
1248 while ((blp = blnext) != NULL) {
1249 blnext = list_next(&bip->bi_links, blp);
1250 if (!(blp->bl_flags & BLF_DELETED)) {
1251 blp->bl_flags |= BLF_DELETED;
1252 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
1253 blp, DDI_SLEEP);
1256 while ((bfp = avl_first(&bip->bi_fwd)) != NULL) {
1257 atomic_inc_uint(&bfp->bf_refs);
1258 rw_exit(&bip->bi_rwlock);
1259 fwd_delete(bfp);
1260 fwd_unref(bfp);
1261 rw_enter(&bip->bi_rwlock, RW_READER);
1263 rw_exit(&bip->bi_rwlock);
1266 * This bridge is being destroyed. Notify TRILL once all of the
1267 * links are all gone.
1269 mutex_enter(&inst_lock);
1270 while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links))
1271 cv_wait(&bip->bi_linkwait, &inst_lock);
1272 mutex_exit(&inst_lock);
1273 if (bip->bi_trilldata != NULL)
1274 trill_brdstr_fn(bip->bi_trilldata, bip);
1276 bridge_unref(bip);
1280 * This is called once by the TRILL module when it starts up. It just sets the
1281 * global TRILL callback function pointers -- data transmit/receive and bridge
1282 * and link destroy notification. There's only one TRILL module, so only one
1283 * registration is needed.
1285 * TRILL should call this function with NULL pointers before unloading. It
1286 * must not do so before dropping all references to bridges and links. We
1287 * assert that this is true on debug builds.
1289 void
1290 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn,
1291 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn)
1293 #ifdef DEBUG
1294 if (recv_fn == NULL && trill_recv_fn != NULL) {
1295 bridge_inst_t *bip;
1296 bridge_link_t *blp;
1298 mutex_enter(&inst_lock);
1299 for (bip = list_head(&inst_list); bip != NULL;
1300 bip = list_next(&inst_list, bip)) {
1301 ASSERT(bip->bi_trilldata == NULL);
1302 rw_enter(&bip->bi_rwlock, RW_READER);
1303 for (blp = list_head(&bip->bi_links); blp != NULL;
1304 blp = list_next(&bip->bi_links, blp)) {
1305 ASSERT(blp->bl_trilldata == NULL);
1307 rw_exit(&bip->bi_rwlock);
1309 mutex_exit(&inst_lock);
1311 #endif
1312 trill_recv_fn = recv_fn;
1313 trill_encap_fn = encap_fn;
1314 trill_brdstr_fn = brdstr_fn;
1315 trill_lndstr_fn = lndstr_fn;
1319 * This registers the TRILL instance pointer with a bridge. Before this
1320 * pointer is set, the forwarding, TRILL receive, and bridge destructor
1321 * functions won't be called.
1323 * TRILL holds a reference on a bridge with this call. It must free the
1324 * reference by calling the unregister function below.
1326 bridge_inst_t *
1327 bridge_trill_brref(const char *bname, void *ptr)
1329 char bridge[MAXLINKNAMELEN];
1330 bridge_inst_t *bip;
1332 (void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname);
1333 bip = bridge_find_name(bridge);
1334 if (bip != NULL) {
1335 ASSERT(bip->bi_trilldata == NULL && ptr != NULL);
1336 bip->bi_trilldata = ptr;
1338 return (bip);
1341 void
1342 bridge_trill_brunref(bridge_inst_t *bip)
1344 ASSERT(bip->bi_trilldata != NULL);
1345 bip->bi_trilldata = NULL;
1346 bridge_unref(bip);
1350 * TRILL calls this function when referencing a particular link on a bridge.
1352 * It holds a reference on the link, so TRILL must clear out the reference when
1353 * it's done with the link (on unbinding).
1355 bridge_link_t *
1356 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr)
1358 bridge_link_t *blp;
1360 ASSERT(ptr != NULL);
1361 rw_enter(&bip->bi_rwlock, RW_READER);
1362 for (blp = list_head(&bip->bi_links); blp != NULL;
1363 blp = list_next(&bip->bi_links, blp)) {
1364 if (!(blp->bl_flags & BLF_DELETED) &&
1365 blp->bl_linkid == linkid && blp->bl_trilldata == NULL) {
1366 blp->bl_trilldata = ptr;
1367 blp->bl_flags &= ~BLF_TRILLACTIVE;
1368 (void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs));
1369 atomic_inc_uint(&blp->bl_refs);
1370 break;
1373 rw_exit(&bip->bi_rwlock);
1374 return (blp);
1377 void
1378 bridge_trill_lnunref(bridge_link_t *blp)
1380 mutex_enter(&blp->bl_trilllock);
1381 ASSERT(blp->bl_trilldata != NULL);
1382 blp->bl_trilldata = NULL;
1383 blp->bl_flags &= ~BLF_TRILLACTIVE;
1384 while (blp->bl_trillthreads > 0)
1385 cv_wait(&blp->bl_trillwait, &blp->bl_trilllock);
1386 mutex_exit(&blp->bl_trilllock);
1387 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
1388 link_unref(blp);
1392 * This periodic timer performs three functions:
1393 * 1. It scans the list of learned forwarding entries, and removes ones that
1394 * haven't been heard from in a while. The time limit is backed down if
1395 * we're above the configured table limit.
1396 * 2. It walks the links and decays away the bl_learns counter.
1397 * 3. It scans the observability node entries looking for ones that can be
1398 * freed up.
1400 /* ARGSUSED */
1401 static void
1402 bridge_timer(void *arg)
1404 bridge_inst_t *bip;
1405 bridge_fwd_t *bfp, *bfnext;
1406 bridge_mac_t *bmp, *bmnext;
1407 bridge_link_t *blp;
1408 int err;
1409 datalink_id_t tmpid;
1410 avl_tree_t fwd_scavenge;
1411 clock_t age_limit;
1412 uint32_t ldecay;
1414 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
1415 offsetof(bridge_fwd_t, bf_node));
1416 mutex_enter(&inst_lock);
1417 for (bip = list_head(&inst_list); bip != NULL;
1418 bip = list_next(&inst_list, bip)) {
1419 if (bip->bi_flags & BIF_SHUTDOWN)
1420 continue;
1421 rw_enter(&bip->bi_rwlock, RW_WRITER);
1422 /* compute scaled maximum age based on table limit */
1423 if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
1424 bip->bi_tshift++;
1425 else
1426 bip->bi_tshift = 0;
1427 if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
1428 if (bip->bi_tshift != 0)
1429 bip->bi_tshift--;
1430 age_limit = 1;
1432 bfnext = avl_first(&bip->bi_fwd);
1433 while ((bfp = bfnext) != NULL) {
1434 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
1435 if (!(bfp->bf_flags & BFF_LOCALADDR) &&
1436 (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
1437 ASSERT(bfp->bf_flags & BFF_INTREE);
1438 avl_remove(&bip->bi_fwd, bfp);
1439 bfp->bf_flags &= ~BFF_INTREE;
1440 avl_add(&fwd_scavenge, bfp);
1443 for (blp = list_head(&bip->bi_links); blp != NULL;
1444 blp = list_next(&bip->bi_links, blp)) {
1445 ldecay = mac_get_ldecay(blp->bl_mh);
1446 if (ldecay >= blp->bl_learns)
1447 blp->bl_learns = 0;
1448 else
1449 atomic_add_int(&blp->bl_learns, -(int)ldecay);
1451 rw_exit(&bip->bi_rwlock);
1452 bfnext = avl_first(&fwd_scavenge);
1453 while ((bfp = bfnext) != NULL) {
1454 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
1455 avl_remove(&fwd_scavenge, bfp);
1456 KIINCR(bki_expire);
1457 fwd_unref(bfp); /* drop tree reference */
1460 mutex_exit(&inst_lock);
1461 avl_destroy(&fwd_scavenge);
1464 * Scan the bridge_mac_t entries and try to free up the ones that are
1465 * no longer active. This must be done by polling, as neither DLS nor
1466 * MAC provides a driver any sort of positive control over clients.
1468 rw_enter(&bmac_rwlock, RW_WRITER);
1469 bmnext = list_head(&bmac_list);
1470 while ((bmp = bmnext) != NULL) {
1471 bmnext = list_next(&bmac_list, bmp);
1473 /* ignore active bridges */
1474 if (bmp->bm_inst != NULL)
1475 continue;
1477 if (bmp->bm_flags & BMF_DLS) {
1478 err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
1479 ASSERT(err == 0 || err == EBUSY);
1480 if (err == 0)
1481 bmp->bm_flags &= ~BMF_DLS;
1484 if (!(bmp->bm_flags & BMF_DLS)) {
1485 err = mac_unregister(bmp->bm_mh);
1486 ASSERT(err == 0 || err == EBUSY);
1487 if (err == 0) {
1488 list_remove(&bmac_list, bmp);
1489 kmem_free(bmp, sizeof (*bmp));
1493 if (list_is_empty(&bmac_list)) {
1494 bridge_timerid = 0;
1495 } else {
1496 bridge_timerid = timeout(bridge_timer, NULL,
1497 bridge_scan_interval);
1499 rw_exit(&bmac_rwlock);
1502 static int
1503 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
1505 bridge_stream_t *bsp;
1507 if (rq->q_ptr != NULL)
1508 return (0);
1510 if (sflag & MODOPEN)
1511 return (EINVAL);
1514 * Check the minor node number being opened. This tells us which
1515 * bridge instance the user wants.
1517 if (getminor(*devp) != 0) {
1519 * This is a regular DLPI stream for snoop or the like.
1520 * Redirect it through DLD.
1522 rq->q_qinfo = &bridge_dld_rinit;
1523 OTHERQ(rq)->q_qinfo = &bridge_dld_winit;
1524 return (dld_open(rq, devp, oflag, sflag, credp));
1525 } else {
1527 * Allocate the bridge control stream structure.
1529 if ((bsp = stream_alloc()) == NULL)
1530 return (ENOSR);
1531 rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp;
1532 bsp->bs_wq = WR(rq);
1533 *devp = makedevice(getmajor(*devp), bsp->bs_minor);
1534 qprocson(rq);
1535 return (0);
1540 * This is used only for bridge control streams. DLPI goes through dld
1541 * instead.
1543 /* ARGSUSED */
1544 static int
1545 bridge_close(queue_t *rq, int flags __unused, cred_t *credp __unused)
1547 bridge_stream_t *bsp = rq->q_ptr;
1548 bridge_inst_t *bip;
1551 * Wait for any stray taskq (add/delete link) entries related to this
1552 * stream to leave the system.
1554 mutex_enter(&stream_ref_lock);
1555 while (bsp->bs_taskq_cnt != 0)
1556 cv_wait(&stream_ref_cv, &stream_ref_lock);
1557 mutex_exit(&stream_ref_lock);
1559 qprocsoff(rq);
1560 if ((bip = bsp->bs_inst) != NULL)
1561 shutdown_inst(bip);
1562 rq->q_ptr = WR(rq)->q_ptr = NULL;
1563 stream_free(bsp);
1564 if (bip != NULL)
1565 bridge_unref(bip);
1567 return (0);
1570 static void
1571 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
1572 uint16_t vlanid)
1574 bridge_inst_t *bip = blp->bl_inst;
1575 bridge_fwd_t *bfp, *bfpnew;
1576 int i;
1577 boolean_t replaced = B_FALSE;
1579 /* Ignore multi-destination address used as source; it's nonsense. */
1580 if (*saddr & 1)
1581 return;
1584 * If the source is known, then check whether it belongs on this link.
1585 * If not, and this isn't a fixed local address, then we've detected a
1586 * move. If it's not known, learn it.
1588 if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
1590 * If the packet has a fixed local source address, then there's
1591 * nothing we can learn. We must quit. If this was a received
1592 * packet, then the sender has stolen our address, but there's
1593 * nothing we can do. If it's a transmitted packet, then
1594 * that's the normal case.
1596 if (bfp->bf_flags & BFF_LOCALADDR) {
1597 fwd_unref(bfp);
1598 return;
1602 * Check if the link (and TRILL sender, if any) being used is
1603 * among the ones registered for this address. If so, then
1604 * this is information that we already know.
1606 if (bfp->bf_trill_nick == ingress_nick) {
1607 for (i = 0; i < bfp->bf_nlinks; i++) {
1608 if (bfp->bf_links[i] == blp) {
1609 bfp->bf_lastheard = ddi_get_lbolt();
1610 fwd_unref(bfp);
1611 return;
1618 * Note that we intentionally "unlearn" things that appear to be under
1619 * attack on this link. The forwarding cache is a negative thing for
1620 * security -- it disables reachability as a performance optimization
1621 * -- so leaving out entries optimizes for success and defends against
1622 * the attack. Thus, the bare increment without a check in the delete
1623 * code above is right. (And it's ok if we skid over the limit a
1624 * little, so there's no syncronization needed on the test.)
1626 if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
1627 if (bfp != NULL) {
1628 if (bfp->bf_vcnt == 0)
1629 fwd_delete(bfp);
1630 fwd_unref(bfp);
1632 return;
1635 atomic_inc_uint(&blp->bl_learns);
1637 if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
1638 if (bfp != NULL)
1639 fwd_unref(bfp);
1640 return;
1642 KIINCR(bki_count);
1644 if (bfp != NULL) {
1646 * If this is a new destination for the same VLAN, then delete
1647 * so that we can update. If it's a different VLAN, then we're
1648 * not going to delete the original. Split off instead into an
1649 * IVL entry.
1651 if (bfp->bf_vlanid == vlanid) {
1652 /* save the count of IVL duplicates */
1653 bfpnew->bf_vcnt = bfp->bf_vcnt;
1655 /* entry deletes count as learning events */
1656 atomic_inc_uint(&blp->bl_learns);
1658 /* destroy and create anew; node moved */
1659 fwd_delete(bfp);
1660 replaced = B_TRUE;
1661 KIINCR(bki_moved);
1662 } else {
1663 bfp->bf_vcnt++;
1664 bfpnew->bf_flags |= BFF_VLANLOCAL;
1666 fwd_unref(bfp);
1668 bfpnew->bf_links[0] = blp;
1669 bfpnew->bf_nlinks = 1;
1670 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */
1671 if (!fwd_insert(bip, bfpnew))
1672 fwd_free(bfpnew);
1673 else if (!replaced)
1674 KIINCR(bki_source);
1678 * Process the VLAN headers for output on a given link. There are several
1679 * cases (noting that we don't map VLANs):
1680 * 1. The input packet is good as it is; either
1681 * a. It has no tag, and output has same PVID
1682 * b. It has a non-zero priority-only tag for PVID, and b_band is same
1683 * c. It has a tag with VLAN different from PVID, and b_band is same
1684 * 2. The tag must change: non-zero b_band is different from tag priority
1685 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
1686 * 4. The packet has no tag and needs one:
1687 * a. VLAN ID same as PVID, but b_band is non-zero
1688 * b. VLAN ID different from PVID
1689 * We exclude case 1 first, then modify the packet. Note that output packets
1690 * get a priority set by the mblk, not by the header, because QoS in bridging
1691 * requires priority recalculation at each node.
1693 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
1695 static mblk_t *
1696 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
1698 boolean_t source_has_tag = (tci != 0xFFFF);
1699 mblk_t *mpcopy;
1700 size_t mlen, minlen;
1701 struct ether_vlan_header *evh;
1702 int pri;
1704 /* This helps centralize error handling in the caller. */
1705 if (mp == NULL)
1706 return (mp);
1708 /* No forwarded packet can have hardware checksum enabled */
1709 DB_CKSUMFLAGS(mp) = 0;
1711 /* Get the no-modification cases out of the way first */
1712 if (!source_has_tag && vlanid == pvid) /* 1a */
1713 return (mp);
1715 pri = VLAN_PRI(tci);
1716 if (source_has_tag && mp->b_band == pri) {
1717 if (vlanid != pvid) /* 1c */
1718 return (mp);
1719 if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */
1720 return (mp);
1724 * We now know that we must modify the packet. Prepare for that. Note
1725 * that if a tag is present, the caller has already done a pullup for
1726 * the VLAN header, so we're good to go.
1728 if (MBLKL(mp) < sizeof (struct ether_header)) {
1729 mpcopy = msgpullup(mp, sizeof (struct ether_header));
1730 if (mpcopy == NULL) {
1731 freemsg(mp);
1732 return (NULL);
1734 mp = mpcopy;
1736 if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
1737 (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
1738 minlen = mlen = MBLKL(mp);
1739 if (!source_has_tag)
1740 minlen += VLAN_INCR;
1741 ASSERT(minlen >= sizeof (struct ether_vlan_header));
1743 * We're willing to copy some data to avoid fragmentation, but
1744 * not a lot.
1746 if (minlen > 256)
1747 minlen = sizeof (struct ether_vlan_header);
1748 mpcopy = allocb(minlen, BPRI_MED);
1749 if (mpcopy == NULL) {
1750 freemsg(mp);
1751 return (NULL);
1753 if (mlen <= minlen) {
1754 /* We toss the first mblk when we can. */
1755 bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
1756 mpcopy->b_wptr += mlen;
1757 mpcopy->b_cont = mp->b_cont;
1758 freeb(mp);
1759 } else {
1760 /* If not, then just copy what we need */
1761 if (!source_has_tag)
1762 minlen = sizeof (struct ether_header);
1763 bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
1764 mpcopy->b_wptr += minlen;
1765 mpcopy->b_cont = mp;
1766 mp->b_rptr += minlen;
1768 mp = mpcopy;
1771 /* LINTED: pointer alignment */
1772 evh = (struct ether_vlan_header *)mp->b_rptr;
1773 if (source_has_tag) {
1774 if (mp->b_band == 0 && vlanid == pvid) { /* 3 */
1775 evh->ether_tpid = evh->ether_type;
1776 mlen = MBLKL(mp);
1777 if (mlen > sizeof (struct ether_vlan_header))
1778 ovbcopy(mp->b_rptr +
1779 sizeof (struct ether_vlan_header),
1780 mp->b_rptr + sizeof (struct ether_header),
1781 mlen - sizeof (struct ether_vlan_header));
1782 mp->b_wptr -= VLAN_INCR;
1783 } else { /* 2 */
1784 if (vlanid == pvid)
1785 vlanid = VLAN_ID_NONE;
1786 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
1787 evh->ether_tci = htons(tci);
1789 } else {
1790 /* case 4: no header present, but one is needed */
1791 mlen = MBLKL(mp);
1792 if (mlen > sizeof (struct ether_header))
1793 ovbcopy(mp->b_rptr + sizeof (struct ether_header),
1794 mp->b_rptr + sizeof (struct ether_vlan_header),
1795 mlen - sizeof (struct ether_header));
1796 mp->b_wptr += VLAN_INCR;
1797 ASSERT(mp->b_wptr <= DB_LIM(mp));
1798 if (vlanid == pvid)
1799 vlanid = VLAN_ID_NONE;
1800 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
1801 evh->ether_type = evh->ether_tpid;
1802 evh->ether_tpid = htons(ETHERTYPE_VLAN);
1803 evh->ether_tci = htons(tci);
1805 return (mp);
1808 /* Record VLAN information and strip header if requested . */
1809 static void
1810 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr)
1812 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
1813 struct ether_vlan_header *evhp;
1814 uint16_t ether_type;
1816 /* LINTED: alignment */
1817 evhp = (struct ether_vlan_header *)mp->b_rptr;
1818 hdr_info->mhi_istagged = B_TRUE;
1819 hdr_info->mhi_tci = ntohs(evhp->ether_tci);
1820 if (striphdr) {
1822 * For VLAN tagged frames update the ether_type
1823 * in hdr_info before stripping the header.
1825 ether_type = ntohs(evhp->ether_type);
1826 hdr_info->mhi_origsap = ether_type;
1827 hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ?
1828 ether_type : DLS_SAP_LLC;
1829 mp->b_rptr = (uchar_t *)(evhp + 1);
1831 } else {
1832 hdr_info->mhi_istagged = B_FALSE;
1833 hdr_info->mhi_tci = VLAN_ID_NONE;
1834 if (striphdr)
1835 mp->b_rptr += sizeof (struct ether_header);
1840 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID.
1842 static boolean_t
1843 bridge_can_send(bridge_link_t *blp, uint16_t vlanid)
1845 ASSERT(vlanid != VLAN_ID_NONE);
1846 if (blp->bl_flags & BLF_DELETED)
1847 return (B_FALSE);
1848 if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING)
1849 return (B_FALSE);
1850 return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid));
1854 * This function scans the bridge forwarding tables in order to forward a given
1855 * packet. If the packet either doesn't need forwarding (the current link is
1856 * correct) or the current link needs a copy as well, then the packet is
1857 * returned to the caller.
1859 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a
1860 * TRILL tunnel. If the destination points there, then drop instead.
1862 static mblk_t *
1863 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
1864 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit)
1866 mblk_t *mpsend, *mpcopy;
1867 bridge_inst_t *bip = blp->bl_inst;
1868 bridge_link_t *blpsend, *blpnext;
1869 bridge_fwd_t *bfp;
1870 uint_t i;
1871 boolean_t selfseen = B_FALSE;
1872 void *tdp;
1873 const uint8_t *daddr = hdr_info->mhi_daddr;
1876 * Check for the IEEE "reserved" multicast addresses. Messages sent to
1877 * these addresses are used for link-local control (STP and pause), and
1878 * are never forwarded or redirected.
1880 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 &&
1881 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) {
1882 if (from_trill) {
1883 freemsg(mp);
1884 mp = NULL;
1886 return (mp);
1889 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) {
1892 * If trill indicates a destination for this node, then it's
1893 * clearly not intended for local delivery. We must tell TRILL
1894 * to encapsulate, as long as we didn't just decapsulate it.
1896 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) {
1898 * Error case: can't reencapsulate if the protocols are
1899 * working correctly.
1901 if (from_trill) {
1902 freemsg(mp);
1903 return (NULL);
1905 mutex_enter(&blp->bl_trilllock);
1906 if ((tdp = blp->bl_trilldata) != NULL) {
1907 blp->bl_trillthreads++;
1908 mutex_exit(&blp->bl_trilllock);
1909 update_header(mp, hdr_info, B_FALSE);
1910 if (is_xmit)
1911 mp = mac_fix_cksum(mp);
1912 /* all trill data frames have Inner.VLAN */
1913 mp = reform_vlan_header(mp, vlanid, tci, 0);
1914 if (mp == NULL) {
1915 KIINCR(bki_drops);
1916 fwd_unref(bfp);
1917 return (NULL);
1919 trill_encap_fn(tdp, blp, hdr_info, mp,
1920 bfp->bf_trill_nick);
1921 mutex_enter(&blp->bl_trilllock);
1922 if (--blp->bl_trillthreads == 0 &&
1923 blp->bl_trilldata == NULL)
1924 cv_broadcast(&blp->bl_trillwait);
1926 mutex_exit(&blp->bl_trilllock);
1928 /* if TRILL has been disabled, then kill this stray */
1929 if (tdp == NULL) {
1930 freemsg(mp);
1931 fwd_delete(bfp);
1933 fwd_unref(bfp);
1934 return (NULL);
1937 /* find first link we can send on */
1938 for (i = 0; i < bfp->bf_nlinks; i++) {
1939 blpsend = bfp->bf_links[i];
1940 if (blpsend == blp)
1941 selfseen = B_TRUE;
1942 else if (bridge_can_send(blpsend, vlanid))
1943 break;
1946 while (i < bfp->bf_nlinks) {
1947 blpsend = bfp->bf_links[i];
1948 for (i++; i < bfp->bf_nlinks; i++) {
1949 blpnext = bfp->bf_links[i];
1950 if (blpnext == blp)
1951 selfseen = B_TRUE;
1952 else if (bridge_can_send(blpnext, vlanid))
1953 break;
1955 if (i == bfp->bf_nlinks && !selfseen) {
1956 mpsend = mp;
1957 mp = NULL;
1958 } else {
1959 mpsend = copymsg(mp);
1962 if (!from_trill && is_xmit)
1963 mpsend = mac_fix_cksum(mpsend);
1965 mpsend = reform_vlan_header(mpsend, vlanid, tci,
1966 blpsend->bl_pvid);
1967 if (mpsend == NULL) {
1968 KIINCR(bki_drops);
1969 continue;
1972 KIINCR(bki_forwards);
1974 * No need to bump up the link reference count, as
1975 * the forwarding entry itself holds a reference to
1976 * the link.
1978 if (bfp->bf_flags & BFF_LOCALADDR) {
1979 mac_rx_common(blpsend->bl_mh, NULL, mpsend);
1980 } else {
1981 KLPINCR(blpsend, bkl_xmit);
1982 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend,
1983 mpsend);
1984 freemsg(mpsend);
1988 * Handle a special case: if we're transmitting to the original
1989 * link, then check whether the localaddr flag is set. If it
1990 * is, then receive instead. This doesn't happen with ordinary
1991 * bridging, but does happen often with TRILL decapsulation.
1993 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) {
1994 mac_rx_common(blp->bl_mh, NULL, mp);
1995 mp = NULL;
1997 fwd_unref(bfp);
1998 } else {
2000 * TRILL has two cases to handle. If the packet is off the
2001 * wire (not from TRILL), then we need to send up into the
2002 * TRILL module to have the distribution tree computed. If the
2003 * packet is from TRILL (decapsulated), then we're part of the
2004 * distribution tree, and we need to copy the packet on member
2005 * interfaces.
2007 * Thus, the from TRILL case is identical to the STP case.
2009 if (!from_trill && blp->bl_trilldata != NULL) {
2010 mutex_enter(&blp->bl_trilllock);
2011 if ((tdp = blp->bl_trilldata) != NULL) {
2012 blp->bl_trillthreads++;
2013 mutex_exit(&blp->bl_trilllock);
2014 if ((mpsend = copymsg(mp)) != NULL) {
2015 update_header(mpsend,
2016 hdr_info, B_FALSE);
2018 * all trill data frames have
2019 * Inner.VLAN
2021 mpsend = reform_vlan_header(mpsend,
2022 vlanid, tci, 0);
2023 if (mpsend == NULL) {
2024 KIINCR(bki_drops);
2025 } else {
2026 trill_encap_fn(tdp, blp,
2027 hdr_info, mpsend,
2028 RBRIDGE_NICKNAME_NONE);
2031 mutex_enter(&blp->bl_trilllock);
2032 if (--blp->bl_trillthreads == 0 &&
2033 blp->bl_trilldata == NULL)
2034 cv_broadcast(&blp->bl_trillwait);
2036 mutex_exit(&blp->bl_trilllock);
2040 * This is an unknown destination, so flood.
2042 rw_enter(&bip->bi_rwlock, RW_READER);
2043 for (blpnext = list_head(&bip->bi_links); blpnext != NULL;
2044 blpnext = list_next(&bip->bi_links, blpnext)) {
2045 if (blpnext == blp)
2046 selfseen = B_TRUE;
2047 else if (bridge_can_send(blpnext, vlanid))
2048 break;
2050 if (blpnext != NULL)
2051 atomic_inc_uint(&blpnext->bl_refs);
2052 rw_exit(&bip->bi_rwlock);
2053 while ((blpsend = blpnext) != NULL) {
2054 rw_enter(&bip->bi_rwlock, RW_READER);
2055 for (blpnext = list_next(&bip->bi_links, blpsend);
2056 blpnext != NULL;
2057 blpnext = list_next(&bip->bi_links, blpnext)) {
2058 if (blpnext == blp)
2059 selfseen = B_TRUE;
2060 else if (bridge_can_send(blpnext, vlanid))
2061 break;
2063 if (blpnext != NULL)
2064 atomic_inc_uint(&blpnext->bl_refs);
2065 rw_exit(&bip->bi_rwlock);
2066 if (blpnext == NULL && !selfseen) {
2067 mpsend = mp;
2068 mp = NULL;
2069 } else {
2070 mpsend = copymsg(mp);
2073 if (!from_trill && is_xmit)
2074 mpsend = mac_fix_cksum(mpsend);
2076 mpsend = reform_vlan_header(mpsend, vlanid, tci,
2077 blpsend->bl_pvid);
2078 if (mpsend == NULL) {
2079 KIINCR(bki_drops);
2080 continue;
2083 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST)
2084 KIINCR(bki_unknown);
2085 else
2086 KIINCR(bki_mbcast);
2087 KLPINCR(blpsend, bkl_xmit);
2088 if ((mpcopy = copymsg(mpsend)) != NULL)
2089 mac_rx_common(blpsend->bl_mh, NULL, mpcopy);
2090 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend);
2091 freemsg(mpsend);
2092 link_unref(blpsend);
2097 * At this point, if np is non-NULL, it means that the caller needs to
2098 * continue on the selected link.
2100 return (mp);
2104 * Extract and validate the VLAN information for a given packet. This checks
2105 * conformance with the rules for use of the PVID on the link, and for the
2106 * allowed (configured) VLAN set.
2108 * Returns B_TRUE if the packet passes, B_FALSE if it fails.
2110 static boolean_t
2111 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
2112 uint16_t *vlanidp, uint16_t *tcip)
2114 uint16_t tci, vlanid;
2116 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
2117 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci);
2118 ptrdiff_t mlen;
2121 * Extract the VLAN ID information, regardless of alignment,
2122 * and without a pullup. This isn't attractive, but we do this
2123 * to avoid having to deal with the pointers stashed in
2124 * hdr_info moving around or having the caller deal with a new
2125 * mblk_t pointer.
2127 while (mp != NULL) {
2128 mlen = MBLKL(mp);
2129 if (mlen > tpos && mlen > 0)
2130 break;
2131 tpos -= mlen;
2132 mp = mp->b_cont;
2134 if (mp == NULL)
2135 return (B_FALSE);
2136 tci = mp->b_rptr[tpos] << 8;
2137 if (++tpos >= mlen) {
2138 do {
2139 mp = mp->b_cont;
2140 } while (mp != NULL && MBLKL(mp) == 0);
2141 if (mp == NULL)
2142 return (B_FALSE);
2143 tpos = 0;
2145 tci |= mp->b_rptr[tpos];
2147 vlanid = VLAN_ID(tci);
2148 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX)
2149 return (B_FALSE);
2150 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid)
2151 goto input_no_vlan;
2152 if (!BRIDGE_VLAN_ISSET(blp, vlanid))
2153 return (B_FALSE);
2154 } else {
2155 tci = 0xFFFF;
2156 input_no_vlan:
2158 * If PVID is set to zero, then untagged traffic is not
2159 * supported here. Do not learn or forward.
2161 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE)
2162 return (B_FALSE);
2165 *tcip = tci;
2166 *vlanidp = vlanid;
2167 return (B_TRUE);
2171 * Handle MAC notifications.
2173 static void
2174 bridge_notify_cb(void *arg, mac_notify_type_t note_type)
2176 bridge_link_t *blp = arg;
2178 switch (note_type) {
2179 case MAC_NOTE_UNICST:
2180 bridge_new_unicst(blp);
2181 break;
2183 case MAC_NOTE_SDU_SIZE: {
2184 uint_t maxsdu;
2185 bridge_inst_t *bip = blp->bl_inst;
2186 bridge_mac_t *bmp = bip->bi_mac;
2187 boolean_t notify = B_FALSE;
2188 mblk_t *mlist = NULL;
2190 mac_sdu_get(blp->bl_mh, NULL, &maxsdu);
2191 rw_enter(&bip->bi_rwlock, RW_READER);
2192 if (list_prev(&bip->bi_links, blp) == NULL &&
2193 list_next(&bip->bi_links, blp) == NULL) {
2194 notify = (maxsdu != bmp->bm_maxsdu);
2195 bmp->bm_maxsdu = maxsdu;
2197 blp->bl_maxsdu = maxsdu;
2198 if (maxsdu != bmp->bm_maxsdu)
2199 link_sdu_fail(blp, B_TRUE, &mlist);
2200 else if (notify)
2201 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
2202 rw_exit(&bip->bi_rwlock);
2203 send_up_messages(bip, mlist);
2204 break;
2210 * This is called by the MAC layer. As with the transmit side, we're right in
2211 * the data path for all I/O on this port, so if we don't need to forward this
2212 * packet anywhere, we have to send it upwards via mac_rx_common.
2214 static void
2215 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext)
2217 mblk_t *mp, *mpcopy;
2218 bridge_link_t *blp = (bridge_link_t *)mh;
2219 bridge_inst_t *bip = blp->bl_inst;
2220 bridge_mac_t *bmp = bip->bi_mac;
2221 mac_header_info_t hdr_info;
2222 uint16_t vlanid, tci;
2223 boolean_t trillmode = B_FALSE;
2225 KIINCR(bki_recv);
2226 KLINCR(bkl_recv);
2229 * Regardless of state, check for inbound TRILL packets when TRILL is
2230 * active. These are pulled out of band and sent for TRILL handling.
2232 if (blp->bl_trilldata != NULL) {
2233 void *tdp;
2234 mblk_t *newhead;
2235 mblk_t *tail = NULL;
2237 mutex_enter(&blp->bl_trilllock);
2238 if ((tdp = blp->bl_trilldata) != NULL) {
2239 blp->bl_trillthreads++;
2240 mutex_exit(&blp->bl_trilllock);
2241 trillmode = B_TRUE;
2242 newhead = mpnext;
2243 while ((mp = mpnext) != NULL) {
2244 boolean_t raw_isis, bridge_group;
2246 mpnext = mp->b_next;
2249 * If the header isn't readable, then leave on
2250 * the list and continue.
2252 if (mac_header_info(blp->bl_mh, mp,
2253 &hdr_info) != 0) {
2254 tail = mp;
2255 continue;
2259 * The TRILL document specifies that, on
2260 * Ethernet alone, IS-IS packets arrive with
2261 * LLC rather than Ethertype, and using a
2262 * specific destination address. We must check
2263 * for that here. Also, we need to give BPDUs
2264 * to TRILL for processing.
2266 raw_isis = bridge_group = B_FALSE;
2267 if (hdr_info.mhi_dsttype ==
2268 MAC_ADDRTYPE_MULTICAST) {
2269 if (memcmp(hdr_info.mhi_daddr,
2270 all_isis_rbridges, ETHERADDRL) == 0)
2271 raw_isis = B_TRUE;
2272 else if (memcmp(hdr_info.mhi_daddr,
2273 bridge_group_address, ETHERADDRL) ==
2275 bridge_group = B_TRUE;
2277 if (!raw_isis && !bridge_group &&
2278 hdr_info.mhi_bindsap != ETHERTYPE_TRILL &&
2279 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN ||
2280 /* LINTED: alignment */
2281 ((struct ether_vlan_header *)mp->b_rptr)->
2282 ether_type != htons(ETHERTYPE_TRILL))) {
2283 tail = mp;
2284 continue;
2288 * We've got TRILL input. Remove from the list
2289 * and send up through the TRILL module. (Send
2290 * a copy through promiscuous receive just to
2291 * support snooping on TRILL. Order isn't
2292 * preserved strictly, but that doesn't matter
2293 * here.)
2295 if (tail != NULL)
2296 tail->b_next = mpnext;
2297 mp->b_next = NULL;
2298 if (mp == newhead)
2299 newhead = mpnext;
2300 mac_trill_snoop(blp->bl_mh, mp);
2301 update_header(mp, &hdr_info, B_TRUE);
2303 * On raw IS-IS and BPDU frames, we have to
2304 * make sure that the length is trimmed
2305 * properly. We use origsap in order to cope
2306 * with jumbograms for IS-IS. (Regular mac
2307 * can't.)
2309 if (raw_isis || bridge_group) {
2310 size_t msglen = msgdsize(mp);
2312 if (msglen > hdr_info.mhi_origsap) {
2313 (void) adjmsg(mp,
2314 hdr_info.mhi_origsap -
2315 msglen);
2316 } else if (msglen <
2317 hdr_info.mhi_origsap) {
2318 freemsg(mp);
2319 continue;
2322 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info);
2324 mpnext = newhead;
2325 mutex_enter(&blp->bl_trilllock);
2326 if (--blp->bl_trillthreads == 0 &&
2327 blp->bl_trilldata == NULL)
2328 cv_broadcast(&blp->bl_trillwait);
2330 mutex_exit(&blp->bl_trilllock);
2331 if (mpnext == NULL)
2332 return;
2336 * If this is a TRILL RBridge, then just check whether this link is
2337 * used at all for forwarding. If not, then we're done.
2339 if (trillmode) {
2340 if (!(blp->bl_flags & BLF_TRILLACTIVE) ||
2341 (blp->bl_flags & BLF_SDUFAIL)) {
2342 mac_rx_common(blp->bl_mh, rsrc, mpnext);
2343 return;
2345 } else {
2347 * For regular (STP) bridges, if we're in blocking or listening
2348 * state, then do nothing. We don't learn or forward until
2349 * told to do so.
2351 if (blp->bl_state == BLS_BLOCKLISTEN) {
2352 mac_rx_common(blp->bl_mh, rsrc, mpnext);
2353 return;
2358 * Send a copy of the message chain up to the observability node users.
2359 * For TRILL, we must obey the VLAN AF rules, so we go packet-by-
2360 * packet.
2362 if (!trillmode && blp->bl_state == BLS_FORWARDING &&
2363 (bmp->bm_flags & BMF_STARTED) &&
2364 (mp = copymsgchain(mpnext)) != NULL) {
2365 mac_rx(bmp->bm_mh, NULL, mp);
2369 * We must be in learning or forwarding state, or using TRILL on a link
2370 * with one or more VLANs active. For each packet in the list, process
2371 * the source address, and then attempt to forward.
2373 while ((mp = mpnext) != NULL) {
2374 mpnext = mp->b_next;
2375 mp->b_next = NULL;
2378 * If we can't decode the header or if the header specifies a
2379 * multicast source address (impossible!), then don't bother
2380 * learning or forwarding, but go ahead and forward up the
2381 * stack for subsequent processing.
2383 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 ||
2384 (hdr_info.mhi_saddr[0] & 1) != 0) {
2385 KIINCR(bki_drops);
2386 KLINCR(bkl_drops);
2387 mac_rx_common(blp->bl_mh, rsrc, mp);
2388 continue;
2392 * Extract and validate the VLAN ID for this packet.
2394 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
2395 !BRIDGE_AF_ISSET(blp, vlanid)) {
2396 mac_rx_common(blp->bl_mh, rsrc, mp);
2397 continue;
2400 if (trillmode) {
2402 * Special test required by TRILL document: must
2403 * discard frames with outer address set to ESADI.
2405 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges,
2406 ETHERADDRL) == 0) {
2407 mac_rx_common(blp->bl_mh, rsrc, mp);
2408 continue;
2412 * If we're in TRILL mode, then the call above to get
2413 * the VLAN ID has also checked that we're the
2414 * appointed forwarder, so report that we're handling
2415 * this packet to any observability node users.
2417 if ((bmp->bm_flags & BMF_STARTED) &&
2418 (mpcopy = copymsg(mp)) != NULL)
2419 mac_rx(bmp->bm_mh, NULL, mpcopy);
2423 * First process the source address and learn from it. For
2424 * TRILL, we learn only if we're the appointed forwarder.
2426 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
2427 vlanid);
2430 * Now check whether we're forwarding and look up the
2431 * destination. If we can forward, do so.
2433 if (trillmode || blp->bl_state == BLS_FORWARDING) {
2434 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
2435 B_FALSE, B_FALSE);
2437 if (mp != NULL)
2438 mac_rx_common(blp->bl_mh, rsrc, mp);
2443 /* ARGSUSED */
2444 static mblk_t *
2445 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext)
2447 bridge_link_t *blp = (bridge_link_t *)mh;
2448 bridge_inst_t *bip = blp->bl_inst;
2449 bridge_mac_t *bmp = bip->bi_mac;
2450 mac_header_info_t hdr_info;
2451 uint16_t vlanid, tci;
2452 mblk_t *mp, *mpcopy;
2453 boolean_t trillmode;
2455 trillmode = blp->bl_trilldata != NULL;
2458 * If we're using STP and we're in blocking or listening state, or if
2459 * we're using TRILL and no VLANs are active, then behave as though the
2460 * bridge isn't here at all, and send on the local link alone.
2462 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) ||
2463 (trillmode &&
2464 (!(blp->bl_flags & BLF_TRILLACTIVE) ||
2465 (blp->bl_flags & BLF_SDUFAIL)))) {
2466 KIINCR(bki_sent);
2467 KLINCR(bkl_xmit);
2468 MAC_RING_TX(blp->bl_mh, rh, mpnext, mp);
2469 return (mp);
2473 * Send a copy of the message up to the observability node users.
2474 * TRILL needs to check on a packet-by-packet basis.
2476 if (!trillmode && blp->bl_state == BLS_FORWARDING &&
2477 (bmp->bm_flags & BMF_STARTED) &&
2478 (mp = copymsgchain(mpnext)) != NULL) {
2479 mac_rx(bmp->bm_mh, NULL, mp);
2482 while ((mp = mpnext) != NULL) {
2483 mpnext = mp->b_next;
2484 mp->b_next = NULL;
2486 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
2487 freemsg(mp);
2488 continue;
2492 * Extract and validate the VLAN ID for this packet.
2494 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
2495 !BRIDGE_AF_ISSET(blp, vlanid)) {
2496 freemsg(mp);
2497 continue;
2501 * If we're using TRILL, then we've now validated that we're
2502 * the forwarder for this VLAN, so go ahead and let
2503 * observability node users know about the packet.
2505 if (trillmode && (bmp->bm_flags & BMF_STARTED) &&
2506 (mpcopy = copymsg(mp)) != NULL) {
2507 mac_rx(bmp->bm_mh, NULL, mpcopy);
2511 * We have to learn from our own transmitted packets, because
2512 * there may be a Solaris DLPI raw sender (which can specify its
2513 * own source address) using promiscuous mode for receive. The
2514 * mac layer information won't (and can't) tell us everything
2515 * we need to know.
2517 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
2518 vlanid);
2520 /* attempt forwarding */
2521 if (trillmode || blp->bl_state == BLS_FORWARDING) {
2522 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
2523 B_FALSE, B_TRUE);
2525 if (mp != NULL) {
2526 MAC_RING_TX(blp->bl_mh, rh, mp, mp);
2527 if (mp == NULL) {
2528 KIINCR(bki_sent);
2529 KLINCR(bkl_xmit);
2533 * If we get stuck, then stop. Don't let the user's output
2534 * packets get out of order. (More importantly: don't try to
2535 * bridge the same packet multiple times if flow control is
2536 * asserted.)
2538 if (mp != NULL) {
2539 mp->b_next = mpnext;
2540 break;
2543 return (mp);
2547 * This is called by TRILL when it decapsulates an packet, and we must forward
2548 * locally. On failure, we just drop.
2550 * Note that the ingress_nick reported by TRILL must not represent this local
2551 * node.
2553 void
2554 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick)
2556 mac_header_info_t hdr_info;
2557 uint16_t vlanid, tci;
2558 bridge_inst_t *bip = blp->bl_inst; /* used by macros */
2559 mblk_t *mpcopy;
2561 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
2562 freemsg(mp);
2563 return;
2566 /* Extract VLAN ID for this packet. */
2567 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) {
2568 struct ether_vlan_header *evhp;
2570 /* LINTED: alignment */
2571 evhp = (struct ether_vlan_header *)mp->b_rptr;
2572 tci = ntohs(evhp->ether_tci);
2573 vlanid = VLAN_ID(tci);
2574 } else {
2575 /* Inner VLAN headers are required in TRILL data packets */
2576 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *,
2577 blp, mblk_t *, mp, uint16_t, ingress_nick);
2578 freemsg(mp);
2579 return;
2582 /* Learn the location of this sender in the RBridge network */
2583 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid);
2585 /* attempt forwarding */
2586 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE);
2587 if (mp != NULL) {
2588 if (bridge_can_send(blp, vlanid)) {
2589 /* Deliver a copy locally as well */
2590 if ((mpcopy = copymsg(mp)) != NULL)
2591 mac_rx_common(blp->bl_mh, NULL, mpcopy);
2592 MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
2594 if (mp == NULL) {
2595 KIINCR(bki_sent);
2596 KLINCR(bkl_xmit);
2597 } else {
2598 freemsg(mp);
2604 * This function is used by TRILL _only_ to transmit TRILL-encapsulated
2605 * packets. It sends on a single underlying link and does not bridge.
2607 mblk_t *
2608 bridge_trill_output(bridge_link_t *blp, mblk_t *mp)
2610 bridge_inst_t *bip = blp->bl_inst; /* used by macros */
2612 mac_trill_snoop(blp->bl_mh, mp);
2613 MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
2614 if (mp == NULL) {
2615 KIINCR(bki_sent);
2616 KLINCR(bkl_xmit);
2618 return (mp);
2622 * Set the "appointed forwarder" flag array for this link. TRILL controls
2623 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for
2624 * the forwarder.
2626 void
2627 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr)
2629 int i;
2630 uint_t newflags = 0;
2632 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) {
2633 if ((blp->bl_afs[i] = arr[i]) != 0)
2634 newflags = BLF_TRILLACTIVE;
2636 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags;
2639 void
2640 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill)
2642 bridge_inst_t *bip = blp->bl_inst;
2643 bridge_fwd_t *bfp, *bfnext;
2644 avl_tree_t fwd_scavenge;
2645 int i;
2647 _NOTE(ARGUNUSED(vlan));
2649 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
2650 offsetof(bridge_fwd_t, bf_node));
2651 rw_enter(&bip->bi_rwlock, RW_WRITER);
2652 bfnext = avl_first(&bip->bi_fwd);
2653 while ((bfp = bfnext) != NULL) {
2654 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
2655 if (bfp->bf_flags & BFF_LOCALADDR)
2656 continue;
2657 if (dotrill) {
2658 /* port doesn't matter if we're flushing TRILL */
2659 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE)
2660 continue;
2661 } else {
2662 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE)
2663 continue;
2664 for (i = 0; i < bfp->bf_nlinks; i++) {
2665 if (bfp->bf_links[i] == blp)
2666 break;
2668 if (i >= bfp->bf_nlinks)
2669 continue;
2671 ASSERT(bfp->bf_flags & BFF_INTREE);
2672 avl_remove(&bip->bi_fwd, bfp);
2673 bfp->bf_flags &= ~BFF_INTREE;
2674 avl_add(&fwd_scavenge, bfp);
2676 rw_exit(&bip->bi_rwlock);
2677 bfnext = avl_first(&fwd_scavenge);
2678 while ((bfp = bfnext) != NULL) {
2679 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
2680 avl_remove(&fwd_scavenge, bfp);
2681 fwd_unref(bfp);
2683 avl_destroy(&fwd_scavenge);
2687 * Let the mac module take or drop a reference to a bridge link. When this is
2688 * called, the mac module is holding the mi_bridge_lock, so the link cannot be
2689 * in the process of entering or leaving a bridge.
2691 static void
2692 bridge_ref_cb(mac_handle_t mh, boolean_t hold)
2694 bridge_link_t *blp = (bridge_link_t *)mh;
2696 if (hold)
2697 atomic_inc_uint(&blp->bl_refs);
2698 else
2699 link_unref(blp);
2703 * Handle link state changes reported by the mac layer. This acts as a filter
2704 * for link state changes: if a link is reporting down, but there are other
2705 * links still up on the bridge, then the state is changed to "up." When the
2706 * last link goes down, all are marked down, and when the first link goes up,
2707 * all are marked up. (Recursion is avoided by the use of the "redo" function.)
2709 * We treat unknown as equivalent to "up."
2711 static link_state_t
2712 bridge_ls_cb(mac_handle_t mh, link_state_t newls)
2714 bridge_link_t *blp = (bridge_link_t *)mh;
2715 bridge_link_t *blcmp;
2716 bridge_inst_t *bip;
2717 bridge_mac_t *bmp;
2719 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN ||
2720 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) {
2721 blp->bl_linkstate = newls;
2722 return (newls);
2726 * Scan first to see if there are any other non-down links. If there
2727 * are, then we're done. Otherwise, if all others are down, then the
2728 * state of this link is the state of the bridge.
2730 bip = blp->bl_inst;
2731 rw_enter(&bip->bi_rwlock, RW_WRITER);
2732 for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
2733 blcmp = list_next(&bip->bi_links, blcmp)) {
2734 if (blcmp != blp &&
2735 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
2736 blcmp->bl_linkstate != LINK_STATE_DOWN)
2737 break;
2740 if (blcmp != NULL) {
2742 * If there are other links that are considered up, then tell
2743 * the caller that the link is actually still up, regardless of
2744 * this link's underlying state.
2746 blp->bl_linkstate = newls;
2747 newls = LINK_STATE_UP;
2748 } else if (blp->bl_linkstate != newls) {
2750 * If we've found no other 'up' links, and this link has
2751 * changed state, then report the new state of the bridge to
2752 * all other clients.
2754 blp->bl_linkstate = newls;
2755 for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
2756 blcmp = list_next(&bip->bi_links, blcmp)) {
2757 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED))
2758 mac_link_redo(blcmp->bl_mh, newls);
2760 bmp = bip->bi_mac;
2761 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN)
2762 bmp->bm_linkstate = LINK_STATE_UP;
2763 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
2765 rw_exit(&bip->bi_rwlock);
2766 return (newls);
2769 static void
2770 bridge_add_link(void *arg)
2772 mblk_t *mp = arg;
2773 bridge_stream_t *bsp;
2774 bridge_inst_t *bip, *bipt;
2775 bridge_mac_t *bmp;
2776 datalink_id_t linkid;
2777 int err;
2778 mac_handle_t mh;
2779 uint_t maxsdu;
2780 bridge_link_t *blp = NULL, *blpt;
2781 const mac_info_t *mip;
2782 boolean_t macopen = B_FALSE;
2783 char linkname[MAXLINKNAMELEN];
2784 char kstatname[KSTAT_STRLEN];
2785 int i;
2786 link_state_t linkstate;
2787 mblk_t *mlist;
2789 bsp = (bridge_stream_t *)mp->b_next;
2790 mp->b_next = NULL;
2791 bip = bsp->bs_inst;
2792 /* LINTED: alignment */
2793 linkid = *(datalink_id_t *)mp->b_cont->b_rptr;
2796 * First make sure that there is no other bridge that has this link.
2797 * We don't want to overlap operations from two bridges; the MAC layer
2798 * supports only one bridge on a given MAC at a time.
2800 * We rely on the fact that there's just one taskq thread for the
2801 * bridging module: once we've checked for a duplicate, we can drop the
2802 * lock, because no other thread could possibly be adding another link
2803 * until we're done.
2805 mutex_enter(&inst_lock);
2806 for (bipt = list_head(&inst_list); bipt != NULL;
2807 bipt = list_next(&inst_list, bipt)) {
2808 rw_enter(&bipt->bi_rwlock, RW_READER);
2809 for (blpt = list_head(&bipt->bi_links); blpt != NULL;
2810 blpt = list_next(&bipt->bi_links, blpt)) {
2811 if (linkid == blpt->bl_linkid)
2812 break;
2814 rw_exit(&bipt->bi_rwlock);
2815 if (blpt != NULL)
2816 break;
2818 mutex_exit(&inst_lock);
2819 if (bipt != NULL) {
2820 err = EBUSY;
2821 goto fail;
2824 if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
2825 goto fail;
2826 macopen = B_TRUE;
2828 /* we bridge only Ethernet */
2829 mip = mac_info(mh);
2830 if (mip->mi_media != DL_ETHER) {
2831 err = ENOTSUP;
2832 goto fail;
2836 * Get the current maximum SDU on this interface. If there are other
2837 * links on the bridge, then this one must match, or it errors out.
2838 * Otherwise, the first link becomes the standard for the new bridge.
2840 mac_sdu_get(mh, NULL, &maxsdu);
2841 bmp = bip->bi_mac;
2842 if (list_is_empty(&bip->bi_links)) {
2843 bmp->bm_maxsdu = maxsdu;
2844 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
2847 /* figure the kstat name; also used as the mac client name */
2848 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t);
2849 if (i < 0 || i >= MAXLINKNAMELEN)
2850 i = MAXLINKNAMELEN - 1;
2851 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i);
2852 linkname[i] = '\0';
2853 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name,
2854 linkname);
2856 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) {
2857 err = ENOMEM;
2858 goto fail;
2860 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);
2861 if (blp->bl_lfailmp == NULL) {
2862 kmem_free(blp, sizeof (*blp));
2863 blp = NULL;
2864 err = ENOMEM;
2865 goto fail;
2868 blp->bl_refs = 1;
2869 atomic_inc_uint(&bip->bi_refs);
2870 blp->bl_inst = bip;
2871 blp->bl_mh = mh;
2872 blp->bl_linkid = linkid;
2873 blp->bl_maxsdu = maxsdu;
2874 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL);
2875 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL);
2876 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
2878 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0);
2879 if (err != 0)
2880 goto fail;
2881 blp->bl_flags |= BLF_CLIENT_OPEN;
2883 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE);
2884 if (err != 0)
2885 goto fail;
2886 blp->bl_flags |= BLF_MARGIN_ADDED;
2888 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp);
2890 /* Enable Bridging on the link */
2891 err = mac_bridge_set(mh, (mac_handle_t)blp);
2892 if (err != 0)
2893 goto fail;
2894 blp->bl_flags |= BLF_SET_BRIDGE;
2896 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL,
2897 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP);
2898 if (err != 0)
2899 goto fail;
2900 blp->bl_flags |= BLF_PROM_ADDED;
2902 bridge_new_unicst(blp);
2904 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats,
2905 link_kstats_list, Dim(link_kstats_list), kstatname);
2908 * The link holds a reference to the bridge instance, so that the
2909 * instance can't go away before the link is freed. The insertion into
2910 * bi_links holds a reference on the link (reference set to 1 above).
2911 * When marking as removed from bi_links (BLF_DELETED), drop the
2912 * reference on the link. When freeing the link, drop the reference on
2913 * the instance. BLF_LINK_ADDED tracks link insertion in bi_links list.
2915 rw_enter(&bip->bi_rwlock, RW_WRITER);
2916 list_insert_tail(&bip->bi_links, blp);
2917 blp->bl_flags |= BLF_LINK_ADDED;
2920 * If the new link is no good on this bridge, then let the daemon know
2921 * about the problem.
2923 mlist = NULL;
2924 if (maxsdu != bmp->bm_maxsdu)
2925 link_sdu_fail(blp, B_TRUE, &mlist);
2926 rw_exit(&bip->bi_rwlock);
2927 send_up_messages(bip, mlist);
2930 * Trigger a link state update so that if this link is the first one
2931 * "up" in the bridge, then we notify everyone. This triggers a trip
2932 * through bridge_ls_cb.
2934 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE);
2935 blp->bl_linkstate = LINK_STATE_DOWN;
2936 mac_link_update(mh, linkstate);
2939 * We now need to report back to the stream that invoked us, and then
2940 * drop the reference on the stream that we're holding.
2942 miocack(bsp->bs_wq, mp, 0, 0);
2943 stream_unref(bsp);
2944 return;
2946 fail:
2947 if (blp == NULL) {
2948 if (macopen)
2949 mac_close(mh);
2950 } else {
2951 link_shutdown(blp);
2953 miocnak(bsp->bs_wq, mp, 0, err);
2954 stream_unref(bsp);
2957 static void
2958 bridge_rem_link(void *arg)
2960 mblk_t *mp = arg;
2961 bridge_stream_t *bsp;
2962 bridge_inst_t *bip;
2963 bridge_mac_t *bmp;
2964 datalink_id_t linkid;
2965 bridge_link_t *blp, *blsave;
2966 boolean_t found;
2967 mblk_t *mlist;
2969 bsp = (bridge_stream_t *)mp->b_next;
2970 mp->b_next = NULL;
2971 bip = bsp->bs_inst;
2972 /* LINTED: alignment */
2973 linkid = *(datalink_id_t *)mp->b_cont->b_rptr;
2976 * We become reader here so that we can loop over the other links and
2977 * deliver link up/down notification.
2979 rw_enter(&bip->bi_rwlock, RW_READER);
2980 found = B_FALSE;
2981 for (blp = list_head(&bip->bi_links); blp != NULL;
2982 blp = list_next(&bip->bi_links, blp)) {
2983 if (blp->bl_linkid == linkid &&
2984 !(blp->bl_flags & BLF_DELETED)) {
2985 blp->bl_flags |= BLF_DELETED;
2986 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
2987 blp, DDI_SLEEP);
2988 found = B_TRUE;
2989 break;
2994 * Check if this link is up and the remainder of the links are all
2995 * down.
2997 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) {
2998 for (blp = list_head(&bip->bi_links); blp != NULL;
2999 blp = list_next(&bip->bi_links, blp)) {
3000 if (blp->bl_linkstate != LINK_STATE_DOWN &&
3001 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)))
3002 break;
3004 if (blp == NULL) {
3005 for (blp = list_head(&bip->bi_links); blp != NULL;
3006 blp = list_next(&bip->bi_links, blp)) {
3007 if (!(blp->bl_flags & BLF_DELETED))
3008 mac_link_redo(blp->bl_mh,
3009 LINK_STATE_DOWN);
3011 bmp = bip->bi_mac;
3012 bmp->bm_linkstate = LINK_STATE_DOWN;
3013 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
3018 * Check if there's just one working link left on the bridge. If so,
3019 * then that link is now authoritative for bridge MTU.
3021 blsave = NULL;
3022 for (blp = list_head(&bip->bi_links); blp != NULL;
3023 blp = list_next(&bip->bi_links, blp)) {
3024 if (!(blp->bl_flags & BLF_DELETED)) {
3025 if (blsave == NULL)
3026 blsave = blp;
3027 else
3028 break;
3031 mlist = NULL;
3032 bmp = bip->bi_mac;
3033 if (blsave != NULL && blp == NULL &&
3034 blsave->bl_maxsdu != bmp->bm_maxsdu) {
3035 bmp->bm_maxsdu = blsave->bl_maxsdu;
3036 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu);
3037 link_sdu_fail(blsave, B_FALSE, &mlist);
3039 rw_exit(&bip->bi_rwlock);
3040 send_up_messages(bip, mlist);
3042 if (found)
3043 miocack(bsp->bs_wq, mp, 0, 0);
3044 else
3045 miocnak(bsp->bs_wq, mp, 0, ENOENT);
3046 stream_unref(bsp);
3050 * This function intentionally returns with bi_rwlock held; it is intended for
3051 * quick checks and updates.
3053 static bridge_link_t *
3054 enter_link(bridge_inst_t *bip, datalink_id_t linkid)
3056 bridge_link_t *blp;
3058 rw_enter(&bip->bi_rwlock, RW_READER);
3059 for (blp = list_head(&bip->bi_links); blp != NULL;
3060 blp = list_next(&bip->bi_links, blp)) {
3061 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED))
3062 break;
3064 return (blp);
3067 static void
3068 bridge_ioctl(queue_t *wq, mblk_t *mp)
3070 bridge_stream_t *bsp = wq->q_ptr;
3071 bridge_inst_t *bip;
3072 struct iocblk *iop;
3073 int rc = EINVAL;
3074 int len = 0;
3075 bridge_link_t *blp;
3076 cred_t *cr;
3078 /* LINTED: alignment */
3079 iop = (struct iocblk *)mp->b_rptr;
3082 * For now, all of the bridge ioctls are privileged.
3084 if ((cr = msg_getcred(mp, NULL)) == NULL)
3085 cr = iop->ioc_cr;
3086 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) {
3087 miocnak(wq, mp, 0, EPERM);
3088 return;
3091 switch (iop->ioc_cmd) {
3092 case BRIOC_NEWBRIDGE: {
3093 bridge_newbridge_t *bnb;
3095 if (bsp->bs_inst != NULL ||
3096 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0)
3097 break;
3098 /* LINTED: alignment */
3099 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr;
3100 bnb->bnb_name[MAXNAMELEN-1] = '\0';
3101 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr);
3102 if (rc != 0)
3103 break;
3105 rw_enter(&bip->bi_rwlock, RW_WRITER);
3106 if (bip->bi_control != NULL) {
3107 rw_exit(&bip->bi_rwlock);
3108 bridge_unref(bip);
3109 rc = EBUSY;
3110 } else {
3111 atomic_inc_uint(&bip->bi_refs);
3112 bsp->bs_inst = bip; /* stream holds reference */
3113 bip->bi_control = bsp;
3114 rw_exit(&bip->bi_rwlock);
3115 rc = 0;
3117 break;
3120 case BRIOC_ADDLINK:
3121 if ((bip = bsp->bs_inst) == NULL ||
3122 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0)
3123 break;
3125 * We cannot perform the action in this thread, because we're
3126 * not in process context, and we may already be holding
3127 * MAC-related locks. Place the request on taskq.
3129 mp->b_next = (mblk_t *)bsp;
3130 stream_ref(bsp);
3131 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp,
3132 DDI_SLEEP);
3133 return;
3135 case BRIOC_REMLINK:
3136 if ((bip = bsp->bs_inst) == NULL ||
3137 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0)
3138 break;
3140 * We cannot perform the action in this thread, because we're
3141 * not in process context, and we may already be holding
3142 * MAC-related locks. Place the request on taskq.
3144 mp->b_next = (mblk_t *)bsp;
3145 stream_ref(bsp);
3146 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp,
3147 DDI_SLEEP);
3148 return;
3150 case BRIOC_SETSTATE: {
3151 bridge_setstate_t *bss;
3153 if ((bip = bsp->bs_inst) == NULL ||
3154 (rc = miocpullup(mp, sizeof (*bss))) != 0)
3155 break;
3156 /* LINTED: alignment */
3157 bss = (bridge_setstate_t *)mp->b_cont->b_rptr;
3158 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) {
3159 rc = ENOENT;
3160 } else {
3161 rc = 0;
3162 blp->bl_state = bss->bss_state;
3164 rw_exit(&bip->bi_rwlock);
3165 break;
3168 case BRIOC_SETPVID: {
3169 bridge_setpvid_t *bsv;
3171 if ((bip = bsp->bs_inst) == NULL ||
3172 (rc = miocpullup(mp, sizeof (*bsv))) != 0)
3173 break;
3174 /* LINTED: alignment */
3175 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr;
3176 if (bsv->bsv_vlan > VLAN_ID_MAX)
3177 break;
3178 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) {
3179 rc = ENOENT;
3180 } else if (blp->bl_pvid == bsv->bsv_vlan) {
3181 rc = 0;
3182 } else {
3183 rc = 0;
3184 BRIDGE_VLAN_CLR(blp, blp->bl_pvid);
3185 blp->bl_pvid = bsv->bsv_vlan;
3186 if (blp->bl_pvid != 0)
3187 BRIDGE_VLAN_SET(blp, blp->bl_pvid);
3189 rw_exit(&bip->bi_rwlock);
3190 break;
3193 case BRIOC_VLANENAB: {
3194 bridge_vlanenab_t *bve;
3196 if ((bip = bsp->bs_inst) == NULL ||
3197 (rc = miocpullup(mp, sizeof (*bve))) != 0)
3198 break;
3199 /* LINTED: alignment */
3200 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr;
3201 if (bve->bve_vlan > VLAN_ID_MAX)
3202 break;
3203 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) {
3204 rc = ENOENT;
3205 } else {
3206 rc = 0;
3207 /* special case: vlan 0 means "all" */
3208 if (bve->bve_vlan == 0) {
3209 (void) memset(blp->bl_vlans,
3210 bve->bve_onoff ? ~0 : 0,
3211 sizeof (blp->bl_vlans));
3212 BRIDGE_VLAN_CLR(blp, 0);
3213 if (blp->bl_pvid != 0)
3214 BRIDGE_VLAN_SET(blp, blp->bl_pvid);
3215 } else if (bve->bve_vlan == blp->bl_pvid) {
3216 rc = EINVAL;
3217 } else if (bve->bve_onoff) {
3218 BRIDGE_VLAN_SET(blp, bve->bve_vlan);
3219 } else {
3220 BRIDGE_VLAN_CLR(blp, bve->bve_vlan);
3223 rw_exit(&bip->bi_rwlock);
3224 break;
3227 case BRIOC_FLUSHFWD: {
3228 bridge_flushfwd_t *bff;
3229 bridge_fwd_t *bfp, *bfnext;
3230 avl_tree_t fwd_scavenge;
3231 int i;
3233 if ((bip = bsp->bs_inst) == NULL ||
3234 (rc = miocpullup(mp, sizeof (*bff))) != 0)
3235 break;
3236 /* LINTED: alignment */
3237 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr;
3238 rw_enter(&bip->bi_rwlock, RW_WRITER);
3239 /* This case means "all" */
3240 if (bff->bff_linkid == DATALINK_INVALID_LINKID) {
3241 blp = NULL;
3242 } else {
3243 for (blp = list_head(&bip->bi_links); blp != NULL;
3244 blp = list_next(&bip->bi_links, blp)) {
3245 if (blp->bl_linkid == bff->bff_linkid &&
3246 !(blp->bl_flags & BLF_DELETED))
3247 break;
3249 if (blp == NULL) {
3250 rc = ENOENT;
3251 rw_exit(&bip->bi_rwlock);
3252 break;
3255 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
3256 offsetof(bridge_fwd_t, bf_node));
3257 bfnext = avl_first(&bip->bi_fwd);
3258 while ((bfp = bfnext) != NULL) {
3259 bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
3260 if (bfp->bf_flags & BFF_LOCALADDR)
3261 continue;
3262 if (blp != NULL) {
3263 for (i = 0; i < bfp->bf_maxlinks; i++) {
3264 if (bfp->bf_links[i] == blp)
3265 break;
3268 * If the link is there and we're excluding,
3269 * then skip. If the link is not there and
3270 * we're doing only that link, then skip.
3272 if ((i < bfp->bf_maxlinks) == bff->bff_exclude)
3273 continue;
3275 ASSERT(bfp->bf_flags & BFF_INTREE);
3276 avl_remove(&bip->bi_fwd, bfp);
3277 bfp->bf_flags &= ~BFF_INTREE;
3278 avl_add(&fwd_scavenge, bfp);
3280 rw_exit(&bip->bi_rwlock);
3281 bfnext = avl_first(&fwd_scavenge);
3282 while ((bfp = bfnext) != NULL) {
3283 bfnext = AVL_NEXT(&fwd_scavenge, bfp);
3284 avl_remove(&fwd_scavenge, bfp);
3285 fwd_unref(bfp); /* drop tree reference */
3287 avl_destroy(&fwd_scavenge);
3288 break;
3291 case BRIOC_TABLEMAX:
3292 if ((bip = bsp->bs_inst) == NULL ||
3293 (rc = miocpullup(mp, sizeof (uint32_t))) != 0)
3294 break;
3295 /* LINTED: alignment */
3296 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr;
3297 break;
3300 if (rc == 0)
3301 miocack(wq, mp, len, 0);
3302 else
3303 miocnak(wq, mp, 0, rc);
3306 static void
3307 bridge_wput(queue_t *wq, mblk_t *mp)
3309 switch (DB_TYPE(mp)) {
3310 case M_IOCTL:
3311 bridge_ioctl(wq, mp);
3312 break;
3313 case M_FLUSH:
3314 if (*mp->b_rptr & FLUSHW)
3315 *mp->b_rptr &= ~FLUSHW;
3316 if (*mp->b_rptr & FLUSHR)
3317 qreply(wq, mp);
3318 else
3319 freemsg(mp);
3320 break;
3321 default:
3322 freemsg(mp);
3323 break;
3328 * This function allocates the main data structures for the bridge driver and
3329 * connects us into devfs.
3331 static void
3332 bridge_inst_init(void)
3334 bridge_scan_interval = 5 * drv_usectohz(1000000);
3335 bridge_fwd_age = 25 * drv_usectohz(1000000);
3337 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL);
3338 list_create(&bmac_list, sizeof (bridge_mac_t),
3339 offsetof(bridge_mac_t, bm_node));
3340 list_create(&inst_list, sizeof (bridge_inst_t),
3341 offsetof(bridge_inst_t, bi_node));
3342 cv_init(&inst_cv, NULL, CV_DRIVER, NULL);
3343 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL);
3344 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL);
3345 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL);
3347 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb,
3348 bridge_ls_cb);
3352 * This function disconnects from devfs and destroys all data structures in
3353 * preparation for unload. It's assumed that there are no active bridge
3354 * references left at this point.
3356 static void
3357 bridge_inst_fini(void)
3359 mac_bridge_vectors(NULL, NULL, NULL, NULL);
3360 if (bridge_timerid != 0)
3361 (void) untimeout(bridge_timerid);
3362 rw_destroy(&bmac_rwlock);
3363 list_destroy(&bmac_list);
3364 list_destroy(&inst_list);
3365 cv_destroy(&inst_cv);
3366 mutex_destroy(&inst_lock);
3367 cv_destroy(&stream_ref_cv);
3368 mutex_destroy(&stream_ref_lock);
3372 * bridge_attach()
3374 * Description:
3375 * Attach bridge driver to the system.
3377 static int
3378 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3380 if (cmd != DDI_ATTACH)
3381 return (DDI_FAILURE);
3383 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO,
3384 CLONE_DEV) == DDI_FAILURE) {
3385 return (DDI_FAILURE);
3388 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list,
3389 DLDIOCCNT(bridge_ioc_list)) != 0) {
3390 ddi_remove_minor_node(dip, BRIDGE_CTL);
3391 return (DDI_FAILURE);
3394 bridge_dev_info = dip;
3395 bridge_major = ddi_driver_major(dip);
3396 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1,
3397 TASKQ_DEFAULTPRI, 0);
3398 return (DDI_SUCCESS);
3402 * bridge_detach()
3404 * Description:
3405 * Detach an interface to the system.
3407 static int
3408 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3410 if (cmd != DDI_DETACH)
3411 return (DDI_FAILURE);
3413 ddi_remove_minor_node(dip, NULL);
3414 ddi_taskq_destroy(bridge_taskq);
3415 bridge_dev_info = NULL;
3416 return (DDI_SUCCESS);
3420 * bridge_info()
3422 * Description:
3423 * Translate "dev_t" to a pointer to the associated "dev_info_t".
3425 /* ARGSUSED */
3426 static int
3427 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
3428 void **result)
3430 int rc;
3432 switch (infocmd) {
3433 case DDI_INFO_DEVT2DEVINFO:
3434 if (bridge_dev_info == NULL) {
3435 rc = DDI_FAILURE;
3436 } else {
3437 *result = (void *)bridge_dev_info;
3438 rc = DDI_SUCCESS;
3440 break;
3441 case DDI_INFO_DEVT2INSTANCE:
3442 *result = NULL;
3443 rc = DDI_SUCCESS;
3444 break;
3445 default:
3446 rc = DDI_FAILURE;
3447 break;
3449 return (rc);
3452 static struct module_info bridge_modinfo = {
3453 2105, /* mi_idnum */
3454 BRIDGE_DEV_NAME, /* mi_idname */
3455 0, /* mi_minpsz */
3456 16384, /* mi_maxpsz */
3457 65536, /* mi_hiwat */
3458 128 /* mi_lowat */
3461 static struct qinit bridge_rinit = {
3462 NULL, /* qi_putp */
3463 NULL, /* qi_srvp */
3464 bridge_open, /* qi_qopen */
3465 bridge_close, /* qi_qclose */
3466 NULL, /* qi_qadmin */
3467 &bridge_modinfo, /* qi_minfo */
3468 NULL /* qi_mstat */
3471 static struct qinit bridge_winit = {
3472 (int (*)())bridge_wput, /* qi_putp */
3473 NULL, /* qi_srvp */
3474 NULL, /* qi_qopen */
3475 NULL, /* qi_qclose */
3476 NULL, /* qi_qadmin */
3477 &bridge_modinfo, /* qi_minfo */
3478 NULL /* qi_mstat */
3481 static struct streamtab bridge_tab = {
3482 &bridge_rinit, /* st_rdinit */
3483 &bridge_winit /* st_wrinit */
3486 /* No STREAMS perimeters; we do all our own locking */
3487 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach,
3488 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab,
3489 ddi_quiesce_not_supported);
3491 static struct modldrv modldrv = {
3492 &mod_driverops,
3493 "bridging driver",
3494 &bridge_ops
3497 static struct modlinkage modlinkage = {
3498 MODREV_1,
3499 (void *)&modldrv,
3500 NULL
3504 _init(void)
3506 int retv;
3508 mac_init_ops(NULL, BRIDGE_DEV_NAME);
3509 bridge_inst_init();
3510 if ((retv = mod_install(&modlinkage)) != 0)
3511 bridge_inst_fini();
3512 return (retv);
3516 _fini(void)
3518 int retv;
3520 rw_enter(&bmac_rwlock, RW_READER);
3521 retv = list_is_empty(&bmac_list) ? 0 : EBUSY;
3522 rw_exit(&bmac_rwlock);
3523 if (retv == 0 &&
3524 (retv = mod_remove(&modlinkage)) == 0)
3525 bridge_inst_fini();
3526 return (retv);
3530 _info(struct modinfo *modinfop)
3532 return (mod_info(&modlinkage, modinfop));