4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright (c) 2016 by Delphix. All rights reserved.
29 * This module implements a STREAMS driver that provides layer-two (Ethernet)
30 * bridging functionality. The STREAMS interface is used to provide
31 * observability (snoop/wireshark) and control, but not for interface plumbing.
34 #include <sys/types.h>
35 #include <sys/bitmap.h>
36 #include <sys/cmn_err.h>
39 #include <sys/errno.h>
40 #include <sys/kstat.h>
41 #include <sys/modctl.h>
43 #include <sys/param.h>
44 #include <sys/policy.h>
47 #include <sys/stream.h>
48 #include <sys/stropts.h>
49 #include <sys/strsun.h>
50 #include <sys/sunddi.h>
51 #include <sys/sysmacros.h>
52 #include <sys/systm.h>
56 #include <sys/mac_ether.h>
57 #include <sys/mac_provider.h>
58 #include <sys/mac_client_priv.h>
59 #include <sys/mac_impl.h>
61 #include <net/bridge.h>
62 #include <net/bridge_impl.h>
63 #include <net/trill.h>
64 #include <sys/dld_ioc.h>
67 * Locks and reference counts: object lifetime and design.
70 * Bridge mac (snoop) instances are in bmac_list, which is protected by
71 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer().
72 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes
73 * away, the bridge_mac_t remains until either all of the users go away
74 * (detected by a timer) or until the instance is picked up again by the same
75 * bridge starting back up.
78 * Bridge instances are in inst_list, which is protected by inst_lock.
79 * They're allocated by inst_alloc() and freed by inst_free(). After
80 * allocation, an instance is placed in inst_list, and the reference count is
81 * incremented to represent this. That reference is decremented when the
82 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last
83 * reference is freed, the instance is removed from the list.
85 * Bridge instances have lists of links and an AVL tree of forwarding
86 * entries. Each of these structures holds one reference on the bridge
87 * instance. These lists and tree are protected by bi_rwlock.
90 * Bridge streams are allocated by stream_alloc() and freed by stream_free().
91 * These streams are created when "bridged" opens /dev/bridgectl, and are
92 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the
93 * links on the bridge. When a stream closes, the bridge instance created is
94 * destroyed. There's at most one bridge instance for a given control
98 * Links are allocated by bridge_add_link() and freed by link_free(). The
99 * bi_links list holds a reference to the link. When the BLF_DELETED flag is
100 * set, that reference is dropped. The link isn't removed from the list
101 * until the last reference drops. Each forwarding entry that uses a given
102 * link holds a reference, as does each thread transmitting a packet via the
103 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on
104 * a link when transmitting.
106 * It's important that once BLF_DELETED is set, there's no way for the
107 * reference count to increase again. If it can, then the link may be
108 * double-freed. The BLF_FREED flag is intended for use with assertions to
109 * guard against this in testing.
112 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by
113 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike
114 * other data structures, the reference is dropped when the entry is removed
115 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each
116 * thread that's forwarding a packet to a known destination holds a reference
117 * to a forwarding entry.
121 * The TRILL module does all of its I/O through bridging. It uses references
122 * on the bridge_inst_t and bridge_link_t structures, and has seven entry
123 * points and four callbacks. One entry point is for setting the callbacks
124 * (bridge_trill_register_cb). There are four entry points for taking bridge
125 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two
126 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps)
127 * that need to be bridged locally, and for TRILL-encapsulated output packets
128 * (bridge_trill_output).
130 * The four callbacks comprise two notification functions for bridges and
131 * links being deleted, one function for raw received TRILL packets, and one
132 * for bridge output to non-local TRILL destinations (tunnel entry).
136 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module.
138 const uint8_t all_isis_rbridges
[] = ALL_ISIS_RBRIDGES
;
139 static const uint8_t all_esadi_rbridges
[] = ALL_ESADI_RBRIDGES
;
140 const uint8_t bridge_group_address
[] = BRIDGE_GROUP_ADDRESS
;
142 static const char *inst_kstats_list
[] = { KSINST_NAMES
};
143 static const char *link_kstats_list
[] = { KSLINK_NAMES
};
/* Access a 64-bit named kstat value: object p, kstat struct member m, stat vn */
145 #define KREF(p, m, vn) p->m.vn.value.ui64
146 #define KINCR(p, m, vn) ++KREF(p, m, vn)
147 #define KDECR(p, m, vn) --KREF(p, m, vn)
/* Per-instance (bi_kstats) and per-link (bl_kstats) counter helpers */
149 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn)
150 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn)
151 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn)
/* Shorthands bound to the conventional local variable names bip and blp */
153 #define KIINCR(vn) KIPINCR(bip, vn)
154 #define KIDECR(vn) KIPDECR(bip, vn)
155 #define KLINCR(vn) KLPINCR(blp, vn)
/* Element count of a true array; not valid on pointer parameters */
157 #define Dim(x) (sizeof (x) / sizeof (*(x)))
159 /* Amount of overhead added when encapsulating with VLAN headers */
160 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \
161 sizeof (struct ether_header))
163 static dev_info_t
*bridge_dev_info
;
164 static major_t bridge_major
;
165 static ddi_taskq_t
*bridge_taskq
;
168 * These are the bridge instance management data structures. The mutex lock
169 * protects the list of bridge instances. A reference count is then used on
170 * each instance to determine when to free it. We use mac_minor_hold() to
171 * allocate minor_t values, which are used both for self-cloning /dev/net/
172 * device nodes as well as client streams. Minor node 0 is reserved for the
173 * allocation control node.
175 static list_t inst_list
;
176 static kcondvar_t inst_cv
; /* Allows us to wait for shutdown */
177 static kmutex_t inst_lock
;
179 static krwlock_t bmac_rwlock
;
180 static list_t bmac_list
;
182 /* Wait for taskq entries that use STREAMS */
183 static kcondvar_t stream_ref_cv
;
184 static kmutex_t stream_ref_lock
;
186 static timeout_id_t bridge_timerid
;
187 static clock_t bridge_scan_interval
;
188 static clock_t bridge_fwd_age
;
190 static bridge_inst_t
*bridge_find_name(const char *);
191 static void bridge_timer(void *);
192 static void bridge_unref(bridge_inst_t
*);
194 static const uint8_t zero_addr
[ETHERADDRL
] = { 0 };
196 /* Global TRILL linkage */
197 static trill_recv_pkt_t trill_recv_fn
;
198 static trill_encap_pkt_t trill_encap_fn
;
199 static trill_br_dstr_t trill_brdstr_fn
;
200 static trill_ln_dstr_t trill_lndstr_fn
;
202 /* special settings to accommodate DLD flow control; see dld_str.c */
203 static struct module_info bridge_dld_modinfo
= {
205 BRIDGE_DEV_NAME
, /* mi_idname */
207 INFPSZ
, /* mi_maxpsz */
212 static struct qinit bridge_dld_rinit
= {
215 dld_open
, /* qi_qopen */
216 dld_close
, /* qi_qclose */
217 NULL
, /* qi_qadmin */
218 &bridge_dld_modinfo
, /* qi_minfo */
222 static struct qinit bridge_dld_winit
= {
223 (int (*)())dld_wput
, /* qi_putp */
224 (int (*)())dld_wsrv
, /* qi_srvp */
226 NULL
, /* qi_qclose */
227 NULL
, /* qi_qadmin */
228 &bridge_dld_modinfo
, /* qi_minfo */
232 static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t
*, int *);
234 /* GLDv3 control ioctls used by Bridging */
235 static dld_ioc_info_t bridge_ioc_list
[] = {
236 {BRIDGE_IOC_LISTFWD
, DLDCOPYINOUT
, sizeof (bridge_listfwd_t
),
237 bridge_ioc_listfwd
, NULL
},
241 * Given a bridge mac pointer, get a ref-held pointer to the corresponding
242 * bridge instance, if any. We must hold the global bmac_rwlock so that
243 * bm_inst doesn't slide out from under us.
/*
 * NOTE(review): interior source lines are missing from this extraction
 * (the local bip declaration and the return of bip); comments only added.
 */
245 static bridge_inst_t
*
246 mac_to_inst(const bridge_mac_t
*bmp
)
/* bmac_rwlock (reader) keeps bmp->bm_inst from sliding out from under us */
250 rw_enter(&bmac_rwlock
, RW_READER
);
251 if ((bip
= bmp
->bm_inst
) != NULL
)
/* take a hold for the caller; released later via bridge_unref() */
252 atomic_inc_uint(&bip
->bi_refs
);
253 rw_exit(&bmac_rwlock
);
258 link_sdu_fail(bridge_link_t
*blp
, boolean_t failed
, mblk_t
**mlist
)
262 bridge_link_t
*blcmp
;
267 if (blp
->bl_flags
& BLF_SDUFAIL
)
269 blp
->bl_flags
|= BLF_SDUFAIL
;
271 if (!(blp
->bl_flags
& BLF_SDUFAIL
))
273 blp
->bl_flags
&= ~BLF_SDUFAIL
;
277 * If this link is otherwise up, then check if there are any other
278 * non-failed non-down links. If not, then we control the state of the
283 if (blp
->bl_linkstate
!= LINK_STATE_DOWN
) {
284 for (blcmp
= list_head(&bip
->bi_links
); blcmp
!= NULL
;
285 blcmp
= list_next(&bip
->bi_links
, blcmp
)) {
287 !(blcmp
->bl_flags
& (BLF_DELETED
|BLF_SDUFAIL
)) &&
288 blcmp
->bl_linkstate
!= LINK_STATE_DOWN
)
292 bmp
->bm_linkstate
= failed
? LINK_STATE_DOWN
:
294 mac_link_redo(bmp
->bm_mh
, bmp
->bm_linkstate
);
299 * If we're becoming failed, then the link's current true state needs
300 * to be reflected upwards to this link's clients. If we're becoming
301 * unfailed, then we get the state of the bridge instead on all
305 if (bmp
->bm_linkstate
!= blp
->bl_linkstate
)
306 mac_link_redo(blp
->bl_mh
, blp
->bl_linkstate
);
308 mac_link_redo(blp
->bl_mh
, bmp
->bm_linkstate
);
311 /* get the current mblk we're going to send up */
312 if ((mp
= blp
->bl_lfailmp
) == NULL
&&
313 (mp
= allocb(sizeof (bridge_ctl_t
), BPRI_MED
)) == NULL
)
316 /* get a new one for next time */
317 blp
->bl_lfailmp
= allocb(sizeof (bridge_ctl_t
), BPRI_MED
);
319 /* if none for next time, then report only failures */
320 if (blp
->bl_lfailmp
== NULL
&& !failed
) {
321 blp
->bl_lfailmp
= mp
;
325 /* LINTED: alignment */
326 bcp
= (bridge_ctl_t
*)mp
->b_rptr
;
327 bcp
->bc_linkid
= blp
->bl_linkid
;
328 bcp
->bc_failed
= failed
;
329 mp
->b_wptr
= (uchar_t
*)(bcp
+ 1);
335 * Send control messages (link SDU changes) using the stream to the
336 * bridge instance daemon.
339 send_up_messages(bridge_inst_t
*bip
, mblk_t
*mp
)
344 rq
= bip
->bi_control
->bs_wq
;
356 bridge_m_getstat(void *arg
, uint_t stat
, uint64_t *val
)
362 bridge_m_start(void *arg
)
364 bridge_mac_t
*bmp
= arg
;
366 bmp
->bm_flags
|= BMF_STARTED
;
371 bridge_m_stop(void *arg
)
373 bridge_mac_t
*bmp
= arg
;
375 bmp
->bm_flags
&= ~BMF_STARTED
;
380 bridge_m_setpromisc(void *arg
, boolean_t on
)
387 bridge_m_multicst(void *arg
, boolean_t add
, const uint8_t *mca
)
394 bridge_m_unicst(void *arg
, const uint8_t *macaddr
)
400 bridge_m_tx(void *arg
, mblk_t
*mp
)
402 _NOTE(ARGUNUSED(arg
));
409 bridge_ioc_listfwd(void *karg
, intptr_t arg
, int mode
, cred_t
*cred
, int *rvalp
)
411 bridge_listfwd_t
*blf
= karg
;
413 bridge_fwd_t
*bfp
, match
;
416 bip
= bridge_find_name(blf
->blf_name
);
420 bcopy(blf
->blf_dest
, match
.bf_dest
, ETHERADDRL
);
421 match
.bf_flags
|= BFF_VLANLOCAL
;
422 rw_enter(&bip
->bi_rwlock
, RW_READER
);
423 if ((bfp
= avl_find(&bip
->bi_fwd
, &match
, &where
)) == NULL
)
424 bfp
= avl_nearest(&bip
->bi_fwd
, where
, AVL_AFTER
);
426 bfp
= AVL_NEXT(&bip
->bi_fwd
, bfp
);
428 bzero(blf
, sizeof (*blf
));
430 bcopy(bfp
->bf_dest
, blf
->blf_dest
, ETHERADDRL
);
431 blf
->blf_trill_nick
= bfp
->bf_trill_nick
;
433 drv_hztousec(ddi_get_lbolt() - bfp
->bf_lastheard
) / 1000;
435 (bfp
->bf_flags
& BFF_LOCALADDR
) != 0;
436 blf
->blf_linkid
= bfp
->bf_links
[0]->bl_linkid
;
438 rw_exit(&bip
->bi_rwlock
);
444 bridge_m_setprop(void *arg
, const char *pr_name
, mac_prop_id_t pr_num
,
445 uint_t pr_valsize
, const void *pr_val
)
447 bridge_mac_t
*bmp
= arg
;
454 _NOTE(ARGUNUSED(pr_name
));
457 if (pr_valsize
< sizeof (bmp
->bm_maxsdu
)) {
461 (void) bcopy(pr_val
, &maxsdu
, sizeof (maxsdu
));
462 if (maxsdu
== bmp
->bm_maxsdu
) {
464 } else if ((bip
= mac_to_inst(bmp
)) == NULL
) {
467 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
469 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
470 blp
= list_next(&bip
->bi_links
, blp
)) {
471 if (blp
->bl_flags
& BLF_DELETED
)
473 if (blp
->bl_maxsdu
== maxsdu
)
474 link_sdu_fail(blp
, B_FALSE
, &mlist
);
475 else if (blp
->bl_maxsdu
== bmp
->bm_maxsdu
)
476 link_sdu_fail(blp
, B_TRUE
, &mlist
);
478 rw_exit(&bip
->bi_rwlock
);
479 bmp
->bm_maxsdu
= maxsdu
;
480 (void) mac_maxsdu_update(bmp
->bm_mh
, maxsdu
);
481 send_up_messages(bip
, mlist
);
495 bridge_m_getprop(void *arg
, const char *pr_name
, mac_prop_id_t pr_num
,
496 uint_t pr_valsize
, void *pr_val
)
498 bridge_mac_t
*bmp
= arg
;
501 _NOTE(ARGUNUSED(pr_name
));
503 case MAC_PROP_STATUS
:
504 ASSERT(pr_valsize
>= sizeof (bmp
->bm_linkstate
));
505 bcopy(&bmp
->bm_linkstate
, pr_val
, sizeof (&bmp
->bm_linkstate
));
516 bridge_m_propinfo(void *arg
, const char *pr_name
, mac_prop_id_t pr_num
,
517 mac_prop_info_handle_t prh
)
519 bridge_mac_t
*bmp
= arg
;
521 _NOTE(ARGUNUSED(pr_name
));
525 mac_prop_info_set_range_uint32(prh
, bmp
->bm_maxsdu
,
528 case MAC_PROP_STATUS
:
529 mac_prop_info_set_perm(prh
, MAC_PROP_PERM_READ
);
534 static mac_callbacks_t bridge_m_callbacks
= {
535 MC_SETPROP
| MC_GETPROP
| MC_PROPINFO
,
554 * Create kstats from a list.
557 kstat_setup(kstat_named_t
*knt
, const char **names
, int nstat
,
558 const char *unitname
)
563 for (i
= 0; i
< nstat
; i
++)
564 kstat_named_init(&knt
[i
], names
[i
], KSTAT_DATA_UINT64
);
566 ksp
= kstat_create_zone(BRIDGE_DEV_NAME
, 0, unitname
, "net",
567 KSTAT_TYPE_NAMED
, nstat
, KSTAT_FLAG_VIRTUAL
, GLOBAL_ZONEID
);
576 * Find an existing bridge_mac_t structure or allocate a new one for the given
577 * bridge instance. This creates the mac driver instance that snoop can use.
580 bmac_alloc(bridge_inst_t
*bip
, bridge_mac_t
**bmacp
)
582 bridge_mac_t
*bmp
, *bnew
;
587 if ((mac
= mac_alloc(MAC_VERSION
)) == NULL
)
590 bnew
= kmem_zalloc(sizeof (*bnew
), KM_SLEEP
);
592 rw_enter(&bmac_rwlock
, RW_WRITER
);
593 for (bmp
= list_head(&bmac_list
); bmp
!= NULL
;
594 bmp
= list_next(&bmac_list
, bmp
)) {
595 if (strcmp(bip
->bi_name
, bmp
->bm_name
) == 0) {
596 ASSERT(bmp
->bm_inst
== NULL
);
598 rw_exit(&bmac_rwlock
);
599 kmem_free(bnew
, sizeof (*bnew
));
606 mac
->m_type_ident
= MAC_PLUGIN_IDENT_ETHER
;
607 mac
->m_driver
= bnew
;
608 mac
->m_dip
= bridge_dev_info
;
609 mac
->m_instance
= (uint_t
)-1;
610 mac
->m_src_addr
= (uint8_t *)zero_addr
;
611 mac
->m_callbacks
= &bridge_m_callbacks
;
614 * Note that the SDU limits are irrelevant, as nobody transmits on the
615 * bridge node itself. It's mainly for monitoring but we allow
616 * setting the bridge MTU for quick transition of all links part of the
617 * bridge to a new MTU.
620 mac
->m_max_sdu
= 1500;
621 err
= mac_register(mac
, &bnew
->bm_mh
);
624 rw_exit(&bmac_rwlock
);
625 kmem_free(bnew
, sizeof (*bnew
));
630 (void) strcpy(bnew
->bm_name
, bip
->bi_name
);
631 if (list_is_empty(&bmac_list
)) {
632 bridge_timerid
= timeout(bridge_timer
, NULL
,
633 bridge_scan_interval
);
635 list_insert_tail(&bmac_list
, bnew
);
636 rw_exit(&bmac_rwlock
);
639 * Mark the MAC as unable to go "active" so that only passive clients
640 * (such as snoop) can bind to it.
642 mac_no_active(bnew
->bm_mh
);
648 * Disconnect the given bridge_mac_t from its bridge instance. The bridge
649 * instance is going away. The mac instance can't go away until the clients
650 * are gone (see bridge_timer).
653 bmac_disconnect(bridge_mac_t
*bmp
)
657 bmp
->bm_linkstate
= LINK_STATE_DOWN
;
658 mac_link_redo(bmp
->bm_mh
, LINK_STATE_DOWN
);
660 rw_enter(&bmac_rwlock
, RW_READER
);
664 rw_exit(&bmac_rwlock
);
667 /* This is used by the avl trees to sort forwarding table entries */
669 fwd_compare(const void *addr1
, const void *addr2
)
671 const bridge_fwd_t
*fwd1
= addr1
;
672 const bridge_fwd_t
*fwd2
= addr2
;
673 int diff
= memcmp(fwd1
->bf_dest
, fwd2
->bf_dest
, ETHERADDRL
);
676 return (diff
> 0 ? 1 : -1);
678 if ((fwd1
->bf_flags
^ fwd2
->bf_flags
) & BFF_VLANLOCAL
) {
679 if (fwd1
->bf_vlanid
> fwd2
->bf_vlanid
)
681 else if (fwd1
->bf_vlanid
< fwd2
->bf_vlanid
)
/*
 * Final teardown of a bridge instance.  The bridge_mac_t must already have
 * been disconnected (bi_mac asserted NULL below) before the instance memory
 * and its synchronization objects are destroyed and freed.
 */
688 inst_free(bridge_inst_t
*bip
)
690 ASSERT(bip
->bi_mac
== NULL
);
/* destroy the lock, link list, condvar, and forwarding AVL tree */
691 rw_destroy(&bip
->bi_rwlock
);
692 list_destroy(&bip
->bi_links
);
693 cv_destroy(&bip
->bi_linkwait
);
694 avl_destroy(&bip
->bi_fwd
);
/* kstats are created lazily, so bi_ksp may legitimately be NULL */
695 if (bip
->bi_ksp
!= NULL
)
696 kstat_delete(bip
->bi_ksp
);
697 kmem_free(bip
, sizeof (*bip
));
700 static bridge_inst_t
*
701 inst_alloc(const char *bridge
)
705 bip
= kmem_zalloc(sizeof (*bip
), KM_SLEEP
);
707 (void) strcpy(bip
->bi_name
, bridge
);
708 rw_init(&bip
->bi_rwlock
, NULL
, RW_DRIVER
, NULL
);
709 list_create(&bip
->bi_links
, sizeof (bridge_link_t
),
710 offsetof(bridge_link_t
, bl_node
));
711 cv_init(&bip
->bi_linkwait
, NULL
, CV_DRIVER
, NULL
);
712 avl_create(&bip
->bi_fwd
, fwd_compare
, sizeof (bridge_fwd_t
),
713 offsetof(bridge_fwd_t
, bf_node
));
/*
 * Look up a live (non-BIF_SHUTDOWN) bridge instance by name under inst_lock,
 * taking a reference on it for the caller.
 * NOTE(review): the loop-exit/return lines are missing from this extraction.
 */
717 static bridge_inst_t
*
718 bridge_find_name(const char *bridge
)
722 mutex_enter(&inst_lock
);
723 for (bip
= list_head(&inst_list
); bip
!= NULL
;
724 bip
= list_next(&inst_list
, bip
)) {
/* skip instances already shutting down; match on exact name */
725 if (!(bip
->bi_flags
& BIF_SHUTDOWN
) &&
726 strcmp(bridge
, bip
->bi_name
) == 0) {
/* hold for the caller; dropped via bridge_unref() */
727 atomic_inc_uint(&bip
->bi_refs
);
731 mutex_exit(&inst_lock
);
737 bridge_create(datalink_id_t linkid
, const char *bridge
, bridge_inst_t
**bipc
,
740 bridge_inst_t
*bip
, *bipnew
;
741 bridge_mac_t
*bmp
= NULL
;
745 bipnew
= inst_alloc(bridge
);
747 mutex_enter(&inst_lock
);
749 for (bip
= list_head(&inst_list
); bip
!= NULL
;
750 bip
= list_next(&inst_list
, bip
)) {
751 if (strcmp(bridge
, bip
->bi_name
) == 0)
755 /* This should not take long; if it does, we've got a design problem */
756 if (bip
!= NULL
&& (bip
->bi_flags
& BIF_SHUTDOWN
)) {
757 cv_wait(&inst_cv
, &inst_lock
);
764 list_insert_tail(&inst_list
, bip
);
767 mutex_exit(&inst_lock
);
768 if (bipnew
!= NULL
) {
773 bip
->bi_ksp
= kstat_setup((kstat_named_t
*)&bip
->bi_kstats
,
774 inst_kstats_list
, Dim(inst_kstats_list
), bip
->bi_name
);
776 err
= bmac_alloc(bip
, &bmp
);
777 if ((bip
->bi_mac
= bmp
) == NULL
)
781 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
782 * No extra locking is needed here.
784 if (!(bmp
->bm_flags
& BMF_DLS
)) {
785 err
= dls_devnet_create(bmp
->bm_mh
, linkid
, crgetzoneid(cred
));
788 bmp
->bm_flags
|= BMF_DLS
;
791 bip
->bi_dev
= makedevice(bridge_major
, mac_minor(bmp
->bm_mh
));
796 ASSERT(bip
->bi_trilldata
== NULL
);
797 bip
->bi_flags
|= BIF_SHUTDOWN
;
/*
 * Drop a reference on a bridge instance.  When the last reference goes, the
 * instance must already be marked BIF_SHUTDOWN (asserted); it is then removed
 * from inst_list and waiters on inst_cv are woken.
 * NOTE(review): the final inst_free() call is not visible in this extraction.
 */
803 bridge_unref(bridge_inst_t
*bip
)
805 if (atomic_dec_uint_nv(&bip
->bi_refs
) == 0) {
806 ASSERT(bip
->bi_flags
& BIF_SHUTDOWN
);
807 /* free up mac for reuse before leaving global list */
808 if (bip
->bi_mac
!= NULL
)
809 bmac_disconnect(bip
->bi_mac
);
810 mutex_enter(&inst_lock
);
811 list_remove(&inst_list
, bip
);
/* wake bridge_create() waiters blocked on a shutting-down instance */
812 cv_broadcast(&inst_cv
);
813 mutex_exit(&inst_lock
);
819 * Stream instances are used only for allocating bridges and serving as a
820 * control node. They serve no data-handling function.
822 static bridge_stream_t
*
825 bridge_stream_t
*bsp
;
828 if ((mn
= mac_minor_hold(B_FALSE
)) == 0)
830 bsp
= kmem_zalloc(sizeof (*bsp
), KM_SLEEP
);
/*
 * Free a control-stream instance: release its minor number back to the MAC
 * minor allocator, then free the structure itself.
 */
836 stream_free(bridge_stream_t
*bsp
)
838 mac_minor_rele(bsp
->bs_minor
);
839 kmem_free(bsp
, sizeof (*bsp
));
842 /* Reference hold/release functions for STREAMS-related taskq */
/*
 * Take a taskq reference on a control stream under stream_ref_lock.
 * NOTE(review): the bs_taskq_cnt increment line is missing from this
 * extraction; comments only added.
 */
844 stream_ref(bridge_stream_t
*bsp
)
846 mutex_enter(&stream_ref_lock
);
848 mutex_exit(&stream_ref_lock
);
/*
 * Drop a taskq reference on a control stream; wake anyone waiting in
 * stream close for outstanding taskq work when the count hits zero.
 */
852 stream_unref(bridge_stream_t
*bsp
)
854 mutex_enter(&stream_ref_lock
);
855 if (--bsp
->bs_taskq_cnt
== 0)
856 cv_broadcast(&stream_ref_cv
);
857 mutex_exit(&stream_ref_lock
);
/*
 * Final teardown of a link.  BLF_FREED is set (and asserted not already set)
 * purely to catch double-free in testing, per the lifetime comments at the
 * top of this file.
 */
861 link_free(bridge_link_t
*blp
)
863 bridge_inst_t
*bip
= blp
->bl_inst
;
865 ASSERT(!(blp
->bl_flags
& BLF_FREED
));
866 blp
->bl_flags
|= BLF_FREED
;
867 if (blp
->bl_ksp
!= NULL
)
868 kstat_delete(blp
->bl_ksp
);
/* free the pre-allocated link-failure control message, if any */
869 if (blp
->bl_lfailmp
!= NULL
)
870 freeb(blp
->bl_lfailmp
);
871 cv_destroy(&blp
->bl_trillwait
);
872 mutex_destroy(&blp
->bl_trilllock
);
873 kmem_free(blp
, sizeof (*blp
));
874 /* Don't unreference the bridge until the MAC is closed */
/*
 * Drop a reference on a link.  On the last reference the link must already
 * be BLF_DELETED (asserted); it is then unhooked from the instance's link
 * list, and TRILL waiters in shutdown_inst() are woken once the list drains.
 * NOTE(review): the taskq dispatch of link_shutdown is not visible in this
 * extraction; comments only added.
 */
879 link_unref(bridge_link_t
*blp
)
881 if (atomic_dec_uint_nv(&blp
->bl_refs
) == 0) {
882 bridge_inst_t
*bip
= blp
->bl_inst
;
884 ASSERT(blp
->bl_flags
& BLF_DELETED
);
885 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
/* only remove from bi_links if it was actually added there */
886 if (blp
->bl_flags
& BLF_LINK_ADDED
)
887 list_remove(&bip
->bi_links
, blp
);
888 rw_exit(&bip
->bi_rwlock
);
/* shutdown_inst() waits on bi_linkwait for the last link to go */
889 if (bip
->bi_trilldata
!= NULL
&& list_is_empty(&bip
->bi_links
))
890 cv_broadcast(&bip
->bi_linkwait
);
895 static bridge_fwd_t
*
896 fwd_alloc(const uint8_t *addr
, uint_t nlinks
, uint16_t nick
)
900 bfp
= kmem_zalloc(sizeof (*bfp
) + (nlinks
* sizeof (bridge_link_t
*)),
903 bcopy(addr
, bfp
->bf_dest
, ETHERADDRL
);
904 bfp
->bf_lastheard
= ddi_get_lbolt();
905 bfp
->bf_maxlinks
= nlinks
;
906 bfp
->bf_links
= (bridge_link_t
**)(bfp
+ 1);
907 bfp
->bf_trill_nick
= nick
;
912 static bridge_fwd_t
*
913 fwd_find(bridge_inst_t
*bip
, const uint8_t *addr
, uint16_t vlanid
)
915 bridge_fwd_t
*bfp
, *vbfp
;
918 bcopy(addr
, match
.bf_dest
, ETHERADDRL
);
920 rw_enter(&bip
->bi_rwlock
, RW_READER
);
921 if ((bfp
= avl_find(&bip
->bi_fwd
, &match
, NULL
)) != NULL
) {
922 if (bfp
->bf_vlanid
!= vlanid
&& bfp
->bf_vcnt
> 0) {
923 match
.bf_vlanid
= vlanid
;
924 match
.bf_flags
= BFF_VLANLOCAL
;
925 vbfp
= avl_find(&bip
->bi_fwd
, &match
, NULL
);
929 atomic_inc_uint(&bfp
->bf_refs
);
931 rw_exit(&bip
->bi_rwlock
);
936 fwd_free(bridge_fwd_t
*bfp
)
939 bridge_inst_t
*bip
= bfp
->bf_links
[0]->bl_inst
;
942 for (i
= 0; i
< bfp
->bf_nlinks
; i
++)
943 link_unref(bfp
->bf_links
[i
]);
945 sizeof (*bfp
) + bfp
->bf_maxlinks
* sizeof (bridge_link_t
*));
/*
 * Drop a reference on a forwarding entry.  The last reference may only go
 * away after fwd_delete() has removed the entry from the AVL tree (asserted
 * via BFF_INTREE).  NOTE(review): the fwd_free() call on the zero-ref path
 * is not visible in this extraction; comments only added.
 */
949 fwd_unref(bridge_fwd_t
*bfp
)
951 if (atomic_dec_uint_nv(&bfp
->bf_refs
) == 0) {
952 ASSERT(!(bfp
->bf_flags
& BFF_INTREE
));
958 fwd_delete(bridge_fwd_t
*bfp
)
961 bridge_fwd_t
*bfpzero
;
963 if (bfp
->bf_flags
& BFF_INTREE
) {
964 ASSERT(bfp
->bf_nlinks
> 0);
965 bip
= bfp
->bf_links
[0]->bl_inst
;
966 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
967 /* Another thread could beat us to this */
968 if (bfp
->bf_flags
& BFF_INTREE
) {
969 avl_remove(&bip
->bi_fwd
, bfp
);
970 bfp
->bf_flags
&= ~BFF_INTREE
;
971 if (bfp
->bf_flags
& BFF_VLANLOCAL
) {
972 bfp
->bf_flags
&= ~BFF_VLANLOCAL
;
973 bfpzero
= avl_find(&bip
->bi_fwd
, bfp
, NULL
);
974 if (bfpzero
!= NULL
&& bfpzero
->bf_vcnt
> 0)
977 rw_exit(&bip
->bi_rwlock
);
978 fwd_unref(bfp
); /* no longer in avl tree */
980 rw_exit(&bip
->bi_rwlock
);
986 fwd_insert(bridge_inst_t
*bip
, bridge_fwd_t
*bfp
)
991 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
992 if (!(bip
->bi_flags
& BIF_SHUTDOWN
) &&
993 avl_numnodes(&bip
->bi_fwd
) < bip
->bi_tablemax
&&
994 avl_find(&bip
->bi_fwd
, bfp
, &idx
) == NULL
) {
995 avl_insert(&bip
->bi_fwd
, bfp
, idx
);
996 bfp
->bf_flags
|= BFF_INTREE
;
997 atomic_inc_uint(&bfp
->bf_refs
); /* avl entry */
1002 rw_exit(&bip
->bi_rwlock
);
1007 fwd_update_local(bridge_link_t
*blp
, const uint8_t *oldaddr
,
1008 const uint8_t *newaddr
)
1010 bridge_inst_t
*bip
= blp
->bl_inst
;
1011 bridge_fwd_t
*bfp
, *bfnew
;
1014 boolean_t drop_ref
= B_FALSE
;
1016 if (bcmp(oldaddr
, newaddr
, ETHERADDRL
) == 0)
1019 if (bcmp(oldaddr
, zero_addr
, ETHERADDRL
) == 0)
1023 * Find the previous entry, and remove our link from it.
1025 bcopy(oldaddr
, match
.bf_dest
, ETHERADDRL
);
1026 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
1027 if ((bfp
= avl_find(&bip
->bi_fwd
, &match
, NULL
)) != NULL
) {
1031 * See if we're in the list, and remove if so.
1033 for (i
= 0; i
< bfp
->bf_nlinks
; i
++) {
1034 if (bfp
->bf_links
[i
] == blp
) {
1036 * We assume writes are atomic, so no special
1037 * MT handling is needed. The list length is
1038 * decremented first, and then we remove
1042 for (; i
< bfp
->bf_nlinks
; i
++)
1043 bfp
->bf_links
[i
] = bfp
->bf_links
[i
+ 1];
1048 /* If no more links, then remove and free up */
1049 if (bfp
->bf_nlinks
== 0) {
1050 avl_remove(&bip
->bi_fwd
, bfp
);
1051 bfp
->bf_flags
&= ~BFF_INTREE
;
1056 rw_exit(&bip
->bi_rwlock
);
1058 fwd_unref(bfp
); /* no longer in avl tree */
1061 * Now get the new link address and add this link to the list. The
1062 * list should be of length 1 unless the user has configured multiple
1063 * NICs with the same address. (That's an incorrect configuration, but
1064 * we support it anyway.)
1068 if ((bip
->bi_flags
& BIF_SHUTDOWN
) ||
1069 bcmp(newaddr
, zero_addr
, ETHERADDRL
) == 0)
1072 bcopy(newaddr
, match
.bf_dest
, ETHERADDRL
);
1073 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
1074 if ((bfp
= avl_find(&bip
->bi_fwd
, &match
, &idx
)) == NULL
) {
1075 bfnew
= fwd_alloc(newaddr
, 1, RBRIDGE_NICKNAME_NONE
);
1078 } else if (bfp
->bf_nlinks
< bfp
->bf_maxlinks
) {
1079 /* special case: link fits in existing entry */
1082 bfnew
= fwd_alloc(newaddr
, bfp
->bf_nlinks
+ 1,
1083 RBRIDGE_NICKNAME_NONE
);
1084 if (bfnew
!= NULL
) {
1086 avl_remove(&bip
->bi_fwd
, bfp
);
1087 bfp
->bf_flags
&= ~BFF_INTREE
;
1088 bfnew
->bf_nlinks
= bfp
->bf_nlinks
;
1089 bcopy(bfp
->bf_links
, bfnew
->bf_links
,
1090 bfp
->bf_nlinks
* sizeof (bfp
));
1091 /* reset the idx value due to removal above */
1092 (void) avl_find(&bip
->bi_fwd
, &match
, &idx
);
1096 if (bfnew
!= NULL
) {
1097 bfnew
->bf_links
[bfnew
->bf_nlinks
++] = blp
;
1101 atomic_inc_uint(&blp
->bl_refs
); /* bf_links entry */
1104 /* local addresses are not subject to table limits */
1105 avl_insert(&bip
->bi_fwd
, bfnew
, idx
);
1106 bfnew
->bf_flags
|= (BFF_INTREE
| BFF_LOCALADDR
);
1107 atomic_inc_uint(&bfnew
->bf_refs
); /* avl entry */
1110 rw_exit(&bip
->bi_rwlock
);
1114 * If we found an existing entry and we replaced it with a new one,
1115 * then drop the table reference from the old one. We removed it from
1116 * the AVL tree above.
1118 if (bfnew
!= NULL
&& bfp
!= NULL
&& bfnew
!= bfp
)
1121 /* Account for removed entry. */
/*
 * The link's primary unicast address has (or may have) changed: fetch the
 * current address from the MAC layer, move the local forwarding entry from
 * the old address to the new one, then record the new address in the link.
 */
1127 bridge_new_unicst(bridge_link_t
*blp
)
1129 uint8_t new_mac
[ETHERADDRL
];
1131 mac_unicast_primary_get(blp
->bl_mh
, new_mac
);
1132 fwd_update_local(blp
, blp
->bl_local_mac
, new_mac
);
1133 bcopy(new_mac
, blp
->bl_local_mac
, ETHERADDRL
);
1137 * We must shut down a link prior to freeing it, and doing that requires
1138 * blocking to wait for running MAC threads while holding a reference. This is
1139 * run from a taskq to accomplish proper link shutdown followed by reference
1143 link_shutdown(void *arg
)
1145 bridge_link_t
*blp
= arg
;
1146 mac_handle_t mh
= blp
->bl_mh
;
1148 bridge_fwd_t
*bfp
, *bfnext
;
1149 avl_tree_t fwd_scavenge
;
1153 * This link is being destroyed. Notify TRILL now that it's no longer
1154 * possible to send packets. Data packets may still arrive until TRILL
1155 * calls bridge_trill_lnunref.
1157 if (blp
->bl_trilldata
!= NULL
)
1158 trill_lndstr_fn(blp
->bl_trilldata
, blp
);
1160 if (blp
->bl_flags
& BLF_PROM_ADDED
)
1161 (void) mac_promisc_remove(blp
->bl_mphp
);
1163 if (blp
->bl_flags
& BLF_SET_BRIDGE
)
1164 mac_bridge_clear(mh
, (mac_handle_t
)blp
);
1166 if (blp
->bl_flags
& BLF_MARGIN_ADDED
) {
1167 (void) mac_notify_remove(blp
->bl_mnh
, B_TRUE
);
1168 (void) mac_margin_remove(mh
, blp
->bl_margin
);
1171 /* Tell the clients the real link state when we leave */
1172 mac_link_redo(blp
->bl_mh
,
1173 mac_stat_get(blp
->bl_mh
, MAC_STAT_LOWLINK_STATE
));
1175 /* Destroy all of the forwarding entries related to this link */
1176 avl_create(&fwd_scavenge
, fwd_compare
, sizeof (bridge_fwd_t
),
1177 offsetof(bridge_fwd_t
, bf_node
));
1179 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
1180 bfnext
= avl_first(&bip
->bi_fwd
);
1181 while ((bfp
= bfnext
) != NULL
) {
1182 bfnext
= AVL_NEXT(&bip
->bi_fwd
, bfp
);
1183 for (i
= 0; i
< bfp
->bf_nlinks
; i
++) {
1184 if (bfp
->bf_links
[i
] == blp
)
1187 if (i
>= bfp
->bf_nlinks
)
1189 if (bfp
->bf_nlinks
> 1) {
1190 /* note that this can't be the last reference */
1193 for (; i
< bfp
->bf_nlinks
; i
++)
1194 bfp
->bf_links
[i
] = bfp
->bf_links
[i
+ 1];
1196 ASSERT(bfp
->bf_flags
& BFF_INTREE
);
1197 avl_remove(&bip
->bi_fwd
, bfp
);
1198 bfp
->bf_flags
&= ~BFF_INTREE
;
1199 avl_add(&fwd_scavenge
, bfp
);
1202 rw_exit(&bip
->bi_rwlock
);
1203 bfnext
= avl_first(&fwd_scavenge
);
1204 while ((bfp
= bfnext
) != NULL
) {
1205 bfnext
= AVL_NEXT(&fwd_scavenge
, bfp
);
1206 avl_remove(&fwd_scavenge
, bfp
);
1209 avl_destroy(&fwd_scavenge
);
1211 if (blp
->bl_flags
& BLF_CLIENT_OPEN
)
1212 mac_client_close(blp
->bl_mch
, 0);
1217 * We are now completely removed from the active list, so drop the
1218 * reference (see bridge_add_link).
1224 shutdown_inst(bridge_inst_t
*bip
)
1226 bridge_link_t
*blp
, *blnext
;
1229 mutex_enter(&inst_lock
);
1230 if (bip
->bi_flags
& BIF_SHUTDOWN
) {
1231 mutex_exit(&inst_lock
);
1236 * Once on the inst_list, the bridge instance must not leave that list
1237 * without having the shutdown flag set first. When the shutdown flag
1238 * is set, we own the list reference, so we must drop it before
1241 bip
->bi_flags
|= BIF_SHUTDOWN
;
1242 mutex_exit(&inst_lock
);
1244 bip
->bi_control
= NULL
;
1246 rw_enter(&bip
->bi_rwlock
, RW_READER
);
1247 blnext
= list_head(&bip
->bi_links
);
1248 while ((blp
= blnext
) != NULL
) {
1249 blnext
= list_next(&bip
->bi_links
, blp
);
1250 if (!(blp
->bl_flags
& BLF_DELETED
)) {
1251 blp
->bl_flags
|= BLF_DELETED
;
1252 (void) ddi_taskq_dispatch(bridge_taskq
, link_shutdown
,
1256 while ((bfp
= avl_first(&bip
->bi_fwd
)) != NULL
) {
1257 atomic_inc_uint(&bfp
->bf_refs
);
1258 rw_exit(&bip
->bi_rwlock
);
1261 rw_enter(&bip
->bi_rwlock
, RW_READER
);
1263 rw_exit(&bip
->bi_rwlock
);
1266 * This bridge is being destroyed. Notify TRILL once all of the
1267 * links are all gone.
1269 mutex_enter(&inst_lock
);
1270 while (bip
->bi_trilldata
!= NULL
&& !list_is_empty(&bip
->bi_links
))
1271 cv_wait(&bip
->bi_linkwait
, &inst_lock
);
1272 mutex_exit(&inst_lock
);
1273 if (bip
->bi_trilldata
!= NULL
)
1274 trill_brdstr_fn(bip
->bi_trilldata
, bip
);
1280 * This is called once by the TRILL module when it starts up. It just sets the
1281 * global TRILL callback function pointers -- data transmit/receive and bridge
1282 * and link destroy notification. There's only one TRILL module, so only one
1283 * registration is needed.
1285 * TRILL should call this function with NULL pointers before unloading. It
1286 * must not do so before dropping all references to bridges and links. We
1287 * assert that this is true on debug builds.
1290 bridge_trill_register_cb(trill_recv_pkt_t recv_fn
, trill_encap_pkt_t encap_fn
,
1291 trill_br_dstr_t brdstr_fn
, trill_ln_dstr_t lndstr_fn
)
1294 if (recv_fn
== NULL
&& trill_recv_fn
!= NULL
) {
1298 mutex_enter(&inst_lock
);
1299 for (bip
= list_head(&inst_list
); bip
!= NULL
;
1300 bip
= list_next(&inst_list
, bip
)) {
1301 ASSERT(bip
->bi_trilldata
== NULL
);
1302 rw_enter(&bip
->bi_rwlock
, RW_READER
);
1303 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
1304 blp
= list_next(&bip
->bi_links
, blp
)) {
1305 ASSERT(blp
->bl_trilldata
== NULL
);
1307 rw_exit(&bip
->bi_rwlock
);
1309 mutex_exit(&inst_lock
);
1312 trill_recv_fn
= recv_fn
;
1313 trill_encap_fn
= encap_fn
;
1314 trill_brdstr_fn
= brdstr_fn
;
1315 trill_lndstr_fn
= lndstr_fn
;
1319 * This registers the TRILL instance pointer with a bridge. Before this
1320 * pointer is set, the forwarding, TRILL receive, and bridge destructor
1321 * functions won't be called.
1323 * TRILL holds a reference on a bridge with this call. It must free the
1324 * reference by calling the unregister function below.
1327 bridge_trill_brref(const char *bname
, void *ptr
)
1329 char bridge
[MAXLINKNAMELEN
];
1332 (void) snprintf(bridge
, MAXLINKNAMELEN
, "%s0", bname
);
1333 bip
= bridge_find_name(bridge
);
1335 ASSERT(bip
->bi_trilldata
== NULL
&& ptr
!= NULL
);
1336 bip
->bi_trilldata
= ptr
;
1342 bridge_trill_brunref(bridge_inst_t
*bip
)
1344 ASSERT(bip
->bi_trilldata
!= NULL
);
1345 bip
->bi_trilldata
= NULL
;
1350 * TRILL calls this function when referencing a particular link on a bridge.
1352 * It holds a reference on the link, so TRILL must clear out the reference when
1353 * it's done with the link (on unbinding).
1356 bridge_trill_lnref(bridge_inst_t
*bip
, datalink_id_t linkid
, void *ptr
)
1360 ASSERT(ptr
!= NULL
);
1361 rw_enter(&bip
->bi_rwlock
, RW_READER
);
1362 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
1363 blp
= list_next(&bip
->bi_links
, blp
)) {
1364 if (!(blp
->bl_flags
& BLF_DELETED
) &&
1365 blp
->bl_linkid
== linkid
&& blp
->bl_trilldata
== NULL
) {
1366 blp
->bl_trilldata
= ptr
;
1367 blp
->bl_flags
&= ~BLF_TRILLACTIVE
;
1368 (void) memset(blp
->bl_afs
, 0, sizeof (blp
->bl_afs
));
1369 atomic_inc_uint(&blp
->bl_refs
);
1373 rw_exit(&bip
->bi_rwlock
);
1378 bridge_trill_lnunref(bridge_link_t
*blp
)
1380 mutex_enter(&blp
->bl_trilllock
);
1381 ASSERT(blp
->bl_trilldata
!= NULL
);
1382 blp
->bl_trilldata
= NULL
;
1383 blp
->bl_flags
&= ~BLF_TRILLACTIVE
;
1384 while (blp
->bl_trillthreads
> 0)
1385 cv_wait(&blp
->bl_trillwait
, &blp
->bl_trilllock
);
1386 mutex_exit(&blp
->bl_trilllock
);
1387 (void) memset(blp
->bl_afs
, 0xff, sizeof (blp
->bl_afs
));
/*
 * This periodic timer performs three functions:
 *  1. It scans the list of learned forwarding entries, and removes ones that
 *     haven't been heard from in a while.  The time limit is backed down if
 *     we're above the configured table limit.
 *  2. It walks the links and decays away the bl_learns counter.
 *  3. It scans the observability node entries looking for ones that can be
 *     freed up.
 *
 * NOTE(review): this block was recovered from a mangled extraction; several
 * original lines (opening brace, declarations of bip/blp/err/age_limit/
 * ldecay, 'continue;' bodies, else-arms, closing braces) were dropped.
 * Gaps are marked below -- restore from upstream before compiling.
 */
bridge_timer(void *arg)
    bridge_fwd_t *bfp, *bfnext;
    bridge_mac_t *bmp, *bmnext;
    datalink_id_t tmpid;
    avl_tree_t fwd_scavenge;

    /* Expired entries are collected on a scratch AVL tree, freed later. */
    avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
        offsetof(bridge_fwd_t, bf_node));
    mutex_enter(&inst_lock);
    for (bip = list_head(&inst_list); bip != NULL;
        bip = list_next(&inst_list, bip)) {
        if (bip->bi_flags & BIF_SHUTDOWN)
            /* NOTE(review): loop-body statement dropped here */
        rw_enter(&bip->bi_rwlock, RW_WRITER);
        /* compute scaled maximum age based on table limit */
        if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
            /* NOTE(review): tshift-adjust arm(s) dropped here */
        if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
            if (bip->bi_tshift != 0)
            /* NOTE(review): fallback statements dropped here */
        bfnext = avl_first(&bip->bi_fwd);
        while ((bfp = bfnext) != NULL) {
            bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
            /* Age out non-local entries not heard from recently. */
            if (!(bfp->bf_flags & BFF_LOCALADDR) &&
                (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
                ASSERT(bfp->bf_flags & BFF_INTREE);
                avl_remove(&bip->bi_fwd, bfp);
                bfp->bf_flags &= ~BFF_INTREE;
                avl_add(&fwd_scavenge, bfp);
        /* Decay each link's learning counter. */
        for (blp = list_head(&bip->bi_links); blp != NULL;
            blp = list_next(&bip->bi_links, blp)) {
            ldecay = mac_get_ldecay(blp->bl_mh);
            if (ldecay >= blp->bl_learns)
            /* NOTE(review): zeroing arm / else dropped here */
            atomic_add_int(&blp->bl_learns, -(int)ldecay);
        rw_exit(&bip->bi_rwlock);
        bfnext = avl_first(&fwd_scavenge);
        while ((bfp = bfnext) != NULL) {
            bfnext = AVL_NEXT(&fwd_scavenge, bfp);
            avl_remove(&fwd_scavenge, bfp);
            fwd_unref(bfp); /* drop tree reference */
    mutex_exit(&inst_lock);
    avl_destroy(&fwd_scavenge);

    /*
     * Scan the bridge_mac_t entries and try to free up the ones that are
     * no longer active.  This must be done by polling, as neither DLS nor
     * MAC provides a driver any sort of positive control over clients.
     */
    rw_enter(&bmac_rwlock, RW_WRITER);
    bmnext = list_head(&bmac_list);
    while ((bmp = bmnext) != NULL) {
        bmnext = list_next(&bmac_list, bmp);

        /* ignore active bridges */
        if (bmp->bm_inst != NULL)
            /* NOTE(review): loop-body statement dropped here */

        if (bmp->bm_flags & BMF_DLS) {
            err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
            ASSERT(err == 0 || err == EBUSY);
            /* NOTE(review): success check around flag clear dropped */
            bmp->bm_flags &= ~BMF_DLS;

        if (!(bmp->bm_flags & BMF_DLS)) {
            err = mac_unregister(bmp->bm_mh);
            ASSERT(err == 0 || err == EBUSY);
            /* NOTE(review): success check around removal dropped */
            list_remove(&bmac_list, bmp);
            kmem_free(bmp, sizeof (*bmp));

    /* Re-arm the timer only while snoop nodes remain to be polled. */
    if (list_is_empty(&bmac_list)) {
        /* NOTE(review): empty-list arm dropped; else re-arms below */
        bridge_timerid = timeout(bridge_timer, NULL,
            bridge_scan_interval);
    rw_exit(&bmac_rwlock);
1503 bridge_open(queue_t
*rq
, dev_t
*devp
, int oflag
, int sflag
, cred_t
*credp
)
1505 bridge_stream_t
*bsp
;
1507 if (rq
->q_ptr
!= NULL
)
1510 if (sflag
& MODOPEN
)
1514 * Check the minor node number being opened. This tells us which
1515 * bridge instance the user wants.
1517 if (getminor(*devp
) != 0) {
1519 * This is a regular DLPI stream for snoop or the like.
1520 * Redirect it through DLD.
1522 rq
->q_qinfo
= &bridge_dld_rinit
;
1523 OTHERQ(rq
)->q_qinfo
= &bridge_dld_winit
;
1524 return (dld_open(rq
, devp
, oflag
, sflag
, credp
));
1527 * Allocate the bridge control stream structure.
1529 if ((bsp
= stream_alloc()) == NULL
)
1531 rq
->q_ptr
= WR(rq
)->q_ptr
= (caddr_t
)bsp
;
1532 bsp
->bs_wq
= WR(rq
);
1533 *devp
= makedevice(getmajor(*devp
), bsp
->bs_minor
);
1540 * This is used only for bridge control streams. DLPI goes through dld
1545 bridge_close(queue_t
*rq
, int flags __unused
, cred_t
*credp __unused
)
1547 bridge_stream_t
*bsp
= rq
->q_ptr
;
1551 * Wait for any stray taskq (add/delete link) entries related to this
1552 * stream to leave the system.
1554 mutex_enter(&stream_ref_lock
);
1555 while (bsp
->bs_taskq_cnt
!= 0)
1556 cv_wait(&stream_ref_cv
, &stream_ref_lock
);
1557 mutex_exit(&stream_ref_lock
);
1560 if ((bip
= bsp
->bs_inst
) != NULL
)
1562 rq
->q_ptr
= WR(rq
)->q_ptr
= NULL
;
/*
 * Learn (or refresh) the forwarding-table entry for source address 'saddr'
 * as seen on link 'blp' with the given TRILL ingress nickname and VLAN.
 *
 * NOTE(review): recovered from a mangled extraction; the vlanid parameter
 * line, 'int i;', early-return bodies, fwd_unref/fwd_delete calls, and
 * else-arms were dropped.  Gaps marked below -- restore from upstream.
 */
bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
    /* NOTE(review): final parameter (vlanid) line dropped here */
    bridge_inst_t *bip = blp->bl_inst;
    bridge_fwd_t *bfp, *bfpnew;
    boolean_t replaced = B_FALSE;

    /* Ignore multi-destination address used as source; it's nonsense. */
    /* NOTE(review): the multicast-source early return was dropped */

    /*
     * If the source is known, then check whether it belongs on this link.
     * If not, and this isn't a fixed local address, then we've detected a
     * move.  If it's not known, learn it.
     */
    if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
        /*
         * If the packet has a fixed local source address, then there's
         * nothing we can learn.  We must quit.  If this was a received
         * packet, then the sender has stolen our address, but there's
         * nothing we can do.  If it's a transmitted packet, then
         * that's the normal case.
         */
        if (bfp->bf_flags & BFF_LOCALADDR) {
            /* NOTE(review): unref/return body dropped here */

        /*
         * Check if the link (and TRILL sender, if any) being used is
         * among the ones registered for this address.  If so, then
         * this is information that we already know.
         */
        if (bfp->bf_trill_nick == ingress_nick) {
            for (i = 0; i < bfp->bf_nlinks; i++) {
                if (bfp->bf_links[i] == blp) {
                    bfp->bf_lastheard = ddi_get_lbolt();
                    /* NOTE(review): unref/return dropped */

    /*
     * Note that we intentionally "unlearn" things that appear to be under
     * attack on this link.  The forwarding cache is a negative thing for
     * security -- it disables reachability as a performance optimization
     * -- so leaving out entries optimizes for success and defends against
     * the attack.  Thus, the bare increment without a check in the delete
     * code above is right.  (And it's ok if we skid over the limit a
     * little, so there's no syncronization needed on the test.)
     */
    if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
        if (bfp->bf_vcnt == 0)
            /* NOTE(review): delete/unref/return body dropped */

    atomic_inc_uint(&blp->bl_learns);

    if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
        /* NOTE(review): cleanup/return body dropped here */

        /*
         * If this is a new destination for the same VLAN, then delete
         * so that we can update.  If it's a different VLAN, then we're
         * not going to delete the original.  Split off instead into an
         * IVL entry.
         */
        if (bfp->bf_vlanid == vlanid) {
            /* save the count of IVL duplicates */
            bfpnew->bf_vcnt = bfp->bf_vcnt;

            /* entry deletes count as learning events */
            atomic_inc_uint(&blp->bl_learns);

            /* destroy and create anew; node moved */
            /* NOTE(review): delete/else-arm lines dropped here */
            bfpnew->bf_flags |= BFF_VLANLOCAL;

    bfpnew->bf_links[0] = blp;
    bfpnew->bf_nlinks = 1;
    atomic_inc_uint(&blp->bl_refs); /* bf_links entry */
    if (!fwd_insert(bip, bfpnew))
        /* NOTE(review): insert-failure/replaced handling dropped */
/*
 * Process the VLAN headers for output on a given link.  There are several
 * cases (noting that we don't map VLANs):
 *   1. The input packet is good as it is; either
 *      a. It has no tag, and output has same PVID
 *      b. It has a non-zero priority-only tag for PVID, and b_band is same
 *      c. It has a tag with VLAN different from PVID, and b_band is same
 *   2. The tag must change: non-zero b_band is different from tag priority
 *   3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
 *   4. The packet has no tag and needs one:
 *      a. VLAN ID same as PVID, but b_band is non-zero
 *      b. VLAN ID different from PVID
 * We exclude case 1 first, then modify the packet.  Note that output packets
 * get a priority set by the mblk, not by the header, because QoS in bridging
 * requires priority recalculation at each node.
 *
 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
 *
 * NOTE(review): recovered from a mangled extraction; return-type line,
 * 'mblk_t *mpcopy;', 'int pri;', NULL-check early return, several
 * return/else/brace lines, and 'mlen = MBLKL(mp);' assignments were
 * dropped.  Gaps marked below -- restore from upstream before compiling.
 */
reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
    boolean_t source_has_tag = (tci != 0xFFFF);
    size_t mlen, minlen;
    struct ether_vlan_header *evh;

    /* This helps centralize error handling in the caller. */
    /* NOTE(review): NULL-input early return dropped here */

    /* No forwarded packet can have hardware checksum enabled */
    DB_CKSUMFLAGS(mp) = 0;

    /* Get the no-modification cases out of the way first */
    if (!source_has_tag && vlanid == pvid) /* 1a */
        /* NOTE(review): return dropped */

    pri = VLAN_PRI(tci);
    if (source_has_tag && mp->b_band == pri) {
        if (vlanid != pvid) /* 1c */
            /* NOTE(review): return dropped */
        if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */
            /* NOTE(review): return and closing lines dropped */

    /*
     * We now know that we must modify the packet.  Prepare for that.  Note
     * that if a tag is present, the caller has already done a pullup for
     * the VLAN header, so we're good to go.
     */
    if (MBLKL(mp) < sizeof (struct ether_header)) {
        mpcopy = msgpullup(mp, sizeof (struct ether_header));
        if (mpcopy == NULL) {
            /* NOTE(review): error-path body dropped here */

    /*
     * Copy when the buffer is shared, misaligned for 16-bit access, or has
     * no tailroom for tag insertion.
     */
    if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
        (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
        minlen = mlen = MBLKL(mp);
        if (!source_has_tag)
            minlen += VLAN_INCR;
        ASSERT(minlen >= sizeof (struct ether_vlan_header));
         * We're willing to copy some data to avoid fragmentation, but
        /* NOTE(review): surrounding comment/limit lines dropped here */
        minlen = sizeof (struct ether_vlan_header);
        mpcopy = allocb(minlen, BPRI_MED);
        if (mpcopy == NULL) {
            /* NOTE(review): error-path body dropped here */
        if (mlen <= minlen) {
            /* We toss the first mblk when we can. */
            bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
            mpcopy->b_wptr += mlen;
            mpcopy->b_cont = mp->b_cont;
            /* NOTE(review): freeb/else lines dropped here */
            /* If not, then just copy what we need */
            if (!source_has_tag)
                minlen = sizeof (struct ether_header);
            bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
            mpcopy->b_wptr += minlen;
            mpcopy->b_cont = mp;
            mp->b_rptr += minlen;
        /* NOTE(review): 'mp = mpcopy;' and closing braces dropped */

    /* LINTED: pointer alignment */
    evh = (struct ether_vlan_header *)mp->b_rptr;
    if (source_has_tag) {
        if (mp->b_band == 0 && vlanid == pvid) { /* 3 */
            /* Strip the tag: restore type and close the gap. */
            evh->ether_tpid = evh->ether_type;
            if (mlen > sizeof (struct ether_vlan_header))
                ovbcopy(mp->b_rptr +
                    sizeof (struct ether_vlan_header),
                    mp->b_rptr + sizeof (struct ether_header),
                    mlen - sizeof (struct ether_vlan_header));
            mp->b_wptr -= VLAN_INCR;
        /* NOTE(review): else-arm (case 2) opening dropped here */
            vlanid = VLAN_ID_NONE;
            tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
            evh->ether_tci = htons(tci);

        /* case 4: no header present, but one is needed */
        if (mlen > sizeof (struct ether_header))
            ovbcopy(mp->b_rptr + sizeof (struct ether_header),
                mp->b_rptr + sizeof (struct ether_vlan_header),
                mlen - sizeof (struct ether_header));
        mp->b_wptr += VLAN_INCR;
        ASSERT(mp->b_wptr <= DB_LIM(mp));
        /* NOTE(review): 'if (vlanid == pvid)' guard dropped here */
        vlanid = VLAN_ID_NONE;
        tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
        evh->ether_type = evh->ether_tpid;
        evh->ether_tpid = htons(ETHERTYPE_VLAN);
        evh->ether_tci = htons(tci);
    /* NOTE(review): final return dropped here */
1808 /* Record VLAN information and strip header if requested . */
1810 update_header(mblk_t
*mp
, mac_header_info_t
*hdr_info
, boolean_t striphdr
)
1812 if (hdr_info
->mhi_bindsap
== ETHERTYPE_VLAN
) {
1813 struct ether_vlan_header
*evhp
;
1814 uint16_t ether_type
;
1816 /* LINTED: alignment */
1817 evhp
= (struct ether_vlan_header
*)mp
->b_rptr
;
1818 hdr_info
->mhi_istagged
= B_TRUE
;
1819 hdr_info
->mhi_tci
= ntohs(evhp
->ether_tci
);
1822 * For VLAN tagged frames update the ether_type
1823 * in hdr_info before stripping the header.
1825 ether_type
= ntohs(evhp
->ether_type
);
1826 hdr_info
->mhi_origsap
= ether_type
;
1827 hdr_info
->mhi_bindsap
= (ether_type
> ETHERMTU
) ?
1828 ether_type
: DLS_SAP_LLC
;
1829 mp
->b_rptr
= (uchar_t
*)(evhp
+ 1);
1832 hdr_info
->mhi_istagged
= B_FALSE
;
1833 hdr_info
->mhi_tci
= VLAN_ID_NONE
;
1835 mp
->b_rptr
+= sizeof (struct ether_header
);
1840 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID.
1843 bridge_can_send(bridge_link_t
*blp
, uint16_t vlanid
)
1845 ASSERT(vlanid
!= VLAN_ID_NONE
);
1846 if (blp
->bl_flags
& BLF_DELETED
)
1848 if (blp
->bl_trilldata
== NULL
&& blp
->bl_state
!= BLS_FORWARDING
)
1850 return (BRIDGE_VLAN_ISSET(blp
, vlanid
) && BRIDGE_AF_ISSET(blp
, vlanid
));
/*
 * This function scans the bridge forwarding tables in order to forward a
 * given packet.  If the packet either doesn't need forwarding (the current
 * link is correct) or the current link needs a copy as well, then the packet
 * is returned to the caller.
 *
 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a
 * TRILL tunnel.  If the destination points there, then drop instead.
 *
 * NOTE(review): recovered from a mangled extraction; declarations of
 * bfp/i/tdp, many if/else arms, KIINCR drop counters, freemsg calls,
 * fwd_unref calls, and closing braces were dropped.  Gaps marked below --
 * restore from upstream before compiling.
 */
bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
    uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit)
    mblk_t *mpsend, *mpcopy;
    bridge_inst_t *bip = blp->bl_inst;
    bridge_link_t *blpsend, *blpnext;
    boolean_t selfseen = B_FALSE;
    const uint8_t *daddr = hdr_info->mhi_daddr;

    /*
     * Check for the IEEE "reserved" multicast addresses.  Messages sent to
     * these addresses are used for link-local control (STP and pause), and
     * are never forwarded or redirected.
     */
    if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 &&
        daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) {
        /* NOTE(review): reserved-address handling body dropped here */

    if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) {
        /*
         * If trill indicates a destination for this node, then it's
         * clearly not intended for local delivery.  We must tell TRILL
         * to encapsulate, as long as we didn't just decapsulate it.
         */
        if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) {
            /*
             * Error case: can't reencapsulate if the protocols are
             * working correctly.
             */
            /* NOTE(review): from_trill drop path lines dropped */
            mutex_enter(&blp->bl_trilllock);
            if ((tdp = blp->bl_trilldata) != NULL) {
                blp->bl_trillthreads++;
                mutex_exit(&blp->bl_trilllock);
                update_header(mp, hdr_info, B_FALSE);
                /* NOTE(review): is_xmit guard dropped here */
                mp = mac_fix_cksum(mp);
                /* all trill data frames have Inner.VLAN */
                mp = reform_vlan_header(mp, vlanid, tci, 0);
                /* NOTE(review): NULL check / drop count dropped */
                trill_encap_fn(tdp, blp, hdr_info, mp,
                    bfp->bf_trill_nick);
                mutex_enter(&blp->bl_trilllock);
                if (--blp->bl_trillthreads == 0 &&
                    blp->bl_trilldata == NULL)
                    cv_broadcast(&blp->bl_trillwait);
            mutex_exit(&blp->bl_trilllock);
            /* if TRILL has been disabled, then kill this stray */
            /* NOTE(review): cleanup and return lines dropped here */

        /* find first link we can send on */
        for (i = 0; i < bfp->bf_nlinks; i++) {
            blpsend = bfp->bf_links[i];
            /* NOTE(review): self-link check arm dropped here */
            else if (bridge_can_send(blpsend, vlanid))
                /* NOTE(review): break and brace dropped */

        while (i < bfp->bf_nlinks) {
            blpsend = bfp->bf_links[i];
            /* Look ahead for the next sendable link. */
            for (i++; i < bfp->bf_nlinks; i++) {
                blpnext = bfp->bf_links[i];
                /* NOTE(review): self-link check arm dropped */
                else if (bridge_can_send(blpnext, vlanid))
                    /* NOTE(review): break and brace dropped */

            /* Last sendable link and not ourselves: hand mp off. */
            if (i == bfp->bf_nlinks && !selfseen) {
                /* NOTE(review): hand-off/else lines dropped */
                mpsend = copymsg(mp);

            if (!from_trill && is_xmit)
                mpsend = mac_fix_cksum(mpsend);

            mpsend = reform_vlan_header(mpsend, vlanid, tci,
                /* NOTE(review): pvid argument line dropped */
            if (mpsend == NULL) {
                /* NOTE(review): drop-count/continue dropped */

            KIINCR(bki_forwards);
            /*
             * No need to bump up the link reference count, as
             * the forwarding entry itself holds a reference to
             * the link.
             */
            if (bfp->bf_flags & BFF_LOCALADDR) {
                mac_rx_common(blpsend->bl_mh, NULL, mpsend);
            /* NOTE(review): else arm opening dropped here */
                KLPINCR(blpsend, bkl_xmit);
                MAC_RING_TX(blpsend->bl_mh, NULL, mpsend,
                    /* NOTE(review): trailing args/freemsg dropped */

        /*
         * Handle a special case: if we're transmitting to the original
         * link, then check whether the localaddr flag is set.  If it
         * is, then receive instead.  This doesn't happen with ordinary
         * bridging, but does happen often with TRILL decapsulation.
         */
        if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) {
            mac_rx_common(blp->bl_mh, NULL, mp);
            /* NOTE(review): mp = NULL and unref lines dropped */

        /*
         * TRILL has two cases to handle.  If the packet is off the
         * wire (not from TRILL), then we need to send up into the
         * TRILL module to have the distribution tree computed.  If the
         * packet is from TRILL (decapsulated), then we're part of the
         * distribution tree, and we need to copy the packet on member
         * interfaces.
         *
         * Thus, the from TRILL case is identical to the STP case.
         */
        if (!from_trill && blp->bl_trilldata != NULL) {
            mutex_enter(&blp->bl_trilllock);
            if ((tdp = blp->bl_trilldata) != NULL) {
                blp->bl_trillthreads++;
                mutex_exit(&blp->bl_trilllock);
                if ((mpsend = copymsg(mp)) != NULL) {
                    update_header(mpsend,
                        /* NOTE(review): args dropped */
                     * all trill data frames have
                    /* NOTE(review): comment tail dropped */
                    mpsend = reform_vlan_header(mpsend,
                        /* NOTE(review): args dropped */
                    if (mpsend == NULL) {
                        /* NOTE(review): drop path dropped */
                        trill_encap_fn(tdp, blp,
                            /* NOTE(review): args dropped */
                            RBRIDGE_NICKNAME_NONE);
                mutex_enter(&blp->bl_trilllock);
                if (--blp->bl_trillthreads == 0 &&
                    blp->bl_trilldata == NULL)
                    cv_broadcast(&blp->bl_trillwait);
            mutex_exit(&blp->bl_trilllock);

        /*
         * This is an unknown destination, so flood.
         */
        rw_enter(&bip->bi_rwlock, RW_READER);
        for (blpnext = list_head(&bip->bi_links); blpnext != NULL;
            blpnext = list_next(&bip->bi_links, blpnext)) {
            /* NOTE(review): self-link check arm dropped here */
            else if (bridge_can_send(blpnext, vlanid))
                /* NOTE(review): break and brace dropped */
        if (blpnext != NULL)
            atomic_inc_uint(&blpnext->bl_refs);
        rw_exit(&bip->bi_rwlock);
        while ((blpsend = blpnext) != NULL) {
            rw_enter(&bip->bi_rwlock, RW_READER);
            for (blpnext = list_next(&bip->bi_links, blpsend);
                /* NOTE(review): loop-condition line dropped */
                blpnext = list_next(&bip->bi_links, blpnext)) {
                /* NOTE(review): self-link check arm dropped */
                else if (bridge_can_send(blpnext, vlanid))
                    /* NOTE(review): break and brace dropped */
            if (blpnext != NULL)
                atomic_inc_uint(&blpnext->bl_refs);
            rw_exit(&bip->bi_rwlock);

            /* Last link and not ourselves: hand mp off directly. */
            if (blpnext == NULL && !selfseen) {
                /* NOTE(review): hand-off/else lines dropped */
                mpsend = copymsg(mp);

            if (!from_trill && is_xmit)
                mpsend = mac_fix_cksum(mpsend);

            mpsend = reform_vlan_header(mpsend, vlanid, tci,
                /* NOTE(review): pvid argument line dropped */
            if (mpsend == NULL) {
                /* NOTE(review): drop-count/continue dropped */

            if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST)
                KIINCR(bki_unknown);
            /* NOTE(review): else mbcast-count arm dropped */
            KLPINCR(blpsend, bkl_xmit);
            if ((mpcopy = copymsg(mpsend)) != NULL)
                mac_rx_common(blpsend->bl_mh, NULL, mpcopy);
            MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend);
            /* NOTE(review): freemsg line dropped here */
            link_unref(blpsend);

    /*
     * At this point, if np is non-NULL, it means that the caller needs to
     * continue on the selected link.
     */
    /* NOTE(review): final return dropped here */
/*
 * Extract and validate the VLAN information for a given packet.  This checks
 * conformance with the rules for use of the PVID on the link, and for the
 * allowed (configured) VLAN set.
 *
 * Returns B_TRUE if the packet passes, B_FALSE if it fails.
 *
 * NOTE(review): recovered from a mangled extraction; 'size_t mlen;',
 * b_cont-walk statements, several return statements, and the final
 * out-parameter stores were dropped.  Gaps marked below.
 */
bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
    uint16_t *vlanidp, uint16_t *tcip)
    uint16_t tci, vlanid;

    if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
        ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci);

        /*
         * Extract the VLAN ID information, regardless of alignment,
         * and without a pullup.  This isn't attractive, but we do this
         * to avoid having to deal with the pointers stashed in
         * hdr_info moving around or having the caller deal with a new
         * mblk.
         */
        while (mp != NULL) {
            /* NOTE(review): mlen assignment dropped here */
            if (mlen > tpos && mlen > 0)
                /* NOTE(review): break/advance lines dropped */
        /* High byte of the TCI, then step to the low byte. */
        tci = mp->b_rptr[tpos] << 8;
        if (++tpos >= mlen) {
            /* NOTE(review): b_cont advance loop head dropped */
        } while (mp != NULL && MBLKL(mp) == 0);
        /* NOTE(review): NULL-check lines dropped here */
        tci |= mp->b_rptr[tpos];

        vlanid = VLAN_ID(tci);
        if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX)
            /* NOTE(review): failure return dropped */
        if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid)
            /* NOTE(review): failure return dropped */
        if (!BRIDGE_VLAN_ISSET(blp, vlanid))
            /* NOTE(review): failure return and else-arm dropped */

        /*
         * If PVID is set to zero, then untagged traffic is not
         * supported here.  Do not learn or forward.
         */
        if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE)
            /* NOTE(review): failure return, out-param stores, and
             * success return dropped here */
/*
 * Handle MAC notifications.
 *
 * NOTE(review): recovered from a mangled extraction; 'uint_t maxsdu;',
 * 'break;' statements, an else-if arm, and closing braces were dropped.
 */
bridge_notify_cb(void *arg, mac_notify_type_t note_type)
    bridge_link_t *blp = arg;

    switch (note_type) {
    case MAC_NOTE_UNICST:
        /* The link's primary unicast address changed; re-learn it. */
        bridge_new_unicst(blp);
        /* NOTE(review): break dropped here */

    case MAC_NOTE_SDU_SIZE: {
        bridge_inst_t *bip = blp->bl_inst;
        bridge_mac_t *bmp = bip->bi_mac;
        boolean_t notify = B_FALSE;
        mblk_t *mlist = NULL;

        mac_sdu_get(blp->bl_mh, NULL, &maxsdu);
        rw_enter(&bip->bi_rwlock, RW_READER);
        /* Sole link on the bridge: its SDU defines the bridge's SDU. */
        if (list_prev(&bip->bi_links, blp) == NULL &&
            list_next(&bip->bi_links, blp) == NULL) {
            notify = (maxsdu != bmp->bm_maxsdu);
            bmp->bm_maxsdu = maxsdu;
        blp->bl_maxsdu = maxsdu;
        /* Mismatched SDUs across links are flagged on this link. */
        if (maxsdu != bmp->bm_maxsdu)
            link_sdu_fail(blp, B_TRUE, &mlist);
        /* NOTE(review): else-if-notify arm dropped here */
        (void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
        rw_exit(&bip->bi_rwlock);
        send_up_messages(bip, mlist);
        /* NOTE(review): break and closing braces dropped here */
/*
 * This is called by the MAC layer.  As with the transmit side, we're right
 * in the data path for all I/O on this port, so if we don't need to forward
 * this packet anywhere, we have to send it upwards via mac_rx_common.
 *
 * NOTE(review): recovered from a mangled extraction; list-unlink plumbing,
 * 'continue;' statements, trillmode assignment, freemsg calls, and closing
 * braces were dropped.  Gaps marked below -- restore from upstream.
 */
bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext)
    mblk_t *mp, *mpcopy;
    bridge_link_t *blp = (bridge_link_t *)mh;
    bridge_inst_t *bip = blp->bl_inst;
    bridge_mac_t *bmp = bip->bi_mac;
    mac_header_info_t hdr_info;
    uint16_t vlanid, tci;
    boolean_t trillmode = B_FALSE;

    /*
     * Regardless of state, check for inbound TRILL packets when TRILL is
     * active.  These are pulled out of band and sent for TRILL handling.
     */
    if (blp->bl_trilldata != NULL) {
        /* NOTE(review): tdp/list-head declarations dropped here */
        mblk_t *tail = NULL;

        mutex_enter(&blp->bl_trilllock);
        if ((tdp = blp->bl_trilldata) != NULL) {
            blp->bl_trillthreads++;
            mutex_exit(&blp->bl_trilllock);
            /* NOTE(review): trillmode set / list reset dropped */
            while ((mp = mpnext) != NULL) {
                boolean_t raw_isis, bridge_group;

                mpnext = mp->b_next;

                /*
                 * If the header isn't readable, then leave on
                 * the list and continue.
                 */
                if (mac_header_info(blp->bl_mh, mp,
                    /* NOTE(review): args/tail-append dropped */

                /*
                 * The TRILL document specifies that, on
                 * Ethernet alone, IS-IS packets arrive with
                 * LLC rather than Ethertype, and using a
                 * specific destination address.  We must check
                 * for that here.  Also, we need to give BPDUs
                 * to TRILL for processing.
                 */
                raw_isis = bridge_group = B_FALSE;
                if (hdr_info.mhi_dsttype ==
                    MAC_ADDRTYPE_MULTICAST) {
                    if (memcmp(hdr_info.mhi_daddr,
                        all_isis_rbridges, ETHERADDRL) == 0)
                        /* NOTE(review): set dropped */
                    else if (memcmp(hdr_info.mhi_daddr,
                        bridge_group_address, ETHERADDRL) ==
                        /* NOTE(review): '0)' dropped */
                        bridge_group = B_TRUE;
                if (!raw_isis && !bridge_group &&
                    hdr_info.mhi_bindsap != ETHERTYPE_TRILL &&
                    (hdr_info.mhi_bindsap != ETHERTYPE_VLAN ||
                    /* LINTED: alignment */
                    ((struct ether_vlan_header *)mp->b_rptr)->
                    ether_type != htons(ETHERTYPE_TRILL))) {
                    /* NOTE(review): keep-on-list body dropped */

                /*
                 * We've got TRILL input.  Remove from the list
                 * and send up through the TRILL module.  (Send
                 * a copy through promiscuous receive just to
                 * support snooping on TRILL.  Order isn't
                 * preserved strictly, but that doesn't matter
                 * here.)
                 */
                /* NOTE(review): head-unlink arm dropped here */
                tail->b_next = mpnext;
                mac_trill_snoop(blp->bl_mh, mp);
                update_header(mp, &hdr_info, B_TRUE);
                /*
                 * On raw IS-IS and BPDU frames, we have to
                 * make sure that the length is trimmed
                 * properly.  We use origsap in order to cope
                 * with jumbograms for IS-IS.  (Regular mac
                 * clients would have done this for us.)
                 */
                if (raw_isis || bridge_group) {
                    size_t msglen = msgdsize(mp);

                    if (msglen > hdr_info.mhi_origsap) {
                        /* NOTE(review): adjmsg call dropped */
                        hdr_info.mhi_origsap -
                        /* NOTE(review): else-if lines dropped */
                        hdr_info.mhi_origsap) {
                        /* NOTE(review): drop body dropped */
                trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info);
            mutex_enter(&blp->bl_trilllock);
            if (--blp->bl_trillthreads == 0 &&
                blp->bl_trilldata == NULL)
                cv_broadcast(&blp->bl_trillwait);
        mutex_exit(&blp->bl_trilllock);
        /* NOTE(review): empty-chain early return dropped here */

        /*
         * If this is a TRILL RBridge, then just check whether this link
         * is used at all for forwarding.  If not, then we're done.
         */
        if (!(blp->bl_flags & BLF_TRILLACTIVE) ||
            (blp->bl_flags & BLF_SDUFAIL)) {
            mac_rx_common(blp->bl_mh, rsrc, mpnext);
            /* NOTE(review): return/else lines dropped here */

        /*
         * For regular (STP) bridges, if we're in blocking or listening
         * state, then do nothing.  We don't learn or forward until
         * told to do so.
         */
        if (blp->bl_state == BLS_BLOCKLISTEN) {
            mac_rx_common(blp->bl_mh, rsrc, mpnext);
            /* NOTE(review): return and closing braces dropped */

    /*
     * Send a copy of the message chain up to the observability node users.
     * For TRILL, we must obey the VLAN AF rules, so we go packet-by-
     * packet below instead.
     */
    if (!trillmode && blp->bl_state == BLS_FORWARDING &&
        (bmp->bm_flags & BMF_STARTED) &&
        (mp = copymsgchain(mpnext)) != NULL) {
        mac_rx(bmp->bm_mh, NULL, mp);

    /*
     * We must be in learning or forwarding state, or using TRILL on a link
     * with one or more VLANs active.  For each packet in the list, process
     * the source address, and then attempt to forward.
     */
    while ((mp = mpnext) != NULL) {
        mpnext = mp->b_next;
        /* NOTE(review): b_next unlink dropped here */

        /*
         * If we can't decode the header or if the header specifies a
         * multicast source address (impossible!), then don't bother
         * learning or forwarding, but go ahead and forward up the
         * stack for subsequent processing.
         */
        if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 ||
            (hdr_info.mhi_saddr[0] & 1) != 0) {
            /* NOTE(review): counter lines dropped here */
            mac_rx_common(blp->bl_mh, rsrc, mp);
            /* NOTE(review): continue and brace dropped */

        /*
         * Extract and validate the VLAN ID for this packet.
         */
        if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
            !BRIDGE_AF_ISSET(blp, vlanid)) {
            mac_rx_common(blp->bl_mh, rsrc, mp);
            /* NOTE(review): continue; trillmode guard dropped */

            /*
             * Special test required by TRILL document: must
             * discard frames with outer address set to ESADI.
             */
            if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges,
                /* NOTE(review): length arg / brace dropped */
                mac_rx_common(blp->bl_mh, rsrc, mp);
                /* NOTE(review): continue and brace dropped */

            /*
             * If we're in TRILL mode, then the call above to get
             * the VLAN ID has also checked that we're the
             * appointed forwarder, so report that we're handling
             * this packet to any observability node users.
             */
            if ((bmp->bm_flags & BMF_STARTED) &&
                (mpcopy = copymsg(mp)) != NULL)
                mac_rx(bmp->bm_mh, NULL, mpcopy);

        /*
         * First process the source address and learn from it.  For
         * TRILL, we learn only if we're the appointed forwarder.
         */
        bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
            /* NOTE(review): vlanid argument line dropped */

        /*
         * Now check whether we're forwarding and look up the
         * destination.  If we can forward, do so.
         */
        if (trillmode || blp->bl_state == BLS_FORWARDING) {
            mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
                /* NOTE(review): args and NULL check dropped */
            mac_rx_common(blp->bl_mh, rsrc, mp);
        /* NOTE(review): closing braces dropped here */
/*
 * MAC transmit interposer: every packet sent on a bridged link passes
 * through here so it can be snooped, learned from, and forwarded to other
 * bridge members before going out the local link.
 *
 * NOTE(review): recovered from a mangled extraction; freemsg/continue
 * statements, several condition continuation lines, and closing braces were
 * dropped.  Gaps marked below -- restore from upstream before compiling.
 */
bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext)
    bridge_link_t *blp = (bridge_link_t *)mh;
    bridge_inst_t *bip = blp->bl_inst;
    bridge_mac_t *bmp = bip->bi_mac;
    mac_header_info_t hdr_info;
    uint16_t vlanid, tci;
    mblk_t *mp, *mpcopy;
    boolean_t trillmode;

    trillmode = blp->bl_trilldata != NULL;

    /*
     * If we're using STP and we're in blocking or listening state, or if
     * we're using TRILL and no VLANs are active, then behave as though the
     * bridge isn't here at all, and send on the local link alone.
     */
    if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) ||
        /* NOTE(review): '(trillmode &&' line dropped here */
        (!(blp->bl_flags & BLF_TRILLACTIVE) ||
        (blp->bl_flags & BLF_SDUFAIL)))) {
        /* NOTE(review): counter lines dropped here */
        MAC_RING_TX(blp->bl_mh, rh, mpnext, mp);
        /* NOTE(review): return and brace dropped here */

    /*
     * Send a copy of the message up to the observability node users.
     * TRILL needs to check on a packet-by-packet basis.
     */
    if (!trillmode && blp->bl_state == BLS_FORWARDING &&
        (bmp->bm_flags & BMF_STARTED) &&
        (mp = copymsgchain(mpnext)) != NULL) {
        mac_rx(bmp->bm_mh, NULL, mp);

    while ((mp = mpnext) != NULL) {
        mpnext = mp->b_next;
        /* NOTE(review): b_next unlink dropped here */

        if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
            /* NOTE(review): freemsg/continue body dropped */

        /*
         * Extract and validate the VLAN ID for this packet.
         */
        if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
            !BRIDGE_AF_ISSET(blp, vlanid)) {
            /* NOTE(review): freemsg/continue body dropped */

        /*
         * If we're using TRILL, then we've now validated that we're
         * the forwarder for this VLAN, so go ahead and let
         * observability node users know about the packet.
         */
        if (trillmode && (bmp->bm_flags & BMF_STARTED) &&
            (mpcopy = copymsg(mp)) != NULL) {
            mac_rx(bmp->bm_mh, NULL, mpcopy);

        /*
         * We have to learn from our own transmitted packets, because
         * there may be a Solaris DLPI raw sender (which can specify
         * its own source address) using promiscuous mode for receive.
         * The mac layer information won't (and can't) tell us
         * everything we need to know.
         */
        bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
            /* NOTE(review): vlanid argument line dropped */

        /* attempt forwarding */
        if (trillmode || blp->bl_state == BLS_FORWARDING) {
            mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
                /* NOTE(review): args and NULL check dropped */
            MAC_RING_TX(blp->bl_mh, rh, mp, mp);

            /*
             * If we get stuck, then stop.  Don't let the user's
             * output packets get out of order.  (More importantly:
             * don't try to bridge the same packet multiple times if
             * flow control is asserted.)
             */
            /* NOTE(review): stuck-check guard dropped here */
            mp->b_next = mpnext;
            /* NOTE(review): break, braces, and return dropped */
/*
 * This is called by TRILL when it decapsulates a packet, and we must forward
 * locally.  On failure, we just drop.
 *
 * Note that the ingress_nick reported by TRILL must not represent this local
 * node.
 *
 * NOTE(review): recovered from a mangled extraction; 'mblk_t *mpcopy;',
 * freemsg/return bodies, else-arm braces, and the function tail were
 * dropped.  Gaps marked below -- restore from upstream before compiling.
 */
bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick)
    mac_header_info_t hdr_info;
    uint16_t vlanid, tci;
    bridge_inst_t *bip = blp->bl_inst; /* used by macros */

    if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
        /* NOTE(review): freemsg/return body dropped here */

    /* Extract VLAN ID for this packet. */
    if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) {
        struct ether_vlan_header *evhp;

        /* LINTED: alignment */
        evhp = (struct ether_vlan_header *)mp->b_rptr;
        tci = ntohs(evhp->ether_tci);
        vlanid = VLAN_ID(tci);
    /* NOTE(review): else-arm opening dropped here */
        /* Inner VLAN headers are required in TRILL data packets */
        DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *,
            blp, mblk_t *, mp, uint16_t, ingress_nick);
        /* NOTE(review): freemsg/return body dropped here */

    /* Learn the location of this sender in the RBridge network */
    bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid);

    /* attempt forwarding */
    mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE);
    /* NOTE(review): NULL-check guard dropped here */
    if (bridge_can_send(blp, vlanid)) {
        /* Deliver a copy locally as well */
        if ((mpcopy = copymsg(mp)) != NULL)
            mac_rx_common(blp->bl_mh, NULL, mpcopy);
        MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
        /* NOTE(review): function tail (counters/freemsg) dropped */
/*
 * This function is used by TRILL _only_ to transmit TRILL-encapsulated
 * packets.  It sends on a single underlying link and does not bridge.
 *
 * NOTE(review): the extraction dropped this function's tail (post-TX
 * accounting / cleanup) -- restore from upstream before compiling.
 */
bridge_trill_output(bridge_link_t *blp, mblk_t *mp)
    bridge_inst_t *bip = blp->bl_inst; /* used by macros */

    /* Give observability/snoop users a look at the encapsulated frame. */
    mac_trill_snoop(blp->bl_mh, mp);
    MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
    /* NOTE(review): trailing statements dropped here */
2622 * Set the "appointed forwarder" flag array for this link. TRILL controls
2623 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for
2627 bridge_trill_setvlans(bridge_link_t
*blp
, const uint8_t *arr
)
2630 uint_t newflags
= 0;
2632 for (i
= 0; i
< BRIDGE_VLAN_ARR_SIZE
; i
++) {
2633 if ((blp
->bl_afs
[i
] = arr
[i
]) != 0)
2634 newflags
= BLF_TRILLACTIVE
;
2636 blp
->bl_flags
= (blp
->bl_flags
& ~BLF_TRILLACTIVE
) | newflags
;
2640 bridge_trill_flush(bridge_link_t
*blp
, uint16_t vlan
, boolean_t dotrill
)
2642 bridge_inst_t
*bip
= blp
->bl_inst
;
2643 bridge_fwd_t
*bfp
, *bfnext
;
2644 avl_tree_t fwd_scavenge
;
2647 _NOTE(ARGUNUSED(vlan
));
2649 avl_create(&fwd_scavenge
, fwd_compare
, sizeof (bridge_fwd_t
),
2650 offsetof(bridge_fwd_t
, bf_node
));
2651 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
2652 bfnext
= avl_first(&bip
->bi_fwd
);
2653 while ((bfp
= bfnext
) != NULL
) {
2654 bfnext
= AVL_NEXT(&bip
->bi_fwd
, bfp
);
2655 if (bfp
->bf_flags
& BFF_LOCALADDR
)
2658 /* port doesn't matter if we're flushing TRILL */
2659 if (bfp
->bf_trill_nick
== RBRIDGE_NICKNAME_NONE
)
2662 if (bfp
->bf_trill_nick
!= RBRIDGE_NICKNAME_NONE
)
2664 for (i
= 0; i
< bfp
->bf_nlinks
; i
++) {
2665 if (bfp
->bf_links
[i
] == blp
)
2668 if (i
>= bfp
->bf_nlinks
)
2671 ASSERT(bfp
->bf_flags
& BFF_INTREE
);
2672 avl_remove(&bip
->bi_fwd
, bfp
);
2673 bfp
->bf_flags
&= ~BFF_INTREE
;
2674 avl_add(&fwd_scavenge
, bfp
);
2676 rw_exit(&bip
->bi_rwlock
);
2677 bfnext
= avl_first(&fwd_scavenge
);
2678 while ((bfp
= bfnext
) != NULL
) {
2679 bfnext
= AVL_NEXT(&fwd_scavenge
, bfp
);
2680 avl_remove(&fwd_scavenge
, bfp
);
2683 avl_destroy(&fwd_scavenge
);
2687 * Let the mac module take or drop a reference to a bridge link. When this is
2688 * called, the mac module is holding the mi_bridge_lock, so the link cannot be
2689 * in the process of entering or leaving a bridge.
2692 bridge_ref_cb(mac_handle_t mh
, boolean_t hold
)
2694 bridge_link_t
*blp
= (bridge_link_t
*)mh
;
2697 atomic_inc_uint(&blp
->bl_refs
);
2703 * Handle link state changes reported by the mac layer. This acts as a filter
2704 * for link state changes: if a link is reporting down, but there are other
2705 * links still up on the bridge, then the state is changed to "up." When the
2706 * last link goes down, all are marked down, and when the first link goes up,
2707 * all are marked up. (Recursion is avoided by the use of the "redo" function.)
2709 * We treat unknown as equivalent to "up."
2712 bridge_ls_cb(mac_handle_t mh
, link_state_t newls
)
2714 bridge_link_t
*blp
= (bridge_link_t
*)mh
;
2715 bridge_link_t
*blcmp
;
2719 if (newls
!= LINK_STATE_DOWN
&& blp
->bl_linkstate
!= LINK_STATE_DOWN
||
2720 (blp
->bl_flags
& (BLF_DELETED
|BLF_SDUFAIL
))) {
2721 blp
->bl_linkstate
= newls
;
2726 * Scan first to see if there are any other non-down links. If there
2727 * are, then we're done. Otherwise, if all others are down, then the
2728 * state of this link is the state of the bridge.
2731 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
2732 for (blcmp
= list_head(&bip
->bi_links
); blcmp
!= NULL
;
2733 blcmp
= list_next(&bip
->bi_links
, blcmp
)) {
2735 !(blcmp
->bl_flags
& (BLF_DELETED
|BLF_SDUFAIL
)) &&
2736 blcmp
->bl_linkstate
!= LINK_STATE_DOWN
)
2740 if (blcmp
!= NULL
) {
2742 * If there are other links that are considered up, then tell
2743 * the caller that the link is actually still up, regardless of
2744 * this link's underlying state.
2746 blp
->bl_linkstate
= newls
;
2747 newls
= LINK_STATE_UP
;
2748 } else if (blp
->bl_linkstate
!= newls
) {
2750 * If we've found no other 'up' links, and this link has
2751 * changed state, then report the new state of the bridge to
2752 * all other clients.
2754 blp
->bl_linkstate
= newls
;
2755 for (blcmp
= list_head(&bip
->bi_links
); blcmp
!= NULL
;
2756 blcmp
= list_next(&bip
->bi_links
, blcmp
)) {
2757 if (blcmp
!= blp
&& !(blcmp
->bl_flags
& BLF_DELETED
))
2758 mac_link_redo(blcmp
->bl_mh
, newls
);
2761 if ((bmp
->bm_linkstate
= newls
) != LINK_STATE_DOWN
)
2762 bmp
->bm_linkstate
= LINK_STATE_UP
;
2763 mac_link_redo(bmp
->bm_mh
, bmp
->bm_linkstate
);
2765 rw_exit(&bip
->bi_rwlock
);
2770 bridge_add_link(void *arg
)
2773 bridge_stream_t
*bsp
;
2774 bridge_inst_t
*bip
, *bipt
;
2776 datalink_id_t linkid
;
2780 bridge_link_t
*blp
= NULL
, *blpt
;
2781 const mac_info_t
*mip
;
2782 boolean_t macopen
= B_FALSE
;
2783 char linkname
[MAXLINKNAMELEN
];
2784 char kstatname
[KSTAT_STRLEN
];
2786 link_state_t linkstate
;
2789 bsp
= (bridge_stream_t
*)mp
->b_next
;
2792 /* LINTED: alignment */
2793 linkid
= *(datalink_id_t
*)mp
->b_cont
->b_rptr
;
2796 * First make sure that there is no other bridge that has this link.
2797 * We don't want to overlap operations from two bridges; the MAC layer
2798 * supports only one bridge on a given MAC at a time.
2800 * We rely on the fact that there's just one taskq thread for the
2801 * bridging module: once we've checked for a duplicate, we can drop the
2802 * lock, because no other thread could possibly be adding another link
2805 mutex_enter(&inst_lock
);
2806 for (bipt
= list_head(&inst_list
); bipt
!= NULL
;
2807 bipt
= list_next(&inst_list
, bipt
)) {
2808 rw_enter(&bipt
->bi_rwlock
, RW_READER
);
2809 for (blpt
= list_head(&bipt
->bi_links
); blpt
!= NULL
;
2810 blpt
= list_next(&bipt
->bi_links
, blpt
)) {
2811 if (linkid
== blpt
->bl_linkid
)
2814 rw_exit(&bipt
->bi_rwlock
);
2818 mutex_exit(&inst_lock
);
2824 if ((err
= mac_open_by_linkid(linkid
, &mh
)) != 0)
2828 /* we bridge only Ethernet */
2830 if (mip
->mi_media
!= DL_ETHER
) {
2836 * Get the current maximum SDU on this interface. If there are other
2837 * links on the bridge, then this one must match, or it errors out.
2838 * Otherwise, the first link becomes the standard for the new bridge.
2840 mac_sdu_get(mh
, NULL
, &maxsdu
);
2842 if (list_is_empty(&bip
->bi_links
)) {
2843 bmp
->bm_maxsdu
= maxsdu
;
2844 (void) mac_maxsdu_update(bmp
->bm_mh
, maxsdu
);
2847 /* figure the kstat name; also used as the mac client name */
2848 i
= MBLKL(mp
->b_cont
) - sizeof (datalink_id_t
);
2849 if (i
< 0 || i
>= MAXLINKNAMELEN
)
2850 i
= MAXLINKNAMELEN
- 1;
2851 bcopy(mp
->b_cont
->b_rptr
+ sizeof (datalink_id_t
), linkname
, i
);
2853 (void) snprintf(kstatname
, sizeof (kstatname
), "%s-%s", bip
->bi_name
,
2856 if ((blp
= kmem_zalloc(sizeof (*blp
), KM_NOSLEEP
)) == NULL
) {
2860 blp
->bl_lfailmp
= allocb(sizeof (bridge_ctl_t
), BPRI_MED
);
2861 if (blp
->bl_lfailmp
== NULL
) {
2862 kmem_free(blp
, sizeof (*blp
));
2869 atomic_inc_uint(&bip
->bi_refs
);
2872 blp
->bl_linkid
= linkid
;
2873 blp
->bl_maxsdu
= maxsdu
;
2874 cv_init(&blp
->bl_trillwait
, NULL
, CV_DRIVER
, NULL
);
2875 mutex_init(&blp
->bl_trilllock
, NULL
, MUTEX_DRIVER
, NULL
);
2876 (void) memset(blp
->bl_afs
, 0xff, sizeof (blp
->bl_afs
));
2878 err
= mac_client_open(mh
, &blp
->bl_mch
, kstatname
, 0);
2881 blp
->bl_flags
|= BLF_CLIENT_OPEN
;
2883 err
= mac_margin_add(mh
, &blp
->bl_margin
, B_TRUE
);
2886 blp
->bl_flags
|= BLF_MARGIN_ADDED
;
2888 blp
->bl_mnh
= mac_notify_add(mh
, bridge_notify_cb
, blp
);
2890 /* Enable Bridging on the link */
2891 err
= mac_bridge_set(mh
, (mac_handle_t
)blp
);
2894 blp
->bl_flags
|= BLF_SET_BRIDGE
;
2896 err
= mac_promisc_add(blp
->bl_mch
, MAC_CLIENT_PROMISC_ALL
, NULL
,
2897 blp
, &blp
->bl_mphp
, MAC_PROMISC_FLAGS_NO_TX_LOOP
);
2900 blp
->bl_flags
|= BLF_PROM_ADDED
;
2902 bridge_new_unicst(blp
);
2904 blp
->bl_ksp
= kstat_setup((kstat_named_t
*)&blp
->bl_kstats
,
2905 link_kstats_list
, Dim(link_kstats_list
), kstatname
);
2908 * The link holds a reference to the bridge instance, so that the
2909 * instance can't go away before the link is freed. The insertion into
2910 * bi_links holds a reference on the link (reference set to 1 above).
2911 * When marking as removed from bi_links (BLF_DELETED), drop the
2912 * reference on the link. When freeing the link, drop the reference on
2913 * the instance. BLF_LINK_ADDED tracks link insertion in bi_links list.
2915 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
2916 list_insert_tail(&bip
->bi_links
, blp
);
2917 blp
->bl_flags
|= BLF_LINK_ADDED
;
2920 * If the new link is no good on this bridge, then let the daemon know
2921 * about the problem.
2924 if (maxsdu
!= bmp
->bm_maxsdu
)
2925 link_sdu_fail(blp
, B_TRUE
, &mlist
);
2926 rw_exit(&bip
->bi_rwlock
);
2927 send_up_messages(bip
, mlist
);
2930 * Trigger a link state update so that if this link is the first one
2931 * "up" in the bridge, then we notify everyone. This triggers a trip
2932 * through bridge_ls_cb.
2934 linkstate
= mac_stat_get(mh
, MAC_STAT_LOWLINK_STATE
);
2935 blp
->bl_linkstate
= LINK_STATE_DOWN
;
2936 mac_link_update(mh
, linkstate
);
2939 * We now need to report back to the stream that invoked us, and then
2940 * drop the reference on the stream that we're holding.
2942 miocack(bsp
->bs_wq
, mp
, 0, 0);
2953 miocnak(bsp
->bs_wq
, mp
, 0, err
);
2958 bridge_rem_link(void *arg
)
2961 bridge_stream_t
*bsp
;
2964 datalink_id_t linkid
;
2965 bridge_link_t
*blp
, *blsave
;
2969 bsp
= (bridge_stream_t
*)mp
->b_next
;
2972 /* LINTED: alignment */
2973 linkid
= *(datalink_id_t
*)mp
->b_cont
->b_rptr
;
2976 * We become reader here so that we can loop over the other links and
2977 * deliver link up/down notification.
2979 rw_enter(&bip
->bi_rwlock
, RW_READER
);
2981 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
2982 blp
= list_next(&bip
->bi_links
, blp
)) {
2983 if (blp
->bl_linkid
== linkid
&&
2984 !(blp
->bl_flags
& BLF_DELETED
)) {
2985 blp
->bl_flags
|= BLF_DELETED
;
2986 (void) ddi_taskq_dispatch(bridge_taskq
, link_shutdown
,
2994 * Check if this link is up and the remainder of the links are all
2997 if (blp
!= NULL
&& blp
->bl_linkstate
!= LINK_STATE_DOWN
) {
2998 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
2999 blp
= list_next(&bip
->bi_links
, blp
)) {
3000 if (blp
->bl_linkstate
!= LINK_STATE_DOWN
&&
3001 !(blp
->bl_flags
& (BLF_DELETED
|BLF_SDUFAIL
)))
3005 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
3006 blp
= list_next(&bip
->bi_links
, blp
)) {
3007 if (!(blp
->bl_flags
& BLF_DELETED
))
3008 mac_link_redo(blp
->bl_mh
,
3012 bmp
->bm_linkstate
= LINK_STATE_DOWN
;
3013 mac_link_redo(bmp
->bm_mh
, LINK_STATE_DOWN
);
3018 * Check if there's just one working link left on the bridge. If so,
3019 * then that link is now authoritative for bridge MTU.
3022 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
3023 blp
= list_next(&bip
->bi_links
, blp
)) {
3024 if (!(blp
->bl_flags
& BLF_DELETED
)) {
3033 if (blsave
!= NULL
&& blp
== NULL
&&
3034 blsave
->bl_maxsdu
!= bmp
->bm_maxsdu
) {
3035 bmp
->bm_maxsdu
= blsave
->bl_maxsdu
;
3036 (void) mac_maxsdu_update(bmp
->bm_mh
, blsave
->bl_maxsdu
);
3037 link_sdu_fail(blsave
, B_FALSE
, &mlist
);
3039 rw_exit(&bip
->bi_rwlock
);
3040 send_up_messages(bip
, mlist
);
3043 miocack(bsp
->bs_wq
, mp
, 0, 0);
3045 miocnak(bsp
->bs_wq
, mp
, 0, ENOENT
);
3050 * This function intentionally returns with bi_rwlock held; it is intended for
3051 * quick checks and updates.
3053 static bridge_link_t
*
3054 enter_link(bridge_inst_t
*bip
, datalink_id_t linkid
)
3058 rw_enter(&bip
->bi_rwlock
, RW_READER
);
3059 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
3060 blp
= list_next(&bip
->bi_links
, blp
)) {
3061 if (blp
->bl_linkid
== linkid
&& !(blp
->bl_flags
& BLF_DELETED
))
3068 bridge_ioctl(queue_t
*wq
, mblk_t
*mp
)
3070 bridge_stream_t
*bsp
= wq
->q_ptr
;
3078 /* LINTED: alignment */
3079 iop
= (struct iocblk
*)mp
->b_rptr
;
3082 * For now, all of the bridge ioctls are privileged.
3084 if ((cr
= msg_getcred(mp
, NULL
)) == NULL
)
3086 if (cr
!= NULL
&& secpolicy_net_config(cr
, B_FALSE
) != 0) {
3087 miocnak(wq
, mp
, 0, EPERM
);
3091 switch (iop
->ioc_cmd
) {
3092 case BRIOC_NEWBRIDGE
: {
3093 bridge_newbridge_t
*bnb
;
3095 if (bsp
->bs_inst
!= NULL
||
3096 (rc
= miocpullup(mp
, sizeof (bridge_newbridge_t
))) != 0)
3098 /* LINTED: alignment */
3099 bnb
= (bridge_newbridge_t
*)mp
->b_cont
->b_rptr
;
3100 bnb
->bnb_name
[MAXNAMELEN
-1] = '\0';
3101 rc
= bridge_create(bnb
->bnb_linkid
, bnb
->bnb_name
, &bip
, cr
);
3105 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
3106 if (bip
->bi_control
!= NULL
) {
3107 rw_exit(&bip
->bi_rwlock
);
3111 atomic_inc_uint(&bip
->bi_refs
);
3112 bsp
->bs_inst
= bip
; /* stream holds reference */
3113 bip
->bi_control
= bsp
;
3114 rw_exit(&bip
->bi_rwlock
);
3121 if ((bip
= bsp
->bs_inst
) == NULL
||
3122 (rc
= miocpullup(mp
, sizeof (datalink_id_t
))) != 0)
3125 * We cannot perform the action in this thread, because we're
3126 * not in process context, and we may already be holding
3127 * MAC-related locks. Place the request on taskq.
3129 mp
->b_next
= (mblk_t
*)bsp
;
3131 (void) ddi_taskq_dispatch(bridge_taskq
, bridge_add_link
, mp
,
3136 if ((bip
= bsp
->bs_inst
) == NULL
||
3137 (rc
= miocpullup(mp
, sizeof (datalink_id_t
))) != 0)
3140 * We cannot perform the action in this thread, because we're
3141 * not in process context, and we may already be holding
3142 * MAC-related locks. Place the request on taskq.
3144 mp
->b_next
= (mblk_t
*)bsp
;
3146 (void) ddi_taskq_dispatch(bridge_taskq
, bridge_rem_link
, mp
,
3150 case BRIOC_SETSTATE
: {
3151 bridge_setstate_t
*bss
;
3153 if ((bip
= bsp
->bs_inst
) == NULL
||
3154 (rc
= miocpullup(mp
, sizeof (*bss
))) != 0)
3156 /* LINTED: alignment */
3157 bss
= (bridge_setstate_t
*)mp
->b_cont
->b_rptr
;
3158 if ((blp
= enter_link(bip
, bss
->bss_linkid
)) == NULL
) {
3162 blp
->bl_state
= bss
->bss_state
;
3164 rw_exit(&bip
->bi_rwlock
);
3168 case BRIOC_SETPVID
: {
3169 bridge_setpvid_t
*bsv
;
3171 if ((bip
= bsp
->bs_inst
) == NULL
||
3172 (rc
= miocpullup(mp
, sizeof (*bsv
))) != 0)
3174 /* LINTED: alignment */
3175 bsv
= (bridge_setpvid_t
*)mp
->b_cont
->b_rptr
;
3176 if (bsv
->bsv_vlan
> VLAN_ID_MAX
)
3178 if ((blp
= enter_link(bip
, bsv
->bsv_linkid
)) == NULL
) {
3180 } else if (blp
->bl_pvid
== bsv
->bsv_vlan
) {
3184 BRIDGE_VLAN_CLR(blp
, blp
->bl_pvid
);
3185 blp
->bl_pvid
= bsv
->bsv_vlan
;
3186 if (blp
->bl_pvid
!= 0)
3187 BRIDGE_VLAN_SET(blp
, blp
->bl_pvid
);
3189 rw_exit(&bip
->bi_rwlock
);
3193 case BRIOC_VLANENAB
: {
3194 bridge_vlanenab_t
*bve
;
3196 if ((bip
= bsp
->bs_inst
) == NULL
||
3197 (rc
= miocpullup(mp
, sizeof (*bve
))) != 0)
3199 /* LINTED: alignment */
3200 bve
= (bridge_vlanenab_t
*)mp
->b_cont
->b_rptr
;
3201 if (bve
->bve_vlan
> VLAN_ID_MAX
)
3203 if ((blp
= enter_link(bip
, bve
->bve_linkid
)) == NULL
) {
3207 /* special case: vlan 0 means "all" */
3208 if (bve
->bve_vlan
== 0) {
3209 (void) memset(blp
->bl_vlans
,
3210 bve
->bve_onoff
? ~0 : 0,
3211 sizeof (blp
->bl_vlans
));
3212 BRIDGE_VLAN_CLR(blp
, 0);
3213 if (blp
->bl_pvid
!= 0)
3214 BRIDGE_VLAN_SET(blp
, blp
->bl_pvid
);
3215 } else if (bve
->bve_vlan
== blp
->bl_pvid
) {
3217 } else if (bve
->bve_onoff
) {
3218 BRIDGE_VLAN_SET(blp
, bve
->bve_vlan
);
3220 BRIDGE_VLAN_CLR(blp
, bve
->bve_vlan
);
3223 rw_exit(&bip
->bi_rwlock
);
3227 case BRIOC_FLUSHFWD
: {
3228 bridge_flushfwd_t
*bff
;
3229 bridge_fwd_t
*bfp
, *bfnext
;
3230 avl_tree_t fwd_scavenge
;
3233 if ((bip
= bsp
->bs_inst
) == NULL
||
3234 (rc
= miocpullup(mp
, sizeof (*bff
))) != 0)
3236 /* LINTED: alignment */
3237 bff
= (bridge_flushfwd_t
*)mp
->b_cont
->b_rptr
;
3238 rw_enter(&bip
->bi_rwlock
, RW_WRITER
);
3239 /* This case means "all" */
3240 if (bff
->bff_linkid
== DATALINK_INVALID_LINKID
) {
3243 for (blp
= list_head(&bip
->bi_links
); blp
!= NULL
;
3244 blp
= list_next(&bip
->bi_links
, blp
)) {
3245 if (blp
->bl_linkid
== bff
->bff_linkid
&&
3246 !(blp
->bl_flags
& BLF_DELETED
))
3251 rw_exit(&bip
->bi_rwlock
);
3255 avl_create(&fwd_scavenge
, fwd_compare
, sizeof (bridge_fwd_t
),
3256 offsetof(bridge_fwd_t
, bf_node
));
3257 bfnext
= avl_first(&bip
->bi_fwd
);
3258 while ((bfp
= bfnext
) != NULL
) {
3259 bfnext
= AVL_NEXT(&bip
->bi_fwd
, bfp
);
3260 if (bfp
->bf_flags
& BFF_LOCALADDR
)
3263 for (i
= 0; i
< bfp
->bf_maxlinks
; i
++) {
3264 if (bfp
->bf_links
[i
] == blp
)
3268 * If the link is there and we're excluding,
3269 * then skip. If the link is not there and
3270 * we're doing only that link, then skip.
3272 if ((i
< bfp
->bf_maxlinks
) == bff
->bff_exclude
)
3275 ASSERT(bfp
->bf_flags
& BFF_INTREE
);
3276 avl_remove(&bip
->bi_fwd
, bfp
);
3277 bfp
->bf_flags
&= ~BFF_INTREE
;
3278 avl_add(&fwd_scavenge
, bfp
);
3280 rw_exit(&bip
->bi_rwlock
);
3281 bfnext
= avl_first(&fwd_scavenge
);
3282 while ((bfp
= bfnext
) != NULL
) {
3283 bfnext
= AVL_NEXT(&fwd_scavenge
, bfp
);
3284 avl_remove(&fwd_scavenge
, bfp
);
3285 fwd_unref(bfp
); /* drop tree reference */
3287 avl_destroy(&fwd_scavenge
);
3291 case BRIOC_TABLEMAX
:
3292 if ((bip
= bsp
->bs_inst
) == NULL
||
3293 (rc
= miocpullup(mp
, sizeof (uint32_t))) != 0)
3295 /* LINTED: alignment */
3296 bip
->bi_tablemax
= *(uint32_t *)mp
->b_cont
->b_rptr
;
3301 miocack(wq
, mp
, len
, 0);
3303 miocnak(wq
, mp
, 0, rc
);
3307 bridge_wput(queue_t
*wq
, mblk_t
*mp
)
3309 switch (DB_TYPE(mp
)) {
3311 bridge_ioctl(wq
, mp
);
3314 if (*mp
->b_rptr
& FLUSHW
)
3315 *mp
->b_rptr
&= ~FLUSHW
;
3316 if (*mp
->b_rptr
& FLUSHR
)
3328 * This function allocates the main data structures for the bridge driver and
3329 * connects us into devfs.
3332 bridge_inst_init(void)
3334 bridge_scan_interval
= 5 * drv_usectohz(1000000);
3335 bridge_fwd_age
= 25 * drv_usectohz(1000000);
3337 rw_init(&bmac_rwlock
, NULL
, RW_DRIVER
, NULL
);
3338 list_create(&bmac_list
, sizeof (bridge_mac_t
),
3339 offsetof(bridge_mac_t
, bm_node
));
3340 list_create(&inst_list
, sizeof (bridge_inst_t
),
3341 offsetof(bridge_inst_t
, bi_node
));
3342 cv_init(&inst_cv
, NULL
, CV_DRIVER
, NULL
);
3343 mutex_init(&inst_lock
, NULL
, MUTEX_DRIVER
, NULL
);
3344 cv_init(&stream_ref_cv
, NULL
, CV_DRIVER
, NULL
);
3345 mutex_init(&stream_ref_lock
, NULL
, MUTEX_DRIVER
, NULL
);
3347 mac_bridge_vectors(bridge_xmit_cb
, bridge_recv_cb
, bridge_ref_cb
,
3352 * This function disconnects from devfs and destroys all data structures in
3353 * preparation for unload. It's assumed that there are no active bridge
3354 * references left at this point.
3357 bridge_inst_fini(void)
3359 mac_bridge_vectors(NULL
, NULL
, NULL
, NULL
);
3360 if (bridge_timerid
!= 0)
3361 (void) untimeout(bridge_timerid
);
3362 rw_destroy(&bmac_rwlock
);
3363 list_destroy(&bmac_list
);
3364 list_destroy(&inst_list
);
3365 cv_destroy(&inst_cv
);
3366 mutex_destroy(&inst_lock
);
3367 cv_destroy(&stream_ref_cv
);
3368 mutex_destroy(&stream_ref_lock
);
3375 * Attach bridge driver to the system.
3378 bridge_attach(dev_info_t
*dip
, ddi_attach_cmd_t cmd
)
3380 if (cmd
!= DDI_ATTACH
)
3381 return (DDI_FAILURE
);
3383 if (ddi_create_minor_node(dip
, BRIDGE_CTL
, S_IFCHR
, 0, DDI_PSEUDO
,
3384 CLONE_DEV
) == DDI_FAILURE
) {
3385 return (DDI_FAILURE
);
3388 if (dld_ioc_register(BRIDGE_IOC
, bridge_ioc_list
,
3389 DLDIOCCNT(bridge_ioc_list
)) != 0) {
3390 ddi_remove_minor_node(dip
, BRIDGE_CTL
);
3391 return (DDI_FAILURE
);
3394 bridge_dev_info
= dip
;
3395 bridge_major
= ddi_driver_major(dip
);
3396 bridge_taskq
= ddi_taskq_create(dip
, BRIDGE_DEV_NAME
, 1,
3397 TASKQ_DEFAULTPRI
, 0);
3398 return (DDI_SUCCESS
);
3405 * Detach an interface to the system.
3408 bridge_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
3410 if (cmd
!= DDI_DETACH
)
3411 return (DDI_FAILURE
);
3413 ddi_remove_minor_node(dip
, NULL
);
3414 ddi_taskq_destroy(bridge_taskq
);
3415 bridge_dev_info
= NULL
;
3416 return (DDI_SUCCESS
);
3423 * Translate "dev_t" to a pointer to the associated "dev_info_t".
3427 bridge_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
,
3433 case DDI_INFO_DEVT2DEVINFO
:
3434 if (bridge_dev_info
== NULL
) {
3437 *result
= (void *)bridge_dev_info
;
3441 case DDI_INFO_DEVT2INSTANCE
:
3452 static struct module_info bridge_modinfo
= {
3453 2105, /* mi_idnum */
3454 BRIDGE_DEV_NAME
, /* mi_idname */
3456 16384, /* mi_maxpsz */
3457 65536, /* mi_hiwat */
3461 static struct qinit bridge_rinit
= {
3464 bridge_open
, /* qi_qopen */
3465 bridge_close
, /* qi_qclose */
3466 NULL
, /* qi_qadmin */
3467 &bridge_modinfo
, /* qi_minfo */
3471 static struct qinit bridge_winit
= {
3472 (int (*)())bridge_wput
, /* qi_putp */
3474 NULL
, /* qi_qopen */
3475 NULL
, /* qi_qclose */
3476 NULL
, /* qi_qadmin */
3477 &bridge_modinfo
, /* qi_minfo */
3481 static struct streamtab bridge_tab
= {
3482 &bridge_rinit
, /* st_rdinit */
3483 &bridge_winit
/* st_wrinit */
3486 /* No STREAMS perimeters; we do all our own locking */
3487 DDI_DEFINE_STREAM_OPS(bridge_ops
, nulldev
, nulldev
, bridge_attach
,
3488 bridge_detach
, nodev
, bridge_info
, D_NEW
| D_MP
, &bridge_tab
,
3489 ddi_quiesce_not_supported
);
3491 static struct modldrv modldrv
= {
3497 static struct modlinkage modlinkage
= {
3508 mac_init_ops(NULL
, BRIDGE_DEV_NAME
);
3510 if ((retv
= mod_install(&modlinkage
)) != 0)
3520 rw_enter(&bmac_rwlock
, RW_READER
);
3521 retv
= list_is_empty(&bmac_list
) ? 0 : EBUSY
;
3522 rw_exit(&bmac_rwlock
);
3524 (retv
= mod_remove(&modlinkage
)) == 0)
3530 _info(struct modinfo
*modinfop
)
3532 return (mod_info(&modlinkage
, modinfop
));