bmake-ify mega_sas
[unleashed.git] / usr / src / uts / common / io / trill.c
bloba008b4d807d6e669a972b5bd97826e9c70348c1f
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * This module supports AF_TRILL sockets and TRILL layer-2 forwarding.
31 #include <sys/strsubr.h>
32 #include <sys/socket.h>
33 #include <sys/socketvar.h>
34 #include <sys/modctl.h>
35 #include <sys/cmn_err.h>
36 #include <sys/tihdr.h>
37 #include <sys/strsun.h>
38 #include <sys/policy.h>
39 #include <sys/ethernet.h>
40 #include <sys/vlan.h>
41 #include <net/trill.h>
42 #include <net/if_dl.h>
43 #include <sys/mac.h>
44 #include <sys/mac_client.h>
45 #include <sys/mac_provider.h>
46 #include <sys/mac_client_priv.h>
47 #include <sys/sdt.h>
48 #include <sys/dls.h>
49 #include <sys/sunddi.h>
51 #include "trill_impl.h"
53 static void trill_del_all(trill_inst_t *, boolean_t);
54 static int trill_del_nick(trill_inst_t *, uint16_t, boolean_t);
55 static void trill_stop_recv(trill_sock_t *);
56 static void trill_ctrl_input(trill_sock_t *, mblk_t *, const uint8_t *,
57 uint16_t);
58 static trill_node_t *trill_node_lookup(trill_inst_t *, uint16_t);
59 static void trill_node_unref(trill_inst_t *, trill_node_t *);
60 static void trill_sock_unref(trill_sock_t *);
61 static void trill_kstats_init(trill_sock_t *, const char *);
63 static list_t trill_inst_list;
64 static krwlock_t trill_inst_rwlock;
66 static sock_lower_handle_t trill_create(int, int, int, sock_downcalls_t **,
67 uint_t *, int *, int, cred_t *);
69 static smod_reg_t sinfo = {
70 SOCKMOD_VERSION,
71 "trill",
72 SOCK_UC_VERSION,
73 SOCK_DC_VERSION,
74 trill_create,
75 NULL,
78 /* modldrv structure */
79 static struct modlsockmod sockmod = {
80 &mod_sockmodops, "AF_TRILL socket module", &sinfo
83 /* modlinkage structure */
84 static struct modlinkage ml = {
85 MODREV_1,
86 &sockmod,
87 NULL
/*
 * A nickname is usable when it is neither RBRIDGE_NICKNAME_NONE nor
 * RBRIDGE_NICKNAME_UNUSED.
 */
#define	VALID_NICK(n)	(!((n) == RBRIDGE_NICKNAME_NONE || \
	(n) == RBRIDGE_NICKNAME_UNUSED))
93 static mblk_t *
94 create_trill_header(trill_sock_t *tsock, mblk_t *mp, const uint8_t *daddr,
95 boolean_t trill_hdr_ok, boolean_t multidest, uint16_t tci,
96 size_t msglen)
98 int extra_hdr_len;
99 struct ether_vlan_header *ethvlanhdr;
100 mblk_t *hdr_mp;
101 uint16_t etype;
103 etype = msglen > 0 ? (uint16_t)msglen : ETHERTYPE_TRILL;
105 /* When sending on the PVID, we must not give a VLAN ID */
106 if (tci == tsock->ts_link->bl_pvid)
107 tci = TRILL_NO_TCI;
110 * Create new Ethernet header and include additional space
111 * for writing TRILL header and/or VLAN tag.
113 extra_hdr_len = (trill_hdr_ok ? 0 : sizeof (trill_header_t)) +
114 (tci != TRILL_NO_TCI ? sizeof (struct ether_vlan_extinfo) : 0);
115 hdr_mp = mac_header(tsock->ts_link->bl_mh, daddr,
116 tci != TRILL_NO_TCI ? ETHERTYPE_VLAN : etype, mp, extra_hdr_len);
117 if (hdr_mp == NULL) {
118 freemsg(mp);
119 return (NULL);
122 if (tci != TRILL_NO_TCI) {
123 /* LINTED: alignment */
124 ethvlanhdr = (struct ether_vlan_header *)hdr_mp->b_rptr;
125 ethvlanhdr->ether_tci = htons(tci);
126 ethvlanhdr->ether_type = htons(etype);
127 hdr_mp->b_wptr += sizeof (struct ether_vlan_extinfo);
130 if (!trill_hdr_ok) {
131 trill_header_t *thp;
132 /* LINTED: alignment */
133 thp = (trill_header_t *)hdr_mp->b_wptr;
134 (void) memset(thp, 0, sizeof (trill_header_t));
135 thp->th_hopcount = TRILL_DEFAULT_HOPS;
136 thp->th_multidest = (multidest ? 1:0);
137 hdr_mp->b_wptr += sizeof (trill_header_t);
140 hdr_mp->b_cont = mp;
141 return (hdr_mp);
145 * TRILL local recv function. TRILL data frames that should be received
146 * by the local system are decapsulated here and passed to bridging for
147 * learning and local system receive. Only called when we are the forwarder
148 * on the link (multi-dest frames) or the frame was destined for us.
150 static void
151 trill_recv_local(trill_sock_t *tsock, mblk_t *mp, uint16_t ingressnick)
153 struct ether_header *inner_ethhdr;
155 /* LINTED: alignment */
156 inner_ethhdr = (struct ether_header *)mp->b_rptr;
157 DTRACE_PROBE1(trill__recv__local, struct ether_header *, inner_ethhdr);
159 DB_CKSUMFLAGS(mp) = 0;
161 * Transmit the decapsulated frame on the link via Bridging.
162 * Bridging does source address learning and appropriate forwarding.
164 bridge_trill_decaps(tsock->ts_link, mp, ingressnick);
165 KSPINCR(tks_decap);
169 * Determines the outgoing link to reach a RBridge having the given nick
170 * Assumes caller has acquired the trill instance rwlock.
172 static trill_sock_t *
173 find_trill_link(trill_inst_t *tip, datalink_id_t linkid)
175 trill_sock_t *tsp = NULL;
177 ASSERT(RW_LOCK_HELD(&tip->ti_rwlock));
178 for (tsp = list_head(&tip->ti_socklist); tsp != NULL;
179 tsp = list_next(&tip->ti_socklist, tsp)) {
180 if (tsp->ts_link != NULL && tsp->ts_link->bl_linkid == linkid) {
181 ASSERT(tsp->ts_link->bl_mh != NULL);
182 ASSERT(!(tsp->ts_flags & TSF_SHUTDOWN));
183 atomic_inc_uint(&tsp->ts_refs);
184 break;
187 return (tsp);
191 * TRILL destination forwarding function. Transmits the TRILL data packet
192 * to the next-hop, adjacent RBridge. Consumes passed mblk_t.
194 static void
195 trill_dest_fwd(trill_inst_t *tip, mblk_t *fwd_mp, uint16_t adj_nick,
196 boolean_t has_trill_hdr, boolean_t multidest, uint16_t dtnick)
198 trill_node_t *adj;
199 trill_sock_t *tsock = NULL;
200 trill_header_t *trillhdr;
201 struct ether_header *ethhdr;
202 int ethtype;
203 int ethhdrlen;
205 adj = trill_node_lookup(tip, adj_nick);
206 if (adj == NULL || ((tsock = adj->tn_tsp) == NULL))
207 goto dest_fwd_fail;
209 ASSERT(tsock->ts_link != NULL);
210 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
211 ASSERT(adj->tn_ni != NULL);
213 DTRACE_PROBE3(trill__dest__fwd, uint16_t, adj_nick, trill_node_t,
214 adj, trill_sock_t, tsock);
217 * For broadcast links by using the dest address of
218 * the RBridge to forward the frame should result in
219 * savings. When the link is a bridged LAN or there are
220 * many end stations the frame will not always be flooded.
222 fwd_mp = create_trill_header(tsock, fwd_mp, adj->tn_ni->tni_adjsnpa,
223 has_trill_hdr, multidest, tsock->ts_desigvlan, 0);
224 if (fwd_mp == NULL)
225 goto dest_fwd_fail;
227 /* LINTED: alignment */
228 ethhdr = (struct ether_header *)fwd_mp->b_rptr;
229 ethtype = ntohs(ethhdr->ether_type);
230 ASSERT(ethtype == ETHERTYPE_VLAN || ethtype == ETHERTYPE_TRILL);
232 /* Pullup Ethernet and TRILL header (w/o TRILL options) */
233 ethhdrlen = sizeof (struct ether_header) +
234 (ethtype == ETHERTYPE_VLAN ? sizeof (struct ether_vlan_extinfo):0);
235 if (!pullupmsg(fwd_mp, ethhdrlen + sizeof (trill_header_t)))
236 goto dest_fwd_fail;
237 /* LINTED: alignment */
238 trillhdr = (struct trill_header *)(fwd_mp->b_rptr + ethhdrlen);
240 /* Update TRILL header with ingress and egress nicks for new frames */
241 if (!has_trill_hdr) {
242 /* We are creating a new TRILL frame */
243 trillhdr->th_egressnick = (multidest ? dtnick:adj_nick);
244 rw_enter(&tip->ti_rwlock, RW_READER);
245 trillhdr->th_ingressnick = tip->ti_nick;
246 rw_exit(&tip->ti_rwlock);
247 if (!VALID_NICK(trillhdr->th_ingressnick))
248 goto dest_fwd_fail;
251 /* Set hop count and update header in packet */
252 ASSERT(trillhdr->th_hopcount != 0);
253 trillhdr->th_hopcount--;
255 /* Clear checksum flag and transmit frame on the link */
256 DB_CKSUMFLAGS(fwd_mp) = 0;
257 DTRACE_PROBE1(trill__dest__fwd__tx, trill_header_t *, &trillhdr);
258 fwd_mp = bridge_trill_output(tsock->ts_link, fwd_mp);
259 if (fwd_mp == NULL) {
260 KSPINCR(tks_sent);
261 KSPINCR(tks_forward);
262 } else {
263 freemsg(fwd_mp);
264 KSPINCR(tks_drops);
266 trill_node_unref(tip, adj);
267 return;
269 dest_fwd_fail:
270 if (adj != NULL)
271 trill_node_unref(tip, adj);
272 if (tsock != NULL)
273 KSPINCR(tks_drops);
274 freemsg(fwd_mp);
278 * TRILL multi-destination forwarding. Transmits the packet to the adjacencies
279 * on the distribution tree determined by the egress nick. Source addr (saddr)
280 * is NULL for new TRILL packets originating from us.
282 static void
283 trill_multidest_fwd(trill_inst_t *tip, mblk_t *mp, uint16_t egressnick,
284 uint16_t ingressnick, boolean_t is_trill_pkt, const uint8_t *saddr,
285 int inner_vlan, boolean_t free_mblk)
287 int idx;
288 uint16_t adjnick;
289 trill_node_t *dest;
290 trill_node_t *adj;
291 mblk_t *fwd_mp;
292 boolean_t nicksaved = B_FALSE;
293 uint16_t adjnicksaved;
295 /* Lookup the egress nick info, this is the DT root */
296 if ((dest = trill_node_lookup(tip, egressnick)) == NULL)
297 goto fail_multidest_fwd;
299 /* Send a copy to all our adjacencies on the DT root */
300 ASSERT(dest->tn_ni);
301 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) {
303 /* Check for a valid adjacency node */
304 adjnick = TNI_ADJNICK(dest->tn_ni, idx);
305 if (!VALID_NICK(adjnick) || ingressnick == adjnick ||
306 ((adj = trill_node_lookup(tip, adjnick)) == NULL))
307 continue;
309 /* Do not forward back to adjacency that sent the pkt to us */
310 ASSERT(adj->tn_ni != NULL);
311 if ((saddr != NULL) &&
312 (memcmp(adj->tn_ni->tni_adjsnpa, saddr,
313 ETHERADDRL) == 0)) {
314 trill_node_unref(tip, adj);
315 continue;
318 /* Check if adj is marked as reaching inner VLAN downstream */
319 if ((inner_vlan != VLAN_ID_NONE) &&
320 !TRILL_VLANISSET(TNI_VLANFILTERMAP(dest->tn_ni, idx),
321 inner_vlan)) {
322 trill_node_unref(tip, adj);
323 DTRACE_PROBE4(trill__multi__dest__fwd__vlanfiltered,
324 uint16_t, adjnick, uint16_t, ingressnick,
325 uint16_t, egressnick, int, inner_vlan);
326 continue;
329 trill_node_unref(tip, adj);
332 * Save the nick and look ahead to see if we should forward the
333 * frame to more adjacencies. We avoid doing a copy for this
334 * nick and use the passed mblk when we can consume the passed
335 * mblk.
337 if (free_mblk && !nicksaved) {
338 adjnicksaved = adjnick;
339 nicksaved = B_TRUE;
340 continue;
343 fwd_mp = copymsg(mp);
344 if (fwd_mp == NULL)
345 break;
346 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t,
347 adjnick, uint16_t, ingressnick);
348 trill_dest_fwd(tip, fwd_mp, adjnick, is_trill_pkt,
349 B_TRUE, egressnick);
351 trill_node_unref(tip, dest);
353 if (nicksaved) {
354 ASSERT(free_mblk);
355 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t,
356 adjnicksaved, uint16_t, ingressnick);
357 trill_dest_fwd(tip, mp, adjnicksaved, is_trill_pkt,
358 B_TRUE, egressnick);
359 return;
362 fail_multidest_fwd:
363 DTRACE_PROBE2(trill__multi__dest__fwd__fail, uint16_t,
364 egressnick, uint16_t, ingressnick);
365 if (free_mblk) {
366 freemsg(mp);
371 * TRILL data receive function. Forwards the received frame if necessary
372 * and also determines if the received frame should be consumed locally.
373 * Consumes passed mblk.
375 static void
376 trill_recv(trill_sock_t *tsock, mblk_t *mp, const uint8_t *mpsaddr)
378 trill_header_t *trillhdr;
379 trill_node_t *dest = NULL;
380 trill_node_t *source = NULL;
381 trill_node_t *adj;
382 uint16_t ournick, adjnick, treeroot;
383 struct ether_header *ethhdr;
384 trill_inst_t *tip = tsock->ts_tip;
385 uint8_t srcaddr[ETHERADDRL];
386 size_t trillhdrlen;
387 int inner_vlan = VLAN_ID_NONE;
388 int tci;
389 int idx;
390 size_t min_size;
392 /* Copy Ethernet source address before modifying packet */
393 (void) memcpy(srcaddr, mpsaddr, ETHERADDRL);
395 /* Pull up TRILL header if necessary. */
396 min_size = sizeof (trill_header_t);
397 if ((MBLKL(mp) < min_size ||
398 !IS_P2ALIGNED(mp->b_rptr, TRILL_HDR_ALIGN)) &&
399 !pullupmsg(mp, min_size))
400 goto fail;
402 /* LINTED: alignment */
403 trillhdr = (trill_header_t *)mp->b_rptr;
404 if (trillhdr->th_version != TRILL_PROTOCOL_VERS) {
405 DTRACE_PROBE1(trill__recv__wrongversion,
406 trill_header_t *, trillhdr);
407 goto fail;
410 /* Drop if unknown or invalid nickname */
411 if (!VALID_NICK(trillhdr->th_egressnick) ||
412 !VALID_NICK(trillhdr->th_ingressnick)) {
413 DTRACE_PROBE1(trill__recv__invalidnick,
414 trill_header_t *, trillhdr);
415 goto fail;
418 rw_enter(&tip->ti_rwlock, RW_READER);
419 ournick = tip->ti_nick;
420 treeroot = tip->ti_treeroot;
421 rw_exit(&tip->ti_rwlock);
422 /* Drop if we received a packet with our nick as ingress */
423 if (trillhdr->th_ingressnick == ournick)
424 goto fail;
426 /* Re-pull any TRILL options and inner Ethernet header */
427 min_size += GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t) +
428 sizeof (struct ether_header);
429 if (MBLKL(mp) < min_size) {
430 if (!pullupmsg(mp, min_size))
431 goto fail;
432 /* LINTED: alignment */
433 trillhdr = (trill_header_t *)mp->b_rptr;
435 trillhdrlen = sizeof (trill_header_t) +
436 (GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t));
439 * Get the inner Ethernet header, plus the inner VLAN header if there
440 * is one.
442 /* LINTED: alignment */
443 ethhdr = (struct ether_header *)(mp->b_rptr + trillhdrlen);
444 if (ethhdr->ether_type == htons(ETHERTYPE_VLAN)) {
445 min_size += sizeof (struct ether_vlan_extinfo);
446 if (MBLKL(mp) < min_size) {
447 if (!pullupmsg(mp, min_size))
448 goto fail;
449 /* LINTED: alignment */
450 trillhdr = (trill_header_t *)mp->b_rptr;
451 /* LINTED: alignment */
452 ethhdr = (struct ether_header *)(mp->b_rptr +
453 trillhdrlen);
456 tci = ntohs(((struct ether_vlan_header *)ethhdr)->ether_tci);
457 inner_vlan = VLAN_ID(tci);
460 /* Known/single destination forwarding. */
461 if (!trillhdr->th_multidest) {
463 /* Inner MacDA must be unicast */
464 if (ethhdr->ether_dhost.ether_addr_octet[0] & 1)
465 goto fail;
467 /* Ingress and Egress nicks must be different */
468 if (trillhdr->th_egressnick == trillhdr->th_ingressnick)
469 goto fail;
471 DTRACE_PROBE1(trill__recv__singledest,
472 trill_header_t *, trillhdr);
473 if (trillhdr->th_egressnick == ournick) {
474 mp->b_rptr += trillhdrlen;
475 trill_recv_local(tsock, mp, trillhdr->th_ingressnick);
476 } else if (trillhdr->th_hopcount > 0) {
477 trill_dest_fwd(tip, mp, trillhdr->th_egressnick,
478 B_TRUE, B_FALSE, RBRIDGE_NICKNAME_NONE);
479 } else {
480 goto fail;
482 return;
486 * Multi-destination frame: perform checks verifying we have
487 * received a valid multi-destination frame before receiving the
488 * frame locally and forwarding the frame to other RBridges.
490 * Check if we received this multi-destination frame on a
491 * adjacency in the distribution tree indicated by the frame's
492 * egress nickname.
494 if ((dest = trill_node_lookup(tip, trillhdr->th_egressnick)) == NULL)
495 goto fail;
496 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) {
497 adjnick = TNI_ADJNICK(dest->tn_ni, idx);
498 if ((adj = trill_node_lookup(tip, adjnick)) == NULL)
499 continue;
500 if (memcmp(adj->tn_ni->tni_adjsnpa, srcaddr, ETHERADDRL) == 0) {
501 trill_node_unref(tip, adj);
502 break;
504 trill_node_unref(tip, adj);
507 if (idx >= dest->tn_ni->tni_adjcount) {
508 DTRACE_PROBE2(trill__recv__multidest__adjcheckfail,
509 trill_header_t *, trillhdr, trill_node_t *, dest);
510 goto fail;
514 * Reverse path forwarding check. Check if the ingress RBridge
515 * that has forwarded the frame advertised the use of the
516 * distribution tree specified in the egress nick.
518 if ((source = trill_node_lookup(tip, trillhdr->th_ingressnick)) == NULL)
519 goto fail;
520 for (idx = 0; idx < source->tn_ni->tni_dtrootcount; idx++) {
521 if (TNI_DTROOTNICK(source->tn_ni, idx) ==
522 trillhdr->th_egressnick)
523 break;
526 if (idx >= source->tn_ni->tni_dtrootcount) {
528 * Allow receipt of forwarded frame with the highest
529 * tree root RBridge as the egress RBridge when the
530 * ingress RBridge has not advertised the use of any
531 * distribution trees.
533 if (source->tn_ni->tni_dtrootcount != 0 ||
534 trillhdr->th_egressnick != treeroot) {
535 DTRACE_PROBE3(
536 trill__recv__multidest__rpfcheckfail,
537 trill_header_t *, trillhdr, trill_node_t *,
538 source, trill_inst_t *, tip);
539 goto fail;
543 /* Check hop count before doing any forwarding */
544 if (trillhdr->th_hopcount == 0)
545 goto fail;
547 /* Forward frame using the distribution tree specified by egress nick */
548 DTRACE_PROBE2(trill__recv__multidest, trill_header_t *,
549 trillhdr, trill_node_t *, source);
550 trill_node_unref(tip, source);
551 trill_node_unref(tip, dest);
553 /* Tell forwarding not to free if we're the link forwarder. */
554 trill_multidest_fwd(tip, mp, trillhdr->th_egressnick,
555 trillhdr->th_ingressnick, B_TRUE, srcaddr, inner_vlan,
556 B_FALSE);
559 * Send de-capsulated frame locally if we are the link forwarder (also
560 * does bridge learning).
562 mp->b_rptr += trillhdrlen;
563 trill_recv_local(tsock, mp, trillhdr->th_ingressnick);
564 KSPINCR(tks_recv);
565 return;
567 fail:
568 DTRACE_PROBE2(trill__recv__multidest__fail, mblk_t *, mp,
569 trill_sock_t *, tsock);
570 if (dest != NULL)
571 trill_node_unref(tip, dest);
572 if (source != NULL)
573 trill_node_unref(tip, source);
574 freemsg(mp);
575 KSPINCR(tks_drops);
578 static void
579 trill_stop_recv(trill_sock_t *tsock)
581 mutex_enter(&tsock->ts_socklock);
582 stop_retry:
583 if (tsock->ts_state == TS_UNBND || tsock->ts_link == NULL) {
584 mutex_exit(&tsock->ts_socklock);
585 return;
589 * If another thread is closing the socket then wait. Our callers
590 * expect us to return only after the socket is closed.
592 if (tsock->ts_flags & TSF_CLOSEWAIT) {
593 cv_wait(&tsock->ts_sockclosewait, &tsock->ts_socklock);
594 goto stop_retry;
598 * Set state and flags to block new bind or close calls
599 * while we close the socket.
601 tsock->ts_flags |= TSF_CLOSEWAIT;
603 /* Wait until all AF_TRILL socket transmit operations are done */
604 while (tsock->ts_sockthreadcount > 0)
605 cv_wait(&tsock->ts_sockthreadwait, &tsock->ts_socklock);
608 * We are guaranteed to be the only thread closing on the
609 * socket while the TSF_CLOSEWAIT flag is set, all others cv_wait
610 * for us to finish.
612 ASSERT(tsock->ts_link != NULL);
613 if (tsock->ts_ksp != NULL)
614 kstat_delete(tsock->ts_ksp);
617 * Release lock before bridge_trill_lnunref to prevent deadlock
618 * between trill_ctrl_input thread waiting to acquire ts_socklock
619 * and bridge_trill_lnunref waiting for the trill thread to finish.
621 mutex_exit(&tsock->ts_socklock);
624 * Release TRILL link reference from Bridging. On return from
625 * bridge_trill_lnunref we can be sure there are no active TRILL data
626 * threads for this link.
628 bridge_trill_lnunref(tsock->ts_link);
630 /* Set socket as unbound & wakeup threads waiting for socket to close */
631 mutex_enter(&tsock->ts_socklock);
632 ASSERT(tsock->ts_link != NULL);
633 tsock->ts_link = NULL;
634 tsock->ts_state = TS_UNBND;
635 tsock->ts_flags &= ~TSF_CLOSEWAIT;
636 cv_broadcast(&tsock->ts_sockclosewait);
637 mutex_exit(&tsock->ts_socklock);
640 static int
641 trill_start_recv(trill_sock_t *tsock, const struct sockaddr *sa, socklen_t len)
643 struct sockaddr_dl *lladdr = (struct sockaddr_dl *)sa;
644 datalink_id_t linkid;
645 int err = 0;
647 if (len != sizeof (*lladdr))
648 return (EINVAL);
650 mutex_enter(&tsock->ts_socklock);
651 if (tsock->ts_tip == NULL || tsock->ts_state != TS_UNBND) {
652 err = EINVAL;
653 goto bind_error;
656 if (tsock->ts_flags & TSF_CLOSEWAIT || tsock->ts_link != NULL) {
657 err = EBUSY;
658 goto bind_error;
661 (void) memcpy(&(tsock->ts_lladdr), lladdr,
662 sizeof (struct sockaddr_dl));
663 (void) memcpy(&linkid, tsock->ts_lladdr.sdl_data,
664 sizeof (datalink_id_t));
666 tsock->ts_link = bridge_trill_lnref(tsock->ts_tip->ti_binst,
667 linkid, tsock);
668 if (tsock->ts_link == NULL) {
669 err = EINVAL;
670 goto bind_error;
673 trill_kstats_init(tsock, tsock->ts_tip->ti_bridgename);
674 tsock->ts_state = TS_IDLE;
676 bind_error:
677 mutex_exit(&tsock->ts_socklock);
678 return (err);
681 static int
682 trill_do_unbind(trill_sock_t *tsock)
684 /* If a bind has not been done, we can't unbind. */
685 if (tsock->ts_state != TS_IDLE)
686 return (EINVAL);
688 trill_stop_recv(tsock);
689 return (0);
692 static void
693 trill_instance_unref(trill_inst_t *tip)
695 rw_enter(&trill_inst_rwlock, RW_WRITER);
696 rw_enter(&tip->ti_rwlock, RW_WRITER);
697 if (atomic_dec_uint_nv(&tip->ti_refs) == 0) {
698 list_remove(&trill_inst_list, tip);
699 rw_exit(&tip->ti_rwlock);
700 rw_exit(&trill_inst_rwlock);
701 if (tip->ti_binst != NULL)
702 bridge_trill_brunref(tip->ti_binst);
703 list_destroy(&tip->ti_socklist);
704 rw_destroy(&tip->ti_rwlock);
705 kmem_free(tip, sizeof (*tip));
706 } else {
707 rw_exit(&tip->ti_rwlock);
708 rw_exit(&trill_inst_rwlock);
713 * This is called when the bridge module receives a TRILL-encapsulated packet
714 * on a given link or a packet identified as "TRILL control." We must verify
715 * that it's for us (it almost certainly will be), and then either decapsulate
716 * (if it's to our nickname), forward (if it's to someone else), or send up one
717 * of the sockets (if it's control traffic).
719 * Sadly, on Ethernet, the control traffic is identified by Outer.MacDA, and
720 * not by TRILL header information.
722 static void
723 trill_recv_pkt_cb(void *lptr, bridge_link_t *blp, mac_resource_handle_t rsrc,
724 mblk_t *mp, mac_header_info_t *hdr_info)
726 trill_sock_t *tsock = lptr;
728 _NOTE(ARGUNUSED(rsrc));
730 ASSERT(tsock->ts_tip != NULL);
731 ASSERT(tsock->ts_link != NULL);
732 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
735 * Only receive packet if the source address is not multicast (which is
736 * bogus).
738 if (hdr_info->mhi_saddr[0] & 1)
739 goto discard;
742 * Check if this is our own packet reflected back. It should not be.
744 if (bcmp(hdr_info->mhi_saddr, blp->bl_local_mac, ETHERADDRL) == 0)
745 goto discard;
747 /* Only receive unicast packet if addressed to us */
748 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST &&
749 bcmp(hdr_info->mhi_daddr, blp->bl_local_mac, ETHERADDRL) != 0)
750 goto discard;
752 if (hdr_info->mhi_bindsap == ETHERTYPE_TRILL) {
753 /* TRILL data packets */
754 trill_recv(tsock, mp, hdr_info->mhi_saddr);
755 } else {
756 /* Design constraint for cheap IS-IS/BPDU comparison */
757 ASSERT(all_isis_rbridges[4] != bridge_group_address[4]);
758 /* Send received control packet upstream */
759 trill_ctrl_input(tsock, mp, hdr_info->mhi_saddr,
760 hdr_info->mhi_daddr[4] == all_isis_rbridges[4] ?
761 hdr_info->mhi_tci : TRILL_TCI_BPDU);
764 return;
766 discard:
767 freemsg(mp);
768 KSPINCR(tks_drops);
772 * This is called when the bridge module discovers that the destination address
773 * for a packet is not local -- it's through some remote node. We must verify
774 * that the remote node isn't our nickname (it shouldn't be), add a TRILL
775 * header, and then use the IS-IS data to determine which link and which
776 * next-hop RBridge should be used for output. We then transmit on that link.
778 * The egress_nick is RBRIDGE_NICKNAME_NONE for the "unknown destination" case.
780 static void
781 trill_encap_pkt_cb(void *lptr, bridge_link_t *blp, mac_header_info_t *hdr_info,
782 mblk_t *mp, uint16_t egress_nick)
784 uint16_t ournick;
785 uint16_t dtnick;
786 trill_node_t *self = NULL;
787 trill_sock_t *tsock = lptr;
788 trill_inst_t *tip = tsock->ts_tip;
789 int vlan = VLAN_ID_NONE;
791 _NOTE(ARGUNUSED(blp));
792 ASSERT(hdr_info->mhi_bindsap != ETHERTYPE_TRILL);
794 /* egress_nick = RBRIDGE_NICKNAME_NONE is valid */
795 if (egress_nick != RBRIDGE_NICKNAME_NONE && !VALID_NICK(egress_nick))
796 goto discard;
798 /* Check if our own nick is valid before we do any forwarding */
799 rw_enter(&tip->ti_rwlock, RW_READER);
800 ournick = tip->ti_nick;
801 dtnick = tip->ti_treeroot;
802 rw_exit(&tip->ti_rwlock);
803 if (!VALID_NICK(ournick))
804 goto discard;
807 * For Multi-Destination forwarding determine our choice of
808 * root distribution tree. If we didn't choose a distribution
809 * tree (dtroots_count=0) then we use the highest priority tree
810 * root (t_treeroot) else we drop the packet without forwarding.
812 if (egress_nick == RBRIDGE_NICKNAME_NONE) {
813 if ((self = trill_node_lookup(tip, ournick)) == NULL)
814 goto discard;
817 * Use the first DT configured for now. In future we
818 * should have DT selection code here.
820 if (self->tn_ni->tni_dtrootcount > 0) {
821 dtnick = TNI_DTROOTNICK(self->tn_ni, 0);
824 trill_node_unref(tip, self);
825 if (!VALID_NICK(dtnick)) {
826 DTRACE_PROBE(trill__fwd__packet__nodtroot);
827 goto discard;
832 * Retrieve VLAN ID of the native frame used for VLAN
833 * pruning of multi-destination frames.
835 if (hdr_info->mhi_istagged) {
836 vlan = VLAN_ID(hdr_info->mhi_tci);
839 DTRACE_PROBE2(trill__fwd__packet, mac_header_info_t *, hdr_info,
840 uint16_t, egress_nick);
841 if (egress_nick == RBRIDGE_NICKNAME_NONE) {
842 trill_multidest_fwd(tip, mp, dtnick,
843 ournick, B_FALSE, NULL, vlan, B_TRUE);
844 } else {
845 trill_dest_fwd(tip, mp, egress_nick, B_FALSE, B_FALSE,
846 RBRIDGE_NICKNAME_NONE);
848 KSPINCR(tks_encap);
849 return;
851 discard:
852 freemsg(mp);
856 * This is called when the bridge module has completely torn down a bridge
857 * instance and all of the attached links. We need to make the TRILL instance
858 * go away at this point.
860 static void
861 trill_br_dstr_cb(void *bptr, bridge_inst_t *bip)
863 trill_inst_t *tip = bptr;
865 _NOTE(ARGUNUSED(bip));
866 rw_enter(&tip->ti_rwlock, RW_WRITER);
867 if (tip->ti_binst != NULL)
868 bridge_trill_brunref(tip->ti_binst);
869 tip->ti_binst = NULL;
870 rw_exit(&tip->ti_rwlock);
874 * This is called when the bridge module is tearing down a link, but before the
875 * actual tear-down starts. When this function returns, we must make sure that
876 * we will not initiate any new transmits on this link.
878 static void
879 trill_ln_dstr_cb(void *lptr, bridge_link_t *blp)
881 trill_sock_t *tsock = lptr;
883 _NOTE(ARGUNUSED(blp));
884 trill_stop_recv(tsock);
887 static void
888 trill_init(void)
890 list_create(&trill_inst_list, sizeof (trill_inst_t),
891 offsetof(trill_inst_t, ti_instnode));
892 rw_init(&trill_inst_rwlock, NULL, RW_DRIVER, NULL);
893 bridge_trill_register_cb(trill_recv_pkt_cb, trill_encap_pkt_cb,
894 trill_br_dstr_cb, trill_ln_dstr_cb);
897 static void
898 trill_fini(void)
900 bridge_trill_register_cb(NULL, NULL, NULL, NULL);
901 rw_destroy(&trill_inst_rwlock);
902 list_destroy(&trill_inst_list);
905 /* Loadable module configuration entry points */
907 _init(void)
909 int rc;
911 trill_init();
912 if ((rc = mod_install(&ml)) != 0)
913 trill_fini();
914 return (rc);
918 _info(struct modinfo *modinfop)
920 return (mod_info(&ml, modinfop));
924 _fini(void)
926 int rc;
928 rw_enter(&trill_inst_rwlock, RW_READER);
929 rc = list_is_empty(&trill_inst_list) ? 0 : EBUSY;
930 rw_exit(&trill_inst_rwlock);
931 if (rc == 0 && ((rc = mod_remove(&ml)) == 0))
932 trill_fini();
933 return (rc);
936 static void
937 trill_kstats_init(trill_sock_t *tsock, const char *bname)
939 int i;
940 char kstatname[KSTAT_STRLEN];
941 kstat_named_t *knt;
942 static const char *sock_kstats_list[] = { TRILL_KSSOCK_NAMES };
943 char link_name[MAXNAMELEN];
944 int num;
945 int err;
947 bzero(link_name, sizeof (link_name));
948 if ((err = dls_mgmt_get_linkinfo(tsock->ts_link->bl_linkid, link_name,
949 NULL, NULL, NULL)) != 0) {
950 cmn_err(CE_WARN, "%s: trill_kstats_init: error %d retrieving"
951 " linkinfo for linkid:%d", "trill", err,
952 tsock->ts_link->bl_linkid);
953 return;
956 bzero(kstatname, sizeof (kstatname));
957 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s",
958 bname, link_name);
960 num = sizeof (sock_kstats_list) / sizeof (*sock_kstats_list);
961 for (i = 0; i < num; i++) {
962 knt = (kstat_named_t *)&(tsock->ts_kstats);
963 kstat_named_init(&knt[i], sock_kstats_list[i],
964 KSTAT_DATA_UINT64);
967 tsock->ts_ksp = kstat_create_zone("trill", 0, kstatname, "sock",
968 KSTAT_TYPE_NAMED, num, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
969 if (tsock->ts_ksp != NULL) {
970 tsock->ts_ksp->ks_data = &tsock->ts_kstats;
971 kstat_install(tsock->ts_ksp);
975 static trill_sock_t *
976 trill_do_open(int flags)
978 trill_sock_t *tsock;
979 int kmflag = ((flags & SOCKET_NOSLEEP)) ? KM_NOSLEEP:KM_SLEEP;
981 tsock = kmem_zalloc(sizeof (trill_sock_t), kmflag);
982 if (tsock != NULL) {
983 tsock->ts_state = TS_UNBND;
984 tsock->ts_refs++;
985 mutex_init(&tsock->ts_socklock, NULL, MUTEX_DRIVER, NULL);
986 cv_init(&tsock->ts_sockthreadwait, NULL, CV_DRIVER, NULL);
987 cv_init(&tsock->ts_sockclosewait, NULL, CV_DRIVER, NULL);
989 return (tsock);
992 static int
993 trill_find_bridge(trill_sock_t *tsock, const char *bname, boolean_t can_create)
995 trill_inst_t *tip, *newtip = NULL;
997 /* Allocate some memory (speculatively) before taking locks */
998 if (can_create)
999 newtip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP);
1001 rw_enter(&trill_inst_rwlock, RW_WRITER);
1002 for (tip = list_head(&trill_inst_list); tip != NULL;
1003 tip = list_next(&trill_inst_list, tip)) {
1004 if (strcmp(tip->ti_bridgename, bname) == 0)
1005 break;
1007 if (tip == NULL) {
1008 if (!can_create || newtip == NULL) {
1009 rw_exit(&trill_inst_rwlock);
1010 return (can_create ? ENOMEM : ENOENT);
1013 tip = newtip;
1014 newtip = NULL;
1015 (void) strcpy(tip->ti_bridgename, bname);
1017 /* Register TRILL instance with bridging */
1018 tip->ti_binst = bridge_trill_brref(bname, tip);
1019 if (tip->ti_binst == NULL) {
1020 rw_exit(&trill_inst_rwlock);
1021 kmem_free(tip, sizeof (*tip));
1022 return (ENOENT);
1025 rw_init(&tip->ti_rwlock, NULL, RW_DRIVER, NULL);
1026 list_create(&tip->ti_socklist, sizeof (trill_sock_t),
1027 offsetof(trill_sock_t, ts_socklistnode));
1028 list_insert_tail(&trill_inst_list, tip);
1030 atomic_inc_uint(&tip->ti_refs);
1031 rw_exit(&trill_inst_rwlock);
1033 /* If we didn't need the preallocated memory, then discard now. */
1034 if (newtip != NULL)
1035 kmem_free(newtip, sizeof (*newtip));
1037 rw_enter(&tip->ti_rwlock, RW_WRITER);
1038 list_insert_tail(&(tip->ti_socklist), tsock);
1039 tsock->ts_tip = tip;
1040 rw_exit(&tip->ti_rwlock);
1041 return (0);
1044 static void
1045 trill_clear_bridge(trill_sock_t *tsock)
1047 trill_inst_t *tip;
1049 if ((tip = tsock->ts_tip) == NULL)
1050 return;
1051 rw_enter(&tip->ti_rwlock, RW_WRITER);
1052 list_remove(&tip->ti_socklist, tsock);
1053 if (list_is_empty(&tip->ti_socklist))
1054 trill_del_all(tip, B_TRUE);
1055 rw_exit(&tip->ti_rwlock);
1058 static void
1059 trill_sock_unref(trill_sock_t *tsock)
1061 if (atomic_dec_uint_nv(&tsock->ts_refs) == 0) {
1062 mutex_destroy(&tsock->ts_socklock);
1063 cv_destroy(&tsock->ts_sockthreadwait);
1064 cv_destroy(&tsock->ts_sockclosewait);
1065 kmem_free(tsock, sizeof (trill_sock_t));
1069 static void
1070 trill_do_close(trill_sock_t *tsock)
1072 trill_inst_t *tip;
1074 tip = tsock->ts_tip;
1075 trill_stop_recv(tsock);
1076 /* Remove socket from TRILL instance socket list */
1077 trill_clear_bridge(tsock);
1078 tsock->ts_flags |= TSF_SHUTDOWN;
1079 trill_sock_unref(tsock);
1080 if (tip != NULL)
1081 trill_instance_unref(tip);
1084 static void
1085 trill_del_all(trill_inst_t *tip, boolean_t lockheld)
1087 int i;
1089 if (!lockheld)
1090 rw_enter(&tip->ti_rwlock, RW_WRITER);
1091 for (i = RBRIDGE_NICKNAME_MIN; i < RBRIDGE_NICKNAME_MAX; i++) {
1092 if (tip->ti_nodes[i] != NULL)
1093 (void) trill_del_nick(tip, i, B_TRUE);
1095 if (!lockheld)
1096 rw_exit(&tip->ti_rwlock);
1099 static void
1100 trill_node_free(trill_node_t *nick_entry)
1102 trill_nickinfo_t *tni;
1104 tni = nick_entry->tn_ni;
1105 kmem_free(tni, TNI_TOTALSIZE(tni));
1106 kmem_free(nick_entry, sizeof (trill_node_t));
1109 static void
1110 trill_node_unref(trill_inst_t *tip, trill_node_t *tnp)
1112 if (atomic_dec_uint_nv(&tnp->tn_refs) == 0) {
1113 if (tnp->tn_tsp != NULL)
1114 trill_sock_unref(tnp->tn_tsp);
1115 trill_node_free(tnp);
1116 atomic_dec_uint(&tip->ti_nodecount);
1120 static trill_node_t *
1121 trill_node_lookup(trill_inst_t *tip, uint16_t nick)
1123 trill_node_t *nick_entry;
1125 if (!VALID_NICK(nick))
1126 return (NULL);
1127 rw_enter(&tip->ti_rwlock, RW_READER);
1128 nick_entry = tip->ti_nodes[nick];
1129 if (nick_entry != NULL) {
1130 atomic_inc_uint(&nick_entry->tn_refs);
1132 rw_exit(&tip->ti_rwlock);
1133 return (nick_entry);
1136 static int
1137 trill_del_nick(trill_inst_t *tip, uint16_t nick, boolean_t lockheld)
1139 trill_node_t *nick_entry;
1140 int rc = ENOENT;
1142 if (!lockheld)
1143 rw_enter(&tip->ti_rwlock, RW_WRITER);
1144 if (VALID_NICK(nick)) {
1145 nick_entry = tip->ti_nodes[nick];
1146 if (nick_entry != NULL) {
1147 trill_node_unref(tip, nick_entry);
1148 tip->ti_nodes[nick] = NULL;
1149 rc = 0;
1152 if (!lockheld)
1153 rw_exit(&tip->ti_rwlock);
1154 return (rc);
1157 static int
1158 trill_add_nick(trill_inst_t *tip, void *arg, boolean_t self, int mode)
1160 uint16_t nick;
1161 int size;
1162 trill_node_t *tnode;
1163 trill_nickinfo_t tnihdr;
1165 /* First make sure we have at least the header available */
1166 if (ddi_copyin(arg, &tnihdr, sizeof (trill_nickinfo_t), mode) != 0)
1167 return (EFAULT);
1169 nick = tnihdr.tni_nick;
1170 if (!VALID_NICK(nick)) {
1171 DTRACE_PROBE1(trill__add__nick__bad, trill_nickinfo_t *,
1172 &tnihdr);
1173 return (EINVAL);
1176 size = TNI_TOTALSIZE(&tnihdr);
1177 if (size > TNI_MAXSIZE)
1178 return (EINVAL);
1179 tnode = kmem_zalloc(sizeof (trill_node_t), KM_SLEEP);
1180 tnode->tn_ni = kmem_zalloc(size, KM_SLEEP);
1181 if (ddi_copyin(arg, tnode->tn_ni, size, mode) != 0) {
1182 kmem_free(tnode->tn_ni, size);
1183 kmem_free(tnode, sizeof (trill_node_t));
1184 return (EFAULT);
1187 tnode->tn_refs++;
1188 rw_enter(&tip->ti_rwlock, RW_WRITER);
1189 if (tip->ti_nodes[nick] != NULL)
1190 (void) trill_del_nick(tip, nick, B_TRUE);
1192 if (self) {
1193 tip->ti_nick = nick;
1194 } else {
1195 tnode->tn_tsp = find_trill_link(tip,
1196 tnode->tn_ni->tni_linkid);
1198 DTRACE_PROBE2(trill__add__nick, trill_node_t *, tnode,
1199 uint16_t, nick);
1200 tip->ti_nodes[nick] = tnode;
1201 tip->ti_nodecount++;
1202 rw_exit(&tip->ti_rwlock);
1203 return (0);
1206 static int
1207 trill_do_ioctl(trill_sock_t *tsock, int cmd, void *arg, int mode)
1209 int error = 0;
1210 trill_inst_t *tip = tsock->ts_tip;
1212 switch (cmd) {
1213 case TRILL_DESIGVLAN: {
1214 uint16_t desigvlan;
1216 if (ddi_copyin(arg, &desigvlan, sizeof (desigvlan), mode) != 0)
1217 return (EFAULT);
1218 tsock->ts_desigvlan = desigvlan;
1219 break;
1221 case TRILL_VLANFWDER: {
1222 uint8_t vlans[TRILL_VLANS_ARRSIZE];
1224 if (tsock->ts_link == NULL)
1225 return (EINVAL);
1226 if ((ddi_copyin(arg, vlans, sizeof (vlans), mode)) != 0)
1227 return (EFAULT);
1228 bridge_trill_setvlans(tsock->ts_link, vlans);
1229 break;
1231 case TRILL_SETNICK:
1232 if (tip == NULL)
1233 return (EINVAL);
1234 error = trill_add_nick(tip, arg, B_TRUE, mode);
1235 break;
1237 case TRILL_GETNICK:
1238 if (tip == NULL)
1239 return (EINVAL);
1240 rw_enter(&tip->ti_rwlock, RW_READER);
1241 if (ddi_copyout(&tip->ti_nick, arg, sizeof (tip->ti_nick),
1242 mode) != 0)
1243 error = EFAULT;
1244 rw_exit(&tip->ti_rwlock);
1245 break;
1247 case TRILL_ADDNICK:
1248 if (tip == NULL)
1249 break;
1250 error = trill_add_nick(tip, arg, B_FALSE, mode);
1251 break;
1253 case TRILL_DELNICK: {
1254 uint16_t delnick;
1256 if (tip == NULL)
1257 break;
1258 if (ddi_copyin(arg, &delnick, sizeof (delnick), mode) != 0)
1259 return (EFAULT);
1260 error = trill_del_nick(tip, delnick, B_FALSE);
1261 break;
1263 case TRILL_DELALL:
1264 if (tip == NULL)
1265 break;
1266 trill_del_all(tip, B_FALSE);
1267 break;
1269 case TRILL_TREEROOT: {
1270 uint16_t treeroot;
1272 if (tip == NULL)
1273 break;
1274 if (ddi_copyin(arg, &treeroot, sizeof (treeroot), mode) != 0)
1275 return (EFAULT);
1276 if (!VALID_NICK(treeroot))
1277 return (EINVAL);
1278 rw_enter(&tip->ti_rwlock, RW_WRITER);
1279 tip->ti_treeroot = treeroot;
1280 rw_exit(&tip->ti_rwlock);
1281 break;
1283 case TRILL_HWADDR:
1284 if (tsock->ts_link == NULL)
1285 break;
1286 if (ddi_copyout(tsock->ts_link->bl_local_mac, arg, ETHERADDRL,
1287 mode) != 0)
1288 return (EFAULT);
1289 break;
1291 case TRILL_NEWBRIDGE: {
1292 char bname[MAXLINKNAMELEN];
1294 if (tsock->ts_state != TS_UNBND)
1295 return (ENOTSUP);
1296 /* ts_tip can only be set once */
1297 if (tip != NULL)
1298 return (EEXIST);
1299 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0)
1300 return (EFAULT);
1301 bname[MAXLINKNAMELEN-1] = '\0';
1302 error = trill_find_bridge(tsock, bname, B_TRUE);
1303 break;
1306 case TRILL_GETBRIDGE: {
1307 char bname[MAXLINKNAMELEN];
1309 /* ts_tip can only be set once */
1310 if (tip != NULL)
1311 return (EEXIST);
1312 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0)
1313 return (EFAULT);
1314 bname[MAXLINKNAMELEN - 1] = '\0';
1315 error = trill_find_bridge(tsock, bname, B_FALSE);
1316 break;
1319 case TRILL_LISTNICK: {
1320 trill_listnick_t tln;
1321 trill_node_t *tnp;
1322 trill_nickinfo_t *tnip;
1323 uint16_t nick;
1325 if (tip == NULL)
1326 return (EINVAL);
1327 if (ddi_copyin(arg, &tln, sizeof (tln), mode) != 0)
1328 return (EFAULT);
1329 nick = tln.tln_nick;
1330 if (nick >= RBRIDGE_NICKNAME_MAX) {
1331 error = EINVAL;
1332 break;
1334 rw_enter(&tip->ti_rwlock, RW_READER);
1335 while (++nick < RBRIDGE_NICKNAME_MAX) {
1336 if ((tnp = tip->ti_nodes[nick]) != NULL) {
1337 tnip = tnp->tn_ni;
1338 ASSERT(nick == tnip->tni_nick);
1339 tln.tln_nick = nick;
1340 bcopy(tnip->tni_adjsnpa, tln.tln_nexthop,
1341 ETHERADDRL);
1342 tln.tln_ours = nick == tip->ti_nick;
1343 if (tln.tln_ours || tnp->tn_tsp == NULL) {
1344 tln.tln_linkid =
1345 DATALINK_INVALID_LINKID;
1346 } else {
1347 tln.tln_linkid =
1348 tnp->tn_tsp->ts_link->bl_linkid;
1350 break;
1353 rw_exit(&tip->ti_rwlock);
1354 if (nick >= RBRIDGE_NICKNAME_MAX)
1355 bzero(&tln, sizeof (tln));
1356 if (ddi_copyout(&tln, arg, sizeof (tln), mode) != 0)
1357 return (EFAULT);
1358 break;
1362 * Port flush: this is used when we lose AF on a port. We must discard
1363 * all regular bridge forwarding entries on this port with the
1364 * indicated VLAN.
1366 case TRILL_PORTFLUSH: {
1367 uint16_t vlan = (uint16_t)(uintptr_t)arg;
1369 if (tsock->ts_link == NULL)
1370 return (EINVAL);
1371 bridge_trill_flush(tsock->ts_link, vlan, B_FALSE);
1372 break;
1376 * Nick flush: this is used when we lose AF on a port. We must discard
1377 * all bridge TRILL forwarding entries on this port with the indicated
1378 * VLAN.
1380 case TRILL_NICKFLUSH: {
1381 uint16_t vlan = (uint16_t)(uintptr_t)arg;
1383 if (tsock->ts_link == NULL)
1384 return (EINVAL);
1385 bridge_trill_flush(tsock->ts_link, vlan, B_TRUE);
1386 break;
1389 case TRILL_GETMTU:
1390 if (tsock->ts_link == NULL)
1391 break;
1392 if (ddi_copyout(&tsock->ts_link->bl_maxsdu, arg,
1393 sizeof (uint_t), mode) != 0)
1394 return (EFAULT);
1395 break;
1397 default:
1398 error = ENOTSUP;
1399 break;
1402 return (error);
1406 * Sends received packet back upstream on the TRILL socket.
1407 * Consumes passed mblk_t.
1409 static void
1410 trill_ctrl_input(trill_sock_t *tsock, mblk_t *mp, const uint8_t *saddr,
1411 uint16_t tci)
1413 int udi_size;
1414 mblk_t *mp1;
1415 struct T_unitdata_ind *tudi;
1416 struct sockaddr_dl *sdl;
1417 char *lladdr;
1418 int error;
1420 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
1421 if (tsock->ts_flow_ctrld) {
1422 freemsg(mp);
1423 KSPINCR(tks_drops);
1424 return;
1427 udi_size = sizeof (struct T_unitdata_ind) +
1428 sizeof (struct sockaddr_dl);
1429 mp1 = allocb(udi_size, BPRI_MED);
1430 if (mp1 == NULL) {
1431 freemsg(mp);
1432 KSPINCR(tks_drops);
1433 return;
1436 mp1->b_cont = mp;
1437 mp = mp1;
1438 mp->b_datap->db_type = M_PROTO;
1439 /* LINTED: alignment */
1440 tudi = (struct T_unitdata_ind *)mp->b_rptr;
1441 mp->b_wptr = (uchar_t *)tudi + udi_size;
1443 tudi->PRIM_type = T_UNITDATA_IND;
1444 tudi->SRC_length = sizeof (struct sockaddr_dl);
1445 tudi->SRC_offset = sizeof (struct T_unitdata_ind);
1446 tudi->OPT_length = 0;
1447 tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
1448 sizeof (struct sockaddr_dl);
1450 /* Information of the link on which packet was received. */
1451 sdl = (struct sockaddr_dl *)&tudi[1];
1452 (void) memset(sdl, 0, sizeof (struct sockaddr_dl));
1453 sdl->sdl_family = AF_TRILL;
1455 /* LINTED: alignment */
1456 *(datalink_id_t *)sdl->sdl_data = tsock->ts_link->bl_linkid;
1457 sdl->sdl_nlen = sizeof (tsock->ts_link->bl_linkid);
1459 lladdr = LLADDR(sdl);
1460 (void) memcpy(lladdr, saddr, ETHERADDRL);
1461 lladdr += ETHERADDRL;
1462 sdl->sdl_alen = ETHERADDRL;
1464 /* LINTED: alignment */
1465 *(uint16_t *)lladdr = tci;
1466 sdl->sdl_slen = sizeof (uint16_t);
1468 DTRACE_PROBE2(trill__ctrl__input, trill_sock_t *, tsock, mblk_t *, mp);
1469 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle,
1470 mp, msgdsize(mp), 0, &error, NULL);
1472 if (error == ENOSPC) {
1473 mutex_enter(&tsock->ts_socklock);
1474 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle,
1475 NULL, 0, 0, &error, NULL);
1476 if (error == ENOSPC)
1477 tsock->ts_flow_ctrld = B_TRUE;
1478 mutex_exit(&tsock->ts_socklock);
1479 KSPINCR(tks_drops);
1480 } else if (error != 0) {
1481 KSPINCR(tks_drops);
1482 } else {
1483 KSPINCR(tks_recv);
1486 DTRACE_PROBE2(trill__ctrl__input__done, trill_sock_t *,
1487 tsock, int, error);
1490 /* ARGSUSED */
1491 static void
1492 trill_activate(sock_lower_handle_t proto_handle,
1493 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls,
1494 int flags, cred_t *cr)
1496 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1497 struct sock_proto_props sopp;
1499 tsock->ts_conn_upcalls = sock_upcalls;
1500 tsock->ts_conn_upper_handle = sock_handle;
1502 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT |
1503 SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ |
1504 SOCKOPT_MAXBLK | SOCKOPT_MINPSZ;
1505 sopp.sopp_wroff = 0;
1506 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
1507 sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
1508 sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl);
1509 sopp.sopp_maxpsz = INFPSZ;
1510 sopp.sopp_maxblk = INFPSZ;
1511 sopp.sopp_minpsz = 0;
1512 (*tsock->ts_conn_upcalls->su_set_proto_props)(
1513 tsock->ts_conn_upper_handle, &sopp);
1516 /* ARGSUSED */
1517 static int
1518 trill_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
1520 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1522 trill_do_close(tsock);
1523 return (0);
1526 /* ARGSUSED */
1527 static int
1528 trill_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
1529 socklen_t len, cred_t *cr)
1531 int error;
1532 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1534 if (sa == NULL)
1535 error = trill_do_unbind(tsock);
1536 else
1537 error = trill_start_recv(tsock, sa, len);
1539 return (error);
1542 /* ARGSUSED */
1543 static int
1544 trill_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct msghdr *msg,
1545 cred_t *cr)
1547 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1548 struct sockaddr_dl *laddr;
1549 uint16_t tci;
1551 ASSERT(DB_TYPE(mp) == M_DATA);
1552 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN));
1554 if (msg->msg_name == NULL || msg->msg_namelen != sizeof (*laddr))
1555 goto eproto;
1558 * The name is a datalink_id_t, the address is an Ethernet address, and
1559 * the selector value is the VLAN ID.
1561 laddr = (struct sockaddr_dl *)msg->msg_name;
1562 if (laddr->sdl_nlen != sizeof (datalink_id_t) ||
1563 laddr->sdl_alen != ETHERADDRL ||
1564 (laddr->sdl_slen != sizeof (tci) && laddr->sdl_slen != 0))
1565 goto eproto;
1567 mutex_enter(&tsock->ts_socklock);
1568 if (tsock->ts_state != TS_IDLE || tsock->ts_link == NULL) {
1569 mutex_exit(&tsock->ts_socklock);
1570 goto eproto;
1572 atomic_inc_uint(&tsock->ts_sockthreadcount);
1573 mutex_exit(&tsock->ts_socklock);
1576 * Safe to dereference VLAN now, as we've checked the user's specified
1577 * values, and alignment is now guaranteed.
1579 if (laddr->sdl_slen == 0) {
1580 tci = TRILL_NO_TCI;
1581 } else {
1582 /* LINTED: alignment */
1583 tci = *(uint16_t *)(LLADDR(laddr) + ETHERADDRL);
1586 mp = create_trill_header(tsock, mp, (const uchar_t *)LLADDR(laddr),
1587 B_TRUE, B_FALSE, tci, msgdsize(mp));
1588 if (mp != NULL) {
1589 mp = bridge_trill_output(tsock->ts_link, mp);
1590 if (mp == NULL) {
1591 KSPINCR(tks_sent);
1592 } else {
1593 freemsg(mp);
1594 KSPINCR(tks_drops);
1598 /* Wake up any threads blocking on us */
1599 if (atomic_dec_uint_nv(&tsock->ts_sockthreadcount) == 0)
1600 cv_broadcast(&tsock->ts_sockthreadwait);
1601 return (0);
1603 eproto:
1604 freemsg(mp);
1605 KSPINCR(tks_drops);
1606 return (EPROTO);
1609 /* ARGSUSED */
1610 static int
1611 trill_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
1612 int mode, int32_t *rvalp, cred_t *cr)
1614 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1615 int rc;
1617 switch (cmd) {
1618 /* List of unprivileged TRILL ioctls */
1619 case TRILL_GETNICK:
1620 case TRILL_GETBRIDGE:
1621 case TRILL_LISTNICK:
1622 break;
1623 default:
1624 if (secpolicy_dl_config(cr) != 0)
1625 return (EPERM);
1626 break;
1629 /* Lock ensures socket state is unchanged during ioctl handling */
1630 mutex_enter(&tsock->ts_socklock);
1631 rc = trill_do_ioctl(tsock, cmd, (void *)arg, mode);
1632 mutex_exit(&tsock->ts_socklock);
1633 return (rc);
1636 static void
1637 trill_clr_flowctrl(sock_lower_handle_t proto_handle)
1639 trill_sock_t *tsock = (trill_sock_t *)proto_handle;
1641 mutex_enter(&tsock->ts_socklock);
1642 tsock->ts_flow_ctrld = B_FALSE;
1643 mutex_exit(&tsock->ts_socklock);
1646 static sock_downcalls_t sock_trill_downcalls = {
1647 trill_activate, /* sd_activate */
1648 sock_accept_notsupp, /* sd_accept */
1649 trill_bind, /* sd_bind */
1650 sock_listen_notsupp, /* sd_listen */
1651 sock_connect_notsupp, /* sd_connect */
1652 sock_getpeername_notsupp, /* sd_getpeername */
1653 sock_getsockname_notsupp, /* sd_getsockname */
1654 sock_getsockopt_notsupp, /* sd_getsockopt */
1655 sock_setsockopt_notsupp, /* sd_setsockopt */
1656 trill_send, /* sd_send */
1657 NULL, /* sd_send_uio */
1658 NULL, /* sd_recv_uio */
1659 NULL, /* sd_poll */
1660 sock_shutdown_notsupp, /* sd_shutdown */
1661 trill_clr_flowctrl, /* sd_setflowctrl */
1662 trill_ioctl, /* sd_ioctl */
1663 trill_close /* sd_close */
1666 /* ARGSUSED */
1667 static sock_lower_handle_t
1668 trill_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
1669 uint_t *smodep, int *errorp, int flags, cred_t *credp)
1671 trill_sock_t *tsock;
1673 if (family != AF_TRILL || type != SOCK_DGRAM || proto != 0) {
1674 *errorp = EPROTONOSUPPORT;
1675 return (NULL);
1678 *sock_downcalls = &sock_trill_downcalls;
1679 *smodep = SM_ATOMIC;
1680 tsock = trill_do_open(flags);
1681 *errorp = (tsock != NULL) ? 0:ENOMEM;
1682 return ((sock_lower_handle_t)tsock);