 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/errno.h>
28 #include <sys/debug.h>
30 #include <sys/sysmacros.h>
31 #include <sys/systm.h>
33 #include <sys/stropts.h>
34 #include <sys/stream.h>
35 #include <sys/strlog.h>
36 #include <sys/strsubr.h>
37 #include <sys/cmn_err.h>
42 #include <sys/sunddi.h>
43 #include <sys/ksynch.h>
45 #include <sys/kstat.h>
46 #include <sys/vtrace.h>
47 #include <sys/strsun.h>
49 #include <sys/ethernet.h>
51 #include <sys/varargs.h>
52 #include <sys/machsystm.h>
53 #include <sys/modctl.h>
54 #include <sys/modhash.h>
56 #include <sys/mac_ether.h>
57 #include <sys/taskq.h>
59 #include <sys/mach_descrip.h>
62 #include <sys/vsw_fdb.h>
64 #include <sys/vio_mailbox.h>
65 #include <sys/vnet_mailbox.h>
66 #include <sys/vnet_common.h>
67 #include <sys/vio_util.h>
69 #include <sys/atomic.h>
70 #include <sys/callb.h>
73 /* Port add/deletion/etc routines */
74 static void vsw_port_delete(vsw_port_t
*port
);
75 static int vsw_ldc_attach(vsw_port_t
*port
, uint64_t ldc_id
);
76 static void vsw_ldc_detach(vsw_ldc_t
*ldcp
);
77 static int vsw_ldc_init(vsw_ldc_t
*ldcp
);
78 static void vsw_ldc_uninit(vsw_ldc_t
*ldcp
);
79 static void vsw_ldc_drain(vsw_ldc_t
*ldcp
);
80 static void vsw_drain_port_taskq(vsw_port_t
*port
);
81 static void vsw_marker_task(void *);
82 static int vsw_plist_del_node(vsw_t
*, vsw_port_t
*port
);
83 void vsw_detach_ports(vsw_t
*vswp
);
84 int vsw_port_add(vsw_t
*vswp
, md_t
*mdp
, mde_cookie_t
*node
);
85 mcst_addr_t
*vsw_del_addr(uint8_t devtype
, void *arg
, uint64_t addr
);
86 int vsw_port_detach(vsw_t
*vswp
, int p_instance
);
87 int vsw_portsend(vsw_port_t
*port
, mblk_t
*mp
);
88 int vsw_port_attach(vsw_port_t
*portp
);
89 vsw_port_t
*vsw_lookup_port(vsw_t
*vswp
, int p_instance
);
90 void vsw_vlan_unaware_port_reset(vsw_port_t
*portp
);
91 void vsw_hio_port_reset(vsw_port_t
*portp
, boolean_t immediate
);
92 void vsw_reset_ports(vsw_t
*vswp
);
93 void vsw_port_reset(vsw_port_t
*portp
);
94 void vsw_physlink_update_ports(vsw_t
*vswp
);
95 static void vsw_port_physlink_update(vsw_port_t
*portp
);
97 /* Interrupt routines */
98 static uint_t
vsw_ldc_cb(uint64_t cb
, caddr_t arg
);
100 /* Handshake routines */
101 static void vsw_ldc_reinit(vsw_ldc_t
*);
102 static void vsw_conn_task(void *);
103 static int vsw_check_flag(vsw_ldc_t
*, int, uint64_t);
104 static void vsw_next_milestone(vsw_ldc_t
*);
105 static int vsw_supported_version(vio_ver_msg_t
*);
106 static void vsw_set_vnet_proto_ops(vsw_ldc_t
*ldcp
);
107 static void vsw_reset_vnet_proto_ops(vsw_ldc_t
*ldcp
);
108 void vsw_process_conn_evt(vsw_ldc_t
*, uint16_t);
110 /* Data processing routines */
111 void vsw_process_pkt(void *);
112 static void vsw_dispatch_ctrl_task(vsw_ldc_t
*, void *, vio_msg_tag_t
*, int);
113 static void vsw_process_ctrl_pkt(void *);
114 static void vsw_process_ctrl_ver_pkt(vsw_ldc_t
*, void *);
115 static void vsw_process_ctrl_attr_pkt(vsw_ldc_t
*, void *);
116 static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t
*, void *);
117 static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t
*, void *);
118 static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t
*, void *);
119 static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t
*, void *);
120 static void vsw_process_physlink_msg(vsw_ldc_t
*, void *);
121 static void vsw_process_data_pkt(vsw_ldc_t
*, void *, vio_msg_tag_t
*,
123 static void vsw_process_pkt_data_nop(void *, void *, uint32_t);
124 static void vsw_process_pkt_data(void *, void *, uint32_t);
125 static void vsw_process_data_ibnd_pkt(vsw_ldc_t
*, void *);
126 static void vsw_process_err_pkt(vsw_ldc_t
*, void *, vio_msg_tag_t
*);
127 static void vsw_process_evt_read(vsw_ldc_t
*ldcp
);
128 static void vsw_ldc_rcv(vsw_ldc_t
*ldcp
);
130 /* Switching/data transmit routines */
131 static int vsw_descrsend(vsw_ldc_t
*, mblk_t
*);
132 static void vsw_ldcsend_pkt(vsw_ldc_t
*ldcp
, mblk_t
*mp
);
133 static int vsw_ldcsend(vsw_ldc_t
*ldcp
, mblk_t
*mp
, uint32_t retries
);
134 static int vsw_ldctx_pri(void *arg
, mblk_t
*mp
, mblk_t
*mpt
, uint32_t count
);
135 static int vsw_ldctx(void *arg
, mblk_t
*mp
, mblk_t
*mpt
, uint32_t count
);
137 /* Packet creation routines */
138 static void vsw_send_ver(void *);
139 static void vsw_send_attr(vsw_ldc_t
*);
140 static void vsw_send_dring_info(vsw_ldc_t
*);
141 static void vsw_send_rdx(vsw_ldc_t
*);
142 static void vsw_send_physlink_msg(vsw_ldc_t
*ldcp
, link_state_t plink_state
);
145 static void vsw_create_privring(vsw_ldc_t
*);
146 static dring_info_t
*vsw_map_dring(vsw_ldc_t
*ldcp
, void *pkt
);
147 static void vsw_unmap_dring(vsw_ldc_t
*ldcp
);
148 static void vsw_destroy_dring(vsw_ldc_t
*ldcp
);
149 static void vsw_free_lane_resources(vsw_ldc_t
*, uint64_t);
150 static int vsw_map_data(vsw_ldc_t
*ldcp
, dring_info_t
*dp
, void *pkt
);
151 static void vsw_set_lane_attr(vsw_t
*, lane_t
*);
152 dring_info_t
*vsw_map_dring_cmn(vsw_ldc_t
*ldcp
,
153 vio_dring_reg_msg_t
*dring_pkt
);
154 static int vsw_mapin_avail(vsw_ldc_t
*ldcp
);
156 /* tx/msg/rcv thread routines */
157 static void vsw_stop_tx_thread(vsw_ldc_t
*ldcp
);
158 static void vsw_ldc_tx_worker(void *arg
);
160 /* Misc support routines */
161 static void vsw_save_lmacaddr(vsw_t
*vswp
, uint64_t macaddr
);
162 static int vsw_get_same_dest_list(struct ether_header
*ehp
,
163 mblk_t
**rhead
, mblk_t
**rtail
, mblk_t
**mpp
);
164 static mblk_t
*vsw_dupmsgchain(mblk_t
*mp
);
166 /* Debugging routines */
167 static void dump_flags(uint64_t);
168 static void display_state(void);
169 static void display_lane(lane_t
*);
170 static void display_ring(dring_info_t
*);
173 * Functions imported from other files.
175 extern int vsw_set_hw(vsw_t
*, vsw_port_t
*, int);
176 extern void vsw_unset_hw(vsw_t
*, vsw_port_t
*, int);
177 extern int vsw_add_rem_mcst(vnet_mcast_msg_t
*mcst_pkt
, vsw_port_t
*port
);
178 extern void vsw_del_mcst_port(vsw_port_t
*port
);
179 extern int vsw_add_mcst(vsw_t
*vswp
, uint8_t devtype
, uint64_t addr
, void *arg
);
180 extern int vsw_del_mcst(vsw_t
*vswp
, uint8_t devtype
, uint64_t addr
, void *arg
);
181 extern void vsw_fdbe_add(vsw_t
*vswp
, void *port
);
182 extern void vsw_fdbe_del(vsw_t
*vswp
, struct ether_addr
*eaddr
);
183 extern void vsw_create_vlans(void *arg
, int type
);
184 extern void vsw_destroy_vlans(void *arg
, int type
);
185 extern void vsw_vlan_add_ids(void *arg
, int type
);
186 extern void vsw_vlan_remove_ids(void *arg
, int type
);
187 extern boolean_t
vsw_frame_lookup_vid(void *arg
, int caller
,
188 struct ether_header
*ehp
, uint16_t *vidp
);
189 extern mblk_t
*vsw_vlan_frame_pretag(void *arg
, int type
, mblk_t
*mp
);
190 extern uint32_t vsw_vlan_frame_untag(void *arg
, int type
, mblk_t
**np
,
192 extern boolean_t
vsw_vlan_lookup(mod_hash_t
*vlan_hashp
, uint16_t vid
);
193 extern void vsw_hio_start(vsw_t
*vswp
, vsw_ldc_t
*ldcp
);
194 extern void vsw_hio_stop(vsw_t
*vswp
, vsw_ldc_t
*ldcp
);
195 extern void vsw_process_dds_msg(vsw_t
*vswp
, vsw_ldc_t
*ldcp
, void *msg
);
196 extern void vsw_hio_stop_port(vsw_port_t
*portp
);
197 extern void vsw_publish_macaddr(vsw_t
*vswp
, vsw_port_t
*portp
);
198 extern int vsw_mac_client_init(vsw_t
*vswp
, vsw_port_t
*port
, int type
);
199 extern void vsw_mac_client_cleanup(vsw_t
*vswp
, vsw_port_t
*port
, int type
);
200 extern void vsw_destroy_rxpools(void *arg
);
201 extern void vsw_stop_msg_thread(vsw_ldc_t
*ldcp
);
202 extern int vsw_send_msg(vsw_ldc_t
*, void *, int, boolean_t
);
203 extern int vsw_dringsend(vsw_ldc_t
*, mblk_t
*);
204 extern int vsw_reclaim_dring(dring_info_t
*dp
, int start
);
205 extern int vsw_dring_find_free_desc(dring_info_t
*, vsw_private_desc_t
**,
207 extern vio_dring_reg_msg_t
*vsw_create_tx_dring_info(vsw_ldc_t
*);
208 extern int vsw_setup_tx_dring(vsw_ldc_t
*ldcp
, dring_info_t
*dp
);
209 extern void vsw_destroy_tx_dring(vsw_ldc_t
*ldcp
);
210 extern dring_info_t
*vsw_map_rx_dring(vsw_ldc_t
*ldcp
, void *pkt
);
211 extern void vsw_unmap_rx_dring(vsw_ldc_t
*ldcp
);
212 extern void vsw_ldc_msg_worker(void *arg
);
213 extern void vsw_process_dringdata(void *, void *);
214 extern vio_dring_reg_msg_t
*vsw_create_rx_dring_info(vsw_ldc_t
*);
215 extern void vsw_destroy_rx_dring(vsw_ldc_t
*ldcp
);
216 extern dring_info_t
*vsw_map_tx_dring(vsw_ldc_t
*ldcp
, void *pkt
);
217 extern void vsw_unmap_tx_dring(vsw_ldc_t
*ldcp
);
218 extern void vsw_ldc_rcv_worker(void *arg
);
219 extern void vsw_stop_rcv_thread(vsw_ldc_t
*ldcp
);
220 extern int vsw_dringsend_shm(vsw_ldc_t
*, mblk_t
*);
221 extern void vsw_process_dringdata_shm(void *, void *);
224 * Tunables used in this file.
226 extern int vsw_num_handshakes
;
227 extern int vsw_ldc_tx_delay
;
228 extern int vsw_ldc_tx_retries
;
229 extern int vsw_ldc_retries
;
230 extern int vsw_ldc_delay
;
231 extern boolean_t vsw_ldc_rxthr_enabled
;
232 extern boolean_t vsw_ldc_txthr_enabled
;
233 extern uint32_t vsw_num_descriptors
;
234 extern uint8_t vsw_dring_mode
;
235 extern uint32_t vsw_max_tx_qcount
;
236 extern boolean_t vsw_obp_ver_proto_workaround
;
237 extern uint32_t vsw_publish_macaddr_count
;
238 extern uint32_t vsw_nrbufs_factor
;
240 #define LDC_ENTER_LOCK(ldcp) \
241 mutex_enter(&((ldcp)->ldc_cblock));\
242 mutex_enter(&((ldcp)->ldc_rxlock));\
243 mutex_enter(&((ldcp)->ldc_txlock));
244 #define LDC_EXIT_LOCK(ldcp) \
245 mutex_exit(&((ldcp)->ldc_txlock));\
246 mutex_exit(&((ldcp)->ldc_rxlock));\
247 mutex_exit(&((ldcp)->ldc_cblock));
249 #define VSW_VER_EQ(ldcp, major, minor) \
250 ((ldcp)->lane_out.ver_major == (major) && \
251 (ldcp)->lane_out.ver_minor == (minor))
253 #define VSW_VER_LT(ldcp, major, minor) \
254 (((ldcp)->lane_out.ver_major < (major)) || \
255 ((ldcp)->lane_out.ver_major == (major) && \
256 (ldcp)->lane_out.ver_minor < (minor)))
258 #define VSW_VER_GTEQ(ldcp, major, minor) \
259 (((ldcp)->lane_out.ver_major > (major)) || \
260 ((ldcp)->lane_out.ver_major == (major) && \
261 (ldcp)->lane_out.ver_minor >= (minor)))
263 #define VSW_VER_LTEQ(ldcp, major, minor) \
264 (((ldcp)->lane_out.ver_major < (major)) || \
265 ((ldcp)->lane_out.ver_major == (major) && \
266 (ldcp)->lane_out.ver_minor <= (minor)))
 * VIO Protocol Version Info:
 *
 * The version specified below represents the version of protocol currently
 * supported in the driver. It means the driver can negotiate with peers with
 * versions <= this version. Here is a summary of the feature(s) that are
 * supported at each version of the protocol:
 *
 * 1.0		Basic VIO protocol.
 * 1.1		vDisk protocol update (no virtual network update).
 * 1.2		Support for priority frames (priority-ether-types).
 * 1.3		VLAN and HybridIO support.
 * 1.4		Jumbo Frame support.
 * 1.5		Link State Notification support with optional support
 *		for Physical Link information.
 * 1.6		Support for RxDringData mode.
285 static ver_sup_t vsw_versions
[] = { {1, 6} };
288 * For the moment the state dump routines have their own
295 #define DUMP_TAG(tag) \
297 D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
298 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \
299 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \
302 #define DUMP_TAG_PTR(tag) \
304 D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
305 D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \
306 D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \
309 #define DUMP_FLAGS(flags) dump_flags(flags);
310 #define DISPLAY_STATE() display_state()
314 #define DUMP_TAG(tag)
315 #define DUMP_TAG_PTR(tag)
316 #define DUMP_FLAGS(state)
317 #define DISPLAY_STATE()
319 #endif /* DUMP_STATE */
322 * Attach the specified port.
324 * Returns 0 on success, 1 on failure.
327 vsw_port_attach(vsw_port_t
*port
)
329 vsw_t
*vswp
= port
->p_vswp
;
330 vsw_port_list_t
*plist
= &vswp
->plist
;
332 int nids
= port
->num_ldcs
;
336 D1(vswp
, "%s: enter : port %d", __func__
, port
->p_instance
);
338 /* port already exists? */
339 READ_ENTER(&plist
->lockrw
);
340 for (p
= plist
->head
; p
!= NULL
; p
= p
->p_next
) {
341 if (p
->p_instance
== port
->p_instance
) {
342 DWARN(vswp
, "%s: port instance %d already attached",
343 __func__
, p
->p_instance
);
344 RW_EXIT(&plist
->lockrw
);
348 RW_EXIT(&plist
->lockrw
);
350 mutex_init(&port
->tx_lock
, NULL
, MUTEX_DRIVER
, NULL
);
351 mutex_init(&port
->mca_lock
, NULL
, MUTEX_DRIVER
, NULL
);
352 rw_init(&port
->maccl_rwlock
, NULL
, RW_DRIVER
, NULL
);
354 mutex_init(&port
->state_lock
, NULL
, MUTEX_DRIVER
, NULL
);
355 cv_init(&port
->state_cv
, NULL
, CV_DRIVER
, NULL
);
356 port
->state
= VSW_PORT_INIT
;
358 D2(vswp
, "%s: %d nids", __func__
, nids
);
359 ldcids
= port
->ldc_ids
;
360 D2(vswp
, "%s: ldcid (%llx)", __func__
, (uint64_t)ldcids
[0]);
361 if (vsw_ldc_attach(port
, (uint64_t)ldcids
[0]) != 0) {
362 DERR(vswp
, "%s: ldc_attach failed", __func__
);
366 if (vswp
->switching_setup_done
== B_TRUE
) {
368 * If the underlying network device has been setup,
369 * then open a mac client and porgram the mac address
372 rv
= vsw_mac_client_init(vswp
, port
, VSW_VNETPORT
);
378 /* create the fdb entry for this port/mac address */
379 vsw_fdbe_add(vswp
, port
);
381 vsw_create_vlans(port
, VSW_VNETPORT
);
383 WRITE_ENTER(&plist
->lockrw
);
385 /* link it into the list of ports for this vsw instance */
386 pp
= (vsw_port_t
**)(&plist
->head
);
391 RW_EXIT(&plist
->lockrw
);
394 * Initialise the port and any ldc's under it.
396 (void) vsw_ldc_init(port
->ldcp
);
398 /* announce macaddr of vnet to the physical switch */
399 if (vsw_publish_macaddr_count
!= 0) { /* enabled */
400 vsw_publish_macaddr(vswp
, port
);
403 D1(vswp
, "%s: exit", __func__
);
408 cv_destroy(&port
->state_cv
);
409 mutex_destroy(&port
->state_lock
);
411 rw_destroy(&port
->maccl_rwlock
);
412 mutex_destroy(&port
->tx_lock
);
413 mutex_destroy(&port
->mca_lock
);
414 kmem_free(port
, sizeof (vsw_port_t
));
419 * Detach the specified port.
421 * Returns 0 on success, 1 on failure.
424 vsw_port_detach(vsw_t
*vswp
, int p_instance
)
426 vsw_port_t
*port
= NULL
;
427 vsw_port_list_t
*plist
= &vswp
->plist
;
429 D1(vswp
, "%s: enter: port id %d", __func__
, p_instance
);
431 WRITE_ENTER(&plist
->lockrw
);
433 if ((port
= vsw_lookup_port(vswp
, p_instance
)) == NULL
) {
434 RW_EXIT(&plist
->lockrw
);
438 if (vsw_plist_del_node(vswp
, port
)) {
439 RW_EXIT(&plist
->lockrw
);
443 /* cleanup any HybridIO for this port */
444 vsw_hio_stop_port(port
);
447 * No longer need to hold writer lock on port list now
448 * that we have unlinked the target port from the list.
450 RW_EXIT(&plist
->lockrw
);
452 /* Cleanup and close the mac client */
453 vsw_mac_client_cleanup(vswp
, port
, VSW_VNETPORT
);
455 /* Remove the fdb entry for this port/mac address */
456 vsw_fdbe_del(vswp
, &(port
->p_macaddr
));
457 vsw_destroy_vlans(port
, VSW_VNETPORT
);
459 /* Remove any multicast addresses.. */
460 vsw_del_mcst_port(port
);
462 vsw_port_delete(port
);
464 D1(vswp
, "%s: exit: p_instance(%d)", __func__
, p_instance
);
469 * Detach all active ports.
472 vsw_detach_ports(vsw_t
*vswp
)
474 vsw_port_list_t
*plist
= &vswp
->plist
;
475 vsw_port_t
*port
= NULL
;
477 D1(vswp
, "%s: enter", __func__
);
479 WRITE_ENTER(&plist
->lockrw
);
481 while ((port
= plist
->head
) != NULL
) {
482 (void) vsw_plist_del_node(vswp
, port
);
484 /* cleanup any HybridIO for this port */
485 vsw_hio_stop_port(port
);
487 /* Cleanup and close the mac client */
488 vsw_mac_client_cleanup(vswp
, port
, VSW_VNETPORT
);
490 /* Remove the fdb entry for this port/mac address */
491 vsw_fdbe_del(vswp
, &(port
->p_macaddr
));
492 vsw_destroy_vlans(port
, VSW_VNETPORT
);
494 /* Remove any multicast addresses.. */
495 vsw_del_mcst_port(port
);
498 * No longer need to hold the lock on the port list
499 * now that we have unlinked the target port from the
502 RW_EXIT(&plist
->lockrw
);
503 vsw_port_delete(port
);
504 WRITE_ENTER(&plist
->lockrw
);
506 RW_EXIT(&plist
->lockrw
);
508 D1(vswp
, "%s: exit", __func__
);
512 * Delete the specified port.
515 vsw_port_delete(vsw_port_t
*port
)
517 vsw_t
*vswp
= port
->p_vswp
;
519 D1(vswp
, "%s: enter : port id %d", __func__
, port
->p_instance
);
521 vsw_ldc_uninit(port
->ldcp
);
524 * Wait for any pending ctrl msg tasks which reference this
527 vsw_drain_port_taskq(port
);
530 * Wait for any active callbacks to finish
532 vsw_ldc_drain(port
->ldcp
);
534 vsw_ldc_detach(port
->ldcp
);
536 rw_destroy(&port
->maccl_rwlock
);
537 mutex_destroy(&port
->mca_lock
);
538 mutex_destroy(&port
->tx_lock
);
540 cv_destroy(&port
->state_cv
);
541 mutex_destroy(&port
->state_lock
);
543 if (port
->num_ldcs
!= 0) {
544 kmem_free(port
->ldc_ids
, port
->num_ldcs
* sizeof (uint64_t));
548 if (port
->nvids
!= 0) {
549 kmem_free(port
->vids
, sizeof (vsw_vlanid_t
) * port
->nvids
);
552 kmem_free(port
, sizeof (vsw_port_t
));
554 D1(vswp
, "%s: exit", __func__
);
558 * Attach a logical domain channel (ldc) under a specified port.
560 * Returns 0 on success, 1 on failure.
563 vsw_ldc_attach(vsw_port_t
*port
, uint64_t ldc_id
)
565 vsw_t
*vswp
= port
->p_vswp
;
566 vsw_ldc_t
*ldcp
= NULL
;
568 ldc_status_t istatus
;
569 int status
= DDI_FAILURE
;
570 char kname
[MAXNAMELEN
];
571 enum { PROG_init
= 0x0,
573 PROG_tx_thread
= 0x2}
576 progress
= PROG_init
;
578 D1(vswp
, "%s: enter", __func__
);
580 ldcp
= kmem_zalloc(sizeof (vsw_ldc_t
), KM_NOSLEEP
);
582 DERR(vswp
, "%s: kmem_zalloc failed", __func__
);
585 ldcp
->ldc_id
= ldc_id
;
587 mutex_init(&ldcp
->ldc_txlock
, NULL
, MUTEX_DRIVER
, NULL
);
588 mutex_init(&ldcp
->ldc_rxlock
, NULL
, MUTEX_DRIVER
, NULL
);
589 mutex_init(&ldcp
->ldc_cblock
, NULL
, MUTEX_DRIVER
, NULL
);
590 ldcp
->msg_thr_flags
= 0;
591 mutex_init(&ldcp
->msg_thr_lock
, NULL
, MUTEX_DRIVER
, NULL
);
592 cv_init(&ldcp
->msg_thr_cv
, NULL
, CV_DRIVER
, NULL
);
593 ldcp
->rcv_thr_flags
= 0;
594 mutex_init(&ldcp
->rcv_thr_lock
, NULL
, MUTEX_DRIVER
, NULL
);
595 cv_init(&ldcp
->rcv_thr_cv
, NULL
, CV_DRIVER
, NULL
);
596 mutex_init(&ldcp
->drain_cv_lock
, NULL
, MUTEX_DRIVER
, NULL
);
597 cv_init(&ldcp
->drain_cv
, NULL
, CV_DRIVER
, NULL
);
599 /* required for handshake with peer */
600 ldcp
->local_session
= (uint64_t)ddi_get_lbolt();
601 ldcp
->peer_session
= 0;
602 ldcp
->session_status
= 0;
603 ldcp
->hss_id
= 1; /* Initial handshake session id */
604 ldcp
->hphase
= VSW_MILESTONE0
;
606 (void) atomic_swap_32(&port
->p_hio_capable
, B_FALSE
);
608 /* only set for outbound lane, inbound set by peer */
609 vsw_set_lane_attr(vswp
, &ldcp
->lane_out
);
611 attr
.devclass
= LDC_DEV_NT_SVC
;
612 attr
.instance
= ddi_get_instance(vswp
->dip
);
613 attr
.mode
= LDC_MODE_UNRELIABLE
;
614 attr
.mtu
= VSW_LDC_MTU
;
615 status
= ldc_init(ldc_id
, &attr
, &ldcp
->ldc_handle
);
617 DERR(vswp
, "%s(%lld): ldc_init failed, rv (%d)",
618 __func__
, ldc_id
, status
);
619 goto ldc_attach_fail
;
622 if (vsw_ldc_txthr_enabled
) {
623 ldcp
->tx_thr_flags
= 0;
624 ldcp
->tx_mhead
= ldcp
->tx_mtail
= NULL
;
626 mutex_init(&ldcp
->tx_thr_lock
, NULL
, MUTEX_DRIVER
, NULL
);
627 cv_init(&ldcp
->tx_thr_cv
, NULL
, CV_DRIVER
, NULL
);
628 ldcp
->tx_thread
= thread_create(NULL
, 2 * DEFAULTSTKSZ
,
629 vsw_ldc_tx_worker
, ldcp
, 0, &p0
, TS_RUN
, maxclsyspri
);
631 progress
|= PROG_tx_thread
;
632 if (ldcp
->tx_thread
== NULL
) {
633 DWARN(vswp
, "%s(%lld): Failed to create worker thread",
635 goto ldc_attach_fail
;
639 status
= ldc_reg_callback(ldcp
->ldc_handle
, vsw_ldc_cb
, (caddr_t
)ldcp
);
641 DERR(vswp
, "%s(%lld): ldc_reg_callback failed, rv (%d)",
642 __func__
, ldc_id
, status
);
643 (void) ldc_fini(ldcp
->ldc_handle
);
644 goto ldc_attach_fail
;
647 * allocate a message for ldc_read()s, big enough to hold ctrl and
648 * data msgs, including raw data msgs used to recv priority frames.
650 ldcp
->msglen
= VIO_PKT_DATA_HDRSIZE
+ vswp
->max_frame_size
;
651 ldcp
->ldcmsg
= kmem_alloc(ldcp
->msglen
, KM_SLEEP
);
653 progress
|= PROG_callback
;
655 mutex_init(&ldcp
->status_lock
, NULL
, MUTEX_DRIVER
, NULL
);
657 if (ldc_status(ldcp
->ldc_handle
, &istatus
) != 0) {
658 DERR(vswp
, "%s: ldc_status failed", __func__
);
659 mutex_destroy(&ldcp
->status_lock
);
660 goto ldc_attach_fail
;
663 ldcp
->ldc_status
= istatus
;
664 ldcp
->ldc_port
= port
;
665 ldcp
->ldc_vswp
= vswp
;
667 vsw_reset_vnet_proto_ops(ldcp
);
669 (void) sprintf(kname
, "%sldc0x%lx", DRV_NAME
, ldcp
->ldc_id
);
670 ldcp
->ksp
= vgen_setup_kstats(DRV_NAME
, vswp
->instance
,
671 kname
, &ldcp
->ldc_stats
);
672 if (ldcp
->ksp
== NULL
) {
673 DERR(vswp
, "%s: kstats setup failed", __func__
);
674 goto ldc_attach_fail
;
677 /* link it into this port */
680 D1(vswp
, "%s: exit", __func__
);
685 if (progress
& PROG_callback
) {
686 (void) ldc_unreg_callback(ldcp
->ldc_handle
);
687 kmem_free(ldcp
->ldcmsg
, ldcp
->msglen
);
690 if (progress
& PROG_tx_thread
) {
691 if (ldcp
->tx_thread
!= NULL
) {
692 vsw_stop_tx_thread(ldcp
);
694 mutex_destroy(&ldcp
->tx_thr_lock
);
695 cv_destroy(&ldcp
->tx_thr_cv
);
697 if (ldcp
->ksp
!= NULL
) {
698 vgen_destroy_kstats(ldcp
->ksp
);
700 mutex_destroy(&ldcp
->msg_thr_lock
);
701 mutex_destroy(&ldcp
->rcv_thr_lock
);
702 mutex_destroy(&ldcp
->ldc_txlock
);
703 mutex_destroy(&ldcp
->ldc_rxlock
);
704 mutex_destroy(&ldcp
->ldc_cblock
);
705 mutex_destroy(&ldcp
->drain_cv_lock
);
706 cv_destroy(&ldcp
->msg_thr_cv
);
707 cv_destroy(&ldcp
->rcv_thr_cv
);
708 cv_destroy(&ldcp
->drain_cv
);
710 kmem_free(ldcp
, sizeof (vsw_ldc_t
));
716 * Detach a logical domain channel (ldc) belonging to a
720 vsw_ldc_detach(vsw_ldc_t
*ldcp
)
723 vsw_t
*vswp
= ldcp
->ldc_port
->p_vswp
;
726 D2(vswp
, "%s: detaching channel %lld", __func__
, ldcp
->ldc_id
);
728 /* Stop msg/rcv thread */
729 if (ldcp
->rcv_thread
!= NULL
) {
730 vsw_stop_rcv_thread(ldcp
);
731 } else if (ldcp
->msg_thread
!= NULL
) {
732 vsw_stop_msg_thread(ldcp
);
734 kmem_free(ldcp
->ldcmsg
, ldcp
->msglen
);
736 /* Stop the tx thread */
737 if (ldcp
->tx_thread
!= NULL
) {
738 vsw_stop_tx_thread(ldcp
);
739 mutex_destroy(&ldcp
->tx_thr_lock
);
740 cv_destroy(&ldcp
->tx_thr_cv
);
741 if (ldcp
->tx_mhead
!= NULL
) {
742 freemsgchain(ldcp
->tx_mhead
);
743 ldcp
->tx_mhead
= ldcp
->tx_mtail
= NULL
;
749 vgen_destroy_kstats(ldcp
->ksp
);
752 * Before we can close the channel we must release any mapped
753 * resources (e.g. drings).
755 vsw_free_lane_resources(ldcp
, INBOUND
);
756 vsw_free_lane_resources(ldcp
, OUTBOUND
);
759 * Close the channel, retry on EAAGIN.
761 while ((rv
= ldc_close(ldcp
->ldc_handle
)) == EAGAIN
) {
762 if (++retries
> vsw_ldc_retries
) {
765 drv_usecwait(vsw_ldc_delay
);
769 "!vsw%d: Error(%d) closing the channel(0x%lx)\n",
770 vswp
->instance
, rv
, ldcp
->ldc_id
);
773 (void) ldc_fini(ldcp
->ldc_handle
);
775 ldcp
->ldc_status
= LDC_INIT
;
776 ldcp
->ldc_handle
= NULL
;
777 ldcp
->ldc_vswp
= NULL
;
779 mutex_destroy(&ldcp
->msg_thr_lock
);
780 mutex_destroy(&ldcp
->rcv_thr_lock
);
781 mutex_destroy(&ldcp
->ldc_txlock
);
782 mutex_destroy(&ldcp
->ldc_rxlock
);
783 mutex_destroy(&ldcp
->ldc_cblock
);
784 mutex_destroy(&ldcp
->drain_cv_lock
);
785 mutex_destroy(&ldcp
->status_lock
);
786 cv_destroy(&ldcp
->msg_thr_cv
);
787 cv_destroy(&ldcp
->rcv_thr_cv
);
788 cv_destroy(&ldcp
->drain_cv
);
790 kmem_free(ldcp
, sizeof (vsw_ldc_t
));
794 * Open and attempt to bring up the channel. Note that channel
795 * can only be brought up if peer has also opened channel.
797 * Returns 0 if can open and bring up channel, otherwise
801 vsw_ldc_init(vsw_ldc_t
*ldcp
)
803 vsw_t
*vswp
= ldcp
->ldc_vswp
;
804 ldc_status_t istatus
= 0;
807 D1(vswp
, "%s: enter", __func__
);
809 LDC_ENTER_LOCK(ldcp
);
811 /* don't start at 0 in case clients don't like that */
812 ldcp
->next_ident
= 1;
814 rv
= ldc_open(ldcp
->ldc_handle
);
816 DERR(vswp
, "%s: ldc_open failed: id(%lld) rv(%d)",
817 __func__
, ldcp
->ldc_id
, rv
);
822 if (ldc_status(ldcp
->ldc_handle
, &istatus
) != 0) {
823 DERR(vswp
, "%s: unable to get status", __func__
);
827 } else if (istatus
!= LDC_OPEN
&& istatus
!= LDC_READY
) {
828 DERR(vswp
, "%s: id (%lld) status(%d) is not OPEN/READY",
829 __func__
, ldcp
->ldc_id
, istatus
);
834 mutex_enter(&ldcp
->status_lock
);
835 ldcp
->ldc_status
= istatus
;
836 mutex_exit(&ldcp
->status_lock
);
838 rv
= ldc_up(ldcp
->ldc_handle
);
841 * Not a fatal error for ldc_up() to fail, as peer
842 * end point may simply not be ready yet.
844 D2(vswp
, "%s: ldc_up err id(%lld) rv(%d)", __func__
,
851 * ldc_up() call is non-blocking so need to explicitly
852 * check channel status to see if in fact the channel
855 mutex_enter(&ldcp
->status_lock
);
856 if (ldc_status(ldcp
->ldc_handle
, &ldcp
->ldc_status
) != 0) {
857 DERR(vswp
, "%s: unable to get status", __func__
);
858 mutex_exit(&ldcp
->status_lock
);
864 if (ldcp
->ldc_status
== LDC_UP
) {
865 D2(vswp
, "%s: channel %ld now UP (%ld)", __func__
,
866 ldcp
->ldc_id
, istatus
);
867 mutex_exit(&ldcp
->status_lock
);
870 vsw_process_conn_evt(ldcp
, VSW_CONN_UP
);
874 mutex_exit(&ldcp
->status_lock
);
877 D1(vswp
, "%s: exit", __func__
);
881 /* disable callbacks on the channel */
883 vsw_ldc_uninit(vsw_ldc_t
*ldcp
)
885 vsw_t
*vswp
= ldcp
->ldc_vswp
;
888 D1(vswp
, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp
->ldc_id
);
890 LDC_ENTER_LOCK(ldcp
);
892 rv
= ldc_set_cb_mode(ldcp
->ldc_handle
, LDC_CB_DISABLE
);
894 cmn_err(CE_NOTE
, "!vsw_ldc_uninit(%ld): error disabling "
895 "interrupts (rv = %d)\n", ldcp
->ldc_id
, rv
);
898 mutex_enter(&ldcp
->status_lock
);
899 ldcp
->ldc_status
= LDC_INIT
;
900 mutex_exit(&ldcp
->status_lock
);
904 D1(vswp
, "vsw_ldc_uninit: exit: id(%lx)", ldcp
->ldc_id
);
908 * Wait until the callback(s) associated with the ldcs under the specified
909 * port have completed.
911 * Prior to this function being invoked each channel under this port
912 * should have been quiesced via ldc_set_cb_mode(DISABLE).
914 * A short explaination of what we are doing below..
916 * The simplest approach would be to have a reference counter in
917 * the ldc structure which is increment/decremented by the callbacks as
918 * they use the channel. The drain function could then simply disable any
919 * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
920 * there is a tiny window here - before the callback is able to get the lock
921 * on the channel it is interrupted and this function gets to execute. It
922 * sees that the ref count is zero and believes its free to delete the
923 * associated data structures.
925 * We get around this by taking advantage of the fact that before the ldc
926 * framework invokes a callback it sets a flag to indicate that there is a
927 * callback active (or about to become active). If when we attempt to
928 * unregister a callback when this active flag is set then the unregister
929 * will fail with EWOULDBLOCK.
931 * If the unregister fails we do a cv_timedwait. We will either be signaled
932 * by the callback as it is exiting (note we have to wait a short period to
933 * allow the callback to return fully to the ldc framework and it to clear
934 * the active flag), or by the timer expiring. In either case we again attempt
935 * the unregister. We repeat this until we can succesfully unregister the
938 * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
939 * the case where the callback has finished but the ldc framework has not yet
940 * cleared the active flag. In this case we would never get a cv_signal.
943 vsw_ldc_drain(vsw_ldc_t
*ldcp
)
945 vsw_t
*vswp
= ldcp
->ldc_port
->p_vswp
;
947 D1(vswp
, "%s: enter", __func__
);
950 * If we can unregister the channel callback then we
951 * know that there is no callback either running or
952 * scheduled to run for this channel so move on to next
953 * channel in the list.
955 mutex_enter(&ldcp
->drain_cv_lock
);
957 /* prompt active callbacks to quit */
958 ldcp
->drain_state
= VSW_LDC_DRAINING
;
960 if ((ldc_unreg_callback(ldcp
->ldc_handle
)) == 0) {
961 D2(vswp
, "%s: unreg callback for chan %ld", __func__
,
963 mutex_exit(&ldcp
->drain_cv_lock
);
966 * If we end up here we know that either 1) a callback
967 * is currently executing, 2) is about to start (i.e.
968 * the ldc framework has set the active flag but
969 * has not actually invoked the callback yet, or 3)
970 * has finished and has returned to the ldc framework
971 * but the ldc framework has not yet cleared the
974 * Wait for it to finish.
976 while (ldc_unreg_callback(ldcp
->ldc_handle
) == EWOULDBLOCK
) {
977 (void) cv_timedwait(&ldcp
->drain_cv
,
978 &ldcp
->drain_cv_lock
, ddi_get_lbolt() + hz
);
981 mutex_exit(&ldcp
->drain_cv_lock
);
982 D2(vswp
, "%s: unreg callback for chan %ld after "
983 "timeout", __func__
, ldcp
->ldc_id
);
986 D1(vswp
, "%s: exit", __func__
);
990 * Wait until all tasks which reference this port have completed.
992 * Prior to this function being invoked each channel under this port
993 * should have been quiesced via ldc_set_cb_mode(DISABLE).
996 vsw_drain_port_taskq(vsw_port_t
*port
)
998 vsw_t
*vswp
= port
->p_vswp
;
1000 D1(vswp
, "%s: enter", __func__
);
1003 * Mark the port as in the process of being detached, and
1004 * dispatch a marker task to the queue so we know when all
1005 * relevant tasks have completed.
1007 mutex_enter(&port
->state_lock
);
1008 port
->state
= VSW_PORT_DETACHING
;
1010 if ((vswp
->taskq_p
== NULL
) ||
1011 (ddi_taskq_dispatch(vswp
->taskq_p
, vsw_marker_task
,
1012 port
, DDI_NOSLEEP
) != DDI_SUCCESS
)) {
1013 cmn_err(CE_NOTE
, "!vsw%d: unable to dispatch marker task",
1015 mutex_exit(&port
->state_lock
);
1020 * Wait for the marker task to finish.
1022 while (port
->state
!= VSW_PORT_DETACHABLE
)
1023 cv_wait(&port
->state_cv
, &port
->state_lock
);
1025 mutex_exit(&port
->state_lock
);
1027 D1(vswp
, "%s: exit", __func__
);
1031 vsw_marker_task(void *arg
)
1033 vsw_port_t
*port
= arg
;
1034 vsw_t
*vswp
= port
->p_vswp
;
1036 D1(vswp
, "%s: enter", __func__
);
1038 mutex_enter(&port
->state_lock
);
1041 * No further tasks should be dispatched which reference
1042 * this port so ok to mark it as safe to detach.
1044 port
->state
= VSW_PORT_DETACHABLE
;
1046 cv_signal(&port
->state_cv
);
1048 mutex_exit(&port
->state_lock
);
1050 D1(vswp
, "%s: exit", __func__
);
1054 vsw_lookup_port(vsw_t
*vswp
, int p_instance
)
1056 vsw_port_list_t
*plist
= &vswp
->plist
;
1059 for (port
= plist
->head
; port
!= NULL
; port
= port
->p_next
) {
1060 if (port
->p_instance
== p_instance
) {
1061 D2(vswp
, "vsw_lookup_port: found p_instance\n");
1070 vsw_vlan_unaware_port_reset(vsw_port_t
*portp
)
1072 vsw_ldc_t
*ldcp
= portp
->ldcp
;
1074 mutex_enter(&ldcp
->ldc_cblock
);
1077 * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate
1078 * the connection. See comments in vsw_set_vnet_proto_ops().
1080 if (ldcp
->hphase
== VSW_MILESTONE4
&& VSW_VER_LT(ldcp
, 1, 3) &&
1081 portp
->nvids
!= 0) {
1082 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1085 mutex_exit(&ldcp
->ldc_cblock
);
1089 vsw_hio_port_reset(vsw_port_t
*portp
, boolean_t immediate
)
1091 vsw_ldc_t
*ldcp
= portp
->ldcp
;
1093 mutex_enter(&ldcp
->ldc_cblock
);
1096 * If the peer is HybridIO capable (ver >= 1.3), reset channel
1097 * to trigger re-negotiation, which inturn trigger HybridIO
1100 if ((ldcp
->hphase
== VSW_MILESTONE4
) &&
1101 (portp
->p_hio_capable
== B_TRUE
)) {
1102 if (immediate
== B_TRUE
) {
1103 (void) ldc_down(ldcp
->ldc_handle
);
1105 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1109 mutex_exit(&ldcp
->ldc_cblock
);
1113 vsw_port_reset(vsw_port_t
*portp
)
1115 vsw_ldc_t
*ldcp
= portp
->ldcp
;
1117 mutex_enter(&ldcp
->ldc_cblock
);
1120 * reset channel and terminate the connection.
1122 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1124 mutex_exit(&ldcp
->ldc_cblock
);
1128 vsw_reset_ports(vsw_t
*vswp
)
1130 vsw_port_list_t
*plist
= &vswp
->plist
;
1133 READ_ENTER(&plist
->lockrw
);
1134 for (portp
= plist
->head
; portp
!= NULL
; portp
= portp
->p_next
) {
1135 if ((portp
->p_hio_capable
) && (portp
->p_hio_enabled
)) {
1136 vsw_hio_stop_port(portp
);
1138 vsw_port_reset(portp
);
1140 RW_EXIT(&plist
->lockrw
);
1144 vsw_send_physlink_msg(vsw_ldc_t
*ldcp
, link_state_t plink_state
)
1146 vnet_physlink_msg_t msg
;
1147 vnet_physlink_msg_t
*msgp
= &msg
;
1148 uint32_t physlink_info
= 0;
1150 if (plink_state
== LINK_STATE_UP
) {
1151 physlink_info
|= VNET_PHYSLINK_STATE_UP
;
1153 physlink_info
|= VNET_PHYSLINK_STATE_DOWN
;
1156 msgp
->tag
.vio_msgtype
= VIO_TYPE_CTRL
;
1157 msgp
->tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
1158 msgp
->tag
.vio_subtype_env
= VNET_PHYSLINK_INFO
;
1159 msgp
->tag
.vio_sid
= ldcp
->local_session
;
1160 msgp
->physlink_info
= physlink_info
;
1162 (void) vsw_send_msg(ldcp
, msgp
, sizeof (msg
), B_TRUE
);
1166 vsw_port_physlink_update(vsw_port_t
*portp
)
1171 vswp
= portp
->p_vswp
;
1174 mutex_enter(&ldcp
->ldc_cblock
);
1177 * If handshake has completed successfully and if the vnet device
1178 * has negotiated to get physical link state updates, send a message
1179 * with the current state.
1181 if (ldcp
->hphase
== VSW_MILESTONE4
&& ldcp
->pls_negotiated
== B_TRUE
) {
1182 vsw_send_physlink_msg(ldcp
, vswp
->phys_link_state
);
1185 mutex_exit(&ldcp
->ldc_cblock
);
1189 vsw_physlink_update_ports(vsw_t
*vswp
)
1191 vsw_port_list_t
*plist
= &vswp
->plist
;
1194 READ_ENTER(&plist
->lockrw
);
1195 for (portp
= plist
->head
; portp
!= NULL
; portp
= portp
->p_next
) {
1196 vsw_port_physlink_update(portp
);
1198 RW_EXIT(&plist
->lockrw
);
1202 * Search for and remove the specified port from the port
1203 * list. Returns 0 if able to locate and remove port, otherwise
1207 vsw_plist_del_node(vsw_t
*vswp
, vsw_port_t
*port
)
1209 vsw_port_list_t
*plist
= &vswp
->plist
;
1210 vsw_port_t
*curr_p
, *prev_p
;
1212 if (plist
->head
== NULL
)
1215 curr_p
= prev_p
= plist
->head
;
1217 while (curr_p
!= NULL
) {
1218 if (curr_p
== port
) {
1219 if (prev_p
== curr_p
) {
1220 plist
->head
= curr_p
->p_next
;
1222 prev_p
->p_next
= curr_p
->p_next
;
1228 curr_p
= curr_p
->p_next
;
1235 * Interrupt handler for ldc messages.
1238 vsw_ldc_cb(uint64_t event
, caddr_t arg
)
1240 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg
;
1241 vsw_t
*vswp
= ldcp
->ldc_vswp
;
1243 D1(vswp
, "%s: enter: ldcid (%lld)\n", __func__
, ldcp
->ldc_id
);
1245 mutex_enter(&ldcp
->ldc_cblock
);
1246 ldcp
->ldc_stats
.callbacks
++;
1248 mutex_enter(&ldcp
->status_lock
);
1249 if ((ldcp
->ldc_status
== LDC_INIT
) || (ldcp
->ldc_handle
== NULL
)) {
1250 mutex_exit(&ldcp
->status_lock
);
1251 mutex_exit(&ldcp
->ldc_cblock
);
1252 return (LDC_SUCCESS
);
1254 mutex_exit(&ldcp
->status_lock
);
1256 if (event
& LDC_EVT_UP
) {
1258 * Channel has come up.
1260 D2(vswp
, "%s: id(%ld) event(%llx) UP: status(%ld)",
1261 __func__
, ldcp
->ldc_id
, event
, ldcp
->ldc_status
);
1263 vsw_process_conn_evt(ldcp
, VSW_CONN_UP
);
1265 ASSERT((event
& (LDC_EVT_RESET
| LDC_EVT_DOWN
)) == 0);
1268 if (event
& LDC_EVT_READ
) {
1270 * Data available for reading.
1272 D2(vswp
, "%s: id(ld) event(%llx) data READ",
1273 __func__
, ldcp
->ldc_id
, event
);
1275 vsw_process_evt_read(ldcp
);
1277 ASSERT((event
& (LDC_EVT_RESET
| LDC_EVT_DOWN
)) == 0);
1282 if (event
& (LDC_EVT_DOWN
| LDC_EVT_RESET
)) {
1283 D2(vswp
, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)",
1284 __func__
, ldcp
->ldc_id
, event
, ldcp
->ldc_status
);
1286 vsw_process_conn_evt(ldcp
, VSW_CONN_RESET
);
1290 * Catch either LDC_EVT_WRITE which we don't support or any
1294 ~(LDC_EVT_UP
| LDC_EVT_RESET
| LDC_EVT_DOWN
| LDC_EVT_READ
)) {
1295 DERR(vswp
, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
1296 __func__
, ldcp
->ldc_id
, event
, ldcp
->ldc_status
);
1300 mutex_exit(&ldcp
->ldc_cblock
);
1303 * Let the drain function know we are finishing if it
1306 mutex_enter(&ldcp
->drain_cv_lock
);
1307 if (ldcp
->drain_state
== VSW_LDC_DRAINING
)
1308 cv_signal(&ldcp
->drain_cv
);
1309 mutex_exit(&ldcp
->drain_cv_lock
);
1311 return (LDC_SUCCESS
);
1315 * Reinitialise data structures associated with the channel.
1318 vsw_ldc_reinit(vsw_ldc_t
*ldcp
)
1320 vsw_t
*vswp
= ldcp
->ldc_vswp
;
1323 D1(vswp
, "%s: enter", __func__
);
1325 port
= ldcp
->ldc_port
;
1327 D2(vswp
, "%s: in 0x%llx : out 0x%llx", __func__
,
1328 ldcp
->lane_in
.lstate
, ldcp
->lane_out
.lstate
);
1330 vsw_free_lane_resources(ldcp
, INBOUND
);
1331 vsw_free_lane_resources(ldcp
, OUTBOUND
);
1333 ldcp
->lane_in
.lstate
= 0;
1334 ldcp
->lane_out
.lstate
= 0;
1337 * Remove parent port from any multicast groups
1338 * it may have registered with. Client must resend
1339 * multicast add command after handshake completes.
1341 vsw_del_mcst_port(port
);
1343 ldcp
->peer_session
= 0;
1344 ldcp
->session_status
= 0;
1346 ldcp
->hphase
= VSW_MILESTONE0
;
1348 vsw_reset_vnet_proto_ops(ldcp
);
1350 D1(vswp
, "%s: exit", __func__
);
1354 * Process a connection event.
1357 vsw_process_conn_evt(vsw_ldc_t
*ldcp
, uint16_t evt
)
1359 vsw_t
*vswp
= ldcp
->ldc_vswp
;
1360 vsw_conn_evt_t
*conn
= NULL
;
1362 D1(vswp
, "%s: enter", __func__
);
1365 * Check if either a reset or restart event is pending
1366 * or in progress. If so just return.
1368 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT
1369 * being received by the callback handler, or a ECONNRESET error
1370 * code being returned from a ldc_read() or ldc_write() call.
1372 * A VSW_CONN_RESTART event occurs when some error checking code
1373 * decides that there is a problem with data from the channel,
1374 * and that the handshake should be restarted.
1376 if (((evt
== VSW_CONN_RESET
) || (evt
== VSW_CONN_RESTART
)) &&
1377 (ldstub((uint8_t *)&ldcp
->reset_active
)))
1381 * If it is an LDC_UP event we first check the recorded
1382 * state of the channel. If this is UP then we know that
1383 * the channel moving to the UP state has already been dealt
1384 * with and don't need to dispatch a new task.
1386 * The reason for this check is that when we do a ldc_up(),
1387 * depending on the state of the peer, we may or may not get
1388 * a LDC_UP event. As we can't depend on getting a LDC_UP evt
1389 * every time we do ldc_up() we explicitly check the channel
1390 * status to see has it come up (ldc_up() is asynch and will
1391 * complete at some undefined time), and take the appropriate
1394 * The flip side of this is that we may get a LDC_UP event
1395 * when we have already seen that the channel is up and have
1398 mutex_enter(&ldcp
->status_lock
);
1399 if (evt
== VSW_CONN_UP
) {
1400 if ((ldcp
->ldc_status
== LDC_UP
) || (ldcp
->reset_active
!= 0)) {
1401 mutex_exit(&ldcp
->status_lock
);
1405 mutex_exit(&ldcp
->status_lock
);
1408 * The transaction group id allows us to identify and discard
1409 * any tasks which are still pending on the taskq and refer
1410 * to the handshake session we are about to restart or reset.
1411 * These stale messages no longer have any real meaning.
1413 (void) atomic_inc_32(&ldcp
->hss_id
);
1415 ASSERT(vswp
->taskq_p
!= NULL
);
1417 if ((conn
= kmem_zalloc(sizeof (vsw_conn_evt_t
), KM_NOSLEEP
)) == NULL
) {
1418 cmn_err(CE_WARN
, "!vsw%d: unable to allocate memory for"
1419 " connection event", vswp
->instance
);
1426 if (ddi_taskq_dispatch(vswp
->taskq_p
, vsw_conn_task
, conn
,
1427 DDI_NOSLEEP
) != DDI_SUCCESS
) {
1428 cmn_err(CE_WARN
, "!vsw%d: Can't dispatch connection task",
1431 kmem_free(conn
, sizeof (vsw_conn_evt_t
));
1435 D1(vswp
, "%s: exit", __func__
);
1440 * Have mostly likely failed due to memory shortage. Clear the flag so
1441 * that future requests will at least be attempted and will hopefully
1444 if ((evt
== VSW_CONN_RESET
) || (evt
== VSW_CONN_RESTART
))
1445 ldcp
->reset_active
= 0;
1449 * Deal with events relating to a connection. Invoked from a taskq.
1452 vsw_conn_task(void *arg
)
1454 vsw_conn_evt_t
*conn
= (vsw_conn_evt_t
*)arg
;
1455 vsw_ldc_t
*ldcp
= NULL
;
1459 ldc_status_t curr_status
;
1463 vswp
= ldcp
->ldc_vswp
;
1464 portp
= ldcp
->ldc_port
;
1466 D1(vswp
, "%s: enter", __func__
);
1468 /* can safely free now have copied out data */
1469 kmem_free(conn
, sizeof (vsw_conn_evt_t
));
1471 if (ldcp
->rcv_thread
!= NULL
) {
1472 vsw_stop_rcv_thread(ldcp
);
1473 } else if (ldcp
->msg_thread
!= NULL
) {
1474 vsw_stop_msg_thread(ldcp
);
1477 mutex_enter(&ldcp
->status_lock
);
1478 if (ldc_status(ldcp
->ldc_handle
, &curr_status
) != 0) {
1479 cmn_err(CE_WARN
, "!vsw%d: Unable to read status of "
1480 "channel %ld", vswp
->instance
, ldcp
->ldc_id
);
1481 mutex_exit(&ldcp
->status_lock
);
1486 * If we wish to restart the handshake on this channel, then if
1487 * the channel is UP we bring it DOWN to flush the underlying
1490 if ((evt
== VSW_CONN_RESTART
) && (curr_status
== LDC_UP
))
1491 (void) ldc_down(ldcp
->ldc_handle
);
1493 if ((portp
->p_hio_capable
) && (portp
->p_hio_enabled
)) {
1494 vsw_hio_stop(vswp
, ldcp
);
1498 * re-init all the associated data structures.
1500 vsw_ldc_reinit(ldcp
);
1503 * Bring the channel back up (note it does no harm to
1504 * do this even if the channel is already UP, Just
1505 * becomes effectively a no-op).
1507 (void) ldc_up(ldcp
->ldc_handle
);
1510 * Check if channel is now UP. This will only happen if
1511 * peer has also done a ldc_up().
1513 if (ldc_status(ldcp
->ldc_handle
, &curr_status
) != 0) {
1514 cmn_err(CE_WARN
, "!vsw%d: Unable to read status of "
1515 "channel %ld", vswp
->instance
, ldcp
->ldc_id
);
1516 mutex_exit(&ldcp
->status_lock
);
1520 ldcp
->ldc_status
= curr_status
;
1522 /* channel UP so restart handshake by sending version info */
1523 if (curr_status
== LDC_UP
) {
1524 if (ldcp
->hcnt
++ > vsw_num_handshakes
) {
1525 cmn_err(CE_WARN
, "!vsw%d: exceeded number of permitted"
1526 " handshake attempts (%d) on channel %ld",
1527 vswp
->instance
, ldcp
->hcnt
, ldcp
->ldc_id
);
1528 mutex_exit(&ldcp
->status_lock
);
1532 if (vsw_obp_ver_proto_workaround
== B_FALSE
&&
1533 (ddi_taskq_dispatch(vswp
->taskq_p
, vsw_send_ver
, ldcp
,
1534 DDI_NOSLEEP
) != DDI_SUCCESS
)) {
1535 cmn_err(CE_WARN
, "!vsw%d: Can't dispatch version task",
1539 * Don't count as valid restart attempt if couldn't
1548 * Mark that the process is complete by clearing the flag.
1550 * Note is it possible that the taskq dispatch above may have failed,
1551 * most likely due to memory shortage. We still clear the flag so
1552 * future attempts will at least be attempted and will hopefully
1555 if ((evt
== VSW_CONN_RESET
) || (evt
== VSW_CONN_RESTART
))
1556 ldcp
->reset_active
= 0;
1558 mutex_exit(&ldcp
->status_lock
);
1560 D1(vswp
, "%s: exit", __func__
);
1564 * returns 0 if legal for event signified by flag to have
1565 * occured at the time it did. Otherwise returns 1.
1568 vsw_check_flag(vsw_ldc_t
*ldcp
, int dir
, uint64_t flag
)
1570 vsw_t
*vswp
= ldcp
->ldc_vswp
;
1575 state
= ldcp
->lane_in
.lstate
;
1577 state
= ldcp
->lane_out
.lstate
;
1579 phase
= ldcp
->hphase
;
1582 case VSW_VER_INFO_RECV
:
1583 if (phase
> VSW_MILESTONE0
) {
1584 DERR(vswp
, "vsw_check_flag (%d): VER_INFO_RECV"
1585 " when in state %d\n", ldcp
->ldc_id
, phase
);
1586 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1591 case VSW_VER_ACK_RECV
:
1592 case VSW_VER_NACK_RECV
:
1593 if (!(state
& VSW_VER_INFO_SENT
)) {
1594 DERR(vswp
, "vsw_check_flag (%d): spurious VER_ACK or "
1595 "VER_NACK when in state %d\n", ldcp
->ldc_id
, phase
);
1596 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1599 state
&= ~VSW_VER_INFO_SENT
;
1602 case VSW_ATTR_INFO_RECV
:
1603 if ((phase
< VSW_MILESTONE1
) || (phase
>= VSW_MILESTONE2
)) {
1604 DERR(vswp
, "vsw_check_flag (%d): ATTR_INFO_RECV"
1605 " when in state %d\n", ldcp
->ldc_id
, phase
);
1606 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1611 case VSW_ATTR_ACK_RECV
:
1612 case VSW_ATTR_NACK_RECV
:
1613 if (!(state
& VSW_ATTR_INFO_SENT
)) {
1614 DERR(vswp
, "vsw_check_flag (%d): spurious ATTR_ACK"
1615 " or ATTR_NACK when in state %d\n",
1616 ldcp
->ldc_id
, phase
);
1617 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1620 state
&= ~VSW_ATTR_INFO_SENT
;
1623 case VSW_DRING_INFO_RECV
:
1624 if (phase
< VSW_MILESTONE1
) {
1625 DERR(vswp
, "vsw_check_flag (%d): DRING_INFO_RECV"
1626 " when in state %d\n", ldcp
->ldc_id
, phase
);
1627 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1632 case VSW_DRING_ACK_RECV
:
1633 case VSW_DRING_NACK_RECV
:
1634 if (!(state
& VSW_DRING_INFO_SENT
)) {
1635 DERR(vswp
, "vsw_check_flag (%d): spurious DRING_ACK "
1636 " or DRING_NACK when in state %d\n",
1637 ldcp
->ldc_id
, phase
);
1638 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1641 state
&= ~VSW_DRING_INFO_SENT
;
1644 case VSW_RDX_INFO_RECV
:
1645 if (phase
< VSW_MILESTONE3
) {
1646 DERR(vswp
, "vsw_check_flag (%d): RDX_INFO_RECV"
1647 " when in state %d\n", ldcp
->ldc_id
, phase
);
1648 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1653 case VSW_RDX_ACK_RECV
:
1654 case VSW_RDX_NACK_RECV
:
1655 if (!(state
& VSW_RDX_INFO_SENT
)) {
1656 DERR(vswp
, "vsw_check_flag (%d): spurious RDX_ACK or "
1657 "RDX_NACK when in state %d\n", ldcp
->ldc_id
, phase
);
1658 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1661 state
&= ~VSW_RDX_INFO_SENT
;
1664 case VSW_MCST_INFO_RECV
:
1665 if (phase
< VSW_MILESTONE3
) {
1666 DERR(vswp
, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
1667 " when in state %d\n", ldcp
->ldc_id
, phase
);
1668 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
1674 DERR(vswp
, "vsw_check_flag (%lld): unknown flag (%llx)",
1675 ldcp
->ldc_id
, flag
);
1680 ldcp
->lane_in
.lstate
= state
;
1682 ldcp
->lane_out
.lstate
= state
;
1684 D1(vswp
, "vsw_check_flag (chan %lld): exit", ldcp
->ldc_id
);
1690 vsw_next_milestone(vsw_ldc_t
*ldcp
)
1692 vsw_t
*vswp
= ldcp
->ldc_vswp
;
1693 vsw_port_t
*portp
= ldcp
->ldc_port
;
1694 lane_t
*lane_out
= &ldcp
->lane_out
;
1695 lane_t
*lane_in
= &ldcp
->lane_in
;
1697 D1(vswp
, "%s (chan %lld): enter (phase %ld)", __func__
,
1698 ldcp
->ldc_id
, ldcp
->hphase
);
1700 DUMP_FLAGS(lane_in
->lstate
);
1701 DUMP_FLAGS(lane_out
->lstate
);
1703 switch (ldcp
->hphase
) {
1705 case VSW_MILESTONE0
:
1707 * If we haven't started to handshake with our peer,
1708 * start to do so now.
1710 if (lane_out
->lstate
== 0) {
1711 D2(vswp
, "%s: (chan %lld) starting handshake "
1712 "with peer", __func__
, ldcp
->ldc_id
);
1713 vsw_process_conn_evt(ldcp
, VSW_CONN_UP
);
1717 * Only way to pass this milestone is to have successfully
1718 * negotiated version info.
1720 if ((lane_in
->lstate
& VSW_VER_ACK_SENT
) &&
1721 (lane_out
->lstate
& VSW_VER_ACK_RECV
)) {
1723 D2(vswp
, "%s: (chan %lld) leaving milestone 0",
1724 __func__
, ldcp
->ldc_id
);
1726 vsw_set_vnet_proto_ops(ldcp
);
1729 * Next milestone is passed when attribute
1730 * information has been successfully exchanged.
1732 ldcp
->hphase
= VSW_MILESTONE1
;
1733 vsw_send_attr(ldcp
);
1738 case VSW_MILESTONE1
:
1740 * Only way to pass this milestone is to have successfully
1741 * negotiated attribute information, in both directions.
1743 if (!((lane_in
->lstate
& VSW_ATTR_ACK_SENT
) &&
1744 (lane_out
->lstate
& VSW_ATTR_ACK_RECV
))) {
1748 ldcp
->hphase
= VSW_MILESTONE2
;
1751 * If the peer device has said it wishes to
1752 * use descriptor rings then we send it our ring
1753 * info, otherwise we just set up a private ring
1754 * which we use an internal buffer
1756 if ((VSW_VER_GTEQ(ldcp
, 1, 2) &&
1757 (lane_in
->xfer_mode
& VIO_DRING_MODE_V1_2
)) ||
1758 (VSW_VER_LT(ldcp
, 1, 2) &&
1759 (lane_in
->xfer_mode
== VIO_DRING_MODE_V1_0
))) {
1760 vsw_send_dring_info(ldcp
);
1765 * The peer doesn't operate in dring mode; we
1766 * can simply fallthru to the RDX phase from
1771 case VSW_MILESTONE2
:
1773 * If peer has indicated in its attribute message that
1774 * it wishes to use descriptor rings then the only way
1775 * to pass this milestone is for us to have received
1778 * If peer is not using descriptor rings then just fall
1781 if ((VSW_VER_GTEQ(ldcp
, 1, 2) &&
1782 (lane_in
->xfer_mode
& VIO_DRING_MODE_V1_2
)) ||
1783 (VSW_VER_LT(ldcp
, 1, 2) &&
1784 (lane_in
->xfer_mode
==
1785 VIO_DRING_MODE_V1_0
))) {
1786 if (!(lane_in
->lstate
& VSW_DRING_ACK_SENT
))
1790 D2(vswp
, "%s: (chan %lld) leaving milestone 2",
1791 __func__
, ldcp
->ldc_id
);
1793 ldcp
->hphase
= VSW_MILESTONE3
;
1797 case VSW_MILESTONE3
:
1799 * Pass this milestone when all paramaters have been
1800 * successfully exchanged and RDX sent in both directions.
1802 * Mark the relevant lane as available to transmit data. In
1803 * RxDringData mode, lane_in is associated with transmit and
1804 * lane_out is associated with receive. It is the reverse in
1807 if ((lane_out
->lstate
& VSW_RDX_ACK_SENT
) &&
1808 (lane_in
->lstate
& VSW_RDX_ACK_RECV
)) {
1810 D2(vswp
, "%s: (chan %lld) leaving milestone 3",
1811 __func__
, ldcp
->ldc_id
);
1812 D2(vswp
, "%s: ** handshake complete (0x%llx : "
1813 "0x%llx) **", __func__
, lane_in
->lstate
,
1815 if (lane_out
->dring_mode
== VIO_RX_DRING_DATA
) {
1816 lane_in
->lstate
|= VSW_LANE_ACTIVE
;
1818 lane_out
->lstate
|= VSW_LANE_ACTIVE
;
1820 ldcp
->hphase
= VSW_MILESTONE4
;
1823 /* Start HIO if enabled and capable */
1824 if ((portp
->p_hio_enabled
) && (portp
->p_hio_capable
)) {
1825 D2(vswp
, "%s: start HybridIO setup", __func__
);
1826 vsw_hio_start(vswp
, ldcp
);
1829 if (ldcp
->pls_negotiated
== B_TRUE
) {
1831 * The vnet device has negotiated to get phys
1832 * link updates. Now that the handshake with
1833 * the vnet device is complete, send an initial
1834 * update with the current physical link state.
1836 vsw_send_physlink_msg(ldcp
,
1837 vswp
->phys_link_state
);
1841 D2(vswp
, "%s: still in milestone 3 (0x%llx : 0x%llx)",
1842 __func__
, lane_in
->lstate
,
1847 case VSW_MILESTONE4
:
1848 D2(vswp
, "%s: (chan %lld) in milestone 4", __func__
,
1853 DERR(vswp
, "%s: (chan %lld) Unknown Phase %x", __func__
,
1854 ldcp
->ldc_id
, ldcp
->hphase
);
1857 D1(vswp
, "%s (chan %lld): exit (phase %ld)", __func__
, ldcp
->ldc_id
,
1862 * Check if major version is supported.
1864 * Returns 0 if finds supported major number, and if necessary
1865 * adjusts the minor field.
1867 * Returns 1 if can't match major number exactly. Sets mjor/minor
1868 * to next lowest support values, or to zero if no other values possible.
1871 vsw_supported_version(vio_ver_msg_t
*vp
)
1875 D1(NULL
, "vsw_supported_version: enter");
1877 for (i
= 0; i
< VSW_NUM_VER
; i
++) {
1878 if (vsw_versions
[i
].ver_major
== vp
->ver_major
) {
1880 * Matching or lower major version found. Update
1881 * minor number if necessary.
1883 if (vp
->ver_minor
> vsw_versions
[i
].ver_minor
) {
1884 D2(NULL
, "%s: adjusting minor value from %d "
1885 "to %d", __func__
, vp
->ver_minor
,
1886 vsw_versions
[i
].ver_minor
);
1887 vp
->ver_minor
= vsw_versions
[i
].ver_minor
;
1894 * If the message contains a higher major version number, set
1895 * the message's major/minor versions to the current values
1896 * and return false, so this message will get resent with
1899 if (vsw_versions
[i
].ver_major
< vp
->ver_major
) {
1900 D2(NULL
, "%s: adjusting major and minor "
1901 "values to %d, %d\n",
1902 __func__
, vsw_versions
[i
].ver_major
,
1903 vsw_versions
[i
].ver_minor
);
1904 vp
->ver_major
= vsw_versions
[i
].ver_major
;
1905 vp
->ver_minor
= vsw_versions
[i
].ver_minor
;
1910 /* No match was possible, zero out fields */
1914 D1(NULL
, "vsw_supported_version: exit");
1920 * Set vnet-protocol-version dependent functions based on version.
1923 vsw_set_vnet_proto_ops(vsw_ldc_t
*ldcp
)
1925 vsw_t
*vswp
= ldcp
->ldc_vswp
;
1926 lane_t
*lp
= &ldcp
->lane_out
;
1929 * Setup the appropriate dring data processing routine and any
1930 * associated thread based on the version.
1932 * In versions < 1.6, we support only TxDring mode. In this mode, the
1933 * msg worker thread processes all types of VIO msgs (ctrl and data).
1935 * In versions >= 1.6, we also support RxDringData mode. In this mode,
1936 * the rcv worker thread processes dring data messages (msgtype:
1937 * VIO_TYPE_DATA, subtype: VIO_SUBTYPE_INFO, env: VIO_DRING_DATA). The
1938 * rest of the data messages (including acks) and ctrl messages are
1939 * handled directly by the callback (intr) thread.
1941 * However, for versions >= 1.6, we could still fallback to TxDring
1942 * mode. This could happen if RxDringData mode has been disabled (see
1943 * below) on this guest or on the peer guest. This info is determined
1944 * as part of attr exchange phase of handshake. Hence, we setup these
1945 * pointers for v1.6 after attr msg phase completes during handshake.
1947 if (VSW_VER_GTEQ(ldcp
, 1, 6)) {
1949 * Set data dring mode for vsw_send_attr(). We setup msg worker
1950 * thread in TxDring mode or rcv worker thread in RxDringData
1951 * mode when attr phase of handshake completes.
1953 if (vsw_mapin_avail(ldcp
) == B_TRUE
) {
1954 lp
->dring_mode
= (VIO_RX_DRING_DATA
| VIO_TX_DRING
);
1956 lp
->dring_mode
= VIO_TX_DRING
;
1959 lp
->dring_mode
= VIO_TX_DRING
;
1963 * Setup the MTU for attribute negotiation based on the version.
1965 if (VSW_VER_GTEQ(ldcp
, 1, 4)) {
1967 * If the version negotiated with peer is >= 1.4(Jumbo Frame
1968 * Support), set the mtu in our attributes to max_frame_size.
1970 lp
->mtu
= vswp
->max_frame_size
;
1971 } else if (VSW_VER_EQ(ldcp
, 1, 3)) {
1973 * If the version negotiated with peer is == 1.3 (Vlan Tag
1974 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
1976 lp
->mtu
= ETHERMAX
+ VLAN_TAGSZ
;
1978 vsw_port_t
*portp
= ldcp
->ldc_port
;
1980 * Pre-1.3 peers expect max frame size of ETHERMAX.
1981 * We can negotiate that size with those peers provided only
1982 * pvid is defined for our peer and there are no vids. Then we
1983 * can send/recv only untagged frames of max size ETHERMAX.
1984 * Note that pvid of the peer can be different, as vsw has to
1985 * serve the vnet in that vlan even if itself is not assigned
1988 if (portp
->nvids
== 0) {
1994 * Setup version dependent data processing functions.
1996 if (VSW_VER_GTEQ(ldcp
, 1, 2)) {
1997 /* Versions >= 1.2 */
1999 if (VSW_PRI_ETH_DEFINED(vswp
)) {
2001 * enable priority routines and pkt mode only if
2002 * at least one pri-eth-type is specified in MD.
2004 ldcp
->tx
= vsw_ldctx_pri
;
2005 ldcp
->rx_pktdata
= vsw_process_pkt_data
;
2007 /* set xfer mode for vsw_send_attr() */
2008 lp
->xfer_mode
= VIO_PKT_MODE
| VIO_DRING_MODE_V1_2
;
2010 /* no priority eth types defined in MD */
2012 ldcp
->tx
= vsw_ldctx
;
2013 ldcp
->rx_pktdata
= vsw_process_pkt_data_nop
;
2015 /* set xfer mode for vsw_send_attr() */
2016 lp
->xfer_mode
= VIO_DRING_MODE_V1_2
;
2020 /* Versions prior to 1.2 */
2022 vsw_reset_vnet_proto_ops(ldcp
);
2027 * Reset vnet-protocol-version dependent functions to v1.0.
2030 vsw_reset_vnet_proto_ops(vsw_ldc_t
*ldcp
)
2032 lane_t
*lp
= &ldcp
->lane_out
;
2034 ldcp
->tx
= vsw_ldctx
;
2035 ldcp
->rx_pktdata
= vsw_process_pkt_data_nop
;
2037 /* set xfer mode for vsw_send_attr() */
2038 lp
->xfer_mode
= VIO_DRING_MODE_V1_0
;
2042 vsw_process_evt_read(vsw_ldc_t
*ldcp
)
2044 if (ldcp
->msg_thread
!= NULL
) {
2046 * TxDring mode; wakeup message worker
2047 * thread to process the VIO messages.
2049 mutex_exit(&ldcp
->ldc_cblock
);
2050 mutex_enter(&ldcp
->msg_thr_lock
);
2051 if (!(ldcp
->msg_thr_flags
& VSW_WTHR_DATARCVD
)) {
2052 ldcp
->msg_thr_flags
|= VSW_WTHR_DATARCVD
;
2053 cv_signal(&ldcp
->msg_thr_cv
);
2055 mutex_exit(&ldcp
->msg_thr_lock
);
2056 mutex_enter(&ldcp
->ldc_cblock
);
2059 * We invoke vsw_process_pkt() in the context of the LDC
2060 * callback (vsw_ldc_cb()) during handshake, until the dring
2061 * mode is negotiated. After the dring mode is negotiated, the
2062 * msgs are processed by the msg worker thread (above case) if
2063 * the dring mode is TxDring. Otherwise (in RxDringData mode)
2064 * we continue to process the msgs directly in the callback
2067 vsw_process_pkt(ldcp
);
2072 * Main routine for processing messages received over LDC.
2075 vsw_process_pkt(void *arg
)
2077 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg
;
2078 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2080 vio_msg_tag_t
*tagp
;
2085 D1(vswp
, "%s enter: ldcid (%lld)\n", __func__
, ldcp
->ldc_id
);
2087 ASSERT(MUTEX_HELD(&ldcp
->ldc_cblock
));
2089 ldcmsg
= ldcp
->ldcmsg
;
2091 * If channel is up read messages until channel is empty.
2094 msglen
= ldcp
->msglen
;
2095 rv
= ldc_read(ldcp
->ldc_handle
, (caddr_t
)ldcmsg
, &msglen
);
2098 DERR(vswp
, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n",
2099 __func__
, ldcp
->ldc_id
, rv
, msglen
);
2102 /* channel has been reset */
2103 if (rv
== ECONNRESET
) {
2104 vsw_process_conn_evt(ldcp
, VSW_CONN_RESET
);
2109 D2(vswp
, "%s: ldc_read id(%lld) NODATA", __func__
,
2114 D2(vswp
, "%s: ldc_read id(%lld): msglen(%d)", __func__
,
2115 ldcp
->ldc_id
, msglen
);
2118 * Figure out what sort of packet we have gotten by
2119 * examining the msg tag, and then switch it appropriately.
2121 tagp
= (vio_msg_tag_t
*)ldcmsg
;
2123 switch (tagp
->vio_msgtype
) {
2125 vsw_dispatch_ctrl_task(ldcp
, ldcmsg
, tagp
, msglen
);
2128 vsw_process_data_pkt(ldcp
, ldcmsg
, tagp
, msglen
);
2131 vsw_process_err_pkt(ldcp
, ldcmsg
, tagp
);
2134 DERR(vswp
, "%s: Unknown tag(%lx) ", __func__
,
2135 "id(%lx)\n", tagp
->vio_msgtype
, ldcp
->ldc_id
);
2140 D1(vswp
, "%s exit: ldcid (%lld)\n", __func__
, ldcp
->ldc_id
);
2144 * Dispatch a task to process a VIO control message.
2147 vsw_dispatch_ctrl_task(vsw_ldc_t
*ldcp
, void *cpkt
, vio_msg_tag_t
*tagp
,
2150 vsw_ctrl_task_t
*ctaskp
= NULL
;
2151 vsw_port_t
*port
= ldcp
->ldc_port
;
2152 vsw_t
*vswp
= port
->p_vswp
;
2154 D1(vswp
, "%s: enter", __func__
);
2157 * We need to handle RDX ACK messages in-band as once they
2158 * are exchanged it is possible that we will get an
2159 * immediate (legitimate) data packet.
2161 if ((tagp
->vio_subtype_env
== VIO_RDX
) &&
2162 (tagp
->vio_subtype
== VIO_SUBTYPE_ACK
)) {
2164 if (vsw_check_flag(ldcp
, INBOUND
, VSW_RDX_ACK_RECV
))
2167 ldcp
->lane_in
.lstate
|= VSW_RDX_ACK_RECV
;
2168 D2(vswp
, "%s (%ld) handling RDX_ACK in place "
2169 "(ostate 0x%llx : hphase %d)", __func__
,
2170 ldcp
->ldc_id
, ldcp
->lane_in
.lstate
, ldcp
->hphase
);
2171 vsw_next_milestone(ldcp
);
2175 ctaskp
= kmem_alloc(sizeof (vsw_ctrl_task_t
), KM_NOSLEEP
);
2177 if (ctaskp
== NULL
) {
2178 DERR(vswp
, "%s: unable to alloc space for ctrl msg", __func__
);
2179 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
2183 ctaskp
->ldcp
= ldcp
;
2184 bcopy((def_msg_t
*)cpkt
, &ctaskp
->pktp
, msglen
);
2185 ctaskp
->hss_id
= ldcp
->hss_id
;
2188 * Dispatch task to processing taskq if port is not in
2189 * the process of being detached.
2191 mutex_enter(&port
->state_lock
);
2192 if (port
->state
== VSW_PORT_INIT
) {
2193 if ((vswp
->taskq_p
== NULL
) ||
2194 (ddi_taskq_dispatch(vswp
->taskq_p
, vsw_process_ctrl_pkt
,
2195 ctaskp
, DDI_NOSLEEP
) != DDI_SUCCESS
)) {
2196 mutex_exit(&port
->state_lock
);
2197 DERR(vswp
, "%s: unable to dispatch task to taskq",
2199 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
2200 kmem_free(ctaskp
, sizeof (vsw_ctrl_task_t
));
2204 kmem_free(ctaskp
, sizeof (vsw_ctrl_task_t
));
2205 DWARN(vswp
, "%s: port %d detaching, not dispatching "
2206 "task", __func__
, port
->p_instance
);
2209 mutex_exit(&port
->state_lock
);
2211 D2(vswp
, "%s: dispatched task to taskq for chan %d", __func__
,
2213 D1(vswp
, "%s: exit", __func__
);
2217 * Process a VIO ctrl message. Invoked from taskq.
2220 vsw_process_ctrl_pkt(void *arg
)
2222 vsw_ctrl_task_t
*ctaskp
= (vsw_ctrl_task_t
*)arg
;
2223 vsw_ldc_t
*ldcp
= ctaskp
->ldcp
;
2224 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2228 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
2230 bcopy(&ctaskp
->pktp
, &tag
, sizeof (vio_msg_tag_t
));
2231 env
= tag
.vio_subtype_env
;
2233 /* stale pkt check */
2234 if (ctaskp
->hss_id
< ldcp
->hss_id
) {
2235 DWARN(vswp
, "%s: discarding stale packet belonging to earlier"
2236 " (%ld) handshake session", __func__
, ctaskp
->hss_id
);
2237 kmem_free(ctaskp
, sizeof (vsw_ctrl_task_t
));
2241 /* session id check */
2242 if (ldcp
->session_status
& VSW_PEER_SESSION
) {
2243 if (ldcp
->peer_session
!= tag
.vio_sid
) {
2244 DERR(vswp
, "%s (chan %d): invalid session id (%llx)",
2245 __func__
, ldcp
->ldc_id
, tag
.vio_sid
);
2246 kmem_free(ctaskp
, sizeof (vsw_ctrl_task_t
));
2247 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
2253 * Switch on vio_subtype envelope, then let lower routines
2254 * decide if its an INFO, ACK or NACK packet.
2258 vsw_process_ctrl_ver_pkt(ldcp
, &ctaskp
->pktp
);
2261 vsw_process_ctrl_dring_reg_pkt(ldcp
, &ctaskp
->pktp
);
2263 case VIO_DRING_UNREG
:
2264 vsw_process_ctrl_dring_unreg_pkt(ldcp
, &ctaskp
->pktp
);
2267 vsw_process_ctrl_attr_pkt(ldcp
, &ctaskp
->pktp
);
2269 case VNET_MCAST_INFO
:
2270 vsw_process_ctrl_mcst_pkt(ldcp
, &ctaskp
->pktp
);
2273 vsw_process_ctrl_rdx_pkt(ldcp
, &ctaskp
->pktp
);
2276 vsw_process_dds_msg(vswp
, ldcp
, &ctaskp
->pktp
);
2279 case VNET_PHYSLINK_INFO
:
2280 vsw_process_physlink_msg(ldcp
, &ctaskp
->pktp
);
2283 DERR(vswp
, "%s: unknown vio_subtype_env (%x)\n", __func__
, env
);
2286 kmem_free(ctaskp
, sizeof (vsw_ctrl_task_t
));
2287 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
2291 * Version negotiation. We can end up here either because our peer
2292 * has responded to a handshake message we have sent it, or our peer
2293 * has initiated a handshake with us. If its the former then can only
2294 * be ACK or NACK, if its the later can only be INFO.
2296 * If its an ACK we move to the next stage of the handshake, namely
2297 * attribute exchange. If its a NACK we see if we can specify another
2298 * version, if we can't we stop.
2300 * If it is an INFO we reset all params associated with communication
2301 * in that direction over this channel (remember connection is
2302 * essentially 2 independent simplex channels).
2305 vsw_process_ctrl_ver_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
2307 vio_ver_msg_t
*ver_pkt
;
2308 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2310 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
2313 * We know this is a ctrl/version packet so
2314 * cast it into the correct structure.
2316 ver_pkt
= (vio_ver_msg_t
*)pkt
;
2318 switch (ver_pkt
->tag
.vio_subtype
) {
2319 case VIO_SUBTYPE_INFO
:
2320 D2(vswp
, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
2323 * Record the session id, which we will use from now
2324 * until we see another VER_INFO msg. Even then the
2325 * session id in most cases will be unchanged, execpt
2326 * if channel was reset.
2328 if ((ldcp
->session_status
& VSW_PEER_SESSION
) &&
2329 (ldcp
->peer_session
!= ver_pkt
->tag
.vio_sid
)) {
2330 DERR(vswp
, "%s: updating session id for chan %lld "
2331 "from %llx to %llx", __func__
, ldcp
->ldc_id
,
2332 ldcp
->peer_session
, ver_pkt
->tag
.vio_sid
);
2335 ldcp
->peer_session
= ver_pkt
->tag
.vio_sid
;
2336 ldcp
->session_status
|= VSW_PEER_SESSION
;
2338 /* Legal message at this time ? */
2339 if (vsw_check_flag(ldcp
, INBOUND
, VSW_VER_INFO_RECV
))
2343 * First check the device class. Currently only expect
2344 * to be talking to a network device. In the future may
2345 * also talk to another switch.
2347 if (ver_pkt
->dev_class
!= VDEV_NETWORK
) {
2348 DERR(vswp
, "%s: illegal device class %d", __func__
,
2349 ver_pkt
->dev_class
);
2351 ver_pkt
->tag
.vio_sid
= ldcp
->local_session
;
2352 ver_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_NACK
;
2354 DUMP_TAG_PTR((vio_msg_tag_t
*)ver_pkt
);
2356 (void) vsw_send_msg(ldcp
, (void *)ver_pkt
,
2357 sizeof (vio_ver_msg_t
), B_TRUE
);
2359 ldcp
->lane_in
.lstate
|= VSW_VER_NACK_SENT
;
2360 vsw_next_milestone(ldcp
);
2363 ldcp
->dev_class
= ver_pkt
->dev_class
;
2367 * Now check the version.
2369 if (vsw_supported_version(ver_pkt
) == 0) {
2371 * Support this major version and possibly
2372 * adjusted minor version.
2375 D2(vswp
, "%s: accepted ver %d:%d", __func__
,
2376 ver_pkt
->ver_major
, ver_pkt
->ver_minor
);
2378 /* Store accepted values */
2379 ldcp
->lane_in
.ver_major
= ver_pkt
->ver_major
;
2380 ldcp
->lane_in
.ver_minor
= ver_pkt
->ver_minor
;
2382 ver_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_ACK
;
2384 ldcp
->lane_in
.lstate
|= VSW_VER_ACK_SENT
;
2386 if (vsw_obp_ver_proto_workaround
== B_TRUE
) {
2388 * Send a version info message
2389 * using the accepted version that
2390 * we are about to ack. Also note that
2391 * we send our ver info before we ack.
2392 * Otherwise, as soon as receiving the
2393 * ack, obp sends attr info msg, which
2394 * breaks vsw_check_flag() invoked
2395 * from vsw_process_ctrl_attr_pkt();
2396 * as we also need VSW_VER_ACK_RECV to
2397 * be set in lane_out.lstate, before
2398 * we can receive attr info.
2404 * NACK back with the next lower major/minor
2405 * pairing we support (if don't suuport any more
2406 * versions then they will be set to zero.
2409 D2(vswp
, "%s: replying with ver %d:%d", __func__
,
2410 ver_pkt
->ver_major
, ver_pkt
->ver_minor
);
2412 /* Store updated values */
2413 ldcp
->lane_in
.ver_major
= ver_pkt
->ver_major
;
2414 ldcp
->lane_in
.ver_minor
= ver_pkt
->ver_minor
;
2416 ver_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_NACK
;
2418 ldcp
->lane_in
.lstate
|= VSW_VER_NACK_SENT
;
2421 DUMP_TAG_PTR((vio_msg_tag_t
*)ver_pkt
);
2422 ver_pkt
->tag
.vio_sid
= ldcp
->local_session
;
2423 (void) vsw_send_msg(ldcp
, (void *)ver_pkt
,
2424 sizeof (vio_ver_msg_t
), B_TRUE
);
2426 vsw_next_milestone(ldcp
);
2429 case VIO_SUBTYPE_ACK
:
2430 D2(vswp
, "%s: VIO_SUBTYPE_ACK\n", __func__
);
2432 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_VER_ACK_RECV
))
2435 /* Store updated values */
2436 ldcp
->lane_out
.ver_major
= ver_pkt
->ver_major
;
2437 ldcp
->lane_out
.ver_minor
= ver_pkt
->ver_minor
;
2439 ldcp
->lane_out
.lstate
|= VSW_VER_ACK_RECV
;
2440 vsw_next_milestone(ldcp
);
2444 case VIO_SUBTYPE_NACK
:
2445 D2(vswp
, "%s: VIO_SUBTYPE_NACK\n", __func__
);
2447 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_VER_NACK_RECV
))
2451 * If our peer sent us a NACK with the ver fields set to
2452 * zero then there is nothing more we can do. Otherwise see
2453 * if we support either the version suggested, or a lesser
2456 if ((ver_pkt
->ver_major
== 0) && (ver_pkt
->ver_minor
== 0)) {
2457 DERR(vswp
, "%s: peer unable to negotiate any "
2458 "further.", __func__
);
2459 ldcp
->lane_out
.lstate
|= VSW_VER_NACK_RECV
;
2460 vsw_next_milestone(ldcp
);
2465 * Check to see if we support this major version or
2466 * a lower one. If we don't then maj/min will be set
2469 (void) vsw_supported_version(ver_pkt
);
2470 if ((ver_pkt
->ver_major
== 0) && (ver_pkt
->ver_minor
== 0)) {
2471 /* Nothing more we can do */
2472 DERR(vswp
, "%s: version negotiation failed.\n",
2474 ldcp
->lane_out
.lstate
|= VSW_VER_NACK_RECV
;
2475 vsw_next_milestone(ldcp
);
2477 /* found a supported major version */
2478 ldcp
->lane_out
.ver_major
= ver_pkt
->ver_major
;
2479 ldcp
->lane_out
.ver_minor
= ver_pkt
->ver_minor
;
2481 D2(vswp
, "%s: resending with updated values (%x, %x)",
2482 __func__
, ver_pkt
->ver_major
, ver_pkt
->ver_minor
);
2484 ldcp
->lane_out
.lstate
|= VSW_VER_INFO_SENT
;
2485 ver_pkt
->tag
.vio_sid
= ldcp
->local_session
;
2486 ver_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
2488 DUMP_TAG_PTR((vio_msg_tag_t
*)ver_pkt
);
2490 (void) vsw_send_msg(ldcp
, (void *)ver_pkt
,
2491 sizeof (vio_ver_msg_t
), B_TRUE
);
2493 vsw_next_milestone(ldcp
);
2499 DERR(vswp
, "%s: unknown vio_subtype %x\n", __func__
,
2500 ver_pkt
->tag
.vio_subtype
);
2503 D1(vswp
, "%s(%lld): exit\n", __func__
, ldcp
->ldc_id
);
2507 vsw_process_attr_info(vsw_ldc_t
*ldcp
, vnet_attr_msg_t
*msg
)
2509 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2510 vsw_port_t
*port
= ldcp
->ldc_port
;
2511 struct ether_addr ea
;
2512 uint64_t macaddr
= 0;
2513 lane_t
*lane_out
= &ldcp
->lane_out
;
2514 lane_t
*lane_in
= &ldcp
->lane_in
;
2519 D2(vswp
, "%s: VIO_SUBTYPE_INFO", __func__
);
2521 if (vsw_check_flag(ldcp
, INBOUND
, VSW_ATTR_INFO_RECV
)) {
2525 if ((msg
->xfer_mode
!= VIO_DESC_MODE
) &&
2526 (msg
->xfer_mode
!= lane_out
->xfer_mode
)) {
2527 D2(NULL
, "%s: unknown mode %x\n", __func__
, msg
->xfer_mode
);
2531 /* Only support MAC addresses at moment. */
2532 if ((msg
->addr_type
!= ADDR_TYPE_MAC
) || (msg
->addr
== 0)) {
2533 D2(NULL
, "%s: invalid addr_type %x, or address 0x%llx\n",
2534 __func__
, msg
->addr_type
, msg
->addr
);
2539 * MAC address supplied by device should match that stored
2540 * in the vsw-port OBP node. Need to decide what to do if they
2541 * don't match, for the moment just warn but don't fail.
2543 vnet_macaddr_ultostr(msg
->addr
, ea
.ether_addr_octet
);
2544 if (ether_cmp(&ea
, &port
->p_macaddr
) != 0) {
2545 DERR(NULL
, "%s: device supplied address "
2546 "0x%llx doesn't match node address 0x%llx\n",
2547 __func__
, msg
->addr
, port
->p_macaddr
);
2551 * Ack freq only makes sense in pkt mode, in shared
2552 * mode the ring descriptors say whether or not to
2555 if ((VSW_VER_GTEQ(ldcp
, 1, 2) &&
2556 (msg
->xfer_mode
& VIO_DRING_MODE_V1_2
)) ||
2557 (VSW_VER_LT(ldcp
, 1, 2) &&
2558 (msg
->xfer_mode
== VIO_DRING_MODE_V1_0
))) {
2559 if (msg
->ack_freq
> 0) {
2560 D2(NULL
, "%s: non zero ack freq in SHM mode\n",
2567 * Process dring mode attribute.
2569 if (VSW_VER_GTEQ(ldcp
, 1, 6)) {
2572 * Though we are operating in v1.6 mode, it is possible that
2573 * RxDringData mode has been disabled either on this guest or
2574 * on the peer guest. If so, we revert to pre v1.6 behavior of
2575 * TxDring mode. But this must be agreed upon in both
2576 * directions of attr exchange. We first determine the mode
2577 * that can be negotiated.
2579 if ((msg
->options
& VIO_RX_DRING_DATA
) != 0 &&
2580 vsw_mapin_avail(ldcp
) == B_TRUE
) {
2582 * The peer is capable of handling RxDringData AND we
2583 * are also capable of it; we enable RxDringData mode
2586 dring_mode
= VIO_RX_DRING_DATA
;
2587 } else if ((msg
->options
& VIO_TX_DRING
) != 0) {
2589 * If the peer is capable of TxDring mode, we
2590 * negotiate TxDring mode on this channel.
2592 dring_mode
= VIO_TX_DRING
;
2595 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
2596 * modes. We don't support VIO_RX_DRING mode.
2602 * If we have received an ack for the attr info that we sent,
2603 * then check if the dring mode matches what the peer had ack'd
2604 * (saved in lane_out). If they don't match, we fail the
2607 if (lane_out
->lstate
& VSW_ATTR_ACK_RECV
) {
2608 if (msg
->options
!= lane_out
->dring_mode
) {
2614 * Save the negotiated dring mode in our attr
2615 * parameters, so it gets sent in the attr info from us
2618 lane_out
->dring_mode
= dring_mode
;
2621 /* save the negotiated dring mode in the msg to be replied */
2622 msg
->options
= dring_mode
;
2626 * Process MTU attribute.
2628 if (VSW_VER_GTEQ(ldcp
, 1, 4)) {
2631 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
2632 * is negotiated down to the minimum of our mtu and peer's mtu.
2634 if (msg
->mtu
< ETHERMAX
) {
2638 mtu
= MIN(msg
->mtu
, vswp
->max_frame_size
);
2641 * If we have received an ack for the attr info
2642 * that we sent, then check if the mtu computed
2643 * above matches the mtu that the peer had ack'd
2644 * (saved in local hparams). If they don't
2645 * match, we fail the handshake.
2647 if (lane_out
->lstate
& VSW_ATTR_ACK_RECV
) {
2648 if (mtu
!= lane_out
->mtu
) {
2654 * Save the mtu computed above in our
2655 * attr parameters, so it gets sent in
2656 * the attr info from us to the peer.
2658 lane_out
->mtu
= mtu
;
2661 /* save the MIN mtu in the msg to be replied */
2664 /* Versions < 1.4, mtu must match */
2665 if (msg
->mtu
!= lane_out
->mtu
) {
2666 D2(NULL
, "%s: invalid MTU (0x%llx)\n",
2667 __func__
, msg
->mtu
);
2673 * Otherwise store attributes for this lane and update
2676 lane_in
->mtu
= msg
->mtu
;
2677 lane_in
->addr
= msg
->addr
;
2678 lane_in
->addr_type
= msg
->addr_type
;
2679 lane_in
->xfer_mode
= msg
->xfer_mode
;
2680 lane_in
->ack_freq
= msg
->ack_freq
;
2681 lane_in
->physlink_update
= msg
->physlink_update
;
2682 lane_in
->dring_mode
= msg
->options
;
2685 * Check if the client has requested physlink state updates.
2686 * If there is a physical device bound to this vswitch (L2
2687 * mode), set the ack bits to indicate it is supported.
2688 * Otherwise, set the nack bits.
2690 if (VSW_VER_GTEQ(ldcp
, 1, 5)) { /* Protocol ver >= 1.5 */
2692 /* Does the vnet need phys link state updates ? */
2693 if ((lane_in
->physlink_update
&
2694 PHYSLINK_UPDATE_STATE_MASK
) ==
2695 PHYSLINK_UPDATE_STATE
) {
2697 if (vswp
->smode
& VSW_LAYER2
) {
2698 /* is a net-dev assigned to us ? */
2699 msg
->physlink_update
=
2700 PHYSLINK_UPDATE_STATE_ACK
;
2701 ldcp
->pls_negotiated
= B_TRUE
;
2703 /* not in L2 mode */
2704 msg
->physlink_update
=
2705 PHYSLINK_UPDATE_STATE_NACK
;
2706 ldcp
->pls_negotiated
= B_FALSE
;
2710 msg
->physlink_update
=
2711 PHYSLINK_UPDATE_NONE
;
2712 ldcp
->pls_negotiated
= B_FALSE
;
2717 * physlink_update bits are ignored
2718 * if set by clients < v1.5 protocol.
2720 msg
->physlink_update
= PHYSLINK_UPDATE_NONE
;
2721 ldcp
->pls_negotiated
= B_FALSE
;
2724 macaddr
= lane_in
->addr
;
2725 for (i
= ETHERADDRL
- 1; i
>= 0; i
--) {
2726 port
->p_macaddr
.ether_addr_octet
[i
] = macaddr
& 0xFF;
2731 * Setup device specific xmit routines. Note this could be changed
2732 * further in vsw_send_dring_info() for versions >= 1.6 if operating in
2735 mutex_enter(&port
->tx_lock
);
2737 if ((VSW_VER_GTEQ(ldcp
, 1, 2) &&
2738 (lane_in
->xfer_mode
& VIO_DRING_MODE_V1_2
)) ||
2739 (VSW_VER_LT(ldcp
, 1, 2) &&
2740 (lane_in
->xfer_mode
== VIO_DRING_MODE_V1_0
))) {
2741 D2(vswp
, "%s: mode = VIO_DRING_MODE", __func__
);
2742 port
->transmit
= vsw_dringsend
;
2743 } else if (lane_in
->xfer_mode
== VIO_DESC_MODE
) {
2744 D2(vswp
, "%s: mode = VIO_DESC_MODE", __func__
);
2745 vsw_create_privring(ldcp
);
2746 port
->transmit
= vsw_descrsend
;
2747 lane_out
->xfer_mode
= VIO_DESC_MODE
;
2751 * HybridIO is supported only vnet, not by OBP.
2752 * So, set hio_capable to true only when in DRING mode.
2754 if (VSW_VER_GTEQ(ldcp
, 1, 3) &&
2755 (lane_in
->xfer_mode
!= VIO_DESC_MODE
)) {
2756 (void) atomic_swap_32(&port
->p_hio_capable
, B_TRUE
);
2758 (void) atomic_swap_32(&port
->p_hio_capable
, B_FALSE
);
2761 mutex_exit(&port
->tx_lock
);
2767 vsw_process_attr_ack(vsw_ldc_t
*ldcp
, vnet_attr_msg_t
*msg
)
2769 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2770 lane_t
*lane_out
= &ldcp
->lane_out
;
2771 lane_t
*lane_in
= &ldcp
->lane_in
;
2773 D2(vswp
, "%s: VIO_SUBTYPE_ACK", __func__
);
2775 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_ATTR_ACK_RECV
)) {
2780 * Process dring mode attribute.
2782 if (VSW_VER_GTEQ(ldcp
, 1, 6)) {
2785 * The ack msg sent by the peer contains the negotiated dring
2786 * mode between our capability (that we had sent in our attr
2787 * info) and the peer's capability.
2789 if (lane_in
->lstate
& VSW_ATTR_ACK_SENT
) {
2791 * If we have sent an ack for the attr info msg from
2792 * the peer, check if the dring mode that was
2793 * negotiated then (saved in lane_out) matches the
2794 * mode that the peer has ack'd. If they don't match,
2795 * we fail the handshake.
2797 if (lane_out
->dring_mode
!= msg
->options
) {
2801 if ((msg
->options
& lane_out
->dring_mode
) == 0) {
2803 * Peer ack'd with a mode that we don't
2804 * support; we fail the handshake.
2808 if ((msg
->options
& (VIO_TX_DRING
|VIO_RX_DRING_DATA
))
2809 == (VIO_TX_DRING
|VIO_RX_DRING_DATA
)) {
2811 * Peer must ack with only one negotiated mode.
2812 * Otherwise fail handshake.
2818 * Save the negotiated mode, so we can validate it when
2819 * we receive attr info from the peer.
2821 lane_out
->dring_mode
= msg
->options
;
2826 * Process MTU attribute.
2828 if (VSW_VER_GTEQ(ldcp
, 1, 4)) {
2831 * The ack msg sent by the peer contains the minimum of
2832 * our mtu (that we had sent in our attr info) and the
2835 * If we have sent an ack for the attr info msg from
2836 * the peer, check if the mtu that was computed then
2837 * (saved in lane_out params) matches the mtu that the
2838 * peer has ack'd. If they don't match, we fail the
2841 if (lane_in
->lstate
& VSW_ATTR_ACK_SENT
) {
2842 if (lane_out
->mtu
!= msg
->mtu
) {
2847 * If the mtu ack'd by the peer is > our mtu
2848 * fail handshake. Otherwise, save the mtu, so
2849 * we can validate it when we receive attr info
2852 if (msg
->mtu
<= lane_out
->mtu
) {
2853 lane_out
->mtu
= msg
->mtu
;
2864 * Process an attribute packet. We can end up here either because our peer
2865 * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
2866 * peer has sent us an attribute INFO message
2868 * If its an ACK we then move to the next stage of the handshake which
2869 * is to send our descriptor ring info to our peer. If its a NACK then
2870 * there is nothing more we can (currently) do.
2872 * If we get a valid/acceptable INFO packet (and we have already negotiated
2873 * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
2874 * NACK back and reset channel state to INACTIV.
2876 * FUTURE: in time we will probably negotiate over attributes, but for
2877 * the moment unacceptable attributes are regarded as a fatal error.
2881 vsw_process_ctrl_attr_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
2883 vnet_attr_msg_t
*attr_pkt
;
2884 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2885 lane_t
*lane_out
= &ldcp
->lane_out
;
2886 lane_t
*lane_in
= &ldcp
->lane_in
;
2889 D1(vswp
, "%s(%lld) enter", __func__
, ldcp
->ldc_id
);
2892 * We know this is a ctrl/attr packet so
2893 * cast it into the correct structure.
2895 attr_pkt
= (vnet_attr_msg_t
*)pkt
;
2897 switch (attr_pkt
->tag
.vio_subtype
) {
2898 case VIO_SUBTYPE_INFO
:
2900 rv
= vsw_process_attr_info(ldcp
, attr_pkt
);
2902 vsw_free_lane_resources(ldcp
, INBOUND
);
2903 attr_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_NACK
;
2904 ldcp
->lane_in
.lstate
|= VSW_ATTR_NACK_SENT
;
2906 attr_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_ACK
;
2907 lane_in
->lstate
|= VSW_ATTR_ACK_SENT
;
2909 attr_pkt
->tag
.vio_sid
= ldcp
->local_session
;
2910 DUMP_TAG_PTR((vio_msg_tag_t
*)attr_pkt
);
2911 (void) vsw_send_msg(ldcp
, (void *)attr_pkt
,
2912 sizeof (vnet_attr_msg_t
), B_TRUE
);
2913 vsw_next_milestone(ldcp
);
2916 case VIO_SUBTYPE_ACK
:
2918 rv
= vsw_process_attr_ack(ldcp
, attr_pkt
);
2922 lane_out
->lstate
|= VSW_ATTR_ACK_RECV
;
2923 vsw_next_milestone(ldcp
);
2926 case VIO_SUBTYPE_NACK
:
2927 D2(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
2929 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_ATTR_NACK_RECV
))
2932 lane_out
->lstate
|= VSW_ATTR_NACK_RECV
;
2933 vsw_next_milestone(ldcp
);
2937 DERR(vswp
, "%s: unknown vio_subtype %x\n", __func__
,
2938 attr_pkt
->tag
.vio_subtype
);
2941 D1(vswp
, "%s(%lld) exit", __func__
, ldcp
->ldc_id
);
2945 vsw_process_dring_reg_info(vsw_ldc_t
*ldcp
, vio_msg_tag_t
*tagp
)
2948 vsw_t
*vswp
= ldcp
->ldc_vswp
;
2949 lane_t
*lp
= &ldcp
->lane_out
;
2950 dring_info_t
*dp
= NULL
;
2952 D2(vswp
, "%s: VIO_SUBTYPE_INFO", __func__
);
2954 rv
= vsw_check_flag(ldcp
, INBOUND
, VSW_DRING_INFO_RECV
);
2959 if (VSW_VER_GTEQ(ldcp
, 1, 6) &&
2960 (lp
->dring_mode
!= ((vio_dring_reg_msg_t
*)tagp
)->options
)) {
2962 * The earlier version of Solaris vnet driver doesn't set the
2963 * option (VIO_TX_DRING in its case) correctly in its dring reg
2964 * message. We workaround that here by doing the check only
2965 * for versions >= v1.6.
2967 DWARN(vswp
, "%s(%lld): Rcvd dring reg option (%d), "
2968 "negotiated mode (%d)\n", __func__
, ldcp
->ldc_id
,
2969 ((vio_dring_reg_msg_t
*)tagp
)->options
, lp
->dring_mode
);
2974 * Map dring exported by the peer.
2976 dp
= vsw_map_dring(ldcp
, (void *)tagp
);
2982 * Map data buffers exported by the peer if we are in RxDringData mode.
2984 if (lp
->dring_mode
== VIO_RX_DRING_DATA
) {
2985 rv
= vsw_map_data(ldcp
, dp
, (void *)tagp
);
2987 vsw_unmap_dring(ldcp
);
2996 vsw_process_dring_reg_ack(vsw_ldc_t
*ldcp
, vio_msg_tag_t
*tagp
)
2998 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3001 D2(vswp
, "%s: VIO_SUBTYPE_ACK", __func__
);
3003 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_DRING_ACK_RECV
)) {
3007 dp
= ldcp
->lane_out
.dringp
;
3009 /* save dring_ident acked by peer */
3010 dp
->ident
= ((vio_dring_reg_msg_t
*)tagp
)->dring_ident
;
3016 * Process a dring info packet. We can end up here either because our peer
3017 * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
3018 * peer has sent us a dring INFO message.
3020 * If we get a valid/acceptable INFO packet (and we have already negotiated
3021 * a version) we ACK back and update the lane state, otherwise we NACK back.
3023 * FUTURE: nothing to stop client from sending us info on multiple dring's
3024 * but for the moment we will just use the first one we are given.
3028 vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
3033 vio_msg_tag_t
*tagp
= (vio_msg_tag_t
*)pkt
;
3034 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3035 lane_t
*lane_out
= &ldcp
->lane_out
;
3036 lane_t
*lane_in
= &ldcp
->lane_in
;
3038 D1(vswp
, "%s(%lld) enter", __func__
, ldcp
->ldc_id
);
3040 switch (tagp
->vio_subtype
) {
3041 case VIO_SUBTYPE_INFO
:
3042 rv
= vsw_process_dring_reg_info(ldcp
, tagp
);
3044 vsw_free_lane_resources(ldcp
, INBOUND
);
3045 tagp
->vio_subtype
= VIO_SUBTYPE_NACK
;
3046 lane_in
->lstate
|= VSW_DRING_NACK_SENT
;
3048 tagp
->vio_subtype
= VIO_SUBTYPE_ACK
;
3049 lane_in
->lstate
|= VSW_DRING_ACK_SENT
;
3051 tagp
->vio_sid
= ldcp
->local_session
;
3053 if (lane_out
->dring_mode
== VIO_RX_DRING_DATA
) {
3054 dp
= lane_in
->dringp
;
3056 VNET_DRING_REG_EXT_MSG_SIZE(dp
->data_ncookies
);
3058 msgsize
= sizeof (vio_dring_reg_msg_t
);
3060 (void) vsw_send_msg(ldcp
, (void *)tagp
, msgsize
, B_TRUE
);
3061 vsw_next_milestone(ldcp
);
3064 case VIO_SUBTYPE_ACK
:
3065 rv
= vsw_process_dring_reg_ack(ldcp
, tagp
);
3069 lane_out
->lstate
|= VSW_DRING_ACK_RECV
;
3070 vsw_next_milestone(ldcp
);
3073 case VIO_SUBTYPE_NACK
:
3074 D2(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
3076 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_DRING_NACK_RECV
))
3079 lane_out
->lstate
|= VSW_DRING_NACK_RECV
;
3080 vsw_next_milestone(ldcp
);
3084 DERR(vswp
, "%s: Unknown vio_subtype %x\n", __func__
,
3088 D1(vswp
, "%s(%lld) exit", __func__
, ldcp
->ldc_id
);
3092 * Process a request from peer to unregister a dring.
3094 * For the moment we just restart the handshake if our
3095 * peer endpoint attempts to unregister a dring.
3098 vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
3100 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3101 vio_dring_unreg_msg_t
*dring_pkt
;
3104 * We know this is a ctrl/dring packet so
3105 * cast it into the correct structure.
3107 dring_pkt
= (vio_dring_unreg_msg_t
*)pkt
;
3109 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
3111 switch (dring_pkt
->tag
.vio_subtype
) {
3112 case VIO_SUBTYPE_INFO
:
3113 D2(vswp
, "%s: VIO_SUBTYPE_INFO", __func__
);
3115 DWARN(vswp
, "%s: restarting handshake..", __func__
);
3118 case VIO_SUBTYPE_ACK
:
3119 D2(vswp
, "%s: VIO_SUBTYPE_ACK", __func__
);
3121 DWARN(vswp
, "%s: restarting handshake..", __func__
);
3124 case VIO_SUBTYPE_NACK
:
3125 D2(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
3127 DWARN(vswp
, "%s: restarting handshake..", __func__
);
3131 DERR(vswp
, "%s: Unknown vio_subtype %x\n", __func__
,
3132 dring_pkt
->tag
.vio_subtype
);
3135 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
3137 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
/*
 * NACK a multicast request back to the peer: mark the message as a NACK,
 * stamp our local session id, and send it back over the channel.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and is safe to use in unbraced if/else bodies.
 */
#define	SND_MCST_NACK(ldcp, pkt) \
	do { \
		pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		pkt->tag.vio_sid = ldcp->local_session; \
		(void) vsw_send_msg(ldcp, (void *)pkt, \
		    sizeof (vnet_mcast_msg_t), B_TRUE); \
	} while (0)
3147 * Process a multicast request from a vnet.
3149 * Vnet's specify a multicast address that they are interested in. This
3150 * address is used as a key into the hash table which forms the multicast
3151 * forwarding database (mFDB).
3153 * The table keys are the multicast addresses, while the table entries
3154 * are pointers to lists of ports which wish to receive packets for the
3155 * specified multicast address.
3157 * When a multicast packet is being switched we use the address as a key
3158 * into the hash table, and then walk the appropriate port list forwarding
3159 * the pkt to each port in turn.
3161 * If a vnet is no longer interested in a particular multicast grouping
3162 * we simply find the correct location in the hash table and then delete
3163 * the relevant port from the port list.
3165 * To deal with the case whereby a port is being deleted without first
3166 * removing itself from the lists in the hash table, we maintain a list
3167 * of multicast addresses the port has registered an interest in, within
3168 * the port structure itself. We then simply walk that list of addresses
3169 * using them as keys into the hash table and remove the port from the
3170 * appropriate lists.
3173 vsw_process_ctrl_mcst_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
3175 vnet_mcast_msg_t
*mcst_pkt
;
3176 vsw_port_t
*port
= ldcp
->ldc_port
;
3177 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3180 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
3183 * We know this is a ctrl/mcast packet so
3184 * cast it into the correct structure.
3186 mcst_pkt
= (vnet_mcast_msg_t
*)pkt
;
3188 switch (mcst_pkt
->tag
.vio_subtype
) {
3189 case VIO_SUBTYPE_INFO
:
3190 D2(vswp
, "%s: VIO_SUBTYPE_INFO", __func__
);
3193 * Check if in correct state to receive a multicast
3194 * message (i.e. handshake complete). If not reset
3197 if (vsw_check_flag(ldcp
, INBOUND
, VSW_MCST_INFO_RECV
))
3201 * Before attempting to add or remove address check
3202 * that they are valid multicast addresses.
3203 * If not, then NACK back.
3205 for (i
= 0; i
< mcst_pkt
->count
; i
++) {
3206 if ((mcst_pkt
->mca
[i
].ether_addr_octet
[0] & 01) != 1) {
3207 DERR(vswp
, "%s: invalid multicast address",
3209 SND_MCST_NACK(ldcp
, mcst_pkt
);
3215 * Now add/remove the addresses. If this fails we
3218 if (vsw_add_rem_mcst(mcst_pkt
, port
) != 0) {
3219 SND_MCST_NACK(ldcp
, mcst_pkt
);
3223 mcst_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_ACK
;
3224 mcst_pkt
->tag
.vio_sid
= ldcp
->local_session
;
3226 DUMP_TAG_PTR((vio_msg_tag_t
*)mcst_pkt
);
3228 (void) vsw_send_msg(ldcp
, (void *)mcst_pkt
,
3229 sizeof (vnet_mcast_msg_t
), B_TRUE
);
3232 case VIO_SUBTYPE_ACK
:
3233 DWARN(vswp
, "%s: VIO_SUBTYPE_ACK", __func__
);
3236 * We shouldn't ever get a multicast ACK message as
3237 * at the moment we never request multicast addresses
3238 * to be set on some other device. This may change in
3239 * the future if we have cascading switches.
3241 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_MCST_ACK_RECV
))
3247 case VIO_SUBTYPE_NACK
:
3248 DWARN(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
3251 * We shouldn't get a multicast NACK packet for the
3252 * same reasons as we shouldn't get a ACK packet.
3254 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_MCST_NACK_RECV
))
3261 DERR(vswp
, "%s: unknown vio_subtype %x\n", __func__
,
3262 mcst_pkt
->tag
.vio_subtype
);
3265 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
3269 vsw_process_ctrl_rdx_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
3271 vio_rdx_msg_t
*rdx_pkt
;
3272 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3275 * We know this is a ctrl/rdx packet so
3276 * cast it into the correct structure.
3278 rdx_pkt
= (vio_rdx_msg_t
*)pkt
;
3280 D1(vswp
, "%s(%lld) enter", __func__
, ldcp
->ldc_id
);
3282 switch (rdx_pkt
->tag
.vio_subtype
) {
3283 case VIO_SUBTYPE_INFO
:
3284 D2(vswp
, "%s: VIO_SUBTYPE_INFO", __func__
);
3286 if (vsw_check_flag(ldcp
, OUTBOUND
, VSW_RDX_INFO_RECV
))
3289 rdx_pkt
->tag
.vio_sid
= ldcp
->local_session
;
3290 rdx_pkt
->tag
.vio_subtype
= VIO_SUBTYPE_ACK
;
3292 DUMP_TAG_PTR((vio_msg_tag_t
*)rdx_pkt
);
3294 ldcp
->lane_out
.lstate
|= VSW_RDX_ACK_SENT
;
3296 (void) vsw_send_msg(ldcp
, (void *)rdx_pkt
,
3297 sizeof (vio_rdx_msg_t
), B_TRUE
);
3299 vsw_next_milestone(ldcp
);
3302 case VIO_SUBTYPE_ACK
:
3304 * Should be handled in-band by callback handler.
3306 DERR(vswp
, "%s: Unexpected VIO_SUBTYPE_ACK", __func__
);
3307 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
3310 case VIO_SUBTYPE_NACK
:
3311 D2(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
3313 if (vsw_check_flag(ldcp
, INBOUND
, VSW_RDX_NACK_RECV
))
3316 ldcp
->lane_in
.lstate
|= VSW_RDX_NACK_RECV
;
3317 vsw_next_milestone(ldcp
);
3321 DERR(vswp
, "%s: Unknown vio_subtype %x\n", __func__
,
3322 rdx_pkt
->tag
.vio_subtype
);
3325 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
3329 vsw_process_physlink_msg(vsw_ldc_t
*ldcp
, void *pkt
)
3331 vnet_physlink_msg_t
*msgp
;
3332 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3334 msgp
= (vnet_physlink_msg_t
*)pkt
;
3336 D1(vswp
, "%s(%lld) enter", __func__
, ldcp
->ldc_id
);
3338 switch (msgp
->tag
.vio_subtype
) {
3339 case VIO_SUBTYPE_INFO
:
3341 /* vsw shouldn't recv physlink info */
3342 DWARN(vswp
, "%s: Unexpected VIO_SUBTYPE_INFO", __func__
);
3345 case VIO_SUBTYPE_ACK
:
3347 D2(vswp
, "%s: VIO_SUBTYPE_ACK", __func__
);
3350 case VIO_SUBTYPE_NACK
:
3352 D2(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
3356 DERR(vswp
, "%s: Unknown vio_subtype %x\n", __func__
,
3357 msgp
->tag
.vio_subtype
);
3360 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
3364 vsw_process_data_pkt(vsw_ldc_t
*ldcp
, void *dpkt
, vio_msg_tag_t
*tagp
,
3367 uint16_t env
= tagp
->vio_subtype_env
;
3368 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3369 lane_t
*lp
= &ldcp
->lane_out
;
3370 uint8_t dring_mode
= lp
->dring_mode
;
3372 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
3374 /* session id check */
3375 if (ldcp
->session_status
& VSW_PEER_SESSION
) {
3376 if (ldcp
->peer_session
!= tagp
->vio_sid
) {
3377 DERR(vswp
, "%s (chan %d): invalid session id (%llx)",
3378 __func__
, ldcp
->ldc_id
, tagp
->vio_sid
);
3379 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
3385 * It is an error for us to be getting data packets
3386 * before the handshake has completed.
3388 if (ldcp
->hphase
!= VSW_MILESTONE4
) {
3389 DERR(vswp
, "%s: got data packet before handshake complete "
3390 "hphase %d (%x: %x)", __func__
, ldcp
->hphase
,
3391 ldcp
->lane_in
.lstate
, ldcp
->lane_out
.lstate
);
3392 DUMP_FLAGS(ldcp
->lane_in
.lstate
);
3393 DUMP_FLAGS(ldcp
->lane_out
.lstate
);
3394 vsw_process_conn_evt(ldcp
, VSW_CONN_RESTART
);
3397 if (dring_mode
== VIO_TX_DRING
) {
3399 * To reduce the locking contention, release the ldc_cblock
3400 * here and re-acquire it once we are done receiving packets.
3401 * We do this only in TxDring mode to allow further callbaks to
3402 * continue while the msg worker thread processes the messages.
3403 * In RxDringData mode, we process the messages in the callback
3404 * itself and wake up rcv worker thread to process only data
3407 mutex_exit(&ldcp
->ldc_cblock
);
3408 mutex_enter(&ldcp
->ldc_rxlock
);
3412 * Switch on vio_subtype envelope, then let lower routines
3413 * decide if its an INFO, ACK or NACK packet.
3415 if (env
== VIO_DRING_DATA
) {
3416 ldcp
->rx_dringdata(ldcp
, dpkt
);
3417 } else if (env
== VIO_PKT_DATA
) {
3418 ldcp
->rx_pktdata(ldcp
, dpkt
, msglen
);
3419 } else if (env
== VIO_DESC_DATA
) {
3420 vsw_process_data_ibnd_pkt(ldcp
, dpkt
);
3422 DERR(vswp
, "%s: unknown vio_subtype_env (%x)\n",
3426 if (dring_mode
== VIO_TX_DRING
) {
3427 mutex_exit(&ldcp
->ldc_rxlock
);
3428 mutex_enter(&ldcp
->ldc_cblock
);
3431 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
3435 * dummy pkt data handler function for vnet protocol version 1.0
3438 vsw_process_pkt_data_nop(void *arg1
, void *arg2
, uint32_t msglen
)
3440 _NOTE(ARGUNUSED(arg1
, arg2
, msglen
))
3444 * This function handles raw pkt data messages received over the channel.
3445 * Currently, only priority-eth-type frames are received through this mechanism.
3446 * In this case, the frame(data) is present within the message itself which
3447 * is copied into an mblk before switching it.
3450 vsw_process_pkt_data(void *arg1
, void *arg2
, uint32_t msglen
)
3452 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg1
;
3453 vio_raw_data_msg_t
*dpkt
= (vio_raw_data_msg_t
*)arg2
;
3457 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3458 vgen_stats_t
*statsp
= &ldcp
->ldc_stats
;
3459 lane_t
*lp
= &ldcp
->lane_out
;
3461 size
= msglen
- VIO_PKT_DATA_HDRSIZE
;
3462 if (size
< ETHERMIN
|| size
> lp
->mtu
) {
3463 (void) atomic_inc_32(&statsp
->rx_pri_fail
);
3464 DWARN(vswp
, "%s(%lld) invalid size(%d)\n", __func__
,
3465 ldcp
->ldc_id
, size
);
3469 vmp
= vio_multipool_allocb(&ldcp
->vmp
, size
+ VLAN_TAGSZ
);
3471 mp
= allocb(size
+ VLAN_TAGSZ
, BPRI_MED
);
3473 (void) atomic_inc_32(&statsp
->rx_pri_fail
);
3474 DWARN(vswp
, "%s(%lld) allocb failure, "
3475 "unable to process priority frame\n", __func__
,
3483 /* skip over the extra space for vlan tag */
3484 mp
->b_rptr
+= VLAN_TAGSZ
;
3486 /* copy the frame from the payload of raw data msg into the mblk */
3487 bcopy(dpkt
->data
, mp
->b_rptr
, size
);
3488 mp
->b_wptr
= mp
->b_rptr
+ size
;
3491 vmp
->state
= VIO_MBLK_HAS_DATA
;
3495 (void) atomic_inc_64(&statsp
->rx_pri_packets
);
3496 (void) atomic_add_64(&statsp
->rx_pri_bytes
, size
);
3499 * VLAN_TAGSZ of extra space has been pre-alloc'd if tag is needed.
3501 (void) vsw_vlan_frame_pretag(ldcp
->ldc_port
, VSW_VNETPORT
, mp
);
3503 /* switch the frame to destination */
3504 vswp
->vsw_switch_frame(vswp
, mp
, VSW_VNETPORT
, ldcp
->ldc_port
, NULL
);
3508 * Process an in-band descriptor message (most likely from
3512 vsw_process_data_ibnd_pkt(vsw_ldc_t
*ldcp
, void *pkt
)
3514 vnet_ibnd_desc_t
*ibnd_desc
;
3515 dring_info_t
*dp
= NULL
;
3516 vsw_private_desc_t
*priv_addr
= NULL
;
3517 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3522 uint32_t num
= 1, len
, datalen
= 0;
3523 uint64_t ncookies
= 0;
3527 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
3529 ibnd_desc
= (vnet_ibnd_desc_t
*)pkt
;
3531 switch (ibnd_desc
->hdr
.tag
.vio_subtype
) {
3532 case VIO_SUBTYPE_INFO
:
3533 D1(vswp
, "%s: VIO_SUBTYPE_INFO", __func__
);
3535 if (vsw_check_flag(ldcp
, INBOUND
, VSW_DRING_INFO_RECV
))
3539 * Data is padded to align on a 8 byte boundary,
3540 * nbytes is actual data length, i.e. minus that
3543 datalen
= ibnd_desc
->nbytes
;
3545 D2(vswp
, "%s(%lld): processing inband desc : "
3546 ": datalen 0x%lx", __func__
, ldcp
->ldc_id
, datalen
);
3548 ncookies
= ibnd_desc
->ncookies
;
3551 * allocb(9F) returns an aligned data block. We
3552 * need to ensure that we ask ldc for an aligned
3553 * number of bytes also.
3557 off
= 8 - (nbytes
& 0x7);
3561 /* alloc extra space for VLAN_TAG */
3562 mp
= allocb(datalen
+ 8, BPRI_MED
);
3564 DERR(vswp
, "%s(%lld): allocb failed",
3565 __func__
, ldcp
->ldc_id
);
3566 ldcp
->ldc_stats
.rx_allocb_fail
++;
3570 /* skip over the extra space for VLAN_TAG */
3573 rv
= ldc_mem_copy(ldcp
->ldc_handle
, (caddr_t
)mp
->b_rptr
,
3574 0, &nbytes
, ibnd_desc
->memcookie
, (uint64_t)ncookies
,
3578 DERR(vswp
, "%s(%d): unable to copy in data from "
3579 "%d cookie(s)", __func__
, ldcp
->ldc_id
, ncookies
);
3581 ldcp
->ldc_stats
.ierrors
++;
3585 D2(vswp
, "%s(%d): copied in %ld bytes using %d cookies",
3586 __func__
, ldcp
->ldc_id
, nbytes
, ncookies
);
3588 /* point to the actual end of data */
3589 mp
->b_wptr
= mp
->b_rptr
+ datalen
;
3590 ldcp
->ldc_stats
.ipackets
++;
3591 ldcp
->ldc_stats
.rbytes
+= datalen
;
3594 * We ACK back every in-band descriptor message we process
3596 ibnd_desc
->hdr
.tag
.vio_subtype
= VIO_SUBTYPE_ACK
;
3597 ibnd_desc
->hdr
.tag
.vio_sid
= ldcp
->local_session
;
3598 (void) vsw_send_msg(ldcp
, (void *)ibnd_desc
,
3599 sizeof (vnet_ibnd_desc_t
), B_TRUE
);
3602 * there is extra space alloc'd for VLAN_TAG
3604 (void) vsw_vlan_frame_pretag(ldcp
->ldc_port
, VSW_VNETPORT
, mp
);
3606 /* send the packet to be switched */
3607 vswp
->vsw_switch_frame(vswp
, mp
, VSW_VNETPORT
,
3608 ldcp
->ldc_port
, NULL
);
3612 case VIO_SUBTYPE_ACK
:
3613 D1(vswp
, "%s: VIO_SUBTYPE_ACK", __func__
);
3615 /* Verify the ACK is valid */
3616 idx
= ibnd_desc
->hdr
.desc_handle
;
3618 if (idx
>= vsw_num_descriptors
) {
3619 cmn_err(CE_WARN
, "!vsw%d: corrupted ACK received "
3620 "(idx %ld)", vswp
->instance
, idx
);
3624 if ((dp
= ldcp
->lane_out
.dringp
) == NULL
) {
3625 DERR(vswp
, "%s: no dring found", __func__
);
3629 len
= dp
->num_descriptors
;
3631 * If the descriptor we are being ACK'ed for is not the
3632 * one we expected, then pkts were lost somwhere, either
3633 * when we tried to send a msg, or a previous ACK msg from
3634 * our peer. In either case we now reclaim the descriptors
3635 * in the range from the last ACK we received up to the
3638 if (idx
!= dp
->last_ack_recv
) {
3639 DWARN(vswp
, "%s: dropped pkts detected, (%ld, %ld)",
3640 __func__
, dp
->last_ack_recv
, idx
);
3641 num
= idx
>= dp
->last_ack_recv
?
3642 idx
- dp
->last_ack_recv
+ 1:
3643 (len
- dp
->last_ack_recv
+ 1) + idx
;
3647 * When we sent the in-band message to our peer we
3648 * marked the copy in our private ring as READY. We now
3649 * check that the descriptor we are being ACK'ed for is in
3650 * fact READY, i.e. it is one we have shared with our peer.
3652 * If its not we flag an error, but still reset the descr
3655 for (i
= dp
->last_ack_recv
; j
< num
; i
= (i
+ 1) % len
, j
++) {
3656 priv_addr
= (vsw_private_desc_t
*)dp
->priv_addr
+ i
;
3657 mutex_enter(&priv_addr
->dstate_lock
);
3658 if (priv_addr
->dstate
!= VIO_DESC_READY
) {
3659 DERR(vswp
, "%s: (%ld) desc at index %ld not "
3660 "READY (0x%lx)", __func__
,
3661 ldcp
->ldc_id
, idx
, priv_addr
->dstate
);
3662 DERR(vswp
, "%s: bound %d: ncookies %ld : "
3663 "datalen %ld", __func__
,
3664 priv_addr
->bound
, priv_addr
->ncookies
,
3665 priv_addr
->datalen
);
3667 D2(vswp
, "%s: (%lld) freeing descp at %lld", __func__
,
3669 /* release resources associated with sent msg */
3670 priv_addr
->datalen
= 0;
3671 priv_addr
->dstate
= VIO_DESC_FREE
;
3672 mutex_exit(&priv_addr
->dstate_lock
);
3674 /* update to next expected value */
3675 dp
->last_ack_recv
= (idx
+ 1) % dp
->num_descriptors
;
3679 case VIO_SUBTYPE_NACK
:
3680 DERR(vswp
, "%s: VIO_SUBTYPE_NACK", __func__
);
3683 * We should only get a NACK if our peer doesn't like
3684 * something about a message we have sent it. If this
3685 * happens we just release the resources associated with
3686 * the message. (We are relying on higher layers to decide
3687 * whether or not to resend.
3691 idx
= ibnd_desc
->hdr
.desc_handle
;
3693 if (idx
>= vsw_num_descriptors
) {
3694 DERR(vswp
, "%s: corrupted NACK received (idx %lld)",
3699 if ((dp
= ldcp
->lane_out
.dringp
) == NULL
) {
3700 DERR(vswp
, "%s: no dring found", __func__
);
3704 priv_addr
= (vsw_private_desc_t
*)dp
->priv_addr
;
3706 /* move to correct location in ring */
3709 /* release resources associated with sent msg */
3710 mutex_enter(&priv_addr
->dstate_lock
);
3711 priv_addr
->datalen
= 0;
3712 priv_addr
->dstate
= VIO_DESC_FREE
;
3713 mutex_exit(&priv_addr
->dstate_lock
);
3718 DERR(vswp
, "%s(%lld): Unknown vio_subtype %x\n", __func__
,
3719 ldcp
->ldc_id
, ibnd_desc
->hdr
.tag
.vio_subtype
);
3722 D1(vswp
, "%s(%lld) exit", __func__
, ldcp
->ldc_id
);
3726 vsw_process_err_pkt(vsw_ldc_t
*ldcp
, void *epkt
, vio_msg_tag_t
*tagp
)
3728 _NOTE(ARGUNUSED(epkt
))
3730 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3731 uint16_t env
= tagp
->vio_subtype_env
;
3733 D1(vswp
, "%s (%lld): enter\n", __func__
, ldcp
->ldc_id
);
3736 * Error vio_subtypes have yet to be defined. So for
3737 * the moment we can't do anything.
3739 D2(vswp
, "%s: (%x) vio_subtype env", __func__
, env
);
3741 D1(vswp
, "%s (%lld): exit\n", __func__
, ldcp
->ldc_id
);
3744 /* transmit the packet over the given port */
3746 vsw_portsend(vsw_port_t
*port
, mblk_t
*mp
)
3750 vsw_ldc_t
*ldcp
= port
->ldcp
;
3753 count
= vsw_vlan_frame_untag(port
, VSW_VNETPORT
, &mp
, &mpt
);
3755 status
= ldcp
->tx(ldcp
, mp
, mpt
, count
);
3761 * Break up frames into 2 seperate chains: normal and
3762 * priority, based on the frame type. The number of
3763 * priority frames is also counted and returned.
3766 * vswp: pointer to the instance of vsw
3767 * np: head of packet chain to be broken
3768 * npt: tail of packet chain to be broken
3771 * np: head of normal data packets
3772 * npt: tail of normal data packets
3773 * hp: head of high priority packets
3774 * hpt: tail of high priority packets
3777 vsw_get_pri_packets(vsw_t
*vswp
, mblk_t
**np
, mblk_t
**npt
,
3778 mblk_t
**hp
, mblk_t
**hpt
)
3782 mblk_t
*hmp
= NULL
; /* high prio pkts head */
3783 mblk_t
*hmpt
= NULL
; /* high prio pkts tail */
3784 mblk_t
*nmp
= NULL
; /* normal pkts head */
3785 mblk_t
*nmpt
= NULL
; /* normal pkts tail */
3788 struct ether_header
*ehp
;
3793 while (tmp
!= NULL
) {
3800 ehp
= (struct ether_header
*)smp
->b_rptr
;
3801 num_types
= vswp
->pri_num_types
;
3802 types
= vswp
->pri_types
;
3803 for (i
= 0; i
< num_types
; i
++) {
3804 if (ehp
->ether_type
== types
[i
]) {
3805 /* high priority frame */
3817 if (i
== num_types
) {
3818 /* normal data frame */
3838 * Wrapper function to transmit normal and/or priority frames over the channel.
3841 vsw_ldctx_pri(void *arg
, mblk_t
*mp
, mblk_t
*mpt
, uint32_t count
)
3843 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg
;
3846 mblk_t
*hmp
; /* high prio pkts head */
3847 mblk_t
*hmpt
; /* high prio pkts tail */
3848 mblk_t
*nmp
; /* normal pkts head */
3849 mblk_t
*nmpt
; /* normal pkts tail */
3851 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3853 ASSERT(VSW_PRI_ETH_DEFINED(vswp
));
3859 /* gather any priority frames from the chain of packets */
3860 n
= vsw_get_pri_packets(vswp
, &nmp
, &nmpt
, &hmp
, &hmpt
);
3862 /* transmit priority frames */
3864 while (tmp
!= NULL
) {
3868 vsw_ldcsend_pkt(ldcp
, smp
);
3874 /* no normal data frames to process */
3878 return (vsw_ldctx(ldcp
, nmp
, nmpt
, count
));
3882 * Wrapper function to transmit normal frames over the channel.
3885 vsw_ldctx(void *arg
, mblk_t
*mp
, mblk_t
*mpt
, uint32_t count
)
3887 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg
;
3892 * If the TX thread is enabled, then queue the
3893 * ordinary frames and signal the tx thread.
3895 if (ldcp
->tx_thread
!= NULL
) {
3897 mutex_enter(&ldcp
->tx_thr_lock
);
3899 if ((ldcp
->tx_cnt
+ count
) >= vsw_max_tx_qcount
) {
3901 * If we reached queue limit,
3902 * do not queue new packets,
3905 ldcp
->ldc_stats
.tx_qfull
+= count
;
3906 mutex_exit(&ldcp
->tx_thr_lock
);
3910 if (ldcp
->tx_mhead
== NULL
) {
3911 ldcp
->tx_mhead
= mp
;
3912 ldcp
->tx_mtail
= mpt
;
3913 cv_signal(&ldcp
->tx_thr_cv
);
3915 ldcp
->tx_mtail
->b_next
= mp
;
3916 ldcp
->tx_mtail
= mpt
;
3918 ldcp
->tx_cnt
+= count
;
3919 mutex_exit(&ldcp
->tx_thr_lock
);
3921 while (mp
!= NULL
) {
3923 mp
->b_next
= mp
->b_prev
= NULL
;
3924 (void) vsw_ldcsend(ldcp
, mp
, 1);
3934 * This function transmits the frame in the payload of a raw data
3935 * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to
3936 * send special frames with high priorities, without going through
3937 * the normal data path which uses descriptor ring mechanism.
3940 vsw_ldcsend_pkt(vsw_ldc_t
*ldcp
, mblk_t
*mp
)
3942 vio_raw_data_msg_t
*pkt
;
3951 vsw_t
*vswp
= ldcp
->ldc_vswp
;
3952 vgen_stats_t
*statsp
= &ldcp
->ldc_stats
;
3954 if ((!(ldcp
->lane_out
.lstate
& VSW_LANE_ACTIVE
)) ||
3955 (ldcp
->ldc_status
!= LDC_UP
) || (ldcp
->ldc_handle
== NULL
)) {
3956 (void) atomic_inc_32(&statsp
->tx_pri_fail
);
3957 DWARN(vswp
, "%s(%lld) status(%d) lstate(0x%llx), dropping "
3958 "packet\n", __func__
, ldcp
->ldc_id
, ldcp
->ldc_status
,
3959 ldcp
->lane_out
.lstate
);
3965 /* frame size bigger than available payload len of raw data msg ? */
3966 if (size
> (size_t)(ldcp
->msglen
- VIO_PKT_DATA_HDRSIZE
)) {
3967 (void) atomic_inc_32(&statsp
->tx_pri_fail
);
3968 DWARN(vswp
, "%s(%lld) invalid size(%d)\n", __func__
,
3969 ldcp
->ldc_id
, size
);
3973 if (size
< ETHERMIN
)
3976 /* alloc space for a raw data message */
3977 vmp
= vio_allocb(vswp
->pri_tx_vmp
);
3979 (void) atomic_inc_32(&statsp
->tx_pri_fail
);
3980 DWARN(vswp
, "vio_allocb failed\n");
3985 pkt
= (vio_raw_data_msg_t
*)nmp
->b_rptr
;
3987 /* copy frame into the payload of raw data message */
3988 dst
= (caddr_t
)pkt
->data
;
3989 for (bp
= mp
; bp
!= NULL
; bp
= bp
->b_cont
) {
3991 bcopy(bp
->b_rptr
, dst
, mblksz
);
3995 vmp
->state
= VIO_MBLK_HAS_DATA
;
3997 /* setup the raw data msg */
3998 pkt
->tag
.vio_msgtype
= VIO_TYPE_DATA
;
3999 pkt
->tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
4000 pkt
->tag
.vio_subtype_env
= VIO_PKT_DATA
;
4001 pkt
->tag
.vio_sid
= ldcp
->local_session
;
4002 nbytes
= VIO_PKT_DATA_HDRSIZE
+ size
;
4004 /* send the msg over ldc */
4005 rv
= vsw_send_msg(ldcp
, (void *)pkt
, nbytes
, B_TRUE
);
4007 (void) atomic_inc_32(&statsp
->tx_pri_fail
);
4008 DWARN(vswp
, "%s(%lld) Error sending priority frame\n", __func__
,
4014 (void) atomic_inc_64(&statsp
->tx_pri_packets
);
4015 (void) atomic_add_64(&statsp
->tx_pri_packets
, size
);
4024 * Transmit the packet over the given LDC channel.
4026 * The 'retries' argument indicates how many times a packet
4027 * is retried before it is dropped. Note, the retry is done
4028 * only for a resource related failure, for all other failures
4029 * the packet is dropped immediately.
4032 vsw_ldcsend(vsw_ldc_t
*ldcp
, mblk_t
*mp
, uint32_t retries
)
4037 vsw_port_t
*port
= ldcp
->ldc_port
;
4038 dring_info_t
*dp
= NULL
;
4039 lane_t
*lp
= &ldcp
->lane_out
;
4041 for (i
= 0; i
< retries
; ) {
4043 * Send the message out using the appropriate
4044 * transmit function which will free mblock when it
4045 * is finished with it.
4047 mutex_enter(&port
->tx_lock
);
4048 if (port
->transmit
!= NULL
) {
4049 status
= (*port
->transmit
)(ldcp
, mp
);
4051 if (status
== LDC_TX_SUCCESS
) {
4052 mutex_exit(&port
->tx_lock
);
4055 i
++; /* increment the counter here */
4057 /* If its the last retry, then update the oerror */
4058 if ((i
== retries
) && (status
== LDC_TX_NORESOURCES
)) {
4059 ldcp
->ldc_stats
.oerrors
++;
4061 mutex_exit(&port
->tx_lock
);
4063 if (status
!= LDC_TX_NORESOURCES
) {
4065 * No retrying required for errors un-related
4070 if (((dp
= ldcp
->lane_out
.dringp
) != NULL
) &&
4071 ((VSW_VER_GTEQ(ldcp
, 1, 2) &&
4072 (ldcp
->lane_out
.xfer_mode
& VIO_DRING_MODE_V1_2
)) ||
4073 ((VSW_VER_LT(ldcp
, 1, 2) &&
4074 (ldcp
->lane_out
.xfer_mode
== VIO_DRING_MODE_V1_0
))))) {
4076 /* Need to reclaim in TxDring mode. */
4077 if (lp
->dring_mode
== VIO_TX_DRING
) {
4078 rc
= vsw_reclaim_dring(dp
, dp
->end_idx
);
4083 * If there is no dring or the xfer_mode is
4084 * set to DESC_MODE(ie., OBP), then simply break here.
4090 * Delay only if none were reclaimed
4091 * and its not the last retry.
4093 if ((rc
== 0) && (i
< retries
)) {
4094 delay(drv_usectohz(vsw_ldc_tx_delay
));
4102 * Send an in-band descriptor message over ldc.
4105 vsw_descrsend(vsw_ldc_t
*ldcp
, mblk_t
*mp
)
4107 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4108 vnet_ibnd_desc_t ibnd_msg
;
4109 vsw_private_desc_t
*priv_desc
= NULL
;
4110 dring_info_t
*dp
= NULL
;
4115 int status
= LDC_TX_SUCCESS
;
4116 static int warn_msg
= 1;
4117 lane_t
*lp
= &ldcp
->lane_out
;
4119 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
4123 if ((!(ldcp
->lane_out
.lstate
& VSW_LANE_ACTIVE
)) ||
4124 (ldcp
->ldc_status
!= LDC_UP
) || (ldcp
->ldc_handle
== NULL
)) {
4125 DERR(vswp
, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
4126 __func__
, ldcp
->ldc_id
, ldcp
->ldc_status
,
4127 ldcp
->lane_out
.lstate
);
4128 ldcp
->ldc_stats
.oerrors
++;
4129 return (LDC_TX_FAILURE
);
4133 * The dring here is as an internal buffer,
4134 * rather than a transfer channel.
4136 if ((dp
= ldcp
->lane_out
.dringp
) == NULL
) {
4137 DERR(vswp
, "%s(%lld): no dring for outbound lane",
4138 __func__
, ldcp
->ldc_id
);
4139 DERR(vswp
, "%s(%lld) status(%d) state (0x%llx)", __func__
,
4140 ldcp
->ldc_id
, ldcp
->ldc_status
, ldcp
->lane_out
.lstate
);
4141 ldcp
->ldc_stats
.oerrors
++;
4142 return (LDC_TX_FAILURE
);
4146 if (size
> (size_t)lp
->mtu
) {
4147 DERR(vswp
, "%s(%lld) invalid size (%ld)\n", __func__
,
4148 ldcp
->ldc_id
, size
);
4149 ldcp
->ldc_stats
.oerrors
++;
4150 return (LDC_TX_FAILURE
);
4154 * Find a free descriptor in our buffer ring
4156 if (vsw_dring_find_free_desc(dp
, &priv_desc
, &idx
) != 0) {
4158 DERR(vswp
, "%s(%lld): no descriptor available for ring "
4159 "at 0x%llx", __func__
, ldcp
->ldc_id
, dp
);
4163 /* nothing more we can do */
4164 status
= LDC_TX_NORESOURCES
;
4165 goto vsw_descrsend_free_exit
;
4167 D2(vswp
, "%s(%lld): free private descriptor found at pos "
4168 "%ld addr 0x%x\n", __func__
, ldcp
->ldc_id
, idx
, priv_desc
);
4172 /* copy data into the descriptor */
4173 bufp
= priv_desc
->datap
;
4174 for (bp
= mp
, n
= 0; bp
!= NULL
; bp
= bp
->b_cont
) {
4176 bcopy(bp
->b_rptr
, bufp
, n
);
4180 priv_desc
->datalen
= (size
< (size_t)ETHERMIN
) ? ETHERMIN
: size
;
4182 /* create and send the in-band descp msg */
4183 ibnd_msg
.hdr
.tag
.vio_msgtype
= VIO_TYPE_DATA
;
4184 ibnd_msg
.hdr
.tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
4185 ibnd_msg
.hdr
.tag
.vio_subtype_env
= VIO_DESC_DATA
;
4186 ibnd_msg
.hdr
.tag
.vio_sid
= ldcp
->local_session
;
4189 * Copy the mem cookies describing the data from the
4190 * private region of the descriptor ring into the inband
4193 for (i
= 0; i
< priv_desc
->ncookies
; i
++) {
4194 bcopy(&priv_desc
->memcookie
[i
], &ibnd_msg
.memcookie
[i
],
4195 sizeof (ldc_mem_cookie_t
));
4198 ibnd_msg
.hdr
.desc_handle
= idx
;
4199 ibnd_msg
.ncookies
= priv_desc
->ncookies
;
4200 ibnd_msg
.nbytes
= size
;
4202 ldcp
->ldc_stats
.opackets
++;
4203 ldcp
->ldc_stats
.obytes
+= size
;
4205 (void) vsw_send_msg(ldcp
, (void *)&ibnd_msg
,
4206 sizeof (vnet_ibnd_desc_t
), B_TRUE
);
4208 vsw_descrsend_free_exit
:
4210 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
4215 vsw_send_ver(void *arg
)
4217 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg
;
4218 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4219 lane_t
*lp
= &ldcp
->lane_out
;
4220 vio_ver_msg_t ver_msg
;
4222 D1(vswp
, "%s enter", __func__
);
4224 ver_msg
.tag
.vio_msgtype
= VIO_TYPE_CTRL
;
4225 ver_msg
.tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
4226 ver_msg
.tag
.vio_subtype_env
= VIO_VER_INFO
;
4227 ver_msg
.tag
.vio_sid
= ldcp
->local_session
;
4229 if (vsw_obp_ver_proto_workaround
== B_FALSE
) {
4230 ver_msg
.ver_major
= vsw_versions
[0].ver_major
;
4231 ver_msg
.ver_minor
= vsw_versions
[0].ver_minor
;
4233 /* use the major,minor that we've ack'd */
4234 lane_t
*lpi
= &ldcp
->lane_in
;
4235 ver_msg
.ver_major
= lpi
->ver_major
;
4236 ver_msg
.ver_minor
= lpi
->ver_minor
;
4238 ver_msg
.dev_class
= VDEV_NETWORK_SWITCH
;
4240 lp
->lstate
|= VSW_VER_INFO_SENT
;
4241 lp
->ver_major
= ver_msg
.ver_major
;
4242 lp
->ver_minor
= ver_msg
.ver_minor
;
4244 DUMP_TAG(ver_msg
.tag
);
4246 (void) vsw_send_msg(ldcp
, &ver_msg
, sizeof (vio_ver_msg_t
), B_TRUE
);
4248 D1(vswp
, "%s (%d): exit", __func__
, ldcp
->ldc_id
);
4252 vsw_send_attr(vsw_ldc_t
*ldcp
)
4254 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4255 lane_t
*lp
= &ldcp
->lane_out
;
4256 vnet_attr_msg_t attr_msg
;
4258 D1(vswp
, "%s (%ld) enter", __func__
, ldcp
->ldc_id
);
4261 * Subtype is set to INFO by default
4263 attr_msg
.tag
.vio_msgtype
= VIO_TYPE_CTRL
;
4264 attr_msg
.tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
4265 attr_msg
.tag
.vio_subtype_env
= VIO_ATTR_INFO
;
4266 attr_msg
.tag
.vio_sid
= ldcp
->local_session
;
4268 /* payload copied from default settings for lane */
4269 attr_msg
.mtu
= lp
->mtu
;
4270 attr_msg
.addr_type
= lp
->addr_type
;
4271 attr_msg
.xfer_mode
= lp
->xfer_mode
;
4272 attr_msg
.ack_freq
= lp
->xfer_mode
;
4273 attr_msg
.options
= lp
->dring_mode
;
4275 READ_ENTER(&vswp
->if_lockrw
);
4276 attr_msg
.addr
= vnet_macaddr_strtoul((vswp
->if_addr
).ether_addr_octet
);
4277 RW_EXIT(&vswp
->if_lockrw
);
4279 ldcp
->lane_out
.lstate
|= VSW_ATTR_INFO_SENT
;
4281 DUMP_TAG(attr_msg
.tag
);
4283 (void) vsw_send_msg(ldcp
, &attr_msg
, sizeof (vnet_attr_msg_t
), B_TRUE
);
4285 D1(vswp
, "%s (%ld) exit", __func__
, ldcp
->ldc_id
);
4289 vsw_send_dring_info(vsw_ldc_t
*ldcp
)
4293 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4294 vsw_port_t
*port
= ldcp
->ldc_port
;
4295 lane_t
*lp
= &ldcp
->lane_out
;
4296 vgen_stats_t
*statsp
= &ldcp
->ldc_stats
;
4298 D1(vswp
, "%s: (%ld) enter", __func__
, ldcp
->ldc_id
);
4300 /* dring mode has been negotiated in attr phase; save in stats */
4301 statsp
->dring_mode
= lp
->dring_mode
;
4303 if (lp
->dring_mode
== VIO_RX_DRING_DATA
) {
4305 * Change the transmit routine for RxDringData mode.
4307 port
->transmit
= vsw_dringsend_shm
;
4308 msg
= (void *) vsw_create_rx_dring_info(ldcp
);
4313 VNET_DRING_REG_EXT_MSG_SIZE(lp
->dringp
->data_ncookies
);
4314 ldcp
->rcv_thread
= thread_create(NULL
, 2 * DEFAULTSTKSZ
,
4315 vsw_ldc_rcv_worker
, ldcp
, 0, &p0
, TS_RUN
, maxclsyspri
);
4316 ldcp
->rx_dringdata
= vsw_process_dringdata_shm
;
4318 msg
= (void *) vsw_create_tx_dring_info(ldcp
);
4322 msgsize
= sizeof (vio_dring_reg_msg_t
);
4323 ldcp
->msg_thread
= thread_create(NULL
, 2 * DEFAULTSTKSZ
,
4324 vsw_ldc_msg_worker
, ldcp
, 0, &p0
, TS_RUN
, maxclsyspri
);
4325 ldcp
->rx_dringdata
= vsw_process_dringdata
;
4328 lp
->lstate
|= VSW_DRING_INFO_SENT
;
4329 DUMP_TAG_PTR((vio_msg_tag_t
*)msg
);
4330 (void) vsw_send_msg(ldcp
, msg
, msgsize
, B_TRUE
);
4331 kmem_free(msg
, msgsize
);
4333 D1(vswp
, "%s: (%ld) exit", __func__
, ldcp
->ldc_id
);
4337 vsw_send_rdx(vsw_ldc_t
*ldcp
)
4339 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4340 vio_rdx_msg_t rdx_msg
;
4342 D1(vswp
, "%s (%ld) enter", __func__
, ldcp
->ldc_id
);
4344 rdx_msg
.tag
.vio_msgtype
= VIO_TYPE_CTRL
;
4345 rdx_msg
.tag
.vio_subtype
= VIO_SUBTYPE_INFO
;
4346 rdx_msg
.tag
.vio_subtype_env
= VIO_RDX
;
4347 rdx_msg
.tag
.vio_sid
= ldcp
->local_session
;
4349 ldcp
->lane_in
.lstate
|= VSW_RDX_INFO_SENT
;
4351 DUMP_TAG(rdx_msg
.tag
);
4353 (void) vsw_send_msg(ldcp
, &rdx_msg
, sizeof (vio_rdx_msg_t
), B_TRUE
);
4355 D1(vswp
, "%s (%ld) exit", __func__
, ldcp
->ldc_id
);
4359 * Remove the specified address from the list of address maintained
4360 * in this port node.
4363 vsw_del_addr(uint8_t devtype
, void *arg
, uint64_t addr
)
4366 vsw_port_t
*port
= NULL
;
4367 mcst_addr_t
*prev_p
= NULL
;
4368 mcst_addr_t
*curr_p
= NULL
;
4370 D1(NULL
, "%s: enter : devtype %d : addr 0x%llx",
4371 __func__
, devtype
, addr
);
4373 if (devtype
== VSW_VNETPORT
) {
4374 port
= (vsw_port_t
*)arg
;
4375 mutex_enter(&port
->mca_lock
);
4376 prev_p
= curr_p
= port
->mcap
;
4378 vswp
= (vsw_t
*)arg
;
4379 mutex_enter(&vswp
->mca_lock
);
4380 prev_p
= curr_p
= vswp
->mcap
;
4383 while (curr_p
!= NULL
) {
4384 if (curr_p
->addr
== addr
) {
4385 D2(NULL
, "%s: address found", __func__
);
4387 if (prev_p
== curr_p
) {
4389 if (devtype
== VSW_VNETPORT
)
4390 port
->mcap
= curr_p
->nextp
;
4392 vswp
->mcap
= curr_p
->nextp
;
4394 prev_p
->nextp
= curr_p
->nextp
;
4399 curr_p
= curr_p
->nextp
;
4403 if (devtype
== VSW_VNETPORT
)
4404 mutex_exit(&port
->mca_lock
);
4406 mutex_exit(&vswp
->mca_lock
);
4408 D1(NULL
, "%s: exit", __func__
);
4414 * Create a ring consisting of just a private portion and link
4415 * it into the list of rings for the outbound lane.
4417 * These type of rings are used primarily for temporary data
4418 * storage (i.e. as data buffers).
4421 vsw_create_privring(vsw_ldc_t
*ldcp
)
4424 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4426 D1(vswp
, "%s(%lld): enter", __func__
, ldcp
->ldc_id
);
4428 dp
= kmem_zalloc(sizeof (dring_info_t
), KM_SLEEP
);
4429 mutex_init(&dp
->dlock
, NULL
, MUTEX_DRIVER
, NULL
);
4430 mutex_init(&dp
->restart_lock
, NULL
, MUTEX_DRIVER
, NULL
);
4431 ldcp
->lane_out
.dringp
= dp
;
4433 /* no public section */
4434 dp
->pub_addr
= NULL
;
4435 dp
->priv_addr
= kmem_zalloc(
4436 (sizeof (vsw_private_desc_t
) * vsw_num_descriptors
), KM_SLEEP
);
4437 dp
->num_descriptors
= vsw_num_descriptors
;
4439 if (vsw_setup_tx_dring(ldcp
, dp
)) {
4440 DERR(vswp
, "%s: setup of ring failed", __func__
);
4441 vsw_destroy_tx_dring(ldcp
);
4445 /* haven't used any descriptors yet */
4447 dp
->restart_reqd
= B_TRUE
;
4449 D1(vswp
, "%s(%lld): exit", __func__
, ldcp
->ldc_id
);
4453 * Set the default lane attributes. These are copied into
4454 * the attr msg we send to our peer. If they are not acceptable
4455 * then (currently) the handshake ends.
4458 vsw_set_lane_attr(vsw_t
*vswp
, lane_t
*lp
)
4460 bzero(lp
, sizeof (lane_t
));
4462 READ_ENTER(&vswp
->if_lockrw
);
4463 ether_copy(&(vswp
->if_addr
), &(lp
->addr
));
4464 RW_EXIT(&vswp
->if_lockrw
);
4466 lp
->mtu
= vswp
->max_frame_size
;
4467 lp
->addr_type
= ADDR_TYPE_MAC
;
4468 lp
->xfer_mode
= VIO_DRING_MODE_V1_0
;
4469 lp
->ack_freq
= 0; /* for shared mode */
4470 lp
->seq_num
= VNET_ISS
;
4474 * Map the descriptor ring exported by the peer.
4476 static dring_info_t
*
4477 vsw_map_dring(vsw_ldc_t
*ldcp
, void *pkt
)
4479 dring_info_t
*dp
= NULL
;
4480 lane_t
*lp
= &ldcp
->lane_out
;
4482 if (lp
->dring_mode
== VIO_RX_DRING_DATA
) {
4484 * In RxDringData mode, dring that we map in
4485 * becomes our transmit descriptor ring.
4487 dp
= vsw_map_tx_dring(ldcp
, pkt
);
4490 * In TxDring mode, dring that we map in
4491 * becomes our receive descriptor ring.
4493 dp
= vsw_map_rx_dring(ldcp
, pkt
);
4499 * Common dring mapping function used in both TxDring and RxDringData modes.
4502 vsw_map_dring_cmn(vsw_ldc_t
*ldcp
, vio_dring_reg_msg_t
*dring_pkt
)
4506 ldc_mem_info_t minfo
;
4507 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4510 * If the dring params are unacceptable then we NACK back.
4512 if ((dring_pkt
->num_descriptors
== 0) ||
4513 (dring_pkt
->descriptor_size
== 0) ||
4514 (dring_pkt
->ncookies
!= 1)) {
4515 DERR(vswp
, "%s (%lld): invalid dring info",
4516 __func__
, ldcp
->ldc_id
);
4520 dp
= kmem_zalloc(sizeof (dring_info_t
), KM_SLEEP
);
4522 dp
->num_descriptors
= dring_pkt
->num_descriptors
;
4523 dp
->descriptor_size
= dring_pkt
->descriptor_size
;
4524 dp
->options
= dring_pkt
->options
;
4525 dp
->dring_ncookies
= dring_pkt
->ncookies
;
4528 * Note: should only get one cookie. Enforced in
4531 bcopy(&dring_pkt
->cookie
[0], &dp
->dring_cookie
[0],
4532 sizeof (ldc_mem_cookie_t
));
4534 rv
= ldc_mem_dring_map(ldcp
->ldc_handle
, &dp
->dring_cookie
[0],
4535 dp
->dring_ncookies
, dp
->num_descriptors
, dp
->descriptor_size
,
4536 LDC_DIRECT_MAP
, &(dp
->dring_handle
));
4541 rv
= ldc_mem_dring_info(dp
->dring_handle
, &minfo
);
4545 /* store the address of the ring */
4546 dp
->pub_addr
= minfo
.vaddr
;
4548 /* cache the dring mtype */
4549 dp
->dring_mtype
= minfo
.mtype
;
4551 /* no private section as we are importing */
4552 dp
->priv_addr
= NULL
;
4555 * Using simple mono increasing int for ident at the moment.
4557 dp
->ident
= ldcp
->next_ident
;
4561 * Acknowledge it; we send back a unique dring identifier that
4562 * the sending side will use in future to refer to this
4565 dring_pkt
->dring_ident
= dp
->ident
;
4569 if (dp
->dring_handle
!= NULL
) {
4570 (void) ldc_mem_dring_unmap(dp
->dring_handle
);
4572 kmem_free(dp
, sizeof (*dp
));
4577 * Unmap the descriptor ring exported by the peer.
4580 vsw_unmap_dring(vsw_ldc_t
*ldcp
)
4582 lane_t
*lane_out
= &ldcp
->lane_out
;
4584 if (lane_out
->dring_mode
== VIO_RX_DRING_DATA
) {
4585 vsw_unmap_tx_dring(ldcp
);
4587 vsw_unmap_rx_dring(ldcp
);
4592 * Map the shared memory data buffer area exported by the peer.
4593 * Used in RxDringData mode only.
4596 vsw_map_data(vsw_ldc_t
*ldcp
, dring_info_t
*dp
, void *pkt
)
4599 vio_dring_reg_ext_msg_t
*emsg
;
4600 vio_dring_reg_msg_t
*msg
= pkt
;
4601 uint8_t *buf
= (uint8_t *)msg
->cookie
;
4602 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4603 ldc_mem_info_t minfo
;
4605 /* skip over dring cookies */
4606 ASSERT(msg
->ncookies
== 1);
4607 buf
+= (msg
->ncookies
* sizeof (ldc_mem_cookie_t
));
4609 emsg
= (vio_dring_reg_ext_msg_t
*)buf
;
4610 if (emsg
->data_ncookies
> VNET_DATA_AREA_COOKIES
) {
4614 /* save # of data area cookies */
4615 dp
->data_ncookies
= emsg
->data_ncookies
;
4617 /* save data area size */
4618 dp
->data_sz
= emsg
->data_area_size
;
4620 /* allocate ldc mem handle for data area */
4621 rv
= ldc_mem_alloc_handle(ldcp
->ldc_handle
, &dp
->data_handle
);
4623 cmn_err(CE_WARN
, "ldc_mem_alloc_handle failed\n");
4624 DWARN(vswp
, "%s (%lld) ldc_mem_alloc_handle() failed: %d\n",
4625 __func__
, ldcp
->ldc_id
, rv
);
4629 /* map the data area */
4630 rv
= ldc_mem_map(dp
->data_handle
, emsg
->data_cookie
,
4631 emsg
->data_ncookies
, LDC_DIRECT_MAP
, LDC_MEM_R
,
4632 (caddr_t
*)&dp
->data_addr
, NULL
);
4634 cmn_err(CE_WARN
, "ldc_mem_map failed\n");
4635 DWARN(vswp
, "%s (%lld) ldc_mem_map() failed: %d\n",
4636 __func__
, ldcp
->ldc_id
, rv
);
4640 /* get the map info */
4641 rv
= ldc_mem_info(dp
->data_handle
, &minfo
);
4643 cmn_err(CE_WARN
, "ldc_mem_info failed\n");
4644 DWARN(vswp
, "%s (%lld) ldc_mem_info() failed: %d\n",
4645 __func__
, ldcp
->ldc_id
, rv
);
4649 if (minfo
.mtype
!= LDC_DIRECT_MAP
) {
4650 DWARN(vswp
, "%s (%lld) mtype(%d) is not direct map\n",
4651 __func__
, ldcp
->ldc_id
, minfo
.mtype
);
4655 /* allocate memory for data area cookies */
4656 dp
->data_cookie
= kmem_zalloc(emsg
->data_ncookies
*
4657 sizeof (ldc_mem_cookie_t
), KM_SLEEP
);
4659 /* save data area cookies */
4660 bcopy(emsg
->data_cookie
, dp
->data_cookie
,
4661 emsg
->data_ncookies
* sizeof (ldc_mem_cookie_t
));
4667 * Reset and free all the resources associated with the channel.
4670 vsw_free_lane_resources(vsw_ldc_t
*ldcp
, uint64_t dir
)
4674 D1(ldcp
->ldc_vswp
, "%s (%lld): enter", __func__
, ldcp
->ldc_id
);
4676 if (dir
== INBOUND
) {
4677 D2(ldcp
->ldc_vswp
, "%s: freeing INBOUND lane"
4678 " of channel %lld", __func__
, ldcp
->ldc_id
);
4679 lp
= &ldcp
->lane_in
;
4681 D2(ldcp
->ldc_vswp
, "%s: freeing OUTBOUND lane"
4682 " of channel %lld", __func__
, ldcp
->ldc_id
);
4683 lp
= &ldcp
->lane_out
;
4686 lp
->lstate
= VSW_LANE_INACTIV
;
4687 lp
->seq_num
= VNET_ISS
;
4689 if (dir
== INBOUND
) {
4690 /* Unmap the remote dring which is imported from the peer */
4691 vsw_unmap_dring(ldcp
);
4693 /* Destroy the local dring which is exported to the peer */
4694 vsw_destroy_dring(ldcp
);
4697 D1(ldcp
->ldc_vswp
, "%s (%lld): exit", __func__
, ldcp
->ldc_id
);
4701 * Destroy the descriptor ring.
4704 vsw_destroy_dring(vsw_ldc_t
*ldcp
)
4706 lane_t
*lp
= &ldcp
->lane_out
;
4708 if (lp
->dring_mode
== VIO_RX_DRING_DATA
) {
4709 vsw_destroy_rx_dring(ldcp
);
4711 vsw_destroy_tx_dring(ldcp
);
4716 * vsw_ldc_tx_worker -- A per LDC worker thread to transmit data.
4717 * This thread is woken up by the vsw_portsend to transmit
4721 vsw_ldc_tx_worker(void *arg
)
4723 callb_cpr_t cprinfo
;
4724 vsw_ldc_t
*ldcp
= (vsw_ldc_t
*)arg
;
4725 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4729 D1(vswp
, "%s(%lld):enter\n", __func__
, ldcp
->ldc_id
);
4730 CALLB_CPR_INIT(&cprinfo
, &ldcp
->tx_thr_lock
, callb_generic_cpr
,
4732 mutex_enter(&ldcp
->tx_thr_lock
);
4733 while (!(ldcp
->tx_thr_flags
& VSW_WTHR_STOP
)) {
4735 CALLB_CPR_SAFE_BEGIN(&cprinfo
);
4737 * Wait until the data is received or a stop
4738 * request is received.
4740 while (!(ldcp
->tx_thr_flags
& VSW_WTHR_STOP
) &&
4741 (ldcp
->tx_mhead
== NULL
)) {
4742 cv_wait(&ldcp
->tx_thr_cv
, &ldcp
->tx_thr_lock
);
4744 CALLB_CPR_SAFE_END(&cprinfo
, &ldcp
->tx_thr_lock
)
4747 * First process the stop request.
4749 if (ldcp
->tx_thr_flags
& VSW_WTHR_STOP
) {
4750 D2(vswp
, "%s(%lld):tx thread stopped\n",
4751 __func__
, ldcp
->ldc_id
);
4754 mp
= ldcp
->tx_mhead
;
4755 ldcp
->tx_mhead
= ldcp
->tx_mtail
= NULL
;
4757 mutex_exit(&ldcp
->tx_thr_lock
);
4758 D2(vswp
, "%s(%lld):calling vsw_ldcsend\n",
4759 __func__
, ldcp
->ldc_id
);
4760 while (mp
!= NULL
) {
4762 mp
->b_next
= mp
->b_prev
= NULL
;
4763 (void) vsw_ldcsend(ldcp
, mp
, vsw_ldc_tx_retries
);
4766 mutex_enter(&ldcp
->tx_thr_lock
);
4770 * Update the run status and wakeup the thread that
4771 * has sent the stop request.
4773 ldcp
->tx_thr_flags
&= ~VSW_WTHR_STOP
;
4774 ldcp
->tx_thread
= NULL
;
4775 CALLB_CPR_EXIT(&cprinfo
);
4776 D1(vswp
, "%s(%lld):exit\n", __func__
, ldcp
->ldc_id
);
4780 /* vsw_stop_tx_thread -- Co-ordinate with receive thread to stop it */
4782 vsw_stop_tx_thread(vsw_ldc_t
*ldcp
)
4785 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4787 D1(vswp
, "%s(%lld):enter\n", __func__
, ldcp
->ldc_id
);
4789 * Send a stop request by setting the stop flag and
4790 * wait until the receive thread stops.
4792 mutex_enter(&ldcp
->tx_thr_lock
);
4793 if (ldcp
->tx_thread
!= NULL
) {
4794 tid
= ldcp
->tx_thread
->t_did
;
4795 ldcp
->tx_thr_flags
|= VSW_WTHR_STOP
;
4796 cv_signal(&ldcp
->tx_thr_cv
);
4798 mutex_exit(&ldcp
->tx_thr_lock
);
4804 D1(vswp
, "%s(%lld):exit\n", __func__
, ldcp
->ldc_id
);
4808 vsw_mapin_avail(vsw_ldc_t
*ldcp
)
4812 uint64_t mapin_sz_req
;
4814 vsw_t
*vswp
= ldcp
->ldc_vswp
;
4816 rv
= ldc_info(ldcp
->ldc_handle
, &info
);
4821 dblk_sz
= RXDRING_DBLK_SZ(vswp
->max_frame_size
);
4822 mapin_sz_req
= (VSW_RXDRING_NRBUFS
* dblk_sz
);
4824 if (info
.direct_map_size_max
>= mapin_sz_req
) {
4832 * Debugging routines
4838 vsw_port_list_t
*plist
;
4841 extern vsw_t
*vsw_head
;
4843 cmn_err(CE_NOTE
, "***** system state *****");
4845 for (vswp
= vsw_head
; vswp
; vswp
= vswp
->next
) {
4846 plist
= &vswp
->plist
;
4847 READ_ENTER(&plist
->lockrw
);
4848 cmn_err(CE_CONT
, "vsw instance %d has %d ports attached\n",
4849 vswp
->instance
, plist
->num_ports
);
4851 for (port
= plist
->head
; port
!= NULL
; port
= port
->p_next
) {
4852 cmn_err(CE_CONT
, "port %d : %d ldcs attached\n",
4853 port
->p_instance
, port
->num_ldcs
);
4855 cmn_err(CE_CONT
, "chan %lu : dev %d : "
4856 "status %d : phase %u\n",
4857 ldcp
->ldc_id
, ldcp
->dev_class
,
4858 ldcp
->ldc_status
, ldcp
->hphase
);
4859 cmn_err(CE_CONT
, "chan %lu : lsession %lu : "
4860 "psession %lu\n", ldcp
->ldc_id
,
4861 ldcp
->local_session
, ldcp
->peer_session
);
4863 cmn_err(CE_CONT
, "Inbound lane:\n");
4864 display_lane(&ldcp
->lane_in
);
4865 cmn_err(CE_CONT
, "Outbound lane:\n");
4866 display_lane(&ldcp
->lane_out
);
4868 RW_EXIT(&plist
->lockrw
);
4870 cmn_err(CE_NOTE
, "***** system state *****");
4874 display_lane(lane_t
*lp
)
4876 dring_info_t
*drp
= lp
->dringp
;
4878 cmn_err(CE_CONT
, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
4879 lp
->ver_major
, lp
->ver_minor
, lp
->lstate
, lp
->mtu
);
4880 cmn_err(CE_CONT
, "addr_type %d : addr 0x%lx : xmode %d\n",
4881 lp
->addr_type
, lp
->addr
, lp
->xfer_mode
);
4882 cmn_err(CE_CONT
, "dringp 0x%lx\n", (uint64_t)lp
->dringp
);
4884 cmn_err(CE_CONT
, "Dring info:\n");
4885 cmn_err(CE_CONT
, "\tnum_desc %u : dsize %u\n",
4886 drp
->num_descriptors
, drp
->descriptor_size
);
4887 cmn_err(CE_CONT
, "\thandle 0x%lx\n", drp
->dring_handle
);
4888 cmn_err(CE_CONT
, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
4889 (uint64_t)drp
->pub_addr
, (uint64_t)drp
->priv_addr
);
4890 cmn_err(CE_CONT
, "\tident 0x%lx : end_idx %lu\n",
4891 drp
->ident
, drp
->end_idx
);
4896 display_ring(dring_info_t
*dringp
)
4899 uint64_t priv_count
= 0;
4900 uint64_t pub_count
= 0;
4901 vnet_public_desc_t
*pub_addr
= NULL
;
4902 vsw_private_desc_t
*priv_addr
= NULL
;
4904 for (i
= 0; i
< vsw_num_descriptors
; i
++) {
4905 if (dringp
->pub_addr
!= NULL
) {
4906 pub_addr
= (vnet_public_desc_t
*)dringp
->pub_addr
+ i
;
4908 if (pub_addr
->hdr
.dstate
== VIO_DESC_FREE
)
4912 if (dringp
->priv_addr
!= NULL
) {
4913 priv_addr
= (vsw_private_desc_t
*)dringp
->priv_addr
+ i
;
4915 if (priv_addr
->dstate
== VIO_DESC_FREE
)
4919 cmn_err(CE_CONT
, "\t%lu elements: %lu priv free: %lu pub free\n",
4920 i
, priv_count
, pub_count
);
4924 dump_flags(uint64_t state
)
4928 typedef struct flag_name
{
4933 flag_name_t flags
[] = {
4934 VSW_VER_INFO_SENT
, "VSW_VER_INFO_SENT",
4935 VSW_VER_INFO_RECV
, "VSW_VER_INFO_RECV",
4936 VSW_VER_ACK_RECV
, "VSW_VER_ACK_RECV",
4937 VSW_VER_ACK_SENT
, "VSW_VER_ACK_SENT",
4938 VSW_VER_NACK_RECV
, "VSW_VER_NACK_RECV",
4939 VSW_VER_NACK_SENT
, "VSW_VER_NACK_SENT",
4940 VSW_ATTR_INFO_SENT
, "VSW_ATTR_INFO_SENT",
4941 VSW_ATTR_INFO_RECV
, "VSW_ATTR_INFO_RECV",
4942 VSW_ATTR_ACK_SENT
, "VSW_ATTR_ACK_SENT",
4943 VSW_ATTR_ACK_RECV
, "VSW_ATTR_ACK_RECV",
4944 VSW_ATTR_NACK_SENT
, "VSW_ATTR_NACK_SENT",
4945 VSW_ATTR_NACK_RECV
, "VSW_ATTR_NACK_RECV",
4946 VSW_DRING_INFO_SENT
, "VSW_DRING_INFO_SENT",
4947 VSW_DRING_INFO_RECV
, "VSW_DRING_INFO_RECV",
4948 VSW_DRING_ACK_SENT
, "VSW_DRING_ACK_SENT",
4949 VSW_DRING_ACK_RECV
, "VSW_DRING_ACK_RECV",
4950 VSW_DRING_NACK_SENT
, "VSW_DRING_NACK_SENT",
4951 VSW_DRING_NACK_RECV
, "VSW_DRING_NACK_RECV",
4952 VSW_RDX_INFO_SENT
, "VSW_RDX_INFO_SENT",
4953 VSW_RDX_INFO_RECV
, "VSW_RDX_INFO_RECV",
4954 VSW_RDX_ACK_SENT
, "VSW_RDX_ACK_SENT",
4955 VSW_RDX_ACK_RECV
, "VSW_RDX_ACK_RECV",
4956 VSW_RDX_NACK_SENT
, "VSW_RDX_NACK_SENT",
4957 VSW_RDX_NACK_RECV
, "VSW_RDX_NACK_RECV",
4958 VSW_MCST_INFO_SENT
, "VSW_MCST_INFO_SENT",
4959 VSW_MCST_INFO_RECV
, "VSW_MCST_INFO_RECV",
4960 VSW_MCST_ACK_SENT
, "VSW_MCST_ACK_SENT",
4961 VSW_MCST_ACK_RECV
, "VSW_MCST_ACK_RECV",
4962 VSW_MCST_NACK_SENT
, "VSW_MCST_NACK_SENT",
4963 VSW_MCST_NACK_RECV
, "VSW_MCST_NACK_RECV",
4964 VSW_LANE_ACTIVE
, "VSW_LANE_ACTIVE"};
4966 DERR(NULL
, "DUMP_FLAGS: %llx\n", state
);
4967 for (i
= 0; i
< sizeof (flags
)/sizeof (flag_name_t
); i
++) {
4968 if (state
& flags
[i
].flag_val
)
4969 DERR(NULL
, "DUMP_FLAGS %s", flags
[i
].flag_name
);