4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
27 * This header file contains the basic data structures which the
28 * virtual switch (vsw) uses to communicate with vnet clients.
30 * The virtual switch reads the machine description (MD) to
31 * determine how many port_t structures to create (each port_t
32 * can support communications to a single network device). The
33 * port_t's are maintained in a linked list.
35 * Each port in turn contains a number of logical domain channels
36 * (ldc's) which are inter domain communications channels which
37 * are used for passing small messages between the domains. There
38 * may be any number of channels associated with each port, though
39 * currently most devices only have a single channel. The current
40 * implementation provides support for only one channel per port.
42 * The ldc is a bi-directional channel, which is divided up into
43 * two directional 'lanes', one outbound from the switch to the
44 * virtual network device, the other inbound to the switch.
45 * Depending on the type of device each lane may have separate
46 * communication parameters (such as mtu etc).
48 * For those network clients which use descriptor rings the
49 * rings are associated with the appropriate lane. I.e. rings
50 * which the switch exports are associated with the outbound lanes
51 * while those which the network clients are exporting to the switch
52 * are associated with the inbound lane.
54 * In diagram form the data structures look as follows:
58 * +----->port_t----->port_t----->port_t----->
62 * +--->lane_t (inbound)
66 * +--->lane_t (outbound)
/*
 * LDC pkt transfer MTU - largest msg size used
 */
#define	VSW_LDC_MTU		64

/*
 * VNET_DRING_REG_EXT_MSG_SIZE_MAX expressed in uint64_t-sized words
 * (integer division).
 */
#define	VSW_DEF_MSG_WORDS	\
	(VNET_DRING_REG_EXT_MSG_SIZE_MAX / sizeof (uint64_t))
88 * Default message type.
90 typedef struct def_msg
{
91 uint64_t data
[VSW_DEF_MSG_WORDS
];
95 * Currently only support one major/minor pair.
99 typedef struct ver_sup
{
100 uint16_t ver_major
; /* major version number */
101 uint16_t ver_minor
; /* minor version number */
/*
 * Lane state flags.  VSW_LANE_INACTIV is zero; every other value below
 * occupies its own single bit (0x1 through 0x40000000).
 */
#define	VSW_LANE_INACTIV	0x0	/* No params set for lane */

#define	VSW_VER_INFO_SENT	0x1	/* Version # sent to peer */
#define	VSW_VER_INFO_RECV	0x2	/* Version # recv from peer */
#define	VSW_VER_ACK_RECV	0x4
#define	VSW_VER_ACK_SENT	0x8
#define	VSW_VER_NACK_RECV	0x10
#define	VSW_VER_NACK_SENT	0x20

#define	VSW_ATTR_INFO_SENT	0x40	/* Attributes sent to peer */
#define	VSW_ATTR_INFO_RECV	0x80	/* Peer attributes received */
#define	VSW_ATTR_ACK_SENT	0x100
#define	VSW_ATTR_ACK_RECV	0x200
#define	VSW_ATTR_NACK_SENT	0x400
#define	VSW_ATTR_NACK_RECV	0x800

#define	VSW_DRING_INFO_SENT	0x1000	/* Dring info sent to peer */
#define	VSW_DRING_INFO_RECV	0x2000	/* Dring info received */
#define	VSW_DRING_ACK_SENT	0x4000
#define	VSW_DRING_ACK_RECV	0x8000
#define	VSW_DRING_NACK_SENT	0x10000
#define	VSW_DRING_NACK_RECV	0x20000

#define	VSW_RDX_INFO_SENT	0x40000	/* RDX sent to peer */
#define	VSW_RDX_INFO_RECV	0x80000	/* RDX received from peer */
#define	VSW_RDX_ACK_SENT	0x100000
#define	VSW_RDX_ACK_RECV	0x200000
#define	VSW_RDX_NACK_SENT	0x400000
#define	VSW_RDX_NACK_RECV	0x800000

#define	VSW_MCST_INFO_SENT	0x1000000
#define	VSW_MCST_INFO_RECV	0x2000000
#define	VSW_MCST_ACK_SENT	0x4000000
#define	VSW_MCST_ACK_RECV	0x8000000
#define	VSW_MCST_NACK_SENT	0x10000000
#define	VSW_MCST_NACK_RECV	0x20000000

#define	VSW_LANE_ACTIVE		0x40000000	/* Lane open to xmit data */
/* Handshake milestones */
#define	VSW_MILESTONE0		0x1	/* ver info exchanged */
#define	VSW_MILESTONE1		0x2	/* attribute exchanged */
#define	VSW_MILESTONE2		0x4	/* dring info exchanged */
#define	VSW_MILESTONE3		0x8	/* rdx exchanged */
#define	VSW_MILESTONE4		0x10	/* handshake complete */
154 * Lane direction (relative to ourselves).
/* Peer session id received */
#define	VSW_PEER_SESSION	0x1

/*
 * Maximum number of consecutive reads of data from channel
 */
#define	VSW_MAX_CHAN_READ	50

/*
 * Currently only support one ldc per port.
 */
#define	VSW_PORT_MAX_LDCS	1	/* max # of ldcs per port */

/*
 * Used for port add/deletion.
 */
#define	VSW_PORT_UPDATED	0x1

/* Result codes for an ldc transmit attempt */
#define	LDC_TX_SUCCESS		0	/* ldc transmit success */
#define	LDC_TX_FAILURE		1	/* ldc transmit failure */
#define	LDC_TX_NORESOURCES	2	/* out of descriptors */
/*
 * Descriptor ring info
 *
 * Each descriptor element has a pre-allocated data buffer
 * associated with it, into which data being transmitted is
 * copied. By pre-allocating we speed up the copying process.
 * The buffer is re-used once the peer has indicated that it is
 * finished with the descriptor.
 */
#define	VSW_RING_EL_DATA_SZ	2048	/* Size of data section (bytes) */
#define	VSW_PRIV_SIZE		(sizeof (vnet_private_desc_t))

/* (ETHERMTU shifted right by the MMU page shift) plus two */
#define	VSW_MAX_COOKIES		((ETHERMTU >> MMU_PAGESHIFT) + 2)
/*
 * Size of the mblk in each mblk pool.
 */
#define	VSW_MBLK_SZ_128		128
#define	VSW_MBLK_SZ_256		256
#define	VSW_MBLK_SZ_2048	2048

/*
 * Number of mblks in each mblk pool.
 */
#define	VSW_NUM_MBLKS		1024

/*
 * Number of rcv buffers in RxDringData mode
 * (product of vsw_num_descriptors and vsw_nrbufs_factor).
 */
#define	VSW_RXDRING_NRBUFS	(vsw_num_descriptors * vsw_nrbufs_factor)
/*
 * increment recv index
 *
 * Advances i by one and wraps it by masking with
 * ((dp)->num_descriptors - 1); the wrap is therefore only correct when
 * num_descriptors is a power of two.  i is evaluated more than once and
 * must be a modifiable lvalue without side effects.
 */
#define	INCR_DESC_INDEX(dp, i)	\
	((i) = (((i) + 1) & ((dp)->num_descriptors - 1)))

/*
 * decrement recv index
 *
 * Steps i back by one using the same mask-based wrap as
 * INCR_DESC_INDEX; the same caveats apply.
 */
#define	DECR_DESC_INDEX(dp, i)	\
	((i) = (((i) - 1) & ((dp)->num_descriptors - 1)))

/* tx/rx flavoured aliases of the two macros above */
#define	INCR_TXI	INCR_DESC_INDEX
#define	DECR_TXI	DECR_DESC_INDEX
#define	INCR_RXI	INCR_DESC_INDEX
#define	DECR_RXI	DECR_DESC_INDEX
/*
 * bounds check rx index
 *
 * Non-zero when i is >= 0 and less than (dp)->num_descriptors.
 * i is evaluated twice, so it must not have side effects.
 */
#define	CHECK_DESC_INDEX(dp, i)	\
	(((i) >= 0) && ((i) < (dp)->num_descriptors))

/* rx/tx flavoured aliases */
#define	CHECK_RXI	CHECK_DESC_INDEX
#define	CHECK_TXI	CHECK_DESC_INDEX
235 typedef struct vsw_private_desc
{
237 * Below lock must be held when accessing the state of
238 * a descriptor on either the private or public sections
241 kmutex_t dstate_lock
;
243 vnet_public_desc_t
*descp
;
244 ldc_mem_handle_t memhandle
;
248 ldc_mem_cookie_t memcookie
[VSW_MAX_COOKIES
];
250 } vsw_private_desc_t
;
253 * Descriptor ring structure
255 typedef struct dring_info
{
256 kmutex_t dlock
; /* sync access */
257 uint32_t num_descriptors
; /* # of descriptors */
258 uint32_t descriptor_size
; /* size of descriptor */
259 uint32_t options
; /* dring options (mode) */
260 ldc_dring_handle_t dring_handle
; /* dring LDC handle */
261 uint32_t dring_ncookies
; /* # of dring cookies */
262 ldc_mem_cookie_t dring_cookie
[1]; /* LDC cookie of dring */
263 ldc_mem_handle_t data_handle
; /* data area LDC handle */
264 uint32_t data_ncookies
; /* # of data area cookies */
265 ldc_mem_cookie_t
*data_cookie
; /* data area LDC cookies */
266 uint64_t ident
; /* identifier sent to peer */
267 uint64_t end_idx
; /* last idx processed */
268 int64_t last_ack_recv
; /* last ack received */
269 kmutex_t txlock
; /* protect tx desc alloc */
270 uint32_t next_txi
; /* next tx descriptor index */
271 uint32_t next_rxi
; /* next expected recv index */
272 kmutex_t restart_lock
; /* protect restart_reqd */
273 boolean_t restart_reqd
; /* send restart msg */
274 uint32_t restart_peer_txi
; /* index to restart peer */
275 void *pub_addr
; /* base of public section */
276 void *priv_addr
; /* base of private section */
277 void *data_addr
; /* base of data section */
278 size_t data_sz
; /* size of data section */
279 size_t desc_data_sz
; /* size of descr data blk */
280 uint8_t dring_mtype
; /* dring mem map type */
281 uint32_t num_bufs
; /* # of buffers */
282 vio_mblk_pool_t
*rx_vmp
; /* rx mblk pool */
283 vio_mblk_t
**rxdp_to_vmp
; /* descr to buf map tbl */
287 * Each ldc connection is comprised of two lanes, incoming
288 * from a peer, and outgoing to that peer. Each lane shares
289 * common ldc parameters and also has private lane-specific
292 typedef struct lane
{
293 uint64_t lstate
; /* Lane state */
294 uint16_t ver_major
; /* Version major number */
295 uint16_t ver_minor
; /* Version minor number */
296 uint64_t seq_num
; /* Sequence number */
297 uint64_t mtu
; /* ETHERMTU */
298 uint64_t addr
; /* Unique physical address */
299 uint8_t addr_type
; /* Only MAC address at moment */
300 uint8_t xfer_mode
; /* Dring or Pkt based */
301 uint8_t ack_freq
; /* Only non zero for Pkt based xfer */
302 uint32_t physlink_update
; /* physlink updates */
303 uint8_t dring_mode
; /* Descriptor ring mode */
304 dring_info_t
*dringp
; /* List of drings for this lane */
/* channel drain states */
#define	VSW_LDC_INIT		0x1	/* Initial non-drain state */
#define	VSW_LDC_DRAINING	0x2	/* Channel draining */
312 * vnet-protocol-version dependent function prototypes.
314 typedef int (*vsw_ldctx_t
) (void *, mblk_t
*, mblk_t
*, uint32_t);
315 typedef void (*vsw_ldcrx_pktdata_t
) (void *, void *, uint32_t);
316 typedef void (*vsw_ldcrx_dringdata_t
) (void *, void *);
318 /* ldc information associated with a vsw-port */
319 typedef struct vsw_ldc
{
320 struct vsw_ldc
*ldc_next
; /* next ldc in the list */
321 struct vsw_port
*ldc_port
; /* associated port */
322 struct vsw
*ldc_vswp
; /* associated vsw */
323 kmutex_t ldc_cblock
; /* sync callback processing */
324 kmutex_t ldc_txlock
; /* sync transmits */
325 kmutex_t ldc_rxlock
; /* sync rx */
326 uint64_t ldc_id
; /* channel number */
327 ldc_handle_t ldc_handle
; /* channel handle */
328 kmutex_t drain_cv_lock
;
329 kcondvar_t drain_cv
; /* channel draining */
331 uint32_t hphase
; /* handshake phase */
332 int hcnt
; /* # handshake attempts */
333 kmutex_t status_lock
;
334 ldc_status_t ldc_status
; /* channel status */
335 uint8_t reset_active
; /* reset flag */
336 uint64_t local_session
; /* Our session id */
337 uint64_t peer_session
; /* Our peers session id */
338 uint8_t session_status
; /* Session recv'd, sent */
339 uint32_t hss_id
; /* Handshake session id */
340 uint64_t next_ident
; /* Next dring ident # to use */
341 lane_t lane_in
; /* Inbound lane */
342 lane_t lane_out
; /* Outbound lane */
343 uint8_t dev_class
; /* Peer device class */
344 boolean_t pls_negotiated
; /* phys link state update ? */
345 vio_multi_pool_t vmp
; /* Receive mblk pools */
346 uint32_t max_rxpool_size
; /* max size of rxpool in use */
347 uint64_t *ldcmsg
; /* msg buffer for ldc_read() */
348 uint64_t msglen
; /* size of ldcmsg */
349 uint32_t dringdata_msgid
; /* msgid in RxDringData mode */
351 /* tx thread fields */
352 kthread_t
*tx_thread
; /* tx thread */
353 uint32_t tx_thr_flags
; /* tx thread flags */
354 kmutex_t tx_thr_lock
; /* lock for tx thread */
355 kcondvar_t tx_thr_cv
; /* cond.var for tx thread */
356 mblk_t
*tx_mhead
; /* tx mblks head */
357 mblk_t
*tx_mtail
; /* tx mblks tail */
358 uint32_t tx_cnt
; /* # of pkts queued for tx */
360 /* message thread fields */
361 kthread_t
*msg_thread
; /* message thread */
362 uint32_t msg_thr_flags
; /* message thread flags */
363 kmutex_t msg_thr_lock
; /* lock for message thread */
364 kcondvar_t msg_thr_cv
; /* cond.var for msg thread */
366 /* receive thread fields */
367 kthread_t
*rcv_thread
; /* receive thread */
368 uint32_t rcv_thr_flags
; /* receive thread flags */
369 kmutex_t rcv_thr_lock
; /* lock for receive thread */
370 kcondvar_t rcv_thr_cv
; /* cond.var for recv thread */
372 vsw_ldctx_t tx
; /* transmit function */
373 vsw_ldcrx_pktdata_t rx_pktdata
; /* process raw data msg */
374 vsw_ldcrx_dringdata_t rx_dringdata
; /* process dring data msg */
376 /* channel statistics */
377 vgen_stats_t ldc_stats
; /* channel statistics */
378 kstat_t
*ksp
; /* channel kstats */
/* worker thread flags */
#define	VSW_WTHR_DATARCVD	0x01	/* data received */
#define	VSW_WTHR_STOP		0x02	/* stop worker thread request */
385 /* multicast addresses port is interested in */
386 typedef struct mcst_addr
{
387 struct mcst_addr
*nextp
;
388 struct ether_addr mca
; /* multicast address */
389 uint64_t addr
; /* mcast addr converted to hash key */
390 boolean_t mac_added
; /* added into physical device */
/* Port detach states */
#define	VSW_PORT_INIT		0x1	/* Initial non-detach state */
#define	VSW_PORT_DETACHING	0x2	/* In process of being detached */
#define	VSW_PORT_DETACHABLE	0x4	/* Safe to detach */
398 /* port information associated with a vsw */
399 typedef struct vsw_port
{
400 int p_instance
; /* port instance */
401 struct vsw_port
*p_next
; /* next port in the list */
402 struct vsw
*p_vswp
; /* associated vsw */
403 int num_ldcs
; /* # of ldcs in the port */
404 uint64_t *ldc_ids
; /* ldc ids */
405 vsw_ldc_t
*ldcp
; /* ldc for this port */
407 kmutex_t tx_lock
; /* transmit lock */
408 int (*transmit
)(vsw_ldc_t
*, mblk_t
*);
410 int state
; /* port state */
414 krwlock_t maccl_rwlock
; /* protect fields below */
415 mac_client_handle_t p_mch
; /* mac client handle */
416 mac_unicast_handle_t p_muh
; /* mac unicast handle */
418 kmutex_t mca_lock
; /* multicast lock */
419 mcst_addr_t
*mcap
; /* list of multicast addrs */
421 boolean_t addr_set
; /* Addr set where */
424 * mac address of the port & connected device
426 struct ether_addr p_macaddr
;
427 uint16_t pvid
; /* port vlan id (untagged) */
428 struct vsw_vlanid
*vids
; /* vlan ids (tagged) */
429 uint16_t nvids
; /* # of vids */
430 mod_hash_t
*vlan_hashp
; /* vlan hash table */
431 uint32_t vlan_nchains
; /* # of vlan hash chains */
433 /* HybridIO related info */
434 uint32_t p_hio_enabled
; /* Hybrid mode enabled? */
435 uint32_t p_hio_capable
; /* Port capable of HIO */
437 /* bandwidth limit */
438 uint64_t p_bandwidth
; /* bandwidth limit */
441 /* list of ports per vsw */
442 typedef struct vsw_port_list
{
443 vsw_port_t
*head
; /* head of the list */
444 krwlock_t lockrw
; /* sync access(rw) to the list */
445 int num_ports
; /* number of ports in the list */
449 * Taskq control message
451 typedef struct vsw_ctrl_task
{
/*
 * State of connection to peer. Some of these states
 * can be mapped to LDC events as follows:
 *
 * VSW_CONN_RESET -> LDC_RESET_EVT
 * VSW_CONN_UP    -> LDC_UP_EVT
 */
#define	VSW_CONN_UP		0x1	/* Connection came up */
#define	VSW_CONN_RESET		0x2	/* Connection reset */
#define	VSW_CONN_RESTART	0x4	/* Restarting handshake on connection */
468 typedef struct vsw_conn_evt
{
469 uint16_t evt
; /* Connection event */
474 * Ethernet broadcast address definition.
476 static struct ether_addr etherbroadcastaddr
= {
477 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
/*
 * IS_BROADCAST: non-zero when the first ETHERADDRL bytes of the
 * destination address in ethernet header ehp compare equal to
 * etherbroadcastaddr.
 *
 * IS_MULTICAST: non-zero when the low-order bit of the first octet of
 * the destination address in ehp is set.
 *
 * ehp is parenthesised in both expansions so that an expression such as
 * &hdr or (base + n) can be passed safely.
 */
#define	IS_BROADCAST(ehp)	\
	(bcmp(&(ehp)->ether_dhost, &etherbroadcastaddr, ETHERADDRL) == 0)
#define	IS_MULTICAST(ehp)	\
	(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
/*
 * Shorthand for rw_enter() as reader / as writer, and for rw_exit(),
 * on the lock pointer x.
 */
#define	READ_ENTER(x)	rw_enter(x, RW_READER)
#define	WRITE_ENTER(x)	rw_enter(x, RW_WRITER)
#define	RW_EXIT(x)	rw_exit(x)

/*
 * Take / drop a reference on a port by applying atomic_inc_32() /
 * atomic_dec_32() to its ref_cnt member.
 */
#define	VSW_PORT_REFHOLD(portp)	atomic_inc_32(&((portp)->ref_cnt))
#define	VSW_PORT_REFRELE(portp)	atomic_dec_32(&((portp)->ref_cnt))
496 #endif /* _VSW_LDC_H */