/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/callb.h>
#include <sys/stream.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/modctl.h>
#include <sys/modhash.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/dlpi.h>
#include <net/if.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/note.h>
#include <sys/atomic.h>
#include <sys/vnet.h>
#include <sys/vlan.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/dds.h>
#include <sys/strsubr.h>
#include <sys/taskq.h>

/*
 * Function prototypes.
 */

/* DDI entrypoints */
static int vnetdevinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vnetattach(dev_info_t *, ddi_attach_cmd_t);
static int vnetdetach(dev_info_t *, ddi_detach_cmd_t);

/* MAC entrypoints */
static int vnet_m_stat(void *, uint_t, uint64_t *);
static int vnet_m_start(void *);
static void vnet_m_stop(void *);
static int vnet_m_promisc(void *, boolean_t);
static int vnet_m_multicst(void *, boolean_t, const uint8_t *);
static int vnet_m_unicst(void *, const uint8_t *);
mblk_t *vnet_m_tx(void *, mblk_t *);
static void vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp);
#ifdef	VNET_IOC_DEBUG
static void vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp);
#endif
static boolean_t vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data);
static void vnet_get_ring(void *arg, mac_ring_type_t rtype, const int g_index,
    const int r_index, mac_ring_info_t *infop, mac_ring_handle_t r_handle);
static void vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle);
static int vnet_rx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_rx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
    uint64_t *val);
static int vnet_tx_ring_start(mac_ring_driver_t rdriver, uint64_t mr_gen_num);
static void vnet_tx_ring_stop(mac_ring_driver_t rdriver);
static int vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat,
    uint64_t *val);
static int vnet_ring_enable_intr(void *arg);
static int vnet_ring_disable_intr(void *arg);
static mblk_t *vnet_rx_poll(void *arg, int bytes_to_pickup);
static int vnet_addmac(void *arg, const uint8_t *mac_addr);
static int vnet_remmac(void *arg, const uint8_t *mac_addr);

/* vnet internal functions */
static int vnet_unattach(vnet_t *vnetp);
static void vnet_ring_grp_init(vnet_t *vnetp);
static void vnet_ring_grp_uninit(vnet_t *vnetp);
static int vnet_mac_register(vnet_t *);
static int vnet_read_mac_address(vnet_t *vnetp);
static int vnet_bind_vgenring(vnet_res_t *vresp);
static void vnet_unbind_vgenring(vnet_res_t *vresp);
static int vnet_bind_hwrings(vnet_t *vnetp);
static void vnet_unbind_hwrings(vnet_t *vnetp);
static int vnet_bind_rings(vnet_res_t *vresp);
static void vnet_unbind_rings(vnet_res_t *vresp);
static int vnet_hio_stat(void *, uint_t, uint64_t *);
static int vnet_hio_start(void *);
static void vnet_hio_stop(void *);
mblk_t *vnet_hio_tx(void *, mblk_t *);

/* Forwarding database (FDB) routines */
static void vnet_fdb_create(vnet_t *vnetp);
static void vnet_fdb_destroy(vnet_t *vnetp);
static vnet_res_t *vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp);
static void vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
void vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_fdbe_del(vnet_t *vnetp, vnet_res_t *vresp);

static void vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp);
static void vnet_rx(vio_net_handle_t vrh, mblk_t *mp);
static void vnet_tx_update(vio_net_handle_t vrh);
static void vnet_res_start_task(void *arg);
static void vnet_start_resources(vnet_t *vnetp);
static void vnet_stop_resources(vnet_t *vnetp);
static void vnet_dispatch_res_task(vnet_t *vnetp);
static void vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err);
static void vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp);
static vnet_res_t *vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp);
static void vnet_tx_notify_thread(void *);

/* Exported to vnet_gen */
int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu);
void vnet_link_update(vnet_t *vnetp, link_state_t link_state);
void vnet_dds_cleanup_hio(vnet_t *vnetp);

static kstat_t *vnet_hio_setup_kstats(char *ks_mod, char *ks_name,
    vnet_res_t *vresp);
static int vnet_hio_update_kstats(kstat_t *ksp, int rw);
static void vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp);
static void vnet_hio_destroy_kstats(kstat_t *ksp);

/* Exported to vnet_dds */
int vnet_send_dds_msg(vnet_t *vnetp, void *dmsg);
int vnet_hio_mac_init(vnet_t *vnetp, char *ifname);
void vnet_hio_mac_cleanup(vnet_t *vnetp);

/* Externs that are imported from vnet_gen */
extern int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip,
    const uint8_t *macaddr, void **vgenhdl);
extern int vgen_init_mdeg(void *arg);
extern void vgen_uninit(void *arg);
extern int vgen_dds_tx(void *arg, void *dmsg);
extern int vgen_enable_intr(void *arg);
extern int vgen_disable_intr(void *arg);
extern mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup);

/* Externs that are imported from vnet_dds */
extern void vdds_mod_init(void);
extern void vdds_mod_fini(void);
extern int vdds_init(vnet_t *vnetp);
extern void vdds_cleanup(vnet_t *vnetp);
extern void vdds_process_dds_msg(vnet_t *vnetp, vio_dds_msg_t *dmsg);
extern void vdds_cleanup_hybrid_res(void *arg);
extern void vdds_cleanup_hio(vnet_t *vnetp);

extern pri_t minclsyspri;

#define	DRV_NAME		"vnet"

#define	VNET_FDBE_REFHOLD(p)						\
{									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
}

#define	VNET_FDBE_REFRELE(p)						\
{									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
}
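
/*
 * Usage sketch (illustrative, mirroring callers later in this file such as
 * vnet_m_multicst()): any thread that uses a cached fdb entry must hold a
 * reference across the call and drop it when done:
 *
 *	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
 *	rv = cbp->mc_multicst(macp->m_driver, add, mca);
 *	VNET_FDBE_REFRELE(vnetp->vsw_fp);
 *
 * vnet_fdbe_del() spins until the refcnt drains, so a missing REFRELE
 * would stall resource removal indefinitely.
 */
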
#ifdef	VNET_IOC_DEBUG
#define	VNET_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
#else
#define	VNET_M_CALLBACK_FLAGS	(MC_GETCAPAB)
#endif

static mac_callbacks_t vnet_m_callbacks = {
	VNET_M_CALLBACK_FLAGS,
	vnet_m_stat,
	vnet_m_start,
	vnet_m_stop,
	vnet_m_promisc,
	vnet_m_multicst,
	NULL,	/* m_unicst entry must be NULL while rx rings are exposed */
	NULL,	/* m_tx entry must be NULL while tx rings are exposed */
	NULL,
	vnet_m_ioctl,
	vnet_m_capab,
	NULL
};

static mac_callbacks_t vnet_hio_res_callbacks = {
	0,
	vnet_hio_stat,
	vnet_hio_start,
	vnet_hio_stop,
	NULL,
	NULL,
	NULL,
	vnet_hio_tx
};

/*
 * Linked list of "vnet_t" structures - one per instance.
 */
static vnet_t	*vnet_headp = NULL;
static krwlock_t vnet_rw;

/* Tunables */
uint32_t vnet_num_descriptors = VNET_NUM_DESCRIPTORS;

/*
 * Configure tx serialization in mac layer for the vnet device. This tunable
 * should be enabled to improve performance only if HybridIO is configured for
 * the vnet device.
 */
boolean_t vnet_mac_tx_serialize = B_FALSE;

/* Configure enqueueing at Rx soft rings in mac layer for the vnet device */
boolean_t vnet_mac_rx_queuing = B_TRUE;

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vnet_jumbo_rxpools = B_FALSE;

/* # of chains in fdb hash table */
uint32_t vnet_fdb_nchains = VNET_NFDB_HASH;

/* Internal tunables */
uint32_t vnet_ethermtu = 1500;		/* mtu of the device */

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in vsw and also other vnets connected to
 * the same vsw.
 */
uint16_t vnet_default_vlan_id = 1;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vnet_fdbe_refcnt_delay = 10;

static struct ether_addr etherbroadcastaddr = {
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};

/* mac_open() retry delay in usec */
uint32_t vnet_mac_open_delay = 100;	/* 0.1 ms */

/* max # of mac_open() retries */
uint32_t vnet_mac_open_retries = 100;
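
/*
 * Sketch (assumed consumer; the actual loop lives in vnet_hio_mac_init(),
 * which is not part of this excerpt): the two tunables above bound a retry
 * loop around mac_open_by_linkname(), roughly:
 *
 *	for (i = 0; i < vnet_mac_open_retries; i++) {
 *		rv = mac_open_by_linkname(ifname, &mh);
 *		if (rv != ENOENT)
 *			break;
 *		delay(drv_usectohz(vnet_mac_open_delay));
 *	}
 */
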
/*
 * Property names
 */
static char macaddr_propname[] = "local-mac-address";

/*
 * This is the string displayed by modinfo(1M).
 */
static char vnet_ident[] = "vnet driver";

extern struct mod_ops mod_driverops;

static struct cb_ops cb_vnetops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	(int)(D_MP)		/* cb_flag */
};

static struct dev_ops vnetops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vnetattach,		/* devo_attach */
	vnetdetach,		/* devo_detach */
	nodev,			/* devo_reset */
	&cb_vnetops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported,	/* devo_quiesce */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module. This one is a driver */
	vnet_ident,		/* ID string */
	&vnetops		/* driver specific ops */
};

static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modldrv, NULL
};

#ifdef DEBUG

#define	DEBUG_PRINTF	debug_printf

/*
 * Print debug messages - set to 0xf to enable all msgs
 */
int vnet_dbglevel = 0x8;

static void
debug_printf(const char *fname, void *arg, const char *fmt, ...)
{
	char	buf[512];
	char	*bufp = buf;
	vnet_t	*vnetp = (vnet_t *)arg;
	va_list	ap;

	if (vnetp == NULL) {
		(void) sprintf(bufp, "%s: ", fname);
		bufp += strlen(bufp);
	} else {
		(void) sprintf(bufp, "vnet%d:%s: ", vnetp->instance, fname);
		bufp += strlen(bufp);
	}
	va_start(ap, fmt);
	(void) vsprintf(bufp, fmt, ap);
	va_end(ap);
	cmn_err(CE_CONT, "%s\n", buf);
}

#endif
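
/*
 * Note: the DBG1/DERR/DWARN macros used throughout this file are assumed
 * to be defined in the vnet header so that they route to DEBUG_PRINTF
 * (gated on vnet_dbglevel) in DEBUG builds and compile away otherwise;
 * that wiring is not part of this file.
 */
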
/* _init(9E): initialize the loadable module */
int
_init(void)
{
	int status;

	DBG1(NULL, "enter\n");

	mac_init_ops(&vnetops, "vnet");
	status = mod_install(&modlinkage);
	if (status != 0) {
		mac_fini_ops(&vnetops);
	}
	vdds_mod_init();
	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _fini(9E): prepare the module for unloading. */
int
_fini(void)
{
	int status;

	DBG1(NULL, "enter\n");

	status = mod_remove(&modlinkage);
	if (status != 0)
		return (status);
	mac_fini_ops(&vnetops);
	vdds_mod_fini();

	DBG1(NULL, "exit(%d)\n", status);
	return (status);
}

/* _info(9E): return information about the loadable module */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

/*
 * attach(9E): attach a device to the system.
 * called once for each instance of the device on the system.
 */
static int
vnetattach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	vnet_t			*vnetp;
	int			status;
	int			instance;
	uint64_t		reg;
	char			qname[TASKQ_NAMELEN];
	vnet_attach_progress_t	attach_progress;

	attach_progress = AST_init;

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
	case DDI_PM_RESUME:
	default:
		goto vnet_attach_fail;
	}

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	/* allocate vnet_t and mac_t structures */
	vnetp = kmem_zalloc(sizeof (vnet_t), KM_SLEEP);
	vnetp->dip = dip;
	vnetp->instance = instance;
	rw_init(&vnetp->vrwlock, NULL, RW_DRIVER, NULL);
	rw_init(&vnetp->vsw_fp_rw, NULL, RW_DRIVER, NULL);
	attach_progress |= AST_vnet_alloc;

	vnet_ring_grp_init(vnetp);
	attach_progress |= AST_ring_init;

	status = vdds_init(vnetp);
	if (status != 0) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vdds_init;

	/* setup links to vnet_t from both devinfo and mac_t */
	ddi_set_driver_private(dip, (caddr_t)vnetp);

	/* read the mac address */
	status = vnet_read_mac_address(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_read_macaddr;

	reg = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "reg", -1);
	if (reg == -1) {
		goto vnet_attach_fail;
	}
	vnetp->reg = reg;

	vnet_fdb_create(vnetp);
	attach_progress |= AST_fdbh_alloc;

	(void) snprintf(qname, TASKQ_NAMELEN, "vres_taskq%d", instance);
	if ((vnetp->taskqp = ddi_taskq_create(dip, qname, 1,
	    TASKQ_DEFAULTPRI, 0)) == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Unable to create task queue",
		    instance);
		goto vnet_attach_fail;
	}
	attach_progress |= AST_taskq_create;

	/* add to the list of vnet devices */
	WRITE_ENTER(&vnet_rw);
	vnetp->nextp = vnet_headp;
	vnet_headp = vnetp;
	RW_EXIT(&vnet_rw);

	attach_progress |= AST_vnet_list;

	/*
	 * Initialize the generic vnet plugin which provides communication via
	 * sun4v LDC (logical domain channel) based resources. This involves 2
	 * steps; first, vgen_init() is invoked to read the various properties
	 * of the vnet device from its MD node (including its mtu which is
	 * needed to mac_register()) and obtain a handle to the vgen layer.
	 * After mac_register() is done and we have a mac handle, we then
	 * invoke vgen_init_mdeg() which registers with the MD event
	 * generator (mdeg) framework to allow LDC resource notifications.
	 * Note: this sequence also allows us to report the correct default #
	 * of pseudo rings (2TX and 3RX) in vnet_m_capab() which gets invoked
	 * in the context of mac_register(); and avoids conflicting with
	 * dynamic pseudo rx rings which get added/removed as a result of mdeg
	 * events in vgen.
	 */
	status = vgen_init(vnetp, reg, vnetp->dip,
	    (uint8_t *)vnetp->curr_macaddr, &vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		DERR(vnetp, "vgen_init() failed\n");
		goto vnet_attach_fail;
	}
	attach_progress |= AST_vgen_init;

	status = vnet_mac_register(vnetp);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	vnetp->link_state = LINK_STATE_UNKNOWN;
	attach_progress |= AST_macreg;

	status = vgen_init_mdeg(vnetp->vgenhdl);
	if (status != DDI_SUCCESS) {
		goto vnet_attach_fail;
	}
	attach_progress |= AST_init_mdeg;

	vnetp->attach_progress = attach_progress;

	DBG1(NULL, "instance(%d) exit\n", instance);
	return (DDI_SUCCESS);

vnet_attach_fail:
	vnetp->attach_progress = attach_progress;
	status = vnet_unattach(vnetp);
	ASSERT(status == 0);
	return (DDI_FAILURE);
}

/*
 * detach(9E): detach a device from the system.
 */
static int
vnetdetach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	vnet_t		*vnetp;
	int		instance;

	instance = ddi_get_instance(dip);
	DBG1(NULL, "instance(%d) enter\n", instance);

	vnetp = ddi_get_driver_private(dip);
	if (vnetp == NULL) {
		goto vnet_detach_fail;
	}

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		goto vnet_detach_fail;
	}

	if (vnet_unattach(vnetp) != 0) {
		goto vnet_detach_fail;
	}

	return (DDI_SUCCESS);

vnet_detach_fail:
	return (DDI_FAILURE);
}

/*
 * Common routine to handle vnetattach() failure and vnetdetach(). Note that
 * the only reason this function could fail is if mac_unregister() fails.
 * Otherwise, this function must ensure that all resources are freed and return
 * success.
 */
static int
vnet_unattach(vnet_t *vnetp)
{
	vnet_attach_progress_t	attach_progress;

	attach_progress = vnetp->attach_progress;

	/*
	 * Disable the mac device in the gldv3 subsystem. This can fail, in
	 * particular if there are still any open references to this mac
	 * device; in which case we just return failure without continuing to
	 * detach further.
	 * If it succeeds, we then invoke vgen_uninit() which should unregister
	 * any pseudo rings registered with the mac layer. Note we keep the
	 * AST_macreg flag on, so we can unregister with the mac layer at
	 * the end of this routine.
	 */
	if (attach_progress & AST_macreg) {
		if (mac_disable(vnetp->mh) != 0) {
			return (1);
		}
	}

	/*
	 * Now that we have disabled the device, we must finish all other steps
	 * and successfully return from this function; otherwise we will end up
	 * leaving the device in a broken/unusable state.
	 *
	 * First, release any hybrid resources assigned to this vnet device.
	 */
	if (attach_progress & AST_vdds_init) {
		vdds_cleanup(vnetp);
		attach_progress &= ~AST_vdds_init;
	}

	/*
	 * Uninit vgen. This stops further mdeg callbacks to this vnet
	 * device and/or its ports; and detaches any existing ports.
	 */
	if (attach_progress & (AST_vgen_init|AST_init_mdeg)) {
		vgen_uninit(vnetp->vgenhdl);
		attach_progress &= ~AST_vgen_init;
		attach_progress &= ~AST_init_mdeg;
	}

	/* Destroy the taskq. */
	if (attach_progress & AST_taskq_create) {
		ddi_taskq_destroy(vnetp->taskqp);
		attach_progress &= ~AST_taskq_create;
	}

	/* Destroy the fdb hash table. */
	if (attach_progress & AST_fdbh_alloc) {
		vnet_fdb_destroy(vnetp);
		attach_progress &= ~AST_fdbh_alloc;
	}

	/* Remove from the device list */
	if (attach_progress & AST_vnet_list) {
		vnet_t		**vnetpp;
		/* unlink from instance(vnet_t) list */
		WRITE_ENTER(&vnet_rw);
		for (vnetpp = &vnet_headp; *vnetpp;
		    vnetpp = &(*vnetpp)->nextp) {
			if (*vnetpp == vnetp) {
				*vnetpp = vnetp->nextp;
				break;
			}
		}
		RW_EXIT(&vnet_rw);
		attach_progress &= ~AST_vnet_list;
	}

	if (attach_progress & AST_ring_init) {
		vnet_ring_grp_uninit(vnetp);
		attach_progress &= ~AST_ring_init;
	}

	if (attach_progress & AST_macreg) {
		VERIFY(mac_unregister(vnetp->mh) == 0);
		vnetp->mh = NULL;
		attach_progress &= ~AST_macreg;
	}

	if (attach_progress & AST_vnet_alloc) {
		rw_destroy(&vnetp->vrwlock);
		rw_destroy(&vnetp->vsw_fp_rw);
		attach_progress &= ~AST_vnet_alloc;
		kmem_free(vnetp, sizeof (vnet_t));
	}

	return (0);
}

/* enable the device for transmit/receive */
static int
vnet_m_start(void *arg)
{
	vnet_t		*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	vnetp->flags |= VNET_STARTED;
	vnet_start_resources(vnetp);
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/* stop transmit/receive for the device */
static void
vnet_m_stop(void *arg)
{
	vnet_t		*vnetp = arg;

	DBG1(vnetp, "enter\n");

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		/*
		 * Set the flags appropriately; this should prevent starting of
		 * any new resources that are added (see vnet_res_start_task()),
		 * while we release the vrwlock in vnet_stop_resources() before
		 * stopping each resource.
		 */
		vnetp->flags &= ~VNET_STARTED;
		vnetp->flags |= VNET_STOPPING;
		vnet_stop_resources(vnetp);
		vnetp->flags &= ~VNET_STOPPING;
	}
	RW_EXIT(&vnetp->vrwlock);

	DBG1(vnetp, "exit\n");
}

/* set the unicast mac address of the device */
static int
vnet_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(macaddr))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting mac address dynamically is not supported.
	 */
	DBG1(vnetp, "exit\n");

	return (VNET_FAILURE);
}

/* enable/disable a multicast address */
static int
vnet_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	_NOTE(ARGUNUSED(add, mca))

	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	int		rv = VNET_SUCCESS;

	DBG1(vnetp, "enter\n");

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->vsw_fp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (EAGAIN);
	}
	VNET_FDBE_REFHOLD(vnetp->vsw_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	rv = cbp->mc_multicst(macp->m_driver, add, mca);

	VNET_FDBE_REFRELE(vnetp->vsw_fp);

	DBG1(vnetp, "exit(%d)\n", rv);
	return (rv);
}

/* set or clear promiscuous mode on the device */
static int
vnet_m_promisc(void *arg, boolean_t on)
{
	_NOTE(ARGUNUSED(on))

	vnet_t *vnetp = arg;

	DBG1(vnetp, "enter\n");
	/*
	 * NOTE: setting promiscuous mode is not supported, just return success.
	 */
	DBG1(vnetp, "exit\n");
	return (VNET_SUCCESS);
}

/*
 * Transmit a chain of packets. This function provides switching functionality
 * based on the destination mac address to reach other guests (within ldoms) or
 * external hosts.
 */
mblk_t *
vnet_tx_ring_send(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_tx_ring_stats_t	*statsp;
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	mblk_t			*next;
	mblk_t			*resid_mp;
	mac_register_t		*macp;
	struct ether_header	*ehp;
	boolean_t		is_unicast;
	boolean_t		is_pvid;	/* non-default pvid ? */
	boolean_t		hres;		/* Hybrid resource ? */
	void			*tx_arg;
	size_t			size;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	statsp = &tx_ringp->tx_ring_stats;
	vnetp = (vnet_t *)tx_ringp->vnetp;
	DBG1(vnetp, "enter\n");
	ASSERT(mp != NULL);

	is_pvid = (vnetp->pvid != vnetp->default_vlan_id) ? B_TRUE : B_FALSE;

	while (mp != NULL) {

		next = mp->b_next;
		mp->b_next = NULL;

		/* update stats */
		size = msgsize(mp);

		/*
		 * Find fdb entry for the destination
		 * and hold a reference to it.
		 */
		ehp = (struct ether_header *)mp->b_rptr;
		vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
		if (vresp != NULL) {
			/*
			 * Destination found in FDB.
			 * The destination is a vnet device within ldoms
			 * and directly reachable, invoke the tx function
			 * in the fdb entry.
			 */
			macp = &vresp->macreg;
			resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		} else {
			is_unicast = !(IS_BROADCAST(ehp) ||
			    (IS_MULTICAST(ehp)));
			/*
			 * Destination is not in FDB.
			 * If the destination is broadcast or multicast,
			 * then forward the packet to vswitch.
			 * If a Hybrid resource is available, then send the
			 * unicast packet via hybrid resource, otherwise
			 * forward it to vswitch.
			 */
			READ_ENTER(&vnetp->vsw_fp_rw);

			if ((is_unicast) && (vnetp->hio_fp != NULL)) {
				vresp = vnetp->hio_fp;
				hres = B_TRUE;
			} else {
				vresp = vnetp->vsw_fp;
				hres = B_FALSE;
			}
			if (vresp == NULL) {
				/*
				 * no fdb entry to vsw? drop the packet.
				 */
				RW_EXIT(&vnetp->vsw_fp_rw);
				freemsg(mp);
				mp = next;
				continue;
			}

			/* ref hold the fdb entry to vsw */
			VNET_FDBE_REFHOLD(vresp);

			RW_EXIT(&vnetp->vsw_fp_rw);

			/*
			 * In the case of a hybrid resource we need to insert
			 * the tag for the pvid case here; unlike packets that
			 * are destined to a vnet/vsw in which case the vgen
			 * layer does the tagging before sending it over ldc.
			 */
			if (hres == B_TRUE) {
				/*
				 * Determine if the frame being transmitted
				 * over the hybrid resource is untagged. If so,
				 * insert the tag before transmitting.
				 */
				if (is_pvid == B_TRUE &&
				    ehp->ether_type != htons(ETHERTYPE_VLAN)) {

					mp = vnet_vlan_insert_tag(mp,
					    vnetp->pvid);
					if (mp == NULL) {
						VNET_FDBE_REFRELE(vresp);
						mp = next;
						continue;
					}

				}

				macp = &vresp->macreg;
				tx_arg = tx_ringp;
			} else {
				macp = &vresp->macreg;
				tx_arg = macp->m_driver;
			}
			resid_mp = macp->m_callbacks->mc_tx(tx_arg, mp);

			/* tx done; now release ref on fdb entry */
			VNET_FDBE_REFRELE(vresp);

			if (resid_mp != NULL) {
				/* m_tx failed */
				mp->b_next = next;
				break;
			}
		}

		statsp->opackets++;
		statsp->obytes += size;

		mp = next;
	}

	DBG1(vnetp, "exit\n");
	return (mp);
}

/* get statistics from the device */
static int
vnet_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t		*vnetp = arg;
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val_total = 0;

	DBG1(vnetp, "enter\n");

	/*
	 * get the specified statistic from each transport and return the
	 * aggregate val. This obviously only works for counters.
	 */
	if ((IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat)) ||
	    (IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat))) {
		return (ENOTSUP);
	}

	READ_ENTER(&vnetp->vrwlock);
	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		if (cbp->mc_getstat(macp->m_driver, stat, val) == 0)
			val_total += *val;
	}
	RW_EXIT(&vnetp->vrwlock);

	*val = val_total;

	DBG1(vnetp, "exit\n");
	return (0);
}

static void
vnet_ring_grp_init(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	tx_grp = &vnetp->tx_grp[0];
	tx_ringp = kmem_zalloc(sizeof (vnet_pseudo_tx_ring_t) *
	    VNET_NUM_PSEUDO_TXRINGS, KM_SLEEP);
	for (i = 0; i < VNET_NUM_PSEUDO_TXRINGS; i++) {
		tx_ringp[i].state |= VNET_TXRING_SHARED;
	}
	tx_grp->rings = tx_ringp;
	tx_grp->ring_cnt = VNET_NUM_PSEUDO_TXRINGS;
	mutex_init(&tx_grp->flowctl_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&tx_grp->flowctl_cv, NULL, CV_DRIVER, NULL);
	tx_grp->flowctl_thread = thread_create(NULL, 0,
	    vnet_tx_notify_thread, tx_grp, 0, &p0, TS_RUN, minclsyspri);

	rx_grp = &vnetp->rx_grp[0];
	rx_grp->max_ring_cnt = MAX_RINGS_PER_GROUP;
	rw_init(&rx_grp->lock, NULL, RW_DRIVER, NULL);
	rx_ringp = kmem_zalloc(sizeof (vnet_pseudo_rx_ring_t) *
	    rx_grp->max_ring_cnt, KM_SLEEP);

	/*
	 * Setup the first 3 Pseudo RX Rings that are reserved;
	 * 1 for LDC resource to vswitch + 2 for RX rings of Hybrid resource.
	 */
	rx_ringp[0].state |= VNET_RXRING_INUSE|VNET_RXRING_LDC_SERVICE;
	rx_ringp[0].index = 0;
	rx_ringp[1].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[1].index = 1;
	rx_ringp[2].state |= VNET_RXRING_INUSE|VNET_RXRING_HYBRID;
	rx_ringp[2].index = 2;

	rx_grp->ring_cnt = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	rx_grp->rings = rx_ringp;

	for (i = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    i < rx_grp->max_ring_cnt; i++) {
		rx_ringp = &rx_grp->rings[i];
		rx_ringp->state = VNET_RXRING_FREE;
		rx_ringp->index = i;
	}
}
static void
vnet_ring_grp_uninit(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	kt_did_t		tid = 0;

	tx_grp = &vnetp->tx_grp[0];

	/* Inform tx_notify_thread to exit */
	mutex_enter(&tx_grp->flowctl_lock);
	if (tx_grp->flowctl_thread != NULL) {
		tid = tx_grp->flowctl_thread->t_did;
		tx_grp->flowctl_done = B_TRUE;
		cv_signal(&tx_grp->flowctl_cv);
	}
	mutex_exit(&tx_grp->flowctl_lock);
	if (tid != 0)
		thread_join(tid);

	if (tx_grp->rings != NULL) {
		ASSERT(tx_grp->ring_cnt == VNET_NUM_PSEUDO_TXRINGS);
		kmem_free(tx_grp->rings, sizeof (vnet_pseudo_tx_ring_t) *
		    tx_grp->ring_cnt);
		tx_grp->rings = NULL;
	}

	rx_grp = &vnetp->rx_grp[0];
	if (rx_grp->rings != NULL) {
		ASSERT(rx_grp->max_ring_cnt == MAX_RINGS_PER_GROUP);
		ASSERT(rx_grp->ring_cnt == VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
		kmem_free(rx_grp->rings, sizeof (vnet_pseudo_rx_ring_t) *
		    rx_grp->max_ring_cnt);
		rx_grp->rings = NULL;
	}
}

static vnet_pseudo_rx_ring_t *
vnet_alloc_pseudo_rx_ring(vnet_t *vnetp)
{
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	int			index;

	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (rx_grp->ring_cnt == rx_grp->max_ring_cnt) {
		/* no rings available */
		RW_EXIT(&rx_grp->lock);
		return (NULL);
	}

	for (index = VNET_NUM_PSEUDO_RXRINGS_DEFAULT;
	    index < rx_grp->max_ring_cnt; index++) {
		rx_ringp = &rx_grp->rings[index];
		if (rx_ringp->state == VNET_RXRING_FREE) {
			rx_ringp->state |= VNET_RXRING_INUSE;
			rx_grp->ring_cnt++;
			break;
		}
	}

	RW_EXIT(&rx_grp->lock);
	return (rx_ringp);
}

static void
vnet_free_pseudo_rx_ring(vnet_t *vnetp, vnet_pseudo_rx_ring_t *ringp)
{
	vnet_pseudo_rx_group_t	*rx_grp;

	ASSERT(ringp->index >= VNET_NUM_PSEUDO_RXRINGS_DEFAULT);
	rx_grp = &vnetp->rx_grp[0];
	WRITE_ENTER(&rx_grp->lock);

	if (ringp->state != VNET_RXRING_FREE) {
		ringp->state = VNET_RXRING_FREE;
		ringp->handle = NULL;
		rx_grp->ring_cnt--;
	}

	RW_EXIT(&rx_grp->lock);
}
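
/*
 * Illustrative pairing (sketch): a dynamically added LDC resource obtains
 * a pseudo rx ring from the FREE pool above and returns it when unbound:
 *
 *	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
 *	...
 *	vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
 */
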
/* wrapper function for mac_register() */
static int
vnet_mac_register(vnet_t *vnetp)
{
	mac_register_t	*macp;
	int		err;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (DDI_FAILURE);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = vnetp->dip;
	macp->m_src_addr = vnetp->curr_macaddr;
	macp->m_callbacks = &vnet_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vnetp->mtu;
	macp->m_margin = VLAN_TAGSZ;

	macp->m_v12n = MAC_VIRT_LEVEL1;

	/*
	 * Finally, we're ready to register ourselves with the MAC layer
	 * interface; if this succeeds, we're all ready to start()
	 */
	err = mac_register(macp, &vnetp->mh);
	mac_free(macp);
	return (err == 0 ? DDI_SUCCESS : DDI_FAILURE);
}

/* read the mac address of the device */
static int
vnet_read_mac_address(vnet_t *vnetp)
{
	uchar_t		*macaddr;
	uint32_t	size;
	int		rv;

	rv = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, vnetp->dip,
	    DDI_PROP_DONTPASS, macaddr_propname, &macaddr, &size);
	if ((rv != DDI_PROP_SUCCESS) || (size != ETHERADDRL)) {
		DWARN(vnetp, "prop_lookup failed(%s) err(%d)\n",
		    macaddr_propname, rv);
		return (DDI_FAILURE);
	}
	bcopy(macaddr, (caddr_t)vnetp->vendor_addr, ETHERADDRL);
	bcopy(macaddr, (caddr_t)vnetp->curr_macaddr, ETHERADDRL);
	ddi_prop_free(macaddr);

	return (DDI_SUCCESS);
}

static void
vnet_fdb_create(vnet_t *vnetp)
{
	char		hashname[MAXNAMELEN];

	(void) snprintf(hashname, MAXNAMELEN, "vnet%d-fdbhash",
	    vnetp->instance);
	vnetp->fdb_nchains = vnet_fdb_nchains;
	vnetp->fdb_hashp = mod_hash_create_ptrhash(hashname, vnetp->fdb_nchains,
	    mod_hash_null_valdtor, sizeof (void *));
}

static void
vnet_fdb_destroy(vnet_t *vnetp)
{
	/* destroy fdb-hash-table */
	if (vnetp->fdb_hashp != NULL) {
		mod_hash_destroy_hash(vnetp->fdb_hashp);
		vnetp->fdb_hashp = NULL;
		vnetp->fdb_nchains = 0;
	}
}

/*
 * Add an entry into the fdb.
 */
void
vnet_fdbe_add(vnet_t *vnetp, vnet_res_t *vresp)
{
	uint64_t	addr = 0;
	int		rv;

	KEY_HASH(addr, vresp->rem_macaddr);

	/*
	 * If the entry being added corresponds to LDC_SERVICE resource,
	 * that is, vswitch connection, it is added to the hash and also
	 * the entry is cached, an additional reference count reflects
	 * this. The HYBRID resource is not added to the hash, but only
	 * cached, as it is only used for sending out packets for unknown
	 * unicast destinations.
	 */
	(vresp->type == VIO_NET_RES_LDC_SERVICE) ?
	    (vresp->refcnt = 1) : (vresp->refcnt = 0);

	/*
	 * Note: duplicate keys will be rejected by mod_hash.
	 */
	if (vresp->type != VIO_NET_RES_HYBRID) {
		rv = mod_hash_insert(vnetp->fdb_hashp, (mod_hash_key_t)addr,
		    (mod_hash_val_t)vresp);
		if (rv != 0) {
			DWARN(vnetp, "Duplicate macaddr key(%lx)\n", addr);
			return;
		}
	}

	if (vresp->type == VIO_NET_RES_LDC_SERVICE) {
		/* Cache the fdb entry to vsw-port */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->vsw_fp == NULL)
			vnetp->vsw_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	} else if (vresp->type == VIO_NET_RES_HYBRID) {
		/* Cache the fdb entry to hybrid resource */
		WRITE_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL)
			vnetp->hio_fp = vresp;
		RW_EXIT(&vnetp->vsw_fp_rw);
	}
}
1229 vnet_fdbe_del(vnet_t
*vnetp
, vnet_res_t
*vresp
)
1236 KEY_HASH(addr
, vresp
->rem_macaddr
);
1239 * Remove the entry from fdb hash table.
1240 * This prevents further references to this fdb entry.
1242 if (vresp
->type
!= VIO_NET_RES_HYBRID
) {
1243 rv
= mod_hash_remove(vnetp
->fdb_hashp
, (mod_hash_key_t
)addr
,
1244 (mod_hash_val_t
*)&tmp
);
1247 * As the resources are added to the hash only
1248 * after they are started, this can occur if
1249 * a resource unregisters before it is ever started.
1255 if (vresp
->type
== VIO_NET_RES_LDC_SERVICE
) {
1256 WRITE_ENTER(&vnetp
->vsw_fp_rw
);
1258 ASSERT(tmp
== vnetp
->vsw_fp
);
1259 vnetp
->vsw_fp
= NULL
;
1261 RW_EXIT(&vnetp
->vsw_fp_rw
);
1262 } else if (vresp
->type
== VIO_NET_RES_HYBRID
) {
1263 WRITE_ENTER(&vnetp
->vsw_fp_rw
);
1265 vnetp
->hio_fp
= NULL
;
1267 RW_EXIT(&vnetp
->vsw_fp_rw
);
1271 * If there are threads already ref holding before the entry was
1272 * removed from hash table, then wait for ref count to drop to zero.
1274 (vresp
->type
== VIO_NET_RES_LDC_SERVICE
) ?
1275 (refcnt
= 1) : (refcnt
= 0);
1276 while (vresp
->refcnt
> refcnt
) {
1277 delay(drv_usectohz(vnet_fdbe_refcnt_delay
));
/*
 * Search fdb for a given mac address. If an entry is found, hold
 * a reference to it and return the entry; else returns NULL.
 */
static vnet_res_t *
vnet_fdbe_find(vnet_t *vnetp, struct ether_addr *addrp)
{
	uint64_t	key = 0;
	vnet_res_t	*vresp;
	int		rv;

	KEY_HASH(key, addrp->ether_addr_octet);

	rv = mod_hash_find_cb(vnetp->fdb_hashp, (mod_hash_key_t)key,
	    (mod_hash_val_t *)&vresp, vnet_fdbe_find_cb);

	if (rv != 0)
		return (NULL);

	return (vresp);
}
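
/*
 * Caller contract (example): the entry comes back with its refcnt already
 * bumped by vnet_fdbe_find_cb(), so every successful lookup must be paired
 * with a release, as in vnet_tx_ring_send():
 *
 *	vresp = vnet_fdbe_find(vnetp, &ehp->ether_dhost);
 *	if (vresp != NULL) {
 *		macp = &vresp->macreg;
 *		resid_mp = macp->m_callbacks->mc_tx(macp->m_driver, mp);
 *		VNET_FDBE_REFRELE(vresp);
 *	}
 */
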
/*
 * Callback function provided to mod_hash_find_cb(). After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vnet_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	VNET_FDBE_REFHOLD((vnet_res_t *)val);
}

/*
 * Frames received that are tagged with the pvid of the vnet device must be
 * untagged before sending up the stack. This function walks the chain of rx
 * frames, untags any such frames and returns the updated chain.
 *
 * Arguments:
 *    pvid:  pvid of the vnet device for which packets are being received
 *    mp:    head of pkt chain to be validated and untagged
 *
 * Returns:
 *    mp:    head of updated chain of packets
 */
static void
vnet_rx_frames_untag(uint16_t pvid, mblk_t **mp)
{
	struct ether_vlan_header	*evhp;
	mblk_t				*bp;
	mblk_t				*bpt;
	mblk_t				*bph;
	mblk_t				*bpn;

	bpn = bph = bpt = NULL;

	for (bp = *mp; bp != NULL; bp = bpn) {

		bpn = bp->b_next;
		bp->b_next = bp->b_prev = NULL;

		evhp = (struct ether_vlan_header *)bp->b_rptr;

		if (ntohs(evhp->ether_tpid) == ETHERTYPE_VLAN &&
		    VLAN_ID(ntohs(evhp->ether_tci)) == pvid) {

			bp = vnet_vlan_remove_tag(bp);
			if (bp == NULL) {
				continue;
			}

		}

		/* build a chain of processed packets */
		if (bph == NULL) {
			bph = bpt = bp;
		} else {
			bpt->b_next = bp;
			bpt = bp;
		}

	}

	*mp = bph;
}

static void
vnet_rx(vio_net_handle_t vrh, mblk_t *mp)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_rx_ring_t	*ringp;

	if ((vnetp == NULL) || (vnetp->mh == 0)) {
		freemsgchain(mp);
		return;
	}

	ringp = vresp->rx_ringp;
	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}

static void
vnet_tx_update(vio_net_handle_t vrh)
{
	vnet_res_t		*vresp = (vnet_res_t *)vrh;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	int			i;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	/*
	 * Currently, the tx hwring API (used to access rings that belong to
	 * a Hybrid IO resource) does not provide us a per ring flow ctrl
	 * update; also the pseudo rings are shared by the ports/ldcs in the
	 * vgen layer. Thus we can't figure out which pseudo ring is being
	 * re-enabled for transmits. To work around this, when we get a tx
	 * restart notification from below, we simply propagate that to all
	 * the tx pseudo rings registered with the mac layer above.
	 *
	 * There are a couple of side effects with this approach, but they are
	 * not harmful, as outlined below:
	 *
	 * A) We might send an invalid ring_update() for a ring that is not
	 * really flow controlled. This will not have any effect in the mac
	 * layer and packets will continue to be transmitted on that ring.
	 *
	 * B) We might end up clearing the flow control in the mac layer for
	 * a ring that is still flow controlled in the underlying resource.
	 * This will result in the mac layer restarting transmit, only to be
	 * flow controlled again on that ring.
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
	}
}

/*
 * vnet_tx_notify_thread:
 *
 * vnet_tx_ring_update() callback function wakes up this thread when
 * it gets called. This thread will call mac_tx_ring_update() to
 * notify upper mac of flow control getting relieved. Note that
 * vnet_tx_ring_update() cannot call mac_tx_ring_update() directly
 * because vnet_tx_ring_update() is called from lower mac with
 * mi_rw_lock held and mac_tx_ring_update() would also try to grab
 * the same lock.
 */
static void
vnet_tx_notify_thread(void *arg)
{
	callb_cpr_t		cprinfo;
	vnet_pseudo_tx_group_t	*tx_grp = (vnet_pseudo_tx_group_t *)arg;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	vnet_t			*vnetp;
	int			i;

	CALLB_CPR_INIT(&cprinfo, &tx_grp->flowctl_lock, callb_generic_cpr,
	    "vnet_tx_notify_thread");

	mutex_enter(&tx_grp->flowctl_lock);
	while (!tx_grp->flowctl_done) {
		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		cv_wait(&tx_grp->flowctl_cv, &tx_grp->flowctl_lock);
		CALLB_CPR_SAFE_END(&cprinfo, &tx_grp->flowctl_lock);

		for (i = 0; i < tx_grp->ring_cnt; i++) {
			tx_ringp = &tx_grp->rings[i];
			if (tx_ringp->woken_up) {
				tx_ringp->woken_up = B_FALSE;
				vnetp = tx_ringp->vnetp;
				mac_tx_ring_update(vnetp->mh, tx_ringp->handle);
			}
		}
	}
	/*
	 * The tx_grp is being destroyed, exit the thread.
	 */
	tx_grp->flowctl_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

void
vnet_tx_ring_update(void *arg1, uintptr_t arg2)
{
	vnet_t			*vnetp = (vnet_t *)arg1;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->hw_rh == (mac_ring_handle_t)arg2) {
			mutex_enter(&tx_grp->flowctl_lock);
			tx_ringp->woken_up = B_TRUE;
			cv_signal(&tx_grp->flowctl_cv);
			mutex_exit(&tx_grp->flowctl_lock);
			break;
		}
	}
}

/*
 * Update the new mtu of vnet into the mac layer. First check if the device has
 * been plumbed and if so fail the mtu update. Returns 0 on success.
 */
int
vnet_mtu_update(vnet_t *vnetp, uint32_t mtu)
{
	int	rv;

	if (vnetp == NULL || vnetp->mh == NULL) {
		return (EINVAL);
	}

	WRITE_ENTER(&vnetp->vrwlock);

	if (vnetp->flags & VNET_STARTED) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu "
		    "update as the device is plumbed\n",
		    vnetp->instance);
		return (EBUSY);
	}

	/* update mtu in the mac layer */
	rv = mac_maxsdu_update(vnetp->mh, mtu);
	if (rv != 0) {
		RW_EXIT(&vnetp->vrwlock);
		cmn_err(CE_NOTE,
		    "!vnet%d: Unable to update mtu with mac layer\n",
		    vnetp->instance);
		return (EIO);
	}

	vnetp->mtu = mtu;

	RW_EXIT(&vnetp->vrwlock);

	return (0);
}

/*
 * Update the link state of vnet to the mac layer.
 */
void
vnet_link_update(vnet_t *vnetp, link_state_t link_state)
{
	if (vnetp == NULL || vnetp->mh == NULL) {
		return;
	}

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->link_state == link_state) {
		RW_EXIT(&vnetp->vrwlock);
		return;
	}
	vnetp->link_state = link_state;
	RW_EXIT(&vnetp->vrwlock);

	mac_link_update(vnetp->mh, link_state);
}

/*
 * vio_net_resource_reg -- An interface called to register a resource
 *	with vnet.
 *	macp -- a GLDv3 mac_register that has all the details of
 *		a resource and its callbacks etc.
 *	type -- resource type.
 *	local_macaddr -- resource's MAC address. This is used to
 *			 associate a resource with a corresponding vnet.
 *	remote_macaddr -- remote side MAC address. This is ignored for
 *			  the Hybrid resources.
 *	vhp -- A handle returned to the caller.
 *	vcb -- A set of callbacks provided to the callers.
 */
int vio_net_resource_reg(mac_register_t *macp, vio_net_res_type_t type,
    ether_addr_t local_macaddr, ether_addr_t rem_macaddr, vio_net_handle_t *vhp,
    vio_net_callbacks_t *vcb)
{
	vnet_t		*vnetp;
	vnet_res_t	*vresp;

	vresp = kmem_zalloc(sizeof (vnet_res_t), KM_SLEEP);
	ether_copy(local_macaddr, vresp->local_macaddr);
	ether_copy(rem_macaddr, vresp->rem_macaddr);
	vresp->type = type;
	bcopy(macp, &vresp->macreg, sizeof (mac_register_t));

	DBG1(NULL, "Resource Registering type=0%X\n", type);

	READ_ENTER(&vnet_rw);
	vnetp = vnet_headp;
	while (vnetp != NULL) {
		if (VNET_MATCH_RES(vresp, vnetp)) {
			vresp->vnetp = vnetp;

			/* Setup kstats for hio resource */
			if (vresp->type == VIO_NET_RES_HYBRID) {
				vresp->ksp = vnet_hio_setup_kstats(DRV_NAME,
				    "hio", vresp);
				if (vresp->ksp == NULL) {
					cmn_err(CE_NOTE, "!vnet%d: Cannot "
					    "create kstats for hio resource",
					    vnetp->instance);
				}
			}
			vnet_add_resource(vnetp, vresp);
			break;
		}
		vnetp = vnetp->nextp;
	}
	RW_EXIT(&vnet_rw);
	if (vresp->vnetp == NULL) {
		DWARN(NULL, "No vnet instance");
		kmem_free(vresp, sizeof (vnet_res_t));
		return (ENXIO);
	}

	*vhp = vresp;
	vcb->vio_net_rx_cb = vnet_rx;
	vcb->vio_net_tx_update = vnet_tx_update;
	vcb->vio_net_report_err = vnet_handle_res_err;

	/* Bind the resource to pseudo ring(s) */
	if (vnet_bind_rings(vresp) != 0) {
		(void) vnet_rem_resource(vnetp, vresp);
		vnet_hio_destroy_kstats(vresp->ksp);
		kmem_free(vresp, sizeof (vnet_res_t));
		return (1);
	}

	/* Dispatch a task to start resources */
	vnet_dispatch_res_task(vnetp);
	return (0);
}
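
/*
 * Illustrative caller sketch (assumed shape; the real call sites are in
 * vnet_gen/vnet_dds, outside this file): a plugin registers each resource
 * with its GLDv3 mac_register and addresses, and gets back a handle plus
 * rx/tx-update/error callbacks:
 *
 *	rv = vio_net_resource_reg(&macreg, VIO_NET_RES_LDC_GUEST,
 *	    local_addr, rem_addr, &vhp, &vcb);
 */
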
/*
 * vio_net_resource_unreg -- An interface to unregister a resource.
 */
void
vio_net_resource_unreg(vio_net_handle_t vhp)
{
	vnet_res_t	*vresp = (vnet_res_t *)vhp;
	vnet_t		*vnetp = vresp->vnetp;

	DBG1(NULL, "Resource Unregistering hdl=0x%p", vhp);

	ASSERT(vnetp != NULL);
	/*
	 * Remove the resource from fdb; this ensures
	 * there are no references to the resource.
	 */
	vnet_fdbe_del(vnetp, vresp);

	vnet_unbind_rings(vresp);

	/* Now remove the resource from the list */
	(void) vnet_rem_resource(vnetp, vresp);

	vnet_hio_destroy_kstats(vresp->ksp);
	kmem_free(vresp, sizeof (vnet_res_t));
}

static void
vnet_add_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	WRITE_ENTER(&vnetp->vrwlock);
	vresp->nextp = vnetp->vres_list;
	vnetp->vres_list = vresp;
	RW_EXIT(&vnetp->vrwlock);
}

static vnet_res_t *
vnet_rem_resource(vnet_t *vnetp, vnet_res_t *vresp)
{
	vnet_res_t	*vrp;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vresp == vnetp->vres_list) {
		vnetp->vres_list = vresp->nextp;
	} else {
		vrp = vnetp->vres_list;
		while (vrp->nextp != NULL) {
			if (vrp->nextp == vresp) {
				vrp->nextp = vresp->nextp;
				break;
			}
			vrp = vrp->nextp;
		}
	}
	vresp->vnetp = NULL;
	vresp->nextp = NULL;

	RW_EXIT(&vnetp->vrwlock);

	return (vresp);
}

/*
 * vnet_dds_rx -- an interface called by vgen to process DDS messages.
 */
void
vnet_dds_rx(void *arg, void *dmsg)
{
	vnet_t *vnetp = arg;

	vdds_process_dds_msg(vnetp, dmsg);
}

/*
 * vnet_send_dds_msg -- An interface provided to DDS to send
 *	DDS messages. This simply sends messages via vgen.
 */
int
vnet_send_dds_msg(vnet_t *vnetp, void *dmsg)
{
	int rv = 0;

	if (vnetp->vgenhdl != NULL) {
		rv = vgen_dds_tx(vnetp->vgenhdl, dmsg);
	}
	return (rv);
}

/*
 * vnet_dds_cleanup_hio -- an interface called by vgen to cleanup hio resources.
 */
void
vnet_dds_cleanup_hio(vnet_t *vnetp)
{
	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_handle_res_err -- A callback function called by a resource
 *	to report an error. For example, vgen can call to report
 *	an LDC down/reset event. This will trigger cleanup of associated
 *	Hybrid resource.
 */
static void
vnet_handle_res_err(vio_net_handle_t vrh, vio_net_err_val_t err)
{
	vnet_res_t	*vresp = (vnet_res_t *)vrh;
	vnet_t		*vnetp = vresp->vnetp;

	if (vnetp == NULL) {
		return;
	}
	if ((vresp->type != VIO_NET_RES_LDC_SERVICE) &&
	    (vresp->type != VIO_NET_RES_HYBRID)) {
		return;
	}

	vdds_cleanup_hio(vnetp);
}

/*
 * vnet_dispatch_res_task -- A function to dispatch a task that starts
 *	resources.
 */
static void
vnet_dispatch_res_task(vnet_t *vnetp)
{
	int rv;

	/*
	 * Dispatch the task. It could be the case that vnetp->flags does
	 * not have VNET_STARTED set. This is ok as vnet_res_start_task()
	 * can abort the task when the task is started. See related comments
	 * in vnet_m_stop() and vnet_stop_resources().
	 */
	rv = ddi_taskq_dispatch(vnetp->taskqp, vnet_res_start_task,
	    vnetp, DDI_NOSLEEP);
	if (rv != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "vnet%d:Can't dispatch start resource task",
		    vnetp->instance);
	}
}

/*
 * vnet_res_start_task -- A taskq callback function that starts a resource.
 */
static void
vnet_res_start_task(void *arg)
{
	vnet_t *vnetp = arg;

	WRITE_ENTER(&vnetp->vrwlock);
	if (vnetp->flags & VNET_STARTED) {
		vnet_start_resources(vnetp);
	}
	RW_EXIT(&vnetp->vrwlock);
}

/*
 * vnet_start_resources -- starts all resources associated with
 *	a vnet.
 */
static void
vnet_start_resources(vnet_t *vnetp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;
	int		rv;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; vresp = vresp->nextp) {
		/* skip if it is already started */
		if (vresp->flags & VNET_STARTED) {
			continue;
		}
		macp = &vresp->macreg;
		cbp = macp->m_callbacks;
		rv = cbp->mc_start(macp->m_driver);
		if (rv == 0) {
			/*
			 * Successfully started the resource, so now
			 * add it to the fdb.
			 */
			vresp->flags |= VNET_STARTED;
			vnet_fdbe_add(vnetp, vresp);
		}
	}

	DBG1(vnetp, "exit\n");
}

/*
 * vnet_stop_resources -- stop all resources associated with a vnet.
 */
static void
vnet_stop_resources(vnet_t *vnetp)
{
	vnet_res_t	*vresp;
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;

	DBG1(vnetp, "enter\n");

	ASSERT(RW_WRITE_HELD(&vnetp->vrwlock));

	for (vresp = vnetp->vres_list; vresp != NULL; ) {
		if (vresp->flags & VNET_STARTED) {
			/*
			 * Release the lock while invoking mc_stop() of the
			 * underlying resource. We hold a reference to this
			 * resource to prevent being removed from the list in
			 * vio_net_resource_unreg(). Note that new resources
			 * can be added to the head of the list while the lock
			 * is released, but they won't be started, as
			 * VNET_STARTED flag has been cleared for the vnet
			 * device in vnet_m_stop(). Also, while the lock is
			 * released a resource could be removed from the list
			 * in vio_net_resource_unreg(); but that is ok, as we
			 * re-acquire the lock and only then access the forward
			 * link (vresp->nextp) to continue with the next
			 * resource.
			 */
			vresp->flags &= ~VNET_STARTED;
			vresp->flags |= VNET_STOPPING;
			macp = &vresp->macreg;
			cbp = macp->m_callbacks;
			VNET_FDBE_REFHOLD(vresp);
			RW_EXIT(&vnetp->vrwlock);

			cbp->mc_stop(macp->m_driver);

			WRITE_ENTER(&vnetp->vrwlock);
			vresp->flags &= ~VNET_STOPPING;
			VNET_FDBE_REFRELE(vresp);
		}
		vresp = vresp->nextp;
	}
	DBG1(vnetp, "exit\n");
}

/*
 * Setup kstats for the HIO statistics.
 * NOTE: the synchronization for the statistics is the
 * responsibility of the caller.
 */
static kstat_t *
vnet_hio_setup_kstats(char *ks_mod, char *ks_name, vnet_res_t *vresp)
{
	kstat_t			*ksp;
	vnet_t			*vnetp = vresp->vnetp;
	vnet_hio_kstats_t	*hiokp;
	size_t			size;

	ASSERT(vnetp != NULL);
	size = sizeof (vnet_hio_kstats_t) / sizeof (kstat_named_t);
	ksp = kstat_create(ks_mod, vnetp->instance, ks_name, "net",
	    KSTAT_TYPE_NAMED, size, 0);
	if (ksp == NULL) {
		return (NULL);
	}

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;
	kstat_named_init(&hiokp->ipackets,	"ipackets",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->ierrors,	"ierrors",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->opackets,	"opackets",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->oerrors,	"oerrors",
	    KSTAT_DATA_ULONG);

	/* MIB II kstat variables */
	kstat_named_init(&hiokp->rbytes,	"rbytes",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->obytes,	"obytes",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multircv,	"multircv",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->multixmt,	"multixmt",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstrcv,	"brdcstrcv",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->brdcstxmt,	"brdcstxmt",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->norcvbuf,	"norcvbuf",
	    KSTAT_DATA_ULONG);
	kstat_named_init(&hiokp->noxmtbuf,	"noxmtbuf",
	    KSTAT_DATA_ULONG);

	ksp->ks_update = vnet_hio_update_kstats;
	ksp->ks_private = (void *)vresp;
	kstat_install(ksp);
	return (ksp);
}
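
/*
 * Observability note: once installed, these counters are readable with
 * kstat(1M), e.g. `kstat -m vnet -n <ks_name>`; that the name is "hio"
 * is an assumption based on the registration call in
 * vio_net_resource_reg() above.
 */
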
static void
vnet_hio_destroy_kstats(kstat_t *ksp)
{
	if (ksp != NULL)
		kstat_delete(ksp);
}

/*
 * Update the kstats.
 */
static int
vnet_hio_update_kstats(kstat_t *ksp, int rw)
{
	vnet_t			*vnetp;
	vnet_res_t		*vresp;
	vnet_hio_stats_t	statsp;
	vnet_hio_kstats_t	*hiokp;

	vresp = (vnet_res_t *)ksp->ks_private;
	vnetp = vresp->vnetp;

	bzero(&statsp, sizeof (vnet_hio_stats_t));

	READ_ENTER(&vnetp->vsw_fp_rw);
	if (vnetp->hio_fp == NULL) {
		/* not using hio resources, just return */
		RW_EXIT(&vnetp->vsw_fp_rw);
		return (0);
	}
	VNET_FDBE_REFHOLD(vnetp->hio_fp);
	RW_EXIT(&vnetp->vsw_fp_rw);
	vnet_hio_get_stats(vnetp->hio_fp, &statsp);
	VNET_FDBE_REFRELE(vnetp->hio_fp);

	hiokp = (vnet_hio_kstats_t *)ksp->ks_data;

	if (rw == KSTAT_READ) {
		/* Link Input/Output stats */
		hiokp->ipackets.value.ul	= (uint32_t)statsp.ipackets;
		hiokp->ipackets64.value.ull	= statsp.ipackets;
		hiokp->ierrors.value.ul		= statsp.ierrors;
		hiokp->opackets.value.ul	= (uint32_t)statsp.opackets;
		hiokp->opackets64.value.ull	= statsp.opackets;
		hiokp->oerrors.value.ul		= statsp.oerrors;

		/* MIB II kstat variables */
		hiokp->rbytes.value.ul		= (uint32_t)statsp.rbytes;
		hiokp->rbytes64.value.ull	= statsp.rbytes;
		hiokp->obytes.value.ul		= (uint32_t)statsp.obytes;
		hiokp->obytes64.value.ull	= statsp.obytes;
		hiokp->multircv.value.ul	= statsp.multircv;
		hiokp->multixmt.value.ul	= statsp.multixmt;
		hiokp->brdcstrcv.value.ul	= statsp.brdcstrcv;
		hiokp->brdcstxmt.value.ul	= statsp.brdcstxmt;
		hiokp->norcvbuf.value.ul	= statsp.norcvbuf;
		hiokp->noxmtbuf.value.ul	= statsp.noxmtbuf;
	} else {
		return (EACCES);
	}

	return (0);
}
static void
vnet_hio_get_stats(vnet_res_t *vresp, vnet_hio_stats_t *statsp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	uint64_t	val;
	int		stat;

	/*
	 * get the specified statistics from the underlying nxge.
	 */
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	for (stat = MAC_STAT_MIN; stat < MAC_STAT_OVERFLOWS; stat++) {
		if (cbp->mc_getstat(macp->m_driver, stat, &val) == 0) {
			switch (stat) {
			case MAC_STAT_IPACKETS:
				statsp->ipackets = val;
				break;

			case MAC_STAT_IERRORS:
				statsp->ierrors = val;
				break;

			case MAC_STAT_OPACKETS:
				statsp->opackets = val;
				break;

			case MAC_STAT_OERRORS:
				statsp->oerrors = val;
				break;

			case MAC_STAT_RBYTES:
				statsp->rbytes = val;
				break;

			case MAC_STAT_OBYTES:
				statsp->obytes = val;
				break;

			case MAC_STAT_MULTIRCV:
				statsp->multircv = val;
				break;

			case MAC_STAT_MULTIXMT:
				statsp->multixmt = val;
				break;

			case MAC_STAT_BRDCSTRCV:
				statsp->brdcstrcv = val;
				break;

			case MAC_STAT_BRDCSTXMT:
				statsp->brdcstxmt = val;
				break;

			case MAC_STAT_NOXMTBUF:
				statsp->noxmtbuf = val;
				break;

			case MAC_STAT_NORCVBUF:
				statsp->norcvbuf = val;
				break;

			default:
				/*
				 * parameters we are not interested in.
				 */
				break;
			}
		}
	}
}

static boolean_t
vnet_m_capab(void *arg, mac_capab_t cap, void *cap_data)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		return (0);
	}

	switch (cap) {

	case MAC_CAPAB_RINGS: {

		mac_capab_rings_t *cap_rings = cap_data;
		/*
		 * Rings Capability Notes:
		 * We advertise rings to make use of the rings framework in
		 * gldv3 mac layer, to improve the performance. This is
		 * specifically needed when a Hybrid resource (with multiple
		 * tx/rx hardware rings) is assigned to a vnet device. We also
		 * leverage this for the normal case when no Hybrid resource is
		 * assigned.
		 *
		 * Ring Allocation:
		 * TX: We expose a pseudo ring group with 2 pseudo tx rings (as
		 * currently HybridIO exports only 2 rings). In the normal case,
		 * transmit traffic that comes down to the driver through the
		 * mri_tx (vnet_tx_ring_send()) entry point goes through the
		 * distributed switching algorithm in vnet and gets transmitted
		 * over a port/LDC in the vgen layer to either the vswitch or a
		 * peer vnet. If and when a Hybrid resource is assigned to the
		 * vnet, we obtain the tx ring information of the Hybrid device
		 * (nxge) and map the pseudo rings 1:1 to the 2 hw tx rings.
		 * Traffic being sent over the Hybrid resource by the mac layer
		 * gets spread across both hw rings, as they are mapped to the
		 * 2 pseudo tx rings in vnet.
		 *
		 * RX: We expose a pseudo ring group with 3 pseudo rx rings
		 * (static rings) initially. The first (default) pseudo rx ring
		 * is reserved for the resource that connects to the vswitch
		 * service. The next 2 rings are reserved for a Hybrid resource
		 * that may be assigned to the vnet device. If and when a
		 * Hybrid resource is assigned to the vnet, we obtain the rx
		 * ring information of the Hybrid device (nxge) and map these
		 * pseudo rings 1:1 to the 2 hw rx rings. For each additional
		 * resource that connects to a peer vnet, we dynamically
		 * allocate a pseudo rx ring and map it to that resource, when
		 * the resource gets added; and the pseudo rx ring is
		 * dynamically registered with the upper mac layer. We do the
		 * reverse and unregister the ring with the mac layer when
		 * the resource gets removed.
		 *
		 * Synchronization notes:
		 * We don't need any lock to protect members of ring structure,
		 * specifically ringp->hw_rh, in either the TX or the RX ring,
		 * as explained below.
		 *
		 * TX ring:
		 * ring->hw_rh is initialized only when a Hybrid resource is
		 * associated; and gets referenced only in vnet_hio_tx(). The
		 * Hybrid resource itself is available in fdb only after tx
		 * hwrings are found and mapped; i.e, in vio_net_resource_reg()
		 * we call vnet_bind_rings() first and then call
		 * vnet_start_resources() which adds an entry to fdb. For
		 * traffic going over LDC resources, we don't reference
		 * ring->hw_rh at all.
		 *
		 * RX ring:
		 * For rings mapped to Hybrid resource ring->hw_rh is
		 * initialized and only then do we add the rx callback for
		 * the underlying Hybrid resource; we disable callbacks before
		 * we unmap ring->hw_rh. For rings mapped to LDC resources, we
		 * stop the rx callbacks (in vgen) before we remove ring->hw_rh
		 * (vio_net_resource_unreg()).
		 * Also, we access ring->hw_rh in vnet_rx_ring_stat().
		 * Note that for rings mapped to Hybrid resource, though the
		 * rings are statically registered with the mac layer, its
		 * hardware ring mapping (ringp->hw_rh) can be torn down in
		 * vnet_unbind_hwrings() while the kstat operation is in
		 * progress. To protect against this, we hold a reference to
		 * the resource in FDB; this ensures that the thread in
		 * vio_net_resource_unreg() waits for the reference to be
		 * dropped before unbinding the ring.
		 *
		 * We don't need to do this for rings mapped to LDC resources.
		 * These rings are registered/unregistered dynamically with
		 * the mac layer and so any attempt to unregister the ring
		 * while kstat operation is in progress will block in
		 * mac_group_rem_ring(). This implicitly protects the
		 * resource (ringp->hw_rh) from disappearing.
		 */

		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for rx grp is initialized in
			 * vnet_ring_grp_init(). Later, the ring_cnt gets
			 * updated dynamically whenever LDC resources are added
			 * or removed.
			 */
			cap_rings->mr_rnum = vnetp->rx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			cap_rings->mr_gnum = VNET_NUM_PSEUDO_GROUPS;
			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		} else {
			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;

			/*
			 * The ring_cnt for tx grp is initialized in
			 * vnet_ring_grp_init() and remains constant, as we
			 * do not support dynamic tx rings for now.
			 */
			cap_rings->mr_rnum = vnetp->tx_grp[0].ring_cnt;
			cap_rings->mr_rget = vnet_get_ring;

			/*
			 * Transmit rings are not grouped; i.e, the number of
			 * transmit ring groups advertised should be set to 0.
			 */
			cap_rings->mr_gnum = 0;

			cap_rings->mr_gget = vnet_get_group;
			cap_rings->mr_gaddring = NULL;
			cap_rings->mr_gremring = NULL;
		}
		return (B_TRUE);

	}

	default:
		break;

	}

	return (B_FALSE);
}

2211 * Callback funtion for MAC layer to get ring information.
2214 vnet_get_ring(void *arg
, mac_ring_type_t rtype
, const int g_index
,
2215 const int r_index
, mac_ring_info_t
*infop
, mac_ring_handle_t r_handle
)
2217 vnet_t
*vnetp
= arg
;
2221 case MAC_RING_TYPE_RX
: {
2223 vnet_pseudo_rx_group_t
*rx_grp
;
2224 vnet_pseudo_rx_ring_t
*rx_ringp
;
2227 /* We advertised only one RX group */
2228 ASSERT(g_index
== 0);
2229 rx_grp
= &vnetp
->rx_grp
[g_index
];
2231 /* Check the current # of rings in the rx group */
2232 ASSERT((r_index
>= 0) && (r_index
< rx_grp
->max_ring_cnt
));
2234 /* Get the ring based on the index */
2235 rx_ringp
= &rx_grp
->rings
[r_index
];
2237 rx_ringp
->handle
= r_handle
;
2239 * Note: we don't need to save the incoming r_index in rx_ring,
2240 * as vnet_ring_grp_init() would have initialized the index for
2241 * each ring in the array.
2243 rx_ringp
->grp
= rx_grp
;
2244 rx_ringp
->vnetp
= vnetp
;
2246 mintr
= &infop
->mri_intr
;
2247 mintr
->mi_handle
= (mac_intr_handle_t
)rx_ringp
;
2248 mintr
->mi_enable
= (mac_intr_enable_t
)vnet_ring_enable_intr
;
2249 mintr
->mi_disable
= (mac_intr_disable_t
)vnet_ring_disable_intr
;
2251 infop
->mri_driver
= (mac_ring_driver_t
)rx_ringp
;
2252 infop
->mri_start
= vnet_rx_ring_start
;
2253 infop
->mri_stop
= vnet_rx_ring_stop
;
2254 infop
->mri_stat
= vnet_rx_ring_stat
;
2256 /* Set the poll function, as this is an rx ring */
2257 infop
->mri_poll
= vnet_rx_poll
;
2259 * MAC_RING_RX_ENQUEUE bit needed to be set for nxge
2260 * which was not sending packet chains in interrupt
2261 * context. For such drivers, packets are queued in
2262 * Rx soft rings so that we get a chance to switch
2263 * into a polling mode under backlog. This bug (not
2264 * sending packet chains) has now been fixed. Once
2265 * the performance impact is measured, this change
2268 infop
->mri_flags
= (vnet_mac_rx_queuing
?
2269 MAC_RING_RX_ENQUEUE
: 0);

	case MAC_RING_TYPE_TX: {
		vnet_pseudo_tx_group_t	*tx_grp;
		vnet_pseudo_tx_ring_t	*tx_ringp;

		/*
		 * No need to check grp index; mac layer passes -1 for it.
		 */
		tx_grp = &vnetp->tx_grp[0];

		/* Check the # of rings in the tx group */
		ASSERT((r_index >= 0) && (r_index < tx_grp->ring_cnt));

		/* Get the ring based on the index */
		tx_ringp = &tx_grp->rings[r_index];

		tx_ringp->handle = r_handle;
		tx_ringp->index = r_index;
		tx_ringp->grp = tx_grp;
		tx_ringp->vnetp = vnetp;

		infop->mri_driver = (mac_ring_driver_t)tx_ringp;
		infop->mri_start = vnet_tx_ring_start;
		infop->mri_stop = vnet_tx_ring_stop;
		infop->mri_stat = vnet_tx_ring_stat;

		/* Set the transmit function, as this is a tx ring */
		infop->mri_tx = vnet_tx_ring_send;
		/*
		 * MAC_RING_TX_SERIALIZE bit needs to be set while
		 * hybridIO is enabled to workaround tx lock
		 * contention issues in nxge.
		 */
		infop->mri_flags = (vnet_mac_tx_serialize ?
		    MAC_RING_TX_SERIALIZE : 0);
		break;
	}

	default:
		break;
	}
}

/*
 * Callback function for MAC layer to get group information.
 */
static void
vnet_get_group(void *arg, mac_ring_type_t type, const int index,
    mac_group_info_t *infop, mac_group_handle_t handle)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	switch (type) {

	case MAC_RING_TYPE_RX:
	{
		vnet_pseudo_rx_group_t	*rx_grp;

		/* We advertised only one RX group */
		ASSERT(index == 0);

		rx_grp = &vnetp->rx_grp[index];
		rx_grp->handle = handle;
		rx_grp->index = index;
		rx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)rx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = vnet_addmac;
		infop->mgi_remmac = vnet_remmac;
		infop->mgi_count = rx_grp->ring_cnt;

		break;
	}

	case MAC_RING_TYPE_TX:
	{
		vnet_pseudo_tx_group_t	*tx_grp;

		/* We advertised only one TX group */
		ASSERT(index == 0);

		tx_grp = &vnetp->tx_grp[index];
		tx_grp->handle = handle;
		tx_grp->index = index;
		tx_grp->vnetp = vnetp;

		infop->mgi_driver = (mac_group_driver_t)tx_grp;
		infop->mgi_start = NULL;
		infop->mgi_stop = NULL;
		infop->mgi_addmac = NULL;
		infop->mgi_remmac = NULL;
		infop->mgi_count = VNET_NUM_PSEUDO_TXRINGS;

		break;
	}

	default:
		break;
	}
}
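
/*
 * mri_start() entry point for a pseudo rx ring: mark the ring started and,
 * if the ring is already bound to the hwring of a Hybrid resource, start
 * the hwring as well.
 */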
static int
vnet_rx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	int			err;

	/*
	 * If this ring is mapped to a LDC resource, simply mark the state to
	 * indicate the ring is started and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is started and
	 * return. If and when a hybrid resource is activated for this vnet
	 * device, we will bind the hwring and start it then. If a hwring is
	 * already bound, start it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
		return (0);
	}

	err = mac_hwring_start(rx_ringp->hw_rh);
	if (err == 0) {
		rx_ringp->gen_num = mr_gen_num;
		rx_ringp->state |= VNET_RXRING_STARTED;
	} else {
		err = EIO;
	}

	return (err);
}
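
/*
 * mri_stop() entry point for a pseudo rx ring: stop the underlying hwring,
 * if any, and mark the ring stopped.
 */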
static void
vnet_rx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;

	/*
	 * If this ring is mapped to a LDC resource, simply mark the state to
	 * indicate the ring is now stopped and return.
	 */
	if ((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);

	/*
	 * This must be a ring reserved for a hwring. If the hwring is not
	 * bound yet, simply mark the state to indicate the ring is stopped and
	 * return. If a hwring is already bound, stop it now.
	 */
	if (rx_ringp->hw_rh == NULL) {
		rx_ringp->state &= ~VNET_RXRING_STARTED;
		return;
	}

	mac_hwring_stop(rx_ringp->hw_rh);
	rx_ringp->state &= ~VNET_RXRING_STARTED;
}
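
/*
 * mri_stat() entry point for a pseudo rx ring: fetch the stats either from
 * the underlying hwring (Hybrid resource) or from the LDC resource's
 * registered mac callbacks.
 */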
static int
vnet_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)rdriver;
	vnet_t			*vnetp = (vnet_t *)rx_ringp->vnetp;
	vnet_res_t		*vresp;
	mac_register_t		*macp;
	mac_callbacks_t		*cbp;

	/*
	 * Refer to vnet_m_capab() function for detailed comments on ring
	 * kstats.
	 */
	if ((rx_ringp->state & VNET_RXRING_HYBRID) != 0) {
		READ_ENTER(&vnetp->vsw_fp_rw);
		if (vnetp->hio_fp == NULL) {
			RW_EXIT(&vnetp->vsw_fp_rw);
			return (0);
		}

		VNET_FDBE_REFHOLD(vnetp->hio_fp);
		RW_EXIT(&vnetp->vsw_fp_rw);
		(void) mac_hwring_getstat(rx_ringp->hw_rh, stat, val);
		VNET_FDBE_REFRELE(vnetp->hio_fp);
		return (0);
	}

	ASSERT((rx_ringp->state &
	    (VNET_RXRING_LDC_SERVICE|VNET_RXRING_LDC_GUEST)) != 0);
	vresp = (vnet_res_t *)rx_ringp->hw_rh;
	macp = &vresp->macreg;
	cbp = macp->m_callbacks;

	cbp->mc_getstat(macp->m_driver, stat, val);

	return (0);
}
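
/* mri_start() entry point for a pseudo tx ring; just marks the ring started */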
/* ARGSUSED */
static int
vnet_tx_ring_start(mac_ring_driver_t arg, uint64_t mr_gen_num)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state |= VNET_TXRING_STARTED;
	return (0);
}
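
/* mri_stop() entry point for a pseudo tx ring; just marks the ring stopped */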
static void
vnet_tx_ring_stop(mac_ring_driver_t arg)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)arg;

	tx_ringp->state &= ~VNET_TXRING_STARTED;
}
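
/*
 * mri_stat() entry point for a pseudo tx ring; reports the counters that the
 * driver maintains in tx_ring_stats.
 */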
static int
vnet_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
{
	vnet_pseudo_tx_ring_t	*tx_ringp = (vnet_pseudo_tx_ring_t *)rdriver;
	vnet_tx_ring_stats_t	*statsp;

	statsp = &tx_ringp->tx_ring_stats;

	switch (stat) {
	case MAC_STAT_OPACKETS:
		*val = statsp->opackets;
		break;

	case MAC_STAT_OBYTES:
		*val = statsp->obytes;
		break;

	default:
		*val = 0;
		return (ENOTSUP);
	}

	return (0);
}
/*
 * Disable polling for a ring and enable its interrupt.
 */
static int
vnet_ring_enable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t		*vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * Ring enable intr func is being invoked, but the ring is
		 * not bound to any underlying resource? This must be a ring
		 * reserved for a Hybrid resource and no such resource has been
		 * assigned to this vnet device yet. We simply return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either a LDC or a Hybrid resource.
	 * Call the appropriate function to enable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_enable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_enable_intr(vresp->macreg.m_driver));
	}
}
/*
 * Enable polling for a ring and disable its interrupt.
 */
static int
vnet_ring_disable_intr(void *arg)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	vnet_res_t		*vresp;

	if (rx_ringp->hw_rh == NULL) {
		/*
		 * Ring disable intr func is being invoked, but the ring is
		 * not bound to any underlying resource? This must be a ring
		 * reserved for a Hybrid resource and no such resource has been
		 * assigned to this vnet device yet. We simply return success.
		 */
		ASSERT((rx_ringp->state & VNET_RXRING_HYBRID) != 0);
		return (0);
	}

	/*
	 * The rx ring has been bound to either a LDC or a Hybrid resource.
	 * Call the appropriate function to disable interrupts for the ring.
	 */
	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		return (mac_hwring_disable_intr(rx_ringp->hw_rh));
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		return (vgen_disable_intr(vresp->macreg.m_driver));
	}
}
/*
 * Poll 'bytes_to_pickup' bytes of message from the rx ring.
 */
static mblk_t *
vnet_rx_poll(void *arg, int bytes_to_pickup)
{
	vnet_pseudo_rx_ring_t	*rx_ringp = (vnet_pseudo_rx_ring_t *)arg;
	mblk_t			*mp = NULL;
	vnet_res_t		*vresp;
	vnet_t			*vnetp = rx_ringp->vnetp;

	if (rx_ringp->hw_rh == NULL) {
		return (NULL);
	}

	if (rx_ringp->state & VNET_RXRING_HYBRID) {
		mp = mac_hwring_poll(rx_ringp->hw_rh, bytes_to_pickup);
		/*
		 * Packets received over a hybrid resource need additional
		 * processing to remove the tag, for the pvid case. The
		 * underlying resource is not aware of the vnet's pvid and thus
		 * packets are received with the vlan tag in the header; unlike
		 * packets that are received over a ldc channel in which case
		 * the peer vnet/vsw would have already removed the tag.
		 */
		if (vnetp->pvid != vnetp->default_vlan_id) {
			vnet_rx_frames_untag(vnetp->pvid, &mp);
		}
	} else {
		vresp = (vnet_res_t *)rx_ringp->hw_rh;
		mp = vgen_rx_poll(vresp->macreg.m_driver, bytes_to_pickup);
	}
	return (mp);
}
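
/*
 * Receive callback for the Hybrid resource: strips the pvid tag if needed
 * and passes the packet chain up the pseudo rx ring it is bound to.
 */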
/* ARGSUSED */
static void
vnet_hio_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp,
    boolean_t loopback)
{
	vnet_t			*vnetp = (vnet_t *)arg;
	vnet_pseudo_rx_ring_t	*ringp = (vnet_pseudo_rx_ring_t *)mrh;

	/*
	 * Packets received over a hybrid resource need additional processing
	 * to remove the tag, for the pvid case. The underlying resource is
	 * not aware of the vnet's pvid and thus packets are received with the
	 * vlan tag in the header; unlike packets that are received over a ldc
	 * channel in which case the peer vnet/vsw would have already removed
	 * the tag.
	 */
	if (vnetp->pvid != vnetp->default_vlan_id) {
		vnet_rx_frames_untag(vnetp->pvid, &mp);
		if (mp == NULL) {
			return;
		}
	}

	mac_rx_ring(vnetp->mh, ringp->handle, mp, ringp->gen_num);
}
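
/*
 * mgi_addmac() entry point for the rx group. Only the vnet device's own
 * (primary) unicast address is accepted; multiple unicast addresses are
 * not supported.
 */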
static int
vnet_addmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t	*rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t			*vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Multiple macaddr unsupported\n",
	    vnetp->instance, __func__);
	return (EINVAL);
}
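
/*
 * mgi_remmac() entry point for the rx group. Only removal of the primary
 * unicast address is accepted.
 */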
static int
vnet_remmac(void *arg, const uint8_t *mac_addr)
{
	vnet_pseudo_rx_group_t	*rx_grp = (vnet_pseudo_rx_group_t *)arg;
	vnet_t			*vnetp;

	vnetp = rx_grp->vnetp;

	if (bcmp(mac_addr, vnetp->curr_macaddr, ETHERADDRL) == 0) {
		return (0);
	}

	cmn_err(CE_CONT, "!vnet%d: %s: Invalid macaddr: %s\n",
	    vnetp->instance, __func__, ether_sprintf((void *)mac_addr));
	return (EINVAL);
}
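
/*
 * Setup the Hybrid resource: open the underlying physical device by its
 * link name, create an exclusive mac client over it, add the vnet's unicast
 * address, register the resource with the vio framework and set up the
 * receive callback.
 */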
static int
vnet_hio_mac_init(vnet_t *vnetp, char *ifname)
{
	mac_handle_t		mh;
	mac_client_handle_t	mch = NULL;
	mac_unicast_handle_t	muh = NULL;
	mac_diag_t		diag;
	mac_register_t		*macp;
	char			client_name[MAXNAMELEN];
	int			rv;
	uint16_t		mac_flags = MAC_UNICAST_TAG_DISABLE |
	    MAC_UNICAST_STRIP_DISABLE | MAC_UNICAST_PRIMARY;
	vio_net_callbacks_t	vcb;
	ether_addr_t		rem_addr =
	    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint32_t		retries = 0;

	if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
		return (EAGAIN);
	}

	do {
		rv = mac_open_by_linkname(ifname, &mh);
		if (rv == 0) {
			break;
		}
		if (rv != ENOENT || (retries++ >= vnet_mac_open_retries)) {
			mac_free(macp);
			return (rv);
		}
		drv_usecwait(vnet_mac_open_delay);
	} while (rv == ENOENT);

	vnetp->hio_mh = mh;

	(void) snprintf(client_name, MAXNAMELEN, "vnet%d-%s", vnetp->instance,
	    ifname);
	rv = mac_client_open(mh, &mch, client_name, MAC_OPEN_FLAGS_EXCLUSIVE);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_mch = mch;

	rv = mac_unicast_add(mch, vnetp->curr_macaddr, mac_flags, &muh, 0,
	    &diag);
	if (rv != 0) {
		goto fail;
	}
	vnetp->hio_muh = muh;

	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vnetp;
	macp->m_dip = NULL;
	macp->m_src_addr = NULL;
	macp->m_callbacks = &vnet_hio_res_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;

	rv = vio_net_resource_reg(macp, VIO_NET_RES_HYBRID,
	    vnetp->curr_macaddr, rem_addr, &vnetp->hio_vhp, &vcb);
	if (rv != 0) {
		goto fail;
	}
	mac_free(macp);

	/* add the recv callback */
	mac_rx_set(vnetp->hio_mch, vnet_hio_rx_cb, vnetp);

	return (0);

fail:
	mac_free(macp);
	vnet_hio_mac_cleanup(vnetp);
	return (1);
}
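
/*
 * Teardown of the Hybrid resource mac client state, in reverse order of
 * vnet_hio_mac_init().
 */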
static void
vnet_hio_mac_cleanup(vnet_t *vnetp)
{
	if (vnetp->hio_vhp != NULL) {
		vio_net_resource_unreg(vnetp->hio_vhp);
		vnetp->hio_vhp = NULL;
	}

	if (vnetp->hio_muh != NULL) {
		(void) mac_unicast_remove(vnetp->hio_mch, vnetp->hio_muh);
		vnetp->hio_muh = NULL;
	}

	if (vnetp->hio_mch != NULL) {
		mac_client_close(vnetp->hio_mch, 0);
		vnetp->hio_mch = NULL;
	}

	if (vnetp->hio_mh != NULL) {
		mac_close(vnetp->hio_mh);
		vnetp->hio_mh = NULL;
	}
}

/* Bind pseudo rings to hwrings */
static int
vnet_bind_hwrings(vnet_t *vnetp)
{
	mac_ring_handle_t	hw_rh[VNET_NUM_HYBRID_RINGS];
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			hw_ring_cnt;
	int			i;
	int			rv;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	/* Get the list of the underlying RX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->rx_hwgh, hw_rh,
	    MAC_RING_TYPE_RX);

	/* We expect the # of hw rx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad rx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	if (vnetp->rx_hwgh != NULL) {
		/*
		 * Quiesce the HW ring and the mac srs on the ring. Note
		 * that the HW ring will be restarted when the pseudo ring
		 * is started. At that time all the packets will be
		 * directly passed up to the pseudo RX ring and handled
		 * by mac srs created over the pseudo RX ring.
		 */
		mac_rx_client_quiesce(vnetp->hio_mch);
		mac_srs_perm_quiesce(vnetp->hio_mch, B_TRUE);
	}

	/*
	 * Bind the pseudo rings to the hwrings and start the hwrings.
	 * Note we don't need to register these with the upper mac, as we have
	 * statically exported these pseudo rxrings which are reserved for
	 * rxrings of Hybrid resource.
	 */
	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		/* Pick the rxrings reserved for Hybrid resource */
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];

		/* Store the hw ring handle */
		rx_ringp->hw_rh = hw_rh[i];

		/* Bind the pseudo ring to the underlying hwring */
		mac_hwring_setup(rx_ringp->hw_rh,
		    (mac_resource_handle_t)rx_ringp, NULL);

		/* Start the hwring if needed */
		if (rx_ringp->state & VNET_RXRING_STARTED) {
			rv = mac_hwring_start(rx_ringp->hw_rh);
			if (rv != 0) {
				mac_hwring_teardown(rx_ringp->hw_rh);
				rx_ringp->hw_rh = NULL;
				goto fail;
			}
		}
	}

	/* Get the list of the underlying TX rings. */
	hw_ring_cnt = mac_hwrings_get(vnetp->hio_mch, &vnetp->tx_hwgh, hw_rh,
	    MAC_RING_TYPE_TX);

	/* We expect the # of hw tx rings to match VNET_NUM_HYBRID_RINGS */
	if (hw_ring_cnt != VNET_NUM_HYBRID_RINGS) {
		cmn_err(CE_WARN,
		    "!vnet%d: vnet_bind_hwrings: bad tx hw_ring_cnt(%d)\n",
		    vnetp->instance, hw_ring_cnt);
		goto fail;
	}

	/*
	 * Now map the pseudo txrings to the hw txrings. Note we don't need
	 * to register these with the upper mac, as we have statically exported
	 * these rings. Note that these rings will continue to be used for LDC
	 * resources to peer vnets and vswitch (shared ring).
	 */
	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < tx_grp->ring_cnt; i++) {
		tx_ringp = &tx_grp->rings[i];
		tx_ringp->hw_rh = hw_rh[i];
		tx_ringp->state |= VNET_TXRING_HYBRID;
	}
	tx_grp->tx_notify_handle =
	    mac_client_tx_notify(vnetp->hio_mch, vnet_tx_ring_update, vnetp);

	mac_perim_exit(mph1);
	return (0);

fail:
	mac_perim_exit(mph1);
	vnet_unbind_hwrings(vnetp);
	return (1);
}

/* Unbind pseudo rings from hwrings */
static void
vnet_unbind_hwrings(vnet_t *vnetp)
{
	mac_perim_handle_t	mph1;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_tx_group_t	*tx_grp;
	vnet_pseudo_tx_ring_t	*tx_ringp;
	int			i;

	mac_perim_enter_by_mh(vnetp->hio_mh, &mph1);

	tx_grp = &vnetp->tx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		tx_ringp = &tx_grp->rings[i];
		if (tx_ringp->state & VNET_TXRING_HYBRID) {
			tx_ringp->state &= ~VNET_TXRING_HYBRID;
			tx_ringp->hw_rh = NULL;
		}
	}
	(void) mac_client_tx_notify(vnetp->hio_mch, NULL,
	    tx_grp->tx_notify_handle);

	rx_grp = &vnetp->rx_grp[0];
	for (i = 0; i < VNET_NUM_HYBRID_RINGS; i++) {
		rx_ringp = &rx_grp->rings[i + VNET_HYBRID_RXRING_INDEX];
		if (rx_ringp->hw_rh != NULL) {
			/* Stop the hwring */
			mac_hwring_stop(rx_ringp->hw_rh);

			/* Teardown the hwring */
			mac_hwring_teardown(rx_ringp->hw_rh);
			rx_ringp->hw_rh = NULL;
		}
	}

	if (vnetp->rx_hwgh != NULL) {
		vnetp->rx_hwgh = NULL;
		/*
		 * First clear the permanent-quiesced flag of the RX srs then
		 * restart the HW ring and the mac srs on the ring.
		 */
		mac_srs_perm_quiesce(vnetp->hio_mch, B_FALSE);
		mac_rx_client_restart(vnetp->hio_mch);
	}

	mac_perim_exit(mph1);
}

/* Bind pseudo ring to a LDC resource */
static int
vnet_bind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			rv;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is save a
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = (mac_ring_handle_t)vresp;
		vresp->rx_ringp = (void *)rx_ringp;
		return (0);
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = vnet_alloc_pseudo_rx_ring(vnetp);
	if (rx_ringp == NULL) {
		cmn_err(CE_WARN, "!vnet%d: Failed to allocate pseudo rx ring",
		    vnetp->instance);
		goto fail;
	}

	/* Store the LDC resource itself as the ring handle */
	rx_ringp->hw_rh = (mac_ring_handle_t)vresp;

	/*
	 * Save a reference to the ring in the resource for lookup during
	 * unbind. Note this is only done for LDC resources. We don't need this
	 * in the case of a Hybrid resource (see vnet_bind_hwrings()), as its
	 * rx rings are mapped to reserved pseudo rx rings (index 1 and 2).
	 */
	vresp->rx_ringp = (void *)rx_ringp;
	rx_ringp->state |= VNET_RXRING_LDC_GUEST;

	/* Register the pseudo ring with upper-mac */
	rv = mac_group_add_ring(rx_grp->handle, rx_ringp->index);
	if (rv != 0) {
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;
		rx_ringp->hw_rh = NULL;
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
		goto fail;
	}

	mac_perim_exit(mph1);
	return (0);

fail:
	mac_perim_exit(mph1);
	return (1);
}

/* Unbind pseudo ring from a LDC resource */
static void
vnet_unbind_vgenring(vnet_res_t *vresp)
{
	vnet_t			*vnetp;
	vnet_pseudo_rx_group_t	*rx_grp;
	vnet_pseudo_rx_ring_t	*rx_ringp;
	mac_perim_handle_t	mph1;
	int			type;

	vnetp = vresp->vnetp;
	type = vresp->type;
	rx_grp = &vnetp->rx_grp[0];

	if (vresp->rx_ringp == NULL) {
		return;
	}

	if (type == VIO_NET_RES_LDC_SERVICE) {
		/*
		 * Ring Index 0 is the default ring in the group and is
		 * reserved for LDC_SERVICE in vnet_ring_grp_init(). This ring
		 * is allocated statically and is reported to the mac layer
		 * in vnet_m_capab(). So, all we need to do here, is remove its
		 * reference to the associated vresp.
		 */
		rx_ringp = &rx_grp->rings[0];
		rx_ringp->hw_rh = NULL;
		vresp->rx_ringp = NULL;
		return;
	}
	ASSERT(type == VIO_NET_RES_LDC_GUEST);

	mac_perim_enter_by_mh(vnetp->mh, &mph1);

	rx_ringp = (vnet_pseudo_rx_ring_t *)vresp->rx_ringp;
	vresp->rx_ringp = NULL;

	if (rx_ringp != NULL && (rx_ringp->state & VNET_RXRING_LDC_GUEST)) {
		/* Unregister the pseudo ring with upper-mac */
		mac_group_rem_ring(rx_grp->handle, rx_ringp->handle);

		rx_ringp->hw_rh = NULL;
		rx_ringp->state &= ~VNET_RXRING_LDC_GUEST;

		/* Free the pseudo rx ring */
		vnet_free_pseudo_rx_ring(vnetp, rx_ringp);
	}

	mac_perim_exit(mph1);
}
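
/* Unbind the pseudo ring(s) from the given resource, based on its type */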
static void
vnet_unbind_rings(vnet_res_t *vresp)
{
	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		vnet_unbind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		vnet_unbind_hwrings(vresp->vnetp);
		break;

	default:
		break;
	}
}
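
/* Bind pseudo ring(s) to the given resource, based on its type */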
static int
vnet_bind_rings(vnet_res_t *vresp)
{
	int	rv;

	switch (vresp->type) {

	case VIO_NET_RES_LDC_SERVICE:
	case VIO_NET_RES_LDC_GUEST:
		rv = vnet_bind_vgenring(vresp);
		break;

	case VIO_NET_RES_HYBRID:
		rv = vnet_bind_hwrings(vresp->vnetp);
		break;

	default:
		rv = 1;
		break;
	}

	return (rv);
}
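
/*
 * Stat callback of the Hybrid resource; returns the requested stat of the
 * underlying physical device.
 */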
static int
vnet_hio_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnet_t	*vnetp = (vnet_t *)arg;

	*val = mac_stat_get(vnetp->hio_mh, stat);
	return (0);
}

/*
 * The start() and stop() routines for the Hybrid resource below, are just
 * dummy functions. This is provided to avoid resource type specific code in
 * vnet_start_resources() and vnet_stop_resources(). The starting and stopping
 * of the Hybrid resource happens in the context of the mac_client interfaces
 * that are invoked in vnet_hio_mac_init() and vnet_hio_mac_cleanup().
 */

/* ARGSUSED */
static int
vnet_hio_start(void *arg)
{
	return (0);
}

/* ARGSUSED */
static void
vnet_hio_stop(void *arg)
{
}
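
/*
 * Transmit a chain of packets over a pseudo tx ring that is backed by a
 * Hybrid hw ring; returns any packets that the hw ring did not accept so
 * that the caller can retry them later.
 */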
static mblk_t *
vnet_hio_tx(void *arg, mblk_t *mp)
{
	vnet_pseudo_tx_ring_t	*tx_ringp;
	mblk_t			*nextp;
	mblk_t			*ret_mp;

	tx_ringp = (vnet_pseudo_tx_ring_t *)arg;
	for (;;) {
		nextp = mp->b_next;
		mp->b_next = NULL;

		ret_mp = mac_hwring_tx(tx_ringp->hw_rh, mp);
		if (ret_mp != NULL) {
			ret_mp->b_next = nextp;
			mp = ret_mp;
			break;
		}

		if ((mp = nextp) == NULL)
			break;
	}
	return (mp);
}

#ifdef VNET_IOC_DEBUG

/*
 * The ioctl entry point is used only for debugging for now. The ioctl commands
 * can be used to force the link state of the channel connected to vsw.
 */
static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	struct iocblk	*iocp;
	vnet_t		*vnetp;

	iocp = (struct iocblk *)(uintptr_t)mp->b_rptr;
	iocp->ioc_error = 0;
	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {

	case VNET_FORCE_LINK_DOWN:
	case VNET_FORCE_LINK_UP:
		vnet_force_link_state(vnetp, q, mp);
		break;

	default:
		iocp->ioc_error = EINVAL;
		miocnak(q, mp, 0, iocp->ioc_error);
		break;
	}
}

static void
vnet_force_link_state(vnet_t *vnetp, queue_t *q, mblk_t *mp)
{
	mac_register_t	*macp;
	mac_callbacks_t	*cbp;
	vnet_res_t	*vresp;

	READ_ENTER(&vnetp->vsw_fp_rw);

	vresp = vnetp->vsw_fp;
	if (vresp == NULL) {
		RW_EXIT(&vnetp->vsw_fp_rw);
		return;
	}

	macp = &vresp->macreg;
	cbp = macp->m_callbacks;
	cbp->mc_ioctl(macp->m_driver, q, mp);

	RW_EXIT(&vnetp->vsw_fp_rw);
}

#else

static void
vnet_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	vnet_t	*vnetp;

	vnetp = (vnet_t *)arg;

	if (vnetp == NULL) {
		miocnak(q, mp, 0, EINVAL);
		return;
	}

	/* ioctl support only for debugging */
	miocnak(q, mp, 0, ENOTSUP);
}

#endif