2 * xen paravirt network card backend
4 * (c) Gerd Hoffmann <kraxel@redhat.com>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 of the License.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, see <http://www.gnu.org/licenses/>.
18 * Contributions after 2012-01-13 are licensed under the terms of the
19 * GNU GPL, version 2 or (at your option) any later version.
22 #include "qemu/osdep.h"
23 #include "qemu/main-loop.h"
24 #include "qemu/cutils.h"
26 #include "qemu/qemu-print.h"
27 #include "qapi/qmp/qdict.h"
28 #include "qapi/error.h"
30 #include <sys/socket.h>
31 #include <sys/ioctl.h>
35 #include "net/checksum.h"
38 #include "hw/xen/xen-backend.h"
39 #include "hw/xen/xen-bus-helper.h"
40 #include "hw/qdev-properties.h"
41 #include "hw/qdev-properties-system.h"
43 #include "hw/xen/interface/io/netif.h"
44 #include "hw/xen/interface/io/xs_wire.h"
48 /* ------------------------------------------------------------- */
51 struct XenDevice xendev
; /* must be first */
52 XenEventChannel
*event_channel
;
55 unsigned int tx_ring_ref
;
56 unsigned int rx_ring_ref
;
57 struct netif_tx_sring
*txs
;
58 struct netif_rx_sring
*rxs
;
59 netif_tx_back_ring_t tx_ring
;
60 netif_rx_back_ring_t rx_ring
;
65 typedef struct XenNetDev XenNetDev
;
67 #define TYPE_XEN_NET_DEVICE "xen-net-device"
68 OBJECT_DECLARE_SIMPLE_TYPE(XenNetDev
, XEN_NET_DEVICE
)
70 /* ------------------------------------------------------------- */
72 static void net_tx_response(struct XenNetDev
*netdev
, netif_tx_request_t
*txp
, int8_t st
)
74 RING_IDX i
= netdev
->tx_ring
.rsp_prod_pvt
;
75 netif_tx_response_t
*resp
;
78 resp
= RING_GET_RESPONSE(&netdev
->tx_ring
, i
);
83 if (txp
->flags
& NETTXF_extra_info
) {
84 RING_GET_RESPONSE(&netdev
->tx_ring
, ++i
)->status
= NETIF_RSP_NULL
;
88 netdev
->tx_ring
.rsp_prod_pvt
= ++i
;
89 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev
->tx_ring
, notify
);
91 xen_device_notify_event_channel(XEN_DEVICE(netdev
),
92 netdev
->event_channel
, NULL
);
95 if (i
== netdev
->tx_ring
.req_cons
) {
97 RING_FINAL_CHECK_FOR_REQUESTS(&netdev
->tx_ring
, more_to_do
);
104 static void net_tx_error(struct XenNetDev
*netdev
, netif_tx_request_t
*txp
, RING_IDX end
)
108 * Hmm, why netback fails everything in the ring?
109 * Should we do that even when not supporting SG and TSO?
111 RING_IDX cons
= netdev
->tx_ring
.req_cons
;
114 make_tx_response(netif
, txp
, NETIF_RSP_ERROR
);
118 txp
= RING_GET_REQUEST(&netdev
->tx_ring
, cons
++);
120 netdev
->tx_ring
.req_cons
= cons
;
121 netif_schedule_work(netif
);
124 net_tx_response(netdev
, txp
, NETIF_RSP_ERROR
);
128 static bool net_tx_packets(struct XenNetDev
*netdev
)
130 bool done_something
= false;
131 netif_tx_request_t txreq
;
136 assert(bql_locked());
139 rc
= netdev
->tx_ring
.req_cons
;
140 rp
= netdev
->tx_ring
.sring
->req_prod
;
141 xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
144 if (RING_REQUEST_CONS_OVERFLOW(&netdev
->tx_ring
, rc
)) {
147 memcpy(&txreq
, RING_GET_REQUEST(&netdev
->tx_ring
, rc
), sizeof(txreq
));
148 netdev
->tx_ring
.req_cons
= ++rc
;
149 done_something
= true;
152 /* should not happen in theory, we don't announce the *
153 * feature-{sg,gso,whatelse} flags in xenstore (yet?) */
154 if (txreq
.flags
& NETTXF_extra_info
) {
155 qemu_log_mask(LOG_UNIMP
, "vif%u: FIXME: extra info flag\n",
157 net_tx_error(netdev
, &txreq
, rc
);
160 if (txreq
.flags
& NETTXF_more_data
) {
161 qemu_log_mask(LOG_UNIMP
, "vif%u: FIXME: more data flag\n",
163 net_tx_error(netdev
, &txreq
, rc
);
168 if (txreq
.size
< 14) {
169 qemu_log_mask(LOG_GUEST_ERROR
, "vif%u: bad packet size: %d\n",
170 netdev
->dev
, txreq
.size
);
171 net_tx_error(netdev
, &txreq
, rc
);
175 if ((txreq
.offset
+ txreq
.size
) > XEN_PAGE_SIZE
) {
176 qemu_log_mask(LOG_GUEST_ERROR
, "vif%u: error: page crossing\n",
178 net_tx_error(netdev
, &txreq
, rc
);
182 trace_xen_netdev_tx(netdev
->dev
, txreq
.gref
, txreq
.offset
,
183 txreq
.size
, txreq
.flags
,
184 (txreq
.flags
& NETTXF_csum_blank
) ? " csum_blank" : "",
185 (txreq
.flags
& NETTXF_data_validated
) ? " data_validated" : "",
186 (txreq
.flags
& NETTXF_more_data
) ? " more_data" : "",
187 (txreq
.flags
& NETTXF_extra_info
) ? " extra_info" : "");
189 page
= xen_device_map_grant_refs(&netdev
->xendev
, &txreq
.gref
, 1,
192 qemu_log_mask(LOG_GUEST_ERROR
,
193 "vif%u: tx gref dereference failed (%d)\n",
194 netdev
->dev
, txreq
.gref
);
195 net_tx_error(netdev
, &txreq
, rc
);
198 if (txreq
.flags
& NETTXF_csum_blank
) {
199 /* have read-only mapping -> can't fill checksum in-place */
201 tmpbuf
= g_malloc(XEN_PAGE_SIZE
);
203 memcpy(tmpbuf
, page
+ txreq
.offset
, txreq
.size
);
204 net_checksum_calculate(tmpbuf
, txreq
.size
, CSUM_ALL
);
205 qemu_send_packet(qemu_get_queue(netdev
->nic
), tmpbuf
,
208 qemu_send_packet(qemu_get_queue(netdev
->nic
),
209 page
+ txreq
.offset
, txreq
.size
);
211 xen_device_unmap_grant_refs(&netdev
->xendev
, page
, &txreq
.gref
, 1,
213 net_tx_response(netdev
, &txreq
, NETIF_RSP_OKAY
);
215 if (!netdev
->tx_work
) {
221 return done_something
;
224 /* ------------------------------------------------------------- */
226 static void net_rx_response(struct XenNetDev
*netdev
,
227 netif_rx_request_t
*req
, int8_t st
,
228 uint16_t offset
, uint16_t size
,
231 RING_IDX i
= netdev
->rx_ring
.rsp_prod_pvt
;
232 netif_rx_response_t
*resp
;
235 resp
= RING_GET_RESPONSE(&netdev
->rx_ring
, i
);
236 resp
->offset
= offset
;
239 resp
->status
= (int16_t)size
;
241 resp
->status
= (int16_t)st
;
244 trace_xen_netdev_rx(netdev
->dev
, i
, resp
->status
, resp
->flags
);
246 netdev
->rx_ring
.rsp_prod_pvt
= ++i
;
247 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netdev
->rx_ring
, notify
);
249 xen_device_notify_event_channel(XEN_DEVICE(netdev
),
250 netdev
->event_channel
, NULL
);
254 #define NET_IP_ALIGN 2
256 static ssize_t
net_rx_packet(NetClientState
*nc
, const uint8_t *buf
, size_t size
)
258 struct XenNetDev
*netdev
= qemu_get_nic_opaque(nc
);
259 netif_rx_request_t rxreq
;
263 assert(bql_locked());
265 if (xen_device_backend_get_state(&netdev
->xendev
) != XenbusStateConnected
) {
269 rc
= netdev
->rx_ring
.req_cons
;
270 rp
= netdev
->rx_ring
.sring
->req_prod
;
271 xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
273 if (rc
== rp
|| RING_REQUEST_CONS_OVERFLOW(&netdev
->rx_ring
, rc
)) {
276 if (size
> XEN_PAGE_SIZE
- NET_IP_ALIGN
) {
277 qemu_log_mask(LOG_GUEST_ERROR
, "vif%u: packet too big (%lu > %ld)",
278 netdev
->dev
, (unsigned long)size
,
279 XEN_PAGE_SIZE
- NET_IP_ALIGN
);
283 memcpy(&rxreq
, RING_GET_REQUEST(&netdev
->rx_ring
, rc
), sizeof(rxreq
));
284 netdev
->rx_ring
.req_cons
= ++rc
;
286 page
= xen_device_map_grant_refs(&netdev
->xendev
, &rxreq
.gref
, 1,
289 qemu_log_mask(LOG_GUEST_ERROR
,
290 "vif%u: rx gref dereference failed (%d)\n",
291 netdev
->dev
, rxreq
.gref
);
292 net_rx_response(netdev
, &rxreq
, NETIF_RSP_ERROR
, 0, 0, 0);
295 memcpy(page
+ NET_IP_ALIGN
, buf
, size
);
296 xen_device_unmap_grant_refs(&netdev
->xendev
, page
, &rxreq
.gref
, 1, NULL
);
297 net_rx_response(netdev
, &rxreq
, NETIF_RSP_OKAY
, NET_IP_ALIGN
, size
, 0);
302 /* ------------------------------------------------------------- */
304 static NetClientInfo net_xen_info
= {
305 .type
= NET_CLIENT_DRIVER_NIC
,
306 .size
= sizeof(NICState
),
307 .receive
= net_rx_packet
,
310 static void xen_netdev_realize(XenDevice
*xendev
, Error
**errp
)
313 XenNetDev
*netdev
= XEN_NET_DEVICE(xendev
);
316 qemu_macaddr_default_if_unset(&netdev
->conf
.macaddr
);
318 xen_device_frontend_printf(xendev
, "mac", "%02x:%02x:%02x:%02x:%02x:%02x",
319 netdev
->conf
.macaddr
.a
[0],
320 netdev
->conf
.macaddr
.a
[1],
321 netdev
->conf
.macaddr
.a
[2],
322 netdev
->conf
.macaddr
.a
[3],
323 netdev
->conf
.macaddr
.a
[4],
324 netdev
->conf
.macaddr
.a
[5]);
326 netdev
->nic
= qemu_new_nic(&net_xen_info
, &netdev
->conf
,
327 object_get_typename(OBJECT(xendev
)),
329 &xendev
->qdev
.mem_reentrancy_guard
, netdev
);
331 nc
= qemu_get_queue(netdev
->nic
);
332 qemu_format_nic_info_str(nc
, netdev
->conf
.macaddr
.a
);
335 xen_device_backend_printf(xendev
, "feature-rx-copy", "%u", 1);
336 xen_device_backend_printf(xendev
, "feature-rx-flip", "%u", 0);
338 trace_xen_netdev_realize(netdev
->dev
, nc
->info_str
, nc
->peer
?
339 nc
->peer
->name
: "(none)");
342 static bool net_event(void *_xendev
)
344 XenNetDev
*netdev
= XEN_NET_DEVICE(_xendev
);
347 done_something
= net_tx_packets(netdev
);
348 qemu_flush_queued_packets(qemu_get_queue(netdev
->nic
));
349 return done_something
;
352 static bool xen_netdev_connect(XenDevice
*xendev
, Error
**errp
)
355 XenNetDev
*netdev
= XEN_NET_DEVICE(xendev
);
356 unsigned int port
, rx_copy
;
358 assert(bql_locked());
360 if (xen_device_frontend_scanf(xendev
, "tx-ring-ref", "%u",
361 &netdev
->tx_ring_ref
) != 1) {
362 error_setg(errp
, "failed to read tx-ring-ref");
366 if (xen_device_frontend_scanf(xendev
, "rx-ring-ref", "%u",
367 &netdev
->rx_ring_ref
) != 1) {
368 error_setg(errp
, "failed to read rx-ring-ref");
372 if (xen_device_frontend_scanf(xendev
, "event-channel", "%u",
374 error_setg(errp
, "failed to read event-channel");
378 if (xen_device_frontend_scanf(xendev
, "request-rx-copy", "%u",
383 error_setg(errp
, "frontend doesn't support rx-copy");
387 netdev
->txs
= xen_device_map_grant_refs(xendev
,
388 &netdev
->tx_ring_ref
, 1,
389 PROT_READ
| PROT_WRITE
,
392 error_prepend(errp
, "failed to map tx grant ref: ");
396 netdev
->rxs
= xen_device_map_grant_refs(xendev
,
397 &netdev
->rx_ring_ref
, 1,
398 PROT_READ
| PROT_WRITE
,
401 error_prepend(errp
, "failed to map rx grant ref: ");
405 BACK_RING_INIT(&netdev
->tx_ring
, netdev
->txs
, XEN_PAGE_SIZE
);
406 BACK_RING_INIT(&netdev
->rx_ring
, netdev
->rxs
, XEN_PAGE_SIZE
);
408 netdev
->event_channel
= xen_device_bind_event_channel(xendev
, port
,
412 if (!netdev
->event_channel
) {
416 trace_xen_netdev_connect(netdev
->dev
, netdev
->tx_ring_ref
,
417 netdev
->rx_ring_ref
, port
);
419 net_tx_packets(netdev
);
423 static void xen_netdev_disconnect(XenDevice
*xendev
, Error
**errp
)
425 XenNetDev
*netdev
= XEN_NET_DEVICE(xendev
);
427 trace_xen_netdev_disconnect(netdev
->dev
);
429 assert(bql_locked());
431 netdev
->tx_ring
.sring
= NULL
;
432 netdev
->rx_ring
.sring
= NULL
;
434 if (netdev
->event_channel
) {
435 xen_device_unbind_event_channel(xendev
, netdev
->event_channel
,
437 netdev
->event_channel
= NULL
;
440 xen_device_unmap_grant_refs(xendev
, netdev
->txs
,
441 &netdev
->tx_ring_ref
, 1, errp
);
445 xen_device_unmap_grant_refs(xendev
, netdev
->rxs
,
446 &netdev
->rx_ring_ref
, 1, errp
);
451 /* -------------------------------------------------------------------- */
454 static void xen_netdev_frontend_changed(XenDevice
*xendev
,
455 enum xenbus_state frontend_state
,
459 enum xenbus_state backend_state
= xen_device_backend_get_state(xendev
);
461 trace_xen_netdev_frontend_changed(xendev
->name
, frontend_state
);
463 switch (frontend_state
) {
464 case XenbusStateConnected
:
465 if (backend_state
== XenbusStateConnected
) {
469 xen_netdev_disconnect(xendev
, errp
);
474 if (!xen_netdev_connect(xendev
, errp
)) {
475 xen_netdev_disconnect(xendev
, NULL
);
476 xen_device_backend_set_state(xendev
, XenbusStateClosing
);
480 xen_device_backend_set_state(xendev
, XenbusStateConnected
);
483 case XenbusStateClosing
:
484 xen_device_backend_set_state(xendev
, XenbusStateClosing
);
487 case XenbusStateClosed
:
488 case XenbusStateUnknown
:
489 xen_netdev_disconnect(xendev
, errp
);
494 xen_device_backend_set_state(xendev
, XenbusStateClosed
);
497 case XenbusStateInitialised
:
499 * Linux netback does nothing on the frontend going (back) to
500 * XenbusStateInitialised, so do the same here.
507 static char *xen_netdev_get_name(XenDevice
*xendev
, Error
**errp
)
509 XenNetDev
*netdev
= XEN_NET_DEVICE(xendev
);
511 if (netdev
->dev
== -1) {
512 XenBus
*xenbus
= XEN_BUS(qdev_get_parent_bus(DEVICE(xendev
)));
513 char fe_path
[XENSTORE_ABS_PATH_MAX
+ 1];
514 int idx
= (xen_mode
== XEN_EMULATE
) ? 0 : 1;
517 /* Theoretically we could go up to INT_MAX here but that's overkill */
519 snprintf(fe_path
, sizeof(fe_path
),
520 "/local/domain/%u/device/vif/%u",
521 xendev
->frontend_id
, idx
);
522 value
= qemu_xen_xs_read(xenbus
->xsh
, XBT_NULL
, fe_path
, NULL
);
524 if (errno
== ENOENT
) {
528 error_setg(errp
, "cannot read %s: %s", fe_path
,
535 error_setg(errp
, "cannot find device index for netdev device");
539 return g_strdup_printf("%u", netdev
->dev
);
542 static void xen_netdev_unrealize(XenDevice
*xendev
)
544 XenNetDev
*netdev
= XEN_NET_DEVICE(xendev
);
546 trace_xen_netdev_unrealize(netdev
->dev
);
548 /* Disconnect from the frontend in case this has not already happened */
549 xen_netdev_disconnect(xendev
, NULL
);
552 qemu_del_nic(netdev
->nic
);
556 /* ------------------------------------------------------------- */
558 static Property xen_netdev_properties
[] = {
559 DEFINE_NIC_PROPERTIES(XenNetDev
, conf
),
560 DEFINE_PROP_INT32("idx", XenNetDev
, dev
, -1),
561 DEFINE_PROP_END_OF_LIST(),
564 static void xen_netdev_class_init(ObjectClass
*class, void *data
)
566 DeviceClass
*dev_class
= DEVICE_CLASS(class);
567 XenDeviceClass
*xendev_class
= XEN_DEVICE_CLASS(class);
569 xendev_class
->backend
= "qnic";
570 xendev_class
->device
= "vif";
571 xendev_class
->get_name
= xen_netdev_get_name
;
572 xendev_class
->realize
= xen_netdev_realize
;
573 xendev_class
->frontend_changed
= xen_netdev_frontend_changed
;
574 xendev_class
->unrealize
= xen_netdev_unrealize
;
575 set_bit(DEVICE_CATEGORY_NETWORK
, dev_class
->categories
);
576 dev_class
->user_creatable
= true;
578 device_class_set_props(dev_class
, xen_netdev_properties
);
581 static const TypeInfo xen_net_type_info
= {
582 .name
= TYPE_XEN_NET_DEVICE
,
583 .parent
= TYPE_XEN_DEVICE
,
584 .instance_size
= sizeof(XenNetDev
),
585 .class_init
= xen_netdev_class_init
,
588 static void xen_net_register_types(void)
590 type_register_static(&xen_net_type_info
);
593 type_init(xen_net_register_types
)
595 /* Called to instantiate a XenNetDev when the backend is detected. */
596 static void xen_net_device_create(XenBackendInstance
*backend
,
597 QDict
*opts
, Error
**errp
)
600 XenBus
*xenbus
= xen_backend_get_bus(backend
);
601 const char *name
= xen_backend_get_name(backend
);
602 XenDevice
*xendev
= NULL
;
603 unsigned long number
;
608 if (qemu_strtoul(name
, NULL
, 10, &number
) || number
>= INT_MAX
) {
609 error_setg(errp
, "failed to parse name '%s'", name
);
613 trace_xen_netdev_create(number
);
615 macstr
= qdict_get_try_str(opts
, "mac");
616 if (macstr
== NULL
) {
617 error_setg(errp
, "no MAC address found");
621 if (net_parse_macaddr(mac
.a
, macstr
) < 0) {
622 error_setg(errp
, "failed to parse MAC address");
626 xendev
= XEN_DEVICE(qdev_new(TYPE_XEN_NET_DEVICE
));
627 net
= XEN_NET_DEVICE(xendev
);
630 memcpy(&net
->conf
.macaddr
, &mac
, sizeof(mac
));
632 if (qdev_realize_and_unref(DEVICE(xendev
), BUS(xenbus
), errp
)) {
633 xen_backend_set_device(backend
, xendev
);
637 error_prepend(errp
, "realization of net device %lu failed: ",
642 object_unparent(OBJECT(xendev
));
646 static void xen_net_device_destroy(XenBackendInstance
*backend
,
650 XenDevice
*xendev
= xen_backend_get_device(backend
);
651 XenNetDev
*netdev
= XEN_NET_DEVICE(xendev
);
653 trace_xen_netdev_destroy(netdev
->dev
);
655 object_unparent(OBJECT(xendev
));
658 static const XenBackendInfo xen_net_backend_info
= {
660 .create
= xen_net_device_create
,
661 .destroy
= xen_net_device_destroy
,
664 static void xen_net_register_backend(void)
666 xen_backend_register(&xen_net_backend_info
);
669 xen_backend_init(xen_net_register_backend
);