/*
 * QEMU VMWARE VMXNET* paravirtual NICs - TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
#include "qemu/osdep.h"
#include "vmxnet_tx_pkt.h"
#include "net/eth.h"
#include "qemu-common.h"
#include "qemu/iov.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
enum {
    VMXNET_TX_PKT_VHDR_FRAG = 0,
    VMXNET_TX_PKT_L2HDR_FRAG,
    VMXNET_TX_PKT_L3HDR_FRAG,
    VMXNET_TX_PKT_PL_START_FRAG
};
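
/*
 * Layout of the internal 'vec' iovec array: one slot for the virtio-net
 * header, one for the L2 header, one for the L3 header, and the payload
 * fragments starting at VMXNET_TX_PKT_PL_START_FRAG.
 */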
/* TX packet private context */
struct VmxnetTxPkt {
    struct virtio_net_hdr virt_hdr;
    bool has_virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;

    uint8_t l4proto;
};
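
/*
 * Expected call sequence, a sketch assuming the usual flow of the vmxnet3
 * device emulation (the exact order is up to the caller):
 *
 *   vmxnet_tx_pkt_init(&pkt, max_frags, has_virt_hdr);
 *   ...
 *   vmxnet_tx_pkt_reset(pkt);
 *   vmxnet_tx_pkt_add_raw_fragment(pkt, pa, len);  / * per guest TX buffer * /
 *   vmxnet_tx_pkt_parse(pkt);
 *   vmxnet_tx_pkt_build_vheader(pkt, tso, csum, gso_size);
 *   vmxnet_tx_pkt_send(pkt, nc);
 *   ...
 *   vmxnet_tx_pkt_uninit(pkt);
 */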
void vmxnet_tx_pkt_init(struct VmxnetTxPkt **pkt, uint32_t max_frags,
    bool has_virt_hdr)
{
    struct VmxnetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_malloc((sizeof *p->vec) *
        (max_frags + VMXNET_TX_PKT_PL_START_FRAG));

    p->raw = g_malloc((sizeof *p->raw) * max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->has_virt_hdr = has_virt_hdr;
    p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[VMXNET_TX_PKT_VHDR_FRAG].iov_len =
        p->has_virt_hdr ? sizeof p->virt_hdr : 0;
    p->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL;
    p->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len = 0;

    *pkt = p;
}
void vmxnet_tx_pkt_uninit(struct VmxnetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}
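
/*
 * For GSO (TCPv4/UDP) packets: rewrite the IPv4 total length, recompute the
 * IPv4 header checksum and store the pseudo header checksum at the L4
 * checksum offset inside the payload.
 */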
void vmxnet_tx_pkt_update_ip_checksums(struct VmxnetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t ph_raw_csum;
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    struct ip_header *ip_hdr;

    if (VIRTIO_NET_HDR_GSO_TCPV4 != gso_type &&
        VIRTIO_NET_HDR_GSO_UDP != gso_type) {
        return;
    }

    ip_hdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len);

    /* Calculate IP header checksum */
    ip_hdr->ip_sum = 0;
    csum = net_raw_checksum((uint8_t *)ip_hdr,
        pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len);
    ip_hdr->ip_sum = cpu_to_be16(csum);

    /* Calculate IP pseudo header checksum */
    ph_raw_csum = eth_calc_pseudo_hdr_csum(ip_hdr, pkt->payload_len);
    csum = cpu_to_be16(~net_checksum_finish(ph_raw_csum));
    iov_from_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}
static void vmxnet_tx_pkt_calculate_hdr_len(struct VmxnetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len;
}
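
/*
 * Pull the L2 and L3 headers out of the guest-provided 'raw' fragments into
 * the dedicated header slots of 'vec', recording l4proto and packet type.
 * Returns false if the headers cannot be read in full.
 */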
static bool vmxnet_tx_pkt_parse_headers(struct VmxnetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    l2_hdr = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < sizeof(struct eth_header)) {
        return false;
    }

    l2_hdr->iov_len = sizeof(struct eth_header);
    switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
    case ETH_P_VLAN:
        l2_hdr->iov_len += sizeof(struct vlan_header);
        break;
    case ETH_P_DVLAN:
        l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
        break;
    }

    if (bytes_read < l2_hdr->iov_len) {
        return false;
    }

    l3_proto = eth_get_l3_proto(l2_hdr->iov_base, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        l3_hdr->iov_base = g_malloc(ETH_MAX_IP4_HDR_LEN);

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));
        if (bytes_read < sizeof(struct ip_header)) {
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);
        pkt->l4proto = ((struct ip_header *) l3_hdr->iov_base)->ip_p;

        /* copy optional IPv4 header data */
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                l2_hdr->iov_len + sizeof(struct ip_header),
                                l3_hdr->iov_base + sizeof(struct ip_header),
                                l3_hdr->iov_len - sizeof(struct ip_header));
        if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
            return false;
        }
        break;

    case ETH_P_IPV6:
        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &pkt->l4proto, &full_ip6hdr_len)) {
            return false;
        }

        l3_hdr->iov_base = g_malloc(full_ip6hdr_len);

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);
        if (bytes_read < full_ip6hdr_len) {
            return false;
        }

        l3_hdr->iov_len = full_ip6hdr_len;
        break;

    default:
        l3_hdr->iov_len = 0;
        break;
    }

    vmxnet_tx_pkt_calculate_hdr_len(pkt);
    pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    return true;
}
static bool vmxnet_tx_pkt_rebuild_payload(struct VmxnetTxPkt *pkt)
{
    size_t payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;

    pkt->payload_frags = iov_copy(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, payload_len);

    if (pkt->payload_frags != (uint32_t) -1) {
        pkt->payload_len = payload_len;
        return true;
    } else {
        return false;
    }
}
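
/*
 * Parse the headers and then remap the remaining raw data as payload
 * fragments; both steps must succeed for the packet to be usable.
 */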
bool vmxnet_tx_pkt_parse(struct VmxnetTxPkt *pkt)
{
    return vmxnet_tx_pkt_parse_headers(pkt) &&
           vmxnet_tx_pkt_rebuild_payload(pkt);
}
struct virtio_net_hdr *vmxnet_tx_pkt_get_vhdr(struct VmxnetTxPkt *pkt)
{
    return &pkt->virt_hdr;
}
static uint8_t vmxnet_tx_pkt_get_gso_type(struct VmxnetTxPkt *pkt,
                                          bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base,
        pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        return rc;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

    return rc;
}
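
/*
 * Fill in the virtio-net header: GSO type and size, header length, and,
 * when checksum offload is requested, the checksum start/offset for the
 * detected L4 protocol.
 */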
void vmxnet_tx_pkt_build_vheader(struct VmxnetTxPkt *pkt, bool tso_enable,
    bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = vmxnet_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        iov_to_buf(&pkt->vec[VMXNET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                   0, &l4hdr, sizeof(l4hdr));
        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = IP_FRAG_ALIGN_SIZE(gso_size);
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }
}
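
/*
 * Insert (or update) the 802.1Q header in the packet's L2 header and grow
 * the recorded header lengths when a new tag had to be inserted.
 */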
void vmxnet_tx_pkt_setup_vlan_header(struct VmxnetTxPkt *pkt, uint16_t vlan)
{
    bool is_new;

    eth_setup_vlan_headers(pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base,
        vlan, &is_new);

    /* update l2hdrlen */
    if (is_new) {
        pkt->hdr_len += sizeof(struct vlan_header);
        pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len +=
            sizeof(struct vlan_header);
    }
}
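
/*
 * Map a guest-physical buffer and append it to the raw fragment list.
 * Returns false if the region could not be mapped contiguously at the
 * requested length.
 */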
bool vmxnet_tx_pkt_add_raw_fragment(struct VmxnetTxPkt *pkt, hwaddr pa,
    size_t len)
{
    hwaddr mapped_len = 0;
    struct iovec *ventry;

    assert(pkt->max_raw_frags > pkt->raw_frags);

    ventry = &pkt->raw[pkt->raw_frags];
    mapped_len = len;

    ventry->iov_base = cpu_physical_memory_map(pa, &mapped_len, false);
    ventry->iov_len = mapped_len;
    pkt->raw_frags += !!ventry->iov_base;

    if ((ventry->iov_base == NULL) || (len != mapped_len)) {
        return false;
    }

    return true;
}
eth_pkt_types_e vmxnet_tx_pkt_get_packet_type(struct VmxnetTxPkt *pkt)
{
    return pkt->packet_type;
}
size_t vmxnet_tx_pkt_get_total_len(struct VmxnetTxPkt *pkt)
{
    return pkt->hdr_len + pkt->payload_len;
}
void vmxnet_tx_pkt_dump(struct VmxnetTxPkt *pkt)
{
#ifdef VMXNET_TX_PKT_DEBUG
    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, "
        "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}
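
/*
 * Return the context to its post-init state: drop the cached virtio header,
 * free the L3 header copy, clear the payload entries and unmap all raw
 * guest fragments.
 */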
void vmxnet_tx_pkt_reset(struct VmxnetTxPkt *pkt)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    g_free(pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base);
    pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base = NULL;

    for (i = VMXNET_TX_PKT_L2HDR_FRAG;
         i < pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG; i++) {
        pkt->vec[i].iov_len = 0;
    }
    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    for (i = 0; i < pkt->raw_frags; i++) {
        assert(pkt->raw[i].iov_base);
        cpu_physical_memory_unmap(pkt->raw[i].iov_base, pkt->raw[i].iov_len,
                                  false, pkt->raw[i].iov_len);
        pkt->raw[i].iov_len = 0;
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->packet_type = 0;
}
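
/*
 * Software L4 checksum fallback used when the peer cannot consume a virtio
 * header: zero the checksum field, sum the payload and pseudo header, and
 * write the result back into the packet.
 */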
static void vmxnet_tx_pkt_do_sw_csum(struct VmxnetTxPkt *pkt)
{
    struct iovec *iov = &pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG];
    uint32_t csum_cntr;
    uint16_t csum = 0;
    /* num of iovec without vhdr */
    uint32_t iov_len = pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG - 1;
    uint16_t csl;
    struct ip_header *iphdr;
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csl = pkt->payload_len;

    /* data checksum */
    csum_cntr =
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl);
    /* add pseudo header to csum */
    iphdr = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
    csum_cntr += eth_calc_pseudo_hdr_csum(iphdr, csl);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}
enum {
    VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS = 0,
    VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS,
    VMXNET_TX_PKT_FRAGMENT_HEADER_NUM
};
#define VMXNET_MAX_FRAG_SG_LIST (64)
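
/*
 * Gather up to gso_size bytes of payload (starting at *src_idx/*src_offset)
 * into dst[] after the two header slots. Returns the number of payload
 * bytes placed into this fragment.
 */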
static size_t vmxnet_tx_pkt_fetch_fragment(struct VmxnetTxPkt *pkt,
    int *src_idx, size_t *src_offset, struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    *dst_idx = VMXNET_TX_PKT_FRAGMENT_HEADER_NUM;

    while (fetched < pkt->virt_hdr.gso_size) {

        /* no more place in fragment iov */
        if (*dst_idx == VMXNET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
            pkt->virt_hdr.gso_size - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}
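
/*
 * Software IPv4 fragmentation fallback: emit the packet as a sequence of
 * fragments carrying at most gso_size payload bytes each, fixing up the
 * IPv4 header (fragment offset, MF flag, checksum) before every send.
 */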
static bool vmxnet_tx_pkt_do_sw_fragmentation(struct VmxnetTxPkt *pkt,
    NetClientState *nc)
{
    struct iovec fragment[VMXNET_MAX_FRAG_SG_LIST];
    size_t fragment_len = 0;
    bool more_frags = false;

    /* some pointers for shorter code */
    void *l2_iov_base, *l3_iov_base;
    size_t l2_iov_len, l3_iov_len;
    int src_idx = VMXNET_TX_PKT_PL_START_FRAG, dst_idx;
    size_t src_offset = 0;
    size_t fragment_offset = 0;

    l2_iov_base = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_base;
    l2_iov_len = pkt->vec[VMXNET_TX_PKT_L2HDR_FRAG].iov_len;
    l3_iov_base = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_base;
    l3_iov_len = pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len;

    /* Copy headers */
    fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base;
    fragment[VMXNET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len;
    fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base;
    fragment[VMXNET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;

    /* Put as much data as possible and send */
    do {
        fragment_len = vmxnet_tx_pkt_fetch_fragment(pkt, &src_idx, &src_offset,
            fragment, &dst_idx);

        more_frags = (fragment_offset + fragment_len < pkt->payload_len);

        eth_setup_ip4_fragmentation(l2_iov_base, l2_iov_len, l3_iov_base,
            l3_iov_len, fragment_len, fragment_offset, more_frags);

        eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);

        qemu_sendv_packet(nc, fragment, dst_idx);

        fragment_offset += fragment_len;

    } while (more_frags);

    return true;
}
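
/*
 * Transmit the packet. Falls back to software checksumming and/or IPv4
 * fragmentation when the backend does not accept a virtio header, and
 * refuses GSO packets that would exceed the 64K IP datagram limit.
 */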
bool vmxnet_tx_pkt_send(struct VmxnetTxPkt *pkt, NetClientState *nc)
{
    if (!pkt->has_virt_hdr &&
        pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
        vmxnet_tx_pkt_do_sw_csum(pkt);
    }

    /*
     * Since the underlying infrastructure does not support IP datagrams
     * longer than 64K we should drop such packets and not even try to send
     * them.
     */
    if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[VMXNET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (pkt->has_virt_hdr ||
        pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        qemu_sendv_packet(nc, pkt->vec,
            pkt->payload_frags + VMXNET_TX_PKT_PL_START_FRAG);
        return true;
    }

    return vmxnet_tx_pkt_do_sw_fragmentation(pkt, nc);
}