/*
 * QEMU TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/crc32c.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
#include "hw/pci/pci_device.h"
#include "net_tx_pkt.h"

enum {
    NET_TX_PKT_VHDR_FRAG = 0,
    NET_TX_PKT_L2HDR_FRAG,
    NET_TX_PKT_L3HDR_FRAG,
    NET_TX_PKT_PL_START_FRAG
};

/* TX packet private context */
struct NetTxPkt {
    struct virtio_net_hdr virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    struct {
        struct eth_header eth;
        struct vlan_header vlan[3];
    } l2_hdr;
    union {
        struct ip_header ip;
        struct ip6_header ip6;
        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
    } l3_hdr;

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;
    uint8_t l4proto;
};

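/*
 * Typical call sequence for this API, as suggested by the functions below
 * (a sketch only, with placeholder variable names; the exact interleaving is
 * up to the device emulation code that drives NetTxPkt):
 *
 *     struct NetTxPkt *pkt;
 *     net_tx_pkt_init(&pkt, max_frags);
 *
 *     // per guest TX request:
 *     net_tx_pkt_add_raw_fragment(pkt, buf, len);    // or the _pci variant
 *     net_tx_pkt_parse(pkt);                         // split L2/L3 headers and payload
 *     net_tx_pkt_build_vheader(pkt, tso, csum, mss);
 *     net_tx_pkt_send(pkt, nc);                      // or net_tx_pkt_send_custom()
 *     net_tx_pkt_reset(pkt, free_cb, ctx);           // release raw fragments
 *
 *     net_tx_pkt_uninit(pkt);
 */
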
void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
{
    struct NetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);

    p->raw = g_new(struct iovec, max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof p->virt_hdr;
    p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr;

    *pkt = p;
}

void net_tx_pkt_uninit(struct NetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}

void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
    uint16_t csum;
    assert(pkt);

    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

    pkt->l3_hdr.ip.ip_sum = 0;
    csum = net_raw_checksum(pkt->l3_hdr.octets,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}

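/*
 * For GSO packets the L4 checksum field is conventionally pre-seeded with the
 * IP pseudo-header checksum, so that whoever completes the checksum only has
 * to add the data bytes. net_tx_pkt_update_ip_checksums() below computes that
 * seed and stores it at virt_hdr.csum_offset within the payload.
 */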
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t cntr, cso;
    assert(pkt);
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_UDP) {
        /* Calculate IP header checksum */
        net_tx_pkt_update_ip_hdr_checksum(pkt);

        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
                                            IP_PROTO_TCP, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else {
        return;
    }

    iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}

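/*
 * SCTP does not use the Internet checksum: its common header carries a CRC32C
 * at byte offset 8. The helper below zeroes that field, computes CRC32C over
 * the whole L4 payload and writes the result back in place.
 */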
bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
{
    uint32_t csum = 0;
    struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;

    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag, pkt->payload_frags));
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    return true;
}

static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
}

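/*
 * Copy the L2 (Ethernet, possibly VLAN-tagged) and L3 (IPv4 with options, or
 * IPv6 with extension headers) headers out of the scattered raw fragments
 * into the contiguous l2_hdr/l3_hdr buffers, record the L4 protocol and the
 * packet type, and compute hdr_len. Returns false if a header is truncated
 * or oversized.
 */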
static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    assert(pkt);

    l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < sizeof(struct eth_header)) {
        l2_hdr->iov_len = 0;
        return false;
    }

    l2_hdr->iov_len = sizeof(struct eth_header);
    switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
    case ETH_P_VLAN:
        l2_hdr->iov_len += sizeof(struct vlan_header);
        break;
    case ETH_P_DVLAN:
        l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
        break;
    }

    if (bytes_read < l2_hdr->iov_len) {
        l2_hdr->iov_len = 0;
        l3_hdr->iov_len = 0;
        pkt->packet_type = ETH_PKT_UCAST;
        return false;
    } else {
        l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
        l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
        pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    }

    l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));

        if (bytes_read < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);

        if (l3_hdr->iov_len < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base);

        if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) {
            /* copy optional IPv4 header data if any */
            bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                    l2_hdr->iov_len + sizeof(struct ip_header),
                                    l3_hdr->iov_base + sizeof(struct ip_header),
                                    l3_hdr->iov_len - sizeof(struct ip_header));
            if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
                l3_hdr->iov_len = 0;
                return false;
            }
        }

        break;

    case ETH_P_IPV6:
    {
        eth_ip6_hdr_info hdrinfo;

        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &hdrinfo)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = hdrinfo.l4proto;
        full_ip6hdr_len = hdrinfo.full_hdr_len;

        if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) {
            l3_hdr->iov_len = 0;
            return false;
        }

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);

        if (bytes_read < full_ip6hdr_len) {
            l3_hdr->iov_len = 0;
            return false;
        } else {
            l3_hdr->iov_len = full_ip6hdr_len;
        }
        break;
    }
    default:
        l3_hdr->iov_len = 0;
        break;
    }

    net_tx_pkt_calculate_hdr_len(pkt);
    return true;
}

static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
{
    pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
    pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, pkt->payload_len);
}

bool net_tx_pkt_parse(struct NetTxPkt *pkt)
{
    if (net_tx_pkt_parse_headers(pkt)) {
        net_tx_pkt_rebuild_payload(pkt);
        return true;
    } else {
        return false;
    }
}

struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
{
    assert(pkt);
    return &pkt->virt_hdr;
}

static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
                                       bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        goto func_exit;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

func_exit:
    return rc;
}

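/*
 * Fill in the virtio-net header from the parsed packet: GSO type and segment
 * size when TSO is requested, and checksum-offload start/offset when checksum
 * insertion is requested. Returns false if the TCP/UDP header cannot be read
 * from the payload or is malformed.
 */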
bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
                              bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    size_t bytes_read;
    assert(pkt);

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = gso_size;
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        bytes_read = iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr));
        if (bytes_read < sizeof(l4hdr) ||
            l4hdr.th_off * sizeof(uint32_t) < sizeof(l4hdr)) {
            return false;
        }

        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = gso_size;
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            if (pkt->payload_len < sizeof(struct tcp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            if (pkt->payload_len < sizeof(struct udp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }

    return true;
}

void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
                                     uint16_t vlan, uint16_t vlan_ethtype)
{
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
                           &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
                           vlan, vlan_ethtype);

    pkt->hdr_len += sizeof(struct vlan_header);
}

bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
{
    struct iovec *ventry;
    assert(pkt);

    if (pkt->raw_frags >= pkt->max_raw_frags) {
        return false;
    }

    ventry = &pkt->raw[pkt->raw_frags];
    ventry->iov_base = base;
    ventry->iov_len = len;
    pkt->raw_frags++;

    return true;
}

bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
{
    return pkt->raw_frags > 0;
}

eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->packet_type;
}

size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->hdr_len + pkt->payload_len;
}

void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %lu, "
        "l3hdr_len: %lu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}

void net_tx_pkt_reset(struct NetTxPkt *pkt,
                      NetTxPktFreeFrag callback, void *context)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    assert(pkt->vec);

    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    if (pkt->max_raw_frags > 0) {
        assert(pkt->raw);
        for (i = 0; i < pkt->raw_frags; i++) {
            assert(pkt->raw[i].iov_base);
            callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
        }
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->l4proto = 0;
}

void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
{
    pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
}

bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
                                     dma_addr_t pa, size_t len)
{
    dma_addr_t mapped_len = len;
    void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE);
    if (!base) {
        return false;
    }

    if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
        net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
        return false;
    }

    return true;
}

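/*
 * Software L4 checksum fallback: zero the checksum field, start from the
 * IPv4/IPv6 pseudo-header sum, add the csl bytes of L4 data beginning at
 * virt_hdr.csum_start, and store the folded result back into the packet.
 */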
static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
                                  struct iovec *iov, uint32_t iov_len,
                                  uint16_t csl)
{
    uint32_t csum_cntr;
    uint16_t csum = 0;
    uint32_t cso;
    /* offset of the L4 checksum field, counted from the start of the L2 header */
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
    uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csum_cntr = 0;
    cso = 0;
    /* add pseudo header to csum */
    if (l3_proto == ETH_P_IP) {
        csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, &cso);
    } else if (l3_proto == ETH_P_IPV6) {
        csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, pkt->l4proto, &cso);
    }

    /* data checksum */
    csum_cntr +=
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}

#define NET_MAX_FRAG_SG_LIST (64)

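/*
 * Gather up to src_len bytes from pkt->vec, starting at (*src_idx,
 * *src_offset), into the dst iovec beginning at *dst_idx. The cursors are
 * advanced so consecutive calls walk the payload; returns the number of
 * bytes actually described.
 */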
static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
    int *src_idx, size_t *src_offset, size_t src_len,
    struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    while (fetched < src_len) {

        /* no more place in fragment iov */
        if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
            src_len - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}

static void net_tx_pkt_sendv(
    void *opaque, const struct iovec *iov, int iov_cnt,
    const struct iovec *virt_iov, int virt_iov_cnt)
{
    NetClientState *nc = opaque;

    if (qemu_get_using_vnet_hdr(nc->peer)) {
        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
    } else {
        qemu_sendv_packet(nc, iov, iov_cnt);
    }
}

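/*
 * TSO setup: copy the TCP header out of the payload into its own allocation
 * (so per-segment fields can be rewritten), clear FIN/PSH, and position the
 * source cursor at the first payload byte. src_len becomes the segment size
 * (virt_hdr.gso_size) used for every segment.
 */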
static bool net_tx_pkt_tcp_fragment_init(struct NetTxPkt *pkt,
                                         struct iovec *fragment,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx,
                                         size_t *src_offset,
                                         size_t *src_len)
{
    struct iovec *l4 = fragment + NET_TX_PKT_PL_START_FRAG;
    size_t bytes_read = 0;
    struct tcp_hdr *th;

    if (!pkt->payload_frags) {
        return false;
    }

    l4->iov_len = pkt->virt_hdr.hdr_len - pkt->hdr_len;
    l4->iov_base = g_malloc(l4->iov_len);

    *src_idx = NET_TX_PKT_PL_START_FRAG;
    while (pkt->vec[*src_idx].iov_len < l4->iov_len - bytes_read) {
        memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
               pkt->vec[*src_idx].iov_len);

        bytes_read += pkt->vec[*src_idx].iov_len;

        (*src_idx)++;
        if (*src_idx >= pkt->payload_frags + NET_TX_PKT_PL_START_FRAG) {
            g_free(l4->iov_base);
            return false;
        }
    }

    *src_offset = l4->iov_len - bytes_read;
    memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
           *src_offset);

    th = l4->iov_base;
    th->th_flags &= ~(TH_FIN | TH_PUSH);

    *pl_idx = NET_TX_PKT_PL_START_FRAG + 1;
    *l4hdr_len = l4->iov_len;
    *src_len = pkt->virt_hdr.gso_size;

    return true;
}

static void net_tx_pkt_tcp_fragment_deinit(struct iovec *fragment)
{
    g_free(fragment[NET_TX_PKT_PL_START_FRAG].iov_base);
}

static void net_tx_pkt_tcp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_len,
                                        uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct ip6_header *ip6 = l3hdr->iov_base;
    size_t len = l3hdr->iov_len + l4hdr->iov_len + fragment_len;

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
        ip->ip_len = cpu_to_be16(len);
        eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV6:
        len -= sizeof(struct ip6_header);
        ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = cpu_to_be16(len);
        break;
    }
}

static void net_tx_pkt_tcp_fragment_advance(struct NetTxPkt *pkt,
                                            struct iovec *fragment,
                                            size_t fragment_len,
                                            uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct tcp_hdr *th = l4hdr->iov_base;

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4) {
        ip->ip_id = cpu_to_be16(be16_to_cpu(ip->ip_id) + 1);
    }

    th->th_seq = cpu_to_be32(be32_to_cpu(th->th_seq) + fragment_len);
    th->th_flags &= ~TH_CWR;
}

static void net_tx_pkt_udp_fragment_init(struct NetTxPkt *pkt,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx, size_t *src_offset,
                                         size_t *src_len)
{
    *pl_idx = NET_TX_PKT_PL_START_FRAG;
    *l4hdr_len = 0;
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    *src_offset = 0;
    *src_len = IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size);
}

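/*
 * UFO is handled as plain IPv4 fragmentation: for each fragment rewrite the
 * fragment offset, the MF flag and the total length, then recompute the IPv4
 * header checksum.
 */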
static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_offset,
                                        size_t fragment_len)
{
    bool more_frags = fragment_offset + fragment_len < pkt->payload_len;
    uint16_t orig_flags;
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    uint16_t frag_off_units = fragment_offset / IP_FRAG_UNIT_SIZE;
    uint16_t new_ip_off;

    assert(fragment_offset % IP_FRAG_UNIT_SIZE == 0);
    assert((frag_off_units & ~IP_OFFMASK) == 0);

    orig_flags = be16_to_cpu(ip->ip_off) & ~(IP_OFFMASK | IP_MF);
    new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
    ip->ip_off = cpu_to_be16(new_ip_off);
    ip->ip_len = cpu_to_be16(l3hdr->iov_len + fragment_len);

    eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
}

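/*
 * Software GSO fallback, used when the peer cannot take a virtio-net header:
 * build a per-fragment iovec that reuses the original L2/L3 headers, pull
 * MSS-sized (TCP) or gso_size-aligned (UDP) chunks of payload, patch the
 * headers for each fragment, and hand every fragment to the callback.
 */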
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
                                           NetTxPktSend callback,
                                           void *context)
{
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    struct iovec fragment[NET_MAX_FRAG_SG_LIST];
    size_t fragment_len;
    size_t l4hdr_len;
    size_t src_len;

    int src_idx, dst_idx, pl_idx;
    size_t src_offset;
    size_t fragment_offset = 0;
    struct virtio_net_hdr virt_hdr = {
        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
                 VIRTIO_NET_HDR_F_DATA_VALID : 0
    };

    /* Copy headers */
    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
    fragment[NET_TX_PKT_L2HDR_FRAG] = pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    fragment[NET_TX_PKT_L3HDR_FRAG] = pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        if (!net_tx_pkt_tcp_fragment_init(pkt, fragment, &pl_idx, &l4hdr_len,
                                          &src_idx, &src_offset, &src_len)) {
            return false;
        }
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                              pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                              pkt->payload_len);
        net_tx_pkt_udp_fragment_init(pkt, &pl_idx, &l4hdr_len,
                                     &src_idx, &src_offset, &src_len);
        break;

    default:
        abort();
    }

    /* Put as much data as possible and send */
    while (true) {
        dst_idx = pl_idx;
        fragment_len = net_tx_pkt_fetch_fragment(pkt,
            &src_idx, &src_offset, src_len, fragment, &dst_idx);
        if (!fragment_len) {
            break;
        }

        switch (gso_type) {
        case VIRTIO_NET_HDR_GSO_TCPV4:
        case VIRTIO_NET_HDR_GSO_TCPV6:
            net_tx_pkt_tcp_fragment_fix(pkt, fragment, fragment_len, gso_type);
            net_tx_pkt_do_sw_csum(pkt, fragment + NET_TX_PKT_L2HDR_FRAG,
                                  dst_idx - NET_TX_PKT_L2HDR_FRAG,
                                  l4hdr_len + fragment_len);
            break;

        case VIRTIO_NET_HDR_GSO_UDP:
            net_tx_pkt_udp_fragment_fix(pkt, fragment, fragment_offset,
                                        fragment_len);
            break;
        }

        callback(context,
                 fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
                 fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);

        if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
            gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
            net_tx_pkt_tcp_fragment_advance(pkt, fragment, fragment_len,
                                            gso_type);
        }

        fragment_offset += fragment_len;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        net_tx_pkt_tcp_fragment_deinit(fragment);
    }

    return true;
}

bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
    bool offload = qemu_get_using_vnet_hdr(nc->peer);
    return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
}

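/*
 * Send the packet through the supplied callback. If the peer handles offloads
 * (or no GSO is needed) the packet goes out as a single iovec, with a software
 * checksum applied when the peer cannot compute it; otherwise it is segmented
 * or fragmented in software first.
 */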
bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
                            NetTxPktSend callback, void *context)
{
    assert(pkt);

    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    /*
     * Since the underlying infrastructure does not support IP datagrams
     * longer than 64K, drop such packets and do not even try to send them.
     */
    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
            net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                  pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                                  pkt->payload_len);
        }

        net_tx_pkt_fix_ip6_payload_len(pkt);
        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);
        return true;
    }

    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}

void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
    struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
        /*
         * TODO: if QEMU gains support for >64K packets, add a jumbo option
         * check here, something like:
         * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
         */
        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
            if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
            }
            /*
             * TODO: if QEMU gains support for >64K packets, add the jumbo
             * option for packets greater than 65,535 bytes.
             */
        }
    }
}