/*
 * QEMU TX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qemu/crc32c.h"
#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "net/net.h"
#include "hw/pci/pci_device.h"
#include "net_tx_pkt.h"

enum {
    NET_TX_PKT_VHDR_FRAG = 0,
    NET_TX_PKT_L2HDR_FRAG,
    NET_TX_PKT_L3HDR_FRAG,
    NET_TX_PKT_PL_START_FRAG
};

/* TX packet private context */
struct NetTxPkt {
    struct virtio_net_hdr virt_hdr;

    struct iovec *raw;
    uint32_t raw_frags;
    uint32_t max_raw_frags;

    struct iovec *vec;

    struct {
        struct eth_header eth;
        struct vlan_header vlan[3];
    } l2_hdr;
    union {
        struct ip_header ip;
        struct ip6_header ip6;
        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
    } l3_hdr;

    uint32_t payload_len;

    uint32_t payload_frags;
    uint32_t max_payload_frags;

    uint16_t hdr_len;
    eth_pkt_types_e packet_type;
    uint8_t l4proto;
};

void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
{
    struct NetTxPkt *p = g_malloc0(sizeof *p);

    p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);

    p->raw = g_new(struct iovec, max_frags);

    p->max_payload_frags = max_frags;
    p->max_raw_frags = max_frags;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_base = &p->virt_hdr;
    p->vec[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof p->virt_hdr;
    p->vec[NET_TX_PKT_L2HDR_FRAG].iov_base = &p->l2_hdr;
    p->vec[NET_TX_PKT_L3HDR_FRAG].iov_base = &p->l3_hdr;

    *pkt = p;
}
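
/*
 * Usage sketch (illustrative only; "s", "tx_pkt" and MY_MAX_TX_FRAGS are
 * hypothetical names, not identifiers defined by this module). A device
 * model typically allocates one context per TX queue at realize time and
 * reuses it for every packet:
 *
 *     net_tx_pkt_init(&s->tx_pkt, MY_MAX_TX_FRAGS);
 *     ...
 *     net_tx_pkt_uninit(s->tx_pkt);
 */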

void net_tx_pkt_uninit(struct NetTxPkt *pkt)
{
    if (pkt) {
        g_free(pkt->vec);
        g_free(pkt->raw);
        g_free(pkt);
    }
}

void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
{
    uint16_t csum;
    assert(pkt);

    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

    pkt->l3_hdr.ip.ip_sum = 0;
    csum = net_raw_checksum(pkt->l3_hdr.octets,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
}

void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
{
    uint16_t csum;
    uint32_t cntr, cso;
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
    void *ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

    if (pkt->payload_len + pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len >
        ETH_MAX_IP_DGRAM_LEN) {
        return;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_UDP) {
        /* Calculate IP header checksum */
        net_tx_pkt_update_ip_hdr_checksum(pkt);

        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip4_pseudo_hdr_csum(ip_hdr, pkt->payload_len, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else if (gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        /* Calculate IP pseudo header checksum */
        cntr = eth_calc_ip6_pseudo_hdr_csum(ip_hdr, pkt->payload_len,
                                            IP_PROTO_TCP, &cso);
        csum = cpu_to_be16(~net_checksum_finish(cntr));
    } else {
        return;
    }

    iov_from_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG], pkt->payload_frags,
                 pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
}

bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
{
    uint32_t csum = 0;
    struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;

    if (iov_size(pl_start_frag, pkt->payload_frags) < 8 + sizeof(csum)) {
        return false;
    }

    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag, pkt->payload_frags));
    if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
        return false;
    }

    return true;
}

static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
{
    pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
}

static bool net_tx_pkt_parse_headers(struct NetTxPkt *pkt)
{
    struct iovec *l2_hdr, *l3_hdr;
    size_t bytes_read;
    size_t full_ip6hdr_len;
    uint16_t l3_proto;

    assert(pkt);

    l2_hdr = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    l3_hdr = &pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, 0, l2_hdr->iov_base,
                            ETH_MAX_L2_HDR_LEN);
    if (bytes_read < sizeof(struct eth_header)) {
        l2_hdr->iov_len = 0;
        return false;
    }

    l2_hdr->iov_len = sizeof(struct eth_header);
    switch (be16_to_cpu(PKT_GET_ETH_HDR(l2_hdr->iov_base)->h_proto)) {
    case ETH_P_VLAN:
        l2_hdr->iov_len += sizeof(struct vlan_header);
        break;
    case ETH_P_DVLAN:
        l2_hdr->iov_len += 2 * sizeof(struct vlan_header);
        break;
    }

    if (bytes_read < l2_hdr->iov_len) {
        l2_hdr->iov_len = 0;
        l3_hdr->iov_len = 0;
        pkt->packet_type = ETH_PKT_UCAST;
        return false;
    } else {
        l2_hdr->iov_len = ETH_MAX_L2_HDR_LEN;
        l2_hdr->iov_len = eth_get_l2_hdr_length(l2_hdr->iov_base);
        pkt->packet_type = get_eth_packet_type(l2_hdr->iov_base);
    }

    l3_proto = eth_get_l3_proto(l2_hdr, 1, l2_hdr->iov_len);

    switch (l3_proto) {
    case ETH_P_IP:
        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, sizeof(struct ip_header));

        if (bytes_read < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = IP_HDR_GET_LEN(l3_hdr->iov_base);

        if (l3_hdr->iov_len < sizeof(struct ip_header)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = IP_HDR_GET_P(l3_hdr->iov_base);

        if (IP_HDR_GET_LEN(l3_hdr->iov_base) != sizeof(struct ip_header)) {
            /* copy optional IPv4 header data if any */
            bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags,
                                    l2_hdr->iov_len + sizeof(struct ip_header),
                                    l3_hdr->iov_base + sizeof(struct ip_header),
                                    l3_hdr->iov_len - sizeof(struct ip_header));
            if (bytes_read < l3_hdr->iov_len - sizeof(struct ip_header)) {
                l3_hdr->iov_len = 0;
                return false;
            }
        }

        break;

    case ETH_P_IPV6:
    {
        eth_ip6_hdr_info hdrinfo;

        if (!eth_parse_ipv6_hdr(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                &hdrinfo)) {
            l3_hdr->iov_len = 0;
            return false;
        }

        pkt->l4proto = hdrinfo.l4proto;
        full_ip6hdr_len = hdrinfo.full_hdr_len;

        if (full_ip6hdr_len > ETH_MAX_IP_DGRAM_LEN) {
            l3_hdr->iov_len = 0;
            return false;
        }

        bytes_read = iov_to_buf(pkt->raw, pkt->raw_frags, l2_hdr->iov_len,
                                l3_hdr->iov_base, full_ip6hdr_len);

        if (bytes_read < full_ip6hdr_len) {
            l3_hdr->iov_len = 0;
            return false;
        }

        l3_hdr->iov_len = full_ip6hdr_len;
        break;
    }

    default:
        l3_hdr->iov_len = 0;
        break;
    }

    net_tx_pkt_calculate_hdr_len(pkt);
    return true;
}

static void net_tx_pkt_rebuild_payload(struct NetTxPkt *pkt)
{
    pkt->payload_len = iov_size(pkt->raw, pkt->raw_frags) - pkt->hdr_len;
    pkt->payload_frags = iov_copy(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                  pkt->max_payload_frags,
                                  pkt->raw, pkt->raw_frags,
                                  pkt->hdr_len, pkt->payload_len);
}

bool net_tx_pkt_parse(struct NetTxPkt *pkt)
{
    if (net_tx_pkt_parse_headers(pkt)) {
        net_tx_pkt_rebuild_payload(pkt);
        return true;
    }

    return false;
}

struct virtio_net_hdr *net_tx_pkt_get_vhdr(struct NetTxPkt *pkt)
{
    assert(pkt);
    return &pkt->virt_hdr;
}

static uint8_t net_tx_pkt_get_gso_type(struct NetTxPkt *pkt,
                                       bool tso_enable)
{
    uint8_t rc = VIRTIO_NET_HDR_GSO_NONE;
    uint16_t l3_proto;

    l3_proto = eth_get_l3_proto(&pkt->vec[NET_TX_PKT_L2HDR_FRAG], 1,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len);

    if (!tso_enable) {
        goto func_exit;
    }

    rc = eth_get_gso_type(l3_proto, pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                          pkt->l4proto);

func_exit:
    return rc;
}

bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
    bool csum_enable, uint32_t gso_size)
{
    struct tcp_hdr l4hdr;
    size_t bytes_read;
    assert(pkt);

    /* csum has to be enabled if tso is. */
    assert(csum_enable || !tso_enable);

    pkt->virt_hdr.gso_type = net_tx_pkt_get_gso_type(pkt, tso_enable);

    switch (pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
    case VIRTIO_NET_HDR_GSO_NONE:
        pkt->virt_hdr.hdr_len = 0;
        pkt->virt_hdr.gso_size = 0;
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        pkt->virt_hdr.gso_size = gso_size;
        pkt->virt_hdr.hdr_len = pkt->hdr_len + sizeof(struct udp_header);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        bytes_read = iov_to_buf(&pkt->vec[NET_TX_PKT_PL_START_FRAG],
                                pkt->payload_frags, 0, &l4hdr, sizeof(l4hdr));
        if (bytes_read < sizeof(l4hdr) ||
            l4hdr.th_off * sizeof(uint32_t) < sizeof(l4hdr)) {
            return false;
        }

        pkt->virt_hdr.hdr_len = pkt->hdr_len + l4hdr.th_off * sizeof(uint32_t);
        pkt->virt_hdr.gso_size = gso_size;
        break;

    default:
        g_assert_not_reached();
    }

    if (csum_enable) {
        switch (pkt->l4proto) {
        case IP_PROTO_TCP:
            if (pkt->payload_len < sizeof(struct tcp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct tcp_hdr, th_sum);
            break;
        case IP_PROTO_UDP:
            if (pkt->payload_len < sizeof(struct udp_hdr)) {
                return false;
            }
            pkt->virt_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
            pkt->virt_hdr.csum_start = pkt->hdr_len;
            pkt->virt_hdr.csum_offset = offsetof(struct udp_hdr, uh_sum);
            break;
        default:
            break;
        }
    }

    return true;
}

void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
    uint16_t vlan, uint16_t vlan_ethtype)
{
    assert(pkt);

    eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
                           &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
                           vlan, vlan_ethtype);

    pkt->hdr_len += sizeof(struct vlan_header);
}

bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
{
    struct iovec *ventry;
    assert(pkt);

    if (pkt->raw_frags >= pkt->max_raw_frags) {
        return false;
    }

    ventry = &pkt->raw[pkt->raw_frags];
    ventry->iov_base = base;
    ventry->iov_len = len;

    pkt->raw_frags++;

    return true;
}

bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
{
    return pkt->raw_frags > 0;
}

eth_pkt_types_e net_tx_pkt_get_packet_type(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->packet_type;
}

size_t net_tx_pkt_get_total_len(struct NetTxPkt *pkt)
{
    assert(pkt);

    return pkt->hdr_len + pkt->payload_len;
}

void net_tx_pkt_dump(struct NetTxPkt *pkt)
{
#ifdef NET_TX_PKT_DEBUG
    assert(pkt);

    printf("TX PKT: hdr_len: %d, pkt_type: 0x%X, l2hdr_len: %zu, "
        "l3hdr_len: %zu, payload_len: %u\n", pkt->hdr_len, pkt->packet_type,
        pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
        pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len, pkt->payload_len);
#endif
}

void net_tx_pkt_reset(struct NetTxPkt *pkt,
                      NetTxPktFreeFrag callback, void *context)
{
    int i;

    /* no assert, as reset can be called before tx_pkt_init */
    if (!pkt) {
        return;
    }

    memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));

    assert(pkt->vec);

    pkt->payload_len = 0;
    pkt->payload_frags = 0;

    if (pkt->max_raw_frags > 0) {
        assert(pkt->raw);
        for (i = 0; i < pkt->raw_frags; i++) {
            assert(pkt->raw[i].iov_base);
            callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
        }
    }
    pkt->raw_frags = 0;

    pkt->hdr_len = 0;
    pkt->l4proto = 0;
}

void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
{
    pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
}

bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
                                     dma_addr_t pa, size_t len)
{
    dma_addr_t mapped_len = len;
    void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE);
    if (!base) {
        return false;
    }

    if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
        net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
        return false;
    }

    return true;
}

static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
                                  struct iovec *iov, uint32_t iov_len,
                                  uint16_t csl)
{
    uint32_t csum_cntr;
    uint16_t csum = 0;
    uint32_t cso;
    /* num of iovec without vhdr */
    size_t csum_offset = pkt->virt_hdr.csum_start + pkt->virt_hdr.csum_offset;
    uint16_t l3_proto = eth_get_l3_proto(iov, 1, iov->iov_len);

    /* Put zero to checksum field */
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);

    /* Calculate L4 TCP/UDP checksum */
    csum_cntr = 0;
    cso = 0;
    /* add pseudo header to csum */
    if (l3_proto == ETH_P_IP) {
        csum_cntr = eth_calc_ip4_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base, csl, &cso);
    } else if (l3_proto == ETH_P_IPV6) {
        csum_cntr = eth_calc_ip6_pseudo_hdr_csum(
                pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base,
                csl, pkt->l4proto, &cso);
    }

    /* data checksum */
    csum_cntr +=
        net_checksum_add_iov(iov, iov_len, pkt->virt_hdr.csum_start, csl, cso);

    /* Put the checksum obtained into the packet */
    csum = cpu_to_be16(net_checksum_finish_nozero(csum_cntr));
    iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}

#define NET_MAX_FRAG_SG_LIST (64)

static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
    int *src_idx, size_t *src_offset, size_t src_len,
    struct iovec *dst, int *dst_idx)
{
    size_t fetched = 0;
    struct iovec *src = pkt->vec;

    while (fetched < src_len) {

        /* no more place in fragment iov */
        if (*dst_idx == NET_MAX_FRAG_SG_LIST) {
            break;
        }

        /* no more data in iovec */
        if (*src_idx == (pkt->payload_frags + NET_TX_PKT_PL_START_FRAG)) {
            break;
        }

        dst[*dst_idx].iov_base = src[*src_idx].iov_base + *src_offset;
        dst[*dst_idx].iov_len = MIN(src[*src_idx].iov_len - *src_offset,
            src_len - fetched);

        *src_offset += dst[*dst_idx].iov_len;
        fetched += dst[*dst_idx].iov_len;

        if (*src_offset == src[*src_idx].iov_len) {
            *src_offset = 0;
            (*src_idx)++;
        }

        (*dst_idx)++;
    }

    return fetched;
}

static void net_tx_pkt_sendv(
    void *opaque, const struct iovec *iov, int iov_cnt,
    const struct iovec *virt_iov, int virt_iov_cnt)
{
    NetClientState *nc = opaque;

    if (qemu_get_vnet_hdr_len(nc->peer)) {
        qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
    } else {
        qemu_sendv_packet(nc, iov, iov_cnt);
    }
}

static bool net_tx_pkt_tcp_fragment_init(struct NetTxPkt *pkt,
                                         struct iovec *fragment,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx,
                                         size_t *src_offset,
                                         size_t *src_len)
{
    struct iovec *l4 = fragment + NET_TX_PKT_PL_START_FRAG;
    size_t bytes_read = 0;
    struct tcp_hdr *th;

    if (!pkt->payload_frags) {
        return false;
    }

    l4->iov_len = pkt->virt_hdr.hdr_len - pkt->hdr_len;
    l4->iov_base = g_malloc(l4->iov_len);

    *src_idx = NET_TX_PKT_PL_START_FRAG;
    while (pkt->vec[*src_idx].iov_len < l4->iov_len - bytes_read) {
        memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
               pkt->vec[*src_idx].iov_len);

        bytes_read += pkt->vec[*src_idx].iov_len;

        (*src_idx)++;
        if (*src_idx >= pkt->payload_frags + NET_TX_PKT_PL_START_FRAG) {
            g_free(l4->iov_base);
            return false;
        }
    }

    *src_offset = l4->iov_len - bytes_read;
    memcpy((char *)l4->iov_base + bytes_read, pkt->vec[*src_idx].iov_base,
           *src_offset);

    th = l4->iov_base;
    th->th_flags &= ~(TH_FIN | TH_PUSH);

    *pl_idx = NET_TX_PKT_PL_START_FRAG + 1;
    *l4hdr_len = l4->iov_len;
    *src_len = pkt->virt_hdr.gso_size;

    return true;
}

static void net_tx_pkt_tcp_fragment_deinit(struct iovec *fragment)
{
    g_free(fragment[NET_TX_PKT_PL_START_FRAG].iov_base);
}

static void net_tx_pkt_tcp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_len,
                                        uint8_t gso_type)
{
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct iovec *l4hdr = fragment + NET_TX_PKT_PL_START_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    struct ip6_header *ip6 = l3hdr->iov_base;
    size_t len = l3hdr->iov_len + l4hdr->iov_len + fragment_len;

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
        ip->ip_len = cpu_to_be16(len);
        eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
        break;

    case VIRTIO_NET_HDR_GSO_TCPV6:
        len -= sizeof(struct ip6_header);
        ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = cpu_to_be16(len);
        break;
    }
}
*pkt
,
669 struct iovec
*fragment
,
673 struct iovec
*l3hdr
= fragment
+ NET_TX_PKT_L3HDR_FRAG
;
674 struct iovec
*l4hdr
= fragment
+ NET_TX_PKT_PL_START_FRAG
;
675 struct ip_header
*ip
= l3hdr
->iov_base
;
676 struct tcp_hdr
*th
= l4hdr
->iov_base
;
678 if (gso_type
== VIRTIO_NET_HDR_GSO_TCPV4
) {
679 ip
->ip_id
= cpu_to_be16(be16_to_cpu(ip
->ip_id
) + 1);
682 th
->th_seq
= cpu_to_be32(be32_to_cpu(th
->th_seq
) + fragment_len
);
683 th
->th_flags
&= ~TH_CWR
;

static void net_tx_pkt_udp_fragment_init(struct NetTxPkt *pkt,
                                         int *pl_idx,
                                         size_t *l4hdr_len,
                                         int *src_idx, size_t *src_offset,
                                         size_t *src_len)
{
    *pl_idx = NET_TX_PKT_PL_START_FRAG;
    *l4hdr_len = 0;
    *src_idx = NET_TX_PKT_PL_START_FRAG;
    *src_offset = 0;
    *src_len = IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size);
}

static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
                                        struct iovec *fragment,
                                        size_t fragment_offset,
                                        size_t fragment_len)
{
    bool more_frags = fragment_offset + fragment_len < pkt->payload_len;
    uint16_t orig_flags;
    struct iovec *l3hdr = fragment + NET_TX_PKT_L3HDR_FRAG;
    struct ip_header *ip = l3hdr->iov_base;
    uint16_t frag_off_units = fragment_offset / IP_FRAG_UNIT_SIZE;
    uint16_t new_ip_off;

    assert(fragment_offset % IP_FRAG_UNIT_SIZE == 0);
    assert((frag_off_units & ~IP_OFFMASK) == 0);

    orig_flags = be16_to_cpu(ip->ip_off) & ~(IP_OFFMASK | IP_MF);
    new_ip_off = frag_off_units | orig_flags | (more_frags ? IP_MF : 0);
    ip->ip_off = cpu_to_be16(new_ip_off);
    ip->ip_len = cpu_to_be16(l3hdr->iov_len + fragment_len);

    eth_fix_ip4_checksum(l3hdr->iov_base, l3hdr->iov_len);
}

static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
                                           NetTxPktSend callback,
                                           void *context)
{
    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    struct iovec fragment[NET_MAX_FRAG_SG_LIST];
    size_t fragment_len;
    size_t l4hdr_len;
    size_t src_len;

    int src_idx, dst_idx, pl_idx;
    size_t src_offset;
    size_t fragment_offset = 0;
    struct virtio_net_hdr virt_hdr = {
        .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
                 VIRTIO_NET_HDR_F_DATA_VALID : 0
    };

    /* Share the prepared virtio/L2/L3 headers with every fragment */
    fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
    fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
    fragment[NET_TX_PKT_L2HDR_FRAG] = pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    fragment[NET_TX_PKT_L3HDR_FRAG] = pkt->vec[NET_TX_PKT_L3HDR_FRAG];

    switch (gso_type) {
    case VIRTIO_NET_HDR_GSO_TCPV4:
    case VIRTIO_NET_HDR_GSO_TCPV6:
        if (!net_tx_pkt_tcp_fragment_init(pkt, fragment, &pl_idx, &l4hdr_len,
                                          &src_idx, &src_offset, &src_len)) {
            return false;
        }
        break;

    case VIRTIO_NET_HDR_GSO_UDP:
        net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                              pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                              pkt->payload_len);
        net_tx_pkt_udp_fragment_init(pkt, &pl_idx, &l4hdr_len,
                                     &src_idx, &src_offset, &src_len);
        break;

    default:
        g_assert_not_reached();
    }

    /* Put as much data as possible and send */
    while (true) {
        dst_idx = pl_idx;
        fragment_len = net_tx_pkt_fetch_fragment(pkt,
            &src_idx, &src_offset, src_len, fragment, &dst_idx);
        if (!fragment_len) {
            break;
        }

        switch (gso_type) {
        case VIRTIO_NET_HDR_GSO_TCPV4:
        case VIRTIO_NET_HDR_GSO_TCPV6:
            net_tx_pkt_tcp_fragment_fix(pkt, fragment, fragment_len, gso_type);
            net_tx_pkt_do_sw_csum(pkt, fragment + NET_TX_PKT_L2HDR_FRAG,
                                  dst_idx - NET_TX_PKT_L2HDR_FRAG,
                                  l4hdr_len + fragment_len);
            break;

        case VIRTIO_NET_HDR_GSO_UDP:
            net_tx_pkt_udp_fragment_fix(pkt, fragment, fragment_offset,
                                        fragment_len);
            break;
        }

        callback(context,
                 fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
                 fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);

        if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
            gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
            net_tx_pkt_tcp_fragment_advance(pkt, fragment, fragment_len,
                                            gso_type);
        }

        fragment_offset += fragment_len;
    }

    if (gso_type == VIRTIO_NET_HDR_GSO_TCPV4 ||
        gso_type == VIRTIO_NET_HDR_GSO_TCPV6) {
        net_tx_pkt_tcp_fragment_deinit(fragment);
    }

    return true;
}

bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
    bool offload = qemu_get_vnet_hdr_len(nc->peer);

    return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
}

bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
                            NetTxPktSend callback, void *context)
{
    assert(pkt);

    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;

    /*
     * Since the underlying infrastructure does not support IP datagrams
     * longer than 64K, drop such packets and do not even try to send them.
     */
    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
        if (pkt->payload_len >
            ETH_MAX_IP_DGRAM_LEN -
            pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
            return false;
        }
    }

    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
        if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
            pkt->virt_hdr.flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
            net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                  pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
                                  pkt->payload_len);
        }

        net_tx_pkt_fix_ip6_payload_len(pkt);
        callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
                 pkt->vec + NET_TX_PKT_VHDR_FRAG,
                 pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);

        return true;
    }

    return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}

void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
{
    struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
    if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
        /*
         * TODO: if QEMU would support >64K packets, add a jumbo option
         * check here, something like:
         * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
         */
        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
            if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
            }
            /*
             * TODO: if QEMU would support >64K packets, add the jumbo
             * payload option for packets greater than 65,535 bytes.
             */
        }
    }
}