2 * QEMU RX packets abstractions
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
18 #include "qemu/osdep.h"
19 #include "qemu/crc32c.h"
21 #include "net_rx_pkt.h"
22 #include "net/checksum.h"
26 struct virtio_net_hdr virt_hdr
;
28 struct eth_header eth
;
29 struct vlan_header vlan
;
32 uint16_t vec_len_total
;
37 eth_pkt_types_e packet_type
;
39 /* Analysis results */
47 eth_ip6_hdr_info ip6hdr_info
;
48 eth_ip4_hdr_info ip4hdr_info
;
49 eth_l4_hdr_info l4hdr_info
;
52 void net_rx_pkt_init(struct NetRxPkt
**pkt
)
54 struct NetRxPkt
*p
= g_malloc0(sizeof *p
);
60 void net_rx_pkt_uninit(struct NetRxPkt
*pkt
)
62 if (pkt
->vec_len_total
!= 0) {
69 struct virtio_net_hdr
*net_rx_pkt_get_vhdr(struct NetRxPkt
*pkt
)
72 return &pkt
->virt_hdr
;
76 net_rx_pkt_iovec_realloc(struct NetRxPkt
*pkt
,
79 if (pkt
->vec_len_total
< new_iov_len
) {
81 pkt
->vec
= g_malloc(sizeof(*pkt
->vec
) * new_iov_len
);
82 pkt
->vec_len_total
= new_iov_len
;
87 net_rx_pkt_pull_data(struct NetRxPkt
*pkt
,
88 const struct iovec
*iov
, int iovcnt
,
91 uint32_t pllen
= iov_size(iov
, iovcnt
) - ploff
;
93 if (pkt
->ehdr_buf_len
) {
94 net_rx_pkt_iovec_realloc(pkt
, iovcnt
+ 1);
96 pkt
->vec
[0].iov_base
= &pkt
->ehdr_buf
;
97 pkt
->vec
[0].iov_len
= pkt
->ehdr_buf_len
;
99 pkt
->tot_len
= pllen
+ pkt
->ehdr_buf_len
;
100 pkt
->vec_len
= iov_copy(pkt
->vec
+ 1, pkt
->vec_len_total
- 1,
101 iov
, iovcnt
, ploff
, pllen
) + 1;
103 net_rx_pkt_iovec_realloc(pkt
, iovcnt
);
105 pkt
->tot_len
= pllen
;
106 pkt
->vec_len
= iov_copy(pkt
->vec
, pkt
->vec_len_total
,
107 iov
, iovcnt
, ploff
, pkt
->tot_len
);
110 eth_get_protocols(pkt
->vec
, pkt
->vec_len
, 0, &pkt
->hasip4
, &pkt
->hasip6
,
111 &pkt
->l3hdr_off
, &pkt
->l4hdr_off
, &pkt
->l5hdr_off
,
112 &pkt
->ip6hdr_info
, &pkt
->ip4hdr_info
, &pkt
->l4hdr_info
);
114 trace_net_rx_pkt_parsed(pkt
->hasip4
, pkt
->hasip6
, pkt
->l4hdr_info
.proto
,
115 pkt
->l3hdr_off
, pkt
->l4hdr_off
, pkt
->l5hdr_off
);
118 void net_rx_pkt_attach_iovec(struct NetRxPkt
*pkt
,
119 const struct iovec
*iov
, int iovcnt
,
120 size_t iovoff
, bool strip_vlan
)
123 uint16_t ploff
= iovoff
;
127 pkt
->ehdr_buf_len
= eth_strip_vlan(iov
, iovcnt
, iovoff
, &pkt
->ehdr_buf
,
130 pkt
->ehdr_buf_len
= 0;
135 net_rx_pkt_pull_data(pkt
, iov
, iovcnt
, ploff
);
138 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt
*pkt
,
139 const struct iovec
*iov
, int iovcnt
,
140 size_t iovoff
, bool strip_vlan
,
144 uint16_t ploff
= iovoff
;
148 pkt
->ehdr_buf_len
= eth_strip_vlan_ex(iov
, iovcnt
, iovoff
, vet
,
152 pkt
->ehdr_buf_len
= 0;
157 net_rx_pkt_pull_data(pkt
, iov
, iovcnt
, ploff
);
160 void net_rx_pkt_dump(struct NetRxPkt
*pkt
)
162 #ifdef NET_RX_PKT_DEBUG
165 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n",
166 pkt
->tot_len
, pkt
->ehdr_buf_len
, pkt
->tci
);
170 void net_rx_pkt_set_packet_type(struct NetRxPkt
*pkt
,
171 eth_pkt_types_e packet_type
)
175 pkt
->packet_type
= packet_type
;
179 eth_pkt_types_e
net_rx_pkt_get_packet_type(struct NetRxPkt
*pkt
)
183 return pkt
->packet_type
;
186 size_t net_rx_pkt_get_total_len(struct NetRxPkt
*pkt
)
193 void net_rx_pkt_set_protocols(struct NetRxPkt
*pkt
,
194 const struct iovec
*iov
, size_t iovcnt
,
199 eth_get_protocols(iov
, iovcnt
, iovoff
, &pkt
->hasip4
, &pkt
->hasip6
,
200 &pkt
->l3hdr_off
, &pkt
->l4hdr_off
, &pkt
->l5hdr_off
,
201 &pkt
->ip6hdr_info
, &pkt
->ip4hdr_info
, &pkt
->l4hdr_info
);
204 void net_rx_pkt_get_protocols(struct NetRxPkt
*pkt
,
205 bool *hasip4
, bool *hasip6
,
206 EthL4HdrProto
*l4hdr_proto
)
210 *hasip4
= pkt
->hasip4
;
211 *hasip6
= pkt
->hasip6
;
212 *l4hdr_proto
= pkt
->l4hdr_info
.proto
;
215 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt
*pkt
)
218 return pkt
->l3hdr_off
;
221 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt
*pkt
)
224 return pkt
->l4hdr_off
;
227 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt
*pkt
)
230 return pkt
->l5hdr_off
;
233 eth_ip6_hdr_info
*net_rx_pkt_get_ip6_info(struct NetRxPkt
*pkt
)
235 return &pkt
->ip6hdr_info
;
238 eth_ip4_hdr_info
*net_rx_pkt_get_ip4_info(struct NetRxPkt
*pkt
)
240 return &pkt
->ip4hdr_info
;
244 _net_rx_rss_add_chunk(uint8_t *rss_input
, size_t *bytes_written
,
245 void *ptr
, size_t size
)
247 memcpy(&rss_input
[*bytes_written
], ptr
, size
);
248 trace_net_rx_pkt_rss_add_chunk(ptr
, size
, *bytes_written
);
249 *bytes_written
+= size
;
253 _net_rx_rss_prepare_ip4(uint8_t *rss_input
,
254 struct NetRxPkt
*pkt
,
255 size_t *bytes_written
)
257 struct ip_header
*ip4_hdr
= &pkt
->ip4hdr_info
.ip4_hdr
;
259 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
260 &ip4_hdr
->ip_src
, sizeof(uint32_t));
262 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
263 &ip4_hdr
->ip_dst
, sizeof(uint32_t));
267 _net_rx_rss_prepare_ip6(uint8_t *rss_input
,
268 struct NetRxPkt
*pkt
,
269 bool ipv6ex
, size_t *bytes_written
)
271 eth_ip6_hdr_info
*ip6info
= &pkt
->ip6hdr_info
;
273 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
274 (ipv6ex
&& ip6info
->rss_ex_src_valid
) ? &ip6info
->rss_ex_src
275 : &ip6info
->ip6_hdr
.ip6_src
,
276 sizeof(struct in6_address
));
278 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
279 (ipv6ex
&& ip6info
->rss_ex_dst_valid
) ? &ip6info
->rss_ex_dst
280 : &ip6info
->ip6_hdr
.ip6_dst
,
281 sizeof(struct in6_address
));
285 _net_rx_rss_prepare_tcp(uint8_t *rss_input
,
286 struct NetRxPkt
*pkt
,
287 size_t *bytes_written
)
289 struct tcp_header
*tcphdr
= &pkt
->l4hdr_info
.hdr
.tcp
;
291 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
292 &tcphdr
->th_sport
, sizeof(uint16_t));
294 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
295 &tcphdr
->th_dport
, sizeof(uint16_t));
299 _net_rx_rss_prepare_udp(uint8_t *rss_input
,
300 struct NetRxPkt
*pkt
,
301 size_t *bytes_written
)
303 struct udp_header
*udphdr
= &pkt
->l4hdr_info
.hdr
.udp
;
305 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
306 &udphdr
->uh_sport
, sizeof(uint16_t));
308 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
309 &udphdr
->uh_dport
, sizeof(uint16_t));
313 net_rx_pkt_calc_rss_hash(struct NetRxPkt
*pkt
,
314 NetRxPktRssType type
,
317 uint8_t rss_input
[36];
318 size_t rss_length
= 0;
319 uint32_t rss_hash
= 0;
320 net_toeplitz_key key_data
;
325 trace_net_rx_pkt_rss_ip4();
326 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
328 case NetPktRssIpV4Tcp
:
330 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
331 trace_net_rx_pkt_rss_ip4_tcp();
332 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
333 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
335 case NetPktRssIpV6Tcp
:
337 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
338 trace_net_rx_pkt_rss_ip6_tcp();
339 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
340 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
344 trace_net_rx_pkt_rss_ip6();
345 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
347 case NetPktRssIpV6Ex
:
349 trace_net_rx_pkt_rss_ip6_ex();
350 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
352 case NetPktRssIpV6TcpEx
:
354 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
355 trace_net_rx_pkt_rss_ip6_ex_tcp();
356 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
357 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
359 case NetPktRssIpV4Udp
:
361 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
362 trace_net_rx_pkt_rss_ip4_udp();
363 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
364 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
366 case NetPktRssIpV6Udp
:
368 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
369 trace_net_rx_pkt_rss_ip6_udp();
370 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
371 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
373 case NetPktRssIpV6UdpEx
:
375 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
376 trace_net_rx_pkt_rss_ip6_ex_udp();
377 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
378 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
385 net_toeplitz_key_init(&key_data
, key
);
386 net_toeplitz_add(&rss_hash
, rss_input
, rss_length
, &key_data
);
388 trace_net_rx_pkt_rss_hash(rss_length
, rss_hash
);
393 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt
*pkt
)
398 return be16_to_cpu(pkt
->ip4hdr_info
.ip4_hdr
.ip_id
);
404 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt
*pkt
)
408 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
) {
409 return TCP_HEADER_FLAGS(&pkt
->l4hdr_info
.hdr
.tcp
) & TCP_FLAG_ACK
;
415 bool net_rx_pkt_has_tcp_data(struct NetRxPkt
*pkt
)
419 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
) {
420 return pkt
->l4hdr_info
.has_tcp_data
;
426 struct iovec
*net_rx_pkt_get_iovec(struct NetRxPkt
*pkt
)
433 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt
*pkt
)
440 void net_rx_pkt_set_vhdr(struct NetRxPkt
*pkt
,
441 struct virtio_net_hdr
*vhdr
)
445 memcpy(&pkt
->virt_hdr
, vhdr
, sizeof pkt
->virt_hdr
);
448 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt
*pkt
,
449 const struct iovec
*iov
, int iovcnt
)
453 iov_to_buf(iov
, iovcnt
, 0, &pkt
->virt_hdr
, sizeof pkt
->virt_hdr
);
456 void net_rx_pkt_unset_vhdr(struct NetRxPkt
*pkt
)
460 memset(&pkt
->virt_hdr
, 0, sizeof(pkt
->virt_hdr
));
463 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt
*pkt
)
467 return pkt
->ehdr_buf_len
? true : false;
470 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt
*pkt
)
477 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt
*pkt
, bool *csum_valid
)
483 trace_net_rx_pkt_l3_csum_validate_entry();
486 trace_net_rx_pkt_l3_csum_validate_not_ip4();
490 csl
= pkt
->l4hdr_off
- pkt
->l3hdr_off
;
492 cntr
= net_checksum_add_iov(pkt
->vec
, pkt
->vec_len
,
496 csum
= net_checksum_finish(cntr
);
498 *csum_valid
= (csum
== 0);
500 trace_net_rx_pkt_l3_csum_validate_csum(pkt
->l3hdr_off
, csl
,
501 cntr
, csum
, *csum_valid
);
507 _net_rx_pkt_calc_l4_csum(struct NetRxPkt
*pkt
)
514 trace_net_rx_pkt_l4_csum_calc_entry();
517 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
) {
518 csl
= be16_to_cpu(pkt
->l4hdr_info
.hdr
.udp
.uh_ulen
);
519 trace_net_rx_pkt_l4_csum_calc_ip4_udp();
521 csl
= be16_to_cpu(pkt
->ip4hdr_info
.ip4_hdr
.ip_len
) -
522 IP_HDR_GET_LEN(&pkt
->ip4hdr_info
.ip4_hdr
);
523 trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
526 cntr
= eth_calc_ip4_pseudo_hdr_csum(&pkt
->ip4hdr_info
.ip4_hdr
,
528 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr
, csl
);
530 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
) {
531 csl
= be16_to_cpu(pkt
->l4hdr_info
.hdr
.udp
.uh_ulen
);
532 trace_net_rx_pkt_l4_csum_calc_ip6_udp();
534 struct ip6_header
*ip6hdr
= &pkt
->ip6hdr_info
.ip6_hdr
;
535 size_t full_ip6hdr_len
= pkt
->l4hdr_off
- pkt
->l3hdr_off
;
536 size_t ip6opts_len
= full_ip6hdr_len
- sizeof(struct ip6_header
);
538 csl
= be16_to_cpu(ip6hdr
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
) -
540 trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
543 cntr
= eth_calc_ip6_pseudo_hdr_csum(&pkt
->ip6hdr_info
.ip6_hdr
, csl
,
544 pkt
->ip6hdr_info
.l4proto
, &cso
);
545 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr
, csl
);
548 cntr
+= net_checksum_add_iov(pkt
->vec
, pkt
->vec_len
,
549 pkt
->l4hdr_off
, csl
, cso
);
551 csum
= net_checksum_finish_nozero(cntr
);
553 trace_net_rx_pkt_l4_csum_calc_csum(pkt
->l4hdr_off
, csl
, cntr
, csum
);
559 _net_rx_pkt_validate_sctp_sum(struct NetRxPkt
*pkt
)
562 size_t off
= pkt
->l4hdr_off
;
563 size_t vec_len
= pkt
->vec_len
;
565 uint32_t calculated
= 0;
569 for (vec
= pkt
->vec
; vec
->iov_len
< off
; vec
++) {
576 if (!iov_to_buf(vec
, vec_len
, csum_off
, &original
, sizeof(original
))) {
580 if (!iov_from_buf(vec
, vec_len
, csum_off
,
581 &calculated
, sizeof(calculated
))) {
585 calculated
= crc32c(0xffffffff,
586 (uint8_t *)vec
->iov_base
+ off
, vec
->iov_len
- off
);
587 calculated
= iov_crc32c(calculated
^ 0xffffffff, vec
+ 1, vec_len
- 1);
588 valid
= calculated
== le32_to_cpu(original
);
589 iov_from_buf(vec
, vec_len
, csum_off
, &original
, sizeof(original
));
594 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt
*pkt
, bool *csum_valid
)
598 trace_net_rx_pkt_l4_csum_validate_entry();
600 if (pkt
->hasip4
&& pkt
->ip4hdr_info
.fragment
) {
601 trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
605 switch (pkt
->l4hdr_info
.proto
) {
606 case ETH_L4_HDR_PROTO_UDP
:
607 if (pkt
->l4hdr_info
.hdr
.udp
.uh_sum
== 0) {
608 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
612 case ETH_L4_HDR_PROTO_TCP
:
613 csum
= _net_rx_pkt_calc_l4_csum(pkt
);
614 *csum_valid
= ((csum
== 0) || (csum
== 0xFFFF));
617 case ETH_L4_HDR_PROTO_SCTP
:
618 *csum_valid
= _net_rx_pkt_validate_sctp_sum(pkt
);
622 trace_net_rx_pkt_l4_csum_validate_not_xxp();
626 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid
);
631 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt
*pkt
)
636 trace_net_rx_pkt_l4_csum_fix_entry();
638 switch (pkt
->l4hdr_info
.proto
) {
639 case ETH_L4_HDR_PROTO_TCP
:
640 l4_cso
= offsetof(struct tcp_header
, th_sum
);
641 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso
);
644 case ETH_L4_HDR_PROTO_UDP
:
645 if (pkt
->l4hdr_info
.hdr
.udp
.uh_sum
== 0) {
646 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
649 l4_cso
= offsetof(struct udp_header
, uh_sum
);
650 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso
);
654 trace_net_rx_pkt_l4_csum_fix_not_xxp();
658 if (pkt
->hasip4
&& pkt
->ip4hdr_info
.fragment
) {
659 trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
663 /* Set zero to checksum word */
664 iov_from_buf(pkt
->vec
, pkt
->vec_len
,
665 pkt
->l4hdr_off
+ l4_cso
,
666 &csum
, sizeof(csum
));
668 /* Calculate L4 checksum */
669 csum
= cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt
));
671 /* Set calculated checksum to checksum word */
672 iov_from_buf(pkt
->vec
, pkt
->vec_len
,
673 pkt
->l4hdr_off
+ l4_cso
,
674 &csum
, sizeof(csum
));
676 trace_net_rx_pkt_l4_csum_fix_csum(pkt
->l4hdr_off
+ l4_cso
, csum
);