2 * QEMU RX packets abstractions
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
18 #include "qemu/osdep.h"
19 #include "qemu/crc32c.h"
21 #include "net_rx_pkt.h"
22 #include "net/checksum.h"
26 struct virtio_net_hdr virt_hdr
;
28 struct eth_header eth
;
29 struct vlan_header vlan
;
32 uint16_t vec_len_total
;
37 eth_pkt_types_e packet_type
;
39 /* Analysis results */
47 eth_ip6_hdr_info ip6hdr_info
;
48 eth_ip4_hdr_info ip4hdr_info
;
49 eth_l4_hdr_info l4hdr_info
;
52 void net_rx_pkt_init(struct NetRxPkt
**pkt
)
54 struct NetRxPkt
*p
= g_malloc0(sizeof *p
);
60 void net_rx_pkt_uninit(struct NetRxPkt
*pkt
)
62 if (pkt
->vec_len_total
!= 0) {
69 struct virtio_net_hdr
*net_rx_pkt_get_vhdr(struct NetRxPkt
*pkt
)
72 return &pkt
->virt_hdr
;
76 net_rx_pkt_iovec_realloc(struct NetRxPkt
*pkt
,
79 if (pkt
->vec_len_total
< new_iov_len
) {
81 pkt
->vec
= g_malloc(sizeof(*pkt
->vec
) * new_iov_len
);
82 pkt
->vec_len_total
= new_iov_len
;
87 net_rx_pkt_pull_data(struct NetRxPkt
*pkt
,
88 const struct iovec
*iov
, int iovcnt
,
91 uint32_t pllen
= iov_size(iov
, iovcnt
) - ploff
;
93 if (pkt
->ehdr_buf_len
) {
94 net_rx_pkt_iovec_realloc(pkt
, iovcnt
+ 1);
96 pkt
->vec
[0].iov_base
= &pkt
->ehdr_buf
;
97 pkt
->vec
[0].iov_len
= pkt
->ehdr_buf_len
;
99 pkt
->tot_len
= pllen
+ pkt
->ehdr_buf_len
;
100 pkt
->vec_len
= iov_copy(pkt
->vec
+ 1, pkt
->vec_len_total
- 1,
101 iov
, iovcnt
, ploff
, pllen
) + 1;
103 net_rx_pkt_iovec_realloc(pkt
, iovcnt
);
105 pkt
->tot_len
= pllen
;
106 pkt
->vec_len
= iov_copy(pkt
->vec
, pkt
->vec_len_total
,
107 iov
, iovcnt
, ploff
, pkt
->tot_len
);
110 eth_get_protocols(pkt
->vec
, pkt
->vec_len
, 0, &pkt
->hasip4
, &pkt
->hasip6
,
111 &pkt
->l3hdr_off
, &pkt
->l4hdr_off
, &pkt
->l5hdr_off
,
112 &pkt
->ip6hdr_info
, &pkt
->ip4hdr_info
, &pkt
->l4hdr_info
);
114 trace_net_rx_pkt_parsed(pkt
->hasip4
, pkt
->hasip6
, pkt
->l4hdr_info
.proto
,
115 pkt
->l3hdr_off
, pkt
->l4hdr_off
, pkt
->l5hdr_off
);
118 void net_rx_pkt_attach_iovec(struct NetRxPkt
*pkt
,
119 const struct iovec
*iov
, int iovcnt
,
120 size_t iovoff
, bool strip_vlan
)
123 uint16_t ploff
= iovoff
;
127 pkt
->ehdr_buf_len
= eth_strip_vlan(iov
, iovcnt
, iovoff
, &pkt
->ehdr_buf
,
130 pkt
->ehdr_buf_len
= 0;
135 net_rx_pkt_pull_data(pkt
, iov
, iovcnt
, ploff
);
138 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt
*pkt
,
139 const struct iovec
*iov
, int iovcnt
,
140 size_t iovoff
, int strip_vlan_index
,
141 uint16_t vet
, uint16_t vet_ext
)
144 uint16_t ploff
= iovoff
;
147 pkt
->ehdr_buf_len
= eth_strip_vlan_ex(iov
, iovcnt
, iovoff
,
148 strip_vlan_index
, vet
, vet_ext
,
154 net_rx_pkt_pull_data(pkt
, iov
, iovcnt
, ploff
);
157 void net_rx_pkt_dump(struct NetRxPkt
*pkt
)
159 #ifdef NET_RX_PKT_DEBUG
162 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n",
163 pkt
->tot_len
, pkt
->ehdr_buf_len
, pkt
->tci
);
167 void net_rx_pkt_set_packet_type(struct NetRxPkt
*pkt
,
168 eth_pkt_types_e packet_type
)
172 pkt
->packet_type
= packet_type
;
176 eth_pkt_types_e
net_rx_pkt_get_packet_type(struct NetRxPkt
*pkt
)
180 return pkt
->packet_type
;
183 size_t net_rx_pkt_get_total_len(struct NetRxPkt
*pkt
)
190 void net_rx_pkt_set_protocols(struct NetRxPkt
*pkt
,
191 const struct iovec
*iov
, size_t iovcnt
,
196 eth_get_protocols(iov
, iovcnt
, iovoff
, &pkt
->hasip4
, &pkt
->hasip6
,
197 &pkt
->l3hdr_off
, &pkt
->l4hdr_off
, &pkt
->l5hdr_off
,
198 &pkt
->ip6hdr_info
, &pkt
->ip4hdr_info
, &pkt
->l4hdr_info
);
201 void net_rx_pkt_get_protocols(struct NetRxPkt
*pkt
,
202 bool *hasip4
, bool *hasip6
,
203 EthL4HdrProto
*l4hdr_proto
)
207 *hasip4
= pkt
->hasip4
;
208 *hasip6
= pkt
->hasip6
;
209 *l4hdr_proto
= pkt
->l4hdr_info
.proto
;
212 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt
*pkt
)
215 return pkt
->l3hdr_off
;
218 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt
*pkt
)
221 return pkt
->l4hdr_off
;
224 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt
*pkt
)
227 return pkt
->l5hdr_off
;
230 eth_ip6_hdr_info
*net_rx_pkt_get_ip6_info(struct NetRxPkt
*pkt
)
232 return &pkt
->ip6hdr_info
;
235 eth_ip4_hdr_info
*net_rx_pkt_get_ip4_info(struct NetRxPkt
*pkt
)
237 return &pkt
->ip4hdr_info
;
241 _net_rx_rss_add_chunk(uint8_t *rss_input
, size_t *bytes_written
,
242 void *ptr
, size_t size
)
244 memcpy(&rss_input
[*bytes_written
], ptr
, size
);
245 trace_net_rx_pkt_rss_add_chunk(ptr
, size
, *bytes_written
);
246 *bytes_written
+= size
;
250 _net_rx_rss_prepare_ip4(uint8_t *rss_input
,
251 struct NetRxPkt
*pkt
,
252 size_t *bytes_written
)
254 struct ip_header
*ip4_hdr
= &pkt
->ip4hdr_info
.ip4_hdr
;
256 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
257 &ip4_hdr
->ip_src
, sizeof(uint32_t));
259 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
260 &ip4_hdr
->ip_dst
, sizeof(uint32_t));
264 _net_rx_rss_prepare_ip6(uint8_t *rss_input
,
265 struct NetRxPkt
*pkt
,
266 bool ipv6ex
, size_t *bytes_written
)
268 eth_ip6_hdr_info
*ip6info
= &pkt
->ip6hdr_info
;
270 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
271 (ipv6ex
&& ip6info
->rss_ex_src_valid
) ? &ip6info
->rss_ex_src
272 : &ip6info
->ip6_hdr
.ip6_src
,
273 sizeof(struct in6_address
));
275 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
276 (ipv6ex
&& ip6info
->rss_ex_dst_valid
) ? &ip6info
->rss_ex_dst
277 : &ip6info
->ip6_hdr
.ip6_dst
,
278 sizeof(struct in6_address
));
282 _net_rx_rss_prepare_tcp(uint8_t *rss_input
,
283 struct NetRxPkt
*pkt
,
284 size_t *bytes_written
)
286 struct tcp_header
*tcphdr
= &pkt
->l4hdr_info
.hdr
.tcp
;
288 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
289 &tcphdr
->th_sport
, sizeof(uint16_t));
291 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
292 &tcphdr
->th_dport
, sizeof(uint16_t));
296 _net_rx_rss_prepare_udp(uint8_t *rss_input
,
297 struct NetRxPkt
*pkt
,
298 size_t *bytes_written
)
300 struct udp_header
*udphdr
= &pkt
->l4hdr_info
.hdr
.udp
;
302 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
303 &udphdr
->uh_sport
, sizeof(uint16_t));
305 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
306 &udphdr
->uh_dport
, sizeof(uint16_t));
310 net_rx_pkt_calc_rss_hash(struct NetRxPkt
*pkt
,
311 NetRxPktRssType type
,
314 uint8_t rss_input
[36];
315 size_t rss_length
= 0;
316 uint32_t rss_hash
= 0;
317 net_toeplitz_key key_data
;
322 trace_net_rx_pkt_rss_ip4();
323 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
325 case NetPktRssIpV4Tcp
:
327 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
328 trace_net_rx_pkt_rss_ip4_tcp();
329 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
330 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
332 case NetPktRssIpV6Tcp
:
334 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
335 trace_net_rx_pkt_rss_ip6_tcp();
336 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
337 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
341 trace_net_rx_pkt_rss_ip6();
342 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
344 case NetPktRssIpV6Ex
:
346 trace_net_rx_pkt_rss_ip6_ex();
347 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
349 case NetPktRssIpV6TcpEx
:
351 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
352 trace_net_rx_pkt_rss_ip6_ex_tcp();
353 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
354 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
356 case NetPktRssIpV4Udp
:
358 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
359 trace_net_rx_pkt_rss_ip4_udp();
360 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
361 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
363 case NetPktRssIpV6Udp
:
365 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
366 trace_net_rx_pkt_rss_ip6_udp();
367 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
368 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
370 case NetPktRssIpV6UdpEx
:
372 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
373 trace_net_rx_pkt_rss_ip6_ex_udp();
374 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
375 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
382 net_toeplitz_key_init(&key_data
, key
);
383 net_toeplitz_add(&rss_hash
, rss_input
, rss_length
, &key_data
);
385 trace_net_rx_pkt_rss_hash(rss_length
, rss_hash
);
390 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt
*pkt
)
395 return be16_to_cpu(pkt
->ip4hdr_info
.ip4_hdr
.ip_id
);
401 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt
*pkt
)
405 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
) {
406 return TCP_HEADER_FLAGS(&pkt
->l4hdr_info
.hdr
.tcp
) & TCP_FLAG_ACK
;
412 bool net_rx_pkt_has_tcp_data(struct NetRxPkt
*pkt
)
416 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
) {
417 return pkt
->l4hdr_info
.has_tcp_data
;
423 struct iovec
*net_rx_pkt_get_iovec(struct NetRxPkt
*pkt
)
430 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt
*pkt
)
437 void net_rx_pkt_set_vhdr(struct NetRxPkt
*pkt
,
438 struct virtio_net_hdr
*vhdr
)
442 memcpy(&pkt
->virt_hdr
, vhdr
, sizeof pkt
->virt_hdr
);
445 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt
*pkt
,
446 const struct iovec
*iov
, int iovcnt
)
450 iov_to_buf(iov
, iovcnt
, 0, &pkt
->virt_hdr
, sizeof pkt
->virt_hdr
);
453 void net_rx_pkt_unset_vhdr(struct NetRxPkt
*pkt
)
457 memset(&pkt
->virt_hdr
, 0, sizeof(pkt
->virt_hdr
));
460 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt
*pkt
)
464 return pkt
->ehdr_buf_len
? true : false;
467 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt
*pkt
)
474 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt
*pkt
, bool *csum_valid
)
480 trace_net_rx_pkt_l3_csum_validate_entry();
483 trace_net_rx_pkt_l3_csum_validate_not_ip4();
487 csl
= pkt
->l4hdr_off
- pkt
->l3hdr_off
;
489 cntr
= net_checksum_add_iov(pkt
->vec
, pkt
->vec_len
,
493 csum
= net_checksum_finish(cntr
);
495 *csum_valid
= (csum
== 0);
497 trace_net_rx_pkt_l3_csum_validate_csum(pkt
->l3hdr_off
, csl
,
498 cntr
, csum
, *csum_valid
);
504 _net_rx_pkt_calc_l4_csum(struct NetRxPkt
*pkt
)
511 trace_net_rx_pkt_l4_csum_calc_entry();
514 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
) {
515 csl
= be16_to_cpu(pkt
->l4hdr_info
.hdr
.udp
.uh_ulen
);
516 trace_net_rx_pkt_l4_csum_calc_ip4_udp();
518 csl
= be16_to_cpu(pkt
->ip4hdr_info
.ip4_hdr
.ip_len
) -
519 IP_HDR_GET_LEN(&pkt
->ip4hdr_info
.ip4_hdr
);
520 trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
523 cntr
= eth_calc_ip4_pseudo_hdr_csum(&pkt
->ip4hdr_info
.ip4_hdr
,
525 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr
, csl
);
527 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
) {
528 csl
= be16_to_cpu(pkt
->l4hdr_info
.hdr
.udp
.uh_ulen
);
529 trace_net_rx_pkt_l4_csum_calc_ip6_udp();
531 struct ip6_header
*ip6hdr
= &pkt
->ip6hdr_info
.ip6_hdr
;
532 size_t full_ip6hdr_len
= pkt
->l4hdr_off
- pkt
->l3hdr_off
;
533 size_t ip6opts_len
= full_ip6hdr_len
- sizeof(struct ip6_header
);
535 csl
= be16_to_cpu(ip6hdr
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
) -
537 trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
540 cntr
= eth_calc_ip6_pseudo_hdr_csum(&pkt
->ip6hdr_info
.ip6_hdr
, csl
,
541 pkt
->ip6hdr_info
.l4proto
, &cso
);
542 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr
, csl
);
545 cntr
+= net_checksum_add_iov(pkt
->vec
, pkt
->vec_len
,
546 pkt
->l4hdr_off
, csl
, cso
);
548 csum
= net_checksum_finish_nozero(cntr
);
550 trace_net_rx_pkt_l4_csum_calc_csum(pkt
->l4hdr_off
, csl
, cntr
, csum
);
556 _net_rx_pkt_validate_sctp_sum(struct NetRxPkt
*pkt
)
559 size_t off
= pkt
->l4hdr_off
;
560 size_t vec_len
= pkt
->vec_len
;
562 uint32_t calculated
= 0;
566 for (vec
= pkt
->vec
; vec
->iov_len
< off
; vec
++) {
573 if (!iov_to_buf(vec
, vec_len
, csum_off
, &original
, sizeof(original
))) {
577 if (!iov_from_buf(vec
, vec_len
, csum_off
,
578 &calculated
, sizeof(calculated
))) {
582 calculated
= crc32c(0xffffffff,
583 (uint8_t *)vec
->iov_base
+ off
, vec
->iov_len
- off
);
584 calculated
= iov_crc32c(calculated
^ 0xffffffff, vec
+ 1, vec_len
- 1);
585 valid
= calculated
== le32_to_cpu(original
);
586 iov_from_buf(vec
, vec_len
, csum_off
, &original
, sizeof(original
));
591 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt
*pkt
, bool *csum_valid
)
595 trace_net_rx_pkt_l4_csum_validate_entry();
597 if (pkt
->hasip4
&& pkt
->ip4hdr_info
.fragment
) {
598 trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
602 switch (pkt
->l4hdr_info
.proto
) {
603 case ETH_L4_HDR_PROTO_UDP
:
604 if (pkt
->l4hdr_info
.hdr
.udp
.uh_sum
== 0) {
605 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
609 case ETH_L4_HDR_PROTO_TCP
:
610 csum
= _net_rx_pkt_calc_l4_csum(pkt
);
611 *csum_valid
= ((csum
== 0) || (csum
== 0xFFFF));
614 case ETH_L4_HDR_PROTO_SCTP
:
615 *csum_valid
= _net_rx_pkt_validate_sctp_sum(pkt
);
619 trace_net_rx_pkt_l4_csum_validate_not_xxp();
623 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid
);
628 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt
*pkt
)
633 trace_net_rx_pkt_l4_csum_fix_entry();
635 switch (pkt
->l4hdr_info
.proto
) {
636 case ETH_L4_HDR_PROTO_TCP
:
637 l4_cso
= offsetof(struct tcp_header
, th_sum
);
638 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso
);
641 case ETH_L4_HDR_PROTO_UDP
:
642 if (pkt
->l4hdr_info
.hdr
.udp
.uh_sum
== 0) {
643 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
646 l4_cso
= offsetof(struct udp_header
, uh_sum
);
647 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso
);
651 trace_net_rx_pkt_l4_csum_fix_not_xxp();
655 if (pkt
->hasip4
&& pkt
->ip4hdr_info
.fragment
) {
656 trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
660 /* Set zero to checksum word */
661 iov_from_buf(pkt
->vec
, pkt
->vec_len
,
662 pkt
->l4hdr_off
+ l4_cso
,
663 &csum
, sizeof(csum
));
665 /* Calculate L4 checksum */
666 csum
= cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt
));
668 /* Set calculated checksum to checksum word */
669 iov_from_buf(pkt
->vec
, pkt
->vec_len
,
670 pkt
->l4hdr_off
+ l4_cso
,
671 &csum
, sizeof(csum
));
673 trace_net_rx_pkt_l4_csum_fix_csum(pkt
->l4hdr_off
+ l4_cso
, csum
);