2 * QEMU RX packets abstractions
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
18 #include "qemu/osdep.h"
20 #include "net_rx_pkt.h"
21 #include "net/checksum.h"
25 struct virtio_net_hdr virt_hdr
;
27 struct eth_header eth
;
28 struct vlan_header vlan
;
31 uint16_t vec_len_total
;
36 eth_pkt_types_e packet_type
;
38 /* Analysis results */
46 eth_ip6_hdr_info ip6hdr_info
;
47 eth_ip4_hdr_info ip4hdr_info
;
48 eth_l4_hdr_info l4hdr_info
;
51 void net_rx_pkt_init(struct NetRxPkt
**pkt
)
53 struct NetRxPkt
*p
= g_malloc0(sizeof *p
);
59 void net_rx_pkt_uninit(struct NetRxPkt
*pkt
)
61 if (pkt
->vec_len_total
!= 0) {
68 struct virtio_net_hdr
*net_rx_pkt_get_vhdr(struct NetRxPkt
*pkt
)
71 return &pkt
->virt_hdr
;
75 net_rx_pkt_iovec_realloc(struct NetRxPkt
*pkt
,
78 if (pkt
->vec_len_total
< new_iov_len
) {
80 pkt
->vec
= g_malloc(sizeof(*pkt
->vec
) * new_iov_len
);
81 pkt
->vec_len_total
= new_iov_len
;
86 net_rx_pkt_pull_data(struct NetRxPkt
*pkt
,
87 const struct iovec
*iov
, int iovcnt
,
90 uint32_t pllen
= iov_size(iov
, iovcnt
) - ploff
;
92 if (pkt
->ehdr_buf_len
) {
93 net_rx_pkt_iovec_realloc(pkt
, iovcnt
+ 1);
95 pkt
->vec
[0].iov_base
= &pkt
->ehdr_buf
;
96 pkt
->vec
[0].iov_len
= pkt
->ehdr_buf_len
;
98 pkt
->tot_len
= pllen
+ pkt
->ehdr_buf_len
;
99 pkt
->vec_len
= iov_copy(pkt
->vec
+ 1, pkt
->vec_len_total
- 1,
100 iov
, iovcnt
, ploff
, pllen
) + 1;
102 net_rx_pkt_iovec_realloc(pkt
, iovcnt
);
104 pkt
->tot_len
= pllen
;
105 pkt
->vec_len
= iov_copy(pkt
->vec
, pkt
->vec_len_total
,
106 iov
, iovcnt
, ploff
, pkt
->tot_len
);
109 eth_get_protocols(pkt
->vec
, pkt
->vec_len
, 0, &pkt
->hasip4
, &pkt
->hasip6
,
110 &pkt
->l3hdr_off
, &pkt
->l4hdr_off
, &pkt
->l5hdr_off
,
111 &pkt
->ip6hdr_info
, &pkt
->ip4hdr_info
, &pkt
->l4hdr_info
);
113 trace_net_rx_pkt_parsed(pkt
->hasip4
, pkt
->hasip6
, pkt
->l4hdr_info
.proto
,
114 pkt
->l3hdr_off
, pkt
->l4hdr_off
, pkt
->l5hdr_off
);
117 void net_rx_pkt_attach_iovec(struct NetRxPkt
*pkt
,
118 const struct iovec
*iov
, int iovcnt
,
119 size_t iovoff
, bool strip_vlan
)
122 uint16_t ploff
= iovoff
;
126 pkt
->ehdr_buf_len
= eth_strip_vlan(iov
, iovcnt
, iovoff
, &pkt
->ehdr_buf
,
129 pkt
->ehdr_buf_len
= 0;
134 net_rx_pkt_pull_data(pkt
, iov
, iovcnt
, ploff
);
137 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt
*pkt
,
138 const struct iovec
*iov
, int iovcnt
,
139 size_t iovoff
, bool strip_vlan
,
143 uint16_t ploff
= iovoff
;
147 pkt
->ehdr_buf_len
= eth_strip_vlan_ex(iov
, iovcnt
, iovoff
, vet
,
151 pkt
->ehdr_buf_len
= 0;
156 net_rx_pkt_pull_data(pkt
, iov
, iovcnt
, ploff
);
159 void net_rx_pkt_dump(struct NetRxPkt
*pkt
)
161 #ifdef NET_RX_PKT_DEBUG
164 printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n",
165 pkt
->tot_len
, pkt
->ehdr_buf_len
, pkt
->tci
);
169 void net_rx_pkt_set_packet_type(struct NetRxPkt
*pkt
,
170 eth_pkt_types_e packet_type
)
174 pkt
->packet_type
= packet_type
;
178 eth_pkt_types_e
net_rx_pkt_get_packet_type(struct NetRxPkt
*pkt
)
182 return pkt
->packet_type
;
185 size_t net_rx_pkt_get_total_len(struct NetRxPkt
*pkt
)
192 void net_rx_pkt_set_protocols(struct NetRxPkt
*pkt
,
193 const struct iovec
*iov
, size_t iovcnt
,
198 eth_get_protocols(iov
, iovcnt
, iovoff
, &pkt
->hasip4
, &pkt
->hasip6
,
199 &pkt
->l3hdr_off
, &pkt
->l4hdr_off
, &pkt
->l5hdr_off
,
200 &pkt
->ip6hdr_info
, &pkt
->ip4hdr_info
, &pkt
->l4hdr_info
);
203 void net_rx_pkt_get_protocols(struct NetRxPkt
*pkt
,
204 bool *hasip4
, bool *hasip6
,
205 EthL4HdrProto
*l4hdr_proto
)
209 *hasip4
= pkt
->hasip4
;
210 *hasip6
= pkt
->hasip6
;
211 *l4hdr_proto
= pkt
->l4hdr_info
.proto
;
214 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt
*pkt
)
217 return pkt
->l3hdr_off
;
220 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt
*pkt
)
223 return pkt
->l4hdr_off
;
226 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt
*pkt
)
229 return pkt
->l5hdr_off
;
232 eth_ip6_hdr_info
*net_rx_pkt_get_ip6_info(struct NetRxPkt
*pkt
)
234 return &pkt
->ip6hdr_info
;
237 eth_ip4_hdr_info
*net_rx_pkt_get_ip4_info(struct NetRxPkt
*pkt
)
239 return &pkt
->ip4hdr_info
;
243 _net_rx_rss_add_chunk(uint8_t *rss_input
, size_t *bytes_written
,
244 void *ptr
, size_t size
)
246 memcpy(&rss_input
[*bytes_written
], ptr
, size
);
247 trace_net_rx_pkt_rss_add_chunk(ptr
, size
, *bytes_written
);
248 *bytes_written
+= size
;
252 _net_rx_rss_prepare_ip4(uint8_t *rss_input
,
253 struct NetRxPkt
*pkt
,
254 size_t *bytes_written
)
256 struct ip_header
*ip4_hdr
= &pkt
->ip4hdr_info
.ip4_hdr
;
258 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
259 &ip4_hdr
->ip_src
, sizeof(uint32_t));
261 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
262 &ip4_hdr
->ip_dst
, sizeof(uint32_t));
266 _net_rx_rss_prepare_ip6(uint8_t *rss_input
,
267 struct NetRxPkt
*pkt
,
268 bool ipv6ex
, size_t *bytes_written
)
270 eth_ip6_hdr_info
*ip6info
= &pkt
->ip6hdr_info
;
272 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
273 (ipv6ex
&& ip6info
->rss_ex_src_valid
) ? &ip6info
->rss_ex_src
274 : &ip6info
->ip6_hdr
.ip6_src
,
275 sizeof(struct in6_address
));
277 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
278 (ipv6ex
&& ip6info
->rss_ex_dst_valid
) ? &ip6info
->rss_ex_dst
279 : &ip6info
->ip6_hdr
.ip6_dst
,
280 sizeof(struct in6_address
));
284 _net_rx_rss_prepare_tcp(uint8_t *rss_input
,
285 struct NetRxPkt
*pkt
,
286 size_t *bytes_written
)
288 struct tcp_header
*tcphdr
= &pkt
->l4hdr_info
.hdr
.tcp
;
290 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
291 &tcphdr
->th_sport
, sizeof(uint16_t));
293 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
294 &tcphdr
->th_dport
, sizeof(uint16_t));
298 _net_rx_rss_prepare_udp(uint8_t *rss_input
,
299 struct NetRxPkt
*pkt
,
300 size_t *bytes_written
)
302 struct udp_header
*udphdr
= &pkt
->l4hdr_info
.hdr
.udp
;
304 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
305 &udphdr
->uh_sport
, sizeof(uint16_t));
307 _net_rx_rss_add_chunk(rss_input
, bytes_written
,
308 &udphdr
->uh_dport
, sizeof(uint16_t));
312 net_rx_pkt_calc_rss_hash(struct NetRxPkt
*pkt
,
313 NetRxPktRssType type
,
316 uint8_t rss_input
[36];
317 size_t rss_length
= 0;
318 uint32_t rss_hash
= 0;
319 net_toeplitz_key key_data
;
324 trace_net_rx_pkt_rss_ip4();
325 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
327 case NetPktRssIpV4Tcp
:
329 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
330 trace_net_rx_pkt_rss_ip4_tcp();
331 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
332 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
334 case NetPktRssIpV6Tcp
:
336 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
337 trace_net_rx_pkt_rss_ip6_tcp();
338 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
339 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
343 trace_net_rx_pkt_rss_ip6();
344 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
346 case NetPktRssIpV6Ex
:
348 trace_net_rx_pkt_rss_ip6_ex();
349 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
351 case NetPktRssIpV6TcpEx
:
353 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
);
354 trace_net_rx_pkt_rss_ip6_ex_tcp();
355 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
356 _net_rx_rss_prepare_tcp(&rss_input
[0], pkt
, &rss_length
);
358 case NetPktRssIpV4Udp
:
360 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
361 trace_net_rx_pkt_rss_ip4_udp();
362 _net_rx_rss_prepare_ip4(&rss_input
[0], pkt
, &rss_length
);
363 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
365 case NetPktRssIpV6Udp
:
367 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
368 trace_net_rx_pkt_rss_ip6_udp();
369 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, false, &rss_length
);
370 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
372 case NetPktRssIpV6UdpEx
:
374 assert(pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
);
375 trace_net_rx_pkt_rss_ip6_ex_udp();
376 _net_rx_rss_prepare_ip6(&rss_input
[0], pkt
, true, &rss_length
);
377 _net_rx_rss_prepare_udp(&rss_input
[0], pkt
, &rss_length
);
384 net_toeplitz_key_init(&key_data
, key
);
385 net_toeplitz_add(&rss_hash
, rss_input
, rss_length
, &key_data
);
387 trace_net_rx_pkt_rss_hash(rss_length
, rss_hash
);
392 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt
*pkt
)
397 return be16_to_cpu(pkt
->ip4hdr_info
.ip4_hdr
.ip_id
);
403 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt
*pkt
)
407 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
) {
408 return TCP_HEADER_FLAGS(&pkt
->l4hdr_info
.hdr
.tcp
) & TCP_FLAG_ACK
;
414 bool net_rx_pkt_has_tcp_data(struct NetRxPkt
*pkt
)
418 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_TCP
) {
419 return pkt
->l4hdr_info
.has_tcp_data
;
425 struct iovec
*net_rx_pkt_get_iovec(struct NetRxPkt
*pkt
)
432 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt
*pkt
)
439 void net_rx_pkt_set_vhdr(struct NetRxPkt
*pkt
,
440 struct virtio_net_hdr
*vhdr
)
444 memcpy(&pkt
->virt_hdr
, vhdr
, sizeof pkt
->virt_hdr
);
447 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt
*pkt
,
448 const struct iovec
*iov
, int iovcnt
)
452 iov_to_buf(iov
, iovcnt
, 0, &pkt
->virt_hdr
, sizeof pkt
->virt_hdr
);
455 void net_rx_pkt_unset_vhdr(struct NetRxPkt
*pkt
)
459 memset(&pkt
->virt_hdr
, 0, sizeof(pkt
->virt_hdr
));
462 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt
*pkt
)
466 return pkt
->ehdr_buf_len
? true : false;
469 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt
*pkt
)
476 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt
*pkt
, bool *csum_valid
)
482 trace_net_rx_pkt_l3_csum_validate_entry();
485 trace_net_rx_pkt_l3_csum_validate_not_ip4();
489 csl
= pkt
->l4hdr_off
- pkt
->l3hdr_off
;
491 cntr
= net_checksum_add_iov(pkt
->vec
, pkt
->vec_len
,
495 csum
= net_checksum_finish(cntr
);
497 *csum_valid
= (csum
== 0);
499 trace_net_rx_pkt_l3_csum_validate_csum(pkt
->l3hdr_off
, csl
,
500 cntr
, csum
, *csum_valid
);
506 _net_rx_pkt_calc_l4_csum(struct NetRxPkt
*pkt
)
513 trace_net_rx_pkt_l4_csum_calc_entry();
516 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
) {
517 csl
= be16_to_cpu(pkt
->l4hdr_info
.hdr
.udp
.uh_ulen
);
518 trace_net_rx_pkt_l4_csum_calc_ip4_udp();
520 csl
= be16_to_cpu(pkt
->ip4hdr_info
.ip4_hdr
.ip_len
) -
521 IP_HDR_GET_LEN(&pkt
->ip4hdr_info
.ip4_hdr
);
522 trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
525 cntr
= eth_calc_ip4_pseudo_hdr_csum(&pkt
->ip4hdr_info
.ip4_hdr
,
527 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr
, csl
);
529 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
) {
530 csl
= be16_to_cpu(pkt
->l4hdr_info
.hdr
.udp
.uh_ulen
);
531 trace_net_rx_pkt_l4_csum_calc_ip6_udp();
533 struct ip6_header
*ip6hdr
= &pkt
->ip6hdr_info
.ip6_hdr
;
534 size_t full_ip6hdr_len
= pkt
->l4hdr_off
- pkt
->l3hdr_off
;
535 size_t ip6opts_len
= full_ip6hdr_len
- sizeof(struct ip6_header
);
537 csl
= be16_to_cpu(ip6hdr
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
) -
539 trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
542 cntr
= eth_calc_ip6_pseudo_hdr_csum(&pkt
->ip6hdr_info
.ip6_hdr
, csl
,
543 pkt
->ip6hdr_info
.l4proto
, &cso
);
544 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr
, csl
);
547 cntr
+= net_checksum_add_iov(pkt
->vec
, pkt
->vec_len
,
548 pkt
->l4hdr_off
, csl
, cso
);
550 csum
= net_checksum_finish_nozero(cntr
);
552 trace_net_rx_pkt_l4_csum_calc_csum(pkt
->l4hdr_off
, csl
, cntr
, csum
);
557 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt
*pkt
, bool *csum_valid
)
561 trace_net_rx_pkt_l4_csum_validate_entry();
563 if (pkt
->l4hdr_info
.proto
!= ETH_L4_HDR_PROTO_TCP
&&
564 pkt
->l4hdr_info
.proto
!= ETH_L4_HDR_PROTO_UDP
) {
565 trace_net_rx_pkt_l4_csum_validate_not_xxp();
569 if (pkt
->l4hdr_info
.proto
== ETH_L4_HDR_PROTO_UDP
&&
570 pkt
->l4hdr_info
.hdr
.udp
.uh_sum
== 0) {
571 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
575 if (pkt
->hasip4
&& pkt
->ip4hdr_info
.fragment
) {
576 trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
580 csum
= _net_rx_pkt_calc_l4_csum(pkt
);
582 *csum_valid
= ((csum
== 0) || (csum
== 0xFFFF));
584 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid
);
589 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt
*pkt
)
594 trace_net_rx_pkt_l4_csum_fix_entry();
596 switch (pkt
->l4hdr_info
.proto
) {
597 case ETH_L4_HDR_PROTO_TCP
:
598 l4_cso
= offsetof(struct tcp_header
, th_sum
);
599 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso
);
602 case ETH_L4_HDR_PROTO_UDP
:
603 if (pkt
->l4hdr_info
.hdr
.udp
.uh_sum
== 0) {
604 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
607 l4_cso
= offsetof(struct udp_header
, uh_sum
);
608 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso
);
612 trace_net_rx_pkt_l4_csum_fix_not_xxp();
616 if (pkt
->hasip4
&& pkt
->ip4hdr_info
.fragment
) {
617 trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
621 /* Set zero to checksum word */
622 iov_from_buf(pkt
->vec
, pkt
->vec_len
,
623 pkt
->l4hdr_off
+ l4_cso
,
624 &csum
, sizeof(csum
));
626 /* Calculate L4 checksum */
627 csum
= cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt
));
629 /* Set calculated checksum to checksum word */
630 iov_from_buf(pkt
->vec
, pkt
->vec_len
,
631 pkt
->l4hdr_off
+ l4_cso
,
632 &csum
, sizeof(csum
));
634 trace_net_rx_pkt_l4_csum_fix_csum(pkt
->l4hdr_off
+ l4_cso
, csum
);