igb: Implement Rx SCTP CSO
[qemu/kevin.git] / hw / net / net_rx_pkt.c
blob3575c8b9f91fd3125d6001460027beaec7ddb366
/*
 * QEMU RX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
18 #include "qemu/osdep.h"
19 #include "qemu/crc32c.h"
20 #include "trace.h"
21 #include "net_rx_pkt.h"
22 #include "net/checksum.h"
23 #include "net/tap.h"
25 struct NetRxPkt {
26 struct virtio_net_hdr virt_hdr;
27 struct {
28 struct eth_header eth;
29 struct vlan_header vlan;
30 } ehdr_buf;
31 struct iovec *vec;
32 uint16_t vec_len_total;
33 uint16_t vec_len;
34 uint32_t tot_len;
35 uint16_t tci;
36 size_t ehdr_buf_len;
37 eth_pkt_types_e packet_type;
39 /* Analysis results */
40 bool hasip4;
41 bool hasip6;
43 size_t l3hdr_off;
44 size_t l4hdr_off;
45 size_t l5hdr_off;
47 eth_ip6_hdr_info ip6hdr_info;
48 eth_ip4_hdr_info ip4hdr_info;
49 eth_l4_hdr_info l4hdr_info;
52 void net_rx_pkt_init(struct NetRxPkt **pkt)
54 struct NetRxPkt *p = g_malloc0(sizeof *p);
55 p->vec = NULL;
56 p->vec_len_total = 0;
57 *pkt = p;
60 void net_rx_pkt_uninit(struct NetRxPkt *pkt)
62 if (pkt->vec_len_total != 0) {
63 g_free(pkt->vec);
66 g_free(pkt);
69 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
71 assert(pkt);
72 return &pkt->virt_hdr;
75 static inline void
76 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
77 int new_iov_len)
79 if (pkt->vec_len_total < new_iov_len) {
80 g_free(pkt->vec);
81 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
82 pkt->vec_len_total = new_iov_len;
86 static void
87 net_rx_pkt_pull_data(struct NetRxPkt *pkt,
88 const struct iovec *iov, int iovcnt,
89 size_t ploff)
91 uint32_t pllen = iov_size(iov, iovcnt) - ploff;
93 if (pkt->ehdr_buf_len) {
94 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
96 pkt->vec[0].iov_base = &pkt->ehdr_buf;
97 pkt->vec[0].iov_len = pkt->ehdr_buf_len;
99 pkt->tot_len = pllen + pkt->ehdr_buf_len;
100 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
101 iov, iovcnt, ploff, pllen) + 1;
102 } else {
103 net_rx_pkt_iovec_realloc(pkt, iovcnt);
105 pkt->tot_len = pllen;
106 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
107 iov, iovcnt, ploff, pkt->tot_len);
110 eth_get_protocols(pkt->vec, pkt->vec_len, 0, &pkt->hasip4, &pkt->hasip6,
111 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
112 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
114 trace_net_rx_pkt_parsed(pkt->hasip4, pkt->hasip6, pkt->l4hdr_info.proto,
115 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
118 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
119 const struct iovec *iov, int iovcnt,
120 size_t iovoff, bool strip_vlan)
122 uint16_t tci = 0;
123 uint16_t ploff = iovoff;
124 assert(pkt);
126 if (strip_vlan) {
127 pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, &pkt->ehdr_buf,
128 &ploff, &tci);
129 } else {
130 pkt->ehdr_buf_len = 0;
133 pkt->tci = tci;
135 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
138 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
139 const struct iovec *iov, int iovcnt,
140 size_t iovoff, bool strip_vlan,
141 uint16_t vet)
143 uint16_t tci = 0;
144 uint16_t ploff = iovoff;
145 assert(pkt);
147 if (strip_vlan) {
148 pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
149 &pkt->ehdr_buf,
150 &ploff, &tci);
151 } else {
152 pkt->ehdr_buf_len = 0;
155 pkt->tci = tci;
157 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
/*
 * Print a one-line summary of @pkt to stdout.
 *
 * Compiles to an empty function unless NET_RX_PKT_DEBUG is defined.
 */
void net_rx_pkt_dump(struct NetRxPkt *pkt)
{
#ifdef NET_RX_PKT_DEBUG
    assert(pkt);

    /*
     * tot_len is uint32_t and ehdr_buf_len is size_t, so they need
     * unsigned and %zu conversions respectively.
     */
    printf("RX PKT: tot_len: %u, ehdr_buf_len: %zu, vlan_tag: %d\n",
           (unsigned)pkt->tot_len, pkt->ehdr_buf_len, pkt->tci);
#endif
}
170 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
171 eth_pkt_types_e packet_type)
173 assert(pkt);
175 pkt->packet_type = packet_type;
179 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt)
181 assert(pkt);
183 return pkt->packet_type;
186 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
188 assert(pkt);
190 return pkt->tot_len;
193 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
194 const struct iovec *iov, size_t iovcnt,
195 size_t iovoff)
197 assert(pkt);
199 eth_get_protocols(iov, iovcnt, iovoff, &pkt->hasip4, &pkt->hasip6,
200 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
201 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
204 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
205 bool *hasip4, bool *hasip6,
206 EthL4HdrProto *l4hdr_proto)
208 assert(pkt);
210 *hasip4 = pkt->hasip4;
211 *hasip6 = pkt->hasip6;
212 *l4hdr_proto = pkt->l4hdr_info.proto;
215 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
217 assert(pkt);
218 return pkt->l3hdr_off;
221 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt)
223 assert(pkt);
224 return pkt->l4hdr_off;
227 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt)
229 assert(pkt);
230 return pkt->l5hdr_off;
233 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt)
235 return &pkt->ip6hdr_info;
238 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
240 return &pkt->ip4hdr_info;
/*
 * Append @size bytes from @ptr to @rss_input at offset *@bytes_written
 * and advance *@bytes_written accordingly.
 */
static inline void
_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
                      void *ptr, size_t size)
{
    size_t at = *bytes_written;

    memcpy(rss_input + at, ptr, size);
    trace_net_rx_pkt_rss_add_chunk(ptr, size, at);
    *bytes_written = at + size;
}
252 static inline void
253 _net_rx_rss_prepare_ip4(uint8_t *rss_input,
254 struct NetRxPkt *pkt,
255 size_t *bytes_written)
257 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr;
259 _net_rx_rss_add_chunk(rss_input, bytes_written,
260 &ip4_hdr->ip_src, sizeof(uint32_t));
262 _net_rx_rss_add_chunk(rss_input, bytes_written,
263 &ip4_hdr->ip_dst, sizeof(uint32_t));
266 static inline void
267 _net_rx_rss_prepare_ip6(uint8_t *rss_input,
268 struct NetRxPkt *pkt,
269 bool ipv6ex, size_t *bytes_written)
271 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info;
273 _net_rx_rss_add_chunk(rss_input, bytes_written,
274 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src
275 : &ip6info->ip6_hdr.ip6_src,
276 sizeof(struct in6_address));
278 _net_rx_rss_add_chunk(rss_input, bytes_written,
279 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst
280 : &ip6info->ip6_hdr.ip6_dst,
281 sizeof(struct in6_address));
284 static inline void
285 _net_rx_rss_prepare_tcp(uint8_t *rss_input,
286 struct NetRxPkt *pkt,
287 size_t *bytes_written)
289 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp;
291 _net_rx_rss_add_chunk(rss_input, bytes_written,
292 &tcphdr->th_sport, sizeof(uint16_t));
294 _net_rx_rss_add_chunk(rss_input, bytes_written,
295 &tcphdr->th_dport, sizeof(uint16_t));
298 static inline void
299 _net_rx_rss_prepare_udp(uint8_t *rss_input,
300 struct NetRxPkt *pkt,
301 size_t *bytes_written)
303 struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp;
305 _net_rx_rss_add_chunk(rss_input, bytes_written,
306 &udphdr->uh_sport, sizeof(uint16_t));
308 _net_rx_rss_add_chunk(rss_input, bytes_written,
309 &udphdr->uh_dport, sizeof(uint16_t));
312 uint32_t
313 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
314 NetRxPktRssType type,
315 uint8_t *key)
317 uint8_t rss_input[36];
318 size_t rss_length = 0;
319 uint32_t rss_hash = 0;
320 net_toeplitz_key key_data;
322 switch (type) {
323 case NetPktRssIpV4:
324 assert(pkt->hasip4);
325 trace_net_rx_pkt_rss_ip4();
326 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
327 break;
328 case NetPktRssIpV4Tcp:
329 assert(pkt->hasip4);
330 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
331 trace_net_rx_pkt_rss_ip4_tcp();
332 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
333 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
334 break;
335 case NetPktRssIpV6Tcp:
336 assert(pkt->hasip6);
337 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
338 trace_net_rx_pkt_rss_ip6_tcp();
339 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
340 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
341 break;
342 case NetPktRssIpV6:
343 assert(pkt->hasip6);
344 trace_net_rx_pkt_rss_ip6();
345 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
346 break;
347 case NetPktRssIpV6Ex:
348 assert(pkt->hasip6);
349 trace_net_rx_pkt_rss_ip6_ex();
350 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
351 break;
352 case NetPktRssIpV6TcpEx:
353 assert(pkt->hasip6);
354 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
355 trace_net_rx_pkt_rss_ip6_ex_tcp();
356 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
357 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
358 break;
359 case NetPktRssIpV4Udp:
360 assert(pkt->hasip4);
361 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
362 trace_net_rx_pkt_rss_ip4_udp();
363 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
364 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
365 break;
366 case NetPktRssIpV6Udp:
367 assert(pkt->hasip6);
368 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
369 trace_net_rx_pkt_rss_ip6_udp();
370 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
371 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
372 break;
373 case NetPktRssIpV6UdpEx:
374 assert(pkt->hasip6);
375 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
376 trace_net_rx_pkt_rss_ip6_ex_udp();
377 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
378 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
379 break;
380 default:
381 assert(false);
382 break;
385 net_toeplitz_key_init(&key_data, key);
386 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data);
388 trace_net_rx_pkt_rss_hash(rss_length, rss_hash);
390 return rss_hash;
393 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
395 assert(pkt);
397 if (pkt->hasip4) {
398 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
401 return 0;
404 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
406 assert(pkt);
408 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP) {
409 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
412 return false;
415 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
417 assert(pkt);
419 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP) {
420 return pkt->l4hdr_info.has_tcp_data;
423 return false;
426 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
428 assert(pkt);
430 return pkt->vec;
433 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt)
435 assert(pkt);
437 return pkt->vec_len;
440 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
441 struct virtio_net_hdr *vhdr)
443 assert(pkt);
445 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
448 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
449 const struct iovec *iov, int iovcnt)
451 assert(pkt);
453 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
456 void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt)
458 assert(pkt);
460 memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
463 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
465 assert(pkt);
467 return pkt->ehdr_buf_len ? true : false;
470 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
472 assert(pkt);
474 return pkt->tci;
477 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)
479 uint32_t cntr;
480 uint16_t csum;
481 uint32_t csl;
483 trace_net_rx_pkt_l3_csum_validate_entry();
485 if (!pkt->hasip4) {
486 trace_net_rx_pkt_l3_csum_validate_not_ip4();
487 return false;
490 csl = pkt->l4hdr_off - pkt->l3hdr_off;
492 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len,
493 pkt->l3hdr_off,
494 csl, 0);
496 csum = net_checksum_finish(cntr);
498 *csum_valid = (csum == 0);
500 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl,
501 cntr, csum, *csum_valid);
503 return true;
506 static uint16_t
507 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
509 uint32_t cntr;
510 uint16_t csum;
511 uint16_t csl;
512 uint32_t cso;
514 trace_net_rx_pkt_l4_csum_calc_entry();
516 if (pkt->hasip4) {
517 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP) {
518 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
519 trace_net_rx_pkt_l4_csum_calc_ip4_udp();
520 } else {
521 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) -
522 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr);
523 trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
526 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr,
527 csl, &cso);
528 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
529 } else {
530 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP) {
531 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
532 trace_net_rx_pkt_l4_csum_calc_ip6_udp();
533 } else {
534 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr;
535 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off;
536 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
538 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) -
539 ip6opts_len;
540 trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
543 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl,
544 pkt->ip6hdr_info.l4proto, &cso);
545 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
548 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len,
549 pkt->l4hdr_off, csl, cso);
551 csum = net_checksum_finish_nozero(cntr);
553 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum);
555 return csum;
558 static bool
559 _net_rx_pkt_validate_sctp_sum(struct NetRxPkt *pkt)
561 size_t csum_off;
562 size_t off = pkt->l4hdr_off;
563 size_t vec_len = pkt->vec_len;
564 struct iovec *vec;
565 uint32_t calculated = 0;
566 uint32_t original;
567 bool valid;
569 for (vec = pkt->vec; vec->iov_len < off; vec++) {
570 off -= vec->iov_len;
571 vec_len--;
574 csum_off = off + 8;
576 if (!iov_to_buf(vec, vec_len, csum_off, &original, sizeof(original))) {
577 return false;
580 if (!iov_from_buf(vec, vec_len, csum_off,
581 &calculated, sizeof(calculated))) {
582 return false;
585 calculated = crc32c(0xffffffff,
586 (uint8_t *)vec->iov_base + off, vec->iov_len - off);
587 calculated = iov_crc32c(calculated ^ 0xffffffff, vec + 1, vec_len - 1);
588 valid = calculated == le32_to_cpu(original);
589 iov_from_buf(vec, vec_len, csum_off, &original, sizeof(original));
591 return valid;
594 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
596 uint32_t csum;
598 trace_net_rx_pkt_l4_csum_validate_entry();
600 if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
601 trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
602 return false;
605 switch (pkt->l4hdr_info.proto) {
606 case ETH_L4_HDR_PROTO_UDP:
607 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
608 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
609 return false;
611 /* fall through */
612 case ETH_L4_HDR_PROTO_TCP:
613 csum = _net_rx_pkt_calc_l4_csum(pkt);
614 *csum_valid = ((csum == 0) || (csum == 0xFFFF));
615 break;
617 case ETH_L4_HDR_PROTO_SCTP:
618 *csum_valid = _net_rx_pkt_validate_sctp_sum(pkt);
619 break;
621 default:
622 trace_net_rx_pkt_l4_csum_validate_not_xxp();
623 return false;
626 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid);
628 return true;
631 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt)
633 uint16_t csum = 0;
634 uint32_t l4_cso;
636 trace_net_rx_pkt_l4_csum_fix_entry();
638 switch (pkt->l4hdr_info.proto) {
639 case ETH_L4_HDR_PROTO_TCP:
640 l4_cso = offsetof(struct tcp_header, th_sum);
641 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso);
642 break;
644 case ETH_L4_HDR_PROTO_UDP:
645 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
646 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
647 return false;
649 l4_cso = offsetof(struct udp_header, uh_sum);
650 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso);
651 break;
653 default:
654 trace_net_rx_pkt_l4_csum_fix_not_xxp();
655 return false;
658 if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
659 trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
660 return false;
663 /* Set zero to checksum word */
664 iov_from_buf(pkt->vec, pkt->vec_len,
665 pkt->l4hdr_off + l4_cso,
666 &csum, sizeof(csum));
668 /* Calculate L4 checksum */
669 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt));
671 /* Set calculated checksum to checksum word */
672 iov_from_buf(pkt->vec, pkt->vec_len,
673 pkt->l4hdr_off + l4_cso,
674 &csum, sizeof(csum));
676 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum);
678 return true;