hw/net/net_rx_pkt: Enforce alignment for eth_header
[qemu/armbru.git] / hw / net / net_rx_pkt.c
blob1de42b4f513c75dc00fae2f976e1ee205987193a
/*
 * QEMU RX packets abstractions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Authors:
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */
18 #include "qemu/osdep.h"
19 #include "trace.h"
20 #include "net_rx_pkt.h"
21 #include "net/checksum.h"
22 #include "net/tap.h"
24 struct NetRxPkt {
25 struct virtio_net_hdr virt_hdr;
26 struct {
27 struct eth_header eth;
28 struct vlan_header vlan;
29 } ehdr_buf;
30 struct iovec *vec;
31 uint16_t vec_len_total;
32 uint16_t vec_len;
33 uint32_t tot_len;
34 uint16_t tci;
35 size_t ehdr_buf_len;
36 eth_pkt_types_e packet_type;
38 /* Analysis results */
39 bool hasip4;
40 bool hasip6;
42 size_t l3hdr_off;
43 size_t l4hdr_off;
44 size_t l5hdr_off;
46 eth_ip6_hdr_info ip6hdr_info;
47 eth_ip4_hdr_info ip4hdr_info;
48 eth_l4_hdr_info l4hdr_info;
51 void net_rx_pkt_init(struct NetRxPkt **pkt)
53 struct NetRxPkt *p = g_malloc0(sizeof *p);
54 p->vec = NULL;
55 p->vec_len_total = 0;
56 *pkt = p;
59 void net_rx_pkt_uninit(struct NetRxPkt *pkt)
61 if (pkt->vec_len_total != 0) {
62 g_free(pkt->vec);
65 g_free(pkt);
68 struct virtio_net_hdr *net_rx_pkt_get_vhdr(struct NetRxPkt *pkt)
70 assert(pkt);
71 return &pkt->virt_hdr;
74 static inline void
75 net_rx_pkt_iovec_realloc(struct NetRxPkt *pkt,
76 int new_iov_len)
78 if (pkt->vec_len_total < new_iov_len) {
79 g_free(pkt->vec);
80 pkt->vec = g_malloc(sizeof(*pkt->vec) * new_iov_len);
81 pkt->vec_len_total = new_iov_len;
85 static void
86 net_rx_pkt_pull_data(struct NetRxPkt *pkt,
87 const struct iovec *iov, int iovcnt,
88 size_t ploff)
90 uint32_t pllen = iov_size(iov, iovcnt) - ploff;
92 if (pkt->ehdr_buf_len) {
93 net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
95 pkt->vec[0].iov_base = &pkt->ehdr_buf;
96 pkt->vec[0].iov_len = pkt->ehdr_buf_len;
98 pkt->tot_len = pllen + pkt->ehdr_buf_len;
99 pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
100 iov, iovcnt, ploff, pllen) + 1;
101 } else {
102 net_rx_pkt_iovec_realloc(pkt, iovcnt);
104 pkt->tot_len = pllen;
105 pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
106 iov, iovcnt, ploff, pkt->tot_len);
109 eth_get_protocols(pkt->vec, pkt->vec_len, 0, &pkt->hasip4, &pkt->hasip6,
110 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
111 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
113 trace_net_rx_pkt_parsed(pkt->hasip4, pkt->hasip6, pkt->l4hdr_info.proto,
114 pkt->l3hdr_off, pkt->l4hdr_off, pkt->l5hdr_off);
117 void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
118 const struct iovec *iov, int iovcnt,
119 size_t iovoff, bool strip_vlan)
121 uint16_t tci = 0;
122 uint16_t ploff = iovoff;
123 assert(pkt);
125 if (strip_vlan) {
126 pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, &pkt->ehdr_buf,
127 &ploff, &tci);
128 } else {
129 pkt->ehdr_buf_len = 0;
132 pkt->tci = tci;
134 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
137 void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
138 const struct iovec *iov, int iovcnt,
139 size_t iovoff, bool strip_vlan,
140 uint16_t vet)
142 uint16_t tci = 0;
143 uint16_t ploff = iovoff;
144 assert(pkt);
146 if (strip_vlan) {
147 pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
148 &pkt->ehdr_buf,
149 &ploff, &tci);
150 } else {
151 pkt->ehdr_buf_len = 0;
154 pkt->tci = tci;
156 net_rx_pkt_pull_data(pkt, iov, iovcnt, ploff);
/*
 * Print a one-line summary of @pkt to stdout.
 *
 * Compiled to a no-op unless NET_RX_PKT_DEBUG is defined.
 */
void net_rx_pkt_dump(struct NetRxPkt *pkt)
{
#ifdef NET_RX_PKT_DEBUG
    assert(pkt);

    /*
     * tot_len is uint32_t and ehdr_buf_len is size_t: use the matching
     * conversion specifiers so the output is right on every host ABI.
     * PRIu32 comes from <inttypes.h>, which "qemu/osdep.h" is expected to
     * provide.
     */
    printf("RX PKT: tot_len: %" PRIu32 ", ehdr_buf_len: %zu, vlan_tag: %d\n",
           pkt->tot_len, pkt->ehdr_buf_len, pkt->tci);
#endif
}
169 void net_rx_pkt_set_packet_type(struct NetRxPkt *pkt,
170 eth_pkt_types_e packet_type)
172 assert(pkt);
174 pkt->packet_type = packet_type;
178 eth_pkt_types_e net_rx_pkt_get_packet_type(struct NetRxPkt *pkt)
180 assert(pkt);
182 return pkt->packet_type;
185 size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
187 assert(pkt);
189 return pkt->tot_len;
192 void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
193 const struct iovec *iov, size_t iovcnt,
194 size_t iovoff)
196 assert(pkt);
198 eth_get_protocols(iov, iovcnt, iovoff, &pkt->hasip4, &pkt->hasip6,
199 &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
200 &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
203 void net_rx_pkt_get_protocols(struct NetRxPkt *pkt,
204 bool *hasip4, bool *hasip6,
205 EthL4HdrProto *l4hdr_proto)
207 assert(pkt);
209 *hasip4 = pkt->hasip4;
210 *hasip6 = pkt->hasip6;
211 *l4hdr_proto = pkt->l4hdr_info.proto;
214 size_t net_rx_pkt_get_l3_hdr_offset(struct NetRxPkt *pkt)
216 assert(pkt);
217 return pkt->l3hdr_off;
220 size_t net_rx_pkt_get_l4_hdr_offset(struct NetRxPkt *pkt)
222 assert(pkt);
223 return pkt->l4hdr_off;
226 size_t net_rx_pkt_get_l5_hdr_offset(struct NetRxPkt *pkt)
228 assert(pkt);
229 return pkt->l5hdr_off;
232 eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt)
234 return &pkt->ip6hdr_info;
237 eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
239 return &pkt->ip4hdr_info;
/*
 * Append @size bytes taken from @ptr to @rss_input at offset
 * *@bytes_written, then advance *@bytes_written by @size.
 *
 * No bounds check is done here; the caller sizes @rss_input.
 */
static inline void
_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
                      void *ptr, size_t size)
{
    uint8_t *dst = &rss_input[*bytes_written];

    memcpy(dst, ptr, size);
    /* Traced with the offset the chunk was written at. */
    trace_net_rx_pkt_rss_add_chunk(ptr, size, *bytes_written);
    *bytes_written += size;
}
251 static inline void
252 _net_rx_rss_prepare_ip4(uint8_t *rss_input,
253 struct NetRxPkt *pkt,
254 size_t *bytes_written)
256 struct ip_header *ip4_hdr = &pkt->ip4hdr_info.ip4_hdr;
258 _net_rx_rss_add_chunk(rss_input, bytes_written,
259 &ip4_hdr->ip_src, sizeof(uint32_t));
261 _net_rx_rss_add_chunk(rss_input, bytes_written,
262 &ip4_hdr->ip_dst, sizeof(uint32_t));
265 static inline void
266 _net_rx_rss_prepare_ip6(uint8_t *rss_input,
267 struct NetRxPkt *pkt,
268 bool ipv6ex, size_t *bytes_written)
270 eth_ip6_hdr_info *ip6info = &pkt->ip6hdr_info;
272 _net_rx_rss_add_chunk(rss_input, bytes_written,
273 (ipv6ex && ip6info->rss_ex_src_valid) ? &ip6info->rss_ex_src
274 : &ip6info->ip6_hdr.ip6_src,
275 sizeof(struct in6_address));
277 _net_rx_rss_add_chunk(rss_input, bytes_written,
278 (ipv6ex && ip6info->rss_ex_dst_valid) ? &ip6info->rss_ex_dst
279 : &ip6info->ip6_hdr.ip6_dst,
280 sizeof(struct in6_address));
283 static inline void
284 _net_rx_rss_prepare_tcp(uint8_t *rss_input,
285 struct NetRxPkt *pkt,
286 size_t *bytes_written)
288 struct tcp_header *tcphdr = &pkt->l4hdr_info.hdr.tcp;
290 _net_rx_rss_add_chunk(rss_input, bytes_written,
291 &tcphdr->th_sport, sizeof(uint16_t));
293 _net_rx_rss_add_chunk(rss_input, bytes_written,
294 &tcphdr->th_dport, sizeof(uint16_t));
297 static inline void
298 _net_rx_rss_prepare_udp(uint8_t *rss_input,
299 struct NetRxPkt *pkt,
300 size_t *bytes_written)
302 struct udp_header *udphdr = &pkt->l4hdr_info.hdr.udp;
304 _net_rx_rss_add_chunk(rss_input, bytes_written,
305 &udphdr->uh_sport, sizeof(uint16_t));
307 _net_rx_rss_add_chunk(rss_input, bytes_written,
308 &udphdr->uh_dport, sizeof(uint16_t));
311 uint32_t
312 net_rx_pkt_calc_rss_hash(struct NetRxPkt *pkt,
313 NetRxPktRssType type,
314 uint8_t *key)
316 uint8_t rss_input[36];
317 size_t rss_length = 0;
318 uint32_t rss_hash = 0;
319 net_toeplitz_key key_data;
321 switch (type) {
322 case NetPktRssIpV4:
323 assert(pkt->hasip4);
324 trace_net_rx_pkt_rss_ip4();
325 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
326 break;
327 case NetPktRssIpV4Tcp:
328 assert(pkt->hasip4);
329 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
330 trace_net_rx_pkt_rss_ip4_tcp();
331 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
332 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
333 break;
334 case NetPktRssIpV6Tcp:
335 assert(pkt->hasip6);
336 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
337 trace_net_rx_pkt_rss_ip6_tcp();
338 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
339 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
340 break;
341 case NetPktRssIpV6:
342 assert(pkt->hasip6);
343 trace_net_rx_pkt_rss_ip6();
344 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
345 break;
346 case NetPktRssIpV6Ex:
347 assert(pkt->hasip6);
348 trace_net_rx_pkt_rss_ip6_ex();
349 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
350 break;
351 case NetPktRssIpV6TcpEx:
352 assert(pkt->hasip6);
353 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP);
354 trace_net_rx_pkt_rss_ip6_ex_tcp();
355 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
356 _net_rx_rss_prepare_tcp(&rss_input[0], pkt, &rss_length);
357 break;
358 case NetPktRssIpV4Udp:
359 assert(pkt->hasip4);
360 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
361 trace_net_rx_pkt_rss_ip4_udp();
362 _net_rx_rss_prepare_ip4(&rss_input[0], pkt, &rss_length);
363 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
364 break;
365 case NetPktRssIpV6Udp:
366 assert(pkt->hasip6);
367 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
368 trace_net_rx_pkt_rss_ip6_udp();
369 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, false, &rss_length);
370 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
371 break;
372 case NetPktRssIpV6UdpEx:
373 assert(pkt->hasip6);
374 assert(pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP);
375 trace_net_rx_pkt_rss_ip6_ex_udp();
376 _net_rx_rss_prepare_ip6(&rss_input[0], pkt, true, &rss_length);
377 _net_rx_rss_prepare_udp(&rss_input[0], pkt, &rss_length);
378 break;
379 default:
380 assert(false);
381 break;
384 net_toeplitz_key_init(&key_data, key);
385 net_toeplitz_add(&rss_hash, rss_input, rss_length, &key_data);
387 trace_net_rx_pkt_rss_hash(rss_length, rss_hash);
389 return rss_hash;
392 uint16_t net_rx_pkt_get_ip_id(struct NetRxPkt *pkt)
394 assert(pkt);
396 if (pkt->hasip4) {
397 return be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_id);
400 return 0;
403 bool net_rx_pkt_is_tcp_ack(struct NetRxPkt *pkt)
405 assert(pkt);
407 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP) {
408 return TCP_HEADER_FLAGS(&pkt->l4hdr_info.hdr.tcp) & TCP_FLAG_ACK;
411 return false;
414 bool net_rx_pkt_has_tcp_data(struct NetRxPkt *pkt)
416 assert(pkt);
418 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_TCP) {
419 return pkt->l4hdr_info.has_tcp_data;
422 return false;
425 struct iovec *net_rx_pkt_get_iovec(struct NetRxPkt *pkt)
427 assert(pkt);
429 return pkt->vec;
432 uint16_t net_rx_pkt_get_iovec_len(struct NetRxPkt *pkt)
434 assert(pkt);
436 return pkt->vec_len;
439 void net_rx_pkt_set_vhdr(struct NetRxPkt *pkt,
440 struct virtio_net_hdr *vhdr)
442 assert(pkt);
444 memcpy(&pkt->virt_hdr, vhdr, sizeof pkt->virt_hdr);
447 void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
448 const struct iovec *iov, int iovcnt)
450 assert(pkt);
452 iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
455 void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt)
457 assert(pkt);
459 memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
462 bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
464 assert(pkt);
466 return pkt->ehdr_buf_len ? true : false;
469 uint16_t net_rx_pkt_get_vlan_tag(struct NetRxPkt *pkt)
471 assert(pkt);
473 return pkt->tci;
476 bool net_rx_pkt_validate_l3_csum(struct NetRxPkt *pkt, bool *csum_valid)
478 uint32_t cntr;
479 uint16_t csum;
480 uint32_t csl;
482 trace_net_rx_pkt_l3_csum_validate_entry();
484 if (!pkt->hasip4) {
485 trace_net_rx_pkt_l3_csum_validate_not_ip4();
486 return false;
489 csl = pkt->l4hdr_off - pkt->l3hdr_off;
491 cntr = net_checksum_add_iov(pkt->vec, pkt->vec_len,
492 pkt->l3hdr_off,
493 csl, 0);
495 csum = net_checksum_finish(cntr);
497 *csum_valid = (csum == 0);
499 trace_net_rx_pkt_l3_csum_validate_csum(pkt->l3hdr_off, csl,
500 cntr, csum, *csum_valid);
502 return true;
505 static uint16_t
506 _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
508 uint32_t cntr;
509 uint16_t csum;
510 uint16_t csl;
511 uint32_t cso;
513 trace_net_rx_pkt_l4_csum_calc_entry();
515 if (pkt->hasip4) {
516 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP) {
517 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
518 trace_net_rx_pkt_l4_csum_calc_ip4_udp();
519 } else {
520 csl = be16_to_cpu(pkt->ip4hdr_info.ip4_hdr.ip_len) -
521 IP_HDR_GET_LEN(&pkt->ip4hdr_info.ip4_hdr);
522 trace_net_rx_pkt_l4_csum_calc_ip4_tcp();
525 cntr = eth_calc_ip4_pseudo_hdr_csum(&pkt->ip4hdr_info.ip4_hdr,
526 csl, &cso);
527 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
528 } else {
529 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP) {
530 csl = be16_to_cpu(pkt->l4hdr_info.hdr.udp.uh_ulen);
531 trace_net_rx_pkt_l4_csum_calc_ip6_udp();
532 } else {
533 struct ip6_header *ip6hdr = &pkt->ip6hdr_info.ip6_hdr;
534 size_t full_ip6hdr_len = pkt->l4hdr_off - pkt->l3hdr_off;
535 size_t ip6opts_len = full_ip6hdr_len - sizeof(struct ip6_header);
537 csl = be16_to_cpu(ip6hdr->ip6_ctlun.ip6_un1.ip6_un1_plen) -
538 ip6opts_len;
539 trace_net_rx_pkt_l4_csum_calc_ip6_tcp();
542 cntr = eth_calc_ip6_pseudo_hdr_csum(&pkt->ip6hdr_info.ip6_hdr, csl,
543 pkt->ip6hdr_info.l4proto, &cso);
544 trace_net_rx_pkt_l4_csum_calc_ph_csum(cntr, csl);
547 cntr += net_checksum_add_iov(pkt->vec, pkt->vec_len,
548 pkt->l4hdr_off, csl, cso);
550 csum = net_checksum_finish_nozero(cntr);
552 trace_net_rx_pkt_l4_csum_calc_csum(pkt->l4hdr_off, csl, cntr, csum);
554 return csum;
557 bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
559 uint16_t csum;
561 trace_net_rx_pkt_l4_csum_validate_entry();
563 if (pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_TCP &&
564 pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_UDP) {
565 trace_net_rx_pkt_l4_csum_validate_not_xxp();
566 return false;
569 if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP &&
570 pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
571 trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
572 return false;
575 if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
576 trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
577 return false;
580 csum = _net_rx_pkt_calc_l4_csum(pkt);
582 *csum_valid = ((csum == 0) || (csum == 0xFFFF));
584 trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid);
586 return true;
589 bool net_rx_pkt_fix_l4_csum(struct NetRxPkt *pkt)
591 uint16_t csum = 0;
592 uint32_t l4_cso;
594 trace_net_rx_pkt_l4_csum_fix_entry();
596 switch (pkt->l4hdr_info.proto) {
597 case ETH_L4_HDR_PROTO_TCP:
598 l4_cso = offsetof(struct tcp_header, th_sum);
599 trace_net_rx_pkt_l4_csum_fix_tcp(l4_cso);
600 break;
602 case ETH_L4_HDR_PROTO_UDP:
603 if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
604 trace_net_rx_pkt_l4_csum_fix_udp_with_no_checksum();
605 return false;
607 l4_cso = offsetof(struct udp_header, uh_sum);
608 trace_net_rx_pkt_l4_csum_fix_udp(l4_cso);
609 break;
611 default:
612 trace_net_rx_pkt_l4_csum_fix_not_xxp();
613 return false;
616 if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
617 trace_net_rx_pkt_l4_csum_fix_ip4_fragment();
618 return false;
621 /* Set zero to checksum word */
622 iov_from_buf(pkt->vec, pkt->vec_len,
623 pkt->l4hdr_off + l4_cso,
624 &csum, sizeof(csum));
626 /* Calculate L4 checksum */
627 csum = cpu_to_be16(_net_rx_pkt_calc_l4_csum(pkt));
629 /* Set calculated checksum to checksum word */
630 iov_from_buf(pkt->vec, pkt->vec_len,
631 pkt->l4hdr_off + l4_cso,
632 &csum, sizeof(csum));
634 trace_net_rx_pkt_l4_csum_fix_csum(pkt->l4hdr_off + l4_cso, csum);
636 return true;