2 * QEMU network structures definitions and helper functions
4 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
6 * Developed by Daynix Computing LTD (http://www.daynix.com)
9 * Dmitry Fleytman <dmitry@daynix.com>
10 * Tamir Shomer <tamirs@daynix.com>
11 * Yan Vugenfirer <yan@daynix.com>
13 * This work is licensed under the terms of the GNU GPL, version 2 or later.
14 * See the COPYING file in the top-level directory.
18 #include "qemu/osdep.h"
20 #include "net/checksum.h"
21 #include "qemu-common.h"
24 void eth_setup_vlan_headers_ex(struct eth_header
*ehdr
, uint16_t vlan_tag
,
25 uint16_t vlan_ethtype
, bool *is_new
)
27 struct vlan_header
*vhdr
= PKT_GET_VLAN_HDR(ehdr
);
29 switch (be16_to_cpu(ehdr
->h_proto
)) {
37 /* No VLAN header, put a new one */
38 vhdr
->h_proto
= ehdr
->h_proto
;
39 ehdr
->h_proto
= cpu_to_be16(vlan_ethtype
);
43 vhdr
->h_tci
= cpu_to_be16(vlan_tag
);
47 eth_get_gso_type(uint16_t l3_proto
, uint8_t *l3_hdr
, uint8_t l4proto
)
49 uint8_t ecn_state
= 0;
51 if (l3_proto
== ETH_P_IP
) {
52 struct ip_header
*iphdr
= (struct ip_header
*) l3_hdr
;
54 if (IP_HEADER_VERSION(iphdr
) == IP_HEADER_VERSION_4
) {
55 if (IPTOS_ECN(iphdr
->ip_tos
) == IPTOS_ECN_CE
) {
56 ecn_state
= VIRTIO_NET_HDR_GSO_ECN
;
58 if (l4proto
== IP_PROTO_TCP
) {
59 return VIRTIO_NET_HDR_GSO_TCPV4
| ecn_state
;
60 } else if (l4proto
== IP_PROTO_UDP
) {
61 return VIRTIO_NET_HDR_GSO_UDP
| ecn_state
;
64 } else if (l3_proto
== ETH_P_IPV6
) {
65 struct ip6_header
*ip6hdr
= (struct ip6_header
*) l3_hdr
;
67 if (IP6_ECN(ip6hdr
->ip6_ecn_acc
) == IP6_ECN_CE
) {
68 ecn_state
= VIRTIO_NET_HDR_GSO_ECN
;
71 if (l4proto
== IP_PROTO_TCP
) {
72 return VIRTIO_NET_HDR_GSO_TCPV6
| ecn_state
;
76 /* Unsupported offload */
77 g_assert_not_reached();
79 return VIRTIO_NET_HDR_GSO_NONE
| ecn_state
;
83 eth_get_l3_proto(const struct iovec
*l2hdr_iov
, int iovcnt
, size_t l2hdr_len
)
87 size_t size
= iov_size(l2hdr_iov
, iovcnt
);
88 size_t proto_offset
= l2hdr_len
- sizeof(proto
);
90 if (size
< proto_offset
) {
94 copied
= iov_to_buf(l2hdr_iov
, iovcnt
, proto_offset
,
95 &proto
, sizeof(proto
));
97 return (copied
== sizeof(proto
)) ? be16_to_cpu(proto
) : ETH_P_UNKNOWN
;
101 _eth_copy_chunk(size_t input_size
,
102 const struct iovec
*iov
, int iovcnt
,
103 size_t offset
, size_t length
,
108 if (input_size
< offset
) {
112 copied
= iov_to_buf(iov
, iovcnt
, offset
, buffer
, length
);
114 if (copied
< length
) {
122 _eth_tcp_has_data(bool is_ip4
,
123 const struct ip_header
*ip4_hdr
,
124 const struct ip6_header
*ip6_hdr
,
125 size_t full_ip6hdr_len
,
126 const struct tcp_header
*tcp
)
131 l4len
= be16_to_cpu(ip4_hdr
->ip_len
) - IP_HDR_GET_LEN(ip4_hdr
);
133 size_t opts_len
= full_ip6hdr_len
- sizeof(struct ip6_header
);
134 l4len
= be16_to_cpu(ip6_hdr
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
) - opts_len
;
137 return l4len
> TCP_HEADER_DATA_OFFSET(tcp
);
140 void eth_get_protocols(const struct iovec
*iov
, int iovcnt
,
141 bool *isip4
, bool *isip6
,
142 bool *isudp
, bool *istcp
,
146 eth_ip6_hdr_info
*ip6hdr_info
,
147 eth_ip4_hdr_info
*ip4hdr_info
,
148 eth_l4_hdr_info
*l4hdr_info
)
151 bool fragment
= false;
152 size_t l2hdr_len
= eth_get_l2_hdr_length_iov(iov
, iovcnt
);
153 size_t input_size
= iov_size(iov
, iovcnt
);
156 *isip4
= *isip6
= *isudp
= *istcp
= false;
158 proto
= eth_get_l3_proto(iov
, iovcnt
, l2hdr_len
);
160 *l3hdr_off
= l2hdr_len
;
162 if (proto
== ETH_P_IP
) {
163 struct ip_header
*iphdr
= &ip4hdr_info
->ip4_hdr
;
165 if (input_size
< l2hdr_len
) {
169 copied
= iov_to_buf(iov
, iovcnt
, l2hdr_len
, iphdr
, sizeof(*iphdr
));
173 if (copied
< sizeof(*iphdr
)) {
177 if (IP_HEADER_VERSION(iphdr
) == IP_HEADER_VERSION_4
) {
178 if (iphdr
->ip_p
== IP_PROTO_TCP
) {
180 } else if (iphdr
->ip_p
== IP_PROTO_UDP
) {
185 ip4hdr_info
->fragment
= IP4_IS_FRAGMENT(iphdr
);
186 *l4hdr_off
= l2hdr_len
+ IP_HDR_GET_LEN(iphdr
);
188 fragment
= ip4hdr_info
->fragment
;
189 } else if (proto
== ETH_P_IPV6
) {
192 if (eth_parse_ipv6_hdr(iov
, iovcnt
, l2hdr_len
,
194 if (ip6hdr_info
->l4proto
== IP_PROTO_TCP
) {
196 } else if (ip6hdr_info
->l4proto
== IP_PROTO_UDP
) {
203 *l4hdr_off
= l2hdr_len
+ ip6hdr_info
->full_hdr_len
;
204 fragment
= ip6hdr_info
->fragment
;
209 *istcp
= _eth_copy_chunk(input_size
,
211 *l4hdr_off
, sizeof(l4hdr_info
->hdr
.tcp
),
212 &l4hdr_info
->hdr
.tcp
);
215 *l5hdr_off
= *l4hdr_off
+
216 TCP_HEADER_DATA_OFFSET(&l4hdr_info
->hdr
.tcp
);
218 l4hdr_info
->has_tcp_data
=
219 _eth_tcp_has_data(proto
== ETH_P_IP
,
220 &ip4hdr_info
->ip4_hdr
,
221 &ip6hdr_info
->ip6_hdr
,
222 *l4hdr_off
- *l3hdr_off
,
223 &l4hdr_info
->hdr
.tcp
);
226 *isudp
= _eth_copy_chunk(input_size
,
228 *l4hdr_off
, sizeof(l4hdr_info
->hdr
.udp
),
229 &l4hdr_info
->hdr
.udp
);
230 *l5hdr_off
= *l4hdr_off
+ sizeof(l4hdr_info
->hdr
.udp
);
236 eth_strip_vlan(const struct iovec
*iov
, int iovcnt
, size_t iovoff
,
237 uint8_t *new_ehdr_buf
,
238 uint16_t *payload_offset
, uint16_t *tci
)
240 struct vlan_header vlan_hdr
;
241 struct eth_header
*new_ehdr
= (struct eth_header
*) new_ehdr_buf
;
243 size_t copied
= iov_to_buf(iov
, iovcnt
, iovoff
,
244 new_ehdr
, sizeof(*new_ehdr
));
246 if (copied
< sizeof(*new_ehdr
)) {
250 switch (be16_to_cpu(new_ehdr
->h_proto
)) {
253 copied
= iov_to_buf(iov
, iovcnt
, iovoff
+ sizeof(*new_ehdr
),
254 &vlan_hdr
, sizeof(vlan_hdr
));
256 if (copied
< sizeof(vlan_hdr
)) {
260 new_ehdr
->h_proto
= vlan_hdr
.h_proto
;
262 *tci
= be16_to_cpu(vlan_hdr
.h_tci
);
263 *payload_offset
= iovoff
+ sizeof(*new_ehdr
) + sizeof(vlan_hdr
);
265 if (be16_to_cpu(new_ehdr
->h_proto
) == ETH_P_VLAN
) {
267 copied
= iov_to_buf(iov
, iovcnt
, *payload_offset
,
268 PKT_GET_VLAN_HDR(new_ehdr
), sizeof(vlan_hdr
));
270 if (copied
< sizeof(vlan_hdr
)) {
274 *payload_offset
+= sizeof(vlan_hdr
);
276 return sizeof(struct eth_header
) + sizeof(struct vlan_header
);
278 return sizeof(struct eth_header
);
286 eth_strip_vlan_ex(const struct iovec
*iov
, int iovcnt
, size_t iovoff
,
287 uint16_t vet
, uint8_t *new_ehdr_buf
,
288 uint16_t *payload_offset
, uint16_t *tci
)
290 struct vlan_header vlan_hdr
;
291 struct eth_header
*new_ehdr
= (struct eth_header
*) new_ehdr_buf
;
293 size_t copied
= iov_to_buf(iov
, iovcnt
, iovoff
,
294 new_ehdr
, sizeof(*new_ehdr
));
296 if (copied
< sizeof(*new_ehdr
)) {
300 if (be16_to_cpu(new_ehdr
->h_proto
) == vet
) {
301 copied
= iov_to_buf(iov
, iovcnt
, iovoff
+ sizeof(*new_ehdr
),
302 &vlan_hdr
, sizeof(vlan_hdr
));
304 if (copied
< sizeof(vlan_hdr
)) {
308 new_ehdr
->h_proto
= vlan_hdr
.h_proto
;
310 *tci
= be16_to_cpu(vlan_hdr
.h_tci
);
311 *payload_offset
= iovoff
+ sizeof(*new_ehdr
) + sizeof(vlan_hdr
);
312 return sizeof(struct eth_header
);
319 eth_setup_ip4_fragmentation(const void *l2hdr
, size_t l2hdr_len
,
320 void *l3hdr
, size_t l3hdr_len
,
321 size_t l3payload_len
,
322 size_t frag_offset
, bool more_frags
)
324 const struct iovec l2vec
= {
325 .iov_base
= (void *) l2hdr
,
329 if (eth_get_l3_proto(&l2vec
, 1, l2hdr_len
) == ETH_P_IP
) {
331 struct ip_header
*iphdr
= (struct ip_header
*) l3hdr
;
332 uint16_t frag_off_units
= frag_offset
/ IP_FRAG_UNIT_SIZE
;
335 assert(frag_offset
% IP_FRAG_UNIT_SIZE
== 0);
336 assert((frag_off_units
& ~IP_OFFMASK
) == 0);
338 orig_flags
= be16_to_cpu(iphdr
->ip_off
) & ~(IP_OFFMASK
|IP_MF
);
339 new_ip_off
= frag_off_units
| orig_flags
| (more_frags
? IP_MF
: 0);
340 iphdr
->ip_off
= cpu_to_be16(new_ip_off
);
341 iphdr
->ip_len
= cpu_to_be16(l3payload_len
+ l3hdr_len
);
346 eth_fix_ip4_checksum(void *l3hdr
, size_t l3hdr_len
)
348 struct ip_header
*iphdr
= (struct ip_header
*) l3hdr
;
350 iphdr
->ip_sum
= cpu_to_be16(net_raw_checksum(l3hdr
, l3hdr_len
));
354 eth_calc_ip4_pseudo_hdr_csum(struct ip_header
*iphdr
,
358 struct ip_pseudo_header ipph
;
359 ipph
.ip_src
= iphdr
->ip_src
;
360 ipph
.ip_dst
= iphdr
->ip_dst
;
361 ipph
.ip_payload
= cpu_to_be16(csl
);
362 ipph
.ip_proto
= iphdr
->ip_p
;
365 return net_checksum_add(*cso
, (uint8_t *) &ipph
);
369 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header
*iphdr
,
374 struct ip6_pseudo_header ipph
;
375 ipph
.ip6_src
= iphdr
->ip6_src
;
376 ipph
.ip6_dst
= iphdr
->ip6_dst
;
377 ipph
.len
= cpu_to_be16(csl
);
381 ipph
.next_hdr
= l4_proto
;
383 return net_checksum_add(*cso
, (uint8_t *)&ipph
);
387 eth_is_ip6_extension_header_type(uint8_t hdr_type
)
394 case IP6_AUTHENTICATION
:
404 _eth_get_rss_ex_dst_addr(const struct iovec
*pkt
, int pkt_frags
,
406 struct ip6_ext_hdr
*ext_hdr
,
407 struct in6_address
*dst_addr
)
409 struct ip6_ext_hdr_routing
*rthdr
= (struct ip6_ext_hdr_routing
*) ext_hdr
;
411 if ((rthdr
->rtype
== 2) &&
412 (rthdr
->len
== sizeof(struct in6_address
) / 8) &&
413 (rthdr
->segleft
== 1)) {
415 size_t input_size
= iov_size(pkt
, pkt_frags
);
418 if (input_size
< rthdr_offset
+ sizeof(*ext_hdr
)) {
422 bytes_read
= iov_to_buf(pkt
, pkt_frags
,
423 rthdr_offset
+ sizeof(*ext_hdr
),
424 dst_addr
, sizeof(*dst_addr
));
426 return bytes_read
== sizeof(*dst_addr
);
433 _eth_get_rss_ex_src_addr(const struct iovec
*pkt
, int pkt_frags
,
434 size_t dsthdr_offset
,
435 struct ip6_ext_hdr
*ext_hdr
,
436 struct in6_address
*src_addr
)
438 size_t bytes_left
= (ext_hdr
->ip6r_len
+ 1) * 8 - sizeof(*ext_hdr
);
439 struct ip6_option_hdr opthdr
;
440 size_t opt_offset
= dsthdr_offset
+ sizeof(*ext_hdr
);
442 while (bytes_left
> sizeof(opthdr
)) {
443 size_t input_size
= iov_size(pkt
, pkt_frags
);
444 size_t bytes_read
, optlen
;
446 if (input_size
< opt_offset
) {
450 bytes_read
= iov_to_buf(pkt
, pkt_frags
, opt_offset
,
451 &opthdr
, sizeof(opthdr
));
453 if (bytes_read
!= sizeof(opthdr
)) {
457 optlen
= (opthdr
.type
== IP6_OPT_PAD1
) ? 1
458 : (opthdr
.len
+ sizeof(opthdr
));
460 if (optlen
> bytes_left
) {
464 if (opthdr
.type
== IP6_OPT_HOME
) {
465 size_t input_size
= iov_size(pkt
, pkt_frags
);
467 if (input_size
< opt_offset
+ sizeof(opthdr
)) {
471 bytes_read
= iov_to_buf(pkt
, pkt_frags
,
472 opt_offset
+ sizeof(opthdr
),
473 src_addr
, sizeof(*src_addr
));
475 return bytes_read
== sizeof(*src_addr
);
478 opt_offset
+= optlen
;
479 bytes_left
-= optlen
;
485 bool eth_parse_ipv6_hdr(const struct iovec
*pkt
, int pkt_frags
,
486 size_t ip6hdr_off
, eth_ip6_hdr_info
*info
)
488 struct ip6_ext_hdr ext_hdr
;
490 uint8_t curr_ext_hdr_type
;
491 size_t input_size
= iov_size(pkt
, pkt_frags
);
493 info
->rss_ex_dst_valid
= false;
494 info
->rss_ex_src_valid
= false;
495 info
->fragment
= false;
497 if (input_size
< ip6hdr_off
) {
501 bytes_read
= iov_to_buf(pkt
, pkt_frags
, ip6hdr_off
,
502 &info
->ip6_hdr
, sizeof(info
->ip6_hdr
));
503 if (bytes_read
< sizeof(info
->ip6_hdr
)) {
507 info
->full_hdr_len
= sizeof(struct ip6_header
);
509 curr_ext_hdr_type
= info
->ip6_hdr
.ip6_nxt
;
511 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type
)) {
512 info
->l4proto
= info
->ip6_hdr
.ip6_nxt
;
513 info
->has_ext_hdrs
= false;
517 info
->has_ext_hdrs
= true;
520 if (input_size
< ip6hdr_off
+ info
->full_hdr_len
) {
524 bytes_read
= iov_to_buf(pkt
, pkt_frags
, ip6hdr_off
+ info
->full_hdr_len
,
525 &ext_hdr
, sizeof(ext_hdr
));
527 if (bytes_read
< sizeof(ext_hdr
)) {
531 if (curr_ext_hdr_type
== IP6_ROUTING
) {
532 info
->rss_ex_dst_valid
=
533 _eth_get_rss_ex_dst_addr(pkt
, pkt_frags
,
534 ip6hdr_off
+ info
->full_hdr_len
,
535 &ext_hdr
, &info
->rss_ex_dst
);
536 } else if (curr_ext_hdr_type
== IP6_DESTINATON
) {
537 info
->rss_ex_src_valid
=
538 _eth_get_rss_ex_src_addr(pkt
, pkt_frags
,
539 ip6hdr_off
+ info
->full_hdr_len
,
540 &ext_hdr
, &info
->rss_ex_src
);
541 } else if (curr_ext_hdr_type
== IP6_FRAGMENT
) {
542 info
->fragment
= true;
545 info
->full_hdr_len
+= (ext_hdr
.ip6r_len
+ 1) * IP6_EXT_GRANULARITY
;
546 curr_ext_hdr_type
= ext_hdr
.ip6r_nxt
;
547 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type
));
549 info
->l4proto
= ext_hdr
.ip6r_nxt
;