/*
 * QEMU network structures definitions and helper functions
 *
 * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 *
 * Developed by Daynix Computing LTD (http://www.daynix.com)
 *
 * Dmitry Fleytman <dmitry@daynix.com>
 * Tamir Shomer <tamirs@daynix.com>
 * Yan Vugenfirer <yan@daynix.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
18 #include "qemu/osdep.h"
21 #include "net/checksum.h"
24 void eth_setup_vlan_headers(struct eth_header
*ehdr
, size_t *ehdr_size
,
25 uint16_t vlan_tag
, uint16_t vlan_ethtype
)
27 struct vlan_header
*vhdr
= PKT_GET_VLAN_HDR(ehdr
);
29 memmove(vhdr
+ 1, vhdr
, *ehdr_size
- ETH_HLEN
);
30 vhdr
->h_tci
= cpu_to_be16(vlan_tag
);
31 vhdr
->h_proto
= ehdr
->h_proto
;
32 ehdr
->h_proto
= cpu_to_be16(vlan_ethtype
);
33 *ehdr_size
+= sizeof(*vhdr
);
37 eth_get_gso_type(uint16_t l3_proto
, uint8_t *l3_hdr
, uint8_t l4proto
)
39 uint8_t ecn_state
= 0;
41 if (l3_proto
== ETH_P_IP
) {
42 struct ip_header
*iphdr
= (struct ip_header
*) l3_hdr
;
44 if (IP_HEADER_VERSION(iphdr
) == IP_HEADER_VERSION_4
) {
45 if (IPTOS_ECN(iphdr
->ip_tos
) == IPTOS_ECN_CE
) {
46 ecn_state
= VIRTIO_NET_HDR_GSO_ECN
;
48 if (l4proto
== IP_PROTO_TCP
) {
49 return VIRTIO_NET_HDR_GSO_TCPV4
| ecn_state
;
50 } else if (l4proto
== IP_PROTO_UDP
) {
51 return VIRTIO_NET_HDR_GSO_UDP
| ecn_state
;
54 } else if (l3_proto
== ETH_P_IPV6
) {
55 struct ip6_header
*ip6hdr
= (struct ip6_header
*) l3_hdr
;
57 if (IP6_ECN(ip6hdr
->ip6_ecn_acc
) == IP6_ECN_CE
) {
58 ecn_state
= VIRTIO_NET_HDR_GSO_ECN
;
61 if (l4proto
== IP_PROTO_TCP
) {
62 return VIRTIO_NET_HDR_GSO_TCPV6
| ecn_state
;
65 qemu_log_mask(LOG_UNIMP
, "%s: probably not GSO frame, "
66 "unknown L3 protocol: 0x%04"PRIx16
"\n", __func__
, l3_proto
);
68 return VIRTIO_NET_HDR_GSO_NONE
| ecn_state
;
/*
 * Read the protocol field that occupies the last sizeof(proto) bytes of an
 * L2 header of length l2hdr_len, gathering it from the iovec with
 * iov_to_buf().
 *
 * Returns the field converted with be16_to_cpu(), or ETH_P_UNKNOWN when
 * fewer than sizeof(proto) bytes could be copied.
 *
 * NOTE(review): the declarations of proto/copied and the body of the
 * "size < proto_offset" guard are not visible in this view.
 * NOTE(review): proto_offset is a size_t, so an l2hdr_len smaller than
 * sizeof(proto) would wrap around -- confirm callers never pass that.
 */
72 eth_get_l3_proto(const struct iovec
*l2hdr_iov
, int iovcnt
, size_t l2hdr_len
)
76 size_t size
= iov_size(l2hdr_iov
, iovcnt
);
77 size_t proto_offset
= l2hdr_len
- sizeof(proto
);
/* Guard: the vector ends before the protocol field even starts. */
79 if (size
< proto_offset
) {
83 copied
= iov_to_buf(l2hdr_iov
, iovcnt
, proto_offset
,
84 &proto
, sizeof(proto
));
/* A short copy means the field was truncated. */
86 return (copied
== sizeof(proto
)) ? be16_to_cpu(proto
) : ETH_P_UNKNOWN
;
/*
 * Copy `length` bytes starting at `offset` out of the iovec into `buffer`
 * using iov_to_buf().
 *
 * Two conditions are tested: input_size (the caller-supplied total size of
 * the vector) being smaller than offset, and iov_to_buf() copying fewer
 * than `length` bytes.
 *
 * NOTE(review): the declaration of `buffer`/`copied` and what is returned
 * for each condition are not visible in this view; callers in this file
 * use the result as a boolean "chunk was copied" flag -- confirm.
 */
90 _eth_copy_chunk(size_t input_size
,
91 const struct iovec
*iov
, int iovcnt
,
92 size_t offset
, size_t length
,
97 if (input_size
< offset
) {
101 copied
= iov_to_buf(iov
, iovcnt
, offset
, buffer
, length
);
103 if (copied
< length
) {
/*
 * Tell whether a TCP segment carries anything beyond its own header.
 *
 * l4len is computed in one of two ways:
 *   - from the IPv4 header: ip_len minus IP_HDR_GET_LEN(ip4_hdr);
 *   - from the IPv6 header: ip6_un1_plen minus the bytes by which
 *     full_ip6hdr_len exceeds sizeof(struct ip6_header).
 * The result is true when l4len is greater than
 * TCP_HEADER_DATA_OFFSET(tcp).
 *
 * NOTE(review): the condition choosing between the two computations and
 * the declaration of l4len are not visible here; presumably is_ip4
 * selects the IPv4 form -- confirm.
 */
111 _eth_tcp_has_data(bool is_ip4
,
112 const struct ip_header
*ip4_hdr
,
113 const struct ip6_header
*ip6_hdr
,
114 size_t full_ip6hdr_len
,
115 const struct tcp_header
*tcp
)
120 l4len
= be16_to_cpu(ip4_hdr
->ip_len
) - IP_HDR_GET_LEN(ip4_hdr
);
/* Everything past the fixed IPv6 header is subtracted from the payload length. */
122 size_t opts_len
= full_ip6hdr_len
- sizeof(struct ip6_header
);
123 l4len
= be16_to_cpu(ip6_hdr
->ip6_ctlun
.ip6_un1
.ip6_un1_plen
) - opts_len
;
126 return l4len
> TCP_HEADER_DATA_OFFSET(tcp
);
/*
 * Walk the L2/L3/L4 headers of a packet stored in an iovec, starting at
 * byte offset iovoff, and report what was found.
 *
 * Visible behaviour:
 *   - *hasip4 and *hasip6 are reset to false and l4hdr_info->proto to
 *     ETH_L4_HDR_PROTO_INVALID before any parsing.
 *   - *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff).
 *   - The L3 protocol comes from eth_get_l3_proto().
 *   - ETH_P_IP: the IPv4 header is copied into ip4hdr_info->ip4_hdr and
 *     must have version 4; ip4hdr_info->fragment and *l4hdr_off are filled.
 *   - ETH_P_IPV6: parsing is delegated to eth_parse_ipv6_hdr(); *l4hdr_off
 *     is *l3hdr_off plus ip6hdr_info->full_hdr_len.
 *   - TCP: the header is copied into l4hdr_info->hdr.tcp; on success proto,
 *     *l5hdr_off and has_tcp_data are set.
 *   - UDP: the header is copied into l4hdr_info->hdr.udp; on success proto
 *     and *l5hdr_off are set.
 *   - SCTP: only l4hdr_info->proto is set.
 *
 * NOTE(review): the l3hdr_off/l4hdr_off/l5hdr_off parameters, the locals
 * proto/copied/ip_p, the failure exits, and the statement that selects
 * between the TCP/UDP/SCTP paths are not visible in this view.
 */
129 void eth_get_protocols(const struct iovec
*iov
, size_t iovcnt
, size_t iovoff
,
130 bool *hasip4
, bool *hasip6
,
134 eth_ip6_hdr_info
*ip6hdr_info
,
135 eth_ip4_hdr_info
*ip4hdr_info
,
136 eth_l4_hdr_info
*l4hdr_info
)
139 bool fragment
= false;
140 size_t input_size
= iov_size(iov
, iovcnt
);
144 *hasip4
= *hasip6
= false;
145 *l3hdr_off
= iovoff
+ eth_get_l2_hdr_length_iov(iov
, iovcnt
, iovoff
);
146 l4hdr_info
->proto
= ETH_L4_HDR_PROTO_INVALID
;
/* *l3hdr_off (which includes iovoff) is passed as the L2 header length. */
148 proto
= eth_get_l3_proto(iov
, iovcnt
, *l3hdr_off
);
150 if (proto
== ETH_P_IP
) {
/* The IPv4 header is parsed directly into the caller's info structure. */
151 struct ip_header
*iphdr
= &ip4hdr_info
->ip4_hdr
;
153 if (input_size
< *l3hdr_off
) {
157 copied
= iov_to_buf(iov
, iovcnt
, *l3hdr_off
, iphdr
, sizeof(*iphdr
));
158 if (copied
< sizeof(*iphdr
) ||
159 IP_HEADER_VERSION(iphdr
) != IP_HEADER_VERSION_4
) {
165 ip4hdr_info
->fragment
= IP4_IS_FRAGMENT(iphdr
);
166 *l4hdr_off
= *l3hdr_off
+ IP_HDR_GET_LEN(iphdr
);
168 fragment
= ip4hdr_info
->fragment
;
169 } else if (proto
== ETH_P_IPV6
) {
170 if (!eth_parse_ipv6_hdr(iov
, iovcnt
, *l3hdr_off
, ip6hdr_info
)) {
175 ip_p
= ip6hdr_info
->l4proto
;
176 *l4hdr_off
= *l3hdr_off
+ ip6hdr_info
->full_hdr_len
;
177 fragment
= ip6hdr_info
->fragment
;
188 if (_eth_copy_chunk(input_size
,
190 *l4hdr_off
, sizeof(l4hdr_info
->hdr
.tcp
),
191 &l4hdr_info
->hdr
.tcp
)) {
192 l4hdr_info
->proto
= ETH_L4_HDR_PROTO_TCP
;
193 *l5hdr_off
= *l4hdr_off
+
194 TCP_HEADER_DATA_OFFSET(&l4hdr_info
->hdr
.tcp
);
196 l4hdr_info
->has_tcp_data
=
197 _eth_tcp_has_data(proto
== ETH_P_IP
,
198 &ip4hdr_info
->ip4_hdr
,
199 &ip6hdr_info
->ip6_hdr
,
/* Passed as full_ip6hdr_len: the L3 header length measured from the offsets. */
200 *l4hdr_off
- *l3hdr_off
,
201 &l4hdr_info
->hdr
.tcp
);
206 if (_eth_copy_chunk(input_size
,
208 *l4hdr_off
, sizeof(l4hdr_info
->hdr
.udp
),
209 &l4hdr_info
->hdr
.udp
)) {
210 l4hdr_info
->proto
= ETH_L4_HDR_PROTO_UDP
;
/* The UDP header has a fixed size, so L5 starts right behind it. */
211 *l5hdr_off
= *l4hdr_off
+ sizeof(l4hdr_info
->hdr
.udp
);
216 l4hdr_info
->proto
= ETH_L4_HDR_PROTO_SCTP
;
/*
 * Build a copy of a packet's Ethernet header with the outer VLAN tag
 * removed.
 *
 * Visible behaviour:
 *   - The Ethernet header at iovoff is copied into new_ehdr_buf.
 *   - Depending on its h_proto, the VLAN header that follows is read; its
 *     h_proto replaces the one in the copied header, *tci receives the
 *     host-order h_tci and *payload_offset is set just past that tag.
 *   - If the new h_proto is again ETH_P_VLAN, the next VLAN header is
 *     copied behind the Ethernet header in new_ehdr_buf, *payload_offset
 *     advances by one more tag, and the returned size is
 *     sizeof(struct eth_header) + sizeof(struct vlan_header).
 *   - Otherwise sizeof(struct eth_header) is returned.
 *
 * NOTE(review): the new_ehdr_buf parameter, the case labels of the switch
 * and the exits taken on short copies are not visible in this view.
 * NOTE(review): *payload_offset is a uint16_t while iovoff is a size_t, so
 * large offsets would be truncated -- confirm callers bound iovoff.
 */
222 eth_strip_vlan(const struct iovec
*iov
, int iovcnt
, size_t iovoff
,
224 uint16_t *payload_offset
, uint16_t *tci
)
226 struct vlan_header vlan_hdr
;
227 struct eth_header
*new_ehdr
= new_ehdr_buf
;
229 size_t copied
= iov_to_buf(iov
, iovcnt
, iovoff
,
230 new_ehdr
, sizeof(*new_ehdr
));
232 if (copied
< sizeof(*new_ehdr
)) {
236 switch (be16_to_cpu(new_ehdr
->h_proto
)) {
239 copied
= iov_to_buf(iov
, iovcnt
, iovoff
+ sizeof(*new_ehdr
),
240 &vlan_hdr
, sizeof(vlan_hdr
));
242 if (copied
< sizeof(vlan_hdr
)) {
/* The stripped tag's inner EtherType becomes the frame's EtherType. */
246 new_ehdr
->h_proto
= vlan_hdr
.h_proto
;
248 *tci
= be16_to_cpu(vlan_hdr
.h_tci
);
249 *payload_offset
= iovoff
+ sizeof(*new_ehdr
) + sizeof(vlan_hdr
);
/* Double-tagged frame: keep the second tag as part of the new header. */
251 if (be16_to_cpu(new_ehdr
->h_proto
) == ETH_P_VLAN
) {
253 copied
= iov_to_buf(iov
, iovcnt
, *payload_offset
,
254 PKT_GET_VLAN_HDR(new_ehdr
), sizeof(vlan_hdr
));
256 if (copied
< sizeof(vlan_hdr
)) {
260 *payload_offset
+= sizeof(vlan_hdr
);
262 return sizeof(struct eth_header
) + sizeof(struct vlan_header
);
264 return sizeof(struct eth_header
);
/*
 * Variant of VLAN stripping that takes the VLAN EtherTypes to match (vet,
 * vet_ext) from the caller and can remove either the first or the second
 * tag.
 *
 * Visible behaviour:
 *   - Two header layouts are prepared.  In the first, new_ehdr_proto
 *     points at the Ethernet header's h_proto and new_ehdr_size is
 *     sizeof(struct eth_header).  In the second, new_ehdr_proto points at
 *     the h_proto of the VLAN header behind it, new_ehdr_size covers both
 *     headers, and the outer h_proto is compared against vet_ext.
 *   - The chosen header bytes are copied from iovoff into new_ehdr_buf.
 *   - Stripping proceeds only if the copy was complete and
 *     *new_ehdr_proto equals vet.
 *   - The VLAN header right behind the copied bytes is read; its h_proto
 *     overwrites *new_ehdr_proto, *payload_offset is set just past it,
 *     *tci receives the host-order h_tci, and new_ehdr_size is returned.
 *
 * NOTE(review): the selector between the two layouts (presumably `index`),
 * the declaration of `copied` and every failure exit are not visible in
 * this view.
 * NOTE(review): *payload_offset is a uint16_t while iovoff is a size_t --
 * confirm callers keep the sum in range.
 */
272 eth_strip_vlan_ex(const struct iovec
*iov
, int iovcnt
, size_t iovoff
, int index
,
273 uint16_t vet
, uint16_t vet_ext
, void *new_ehdr_buf
,
274 uint16_t *payload_offset
, uint16_t *tci
)
276 struct vlan_header vlan_hdr
;
277 uint16_t *new_ehdr_proto
;
278 size_t new_ehdr_size
;
/* Layout 1: the tag to strip directly follows the Ethernet header. */
283 new_ehdr_proto
= &PKT_GET_ETH_HDR(new_ehdr_buf
)->h_proto
;
284 new_ehdr_size
= sizeof(struct eth_header
);
285 copied
= iov_to_buf(iov
, iovcnt
, iovoff
, new_ehdr_buf
, new_ehdr_size
);
/* Layout 2: one tag is kept, the tag to strip comes after it. */
289 new_ehdr_proto
= &PKT_GET_VLAN_HDR(new_ehdr_buf
)->h_proto
;
290 new_ehdr_size
= sizeof(struct eth_header
) + sizeof(struct vlan_header
);
291 copied
= iov_to_buf(iov
, iovcnt
, iovoff
, new_ehdr_buf
, new_ehdr_size
);
292 if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf
)->h_proto
) != vet_ext
) {
301 if (copied
< new_ehdr_size
|| be16_to_cpu(*new_ehdr_proto
) != vet
) {
305 copied
= iov_to_buf(iov
, iovcnt
, iovoff
+ new_ehdr_size
,
306 &vlan_hdr
, sizeof(vlan_hdr
));
307 if (copied
< sizeof(vlan_hdr
)) {
/* Splice the tag out: its inner EtherType replaces the matched one. */
311 *new_ehdr_proto
= vlan_hdr
.h_proto
;
312 *payload_offset
= iovoff
+ new_ehdr_size
+ sizeof(vlan_hdr
);
313 *tci
= be16_to_cpu(vlan_hdr
.h_tci
);
315 return new_ehdr_size
;
/*
 * Recompute the checksum of the IPv4 header found at l3hdr.
 *
 * net_raw_checksum() is run over l3hdr_len bytes starting at l3hdr and the
 * result is stored, via cpu_to_be16(), in the header's ip_sum field.
 *
 * NOTE(review): the checksum is calculated over the buffer that contains
 * ip_sum itself, and one line between the cast and the store is not
 * visible in this view -- confirm ip_sum is cleared before the
 * computation.
 */
319 eth_fix_ip4_checksum(void *l3hdr
, size_t l3hdr_len
)
321 struct ip_header
*iphdr
= (struct ip_header
*) l3hdr
;
323 iphdr
->ip_sum
= cpu_to_be16(net_raw_checksum(l3hdr
, l3hdr_len
));
/*
 * Checksum the IPv4 pseudo header built from iphdr.
 *
 * A struct ip_pseudo_header is filled with the source and destination
 * addresses and the protocol (ip_p) taken from iphdr, plus csl stored via
 * cpu_to_be16() as the payload length.  The return value is
 * net_checksum_add() applied to *cso bytes of that structure.
 *
 * NOTE(review): the csl and cso parameters and two body lines are not
 * visible in this view; presumably they clear the remaining pseudo-header
 * field and set *cso -- confirm.
 */
327 eth_calc_ip4_pseudo_hdr_csum(struct ip_header
*iphdr
,
331 struct ip_pseudo_header ipph
;
332 ipph
.ip_src
= iphdr
->ip_src
;
333 ipph
.ip_dst
= iphdr
->ip_dst
;
334 ipph
.ip_payload
= cpu_to_be16(csl
);
335 ipph
.ip_proto
= iphdr
->ip_p
;
338 return net_checksum_add(*cso
, (uint8_t *) &ipph
);
/*
 * Checksum the IPv6 pseudo header built from iphdr.
 *
 * A struct ip6_pseudo_header is filled with the source and destination
 * addresses from iphdr, csl stored via cpu_to_be16() as the length, and
 * l4_proto as the next-header value.  The return value is
 * net_checksum_add() applied to *cso bytes of that structure.
 *
 * NOTE(review): the csl, l4_proto and cso parameters and three body lines
 * are not visible in this view; presumably they clear the remaining
 * pseudo-header bytes and set *cso -- confirm.
 */
342 eth_calc_ip6_pseudo_hdr_csum(struct ip6_header
*iphdr
,
347 struct ip6_pseudo_header ipph
;
348 ipph
.ip6_src
= iphdr
->ip6_src
;
349 ipph
.ip6_dst
= iphdr
->ip6_dst
;
350 ipph
.len
= cpu_to_be16(csl
);
/* The caller supplies the L4 protocol; it is not read from iphdr. */
354 ipph
.next_hdr
= l4_proto
;
356 return net_checksum_add(*cso
, (uint8_t *)&ipph
);
/*
 * Classify an IPv6 next-header value.
 *
 * eth_parse_ipv6_hdr() in this file keeps walking the header chain for as
 * long as this function accepts the current type, and treats the first
 * rejected type as the L4 protocol.
 *
 * NOTE(review): only the IP6_AUTHENTICATION case label is visible in this
 * view; the other labels and the returned values cannot be documented
 * from here.
 */
360 eth_is_ip6_extension_header_type(uint8_t hdr_type
)
366 case IP6_AUTHENTICATION
:
/*
 * Extract the address carried by an IPv6 routing extension header into
 * *dst_addr.
 *
 * Visible behaviour:
 *   - The packet must contain at least sizeof(rt_hdr) +
 *     sizeof(*dst_addr) bytes from ext_hdr_offset onwards.
 *   - The routing header is read and examined: rtype is compared with 2
 *     and segleft with 1.
 *   - The address is read from directly behind the routing header.
 *
 * NOTE(review): the values returned on each path and the declaration of
 * bytes_read are not visible in this view.  ext_hdr is not used by any
 * visible line.
 */
376 _eth_get_rss_ex_dst_addr(const struct iovec
*pkt
, int pkt_frags
,
377 size_t ext_hdr_offset
,
378 struct ip6_ext_hdr
*ext_hdr
,
379 struct in6_address
*dst_addr
)
381 struct ip6_ext_hdr_routing rt_hdr
;
382 size_t input_size
= iov_size(pkt
, pkt_frags
);
/* One up-front length check covers both reads below. */
385 if (input_size
< ext_hdr_offset
+ sizeof(rt_hdr
) + sizeof(*dst_addr
)) {
389 bytes_read
= iov_to_buf(pkt
, pkt_frags
, ext_hdr_offset
,
390 &rt_hdr
, sizeof(rt_hdr
));
/* Expected to hold because of the length check above. */
391 assert(bytes_read
== sizeof(rt_hdr
));
392 if ((rt_hdr
.rtype
!= 2) || (rt_hdr
.segleft
!= 1)) {
395 bytes_read
= iov_to_buf(pkt
, pkt_frags
, ext_hdr_offset
+ sizeof(rt_hdr
),
396 dst_addr
, sizeof(*dst_addr
));
397 assert(bytes_read
== sizeof(*dst_addr
));
/*
 * Scan the options of an IPv6 destination-options extension header for an
 * IP6_OPT_HOME option and copy the address it carries into *src_addr.
 *
 * Visible behaviour:
 *   - bytes_left starts as the extension header's total size,
 *     (ip6r_len + 1) * 8, minus the fixed part already read by the caller.
 *   - Options are walked from directly behind the fixed part for as long
 *     as more than one option header's worth of bytes remains.
 *   - An IP6_OPT_PAD1 option counts as a single byte; every other option
 *     counts as its len field plus the option header.
 *   - On IP6_OPT_HOME the address is read from directly behind the option
 *     header, and the result is whether a full address was copied.
 *
 * NOTE(review): the exits taken when a check fails and the value returned
 * when no matching option is found are not visible in this view.
 */
403 _eth_get_rss_ex_src_addr(const struct iovec
*pkt
, int pkt_frags
,
404 size_t dsthdr_offset
,
405 struct ip6_ext_hdr
*ext_hdr
,
406 struct in6_address
*src_addr
)
408 size_t bytes_left
= (ext_hdr
->ip6r_len
+ 1) * 8 - sizeof(*ext_hdr
);
409 struct ip6_option_hdr opthdr
;
410 size_t opt_offset
= dsthdr_offset
+ sizeof(*ext_hdr
);
412 while (bytes_left
> sizeof(opthdr
)) {
/* The total packet size is recomputed on every iteration. */
413 size_t input_size
= iov_size(pkt
, pkt_frags
);
414 size_t bytes_read
, optlen
;
416 if (input_size
< opt_offset
) {
420 bytes_read
= iov_to_buf(pkt
, pkt_frags
, opt_offset
,
421 &opthdr
, sizeof(opthdr
));
423 if (bytes_read
!= sizeof(opthdr
)) {
427 optlen
= (opthdr
.type
== IP6_OPT_PAD1
) ? 1
428 : (opthdr
.len
+ sizeof(opthdr
));
/* An option claiming more bytes than remain in the header is malformed. */
430 if (optlen
> bytes_left
) {
434 if (opthdr
.type
== IP6_OPT_HOME
) {
435 if (input_size
< opt_offset
+ sizeof(opthdr
)) {
439 bytes_read
= iov_to_buf(pkt
, pkt_frags
,
440 opt_offset
+ sizeof(opthdr
),
441 src_addr
, sizeof(*src_addr
));
443 return bytes_read
== sizeof(*src_addr
);
446 opt_offset
+= optlen
;
447 bytes_left
-= optlen
;
/*
 * Parse the IPv6 header located at ip6hdr_off in the packet, together with
 * any chain of extension headers behind it, and fill *info.
 *
 * Visible behaviour:
 *   - rss_ex_dst_valid, rss_ex_src_valid and fragment start out false.
 *   - The fixed header is copied into info->ip6_hdr and full_hdr_len
 *     starts at sizeof(struct ip6_header).
 *   - If ip6_nxt is not an extension header type it becomes info->l4proto
 *     and has_ext_hdrs is false.
 *   - Otherwise has_ext_hdrs is true and the extension headers are read
 *     one by one:
 *       IP6_ROUTING    -> rss_ex_dst / rss_ex_dst_valid, only when
 *                         ip6r_len equals sizeof(struct in6_address) / 8;
 *       IP6_DESTINATON -> rss_ex_src / rss_ex_src_valid;
 *       IP6_FRAGMENT   -> fragment = true.
 *     full_hdr_len grows by (ip6r_len + 1) * IP6_EXT_GRANULARITY for each
 *     header, and the walk stops at the first ip6r_nxt that is not an
 *     extension header type; that value becomes info->l4proto.
 *
 * NOTE(review): the declaration of bytes_read, the opening of the loop
 * and the values returned on the success and failure paths are not
 * visible in this view.
 */
453 bool eth_parse_ipv6_hdr(const struct iovec
*pkt
, int pkt_frags
,
454 size_t ip6hdr_off
, eth_ip6_hdr_info
*info
)
456 struct ip6_ext_hdr ext_hdr
;
458 uint8_t curr_ext_hdr_type
;
459 size_t input_size
= iov_size(pkt
, pkt_frags
);
461 info
->rss_ex_dst_valid
= false;
462 info
->rss_ex_src_valid
= false;
463 info
->fragment
= false;
465 if (input_size
< ip6hdr_off
) {
469 bytes_read
= iov_to_buf(pkt
, pkt_frags
, ip6hdr_off
,
470 &info
->ip6_hdr
, sizeof(info
->ip6_hdr
));
471 if (bytes_read
< sizeof(info
->ip6_hdr
)) {
475 info
->full_hdr_len
= sizeof(struct ip6_header
);
477 curr_ext_hdr_type
= info
->ip6_hdr
.ip6_nxt
;
/* No extension headers: the fixed header already names the L4 protocol. */
479 if (!eth_is_ip6_extension_header_type(curr_ext_hdr_type
)) {
480 info
->l4proto
= info
->ip6_hdr
.ip6_nxt
;
481 info
->has_ext_hdrs
= false;
485 info
->has_ext_hdrs
= true;
488 if (input_size
< ip6hdr_off
+ info
->full_hdr_len
) {
/* full_hdr_len doubles as the offset of the next extension header. */
492 bytes_read
= iov_to_buf(pkt
, pkt_frags
, ip6hdr_off
+ info
->full_hdr_len
,
493 &ext_hdr
, sizeof(ext_hdr
));
495 if (bytes_read
< sizeof(ext_hdr
)) {
499 if (curr_ext_hdr_type
== IP6_ROUTING
) {
/* Only a routing header whose length field matches exactly one address. */
500 if (ext_hdr
.ip6r_len
== sizeof(struct in6_address
) / 8) {
501 info
->rss_ex_dst_valid
=
502 _eth_get_rss_ex_dst_addr(pkt
, pkt_frags
,
503 ip6hdr_off
+ info
->full_hdr_len
,
504 &ext_hdr
, &info
->rss_ex_dst
);
506 } else if (curr_ext_hdr_type
== IP6_DESTINATON
) {
507 info
->rss_ex_src_valid
=
508 _eth_get_rss_ex_src_addr(pkt
, pkt_frags
,
509 ip6hdr_off
+ info
->full_hdr_len
,
510 &ext_hdr
, &info
->rss_ex_src
);
511 } else if (curr_ext_hdr_type
== IP6_FRAGMENT
) {
512 info
->fragment
= true;
/* ip6r_len does not count the first unit of the header, hence the + 1. */
515 info
->full_hdr_len
+= (ext_hdr
.ip6r_len
+ 1) * IP6_EXT_GRANULARITY
;
516 curr_ext_hdr_type
= ext_hdr
.ip6r_nxt
;
517 } while (eth_is_ip6_extension_header_type(curr_ext_hdr_type
));
/* The last extension header's next-header field names the L4 protocol. */
519 info
->l4proto
= ext_hdr
.ip6r_nxt
;
523 bool eth_pad_short_frame(uint8_t *padded_pkt
, size_t *padded_buflen
,
524 const void *pkt
, size_t pkt_size
)
526 assert(padded_buflen
&& *padded_buflen
>= ETH_ZLEN
);
528 if (pkt_size
>= ETH_ZLEN
) {
532 /* pad to minimum Ethernet frame length */
533 memcpy(padded_pkt
, pkt
, pkt_size
);
534 memset(&padded_pkt
[pkt_size
], 0, ETH_ZLEN
- pkt_size
);
535 *padded_buflen
= ETH_ZLEN
;