4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2012-2014 Cisco Systems
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 #include "config-host.h"
31 #include "monitor/monitor.h"
32 #include "qemu-common.h"
33 #include "qemu/error-report.h"
34 #include "qemu/option.h"
35 #include "qemu/sockets.h"
37 #include "qemu/main-loop.h"
40 /* The buffer size needs to be investigated for optimum numbers and
41 * optimum means of paging in on different systems. This size is
42 * chosen to be sufficient to accommodate one packet with some headers
/* Buffer geometry and protocol constants used by this backend.
 * NOTE(review): BUFFER_ALIGN expands to a sysconf() call at every point of
 * use, and MAX_L2TPV3_IOVCNT depends on IOVSIZE, whose definition is not
 * visible in this portion of the file. */
45 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
46 #define BUFFER_SIZE 2048
48 #define MAX_L2TPV3_MSGCNT 64
49 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
51 /* Header set to 0x30000 signifies a data packet */
53 #define L2TPV3_DATA_PACKET 0x30000
55 /* IANA-assigned IP protocol ID for L2TPv3 */
58 #define IPPROTO_L2TP 0x73
/* Per-backend state for an L2TPv3 network client.
 * Only part of the member list is visible here: the receive message vector
 * (msgvec), the destination address (dgram_dst), the header_mismatch flag
 * that the receive path uses to report verification errors only once, and
 * the precomputed cookie/counter/session offsets into the header.
 * NOTE(review): the receive note below speaks of "up to 32 packets", while
 * MAX_L2TPV3_MSGCNT is defined as 64 - confirm which figure is intended. */
61 typedef struct NetL2TPV3State
{
66 * these are used for xmit - that happens packet a time
67 * and for first sign of life packet (easier to parse that once)
74 * these are used for receive - try to "eat" up to 32 packets at a time
77 struct mmsghdr
*msgvec
;
83 struct sockaddr_storage
*dgram_dst
;
98 * DOS avoidance in error handling
101 bool header_mismatch
;
104 * Ring buffer handling
112 * Precomputed offsets
116 uint32_t cookie_offset
;
117 uint32_t counter_offset
;
118 uint32_t session_offset
;
/* Forward declarations: these three callbacks are referenced by
 * l2tpv3_update_fd_handler() before their definitions appear. */
136 static int l2tpv3_can_send(void *opaque
);
137 static void net_l2tpv3_send(void *opaque
);
138 static void l2tpv3_writable(void *opaque
);
/* Re-register the callbacks for s->fd via qemu_set_fd_handler2().
 * l2tpv3_can_send and net_l2tpv3_send are passed only while s->read_poll
 * is set, and l2tpv3_writable only while s->write_poll is set; NULL is
 * passed in place of a callback whose poll flag is clear.
 * NOTE(review): the trailing argument(s) of the call are not visible in
 * this portion of the file. */
140 static void l2tpv3_update_fd_handler(NetL2TPV3State
*s
)
142 qemu_set_fd_handler2(s
->fd
,
143 s
->read_poll
? l2tpv3_can_send
: NULL
,
144 s
->read_poll
? net_l2tpv3_send
: NULL
,
145 s
->write_poll
? l2tpv3_writable
: NULL
,
149 static void l2tpv3_read_poll(NetL2TPV3State
*s
, bool enable
)
151 if (s
->read_poll
!= enable
) {
152 s
->read_poll
= enable
;
153 l2tpv3_update_fd_handler(s
);
157 static void l2tpv3_write_poll(NetL2TPV3State
*s
, bool enable
)
159 if (s
->write_poll
!= enable
) {
160 s
->write_poll
= enable
;
161 l2tpv3_update_fd_handler(s
);
165 static void l2tpv3_writable(void *opaque
)
167 NetL2TPV3State
*s
= opaque
;
168 l2tpv3_write_poll(s
, false);
169 qemu_flush_queued_packets(&s
->nc
);
172 static int l2tpv3_can_send(void *opaque
)
174 NetL2TPV3State
*s
= opaque
;
176 return qemu_can_send_packet(&s
->nc
);
179 static void l2tpv3_send_completed(NetClientState
*nc
, ssize_t len
)
181 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
182 l2tpv3_read_poll(s
, true);
185 static void l2tpv3_poll(NetClientState
*nc
, bool enable
)
187 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
188 l2tpv3_write_poll(s
, enable
);
189 l2tpv3_read_poll(s
, enable
);
/* Build the outgoing L2TPv3 header in s->header_buf.
 * Visible stores: L2TPV3_DATA_PACKET (big-endian, 32 bit) at the start of
 * the buffer, a 32-bit word at s->session_offset, and a cookie at
 * s->cookie_offset that is addressed as 64 bit when s->cookie_is_64 is set
 * and as 32 bit otherwise.  When s->has_counter is set, the word at
 * s->counter_offset is updated; the visible store writes the
 * pre-incremented s->counter in big-endian order.
 * NOTE(review): the values stored for session and cookie, any condition
 * guarding the first store, and the s->pin_counter branch body are not
 * visible in this portion of the file. */
192 static void l2tpv3_form_header(NetL2TPV3State
*s
)
197 stl_be_p((uint32_t *) s
->header_buf
, L2TPV3_DATA_PACKET
);
200 (uint32_t *) (s
->header_buf
+ s
->session_offset
),
204 if (s
->cookie_is_64
) {
206 (uint64_t *)(s
->header_buf
+ s
->cookie_offset
),
211 (uint32_t *) (s
->header_buf
+ s
->cookie_offset
),
216 if (s
->has_counter
) {
217 counter
= (uint32_t *)(s
->header_buf
+ s
->counter_offset
);
218 if (s
->pin_counter
) {
221 stl_be_p(counter
, ++s
->counter
);
/* Transmit a packet that the peer supplies as an iovec array.
 * A request with iovcnt > MAX_L2TPV3_IOVCNT - 1 is rejected with the
 * "iovec too long" message, since slot 0 of s->vec is used for the header.
 * Otherwise the header is formed, the caller's iovecs are copied to
 * s->vec + 1, s->vec[0] is pointed at s->header_buf with length s->offset,
 * and the message is sent to s->dgram_dst with sendmsg(), retrying while
 * it fails with EINTR.  If sendmsg() returns 0, ret is replaced by
 * iov_size(iov, iovcnt).  If ret is -EAGAIN or -ENOBUFS, write polling is
 * enabled so the socket becoming writable is noticed.
 * NOTE(review): the return statements and the conversion of errno into
 * ret are not visible in this portion of the file. */
226 static ssize_t
net_l2tpv3_receive_dgram_iov(NetClientState
*nc
,
227 const struct iovec
*iov
,
230 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
232 struct msghdr message
;
235 if (iovcnt
> MAX_L2TPV3_IOVCNT
- 1) {
237 "iovec too long %d > %d, change l2tpv3.h",
238 iovcnt
, MAX_L2TPV3_IOVCNT
242 l2tpv3_form_header(s
);
243 memcpy(s
->vec
+ 1, iov
, iovcnt
* sizeof(struct iovec
));
244 s
->vec
->iov_base
= s
->header_buf
;
245 s
->vec
->iov_len
= s
->offset
;
246 message
.msg_name
= s
->dgram_dst
;
247 message
.msg_namelen
= s
->dst_size
;
248 message
.msg_iov
= s
->vec
;
249 message
.msg_iovlen
= iovcnt
+ 1;
250 message
.msg_control
= NULL
;
251 message
.msg_controllen
= 0;
252 message
.msg_flags
= 0;
254 ret
= sendmsg(s
->fd
, &message
, 0);
255 } while ((ret
== -1) && (errno
== EINTR
));
258 } else if (ret
== 0) {
259 /* belt and braces - should not occur on DGRAM
260 * we should get an error and never a 0 send
262 ret
= iov_size(iov
, iovcnt
);
264 /* signal upper layer that socket buffer is full */
266 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
267 l2tpv3_write_poll(s
, true);
/* Transmit a packet that the peer supplies as a single flat buffer.
 * The header is formed, one iovec is pointed at s->header_buf with length
 * s->offset and an iovec is pointed at the caller's buf; the two-element
 * vector s->vec is then sent to s->dgram_dst with sendmsg(), retrying
 * while it fails with EINTR.  If ret is -EAGAIN or -ENOBUFS, write polling
 * is enabled.
 * NOTE(review): the initialisation and advance of the local vec pointer,
 * the payload length assignment, the errno-to-ret conversion and the
 * return statements are not visible in this portion of the file. */
274 static ssize_t
net_l2tpv3_receive_dgram(NetClientState
*nc
,
278 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
281 struct msghdr message
;
284 l2tpv3_form_header(s
);
286 vec
->iov_base
= s
->header_buf
;
287 vec
->iov_len
= s
->offset
;
289 vec
->iov_base
= (void *) buf
;
291 message
.msg_name
= s
->dgram_dst
;
292 message
.msg_namelen
= s
->dst_size
;
293 message
.msg_iov
= s
->vec
;
294 message
.msg_iovlen
= 2;
295 message
.msg_control
= NULL
;
296 message
.msg_controllen
= 0;
297 message
.msg_flags
= 0;
299 ret
= sendmsg(s
->fd
, &message
, 0);
300 } while ((ret
== -1) && (errno
== EINTR
));
303 } else if (ret
== 0) {
304 /* belt and braces - should not occur on DGRAM
305 * we should get an error and never a 0 send
310 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
311 /* signal upper layer that socket buffer is full */
312 l2tpv3_write_poll(s
, true);
/* Check a received header against the expected cookie and session id.
 * When neither s->udp nor s->ipv6 is set, buf is first advanced by
 * sizeof(struct iphdr).  The cookie is loaded big-endian from
 * buf + s->cookie_offset (64 bit when s->cookie_is_64, else 32 bit) and
 * compared with s->rx_cookie; the 32-bit word at buf + s->session_offset
 * is compared with s->rx_session.  Each mismatch is logged with
 * error_report() only while s->header_mismatch is still false.
 * The caller in this file treats a return value of 0 as success.
 * NOTE(review): the return statements are not visible in this portion of
 * the file. */
319 static int l2tpv3_verify_header(NetL2TPV3State
*s
, uint8_t *buf
)
325 if ((!s
->udp
) && (!s
->ipv6
)) {
326 buf
+= sizeof(struct iphdr
) /* fix for ipv4 raw */;
329 /* we do not do a strict check for "data" packets as per
330 * the RFC spec because the pure IP spec does not have
335 if (s
->cookie_is_64
) {
336 cookie
= ldq_be_p(buf
+ s
->cookie_offset
);
338 cookie
= ldl_be_p(buf
+ s
->cookie_offset
);
340 if (cookie
!= s
->rx_cookie
) {
341 if (!s
->header_mismatch
) {
342 error_report("unknown cookie id");
347 session
= (uint32_t *) (buf
+ s
->session_offset
);
348 if (ldl_be_p(session
) != s
->rx_session
) {
349 if (!s
->header_mismatch
) {
350 error_report("session mismatch");
/* Deliver packets waiting in the receive ring to the peer.
 * Runs only when s->queue_depth > 0.  For the entry at
 * s->msgvec + s->queue_tail with msg_len > 0, data_size is computed as
 * msg_len - s->header_size; if data_size > 0 and l2tpv3_verify_header()
 * returns 0 for the first iovec, the packet is passed to
 * qemu_send_packet_async() with l2tpv3_send_completed as the completion
 * callback.  A failed check is reported once ("l2tpv3 header verification
 * failed") and latches s->header_mismatch.  The tail index advances modulo
 * MAX_L2TPV3_MSGCNT, and processing continues while entries remain, the
 * peer can accept packets, and either the last send returned size > 0 or
 * bad_read is set.
 * NOTE(review): the condition under which read polling is disabled, the
 * bad_read assignments and the queue_depth decrement are not visible in
 * this portion of the file. */
357 static void net_l2tpv3_process_queue(NetL2TPV3State
*s
)
363 struct mmsghdr
*msgvec
;
365 /* go into ring mode only if there is a "pending" tail */
366 if (s
->queue_depth
> 0) {
368 msgvec
= s
->msgvec
+ s
->queue_tail
;
369 if (msgvec
->msg_len
> 0) {
370 data_size
= msgvec
->msg_len
- s
->header_size
;
371 vec
= msgvec
->msg_hdr
.msg_iov
;
372 if ((data_size
> 0) &&
373 (l2tpv3_verify_header(s
, vec
->iov_base
) == 0)) {
375 /* Use the legacy delivery for now, we will
376 * switch to using our own ring as a queueing mechanism
379 size
= qemu_send_packet_async(
383 l2tpv3_send_completed
386 l2tpv3_read_poll(s
, false);
391 if (!s
->header_mismatch
) {
392 /* report error only once */
393 error_report("l2tpv3 header verification failed");
394 s
->header_mismatch
= true;
400 s
->queue_tail
= (s
->queue_tail
+ 1) % MAX_L2TPV3_MSGCNT
;
403 (s
->queue_depth
> 0) &&
404 qemu_can_send_packet(&s
->nc
) &&
405 ((size
> 0) || bad_read
)
/* Read-ready callback: pull a batch of datagrams into the receive ring and
 * then deliver them via net_l2tpv3_process_queue().
 * When the ring already holds entries, target_count is limited to the free
 * slots (MAX_L2TPV3_MSGCNT - s->queue_depth) and further clamped so that
 * the batch does not run past the end of the vector from s->queue_head;
 * otherwise the whole vector (MAX_L2TPV3_MSGCNT entries) is used.  The
 * batch is read into s->msgvec + s->queue_head with MSG_DONTWAIT, retrying
 * while the call fails with EINTR; queue_head then advances by count
 * modulo MAX_L2TPV3_MSGCNT and queue_depth grows by count.
 * NOTE(review): the receive call itself, the handling of a negative count
 * and any reset of the ring indices in the empty-ring branch are not
 * visible in this portion of the file. */
410 static void net_l2tpv3_send(void *opaque
)
412 NetL2TPV3State
*s
= opaque
;
413 int target_count
, count
;
414 struct mmsghdr
*msgvec
;
416 /* go into ring mode only if there is a "pending" tail */
418 if (s
->queue_depth
) {
420 /* The ring buffer we use has variable intake
421 * count of how much we can read varies - adjust accordingly
424 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_depth
;
426 /* Ensure we do not overrun the ring when we have
427 * a lot of enqueued packets
430 if (s
->queue_head
+ target_count
> MAX_L2TPV3_MSGCNT
) {
431 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_head
;
435 /* we do not have any pending packets - we can use
436 * the whole message vector linearly instead of using
442 target_count
= MAX_L2TPV3_MSGCNT
;
445 msgvec
= s
->msgvec
+ s
->queue_head
;
446 if (target_count
> 0) {
451 target_count
, MSG_DONTWAIT
, NULL
);
452 } while ((count
== -1) && (errno
== EINTR
));
454 /* Recv error - we still need to flush packets here,
455 * (re)set queue head to current position
459 s
->queue_head
= (s
->queue_head
+ count
) % MAX_L2TPV3_MSGCNT
;
460 s
->queue_depth
+= count
;
462 net_l2tpv3_process_queue(s
);
/* Release a message vector of count entries.
 * For every entry that has an iovec array, g_free() is applied to the
 * iov_base of iovcount iovecs and then to the iovec array itself.
 * NOTE(review): build_l2tpv3_vector() obtains one of the per-entry buffers
 * with qemu_memalign() but it is released here with g_free() - confirm
 * that this pairing is valid on all hosts.
 * NOTE(review): the pointer advances and the release of msgvec itself are
 * not visible in this portion of the file. */
465 static void destroy_vector(struct mmsghdr
*msgvec
, int count
, int iovcount
)
469 struct mmsghdr
*cleanup
= msgvec
;
471 for (i
= 0; i
< count
; i
++) {
472 if (cleanup
->msg_hdr
.msg_iov
) {
473 iov
= cleanup
->msg_hdr
.msg_iov
;
474 for (j
= 0; j
< iovcount
; j
++) {
475 g_free(iov
->iov_base
);
478 g_free(cleanup
->msg_hdr
.msg_iov
);
/* Allocate and initialise a receive message vector of count entries.
 * Each entry gets no name (msg_name NULL, msg_namelen 0), an array of
 * IOVSIZE iovecs, a g_malloc()ed buffer of s->header_size bytes and a
 * BUFFER_SIZE-byte buffer obtained from qemu_memalign() with alignment
 * BUFFER_ALIGN; msg_iovlen is set to 2 and the control/flags fields are
 * cleared.
 * NOTE(review): the advance between the two iovec assignments, the
 * per-entry advance of msgvec and the return statement are not visible in
 * this portion of the file. */
486 static struct mmsghdr
*build_l2tpv3_vector(NetL2TPV3State
*s
, int count
)
490 struct mmsghdr
*msgvec
, *result
;
492 msgvec
= g_malloc(sizeof(struct mmsghdr
) * count
);
494 for (i
= 0; i
< count
; i
++) {
495 msgvec
->msg_hdr
.msg_name
= NULL
;
496 msgvec
->msg_hdr
.msg_namelen
= 0;
497 iov
= g_malloc(sizeof(struct iovec
) * IOVSIZE
);
498 msgvec
->msg_hdr
.msg_iov
= iov
;
499 iov
->iov_base
= g_malloc(s
->header_size
);
500 iov
->iov_len
= s
->header_size
;
502 iov
->iov_base
= qemu_memalign(BUFFER_ALIGN
, BUFFER_SIZE
);
503 iov
->iov_len
= BUFFER_SIZE
;
504 msgvec
->msg_hdr
.msg_iovlen
= 2;
505 msgvec
->msg_hdr
.msg_control
= NULL
;
506 msgvec
->msg_hdr
.msg_controllen
= 0;
507 msgvec
->msg_hdr
.msg_flags
= 0;
/* Cleanup hook for the net client.
 * Purges packets still queued for nc, disables read and write polling,
 * destroys the receive message vector and frees the header buffer and the
 * destination address.
 * NOTE(review): no release of s->vec (allocated in net_init_l2tpv3) and no
 * close of s->fd is visible in this portion of the file - confirm both are
 * handled. */
513 static void net_l2tpv3_cleanup(NetClientState
*nc
)
515 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
516 qemu_purge_queued_packets(nc
);
517 l2tpv3_read_poll(s
, false);
518 l2tpv3_write_poll(s
, false);
522 destroy_vector(s
->msgvec
, MAX_L2TPV3_MSGCNT
, IOVSIZE
);
524 g_free(s
->header_buf
);
525 g_free(s
->dgram_dst
);
/* Net client descriptor for the L2TPv3 backend: binds the client kind, the
 * size of the per-client state, and the flat-buffer, iovec and cleanup
 * callbacks defined in this file.
 * NOTE(review): the initializer list may contain further members that are
 * not visible in this portion of the file. */
528 static NetClientInfo net_l2tpv3_info
= {
529 .type
= NET_CLIENT_OPTIONS_KIND_L2TPV3
,
530 .size
= sizeof(NetL2TPV3State
),
531 .receive
= net_l2tpv3_receive_dgram
,
532 .receive_iov
= net_l2tpv3_receive_dgram_iov
,
534 .cleanup
= net_l2tpv3_cleanup
,
/* Create and configure an L2TPv3 net client from the supplied options.
 *
 * Visible steps, in order:
 *  - create the client with qemu_new_net_client() and obtain its state;
 *  - copy the ipv6 flag, validate the optional offset, and decide the
 *    cookie width (s->cookie_is_64);
 *  - for UDP, require both srcport and dstport;
 *  - initialise the session/cookie/counter offsets (0/4/4), adjusting them
 *    by the cookie width and by a further 4 bytes in one of the transport
 *    branches;
 *  - take the tx session from the options; the rx session defaults to the
 *    tx session when rxsession is absent;
 *  - resolve src with getaddrinfo(), create the socket and bind() it;
 *  - resolve dst and copy it into a zeroed sockaddr_storage (s->dgram_dst,
 *    s->dst_size);
 *  - set the counter flags (pincounter implies has_counter), add the
 *    optional user offset to s->offset, and derive s->header_size, which
 *    includes sizeof(struct iphdr) when neither ipv6 nor udp is set;
 *  - allocate the receive vector, the transmit iovec array and the header
 *    buffer, make the socket non-blocking, enable read polling and set
 *    info_str to "l2tpv3: connected";
 *  - on the error path visible at the end, delete the client and free the
 *    addrinfo result.
 *
 * NOTE(review): cookie64 is tested as (has_cookie64 || cookie64), unlike
 * the (has_x && x) form used for ipv6, udp, counter and pincounter -
 * confirm that "||" is intended.
 * NOTE(review): the offset check rejects values > 256 while its message
 * says "must be less than 256 bytes", so 256 itself is accepted.
 * NOTE(review): many statements (returns, goto targets, the branch
 * conditions around the hints setup, the fd error test) are not visible in
 * this portion of the file. */
537 int net_init_l2tpv3(const NetClientOptions
*opts
,
539 NetClientState
*peer
)
543 const NetdevL2TPv3Options
*l2tpv3
;
547 struct addrinfo hints
;
548 struct addrinfo
*result
= NULL
;
549 char *srcport
, *dstport
;
551 nc
= qemu_new_net_client(&net_l2tpv3_info
, peer
, "l2tpv3", name
);
553 s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
557 s
->header_mismatch
= false;
559 assert(opts
->kind
== NET_CLIENT_OPTIONS_KIND_L2TPV3
);
560 l2tpv3
= opts
->l2tpv3
;
562 if (l2tpv3
->has_ipv6
&& l2tpv3
->ipv6
) {
563 s
->ipv6
= l2tpv3
->ipv6
;
568 if ((l2tpv3
->has_offset
) && (l2tpv3
->offset
> 256)) {
569 error_report("l2tpv3_open : offset must be less than 256 bytes");
573 if (l2tpv3
->has_rxcookie
|| l2tpv3
->has_txcookie
) {
574 if (l2tpv3
->has_rxcookie
&& l2tpv3
->has_txcookie
) {
583 if (l2tpv3
->has_cookie64
|| l2tpv3
->cookie64
) {
584 s
->cookie_is_64
= true;
586 s
->cookie_is_64
= false;
589 if (l2tpv3
->has_udp
&& l2tpv3
->udp
) {
591 if (!(l2tpv3
->has_srcport
&& l2tpv3
->has_dstport
)) {
592 error_report("l2tpv3_open : need both src and dst port for udp");
595 srcport
= l2tpv3
->srcport
;
596 dstport
= l2tpv3
->dstport
;
606 s
->session_offset
= 0;
607 s
->cookie_offset
= 4;
608 s
->counter_offset
= 4;
610 s
->tx_session
= l2tpv3
->txsession
;
611 if (l2tpv3
->has_rxsession
) {
612 s
->rx_session
= l2tpv3
->rxsession
;
614 s
->rx_session
= s
->tx_session
;
618 s
->rx_cookie
= l2tpv3
->rxcookie
;
619 s
->tx_cookie
= l2tpv3
->txcookie
;
620 if (s
->cookie_is_64
== true) {
623 s
->counter_offset
+= 8;
627 s
->counter_offset
+= 4;
631 memset(&hints
, 0, sizeof(hints
));
634 hints
.ai_family
= AF_INET6
;
636 hints
.ai_family
= AF_INET
;
639 hints
.ai_socktype
= SOCK_DGRAM
;
640 hints
.ai_protocol
= 0;
642 s
->counter_offset
+= 4;
643 s
->session_offset
+= 4;
644 s
->cookie_offset
+= 4;
646 hints
.ai_socktype
= SOCK_RAW
;
647 hints
.ai_protocol
= IPPROTO_L2TP
;
650 gairet
= getaddrinfo(l2tpv3
->src
, srcport
, &hints
, &result
);
652 if ((gairet
!= 0) || (result
== NULL
)) {
654 "l2tpv3_open : could not resolve src, errno = %s",
659 fd
= socket(result
->ai_family
, result
->ai_socktype
, result
->ai_protocol
);
662 error_report("l2tpv3_open : socket creation failed, errno = %d", -fd
);
665 if (bind(fd
, (struct sockaddr
*) result
->ai_addr
, result
->ai_addrlen
)) {
666 error_report("l2tpv3_open : could not bind socket err=%i", errno
);
670 freeaddrinfo(result
);
673 memset(&hints
, 0, sizeof(hints
));
676 hints
.ai_family
= AF_INET6
;
678 hints
.ai_family
= AF_INET
;
681 hints
.ai_socktype
= SOCK_DGRAM
;
682 hints
.ai_protocol
= 0;
684 hints
.ai_socktype
= SOCK_RAW
;
685 hints
.ai_protocol
= IPPROTO_L2TP
;
689 gairet
= getaddrinfo(l2tpv3
->dst
, dstport
, &hints
, &result
);
690 if ((gairet
!= 0) || (result
== NULL
)) {
692 "l2tpv3_open : could not resolve dst, error = %s",
698 s
->dgram_dst
= g_malloc(sizeof(struct sockaddr_storage
));
699 memset(s
->dgram_dst
, '\0' , sizeof(struct sockaddr_storage
));
700 memcpy(s
->dgram_dst
, result
->ai_addr
, result
->ai_addrlen
);
701 s
->dst_size
= result
->ai_addrlen
;
704 freeaddrinfo(result
);
707 if (l2tpv3
->has_counter
&& l2tpv3
->counter
) {
708 s
->has_counter
= true;
711 s
->has_counter
= false;
714 if (l2tpv3
->has_pincounter
&& l2tpv3
->pincounter
) {
715 s
->has_counter
= true; /* pin counter implies that there is counter */
716 s
->pin_counter
= true;
718 s
->pin_counter
= false;
721 if (l2tpv3
->has_offset
) {
723 s
->offset
+= l2tpv3
->offset
;
726 if ((s
->ipv6
) || (s
->udp
)) {
727 s
->header_size
= s
->offset
;
729 s
->header_size
= s
->offset
+ sizeof(struct iphdr
);
732 s
->msgvec
= build_l2tpv3_vector(s
, MAX_L2TPV3_MSGCNT
);
733 s
->vec
= g_malloc(sizeof(struct iovec
) * MAX_L2TPV3_IOVCNT
);
734 s
->header_buf
= g_malloc(s
->header_size
);
736 qemu_set_nonblock(fd
);
741 l2tpv3_read_poll(s
, true);
743 snprintf(s
->nc
.info_str
, sizeof(s
->nc
.info_str
),
744 "l2tpv3: connected");
747 qemu_del_net_client(nc
);
752 freeaddrinfo(result
);