4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2012-2014 Cisco Systems
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include "qemu/osdep.h"
31 #include "qapi/error.h"
32 #include "qemu-common.h"
33 #include "qemu/error-report.h"
34 #include "qemu/option.h"
35 #include "qemu/sockets.h"
37 #include "qemu/main-loop.h"
40 /* The buffer size needs to be investigated for optimum numbers and
41 * optimum means of paging in on different systems. This size is
42 * chosen to be sufficient to accommodate one packet with some headers
45 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
46 #define BUFFER_SIZE 2048
48 #define MAX_L2TPV3_MSGCNT 64
49 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
51 /* Header set to 0x30000 signifies a data packet */
53 #define L2TPV3_DATA_PACKET 0x30000
55 /* IANA-assigned IP protocol ID for L2TPv3 */
58 #define IPPROTO_L2TP 0x73
61 typedef struct NetL2TPV3State
{
66 * these are used for xmit - that happens one packet at a time
67 * and for first sign of life packet (easier to parse that once)
74 * these are used for receive - try to "eat" up to MAX_L2TPV3_MSGCNT packets at a time
77 struct mmsghdr
*msgvec
;
83 struct sockaddr_storage
*dgram_dst
;
98 * DOS avoidance in error handling
101 bool header_mismatch
;
104 * Ring buffer handling
112 * Precomputed offsets
116 uint32_t cookie_offset
;
117 uint32_t counter_offset
;
118 uint32_t session_offset
;
136 static void net_l2tpv3_send(void *opaque
);
137 static void l2tpv3_writable(void *opaque
);
139 static void l2tpv3_update_fd_handler(NetL2TPV3State
*s
)
141 qemu_set_fd_handler(s
->fd
,
142 s
->read_poll
? net_l2tpv3_send
: NULL
,
143 s
->write_poll
? l2tpv3_writable
: NULL
,
147 static void l2tpv3_read_poll(NetL2TPV3State
*s
, bool enable
)
149 if (s
->read_poll
!= enable
) {
150 s
->read_poll
= enable
;
151 l2tpv3_update_fd_handler(s
);
155 static void l2tpv3_write_poll(NetL2TPV3State
*s
, bool enable
)
157 if (s
->write_poll
!= enable
) {
158 s
->write_poll
= enable
;
159 l2tpv3_update_fd_handler(s
);
163 static void l2tpv3_writable(void *opaque
)
165 NetL2TPV3State
*s
= opaque
;
166 l2tpv3_write_poll(s
, false);
167 qemu_flush_queued_packets(&s
->nc
);
170 static void l2tpv3_send_completed(NetClientState
*nc
, ssize_t len
)
172 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
173 l2tpv3_read_poll(s
, true);
176 static void l2tpv3_poll(NetClientState
*nc
, bool enable
)
178 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
179 l2tpv3_write_poll(s
, enable
);
180 l2tpv3_read_poll(s
, enable
);
183 static void l2tpv3_form_header(NetL2TPV3State
*s
)
188 stl_be_p((uint32_t *) s
->header_buf
, L2TPV3_DATA_PACKET
);
191 (uint32_t *) (s
->header_buf
+ s
->session_offset
),
195 if (s
->cookie_is_64
) {
197 (uint64_t *)(s
->header_buf
+ s
->cookie_offset
),
202 (uint32_t *) (s
->header_buf
+ s
->cookie_offset
),
207 if (s
->has_counter
) {
208 counter
= (uint32_t *)(s
->header_buf
+ s
->counter_offset
);
209 if (s
->pin_counter
) {
212 stl_be_p(counter
, ++s
->counter
);
217 static ssize_t
net_l2tpv3_receive_dgram_iov(NetClientState
*nc
,
218 const struct iovec
*iov
,
221 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
223 struct msghdr message
;
226 if (iovcnt
> MAX_L2TPV3_IOVCNT
- 1) {
228 "iovec too long %d > %d, change l2tpv3.h",
229 iovcnt
, MAX_L2TPV3_IOVCNT
233 l2tpv3_form_header(s
);
234 memcpy(s
->vec
+ 1, iov
, iovcnt
* sizeof(struct iovec
));
235 s
->vec
->iov_base
= s
->header_buf
;
236 s
->vec
->iov_len
= s
->offset
;
237 message
.msg_name
= s
->dgram_dst
;
238 message
.msg_namelen
= s
->dst_size
;
239 message
.msg_iov
= s
->vec
;
240 message
.msg_iovlen
= iovcnt
+ 1;
241 message
.msg_control
= NULL
;
242 message
.msg_controllen
= 0;
243 message
.msg_flags
= 0;
245 ret
= sendmsg(s
->fd
, &message
, 0);
246 } while ((ret
== -1) && (errno
== EINTR
));
249 } else if (ret
== 0) {
250 /* belt and braces - should not occur on DGRAM
251 * we should get an error and never a 0 send
253 ret
= iov_size(iov
, iovcnt
);
255 /* signal upper layer that socket buffer is full */
257 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
258 l2tpv3_write_poll(s
, true);
265 static ssize_t
net_l2tpv3_receive_dgram(NetClientState
*nc
,
269 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
272 struct msghdr message
;
275 l2tpv3_form_header(s
);
277 vec
->iov_base
= s
->header_buf
;
278 vec
->iov_len
= s
->offset
;
280 vec
->iov_base
= (void *) buf
;
282 message
.msg_name
= s
->dgram_dst
;
283 message
.msg_namelen
= s
->dst_size
;
284 message
.msg_iov
= s
->vec
;
285 message
.msg_iovlen
= 2;
286 message
.msg_control
= NULL
;
287 message
.msg_controllen
= 0;
288 message
.msg_flags
= 0;
290 ret
= sendmsg(s
->fd
, &message
, 0);
291 } while ((ret
== -1) && (errno
== EINTR
));
294 } else if (ret
== 0) {
295 /* belt and braces - should not occur on DGRAM
296 * we should get an error and never a 0 send
301 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
302 /* signal upper layer that socket buffer is full */
303 l2tpv3_write_poll(s
, true);
310 static int l2tpv3_verify_header(NetL2TPV3State
*s
, uint8_t *buf
)
316 if ((!s
->udp
) && (!s
->ipv6
)) {
317 buf
+= sizeof(struct iphdr
) /* fix for ipv4 raw */;
320 /* we do not do a strict check for "data" packets as per
321 * the RFC spec because the pure IP spec does not have
326 if (s
->cookie_is_64
) {
327 cookie
= ldq_be_p(buf
+ s
->cookie_offset
);
329 cookie
= ldl_be_p(buf
+ s
->cookie_offset
) & 0xffffffffULL
;
331 if (cookie
!= s
->rx_cookie
) {
332 if (!s
->header_mismatch
) {
333 error_report("unknown cookie id");
338 session
= (uint32_t *) (buf
+ s
->session_offset
);
339 if (ldl_be_p(session
) != s
->rx_session
) {
340 if (!s
->header_mismatch
) {
341 error_report("session mismatch");
348 static void net_l2tpv3_process_queue(NetL2TPV3State
*s
)
354 struct mmsghdr
*msgvec
;
356 /* go into ring mode only if there is a "pending" tail */
357 if (s
->queue_depth
> 0) {
359 msgvec
= s
->msgvec
+ s
->queue_tail
;
360 if (msgvec
->msg_len
> 0) {
361 data_size
= msgvec
->msg_len
- s
->header_size
;
362 vec
= msgvec
->msg_hdr
.msg_iov
;
363 if ((data_size
> 0) &&
364 (l2tpv3_verify_header(s
, vec
->iov_base
) == 0)) {
366 /* Use the legacy delivery for now, we will
367 * switch to using our own ring as a queueing mechanism
370 size
= qemu_send_packet_async(
374 l2tpv3_send_completed
377 l2tpv3_read_poll(s
, false);
382 if (!s
->header_mismatch
) {
383 /* report error only once */
384 error_report("l2tpv3 header verification failed");
385 s
->header_mismatch
= true;
391 s
->queue_tail
= (s
->queue_tail
+ 1) % MAX_L2TPV3_MSGCNT
;
394 (s
->queue_depth
> 0) &&
395 qemu_can_send_packet(&s
->nc
) &&
396 ((size
> 0) || bad_read
)
401 static void net_l2tpv3_send(void *opaque
)
403 NetL2TPV3State
*s
= opaque
;
404 int target_count
, count
;
405 struct mmsghdr
*msgvec
;
407 /* go into ring mode only if there is a "pending" tail */
409 if (s
->queue_depth
) {
411 /* The ring buffer we use has variable intake
412 * count of how much we can read varies - adjust accordingly
415 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_depth
;
417 /* Ensure we do not overrun the ring when we have
418 * a lot of enqueued packets
421 if (s
->queue_head
+ target_count
> MAX_L2TPV3_MSGCNT
) {
422 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_head
;
426 /* we do not have any pending packets - we can use
427 * the whole message vector linearly instead of using
433 target_count
= MAX_L2TPV3_MSGCNT
;
436 msgvec
= s
->msgvec
+ s
->queue_head
;
437 if (target_count
> 0) {
442 target_count
, MSG_DONTWAIT
, NULL
);
443 } while ((count
== -1) && (errno
== EINTR
));
445 /* Recv error - we still need to flush packets here,
446 * (re)set queue head to current position
450 s
->queue_head
= (s
->queue_head
+ count
) % MAX_L2TPV3_MSGCNT
;
451 s
->queue_depth
+= count
;
453 net_l2tpv3_process_queue(s
);
456 static void destroy_vector(struct mmsghdr
*msgvec
, int count
, int iovcount
)
460 struct mmsghdr
*cleanup
= msgvec
;
462 for (i
= 0; i
< count
; i
++) {
463 if (cleanup
->msg_hdr
.msg_iov
) {
464 iov
= cleanup
->msg_hdr
.msg_iov
;
465 for (j
= 0; j
< iovcount
; j
++) {
466 g_free(iov
->iov_base
);
469 g_free(cleanup
->msg_hdr
.msg_iov
);
477 static struct mmsghdr
*build_l2tpv3_vector(NetL2TPV3State
*s
, int count
)
481 struct mmsghdr
*msgvec
, *result
;
483 msgvec
= g_new(struct mmsghdr
, count
);
485 for (i
= 0; i
< count
; i
++) {
486 msgvec
->msg_hdr
.msg_name
= NULL
;
487 msgvec
->msg_hdr
.msg_namelen
= 0;
488 iov
= g_new(struct iovec
, IOVSIZE
);
489 msgvec
->msg_hdr
.msg_iov
= iov
;
490 iov
->iov_base
= g_malloc(s
->header_size
);
491 iov
->iov_len
= s
->header_size
;
493 iov
->iov_base
= qemu_memalign(BUFFER_ALIGN
, BUFFER_SIZE
);
494 iov
->iov_len
= BUFFER_SIZE
;
495 msgvec
->msg_hdr
.msg_iovlen
= 2;
496 msgvec
->msg_hdr
.msg_control
= NULL
;
497 msgvec
->msg_hdr
.msg_controllen
= 0;
498 msgvec
->msg_hdr
.msg_flags
= 0;
504 static void net_l2tpv3_cleanup(NetClientState
*nc
)
506 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
507 qemu_purge_queued_packets(nc
);
508 l2tpv3_read_poll(s
, false);
509 l2tpv3_write_poll(s
, false);
513 destroy_vector(s
->msgvec
, MAX_L2TPV3_MSGCNT
, IOVSIZE
);
515 g_free(s
->header_buf
);
516 g_free(s
->dgram_dst
);
519 static NetClientInfo net_l2tpv3_info
= {
520 .type
= NET_CLIENT_DRIVER_L2TPV3
,
521 .size
= sizeof(NetL2TPV3State
),
522 .receive
= net_l2tpv3_receive_dgram
,
523 .receive_iov
= net_l2tpv3_receive_dgram_iov
,
525 .cleanup
= net_l2tpv3_cleanup
,
528 int net_init_l2tpv3(const Netdev
*netdev
,
530 NetClientState
*peer
, Error
**errp
)
532 const NetdevL2TPv3Options
*l2tpv3
;
536 struct addrinfo hints
;
537 struct addrinfo
*result
= NULL
;
538 char *srcport
, *dstport
;
540 nc
= qemu_new_net_client(&net_l2tpv3_info
, peer
, "l2tpv3", name
);
542 s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
546 s
->header_mismatch
= false;
548 assert(netdev
->type
== NET_CLIENT_DRIVER_L2TPV3
);
549 l2tpv3
= &netdev
->u
.l2tpv3
;
551 if (l2tpv3
->has_ipv6
&& l2tpv3
->ipv6
) {
552 s
->ipv6
= l2tpv3
->ipv6
;
557 if ((l2tpv3
->has_offset
) && (l2tpv3
->offset
> 256)) {
558 error_setg(errp
, "offset must be less than 256 bytes");
562 if (l2tpv3
->has_rxcookie
|| l2tpv3
->has_txcookie
) {
563 if (l2tpv3
->has_rxcookie
&& l2tpv3
->has_txcookie
) {
567 "require both 'rxcookie' and 'txcookie' or neither");
574 if (l2tpv3
->has_cookie64
|| l2tpv3
->cookie64
) {
575 s
->cookie_is_64
= true;
577 s
->cookie_is_64
= false;
580 if (l2tpv3
->has_udp
&& l2tpv3
->udp
) {
582 if (!(l2tpv3
->has_srcport
&& l2tpv3
->has_dstport
)) {
583 error_setg(errp
, "need both src and dst port for udp");
586 srcport
= l2tpv3
->srcport
;
587 dstport
= l2tpv3
->dstport
;
597 s
->session_offset
= 0;
598 s
->cookie_offset
= 4;
599 s
->counter_offset
= 4;
601 s
->tx_session
= l2tpv3
->txsession
;
602 if (l2tpv3
->has_rxsession
) {
603 s
->rx_session
= l2tpv3
->rxsession
;
605 s
->rx_session
= s
->tx_session
;
609 s
->rx_cookie
= l2tpv3
->rxcookie
;
610 s
->tx_cookie
= l2tpv3
->txcookie
;
611 if (s
->cookie_is_64
== true) {
614 s
->counter_offset
+= 8;
618 s
->counter_offset
+= 4;
622 memset(&hints
, 0, sizeof(hints
));
625 hints
.ai_family
= AF_INET6
;
627 hints
.ai_family
= AF_INET
;
630 hints
.ai_socktype
= SOCK_DGRAM
;
631 hints
.ai_protocol
= 0;
633 s
->counter_offset
+= 4;
634 s
->session_offset
+= 4;
635 s
->cookie_offset
+= 4;
637 hints
.ai_socktype
= SOCK_RAW
;
638 hints
.ai_protocol
= IPPROTO_L2TP
;
641 gairet
= getaddrinfo(l2tpv3
->src
, srcport
, &hints
, &result
);
643 if ((gairet
!= 0) || (result
== NULL
)) {
644 error_setg(errp
, "could not resolve src, errno = %s",
645 gai_strerror(gairet
));
648 fd
= socket(result
->ai_family
, result
->ai_socktype
, result
->ai_protocol
);
651 error_setg(errp
, "socket creation failed, errno = %d",
655 if (bind(fd
, (struct sockaddr
*) result
->ai_addr
, result
->ai_addrlen
)) {
656 error_setg(errp
, "could not bind socket err=%i", errno
);
660 freeaddrinfo(result
);
663 memset(&hints
, 0, sizeof(hints
));
666 hints
.ai_family
= AF_INET6
;
668 hints
.ai_family
= AF_INET
;
671 hints
.ai_socktype
= SOCK_DGRAM
;
672 hints
.ai_protocol
= 0;
674 hints
.ai_socktype
= SOCK_RAW
;
675 hints
.ai_protocol
= IPPROTO_L2TP
;
679 gairet
= getaddrinfo(l2tpv3
->dst
, dstport
, &hints
, &result
);
680 if ((gairet
!= 0) || (result
== NULL
)) {
681 error_setg(errp
, "could not resolve dst, error = %s",
682 gai_strerror(gairet
));
686 s
->dgram_dst
= g_new0(struct sockaddr_storage
, 1);
687 memcpy(s
->dgram_dst
, result
->ai_addr
, result
->ai_addrlen
);
688 s
->dst_size
= result
->ai_addrlen
;
691 freeaddrinfo(result
);
694 if (l2tpv3
->has_counter
&& l2tpv3
->counter
) {
695 s
->has_counter
= true;
698 s
->has_counter
= false;
701 if (l2tpv3
->has_pincounter
&& l2tpv3
->pincounter
) {
702 s
->has_counter
= true; /* pin counter implies that there is counter */
703 s
->pin_counter
= true;
705 s
->pin_counter
= false;
708 if (l2tpv3
->has_offset
) {
710 s
->offset
+= l2tpv3
->offset
;
713 if ((s
->ipv6
) || (s
->udp
)) {
714 s
->header_size
= s
->offset
;
716 s
->header_size
= s
->offset
+ sizeof(struct iphdr
);
719 s
->msgvec
= build_l2tpv3_vector(s
, MAX_L2TPV3_MSGCNT
);
720 s
->vec
= g_new(struct iovec
, MAX_L2TPV3_IOVCNT
);
721 s
->header_buf
= g_malloc(s
->header_size
);
723 qemu_set_nonblock(fd
);
728 l2tpv3_read_poll(s
, true);
730 snprintf(s
->nc
.info_str
, sizeof(s
->nc
.info_str
),
731 "l2tpv3: connected");
734 qemu_del_net_client(nc
);
739 freeaddrinfo(result
);