/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2012-2014 Cisco Systems
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
28 #include "config-host.h"
31 #include "monitor/monitor.h"
32 #include "qemu-common.h"
33 #include "qemu/error-report.h"
34 #include "qemu/option.h"
35 #include "qemu/sockets.h"
37 #include "qemu/main-loop.h"
/* The buffer size needs to be investigated for optimum numbers and
 * optimum means of paging in on different systems. This size is
 * chosen to be sufficient to accommodate one packet with some headers
 */

#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
#define BUFFER_SIZE 2048

/* Every receive slot is scattered over two iovecs: the L2TPv3 header
 * first, the payload buffer second (see build_l2tpv3_vector()).
 */
#ifndef IOVSIZE
#define IOVSIZE 2
#endif

#define MAX_L2TPV3_MSGCNT 64
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)

/* Header set to 0x30000 signifies a data packet */

#define L2TPV3_DATA_PACKET 0x30000

/* IANA-assigned IP protocol ID for L2TPv3 */

#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
61 typedef struct NetL2TPV3State
{
66 * these are used for xmit - that happens packet a time
67 * and for first sign of life packet (easier to parse that once)
74 * these are used for receive - try to "eat" up to 32 packets at a time
77 struct mmsghdr
*msgvec
;
83 struct sockaddr_storage
*dgram_dst
;
98 * DOS avoidance in error handling
101 bool header_mismatch
;
104 * Ring buffer handling
112 * Precomputed offsets
116 uint32_t cookie_offset
;
117 uint32_t counter_offset
;
118 uint32_t session_offset
;
/* fd handlers, defined further down but installed by
 * l2tpv3_update_fd_handler()
 */
static void net_l2tpv3_send(void *opaque);
static void l2tpv3_writable(void *opaque);
139 static void l2tpv3_update_fd_handler(NetL2TPV3State
*s
)
141 qemu_set_fd_handler(s
->fd
,
142 s
->read_poll
? net_l2tpv3_send
: NULL
,
143 s
->write_poll
? l2tpv3_writable
: NULL
,
147 static void l2tpv3_read_poll(NetL2TPV3State
*s
, bool enable
)
149 if (s
->read_poll
!= enable
) {
150 s
->read_poll
= enable
;
151 l2tpv3_update_fd_handler(s
);
155 static void l2tpv3_write_poll(NetL2TPV3State
*s
, bool enable
)
157 if (s
->write_poll
!= enable
) {
158 s
->write_poll
= enable
;
159 l2tpv3_update_fd_handler(s
);
163 static void l2tpv3_writable(void *opaque
)
165 NetL2TPV3State
*s
= opaque
;
166 l2tpv3_write_poll(s
, false);
167 qemu_flush_queued_packets(&s
->nc
);
170 static void l2tpv3_send_completed(NetClientState
*nc
, ssize_t len
)
172 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
173 l2tpv3_read_poll(s
, true);
176 static void l2tpv3_poll(NetClientState
*nc
, bool enable
)
178 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
179 l2tpv3_write_poll(s
, enable
);
180 l2tpv3_read_poll(s
, enable
);
183 static void l2tpv3_form_header(NetL2TPV3State
*s
)
188 stl_be_p((uint32_t *) s
->header_buf
, L2TPV3_DATA_PACKET
);
191 (uint32_t *) (s
->header_buf
+ s
->session_offset
),
195 if (s
->cookie_is_64
) {
197 (uint64_t *)(s
->header_buf
+ s
->cookie_offset
),
202 (uint32_t *) (s
->header_buf
+ s
->cookie_offset
),
207 if (s
->has_counter
) {
208 counter
= (uint32_t *)(s
->header_buf
+ s
->counter_offset
);
209 if (s
->pin_counter
) {
212 stl_be_p(counter
, ++s
->counter
);
217 static ssize_t
net_l2tpv3_receive_dgram_iov(NetClientState
*nc
,
218 const struct iovec
*iov
,
221 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
223 struct msghdr message
;
226 if (iovcnt
> MAX_L2TPV3_IOVCNT
- 1) {
228 "iovec too long %d > %d, change l2tpv3.h",
229 iovcnt
, MAX_L2TPV3_IOVCNT
233 l2tpv3_form_header(s
);
234 memcpy(s
->vec
+ 1, iov
, iovcnt
* sizeof(struct iovec
));
235 s
->vec
->iov_base
= s
->header_buf
;
236 s
->vec
->iov_len
= s
->offset
;
237 message
.msg_name
= s
->dgram_dst
;
238 message
.msg_namelen
= s
->dst_size
;
239 message
.msg_iov
= s
->vec
;
240 message
.msg_iovlen
= iovcnt
+ 1;
241 message
.msg_control
= NULL
;
242 message
.msg_controllen
= 0;
243 message
.msg_flags
= 0;
245 ret
= sendmsg(s
->fd
, &message
, 0);
246 } while ((ret
== -1) && (errno
== EINTR
));
249 } else if (ret
== 0) {
250 /* belt and braces - should not occur on DGRAM
251 * we should get an error and never a 0 send
253 ret
= iov_size(iov
, iovcnt
);
255 /* signal upper layer that socket buffer is full */
257 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
258 l2tpv3_write_poll(s
, true);
265 static ssize_t
net_l2tpv3_receive_dgram(NetClientState
*nc
,
269 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
272 struct msghdr message
;
275 l2tpv3_form_header(s
);
277 vec
->iov_base
= s
->header_buf
;
278 vec
->iov_len
= s
->offset
;
280 vec
->iov_base
= (void *) buf
;
282 message
.msg_name
= s
->dgram_dst
;
283 message
.msg_namelen
= s
->dst_size
;
284 message
.msg_iov
= s
->vec
;
285 message
.msg_iovlen
= 2;
286 message
.msg_control
= NULL
;
287 message
.msg_controllen
= 0;
288 message
.msg_flags
= 0;
290 ret
= sendmsg(s
->fd
, &message
, 0);
291 } while ((ret
== -1) && (errno
== EINTR
));
294 } else if (ret
== 0) {
295 /* belt and braces - should not occur on DGRAM
296 * we should get an error and never a 0 send
301 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
302 /* signal upper layer that socket buffer is full */
303 l2tpv3_write_poll(s
, true);
310 static int l2tpv3_verify_header(NetL2TPV3State
*s
, uint8_t *buf
)
316 if ((!s
->udp
) && (!s
->ipv6
)) {
317 buf
+= sizeof(struct iphdr
) /* fix for ipv4 raw */;
320 /* we do not do a strict check for "data" packets as per
321 * the RFC spec because the pure IP spec does not have
326 if (s
->cookie_is_64
) {
327 cookie
= ldq_be_p(buf
+ s
->cookie_offset
);
329 cookie
= ldl_be_p(buf
+ s
->cookie_offset
);
331 if (cookie
!= s
->rx_cookie
) {
332 if (!s
->header_mismatch
) {
333 error_report("unknown cookie id");
338 session
= (uint32_t *) (buf
+ s
->session_offset
);
339 if (ldl_be_p(session
) != s
->rx_session
) {
340 if (!s
->header_mismatch
) {
341 error_report("session mismatch");
348 static void net_l2tpv3_process_queue(NetL2TPV3State
*s
)
354 struct mmsghdr
*msgvec
;
356 /* go into ring mode only if there is a "pending" tail */
357 if (s
->queue_depth
> 0) {
359 msgvec
= s
->msgvec
+ s
->queue_tail
;
360 if (msgvec
->msg_len
> 0) {
361 data_size
= msgvec
->msg_len
- s
->header_size
;
362 vec
= msgvec
->msg_hdr
.msg_iov
;
363 if ((data_size
> 0) &&
364 (l2tpv3_verify_header(s
, vec
->iov_base
) == 0)) {
366 /* Use the legacy delivery for now, we will
367 * switch to using our own ring as a queueing mechanism
370 size
= qemu_send_packet_async(
374 l2tpv3_send_completed
377 l2tpv3_read_poll(s
, false);
382 if (!s
->header_mismatch
) {
383 /* report error only once */
384 error_report("l2tpv3 header verification failed");
385 s
->header_mismatch
= true;
391 s
->queue_tail
= (s
->queue_tail
+ 1) % MAX_L2TPV3_MSGCNT
;
394 (s
->queue_depth
> 0) &&
395 qemu_can_send_packet(&s
->nc
) &&
396 ((size
> 0) || bad_read
)
401 static void net_l2tpv3_send(void *opaque
)
403 NetL2TPV3State
*s
= opaque
;
404 int target_count
, count
;
405 struct mmsghdr
*msgvec
;
407 /* go into ring mode only if there is a "pending" tail */
409 if (s
->queue_depth
) {
411 /* The ring buffer we use has variable intake
412 * count of how much we can read varies - adjust accordingly
415 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_depth
;
417 /* Ensure we do not overrun the ring when we have
418 * a lot of enqueued packets
421 if (s
->queue_head
+ target_count
> MAX_L2TPV3_MSGCNT
) {
422 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_head
;
426 /* we do not have any pending packets - we can use
427 * the whole message vector linearly instead of using
433 target_count
= MAX_L2TPV3_MSGCNT
;
436 msgvec
= s
->msgvec
+ s
->queue_head
;
437 if (target_count
> 0) {
442 target_count
, MSG_DONTWAIT
, NULL
);
443 } while ((count
== -1) && (errno
== EINTR
));
445 /* Recv error - we still need to flush packets here,
446 * (re)set queue head to current position
450 s
->queue_head
= (s
->queue_head
+ count
) % MAX_L2TPV3_MSGCNT
;
451 s
->queue_depth
+= count
;
453 net_l2tpv3_process_queue(s
);
456 static void destroy_vector(struct mmsghdr
*msgvec
, int count
, int iovcount
)
460 struct mmsghdr
*cleanup
= msgvec
;
462 for (i
= 0; i
< count
; i
++) {
463 if (cleanup
->msg_hdr
.msg_iov
) {
464 iov
= cleanup
->msg_hdr
.msg_iov
;
465 for (j
= 0; j
< iovcount
; j
++) {
466 g_free(iov
->iov_base
);
469 g_free(cleanup
->msg_hdr
.msg_iov
);
477 static struct mmsghdr
*build_l2tpv3_vector(NetL2TPV3State
*s
, int count
)
481 struct mmsghdr
*msgvec
, *result
;
483 msgvec
= g_new(struct mmsghdr
, count
);
485 for (i
= 0; i
< count
; i
++) {
486 msgvec
->msg_hdr
.msg_name
= NULL
;
487 msgvec
->msg_hdr
.msg_namelen
= 0;
488 iov
= g_new(struct iovec
, IOVSIZE
);
489 msgvec
->msg_hdr
.msg_iov
= iov
;
490 iov
->iov_base
= g_malloc(s
->header_size
);
491 iov
->iov_len
= s
->header_size
;
493 iov
->iov_base
= qemu_memalign(BUFFER_ALIGN
, BUFFER_SIZE
);
494 iov
->iov_len
= BUFFER_SIZE
;
495 msgvec
->msg_hdr
.msg_iovlen
= 2;
496 msgvec
->msg_hdr
.msg_control
= NULL
;
497 msgvec
->msg_hdr
.msg_controllen
= 0;
498 msgvec
->msg_hdr
.msg_flags
= 0;
504 static void net_l2tpv3_cleanup(NetClientState
*nc
)
506 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
507 qemu_purge_queued_packets(nc
);
508 l2tpv3_read_poll(s
, false);
509 l2tpv3_write_poll(s
, false);
513 destroy_vector(s
->msgvec
, MAX_L2TPV3_MSGCNT
, IOVSIZE
);
515 g_free(s
->header_buf
);
516 g_free(s
->dgram_dst
);
519 static NetClientInfo net_l2tpv3_info
= {
520 .type
= NET_CLIENT_OPTIONS_KIND_L2TPV3
,
521 .size
= sizeof(NetL2TPV3State
),
522 .receive
= net_l2tpv3_receive_dgram
,
523 .receive_iov
= net_l2tpv3_receive_dgram_iov
,
525 .cleanup
= net_l2tpv3_cleanup
,
528 int net_init_l2tpv3(const NetClientOptions
*opts
,
530 NetClientState
*peer
, Error
**errp
)
532 /* FIXME error_setg(errp, ...) on failure */
533 const NetdevL2TPv3Options
*l2tpv3
;
537 struct addrinfo hints
;
538 struct addrinfo
*result
= NULL
;
539 char *srcport
, *dstport
;
541 nc
= qemu_new_net_client(&net_l2tpv3_info
, peer
, "l2tpv3", name
);
543 s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
547 s
->header_mismatch
= false;
549 assert(opts
->kind
== NET_CLIENT_OPTIONS_KIND_L2TPV3
);
550 l2tpv3
= opts
->l2tpv3
;
552 if (l2tpv3
->has_ipv6
&& l2tpv3
->ipv6
) {
553 s
->ipv6
= l2tpv3
->ipv6
;
558 if ((l2tpv3
->has_offset
) && (l2tpv3
->offset
> 256)) {
559 error_report("l2tpv3_open : offset must be less than 256 bytes");
563 if (l2tpv3
->has_rxcookie
|| l2tpv3
->has_txcookie
) {
564 if (l2tpv3
->has_rxcookie
&& l2tpv3
->has_txcookie
) {
573 if (l2tpv3
->has_cookie64
|| l2tpv3
->cookie64
) {
574 s
->cookie_is_64
= true;
576 s
->cookie_is_64
= false;
579 if (l2tpv3
->has_udp
&& l2tpv3
->udp
) {
581 if (!(l2tpv3
->has_srcport
&& l2tpv3
->has_dstport
)) {
582 error_report("l2tpv3_open : need both src and dst port for udp");
585 srcport
= l2tpv3
->srcport
;
586 dstport
= l2tpv3
->dstport
;
596 s
->session_offset
= 0;
597 s
->cookie_offset
= 4;
598 s
->counter_offset
= 4;
600 s
->tx_session
= l2tpv3
->txsession
;
601 if (l2tpv3
->has_rxsession
) {
602 s
->rx_session
= l2tpv3
->rxsession
;
604 s
->rx_session
= s
->tx_session
;
608 s
->rx_cookie
= l2tpv3
->rxcookie
;
609 s
->tx_cookie
= l2tpv3
->txcookie
;
610 if (s
->cookie_is_64
== true) {
613 s
->counter_offset
+= 8;
617 s
->counter_offset
+= 4;
621 memset(&hints
, 0, sizeof(hints
));
624 hints
.ai_family
= AF_INET6
;
626 hints
.ai_family
= AF_INET
;
629 hints
.ai_socktype
= SOCK_DGRAM
;
630 hints
.ai_protocol
= 0;
632 s
->counter_offset
+= 4;
633 s
->session_offset
+= 4;
634 s
->cookie_offset
+= 4;
636 hints
.ai_socktype
= SOCK_RAW
;
637 hints
.ai_protocol
= IPPROTO_L2TP
;
640 gairet
= getaddrinfo(l2tpv3
->src
, srcport
, &hints
, &result
);
642 if ((gairet
!= 0) || (result
== NULL
)) {
644 "l2tpv3_open : could not resolve src, errno = %s",
649 fd
= socket(result
->ai_family
, result
->ai_socktype
, result
->ai_protocol
);
652 error_report("l2tpv3_open : socket creation failed, errno = %d", -fd
);
655 if (bind(fd
, (struct sockaddr
*) result
->ai_addr
, result
->ai_addrlen
)) {
656 error_report("l2tpv3_open : could not bind socket err=%i", errno
);
660 freeaddrinfo(result
);
663 memset(&hints
, 0, sizeof(hints
));
666 hints
.ai_family
= AF_INET6
;
668 hints
.ai_family
= AF_INET
;
671 hints
.ai_socktype
= SOCK_DGRAM
;
672 hints
.ai_protocol
= 0;
674 hints
.ai_socktype
= SOCK_RAW
;
675 hints
.ai_protocol
= IPPROTO_L2TP
;
679 gairet
= getaddrinfo(l2tpv3
->dst
, dstport
, &hints
, &result
);
680 if ((gairet
!= 0) || (result
== NULL
)) {
682 "l2tpv3_open : could not resolve dst, error = %s",
688 s
->dgram_dst
= g_new0(struct sockaddr_storage
, 1);
689 memcpy(s
->dgram_dst
, result
->ai_addr
, result
->ai_addrlen
);
690 s
->dst_size
= result
->ai_addrlen
;
693 freeaddrinfo(result
);
696 if (l2tpv3
->has_counter
&& l2tpv3
->counter
) {
697 s
->has_counter
= true;
700 s
->has_counter
= false;
703 if (l2tpv3
->has_pincounter
&& l2tpv3
->pincounter
) {
704 s
->has_counter
= true; /* pin counter implies that there is counter */
705 s
->pin_counter
= true;
707 s
->pin_counter
= false;
710 if (l2tpv3
->has_offset
) {
712 s
->offset
+= l2tpv3
->offset
;
715 if ((s
->ipv6
) || (s
->udp
)) {
716 s
->header_size
= s
->offset
;
718 s
->header_size
= s
->offset
+ sizeof(struct iphdr
);
721 s
->msgvec
= build_l2tpv3_vector(s
, MAX_L2TPV3_MSGCNT
);
722 s
->vec
= g_new(struct iovec
, MAX_L2TPV3_IOVCNT
);
723 s
->header_buf
= g_malloc(s
->header_size
);
725 qemu_set_nonblock(fd
);
730 l2tpv3_read_poll(s
, true);
732 snprintf(s
->nc
.info_str
, sizeof(s
->nc
.info_str
),
733 "l2tpv3: connected");
736 qemu_del_net_client(nc
);
741 freeaddrinfo(result
);