4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2012-2014 Cisco Systems
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include "qemu/osdep.h"
31 #include "qapi/error.h"
32 #include "qemu/error-report.h"
33 #include "qemu/option.h"
34 #include "qemu/sockets.h"
36 #include "qemu/main-loop.h"
37 #include "qemu/memalign.h"
39 /* The buffer size needs to be investigated for optimum numbers and
40 * optimum means of paging in on different systems. This size is
41 * chosen to be sufficient to accommodate one packet with some headers
44 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
45 #define BUFFER_SIZE 2048
47 #define MAX_L2TPV3_MSGCNT 64
48 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
50 /* Header set to 0x30000 signifies a data packet */
52 #define L2TPV3_DATA_PACKET 0x30000
54 /* IANA-assigned IP protocol ID for L2TPv3 */
57 #define IPPROTO_L2TP 0x73
60 typedef struct NetL2TPV3State
{
65 * these are used for xmit - that happens a packet at a time
66 * and for first sign of life packet (easier to parse that once)
73 * these are used for receive - try to "eat" up to MAX_L2TPV3_MSGCNT packets at a time
76 struct mmsghdr
*msgvec
;
82 struct sockaddr_storage
*dgram_dst
;
97 * DOS avoidance in error handling
100 bool header_mismatch
;
103 * Ring buffer handling
111 * Precomputed offsets
115 uint32_t cookie_offset
;
116 uint32_t counter_offset
;
117 uint32_t session_offset
;
135 static void net_l2tpv3_send(void *opaque
);
136 static void l2tpv3_writable(void *opaque
);
138 static void l2tpv3_update_fd_handler(NetL2TPV3State
*s
)
140 qemu_set_fd_handler(s
->fd
,
141 s
->read_poll
? net_l2tpv3_send
: NULL
,
142 s
->write_poll
? l2tpv3_writable
: NULL
,
146 static void l2tpv3_read_poll(NetL2TPV3State
*s
, bool enable
)
148 if (s
->read_poll
!= enable
) {
149 s
->read_poll
= enable
;
150 l2tpv3_update_fd_handler(s
);
154 static void l2tpv3_write_poll(NetL2TPV3State
*s
, bool enable
)
156 if (s
->write_poll
!= enable
) {
157 s
->write_poll
= enable
;
158 l2tpv3_update_fd_handler(s
);
162 static void l2tpv3_writable(void *opaque
)
164 NetL2TPV3State
*s
= opaque
;
165 l2tpv3_write_poll(s
, false);
166 qemu_flush_queued_packets(&s
->nc
);
169 static void l2tpv3_send_completed(NetClientState
*nc
, ssize_t len
)
171 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
172 l2tpv3_read_poll(s
, true);
175 static void l2tpv3_poll(NetClientState
*nc
, bool enable
)
177 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
178 l2tpv3_write_poll(s
, enable
);
179 l2tpv3_read_poll(s
, enable
);
182 static void l2tpv3_form_header(NetL2TPV3State
*s
)
187 stl_be_p((uint32_t *) s
->header_buf
, L2TPV3_DATA_PACKET
);
190 (uint32_t *) (s
->header_buf
+ s
->session_offset
),
194 if (s
->cookie_is_64
) {
196 (uint64_t *)(s
->header_buf
+ s
->cookie_offset
),
201 (uint32_t *) (s
->header_buf
+ s
->cookie_offset
),
206 if (s
->has_counter
) {
207 counter
= (uint32_t *)(s
->header_buf
+ s
->counter_offset
);
208 if (s
->pin_counter
) {
211 stl_be_p(counter
, ++s
->counter
);
216 static ssize_t
net_l2tpv3_receive_dgram_iov(NetClientState
*nc
,
217 const struct iovec
*iov
,
220 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
222 struct msghdr message
;
225 if (iovcnt
> MAX_L2TPV3_IOVCNT
- 1) {
227 "iovec too long %d > %d, change l2tpv3.h",
228 iovcnt
, MAX_L2TPV3_IOVCNT
232 l2tpv3_form_header(s
);
233 memcpy(s
->vec
+ 1, iov
, iovcnt
* sizeof(struct iovec
));
234 s
->vec
->iov_base
= s
->header_buf
;
235 s
->vec
->iov_len
= s
->offset
;
236 message
.msg_name
= s
->dgram_dst
;
237 message
.msg_namelen
= s
->dst_size
;
238 message
.msg_iov
= s
->vec
;
239 message
.msg_iovlen
= iovcnt
+ 1;
240 message
.msg_control
= NULL
;
241 message
.msg_controllen
= 0;
242 message
.msg_flags
= 0;
244 ret
= sendmsg(s
->fd
, &message
, 0);
245 } while ((ret
== -1) && (errno
== EINTR
));
248 } else if (ret
== 0) {
249 /* belt and braces - should not occur on DGRAM
250 * we should get an error and never a 0 send
252 ret
= iov_size(iov
, iovcnt
);
254 /* signal upper layer that socket buffer is full */
256 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
257 l2tpv3_write_poll(s
, true);
264 static ssize_t
net_l2tpv3_receive_dgram(NetClientState
*nc
,
268 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
271 struct msghdr message
;
274 l2tpv3_form_header(s
);
276 vec
->iov_base
= s
->header_buf
;
277 vec
->iov_len
= s
->offset
;
279 vec
->iov_base
= (void *) buf
;
281 message
.msg_name
= s
->dgram_dst
;
282 message
.msg_namelen
= s
->dst_size
;
283 message
.msg_iov
= s
->vec
;
284 message
.msg_iovlen
= 2;
285 message
.msg_control
= NULL
;
286 message
.msg_controllen
= 0;
287 message
.msg_flags
= 0;
289 ret
= sendmsg(s
->fd
, &message
, 0);
290 } while ((ret
== -1) && (errno
== EINTR
));
293 } else if (ret
== 0) {
294 /* belt and braces - should not occur on DGRAM
295 * we should get an error and never a 0 send
300 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
301 /* signal upper layer that socket buffer is full */
302 l2tpv3_write_poll(s
, true);
309 static int l2tpv3_verify_header(NetL2TPV3State
*s
, uint8_t *buf
)
315 if ((!s
->udp
) && (!s
->ipv6
)) {
316 buf
+= sizeof(struct iphdr
) /* fix for ipv4 raw */;
319 /* we do not do a strict check for "data" packets as per
320 * the RFC spec because the pure IP spec does not have
325 if (s
->cookie_is_64
) {
326 cookie
= ldq_be_p(buf
+ s
->cookie_offset
);
328 cookie
= ldl_be_p(buf
+ s
->cookie_offset
) & 0xffffffffULL
;
330 if (cookie
!= s
->rx_cookie
) {
331 if (!s
->header_mismatch
) {
332 error_report("unknown cookie id");
337 session
= (uint32_t *) (buf
+ s
->session_offset
);
338 if (ldl_be_p(session
) != s
->rx_session
) {
339 if (!s
->header_mismatch
) {
340 error_report("session mismatch");
347 static void net_l2tpv3_process_queue(NetL2TPV3State
*s
)
353 struct mmsghdr
*msgvec
;
355 /* go into ring mode only if there is a "pending" tail */
356 if (s
->queue_depth
> 0) {
358 msgvec
= s
->msgvec
+ s
->queue_tail
;
359 if (msgvec
->msg_len
> 0) {
360 data_size
= msgvec
->msg_len
- s
->header_size
;
361 vec
= msgvec
->msg_hdr
.msg_iov
;
362 if ((data_size
> 0) &&
363 (l2tpv3_verify_header(s
, vec
->iov_base
) == 0)) {
365 /* Use the legacy delivery for now, we will
366 * switch to using our own ring as a queueing mechanism
369 size
= qemu_send_packet_async(
373 l2tpv3_send_completed
376 l2tpv3_read_poll(s
, false);
381 if (!s
->header_mismatch
) {
382 /* report error only once */
383 error_report("l2tpv3 header verification failed");
384 s
->header_mismatch
= true;
390 s
->queue_tail
= (s
->queue_tail
+ 1) % MAX_L2TPV3_MSGCNT
;
393 (s
->queue_depth
> 0) &&
394 qemu_can_send_packet(&s
->nc
) &&
395 ((size
> 0) || bad_read
)
400 static void net_l2tpv3_send(void *opaque
)
402 NetL2TPV3State
*s
= opaque
;
403 int target_count
, count
;
404 struct mmsghdr
*msgvec
;
406 /* go into ring mode only if there is a "pending" tail */
408 if (s
->queue_depth
) {
410 /* The ring buffer we use has variable intake
411 * count of how much we can read varies - adjust accordingly
414 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_depth
;
416 /* Ensure we do not overrun the ring when we have
417 * a lot of enqueued packets
420 if (s
->queue_head
+ target_count
> MAX_L2TPV3_MSGCNT
) {
421 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_head
;
425 /* we do not have any pending packets - we can use
426 * the whole message vector linearly instead of using
432 target_count
= MAX_L2TPV3_MSGCNT
;
435 msgvec
= s
->msgvec
+ s
->queue_head
;
436 if (target_count
> 0) {
441 target_count
, MSG_DONTWAIT
, NULL
);
442 } while ((count
== -1) && (errno
== EINTR
));
444 /* Recv error - we still need to flush packets here,
445 * (re)set queue head to current position
449 s
->queue_head
= (s
->queue_head
+ count
) % MAX_L2TPV3_MSGCNT
;
450 s
->queue_depth
+= count
;
452 net_l2tpv3_process_queue(s
);
455 static void destroy_vector(struct mmsghdr
*msgvec
, int count
, int iovcount
)
459 struct mmsghdr
*cleanup
= msgvec
;
461 for (i
= 0; i
< count
; i
++) {
462 if (cleanup
->msg_hdr
.msg_iov
) {
463 iov
= cleanup
->msg_hdr
.msg_iov
;
464 for (j
= 0; j
< iovcount
; j
++) {
465 g_free(iov
->iov_base
);
468 g_free(cleanup
->msg_hdr
.msg_iov
);
476 static struct mmsghdr
*build_l2tpv3_vector(NetL2TPV3State
*s
, int count
)
480 struct mmsghdr
*msgvec
, *result
;
482 msgvec
= g_new(struct mmsghdr
, count
);
484 for (i
= 0; i
< count
; i
++) {
485 msgvec
->msg_hdr
.msg_name
= NULL
;
486 msgvec
->msg_hdr
.msg_namelen
= 0;
487 iov
= g_new(struct iovec
, IOVSIZE
);
488 msgvec
->msg_hdr
.msg_iov
= iov
;
489 iov
->iov_base
= g_malloc(s
->header_size
);
490 iov
->iov_len
= s
->header_size
;
492 iov
->iov_base
= qemu_memalign(BUFFER_ALIGN
, BUFFER_SIZE
);
493 iov
->iov_len
= BUFFER_SIZE
;
494 msgvec
->msg_hdr
.msg_iovlen
= 2;
495 msgvec
->msg_hdr
.msg_control
= NULL
;
496 msgvec
->msg_hdr
.msg_controllen
= 0;
497 msgvec
->msg_hdr
.msg_flags
= 0;
503 static void net_l2tpv3_cleanup(NetClientState
*nc
)
505 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
506 qemu_purge_queued_packets(nc
);
507 l2tpv3_read_poll(s
, false);
508 l2tpv3_write_poll(s
, false);
512 destroy_vector(s
->msgvec
, MAX_L2TPV3_MSGCNT
, IOVSIZE
);
514 g_free(s
->header_buf
);
515 g_free(s
->dgram_dst
);
518 static NetClientInfo net_l2tpv3_info
= {
519 .type
= NET_CLIENT_DRIVER_L2TPV3
,
520 .size
= sizeof(NetL2TPV3State
),
521 .receive
= net_l2tpv3_receive_dgram
,
522 .receive_iov
= net_l2tpv3_receive_dgram_iov
,
524 .cleanup
= net_l2tpv3_cleanup
,
527 int net_init_l2tpv3(const Netdev
*netdev
,
529 NetClientState
*peer
, Error
**errp
)
531 const NetdevL2TPv3Options
*l2tpv3
;
535 struct addrinfo hints
;
536 struct addrinfo
*result
= NULL
;
537 char *srcport
, *dstport
;
539 nc
= qemu_new_net_client(&net_l2tpv3_info
, peer
, "l2tpv3", name
);
541 s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
545 s
->header_mismatch
= false;
547 assert(netdev
->type
== NET_CLIENT_DRIVER_L2TPV3
);
548 l2tpv3
= &netdev
->u
.l2tpv3
;
550 if (l2tpv3
->has_ipv6
&& l2tpv3
->ipv6
) {
551 s
->ipv6
= l2tpv3
->ipv6
;
556 if ((l2tpv3
->has_offset
) && (l2tpv3
->offset
> 256)) {
557 error_setg(errp
, "offset must be less than 256 bytes");
561 if (l2tpv3
->has_rxcookie
|| l2tpv3
->has_txcookie
) {
562 if (l2tpv3
->has_rxcookie
&& l2tpv3
->has_txcookie
) {
566 "require both 'rxcookie' and 'txcookie' or neither");
573 if (l2tpv3
->has_cookie64
|| l2tpv3
->cookie64
) {
574 s
->cookie_is_64
= true;
576 s
->cookie_is_64
= false;
579 if (l2tpv3
->has_udp
&& l2tpv3
->udp
) {
581 if (!(l2tpv3
->has_srcport
&& l2tpv3
->has_dstport
)) {
582 error_setg(errp
, "need both src and dst port for udp");
585 srcport
= l2tpv3
->srcport
;
586 dstport
= l2tpv3
->dstport
;
596 s
->session_offset
= 0;
597 s
->cookie_offset
= 4;
598 s
->counter_offset
= 4;
600 s
->tx_session
= l2tpv3
->txsession
;
601 if (l2tpv3
->has_rxsession
) {
602 s
->rx_session
= l2tpv3
->rxsession
;
604 s
->rx_session
= s
->tx_session
;
608 s
->rx_cookie
= l2tpv3
->rxcookie
;
609 s
->tx_cookie
= l2tpv3
->txcookie
;
610 if (s
->cookie_is_64
== true) {
613 s
->counter_offset
+= 8;
617 s
->counter_offset
+= 4;
621 memset(&hints
, 0, sizeof(hints
));
624 hints
.ai_family
= AF_INET6
;
626 hints
.ai_family
= AF_INET
;
629 hints
.ai_socktype
= SOCK_DGRAM
;
630 hints
.ai_protocol
= 0;
632 s
->counter_offset
+= 4;
633 s
->session_offset
+= 4;
634 s
->cookie_offset
+= 4;
636 hints
.ai_socktype
= SOCK_RAW
;
637 hints
.ai_protocol
= IPPROTO_L2TP
;
640 gairet
= getaddrinfo(l2tpv3
->src
, srcport
, &hints
, &result
);
642 if ((gairet
!= 0) || (result
== NULL
)) {
643 error_setg(errp
, "could not resolve src, errno = %s",
644 gai_strerror(gairet
));
647 fd
= socket(result
->ai_family
, result
->ai_socktype
, result
->ai_protocol
);
650 error_setg(errp
, "socket creation failed, errno = %d",
654 if (bind(fd
, (struct sockaddr
*) result
->ai_addr
, result
->ai_addrlen
)) {
655 error_setg(errp
, "could not bind socket err=%i", errno
);
659 freeaddrinfo(result
);
661 memset(&hints
, 0, sizeof(hints
));
664 hints
.ai_family
= AF_INET6
;
666 hints
.ai_family
= AF_INET
;
669 hints
.ai_socktype
= SOCK_DGRAM
;
670 hints
.ai_protocol
= 0;
672 hints
.ai_socktype
= SOCK_RAW
;
673 hints
.ai_protocol
= IPPROTO_L2TP
;
677 gairet
= getaddrinfo(l2tpv3
->dst
, dstport
, &hints
, &result
);
678 if ((gairet
!= 0) || (result
== NULL
)) {
679 error_setg(errp
, "could not resolve dst, error = %s",
680 gai_strerror(gairet
));
684 s
->dgram_dst
= g_new0(struct sockaddr_storage
, 1);
685 memcpy(s
->dgram_dst
, result
->ai_addr
, result
->ai_addrlen
);
686 s
->dst_size
= result
->ai_addrlen
;
688 freeaddrinfo(result
);
690 if (l2tpv3
->has_counter
&& l2tpv3
->counter
) {
691 s
->has_counter
= true;
694 s
->has_counter
= false;
697 if (l2tpv3
->has_pincounter
&& l2tpv3
->pincounter
) {
698 s
->has_counter
= true; /* pin counter implies that there is counter */
699 s
->pin_counter
= true;
701 s
->pin_counter
= false;
704 if (l2tpv3
->has_offset
) {
706 s
->offset
+= l2tpv3
->offset
;
709 if ((s
->ipv6
) || (s
->udp
)) {
710 s
->header_size
= s
->offset
;
712 s
->header_size
= s
->offset
+ sizeof(struct iphdr
);
715 s
->msgvec
= build_l2tpv3_vector(s
, MAX_L2TPV3_MSGCNT
);
716 s
->vec
= g_new(struct iovec
, MAX_L2TPV3_IOVCNT
);
717 s
->header_buf
= g_malloc(s
->header_size
);
719 qemu_set_nonblock(fd
);
724 l2tpv3_read_poll(s
, true);
726 snprintf(s
->nc
.info_str
, sizeof(s
->nc
.info_str
),
727 "l2tpv3: connected");
730 qemu_del_net_client(nc
);
735 freeaddrinfo(result
);