4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2012-2014 Cisco Systems
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include "qemu/osdep.h"
31 #include "qapi/error.h"
32 #include "qemu/error-report.h"
33 #include "qemu/option.h"
34 #include "qemu/sockets.h"
36 #include "qemu/main-loop.h"
37 #include "qemu/memalign.h"
39 /* The buffer size needs to be investigated for optimum numbers and
40 * optimum means of paging in on different systems. This size is
41 * chosen to be sufficient to accommodate one packet with some headers
44 #define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
45 #define BUFFER_SIZE 16384
47 #define MAX_L2TPV3_MSGCNT 64
48 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
50 /* Header set to 0x30000 signifies a data packet */
52 #define L2TPV3_DATA_PACKET 0x30000
54 /* IANA-assigned IP protocol ID for L2TPv3 */
57 #define IPPROTO_L2TP 0x73
60 typedef struct NetL2TPV3State
{
65 * these are used for xmit - that happens packet a time
66 * and for first sign of life packet (easier to parse that once)
73 * these are used for receive - try to "eat" up to 32 packets at a time
76 struct mmsghdr
*msgvec
;
82 struct sockaddr_storage
*dgram_dst
;
97 * DOS avoidance in error handling
100 bool header_mismatch
;
103 * Ring buffer handling
111 * Precomputed offsets
115 uint32_t cookie_offset
;
116 uint32_t counter_offset
;
117 uint32_t session_offset
;
/*
 * Forward declarations: both callbacks are installed by
 * l2tpv3_update_fd_handler() before their definitions appear.
 */
static void l2tpv3_writable(void *opaque);
static void net_l2tpv3_send(void *opaque);
138 static void l2tpv3_update_fd_handler(NetL2TPV3State
*s
)
140 qemu_set_fd_handler(s
->fd
,
141 s
->read_poll
? net_l2tpv3_send
: NULL
,
142 s
->write_poll
? l2tpv3_writable
: NULL
,
146 static void l2tpv3_read_poll(NetL2TPV3State
*s
, bool enable
)
148 if (s
->read_poll
!= enable
) {
149 s
->read_poll
= enable
;
150 l2tpv3_update_fd_handler(s
);
154 static void l2tpv3_write_poll(NetL2TPV3State
*s
, bool enable
)
156 if (s
->write_poll
!= enable
) {
157 s
->write_poll
= enable
;
158 l2tpv3_update_fd_handler(s
);
162 static void l2tpv3_writable(void *opaque
)
164 NetL2TPV3State
*s
= opaque
;
165 l2tpv3_write_poll(s
, false);
166 qemu_flush_queued_packets(&s
->nc
);
169 static void l2tpv3_send_completed(NetClientState
*nc
, ssize_t len
)
171 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
172 l2tpv3_read_poll(s
, true);
175 static void l2tpv3_poll(NetClientState
*nc
, bool enable
)
177 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
178 l2tpv3_write_poll(s
, enable
);
179 l2tpv3_read_poll(s
, enable
);
182 static void l2tpv3_form_header(NetL2TPV3State
*s
)
187 stl_be_p((uint32_t *) s
->header_buf
, L2TPV3_DATA_PACKET
);
190 (uint32_t *) (s
->header_buf
+ s
->session_offset
),
194 if (s
->cookie_is_64
) {
196 (uint64_t *)(s
->header_buf
+ s
->cookie_offset
),
201 (uint32_t *) (s
->header_buf
+ s
->cookie_offset
),
206 if (s
->has_counter
) {
207 counter
= (uint32_t *)(s
->header_buf
+ s
->counter_offset
);
208 if (s
->pin_counter
) {
211 stl_be_p(counter
, ++s
->counter
);
216 static ssize_t
net_l2tpv3_receive_dgram_iov(NetClientState
*nc
,
217 const struct iovec
*iov
,
220 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
222 struct msghdr message
;
225 if (iovcnt
> MAX_L2TPV3_IOVCNT
- 1) {
227 "iovec too long %d > %d, change l2tpv3.h",
228 iovcnt
, MAX_L2TPV3_IOVCNT
232 l2tpv3_form_header(s
);
233 memcpy(s
->vec
+ 1, iov
, iovcnt
* sizeof(struct iovec
));
234 s
->vec
->iov_base
= s
->header_buf
;
235 s
->vec
->iov_len
= s
->offset
;
236 message
.msg_name
= s
->dgram_dst
;
237 message
.msg_namelen
= s
->dst_size
;
238 message
.msg_iov
= s
->vec
;
239 message
.msg_iovlen
= iovcnt
+ 1;
240 message
.msg_control
= NULL
;
241 message
.msg_controllen
= 0;
242 message
.msg_flags
= 0;
243 ret
= RETRY_ON_EINTR(sendmsg(s
->fd
, &message
, 0));
246 } else if (ret
== 0) {
247 /* belt and braces - should not occur on DGRAM
248 * we should get an error and never a 0 send
250 ret
= iov_size(iov
, iovcnt
);
252 /* signal upper layer that socket buffer is full */
254 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
255 l2tpv3_write_poll(s
, true);
262 static ssize_t
net_l2tpv3_receive_dgram(NetClientState
*nc
,
266 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
269 struct msghdr message
;
272 l2tpv3_form_header(s
);
274 vec
->iov_base
= s
->header_buf
;
275 vec
->iov_len
= s
->offset
;
277 vec
->iov_base
= (void *) buf
;
279 message
.msg_name
= s
->dgram_dst
;
280 message
.msg_namelen
= s
->dst_size
;
281 message
.msg_iov
= s
->vec
;
282 message
.msg_iovlen
= 2;
283 message
.msg_control
= NULL
;
284 message
.msg_controllen
= 0;
285 message
.msg_flags
= 0;
286 ret
= RETRY_ON_EINTR(sendmsg(s
->fd
, &message
, 0));
289 } else if (ret
== 0) {
290 /* belt and braces - should not occur on DGRAM
291 * we should get an error and never a 0 send
296 if (ret
== -EAGAIN
|| ret
== -ENOBUFS
) {
297 /* signal upper layer that socket buffer is full */
298 l2tpv3_write_poll(s
, true);
305 static int l2tpv3_verify_header(NetL2TPV3State
*s
, uint8_t *buf
)
311 if ((!s
->udp
) && (!s
->ipv6
)) {
312 buf
+= sizeof(struct iphdr
) /* fix for ipv4 raw */;
315 /* we do not do a strict check for "data" packets as per
316 * the RFC spec because the pure IP spec does not have
321 if (s
->cookie_is_64
) {
322 cookie
= ldq_be_p(buf
+ s
->cookie_offset
);
324 cookie
= ldl_be_p(buf
+ s
->cookie_offset
) & 0xffffffffULL
;
326 if (cookie
!= s
->rx_cookie
) {
327 if (!s
->header_mismatch
) {
328 error_report("unknown cookie id");
333 session
= (uint32_t *) (buf
+ s
->session_offset
);
334 if (ldl_be_p(session
) != s
->rx_session
) {
335 if (!s
->header_mismatch
) {
336 error_report("session mismatch");
343 static void net_l2tpv3_process_queue(NetL2TPV3State
*s
)
349 struct mmsghdr
*msgvec
;
351 /* go into ring mode only if there is a "pending" tail */
352 if (s
->queue_depth
> 0) {
354 msgvec
= s
->msgvec
+ s
->queue_tail
;
355 if (msgvec
->msg_len
> 0) {
356 data_size
= msgvec
->msg_len
- s
->header_size
;
357 vec
= msgvec
->msg_hdr
.msg_iov
;
358 if ((data_size
> 0) &&
359 (l2tpv3_verify_header(s
, vec
->iov_base
) == 0)) {
361 /* Use the legacy delivery for now, we will
362 * switch to using our own ring as a queueing mechanism
365 size
= qemu_send_packet_async(
369 l2tpv3_send_completed
372 l2tpv3_read_poll(s
, false);
377 if (!s
->header_mismatch
) {
378 /* report error only once */
379 error_report("l2tpv3 header verification failed");
380 s
->header_mismatch
= true;
386 s
->queue_tail
= (s
->queue_tail
+ 1) % MAX_L2TPV3_MSGCNT
;
389 (s
->queue_depth
> 0) &&
390 qemu_can_send_packet(&s
->nc
) &&
391 ((size
> 0) || bad_read
)
396 static void net_l2tpv3_send(void *opaque
)
398 NetL2TPV3State
*s
= opaque
;
399 int target_count
, count
;
400 struct mmsghdr
*msgvec
;
402 /* go into ring mode only if there is a "pending" tail */
404 if (s
->queue_depth
) {
406 /* The ring buffer we use has variable intake
407 * count of how much we can read varies - adjust accordingly
410 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_depth
;
412 /* Ensure we do not overrun the ring when we have
413 * a lot of enqueued packets
416 if (s
->queue_head
+ target_count
> MAX_L2TPV3_MSGCNT
) {
417 target_count
= MAX_L2TPV3_MSGCNT
- s
->queue_head
;
421 /* we do not have any pending packets - we can use
422 * the whole message vector linearly instead of using
428 target_count
= MAX_L2TPV3_MSGCNT
;
431 msgvec
= s
->msgvec
+ s
->queue_head
;
432 if (target_count
> 0) {
433 count
= RETRY_ON_EINTR(
434 recvmmsg(s
->fd
, msgvec
, target_count
, MSG_DONTWAIT
, NULL
)
437 /* Recv error - we still need to flush packets here,
438 * (re)set queue head to current position
442 s
->queue_head
= (s
->queue_head
+ count
) % MAX_L2TPV3_MSGCNT
;
443 s
->queue_depth
+= count
;
445 net_l2tpv3_process_queue(s
);
448 static void destroy_vector(struct mmsghdr
*msgvec
, int count
, int iovcount
)
452 struct mmsghdr
*cleanup
= msgvec
;
454 for (i
= 0; i
< count
; i
++) {
455 if (cleanup
->msg_hdr
.msg_iov
) {
456 iov
= cleanup
->msg_hdr
.msg_iov
;
457 for (j
= 0; j
< iovcount
; j
++) {
458 g_free(iov
->iov_base
);
461 g_free(cleanup
->msg_hdr
.msg_iov
);
469 static struct mmsghdr
*build_l2tpv3_vector(NetL2TPV3State
*s
, int count
)
473 struct mmsghdr
*msgvec
, *result
;
475 msgvec
= g_new(struct mmsghdr
, count
);
477 for (i
= 0; i
< count
; i
++) {
478 msgvec
->msg_hdr
.msg_name
= NULL
;
479 msgvec
->msg_hdr
.msg_namelen
= 0;
480 iov
= g_new(struct iovec
, IOVSIZE
);
481 msgvec
->msg_hdr
.msg_iov
= iov
;
482 iov
->iov_base
= g_malloc(s
->header_size
);
483 iov
->iov_len
= s
->header_size
;
485 iov
->iov_base
= qemu_memalign(BUFFER_ALIGN
, BUFFER_SIZE
);
486 iov
->iov_len
= BUFFER_SIZE
;
487 msgvec
->msg_hdr
.msg_iovlen
= 2;
488 msgvec
->msg_hdr
.msg_control
= NULL
;
489 msgvec
->msg_hdr
.msg_controllen
= 0;
490 msgvec
->msg_hdr
.msg_flags
= 0;
496 static void net_l2tpv3_cleanup(NetClientState
*nc
)
498 NetL2TPV3State
*s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
499 qemu_purge_queued_packets(nc
);
500 l2tpv3_read_poll(s
, false);
501 l2tpv3_write_poll(s
, false);
505 destroy_vector(s
->msgvec
, MAX_L2TPV3_MSGCNT
, IOVSIZE
);
507 g_free(s
->header_buf
);
508 g_free(s
->dgram_dst
);
511 static NetClientInfo net_l2tpv3_info
= {
512 .type
= NET_CLIENT_DRIVER_L2TPV3
,
513 .size
= sizeof(NetL2TPV3State
),
514 .receive
= net_l2tpv3_receive_dgram
,
515 .receive_iov
= net_l2tpv3_receive_dgram_iov
,
517 .cleanup
= net_l2tpv3_cleanup
,
520 int net_init_l2tpv3(const Netdev
*netdev
,
522 NetClientState
*peer
, Error
**errp
)
524 const NetdevL2TPv3Options
*l2tpv3
;
528 struct addrinfo hints
;
529 struct addrinfo
*result
= NULL
;
530 char *srcport
, *dstport
;
532 nc
= qemu_new_net_client(&net_l2tpv3_info
, peer
, "l2tpv3", name
);
534 s
= DO_UPCAST(NetL2TPV3State
, nc
, nc
);
538 s
->header_mismatch
= false;
540 assert(netdev
->type
== NET_CLIENT_DRIVER_L2TPV3
);
541 l2tpv3
= &netdev
->u
.l2tpv3
;
543 if (l2tpv3
->has_ipv6
&& l2tpv3
->ipv6
) {
544 s
->ipv6
= l2tpv3
->ipv6
;
549 if ((l2tpv3
->has_offset
) && (l2tpv3
->offset
> 256)) {
550 error_setg(errp
, "offset must be less than 256 bytes");
554 if (l2tpv3
->has_rxcookie
|| l2tpv3
->has_txcookie
) {
555 if (l2tpv3
->has_rxcookie
&& l2tpv3
->has_txcookie
) {
559 "require both 'rxcookie' and 'txcookie' or neither");
566 if (l2tpv3
->has_cookie64
|| l2tpv3
->cookie64
) {
567 s
->cookie_is_64
= true;
569 s
->cookie_is_64
= false;
572 if (l2tpv3
->has_udp
&& l2tpv3
->udp
) {
574 if (!(l2tpv3
->srcport
&& l2tpv3
->dstport
)) {
575 error_setg(errp
, "need both src and dst port for udp");
578 srcport
= l2tpv3
->srcport
;
579 dstport
= l2tpv3
->dstport
;
589 s
->session_offset
= 0;
590 s
->cookie_offset
= 4;
591 s
->counter_offset
= 4;
593 s
->tx_session
= l2tpv3
->txsession
;
594 if (l2tpv3
->has_rxsession
) {
595 s
->rx_session
= l2tpv3
->rxsession
;
597 s
->rx_session
= s
->tx_session
;
601 s
->rx_cookie
= l2tpv3
->rxcookie
;
602 s
->tx_cookie
= l2tpv3
->txcookie
;
603 if (s
->cookie_is_64
== true) {
606 s
->counter_offset
+= 8;
610 s
->counter_offset
+= 4;
614 memset(&hints
, 0, sizeof(hints
));
617 hints
.ai_family
= AF_INET6
;
619 hints
.ai_family
= AF_INET
;
622 hints
.ai_socktype
= SOCK_DGRAM
;
623 hints
.ai_protocol
= 0;
625 s
->counter_offset
+= 4;
626 s
->session_offset
+= 4;
627 s
->cookie_offset
+= 4;
629 hints
.ai_socktype
= SOCK_RAW
;
630 hints
.ai_protocol
= IPPROTO_L2TP
;
633 gairet
= getaddrinfo(l2tpv3
->src
, srcport
, &hints
, &result
);
635 if ((gairet
!= 0) || (result
== NULL
)) {
636 error_setg(errp
, "could not resolve src, errno = %s",
637 gai_strerror(gairet
));
640 fd
= socket(result
->ai_family
, result
->ai_socktype
, result
->ai_protocol
);
643 error_setg(errp
, "socket creation failed, errno = %d",
647 if (bind(fd
, (struct sockaddr
*) result
->ai_addr
, result
->ai_addrlen
)) {
648 error_setg(errp
, "could not bind socket err=%i", errno
);
652 freeaddrinfo(result
);
654 memset(&hints
, 0, sizeof(hints
));
657 hints
.ai_family
= AF_INET6
;
659 hints
.ai_family
= AF_INET
;
662 hints
.ai_socktype
= SOCK_DGRAM
;
663 hints
.ai_protocol
= 0;
665 hints
.ai_socktype
= SOCK_RAW
;
666 hints
.ai_protocol
= IPPROTO_L2TP
;
670 gairet
= getaddrinfo(l2tpv3
->dst
, dstport
, &hints
, &result
);
671 if ((gairet
!= 0) || (result
== NULL
)) {
672 error_setg(errp
, "could not resolve dst, error = %s",
673 gai_strerror(gairet
));
677 s
->dgram_dst
= g_new0(struct sockaddr_storage
, 1);
678 memcpy(s
->dgram_dst
, result
->ai_addr
, result
->ai_addrlen
);
679 s
->dst_size
= result
->ai_addrlen
;
681 freeaddrinfo(result
);
683 if (l2tpv3
->has_counter
&& l2tpv3
->counter
) {
684 s
->has_counter
= true;
687 s
->has_counter
= false;
690 if (l2tpv3
->has_pincounter
&& l2tpv3
->pincounter
) {
691 s
->has_counter
= true; /* pin counter implies that there is counter */
692 s
->pin_counter
= true;
694 s
->pin_counter
= false;
697 if (l2tpv3
->has_offset
) {
699 s
->offset
+= l2tpv3
->offset
;
702 if ((s
->ipv6
) || (s
->udp
)) {
703 s
->header_size
= s
->offset
;
705 s
->header_size
= s
->offset
+ sizeof(struct iphdr
);
708 s
->msgvec
= build_l2tpv3_vector(s
, MAX_L2TPV3_MSGCNT
);
709 s
->vec
= g_new(struct iovec
, MAX_L2TPV3_IOVCNT
);
710 s
->header_buf
= g_malloc(s
->header_size
);
712 qemu_socket_set_nonblock(fd
);
717 l2tpv3_read_poll(s
, true);
719 qemu_set_info_str(&s
->nc
, "l2tpv3: connected");
722 qemu_del_net_client(nc
);
727 freeaddrinfo(result
);