migration/rdma: Clean up qemu_rdma_poll()'s return type
[qemu/kevin.git] / net / l2tpv3.c
blobb5547cb917af98484f332db80462f9e2f9f411b4
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2012-2014 Cisco Systems
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "qemu/osdep.h"
27 #include <linux/ip.h>
28 #include <netdb.h>
29 #include "net/net.h"
30 #include "clients.h"
31 #include "qapi/error.h"
32 #include "qemu/error-report.h"
33 #include "qemu/option.h"
34 #include "qemu/sockets.h"
35 #include "qemu/iov.h"
36 #include "qemu/main-loop.h"
37 #include "qemu/memalign.h"
/*
 * The buffer size needs to be investigated for optimum numbers and
 * optimum means of paging in on different systems. The value used
 * here is meant to be large enough for one packet plus some headers.
 */
#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
#define BUFFER_SIZE (16 * 1024)
#define IOVSIZE 2
#define MAX_L2TPV3_MSGCNT 64
#define MAX_L2TPV3_IOVCNT (IOVSIZE * MAX_L2TPV3_MSGCNT)

/* A leading header word of 0x30000 marks a data packet */
#define L2TPV3_DATA_PACKET 0x30000

/* IP protocol number assigned by IANA to L2TPv3 */
#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
60 typedef struct NetL2TPV3State {
61 NetClientState nc;
62 int fd;
65 * these are used for xmit - that happens packet a time
66 * and for first sign of life packet (easier to parse that once)
69 uint8_t *header_buf;
70 struct iovec *vec;
73 * these are used for receive - try to "eat" up to 32 packets at a time
76 struct mmsghdr *msgvec;
79 * peer address
82 struct sockaddr_storage *dgram_dst;
83 uint32_t dst_size;
86 * L2TPv3 parameters
89 uint64_t rx_cookie;
90 uint64_t tx_cookie;
91 uint32_t rx_session;
92 uint32_t tx_session;
93 uint32_t header_size;
94 uint32_t counter;
97 * DOS avoidance in error handling
100 bool header_mismatch;
103 * Ring buffer handling
106 int queue_head;
107 int queue_tail;
108 int queue_depth;
111 * Precomputed offsets
114 uint32_t offset;
115 uint32_t cookie_offset;
116 uint32_t counter_offset;
117 uint32_t session_offset;
119 /* Poll Control */
121 bool read_poll;
122 bool write_poll;
124 /* Flags */
126 bool ipv6;
127 bool udp;
128 bool has_counter;
129 bool pin_counter;
130 bool cookie;
131 bool cookie_is_64;
133 } NetL2TPV3State;
135 static void net_l2tpv3_send(void *opaque);
136 static void l2tpv3_writable(void *opaque);
138 static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
140 qemu_set_fd_handler(s->fd,
141 s->read_poll ? net_l2tpv3_send : NULL,
142 s->write_poll ? l2tpv3_writable : NULL,
146 static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
148 if (s->read_poll != enable) {
149 s->read_poll = enable;
150 l2tpv3_update_fd_handler(s);
154 static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
156 if (s->write_poll != enable) {
157 s->write_poll = enable;
158 l2tpv3_update_fd_handler(s);
162 static void l2tpv3_writable(void *opaque)
164 NetL2TPV3State *s = opaque;
165 l2tpv3_write_poll(s, false);
166 qemu_flush_queued_packets(&s->nc);
169 static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
171 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
172 l2tpv3_read_poll(s, true);
175 static void l2tpv3_poll(NetClientState *nc, bool enable)
177 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
178 l2tpv3_write_poll(s, enable);
179 l2tpv3_read_poll(s, enable);
182 static void l2tpv3_form_header(NetL2TPV3State *s)
184 uint32_t *counter;
186 if (s->udp) {
187 stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
189 stl_be_p(
190 (uint32_t *) (s->header_buf + s->session_offset),
191 s->tx_session
193 if (s->cookie) {
194 if (s->cookie_is_64) {
195 stq_be_p(
196 (uint64_t *)(s->header_buf + s->cookie_offset),
197 s->tx_cookie
199 } else {
200 stl_be_p(
201 (uint32_t *) (s->header_buf + s->cookie_offset),
202 s->tx_cookie
206 if (s->has_counter) {
207 counter = (uint32_t *)(s->header_buf + s->counter_offset);
208 if (s->pin_counter) {
209 *counter = 0;
210 } else {
211 stl_be_p(counter, ++s->counter);
216 static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
217 const struct iovec *iov,
218 int iovcnt)
220 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
222 struct msghdr message;
223 int ret;
225 if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
226 error_report(
227 "iovec too long %d > %d, change l2tpv3.h",
228 iovcnt, MAX_L2TPV3_IOVCNT
230 return -1;
232 l2tpv3_form_header(s);
233 memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
234 s->vec->iov_base = s->header_buf;
235 s->vec->iov_len = s->offset;
236 message.msg_name = s->dgram_dst;
237 message.msg_namelen = s->dst_size;
238 message.msg_iov = s->vec;
239 message.msg_iovlen = iovcnt + 1;
240 message.msg_control = NULL;
241 message.msg_controllen = 0;
242 message.msg_flags = 0;
243 ret = RETRY_ON_EINTR(sendmsg(s->fd, &message, 0));
244 if (ret > 0) {
245 ret -= s->offset;
246 } else if (ret == 0) {
247 /* belt and braces - should not occur on DGRAM
248 * we should get an error and never a 0 send
250 ret = iov_size(iov, iovcnt);
251 } else {
252 /* signal upper layer that socket buffer is full */
253 ret = -errno;
254 if (ret == -EAGAIN || ret == -ENOBUFS) {
255 l2tpv3_write_poll(s, true);
256 ret = 0;
259 return ret;
262 static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
263 const uint8_t *buf,
264 size_t size)
266 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
268 struct iovec *vec;
269 struct msghdr message;
270 ssize_t ret = 0;
272 l2tpv3_form_header(s);
273 vec = s->vec;
274 vec->iov_base = s->header_buf;
275 vec->iov_len = s->offset;
276 vec++;
277 vec->iov_base = (void *) buf;
278 vec->iov_len = size;
279 message.msg_name = s->dgram_dst;
280 message.msg_namelen = s->dst_size;
281 message.msg_iov = s->vec;
282 message.msg_iovlen = 2;
283 message.msg_control = NULL;
284 message.msg_controllen = 0;
285 message.msg_flags = 0;
286 ret = RETRY_ON_EINTR(sendmsg(s->fd, &message, 0));
287 if (ret > 0) {
288 ret -= s->offset;
289 } else if (ret == 0) {
290 /* belt and braces - should not occur on DGRAM
291 * we should get an error and never a 0 send
293 ret = size;
294 } else {
295 ret = -errno;
296 if (ret == -EAGAIN || ret == -ENOBUFS) {
297 /* signal upper layer that socket buffer is full */
298 l2tpv3_write_poll(s, true);
299 ret = 0;
302 return ret;
305 static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
308 uint32_t *session;
309 uint64_t cookie;
311 if ((!s->udp) && (!s->ipv6)) {
312 buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
315 /* we do not do a strict check for "data" packets as per
316 * the RFC spec because the pure IP spec does not have
317 * that anyway.
320 if (s->cookie) {
321 if (s->cookie_is_64) {
322 cookie = ldq_be_p(buf + s->cookie_offset);
323 } else {
324 cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
326 if (cookie != s->rx_cookie) {
327 if (!s->header_mismatch) {
328 error_report("unknown cookie id");
330 return -1;
333 session = (uint32_t *) (buf + s->session_offset);
334 if (ldl_be_p(session) != s->rx_session) {
335 if (!s->header_mismatch) {
336 error_report("session mismatch");
338 return -1;
340 return 0;
343 static void net_l2tpv3_process_queue(NetL2TPV3State *s)
345 int size = 0;
346 struct iovec *vec;
347 bool bad_read;
348 int data_size;
349 struct mmsghdr *msgvec;
351 /* go into ring mode only if there is a "pending" tail */
352 if (s->queue_depth > 0) {
353 do {
354 msgvec = s->msgvec + s->queue_tail;
355 if (msgvec->msg_len > 0) {
356 data_size = msgvec->msg_len - s->header_size;
357 vec = msgvec->msg_hdr.msg_iov;
358 if ((data_size > 0) &&
359 (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
360 vec++;
361 /* Use the legacy delivery for now, we will
362 * switch to using our own ring as a queueing mechanism
363 * at a later date
365 size = qemu_send_packet_async(
366 &s->nc,
367 vec->iov_base,
368 data_size,
369 l2tpv3_send_completed
371 if (size == 0) {
372 l2tpv3_read_poll(s, false);
374 bad_read = false;
375 } else {
376 bad_read = true;
377 if (!s->header_mismatch) {
378 /* report error only once */
379 error_report("l2tpv3 header verification failed");
380 s->header_mismatch = true;
383 } else {
384 bad_read = true;
386 s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
387 s->queue_depth--;
388 } while (
389 (s->queue_depth > 0) &&
390 qemu_can_send_packet(&s->nc) &&
391 ((size > 0) || bad_read)
396 static void net_l2tpv3_send(void *opaque)
398 NetL2TPV3State *s = opaque;
399 int target_count, count;
400 struct mmsghdr *msgvec;
402 /* go into ring mode only if there is a "pending" tail */
404 if (s->queue_depth) {
406 /* The ring buffer we use has variable intake
407 * count of how much we can read varies - adjust accordingly
410 target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
412 /* Ensure we do not overrun the ring when we have
413 * a lot of enqueued packets
416 if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
417 target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
419 } else {
421 /* we do not have any pending packets - we can use
422 * the whole message vector linearly instead of using
423 * it as a ring
426 s->queue_head = 0;
427 s->queue_tail = 0;
428 target_count = MAX_L2TPV3_MSGCNT;
431 msgvec = s->msgvec + s->queue_head;
432 if (target_count > 0) {
433 count = RETRY_ON_EINTR(
434 recvmmsg(s->fd, msgvec, target_count, MSG_DONTWAIT, NULL)
436 if (count < 0) {
437 /* Recv error - we still need to flush packets here,
438 * (re)set queue head to current position
440 count = 0;
442 s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
443 s->queue_depth += count;
445 net_l2tpv3_process_queue(s);
448 static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
450 int i, j;
451 struct iovec *iov;
452 struct mmsghdr *cleanup = msgvec;
453 if (cleanup) {
454 for (i = 0; i < count; i++) {
455 if (cleanup->msg_hdr.msg_iov) {
456 iov = cleanup->msg_hdr.msg_iov;
457 for (j = 0; j < iovcount; j++) {
458 g_free(iov->iov_base);
459 iov++;
461 g_free(cleanup->msg_hdr.msg_iov);
463 cleanup++;
465 g_free(msgvec);
469 static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
471 int i;
472 struct iovec *iov;
473 struct mmsghdr *msgvec, *result;
475 msgvec = g_new(struct mmsghdr, count);
476 result = msgvec;
477 for (i = 0; i < count ; i++) {
478 msgvec->msg_hdr.msg_name = NULL;
479 msgvec->msg_hdr.msg_namelen = 0;
480 iov = g_new(struct iovec, IOVSIZE);
481 msgvec->msg_hdr.msg_iov = iov;
482 iov->iov_base = g_malloc(s->header_size);
483 iov->iov_len = s->header_size;
484 iov++ ;
485 iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
486 iov->iov_len = BUFFER_SIZE;
487 msgvec->msg_hdr.msg_iovlen = 2;
488 msgvec->msg_hdr.msg_control = NULL;
489 msgvec->msg_hdr.msg_controllen = 0;
490 msgvec->msg_hdr.msg_flags = 0;
491 msgvec++;
493 return result;
496 static void net_l2tpv3_cleanup(NetClientState *nc)
498 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
499 qemu_purge_queued_packets(nc);
500 l2tpv3_read_poll(s, false);
501 l2tpv3_write_poll(s, false);
502 if (s->fd >= 0) {
503 close(s->fd);
505 destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
506 g_free(s->vec);
507 g_free(s->header_buf);
508 g_free(s->dgram_dst);
511 static NetClientInfo net_l2tpv3_info = {
512 .type = NET_CLIENT_DRIVER_L2TPV3,
513 .size = sizeof(NetL2TPV3State),
514 .receive = net_l2tpv3_receive_dgram,
515 .receive_iov = net_l2tpv3_receive_dgram_iov,
516 .poll = l2tpv3_poll,
517 .cleanup = net_l2tpv3_cleanup,
520 int net_init_l2tpv3(const Netdev *netdev,
521 const char *name,
522 NetClientState *peer, Error **errp)
524 const NetdevL2TPv3Options *l2tpv3;
525 NetL2TPV3State *s;
526 NetClientState *nc;
527 int fd = -1, gairet;
528 struct addrinfo hints;
529 struct addrinfo *result = NULL;
530 char *srcport, *dstport;
532 nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
534 s = DO_UPCAST(NetL2TPV3State, nc, nc);
536 s->queue_head = 0;
537 s->queue_tail = 0;
538 s->header_mismatch = false;
540 assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3);
541 l2tpv3 = &netdev->u.l2tpv3;
543 if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
544 s->ipv6 = l2tpv3->ipv6;
545 } else {
546 s->ipv6 = false;
549 if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
550 error_setg(errp, "offset must be less than 256 bytes");
551 goto outerr;
554 if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
555 if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
556 s->cookie = true;
557 } else {
558 error_setg(errp,
559 "require both 'rxcookie' and 'txcookie' or neither");
560 goto outerr;
562 } else {
563 s->cookie = false;
566 if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
567 s->cookie_is_64 = true;
568 } else {
569 s->cookie_is_64 = false;
572 if (l2tpv3->has_udp && l2tpv3->udp) {
573 s->udp = true;
574 if (!(l2tpv3->srcport && l2tpv3->dstport)) {
575 error_setg(errp, "need both src and dst port for udp");
576 goto outerr;
577 } else {
578 srcport = l2tpv3->srcport;
579 dstport = l2tpv3->dstport;
581 } else {
582 s->udp = false;
583 srcport = NULL;
584 dstport = NULL;
588 s->offset = 4;
589 s->session_offset = 0;
590 s->cookie_offset = 4;
591 s->counter_offset = 4;
593 s->tx_session = l2tpv3->txsession;
594 if (l2tpv3->has_rxsession) {
595 s->rx_session = l2tpv3->rxsession;
596 } else {
597 s->rx_session = s->tx_session;
600 if (s->cookie) {
601 s->rx_cookie = l2tpv3->rxcookie;
602 s->tx_cookie = l2tpv3->txcookie;
603 if (s->cookie_is_64 == true) {
604 /* 64 bit cookie */
605 s->offset += 8;
606 s->counter_offset += 8;
607 } else {
608 /* 32 bit cookie */
609 s->offset += 4;
610 s->counter_offset += 4;
614 memset(&hints, 0, sizeof(hints));
616 if (s->ipv6) {
617 hints.ai_family = AF_INET6;
618 } else {
619 hints.ai_family = AF_INET;
621 if (s->udp) {
622 hints.ai_socktype = SOCK_DGRAM;
623 hints.ai_protocol = 0;
624 s->offset += 4;
625 s->counter_offset += 4;
626 s->session_offset += 4;
627 s->cookie_offset += 4;
628 } else {
629 hints.ai_socktype = SOCK_RAW;
630 hints.ai_protocol = IPPROTO_L2TP;
633 gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
635 if ((gairet != 0) || (result == NULL)) {
636 error_setg(errp, "could not resolve src, errno = %s",
637 gai_strerror(gairet));
638 goto outerr;
640 fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
641 if (fd == -1) {
642 fd = -errno;
643 error_setg(errp, "socket creation failed, errno = %d",
644 -fd);
645 goto outerr;
647 if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
648 error_setg(errp, "could not bind socket err=%i", errno);
649 goto outerr;
652 freeaddrinfo(result);
654 memset(&hints, 0, sizeof(hints));
656 if (s->ipv6) {
657 hints.ai_family = AF_INET6;
658 } else {
659 hints.ai_family = AF_INET;
661 if (s->udp) {
662 hints.ai_socktype = SOCK_DGRAM;
663 hints.ai_protocol = 0;
664 } else {
665 hints.ai_socktype = SOCK_RAW;
666 hints.ai_protocol = IPPROTO_L2TP;
669 result = NULL;
670 gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
671 if ((gairet != 0) || (result == NULL)) {
672 error_setg(errp, "could not resolve dst, error = %s",
673 gai_strerror(gairet));
674 goto outerr;
677 s->dgram_dst = g_new0(struct sockaddr_storage, 1);
678 memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
679 s->dst_size = result->ai_addrlen;
681 freeaddrinfo(result);
683 if (l2tpv3->has_counter && l2tpv3->counter) {
684 s->has_counter = true;
685 s->offset += 4;
686 } else {
687 s->has_counter = false;
690 if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
691 s->has_counter = true; /* pin counter implies that there is counter */
692 s->pin_counter = true;
693 } else {
694 s->pin_counter = false;
697 if (l2tpv3->has_offset) {
698 /* extra offset */
699 s->offset += l2tpv3->offset;
702 if ((s->ipv6) || (s->udp)) {
703 s->header_size = s->offset;
704 } else {
705 s->header_size = s->offset + sizeof(struct iphdr);
708 s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
709 s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
710 s->header_buf = g_malloc(s->header_size);
712 qemu_socket_set_nonblock(fd);
714 s->fd = fd;
715 s->counter = 0;
717 l2tpv3_read_poll(s, true);
719 qemu_set_info_str(&s->nc, "l2tpv3: connected");
720 return 0;
721 outerr:
722 qemu_del_net_client(nc);
723 if (fd >= 0) {
724 close(fd);
726 if (result) {
727 freeaddrinfo(result);
729 return -1;