qos-test: nvme test node
[qemu/ar7.git] / net / l2tpv3.c
blob81db24dc8cb904485db11684f363a7caf929097b
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2012-2014 Cisco Systems
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "qemu/osdep.h"
27 #include <linux/ip.h>
28 #include <netdb.h>
29 #include "net/net.h"
30 #include "clients.h"
31 #include "qapi/error.h"
32 #include "qemu-common.h"
33 #include "qemu/error-report.h"
34 #include "qemu/option.h"
35 #include "qemu/sockets.h"
36 #include "qemu/iov.h"
37 #include "qemu/main-loop.h"
/* The buffer size needs to be investigated for optimum numbers and
 * optimum means of paging in on different systems. This size is
 * chosen to be sufficient to accommodate one packet with some headers
 */

/* Alignment passed to qemu_memalign() for each receive payload buffer */
#define BUFFER_ALIGN sysconf(_SC_PAGESIZE)
/* Size in bytes of each receive payload buffer */
#define BUFFER_SIZE 2048
/* iovec entries per receive slot: [0] = header, [1] = payload */
#define IOVSIZE 2
/* Number of slots in the receive ring (mmsghdr array) */
#define MAX_L2TPV3_MSGCNT 64
/* Entries in the transmit iovec array (header + caller fragments) */
#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)

/* Header set to 0x30000 signifies a data packet */

#define L2TPV3_DATA_PACKET 0x30000

/* IANA-assigned IP protocol ID for L2TPv3 */

#ifndef IPPROTO_L2TP
#define IPPROTO_L2TP 0x73
#endif
/*
 * Per-netdev state of an L2TPv3 backend.  'nc' must stay the first
 * use of DO_UPCAST(NetL2TPV3State, nc, ...) target member so the net
 * layer's NetClientState pointer can be converted back to this struct.
 */
typedef struct NetL2TPV3State {
    NetClientState nc;
    int fd;                     /* tunnel socket (UDP or raw IP) */

    /*
     * these are used for xmit - that happens packet a time
     * and for first sign of life packet (easier to parse that once)
     */

    uint8_t *header_buf;        /* scratch header, rebuilt for every send */
    struct iovec *vec;          /* MAX_L2TPV3_IOVCNT entries for sendmsg() */

    /*
     * these are used for receive - try to "eat" up to
     * MAX_L2TPV3_MSGCNT packets at a time with recvmmsg()
     */

    struct mmsghdr *msgvec;

    /*
     * peer address
     */

    struct sockaddr_storage *dgram_dst;
    uint32_t dst_size;          /* valid length of *dgram_dst */

    /*
     * L2TPv3 parameters
     */

    uint64_t rx_cookie;
    uint64_t tx_cookie;
    uint32_t rx_session;
    uint32_t tx_session;
    uint32_t header_size;       /* bytes preceding the payload on receive */
    uint32_t counter;           /* tx counter, incremented unless pinned */

    /*
     * DOS avoidance in error handling
     */

    bool header_mismatch;       /* set after the first reported bad header */

    /*
     * Ring buffer handling
     */

    int queue_head;             /* next msgvec slot recvmmsg() fills */
    int queue_tail;             /* next msgvec slot to deliver */
    int queue_depth;            /* received but not yet processed slots */

    /*
     * Precomputed offsets
     */

    uint32_t offset;            /* length of the header sent on transmit */
    uint32_t cookie_offset;
    uint32_t counter_offset;
    uint32_t session_offset;

    /* Poll Control */

    bool read_poll;
    bool write_poll;

    /* Flags */

    bool ipv6;
    bool udp;
    bool has_counter;
    bool pin_counter;           /* counter field is always sent as zero */
    bool cookie;
    bool cookie_is_64;

} NetL2TPV3State;
136 static void net_l2tpv3_send(void *opaque);
137 static void l2tpv3_writable(void *opaque);
139 static void l2tpv3_update_fd_handler(NetL2TPV3State *s)
141 qemu_set_fd_handler(s->fd,
142 s->read_poll ? net_l2tpv3_send : NULL,
143 s->write_poll ? l2tpv3_writable : NULL,
147 static void l2tpv3_read_poll(NetL2TPV3State *s, bool enable)
149 if (s->read_poll != enable) {
150 s->read_poll = enable;
151 l2tpv3_update_fd_handler(s);
155 static void l2tpv3_write_poll(NetL2TPV3State *s, bool enable)
157 if (s->write_poll != enable) {
158 s->write_poll = enable;
159 l2tpv3_update_fd_handler(s);
163 static void l2tpv3_writable(void *opaque)
165 NetL2TPV3State *s = opaque;
166 l2tpv3_write_poll(s, false);
167 qemu_flush_queued_packets(&s->nc);
170 static void l2tpv3_send_completed(NetClientState *nc, ssize_t len)
172 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
173 l2tpv3_read_poll(s, true);
176 static void l2tpv3_poll(NetClientState *nc, bool enable)
178 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
179 l2tpv3_write_poll(s, enable);
180 l2tpv3_read_poll(s, enable);
183 static void l2tpv3_form_header(NetL2TPV3State *s)
185 uint32_t *counter;
187 if (s->udp) {
188 stl_be_p((uint32_t *) s->header_buf, L2TPV3_DATA_PACKET);
190 stl_be_p(
191 (uint32_t *) (s->header_buf + s->session_offset),
192 s->tx_session
194 if (s->cookie) {
195 if (s->cookie_is_64) {
196 stq_be_p(
197 (uint64_t *)(s->header_buf + s->cookie_offset),
198 s->tx_cookie
200 } else {
201 stl_be_p(
202 (uint32_t *) (s->header_buf + s->cookie_offset),
203 s->tx_cookie
207 if (s->has_counter) {
208 counter = (uint32_t *)(s->header_buf + s->counter_offset);
209 if (s->pin_counter) {
210 *counter = 0;
211 } else {
212 stl_be_p(counter, ++s->counter);
217 static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
218 const struct iovec *iov,
219 int iovcnt)
221 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
223 struct msghdr message;
224 int ret;
226 if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
227 error_report(
228 "iovec too long %d > %d, change l2tpv3.h",
229 iovcnt, MAX_L2TPV3_IOVCNT
231 return -1;
233 l2tpv3_form_header(s);
234 memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
235 s->vec->iov_base = s->header_buf;
236 s->vec->iov_len = s->offset;
237 message.msg_name = s->dgram_dst;
238 message.msg_namelen = s->dst_size;
239 message.msg_iov = s->vec;
240 message.msg_iovlen = iovcnt + 1;
241 message.msg_control = NULL;
242 message.msg_controllen = 0;
243 message.msg_flags = 0;
244 do {
245 ret = sendmsg(s->fd, &message, 0);
246 } while ((ret == -1) && (errno == EINTR));
247 if (ret > 0) {
248 ret -= s->offset;
249 } else if (ret == 0) {
250 /* belt and braces - should not occur on DGRAM
251 * we should get an error and never a 0 send
253 ret = iov_size(iov, iovcnt);
254 } else {
255 /* signal upper layer that socket buffer is full */
256 ret = -errno;
257 if (ret == -EAGAIN || ret == -ENOBUFS) {
258 l2tpv3_write_poll(s, true);
259 ret = 0;
262 return ret;
265 static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
266 const uint8_t *buf,
267 size_t size)
269 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
271 struct iovec *vec;
272 struct msghdr message;
273 ssize_t ret = 0;
275 l2tpv3_form_header(s);
276 vec = s->vec;
277 vec->iov_base = s->header_buf;
278 vec->iov_len = s->offset;
279 vec++;
280 vec->iov_base = (void *) buf;
281 vec->iov_len = size;
282 message.msg_name = s->dgram_dst;
283 message.msg_namelen = s->dst_size;
284 message.msg_iov = s->vec;
285 message.msg_iovlen = 2;
286 message.msg_control = NULL;
287 message.msg_controllen = 0;
288 message.msg_flags = 0;
289 do {
290 ret = sendmsg(s->fd, &message, 0);
291 } while ((ret == -1) && (errno == EINTR));
292 if (ret > 0) {
293 ret -= s->offset;
294 } else if (ret == 0) {
295 /* belt and braces - should not occur on DGRAM
296 * we should get an error and never a 0 send
298 ret = size;
299 } else {
300 ret = -errno;
301 if (ret == -EAGAIN || ret == -ENOBUFS) {
302 /* signal upper layer that socket buffer is full */
303 l2tpv3_write_poll(s, true);
304 ret = 0;
307 return ret;
310 static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
313 uint32_t *session;
314 uint64_t cookie;
316 if ((!s->udp) && (!s->ipv6)) {
317 buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
320 /* we do not do a strict check for "data" packets as per
321 * the RFC spec because the pure IP spec does not have
322 * that anyway.
325 if (s->cookie) {
326 if (s->cookie_is_64) {
327 cookie = ldq_be_p(buf + s->cookie_offset);
328 } else {
329 cookie = ldl_be_p(buf + s->cookie_offset) & 0xffffffffULL;
331 if (cookie != s->rx_cookie) {
332 if (!s->header_mismatch) {
333 error_report("unknown cookie id");
335 return -1;
338 session = (uint32_t *) (buf + s->session_offset);
339 if (ldl_be_p(session) != s->rx_session) {
340 if (!s->header_mismatch) {
341 error_report("session mismatch");
343 return -1;
345 return 0;
348 static void net_l2tpv3_process_queue(NetL2TPV3State *s)
350 int size = 0;
351 struct iovec *vec;
352 bool bad_read;
353 int data_size;
354 struct mmsghdr *msgvec;
356 /* go into ring mode only if there is a "pending" tail */
357 if (s->queue_depth > 0) {
358 do {
359 msgvec = s->msgvec + s->queue_tail;
360 if (msgvec->msg_len > 0) {
361 data_size = msgvec->msg_len - s->header_size;
362 vec = msgvec->msg_hdr.msg_iov;
363 if ((data_size > 0) &&
364 (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
365 vec++;
366 /* Use the legacy delivery for now, we will
367 * switch to using our own ring as a queueing mechanism
368 * at a later date
370 size = qemu_send_packet_async(
371 &s->nc,
372 vec->iov_base,
373 data_size,
374 l2tpv3_send_completed
376 if (size == 0) {
377 l2tpv3_read_poll(s, false);
379 bad_read = false;
380 } else {
381 bad_read = true;
382 if (!s->header_mismatch) {
383 /* report error only once */
384 error_report("l2tpv3 header verification failed");
385 s->header_mismatch = true;
388 } else {
389 bad_read = true;
391 s->queue_tail = (s->queue_tail + 1) % MAX_L2TPV3_MSGCNT;
392 s->queue_depth--;
393 } while (
394 (s->queue_depth > 0) &&
395 qemu_can_send_packet(&s->nc) &&
396 ((size > 0) || bad_read)
401 static void net_l2tpv3_send(void *opaque)
403 NetL2TPV3State *s = opaque;
404 int target_count, count;
405 struct mmsghdr *msgvec;
407 /* go into ring mode only if there is a "pending" tail */
409 if (s->queue_depth) {
411 /* The ring buffer we use has variable intake
412 * count of how much we can read varies - adjust accordingly
415 target_count = MAX_L2TPV3_MSGCNT - s->queue_depth;
417 /* Ensure we do not overrun the ring when we have
418 * a lot of enqueued packets
421 if (s->queue_head + target_count > MAX_L2TPV3_MSGCNT) {
422 target_count = MAX_L2TPV3_MSGCNT - s->queue_head;
424 } else {
426 /* we do not have any pending packets - we can use
427 * the whole message vector linearly instead of using
428 * it as a ring
431 s->queue_head = 0;
432 s->queue_tail = 0;
433 target_count = MAX_L2TPV3_MSGCNT;
436 msgvec = s->msgvec + s->queue_head;
437 if (target_count > 0) {
438 do {
439 count = recvmmsg(
440 s->fd,
441 msgvec,
442 target_count, MSG_DONTWAIT, NULL);
443 } while ((count == -1) && (errno == EINTR));
444 if (count < 0) {
445 /* Recv error - we still need to flush packets here,
446 * (re)set queue head to current position
448 count = 0;
450 s->queue_head = (s->queue_head + count) % MAX_L2TPV3_MSGCNT;
451 s->queue_depth += count;
453 net_l2tpv3_process_queue(s);
456 static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
458 int i, j;
459 struct iovec *iov;
460 struct mmsghdr *cleanup = msgvec;
461 if (cleanup) {
462 for (i = 0; i < count; i++) {
463 if (cleanup->msg_hdr.msg_iov) {
464 iov = cleanup->msg_hdr.msg_iov;
465 for (j = 0; j < iovcount; j++) {
466 g_free(iov->iov_base);
467 iov++;
469 g_free(cleanup->msg_hdr.msg_iov);
471 cleanup++;
473 g_free(msgvec);
477 static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
479 int i;
480 struct iovec *iov;
481 struct mmsghdr *msgvec, *result;
483 msgvec = g_new(struct mmsghdr, count);
484 result = msgvec;
485 for (i = 0; i < count ; i++) {
486 msgvec->msg_hdr.msg_name = NULL;
487 msgvec->msg_hdr.msg_namelen = 0;
488 iov = g_new(struct iovec, IOVSIZE);
489 msgvec->msg_hdr.msg_iov = iov;
490 iov->iov_base = g_malloc(s->header_size);
491 iov->iov_len = s->header_size;
492 iov++ ;
493 iov->iov_base = qemu_memalign(BUFFER_ALIGN, BUFFER_SIZE);
494 iov->iov_len = BUFFER_SIZE;
495 msgvec->msg_hdr.msg_iovlen = 2;
496 msgvec->msg_hdr.msg_control = NULL;
497 msgvec->msg_hdr.msg_controllen = 0;
498 msgvec->msg_hdr.msg_flags = 0;
499 msgvec++;
501 return result;
504 static void net_l2tpv3_cleanup(NetClientState *nc)
506 NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
507 qemu_purge_queued_packets(nc);
508 l2tpv3_read_poll(s, false);
509 l2tpv3_write_poll(s, false);
510 if (s->fd >= 0) {
511 close(s->fd);
513 destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
514 g_free(s->vec);
515 g_free(s->header_buf);
516 g_free(s->dgram_dst);
519 static NetClientInfo net_l2tpv3_info = {
520 .type = NET_CLIENT_DRIVER_L2TPV3,
521 .size = sizeof(NetL2TPV3State),
522 .receive = net_l2tpv3_receive_dgram,
523 .receive_iov = net_l2tpv3_receive_dgram_iov,
524 .poll = l2tpv3_poll,
525 .cleanup = net_l2tpv3_cleanup,
528 int net_init_l2tpv3(const Netdev *netdev,
529 const char *name,
530 NetClientState *peer, Error **errp)
532 const NetdevL2TPv3Options *l2tpv3;
533 NetL2TPV3State *s;
534 NetClientState *nc;
535 int fd = -1, gairet;
536 struct addrinfo hints;
537 struct addrinfo *result = NULL;
538 char *srcport, *dstport;
540 nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
542 s = DO_UPCAST(NetL2TPV3State, nc, nc);
544 s->queue_head = 0;
545 s->queue_tail = 0;
546 s->header_mismatch = false;
548 assert(netdev->type == NET_CLIENT_DRIVER_L2TPV3);
549 l2tpv3 = &netdev->u.l2tpv3;
551 if (l2tpv3->has_ipv6 && l2tpv3->ipv6) {
552 s->ipv6 = l2tpv3->ipv6;
553 } else {
554 s->ipv6 = false;
557 if ((l2tpv3->has_offset) && (l2tpv3->offset > 256)) {
558 error_setg(errp, "offset must be less than 256 bytes");
559 goto outerr;
562 if (l2tpv3->has_rxcookie || l2tpv3->has_txcookie) {
563 if (l2tpv3->has_rxcookie && l2tpv3->has_txcookie) {
564 s->cookie = true;
565 } else {
566 error_setg(errp,
567 "require both 'rxcookie' and 'txcookie' or neither");
568 goto outerr;
570 } else {
571 s->cookie = false;
574 if (l2tpv3->has_cookie64 || l2tpv3->cookie64) {
575 s->cookie_is_64 = true;
576 } else {
577 s->cookie_is_64 = false;
580 if (l2tpv3->has_udp && l2tpv3->udp) {
581 s->udp = true;
582 if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
583 error_setg(errp, "need both src and dst port for udp");
584 goto outerr;
585 } else {
586 srcport = l2tpv3->srcport;
587 dstport = l2tpv3->dstport;
589 } else {
590 s->udp = false;
591 srcport = NULL;
592 dstport = NULL;
596 s->offset = 4;
597 s->session_offset = 0;
598 s->cookie_offset = 4;
599 s->counter_offset = 4;
601 s->tx_session = l2tpv3->txsession;
602 if (l2tpv3->has_rxsession) {
603 s->rx_session = l2tpv3->rxsession;
604 } else {
605 s->rx_session = s->tx_session;
608 if (s->cookie) {
609 s->rx_cookie = l2tpv3->rxcookie;
610 s->tx_cookie = l2tpv3->txcookie;
611 if (s->cookie_is_64 == true) {
612 /* 64 bit cookie */
613 s->offset += 8;
614 s->counter_offset += 8;
615 } else {
616 /* 32 bit cookie */
617 s->offset += 4;
618 s->counter_offset += 4;
622 memset(&hints, 0, sizeof(hints));
624 if (s->ipv6) {
625 hints.ai_family = AF_INET6;
626 } else {
627 hints.ai_family = AF_INET;
629 if (s->udp) {
630 hints.ai_socktype = SOCK_DGRAM;
631 hints.ai_protocol = 0;
632 s->offset += 4;
633 s->counter_offset += 4;
634 s->session_offset += 4;
635 s->cookie_offset += 4;
636 } else {
637 hints.ai_socktype = SOCK_RAW;
638 hints.ai_protocol = IPPROTO_L2TP;
641 gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
643 if ((gairet != 0) || (result == NULL)) {
644 error_setg(errp, "could not resolve src, errno = %s",
645 gai_strerror(gairet));
646 goto outerr;
648 fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
649 if (fd == -1) {
650 fd = -errno;
651 error_setg(errp, "socket creation failed, errno = %d",
652 -fd);
653 goto outerr;
655 if (bind(fd, (struct sockaddr *) result->ai_addr, result->ai_addrlen)) {
656 error_setg(errp, "could not bind socket err=%i", errno);
657 goto outerr;
659 if (result) {
660 freeaddrinfo(result);
663 memset(&hints, 0, sizeof(hints));
665 if (s->ipv6) {
666 hints.ai_family = AF_INET6;
667 } else {
668 hints.ai_family = AF_INET;
670 if (s->udp) {
671 hints.ai_socktype = SOCK_DGRAM;
672 hints.ai_protocol = 0;
673 } else {
674 hints.ai_socktype = SOCK_RAW;
675 hints.ai_protocol = IPPROTO_L2TP;
678 result = NULL;
679 gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
680 if ((gairet != 0) || (result == NULL)) {
681 error_setg(errp, "could not resolve dst, error = %s",
682 gai_strerror(gairet));
683 goto outerr;
686 s->dgram_dst = g_new0(struct sockaddr_storage, 1);
687 memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
688 s->dst_size = result->ai_addrlen;
690 if (result) {
691 freeaddrinfo(result);
694 if (l2tpv3->has_counter && l2tpv3->counter) {
695 s->has_counter = true;
696 s->offset += 4;
697 } else {
698 s->has_counter = false;
701 if (l2tpv3->has_pincounter && l2tpv3->pincounter) {
702 s->has_counter = true; /* pin counter implies that there is counter */
703 s->pin_counter = true;
704 } else {
705 s->pin_counter = false;
708 if (l2tpv3->has_offset) {
709 /* extra offset */
710 s->offset += l2tpv3->offset;
713 if ((s->ipv6) || (s->udp)) {
714 s->header_size = s->offset;
715 } else {
716 s->header_size = s->offset + sizeof(struct iphdr);
719 s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
720 s->vec = g_new(struct iovec, MAX_L2TPV3_IOVCNT);
721 s->header_buf = g_malloc(s->header_size);
723 qemu_set_nonblock(fd);
725 s->fd = fd;
726 s->counter = 0;
728 l2tpv3_read_poll(s, true);
730 snprintf(s->nc.info_str, sizeof(s->nc.info_str),
731 "l2tpv3: connected");
732 return 0;
733 outerr:
734 qemu_del_net_client(nc);
735 if (fd >= 0) {
736 close(fd);
738 if (result) {
739 freeaddrinfo(result);
741 return -1;