net: split BSD tap_open() out into net/tap-bsd.c
[armpft.git] / net / tap.c
blob64553ab8e166867d96ebcbf8c244eb9aa9d209a8
1 /*
2 * QEMU System Emulator
4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2009 Red Hat, Inc.
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
26 #include "net/tap.h"
28 #include "config-host.h"
30 #include <signal.h>
31 #include <sys/ioctl.h>
32 #include <sys/stat.h>
33 #include <sys/wait.h>
34 #include <net/if.h>
36 #include "net.h"
37 #include "sysemu.h"
38 #include "qemu-char.h"
39 #include "qemu-common.h"
41 #ifdef __linux__
42 #include "net/tap-linux.h"
43 #endif
45 #ifdef __sun__
46 #include <sys/stat.h>
47 #include <sys/ethernet.h>
48 #include <sys/sockio.h>
49 #include <netinet/arp.h>
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_icmp.h> // must come after ip.h
54 #include <netinet/udp.h>
55 #include <netinet/tcp.h>
56 #include <net/if.h>
57 #include <syslog.h>
58 #include <stropts.h>
59 #endif
61 #if !defined(_AIX)
63 /* Maximum GSO packet size (64k) plus plenty of room for
64 * the ethernet and virtio_net headers
66 #define TAP_BUFSIZE (4096 + 65536)
68 typedef struct TAPState {
69 VLANClientState *vc;
70 int fd;
71 char down_script[1024];
72 char down_script_arg[128];
73 uint8_t buf[TAP_BUFSIZE];
74 unsigned int read_poll : 1;
75 unsigned int write_poll : 1;
76 unsigned int has_vnet_hdr : 1;
77 unsigned int using_vnet_hdr : 1;
78 unsigned int has_ufo: 1;
79 } TAPState;
81 static int launch_script(const char *setup_script, const char *ifname, int fd);
83 static int tap_can_send(void *opaque);
84 static void tap_send(void *opaque);
85 static void tap_writable(void *opaque);
87 static void tap_update_fd_handler(TAPState *s)
89 qemu_set_fd_handler2(s->fd,
90 s->read_poll ? tap_can_send : NULL,
91 s->read_poll ? tap_send : NULL,
92 s->write_poll ? tap_writable : NULL,
93 s);
96 static void tap_read_poll(TAPState *s, int enable)
98 s->read_poll = !!enable;
99 tap_update_fd_handler(s);
102 static void tap_write_poll(TAPState *s, int enable)
104 s->write_poll = !!enable;
105 tap_update_fd_handler(s);
108 static void tap_writable(void *opaque)
110 TAPState *s = opaque;
112 tap_write_poll(s, 0);
114 qemu_flush_queued_packets(s->vc);
117 static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
119 ssize_t len;
121 do {
122 len = writev(s->fd, iov, iovcnt);
123 } while (len == -1 && errno == EINTR);
125 if (len == -1 && errno == EAGAIN) {
126 tap_write_poll(s, 1);
127 return 0;
130 return len;
133 static ssize_t tap_receive_iov(VLANClientState *vc, const struct iovec *iov,
134 int iovcnt)
136 TAPState *s = vc->opaque;
137 const struct iovec *iovp = iov;
138 struct iovec iov_copy[iovcnt + 1];
139 struct virtio_net_hdr hdr = { 0, };
141 if (s->has_vnet_hdr && !s->using_vnet_hdr) {
142 iov_copy[0].iov_base = &hdr;
143 iov_copy[0].iov_len = sizeof(hdr);
144 memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));
145 iovp = iov_copy;
146 iovcnt++;
149 return tap_write_packet(s, iovp, iovcnt);
152 static ssize_t tap_receive_raw(VLANClientState *vc, const uint8_t *buf, size_t size)
154 TAPState *s = vc->opaque;
155 struct iovec iov[2];
156 int iovcnt = 0;
157 struct virtio_net_hdr hdr = { 0, };
159 if (s->has_vnet_hdr) {
160 iov[iovcnt].iov_base = &hdr;
161 iov[iovcnt].iov_len = sizeof(hdr);
162 iovcnt++;
165 iov[iovcnt].iov_base = (char *)buf;
166 iov[iovcnt].iov_len = size;
167 iovcnt++;
169 return tap_write_packet(s, iov, iovcnt);
172 static ssize_t tap_receive(VLANClientState *vc, const uint8_t *buf, size_t size)
174 TAPState *s = vc->opaque;
175 struct iovec iov[1];
177 if (s->has_vnet_hdr && !s->using_vnet_hdr) {
178 return tap_receive_raw(vc, buf, size);
181 iov[0].iov_base = (char *)buf;
182 iov[0].iov_len = size;
184 return tap_write_packet(s, iov, 1);
187 static int tap_can_send(void *opaque)
189 TAPState *s = opaque;
191 return qemu_can_send_packet(s->vc);
194 #ifdef __sun__
/*
 * Read one packet from the tap fd with getmsg() into buf
 * (at most maxlen bytes).  Returns the data length, or -1 on failure.
 */
static ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    struct strbuf sbuf;
    int flags = 0;

    sbuf.maxlen = maxlen;
    sbuf.buf = (char *)buf;

    if (getmsg(tapfd, NULL, &sbuf, &flags) < 0) {
        return -1;
    }
    return sbuf.len;
}
205 #else
/*
 * Read one packet from the tap fd into buf (at most maxlen bytes).
 * Returns the read() result unchanged.
 */
static ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
210 #endif
212 static void tap_send_completed(VLANClientState *vc, ssize_t len)
214 TAPState *s = vc->opaque;
215 tap_read_poll(s, 1);
218 static void tap_send(void *opaque)
220 TAPState *s = opaque;
221 int size;
223 do {
224 uint8_t *buf = s->buf;
226 size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
227 if (size <= 0) {
228 break;
231 if (s->has_vnet_hdr && !s->using_vnet_hdr) {
232 buf += sizeof(struct virtio_net_hdr);
233 size -= sizeof(struct virtio_net_hdr);
236 size = qemu_send_packet_async(s->vc, buf, size, tap_send_completed);
237 if (size == 0) {
238 tap_read_poll(s, 0);
240 } while (size > 0);
/* sndbuf should be set to a value lower than the tx queue
 * capacity of any destination network interface.
 * Ethernet NICs generally have txqueuelen=1000, so 1Mb is
 * a good default, given a 1500 byte MTU.
 *
 * The expansion is parenthesized so the macro stays a single value
 * when used inside a larger expression.
 */
#define TAP_DEFAULT_SNDBUF (1024 * 1024)
250 static int tap_set_sndbuf(TAPState *s, QemuOpts *opts)
252 int sndbuf;
254 sndbuf = qemu_opt_get_size(opts, "sndbuf", TAP_DEFAULT_SNDBUF);
255 if (!sndbuf) {
256 sndbuf = INT_MAX;
259 if (ioctl(s->fd, TUNSETSNDBUF, &sndbuf) == -1 && qemu_opt_get(opts, "sndbuf")) {
260 qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno));
261 return -1;
263 return 0;
266 int tap_has_ufo(VLANClientState *vc)
268 TAPState *s = vc->opaque;
270 assert(vc->type == NET_CLIENT_TYPE_TAP);
272 return s->has_ufo;
275 int tap_has_vnet_hdr(VLANClientState *vc)
277 TAPState *s = vc->opaque;
279 assert(vc->type == NET_CLIENT_TYPE_TAP);
281 return s->has_vnet_hdr;
284 void tap_using_vnet_hdr(VLANClientState *vc, int using_vnet_hdr)
286 TAPState *s = vc->opaque;
288 using_vnet_hdr = using_vnet_hdr != 0;
290 assert(vc->type == NET_CLIENT_TYPE_TAP);
291 assert(s->has_vnet_hdr == using_vnet_hdr);
293 s->using_vnet_hdr = using_vnet_hdr;
296 static int tap_probe_vnet_hdr(int fd)
298 struct ifreq ifr;
300 if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
301 qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno));
302 return 0;
305 return ifr.ifr_flags & IFF_VNET_HDR;
308 void tap_set_offload(VLANClientState *vc, int csum, int tso4,
309 int tso6, int ecn, int ufo)
311 TAPState *s = vc->opaque;
312 unsigned int offload = 0;
314 if (csum) {
315 offload |= TUN_F_CSUM;
316 if (tso4)
317 offload |= TUN_F_TSO4;
318 if (tso6)
319 offload |= TUN_F_TSO6;
320 if ((tso4 || tso6) && ecn)
321 offload |= TUN_F_TSO_ECN;
322 if (ufo)
323 offload |= TUN_F_UFO;
326 if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
327 offload &= ~TUN_F_UFO;
328 if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0) {
329 fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
330 strerror(errno));
335 static void tap_cleanup(VLANClientState *vc)
337 TAPState *s = vc->opaque;
339 qemu_purge_queued_packets(vc);
341 if (s->down_script[0])
342 launch_script(s->down_script, s->down_script_arg, s->fd);
344 tap_read_poll(s, 0);
345 tap_write_poll(s, 0);
346 close(s->fd);
347 qemu_free(s);
350 /* fd support */
352 static TAPState *net_tap_fd_init(VLANState *vlan,
353 const char *model,
354 const char *name,
355 int fd,
356 int vnet_hdr)
358 TAPState *s;
359 unsigned int offload;
361 s = qemu_mallocz(sizeof(TAPState));
362 s->fd = fd;
363 s->has_vnet_hdr = vnet_hdr != 0;
364 s->using_vnet_hdr = 0;
365 s->vc = qemu_new_vlan_client(NET_CLIENT_TYPE_TAP,
366 vlan, NULL, model, name, NULL,
367 tap_receive, tap_receive_raw,
368 tap_receive_iov, tap_cleanup, s);
369 s->has_ufo = 0;
370 /* Check if tap supports UFO */
371 offload = TUN_F_CSUM | TUN_F_UFO;
372 if (ioctl(s->fd, TUNSETOFFLOAD, offload) == 0)
373 s->has_ufo = 1;
374 tap_set_offload(s->vc, 0, 0, 0, 0, 0);
375 tap_read_poll(s, 1);
376 return s;
379 #ifdef __sun__
380 #define TUNNEWPPA (('T'<<16) | 0x0001)
382 * Allocate TAP device, returns opened fd.
383 * Stores dev name in the first arg(must be large enough).
385 static int tap_alloc(char *dev, size_t dev_size)
387 int tap_fd, if_fd, ppa = -1;
388 static int ip_fd = 0;
389 char *ptr;
391 static int arp_fd = 0;
392 int ip_muxid, arp_muxid;
393 struct strioctl strioc_if, strioc_ppa;
394 int link_type = I_PLINK;;
395 struct lifreq ifr;
396 char actual_name[32] = "";
398 memset(&ifr, 0x0, sizeof(ifr));
400 if( *dev ){
401 ptr = dev;
402 while( *ptr && !qemu_isdigit((int)*ptr) ) ptr++;
403 ppa = atoi(ptr);
406 /* Check if IP device was opened */
407 if( ip_fd )
408 close(ip_fd);
410 TFR(ip_fd = open("/dev/udp", O_RDWR, 0));
411 if (ip_fd < 0) {
412 syslog(LOG_ERR, "Can't open /dev/ip (actually /dev/udp)");
413 return -1;
416 TFR(tap_fd = open("/dev/tap", O_RDWR, 0));
417 if (tap_fd < 0) {
418 syslog(LOG_ERR, "Can't open /dev/tap");
419 return -1;
422 /* Assign a new PPA and get its unit number. */
423 strioc_ppa.ic_cmd = TUNNEWPPA;
424 strioc_ppa.ic_timout = 0;
425 strioc_ppa.ic_len = sizeof(ppa);
426 strioc_ppa.ic_dp = (char *)&ppa;
427 if ((ppa = ioctl (tap_fd, I_STR, &strioc_ppa)) < 0)
428 syslog (LOG_ERR, "Can't assign new interface");
430 TFR(if_fd = open("/dev/tap", O_RDWR, 0));
431 if (if_fd < 0) {
432 syslog(LOG_ERR, "Can't open /dev/tap (2)");
433 return -1;
435 if(ioctl(if_fd, I_PUSH, "ip") < 0){
436 syslog(LOG_ERR, "Can't push IP module");
437 return -1;
440 if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) < 0)
441 syslog(LOG_ERR, "Can't get flags\n");
443 snprintf (actual_name, 32, "tap%d", ppa);
444 pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name);
446 ifr.lifr_ppa = ppa;
447 /* Assign ppa according to the unit number returned by tun device */
449 if (ioctl (if_fd, SIOCSLIFNAME, &ifr) < 0)
450 syslog (LOG_ERR, "Can't set PPA %d", ppa);
451 if (ioctl(if_fd, SIOCGLIFFLAGS, &ifr) <0)
452 syslog (LOG_ERR, "Can't get flags\n");
453 /* Push arp module to if_fd */
454 if (ioctl (if_fd, I_PUSH, "arp") < 0)
455 syslog (LOG_ERR, "Can't push ARP module (2)");
457 /* Push arp module to ip_fd */
458 if (ioctl (ip_fd, I_POP, NULL) < 0)
459 syslog (LOG_ERR, "I_POP failed\n");
460 if (ioctl (ip_fd, I_PUSH, "arp") < 0)
461 syslog (LOG_ERR, "Can't push ARP module (3)\n");
462 /* Open arp_fd */
463 TFR(arp_fd = open ("/dev/tap", O_RDWR, 0));
464 if (arp_fd < 0)
465 syslog (LOG_ERR, "Can't open %s\n", "/dev/tap");
467 /* Set ifname to arp */
468 strioc_if.ic_cmd = SIOCSLIFNAME;
469 strioc_if.ic_timout = 0;
470 strioc_if.ic_len = sizeof(ifr);
471 strioc_if.ic_dp = (char *)&ifr;
472 if (ioctl(arp_fd, I_STR, &strioc_if) < 0){
473 syslog (LOG_ERR, "Can't set ifname to arp\n");
476 if((ip_muxid = ioctl(ip_fd, I_LINK, if_fd)) < 0){
477 syslog(LOG_ERR, "Can't link TAP device to IP");
478 return -1;
481 if ((arp_muxid = ioctl (ip_fd, link_type, arp_fd)) < 0)
482 syslog (LOG_ERR, "Can't link TAP device to ARP");
484 close (if_fd);
486 memset(&ifr, 0x0, sizeof(ifr));
487 pstrcpy(ifr.lifr_name, sizeof(ifr.lifr_name), actual_name);
488 ifr.lifr_ip_muxid = ip_muxid;
489 ifr.lifr_arp_muxid = arp_muxid;
491 if (ioctl (ip_fd, SIOCSLIFMUXID, &ifr) < 0)
493 ioctl (ip_fd, I_PUNLINK , arp_muxid);
494 ioctl (ip_fd, I_PUNLINK, ip_muxid);
495 syslog (LOG_ERR, "Can't set multiplexor id");
498 snprintf(dev, dev_size, "tap%d", ppa);
499 return tap_fd;
/*
 * Solaris: allocate a tap device via tap_alloc(), copy its name into
 * ifname and put the fd into non-blocking mode.
 * vnet_hdr and vnet_hdr_required are not used here.
 * Returns the fd, or -1 if allocation failed.
 */
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
{
    char dev[10] = "";
    int fd = tap_alloc(dev, sizeof(dev));

    if (fd < 0) {
        fprintf(stderr, "Cannot allocate TAP device\n");
        return -1;
    }

    pstrcpy(ifname, ifname_size, dev);
    fcntl(fd, F_SETFL, O_NONBLOCK);
    return fd;
}
514 #elif defined (_AIX)
/* AIX: tap is not supported; print a message and return -1. */
int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
{
    fputs("no tap on AIX\n", stderr);
    return -1;
}
520 #else
521 int tap_open(char *ifname, int ifname_size, int *vnet_hdr, int vnet_hdr_required)
523 struct ifreq ifr;
524 int fd, ret;
526 TFR(fd = open("/dev/net/tun", O_RDWR));
527 if (fd < 0) {
528 fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
529 return -1;
531 memset(&ifr, 0, sizeof(ifr));
532 ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
534 if (*vnet_hdr) {
535 unsigned int features;
537 if (ioctl(fd, TUNGETFEATURES, &features) == 0 &&
538 features & IFF_VNET_HDR) {
539 *vnet_hdr = 1;
540 ifr.ifr_flags |= IFF_VNET_HDR;
543 if (vnet_hdr_required && !*vnet_hdr) {
544 qemu_error("vnet_hdr=1 requested, but no kernel "
545 "support for IFF_VNET_HDR available");
546 close(fd);
547 return -1;
551 if (ifname[0] != '\0')
552 pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
553 else
554 pstrcpy(ifr.ifr_name, IFNAMSIZ, "tap%d");
555 ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
556 if (ret != 0) {
557 fprintf(stderr, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
558 close(fd);
559 return -1;
561 pstrcpy(ifname, ifname_size, ifr.ifr_name);
562 fcntl(fd, F_SETFL, O_NONBLOCK);
563 return fd;
565 #endif
/*
 * Run a network setup/teardown script and wait for it to finish.
 *
 * setup_script: path of the executable; it is also passed as argv[0].
 * ifname:       passed to the script as its single argument.
 * fd:           descriptor left open in the child in addition to
 *               stdin/stdout/stderr; all others are closed.
 *
 * SIGCHLD is blocked while the child runs and the previous signal mask
 * is restored on every path, including fork() failure.
 *
 * Returns 0 if the script exited with status 0, -1 otherwise (fork or
 * waitpid failure, exec failure, non-zero exit, death by signal).
 */
static int launch_script(const char *setup_script, const char *ifname, int fd)
{
    sigset_t oldmask, mask;
    int pid, status = 0;
    int reaped = 0;
    char *args[3];
    char **parg;

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    /* try to launch network script */
    pid = fork();
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;

        for (i = 0; i < open_max; i++) {
            if (i != STDIN_FILENO &&
                i != STDOUT_FILENO &&
                i != STDERR_FILENO &&
                i != fd) {
                close(i);
            }
        }
        parg = args;
        *parg++ = (char *)setup_script;
        *parg++ = (char *)ifname;
        *parg++ = NULL;
        execv(setup_script, args);
        /* only reached when execv() failed */
        _exit(1);
    }

    if (pid > 0) {
        int ret;

        /* retry only when interrupted; any other error must not spin */
        do {
            ret = waitpid(pid, &status, 0);
        } while (ret == -1 && errno == EINTR);
        reaped = (ret == pid);
    }

    /* restore the mask even when fork() or waitpid() failed */
    sigprocmask(SIG_SETMASK, &oldmask, NULL);

    if (reaped && WIFEXITED(status) && WEXITSTATUS(status) == 0) {
        return 0;
    }

    fprintf(stderr, "%s: could not launch network script\n", setup_script);
    return -1;
}
611 static int net_tap_init(QemuOpts *opts, int *vnet_hdr)
613 int fd, vnet_hdr_required;
614 char ifname[128] = {0,};
615 const char *setup_script;
617 if (qemu_opt_get(opts, "ifname")) {
618 pstrcpy(ifname, sizeof(ifname), qemu_opt_get(opts, "ifname"));
621 *vnet_hdr = qemu_opt_get_bool(opts, "vnet_hdr", 1);
622 if (qemu_opt_get(opts, "vnet_hdr")) {
623 vnet_hdr_required = *vnet_hdr;
624 } else {
625 vnet_hdr_required = 0;
628 TFR(fd = tap_open(ifname, sizeof(ifname), vnet_hdr, vnet_hdr_required));
629 if (fd < 0) {
630 return -1;
633 setup_script = qemu_opt_get(opts, "script");
634 if (setup_script &&
635 setup_script[0] != '\0' &&
636 strcmp(setup_script, "no") != 0 &&
637 launch_script(setup_script, ifname, fd)) {
638 close(fd);
639 return -1;
642 qemu_opt_set(opts, "ifname", ifname);
644 return fd;
647 int net_init_tap(QemuOpts *opts, Monitor *mon, const char *name, VLANState *vlan)
649 TAPState *s;
650 int fd, vnet_hdr;
652 if (qemu_opt_get(opts, "fd")) {
653 if (qemu_opt_get(opts, "ifname") ||
654 qemu_opt_get(opts, "script") ||
655 qemu_opt_get(opts, "downscript") ||
656 qemu_opt_get(opts, "vnet_hdr")) {
657 qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
658 return -1;
661 fd = net_handle_fd_param(mon, qemu_opt_get(opts, "fd"));
662 if (fd == -1) {
663 return -1;
666 fcntl(fd, F_SETFL, O_NONBLOCK);
668 vnet_hdr = tap_probe_vnet_hdr(fd);
669 } else {
670 if (!qemu_opt_get(opts, "script")) {
671 qemu_opt_set(opts, "script", DEFAULT_NETWORK_SCRIPT);
674 if (!qemu_opt_get(opts, "downscript")) {
675 qemu_opt_set(opts, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT);
678 fd = net_tap_init(opts, &vnet_hdr);
681 s = net_tap_fd_init(vlan, "tap", name, fd, vnet_hdr);
682 if (!s) {
683 close(fd);
684 return -1;
687 if (tap_set_sndbuf(s, opts) < 0) {
688 return -1;
691 if (qemu_opt_get(opts, "fd")) {
692 snprintf(s->vc->info_str, sizeof(s->vc->info_str), "fd=%d", fd);
693 } else {
694 const char *ifname, *script, *downscript;
696 ifname = qemu_opt_get(opts, "ifname");
697 script = qemu_opt_get(opts, "script");
698 downscript = qemu_opt_get(opts, "downscript");
700 snprintf(s->vc->info_str, sizeof(s->vc->info_str),
701 "ifname=%s,script=%s,downscript=%s",
702 ifname, script, downscript);
704 if (strcmp(downscript, "no") != 0) {
705 snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
706 snprintf(s->down_script_arg, sizeof(s->down_script_arg), "%s", ifname);
710 if (vlan) {
711 vlan->nb_host_devs++;
714 return 0;
717 #endif /* !defined(_AIX) */