4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2009 Red Hat, Inc.
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 #include "config-host.h"
31 #include <sys/ioctl.h>
38 #include "qemu-char.h"
39 #include "qemu-common.h"
42 #include "net/tap-linux.h"
47 #include <sys/ethernet.h>
48 #include <sys/sockio.h>
49 #include <netinet/arp.h>
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip_icmp.h> // must come after ip.h
54 #include <netinet/udp.h>
55 #include <netinet/tcp.h>
/* Maximum GSO packet size (64 KiB) plus plenty of room for
 * the Ethernet and virtio_net headers.
 */
66 #define TAP_BUFSIZE (4096 + 65536)
68 typedef struct TAPState
{
71 char down_script
[1024];
72 char down_script_arg
[128];
73 uint8_t buf
[TAP_BUFSIZE
];
74 unsigned int read_poll
: 1;
75 unsigned int write_poll
: 1;
76 unsigned int has_vnet_hdr
: 1;
77 unsigned int using_vnet_hdr
: 1;
78 unsigned int has_ufo
: 1;
/* Forward declaration: launch_script() is called before its definition. */
static int launch_script(const char *setup_script, const char *ifname, int fd);

/* Callbacks handed to qemu_set_fd_handler2() by tap_update_fd_handler(). */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);
87 static void tap_update_fd_handler(TAPState
*s
)
89 qemu_set_fd_handler2(s
->fd
,
90 s
->read_poll
? tap_can_send
: NULL
,
91 s
->read_poll
? tap_send
: NULL
,
92 s
->write_poll
? tap_writable
: NULL
,
96 static void tap_read_poll(TAPState
*s
, int enable
)
98 s
->read_poll
= !!enable
;
99 tap_update_fd_handler(s
);
102 static void tap_write_poll(TAPState
*s
, int enable
)
104 s
->write_poll
= !!enable
;
105 tap_update_fd_handler(s
);
108 static void tap_writable(void *opaque
)
110 TAPState
*s
= opaque
;
112 tap_write_poll(s
, 0);
114 qemu_flush_queued_packets(s
->vc
);
117 static ssize_t
tap_write_packet(TAPState
*s
, const struct iovec
*iov
, int iovcnt
)
122 len
= writev(s
->fd
, iov
, iovcnt
);
123 } while (len
== -1 && errno
== EINTR
);
125 if (len
== -1 && errno
== EAGAIN
) {
126 tap_write_poll(s
, 1);
133 static ssize_t
tap_receive_iov(VLANClientState
*vc
, const struct iovec
*iov
,
136 TAPState
*s
= vc
->opaque
;
137 const struct iovec
*iovp
= iov
;
138 struct iovec iov_copy
[iovcnt
+ 1];
139 struct virtio_net_hdr hdr
= { 0, };
141 if (s
->has_vnet_hdr
&& !s
->using_vnet_hdr
) {
142 iov_copy
[0].iov_base
= &hdr
;
143 iov_copy
[0].iov_len
= sizeof(hdr
);
144 memcpy(&iov_copy
[1], iov
, iovcnt
* sizeof(*iov
));
149 return tap_write_packet(s
, iovp
, iovcnt
);
152 static ssize_t
tap_receive_raw(VLANClientState
*vc
, const uint8_t *buf
, size_t size
)
154 TAPState
*s
= vc
->opaque
;
157 struct virtio_net_hdr hdr
= { 0, };
159 if (s
->has_vnet_hdr
) {
160 iov
[iovcnt
].iov_base
= &hdr
;
161 iov
[iovcnt
].iov_len
= sizeof(hdr
);
165 iov
[iovcnt
].iov_base
= (char *)buf
;
166 iov
[iovcnt
].iov_len
= size
;
169 return tap_write_packet(s
, iov
, iovcnt
);
172 static ssize_t
tap_receive(VLANClientState
*vc
, const uint8_t *buf
, size_t size
)
174 TAPState
*s
= vc
->opaque
;
177 if (s
->has_vnet_hdr
&& !s
->using_vnet_hdr
) {
178 return tap_receive_raw(vc
, buf
, size
);
181 iov
[0].iov_base
= (char *)buf
;
182 iov
[0].iov_len
= size
;
184 return tap_write_packet(s
, iov
, 1);
187 static int tap_can_send(void *opaque
)
189 TAPState
*s
= opaque
;
191 return qemu_can_send_packet(s
->vc
);
195 static ssize_t
tap_read_packet(int tapfd
, uint8_t *buf
, int maxlen
)
200 sbuf
.maxlen
= maxlen
;
201 sbuf
.buf
= (char *)buf
;
203 return getmsg(tapfd
, NULL
, &sbuf
, &f
) >= 0 ? sbuf
.len
: -1;
/*
 * Read one chunk of data from the TAP descriptor.
 *
 * @tapfd:  descriptor to read from
 * @buf:    destination buffer
 * @maxlen: capacity of @buf in bytes
 *
 * Returns the result of read(2) unchanged: the number of bytes read, or -1
 * with errno set on failure.
 */
static ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
212 static void tap_send_completed(VLANClientState
*vc
, ssize_t len
)
214 TAPState
*s
= vc
->opaque
;
218 static void tap_send(void *opaque
)
220 TAPState
*s
= opaque
;
224 uint8_t *buf
= s
->buf
;
226 size
= tap_read_packet(s
->fd
, s
->buf
, sizeof(s
->buf
));
231 if (s
->has_vnet_hdr
&& !s
->using_vnet_hdr
) {
232 buf
+= sizeof(struct virtio_net_hdr
);
233 size
-= sizeof(struct virtio_net_hdr
);
236 size
= qemu_send_packet_async(s
->vc
, buf
, size
, tap_send_completed
);
/* sndbuf should be set to a value lower than the tx queue
 * capacity of any destination network interface.
 * Ethernet NICs generally have txqueuelen=1000, so 1MB is
 * a good default, given a 1500-byte MTU.
 */
/* Default send-buffer size in bytes, used when no "sndbuf" option is given.
 * The expansion is parenthesised so the macro stays a single operand when
 * used inside a larger expression (e.g. x / TAP_DEFAULT_SNDBUF). */
#define TAP_DEFAULT_SNDBUF (1024 * 1024)
250 static int tap_set_sndbuf(TAPState
*s
, QemuOpts
*opts
)
254 sndbuf
= qemu_opt_get_size(opts
, "sndbuf", TAP_DEFAULT_SNDBUF
);
259 if (ioctl(s
->fd
, TUNSETSNDBUF
, &sndbuf
) == -1 && qemu_opt_get(opts
, "sndbuf")) {
260 qemu_error("TUNSETSNDBUF ioctl failed: %s\n", strerror(errno
));
266 int tap_has_ufo(VLANClientState
*vc
)
268 TAPState
*s
= vc
->opaque
;
270 assert(vc
->type
== NET_CLIENT_TYPE_TAP
);
275 int tap_has_vnet_hdr(VLANClientState
*vc
)
277 TAPState
*s
= vc
->opaque
;
279 assert(vc
->type
== NET_CLIENT_TYPE_TAP
);
281 return s
->has_vnet_hdr
;
284 void tap_using_vnet_hdr(VLANClientState
*vc
, int using_vnet_hdr
)
286 TAPState
*s
= vc
->opaque
;
288 using_vnet_hdr
= using_vnet_hdr
!= 0;
290 assert(vc
->type
== NET_CLIENT_TYPE_TAP
);
291 assert(s
->has_vnet_hdr
== using_vnet_hdr
);
293 s
->using_vnet_hdr
= using_vnet_hdr
;
296 static int tap_probe_vnet_hdr(int fd
)
300 if (ioctl(fd
, TUNGETIFF
, &ifr
) != 0) {
301 qemu_error("TUNGETIFF ioctl() failed: %s\n", strerror(errno
));
305 return ifr
.ifr_flags
& IFF_VNET_HDR
;
308 void tap_set_offload(VLANClientState
*vc
, int csum
, int tso4
,
309 int tso6
, int ecn
, int ufo
)
311 TAPState
*s
= vc
->opaque
;
312 unsigned int offload
= 0;
315 offload
|= TUN_F_CSUM
;
317 offload
|= TUN_F_TSO4
;
319 offload
|= TUN_F_TSO6
;
320 if ((tso4
|| tso6
) && ecn
)
321 offload
|= TUN_F_TSO_ECN
;
323 offload
|= TUN_F_UFO
;
326 if (ioctl(s
->fd
, TUNSETOFFLOAD
, offload
) != 0) {
327 offload
&= ~TUN_F_UFO
;
328 if (ioctl(s
->fd
, TUNSETOFFLOAD
, offload
) != 0) {
329 fprintf(stderr
, "TUNSETOFFLOAD ioctl() failed: %s\n",
335 static void tap_cleanup(VLANClientState
*vc
)
337 TAPState
*s
= vc
->opaque
;
339 qemu_purge_queued_packets(vc
);
341 if (s
->down_script
[0])
342 launch_script(s
->down_script
, s
->down_script_arg
, s
->fd
);
345 tap_write_poll(s
, 0);
352 static TAPState
*net_tap_fd_init(VLANState
*vlan
,
359 unsigned int offload
;
361 s
= qemu_mallocz(sizeof(TAPState
));
363 s
->has_vnet_hdr
= vnet_hdr
!= 0;
364 s
->using_vnet_hdr
= 0;
365 s
->vc
= qemu_new_vlan_client(NET_CLIENT_TYPE_TAP
,
366 vlan
, NULL
, model
, name
, NULL
,
367 tap_receive
, tap_receive_raw
,
368 tap_receive_iov
, tap_cleanup
, s
);
370 /* Check if tap supports UFO */
371 offload
= TUN_F_CSUM
| TUN_F_UFO
;
372 if (ioctl(s
->fd
, TUNSETOFFLOAD
, offload
) == 0)
374 tap_set_offload(s
->vc
, 0, 0, 0, 0, 0);
380 #define TUNNEWPPA (('T'<<16) | 0x0001)
/*
 * Allocate a TAP device and return the opened fd.
 * Stores the device name in the first argument (which must be large enough).
 */
385 static int tap_alloc(char *dev
, size_t dev_size
)
387 int tap_fd
, if_fd
, ppa
= -1;
388 static int ip_fd
= 0;
391 static int arp_fd
= 0;
392 int ip_muxid
, arp_muxid
;
393 struct strioctl strioc_if
, strioc_ppa
;
394 int link_type
= I_PLINK
;;
396 char actual_name
[32] = "";
398 memset(&ifr
, 0x0, sizeof(ifr
));
402 while( *ptr
&& !qemu_isdigit((int)*ptr
) ) ptr
++;
406 /* Check if IP device was opened */
410 TFR(ip_fd
= open("/dev/udp", O_RDWR
, 0));
412 syslog(LOG_ERR
, "Can't open /dev/ip (actually /dev/udp)");
416 TFR(tap_fd
= open("/dev/tap", O_RDWR
, 0));
418 syslog(LOG_ERR
, "Can't open /dev/tap");
422 /* Assign a new PPA and get its unit number. */
423 strioc_ppa
.ic_cmd
= TUNNEWPPA
;
424 strioc_ppa
.ic_timout
= 0;
425 strioc_ppa
.ic_len
= sizeof(ppa
);
426 strioc_ppa
.ic_dp
= (char *)&ppa
;
427 if ((ppa
= ioctl (tap_fd
, I_STR
, &strioc_ppa
)) < 0)
428 syslog (LOG_ERR
, "Can't assign new interface");
430 TFR(if_fd
= open("/dev/tap", O_RDWR
, 0));
432 syslog(LOG_ERR
, "Can't open /dev/tap (2)");
435 if(ioctl(if_fd
, I_PUSH
, "ip") < 0){
436 syslog(LOG_ERR
, "Can't push IP module");
440 if (ioctl(if_fd
, SIOCGLIFFLAGS
, &ifr
) < 0)
441 syslog(LOG_ERR
, "Can't get flags\n");
443 snprintf (actual_name
, 32, "tap%d", ppa
);
444 pstrcpy(ifr
.lifr_name
, sizeof(ifr
.lifr_name
), actual_name
);
447 /* Assign ppa according to the unit number returned by tun device */
449 if (ioctl (if_fd
, SIOCSLIFNAME
, &ifr
) < 0)
450 syslog (LOG_ERR
, "Can't set PPA %d", ppa
);
451 if (ioctl(if_fd
, SIOCGLIFFLAGS
, &ifr
) <0)
452 syslog (LOG_ERR
, "Can't get flags\n");
453 /* Push arp module to if_fd */
454 if (ioctl (if_fd
, I_PUSH
, "arp") < 0)
455 syslog (LOG_ERR
, "Can't push ARP module (2)");
457 /* Push arp module to ip_fd */
458 if (ioctl (ip_fd
, I_POP
, NULL
) < 0)
459 syslog (LOG_ERR
, "I_POP failed\n");
460 if (ioctl (ip_fd
, I_PUSH
, "arp") < 0)
461 syslog (LOG_ERR
, "Can't push ARP module (3)\n");
463 TFR(arp_fd
= open ("/dev/tap", O_RDWR
, 0));
465 syslog (LOG_ERR
, "Can't open %s\n", "/dev/tap");
467 /* Set ifname to arp */
468 strioc_if
.ic_cmd
= SIOCSLIFNAME
;
469 strioc_if
.ic_timout
= 0;
470 strioc_if
.ic_len
= sizeof(ifr
);
471 strioc_if
.ic_dp
= (char *)&ifr
;
472 if (ioctl(arp_fd
, I_STR
, &strioc_if
) < 0){
473 syslog (LOG_ERR
, "Can't set ifname to arp\n");
476 if((ip_muxid
= ioctl(ip_fd
, I_LINK
, if_fd
)) < 0){
477 syslog(LOG_ERR
, "Can't link TAP device to IP");
481 if ((arp_muxid
= ioctl (ip_fd
, link_type
, arp_fd
)) < 0)
482 syslog (LOG_ERR
, "Can't link TAP device to ARP");
486 memset(&ifr
, 0x0, sizeof(ifr
));
487 pstrcpy(ifr
.lifr_name
, sizeof(ifr
.lifr_name
), actual_name
);
488 ifr
.lifr_ip_muxid
= ip_muxid
;
489 ifr
.lifr_arp_muxid
= arp_muxid
;
491 if (ioctl (ip_fd
, SIOCSLIFMUXID
, &ifr
) < 0)
493 ioctl (ip_fd
, I_PUNLINK
, arp_muxid
);
494 ioctl (ip_fd
, I_PUNLINK
, ip_muxid
);
495 syslog (LOG_ERR
, "Can't set multiplexor id");
498 snprintf(dev
, dev_size
, "tap%d", ppa
);
502 int tap_open(char *ifname
, int ifname_size
, int *vnet_hdr
, int vnet_hdr_required
)
506 if( (fd
= tap_alloc(dev
, sizeof(dev
))) < 0 ){
507 fprintf(stderr
, "Cannot allocate TAP device\n");
510 pstrcpy(ifname
, ifname_size
, dev
);
511 fcntl(fd
, F_SETFL
, O_NONBLOCK
);
515 int tap_open(char *ifname
, int ifname_size
, int *vnet_hdr
, int vnet_hdr_required
)
517 fprintf (stderr
, "no tap on AIX\n");
521 int tap_open(char *ifname
, int ifname_size
, int *vnet_hdr
, int vnet_hdr_required
)
526 TFR(fd
= open("/dev/net/tun", O_RDWR
));
528 fprintf(stderr
, "warning: could not open /dev/net/tun: no virtual network emulation\n");
531 memset(&ifr
, 0, sizeof(ifr
));
532 ifr
.ifr_flags
= IFF_TAP
| IFF_NO_PI
;
535 unsigned int features
;
537 if (ioctl(fd
, TUNGETFEATURES
, &features
) == 0 &&
538 features
& IFF_VNET_HDR
) {
540 ifr
.ifr_flags
|= IFF_VNET_HDR
;
543 if (vnet_hdr_required
&& !*vnet_hdr
) {
544 qemu_error("vnet_hdr=1 requested, but no kernel "
545 "support for IFF_VNET_HDR available");
551 if (ifname
[0] != '\0')
552 pstrcpy(ifr
.ifr_name
, IFNAMSIZ
, ifname
);
554 pstrcpy(ifr
.ifr_name
, IFNAMSIZ
, "tap%d");
555 ret
= ioctl(fd
, TUNSETIFF
, (void *) &ifr
);
557 fprintf(stderr
, "warning: could not configure /dev/net/tun: no virtual network emulation\n");
561 pstrcpy(ifname
, ifname_size
, ifr
.ifr_name
);
562 fcntl(fd
, F_SETFL
, O_NONBLOCK
);
567 static int launch_script(const char *setup_script
, const char *ifname
, int fd
)
569 sigset_t oldmask
, mask
;
575 sigaddset(&mask
, SIGCHLD
);
576 sigprocmask(SIG_BLOCK
, &mask
, &oldmask
);
578 /* try to launch network script */
581 int open_max
= sysconf(_SC_OPEN_MAX
), i
;
583 for (i
= 0; i
< open_max
; i
++) {
584 if (i
!= STDIN_FILENO
&&
585 i
!= STDOUT_FILENO
&&
586 i
!= STDERR_FILENO
&&
592 *parg
++ = (char *)setup_script
;
593 *parg
++ = (char *)ifname
;
595 execv(setup_script
, args
);
597 } else if (pid
> 0) {
598 while (waitpid(pid
, &status
, 0) != pid
) {
601 sigprocmask(SIG_SETMASK
, &oldmask
, NULL
);
603 if (WIFEXITED(status
) && WEXITSTATUS(status
) == 0) {
607 fprintf(stderr
, "%s: could not launch network script\n", setup_script
);
611 static int net_tap_init(QemuOpts
*opts
, int *vnet_hdr
)
613 int fd
, vnet_hdr_required
;
614 char ifname
[128] = {0,};
615 const char *setup_script
;
617 if (qemu_opt_get(opts
, "ifname")) {
618 pstrcpy(ifname
, sizeof(ifname
), qemu_opt_get(opts
, "ifname"));
621 *vnet_hdr
= qemu_opt_get_bool(opts
, "vnet_hdr", 1);
622 if (qemu_opt_get(opts
, "vnet_hdr")) {
623 vnet_hdr_required
= *vnet_hdr
;
625 vnet_hdr_required
= 0;
628 TFR(fd
= tap_open(ifname
, sizeof(ifname
), vnet_hdr
, vnet_hdr_required
));
633 setup_script
= qemu_opt_get(opts
, "script");
635 setup_script
[0] != '\0' &&
636 strcmp(setup_script
, "no") != 0 &&
637 launch_script(setup_script
, ifname
, fd
)) {
642 qemu_opt_set(opts
, "ifname", ifname
);
647 int net_init_tap(QemuOpts
*opts
, Monitor
*mon
, const char *name
, VLANState
*vlan
)
652 if (qemu_opt_get(opts
, "fd")) {
653 if (qemu_opt_get(opts
, "ifname") ||
654 qemu_opt_get(opts
, "script") ||
655 qemu_opt_get(opts
, "downscript") ||
656 qemu_opt_get(opts
, "vnet_hdr")) {
657 qemu_error("ifname=, script=, downscript= and vnet_hdr= is invalid with fd=\n");
661 fd
= net_handle_fd_param(mon
, qemu_opt_get(opts
, "fd"));
666 fcntl(fd
, F_SETFL
, O_NONBLOCK
);
668 vnet_hdr
= tap_probe_vnet_hdr(fd
);
670 if (!qemu_opt_get(opts
, "script")) {
671 qemu_opt_set(opts
, "script", DEFAULT_NETWORK_SCRIPT
);
674 if (!qemu_opt_get(opts
, "downscript")) {
675 qemu_opt_set(opts
, "downscript", DEFAULT_NETWORK_DOWN_SCRIPT
);
678 fd
= net_tap_init(opts
, &vnet_hdr
);
681 s
= net_tap_fd_init(vlan
, "tap", name
, fd
, vnet_hdr
);
687 if (tap_set_sndbuf(s
, opts
) < 0) {
691 if (qemu_opt_get(opts
, "fd")) {
692 snprintf(s
->vc
->info_str
, sizeof(s
->vc
->info_str
), "fd=%d", fd
);
694 const char *ifname
, *script
, *downscript
;
696 ifname
= qemu_opt_get(opts
, "ifname");
697 script
= qemu_opt_get(opts
, "script");
698 downscript
= qemu_opt_get(opts
, "downscript");
700 snprintf(s
->vc
->info_str
, sizeof(s
->vc
->info_str
),
701 "ifname=%s,script=%s,downscript=%s",
702 ifname
, script
, downscript
);
704 if (strcmp(downscript
, "no") != 0) {
705 snprintf(s
->down_script
, sizeof(s
->down_script
), "%s", downscript
);
706 snprintf(s
->down_script_arg
, sizeof(s
->down_script_arg
), "%s", ifname
);
711 vlan
->nb_host_devs
++;
717 #endif /* !defined(_AIX) */