pc64: An mfence is supposed to suffice for TSC_DEADLINE vs. xAPIC ordering.
[dragonfly.git] / usr.sbin / vknetd / vknetd.c
blob586c1376e41d3e372aedce061d3e3ac0dd0d9dd6
1 /*
2 * Copyright (c) 2008 The DragonFly Project. All rights reserved.
3 *
4 * This code is derived from software contributed to The DragonFly Project
5 * by Matthew Dillon <dillon@backplane.com>
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * 3. Neither the name of The DragonFly Project nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific, prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 * vknet [-cdU] [-b bridgeN] [-p socket_path] [-i pidfile] [-t tapN] [address/cidrbits]
37 * Create a named unix-domain socket which userland vkernels can open
38 * to gain access to a local network. All connections to the socket
39 * are bridged together and the local network can also be bridged onto
40 * a TAP interface by specifying the -t option.
42 #include "vknetd.h"
44 static ioinfo_t vknet_tap(const char *tapName, const char *bridgeName);
45 static int vknet_listener(const char *pathName);
46 static void vknet_acceptor(int net_fd);
47 static void *vknet_io(void *arg);
48 static int vknet_connect(const char *pathName);
49 static void vknet_monitor(int net_fd);
50 static void usage(void);
51 static void writepid(void);
52 static void cleanup(int);
54 pthread_mutex_t BridgeMutex;
56 int SecureOpt = 1;
57 int DebugOpt = 0;
58 int SetAddrOpt = 0;
59 const char *pidfile = "/var/run/vknetd.pid";
61 struct in_addr NetAddress;
62 struct in_addr NetMask;
64 int
65 main(int ac, char **av)
67 const char *pathName = "/var/run/vknet";
68 const char *tapName = "auto";
69 const char *bridgeName = NULL;
70 int net_fd;
71 int connectOpt = 0;
72 int c;
73 ioinfo_t tap_info;
74 pthread_t dummy_td;
76 while ((c = getopt(ac, av, "b:cdp:i:t:U")) != -1) {
77 switch (c) {
78 case 'U':
79 SecureOpt = 0;
80 break;
81 case 'b':
82 bridgeName = optarg;
83 break;
84 case 'd':
85 DebugOpt = 1;
86 break;
87 case 'p':
88 pathName = optarg;
89 break;
90 case 'i':
91 pidfile = optarg;
92 break;
93 case 't':
94 tapName = optarg;
95 break;
96 case 'c':
97 connectOpt = 1;
98 break;
99 default:
100 usage();
103 av += optind;
104 ac -= optind;
105 if (ac)
106 SetAddrOpt = 1;
109 * Ignore SIGPIPE to prevent write() races against disconnecting
110 * clients from killing vknetd. Should be inherited by all I/O
111 * threads.
113 signal(SIGPIPE, SIG_IGN);
116 * Special connect/debug mode
118 if (connectOpt) {
119 net_fd = vknet_connect(pathName);
120 if (net_fd < 0) {
121 perror("connect");
122 exit(1);
124 vknet_monitor(net_fd);
125 exit(0);
129 * In secure mode (the default), a network address/mask must be
130 * specified. e.g. 10.1.0.0/16. Any traffic going out the TAP
131 * interface will be filtered.
133 * If non-secure mode the network address/mask is optional.
135 if (SecureOpt || SetAddrOpt) {
136 char *str;
137 int masklen;
138 u_int32_t mask;
140 if (ac == 0 || strchr(av[0], '/') == NULL)
141 usage();
142 str = strdup(av[0]);
143 if (inet_pton(AF_INET, strtok(str, "/"), &NetAddress) <= 0)
144 usage();
145 masklen = strtoul(strtok(NULL, "/"), NULL, 10);
146 mask = (1 << (32 - masklen)) - 1;
147 NetMask.s_addr = htonl(~mask);
151 * Normal operation, create the tap/bridge and listener. This
152 * part is not threaded.
154 mac_init();
156 if ((tap_info = vknet_tap(tapName, bridgeName)) == NULL) {
157 perror("tap: ");
158 exit(1);
160 if ((net_fd = vknet_listener(pathName)) < 0) {
161 perror("listener: ");
162 exit(1);
166 * Now make us a demon and start the threads going.
168 if (DebugOpt == 0)
169 daemon(1, 0);
171 writepid();
173 signal(SIGINT, cleanup);
174 signal(SIGHUP, cleanup);
175 signal(SIGTERM, cleanup);
177 pthread_mutex_init(&BridgeMutex, NULL);
178 pthread_create(&dummy_td, NULL, vknet_io, tap_info);
179 vknet_acceptor(net_fd);
181 exit(0);
184 #define TAPDEV_MINOR(x) ((int)((x) & 0xffff00ff))
186 static ioinfo_t
187 vknet_tap(const char *tapName, const char *bridgeName)
189 struct ifreq ifr;
190 struct ifaliasreq ifra;
191 struct stat st;
192 char *buf = NULL;
193 int tap_fd;
194 int tap_unit;
195 int i;
196 int s;
197 int flags;
198 ioinfo_t info;
200 if (strcmp(tapName, "auto") == 0) {
201 for (i = 0; ; ++i) {
202 asprintf(&buf, "/dev/tap%d", i);
203 tap_fd = open(buf, O_RDWR | O_NONBLOCK);
204 free(buf);
205 if (tap_fd >= 0 || errno == ENOENT)
206 break;
208 } else if (strncmp(tapName, "tap", 3) == 0) {
209 asprintf(&buf, "/dev/%s", tapName);
210 tap_fd = open(buf, O_RDWR | O_NONBLOCK);
211 free(buf);
212 } else {
213 tap_fd = open(tapName, O_RDWR | O_NONBLOCK);
215 if (tap_fd < 0)
216 return(NULL);
219 * Figure out the tap unit number
221 if (fstat(tap_fd, &st) < 0) {
222 close(tap_fd);
223 return(NULL);
225 tap_unit = TAPDEV_MINOR(st.st_rdev);
228 * Setup for ioctls
230 fcntl(tap_fd, F_SETFL, 0);
231 bzero(&ifr, sizeof(ifr));
232 bzero(&ifra, sizeof(ifra));
233 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "tap%d", tap_unit);
234 snprintf(ifra.ifra_name, sizeof(ifra.ifra_name), "tap%d", tap_unit);
236 s = socket(AF_INET, SOCK_DGRAM, 0);
239 * Set the interface address if in Secure mode.
241 if (SetAddrOpt) {
242 struct sockaddr_in *in;
244 in = (void *)&ifra.ifra_addr;
245 in->sin_family = AF_INET;
246 in->sin_len = sizeof(ifra.ifra_addr);
247 in->sin_addr = NetAddress;
248 in = (void *)&ifra.ifra_mask;
249 in->sin_family = AF_INET;
250 in->sin_len = sizeof(ifra.ifra_mask);
251 in->sin_addr = NetMask;
252 if (ioctl(s, SIOCAIFADDR, &ifra) < 0) {
253 perror("Unable to set address on tap interface");
254 exit(1);
259 * Turn up the interface
261 flags = IFF_UP;
262 if (ioctl(s, SIOCGIFFLAGS, &ifr) >= 0) {
263 bzero(&ifr, sizeof(ifr));
264 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "tap%d", tap_unit);
265 ifr.ifr_flags |= flags & 0xFFFF;
266 ifr.ifr_flagshigh |= flags >> 16;
267 if (ioctl(s, SIOCSIFFLAGS, &ifr) < 0) {
268 perror("Unable to set IFF_UP on tap interface");
269 exit(1);
273 if (bridgeName) {
274 struct ifbreq ifbr;
275 struct ifdrv ifd;
278 * Create the bridge if necessary.
280 bzero(&ifr, sizeof(ifr));
281 snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s", bridgeName);
282 if (ioctl(s, SIOCIFCREATE, &ifr) < 0) {
283 if (errno != EEXIST) {
284 perror("Unable to create bridge interface");
285 exit(1);
291 * Add the tap interface to the bridge
293 bzero(&ifbr, sizeof(ifbr));
294 snprintf(ifbr.ifbr_ifsname, sizeof(ifbr.ifbr_ifsname),
295 "tap%d", tap_unit);
297 bzero(&ifd, sizeof(ifd));
298 snprintf(ifd.ifd_name, sizeof(ifd.ifd_name), "%s", bridgeName);
299 ifd.ifd_cmd = BRDGADD;
300 ifd.ifd_len = sizeof(ifbr);
301 ifd.ifd_data = &ifbr;
303 if (ioctl(s, SIOCSDRVSPEC, &ifd) < 0) {
304 if (errno != EEXIST) {
305 perror("Unable to add tap ifc to bridge!");
306 exit(1);
311 close(s);
312 info = malloc(sizeof(*info));
313 bzero(info, sizeof(*info));
314 info->fd = tap_fd;
315 info->istap = 1;
316 return(info);
319 #undef TAPDEV_MINOR
321 static int
322 vknet_listener(const char *pathName)
324 struct sockaddr_un sunx;
325 int net_fd;
326 int len;
327 gid_t gid;
328 struct group *grp;
331 * Group access to our named unix domain socket.
333 if ((grp = getgrnam("vknet")) == NULL) {
334 fprintf(stderr, "The 'vknet' group must exist\n");
335 exit(1);
337 gid = grp->gr_gid;
338 endgrent();
341 * Socket setup
343 snprintf(sunx.sun_path, sizeof(sunx.sun_path), "%s", pathName);
344 len = offsetof(struct sockaddr_un, sun_path[strlen(sunx.sun_path)]);
345 ++len; /* include nul */
346 sunx.sun_family = AF_UNIX;
347 sunx.sun_len = len;
349 net_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
350 if (net_fd < 0)
351 return(-1);
352 remove(pathName);
353 if (bind(net_fd, (void *)&sunx, len) < 0) {
354 close(net_fd);
355 return(-1);
357 if (listen(net_fd, 1024) < 0) {
358 close(net_fd);
359 return(-1);
361 if (chown(pathName, (uid_t)-1, gid) < 0) {
362 close(net_fd);
363 return(-1);
365 if (chmod(pathName, 0660) < 0) {
366 close(net_fd);
367 return(-1);
369 return(net_fd);
372 static
373 void
374 vknet_acceptor(int net_fd)
376 struct sockaddr_un sunx;
377 pthread_t dummy_td;
378 int sunx_len;
379 int rfd;
380 ioinfo_t info;
382 for (;;) {
383 sunx_len = sizeof(sunx);
384 rfd = accept(net_fd, (void *)&sunx, &sunx_len);
385 if (rfd < 0)
386 break;
387 info = malloc(sizeof(*info));
388 bzero(info, sizeof(*info));
389 info->fd = rfd;
390 info->istap = 0;
391 pthread_create(&dummy_td, NULL, vknet_io, info);
396 * This I/O thread implements the core of the bridging code.
398 static
399 void *
400 vknet_io(void *arg)
402 ioinfo_t info = arg;
403 bridge_t bridge;
404 u_int8_t *pkt;
405 int bytes;
407 pthread_detach(pthread_self());
410 * Assign as a bridge slot using our thread id.
412 pthread_mutex_lock(&BridgeMutex);
413 bridge = bridge_add(info);
414 pthread_mutex_unlock(&BridgeMutex);
417 * Read packet loop. Writing is handled by the bridge code.
419 pkt = malloc(MAXPKT);
420 while ((bytes = read(info->fd, pkt, MAXPKT)) > 0) {
421 pthread_mutex_lock(&BridgeMutex);
422 bridge_packet(bridge, pkt, bytes);
423 pthread_mutex_unlock(&BridgeMutex);
427 * Cleanup
429 pthread_mutex_lock(&BridgeMutex);
430 bridge_del(bridge);
431 pthread_mutex_unlock(&BridgeMutex);
433 close(info->fd);
434 free(pkt);
435 pthread_exit(NULL);
439 * Debugging
441 static int
442 vknet_connect(const char *pathName)
444 struct sockaddr_un sunx;
445 int len;
446 int net_fd;
448 snprintf(sunx.sun_path, sizeof(sunx.sun_path), "%s", pathName);
449 len = offsetof(struct sockaddr_un, sun_path[strlen(sunx.sun_path)]);
450 ++len; /* include nul */
451 sunx.sun_family = AF_UNIX;
452 sunx.sun_len = len;
454 net_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
455 if (net_fd < 0)
456 return(-1);
457 if (connect(net_fd, (void *)&sunx, len) < 0) {
458 close(net_fd);
459 return(-1);
461 return(net_fd);
464 static void
465 vknet_monitor(int net_fd)
467 u_int8_t *pkt;
468 int bytes;
469 int i;
471 pkt = malloc(MAXPKT);
472 while ((bytes = read(net_fd, pkt, MAXPKT)) > 0) {
473 printf("%02x:%02x:%02x:%02x:%02x:%02x <- "
474 "%02x:%02x:%02x:%02x:%02x:%02x",
475 pkt[0], pkt[1], pkt[2], pkt[3], pkt[4], pkt[5],
476 pkt[6], pkt[7], pkt[8], pkt[9], pkt[10], pkt[11]);
477 for (i = 12; i < bytes; ++i) {
478 if (((i - 12) & 15) == 0) {
479 printf("\n\t");
481 printf(" %02x", pkt[i]);
483 printf("\n");
485 free(pkt);
489 * Misc
491 static void
492 writepid(void)
494 FILE *pf;
496 if ((pf = fopen(pidfile, "w+")) == NULL)
497 errx(1, "Failed to create pidfile %s", pidfile);
499 if ((fprintf(pf, "%d\n", getpid())) < 1)
500 err(1, "fprintf");
502 fclose(pf);
505 static void
506 cleanup(int __unused sig)
508 if (pidfile)
509 unlink(pidfile);
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static
void
usage(void)
{
	fputs("usage: vknet [-cdU] [-b bridgeN] [-p socket_path] [-i pidfile] [-t tapN] [address/cidrbits]\n", stderr);
	fputs("address/cidrbits must be specified in default secure mode.\n", stderr);
	exit(1);
}