some bugfixes for slirpvde
[vde.git] / vde-2 / slirpvde / slirp.c
blob b08c1edf9440b2a3c7e1c0cbcdfb54a91cce8e36
1 #include <config.h>
2 #include <slirp.h>
/* Address of the host machine. */
struct in_addr our_addr;
/* Address of the host's DNS server. */
struct in_addr dns_addr;
/* Loopback address of the host. */
struct in_addr loopback_addr;

/* Base address of the slirp virtual network. */
struct in_addr special_addr;

/* Ethernet address of the virtual hosts; users overwrite the last byte. */
const uint8_t special_ethaddr[6] = { 0x52, 0x54, 0x00, 0x12, 0x35, 0x00 };

/* Client Ethernet addresses, indexed by the last byte of the client IP. */
static uint8_t client_ethaddr[256][6];

int do_slowtimo;
int link_up;
struct timeval tt;
FILE *lfd;

/* XXX: suppress those select globals */
fd_set *global_readfds;
fd_set *global_writefds;
fd_set *global_xfds;
28 #ifdef _WIN32
30 static int get_dns_addr(struct in_addr *pdns_addr)
32 /* XXX: add it */
33 return -1;
36 #else
38 static int get_dns_addr(struct in_addr *pdns_addr)
40 char buff[512];
41 char buff2[256];
42 FILE *f;
43 int found = 0;
44 struct in_addr tmp_addr;
46 f = fopen("/etc/resolv.conf", "r");
47 if (!f)
48 return -1;
50 lprint("IP address of your DNS(s): ");
51 while (fgets(buff, 512, f) != NULL) {
52 if (sscanf(buff, "nameserver%*[ \t]%256s", buff2) == 1) {
53 if (!inet_aton(buff2, &tmp_addr))
54 continue;
55 if (tmp_addr.s_addr == loopback_addr.s_addr)
56 tmp_addr = our_addr;
57 /* If it's the first one, set it to dns_addr */
58 if (!found)
59 *pdns_addr = tmp_addr;
60 else
61 lprint(", ");
62 if (++found > 3) {
63 lprint("(more)");
64 break;
65 } else
66 lprint("%s", inet_ntoa(tmp_addr));
69 if (!found)
70 return -1;
71 return 0;
74 #endif
76 void slirp_init(char *network)
78 debug_init("/tmp/slirp.log", DEBUG_DEFAULT);
80 link_up = 1;
82 memset(client_ethaddr,0xff,sizeof(client_ethaddr));
84 if_init();
85 ip_init();
87 /* Initialise mbufs *after* setting the MTU */
88 m_init();
90 /* set default addresses */
91 getouraddr();
92 inet_aton("127.0.0.1", &loopback_addr);
94 if (get_dns_addr(&dns_addr) < 0) {
95 fprintf(stderr, "Could not get DNS address\n");
96 exit(1);
99 if (network==NULL)
100 inet_aton(CTL_SPECIAL, &special_addr);
101 else
102 inet_aton(network, &special_addr);
/* True when SS_ISFCONNECTED is set and SS_FCANTSENDMORE is clear. */
#define CONN_CANFSEND(so) (((so)->so_state & (SS_FCANTSENDMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
/* True when SS_ISFCONNECTED is set and SS_FCANTRCVMORE is clear. */
#define CONN_CANFRCV(so) (((so)->so_state & (SS_FCANTRCVMORE|SS_ISFCONNECTED)) == SS_ISFCONNECTED)
/*
 * Raise the local variable `nfds` to x when x is larger.
 * Wrapped in do/while so it behaves as one statement (no dangling else).
 * Note: x is evaluated twice.
 */
#define UPD_NFDS(x) do { if (nfds < (x)) nfds = (x); } while (0)
111 * curtime kept to an accuracy of 1ms
113 static void updtime(void)
115 gettimeofday(&tt, 0);
117 curtime = (u_int)tt.tv_sec * (u_int)1000;
118 curtime += (u_int)tt.tv_usec / (u_int)1000;
120 if ((tt.tv_usec % 1000) >= 500)
121 curtime++;
124 void slirp_select_fill(int *pnfds,
125 fd_set *readfds, fd_set *writefds, fd_set *xfds)
127 struct socket *so, *so_next;
128 struct timeval timeout;
129 int nfds;
130 int tmp_time;
132 /* fail safe */
133 global_readfds = NULL;
134 global_writefds = NULL;
135 global_xfds = NULL;
137 nfds = *pnfds;
139 * First, TCP sockets
141 do_slowtimo = 0;
142 if (link_up) {
144 * *_slowtimo needs calling if there are IP fragments
145 * in the fragment queue, or there are TCP connections active
147 do_slowtimo = ((tcb.so_next != &tcb) ||
148 ((struct ipasfrag *)&ipq != (struct ipasfrag *)ipq.next));
150 for (so = tcb.so_next; so != &tcb; so = so_next) {
151 so_next = so->so_next;
154 * See if we need a tcp_fasttimo
156 if (time_fasttimo == 0 && so->so_tcpcb->t_flags & TF_DELACK)
157 time_fasttimo = curtime; /* Flag when we want a fasttimo */
160 * NOFDREF can include still connecting to local-host,
161 * newly socreated() sockets etc. Don't want to select these.
163 if (so->so_state & SS_NOFDREF || so->s == -1)
164 continue;
167 * Set for reading sockets which are accepting
169 if (so->so_state & SS_FACCEPTCONN) {
170 FD_SET(so->s, readfds);
171 UPD_NFDS(so->s);
172 continue;
176 * Set for writing sockets which are connecting
178 if (so->so_state & SS_ISFCONNECTING) {
179 FD_SET(so->s, writefds);
180 UPD_NFDS(so->s);
181 continue;
185 * Set for writing if we are connected, can send more, and
186 * we have something to send
188 if (CONN_CANFSEND(so) && so->so_rcv.sb_cc) {
189 FD_SET(so->s, writefds);
190 UPD_NFDS(so->s);
194 * Set for reading (and urgent data) if we are connected, can
195 * receive more, and we have room for it XXX /2 ?
197 if (CONN_CANFRCV(so) && (so->so_snd.sb_cc < (so->so_snd.sb_datalen/2))) {
198 FD_SET(so->s, readfds);
199 FD_SET(so->s, xfds);
200 UPD_NFDS(so->s);
205 * UDP sockets
207 for (so = udb.so_next; so != &udb; so = so_next) {
208 so_next = so->so_next;
211 * See if it's timed out
213 if (so->so_expire) {
214 if (so->so_expire <= curtime) {
215 udp_detach(so);
216 continue;
217 } else
218 do_slowtimo = 1; /* Let socket expire */
222 * When UDP packets are received from over the
223 * link, they're sendto()'d straight away, so
224 * no need for setting for writing
225 * Limit the number of packets queued by this session
226 * to 4. Note that even though we try and limit this
227 * to 4 packets, the session could have more queued
228 * if the packets needed to be fragmented
229 * (XXX <= 4 ?)
231 if ((so->so_state & SS_ISFCONNECTED) && so->so_queued <= 4) {
232 FD_SET(so->s, readfds);
233 UPD_NFDS(so->s);
239 * Setup timeout to use minimum CPU usage, especially when idle
243 * First, see the timeout needed by *timo
245 timeout.tv_sec = 0;
246 timeout.tv_usec = -1;
248 * If a slowtimo is needed, set timeout to 500ms from the last
249 * slow timeout. If a fast timeout is needed, set timeout within
250 * 200ms of when it was requested.
252 if (do_slowtimo) {
253 /* XXX + 10000 because some select()'s aren't that accurate */
254 timeout.tv_usec = ((500 - (curtime - last_slowtimo)) * 1000) + 10000;
255 if (timeout.tv_usec < 0)
256 timeout.tv_usec = 0;
257 else if (timeout.tv_usec > 510000)
258 timeout.tv_usec = 510000;
260 /* Can only fasttimo if we also slowtimo */
261 if (time_fasttimo) {
262 tmp_time = (200 - (curtime - time_fasttimo)) * 1000;
263 if (tmp_time < 0)
264 tmp_time = 0;
266 /* Choose the smallest of the 2 */
267 if (tmp_time < timeout.tv_usec)
268 timeout.tv_usec = (u_int)tmp_time;
271 *pnfds = nfds;
274 void slirp_select_poll(fd_set *readfds, fd_set *writefds, fd_set *xfds)
276 struct socket *so, *so_next;
277 int ret;
279 global_readfds = readfds;
280 global_writefds = writefds;
281 global_xfds = xfds;
283 /* Update time */
284 updtime();
287 * See if anything has timed out
289 if (link_up) {
290 if (time_fasttimo && ((curtime - time_fasttimo) >= 199)) {
291 tcp_fasttimo();
292 time_fasttimo = 0;
294 if (do_slowtimo && ((curtime - last_slowtimo) >= 499)) {
295 ip_slowtimo();
296 tcp_slowtimo();
297 last_slowtimo = curtime;
302 * Check sockets
304 if (link_up) {
306 * Check TCP sockets
308 for (so = tcb.so_next; so != &tcb; so = so_next) {
309 so_next = so->so_next;
312 * FD_ISSET is meaningless on these sockets
313 * (and they can crash the program)
315 if (so->so_state & SS_NOFDREF || so->s == -1)
316 continue;
319 * Check for URG data
320 * This will soread as well, so no need to
321 * test for readfds below if this succeeds
323 if (FD_ISSET(so->s, xfds))
324 sorecvoob(so);
326 * Check sockets for reading
328 else if (FD_ISSET(so->s, readfds)) {
330 * Check for incoming connections
332 if (so->so_state & SS_FACCEPTCONN) {
333 tcp_connect(so);
334 continue;
335 } /* else */
336 ret = soread(so);
338 /* Output it if we read something */
339 if (ret > 0)
340 tcp_output(sototcpcb(so));
344 * Check sockets for writing
346 if (FD_ISSET(so->s, writefds)) {
348 * Check for non-blocking, still-connecting sockets
350 if (so->so_state & SS_ISFCONNECTING) {
351 /* Connected */
352 so->so_state &= ~SS_ISFCONNECTING;
354 ret = write(so->s, &ret, 0);
355 if (ret < 0) {
356 /* XXXXX Must fix, zero bytes is a NOP */
357 if (errno == EAGAIN || errno == EWOULDBLOCK ||
358 errno == EINPROGRESS || errno == ENOTCONN)
359 continue;
361 /* else failed */
362 so->so_state = SS_NOFDREF;
364 /* else so->so_state &= ~SS_ISFCONNECTING; */
367 * Continue tcp_input
369 tcp_input((struct mbuf *)NULL, sizeof(struct ip), so);
370 /* continue; */
371 } else
372 ret = sowrite(so);
374 * XXXXX If we wrote something (a lot), there
375 * could be a need for a window update.
376 * In the worst case, the remote will send
377 * a window probe to get things going again
382 * Probe a still-connecting, non-blocking socket
383 * to check if it's still alive
385 #ifdef PROBE_CONN
386 if (so->so_state & SS_ISFCONNECTING) {
387 ret = read(so->s, (char *)&ret, 0);
389 if (ret < 0) {
390 /* XXX */
391 if (errno == EAGAIN || errno == EWOULDBLOCK ||
392 errno == EINPROGRESS || errno == ENOTCONN)
393 continue; /* Still connecting, continue */
395 /* else failed */
396 so->so_state = SS_NOFDREF;
398 /* tcp_input will take care of it */
399 } else {
400 ret = write(so->s, &ret, 0);
401 if (ret < 0) {
402 /* XXX */
403 if (errno == EAGAIN || errno == EWOULDBLOCK ||
404 errno == EINPROGRESS || errno == ENOTCONN)
405 continue;
406 /* else failed */
407 so->so_state = SS_NOFDREF;
408 } else
409 so->so_state &= ~SS_ISFCONNECTING;
412 tcp_input((struct mbuf *)NULL, sizeof(struct ip),so);
413 } /* SS_ISFCONNECTING */
414 #endif
418 * Now UDP sockets.
419 * Incoming packets are sent straight away, they're not buffered.
420 * Incoming UDP data isn't buffered either.
422 for (so = udb.so_next; so != &udb; so = so_next) {
423 so_next = so->so_next;
425 if (so->s != -1 && FD_ISSET(so->s, readfds)) {
426 sorecvfrom(so);
432 * See if we can start outputting
434 if (if_queued && link_up)
435 if_start();
/* Ethernet address and header lengths, in bytes. */
#define ETH_ALEN 6
#define ETH_HLEN 14

/* EtherType values. */
#define ETH_P_IP  0x0800 /* Internet Protocol packet */
#define ETH_P_ARP 0x0806 /* Address Resolution packet */

/* ARP opcodes. */
#define ARPOP_REQUEST 1 /* ARP request */
#define ARPOP_REPLY   2 /* ARP reply */

/* Ethernet frame header as it appears on the wire (ETH_HLEN bytes). */
struct ethhdr
{
    uint8_t  h_dest[ETH_ALEN];   /* destination eth addr */
    uint8_t  h_source[ETH_ALEN]; /* source ether addr */
    uint16_t h_proto;            /* packet type ID field */
};

/* ARP packet for IPv4 over Ethernet. */
struct arphdr
{
    uint16_t ar_hrd; /* format of hardware address */
    uint16_t ar_pro; /* format of protocol address */
    uint8_t  ar_hln; /* length of hardware address */
    uint8_t  ar_pln; /* length of protocol address */
    uint16_t ar_op;  /* ARP opcode (command) */

    /*
     * Layout for Ethernet; in general this part is variable sized.
     */
    uint8_t  ar_sha[ETH_ALEN]; /* sender hardware address */
    uint8_t  ar_sip[4];        /* sender IP address */
    uint8_t  ar_tha[ETH_ALEN]; /* target hardware address */
    uint8_t  ar_tip[4];        /* target IP address */
};

/* First 20 bytes of an IP header; only the two addresses are named. */
struct ip_part_header
{
    uint8_t filler[12];
    uint8_t ip_sip[4];
    uint8_t ip_tip[4];
};
478 void client_eth_register(const unsigned char *eth_addr, const unsigned char *ip_addr)
480 int host=ip_addr[3];
481 if (memcmp(ip_addr, &special_addr, 3) == 0 && host != 0 && host != 0xff)
483 memcpy(client_ethaddr[host],eth_addr,ETH_ALEN);
484 /*printf("register %02x:%02x:%02x:%02x:%02x:%02x %d.%d.%d.%d\n",
485 eth_addr[0], eth_addr[1], eth_addr[2],
486 eth_addr[3], eth_addr[4], eth_addr[5],
487 ip_addr[0], ip_addr[1], ip_addr[2], ip_addr[3]);*/
491 static void client_eth_get(unsigned char *eth_addr, const unsigned char *ip_addr)
493 int host=ip_addr[3];
494 if (memcmp(ip_addr, &special_addr, 3) == 0 && host != 0 && host != 0xff)
496 memcpy(eth_addr,client_ethaddr[host],ETH_ALEN);
497 /*printf("get %02x:%02x:%02x:%02x:%02x:%02x %d.%d.%d.%d\n",
498 eth_addr[0], eth_addr[1], eth_addr[2],
499 eth_addr[3], eth_addr[4], eth_addr[5],
500 ip_addr[0], ip_addr[1], ip_addr[2], ip_addr[3]);*/
504 static void client_eth_register_ip(const uint8_t *pkt, int pkt_len)
506 struct ethhdr *eh = (struct ethhdr *)pkt;
507 struct ip_part_header *ih=(struct ip_part_header *) (pkt+ETH_HLEN);
508 if (pkt_len >= 20) {
509 client_eth_register(eh->h_source,ih->ip_sip);
513 static void client_eth_get_ip(unsigned char *eth_addr, const uint8_t *pkt, int pkt_len)
515 struct ip_part_header *ih=(struct ip_part_header *) pkt;
516 if (pkt_len >= 20)
517 client_eth_get(eth_addr,ih->ip_tip);
520 void arp_input(const uint8_t *pkt, int pkt_len)
522 struct ethhdr *eh = (struct ethhdr *)pkt;
523 struct arphdr *ah = (struct arphdr *)(pkt + ETH_HLEN);
524 uint8_t arp_reply[ETH_HLEN + sizeof(struct arphdr)];
525 struct ethhdr *reh = (struct ethhdr *)arp_reply;
526 struct arphdr *rah = (struct arphdr *)(arp_reply + ETH_HLEN);
527 int ar_op;
529 ar_op = ntohs(ah->ar_op);
530 switch(ar_op) {
531 case ARPOP_REQUEST:
532 if (!memcmp(ah->ar_tip, &special_addr, 3) &&
533 (ah->ar_tip[3] == CTL_DNS || ah->ar_tip[3] == CTL_ALIAS)) {
535 /* make an ARP request to have the client address */
536 client_eth_register(ah->ar_sha, ah->ar_sip);
538 /* ARP request for alias/dns mac address */
539 memcpy(reh->h_dest, pkt + ETH_ALEN, ETH_ALEN);
540 memcpy(reh->h_source, special_ethaddr, ETH_ALEN - 1);
541 reh->h_source[5] = ah->ar_tip[3];
542 reh->h_proto = htons(ETH_P_ARP);
544 rah->ar_hrd = htons(1);
545 rah->ar_pro = htons(ETH_P_IP);
546 rah->ar_hln = ETH_ALEN;
547 rah->ar_pln = 4;
548 rah->ar_op = htons(ARPOP_REPLY);
549 memcpy(rah->ar_sha, reh->h_source, ETH_ALEN);
550 memcpy(rah->ar_sip, ah->ar_tip, 4);
551 memcpy(rah->ar_tha, ah->ar_sha, ETH_ALEN);
552 memcpy(rah->ar_tip, ah->ar_sip, 4);
554 slirp_output(arp_reply, sizeof(arp_reply));
556 break;
557 default:
558 break;
562 void slirp_input(const uint8_t *pkt, int pkt_len)
564 struct mbuf *m;
565 int proto;
567 if (pkt_len < ETH_HLEN)
568 return;
570 proto = ntohs(*(uint16_t *)(pkt + 12));
571 switch(proto) {
572 case ETH_P_ARP:
573 arp_input(pkt, pkt_len);
574 break;
575 case ETH_P_IP:
576 m = m_get();
577 if (!m)
578 return;
579 m->m_len = pkt_len;
580 memcpy(m->m_data, pkt, pkt_len);
582 client_eth_register_ip(m->m_data, m->m_len);
583 m->m_data += ETH_HLEN;
584 m->m_len -= ETH_HLEN;
586 ip_input(m);
587 break;
588 default:
589 break;
593 /* output the IP packet to the ethernet device */
594 void if_encap(const uint8_t *ip_data, int ip_data_len)
596 uint8_t buf[1600];
597 struct ethhdr *eh = (struct ethhdr *)buf;
599 if (ip_data_len + ETH_HLEN > sizeof(buf))
600 return;
602 client_eth_get_ip(eh->h_dest, ip_data, ip_data_len);
603 memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 1);
604 eh->h_source[5] = CTL_ALIAS;
605 eh->h_proto = htons(ETH_P_IP);
606 memcpy(buf + sizeof(struct ethhdr), ip_data, ip_data_len);
607 slirp_output(buf, ip_data_len + ETH_HLEN);