added some comments in wirefilter
[vde.git] / vde-2 / slirpvde / socket.c
blob084a0c3a88e6189b07c57e6c58f4c3a4a91d5e5e
1 /*
2 * Copyright (c) 1995 Danny Gasparovski.
3 *
4 * Please read the file COPYRIGHT for the
5 * terms and conditions of the copyright.
6 */
8 #define WANT_SYS_IOCTL_H
9 #include <config.h>
10 #include <slirp.h>
11 #include <ip_icmp.h>
12 #include <main.h>
/*
 * Initialisation hook for the socket layer.
 * There is nothing to set up at the moment, so this is a no-op.
 * Declared with an explicit (void) parameter list so that the
 * definition is a real prototype rather than an old-style declaration.
 */
void
so_init(void)
{
    /* Nothing yet */
}
21 struct socket *
22 solookup(head, laddr, lport, faddr, fport)
23 struct socket *head;
24 struct in_addr laddr;
25 u_int lport;
26 struct in_addr faddr;
27 u_int fport;
29 struct socket *so;
31 for (so = head->so_next; so != head; so = so->so_next) {
32 if (so->so_lport == lport &&
33 so->so_laddr.s_addr == laddr.s_addr &&
34 so->so_faddr.s_addr == faddr.s_addr &&
35 so->so_fport == fport)
36 break;
39 if (so == head)
40 return (struct socket *)NULL;
41 return so;
46 * Create a new socket, initialise the fields
47 * It is the responsibility of the caller to
48 * insque() it into the correct linked-list
50 struct socket *
51 socreate()
53 struct socket *so;
55 so = (struct socket *)malloc(sizeof(struct socket));
56 if(so) {
57 memset(so, 0, sizeof(struct socket));
58 so->so_state = SS_NOFDREF;
59 so->s = -1;
61 return(so);
65 * remque and free a socket, clobber cache
67 void
68 sofree(so)
69 struct socket *so;
71 if (so->so_emu==EMU_RSH && so->extra) {
72 sofree(so->extra);
73 so->extra=NULL;
75 if (so == tcp_last_so)
76 tcp_last_so = &tcb;
77 else if (so == udp_last_so)
78 udp_last_so = &udb;
80 m_free(so->so_m);
82 if(so->so_next && so->so_prev)
83 remque(so); /* crashes if so is not in a queue */
85 free(so);
89 * Read from so's socket into sb_snd, updating all relevant sbuf fields
90 * NOTE: This will only be called if it is select()ed for reading, so
91 * a read() of 0 (or less) means it's disconnected
93 int
94 soread(so)
95 struct socket *so;
97 int n, nn, lss, total;
98 struct sbuf *sb = &so->so_snd;
99 int len = sb->sb_datalen - sb->sb_cc;
100 struct iovec iov[2];
101 int mss = so->so_tcpcb->t_maxseg;
103 DEBUG_CALL("soread");
104 DEBUG_ARG("so = %lx", (long )so);
107 * No need to check if there's enough room to read.
108 * soread wouldn't have been called if there weren't
111 len = sb->sb_datalen - sb->sb_cc;
113 iov[0].iov_base = sb->sb_wptr;
114 if (sb->sb_wptr < sb->sb_rptr) {
115 iov[0].iov_len = sb->sb_rptr - sb->sb_wptr;
116 /* Should never succeed, but... */
117 if (iov[0].iov_len > len)
118 iov[0].iov_len = len;
119 if (iov[0].iov_len > mss)
120 iov[0].iov_len -= iov[0].iov_len%mss;
121 n = 1;
122 } else {
123 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_wptr;
124 /* Should never succeed, but... */
125 if (iov[0].iov_len > len) iov[0].iov_len = len;
126 len -= iov[0].iov_len;
127 if (len) {
128 iov[1].iov_base = sb->sb_data;
129 iov[1].iov_len = sb->sb_rptr - sb->sb_data;
130 if(iov[1].iov_len > len)
131 iov[1].iov_len = len;
132 total = iov[0].iov_len + iov[1].iov_len;
133 if (total > mss) {
134 lss = total%mss;
135 if (iov[1].iov_len > lss) {
136 iov[1].iov_len -= lss;
137 n = 2;
138 } else {
139 lss -= iov[1].iov_len;
140 iov[0].iov_len -= lss;
141 n = 1;
143 } else
144 n = 2;
145 } else {
146 if (iov[0].iov_len > mss)
147 iov[0].iov_len -= iov[0].iov_len%mss;
148 n = 1;
152 #ifdef HAVE_READV
153 nn = readv(so->s, (struct iovec *)iov, n);
154 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
155 #else
156 nn = read(so->s, iov[0].iov_base, iov[0].iov_len);
157 #endif
158 if (nn <= 0) {
159 if (nn < 0 && (errno == EINTR || errno == EAGAIN))
160 return 0;
161 else {
162 DEBUG_MISC((dfd, " --- soread() disconnected, nn = %d, errno = %d-%s\n", nn, errno,strerror(errno)));
163 sofcantrcvmore(so);
164 tcp_sockclosed(sototcpcb(so));
165 return -1;
169 #ifndef HAVE_READV
171 * If there was no error, try and read the second time round
172 * We read again if n = 2 (ie, there's another part of the buffer)
173 * and we read as much as we could in the first read
174 * We don't test for <= 0 this time, because there legitimately
175 * might not be any more data (since the socket is non-blocking),
176 * a close will be detected on next iteration.
177 * A return of -1 wont (shouldn't) happen, since it didn't happen above
179 if (n == 2 && nn == iov[0].iov_len)
180 nn += read(so->s, iov[1].iov_base, iov[1].iov_len);
182 DEBUG_MISC((dfd, " ... read nn = %d bytes\n", nn));
183 #endif
185 /* Update fields */
186 sb->sb_cc += nn;
187 sb->sb_wptr += nn;
188 if (sb->sb_wptr >= (sb->sb_data + sb->sb_datalen))
189 sb->sb_wptr -= sb->sb_datalen;
190 return nn;
194 * Get urgent data
196 * When the socket is created, we set it SO_OOBINLINE,
197 * so when OOB data arrives, we soread() it and everything
198 * in the send buffer is sent as urgent data
200 void
201 sorecvoob(so)
202 struct socket *so;
204 struct tcpcb *tp = sototcpcb(so);
206 DEBUG_CALL("sorecvoob");
207 DEBUG_ARG("so = %lx", (long)so);
210 * We take a guess at how much urgent data has arrived.
211 * In most situations, when urgent data arrives, the next
212 * read() should get all the urgent data. This guess will
213 * be wrong however if more data arrives just after the
214 * urgent data, or the read() doesn't return all the
215 * urgent data.
217 soread(so);
218 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
219 tp->t_force = 1;
220 tcp_output(tp);
221 tp->t_force = 0;
225 * Send urgent data
226 * There's a lot duplicated code here, but...
229 sosendoob(so)
230 struct socket *so;
232 struct sbuf *sb = &so->so_rcv;
233 char buff[2048]; /* XXX Shouldn't be sending more oob data than this */
235 int n, len;
237 DEBUG_CALL("sosendoob");
238 DEBUG_ARG("so = %lx", (long)so);
239 DEBUG_ARG("sb->sb_cc = %d", sb->sb_cc);
241 if (so->so_urgc > 2048)
242 so->so_urgc = 2048; /* XXXX */
244 if (sb->sb_rptr < sb->sb_wptr) {
245 /* We can send it directly */
246 n = send(so->s, sb->sb_rptr, so->so_urgc, (MSG_OOB)); /* |MSG_DONTWAIT)); */
247 so->so_urgc -= n;
249 DEBUG_MISC((dfd, " --- sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
250 } else {
252 * Since there's no sendv or sendtov like writev,
253 * we must copy all data to a linear buffer then
254 * send it all
256 len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
257 if (len > so->so_urgc) len = so->so_urgc;
258 memcpy(buff, sb->sb_rptr, len);
259 so->so_urgc -= len;
260 if (so->so_urgc) {
261 n = sb->sb_wptr - sb->sb_data;
262 if (n > so->so_urgc) n = so->so_urgc;
263 memcpy((buff + len), sb->sb_data, n);
264 so->so_urgc -= n;
265 len += n;
267 n = send(so->s, buff, len, (MSG_OOB)); /* |MSG_DONTWAIT)); */
268 #ifdef DEBUG
269 if (n != len)
270 DEBUG_ERROR((dfd, "Didn't send all data urgently XXXXX\n"));
271 #endif
272 DEBUG_MISC((dfd, " ---2 sent %d bytes urgent data, %d urgent bytes left\n", n, so->so_urgc));
275 sb->sb_cc -= n;
276 sb->sb_rptr += n;
277 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
278 sb->sb_rptr -= sb->sb_datalen;
280 return n;
284 * Write data from so_rcv to so's socket,
285 * updating all sbuf field as necessary
288 sowrite(so)
289 struct socket *so;
291 int n,nn;
292 struct sbuf *sb = &so->so_rcv;
293 int len = sb->sb_cc;
294 struct iovec iov[2];
296 DEBUG_CALL("sowrite");
297 DEBUG_ARG("so = %lx", (long)so);
299 if (so->so_urgc) {
300 sosendoob(so);
301 if (sb->sb_cc == 0)
302 return 0;
306 * No need to check if there's something to write,
307 * sowrite wouldn't have been called otherwise
310 len = sb->sb_cc;
312 iov[0].iov_base = sb->sb_rptr;
313 if (sb->sb_rptr < sb->sb_wptr) {
314 iov[0].iov_len = sb->sb_wptr - sb->sb_rptr;
315 /* Should never succeed, but... */
316 if (iov[0].iov_len > len) iov[0].iov_len = len;
317 n = 1;
318 } else {
319 iov[0].iov_len = (sb->sb_data + sb->sb_datalen) - sb->sb_rptr;
320 if (iov[0].iov_len > len) iov[0].iov_len = len;
321 len -= iov[0].iov_len;
322 if (len) {
323 iov[1].iov_base = sb->sb_data;
324 iov[1].iov_len = sb->sb_wptr - sb->sb_data;
325 if (iov[1].iov_len > len) iov[1].iov_len = len;
326 n = 2;
327 } else
328 n = 1;
330 /* Check if there's urgent data to send, and if so, send it */
332 #ifdef HAVE_READV
333 nn = writev(so->s, (const struct iovec *)iov, n);
335 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
336 #else
337 nn = write(so->s, iov[0].iov_base, iov[0].iov_len);
338 #endif
339 /* This should never happen, but people tell me it does *shrug* */
340 if (nn < 0 && (errno == EAGAIN || errno == EINTR))
341 return 0;
343 if (nn <= 0) {
344 DEBUG_MISC((dfd, " --- sowrite disconnected, so->so_state = %x, errno = %d\n",
345 so->so_state, errno));
346 sofcantsendmore(so);
347 tcp_sockclosed(sototcpcb(so));
348 return -1;
351 #ifndef HAVE_READV
352 if (n == 2 && nn == iov[0].iov_len)
353 nn += write(so->s, iov[1].iov_base, iov[1].iov_len);
354 DEBUG_MISC((dfd, " ... wrote nn = %d bytes\n", nn));
355 #endif
357 /* Update sbuf */
358 sb->sb_cc -= nn;
359 sb->sb_rptr += nn;
360 if (sb->sb_rptr >= (sb->sb_data + sb->sb_datalen))
361 sb->sb_rptr -= sb->sb_datalen;
364 * If in DRAIN mode, and there's no more data, set
365 * it CANTSENDMORE
367 if ((so->so_state & SS_FWDRAIN) && sb->sb_cc == 0)
368 sofcantsendmore(so);
370 return nn;
374 * recvfrom() a UDP socket
376 void
377 sorecvfrom(so)
378 struct socket *so;
380 struct sockaddr_in addr;
381 int addrlen = sizeof(struct sockaddr_in);
383 DEBUG_CALL("sorecvfrom");
384 DEBUG_ARG("so = %lx", (long)so);
386 if (so->so_type == IPPROTO_ICMP) { /* This is a "ping" reply */
387 char buff[256];
388 int len;
390 len = recvfrom(so->s, buff, 256, 0,
391 (struct sockaddr *)&addr, &addrlen);
392 /* XXX Check if reply is "correct"? */
394 if(len == -1 || len == 0) {
395 u_char code=ICMP_UNREACH_PORT;
397 if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
398 else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
400 DEBUG_MISC((dfd," udp icmp rx errno = %d-%s\n",
401 errno,strerror(errno)));
402 icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
403 } else {
404 icmp_reflect(so->so_m);
405 so->so_m = 0; /* Don't m_free() it again! */
407 /* No need for this socket anymore, udp_detach it */
408 udp_detach(so);
409 } else { /* A "normal" UDP packet */
410 struct mbuf *m;
411 int len, n;
413 if (!(m = m_get())) return;
414 m->m_data += if_maxlinkhdr;
417 * XXX Shouldn't FIONREAD packets destined for port 53,
418 * but I don't know the max packet size for DNS lookups
420 len = M_FREEROOM(m);
421 /* if (so->so_fport != htons(53)) { */
422 ioctl(so->s, FIONREAD, &n);
424 if (n > len) {
425 n = (m->m_data - m->m_dat) + m->m_len + n + 1;
426 m_inc(m, n);
427 len = M_FREEROOM(m);
429 /* } */
431 m->m_len = recvfrom(so->s, m->m_data, len, 0,
432 (struct sockaddr *)&addr, &addrlen);
433 DEBUG_MISC((dfd, " did recvfrom %d, errno = %d-%s\n",
434 m->m_len, errno,strerror(errno)));
435 if(m->m_len<0) {
436 u_char code=ICMP_UNREACH_PORT;
438 if(errno == EHOSTUNREACH) code=ICMP_UNREACH_HOST;
439 else if(errno == ENETUNREACH) code=ICMP_UNREACH_NET;
441 DEBUG_MISC((dfd," rx error, tx icmp ICMP_UNREACH:%i\n", code));
442 icmp_error(so->so_m, ICMP_UNREACH,code, 0,strerror(errno));
443 m_free(m);
444 } else {
446 * Hack: domain name lookup will be used the most for UDP,
447 * and since they'll only be used once there's no need
448 * for the 4 minute (or whatever) timeout... So we time them
449 * out much quicker (10 seconds for now...)
451 if (so->so_expire) {
452 if (so->so_fport == htons(53))
453 so->so_expire = curtime + SO_EXPIREFAST;
454 else
455 so->so_expire = curtime + SO_EXPIRE;
458 /* if (m->m_len == len) {
459 * m_inc(m, MINCSIZE);
460 * m->m_len = 0;
465 * If this packet was destined for CTL_ADDR,
466 * make it look like that's where it came from, done by udp_output
468 udp_output(so, m, &addr);
469 } /* rx error */
470 } /* if ping packet */
474 * sendto() a socket
477 sosendto(so, m)
478 struct socket *so;
479 struct mbuf *m;
481 int ret;
482 struct sockaddr_in addr;
484 DEBUG_CALL("sosendto");
485 DEBUG_ARG("so = %lx", (long)so);
486 DEBUG_ARG("m = %lx", (long)m);
488 addr.sin_family = AF_INET;
489 if ((so->so_faddr.s_addr & htonl(0xffffff00)) == special_addr.s_addr) {
490 /* It's an alias */
491 switch(ntohl(so->so_faddr.s_addr) & 0xff) {
492 case CTL_DNS:
493 addr.sin_addr = dns_addr;
494 break;
495 case CTL_ALIAS:
496 default:
497 addr.sin_addr = loopback_addr;
498 break;
500 } else
501 addr.sin_addr = so->so_faddr;
502 addr.sin_port = so->so_fport;
504 DEBUG_MISC((dfd, " sendto()ing, addr.sin_port=%d, addr.sin_addr.s_addr=%.16s\n", ntohs(addr.sin_port), inet_ntoa(addr.sin_addr)));
506 /* Don't care what port we get */
507 ret = sendto(so->s, m->m_data, m->m_len, 0,
508 (struct sockaddr *)&addr, sizeof (struct sockaddr));
509 if (ret < 0)
510 return -1;
513 * Kill the socket if there's no reply in 4 minutes,
514 * but only if it's an expirable socket
516 if (so->so_expire)
517 so->so_expire = curtime + SO_EXPIRE;
518 so->so_state = SS_ISFCONNECTED; /* So that it gets select()ed */
519 return 0;
523 * XXX This should really be tcp_listen
525 struct socket *
526 solisten(port, laddr, lport, flags)
527 u_int port;
528 u_int32_t laddr;
529 u_int lport;
530 int flags;
532 struct sockaddr_in addr;
533 struct socket *so;
534 int s, addrlen = sizeof(addr), opt = 1;
536 DEBUG_CALL("solisten");
537 DEBUG_ARG("port = %d", port);
538 DEBUG_ARG("laddr = %x", laddr);
539 DEBUG_ARG("lport = %d", lport);
540 DEBUG_ARG("flags = %x", flags);
542 if ((so = socreate()) == NULL) {
543 /* free(so); Not sofree() ??? free(NULL) == NOP */
544 return NULL;
547 /* Don't tcp_attach... we don't need so_snd nor so_rcv */
548 if ((so->so_tcpcb = tcp_newtcpcb(so)) == NULL) {
549 free(so);
550 return NULL;
552 insque(so,&tcb);
555 * SS_FACCEPTONCE sockets must time out.
557 if (flags & SS_FACCEPTONCE)
558 so->so_tcpcb->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT*2;
560 so->so_state = (SS_FACCEPTCONN|flags);
561 so->so_lport = lport; /* Kept in network format */
562 so->so_laddr.s_addr = laddr; /* Ditto */
564 addr.sin_family = AF_INET;
565 addr.sin_addr.s_addr = INADDR_ANY;
566 addr.sin_port = port;
568 if (((s = socket(AF_INET,SOCK_STREAM,0)) < 0) ||
569 (bind(s,(struct sockaddr *)&addr, sizeof(addr)) < 0) ||
570 (listen(s,1) < 0)) {
571 int tmperrno = errno; /* Don't clobber the real reason we failed */
573 close(s);
574 sofree(so);
575 /* Restore the real errno */
576 errno = tmperrno;
577 return NULL;
579 setsockopt(s,SOL_SOCKET,SO_REUSEADDR,(char *)&opt,sizeof(int));
580 setsockopt(s,SOL_SOCKET,SO_OOBINLINE,(char *)&opt,sizeof(int));
582 getsockname(s,(struct sockaddr *)&addr,&addrlen);
583 so->so_fport = addr.sin_port;
584 if (addr.sin_addr.s_addr == 0 || addr.sin_addr.s_addr == loopback_addr.s_addr)
585 so->so_faddr = our_addr;
586 else
587 so->so_faddr = addr.sin_addr;
589 so->s = s;
590 return so;
/*
 * Data is available in so_rcv
 * Just write() the data to the socket
 * XXX not yet...
 *
 * Currently a no-op.  The disabled idea was to call sowrite(so) and
 * FD_CLR(so->s,&writefds) here.
 */
void
sorwakeup(struct socket *so)
{
    (void)so; /* deliberately unused for now */
}
/*
 * Data has been freed in so_snd
 * We have room for a read() if we want to
 * For now, don't read, it'll be done in the main loop
 */
void
sowwakeup(struct socket *so)
{
    (void)so; /* Nothing, yet */
}
619 * Various session state calls
620 * XXX Should be #define's
621 * The socket state stuff needs work, these often get call 2 or 3
622 * times each when only 1 was needed
624 void
625 soisfconnecting(so)
626 register struct socket *so;
628 so->so_state &= ~(SS_NOFDREF|SS_ISFCONNECTED|SS_FCANTRCVMORE|
629 SS_FCANTSENDMORE|SS_FWDRAIN);
630 so->so_state |= SS_ISFCONNECTING; /* Clobber other states */
633 void
634 soisfconnected(so)
635 register struct socket *so;
637 so->so_state &= ~(SS_ISFCONNECTING|SS_FWDRAIN|SS_NOFDREF);
638 so->so_state |= SS_ISFCONNECTED; /* Clobber other states */
641 void
642 sofcantrcvmore(so)
643 struct socket *so;
645 if ((so->so_state & SS_NOFDREF) == 0) {
646 shutdown(so->s,0);
647 if (global_writefds != NULL)
648 FD_CLR(so->s, global_writefds);
650 so->so_state &= ~(SS_ISFCONNECTING);
651 if (so->so_state & SS_FCANTSENDMORE)
652 so->so_state = SS_NOFDREF; /* Don't select it */ /* XXX close() here as well? */
653 else
654 so->so_state |= SS_FCANTRCVMORE;
657 void
658 sofcantsendmore(so)
659 struct socket *so;
661 if ((so->so_state & SS_NOFDREF) == 0) {
662 shutdown(so->s,1); /* send FIN to fhost */
663 if (global_readfds != NULL)
664 FD_CLR(so->s, global_readfds);
665 if (global_xfds != NULL)
666 FD_CLR(so->s, global_xfds);
669 so->so_state &= ~(SS_ISFCONNECTING);
670 if (so->so_state & SS_FCANTRCVMORE)
671 so->so_state = SS_NOFDREF; /* as above */
672 else
673 so->so_state |= SS_FCANTSENDMORE;
/*
 * Placeholder for marking a socket as disconnected.
 * XXX Do nothing ... ?
 *
 * The disabled idea was to clear SS_ISFCONNECTING|SS_ISFCONNECTED,
 * close(so->s) and set the state to SS_ISFDISCONNECTED.
 */
void
soisfdisconnected(struct socket *so)
{
    (void)so; /* deliberately unused */
}
689 * Set write drain mode
690 * Set CANTSENDMORE once all data has been write()n
692 void
693 sofwdrain(so)
694 struct socket *so;
696 if (so->so_rcv.sb_cc)
697 so->so_state |= SS_FWDRAIN;
698 else
699 sofcantsendmore(so);