Import 2.3.6pre2
[davej-history.git] / net / ipv4 / af_inet.c
blob6ca8acba2cb3654586b002379eebd08c5570af73
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * PF_INET protocol family socket handler.
8 * Version: $Id: af_inet.c,v 1.90 1999/05/29 04:30:38 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
15 * Changes (see also sock.c)
17 * A.N.Kuznetsov : Socket death error in accept().
18 * John Richardson : Fix non blocking error in connect()
19 * so sockets that fail to connect
20 * don't return -EINPROGRESS.
21 * Alan Cox : Asynchronous I/O support
22 * Alan Cox : Keep correct socket pointer on sock structures
23 * when accept() ed
24 * Alan Cox : Semantics of SO_LINGER aren't state moved
25 * to close when you look carefully. With
26 * this fixed and the accept bug fixed
27 * some RPC stuff seems happier.
28 * Niibe Yutaka : 4.4BSD style write async I/O
29 * Alan Cox,
30 * Tony Gale : Fixed reuse semantics.
31 * Alan Cox : bind() shouldn't abort existing but dead
32 * sockets. Stops FTP netin:.. I hope.
33 * Alan Cox : bind() works correctly for RAW sockets. Note
34 * that FreeBSD at least was broken in this respect
35 * so be careful with compatibility tests...
36 * Alan Cox : routing cache support
37 * Alan Cox : memzero the socket structure for compactness.
38 * Matt Day : nonblock connect error handler
39 * Alan Cox : Allow large numbers of pending sockets
40 * (eg for big web sites), but only if
41 * specifically application requested.
42 * Alan Cox : New buffering throughout IP. Used dumbly.
43 * Alan Cox : New buffering now used smartly.
44 * Alan Cox : BSD rather than common sense interpretation of
45 * listen.
46 * Germano Caronni : Assorted small races.
47 * Alan Cox : sendmsg/recvmsg basic support.
48 * Alan Cox : Only sendmsg/recvmsg now supported.
49 * Alan Cox : Locked down bind (see security list).
50 * Alan Cox : Loosened bind a little.
51 * Mike McLagan : ADD/DEL DLCI Ioctls
52 * Willy Konynenberg : Transparent proxying support.
53 * David S. Miller : New socket lookup architecture.
54 * Some other random speedups.
55 * Cyrus Durgin : Cleaned up file for kmod hacks.
56 * Andi Kleen : Fix inet_stream_connect TCP race.
58 * This program is free software; you can redistribute it and/or
59 * modify it under the terms of the GNU General Public License
60 * as published by the Free Software Foundation; either version
61 * 2 of the License, or (at your option) any later version.
64 #include <linux/config.h>
65 #include <linux/errno.h>
66 #include <linux/types.h>
67 #include <linux/socket.h>
68 #include <linux/in.h>
69 #include <linux/kernel.h>
70 #include <linux/major.h>
71 #include <linux/sched.h>
72 #include <linux/timer.h>
73 #include <linux/string.h>
74 #include <linux/sockios.h>
75 #include <linux/net.h>
76 #include <linux/fcntl.h>
77 #include <linux/mm.h>
78 #include <linux/interrupt.h>
79 #include <linux/proc_fs.h>
80 #include <linux/stat.h>
81 #include <linux/init.h>
82 #include <linux/poll.h>
84 #include <asm/uaccess.h>
85 #include <asm/system.h>
87 #include <linux/inet.h>
88 #include <linux/netdevice.h>
89 #include <net/ip.h>
90 #include <net/protocol.h>
91 #include <net/arp.h>
92 #include <net/rarp.h>
93 #include <net/route.h>
94 #include <net/tcp.h>
95 #include <net/udp.h>
96 #include <linux/skbuff.h>
97 #include <net/sock.h>
98 #include <net/raw.h>
99 #include <net/icmp.h>
100 #include <net/ipip.h>
101 #include <net/inet_common.h>
102 #include <linux/ip_fw.h>
103 #ifdef CONFIG_IP_MROUTE
104 #include <linux/mroute.h>
105 #endif
106 #ifdef CONFIG_IP_MASQUERADE
107 #include <net/ip_masq.h>
108 #endif
109 #ifdef CONFIG_BRIDGE
110 #include <net/br.h>
111 #endif
112 #ifdef CONFIG_KMOD
113 #include <linux/kmod.h>
114 #endif
115 #ifdef CONFIG_NET_RADIO
116 #include <linux/wireless.h>
117 #endif /* CONFIG_NET_RADIO */
119 #define min(a,b) ((a)<(b)?(a):(b))
121 struct linux_mib net_statistics;
123 extern int raw_get_info(char *, char **, off_t, int, int);
124 extern int snmp_get_info(char *, char **, off_t, int, int);
125 extern int netstat_get_info(char *, char **, off_t, int, int);
126 extern int afinet_get_info(char *, char **, off_t, int, int);
127 extern int tcp_get_info(char *, char **, off_t, int, int);
128 extern int udp_get_info(char *, char **, off_t, int, int);
129 extern void ip_mc_drop_socket(struct sock *sk);
131 #ifdef CONFIG_DLCI
132 extern int dlci_ioctl(unsigned int, void*);
133 #endif
135 #ifdef CONFIG_DLCI_MODULE
136 int (*dlci_ioctl_hook)(unsigned int, void *) = NULL;
137 #endif
139 int (*rarp_ioctl_hook)(unsigned int,void*) = NULL;
/*
 *	Destroy an AF_INET socket
 */
145 static __inline__ void kill_sk_queues(struct sock *sk)
147 struct sk_buff *skb;
149 /* First the read buffer. */
150 while((skb = skb_dequeue(&sk->receive_queue)) != NULL)
151 kfree_skb(skb);
153 /* Next, the error queue. */
154 while((skb = skb_dequeue(&sk->error_queue)) != NULL)
155 kfree_skb(skb);
157 /* It is _impossible_ for the backlog to contain anything
158 * when we get here. All user references to this socket
159 * have gone away, only the net layer knows can touch it.
163 static __inline__ void kill_sk_now(struct sock *sk)
165 /* No longer exists. */
166 del_from_prot_sklist(sk);
168 /* Remove from protocol hash chains. */
169 sk->prot->unhash(sk);
171 if(sk->opt)
172 kfree(sk->opt);
173 dst_release(sk->dst_cache);
174 sk_free(sk);
177 static __inline__ void kill_sk_later(struct sock *sk)
179 /* this should never happen. */
180 /* actually it can if an ack has just been sent. */
182 * It's more normal than that...
183 * It can happen because a skb is still in the device queues
184 * [PR]
187 NETDEBUG(printk(KERN_DEBUG "Socket destroy delayed (r=%d w=%d)\n",
188 atomic_read(&sk->rmem_alloc),
189 atomic_read(&sk->wmem_alloc)));
191 sk->destroy = 1;
192 sk->ack_backlog = 0;
193 bh_unlock_sock(sk);
194 net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
197 /* Callers must hold the BH spinlock.
199 * At this point, there should be no process reference to this
200 * socket, and thus no user references at all. Therefore we
201 * can assume the socket waitqueue is inactive and nobody will
202 * try to jump onto it.
204 void destroy_sock(struct sock *sk)
206 /* Now we can no longer get new packets or once the
207 * timers are killed, send them.
209 net_delete_timer(sk);
211 if (sk->prot->destroy)
212 sk->prot->destroy(sk);
214 kill_sk_queues(sk);
216 /* Now if everything is gone we can free the socket
217 * structure, otherwise we need to keep it around until
218 * everything is gone.
220 if (atomic_read(&sk->rmem_alloc) == 0 && atomic_read(&sk->wmem_alloc) == 0)
221 kill_sk_now(sk);
222 else
223 kill_sk_later(sk);
/*
 *	The routines beyond this point handle the behaviour of an AF_INET
 *	socket object. Mostly it punts to the subprotocols of IP to do
 *	the work.
 */
234 * Set socket options on an inet socket.
237 int inet_setsockopt(struct socket *sock, int level, int optname,
238 char *optval, int optlen)
240 struct sock *sk=sock->sk;
241 if (sk->prot->setsockopt==NULL)
242 return(-EOPNOTSUPP);
243 return sk->prot->setsockopt(sk,level,optname,optval,optlen);
247 * Get a socket option on an AF_INET socket.
249 * FIX: POSIX 1003.1g is very ambiguous here. It states that
250 * asynchronous errors should be reported by getsockopt. We assume
251 * this means if you specify SO_ERROR (otherwise whats the point of it).
254 int inet_getsockopt(struct socket *sock, int level, int optname,
255 char *optval, int *optlen)
257 struct sock *sk=sock->sk;
258 if (sk->prot->getsockopt==NULL)
259 return(-EOPNOTSUPP);
260 return sk->prot->getsockopt(sk,level,optname,optval,optlen);
264 * Automatically bind an unbound socket.
267 static int inet_autobind(struct sock *sk)
269 /* We may need to bind the socket. */
270 if (sk->num == 0) {
271 sk->num = sk->prot->good_socknum();
272 if (sk->num == 0)
273 return(-EAGAIN);
274 sk->sport = htons(sk->num);
275 sk->prot->hash(sk);
276 add_to_prot_sklist(sk);
278 return 0;
/* Listening INET sockets never sleep to wait for memory, so
 * it is completely silly to wake them up on queue space
 * available events.  So we hook them up to this dummy callback.
 */
static void inet_listen_write_space(struct sock *sk)
{
	/* Intentionally empty. */
}
290 * Move a socket into listening state.
293 int inet_listen(struct socket *sock, int backlog)
295 struct sock *sk = sock->sk;
297 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
298 return(-EINVAL);
300 if (inet_autobind(sk) != 0)
301 return -EAGAIN;
303 /* We might as well re use these. */
304 if ((unsigned) backlog == 0) /* BSDism */
305 backlog = 1;
306 if ((unsigned) backlog > SOMAXCONN)
307 backlog = SOMAXCONN;
308 sk->max_ack_backlog = backlog;
309 if (sk->state != TCP_LISTEN) {
310 sk->ack_backlog = 0;
311 sk->state = TCP_LISTEN;
312 dst_release(xchg(&sk->dst_cache, NULL));
313 sk->prot->rehash(sk);
314 add_to_prot_sklist(sk);
315 sk->write_space = inet_listen_write_space;
317 sk->socket->flags |= SO_ACCEPTCON;
318 return(0);
322 * Create an inet socket.
324 * FIXME: Gcc would generate much better code if we set the parameters
325 * up in in-memory structure order. Gcc68K even more so
328 static int inet_create(struct socket *sock, int protocol)
330 struct sock *sk;
331 struct proto *prot;
333 /* Compatibility */
334 if (sock->type == SOCK_PACKET) {
335 static int warned;
336 if (net_families[PF_PACKET]==NULL)
338 #if defined(CONFIG_KMOD) && defined(CONFIG_PACKET_MODULE)
339 char module_name[30];
340 sprintf(module_name,"net-pf-%d", PF_PACKET);
341 request_module(module_name);
342 if (net_families[PF_PACKET] == NULL)
343 #endif
344 return -ESOCKTNOSUPPORT;
346 if (!warned++)
347 printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
348 return net_families[PF_PACKET]->create(sock, protocol);
351 sock->state = SS_UNCONNECTED;
352 sk = sk_alloc(PF_INET, GFP_KERNEL, 1);
353 if (sk == NULL)
354 goto do_oom;
356 switch (sock->type) {
357 case SOCK_STREAM:
358 if (protocol && protocol != IPPROTO_TCP)
359 goto free_and_noproto;
360 protocol = IPPROTO_TCP;
361 if (ipv4_config.no_pmtu_disc)
362 sk->ip_pmtudisc = IP_PMTUDISC_DONT;
363 else
364 sk->ip_pmtudisc = IP_PMTUDISC_WANT;
365 prot = &tcp_prot;
366 sock->ops = &inet_stream_ops;
367 break;
368 case SOCK_SEQPACKET:
369 goto free_and_badtype;
370 case SOCK_DGRAM:
371 if (protocol && protocol != IPPROTO_UDP)
372 goto free_and_noproto;
373 protocol = IPPROTO_UDP;
374 sk->no_check = UDP_CSUM_DEFAULT;
375 sk->ip_pmtudisc = IP_PMTUDISC_DONT;
376 prot=&udp_prot;
377 sock->ops = &inet_dgram_ops;
378 break;
379 case SOCK_RAW:
380 if (!capable(CAP_NET_RAW))
381 goto free_and_badperm;
382 if (!protocol)
383 goto free_and_noproto;
384 prot = &raw_prot;
385 sk->reuse = 1;
386 sk->ip_pmtudisc = IP_PMTUDISC_DONT;
387 sk->num = protocol;
388 sock->ops = &inet_dgram_ops;
389 if (protocol == IPPROTO_RAW)
390 sk->ip_hdrincl = 1;
391 break;
392 default:
393 goto free_and_badtype;
396 sock_init_data(sock,sk);
398 sk->destruct = NULL;
400 sk->zapped=0;
401 #ifdef CONFIG_TCP_NAGLE_OFF
402 sk->nonagle = 1;
403 #endif
404 sk->family = PF_INET;
405 sk->protocol = protocol;
407 sk->prot = prot;
408 sk->backlog_rcv = prot->backlog_rcv;
410 sk->timer.data = (unsigned long)sk;
411 sk->timer.function = &net_timer;
413 sk->ip_ttl=ip_statistics.IpDefaultTTL;
415 sk->ip_mc_loop=1;
416 sk->ip_mc_ttl=1;
417 sk->ip_mc_index=0;
418 sk->ip_mc_list=NULL;
420 if (sk->num) {
421 /* It assumes that any protocol which allows
422 * the user to assign a number at socket
423 * creation time automatically
424 * shares.
426 sk->sport = htons(sk->num);
428 /* Add to protocol hash chains. */
429 sk->prot->hash(sk);
430 add_to_prot_sklist(sk);
433 if (sk->prot->init) {
434 int err = sk->prot->init(sk);
435 if (err != 0) {
436 destroy_sock(sk);
437 return(err);
440 return(0);
442 free_and_badtype:
443 sk_free(sk);
444 return -ESOCKTNOSUPPORT;
446 free_and_badperm:
447 sk_free(sk);
448 return -EPERM;
450 free_and_noproto:
451 sk_free(sk);
452 return -EPROTONOSUPPORT;
454 do_oom:
455 return -ENOBUFS;
460 * The peer socket should always be NULL (or else). When we call this
461 * function we are destroying the object and from then on nobody
462 * should refer to it.
465 int inet_release(struct socket *sock, struct socket *peersock)
467 struct sock *sk = sock->sk;
469 if (sk) {
470 long timeout;
472 /* Begin closedown and wake up sleepers. */
473 if (sock->state != SS_UNCONNECTED)
474 sock->state = SS_DISCONNECTING;
475 sk->state_change(sk);
477 /* Applications forget to leave groups before exiting */
478 ip_mc_drop_socket(sk);
480 /* If linger is set, we don't return until the close
481 * is complete. Otherwise we return immediately. The
482 * actually closing is done the same either way.
484 * If the close is due to the process exiting, we never
485 * linger..
487 timeout = 0;
488 if (sk->linger && !(current->flags & PF_EXITING)) {
489 timeout = MAX_SCHEDULE_TIMEOUT;
491 /* XXX This makes no sense whatsoever... -DaveM */
492 if (!sk->lingertime)
493 timeout = HZ*sk->lingertime;
495 sock->sk = NULL;
496 sk->socket = NULL;
497 sk->prot->close(sk, timeout);
499 return(0);
502 static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
504 struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
505 struct sock *sk=sock->sk;
506 unsigned short snum;
507 int chk_addr_ret;
509 /* If the socket has its own bind function then use it. (RAW) */
510 if(sk->prot->bind)
511 return sk->prot->bind(sk, uaddr, addr_len);
513 /* Check these errors (active socket, bad address length, double bind). */
514 if ((sk->state != TCP_CLOSE) ||
515 (addr_len < sizeof(struct sockaddr_in)) ||
516 (sk->num != 0))
517 return -EINVAL;
519 chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
520 if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL &&
521 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) {
522 #ifdef CONFIG_IP_TRANSPARENT_PROXY
523 /* Superuser may bind to any address to allow transparent proxying. */
524 if(chk_addr_ret != RTN_UNICAST || !capable(CAP_NET_ADMIN))
525 #endif
526 return -EADDRNOTAVAIL; /* Source address MUST be ours! */
529 /* We keep a pair of addresses. rcv_saddr is the one
530 * used by hash lookups, and saddr is used for transmit.
532 * In the BSD API these are the same except where it
533 * would be illegal to use them (multicast/broadcast) in
534 * which case the sending device address is used.
536 sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
537 if(chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
538 sk->saddr = 0; /* Use device */
540 snum = ntohs(addr->sin_port);
541 #ifdef CONFIG_IP_MASQUERADE
542 /* The kernel masquerader needs some ports. */
543 if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
544 return -EADDRINUSE;
545 #endif
546 if (snum == 0)
547 snum = sk->prot->good_socknum();
548 if (snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
549 return(-EACCES);
551 /* Make sure we are allowed to bind here. */
552 if(sk->prot->verify_bind(sk, snum))
553 return -EADDRINUSE;
555 sk->num = snum;
556 sk->sport = htons(snum);
557 sk->daddr = 0;
558 sk->dport = 0;
559 sk->prot->rehash(sk);
560 add_to_prot_sklist(sk);
561 dst_release(sk->dst_cache);
562 sk->dst_cache=NULL;
563 return(0);
566 int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
567 int addr_len, int flags)
569 struct sock *sk=sock->sk;
570 int err;
572 if (inet_autobind(sk) != 0)
573 return(-EAGAIN);
574 if (sk->prot->connect == NULL)
575 return(-EOPNOTSUPP);
576 err = sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
577 if (err < 0)
578 return(err);
579 return(0);
582 static void inet_wait_for_connect(struct sock *sk)
584 DECLARE_WAITQUEUE(wait, current);
586 add_wait_queue(sk->sleep, &wait);
587 current->state = TASK_INTERRUPTIBLE;
588 while (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
589 if (signal_pending(current))
590 break;
591 if (sk->err)
592 break;
593 schedule();
594 current->state = TASK_INTERRUPTIBLE;
596 current->state = TASK_RUNNING;
597 remove_wait_queue(sk->sleep, &wait);
601 * Connect to a remote host. There is regrettably still a little
602 * TCP 'magic' in here.
605 int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
606 int addr_len, int flags)
608 struct sock *sk=sock->sk;
609 int err;
611 if(sock->state != SS_UNCONNECTED && sock->state != SS_CONNECTING) {
612 if(sock->state == SS_CONNECTED)
613 return -EISCONN;
614 return -EINVAL;
617 if(sock->state == SS_CONNECTING) {
618 /* Note: tcp_connected contains SYN_RECV, which may cause
619 bogus results here. -AK */
620 if(tcp_connected(sk->state)) {
621 sock->state = SS_CONNECTED;
622 return 0;
624 if (sk->zapped || sk->err)
625 goto sock_error;
626 if (flags & O_NONBLOCK)
627 return -EALREADY;
628 } else {
629 /* We may need to bind the socket. */
630 if (inet_autobind(sk) != 0)
631 return(-EAGAIN);
632 if (sk->prot->connect == NULL)
633 return(-EOPNOTSUPP);
634 err = sk->prot->connect(sk, uaddr, addr_len);
635 /* Note: there is a theoretical race here when an wake up
636 occurred before inet_wait_for_connect is entered. In 2.3
637 the wait queue setup should be moved before the low level
638 connect call. -AK*/
639 if (err < 0)
640 return(err);
641 sock->state = SS_CONNECTING;
644 if (sk->state > TCP_FIN_WAIT2 && sock->state == SS_CONNECTING)
645 goto sock_error;
647 if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
648 return (-EINPROGRESS);
650 if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
651 inet_wait_for_connect(sk);
652 if (signal_pending(current))
653 return -ERESTARTSYS;
656 sock->state = SS_CONNECTED;
657 if ((sk->state != TCP_ESTABLISHED) && sk->err)
658 goto sock_error;
659 return(0);
661 sock_error:
662 /* This is ugly but needed to fix a race in the ICMP error handler */
663 if (sk->zapped && sk->state != TCP_CLOSE) {
664 lock_sock(sk);
665 tcp_set_state(sk, TCP_CLOSE);
666 release_sock(sk);
667 sk->zapped = 0;
669 sock->state = SS_UNCONNECTED;
670 return sock_error(sk);
674 * Accept a pending connection. The TCP layer now gives BSD semantics.
677 int inet_accept(struct socket *sock, struct socket *newsock, int flags)
679 struct sock *sk1 = sock->sk, *sk2;
680 struct sock *newsk = newsock->sk;
681 int err = -EINVAL;
683 if (sock->state != SS_UNCONNECTED || !(sock->flags & SO_ACCEPTCON))
684 goto do_err;
686 err = -EOPNOTSUPP;
687 if (sk1->prot->accept == NULL)
688 goto do_err;
690 if((sk2 = sk1->prot->accept(sk1,flags)) == NULL)
691 goto do_sk1_err;
694 * We've been passed an extra socket.
695 * We need to free it up because the tcp module creates
696 * its own when it accepts one.
698 sk2->sleep = newsk->sleep;
700 newsock->sk = sk2;
701 sk2->socket = newsock;
702 newsk->socket = NULL;
704 if (flags & O_NONBLOCK)
705 goto do_half_success;
707 if(sk2->state == TCP_ESTABLISHED)
708 goto do_full_success;
709 if(sk2->err > 0)
710 goto do_connect_err;
711 err = -ECONNABORTED;
712 if (sk2->state == TCP_CLOSE)
713 goto do_bad_connection;
714 do_full_success:
715 destroy_sock(newsk);
716 newsock->state = SS_CONNECTED;
717 return 0;
719 do_half_success:
720 destroy_sock(newsk);
721 return(0);
723 do_connect_err:
724 err = sock_error(sk2);
725 do_bad_connection:
726 sk2->sleep = NULL;
727 sk2->socket = NULL;
728 destroy_sock(sk2);
729 newsock->sk = newsk;
730 newsk->socket = newsock;
731 return err;
733 do_sk1_err:
734 err = sock_error(sk1);
735 do_err:
736 return err;
741 * This does both peername and sockname.
744 static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
745 int *uaddr_len, int peer)
747 struct sock *sk = sock->sk;
748 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
750 sin->sin_family = AF_INET;
751 if (peer) {
752 if (!tcp_connected(sk->state))
753 return(-ENOTCONN);
754 sin->sin_port = sk->dport;
755 sin->sin_addr.s_addr = sk->daddr;
756 } else {
757 __u32 addr = sk->rcv_saddr;
758 if (!addr)
759 addr = sk->saddr;
760 sin->sin_port = sk->sport;
761 sin->sin_addr.s_addr = addr;
763 *uaddr_len = sizeof(*sin);
764 return(0);
769 int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
770 int flags, struct scm_cookie *scm)
772 struct sock *sk = sock->sk;
773 int addr_len = 0;
774 int err;
776 if (sock->flags & SO_ACCEPTCON)
777 return(-EINVAL);
778 if (sk->prot->recvmsg == NULL)
779 return(-EOPNOTSUPP);
780 /* We may need to bind the socket. */
781 if (inet_autobind(sk) != 0)
782 return(-EAGAIN);
783 err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
784 flags&~MSG_DONTWAIT, &addr_len);
785 if (err >= 0)
786 msg->msg_namelen = addr_len;
787 return err;
791 int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
792 struct scm_cookie *scm)
794 struct sock *sk = sock->sk;
796 if (sk->shutdown & SEND_SHUTDOWN) {
797 if (!(msg->msg_flags&MSG_NOSIGNAL))
798 send_sig(SIGPIPE, current, 1);
799 return(-EPIPE);
801 if (sk->prot->sendmsg == NULL)
802 return(-EOPNOTSUPP);
803 if(sk->err)
804 return sock_error(sk);
806 /* We may need to bind the socket. */
807 if(inet_autobind(sk) != 0)
808 return -EAGAIN;
810 return sk->prot->sendmsg(sk, msg, size);
814 int inet_shutdown(struct socket *sock, int how)
816 struct sock *sk = sock->sk;
818 /* This should really check to make sure
819 * the socket is a TCP socket. (WHY AC...)
821 how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
822 1->2 bit 2 snds.
823 2->3 */
824 if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */
825 return(-EINVAL);
826 if (sock->state == SS_CONNECTING && sk->state == TCP_ESTABLISHED)
827 sock->state = SS_CONNECTED;
828 if (!sk || !tcp_connected(sk->state))
829 return(-ENOTCONN);
830 sk->shutdown |= how;
831 if (sk->prot->shutdown)
832 sk->prot->shutdown(sk, how);
833 /* Wake up anyone sleeping in poll. */
834 sk->state_change(sk);
835 return(0);
839 unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait)
841 struct sock *sk = sock->sk;
843 if (sk->prot->poll == NULL)
844 return(0);
845 return sk->prot->poll(file, sock, wait);
849 * ioctl() calls you can issue on an INET socket. Most of these are
850 * device configuration and stuff and very rarely used. Some ioctls
851 * pass on to the socket itself.
853 * NOTE: I like the idea of a module for the config stuff. ie ifconfig
854 * loads the devconfigure module does its configuring and unloads it.
855 * There's a good 20K of config code hanging around the kernel.
858 static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
860 struct sock *sk = sock->sk;
861 int err;
862 int pid;
864 switch(cmd)
866 case FIOSETOWN:
867 case SIOCSPGRP:
868 err = get_user(pid, (int *) arg);
869 if (err)
870 return err;
871 if (current->pid != pid && current->pgrp != -pid &&
872 !capable(CAP_NET_ADMIN))
873 return -EPERM;
874 sk->proc = pid;
875 return(0);
876 case FIOGETOWN:
877 case SIOCGPGRP:
878 return put_user(sk->proc, (int *)arg);
879 case SIOCGSTAMP:
880 if(sk->stamp.tv_sec==0)
881 return -ENOENT;
882 err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval));
883 if (err)
884 err = -EFAULT;
885 return err;
886 case SIOCADDRT:
887 case SIOCDELRT:
888 case SIOCRTMSG:
889 return(ip_rt_ioctl(cmd,(void *) arg));
890 case SIOCDARP:
891 case SIOCGARP:
892 case SIOCSARP:
893 return(arp_ioctl(cmd,(void *) arg));
894 case SIOCDRARP:
895 case SIOCGRARP:
896 case SIOCSRARP:
897 #ifdef CONFIG_KMOD
898 if (rarp_ioctl_hook == NULL)
899 request_module("rarp");
900 #endif
901 if (rarp_ioctl_hook != NULL)
902 return(rarp_ioctl_hook(cmd,(void *) arg));
903 case SIOCGIFADDR:
904 case SIOCSIFADDR:
905 case SIOCGIFBRDADDR:
906 case SIOCSIFBRDADDR:
907 case SIOCGIFNETMASK:
908 case SIOCSIFNETMASK:
909 case SIOCGIFDSTADDR:
910 case SIOCSIFDSTADDR:
911 case SIOCSIFPFLAGS:
912 case SIOCGIFPFLAGS:
913 case SIOCSIFFLAGS:
914 return(devinet_ioctl(cmd,(void *) arg));
915 case SIOCGIFBR:
916 case SIOCSIFBR:
917 #ifdef CONFIG_BRIDGE
918 return(br_ioctl(cmd,(void *) arg));
919 #else
920 return -ENOPKG;
921 #endif
923 case SIOCADDDLCI:
924 case SIOCDELDLCI:
925 #ifdef CONFIG_DLCI
926 return(dlci_ioctl(cmd, (void *) arg));
927 #endif
929 #ifdef CONFIG_DLCI_MODULE
931 #ifdef CONFIG_KMOD
932 if (dlci_ioctl_hook == NULL)
933 request_module("dlci");
934 #endif
936 if (dlci_ioctl_hook)
937 return((*dlci_ioctl_hook)(cmd, (void *) arg));
938 #endif
939 return -ENOPKG;
941 default:
942 if ((cmd >= SIOCDEVPRIVATE) &&
943 (cmd <= (SIOCDEVPRIVATE + 15)))
944 return(dev_ioctl(cmd,(void *) arg));
946 #ifdef CONFIG_NET_RADIO
947 if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
948 return(dev_ioctl(cmd,(void *) arg));
949 #endif
951 if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD)
952 return(dev_ioctl(cmd,(void *) arg));
953 return err;
955 /*NOTREACHED*/
956 return(0);
959 struct proto_ops inet_stream_ops = {
960 PF_INET,
962 sock_no_dup,
963 inet_release,
964 inet_bind,
965 inet_stream_connect,
966 sock_no_socketpair,
967 inet_accept,
968 inet_getname,
969 inet_poll,
970 inet_ioctl,
971 inet_listen,
972 inet_shutdown,
973 inet_setsockopt,
974 inet_getsockopt,
975 sock_no_fcntl,
976 inet_sendmsg,
977 inet_recvmsg
980 struct proto_ops inet_dgram_ops = {
981 PF_INET,
983 sock_no_dup,
984 inet_release,
985 inet_bind,
986 inet_dgram_connect,
987 sock_no_socketpair,
988 sock_no_accept,
989 inet_getname,
990 datagram_poll,
991 inet_ioctl,
992 sock_no_listen,
993 inet_shutdown,
994 inet_setsockopt,
995 inet_getsockopt,
996 sock_no_fcntl,
997 inet_sendmsg,
998 inet_recvmsg
1001 struct net_proto_family inet_family_ops = {
1002 PF_INET,
1003 inet_create
#ifdef CONFIG_PROC_FS
/*
 * /proc/net entries registered by inet_proto_init().  Every entry uses
 * the same positional layout; only the first field, the name (with its
 * length just before it) and the trailing info routine differ.
 */
#ifdef CONFIG_INET_RARP
static struct proc_dir_entry proc_net_rarp = {
	PROC_NET_RARP, 4, "rarp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	rarp_get_info
};
#endif		/* RARP */

static struct proc_dir_entry proc_net_raw = {
	PROC_NET_RAW, 3, "raw",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	raw_get_info
};

static struct proc_dir_entry proc_net_netstat = {
	PROC_NET_NETSTAT, 7, "netstat",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	netstat_get_info
};

static struct proc_dir_entry proc_net_snmp = {
	PROC_NET_SNMP, 4, "snmp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	snmp_get_info
};

static struct proc_dir_entry proc_net_sockstat = {
	PROC_NET_SOCKSTAT, 8, "sockstat",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	afinet_get_info
};

static struct proc_dir_entry proc_net_tcp = {
	PROC_NET_TCP, 3, "tcp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	tcp_get_info
};

static struct proc_dir_entry proc_net_udp = {
	PROC_NET_UDP, 3, "udp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	udp_get_info
};
#endif		/* CONFIG_PROC_FS */
1054 extern void tcp_init(void);
1055 extern void tcp_v4_init(struct net_proto_family *);
1059 * Called by socket.c on kernel startup.
1062 __initfunc(void inet_proto_init(struct net_proto *pro))
1064 struct sk_buff *dummy_skb;
1065 struct inet_protocol *p;
1067 printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n");
1069 if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb))
1071 printk(KERN_CRIT "inet_proto_init: panic\n");
1072 return;
1076 * Tell SOCKET that we are alive...
1079 (void) sock_register(&inet_family_ops);
1082 * Add all the protocols.
1085 printk(KERN_INFO "IP Protocols: ");
1086 for(p = inet_protocol_base; p != NULL;)
1088 struct inet_protocol *tmp = (struct inet_protocol *) p->next;
1089 inet_add_protocol(p);
1090 printk("%s%s",p->name,tmp?", ":"\n");
1091 p = tmp;
1095 * Set the ARP module up
1098 arp_init();
1101 * Set the IP module up
1104 ip_init();
1106 tcp_v4_init(&inet_family_ops);
1108 /* Setup TCP slab cache for open requests. */
1109 tcp_init();
1113 * Set the ICMP layer up
1116 icmp_init(&inet_family_ops);
1118 /* I wish inet_add_protocol had no constructor hook...
1119 I had to move IPIP from net/ipv4/protocol.c :-( --ANK
1121 #ifdef CONFIG_NET_IPIP
1122 ipip_init();
1123 #endif
1124 #ifdef CONFIG_NET_IPGRE
1125 ipgre_init();
1126 #endif
1129 * Set the firewalling up
1131 #if defined(CONFIG_IP_FIREWALL)
1132 ip_fw_init();
1133 #endif
1135 #ifdef CONFIG_IP_MASQUERADE
1136 ip_masq_init();
1137 #endif
1140 * Initialise the multicast router
1142 #if defined(CONFIG_IP_MROUTE)
1143 ip_mr_init();
1144 #endif
1146 #ifdef CONFIG_INET_RARP
1147 rarp_ioctl_hook = rarp_ioctl;
1148 #endif
1150 * Create all the /proc entries.
1153 #ifdef CONFIG_PROC_FS
1154 #ifdef CONFIG_INET_RARP
1155 proc_net_register(&proc_net_rarp);
1156 #endif /* RARP */
1157 proc_net_register(&proc_net_raw);
1158 proc_net_register(&proc_net_snmp);
1159 proc_net_register(&proc_net_netstat);
1160 proc_net_register(&proc_net_sockstat);
1161 proc_net_register(&proc_net_tcp);
1162 proc_net_register(&proc_net_udp);
1163 #endif /* CONFIG_PROC_FS */