Import 2.3.18pre1
[davej-history.git] / net / ipv4 / af_inet.c
blob526dd4dd6a67a3e11b2a0757159427acb22d2acd
1 /*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * PF_INET protocol family socket handler.
8 * Version: $Id: af_inet.c,v 1.97 1999/09/08 03:46:46 davem Exp $
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
15 * Changes (see also sock.c)
17 * A.N.Kuznetsov : Socket death error in accept().
18 * John Richardson : Fix non blocking error in connect()
19 * so sockets that fail to connect
20 * don't return -EINPROGRESS.
21 * Alan Cox : Asynchronous I/O support
22 * Alan Cox : Keep correct socket pointer on sock structures
23 * when accept() ed
24 * Alan Cox : Semantics of SO_LINGER aren't state moved
25 * to close when you look carefully. With
26 * this fixed and the accept bug fixed
27 * some RPC stuff seems happier.
28 * Niibe Yutaka : 4.4BSD style write async I/O
29 * Alan Cox,
30 * Tony Gale : Fixed reuse semantics.
31 * Alan Cox : bind() shouldn't abort existing but dead
32 * sockets. Stops FTP netin:.. I hope.
33 * Alan Cox : bind() works correctly for RAW sockets. Note
34 * that FreeBSD at least was broken in this respect
35 * so be careful with compatibility tests...
36 * Alan Cox : routing cache support
37 * Alan Cox : memzero the socket structure for compactness.
38 * Matt Day : nonblock connect error handler
39 * Alan Cox : Allow large numbers of pending sockets
40 * (eg for big web sites), but only if
41 * specifically application requested.
42 * Alan Cox : New buffering throughout IP. Used dumbly.
43 * Alan Cox : New buffering now used smartly.
44 * Alan Cox : BSD rather than common sense interpretation of
45 * listen.
46 * Germano Caronni : Assorted small races.
47 * Alan Cox : sendmsg/recvmsg basic support.
48 * Alan Cox : Only sendmsg/recvmsg now supported.
49 * Alan Cox : Locked down bind (see security list).
50 * Alan Cox : Loosened bind a little.
51 * Mike McLagan : ADD/DEL DLCI Ioctls
52 * Willy Konynenberg : Transparent proxying support.
53 * David S. Miller : New socket lookup architecture.
54 * Some other random speedups.
55 * Cyrus Durgin : Cleaned up file for kmod hacks.
56 * Andi Kleen : Fix inet_stream_connect TCP race.
58 * This program is free software; you can redistribute it and/or
59 * modify it under the terms of the GNU General Public License
60 * as published by the Free Software Foundation; either version
61 * 2 of the License, or (at your option) any later version.
64 #include <linux/config.h>
65 #include <linux/errno.h>
66 #include <linux/types.h>
67 #include <linux/socket.h>
68 #include <linux/in.h>
69 #include <linux/kernel.h>
70 #include <linux/major.h>
71 #include <linux/sched.h>
72 #include <linux/timer.h>
73 #include <linux/string.h>
74 #include <linux/sockios.h>
75 #include <linux/net.h>
76 #include <linux/fcntl.h>
77 #include <linux/mm.h>
78 #include <linux/interrupt.h>
79 #include <linux/proc_fs.h>
80 #include <linux/stat.h>
81 #include <linux/init.h>
82 #include <linux/poll.h>
83 #include <linux/netfilter_ipv4.h>
85 #include <asm/uaccess.h>
86 #include <asm/system.h>
88 #include <linux/smp_lock.h>
89 #include <linux/inet.h>
90 #include <linux/netdevice.h>
91 #include <net/ip.h>
92 #include <net/protocol.h>
93 #include <net/arp.h>
94 #include <net/route.h>
95 #include <net/tcp.h>
96 #include <net/udp.h>
97 #include <linux/skbuff.h>
98 #include <net/sock.h>
99 #include <net/raw.h>
100 #include <net/icmp.h>
101 #include <net/ipip.h>
102 #include <net/inet_common.h>
103 #ifdef CONFIG_IP_MROUTE
104 #include <linux/mroute.h>
105 #endif
106 #ifdef CONFIG_BRIDGE
107 #include <net/br.h>
108 #endif
109 #ifdef CONFIG_KMOD
110 #include <linux/kmod.h>
111 #endif
112 #ifdef CONFIG_NET_RADIO
113 #include <linux/wireless.h>
114 #endif /* CONFIG_NET_RADIO */
116 #define min(a,b) ((a)<(b)?(a):(b))
118 struct linux_mib net_statistics;
120 atomic_t inet_sock_nr;
122 extern int raw_get_info(char *, char **, off_t, int, int);
123 extern int snmp_get_info(char *, char **, off_t, int, int);
124 extern int netstat_get_info(char *, char **, off_t, int, int);
125 extern int afinet_get_info(char *, char **, off_t, int, int);
126 extern int tcp_get_info(char *, char **, off_t, int, int);
127 extern int udp_get_info(char *, char **, off_t, int, int);
128 extern void ip_mc_drop_socket(struct sock *sk);
130 #ifdef CONFIG_DLCI
131 extern int dlci_ioctl(unsigned int, void*);
132 #endif
134 #ifdef CONFIG_DLCI_MODULE
135 int (*dlci_ioctl_hook)(unsigned int, void *) = NULL;
136 #endif
138 /* New destruction routine */
140 void inet_sock_destruct(struct sock *sk)
142 __skb_queue_purge(&sk->receive_queue);
143 __skb_queue_purge(&sk->error_queue);
145 if (sk->type == SOCK_STREAM && sk->state != TCP_CLOSE) {
146 printk("Attempt to release TCP socket in state %d %p\n",
147 sk->state,
148 sk);
149 return;
151 if (!sk->dead) {
152 printk("Attempt to release alive inet socket %p\n", sk);
153 return;
156 BUG_TRAP(atomic_read(&sk->rmem_alloc) == 0);
157 BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
159 if (sk->protinfo.af_inet.opt)
160 kfree(sk->protinfo.af_inet.opt);
161 dst_release(sk->dst_cache);
162 atomic_dec(&inet_sock_nr);
163 #ifdef INET_REFCNT_DEBUG
164 printk(KERN_DEBUG "INET socket %p released, %d are still alive\n", sk, atomic_read(&inet_sock_nr));
165 #endif
168 void inet_sock_release(struct sock *sk)
170 if (sk->prot->destroy)
171 sk->prot->destroy(sk);
173 /* Observation: when inet_sock_release is called, processes have
174 no access to socket. But net still has.
175 Step one, detach it from networking:
177 A. Remove from hash tables.
180 sk->prot->unhash(sk);
182 /* In this point socket cannot receive new packets,
183 but it is possible that some packets are in flight
184 because some CPU runs receiver and did hash table lookup
185 before we unhashed socket. They will achieve receive queue
186 and will be purged by socket destructor.
188 Also we still have packets pending on receive
189 queue and probably, our own packets waiting in device queues.
190 sock_destroy will drain receive queue, but transmitted
191 packets will delay socket destruction until the last reference
192 will be released.
195 write_lock_irq(&sk->callback_lock);
196 sk->dead=1;
197 sk->socket = NULL;
198 sk->sleep = NULL;
199 write_unlock_irq(&sk->callback_lock);
201 #ifdef INET_REFCNT_DEBUG
202 if (atomic_read(&sk->refcnt) != 1) {
203 printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt));
205 #endif
206 sock_put(sk);
211 * The routines beyond this point handle the behaviour of an AF_INET
212 * socket object. Mostly it punts to the subprotocols of IP to do
213 * the work.
218 * Set socket options on an inet socket.
221 int inet_setsockopt(struct socket *sock, int level, int optname,
222 char *optval, int optlen)
224 struct sock *sk=sock->sk;
225 if (sk->prot->setsockopt==NULL)
226 return -EOPNOTSUPP;
227 return sk->prot->setsockopt(sk,level,optname,optval,optlen);
231 * Get a socket option on an AF_INET socket.
233 * FIX: POSIX 1003.1g is very ambiguous here. It states that
234 * asynchronous errors should be reported by getsockopt. We assume
235 * this means if you specify SO_ERROR (otherwise whats the point of it).
238 int inet_getsockopt(struct socket *sock, int level, int optname,
239 char *optval, int *optlen)
241 struct sock *sk=sock->sk;
242 if (sk->prot->getsockopt==NULL)
243 return -EOPNOTSUPP;
244 return sk->prot->getsockopt(sk,level,optname,optval,optlen);
248 * Automatically bind an unbound socket.
251 static int inet_autobind(struct sock *sk)
253 /* We may need to bind the socket. */
254 lock_sock(sk);
255 if (sk->num == 0) {
256 if (sk->prot->get_port(sk, 0) != 0) {
257 release_sock(sk);
258 return -EAGAIN;
260 sk->sport = htons(sk->num);
261 sk->prot->hash(sk);
263 release_sock(sk);
264 return 0;
/*
 * Listening INET sockets never sleep waiting for memory, so waking them
 * when queue space becomes available is pointless.  inet_listen() installs
 * this no-op as their write_space callback.
 */
static void inet_listen_write_space(struct sock *sk)
{
	/* Intentionally empty. */
}
276 * Move a socket into listening state.
279 int inet_listen(struct socket *sock, int backlog)
281 struct sock *sk = sock->sk;
282 unsigned char old_state;
283 int err;
285 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
286 return -EINVAL;
288 lock_sock(sk);
289 old_state = sk->state;
290 err = -EINVAL;
291 if (!((1<<old_state)&(TCPF_CLOSE|TCPF_LISTEN)))
292 goto out;
294 /* Really, if the socket is already in listen state
295 * we can only allow the backlog to be adjusted.
297 if (old_state != TCP_LISTEN) {
298 sk->state = TCP_LISTEN;
299 sk->ack_backlog = 0;
300 if (sk->num == 0) {
301 if (sk->prot->get_port(sk, 0) != 0) {
302 sk->state = old_state;
303 err = -EAGAIN;
304 goto out;
306 sk->sport = htons(sk->num);
307 } else {
308 /* Not nice, but the simplest solution however */
309 if (sk->prev)
310 ((struct tcp_bind_bucket*)sk->prev)->fastreuse = 0;
313 sk_dst_reset(sk);
314 sk->prot->hash(sk);
315 sk->socket->flags |= SO_ACCEPTCON;
316 sk->write_space = inet_listen_write_space;
318 sk->max_ack_backlog = backlog;
319 err = 0;
321 out:
322 release_sock(sk);
323 return err;
327 * Create an inet socket.
329 * FIXME: Gcc would generate much better code if we set the parameters
330 * up in in-memory structure order. Gcc68K even more so
333 static int inet_create(struct socket *sock, int protocol)
335 struct sock *sk;
336 struct proto *prot;
338 sock->state = SS_UNCONNECTED;
339 sk = sk_alloc(PF_INET, GFP_KERNEL, 1);
340 if (sk == NULL)
341 goto do_oom;
343 switch (sock->type) {
344 case SOCK_STREAM:
345 if (protocol && protocol != IPPROTO_TCP)
346 goto free_and_noproto;
347 protocol = IPPROTO_TCP;
348 if (ipv4_config.no_pmtu_disc)
349 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
350 else
351 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_WANT;
352 prot = &tcp_prot;
353 sock->ops = &inet_stream_ops;
354 break;
355 case SOCK_SEQPACKET:
356 goto free_and_badtype;
357 case SOCK_DGRAM:
358 if (protocol && protocol != IPPROTO_UDP)
359 goto free_and_noproto;
360 protocol = IPPROTO_UDP;
361 sk->no_check = UDP_CSUM_DEFAULT;
362 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
363 prot=&udp_prot;
364 sock->ops = &inet_dgram_ops;
365 break;
366 case SOCK_RAW:
367 if (!capable(CAP_NET_RAW))
368 goto free_and_badperm;
369 if (!protocol)
370 goto free_and_noproto;
371 prot = &raw_prot;
372 sk->reuse = 1;
373 sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
374 sk->num = protocol;
375 sock->ops = &inet_dgram_ops;
376 if (protocol == IPPROTO_RAW)
377 sk->protinfo.af_inet.hdrincl = 1;
378 break;
379 default:
380 goto free_and_badtype;
383 sock_init_data(sock,sk);
385 sk->destruct = inet_sock_destruct;
387 sk->zapped=0;
388 #ifdef CONFIG_TCP_NAGLE_OFF
389 sk->nonagle = 1;
390 #endif
391 sk->family = PF_INET;
392 sk->protocol = protocol;
394 sk->prot = prot;
395 sk->backlog_rcv = prot->backlog_rcv;
397 sk->timer.data = (unsigned long)sk;
398 sk->timer.function = &tcp_keepalive_timer;
400 sk->protinfo.af_inet.ttl=ip_statistics.IpDefaultTTL;
402 sk->protinfo.af_inet.mc_loop=1;
403 sk->protinfo.af_inet.mc_ttl=1;
404 sk->protinfo.af_inet.mc_index=0;
405 sk->protinfo.af_inet.mc_list=NULL;
407 atomic_inc(&inet_sock_nr);
409 if (sk->num) {
410 /* It assumes that any protocol which allows
411 * the user to assign a number at socket
412 * creation time automatically
413 * shares.
415 sk->sport = htons(sk->num);
417 /* Add to protocol hash chains. */
418 sk->prot->hash(sk);
421 if (sk->prot->init) {
422 int err = sk->prot->init(sk);
423 if (err != 0) {
424 sk->dead = 1;
425 inet_sock_release(sk);
426 return(err);
429 return(0);
431 free_and_badtype:
432 sk_free(sk);
433 return -ESOCKTNOSUPPORT;
435 free_and_badperm:
436 sk_free(sk);
437 return -EPERM;
439 free_and_noproto:
440 sk_free(sk);
441 return -EPROTONOSUPPORT;
443 do_oom:
444 return -ENOBUFS;
449 * The peer socket should always be NULL (or else). When we call this
450 * function we are destroying the object and from then on nobody
451 * should refer to it.
454 int inet_release(struct socket *sock)
456 struct sock *sk = sock->sk;
458 if (sk) {
459 long timeout;
461 /* Applications forget to leave groups before exiting */
462 ip_mc_drop_socket(sk);
464 /* If linger is set, we don't return until the close
465 * is complete. Otherwise we return immediately. The
466 * actually closing is done the same either way.
468 * If the close is due to the process exiting, we never
469 * linger..
471 timeout = 0;
472 if (sk->linger && !(current->flags & PF_EXITING)) {
473 timeout = HZ * sk->lingertime;
474 if (!timeout)
475 timeout = MAX_SCHEDULE_TIMEOUT;
477 sock->sk = NULL;
478 sk->prot->close(sk, timeout);
480 return(0);
483 static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
485 struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
486 struct sock *sk=sock->sk;
487 unsigned short snum;
488 int chk_addr_ret;
489 int err;
491 /* If the socket has its own bind function then use it. (RAW) */
492 if(sk->prot->bind)
493 return sk->prot->bind(sk, uaddr, addr_len);
495 if (addr_len < sizeof(struct sockaddr_in))
496 return -EINVAL;
498 chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr);
499 if (addr->sin_addr.s_addr != 0 && chk_addr_ret != RTN_LOCAL &&
500 chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) {
501 return -EADDRNOTAVAIL; /* Source address MUST be ours! */
504 snum = ntohs(addr->sin_port);
505 if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
506 return -EACCES;
508 /* We keep a pair of addresses. rcv_saddr is the one
509 * used by hash lookups, and saddr is used for transmit.
511 * In the BSD API these are the same except where it
512 * would be illegal to use them (multicast/broadcast) in
513 * which case the sending device address is used.
515 lock_sock(sk);
517 /* Check these errors (active socket, double bind). */
518 err = -EINVAL;
519 if ((sk->state != TCP_CLOSE) ||
520 (sk->num != 0))
521 goto out;
523 sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
524 if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
525 sk->saddr = 0; /* Use device */
527 /* Make sure we are allowed to bind here. */
528 if (sk->prot->get_port(sk, snum) != 0) {
529 sk->saddr = sk->rcv_saddr = 0;
530 err = -EADDRINUSE;
531 goto out;
534 sk->sport = htons(sk->num);
535 sk->daddr = 0;
536 sk->dport = 0;
537 sk->prot->hash(sk);
538 sk_dst_reset(sk);
539 err = 0;
540 out:
541 release_sock(sk);
542 return err;
545 int inet_dgram_connect(struct socket *sock, struct sockaddr * uaddr,
546 int addr_len, int flags)
548 struct sock *sk=sock->sk;
550 if (uaddr->sa_family == AF_UNSPEC)
551 return sk->prot->disconnect(sk, flags);
553 if (sk->num==0 && inet_autobind(sk) != 0)
554 return -EAGAIN;
555 return sk->prot->connect(sk, (struct sockaddr *)uaddr, addr_len);
558 static void inet_wait_for_connect(struct sock *sk)
560 DECLARE_WAITQUEUE(wait, current);
562 __set_current_state(TASK_INTERRUPTIBLE);
563 add_wait_queue(sk->sleep, &wait);
565 while ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
566 if (signal_pending(current))
567 break;
568 if (sk->err)
569 break;
570 release_sock(sk);
571 schedule();
572 lock_sock(sk);
573 set_current_state(TASK_INTERRUPTIBLE);
575 __set_current_state(TASK_RUNNING);
576 remove_wait_queue(sk->sleep, &wait);
580 * Connect to a remote host. There is regrettably still a little
581 * TCP 'magic' in here.
584 int inet_stream_connect(struct socket *sock, struct sockaddr * uaddr,
585 int addr_len, int flags)
587 struct sock *sk=sock->sk;
588 int err;
590 if (uaddr->sa_family == AF_UNSPEC) {
591 lock_sock(sk);
592 err = sk->prot->disconnect(sk, flags);
593 sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
594 release_sock(sk);
595 return err;
598 lock_sock(sk);
599 switch (sock->state) {
600 default:
601 err = -EINVAL;
602 goto out;
603 case SS_CONNECTED:
604 err = -EISCONN;
605 goto out;
606 case SS_CONNECTING:
607 if (tcp_established(sk->state)) {
608 sock->state = SS_CONNECTED;
609 err = 0;
610 goto out;
612 if (sk->err)
613 goto sock_error;
614 err = -EALREADY;
615 if (flags & O_NONBLOCK)
616 goto out;
617 break;
618 case SS_UNCONNECTED:
619 err = sk->prot->connect(sk, uaddr, addr_len);
620 if (err < 0)
621 goto out;
622 sock->state = SS_CONNECTING;
625 if (sk->state > TCP_FIN_WAIT2)
626 goto sock_error;
628 err = -EINPROGRESS;
629 if (!tcp_established(sk->state) && (flags & O_NONBLOCK))
630 goto out;
632 if ((1<<sk->state)&(TCPF_SYN_SENT|TCPF_SYN_RECV)) {
633 inet_wait_for_connect(sk);
634 err = -ERESTARTSYS;
635 if (signal_pending(current))
636 goto out;
639 if (sk->err && !tcp_established(sk->state))
640 goto sock_error;
641 sock->state = SS_CONNECTED;
642 err = 0;
643 out:
644 release_sock(sk);
645 return err;
647 sock_error:
648 err = sock_error(sk) ? : -ECONNABORTED;
649 sock->state = SS_UNCONNECTED;
650 if (sk->prot->disconnect(sk, O_NONBLOCK))
651 sock->state = SS_DISCONNECTING;
652 release_sock(sk);
654 return err;
658 * Accept a pending connection. The TCP layer now gives BSD semantics.
661 int inet_accept(struct socket *sock, struct socket *newsock, int flags)
663 struct sock *sk1 = sock->sk;
664 struct sock *sk2;
665 int err = -EINVAL;
667 if((sk2 = sk1->prot->accept(sk1,flags,&err)) == NULL)
668 goto do_err;
670 lock_sock(sk2);
672 BUG_TRAP((1<<sk2->state)&(TCPF_ESTABLISHED|TCPF_CLOSE_WAIT|TCPF_CLOSE));
674 write_lock_irq(&sk2->callback_lock);
675 sk2->sleep = &newsock->wait;
676 newsock->sk = sk2;
677 sk2->socket = newsock;
678 write_unlock_irq(&sk2->callback_lock);
680 newsock->state = SS_CONNECTED;
681 release_sock(sk2);
682 return 0;
684 do_err:
685 return err;
690 * This does both peername and sockname.
693 static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
694 int *uaddr_len, int peer)
696 struct sock *sk = sock->sk;
697 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
699 sin->sin_family = AF_INET;
700 if (peer) {
701 if (!sk->dport)
702 return -ENOTCONN;
703 sin->sin_port = sk->dport;
704 sin->sin_addr.s_addr = sk->daddr;
705 } else {
706 __u32 addr = sk->rcv_saddr;
707 if (!addr)
708 addr = sk->saddr;
709 sin->sin_port = sk->sport;
710 sin->sin_addr.s_addr = addr;
712 *uaddr_len = sizeof(*sin);
713 return(0);
718 int inet_recvmsg(struct socket *sock, struct msghdr *msg, int size,
719 int flags, struct scm_cookie *scm)
721 struct sock *sk = sock->sk;
722 int addr_len = 0;
723 int err;
725 /* We may need to bind the socket. */
726 /* It is pretty strange. I would return error in this case --ANK */
727 if (sk->num==0 && inet_autobind(sk) != 0)
728 return -EAGAIN;
729 err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
730 flags&~MSG_DONTWAIT, &addr_len);
731 if (err >= 0)
732 msg->msg_namelen = addr_len;
733 return err;
737 int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
738 struct scm_cookie *scm)
740 struct sock *sk = sock->sk;
742 /* We may need to bind the socket. */
743 if (sk->num==0 && inet_autobind(sk) != 0)
744 return -EAGAIN;
746 return sk->prot->sendmsg(sk, msg, size);
749 int inet_shutdown(struct socket *sock, int how)
751 struct sock *sk = sock->sk;
752 int err;
754 /* This should really check to make sure
755 * the socket is a TCP socket. (WHY AC...)
757 how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
758 1->2 bit 2 snds.
759 2->3 */
760 if ((how & ~SHUTDOWN_MASK) || how==0) /* MAXINT->0 */
761 return -EINVAL;
762 if (!sk)
763 return -ENOTCONN;
765 lock_sock(sk);
766 if (sock->state == SS_CONNECTING && tcp_established(sk->state))
767 sock->state = SS_CONNECTED;
768 err = -ENOTCONN;
769 if (!tcp_connected(sk->state))
770 goto out;
771 sk->shutdown |= how;
772 if (sk->prot->shutdown)
773 sk->prot->shutdown(sk, how);
774 /* Wake up anyone sleeping in poll. */
775 sk->state_change(sk);
776 err = 0;
777 out:
778 release_sock(sk);
779 return err;
782 unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait)
784 struct sock *sk = sock->sk;
786 if (sk->prot->poll == NULL)
787 return(0);
788 return sk->prot->poll(file, sock, wait);
792 * ioctl() calls you can issue on an INET socket. Most of these are
793 * device configuration and stuff and very rarely used. Some ioctls
794 * pass on to the socket itself.
796 * NOTE: I like the idea of a module for the config stuff. ie ifconfig
797 * loads the devconfigure module does its configuring and unloads it.
798 * There's a good 20K of config code hanging around the kernel.
801 static int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
803 struct sock *sk = sock->sk;
804 int err;
805 int pid;
807 switch(cmd)
809 case FIOSETOWN:
810 case SIOCSPGRP:
811 err = get_user(pid, (int *) arg);
812 if (err)
813 return err;
814 if (current->pid != pid && current->pgrp != -pid &&
815 !capable(CAP_NET_ADMIN))
816 return -EPERM;
817 sk->proc = pid;
818 return(0);
819 case FIOGETOWN:
820 case SIOCGPGRP:
821 return put_user(sk->proc, (int *)arg);
822 case SIOCGSTAMP:
823 if(sk->stamp.tv_sec==0)
824 return -ENOENT;
825 err = copy_to_user((void *)arg,&sk->stamp,sizeof(struct timeval));
826 if (err)
827 err = -EFAULT;
828 return err;
829 case SIOCADDRT:
830 case SIOCDELRT:
831 case SIOCRTMSG:
832 return(ip_rt_ioctl(cmd,(void *) arg));
833 case SIOCDARP:
834 case SIOCGARP:
835 case SIOCSARP:
836 return(arp_ioctl(cmd,(void *) arg));
837 case SIOCGIFADDR:
838 case SIOCSIFADDR:
839 case SIOCGIFBRDADDR:
840 case SIOCSIFBRDADDR:
841 case SIOCGIFNETMASK:
842 case SIOCSIFNETMASK:
843 case SIOCGIFDSTADDR:
844 case SIOCSIFDSTADDR:
845 case SIOCSIFPFLAGS:
846 case SIOCGIFPFLAGS:
847 case SIOCSIFFLAGS:
848 return(devinet_ioctl(cmd,(void *) arg));
849 case SIOCGIFBR:
850 case SIOCSIFBR:
851 #ifdef CONFIG_BRIDGE
852 lock_kernel();
853 err = br_ioctl(cmd,(void *) arg);
854 unlock_kernel();
855 return err;
856 #else
857 return -ENOPKG;
858 #endif
860 case SIOCADDDLCI:
861 case SIOCDELDLCI:
862 #ifdef CONFIG_DLCI
863 lock_kernel();
864 err = dlci_ioctl(cmd, (void *) arg);
865 unlock_kernel();
866 return err;
867 #endif
869 #ifdef CONFIG_DLCI_MODULE
871 #ifdef CONFIG_KMOD
872 if (dlci_ioctl_hook == NULL)
873 request_module("dlci");
874 #endif
876 if (dlci_ioctl_hook) {
877 lock_kernel();
878 err = (*dlci_ioctl_hook)(cmd, (void *) arg);
879 unlock_kernel();
880 return err;
882 #endif
883 return -ENOPKG;
885 default:
886 if ((cmd >= SIOCDEVPRIVATE) &&
887 (cmd <= (SIOCDEVPRIVATE + 15)))
888 return(dev_ioctl(cmd,(void *) arg));
890 #ifdef CONFIG_NET_RADIO
891 if((cmd >= SIOCIWFIRST) && (cmd <= SIOCIWLAST))
892 return(dev_ioctl(cmd,(void *) arg));
893 #endif
895 if (sk->prot->ioctl==NULL || (err=sk->prot->ioctl(sk, cmd, arg))==-ENOIOCTLCMD)
896 return(dev_ioctl(cmd,(void *) arg));
897 return err;
899 /*NOTREACHED*/
900 return(0);
903 struct proto_ops inet_stream_ops = {
904 PF_INET,
906 inet_release,
907 inet_bind,
908 inet_stream_connect,
909 sock_no_socketpair,
910 inet_accept,
911 inet_getname,
912 inet_poll,
913 inet_ioctl,
914 inet_listen,
915 inet_shutdown,
916 inet_setsockopt,
917 inet_getsockopt,
918 sock_no_fcntl,
919 inet_sendmsg,
920 inet_recvmsg,
921 sock_no_mmap
924 struct proto_ops inet_dgram_ops = {
925 PF_INET,
927 inet_release,
928 inet_bind,
929 inet_dgram_connect,
930 sock_no_socketpair,
931 sock_no_accept,
932 inet_getname,
933 datagram_poll,
934 inet_ioctl,
935 sock_no_listen,
936 inet_shutdown,
937 inet_setsockopt,
938 inet_getsockopt,
939 sock_no_fcntl,
940 inet_sendmsg,
941 inet_recvmsg,
942 sock_no_mmap
945 struct net_proto_family inet_family_ops = {
946 PF_INET,
947 inet_create
#ifdef CONFIG_PROC_FS
/*
 * /proc/net entries registered by inet_proto_init().  Each is a positional
 * initializer; in every entry the second value equals the length of the
 * name string and the last value is the read handler.
 */
static struct proc_dir_entry proc_net_raw = {
	PROC_NET_RAW, 3, "raw",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	raw_get_info
};

static struct proc_dir_entry proc_net_netstat = {
	PROC_NET_NETSTAT, 7, "netstat",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	netstat_get_info
};

static struct proc_dir_entry proc_net_snmp = {
	PROC_NET_SNMP, 4, "snmp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	snmp_get_info
};

static struct proc_dir_entry proc_net_sockstat = {
	PROC_NET_SOCKSTAT, 8, "sockstat",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	afinet_get_info
};

static struct proc_dir_entry proc_net_tcp = {
	PROC_NET_TCP, 3, "tcp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	tcp_get_info
};

static struct proc_dir_entry proc_net_udp = {
	PROC_NET_UDP, 3, "udp",
	S_IFREG | S_IRUGO, 1, 0, 0,
	0, &proc_net_inode_operations,
	udp_get_info
};
#endif		/* CONFIG_PROC_FS */
/*
 * inet_proto_init - boot-time initialisation of the IPv4 socket layer.
 *
 * Visible steps, in order: print the banner; bail out with a KERN_CRIT
 * message if struct inet_skb_parm does not fit in the skb control buffer;
 * register inet_family_ops with sock_register() (return value discarded);
 * add every protocol on the inet_protocol_base list, printing its name;
 * then call arp_init(), ip_init(), tcp_v4_init(), tcp_init() and
 * icmp_init(), followed by the optional IPIP, GRE and multicast-router
 * initialisers and, with CONFIG_PROC_FS, registration of the /proc/net
 * entries.  The pro argument is not used by any visible line.
 *
 * NOTE(review): this listing carries the line numbers of the page it was
 * extracted from and has lost its blank, brace-only and comment-delimiter
 * lines, including the function's closing brace.  The lines below are left
 * exactly as found; restore them from a pristine copy before building.
 */
990 extern void tcp_init(void);
991 extern void tcp_v4_init(struct net_proto_family *);
995 * Called by socket.c on kernel startup.
998 void __init inet_proto_init(struct net_proto *pro)
1000 struct sk_buff *dummy_skb;
1001 struct inet_protocol *p;
1003 printk(KERN_INFO "NET4: Linux TCP/IP 1.0 for NET4.0\n");
1005 if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb))
1007 printk(KERN_CRIT "inet_proto_init: panic\n");
1008 return;
1012 * Tell SOCKET that we are alive...
1015 (void) sock_register(&inet_family_ops);
1018 * Add all the protocols.
1021 printk(KERN_INFO "IP Protocols: ");
1022 for(p = inet_protocol_base; p != NULL;)
1024 struct inet_protocol *tmp = (struct inet_protocol *) p->next;
1025 inet_add_protocol(p);
1026 printk("%s%s",p->name,tmp?", ":"\n");
1027 p = tmp;
1031 * Set the ARP module up
1034 arp_init();
1037 * Set the IP module up
1040 ip_init();
1042 tcp_v4_init(&inet_family_ops);
1044 /* Setup TCP slab cache for open requests. */
1045 tcp_init();
1049 * Set the ICMP layer up
1052 icmp_init(&inet_family_ops);
1054 /* I wish inet_add_protocol had no constructor hook...
1055 I had to move IPIP from net/ipv4/protocol.c :-( --ANK
1057 #ifdef CONFIG_NET_IPIP
1058 ipip_init();
1059 #endif
1060 #ifdef CONFIG_NET_IPGRE
1061 ipgre_init();
1062 #endif
1065 * Initialise the multicast router
1067 #if defined(CONFIG_IP_MROUTE)
1068 ip_mr_init();
1069 #endif
1072 * Create all the /proc entries.
1075 #ifdef CONFIG_PROC_FS
1076 proc_net_register(&proc_net_raw);
1077 proc_net_register(&proc_net_snmp);
1078 proc_net_register(&proc_net_netstat);
1079 proc_net_register(&proc_net_sockstat);
1080 proc_net_register(&proc_net_tcp);
1081 proc_net_register(&proc_net_udp);
1082 #endif /* CONFIG_PROC_FS */