2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * Definitions for the AF_INET socket handler.
8 * Version: @(#)sock.h 1.0.4 05/13/93
10 * Authors: Ross Biro, <bir7@leland.Stanford.Edu>
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Corey Minyard <wf-rch!minyard@relay.EU.net>
13 * Florian La Roche <flla@stud.uni-sb.de>
16 * Alan Cox : Volatiles in skbuff pointers. See
17 * skbuff comments. May be overdone,
18 * better to prove they can be removed
20 * Alan Cox : Added a zapped field for tcp to note
21 * a socket is reset and must stay shut up
22 * Alan Cox : New fields for options
23 * Pauline Middelink : identd support
24 * Alan Cox : Eliminate low level recv/recvfrom
25 * David S. Miller : New socket lookup architecture.
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation; either version
30 * 2 of the License, or (at your option) any later version.
35 #include <linux/config.h>
36 #include <linux/timer.h>
37 #include <linux/in.h> /* struct sockaddr_in */
39 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
40 #include <linux/in6.h> /* struct sockaddr_in6 */
41 #include <linux/ipv6.h> /* dest_cache, inet6_options */
42 #include <linux/icmpv6.h>
43 #include <net/if_inet6.h> /* struct ipv6_mc_socklist */
46 #include <linux/tcp.h> /* struct tcphdr */
47 #include <linux/config.h>
49 #include <linux/netdevice.h>
50 #include <linux/skbuff.h> /* struct sk_buff */
51 #include <net/protocol.h> /* struct inet_protocol */
52 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
55 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
57 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
58 #include <net/netrom.h>
60 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
65 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
69 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
70 #include <linux/atalk.h>
73 #include <linux/igmp.h>
75 #include <asm/atomic.h>
78 * The AF_UNIX specific socket options
86 struct unix_address
*addr
;
88 struct semaphore readsem
;
97 * IP packet socket options
100 struct inet_packet_opt
102 struct notifier_block notifier
; /* Used when bound */
103 struct device
*bound_dev
;
104 unsigned long dev_stamp
;
105 struct packet_type
*prot_hook
;
106 char device_name
[15];
110 * Once the IPX ncpd patches are in these are going into protinfo
113 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
116 ipx_address dest_addr
;
117 ipx_interface
*intrfc
;
119 #ifdef CONFIG_IPX_INTERN
120 unsigned char node
[IPX_NODE_LEN
];
124 * To handle asynchronous messages from the NetWare server, we have to
125 * know the connection this socket belongs to.
127 struct ncp_server
*ncp_server
;
132 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
135 struct in6_addr saddr
;
136 struct in6_addr rcv_saddr
;
137 struct in6_addr daddr
;
155 /* device for outgoing packets */
159 struct ipv6_mc_socklist
*ipv6_mc_list
;
161 * destination cache entry pointer
162 * contains a pointer to neighbour cache
163 * and other info related to network level
167 struct dst_entry
*dst
;
170 struct ipv6_options
*opt
;
174 __u32 checksum
; /* perform checksum */
175 __u32 offset
; /* checksum offset */
177 struct icmp6_filter filter
;
186 * RFC793 variables by their proper names. This means you can
187 * read the code and the spec side by side (and laugh ...)
188 * See RFC793 and RFC1122. The RFC writes these in capitals.
190 __u32 rcv_nxt
; /* What we want to receive next */
191 __u32 rcv_up
; /* The urgent point (may not be valid) */
192 __u32 rcv_wnd
; /* Current receiver window */
193 __u32 snd_nxt
; /* Next sequence we send */
194 __u32 snd_una
; /* First byte we want an ack for */
195 __u32 snd_up
; /* Outgoing urgent pointer */
196 __u32 snd_wl1
; /* Sequence for window update */
197 __u32 snd_wl2
; /* Ack sequence for update */
199 __u32 rcv_wup
; /* rcv_nxt on last window update sent */
201 __u32 fin_seq
; /* XXX This one should go, we don't need it. -DaveM */
203 __u32 srtt
; /* smoothed round trip time << 3 */
204 __u32 mdev
; /* medium deviation */
205 __u32 rto
; /* retransmit timeout */
206 __u32 backoff
; /* backoff */
208 * Slow start and congestion control (see also Nagle, and Karn & Partridge)
210 __u32 snd_cwnd
; /* Sending congestion window */
211 __u32 snd_ssthresh
; /* Slow start size threshold */
216 * Options received (usually on last packet, some only on SYN packets).
218 char tstamp_ok
, /* TIMESTAMP seen on SYN packet */
219 sack_ok
; /* SACK_PERM seen on SYN packet */
220 char saw_tstamp
; /* Saw TIMESTAMP on last packet */
221 __u16 in_mss
; /* MSS option received from sender */
222 __u8 snd_wscale
; /* Window scaling received from sender */
223 __u8 rcv_wscale
; /* Window scaling to send to receiver */
224 __u32 rcv_tsval
; /* Time stamp value */
225 __u32 rcv_tsecr
; /* Time stamp echo reply */
226 __u32 ts_recent
; /* Time stamp to echo next */
227 __u32 ts_recent_stamp
;/* Time we stored ts_recent (for aging) */
228 __u32 last_ack_sent
; /* last ack we sent */
229 int sacks
; /* Number of SACK blocks if any */
230 __u32 left_sack
[4]; /* Left edges of blocks */
231 __u32 right_sack
[4]; /* Right edges of blocks */
232 int tcp_header_len
; /* Bytes of tcp header to send */
235 * Timers used by the TCP protocol layer
237 struct timer_list delack_timer
; /* Ack delay */
238 struct timer_list idle_timer
; /* Idle watch */
239 struct timer_list completion_timer
; /* Up/Down timer */
240 struct timer_list probe_timer
; /* Probes */
241 struct timer_list retransmit_timer
; /* Resend (no ack) */
243 __u32 basertt
; /* Vegas baseRTT */
244 __u32 packets_out
; /* Packets which are "in flight" */
245 __u32 window_clamp
; /* XXX Document this... -DaveM */
247 __u8 pending
; /* pending events */
249 __u8 dup_acks
; /* Consecutive duplicate acks seen from other end */
252 __u32 lrcvtime
; /* timestamp of last received data packet */
253 __u32 rcv_tstamp
; /* timestamp of last received packet */
254 __u32 iat_mdev
; /* interarrival time medium deviation */
255 __u32 iat
; /* interarrival time */
256 __u32 ato
; /* delayed ack timeout */
257 __u32 high_seq
; /* highest sequence number sent by onset of congestion */
262 struct sk_buff
* send_head
;
263 struct sk_buff
* retrans_head
; /* retrans head can be
264 * different to the head of
265 * write queue if we are doing
269 * Header prediction flags
270 * 0x5?10 << 16 + snd_wnd in net byte order
273 __u32 snd_wnd
; /* The window we expect to receive */
275 __u32 probes_out
; /* unanswered 0 window probes */
277 struct open_request
*syn_wait_queue
;
278 struct open_request
**syn_wait_last
;
279 struct tcp_func
*af_specific
;
284 * This structure really needs to be cleaned up.
285 * Most of it is for TCP, and not used by any of
286 * the other protocols.
290 * The idea is to start moving to a newer struct gradually
292 * IMHO the newer struct should have the following format:
295 * sockmem [mem, proto, callbacks]
317 /* Define this to get the sk->debug debugging facility. */
318 #define SOCK_DEBUGGING
319 #ifdef SOCK_DEBUGGING
/*
 * SOCK_DEBUG(sk, fmt, args...) - print a debug message through printk()
 * when sk is non-NULL and sk->debug is non-zero; otherwise do nothing.
 *
 * The body is wrapped in do { } while (0) so the expansion is exactly one
 * statement: a bare "if" here would capture a caller's following "else".
 * The format string is placed next to KERN_DEBUG and joined by ordinary
 * string-literal concatenation instead of "##" token pasting, which does
 * not yield a valid preprocessing token.
 * NOTE(review): assumes KERN_DEBUG expands to a string literal; it is
 * defined outside this file - confirm.
 * Note that sk is evaluated twice, so avoid arguments with side effects.
 */
#define SOCK_DEBUG(sk, msg...) \
	do { \
		if ((sk) && ((sk)->debug)) \
			printk(KERN_DEBUG msg); \
	} while (0)
322 #define SOCK_DEBUG(sk, msg...) do { } while (0)
326 * TCP will start to use the new protinfo while *still using the old* fields
331 /* This must be first. */
332 struct sock
*sklist_next
;
333 struct sock
*sklist_prev
;
337 unsigned long allocation
; /* Allocation mode */
339 /* The following stuff should probably move to the tcp private area */
345 unsigned char delayed_acks
;
346 /* End of block to move */
348 int sock_readers
; /* user count */
351 * Not all are volatile, but some are, so we
352 * might as well say they all are.
362 zapped
, /* In ax25 & ipx means not linked */
366 unsigned long lingertime
;
371 struct sock
*bind_next
;
372 struct sock
**bind_pprev
;
377 struct sk_buff_head back_log
;
379 struct sk_buff_head write_queue
,
384 unsigned short family
;
386 struct wait_queue
**sleep
;
389 __u32 saddr
; /* Sending source */
390 __u32 rcv_saddr
; /* Bound address */
392 struct dst_entry
*dst_cache
;
394 unsigned short max_unacked
;
397 * mss is min(mtu, max_window)
399 unsigned short mtu
; /* mss negotiated in the syn's */
400 unsigned short mss
; /* current eff. mss - can change */
401 unsigned short user_mss
; /* mss requested by user in ioctl */
404 unsigned short shutdown
;
406 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
408 struct ipv6_pinfo af_inet6
;
413 struct tcp_opt af_tcp
;
414 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
415 struct raw6_opt tp_raw
;
419 int err
, err_soft
; /* Soft holds errors that don't
420 cause failure but are the cause
421 of a persistent failure not just
423 unsigned char protocol
;
424 volatile unsigned char state
;
425 unsigned char ack_backlog
;
426 unsigned char max_ack_backlog
;
427 unsigned char priority
;
432 unsigned char localroute
; /* Route locally only */
433 struct ucred peercred
;
435 /* What the user has tried to set with the security API */
436 short authentication
;
440 * This is where all the private (optional) areas that don't
441 * overlap will eventually live.
446 struct unix_opt af_unix
;
447 #if defined(CONFIG_ATALK) || defined(CONFIG_ATALK_MODULE)
448 struct atalk_sock af_at
;
450 #if defined(CONFIG_IPX) || defined(CONFIG_IPX_MODULE)
451 struct ipx_opt af_ipx
;
454 struct inet_packet_opt af_packet
;
456 struct tcp_opt af_tcp
;
459 #if defined(CONFIG_X25) || defined(CONFIG_X25_MODULE)
462 #if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
464 #if defined(CONFIG_NETROM) || defined(CONFIG_NETROM_MODULE)
467 #if defined(CONFIG_ROSE) || defined(CONFIG_ROSE_MODULE)
474 * IP 'private area' or will be eventually
476 int ip_ttl
; /* TTL setting */
477 int ip_tos
; /* TOS */
478 unsigned ip_cmsg_flags
;
479 struct tcphdr dummy_th
;
480 struct ip_options
*opt
;
481 unsigned char ip_hdrincl
; /* Include headers ? */
482 __u8 ip_mc_ttl
; /* Multicasting TTL */
483 __u8 ip_mc_loop
; /* Loopback */
486 int ip_mc_index
; /* Multicast device index */
488 struct ip_mc_socklist
*ip_mc_list
; /* Group array */
491 * This part is used for the timeout functions (timer.c).
494 int timeout
; /* What are we waiting for? */
495 struct timer_list timer
; /* This is the TIME_WAIT/receive timer
496 * when we are doing IP
498 struct timeval stamp
;
504 struct socket
*socket
;
507 * RPC layer private data
515 void (*state_change
)(struct sock
*sk
);
516 void (*data_ready
)(struct sock
*sk
,int bytes
);
517 void (*write_space
)(struct sock
*sk
);
518 void (*error_report
)(struct sock
*sk
);
520 int (*backlog_rcv
) (struct sock
*sk
,
521 struct sk_buff
*skb
);
525 * IP protocol blocks we attach to sockets.
526 * socket layer -> transport layer interface
527 * transport -> network interface is defined by struct inet_proto
532 /* These must be first. */
533 struct sock
*sklist_next
;
534 struct sock
*sklist_prev
;
536 void (*close
)(struct sock
*sk
,
537 unsigned long timeout
);
538 int (*connect
)(struct sock
*sk
,
539 struct sockaddr
*uaddr
,
542 struct sock
* (*accept
) (struct sock
*sk
, int flags
);
543 void (*retransmit
)(struct sock
*sk
, int all
);
544 void (*write_wakeup
)(struct sock
*sk
);
545 void (*read_wakeup
)(struct sock
*sk
);
547 unsigned int (*poll
)(struct socket
*sock
, poll_table
*wait
);
549 int (*ioctl
)(struct sock
*sk
, int cmd
,
551 int (*init
)(struct sock
*sk
);
552 int (*destroy
)(struct sock
*sk
);
553 void (*shutdown
)(struct sock
*sk
, int how
);
554 int (*setsockopt
)(struct sock
*sk
, int level
,
555 int optname
, char *optval
, int optlen
);
556 int (*getsockopt
)(struct sock
*sk
, int level
,
557 int optname
, char *optval
,
559 int (*sendmsg
)(struct sock
*sk
, struct msghdr
*msg
,
561 int (*recvmsg
)(struct sock
*sk
, struct msghdr
*msg
,
562 int len
, int noblock
, int flags
,
564 int (*bind
)(struct sock
*sk
,
565 struct sockaddr
*uaddr
, int addr_len
);
567 int (*backlog_rcv
) (struct sock
*sk
,
568 struct sk_buff
*skb
);
570 /* Keeping track of sk's, looking them up, and port selection methods. */
571 void (*hash
)(struct sock
*sk
);
572 void (*unhash
)(struct sock
*sk
);
573 void (*rehash
)(struct sock
*sk
);
574 unsigned short (*good_socknum
)(void);
575 int (*verify_bind
)(struct sock
*sk
, unsigned short snum
);
577 unsigned short max_header
;
578 unsigned long retransmits
;
580 int inuse
, highestinuse
;
583 #define TIME_WRITE 1 /* Not yet used */
584 #define TIME_RETRANS 2 /* Retransmit timer */
585 #define TIME_DACK 3 /* Delayed ack timer */
587 #define TIME_KEEPOPEN 5
588 #define TIME_DESTROY 6
589 #define TIME_DONE 7 /* Used to absorb those last few packets */
590 #define TIME_PROBE0 8
596 #define SOCK_DESTROY_TIME (10*HZ)
599 * Sockets 0-1023 can't be bound to unless you are superuser
602 #define PROT_SOCK 1024
604 #define SHUTDOWN_MASK 3
605 #define RCV_SHUTDOWN 1
606 #define SEND_SHUTDOWN 2
608 /* Per-protocol hash table implementations use this to make sure
611 #define SOCKHASH_LOCK() start_bh_atomic()
612 #define SOCKHASH_UNLOCK() end_bh_atomic()
614 /* Some things in the kernel just want to get at a protocols
615 * entire socket list commensurate, thus...
617 static __inline__
void add_to_prot_sklist(struct sock
*sk
)
620 if(!sk
->sklist_next
) {
621 struct proto
*p
= sk
->prot
;
623 sk
->sklist_prev
= (struct sock
*) p
;
624 sk
->sklist_next
= p
->sklist_next
;
625 p
->sklist_next
->sklist_prev
= sk
;
628 /* Charge the protocol. */
629 sk
->prot
->inuse
+= 1;
630 if(sk
->prot
->highestinuse
< sk
->prot
->inuse
)
631 sk
->prot
->highestinuse
= sk
->prot
->inuse
;
636 static __inline__
void del_from_prot_sklist(struct sock
*sk
)
639 if(sk
->sklist_next
) {
640 sk
->sklist_next
->sklist_prev
= sk
->sklist_prev
;
641 sk
->sklist_prev
->sklist_next
= sk
->sklist_next
;
642 sk
->sklist_next
= NULL
;
649 * Used by processes to "lock" a socket state, so that
650 * interrupts and bottom half handlers won't change it
651 * from under us. It essentially blocks any incoming
652 * packets, so that we won't get any new data or any
653 * packets that change the state of the socket.
655 * Note the 'barrier()' calls: gcc may not move a lock
656 * "downwards" or an unlock "upwards" when optimizing.
658 extern void __release_sock(struct sock
*sk
);
660 static inline void lock_sock(struct sock
*sk
)
663 /* debugging code: the test isn't even 100% correct, but it can catch bugs */
664 /* Note that a double lock is ok in theory - it's just _usually_ a bug */
665 if (sk
->sock_readers
) {
667 printk("double lock on socket at %p\n", &&here
);
673 * This is a very broken bottom half synchronization mechanism.
674 * You don't want to know..
676 { unsigned long flags
;
680 restore_flags(flags
);
688 static inline void release_sock(struct sock
*sk
)
692 /* debugging code: remove me when ok */
693 if (sk
->sock_readers
== 0) {
695 sk
->sock_readers
= 1;
696 printk("trying to unlock unlocked socket at %p\n", &&here
);
700 if ((sk
->sock_readers
= sk
->sock_readers
-1) == 0)
705 * This might not be the most appropriate place for these two
706 * but since they are used by a lot of the net related code
707 * at least they get declared on a include that is common to all
710 static __inline__
int min(unsigned int a
, unsigned int b
)
717 static __inline__
int max(unsigned int a
, unsigned int b
)
724 extern struct sock
* sk_alloc(int priority
);
725 extern void sk_free(struct sock
*sk
);
726 extern void destroy_sock(struct sock
*sk
);
728 extern struct sk_buff
*sock_wmalloc(struct sock
*sk
,
729 unsigned long size
, int force
,
731 extern struct sk_buff
*sock_rmalloc(struct sock
*sk
,
732 unsigned long size
, int force
,
734 extern void sock_wfree(struct sk_buff
*skb
);
735 extern void sock_rfree(struct sk_buff
*skb
);
736 extern unsigned long sock_rspace(struct sock
*sk
);
737 extern unsigned long sock_wspace(struct sock
*sk
);
739 extern int sock_setsockopt(struct socket
*sock
, int level
,
740 int op
, char *optval
,
743 extern int sock_getsockopt(struct socket
*sock
, int level
,
744 int op
, char *optval
,
746 extern struct sk_buff
*sock_alloc_send_skb(struct sock
*sk
,
748 unsigned long fallback
,
752 extern int sock_no_fcntl(struct socket
*, unsigned int, unsigned long);
753 extern int sock_no_getsockopt(struct socket
*, int , int,
755 extern int sock_no_setsockopt(struct socket
*, int, int,
757 extern int sock_no_listen(struct socket
*, int);
759 * Default socket callbacks and setup code
762 extern void sock_def_callback1(struct sock
*);
763 extern void sock_def_callback2(struct sock
*, int);
764 extern void sock_def_callback3(struct sock
*);
766 /* Initialise core socket variables */
767 extern void sock_init_data(struct socket
*sock
, struct sock
*sk
);
769 extern void sklist_remove_socket(struct sock
**list
, struct sock
*sk
);
770 extern void sklist_insert_socket(struct sock
**list
, struct sock
*sk
);
771 extern void sklist_destroy_socket(struct sock
**list
, struct sock
*sk
);
774 * Queue a received datagram if it will fit. Stream and sequenced
775 * protocols can't normally use this as they need to fit buffers in
776 * and play with them.
778 * Inlined as it's very short and called for pretty much every
779 * packet ever received.
782 extern __inline__
void skb_set_owner_w(struct sk_buff
*skb
, struct sock
*sk
)
785 skb
->destructor
= sock_wfree
;
786 atomic_add(skb
->truesize
, &sk
->wmem_alloc
);
789 extern __inline__
void skb_set_owner_r(struct sk_buff
*skb
, struct sock
*sk
)
792 skb
->destructor
= sock_rfree
;
793 atomic_add(skb
->truesize
, &sk
->rmem_alloc
);
797 extern __inline__
int sock_queue_rcv_skb(struct sock
*sk
, struct sk_buff
*skb
)
799 if (atomic_read(&sk
->rmem_alloc
) + skb
->truesize
>= sk
->rcvbuf
)
801 skb_set_owner_r(skb
, sk
);
802 skb_queue_tail(&sk
->receive_queue
,skb
);
804 sk
->data_ready(sk
,skb
->len
);
808 extern __inline__
int __sock_queue_rcv_skb(struct sock
*sk
, struct sk_buff
*skb
)
810 if (atomic_read(&sk
->rmem_alloc
) + skb
->truesize
>= sk
->rcvbuf
)
812 skb_set_owner_r(skb
, sk
);
813 __skb_queue_tail(&sk
->receive_queue
,skb
);
815 sk
->data_ready(sk
,skb
->len
);
819 extern __inline__
int sock_queue_err_skb(struct sock
*sk
, struct sk_buff
*skb
)
821 if (atomic_read(&sk
->rmem_alloc
) + skb
->truesize
>= sk
->rcvbuf
)
823 skb_set_owner_r(skb
, sk
);
824 __skb_queue_tail(&sk
->error_queue
,skb
);
826 sk
->data_ready(sk
,skb
->len
);
831 * Recover an error report and clear atomically
834 extern __inline__
int sock_error(struct sock
*sk
)
836 int err
=xchg(&sk
->err
,0);
842 * Declarations from timer.c
845 extern struct sock
*timer_base
;
847 extern void net_delete_timer (struct sock
*);
848 extern void net_reset_timer (struct sock
*, int, unsigned long);
849 extern void net_timer (unsigned long);
853 * Enable debug/info messages
857 #define NETDEBUG(x) do { } while (0)
859 #define NETDEBUG(x) do { x; } while (0)