2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
6 * Implementation of the Transmission Control Protocol(TCP).
8 * Version: $Id: tcp_ipv4.c,v 1.240 2002/02/01 22:01:04 davem Exp $
10 * IPv4 specific functions
15 * linux/ipv4/tcp_input.c
16 * linux/ipv4/tcp_output.c
18 * See tcp.c for author information
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
28 * David S. Miller : New socket lookup architecture.
29 * This code is dedicated to John Dyson.
30 * David S. Miller : Change semantics of established hash,
31 * half is devoted to TIME_WAIT sockets
32 * and the rest go in the other half.
33 * Andi Kleen : Add support for syncookies and fixed
34 * some bugs: ip options weren't passed to
35 * the TCP layer, missed a check for an
37 * Andi Kleen : Implemented fast path mtu discovery.
38 * Fixed many serious bugs in the
39 * request_sock handling and moved
40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes.
42 * Added new listen sematics.
43 * Mike McLagan : Routing by source
44 * Juan Jose Ciarlante: ip_dynaddr bits
45 * Andi Kleen: various fixes.
46 * Vitaly E. Lavrov : Transparent proxy revived after year
48 * Andi Kleen : Fix new listen.
49 * Andi Kleen : Fix accept error reporting.
50 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
51 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
52 * a single port at the same time.
55 #include <linux/config.h>
57 #include <linux/types.h>
58 #include <linux/fcntl.h>
59 #include <linux/module.h>
60 #include <linux/random.h>
61 #include <linux/cache.h>
62 #include <linux/jhash.h>
63 #include <linux/init.h>
64 #include <linux/times.h>
67 #include <net/inet_hashtables.h>
70 #include <net/inet_common.h>
73 #include <linux/inet.h>
74 #include <linux/ipv6.h>
75 #include <linux/stddef.h>
76 #include <linux/proc_fs.h>
77 #include <linux/seq_file.h>
79 extern int sysctl_ip_dynaddr
;
80 int sysctl_tcp_tw_reuse
;
81 int sysctl_tcp_low_latency
;
83 /* Check TCP sequence numbers in ICMP packets. */
84 #define ICMP_MIN_LENGTH 8
86 /* Socket used for sending RSTs */
87 static struct socket
*tcp_socket
;
89 void tcp_v4_send_check(struct sock
*sk
, struct tcphdr
*th
, int len
,
92 struct inet_hashinfo __cacheline_aligned tcp_hashinfo
= {
93 .lhash_lock
= RW_LOCK_UNLOCKED
,
94 .lhash_users
= ATOMIC_INIT(0),
95 .lhash_wait
= __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo
.lhash_wait
),
96 .portalloc_lock
= SPIN_LOCK_UNLOCKED
,
97 .port_rover
= 1024 - 1,
101 * This array holds the first and last local port number.
102 * For high-usage systems, use sysctl to change this to
105 int sysctl_local_port_range
[2] = { 1024, 4999 };
107 static inline int tcp_bind_conflict(struct sock
*sk
, struct inet_bind_bucket
*tb
)
109 const u32 sk_rcv_saddr
= tcp_v4_rcv_saddr(sk
);
111 struct hlist_node
*node
;
112 int reuse
= sk
->sk_reuse
;
114 sk_for_each_bound(sk2
, node
, &tb
->owners
) {
116 !tcp_v6_ipv6only(sk2
) &&
117 (!sk
->sk_bound_dev_if
||
118 !sk2
->sk_bound_dev_if
||
119 sk
->sk_bound_dev_if
== sk2
->sk_bound_dev_if
)) {
120 if (!reuse
|| !sk2
->sk_reuse
||
121 sk2
->sk_state
== TCP_LISTEN
) {
122 const u32 sk2_rcv_saddr
= tcp_v4_rcv_saddr(sk2
);
123 if (!sk2_rcv_saddr
|| !sk_rcv_saddr
||
124 sk2_rcv_saddr
== sk_rcv_saddr
)
132 /* Obtain a reference to a local port for the given sock,
133 * if snum is zero it means select any available local port.
135 static int tcp_v4_get_port(struct sock
*sk
, unsigned short snum
)
137 struct inet_bind_hashbucket
*head
;
138 struct hlist_node
*node
;
139 struct inet_bind_bucket
*tb
;
144 int low
= sysctl_local_port_range
[0];
145 int high
= sysctl_local_port_range
[1];
146 int remaining
= (high
- low
) + 1;
149 spin_lock(&tcp_hashinfo
.portalloc_lock
);
150 if (tcp_hashinfo
.port_rover
< low
)
153 rover
= tcp_hashinfo
.port_rover
;
158 head
= &tcp_hashinfo
.bhash
[inet_bhashfn(rover
, tcp_hashinfo
.bhash_size
)];
159 spin_lock(&head
->lock
);
160 inet_bind_bucket_for_each(tb
, node
, &head
->chain
)
161 if (tb
->port
== rover
)
165 spin_unlock(&head
->lock
);
166 } while (--remaining
> 0);
167 tcp_hashinfo
.port_rover
= rover
;
168 spin_unlock(&tcp_hashinfo
.portalloc_lock
);
170 /* Exhausted local port range during search? It is not
171 * possible for us to be holding one of the bind hash
172 * locks if this test triggers, because if 'remaining'
173 * drops to zero, we broke out of the do/while loop at
174 * the top level, not from the 'break;' statement.
177 if (unlikely(remaining
<= 0))
180 /* OK, here is the one we will use. HEAD is
181 * non-NULL and we hold it's mutex.
185 head
= &tcp_hashinfo
.bhash
[inet_bhashfn(snum
, tcp_hashinfo
.bhash_size
)];
186 spin_lock(&head
->lock
);
187 inet_bind_bucket_for_each(tb
, node
, &head
->chain
)
188 if (tb
->port
== snum
)
194 if (!hlist_empty(&tb
->owners
)) {
195 if (sk
->sk_reuse
> 1)
197 if (tb
->fastreuse
> 0 &&
198 sk
->sk_reuse
&& sk
->sk_state
!= TCP_LISTEN
) {
202 if (tcp_bind_conflict(sk
, tb
))
208 if (!tb
&& (tb
= inet_bind_bucket_create(tcp_hashinfo
.bind_bucket_cachep
, head
, snum
)) == NULL
)
210 if (hlist_empty(&tb
->owners
)) {
211 if (sk
->sk_reuse
&& sk
->sk_state
!= TCP_LISTEN
)
215 } else if (tb
->fastreuse
&&
216 (!sk
->sk_reuse
|| sk
->sk_state
== TCP_LISTEN
))
219 if (!inet_sk(sk
)->bind_hash
)
220 inet_bind_hash(sk
, tb
, snum
);
221 BUG_TRAP(inet_sk(sk
)->bind_hash
== tb
);
225 spin_unlock(&head
->lock
);
231 static void tcp_v4_hash(struct sock
*sk
)
233 inet_hash(&tcp_hashinfo
, sk
);
236 void tcp_unhash(struct sock
*sk
)
238 inet_unhash(&tcp_hashinfo
, sk
);
241 /* Don't inline this cruft. Here are some nice properties to
242 * exploit here. The BSD API does not allow a listening TCP
243 * to specify the remote port nor the remote address for the
244 * connection. So always assume those are both wildcarded
245 * during the search since they can never be otherwise.
247 static struct sock
*__tcp_v4_lookup_listener(struct hlist_head
*head
,
249 const unsigned short hnum
,
252 struct sock
*result
= NULL
, *sk
;
253 struct hlist_node
*node
;
257 sk_for_each(sk
, node
, head
) {
258 struct inet_sock
*inet
= inet_sk(sk
);
260 if (inet
->num
== hnum
&& !ipv6_only_sock(sk
)) {
261 __u32 rcv_saddr
= inet
->rcv_saddr
;
263 score
= (sk
->sk_family
== PF_INET
? 1 : 0);
265 if (rcv_saddr
!= daddr
)
269 if (sk
->sk_bound_dev_if
) {
270 if (sk
->sk_bound_dev_if
!= dif
)
276 if (score
> hiscore
) {
285 /* Optimize the common listener case. */
286 static inline struct sock
*tcp_v4_lookup_listener(const u32 daddr
,
287 const unsigned short hnum
,
290 struct sock
*sk
= NULL
;
291 struct hlist_head
*head
;
293 read_lock(&tcp_hashinfo
.lhash_lock
);
294 head
= &tcp_hashinfo
.listening_hash
[inet_lhashfn(hnum
)];
295 if (!hlist_empty(head
)) {
296 struct inet_sock
*inet
= inet_sk((sk
= __sk_head(head
)));
298 if (inet
->num
== hnum
&& !sk
->sk_node
.next
&&
299 (!inet
->rcv_saddr
|| inet
->rcv_saddr
== daddr
) &&
300 (sk
->sk_family
== PF_INET
|| !ipv6_only_sock(sk
)) &&
301 !sk
->sk_bound_dev_if
)
303 sk
= __tcp_v4_lookup_listener(head
, daddr
, hnum
, dif
);
309 read_unlock(&tcp_hashinfo
.lhash_lock
);
313 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
314 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
316 * Local BH must be disabled here.
319 static inline struct sock
*__tcp_v4_lookup_established(const u32 saddr
,
325 struct inet_ehash_bucket
*head
;
326 TCP_V4_ADDR_COOKIE(acookie
, saddr
, daddr
)
327 __u32 ports
= TCP_COMBINED_PORTS(sport
, hnum
);
329 struct hlist_node
*node
;
330 /* Optimize here for direct hit, only listening connections can
331 * have wildcards anyways.
333 const int hash
= inet_ehashfn(daddr
, hnum
, saddr
, sport
, tcp_hashinfo
.ehash_size
);
334 head
= &tcp_hashinfo
.ehash
[hash
];
335 read_lock(&head
->lock
);
336 sk_for_each(sk
, node
, &head
->chain
) {
337 if (TCP_IPV4_MATCH(sk
, acookie
, saddr
, daddr
, ports
, dif
))
338 goto hit
; /* You sunk my battleship! */
341 /* Must check for a TIME_WAIT'er before going to listener hash. */
342 sk_for_each(sk
, node
, &(head
+ tcp_hashinfo
.ehash_size
)->chain
) {
343 if (TCP_IPV4_TW_MATCH(sk
, acookie
, saddr
, daddr
, ports
, dif
))
348 read_unlock(&head
->lock
);
355 static inline struct sock
*__tcp_v4_lookup(u32 saddr
, u16 sport
,
356 u32 daddr
, u16 hnum
, int dif
)
358 struct sock
*sk
= __tcp_v4_lookup_established(saddr
, sport
,
361 return sk
? : tcp_v4_lookup_listener(daddr
, hnum
, dif
);
364 inline struct sock
*tcp_v4_lookup(u32 saddr
, u16 sport
, u32 daddr
,
370 sk
= __tcp_v4_lookup(saddr
, sport
, daddr
, ntohs(dport
), dif
);
376 EXPORT_SYMBOL_GPL(tcp_v4_lookup
);
378 static inline __u32
tcp_v4_init_sequence(struct sock
*sk
, struct sk_buff
*skb
)
380 return secure_tcp_sequence_number(skb
->nh
.iph
->daddr
,
386 /* called with local bh disabled */
387 static int __tcp_v4_check_established(struct sock
*sk
, __u16 lport
,
388 struct tcp_tw_bucket
**twp
)
390 struct inet_sock
*inet
= inet_sk(sk
);
391 u32 daddr
= inet
->rcv_saddr
;
392 u32 saddr
= inet
->daddr
;
393 int dif
= sk
->sk_bound_dev_if
;
394 TCP_V4_ADDR_COOKIE(acookie
, saddr
, daddr
)
395 __u32 ports
= TCP_COMBINED_PORTS(inet
->dport
, lport
);
396 const int hash
= inet_ehashfn(daddr
, lport
, saddr
, inet
->dport
, tcp_hashinfo
.ehash_size
);
397 struct inet_ehash_bucket
*head
= &tcp_hashinfo
.ehash
[hash
];
399 struct hlist_node
*node
;
400 struct tcp_tw_bucket
*tw
;
402 write_lock(&head
->lock
);
404 /* Check TIME-WAIT sockets first. */
405 sk_for_each(sk2
, node
, &(head
+ tcp_hashinfo
.ehash_size
)->chain
) {
406 tw
= (struct tcp_tw_bucket
*)sk2
;
408 if (TCP_IPV4_TW_MATCH(sk2
, acookie
, saddr
, daddr
, ports
, dif
)) {
409 struct tcp_sock
*tp
= tcp_sk(sk
);
411 /* With PAWS, it is safe from the viewpoint
412 of data integrity. Even without PAWS it
413 is safe provided sequence spaces do not
414 overlap i.e. at data rates <= 80Mbit/sec.
416 Actually, the idea is close to VJ's one,
417 only timestamp cache is held not per host,
418 but per port pair and TW bucket is used
421 If TW bucket has been already destroyed we
422 fall back to VJ's scheme and use initial
423 timestamp retrieved from peer table.
425 if (tw
->tw_ts_recent_stamp
&&
426 (!twp
|| (sysctl_tcp_tw_reuse
&&
428 tw
->tw_ts_recent_stamp
> 1))) {
430 tw
->tw_snd_nxt
+ 65535 + 2) == 0)
432 tp
->rx_opt
.ts_recent
= tw
->tw_ts_recent
;
433 tp
->rx_opt
.ts_recent_stamp
= tw
->tw_ts_recent_stamp
;
442 /* And established part... */
443 sk_for_each(sk2
, node
, &head
->chain
) {
444 if (TCP_IPV4_MATCH(sk2
, acookie
, saddr
, daddr
, ports
, dif
))
449 /* Must record num and sport now. Otherwise we will see
450 * in hash table socket with a funny identity. */
452 inet
->sport
= htons(lport
);
453 sk
->sk_hashent
= hash
;
454 BUG_TRAP(sk_unhashed(sk
));
455 __sk_add_node(sk
, &head
->chain
);
456 sock_prot_inc_use(sk
->sk_prot
);
457 write_unlock(&head
->lock
);
461 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED
);
463 /* Silly. Should hash-dance instead... */
464 tcp_tw_deschedule(tw
);
465 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED
);
473 write_unlock(&head
->lock
);
474 return -EADDRNOTAVAIL
;
477 static inline u32
connect_port_offset(const struct sock
*sk
)
479 const struct inet_sock
*inet
= inet_sk(sk
);
481 return secure_tcp_port_ephemeral(inet
->rcv_saddr
, inet
->daddr
,
486 * Bind a port for a connect operation and hash it.
488 static inline int tcp_v4_hash_connect(struct sock
*sk
)
490 const unsigned short snum
= inet_sk(sk
)->num
;
491 struct inet_bind_hashbucket
*head
;
492 struct inet_bind_bucket
*tb
;
496 int low
= sysctl_local_port_range
[0];
497 int high
= sysctl_local_port_range
[1];
498 int range
= high
- low
;
502 u32 offset
= hint
+ connect_port_offset(sk
);
503 struct hlist_node
*node
;
504 struct tcp_tw_bucket
*tw
= NULL
;
507 for (i
= 1; i
<= range
; i
++) {
508 port
= low
+ (i
+ offset
) % range
;
509 head
= &tcp_hashinfo
.bhash
[inet_bhashfn(port
, tcp_hashinfo
.bhash_size
)];
510 spin_lock(&head
->lock
);
512 /* Does not bother with rcv_saddr checks,
513 * because the established check is already
516 inet_bind_bucket_for_each(tb
, node
, &head
->chain
) {
517 if (tb
->port
== port
) {
518 BUG_TRAP(!hlist_empty(&tb
->owners
));
519 if (tb
->fastreuse
>= 0)
521 if (!__tcp_v4_check_established(sk
,
529 tb
= inet_bind_bucket_create(tcp_hashinfo
.bind_bucket_cachep
, head
, port
);
531 spin_unlock(&head
->lock
);
538 spin_unlock(&head
->lock
);
542 return -EADDRNOTAVAIL
;
547 /* Head lock still held and bh's disabled */
548 inet_bind_hash(sk
, tb
, port
);
549 if (sk_unhashed(sk
)) {
550 inet_sk(sk
)->sport
= htons(port
);
551 __inet_hash(&tcp_hashinfo
, sk
, 0);
553 spin_unlock(&head
->lock
);
556 tcp_tw_deschedule(tw
);
564 head
= &tcp_hashinfo
.bhash
[inet_bhashfn(snum
, tcp_hashinfo
.bhash_size
)];
565 tb
= inet_sk(sk
)->bind_hash
;
566 spin_lock_bh(&head
->lock
);
567 if (sk_head(&tb
->owners
) == sk
&& !sk
->sk_bind_node
.next
) {
568 __inet_hash(&tcp_hashinfo
, sk
, 0);
569 spin_unlock_bh(&head
->lock
);
572 spin_unlock(&head
->lock
);
573 /* No definite answer... Walk to established hash table */
574 ret
= __tcp_v4_check_established(sk
, snum
, NULL
);
581 /* This will initiate an outgoing connection. */
582 int tcp_v4_connect(struct sock
*sk
, struct sockaddr
*uaddr
, int addr_len
)
584 struct inet_sock
*inet
= inet_sk(sk
);
585 struct tcp_sock
*tp
= tcp_sk(sk
);
586 struct sockaddr_in
*usin
= (struct sockaddr_in
*)uaddr
;
592 if (addr_len
< sizeof(struct sockaddr_in
))
595 if (usin
->sin_family
!= AF_INET
)
596 return -EAFNOSUPPORT
;
598 nexthop
= daddr
= usin
->sin_addr
.s_addr
;
599 if (inet
->opt
&& inet
->opt
->srr
) {
602 nexthop
= inet
->opt
->faddr
;
605 tmp
= ip_route_connect(&rt
, nexthop
, inet
->saddr
,
606 RT_CONN_FLAGS(sk
), sk
->sk_bound_dev_if
,
608 inet
->sport
, usin
->sin_port
, sk
);
612 if (rt
->rt_flags
& (RTCF_MULTICAST
| RTCF_BROADCAST
)) {
617 if (!inet
->opt
|| !inet
->opt
->srr
)
621 inet
->saddr
= rt
->rt_src
;
622 inet
->rcv_saddr
= inet
->saddr
;
624 if (tp
->rx_opt
.ts_recent_stamp
&& inet
->daddr
!= daddr
) {
625 /* Reset inherited state */
626 tp
->rx_opt
.ts_recent
= 0;
627 tp
->rx_opt
.ts_recent_stamp
= 0;
631 if (sysctl_tcp_tw_recycle
&&
632 !tp
->rx_opt
.ts_recent_stamp
&& rt
->rt_dst
== daddr
) {
633 struct inet_peer
*peer
= rt_get_peer(rt
);
635 /* VJ's idea. We save last timestamp seen from
636 * the destination in peer table, when entering state TIME-WAIT
637 * and initialize rx_opt.ts_recent from it, when trying new connection.
640 if (peer
&& peer
->tcp_ts_stamp
+ TCP_PAWS_MSL
>= xtime
.tv_sec
) {
641 tp
->rx_opt
.ts_recent_stamp
= peer
->tcp_ts_stamp
;
642 tp
->rx_opt
.ts_recent
= peer
->tcp_ts
;
646 inet
->dport
= usin
->sin_port
;
649 tp
->ext_header_len
= 0;
651 tp
->ext_header_len
= inet
->opt
->optlen
;
653 tp
->rx_opt
.mss_clamp
= 536;
655 /* Socket identity is still unknown (sport may be zero).
656 * However we set state to SYN-SENT and not releasing socket
657 * lock select source port, enter ourselves into the hash tables and
658 * complete initialization after this.
660 tcp_set_state(sk
, TCP_SYN_SENT
);
661 err
= tcp_v4_hash_connect(sk
);
665 err
= ip_route_newports(&rt
, inet
->sport
, inet
->dport
, sk
);
669 /* OK, now commit destination to socket. */
670 sk_setup_caps(sk
, &rt
->u
.dst
);
673 tp
->write_seq
= secure_tcp_sequence_number(inet
->saddr
,
678 inet
->id
= tp
->write_seq
^ jiffies
;
680 err
= tcp_connect(sk
);
688 /* This unhashes the socket and releases the local port, if necessary. */
689 tcp_set_state(sk
, TCP_CLOSE
);
691 sk
->sk_route_caps
= 0;
696 static __inline__
int tcp_v4_iif(struct sk_buff
*skb
)
698 return ((struct rtable
*)skb
->dst
)->rt_iif
;
701 static __inline__ u32
tcp_v4_synq_hash(u32 raddr
, u16 rport
, u32 rnd
)
703 return (jhash_2words(raddr
, (u32
) rport
, rnd
) & (TCP_SYNQ_HSIZE
- 1));
706 static struct request_sock
*tcp_v4_search_req(struct tcp_sock
*tp
,
707 struct request_sock
***prevp
,
709 __u32 raddr
, __u32 laddr
)
711 struct listen_sock
*lopt
= tp
->accept_queue
.listen_opt
;
712 struct request_sock
*req
, **prev
;
714 for (prev
= &lopt
->syn_table
[tcp_v4_synq_hash(raddr
, rport
, lopt
->hash_rnd
)];
715 (req
= *prev
) != NULL
;
716 prev
= &req
->dl_next
) {
717 const struct inet_request_sock
*ireq
= inet_rsk(req
);
719 if (ireq
->rmt_port
== rport
&&
720 ireq
->rmt_addr
== raddr
&&
721 ireq
->loc_addr
== laddr
&&
722 TCP_INET_FAMILY(req
->rsk_ops
->family
)) {
732 static void tcp_v4_synq_add(struct sock
*sk
, struct request_sock
*req
)
734 struct tcp_sock
*tp
= tcp_sk(sk
);
735 struct listen_sock
*lopt
= tp
->accept_queue
.listen_opt
;
736 u32 h
= tcp_v4_synq_hash(inet_rsk(req
)->rmt_addr
, inet_rsk(req
)->rmt_port
, lopt
->hash_rnd
);
738 reqsk_queue_hash_req(&tp
->accept_queue
, h
, req
, TCP_TIMEOUT_INIT
);
744 * This routine does path mtu discovery as defined in RFC1191.
746 static inline void do_pmtu_discovery(struct sock
*sk
, struct iphdr
*iph
,
749 struct dst_entry
*dst
;
750 struct inet_sock
*inet
= inet_sk(sk
);
751 struct tcp_sock
*tp
= tcp_sk(sk
);
753 /* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
754 * send out by Linux are always <576bytes so they should go through
757 if (sk
->sk_state
== TCP_LISTEN
)
760 /* We don't check in the destentry if pmtu discovery is forbidden
761 * on this route. We just assume that no packet_to_big packets
762 * are send back when pmtu discovery is not active.
763 * There is a small race when the user changes this flag in the
764 * route, but I think that's acceptable.
766 if ((dst
= __sk_dst_check(sk
, 0)) == NULL
)
769 dst
->ops
->update_pmtu(dst
, mtu
);
771 /* Something is about to be wrong... Remember soft error
772 * for the case, if this connection will not able to recover.
774 if (mtu
< dst_mtu(dst
) && ip_dont_fragment(sk
, dst
))
775 sk
->sk_err_soft
= EMSGSIZE
;
779 if (inet
->pmtudisc
!= IP_PMTUDISC_DONT
&&
780 tp
->pmtu_cookie
> mtu
) {
781 tcp_sync_mss(sk
, mtu
);
783 /* Resend the TCP packet because it's
784 * clear that the old packet has been
785 * dropped. This is the new "fast" path mtu
788 tcp_simple_retransmit(sk
);
789 } /* else let the usual retransmit timer handle it */
793 * This routine is called by the ICMP module when it gets some
794 * sort of error condition. If err < 0 then the socket should
795 * be closed and the error returned to the user. If err > 0
796 * it's just the icmp type << 8 | icmp code. After adjustment
797 * header points to the first 8 bytes of the tcp header. We need
798 * to find the appropriate port.
800 * The locking strategy used here is very "optimistic". When
801 * someone else accesses the socket the ICMP is just dropped
802 * and for some paths there is no check at all.
803 * A more general error queue to queue errors for later handling
804 * is probably better.
808 void tcp_v4_err(struct sk_buff
*skb
, u32 info
)
810 struct iphdr
*iph
= (struct iphdr
*)skb
->data
;
811 struct tcphdr
*th
= (struct tcphdr
*)(skb
->data
+ (iph
->ihl
<< 2));
813 struct inet_sock
*inet
;
814 int type
= skb
->h
.icmph
->type
;
815 int code
= skb
->h
.icmph
->code
;
820 if (skb
->len
< (iph
->ihl
<< 2) + 8) {
821 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS
);
825 sk
= tcp_v4_lookup(iph
->daddr
, th
->dest
, iph
->saddr
,
826 th
->source
, tcp_v4_iif(skb
));
828 ICMP_INC_STATS_BH(ICMP_MIB_INERRORS
);
831 if (sk
->sk_state
== TCP_TIME_WAIT
) {
832 tcp_tw_put((struct tcp_tw_bucket
*)sk
);
837 /* If too many ICMPs get dropped on busy
838 * servers this needs to be solved differently.
840 if (sock_owned_by_user(sk
))
841 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS
);
843 if (sk
->sk_state
== TCP_CLOSE
)
847 seq
= ntohl(th
->seq
);
848 if (sk
->sk_state
!= TCP_LISTEN
&&
849 !between(seq
, tp
->snd_una
, tp
->snd_nxt
)) {
850 NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS
);
855 case ICMP_SOURCE_QUENCH
:
856 /* Just silently ignore these. */
858 case ICMP_PARAMETERPROB
:
861 case ICMP_DEST_UNREACH
:
862 if (code
> NR_ICMP_UNREACH
)
865 if (code
== ICMP_FRAG_NEEDED
) { /* PMTU discovery (RFC1191) */
866 if (!sock_owned_by_user(sk
))
867 do_pmtu_discovery(sk
, iph
, info
);
871 err
= icmp_err_convert
[code
].errno
;
873 case ICMP_TIME_EXCEEDED
:
880 switch (sk
->sk_state
) {
881 struct request_sock
*req
, **prev
;
883 if (sock_owned_by_user(sk
))
886 req
= tcp_v4_search_req(tp
, &prev
, th
->dest
,
887 iph
->daddr
, iph
->saddr
);
891 /* ICMPs are not backlogged, hence we cannot get
892 an established socket here.
896 if (seq
!= tcp_rsk(req
)->snt_isn
) {
897 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS
);
902 * Still in SYN_RECV, just remove it silently.
903 * There is no good way to pass the error to the newly
904 * created socket, and POSIX does not want network
905 * errors returned from accept().
907 tcp_synq_drop(sk
, req
, prev
);
911 case TCP_SYN_RECV
: /* Cannot happen.
912 It can f.e. if SYNs crossed.
914 if (!sock_owned_by_user(sk
)) {
915 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS
);
918 sk
->sk_error_report(sk
);
922 sk
->sk_err_soft
= err
;
927 /* If we've already connected we will keep trying
928 * until we time out, or the user gives up.
930 * rfc1122 4.2.3.9 allows to consider as hard errors
931 * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
932 * but it is obsoleted by pmtu discovery).
934 * Note, that in modern internet, where routing is unreliable
935 * and in each dark corner broken firewalls sit, sending random
936 * errors ordered by their masters even this two messages finally lose
937 * their original sense (even Linux sends invalid PORT_UNREACHs)
939 * Now we are in compliance with RFCs.
944 if (!sock_owned_by_user(sk
) && inet
->recverr
) {
946 sk
->sk_error_report(sk
);
947 } else { /* Only an error on timeout */
948 sk
->sk_err_soft
= err
;
956 /* This routine computes an IPv4 TCP checksum. */
957 void tcp_v4_send_check(struct sock
*sk
, struct tcphdr
*th
, int len
,
960 struct inet_sock
*inet
= inet_sk(sk
);
962 if (skb
->ip_summed
== CHECKSUM_HW
) {
963 th
->check
= ~tcp_v4_check(th
, len
, inet
->saddr
, inet
->daddr
, 0);
964 skb
->csum
= offsetof(struct tcphdr
, check
);
966 th
->check
= tcp_v4_check(th
, len
, inet
->saddr
, inet
->daddr
,
967 csum_partial((char *)th
,
974 * This routine will send an RST to the other tcp.
976 * Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
978 * Answer: if a packet caused RST, it is not for a socket
979 * existing in our system, if it is matched to a socket,
980 * it is just duplicate segment or bug in other side's TCP.
981 * So that we build reply only basing on parameters
982 * arrived with segment.
983 * Exception: precedence violation. We do not implement it in any case.
986 static void tcp_v4_send_reset(struct sk_buff
*skb
)
988 struct tcphdr
*th
= skb
->h
.th
;
990 struct ip_reply_arg arg
;
992 /* Never send a reset in response to a reset. */
996 if (((struct rtable
*)skb
->dst
)->rt_type
!= RTN_LOCAL
)
999 /* Swap the send and the receive. */
1000 memset(&rth
, 0, sizeof(struct tcphdr
));
1001 rth
.dest
= th
->source
;
1002 rth
.source
= th
->dest
;
1003 rth
.doff
= sizeof(struct tcphdr
) / 4;
1007 rth
.seq
= th
->ack_seq
;
1010 rth
.ack_seq
= htonl(ntohl(th
->seq
) + th
->syn
+ th
->fin
+
1011 skb
->len
- (th
->doff
<< 2));
1014 memset(&arg
, 0, sizeof arg
);
1015 arg
.iov
[0].iov_base
= (unsigned char *)&rth
;
1016 arg
.iov
[0].iov_len
= sizeof rth
;
1017 arg
.csum
= csum_tcpudp_nofold(skb
->nh
.iph
->daddr
,
1018 skb
->nh
.iph
->saddr
, /*XXX*/
1019 sizeof(struct tcphdr
), IPPROTO_TCP
, 0);
1020 arg
.csumoffset
= offsetof(struct tcphdr
, check
) / 2;
1022 ip_send_reply(tcp_socket
->sk
, skb
, &arg
, sizeof rth
);
1024 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS
);
1025 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS
);
1028 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
1029 outside socket context is ugly, certainly. What can I do?
1032 static void tcp_v4_send_ack(struct sk_buff
*skb
, u32 seq
, u32 ack
,
1035 struct tcphdr
*th
= skb
->h
.th
;
1040 struct ip_reply_arg arg
;
1042 memset(&rep
.th
, 0, sizeof(struct tcphdr
));
1043 memset(&arg
, 0, sizeof arg
);
1045 arg
.iov
[0].iov_base
= (unsigned char *)&rep
;
1046 arg
.iov
[0].iov_len
= sizeof(rep
.th
);
1048 rep
.tsopt
[0] = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
1049 (TCPOPT_TIMESTAMP
<< 8) |
1051 rep
.tsopt
[1] = htonl(tcp_time_stamp
);
1052 rep
.tsopt
[2] = htonl(ts
);
1053 arg
.iov
[0].iov_len
= sizeof(rep
);
1056 /* Swap the send and the receive. */
1057 rep
.th
.dest
= th
->source
;
1058 rep
.th
.source
= th
->dest
;
1059 rep
.th
.doff
= arg
.iov
[0].iov_len
/ 4;
1060 rep
.th
.seq
= htonl(seq
);
1061 rep
.th
.ack_seq
= htonl(ack
);
1063 rep
.th
.window
= htons(win
);
1065 arg
.csum
= csum_tcpudp_nofold(skb
->nh
.iph
->daddr
,
1066 skb
->nh
.iph
->saddr
, /*XXX*/
1067 arg
.iov
[0].iov_len
, IPPROTO_TCP
, 0);
1068 arg
.csumoffset
= offsetof(struct tcphdr
, check
) / 2;
1070 ip_send_reply(tcp_socket
->sk
, skb
, &arg
, arg
.iov
[0].iov_len
);
1072 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS
);
1075 static void tcp_v4_timewait_ack(struct sock
*sk
, struct sk_buff
*skb
)
1077 struct tcp_tw_bucket
*tw
= (struct tcp_tw_bucket
*)sk
;
1079 tcp_v4_send_ack(skb
, tw
->tw_snd_nxt
, tw
->tw_rcv_nxt
,
1080 tw
->tw_rcv_wnd
>> tw
->tw_rcv_wscale
, tw
->tw_ts_recent
);
1085 static void tcp_v4_reqsk_send_ack(struct sk_buff
*skb
, struct request_sock
*req
)
1087 tcp_v4_send_ack(skb
, tcp_rsk(req
)->snt_isn
+ 1, tcp_rsk(req
)->rcv_isn
+ 1, req
->rcv_wnd
,
1091 static struct dst_entry
* tcp_v4_route_req(struct sock
*sk
,
1092 struct request_sock
*req
)
1095 const struct inet_request_sock
*ireq
= inet_rsk(req
);
1096 struct ip_options
*opt
= inet_rsk(req
)->opt
;
1097 struct flowi fl
= { .oif
= sk
->sk_bound_dev_if
,
1099 { .daddr
= ((opt
&& opt
->srr
) ?
1102 .saddr
= ireq
->loc_addr
,
1103 .tos
= RT_CONN_FLAGS(sk
) } },
1104 .proto
= IPPROTO_TCP
,
1106 { .sport
= inet_sk(sk
)->sport
,
1107 .dport
= ireq
->rmt_port
} } };
1109 if (ip_route_output_flow(&rt
, &fl
, sk
, 0)) {
1110 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES
);
1113 if (opt
&& opt
->is_strictroute
&& rt
->rt_dst
!= rt
->rt_gateway
) {
1115 IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES
);
1122 * Send a SYN-ACK after having received an ACK.
1123 * This still operates on a request_sock only, not on a big
1126 static int tcp_v4_send_synack(struct sock
*sk
, struct request_sock
*req
,
1127 struct dst_entry
*dst
)
1129 const struct inet_request_sock
*ireq
= inet_rsk(req
);
1131 struct sk_buff
* skb
;
1133 /* First, grab a route. */
1134 if (!dst
&& (dst
= tcp_v4_route_req(sk
, req
)) == NULL
)
1137 skb
= tcp_make_synack(sk
, dst
, req
);
1140 struct tcphdr
*th
= skb
->h
.th
;
1142 th
->check
= tcp_v4_check(th
, skb
->len
,
1145 csum_partial((char *)th
, skb
->len
,
1148 err
= ip_build_and_send_pkt(skb
, sk
, ireq
->loc_addr
,
1151 if (err
== NET_XMIT_CN
)
1161 * IPv4 request_sock destructor.
1163 static void tcp_v4_reqsk_destructor(struct request_sock
*req
)
1165 if (inet_rsk(req
)->opt
)
1166 kfree(inet_rsk(req
)->opt
);
1169 static inline void syn_flood_warning(struct sk_buff
*skb
)
1171 static unsigned long warntime
;
1173 if (time_after(jiffies
, (warntime
+ HZ
* 60))) {
1176 "possible SYN flooding on port %d. Sending cookies.\n",
1177 ntohs(skb
->h
.th
->dest
));
1182 * Save and compile IPv4 options into the request_sock if needed.
1184 static inline struct ip_options
*tcp_v4_save_options(struct sock
*sk
,
1185 struct sk_buff
*skb
)
1187 struct ip_options
*opt
= &(IPCB(skb
)->opt
);
1188 struct ip_options
*dopt
= NULL
;
1190 if (opt
&& opt
->optlen
) {
1191 int opt_size
= optlength(opt
);
1192 dopt
= kmalloc(opt_size
, GFP_ATOMIC
);
1194 if (ip_options_echo(dopt
, skb
)) {
1203 struct request_sock_ops tcp_request_sock_ops
= {
1205 .obj_size
= sizeof(struct tcp_request_sock
),
1206 .rtx_syn_ack
= tcp_v4_send_synack
,
1207 .send_ack
= tcp_v4_reqsk_send_ack
,
1208 .destructor
= tcp_v4_reqsk_destructor
,
1209 .send_reset
= tcp_v4_send_reset
,
1212 int tcp_v4_conn_request(struct sock
*sk
, struct sk_buff
*skb
)
1214 struct inet_request_sock
*ireq
;
1215 struct tcp_options_received tmp_opt
;
1216 struct request_sock
*req
;
1217 __u32 saddr
= skb
->nh
.iph
->saddr
;
1218 __u32 daddr
= skb
->nh
.iph
->daddr
;
1219 __u32 isn
= TCP_SKB_CB(skb
)->when
;
1220 struct dst_entry
*dst
= NULL
;
1221 #ifdef CONFIG_SYN_COOKIES
1222 int want_cookie
= 0;
1224 #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
1227 /* Never answer to SYNs send to broadcast or multicast */
1228 if (((struct rtable
*)skb
->dst
)->rt_flags
&
1229 (RTCF_BROADCAST
| RTCF_MULTICAST
))
1232 /* TW buckets are converted to open requests without
1233 * limitations, they conserve resources and peer is
1234 * evidently real one.
1236 if (tcp_synq_is_full(sk
) && !isn
) {
1237 #ifdef CONFIG_SYN_COOKIES
1238 if (sysctl_tcp_syncookies
) {
1245 /* Accept backlog is full. If we have already queued enough
1246 * of warm entries in syn queue, drop request. It is better than
1247 * clogging syn queue with openreqs with exponentially increasing
1250 if (sk_acceptq_is_full(sk
) && tcp_synq_young(sk
) > 1)
1253 req
= reqsk_alloc(&tcp_request_sock_ops
);
1257 tcp_clear_options(&tmp_opt
);
1258 tmp_opt
.mss_clamp
= 536;
1259 tmp_opt
.user_mss
= tcp_sk(sk
)->rx_opt
.user_mss
;
1261 tcp_parse_options(skb
, &tmp_opt
, 0);
1264 tcp_clear_options(&tmp_opt
);
1265 tmp_opt
.saw_tstamp
= 0;
1268 if (tmp_opt
.saw_tstamp
&& !tmp_opt
.rcv_tsval
) {
1269 /* Some OSes (unknown ones, but I see them on web server, which
1270 * contains information interesting only for windows'
1271 * users) do not send their stamp in SYN. It is easy case.
1272 * We simply do not advertise TS support.
1274 tmp_opt
.saw_tstamp
= 0;
1275 tmp_opt
.tstamp_ok
= 0;
1277 tmp_opt
.tstamp_ok
= tmp_opt
.saw_tstamp
;
1279 tcp_openreq_init(req
, &tmp_opt
, skb
);
1281 ireq
= inet_rsk(req
);
1282 ireq
->loc_addr
= daddr
;
1283 ireq
->rmt_addr
= saddr
;
1284 ireq
->opt
= tcp_v4_save_options(sk
, skb
);
1286 TCP_ECN_create_request(req
, skb
->h
.th
);
1289 #ifdef CONFIG_SYN_COOKIES
1290 syn_flood_warning(skb
);
1292 isn
= cookie_v4_init_sequence(sk
, skb
, &req
->mss
);
1294 struct inet_peer
*peer
= NULL
;
1296 /* VJ's idea. We save last timestamp seen
1297 * from the destination in peer table, when entering
1298 * state TIME-WAIT, and check against it before
1299 * accepting new connection request.
1301 * If "isn" is not zero, this request hit alive
1302 * timewait bucket, so that all the necessary checks
1303 * are made in the function processing timewait state.
1305 if (tmp_opt
.saw_tstamp
&&
1306 sysctl_tcp_tw_recycle
&&
1307 (dst
= tcp_v4_route_req(sk
, req
)) != NULL
&&
1308 (peer
= rt_get_peer((struct rtable
*)dst
)) != NULL
&&
1309 peer
->v4daddr
== saddr
) {
1310 if (xtime
.tv_sec
< peer
->tcp_ts_stamp
+ TCP_PAWS_MSL
&&
1311 (s32
)(peer
->tcp_ts
- req
->ts_recent
) >
1313 NET_INC_STATS_BH(LINUX_MIB_PAWSPASSIVEREJECTED
);
1318 /* Kill the following clause, if you dislike this way. */
1319 else if (!sysctl_tcp_syncookies
&&
1320 (sysctl_max_syn_backlog
- tcp_synq_len(sk
) <
1321 (sysctl_max_syn_backlog
>> 2)) &&
1322 (!peer
|| !peer
->tcp_ts_stamp
) &&
1323 (!dst
|| !dst_metric(dst
, RTAX_RTT
))) {
1324 /* Without syncookies last quarter of
1325 * backlog is filled with destinations,
1326 * proven to be alive.
1327 * It means that we continue to communicate
1328 * to destinations, already remembered
1329 * to the moment of synflood.
1331 LIMIT_NETDEBUG(printk(KERN_DEBUG
"TCP: drop open "
1332 "request from %u.%u."
1335 ntohs(skb
->h
.th
->source
)));
1340 isn
= tcp_v4_init_sequence(sk
, skb
);
1342 tcp_rsk(req
)->snt_isn
= isn
;
1344 if (tcp_v4_send_synack(sk
, req
, dst
))
1350 tcp_v4_synq_add(sk
, req
);
1357 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS
);
1363 * The three way handshake has completed - we got a valid synack -
1364 * now create the new socket.
1366 struct sock
*tcp_v4_syn_recv_sock(struct sock
*sk
, struct sk_buff
*skb
,
1367 struct request_sock
*req
,
1368 struct dst_entry
*dst
)
1370 struct inet_request_sock
*ireq
;
1371 struct inet_sock
*newinet
;
1372 struct tcp_sock
*newtp
;
1375 if (sk_acceptq_is_full(sk
))
1378 if (!dst
&& (dst
= tcp_v4_route_req(sk
, req
)) == NULL
)
1381 newsk
= tcp_create_openreq_child(sk
, req
, skb
);
1385 sk_setup_caps(newsk
, dst
);
1387 newtp
= tcp_sk(newsk
);
1388 newinet
= inet_sk(newsk
);
1389 ireq
= inet_rsk(req
);
1390 newinet
->daddr
= ireq
->rmt_addr
;
1391 newinet
->rcv_saddr
= ireq
->loc_addr
;
1392 newinet
->saddr
= ireq
->loc_addr
;
1393 newinet
->opt
= ireq
->opt
;
1395 newinet
->mc_index
= tcp_v4_iif(skb
);
1396 newinet
->mc_ttl
= skb
->nh
.iph
->ttl
;
1397 newtp
->ext_header_len
= 0;
1399 newtp
->ext_header_len
= newinet
->opt
->optlen
;
1400 newinet
->id
= newtp
->write_seq
^ jiffies
;
1402 tcp_sync_mss(newsk
, dst_mtu(dst
));
1403 newtp
->advmss
= dst_metric(dst
, RTAX_ADVMSS
);
1404 tcp_initialize_rcv_mss(newsk
);
1406 __inet_hash(&tcp_hashinfo
, newsk
, 0);
1407 __inet_inherit_port(&tcp_hashinfo
, sk
, newsk
);
1412 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS
);
1414 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS
);
1419 static struct sock
*tcp_v4_hnd_req(struct sock
*sk
, struct sk_buff
*skb
)
1421 struct tcphdr
*th
= skb
->h
.th
;
1422 struct iphdr
*iph
= skb
->nh
.iph
;
1423 struct tcp_sock
*tp
= tcp_sk(sk
);
1425 struct request_sock
**prev
;
1426 /* Find possible connection requests. */
1427 struct request_sock
*req
= tcp_v4_search_req(tp
, &prev
, th
->source
,
1428 iph
->saddr
, iph
->daddr
);
1430 return tcp_check_req(sk
, skb
, req
, prev
);
1432 nsk
= __tcp_v4_lookup_established(skb
->nh
.iph
->saddr
,
1439 if (nsk
->sk_state
!= TCP_TIME_WAIT
) {
1443 tcp_tw_put((struct tcp_tw_bucket
*)nsk
);
1447 #ifdef CONFIG_SYN_COOKIES
1448 if (!th
->rst
&& !th
->syn
&& th
->ack
)
1449 sk
= cookie_v4_check(sk
, skb
, &(IPCB(skb
)->opt
));
1454 static int tcp_v4_checksum_init(struct sk_buff
*skb
)
1456 if (skb
->ip_summed
== CHECKSUM_HW
) {
1457 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1458 if (!tcp_v4_check(skb
->h
.th
, skb
->len
, skb
->nh
.iph
->saddr
,
1459 skb
->nh
.iph
->daddr
, skb
->csum
))
1462 LIMIT_NETDEBUG(printk(KERN_DEBUG
"hw tcp v4 csum failed\n"));
1463 skb
->ip_summed
= CHECKSUM_NONE
;
1465 if (skb
->len
<= 76) {
1466 if (tcp_v4_check(skb
->h
.th
, skb
->len
, skb
->nh
.iph
->saddr
,
1468 skb_checksum(skb
, 0, skb
->len
, 0)))
1470 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
1472 skb
->csum
= ~tcp_v4_check(skb
->h
.th
, skb
->len
,
1474 skb
->nh
.iph
->daddr
, 0);
1480 /* The socket must have it's spinlock held when we get
1483 * We have a potential double-lock case here, so even when
1484 * doing backlog processing we use the BH locking scheme.
1485 * This is because we cannot sleep with the original spinlock
1488 int tcp_v4_do_rcv(struct sock
*sk
, struct sk_buff
*skb
)
1490 if (sk
->sk_state
== TCP_ESTABLISHED
) { /* Fast path */
1491 TCP_CHECK_TIMER(sk
);
1492 if (tcp_rcv_established(sk
, skb
, skb
->h
.th
, skb
->len
))
1494 TCP_CHECK_TIMER(sk
);
1498 if (skb
->len
< (skb
->h
.th
->doff
<< 2) || tcp_checksum_complete(skb
))
1501 if (sk
->sk_state
== TCP_LISTEN
) {
1502 struct sock
*nsk
= tcp_v4_hnd_req(sk
, skb
);
1507 if (tcp_child_process(sk
, nsk
, skb
))
1513 TCP_CHECK_TIMER(sk
);
1514 if (tcp_rcv_state_process(sk
, skb
, skb
->h
.th
, skb
->len
))
1516 TCP_CHECK_TIMER(sk
);
1520 tcp_v4_send_reset(skb
);
1523 /* Be careful here. If this function gets more complicated and
1524 * gcc suffers from register pressure on the x86, sk (in %ebx)
1525 * might be destroyed here. This current version compiles correctly,
1526 * but you have been warned.
1531 TCP_INC_STATS_BH(TCP_MIB_INERRS
);
1539 int tcp_v4_rcv(struct sk_buff
*skb
)
1545 if (skb
->pkt_type
!= PACKET_HOST
)
1548 /* Count it even if it's bad */
1549 TCP_INC_STATS_BH(TCP_MIB_INSEGS
);
1551 if (!pskb_may_pull(skb
, sizeof(struct tcphdr
)))
1556 if (th
->doff
< sizeof(struct tcphdr
) / 4)
1558 if (!pskb_may_pull(skb
, th
->doff
* 4))
1561 /* An explanation is required here, I think.
1562 * Packet length and doff are validated by header prediction,
1563 * provided case of th->doff==0 is elimineted.
1564 * So, we defer the checks. */
1565 if ((skb
->ip_summed
!= CHECKSUM_UNNECESSARY
&&
1566 tcp_v4_checksum_init(skb
) < 0))
1570 TCP_SKB_CB(skb
)->seq
= ntohl(th
->seq
);
1571 TCP_SKB_CB(skb
)->end_seq
= (TCP_SKB_CB(skb
)->seq
+ th
->syn
+ th
->fin
+
1572 skb
->len
- th
->doff
* 4);
1573 TCP_SKB_CB(skb
)->ack_seq
= ntohl(th
->ack_seq
);
1574 TCP_SKB_CB(skb
)->when
= 0;
1575 TCP_SKB_CB(skb
)->flags
= skb
->nh
.iph
->tos
;
1576 TCP_SKB_CB(skb
)->sacked
= 0;
1578 sk
= __tcp_v4_lookup(skb
->nh
.iph
->saddr
, th
->source
,
1579 skb
->nh
.iph
->daddr
, ntohs(th
->dest
),
1586 if (sk
->sk_state
== TCP_TIME_WAIT
)
1589 if (!xfrm4_policy_check(sk
, XFRM_POLICY_IN
, skb
))
1590 goto discard_and_relse
;
1592 if (sk_filter(sk
, skb
, 0))
1593 goto discard_and_relse
;
1599 if (!sock_owned_by_user(sk
)) {
1600 if (!tcp_prequeue(sk
, skb
))
1601 ret
= tcp_v4_do_rcv(sk
, skb
);
1603 sk_add_backlog(sk
, skb
);
1611 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
1614 if (skb
->len
< (th
->doff
<< 2) || tcp_checksum_complete(skb
)) {
1616 TCP_INC_STATS_BH(TCP_MIB_INERRS
);
1618 tcp_v4_send_reset(skb
);
1622 /* Discard frame. */
1631 if (!xfrm4_policy_check(NULL
, XFRM_POLICY_IN
, skb
)) {
1632 tcp_tw_put((struct tcp_tw_bucket
*) sk
);
1636 if (skb
->len
< (th
->doff
<< 2) || tcp_checksum_complete(skb
)) {
1637 TCP_INC_STATS_BH(TCP_MIB_INERRS
);
1638 tcp_tw_put((struct tcp_tw_bucket
*) sk
);
1641 switch (tcp_timewait_state_process((struct tcp_tw_bucket
*)sk
,
1642 skb
, th
, skb
->len
)) {
1644 struct sock
*sk2
= tcp_v4_lookup_listener(skb
->nh
.iph
->daddr
,
1648 tcp_tw_deschedule((struct tcp_tw_bucket
*)sk
);
1649 tcp_tw_put((struct tcp_tw_bucket
*)sk
);
1653 /* Fall through to ACK */
1656 tcp_v4_timewait_ack(sk
, skb
);
1660 case TCP_TW_SUCCESS
:;
1665 static void v4_addr2sockaddr(struct sock
*sk
, struct sockaddr
* uaddr
)
1667 struct sockaddr_in
*sin
= (struct sockaddr_in
*) uaddr
;
1668 struct inet_sock
*inet
= inet_sk(sk
);
1670 sin
->sin_family
= AF_INET
;
1671 sin
->sin_addr
.s_addr
= inet
->daddr
;
1672 sin
->sin_port
= inet
->dport
;
1675 /* VJ's idea. Save last timestamp seen from this destination
1676 * and hold it at least for normal timewait interval to use for duplicate
1677 * segment detection in subsequent connections, before they enter synchronized
1681 int tcp_v4_remember_stamp(struct sock
*sk
)
1683 struct inet_sock
*inet
= inet_sk(sk
);
1684 struct tcp_sock
*tp
= tcp_sk(sk
);
1685 struct rtable
*rt
= (struct rtable
*)__sk_dst_get(sk
);
1686 struct inet_peer
*peer
= NULL
;
1689 if (!rt
|| rt
->rt_dst
!= inet
->daddr
) {
1690 peer
= inet_getpeer(inet
->daddr
, 1);
1694 rt_bind_peer(rt
, 1);
1699 if ((s32
)(peer
->tcp_ts
- tp
->rx_opt
.ts_recent
) <= 0 ||
1700 (peer
->tcp_ts_stamp
+ TCP_PAWS_MSL
< xtime
.tv_sec
&&
1701 peer
->tcp_ts_stamp
<= tp
->rx_opt
.ts_recent_stamp
)) {
1702 peer
->tcp_ts_stamp
= tp
->rx_opt
.ts_recent_stamp
;
1703 peer
->tcp_ts
= tp
->rx_opt
.ts_recent
;
1713 int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket
*tw
)
1715 struct inet_peer
*peer
= NULL
;
1717 peer
= inet_getpeer(tw
->tw_daddr
, 1);
1720 if ((s32
)(peer
->tcp_ts
- tw
->tw_ts_recent
) <= 0 ||
1721 (peer
->tcp_ts_stamp
+ TCP_PAWS_MSL
< xtime
.tv_sec
&&
1722 peer
->tcp_ts_stamp
<= tw
->tw_ts_recent_stamp
)) {
1723 peer
->tcp_ts_stamp
= tw
->tw_ts_recent_stamp
;
1724 peer
->tcp_ts
= tw
->tw_ts_recent
;
1733 struct tcp_func ipv4_specific
= {
1734 .queue_xmit
= ip_queue_xmit
,
1735 .send_check
= tcp_v4_send_check
,
1736 .rebuild_header
= inet_sk_rebuild_header
,
1737 .conn_request
= tcp_v4_conn_request
,
1738 .syn_recv_sock
= tcp_v4_syn_recv_sock
,
1739 .remember_stamp
= tcp_v4_remember_stamp
,
1740 .net_header_len
= sizeof(struct iphdr
),
1741 .setsockopt
= ip_setsockopt
,
1742 .getsockopt
= ip_getsockopt
,
1743 .addr2sockaddr
= v4_addr2sockaddr
,
1744 .sockaddr_len
= sizeof(struct sockaddr_in
),
1747 /* NOTE: A lot of things set to zero explicitly by call to
1748 * sk_alloc() so need not be done here.
1750 static int tcp_v4_init_sock(struct sock
*sk
)
1752 struct tcp_sock
*tp
= tcp_sk(sk
);
1754 skb_queue_head_init(&tp
->out_of_order_queue
);
1755 tcp_init_xmit_timers(sk
);
1756 tcp_prequeue_init(tp
);
1758 tp
->rto
= TCP_TIMEOUT_INIT
;
1759 tp
->mdev
= TCP_TIMEOUT_INIT
;
1761 /* So many TCP implementations out there (incorrectly) count the
1762 * initial SYN frame in their delayed-ACK and congestion control
1763 * algorithms that we must have the following bandaid to talk
1764 * efficiently to them. -DaveM
1768 /* See draft-stevens-tcpca-spec-01 for discussion of the
1769 * initialization of these values.
1771 tp
->snd_ssthresh
= 0x7fffffff; /* Infinity */
1772 tp
->snd_cwnd_clamp
= ~0;
1773 tp
->mss_cache
= 536;
1775 tp
->reordering
= sysctl_tcp_reordering
;
1776 tp
->ca_ops
= &tcp_init_congestion_ops
;
1778 sk
->sk_state
= TCP_CLOSE
;
1780 sk
->sk_write_space
= sk_stream_write_space
;
1781 sock_set_flag(sk
, SOCK_USE_WRITE_QUEUE
);
1783 tp
->af_specific
= &ipv4_specific
;
1785 sk
->sk_sndbuf
= sysctl_tcp_wmem
[1];
1786 sk
->sk_rcvbuf
= sysctl_tcp_rmem
[1];
1788 atomic_inc(&tcp_sockets_allocated
);
1793 int tcp_v4_destroy_sock(struct sock
*sk
)
1795 struct tcp_sock
*tp
= tcp_sk(sk
);
1797 tcp_clear_xmit_timers(sk
);
1799 tcp_cleanup_congestion_control(tp
);
1801 /* Cleanup up the write buffer. */
1802 sk_stream_writequeue_purge(sk
);
1804 /* Cleans up our, hopefully empty, out_of_order_queue. */
1805 __skb_queue_purge(&tp
->out_of_order_queue
);
1807 /* Clean prequeue, it must be empty really */
1808 __skb_queue_purge(&tp
->ucopy
.prequeue
);
1810 /* Clean up a referenced TCP bind bucket. */
1811 if (inet_sk(sk
)->bind_hash
)
1812 inet_put_port(&tcp_hashinfo
, sk
);
1815 * If sendmsg cached page exists, toss it.
1817 if (sk
->sk_sndmsg_page
) {
1818 __free_page(sk
->sk_sndmsg_page
);
1819 sk
->sk_sndmsg_page
= NULL
;
1822 atomic_dec(&tcp_sockets_allocated
);
EXPORT_SYMBOL(tcp_v4_destroy_sock);
1829 #ifdef CONFIG_PROC_FS
1830 /* Proc filesystem TCP sock list dumping. */
1832 static inline struct tcp_tw_bucket
*tw_head(struct hlist_head
*head
)
1834 return hlist_empty(head
) ? NULL
:
1835 list_entry(head
->first
, struct tcp_tw_bucket
, tw_node
);
1838 static inline struct tcp_tw_bucket
*tw_next(struct tcp_tw_bucket
*tw
)
1840 return tw
->tw_node
.next
?
1841 hlist_entry(tw
->tw_node
.next
, typeof(*tw
), tw_node
) : NULL
;
1844 static void *listening_get_next(struct seq_file
*seq
, void *cur
)
1846 struct tcp_sock
*tp
;
1847 struct hlist_node
*node
;
1848 struct sock
*sk
= cur
;
1849 struct tcp_iter_state
* st
= seq
->private;
1853 sk
= sk_head(&tcp_hashinfo
.listening_hash
[0]);
1859 if (st
->state
== TCP_SEQ_STATE_OPENREQ
) {
1860 struct request_sock
*req
= cur
;
1862 tp
= tcp_sk(st
->syn_wait_sk
);
1866 if (req
->rsk_ops
->family
== st
->family
) {
1872 if (++st
->sbucket
>= TCP_SYNQ_HSIZE
)
1875 req
= tp
->accept_queue
.listen_opt
->syn_table
[st
->sbucket
];
1877 sk
= sk_next(st
->syn_wait_sk
);
1878 st
->state
= TCP_SEQ_STATE_LISTENING
;
1879 read_unlock_bh(&tp
->accept_queue
.syn_wait_lock
);
1882 read_lock_bh(&tp
->accept_queue
.syn_wait_lock
);
1883 if (reqsk_queue_len(&tp
->accept_queue
))
1885 read_unlock_bh(&tp
->accept_queue
.syn_wait_lock
);
1889 sk_for_each_from(sk
, node
) {
1890 if (sk
->sk_family
== st
->family
) {
1895 read_lock_bh(&tp
->accept_queue
.syn_wait_lock
);
1896 if (reqsk_queue_len(&tp
->accept_queue
)) {
1898 st
->uid
= sock_i_uid(sk
);
1899 st
->syn_wait_sk
= sk
;
1900 st
->state
= TCP_SEQ_STATE_OPENREQ
;
1904 read_unlock_bh(&tp
->accept_queue
.syn_wait_lock
);
1906 if (++st
->bucket
< INET_LHTABLE_SIZE
) {
1907 sk
= sk_head(&tcp_hashinfo
.listening_hash
[st
->bucket
]);
1915 static void *listening_get_idx(struct seq_file
*seq
, loff_t
*pos
)
1917 void *rc
= listening_get_next(seq
, NULL
);
1919 while (rc
&& *pos
) {
1920 rc
= listening_get_next(seq
, rc
);
1926 static void *established_get_first(struct seq_file
*seq
)
1928 struct tcp_iter_state
* st
= seq
->private;
1931 for (st
->bucket
= 0; st
->bucket
< tcp_hashinfo
.ehash_size
; ++st
->bucket
) {
1933 struct hlist_node
*node
;
1934 struct tcp_tw_bucket
*tw
;
1936 /* We can reschedule _before_ having picked the target: */
1937 cond_resched_softirq();
1939 read_lock(&tcp_hashinfo
.ehash
[st
->bucket
].lock
);
1940 sk_for_each(sk
, node
, &tcp_hashinfo
.ehash
[st
->bucket
].chain
) {
1941 if (sk
->sk_family
!= st
->family
) {
1947 st
->state
= TCP_SEQ_STATE_TIME_WAIT
;
1948 tw_for_each(tw
, node
,
1949 &tcp_hashinfo
.ehash
[st
->bucket
+ tcp_hashinfo
.ehash_size
].chain
) {
1950 if (tw
->tw_family
!= st
->family
) {
1956 read_unlock(&tcp_hashinfo
.ehash
[st
->bucket
].lock
);
1957 st
->state
= TCP_SEQ_STATE_ESTABLISHED
;
1963 static void *established_get_next(struct seq_file
*seq
, void *cur
)
1965 struct sock
*sk
= cur
;
1966 struct tcp_tw_bucket
*tw
;
1967 struct hlist_node
*node
;
1968 struct tcp_iter_state
* st
= seq
->private;
1972 if (st
->state
== TCP_SEQ_STATE_TIME_WAIT
) {
1976 while (tw
&& tw
->tw_family
!= st
->family
) {
1983 read_unlock(&tcp_hashinfo
.ehash
[st
->bucket
].lock
);
1984 st
->state
= TCP_SEQ_STATE_ESTABLISHED
;
1986 /* We can reschedule between buckets: */
1987 cond_resched_softirq();
1989 if (++st
->bucket
< tcp_hashinfo
.ehash_size
) {
1990 read_lock(&tcp_hashinfo
.ehash
[st
->bucket
].lock
);
1991 sk
= sk_head(&tcp_hashinfo
.ehash
[st
->bucket
].chain
);
1999 sk_for_each_from(sk
, node
) {
2000 if (sk
->sk_family
== st
->family
)
2004 st
->state
= TCP_SEQ_STATE_TIME_WAIT
;
2005 tw
= tw_head(&tcp_hashinfo
.ehash
[st
->bucket
+ tcp_hashinfo
.ehash_size
].chain
);
2013 static void *established_get_idx(struct seq_file
*seq
, loff_t pos
)
2015 void *rc
= established_get_first(seq
);
2018 rc
= established_get_next(seq
, rc
);
2024 static void *tcp_get_idx(struct seq_file
*seq
, loff_t pos
)
2027 struct tcp_iter_state
* st
= seq
->private;
2029 inet_listen_lock(&tcp_hashinfo
);
2030 st
->state
= TCP_SEQ_STATE_LISTENING
;
2031 rc
= listening_get_idx(seq
, &pos
);
2034 inet_listen_unlock(&tcp_hashinfo
);
2036 st
->state
= TCP_SEQ_STATE_ESTABLISHED
;
2037 rc
= established_get_idx(seq
, pos
);
2043 static void *tcp_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2045 struct tcp_iter_state
* st
= seq
->private;
2046 st
->state
= TCP_SEQ_STATE_LISTENING
;
2048 return *pos
? tcp_get_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
2051 static void *tcp_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2054 struct tcp_iter_state
* st
;
2056 if (v
== SEQ_START_TOKEN
) {
2057 rc
= tcp_get_idx(seq
, 0);
2062 switch (st
->state
) {
2063 case TCP_SEQ_STATE_OPENREQ
:
2064 case TCP_SEQ_STATE_LISTENING
:
2065 rc
= listening_get_next(seq
, v
);
2067 inet_listen_unlock(&tcp_hashinfo
);
2069 st
->state
= TCP_SEQ_STATE_ESTABLISHED
;
2070 rc
= established_get_first(seq
);
2073 case TCP_SEQ_STATE_ESTABLISHED
:
2074 case TCP_SEQ_STATE_TIME_WAIT
:
2075 rc
= established_get_next(seq
, v
);
2083 static void tcp_seq_stop(struct seq_file
*seq
, void *v
)
2085 struct tcp_iter_state
* st
= seq
->private;
2087 switch (st
->state
) {
2088 case TCP_SEQ_STATE_OPENREQ
:
2090 struct tcp_sock
*tp
= tcp_sk(st
->syn_wait_sk
);
2091 read_unlock_bh(&tp
->accept_queue
.syn_wait_lock
);
2093 case TCP_SEQ_STATE_LISTENING
:
2094 if (v
!= SEQ_START_TOKEN
)
2095 inet_listen_unlock(&tcp_hashinfo
);
2097 case TCP_SEQ_STATE_TIME_WAIT
:
2098 case TCP_SEQ_STATE_ESTABLISHED
:
2100 read_unlock(&tcp_hashinfo
.ehash
[st
->bucket
].lock
);
2106 static int tcp_seq_open(struct inode
*inode
, struct file
*file
)
2108 struct tcp_seq_afinfo
*afinfo
= PDE(inode
)->data
;
2109 struct seq_file
*seq
;
2110 struct tcp_iter_state
*s
;
2113 if (unlikely(afinfo
== NULL
))
2116 s
= kmalloc(sizeof(*s
), GFP_KERNEL
);
2119 memset(s
, 0, sizeof(*s
));
2120 s
->family
= afinfo
->family
;
2121 s
->seq_ops
.start
= tcp_seq_start
;
2122 s
->seq_ops
.next
= tcp_seq_next
;
2123 s
->seq_ops
.show
= afinfo
->seq_show
;
2124 s
->seq_ops
.stop
= tcp_seq_stop
;
2126 rc
= seq_open(file
, &s
->seq_ops
);
2129 seq
= file
->private_data
;
2138 int tcp_proc_register(struct tcp_seq_afinfo
*afinfo
)
2141 struct proc_dir_entry
*p
;
2145 afinfo
->seq_fops
->owner
= afinfo
->owner
;
2146 afinfo
->seq_fops
->open
= tcp_seq_open
;
2147 afinfo
->seq_fops
->read
= seq_read
;
2148 afinfo
->seq_fops
->llseek
= seq_lseek
;
2149 afinfo
->seq_fops
->release
= seq_release_private
;
2151 p
= proc_net_fops_create(afinfo
->name
, S_IRUGO
, afinfo
->seq_fops
);
2159 void tcp_proc_unregister(struct tcp_seq_afinfo
*afinfo
)
2163 proc_net_remove(afinfo
->name
);
2164 memset(afinfo
->seq_fops
, 0, sizeof(*afinfo
->seq_fops
));
2167 static void get_openreq4(struct sock
*sk
, struct request_sock
*req
,
2168 char *tmpbuf
, int i
, int uid
)
2170 const struct inet_request_sock
*ireq
= inet_rsk(req
);
2171 int ttd
= req
->expires
- jiffies
;
2173 sprintf(tmpbuf
, "%4d: %08X:%04X %08X:%04X"
2174 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %p",
2177 ntohs(inet_sk(sk
)->sport
),
2179 ntohs(ireq
->rmt_port
),
2181 0, 0, /* could print option size, but that is af dependent. */
2182 1, /* timers active (only the expire timer) */
2183 jiffies_to_clock_t(ttd
),
2186 0, /* non standard timer */
2187 0, /* open_requests have no inode */
2188 atomic_read(&sk
->sk_refcnt
),
2192 static void get_tcp4_sock(struct sock
*sp
, char *tmpbuf
, int i
)
2195 unsigned long timer_expires
;
2196 struct tcp_sock
*tp
= tcp_sk(sp
);
2197 struct inet_sock
*inet
= inet_sk(sp
);
2198 unsigned int dest
= inet
->daddr
;
2199 unsigned int src
= inet
->rcv_saddr
;
2200 __u16 destp
= ntohs(inet
->dport
);
2201 __u16 srcp
= ntohs(inet
->sport
);
2203 if (tp
->pending
== TCP_TIME_RETRANS
) {
2205 timer_expires
= tp
->timeout
;
2206 } else if (tp
->pending
== TCP_TIME_PROBE0
) {
2208 timer_expires
= tp
->timeout
;
2209 } else if (timer_pending(&sp
->sk_timer
)) {
2211 timer_expires
= sp
->sk_timer
.expires
;
2214 timer_expires
= jiffies
;
2217 sprintf(tmpbuf
, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2218 "%08X %5d %8d %lu %d %p %u %u %u %u %d",
2219 i
, src
, srcp
, dest
, destp
, sp
->sk_state
,
2220 tp
->write_seq
- tp
->snd_una
, tp
->rcv_nxt
- tp
->copied_seq
,
2222 jiffies_to_clock_t(timer_expires
- jiffies
),
2227 atomic_read(&sp
->sk_refcnt
), sp
,
2228 tp
->rto
, tp
->ack
.ato
, (tp
->ack
.quick
<< 1) | tp
->ack
.pingpong
,
2230 tp
->snd_ssthresh
>= 0xFFFF ? -1 : tp
->snd_ssthresh
);
2233 static void get_timewait4_sock(struct tcp_tw_bucket
*tw
, char *tmpbuf
, int i
)
2235 unsigned int dest
, src
;
2237 int ttd
= tw
->tw_ttd
- jiffies
;
2242 dest
= tw
->tw_daddr
;
2243 src
= tw
->tw_rcv_saddr
;
2244 destp
= ntohs(tw
->tw_dport
);
2245 srcp
= ntohs(tw
->tw_sport
);
2247 sprintf(tmpbuf
, "%4d: %08X:%04X %08X:%04X"
2248 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p",
2249 i
, src
, srcp
, dest
, destp
, tw
->tw_substate
, 0, 0,
2250 3, jiffies_to_clock_t(ttd
), 0, 0, 0, 0,
2251 atomic_read(&tw
->tw_refcnt
), tw
);
2256 static int tcp4_seq_show(struct seq_file
*seq
, void *v
)
2258 struct tcp_iter_state
* st
;
2259 char tmpbuf
[TMPSZ
+ 1];
2261 if (v
== SEQ_START_TOKEN
) {
2262 seq_printf(seq
, "%-*s\n", TMPSZ
- 1,
2263 " sl local_address rem_address st tx_queue "
2264 "rx_queue tr tm->when retrnsmt uid timeout "
2270 switch (st
->state
) {
2271 case TCP_SEQ_STATE_LISTENING
:
2272 case TCP_SEQ_STATE_ESTABLISHED
:
2273 get_tcp4_sock(v
, tmpbuf
, st
->num
);
2275 case TCP_SEQ_STATE_OPENREQ
:
2276 get_openreq4(st
->syn_wait_sk
, v
, tmpbuf
, st
->num
, st
->uid
);
2278 case TCP_SEQ_STATE_TIME_WAIT
:
2279 get_timewait4_sock(v
, tmpbuf
, st
->num
);
2282 seq_printf(seq
, "%-*s\n", TMPSZ
- 1, tmpbuf
);
2287 static struct file_operations tcp4_seq_fops
;
2288 static struct tcp_seq_afinfo tcp4_seq_afinfo
= {
2289 .owner
= THIS_MODULE
,
2292 .seq_show
= tcp4_seq_show
,
2293 .seq_fops
= &tcp4_seq_fops
,
2296 int __init
tcp4_proc_init(void)
2298 return tcp_proc_register(&tcp4_seq_afinfo
);
2301 void tcp4_proc_exit(void)
2303 tcp_proc_unregister(&tcp4_seq_afinfo
);
2305 #endif /* CONFIG_PROC_FS */
2307 struct proto tcp_prot
= {
2309 .owner
= THIS_MODULE
,
2311 .connect
= tcp_v4_connect
,
2312 .disconnect
= tcp_disconnect
,
2313 .accept
= tcp_accept
,
2315 .init
= tcp_v4_init_sock
,
2316 .destroy
= tcp_v4_destroy_sock
,
2317 .shutdown
= tcp_shutdown
,
2318 .setsockopt
= tcp_setsockopt
,
2319 .getsockopt
= tcp_getsockopt
,
2320 .sendmsg
= tcp_sendmsg
,
2321 .recvmsg
= tcp_recvmsg
,
2322 .backlog_rcv
= tcp_v4_do_rcv
,
2323 .hash
= tcp_v4_hash
,
2324 .unhash
= tcp_unhash
,
2325 .get_port
= tcp_v4_get_port
,
2326 .enter_memory_pressure
= tcp_enter_memory_pressure
,
2327 .sockets_allocated
= &tcp_sockets_allocated
,
2328 .memory_allocated
= &tcp_memory_allocated
,
2329 .memory_pressure
= &tcp_memory_pressure
,
2330 .sysctl_mem
= sysctl_tcp_mem
,
2331 .sysctl_wmem
= sysctl_tcp_wmem
,
2332 .sysctl_rmem
= sysctl_tcp_rmem
,
2333 .max_header
= MAX_TCP_HEADER
,
2334 .obj_size
= sizeof(struct tcp_sock
),
2335 .rsk_prot
= &tcp_request_sock_ops
,
2340 void __init
tcp_v4_init(struct net_proto_family
*ops
)
2342 int err
= sock_create_kern(PF_INET
, SOCK_RAW
, IPPROTO_TCP
, &tcp_socket
);
2344 panic("Failed to create the TCP control socket.\n");
2345 tcp_socket
->sk
->sk_allocation
= GFP_ATOMIC
;
2346 inet_sk(tcp_socket
->sk
)->uc_ttl
= -1;
2348 /* Unhash it so that IP input processing does not even
2349 * see it, we do not wish this socket to see incoming
2352 tcp_socket
->sk
->sk_prot
->unhash(tcp_socket
->sk
);
EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(inet_bind_bucket_create);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
EXPORT_SYMBOL(tcp_v4_send_check);
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);

#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(tcp_proc_register);
EXPORT_SYMBOL(tcp_proc_unregister);
#endif
EXPORT_SYMBOL(sysctl_local_port_range);
EXPORT_SYMBOL(sysctl_tcp_low_latency);
EXPORT_SYMBOL(sysctl_tcp_tw_reuse);
);