 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Implementation of the Transmission Control Protocol (TCP).
 *
 * Version:	$Id: tcp_input.c,v 1.136 1998/11/07 14:36:18 davem Exp $
 *
 * Authors:	Ross Biro, <bir7@leland.Stanford.Edu>
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Mark Evans, <evansmp@uhura.aston.ac.uk>
 *		Corey Minyard <wf-rch!minyard@relay.EU.net>
 *		Florian La Roche, <flla@stud.uni-sb.de>
 *		Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *		Linus Torvalds, <torvalds@cs.helsinki.fi>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Matthew Dillon, <dillon@apollo.west.oic.com>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Jorge Cwik, <jorge@laser.satlink.net>
 *
 *		Pedro Roque	:	Fast Retransmit/Recovery.
 *					Retransmit queue handled by TCP.
 *					Better retransmit timer handling.
 *					New congestion avoidance.
 *		Eric		:	Fast Retransmit.
 *		Randy Scott	:	MSS option defines.
 *		Eric Schenk	:	Fixes to slow start algorithm.
 *		Eric Schenk	:	Yet another double ACK bug.
 *		Eric Schenk	:	Delayed ACK bug fixes.
 *		Eric Schenk	:	Floyd style fast retrans war avoidance.
 *		David S. Miller	:	Don't allow zero congestion window.
 *		Eric Schenk	:	Fix retransmitter so that it sends
 *					next packet on ack of previous packet.
 *		Andi Kleen	:	Moved open_request checking here
 *					and process RSTs for open_requests.
 *		Andi Kleen	:	Better prune_queue, and other fixes.
 *		Andrey Savochkin:	Fix RTT measurements in the presence of
 *		Andrey Savochkin:	Check sequence numbers correctly when
 *					removing SACKs due to in sequence incoming
 *		Andi Kleen:		Make sure we never ack data there is not
 *					enough room for. Also make this condition
 *					a fatal error if it might still happen.
 *		Andi Kleen:		Add tcp_measure_rcv_mss to make
 *					connections with MSS<min(MTU,ann. MSS)
 *					work without delayed acks.
 *		Andi Kleen:		Process packets with PSH set in the
#include <linux/config.h>
#include <linux/sysctl.h>
#include <linux/ipsec.h>

#define SYNC_INIT 0 /* let the user enable it */

extern int sysctl_tcp_fin_timeout;

/* These are on by default so the code paths get tested.
 * For the final 2.2 this may be undone at our discretion. -DaveM
 */
int sysctl_tcp_timestamps = 1;
int sysctl_tcp_window_scaling = 1;
int sysctl_tcp_sack = 1;
int sysctl_tcp_syncookies = SYNC_INIT;
int sysctl_tcp_stdurg;
int sysctl_tcp_rfc1337;

static int prune_queue(struct sock *sk);
/* There is something which you must keep in mind when you analyze the
 * behavior of the tp->ato delayed ack timeout interval.  When a
 * connection starts up, we want to ack as quickly as possible.  The
 * problem is that "good" TCP's do slow start at the beginning of data
 * transmission.  This means that until we send the first few ACK's the
 * sender will sit on his end and only queue most of his data, because
 * he can only send snd_cwnd unacked packets at any given time.  For
 * each ACK we send, he increments snd_cwnd and transmits more of his
 */
static void tcp_delack_estimator(struct tcp_opt *tp)
	tp->lrcvtime = jiffies;

	/* Help sender leave slow start quickly,
	 * this sets our initial ato value.
	 */
	tcp_enter_quickack_mode(tp);

	int m = jiffies - tp->lrcvtime;

	tp->lrcvtime = jiffies;

	tp->ato = (tp->ato >> 1) + m;

	/* We are not in "quick ack" mode. */
	if(tp->ato <= (HZ/100))
		tp->ato = ((HZ/100)*2);
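	/* Illustrative note, not part of the original file: the update above,
	 * ato = ato/2 + m, is an exponentially weighted average whose fixed
	 * point is 2*m.  So if segments keep arriving every m jiffies, the
	 * delayed-ACK timeout settles at roughly twice the observed
	 * inter-arrival time, with the (HZ/100) check just above keeping it
	 * from collapsing down to the quick-ack threshold.
	 */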
/*
 * Remember to send an ACK later.
 */
static __inline__ void tcp_remember_ack(struct tcp_opt *tp, struct tcphdr *th,
	/* Tiny-grams with PSH set make us ACK quickly. */
	if(th->psh && (skb->len < (tp->mss_cache >> 1)))

/* Called to compute a smoothed rtt estimate. The data fed to this
 * routine either comes from timestamps, or from segments that were
 * known _not_ to have been retransmitted [see Karn/Partridge
 * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88
 * piece by Van Jacobson.
 * NOTE: the next three routines used to be one big routine.
 * To save cycles in the RFC 1323 implementation it was better to break
 * it up into three procedures. -- erics
 */
static __inline__ void tcp_rtt_estimator(struct tcp_opt *tp, __u32 mrtt)
	long m = mrtt;	/* RTT */

	/* The following amusing code comes from Jacobson's
	 * article in SIGCOMM '88.  Note that rtt and mdev
	 * are scaled versions of rtt and mean deviation.
	 * This is designed to be as fast as possible
	 * m stands for "measurement".
	 * On a 1990 paper the rto value is changed to:
	 * RTO = rtt + 4 * mdev
	 */
	m -= (tp->srtt >> 3);	/* m is now error in rtt est */
	tp->srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
	m = -m;			/* m is now abs(error) */
	m -= (tp->mdev >> 2);	/* similar update on mdev */
	tp->mdev += m;		/* mdev = 3/4 mdev + 1/4 new */

	/* no previous measure. */
	tp->srtt = m<<3;	/* take the measured time to be rtt */
	tp->mdev = m<<2;	/* make sure rto = 3*rtt */
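	/* Illustrative worked note, not part of the original file: with srtt
	 * kept scaled by 8 and mdev by 4, the measured-sample branch above
	 * amounts to
	 *
	 *	err  = m - srtt;
	 *	srtt = srtt + err/8;			(7/8 old + 1/8 new)
	 *	mdev = mdev + (|err| - mdev)/4;		(3/4 old + 1/4 new)
	 *
	 * and tcp_set_rto() below then computes, in the scaled domain,
	 * (srtt>>3) + mdev, which works out to the classic rtt + 4*mdev
	 * retransmission timeout estimate.
	 */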
/* Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
 */
static __inline__ void tcp_set_rto(struct tcp_opt *tp)
	tp->rto = (tp->srtt >> 3) + tp->mdev;
	tp->rto += (tp->rto >> 2) + (tp->rto >> (tp->snd_cwnd-1));

/* Keep the rto between HZ/5 and 120*HZ. 120*HZ is the upper bound
 * on packet lifetime in the internet. We need the HZ/5 lower
 * bound to behave correctly against BSD stacks with a fixed
 * FIXME: It's not entirely clear this lower bound is the best
 * way to avoid the problem. Is it possible to drop the lower
 * bound and still avoid trouble with BSD stacks? Perhaps
 * some modification to the RTO calculation that takes delayed
 * ack bias into account? This needs serious thought. -- erics
 */
static __inline__ void tcp_bound_rto(struct tcp_opt *tp)
	if (tp->rto > 120*HZ)
/* WARNING: this must not be called if tp->saw_timestamp was false. */
extern __inline__ void tcp_replace_ts_recent(struct sock *sk, struct tcp_opt *tp,
					     __u32 start_seq, __u32 end_seq)
	/* From draft-ietf-tcplw-high-performance: the correct
	 * test is last_ack_sent <= end_seq.
	 * (RFC1323 stated last_ack_sent < end_seq.)
	 * HOWEVER: The current check contradicts the draft statements.
	 * It has been done for good reasons.
	 * The implemented check improves security and eliminates
	 * unnecessary RTT overestimation.
	 * 1998/06/27  Andrey V. Savochkin <saw@msu.ru>
	 */
	if (!before(end_seq, tp->last_ack_sent - sk->rcvbuf) &&
	    !after(start_seq, tp->rcv_wup + tp->rcv_wnd)) {
		/* PAWS bug workaround wrt. ACK frames, the PAWS discard
		 * extra check below makes sure this can only happen
		 * for pure ACK frames.  -DaveM
		 */
		if((s32)(tp->rcv_tsval - tp->ts_recent) >= 0) {
			tp->ts_recent = tp->rcv_tsval;
			tp->ts_recent_stamp = jiffies;

#define PAWS_24DAYS	(HZ * 60 * 60 * 24 * 24)
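/* Illustrative sketch, not part of the original file: the (s32)(a - b)
 * comparison used with timestamps above and below is modular "serial
 * number" arithmetic.  TCP timestamps wrap around 2^32, so a plain
 * unsigned compare is wrong; casting the difference to a signed 32-bit
 * value orders any two values that are less than 2^31 apart correctly
 * across the wrap.  A hypothetical helper spelling this out:
 */
static __inline__ int tsval_before_sketch(__u32 a, __u32 b)
{
	return (s32)(a - b) < 0;	/* "a is older than b", wrap-safe */
}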
extern __inline__ int tcp_paws_discard(struct tcp_opt *tp, struct tcphdr *th, unsigned len)
	/* ts_recent must be younger than 24 days */
	return (((jiffies - tp->ts_recent_stamp) >= PAWS_24DAYS) ||
		(((s32)(tp->rcv_tsval - tp->ts_recent) < 0) &&
		 /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM */
		 (len != (th->doff * 4))));

static int __tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
	u32 end_window = tp->rcv_wup + tp->rcv_wnd;

	    after(end_seq, tp->rcv_nxt) &&
	    before(seq, end_window))

	if (seq != end_window)

	return (seq == end_seq);
/* This function checks to see if the tcp header is actually acceptable. */
extern __inline__ int tcp_sequence(struct tcp_opt *tp, u32 seq, u32 end_seq)
	if (seq == tp->rcv_nxt)
		return (tp->rcv_wnd || (end_seq == seq));

	return __tcp_sequence(tp, seq, end_seq);
/* When we get a reset we do this. */
static void tcp_reset(struct sock *sk, struct sk_buff *skb)
	/* We want the right error as BSD sees it (and indeed as we do). */
		sk->err = ECONNREFUSED;
		sk->err = ECONNRESET;
	tcp_set_state(sk, TCP_CLOSE);
	sk->shutdown = SHUTDOWN_MASK;
		sk->state_change(sk);

/* This tags the retransmission queue when SACKs arrive. */
static void tcp_sacktag_write_queue(struct sock *sk, struct tcp_sack_block *sp, int nsacks)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

		struct sk_buff *skb = skb_peek(&sk->write_queue);
		__u32 start_seq = ntohl(sp->start_seq);
		__u32 end_seq = ntohl(sp->end_seq);

		while((skb != NULL) &&
		      (skb != tp->send_head) &&
		      (skb != (struct sk_buff *)&sk->write_queue)) {
			/* The retransmission queue is always in order, so
			 * we can short-circuit the walk early.
			 */
			if(!before(start_seq, TCP_SKB_CB(skb)->end_seq))

			/* We play conservative, we don't allow SACKS to partially
			 * tag a sequence space.
			 */
			if(!after(start_seq, TCP_SKB_CB(skb)->seq) &&
			   !before(end_seq, TCP_SKB_CB(skb)->end_seq)) {
				/* If this was a retransmitted frame, account for it. */
				if(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
				TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;

				/* RULE: All new SACKs will either decrease retrans_out
				 * or advance fackets_out.
				 */
				if(fack_count > tp->fackets_out)
					tp->fackets_out = fack_count;

		sp++;	/* Move on to the next SACK block. */
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
 * But, this can also be called on packets in the established flow when
 * the fast version below fails.
 */
void tcp_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
	int length = (th->doff*4) - sizeof(struct tcphdr);

	ptr = (unsigned char *)(th + 1);

		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			if (opsize < 2) /* "silly options" */
				break;	/* don't parse partial options */
			if(opsize==TCPOLEN_MSS && th->syn) {
				u16 in_mss = ntohs(*(__u16 *)ptr);
					if (tp->mss_clamp > in_mss)
						tp->mss_clamp = in_mss;
			if(opsize==TCPOLEN_WINDOW && th->syn)
				if (!no_fancy && sysctl_tcp_window_scaling) {
					tp->snd_wscale = *(__u8 *)ptr;
					if(tp->snd_wscale > 14) {
						printk("tcp_parse_options: Illegal window "
						       "scaling value %d >14 received.",
		case TCPOPT_TIMESTAMP:
			if(opsize==TCPOLEN_TIMESTAMP) {
				if (sysctl_tcp_timestamps && !no_fancy) {
					tp->rcv_tsval = ntohl(*(__u32 *)ptr);
					tp->rcv_tsecr = ntohl(*(__u32 *)(ptr+4));
		case TCPOPT_SACK_PERM:
			if(opsize==TCPOLEN_SACK_PERM && th->syn) {
				if (sysctl_tcp_sack && !no_fancy) {
			if((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
			   sysctl_tcp_sack && (sk != NULL) && !th->syn) {
				int sack_bytes = opsize - TCPOLEN_SACK_BASE;

				if(!(sack_bytes % TCPOLEN_SACK_PERBLOCK)) {
					int num_sacks = sack_bytes >> 3;
					struct tcp_sack_block *sackp;

					sackp = (struct tcp_sack_block *)ptr;
					tcp_sacktag_write_queue(sk, sackp, num_sacks);
/* Fast parse options. This hopes to only see timestamps.
 * If it is wrong it falls back on tcp_parse_options().
 */
static __inline__ int tcp_fast_parse_options(struct sock *sk, struct tcphdr *th, struct tcp_opt *tp)
	/* If we didn't send out any options ignore them all. */
	if (tp->tcp_header_len == sizeof(struct tcphdr))
	if (th->doff == sizeof(struct tcphdr)>>2) {
	} else if (th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
		__u32 *ptr = (__u32 *)(th + 1);
		if (*ptr == __constant_ntohl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
					     | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) {
			tp->rcv_tsval = ntohl(*++ptr);
			tp->rcv_tsecr = ntohl(*++ptr);
	tcp_parse_options(sk, th, tp, 0);

#define FLAG_DATA		0x01	/* Incoming frame contained data.		*/
#define FLAG_WIN_UPDATE		0x02	/* Incoming ACK was a window update.		*/
#define FLAG_DATA_ACKED		0x04	/* This ACK acknowledged new data.		*/
#define FLAG_RETRANS_DATA_ACKED	0x08	/* "" "" some of which was retransmitted.	*/
static __inline__ void clear_fast_retransmit(struct tcp_opt *tp)
	if (tp->dup_acks > 3)
		tp->snd_cwnd = (tp->snd_ssthresh);

/* NOTE: This code assumes that tp->dup_acks gets cleared when a
 * retransmit timer fires.
 */
static void tcp_fast_retrans(struct sock *sk, u32 ack, int not_dup)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Note: If not_dup is set this implies we got a
	 * data carrying packet or a window update.
	 * This carries no new information about possible
	 * lost packets, so we have to ignore it for the purposes
	 * of counting duplicate acks. Ideally this does not imply we
	 * should stop our fast retransmit phase, more acks may come
	 * later without data to help us. Unfortunately this would make
	 * the code below much more complex. For now if I see such
	 * a packet I clear the fast retransmit phase.
	 */
	if (ack == tp->snd_una && tp->packets_out && (not_dup == 0)) {
		/* This is the standard reno style fast retransmit branch. */

		/* 1. When the third duplicate ack is received, set ssthresh
		 * to one half the current congestion window, but no less
		 * than two segments. Retransmit the missing segment.
		 */
		if (tp->high_seq == 0 || after(ack, tp->high_seq)) {
			if ((tp->fackets_out > 3) || (tp->dup_acks == 3)) {
				tp->snd_ssthresh = max(tp->snd_cwnd >> 1, 2);
				tp->snd_cwnd = (tp->snd_ssthresh + 3);
				tp->high_seq = tp->snd_nxt;
					tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
					tcp_fack_retransmit(sk);
				tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

		/* 2. Each time another duplicate ACK arrives, increment
		 * cwnd by the segment size. [...] Transmit a packet...
		 * Packet transmission will be done on normal flow processing
		 * since we're not in "retransmit mode". We do not use duplicate
		 * ACKs to artificially inflate the congestion window when
		 */
		if (tp->dup_acks > 3) {
			if(!tp->fackets_out) {
				/* Fill any further holes which may have appeared.
				 * We may want to change this to run every further
				 * multiple-of-3 dup ack increments, to be more robust
				 * against out-of-order packet delivery.  -DaveM
				 */
				tcp_fack_retransmit(sk);
	} else if (tp->high_seq != 0) {
		/* In this branch we deal with clearing the Floyd style
		 * block on duplicate fast retransmits, and if requested
		 * we do Hoe style secondary fast retransmits.
		 */
		if (!before(ack, tp->high_seq) || (not_dup & FLAG_DATA) != 0) {
			/* Once we have acked all the packets up to high_seq
			 * we are done with this fast retransmit phase.
			 * Alternatively data arrived. In this case we
			 * have to abort the fast retransmit attempt.
			 * Note that we do want to accept a window
			 * update since this is expected with Hoe's algorithm.
			 */
			clear_fast_retransmit(tp);

			/* After we have cleared up to high_seq we can
			 * clear the Floyd style block.
			 */
			if (!before(ack, tp->high_seq)) {
	} else if (tp->dup_acks >= 3) {
		if (!tp->fackets_out) {
			/* Hoe Style. We didn't ack the whole
			 * window. Take this as a cue that
			 * another packet was lost and retransmit it.
			 * Don't muck with the congestion window here.
			 * Note that we have to be careful not to
			 * act if this was a window update and it
			 * didn't ack new data, since this does
			 * not indicate a packet left the system.
			 * We can test this by just checking
			 * if ack changed from snd_una, since
			 * the only way to get here without advancing
			 * from snd_una is if this was a window update.
			 */
			if (ack != tp->snd_una && before(ack, tp->high_seq)) {
				tcp_retransmit_skb(sk, skb_peek(&sk->write_queue));
				tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);

			/* FACK style, fill any remaining holes in
			 */
			tcp_fack_retransmit(sk);
/* This is Jacobson's slow start and congestion avoidance.
 * SIGCOMM '88, p. 328.
 */
static void tcp_cong_avoid(struct tcp_opt *tp)
	if (tp->snd_cwnd <= tp->snd_ssthresh) {
		/* In "safe" area, increase. */
		/* In dangerous area, increase slowly.
		 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
		 */
		if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
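			/* Illustrative sketch, not from the original source: per
			 * incoming ACK the two branches above amount to
			 *
			 *	if (snd_cwnd <= snd_ssthresh)
			 *		snd_cwnd++;		(slow start: +1 per ACK)
			 *	else if (++snd_cwnd_cnt >= snd_cwnd) {
			 *		snd_cwnd_cnt = 0;
			 *		snd_cwnd++;		(congestion avoidance:
			 *					 roughly +1 per round trip)
			 *	}
			 */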
/* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk, __u32 ack,
			       __u32 *seq, __u32 *seq_rtt)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned long now = jiffies;

	while((skb = skb_peek(&sk->write_queue)) && (skb != tp->send_head)) {
		struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
		__u8 sacked = scb->sacked;

		/* If our packet is before the ack sequence we can
		 * discard it as it's confirmed to have arrived at
		 */
		if (after(scb->end_seq, ack))

		/* Initial outgoing SYN's get put onto the write_queue
		 * just like anything else we transmit.  It is not
		 * true data, and if we misinform our callers that
		 * this ACK acks real data, we will erroneously exit
		 * connection startup slow start one packet too
		 * quickly.  This is severely frowned upon behavior.
		 */
		if((sacked & TCPCB_SACKED_RETRANS) && tp->retrans_out)
		if(!(scb->flags & TCPCB_FLAG_SYN)) {
			acked |= FLAG_DATA_ACKED;
			if(sacked & TCPCB_SACKED_RETRANS)
				acked |= FLAG_RETRANS_DATA_ACKED;
				tp->retrans_head = NULL;
		*seq_rtt = now - scb->when;
		__skb_unlink(skb, skb->list);
	tp->retrans_head = NULL;

static void tcp_ack_probe(struct sock *sk, __u32 ack)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Our probe was answered. */
	/* Was it a usable window open? */
	/* should always be non-null */
	if (tp->send_head != NULL &&
	    !before (ack + tp->snd_wnd, TCP_SKB_CB(tp->send_head)->end_seq)) {
		tcp_clear_xmit_timer(sk, TIME_PROBE0);
		tcp_reset_xmit_timer(sk, TIME_PROBE0,
				     min(tp->rto << tp->backoff, 120*HZ));
/* Read draft-ietf-tcplw-high-performance before mucking
 * with this code. (Supersedes RFC1323)
 */
static void tcp_ack_saw_tstamp(struct sock *sk, struct tcp_opt *tp,
			       u32 seq, u32 ack, int flag)
	/* RTTM Rule: A TSecr value received in a segment is used to
	 * update the averaged RTT measurement only if the segment
	 * acknowledges some new data, i.e., only if it advances the
	 * left edge of the send window.
	 * See draft-ietf-tcplw-high-performance-00, section 3.3.
	 * 1998/04/10  Andrey V. Savochkin <saw@msu.ru>
	 */
	if (!(flag & FLAG_DATA_ACKED))

	seq_rtt = jiffies - tp->rcv_tsecr;
	tcp_rtt_estimator(tp, seq_rtt);
	if (tp->retransmits) {
		if (tp->packets_out == 0) {
			/* Still retransmitting, use backoff */
			tp->rto = tp->rto << tp->backoff;
	/* NOTE: safe here so long as cong_ctl doesn't use rto */
static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
	struct sk_buff *skb = skb_peek(&sk->write_queue);
	long when = tp->rto - (jiffies - TCP_SKB_CB(skb)->when);

	/* Some data was ACK'd, if still retransmitting (due to a
	 * timeout), resend more of the retransmit queue.  The
	 * congestion window is handled properly by that code.
	 */
	if (tp->retransmits) {
		tp->retrans_head = NULL;
		tcp_xmit_retransmit_queue(sk);
		tcp_reset_xmit_timer(sk, TIME_RETRANS, tp->rto);
		tcp_reset_xmit_timer(sk, TIME_RETRANS, when);

/* This routine deals with incoming acks, but not outgoing ones. */
static int tcp_ack(struct sock *sk, struct tcphdr *th,
		   u32 ack_seq, u32 ack, int len)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

		return(1);	/* Dead, can't ack any more so why bother */

	if (tp->pending == TIME_KEEPOPEN)
	tp->rcv_tstamp = jiffies;

	/* If the ack is newer than sent or older than previous acks
	 * then we can probably ignore it.
	 */
	if (after(ack, tp->snd_nxt) || before(ack, tp->snd_una))
		goto uninteresting_ack;

	dst_confirm(sk->dst_cache);

	/* If there is data set flag 1 */
	if (len != th->doff*4) {
		tcp_delack_estimator(tp);

	/* Update our send window. */

	/* This is the window update code as per RFC 793
	 * snd_wl{1,2} are used to prevent unordered
	 * segments from shrinking the window
	 */
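	/* Illustrative note, not part of the original file: snd_wl1 caches the
	 * sequence number, and snd_wl2 the ack number, of the segment that last
	 * updated the window.  The test below only lets a segment update
	 * snd_wnd if it is newer by sequence, or equally new by sequence and
	 * not older by ack, so an old reordered segment cannot shrink the
	 * advertised window.
	 */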
	if (before(tp->snd_wl1, ack_seq) ||
	    (tp->snd_wl1 == ack_seq && !after(tp->snd_wl2, ack))) {
		u32 nwin = ntohs(th->window) << tp->snd_wscale;

		if ((tp->snd_wl2 != ack) || (nwin > tp->snd_wnd)) {
			flag |= FLAG_WIN_UPDATE;
			tp->snd_wl1 = ack_seq;

			if (nwin > tp->max_window)
				tp->max_window = nwin;

	/* We passed data and got it acked, remove any soft error
	 * log. Something worked...
	 */

	/* If this ack opens up a zero window, clear backoff.  It was
	 * being used to time the probes, and is probably far higher than
	 * it needs to be for normal retransmission.
	 */
	if (tp->pending == TIME_PROBE0)
		tcp_ack_probe(sk, ack);

	/* See if we can take anything off of the retransmit queue. */
	flag |= tcp_clean_rtx_queue(sk, ack, &seq, &seq_rtt);

	/* If we have a timestamp, we always do rtt estimates. */
	if (tp->saw_tstamp) {
		tcp_ack_saw_tstamp(sk, tp, seq, ack, flag);
		/* If we were retransmitting don't count rtt estimate. */
		if (tp->retransmits) {
			if (tp->packets_out == 0) {
		/* We don't have a timestamp. Can only use
		 * packets that are not retransmitted to determine
		 * rtt estimates. Also, we must not reset the
		 * backoff for rto until we get a non-retransmitted
		 * packet. This allows us to deal with a situation
		 * where the network delay has increased suddenly.
		 * I.e. Karn's algorithm. (SIGCOMM '87, p5.)
		 */
		if (flag & FLAG_DATA_ACKED) {
			if(!(flag & FLAG_RETRANS_DATA_ACKED)) {
				tcp_rtt_estimator(tp, seq_rtt);

	if (tp->packets_out) {
		if (flag & FLAG_DATA_ACKED)
			tcp_ack_packets_out(sk, tp);
		tcp_clear_xmit_timer(sk, TIME_RETRANS);

	flag &= (FLAG_DATA | FLAG_WIN_UPDATE);
	if ((ack == tp->snd_una && tp->packets_out && flag == 0) ||
	    (tp->high_seq != 0)) {
		tcp_fast_retrans(sk, ack, flag);
		/* Clear any aborted fast retransmit starts. */

	/* Remember the highest ack received. */

	SOCK_DEBUG(sk, "Ack ignored %u %u\n", ack, tp->snd_nxt);
/* New-style handling of TIME_WAIT sockets. */
extern void tcp_tw_schedule(struct tcp_tw_bucket *tw);
extern void tcp_tw_reschedule(struct tcp_tw_bucket *tw);
extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw);

void tcp_timewait_kill(struct tcp_tw_bucket *tw)
	/* Unlink from various places. */
		tw->bind_next->bind_pprev = tw->bind_pprev;
	*(tw->bind_pprev) = tw->bind_next;
	if(tw->tb->owners == NULL)
		tcp_inc_slow_timer(TCP_SLT_BUCKETGC);

		tw->next->pprev = tw->pprev;
	*tw->pprev = tw->next;

	/* We decremented the prot->inuse count when we entered TIME_WAIT
	 * and the sock from which this came was destroyed.
	 */
	tw->sklist_next->sklist_prev = tw->sklist_prev;
	tw->sklist_prev->sklist_next = tw->sklist_next;

	/* Ok, now free it up. */
	kmem_cache_free(tcp_timewait_cachep, tw);

/* We come here as a special case from the AF specific TCP input processing,
 * and the SKB has no owner.  Essentially handling this is very simple,
 * we just keep silently eating rx'd packets until none show up for the
 * entire timeout period.  The only special cases are for BSD TIME_WAIT
 * reconnects and SYN/RST bits being set in the TCP header.
 */
int tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb,
			       struct tcphdr *th, unsigned len)
	/*
	 * "When a connection is [...] on TIME-WAIT state [...]
	 * [a TCP] MAY accept a new SYN from the remote TCP to
	 * reopen the connection directly, if it:
	 *
	 * (1)  assigns its initial sequence number for the new
	 * connection to be larger than the largest sequence
	 * number it used on the previous connection incarnation,
	 *
	 * (2)  returns to TIME-WAIT state if the SYN turns out
	 * to be an old duplicate".
	 */
	if(th->syn && !th->rst && after(TCP_SKB_CB(skb)->seq, tw->rcv_nxt)) {
		struct tcp_func *af_specific = tw->af_specific;

		isn = tw->rcv_nxt + 128000;

		tcp_tw_deschedule(tw);
		tcp_timewait_kill(tw);
		sk = af_specific->get_sock(skb, th);
		if(sk == NULL || !ipsec_sk_policy(sk, skb))
		skb_set_owner_r(skb, sk);
		af_specific = sk->tp_pinfo.af_tcp.af_specific;
		if(af_specific->conn_request(sk, skb, isn) < 0)
			return 1;	/* Toss a reset back. */
		return 0;	/* Discard the frame. */

	/* Check RST or SYN */
	if(th->rst || th->syn) {
		/* This is TIME_WAIT assassination, in two flavors.
		 * Oh well... nobody has a sufficient solution to this
		 */
		if(sysctl_tcp_rfc1337 == 0) {
			tcp_tw_deschedule(tw);
			tcp_timewait_kill(tw);
		return 1;	/* toss a reset back */

	/* In this case we must reset the TIMEWAIT timer. */
	tcp_tw_reschedule(tw);
	return 0;	/* Discard the frame. */

/* Enter the time wait state.  This is always called from BH
 * context.  Essentially we whip up a timewait bucket, copy the
 * relevant info into it from the SK, and mess with hash chains
 */
static __inline__ void tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw)
	struct sock **head, *sktw;

	/* Step 1: Remove SK from established hash. */
		sk->next->pprev = sk->pprev;
	*sk->pprev = sk->next;

	/* Step 2: Put TW into bind hash where SK was. */
	tw->tb = (struct tcp_bind_bucket *)sk->prev;
	if((tw->bind_next = sk->bind_next) != NULL)
		sk->bind_next->bind_pprev = &tw->bind_next;
	tw->bind_pprev = sk->bind_pprev;
	*sk->bind_pprev = (struct sock *)tw;

	/* Step 3: Same for the protocol sklist. */
	(tw->sklist_next = sk->sklist_next)->sklist_prev = (struct sock *)tw;
	(tw->sklist_prev = sk->sklist_prev)->sklist_next = (struct sock *)tw;
	sk->sklist_next = NULL;

	/* Step 4: Hash TW into TIMEWAIT half of established hash table. */
	head = &tcp_established_hash[sk->hashent + (TCP_HTABLE_SIZE/2)];
	sktw = (struct sock *)tw;
	if((sktw->next = *head) != NULL)
		(*head)->pprev = &sktw->next;
void tcp_time_wait(struct sock *sk)
	struct tcp_tw_bucket *tw;

	tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC);
		/* Give us an identity. */
		tw->daddr = sk->daddr;
		tw->rcv_saddr = sk->rcv_saddr;
		tw->bound_dev_if = sk->bound_dev_if;
		tw->state = TCP_TIME_WAIT;
		tw->sport = sk->sport;
		tw->dport = sk->dport;
		tw->family = sk->family;
		tw->reuse = sk->reuse;
		tw->rcv_nxt = sk->tp_pinfo.af_tcp.rcv_nxt;
		tw->af_specific = sk->tp_pinfo.af_tcp.af_specific;

#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
		if(tw->family == PF_INET6) {
			memcpy(&tw->v6_daddr,
			       &sk->net_pinfo.af_inet6.daddr,
			       sizeof(struct in6_addr));
			memcpy(&tw->v6_rcv_saddr,
			       &sk->net_pinfo.af_inet6.rcv_saddr,
			       sizeof(struct in6_addr));

		/* Linkage updates. */
		tcp_tw_hashdance(sk, tw);

		/* Get the TIME_WAIT timeout firing. */
		tcp_tw_schedule(tw);

		if(sk->state == TCP_ESTABLISHED)
			tcp_statistics.TcpCurrEstab--;
		sk->state = TCP_CLOSE;
		net_reset_timer(sk, TIME_DONE,
				min(sk->tp_pinfo.af_tcp.srtt * 2, TCP_DONE_TIME));
		/* Sorry, we're out of memory, just CLOSE this
		 * socket up.  We've got bigger problems than
		 * non-graceful socket closings.
		 */
		tcp_set_state(sk, TCP_CLOSE);

	/* Prevent rcvmsg/sndmsg calls, and wake people up. */
	sk->shutdown = SHUTDOWN_MASK;
		sk->state_change(sk);
/*
 *	Process the FIN bit. This now behaves as it is supposed to work
 *	and the FIN takes effect when it is validly part of sequence
 *	space. Not before when we get holes.
 *
 *	If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
 *	(and thence onto LAST-ACK and finally, CLOSE, we never enter
 *
 *	If we are in FINWAIT-1, a received FIN indicates simultaneous
 *	close and we go into CLOSING (and later onto TIME-WAIT)
 *
 *	If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
 */
static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
	sk->tp_pinfo.af_tcp.fin_seq = TCP_SKB_CB(skb)->end_seq;

	sk->state_change(sk);
	sock_wake_async(sk->socket, 1);

	case TCP_ESTABLISHED:
		/* Move to CLOSE_WAIT */
		tcp_set_state(sk, TCP_CLOSE_WAIT);
			sk->shutdown = SHUTDOWN_MASK;

	case TCP_CLOSE_WAIT:
		/* Received a retransmission of the FIN, do
		 */
		/* RFC793: Remain in the LAST-ACK state. */

		/* This case occurs when a simultaneous close
		 * happens, we must ack the received FIN and
		 * enter the CLOSING state.
		 *
		 * This causes a WRITE timeout, which will either
		 * move on to TIME_WAIT when we timeout, or resend
		 * the FIN properly (maybe we get rid of that annoying
		 * FIN lost hang). The TIME_WRITE code is already
		 * correct for handling this timeout.
		 */
		tcp_set_state(sk, TCP_CLOSING);
		/* Received a FIN -- send ACK and enter TIME_WAIT. */
		/* Only TCP_LISTEN and TCP_CLOSE are left, in these
		 * cases we should never reach this piece of code.
		 */
		printk("tcp_fin: Impossible, sk->state=%d\n", sk->state);
/* These routines update the SACK block as out-of-order packets arrive or
 * in-order packets close up the sequence space.
 */
static void tcp_sack_maybe_coalesce(struct tcp_opt *tp, struct tcp_sack_block *sp)
	int this_sack, num_sacks = tp->num_sacks;
	struct tcp_sack_block *swalk = &tp->selective_acks[0];

	/* If more than one SACK block, see if the recent change to SP eats into
	 * or hits the sequence space of other SACK blocks, if so coalesce.
	 */
	if(num_sacks != 1) {
		for(this_sack = 0; this_sack < num_sacks; this_sack++, swalk++) {
			/* First case, bottom of SP moves into top of the
			 * sequence space of SWALK.
			 */
			if(between(sp->start_seq, swalk->start_seq, swalk->end_seq)) {
				sp->start_seq = swalk->start_seq;

			/* Second case, top of SP moves into bottom of the
			 * sequence space of SWALK.
			 */
			if(between(sp->end_seq, swalk->start_seq, swalk->end_seq)) {
				sp->end_seq = swalk->end_seq;

	/* SP is the only SACK, or no coalescing cases found. */

	/* Zap SWALK, by moving every further SACK up by one slot.
	 * Decrease num_sacks.
	 */
	for(this_sack += 1; this_sack < num_sacks-1; this_sack++, swalk++) {
		struct tcp_sack_block *next = (swalk + 1);
		swalk->start_seq = next->start_seq;
		swalk->end_seq = next->end_seq;

static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
	tmp = sack1->start_seq;
	sack1->start_seq = sack2->start_seq;
	sack2->start_seq = tmp;

	tmp = sack1->end_seq;
	sack1->end_seq = sack2->end_seq;
	sack2->end_seq = tmp;

static void tcp_sack_new_ofo_skb(struct sock *sk, struct sk_buff *skb)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int cur_sacks = tp->num_sacks;

	/* Optimize for the common case, new ofo frames arrive
	 * "in order". ;-)  This also satisfies the requirements
	 * of RFC2018 about ordering of SACKs.
	 */
	if(sp->end_seq == TCP_SKB_CB(skb)->seq) {
		sp->end_seq = TCP_SKB_CB(skb)->end_seq;
		tcp_sack_maybe_coalesce(tp, sp);
	} else if(sp->start_seq == TCP_SKB_CB(skb)->end_seq) {
		/* Re-ordered arrival, in this case, can be optimized
		 */
		sp->start_seq = TCP_SKB_CB(skb)->seq;
		tcp_sack_maybe_coalesce(tp, sp);
		struct tcp_sack_block *swap = sp + 1;
		int this_sack, max_sacks = (tp->tstamp_ok ? 3 : 4);

		/* Oh well, we have to move things around.
		 * Try to find a SACK we can tack this onto.
		 */
		for(this_sack = 1; this_sack < cur_sacks; this_sack++, swap++) {
			if((swap->end_seq == TCP_SKB_CB(skb)->seq) ||
			   (swap->start_seq == TCP_SKB_CB(skb)->end_seq)) {
				if(swap->end_seq == TCP_SKB_CB(skb)->seq)
					swap->end_seq = TCP_SKB_CB(skb)->end_seq;
					swap->start_seq = TCP_SKB_CB(skb)->seq;
				tcp_sack_swap(sp, swap);
				tcp_sack_maybe_coalesce(tp, sp);

		/* Could not find an adjacent existing SACK, build a new one,
		 * put it at the front, and shift everyone else down.  We
		 * always know there is at least one SACK present already here.
		 *
		 * If the sack array is full, forget about the last one.
		 */
		if (cur_sacks >= max_sacks) {
		while(cur_sacks >= 1) {
			struct tcp_sack_block *this = &tp->selective_acks[cur_sacks];
			struct tcp_sack_block *prev = (this - 1);
			this->start_seq = prev->start_seq;
			this->end_seq = prev->end_seq;

		/* Build the new head SACK, and we're done. */
		sp->start_seq = TCP_SKB_CB(skb)->seq;
		sp->end_seq = TCP_SKB_CB(skb)->end_seq;
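		/* Illustrative note, not part of the original file: RFC 2018 asks
		 * that the block containing the most recently received segment be
		 * reported first.  For example, if bytes 300-399 arrive out of
		 * order and later 100-199 arrive (with 0-99 still missing), the
		 * SACK option should list [100,200) first and [300,400) second;
		 * the swap/shift logic above keeps the blocks in that order.
		 */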
static void tcp_sack_remove_skb(struct tcp_opt *tp, struct sk_buff *skb)
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int num_sacks = tp->num_sacks;

	/* This is an in order data segment _or_ an out-of-order SKB being
	 * moved to the receive queue, so we know this removed SKB will eat
	 * from the front of a SACK.
	 */
	for(this_sack = 0; this_sack < num_sacks; this_sack++, sp++) {
		/* Check if the start of the sack is covered by skb. */
		if(!before(sp->start_seq, TCP_SKB_CB(skb)->seq) &&
		   before(sp->start_seq, TCP_SKB_CB(skb)->end_seq))

	/* This should only happen if so many SACKs get built that some get
	 * pushed out before we get here, or we eat some in sequence packets
	 * which are before the first SACK block.
	 */
	if(this_sack >= num_sacks)

	sp->start_seq = TCP_SKB_CB(skb)->end_seq;
	if(!before(sp->start_seq, sp->end_seq)) {
		/* Zap this SACK, by moving forward any other SACKS. */
		for(this_sack += 1; this_sack < num_sacks; this_sack++, sp++) {
			struct tcp_sack_block *next = (sp + 1);
			sp->start_seq = next->start_seq;
			sp->end_seq = next->end_seq;

static void tcp_sack_extend(struct tcp_opt *tp, struct sk_buff *old_skb, struct sk_buff *new_skb)
	struct tcp_sack_block *sp = &tp->selective_acks[0];
	int num_sacks = tp->num_sacks;

	for(this_sack = 0; this_sack < num_sacks; this_sack++, tp++) {
		if(sp->end_seq == TCP_SKB_CB(old_skb)->end_seq)

	if(this_sack >= num_sacks)

	sp->end_seq = TCP_SKB_CB(new_skb)->end_seq;
/* This one checks to see if we can put data from the
 * out_of_order queue into the receive_queue.
 */
static void tcp_ofo_queue(struct sock *sk)
	struct sk_buff *skb;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	while ((skb = skb_peek(&tp->out_of_order_queue))) {
		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))

		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
			SOCK_DEBUG(sk, "ofo packet was already received \n");
			__skb_unlink(skb, skb->list);

		SOCK_DEBUG(sk, "ofo requeuing : rcv_next %X seq %X - %X\n",
			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(skb)->end_seq);

		tcp_sack_remove_skb(tp, skb);
		__skb_unlink(skb, skb->list);
		__skb_queue_tail(&sk->receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
			tcp_fin(skb, sk, skb->h.th);
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
	struct sk_buff *skb1;
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Queue data for delivery to the user.
	 * Packets in sequence go to the receive queue.
	 * Out of sequence packets to out_of_order_queue.
	 */
	if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		/* Ok. In sequence. */
		dst_confirm(sk->dst_cache);
		__skb_queue_tail(&sk->receive_queue, skb);
		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
		if(skb->h.th->fin) {
			tcp_fin(skb, sk, skb->h.th);
			tcp_remember_ack(tp, skb->h.th, skb);

		/* This may have eaten into a SACK block. */
		if(tp->sack_ok && tp->num_sacks)
			tcp_sack_remove_skb(tp, skb);

		/* Turn on fast path. */
		if (skb_queue_len(&tp->out_of_order_queue) == 0)
			tp->pred_flags = htonl(((tp->tcp_header_len >> 2) << 28) |

	/* An old packet, either a retransmit or some packet got lost. */
	if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
		/* A retransmit, 2nd most common case.  Force an immediate ack. */
		SOCK_DEBUG(sk, "retransmit received: seq %X\n", TCP_SKB_CB(skb)->seq);
		tcp_enter_quickack_mode(tp);

	if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
		/* Partial packet, seq < rcv_next < end_seq */
		SOCK_DEBUG(sk, "partial packet: rcv_next %X seq %X - %X\n",
			   tp->rcv_nxt, TCP_SKB_CB(skb)->seq,
			   TCP_SKB_CB(skb)->end_seq);

	/* Ok. This is an out_of_order segment, force an ack. */

	tcp_enter_quickack_mode(tp);
	/* Disable header prediction. */
	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);

	if (skb_peek(&tp->out_of_order_queue) == NULL) {
		/* Initial out of order segment, build 1 SACK. */
			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
			tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq;
		__skb_queue_head(&tp->out_of_order_queue, skb);
		for(skb1 = tp->out_of_order_queue.prev; ; skb1 = skb1->prev) {
			/* Already there. */
			if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb1)->seq) {
				if (skb->len >= skb1->len) {
						tcp_sack_extend(tp, skb1, skb);
					__skb_append(skb1, skb);
					__skb_unlink(skb1, skb1->list);
				/* A duplicate, smaller than what is in the
				 * out-of-order queue right now, toss it.
				 */

			if (after(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) {
				__skb_append(skb1, skb);
					tcp_sack_new_ofo_skb(sk, skb);

			/* See if we've hit the start. If so insert. */
			if (skb1 == skb_peek(&tp->out_of_order_queue)) {
				__skb_queue_head(&tp->out_of_order_queue, skb);
					tcp_sack_new_ofo_skb(sk, skb);
/*
 *	This routine handles the data.  If there is room in the buffer,
 *	it will have already been moved into it.  If there is no
 *	room, then we will just have to discard the packet.
 */
static int tcp_data(struct sk_buff *skb, struct sock *sk, unsigned int len)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	skb_pull(skb, th->doff*4);
	skb_trim(skb, len - (th->doff*4));

	if (skb->len == 0 && !th->fin)
	/*
	 *	If our receive queue has grown past its limits shrink it.
	 *	Make sure to do this before moving snd_nxt, otherwise
	 *	data might be acked that we don't have enough room for.
	 */
	if (atomic_read(&sk->rmem_alloc) > sk->rcvbuf) {
		if (prune_queue(sk) < 0) {
			/* Still not enough room. That can happen when
			 * skb->true_size differs significantly from skb->len.
			 */

	tcp_data_queue(sk, skb);

	if (before(tp->rcv_nxt, tp->copied_seq)) {
		printk(KERN_DEBUG "*** tcp.c:tcp_data bug acked < copied\n");
		tp->rcv_nxt = tp->copied_seq;

	/* Above, tcp_data_queue() increments delayed_acks appropriately.
	 * Now tell the user we may have some data.
	 */
		SOCK_DEBUG(sk, "Data wakeup.\n");
		sk->data_ready(sk, 0);

static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) &&
	    tcp_packets_in_flight(tp) < tp->snd_cwnd) {
		/* Put more data onto the wire. */
	} else if (tp->packets_out == 0 && !tp->pending) {
		/* Start probing the receiver's window. */
		tcp_reset_xmit_timer(sk, TIME_PROBE0, tp->rto);

static __inline__ void tcp_data_snd_check(struct sock *sk)
	struct sk_buff *skb = sk->tp_pinfo.af_tcp.send_head;

		__tcp_data_snd_check(sk, skb);

/*
 * Adapt the MSS value used to make delayed ack decision to the
 */
static __inline__ void tcp_measure_rcv_mss(struct sock *sk, struct sk_buff *skb)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	unsigned int len = skb->len, lss;

	if (len > tp->rcv_mss)
	lss = tp->last_seg_size;
	tp->last_seg_size = 0;
			tp->last_seg_size = len;
/*
 * Check if sending an ack is needed.
 */
static __inline__ void __tcp_ack_snd_check(struct sock *sk)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	/* This also takes care of updating the window.
	 * This if statement needs to be simplified.
	 *
	 * Rules for delaying an ack:
	 *	- delay time <= 0.5 HZ
	 *	- we don't have a window update to send
	 *	- must send at least every 2 full sized packets
	 *	- must send an ACK if we have any out of order data
	 *
	 * With an extra heuristic to handle loss of packet
	 * situations and also helping the sender leave slow
	 * start in an expedient manner.
	 */
	/* Two full frames received or... */
	if (((tp->rcv_nxt - tp->rcv_wup) >= tp->rcv_mss * MAX_DELAY_ACK) ||
	    /* We will update the window "significantly" or... */
	    tcp_raise_window(sk) ||
	    /* We entered "quick ACK" mode or... */
	    tcp_in_quickack_mode(tp) ||
	    /* We have out of order data */
	    (skb_peek(&tp->out_of_order_queue) != NULL)) {
		/* Then ack it now */
		/* Else, send delayed ack. */
		tcp_send_delayed_ack(tp, HZ/2);

static __inline__ void tcp_ack_snd_check(struct sock *sk)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	if (tp->delayed_acks == 0) {
		/* We sent a data segment already. */
	__tcp_ack_snd_check(sk);
/*
 *	This routine is only called when we have urgent data
 *	signalled. It's the 'slow' part of tcp_urg. It could be
 *	moved inline now as tcp_urg is only called from one
 *	place. We handle URGent data wrong. We have to - as
 *	BSD still doesn't use the correction from RFC961.
 *	For 1003.1g we should support a new option TCP_STDURG to permit
 *	either form (or just set the sysctl tcp_stdurg).
 */
static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
	u32 ptr = ntohs(th->urg_ptr);

	if (ptr && !sysctl_tcp_stdurg)
	ptr += ntohl(th->seq);

	/* Ignore urgent data that we've already seen and read. */
	if (after(tp->copied_seq, ptr))

	/* Do we already have a newer (or duplicate) urgent pointer? */
	if (tp->urg_data && !after(ptr, tp->urg_seq))

	/* Tell the world about our new urgent pointer. */
	if (sk->proc != 0) {
			kill_proc(sk->proc, SIGURG, 1);
			kill_pg(-sk->proc, SIGURG, 1);
	/* We may be adding urgent data when the last byte read was
	 * urgent. To do this requires some care. We cannot just ignore
	 * tp->copied_seq since we would read the last urgent byte again
	 * as data, nor can we alter copied_seq until this data arrives
	 * or we break the semantics of SIOCATMARK (and thus sockatmark())
	 */
	if (tp->urg_seq == tp->copied_seq)
		tp->copied_seq++;	/* Move the copied sequence on correctly */
	tp->urg_data = URG_NOTYET;

	/* Disable header prediction. */

/* This is the 'fast' part of urgent handling. */
static inline void tcp_urg(struct sock *sk, struct tcphdr *th, unsigned long len)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* Check if we get a new urgent pointer - normally not. */
		tcp_check_urg(sk, th);

	/* Do we wait for any urgent data? - normally not... */
	if (tp->urg_data == URG_NOTYET) {
		u32 ptr = tp->urg_seq - ntohl(th->seq) + (th->doff*4);

		/* Is the urgent pointer pointing into this packet? */
			tp->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
				sk->data_ready(sk, 0);
/*
 * Clean first the out_of_order queue, then the receive queue until
 * the socket is in its memory limits again.
 */
static int prune_queue(struct sock *sk)
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct sk_buff * skb;

	SOCK_DEBUG(sk, "prune_queue: c=%x\n", tp->copied_seq);

	net_statistics.PruneCalled++;

	/* First Clean the out_of_order queue. */
	/* Start with the end because there are probably the least
	 * useful packets (crossing fingers).
	 */
	while ((skb = __skb_dequeue_tail(&tp->out_of_order_queue))) {
		net_statistics.OfoPruned += skb->len;
		if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf)

	/* Now continue with the receive queue if it wasn't enough.
	 * But only do this if we are really being abused.
	 */
	while ((atomic_read(&sk->rmem_alloc) >= (sk->rcvbuf * 2)) &&
	       (skb = skb_peek_tail(&sk->receive_queue))) {
		/* Never toss anything when we've seen the FIN.
		 * It's just too complex to recover from it.
		 */
		/* Never remove packets that have been already acked */
		if (before(TCP_SKB_CB(skb)->end_seq, tp->last_ack_sent+1)) {
			SOCK_DEBUG(sk, "prune_queue: hit acked data c=%x,%x,%x\n",
				   tp->copied_seq, TCP_SKB_CB(skb)->end_seq,

		net_statistics.RcvPruned += skb->len;

		__skb_unlink(skb, skb->list);
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq;
		SOCK_DEBUG(sk, "prune_queue: removing %x-%x (c=%x)\n",
			   TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
/*
 *	TCP receive function for the ESTABLISHED state.
 *
 *	It is split into a fast path and a slow path. The fast path is
 *	- A zero window was announced from us - zero window probing
 *	  is only handled properly in the slow path.
 *	- Out of order segments arrived.
 *	- Urgent data is expected.
 *	- There is no buffer space left
 *	- Unexpected TCP flags/window values/header lengths are received
 *	  (detected by checking the TCP header against pred_flags)
 *	- Data is sent in both directions. Fast path only supports pure senders
 *	  or pure receivers (this means either the sequence number or the ack
 *	  value must stay constant)
 *
 *	When these conditions are not satisfied it drops into a standard
 *	receive procedure patterned after RFC793 to handle all cases.
 *	The first three cases are guaranteed by proper pred_flags setting,
 *	the rest is checked inline. Fast processing is turned on in
 *	tcp_data_queue when everything is OK.
 */
int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
			struct tcphdr *th, unsigned len)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/*
	 *	Header prediction.
	 *	The code follows the one in the famous
	 *	"30 instruction TCP receive" Van Jacobson mail.
	 *
	 *	Van's trick is to deposit buffers into socket queue
	 *	on a device interrupt, to call tcp_recv function
	 *	on the receive process context and checksum and copy
	 *	the buffer to user space. smart...
	 *
	 *	Our current scheme is not silly either but we take the
	 *	extra cost of the net_bh soft interrupt processing...
	 *	We do checksum and copy also but from device to kernel.
	 *
	 *	RFC1323: H1. Apply PAWS check first.
	 */
	if (tcp_fast_parse_options(sk, th, tp)) {
		if (tp->saw_tstamp) {
			if (tcp_paws_discard(tp, th, len)) {
			tcp_replace_ts_recent(sk, tp,
					      TCP_SKB_CB(skb)->seq,
					      TCP_SKB_CB(skb)->end_seq);

	flg = *(((u32 *)th) + 3) & ~htonl(0x8 << 16);
	/*	pred_flags is 0xS?10 << 16 + snd_wnd
	 *	if header prediction is to be made
	 *	'S' will always be tp->tcp_header_len >> 2
	 *	'?' will be 0 else it will be !0
	 *	(when there are holes in the receive
	 *	 space for instance)
	 *	PSH flag is ignored.
	 */
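	/* Illustrative note, not part of the original file: the word being
	 * compared is the fourth 32-bit word of the TCP header, i.e. data
	 * offset and flags in the top 16 bits and the advertised window in
	 * the low 16 bits.  For a bare 20-byte header with only ACK set that
	 * is 0x5010 << 16 | window; with the 12-byte timestamp option it
	 * becomes 0x8010 << 16 | window.  Any other flags, header length, or
	 * window value makes the compare below fail and forces the slow path.
	 */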
	if (flg == tp->pred_flags && TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
		if (len <= th->doff*4) {
			/* Bulk data transfer: sender */
			if (len == th->doff*4) {
				tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
					TCP_SKB_CB(skb)->ack_seq, len);
				tcp_data_snd_check(sk);
			} else { /* Header too small */
				tcp_statistics.TcpInErrs++;
		} else if (TCP_SKB_CB(skb)->ack_seq == tp->snd_una &&
			   atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
			/* Bulk data transfer: receiver */
			__skb_pull(skb, th->doff*4);

			tcp_measure_rcv_mss(sk, skb);

			/* DO NOT notify forward progress here.
			 * It saves dozen of CPU instructions in fast path. --ANK
			 */
			__skb_queue_tail(&sk->receive_queue, skb);
			tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;

			/* FIN bit check is not done since if FIN is set in
			 * this frame, the pred_flags won't match up. -DaveM
			 */
			sk->data_ready(sk, 0);
			tcp_delack_estimator(tp);
			tcp_remember_ack(tp, th, skb);
			__tcp_ack_snd_check(sk);

	/*
	 *	Standard slow path.
	 */
	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {
		/* RFC793, page 37: "In all states except SYN-SENT, all reset
		 * (RST) segments are validated by checking their SEQ-fields."
		 * And page 69: "If an incoming segment is not acceptable,
		 * an acknowledgment should be sent in reply (unless the RST bit
		 * is set, if so drop the segment and return)".
		 */
		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
			SOCK_DEBUG(sk, "seq:%d end:%d wup:%d wnd:%d\n",
				   TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
				   tp->rcv_wup, tp->rcv_wnd);

	if(th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {
		SOCK_DEBUG(sk, "syn in established state\n");
		tcp_statistics.TcpInErrs++;

	tcp_ack(sk, th, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->ack_seq, len);

	/* Process urgent data. */
	tcp_urg(sk, th, len);

	/* step 7: process the segment text */
	queued = tcp_data(skb, sk, len);
	/* This must be after tcp_data() does the skb_pull() to
	 * remove the header size from skb->len.
	 *
	 * Dave!!! Phrase above (and all about rcv_mss) has
	 * nothing to do with reality. rcv_mss must measure TOTAL
	 * size, including sacks, IP options etc. Hence, measure_rcv_mss
	 * must occur before pulling etc, otherwise it will flap
	 * like hell. Even putting it before tcp_data is wrong,
	 * it should use skb->tail - skb->nh.raw instead.
	 *
	 * BTW I broke it. Now all TCP options are handled equally
	 * in mss_clamp calculations (i.e. ignored, rfc1122),
	 * and mss_cache does include all of them (i.e. tstamps)
	 * except for sacks, to calculate effective mss faster.
	 */
	tcp_measure_rcv_mss(sk, skb);

	/* Be careful, tcp_data() may have put this into TIME_WAIT. */
	if(sk->state != TCP_CLOSE) {
		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);

/*
 *	Process an incoming SYN or SYN-ACK for SYN_RECV sockets represented
 *	as an open_request.
 */
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
			   struct open_request *req)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* assumption: the socket is not in use.
	 * as we checked the user count on tcp_rcv and we're
	 * running from a soft interrupt.
	 */

	/* Check for syn retransmission */
	flg = *(((u32 *)skb->h.th) + 3);

	flg &= __constant_htonl(0x00170000);

	if (flg == __constant_htonl(0x00020000)) {
		if (!after(TCP_SKB_CB(skb)->seq, req->rcv_isn)) {
			/* retransmitted syn.
			 */
			req->class->rtx_syn_ack(sk, req);
			return sk;	/* Pass new SYN to the listen socket. */

	/* We know it's an ACK here */

	/* socket already created but not
	 */

	/* In theory the packet could be for a cookie, but
	 * TIME_WAIT should guard us against this.
	 * XXX: Nevertheless check for cookies?
	 * This sequence number check is done again later,
	 * but we do it here to prevent syn flood attackers
	 * from creating big SYN_RECV sockets.
	 */
	if (!between(TCP_SKB_CB(skb)->ack_seq, req->snt_isn, req->snt_isn+1) ||
	    !between(TCP_SKB_CB(skb)->seq, req->rcv_isn,
		     req->rcv_isn+1+req->rcv_wnd)) {
		req->class->send_reset(skb);

	sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
	tcp_dec_slow_timer(TCP_SLT_SYNACK);

	skb_set_owner_r(skb, sk);

/*
 *	This function implements the receiving procedure of RFC 793 for
 *	all states except ESTABLISHED and TIME_WAIT.
 *	It's called from both tcp_v4_rcv and tcp_v6_rcv and should be
 *	address independent.
 */
int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
			  struct tcphdr *th, unsigned len)
	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);

	/* state == CLOSED, hash lookup always fails, so no worries. -DaveM */
	switch (sk->state) {
		/* These use the socket TOS..
		 * might want to be the received TOS
		 */
		if(tp->af_specific->conn_request(sk, skb, 0) < 0)

		/* Now we have several options: In theory there is
		 * nothing else in the frame. KA9Q has an option to
		 * send data with the syn, BSD accepts data with the
		 * syn up to the [to be] advertised window and
		 * Solaris 2.1 gives you a protocol error. For now
		 * we just ignore it, that fits the spec precisely
		 * and avoids incompatibilities. It would be nice in
		 * future to drop through and process the data.
		 *
		 * Now that TTCP is starting to be used we ought to
		 *
		 * But, this leaves one open to an easy denial of
		 * service attack, and SYN cookies can't defend
		 * against this problem. So, we drop the data
		 * in the interest of security over speed.
		 */

	/* SYN sent means we have to look for a suitable ack and
	 * either reset for bad matches or go to connected.
	 * The SYN_SENT case is unusual and should
	 * not be in line code. [AC]
	 */
		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;

		/* We got an ack, but it's not a good ack. */
		if(!tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
			    TCP_SKB_CB(skb)->ack_seq, len)) {
			sk->err = ECONNRESET;
			sk->state_change(sk);
			tcp_statistics.TcpAttemptFails++;

			/* A valid ack from a different connection
			 * start. Shouldn't happen but cover it.
			 */
			sk->err = ECONNRESET;
			sk->state_change(sk);
			tcp_statistics.TcpAttemptFails++;

		/* Ok.. it's good. Set up sequence numbers and
		 * move to established.
		 */
		tp->rcv_nxt = TCP_SKB_CB(skb)->seq+1;
		tp->rcv_wup = TCP_SKB_CB(skb)->seq+1;

		/* RFC1323: The window in SYN & SYN/ACK segments is
		 */
		tp->snd_wnd = htons(th->window);
		tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
		tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;
		tp->fin_seq = TCP_SKB_CB(skb)->seq;

		tcp_set_state(sk, TCP_ESTABLISHED);
		tcp_parse_options(sk, th, tp, 0);

		if (tp->wscale_ok == 0) {
			tp->snd_wscale = tp->rcv_wscale = 0;
			tp->window_clamp = min(tp->window_clamp, 65535);

		if (tp->tstamp_ok) {
			tp->tcp_header_len =
				sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
			tp->tcp_header_len = sizeof(struct tcphdr);
		if (tp->saw_tstamp) {
			tp->ts_recent = tp->rcv_tsval;
			tp->ts_recent_stamp = jiffies;

		/* Can't be earlier, doff would be wrong. */

		sk->dport = th->source;
		tp->copied_seq = tp->rcv_nxt;

		sk->state_change(sk);
		sock_wake_async(sk->socket, 0);

		if(th->syn && !th->rst) {
			/* The previous version of the code
			 * checked for "connecting to self"
			 * here. that check is done now in
			 */
			tcp_set_state(sk, TCP_SYN_RECV);
			tcp_parse_options(sk, th, tp, 0);
			if (tp->saw_tstamp) {
				tp->ts_recent = tp->rcv_tsval;
				tp->ts_recent_stamp = jiffies;

			tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
			tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;

			/* RFC1323: The window in SYN & SYN/ACK segments is
			 */
			tp->snd_wnd = htons(th->window);
			tp->snd_wl1 = TCP_SKB_CB(skb)->seq;

			tcp_send_synack(sk);

	/* tp->tcp_header_len and tp->mss_clamp
	   probably changed, synchronize mss.
	 */
	tcp_sync_mss(sk, tp->pmtu_cookie);
	tp->rcv_mss = tp->mss_cache;

	if (sk->state == TCP_SYN_RECV)

	/* Parse the tcp_options present on this header.
	 * By this point we really only expect timestamps.
	 * Note that this really has to be here and not later for PAWS
	 * (RFC1323) to work.
	 */
	if (tcp_fast_parse_options(sk, th, tp)) {
		/* NOTE: assumes saw_tstamp is never set if we didn't
		 * negotiate the option.  tcp_fast_parse_options() must
		 */
		if (tp->saw_tstamp) {
			if (tcp_paws_discard(tp, th, len)) {
			tcp_replace_ts_recent(sk, tp,
					      TCP_SKB_CB(skb)->seq,
					      TCP_SKB_CB(skb)->end_seq);

	/* step 1: check sequence number */
	if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {

	/* step 2: check RST bit */

	/* step 3: check security and precedence [ignored] */

	/*	Check for a SYN, and ensure it matches the SYN we were
	 *	first sent. We have to handle the rather unusual (but valid)
	 *	sequence that KA9Q derived products may generate of
	 *
	 *	SYN|ACK Data + More Data
	 *	.. we must ACK not RST...
	 *
	 *	We keep syn_seq as the sequence space occupied by the
	 */
	if (th->syn && TCP_SKB_CB(skb)->seq != tp->syn_seq) {

	/* step 5: check the ACK field */
		int acceptable = tcp_ack(sk, th, TCP_SKB_CB(skb)->seq,
					 TCP_SKB_CB(skb)->ack_seq, len);

			tcp_set_state(sk, TCP_ESTABLISHED);
			sk->dport = th->source;
			tp->copied_seq = tp->rcv_nxt;

			sk->state_change(sk);

			tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
			tp->snd_wnd = htons(th->window) << tp->snd_wscale;
			tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
			tp->snd_wl2 = TCP_SKB_CB(skb)->ack_seq;

			SOCK_DEBUG(sk, "bad ack\n");

			if (tp->snd_una == tp->write_seq) {
				sk->shutdown |= SEND_SHUTDOWN;
				tcp_set_state(sk, TCP_FIN_WAIT2);
					sk->state_change(sk);
					tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);

			if (tp->snd_una == tp->write_seq) {
			if (tp->snd_una == tp->write_seq) {
				sk->shutdown = SHUTDOWN_MASK;
				tcp_set_state(sk, TCP_CLOSE);
				sk->state_change(sk);

	/* step 6: check the URG bit */
	tcp_urg(sk, th, len);

	/* step 7: process the segment text */
	switch (sk->state) {
	case TCP_CLOSE_WAIT:
		if (!before(TCP_SKB_CB(skb)->seq, tp->fin_seq))

		/* RFC 793 says to queue data in these states,
		 * RFC 1122 says we MUST send a reset.
		 * BSD 4.4 also does reset.
		 */
		if ((sk->shutdown & RCV_SHUTDOWN) && sk->dead) {
			if (after(TCP_SKB_CB(skb)->end_seq - th->fin, tp->rcv_nxt)) {

	case TCP_ESTABLISHED:
		queued = tcp_data(skb, sk, len);

		/* This must be after tcp_data() does the skb_pull() to
		 * remove the header size from skb->len.
		 */
		tcp_measure_rcv_mss(sk, skb);

		tcp_data_snd_check(sk);
		tcp_ack_snd_check(sk);