4 * Transmission Control Protocol, incoming traffic
6 * The input processing functions of the TCP layer.
8 * These functions are generally called in the order (ip_input() ->)
9 * tcp_input() -> * tcp_process() -> tcp_receive() (-> application).
14 * Copyright (c) 2001-2004 Swedish Institute of Computer Science.
15 * All rights reserved.
17 * Redistribution and use in source and binary forms, with or without modification,
18 * are permitted provided that the following conditions are met:
20 * 1. Redistributions of source code must retain the above copyright notice,
21 * this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright notice,
23 * this list of conditions and the following disclaimer in the documentation
24 * and/or other materials provided with the distribution.
25 * 3. The name of the author may not be used to endorse or promote products
26 * derived from this software without specific prior written permission.
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
31 * SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
32 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
33 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
36 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
39 * This file is part of the lwIP TCP/IP stack.
41 * Author: Adam Dunkels <adam@sics.se>
48 #include "lwip/ip_addr.h"
49 #include "lwip/netif.h"
51 #include "lwip/memp.h"
53 #include "lwip/inet.h"
56 #include "lwip/stats.h"
57 #include "arch/perf.h"
58 #include "lwip/snmp.h"
61 /* These variables are global to all functions involved in the input
62 processing of TCP segments. They are set by the tcp_input() function. */
/* Descriptor of the segment currently being processed; tcp_input() fills in
   its len, dataptr and tcphdr members before calling tcp_process(). */
64 static struct tcp_seg inseg
;
/* TCP header of the incoming segment; tcp_input() points it into the pbuf
   payload just past the IP header and converts its fields to host order. */
65 static struct tcp_hdr
*tcphdr
;
/* IP header of the incoming packet; its src/dest addresses are used for PCB
   matching and when sending resets. */
66 static struct ip_hdr
*iphdr
;
/* Sequence and acknowledgement numbers of the incoming segment, in host
   byte order. */
67 static u32_t seqno
, ackno
;
/* Outcome flags of input processing (TF_RESET, TF_CLOSED, TF_GOT_FIN) that
   tcp_input() inspects after tcp_process() returns. */
71 static u8_t recv_flags
;
/* Received data that tcp_input() passes to the application through
   TCP_EVENT_RECV; NULL when there is nothing to deliver. */
72 static struct pbuf
*recv_data
;
/* Not static, so visible outside this file; tcp_input() resets it to NULL
   once tcp_process() has returned. */
74 struct tcp_pcb
*tcp_input_pcb
;
76 /* Forward declarations. */
/* TCP state machine for a segment matched to a connection PCB; called from
   tcp_input(). */
77 static err_t
tcp_process(struct tcp_pcb
*pcb
);
/* ACK and data handling for the current segment; called from tcp_process().
   Returns the accepted_inseq indicator. */
78 static u8_t
tcp_receive(struct tcp_pcb
*pcb
);
/* Parses the TCP options of the incoming segment; an MSS option is stored in
   pcb->mss, capped at TCP_MSS. */
79 static void tcp_parseopt(struct tcp_pcb
*pcb
);
/* Handles a segment that matched a listening PCB; called from tcp_input(). */
81 static err_t
tcp_listen_input(struct tcp_pcb_listen
*pcb
);
/* Handles a segment that matched a PCB in the TIME-WAIT list; called from
   tcp_input(). */
82 static err_t
tcp_timewait_input(struct tcp_pcb
*pcb
);
86 * The initial input processing of TCP. It verifies the TCP header, demultiplexes
87 * the segment between the PCBs and passes it on to tcp_process(), which implements
88 * the TCP finite state machine. This function is called by the IP layer (in ip_input()).
93 tcp_input(struct pbuf
*p
, struct netif
*inp
)
95 struct tcp_pcb
*pcb
, *prev
;
96 struct tcp_pcb_listen
*lpcb
;
102 TCP_STATS_INC(tcp
.recv
);
103 snmp_inc_tcpinsegs();
106 tcphdr
= (struct tcp_hdr
*)((u8_t
*)p
->payload
+ IPH_HL(iphdr
) * 4);
109 tcp_debug_print(tcphdr
);
112 /* remove header from payload */
113 if (pbuf_header(p
, -((s16_t
)(IPH_HL(iphdr
) * 4))) || (p
->tot_len
< sizeof(struct tcp_hdr
))) {
114 /* drop short packets */
115 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: short packet (%"U16_F
" bytes) discarded\n", p
->tot_len
));
116 TCP_STATS_INC(tcp
.lenerr
);
117 TCP_STATS_INC(tcp
.drop
);
122 /* Don't even process incoming broadcasts/multicasts. */
123 if (ip_addr_isbroadcast(&(iphdr
->dest
), inp
) ||
124 ip_addr_ismulticast(&(iphdr
->dest
))) {
125 snmp_inc_tcpinerrs();
130 #if CHECKSUM_CHECK_TCP
131 /* Verify TCP checksum. */
132 if (inet_chksum_pseudo(p
, (struct ip_addr
*)&(iphdr
->src
),
133 (struct ip_addr
*)&(iphdr
->dest
),
134 IP_PROTO_TCP
, p
->tot_len
) != 0) {
135 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: packet discarded due to failing checksum 0x%04"X16_F
"\n",
136 inet_chksum_pseudo(p
, (struct ip_addr
*)&(iphdr
->src
), (struct ip_addr
*)&(iphdr
->dest
),
137 IP_PROTO_TCP
, p
->tot_len
)));
139 tcp_debug_print(tcphdr
);
140 #endif /* TCP_DEBUG */
141 TCP_STATS_INC(tcp
.chkerr
);
142 TCP_STATS_INC(tcp
.drop
);
143 snmp_inc_tcpinerrs();
149 /* Move the payload pointer in the pbuf so that it points to the
150 TCP data instead of the TCP header. */
151 hdrlen
= TCPH_HDRLEN(tcphdr
);
152 pbuf_header(p
, -(hdrlen
* 4));
154 /* Convert fields in TCP header to host byte order. */
155 tcphdr
->src
= ntohs(tcphdr
->src
);
156 tcphdr
->dest
= ntohs(tcphdr
->dest
);
157 seqno
= tcphdr
->seqno
= ntohl(tcphdr
->seqno
);
158 ackno
= tcphdr
->ackno
= ntohl(tcphdr
->ackno
);
159 tcphdr
->wnd
= ntohs(tcphdr
->wnd
);
161 flags
= TCPH_FLAGS(tcphdr
) & TCP_FLAGS
;
162 tcplen
= p
->tot_len
+ ((flags
& TCP_FIN
|| flags
& TCP_SYN
)? 1: 0);
164 /* Demultiplex an incoming segment. First, we check if it is destined
165 for an active connection. */
169 for(pcb
= tcp_active_pcbs
; pcb
!= NULL
; pcb
= pcb
->next
) {
170 LWIP_ASSERT("tcp_input: active pcb->state != CLOSED", pcb
->state
!= CLOSED
);
171 LWIP_ASSERT("tcp_input: active pcb->state != TIME-WAIT", pcb
->state
!= TIME_WAIT
);
172 LWIP_ASSERT("tcp_input: active pcb->state != LISTEN", pcb
->state
!= LISTEN
);
173 if (pcb
->remote_port
== tcphdr
->src
&&
174 pcb
->local_port
== tcphdr
->dest
&&
175 ip_addr_cmp(&(pcb
->remote_ip
), &(iphdr
->src
)) &&
176 ip_addr_cmp(&(pcb
->local_ip
), &(iphdr
->dest
))) {
178 /* Move this PCB to the front of the list so that subsequent
179 lookups will be faster (we exploit locality in TCP segment arrivals). */
181 LWIP_ASSERT("tcp_input: pcb->next != pcb (before cache)", pcb
->next
!= pcb
);
183 prev
->next
= pcb
->next
;
184 pcb
->next
= tcp_active_pcbs
;
185 tcp_active_pcbs
= pcb
;
187 LWIP_ASSERT("tcp_input: pcb->next != pcb (after cache)", pcb
->next
!= pcb
);
194 /* If it did not go to an active connection, we check the connections
195 in the TIME-WAIT state. */
196 for(pcb
= tcp_tw_pcbs
; pcb
!= NULL
; pcb
= pcb
->next
) {
197 LWIP_ASSERT("tcp_input: TIME-WAIT pcb->state == TIME-WAIT", pcb
->state
== TIME_WAIT
);
198 if (pcb
->remote_port
== tcphdr
->src
&&
199 pcb
->local_port
== tcphdr
->dest
&&
200 ip_addr_cmp(&(pcb
->remote_ip
), &(iphdr
->src
)) &&
201 ip_addr_cmp(&(pcb
->local_ip
), &(iphdr
->dest
))) {
202 /* We don't really care enough to move this PCB to the front
203 of the list since we are not very likely to receive that
204 many segments for connections in TIME-WAIT. */
205 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: packed for TIME_WAITing connection.\n"));
206 tcp_timewait_input(pcb
);
212 /* Finally, if we still did not get a match, we check all PCBs that
213 are LISTENing for incoming connections. */
215 for(lpcb
= tcp_listen_pcbs
.listen_pcbs
; lpcb
!= NULL
; lpcb
= lpcb
->next
) {
216 if ((ip_addr_isany(&(lpcb
->local_ip
)) ||
217 ip_addr_cmp(&(lpcb
->local_ip
), &(iphdr
->dest
))) &&
218 lpcb
->local_port
== tcphdr
->dest
) {
219 /* Move this PCB to the front of the list so that subsequent
220 lookups will be faster (we exploit locality in TCP segment arrivals). */
223 ((struct tcp_pcb_listen
*)prev
)->next
= lpcb
->next
;
224 /* our successor is the remainder of the listening list */
225 lpcb
->next
= tcp_listen_pcbs
.listen_pcbs
;
226 /* put this listening pcb at the head of the listening list */
227 tcp_listen_pcbs
.listen_pcbs
= lpcb
;
230 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_input: packed for LISTENing connection.\n"));
231 tcp_listen_input(lpcb
);
235 prev
= (struct tcp_pcb
*)lpcb
;
240 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("+-+-+-+-+-+-+-+-+-+-+-+-+-+- tcp_input: flags "));
241 tcp_debug_print_flags(TCPH_FLAGS(tcphdr
));
242 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("-+-+-+-+-+-+-+-+-+-+-+-+-+-+\n"));
243 #endif /* TCP_INPUT_DEBUG */
247 /* The incoming segment belongs to a connection. */
250 tcp_debug_print_state(pcb
->state
);
251 #endif /* TCP_DEBUG */
252 #endif /* TCP_INPUT_DEBUG */
254 /* Set up a tcp_seg structure. */
256 inseg
.len
= p
->tot_len
;
257 inseg
.dataptr
= p
->payload
;
259 inseg
.tcphdr
= tcphdr
;
265 err
= tcp_process(pcb
);
266 tcp_input_pcb
= NULL
;
267 /* A return value of ERR_ABRT means that tcp_abort() was called
268 and that the pcb has been freed. If so, we don't do anything. */
269 if (err
!= ERR_ABRT
) {
270 if (recv_flags
& TF_RESET
) {
271 /* TF_RESET means that the connection was reset by the other
272 end. We then call the error callback to inform the
273 application that the connection is dead before we
274 deallocate the PCB. */
275 TCP_EVENT_ERR(pcb
->errf
, pcb
->callback_arg
, ERR_RST
);
276 tcp_pcb_remove(&tcp_active_pcbs
, pcb
);
277 memp_free(MEMP_TCP_PCB
, pcb
);
278 } else if (recv_flags
& TF_CLOSED
) {
279 /* The connection has been closed and we will deallocate the PCB. */
281 tcp_pcb_remove(&tcp_active_pcbs
, pcb
);
282 memp_free(MEMP_TCP_PCB
, pcb
);
285 /* If the application has registered a "sent" function to be
286 called when new send buffer space is available, we call it now. */
288 if (pcb
->acked
> 0) {
289 TCP_EVENT_SENT(pcb
, pcb
->acked
, err
);
292 if (recv_data
!= NULL
) {
293 /* Notify application that data has been received. */
294 TCP_EVENT_RECV(pcb
, recv_data
, ERR_OK
, err
);
297 /* If a FIN segment was received, we call the callback
298 function with a NULL buffer to indicate EOF. */
299 if (recv_flags
& TF_GOT_FIN
) {
300 TCP_EVENT_RECV(pcb
, NULL
, ERR_OK
, err
);
302 /* If there were no errors, we try to send something out. */
310 /* give up our reference to inseg.p */
318 tcp_debug_print_state(pcb
->state
);
319 #endif /* TCP_DEBUG */
320 #endif /* TCP_INPUT_DEBUG */
324 /* If no matching PCB was found, send a TCP RST (reset) to the sender. */
326 LWIP_DEBUGF(TCP_RST_DEBUG
, ("tcp_input: no PCB match found, resetting.\n"));
327 if (!(TCPH_FLAGS(tcphdr
) & TCP_RST
)) {
328 TCP_STATS_INC(tcp
.proterr
);
329 TCP_STATS_INC(tcp
.drop
);
330 tcp_rst(ackno
, seqno
+ tcplen
,
331 &(iphdr
->dest
), &(iphdr
->src
),
332 tcphdr
->dest
, tcphdr
->src
);
337 LWIP_ASSERT("tcp_input: tcp_pcbs_sane()", tcp_pcbs_sane());
338 PERF_STOP("tcp_input");
341 /* tcp_listen_input():
343 * Called by tcp_input() when a segment arrives for a listening connection. */
348 tcp_listen_input(struct tcp_pcb_listen
*pcb
)
350 struct tcp_pcb
*npcb
;
353 /* In the LISTEN state, we check for incoming SYN segments,
354 create a new PCB, and respond with a SYN|ACK. */
355 if (flags
& TCP_ACK
) {
356 /* For incoming segments with the ACK flag set, respond with a RST. */
358 LWIP_DEBUGF(TCP_RST_DEBUG
, ("tcp_listen_input: ACK in LISTEN, sending reset\n"));
359 tcp_rst(ackno
+ 1, seqno
+ tcplen
,
360 &(iphdr
->dest
), &(iphdr
->src
),
361 tcphdr
->dest
, tcphdr
->src
);
362 } else if (flags
& TCP_SYN
) {
363 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection request %"U16_F
" -> %"U16_F
".\n", tcphdr
->src
, tcphdr
->dest
));
364 npcb
= tcp_alloc(pcb
->prio
);
365 /* If a new PCB could not be created (probably due to lack of memory),
366 we don't do anything, but rely on the sender to retransmit the
367 SYN at a time when we have more memory available. */
369 LWIP_DEBUGF(TCP_DEBUG
, ("tcp_listen_input: could not allocate PCB\n"));
370 TCP_STATS_INC(tcp
.memerr
);
373 /* Set up the new PCB. */
374 ip_addr_set(&(npcb
->local_ip
), &(iphdr
->dest
));
375 npcb
->local_port
= pcb
->local_port
;
376 ip_addr_set(&(npcb
->remote_ip
), &(iphdr
->src
));
377 npcb
->remote_port
= tcphdr
->src
;
378 npcb
->state
= SYN_RCVD
;
379 npcb
->rcv_nxt
= seqno
+ 1;
380 npcb
->snd_wnd
= tcphdr
->wnd
;
381 npcb
->ssthresh
= npcb
->snd_wnd
;
382 npcb
->snd_wl1
= seqno
- 1;/* initialise to seqno-1 to force window update */
383 npcb
->callback_arg
= pcb
->callback_arg
;
384 #if LWIP_CALLBACK_API
385 npcb
->accept
= pcb
->accept
;
386 #endif /* LWIP_CALLBACK_API */
387 /* inherit socket options */
388 npcb
->so_options
= pcb
->so_options
& (SOF_DEBUG
|SOF_DONTROUTE
|SOF_KEEPALIVE
|SOF_OOBINLINE
|SOF_LINGER
);
389 /* Register the new PCB so that we can begin receiving segments for it. */
391 TCP_REG(&tcp_active_pcbs
, npcb
);
393 /* Parse any options in the SYN. */
396 snmp_inc_tcppassiveopens();
398 /* Build an MSS option. */
399 optdata
= htonl(((u32_t
)2 << 24) |
401 (((u32_t
)npcb
->mss
/ 256) << 8) |
403 /* Send a SYN|ACK together with the MSS option. */
404 tcp_enqueue(npcb
, NULL
, 0, TCP_SYN
| TCP_ACK
, 0, (u8_t
*)&optdata
, 4);
405 return tcp_output(npcb
);
410 /* tcp_timewait_input():
412 * Called by tcp_input() when a segment arrives for a connection in TIME_WAIT. */
417 tcp_timewait_input(struct tcp_pcb
*pcb
)
419 if (TCP_SEQ_GT(seqno
+ tcplen
, pcb
->rcv_nxt
)) {
420 pcb
->rcv_nxt
= seqno
+ tcplen
;
425 return tcp_output(pcb
);
430 * Implements the TCP state machine. Called by tcp_input. In some
431 * states tcp_receive() is called to receive data. The tcp_seg
432 * argument will be freed by the caller (tcp_input()) unless the
433 * recv_data pointer in the pcb is set.
437 tcp_process(struct tcp_pcb
*pcb
)
439 struct tcp_seg
*rseg
;
446 /* Process incoming RST segments. */
447 if (flags
& TCP_RST
) {
448 /* First, determine if the reset is acceptable. */
449 if (pcb
->state
== SYN_SENT
) {
450 if (ackno
== pcb
->snd_nxt
) {
454 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
455 TCP_SEQ_LEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {
457 if (TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+pcb
->rcv_wnd
)) {
463 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_process: Connection RESET\n"));
464 LWIP_ASSERT("tcp_input: pcb->state != CLOSED", pcb
->state
!= CLOSED
);
465 recv_flags
= TF_RESET
;
466 pcb
->flags
&= ~TF_ACK_DELAY
;
469 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_process: unacceptable reset seqno %"U32_F
" rcv_nxt %"U32_F
"\n",
470 seqno
, pcb
->rcv_nxt
));
471 LWIP_DEBUGF(TCP_DEBUG
, ("tcp_process: unacceptable reset seqno %"U32_F
" rcv_nxt %"U32_F
"\n",
472 seqno
, pcb
->rcv_nxt
));
477 /* Update the PCB (in)activity timer. */
478 pcb
->tmr
= tcp_ticks
;
481 /* Do different things depending on the TCP state. */
482 switch (pcb
->state
) {
484 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("SYN-SENT: ackno %"U32_F
" pcb->snd_nxt %"U32_F
" unacked %"U32_F
"\n", ackno
,
485 pcb
->snd_nxt
, ntohl(pcb
->unacked
->tcphdr
->seqno
)));
486 /* received SYN ACK with expected sequence number? */
487 if ((flags
& TCP_ACK
) && (flags
& TCP_SYN
)
488 && ackno
== ntohl(pcb
->unacked
->tcphdr
->seqno
) + 1) {
490 pcb
->rcv_nxt
= seqno
+ 1;
491 pcb
->lastack
= ackno
;
492 pcb
->snd_wnd
= tcphdr
->wnd
;
493 pcb
->snd_wl1
= seqno
- 1; /* initialise to seqno - 1 to force window update */
494 pcb
->state
= ESTABLISHED
;
495 pcb
->cwnd
= ((pcb
->cwnd
== 1) ? (pcb
->mss
* 2) : pcb
->mss
);
497 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("tcp_process: SYN-SENT --queuelen %"U16_F
"\n", (u16_t
)pcb
->snd_queuelen
));
499 pcb
->unacked
= rseg
->next
;
502 /* Parse any options in the SYNACK. */
505 /* Call the user specified function to call when successfully connected. */
507 TCP_EVENT_CONNECTED(pcb
, ERR_OK
, err
);
510 /* received ACK? possibly a half-open connection */
511 else if (flags
& TCP_ACK
) {
512 /* send a RST to bring the other side in a non-synchronized state. */
513 tcp_rst(ackno
, seqno
+ tcplen
, &(iphdr
->dest
), &(iphdr
->src
),
514 tcphdr
->dest
, tcphdr
->src
);
518 if (flags
& TCP_ACK
&&
519 !(flags
& TCP_RST
)) {
520 /* expected ACK number? */
521 if (TCP_SEQ_BETWEEN(ackno
, pcb
->lastack
+1, pcb
->snd_nxt
)) {
523 pcb
->state
= ESTABLISHED
;
524 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection established %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
525 #if LWIP_CALLBACK_API
526 LWIP_ASSERT("pcb->accept != NULL", pcb
->accept
!= NULL
);
528 /* Call the accept function. */
529 TCP_EVENT_ACCEPT(pcb
, ERR_OK
, err
);
531 /* If the accept function returns with an error, we abort the connection. */
536 old_cwnd
= pcb
->cwnd
;
537 /* If there was any data contained within this ACK,
538 * we'd better pass it on to the application as well. */
540 pcb
->cwnd
= ((old_cwnd
== 1) ? (pcb
->mss
* 2) : pcb
->mss
);
542 /* incorrect ACK number */
545 tcp_rst(ackno
, seqno
+ tcplen
, &(iphdr
->dest
), &(iphdr
->src
),
546 tcphdr
->dest
, tcphdr
->src
);
553 accepted_inseq
= tcp_receive(pcb
);
554 if ((flags
& TCP_FIN
) && accepted_inseq
) { /* passive close */
556 pcb
->state
= CLOSE_WAIT
;
561 if (flags
& TCP_FIN
) {
562 if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
563 LWIP_DEBUGF(TCP_DEBUG
,
564 ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
567 TCP_RMV(&tcp_active_pcbs
, pcb
);
568 pcb
->state
= TIME_WAIT
;
569 TCP_REG(&tcp_tw_pcbs
, pcb
);
572 pcb
->state
= CLOSING
;
574 } else if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
575 pcb
->state
= FIN_WAIT_2
;
580 if (flags
& TCP_FIN
) {
581 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
584 TCP_RMV(&tcp_active_pcbs
, pcb
);
585 pcb
->state
= TIME_WAIT
;
586 TCP_REG(&tcp_tw_pcbs
, pcb
);
591 if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
592 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
595 TCP_RMV(&tcp_active_pcbs
, pcb
);
596 pcb
->state
= TIME_WAIT
;
597 TCP_REG(&tcp_tw_pcbs
, pcb
);
602 if (flags
& TCP_ACK
&& ackno
== pcb
->snd_nxt
) {
603 LWIP_DEBUGF(TCP_DEBUG
, ("TCP connection closed %"U16_F
" -> %"U16_F
".\n", inseg
.tcphdr
->src
, inseg
.tcphdr
->dest
));
605 recv_flags
= TF_CLOSED
;
616 * Called by tcp_process. Checks if the given segment is an ACK for outstanding
617 * data, and if so frees the memory of the buffered data. Next, it places the
618 * segment on any of the receive queues (pcb->recved or pcb->ooseq). If the segment
619 * is buffered, the pbuf is referenced by pbuf_ref so that it will not be freed until
620 * it has been removed from the buffer.
622 * If the incoming segment constitutes an ACK for a segment that was used for RTT
623 * estimation, the RTT is estimated here as well.
629 tcp_receive(struct tcp_pcb
*pcb
)
631 struct tcp_seg
*next
;
633 struct tcp_seg
*prev
, *cseg
;
638 u32_t right_wnd_edge
;
640 u8_t accepted_inseq
= 0;
642 if (flags
& TCP_ACK
) {
643 right_wnd_edge
= pcb
->snd_wnd
+ pcb
->snd_wl1
;
646 if (TCP_SEQ_LT(pcb
->snd_wl1
, seqno
) ||
647 (pcb
->snd_wl1
== seqno
&& TCP_SEQ_LT(pcb
->snd_wl2
, ackno
)) ||
648 (pcb
->snd_wl2
== ackno
&& tcphdr
->wnd
> pcb
->snd_wnd
)) {
649 pcb
->snd_wnd
= tcphdr
->wnd
;
650 pcb
->snd_wl1
= seqno
;
651 pcb
->snd_wl2
= ackno
;
652 LWIP_DEBUGF(TCP_WND_DEBUG
, ("tcp_receive: window update %"U32_F
"\n", pcb
->snd_wnd
));
655 if (pcb
->snd_wnd
!= tcphdr
->wnd
) {
656 LWIP_DEBUGF(TCP_WND_DEBUG
, ("tcp_receive: no window update lastack %"U32_F
" snd_max %"U32_F
" ackno %"U32_F
" wl1 %"U32_F
" seqno %"U32_F
" wl2 %"U32_F
"\n",
657 pcb
->lastack
, pcb
->snd_max
, ackno
, pcb
->snd_wl1
, seqno
, pcb
->snd_wl2
));
659 #endif /* TCP_WND_DEBUG */
662 if (pcb
->lastack
== ackno
) {
665 if (pcb
->snd_wl1
+ pcb
->snd_wnd
== right_wnd_edge
){
667 if (pcb
->dupacks
>= 3 && pcb
->unacked
!= NULL
) {
668 if (!(pcb
->flags
& TF_INFR
)) {
669 /* This is fast retransmit. Retransmit the first unacked segment. */
670 LWIP_DEBUGF(TCP_FR_DEBUG
, ("tcp_receive: dupacks %"U16_F
" (%"U32_F
"), fast retransmit %"U32_F
"\n",
671 (u16_t
)pcb
->dupacks
, pcb
->lastack
,
672 ntohl(pcb
->unacked
->tcphdr
->seqno
)));
674 /* Set ssthresh to max (FlightSize / 2, 2*SMSS) */
675 /*pcb->ssthresh = LWIP_MAX((pcb->snd_max -
678 /* Set ssthresh to half of the minimum of the current cwnd and the advertised window */
679 if (pcb
->cwnd
> pcb
->snd_wnd
)
680 pcb
->ssthresh
= pcb
->snd_wnd
/ 2;
682 pcb
->ssthresh
= pcb
->cwnd
/ 2;
684 pcb
->cwnd
= pcb
->ssthresh
+ 3 * pcb
->mss
;
685 pcb
->flags
|= TF_INFR
;
687 /* Inflate the congestion window, but not if it means that
688 the value overflows. */
689 if ((u16_t
)(pcb
->cwnd
+ pcb
->mss
) > pcb
->cwnd
) {
690 pcb
->cwnd
+= pcb
->mss
;
695 LWIP_DEBUGF(TCP_FR_DEBUG
, ("tcp_receive: dupack averted %"U32_F
" %"U32_F
"\n",
696 pcb
->snd_wl1
+ pcb
->snd_wnd
, right_wnd_edge
));
699 /*if (TCP_SEQ_LT(pcb->lastack, ackno) &&
700 TCP_SEQ_LEQ(ackno, pcb->snd_max)) { */
701 if (TCP_SEQ_BETWEEN(ackno
, pcb
->lastack
+1, pcb
->snd_max
)){
702 /* We come here when the ACK acknowledges new data. */
704 /* Reset the "IN Fast Retransmit" flag, since we are no longer
705 in fast retransmit. Also reset the congestion window to the
706 slow start threshold. */
707 if (pcb
->flags
& TF_INFR
) {
708 pcb
->flags
&= ~TF_INFR
;
709 pcb
->cwnd
= pcb
->ssthresh
;
712 /* Reset the number of retransmissions. */
715 /* Reset the retransmission time-out. */
716 pcb
->rto
= (pcb
->sa
>> 3) + pcb
->sv
;
718 /* Update the send buffer space. */
719 pcb
->acked
= ackno
- pcb
->lastack
;
721 pcb
->snd_buf
+= pcb
->acked
;
723 /* Reset the fast retransmit variables. */
725 pcb
->lastack
= ackno
;
727 /* Update the congestion control variables (cwnd and ssthresh). */
729 if (pcb
->state
>= ESTABLISHED
) {
730 if (pcb
->cwnd
< pcb
->ssthresh
) {
731 if ((u16_t
)(pcb
->cwnd
+ pcb
->mss
) > pcb
->cwnd
) {
732 pcb
->cwnd
+= pcb
->mss
;
734 LWIP_DEBUGF(TCP_CWND_DEBUG
, ("tcp_receive: slow start cwnd %"U16_F
"\n", pcb
->cwnd
));
736 u16_t new_cwnd
= (pcb
->cwnd
+ pcb
->mss
* pcb
->mss
/ pcb
->cwnd
);
737 if (new_cwnd
> pcb
->cwnd
) {
738 pcb
->cwnd
= new_cwnd
;
740 LWIP_DEBUGF(TCP_CWND_DEBUG
, ("tcp_receive: congestion avoidance cwnd %"U16_F
"\n", pcb
->cwnd
));
743 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: ACK for %"U32_F
", unacked->seqno %"U32_F
":%"U32_F
"\n",
745 pcb
->unacked
!= NULL
?
746 ntohl(pcb
->unacked
->tcphdr
->seqno
): 0,
747 pcb
->unacked
!= NULL
?
748 ntohl(pcb
->unacked
->tcphdr
->seqno
) + TCP_TCPLEN(pcb
->unacked
): 0));
750 /* Remove segments from the unacknowledged list if the incoming
751 ACK acknowledges them. */
752 while (pcb
->unacked
!= NULL
&&
753 TCP_SEQ_LEQ(ntohl(pcb
->unacked
->tcphdr
->seqno
) +
754 TCP_TCPLEN(pcb
->unacked
), ackno
)) {
755 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: removing %"U32_F
":%"U32_F
" from pcb->unacked\n",
756 ntohl(pcb
->unacked
->tcphdr
->seqno
),
757 ntohl(pcb
->unacked
->tcphdr
->seqno
) +
758 TCP_TCPLEN(pcb
->unacked
)));
761 pcb
->unacked
= pcb
->unacked
->next
;
763 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("tcp_receive: queuelen %"U16_F
" ... ", (u16_t
)pcb
->snd_queuelen
));
764 pcb
->snd_queuelen
-= pbuf_clen(next
->p
);
767 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("%"U16_F
" (after freeing unacked)\n", (u16_t
)pcb
->snd_queuelen
));
768 if (pcb
->snd_queuelen
!= 0) {
769 LWIP_ASSERT("tcp_receive: valid queue length", pcb
->unacked
!= NULL
||
770 pcb
->unsent
!= NULL
);
776 /* We go through the ->unsent list to see if any of the segments
777 on the list are acknowledged by the ACK. This may seem
778 strange since an "unsent" segment shouldn't be acked. The
779 rationale is that lwIP puts all outstanding segments on the
780 ->unsent list after a retransmission, so these segments may
781 in fact have been sent once. */
782 while (pcb
->unsent
!= NULL
&&
783 /*TCP_SEQ_LEQ(ntohl(pcb->unsent->tcphdr->seqno) + TCP_TCPLEN(pcb->unsent), ackno) &&
784 TCP_SEQ_LEQ(ackno, pcb->snd_max)*/
785 TCP_SEQ_BETWEEN(ackno
, ntohl(pcb
->unsent
->tcphdr
->seqno
) + TCP_TCPLEN(pcb
->unsent
), pcb
->snd_max
)
787 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: removing %"U32_F
":%"U32_F
" from pcb->unsent\n",
788 ntohl(pcb
->unsent
->tcphdr
->seqno
), ntohl(pcb
->unsent
->tcphdr
->seqno
) +
789 TCP_TCPLEN(pcb
->unsent
)));
792 pcb
->unsent
= pcb
->unsent
->next
;
793 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("tcp_receive: queuelen %"U16_F
" ... ", (u16_t
)pcb
->snd_queuelen
));
794 pcb
->snd_queuelen
-= pbuf_clen(next
->p
);
796 LWIP_DEBUGF(TCP_QLEN_DEBUG
, ("%"U16_F
" (after freeing unsent)\n", (u16_t
)pcb
->snd_queuelen
));
797 if (pcb
->snd_queuelen
!= 0) {
798 LWIP_ASSERT("tcp_receive: valid queue length",
799 pcb
->unacked
!= NULL
|| pcb
->unsent
!= NULL
);
802 if (pcb
->unsent
!= NULL
) {
803 pcb
->snd_nxt
= htonl(pcb
->unsent
->tcphdr
->seqno
);
806 /* End of ACK for new data processing. */
808 LWIP_DEBUGF(TCP_RTO_DEBUG
, ("tcp_receive: pcb->rttest %"U32_F
" rtseq %"U32_F
" ackno %"U32_F
"\n",
809 pcb
->rttest
, pcb
->rtseq
, ackno
));
811 /* RTT estimation calculations. This is done by checking if the
812 incoming segment acknowledges the segment we use to take a
813 round-trip time measurement. */
814 if (pcb
->rttest
&& TCP_SEQ_LT(pcb
->rtseq
, ackno
)) {
815 m
= tcp_ticks
- pcb
->rttest
;
817 LWIP_DEBUGF(TCP_RTO_DEBUG
, ("tcp_receive: experienced rtt %"U16_F
" ticks (%"U16_F
" msec).\n",
818 m
, m
* TCP_SLOW_INTERVAL
));
820 /* This is taken directly from VJs original code in his paper */
821 m
= m
- (pcb
->sa
>> 3);
826 m
= m
- (pcb
->sv
>> 2);
828 pcb
->rto
= (pcb
->sa
>> 3) + pcb
->sv
;
830 LWIP_DEBUGF(TCP_RTO_DEBUG
, ("tcp_receive: RTO %"U16_F
" (%"U16_F
" miliseconds)\n",
831 pcb
->rto
, pcb
->rto
* TCP_SLOW_INTERVAL
));
837 /* If the incoming segment contains data, we must process it further. */
840 /* This code basically does three things:
842 +) If the incoming segment contains data that is the next
843 in-sequence data, this data is passed to the application. This
844 might involve trimming the first edge of the data. The rcv_nxt
845 variable and the advertised window are adjusted.
847 +) If the incoming segment has data that is above the next
848 sequence number expected (->rcv_nxt), the segment is placed on
849 the ->ooseq queue. This is done by finding the appropriate
850 place in the ->ooseq queue (which is ordered by sequence
851 number) and trim the segment in both ends if needed. An
852 immediate ACK is sent to indicate that we received an
853 out-of-sequence segment.
855 +) Finally, we check if the first segment on the ->ooseq queue
856 now is in sequence (i.e., if rcv_nxt >= ooseq->seqno). If
857 rcv_nxt > ooseq->seqno, we must trim the first edge of the
858 segment on ->ooseq before we adjust rcv_nxt. The data in the
859 segments that are now on sequence are chained onto the
860 incoming segment so that we only need to call the application once. */
864 /* First, we check if we must trim the first edge. We have to do
865 this if the sequence number of the incoming segment is less
866 than rcv_nxt, and the sequence number plus the length of the
867 segment is larger than rcv_nxt. */
868 /* if (TCP_SEQ_LT(seqno, pcb->rcv_nxt)){
869 if (TCP_SEQ_LT(pcb->rcv_nxt, seqno + tcplen)) {*/
870 if (TCP_SEQ_BETWEEN(pcb
->rcv_nxt
, seqno
+ 1, seqno
+ tcplen
- 1)){
871 /* Trimming the first edge is done by pushing the payload
872 pointer in the pbuf downwards. This is somewhat tricky since
873 we do not want to discard the full contents of the pbuf up to
874 the new starting point of the data since we have to keep the
875 TCP header which is present in the first pbuf in the chain.
877 What is done is really quite a nasty hack: the first pbuf in
878 the pbuf chain is pointed to by inseg.p. Since we need to be
879 able to deallocate the whole pbuf, we cannot change this
880 inseg.p pointer to point to any of the later pbufs in the
881 chain. Instead, we point the ->payload pointer in the first
882 pbuf to data in one of the later pbufs. We also set the
883 inseg.data pointer to point to the right place. This way, the
884 ->p pointer will still point to the first pbuf, but the
885 ->p->payload pointer will point to data in another pbuf.
887 After we are done with adjusting the pbuf pointers we must
888 adjust the ->data pointer in the seg and the segment length. */
891 off
= pcb
->rcv_nxt
- seqno
;
893 LWIP_ASSERT("inseg.p != NULL", inseg
.p
);
894 if (inseg
.p
->len
< off
) {
895 new_tot_len
= inseg
.p
->tot_len
- off
;
896 while (p
->len
< off
) {
898 /* KJM following line changed (with addition of new_tot_len var)
900 inseg.p->tot_len -= p->len; */
901 p
->tot_len
= new_tot_len
;
905 pbuf_header(p
, -off
);
907 pbuf_header(inseg
.p
, -off
);
909 /* KJM following line changed to use p->payload rather than inseg->p->payload
911 inseg
.dataptr
= p
->payload
;
912 inseg
.len
-= pcb
->rcv_nxt
- seqno
;
913 inseg
.tcphdr
->seqno
= seqno
= pcb
->rcv_nxt
;
916 if (TCP_SEQ_LT(seqno
, pcb
->rcv_nxt
)){
917 /* the whole segment is < rcv_nxt */
918 /* must be a duplicate of a packet that has already been correctly handled */
920 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: duplicate seqno %"U32_F
"\n", seqno
));
925 /* The sequence number must be within the window (above rcv_nxt
926 and below rcv_nxt + rcv_wnd) in order to be further processed. */
928 /*if (TCP_SEQ_GEQ(seqno, pcb->rcv_nxt) &&
929 TCP_SEQ_LT(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
930 if (TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+ pcb
->rcv_wnd
- 1)){
931 if (pcb
->rcv_nxt
== seqno
) {
933 /* The incoming segment is the next in sequence. We check if
934 we have to trim the end of the segment and update rcv_nxt
935 and pass the data to the application. */
937 if (pcb
->ooseq
!= NULL
&&
938 TCP_SEQ_LEQ(pcb
->ooseq
->tcphdr
->seqno
, seqno
+ inseg
.len
)) {
939 /* We have to trim the second edge of the incoming segment. */
941 inseg
.len
= pcb
->ooseq
->tcphdr
->seqno
- seqno
;
942 pbuf_realloc(inseg
.p
, inseg
.len
);
944 #endif /* TCP_QUEUE_OOSEQ */
946 tcplen
= TCP_TCPLEN(&inseg
);
948 /* First received FIN will be ACKed +1, on any successive (duplicate)
949 * FINs we are already in CLOSE_WAIT and have already done +1.
951 if (pcb
->state
!= CLOSE_WAIT
) {
952 pcb
->rcv_nxt
+= tcplen
;
955 /* Update the receiver's (our) window. */
956 if (pcb
->rcv_wnd
< tcplen
) {
959 pcb
->rcv_wnd
-= tcplen
;
962 /* If there is data in the segment, we make preparations to
963 pass this up to the application. The ->recv_data variable
964 is used for holding the pbuf that goes to the
965 application. The code for reassembling out-of-sequence data
966 chains its data on this pbuf as well.
968 If the segment was a FIN, we set the TF_GOT_FIN flag that will
969 be used to indicate to the application that the remote side has
970 closed its end of the connection. */
971 if (inseg
.p
->tot_len
> 0) {
973 /* Since this pbuf now is the responsibility of the
974 application, we delete our reference to it so that we won't
975 (mistakenly) deallocate it. */
978 if (TCPH_FLAGS(inseg
.tcphdr
) & TCP_FIN
) {
979 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: received FIN.\n"));
980 recv_flags
= TF_GOT_FIN
;
984 /* We now check if we have segments on the ->ooseq queue that
985 are now in sequence. */
986 while (pcb
->ooseq
!= NULL
&&
987 pcb
->ooseq
->tcphdr
->seqno
== pcb
->rcv_nxt
) {
990 seqno
= pcb
->ooseq
->tcphdr
->seqno
;
992 pcb
->rcv_nxt
+= TCP_TCPLEN(cseg
);
993 if (pcb
->rcv_wnd
< TCP_TCPLEN(cseg
)) {
996 pcb
->rcv_wnd
-= TCP_TCPLEN(cseg
);
998 if (cseg
->p
->tot_len
> 0) {
999 /* Chain this pbuf onto the pbuf that we will pass to the application. */
1002 pbuf_cat(recv_data
, cseg
->p
);
1004 recv_data
= cseg
->p
;
1008 if (TCPH_FLAGS(cseg
->tcphdr
) & TCP_FIN
) {
1009 LWIP_DEBUGF(TCP_INPUT_DEBUG
, ("tcp_receive: dequeued FIN.\n"));
1010 recv_flags
= TF_GOT_FIN
;
1011 if (pcb
->state
== ESTABLISHED
) { /* force passive close or we can move to active close */
1012 pcb
->state
= CLOSE_WAIT
;
1017 pcb
->ooseq
= cseg
->next
;
1020 #endif /* TCP_QUEUE_OOSEQ */
1023 /* Acknowledge the segment(s). */
1027 /* We get here if the incoming segment is out-of-sequence. */
1030 /* We queue the segment on the ->ooseq queue. */
1031 if (pcb
->ooseq
== NULL
) {
1032 pcb
->ooseq
= tcp_seg_copy(&inseg
);
1034 /* If the queue is not empty, we walk through the queue and
1035 try to find a place where the sequence number of the
1036 incoming segment is between the sequence numbers of the
1037 previous and the next segment on the ->ooseq queue. That is
1038 the place where we put the incoming segment. If needed, we
1039 trim the second edges of the previous and the incoming
1040 segment so that it will fit into the sequence.
1042 If the incoming segment has the same sequence number as a
1043 segment on the ->ooseq queue, we discard the segment that
1044 contains less data. */
1047 for(next
= pcb
->ooseq
; next
!= NULL
; next
= next
->next
) {
1048 if (seqno
== next
->tcphdr
->seqno
) {
1049 /* The sequence number of the incoming segment is the
1050 same as the sequence number of the segment on
1051 ->ooseq. We check the lengths to see which one to discard. */
1053 if (inseg
.len
> next
->len
) {
1054 /* The incoming segment is larger than the old
1055 segment. We replace the old segment with the new one. */
1057 cseg
= tcp_seg_copy(&inseg
);
1059 cseg
->next
= next
->next
;
1068 /* Either the lengths are the same or the incoming
1069 segment was smaller than the old one; in either
1070 case, we ditch the incoming segment. */
1075 if (TCP_SEQ_LT(seqno
, next
->tcphdr
->seqno
)) {
1076 /* The sequence number of the incoming segment is lower
1077 than the sequence number of the first segment on the
1078 queue. We put the incoming segment first on the queue. */
1081 if (TCP_SEQ_GT(seqno
+ inseg
.len
, next
->tcphdr
->seqno
)) {
1082 /* We need to trim the incoming segment. */
1083 inseg
.len
= next
->tcphdr
->seqno
- seqno
;
1084 pbuf_realloc(inseg
.p
, inseg
.len
);
1086 cseg
= tcp_seg_copy(&inseg
);
1094 /*if (TCP_SEQ_LT(prev->tcphdr->seqno, seqno) &&
1095 TCP_SEQ_LT(seqno, next->tcphdr->seqno)) {*/
1096 if(TCP_SEQ_BETWEEN(seqno
, prev
->tcphdr
->seqno
+1, next
->tcphdr
->seqno
-1)){
1097 /* The sequence number of the incoming segment is in
1098 between the sequence numbers of the previous and
1099 the next segment on ->ooseq. We trim and insert the
1100 incoming segment and trim the previous segment, if needed. */
1102 if (TCP_SEQ_GT(seqno
+ inseg
.len
, next
->tcphdr
->seqno
)) {
1103 /* We need to trim the incoming segment. */
1104 inseg
.len
= next
->tcphdr
->seqno
- seqno
;
1105 pbuf_realloc(inseg
.p
, inseg
.len
);
1108 cseg
= tcp_seg_copy(&inseg
);
1112 if (TCP_SEQ_GT(prev
->tcphdr
->seqno
+ prev
->len
, seqno
)) {
1113 /* We need to trim the prev segment. */
1114 prev
->len
= seqno
- prev
->tcphdr
->seqno
;
1115 pbuf_realloc(prev
->p
, prev
->len
);
1120 /* If the "next" segment is the last segment on the
1121 ooseq queue, we add the incoming segment to the end of the list. */
1123 if (next
->next
== NULL
&&
1124 TCP_SEQ_GT(seqno
, next
->tcphdr
->seqno
)) {
1125 next
->next
= tcp_seg_copy(&inseg
);
1126 if (next
->next
!= NULL
) {
1127 if (TCP_SEQ_GT(next
->tcphdr
->seqno
+ next
->len
, seqno
)) {
1128 /* We need to trim the last segment. */
1129 next
->len
= seqno
- next
->tcphdr
->seqno
;
1130 pbuf_realloc(next
->p
, next
->len
);
1139 #endif /* TCP_QUEUE_OOSEQ */
1143 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1144 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1145 if(!TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+ pcb
->rcv_wnd
-1)){
1150 /* Segments with length 0 are taken care of here. Segments that
1151 fall out of the window are ACKed. */
1152 /*if (TCP_SEQ_GT(pcb->rcv_nxt, seqno) ||
1153 TCP_SEQ_GEQ(seqno, pcb->rcv_nxt + pcb->rcv_wnd)) {*/
1154 if(!TCP_SEQ_BETWEEN(seqno
, pcb
->rcv_nxt
, pcb
->rcv_nxt
+ pcb
->rcv_wnd
-1)){
1158 return accepted_inseq
;
1164 * Parses the options contained in the incoming segment. (Code taken
1165 * from uIP with only small changes.)
1170 tcp_parseopt(struct tcp_pcb
*pcb
)
1176 opts
= (u8_t
*)tcphdr
+ TCP_HLEN
;
1178 /* Parse the TCP MSS option, if present. */
1179 if(TCPH_HDRLEN(tcphdr
) > 0x5) {
1180 for(c
= 0; c
< (TCPH_HDRLEN(tcphdr
) - 5) << 2 ;) {
1183 /* End of options. */
1185 } else if (opt
== 0x01) {
1188 } else if (opt
== 0x02 &&
1189 opts
[c
+ 1] == 0x04) {
1190 /* An MSS option with the right option length. */
1191 mss
= (opts
[c
+ 2] << 8) | opts
[c
+ 3];
1192 pcb
->mss
= mss
> TCP_MSS
? TCP_MSS
: mss
;
1194 /* And we are done processing options. */
1197 if (opts
[c
+ 1] == 0) {
1198 /* If the length field is zero, the options are malformed
1199 and we don't process them further. */
1202 /* All other options have a length field, so that we easily
1203 can skip past them. */
1209 #endif /* LWIP_TCP */