2 * Copyright (c) 2003, 2004 Jeffrey M. Hsu. All rights reserved.
3 * Copyright (c) 2003, 2004 The DragonFly Project. All rights reserved.
5 * This code is derived from software contributed to The DragonFly Project
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of The DragonFly Project nor the names of its
17 * contributors may be used to endorse or promote products derived
18 * from this software without specific, prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
23 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
24 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
26 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
28 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
30 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * $DragonFly: src/sys/netinet/tcp_sack.c,v 1.8 2008/08/15 21:37:16 nth Exp $
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/thread.h>
42 #include <sys/types.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
48 #include <netinet/in.h>
49 #include <netinet/in_systm.h>
50 #include <netinet/ip.h>
51 #include <netinet/in_var.h>
52 #include <netinet/in_pcb.h>
53 #include <netinet/ip_var.h>
54 #include <netinet/tcp.h>
55 #include <netinet/tcp_seq.h>
56 #include <netinet/tcp_var.h>
70 TAILQ_ENTRY(sackblock
) sblk_list
;
73 #define MAXSAVEDBLOCKS 8 /* per connection limit */
75 static int insert_block(struct scoreboard
*scb
,
76 const struct raw_sackblock
*raw_sb
, boolean_t
*update
);
78 static MALLOC_DEFINE(M_SACKBLOCK
, "sblk", "sackblock struct");
81 * Per-tcpcb initialization.
84 tcp_sack_tcpcb_init(struct tcpcb
*tp
)
86 struct scoreboard
*scb
= &tp
->scb
;
89 TAILQ_INIT(&scb
->sackblocks
);
90 scb
->lastfound
= NULL
;
94 * Find the SACK block containing or immediately preceding "seq".
95 * The boolean result indicates whether the sequence is actually
96 * contained in the SACK block.
99 sack_block_lookup(struct scoreboard
*scb
, tcp_seq seq
, struct sackblock
**sb
)
101 struct sackblock
*hint
= scb
->lastfound
;
102 struct sackblock
*cur
, *last
, *prev
;
104 if (TAILQ_EMPTY(&scb
->sackblocks
)) {
110 /* No hint. Search from start to end. */
111 cur
= TAILQ_FIRST(&scb
->sackblocks
);
113 prev
= TAILQ_LAST(&scb
->sackblocks
, sackblock_list
);
115 if (SEQ_GEQ(seq
, hint
->sblk_start
)) {
116 /* Search from hint to end of list. */
119 prev
= TAILQ_LAST(&scb
->sackblocks
, sackblock_list
);
121 /* Search from front of list to hint. */
122 cur
= TAILQ_FIRST(&scb
->sackblocks
);
124 prev
= TAILQ_PREV(hint
, sackblock_list
, sblk_list
);
129 if (SEQ_GT(cur
->sblk_end
, seq
)) {
130 if (SEQ_GEQ(seq
, cur
->sblk_start
)) {
131 *sb
= scb
->lastfound
= cur
;
134 *sb
= scb
->lastfound
=
135 TAILQ_PREV(cur
, sackblock_list
, sblk_list
);
139 cur
= TAILQ_NEXT(cur
, sblk_list
);
140 } while (cur
!= last
);
142 *sb
= scb
->lastfound
= prev
;
147 * Allocate a SACK block.
149 static __inline
struct sackblock
*
150 alloc_sackblock(struct scoreboard
*scb
, const struct raw_sackblock
*raw_sb
)
152 struct sackblock
*sb
;
154 if (scb
->freecache
!= NULL
) {
156 scb
->freecache
= NULL
;
157 tcpstat
.tcps_sacksbfast
++;
159 sb
= kmalloc(sizeof(struct sackblock
), M_SACKBLOCK
, M_NOWAIT
);
161 tcpstat
.tcps_sacksbfailed
++;
165 sb
->sblk_start
= raw_sb
->rblk_start
;
166 sb
->sblk_end
= raw_sb
->rblk_end
;
170 static __inline
struct sackblock
*
171 alloc_sackblock_limit(struct scoreboard
*scb
,
172 const struct raw_sackblock
*raw_sb
)
174 if (scb
->nblocks
== MAXSAVEDBLOCKS
) {
176 * Should try to kick out older blocks XXX JH
177 * May be able to coalesce with existing block.
178 * Or, go other way and free all blocks if we hit
181 tcpstat
.tcps_sacksboverflow
++;
184 return alloc_sackblock(scb
, raw_sb
);
191 free_sackblock(struct scoreboard
*scb
, struct sackblock
*s
)
193 if (scb
->freecache
== NULL
) {
194 /* YYY Maybe use the latest freed block? */
198 kfree(s
, M_SACKBLOCK
);
202 * Free up SACK blocks for data that's been acked.
205 tcp_sack_ack_blocks(struct tcpcb
*tp
, tcp_seq th_ack
)
207 struct scoreboard
*scb
= &tp
->scb
;
208 struct sackblock
*sb
, *nb
;
210 sb
= TAILQ_FIRST(&scb
->sackblocks
);
211 while (sb
&& SEQ_LEQ(sb
->sblk_end
, th_ack
)) {
212 nb
= TAILQ_NEXT(sb
, sblk_list
);
213 if (scb
->lastfound
== sb
)
214 scb
->lastfound
= NULL
;
215 TAILQ_REMOVE(&scb
->sackblocks
, sb
, sblk_list
);
216 free_sackblock(scb
, sb
);
218 KASSERT(scb
->nblocks
>= 0,
219 ("SACK block count underflow: %d < 0", scb
->nblocks
));
222 if (sb
&& SEQ_GEQ(th_ack
, sb
->sblk_start
)) {
223 /* Other side reneged? XXX */
224 tcpstat
.tcps_sackrenege
++;
225 tcp_sack_discard(tp
);
230 * Delete and free SACK blocks saved in scoreboard.
233 tcp_sack_cleanup(struct scoreboard
*scb
)
235 struct sackblock
*sb
, *nb
;
237 TAILQ_FOREACH_MUTABLE(sb
, &scb
->sackblocks
, sblk_list
, nb
) {
238 free_sackblock(scb
, sb
);
241 KASSERT(scb
->nblocks
== 0,
242 ("SACK block %d count not zero", scb
->nblocks
));
243 TAILQ_INIT(&scb
->sackblocks
);
244 scb
->lastfound
= NULL
;
248 * Discard SACK scoreboard, HighRxt, RescueRxt and LostSeq.
251 tcp_sack_discard(struct tcpcb
*tp
)
253 tcp_sack_cleanup(&tp
->scb
);
254 tp
->rexmt_high
= tp
->snd_una
;
255 tp
->sack_flags
&= ~TSACK_F_SACKRESCUED
;
256 tp
->scb
.lostseq
= tp
->snd_una
;
260 * Delete and free SACK blocks saved in scoreboard.
261 * Delete the one slot block cache.
264 tcp_sack_destroy(struct scoreboard
*scb
)
266 tcp_sack_cleanup(scb
);
267 if (scb
->freecache
!= NULL
) {
268 kfree(scb
->freecache
, M_SACKBLOCK
);
269 scb
->freecache
= NULL
;
274 * Cleanup the reported SACK block information
277 tcp_sack_report_cleanup(struct tcpcb
*tp
)
280 ~(TSACK_F_DUPSEG
| TSACK_F_ENCLOSESEG
| TSACK_F_SACKLEFT
);
281 tp
->reportblk
.rblk_start
= tp
->reportblk
.rblk_end
;
285 * Whether SACK report is needed or not
288 tcp_sack_report_needed(const struct tcpcb
*tp
)
290 if ((tp
->sack_flags
&
291 (TSACK_F_DUPSEG
| TSACK_F_ENCLOSESEG
| TSACK_F_SACKLEFT
)) ||
292 tp
->reportblk
.rblk_start
!= tp
->reportblk
.rblk_end
)
299 * Returns 0 if not D-SACK block,
301 * 2 if duplicate of out-of-order D-SACK block.
304 tcp_sack_ndsack_blocks(const struct raw_sackblock
*blocks
, const int numblocks
,
310 if (SEQ_LT(blocks
[0].rblk_start
, snd_una
))
313 /* block 0 inside block 1 */
315 SEQ_GEQ(blocks
[0].rblk_start
, blocks
[1].rblk_start
) &&
316 SEQ_LEQ(blocks
[0].rblk_end
, blocks
[1].rblk_end
))
323 * Update scoreboard on new incoming ACK.
326 tcp_sack_add_blocks(struct tcpcb
*tp
, struct tcpopt
*to
)
328 const int numblocks
= to
->to_nsackblocks
;
329 struct raw_sackblock
*blocks
= to
->to_sackblocks
;
330 struct scoreboard
*scb
= &tp
->scb
;
333 if (tcp_sack_ndsack_blocks(blocks
, numblocks
, tp
->snd_una
) > 0)
338 to
->to_flags
|= TOF_SACK_REDUNDANT
;
339 for (i
= startblock
; i
< numblocks
; i
++) {
340 struct raw_sackblock
*newsackblock
= &blocks
[i
];
344 /* Guard against ACK reordering */
345 if (SEQ_LEQ(newsackblock
->rblk_start
, tp
->snd_una
))
348 /* Don't accept bad SACK blocks */
349 if (SEQ_GT(newsackblock
->rblk_end
, tp
->snd_max
)) {
350 tcpstat
.tcps_rcvbadsackopt
++;
351 break; /* skip all other blocks */
353 tcpstat
.tcps_sacksbupdate
++;
355 error
= insert_block(scb
, newsackblock
, &update
);
357 to
->to_flags
&= ~TOF_SACK_REDUNDANT
;
364 tcp_sack_update_scoreboard(struct tcpcb
*tp
, struct tcpopt
*to
)
366 struct scoreboard
*scb
= &tp
->scb
;
367 int rexmt_high_update
= 0;
369 tcp_sack_ack_blocks(tp
, tp
->snd_una
);
370 tcp_sack_add_blocks(tp
, to
);
371 tcp_sack_update_lostseq(scb
, tp
->snd_una
, tp
->t_maxseg
,
373 if (SEQ_LT(tp
->rexmt_high
, tp
->snd_una
)) {
374 tp
->rexmt_high
= tp
->snd_una
;
375 rexmt_high_update
= 1;
377 if (tp
->sack_flags
& TSACK_F_SACKRESCUED
) {
378 if (SEQ_LEQ(tp
->rexmt_rescue
, tp
->snd_una
)) {
379 tp
->sack_flags
&= ~TSACK_F_SACKRESCUED
;
380 } else if (tcp_aggressive_rescuesack
&& rexmt_high_update
&&
381 SEQ_LT(tp
->rexmt_rescue
, tp
->rexmt_high
)) {
382 /* Drag RescueRxt along with HighRxt */
383 tp
->rexmt_rescue
= tp
->rexmt_high
;
389 * Insert SACK block into sender's scoreboard.
392 insert_block(struct scoreboard
*scb
, const struct raw_sackblock
*raw_sb
,
395 struct sackblock
*sb
, *workingblock
;
396 boolean_t overlap_front
;
399 if (TAILQ_EMPTY(&scb
->sackblocks
)) {
400 struct sackblock
*newblock
;
402 KASSERT(scb
->nblocks
== 0, ("emply scb w/ blocks"));
404 newblock
= alloc_sackblock(scb
, raw_sb
);
405 if (newblock
== NULL
)
407 TAILQ_INSERT_HEAD(&scb
->sackblocks
, newblock
, sblk_list
);
412 KASSERT(scb
->nblocks
> 0, ("insert_block() called w/ no blocks"));
413 KASSERT(scb
->nblocks
<= MAXSAVEDBLOCKS
,
414 ("too many SACK blocks %d", scb
->nblocks
));
416 overlap_front
= sack_block_lookup(scb
, raw_sb
->rblk_start
, &sb
);
419 workingblock
= alloc_sackblock_limit(scb
, raw_sb
);
420 if (workingblock
== NULL
)
422 TAILQ_INSERT_HEAD(&scb
->sackblocks
, workingblock
, sblk_list
);
425 if (overlap_front
|| sb
->sblk_end
== raw_sb
->rblk_start
) {
426 tcpstat
.tcps_sacksbreused
++;
428 /* Extend old block */
430 if (SEQ_GT(raw_sb
->rblk_end
, sb
->sblk_end
)) {
431 sb
->sblk_end
= raw_sb
->rblk_end
;
433 /* Exact match, nothing to consolidate */
438 workingblock
= alloc_sackblock_limit(scb
, raw_sb
);
439 if (workingblock
== NULL
)
441 TAILQ_INSERT_AFTER(&scb
->sackblocks
, sb
, workingblock
,
447 /* Consolidate right-hand side. */
448 sb
= TAILQ_NEXT(workingblock
, sblk_list
);
450 SEQ_GEQ(workingblock
->sblk_end
, sb
->sblk_end
)) {
451 struct sackblock
*nextblock
;
453 nextblock
= TAILQ_NEXT(sb
, sblk_list
);
454 if (scb
->lastfound
== sb
)
455 scb
->lastfound
= NULL
;
456 /* Remove completely overlapped block */
457 TAILQ_REMOVE(&scb
->sackblocks
, sb
, sblk_list
);
458 free_sackblock(scb
, sb
);
460 KASSERT(scb
->nblocks
> 0,
461 ("removed overlapped block: %d blocks left", scb
->nblocks
));
465 SEQ_GEQ(workingblock
->sblk_end
, sb
->sblk_start
)) {
466 /* Extend new block to cover partially overlapped old block. */
467 workingblock
->sblk_end
= sb
->sblk_end
;
468 if (scb
->lastfound
== sb
)
469 scb
->lastfound
= NULL
;
470 TAILQ_REMOVE(&scb
->sackblocks
, sb
, sblk_list
);
471 free_sackblock(scb
, sb
);
473 KASSERT(scb
->nblocks
> 0,
474 ("removed partial right: %d blocks left", scb
->nblocks
));
479 #ifdef DEBUG_SACK_BLOCKS
481 tcp_sack_dump_blocks(const struct scoreboard
*scb
)
483 const struct sackblock
*sb
;
485 kprintf("%d blocks:", scb
->nblocks
);
486 TAILQ_FOREACH(sb
, &scb
->sackblocks
, sblk_list
)
487 kprintf(" [%u, %u)", sb
->sblk_start
, sb
->sblk_end
);
492 tcp_sack_dump_blocks(const struct scoreboard
*scb
)
498 * Optimization to quickly determine which packets are lost.
501 tcp_sack_update_lostseq(struct scoreboard
*scb
, tcp_seq snd_una
, u_int maxseg
,
504 struct sackblock
*sb
;
506 int bytes_sacked
= 0;
510 rxtthresh_bytes
= (rxtthresh
- 1) * maxseg
;
512 rxtthresh_bytes
= rxtthresh
* maxseg
;
514 sb
= TAILQ_LAST(&scb
->sackblocks
, sackblock_list
);
517 bytes_sacked
+= sb
->sblk_end
- sb
->sblk_start
;
518 if (nsackblocks
== rxtthresh
||
519 bytes_sacked
>= rxtthresh_bytes
) {
520 scb
->lostseq
= sb
->sblk_start
;
523 sb
= TAILQ_PREV(sb
, sackblock_list
, sblk_list
);
525 scb
->lostseq
= snd_una
;
529 * Return whether the given sequence number is considered lost.
532 tcp_sack_islost(const struct scoreboard
*scb
, tcp_seq seqnum
)
534 return SEQ_LT(seqnum
, scb
->lostseq
);
538 * True if at least "amount" has been SACKed. Used by Early Retransmit.
541 tcp_sack_has_sacked(const struct scoreboard
*scb
, u_int amount
)
543 const struct sackblock
*sb
;
544 int bytes_sacked
= 0;
546 TAILQ_FOREACH(sb
, &scb
->sackblocks
, sblk_list
) {
547 bytes_sacked
+= sb
->sblk_end
- sb
->sblk_start
;
548 if (bytes_sacked
>= amount
)
555 * Number of bytes SACKed below seq.
558 tcp_sack_bytes_below(const struct scoreboard
*scb
, tcp_seq seq
)
560 const struct sackblock
*sb
;
561 int bytes_sacked
= 0;
563 sb
= TAILQ_FIRST(&scb
->sackblocks
);
564 while (sb
&& SEQ_GT(seq
, sb
->sblk_start
)) {
565 bytes_sacked
+= seq_min(seq
, sb
->sblk_end
) - sb
->sblk_start
;
566 sb
= TAILQ_NEXT(sb
, sblk_list
);
572 * Return estimate of the number of bytes outstanding in the network.
575 tcp_sack_compute_pipe(const struct tcpcb
*tp
)
577 const struct scoreboard
*scb
= &tp
->scb
;
578 const struct sackblock
*sb
;
579 int nlost
, nretransmitted
;
582 nlost
= tp
->snd_max
- scb
->lostseq
;
583 nretransmitted
= tp
->rexmt_high
- tp
->snd_una
;
585 TAILQ_FOREACH(sb
, &scb
->sackblocks
, sblk_list
) {
586 if (SEQ_LT(sb
->sblk_start
, tp
->rexmt_high
)) {
587 end
= seq_min(sb
->sblk_end
, tp
->rexmt_high
);
588 nretransmitted
-= end
- sb
->sblk_start
;
590 if (SEQ_GEQ(sb
->sblk_start
, scb
->lostseq
))
591 nlost
-= sb
->sblk_end
- sb
->sblk_start
;
594 return (nlost
+ nretransmitted
);
598 * Return the sequence number and length of the next segment to transmit
599 * when in Fast Recovery.
602 tcp_sack_nextseg(struct tcpcb
*tp
, tcp_seq
*nextrexmt
, uint32_t *plen
,
605 struct scoreboard
*scb
= &tp
->scb
;
606 struct socket
*so
= tp
->t_inpcb
->inp_socket
;
607 struct sackblock
*sb
;
608 const struct sackblock
*lastblock
=
609 TAILQ_LAST(&scb
->sackblocks
, sackblock_list
);
611 long len
, off
, sendwin
;
613 /* skip SACKed data */
614 tcp_sack_skip_sacked(scb
, &tp
->rexmt_high
);
616 /* Look for lost data. */
617 torexmt
= tp
->rexmt_high
;
619 if (lastblock
!= NULL
) {
620 if (SEQ_LT(torexmt
, lastblock
->sblk_end
) &&
621 tcp_sack_islost(scb
, torexmt
)) {
623 *nextrexmt
= torexmt
;
624 /* If the left-hand edge has been SACKed, pull it in. */
625 if (sack_block_lookup(scb
, torexmt
+ tp
->t_maxseg
, &sb
))
626 *plen
= sb
->sblk_start
- torexmt
;
628 *plen
= tp
->t_maxseg
;
633 /* See if unsent data available within send window. */
634 off
= tp
->snd_max
- tp
->snd_una
;
635 sendwin
= min(tp
->snd_wnd
, tp
->snd_bwnd
);
636 len
= (long) ulmin(so
->so_snd
.ssb_cc
, sendwin
) - off
;
638 *nextrexmt
= tp
->snd_max
; /* Send new data. */
639 *plen
= tp
->t_maxseg
;
643 /* We're less certain this data has been lost. */
644 if (lastblock
!= NULL
&& SEQ_LT(torexmt
, lastblock
->sblk_end
))
647 /* Rescue retransmission */
648 if (tcp_do_rescuesack
|| tcp_do_rfc6675
) {
649 tcpstat
.tcps_sackrescue_try
++;
650 if (tp
->sack_flags
& TSACK_F_SACKRESCUED
) {
651 if (!tcp_aggressive_rescuesack
)
655 * Aggressive variant of the rescue retransmission.
657 * The idea of the rescue retransmission is to sustain
658 * the ACK clock thus to avoid timeout retransmission.
660 * Under some situations, the conservative approach
661 * suggested in the draft
662 * http://tools.ietf.org/html/
663 * draft-nishida-tcpm-rescue-retransmission-00
664 * could not sustain ACK clock, since it only allows
665 * one rescue retransmission before a cumulative ACK
666 * covers the segement transmitted by rescue
669 * We try to locate the next unSACKed segment which
670 * follows the previously sent rescue segment. If
671 * there is no such segment, we loop back to the first
672 * unacknowledged segment.
676 * Skip SACKed data, but here we follow
677 * the last transmitted rescue segment.
679 torexmt
= tp
->rexmt_rescue
;
680 tcp_sack_skip_sacked(scb
, &torexmt
);
682 if (torexmt
== tp
->snd_max
) {
683 /* Nothing left to retransmit; restart */
684 torexmt
= tp
->snd_una
;
688 } else if (tcp_do_smartsack
&& lastblock
== NULL
) {
689 tcpstat
.tcps_sackrescue_try
++;
698 * Return the next sequence number higher than "*prexmt" that has
702 tcp_sack_skip_sacked(struct scoreboard
*scb
, tcp_seq
*prexmt
)
704 struct sackblock
*sb
;
706 /* skip SACKed data */
707 if (sack_block_lookup(scb
, *prexmt
, &sb
))
708 *prexmt
= sb
->sblk_end
;
712 * The length of the first amount of unSACKed data
715 tcp_sack_first_unsacked_len(const struct tcpcb
*tp
)
717 const struct sackblock
*sb
;
719 sb
= TAILQ_FIRST(&tp
->scb
.sackblocks
);
723 KASSERT(SEQ_LT(tp
->snd_una
, sb
->sblk_start
),
724 ("invalid sb start %u, snd_una %u",
725 sb
->sblk_start
, tp
->snd_una
));
726 return (sb
->sblk_start
- tp
->snd_una
);
731 tcp_sack_save_scoreboard(struct scoreboard
*scb
)
733 struct scoreboard
*scb
= &tp
->scb
;
735 scb
->sackblocks_prev
= scb
->sackblocks
;
736 TAILQ_INIT(&scb
->sackblocks
);
740 tcp_sack_revert_scoreboard(struct scoreboard
*scb
, tcp_seq snd_una
,
743 struct sackblock
*sb
;
745 scb
->sackblocks
= scb
->sackblocks_prev
;
747 TAILQ_FOREACH(sb
, &scb
->sackblocks
, sblk_list
)
749 tcp_sack_ack_blocks(scb
, snd_una
);
750 scb
->lastfound
= NULL
;
754 #ifdef DEBUG_SACK_HISTORY
756 tcp_sack_dump_history(const char *msg
, const struct tcpcb
*tp
)
761 /* only need a couple of these to debug most problems */
765 kprintf("%s:\tnsackhistory %d: ", msg
, tp
->nsackhistory
);
766 for (i
= 0; i
< tp
->nsackhistory
; ++i
)
767 kprintf("[%u, %u) ", tp
->sackhistory
[i
].rblk_start
,
768 tp
->sackhistory
[i
].rblk_end
);
773 tcp_sack_dump_history(const char *msg
, const struct tcpcb
*tp
)
779 * Remove old SACK blocks from the SACK history that have already been ACKed.
782 tcp_sack_ack_history(struct tcpcb
*tp
)
784 int i
, nblocks
, openslot
;
786 tcp_sack_dump_history("before tcp_sack_ack_history", tp
);
787 nblocks
= tp
->nsackhistory
;
788 for (i
= openslot
= 0; i
< nblocks
; ++i
) {
789 if (SEQ_LEQ(tp
->sackhistory
[i
].rblk_end
, tp
->rcv_nxt
)) {
793 if (SEQ_LT(tp
->sackhistory
[i
].rblk_start
, tp
->rcv_nxt
))
794 tp
->sackhistory
[i
].rblk_start
= tp
->rcv_nxt
;
798 tp
->sackhistory
[openslot
++] = tp
->sackhistory
[i
];
800 tcp_sack_dump_history("after tcp_sack_ack_history", tp
);
801 KASSERT(openslot
== tp
->nsackhistory
,
802 ("tcp_sack_ack_history miscounted: %d != %d",
803 openslot
, tp
->nsackhistory
));
807 * Add or merge newblock into reported history.
808 * Also remove or update SACK blocks that will be acked.
811 tcp_sack_update_reported_history(struct tcpcb
*tp
, tcp_seq start
, tcp_seq end
)
813 struct raw_sackblock copy
[MAX_SACK_REPORT_BLOCKS
];
816 tcp_sack_dump_history("before tcp_sack_update_reported_history", tp
);
820 * 1) newblock == oldblock
821 * 2) oldblock contains newblock
822 * 3) newblock contains oldblock
823 * 4) tail of oldblock overlaps or abuts start of newblock
824 * 5) tail of newblock overlaps or abuts head of oldblock
826 for (i
= cindex
= 0; i
< tp
->nsackhistory
; ++i
) {
827 struct raw_sackblock
*oldblock
= &tp
->sackhistory
[i
];
828 tcp_seq old_start
= oldblock
->rblk_start
;
829 tcp_seq old_end
= oldblock
->rblk_end
;
831 if (SEQ_LT(end
, old_start
) || SEQ_GT(start
, old_end
)) {
832 /* Case 0: no overlap. Copy old block. */
833 copy
[cindex
++] = *oldblock
;
837 if (SEQ_GEQ(start
, old_start
) && SEQ_LEQ(end
, old_end
)) {
838 /* Cases 1 & 2. Move block to front of history. */
843 /* no need to check rest of blocks */
844 for (j
= i
+ 1; j
< tp
->nsackhistory
; ++j
)
845 copy
[cindex
++] = tp
->sackhistory
[j
];
849 if (SEQ_GEQ(old_end
, start
) && SEQ_LT(old_start
, start
)) {
850 /* Case 4: extend start of new block. */
852 } else if (SEQ_GEQ(end
, old_start
) && SEQ_GT(old_end
, end
)) {
853 /* Case 5: extend end of new block */
856 /* Case 3. Delete old block by not copying it. */
857 KASSERT(SEQ_LEQ(start
, old_start
) &&
858 SEQ_GEQ(end
, old_end
),
859 ("bad logic: old [%u, %u), new [%u, %u)",
860 old_start
, old_end
, start
, end
));
864 /* insert new block */
865 tp
->sackhistory
[0].rblk_start
= start
;
866 tp
->sackhistory
[0].rblk_end
= end
;
867 cindex
= min(cindex
, MAX_SACK_REPORT_BLOCKS
- 1);
868 for (i
= 0; i
< cindex
; ++i
)
869 tp
->sackhistory
[i
+ 1] = copy
[i
];
870 tp
->nsackhistory
= cindex
+ 1;
871 tcp_sack_dump_history("after tcp_sack_update_reported_history", tp
);
875 * Fill in SACK report to return to data sender.
878 tcp_sack_fill_report(struct tcpcb
*tp
, u_char
*opt
, u_int
*plen
)
880 u_int optlen
= *plen
;
881 uint32_t *lp
= (uint32_t *)(opt
+ optlen
);
883 tcp_seq hstart
= tp
->rcv_nxt
, hend
;
886 KASSERT(TCP_MAXOLEN
- optlen
>=
887 TCPOLEN_SACK_ALIGNED
+ TCPOLEN_SACK_BLOCK
,
888 ("no room for SACK header and one block: optlen %d", optlen
));
890 if (tp
->sack_flags
& TSACK_F_DUPSEG
)
891 tcpstat
.tcps_snddsackopt
++;
893 tcpstat
.tcps_sndsackopt
++;
896 optlen
+= TCPOLEN_SACK_ALIGNED
;
898 tcp_sack_ack_history(tp
);
899 if (tp
->reportblk
.rblk_start
!= tp
->reportblk
.rblk_end
) {
900 *lp
++ = htonl(tp
->reportblk
.rblk_start
);
901 *lp
++ = htonl(tp
->reportblk
.rblk_end
);
902 optlen
+= TCPOLEN_SACK_BLOCK
;
903 hstart
= tp
->reportblk
.rblk_start
;
904 hend
= tp
->reportblk
.rblk_end
;
905 if (tp
->sack_flags
& TSACK_F_ENCLOSESEG
) {
906 KASSERT(TCP_MAXOLEN
- optlen
>= TCPOLEN_SACK_BLOCK
,
907 ("no room for enclosing SACK block: oplen %d",
909 *lp
++ = htonl(tp
->encloseblk
.rblk_start
);
910 *lp
++ = htonl(tp
->encloseblk
.rblk_end
);
911 optlen
+= TCPOLEN_SACK_BLOCK
;
912 hstart
= tp
->encloseblk
.rblk_start
;
913 hend
= tp
->encloseblk
.rblk_end
;
915 if (SEQ_GT(hstart
, tp
->rcv_nxt
))
916 tcp_sack_update_reported_history(tp
, hstart
, hend
);
918 if (tcp_do_smartsack
&& (tp
->sack_flags
& TSACK_F_SACKLEFT
)) {
919 /* Fill in from left! Walk re-assembly queue. */
922 q
= TAILQ_FIRST(&tp
->t_segq
);
924 TCP_MAXOLEN
- optlen
>= TCPOLEN_SACK_BLOCK
) {
925 *lp
++ = htonl(q
->tqe_th
->th_seq
);
926 *lp
++ = htonl(TCP_SACK_BLKEND(
927 q
->tqe_th
->th_seq
+ q
->tqe_len
,
928 q
->tqe_th
->th_flags
));
929 optlen
+= TCPOLEN_SACK_BLOCK
;
930 q
= TAILQ_NEXT(q
, tqe_q
);
935 /* Fill in SACK blocks from right side. */
936 while (n
< tp
->nsackhistory
&&
937 TCP_MAXOLEN
- optlen
>= TCPOLEN_SACK_BLOCK
) {
938 if (tp
->sackhistory
[n
].rblk_start
!= hstart
) {
939 *lp
++ = htonl(tp
->sackhistory
[n
].rblk_start
);
940 *lp
++ = htonl(tp
->sackhistory
[n
].rblk_end
);
941 optlen
+= TCPOLEN_SACK_BLOCK
;
946 tp
->reportblk
.rblk_start
= tp
->reportblk
.rblk_end
;
948 ~(TSACK_F_DUPSEG
| TSACK_F_ENCLOSESEG
| TSACK_F_SACKLEFT
);
949 nblocks
= (lp
- olp
- 1) / 2;
950 *olp
= htonl(TCPOPT_SACK_ALIGNED
|
951 (TCPOLEN_SACK
+ nblocks
* TCPOLEN_SACK_BLOCK
));