2 * Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
4 * Changes to meet Linux coding standards, and DCCP infrastructure fixes.
6 * Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 * This implementation should follow RFC 4341
26 #include <linux/slab.h>
33 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34 static int ccid2_debug
;
35 #define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
/*
 * ccid2_hc_tx_check_sanity - debug-only consistency check of the TX
 * sequence ring that hangs off hc->tx_seqh / hc->tx_seqt.
 *
 * What the visible lines establish:
 *  - starting at hc->tx_seqh the ring is walked backwards through
 *    ccid2s_prev until hc->tx_seqt is reached;
 *  - BUG_ON() fires if dccp_delta_seqno(seqp->ccid2s_seq, prev->ccid2s_seq)
 *    is >= 0, if the time_before() check on ccid2s_sent holds, if `pipe`
 *    differs from hc->tx_pipe, or if `len` differs from
 *    hc->tx_seqbufc * CCID2_SEQBUF_LEN.
 *
 * NOTE(review): this listing has lost lines (its embedded numbers skip,
 * e.g. 38-40, 48-49, 52-53, 61-66, 69-70).  The declarations and updates
 * of `len` and `pipe`, the second argument of time_before(), the statements
 * guarded by the two `if (!...->ccid2s_acked)` tests, the `do` keyword and
 * all closing braces are not visible - confirm against the pristine file.
 */
37 static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock
*hc
)
41 struct ccid2_seq
*seqp
= hc
->tx_seqh
;
43 /* there is data in the chain */
44 if (seqp
!= hc
->tx_seqt
) {
45 seqp
= seqp
->ccid2s_prev
;
47 if (!seqp
->ccid2s_acked
)
/* NOTE(review): the statement guarded by the test above is missing here. */
50 while (seqp
!= hc
->tx_seqt
) {
51 struct ccid2_seq
*prev
= seqp
->ccid2s_prev
;
54 if (!prev
->ccid2s_acked
)
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp
->ccid2s_seq
,
59 prev
->ccid2s_seq
) >= 0);
60 BUG_ON(time_before(seqp
->ccid2s_sent
,
/* NOTE(review): the rest of the time_before() call and the loop tail are missing here. */
67 BUG_ON(pipe
!= hc
->tx_pipe
);
68 ccid2_pr_debug("len of chain=%d\n", len
);
71 seqp
= seqp
->ccid2s_prev
;
73 } while (seqp
!= hc
->tx_seqh
);
75 ccid2_pr_debug("total len=%d\n", len
);
/* The whole ring must hold exactly tx_seqbufc buffers of CCID2_SEQBUF_LEN entries. */
76 BUG_ON(len
!= hc
->tx_seqbufc
* CCID2_SEQBUF_LEN
);
/*
 * Empty expansions of ccid2_pr_debug() and ccid2_hc_tx_check_sanity(), so
 * that call sites compile to nothing.
 * NOTE(review): presumably the #else arm of the CONFIG_IP_DCCP_CCID2_DEBUG
 * conditional; the #else/#endif lines themselves are absent from this
 * listing - confirm.
 */
79 #define ccid2_pr_debug(format, a...)
80 #define ccid2_hc_tx_check_sanity(hc)
/*
 * ccid2_hc_tx_alloc_seq - add one buffer of CCID2_SEQBUF_LEN ccid2_seq
 * entries to the sender's sequence ring.
 *
 * Visible steps:
 *  1. check that hc->tx_seqbuf[] still has a free slot for the new buffer
 *     pointer (tx_seqbufc against the array's element count);
 *  2. kmalloc() the entries with gfp_any();
 *  3. chain the entries into a circular doubly linked list;
 *  4. on the first allocation (tx_seqbufc == 0) point both tx_seqh and
 *     tx_seqt at it; the following statements splice the new ring between
 *     tx_seqh and tx_seqt;
 *  5. remember the buffer in hc->tx_seqbuf[hc->tx_seqbufc] so it can be
 *     freed later.
 *
 * Returns int; ccid2_hc_tx_packet_sent() and ccid2_hc_tx_init() treat a
 * non-zero result as failure.
 *
 * NOTE(review): the listing has dropped lines (84, 86-87, 91-92, 95-97,
 * 101, 104, 108, 112, 115-116, 119-123).  Not visible here: the
 * declaration of `i`, every return statement, any NULL check on the
 * kmalloc() result, the else that presumably separates step 4's two cases,
 * and the increment of tx_seqbufc - confirm against the pristine file.
 */
83 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock
*hc
)
85 struct ccid2_seq
*seqp
;
88 /* check if we have space to preserve the pointer to the buffer */
89 if (hc
->tx_seqbufc
>= (sizeof(hc
->tx_seqbuf
) /
90 sizeof(struct ccid2_seq
*)))
93 /* allocate buffer and initialize linked list */
94 seqp
= kmalloc(CCID2_SEQBUF_LEN
* sizeof(struct ccid2_seq
), gfp_any());
/* NOTE(review): seqp is dereferenced below; no NULL check is visible in this listing. */
98 for (i
= 0; i
< (CCID2_SEQBUF_LEN
- 1); i
++) {
99 seqp
[i
].ccid2s_next
= &seqp
[i
+ 1];
100 seqp
[i
+ 1].ccid2s_prev
= &seqp
[i
];
/* Close the ring: last entry points forward to the first, first points back to the last. */
102 seqp
[CCID2_SEQBUF_LEN
- 1].ccid2s_next
= seqp
;
103 seqp
->ccid2s_prev
= &seqp
[CCID2_SEQBUF_LEN
- 1];
105 /* This is the first allocation. Initiate the head and tail. */
106 if (hc
->tx_seqbufc
== 0)
107 hc
->tx_seqh
= hc
->tx_seqt
= seqp
;
109 /* link the existing list with the one we just created */
110 hc
->tx_seqh
->ccid2s_next
= seqp
;
111 seqp
->ccid2s_prev
= hc
->tx_seqh
;
113 hc
->tx_seqt
->ccid2s_prev
= &seqp
[CCID2_SEQBUF_LEN
- 1];
114 seqp
[CCID2_SEQBUF_LEN
- 1].ccid2s_next
= hc
->tx_seqt
;
117 /* store the original pointer to the buffer so we can free it */
118 hc
->tx_seqbuf
[hc
->tx_seqbufc
] = seqp
;
/*
 * ccid2_hc_tx_send_packet - send-permission hook (wired up as
 * .ccid_hc_tx_send_packet in ccid2_ops).
 *
 * The only visible logic compares hc->tx_pipe with hc->tx_cwnd.
 * `skb` is not referenced in the visible lines.
 *
 * NOTE(review): the return statements (listing lines 129-131) are missing,
 * so which value means "may send" cannot be read off this listing.
 */
124 static int ccid2_hc_tx_send_packet(struct sock
*sk
, struct sk_buff
*skb
)
126 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
128 if (hc
->tx_pipe
< hc
->tx_cwnd
)
/*
 * ccid2_change_l_ack_ratio - set the local Ack Ratio (dp->dccps_l_ack_ratio)
 * to @val after bounding it.
 *
 * Visible behaviour:
 *  - max_ratio is DIV_ROUND_UP(tx_cwnd, 2);
 *  - a @val of 0 or above max_ratio triggers a DCCP_WARN();
 *  - @val is capped at DCCPF_ACK_RATIO_MAX;
 *  - the new value is logged and stored in dp->dccps_l_ack_ratio.
 *
 * NOTE(review): listing lines 147-148 and 153-154 are missing.  Presumably
 * the first gap assigns max_ratio to val and the second returns early when
 * the ratio is unchanged - confirm.  The four lines numbered 140-143 are
 * the interior of a block comment whose opening and closing lines were
 * also dropped.
 */
134 static void ccid2_change_l_ack_ratio(struct sock
*sk
, u32 val
)
136 struct dccp_sock
*dp
= dccp_sk(sk
);
137 u32 max_ratio
= DIV_ROUND_UP(ccid2_hc_tx_sk(sk
)->tx_cwnd
, 2);
140 * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
141 * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
142 * acceptable since this causes starvation/deadlock whenever cwnd < 2.
143 * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
145 if (val
== 0 || val
> max_ratio
) {
146 DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val
, max_ratio
);
149 if (val
> DCCPF_ACK_RATIO_MAX
)
150 val
= DCCPF_ACK_RATIO_MAX
;
152 if (val
== dp
->dccps_l_ack_ratio
)
155 ccid2_pr_debug("changing local ack ratio to %u\n", val
);
156 dp
->dccps_l_ack_ratio
= val
;
/*
 * ccid2_change_srtt - helper called with the new smoothed RTT value
 * (callers pass -1 at init time and the RTT sample/estimate afterwards).
 *
 * Only the debug print is visible.
 * NOTE(review): listing line 162 is missing; presumably it stores @val in
 * hc->tx_srtt - confirm.
 */
159 static void ccid2_change_srtt(struct ccid2_hc_tx_sock
*hc
, long val
)
161 ccid2_pr_debug("change SRTT to %ld\n", val
);
/*
 * Forward declaration: ccid2_hc_tx_rto_expire() below calls
 * ccid2_start_rto_timer() before its definition appears.
 */
165 static void ccid2_start_rto_timer(struct sock
*sk
);
/*
 * ccid2_hc_tx_rto_expire - RTO timer callback (registered through
 * setup_timer() in ccid2_hc_tx_init()).
 *
 * @data: the owning socket, passed as unsigned long and cast back to
 *        struct sock *.
 *
 * Visible behaviour:
 *  - if the socket is currently owned by user context, the timer is
 *    re-armed HZ / 5 jiffies from now;
 *  - otherwise: tx_rto is set to 60 * HZ (see note), the RTO timer is
 *    restarted, tx_ssthresh becomes tx_cwnd / 2, the ring is emptied by
 *    moving tx_seqt onto tx_seqh, tx_packets_acked is cleared,
 *    tx_rpdupack is reset to -1 and the local Ack Ratio is set to 1.
 *
 * NOTE(review): the listing has dropped lines 171-173, 176-178, 182-187,
 * 189, 195-198, 202, 204 and 208-211.  Not visible: any socket locking,
 * how the early branch leaves the function, the back-off computation that
 * presumably guards the 60 * HZ assignment, the statement guarded by
 * `tx_ssthresh < 2`, and the cwnd/pipe updates announced by the
 * "adjust pipe, cwnd etc" comment - confirm against the pristine file.
 */
167 static void ccid2_hc_tx_rto_expire(unsigned long data
)
169 struct sock
*sk
= (struct sock
*)data
;
170 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
174 if (sock_owned_by_user(sk
)) {
175 sk_reset_timer(sk
, &hc
->tx_rtotimer
, jiffies
+ HZ
/ 5);
179 ccid2_pr_debug("RTO_EXPIRE\n");
181 ccid2_hc_tx_check_sanity(hc
);
188 hc
->tx_rto
= 60 * HZ
;
190 ccid2_start_rto_timer(sk
);
192 /* adjust pipe, cwnd etc */
193 hc
->tx_ssthresh
= hc
->tx_cwnd
/ 2;
194 if (hc
->tx_ssthresh
< 2)
199 /* clear state about stuff we sent */
200 hc
->tx_seqt
= hc
->tx_seqh
;
201 hc
->tx_packets_acked
= 0;
203 /* clear ack ratio state. */
205 hc
->tx_rpdupack
= -1;
206 ccid2_change_l_ack_ratio(sk
, 1);
207 ccid2_hc_tx_check_sanity(hc
);
/*
 * ccid2_start_rto_timer - arm hc->tx_rtotimer to fire hc->tx_rto jiffies
 * from now.
 *
 * The timer must not already be pending: BUG_ON() enforces this, which is
 * why callers either test timer_pending() first or stop the timer before
 * calling.
 */
213 static void ccid2_start_rto_timer(struct sock
*sk
)
215 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc
->tx_rto
);
219 BUG_ON(timer_pending(&hc
->tx_rtotimer
));
220 sk_reset_timer(sk
, &hc
->tx_rtotimer
, jiffies
+ hc
->tx_rto
);
/*
 * ccid2_hc_tx_packet_sent - record a just-transmitted packet in the
 * sequence ring (wired up as .ccid_hc_tx_packet_sent in ccid2_ops).
 *
 * Visible behaviour:
 *  - the head entry (hc->tx_seqh) is stamped with the sequence number
 *    dp->dccps_gss, marked un-acked and time-stamped with jiffies;
 *  - if the entry after the head is the tail, the ring is full and
 *    ccid2_hc_tx_alloc_seq() is asked for more space; failure is reported
 *    with DCCP_CRIT();
 *  - the RTO timer is started if it is not already pending;
 *  - in CONFIG_IP_DCCP_CCID2_DEBUG builds the ring is dumped from tail to
 *    head and the sanity check is run.
 *
 * `more` and `len` are not referenced in the visible lines.
 *
 * NOTE(review): the listing has dropped lines 228-230, 234, 240-241,
 * 244-246, 248-249, 253, 255, 257, 263-264 and 267-269.  Not visible: the
 * tx_pipe update, what happens after the DCCP_CRIT(), the statement that
 * advances tx_seqh to `next`, and the closing #endif - confirm.
 */
223 static void ccid2_hc_tx_packet_sent(struct sock
*sk
, int more
, unsigned int len
)
225 struct dccp_sock
*dp
= dccp_sk(sk
);
226 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
227 struct ccid2_seq
*next
;
231 hc
->tx_seqh
->ccid2s_seq
= dp
->dccps_gss
;
232 hc
->tx_seqh
->ccid2s_acked
= 0;
233 hc
->tx_seqh
->ccid2s_sent
= jiffies
;
235 next
= hc
->tx_seqh
->ccid2s_next
;
236 /* check if we need to alloc more space */
237 if (next
== hc
->tx_seqt
) {
238 if (ccid2_hc_tx_alloc_seq(hc
)) {
239 DCCP_CRIT("packet history - out of memory!");
/* Re-read the successor: a successful allocation linked new entries after the head. */
242 next
= hc
->tx_seqh
->ccid2s_next
;
243 BUG_ON(next
== hc
->tx_seqt
);
247 ccid2_pr_debug("cwnd=%d pipe=%d\n", hc
->tx_cwnd
, hc
->tx_pipe
);
250 /* setup RTO timer */
251 if (!timer_pending(&hc
->tx_rtotimer
))
252 ccid2_start_rto_timer(sk
);
254 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
256 struct ccid2_seq
*seqp
= hc
->tx_seqt
;
258 while (seqp
!= hc
->tx_seqh
) {
259 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n",
260 (unsigned long long)seqp
->ccid2s_seq
,
261 seqp
->ccid2s_acked
, seqp
->ccid2s_sent
);
262 seqp
= seqp
->ccid2s_next
;
265 ccid2_pr_debug("=========\n");
266 ccid2_hc_tx_check_sanity(hc
);
/*
 * ccid2_ackvector - scan the options area of @skb for an Ack Vector option.
 *
 * The options area is bounded by `options` (header start plus
 * dccp_hdr_len(skb)) and `opt_end` (header start plus dccph_doff * 4).
 * For DCCPO_ACK_VECTOR_0 / DCCPO_ACK_VECTOR_1 the visible return value is
 * @offset + (opt_ptr - options).  The caller
 * (ccid2_hc_tx_packet_recv()) feeds that value back in as @offset and
 * stops when -1 is returned.
 *
 * @vec / @veclen: out parameters; the caller reads them after each
 * successful call.
 *
 * NOTE(review): most of the parser is missing from this listing (lines
 * 280-283, 285-286, 288-291, 296-298, 300, 303-307, 310-312, 315-316,
 * 318-323, 325-326).  Not visible: the initialisation of opt_ptr from
 * @offset, the reads of `opt` and `len`, the stores to *vec and *veclen,
 * the -1 returns and the out_invalid_option label itself.  Confirm the
 * bounds checks against the pristine file before relying on them.
 */
270 static int ccid2_ackvector(struct sock
*sk
, struct sk_buff
*skb
, int offset
,
271 unsigned char **vec
, unsigned char *veclen
)
273 const struct dccp_hdr
*dh
= dccp_hdr(skb
);
274 unsigned char *options
= (unsigned char *)dh
+ dccp_hdr_len(skb
);
275 unsigned char *opt_ptr
;
276 const unsigned char *opt_end
= (unsigned char *)dh
+
277 (dh
->dccph_doff
* 4);
278 unsigned char opt
, len
;
279 unsigned char *value
;
284 if (opt_ptr
>= opt_end
)
287 while (opt_ptr
!= opt_end
) {
292 /* Check if this isn't a single byte option */
293 if (opt
> DCCPO_MAX_RESERVED
) {
294 if (opt_ptr
== opt_end
)
295 goto out_invalid_option
;
299 goto out_invalid_option
;
301 * Remove the type and len fields, leaving
302 * just the value size
308 if (opt_ptr
> opt_end
)
309 goto out_invalid_option
;
313 case DCCPO_ACK_VECTOR_0
:
314 case DCCPO_ACK_VECTOR_1
:
317 return offset
+ (opt_ptr
- options
);
324 DCCP_BUG("Invalid option - this should not happen (previous parsing)!");
/*
 * ccid2_hc_tx_kill_rto_timer - stop the sender's RTO timer
 * (hc->tx_rtotimer) through sk_stop_timer() and log the fact.
 */
328 static void ccid2_hc_tx_kill_rto_timer(struct sock
*sk
)
330 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
332 sk_stop_timer(sk
, &hc
->tx_rtotimer
);
333 ccid2_pr_debug("deleted RTO timer\n");
/*
 * ccid2_new_ack - process one newly acknowledged packet @seqp: window
 * growth bookkeeping, RTT/RTO update, and RTO timer restart.
 *
 * @maxincr: in/out budget consulted while tx_cwnd < tx_ssthresh.
 *
 * Visible behaviour:
 *  - while tx_cwnd < tx_ssthresh, tx_packets_acked is counted up to 2 (and
 *    only when *maxincr > 0) before being reset; otherwise it is counted
 *    up to tx_cwnd before being reset;
 *  - an RTT sample r = jiffies - seqp->ccid2s_sent is taken when no SRTT
 *    exists yet (tx_srtt == -1) or when more than tx_srtt jiffies have
 *    passed since tx_lastrtt;
 *  - the first sample seeds SRTT with r and tx_rttvar with r >> 1;
 *  - tx_rto is set to tx_srtt + s, where s starts as tx_rttvar << 2;
 *  - the RTO timer is stopped and started again on every call.
 *
 * NOTE(review): the listing has dropped lines 344-345, 347, 349, 351-353,
 * 357-358, 362, 366-367, 369-376, 378-384, 386, 389-390, 392, 394,
 * 396-399, 401, 403 and 406-408.  Not visible: the tx_cwnd / *maxincr
 * updates, the declarations of `s` and `srtt`, the smoothing arithmetic
 * for later samples, and the conditions guarding the lower bound and the
 * `HZ * 60` upper bound on tx_rto - confirm against the pristine file.
 */
336 static inline void ccid2_new_ack(struct sock
*sk
,
337 struct ccid2_seq
*seqp
,
338 unsigned int *maxincr
)
340 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
342 if (hc
->tx_cwnd
< hc
->tx_ssthresh
) {
343 if (*maxincr
> 0 && ++hc
->tx_packets_acked
== 2) {
346 hc
->tx_packets_acked
= 0;
348 } else if (++hc
->tx_packets_acked
>= hc
->tx_cwnd
) {
350 hc
->tx_packets_acked
= 0;
354 if (hc
->tx_srtt
== -1 ||
355 time_after(jiffies
, hc
->tx_lastrtt
+ hc
->tx_srtt
)) {
356 unsigned long r
= (long)jiffies
- (long)seqp
->ccid2s_sent
;
359 /* first measurement */
360 if (hc
->tx_srtt
== -1) {
361 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
363 (unsigned long long)seqp
->ccid2s_seq
);
364 ccid2_change_srtt(hc
, r
);
365 hc
->tx_rttvar
= r
>> 1;
368 long tmp
= hc
->tx_srtt
- r
;
377 hc
->tx_rttvar
+= tmp
;
385 ccid2_change_srtt(hc
, srtt
);
387 s
= hc
->tx_rttvar
<< 2;
388 /* clock granularity is 1 when based on jiffies */
391 hc
->tx_rto
= hc
->tx_srtt
+ s
;
393 /* must be at least a second */
395 /* DCCP doesn't require this [but I like it cuz my code sux] */
400 hc
->tx_rto
= HZ
* 60;
402 hc
->tx_lastrtt
= jiffies
;
404 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
405 hc
->tx_srtt
, hc
->tx_rttvar
,
409 /* we got a new ack, so re-start RTO timer */
410 ccid2_hc_tx_kill_rto_timer(sk
);
411 ccid2_start_rto_timer(sk
);
/*
 * ccid2_hc_tx_dec_pipe - account for one packet leaving the pipe.
 *
 * Visible behaviour: a pipe that is already 0 is reported through
 * DCCP_BUG(); once the pipe is 0 after the update, the RTO timer is
 * stopped.
 *
 * NOTE(review): listing lines 420-422 are missing; presumably they hold
 * the else branch that decrements hc->tx_pipe - confirm.
 */
414 static void ccid2_hc_tx_dec_pipe(struct sock
*sk
)
416 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
418 if (hc
->tx_pipe
== 0)
419 DCCP_BUG("pipe == 0");
423 if (hc
->tx_pipe
== 0)
424 ccid2_hc_tx_kill_rto_timer(sk
);
/*
 * ccid2_congestion_event - react to a loss or ECN mark reported for @seqp.
 *
 * Visible behaviour:
 *  - a packet sent before the previous congestion event (tx_last_cong) is
 *    only logged as part of the same event;
 *  - otherwise tx_last_cong is set to jiffies, tx_cwnd is halved with a
 *    floor of 1 (`x ? : 1U` is the GNU "elvis" form), tx_ssthresh becomes
 *    max(tx_cwnd, 2U), and the local Ack Ratio is lowered to tx_cwnd if it
 *    exceeds it.
 *
 * NOTE(review): listing lines 433-435 are missing; presumably they return
 * from the "multiple losses" branch - confirm.
 */
427 static void ccid2_congestion_event(struct sock
*sk
, struct ccid2_seq
*seqp
)
429 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
431 if (time_before(seqp
->ccid2s_sent
, hc
->tx_last_cong
)) {
432 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
436 hc
->tx_last_cong
= jiffies
;
438 hc
->tx_cwnd
= hc
->tx_cwnd
/ 2 ? : 1U;
439 hc
->tx_ssthresh
= max(hc
->tx_cwnd
, 2U);
441 /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
442 if (dccp_sk(sk
)->dccps_l_ack_ratio
> hc
->tx_cwnd
)
443 ccid2_change_l_ack_ratio(sk
, hc
->tx_cwnd
);
/*
 * ccid2_hc_tx_packet_recv - sender-side handling of a received packet
 * (wired up as .ccid_hc_tx_packet_recv in ccid2_ops).
 *
 * Phases that can be read off the visible lines:
 *  1. Reverse-path check: tx_rpseq / tx_rpdupack track the peer's sequence
 *     numbers; once tx_rpdupack reaches NUMDUPACK the local Ack Ratio is
 *     doubled.
 *  2. If the ring is empty (tx_seqh == tx_seqt) there is nothing to ack.
 *  3. The packet type is switched on; DCCP_PKT_DATAACK is one of the
 *     handled cases.
 *  4. tx_high_ack is raised to the packet's ack number if that is later.
 *  5. Every Ack Vector returned by ccid2_ackvector() is walked; for each
 *     run, ring entries whose sequence number lies in
 *     [ackno_end_rl, ackno] and that are not yet acked are either treated
 *     as a congestion event (ECN-marked state) or passed to
 *     ccid2_new_ack(), then marked acked and removed from the pipe.
 *  6. Acked entries are counted backwards from tx_high_ack; if NUMDUPACK
 *     are found, remaining un-acked entries down to the tail are treated
 *     as lost and tx_seqt is moved to the last acked entry.
 *  7. Acked entries at the tail are trimmed.
 *
 * NOTE(review): this listing has dropped many lines (declarations of
 * ackno, seqno, offset and done; every break/return; loop headers such as
 * the per-vector-byte loop; else branches; closing braces).  Two comments
 * (listing lines 529-530 and 582-583) have also lost their closing
 * lines, so the text as it stands is not lexically balanced.  Treat the
 * summary above as a reading aid only and confirm against the pristine
 * file.
 */
446 static void ccid2_hc_tx_packet_recv(struct sock
*sk
, struct sk_buff
*skb
)
448 struct dccp_sock
*dp
= dccp_sk(sk
);
449 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
451 struct ccid2_seq
*seqp
;
452 unsigned char *vector
;
453 unsigned char veclen
;
456 unsigned int maxincr
= 0;
458 ccid2_hc_tx_check_sanity(hc
);
459 /* check reverse path congestion */
460 seqno
= DCCP_SKB_CB(skb
)->dccpd_seq
;
462 /* need to bootstrap */
463 if (hc
->tx_rpdupack
== -1) {
465 hc
->tx_rpseq
= seqno
;
467 /* check if packet is consecutive */
468 if (dccp_delta_seqno(hc
->tx_rpseq
, seqno
) == 1)
469 hc
->tx_rpseq
= seqno
;
470 /* it's a later packet */
471 else if (after48(seqno
, hc
->tx_rpseq
)) {
474 /* check if we got enough dupacks */
475 if (hc
->tx_rpdupack
>= NUMDUPACK
) {
476 hc
->tx_rpdupack
= -1;
479 ccid2_change_l_ack_ratio(sk
, 2 * dp
->dccps_l_ack_ratio
);
484 /* check forward path congestion */
485 /* still didn't send out new data packets */
486 if (hc
->tx_seqh
== hc
->tx_seqt
)
489 switch (DCCP_SKB_CB(skb
)->dccpd_type
) {
491 case DCCP_PKT_DATAACK
:
497 ackno
= DCCP_SKB_CB(skb
)->dccpd_ack_seq
;
498 if (after48(ackno
, hc
->tx_high_ack
))
499 hc
->tx_high_ack
= ackno
;
/* NOTE(review): the initial assignment of seqp for the walk below is not visible here. */
502 while (before48(seqp
->ccid2s_seq
, ackno
)) {
503 seqp
= seqp
->ccid2s_next
;
504 if (seqp
== hc
->tx_seqh
) {
505 seqp
= hc
->tx_seqh
->ccid2s_prev
;
511 * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
512 * packets per acknowledgement. Rounding up avoids that cwnd is not
513 * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
515 if (hc
->tx_cwnd
< hc
->tx_ssthresh
)
516 maxincr
= DIV_ROUND_UP(dp
->dccps_l_ack_ratio
, 2);
518 /* go through all ack vectors */
519 while ((offset
= ccid2_ackvector(sk
, skb
, offset
,
520 &vector
, &veclen
)) != -1) {
521 /* go through this ack vector */
523 const u8 rl
= *vector
& DCCP_ACKVEC_LEN_MASK
;
524 u64 ackno_end_rl
= SUB48(ackno
, rl
);
526 ccid2_pr_debug("ackvec start:%llu end:%llu\n",
527 (unsigned long long)ackno
,
528 (unsigned long long)ackno_end_rl
);
529 /* if the seqno we are analyzing is larger than the
530 * current ackno, then move towards the tail of our
533 while (after48(seqp
->ccid2s_seq
, ackno
)) {
534 if (seqp
== hc
->tx_seqt
) {
538 seqp
= seqp
->ccid2s_prev
;
543 /* check all seqnos in the range of the vector
546 while (between48(seqp
->ccid2s_seq
,ackno_end_rl
,ackno
)) {
547 const u8 state
= *vector
&
548 DCCP_ACKVEC_STATE_MASK
;
550 /* new packet received or marked */
551 if (state
!= DCCP_ACKVEC_STATE_NOT_RECEIVED
&&
552 !seqp
->ccid2s_acked
) {
554 DCCP_ACKVEC_STATE_ECN_MARKED
) {
555 ccid2_congestion_event(sk
,
558 ccid2_new_ack(sk
, seqp
,
561 seqp
->ccid2s_acked
= 1;
562 ccid2_pr_debug("Got ack for %llu\n",
563 (unsigned long long)seqp
->ccid2s_seq
);
564 ccid2_hc_tx_dec_pipe(sk
);
566 if (seqp
== hc
->tx_seqt
) {
570 seqp
= seqp
->ccid2s_prev
;
/* Continue with the run that ends just below the one handled above. */
575 ackno
= SUB48(ackno_end_rl
, 1);
582 /* The state about what is acked should be correct now
583 * Check for NUMDUPACK
586 while (before48(seqp
->ccid2s_seq
, hc
->tx_high_ack
)) {
587 seqp
= seqp
->ccid2s_next
;
588 if (seqp
== hc
->tx_seqh
) {
589 seqp
= hc
->tx_seqh
->ccid2s_prev
;
595 if (seqp
->ccid2s_acked
) {
597 if (done
== NUMDUPACK
)
600 if (seqp
== hc
->tx_seqt
)
602 seqp
= seqp
->ccid2s_prev
;
605 /* If there are at least 3 acknowledgements, anything unacknowledged
606 * below the last sequence number is considered lost
608 if (done
== NUMDUPACK
) {
609 struct ccid2_seq
*last_acked
= seqp
;
611 /* check for lost packets */
613 if (!seqp
->ccid2s_acked
) {
614 ccid2_pr_debug("Packet lost: %llu\n",
615 (unsigned long long)seqp
->ccid2s_seq
);
616 ccid2_congestion_event(sk
, seqp
);
617 ccid2_hc_tx_dec_pipe(sk
);
619 if (seqp
== hc
->tx_seqt
)
621 seqp
= seqp
->ccid2s_prev
;
624 hc
->tx_seqt
= last_acked
;
627 /* trim acked packets in tail */
628 while (hc
->tx_seqt
!= hc
->tx_seqh
) {
629 if (!hc
->tx_seqt
->ccid2s_acked
)
632 hc
->tx_seqt
= hc
->tx_seqt
->ccid2s_next
;
635 ccid2_hc_tx_check_sanity(hc
);
/*
 * ccid2_hc_tx_init - set up the sender half-connection state (wired up as
 * .ccid_hc_tx_init in ccid2_ops).
 *
 * Visible behaviour:
 *  - tx_ssthresh starts at ~0U;
 *  - tx_cwnd is 4380U / dp->dccps_mss_cache clamped to [2, 4];
 *  - the local Ack Ratio is forced into (0, DIV_ROUND_UP(tx_cwnd, 2)];
 *  - the first sequence buffer is allocated with ccid2_hc_tx_alloc_seq();
 *  - SRTT is initialised to -1 ("no sample yet", see ccid2_new_ack()),
 *    tx_rpdupack to -1 and tx_last_cong to jiffies;
 *  - the RTO timer is set up with ccid2_hc_tx_rto_expire as its handler.
 *
 * NOTE(review): listing lines 642-643, 646-647, 651, 658, 660-662, 664,
 * 668-669 and 671-673 are missing.  Not visible: the declaration of
 * max_ratio, the error return after a failed allocation, the initial
 * tx_rto value, the data argument of setup_timer() and the final return.
 * The division assumes dp->dccps_mss_cache is non-zero at this point -
 * confirm against the caller.
 */
638 static int ccid2_hc_tx_init(struct ccid
*ccid
, struct sock
*sk
)
640 struct ccid2_hc_tx_sock
*hc
= ccid_priv(ccid
);
641 struct dccp_sock
*dp
= dccp_sk(sk
);
644 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
645 hc
->tx_ssthresh
= ~0U;
648 * RFC 4341, 5: "The cwnd parameter is initialized to at most four
649 * packets for new connections, following the rules from [RFC3390]".
650 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
652 hc
->tx_cwnd
= clamp(4380U / dp
->dccps_mss_cache
, 2U, 4U);
654 /* Make sure that Ack Ratio is enabled and within bounds. */
655 max_ratio
= DIV_ROUND_UP(hc
->tx_cwnd
, 2);
656 if (dp
->dccps_l_ack_ratio
== 0 || dp
->dccps_l_ack_ratio
> max_ratio
)
657 dp
->dccps_l_ack_ratio
= max_ratio
;
659 if (ccid2_hc_tx_alloc_seq(hc
))
663 ccid2_change_srtt(hc
, -1);
665 hc
->tx_rpdupack
= -1;
666 hc
->tx_last_cong
= jiffies
;
667 setup_timer(&hc
->tx_rtotimer
, ccid2_hc_tx_rto_expire
,
670 ccid2_hc_tx_check_sanity(hc
);
/*
 * ccid2_hc_tx_exit - tear down the sender half-connection (wired up as
 * .ccid_hc_tx_exit in ccid2_ops): stop the RTO timer, then kfree() every
 * sequence buffer recorded in hc->tx_seqbuf[0 .. tx_seqbufc - 1].
 *
 * NOTE(review): listing lines 677-678, 680 and 683-685 are missing; the
 * declaration of `i` and anything done after the loop (for example
 * resetting tx_seqbufc) are not visible.
 */
674 static void ccid2_hc_tx_exit(struct sock
*sk
)
676 struct ccid2_hc_tx_sock
*hc
= ccid2_hc_tx_sk(sk
);
679 ccid2_hc_tx_kill_rto_timer(sk
);
681 for (i
= 0; i
< hc
->tx_seqbufc
; i
++)
682 kfree(hc
->tx_seqbuf
[i
]);
/*
 * ccid2_hc_rx_packet_recv - receiver-side hook (wired up as
 * .ccid_hc_rx_packet_recv in ccid2_ops).
 *
 * Visible behaviour: switches on the packet type; for DCCP_PKT_DATAACK
 * (and possibly other cases not visible here) it compares hc->rx_data
 * with the remote Ack Ratio dp->dccps_r_ack_ratio.
 *
 * NOTE(review): listing lines 692, 694 and 696-701 are missing.  What
 * updates rx_data and what happens once the ratio is reached is not
 * visible - confirm against the pristine file.
 */
686 static void ccid2_hc_rx_packet_recv(struct sock
*sk
, struct sk_buff
*skb
)
688 const struct dccp_sock
*dp
= dccp_sk(sk
);
689 struct ccid2_hc_rx_sock
*hc
= ccid2_hc_rx_sk(sk
);
691 switch (DCCP_SKB_CB(skb
)->dccpd_type
) {
693 case DCCP_PKT_DATAACK
:
695 if (hc
->rx_data
>= dp
->dccps_r_ack_ratio
) {
/*
 * ccid2_ops - operations table for CCID-2 ("TCP-like").
 *
 * It names the CCID (DCCPC_CCID2), gives the sizes of the private TX and
 * RX half-connection structures, and points each hook at the matching
 * function in this file.  The object is not static, so it is visible to
 * other translation units.
 *
 * NOTE(review): the closing line of the initialiser (listing line 714)
 * is missing from this listing.
 */
703 struct ccid_operations ccid2_ops
= {
704 .ccid_id
= DCCPC_CCID2
,
705 .ccid_name
= "TCP-like",
706 .ccid_hc_tx_obj_size
= sizeof(struct ccid2_hc_tx_sock
),
707 .ccid_hc_tx_init
= ccid2_hc_tx_init
,
708 .ccid_hc_tx_exit
= ccid2_hc_tx_exit
,
709 .ccid_hc_tx_send_packet
= ccid2_hc_tx_send_packet
,
710 .ccid_hc_tx_packet_sent
= ccid2_hc_tx_packet_sent
,
711 .ccid_hc_tx_packet_recv
= ccid2_hc_tx_packet_recv
,
712 .ccid_hc_rx_obj_size
= sizeof(struct ccid2_hc_rx_sock
),
713 .ccid_hc_rx_packet_recv
= ccid2_hc_rx_packet_recv
,
/*
 * In debug builds, expose ccid2_debug as a module parameter of type bool
 * with mode 0644, together with its description string.
 * NOTE(review): the matching #endif (listing line 719) is missing from
 * this listing.
 */
716 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
717 module_param(ccid2_debug
, bool, 0644);
718 MODULE_PARM_DESC(ccid2_debug
, "Enable CCID-2 debug messages");