net/dccp/ccids/ccid2.c

   1 /*
   2  *  Copyright (c) 2005, 2006 Andrea Bittau <a.bittau@cs.ucl.ac.uk>
   3  *
   4  *  Changes to meet Linux coding standards, and DCCP infrastructure fixes.
   5  *
   6  *  Copyright (c) 2006 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
   7  *
   8  *  This program is free software; you can redistribute it and/or modify
   9  *  it under the terms of the GNU General Public License as published by
  10  *  the Free Software Foundation; either version 2 of the License, or
  11  *  (at your option) any later version.
  12  *
  13  *  This program is distributed in the hope that it will be useful,
  14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  *  GNU General Public License for more details.
  17  *
  18  *  You should have received a copy of the GNU General Public License
  19  *  along with this program; if not, write to the Free Software
  20  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21  */
  22
  23 /*
  24  * This implementation should follow RFC 4341
  25  */
  26 #include <linux/slab.h>
  27 #include "../feat.h"
  28 #include "ccid2.h"
  29
  30
  31 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
  32 static int ccid2_debug;
  33 #define ccid2_pr_debug(format, a...)    DCCP_PR_DEBUG(ccid2_debug, format, ##a)
  34 #else
  35 #define ccid2_pr_debug(format, a...)
  36 #endif
  37
  38 static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
  39 {
  40         struct ccid2_seq *seqp;
  41         int i;
  42
  43         /* check if we have space to preserve the pointer to the buffer */
  44         if (hc->tx_seqbufc >= (sizeof(hc->tx_seqbuf) /
  45                                sizeof(struct ccid2_seq *)))
  46                 return -ENOMEM;
  47
  48         /* allocate buffer and initialize linked list */
  49         seqp = kmalloc(CCID2_SEQBUF_LEN * sizeof(struct ccid2_seq), gfp_any());
  50         if (seqp == NULL)
  51                 return -ENOMEM;
  52
  53         for (i = 0; i < (CCID2_SEQBUF_LEN - 1); i++) {
  54                 seqp[i].ccid2s_next = &seqp[i + 1];
  55                 seqp[i + 1].ccid2s_prev = &seqp[i];
  56         }
  57         seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = seqp;
  58         seqp->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
  59
  60         /* This is the first allocation.  Initiate the head and tail.  */
  61         if (hc->tx_seqbufc == 0)
  62                 hc->tx_seqh = hc->tx_seqt = seqp;
  63         else {
  64                 /* link the existing list with the one we just created */
  65                 hc->tx_seqh->ccid2s_next = seqp;
  66                 seqp->ccid2s_prev = hc->tx_seqh;
  67
  68                 hc->tx_seqt->ccid2s_prev = &seqp[CCID2_SEQBUF_LEN - 1];
  69                 seqp[CCID2_SEQBUF_LEN - 1].ccid2s_next = hc->tx_seqt;
  70         }
  71
  72         /* store the original pointer to the buffer so we can free it */
  73         hc->tx_seqbuf[hc->tx_seqbufc] = seqp;
  74         hc->tx_seqbufc++;
  75
  76         return 0;
  77 }
  78
  79 static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
  80 {
  81         if (ccid2_cwnd_network_limited(ccid2_hc_tx_sk(sk)))
  82                 return CCID_PACKET_WILL_DEQUEUE_LATER;
  83         return CCID_PACKET_SEND_AT_ONCE;
  84 }
  85
  86 static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
  87 {
  88         struct dccp_sock *dp = dccp_sk(sk);
  89         u32 max_ratio = DIV_ROUND_UP(ccid2_hc_tx_sk(sk)->tx_cwnd, 2);
  90
  91         /*
  92          * Ensure that Ack Ratio does not exceed ceil(cwnd/2), which is (2) from
  93          * RFC 4341, 6.1.2. We ignore the statement that Ack Ratio 2 is always
  94          * acceptable since this causes starvation/deadlock whenever cwnd < 2.
  95          * The same problem arises when Ack Ratio is 0 (ie. Ack Ratio disabled).
  96          */
  97         if (val == 0 || val > max_ratio) {
  98                 DCCP_WARN("Limiting Ack Ratio (%u) to %u\n", val, max_ratio);
  99                 val = max_ratio;
 100         }
 101         if (val > DCCPF_ACK_RATIO_MAX)
 102                 val = DCCPF_ACK_RATIO_MAX;
 103
 104         if (val == dp->dccps_l_ack_ratio)
 105                 return;
 106
 107         ccid2_pr_debug("changing local ack ratio to %u\n", val);
 108         dp->dccps_l_ack_ratio = val;
 109 }
 110
 111 static void ccid2_hc_tx_rto_expire(unsigned long data)
 112 {
 113         struct sock *sk = (struct sock *)data;
 114         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 115         const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
 116
 117         bh_lock_sock(sk);
 118         if (sock_owned_by_user(sk)) {
 119                 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + HZ / 5);
 120                 goto out;
 121         }
 122
 123         ccid2_pr_debug("RTO_EXPIRE\n");
 124
 125         /* back-off timer */
 126         hc->tx_rto <<= 1;
 127         if (hc->tx_rto > DCCP_RTO_MAX)
 128                 hc->tx_rto = DCCP_RTO_MAX;
 129
 130         /* adjust pipe, cwnd etc */
 131         hc->tx_ssthresh = hc->tx_cwnd / 2;
 132         if (hc->tx_ssthresh < 2)
 133                 hc->tx_ssthresh = 2;
 134         hc->tx_cwnd     = 1;
 135         hc->tx_pipe     = 0;
 136
 137         /* clear state about stuff we sent */
 138         hc->tx_seqt = hc->tx_seqh;
 139         hc->tx_packets_acked = 0;
 140
 141         /* clear ack ratio state. */
 142         hc->tx_rpseq    = 0;
 143         hc->tx_rpdupack = -1;
 144         ccid2_change_l_ack_ratio(sk, 1);
 145
 146         /* if we were blocked before, we may now send cwnd=1 packet */
 147         if (sender_was_blocked)
 148                 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
 149         /* restart backed-off timer */
 150         sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 151 out:
 152         bh_unlock_sock(sk);
 153         sock_put(sk);
 154 }
 155
 156 /*
 157  *      Congestion window validation (RFC 2861).
 158  */
 159 static int ccid2_do_cwv = 1;
 160 module_param(ccid2_do_cwv, bool, 0644);
 161 MODULE_PARM_DESC(ccid2_do_cwv, "Perform RFC2861 Congestion Window Validation");
 162
 163 /**
 164  * ccid2_update_used_window  -  Track how much of cwnd is actually used
 165  * This is done in addition to CWV. The sender needs to have an idea of how many
 166  * packets may be in flight, to set the local Sequence Window value accordingly
 167  * (RFC 4340, 7.5.2). The CWV mechanism is exploited to keep track of the
 168  * maximum-used window. We use an EWMA low-pass filter to filter out noise.
 169  */
 170 static void ccid2_update_used_window(struct ccid2_hc_tx_sock *hc, u32 new_wnd)
 171 {
 172         hc->tx_expected_wnd = (3 * hc->tx_expected_wnd + new_wnd) / 4;
 173 }
 174
 175 /* This borrows the code of tcp_cwnd_application_limited() */
 176 static void ccid2_cwnd_application_limited(struct sock *sk, const u32 now)
 177 {
 178         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 179         /* don't reduce cwnd below the initial window (IW) */
 180         u32 init_win = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache),
 181             win_used = max(hc->tx_cwnd_used, init_win);
 182
 183         if (win_used < hc->tx_cwnd) {
 184                 hc->tx_ssthresh = max(hc->tx_ssthresh,
 185                                      (hc->tx_cwnd >> 1) + (hc->tx_cwnd >> 2));
 186                 hc->tx_cwnd = (hc->tx_cwnd + win_used) >> 1;
 187         }
 188         hc->tx_cwnd_used  = 0;
 189         hc->tx_cwnd_stamp = now;
 190 }
 191
 192 /* This borrows the code of tcp_cwnd_restart() */
 193 static void ccid2_cwnd_restart(struct sock *sk, const u32 now)
 194 {
 195         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 196         u32 cwnd = hc->tx_cwnd, restart_cwnd,
 197             iwnd = rfc3390_bytes_to_packets(dccp_sk(sk)->dccps_mss_cache);
 198
 199         hc->tx_ssthresh = max(hc->tx_ssthresh, (cwnd >> 1) + (cwnd >> 2));
 200
 201         /* don't reduce cwnd below the initial window (IW) */
 202         restart_cwnd = min(cwnd, iwnd);
 203         cwnd >>= (now - hc->tx_lsndtime) / hc->tx_rto;
 204         hc->tx_cwnd = max(cwnd, restart_cwnd);
 205
 206         hc->tx_cwnd_stamp = now;
 207         hc->tx_cwnd_used  = 0;
 208 }
 209
 210 static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
 211 {
 212         struct dccp_sock *dp = dccp_sk(sk);
 213         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 214         const u32 now = ccid2_time_stamp;
 215         struct ccid2_seq *next;
 216
 217         /* slow-start after idle periods (RFC 2581, RFC 2861) */
 218         if (ccid2_do_cwv && !hc->tx_pipe &&
 219             (s32)(now - hc->tx_lsndtime) >= hc->tx_rto)
 220                 ccid2_cwnd_restart(sk, now);
 221
 222         hc->tx_lsndtime = now;
 223         hc->tx_pipe    += 1;
 224
 225         /* see whether cwnd was fully used (RFC 2861), update expected window */
 226         if (ccid2_cwnd_network_limited(hc)) {
 227                 ccid2_update_used_window(hc, hc->tx_cwnd);
 228                 hc->tx_cwnd_used  = 0;
 229                 hc->tx_cwnd_stamp = now;
 230         } else {
 231                 if (hc->tx_pipe > hc->tx_cwnd_used)
 232                         hc->tx_cwnd_used = hc->tx_pipe;
 233
 234                 ccid2_update_used_window(hc, hc->tx_cwnd_used);
 235
 236                 if (ccid2_do_cwv && (s32)(now - hc->tx_cwnd_stamp) >= hc->tx_rto)
 237                         ccid2_cwnd_application_limited(sk, now);
 238         }
 239
 240         hc->tx_seqh->ccid2s_seq   = dp->dccps_gss;
 241         hc->tx_seqh->ccid2s_acked = 0;
 242         hc->tx_seqh->ccid2s_sent  = now;
 243
 244         next = hc->tx_seqh->ccid2s_next;
 245         /* check if we need to alloc more space */
 246         if (next == hc->tx_seqt) {
 247                 if (ccid2_hc_tx_alloc_seq(hc)) {
 248                         DCCP_CRIT("packet history - out of memory!");
 249                         /* FIXME: find a more graceful way to bail out */
 250                         return;
 251                 }
 252                 next = hc->tx_seqh->ccid2s_next;
 253                 BUG_ON(next == hc->tx_seqt);
 254         }
 255         hc->tx_seqh = next;
 256
 257         ccid2_pr_debug("cwnd=%d pipe=%d\n", hc->tx_cwnd, hc->tx_pipe);
 258
 259         /*
 260          * FIXME: The code below is broken and the variables have been removed
 261          * from the socket struct. The `ackloss' variable was always set to 0,
 262          * and with arsent there are several problems:
 263          *  (i) it doesn't just count the number of Acks, but all sent packets;
 264          *  (ii) it is expressed in # of packets, not # of windows, so the
 265          *  comparison below uses the wrong formula: Appendix A of RFC 4341
 266          *  comes up with the number K = cwnd / (R^2 - R) of consecutive windows
 267          *  of data with no lost or marked Ack packets. If arsent were the # of
 268          *  consecutive Acks received without loss, then Ack Ratio needs to be
 269          *  decreased by 1 when
 270          *            arsent >=  K * cwnd / R  =  cwnd^2 / (R^3 - R^2)
 271          *  where cwnd / R is the number of Acks received per window of data
 272          *  (cf. RFC 4341, App. A). The problems are that
 273          *  - arsent counts other packets as well;
 274          *  - the comparison uses a formula different from RFC 4341;
 275          *  - computing a cubic/quadratic equation each time is too complicated.
 276          *  Hence a different algorithm is needed.
 277          */
 278 #if 0
 279         /* Ack Ratio.  Need to maintain a concept of how many windows we sent */
 280         hc->tx_arsent++;
 281         /* We had an ack loss in this window... */
 282         if (hc->tx_ackloss) {
 283                 if (hc->tx_arsent >= hc->tx_cwnd) {
 284                         hc->tx_arsent  = 0;
 285                         hc->tx_ackloss = 0;
 286                 }
 287         } else {
 288                 /* No acks lost up to now... */
 289                 /* decrease ack ratio if enough packets were sent */
 290                 if (dp->dccps_l_ack_ratio > 1) {
 291                         /* XXX don't calculate denominator each time */
 292                         int denom = dp->dccps_l_ack_ratio * dp->dccps_l_ack_ratio -
 293                                     dp->dccps_l_ack_ratio;
 294
 295                         denom = hc->tx_cwnd * hc->tx_cwnd / denom;
 296
 297                         if (hc->tx_arsent >= denom) {
 298                                 ccid2_change_l_ack_ratio(sk, dp->dccps_l_ack_ratio - 1);
 299                                 hc->tx_arsent = 0;
 300                         }
 301                 } else {
 302                         /* we can't increase ack ratio further [1] */
 303                         hc->tx_arsent = 0; /* or maybe set it to cwnd*/
 304                 }
 305         }
 306 #endif
 307
 308         sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 309
 310 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 311         do {
 312                 struct ccid2_seq *seqp = hc->tx_seqt;
 313
 314                 while (seqp != hc->tx_seqh) {
 315                         ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
 316                                        (unsigned long long)seqp->ccid2s_seq,
 317                                        seqp->ccid2s_acked, seqp->ccid2s_sent);
 318                         seqp = seqp->ccid2s_next;
 319                 }
 320         } while (0);
 321         ccid2_pr_debug("=========\n");
 322 #endif
 323 }
 324
 325 /**
 326  * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
 327  * This code is almost identical with TCP's tcp_rtt_estimator(), since
 328  * - it has a higher sampling frequency (recommended by RFC 1323),
 329  * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
 330  * - it is simple (cf. more complex proposals such as Eifel timer or research
 331  *   which suggests that the gain should be set according to window size),
 332  * - in tests it was found to work well with CCID2 [gerrit].
 333  */
 334 static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
 335 {
 336         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 337         long m = mrtt ? : 1;
 338
 339         if (hc->tx_srtt == 0) {
 340                 /* First measurement m */
 341                 hc->tx_srtt = m << 3;
 342                 hc->tx_mdev = m << 1;
 343
 344                 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
 345                 hc->tx_rttvar   = hc->tx_mdev_max;
 346
 347                 hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
 348         } else {
 349                 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
 350                 m -= (hc->tx_srtt >> 3);
 351                 hc->tx_srtt += m;
 352
 353                 /* Similarly, update scaled mdev with regard to |m| */
 354                 if (m < 0) {
 355                         m = -m;
 356                         m -= (hc->tx_mdev >> 2);
 357                         /*
 358                          * This neutralises RTO increase when RTT < SRTT - mdev
 359                          * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
 360                          * in Linux TCP", USENIX 2002, pp. 49-62).
 361                          */
 362                         if (m > 0)
 363                                 m >>= 3;
 364                 } else {
 365                         m -= (hc->tx_mdev >> 2);
 366                 }
 367                 hc->tx_mdev += m;
 368
 369                 if (hc->tx_mdev > hc->tx_mdev_max) {
 370                         hc->tx_mdev_max = hc->tx_mdev;
 371                         if (hc->tx_mdev_max > hc->tx_rttvar)
 372                                 hc->tx_rttvar = hc->tx_mdev_max;
 373                 }
 374
 375                 /*
 376                  * Decay RTTVAR at most once per flight, exploiting that
 377                  *  1) pipe <= cwnd <= Sequence_Window = W  (RFC 4340, 7.5.2)
 378                  *  2) AWL = GSS-W+1 <= GAR <= GSS          (RFC 4340, 7.5.1)
 379                  * GAR is a useful bound for FlightSize = pipe.
 380                  * AWL is probably too low here, as it over-estimates pipe.
 381                  */
 382                 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
 383                         if (hc->tx_mdev_max < hc->tx_rttvar)
 384                                 hc->tx_rttvar -= (hc->tx_rttvar -
 385                                                   hc->tx_mdev_max) >> 2;
 386                         hc->tx_rtt_seq  = dccp_sk(sk)->dccps_gss;
 387                         hc->tx_mdev_max = tcp_rto_min(sk);
 388                 }
 389         }
 390
 391         /*
 392          * Set RTO from SRTT and RTTVAR
 393          * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
 394          * This agrees with RFC 4341, 5:
 395          *      "Because DCCP does not retransmit data, DCCP does not require
 396          *       TCP's recommended minimum timeout of one second".
 397          */
 398         hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
 399
 400         if (hc->tx_rto > DCCP_RTO_MAX)
 401                 hc->tx_rto = DCCP_RTO_MAX;
 402 }
 403
 404 static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
 405                           unsigned int *maxincr)
 406 {
 407         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 408
 409         if (hc->tx_cwnd < hc->tx_ssthresh) {
 410                 if (*maxincr > 0 && ++hc->tx_packets_acked == 2) {
 411                         hc->tx_cwnd += 1;
 412                         *maxincr    -= 1;
 413                         hc->tx_packets_acked = 0;
 414                 }
 415         } else if (++hc->tx_packets_acked >= hc->tx_cwnd) {
 416                         hc->tx_cwnd += 1;
 417                         hc->tx_packets_acked = 0;
 418         }
 419         /*
 420          * FIXME: RTT is sampled several times per acknowledgment (for each
 421          * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
 422          * This causes the RTT to be over-estimated, since the older entries
 423          * in the Ack Vector have earlier sending times.
 424          * The cleanest solution is to not use the ccid2s_sent field at all
 425          * and instead use DCCP timestamps: requires changes in other places.
 426          */
 427         ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
 428 }
 429
 430 static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
 431 {
 432         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 433
 434         if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
 435                 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
 436                 return;
 437         }
 438
 439         hc->tx_last_cong = ccid2_time_stamp;
 440
 441         hc->tx_cwnd      = hc->tx_cwnd / 2 ? : 1U;
 442         hc->tx_ssthresh  = max(hc->tx_cwnd, 2U);
 443
 444         /* Avoid spurious timeouts resulting from Ack Ratio > cwnd */
 445         if (dccp_sk(sk)->dccps_l_ack_ratio > hc->tx_cwnd)
 446                 ccid2_change_l_ack_ratio(sk, hc->tx_cwnd);
 447 }
 448
 449 static int ccid2_hc_tx_parse_options(struct sock *sk, u8 packet_type,
 450                                      u8 option, u8 *optval, u8 optlen)
 451 {
 452         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 453
 454         switch (option) {
 455         case DCCPO_ACK_VECTOR_0:
 456         case DCCPO_ACK_VECTOR_1:
 457                 return dccp_ackvec_parsed_add(&hc->tx_av_chunks, optval, optlen,
 458                                               option - DCCPO_ACK_VECTOR_0);
 459         }
 460         return 0;
 461 }
 462
 463 static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 464 {
 465         struct dccp_sock *dp = dccp_sk(sk);
 466         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 467         const bool sender_was_blocked = ccid2_cwnd_network_limited(hc);
 468         struct dccp_ackvec_parsed *avp;
 469         u64 ackno, seqno;
 470         struct ccid2_seq *seqp;
 471         int done = 0;
 472         unsigned int maxincr = 0;
 473
 474         /* check reverse path congestion */
 475         seqno = DCCP_SKB_CB(skb)->dccpd_seq;
 476
 477         /* XXX this whole "algorithm" is broken.  Need to fix it to keep track
 478          * of the seqnos of the dupacks so that rpseq and rpdupack are correct
 479          * -sorbo.
 480          */
 481         /* need to bootstrap */
 482         if (hc->tx_rpdupack == -1) {
 483                 hc->tx_rpdupack = 0;
 484                 hc->tx_rpseq    = seqno;
 485         } else {
 486                 /* check if packet is consecutive */
 487                 if (dccp_delta_seqno(hc->tx_rpseq, seqno) == 1)
 488                         hc->tx_rpseq = seqno;
 489                 /* it's a later packet */
 490                 else if (after48(seqno, hc->tx_rpseq)) {
 491                         hc->tx_rpdupack++;
 492
 493                         /* check if we got enough dupacks */
 494                         if (hc->tx_rpdupack >= NUMDUPACK) {
 495                                 hc->tx_rpdupack = -1; /* XXX lame */
 496                                 hc->tx_rpseq    = 0;
 497
 498                                 ccid2_change_l_ack_ratio(sk, 2 * dp->dccps_l_ack_ratio);
 499                         }
 500                 }
 501         }
 502
 503         /* check forward path congestion */
 504         if (dccp_packet_without_ack(skb))
 505                 return;
 506
 507         /* still didn't send out new data packets */
 508         if (hc->tx_seqh == hc->tx_seqt)
 509                 goto done;
 510
 511         ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq;
 512         if (after48(ackno, hc->tx_high_ack))
 513                 hc->tx_high_ack = ackno;
 514
 515         seqp = hc->tx_seqt;
 516         while (before48(seqp->ccid2s_seq, ackno)) {
 517                 seqp = seqp->ccid2s_next;
 518                 if (seqp == hc->tx_seqh) {
 519                         seqp = hc->tx_seqh->ccid2s_prev;
 520                         break;
 521                 }
 522         }
 523
 524         /*
 525          * In slow-start, cwnd can increase up to a maximum of Ack Ratio/2
 526          * packets per acknowledgement. Rounding up avoids that cwnd is not
 527          * advanced when Ack Ratio is 1 and gives a slight edge otherwise.
 528          */
 529         if (hc->tx_cwnd < hc->tx_ssthresh)
 530                 maxincr = DIV_ROUND_UP(dp->dccps_l_ack_ratio, 2);
 531
 532         /* go through all ack vectors */
 533         list_for_each_entry(avp, &hc->tx_av_chunks, node) {
 534                 /* go through this ack vector */
 535                 for (; avp->len--; avp->vec++) {
 536                         u64 ackno_end_rl = SUB48(ackno,
 537                                                  dccp_ackvec_runlen(avp->vec));
 538
 539                         ccid2_pr_debug("ackvec %llu |%u,%u|\n",
 540                                        (unsigned long long)ackno,
 541                                        dccp_ackvec_state(avp->vec) >> 6,
 542                                        dccp_ackvec_runlen(avp->vec));
 543                         /* if the seqno we are analyzing is larger than the
 544                          * current ackno, then move towards the tail of our
 545                          * seqnos.
 546                          */
 547                         while (after48(seqp->ccid2s_seq, ackno)) {
 548                                 if (seqp == hc->tx_seqt) {
 549                                         done = 1;
 550                                         break;
 551                                 }
 552                                 seqp = seqp->ccid2s_prev;
 553                         }
 554                         if (done)
 555                                 break;
 556
 557                         /* check all seqnos in the range of the vector
 558                          * run length
 559                          */
 560                         while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) {
 561                                 const u8 state = dccp_ackvec_state(avp->vec);
 562
 563                                 /* new packet received or marked */
 564                                 if (state != DCCPAV_NOT_RECEIVED &&
 565                                     !seqp->ccid2s_acked) {
 566                                         if (state == DCCPAV_ECN_MARKED)
 567                                                 ccid2_congestion_event(sk,
 568                                                                        seqp);
 569                                         else
 570                                                 ccid2_new_ack(sk, seqp,
 571                                                               &maxincr);
 572
 573                                         seqp->ccid2s_acked = 1;
 574                                         ccid2_pr_debug("Got ack for %llu\n",
 575                                                        (unsigned long long)seqp->ccid2s_seq);
 576                                         hc->tx_pipe--;
 577                                 }
 578                                 if (seqp == hc->tx_seqt) {
 579                                         done = 1;
 580                                         break;
 581                                 }
 582                                 seqp = seqp->ccid2s_prev;
 583                         }
 584                         if (done)
 585                                 break;
 586
 587                         ackno = SUB48(ackno_end_rl, 1);
 588                 }
 589                 if (done)
 590                         break;
 591         }
 592
 593         /* The state about what is acked should be correct now
 594          * Check for NUMDUPACK
 595          */
 596         seqp = hc->tx_seqt;
 597         while (before48(seqp->ccid2s_seq, hc->tx_high_ack)) {
 598                 seqp = seqp->ccid2s_next;
 599                 if (seqp == hc->tx_seqh) {
 600                         seqp = hc->tx_seqh->ccid2s_prev;
 601                         break;
 602                 }
 603         }
 604         done = 0;
 605         while (1) {
 606                 if (seqp->ccid2s_acked) {
 607                         done++;
 608                         if (done == NUMDUPACK)
 609                                 break;
 610                 }
 611                 if (seqp == hc->tx_seqt)
 612                         break;
 613                 seqp = seqp->ccid2s_prev;
 614         }
 615
 616         /* If there are at least 3 acknowledgements, anything unacknowledged
 617          * below the last sequence number is considered lost
 618          */
 619         if (done == NUMDUPACK) {
 620                 struct ccid2_seq *last_acked = seqp;
 621
 622                 /* check for lost packets */
 623                 while (1) {
 624                         if (!seqp->ccid2s_acked) {
 625                                 ccid2_pr_debug("Packet lost: %llu\n",
 626                                                (unsigned long long)seqp->ccid2s_seq);
 627                                 /* XXX need to traverse from tail -> head in
 628                                  * order to detect multiple congestion events in
 629                                  * one ack vector.
 630                                  */
 631                                 ccid2_congestion_event(sk, seqp);
 632                                 hc->tx_pipe--;
 633                         }
 634                         if (seqp == hc->tx_seqt)
 635                                 break;
 636                         seqp = seqp->ccid2s_prev;
 637                 }
 638
 639                 hc->tx_seqt = last_acked;
 640         }
 641
 642         /* trim acked packets in tail */
 643         while (hc->tx_seqt != hc->tx_seqh) {
 644                 if (!hc->tx_seqt->ccid2s_acked)
 645                         break;
 646
 647                 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
 648         }
 649
 650         /* restart RTO timer if not all outstanding data has been acked */
 651         if (hc->tx_pipe == 0)
 652                 sk_stop_timer(sk, &hc->tx_rtotimer);
 653         else
 654                 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
 655 done:
 656         /* check if incoming Acks allow pending packets to be sent */
 657         if (sender_was_blocked && !ccid2_cwnd_network_limited(hc))
 658                 tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet);
 659         dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks);
 660 }
 661
 662 static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
 663 {
 664         struct ccid2_hc_tx_sock *hc = ccid_priv(ccid);
 665         struct dccp_sock *dp = dccp_sk(sk);
 666         u32 max_ratio;
 667
 668         /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
 669         hc->tx_ssthresh = ~0U;
 670
 671         /* Use larger initial windows (RFC 4341, section 5). */
 672         hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
 673         hc->tx_expected_wnd = hc->tx_cwnd;
 674
 675         /* Make sure that Ack Ratio is enabled and within bounds. */
 676         max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
 677         if (dp->dccps_l_ack_ratio == 0 || dp->dccps_l_ack_ratio > max_ratio)
 678                 dp->dccps_l_ack_ratio = max_ratio;
 679
 680         /* XXX init ~ to window size... */
 681         if (ccid2_hc_tx_alloc_seq(hc))
 682                 return -ENOMEM;
 683
 684         hc->tx_rto       = DCCP_TIMEOUT_INIT;
 685         hc->tx_rpdupack  = -1;
 686         hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_time_stamp;
 687         hc->tx_cwnd_used = 0;
 688         setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
 689                         (unsigned long)sk);
 690         INIT_LIST_HEAD(&hc->tx_av_chunks);
 691         return 0;
 692 }
 693
 694 static void ccid2_hc_tx_exit(struct sock *sk)
 695 {
 696         struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
 697         int i;
 698
 699         sk_stop_timer(sk, &hc->tx_rtotimer);
 700
 701         for (i = 0; i < hc->tx_seqbufc; i++)
 702                 kfree(hc->tx_seqbuf[i]);
 703         hc->tx_seqbufc = 0;
 704 }
 705
 706 static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 707 {
 708         struct ccid2_hc_rx_sock *hc = ccid2_hc_rx_sk(sk);
 709
 710         if (!dccp_data_packet(skb))
 711                 return;
 712
 713         if (++hc->rx_num_data_pkts >= dccp_sk(sk)->dccps_r_ack_ratio) {
 714                 dccp_send_ack(sk);
 715                 hc->rx_num_data_pkts = 0;
 716         }
 717 }
 718
 719 struct ccid_operations ccid2_ops = {
 720         .ccid_id                  = DCCPC_CCID2,
 721         .ccid_name                = "TCP-like",
 722         .ccid_hc_tx_obj_size      = sizeof(struct ccid2_hc_tx_sock),
 723         .ccid_hc_tx_init          = ccid2_hc_tx_init,
 724         .ccid_hc_tx_exit          = ccid2_hc_tx_exit,
 725         .ccid_hc_tx_send_packet   = ccid2_hc_tx_send_packet,
 726         .ccid_hc_tx_packet_sent   = ccid2_hc_tx_packet_sent,
 727         .ccid_hc_tx_parse_options = ccid2_hc_tx_parse_options,
 728         .ccid_hc_tx_packet_recv   = ccid2_hc_tx_packet_recv,
 729         .ccid_hc_rx_obj_size      = sizeof(struct ccid2_hc_rx_sock),
 730         .ccid_hc_rx_packet_recv   = ccid2_hc_rx_packet_recv,
 731 };
 732
 733 #ifdef CONFIG_IP_DCCP_CCID2_DEBUG
 734 module_param(ccid2_debug, bool, 0644);
 735 MODULE_PARM_DESC(ccid2_debug, "Enable CCID-2 debug messages");
 736 #endif