dccp ccid-2: Update code for the Ack Vector input/registration routine
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / dccp / ackvec.c
blobf7e647e608bbfe672abad27d9dd7327e65b179a0
1 /*
2 * net/dccp/ackvec.c
4 * An implementation of Ack Vectors for the DCCP protocol
5 * Copyright (c) 2007 University of Aberdeen, Scotland, UK
6 * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; version 2 of the License;
13 #include "ackvec.h"
14 #include "dccp.h"
16 #include <linux/init.h>
17 #include <linux/errno.h>
18 #include <linux/kernel.h>
19 #include <linux/skbuff.h>
20 #include <linux/slab.h>
22 #include <net/sock.h>
/*
 * Slab caches used by this file: the first backs struct dccp_ackvec objects,
 * the second backs the struct dccp_ackvec_record entries kept on av_records.
 */
24 static struct kmem_cache *dccp_ackvec_slab;
25 static struct kmem_cache *dccp_ackvec_record_slab;
27 struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
29 struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
31 if (av != NULL) {
32 av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
33 INIT_LIST_HEAD(&av->av_records);
35 return av;
38 static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
40 struct dccp_ackvec_record *cur, *next;
42 list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
43 kmem_cache_free(dccp_ackvec_record_slab, cur);
44 INIT_LIST_HEAD(&av->av_records);
47 void dccp_ackvec_free(struct dccp_ackvec *av)
49 if (likely(av != NULL)) {
50 dccp_ackvec_purge_records(av);
51 kmem_cache_free(dccp_ackvec_slab, av);
55 /**
56 * dccp_ackvec_update_records - Record information about sent Ack Vectors
57 * @av: Ack Vector records to update
58 * @seqno: Sequence number of the packet carrying the Ack Vector just sent
59 * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
61 int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
63 struct dccp_ackvec_record *avr;
65 avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
66 if (avr == NULL)
67 return -ENOBUFS;
69 avr->avr_ack_seqno = seqno;
70 avr->avr_ack_ptr = av->av_buf_head;
71 avr->avr_ack_ackno = av->av_buf_ackno;
72 avr->avr_ack_nonce = nonce_sum;
73 avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
75 * When the buffer overflows, we keep no more than one record. This is
76 * the simplest way of disambiguating sender-Acks dating from before the
77 * overflow from sender-Acks which refer to after the overflow; a simple
78 * solution is preferable here since we are handling an exception.
80 if (av->av_overflow)
81 dccp_ackvec_purge_records(av);
83 * Since GSS is incremented for each packet, the list is automatically
84 * arranged in descending order of @ack_seqno.
86 list_add(&avr->avr_node, &av->av_records);
88 dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
89 (unsigned long long)avr->avr_ack_seqno,
90 (unsigned long long)avr->avr_ack_ackno,
91 avr->avr_ack_runlen);
92 return 0;
95 static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
96 const u64 ackno)
98 struct dccp_ackvec_record *avr;
100 * Exploit that records are inserted in descending order of sequence
101 * number, start with the oldest record first. If @ackno is `before'
102 * the earliest ack_ackno, the packet is too old to be considered.
104 list_for_each_entry_reverse(avr, av_list, avr_node) {
105 if (avr->avr_ack_seqno == ackno)
106 return avr;
107 if (before48(ackno, avr->avr_ack_seqno))
108 break;
110 return NULL;
114 * Buffer index and length computation using modulo-buffersize arithmetic.
115 * Note that, as pointers move from right to left, head is `before' tail.
117 static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
119 return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
122 static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
124 return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
127 u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
129 if (unlikely(av->av_overflow))
130 return DCCPAV_MAX_ACKVEC_LEN;
131 return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
135 * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
136 * @av: non-empty buffer to update
137 * @distance: negative or zero distance of @seqno from buf_ackno downward
138 * @seqno: the (old) sequence number whose record is to be updated
139 * @state: state in which packet carrying @seqno was received
141 static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
142 u64 seqno, enum dccp_ackvec_states state)
144 u16 ptr = av->av_buf_head;
146 BUG_ON(distance > 0);
147 if (unlikely(dccp_ackvec_is_empty(av)))
148 return;
150 do {
151 u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
153 if (distance + runlen >= 0) {
155 * Only update the state if packet has not been received
156 * yet. This is OK as per the second table in RFC 4340,
157 * 11.4.1; i.e. here we are using the following table:
158 * RECEIVED
159 * 0 1 3
160 * S +---+---+---+
161 * T 0 | 0 | 0 | 0 |
162 * O +---+---+---+
163 * R 1 | 1 | 1 | 1 |
164 * E +---+---+---+
165 * D 3 | 0 | 1 | 3 |
166 * +---+---+---+
167 * The "Not Received" state was set by reserve_seats().
169 if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
170 av->av_buf[ptr] = state;
171 else
172 dccp_pr_debug("Not changing %llu state to %u\n",
173 (unsigned long long)seqno, state);
174 break;
177 distance += runlen + 1;
178 ptr = __ackvec_idx_add(ptr, 1);
180 } while (ptr != av->av_buf_tail);
183 /* Mark @num entries after buf_head as "Not yet received". */
184 static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
186 u16 start = __ackvec_idx_add(av->av_buf_head, 1),
187 len = DCCPAV_MAX_ACKVEC_LEN - start;
189 /* check for buffer wrap-around */
190 if (num > len) {
191 memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
192 start = 0;
193 num -= len;
195 if (num)
196 memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
200 * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
201 * @av: container of buffer to update (can be empty or non-empty)
202 * @num_packets: number of packets to register (must be >= 1)
203 * @seqno: sequence number of the first packet in @num_packets
204 * @state: state in which packet carrying @seqno was received
206 static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
207 u64 seqno, enum dccp_ackvec_states state)
209 u32 num_cells = num_packets;
211 if (num_packets > DCCPAV_BURST_THRESH) {
212 u32 lost_packets = num_packets - 1;
214 DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
216 * We received 1 packet and have a loss of size "num_packets-1"
217 * which we squeeze into num_cells-1 rather than reserving an
218 * entire byte for each lost packet.
219 * The reason is that the vector grows in O(burst_length); when
220 * it grows too large there will no room left for the payload.
221 * This is a trade-off: if a few packets out of the burst show
222 * up later, their state will not be changed; it is simply too
223 * costly to reshuffle/reallocate/copy the buffer each time.
224 * Should such problems persist, we will need to switch to a
225 * different underlying data structure.
227 for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
228 u8 len = min(lost_packets, (u32)DCCPAV_MAX_RUNLEN);
230 av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
231 av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
233 lost_packets -= len;
237 if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
238 DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
239 av->av_overflow = true;
242 av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
243 if (av->av_overflow)
244 av->av_buf_tail = av->av_buf_head;
246 av->av_buf[av->av_buf_head] = state;
247 av->av_buf_ackno = seqno;
249 if (num_packets > 1)
250 dccp_ackvec_reserve_seats(av, num_packets - 1);
254 * dccp_ackvec_input - Register incoming packet in the buffer
256 void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
258 u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
259 enum dccp_ackvec_states state = DCCPAV_RECEIVED;
261 if (dccp_ackvec_is_empty(av)) {
262 dccp_ackvec_add_new(av, 1, seqno, state);
263 av->av_tail_ackno = seqno;
265 } else {
266 s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
267 u8 *current_head = av->av_buf + av->av_buf_head;
269 if (num_packets == 1 &&
270 dccp_ackvec_state(current_head) == state &&
271 dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
273 *current_head += 1;
274 av->av_buf_ackno = seqno;
276 } else if (num_packets > 0) {
277 dccp_ackvec_add_new(av, num_packets, seqno, state);
278 } else {
279 dccp_ackvec_update_old(av, num_packets, seqno, state);
285 * If several packets are missing, the HC-Receiver may prefer to enter multiple
286 * bytes with run length 0, rather than a single byte with a larger run length;
287 * this simplifies table updates if one of the missing packets arrives.
289 static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
290 const unsigned int packets,
291 const unsigned char state)
293 long gap;
294 long new_head;
296 if (av->av_vec_len + packets > DCCPAV_MAX_ACKVEC_LEN)
297 return -ENOBUFS;
299 gap = packets - 1;
300 new_head = av->av_buf_head - packets;
302 if (new_head < 0) {
303 if (gap > 0) {
304 memset(av->av_buf, DCCPAV_NOT_RECEIVED,
305 gap + new_head + 1);
306 gap = -new_head;
308 new_head += DCCPAV_MAX_ACKVEC_LEN;
311 av->av_buf_head = new_head;
313 if (gap > 0)
314 memset(av->av_buf + av->av_buf_head + 1,
315 DCCPAV_NOT_RECEIVED, gap);
317 av->av_buf[av->av_buf_head] = state;
318 av->av_vec_len += packets;
319 return 0;
323 * Implements the RFC 4340, Appendix A
325 int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
326 const u64 ackno, const u8 state)
328 u8 *cur_head = av->av_buf + av->av_buf_head,
329 *buf_end = av->av_buf + DCCPAV_MAX_ACKVEC_LEN;
331 * Check at the right places if the buffer is full, if it is, tell the
332 * caller to start dropping packets till the HC-Sender acks our ACK
333 * vectors, when we will free up space in av_buf.
335 * We may well decide to do buffer compression, etc, but for now lets
336 * just drop.
338 * From Appendix A.1.1 (`New Packets'):
340 * Of course, the circular buffer may overflow, either when the
341 * HC-Sender is sending data at a very high rate, when the
342 * HC-Receiver's acknowledgements are not reaching the HC-Sender,
343 * or when the HC-Sender is forgetting to acknowledge those acks
344 * (so the HC-Receiver is unable to clean up old state). In this
345 * case, the HC-Receiver should either compress the buffer (by
346 * increasing run lengths when possible), transfer its state to
347 * a larger buffer, or, as a last resort, drop all received
348 * packets, without processing them whatsoever, until its buffer
349 * shrinks again.
352 /* See if this is the first ackno being inserted */
353 if (av->av_vec_len == 0) {
354 *cur_head = state;
355 av->av_vec_len = 1;
356 } else if (after48(ackno, av->av_buf_ackno)) {
357 const u64 delta = dccp_delta_seqno(av->av_buf_ackno, ackno);
360 * Look if the state of this packet is the same as the
361 * previous ackno and if so if we can bump the head len.
363 if (delta == 1 && dccp_ackvec_state(cur_head) == state &&
364 dccp_ackvec_runlen(cur_head) < DCCPAV_MAX_RUNLEN)
365 *cur_head += 1;
366 else if (dccp_ackvec_set_buf_head_state(av, delta, state))
367 return -ENOBUFS;
368 } else {
370 * A.1.2. Old Packets
372 * When a packet with Sequence Number S <= buf_ackno
373 * arrives, the HC-Receiver will scan the table for
374 * the byte corresponding to S. (Indexing structures
375 * could reduce the complexity of this scan.)
377 u64 delta = dccp_delta_seqno(ackno, av->av_buf_ackno);
379 while (1) {
380 const u8 len = dccp_ackvec_runlen(cur_head);
382 * valid packets not yet in av_buf have a reserved
383 * entry, with a len equal to 0.
385 if (*cur_head == DCCPAV_NOT_RECEIVED && delta == 0) {
386 dccp_pr_debug("Found %llu reserved seat!\n",
387 (unsigned long long)ackno);
388 *cur_head = state;
389 goto out;
391 /* len == 0 means one packet */
392 if (delta < len + 1)
393 goto out_duplicate;
395 delta -= len + 1;
396 if (++cur_head == buf_end)
397 cur_head = av->av_buf;
401 av->av_buf_ackno = ackno;
402 out:
403 return 0;
405 out_duplicate:
406 /* Duplicate packet */
407 dccp_pr_debug("Received a dup or already considered lost "
408 "packet: %llu\n", (unsigned long long)ackno);
409 return -EILSEQ;
412 static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
413 struct dccp_ackvec_record *avr)
415 struct dccp_ackvec_record *next;
417 /* sort out vector length */
418 if (av->av_buf_head <= avr->avr_ack_ptr)
419 av->av_vec_len = avr->avr_ack_ptr - av->av_buf_head;
420 else
421 av->av_vec_len = DCCPAV_MAX_ACKVEC_LEN - 1 -
422 av->av_buf_head + avr->avr_ack_ptr;
424 /* free records */
425 list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
426 list_del(&avr->avr_node);
427 kmem_cache_free(dccp_ackvec_record_slab, avr);
431 void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
432 const u64 ackno)
434 struct dccp_ackvec_record *avr;
437 * If we traverse backwards, it should be faster when we have large
438 * windows. We will be receiving ACKs for stuff we sent a while back
439 * -sorbo.
441 list_for_each_entry_reverse(avr, &av->av_records, avr_node) {
442 if (ackno == avr->avr_ack_seqno) {
443 dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
444 "ack_ackno=%llu, ACKED!\n",
445 dccp_role(sk), avr->avr_ack_runlen,
446 (unsigned long long)avr->avr_ack_seqno,
447 (unsigned long long)avr->avr_ack_ackno);
448 dccp_ackvec_throw_record(av, avr);
449 break;
450 } else if (avr->avr_ack_seqno > ackno)
451 break; /* old news */
455 static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
456 struct sock *sk, u64 *ackno,
457 const unsigned char len,
458 const unsigned char *vector)
460 unsigned char i;
461 struct dccp_ackvec_record *avr;
463 /* Check if we actually sent an ACK vector */
464 if (list_empty(&av->av_records))
465 return;
467 i = len;
469 * XXX
470 * I think it might be more efficient to work backwards. See comment on
471 * rcv_ackno. -sorbo.
473 avr = list_entry(av->av_records.next, struct dccp_ackvec_record, avr_node);
474 while (i--) {
475 const u8 rl = dccp_ackvec_runlen(vector);
476 u64 ackno_end_rl;
478 dccp_set_seqno(&ackno_end_rl, *ackno - rl);
481 * If our AVR sequence number is greater than the ack, go
482 * forward in the AVR list until it is not so.
484 list_for_each_entry_from(avr, &av->av_records, avr_node) {
485 if (!after48(avr->avr_ack_seqno, *ackno))
486 goto found;
488 /* End of the av_records list, not found, exit */
489 break;
490 found:
491 if (between48(avr->avr_ack_seqno, ackno_end_rl, *ackno)) {
492 if (dccp_ackvec_state(vector) != DCCPAV_NOT_RECEIVED) {
493 dccp_pr_debug("%s ACK vector 0, len=%d, "
494 "ack_seqno=%llu, ack_ackno=%llu, "
495 "ACKED!\n",
496 dccp_role(sk), len,
497 (unsigned long long)
498 avr->avr_ack_seqno,
499 (unsigned long long)
500 avr->avr_ack_ackno);
501 dccp_ackvec_throw_record(av, avr);
502 break;
505 * If it wasn't received, continue scanning... we might
506 * find another one.
510 dccp_set_seqno(ackno, ackno_end_rl - 1);
511 ++vector;
515 int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
516 u64 *ackno, const u8 opt, const u8 *value, const u8 len)
518 if (len > DCCP_SINGLE_OPT_MAXLEN)
519 return -1;
521 /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
522 dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
523 ackno, len, value);
524 return 0;
528 * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
529 * This routine is called when the peer acknowledges the receipt of Ack Vectors
530 * up to and including @ackno. While based on on section A.3 of RFC 4340, here
531 * are additional precautions to prevent corrupted buffer state. In particular,
532 * we use tail_ackno to identify outdated records; it always marks the earliest
533 * packet of group (2) in 11.4.2.
535 void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
537 struct dccp_ackvec_record *avr, *next;
538 u8 runlen_now, eff_runlen;
539 s64 delta;
541 avr = dccp_ackvec_lookup(&av->av_records, ackno);
542 if (avr == NULL)
543 return;
545 * Deal with outdated acknowledgments: this arises when e.g. there are
546 * several old records and the acks from the peer come in slowly. In
547 * that case we may still have records that pre-date tail_ackno.
549 delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
550 if (delta < 0)
551 goto free_records;
553 * Deal with overlapping Ack Vectors: don't subtract more than the
554 * number of packets between tail_ackno and ack_ackno.
556 eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
558 runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
560 * The run length of Ack Vector cells does not decrease over time. If
561 * the run length is the same as at the time the Ack Vector was sent, we
562 * free the ack_ptr cell. That cell can however not be freed if the run
563 * length has increased: in this case we need to move the tail pointer
564 * backwards (towards higher indices), to its next-oldest neighbour.
566 if (runlen_now > eff_runlen) {
568 av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
569 av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
571 /* This move may not have cleared the overflow flag. */
572 if (av->av_overflow)
573 av->av_overflow = (av->av_buf_head == av->av_buf_tail);
574 } else {
575 av->av_buf_tail = avr->avr_ack_ptr;
577 * We have made sure that avr points to a valid cell within the
578 * buffer. This cell is either older than head, or equals head
579 * (empty buffer): in both cases we no longer have any overflow.
581 av->av_overflow = 0;
585 * The peer has acknowledged up to and including ack_ackno. Hence the
586 * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
588 av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
590 free_records:
591 list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
592 list_del(&avr->avr_node);
593 kmem_cache_free(dccp_ackvec_record_slab, avr);
597 int __init dccp_ackvec_init(void)
599 dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
600 sizeof(struct dccp_ackvec), 0,
601 SLAB_HWCACHE_ALIGN, NULL);
602 if (dccp_ackvec_slab == NULL)
603 goto out_err;
605 dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
606 sizeof(struct dccp_ackvec_record),
607 0, SLAB_HWCACHE_ALIGN, NULL);
608 if (dccp_ackvec_record_slab == NULL)
609 goto out_destroy_slab;
611 return 0;
613 out_destroy_slab:
614 kmem_cache_destroy(dccp_ackvec_slab);
615 dccp_ackvec_slab = NULL;
616 out_err:
617 DCCP_CRIT("Unable to create Ack Vector slab cache");
618 return -ENOBUFS;
621 void dccp_ackvec_exit(void)
623 if (dccp_ackvec_slab != NULL) {
624 kmem_cache_destroy(dccp_ackvec_slab);
625 dccp_ackvec_slab = NULL;
627 if (dccp_ackvec_record_slab != NULL) {
628 kmem_cache_destroy(dccp_ackvec_record_slab);
629 dccp_ackvec_record_slab = NULL;