/**
 * Connection oriented routing
 * Copyright (C) 2007-2021 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/gfp.h>
#include <linux/jiffies.h>
#include <linux/slab.h>

#include "cor.h"

static struct kmem_cache *cor_connretrans_slab;
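
/**
 * kref release function for struct cor_conn_retrans: the entry must already
 * be in state CONN_RETRANS_ACKED (unlinked from conn_list and timeout_list).
 * Frees the entry and drops the reference it held on its connection.
 */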
void cor_free_connretrans(struct kref *ref)
{
	struct cor_conn_retrans *cr = container_of(ref, struct cor_conn_retrans,
			ref);
	struct cor_conn *cn = cr->trgt_out_o;

	BUG_ON(cr->state != CONN_RETRANS_ACKED);

	kmem_cache_free(cor_connretrans_slab, cr);
	cor_conn_kref_put(cn, "conn_retrans");
}
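
/**
 * Peek the next due retransmit: the head of the low-latency or the
 * high-latency timeout list, whichever times out first. A low-latency head
 * that is already overdue is returned without comparing timeouts.
 */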
static struct cor_conn_retrans *cor_peek_next_conn_retrans(
		struct cor_neighbor *nb_retransconnlocked)
{
	struct cor_conn_retrans *cr1 = 0;
	struct cor_conn_retrans *cr2 = 0;

	if (list_empty(&nb_retransconnlocked->retrans_conn_lowlatency_list) == 0)
		cr1 = container_of(
				nb_retransconnlocked->retrans_conn_lowlatency_list.next,
				struct cor_conn_retrans, timeout_list);

	if (list_empty(&nb_retransconnlocked->retrans_conn_highlatency_list) == 0)
		cr2 = container_of(
				nb_retransconnlocked->retrans_conn_highlatency_list.next,
				struct cor_conn_retrans, timeout_list);

	if (cr1 == 0)
		return cr2;
	if (cr2 == 0)
		return cr1;

	if (time_before_eq(cr1->timeout, jiffies))
		return cr1;

	if (time_before_eq(cr1->timeout, cr2->timeout))
		return cr1;

	return cr2;
}
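
/**
 * Called with nb->retrans_conn_lock held. If the next retransmit is already
 * due, queue the retransmit work on the device queue right away; otherwise
 * (re)arm retrans_conn_timer. Arming the timer takes a neighbor reference
 * which cor_retransmit_conn_timerfunc() drops again.
 */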
void cor_reschedule_conn_retrans_timer(
		struct cor_neighbor *nb_retransconnlocked)
{
	struct cor_conn_retrans *cr =
			cor_peek_next_conn_retrans(nb_retransconnlocked);

	if (cr == 0)
		return;

	if (time_before_eq(cr->timeout, jiffies)) {
		cor_dev_queue_enqueue(nb_retransconnlocked->cd,
				&nb_retransconnlocked->rb_cr, 0,
				ns_to_ktime(0), QOS_CALLER_CONN_RETRANS, 0);
	} else {
		if (mod_timer(&nb_retransconnlocked->retrans_conn_timer,
				cr->timeout) == 0) {
			cor_nb_kref_get(nb_retransconnlocked,
					"retrans_conn_timer");
		}
	}
}

/**
 * caller must also call kref_get/put, see cor_reschedule_conn_retrans_timer
 */
static void cor_cancel_conn_retrans(struct cor_neighbor *nb_retransconnlocked,
		struct cor_conn *trgt_out_lx, struct cor_conn_retrans *cr,
		__u64 *bytes_acked)
{
	if (unlikely(cr->state == CONN_RETRANS_ACKED))
		return;

	if (cr->state == CONN_RETRANS_SCHEDULED) {
		list_del(&cr->timeout_list);
		kref_put(&cr->ref, cor_kreffree_bug);
	} else if (cr->state == CONN_RETRANS_LOWWINDOW) {
		BUG_ON(trgt_out_lx->trgt.out.retrans_lowwindow == 0);
		if (likely(trgt_out_lx->trgt.out.retrans_lowwindow != 65535))
			trgt_out_lx->trgt.out.retrans_lowwindow--;
	}

	if (cr->state != CONN_RETRANS_INITIAL)
		*bytes_acked += cr->length;

	list_del(&cr->conn_list);
	cr->state = CONN_RETRANS_ACKED;

	kref_put(&cr->ref, cor_free_connretrans); /* conn_list */
}

/**
 * nb->retrans_conn_lock must be held when calling this
 * (see cor_schedule_retransmit_conn())
 */
static void cor_cancel_acked_conn_retrans(struct cor_conn *trgt_out_l,
		__u64 *bytes_acked)
{
	__u32 seqno_acked = trgt_out_l->trgt.out.seqno_acked;

	while (list_empty(&trgt_out_l->trgt.out.retrans_list) == 0) {
		struct cor_conn_retrans *cr = container_of(
				trgt_out_l->trgt.out.retrans_list.next,
				struct cor_conn_retrans, conn_list);

		if (cor_seqno_after(cr->seqno + cr->length, seqno_acked)) {
			if (cor_seqno_before(cr->seqno, seqno_acked)) {
				*bytes_acked += seqno_acked - cr->seqno;
				cr->length -= seqno_acked - cr->seqno;
				cr->seqno = seqno_acked;
			}
			break;
		}

		cor_cancel_conn_retrans(trgt_out_l->trgt.out.nb, trgt_out_l, cr,
				bytes_acked);
	}

	cor_reschedule_conn_retrans_timer(trgt_out_l->trgt.out.nb);
}
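
/**
 * Cancel all pending retransmits of one connection and report the cancelled
 * bytes to the neighbor congestion window accounting.
 */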
void cor_cancel_all_conn_retrans(struct cor_conn *trgt_out_lx)
{
	struct cor_neighbor *nb = trgt_out_lx->trgt.out.nb;
	__u64 bytes_acked = 0;

	spin_lock_bh(&nb->retrans_conn_lock);

	while (list_empty(&trgt_out_lx->trgt.out.retrans_list) == 0) {
		struct cor_conn_retrans *cr = container_of(
				trgt_out_lx->trgt.out.retrans_list.next,
				struct cor_conn_retrans, conn_list);
		BUG_ON(cr->trgt_out_o != trgt_out_lx);

		cor_cancel_conn_retrans(nb, trgt_out_lx, cr, &bytes_acked);
	}

	cor_reschedule_conn_retrans_timer(nb);

	spin_unlock_bh(&nb->retrans_conn_lock);

	if (bytes_acked > 0)
		cor_nbcongwin_data_acked(nb, bytes_acked);
}
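
/**
 * Cancel all retransmits on one of the neighbor's timeout lists. Since the
 * lock order is conn rcv_lock before nb->retrans_conn_lock, the head entry
 * is peeked and referenced under retrans_conn_lock, the lock is dropped to
 * take the conn's rcv_lock, and the entry is revalidated after relocking
 * before it is cancelled.
 */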
static void _cor_cancel_all_conn_retrans_nb(struct cor_neighbor *nb,
		struct list_head *retrans_list, __u64 *bytes_acked)
{
	while (1) {
		struct cor_conn_retrans *cr;

		spin_lock_bh(&nb->retrans_conn_lock);

		if (list_empty(retrans_list)) {
			spin_unlock_bh(&nb->retrans_conn_lock);
			break;
		}

		cr = container_of(retrans_list->next, struct cor_conn_retrans,
				timeout_list);

		kref_get(&cr->ref);

		spin_unlock_bh(&nb->retrans_conn_lock);

		spin_lock_bh(&cr->trgt_out_o->rcv_lock);
		spin_lock_bh(&nb->retrans_conn_lock);

		if (likely(cr == container_of(retrans_list->next,
				struct cor_conn_retrans, timeout_list)))
			cor_cancel_conn_retrans(nb, cr->trgt_out_o, cr,
					bytes_acked);

		spin_unlock_bh(&nb->retrans_conn_lock);
		spin_unlock_bh(&cr->trgt_out_o->rcv_lock);

		kref_put(&cr->ref, cor_free_connretrans);
	}
}

static void cor_cancel_all_conn_retrans_nb(struct cor_neighbor *nb)
{
	__u64 bytes_acked = 0;

	_cor_cancel_all_conn_retrans_nb(nb,
			&nb->retrans_conn_lowlatency_list, &bytes_acked);
	_cor_cancel_all_conn_retrans_nb(nb,
			&nb->retrans_conn_highlatency_list, &bytes_acked);

	if (bytes_acked > 0)
		cor_nbcongwin_data_acked(nb, bytes_acked);
}
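
/**
 * Allocate and initialize a retransmit entry covering [seqno, seqno + len)
 * and link it into the connection's retrans_list: behind cr_splitted when
 * the entry is split off an existing one, at the tail otherwise. The entry
 * starts with two references, one for the caller and one for conn_list.
 */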
static struct cor_conn_retrans *cor_prepare_conn_retrans(
		struct cor_conn *trgt_out_l, __u32 seqno, __u32 len,
		__u8 windowused, struct cor_conn_retrans *cr_splitted,
		int retransconnlocked)
{
	struct cor_neighbor *nb = trgt_out_l->trgt.out.nb;

	struct cor_conn_retrans *cr = kmem_cache_alloc(cor_connretrans_slab,
			GFP_ATOMIC);

	if (unlikely(cr == 0))
		return 0;

	BUG_ON(trgt_out_l->isreset != 0);

	memset(cr, 0, sizeof(struct cor_conn_retrans));
	cr->trgt_out_o = trgt_out_l;
	cor_conn_kref_get(trgt_out_l, "conn_retrans");
	cr->seqno = seqno;
	cr->length = len;
	cr->windowused = windowused;
	kref_init(&cr->ref);

	if (retransconnlocked == 0)
		spin_lock_bh(&nb->retrans_conn_lock);

	if (cr_splitted != 0)
		list_add(&cr->conn_list, &cr_splitted->conn_list);
	else
		list_add_tail(&cr->conn_list,
				&cr->trgt_out_o->trgt.out.retrans_list);
	kref_get(&cr->ref); /* conn_list */

	if (retransconnlocked == 0)
		spin_unlock_bh(&nb->retrans_conn_lock);

	return cr;
}

#define RC_SENDRETRANS_OK 0
#define RC_SENDRETRANS_OOM 1
#define RC_SENDRETRANS_QUEUEFULL 2
#define RC_SENDRETRANS_QUEUEFULLDROPPED 3
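
/**
 * Transmit one retransmit entry, either as its own skb or piggybacked on a
 * low-priority control message, re-reading the payload from the data buffer
 * at cr->seqno. The flush flag is set when the conn has flush set, this
 * chunk ends exactly at seqno_nextsend and no more data is buffered.
 */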
static int __cor_send_retrans(struct cor_neighbor *nb,
		struct cor_conn *trgt_out_l, struct cor_conn_retrans *cr,
		__u64 *bytes_sent)
{
	__u8 flush = 0;

	BUG_ON(cr->length == 0);

	if (trgt_out_l->flush != 0 && cor_seqno_eq(cr->seqno + cr->length,
			trgt_out_l->trgt.out.seqno_nextsend) &&
			trgt_out_l->data_buf.read_remaining == 0)
		flush = 1;

	if (cor_send_conndata_as_skb(nb, cr->length)) {
		struct sk_buff *skb;
		char *dst;
		int rc;

		skb = cor_create_packet_conndata(nb, cr->length, GFP_ATOMIC,
				trgt_out_l->trgt.out.conn_id, cr->seqno,
				cr->windowused, flush);
		if (unlikely(skb == 0))
			return RC_SENDRETRANS_OOM;

		dst = skb_put(skb, cr->length);

		cor_databuf_pullold(trgt_out_l, cr->seqno, dst, cr->length);

		rc = cor_dev_queue_xmit(skb, nb->cd,
				QOS_CALLER_CONN_RETRANS);
		if (rc == NET_XMIT_DROP)
			return RC_SENDRETRANS_QUEUEFULLDROPPED;
		cor_schedule_retransmit_conn(cr, 1, 0);
		if (rc != NET_XMIT_SUCCESS)
			return RC_SENDRETRANS_QUEUEFULL;
	} else {
		struct cor_control_msg_out *cm;
		char *buf;

		buf = kmalloc(cr->length, GFP_ATOMIC);
		if (unlikely(buf == 0))
			return RC_SENDRETRANS_OOM;

		cm = cor_alloc_control_msg(nb, ACM_PRIORITY_LOW);
		if (unlikely(cm == 0)) {
			kfree(buf);
			return RC_SENDRETRANS_OOM;
		}

		cor_databuf_pullold(trgt_out_l, cr->seqno, buf, cr->length);

		cor_send_conndata(cm, trgt_out_l->trgt.out.conn_id, cr->seqno,
				buf, buf, cr->length, cr->windowused,
				flush, trgt_out_l->is_highlatency, cr);
	}

	*bytes_sent += cr->length;

	return RC_SENDRETRANS_OK;
}
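
/**
 * Called with nb->retrans_conn_lock held: if the retransmit is larger than
 * what may be sent at once (target MSS, further capped by the remaining
 * send window), split off the excess into a second scheduled entry directly
 * behind it.
 */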
static int _cor_send_retrans_splitcr_ifneeded(
		struct cor_neighbor *nb_retransconnlocked,
		struct cor_conn *trgt_out_l, struct cor_conn_retrans *cr)
{
	__u32 targetmss = cor_mss_conndata(nb_retransconnlocked,
			trgt_out_l->is_highlatency != 0);
	__u32 windowlimit = trgt_out_l->trgt.out.seqno_windowlimit - cr->seqno;
	__u32 maxsize = targetmss;

	if (windowlimit < maxsize)
		maxsize = windowlimit;

	if (unlikely(cr->length > maxsize)) {
		struct cor_conn_retrans *cr2 = cor_prepare_conn_retrans(
				trgt_out_l, cr->seqno + maxsize,
				cr->length - maxsize, cr->windowused, cr, 1);

		if (unlikely(cr2 == 0))
			return RC_SENDRETRANS_OOM;

		cr2->timeout = cr->timeout;

		if (trgt_out_l->is_highlatency)
			list_add(&cr2->timeout_list,
					&nb_retransconnlocked->retrans_conn_highlatency_list);
		else
			list_add(&cr2->timeout_list,
					&nb_retransconnlocked->retrans_conn_lowlatency_list);
		/* cr2: kref from alloc goes to kref for timeout_list */
		cr2->state = CONN_RETRANS_SCHEDULED;

		cr->length = maxsize;
	}

	return RC_SENDRETRANS_OK;
}
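
/**
 * Send one retransmit that was just taken off its timeout list. If the send
 * window has closed, the entry is parked in CONN_RETRANS_LOWWINDOW instead
 * of being sent; after an allocation failure or a dropped packet it is put
 * back on the timeout list. Returns nonzero if sending should pause.
 */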
static int _cor_send_retrans(struct cor_neighbor *nb,
		struct cor_conn_retrans *cr, __u64 *bytes_sent)
{
	struct cor_conn *trgt_out_o = cr->trgt_out_o;
	int rc;

	spin_lock_bh(&trgt_out_o->rcv_lock);

	BUG_ON(trgt_out_o->targettype != TARGET_OUT);
	BUG_ON(trgt_out_o->trgt.out.nb != nb);

	spin_lock_bh(&nb->retrans_conn_lock);
	if (unlikely(cr->state == CONN_RETRANS_ACKED)) {
		spin_unlock_bh(&nb->retrans_conn_lock);
		spin_unlock_bh(&trgt_out_o->rcv_lock);
		return 0;
	}

	BUG_ON(trgt_out_o->isreset != 0);

	BUG_ON(cor_seqno_before(cr->seqno, trgt_out_o->trgt.out.seqno_acked));

	if (cor_seqno_after_eq(cr->seqno,
			trgt_out_o->trgt.out.seqno_windowlimit)) {
		BUG_ON(cr->state != CONN_RETRANS_SENDING);
		cr->state = CONN_RETRANS_LOWWINDOW;
		if (likely(trgt_out_o->trgt.out.retrans_lowwindow != 65535))
			trgt_out_o->trgt.out.retrans_lowwindow++;

		spin_unlock_bh(&nb->retrans_conn_lock);
		spin_unlock_bh(&trgt_out_o->rcv_lock);
		return 0;
	}

	rc = _cor_send_retrans_splitcr_ifneeded(nb, trgt_out_o, cr);

	spin_unlock_bh(&nb->retrans_conn_lock);

	cor_conn_kref_get(trgt_out_o, "stack");

	if (rc == RC_SENDRETRANS_OK)
		rc = __cor_send_retrans(nb, trgt_out_o, cr, bytes_sent);

	if (rc == RC_SENDRETRANS_OOM || rc == RC_SENDRETRANS_QUEUEFULLDROPPED) {
		spin_lock_bh(&nb->retrans_conn_lock);
		if (unlikely(cr->state == CONN_RETRANS_ACKED)) {
		} else if (likely(cr->state == CONN_RETRANS_SENDING)) {
			if (rc == RC_SENDRETRANS_OOM)
				cr->timeout = jiffies + 1;
			if (trgt_out_o->is_highlatency)
				list_add(&cr->timeout_list,
						&nb->retrans_conn_highlatency_list);
			else
				list_add(&cr->timeout_list,
						&nb->retrans_conn_lowlatency_list);
			kref_get(&cr->ref);
			cr->state = CONN_RETRANS_SCHEDULED;
		} else {
			BUG();
		}
		spin_unlock_bh(&nb->retrans_conn_lock);
	}

	spin_unlock_bh(&trgt_out_o->rcv_lock);

	cor_conn_kref_put(trgt_out_o, "stack");

	return (rc == RC_SENDRETRANS_OOM ||
			rc == RC_SENDRETRANS_QUEUEFULL ||
			rc == RC_SENDRETRANS_QUEUEFULLDROPPED);
}
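
/**
 * Work item for the retransmit queues of one neighbor: send every scheduled
 * retransmit whose timeout has expired, rearm the timer for the first
 * unexpired entry and account the retransmitted bytes in the congestion
 * window. Returns QOS_RESUME_CONG when the device queue filled up.
 */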
int cor_send_retrans(struct cor_neighbor *nb, int *sent)
{
	int queuefull = 0;
	int nbstate = cor_get_neigh_state(nb);
	__u64 bytes_sent = 0;

	if (unlikely(nbstate == NEIGHBOR_STATE_STALLED)) {
		return QOS_RESUME_DONE;
	} else if (unlikely(nbstate == NEIGHBOR_STATE_KILLED)) {
		/**
		 * cor_cancel_all_conn_retrans_nb should not be needed, because
		 * cor_reset_all_conns calls cor_cancel_all_conn_retrans
		 */
		cor_cancel_all_conn_retrans_nb(nb);
		return QOS_RESUME_DONE;
	}

	while (1) {
		struct cor_conn_retrans *cr = 0;

		spin_lock_bh(&nb->retrans_conn_lock);

		cr = cor_peek_next_conn_retrans(nb);
		if (cr == 0) {
			spin_unlock_bh(&nb->retrans_conn_lock);
			break;
		}

		BUG_ON(cr->state != CONN_RETRANS_SCHEDULED);

		if (time_after(cr->timeout, jiffies)) {
			cor_reschedule_conn_retrans_timer(nb);
			spin_unlock_bh(&nb->retrans_conn_lock);
			break;
		}

		list_del(&cr->timeout_list);
		cr->state = CONN_RETRANS_SENDING;

		spin_unlock_bh(&nb->retrans_conn_lock);

		queuefull = _cor_send_retrans(nb, cr, &bytes_sent);
		kref_put(&cr->ref, cor_free_connretrans); /* timeout_list */
		if (queuefull)
			break;

		*sent = 1;
	}

	if (bytes_sent > 0)
		cor_nbcongwin_data_retransmitted(nb, bytes_sent);

	return queuefull ? QOS_RESUME_CONG : QOS_RESUME_DONE;
}
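
/**
 * Timer callback armed by cor_reschedule_conn_retrans_timer(): queue the
 * retransmit work on the device queue and drop the neighbor reference taken
 * when the timer was armed.
 */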
void cor_retransmit_conn_timerfunc(struct timer_list *retrans_conn_timer)
{
	struct cor_neighbor *nb = container_of(retrans_conn_timer,
			struct cor_neighbor, retrans_conn_timer);

	cor_dev_queue_enqueue(nb->cd, &nb->rb_cr, 0, ns_to_ktime(0),
			QOS_CALLER_CONN_RETRANS, 0);
	cor_nb_kref_put(nb, "retrans_conn_timer");
}
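
/**
 * An out-of-order ack covers the middle of this retransmit: keep the part
 * before the acked range in cr and split the part behind it off into a new
 * entry cr2, which inherits timeout and state.
 */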
static void cor_conn_ack_ooo_rcvd_splitcr(struct cor_conn *trgt_out_l,
		struct cor_conn_retrans *cr, __u32 seqno_ooo, __u32 length,
		__u64 *bytes_acked)
{
	struct cor_conn_retrans *cr2;
	__u32 seqno_cr2start;
	__u32 oldcrlength = cr->length;

	if (cr->state != CONN_RETRANS_SCHEDULED &&
			cr->state != CONN_RETRANS_LOWWINDOW)
		return;

	seqno_cr2start = seqno_ooo + length;
	cr2 = cor_prepare_conn_retrans(trgt_out_l, seqno_cr2start,
			cr->seqno + cr->length - seqno_cr2start,
			cr->windowused, cr, 1);

	if (unlikely(cr2 == 0))
		return;

	BUG_ON(cr2->length > cr->length);

	cr2->timeout = cr->timeout;
	cr2->state = cr->state;

	if (cr->state == CONN_RETRANS_SCHEDULED) {
		list_add(&cr2->timeout_list, &cr->timeout_list);
		kref_get(&cr2->ref);
	}

	BUG_ON(seqno_ooo - cr->seqno > cr->length);

	cr->length = seqno_ooo - cr->seqno;
	BUG_ON(cr->length + length + cr2->length != oldcrlength);

	kref_put(&cr2->ref, cor_kreffree_bug); /* alloc */

	*bytes_acked += length;
}
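
/**
 * Process an out-of-order ack for [seqno_ooo, seqno_ooo + length): cancel
 * retransmits fully covered by the acked range, trim entries covered only
 * at their head or tail, split an entry that contains the range in its
 * middle, then advance seqno_acked to the start of the first remaining
 * retransmit.
 */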
void cor_conn_ack_ooo_rcvd(struct cor_neighbor *nb, __u32 conn_id,
		struct cor_conn *trgt_out, __u32 seqno_ooo, __u32 length,
		__u64 *bytes_acked)
{
	struct list_head *curr;

	if (unlikely(length == 0))
		return;

	spin_lock_bh(&trgt_out->rcv_lock);

	if (unlikely(trgt_out->targettype != TARGET_OUT))
		goto out;
	if (unlikely(trgt_out->trgt.out.nb != nb))
		goto out;
	if (unlikely(trgt_out->trgt.out.conn_id != conn_id))
		goto out;

	cor_nb_kref_get(nb, "stack");
	spin_lock_bh(&nb->retrans_conn_lock);

	curr = trgt_out->trgt.out.retrans_list.next;
	while (curr != &trgt_out->trgt.out.retrans_list) {
		struct cor_conn_retrans *cr = container_of(curr,
				struct cor_conn_retrans, conn_list);

		int ack_covers_start = cor_seqno_after_eq(cr->seqno, seqno_ooo);
		int ack_covers_end = cor_seqno_before_eq(cr->seqno + cr->length,
				seqno_ooo + length);

		curr = curr->next;

		if (cor_seqno_before(cr->seqno + cr->length, seqno_ooo))
			continue;

		if (cor_seqno_after(cr->seqno, seqno_ooo + length))
			break;

		if (likely(ack_covers_start && ack_covers_end)) {
			cor_cancel_conn_retrans(nb, trgt_out, cr, bytes_acked);
			cor_reschedule_conn_retrans_timer(nb);
		} else if (ack_covers_start) {
			__u32 diff = seqno_ooo + length - cr->seqno;

			BUG_ON(diff >= cr->length);
			cr->seqno += diff;
			cr->length -= diff;
			*bytes_acked += diff;
		} else if (ack_covers_end) {
			__u32 diff = cr->seqno + cr->length - seqno_ooo;

			BUG_ON(diff > length);
			cr->length -= diff;
			*bytes_acked += diff;
		} else {
			cor_conn_ack_ooo_rcvd_splitcr(trgt_out, cr, seqno_ooo,
					length, bytes_acked);
			break;
		}
	}

	if (unlikely(list_empty(&trgt_out->trgt.out.retrans_list))) {
		trgt_out->trgt.out.seqno_acked =
				trgt_out->trgt.out.seqno_nextsend;
	} else {
		struct cor_conn_retrans *cr = container_of(
				trgt_out->trgt.out.retrans_list.next,
				struct cor_conn_retrans, conn_list);

		if (cor_seqno_after(cr->seqno,
				trgt_out->trgt.out.seqno_acked))
			trgt_out->trgt.out.seqno_acked = cr->seqno;
	}

	spin_unlock_bh(&nb->retrans_conn_lock);
	cor_nb_kref_put(nb, "stack");

out:
	spin_unlock_bh(&trgt_out->rcv_lock);
}
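
/**
 * Called when everything sent so far has been acked on a low-latency
 * SOURCE_IN connection: stop bufsize increase and keep ignoring
 * receive-side low-buffer signals for at least BUF_OUT_WIN_NOK_NOINCR
 * bytes.
 */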
static void _cor_conn_ack_rcvd_nosendwin(struct cor_conn *trgt_out_l)
{
	if (trgt_out_l->sourcetype != SOURCE_IN ||
			trgt_out_l->is_highlatency != 0)
		return;

	if (trgt_out_l->bufsize.state == BUFSIZE_INCR ||
			trgt_out_l->bufsize.state == BUFSIZE_INCR_FAST)
		trgt_out_l->bufsize.state = BUFSIZE_NOACTION;

	if (trgt_out_l->bufsize.state == BUFSIZE_NOACTION)
		trgt_out_l->bufsize.act.noact.bytesleft = max(
				trgt_out_l->bufsize.act.noact.bytesleft,
				(__u32) BUF_OUT_WIN_NOK_NOINCR);

	trgt_out_l->bufsize.ignore_rcv_lowbuf = max(
			trgt_out_l->bufsize.ignore_rcv_lowbuf,
			(__u32) BUF_OUT_WIN_NOK_NOINCR);
}

/**
 * nb->retrans_conn_lock must be held when calling this
 * (see cor_schedule_retransmit_conn())
 */
static void cor_reschedule_lowwindow_retrans(struct cor_conn *trgt_out_l)
{
	struct list_head *lh = trgt_out_l->trgt.out.retrans_list.next;
	int cnt = 0;

	while (trgt_out_l->trgt.out.retrans_lowwindow > 0 && cnt < 100) {
		struct cor_conn_retrans *cr;

		if (unlikely(lh == &trgt_out_l->trgt.out.retrans_list)) {
			BUG_ON(trgt_out_l->trgt.out.retrans_lowwindow != 65535);
			trgt_out_l->trgt.out.retrans_lowwindow = 0;
			break;
		}

		cr = container_of(lh, struct cor_conn_retrans, conn_list);

		if (cor_seqno_after_eq(cr->seqno,
				trgt_out_l->trgt.out.seqno_windowlimit)) {
			break;
		}

		if (cr->state == CONN_RETRANS_LOWWINDOW)
			cor_schedule_retransmit_conn(cr, 1, 1);

		lh = lh->next;
		cnt++;
	}
}
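
/**
 * Process a cumulative ack up to seqno, optionally carrying a new send
 * window announcement: advance seqno_acked, cancel acked retransmits,
 * reschedule retransmits that were blocked by the old window, release the
 * acked data from the buffer and flush out data the window now permits.
 */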
void cor_conn_ack_rcvd(struct cor_neighbor *nb, __u32 conn_id,
		struct cor_conn *trgt_out, __u32 seqno, int setwindow,
		__u8 window, __u8 bufsize_changerate, __u64 *bytes_acked)
{
	int seqno_advanced = 0;
	int window_enlarged = 0;

	spin_lock_bh(&trgt_out->rcv_lock);

	/* printk(KERN_ERR "ack rcvd %x %u (+%u) %u %u %u\n", conn_id, seqno,
			seqno - trgt_out->trgt.out.seqno_acked,
			cor_dec_window(window),
			trgt_out->bufsize.bufsize >> BUFSIZE_SHIFT,
			trgt_out->data_buf.read_remaining); */

	if (unlikely(trgt_out->isreset != 0))
		goto out;
	if (unlikely(trgt_out->targettype != TARGET_OUT))
		goto out;
	if (unlikely(trgt_out->trgt.out.nb != nb))
		goto out;
	if (unlikely(cor_get_connid_reverse(trgt_out->trgt.out.conn_id) !=
			conn_id))
		goto out;

	if (unlikely(cor_seqno_after(seqno,
			trgt_out->trgt.out.seqno_nextsend) ||
			cor_seqno_before(seqno,
			trgt_out->trgt.out.seqno_acked)))
		goto out;

	if (setwindow) {
		__u32 windowdec = cor_dec_log_64_11(window);

		if (likely(cor_seqno_after(seqno,
				trgt_out->trgt.out.seqno_acked)) ||
				cor_seqno_after(seqno + windowdec,
				trgt_out->trgt.out.seqno_windowlimit)) {
			trgt_out->trgt.out.seqno_windowlimit = seqno +
					windowdec;
			window_enlarged = 1;
		}

		trgt_out->trgt.out.remote_bufsize_changerate =
				bufsize_changerate;
	}

	if (cor_seqno_after(seqno, trgt_out->trgt.out.seqno_acked))
		seqno_advanced = 1;

	if (seqno_advanced == 0 && window_enlarged == 0)
		goto out;

	cor_nb_kref_get(nb, "stack");
	spin_lock_bh(&nb->retrans_conn_lock);

	if (seqno_advanced) {
		trgt_out->trgt.out.seqno_acked = seqno;
		cor_cancel_acked_conn_retrans(trgt_out, bytes_acked);
	}

	if (window_enlarged)
		cor_reschedule_lowwindow_retrans(trgt_out);

	spin_unlock_bh(&nb->retrans_conn_lock);
	cor_nb_kref_put(nb, "stack");

	if (seqno_advanced)
		cor_databuf_ack(trgt_out, trgt_out->trgt.out.seqno_acked);

	if (cor_seqno_eq(trgt_out->trgt.out.seqno_acked,
			trgt_out->trgt.out.seqno_nextsend))
		_cor_conn_ack_rcvd_nosendwin(trgt_out);

out:
	if (seqno_advanced || window_enlarged)
		cor_flush_buf(trgt_out);

	spin_unlock_bh(&trgt_out->rcv_lock);

	cor_wake_sender(trgt_out);
}
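
/**
 * Merge a freshly scheduled retransmit with its predecessor on the conn's
 * retrans_list if both are scheduled, share the same timeout and cover
 * adjacent seqno ranges; the predecessor is cancelled and its range folded
 * into cr. cor_try_combine_conn_retrans_next() below does the same with the
 * successor.
 */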
static void cor_try_combine_conn_retrans_prev(
		struct cor_neighbor *nb_retransconnlocked,
		struct cor_conn *trgt_out_lx, struct cor_conn_retrans *cr)
{
	struct cor_conn_retrans *cr_prev;
	__u64 bytes_dummyacked = 0;

	BUG_ON(cr->state != CONN_RETRANS_SCHEDULED);

	if (cr->conn_list.prev == &trgt_out_lx->trgt.out.retrans_list)
		return;

	cr_prev = container_of(cr->conn_list.prev, struct cor_conn_retrans,
			conn_list);

	if (cr_prev->state != CONN_RETRANS_SCHEDULED)
		return;
	if (cr_prev->timeout != cr->timeout)
		return;
	if (!cor_seqno_eq(cr_prev->seqno + cr_prev->length, cr->seqno))
		return;

	cr->seqno -= cr_prev->length;
	cr->length += cr_prev->length;

	cor_cancel_conn_retrans(nb_retransconnlocked, trgt_out_lx, cr_prev,
			&bytes_dummyacked);
}

static void cor_try_combine_conn_retrans_next(
		struct cor_neighbor *nb_retranslocked,
		struct cor_conn *trgt_out_lx, struct cor_conn_retrans *cr)
{
	struct cor_conn_retrans *cr_next;
	__u64 bytes_dummyacked = 0;

	BUG_ON(cr->state != CONN_RETRANS_SCHEDULED);

	if (cr->conn_list.next == &trgt_out_lx->trgt.out.retrans_list)
		return;

	cr_next = container_of(cr->conn_list.next, struct cor_conn_retrans,
			conn_list);

	if (cr_next->state != CONN_RETRANS_SCHEDULED)
		return;
	if (cr_next->timeout != cr->timeout)
		return;
	if (!cor_seqno_eq(cr->seqno + cr->length, cr_next->seqno))
		return;

	cr->length += cr_next->length;

	cor_cancel_conn_retrans(nb_retranslocked, trgt_out_lx, cr_next,
			&bytes_dummyacked);
}
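
/**
 * (Re)schedule a retransmit: compute its timeout from the measured neighbor
 * latency (plus an extra allowance on high-latency conns), append it to the
 * matching timeout list and mark it CONN_RETRANS_SCHEDULED. If it became
 * the head of a previously empty list, the timer is rearmed; otherwise it
 * is merged with adjacent scheduled entries where possible.
 */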
void cor_schedule_retransmit_conn(struct cor_conn_retrans *cr, int connlocked,
		int nbretransconn_locked)
{
	struct cor_conn *trgt_out_o = cr->trgt_out_o;
	struct cor_neighbor *nb;
	int first;

	if (connlocked == 0)
		spin_lock_bh(&trgt_out_o->rcv_lock);

	BUG_ON(trgt_out_o->targettype != TARGET_OUT);
	nb = trgt_out_o->trgt.out.nb;

	cr->timeout = cor_calc_timeout(
			atomic_read(&nb->latency_retrans_us),
			atomic_read(&nb->latency_stddev_retrans_us),
			atomic_read(&nb->max_remote_ackconn_delay_us));

	if (trgt_out_o->is_highlatency)
		cr->timeout += 1 + msecs_to_jiffies(
				atomic_read(&nb->latency_retrans_us) / 1000 +
				CMSG_MAXDELAY_ACK_FAST_MS);

	if (nbretransconn_locked == 0)
		spin_lock_bh(&nb->retrans_conn_lock);

	cor_nb_kref_get(nb, "stack");

	BUG_ON(cr->state == CONN_RETRANS_SCHEDULED);

	if (unlikely(cr->state == CONN_RETRANS_ACKED)) {
		goto out;
	} else if (unlikely(cr->state == CONN_RETRANS_LOWWINDOW)) {
		BUG_ON(trgt_out_o->trgt.out.retrans_lowwindow == 0);
		if (likely(trgt_out_o->trgt.out.retrans_lowwindow != 65535))
			trgt_out_o->trgt.out.retrans_lowwindow--;
	}

	if (trgt_out_o->is_highlatency) {
		first = unlikely(list_empty(
				&nb->retrans_conn_highlatency_list));
		list_add_tail(&cr->timeout_list,
				&nb->retrans_conn_highlatency_list);
	} else {
		first = unlikely(list_empty(
				&nb->retrans_conn_lowlatency_list));
		list_add_tail(&cr->timeout_list,
				&nb->retrans_conn_lowlatency_list);
	}
	kref_get(&cr->ref);
	cr->state = CONN_RETRANS_SCHEDULED;

	if (unlikely(first)) {
		cor_reschedule_conn_retrans_timer(nb);
	} else {
		cor_try_combine_conn_retrans_prev(nb, trgt_out_o, cr);
		cor_try_combine_conn_retrans_next(nb, trgt_out_o, cr);
	}

out:
	if (nbretransconn_locked == 0)
		spin_unlock_bh(&nb->retrans_conn_lock);

	cor_nb_kref_put(nb, "stack");

	if (connlocked == 0)
		spin_unlock_bh(&trgt_out_o->rcv_lock);
}
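
/**
 * Send len bytes of new data as a separate skb. A retransmit entry is
 * prepared before sending so the data can be recovered from the buffer; if
 * the packet is dropped by the queue, the buffer read position is rewound
 * and the retransmit entry is cancelled again.
 */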
static int __cor_flush_out_skb(struct cor_conn *trgt_out_lx, __u32 len)
{
	struct cor_neighbor *nb = trgt_out_lx->trgt.out.nb;

	__u32 seqno;
	struct cor_conn_retrans *cr;
	struct sk_buff *skb;
	char *dst;
	__u8 flush = 0;
	int rc;

	if (trgt_out_lx->flush != 0 &&
			trgt_out_lx->data_buf.read_remaining == len)
		flush = 1;

	seqno = trgt_out_lx->trgt.out.seqno_nextsend;
	skb = cor_create_packet_conndata(trgt_out_lx->trgt.out.nb, len,
			GFP_ATOMIC, trgt_out_lx->trgt.out.conn_id, seqno,
			trgt_out_lx->trgt.out.lastsend_windowused, flush);
	if (unlikely(skb == 0))
		return RC_FLUSH_CONN_OUT_OOM;

	cr = cor_prepare_conn_retrans(trgt_out_lx, seqno, len,
			trgt_out_lx->trgt.out.lastsend_windowused, 0, 0);
	if (unlikely(cr == 0)) {
		kfree_skb(skb);
		return RC_FLUSH_CONN_OUT_OOM;
	}

	dst = skb_put(skb, len);

	cor_databuf_pull(trgt_out_lx, dst, len);

	rc = cor_dev_queue_xmit(skb, nb->cd, QOS_CALLER_NEIGHBOR);
	if (rc == NET_XMIT_DROP) {
		cor_databuf_unpull(trgt_out_lx, len);
		spin_lock_bh(&nb->retrans_conn_lock);
		cor_cancel_conn_retrans(nb, trgt_out_lx, cr, 0);
		spin_unlock_bh(&nb->retrans_conn_lock);
		kref_put(&cr->ref, cor_free_connretrans); /* alloc */
		return RC_FLUSH_CONN_OUT_CONG;
	}

	trgt_out_lx->trgt.out.seqno_nextsend += len;
	cor_nbcongwin_data_sent(nb, len);
	cor_schedule_retransmit_conn(cr, 1, 0);
	if (trgt_out_lx->sourcetype == SOURCE_SOCK)
		cor_update_src_sock_sndspeed(trgt_out_lx, len);

	kref_put(&cr->ref, cor_free_connretrans); /* alloc */

	return (rc == NET_XMIT_SUCCESS) ?
			RC_FLUSH_CONN_OUT_OK : RC_FLUSH_CONN_OUT_SENT_CONG;
}

static int __cor_flush_out_conndata(struct cor_conn *trgt_out_lx, __u32 len)
{
	__u32 seqno;
	struct cor_control_msg_out *cm;
	struct cor_conn_retrans *cr;
	char *buf;
	__u8 flush = 0;

	if (trgt_out_lx->flush != 0 &&
			trgt_out_lx->data_buf.read_remaining == len)
		flush = 1;

	buf = kmalloc(len, GFP_ATOMIC);
	if (unlikely(buf == 0))
		return RC_FLUSH_CONN_OUT_OOM;

	cm = cor_alloc_control_msg(trgt_out_lx->trgt.out.nb, ACM_PRIORITY_LOW);
	if (unlikely(cm == 0)) {
		kfree(buf);
		return RC_FLUSH_CONN_OUT_OOM;
	}

	seqno = trgt_out_lx->trgt.out.seqno_nextsend;

	cr = cor_prepare_conn_retrans(trgt_out_lx, seqno, len,
			trgt_out_lx->trgt.out.lastsend_windowused, 0, 0);
	if (unlikely(cr == 0)) {
		kfree(buf);
		cor_free_control_msg(cm);
		return RC_FLUSH_CONN_OUT_OOM;
	}

	cor_databuf_pull(trgt_out_lx, buf, len);
	trgt_out_lx->trgt.out.seqno_nextsend += len;
	cor_nbcongwin_data_sent(trgt_out_lx->trgt.out.nb, len);
	if (trgt_out_lx->sourcetype == SOURCE_SOCK)
		cor_update_src_sock_sndspeed(trgt_out_lx, len);

	cor_send_conndata(cm, trgt_out_lx->trgt.out.conn_id, seqno, buf, buf,
			len, trgt_out_lx->trgt.out.lastsend_windowused, flush,
			trgt_out_lx->is_highlatency, cr);

	kref_put(&cr->ref, cor_free_connretrans); /* alloc */

	return RC_FLUSH_CONN_OUT_OK;
}
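
/**
 * Record how much of the remote send window is in use, scaled to 0..31
 * (bytes not yet acked relative to the total window, rounded up). Forced to
 * the maximum while receive-side low-buffer signals are being ignored.
 */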
static void cor_set_last_windowused(struct cor_conn *trgt_out_lx)
{
	__u64 total_window;
	__u64 bytes_ackpending;

	if (trgt_out_lx->bufsize.ignore_rcv_lowbuf > 0) {
		trgt_out_lx->trgt.out.lastsend_windowused = 31;
		return;
	}

	BUG_ON(cor_seqno_before(trgt_out_lx->trgt.out.seqno_windowlimit,
			trgt_out_lx->trgt.out.seqno_acked));
	BUG_ON(cor_seqno_before(trgt_out_lx->trgt.out.seqno_nextsend,
			trgt_out_lx->trgt.out.seqno_acked));

	total_window = (__u64) (
			trgt_out_lx->trgt.out.seqno_windowlimit -
			trgt_out_lx->trgt.out.seqno_acked);
	bytes_ackpending = (__u64) (
			trgt_out_lx->trgt.out.seqno_nextsend -
			trgt_out_lx->trgt.out.seqno_acked);

	BUG_ON(bytes_ackpending > total_window);

	trgt_out_lx->trgt.out.lastsend_windowused = div64_u64(
			bytes_ackpending * 31 + total_window - 1, total_window);
}

static int __cor_flush_out(struct cor_neighbor *nb,
		struct cor_conn *trgt_out_lx, __u32 len, __u32 *sent,
		__u32 *maxsend_left)
{
	int rc;

	if (likely(cor_send_conndata_as_skb(nb, len)))
		rc = __cor_flush_out_skb(trgt_out_lx, len);
	else
		rc = __cor_flush_out_conndata(trgt_out_lx, len);

	if (rc == RC_FLUSH_CONN_OUT_OK ||
			rc == RC_FLUSH_CONN_OUT_SENT_CONG) {
		*maxsend_left -= len;
		*sent += len;
		cor_set_last_windowused(trgt_out_lx);
	}

	if (rc == RC_FLUSH_CONN_OUT_SENT_CONG)
		return RC_FLUSH_CONN_OUT_CONG;
	return rc;
}
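
/**
 * Returns 1 when the receive buffer of a SOURCE_IN connection should be
 * considered full: the announced window is exhausted, too small to encode,
 * or smaller than twice the data still waiting to be forwarded.
 */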
int cor_srcin_buflimit_reached(struct cor_conn *src_in_lx)
{
	__u32 window_left;

	if (unlikely(cor_seqno_before(src_in_lx->src.in.window_seqnolimit,
			src_in_lx->src.in.next_seqno)))
		return 1;

	window_left = src_in_lx->src.in.window_seqnolimit -
			src_in_lx->src.in.next_seqno;

	if (window_left < WINDOW_ENCODE_MIN)
		return 1;

	if (window_left / 2 < src_in_lx->data_buf.read_remaining)
		return 1;

	return 0;
}
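
/**
 * Round a remaining send budget down to a convenient packet length: budgets
 * below 128 are used as-is, up to 4096 they are rounded down to a power of
 * two, above that to a multiple of 4096.
 */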
static __u32 cor_maxsend_left_to_len(__u32 maxsend_left)
{
	__u32 i;

	if (maxsend_left < 128)
		return maxsend_left;

	for (i = 128; i < 4096;) {
		if (i * 2 > maxsend_left)
			return i;
		i = i * 2;
	}

	return maxsend_left - maxsend_left % 4096;
}

static void _cor_flush_out_ignore_lowbuf(struct cor_conn *trgt_out_lx)
{
	if (trgt_out_lx->sourcetype == SOURCE_IN &&
			trgt_out_lx->is_highlatency == 0)
		trgt_out_lx->bufsize.ignore_rcv_lowbuf = max(
				trgt_out_lx->bufsize.ignore_rcv_lowbuf,
				trgt_out_lx->bufsize.bufsize >> BUFSIZE_SHIFT);
}

static __u32 cor_get_windowlimit(struct cor_conn *trgt_out_lx)
{
	if (unlikely(cor_seqno_before(trgt_out_lx->trgt.out.seqno_windowlimit,
			trgt_out_lx->trgt.out.seqno_nextsend)))
		return 0;

	return trgt_out_lx->trgt.out.seqno_windowlimit -
			trgt_out_lx->trgt.out.seqno_nextsend;
}
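
/**
 * Decide whether a final short chunk should be held back, presumably to
 * allow coalescing with data that arrives later: send immediately when the
 * conn is flushed or the source-side buffer limit is reached and nothing is
 * in flight; otherwise delay (returns nonzero).
 */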
static int cor_delay_send(struct cor_conn *trgt_out_lx, __u32 len)
{
	__u32 data_inflight = trgt_out_lx->trgt.out.seqno_nextsend -
			trgt_out_lx->trgt.out.seqno_acked;
	int buflimit_reached;

	if (trgt_out_lx->sourcetype == SOURCE_IN) {
		buflimit_reached = cor_srcin_buflimit_reached(trgt_out_lx);
	} else if (trgt_out_lx->sourcetype == SOURCE_SOCK) {
		buflimit_reached = (cor_sock_sndbufavailable(
				trgt_out_lx, 1) == 0);
	} else if (trgt_out_lx->sourcetype == SOURCE_UNCONNECTED) {
		buflimit_reached = (cor_conn_src_unconn_write_allowed(
				trgt_out_lx) == 0);
	} else {
		WARN_ONCE(1, "cor_delay_send: invalid sourcetype");
		buflimit_reached = 1;
	}

	if ((trgt_out_lx->flush != 0 || buflimit_reached) &&
			data_inflight == 0)
		return 0;

	if (trgt_out_lx->flush == 0)
		return 1;

	if (trgt_out_lx->is_highlatency != 0)
		return 1;

	if (data_inflight > 0)
		return 1;

	return 0;
}
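
/**
 * Flush up to maxsend bytes of buffered data of one connection to its
 * neighbor: full-MSS packets are sent as long as send window, congestion
 * window and budget allow; a final short chunk is then sent, delayed or
 * truncated to the remaining window as appropriate.
 */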
int _cor_flush_out(struct cor_conn *trgt_out_lx, __u32 maxsend, __u32 *sent,
		int from_qos, int maxsend_forcedelay)
{
	struct cor_neighbor *nb = trgt_out_lx->trgt.out.nb;
	__u32 maxsend_left = maxsend;
	__u32 mss;

	BUG_ON(trgt_out_lx->targettype != TARGET_OUT);

	if (unlikely(trgt_out_lx->trgt.out.established == 0) ||
			unlikely(trgt_out_lx->isreset != 0))
		return RC_FLUSH_CONN_OUT_OK;

	BUG_ON(trgt_out_lx->trgt.out.conn_id == 0);

	if (unlikely(trgt_out_lx->data_buf.read_remaining == 0))
		return RC_FLUSH_CONN_OUT_OK;

	if (from_qos == 0 && cor_qos_fastsend_allowed_conn(trgt_out_lx) == 0)
		return RC_FLUSH_CONN_OUT_CONG;

	cor_get_conn_idletime(trgt_out_lx);

	if (unlikely(cor_get_neigh_state(nb) != NEIGHBOR_STATE_ACTIVE))
		return RC_FLUSH_CONN_OUT_NBNOTACTIVE;

	/* printk(KERN_ERR "flush %p %u %u\n", trgt_out_lx,
			cor_get_windowlimit(trgt_out_lx),
			trgt_out_lx->data_buf.read_remaining); */

	mss = cor_mss_conndata(nb, trgt_out_lx->is_highlatency != 0);

	while (trgt_out_lx->data_buf.read_remaining >= mss &&
			maxsend_left >= mss) {
		__u32 windowlimit = cor_get_windowlimit(trgt_out_lx);
		int rc;

		if (cor_nbcongwin_send_allowed(nb) == 0)
			return RC_FLUSH_CONN_OUT_CONG;

		if (mss > windowlimit) {
			trgt_out_lx->trgt.out.lastsend_windowused = 31;
			_cor_flush_out_ignore_lowbuf(trgt_out_lx);
			break;
		}

		rc = __cor_flush_out(nb, trgt_out_lx, mss, sent, &maxsend_left);
		if (rc != RC_FLUSH_CONN_OUT_OK)
			return rc;
	}

	if (trgt_out_lx->data_buf.read_remaining > 0) {
		__u32 len = trgt_out_lx->data_buf.read_remaining;
		__u32 windowlimit = cor_get_windowlimit(trgt_out_lx);
		int rc;

		if (len > maxsend_left) {
			if (maxsend_left >= 65536 || (
					maxsend_left == maxsend &&
					maxsend_left >= 128 &&
					trgt_out_lx->is_highlatency == 0 &&
					!maxsend_forcedelay)) {
				len = cor_maxsend_left_to_len(maxsend_left);
			} else {
				return RC_FLUSH_CONN_OUT_MAXSENT;
			}
		}

		if (cor_delay_send(trgt_out_lx, len))
			return RC_FLUSH_CONN_OUT_OK;

		if (cor_nbcongwin_send_allowed(nb) == 0)
			return RC_FLUSH_CONN_OUT_CONG;

		if (len > windowlimit) {
			trgt_out_lx->trgt.out.lastsend_windowused = 31;
			_cor_flush_out_ignore_lowbuf(trgt_out_lx);

			if (windowlimit == 0 || cor_seqno_eq(
					trgt_out_lx->trgt.out.seqno_nextsend,
					trgt_out_lx->trgt.out.seqno_acked) == 0)
				return RC_FLUSH_CONN_OUT_OK;

			len = windowlimit;
		}

		rc = __cor_flush_out(nb, trgt_out_lx, len, sent, &maxsend_left);
		if (rc != RC_FLUSH_CONN_OUT_OK)
			return rc;
	}

	return RC_FLUSH_CONN_OUT_OK;
}
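
/**
 * Return how long the connection has been idle, in jiffies shifted left by
 * JIFFIES_LAST_IDLE_SHIFT and clamped to the per-latency-class maximum used
 * for burst prioritization; jiffies_idle_since is dragged along when the
 * clamp applies.
 */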
unsigned long cor_get_conn_idletime(struct cor_conn *trgt_out_lx)
{
	unsigned long jiffies_shifted = jiffies << JIFFIES_LAST_IDLE_SHIFT;
	__u32 burst_maxidle_secs;
	__u32 burst_maxidle_hz_shifted;
	unsigned long idletime_hz_shifted;

	if (trgt_out_lx->is_highlatency != 0)
		burst_maxidle_secs = BURSTPRIO_MAXIDLETIME_HIGHLATENCY_SECS;
	else
		burst_maxidle_secs = BURSTPRIO_MAXIDLETIME_LOWLATENCY_SECS;

	burst_maxidle_hz_shifted = (burst_maxidle_secs * HZ) <<
			JIFFIES_LAST_IDLE_SHIFT;

	idletime_hz_shifted = jiffies_shifted -
			trgt_out_lx->trgt.out.jiffies_idle_since;

	if (unlikely(idletime_hz_shifted > burst_maxidle_hz_shifted)) {
		idletime_hz_shifted = burst_maxidle_hz_shifted;
		trgt_out_lx->trgt.out.jiffies_idle_since = jiffies_shifted -
				burst_maxidle_hz_shifted;
	}

	return idletime_hz_shifted;
}

int __init cor_snd_init(void)
{
	cor_connretrans_slab = kmem_cache_create("cor_connretrans",
			sizeof(struct cor_conn_retrans), 8, 0, 0);
	if (unlikely(cor_connretrans_slab == 0))
		return -ENOMEM;

	return 0;
}

void __exit cor_snd_exit2(void)
{
	kmem_cache_destroy(cor_connretrans_slab);
	cor_connretrans_slab = 0;
}

MODULE_LICENSE("GPL");