/**
 * Connection oriented routing
 * Copyright (C) 2007-2021  Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
#include <linux/delay.h>
static DEFINE_SPINLOCK(cor_neighbor_list_lock);
static LIST_HEAD(cor_nb_list);
static struct kmem_cache *cor_nb_slab;
atomic_t cor_num_neighs;

static DEFINE_SPINLOCK(cor_connid_gen);
void cor_neighbor_free(struct kref *ref)
{
        struct cor_neighbor *nb = container_of(ref, struct cor_neighbor, ref);

        WARN_ONCE(list_empty(&(nb->cmsg_queue_pong)) == 0,
                        "cor_neighbor_free(): nb->cmsg_queue_pong is not empty");
        WARN_ONCE(list_empty(&(nb->cmsg_queue_ack)) == 0,
                        "cor_neighbor_free(): nb->cmsg_queue_ack is not empty");
        WARN_ONCE(list_empty(&(nb->cmsg_queue_ackconn)) == 0,
                        "cor_neighbor_free(): nb->cmsg_queue_ackconn is not empty");
        WARN_ONCE(list_empty(&(nb->cmsg_queue_conndata_lowlat)) == 0,
                        "cor_neighbor_free(): nb->cmsg_queue_conndata_lowlat is not empty");
        WARN_ONCE(list_empty(&(nb->cmsg_queue_conndata_highlat)) == 0,
                        "cor_neighbor_free(): nb->cmsg_queue_conndata_highlat is not empty");
        WARN_ONCE(list_empty(&(nb->cmsg_queue_other)) == 0,
                        "cor_neighbor_free(): nb->cmsg_queue_other is not empty");
        WARN_ONCE(nb->pending_conn_resets_rb.rb_node != 0,
                        "cor_neighbor_free(): nb->pending_conn_resets_rb is not empty");
        WARN_ONCE(nb->rb_kp.in_queue != RB_INQUEUE_FALSE,
                        "cor_neighbor_free(): nb->rb_kp.in_queue is not RB_INQUEUE_FALSE");
        WARN_ONCE(nb->rb_cr.in_queue != RB_INQUEUE_FALSE,
                        "cor_neighbor_free(): nb->rb_cr.in_queue is not RB_INQUEUE_FALSE");
        WARN_ONCE(nb->rb.in_queue != RB_INQUEUE_FALSE,
                        "cor_neighbor_free(): nb->rb.in_queue is not RB_INQUEUE_FALSE");
        WARN_ONCE(list_empty(&(nb->conns_waiting.lh)) == 0,
                        "cor_neighbor_free(): nb->conns_waiting.lh is not empty");
        WARN_ONCE(list_empty(&(nb->conns_waiting.lh_nextpass)) == 0,
                        "cor_neighbor_free(): nb->conns_waiting.lh_nextpass is not empty");
        WARN_ONCE(nb->str_timer_pending != 0,
                        "cor_neighbor_free(): nb->str_timer_pending is not 0");
        WARN_ONCE(nb->connid_rb.rb_node != 0,
                        "cor_neighbor_free(): nb->connid_rb is not empty");
        WARN_ONCE(nb->connid_reuse_rb.rb_node != 0,
                        "cor_neighbor_free(): nb->connid_reuse_rb is not empty");
        WARN_ONCE(list_empty(&(nb->connid_reuse_list)) == 0,
                        "cor_neighbor_free(): nb->connid_reuse_list is not empty");
        WARN_ONCE(nb->kp_retransmits_rb.rb_node != 0,
                        "cor_neighbor_free(): nb->kp_retransmits_rb is not empty");
        WARN_ONCE(list_empty(&(nb->rcv_conn_list)) == 0,
                        "cor_neighbor_free(): nb->rcv_conn_list is not empty");
        WARN_ONCE(nb->stalledconn_work_scheduled != 0,
                        "cor_neighbor_free(): nb->stalledconn_work_scheduled is not 0");
        WARN_ONCE(list_empty(&(nb->stalledconn_list)) == 0,
                        "cor_neighbor_free(): nb->stalledconn_list is not empty");
        WARN_ONCE(list_empty(&(nb->retrans_list)) == 0,
                        "cor_neighbor_free(): nb->retrans_list is not empty");
        WARN_ONCE(list_empty(&(nb->retrans_conn_list)) == 0,
                        "cor_neighbor_free(): nb->retrans_conn_list is not empty");

        /* printk(KERN_ERR "neighbor free"); */
        BUG_ON(nb->nb_list.next != LIST_POISON1);
        BUG_ON(nb->nb_list.prev != LIST_POISON2);

        kref_put(&(nb->queue->ref), cor_free_qos);

        kmem_cache_free(cor_nb_slab, nb);
        atomic_dec(&cor_num_neighs);
}
static void cor_stall_timer(struct work_struct *work);

static void _cor_reset_neighbor(struct work_struct *work);
static struct cor_neighbor *cor_alloc_neighbor(gfp_t allocflags)
{
        struct cor_neighbor *nb;
        __u64 seqno;

        if (atomic_inc_return(&cor_num_neighs) >= MAX_NEIGHBORS) {
                atomic_dec(&cor_num_neighs);
                return 0;
        }

        nb = kmem_cache_alloc(cor_nb_slab, allocflags);
        if (unlikely(nb == 0))
                return 0;

        memset(nb, 0, sizeof(struct cor_neighbor));

        kref_init(&(nb->ref));
        atomic_set(&(nb->sessionid_rcv_needed), 1);
        atomic_set(&(nb->sessionid_snd_needed), 1);
        timer_setup(&(nb->cmsg_timer), cor_controlmsg_timerfunc, 0);
        spin_lock_init(&(nb->cmsg_lock));
        INIT_LIST_HEAD(&(nb->cmsg_queue_pong));
        INIT_LIST_HEAD(&(nb->cmsg_queue_ack));
        INIT_LIST_HEAD(&(nb->cmsg_queue_ackconn));
        INIT_LIST_HEAD(&(nb->cmsg_queue_conndata_lowlat));
        INIT_LIST_HEAD(&(nb->cmsg_queue_conndata_highlat));
        INIT_LIST_HEAD(&(nb->cmsg_queue_other));
        atomic_set(&(nb->cmsg_pongs_retrans_cnt), 0);
        atomic_set(&(nb->cmsg_othercnt), 0);
        atomic_set(&(nb->cmsg_bulk_readds), 0);
        atomic_set(&(nb->cmsg_delay_conndata), 0);
        nb->last_ping_time = jiffies;
        atomic_set(&(nb->latency_retrans_us), PING_GUESSLATENCY_MS * 1000);
        atomic_set(&(nb->latency_advertised_us), PING_GUESSLATENCY_MS * 1000);
        atomic_set(&(nb->max_remote_ack_delay_us), 1000000);
        atomic_set(&(nb->max_remote_ackconn_delay_us), 1000000);
        atomic_set(&(nb->max_remote_other_delay_us), 1000000);
        spin_lock_init(&(nb->conns_waiting.lock));
        INIT_LIST_HEAD(&(nb->conns_waiting.lh));
        INIT_LIST_HEAD(&(nb->conns_waiting.lh_nextpass));
        spin_lock_init(&(nb->nbcongwin.lock));
        atomic64_set(&(nb->nbcongwin.data_intransit), 0);
        atomic64_set(&(nb->nbcongwin.cwin), 0);
        spin_lock_init(&(nb->state_lock));
        nb->state = NEIGHBOR_STATE_INITIAL;
        nb->state_time.initial_state_since = jiffies;
        INIT_DELAYED_WORK(&(nb->stalltimeout_timer), cor_stall_timer);
        spin_lock_init(&(nb->connid_lock));
        spin_lock_init(&(nb->connid_reuse_lock));
        INIT_LIST_HEAD(&(nb->connid_reuse_list));
        get_random_bytes((char *) &seqno, sizeof(seqno));
        nb->kpacket_seqno = seqno;
        atomic64_set(&(nb->priority_sum), 0);
        spin_lock_init(&(nb->conn_list_lock));
        INIT_LIST_HEAD(&(nb->rcv_conn_list));
        INIT_LIST_HEAD(&(nb->stalledconn_list));
        spin_lock_init(&(nb->stalledconn_lock));
        INIT_WORK(&(nb->stalledconn_work), cor_resume_nbstalled_conns);
        spin_lock_init(&(nb->retrans_lock));
        INIT_LIST_HEAD(&(nb->retrans_list));
        spin_lock_init(&(nb->retrans_conn_lock));
        INIT_LIST_HEAD(&(nb->retrans_conn_list));
        INIT_WORK(&(nb->reset_neigh_work), _cor_reset_neighbor);

        return nb;
}
int cor_is_from_nb(struct sk_buff *skb, struct cor_neighbor *nb)
{
        int rc;

        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        rc = (skb->dev == nb->dev && memcmp(nb->mac, source_hw,
                        MAX_ADDR_LEN) == 0);
        return rc;
}
struct cor_neighbor *_cor_get_neigh_by_mac(struct net_device *dev,
                char *source_hw)
{
        struct list_head *currlh;
        struct cor_neighbor *ret = 0;

        spin_lock_bh(&cor_neighbor_list_lock);

        currlh = cor_nb_list.next;
        while (currlh != &cor_nb_list) {
                struct cor_neighbor *curr = container_of(currlh,
                                struct cor_neighbor, nb_list);

                if (curr->dev == dev && memcmp(curr->mac, source_hw,
                                MAX_ADDR_LEN) == 0) {
                        ret = curr;
                        kref_get(&(ret->ref));
                        break;
                }

                currlh = currlh->next;
        }

        spin_unlock_bh(&cor_neighbor_list_lock);

        return ret;
}
struct cor_neighbor *cor_get_neigh_by_mac(struct sk_buff *skb)
{
        char source_hw[MAX_ADDR_LEN];
        memset(source_hw, 0, MAX_ADDR_LEN);
        if (skb->dev->header_ops != 0 &&
                        skb->dev->header_ops->parse != 0)
                skb->dev->header_ops->parse(skb, source_hw);

        return _cor_get_neigh_by_mac(skb->dev, source_hw);
}
struct cor_neighbor *cor_find_neigh(char *addr, __u16 addrlen)
{
        struct list_head *currlh;
        struct cor_neighbor *ret = 0;

        if (addr == 0 || addrlen == 0)
                return 0;

        spin_lock_bh(&cor_neighbor_list_lock);

        currlh = cor_nb_list.next;
        while (currlh != &cor_nb_list) {
                struct cor_neighbor *curr = container_of(currlh,
                                struct cor_neighbor, nb_list);

                if (curr->addr != 0 && curr->addrlen != 0 &&
                                curr->addrlen == addrlen &&
                                memcmp(curr->addr, addr, addrlen) == 0) {
                        ret = curr;
                        kref_get(&(ret->ref));
                        break;
                }

                currlh = currlh->next;
        }

        spin_unlock_bh(&cor_neighbor_list_lock);

        return ret;
}
__u32 cor_generate_neigh_list(char *buf, __u32 buflen)
{
        struct list_head *currlh;

        int rc;
        __u32 cnt = 0;
        __u32 buf_offset = 4;

        /*
         * The variable length header row count needs to be generated after
         * the data. This is done by reserving the maximum space it could
         * take. If it ends up being smaller, the data is moved so that
         * there is no gap.
         */

        BUG_ON(buflen < buf_offset);

        /* num_fields == 2 */
        rc = cor_encode_len(buf + buf_offset, buflen - buf_offset, 2);
        BUG_ON(rc <= 0);
        buf_offset += rc;

        /* field 1: addr */
        BUG_ON(buflen < buf_offset + 2);
        cor_put_u16(buf + buf_offset, LIST_NEIGH_FIELD_ADDR);
        buf_offset += 2;

        rc = cor_encode_len(buf + buf_offset, buflen - buf_offset, 0);
        BUG_ON(rc <= 0);
        buf_offset += rc;

        /* field 2: latency */
        BUG_ON(buflen < buf_offset + 2);
        cor_put_u16(buf + buf_offset, LIST_NEIGH_FIELD_LATENCY);
        buf_offset += 2;

        rc = cor_encode_len(buf + buf_offset, buflen - buf_offset, 1);
        BUG_ON(rc <= 0);
        buf_offset += rc;

        spin_lock_bh(&cor_neighbor_list_lock);

        currlh = cor_nb_list.next;
        while (currlh != &cor_nb_list) {
                struct cor_neighbor *curr = container_of(currlh,
                                struct cor_neighbor, nb_list);
                int state;

                state = cor_get_neigh_state(curr);

                if (state != NEIGHBOR_STATE_ACTIVE)
                        goto cont;

                BUG_ON((curr->addr == 0) != (curr->addrlen == 0));
                if (curr->addr == 0 || curr->addrlen == 0)
                        goto cont;

                if (unlikely(buflen < buf_offset + 4 + curr->addrlen + 1))
                        break;

                rc = cor_encode_len(buf + buf_offset, buflen - buf_offset,
                                curr->addrlen);
                BUG_ON(rc <= 0);
                buf_offset += rc;

                BUG_ON(curr->addrlen > buflen - buf_offset);
                memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
                buf_offset += curr->addrlen;

                buf[buf_offset] = cor_enc_log_64_11(atomic_read(
                                &(curr->latency_advertised_us)));
                buf_offset += 1;

                BUG_ON(buf_offset > buflen);

                cnt++;

cont:
                currlh = currlh->next;
        }

        spin_unlock_bh(&cor_neighbor_list_lock);

        rc = cor_encode_len(buf, 4, cnt);
        BUG_ON(rc <= 0);

        if (likely(((__u32) rc) < 4))
                memmove(buf + ((__u32) rc), buf + 4, buf_offset);

        return buf_offset - 4 + ((__u32) rc);
}
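
/*
 * Example of the reservation scheme above, assuming cor_encode_len()
 * stores a value as small as 3 in a single byte (its exact encoding is
 * defined elsewhere in CoR): with three active neighbors, the final
 * cor_encode_len(buf, 4, cnt) returns rc == 1, so the rows written
 * starting at buf + 4 are moved down by three bytes and the function
 * returns 1 + (buf_offset - 4) as the total length, leaving no gap
 * between the row count and the first row.
 */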
static void cor_reset_all_conns(struct cor_neighbor *nb)
{
        while (1) {
                unsigned long iflags;
                struct cor_conn *src_in;
                int rc;

                spin_lock_irqsave(&(nb->conn_list_lock), iflags);

                if (list_empty(&(nb->rcv_conn_list))) {
                        spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);
                        break;
                }

                src_in = container_of(nb->rcv_conn_list.next, struct cor_conn,
                                source.in.nb_list);
                kref_get(&(src_in->ref));

                spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

                /* lock both directions in a fixed order to avoid deadlocks */
                if (src_in->is_client) {
                        spin_lock_bh(&(src_in->rcv_lock));
                        spin_lock_bh(&(src_in->reversedir->rcv_lock));
                } else {
                        spin_lock_bh(&(src_in->reversedir->rcv_lock));
                        spin_lock_bh(&(src_in->rcv_lock));
                }

                if (unlikely(unlikely(src_in->sourcetype != SOURCE_IN) ||
                                unlikely(src_in->source.in.nb != nb))) {
                        rc = 1;
                        goto unlock;
                }

                rc = cor_send_reset_conn(nb,
                                src_in->reversedir->target.out.conn_id, 1);

                if (unlikely(rc != 0))
                        goto unlock;

                if (src_in->reversedir->isreset == 0)
                        src_in->reversedir->isreset = 1;

unlock:
                if (src_in->is_client) {
                        spin_unlock_bh(&(src_in->rcv_lock));
                        spin_unlock_bh(&(src_in->reversedir->rcv_lock));
                } else {
                        spin_unlock_bh(&(src_in->reversedir->rcv_lock));
                        spin_unlock_bh(&(src_in->rcv_lock));
                }

                if (rc == 0) {
                        cor_reset_conn(src_in);
                        kref_put(&(src_in->ref), cor_free_conn);
                } else {
                        /* sending the reset failed; retry after a delay */
                        kref_put(&(src_in->ref), cor_free_conn);
                        kref_get(&(nb->ref));
                        schedule_delayed_work(&(nb->stalltimeout_timer), HZ);
                        break;
                }
        }
}
static void cor_delete_connid_reuse_items(struct cor_neighbor *nb);

static void _cor_reset_neighbor(struct work_struct *work)
{
        struct cor_neighbor *nb = container_of(work, struct cor_neighbor,
                        reset_neigh_work);

        cor_reset_all_conns(nb);
        cor_delete_connid_reuse_items(nb);

        kref_put(&(nb->ref), cor_neighbor_free);
}
static void cor_reset_neighbor(struct cor_neighbor *nb, int use_workqueue)
{
        int removenblist;
        unsigned long iflags;

        spin_lock_irqsave(&(nb->state_lock), iflags);
        removenblist = (nb->state != NEIGHBOR_STATE_KILLED);
        nb->state = NEIGHBOR_STATE_KILLED;
        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        /* if (removenblist) {
                printk(KERN_ERR "cor_reset_neighbor");
        } */

        if (use_workqueue) {
                /* take the reference before scheduling; the work item
                 * drops it in _cor_reset_neighbor() */
                kref_get(&(nb->ref));
                schedule_work(&(nb->reset_neigh_work));
        } else {
                cor_reset_all_conns(nb);
                cor_delete_connid_reuse_items(nb);
        }

        if (removenblist) {
                spin_lock_bh(&cor_neighbor_list_lock);
                list_del(&(nb->nb_list));
                spin_unlock_bh(&cor_neighbor_list_lock);

                kref_put(&(nb->ref), cor_neighbor_free); /* nb_list */
        }
}
void cor_reset_neighbors(struct net_device *dev)
{
        struct list_head *currlh;

restart:
        spin_lock_bh(&cor_neighbor_list_lock);

        currlh = cor_nb_list.next;
        while (currlh != &cor_nb_list) {
                unsigned long iflags;
                int state;
                struct cor_neighbor *currnb = container_of(currlh,
                                struct cor_neighbor, nb_list);

                if (dev != 0 && currnb->dev != dev)
                        goto cont;

                spin_lock_irqsave(&(currnb->state_lock), iflags);
                state = currnb->state;
                spin_unlock_irqrestore(&(currnb->state_lock), iflags);

                if (state != NEIGHBOR_STATE_KILLED) {
                        spin_unlock_bh(&cor_neighbor_list_lock);
                        cor_reset_neighbor(currnb, 0);
                        goto restart;
                }

cont:
                currlh = currlh->next;
        }

        spin_unlock_bh(&cor_neighbor_list_lock);
}
static void cor_stall_timer(struct work_struct *work)
{
        struct cor_neighbor *nb = container_of(to_delayed_work(work),
                        struct cor_neighbor, stalltimeout_timer);

        int stall_time_ms;
        int nbstate;

        unsigned long iflags;

        spin_lock_irqsave(&(nb->state_lock), iflags);
        nbstate = nb->state;
        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        if (nbstate == NEIGHBOR_STATE_STALLED) {
                stall_time_ms = jiffies_to_msecs(jiffies -
                                nb->state_time.last_roundtrip);

                if (stall_time_ms < NB_KILL_TIME_MS) {
                        schedule_delayed_work(&(nb->stalltimeout_timer),
                                        msecs_to_jiffies(NB_KILL_TIME_MS -
                                        stall_time_ms));
                        return;
                }

                cor_reset_neighbor(nb, 1);
        }

        nb->str_timer_pending = 0;
        kref_put(&(nb->ref), cor_neighbor_free); /* cor_stall_timer */
}
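
/*
 * Summary of the neighbor state machine implemented here: INITIAL becomes
 * ACTIVE after PING_SUCCESS_CNT_INIT pongs (see cor_ping_resp()), ACTIVE
 * becomes STALLED once enough pings go unanswered for NB_STALL_TIME_MS
 * (see cor_get_neigh_state() below), STALLED becomes ACTIVE again after
 * PING_SUCCESS_CNT_STALLED pongs, and STALLED or INITIAL neighbors are
 * KILLED when the stall timeout or INITIAL_TIME_LIMIT_SEC expires.
 */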
int cor_get_neigh_state(struct cor_neighbor *nb)
{
        int ret;
        unsigned long iflags;
        int stall_time_ms;

        BUG_ON(nb == 0);

        spin_lock_irqsave(&(nb->state_lock), iflags);

        stall_time_ms = jiffies_to_msecs(jiffies -
                        nb->state_time.last_roundtrip);

        if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) &&
                        unlikely(stall_time_ms > NB_STALL_TIME_MS) && (
                        nb->ping_intransit >= NB_STALL_MINPINGS ||
                        nb->ping_intransit >= PING_COOKIES_PER_NEIGH)) {
                nb->state = NEIGHBOR_STATE_STALLED;
                nb->ping_success = 0;
                if (nb->str_timer_pending == 0) {
                        nb->str_timer_pending = 1;
                        kref_get(&(nb->ref));

                        schedule_delayed_work(&(nb->stalltimeout_timer),
                                        msecs_to_jiffies(NB_KILL_TIME_MS -
                                        stall_time_ms));
                }

                /* printk(KERN_ERR "changed to stalled"); */
                BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
        } else if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL) &&
                        time_after(jiffies, nb->state_time.initial_state_since +
                        INITIAL_TIME_LIMIT_SEC * HZ)) {
                spin_unlock_irqrestore(&(nb->state_lock), iflags);
                cor_reset_neighbor(nb, 1);
                return NEIGHBOR_STATE_KILLED;
        }

        ret = nb->state;

        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        return ret;
}
static struct cor_ping_cookie *cor_find_cookie(struct cor_neighbor *nb,
                __u32 cookie)
{
        int i;

        for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
                if (nb->cookies[i].cookie == cookie)
                        return &(nb->cookies[i]);
        }

        return 0;
}
static void cor_reset_cookie(struct cor_neighbor *nb, struct cor_ping_cookie *c)
{
        if (c->cookie == 0)
                return;

        if (nb->cookie_unsent != c->cookie)
                nb->ping_intransit--;

        c->cookie = 0;
}
static __u32 sqrt(__u64 x)
{
        int i;
        __u64 y = 65536;

        if (unlikely(x <= 1))
                return 0;

        for (i = 0; i < 100; i++) {
                __u64 oldy = y;

                y = y/2 + div64_u64(x/2, y);
                if (unlikely(y == 0))
                        y = 1; /* guard against division by zero */

                if (y == oldy)
                        break;
        }

        if (unlikely(y > U32_MAX))
                y = U32_MAX;

        return (__u32) y;
}
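
/*
 * sqrt() above uses the Babylonian (Newton) iteration y' = y/2 + (x/2)/y.
 * Worked example: for x = 250000 and the starting guess y = 65536, the
 * estimate roughly halves each round and settles at 500 (where
 * 250 + 125000/500 == 500) after about a dozen iterations, so the
 * iteration cap leaves ample headroom. It is used below to turn the
 * latency variance into a standard deviation in microseconds.
 */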
static __u32 cor_calc_newlatency(struct cor_neighbor *nb_statelocked,
                __u32 oldlatency_us, __s64 newlatency_ns)
{
        __s64 oldlatency = oldlatency_us * 1000LL;
        __s64 newlatency;

        if (unlikely(unlikely(nb_statelocked->state == NEIGHBOR_STATE_INITIAL) &&
                        nb_statelocked->ping_success < 16))
                newlatency = div64_s64(
                                oldlatency * nb_statelocked->ping_success +
                                newlatency_ns,
                                nb_statelocked->ping_success + 1);
        else
                newlatency = (oldlatency * 15 + newlatency_ns) / 16;

        newlatency = div_s64(newlatency + 500, 1000);

        if (unlikely(newlatency < 0))
                newlatency = 0;
        if (unlikely(newlatency > U32_MAX))
                newlatency = U32_MAX;

        return (__u32) newlatency;
}
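
/*
 * The steady-state branch above is a 15/16 exponentially weighted moving
 * average. For example, with oldlatency_us = 20000 (20 ms) and a new
 * sample of 36000000 ns (36 ms): (20000000 * 15 + 36000000) / 16 =
 * 21000000 ns, which rounds to 21000 us, so a single outlier moves the
 * estimate by only 1/16 of the difference.
 */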
static void cor_update_nb_latency(struct cor_neighbor *nb_statelocked,
                struct cor_ping_cookie *c, __u32 respdelay)
{
        ktime_t now = ktime_get();

        __s64 pinglatency_retrans_ns = ktime_to_ns(now) -
                        ktime_to_ns(c->time_sent) - respdelay * 1000LL;
        __s64 pinglatency_advertised_ns = ktime_to_ns(now) -
                        ktime_to_ns(c->time_created) - respdelay * 1000LL;

        __u32 oldlatency_retrans_us =
                        atomic_read(&(nb_statelocked->latency_retrans_us));

        __u32 newlatency_retrans_us = cor_calc_newlatency(nb_statelocked,
                        oldlatency_retrans_us, pinglatency_retrans_ns);

        atomic_set(&(nb_statelocked->latency_retrans_us),
                        newlatency_retrans_us);

        if (unlikely(unlikely(nb_statelocked->state == NEIGHBOR_STATE_INITIAL) &&
                        nb_statelocked->ping_success < 16)) {
                nb_statelocked->latency_variance_retrans_us =
                                ((__u64) newlatency_retrans_us) *
                                newlatency_retrans_us;
                atomic_set(&(nb_statelocked->latency_stddev_retrans_us), sqrt(
                                nb_statelocked->latency_variance_retrans_us));
        } else if (pinglatency_retrans_ns > oldlatency_retrans_us *
                        ((__s64) 1000)) {
                __s64 newdiff = div_s64(pinglatency_retrans_ns -
                                oldlatency_retrans_us * ((__s64) 1000), 1000);
                __u32 newdiff32 = (__u32) (unlikely(newdiff >= U32_MAX) ?
                                U32_MAX : newdiff);
                __u64 newvar = ((__u64) newdiff32) * newdiff32;

                __u64 oldval = nb_statelocked->latency_variance_retrans_us;

                if (unlikely(unlikely(newvar > (1LL << 55)) || unlikely(
                                oldval > (1LL << 55)))) {
                        /* divide first to avoid overflowing the average */
                        nb_statelocked->latency_variance_retrans_us =
                                        (oldval / 16) * 15 + newvar / 16;
                } else {
                        nb_statelocked->latency_variance_retrans_us =
                                        (oldval * 15 + newvar) / 16;
                }

                atomic_set(&(nb_statelocked->latency_stddev_retrans_us), sqrt(
                                nb_statelocked->latency_variance_retrans_us));
        }

        atomic_set(&(nb_statelocked->latency_advertised_us),
                        cor_calc_newlatency(nb_statelocked,
                        atomic_read(&(nb_statelocked->latency_advertised_us)),
                        pinglatency_advertised_ns));

        nb_statelocked->last_roundtrip_end = now;
}
static void cor_connid_used_pingsuccess(struct cor_neighbor *nb);

void cor_ping_resp(struct cor_neighbor *nb, __u32 cookie, __u32 respdelay)
{
        unsigned long iflags;

        struct cor_ping_cookie *c;
        int i;

        int stalledresume = 0;

        int call_connidreuse = 0;

        if (unlikely(cookie == 0))
                return;

        spin_lock_irqsave(&(nb->state_lock), iflags);

        c = cor_find_cookie(nb, cookie);

        if (unlikely(c == 0))
                goto out;

        atomic_set(&(nb->sessionid_snd_needed), 0);

        call_connidreuse = ktime_before_eq(nb->last_roundtrip_end,
                        c->time_created);

        cor_update_nb_latency(nb, c, respdelay);

        nb->ping_success++;

        cor_reset_cookie(nb, c);

        for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
                if (nb->cookies[i].cookie != 0 && ktime_before(
                                nb->cookies[i].time_created, c->time_created)) {
                        nb->cookies[i].pongs++;
                        if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
                                cor_reset_cookie(nb, &(nb->cookies[i]));
                        }
                }
        }

        if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
                        nb->state == NEIGHBOR_STATE_STALLED)) {
                call_connidreuse = 0;

                if ((nb->state == NEIGHBOR_STATE_INITIAL &&
                                nb->ping_success >= PING_SUCCESS_CNT_INIT) || (
                                nb->state == NEIGHBOR_STATE_STALLED &&
                                nb->ping_success >= PING_SUCCESS_CNT_STALLED)) {
                        stalledresume = (nb->state == NEIGHBOR_STATE_STALLED);
                        nb->state = NEIGHBOR_STATE_ACTIVE;
                        /* printk(KERN_ERR "changed to active"); */
                }
        }

        if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) ||
                        nb->state == NEIGHBOR_STATE_STALLED)
                nb->state_time.last_roundtrip = c->jiffies_sent;

out:
        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        if (call_connidreuse)
                cor_connid_used_pingsuccess(nb);

        if (unlikely(stalledresume)) {
                spin_lock_bh(&(nb->retrans_conn_lock));
                cor_reschedule_conn_retrans_timer(nb);
                spin_unlock_bh(&(nb->retrans_conn_lock));

                spin_lock_bh(&(nb->stalledconn_lock));
                if (nb->stalledconn_work_scheduled == 0) {
                        kref_get(&(nb->ref));
                        schedule_work(&(nb->stalledconn_work));
                        nb->stalledconn_work_scheduled = 1;
                }
                spin_unlock_bh(&(nb->stalledconn_lock));
        }
}
__u32 cor_add_ping_req(struct cor_neighbor *nb, unsigned long *last_ping_time)
{
        unsigned long iflags;
        struct cor_ping_cookie *c;
        __u32 i;

        __u32 cookie;

        ktime_t now = ktime_get();

        spin_lock_irqsave(&(nb->state_lock), iflags);

        if (nb->cookie_unsent != 0) {
                c = cor_find_cookie(nb, nb->cookie_unsent);
                if (c != 0)
                        goto unsent;
                nb->cookie_unsent = 0;
        }

        c = cor_find_cookie(nb, 0);
        if (c == 0) {
                get_random_bytes((char *) &i, sizeof(i));
                i = (i % PING_COOKIES_PER_NEIGH);
                c = &(nb->cookies[i]);
                cor_reset_cookie(nb, c);
        }

        nb->lastcookie++;
        if (unlikely(nb->lastcookie == 0))
                nb->lastcookie++;
        c->cookie = nb->lastcookie;
        c->time_created = now;

unsent:
        c->time_sent = now;
        c->jiffies_sent = jiffies;
        cookie = c->cookie;

        nb->ping_intransit++;

        *last_ping_time = nb->last_ping_time;
        nb->last_ping_time = c->jiffies_sent;

        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        return cookie;
}
void cor_ping_sent(struct cor_neighbor *nb, __u32 cookie)
{
        unsigned long iflags;

        spin_lock_irqsave(&(nb->state_lock), iflags);

        if (nb->cookie_unsent == cookie)
                nb->cookie_unsent = 0;

        spin_unlock_irqrestore(&(nb->state_lock), iflags);
}
void cor_unadd_ping_req(struct cor_neighbor *nb, __u32 cookie,
                unsigned long last_ping_time, int congested)
{
        unsigned long iflags;

        struct cor_ping_cookie *c;

        spin_lock_irqsave(&(nb->state_lock), iflags);

        if (congested) {
                BUG_ON(nb->cookie_unsent != 0 && nb->cookie_unsent != cookie);
                nb->cookie_unsent = cookie;
        }

        c = cor_find_cookie(nb, cookie);
        if (likely(c != 0)) {
                if (congested == 0)
                        c->cookie = 0;
                nb->ping_intransit--;
        }

        nb->last_ping_time = last_ping_time;

        spin_unlock_irqrestore(&(nb->state_lock), iflags);
}
static int cor_get_ping_forcetime_ms(struct cor_neighbor *nb)
{
        unsigned long iflags;
        int fast;
        int idle;

        if (unlikely(cor_get_neigh_state(nb) != NEIGHBOR_STATE_ACTIVE))
                return PING_FORCETIME_MS;

        spin_lock_irqsave(&(nb->state_lock), iflags);
        fast = ((nb->ping_success < PING_ACTIVE_FASTINITIAL_COUNT) ||
                        (nb->ping_intransit > 0));
        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        if (fast)
                return PING_FORCETIME_ACTIVE_FAST_MS;

        spin_lock_irqsave(&(nb->conn_list_lock), iflags);
        idle = list_empty(&(nb->rcv_conn_list));
        spin_unlock_irqrestore(&(nb->conn_list_lock), iflags);

        if (idle)
                return PING_FORCETIME_ACTIVEIDLE_MS;
        else
                return PING_FORCETIME_ACTIVE_MS;
}
static __u32 cor_get_ping_mindelay_ms(struct cor_neighbor *nb_statelocked)
{
        __u32 latency_us = ((__u32) atomic_read(
                        &(nb_statelocked->latency_advertised_us)));
        __u32 max_remote_other_delay_us = ((__u32) atomic_read(
                        &(nb_statelocked->max_remote_other_delay_us)));
        __u32 mindelay_ms;

        if (latency_us < PING_GUESSLATENCY_MS * 1000)
                latency_us = PING_GUESSLATENCY_MS * 1000;

        if (unlikely(nb_statelocked->state != NEIGHBOR_STATE_ACTIVE))
                mindelay_ms = latency_us/1000;
        else
                mindelay_ms = ((latency_us/2 +
                                max_remote_other_delay_us/2)/500);

        if (likely(nb_statelocked->ping_intransit < PING_COOKIES_THROTTLESTART))
                return mindelay_ms;

        mindelay_ms = mindelay_ms * (1 + 9 * (nb_statelocked->ping_intransit *
                        nb_statelocked->ping_intransit /
                        (PING_COOKIES_PER_NEIGH * PING_COOKIES_PER_NEIGH)));

        return mindelay_ms;
}
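
/*
 * Note on the throttle above: the inner integer division means
 * ping_intransit^2 / PING_COOKIES_PER_NEIGH^2 stays 0 until
 * ping_intransit reaches PING_COOKIES_PER_NEIGH, so the delay is
 * unchanged below that point and jumps to 1 + 9 = 10 times the base
 * delay once the whole cookie budget is in flight.
 */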
/*
 * Check whether we want to send a ping now:
 * 0... Do not send ping.
 * 1... Send ping now, but only if it can be merged with other messages. This
 *      can happen way before the time requested by cor_get_next_ping_time().
 * 2... Send ping now, even if a packet has to be created just for the ping
 *      alone.
 */
int cor_time_to_send_ping(struct cor_neighbor *nb)
{
        unsigned long iflags;
        int rc = TIMETOSENDPING_YES;

        __u32 ms_since_last_ping;

        __u32 forcetime = cor_get_ping_forcetime_ms(nb);
        __u32 mindelay;

        spin_lock_irqsave(&(nb->state_lock), iflags);

        ms_since_last_ping = jiffies_to_msecs(jiffies - nb->last_ping_time);

        mindelay = cor_get_ping_mindelay_ms(nb);

        if (forcetime < (mindelay * 3))
                forcetime = mindelay * 3;
        else if (forcetime > (mindelay * 3))
                mindelay = forcetime/3;

        if (ms_since_last_ping < mindelay || ms_since_last_ping < (forcetime/4))
                rc = TIMETOSENDPING_NO;
        else if (ms_since_last_ping >= forcetime)
                rc = TIMETOSENDPING_FORCE;

        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        return rc;
}
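
/*
 * Worked example: with mindelay = 10 ms and forcetime = 25 ms, forcetime
 * is first raised to 3 * mindelay = 30 ms. A ping is then suppressed for
 * the first 10 ms after the previous one (TIMETOSENDPING_NO), piggybacked
 * onto other messages between 10 and 30 ms (TIMETOSENDPING_YES), and sent
 * in a packet of its own after 30 ms (TIMETOSENDPING_FORCE).
 */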
unsigned long cor_get_next_ping_time(struct cor_neighbor *nb)
{
        unsigned long iflags;
        __u32 mindelay;

        __u32 forcetime = cor_get_ping_forcetime_ms(nb);

        spin_lock_irqsave(&(nb->state_lock), iflags);
        mindelay = cor_get_ping_mindelay_ms(nb);
        spin_unlock_irqrestore(&(nb->state_lock), iflags);

        if (forcetime < (mindelay * 3))
                forcetime = mindelay * 3;

        return nb->last_ping_time + msecs_to_jiffies(forcetime);
}
void cor_add_neighbor(struct cor_neighbor_discdata *nb_dd)
{
        struct cor_neighbor *nb;
        struct list_head *currlh;

        nb = cor_alloc_neighbor(GFP_KERNEL);
        if (unlikely(nb == 0))
                return;

        nb->queue = cor_get_queue(nb_dd->dev);
        if (nb->queue == 0) {
                kmem_cache_free(cor_nb_slab, nb);
                atomic_dec(&cor_num_neighs);
                return;
        }

        dev_hold(nb_dd->dev);
        nb->dev = nb_dd->dev;

        memcpy(nb->mac, nb_dd->mac, MAX_ADDR_LEN);

        nb->addr = nb_dd->addr;
        nb->addrlen = nb_dd->addrlen;

        nb_dd->nb_allocated = 1;

        spin_lock_bh(&cor_neighbor_list_lock);

        BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

        if (cor_is_clientmode() && (nb->addr == 0 || nb->addrlen == 0))
                goto already_present;

        currlh = cor_nb_list.next;
        while (currlh != &cor_nb_list) {
                struct cor_neighbor *curr = container_of(currlh,
                                struct cor_neighbor, nb_list);

                BUG_ON((curr->addr == 0) != (curr->addrlen == 0));

                if (curr->dev == nb->dev &&
                                memcmp(curr->mac, nb->mac, MAX_ADDR_LEN) == 0)
                        goto already_present;

                if (curr->addr != 0 && curr->addrlen != 0 &&
                                nb->addr != 0 && nb->addrlen != 0 &&
                                curr->addrlen == nb->addrlen &&
                                memcmp(curr->addr, nb->addr, curr->addrlen) == 0)
                        goto already_present;

                currlh = currlh->next;
        }

        /* printk(KERN_ERR "add_neigh"); */

        spin_lock_bh(&cor_local_addr_lock);
        nb->sessionid = cor_local_addr_sessionid ^ nb_dd->sessionid;
        spin_unlock_bh(&cor_local_addr_lock);

        timer_setup(&(nb->retrans_timer), cor_retransmit_timerfunc, 0);

        timer_setup(&(nb->retrans_conn_timer), cor_retransmit_conn_timerfunc, 0);

        spin_lock_bh(&(nb->cmsg_lock));
        nb->last_ping_time = jiffies;
        cor_schedule_controlmsg_timer(nb);
        spin_unlock_bh(&(nb->cmsg_lock));

        list_add_tail(&(nb->nb_list), &cor_nb_list);

        goto out;

already_present:
        kmem_cache_free(cor_nb_slab, nb);
        atomic_dec(&cor_num_neighs);

out:
        spin_unlock_bh(&cor_neighbor_list_lock);
}
struct cor_conn *cor_get_conn(struct cor_neighbor *nb, __u32 conn_id)
{
        struct rb_node *n = 0;
        struct cor_conn *ret = 0;

        spin_lock_bh(&(nb->connid_lock));

        n = nb->connid_rb.rb_node;

        while (likely(n != 0) && ret == 0) {
                struct cor_conn *src_in_o = container_of(n, struct cor_conn,
                                source.in.rbn);

                BUG_ON(src_in_o->sourcetype != SOURCE_IN);

                if (conn_id < src_in_o->source.in.conn_id)
                        n = n->rb_left;
                else if (conn_id > src_in_o->source.in.conn_id)
                        n = n->rb_right;
                else
                        ret = src_in_o;
        }

        if (ret != 0)
                kref_get(&(ret->ref));

        spin_unlock_bh(&(nb->connid_lock));

        return ret;
}
int cor_insert_connid(struct cor_neighbor *nb, struct cor_conn *src_in_ll)
{
        int rc = 0;

        __u32 conn_id = src_in_ll->source.in.conn_id;

        struct rb_root *root;
        struct rb_node **p;
        struct rb_node *parent = 0;

        BUG_ON(src_in_ll->sourcetype != SOURCE_IN);

        spin_lock_bh(&(nb->connid_lock));

        root = &(nb->connid_rb);
        p = &(root->rb_node);

        while ((*p) != 0) {
                struct cor_conn *src_in_o = container_of(*p, struct cor_conn,
                                source.in.rbn);

                BUG_ON(src_in_o->sourcetype != SOURCE_IN);

                parent = *p;
                if (unlikely(conn_id == src_in_o->source.in.conn_id)) {
                        rc = 1;
                        goto out;
                } else if (conn_id < src_in_o->source.in.conn_id) {
                        p = &(*p)->rb_left;
                } else if (conn_id > src_in_o->source.in.conn_id) {
                        p = &(*p)->rb_right;
                } else {
                        BUG();
                }
        }

        kref_get(&(src_in_ll->ref));
        rb_link_node(&(src_in_ll->source.in.rbn), parent, p);
        rb_insert_color(&(src_in_ll->source.in.rbn), root);

out:
        spin_unlock_bh(&(nb->connid_lock));

        return rc;
}
static struct cor_connid_reuse_item *cor_get_connid_reuseitem(
                struct cor_neighbor *nb, __u32 conn_id)
{
        struct rb_node *n = 0;
        struct cor_connid_reuse_item *ret = 0;

        spin_lock_bh(&(nb->connid_reuse_lock));

        n = nb->connid_reuse_rb.rb_node;

        while (likely(n != 0) && ret == 0) {
                struct cor_connid_reuse_item *cir = container_of(n,
                                struct cor_connid_reuse_item, rbn);

                BUG_ON(cir->conn_id == 0);

                if (conn_id < cir->conn_id)
                        n = n->rb_left;
                else if (conn_id > cir->conn_id)
                        n = n->rb_right;
                else
                        ret = cir;
        }

        if (ret != 0)
                kref_get(&(ret->ref));

        spin_unlock_bh(&(nb->connid_reuse_lock));

        return ret;
}
/* nb->connid_reuse_lock must be held by the caller */
void cor_insert_connid_reuse(struct cor_neighbor *nb,
                struct cor_connid_reuse_item *ins)
{
        struct rb_root *root;
        struct rb_node **p;
        struct rb_node *parent = 0;

        BUG_ON(ins->conn_id == 0);

        root = &(nb->connid_reuse_rb);
        p = &(root->rb_node);

        while ((*p) != 0) {
                struct cor_connid_reuse_item *curr = container_of(*p,
                                struct cor_connid_reuse_item, rbn);

                BUG_ON(curr->conn_id == 0);

                parent = *p;
                if (unlikely(ins->conn_id == curr->conn_id)) {
                        BUG();
                } else if (ins->conn_id < curr->conn_id) {
                        p = &(*p)->rb_left;
                } else if (ins->conn_id > curr->conn_id) {
                        p = &(*p)->rb_right;
                } else {
                        BUG();
                }
        }

        kref_get(&(ins->ref));
        rb_link_node(&(ins->rbn), parent, p);
        rb_insert_color(&(ins->rbn), root);
}
static void cor_free_connid_reuse(struct kref *ref)
{
        struct cor_connid_reuse_item *cir = container_of(ref,
                        struct cor_connid_reuse_item, ref);

        kmem_cache_free(cor_connid_reuse_slab, cir);
}
static void cor_delete_connid_reuse_items(struct cor_neighbor *nb)
{
        struct cor_connid_reuse_item *cri;

        spin_lock_bh(&(nb->connid_reuse_lock));

        while (list_empty(&(nb->connid_reuse_list)) == 0) {
                cri = container_of(nb->connid_reuse_list.next,
                                struct cor_connid_reuse_item, lh);

                rb_erase(&(cri->rbn), &(nb->connid_reuse_rb));
                kref_put(&(cri->ref), cor_kreffree_bug);

                list_del(&(cri->lh));
                kref_put(&(cri->ref), cor_free_connid_reuse);
        }

        spin_unlock_bh(&(nb->connid_reuse_lock));
}
static void cor_connid_used_pingsuccess(struct cor_neighbor *nb)
{
        struct cor_connid_reuse_item *cri;

        spin_lock_bh(&(nb->connid_reuse_lock));

        nb->connid_reuse_pingcnt++;
        while (list_empty(&(nb->connid_reuse_list)) == 0) {
                cri = container_of(nb->connid_reuse_list.next,
                                struct cor_connid_reuse_item, lh);
                if ((cri->pingcnt + CONNID_REUSE_RTTS -
                                nb->connid_reuse_pingcnt) < 32768)
                        break;

                rb_erase(&(cri->rbn), &(nb->connid_reuse_rb));
                kref_put(&(cri->ref), cor_kreffree_bug);

                list_del(&(cri->lh));
                kref_put(&(cri->ref), cor_free_connid_reuse);
        }

        spin_unlock_bh(&(nb->connid_reuse_lock));
}
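
/*
 * The comparison above relies on wraparound, like serial number
 * arithmetic for sequence numbers: an item stays on the list while
 * (cri->pingcnt + CONNID_REUSE_RTTS - connid_reuse_pingcnt) is still
 * "positive" (< 32768), i.e. until at least CONNID_REUSE_RTTS successful
 * pings have happened since the connection id was retired. Only then may
 * the id be handed out again by cor_connid_alloc() below.
 */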
static int cor_connid_used(struct cor_neighbor *nb, __u32 conn_id)
{
        struct cor_conn *cn;
        struct cor_connid_reuse_item *cir;

        cn = cor_get_conn(nb, conn_id);
        if (unlikely(cn != 0)) {
                kref_put(&(cn->ref), cor_free_conn);
                return 1;
        }

        cir = cor_get_connid_reuseitem(nb, conn_id);
        if (unlikely(cir != 0)) {
                kref_put(&(cir->ref), cor_free_connid_reuse);
                return 1;
        }

        return 0;
}
int cor_connid_alloc(struct cor_neighbor *nb, struct cor_conn *src_in_ll)
{
        __u32 conn_id;
        int i;

        BUG_ON(src_in_ll->sourcetype != SOURCE_IN);
        BUG_ON(src_in_ll->reversedir->targettype != TARGET_OUT);

        spin_lock_bh(&cor_connid_gen);
        for (i = 0; i < 16; i++) {
                conn_id = 0;
                get_random_bytes((char *) &conn_id, sizeof(conn_id));
                conn_id = (conn_id & ~(1 << 31));

                if (unlikely(conn_id == 0))
                        continue;

                if (unlikely(cor_connid_used(nb, conn_id)))
                        continue;

                goto found;
        }
        spin_unlock_bh(&cor_connid_gen);

        return 1;

found:
        src_in_ll->source.in.conn_id = conn_id;
        src_in_ll->reversedir->target.out.conn_id = (conn_id | (1 << 31));
        if (unlikely(cor_insert_connid(nb, src_in_ll) != 0)) {
                BUG();
        }
        spin_unlock_bh(&cor_connid_gen);

        return 0;
}
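
/*
 * Connection ids are split by the top bit: the receive direction uses
 * conn_id & ~(1 << 31) and the reverse (send) direction the same value
 * with the bit set. For example, a generated id of 0x12345678 yields
 * 0x92345678 for reversedir->target.out.conn_id, so both directions can
 * be matched up without a second allocation.
 */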
int __init cor_neighbor_init(void)
{
        cor_nb_slab = kmem_cache_create("cor_neighbor",
                        sizeof(struct cor_neighbor), 8, 0, 0);
        if (unlikely(cor_nb_slab == 0))
                return -ENOMEM;

        atomic_set(&cor_num_neighs, 0);

        return 0;
}
void __exit cor_neighbor_exit2(void)
{
        BUG_ON(atomic_read(&cor_num_neighs) != 0);

        kmem_cache_destroy(cor_nb_slab);
}

MODULE_LICENSE("GPL");