set credits on new connections
[cor_2_6_31.git] / net / cor / neighbor.c
/*
 * Connection oriented routing
 * Copyright (C) 2007-2010 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include "cor.h"
/**
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if the format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 *  unit is bytes
 * [data]
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
 *  ...
 *
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 *
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */
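/*
 * Worked example of the split header (values illustrative, not taken
 * from a real trace): a 46 byte announce message sent in a single chunk
 * would carry a header that decodes as
 *
 *   announce proto version  00 00 00 00
 *   packet version          00 00 00 02  (2nd revision of the data field)
 *   total size              00 00 00 2e  (46 bytes)
 *   offset                  00 00 00 00  (first and only chunk)
 *
 * followed by the 46 data bytes. All fields are big-endian; see
 * send_announce_chunk() below for the code that writes this header.
 */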
/* Commands */

#define NEIGHCMD_ADDADDR 1

/**
 * Parameter:
 * addrtypelen [2]
 * addrlen [2]
 * addrtype [addrtypelen]
 * addr [addrlen]
 */
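/*
 * Illustrative sketch (not built, hence the #if 0): how a NEIGHCMD_ADDADDR
 * command for the "id" address type could be assembled with the
 * put_u32()/put_u16() helpers this file already uses. The function name
 * and parameters are hypothetical; generate_announce() below does the
 * same thing for the real announce message.
 */
#if 0
static void example_build_addaddr(char *buf, char *xaddr, __u16 xaddrlen)
{
	put_u32(buf + 0, NEIGHCMD_ADDADDR, 1);     /* command */
	put_u32(buf + 4, 2 + 2 + 2 + xaddrlen, 1); /* commandlength */
	put_u16(buf + 8, 2, 1);                    /* addrtypelen */
	put_u16(buf + 10, xaddrlen, 1);            /* addrlen */
	buf[12] = 'i';                             /* addrtype */
	buf[13] = 'd';
	memcpy(buf + 14, xaddr, xaddrlen);         /* addr */
}
#endif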
DEFINE_MUTEX(neighbor_operation_lock);

char *addrtype = "id";
char *addr;
int addrlen;

LIST_HEAD(nb_list);
struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;

#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1
static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
	if (addrtypelen == 2 &&
			(addrtype[0] == 'i' || addrtype[0] == 'I') &&
			(addrtype[1] == 'd' || addrtype[1] == 'D'))
		return ADDRTYPE_ID;

	return ADDRTYPE_UNKNOWN;
}
void neighbor_free(struct kref *ref)
{
	struct neighbor *nb = container_of(ref, struct neighbor, ref);
	printk(KERN_ERR "neighbor free\n");
	BUG_ON(nb->nb_list.next != LIST_POISON1);
	BUG_ON(nb->nb_list.prev != LIST_POISON2);
	if (nb->addr != 0)
		kfree(nb->addr);
	nb->addr = 0;
	if (nb->dev != 0)
		dev_put(nb->dev);
	nb->dev = 0;
	kmem_cache_free(nb_slab, nb);
}
static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
	struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
	__u32 seqno;

	if (unlikely(nb == 0))
		return 0;

	memset(nb, 0, sizeof(struct neighbor));

	kref_init(&(nb->ref));
	mutex_init(&(nb->cmsg_lock));
	INIT_LIST_HEAD(&(nb->control_msgs_out));
	INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
	nb->last_ping_time = jiffies;
	atomic_set(&(nb->ooo_packets), 0);
	spin_lock_init(&(nb->credits_lock));
	nb->jiffies_credit_update = nb->last_ping_time;
	nb->jiffies_credit_decay = nb->last_ping_time;
	get_random_bytes((char *) &seqno, sizeof(seqno));
	mutex_init(&(nb->pingcookie_lock));
	atomic_set(&(nb->latency), 1000000);
	atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
	spin_lock_init(&(nb->state_lock));
	atomic_set(&(nb->kpacket_seqno), seqno);
	mutex_init(&(nb->conn_list_lock));
	INIT_LIST_HEAD(&(nb->rcv_conn_list));
	INIT_LIST_HEAD(&(nb->snd_conn_list));
	spin_lock_init(&(nb->retrans_lock));
	INIT_LIST_HEAD(&(nb->retrans_list));
	INIT_LIST_HEAD(&(nb->retrans_list_conn));

	return nb;
}
struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}
struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
		__u16 addrlen, __u8 *addr)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
				addrlen) == 0) {
			ret = curr;
			kref_get(&(ret->ref));

			goto out;
		}

		currlh = currlh->next;
	}

out:
	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}
/*
 * TODO:
 *
 * address flags
 * credit exchange factor + unstable flag
 * throughput bound conns: throughput,credits/msecs
 * latency bound conns: latency (ms), credits/byte
 */
#warning todo extend
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
	struct list_head *currlh;

	int bufferfull = 0;

	__u32 total = 0;
	__u32 cnt = 0;

	__u32 buf_offset = 8;
	__u32 headoffset = 0;

	int rc;

	/*
	 * The variable length headers rowcount and fieldlength need to be
	 * generated after the data. This is done by reserving the maximum
	 * space they could take. If they end up being smaller, the data is
	 * moved so that there is no gap.
	 */

	BUG_ON(buf == 0);
	BUG_ON(buflen < buf_offset);

	/* num_fields */
	rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);
	BUG_ON(rc <= 0);
	buf_offset += rc;

	/* addr field */
	BUG_ON(buflen < buf_offset + 2);
	put_u16(buf + buf_offset, LIST_NEIGH_FIELD_ADDR, 1);
	buf_offset += 2;

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 0);
	BUG_ON(rc <= 0);
	buf_offset += rc;

	/* latency field */
	BUG_ON(buflen < buf_offset + 2);
	put_u16(buf + buf_offset, LIST_NEIGH_FIELD_LATENCY, 1);
	buf_offset += 2;

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);
	BUG_ON(rc <= 0);
	buf_offset += rc;

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);
		__u8 state;
		unsigned long iflags;

		__u32 addroffset = buf_offset;

		/* get_neigh_state not used here because it would deadlock */
		spin_lock_irqsave(&(curr->state_lock), iflags);
		state = curr->state;
		spin_unlock_irqrestore(&(curr->state_lock), iflags);

		if (state != NEIGHBOR_STATE_ACTIVE)
			goto cont2;

		if (total < offset)
			goto cont;

		if (unlikely(buflen < buf_offset + 4 + 4 + 4 + 4 + 2 +
				curr->addrlen + 1))
			bufferfull = 1;

		if (bufferfull)
			goto cont;

		buf_offset += 4; /* reserve bufferspace for fieldlen */

		/* numaddr */
		rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);
		BUG_ON(rc <= 0);
		buf_offset += rc;

		/* addrtypelen */
		rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);
		BUG_ON(rc <= 0);
		buf_offset += rc;

		/* addrlen */
		rc = encode_len(buf + buf_offset, buflen - buf_offset,
				curr->addrlen);
		BUG_ON(rc <= 0);
		buf_offset += rc;

		buf[buf_offset] = 'i'; /* addrtype */
		buf_offset += 1;
		buf[buf_offset] = 'd';
		buf_offset += 1;
		BUG_ON(curr->addrlen > buflen - buf_offset);
		memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
		buf_offset += curr->addrlen;

		/* fieldlen */
		rc = encode_len(buf + addroffset, 4, buf_offset - addroffset -
				4);
		BUG_ON(rc <= 0);
		BUG_ON(rc > 4);
		if (likely(rc < 4))
			memmove(buf + addroffset + rc, buf + addroffset + 4,
					buf_offset - addroffset - 4);
		buf_offset -= (4 - rc);

		buf[buf_offset] = enc_log_64_11(atomic_read(&(curr->latency)));
		buf_offset += 1;

		BUG_ON(buf_offset > buflen);

		cnt++;

cont:
		total++;
cont2:
		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	rc = encode_len(buf, 4, total);
	BUG_ON(rc <= 0);
	BUG_ON(rc > 4);
	headoffset += rc;

	rc = encode_len(buf + headoffset, 4, cnt);
	BUG_ON(rc <= 0);
	BUG_ON(rc > 4);
	headoffset += rc;

	if (likely(headoffset < 8))
		memmove(buf + headoffset, buf + 8, buf_offset);

	return buf_offset + headoffset - 8;
}
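/*
 * Sketch of the reserve-then-move encoding used above (numbers
 * hypothetical): 4 bytes are reserved for a fieldlen that later turns
 * out to need only 1 byte. After encode_len() writes that byte, the
 * field data is moved 3 bytes to the left and buf_offset shrinks by 3,
 * so the buffer never contains padding between header and data:
 *
 *   before: | <4 byte gap> | numaddr | ... | addr |
 *   after:  | len | numaddr | ... | addr |
 */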
void set_last_routdtrip(struct neighbor *nb, unsigned long time)
{
	unsigned long iflags;

	BUG_ON(nb == 0);

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
			nb->state_time.last_roundtrip))
		nb->state_time.last_roundtrip = time;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);
}
static void _refresh_initial_debitsrate(struct net_device *dev,
		__u32 debitsrate)
{
	__u32 neighbors = 0;
	struct list_head *currlh;

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			neighbors++;

		currlh = currlh->next;
	}

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			set_creditrate_initial(curr,
					debitsrate/neighbors);

		currlh = currlh->next;
	}
}
/* neighbor operation lock has to be held while calling this */
static void refresh_initial_debitsrate(void)
{
	struct list_head *currlh1;
	__u32 ifcnt = 0;
	__u32 creditrate;

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				goto present1;
			currlh2 = currlh2->next;
		}

		ifcnt++;

present1:
		currlh1 = currlh1->next;
	}

	creditrate = creditrate_initial();

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				goto present2;
			currlh2 = currlh2->next;
		}

		_refresh_initial_debitsrate(curr1->dev, creditrate/ifcnt);

present2:
		currlh1 = currlh1->next;
	}
}
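/*
 * Illustrative example of the split performed above (numbers made up):
 * with creditrate_initial() == 6000, two interfaces eth0 and eth1, and
 * three neighbors on eth0, each eth0 neighbor gets an initial rate of
 * 6000 / 2 / 3 == 1000, while a single eth1 neighbor gets
 * 6000 / 2 / 1 == 3000. The first pass counts the interfaces, the second
 * delegates the per-interface share to _refresh_initial_debitsrate().
 */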
static void reset_all_conns(struct neighbor *nb)
{
	while (1) {
		struct conn *sconn;

		mutex_lock(&(nb->conn_list_lock));

		if (list_empty(&(nb->snd_conn_list))) {
			BUG_ON(nb->num_send_conns != 0);
			mutex_unlock(&(nb->conn_list_lock));
			break;
		}

		sconn = container_of(nb->snd_conn_list.next, struct conn,
				target.out.nb_list);
		BUG_ON(sconn->targettype != TARGET_OUT);

		/*
		 * reset_conn must not be called with conn_list_lock
		 * held
		 */
		mutex_unlock(&(nb->conn_list_lock));
		reset_conn(sconn);
	}
}
static void stall_timer(struct work_struct *work)
{
	struct neighbor *nb = container_of(to_delayed_work(work),
			struct neighbor, stalltimeout_timer);

	int stall_time_ms;
	__u8 nbstate;

	unsigned long iflags;

	spin_lock_irqsave(&(nb->state_lock), iflags);
	stall_time_ms = jiffies_to_msecs(jiffies -
			nb->state_time.last_roundtrip);
	nbstate = nb->state;
	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
		nb->str_timer_pending = 0;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
		kref_put(&(nb->ref), neighbor_free);
		return;
	}

	if (stall_time_ms < NB_KILL_TIME_MS) {
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				msecs_to_jiffies(NB_KILL_TIME_MS -
				stall_time_ms));
		return;
	}

	printk(KERN_ERR "reset_all\n");

	reset_all_conns(nb);

	spin_lock_irqsave(&(nb->state_lock), iflags);
	nb->state = NEIGHBOR_STATE_KILLED;
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	mutex_lock(&neighbor_operation_lock);
	list_del(&(nb->nb_list));
	refresh_initial_debitsrate();
	mutex_unlock(&neighbor_operation_lock);

	kref_put(&(nb->ref), neighbor_free); /* nb_list */
	kref_put(&(nb->ref), neighbor_free); /* stall_timer */
}
int get_neigh_state(struct neighbor *nb)
{
	int ret;
	unsigned long iflags;
	int starttimer = 0;
	int stall_time_ms;

	BUG_ON(nb == 0);

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
			time_after_eq(jiffies, nb->state_time.last_roundtrip +
			msecs_to_jiffies(NB_STALL_TIME_MS)) && (
			nb->ping_intransit >= NB_STALL_MINPINGS ||
			nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
		nb->state = NEIGHBOR_STATE_STALLED;
		starttimer = (nb->str_timer_pending == 0);
		stall_time_ms = jiffies_to_msecs(jiffies -
				nb->state_time.last_roundtrip);
		nb->str_timer_pending = 1;
		printk(KERN_ERR "switched to stalled\n");
		BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
	}

	ret = nb->state;

	spin_unlock_irqrestore(&(nb->state_lock), iflags);

	if (unlikely(starttimer)) {
		kref_get(&(nb->ref));
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
				stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				msecs_to_jiffies(NB_KILL_TIME_MS -
				stall_time_ms));
	}

	return ret;
}
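/*
 * Summary of the neighbor state machine implemented by get_neigh_state(),
 * stall_timer() and ping_resp() (derived from the code in this file):
 *
 *   INITIAL --> ACTIVE   after PING_SUCCESS_CNT pongs, but not before
 *                        INITIAL_TIME_MS have passed
 *   ACTIVE  --> STALLED  when no roundtrip succeeded for NB_STALL_TIME_MS
 *                        and enough pings are in transit
 *   STALLED --> ACTIVE   after PING_SUCCESS_CNT pongs
 *   STALLED --> KILLED   after NB_KILL_TIME_MS without a roundtrip; all
 *                        conns are reset and the neighbor leaves nb_list
 */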
static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
	int i;

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie == cookie)
			return &(nb->cookies[i]);
	}

	return 0;
}
void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
	struct ping_cookie *c;
	int i;

	unsigned long cookie_sendtime;
	__s64 newlatency;

	unsigned long iflags;

	mutex_lock(&(nb->pingcookie_lock));

	c = find_cookie(nb, cookie);

	if (unlikely(c == 0))
		goto out;

	cookie_sendtime = c->time;

	newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
			jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
	if (unlikely(newlatency < 0))
		newlatency = 0;
	if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
		newlatency = ((__s64)256)*256*256*256 - 1;

	atomic_set(&(nb->latency), (__u32) newlatency);

	c->cookie = 0;
	nb->ping_intransit--;

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie != 0 &&
				time_before(nb->cookies[i].time, c->time)) {
			nb->cookies[i].pongs++;
			if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
				nb->cookies[i].cookie = 0;
				nb->cookies[i].pongs = 0;
				nb->ping_intransit--;
			}
		}
	}

	spin_lock_irqsave(&(nb->state_lock), iflags);

	if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
			nb->state == NEIGHBOR_STATE_STALLED)) {
		nb->ping_success++;

		if (nb->state == NEIGHBOR_STATE_INITIAL) {
			__u64 jiffies64 = get_jiffies_64();
			if (nb->state_time.last_state_change == 0)
				nb->state_time.last_state_change = jiffies64;
			if (jiffies64 <= (nb->state_time.last_state_change +
					msecs_to_jiffies(INITIAL_TIME_MS)))
				goto out2;
		}

		if (nb->ping_success >= PING_SUCCESS_CNT) {
			/*if (nb->state == NEIGHBOR_STATE_INITIAL)
				printk(KERN_ERR "switched from initial to active");
			else
				printk(KERN_ERR "switched from stalled to active");
			*/
			nb->state = NEIGHBOR_STATE_ACTIVE;
			nb->ping_success = 0;
			nb->state_time.last_roundtrip = jiffies;
		}
	} else {
		nb->state_time.last_roundtrip = cookie_sendtime;
	}

out2:
	spin_unlock_irqrestore(&(nb->state_lock), iflags);

out:
	mutex_unlock(&(nb->pingcookie_lock));
}
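/*
 * The latency update above is an exponentially weighted moving average
 * in microseconds:
 *
 *   latency_new = (15 * latency_old + rtt_sample) / 16
 *
 * where rtt_sample = jiffies_to_usecs(jiffies - sendtime) - respdelay.
 * Worked example (made-up numbers): latency_old = 100000us and a 36000us
 * sample give (15 * 100000 + 36000) / 16 = 96000us.
 */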
__u32 add_ping_req(struct neighbor *nb)
{
	struct ping_cookie *c;
	__u32 i;

	__u32 cookie;

	mutex_lock(&(nb->pingcookie_lock));

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie == 0)
			goto found;
	}

	get_random_bytes((char *) &i, sizeof(i));
	i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
			PING_COOKIES_FIFO;

found:
	c = &(nb->cookies[i]);
	c->time = jiffies;
	c->pongs = 0;
	nb->lastcookie++;
	if (unlikely(nb->lastcookie == 0))
		nb->lastcookie++;
	c->cookie = nb->lastcookie;

	nb->ping_intransit++;

	cookie = c->cookie;

	nb->last_ping_time = jiffies;

	mutex_unlock(&(nb->pingcookie_lock));

	return cookie;
}
void unadd_ping_req(struct neighbor *nb, __u32 cookie)
{
	int i;

	if (cookie == 0)
		return;

	mutex_lock(&(nb->pingcookie_lock));

	for (i = 0; i < PING_COOKIES_PER_NEIGH; i++) {
		if (nb->cookies[i].cookie == cookie) {
			nb->cookies[i].cookie = 0;
			nb->ping_intransit--;
			break;
		}
	}

	mutex_unlock(&(nb->pingcookie_lock));
}
static int neighbor_idle(struct neighbor *nb)
{
	int ret;
	mutex_lock(&(nb->conn_list_lock));
	ret = (list_empty(&(nb->rcv_conn_list)) &&
			list_empty(&(nb->snd_conn_list)));
	BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
	mutex_unlock(&(nb->conn_list_lock));
	return ret;
}
/*
 * In addition to the checks and timings already done in kpacket_gen.c, this
 * makes sure that we do not invalidate other ping cookies which might still
 * receive responses. It does this by requiring a certain minimum delay
 * between pings, depending on how many pings are already in transit.
 */
int time_to_send_ping(struct neighbor *nb)
{
	int rc = 1;

	int state = get_neigh_state(nb);
	int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
			neighbor_idle(nb));
	__u32 forcetime;

#warning todo send pings for some time after the neighbor gets idle (initial latency measurement + tos_privacy)

	mutex_lock(&(nb->pingcookie_lock));
	if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
		__u32 mindelay = (( ((__u32) atomic_read(&(nb->latency))) +
				((__u32) atomic_read(
				&(nb->max_remote_cmsg_delay))) )/1000) <<
				(nb->ping_intransit + 1 -
				PING_COOKIES_NOTHROTTLE);

		if (mindelay > PING_THROTTLE_LIMIT_MS)
			mindelay = PING_THROTTLE_LIMIT_MS;

		if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
			rc = 0;
	}

	if (unlikely(state != NEIGHBOR_STATE_ACTIVE) ||
			nb->ping_intransit != 0)
		forcetime = PING_FORCETIME_MS;
	else if (idle)
		forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
	else
		forcetime = PING_FORCETIME_ACTIVE_MS;

	if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))
		rc = 0;
	else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
		rc = 2;

	mutex_unlock(&(nb->pingcookie_lock));

	return rc;
}
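/*
 * Worked example for the throttle above (all numbers hypothetical):
 * with latency = 50000us, max_remote_cmsg_delay = 10000us and
 * ping_intransit = PING_COOKIES_NOTHROTTLE + 2, the minimum delay is
 *
 *   ((50000 + 10000) / 1000) << 3 = 60ms * 8 = 480ms
 *
 * capped at PING_THROTTLE_LIMIT_MS. Each additional unanswered ping
 * doubles the delay.
 */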
static void add_neighbor(struct neighbor *nb)
{
	struct list_head *currlh = nb_list.next;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == nb->addrlen && memcmp(curr->addr,
				nb->addr, curr->addrlen) == 0)
			goto already_present;

		currlh = currlh->next;
	}

	/* kref_get not needed here, because the caller leaves its ref to us */
	printk(KERN_ERR "add_neigh\n");

	list_add_tail(&(nb->nb_list), &nb_list);
	refresh_initial_debitsrate();
	schedule_controlmsg_timerfunc(nb);
	INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
	INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

	if (0) {
already_present:
		kmem_cache_free(nb_slab, nb);
	}
}
static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
	char *ptr = cor_pull_skb(skb, 4);

	__u32 ret = 0;

	BUG_ON(0 == ptr);

	((char *)&ret)[0] = ptr[0];
	((char *)&ret)[1] = ptr[1];
	((char *)&ret)[2] = ptr[2];
	((char *)&ret)[3] = ptr[3];

	if (convbo)
		return be32_to_cpu(ret);
	return ret;
}
static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	__u16 addrtypelen;
	char *addrtype;
	__u16 addrlen;
	char *addr;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	if (nb->addr != 0)
		return 0;

	if (len < 4)
		return 0;

	addrtypelen = be16_to_cpu(*((__u16 *) cmddata));
	cmddata += 2;
	len -= 2;

	if (len < 2)
		return 0;

	addrlen = be16_to_cpu(*((__u16 *) cmddata));
	cmddata += 2;
	len -= 2;

	/* len is unsigned, so check before subtracting to avoid wraparound */
	if (len < ((__u32) addrtypelen) + addrlen)
		return 0;

	addrtype = cmddata;
	cmddata += addrtypelen;
	len -= addrtypelen;

	addr = cmddata;
	cmddata += addrlen;
	len -= addrlen;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	nb->addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(nb->addr == 0))
		return 1;

	memcpy(nb->addr, addr, addrlen);
	nb->addrlen = addrlen;

	return 0;
}
static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	if (cmd == NEIGHCMD_ADDADDR) {
		apply_announce_addaddr(nb, cmd, len, cmddata);
	} else {
		/* ignore unknown cmds */
	}
}
static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

	if (unlikely(nb == 0))
		return;

	while (len >= 8) {
		__u32 cmd;
		__u32 cmdlen;

		cmd = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;
		cmdlen = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;

		BUG_ON(cmdlen > len);

		apply_announce_cmd(nb, cmd, cmdlen, msg);

		msg += cmdlen;
		len -= cmdlen;
	}

	BUG_ON(len != 0);

	memcpy(nb->mac, source_hw, MAX_ADDR_LEN);

	dev_hold(dev);
	nb->dev = dev;
	add_neighbor(nb);
}
static int check_announce_cmds(char *msg, __u32 len)
{
	while (len >= 8) {
		__u32 cmd;
		__u32 cmdlen;

		cmd = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;
		cmdlen = be32_to_cpu(*((__u32 *) msg));
		msg += 4;
		len -= 4;

		/* malformed packet */
		if (unlikely(cmdlen > len))
			return 1;

		msg += cmdlen;
		len -= cmdlen;
	}

	if (unlikely(len != 0))
		return 1;

	return 0;
}
static void parse_announce(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	__u32 min_announce_version;
	__u32 max_announce_version;
	__u32 min_cor_version;
	__u32 max_cor_version;

	if (unlikely(len < 16))
		return;

	min_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	min_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;

	if (min_announce_version != 0)
		return;
	if (min_cor_version != 0)
		return;
	if (check_announce_cmds(msg, len))
		return;

	apply_announce_cmds(msg, len, dev, source_hw);
}
struct announce_in {
	/* lh has to be first */
	struct list_head lh;
	struct sk_buff_head skbs; /* sorted by offset */
	struct net_device *dev;
	char source_hw[MAX_ADDR_LEN];
	__u32 announce_proto_version;
	__u32 packet_version;
	__u32 total_size;
	__u32 received_size;
	__u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;
static void merge_announce(struct announce_in *ann)
{
	char *msg = kmalloc(ann->total_size, GFP_KERNEL);
	__u32 copy = 0;

	if (msg == 0) {
		/* try again when the next packet arrives */
		return;
	}

	while (copy != ann->total_size) {
		__u32 currcpy;
		__u32 offset = 0;
		struct sk_buff *skb;
		struct skb_procstate *ps;

		if (unlikely(skb_queue_empty(&(ann->skbs)))) {
			printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
					"empty while merging packets\n");
			goto free;
		}

		skb = skb_dequeue(&(ann->skbs));
		ps = skb_pstate(skb);

		currcpy = skb->len;
		if (unlikely(ps->funcstate.announce.offset > copy)) {
			printk(KERN_ERR "net/cor/neighbor.c: invalid offset "
					"value found\n");
			goto free;
		}

		if (unlikely(ps->funcstate.announce.offset < copy)) {
			offset = copy - ps->funcstate.announce.offset;
			currcpy -= offset;
		}

		if (unlikely(currcpy + copy > ann->total_size))
			goto free;

		memcpy(msg + copy, skb->data + offset, currcpy);
		copy += currcpy;
		kfree_skb(skb);
	}

	parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

free:
	if (msg != 0)
		kfree(msg);

	dev_put(ann->dev);
	list_del(&(ann->lh));
	kmem_cache_free(announce_in_slab, ann);
}
static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
	struct skb_procstate *ps = skb_pstate(skb);

	__u32 offset = ps->funcstate.announce.offset;
	__u32 len = skb->len;

	__u32 curroffset = 0;
	__u32 prevoffset = 0;
	__u32 prevlen = 0;

	struct sk_buff *curr = ann->skbs.next;

	if (unlikely(len + offset > ann->total_size)) {
		/* invalid header */
		kfree_skb(skb);
		return 0;
	}

	/*
	 * Try to find the right place to insert in the sorted list. This
	 * means to process the list until we find a skb which has a greater
	 * offset, so we can insert before it to keep the sort order. However,
	 * this is complicated by the fact that the new skb must not be
	 * inserted between 2 skbs if there is no data missing in between. So
	 * the loop has to keep running until there is either a gap to insert
	 * into or we see that this data has already been received.
	 */
	while ((void *) curr != (void *) &(ann->skbs)) {
		struct skb_procstate *currps = skb_pstate(curr);

		curroffset = currps->funcstate.announce.offset;

		if (curroffset > offset && (prevoffset + prevlen) < curroffset)
			break;

		prevoffset = curroffset;
		prevlen = curr->len;
		curr = curr->next;

		if ((offset+len) <= (prevoffset+prevlen)) {
			/* we already have this data */
			kfree_skb(skb);
			return 0;
		}
	}

	/*
	 * Calculate how much data was really received, by subtracting
	 * the bytes we already have.
	 */
	if (unlikely(prevoffset + prevlen > offset)) {
		len -= (prevoffset + prevlen) - offset;
		offset = prevoffset + prevlen;
	}

	if (unlikely((void *) curr != (void *) &(ann->skbs) &&
			(offset + len) > curroffset))
		len = curroffset - offset;

	ann->received_size += len;
	BUG_ON(ann->received_size > ann->total_size);
	__skb_queue_before(&(ann->skbs), curr, skb);
	ann->last_received_packet = get_jiffies_64();

	if (ann->received_size == ann->total_size)
		merge_announce(ann);
	else if (unlikely(ann->skbs.qlen >= 16))
		return 1;

	return 0;
}
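/*
 * Illustrative example of the trimming above (offsets hypothetical):
 * with chunks [0,100) and [200,300) already queued, a new skb covering
 * [50,250) is accounted as [100,200): the leading 50 bytes are already
 * present (prevoffset + prevlen > offset) and the trailing 50 bytes
 * overlap the next chunk (offset + len > curroffset), so received_size
 * grows by exactly the 100 missing bytes.
 */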
void rcv_announce(struct sk_buff *skb)
{
	struct skb_procstate *ps = skb_pstate(skb);
	struct announce_in *curr = 0;
	struct announce_in *leastactive = 0;
	__u32 list_size = 0;

	__u32 announce_proto_version = pull_u32(skb, 1);
	__u32 packet_version = pull_u32(skb, 1);
	__u32 total_size = pull_u32(skb, 1);

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	ps->funcstate.announce.offset = pull_u32(skb, 1);

	if (total_size > 8192) {
		/* neighbor_operation_lock is not held yet, so the discard
		 * path below (which unlocks) must not be used here */
		kfree_skb(skb);
		return;
	}

	mutex_lock(&(neighbor_operation_lock));

	if (announce_proto_version != 0)
		goto discard;

	curr = (struct announce_in *) announce_list.next;

	while (((struct list_head *) curr) != &(announce_list)) {
		list_size++;
		if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
				MAX_ADDR_LEN) == 0 &&
				curr->announce_proto_version ==
				announce_proto_version &&
				curr->packet_version == packet_version &&
				curr->total_size == total_size)
			goto found;

		if (leastactive == 0 || curr->last_received_packet <
				leastactive->last_received_packet)
			leastactive = curr;

		curr = (struct announce_in *) curr->lh.next;
	}

	if (list_size >= 128) {
		BUG_ON(leastactive == 0);
		curr = leastactive;

		curr->last_received_packet = get_jiffies_64();

		while (!skb_queue_empty(&(curr->skbs))) {
			struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
			kfree_skb(skb2);
		}

		dev_put(curr->dev);
	} else {
		curr = kmem_cache_alloc(announce_in_slab,
				GFP_KERNEL);
		if (curr == 0)
			goto discard;

		skb_queue_head_init(&(curr->skbs));
		list_add_tail((struct list_head *) curr, &announce_list);
	}

	curr->packet_version = packet_version;
	curr->total_size = total_size;
	curr->received_size = 0;
	curr->announce_proto_version = announce_proto_version;
	curr->dev = skb->dev;
	dev_hold(curr->dev);
	memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

found:
	if (_rcv_announce(skb, curr)) {
		list_del((struct list_head *) curr);
		dev_put(curr->dev);
		kmem_cache_free(announce_in_slab, curr);
	}

	if (0) {
discard:
		kfree_skb(skb);
	}

	mutex_unlock(&(neighbor_operation_lock));
}
struct announce {
	struct kref ref;

	__u32 packet_version;
	char *announce_msg;
	__u32 announce_msg_len;
};

struct announce *last_announce;
static int send_announce_chunk(struct announce_data *ann)
{
	struct sk_buff *skb;
	__u32 packet_size = 256;
	__u32 remainingdata = ann->ann->announce_msg_len -
			ann->curr_announce_msg_offset;
	__u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
	__u32 overhead = 17 + headroom;
	char *header;
	char *ptr;
	int rc = 0;

	if (remainingdata < packet_size)
		packet_size = remainingdata;

	skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
	if (unlikely(skb == 0))
		return 0;

	skb->protocol = htons(ETH_P_COR);
	skb->dev = ann->dev;
	skb_reserve(skb, headroom);

	if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
			ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
		goto out_err;

	skb_reset_network_header(skb);

	header = skb_put(skb, 17);
	if (unlikely(header == 0))
		goto out_err;

	header[0] = PACKET_TYPE_ANNOUNCE;

	put_u32(header + 1, 0, 1); /* announce proto version */
	put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
	put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
	put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

	ptr = skb_put(skb, packet_size);
	if (unlikely(ptr == 0))
		goto out_err;

	memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
			packet_size);

	rc = dev_queue_xmit(skb);

	if (rc == 0) {
		ann->curr_announce_msg_offset += packet_size;

		if (ann->curr_announce_msg_offset ==
				ann->ann->announce_msg_len)
			ann->curr_announce_msg_offset = 0;
	}

	if (0) {
out_err:
		if (skb != 0)
			kfree_skb(skb);
	}

	return rc;
}
int send_announce_qos(struct announce_data *ann)
{
	int rc;
	mutex_lock(&(neighbor_operation_lock));
	rc = send_announce_chunk(ann);
	mutex_unlock(&(neighbor_operation_lock));
	return rc;
}
static void announce_free(struct kref *ref)
{
	struct announce *ann = container_of(ref, struct announce, ref);
	kfree(ann->announce_msg);
	kfree(ann);
}

void announce_data_free(struct kref *ref)
{
	struct announce_data *ann = container_of(ref, struct announce_data,
			ref);
	if (ann->ann != 0)
		kref_put(&(ann->ann->ref), announce_free);
	kfree(ann);
}
static void send_announce(struct work_struct *work)
{
	struct announce_data *ann = container_of(to_delayed_work(work),
			struct announce_data, announce_work);
	int reschedule = 0;
	int rc = 0;

	mutex_lock(&(neighbor_operation_lock));

	if (unlikely(ann->dev == 0))
		goto out;
	reschedule = 1;

	if (unlikely(ann->ann == 0 && last_announce == 0))
		goto out;
	if (ann->curr_announce_msg_offset == 0 &&
			unlikely(ann->ann != last_announce)) {
		if (ann->ann != 0)
			kref_put(&(ann->ann->ref), announce_free);
		ann->ann = last_announce;
		kref_get(&(ann->ann->ref));
	}

	rc = send_announce_chunk(ann);

out:
	mutex_unlock(&(neighbor_operation_lock));

	if (rc != 0)
		qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);

	if (unlikely(reschedule == 0)) {
		kref_put(&(ann->ref), announce_data_free);
	} else {
		__u64 jiffies64 = get_jiffies_64();
		int delay;

		ann->scheduled_announce_timer += msecs_to_jiffies(
				ANNOUNCE_SEND_PACKETINTELVAL_MS);

		delay = ann->scheduled_announce_timer - jiffies64;
		if (delay < 0)
			delay = 1;

		INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
		schedule_delayed_work(&(ann->announce_work), delay);
	}
}
static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
	struct list_head *lh = announce_out_list.next;

	while (lh != &announce_out_list) {
		struct announce_data *curr = (struct announce_data *)(
				((char *) lh) -
				offsetof(struct announce_data, lh));

		if (curr->dev == dev)
			return curr;

		lh = lh->next;
	}

	return 0;
}
static void announce_send_adddev(struct net_device *dev)
{
	struct announce_data *ann;

	ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		printk(KERN_ERR "cor cannot allocate memory for sending "
				"announces\n");
		return;
	}

	memset(ann, 0, sizeof(struct announce_data));

	kref_init(&(ann->ref));

	dev_hold(dev);
	ann->dev = dev;

	mutex_lock(&(neighbor_operation_lock));
	list_add_tail(&(ann->lh), &announce_out_list);
	mutex_unlock(&(neighbor_operation_lock));

	ann->scheduled_announce_timer = get_jiffies_64();
	INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
	schedule_delayed_work(&(ann->announce_work), 1);
}
static void announce_send_rmdev(struct net_device *dev)
{
	struct announce_data *ann;

	mutex_lock(&(neighbor_operation_lock));

	ann = get_announce_by_netdev(dev);

	if (ann == 0)
		goto out;

	dev_put(ann->dev);
	ann->dev = 0;

out:
	mutex_unlock(&(neighbor_operation_lock));
}
int netdev_notify_func(struct notifier_block *not, unsigned long event,
		void *ptr)
{
	struct net_device *dev = (struct net_device *) ptr;
	int rc;

	switch (event) {
	case NETDEV_UP:
		rc = create_queue(dev);
		if (rc == 1)
			return 1;
		announce_send_adddev(dev);
		break;
	case NETDEV_DOWN:
		destroy_queue(dev);
		announce_send_rmdev(dev);
		break;
	case NETDEV_REBOOT:
	case NETDEV_CHANGE:
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGEADDR:
	case NETDEV_GOING_DOWN:
	case NETDEV_CHANGENAME:
	case NETDEV_FEAT_CHANGE:
	case NETDEV_BONDING_FAILOVER:
		break;
	default:
		return 1;
	}

	return 0;
}
static int set_announce(char *msg, __u32 len)
{
	struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		kfree(msg);
		return 1;
	}

	memset(ann, 0, sizeof(struct announce));

	ann->announce_msg = msg;
	ann->announce_msg_len = len;

	kref_init(&(ann->ref));

	mutex_lock(&(neighbor_operation_lock));

	if (last_announce != 0) {
		ann->packet_version = last_announce->packet_version + 1;
		kref_put(&(last_announce->ref), announce_free);
	}

	last_announce = ann;

	mutex_unlock(&(neighbor_operation_lock));

	return 0;
}
static int generate_announce(void)
{
	__u32 addrtypelen = strlen(addrtype);

	__u32 hdr_len = 16;
	__u32 cmd_hdr_len = 8;
	__u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

	__u32 len = hdr_len + cmd_hdr_len + cmd_len;
	__u32 offset = 0;

	char *msg = kmalloc(len, GFP_KERNEL);
	if (unlikely(msg == 0))
		return 1;

	put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
	offset += 4;

	put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
	offset += 4;
	put_u32(msg + offset, cmd_len, 1); /* command length */
	offset += 4;

	/* addrtypelen, addrlen */
	put_u16(msg + offset, addrtypelen, 1);
	offset += 2;
	put_u16(msg + offset, addrlen, 1);
	offset += 2;

	/* addrtype, addr */
	memcpy(msg + offset, addrtype, addrtypelen);
	offset += addrtypelen;
	memcpy(msg + offset, addr, addrlen);
	offset += addrlen;

	BUG_ON(offset != len);

	return set_announce(msg, len);
}
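/*
 * With the defaults set in cor_neighbor_init() below (addrtype "id",
 * addrlen 16), the generated announce message is
 * 16 + 8 + (2 + 2 + 2 + 16) = 46 bytes long.
 */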
int __init cor_neighbor_init(void)
{
	addrlen = 16;

	addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(addr == 0))
		goto error_free2;

	get_random_bytes(addr, addrlen);

	nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
			0, 0);
	announce_in_slab = kmem_cache_create("cor_announce_in",
			sizeof(struct announce_in), 8, 0, 0);

	if (unlikely(generate_announce()))
		goto error_free1;

	memset(&netdev_notify, 0, sizeof(netdev_notify));
	netdev_notify.notifier_call = netdev_notify_func;
	register_netdevice_notifier(&netdev_notify);

	return 0;

error_free1:
	kfree(addr);

error_free2:
	return -ENOMEM;
}

MODULE_LICENSE("GPL");