/**
 * Connection oriented routing
 * Copyright (C) 2007-2010 Michael Blizek
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
/*
 * Split packet data format:
 * announce proto version [4]
 *  is 0, may be increased if the format changes
 * packet version [4]
 *  starts with 0, increments every time the data field changes
 * total size [4]
 *  total data size of all merged packets
 * offset [4]
 *  used to determine the order when merging the split packet
 *
 * cumulative checksum [8] (not yet)
 *  chunk 1 contains the checksum of the data in chunk 1
 *  chunk 2 contains the checksum of the data in chunk 1+2
 *
 * Data format of the announce packet "data" field:
 * min_announce_proto_version [4]
 * max_announce_proto_version [4]
 * min_cor_proto_version [4]
 * max_cor_proto_version [4]
 *  versions which are understood
 *
 * command [4]
 * commandlength [4]
 * commanddata [commandlength]
 */

#define NEIGHCMD_ADDADDR 1

/*
 * NEIGHCMD_ADDADDR command data:
 * addrtypelen [2]
 * addrlen [2]
 * addrtype [addrtypelen]
 * addr [addrlen]
 */
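
/*
 * Illustrative sketch, not part of the original code: the split packet
 * header described above, written as a packed struct. The struct and its
 * field names are hypothetical; the real code below builds and parses this
 * header byte by byte with put_u32()/pull_u32() (see send_announce_chunk()
 * and rcv_announce()), preceded by a one byte packet type.
 */
struct announce_packet_hdr_example {
	__u8 packet_type;		/* PACKET_TYPE_ANNOUNCE */
	__be32 announce_proto_version;	/* currently always 0 */
	__be32 packet_version;		/* increments when the data changes */
	__be32 total_size;		/* size of the complete "data" field */
	__be32 offset;			/* offset of this chunk within "data" */
} __attribute__((packed));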
DEFINE_MUTEX(neighbor_operation_lock);

char *addrtype = "id";

struct kmem_cache *nb_slab;

LIST_HEAD(announce_out_list);

struct notifier_block netdev_notify;
#define ADDRTYPE_UNKNOWN 0
#define ADDRTYPE_ID 1

static int get_addrtype(__u32 addrtypelen, char *addrtype)
{
	if (addrtypelen == 2 &&
			(addrtype[0] == 'i' || addrtype[0] == 'I') &&
			(addrtype[1] == 'd' || addrtype[1] == 'D'))
		return ADDRTYPE_ID;

	return ADDRTYPE_UNKNOWN;
}
void neighbor_free(struct kref *ref)
{
	struct neighbor *nb = container_of(ref, struct neighbor, ref);
	printk(KERN_ERR "neighbor free");
	BUG_ON(nb->nb_list.next != LIST_POISON1);
	BUG_ON(nb->nb_list.prev != LIST_POISON2);
	/* ... */
	kmem_cache_free(nb_slab, nb);
}
static struct neighbor *alloc_neighbor(gfp_t allocflags)
{
	struct neighbor *nb = kmem_cache_alloc(nb_slab, allocflags);
	__u32 seqno;

	if (unlikely(nb == 0))
		return 0;

	memset(nb, 0, sizeof(struct neighbor));

	kref_init(&(nb->ref));
	mutex_init(&(nb->cmsg_lock));
	INIT_LIST_HEAD(&(nb->control_msgs_out));
	INIT_LIST_HEAD(&(nb->ucontrol_msgs_out));
	nb->last_ping_time = jiffies;
	atomic_set(&(nb->ooo_packets), 0);
	spin_lock_init(&(nb->credits_lock));
	nb->jiffies_credit_update = nb->last_ping_time;
	nb->jiffies_credit_decay = nb->last_ping_time;
	get_random_bytes((char *) &seqno, sizeof(seqno));
	mutex_init(&(nb->pingcookie_lock));
	atomic_set(&(nb->latency), 1000000);
	atomic_set(&(nb->max_remote_cmsg_delay), 1000000);
	spin_lock_init(&(nb->state_lock));
	atomic_set(&(nb->kpacket_seqno), seqno);
	mutex_init(&(nb->conn_list_lock));
	INIT_LIST_HEAD(&(nb->rcv_conn_list));
	INIT_LIST_HEAD(&(nb->snd_conn_list));
	spin_lock_init(&(nb->retrans_lock));
	INIT_LIST_HEAD(&(nb->retrans_list));
	INIT_LIST_HEAD(&(nb->retrans_list_conn));

	return nb;
}
struct neighbor *get_neigh_by_mac(struct sk_buff *skb)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (memcmp(curr->mac, source_hw, MAX_ADDR_LEN) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}
struct neighbor *find_neigh(__u16 addrtypelen, __u8 *addrtype,
		__u16 addrlen, __u8 *addr)
{
	struct list_head *currlh;
	struct neighbor *ret = 0;

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		return 0;

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == addrlen && memcmp(curr->addr, addr,
				addrlen) == 0) {
			ret = curr;
			kref_get(&(ret->ref));
			break;
		}

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	return ret;
}
/*
 * credit exchange factor + unstable flag
 * throughput bound conns: throughput, credits/msecs
 * latency bound conns: latency (ms), credits/byte
 */
__u32 generate_neigh_list(char *buf, __u32 buflen, __u32 limit, __u32 offset)
{
	struct list_head *currlh;

	__u32 buf_offset = 8;
	__u32 headoffset = 0;

	/*
	 * The variable length headers rowcount and fieldlength need to be
	 * generated after the data. This is done by reserving the maximum
	 * space they could take. If they end up being smaller, the data is
	 * moved so that there is no gap.
	 */
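
	/*
	 * Worked example added for clarity (not from the original source,
	 * and assuming encode_len() returns the number of bytes it wrote):
	 * 4 bytes are reserved for the per-neighbor field length below. If
	 * encode_len() later needs only 1 byte for the real value, rc == 1,
	 * the data is shifted down with memmove(buf+addroffset+rc,
	 * buf+addroffset+4, ...) and buf_offset is reduced by (4-rc) == 3,
	 * so no gap remains.
	 */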
	BUG_ON(buflen < buf_offset);

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);

	BUG_ON(buflen < buf_offset + 2);
	put_u16(buf + buf_offset, LIST_NEIGH_FIELD_ADDR, 1);

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 0);

	BUG_ON(buflen < buf_offset + 2);
	put_u16(buf + buf_offset, LIST_NEIGH_FIELD_LATENCY, 1);

	rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);

	mutex_lock(&(neighbor_operation_lock));

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		unsigned long iflags;

		__u32 addroffset = buf_offset;

		/* get_neigh_state not used here because it would deadlock */
		spin_lock_irqsave( &(curr->state_lock), iflags);
		spin_unlock_irqrestore( &(curr->state_lock), iflags);

		if (state != NEIGHBOR_STATE_ACTIVE)
			/* ... */

		if (unlikely(buflen < buf_offset + 4 + 4 + 4 + 4 + 2 +
				/* ... */

		buf_offset += 4; /* reserve bufferspace for fieldlen */

		rc = encode_len(buf + buf_offset, buflen - buf_offset, 1);

		rc = encode_len(buf + buf_offset, buflen - buf_offset, 2);

		rc = encode_len(buf + buf_offset, buflen - buf_offset,
				/* ... */

		buf[buf_offset] = 'i'; /* addrtype */

		buf[buf_offset] = 'd';

		BUG_ON(curr->addrlen > buflen - buf_offset);
		memcpy(buf + buf_offset, curr->addr, curr->addrlen); /* addr */
		buf_offset += curr->addrlen;

		rc = encode_len(buf + addroffset, 4, buf_offset - addroffset -
				4);

		memmove(buf+addroffset+rc, buf+addroffset + 4,
				buf_offset - addroffset - 4);
		buf_offset -= (4-rc);

		buf[buf_offset] = enc_log_64_11(atomic_read(&(curr->latency)));

		BUG_ON(buf_offset > buflen);

		currlh = currlh->next;
	}

	mutex_unlock(&(neighbor_operation_lock));

	rc = encode_len(buf, 4, total);

	rc = encode_len(buf + headoffset, 4, cnt);

	if (likely(headoffset < 8))
		memmove(buf+headoffset, buf+8, buf_offset);

	return buf_offset + headoffset - 8;
}
void set_last_routdtrip(struct neighbor *nb, unsigned long time)
{
	unsigned long iflags;

	spin_lock_irqsave( &(nb->state_lock), iflags);

	if (likely(nb->state == NEIGHBOR_STATE_ACTIVE) && time_after(time,
			nb->state_time.last_roundtrip))
		nb->state_time.last_roundtrip = time;

	spin_unlock_irqrestore( &(nb->state_lock), iflags);
}
static void _refresh_initial_debitsrate(struct net_device *dev,
		__u32 debitsrate)
{
	__u32 neighbors = 0;
	struct list_head *currlh;

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			neighbors++;

		currlh = currlh->next;
	}

	currlh = nb_list.next;

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->dev == dev)
			set_creditrate_initial(curr,
					debitsrate/neighbors);

		currlh = currlh->next;
	}
}
/* neighbor operation lock has to be held while calling this */
static void refresh_initial_debitsrate(void)
{
	struct list_head *currlh1;

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				/* ... */
		}

		/* ... */

		currlh1 = currlh1->next;
	}

	creditrate = creditrate_initial();

	currlh1 = nb_list.next;

	while (currlh1 != &nb_list) {
		struct neighbor *curr1 = container_of(currlh1, struct neighbor,
				nb_list);

		struct list_head *currlh2;
		currlh2 = nb_list.next;
		while (currlh2 != currlh1) {
			struct neighbor *curr2 = container_of(currlh2,
					struct neighbor, nb_list);
			if (curr1->dev == curr2->dev)
				/* ... */
		}

		_refresh_initial_debitsrate(curr1->dev, creditrate/ifcnt);

		currlh1 = currlh1->next;
	}
}
static void reset_all_conns(struct neighbor *nb)
{
	mutex_lock(&(nb->conn_list_lock));

	if (list_empty(&(nb->snd_conn_list))) {
		BUG_ON(nb->num_send_conns != 0);
		mutex_unlock(&(nb->conn_list_lock));
		/* ... */
	}

	sconn = container_of(nb->snd_conn_list.next, struct conn,
			/* ... */);

	BUG_ON(sconn->targettype != TARGET_OUT);

	/*
	 * reset_conn must not be called with conn_list_lock
	 * held
	 */
	mutex_unlock(&(nb->conn_list_lock));

	/* ... */
}
static void stall_timer(struct work_struct *work)
{
	struct neighbor *nb = container_of(to_delayed_work(work),
			struct neighbor, stalltimeout_timer);

	int stall_time_ms;
	int nbstate;

	unsigned long iflags;

	spin_lock_irqsave( &(nb->state_lock), iflags);
	stall_time_ms = jiffies_to_msecs(jiffies -
			nb->state_time.last_roundtrip);
	nbstate = nb->state;

	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED))
		nb->str_timer_pending = 0;

	spin_unlock_irqrestore( &(nb->state_lock), iflags);

	if (unlikely(nbstate != NEIGHBOR_STATE_STALLED)) {
		kref_put(&(nb->ref), neighbor_free);
		return;
	}

	if (stall_time_ms < NB_KILL_TIME_MS) {
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer), stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				msecs_to_jiffies(NB_KILL_TIME_MS -
				stall_time_ms));
		return;
	}

	printk(KERN_ERR "reset_all");

	/* ... */

	spin_lock_irqsave( &(nb->state_lock), iflags);
	nb->state = NEIGHBOR_STATE_KILLED;
	spin_unlock_irqrestore( &(nb->state_lock), iflags);

	mutex_lock(&neighbor_operation_lock);
	list_del(&(nb->nb_list));
	refresh_initial_debitsrate();
	mutex_unlock(&neighbor_operation_lock);

	kref_put(&(nb->ref), neighbor_free); /* nb_list */
	kref_put(&(nb->ref), neighbor_free); /* stall_timer */
}
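
/*
 * Summary added for clarity (derived from the code in this file, not part
 * of the original source): a neighbor starts in NEIGHBOR_STATE_INITIAL,
 * becomes NEIGHBOR_STATE_ACTIVE after enough successful pings (see
 * ping_resp()), is moved to NEIGHBOR_STATE_STALLED by get_neigh_state()
 * when no roundtrip has been seen for NB_STALL_TIME_MS while enough pings
 * are in transit, and is finally marked NEIGHBOR_STATE_KILLED by
 * stall_timer() if the stall lasts longer than NB_KILL_TIME_MS.
 */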
int get_neigh_state(struct neighbor *nb)
{
	int ret;
	unsigned long iflags;
	int starttimer = 0;
	int stall_time_ms = 0;

	spin_lock_irqsave( &(nb->state_lock), iflags);

	if (unlikely(likely(nb->state == NEIGHBOR_STATE_ACTIVE) && unlikely(
			time_after_eq(jiffies, nb->state_time.last_roundtrip +
			msecs_to_jiffies(NB_STALL_TIME_MS)) && (
			nb->ping_intransit >= NB_STALL_MINPINGS ||
			nb->ping_intransit >= PING_COOKIES_PER_NEIGH)))) {
		nb->state = NEIGHBOR_STATE_STALLED;
		starttimer = (nb->str_timer_pending == 0);
		stall_time_ms = jiffies - nb->state_time.last_roundtrip;
		nb->str_timer_pending = 1;
		printk(KERN_ERR "switched to stalled");
		BUG_ON(nb->ping_intransit > PING_COOKIES_PER_NEIGH);
	}

	ret = nb->state;

	spin_unlock_irqrestore( &(nb->state_lock), iflags);

	if (unlikely(starttimer)) {
		kref_get(&(nb->ref));
		INIT_DELAYED_WORK(&(nb->stalltimeout_timer),
				stall_timer);
		schedule_delayed_work(&(nb->stalltimeout_timer),
				NB_KILL_TIME_MS - stall_time_ms);
	}

	return ret;
}
static struct ping_cookie *find_cookie(struct neighbor *nb, __u32 cookie)
{
	int i;

	for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == cookie)
			return &(nb->cookies[i]);
	}

	return 0;
}
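
/*
 * Note added for clarity (not from the original source): ping_resp() below
 * keeps the latency estimate in microseconds and updates it as an
 * exponentially weighted moving average,
 * newlatency = (oldlatency * 15 + measured_roundtrip - respdelay) / 16,
 * clamped to the range of a __u32.
 */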
void ping_resp(struct neighbor *nb, __u32 cookie, __u32 respdelay)
{
	struct ping_cookie *c;
	int i;
	__s64 newlatency;

	unsigned long cookie_sendtime;

	unsigned long iflags;

	mutex_lock(&(nb->pingcookie_lock));

	c = find_cookie(nb, cookie);

	if (unlikely(c == 0))
		/* ... */

	cookie_sendtime = c->time;

	newlatency = ((((__s64) ((__u32)atomic_read(&(nb->latency)))) * 15 +
			jiffies_to_usecs(jiffies - c->time) - respdelay) / 16);
	if (unlikely(newlatency < 0))
		newlatency = 0;
	if (unlikely(newlatency > (((__s64)256)*256*256*256 - 1)))
		newlatency = ((__s64)256)*256*256*256 - 1;

	atomic_set(&(nb->latency), (__u32) newlatency);

	nb->ping_intransit--;

	for(i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie != 0 &&
				time_before(nb->cookies[i].time, c->time)) {
			nb->cookies[i].pongs++;
			if (nb->cookies[i].pongs >= PING_PONGLIMIT) {
				nb->cookies[i].cookie = 0;
				nb->cookies[i].pongs = 0;
				nb->ping_intransit--;
			}
		}
	}

	spin_lock_irqsave( &(nb->state_lock), iflags);

	if (unlikely(nb->state == NEIGHBOR_STATE_INITIAL ||
			nb->state == NEIGHBOR_STATE_STALLED)) {
		nb->ping_success++;

		if (nb->state == NEIGHBOR_STATE_INITIAL) {
			__u64 jiffies64 = get_jiffies_64();
			if (nb->state_time.last_state_change == 0)
				nb->state_time.last_state_change = jiffies64;
			if (jiffies64 <= (nb->state_time.last_state_change +
					msecs_to_jiffies(INITIAL_TIME_MS)))
				/* ... */
		}

		if (nb->ping_success >= PING_SUCCESS_CNT) {
			/*if (nb->state == NEIGHBOR_STATE_INITIAL)
				printk(KERN_ERR "switched from initial to active");
			else
				printk(KERN_ERR "switched from stalled to active");*/
			nb->state = NEIGHBOR_STATE_ACTIVE;
			nb->ping_success = 0;
			nb->state_time.last_roundtrip = jiffies;
		}
	} else {
		nb->state_time.last_roundtrip = cookie_sendtime;
	}

	spin_unlock_irqrestore( &(nb->state_lock), iflags);

	mutex_unlock(&(nb->pingcookie_lock));
}
__u32 add_ping_req(struct neighbor *nb)
{
	struct ping_cookie *c;
	__u32 i;

	mutex_lock(&(nb->pingcookie_lock));

	for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == 0)
			/* ... */
	}

	get_random_bytes((char *) &i, sizeof(i));
	i = (i % (PING_COOKIES_PER_NEIGH - PING_COOKIES_FIFO)) +
			PING_COOKIES_FIFO;

	c = &(nb->cookies[i]);

	/* ... */

	if (unlikely(nb->lastcookie == 0))
		/* ... */
	c->cookie = nb->lastcookie;

	nb->ping_intransit++;

	nb->last_ping_time = jiffies;

	mutex_unlock(&(nb->pingcookie_lock));

	return c->cookie;
}
void unadd_ping_req(struct neighbor *nb, __u32 cookie)
{
	int i;

	mutex_lock(&(nb->pingcookie_lock));

	for (i=0;i<PING_COOKIES_PER_NEIGH;i++) {
		if (nb->cookies[i].cookie == cookie) {
			nb->cookies[i].cookie = 0;
			nb->ping_intransit--;
			break;
		}
	}

	mutex_unlock(&(nb->pingcookie_lock));
}
static int neighbor_idle(struct neighbor *nb)
{
	int ret;
	mutex_lock(&(nb->conn_list_lock));
	ret = (list_empty(&(nb->rcv_conn_list)) &&
			list_empty(&(nb->snd_conn_list)));
	BUG_ON(list_empty(&(nb->snd_conn_list)) && nb->num_send_conns != 0);
	mutex_unlock(&(nb->conn_list_lock));
	return ret;
}
/*
 * Check additionally to the checks and timings already done in kpacket_gen.c.
 * This is primarily to make sure that we do not invalidate other ping cookies
 * which might still receive responses. It does this by requiring a certain
 * minimum delay between pings, depending on how many pings are already in
 * transit.
 */
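
/*
 * Worked example added for clarity (not from the original source), using
 * the initial values set in alloc_neighbor(): with latency and
 * max_remote_cmsg_delay both at 1000000us and PING_COOKIES_NOTHROTTLE
 * pings already in transit, time_to_send_ping() requires at least
 * ((1000000 + 1000000)/1000) << 1 == 4000ms since the last ping; each
 * additional ping in transit doubles that, capped at
 * PING_THROTTLE_LIMIT_MS.
 */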
int time_to_send_ping(struct neighbor *nb)
{
	int forcetime;

	int state = get_neigh_state(nb);
	int idle = (state != NEIGHBOR_STATE_ACTIVE ? 0 :
			neighbor_idle(nb));

#warning todo send pings for some time after the neighbor gets idle (initial latency measurement + tos_privacy)

	mutex_lock(&(nb->pingcookie_lock));
	if (nb->ping_intransit >= PING_COOKIES_NOTHROTTLE) {
		__u32 mindelay = (( ((__u32) atomic_read(&(nb->latency))) +
				((__u32) atomic_read(
				&(nb->max_remote_cmsg_delay))) )/1000) <<
				(nb->ping_intransit + 1 -
				PING_COOKIES_NOTHROTTLE);

		if (mindelay > PING_THROTTLE_LIMIT_MS)
			mindelay = PING_THROTTLE_LIMIT_MS;

		if (jiffies_to_msecs(jiffies - nb->last_ping_time) < mindelay)
			/* ... */
	}

	if (unlikely(state != NEIGHBOR_STATE_ACTIVE) ||
			nb->ping_intransit != 0)
		forcetime = PING_FORCETIME_MS;
	else if (idle)
		forcetime = PING_FORCETIME_ACTIVEIDLE_MS;
	else
		forcetime = PING_FORCETIME_ACTIVE_MS;

	if (jiffies_to_msecs(jiffies - nb->last_ping_time) < (forcetime/2))
		/* ... */
	else if (jiffies_to_msecs(jiffies - nb->last_ping_time) >= forcetime)
		/* ... */

	mutex_unlock(&(nb->pingcookie_lock));
}
static void add_neighbor(struct neighbor *nb)
{
	struct list_head *currlh = nb_list.next;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	while (currlh != &nb_list) {
		struct neighbor *curr = container_of(currlh, struct neighbor,
				nb_list);

		if (curr->addrlen == nb->addrlen && memcmp(curr->addr, nb->addr,
				nb->addrlen) == 0)
			goto already_present;

		currlh = currlh->next;
	}

	/* kref_get not needed here, because the caller leaves its ref to us */
	printk(KERN_ERR "add_neigh");

	list_add_tail(&(nb->nb_list), &nb_list);
	refresh_initial_debitsrate();
	schedule_controlmsg_timerfunc(nb);
	INIT_DELAYED_WORK(&(nb->retrans_timer), retransmit_timerfunc);
	INIT_DELAYED_WORK(&(nb->retrans_timer_conn), retransmit_conn_timerfunc);

	return;

already_present:
	kmem_cache_free(nb_slab, nb);
}
static __u32 pull_u32(struct sk_buff *skb, int convbo)
{
	char *ptr = cor_pull_skb(skb, 4);

	__u32 ret = 0;

	/* ... */

	((char *)&ret)[0] = ptr[0];
	((char *)&ret)[1] = ptr[1];
	((char *)&ret)[2] = ptr[2];
	((char *)&ret)[3] = ptr[3];

	if (convbo)
		return be32_to_cpu(ret);
	return ret;
}
static int apply_announce_addaddr(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	__u16 addrtypelen;
	char *addrtype;
	__u16 addrlen;
	char *addr;

	BUG_ON((nb->addr == 0) != (nb->addrlen == 0));

	/* ... */

	addrtypelen = be16_to_cpu(*((__u16 *) cmddata));

	/* ... */

	addrlen = be16_to_cpu(*((__u16 *) cmddata));

	/* ... */

	cmddata += addrtypelen;

	/* ... */

	if (get_addrtype(addrtypelen, addrtype) != ADDRTYPE_ID)
		/* ... */

	nb->addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(nb->addr == 0))
		/* ... */

	memcpy(nb->addr, addr, addrlen);
	nb->addrlen = addrlen;

	/* ... */
}
static void apply_announce_cmd(struct neighbor *nb, __u32 cmd, __u32 len,
		char *cmddata)
{
	if (cmd == NEIGHCMD_ADDADDR) {
		apply_announce_addaddr(nb, cmd, len, cmddata);
	} else {
		/* ignore unknown cmds */
	}
}
static void apply_announce_cmds(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	struct neighbor *nb = alloc_neighbor(GFP_KERNEL);

	if (unlikely(nb == 0))
		return;

	/* ... */

	cmd = be32_to_cpu(*((__u32 *) msg));
	/* ... */
	cmdlen = be32_to_cpu(*((__u32 *) msg));
	/* ... */

	BUG_ON(cmdlen > len);

	apply_announce_cmd(nb, cmd, cmdlen, msg);

	/* ... */

	memcpy(nb->mac, source_hw, MAX_ADDR_LEN);

	/* ... */
}
static int check_announce_cmds(char *msg, __u32 len)
{
	/* ... */

	cmd = be32_to_cpu(*((__u32 *) msg));
	/* ... */
	cmdlen = be32_to_cpu(*((__u32 *) msg));
	/* ... */

	/* malformed packet */
	if (unlikely(cmdlen > len))
		return 1;

	/* ... */

	if (unlikely(len != 0))
		return 1;

	return 0;
}
static void parse_announce(char *msg, __u32 len, struct net_device *dev,
		char *source_hw)
{
	__u32 min_announce_version;
	__u32 max_announce_version;
	__u32 min_cor_version;
	__u32 max_cor_version;

	if (unlikely(len < 16))
		return;

	min_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_announce_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	min_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;
	max_cor_version = be32_to_cpu(*((__u32 *) msg));
	msg += 4;
	len -= 4;

	if (min_announce_version != 0)
		return;
	if (min_cor_version != 0)
		return;
	if (check_announce_cmds(msg, len)) {
		return;
	}

	apply_announce_cmds(msg, len, dev, source_hw);
}
struct announce_in {
	/* lh has to be first */
	struct list_head lh;
	struct sk_buff_head skbs; /* sorted by offset */
	struct net_device *dev;
	char source_hw[MAX_ADDR_LEN];
	__u32 announce_proto_version;
	__u32 packet_version;
	__u32 total_size;
	__u32 received_size;
	__u64 last_received_packet;
};

LIST_HEAD(announce_list);

struct kmem_cache *announce_in_slab;
static void merge_announce(struct announce_in *ann)
{
	char *msg = kmalloc(ann->total_size, GFP_KERNEL);
	__u32 copy = 0;

	if (msg == 0) {
		/* try again when next packet arrives */
		return;
	}

	while (copy != ann->total_size) {
		__u32 currcpy;
		__u32 offset = 0;
		struct sk_buff *skb;
		struct skb_procstate *ps;

		if (unlikely(skb_queue_empty(&(ann->skbs)))) {
			printk(KERN_ERR "net/cor/neighbor.c: sk_head ran "
					"empty while merging packets\n");
			/* ... */
		}

		skb = skb_dequeue(&(ann->skbs));
		ps = skb_pstate(skb);

		currcpy = skb->len;

		if (unlikely(ps->funcstate.announce.offset > copy)) {
			printk(KERN_ERR "net/cor/neighbor.c: invalid offset"
				/* ... */
		}

		if (unlikely(ps->funcstate.announce.offset < copy)) {
			offset = copy - ps->funcstate.announce.offset;
			currcpy -= offset;
		}

		if (unlikely(currcpy + copy > ann->total_size))
			/* ... */

		memcpy(msg + copy, skb->data + offset, currcpy);
		copy += currcpy;
		/* ... */
	}

	parse_announce(msg, ann->total_size, ann->dev, ann->source_hw);

	/* ... */

	list_del(&(ann->lh));
	kmem_cache_free(announce_in_slab, ann);
}
static int _rcv_announce(struct sk_buff *skb, struct announce_in *ann)
{
	struct skb_procstate *ps = skb_pstate(skb);

	__u32 offset = ps->funcstate.announce.offset;
	__u32 len = skb->len;

	__u32 curroffset = 0;
	__u32 prevoffset = 0;
	__u32 prevlen = 0;

	struct sk_buff *curr = ann->skbs.next;

	if (unlikely(len + offset > ann->total_size)) {
		/* invalid header */
		/* ... */
	}

	/*
	 * Try to find the right place to insert in the sorted list. This
	 * means to process the list until we find a skb which has a greater
	 * offset, so we can insert before it to keep the sort order. However,
	 * this is complicated by the fact that the new skb must not be
	 * inserted between 2 skbs if there is no data missing in between. So
	 * the loop has to keep running until there is either a gap to insert
	 * into or we see that this data has already been received.
	 */
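
	/*
	 * Example added for clarity (not from the original source),
	 * describing the intent of the loop and the overlap trimming below:
	 * if the list already holds chunks covering [0,256) and [512,768)
	 * and a chunk with offset 200 and length 400 arrives, the overlap
	 * with the first chunk is trimmed (offset becomes 256, len becomes
	 * 344), the overlap with the following chunk is trimmed as well
	 * (len becomes 256), and the skb is queued between the two.
	 */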
	while ((void *) curr != (void *) &(ann->skbs)) {
		struct skb_procstate *currps = skb_pstate(curr);

		curroffset = currps->funcstate.announce.offset;

		if (curroffset > offset && (prevoffset + prevlen) < curroffset)
			break;

		prevoffset = curroffset;
		prevlen = curr->len;
		curr = curr->next;
	}

	if ((offset+len) <= (prevoffset+prevlen)) {
		/* we already have this data */
		/* ... */
	}

	/*
	 * Calculate how much data was really received, by subtracting
	 * the bytes we already have.
	 */
	if (unlikely(prevoffset + prevlen > offset)) {
		len -= (prevoffset + prevlen) - offset;
		offset = prevoffset + prevlen;
	}

	if (unlikely((void *) curr != (void *) &(ann->skbs) &&
			(offset + len) > curroffset))
		len = curroffset - offset;

	ann->received_size += len;
	BUG_ON(ann->received_size > ann->total_size);
	__skb_queue_before(&(ann->skbs), curr, skb);
	ann->last_received_packet = get_jiffies_64();

	if (ann->received_size == ann->total_size)
		merge_announce(ann);
	else if (unlikely(ann->skbs.qlen >= 16))
		return 1;

	return 0;
}
void rcv_announce(struct sk_buff *skb)
{
	struct skb_procstate *ps = skb_pstate(skb);

	struct announce_in *curr = 0;
	struct announce_in *leastactive = 0;
	__u32 list_size = 0;

	__u32 announce_proto_version = pull_u32(skb, 1);
	__u32 packet_version = pull_u32(skb, 1);
	__u32 total_size = pull_u32(skb, 1);

	char source_hw[MAX_ADDR_LEN];
	memset(source_hw, 0, MAX_ADDR_LEN);
	if (skb->dev->header_ops != 0 &&
			skb->dev->header_ops->parse != 0)
		skb->dev->header_ops->parse(skb, source_hw);

	ps->funcstate.announce.offset = pull_u32(skb, 1);

	if (total_size > 8192)
		/* ... */

	mutex_lock(&(neighbor_operation_lock));

	if (announce_proto_version != 0)
		/* ... */

	curr = (struct announce_in *) announce_list.next;

	while (((struct list_head *) curr) != &(announce_list)) {
		if (curr->dev == skb->dev && memcmp(curr->source_hw, source_hw,
				MAX_ADDR_LEN) == 0 &&
				curr->announce_proto_version ==
				announce_proto_version &&
				curr->packet_version == packet_version &&
				curr->total_size == total_size)
			/* ... */

		if (leastactive == 0 || curr->last_received_packet <
				leastactive->last_received_packet)
			leastactive = curr;

		curr = (struct announce_in *) curr->lh.next;
	}

	if (list_size >= 128) {
		BUG_ON(leastactive == 0);
		curr = leastactive;

		curr->last_received_packet = get_jiffies_64();

		while (!skb_queue_empty(&(curr->skbs))) {
			struct sk_buff *skb2 = skb_dequeue(&(curr->skbs));
			/* ... */
		}
	} else {
		curr = kmem_cache_alloc(announce_in_slab,
				/* ... */);

		skb_queue_head_init(&(curr->skbs));
		list_add_tail((struct list_head *) curr, &announce_list);
	}

	curr->packet_version = packet_version;
	curr->total_size = total_size;
	curr->received_size = 0;
	curr->announce_proto_version = announce_proto_version;
	curr->dev = skb->dev;
	dev_hold(curr->dev);
	memcpy(curr->source_hw, source_hw, MAX_ADDR_LEN);

	if (_rcv_announce(skb, curr)) {
		list_del((struct list_head *) curr);
		/* ... */
		kmem_cache_free(announce_in_slab, curr);
	}

	/* ... */

	mutex_unlock(&(neighbor_operation_lock));
}
struct announce {
	struct kref ref;

	__u32 packet_version;
	char *announce_msg;
	__u32 announce_msg_len;
};

struct announce *last_announce;
static int send_announce_chunk(struct announce_data *ann)
{
	struct sk_buff *skb;
	__u32 packet_size = 256;
	__u32 remainingdata = ann->ann->announce_msg_len -
			ann->curr_announce_msg_offset;
	__u32 headroom = LL_ALLOCATED_SPACE(ann->dev);
	__u32 overhead = 17 + headroom;
	char *header;
	char *ptr;
	int rc;

	if (remainingdata < packet_size)
		packet_size = remainingdata;

	skb = alloc_skb(packet_size + overhead, GFP_KERNEL);
	if (unlikely(skb == 0))
		/* ... */

	skb->protocol = htons(ETH_P_COR);
	skb->dev = ann->dev;
	skb_reserve(skb, headroom);

	if (unlikely(dev_hard_header(skb, ann->dev, ETH_P_COR,
			ann->dev->broadcast, ann->dev->dev_addr, skb->len) < 0))
		/* ... */

	skb_reset_network_header(skb);

	header = skb_put(skb, 17);
	if (unlikely(header == 0))
		/* ... */

	header[0] = PACKET_TYPE_ANNOUNCE;

	put_u32(header + 1, 0, 1); /* announce proto version */
	put_u32(header + 5, ann->ann->packet_version, 1); /* packet version */
	put_u32(header + 9, ann->ann->announce_msg_len, 1); /* total size */
	put_u32(header + 13, ann->curr_announce_msg_offset, 1); /* offset */

	ptr = skb_put(skb, packet_size);
	if (unlikely(ptr == 0))
		/* ... */

	memcpy(ptr, ann->ann->announce_msg + ann->curr_announce_msg_offset,
			packet_size);

	rc = dev_queue_xmit(skb);

	ann->curr_announce_msg_offset += packet_size;

	if (ann->curr_announce_msg_offset == ann->ann->announce_msg_len)
		ann->curr_announce_msg_offset = 0;

	/* ... */
}
int send_announce_qos(struct announce_data *ann)
{
	int rc;
	mutex_lock(&(neighbor_operation_lock));
	rc = send_announce_chunk(ann);
	mutex_unlock(&(neighbor_operation_lock));
	return rc;
}
static void announce_free(struct kref *ref)
{
	struct announce *ann = container_of(ref, struct announce, ref);
	kfree(ann->announce_msg);
	kfree(ann);
}
void announce_data_free(struct kref *ref)
{
	struct announce_data *ann = container_of(ref, struct announce_data,
			ref);

	kref_put(&(ann->ann->ref), announce_free);

	kfree(ann);
}
static void send_announce(struct work_struct *work)
{
	struct announce_data *ann = container_of(to_delayed_work(work),
			struct announce_data, announce_work);

	int reschedule = 0;
	int rc = 0;

	mutex_lock(&(neighbor_operation_lock));

	if (unlikely(ann->dev == 0))
		/* ... */

	if (unlikely(ann->ann == 0 && last_announce == 0))
		/* ... */

	if (ann->curr_announce_msg_offset == 0 &&
			unlikely(ann->ann != last_announce)) {
		/* ... */
		kref_put(&(ann->ann->ref), announce_free);
		ann->ann = last_announce;
		kref_get(&(ann->ann->ref));
	}

	rc = send_announce_chunk(ann);

	mutex_unlock(&(neighbor_operation_lock));

	/* ... */
	qos_enqueue(ann->dev, &(ann->rb), QOS_CALLER_ANNOUNCE);

	if (unlikely(reschedule == 0)) {
		kref_put(&(ann->ref), announce_data_free);
	} else {
		__u64 jiffies = get_jiffies_64();

		ann->scheduled_announce_timer += msecs_to_jiffies(
				ANNOUNCE_SEND_PACKETINTELVAL_MS);

		delay = ann->scheduled_announce_timer - jiffies;
		/* ... */

		INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
		schedule_delayed_work(&(ann->announce_work), delay);
	}
}
static struct announce_data *get_announce_by_netdev(struct net_device *dev)
{
	struct list_head *lh = announce_out_list.next;

	while (lh != &announce_out_list) {
		struct announce_data *curr = (struct announce_data *)(
				((char *) lh) -
				offsetof(struct announce_data, lh));

		if (curr->dev == dev)
			return curr;

		lh = lh->next;
	}

	return 0;
}
static void announce_send_adddev(struct net_device *dev)
{
	struct announce_data *ann;

	ann = kmalloc(sizeof(struct announce_data), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		printk(KERN_ERR "cor cannot allocate memory for sending "
			/* ... */
		return;
	}

	memset(ann, 0, sizeof(struct announce_data));

	kref_init(&(ann->ref));

	ann->dev = dev;
	dev_hold(dev);

	mutex_lock(&(neighbor_operation_lock));
	list_add_tail(&(ann->lh), &announce_out_list);
	mutex_unlock(&(neighbor_operation_lock));

	ann->scheduled_announce_timer = get_jiffies_64();
	INIT_DELAYED_WORK(&(ann->announce_work), send_announce);
	schedule_delayed_work(&(ann->announce_work), 1);
}
static void announce_send_rmdev(struct net_device *dev)
{
	struct announce_data *ann;

	mutex_lock(&(neighbor_operation_lock));

	ann = get_announce_by_netdev(dev);

	/* ... */

	mutex_unlock(&(neighbor_operation_lock));
}
int netdev_notify_func(struct notifier_block *not, unsigned long event,
		void *ptr)
{
	struct net_device *dev = (struct net_device *) ptr;
	int rc;

	switch (event) {
	case NETDEV_UP:
		rc = create_queue(dev);
		/* ... */
		announce_send_adddev(dev);
		break;
	case NETDEV_DOWN:
		announce_send_rmdev(dev);
		break;
	case NETDEV_REGISTER:
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGEADDR:
	case NETDEV_GOING_DOWN:
	case NETDEV_CHANGENAME:
	case NETDEV_FEAT_CHANGE:
	case NETDEV_BONDING_FAILOVER:
		break;
	default:
		/* ... */
	}

	/* ... */
}
static int set_announce(char *msg, __u32 len)
{
	struct announce *ann = kmalloc(sizeof(struct announce), GFP_KERNEL);

	if (unlikely(ann == 0)) {
		/* ... */
		return 1;
	}

	memset(ann, 0, sizeof(struct announce));

	ann->announce_msg = msg;
	ann->announce_msg_len = len;

	kref_init(&(ann->ref));

	mutex_lock(&(neighbor_operation_lock));

	if (last_announce != 0) {
		ann->packet_version = last_announce->packet_version + 1;
		kref_put(&(last_announce->ref), announce_free);
	}

	last_announce = ann;

	mutex_unlock(&(neighbor_operation_lock));

	return 0;
}
static int generate_announce(void)
{
	__u32 addrtypelen = strlen(addrtype);

	__u32 hdr_len = 16;
	__u32 cmd_hdr_len = 8;
	__u32 cmd_len = 2 + 2 + addrtypelen + addrlen;

	__u32 len = hdr_len + cmd_hdr_len + cmd_len;
	__u32 offset = 0;

	char *msg = kmalloc(len, GFP_KERNEL);
	if (unlikely(msg == 0))
		return 1;

	put_u32(msg + offset, 0, 1); /* min_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_announce_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* min_cor_proto_version */
	offset += 4;
	put_u32(msg + offset, 0, 1); /* max_cor_proto_version */
	offset += 4;

	put_u32(msg + offset, NEIGHCMD_ADDADDR, 1); /* command */
	offset += 4;
	put_u32(msg + offset, cmd_len, 1); /* command length */
	offset += 4;

	/* addrtypelen, addrlen */
	put_u16(msg + offset, addrtypelen, 1);
	offset += 2;
	put_u16(msg + offset, addrlen, 1);
	offset += 2;

	/* addrtype, addr */
	memcpy(msg + offset, addrtype, addrtypelen);
	offset += addrtypelen;
	memcpy(msg + offset, addr, addrlen);
	offset += addrlen;

	BUG_ON(offset != len);

	return set_announce(msg, len);
}
int __init cor_neighbor_init(void)
{
	/* ... */

	addr = kmalloc(addrlen, GFP_KERNEL);
	if (unlikely(addr == 0))
		/* ... */

	get_random_bytes(addr, addrlen);

	nb_slab = kmem_cache_create("cor_neighbor", sizeof(struct neighbor), 8,
			0, 0);
	announce_in_slab = kmem_cache_create("cor_announce_in",
			sizeof(struct announce_in), 8, 0, 0);

	if (unlikely(generate_announce()))
		/* ... */

	memset(&netdev_notify, 0, sizeof(netdev_notify));
	netdev_notify.notifier_call = netdev_notify_func;
	register_netdevice_notifier(&netdev_notify);

	return 0;
}

MODULE_LICENSE("GPL");