2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/socket.h>
21 #include <linux/sched.h>
22 #include <linux/netdevice.h>
24 #include <linux/sysctl.h>
26 #include <net/neighbour.h>
29 #include <linux/rtnetlink.h>
33 #define NEIGH_PRINTK(x...) printk(x)
34 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
35 #define NEIGH_PRINTK0 NEIGH_PRINTK
36 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
37 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
41 #define NEIGH_PRINTK1 NEIGH_PRINTK
45 #define NEIGH_PRINTK2 NEIGH_PRINTK
48 static void neigh_timer_handler(unsigned long arg
);
50 static void neigh_app_notify(struct neighbour
*n
);
52 static int pneigh_ifdown(struct neigh_table
*tbl
, struct net_device
*dev
);
54 static int neigh_glbl_allocs
;
55 static struct neigh_table
*neigh_tables
;
57 #if defined(__i386__) && defined(CONFIG_SMP)
58 #define ASSERT_WL(n) if ((int)((n)->lock.lock) > 0) { printk("WL assertion failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); }
60 #define ASSERT_WL(n) do { } while(0)
64 Neighbour hash table buckets are protected with rwlock tbl->lock.
66 - All the scans/updates to hash buckets MUST be made under this lock.
67 - NOTHING clever should be made under this lock: no callbacks
68 to protocol backends, no attempts to send something to network.
69 It will result in deadlocks, if backend/driver wants to use neighbour
71 - If the entry requires some non-trivial actions, increase
72 its reference count and release table lock.
74 Neighbour entries are protected:
75 - with reference count.
76 - with rwlock neigh->lock
78 Reference count prevents destruction.
80 neigh->lock mainly serializes ll address data and its validity state.
81 However, the same lock is used to protect another entry fields:
85 Again, nothing clever shall be made under neigh->lock,
86 the most complicated procedure, which we allow is dev->hard_header.
87 It is supposed, that dev->hard_header is simplistic and does
88 not make callbacks to neighbour tables.
90 The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
91 list of neighbour tables. This list is used only in process context,
94 static rwlock_t neigh_tbl_lock
= RW_LOCK_UNLOCKED
;
96 static int neigh_blackhole(struct sk_buff
*skb
)
/* Return a value uniformly distributed over the interval
 * (1/2)*base ... (3/2)*base.  This matches the default IPv6 behaviour
 * and is not overridable, because it is a really reasonable choice.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	return (base >> 1) + (net_random() % base);
}
114 static int neigh_forced_gc(struct neigh_table
*tbl
)
119 for (i
=0; i
<=NEIGH_HASHMASK
; i
++) {
120 struct neighbour
*n
, **np
;
122 np
= &tbl
->hash_buckets
[i
];
123 write_lock_bh(&tbl
->lock
);
124 while ((n
= *np
) != NULL
) {
125 /* Neighbour record may be discarded if:
126 - nobody refers to it.
127 - it is not premanent
128 - (NEW and probably wrong)
129 INCOMPLETE entries are kept at least for
130 n->parms->retrans_time, otherwise we could
131 flood network with resolution requests.
132 It is not clear, what is better table overflow
135 write_lock(&n
->lock
);
136 if (atomic_read(&n
->refcnt
) == 1 &&
137 !(n
->nud_state
&NUD_PERMANENT
) &&
138 (n
->nud_state
!= NUD_INCOMPLETE
||
139 jiffies
- n
->used
> n
->parms
->retrans_time
)) {
143 write_unlock(&n
->lock
);
147 write_unlock(&n
->lock
);
150 write_unlock_bh(&tbl
->lock
);
153 tbl
->last_flush
= jiffies
;
/* Cancel the state-machine timer armed on @n, if any.
 * NOTE(review): the tail of this function (what happens after a
 * successful del_timer(), and the return value) is not visible in this
 * chunk — do not assume behaviour beyond the visible calls.
 */
157 static int neigh_del_timer(struct neighbour
*n
)
/* Only entries in one of the NUD_IN_TIMER states can have a pending timer. */
159 if (n
->nud_state
& NUD_IN_TIMER
) {
160 if (del_timer(&n
->timer
)) {
168 int neigh_ifdown(struct neigh_table
*tbl
, struct net_device
*dev
)
172 write_lock_bh(&tbl
->lock
);
174 for (i
=0; i
<=NEIGH_HASHMASK
; i
++) {
175 struct neighbour
*n
, **np
;
177 np
= &tbl
->hash_buckets
[i
];
178 while ((n
= *np
) != NULL
) {
179 if (dev
&& n
->dev
!= dev
) {
184 write_lock(&n
->lock
);
188 if (atomic_read(&n
->refcnt
) != 1) {
189 /* The most unpleasant situation.
190 We must destroy neighbour entry,
191 but someone still uses it.
193 The destroy will be delayed until
194 the last user releases us, but
195 we must kill timers etc. and move
198 n
->parms
= &tbl
->parms
;
199 skb_queue_purge(&n
->arp_queue
);
200 n
->output
= neigh_blackhole
;
201 if (n
->nud_state
&NUD_VALID
)
202 n
->nud_state
= NUD_NOARP
;
204 n
->nud_state
= NUD_NONE
;
205 NEIGH_PRINTK2("neigh %p is stray.\n", n
);
207 write_unlock(&n
->lock
);
212 skb_queue_purge(&tbl
->proxy_queue
);
213 pneigh_ifdown(tbl
, dev
);
214 write_unlock_bh(&tbl
->lock
);
216 del_timer_sync(&tbl
->proxy_timer
);
220 static struct neighbour
*neigh_alloc(struct neigh_table
*tbl
)
223 unsigned long now
= jiffies
;
225 if (tbl
->entries
> tbl
->gc_thresh3
||
226 (tbl
->entries
> tbl
->gc_thresh2
&&
227 now
- tbl
->last_flush
> 5*HZ
)) {
228 if (neigh_forced_gc(tbl
) == 0 &&
229 tbl
->entries
> tbl
->gc_thresh3
)
233 n
= kmem_cache_alloc(tbl
->kmem_cachep
, SLAB_ATOMIC
);
237 memset(n
, 0, tbl
->entry_size
);
239 skb_queue_head_init(&n
->arp_queue
);
240 n
->lock
= RW_LOCK_UNLOCKED
;
241 n
->updated
= n
->used
= now
;
242 n
->nud_state
= NUD_NONE
;
243 n
->output
= neigh_blackhole
;
244 n
->parms
= &tbl
->parms
;
245 init_timer(&n
->timer
);
246 n
->timer
.function
= neigh_timer_handler
;
247 n
->timer
.data
= (unsigned long)n
;
252 atomic_set(&n
->refcnt
, 1);
257 struct neighbour
*neigh_lookup(struct neigh_table
*tbl
, const void *pkey
,
258 struct net_device
*dev
)
262 int key_len
= tbl
->key_len
;
264 hash_val
= tbl
->hash(pkey
, dev
);
266 read_lock_bh(&tbl
->lock
);
267 for (n
= tbl
->hash_buckets
[hash_val
]; n
; n
= n
->next
) {
269 memcmp(n
->primary_key
, pkey
, key_len
) == 0) {
274 read_unlock_bh(&tbl
->lock
);
278 struct neighbour
* neigh_create(struct neigh_table
*tbl
, const void *pkey
,
279 struct net_device
*dev
)
281 struct neighbour
*n
, *n1
;
283 int key_len
= tbl
->key_len
;
285 n
= neigh_alloc(tbl
);
289 memcpy(n
->primary_key
, pkey
, key_len
);
293 /* Protocol specific setup. */
294 if (tbl
->constructor
&& tbl
->constructor(n
) < 0) {
299 /* Device specific setup. */
300 if (n
->parms
&& n
->parms
->neigh_setup
&& n
->parms
->neigh_setup(n
) < 0) {
305 n
->confirmed
= jiffies
- (n
->parms
->base_reachable_time
<<1);
307 hash_val
= tbl
->hash(pkey
, dev
);
309 write_lock_bh(&tbl
->lock
);
310 for (n1
= tbl
->hash_buckets
[hash_val
]; n1
; n1
= n1
->next
) {
311 if (dev
== n1
->dev
&&
312 memcmp(n1
->primary_key
, pkey
, key_len
) == 0) {
314 write_unlock_bh(&tbl
->lock
);
320 n
->next
= tbl
->hash_buckets
[hash_val
];
321 tbl
->hash_buckets
[hash_val
] = n
;
324 write_unlock_bh(&tbl
->lock
);
325 NEIGH_PRINTK2("neigh %p is created.\n", n
);
329 struct pneigh_entry
* pneigh_lookup(struct neigh_table
*tbl
, const void *pkey
,
330 struct net_device
*dev
, int creat
)
332 struct pneigh_entry
*n
;
334 int key_len
= tbl
->key_len
;
336 hash_val
= *(u32
*)(pkey
+ key_len
- 4);
337 hash_val
^= (hash_val
>>16);
338 hash_val
^= hash_val
>>8;
339 hash_val
^= hash_val
>>4;
340 hash_val
&= PNEIGH_HASHMASK
;
342 read_lock_bh(&tbl
->lock
);
344 for (n
= tbl
->phash_buckets
[hash_val
]; n
; n
= n
->next
) {
345 if (memcmp(n
->key
, pkey
, key_len
) == 0 &&
346 (n
->dev
== dev
|| !n
->dev
)) {
347 read_unlock_bh(&tbl
->lock
);
351 read_unlock_bh(&tbl
->lock
);
355 n
= kmalloc(sizeof(*n
) + key_len
, GFP_KERNEL
);
359 memcpy(n
->key
, pkey
, key_len
);
362 if (tbl
->pconstructor
&& tbl
->pconstructor(n
)) {
367 write_lock_bh(&tbl
->lock
);
368 n
->next
= tbl
->phash_buckets
[hash_val
];
369 tbl
->phash_buckets
[hash_val
] = n
;
370 write_unlock_bh(&tbl
->lock
);
375 int pneigh_delete(struct neigh_table
*tbl
, const void *pkey
, struct net_device
*dev
)
377 struct pneigh_entry
*n
, **np
;
379 int key_len
= tbl
->key_len
;
381 hash_val
= *(u32
*)(pkey
+ key_len
- 4);
382 hash_val
^= (hash_val
>>16);
383 hash_val
^= hash_val
>>8;
384 hash_val
^= hash_val
>>4;
385 hash_val
&= PNEIGH_HASHMASK
;
387 for (np
= &tbl
->phash_buckets
[hash_val
]; (n
=*np
) != NULL
; np
= &n
->next
) {
388 if (memcmp(n
->key
, pkey
, key_len
) == 0 && n
->dev
== dev
) {
389 write_lock_bh(&tbl
->lock
);
391 write_unlock_bh(&tbl
->lock
);
392 if (tbl
->pdestructor
)
401 static int pneigh_ifdown(struct neigh_table
*tbl
, struct net_device
*dev
)
403 struct pneigh_entry
*n
, **np
;
406 for (h
=0; h
<=PNEIGH_HASHMASK
; h
++) {
407 np
= &tbl
->phash_buckets
[h
];
408 for (np
= &tbl
->phash_buckets
[h
]; (n
=*np
) != NULL
; np
= &n
->next
) {
409 if (n
->dev
== dev
|| dev
== NULL
) {
411 if (tbl
->pdestructor
)
424 * neighbour must already be out of the table;
427 void neigh_destroy(struct neighbour
*neigh
)
432 printk("Destroying alive neighbour %p from %08lx\n", neigh
,
433 *(((unsigned long*)&neigh
)-1));
437 if (neigh_del_timer(neigh
))
438 printk("Impossible event.\n");
440 while ((hh
= neigh
->hh
) != NULL
) {
441 neigh
->hh
= hh
->hh_next
;
443 write_lock_bh(&hh
->hh_lock
);
444 hh
->hh_output
= neigh_blackhole
;
445 write_unlock_bh(&hh
->hh_lock
);
446 if (atomic_dec_and_test(&hh
->hh_refcnt
))
450 if (neigh
->ops
&& neigh
->ops
->destructor
)
451 (neigh
->ops
->destructor
)(neigh
);
453 skb_queue_purge(&neigh
->arp_queue
);
457 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh
);
460 neigh
->tbl
->entries
--;
461 kmem_cache_free(neigh
->tbl
->kmem_cachep
, neigh
);
464 /* Neighbour state is suspicious;
467 Called with write_locked neigh.
/* Neighbour reachability has become suspicious: route all further
 * transmissions through the slow (resolving) output path.
 * Per the comment above, called with the neigh entry write-locked.
 */
469 static void neigh_suspect(struct neighbour
*neigh
)
473 NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh
);
/* Generic output now goes via the protocol's slow output handler. */
477 neigh
->output
= neigh
->ops
->output
;
/* Downgrade every cached hardware header entry the same way.
 * NOTE(review): the declaration of `hh` is not visible in this chunk. */
479 for (hh
= neigh
->hh
; hh
; hh
= hh
->hh_next
)
480 hh
->hh_output
= neigh
->ops
->output
;
483 /* Neighbour state is OK;
486 Called with write_locked neigh.
/* Neighbour is confirmed reachable: switch to the fast output paths.
 * Per the comment above, called with the neigh entry write-locked.
 */
488 static void neigh_connect(struct neighbour
*neigh
)
492 NEIGH_PRINTK2("neigh %p is connected.\n", neigh
);
/* Generic output goes straight to the connected (no-resolution) handler. */
496 neigh
->output
= neigh
->ops
->connected_output
;
/* Re-enable the cached-header fast path on every hh_cache entry.
 * NOTE(review): the declaration of `hh` is not visible in this chunk. */
498 for (hh
= neigh
->hh
; hh
; hh
= hh
->hh_next
)
499 hh
->hh_output
= neigh
->ops
->hh_output
;
503 Transitions NUD_STALE <-> NUD_REACHABLE do not occur
504 when fast path is built: we have no timers associated with
505 these states, we do not have time to check state when sending.
506 neigh_periodic_timer check periodically neigh->confirmed
507 time and moves NUD_REACHABLE -> NUD_STALE.
509 If a routine wants to know TRUE entry state, it calls
510 neigh_sync before checking state.
512 Called with write_locked neigh.
/* Bring n->nud_state in sync with the confirmation timestamp:
 * REACHABLE entries whose confirmation has aged past reachable_time
 * become STALE, and other VALID entries with a fresh confirmation are
 * promoted back to REACHABLE.  See the comment above: the fast path
 * has no timers for these transitions, so they are applied lazily here.
 * Per the comment above, called with the neigh entry write-locked.
 */
515 static void neigh_sync(struct neighbour
*n
)
517 unsigned long now
= jiffies
;
518 u8 state
= n
->nud_state
;
/* NOARP and PERMANENT entries never change state here.
 * NOTE(review): the early-return statement itself is not visible in this chunk. */
521 if (state
&(NUD_NOARP
|NUD_PERMANENT
))
523 if (state
&NUD_REACHABLE
) {
/* Confirmation too old: demote to STALE. */
524 if (now
- n
->confirmed
> n
->parms
->reachable_time
) {
525 n
->nud_state
= NUD_STALE
;
528 } else if (state
&NUD_VALID
) {
/* Recently confirmed by some other means: promote to REACHABLE. */
529 if (now
- n
->confirmed
< n
->parms
->reachable_time
) {
531 n
->nud_state
= NUD_REACHABLE
;
537 static void SMP_TIMER_NAME(neigh_periodic_timer
)(unsigned long arg
)
539 struct neigh_table
*tbl
= (struct neigh_table
*)arg
;
540 unsigned long now
= jiffies
;
544 write_lock(&tbl
->lock
);
547 * periodicly recompute ReachableTime from random function
550 if (now
- tbl
->last_rand
> 300*HZ
) {
551 struct neigh_parms
*p
;
552 tbl
->last_rand
= now
;
553 for (p
=&tbl
->parms
; p
; p
= p
->next
)
554 p
->reachable_time
= neigh_rand_reach_time(p
->base_reachable_time
);
557 for (i
=0; i
<= NEIGH_HASHMASK
; i
++) {
558 struct neighbour
*n
, **np
;
560 np
= &tbl
->hash_buckets
[i
];
561 while ((n
= *np
) != NULL
) {
564 write_lock(&n
->lock
);
566 state
= n
->nud_state
;
567 if (state
&(NUD_PERMANENT
|NUD_IN_TIMER
)) {
568 write_unlock(&n
->lock
);
572 if ((long)(n
->used
- n
->confirmed
) < 0)
573 n
->used
= n
->confirmed
;
575 if (atomic_read(&n
->refcnt
) == 1 &&
576 (state
== NUD_FAILED
|| now
- n
->used
> n
->parms
->gc_staletime
)) {
579 write_unlock(&n
->lock
);
584 if (n
->nud_state
&NUD_REACHABLE
&&
585 now
- n
->confirmed
> n
->parms
->reachable_time
) {
586 n
->nud_state
= NUD_STALE
;
589 write_unlock(&n
->lock
);
596 mod_timer(&tbl
->gc_timer
, now
+ tbl
->gc_interval
);
597 write_unlock(&tbl
->lock
);
601 static void neigh_periodic_timer(unsigned long arg
)
603 struct neigh_table
*tbl
= (struct neigh_table
*)arg
;
605 tasklet_schedule(&tbl
->gc_task
);
609 static __inline__
int neigh_max_probes(struct neighbour
*n
)
611 struct neigh_parms
*p
= n
->parms
;
612 return p
->ucast_probes
+ p
->app_probes
+ p
->mcast_probes
;
616 /* Called when a timer expires for a neighbour entry. */
618 static void neigh_timer_handler(unsigned long arg
)
620 unsigned long now
= jiffies
;
621 struct neighbour
*neigh
= (struct neighbour
*)arg
;
625 write_lock(&neigh
->lock
);
627 state
= neigh
->nud_state
;
629 if (!(state
&NUD_IN_TIMER
)) {
631 printk("neigh: timer & !nud_in_timer\n");
636 if ((state
&NUD_VALID
) &&
637 now
- neigh
->confirmed
< neigh
->parms
->reachable_time
) {
638 neigh
->nud_state
= NUD_REACHABLE
;
639 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh
);
640 neigh_connect(neigh
);
643 if (state
== NUD_DELAY
) {
644 NEIGH_PRINTK2("neigh %p is probed.\n", neigh
);
645 neigh
->nud_state
= NUD_PROBE
;
646 atomic_set(&neigh
->probes
, 0);
649 if (atomic_read(&neigh
->probes
) >= neigh_max_probes(neigh
)) {
652 neigh
->nud_state
= NUD_FAILED
;
654 neigh
->tbl
->stats
.res_failed
++;
655 NEIGH_PRINTK2("neigh %p is failed.\n", neigh
);
657 /* It is very thin place. report_unreachable is very complicated
658 routine. Particularly, it can hit the same neighbour entry!
660 So that, we try to be accurate and avoid dead loop. --ANK
662 while(neigh
->nud_state
==NUD_FAILED
&& (skb
=__skb_dequeue(&neigh
->arp_queue
)) != NULL
) {
663 write_unlock(&neigh
->lock
);
664 neigh
->ops
->error_report(neigh
, skb
);
665 write_lock(&neigh
->lock
);
667 skb_queue_purge(&neigh
->arp_queue
);
671 neigh
->timer
.expires
= now
+ neigh
->parms
->retrans_time
;
672 add_timer(&neigh
->timer
);
673 write_unlock(&neigh
->lock
);
675 neigh
->ops
->solicit(neigh
, skb_peek(&neigh
->arp_queue
));
676 atomic_inc(&neigh
->probes
);
680 write_unlock(&neigh
->lock
);
682 if (notify
&& neigh
->parms
->app_probes
)
683 neigh_app_notify(neigh
);
685 neigh_release(neigh
);
688 int __neigh_event_send(struct neighbour
*neigh
, struct sk_buff
*skb
)
690 write_lock_bh(&neigh
->lock
);
691 if (!(neigh
->nud_state
&(NUD_CONNECTED
|NUD_DELAY
|NUD_PROBE
))) {
692 if (!(neigh
->nud_state
&(NUD_STALE
|NUD_INCOMPLETE
))) {
693 if (neigh
->parms
->mcast_probes
+ neigh
->parms
->app_probes
) {
694 atomic_set(&neigh
->probes
, neigh
->parms
->ucast_probes
);
695 neigh
->nud_state
= NUD_INCOMPLETE
;
697 neigh
->timer
.expires
= jiffies
+ neigh
->parms
->retrans_time
;
698 add_timer(&neigh
->timer
);
699 write_unlock_bh(&neigh
->lock
);
700 neigh
->ops
->solicit(neigh
, skb
);
701 atomic_inc(&neigh
->probes
);
702 write_lock_bh(&neigh
->lock
);
704 neigh
->nud_state
= NUD_FAILED
;
705 write_unlock_bh(&neigh
->lock
);
712 if (neigh
->nud_state
== NUD_INCOMPLETE
) {
714 if (skb_queue_len(&neigh
->arp_queue
) >= neigh
->parms
->queue_len
) {
715 struct sk_buff
*buff
;
716 buff
= neigh
->arp_queue
.prev
;
717 __skb_unlink(buff
, &neigh
->arp_queue
);
720 __skb_queue_head(&neigh
->arp_queue
, skb
);
722 write_unlock_bh(&neigh
->lock
);
725 if (neigh
->nud_state
== NUD_STALE
) {
726 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh
);
728 neigh
->nud_state
= NUD_DELAY
;
729 neigh
->timer
.expires
= jiffies
+ neigh
->parms
->delay_probe_time
;
730 add_timer(&neigh
->timer
);
733 write_unlock_bh(&neigh
->lock
);
/* Propagate a changed hardware address into every cached hardware
 * header of this neighbour, via the device's header_cache_update hook.
 * Each hh_cache entry is rewritten under its own hh_lock.
 * NOTE(review): the declaration of `hh` and the NULL check on `update`
 * are not visible in this chunk.
 */
737 static __inline__
void neigh_update_hhs(struct neighbour
*neigh
)
/* Device-specific callback that rewrites one cached header. */
740 void (*update
)(struct hh_cache
*, struct net_device
*, unsigned char*) =
741 neigh
->dev
->header_cache_update
;
744 for (hh
=neigh
->hh
; hh
; hh
=hh
->hh_next
) {
745 write_lock_bh(&hh
->hh_lock
);
/* Rebuild the cached header from the new link-layer address neigh->ha. */
746 update(hh
, neigh
->dev
, neigh
->ha
);
747 write_unlock_bh(&hh
->hh_lock
);
754 /* Generic update routine.
755 -- lladdr is new lladdr or NULL, if it is not supplied.
757 -- override==1 allows to override existing lladdr, if it is different.
758 -- arp==0 means that the change is administrative.
760 Caller MUST hold reference count on the entry.
763 int neigh_update(struct neighbour
*neigh
, const u8
*lladdr
, u8
new, int override
, int arp
)
768 struct net_device
*dev
= neigh
->dev
;
770 write_lock_bh(&neigh
->lock
);
771 old
= neigh
->nud_state
;
774 if (arp
&& (old
&(NUD_NOARP
|NUD_PERMANENT
)))
777 if (!(new&NUD_VALID
)) {
778 neigh_del_timer(neigh
);
779 if (old
&NUD_CONNECTED
)
780 neigh_suspect(neigh
);
781 neigh
->nud_state
= new;
783 notify
= old
&NUD_VALID
;
787 /* Compare new lladdr with cached one */
788 if (dev
->addr_len
== 0) {
789 /* First case: device needs no address. */
792 /* The second case: if something is already cached
793 and a new address is proposed:
795 - if they are different, check override flag
798 if (memcmp(lladdr
, neigh
->ha
, dev
->addr_len
) == 0)
804 /* No address is supplied; if we know something,
805 use it, otherwise discard the request.
808 if (!(old
&NUD_VALID
))
814 old
= neigh
->nud_state
;
815 if (new&NUD_CONNECTED
)
816 neigh
->confirmed
= jiffies
;
817 neigh
->updated
= jiffies
;
819 /* If entry was valid and address is not changed,
820 do not change entry state, if new one is STALE.
824 if (lladdr
== neigh
->ha
)
825 if (new == old
|| (new == NUD_STALE
&& (old
&NUD_CONNECTED
)))
828 neigh_del_timer(neigh
);
829 neigh
->nud_state
= new;
830 if (lladdr
!= neigh
->ha
) {
831 memcpy(&neigh
->ha
, lladdr
, dev
->addr_len
);
832 neigh_update_hhs(neigh
);
833 neigh
->confirmed
= jiffies
- (neigh
->parms
->base_reachable_time
<<1);
840 if (new&NUD_CONNECTED
)
841 neigh_connect(neigh
);
843 neigh_suspect(neigh
);
844 if (!(old
&NUD_VALID
)) {
847 /* Again: avoid dead loop if something went wrong */
849 while (neigh
->nud_state
&NUD_VALID
&&
850 (skb
=__skb_dequeue(&neigh
->arp_queue
)) != NULL
) {
851 struct neighbour
*n1
= neigh
;
852 write_unlock_bh(&neigh
->lock
);
853 /* On shaper/eql skb->dst->neighbour != neigh :( */
854 if (skb
->dst
&& skb
->dst
->neighbour
)
855 n1
= skb
->dst
->neighbour
;
857 write_lock_bh(&neigh
->lock
);
859 skb_queue_purge(&neigh
->arp_queue
);
862 write_unlock_bh(&neigh
->lock
);
864 if (notify
&& neigh
->parms
->app_probes
)
865 neigh_app_notify(neigh
);
870 struct neighbour
* neigh_event_ns(struct neigh_table
*tbl
,
871 u8
*lladdr
, void *saddr
,
872 struct net_device
*dev
)
874 struct neighbour
*neigh
;
876 neigh
= __neigh_lookup(tbl
, saddr
, dev
, lladdr
|| !dev
->addr_len
);
878 neigh_update(neigh
, lladdr
, NUD_STALE
, 1, 1);
882 static void neigh_hh_init(struct neighbour
*n
, struct dst_entry
*dst
, u16 protocol
)
884 struct hh_cache
*hh
= NULL
;
885 struct net_device
*dev
= dst
->dev
;
887 for (hh
=n
->hh
; hh
; hh
= hh
->hh_next
)
888 if (hh
->hh_type
== protocol
)
891 if (!hh
&& (hh
= kmalloc(sizeof(*hh
), GFP_ATOMIC
)) != NULL
) {
892 memset(hh
, 0, sizeof(struct hh_cache
));
893 hh
->hh_lock
= RW_LOCK_UNLOCKED
;
894 hh
->hh_type
= protocol
;
895 atomic_set(&hh
->hh_refcnt
, 0);
897 if (dev
->hard_header_cache(n
, hh
)) {
901 atomic_inc(&hh
->hh_refcnt
);
904 if (n
->nud_state
&NUD_CONNECTED
)
905 hh
->hh_output
= n
->ops
->hh_output
;
907 hh
->hh_output
= n
->ops
->output
;
911 atomic_inc(&hh
->hh_refcnt
);
916 /* This function can be used in contexts, where only old dev_queue_xmit
917 worked, f.e. if you want to override normal output path (eql, shaper),
918 but resoltution is not made yet.
921 int neigh_compat_output(struct sk_buff
*skb
)
923 struct net_device
*dev
= skb
->dev
;
925 __skb_pull(skb
, skb
->nh
.raw
- skb
->data
);
927 if (dev
->hard_header
&&
928 dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), NULL
, NULL
, skb
->len
) < 0 &&
929 dev
->rebuild_header(skb
))
932 return dev_queue_xmit(skb
);
935 /* Slow and careful. */
937 int neigh_resolve_output(struct sk_buff
*skb
)
939 struct dst_entry
*dst
= skb
->dst
;
940 struct neighbour
*neigh
;
942 if (!dst
|| !(neigh
= dst
->neighbour
))
945 __skb_pull(skb
, skb
->nh
.raw
- skb
->data
);
947 if (neigh_event_send(neigh
, skb
) == 0) {
949 struct net_device
*dev
= neigh
->dev
;
950 if (dev
->hard_header_cache
&& dst
->hh
== NULL
) {
951 write_lock_bh(&neigh
->lock
);
953 neigh_hh_init(neigh
, dst
, dst
->ops
->protocol
);
954 err
= dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), neigh
->ha
, NULL
, skb
->len
);
955 write_unlock_bh(&neigh
->lock
);
957 read_lock_bh(&neigh
->lock
);
958 err
= dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), neigh
->ha
, NULL
, skb
->len
);
959 read_unlock_bh(&neigh
->lock
);
962 return neigh
->ops
->queue_xmit(skb
);
969 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst
, dst
? dst
->neighbour
: NULL
);
974 /* As fast as possible without hh cache */
/* Output path for a connected neighbour without a cached hardware
 * header: build the link-layer header under the neigh read lock
 * (protecting neigh->ha) and hand the skb to the protocol's
 * queue_xmit.  NOTE(review): the error branch (hard_header failure
 * handling between the unlock and the queue_xmit return) is not
 * visible in this chunk.
 */
976 int neigh_connected_output(struct sk_buff
*skb
)
979 struct dst_entry
*dst
= skb
->dst
;
980 struct neighbour
*neigh
= dst
->neighbour
;
981 struct net_device
*dev
= neigh
->dev
;
/* Strip everything before the network header before prepending L2. */
983 __skb_pull(skb
, skb
->nh
.raw
- skb
->data
);
/* Read lock: neigh->ha may be rewritten concurrently by neigh_update. */
985 read_lock_bh(&neigh
->lock
);
986 err
= dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), neigh
->ha
, NULL
, skb
->len
);
987 read_unlock_bh(&neigh
->lock
);
989 return neigh
->ops
->queue_xmit(skb
);
994 static void neigh_proxy_process(unsigned long arg
)
996 struct neigh_table
*tbl
= (struct neigh_table
*)arg
;
998 unsigned long now
= jiffies
;
999 struct sk_buff
*skb
= tbl
->proxy_queue
.next
;
1001 while (skb
!= (struct sk_buff
*)&tbl
->proxy_queue
) {
1002 struct sk_buff
*back
= skb
;
1003 long tdif
= back
->stamp
.tv_usec
- now
;
1007 __skb_unlink(back
, &tbl
->proxy_queue
);
1008 if (tbl
->proxy_redo
)
1009 tbl
->proxy_redo(back
);
1012 } else if (!sched_next
|| tdif
< sched_next
)
1015 del_timer(&tbl
->proxy_timer
);
1017 tbl
->proxy_timer
.expires
= jiffies
+ sched_next
;
1018 add_timer(&tbl
->proxy_timer
);
1022 void pneigh_enqueue(struct neigh_table
*tbl
, struct neigh_parms
*p
,
1023 struct sk_buff
*skb
)
1025 unsigned long now
= jiffies
;
1026 long sched_next
= net_random()%p
->proxy_delay
;
1028 if (tbl
->proxy_queue
.qlen
> p
->proxy_qlen
) {
1032 skb
->stamp
.tv_sec
= 0;
1033 skb
->stamp
.tv_usec
= now
+ sched_next
;
1034 if (del_timer(&tbl
->proxy_timer
)) {
1035 long tval
= tbl
->proxy_timer
.expires
- now
;
1036 if (tval
< sched_next
)
1039 tbl
->proxy_timer
.expires
= now
+ sched_next
;
1040 dst_release(skb
->dst
);
1042 __skb_queue_tail(&tbl
->proxy_queue
, skb
);
1043 add_timer(&tbl
->proxy_timer
);
1047 struct neigh_parms
*neigh_parms_alloc(struct net_device
*dev
, struct neigh_table
*tbl
)
1049 struct neigh_parms
*p
;
1050 p
= kmalloc(sizeof(*p
), GFP_KERNEL
);
1052 memcpy(p
, &tbl
->parms
, sizeof(*p
));
1054 p
->reachable_time
= neigh_rand_reach_time(p
->base_reachable_time
);
1055 if (dev
&& dev
->neigh_setup
) {
1056 if (dev
->neigh_setup(dev
, p
)) {
1061 write_lock_bh(&tbl
->lock
);
1062 p
->next
= tbl
->parms
.next
;
1063 tbl
->parms
.next
= p
;
1064 write_unlock_bh(&tbl
->lock
);
1069 void neigh_parms_release(struct neigh_table
*tbl
, struct neigh_parms
*parms
)
1071 struct neigh_parms
**p
;
1073 if (parms
== NULL
|| parms
== &tbl
->parms
)
1075 write_lock_bh(&tbl
->lock
);
1076 for (p
= &tbl
->parms
.next
; *p
; p
= &(*p
)->next
) {
1079 write_unlock_bh(&tbl
->lock
);
1080 #ifdef CONFIG_SYSCTL
1081 neigh_sysctl_unregister(parms
);
1087 write_unlock_bh(&tbl
->lock
);
1088 NEIGH_PRINTK1("neigh_parms_release: not found\n");
1092 void neigh_table_init(struct neigh_table
*tbl
)
1094 unsigned long now
= jiffies
;
1096 tbl
->parms
.reachable_time
= neigh_rand_reach_time(tbl
->parms
.base_reachable_time
);
1098 if (tbl
->kmem_cachep
== NULL
)
1099 tbl
->kmem_cachep
= kmem_cache_create(tbl
->id
,
1100 (tbl
->entry_size
+15)&~15,
1101 0, SLAB_HWCACHE_ALIGN
,
1105 tasklet_init(&tbl
->gc_task
, SMP_TIMER_NAME(neigh_periodic_timer
), (unsigned long)tbl
);
1107 init_timer(&tbl
->gc_timer
);
1108 tbl
->lock
= RW_LOCK_UNLOCKED
;
1109 tbl
->gc_timer
.data
= (unsigned long)tbl
;
1110 tbl
->gc_timer
.function
= neigh_periodic_timer
;
1111 tbl
->gc_timer
.expires
= now
+ tbl
->gc_interval
+ tbl
->parms
.reachable_time
;
1112 add_timer(&tbl
->gc_timer
);
1114 init_timer(&tbl
->proxy_timer
);
1115 tbl
->proxy_timer
.data
= (unsigned long)tbl
;
1116 tbl
->proxy_timer
.function
= neigh_proxy_process
;
1117 skb_queue_head_init(&tbl
->proxy_queue
);
1119 tbl
->last_flush
= now
;
1120 tbl
->last_rand
= now
+ tbl
->parms
.reachable_time
*20;
1121 write_lock(&neigh_tbl_lock
);
1122 tbl
->next
= neigh_tables
;
1124 write_unlock(&neigh_tbl_lock
);
1127 int neigh_table_clear(struct neigh_table
*tbl
)
1129 struct neigh_table
**tp
;
1131 /* It is not clean... Fix it to unload IPv6 module safely */
1132 del_timer_sync(&tbl
->gc_timer
);
1133 tasklet_kill(&tbl
->gc_task
);
1134 del_timer_sync(&tbl
->proxy_timer
);
1135 skb_queue_purge(&tbl
->proxy_queue
);
1136 neigh_ifdown(tbl
, NULL
);
1138 printk(KERN_CRIT
"neighbour leakage\n");
1139 write_lock(&neigh_tbl_lock
);
1140 for (tp
= &neigh_tables
; *tp
; tp
= &(*tp
)->next
) {
1146 write_unlock(&neigh_tbl_lock
);
1147 #ifdef CONFIG_SYSCTL
1148 neigh_sysctl_unregister(&tbl
->parms
);
1153 #ifdef CONFIG_RTNETLINK
1156 int neigh_delete(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
1158 struct ndmsg
*ndm
= NLMSG_DATA(nlh
);
1159 struct rtattr
**nda
= arg
;
1160 struct neigh_table
*tbl
;
1161 struct net_device
*dev
= NULL
;
1164 if (ndm
->ndm_ifindex
) {
1165 if ((dev
= dev_get_by_index(ndm
->ndm_ifindex
)) == NULL
)
1169 read_lock(&neigh_tbl_lock
);
1170 for (tbl
=neigh_tables
; tbl
; tbl
= tbl
->next
) {
1171 struct neighbour
*n
;
1173 if (tbl
->family
!= ndm
->ndm_family
)
1175 read_unlock(&neigh_tbl_lock
);
1178 if (nda
[NDA_DST
-1] == NULL
||
1179 nda
[NDA_DST
-1]->rta_len
!= RTA_LENGTH(tbl
->key_len
))
1182 if (ndm
->ndm_flags
&NTF_PROXY
) {
1183 err
= pneigh_delete(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1190 n
= neigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1192 err
= neigh_update(n
, NULL
, NUD_FAILED
, 1, 0);
1200 read_unlock(&neigh_tbl_lock
);
1205 return -EADDRNOTAVAIL
;
1208 int neigh_add(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
1210 struct ndmsg
*ndm
= NLMSG_DATA(nlh
);
1211 struct rtattr
**nda
= arg
;
1212 struct neigh_table
*tbl
;
1213 struct net_device
*dev
= NULL
;
1215 if (ndm
->ndm_ifindex
) {
1216 if ((dev
= dev_get_by_index(ndm
->ndm_ifindex
)) == NULL
)
1220 read_lock(&neigh_tbl_lock
);
1221 for (tbl
=neigh_tables
; tbl
; tbl
= tbl
->next
) {
1223 struct neighbour
*n
;
1225 if (tbl
->family
!= ndm
->ndm_family
)
1227 read_unlock(&neigh_tbl_lock
);
1230 if (nda
[NDA_DST
-1] == NULL
||
1231 nda
[NDA_DST
-1]->rta_len
!= RTA_LENGTH(tbl
->key_len
))
1233 if (ndm
->ndm_flags
&NTF_PROXY
) {
1235 if (pneigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
, 1))
1242 if (nda
[NDA_LLADDR
-1] != NULL
&&
1243 nda
[NDA_LLADDR
-1]->rta_len
!= RTA_LENGTH(dev
->addr_len
))
1245 n
= neigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1247 if (nlh
->nlmsg_flags
&NLM_F_EXCL
)
1249 } else if (!(nlh
->nlmsg_flags
&NLM_F_CREATE
))
1252 n
= __neigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
, 1);
1257 err
= neigh_update(n
, nda
[NDA_LLADDR
-1] ? RTA_DATA(nda
[NDA_LLADDR
-1]) : NULL
,
1259 nlh
->nlmsg_flags
&NLM_F_REPLACE
, 0);
1268 read_unlock(&neigh_tbl_lock
);
1272 return -EADDRNOTAVAIL
;
1276 static int neigh_fill_info(struct sk_buff
*skb
, struct neighbour
*n
,
1277 u32 pid
, u32 seq
, int event
)
1279 unsigned long now
= jiffies
;
1281 struct nlmsghdr
*nlh
;
1282 unsigned char *b
= skb
->tail
;
1283 struct nda_cacheinfo ci
;
1286 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*ndm
));
1287 ndm
= NLMSG_DATA(nlh
);
1288 ndm
->ndm_family
= n
->ops
->family
;
1289 ndm
->ndm_flags
= n
->flags
;
1290 ndm
->ndm_type
= n
->type
;
1291 ndm
->ndm_ifindex
= n
->dev
->ifindex
;
1292 RTA_PUT(skb
, NDA_DST
, n
->tbl
->key_len
, n
->primary_key
);
1293 read_lock_bh(&n
->lock
);
1295 ndm
->ndm_state
= n
->nud_state
;
1296 if (n
->nud_state
&NUD_VALID
)
1297 RTA_PUT(skb
, NDA_LLADDR
, n
->dev
->addr_len
, n
->ha
);
1298 ci
.ndm_used
= now
- n
->used
;
1299 ci
.ndm_confirmed
= now
- n
->confirmed
;
1300 ci
.ndm_updated
= now
- n
->updated
;
1301 ci
.ndm_refcnt
= atomic_read(&n
->refcnt
) - 1;
1302 read_unlock_bh(&n
->lock
);
1304 RTA_PUT(skb
, NDA_CACHEINFO
, sizeof(ci
), &ci
);
1305 nlh
->nlmsg_len
= skb
->tail
- b
;
1311 read_unlock_bh(&n
->lock
);
1312 skb_trim(skb
, b
- skb
->data
);
1317 static int neigh_dump_table(struct neigh_table
*tbl
, struct sk_buff
*skb
, struct netlink_callback
*cb
)
1319 struct neighbour
*n
;
1324 s_idx
= idx
= cb
->args
[2];
1325 for (h
=0; h
<= NEIGH_HASHMASK
; h
++) {
1326 if (h
< s_h
) continue;
1329 read_lock_bh(&tbl
->lock
);
1330 for (n
= tbl
->hash_buckets
[h
], idx
= 0; n
;
1331 n
= n
->next
, idx
++) {
1334 if (neigh_fill_info(skb
, n
, NETLINK_CB(cb
->skb
).pid
,
1335 cb
->nlh
->nlmsg_seq
, RTM_NEWNEIGH
) <= 0) {
1336 read_unlock_bh(&tbl
->lock
);
1342 read_unlock_bh(&tbl
->lock
);
1350 int neigh_dump_info(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1354 struct neigh_table
*tbl
;
1355 int family
= ((struct rtgenmsg
*)NLMSG_DATA(cb
->nlh
))->rtgen_family
;
1359 read_lock(&neigh_tbl_lock
);
1360 for (tbl
=neigh_tables
, t
=0; tbl
; tbl
= tbl
->next
, t
++) {
1361 if (t
< s_t
) continue;
1362 if (family
&& tbl
->family
!= family
)
1365 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1366 if (neigh_dump_table(tbl
, skb
, cb
) < 0)
1369 read_unlock(&neigh_tbl_lock
);
1377 void neigh_app_ns(struct neighbour
*n
)
1379 struct sk_buff
*skb
;
1380 struct nlmsghdr
*nlh
;
1381 int size
= NLMSG_SPACE(sizeof(struct ndmsg
)+256);
1383 skb
= alloc_skb(size
, GFP_ATOMIC
);
1387 if (neigh_fill_info(skb
, n
, 0, 0, RTM_GETNEIGH
) < 0) {
1391 nlh
= (struct nlmsghdr
*)skb
->data
;
1392 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
1393 NETLINK_CB(skb
).dst_groups
= RTMGRP_NEIGH
;
1394 netlink_broadcast(rtnl
, skb
, 0, RTMGRP_NEIGH
, GFP_ATOMIC
);
1397 static void neigh_app_notify(struct neighbour
*n
)
1399 struct sk_buff
*skb
;
1400 struct nlmsghdr
*nlh
;
1401 int size
= NLMSG_SPACE(sizeof(struct ndmsg
)+256);
1403 skb
= alloc_skb(size
, GFP_ATOMIC
);
1407 if (neigh_fill_info(skb
, n
, 0, 0, RTM_NEWNEIGH
) < 0) {
1411 nlh
= (struct nlmsghdr
*)skb
->data
;
1412 NETLINK_CB(skb
).dst_groups
= RTMGRP_NEIGH
;
1413 netlink_broadcast(rtnl
, skb
, 0, RTMGRP_NEIGH
, GFP_ATOMIC
);
1423 #ifdef CONFIG_SYSCTL
1425 struct neigh_sysctl_table
1427 struct ctl_table_header
*sysctl_header
;
1428 ctl_table neigh_vars
[17];
1429 ctl_table neigh_dev
[2];
1430 ctl_table neigh_neigh_dir
[2];
1431 ctl_table neigh_proto_dir
[2];
1432 ctl_table neigh_root_dir
[2];
1433 } neigh_sysctl_template
= {
1435 {{NET_NEIGH_MCAST_SOLICIT
, "mcast_solicit",
1436 NULL
, sizeof(int), 0644, NULL
,
1438 {NET_NEIGH_UCAST_SOLICIT
, "ucast_solicit",
1439 NULL
, sizeof(int), 0644, NULL
,
1441 {NET_NEIGH_APP_SOLICIT
, "app_solicit",
1442 NULL
, sizeof(int), 0644, NULL
,
1444 {NET_NEIGH_RETRANS_TIME
, "retrans_time",
1445 NULL
, sizeof(int), 0644, NULL
,
1447 {NET_NEIGH_REACHABLE_TIME
, "base_reachable_time",
1448 NULL
, sizeof(int), 0644, NULL
,
1449 &proc_dointvec_jiffies
},
1450 {NET_NEIGH_DELAY_PROBE_TIME
, "delay_first_probe_time",
1451 NULL
, sizeof(int), 0644, NULL
,
1452 &proc_dointvec_jiffies
},
1453 {NET_NEIGH_GC_STALE_TIME
, "gc_stale_time",
1454 NULL
, sizeof(int), 0644, NULL
,
1455 &proc_dointvec_jiffies
},
1456 {NET_NEIGH_UNRES_QLEN
, "unres_qlen",
1457 NULL
, sizeof(int), 0644, NULL
,
1459 {NET_NEIGH_PROXY_QLEN
, "proxy_qlen",
1460 NULL
, sizeof(int), 0644, NULL
,
1462 {NET_NEIGH_ANYCAST_DELAY
, "anycast_delay",
1463 NULL
, sizeof(int), 0644, NULL
,
1465 {NET_NEIGH_PROXY_DELAY
, "proxy_delay",
1466 NULL
, sizeof(int), 0644, NULL
,
1468 {NET_NEIGH_LOCKTIME
, "locktime",
1469 NULL
, sizeof(int), 0644, NULL
,
1471 {NET_NEIGH_GC_INTERVAL
, "gc_interval",
1472 NULL
, sizeof(int), 0644, NULL
,
1473 &proc_dointvec_jiffies
},
1474 {NET_NEIGH_GC_THRESH1
, "gc_thresh1",
1475 NULL
, sizeof(int), 0644, NULL
,
1477 {NET_NEIGH_GC_THRESH2
, "gc_thresh2",
1478 NULL
, sizeof(int), 0644, NULL
,
1480 {NET_NEIGH_GC_THRESH3
, "gc_thresh3",
1481 NULL
, sizeof(int), 0644, NULL
,
1485 {{NET_PROTO_CONF_DEFAULT
, "default", NULL
, 0, 0555, NULL
},{0}},
1486 {{0, "neigh", NULL
, 0, 0555, NULL
},{0}},
1487 {{0, NULL
, NULL
, 0, 0555, NULL
},{0}},
1488 {{CTL_NET
, "net", NULL
, 0, 0555, NULL
},{0}}
1491 int neigh_sysctl_register(struct net_device
*dev
, struct neigh_parms
*p
,
1492 int p_id
, int pdev_id
, char *p_name
)
1494 struct neigh_sysctl_table
*t
;
1496 t
= kmalloc(sizeof(*t
), GFP_KERNEL
);
1499 memcpy(t
, &neigh_sysctl_template
, sizeof(*t
));
1500 t
->neigh_vars
[0].data
= &p
->mcast_probes
;
1501 t
->neigh_vars
[1].data
= &p
->ucast_probes
;
1502 t
->neigh_vars
[2].data
= &p
->app_probes
;
1503 t
->neigh_vars
[3].data
= &p
->retrans_time
;
1504 t
->neigh_vars
[4].data
= &p
->base_reachable_time
;
1505 t
->neigh_vars
[5].data
= &p
->delay_probe_time
;
1506 t
->neigh_vars
[6].data
= &p
->gc_staletime
;
1507 t
->neigh_vars
[7].data
= &p
->queue_len
;
1508 t
->neigh_vars
[8].data
= &p
->proxy_qlen
;
1509 t
->neigh_vars
[9].data
= &p
->anycast_delay
;
1510 t
->neigh_vars
[10].data
= &p
->proxy_delay
;
1511 t
->neigh_vars
[11].data
= &p
->locktime
;
1513 t
->neigh_dev
[0].procname
= dev
->name
;
1514 t
->neigh_dev
[0].ctl_name
= dev
->ifindex
;
1515 memset(&t
->neigh_vars
[12], 0, sizeof(ctl_table
));
1517 t
->neigh_vars
[12].data
= (int*)(p
+1);
1518 t
->neigh_vars
[13].data
= (int*)(p
+1) + 1;
1519 t
->neigh_vars
[14].data
= (int*)(p
+1) + 2;
1520 t
->neigh_vars
[15].data
= (int*)(p
+1) + 3;
1522 t
->neigh_neigh_dir
[0].ctl_name
= pdev_id
;
1524 t
->neigh_proto_dir
[0].procname
= p_name
;
1525 t
->neigh_proto_dir
[0].ctl_name
= p_id
;
1527 t
->neigh_dev
[0].child
= t
->neigh_vars
;
1528 t
->neigh_neigh_dir
[0].child
= t
->neigh_dev
;
1529 t
->neigh_proto_dir
[0].child
= t
->neigh_neigh_dir
;
1530 t
->neigh_root_dir
[0].child
= t
->neigh_proto_dir
;
1532 t
->sysctl_header
= register_sysctl_table(t
->neigh_root_dir
, 0);
1533 if (t
->sysctl_header
== NULL
) {
1537 p
->sysctl_table
= t
;
1541 void neigh_sysctl_unregister(struct neigh_parms
*p
)
1543 if (p
->sysctl_table
) {
1544 struct neigh_sysctl_table
*t
= p
->sysctl_table
;
1545 p
->sysctl_table
= NULL
;
1546 unregister_sysctl_table(t
->sysctl_header
);
1551 #endif /* CONFIG_SYSCTL */