2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
17 #include <linux/config.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/socket.h>
21 #include <linux/sched.h>
22 #include <linux/netdevice.h>
24 #include <linux/sysctl.h>
26 #include <net/neighbour.h>
29 #include <linux/rtnetlink.h>
33 #define NEIGH_PRINTK(x...) printk(x)
34 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
35 #define NEIGH_PRINTK0 NEIGH_PRINTK
36 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
37 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
41 #define NEIGH_PRINTK1 NEIGH_PRINTK
45 #define NEIGH_PRINTK2 NEIGH_PRINTK
48 static void neigh_timer_handler(unsigned long arg
);
50 static void neigh_app_notify(struct neighbour
*n
);
52 static int pneigh_ifdown(struct neigh_table
*tbl
, struct net_device
*dev
);
54 static int neigh_glbl_allocs
;
55 static struct neigh_table
*neigh_tables
;
57 #if defined(__i386__) && defined(CONFIG_SMP)
58 #define ASSERT_WL(n) if ((int)((n)->lock.lock) > 0) { printk("WL assertion failed at " __FILE__ "(%d):" __FUNCTION__ "\n", __LINE__); }
60 #define ASSERT_WL(n) do { } while(0)
64 Neighbour hash table buckets are protected with rwlock tbl->lock.
66 - All the scans/updates to hash buckets MUST be made under this lock.
67 - NOTHING clever should be made under this lock: no callbacks
68 to protocol backends, no attempts to send something to network.
69 It will result in deadlocks, if backend/driver wants to use neighbour
71 - If the entry requires some non-trivial actions, increase
72 its reference count and release table lock.
74 Neighbour entries are protected:
75 - with reference count.
76 - with rwlock neigh->lock
78 Reference count prevents destruction.
80 neigh->lock mainly serializes ll address data and its validity state.
81 However, the same lock is used to protect another entry fields:
85 Again, nothing clever shall be made under neigh->lock,
86 the most complicated procedure, which we allow is dev->hard_header.
87 It is supposed, that dev->hard_header is simplistic and does
88 not make callbacks to neighbour tables.
90 The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
91 list of neighbour tables. This list is used only in process context,
94 static rwlock_t neigh_tbl_lock
= RW_LOCK_UNLOCKED
;
96 static int neigh_blackhole(struct sk_buff
*skb
)
103 * It is random distribution in the interval (1/2)*base...(3/2)*base.
104 * It corresponds to default IPv6 settings and is not overridable,
105 * because it is a really reasonable choice.
/*
 * Jitter a reachability interval: returns a value uniformly
 * distributed in [base/2, 3*base/2), matching the default IPv6
 * Neighbor Discovery randomization described in the comment above.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	unsigned long jitter = net_random() % base;

	return (base >> 1) + jitter;
}
114 static int neigh_forced_gc(struct neigh_table
*tbl
)
119 for (i
=0; i
<=NEIGH_HASHMASK
; i
++) {
120 struct neighbour
*n
, **np
;
122 np
= &tbl
->hash_buckets
[i
];
123 write_lock_bh(&tbl
->lock
);
124 while ((n
= *np
) != NULL
) {
125 /* Neighbour record may be discarded if:
126 - nobody refers to it.
127 - it is not permanent
128 - (NEW and probably wrong)
129 INCOMPLETE entries are kept at least for
130 n->parms->retrans_time, otherwise we could
131 flood network with resolution requests.
132 It is not clear, what is better table overflow
135 write_lock(&n
->lock
);
136 if (atomic_read(&n
->refcnt
) == 1 &&
137 !(n
->nud_state
&NUD_PERMANENT
) &&
138 (n
->nud_state
!= NUD_INCOMPLETE
||
139 jiffies
- n
->used
> n
->parms
->retrans_time
)) {
143 write_unlock(&n
->lock
);
147 write_unlock(&n
->lock
);
150 write_unlock_bh(&tbl
->lock
);
153 tbl
->last_flush
= jiffies
;
/* Cancel a pending state-machine timer on @n when its NUD state
 * (NUD_IN_TIMER) says one should be armed.
 * NOTE(review): the tail of this function (its return statements and
 * closing braces) is missing from this chunk; presumably it reports
 * whether an armed timer was actually deleted — confirm against the
 * full source.  Visible code text below is kept byte-identical.
 */
157 static int neigh_del_timer(struct neighbour
*n
)
159 if (n
->nud_state
& NUD_IN_TIMER
) {
160 if (del_timer(&n
->timer
)) {
168 int neigh_ifdown(struct neigh_table
*tbl
, struct net_device
*dev
)
172 write_lock_bh(&tbl
->lock
);
174 for (i
=0; i
<=NEIGH_HASHMASK
; i
++) {
175 struct neighbour
*n
, **np
;
177 np
= &tbl
->hash_buckets
[i
];
178 while ((n
= *np
) != NULL
) {
179 if (dev
&& n
->dev
!= dev
) {
184 write_lock(&n
->lock
);
188 if (atomic_read(&n
->refcnt
) != 1) {
189 /* The most unpleasant situation.
190 We must destroy neighbour entry,
191 but someone still uses it.
193 The destroy will be delayed until
194 the last user releases us, but
195 we must kill timers etc. and move
198 n
->parms
= &tbl
->parms
;
199 skb_queue_purge(&n
->arp_queue
);
200 n
->output
= neigh_blackhole
;
201 if (n
->nud_state
&NUD_VALID
)
202 n
->nud_state
= NUD_NOARP
;
204 n
->nud_state
= NUD_NONE
;
205 NEIGH_PRINTK2("neigh %p is stray.\n", n
);
207 write_unlock(&n
->lock
);
212 skb_queue_purge(&tbl
->proxy_queue
);
213 pneigh_ifdown(tbl
, dev
);
214 write_unlock_bh(&tbl
->lock
);
216 del_timer_sync(&tbl
->proxy_timer
);
220 static struct neighbour
*neigh_alloc(struct neigh_table
*tbl
)
223 unsigned long now
= jiffies
;
225 if (tbl
->entries
> tbl
->gc_thresh3
||
226 (tbl
->entries
> tbl
->gc_thresh2
&&
227 now
- tbl
->last_flush
> 5*HZ
)) {
228 if (neigh_forced_gc(tbl
) == 0 &&
229 tbl
->entries
> tbl
->gc_thresh3
)
233 n
= kmem_cache_alloc(tbl
->kmem_cachep
, SLAB_ATOMIC
);
237 memset(n
, 0, tbl
->entry_size
);
239 skb_queue_head_init(&n
->arp_queue
);
240 n
->lock
= RW_LOCK_UNLOCKED
;
241 n
->updated
= n
->used
= now
;
242 n
->nud_state
= NUD_NONE
;
243 n
->output
= neigh_blackhole
;
244 n
->parms
= &tbl
->parms
;
245 init_timer(&n
->timer
);
246 n
->timer
.function
= neigh_timer_handler
;
247 n
->timer
.data
= (unsigned long)n
;
252 atomic_set(&n
->refcnt
, 1);
257 struct neighbour
*neigh_lookup(struct neigh_table
*tbl
, const void *pkey
,
258 struct net_device
*dev
)
262 int key_len
= tbl
->key_len
;
264 hash_val
= tbl
->hash(pkey
, dev
);
266 read_lock_bh(&tbl
->lock
);
267 for (n
= tbl
->hash_buckets
[hash_val
]; n
; n
= n
->next
) {
269 memcmp(n
->primary_key
, pkey
, key_len
) == 0) {
274 read_unlock_bh(&tbl
->lock
);
278 struct neighbour
* neigh_create(struct neigh_table
*tbl
, const void *pkey
,
279 struct net_device
*dev
)
281 struct neighbour
*n
, *n1
;
283 int key_len
= tbl
->key_len
;
286 n
= neigh_alloc(tbl
);
288 return ERR_PTR(-ENOBUFS
);
290 memcpy(n
->primary_key
, pkey
, key_len
);
294 /* Protocol specific setup. */
295 if (tbl
->constructor
&& (error
= tbl
->constructor(n
)) < 0) {
297 return ERR_PTR(error
);
300 /* Device specific setup. */
301 if (n
->parms
&& n
->parms
->neigh_setup
&&
302 (error
= n
->parms
->neigh_setup(n
)) < 0) {
304 return ERR_PTR(error
);
307 n
->confirmed
= jiffies
- (n
->parms
->base_reachable_time
<<1);
309 hash_val
= tbl
->hash(pkey
, dev
);
311 write_lock_bh(&tbl
->lock
);
312 for (n1
= tbl
->hash_buckets
[hash_val
]; n1
; n1
= n1
->next
) {
313 if (dev
== n1
->dev
&&
314 memcmp(n1
->primary_key
, pkey
, key_len
) == 0) {
316 write_unlock_bh(&tbl
->lock
);
322 n
->next
= tbl
->hash_buckets
[hash_val
];
323 tbl
->hash_buckets
[hash_val
] = n
;
326 write_unlock_bh(&tbl
->lock
);
327 NEIGH_PRINTK2("neigh %p is created.\n", n
);
331 struct pneigh_entry
* pneigh_lookup(struct neigh_table
*tbl
, const void *pkey
,
332 struct net_device
*dev
, int creat
)
334 struct pneigh_entry
*n
;
336 int key_len
= tbl
->key_len
;
338 hash_val
= *(u32
*)(pkey
+ key_len
- 4);
339 hash_val
^= (hash_val
>>16);
340 hash_val
^= hash_val
>>8;
341 hash_val
^= hash_val
>>4;
342 hash_val
&= PNEIGH_HASHMASK
;
344 read_lock_bh(&tbl
->lock
);
346 for (n
= tbl
->phash_buckets
[hash_val
]; n
; n
= n
->next
) {
347 if (memcmp(n
->key
, pkey
, key_len
) == 0 &&
348 (n
->dev
== dev
|| !n
->dev
)) {
349 read_unlock_bh(&tbl
->lock
);
353 read_unlock_bh(&tbl
->lock
);
357 n
= kmalloc(sizeof(*n
) + key_len
, GFP_KERNEL
);
361 memcpy(n
->key
, pkey
, key_len
);
364 if (tbl
->pconstructor
&& tbl
->pconstructor(n
)) {
369 write_lock_bh(&tbl
->lock
);
370 n
->next
= tbl
->phash_buckets
[hash_val
];
371 tbl
->phash_buckets
[hash_val
] = n
;
372 write_unlock_bh(&tbl
->lock
);
377 int pneigh_delete(struct neigh_table
*tbl
, const void *pkey
, struct net_device
*dev
)
379 struct pneigh_entry
*n
, **np
;
381 int key_len
= tbl
->key_len
;
383 hash_val
= *(u32
*)(pkey
+ key_len
- 4);
384 hash_val
^= (hash_val
>>16);
385 hash_val
^= hash_val
>>8;
386 hash_val
^= hash_val
>>4;
387 hash_val
&= PNEIGH_HASHMASK
;
389 for (np
= &tbl
->phash_buckets
[hash_val
]; (n
=*np
) != NULL
; np
= &n
->next
) {
390 if (memcmp(n
->key
, pkey
, key_len
) == 0 && n
->dev
== dev
) {
391 write_lock_bh(&tbl
->lock
);
393 write_unlock_bh(&tbl
->lock
);
394 if (tbl
->pdestructor
)
403 static int pneigh_ifdown(struct neigh_table
*tbl
, struct net_device
*dev
)
405 struct pneigh_entry
*n
, **np
;
408 for (h
=0; h
<=PNEIGH_HASHMASK
; h
++) {
409 np
= &tbl
->phash_buckets
[h
];
410 while ((n
=*np
) != NULL
) {
411 if (n
->dev
== dev
|| dev
== NULL
) {
413 if (tbl
->pdestructor
)
426 * neighbour must already be out of the table;
429 void neigh_destroy(struct neighbour
*neigh
)
434 printk("Destroying alive neighbour %p from %08lx\n", neigh
,
435 *(((unsigned long*)&neigh
)-1));
439 if (neigh_del_timer(neigh
))
440 printk("Impossible event.\n");
442 while ((hh
= neigh
->hh
) != NULL
) {
443 neigh
->hh
= hh
->hh_next
;
445 write_lock_bh(&hh
->hh_lock
);
446 hh
->hh_output
= neigh_blackhole
;
447 write_unlock_bh(&hh
->hh_lock
);
448 if (atomic_dec_and_test(&hh
->hh_refcnt
))
452 if (neigh
->ops
&& neigh
->ops
->destructor
)
453 (neigh
->ops
->destructor
)(neigh
);
455 skb_queue_purge(&neigh
->arp_queue
);
459 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh
);
462 neigh
->tbl
->entries
--;
463 kmem_cache_free(neigh
->tbl
->kmem_cachep
, neigh
);
466 /* Neighbour state is suspicious;
469 Called with write_locked neigh.
/* Demote a neighbour whose reachability is in doubt: point the direct
 * output hook and every cached hard-header (hh) entry at the slow
 * ops->output path, so traffic triggers re-resolution.
 * Per the file's locking comment, called with neigh write-locked.
 * NOTE(review): chunk is whitespace-mangled; code text below is kept
 * byte-identical (including the misspelled printk string, which is
 * runtime output and must not be edited here).
 */
471 static void neigh_suspect(struct neighbour
*neigh
)
475 NEIGH_PRINTK2("neigh %p is suspecteded.\n", neigh
);
479 neigh
->output
= neigh
->ops
->output
;
481 for (hh
= neigh
->hh
; hh
; hh
= hh
->hh_next
)
482 hh
->hh_output
= neigh
->ops
->output
;
485 /* Neighbour state is OK;
488 Called with write_locked neigh.
/* Promote a neighbour confirmed reachable: install the fast
 * connected_output hook and switch every cached hard-header (hh)
 * entry to the cached hh_output path.
 * Per the file's locking comment, called with neigh write-locked.
 * NOTE(review): chunk is whitespace-mangled; code text below is kept
 * byte-identical.
 */
490 static void neigh_connect(struct neighbour
*neigh
)
494 NEIGH_PRINTK2("neigh %p is connected.\n", neigh
);
498 neigh
->output
= neigh
->ops
->connected_output
;
500 for (hh
= neigh
->hh
; hh
; hh
= hh
->hh_next
)
501 hh
->hh_output
= neigh
->ops
->hh_output
;
505 Transitions NUD_STALE <-> NUD_REACHABLE do not occur
506 when fast path is built: we have no timers associated with
507 these states, we do not have time to check state when sending.
508 neigh_periodic_timer check periodically neigh->confirmed
509 time and moves NUD_REACHABLE -> NUD_STALE.
511 If a routine wants to know TRUE entry state, it calls
512 neigh_sync before checking state.
514 Called with write_locked neigh.
517 static void neigh_sync(struct neighbour
*n
)
519 unsigned long now
= jiffies
;
520 u8 state
= n
->nud_state
;
523 if (state
&(NUD_NOARP
|NUD_PERMANENT
))
525 if (state
&NUD_REACHABLE
) {
526 if (now
- n
->confirmed
> n
->parms
->reachable_time
) {
527 n
->nud_state
= NUD_STALE
;
530 } else if (state
&NUD_VALID
) {
531 if (now
- n
->confirmed
< n
->parms
->reachable_time
) {
533 n
->nud_state
= NUD_REACHABLE
;
539 static void SMP_TIMER_NAME(neigh_periodic_timer
)(unsigned long arg
)
541 struct neigh_table
*tbl
= (struct neigh_table
*)arg
;
542 unsigned long now
= jiffies
;
546 write_lock(&tbl
->lock
);
549 * periodically recompute ReachableTime from random function
552 if (now
- tbl
->last_rand
> 300*HZ
) {
553 struct neigh_parms
*p
;
554 tbl
->last_rand
= now
;
555 for (p
=&tbl
->parms
; p
; p
= p
->next
)
556 p
->reachable_time
= neigh_rand_reach_time(p
->base_reachable_time
);
559 for (i
=0; i
<= NEIGH_HASHMASK
; i
++) {
560 struct neighbour
*n
, **np
;
562 np
= &tbl
->hash_buckets
[i
];
563 while ((n
= *np
) != NULL
) {
566 write_lock(&n
->lock
);
568 state
= n
->nud_state
;
569 if (state
&(NUD_PERMANENT
|NUD_IN_TIMER
)) {
570 write_unlock(&n
->lock
);
574 if ((long)(n
->used
- n
->confirmed
) < 0)
575 n
->used
= n
->confirmed
;
577 if (atomic_read(&n
->refcnt
) == 1 &&
578 (state
== NUD_FAILED
|| now
- n
->used
> n
->parms
->gc_staletime
)) {
581 write_unlock(&n
->lock
);
586 if (n
->nud_state
&NUD_REACHABLE
&&
587 now
- n
->confirmed
> n
->parms
->reachable_time
) {
588 n
->nud_state
= NUD_STALE
;
591 write_unlock(&n
->lock
);
598 mod_timer(&tbl
->gc_timer
, now
+ tbl
->gc_interval
);
599 write_unlock(&tbl
->lock
);
/* Timer callback installed on tbl->gc_timer by neigh_table_init().
 * It does no scanning itself: it only schedules the table's gc_task
 * tasklet, which runs the SMP_TIMER_NAME(neigh_periodic_timer) scan
 * above outside timer context.
 * NOTE(review): chunk is whitespace-mangled; code text below is kept
 * byte-identical.
 */
603 static void neigh_periodic_timer(unsigned long arg
)
605 struct neigh_table
*tbl
= (struct neigh_table
*)arg
;
607 tasklet_schedule(&tbl
->gc_task
);
611 static __inline__
int neigh_max_probes(struct neighbour
*n
)
613 struct neigh_parms
*p
= n
->parms
;
614 return p
->ucast_probes
+ p
->app_probes
+ p
->mcast_probes
;
618 /* Called when a timer expires for a neighbour entry. */
620 static void neigh_timer_handler(unsigned long arg
)
622 unsigned long now
= jiffies
;
623 struct neighbour
*neigh
= (struct neighbour
*)arg
;
627 write_lock(&neigh
->lock
);
629 state
= neigh
->nud_state
;
631 if (!(state
&NUD_IN_TIMER
)) {
633 printk("neigh: timer & !nud_in_timer\n");
638 if ((state
&NUD_VALID
) &&
639 now
- neigh
->confirmed
< neigh
->parms
->reachable_time
) {
640 neigh
->nud_state
= NUD_REACHABLE
;
641 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh
);
642 neigh_connect(neigh
);
645 if (state
== NUD_DELAY
) {
646 NEIGH_PRINTK2("neigh %p is probed.\n", neigh
);
647 neigh
->nud_state
= NUD_PROBE
;
648 atomic_set(&neigh
->probes
, 0);
651 if (atomic_read(&neigh
->probes
) >= neigh_max_probes(neigh
)) {
654 neigh
->nud_state
= NUD_FAILED
;
656 neigh
->tbl
->stats
.res_failed
++;
657 NEIGH_PRINTK2("neigh %p is failed.\n", neigh
);
659 /* It is very thin place. report_unreachable is very complicated
660 routine. Particularly, it can hit the same neighbour entry!
662 So that, we try to be accurate and avoid dead loop. --ANK
664 while(neigh
->nud_state
==NUD_FAILED
&& (skb
=__skb_dequeue(&neigh
->arp_queue
)) != NULL
) {
665 write_unlock(&neigh
->lock
);
666 neigh
->ops
->error_report(neigh
, skb
);
667 write_lock(&neigh
->lock
);
669 skb_queue_purge(&neigh
->arp_queue
);
673 neigh
->timer
.expires
= now
+ neigh
->parms
->retrans_time
;
674 add_timer(&neigh
->timer
);
675 write_unlock(&neigh
->lock
);
677 neigh
->ops
->solicit(neigh
, skb_peek(&neigh
->arp_queue
));
678 atomic_inc(&neigh
->probes
);
682 write_unlock(&neigh
->lock
);
684 if (notify
&& neigh
->parms
->app_probes
)
685 neigh_app_notify(neigh
);
687 neigh_release(neigh
);
690 int __neigh_event_send(struct neighbour
*neigh
, struct sk_buff
*skb
)
692 write_lock_bh(&neigh
->lock
);
693 if (!(neigh
->nud_state
&(NUD_CONNECTED
|NUD_DELAY
|NUD_PROBE
))) {
694 if (!(neigh
->nud_state
&(NUD_STALE
|NUD_INCOMPLETE
))) {
695 if (neigh
->parms
->mcast_probes
+ neigh
->parms
->app_probes
) {
696 atomic_set(&neigh
->probes
, neigh
->parms
->ucast_probes
);
697 neigh
->nud_state
= NUD_INCOMPLETE
;
699 neigh
->timer
.expires
= jiffies
+ neigh
->parms
->retrans_time
;
700 add_timer(&neigh
->timer
);
701 write_unlock_bh(&neigh
->lock
);
702 neigh
->ops
->solicit(neigh
, skb
);
703 atomic_inc(&neigh
->probes
);
704 write_lock_bh(&neigh
->lock
);
706 neigh
->nud_state
= NUD_FAILED
;
707 write_unlock_bh(&neigh
->lock
);
714 if (neigh
->nud_state
== NUD_INCOMPLETE
) {
716 if (skb_queue_len(&neigh
->arp_queue
) >= neigh
->parms
->queue_len
) {
717 struct sk_buff
*buff
;
718 buff
= neigh
->arp_queue
.prev
;
719 __skb_unlink(buff
, &neigh
->arp_queue
);
722 __skb_queue_head(&neigh
->arp_queue
, skb
);
724 write_unlock_bh(&neigh
->lock
);
727 if (neigh
->nud_state
== NUD_STALE
) {
728 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh
);
730 neigh
->nud_state
= NUD_DELAY
;
731 neigh
->timer
.expires
= jiffies
+ neigh
->parms
->delay_probe_time
;
732 add_timer(&neigh
->timer
);
735 write_unlock_bh(&neigh
->lock
);
739 static __inline__
void neigh_update_hhs(struct neighbour
*neigh
)
742 void (*update
)(struct hh_cache
*, struct net_device
*, unsigned char*) =
743 neigh
->dev
->header_cache_update
;
746 for (hh
=neigh
->hh
; hh
; hh
=hh
->hh_next
) {
747 write_lock_bh(&hh
->hh_lock
);
748 update(hh
, neigh
->dev
, neigh
->ha
);
749 write_unlock_bh(&hh
->hh_lock
);
756 /* Generic update routine.
757 -- lladdr is new lladdr or NULL, if it is not supplied.
759 -- override==1 allows to override existing lladdr, if it is different.
760 -- arp==0 means that the change is administrative.
762 Caller MUST hold reference count on the entry.
765 int neigh_update(struct neighbour
*neigh
, const u8
*lladdr
, u8
new, int override
, int arp
)
770 struct net_device
*dev
= neigh
->dev
;
772 write_lock_bh(&neigh
->lock
);
773 old
= neigh
->nud_state
;
776 if (arp
&& (old
&(NUD_NOARP
|NUD_PERMANENT
)))
779 if (!(new&NUD_VALID
)) {
780 neigh_del_timer(neigh
);
781 if (old
&NUD_CONNECTED
)
782 neigh_suspect(neigh
);
783 neigh
->nud_state
= new;
785 notify
= old
&NUD_VALID
;
789 /* Compare new lladdr with cached one */
790 if (dev
->addr_len
== 0) {
791 /* First case: device needs no address. */
794 /* The second case: if something is already cached
795 and a new address is proposed:
797 - if they are different, check override flag
800 if (memcmp(lladdr
, neigh
->ha
, dev
->addr_len
) == 0)
806 /* No address is supplied; if we know something,
807 use it, otherwise discard the request.
810 if (!(old
&NUD_VALID
))
816 old
= neigh
->nud_state
;
817 if (new&NUD_CONNECTED
)
818 neigh
->confirmed
= jiffies
;
819 neigh
->updated
= jiffies
;
821 /* If entry was valid and address is not changed,
822 do not change entry state, if new one is STALE.
826 if (lladdr
== neigh
->ha
)
827 if (new == old
|| (new == NUD_STALE
&& (old
&NUD_CONNECTED
)))
830 neigh_del_timer(neigh
);
831 neigh
->nud_state
= new;
832 if (lladdr
!= neigh
->ha
) {
833 memcpy(&neigh
->ha
, lladdr
, dev
->addr_len
);
834 neigh_update_hhs(neigh
);
835 neigh
->confirmed
= jiffies
- (neigh
->parms
->base_reachable_time
<<1);
842 if (new&NUD_CONNECTED
)
843 neigh_connect(neigh
);
845 neigh_suspect(neigh
);
846 if (!(old
&NUD_VALID
)) {
849 /* Again: avoid dead loop if something went wrong */
851 while (neigh
->nud_state
&NUD_VALID
&&
852 (skb
=__skb_dequeue(&neigh
->arp_queue
)) != NULL
) {
853 struct neighbour
*n1
= neigh
;
854 write_unlock_bh(&neigh
->lock
);
855 /* On shaper/eql skb->dst->neighbour != neigh :( */
856 if (skb
->dst
&& skb
->dst
->neighbour
)
857 n1
= skb
->dst
->neighbour
;
859 write_lock_bh(&neigh
->lock
);
861 skb_queue_purge(&neigh
->arp_queue
);
864 write_unlock_bh(&neigh
->lock
);
866 if (notify
&& neigh
->parms
->app_probes
)
867 neigh_app_notify(neigh
);
872 struct neighbour
* neigh_event_ns(struct neigh_table
*tbl
,
873 u8
*lladdr
, void *saddr
,
874 struct net_device
*dev
)
876 struct neighbour
*neigh
;
878 neigh
= __neigh_lookup(tbl
, saddr
, dev
, lladdr
|| !dev
->addr_len
);
880 neigh_update(neigh
, lladdr
, NUD_STALE
, 1, 1);
884 static void neigh_hh_init(struct neighbour
*n
, struct dst_entry
*dst
, u16 protocol
)
886 struct hh_cache
*hh
= NULL
;
887 struct net_device
*dev
= dst
->dev
;
889 for (hh
=n
->hh
; hh
; hh
= hh
->hh_next
)
890 if (hh
->hh_type
== protocol
)
893 if (!hh
&& (hh
= kmalloc(sizeof(*hh
), GFP_ATOMIC
)) != NULL
) {
894 memset(hh
, 0, sizeof(struct hh_cache
));
895 hh
->hh_lock
= RW_LOCK_UNLOCKED
;
896 hh
->hh_type
= protocol
;
897 atomic_set(&hh
->hh_refcnt
, 0);
899 if (dev
->hard_header_cache(n
, hh
)) {
903 atomic_inc(&hh
->hh_refcnt
);
906 if (n
->nud_state
&NUD_CONNECTED
)
907 hh
->hh_output
= n
->ops
->hh_output
;
909 hh
->hh_output
= n
->ops
->output
;
913 atomic_inc(&hh
->hh_refcnt
);
918 /* This function can be used in contexts, where only old dev_queue_xmit
919 worked, f.e. if you want to override normal output path (eql, shaper),
920 but resolution is not made yet.
923 int neigh_compat_output(struct sk_buff
*skb
)
925 struct net_device
*dev
= skb
->dev
;
927 __skb_pull(skb
, skb
->nh
.raw
- skb
->data
);
929 if (dev
->hard_header
&&
930 dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), NULL
, NULL
, skb
->len
) < 0 &&
931 dev
->rebuild_header(skb
))
934 return dev_queue_xmit(skb
);
937 /* Slow and careful. */
939 int neigh_resolve_output(struct sk_buff
*skb
)
941 struct dst_entry
*dst
= skb
->dst
;
942 struct neighbour
*neigh
;
944 if (!dst
|| !(neigh
= dst
->neighbour
))
947 __skb_pull(skb
, skb
->nh
.raw
- skb
->data
);
949 if (neigh_event_send(neigh
, skb
) == 0) {
951 struct net_device
*dev
= neigh
->dev
;
952 if (dev
->hard_header_cache
&& dst
->hh
== NULL
) {
953 write_lock_bh(&neigh
->lock
);
955 neigh_hh_init(neigh
, dst
, dst
->ops
->protocol
);
956 err
= dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), neigh
->ha
, NULL
, skb
->len
);
957 write_unlock_bh(&neigh
->lock
);
959 read_lock_bh(&neigh
->lock
);
960 err
= dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), neigh
->ha
, NULL
, skb
->len
);
961 read_unlock_bh(&neigh
->lock
);
964 return neigh
->ops
->queue_xmit(skb
);
971 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst
, dst
? dst
->neighbour
: NULL
);
976 /* As fast as possible without hh cache */
978 int neigh_connected_output(struct sk_buff
*skb
)
981 struct dst_entry
*dst
= skb
->dst
;
982 struct neighbour
*neigh
= dst
->neighbour
;
983 struct net_device
*dev
= neigh
->dev
;
985 __skb_pull(skb
, skb
->nh
.raw
- skb
->data
);
987 read_lock_bh(&neigh
->lock
);
988 err
= dev
->hard_header(skb
, dev
, ntohs(skb
->protocol
), neigh
->ha
, NULL
, skb
->len
);
989 read_unlock_bh(&neigh
->lock
);
991 return neigh
->ops
->queue_xmit(skb
);
996 static void neigh_proxy_process(unsigned long arg
)
998 struct neigh_table
*tbl
= (struct neigh_table
*)arg
;
1000 unsigned long now
= jiffies
;
1001 struct sk_buff
*skb
= tbl
->proxy_queue
.next
;
1003 while (skb
!= (struct sk_buff
*)&tbl
->proxy_queue
) {
1004 struct sk_buff
*back
= skb
;
1005 long tdif
= back
->stamp
.tv_usec
- now
;
1009 __skb_unlink(back
, &tbl
->proxy_queue
);
1010 if (tbl
->proxy_redo
)
1011 tbl
->proxy_redo(back
);
1014 } else if (!sched_next
|| tdif
< sched_next
)
1017 del_timer(&tbl
->proxy_timer
);
1019 tbl
->proxy_timer
.expires
= jiffies
+ sched_next
;
1020 add_timer(&tbl
->proxy_timer
);
1024 void pneigh_enqueue(struct neigh_table
*tbl
, struct neigh_parms
*p
,
1025 struct sk_buff
*skb
)
1027 unsigned long now
= jiffies
;
1028 long sched_next
= net_random()%p
->proxy_delay
;
1030 if (tbl
->proxy_queue
.qlen
> p
->proxy_qlen
) {
1034 skb
->stamp
.tv_sec
= 0;
1035 skb
->stamp
.tv_usec
= now
+ sched_next
;
1036 if (del_timer(&tbl
->proxy_timer
)) {
1037 long tval
= tbl
->proxy_timer
.expires
- now
;
1038 if (tval
< sched_next
)
1041 tbl
->proxy_timer
.expires
= now
+ sched_next
;
1042 dst_release(skb
->dst
);
1044 __skb_queue_tail(&tbl
->proxy_queue
, skb
);
1045 add_timer(&tbl
->proxy_timer
);
1049 struct neigh_parms
*neigh_parms_alloc(struct net_device
*dev
, struct neigh_table
*tbl
)
1051 struct neigh_parms
*p
;
1052 p
= kmalloc(sizeof(*p
), GFP_KERNEL
);
1054 memcpy(p
, &tbl
->parms
, sizeof(*p
));
1056 p
->reachable_time
= neigh_rand_reach_time(p
->base_reachable_time
);
1057 if (dev
&& dev
->neigh_setup
) {
1058 if (dev
->neigh_setup(dev
, p
)) {
1063 write_lock_bh(&tbl
->lock
);
1064 p
->next
= tbl
->parms
.next
;
1065 tbl
->parms
.next
= p
;
1066 write_unlock_bh(&tbl
->lock
);
1071 void neigh_parms_release(struct neigh_table
*tbl
, struct neigh_parms
*parms
)
1073 struct neigh_parms
**p
;
1075 if (parms
== NULL
|| parms
== &tbl
->parms
)
1077 write_lock_bh(&tbl
->lock
);
1078 for (p
= &tbl
->parms
.next
; *p
; p
= &(*p
)->next
) {
1081 write_unlock_bh(&tbl
->lock
);
1082 #ifdef CONFIG_SYSCTL
1083 neigh_sysctl_unregister(parms
);
1089 write_unlock_bh(&tbl
->lock
);
1090 NEIGH_PRINTK1("neigh_parms_release: not found\n");
1094 void neigh_table_init(struct neigh_table
*tbl
)
1096 unsigned long now
= jiffies
;
1098 tbl
->parms
.reachable_time
= neigh_rand_reach_time(tbl
->parms
.base_reachable_time
);
1100 if (tbl
->kmem_cachep
== NULL
)
1101 tbl
->kmem_cachep
= kmem_cache_create(tbl
->id
,
1102 (tbl
->entry_size
+15)&~15,
1103 0, SLAB_HWCACHE_ALIGN
,
1107 tasklet_init(&tbl
->gc_task
, SMP_TIMER_NAME(neigh_periodic_timer
), (unsigned long)tbl
);
1109 init_timer(&tbl
->gc_timer
);
1110 tbl
->lock
= RW_LOCK_UNLOCKED
;
1111 tbl
->gc_timer
.data
= (unsigned long)tbl
;
1112 tbl
->gc_timer
.function
= neigh_periodic_timer
;
1113 tbl
->gc_timer
.expires
= now
+ tbl
->gc_interval
+ tbl
->parms
.reachable_time
;
1114 add_timer(&tbl
->gc_timer
);
1116 init_timer(&tbl
->proxy_timer
);
1117 tbl
->proxy_timer
.data
= (unsigned long)tbl
;
1118 tbl
->proxy_timer
.function
= neigh_proxy_process
;
1119 skb_queue_head_init(&tbl
->proxy_queue
);
1121 tbl
->last_flush
= now
;
1122 tbl
->last_rand
= now
+ tbl
->parms
.reachable_time
*20;
1123 write_lock(&neigh_tbl_lock
);
1124 tbl
->next
= neigh_tables
;
1126 write_unlock(&neigh_tbl_lock
);
1129 int neigh_table_clear(struct neigh_table
*tbl
)
1131 struct neigh_table
**tp
;
1133 /* It is not clean... Fix it to unload IPv6 module safely */
1134 del_timer_sync(&tbl
->gc_timer
);
1135 tasklet_kill(&tbl
->gc_task
);
1136 del_timer_sync(&tbl
->proxy_timer
);
1137 skb_queue_purge(&tbl
->proxy_queue
);
1138 neigh_ifdown(tbl
, NULL
);
1140 printk(KERN_CRIT
"neighbour leakage\n");
1141 write_lock(&neigh_tbl_lock
);
1142 for (tp
= &neigh_tables
; *tp
; tp
= &(*tp
)->next
) {
1148 write_unlock(&neigh_tbl_lock
);
1149 #ifdef CONFIG_SYSCTL
1150 neigh_sysctl_unregister(&tbl
->parms
);
1155 #ifdef CONFIG_RTNETLINK
1158 int neigh_delete(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
1160 struct ndmsg
*ndm
= NLMSG_DATA(nlh
);
1161 struct rtattr
**nda
= arg
;
1162 struct neigh_table
*tbl
;
1163 struct net_device
*dev
= NULL
;
1166 if (ndm
->ndm_ifindex
) {
1167 if ((dev
= dev_get_by_index(ndm
->ndm_ifindex
)) == NULL
)
1171 read_lock(&neigh_tbl_lock
);
1172 for (tbl
=neigh_tables
; tbl
; tbl
= tbl
->next
) {
1173 struct neighbour
*n
;
1175 if (tbl
->family
!= ndm
->ndm_family
)
1177 read_unlock(&neigh_tbl_lock
);
1180 if (nda
[NDA_DST
-1] == NULL
||
1181 nda
[NDA_DST
-1]->rta_len
!= RTA_LENGTH(tbl
->key_len
))
1184 if (ndm
->ndm_flags
&NTF_PROXY
) {
1185 err
= pneigh_delete(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1192 n
= neigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1194 err
= neigh_update(n
, NULL
, NUD_FAILED
, 1, 0);
1202 read_unlock(&neigh_tbl_lock
);
1207 return -EADDRNOTAVAIL
;
1210 int neigh_add(struct sk_buff
*skb
, struct nlmsghdr
*nlh
, void *arg
)
1212 struct ndmsg
*ndm
= NLMSG_DATA(nlh
);
1213 struct rtattr
**nda
= arg
;
1214 struct neigh_table
*tbl
;
1215 struct net_device
*dev
= NULL
;
1217 if (ndm
->ndm_ifindex
) {
1218 if ((dev
= dev_get_by_index(ndm
->ndm_ifindex
)) == NULL
)
1222 read_lock(&neigh_tbl_lock
);
1223 for (tbl
=neigh_tables
; tbl
; tbl
= tbl
->next
) {
1225 struct neighbour
*n
;
1227 if (tbl
->family
!= ndm
->ndm_family
)
1229 read_unlock(&neigh_tbl_lock
);
1232 if (nda
[NDA_DST
-1] == NULL
||
1233 nda
[NDA_DST
-1]->rta_len
!= RTA_LENGTH(tbl
->key_len
))
1235 if (ndm
->ndm_flags
&NTF_PROXY
) {
1237 if (pneigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
, 1))
1244 if (nda
[NDA_LLADDR
-1] != NULL
&&
1245 nda
[NDA_LLADDR
-1]->rta_len
!= RTA_LENGTH(dev
->addr_len
))
1248 n
= neigh_lookup(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1250 if (nlh
->nlmsg_flags
&NLM_F_EXCL
)
1252 } else if (!(nlh
->nlmsg_flags
&NLM_F_CREATE
))
1255 n
= __neigh_lookup_errno(tbl
, RTA_DATA(nda
[NDA_DST
-1]), dev
);
1262 err
= neigh_update(n
, nda
[NDA_LLADDR
-1] ? RTA_DATA(nda
[NDA_LLADDR
-1]) : NULL
,
1264 nlh
->nlmsg_flags
&NLM_F_REPLACE
, 0);
1273 read_unlock(&neigh_tbl_lock
);
1277 return -EADDRNOTAVAIL
;
1281 static int neigh_fill_info(struct sk_buff
*skb
, struct neighbour
*n
,
1282 u32 pid
, u32 seq
, int event
)
1284 unsigned long now
= jiffies
;
1286 struct nlmsghdr
*nlh
;
1287 unsigned char *b
= skb
->tail
;
1288 struct nda_cacheinfo ci
;
1291 nlh
= NLMSG_PUT(skb
, pid
, seq
, event
, sizeof(*ndm
));
1292 ndm
= NLMSG_DATA(nlh
);
1293 ndm
->ndm_family
= n
->ops
->family
;
1294 ndm
->ndm_flags
= n
->flags
;
1295 ndm
->ndm_type
= n
->type
;
1296 ndm
->ndm_ifindex
= n
->dev
->ifindex
;
1297 RTA_PUT(skb
, NDA_DST
, n
->tbl
->key_len
, n
->primary_key
);
1298 read_lock_bh(&n
->lock
);
1300 ndm
->ndm_state
= n
->nud_state
;
1301 if (n
->nud_state
&NUD_VALID
)
1302 RTA_PUT(skb
, NDA_LLADDR
, n
->dev
->addr_len
, n
->ha
);
1303 ci
.ndm_used
= now
- n
->used
;
1304 ci
.ndm_confirmed
= now
- n
->confirmed
;
1305 ci
.ndm_updated
= now
- n
->updated
;
1306 ci
.ndm_refcnt
= atomic_read(&n
->refcnt
) - 1;
1307 read_unlock_bh(&n
->lock
);
1309 RTA_PUT(skb
, NDA_CACHEINFO
, sizeof(ci
), &ci
);
1310 nlh
->nlmsg_len
= skb
->tail
- b
;
1316 read_unlock_bh(&n
->lock
);
1317 skb_trim(skb
, b
- skb
->data
);
1322 static int neigh_dump_table(struct neigh_table
*tbl
, struct sk_buff
*skb
, struct netlink_callback
*cb
)
1324 struct neighbour
*n
;
1329 s_idx
= idx
= cb
->args
[2];
1330 for (h
=0; h
<= NEIGH_HASHMASK
; h
++) {
1331 if (h
< s_h
) continue;
1334 read_lock_bh(&tbl
->lock
);
1335 for (n
= tbl
->hash_buckets
[h
], idx
= 0; n
;
1336 n
= n
->next
, idx
++) {
1339 if (neigh_fill_info(skb
, n
, NETLINK_CB(cb
->skb
).pid
,
1340 cb
->nlh
->nlmsg_seq
, RTM_NEWNEIGH
) <= 0) {
1341 read_unlock_bh(&tbl
->lock
);
1347 read_unlock_bh(&tbl
->lock
);
1355 int neigh_dump_info(struct sk_buff
*skb
, struct netlink_callback
*cb
)
1359 struct neigh_table
*tbl
;
1360 int family
= ((struct rtgenmsg
*)NLMSG_DATA(cb
->nlh
))->rtgen_family
;
1364 read_lock(&neigh_tbl_lock
);
1365 for (tbl
=neigh_tables
, t
=0; tbl
; tbl
= tbl
->next
, t
++) {
1366 if (t
< s_t
) continue;
1367 if (family
&& tbl
->family
!= family
)
1370 memset(&cb
->args
[1], 0, sizeof(cb
->args
)-sizeof(cb
->args
[0]));
1371 if (neigh_dump_table(tbl
, skb
, cb
) < 0)
1374 read_unlock(&neigh_tbl_lock
);
1382 void neigh_app_ns(struct neighbour
*n
)
1384 struct sk_buff
*skb
;
1385 struct nlmsghdr
*nlh
;
1386 int size
= NLMSG_SPACE(sizeof(struct ndmsg
)+256);
1388 skb
= alloc_skb(size
, GFP_ATOMIC
);
1392 if (neigh_fill_info(skb
, n
, 0, 0, RTM_GETNEIGH
) < 0) {
1396 nlh
= (struct nlmsghdr
*)skb
->data
;
1397 nlh
->nlmsg_flags
= NLM_F_REQUEST
;
1398 NETLINK_CB(skb
).dst_groups
= RTMGRP_NEIGH
;
1399 netlink_broadcast(rtnl
, skb
, 0, RTMGRP_NEIGH
, GFP_ATOMIC
);
1402 static void neigh_app_notify(struct neighbour
*n
)
1404 struct sk_buff
*skb
;
1405 struct nlmsghdr
*nlh
;
1406 int size
= NLMSG_SPACE(sizeof(struct ndmsg
)+256);
1408 skb
= alloc_skb(size
, GFP_ATOMIC
);
1412 if (neigh_fill_info(skb
, n
, 0, 0, RTM_NEWNEIGH
) < 0) {
1416 nlh
= (struct nlmsghdr
*)skb
->data
;
1417 NETLINK_CB(skb
).dst_groups
= RTMGRP_NEIGH
;
1418 netlink_broadcast(rtnl
, skb
, 0, RTMGRP_NEIGH
, GFP_ATOMIC
);
1428 #ifdef CONFIG_SYSCTL
1430 struct neigh_sysctl_table
1432 struct ctl_table_header
*sysctl_header
;
1433 ctl_table neigh_vars
[17];
1434 ctl_table neigh_dev
[2];
1435 ctl_table neigh_neigh_dir
[2];
1436 ctl_table neigh_proto_dir
[2];
1437 ctl_table neigh_root_dir
[2];
1438 } neigh_sysctl_template
= {
1440 {{NET_NEIGH_MCAST_SOLICIT
, "mcast_solicit",
1441 NULL
, sizeof(int), 0644, NULL
,
1443 {NET_NEIGH_UCAST_SOLICIT
, "ucast_solicit",
1444 NULL
, sizeof(int), 0644, NULL
,
1446 {NET_NEIGH_APP_SOLICIT
, "app_solicit",
1447 NULL
, sizeof(int), 0644, NULL
,
1449 {NET_NEIGH_RETRANS_TIME
, "retrans_time",
1450 NULL
, sizeof(int), 0644, NULL
,
1452 {NET_NEIGH_REACHABLE_TIME
, "base_reachable_time",
1453 NULL
, sizeof(int), 0644, NULL
,
1454 &proc_dointvec_jiffies
},
1455 {NET_NEIGH_DELAY_PROBE_TIME
, "delay_first_probe_time",
1456 NULL
, sizeof(int), 0644, NULL
,
1457 &proc_dointvec_jiffies
},
1458 {NET_NEIGH_GC_STALE_TIME
, "gc_stale_time",
1459 NULL
, sizeof(int), 0644, NULL
,
1460 &proc_dointvec_jiffies
},
1461 {NET_NEIGH_UNRES_QLEN
, "unres_qlen",
1462 NULL
, sizeof(int), 0644, NULL
,
1464 {NET_NEIGH_PROXY_QLEN
, "proxy_qlen",
1465 NULL
, sizeof(int), 0644, NULL
,
1467 {NET_NEIGH_ANYCAST_DELAY
, "anycast_delay",
1468 NULL
, sizeof(int), 0644, NULL
,
1470 {NET_NEIGH_PROXY_DELAY
, "proxy_delay",
1471 NULL
, sizeof(int), 0644, NULL
,
1473 {NET_NEIGH_LOCKTIME
, "locktime",
1474 NULL
, sizeof(int), 0644, NULL
,
1476 {NET_NEIGH_GC_INTERVAL
, "gc_interval",
1477 NULL
, sizeof(int), 0644, NULL
,
1478 &proc_dointvec_jiffies
},
1479 {NET_NEIGH_GC_THRESH1
, "gc_thresh1",
1480 NULL
, sizeof(int), 0644, NULL
,
1482 {NET_NEIGH_GC_THRESH2
, "gc_thresh2",
1483 NULL
, sizeof(int), 0644, NULL
,
1485 {NET_NEIGH_GC_THRESH3
, "gc_thresh3",
1486 NULL
, sizeof(int), 0644, NULL
,
1490 {{NET_PROTO_CONF_DEFAULT
, "default", NULL
, 0, 0555, NULL
},{0}},
1491 {{0, "neigh", NULL
, 0, 0555, NULL
},{0}},
1492 {{0, NULL
, NULL
, 0, 0555, NULL
},{0}},
1493 {{CTL_NET
, "net", NULL
, 0, 0555, NULL
},{0}}
1496 int neigh_sysctl_register(struct net_device
*dev
, struct neigh_parms
*p
,
1497 int p_id
, int pdev_id
, char *p_name
)
1499 struct neigh_sysctl_table
*t
;
1501 t
= kmalloc(sizeof(*t
), GFP_KERNEL
);
1504 memcpy(t
, &neigh_sysctl_template
, sizeof(*t
));
1505 t
->neigh_vars
[0].data
= &p
->mcast_probes
;
1506 t
->neigh_vars
[1].data
= &p
->ucast_probes
;
1507 t
->neigh_vars
[2].data
= &p
->app_probes
;
1508 t
->neigh_vars
[3].data
= &p
->retrans_time
;
1509 t
->neigh_vars
[4].data
= &p
->base_reachable_time
;
1510 t
->neigh_vars
[5].data
= &p
->delay_probe_time
;
1511 t
->neigh_vars
[6].data
= &p
->gc_staletime
;
1512 t
->neigh_vars
[7].data
= &p
->queue_len
;
1513 t
->neigh_vars
[8].data
= &p
->proxy_qlen
;
1514 t
->neigh_vars
[9].data
= &p
->anycast_delay
;
1515 t
->neigh_vars
[10].data
= &p
->proxy_delay
;
1516 t
->neigh_vars
[11].data
= &p
->locktime
;
1518 t
->neigh_dev
[0].procname
= dev
->name
;
1519 t
->neigh_dev
[0].ctl_name
= dev
->ifindex
;
1520 memset(&t
->neigh_vars
[12], 0, sizeof(ctl_table
));
1522 t
->neigh_vars
[12].data
= (int*)(p
+1);
1523 t
->neigh_vars
[13].data
= (int*)(p
+1) + 1;
1524 t
->neigh_vars
[14].data
= (int*)(p
+1) + 2;
1525 t
->neigh_vars
[15].data
= (int*)(p
+1) + 3;
1527 t
->neigh_neigh_dir
[0].ctl_name
= pdev_id
;
1529 t
->neigh_proto_dir
[0].procname
= p_name
;
1530 t
->neigh_proto_dir
[0].ctl_name
= p_id
;
1532 t
->neigh_dev
[0].child
= t
->neigh_vars
;
1533 t
->neigh_neigh_dir
[0].child
= t
->neigh_dev
;
1534 t
->neigh_proto_dir
[0].child
= t
->neigh_neigh_dir
;
1535 t
->neigh_root_dir
[0].child
= t
->neigh_proto_dir
;
1537 t
->sysctl_header
= register_sysctl_table(t
->neigh_root_dir
, 0);
1538 if (t
->sysctl_header
== NULL
) {
1542 p
->sysctl_table
= t
;
1546 void neigh_sysctl_unregister(struct neigh_parms
*p
)
1548 if (p
->sysctl_table
) {
1549 struct neigh_sysctl_table
*t
= p
->sysctl_table
;
1550 p
->sysctl_table
= NULL
;
1551 unregister_sysctl_table(t
->sysctl_header
);
1556 #endif /* CONFIG_SYSCTL */