x86: move boot_cpu_physical_apicid to apic_32.c
[linux-2.6/mini2440.git] / net / core / neighbour.c
blob19b8e003f15084821aee62374a74038514800d80
1 /*
2 * Generic address resolution entity
4 * Authors:
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
13 * Fixes:
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
24 #ifdef CONFIG_SYSCTL
25 #include <linux/sysctl.h>
26 #endif
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
30 #include <net/dst.h>
31 #include <net/sock.h>
32 #include <net/netevent.h>
33 #include <net/netlink.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/random.h>
36 #include <linux/string.h>
37 #include <linux/log2.h>
/*
 * Debug verbosity.  NEIGH_PRINTK0 always expands to printk(); NEIGH_PRINTK1
 * is enabled when NEIGH_DEBUG >= 1 and NEIGH_PRINTK2 when NEIGH_DEBUG >= 2.
 * Disabled levels expand to an empty statement.
 */
39 #define NEIGH_DEBUG 1
41 #define NEIGH_PRINTK(x...) printk(x)
42 #define NEIGH_NOPRINTK(x...) do { ; } while(0)
43 #define NEIGH_PRINTK0 NEIGH_PRINTK
44 #define NEIGH_PRINTK1 NEIGH_NOPRINTK
45 #define NEIGH_PRINTK2 NEIGH_NOPRINTK
/* Level 1 messages become real printk() calls. */
47 #if NEIGH_DEBUG >= 1
48 #undef NEIGH_PRINTK1
49 #define NEIGH_PRINTK1 NEIGH_PRINTK
50 #endif
/* Level 2 messages become real printk() calls. */
51 #if NEIGH_DEBUG >= 2
52 #undef NEIGH_PRINTK2
53 #define NEIGH_PRINTK2 NEIGH_PRINTK
54 #endif
/* Mask applied to proxy-neighbour hash values; buckets run 0..PNEIGH_HASHMASK. */
56 #define PNEIGH_HASHMASK 0xF
/* Forward declarations of helpers that are used before their definition. */
58 static void neigh_timer_handler(unsigned long arg);
59 static void __neigh_notify(struct neighbour *n, int type, int flags);
60 static void neigh_update_notify(struct neighbour *neigh);
61 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
/* Head of the list of neighbour tables. */
63 static struct neigh_table *neigh_tables;
64 #ifdef CONFIG_PROC_FS
65 static const struct file_operations neigh_stat_seq_fops;
66 #endif
69 Neighbour hash table buckets are protected with rwlock tbl->lock.
71 - All the scans/updates to hash buckets MUST be made under this lock.
72 - NOTHING clever should be done under this lock: no callbacks
73 to protocol backends, no attempts to send anything to the network.
74 Doing so will result in deadlocks if a backend/driver wants to use the
75 neighbour cache.
76 - If the entry requires some non-trivial actions, increase
77 its reference count and release table lock.
79 Neighbour entries are protected:
80 - with reference count.
81 - with rwlock neigh->lock
83 Reference count prevents destruction.
85 neigh->lock mainly serializes ll address data and its validity state.
86 However, the same lock is also used to protect other entry fields:
87 - timer
88 - resolution queue
90 Again, nothing clever shall be done under neigh->lock;
91 the most complicated procedure that we allow is dev->hard_header.
92 It is assumed that dev->hard_header is simplistic and does
93 not make callbacks to neighbour tables.
95 The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
96 the list of neighbour tables. This list is used only in process context.
99 static DEFINE_RWLOCK(neigh_tbl_lock);
101 static int neigh_blackhole(struct sk_buff *skb)
103 kfree_skb(skb);
104 return -ENETDOWN;
107 static void neigh_cleanup_and_release(struct neighbour *neigh)
109 if (neigh->parms->neigh_cleanup)
110 neigh->parms->neigh_cleanup(neigh);
112 __neigh_notify(neigh, RTM_DELNEIGH, 0);
113 neigh_release(neigh);
/*
 * Draw a random value from the interval (1/2)*base ... (3/2)*base.
 * This matches the default IPv6 settings and is intentionally not
 * overridable.  A @base of zero yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
128 static int neigh_forced_gc(struct neigh_table *tbl)
130 int shrunk = 0;
131 int i;
133 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
135 write_lock_bh(&tbl->lock);
136 for (i = 0; i <= tbl->hash_mask; i++) {
137 struct neighbour *n, **np;
139 np = &tbl->hash_buckets[i];
140 while ((n = *np) != NULL) {
141 /* Neighbour record may be discarded if:
142 * - nobody refers to it.
143 * - it is not permanent
145 write_lock(&n->lock);
146 if (atomic_read(&n->refcnt) == 1 &&
147 !(n->nud_state & NUD_PERMANENT)) {
148 *np = n->next;
149 n->dead = 1;
150 shrunk = 1;
151 write_unlock(&n->lock);
152 neigh_cleanup_and_release(n);
153 continue;
155 write_unlock(&n->lock);
156 np = &n->next;
160 tbl->last_flush = jiffies;
162 write_unlock_bh(&tbl->lock);
164 return shrunk;
167 static void neigh_add_timer(struct neighbour *n, unsigned long when)
169 neigh_hold(n);
170 if (unlikely(mod_timer(&n->timer, when))) {
171 printk("NEIGH: BUG, double timer add, state is %x\n",
172 n->nud_state);
173 dump_stack();
177 static int neigh_del_timer(struct neighbour *n)
179 if ((n->nud_state & NUD_IN_TIMER) &&
180 del_timer(&n->timer)) {
181 neigh_release(n);
182 return 1;
184 return 0;
187 static void pneigh_queue_purge(struct sk_buff_head *list)
189 struct sk_buff *skb;
191 while ((skb = skb_dequeue(list)) != NULL) {
192 dev_put(skb->dev);
193 kfree_skb(skb);
197 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
199 int i;
201 for (i = 0; i <= tbl->hash_mask; i++) {
202 struct neighbour *n, **np = &tbl->hash_buckets[i];
204 while ((n = *np) != NULL) {
205 if (dev && n->dev != dev) {
206 np = &n->next;
207 continue;
209 *np = n->next;
210 write_lock(&n->lock);
211 neigh_del_timer(n);
212 n->dead = 1;
214 if (atomic_read(&n->refcnt) != 1) {
215 /* The most unpleasant situation.
216 We must destroy neighbour entry,
217 but someone still uses it.
219 The destroy will be delayed until
220 the last user releases us, but
221 we must kill timers etc. and move
222 it to safe state.
224 skb_queue_purge(&n->arp_queue);
225 n->output = neigh_blackhole;
226 if (n->nud_state & NUD_VALID)
227 n->nud_state = NUD_NOARP;
228 else
229 n->nud_state = NUD_NONE;
230 NEIGH_PRINTK2("neigh %p is stray.\n", n);
232 write_unlock(&n->lock);
233 neigh_cleanup_and_release(n);
238 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
240 write_lock_bh(&tbl->lock);
241 neigh_flush_dev(tbl, dev);
242 write_unlock_bh(&tbl->lock);
245 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
247 write_lock_bh(&tbl->lock);
248 neigh_flush_dev(tbl, dev);
249 pneigh_ifdown(tbl, dev);
250 write_unlock_bh(&tbl->lock);
252 del_timer_sync(&tbl->proxy_timer);
253 pneigh_queue_purge(&tbl->proxy_queue);
254 return 0;
257 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
259 struct neighbour *n = NULL;
260 unsigned long now = jiffies;
261 int entries;
263 entries = atomic_inc_return(&tbl->entries) - 1;
264 if (entries >= tbl->gc_thresh3 ||
265 (entries >= tbl->gc_thresh2 &&
266 time_after(now, tbl->last_flush + 5 * HZ))) {
267 if (!neigh_forced_gc(tbl) &&
268 entries >= tbl->gc_thresh3)
269 goto out_entries;
272 n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
273 if (!n)
274 goto out_entries;
276 skb_queue_head_init(&n->arp_queue);
277 rwlock_init(&n->lock);
278 n->updated = n->used = now;
279 n->nud_state = NUD_NONE;
280 n->output = neigh_blackhole;
281 n->parms = neigh_parms_clone(&tbl->parms);
282 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
284 NEIGH_CACHE_STAT_INC(tbl, allocs);
285 n->tbl = tbl;
286 atomic_set(&n->refcnt, 1);
287 n->dead = 1;
288 out:
289 return n;
291 out_entries:
292 atomic_dec(&tbl->entries);
293 goto out;
296 static struct neighbour **neigh_hash_alloc(unsigned int entries)
298 unsigned long size = entries * sizeof(struct neighbour *);
299 struct neighbour **ret;
301 if (size <= PAGE_SIZE) {
302 ret = kzalloc(size, GFP_ATOMIC);
303 } else {
304 ret = (struct neighbour **)
305 __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
307 return ret;
310 static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
312 unsigned long size = entries * sizeof(struct neighbour *);
314 if (size <= PAGE_SIZE)
315 kfree(hash);
316 else
317 free_pages((unsigned long)hash, get_order(size));
320 static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
322 struct neighbour **new_hash, **old_hash;
323 unsigned int i, new_hash_mask, old_entries;
325 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
327 BUG_ON(!is_power_of_2(new_entries));
328 new_hash = neigh_hash_alloc(new_entries);
329 if (!new_hash)
330 return;
332 old_entries = tbl->hash_mask + 1;
333 new_hash_mask = new_entries - 1;
334 old_hash = tbl->hash_buckets;
336 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
337 for (i = 0; i < old_entries; i++) {
338 struct neighbour *n, *next;
340 for (n = old_hash[i]; n; n = next) {
341 unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
343 hash_val &= new_hash_mask;
344 next = n->next;
346 n->next = new_hash[hash_val];
347 new_hash[hash_val] = n;
350 tbl->hash_buckets = new_hash;
351 tbl->hash_mask = new_hash_mask;
353 neigh_hash_free(old_hash, old_entries);
356 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
357 struct net_device *dev)
359 struct neighbour *n;
360 int key_len = tbl->key_len;
361 u32 hash_val;
363 NEIGH_CACHE_STAT_INC(tbl, lookups);
365 read_lock_bh(&tbl->lock);
366 hash_val = tbl->hash(pkey, dev);
367 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
368 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
369 neigh_hold(n);
370 NEIGH_CACHE_STAT_INC(tbl, hits);
371 break;
374 read_unlock_bh(&tbl->lock);
375 return n;
378 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
379 const void *pkey)
381 struct neighbour *n;
382 int key_len = tbl->key_len;
383 u32 hash_val;
385 NEIGH_CACHE_STAT_INC(tbl, lookups);
387 read_lock_bh(&tbl->lock);
388 hash_val = tbl->hash(pkey, NULL);
389 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
390 if (!memcmp(n->primary_key, pkey, key_len) &&
391 (net == n->dev->nd_net)) {
392 neigh_hold(n);
393 NEIGH_CACHE_STAT_INC(tbl, hits);
394 break;
397 read_unlock_bh(&tbl->lock);
398 return n;
401 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
402 struct net_device *dev)
404 u32 hash_val;
405 int key_len = tbl->key_len;
406 int error;
407 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
409 if (!n) {
410 rc = ERR_PTR(-ENOBUFS);
411 goto out;
414 memcpy(n->primary_key, pkey, key_len);
415 n->dev = dev;
416 dev_hold(dev);
418 /* Protocol specific setup. */
419 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
420 rc = ERR_PTR(error);
421 goto out_neigh_release;
424 /* Device specific setup. */
425 if (n->parms->neigh_setup &&
426 (error = n->parms->neigh_setup(n)) < 0) {
427 rc = ERR_PTR(error);
428 goto out_neigh_release;
431 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
433 write_lock_bh(&tbl->lock);
435 if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
436 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
438 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
440 if (n->parms->dead) {
441 rc = ERR_PTR(-EINVAL);
442 goto out_tbl_unlock;
445 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
446 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
447 neigh_hold(n1);
448 rc = n1;
449 goto out_tbl_unlock;
453 n->next = tbl->hash_buckets[hash_val];
454 tbl->hash_buckets[hash_val] = n;
455 n->dead = 0;
456 neigh_hold(n);
457 write_unlock_bh(&tbl->lock);
458 NEIGH_PRINTK2("neigh %p is created.\n", n);
459 rc = n;
460 out:
461 return rc;
462 out_tbl_unlock:
463 write_unlock_bh(&tbl->lock);
464 out_neigh_release:
465 neigh_release(n);
466 goto out;
469 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
470 struct net *net, const void *pkey, struct net_device *dev)
472 struct pneigh_entry *n;
473 int key_len = tbl->key_len;
474 u32 hash_val = *(u32 *)(pkey + key_len - 4);
476 hash_val ^= (hash_val >> 16);
477 hash_val ^= hash_val >> 8;
478 hash_val ^= hash_val >> 4;
479 hash_val &= PNEIGH_HASHMASK;
481 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
482 if (!memcmp(n->key, pkey, key_len) &&
483 (n->net == net) &&
484 (n->dev == dev || !n->dev))
485 break;
488 return n;
491 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
492 struct net *net, const void *pkey,
493 struct net_device *dev, int creat)
495 struct pneigh_entry *n;
496 int key_len = tbl->key_len;
497 u32 hash_val = *(u32 *)(pkey + key_len - 4);
499 hash_val ^= (hash_val >> 16);
500 hash_val ^= hash_val >> 8;
501 hash_val ^= hash_val >> 4;
502 hash_val &= PNEIGH_HASHMASK;
504 read_lock_bh(&tbl->lock);
506 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
507 if (!memcmp(n->key, pkey, key_len) &&
508 (n->net == net) &&
509 (n->dev == dev || !n->dev)) {
510 read_unlock_bh(&tbl->lock);
511 goto out;
514 read_unlock_bh(&tbl->lock);
515 n = NULL;
516 if (!creat)
517 goto out;
519 ASSERT_RTNL();
521 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
522 if (!n)
523 goto out;
525 n->net = hold_net(net);
526 memcpy(n->key, pkey, key_len);
527 n->dev = dev;
528 if (dev)
529 dev_hold(dev);
531 if (tbl->pconstructor && tbl->pconstructor(n)) {
532 if (dev)
533 dev_put(dev);
534 release_net(net);
535 kfree(n);
536 n = NULL;
537 goto out;
540 write_lock_bh(&tbl->lock);
541 n->next = tbl->phash_buckets[hash_val];
542 tbl->phash_buckets[hash_val] = n;
543 write_unlock_bh(&tbl->lock);
544 out:
545 return n;
549 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
550 struct net_device *dev)
552 struct pneigh_entry *n, **np;
553 int key_len = tbl->key_len;
554 u32 hash_val = *(u32 *)(pkey + key_len - 4);
556 hash_val ^= (hash_val >> 16);
557 hash_val ^= hash_val >> 8;
558 hash_val ^= hash_val >> 4;
559 hash_val &= PNEIGH_HASHMASK;
561 write_lock_bh(&tbl->lock);
562 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
563 np = &n->next) {
564 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
565 (n->net == net)) {
566 *np = n->next;
567 write_unlock_bh(&tbl->lock);
568 if (tbl->pdestructor)
569 tbl->pdestructor(n);
570 if (n->dev)
571 dev_put(n->dev);
572 release_net(n->net);
573 kfree(n);
574 return 0;
577 write_unlock_bh(&tbl->lock);
578 return -ENOENT;
581 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
583 struct pneigh_entry *n, **np;
584 u32 h;
586 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
587 np = &tbl->phash_buckets[h];
588 while ((n = *np) != NULL) {
589 if (!dev || n->dev == dev) {
590 *np = n->next;
591 if (tbl->pdestructor)
592 tbl->pdestructor(n);
593 if (n->dev)
594 dev_put(n->dev);
595 release_net(n->net);
596 kfree(n);
597 continue;
599 np = &n->next;
602 return -ENOENT;
605 static void neigh_parms_destroy(struct neigh_parms *parms);
607 static inline void neigh_parms_put(struct neigh_parms *parms)
609 if (atomic_dec_and_test(&parms->refcnt))
610 neigh_parms_destroy(parms);
614 * neighbour must already be out of the table;
617 void neigh_destroy(struct neighbour *neigh)
619 struct hh_cache *hh;
621 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
623 if (!neigh->dead) {
624 printk(KERN_WARNING
625 "Destroying alive neighbour %p\n", neigh);
626 dump_stack();
627 return;
630 if (neigh_del_timer(neigh))
631 printk(KERN_WARNING "Impossible event.\n");
633 while ((hh = neigh->hh) != NULL) {
634 neigh->hh = hh->hh_next;
635 hh->hh_next = NULL;
637 write_seqlock_bh(&hh->hh_lock);
638 hh->hh_output = neigh_blackhole;
639 write_sequnlock_bh(&hh->hh_lock);
640 if (atomic_dec_and_test(&hh->hh_refcnt))
641 kfree(hh);
644 skb_queue_purge(&neigh->arp_queue);
646 dev_put(neigh->dev);
647 neigh_parms_put(neigh->parms);
649 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
651 atomic_dec(&neigh->tbl->entries);
652 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
655 /* Neighbour state is suspicious;
656 disable fast path.
658 Called with write_locked neigh.
660 static void neigh_suspect(struct neighbour *neigh)
662 struct hh_cache *hh;
664 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
666 neigh->output = neigh->ops->output;
668 for (hh = neigh->hh; hh; hh = hh->hh_next)
669 hh->hh_output = neigh->ops->output;
672 /* Neighbour state is OK;
673 enable fast path.
675 Called with write_locked neigh.
677 static void neigh_connect(struct neighbour *neigh)
679 struct hh_cache *hh;
681 NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
683 neigh->output = neigh->ops->connected_output;
685 for (hh = neigh->hh; hh; hh = hh->hh_next)
686 hh->hh_output = neigh->ops->hh_output;
689 static void neigh_periodic_timer(unsigned long arg)
691 struct neigh_table *tbl = (struct neigh_table *)arg;
692 struct neighbour *n, **np;
693 unsigned long expire, now = jiffies;
695 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
697 write_lock(&tbl->lock);
700 * periodically recompute ReachableTime from random function
703 if (time_after(now, tbl->last_rand + 300 * HZ)) {
704 struct neigh_parms *p;
705 tbl->last_rand = now;
706 for (p = &tbl->parms; p; p = p->next)
707 p->reachable_time =
708 neigh_rand_reach_time(p->base_reachable_time);
711 np = &tbl->hash_buckets[tbl->hash_chain_gc];
712 tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
714 while ((n = *np) != NULL) {
715 unsigned int state;
717 write_lock(&n->lock);
719 state = n->nud_state;
720 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
721 write_unlock(&n->lock);
722 goto next_elt;
725 if (time_before(n->used, n->confirmed))
726 n->used = n->confirmed;
728 if (atomic_read(&n->refcnt) == 1 &&
729 (state == NUD_FAILED ||
730 time_after(now, n->used + n->parms->gc_staletime))) {
731 *np = n->next;
732 n->dead = 1;
733 write_unlock(&n->lock);
734 neigh_cleanup_and_release(n);
735 continue;
737 write_unlock(&n->lock);
739 next_elt:
740 np = &n->next;
743 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
744 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
745 * base_reachable_time.
747 expire = tbl->parms.base_reachable_time >> 1;
748 expire /= (tbl->hash_mask + 1);
749 if (!expire)
750 expire = 1;
752 if (expire>HZ)
753 mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
754 else
755 mod_timer(&tbl->gc_timer, now + expire);
757 write_unlock(&tbl->lock);
760 static __inline__ int neigh_max_probes(struct neighbour *n)
762 struct neigh_parms *p = n->parms;
763 return (n->nud_state & NUD_PROBE ?
764 p->ucast_probes :
765 p->ucast_probes + p->app_probes + p->mcast_probes);
768 /* Called when a timer expires for a neighbour entry. */
770 static void neigh_timer_handler(unsigned long arg)
772 unsigned long now, next;
773 struct neighbour *neigh = (struct neighbour *)arg;
774 unsigned state;
775 int notify = 0;
777 write_lock(&neigh->lock);
779 state = neigh->nud_state;
780 now = jiffies;
781 next = now + HZ;
783 if (!(state & NUD_IN_TIMER)) {
784 #ifndef CONFIG_SMP
785 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
786 #endif
787 goto out;
790 if (state & NUD_REACHABLE) {
791 if (time_before_eq(now,
792 neigh->confirmed + neigh->parms->reachable_time)) {
793 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
794 next = neigh->confirmed + neigh->parms->reachable_time;
795 } else if (time_before_eq(now,
796 neigh->used + neigh->parms->delay_probe_time)) {
797 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
798 neigh->nud_state = NUD_DELAY;
799 neigh->updated = jiffies;
800 neigh_suspect(neigh);
801 next = now + neigh->parms->delay_probe_time;
802 } else {
803 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
804 neigh->nud_state = NUD_STALE;
805 neigh->updated = jiffies;
806 neigh_suspect(neigh);
807 notify = 1;
809 } else if (state & NUD_DELAY) {
810 if (time_before_eq(now,
811 neigh->confirmed + neigh->parms->delay_probe_time)) {
812 NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
813 neigh->nud_state = NUD_REACHABLE;
814 neigh->updated = jiffies;
815 neigh_connect(neigh);
816 notify = 1;
817 next = neigh->confirmed + neigh->parms->reachable_time;
818 } else {
819 NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
820 neigh->nud_state = NUD_PROBE;
821 neigh->updated = jiffies;
822 atomic_set(&neigh->probes, 0);
823 next = now + neigh->parms->retrans_time;
825 } else {
826 /* NUD_PROBE|NUD_INCOMPLETE */
827 next = now + neigh->parms->retrans_time;
830 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
831 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
832 struct sk_buff *skb;
834 neigh->nud_state = NUD_FAILED;
835 neigh->updated = jiffies;
836 notify = 1;
837 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
838 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
840 /* It is very thin place. report_unreachable is very complicated
841 routine. Particularly, it can hit the same neighbour entry!
843 So that, we try to be accurate and avoid dead loop. --ANK
845 while (neigh->nud_state == NUD_FAILED &&
846 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
847 write_unlock(&neigh->lock);
848 neigh->ops->error_report(neigh, skb);
849 write_lock(&neigh->lock);
851 skb_queue_purge(&neigh->arp_queue);
854 if (neigh->nud_state & NUD_IN_TIMER) {
855 if (time_before(next, jiffies + HZ/2))
856 next = jiffies + HZ/2;
857 if (!mod_timer(&neigh->timer, next))
858 neigh_hold(neigh);
860 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
861 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
862 /* keep skb alive even if arp_queue overflows */
863 if (skb)
864 skb = skb_copy(skb, GFP_ATOMIC);
865 write_unlock(&neigh->lock);
866 neigh->ops->solicit(neigh, skb);
867 atomic_inc(&neigh->probes);
868 if (skb)
869 kfree_skb(skb);
870 } else {
871 out:
872 write_unlock(&neigh->lock);
875 if (notify)
876 neigh_update_notify(neigh);
878 neigh_release(neigh);
881 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
883 int rc;
884 unsigned long now;
886 write_lock_bh(&neigh->lock);
888 rc = 0;
889 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
890 goto out_unlock_bh;
892 now = jiffies;
894 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
895 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
896 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
897 neigh->nud_state = NUD_INCOMPLETE;
898 neigh->updated = jiffies;
899 neigh_add_timer(neigh, now + 1);
900 } else {
901 neigh->nud_state = NUD_FAILED;
902 neigh->updated = jiffies;
903 write_unlock_bh(&neigh->lock);
905 if (skb)
906 kfree_skb(skb);
907 return 1;
909 } else if (neigh->nud_state & NUD_STALE) {
910 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
911 neigh->nud_state = NUD_DELAY;
912 neigh->updated = jiffies;
913 neigh_add_timer(neigh,
914 jiffies + neigh->parms->delay_probe_time);
917 if (neigh->nud_state == NUD_INCOMPLETE) {
918 if (skb) {
919 if (skb_queue_len(&neigh->arp_queue) >=
920 neigh->parms->queue_len) {
921 struct sk_buff *buff;
922 buff = neigh->arp_queue.next;
923 __skb_unlink(buff, &neigh->arp_queue);
924 kfree_skb(buff);
926 __skb_queue_tail(&neigh->arp_queue, skb);
928 rc = 1;
930 out_unlock_bh:
931 write_unlock_bh(&neigh->lock);
932 return rc;
935 static void neigh_update_hhs(struct neighbour *neigh)
937 struct hh_cache *hh;
938 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
939 = neigh->dev->header_ops->cache_update;
941 if (update) {
942 for (hh = neigh->hh; hh; hh = hh->hh_next) {
943 write_seqlock_bh(&hh->hh_lock);
944 update(hh, neigh->dev, neigh->ha);
945 write_sequnlock_bh(&hh->hh_lock);
952 /* Generic update routine.
953 -- lladdr is new lladdr or NULL, if it is not supplied.
954 -- new is new state.
955 -- flags
956 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
957 if it is different.
958 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
959 lladdr instead of overriding it
960 if it is different.
961 It also allows to retain current state
962 if lladdr is unchanged.
963 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
965 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
966 NTF_ROUTER flag.
967 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
968 a router.
970 Caller MUST hold reference count on the entry.
973 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
974 u32 flags)
976 u8 old;
977 int err;
978 int notify = 0;
979 struct net_device *dev;
980 int update_isrouter = 0;
982 write_lock_bh(&neigh->lock);
984 dev = neigh->dev;
985 old = neigh->nud_state;
986 err = -EPERM;
988 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
989 (old & (NUD_NOARP | NUD_PERMANENT)))
990 goto out;
992 if (!(new & NUD_VALID)) {
993 neigh_del_timer(neigh);
994 if (old & NUD_CONNECTED)
995 neigh_suspect(neigh);
996 neigh->nud_state = new;
997 err = 0;
998 notify = old & NUD_VALID;
999 goto out;
1002 /* Compare new lladdr with cached one */
1003 if (!dev->addr_len) {
1004 /* First case: device needs no address. */
1005 lladdr = neigh->ha;
1006 } else if (lladdr) {
1007 /* The second case: if something is already cached
1008 and a new address is proposed:
1009 - compare new & old
1010 - if they are different, check override flag
1012 if ((old & NUD_VALID) &&
1013 !memcmp(lladdr, neigh->ha, dev->addr_len))
1014 lladdr = neigh->ha;
1015 } else {
1016 /* No address is supplied; if we know something,
1017 use it, otherwise discard the request.
1019 err = -EINVAL;
1020 if (!(old & NUD_VALID))
1021 goto out;
1022 lladdr = neigh->ha;
1025 if (new & NUD_CONNECTED)
1026 neigh->confirmed = jiffies;
1027 neigh->updated = jiffies;
1029 /* If entry was valid and address is not changed,
1030 do not change entry state, if new one is STALE.
1032 err = 0;
1033 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1034 if (old & NUD_VALID) {
1035 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1036 update_isrouter = 0;
1037 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1038 (old & NUD_CONNECTED)) {
1039 lladdr = neigh->ha;
1040 new = NUD_STALE;
1041 } else
1042 goto out;
1043 } else {
1044 if (lladdr == neigh->ha && new == NUD_STALE &&
1045 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1046 (old & NUD_CONNECTED))
1048 new = old;
1052 if (new != old) {
1053 neigh_del_timer(neigh);
1054 if (new & NUD_IN_TIMER)
1055 neigh_add_timer(neigh, (jiffies +
1056 ((new & NUD_REACHABLE) ?
1057 neigh->parms->reachable_time :
1058 0)));
1059 neigh->nud_state = new;
1062 if (lladdr != neigh->ha) {
1063 memcpy(&neigh->ha, lladdr, dev->addr_len);
1064 neigh_update_hhs(neigh);
1065 if (!(new & NUD_CONNECTED))
1066 neigh->confirmed = jiffies -
1067 (neigh->parms->base_reachable_time << 1);
1068 notify = 1;
1070 if (new == old)
1071 goto out;
1072 if (new & NUD_CONNECTED)
1073 neigh_connect(neigh);
1074 else
1075 neigh_suspect(neigh);
1076 if (!(old & NUD_VALID)) {
1077 struct sk_buff *skb;
1079 /* Again: avoid dead loop if something went wrong */
1081 while (neigh->nud_state & NUD_VALID &&
1082 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1083 struct neighbour *n1 = neigh;
1084 write_unlock_bh(&neigh->lock);
1085 /* On shaper/eql skb->dst->neighbour != neigh :( */
1086 if (skb->dst && skb->dst->neighbour)
1087 n1 = skb->dst->neighbour;
1088 n1->output(skb);
1089 write_lock_bh(&neigh->lock);
1091 skb_queue_purge(&neigh->arp_queue);
1093 out:
1094 if (update_isrouter) {
1095 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1096 (neigh->flags | NTF_ROUTER) :
1097 (neigh->flags & ~NTF_ROUTER);
1099 write_unlock_bh(&neigh->lock);
1101 if (notify)
1102 neigh_update_notify(neigh);
1104 return err;
1107 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1108 u8 *lladdr, void *saddr,
1109 struct net_device *dev)
1111 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1112 lladdr || !dev->addr_len);
1113 if (neigh)
1114 neigh_update(neigh, lladdr, NUD_STALE,
1115 NEIGH_UPDATE_F_OVERRIDE);
1116 return neigh;
1119 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1120 __be16 protocol)
1122 struct hh_cache *hh;
1123 struct net_device *dev = dst->dev;
1125 for (hh = n->hh; hh; hh = hh->hh_next)
1126 if (hh->hh_type == protocol)
1127 break;
1129 if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
1130 seqlock_init(&hh->hh_lock);
1131 hh->hh_type = protocol;
1132 atomic_set(&hh->hh_refcnt, 0);
1133 hh->hh_next = NULL;
1135 if (dev->header_ops->cache(n, hh)) {
1136 kfree(hh);
1137 hh = NULL;
1138 } else {
1139 atomic_inc(&hh->hh_refcnt);
1140 hh->hh_next = n->hh;
1141 n->hh = hh;
1142 if (n->nud_state & NUD_CONNECTED)
1143 hh->hh_output = n->ops->hh_output;
1144 else
1145 hh->hh_output = n->ops->output;
1148 if (hh) {
1149 atomic_inc(&hh->hh_refcnt);
1150 dst->hh = hh;
1154 /* This function can be used in contexts, where only old dev_queue_xmit
1155 worked, f.e. if you want to override normal output path (eql, shaper),
1156 but resolution is not made yet.
1159 int neigh_compat_output(struct sk_buff *skb)
1161 struct net_device *dev = skb->dev;
1163 __skb_pull(skb, skb_network_offset(skb));
1165 if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1166 skb->len) < 0 &&
1167 dev->header_ops->rebuild(skb))
1168 return 0;
1170 return dev_queue_xmit(skb);
1173 /* Slow and careful. */
1175 int neigh_resolve_output(struct sk_buff *skb)
1177 struct dst_entry *dst = skb->dst;
1178 struct neighbour *neigh;
1179 int rc = 0;
1181 if (!dst || !(neigh = dst->neighbour))
1182 goto discard;
1184 __skb_pull(skb, skb_network_offset(skb));
1186 if (!neigh_event_send(neigh, skb)) {
1187 int err;
1188 struct net_device *dev = neigh->dev;
1189 if (dev->header_ops->cache && !dst->hh) {
1190 write_lock_bh(&neigh->lock);
1191 if (!dst->hh)
1192 neigh_hh_init(neigh, dst, dst->ops->protocol);
1193 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1194 neigh->ha, NULL, skb->len);
1195 write_unlock_bh(&neigh->lock);
1196 } else {
1197 read_lock_bh(&neigh->lock);
1198 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1199 neigh->ha, NULL, skb->len);
1200 read_unlock_bh(&neigh->lock);
1202 if (err >= 0)
1203 rc = neigh->ops->queue_xmit(skb);
1204 else
1205 goto out_kfree_skb;
1207 out:
1208 return rc;
1209 discard:
1210 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1211 dst, dst ? dst->neighbour : NULL);
1212 out_kfree_skb:
1213 rc = -EINVAL;
1214 kfree_skb(skb);
1215 goto out;
1218 /* As fast as possible without hh cache */
1220 int neigh_connected_output(struct sk_buff *skb)
1222 int err;
1223 struct dst_entry *dst = skb->dst;
1224 struct neighbour *neigh = dst->neighbour;
1225 struct net_device *dev = neigh->dev;
1227 __skb_pull(skb, skb_network_offset(skb));
1229 read_lock_bh(&neigh->lock);
1230 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1231 neigh->ha, NULL, skb->len);
1232 read_unlock_bh(&neigh->lock);
1233 if (err >= 0)
1234 err = neigh->ops->queue_xmit(skb);
1235 else {
1236 err = -EINVAL;
1237 kfree_skb(skb);
1239 return err;
1242 static void neigh_proxy_process(unsigned long arg)
1244 struct neigh_table *tbl = (struct neigh_table *)arg;
1245 long sched_next = 0;
1246 unsigned long now = jiffies;
1247 struct sk_buff *skb;
1249 spin_lock(&tbl->proxy_queue.lock);
1251 skb = tbl->proxy_queue.next;
1253 while (skb != (struct sk_buff *)&tbl->proxy_queue) {
1254 struct sk_buff *back = skb;
1255 long tdif = NEIGH_CB(back)->sched_next - now;
1257 skb = skb->next;
1258 if (tdif <= 0) {
1259 struct net_device *dev = back->dev;
1260 __skb_unlink(back, &tbl->proxy_queue);
1261 if (tbl->proxy_redo && netif_running(dev))
1262 tbl->proxy_redo(back);
1263 else
1264 kfree_skb(back);
1266 dev_put(dev);
1267 } else if (!sched_next || tdif < sched_next)
1268 sched_next = tdif;
1270 del_timer(&tbl->proxy_timer);
1271 if (sched_next)
1272 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1273 spin_unlock(&tbl->proxy_queue.lock);
1276 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1277 struct sk_buff *skb)
1279 unsigned long now = jiffies;
1280 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1282 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1283 kfree_skb(skb);
1284 return;
1287 NEIGH_CB(skb)->sched_next = sched_next;
1288 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1290 spin_lock(&tbl->proxy_queue.lock);
1291 if (del_timer(&tbl->proxy_timer)) {
1292 if (time_before(tbl->proxy_timer.expires, sched_next))
1293 sched_next = tbl->proxy_timer.expires;
1295 dst_release(skb->dst);
1296 skb->dst = NULL;
1297 dev_hold(skb->dev);
1298 __skb_queue_tail(&tbl->proxy_queue, skb);
1299 mod_timer(&tbl->proxy_timer, sched_next);
1300 spin_unlock(&tbl->proxy_queue.lock);
1303 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1304 struct net *net, int ifindex)
1306 struct neigh_parms *p;
1308 for (p = &tbl->parms; p; p = p->next) {
1309 if (p->net != net)
1310 continue;
1311 if ((p->dev && p->dev->ifindex == ifindex) ||
1312 (!p->dev && !ifindex))
1313 return p;
1316 return NULL;
1319 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1320 struct neigh_table *tbl)
1322 struct neigh_parms *p, *ref;
1323 struct net *net;
1325 net = dev->nd_net;
1326 ref = lookup_neigh_params(tbl, net, 0);
1327 if (!ref)
1328 return NULL;
1330 p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
1331 if (p) {
1332 p->tbl = tbl;
1333 atomic_set(&p->refcnt, 1);
1334 INIT_RCU_HEAD(&p->rcu_head);
1335 p->reachable_time =
1336 neigh_rand_reach_time(p->base_reachable_time);
1338 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1339 kfree(p);
1340 return NULL;
1343 dev_hold(dev);
1344 p->dev = dev;
1345 p->net = hold_net(net);
1346 p->sysctl_table = NULL;
1347 write_lock_bh(&tbl->lock);
1348 p->next = tbl->parms.next;
1349 tbl->parms.next = p;
1350 write_unlock_bh(&tbl->lock);
1352 return p;
1355 static void neigh_rcu_free_parms(struct rcu_head *head)
1357 struct neigh_parms *parms =
1358 container_of(head, struct neigh_parms, rcu_head);
1360 neigh_parms_put(parms);
1363 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1365 struct neigh_parms **p;
1367 if (!parms || parms == &tbl->parms)
1368 return;
1369 write_lock_bh(&tbl->lock);
1370 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1371 if (*p == parms) {
1372 *p = parms->next;
1373 parms->dead = 1;
1374 write_unlock_bh(&tbl->lock);
1375 if (parms->dev)
1376 dev_put(parms->dev);
1377 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1378 return;
1381 write_unlock_bh(&tbl->lock);
1382 NEIGH_PRINTK1("neigh_parms_release: not found\n");
1385 static void neigh_parms_destroy(struct neigh_parms *parms)
1387 release_net(parms->net);
1388 kfree(parms);
1391 static struct lock_class_key neigh_table_proxy_queue_class;
1393 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1395 unsigned long now = jiffies;
1396 unsigned long phsize;
1398 tbl->parms.net = &init_net;
1399 atomic_set(&tbl->parms.refcnt, 1);
1400 INIT_RCU_HEAD(&tbl->parms.rcu_head);
1401 tbl->parms.reachable_time =
1402 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1404 if (!tbl->kmem_cachep)
1405 tbl->kmem_cachep =
1406 kmem_cache_create(tbl->id, tbl->entry_size, 0,
1407 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1408 NULL);
1409 tbl->stats = alloc_percpu(struct neigh_statistics);
1410 if (!tbl->stats)
1411 panic("cannot create neighbour cache statistics");
1413 #ifdef CONFIG_PROC_FS
1414 tbl->pde = proc_create(tbl->id, 0, init_net.proc_net_stat,
1415 &neigh_stat_seq_fops);
1416 if (!tbl->pde)
1417 panic("cannot create neighbour proc dir entry");
1418 tbl->pde->data = tbl;
1419 #endif
1421 tbl->hash_mask = 1;
1422 tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1424 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1425 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1427 if (!tbl->hash_buckets || !tbl->phash_buckets)
1428 panic("cannot allocate neighbour cache hashes");
1430 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1432 rwlock_init(&tbl->lock);
1433 setup_timer(&tbl->gc_timer, neigh_periodic_timer, (unsigned long)tbl);
1434 tbl->gc_timer.expires = now + 1;
1435 add_timer(&tbl->gc_timer);
1437 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1438 skb_queue_head_init_class(&tbl->proxy_queue,
1439 &neigh_table_proxy_queue_class);
1441 tbl->last_flush = now;
1442 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1445 void neigh_table_init(struct neigh_table *tbl)
1447 struct neigh_table *tmp;
1449 neigh_table_init_no_netlink(tbl);
1450 write_lock(&neigh_tbl_lock);
1451 for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1452 if (tmp->family == tbl->family)
1453 break;
1455 tbl->next = neigh_tables;
1456 neigh_tables = tbl;
1457 write_unlock(&neigh_tbl_lock);
1459 if (unlikely(tmp)) {
1460 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1461 "family %d\n", tbl->family);
1462 dump_stack();
1466 int neigh_table_clear(struct neigh_table *tbl)
1468 struct neigh_table **tp;
1470 /* It is not clean... Fix it to unload IPv6 module safely */
1471 del_timer_sync(&tbl->gc_timer);
1472 del_timer_sync(&tbl->proxy_timer);
1473 pneigh_queue_purge(&tbl->proxy_queue);
1474 neigh_ifdown(tbl, NULL);
1475 if (atomic_read(&tbl->entries))
1476 printk(KERN_CRIT "neighbour leakage\n");
1477 write_lock(&neigh_tbl_lock);
1478 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1479 if (*tp == tbl) {
1480 *tp = tbl->next;
1481 break;
1484 write_unlock(&neigh_tbl_lock);
1486 neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
1487 tbl->hash_buckets = NULL;
1489 kfree(tbl->phash_buckets);
1490 tbl->phash_buckets = NULL;
1492 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1494 free_percpu(tbl->stats);
1495 tbl->stats = NULL;
1497 kmem_cache_destroy(tbl->kmem_cachep);
1498 tbl->kmem_cachep = NULL;
1500 return 0;
1503 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1505 struct net *net = skb->sk->sk_net;
1506 struct ndmsg *ndm;
1507 struct nlattr *dst_attr;
1508 struct neigh_table *tbl;
1509 struct net_device *dev = NULL;
1510 int err = -EINVAL;
1512 if (nlmsg_len(nlh) < sizeof(*ndm))
1513 goto out;
1515 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1516 if (dst_attr == NULL)
1517 goto out;
1519 ndm = nlmsg_data(nlh);
1520 if (ndm->ndm_ifindex) {
1521 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1522 if (dev == NULL) {
1523 err = -ENODEV;
1524 goto out;
1528 read_lock(&neigh_tbl_lock);
1529 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1530 struct neighbour *neigh;
1532 if (tbl->family != ndm->ndm_family)
1533 continue;
1534 read_unlock(&neigh_tbl_lock);
1536 if (nla_len(dst_attr) < tbl->key_len)
1537 goto out_dev_put;
1539 if (ndm->ndm_flags & NTF_PROXY) {
1540 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1541 goto out_dev_put;
1544 if (dev == NULL)
1545 goto out_dev_put;
1547 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1548 if (neigh == NULL) {
1549 err = -ENOENT;
1550 goto out_dev_put;
1553 err = neigh_update(neigh, NULL, NUD_FAILED,
1554 NEIGH_UPDATE_F_OVERRIDE |
1555 NEIGH_UPDATE_F_ADMIN);
1556 neigh_release(neigh);
1557 goto out_dev_put;
1559 read_unlock(&neigh_tbl_lock);
1560 err = -EAFNOSUPPORT;
1562 out_dev_put:
1563 if (dev)
1564 dev_put(dev);
1565 out:
1566 return err;
1569 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1571 struct net *net = skb->sk->sk_net;
1572 struct ndmsg *ndm;
1573 struct nlattr *tb[NDA_MAX+1];
1574 struct neigh_table *tbl;
1575 struct net_device *dev = NULL;
1576 int err;
1578 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1579 if (err < 0)
1580 goto out;
1582 err = -EINVAL;
1583 if (tb[NDA_DST] == NULL)
1584 goto out;
1586 ndm = nlmsg_data(nlh);
1587 if (ndm->ndm_ifindex) {
1588 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1589 if (dev == NULL) {
1590 err = -ENODEV;
1591 goto out;
1594 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1595 goto out_dev_put;
1598 read_lock(&neigh_tbl_lock);
1599 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1600 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1601 struct neighbour *neigh;
1602 void *dst, *lladdr;
1604 if (tbl->family != ndm->ndm_family)
1605 continue;
1606 read_unlock(&neigh_tbl_lock);
1608 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1609 goto out_dev_put;
1610 dst = nla_data(tb[NDA_DST]);
1611 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1613 if (ndm->ndm_flags & NTF_PROXY) {
1614 struct pneigh_entry *pn;
1616 err = -ENOBUFS;
1617 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1618 if (pn) {
1619 pn->flags = ndm->ndm_flags;
1620 err = 0;
1622 goto out_dev_put;
1625 if (dev == NULL)
1626 goto out_dev_put;
1628 neigh = neigh_lookup(tbl, dst, dev);
1629 if (neigh == NULL) {
1630 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1631 err = -ENOENT;
1632 goto out_dev_put;
1635 neigh = __neigh_lookup_errno(tbl, dst, dev);
1636 if (IS_ERR(neigh)) {
1637 err = PTR_ERR(neigh);
1638 goto out_dev_put;
1640 } else {
1641 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1642 err = -EEXIST;
1643 neigh_release(neigh);
1644 goto out_dev_put;
1647 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1648 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1651 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1652 neigh_release(neigh);
1653 goto out_dev_put;
1656 read_unlock(&neigh_tbl_lock);
1657 err = -EAFNOSUPPORT;
1659 out_dev_put:
1660 if (dev)
1661 dev_put(dev);
1662 out:
1663 return err;
1666 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1668 struct nlattr *nest;
1670 nest = nla_nest_start(skb, NDTA_PARMS);
1671 if (nest == NULL)
1672 return -ENOBUFS;
1674 if (parms->dev)
1675 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1677 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1678 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1679 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1680 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1681 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1682 NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1683 NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1684 NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1685 parms->base_reachable_time);
1686 NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1687 NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1688 NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1689 NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1690 NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1691 NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1693 return nla_nest_end(skb, nest);
1695 nla_put_failure:
1696 return nla_nest_cancel(skb, nest);
1699 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1700 u32 pid, u32 seq, int type, int flags)
1702 struct nlmsghdr *nlh;
1703 struct ndtmsg *ndtmsg;
1705 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1706 if (nlh == NULL)
1707 return -EMSGSIZE;
1709 ndtmsg = nlmsg_data(nlh);
1711 read_lock_bh(&tbl->lock);
1712 ndtmsg->ndtm_family = tbl->family;
1713 ndtmsg->ndtm_pad1 = 0;
1714 ndtmsg->ndtm_pad2 = 0;
1716 NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1717 NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1718 NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1719 NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1720 NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1723 unsigned long now = jiffies;
1724 unsigned int flush_delta = now - tbl->last_flush;
1725 unsigned int rand_delta = now - tbl->last_rand;
1727 struct ndt_config ndc = {
1728 .ndtc_key_len = tbl->key_len,
1729 .ndtc_entry_size = tbl->entry_size,
1730 .ndtc_entries = atomic_read(&tbl->entries),
1731 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1732 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1733 .ndtc_hash_rnd = tbl->hash_rnd,
1734 .ndtc_hash_mask = tbl->hash_mask,
1735 .ndtc_hash_chain_gc = tbl->hash_chain_gc,
1736 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1739 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1743 int cpu;
1744 struct ndt_stats ndst;
1746 memset(&ndst, 0, sizeof(ndst));
1748 for_each_possible_cpu(cpu) {
1749 struct neigh_statistics *st;
1751 st = per_cpu_ptr(tbl->stats, cpu);
1752 ndst.ndts_allocs += st->allocs;
1753 ndst.ndts_destroys += st->destroys;
1754 ndst.ndts_hash_grows += st->hash_grows;
1755 ndst.ndts_res_failed += st->res_failed;
1756 ndst.ndts_lookups += st->lookups;
1757 ndst.ndts_hits += st->hits;
1758 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1759 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1760 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1761 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1764 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1767 BUG_ON(tbl->parms.dev);
1768 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1769 goto nla_put_failure;
1771 read_unlock_bh(&tbl->lock);
1772 return nlmsg_end(skb, nlh);
1774 nla_put_failure:
1775 read_unlock_bh(&tbl->lock);
1776 nlmsg_cancel(skb, nlh);
1777 return -EMSGSIZE;
1780 static int neightbl_fill_param_info(struct sk_buff *skb,
1781 struct neigh_table *tbl,
1782 struct neigh_parms *parms,
1783 u32 pid, u32 seq, int type,
1784 unsigned int flags)
1786 struct ndtmsg *ndtmsg;
1787 struct nlmsghdr *nlh;
1789 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1790 if (nlh == NULL)
1791 return -EMSGSIZE;
1793 ndtmsg = nlmsg_data(nlh);
1795 read_lock_bh(&tbl->lock);
1796 ndtmsg->ndtm_family = tbl->family;
1797 ndtmsg->ndtm_pad1 = 0;
1798 ndtmsg->ndtm_pad2 = 0;
1800 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1801 neightbl_fill_parms(skb, parms) < 0)
1802 goto errout;
1804 read_unlock_bh(&tbl->lock);
1805 return nlmsg_end(skb, nlh);
1806 errout:
1807 read_unlock_bh(&tbl->lock);
1808 nlmsg_cancel(skb, nlh);
1809 return -EMSGSIZE;
1812 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1813 [NDTA_NAME] = { .type = NLA_STRING },
1814 [NDTA_THRESH1] = { .type = NLA_U32 },
1815 [NDTA_THRESH2] = { .type = NLA_U32 },
1816 [NDTA_THRESH3] = { .type = NLA_U32 },
1817 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1818 [NDTA_PARMS] = { .type = NLA_NESTED },
1821 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1822 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1823 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1824 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1825 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1826 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1827 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1828 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1829 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1830 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1831 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1832 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1833 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1834 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1837 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1839 struct net *net = skb->sk->sk_net;
1840 struct neigh_table *tbl;
1841 struct ndtmsg *ndtmsg;
1842 struct nlattr *tb[NDTA_MAX+1];
1843 int err;
1845 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1846 nl_neightbl_policy);
1847 if (err < 0)
1848 goto errout;
1850 if (tb[NDTA_NAME] == NULL) {
1851 err = -EINVAL;
1852 goto errout;
1855 ndtmsg = nlmsg_data(nlh);
1856 read_lock(&neigh_tbl_lock);
1857 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1858 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1859 continue;
1861 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1862 break;
1865 if (tbl == NULL) {
1866 err = -ENOENT;
1867 goto errout_locked;
1871 * We acquire tbl->lock to be nice to the periodic timers and
1872 * make sure they always see a consistent set of values.
1874 write_lock_bh(&tbl->lock);
1876 if (tb[NDTA_PARMS]) {
1877 struct nlattr *tbp[NDTPA_MAX+1];
1878 struct neigh_parms *p;
1879 int i, ifindex = 0;
1881 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1882 nl_ntbl_parm_policy);
1883 if (err < 0)
1884 goto errout_tbl_lock;
1886 if (tbp[NDTPA_IFINDEX])
1887 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1889 p = lookup_neigh_params(tbl, net, ifindex);
1890 if (p == NULL) {
1891 err = -ENOENT;
1892 goto errout_tbl_lock;
1895 for (i = 1; i <= NDTPA_MAX; i++) {
1896 if (tbp[i] == NULL)
1897 continue;
1899 switch (i) {
1900 case NDTPA_QUEUE_LEN:
1901 p->queue_len = nla_get_u32(tbp[i]);
1902 break;
1903 case NDTPA_PROXY_QLEN:
1904 p->proxy_qlen = nla_get_u32(tbp[i]);
1905 break;
1906 case NDTPA_APP_PROBES:
1907 p->app_probes = nla_get_u32(tbp[i]);
1908 break;
1909 case NDTPA_UCAST_PROBES:
1910 p->ucast_probes = nla_get_u32(tbp[i]);
1911 break;
1912 case NDTPA_MCAST_PROBES:
1913 p->mcast_probes = nla_get_u32(tbp[i]);
1914 break;
1915 case NDTPA_BASE_REACHABLE_TIME:
1916 p->base_reachable_time = nla_get_msecs(tbp[i]);
1917 break;
1918 case NDTPA_GC_STALETIME:
1919 p->gc_staletime = nla_get_msecs(tbp[i]);
1920 break;
1921 case NDTPA_DELAY_PROBE_TIME:
1922 p->delay_probe_time = nla_get_msecs(tbp[i]);
1923 break;
1924 case NDTPA_RETRANS_TIME:
1925 p->retrans_time = nla_get_msecs(tbp[i]);
1926 break;
1927 case NDTPA_ANYCAST_DELAY:
1928 p->anycast_delay = nla_get_msecs(tbp[i]);
1929 break;
1930 case NDTPA_PROXY_DELAY:
1931 p->proxy_delay = nla_get_msecs(tbp[i]);
1932 break;
1933 case NDTPA_LOCKTIME:
1934 p->locktime = nla_get_msecs(tbp[i]);
1935 break;
1940 if (tb[NDTA_THRESH1])
1941 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
1943 if (tb[NDTA_THRESH2])
1944 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
1946 if (tb[NDTA_THRESH3])
1947 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
1949 if (tb[NDTA_GC_INTERVAL])
1950 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
1952 err = 0;
1954 errout_tbl_lock:
1955 write_unlock_bh(&tbl->lock);
1956 errout_locked:
1957 read_unlock(&neigh_tbl_lock);
1958 errout:
1959 return err;
1962 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1964 struct net *net = skb->sk->sk_net;
1965 int family, tidx, nidx = 0;
1966 int tbl_skip = cb->args[0];
1967 int neigh_skip = cb->args[1];
1968 struct neigh_table *tbl;
1970 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1972 read_lock(&neigh_tbl_lock);
1973 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
1974 struct neigh_parms *p;
1976 if (tidx < tbl_skip || (family && tbl->family != family))
1977 continue;
1979 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
1980 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
1981 NLM_F_MULTI) <= 0)
1982 break;
1984 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
1985 if (net != p->net)
1986 continue;
1988 if (nidx++ < neigh_skip)
1989 continue;
1991 if (neightbl_fill_param_info(skb, tbl, p,
1992 NETLINK_CB(cb->skb).pid,
1993 cb->nlh->nlmsg_seq,
1994 RTM_NEWNEIGHTBL,
1995 NLM_F_MULTI) <= 0)
1996 goto out;
1999 neigh_skip = 0;
2001 out:
2002 read_unlock(&neigh_tbl_lock);
2003 cb->args[0] = tidx;
2004 cb->args[1] = nidx;
2006 return skb->len;
2009 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2010 u32 pid, u32 seq, int type, unsigned int flags)
2012 unsigned long now = jiffies;
2013 struct nda_cacheinfo ci;
2014 struct nlmsghdr *nlh;
2015 struct ndmsg *ndm;
2017 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2018 if (nlh == NULL)
2019 return -EMSGSIZE;
2021 ndm = nlmsg_data(nlh);
2022 ndm->ndm_family = neigh->ops->family;
2023 ndm->ndm_pad1 = 0;
2024 ndm->ndm_pad2 = 0;
2025 ndm->ndm_flags = neigh->flags;
2026 ndm->ndm_type = neigh->type;
2027 ndm->ndm_ifindex = neigh->dev->ifindex;
2029 NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
2031 read_lock_bh(&neigh->lock);
2032 ndm->ndm_state = neigh->nud_state;
2033 if ((neigh->nud_state & NUD_VALID) &&
2034 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
2035 read_unlock_bh(&neigh->lock);
2036 goto nla_put_failure;
2039 ci.ndm_used = now - neigh->used;
2040 ci.ndm_confirmed = now - neigh->confirmed;
2041 ci.ndm_updated = now - neigh->updated;
2042 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
2043 read_unlock_bh(&neigh->lock);
2045 NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
2046 NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
2048 return nlmsg_end(skb, nlh);
2050 nla_put_failure:
2051 nlmsg_cancel(skb, nlh);
2052 return -EMSGSIZE;
2055 static void neigh_update_notify(struct neighbour *neigh)
2057 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2058 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2061 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2062 struct netlink_callback *cb)
2064 struct net * net = skb->sk->sk_net;
2065 struct neighbour *n;
2066 int rc, h, s_h = cb->args[1];
2067 int idx, s_idx = idx = cb->args[2];
2069 read_lock_bh(&tbl->lock);
2070 for (h = 0; h <= tbl->hash_mask; h++) {
2071 if (h < s_h)
2072 continue;
2073 if (h > s_h)
2074 s_idx = 0;
2075 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
2076 int lidx;
2077 if (n->dev->nd_net != net)
2078 continue;
2079 lidx = idx++;
2080 if (lidx < s_idx)
2081 continue;
2082 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2083 cb->nlh->nlmsg_seq,
2084 RTM_NEWNEIGH,
2085 NLM_F_MULTI) <= 0) {
2086 read_unlock_bh(&tbl->lock);
2087 rc = -1;
2088 goto out;
2092 read_unlock_bh(&tbl->lock);
2093 rc = skb->len;
2094 out:
2095 cb->args[1] = h;
2096 cb->args[2] = idx;
2097 return rc;
2100 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2102 struct neigh_table *tbl;
2103 int t, family, s_t;
2105 read_lock(&neigh_tbl_lock);
2106 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2107 s_t = cb->args[0];
2109 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2110 if (t < s_t || (family && tbl->family != family))
2111 continue;
2112 if (t > s_t)
2113 memset(&cb->args[1], 0, sizeof(cb->args) -
2114 sizeof(cb->args[0]));
2115 if (neigh_dump_table(tbl, skb, cb) < 0)
2116 break;
2118 read_unlock(&neigh_tbl_lock);
2120 cb->args[0] = t;
2121 return skb->len;
2124 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2126 int chain;
2128 read_lock_bh(&tbl->lock);
2129 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2130 struct neighbour *n;
2132 for (n = tbl->hash_buckets[chain]; n; n = n->next)
2133 cb(n, cookie);
2135 read_unlock_bh(&tbl->lock);
2137 EXPORT_SYMBOL(neigh_for_each);
2139 /* The tbl->lock must be held as a writer and BH disabled. */
2140 void __neigh_for_each_release(struct neigh_table *tbl,
2141 int (*cb)(struct neighbour *))
2143 int chain;
2145 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2146 struct neighbour *n, **np;
2148 np = &tbl->hash_buckets[chain];
2149 while ((n = *np) != NULL) {
2150 int release;
2152 write_lock(&n->lock);
2153 release = cb(n);
2154 if (release) {
2155 *np = n->next;
2156 n->dead = 1;
2157 } else
2158 np = &n->next;
2159 write_unlock(&n->lock);
2160 if (release)
2161 neigh_cleanup_and_release(n);
2165 EXPORT_SYMBOL(__neigh_for_each_release);
2167 #ifdef CONFIG_PROC_FS
2169 static struct neighbour *neigh_get_first(struct seq_file *seq)
2171 struct neigh_seq_state *state = seq->private;
2172 struct net *net = state->p.net;
2173 struct neigh_table *tbl = state->tbl;
2174 struct neighbour *n = NULL;
2175 int bucket = state->bucket;
2177 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2178 for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
2179 n = tbl->hash_buckets[bucket];
2181 while (n) {
2182 if (n->dev->nd_net != net)
2183 goto next;
2184 if (state->neigh_sub_iter) {
2185 loff_t fakep = 0;
2186 void *v;
2188 v = state->neigh_sub_iter(state, n, &fakep);
2189 if (!v)
2190 goto next;
2192 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2193 break;
2194 if (n->nud_state & ~NUD_NOARP)
2195 break;
2196 next:
2197 n = n->next;
2200 if (n)
2201 break;
2203 state->bucket = bucket;
2205 return n;
2208 static struct neighbour *neigh_get_next(struct seq_file *seq,
2209 struct neighbour *n,
2210 loff_t *pos)
2212 struct neigh_seq_state *state = seq->private;
2213 struct net *net = state->p.net;
2214 struct neigh_table *tbl = state->tbl;
2216 if (state->neigh_sub_iter) {
2217 void *v = state->neigh_sub_iter(state, n, pos);
2218 if (v)
2219 return n;
2221 n = n->next;
2223 while (1) {
2224 while (n) {
2225 if (n->dev->nd_net != net)
2226 goto next;
2227 if (state->neigh_sub_iter) {
2228 void *v = state->neigh_sub_iter(state, n, pos);
2229 if (v)
2230 return n;
2231 goto next;
2233 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2234 break;
2236 if (n->nud_state & ~NUD_NOARP)
2237 break;
2238 next:
2239 n = n->next;
2242 if (n)
2243 break;
2245 if (++state->bucket > tbl->hash_mask)
2246 break;
2248 n = tbl->hash_buckets[state->bucket];
2251 if (n && pos)
2252 --(*pos);
2253 return n;
2256 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2258 struct neighbour *n = neigh_get_first(seq);
2260 if (n) {
2261 while (*pos) {
2262 n = neigh_get_next(seq, n, pos);
2263 if (!n)
2264 break;
2267 return *pos ? NULL : n;
2270 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2272 struct neigh_seq_state *state = seq->private;
2273 struct net * net = state->p.net;
2274 struct neigh_table *tbl = state->tbl;
2275 struct pneigh_entry *pn = NULL;
2276 int bucket = state->bucket;
2278 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2279 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2280 pn = tbl->phash_buckets[bucket];
2281 while (pn && (pn->net != net))
2282 pn = pn->next;
2283 if (pn)
2284 break;
2286 state->bucket = bucket;
2288 return pn;
2291 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2292 struct pneigh_entry *pn,
2293 loff_t *pos)
2295 struct neigh_seq_state *state = seq->private;
2296 struct net * net = state->p.net;
2297 struct neigh_table *tbl = state->tbl;
2299 pn = pn->next;
2300 while (!pn) {
2301 if (++state->bucket > PNEIGH_HASHMASK)
2302 break;
2303 pn = tbl->phash_buckets[state->bucket];
2304 while (pn && (pn->net != net))
2305 pn = pn->next;
2306 if (pn)
2307 break;
2310 if (pn && pos)
2311 --(*pos);
2313 return pn;
2316 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2318 struct pneigh_entry *pn = pneigh_get_first(seq);
2320 if (pn) {
2321 while (*pos) {
2322 pn = pneigh_get_next(seq, pn, pos);
2323 if (!pn)
2324 break;
2327 return *pos ? NULL : pn;
2330 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2332 struct neigh_seq_state *state = seq->private;
2333 void *rc;
2335 rc = neigh_get_idx(seq, pos);
2336 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2337 rc = pneigh_get_idx(seq, pos);
2339 return rc;
2342 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2343 __acquires(tbl->lock)
2345 struct neigh_seq_state *state = seq->private;
2346 loff_t pos_minus_one;
2348 state->tbl = tbl;
2349 state->bucket = 0;
2350 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2352 read_lock_bh(&tbl->lock);
2354 pos_minus_one = *pos - 1;
2355 return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
2357 EXPORT_SYMBOL(neigh_seq_start);
2359 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2361 struct neigh_seq_state *state;
2362 void *rc;
2364 if (v == SEQ_START_TOKEN) {
2365 rc = neigh_get_idx(seq, pos);
2366 goto out;
2369 state = seq->private;
2370 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2371 rc = neigh_get_next(seq, v, NULL);
2372 if (rc)
2373 goto out;
2374 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2375 rc = pneigh_get_first(seq);
2376 } else {
2377 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2378 rc = pneigh_get_next(seq, v, NULL);
2380 out:
2381 ++(*pos);
2382 return rc;
2384 EXPORT_SYMBOL(neigh_seq_next);
2386 void neigh_seq_stop(struct seq_file *seq, void *v)
2387 __releases(tbl->lock)
2389 struct neigh_seq_state *state = seq->private;
2390 struct neigh_table *tbl = state->tbl;
2392 read_unlock_bh(&tbl->lock);
2394 EXPORT_SYMBOL(neigh_seq_stop);
2396 /* statistics via seq_file */
2398 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2400 struct proc_dir_entry *pde = seq->private;
2401 struct neigh_table *tbl = pde->data;
2402 int cpu;
2404 if (*pos == 0)
2405 return SEQ_START_TOKEN;
2407 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
2408 if (!cpu_possible(cpu))
2409 continue;
2410 *pos = cpu+1;
2411 return per_cpu_ptr(tbl->stats, cpu);
2413 return NULL;
2416 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2418 struct proc_dir_entry *pde = seq->private;
2419 struct neigh_table *tbl = pde->data;
2420 int cpu;
2422 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
2423 if (!cpu_possible(cpu))
2424 continue;
2425 *pos = cpu+1;
2426 return per_cpu_ptr(tbl->stats, cpu);
2428 return NULL;
/* Nothing to undo: the statistics iterator takes no lock or reference. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2436 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2438 struct proc_dir_entry *pde = seq->private;
2439 struct neigh_table *tbl = pde->data;
2440 struct neigh_statistics *st = v;
2442 if (v == SEQ_START_TOKEN) {
2443 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
2444 return 0;
2447 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2448 "%08lx %08lx %08lx %08lx\n",
2449 atomic_read(&tbl->entries),
2451 st->allocs,
2452 st->destroys,
2453 st->hash_grows,
2455 st->lookups,
2456 st->hits,
2458 st->res_failed,
2460 st->rcv_probes_mcast,
2461 st->rcv_probes_ucast,
2463 st->periodic_gc_runs,
2464 st->forced_gc_runs
2467 return 0;
2470 static const struct seq_operations neigh_stat_seq_ops = {
2471 .start = neigh_stat_seq_start,
2472 .next = neigh_stat_seq_next,
2473 .stop = neigh_stat_seq_stop,
2474 .show = neigh_stat_seq_show,
2477 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2479 int ret = seq_open(file, &neigh_stat_seq_ops);
2481 if (!ret) {
2482 struct seq_file *sf = file->private_data;
2483 sf->private = PDE(inode);
2485 return ret;
2488 static const struct file_operations neigh_stat_seq_fops = {
2489 .owner = THIS_MODULE,
2490 .open = neigh_stat_seq_open,
2491 .read = seq_read,
2492 .llseek = seq_lseek,
2493 .release = seq_release,
2496 #endif /* CONFIG_PROC_FS */
2498 static inline size_t neigh_nlmsg_size(void)
2500 return NLMSG_ALIGN(sizeof(struct ndmsg))
2501 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2502 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2503 + nla_total_size(sizeof(struct nda_cacheinfo))
2504 + nla_total_size(4); /* NDA_PROBES */
2507 static void __neigh_notify(struct neighbour *n, int type, int flags)
2509 struct net *net = n->dev->nd_net;
2510 struct sk_buff *skb;
2511 int err = -ENOBUFS;
2513 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2514 if (skb == NULL)
2515 goto errout;
2517 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2518 if (err < 0) {
2519 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2520 WARN_ON(err == -EMSGSIZE);
2521 kfree_skb(skb);
2522 goto errout;
2524 err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2525 errout:
2526 if (err < 0)
2527 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
2530 #ifdef CONFIG_ARPD
2531 void neigh_app_ns(struct neighbour *n)
2533 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2535 #endif /* CONFIG_ARPD */
2537 #ifdef CONFIG_SYSCTL
2539 static struct neigh_sysctl_table {
2540 struct ctl_table_header *sysctl_header;
2541 struct ctl_table neigh_vars[__NET_NEIGH_MAX];
2542 char *dev_name;
2543 } neigh_sysctl_template __read_mostly = {
2544 .neigh_vars = {
2546 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
2547 .procname = "mcast_solicit",
2548 .maxlen = sizeof(int),
2549 .mode = 0644,
2550 .proc_handler = &proc_dointvec,
2553 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
2554 .procname = "ucast_solicit",
2555 .maxlen = sizeof(int),
2556 .mode = 0644,
2557 .proc_handler = &proc_dointvec,
2560 .ctl_name = NET_NEIGH_APP_SOLICIT,
2561 .procname = "app_solicit",
2562 .maxlen = sizeof(int),
2563 .mode = 0644,
2564 .proc_handler = &proc_dointvec,
2567 .procname = "retrans_time",
2568 .maxlen = sizeof(int),
2569 .mode = 0644,
2570 .proc_handler = &proc_dointvec_userhz_jiffies,
2573 .ctl_name = NET_NEIGH_REACHABLE_TIME,
2574 .procname = "base_reachable_time",
2575 .maxlen = sizeof(int),
2576 .mode = 0644,
2577 .proc_handler = &proc_dointvec_jiffies,
2578 .strategy = &sysctl_jiffies,
2581 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
2582 .procname = "delay_first_probe_time",
2583 .maxlen = sizeof(int),
2584 .mode = 0644,
2585 .proc_handler = &proc_dointvec_jiffies,
2586 .strategy = &sysctl_jiffies,
2589 .ctl_name = NET_NEIGH_GC_STALE_TIME,
2590 .procname = "gc_stale_time",
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
2593 .proc_handler = &proc_dointvec_jiffies,
2594 .strategy = &sysctl_jiffies,
2597 .ctl_name = NET_NEIGH_UNRES_QLEN,
2598 .procname = "unres_qlen",
2599 .maxlen = sizeof(int),
2600 .mode = 0644,
2601 .proc_handler = &proc_dointvec,
2604 .ctl_name = NET_NEIGH_PROXY_QLEN,
2605 .procname = "proxy_qlen",
2606 .maxlen = sizeof(int),
2607 .mode = 0644,
2608 .proc_handler = &proc_dointvec,
2611 .procname = "anycast_delay",
2612 .maxlen = sizeof(int),
2613 .mode = 0644,
2614 .proc_handler = &proc_dointvec_userhz_jiffies,
2617 .procname = "proxy_delay",
2618 .maxlen = sizeof(int),
2619 .mode = 0644,
2620 .proc_handler = &proc_dointvec_userhz_jiffies,
2623 .procname = "locktime",
2624 .maxlen = sizeof(int),
2625 .mode = 0644,
2626 .proc_handler = &proc_dointvec_userhz_jiffies,
2629 .ctl_name = NET_NEIGH_RETRANS_TIME_MS,
2630 .procname = "retrans_time_ms",
2631 .maxlen = sizeof(int),
2632 .mode = 0644,
2633 .proc_handler = &proc_dointvec_ms_jiffies,
2634 .strategy = &sysctl_ms_jiffies,
2637 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
2638 .procname = "base_reachable_time_ms",
2639 .maxlen = sizeof(int),
2640 .mode = 0644,
2641 .proc_handler = &proc_dointvec_ms_jiffies,
2642 .strategy = &sysctl_ms_jiffies,
2645 .ctl_name = NET_NEIGH_GC_INTERVAL,
2646 .procname = "gc_interval",
2647 .maxlen = sizeof(int),
2648 .mode = 0644,
2649 .proc_handler = &proc_dointvec_jiffies,
2650 .strategy = &sysctl_jiffies,
2653 .ctl_name = NET_NEIGH_GC_THRESH1,
2654 .procname = "gc_thresh1",
2655 .maxlen = sizeof(int),
2656 .mode = 0644,
2657 .proc_handler = &proc_dointvec,
2660 .ctl_name = NET_NEIGH_GC_THRESH2,
2661 .procname = "gc_thresh2",
2662 .maxlen = sizeof(int),
2663 .mode = 0644,
2664 .proc_handler = &proc_dointvec,
2667 .ctl_name = NET_NEIGH_GC_THRESH3,
2668 .procname = "gc_thresh3",
2669 .maxlen = sizeof(int),
2670 .mode = 0644,
2671 .proc_handler = &proc_dointvec,
2677 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2678 int p_id, int pdev_id, char *p_name,
2679 proc_handler *handler, ctl_handler *strategy)
2681 struct neigh_sysctl_table *t;
2682 const char *dev_name_source = NULL;
2684 #define NEIGH_CTL_PATH_ROOT 0
2685 #define NEIGH_CTL_PATH_PROTO 1
2686 #define NEIGH_CTL_PATH_NEIGH 2
2687 #define NEIGH_CTL_PATH_DEV 3
2689 struct ctl_path neigh_path[] = {
2690 { .procname = "net", .ctl_name = CTL_NET, },
2691 { .procname = "proto", .ctl_name = 0, },
2692 { .procname = "neigh", .ctl_name = 0, },
2693 { .procname = "default", .ctl_name = NET_PROTO_CONF_DEFAULT, },
2694 { },
2697 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
2698 if (!t)
2699 goto err;
2701 t->neigh_vars[0].data = &p->mcast_probes;
2702 t->neigh_vars[1].data = &p->ucast_probes;
2703 t->neigh_vars[2].data = &p->app_probes;
2704 t->neigh_vars[3].data = &p->retrans_time;
2705 t->neigh_vars[4].data = &p->base_reachable_time;
2706 t->neigh_vars[5].data = &p->delay_probe_time;
2707 t->neigh_vars[6].data = &p->gc_staletime;
2708 t->neigh_vars[7].data = &p->queue_len;
2709 t->neigh_vars[8].data = &p->proxy_qlen;
2710 t->neigh_vars[9].data = &p->anycast_delay;
2711 t->neigh_vars[10].data = &p->proxy_delay;
2712 t->neigh_vars[11].data = &p->locktime;
2713 t->neigh_vars[12].data = &p->retrans_time;
2714 t->neigh_vars[13].data = &p->base_reachable_time;
2716 if (dev) {
2717 dev_name_source = dev->name;
2718 neigh_path[NEIGH_CTL_PATH_DEV].ctl_name = dev->ifindex;
2719 /* Terminate the table early */
2720 memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
2721 } else {
2722 dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
2723 t->neigh_vars[14].data = (int *)(p + 1);
2724 t->neigh_vars[15].data = (int *)(p + 1) + 1;
2725 t->neigh_vars[16].data = (int *)(p + 1) + 2;
2726 t->neigh_vars[17].data = (int *)(p + 1) + 3;
2730 if (handler || strategy) {
2731 /* RetransTime */
2732 t->neigh_vars[3].proc_handler = handler;
2733 t->neigh_vars[3].strategy = strategy;
2734 t->neigh_vars[3].extra1 = dev;
2735 if (!strategy)
2736 t->neigh_vars[3].ctl_name = CTL_UNNUMBERED;
2737 /* ReachableTime */
2738 t->neigh_vars[4].proc_handler = handler;
2739 t->neigh_vars[4].strategy = strategy;
2740 t->neigh_vars[4].extra1 = dev;
2741 if (!strategy)
2742 t->neigh_vars[4].ctl_name = CTL_UNNUMBERED;
2743 /* RetransTime (in milliseconds)*/
2744 t->neigh_vars[12].proc_handler = handler;
2745 t->neigh_vars[12].strategy = strategy;
2746 t->neigh_vars[12].extra1 = dev;
2747 if (!strategy)
2748 t->neigh_vars[12].ctl_name = CTL_UNNUMBERED;
2749 /* ReachableTime (in milliseconds) */
2750 t->neigh_vars[13].proc_handler = handler;
2751 t->neigh_vars[13].strategy = strategy;
2752 t->neigh_vars[13].extra1 = dev;
2753 if (!strategy)
2754 t->neigh_vars[13].ctl_name = CTL_UNNUMBERED;
2757 t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2758 if (!t->dev_name)
2759 goto free;
2761 neigh_path[NEIGH_CTL_PATH_DEV].procname = t->dev_name;
2762 neigh_path[NEIGH_CTL_PATH_NEIGH].ctl_name = pdev_id;
2763 neigh_path[NEIGH_CTL_PATH_PROTO].procname = p_name;
2764 neigh_path[NEIGH_CTL_PATH_PROTO].ctl_name = p_id;
2766 t->sysctl_header = register_sysctl_paths(neigh_path, t->neigh_vars);
2767 if (!t->sysctl_header)
2768 goto free_procname;
2770 p->sysctl_table = t;
2771 return 0;
2773 free_procname:
2774 kfree(t->dev_name);
2775 free:
2776 kfree(t);
2777 err:
2778 return -ENOBUFS;
2781 void neigh_sysctl_unregister(struct neigh_parms *p)
2783 if (p->sysctl_table) {
2784 struct neigh_sysctl_table *t = p->sysctl_table;
2785 p->sysctl_table = NULL;
2786 unregister_sysctl_table(t->sysctl_header);
2787 kfree(t->dev_name);
2788 kfree(t);
2792 #endif /* CONFIG_SYSCTL */
2794 static int __init neigh_init(void)
2796 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2797 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2798 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2800 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2801 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2803 return 0;
2806 subsys_initcall(neigh_init);
/* Symbols made available outside this file. */
EXPORT_SYMBOL(__neigh_event_send);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
EXPORT_SYMBOL(neigh_create);
EXPORT_SYMBOL(neigh_destroy);
EXPORT_SYMBOL(neigh_event_ns);
EXPORT_SYMBOL(neigh_ifdown);
EXPORT_SYMBOL(neigh_lookup);
EXPORT_SYMBOL(neigh_lookup_nodev);
EXPORT_SYMBOL(neigh_parms_alloc);
EXPORT_SYMBOL(neigh_parms_release);
EXPORT_SYMBOL(neigh_rand_reach_time);
EXPORT_SYMBOL(neigh_resolve_output);
EXPORT_SYMBOL(neigh_table_clear);
EXPORT_SYMBOL(neigh_table_init);
EXPORT_SYMBOL(neigh_table_init_no_netlink);
EXPORT_SYMBOL(neigh_update);
EXPORT_SYMBOL(pneigh_enqueue);
EXPORT_SYMBOL(pneigh_lookup);
EXPORT_SYMBOL_GPL(__pneigh_lookup);

/* Exported only when the corresponding option builds the function. */
#ifdef CONFIG_ARPD
EXPORT_SYMBOL(neigh_app_ns);
#endif
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(neigh_sysctl_register);
EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif