[NET]: Fix the prototype of call_netdevice_notifiers.
[linux-2.6/kvm.git] / net / core / neighbour.c
blob2c6577c1eedd7947dbb5d0ae147dc759eba02710
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
24 #ifdef CONFIG_SYSCTL
25 #include <linux/sysctl.h>
26 #endif
27 #include <linux/times.h>
28 #include <net/net_namespace.h>
29 #include <net/neighbour.h>
30 #include <net/dst.h>
31 #include <net/sock.h>
32 #include <net/netevent.h>
33 #include <net/netlink.h>
34 #include <linux/rtnetlink.h>
35 #include <linux/random.h>
36 #include <linux/string.h>
37 #include <linux/log2.h>
/*
 * Debug verbosity for this file.  NEIGH_PRINTK0 always prints,
 * NEIGH_PRINTK1 prints when NEIGH_DEBUG >= 1 and NEIGH_PRINTK2 prints
 * when NEIGH_DEBUG >= 2; a disabled level expands to an empty statement.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)

#define NEIGH_PRINTK0 NEIGH_PRINTK

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#else
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#else
#define NEIGH_PRINTK2 NEIGH_NOPRINTK
#endif

/* Mask applied to the proxy-neighbour hash value to select a bucket. */
#define PNEIGH_HASHMASK 0xF
/* Forward declarations for functions used before their definition. */
static void neigh_timer_handler(unsigned long arg);
static void __neigh_notify(struct neighbour *n, int type, int flags);
static void neigh_update_notify(struct neighbour *neigh);
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);

/* Head of the list of neighbour tables. */
static struct neigh_table *neigh_tables;

#ifdef CONFIG_PROC_FS
/* File operations installed on each table's proc statistics entry. */
static const struct file_operations neigh_stat_seq_fops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     Doing so will result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with a reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes the ll address data and its validity state.
   However, the same lock is also used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure which we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
/* Read/write lock declared for the list of neighbour tables. */
static DEFINE_RWLOCK(neigh_tbl_lock);
102 static int neigh_blackhole(struct sk_buff *skb)
104 kfree_skb(skb);
105 return -ENETDOWN;
108 static void neigh_cleanup_and_release(struct neighbour *neigh)
110 if (neigh->parms->neigh_cleanup)
111 neigh->parms->neigh_cleanup(neigh);
113 __neigh_notify(neigh, RTM_DELNEIGH, 0);
114 neigh_release(neigh);
/*
 * Pick a random value from the interval (1/2)*base...(3/2)*base.
 * This corresponds to the default IPv6 settings and is deliberately
 * not overridable, because it is a reasonable choice.
 *
 * Returns 0 when @base is 0.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
129 static int neigh_forced_gc(struct neigh_table *tbl)
131 int shrunk = 0;
132 int i;
134 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136 write_lock_bh(&tbl->lock);
137 for (i = 0; i <= tbl->hash_mask; i++) {
138 struct neighbour *n, **np;
140 np = &tbl->hash_buckets[i];
141 while ((n = *np) != NULL) {
142 /* Neighbour record may be discarded if:
143 * - nobody refers to it.
144 * - it is not permanent
146 write_lock(&n->lock);
147 if (atomic_read(&n->refcnt) == 1 &&
148 !(n->nud_state & NUD_PERMANENT)) {
149 *np = n->next;
150 n->dead = 1;
151 shrunk = 1;
152 write_unlock(&n->lock);
153 neigh_cleanup_and_release(n);
154 continue;
156 write_unlock(&n->lock);
157 np = &n->next;
161 tbl->last_flush = jiffies;
163 write_unlock_bh(&tbl->lock);
165 return shrunk;
168 static int neigh_del_timer(struct neighbour *n)
170 if ((n->nud_state & NUD_IN_TIMER) &&
171 del_timer(&n->timer)) {
172 neigh_release(n);
173 return 1;
175 return 0;
178 static void pneigh_queue_purge(struct sk_buff_head *list)
180 struct sk_buff *skb;
182 while ((skb = skb_dequeue(list)) != NULL) {
183 dev_put(skb->dev);
184 kfree_skb(skb);
188 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
190 int i;
192 for (i = 0; i <= tbl->hash_mask; i++) {
193 struct neighbour *n, **np = &tbl->hash_buckets[i];
195 while ((n = *np) != NULL) {
196 if (dev && n->dev != dev) {
197 np = &n->next;
198 continue;
200 *np = n->next;
201 write_lock(&n->lock);
202 neigh_del_timer(n);
203 n->dead = 1;
205 if (atomic_read(&n->refcnt) != 1) {
206 /* The most unpleasant situation.
207 We must destroy neighbour entry,
208 but someone still uses it.
210 The destroy will be delayed until
211 the last user releases us, but
212 we must kill timers etc. and move
213 it to safe state.
215 skb_queue_purge(&n->arp_queue);
216 n->output = neigh_blackhole;
217 if (n->nud_state & NUD_VALID)
218 n->nud_state = NUD_NOARP;
219 else
220 n->nud_state = NUD_NONE;
221 NEIGH_PRINTK2("neigh %p is stray.\n", n);
223 write_unlock(&n->lock);
224 neigh_cleanup_and_release(n);
229 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
231 write_lock_bh(&tbl->lock);
232 neigh_flush_dev(tbl, dev);
233 write_unlock_bh(&tbl->lock);
236 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
238 write_lock_bh(&tbl->lock);
239 neigh_flush_dev(tbl, dev);
240 pneigh_ifdown(tbl, dev);
241 write_unlock_bh(&tbl->lock);
243 del_timer_sync(&tbl->proxy_timer);
244 pneigh_queue_purge(&tbl->proxy_queue);
245 return 0;
248 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
250 struct neighbour *n = NULL;
251 unsigned long now = jiffies;
252 int entries;
254 entries = atomic_inc_return(&tbl->entries) - 1;
255 if (entries >= tbl->gc_thresh3 ||
256 (entries >= tbl->gc_thresh2 &&
257 time_after(now, tbl->last_flush + 5 * HZ))) {
258 if (!neigh_forced_gc(tbl) &&
259 entries >= tbl->gc_thresh3)
260 goto out_entries;
263 n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
264 if (!n)
265 goto out_entries;
267 skb_queue_head_init(&n->arp_queue);
268 rwlock_init(&n->lock);
269 n->updated = n->used = now;
270 n->nud_state = NUD_NONE;
271 n->output = neigh_blackhole;
272 n->parms = neigh_parms_clone(&tbl->parms);
273 init_timer(&n->timer);
274 n->timer.function = neigh_timer_handler;
275 n->timer.data = (unsigned long)n;
277 NEIGH_CACHE_STAT_INC(tbl, allocs);
278 n->tbl = tbl;
279 atomic_set(&n->refcnt, 1);
280 n->dead = 1;
281 out:
282 return n;
284 out_entries:
285 atomic_dec(&tbl->entries);
286 goto out;
289 static struct neighbour **neigh_hash_alloc(unsigned int entries)
291 unsigned long size = entries * sizeof(struct neighbour *);
292 struct neighbour **ret;
294 if (size <= PAGE_SIZE) {
295 ret = kzalloc(size, GFP_ATOMIC);
296 } else {
297 ret = (struct neighbour **)
298 __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
300 return ret;
303 static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
305 unsigned long size = entries * sizeof(struct neighbour *);
307 if (size <= PAGE_SIZE)
308 kfree(hash);
309 else
310 free_pages((unsigned long)hash, get_order(size));
313 static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
315 struct neighbour **new_hash, **old_hash;
316 unsigned int i, new_hash_mask, old_entries;
318 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
320 BUG_ON(!is_power_of_2(new_entries));
321 new_hash = neigh_hash_alloc(new_entries);
322 if (!new_hash)
323 return;
325 old_entries = tbl->hash_mask + 1;
326 new_hash_mask = new_entries - 1;
327 old_hash = tbl->hash_buckets;
329 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
330 for (i = 0; i < old_entries; i++) {
331 struct neighbour *n, *next;
333 for (n = old_hash[i]; n; n = next) {
334 unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
336 hash_val &= new_hash_mask;
337 next = n->next;
339 n->next = new_hash[hash_val];
340 new_hash[hash_val] = n;
343 tbl->hash_buckets = new_hash;
344 tbl->hash_mask = new_hash_mask;
346 neigh_hash_free(old_hash, old_entries);
349 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
350 struct net_device *dev)
352 struct neighbour *n;
353 int key_len = tbl->key_len;
354 u32 hash_val = tbl->hash(pkey, dev);
356 NEIGH_CACHE_STAT_INC(tbl, lookups);
358 read_lock_bh(&tbl->lock);
359 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
360 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
361 neigh_hold(n);
362 NEIGH_CACHE_STAT_INC(tbl, hits);
363 break;
366 read_unlock_bh(&tbl->lock);
367 return n;
370 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
372 struct neighbour *n;
373 int key_len = tbl->key_len;
374 u32 hash_val = tbl->hash(pkey, NULL);
376 NEIGH_CACHE_STAT_INC(tbl, lookups);
378 read_lock_bh(&tbl->lock);
379 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
380 if (!memcmp(n->primary_key, pkey, key_len)) {
381 neigh_hold(n);
382 NEIGH_CACHE_STAT_INC(tbl, hits);
383 break;
386 read_unlock_bh(&tbl->lock);
387 return n;
390 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
391 struct net_device *dev)
393 u32 hash_val;
394 int key_len = tbl->key_len;
395 int error;
396 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
398 if (!n) {
399 rc = ERR_PTR(-ENOBUFS);
400 goto out;
403 memcpy(n->primary_key, pkey, key_len);
404 n->dev = dev;
405 dev_hold(dev);
407 /* Protocol specific setup. */
408 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
409 rc = ERR_PTR(error);
410 goto out_neigh_release;
413 /* Device specific setup. */
414 if (n->parms->neigh_setup &&
415 (error = n->parms->neigh_setup(n)) < 0) {
416 rc = ERR_PTR(error);
417 goto out_neigh_release;
420 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
422 write_lock_bh(&tbl->lock);
424 if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
425 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
427 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
429 if (n->parms->dead) {
430 rc = ERR_PTR(-EINVAL);
431 goto out_tbl_unlock;
434 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
435 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
436 neigh_hold(n1);
437 rc = n1;
438 goto out_tbl_unlock;
442 n->next = tbl->hash_buckets[hash_val];
443 tbl->hash_buckets[hash_val] = n;
444 n->dead = 0;
445 neigh_hold(n);
446 write_unlock_bh(&tbl->lock);
447 NEIGH_PRINTK2("neigh %p is created.\n", n);
448 rc = n;
449 out:
450 return rc;
451 out_tbl_unlock:
452 write_unlock_bh(&tbl->lock);
453 out_neigh_release:
454 neigh_release(n);
455 goto out;
458 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
459 struct net_device *dev, int creat)
461 struct pneigh_entry *n;
462 int key_len = tbl->key_len;
463 u32 hash_val = *(u32 *)(pkey + key_len - 4);
465 hash_val ^= (hash_val >> 16);
466 hash_val ^= hash_val >> 8;
467 hash_val ^= hash_val >> 4;
468 hash_val &= PNEIGH_HASHMASK;
470 read_lock_bh(&tbl->lock);
472 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
473 if (!memcmp(n->key, pkey, key_len) &&
474 (n->dev == dev || !n->dev)) {
475 read_unlock_bh(&tbl->lock);
476 goto out;
479 read_unlock_bh(&tbl->lock);
480 n = NULL;
481 if (!creat)
482 goto out;
484 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
485 if (!n)
486 goto out;
488 memcpy(n->key, pkey, key_len);
489 n->dev = dev;
490 if (dev)
491 dev_hold(dev);
493 if (tbl->pconstructor && tbl->pconstructor(n)) {
494 if (dev)
495 dev_put(dev);
496 kfree(n);
497 n = NULL;
498 goto out;
501 write_lock_bh(&tbl->lock);
502 n->next = tbl->phash_buckets[hash_val];
503 tbl->phash_buckets[hash_val] = n;
504 write_unlock_bh(&tbl->lock);
505 out:
506 return n;
510 int pneigh_delete(struct neigh_table *tbl, const void *pkey,
511 struct net_device *dev)
513 struct pneigh_entry *n, **np;
514 int key_len = tbl->key_len;
515 u32 hash_val = *(u32 *)(pkey + key_len - 4);
517 hash_val ^= (hash_val >> 16);
518 hash_val ^= hash_val >> 8;
519 hash_val ^= hash_val >> 4;
520 hash_val &= PNEIGH_HASHMASK;
522 write_lock_bh(&tbl->lock);
523 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
524 np = &n->next) {
525 if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
526 *np = n->next;
527 write_unlock_bh(&tbl->lock);
528 if (tbl->pdestructor)
529 tbl->pdestructor(n);
530 if (n->dev)
531 dev_put(n->dev);
532 kfree(n);
533 return 0;
536 write_unlock_bh(&tbl->lock);
537 return -ENOENT;
540 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
542 struct pneigh_entry *n, **np;
543 u32 h;
545 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
546 np = &tbl->phash_buckets[h];
547 while ((n = *np) != NULL) {
548 if (!dev || n->dev == dev) {
549 *np = n->next;
550 if (tbl->pdestructor)
551 tbl->pdestructor(n);
552 if (n->dev)
553 dev_put(n->dev);
554 kfree(n);
555 continue;
557 np = &n->next;
560 return -ENOENT;
565 * neighbour must already be out of the table;
568 void neigh_destroy(struct neighbour *neigh)
570 struct hh_cache *hh;
572 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
574 if (!neigh->dead) {
575 printk(KERN_WARNING
576 "Destroying alive neighbour %p\n", neigh);
577 dump_stack();
578 return;
581 if (neigh_del_timer(neigh))
582 printk(KERN_WARNING "Impossible event.\n");
584 while ((hh = neigh->hh) != NULL) {
585 neigh->hh = hh->hh_next;
586 hh->hh_next = NULL;
588 write_seqlock_bh(&hh->hh_lock);
589 hh->hh_output = neigh_blackhole;
590 write_sequnlock_bh(&hh->hh_lock);
591 if (atomic_dec_and_test(&hh->hh_refcnt))
592 kfree(hh);
595 skb_queue_purge(&neigh->arp_queue);
597 dev_put(neigh->dev);
598 neigh_parms_put(neigh->parms);
600 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
602 atomic_dec(&neigh->tbl->entries);
603 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
606 /* Neighbour state is suspicious;
607 disable fast path.
609 Called with write_locked neigh.
611 static void neigh_suspect(struct neighbour *neigh)
613 struct hh_cache *hh;
615 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
617 neigh->output = neigh->ops->output;
619 for (hh = neigh->hh; hh; hh = hh->hh_next)
620 hh->hh_output = neigh->ops->output;
623 /* Neighbour state is OK;
624 enable fast path.
626 Called with write_locked neigh.
628 static void neigh_connect(struct neighbour *neigh)
630 struct hh_cache *hh;
632 NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
634 neigh->output = neigh->ops->connected_output;
636 for (hh = neigh->hh; hh; hh = hh->hh_next)
637 hh->hh_output = neigh->ops->hh_output;
640 static void neigh_periodic_timer(unsigned long arg)
642 struct neigh_table *tbl = (struct neigh_table *)arg;
643 struct neighbour *n, **np;
644 unsigned long expire, now = jiffies;
646 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
648 write_lock(&tbl->lock);
651 * periodically recompute ReachableTime from random function
654 if (time_after(now, tbl->last_rand + 300 * HZ)) {
655 struct neigh_parms *p;
656 tbl->last_rand = now;
657 for (p = &tbl->parms; p; p = p->next)
658 p->reachable_time =
659 neigh_rand_reach_time(p->base_reachable_time);
662 np = &tbl->hash_buckets[tbl->hash_chain_gc];
663 tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
665 while ((n = *np) != NULL) {
666 unsigned int state;
668 write_lock(&n->lock);
670 state = n->nud_state;
671 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
672 write_unlock(&n->lock);
673 goto next_elt;
676 if (time_before(n->used, n->confirmed))
677 n->used = n->confirmed;
679 if (atomic_read(&n->refcnt) == 1 &&
680 (state == NUD_FAILED ||
681 time_after(now, n->used + n->parms->gc_staletime))) {
682 *np = n->next;
683 n->dead = 1;
684 write_unlock(&n->lock);
685 neigh_cleanup_and_release(n);
686 continue;
688 write_unlock(&n->lock);
690 next_elt:
691 np = &n->next;
694 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
695 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
696 * base_reachable_time.
698 expire = tbl->parms.base_reachable_time >> 1;
699 expire /= (tbl->hash_mask + 1);
700 if (!expire)
701 expire = 1;
703 if (expire>HZ)
704 mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
705 else
706 mod_timer(&tbl->gc_timer, now + expire);
708 write_unlock(&tbl->lock);
711 static __inline__ int neigh_max_probes(struct neighbour *n)
713 struct neigh_parms *p = n->parms;
714 return (n->nud_state & NUD_PROBE ?
715 p->ucast_probes :
716 p->ucast_probes + p->app_probes + p->mcast_probes);
719 static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
721 if (unlikely(mod_timer(&n->timer, when))) {
722 printk("NEIGH: BUG, double timer add, state is %x\n",
723 n->nud_state);
724 dump_stack();
728 /* Called when a timer expires for a neighbour entry. */
730 static void neigh_timer_handler(unsigned long arg)
732 unsigned long now, next;
733 struct neighbour *neigh = (struct neighbour *)arg;
734 unsigned state;
735 int notify = 0;
737 write_lock(&neigh->lock);
739 state = neigh->nud_state;
740 now = jiffies;
741 next = now + HZ;
743 if (!(state & NUD_IN_TIMER)) {
744 #ifndef CONFIG_SMP
745 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
746 #endif
747 goto out;
750 if (state & NUD_REACHABLE) {
751 if (time_before_eq(now,
752 neigh->confirmed + neigh->parms->reachable_time)) {
753 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
754 next = neigh->confirmed + neigh->parms->reachable_time;
755 } else if (time_before_eq(now,
756 neigh->used + neigh->parms->delay_probe_time)) {
757 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
758 neigh->nud_state = NUD_DELAY;
759 neigh->updated = jiffies;
760 neigh_suspect(neigh);
761 next = now + neigh->parms->delay_probe_time;
762 } else {
763 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
764 neigh->nud_state = NUD_STALE;
765 neigh->updated = jiffies;
766 neigh_suspect(neigh);
767 notify = 1;
769 } else if (state & NUD_DELAY) {
770 if (time_before_eq(now,
771 neigh->confirmed + neigh->parms->delay_probe_time)) {
772 NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
773 neigh->nud_state = NUD_REACHABLE;
774 neigh->updated = jiffies;
775 neigh_connect(neigh);
776 notify = 1;
777 next = neigh->confirmed + neigh->parms->reachable_time;
778 } else {
779 NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
780 neigh->nud_state = NUD_PROBE;
781 neigh->updated = jiffies;
782 atomic_set(&neigh->probes, 0);
783 next = now + neigh->parms->retrans_time;
785 } else {
786 /* NUD_PROBE|NUD_INCOMPLETE */
787 next = now + neigh->parms->retrans_time;
790 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
791 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
792 struct sk_buff *skb;
794 neigh->nud_state = NUD_FAILED;
795 neigh->updated = jiffies;
796 notify = 1;
797 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
798 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
800 /* It is very thin place. report_unreachable is very complicated
801 routine. Particularly, it can hit the same neighbour entry!
803 So that, we try to be accurate and avoid dead loop. --ANK
805 while (neigh->nud_state == NUD_FAILED &&
806 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
807 write_unlock(&neigh->lock);
808 neigh->ops->error_report(neigh, skb);
809 write_lock(&neigh->lock);
811 skb_queue_purge(&neigh->arp_queue);
814 if (neigh->nud_state & NUD_IN_TIMER) {
815 if (time_before(next, jiffies + HZ/2))
816 next = jiffies + HZ/2;
817 if (!mod_timer(&neigh->timer, next))
818 neigh_hold(neigh);
820 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
821 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
822 /* keep skb alive even if arp_queue overflows */
823 if (skb)
824 skb_get(skb);
825 write_unlock(&neigh->lock);
826 neigh->ops->solicit(neigh, skb);
827 atomic_inc(&neigh->probes);
828 if (skb)
829 kfree_skb(skb);
830 } else {
831 out:
832 write_unlock(&neigh->lock);
835 if (notify)
836 neigh_update_notify(neigh);
838 neigh_release(neigh);
841 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
843 int rc;
844 unsigned long now;
846 write_lock_bh(&neigh->lock);
848 rc = 0;
849 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
850 goto out_unlock_bh;
852 now = jiffies;
854 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
855 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
856 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
857 neigh->nud_state = NUD_INCOMPLETE;
858 neigh->updated = jiffies;
859 neigh_hold(neigh);
860 neigh_add_timer(neigh, now + 1);
861 } else {
862 neigh->nud_state = NUD_FAILED;
863 neigh->updated = jiffies;
864 write_unlock_bh(&neigh->lock);
866 if (skb)
867 kfree_skb(skb);
868 return 1;
870 } else if (neigh->nud_state & NUD_STALE) {
871 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
872 neigh_hold(neigh);
873 neigh->nud_state = NUD_DELAY;
874 neigh->updated = jiffies;
875 neigh_add_timer(neigh,
876 jiffies + neigh->parms->delay_probe_time);
879 if (neigh->nud_state == NUD_INCOMPLETE) {
880 if (skb) {
881 if (skb_queue_len(&neigh->arp_queue) >=
882 neigh->parms->queue_len) {
883 struct sk_buff *buff;
884 buff = neigh->arp_queue.next;
885 __skb_unlink(buff, &neigh->arp_queue);
886 kfree_skb(buff);
888 __skb_queue_tail(&neigh->arp_queue, skb);
890 rc = 1;
892 out_unlock_bh:
893 write_unlock_bh(&neigh->lock);
894 return rc;
897 static void neigh_update_hhs(struct neighbour *neigh)
899 struct hh_cache *hh;
900 void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
901 neigh->dev->header_cache_update;
903 if (update) {
904 for (hh = neigh->hh; hh; hh = hh->hh_next) {
905 write_seqlock_bh(&hh->hh_lock);
906 update(hh, neigh->dev, neigh->ha);
907 write_sequnlock_bh(&hh->hh_lock);
914 /* Generic update routine.
915 -- lladdr is new lladdr or NULL, if it is not supplied.
916 -- new is new state.
917 -- flags
918 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
919 if it is different.
920 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
921 lladdr instead of overriding it
922 if it is different.
923 It also allows to retain current state
924 if lladdr is unchanged.
925 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
927 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
928 NTF_ROUTER flag.
929 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
930 a router.
932 Caller MUST hold reference count on the entry.
935 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
936 u32 flags)
938 u8 old;
939 int err;
940 int notify = 0;
941 struct net_device *dev;
942 int update_isrouter = 0;
944 write_lock_bh(&neigh->lock);
946 dev = neigh->dev;
947 old = neigh->nud_state;
948 err = -EPERM;
950 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
951 (old & (NUD_NOARP | NUD_PERMANENT)))
952 goto out;
954 if (!(new & NUD_VALID)) {
955 neigh_del_timer(neigh);
956 if (old & NUD_CONNECTED)
957 neigh_suspect(neigh);
958 neigh->nud_state = new;
959 err = 0;
960 notify = old & NUD_VALID;
961 goto out;
964 /* Compare new lladdr with cached one */
965 if (!dev->addr_len) {
966 /* First case: device needs no address. */
967 lladdr = neigh->ha;
968 } else if (lladdr) {
969 /* The second case: if something is already cached
970 and a new address is proposed:
971 - compare new & old
972 - if they are different, check override flag
974 if ((old & NUD_VALID) &&
975 !memcmp(lladdr, neigh->ha, dev->addr_len))
976 lladdr = neigh->ha;
977 } else {
978 /* No address is supplied; if we know something,
979 use it, otherwise discard the request.
981 err = -EINVAL;
982 if (!(old & NUD_VALID))
983 goto out;
984 lladdr = neigh->ha;
987 if (new & NUD_CONNECTED)
988 neigh->confirmed = jiffies;
989 neigh->updated = jiffies;
991 /* If entry was valid and address is not changed,
992 do not change entry state, if new one is STALE.
994 err = 0;
995 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
996 if (old & NUD_VALID) {
997 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
998 update_isrouter = 0;
999 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1000 (old & NUD_CONNECTED)) {
1001 lladdr = neigh->ha;
1002 new = NUD_STALE;
1003 } else
1004 goto out;
1005 } else {
1006 if (lladdr == neigh->ha && new == NUD_STALE &&
1007 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1008 (old & NUD_CONNECTED))
1010 new = old;
1014 if (new != old) {
1015 neigh_del_timer(neigh);
1016 if (new & NUD_IN_TIMER) {
1017 neigh_hold(neigh);
1018 neigh_add_timer(neigh, (jiffies +
1019 ((new & NUD_REACHABLE) ?
1020 neigh->parms->reachable_time :
1021 0)));
1023 neigh->nud_state = new;
1026 if (lladdr != neigh->ha) {
1027 memcpy(&neigh->ha, lladdr, dev->addr_len);
1028 neigh_update_hhs(neigh);
1029 if (!(new & NUD_CONNECTED))
1030 neigh->confirmed = jiffies -
1031 (neigh->parms->base_reachable_time << 1);
1032 notify = 1;
1034 if (new == old)
1035 goto out;
1036 if (new & NUD_CONNECTED)
1037 neigh_connect(neigh);
1038 else
1039 neigh_suspect(neigh);
1040 if (!(old & NUD_VALID)) {
1041 struct sk_buff *skb;
1043 /* Again: avoid dead loop if something went wrong */
1045 while (neigh->nud_state & NUD_VALID &&
1046 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1047 struct neighbour *n1 = neigh;
1048 write_unlock_bh(&neigh->lock);
1049 /* On shaper/eql skb->dst->neighbour != neigh :( */
1050 if (skb->dst && skb->dst->neighbour)
1051 n1 = skb->dst->neighbour;
1052 n1->output(skb);
1053 write_lock_bh(&neigh->lock);
1055 skb_queue_purge(&neigh->arp_queue);
1057 out:
1058 if (update_isrouter) {
1059 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1060 (neigh->flags | NTF_ROUTER) :
1061 (neigh->flags & ~NTF_ROUTER);
1063 write_unlock_bh(&neigh->lock);
1065 if (notify)
1066 neigh_update_notify(neigh);
1068 return err;
1071 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1072 u8 *lladdr, void *saddr,
1073 struct net_device *dev)
1075 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1076 lladdr || !dev->addr_len);
1077 if (neigh)
1078 neigh_update(neigh, lladdr, NUD_STALE,
1079 NEIGH_UPDATE_F_OVERRIDE);
1080 return neigh;
1083 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1084 __be16 protocol)
1086 struct hh_cache *hh;
1087 struct net_device *dev = dst->dev;
1089 for (hh = n->hh; hh; hh = hh->hh_next)
1090 if (hh->hh_type == protocol)
1091 break;
1093 if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
1094 seqlock_init(&hh->hh_lock);
1095 hh->hh_type = protocol;
1096 atomic_set(&hh->hh_refcnt, 0);
1097 hh->hh_next = NULL;
1098 if (dev->hard_header_cache(n, hh)) {
1099 kfree(hh);
1100 hh = NULL;
1101 } else {
1102 atomic_inc(&hh->hh_refcnt);
1103 hh->hh_next = n->hh;
1104 n->hh = hh;
1105 if (n->nud_state & NUD_CONNECTED)
1106 hh->hh_output = n->ops->hh_output;
1107 else
1108 hh->hh_output = n->ops->output;
1111 if (hh) {
1112 atomic_inc(&hh->hh_refcnt);
1113 dst->hh = hh;
1117 /* This function can be used in contexts, where only old dev_queue_xmit
1118 worked, f.e. if you want to override normal output path (eql, shaper),
1119 but resolution is not made yet.
1122 int neigh_compat_output(struct sk_buff *skb)
1124 struct net_device *dev = skb->dev;
1126 __skb_pull(skb, skb_network_offset(skb));
1128 if (dev->hard_header &&
1129 dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1130 skb->len) < 0 &&
1131 dev->rebuild_header(skb))
1132 return 0;
1134 return dev_queue_xmit(skb);
1137 /* Slow and careful. */
1139 int neigh_resolve_output(struct sk_buff *skb)
1141 struct dst_entry *dst = skb->dst;
1142 struct neighbour *neigh;
1143 int rc = 0;
1145 if (!dst || !(neigh = dst->neighbour))
1146 goto discard;
1148 __skb_pull(skb, skb_network_offset(skb));
1150 if (!neigh_event_send(neigh, skb)) {
1151 int err;
1152 struct net_device *dev = neigh->dev;
1153 if (dev->hard_header_cache && !dst->hh) {
1154 write_lock_bh(&neigh->lock);
1155 if (!dst->hh)
1156 neigh_hh_init(neigh, dst, dst->ops->protocol);
1157 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1158 neigh->ha, NULL, skb->len);
1159 write_unlock_bh(&neigh->lock);
1160 } else {
1161 read_lock_bh(&neigh->lock);
1162 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1163 neigh->ha, NULL, skb->len);
1164 read_unlock_bh(&neigh->lock);
1166 if (err >= 0)
1167 rc = neigh->ops->queue_xmit(skb);
1168 else
1169 goto out_kfree_skb;
1171 out:
1172 return rc;
1173 discard:
1174 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1175 dst, dst ? dst->neighbour : NULL);
1176 out_kfree_skb:
1177 rc = -EINVAL;
1178 kfree_skb(skb);
1179 goto out;
1182 /* As fast as possible without hh cache */
1184 int neigh_connected_output(struct sk_buff *skb)
1186 int err;
1187 struct dst_entry *dst = skb->dst;
1188 struct neighbour *neigh = dst->neighbour;
1189 struct net_device *dev = neigh->dev;
1191 __skb_pull(skb, skb_network_offset(skb));
1193 read_lock_bh(&neigh->lock);
1194 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1195 neigh->ha, NULL, skb->len);
1196 read_unlock_bh(&neigh->lock);
1197 if (err >= 0)
1198 err = neigh->ops->queue_xmit(skb);
1199 else {
1200 err = -EINVAL;
1201 kfree_skb(skb);
1203 return err;
1206 static void neigh_proxy_process(unsigned long arg)
1208 struct neigh_table *tbl = (struct neigh_table *)arg;
1209 long sched_next = 0;
1210 unsigned long now = jiffies;
1211 struct sk_buff *skb;
1213 spin_lock(&tbl->proxy_queue.lock);
1215 skb = tbl->proxy_queue.next;
1217 while (skb != (struct sk_buff *)&tbl->proxy_queue) {
1218 struct sk_buff *back = skb;
1219 long tdif = NEIGH_CB(back)->sched_next - now;
1221 skb = skb->next;
1222 if (tdif <= 0) {
1223 struct net_device *dev = back->dev;
1224 __skb_unlink(back, &tbl->proxy_queue);
1225 if (tbl->proxy_redo && netif_running(dev))
1226 tbl->proxy_redo(back);
1227 else
1228 kfree_skb(back);
1230 dev_put(dev);
1231 } else if (!sched_next || tdif < sched_next)
1232 sched_next = tdif;
1234 del_timer(&tbl->proxy_timer);
1235 if (sched_next)
1236 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1237 spin_unlock(&tbl->proxy_queue.lock);
1240 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1241 struct sk_buff *skb)
1243 unsigned long now = jiffies;
1244 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1246 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1247 kfree_skb(skb);
1248 return;
1251 NEIGH_CB(skb)->sched_next = sched_next;
1252 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1254 spin_lock(&tbl->proxy_queue.lock);
1255 if (del_timer(&tbl->proxy_timer)) {
1256 if (time_before(tbl->proxy_timer.expires, sched_next))
1257 sched_next = tbl->proxy_timer.expires;
1259 dst_release(skb->dst);
1260 skb->dst = NULL;
1261 dev_hold(skb->dev);
1262 __skb_queue_tail(&tbl->proxy_queue, skb);
1263 mod_timer(&tbl->proxy_timer, sched_next);
1264 spin_unlock(&tbl->proxy_queue.lock);
1268 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1269 struct neigh_table *tbl)
1271 struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1273 if (p) {
1274 p->tbl = tbl;
1275 atomic_set(&p->refcnt, 1);
1276 INIT_RCU_HEAD(&p->rcu_head);
1277 p->reachable_time =
1278 neigh_rand_reach_time(p->base_reachable_time);
1279 if (dev) {
1280 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1281 kfree(p);
1282 return NULL;
1285 dev_hold(dev);
1286 p->dev = dev;
1288 p->sysctl_table = NULL;
1289 write_lock_bh(&tbl->lock);
1290 p->next = tbl->parms.next;
1291 tbl->parms.next = p;
1292 write_unlock_bh(&tbl->lock);
1294 return p;
1297 static void neigh_rcu_free_parms(struct rcu_head *head)
1299 struct neigh_parms *parms =
1300 container_of(head, struct neigh_parms, rcu_head);
1302 neigh_parms_put(parms);
1305 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1307 struct neigh_parms **p;
1309 if (!parms || parms == &tbl->parms)
1310 return;
1311 write_lock_bh(&tbl->lock);
1312 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1313 if (*p == parms) {
1314 *p = parms->next;
1315 parms->dead = 1;
1316 write_unlock_bh(&tbl->lock);
1317 if (parms->dev)
1318 dev_put(parms->dev);
1319 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1320 return;
1323 write_unlock_bh(&tbl->lock);
1324 NEIGH_PRINTK1("neigh_parms_release: not found\n");
/* Free the memory backing @parms. */
void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1332 static struct lock_class_key neigh_table_proxy_queue_class;
1334 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1336 unsigned long now = jiffies;
1337 unsigned long phsize;
1339 atomic_set(&tbl->parms.refcnt, 1);
1340 INIT_RCU_HEAD(&tbl->parms.rcu_head);
1341 tbl->parms.reachable_time =
1342 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1344 if (!tbl->kmem_cachep)
1345 tbl->kmem_cachep =
1346 kmem_cache_create(tbl->id, tbl->entry_size, 0,
1347 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1348 NULL);
1349 tbl->stats = alloc_percpu(struct neigh_statistics);
1350 if (!tbl->stats)
1351 panic("cannot create neighbour cache statistics");
1353 #ifdef CONFIG_PROC_FS
1354 tbl->pde = create_proc_entry(tbl->id, 0, init_net.proc_net_stat);
1355 if (!tbl->pde)
1356 panic("cannot create neighbour proc dir entry");
1357 tbl->pde->proc_fops = &neigh_stat_seq_fops;
1358 tbl->pde->data = tbl;
1359 #endif
1361 tbl->hash_mask = 1;
1362 tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1364 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1365 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1367 if (!tbl->hash_buckets || !tbl->phash_buckets)
1368 panic("cannot allocate neighbour cache hashes");
1370 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1372 rwlock_init(&tbl->lock);
1373 init_timer(&tbl->gc_timer);
1374 tbl->gc_timer.data = (unsigned long)tbl;
1375 tbl->gc_timer.function = neigh_periodic_timer;
1376 tbl->gc_timer.expires = now + 1;
1377 add_timer(&tbl->gc_timer);
1379 init_timer(&tbl->proxy_timer);
1380 tbl->proxy_timer.data = (unsigned long)tbl;
1381 tbl->proxy_timer.function = neigh_proxy_process;
1382 skb_queue_head_init_class(&tbl->proxy_queue,
1383 &neigh_table_proxy_queue_class);
1385 tbl->last_flush = now;
1386 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1389 void neigh_table_init(struct neigh_table *tbl)
1391 struct neigh_table *tmp;
1393 neigh_table_init_no_netlink(tbl);
1394 write_lock(&neigh_tbl_lock);
1395 for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1396 if (tmp->family == tbl->family)
1397 break;
1399 tbl->next = neigh_tables;
1400 neigh_tables = tbl;
1401 write_unlock(&neigh_tbl_lock);
1403 if (unlikely(tmp)) {
1404 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1405 "family %d\n", tbl->family);
1406 dump_stack();
1410 int neigh_table_clear(struct neigh_table *tbl)
1412 struct neigh_table **tp;
1414 /* It is not clean... Fix it to unload IPv6 module safely */
1415 del_timer_sync(&tbl->gc_timer);
1416 del_timer_sync(&tbl->proxy_timer);
1417 pneigh_queue_purge(&tbl->proxy_queue);
1418 neigh_ifdown(tbl, NULL);
1419 if (atomic_read(&tbl->entries))
1420 printk(KERN_CRIT "neighbour leakage\n");
1421 write_lock(&neigh_tbl_lock);
1422 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1423 if (*tp == tbl) {
1424 *tp = tbl->next;
1425 break;
1428 write_unlock(&neigh_tbl_lock);
1430 neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
1431 tbl->hash_buckets = NULL;
1433 kfree(tbl->phash_buckets);
1434 tbl->phash_buckets = NULL;
1436 free_percpu(tbl->stats);
1437 tbl->stats = NULL;
1439 return 0;
1442 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1444 struct net *net = skb->sk->sk_net;
1445 struct ndmsg *ndm;
1446 struct nlattr *dst_attr;
1447 struct neigh_table *tbl;
1448 struct net_device *dev = NULL;
1449 int err = -EINVAL;
1451 if (nlmsg_len(nlh) < sizeof(*ndm))
1452 goto out;
1454 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1455 if (dst_attr == NULL)
1456 goto out;
1458 ndm = nlmsg_data(nlh);
1459 if (ndm->ndm_ifindex) {
1460 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1461 if (dev == NULL) {
1462 err = -ENODEV;
1463 goto out;
1467 read_lock(&neigh_tbl_lock);
1468 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1469 struct neighbour *neigh;
1471 if (tbl->family != ndm->ndm_family)
1472 continue;
1473 read_unlock(&neigh_tbl_lock);
1475 if (nla_len(dst_attr) < tbl->key_len)
1476 goto out_dev_put;
1478 if (ndm->ndm_flags & NTF_PROXY) {
1479 err = pneigh_delete(tbl, nla_data(dst_attr), dev);
1480 goto out_dev_put;
1483 if (dev == NULL)
1484 goto out_dev_put;
1486 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1487 if (neigh == NULL) {
1488 err = -ENOENT;
1489 goto out_dev_put;
1492 err = neigh_update(neigh, NULL, NUD_FAILED,
1493 NEIGH_UPDATE_F_OVERRIDE |
1494 NEIGH_UPDATE_F_ADMIN);
1495 neigh_release(neigh);
1496 goto out_dev_put;
1498 read_unlock(&neigh_tbl_lock);
1499 err = -EAFNOSUPPORT;
1501 out_dev_put:
1502 if (dev)
1503 dev_put(dev);
1504 out:
1505 return err;
1508 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1510 struct net *net = skb->sk->sk_net;
1511 struct ndmsg *ndm;
1512 struct nlattr *tb[NDA_MAX+1];
1513 struct neigh_table *tbl;
1514 struct net_device *dev = NULL;
1515 int err;
1517 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1518 if (err < 0)
1519 goto out;
1521 err = -EINVAL;
1522 if (tb[NDA_DST] == NULL)
1523 goto out;
1525 ndm = nlmsg_data(nlh);
1526 if (ndm->ndm_ifindex) {
1527 dev = dev_get_by_index(net, ndm->ndm_ifindex);
1528 if (dev == NULL) {
1529 err = -ENODEV;
1530 goto out;
1533 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1534 goto out_dev_put;
1537 read_lock(&neigh_tbl_lock);
1538 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1539 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1540 struct neighbour *neigh;
1541 void *dst, *lladdr;
1543 if (tbl->family != ndm->ndm_family)
1544 continue;
1545 read_unlock(&neigh_tbl_lock);
1547 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1548 goto out_dev_put;
1549 dst = nla_data(tb[NDA_DST]);
1550 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1552 if (ndm->ndm_flags & NTF_PROXY) {
1553 struct pneigh_entry *pn;
1555 err = -ENOBUFS;
1556 pn = pneigh_lookup(tbl, dst, dev, 1);
1557 if (pn) {
1558 pn->flags = ndm->ndm_flags;
1559 err = 0;
1561 goto out_dev_put;
1564 if (dev == NULL)
1565 goto out_dev_put;
1567 neigh = neigh_lookup(tbl, dst, dev);
1568 if (neigh == NULL) {
1569 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1570 err = -ENOENT;
1571 goto out_dev_put;
1574 neigh = __neigh_lookup_errno(tbl, dst, dev);
1575 if (IS_ERR(neigh)) {
1576 err = PTR_ERR(neigh);
1577 goto out_dev_put;
1579 } else {
1580 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1581 err = -EEXIST;
1582 neigh_release(neigh);
1583 goto out_dev_put;
1586 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1587 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1590 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1591 neigh_release(neigh);
1592 goto out_dev_put;
1595 read_unlock(&neigh_tbl_lock);
1596 err = -EAFNOSUPPORT;
1598 out_dev_put:
1599 if (dev)
1600 dev_put(dev);
1601 out:
1602 return err;
1605 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1607 struct nlattr *nest;
1609 nest = nla_nest_start(skb, NDTA_PARMS);
1610 if (nest == NULL)
1611 return -ENOBUFS;
1613 if (parms->dev)
1614 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1616 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1617 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1618 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1619 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1620 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1621 NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1622 NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1623 NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1624 parms->base_reachable_time);
1625 NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1626 NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1627 NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1628 NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1629 NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1630 NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1632 return nla_nest_end(skb, nest);
1634 nla_put_failure:
1635 return nla_nest_cancel(skb, nest);
1638 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1639 u32 pid, u32 seq, int type, int flags)
1641 struct nlmsghdr *nlh;
1642 struct ndtmsg *ndtmsg;
1644 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1645 if (nlh == NULL)
1646 return -EMSGSIZE;
1648 ndtmsg = nlmsg_data(nlh);
1650 read_lock_bh(&tbl->lock);
1651 ndtmsg->ndtm_family = tbl->family;
1652 ndtmsg->ndtm_pad1 = 0;
1653 ndtmsg->ndtm_pad2 = 0;
1655 NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1656 NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1657 NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1658 NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1659 NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1662 unsigned long now = jiffies;
1663 unsigned int flush_delta = now - tbl->last_flush;
1664 unsigned int rand_delta = now - tbl->last_rand;
1666 struct ndt_config ndc = {
1667 .ndtc_key_len = tbl->key_len,
1668 .ndtc_entry_size = tbl->entry_size,
1669 .ndtc_entries = atomic_read(&tbl->entries),
1670 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1671 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1672 .ndtc_hash_rnd = tbl->hash_rnd,
1673 .ndtc_hash_mask = tbl->hash_mask,
1674 .ndtc_hash_chain_gc = tbl->hash_chain_gc,
1675 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1678 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1682 int cpu;
1683 struct ndt_stats ndst;
1685 memset(&ndst, 0, sizeof(ndst));
1687 for_each_possible_cpu(cpu) {
1688 struct neigh_statistics *st;
1690 st = per_cpu_ptr(tbl->stats, cpu);
1691 ndst.ndts_allocs += st->allocs;
1692 ndst.ndts_destroys += st->destroys;
1693 ndst.ndts_hash_grows += st->hash_grows;
1694 ndst.ndts_res_failed += st->res_failed;
1695 ndst.ndts_lookups += st->lookups;
1696 ndst.ndts_hits += st->hits;
1697 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1698 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1699 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1700 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1703 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1706 BUG_ON(tbl->parms.dev);
1707 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1708 goto nla_put_failure;
1710 read_unlock_bh(&tbl->lock);
1711 return nlmsg_end(skb, nlh);
1713 nla_put_failure:
1714 read_unlock_bh(&tbl->lock);
1715 nlmsg_cancel(skb, nlh);
1716 return -EMSGSIZE;
1719 static int neightbl_fill_param_info(struct sk_buff *skb,
1720 struct neigh_table *tbl,
1721 struct neigh_parms *parms,
1722 u32 pid, u32 seq, int type,
1723 unsigned int flags)
1725 struct ndtmsg *ndtmsg;
1726 struct nlmsghdr *nlh;
1728 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1729 if (nlh == NULL)
1730 return -EMSGSIZE;
1732 ndtmsg = nlmsg_data(nlh);
1734 read_lock_bh(&tbl->lock);
1735 ndtmsg->ndtm_family = tbl->family;
1736 ndtmsg->ndtm_pad1 = 0;
1737 ndtmsg->ndtm_pad2 = 0;
1739 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1740 neightbl_fill_parms(skb, parms) < 0)
1741 goto errout;
1743 read_unlock_bh(&tbl->lock);
1744 return nlmsg_end(skb, nlh);
1745 errout:
1746 read_unlock_bh(&tbl->lock);
1747 nlmsg_cancel(skb, nlh);
1748 return -EMSGSIZE;
1751 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1752 int ifindex)
1754 struct neigh_parms *p;
1756 for (p = &tbl->parms; p; p = p->next)
1757 if ((p->dev && p->dev->ifindex == ifindex) ||
1758 (!p->dev && !ifindex))
1759 return p;
1761 return NULL;
1764 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1765 [NDTA_NAME] = { .type = NLA_STRING },
1766 [NDTA_THRESH1] = { .type = NLA_U32 },
1767 [NDTA_THRESH2] = { .type = NLA_U32 },
1768 [NDTA_THRESH3] = { .type = NLA_U32 },
1769 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1770 [NDTA_PARMS] = { .type = NLA_NESTED },
1773 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1774 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1775 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1776 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1777 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1778 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1779 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1780 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1781 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1782 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1783 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1784 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1785 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1786 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1789 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1791 struct neigh_table *tbl;
1792 struct ndtmsg *ndtmsg;
1793 struct nlattr *tb[NDTA_MAX+1];
1794 int err;
1796 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1797 nl_neightbl_policy);
1798 if (err < 0)
1799 goto errout;
1801 if (tb[NDTA_NAME] == NULL) {
1802 err = -EINVAL;
1803 goto errout;
1806 ndtmsg = nlmsg_data(nlh);
1807 read_lock(&neigh_tbl_lock);
1808 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1809 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1810 continue;
1812 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1813 break;
1816 if (tbl == NULL) {
1817 err = -ENOENT;
1818 goto errout_locked;
1822 * We acquire tbl->lock to be nice to the periodic timers and
1823 * make sure they always see a consistent set of values.
1825 write_lock_bh(&tbl->lock);
1827 if (tb[NDTA_PARMS]) {
1828 struct nlattr *tbp[NDTPA_MAX+1];
1829 struct neigh_parms *p;
1830 int i, ifindex = 0;
1832 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1833 nl_ntbl_parm_policy);
1834 if (err < 0)
1835 goto errout_tbl_lock;
1837 if (tbp[NDTPA_IFINDEX])
1838 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1840 p = lookup_neigh_params(tbl, ifindex);
1841 if (p == NULL) {
1842 err = -ENOENT;
1843 goto errout_tbl_lock;
1846 for (i = 1; i <= NDTPA_MAX; i++) {
1847 if (tbp[i] == NULL)
1848 continue;
1850 switch (i) {
1851 case NDTPA_QUEUE_LEN:
1852 p->queue_len = nla_get_u32(tbp[i]);
1853 break;
1854 case NDTPA_PROXY_QLEN:
1855 p->proxy_qlen = nla_get_u32(tbp[i]);
1856 break;
1857 case NDTPA_APP_PROBES:
1858 p->app_probes = nla_get_u32(tbp[i]);
1859 break;
1860 case NDTPA_UCAST_PROBES:
1861 p->ucast_probes = nla_get_u32(tbp[i]);
1862 break;
1863 case NDTPA_MCAST_PROBES:
1864 p->mcast_probes = nla_get_u32(tbp[i]);
1865 break;
1866 case NDTPA_BASE_REACHABLE_TIME:
1867 p->base_reachable_time = nla_get_msecs(tbp[i]);
1868 break;
1869 case NDTPA_GC_STALETIME:
1870 p->gc_staletime = nla_get_msecs(tbp[i]);
1871 break;
1872 case NDTPA_DELAY_PROBE_TIME:
1873 p->delay_probe_time = nla_get_msecs(tbp[i]);
1874 break;
1875 case NDTPA_RETRANS_TIME:
1876 p->retrans_time = nla_get_msecs(tbp[i]);
1877 break;
1878 case NDTPA_ANYCAST_DELAY:
1879 p->anycast_delay = nla_get_msecs(tbp[i]);
1880 break;
1881 case NDTPA_PROXY_DELAY:
1882 p->proxy_delay = nla_get_msecs(tbp[i]);
1883 break;
1884 case NDTPA_LOCKTIME:
1885 p->locktime = nla_get_msecs(tbp[i]);
1886 break;
1891 if (tb[NDTA_THRESH1])
1892 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
1894 if (tb[NDTA_THRESH2])
1895 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
1897 if (tb[NDTA_THRESH3])
1898 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
1900 if (tb[NDTA_GC_INTERVAL])
1901 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
1903 err = 0;
1905 errout_tbl_lock:
1906 write_unlock_bh(&tbl->lock);
1907 errout_locked:
1908 read_unlock(&neigh_tbl_lock);
1909 errout:
1910 return err;
1913 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1915 int family, tidx, nidx = 0;
1916 int tbl_skip = cb->args[0];
1917 int neigh_skip = cb->args[1];
1918 struct neigh_table *tbl;
1920 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1922 read_lock(&neigh_tbl_lock);
1923 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
1924 struct neigh_parms *p;
1926 if (tidx < tbl_skip || (family && tbl->family != family))
1927 continue;
1929 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
1930 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
1931 NLM_F_MULTI) <= 0)
1932 break;
1934 for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
1935 if (nidx < neigh_skip)
1936 continue;
1938 if (neightbl_fill_param_info(skb, tbl, p,
1939 NETLINK_CB(cb->skb).pid,
1940 cb->nlh->nlmsg_seq,
1941 RTM_NEWNEIGHTBL,
1942 NLM_F_MULTI) <= 0)
1943 goto out;
1946 neigh_skip = 0;
1948 out:
1949 read_unlock(&neigh_tbl_lock);
1950 cb->args[0] = tidx;
1951 cb->args[1] = nidx;
1953 return skb->len;
1956 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
1957 u32 pid, u32 seq, int type, unsigned int flags)
1959 unsigned long now = jiffies;
1960 struct nda_cacheinfo ci;
1961 struct nlmsghdr *nlh;
1962 struct ndmsg *ndm;
1964 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
1965 if (nlh == NULL)
1966 return -EMSGSIZE;
1968 ndm = nlmsg_data(nlh);
1969 ndm->ndm_family = neigh->ops->family;
1970 ndm->ndm_pad1 = 0;
1971 ndm->ndm_pad2 = 0;
1972 ndm->ndm_flags = neigh->flags;
1973 ndm->ndm_type = neigh->type;
1974 ndm->ndm_ifindex = neigh->dev->ifindex;
1976 NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
1978 read_lock_bh(&neigh->lock);
1979 ndm->ndm_state = neigh->nud_state;
1980 if ((neigh->nud_state & NUD_VALID) &&
1981 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
1982 read_unlock_bh(&neigh->lock);
1983 goto nla_put_failure;
1986 ci.ndm_used = now - neigh->used;
1987 ci.ndm_confirmed = now - neigh->confirmed;
1988 ci.ndm_updated = now - neigh->updated;
1989 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
1990 read_unlock_bh(&neigh->lock);
1992 NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
1993 NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
1995 return nlmsg_end(skb, nlh);
1997 nla_put_failure:
1998 nlmsg_cancel(skb, nlh);
1999 return -EMSGSIZE;
2002 static void neigh_update_notify(struct neighbour *neigh)
2004 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2005 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
2008 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2009 struct netlink_callback *cb)
2011 struct neighbour *n;
2012 int rc, h, s_h = cb->args[1];
2013 int idx, s_idx = idx = cb->args[2];
2015 read_lock_bh(&tbl->lock);
2016 for (h = 0; h <= tbl->hash_mask; h++) {
2017 if (h < s_h)
2018 continue;
2019 if (h > s_h)
2020 s_idx = 0;
2021 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
2022 if (idx < s_idx)
2023 continue;
2024 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2025 cb->nlh->nlmsg_seq,
2026 RTM_NEWNEIGH,
2027 NLM_F_MULTI) <= 0) {
2028 read_unlock_bh(&tbl->lock);
2029 rc = -1;
2030 goto out;
2034 read_unlock_bh(&tbl->lock);
2035 rc = skb->len;
2036 out:
2037 cb->args[1] = h;
2038 cb->args[2] = idx;
2039 return rc;
2042 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2044 struct neigh_table *tbl;
2045 int t, family, s_t;
2047 read_lock(&neigh_tbl_lock);
2048 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2049 s_t = cb->args[0];
2051 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2052 if (t < s_t || (family && tbl->family != family))
2053 continue;
2054 if (t > s_t)
2055 memset(&cb->args[1], 0, sizeof(cb->args) -
2056 sizeof(cb->args[0]));
2057 if (neigh_dump_table(tbl, skb, cb) < 0)
2058 break;
2060 read_unlock(&neigh_tbl_lock);
2062 cb->args[0] = t;
2063 return skb->len;
2066 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2068 int chain;
2070 read_lock_bh(&tbl->lock);
2071 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2072 struct neighbour *n;
2074 for (n = tbl->hash_buckets[chain]; n; n = n->next)
2075 cb(n, cookie);
2077 read_unlock_bh(&tbl->lock);
2079 EXPORT_SYMBOL(neigh_for_each);
2081 /* The tbl->lock must be held as a writer and BH disabled. */
2082 void __neigh_for_each_release(struct neigh_table *tbl,
2083 int (*cb)(struct neighbour *))
2085 int chain;
2087 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2088 struct neighbour *n, **np;
2090 np = &tbl->hash_buckets[chain];
2091 while ((n = *np) != NULL) {
2092 int release;
2094 write_lock(&n->lock);
2095 release = cb(n);
2096 if (release) {
2097 *np = n->next;
2098 n->dead = 1;
2099 } else
2100 np = &n->next;
2101 write_unlock(&n->lock);
2102 if (release)
2103 neigh_cleanup_and_release(n);
2107 EXPORT_SYMBOL(__neigh_for_each_release);
2109 #ifdef CONFIG_PROC_FS
2111 static struct neighbour *neigh_get_first(struct seq_file *seq)
2113 struct neigh_seq_state *state = seq->private;
2114 struct neigh_table *tbl = state->tbl;
2115 struct neighbour *n = NULL;
2116 int bucket = state->bucket;
2118 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2119 for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
2120 n = tbl->hash_buckets[bucket];
2122 while (n) {
2123 if (state->neigh_sub_iter) {
2124 loff_t fakep = 0;
2125 void *v;
2127 v = state->neigh_sub_iter(state, n, &fakep);
2128 if (!v)
2129 goto next;
2131 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2132 break;
2133 if (n->nud_state & ~NUD_NOARP)
2134 break;
2135 next:
2136 n = n->next;
2139 if (n)
2140 break;
2142 state->bucket = bucket;
2144 return n;
2147 static struct neighbour *neigh_get_next(struct seq_file *seq,
2148 struct neighbour *n,
2149 loff_t *pos)
2151 struct neigh_seq_state *state = seq->private;
2152 struct neigh_table *tbl = state->tbl;
2154 if (state->neigh_sub_iter) {
2155 void *v = state->neigh_sub_iter(state, n, pos);
2156 if (v)
2157 return n;
2159 n = n->next;
2161 while (1) {
2162 while (n) {
2163 if (state->neigh_sub_iter) {
2164 void *v = state->neigh_sub_iter(state, n, pos);
2165 if (v)
2166 return n;
2167 goto next;
2169 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2170 break;
2172 if (n->nud_state & ~NUD_NOARP)
2173 break;
2174 next:
2175 n = n->next;
2178 if (n)
2179 break;
2181 if (++state->bucket > tbl->hash_mask)
2182 break;
2184 n = tbl->hash_buckets[state->bucket];
2187 if (n && pos)
2188 --(*pos);
2189 return n;
2192 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2194 struct neighbour *n = neigh_get_first(seq);
2196 if (n) {
2197 while (*pos) {
2198 n = neigh_get_next(seq, n, pos);
2199 if (!n)
2200 break;
2203 return *pos ? NULL : n;
2206 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2208 struct neigh_seq_state *state = seq->private;
2209 struct neigh_table *tbl = state->tbl;
2210 struct pneigh_entry *pn = NULL;
2211 int bucket = state->bucket;
2213 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2214 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2215 pn = tbl->phash_buckets[bucket];
2216 if (pn)
2217 break;
2219 state->bucket = bucket;
2221 return pn;
2224 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2225 struct pneigh_entry *pn,
2226 loff_t *pos)
2228 struct neigh_seq_state *state = seq->private;
2229 struct neigh_table *tbl = state->tbl;
2231 pn = pn->next;
2232 while (!pn) {
2233 if (++state->bucket > PNEIGH_HASHMASK)
2234 break;
2235 pn = tbl->phash_buckets[state->bucket];
2236 if (pn)
2237 break;
2240 if (pn && pos)
2241 --(*pos);
2243 return pn;
2246 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2248 struct pneigh_entry *pn = pneigh_get_first(seq);
2250 if (pn) {
2251 while (*pos) {
2252 pn = pneigh_get_next(seq, pn, pos);
2253 if (!pn)
2254 break;
2257 return *pos ? NULL : pn;
2260 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2262 struct neigh_seq_state *state = seq->private;
2263 void *rc;
2265 rc = neigh_get_idx(seq, pos);
2266 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2267 rc = pneigh_get_idx(seq, pos);
2269 return rc;
2272 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2274 struct neigh_seq_state *state = seq->private;
2275 loff_t pos_minus_one;
2277 state->tbl = tbl;
2278 state->bucket = 0;
2279 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2281 read_lock_bh(&tbl->lock);
2283 pos_minus_one = *pos - 1;
2284 return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
2286 EXPORT_SYMBOL(neigh_seq_start);
2288 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2290 struct neigh_seq_state *state;
2291 void *rc;
2293 if (v == SEQ_START_TOKEN) {
2294 rc = neigh_get_idx(seq, pos);
2295 goto out;
2298 state = seq->private;
2299 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2300 rc = neigh_get_next(seq, v, NULL);
2301 if (rc)
2302 goto out;
2303 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2304 rc = pneigh_get_first(seq);
2305 } else {
2306 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2307 rc = pneigh_get_next(seq, v, NULL);
2309 out:
2310 ++(*pos);
2311 return rc;
2313 EXPORT_SYMBOL(neigh_seq_next);
2315 void neigh_seq_stop(struct seq_file *seq, void *v)
2317 struct neigh_seq_state *state = seq->private;
2318 struct neigh_table *tbl = state->tbl;
2320 read_unlock_bh(&tbl->lock);
2322 EXPORT_SYMBOL(neigh_seq_stop);
2324 /* statistics via seq_file */
2326 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2328 struct proc_dir_entry *pde = seq->private;
2329 struct neigh_table *tbl = pde->data;
2330 int cpu;
2332 if (*pos == 0)
2333 return SEQ_START_TOKEN;
2335 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
2336 if (!cpu_possible(cpu))
2337 continue;
2338 *pos = cpu+1;
2339 return per_cpu_ptr(tbl->stats, cpu);
2341 return NULL;
2344 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2346 struct proc_dir_entry *pde = seq->private;
2347 struct neigh_table *tbl = pde->data;
2348 int cpu;
2350 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
2351 if (!cpu_possible(cpu))
2352 continue;
2353 *pos = cpu+1;
2354 return per_cpu_ptr(tbl->stats, cpu);
2356 return NULL;
/* seq_file stop for the statistics file: intentionally does nothing. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2364 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2366 struct proc_dir_entry *pde = seq->private;
2367 struct neigh_table *tbl = pde->data;
2368 struct neigh_statistics *st = v;
2370 if (v == SEQ_START_TOKEN) {
2371 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
2372 return 0;
2375 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2376 "%08lx %08lx %08lx %08lx\n",
2377 atomic_read(&tbl->entries),
2379 st->allocs,
2380 st->destroys,
2381 st->hash_grows,
2383 st->lookups,
2384 st->hits,
2386 st->res_failed,
2388 st->rcv_probes_mcast,
2389 st->rcv_probes_ucast,
2391 st->periodic_gc_runs,
2392 st->forced_gc_runs
2395 return 0;
2398 static const struct seq_operations neigh_stat_seq_ops = {
2399 .start = neigh_stat_seq_start,
2400 .next = neigh_stat_seq_next,
2401 .stop = neigh_stat_seq_stop,
2402 .show = neigh_stat_seq_show,
2405 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2407 int ret = seq_open(file, &neigh_stat_seq_ops);
2409 if (!ret) {
2410 struct seq_file *sf = file->private_data;
2411 sf->private = PDE(inode);
2413 return ret;
2416 static const struct file_operations neigh_stat_seq_fops = {
2417 .owner = THIS_MODULE,
2418 .open = neigh_stat_seq_open,
2419 .read = seq_read,
2420 .llseek = seq_lseek,
2421 .release = seq_release,
2424 #endif /* CONFIG_PROC_FS */
2426 static inline size_t neigh_nlmsg_size(void)
2428 return NLMSG_ALIGN(sizeof(struct ndmsg))
2429 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2430 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2431 + nla_total_size(sizeof(struct nda_cacheinfo))
2432 + nla_total_size(4); /* NDA_PROBES */
2435 static void __neigh_notify(struct neighbour *n, int type, int flags)
2437 struct sk_buff *skb;
2438 int err = -ENOBUFS;
2440 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2441 if (skb == NULL)
2442 goto errout;
2444 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2445 if (err < 0) {
2446 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2447 WARN_ON(err == -EMSGSIZE);
2448 kfree_skb(skb);
2449 goto errout;
2451 err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2452 errout:
2453 if (err < 0)
2454 rtnl_set_sk_err(RTNLGRP_NEIGH, err);
#ifdef CONFIG_ARPD
/* Emit an RTM_GETNEIGH request for @n to the RTNLGRP_NEIGH listeners. */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
#endif /* CONFIG_ARPD */
2464 #ifdef CONFIG_SYSCTL
2466 static struct neigh_sysctl_table {
2467 struct ctl_table_header *sysctl_header;
2468 ctl_table neigh_vars[__NET_NEIGH_MAX];
2469 ctl_table neigh_dev[2];
2470 ctl_table neigh_neigh_dir[2];
2471 ctl_table neigh_proto_dir[2];
2472 ctl_table neigh_root_dir[2];
2473 } neigh_sysctl_template __read_mostly = {
2474 .neigh_vars = {
2476 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
2477 .procname = "mcast_solicit",
2478 .maxlen = sizeof(int),
2479 .mode = 0644,
2480 .proc_handler = &proc_dointvec,
2483 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
2484 .procname = "ucast_solicit",
2485 .maxlen = sizeof(int),
2486 .mode = 0644,
2487 .proc_handler = &proc_dointvec,
2490 .ctl_name = NET_NEIGH_APP_SOLICIT,
2491 .procname = "app_solicit",
2492 .maxlen = sizeof(int),
2493 .mode = 0644,
2494 .proc_handler = &proc_dointvec,
2497 .ctl_name = NET_NEIGH_RETRANS_TIME,
2498 .procname = "retrans_time",
2499 .maxlen = sizeof(int),
2500 .mode = 0644,
2501 .proc_handler = &proc_dointvec_userhz_jiffies,
2504 .ctl_name = NET_NEIGH_REACHABLE_TIME,
2505 .procname = "base_reachable_time",
2506 .maxlen = sizeof(int),
2507 .mode = 0644,
2508 .proc_handler = &proc_dointvec_jiffies,
2509 .strategy = &sysctl_jiffies,
2512 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
2513 .procname = "delay_first_probe_time",
2514 .maxlen = sizeof(int),
2515 .mode = 0644,
2516 .proc_handler = &proc_dointvec_jiffies,
2517 .strategy = &sysctl_jiffies,
2520 .ctl_name = NET_NEIGH_GC_STALE_TIME,
2521 .procname = "gc_stale_time",
2522 .maxlen = sizeof(int),
2523 .mode = 0644,
2524 .proc_handler = &proc_dointvec_jiffies,
2525 .strategy = &sysctl_jiffies,
2528 .ctl_name = NET_NEIGH_UNRES_QLEN,
2529 .procname = "unres_qlen",
2530 .maxlen = sizeof(int),
2531 .mode = 0644,
2532 .proc_handler = &proc_dointvec,
2535 .ctl_name = NET_NEIGH_PROXY_QLEN,
2536 .procname = "proxy_qlen",
2537 .maxlen = sizeof(int),
2538 .mode = 0644,
2539 .proc_handler = &proc_dointvec,
2542 .ctl_name = NET_NEIGH_ANYCAST_DELAY,
2543 .procname = "anycast_delay",
2544 .maxlen = sizeof(int),
2545 .mode = 0644,
2546 .proc_handler = &proc_dointvec_userhz_jiffies,
2549 .ctl_name = NET_NEIGH_PROXY_DELAY,
2550 .procname = "proxy_delay",
2551 .maxlen = sizeof(int),
2552 .mode = 0644,
2553 .proc_handler = &proc_dointvec_userhz_jiffies,
2556 .ctl_name = NET_NEIGH_LOCKTIME,
2557 .procname = "locktime",
2558 .maxlen = sizeof(int),
2559 .mode = 0644,
2560 .proc_handler = &proc_dointvec_userhz_jiffies,
2563 .ctl_name = NET_NEIGH_GC_INTERVAL,
2564 .procname = "gc_interval",
2565 .maxlen = sizeof(int),
2566 .mode = 0644,
2567 .proc_handler = &proc_dointvec_jiffies,
2568 .strategy = &sysctl_jiffies,
2571 .ctl_name = NET_NEIGH_GC_THRESH1,
2572 .procname = "gc_thresh1",
2573 .maxlen = sizeof(int),
2574 .mode = 0644,
2575 .proc_handler = &proc_dointvec,
2578 .ctl_name = NET_NEIGH_GC_THRESH2,
2579 .procname = "gc_thresh2",
2580 .maxlen = sizeof(int),
2581 .mode = 0644,
2582 .proc_handler = &proc_dointvec,
2585 .ctl_name = NET_NEIGH_GC_THRESH3,
2586 .procname = "gc_thresh3",
2587 .maxlen = sizeof(int),
2588 .mode = 0644,
2589 .proc_handler = &proc_dointvec,
2592 .ctl_name = NET_NEIGH_RETRANS_TIME_MS,
2593 .procname = "retrans_time_ms",
2594 .maxlen = sizeof(int),
2595 .mode = 0644,
2596 .proc_handler = &proc_dointvec_ms_jiffies,
2597 .strategy = &sysctl_ms_jiffies,
2600 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
2601 .procname = "base_reachable_time_ms",
2602 .maxlen = sizeof(int),
2603 .mode = 0644,
2604 .proc_handler = &proc_dointvec_ms_jiffies,
2605 .strategy = &sysctl_ms_jiffies,
2608 .neigh_dev = {
2610 .ctl_name = NET_PROTO_CONF_DEFAULT,
2611 .procname = "default",
2612 .mode = 0555,
2615 .neigh_neigh_dir = {
2617 .procname = "neigh",
2618 .mode = 0555,
2621 .neigh_proto_dir = {
2623 .mode = 0555,
2626 .neigh_root_dir = {
2628 .ctl_name = CTL_NET,
2629 .procname = "net",
2630 .mode = 0555,
2635 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2636 int p_id, int pdev_id, char *p_name,
2637 proc_handler *handler, ctl_handler *strategy)
2639 struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
2640 sizeof(*t), GFP_KERNEL);
2641 const char *dev_name_source = NULL;
2642 char *dev_name = NULL;
2643 int err = 0;
2645 if (!t)
2646 return -ENOBUFS;
2647 t->neigh_vars[0].data = &p->mcast_probes;
2648 t->neigh_vars[1].data = &p->ucast_probes;
2649 t->neigh_vars[2].data = &p->app_probes;
2650 t->neigh_vars[3].data = &p->retrans_time;
2651 t->neigh_vars[4].data = &p->base_reachable_time;
2652 t->neigh_vars[5].data = &p->delay_probe_time;
2653 t->neigh_vars[6].data = &p->gc_staletime;
2654 t->neigh_vars[7].data = &p->queue_len;
2655 t->neigh_vars[8].data = &p->proxy_qlen;
2656 t->neigh_vars[9].data = &p->anycast_delay;
2657 t->neigh_vars[10].data = &p->proxy_delay;
2658 t->neigh_vars[11].data = &p->locktime;
2660 if (dev) {
2661 dev_name_source = dev->name;
2662 t->neigh_dev[0].ctl_name = dev->ifindex;
2663 t->neigh_vars[12].procname = NULL;
2664 t->neigh_vars[13].procname = NULL;
2665 t->neigh_vars[14].procname = NULL;
2666 t->neigh_vars[15].procname = NULL;
2667 } else {
2668 dev_name_source = t->neigh_dev[0].procname;
2669 t->neigh_vars[12].data = (int *)(p + 1);
2670 t->neigh_vars[13].data = (int *)(p + 1) + 1;
2671 t->neigh_vars[14].data = (int *)(p + 1) + 2;
2672 t->neigh_vars[15].data = (int *)(p + 1) + 3;
2675 t->neigh_vars[16].data = &p->retrans_time;
2676 t->neigh_vars[17].data = &p->base_reachable_time;
2678 if (handler || strategy) {
2679 /* RetransTime */
2680 t->neigh_vars[3].proc_handler = handler;
2681 t->neigh_vars[3].strategy = strategy;
2682 t->neigh_vars[3].extra1 = dev;
2683 /* ReachableTime */
2684 t->neigh_vars[4].proc_handler = handler;
2685 t->neigh_vars[4].strategy = strategy;
2686 t->neigh_vars[4].extra1 = dev;
2687 /* RetransTime (in milliseconds)*/
2688 t->neigh_vars[16].proc_handler = handler;
2689 t->neigh_vars[16].strategy = strategy;
2690 t->neigh_vars[16].extra1 = dev;
2691 /* ReachableTime (in milliseconds) */
2692 t->neigh_vars[17].proc_handler = handler;
2693 t->neigh_vars[17].strategy = strategy;
2694 t->neigh_vars[17].extra1 = dev;
2697 dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2698 if (!dev_name) {
2699 err = -ENOBUFS;
2700 goto free;
2703 t->neigh_dev[0].procname = dev_name;
2705 t->neigh_neigh_dir[0].ctl_name = pdev_id;
2707 t->neigh_proto_dir[0].procname = p_name;
2708 t->neigh_proto_dir[0].ctl_name = p_id;
2710 t->neigh_dev[0].child = t->neigh_vars;
2711 t->neigh_neigh_dir[0].child = t->neigh_dev;
2712 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
2713 t->neigh_root_dir[0].child = t->neigh_proto_dir;
2715 t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
2716 if (!t->sysctl_header) {
2717 err = -ENOBUFS;
2718 goto free_procname;
2720 p->sysctl_table = t;
2721 return 0;
2723 /* error path */
2724 free_procname:
2725 kfree(dev_name);
2726 free:
2727 kfree(t);
2729 return err;
2732 void neigh_sysctl_unregister(struct neigh_parms *p)
2734 if (p->sysctl_table) {
2735 struct neigh_sysctl_table *t = p->sysctl_table;
2736 p->sysctl_table = NULL;
2737 unregister_sysctl_table(t->sysctl_header);
2738 kfree(t->neigh_dev[0].procname);
2739 kfree(t);
2743 #endif /* CONFIG_SYSCTL */
2745 static int __init neigh_init(void)
2747 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2748 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2749 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2751 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2752 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2754 return 0;
2757 subsys_initcall(neigh_init);
/* Neighbour cache entry points exported unconditionally. */
EXPORT_SYMBOL(__neigh_event_send);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
EXPORT_SYMBOL(neigh_create);
EXPORT_SYMBOL(neigh_destroy);
EXPORT_SYMBOL(neigh_event_ns);
EXPORT_SYMBOL(neigh_ifdown);
EXPORT_SYMBOL(neigh_lookup);
EXPORT_SYMBOL(neigh_lookup_nodev);
EXPORT_SYMBOL(neigh_parms_alloc);
EXPORT_SYMBOL(neigh_parms_release);
EXPORT_SYMBOL(neigh_rand_reach_time);
EXPORT_SYMBOL(neigh_resolve_output);
EXPORT_SYMBOL(neigh_table_clear);
EXPORT_SYMBOL(neigh_table_init);
EXPORT_SYMBOL(neigh_table_init_no_netlink);
EXPORT_SYMBOL(neigh_update);
EXPORT_SYMBOL(pneigh_enqueue);
EXPORT_SYMBOL(pneigh_lookup);

/* Exported only when CONFIG_ARPD is enabled. */
#ifdef CONFIG_ARPD
EXPORT_SYMBOL(neigh_app_ns);
#endif
/* The sysctl helpers are exported only when CONFIG_SYSCTL is enabled. */
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(neigh_sysctl_register);
EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif