inotify: fix race
[linux-2.6.22.y-op.git] / net / core / neighbour.c
blob9df26a07f0672ccabe610f366026fa4b2123524c
1 /*
2 * Generic address resolution entity
4 * Authors:
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
13 * Fixes:
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/module.h>
21 #include <linux/socket.h>
22 #include <linux/netdevice.h>
23 #include <linux/proc_fs.h>
24 #ifdef CONFIG_SYSCTL
25 #include <linux/sysctl.h>
26 #endif
27 #include <linux/times.h>
28 #include <net/neighbour.h>
29 #include <net/dst.h>
30 #include <net/sock.h>
31 #include <net/netevent.h>
32 #include <net/netlink.h>
33 #include <linux/rtnetlink.h>
34 #include <linux/random.h>
35 #include <linux/string.h>
/*
 * Debug verbosity for this file.  NEIGH_PRINTK0 always prints,
 * NEIGH_PRINTK1 prints when NEIGH_DEBUG >= 1 and NEIGH_PRINTK2 when
 * NEIGH_DEBUG >= 2; a disabled level expands to an empty statement.
 */
#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...)	printk(x)
#define NEIGH_NOPRINTK(x...)	do { ; } while(0)

#define NEIGH_PRINTK0		NEIGH_PRINTK

#if NEIGH_DEBUG >= 1
#define NEIGH_PRINTK1		NEIGH_PRINTK
#else
#define NEIGH_PRINTK1		NEIGH_NOPRINTK
#endif

#if NEIGH_DEBUG >= 2
#define NEIGH_PRINTK2		NEIGH_PRINTK
#else
#define NEIGH_PRINTK2		NEIGH_NOPRINTK
#endif

/* Mask applied to proxy-neighbour hash values (buckets 0..0xF). */
#define PNEIGH_HASHMASK		0xF
/* Forward declarations of routines used before their definition. */
static void neigh_timer_handler(unsigned long arg);
#ifdef CONFIG_ARPD
static void neigh_app_notify(struct neighbour *n);
#endif
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);

/*
 * File-scope state.
 * NOTE(review): neigh_tables is presumably the head of the table list that
 * neigh_tbl_lock protects -- confirm against the registration code.
 */
static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
69 Neighbour hash table buckets are protected with rwlock tbl->lock.
71 - All the scans/updates to hash buckets MUST be made under this lock.
72 - NOTHING clever should be done under this lock: no callbacks
73 to protocol backends, no attempts to send anything to the network.
74 Doing so will result in deadlocks if a backend/driver wants to use
75 the neighbour cache.
76 - If the entry requires some non-trivial actions, increase
77 its reference count and release table lock.
79 Neighbour entries are protected:
80 - with reference count.
81 - with rwlock neigh->lock
83 Reference count prevents destruction.
85 neigh->lock mainly serializes the ll address data and its validity state.
86 However, the same lock is also used to protect other entry fields:
87 - timer
88 - resolution queue
90 Again, nothing clever shall be done under neigh->lock;
91 the most complicated procedure we allow is dev->hard_header.
92 It is assumed that dev->hard_header is simplistic and does
93 not make callbacks to neighbour tables.
95 The last lock is neigh_tbl_lock. It is a pure SMP lock protecting
96 the list of neighbour tables. This list is used only in process context.
/* Reader/writer lock guarding the list of neighbour tables. */
static DEFINE_RWLOCK(neigh_tbl_lock);
101 static int neigh_blackhole(struct sk_buff *skb)
103 kfree_skb(skb);
104 return -ENETDOWN;
/*
 * Pick a randomised reachable time around @base.
 *
 * The result is drawn from the interval (1/2)*base ... (3/2)*base, which
 * corresponds to the default IPv6 settings and is deliberately not
 * overridable.  A @base of zero yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (net_random() % base) + (base >> 1);
}
119 static int neigh_forced_gc(struct neigh_table *tbl)
121 int shrunk = 0;
122 int i;
124 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
126 write_lock_bh(&tbl->lock);
127 for (i = 0; i <= tbl->hash_mask; i++) {
128 struct neighbour *n, **np;
130 np = &tbl->hash_buckets[i];
131 while ((n = *np) != NULL) {
132 /* Neighbour record may be discarded if:
133 * - nobody refers to it.
134 * - it is not permanent
136 write_lock(&n->lock);
137 if (atomic_read(&n->refcnt) == 1 &&
138 !(n->nud_state & NUD_PERMANENT)) {
139 *np = n->next;
140 n->dead = 1;
141 shrunk = 1;
142 write_unlock(&n->lock);
143 if (n->parms->neigh_cleanup)
144 n->parms->neigh_cleanup(n);
145 neigh_release(n);
146 continue;
148 write_unlock(&n->lock);
149 np = &n->next;
153 tbl->last_flush = jiffies;
155 write_unlock_bh(&tbl->lock);
157 return shrunk;
160 static int neigh_del_timer(struct neighbour *n)
162 if ((n->nud_state & NUD_IN_TIMER) &&
163 del_timer(&n->timer)) {
164 neigh_release(n);
165 return 1;
167 return 0;
170 static void pneigh_queue_purge(struct sk_buff_head *list)
172 struct sk_buff *skb;
174 while ((skb = skb_dequeue(list)) != NULL) {
175 dev_put(skb->dev);
176 kfree_skb(skb);
180 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
182 int i;
184 for (i = 0; i <= tbl->hash_mask; i++) {
185 struct neighbour *n, **np = &tbl->hash_buckets[i];
187 while ((n = *np) != NULL) {
188 if (dev && n->dev != dev) {
189 np = &n->next;
190 continue;
192 *np = n->next;
193 write_lock(&n->lock);
194 neigh_del_timer(n);
195 n->dead = 1;
197 if (atomic_read(&n->refcnt) != 1) {
198 /* The most unpleasant situation.
199 We must destroy neighbour entry,
200 but someone still uses it.
202 The destroy will be delayed until
203 the last user releases us, but
204 we must kill timers etc. and move
205 it to safe state.
207 skb_queue_purge(&n->arp_queue);
208 n->output = neigh_blackhole;
209 if (n->nud_state & NUD_VALID)
210 n->nud_state = NUD_NOARP;
211 else
212 n->nud_state = NUD_NONE;
213 NEIGH_PRINTK2("neigh %p is stray.\n", n);
215 write_unlock(&n->lock);
216 if (n->parms->neigh_cleanup)
217 n->parms->neigh_cleanup(n);
218 neigh_release(n);
223 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
225 write_lock_bh(&tbl->lock);
226 neigh_flush_dev(tbl, dev);
227 write_unlock_bh(&tbl->lock);
230 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
232 write_lock_bh(&tbl->lock);
233 neigh_flush_dev(tbl, dev);
234 pneigh_ifdown(tbl, dev);
235 write_unlock_bh(&tbl->lock);
237 del_timer_sync(&tbl->proxy_timer);
238 pneigh_queue_purge(&tbl->proxy_queue);
239 return 0;
242 static struct neighbour *neigh_alloc(struct neigh_table *tbl)
244 struct neighbour *n = NULL;
245 unsigned long now = jiffies;
246 int entries;
248 entries = atomic_inc_return(&tbl->entries) - 1;
249 if (entries >= tbl->gc_thresh3 ||
250 (entries >= tbl->gc_thresh2 &&
251 time_after(now, tbl->last_flush + 5 * HZ))) {
252 if (!neigh_forced_gc(tbl) &&
253 entries >= tbl->gc_thresh3)
254 goto out_entries;
257 n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
258 if (!n)
259 goto out_entries;
261 skb_queue_head_init(&n->arp_queue);
262 rwlock_init(&n->lock);
263 n->updated = n->used = now;
264 n->nud_state = NUD_NONE;
265 n->output = neigh_blackhole;
266 n->parms = neigh_parms_clone(&tbl->parms);
267 init_timer(&n->timer);
268 n->timer.function = neigh_timer_handler;
269 n->timer.data = (unsigned long)n;
271 NEIGH_CACHE_STAT_INC(tbl, allocs);
272 n->tbl = tbl;
273 atomic_set(&n->refcnt, 1);
274 n->dead = 1;
275 out:
276 return n;
278 out_entries:
279 atomic_dec(&tbl->entries);
280 goto out;
283 static struct neighbour **neigh_hash_alloc(unsigned int entries)
285 unsigned long size = entries * sizeof(struct neighbour *);
286 struct neighbour **ret;
288 if (size <= PAGE_SIZE) {
289 ret = kzalloc(size, GFP_ATOMIC);
290 } else {
291 ret = (struct neighbour **)
292 __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
294 return ret;
297 static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
299 unsigned long size = entries * sizeof(struct neighbour *);
301 if (size <= PAGE_SIZE)
302 kfree(hash);
303 else
304 free_pages((unsigned long)hash, get_order(size));
307 static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
309 struct neighbour **new_hash, **old_hash;
310 unsigned int i, new_hash_mask, old_entries;
312 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
314 BUG_ON(new_entries & (new_entries - 1));
315 new_hash = neigh_hash_alloc(new_entries);
316 if (!new_hash)
317 return;
319 old_entries = tbl->hash_mask + 1;
320 new_hash_mask = new_entries - 1;
321 old_hash = tbl->hash_buckets;
323 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
324 for (i = 0; i < old_entries; i++) {
325 struct neighbour *n, *next;
327 for (n = old_hash[i]; n; n = next) {
328 unsigned int hash_val = tbl->hash(n->primary_key, n->dev);
330 hash_val &= new_hash_mask;
331 next = n->next;
333 n->next = new_hash[hash_val];
334 new_hash[hash_val] = n;
337 tbl->hash_buckets = new_hash;
338 tbl->hash_mask = new_hash_mask;
340 neigh_hash_free(old_hash, old_entries);
343 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
344 struct net_device *dev)
346 struct neighbour *n;
347 int key_len = tbl->key_len;
348 u32 hash_val = tbl->hash(pkey, dev);
350 NEIGH_CACHE_STAT_INC(tbl, lookups);
352 read_lock_bh(&tbl->lock);
353 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
354 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
355 neigh_hold(n);
356 NEIGH_CACHE_STAT_INC(tbl, hits);
357 break;
360 read_unlock_bh(&tbl->lock);
361 return n;
364 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
366 struct neighbour *n;
367 int key_len = tbl->key_len;
368 u32 hash_val = tbl->hash(pkey, NULL);
370 NEIGH_CACHE_STAT_INC(tbl, lookups);
372 read_lock_bh(&tbl->lock);
373 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
374 if (!memcmp(n->primary_key, pkey, key_len)) {
375 neigh_hold(n);
376 NEIGH_CACHE_STAT_INC(tbl, hits);
377 break;
380 read_unlock_bh(&tbl->lock);
381 return n;
384 struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
385 struct net_device *dev)
387 u32 hash_val;
388 int key_len = tbl->key_len;
389 int error;
390 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
392 if (!n) {
393 rc = ERR_PTR(-ENOBUFS);
394 goto out;
397 memcpy(n->primary_key, pkey, key_len);
398 n->dev = dev;
399 dev_hold(dev);
401 /* Protocol specific setup. */
402 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
403 rc = ERR_PTR(error);
404 goto out_neigh_release;
407 /* Device specific setup. */
408 if (n->parms->neigh_setup &&
409 (error = n->parms->neigh_setup(n)) < 0) {
410 rc = ERR_PTR(error);
411 goto out_neigh_release;
414 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
416 write_lock_bh(&tbl->lock);
418 if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
419 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);
421 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
423 if (n->parms->dead) {
424 rc = ERR_PTR(-EINVAL);
425 goto out_tbl_unlock;
428 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
429 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
430 neigh_hold(n1);
431 rc = n1;
432 goto out_tbl_unlock;
436 n->next = tbl->hash_buckets[hash_val];
437 tbl->hash_buckets[hash_val] = n;
438 n->dead = 0;
439 neigh_hold(n);
440 write_unlock_bh(&tbl->lock);
441 NEIGH_PRINTK2("neigh %p is created.\n", n);
442 rc = n;
443 out:
444 return rc;
445 out_tbl_unlock:
446 write_unlock_bh(&tbl->lock);
447 out_neigh_release:
448 neigh_release(n);
449 goto out;
452 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
453 struct net_device *dev, int creat)
455 struct pneigh_entry *n;
456 int key_len = tbl->key_len;
457 u32 hash_val = *(u32 *)(pkey + key_len - 4);
459 hash_val ^= (hash_val >> 16);
460 hash_val ^= hash_val >> 8;
461 hash_val ^= hash_val >> 4;
462 hash_val &= PNEIGH_HASHMASK;
464 read_lock_bh(&tbl->lock);
466 for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
467 if (!memcmp(n->key, pkey, key_len) &&
468 (n->dev == dev || !n->dev)) {
469 read_unlock_bh(&tbl->lock);
470 goto out;
473 read_unlock_bh(&tbl->lock);
474 n = NULL;
475 if (!creat)
476 goto out;
478 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
479 if (!n)
480 goto out;
482 memcpy(n->key, pkey, key_len);
483 n->dev = dev;
484 if (dev)
485 dev_hold(dev);
487 if (tbl->pconstructor && tbl->pconstructor(n)) {
488 if (dev)
489 dev_put(dev);
490 kfree(n);
491 n = NULL;
492 goto out;
495 write_lock_bh(&tbl->lock);
496 n->next = tbl->phash_buckets[hash_val];
497 tbl->phash_buckets[hash_val] = n;
498 write_unlock_bh(&tbl->lock);
499 out:
500 return n;
504 int pneigh_delete(struct neigh_table *tbl, const void *pkey,
505 struct net_device *dev)
507 struct pneigh_entry *n, **np;
508 int key_len = tbl->key_len;
509 u32 hash_val = *(u32 *)(pkey + key_len - 4);
511 hash_val ^= (hash_val >> 16);
512 hash_val ^= hash_val >> 8;
513 hash_val ^= hash_val >> 4;
514 hash_val &= PNEIGH_HASHMASK;
516 write_lock_bh(&tbl->lock);
517 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
518 np = &n->next) {
519 if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
520 *np = n->next;
521 write_unlock_bh(&tbl->lock);
522 if (tbl->pdestructor)
523 tbl->pdestructor(n);
524 if (n->dev)
525 dev_put(n->dev);
526 kfree(n);
527 return 0;
530 write_unlock_bh(&tbl->lock);
531 return -ENOENT;
534 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
536 struct pneigh_entry *n, **np;
537 u32 h;
539 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
540 np = &tbl->phash_buckets[h];
541 while ((n = *np) != NULL) {
542 if (!dev || n->dev == dev) {
543 *np = n->next;
544 if (tbl->pdestructor)
545 tbl->pdestructor(n);
546 if (n->dev)
547 dev_put(n->dev);
548 kfree(n);
549 continue;
551 np = &n->next;
554 return -ENOENT;
559 * neighbour must already be out of the table;
562 void neigh_destroy(struct neighbour *neigh)
564 struct hh_cache *hh;
566 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
568 if (!neigh->dead) {
569 printk(KERN_WARNING
570 "Destroying alive neighbour %p\n", neigh);
571 dump_stack();
572 return;
575 if (neigh_del_timer(neigh))
576 printk(KERN_WARNING "Impossible event.\n");
578 while ((hh = neigh->hh) != NULL) {
579 neigh->hh = hh->hh_next;
580 hh->hh_next = NULL;
582 write_seqlock_bh(&hh->hh_lock);
583 hh->hh_output = neigh_blackhole;
584 write_sequnlock_bh(&hh->hh_lock);
585 if (atomic_dec_and_test(&hh->hh_refcnt))
586 kfree(hh);
589 skb_queue_purge(&neigh->arp_queue);
591 dev_put(neigh->dev);
592 neigh_parms_put(neigh->parms);
594 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
596 atomic_dec(&neigh->tbl->entries);
597 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
600 /* Neighbour state is suspicious;
601 disable fast path.
603 Called with write_locked neigh.
605 static void neigh_suspect(struct neighbour *neigh)
607 struct hh_cache *hh;
609 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
611 neigh->output = neigh->ops->output;
613 for (hh = neigh->hh; hh; hh = hh->hh_next)
614 hh->hh_output = neigh->ops->output;
617 /* Neighbour state is OK;
618 enable fast path.
620 Called with write_locked neigh.
622 static void neigh_connect(struct neighbour *neigh)
624 struct hh_cache *hh;
626 NEIGH_PRINTK2("neigh %p is connected.\n", neigh);
628 neigh->output = neigh->ops->connected_output;
630 for (hh = neigh->hh; hh; hh = hh->hh_next)
631 hh->hh_output = neigh->ops->hh_output;
634 static void neigh_periodic_timer(unsigned long arg)
636 struct neigh_table *tbl = (struct neigh_table *)arg;
637 struct neighbour *n, **np;
638 unsigned long expire, now = jiffies;
640 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
642 write_lock(&tbl->lock);
645 * periodically recompute ReachableTime from random function
648 if (time_after(now, tbl->last_rand + 300 * HZ)) {
649 struct neigh_parms *p;
650 tbl->last_rand = now;
651 for (p = &tbl->parms; p; p = p->next)
652 p->reachable_time =
653 neigh_rand_reach_time(p->base_reachable_time);
656 np = &tbl->hash_buckets[tbl->hash_chain_gc];
657 tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);
659 while ((n = *np) != NULL) {
660 unsigned int state;
662 write_lock(&n->lock);
664 state = n->nud_state;
665 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
666 write_unlock(&n->lock);
667 goto next_elt;
670 if (time_before(n->used, n->confirmed))
671 n->used = n->confirmed;
673 if (atomic_read(&n->refcnt) == 1 &&
674 (state == NUD_FAILED ||
675 time_after(now, n->used + n->parms->gc_staletime))) {
676 *np = n->next;
677 n->dead = 1;
678 write_unlock(&n->lock);
679 if (n->parms->neigh_cleanup)
680 n->parms->neigh_cleanup(n);
681 neigh_release(n);
682 continue;
684 write_unlock(&n->lock);
686 next_elt:
687 np = &n->next;
690 /* Cycle through all hash buckets every base_reachable_time/2 ticks.
691 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
692 * base_reachable_time.
694 expire = tbl->parms.base_reachable_time >> 1;
695 expire /= (tbl->hash_mask + 1);
696 if (!expire)
697 expire = 1;
699 if (expire>HZ)
700 mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
701 else
702 mod_timer(&tbl->gc_timer, now + expire);
704 write_unlock(&tbl->lock);
707 static __inline__ int neigh_max_probes(struct neighbour *n)
709 struct neigh_parms *p = n->parms;
710 return (n->nud_state & NUD_PROBE ?
711 p->ucast_probes :
712 p->ucast_probes + p->app_probes + p->mcast_probes);
715 static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
717 if (unlikely(mod_timer(&n->timer, when))) {
718 printk("NEIGH: BUG, double timer add, state is %x\n",
719 n->nud_state);
720 dump_stack();
724 /* Called when a timer expires for a neighbour entry. */
726 static void neigh_timer_handler(unsigned long arg)
728 unsigned long now, next;
729 struct neighbour *neigh = (struct neighbour *)arg;
730 unsigned state;
731 int notify = 0;
733 write_lock(&neigh->lock);
735 state = neigh->nud_state;
736 now = jiffies;
737 next = now + HZ;
739 if (!(state & NUD_IN_TIMER)) {
740 #ifndef CONFIG_SMP
741 printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
742 #endif
743 goto out;
746 if (state & NUD_REACHABLE) {
747 if (time_before_eq(now,
748 neigh->confirmed + neigh->parms->reachable_time)) {
749 NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
750 next = neigh->confirmed + neigh->parms->reachable_time;
751 } else if (time_before_eq(now,
752 neigh->used + neigh->parms->delay_probe_time)) {
753 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
754 neigh->nud_state = NUD_DELAY;
755 neigh->updated = jiffies;
756 neigh_suspect(neigh);
757 next = now + neigh->parms->delay_probe_time;
758 } else {
759 NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
760 neigh->nud_state = NUD_STALE;
761 neigh->updated = jiffies;
762 neigh_suspect(neigh);
763 notify = 1;
765 } else if (state & NUD_DELAY) {
766 if (time_before_eq(now,
767 neigh->confirmed + neigh->parms->delay_probe_time)) {
768 NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
769 neigh->nud_state = NUD_REACHABLE;
770 neigh->updated = jiffies;
771 neigh_connect(neigh);
772 notify = 1;
773 next = neigh->confirmed + neigh->parms->reachable_time;
774 } else {
775 NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
776 neigh->nud_state = NUD_PROBE;
777 neigh->updated = jiffies;
778 atomic_set(&neigh->probes, 0);
779 next = now + neigh->parms->retrans_time;
781 } else {
782 /* NUD_PROBE|NUD_INCOMPLETE */
783 next = now + neigh->parms->retrans_time;
786 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
787 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
788 struct sk_buff *skb;
790 neigh->nud_state = NUD_FAILED;
791 neigh->updated = jiffies;
792 notify = 1;
793 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
794 NEIGH_PRINTK2("neigh %p is failed.\n", neigh);
796 /* It is very thin place. report_unreachable is very complicated
797 routine. Particularly, it can hit the same neighbour entry!
799 So that, we try to be accurate and avoid dead loop. --ANK
801 while (neigh->nud_state == NUD_FAILED &&
802 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
803 write_unlock(&neigh->lock);
804 neigh->ops->error_report(neigh, skb);
805 write_lock(&neigh->lock);
807 skb_queue_purge(&neigh->arp_queue);
810 if (neigh->nud_state & NUD_IN_TIMER) {
811 if (time_before(next, jiffies + HZ/2))
812 next = jiffies + HZ/2;
813 if (!mod_timer(&neigh->timer, next))
814 neigh_hold(neigh);
816 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
817 struct sk_buff *skb = skb_peek(&neigh->arp_queue);
818 /* keep skb alive even if arp_queue overflows */
819 if (skb)
820 skb_get(skb);
821 write_unlock(&neigh->lock);
822 neigh->ops->solicit(neigh, skb);
823 atomic_inc(&neigh->probes);
824 if (skb)
825 kfree_skb(skb);
826 } else {
827 out:
828 write_unlock(&neigh->lock);
830 if (notify)
831 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
833 #ifdef CONFIG_ARPD
834 if (notify && neigh->parms->app_probes)
835 neigh_app_notify(neigh);
836 #endif
837 neigh_release(neigh);
840 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
842 int rc;
843 unsigned long now;
845 write_lock_bh(&neigh->lock);
847 rc = 0;
848 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
849 goto out_unlock_bh;
851 now = jiffies;
853 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
854 if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
855 atomic_set(&neigh->probes, neigh->parms->ucast_probes);
856 neigh->nud_state = NUD_INCOMPLETE;
857 neigh->updated = jiffies;
858 neigh_hold(neigh);
859 neigh_add_timer(neigh, now + 1);
860 } else {
861 neigh->nud_state = NUD_FAILED;
862 neigh->updated = jiffies;
863 write_unlock_bh(&neigh->lock);
865 if (skb)
866 kfree_skb(skb);
867 return 1;
869 } else if (neigh->nud_state & NUD_STALE) {
870 NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
871 neigh_hold(neigh);
872 neigh->nud_state = NUD_DELAY;
873 neigh->updated = jiffies;
874 neigh_add_timer(neigh,
875 jiffies + neigh->parms->delay_probe_time);
878 if (neigh->nud_state == NUD_INCOMPLETE) {
879 if (skb) {
880 if (skb_queue_len(&neigh->arp_queue) >=
881 neigh->parms->queue_len) {
882 struct sk_buff *buff;
883 buff = neigh->arp_queue.next;
884 __skb_unlink(buff, &neigh->arp_queue);
885 kfree_skb(buff);
887 __skb_queue_tail(&neigh->arp_queue, skb);
889 rc = 1;
891 out_unlock_bh:
892 write_unlock_bh(&neigh->lock);
893 return rc;
896 static void neigh_update_hhs(struct neighbour *neigh)
898 struct hh_cache *hh;
899 void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
900 neigh->dev->header_cache_update;
902 if (update) {
903 for (hh = neigh->hh; hh; hh = hh->hh_next) {
904 write_seqlock_bh(&hh->hh_lock);
905 update(hh, neigh->dev, neigh->ha);
906 write_sequnlock_bh(&hh->hh_lock);
913 /* Generic update routine.
914 -- lladdr is new lladdr or NULL, if it is not supplied.
915 -- new is new state.
916 -- flags
917 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
918 if it is different.
919 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
920 lladdr instead of overriding it
921 if it is different.
922 It also allows to retain current state
923 if lladdr is unchanged.
924 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
926 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
927 NTF_ROUTER flag.
928 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
929 a router.
931 Caller MUST hold reference count on the entry.
934 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
935 u32 flags)
937 u8 old;
938 int err;
939 int notify = 0;
940 struct net_device *dev;
941 int update_isrouter = 0;
943 write_lock_bh(&neigh->lock);
945 dev = neigh->dev;
946 old = neigh->nud_state;
947 err = -EPERM;
949 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
950 (old & (NUD_NOARP | NUD_PERMANENT)))
951 goto out;
953 if (!(new & NUD_VALID)) {
954 neigh_del_timer(neigh);
955 if (old & NUD_CONNECTED)
956 neigh_suspect(neigh);
957 neigh->nud_state = new;
958 err = 0;
959 notify = old & NUD_VALID;
960 goto out;
963 /* Compare new lladdr with cached one */
964 if (!dev->addr_len) {
965 /* First case: device needs no address. */
966 lladdr = neigh->ha;
967 } else if (lladdr) {
968 /* The second case: if something is already cached
969 and a new address is proposed:
970 - compare new & old
971 - if they are different, check override flag
973 if ((old & NUD_VALID) &&
974 !memcmp(lladdr, neigh->ha, dev->addr_len))
975 lladdr = neigh->ha;
976 } else {
977 /* No address is supplied; if we know something,
978 use it, otherwise discard the request.
980 err = -EINVAL;
981 if (!(old & NUD_VALID))
982 goto out;
983 lladdr = neigh->ha;
986 if (new & NUD_CONNECTED)
987 neigh->confirmed = jiffies;
988 neigh->updated = jiffies;
990 /* If entry was valid and address is not changed,
991 do not change entry state, if new one is STALE.
993 err = 0;
994 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
995 if (old & NUD_VALID) {
996 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
997 update_isrouter = 0;
998 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
999 (old & NUD_CONNECTED)) {
1000 lladdr = neigh->ha;
1001 new = NUD_STALE;
1002 } else
1003 goto out;
1004 } else {
1005 if (lladdr == neigh->ha && new == NUD_STALE &&
1006 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1007 (old & NUD_CONNECTED))
1009 new = old;
1013 if (new != old) {
1014 neigh_del_timer(neigh);
1015 if (new & NUD_IN_TIMER) {
1016 neigh_hold(neigh);
1017 neigh_add_timer(neigh, (jiffies +
1018 ((new & NUD_REACHABLE) ?
1019 neigh->parms->reachable_time :
1020 0)));
1022 neigh->nud_state = new;
1025 if (lladdr != neigh->ha) {
1026 memcpy(&neigh->ha, lladdr, dev->addr_len);
1027 neigh_update_hhs(neigh);
1028 if (!(new & NUD_CONNECTED))
1029 neigh->confirmed = jiffies -
1030 (neigh->parms->base_reachable_time << 1);
1031 notify = 1;
1033 if (new == old)
1034 goto out;
1035 if (new & NUD_CONNECTED)
1036 neigh_connect(neigh);
1037 else
1038 neigh_suspect(neigh);
1039 if (!(old & NUD_VALID)) {
1040 struct sk_buff *skb;
1042 /* Again: avoid dead loop if something went wrong */
1044 while (neigh->nud_state & NUD_VALID &&
1045 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1046 struct neighbour *n1 = neigh;
1047 write_unlock_bh(&neigh->lock);
1048 /* On shaper/eql skb->dst->neighbour != neigh :( */
1049 if (skb->dst && skb->dst->neighbour)
1050 n1 = skb->dst->neighbour;
1051 n1->output(skb);
1052 write_lock_bh(&neigh->lock);
1054 skb_queue_purge(&neigh->arp_queue);
1056 out:
1057 if (update_isrouter) {
1058 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1059 (neigh->flags | NTF_ROUTER) :
1060 (neigh->flags & ~NTF_ROUTER);
1062 write_unlock_bh(&neigh->lock);
1064 if (notify)
1065 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
1066 #ifdef CONFIG_ARPD
1067 if (notify && neigh->parms->app_probes)
1068 neigh_app_notify(neigh);
1069 #endif
1070 return err;
1073 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1074 u8 *lladdr, void *saddr,
1075 struct net_device *dev)
1077 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1078 lladdr || !dev->addr_len);
1079 if (neigh)
1080 neigh_update(neigh, lladdr, NUD_STALE,
1081 NEIGH_UPDATE_F_OVERRIDE);
1082 return neigh;
1085 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1086 __be16 protocol)
1088 struct hh_cache *hh;
1089 struct net_device *dev = dst->dev;
1091 for (hh = n->hh; hh; hh = hh->hh_next)
1092 if (hh->hh_type == protocol)
1093 break;
1095 if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
1096 seqlock_init(&hh->hh_lock);
1097 hh->hh_type = protocol;
1098 atomic_set(&hh->hh_refcnt, 0);
1099 hh->hh_next = NULL;
1100 if (dev->hard_header_cache(n, hh)) {
1101 kfree(hh);
1102 hh = NULL;
1103 } else {
1104 atomic_inc(&hh->hh_refcnt);
1105 hh->hh_next = n->hh;
1106 n->hh = hh;
1107 if (n->nud_state & NUD_CONNECTED)
1108 hh->hh_output = n->ops->hh_output;
1109 else
1110 hh->hh_output = n->ops->output;
1113 if (hh) {
1114 atomic_inc(&hh->hh_refcnt);
1115 dst->hh = hh;
1119 /* This function can be used in contexts, where only old dev_queue_xmit
1120 worked, f.e. if you want to override normal output path (eql, shaper),
1121 but resolution is not made yet.
1124 int neigh_compat_output(struct sk_buff *skb)
1126 struct net_device *dev = skb->dev;
1128 __skb_pull(skb, skb_network_offset(skb));
1130 if (dev->hard_header &&
1131 dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1132 skb->len) < 0 &&
1133 dev->rebuild_header(skb))
1134 return 0;
1136 return dev_queue_xmit(skb);
1139 /* Slow and careful. */
1141 int neigh_resolve_output(struct sk_buff *skb)
1143 struct dst_entry *dst = skb->dst;
1144 struct neighbour *neigh;
1145 int rc = 0;
1147 if (!dst || !(neigh = dst->neighbour))
1148 goto discard;
1150 __skb_pull(skb, skb_network_offset(skb));
1152 if (!neigh_event_send(neigh, skb)) {
1153 int err;
1154 struct net_device *dev = neigh->dev;
1155 if (dev->hard_header_cache && !dst->hh) {
1156 write_lock_bh(&neigh->lock);
1157 if (!dst->hh)
1158 neigh_hh_init(neigh, dst, dst->ops->protocol);
1159 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1160 neigh->ha, NULL, skb->len);
1161 write_unlock_bh(&neigh->lock);
1162 } else {
1163 read_lock_bh(&neigh->lock);
1164 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1165 neigh->ha, NULL, skb->len);
1166 read_unlock_bh(&neigh->lock);
1168 if (err >= 0)
1169 rc = neigh->ops->queue_xmit(skb);
1170 else
1171 goto out_kfree_skb;
1173 out:
1174 return rc;
1175 discard:
1176 NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
1177 dst, dst ? dst->neighbour : NULL);
1178 out_kfree_skb:
1179 rc = -EINVAL;
1180 kfree_skb(skb);
1181 goto out;
1184 /* As fast as possible without hh cache */
1186 int neigh_connected_output(struct sk_buff *skb)
1188 int err;
1189 struct dst_entry *dst = skb->dst;
1190 struct neighbour *neigh = dst->neighbour;
1191 struct net_device *dev = neigh->dev;
1193 __skb_pull(skb, skb_network_offset(skb));
1195 read_lock_bh(&neigh->lock);
1196 err = dev->hard_header(skb, dev, ntohs(skb->protocol),
1197 neigh->ha, NULL, skb->len);
1198 read_unlock_bh(&neigh->lock);
1199 if (err >= 0)
1200 err = neigh->ops->queue_xmit(skb);
1201 else {
1202 err = -EINVAL;
1203 kfree_skb(skb);
1205 return err;
1208 static void neigh_proxy_process(unsigned long arg)
1210 struct neigh_table *tbl = (struct neigh_table *)arg;
1211 long sched_next = 0;
1212 unsigned long now = jiffies;
1213 struct sk_buff *skb;
1215 spin_lock(&tbl->proxy_queue.lock);
1217 skb = tbl->proxy_queue.next;
1219 while (skb != (struct sk_buff *)&tbl->proxy_queue) {
1220 struct sk_buff *back = skb;
1221 long tdif = NEIGH_CB(back)->sched_next - now;
1223 skb = skb->next;
1224 if (tdif <= 0) {
1225 struct net_device *dev = back->dev;
1226 __skb_unlink(back, &tbl->proxy_queue);
1227 if (tbl->proxy_redo && netif_running(dev))
1228 tbl->proxy_redo(back);
1229 else
1230 kfree_skb(back);
1232 dev_put(dev);
1233 } else if (!sched_next || tdif < sched_next)
1234 sched_next = tdif;
1236 del_timer(&tbl->proxy_timer);
1237 if (sched_next)
1238 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1239 spin_unlock(&tbl->proxy_queue.lock);
1242 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1243 struct sk_buff *skb)
1245 unsigned long now = jiffies;
1246 unsigned long sched_next = now + (net_random() % p->proxy_delay);
1248 if (tbl->proxy_queue.qlen > p->proxy_qlen) {
1249 kfree_skb(skb);
1250 return;
1253 NEIGH_CB(skb)->sched_next = sched_next;
1254 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1256 spin_lock(&tbl->proxy_queue.lock);
1257 if (del_timer(&tbl->proxy_timer)) {
1258 if (time_before(tbl->proxy_timer.expires, sched_next))
1259 sched_next = tbl->proxy_timer.expires;
1261 dst_release(skb->dst);
1262 skb->dst = NULL;
1263 dev_hold(skb->dev);
1264 __skb_queue_tail(&tbl->proxy_queue, skb);
1265 mod_timer(&tbl->proxy_timer, sched_next);
1266 spin_unlock(&tbl->proxy_queue.lock);
1270 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1271 struct neigh_table *tbl)
1273 struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1275 if (p) {
1276 p->tbl = tbl;
1277 atomic_set(&p->refcnt, 1);
1278 INIT_RCU_HEAD(&p->rcu_head);
1279 p->reachable_time =
1280 neigh_rand_reach_time(p->base_reachable_time);
1281 if (dev) {
1282 if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
1283 kfree(p);
1284 return NULL;
1287 dev_hold(dev);
1288 p->dev = dev;
1290 p->sysctl_table = NULL;
1291 write_lock_bh(&tbl->lock);
1292 p->next = tbl->parms.next;
1293 tbl->parms.next = p;
1294 write_unlock_bh(&tbl->lock);
1296 return p;
1299 static void neigh_rcu_free_parms(struct rcu_head *head)
1301 struct neigh_parms *parms =
1302 container_of(head, struct neigh_parms, rcu_head);
1304 neigh_parms_put(parms);
1307 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1309 struct neigh_parms **p;
1311 if (!parms || parms == &tbl->parms)
1312 return;
1313 write_lock_bh(&tbl->lock);
1314 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1315 if (*p == parms) {
1316 *p = parms->next;
1317 parms->dead = 1;
1318 write_unlock_bh(&tbl->lock);
1319 if (parms->dev)
1320 dev_put(parms->dev);
1321 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1322 return;
1325 write_unlock_bh(&tbl->lock);
1326 NEIGH_PRINTK1("neigh_parms_release: not found\n");
/* Free the memory backing @parms. */
void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}
1334 static struct lock_class_key neigh_table_proxy_queue_class;
1336 void neigh_table_init_no_netlink(struct neigh_table *tbl)
1338 unsigned long now = jiffies;
1339 unsigned long phsize;
1341 atomic_set(&tbl->parms.refcnt, 1);
1342 INIT_RCU_HEAD(&tbl->parms.rcu_head);
1343 tbl->parms.reachable_time =
1344 neigh_rand_reach_time(tbl->parms.base_reachable_time);
1346 if (!tbl->kmem_cachep)
1347 tbl->kmem_cachep =
1348 kmem_cache_create(tbl->id, tbl->entry_size, 0,
1349 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1350 NULL, NULL);
1351 tbl->stats = alloc_percpu(struct neigh_statistics);
1352 if (!tbl->stats)
1353 panic("cannot create neighbour cache statistics");
1355 #ifdef CONFIG_PROC_FS
1356 tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
1357 if (!tbl->pde)
1358 panic("cannot create neighbour proc dir entry");
1359 tbl->pde->proc_fops = &neigh_stat_seq_fops;
1360 tbl->pde->data = tbl;
1361 #endif
1363 tbl->hash_mask = 1;
1364 tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1366 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1367 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1369 if (!tbl->hash_buckets || !tbl->phash_buckets)
1370 panic("cannot allocate neighbour cache hashes");
1372 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1374 rwlock_init(&tbl->lock);
1375 init_timer(&tbl->gc_timer);
1376 tbl->gc_timer.data = (unsigned long)tbl;
1377 tbl->gc_timer.function = neigh_periodic_timer;
1378 tbl->gc_timer.expires = now + 1;
1379 add_timer(&tbl->gc_timer);
1381 init_timer(&tbl->proxy_timer);
1382 tbl->proxy_timer.data = (unsigned long)tbl;
1383 tbl->proxy_timer.function = neigh_proxy_process;
1384 skb_queue_head_init_class(&tbl->proxy_queue,
1385 &neigh_table_proxy_queue_class);
1387 tbl->last_flush = now;
1388 tbl->last_rand = now + tbl->parms.reachable_time * 20;
1391 void neigh_table_init(struct neigh_table *tbl)
1393 struct neigh_table *tmp;
1395 neigh_table_init_no_netlink(tbl);
1396 write_lock(&neigh_tbl_lock);
1397 for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1398 if (tmp->family == tbl->family)
1399 break;
1401 tbl->next = neigh_tables;
1402 neigh_tables = tbl;
1403 write_unlock(&neigh_tbl_lock);
1405 if (unlikely(tmp)) {
1406 printk(KERN_ERR "NEIGH: Registering multiple tables for "
1407 "family %d\n", tbl->family);
1408 dump_stack();
1412 int neigh_table_clear(struct neigh_table *tbl)
1414 struct neigh_table **tp;
1416 /* It is not clean... Fix it to unload IPv6 module safely */
1417 del_timer_sync(&tbl->gc_timer);
1418 del_timer_sync(&tbl->proxy_timer);
1419 pneigh_queue_purge(&tbl->proxy_queue);
1420 neigh_ifdown(tbl, NULL);
1421 if (atomic_read(&tbl->entries))
1422 printk(KERN_CRIT "neighbour leakage\n");
1423 write_lock(&neigh_tbl_lock);
1424 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1425 if (*tp == tbl) {
1426 *tp = tbl->next;
1427 break;
1430 write_unlock(&neigh_tbl_lock);
1432 neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
1433 tbl->hash_buckets = NULL;
1435 kfree(tbl->phash_buckets);
1436 tbl->phash_buckets = NULL;
1438 free_percpu(tbl->stats);
1439 tbl->stats = NULL;
1441 return 0;
1444 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1446 struct ndmsg *ndm;
1447 struct nlattr *dst_attr;
1448 struct neigh_table *tbl;
1449 struct net_device *dev = NULL;
1450 int err = -EINVAL;
1452 if (nlmsg_len(nlh) < sizeof(*ndm))
1453 goto out;
1455 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1456 if (dst_attr == NULL)
1457 goto out;
1459 ndm = nlmsg_data(nlh);
1460 if (ndm->ndm_ifindex) {
1461 dev = dev_get_by_index(ndm->ndm_ifindex);
1462 if (dev == NULL) {
1463 err = -ENODEV;
1464 goto out;
1468 read_lock(&neigh_tbl_lock);
1469 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1470 struct neighbour *neigh;
1472 if (tbl->family != ndm->ndm_family)
1473 continue;
1474 read_unlock(&neigh_tbl_lock);
1476 if (nla_len(dst_attr) < tbl->key_len)
1477 goto out_dev_put;
1479 if (ndm->ndm_flags & NTF_PROXY) {
1480 err = pneigh_delete(tbl, nla_data(dst_attr), dev);
1481 goto out_dev_put;
1484 if (dev == NULL)
1485 goto out_dev_put;
1487 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1488 if (neigh == NULL) {
1489 err = -ENOENT;
1490 goto out_dev_put;
1493 err = neigh_update(neigh, NULL, NUD_FAILED,
1494 NEIGH_UPDATE_F_OVERRIDE |
1495 NEIGH_UPDATE_F_ADMIN);
1496 neigh_release(neigh);
1497 goto out_dev_put;
1499 read_unlock(&neigh_tbl_lock);
1500 err = -EAFNOSUPPORT;
1502 out_dev_put:
1503 if (dev)
1504 dev_put(dev);
1505 out:
1506 return err;
1509 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1511 struct ndmsg *ndm;
1512 struct nlattr *tb[NDA_MAX+1];
1513 struct neigh_table *tbl;
1514 struct net_device *dev = NULL;
1515 int err;
1517 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1518 if (err < 0)
1519 goto out;
1521 err = -EINVAL;
1522 if (tb[NDA_DST] == NULL)
1523 goto out;
1525 ndm = nlmsg_data(nlh);
1526 if (ndm->ndm_ifindex) {
1527 dev = dev_get_by_index(ndm->ndm_ifindex);
1528 if (dev == NULL) {
1529 err = -ENODEV;
1530 goto out;
1533 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1534 goto out_dev_put;
1537 read_lock(&neigh_tbl_lock);
1538 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1539 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1540 struct neighbour *neigh;
1541 void *dst, *lladdr;
1543 if (tbl->family != ndm->ndm_family)
1544 continue;
1545 read_unlock(&neigh_tbl_lock);
1547 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1548 goto out_dev_put;
1549 dst = nla_data(tb[NDA_DST]);
1550 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1552 if (ndm->ndm_flags & NTF_PROXY) {
1553 struct pneigh_entry *pn;
1555 err = -ENOBUFS;
1556 pn = pneigh_lookup(tbl, dst, dev, 1);
1557 if (pn) {
1558 pn->flags = ndm->ndm_flags;
1559 err = 0;
1561 goto out_dev_put;
1564 if (dev == NULL)
1565 goto out_dev_put;
1567 neigh = neigh_lookup(tbl, dst, dev);
1568 if (neigh == NULL) {
1569 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1570 err = -ENOENT;
1571 goto out_dev_put;
1574 neigh = __neigh_lookup_errno(tbl, dst, dev);
1575 if (IS_ERR(neigh)) {
1576 err = PTR_ERR(neigh);
1577 goto out_dev_put;
1579 } else {
1580 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1581 err = -EEXIST;
1582 neigh_release(neigh);
1583 goto out_dev_put;
1586 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1587 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1590 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1591 neigh_release(neigh);
1592 goto out_dev_put;
1595 read_unlock(&neigh_tbl_lock);
1596 err = -EAFNOSUPPORT;
1598 out_dev_put:
1599 if (dev)
1600 dev_put(dev);
1601 out:
1602 return err;
1605 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1607 struct nlattr *nest;
1609 nest = nla_nest_start(skb, NDTA_PARMS);
1610 if (nest == NULL)
1611 return -ENOBUFS;
1613 if (parms->dev)
1614 NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
1616 NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
1617 NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
1618 NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
1619 NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
1620 NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
1621 NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
1622 NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
1623 NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
1624 parms->base_reachable_time);
1625 NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
1626 NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
1627 NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
1628 NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
1629 NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
1630 NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);
1632 return nla_nest_end(skb, nest);
1634 nla_put_failure:
1635 return nla_nest_cancel(skb, nest);
1638 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1639 u32 pid, u32 seq, int type, int flags)
1641 struct nlmsghdr *nlh;
1642 struct ndtmsg *ndtmsg;
1644 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1645 if (nlh == NULL)
1646 return -EMSGSIZE;
1648 ndtmsg = nlmsg_data(nlh);
1650 read_lock_bh(&tbl->lock);
1651 ndtmsg->ndtm_family = tbl->family;
1652 ndtmsg->ndtm_pad1 = 0;
1653 ndtmsg->ndtm_pad2 = 0;
1655 NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
1656 NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
1657 NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
1658 NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
1659 NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);
1662 unsigned long now = jiffies;
1663 unsigned int flush_delta = now - tbl->last_flush;
1664 unsigned int rand_delta = now - tbl->last_rand;
1666 struct ndt_config ndc = {
1667 .ndtc_key_len = tbl->key_len,
1668 .ndtc_entry_size = tbl->entry_size,
1669 .ndtc_entries = atomic_read(&tbl->entries),
1670 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1671 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1672 .ndtc_hash_rnd = tbl->hash_rnd,
1673 .ndtc_hash_mask = tbl->hash_mask,
1674 .ndtc_hash_chain_gc = tbl->hash_chain_gc,
1675 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1678 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1682 int cpu;
1683 struct ndt_stats ndst;
1685 memset(&ndst, 0, sizeof(ndst));
1687 for_each_possible_cpu(cpu) {
1688 struct neigh_statistics *st;
1690 st = per_cpu_ptr(tbl->stats, cpu);
1691 ndst.ndts_allocs += st->allocs;
1692 ndst.ndts_destroys += st->destroys;
1693 ndst.ndts_hash_grows += st->hash_grows;
1694 ndst.ndts_res_failed += st->res_failed;
1695 ndst.ndts_lookups += st->lookups;
1696 ndst.ndts_hits += st->hits;
1697 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1698 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1699 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1700 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1703 NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
1706 BUG_ON(tbl->parms.dev);
1707 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1708 goto nla_put_failure;
1710 read_unlock_bh(&tbl->lock);
1711 return nlmsg_end(skb, nlh);
1713 nla_put_failure:
1714 read_unlock_bh(&tbl->lock);
1715 nlmsg_cancel(skb, nlh);
1716 return -EMSGSIZE;
1719 static int neightbl_fill_param_info(struct sk_buff *skb,
1720 struct neigh_table *tbl,
1721 struct neigh_parms *parms,
1722 u32 pid, u32 seq, int type,
1723 unsigned int flags)
1725 struct ndtmsg *ndtmsg;
1726 struct nlmsghdr *nlh;
1728 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1729 if (nlh == NULL)
1730 return -EMSGSIZE;
1732 ndtmsg = nlmsg_data(nlh);
1734 read_lock_bh(&tbl->lock);
1735 ndtmsg->ndtm_family = tbl->family;
1736 ndtmsg->ndtm_pad1 = 0;
1737 ndtmsg->ndtm_pad2 = 0;
1739 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1740 neightbl_fill_parms(skb, parms) < 0)
1741 goto errout;
1743 read_unlock_bh(&tbl->lock);
1744 return nlmsg_end(skb, nlh);
1745 errout:
1746 read_unlock_bh(&tbl->lock);
1747 nlmsg_cancel(skb, nlh);
1748 return -EMSGSIZE;
1751 static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
1752 int ifindex)
1754 struct neigh_parms *p;
1756 for (p = &tbl->parms; p; p = p->next)
1757 if ((p->dev && p->dev->ifindex == ifindex) ||
1758 (!p->dev && !ifindex))
1759 return p;
1761 return NULL;
1764 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1765 [NDTA_NAME] = { .type = NLA_STRING },
1766 [NDTA_THRESH1] = { .type = NLA_U32 },
1767 [NDTA_THRESH2] = { .type = NLA_U32 },
1768 [NDTA_THRESH3] = { .type = NLA_U32 },
1769 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1770 [NDTA_PARMS] = { .type = NLA_NESTED },
1773 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1774 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1775 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1776 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1777 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1778 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1779 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1780 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1781 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1782 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1783 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1784 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1785 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1786 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
1789 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1791 struct neigh_table *tbl;
1792 struct ndtmsg *ndtmsg;
1793 struct nlattr *tb[NDTA_MAX+1];
1794 int err;
1796 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1797 nl_neightbl_policy);
1798 if (err < 0)
1799 goto errout;
1801 if (tb[NDTA_NAME] == NULL) {
1802 err = -EINVAL;
1803 goto errout;
1806 ndtmsg = nlmsg_data(nlh);
1807 read_lock(&neigh_tbl_lock);
1808 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1809 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1810 continue;
1812 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
1813 break;
1816 if (tbl == NULL) {
1817 err = -ENOENT;
1818 goto errout_locked;
1822 * We acquire tbl->lock to be nice to the periodic timers and
1823 * make sure they always see a consistent set of values.
1825 write_lock_bh(&tbl->lock);
1827 if (tb[NDTA_PARMS]) {
1828 struct nlattr *tbp[NDTPA_MAX+1];
1829 struct neigh_parms *p;
1830 int i, ifindex = 0;
1832 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
1833 nl_ntbl_parm_policy);
1834 if (err < 0)
1835 goto errout_tbl_lock;
1837 if (tbp[NDTPA_IFINDEX])
1838 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
1840 p = lookup_neigh_params(tbl, ifindex);
1841 if (p == NULL) {
1842 err = -ENOENT;
1843 goto errout_tbl_lock;
1846 for (i = 1; i <= NDTPA_MAX; i++) {
1847 if (tbp[i] == NULL)
1848 continue;
1850 switch (i) {
1851 case NDTPA_QUEUE_LEN:
1852 p->queue_len = nla_get_u32(tbp[i]);
1853 break;
1854 case NDTPA_PROXY_QLEN:
1855 p->proxy_qlen = nla_get_u32(tbp[i]);
1856 break;
1857 case NDTPA_APP_PROBES:
1858 p->app_probes = nla_get_u32(tbp[i]);
1859 break;
1860 case NDTPA_UCAST_PROBES:
1861 p->ucast_probes = nla_get_u32(tbp[i]);
1862 break;
1863 case NDTPA_MCAST_PROBES:
1864 p->mcast_probes = nla_get_u32(tbp[i]);
1865 break;
1866 case NDTPA_BASE_REACHABLE_TIME:
1867 p->base_reachable_time = nla_get_msecs(tbp[i]);
1868 break;
1869 case NDTPA_GC_STALETIME:
1870 p->gc_staletime = nla_get_msecs(tbp[i]);
1871 break;
1872 case NDTPA_DELAY_PROBE_TIME:
1873 p->delay_probe_time = nla_get_msecs(tbp[i]);
1874 break;
1875 case NDTPA_RETRANS_TIME:
1876 p->retrans_time = nla_get_msecs(tbp[i]);
1877 break;
1878 case NDTPA_ANYCAST_DELAY:
1879 p->anycast_delay = nla_get_msecs(tbp[i]);
1880 break;
1881 case NDTPA_PROXY_DELAY:
1882 p->proxy_delay = nla_get_msecs(tbp[i]);
1883 break;
1884 case NDTPA_LOCKTIME:
1885 p->locktime = nla_get_msecs(tbp[i]);
1886 break;
1891 if (tb[NDTA_THRESH1])
1892 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
1894 if (tb[NDTA_THRESH2])
1895 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
1897 if (tb[NDTA_THRESH3])
1898 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
1900 if (tb[NDTA_GC_INTERVAL])
1901 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
1903 err = 0;
1905 errout_tbl_lock:
1906 write_unlock_bh(&tbl->lock);
1907 errout_locked:
1908 read_unlock(&neigh_tbl_lock);
1909 errout:
1910 return err;
1913 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
1915 int family, tidx, nidx = 0;
1916 int tbl_skip = cb->args[0];
1917 int neigh_skip = cb->args[1];
1918 struct neigh_table *tbl;
1920 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
1922 read_lock(&neigh_tbl_lock);
1923 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
1924 struct neigh_parms *p;
1926 if (tidx < tbl_skip || (family && tbl->family != family))
1927 continue;
1929 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
1930 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
1931 NLM_F_MULTI) <= 0)
1932 break;
1934 for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
1935 if (nidx < neigh_skip)
1936 continue;
1938 if (neightbl_fill_param_info(skb, tbl, p,
1939 NETLINK_CB(cb->skb).pid,
1940 cb->nlh->nlmsg_seq,
1941 RTM_NEWNEIGHTBL,
1942 NLM_F_MULTI) <= 0)
1943 goto out;
1946 neigh_skip = 0;
1948 out:
1949 read_unlock(&neigh_tbl_lock);
1950 cb->args[0] = tidx;
1951 cb->args[1] = nidx;
1953 return skb->len;
1956 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
1957 u32 pid, u32 seq, int type, unsigned int flags)
1959 unsigned long now = jiffies;
1960 struct nda_cacheinfo ci;
1961 struct nlmsghdr *nlh;
1962 struct ndmsg *ndm;
1964 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
1965 if (nlh == NULL)
1966 return -EMSGSIZE;
1968 ndm = nlmsg_data(nlh);
1969 ndm->ndm_family = neigh->ops->family;
1970 ndm->ndm_pad1 = 0;
1971 ndm->ndm_pad2 = 0;
1972 ndm->ndm_flags = neigh->flags;
1973 ndm->ndm_type = neigh->type;
1974 ndm->ndm_ifindex = neigh->dev->ifindex;
1976 NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);
1978 read_lock_bh(&neigh->lock);
1979 ndm->ndm_state = neigh->nud_state;
1980 if ((neigh->nud_state & NUD_VALID) &&
1981 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
1982 read_unlock_bh(&neigh->lock);
1983 goto nla_put_failure;
1986 ci.ndm_used = now - neigh->used;
1987 ci.ndm_confirmed = now - neigh->confirmed;
1988 ci.ndm_updated = now - neigh->updated;
1989 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
1990 read_unlock_bh(&neigh->lock);
1992 NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
1993 NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
1995 return nlmsg_end(skb, nlh);
1997 nla_put_failure:
1998 nlmsg_cancel(skb, nlh);
1999 return -EMSGSIZE;
2003 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2004 struct netlink_callback *cb)
2006 struct neighbour *n;
2007 int rc, h, s_h = cb->args[1];
2008 int idx, s_idx = idx = cb->args[2];
2010 read_lock_bh(&tbl->lock);
2011 for (h = 0; h <= tbl->hash_mask; h++) {
2012 if (h < s_h)
2013 continue;
2014 if (h > s_h)
2015 s_idx = 0;
2016 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
2017 if (idx < s_idx)
2018 continue;
2019 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
2020 cb->nlh->nlmsg_seq,
2021 RTM_NEWNEIGH,
2022 NLM_F_MULTI) <= 0) {
2023 read_unlock_bh(&tbl->lock);
2024 rc = -1;
2025 goto out;
2029 read_unlock_bh(&tbl->lock);
2030 rc = skb->len;
2031 out:
2032 cb->args[1] = h;
2033 cb->args[2] = idx;
2034 return rc;
2037 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2039 struct neigh_table *tbl;
2040 int t, family, s_t;
2042 read_lock(&neigh_tbl_lock);
2043 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2044 s_t = cb->args[0];
2046 for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
2047 if (t < s_t || (family && tbl->family != family))
2048 continue;
2049 if (t > s_t)
2050 memset(&cb->args[1], 0, sizeof(cb->args) -
2051 sizeof(cb->args[0]));
2052 if (neigh_dump_table(tbl, skb, cb) < 0)
2053 break;
2055 read_unlock(&neigh_tbl_lock);
2057 cb->args[0] = t;
2058 return skb->len;
2061 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2063 int chain;
2065 read_lock_bh(&tbl->lock);
2066 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2067 struct neighbour *n;
2069 for (n = tbl->hash_buckets[chain]; n; n = n->next)
2070 cb(n, cookie);
2072 read_unlock_bh(&tbl->lock);
2074 EXPORT_SYMBOL(neigh_for_each);
2076 /* The tbl->lock must be held as a writer and BH disabled. */
2077 void __neigh_for_each_release(struct neigh_table *tbl,
2078 int (*cb)(struct neighbour *))
2080 int chain;
2082 for (chain = 0; chain <= tbl->hash_mask; chain++) {
2083 struct neighbour *n, **np;
2085 np = &tbl->hash_buckets[chain];
2086 while ((n = *np) != NULL) {
2087 int release;
2089 write_lock(&n->lock);
2090 release = cb(n);
2091 if (release) {
2092 *np = n->next;
2093 n->dead = 1;
2094 } else
2095 np = &n->next;
2096 write_unlock(&n->lock);
2097 if (release) {
2098 if (n->parms->neigh_cleanup)
2099 n->parms->neigh_cleanup(n);
2100 neigh_release(n);
2105 EXPORT_SYMBOL(__neigh_for_each_release);
2107 #ifdef CONFIG_PROC_FS
2109 static struct neighbour *neigh_get_first(struct seq_file *seq)
2111 struct neigh_seq_state *state = seq->private;
2112 struct neigh_table *tbl = state->tbl;
2113 struct neighbour *n = NULL;
2114 int bucket = state->bucket;
2116 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2117 for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
2118 n = tbl->hash_buckets[bucket];
2120 while (n) {
2121 if (state->neigh_sub_iter) {
2122 loff_t fakep = 0;
2123 void *v;
2125 v = state->neigh_sub_iter(state, n, &fakep);
2126 if (!v)
2127 goto next;
2129 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2130 break;
2131 if (n->nud_state & ~NUD_NOARP)
2132 break;
2133 next:
2134 n = n->next;
2137 if (n)
2138 break;
2140 state->bucket = bucket;
2142 return n;
2145 static struct neighbour *neigh_get_next(struct seq_file *seq,
2146 struct neighbour *n,
2147 loff_t *pos)
2149 struct neigh_seq_state *state = seq->private;
2150 struct neigh_table *tbl = state->tbl;
2152 if (state->neigh_sub_iter) {
2153 void *v = state->neigh_sub_iter(state, n, pos);
2154 if (v)
2155 return n;
2157 n = n->next;
2159 while (1) {
2160 while (n) {
2161 if (state->neigh_sub_iter) {
2162 void *v = state->neigh_sub_iter(state, n, pos);
2163 if (v)
2164 return n;
2165 goto next;
2167 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2168 break;
2170 if (n->nud_state & ~NUD_NOARP)
2171 break;
2172 next:
2173 n = n->next;
2176 if (n)
2177 break;
2179 if (++state->bucket > tbl->hash_mask)
2180 break;
2182 n = tbl->hash_buckets[state->bucket];
2185 if (n && pos)
2186 --(*pos);
2187 return n;
2190 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2192 struct neighbour *n = neigh_get_first(seq);
2194 if (n) {
2195 while (*pos) {
2196 n = neigh_get_next(seq, n, pos);
2197 if (!n)
2198 break;
2201 return *pos ? NULL : n;
2204 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2206 struct neigh_seq_state *state = seq->private;
2207 struct neigh_table *tbl = state->tbl;
2208 struct pneigh_entry *pn = NULL;
2209 int bucket = state->bucket;
2211 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2212 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2213 pn = tbl->phash_buckets[bucket];
2214 if (pn)
2215 break;
2217 state->bucket = bucket;
2219 return pn;
2222 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2223 struct pneigh_entry *pn,
2224 loff_t *pos)
2226 struct neigh_seq_state *state = seq->private;
2227 struct neigh_table *tbl = state->tbl;
2229 pn = pn->next;
2230 while (!pn) {
2231 if (++state->bucket > PNEIGH_HASHMASK)
2232 break;
2233 pn = tbl->phash_buckets[state->bucket];
2234 if (pn)
2235 break;
2238 if (pn && pos)
2239 --(*pos);
2241 return pn;
2244 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2246 struct pneigh_entry *pn = pneigh_get_first(seq);
2248 if (pn) {
2249 while (*pos) {
2250 pn = pneigh_get_next(seq, pn, pos);
2251 if (!pn)
2252 break;
2255 return *pos ? NULL : pn;
2258 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2260 struct neigh_seq_state *state = seq->private;
2261 void *rc;
2263 rc = neigh_get_idx(seq, pos);
2264 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2265 rc = pneigh_get_idx(seq, pos);
2267 return rc;
2270 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2272 struct neigh_seq_state *state = seq->private;
2273 loff_t pos_minus_one;
2275 state->tbl = tbl;
2276 state->bucket = 0;
2277 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2279 read_lock_bh(&tbl->lock);
2281 pos_minus_one = *pos - 1;
2282 return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
2284 EXPORT_SYMBOL(neigh_seq_start);
2286 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2288 struct neigh_seq_state *state;
2289 void *rc;
2291 if (v == SEQ_START_TOKEN) {
2292 rc = neigh_get_idx(seq, pos);
2293 goto out;
2296 state = seq->private;
2297 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2298 rc = neigh_get_next(seq, v, NULL);
2299 if (rc)
2300 goto out;
2301 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2302 rc = pneigh_get_first(seq);
2303 } else {
2304 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2305 rc = pneigh_get_next(seq, v, NULL);
2307 out:
2308 ++(*pos);
2309 return rc;
2311 EXPORT_SYMBOL(neigh_seq_next);
2313 void neigh_seq_stop(struct seq_file *seq, void *v)
2315 struct neigh_seq_state *state = seq->private;
2316 struct neigh_table *tbl = state->tbl;
2318 read_unlock_bh(&tbl->lock);
2320 EXPORT_SYMBOL(neigh_seq_stop);
2322 /* statistics via seq_file */
2324 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2326 struct proc_dir_entry *pde = seq->private;
2327 struct neigh_table *tbl = pde->data;
2328 int cpu;
2330 if (*pos == 0)
2331 return SEQ_START_TOKEN;
2333 for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
2334 if (!cpu_possible(cpu))
2335 continue;
2336 *pos = cpu+1;
2337 return per_cpu_ptr(tbl->stats, cpu);
2339 return NULL;
2342 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2344 struct proc_dir_entry *pde = seq->private;
2345 struct neigh_table *tbl = pde->data;
2346 int cpu;
2348 for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
2349 if (!cpu_possible(cpu))
2350 continue;
2351 *pos = cpu+1;
2352 return per_cpu_ptr(tbl->stats, cpu);
2354 return NULL;
/* ->stop of the statistics seq_file: nothing to undo. */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{
}
2362 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2364 struct proc_dir_entry *pde = seq->private;
2365 struct neigh_table *tbl = pde->data;
2366 struct neigh_statistics *st = v;
2368 if (v == SEQ_START_TOKEN) {
2369 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
2370 return 0;
2373 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2374 "%08lx %08lx %08lx %08lx\n",
2375 atomic_read(&tbl->entries),
2377 st->allocs,
2378 st->destroys,
2379 st->hash_grows,
2381 st->lookups,
2382 st->hits,
2384 st->res_failed,
2386 st->rcv_probes_mcast,
2387 st->rcv_probes_ucast,
2389 st->periodic_gc_runs,
2390 st->forced_gc_runs
2393 return 0;
2396 static const struct seq_operations neigh_stat_seq_ops = {
2397 .start = neigh_stat_seq_start,
2398 .next = neigh_stat_seq_next,
2399 .stop = neigh_stat_seq_stop,
2400 .show = neigh_stat_seq_show,
2403 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2405 int ret = seq_open(file, &neigh_stat_seq_ops);
2407 if (!ret) {
2408 struct seq_file *sf = file->private_data;
2409 sf->private = PDE(inode);
2411 return ret;
2414 static const struct file_operations neigh_stat_seq_fops = {
2415 .owner = THIS_MODULE,
2416 .open = neigh_stat_seq_open,
2417 .read = seq_read,
2418 .llseek = seq_lseek,
2419 .release = seq_release,
2422 #endif /* CONFIG_PROC_FS */
2424 #ifdef CONFIG_ARPD
2425 static inline size_t neigh_nlmsg_size(void)
2427 return NLMSG_ALIGN(sizeof(struct ndmsg))
2428 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2429 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2430 + nla_total_size(sizeof(struct nda_cacheinfo))
2431 + nla_total_size(4); /* NDA_PROBES */
2434 static void __neigh_notify(struct neighbour *n, int type, int flags)
2436 struct sk_buff *skb;
2437 int err = -ENOBUFS;
2439 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2440 if (skb == NULL)
2441 goto errout;
2443 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2444 if (err < 0) {
2445 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2446 WARN_ON(err == -EMSGSIZE);
2447 kfree_skb(skb);
2448 goto errout;
2450 err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2451 errout:
2452 if (err < 0)
2453 rtnl_set_sk_err(RTNLGRP_NEIGH, err);
2456 void neigh_app_ns(struct neighbour *n)
2458 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2461 static void neigh_app_notify(struct neighbour *n)
2463 __neigh_notify(n, RTM_NEWNEIGH, 0);
2466 #endif /* CONFIG_ARPD */
2468 #ifdef CONFIG_SYSCTL
2470 static struct neigh_sysctl_table {
2471 struct ctl_table_header *sysctl_header;
2472 ctl_table neigh_vars[__NET_NEIGH_MAX];
2473 ctl_table neigh_dev[2];
2474 ctl_table neigh_neigh_dir[2];
2475 ctl_table neigh_proto_dir[2];
2476 ctl_table neigh_root_dir[2];
2477 } neigh_sysctl_template __read_mostly = {
2478 .neigh_vars = {
2480 .ctl_name = NET_NEIGH_MCAST_SOLICIT,
2481 .procname = "mcast_solicit",
2482 .maxlen = sizeof(int),
2483 .mode = 0644,
2484 .proc_handler = &proc_dointvec,
2487 .ctl_name = NET_NEIGH_UCAST_SOLICIT,
2488 .procname = "ucast_solicit",
2489 .maxlen = sizeof(int),
2490 .mode = 0644,
2491 .proc_handler = &proc_dointvec,
2494 .ctl_name = NET_NEIGH_APP_SOLICIT,
2495 .procname = "app_solicit",
2496 .maxlen = sizeof(int),
2497 .mode = 0644,
2498 .proc_handler = &proc_dointvec,
2501 .ctl_name = NET_NEIGH_RETRANS_TIME,
2502 .procname = "retrans_time",
2503 .maxlen = sizeof(int),
2504 .mode = 0644,
2505 .proc_handler = &proc_dointvec_userhz_jiffies,
2508 .ctl_name = NET_NEIGH_REACHABLE_TIME,
2509 .procname = "base_reachable_time",
2510 .maxlen = sizeof(int),
2511 .mode = 0644,
2512 .proc_handler = &proc_dointvec_jiffies,
2513 .strategy = &sysctl_jiffies,
2516 .ctl_name = NET_NEIGH_DELAY_PROBE_TIME,
2517 .procname = "delay_first_probe_time",
2518 .maxlen = sizeof(int),
2519 .mode = 0644,
2520 .proc_handler = &proc_dointvec_jiffies,
2521 .strategy = &sysctl_jiffies,
2524 .ctl_name = NET_NEIGH_GC_STALE_TIME,
2525 .procname = "gc_stale_time",
2526 .maxlen = sizeof(int),
2527 .mode = 0644,
2528 .proc_handler = &proc_dointvec_jiffies,
2529 .strategy = &sysctl_jiffies,
2532 .ctl_name = NET_NEIGH_UNRES_QLEN,
2533 .procname = "unres_qlen",
2534 .maxlen = sizeof(int),
2535 .mode = 0644,
2536 .proc_handler = &proc_dointvec,
2539 .ctl_name = NET_NEIGH_PROXY_QLEN,
2540 .procname = "proxy_qlen",
2541 .maxlen = sizeof(int),
2542 .mode = 0644,
2543 .proc_handler = &proc_dointvec,
2546 .ctl_name = NET_NEIGH_ANYCAST_DELAY,
2547 .procname = "anycast_delay",
2548 .maxlen = sizeof(int),
2549 .mode = 0644,
2550 .proc_handler = &proc_dointvec_userhz_jiffies,
2553 .ctl_name = NET_NEIGH_PROXY_DELAY,
2554 .procname = "proxy_delay",
2555 .maxlen = sizeof(int),
2556 .mode = 0644,
2557 .proc_handler = &proc_dointvec_userhz_jiffies,
2560 .ctl_name = NET_NEIGH_LOCKTIME,
2561 .procname = "locktime",
2562 .maxlen = sizeof(int),
2563 .mode = 0644,
2564 .proc_handler = &proc_dointvec_userhz_jiffies,
2567 .ctl_name = NET_NEIGH_GC_INTERVAL,
2568 .procname = "gc_interval",
2569 .maxlen = sizeof(int),
2570 .mode = 0644,
2571 .proc_handler = &proc_dointvec_jiffies,
2572 .strategy = &sysctl_jiffies,
2575 .ctl_name = NET_NEIGH_GC_THRESH1,
2576 .procname = "gc_thresh1",
2577 .maxlen = sizeof(int),
2578 .mode = 0644,
2579 .proc_handler = &proc_dointvec,
2582 .ctl_name = NET_NEIGH_GC_THRESH2,
2583 .procname = "gc_thresh2",
2584 .maxlen = sizeof(int),
2585 .mode = 0644,
2586 .proc_handler = &proc_dointvec,
2589 .ctl_name = NET_NEIGH_GC_THRESH3,
2590 .procname = "gc_thresh3",
2591 .maxlen = sizeof(int),
2592 .mode = 0644,
2593 .proc_handler = &proc_dointvec,
2596 .ctl_name = NET_NEIGH_RETRANS_TIME_MS,
2597 .procname = "retrans_time_ms",
2598 .maxlen = sizeof(int),
2599 .mode = 0644,
2600 .proc_handler = &proc_dointvec_ms_jiffies,
2601 .strategy = &sysctl_ms_jiffies,
2604 .ctl_name = NET_NEIGH_REACHABLE_TIME_MS,
2605 .procname = "base_reachable_time_ms",
2606 .maxlen = sizeof(int),
2607 .mode = 0644,
2608 .proc_handler = &proc_dointvec_ms_jiffies,
2609 .strategy = &sysctl_ms_jiffies,
2612 .neigh_dev = {
2614 .ctl_name = NET_PROTO_CONF_DEFAULT,
2615 .procname = "default",
2616 .mode = 0555,
2619 .neigh_neigh_dir = {
2621 .procname = "neigh",
2622 .mode = 0555,
2625 .neigh_proto_dir = {
2627 .mode = 0555,
2630 .neigh_root_dir = {
2632 .ctl_name = CTL_NET,
2633 .procname = "net",
2634 .mode = 0555,
2639 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
2640 int p_id, int pdev_id, char *p_name,
2641 proc_handler *handler, ctl_handler *strategy)
2643 struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
2644 sizeof(*t), GFP_KERNEL);
2645 const char *dev_name_source = NULL;
2646 char *dev_name = NULL;
2647 int err = 0;
2649 if (!t)
2650 return -ENOBUFS;
2651 t->neigh_vars[0].data = &p->mcast_probes;
2652 t->neigh_vars[1].data = &p->ucast_probes;
2653 t->neigh_vars[2].data = &p->app_probes;
2654 t->neigh_vars[3].data = &p->retrans_time;
2655 t->neigh_vars[4].data = &p->base_reachable_time;
2656 t->neigh_vars[5].data = &p->delay_probe_time;
2657 t->neigh_vars[6].data = &p->gc_staletime;
2658 t->neigh_vars[7].data = &p->queue_len;
2659 t->neigh_vars[8].data = &p->proxy_qlen;
2660 t->neigh_vars[9].data = &p->anycast_delay;
2661 t->neigh_vars[10].data = &p->proxy_delay;
2662 t->neigh_vars[11].data = &p->locktime;
2664 if (dev) {
2665 dev_name_source = dev->name;
2666 t->neigh_dev[0].ctl_name = dev->ifindex;
2667 t->neigh_vars[12].procname = NULL;
2668 t->neigh_vars[13].procname = NULL;
2669 t->neigh_vars[14].procname = NULL;
2670 t->neigh_vars[15].procname = NULL;
2671 } else {
2672 dev_name_source = t->neigh_dev[0].procname;
2673 t->neigh_vars[12].data = (int *)(p + 1);
2674 t->neigh_vars[13].data = (int *)(p + 1) + 1;
2675 t->neigh_vars[14].data = (int *)(p + 1) + 2;
2676 t->neigh_vars[15].data = (int *)(p + 1) + 3;
2679 t->neigh_vars[16].data = &p->retrans_time;
2680 t->neigh_vars[17].data = &p->base_reachable_time;
2682 if (handler || strategy) {
2683 /* RetransTime */
2684 t->neigh_vars[3].proc_handler = handler;
2685 t->neigh_vars[3].strategy = strategy;
2686 t->neigh_vars[3].extra1 = dev;
2687 /* ReachableTime */
2688 t->neigh_vars[4].proc_handler = handler;
2689 t->neigh_vars[4].strategy = strategy;
2690 t->neigh_vars[4].extra1 = dev;
2691 /* RetransTime (in milliseconds)*/
2692 t->neigh_vars[16].proc_handler = handler;
2693 t->neigh_vars[16].strategy = strategy;
2694 t->neigh_vars[16].extra1 = dev;
2695 /* ReachableTime (in milliseconds) */
2696 t->neigh_vars[17].proc_handler = handler;
2697 t->neigh_vars[17].strategy = strategy;
2698 t->neigh_vars[17].extra1 = dev;
2701 dev_name = kstrdup(dev_name_source, GFP_KERNEL);
2702 if (!dev_name) {
2703 err = -ENOBUFS;
2704 goto free;
2707 t->neigh_dev[0].procname = dev_name;
2709 t->neigh_neigh_dir[0].ctl_name = pdev_id;
2711 t->neigh_proto_dir[0].procname = p_name;
2712 t->neigh_proto_dir[0].ctl_name = p_id;
2714 t->neigh_dev[0].child = t->neigh_vars;
2715 t->neigh_neigh_dir[0].child = t->neigh_dev;
2716 t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
2717 t->neigh_root_dir[0].child = t->neigh_proto_dir;
2719 t->sysctl_header = register_sysctl_table(t->neigh_root_dir);
2720 if (!t->sysctl_header) {
2721 err = -ENOBUFS;
2722 goto free_procname;
2724 p->sysctl_table = t;
2725 return 0;
2727 /* error path */
2728 free_procname:
2729 kfree(dev_name);
2730 free:
2731 kfree(t);
2733 return err;
2736 void neigh_sysctl_unregister(struct neigh_parms *p)
2738 if (p->sysctl_table) {
2739 struct neigh_sysctl_table *t = p->sysctl_table;
2740 p->sysctl_table = NULL;
2741 unregister_sysctl_table(t->sysctl_header);
2742 kfree(t->neigh_dev[0].procname);
2743 kfree(t);
2747 #endif /* CONFIG_SYSCTL */
2749 static int __init neigh_init(void)
2751 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL);
2752 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL);
2753 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info);
2755 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info);
2756 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL);
2758 return 0;
/* Run neigh_init() at the subsys initcall stage. */
2761 subsys_initcall(neigh_init);
/* Symbols exported unconditionally. */
2763 EXPORT_SYMBOL(__neigh_event_send);
2764 EXPORT_SYMBOL(neigh_changeaddr);
2765 EXPORT_SYMBOL(neigh_compat_output);
2766 EXPORT_SYMBOL(neigh_connected_output);
2767 EXPORT_SYMBOL(neigh_create);
2768 EXPORT_SYMBOL(neigh_destroy);
2769 EXPORT_SYMBOL(neigh_event_ns);
2770 EXPORT_SYMBOL(neigh_ifdown);
2771 EXPORT_SYMBOL(neigh_lookup);
2772 EXPORT_SYMBOL(neigh_lookup_nodev);
2773 EXPORT_SYMBOL(neigh_parms_alloc);
2774 EXPORT_SYMBOL(neigh_parms_release);
2775 EXPORT_SYMBOL(neigh_rand_reach_time);
2776 EXPORT_SYMBOL(neigh_resolve_output);
2777 EXPORT_SYMBOL(neigh_table_clear);
2778 EXPORT_SYMBOL(neigh_table_init);
2779 EXPORT_SYMBOL(neigh_table_init_no_netlink);
2780 EXPORT_SYMBOL(neigh_update);
2781 EXPORT_SYMBOL(pneigh_enqueue);
2782 EXPORT_SYMBOL(pneigh_lookup);
/* Exported only when CONFIG_ARPD is defined. */
2784 #ifdef CONFIG_ARPD
2785 EXPORT_SYMBOL(neigh_app_ns);
2786 #endif
/* The sysctl register/unregister pair is exported only when CONFIG_SYSCTL is defined. */
2787 #ifdef CONFIG_SYSCTL
2788 EXPORT_SYMBOL(neigh_sysctl_register);
2789 EXPORT_SYMBOL(neigh_sysctl_unregister);
2790 #endif