/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(unsigned long arg);
#ifdef CONFIG_ARPD
static void neigh_app_notify(struct neighbour *n);
#endif
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev);

static struct neigh_table *neigh_tables;
#ifdef CONFIG_PROC_FS
static const struct file_operations neigh_stat_seq_fops;
#endif
/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All scans/updates of the hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     It will result in deadlocks if a backend/driver wants to use the
     neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
   - timer
   - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.

   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
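/*
 * An illustrative sketch of the pattern the rules above imply
 * (editorial example only; compare neigh_lookup() and neigh_forced_gc()
 * below):
 *
 *	read_lock_bh(&tbl->lock);
 *	n = <walk tbl->hash_buckets[...] to find the entry>;
 *	if (n)
 *		neigh_hold(n);		-- take a reference under the lock
 *	read_unlock_bh(&tbl->lock);
 *	if (n) {
 *		<non-trivial work, e.g. transmit via n->output(skb)>;
 *		neigh_release(n);	-- drop the reference when done
 *	}
 */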
static DEFINE_RWLOCK(neigh_tbl_lock);

static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return (base ? (net_random() % base) + (base >> 1) : 0);
}
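/*
 * Worked example: with base == 4 * HZ, net_random() % base is uniform
 * over 0..4*HZ-1 and (base >> 1) adds 2*HZ, so the returned value is
 * uniform over [2*HZ, 6*HZ), i.e. (1/2)*base to (3/2)*base as the
 * comment above states.
 */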
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	for (i = 0; i <= tbl->hash_mask; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				*np	= n->next;
				n->dead = 1;
				shrunk	= 1;
				write_unlock(&n->lock);
				neigh_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list)
{
	struct sk_buff *skb;

	while ((skb = skb_dequeue(list)) != NULL) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;

	for (i = 0; i <= tbl->hash_mask; i++) {
		struct neighbour *n, **np = &tbl->hash_buckets[i];

		while ((n = *np) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			*np = n->next;
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			}
			write_unlock(&n->lock);
			neigh_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}

static struct neighbour *neigh_alloc(struct neigh_table *tbl)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	n = kmem_cache_zalloc(tbl->kmem_cachep, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->parms = neigh_parms_clone(&tbl->parms);
	init_timer(&n->timer);
	n->timer.function = neigh_timer_handler;
	n->timer.data = (unsigned long)n;

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead = 1;
out:
	return n;

out_entries:
	atomic_dec(&tbl->entries);
	goto out;
}

static struct neighbour **neigh_hash_alloc(unsigned int entries)
{
	unsigned long size = entries * sizeof(struct neighbour *);
	struct neighbour **ret;

	if (size <= PAGE_SIZE) {
		ret = kzalloc(size, GFP_ATOMIC);
	} else {
		ret = (struct neighbour **)
		      __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size));
	}
	return ret;
}

static void neigh_hash_free(struct neighbour **hash, unsigned int entries)
{
	unsigned long size = entries * sizeof(struct neighbour *);

	if (size <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long)hash, get_order(size));
}

static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries)
{
	struct neighbour **new_hash, **old_hash;
	unsigned int i, new_hash_mask, old_entries;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	BUG_ON(new_entries & (new_entries - 1));
	new_hash = neigh_hash_alloc(new_entries);
	if (!new_hash)
		return;

	old_entries = tbl->hash_mask + 1;
	new_hash_mask = new_entries - 1;
	old_hash = tbl->hash_buckets;

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
	for (i = 0; i < old_entries; i++) {
		struct neighbour *n, *next;

		for (n = old_hash[i]; n; n = next) {
			unsigned int hash_val = tbl->hash(n->primary_key, n->dev);

			hash_val &= new_hash_mask;
			next = n->next;

			n->next = new_hash[hash_val];
			new_hash[hash_val] = n;
		}
	}
	tbl->hash_buckets = new_hash;
	tbl->hash_mask = new_hash_mask;

	neigh_hash_free(old_hash, old_entries);
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val = tbl->hash(pkey, dev);

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	read_lock_bh(&tbl->lock);
	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			neigh_hold(n);
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
	read_unlock_bh(&tbl->lock);
	return n;
}

struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val = tbl->hash(pkey, NULL);

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	read_lock_bh(&tbl->lock);
	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
		if (!memcmp(n->primary_key, pkey, key_len)) {
			neigh_hold(n);
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}
	read_unlock_bh(&tbl->lock);
	return n;
}

struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);

	write_lock_bh(&tbl->lock);

	if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1))
		neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1);

	hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->next = tbl->hash_buckets[hash_val];
	tbl->hash_buckets[hash_val] = n;
	n->dead = 0;
	neigh_hold(n);
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
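	/* The xor folding above mixes every bit of the key's last 32-bit
	 * word into the low four bits, which index one of the
	 * PNEIGH_HASHMASK + 1 (i.e. 16) proxy hash buckets.
	 */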
	read_lock_bh(&tbl->lock);

	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
		if (!memcmp(n->key, pkey, key_len) &&
		    (n->dev == dev || !n->dev)) {
			read_unlock_bh(&tbl->lock);
			goto out;
		}
	}
	read_unlock_bh(&tbl->lock);
	n = NULL;
	if (!creat)
		goto out;

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}

int pneigh_delete(struct neigh_table *tbl, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

/*
 *	neighbour must already be out of the table;
 *
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		printk(KERN_WARNING
		       "Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		printk(KERN_WARNING "Impossible event.\n");

	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;

		write_seqlock_bh(&hh->hh_lock);
		hh->hh_output = neigh_blackhole;
		write_sequnlock_bh(&hh->hh_lock);
		if (atomic_dec_and_test(&hh->hh_refcnt))
			kfree(hh);
	}

	if (neigh->parms->neigh_destructor)
		(neigh->parms->neigh_destructor)(neigh);

	skb_queue_purge(&neigh->arp_queue);

	dev_put(neigh->dev);
	neigh_parms_put(neigh->parms);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
}
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->hh_output;
}
static void neigh_periodic_timer(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	struct neighbour *n, **np;
	unsigned long expire, now = jiffies;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock(&tbl->lock);

	/*
	 *	periodically recompute ReachableTime from random function
	 */

	if (time_after(now, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;
		tbl->last_rand = now;
		for (p = &tbl->parms; p; p = p->next)
			p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
	}

	np = &tbl->hash_buckets[tbl->hash_chain_gc];
	tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask);

	while ((n = *np) != NULL) {
		unsigned int state;

		write_lock(&n->lock);

		state = n->nud_state;
		if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
			write_unlock(&n->lock);
			goto next_elt;
		}

		if (time_before(n->used, n->confirmed))
			n->used = n->confirmed;

		if (atomic_read(&n->refcnt) == 1 &&
		    (state == NUD_FAILED ||
		     time_after(now, n->used + n->parms->gc_staletime))) {
			*np = n->next;
			n->dead = 1;
			write_unlock(&n->lock);
			neigh_release(n);
			continue;
		}
		write_unlock(&n->lock);

next_elt:
		np = &n->next;
	}

	/* Cycle through all hash buckets every base_reachable_time/2 ticks.
	 * ARP entry timeouts range from 1/2 base_reachable_time to 3/2
	 * base_reachable_time.
	 */
	expire = tbl->parms.base_reachable_time >> 1;
	expire /= (tbl->hash_mask + 1);
	if (!expire)
		expire = 1;

	if (expire > HZ)
		mod_timer(&tbl->gc_timer, round_jiffies(now + expire));
	else
		mod_timer(&tbl->gc_timer, now + expire);

	write_unlock(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return (n->nud_state & NUD_PROBE ?
		p->ucast_probes :
		p->ucast_probes + p->app_probes + p->mcast_probes);
}

static inline void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}
/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER)) {
#ifndef CONFIG_SMP
		printk(KERN_WARNING "neigh: timer & !nud_in_timer\n");
#endif
		goto out;
	}

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + neigh->parms->delay_probe_time;
		} else {
			NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->delay_probe_time)) {
			NEIGH_PRINTK2("neigh %p is now reachable.\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + neigh->parms->retrans_time;
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + neigh->parms->retrans_time;
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		struct sk_buff *skb;

		neigh->nud_state = NUD_FAILED;
		neigh->updated = jiffies;
		notify = 1;
		NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);

		/* This is a very delicate place. error_report is a very
		   complicated routine; in particular, it can hit the same
		   neighbour entry!

		   That is why we try to be careful here and avoid a dead
		   loop. --ANK
		 */
		while (neigh->nud_state == NUD_FAILED &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			write_unlock(&neigh->lock);
			neigh->ops->error_report(neigh, skb);
			write_lock(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		struct sk_buff *skb = skb_peek(&neigh->arp_queue);
		/* keep skb alive even if arp_queue overflows */
		if (skb)
			skb_get(skb);
		write_unlock(&neigh->lock);
		neigh->ops->solicit(neigh, skb);
		atomic_inc(&neigh->probes);
		if (skb)
			kfree_skb(skb);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);

#ifdef CONFIG_ARPD
	if (notify && neigh->parms->app_probes)
		neigh_app_notify(neigh);
#endif
	neigh_release(neigh);
}
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	unsigned long now;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	now = jiffies;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
			atomic_set(&neigh->probes, neigh->parms->ucast_probes);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = jiffies;
			neigh_hold(neigh);
			neigh_add_timer(neigh, now + 1);
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			if (skb)
				kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
		neigh_hold(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh,
				jiffies + neigh->parms->delay_probe_time);
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			if (skb_queue_len(&neigh->arp_queue) >=
			    neigh->parms->queue_len) {
				struct sk_buff *buff;
				buff = neigh->arp_queue.next;
				__skb_unlink(buff, &neigh->arp_queue);
				kfree_skb(buff);
			}
			__skb_queue_tail(&neigh->arp_queue, skb);
		}
		rc = 1;
	}
out_unlock_bh:
	write_unlock_bh(&neigh->lock);
	return rc;
}

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, struct net_device*, unsigned char *) =
		neigh->dev->header_cache_update;

	if (update) {
		for (hh = neigh->hh; hh; hh = hh->hh_next) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}
/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new    is the new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows the existing lladdr to be
				overridden if it differs.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect an existing "connected"
				lladdr instead of overriding it
				if it differs. It also allows the current
				state to be retained if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows the existing NTF_ROUTER
				flag to be overridden.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   The caller MUST hold a reference count on the entry.
 */
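/*
 * Editorial example of a typical call: neigh_event_ns() below feeds a
 * learned link-layer address in with
 *
 *	neigh_update(neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_OVERRIDE);
 *
 * while administrative changes (see neigh_add()/neigh_delete() further
 * down) additionally pass NEIGH_UPDATE_F_ADMIN, which is what permits
 * touching NUD_NOARP and NUD_PERMANENT entries.
 */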
int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED)))
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER) {
			neigh_hold(neigh);
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		}
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (neigh->parms->base_reachable_time << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct neighbour *n1 = neigh;
			write_unlock_bh(&neigh->lock);
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (skb->dst && skb->dst->neighbour)
				n1 = skb->dst->neighbour;
			n1->output(skb);
			write_lock_bh(&neigh->lock);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
#ifdef CONFIG_ARPD
	if (notify && neigh->parms->app_probes)
		neigh_app_notify(neigh);
#endif
	return err;
}
struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE);
	return neigh;
}

static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
			  __be16 protocol)
{
	struct hh_cache *hh;
	struct net_device *dev = dst->dev;

	for (hh = n->hh; hh; hh = hh->hh_next)
		if (hh->hh_type == protocol)
			break;

	if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
		seqlock_init(&hh->hh_lock);
		hh->hh_type = protocol;
		atomic_set(&hh->hh_refcnt, 0);
		hh->hh_next = NULL;
		if (dev->hard_header_cache(n, hh)) {
			kfree(hh);
			hh = NULL;
		} else {
			atomic_inc(&hh->hh_refcnt);
			hh->hh_next = n->hh;
			n->hh = hh;
			if (n->nud_state & NUD_CONNECTED)
				hh->hh_output = n->ops->hh_output;
			else
				hh->hh_output = n->ops->output;
		}
	}
	if (hh) {
		atomic_inc(&hh->hh_refcnt);
		dst->hh = hh;
	}
}
/* This function can be used in contexts where only the old dev_queue_xmit
   worked, e.g. if you want to override the normal output path (eql, shaper),
   but resolution has not been made yet.
 */

int neigh_compat_output(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;

	__skb_pull(skb, skb->nh.raw - skb->data);

	if (dev->hard_header &&
	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
			     skb->len) < 0 &&
	    dev->rebuild_header(skb))
		return 0;

	return dev_queue_xmit(skb);
}
/* Slow and careful. */

int neigh_resolve_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh;
	int rc = 0;

	if (!dst || !(neigh = dst->neighbour))
		goto discard;

	__skb_pull(skb, skb->nh.raw - skb->data);

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		if (dev->hard_header_cache && !dst->hh) {
			write_lock_bh(&neigh->lock);
			if (!dst->hh)
				neigh_hh_init(neigh, dst, dst->ops->protocol);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol),
					       neigh->ha, NULL, skb->len);
			write_unlock_bh(&neigh->lock);
		} else {
			read_lock_bh(&neigh->lock);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol),
					       neigh->ha, NULL, skb->len);
			read_unlock_bh(&neigh->lock);
		}
		if (err >= 0)
			rc = neigh->ops->queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n",
		      dst, dst ? dst->neighbour : NULL);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}

/* As fast as possible without hh cache */

int neigh_connected_output(struct sk_buff *skb)
{
	int err;
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh = dst->neighbour;
	struct net_device *dev = neigh->dev;

	__skb_pull(skb, skb->nh.raw - skb->data);

	read_lock_bh(&neigh->lock);
	err = dev->hard_header(skb, dev, ntohs(skb->protocol),
			       neigh->ha, NULL, skb->len);
	read_unlock_bh(&neigh->lock);
	if (err >= 0)
		err = neigh->ops->queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb;

	spin_lock(&tbl->proxy_queue.lock);

	skb = tbl->proxy_queue.next;

	while (skb != (struct sk_buff *)&tbl->proxy_queue) {
		struct sk_buff *back = skb;
		long tdif = NEIGH_CB(back)->sched_next - now;

		skb = skb->next;
		if (tdif <= 0) {
			struct net_device *dev = back->dev;
			__skb_unlink(back, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev))
				tbl->proxy_redo(back);
			else
				kfree_skb(back);

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (net_random() % p->proxy_delay);

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	dst_release(skb->dst);
	skb->dst = NULL;
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);

	if (p) {
		p->tbl = tbl;
		atomic_set(&p->refcnt, 1);
		INIT_RCU_HEAD(&p->rcu_head);
		p->reachable_time =
				neigh_rand_reach_time(p->base_reachable_time);
		if (dev) {
			if (dev->neigh_setup && dev->neigh_setup(dev, p)) {
				kfree(p);
				return NULL;
			}

			dev_hold(dev);
			p->dev = dev;
		}
		p->sysctl_table = NULL;
		write_lock_bh(&tbl->lock);
		p->next = tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);
	}
	return p;
}
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}

void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	atomic_set(&tbl->parms.refcnt, 1);
	INIT_RCU_HEAD(&tbl->parms.rcu_head);
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(tbl->parms.base_reachable_time);

	if (!tbl->kmem_cachep)
		tbl->kmem_cachep =
			kmem_cache_create(tbl->id, tbl->entry_size, 0,
					  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					  NULL, NULL);
	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	tbl->pde = create_proc_entry(tbl->id, 0, proc_net_stat);
	if (!tbl->pde)
		panic("cannot create neighbour proc dir entry");
	tbl->pde->proc_fops = &neigh_stat_seq_fops;
	tbl->pde->data = tbl;
#endif

	tbl->hash_mask = 1;
	tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->hash_buckets || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));

	rwlock_init(&tbl->lock);
	init_timer(&tbl->gc_timer);
	tbl->gc_timer.data = (unsigned long)tbl;
	tbl->gc_timer.function = neigh_periodic_timer;
	tbl->gc_timer.expires = now + 1;
	add_timer(&tbl->gc_timer);

	init_timer(&tbl->proxy_timer);
	tbl->proxy_timer.data = (unsigned long)tbl;
	tbl->proxy_timer.function = neigh_proxy_process;
	skb_queue_head_init(&tbl->proxy_queue);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;
}
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next = neigh_tables;
	neigh_tables = tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		printk(KERN_ERR "NEIGH: Registering multiple tables for "
		       "family %d\n", tbl->family);
		dump_stack();
	}
}

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	del_timer_sync(&tbl->gc_timer);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		printk(KERN_CRIT "neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1);
	tbl->hash_buckets = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = dev_get_by_index(ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out_dev_put;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
			goto out_dev_put;
		}

		if (dev == NULL)
			goto out_dev_put;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out_dev_put;
		}

		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out_dev_put;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out_dev_put:
	if (dev)
		dev_put(dev);
out:
	return err;
}

int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = dev_get_by_index(ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out_dev_put;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out_dev_put;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			pn = pneigh_lookup(tbl, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out_dev_put;
		}

		if (dev == NULL)
			goto out_dev_put;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out_dev_put;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out_dev_put;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out_dev_put;
			}

			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out_dev_put;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out_dev_put:
	if (dev)
		dev_put(dev);
out:
	return err;
}
static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if (parms->dev)
		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);

	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
	NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes);
	NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME,
		      parms->base_reachable_time);
	NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime);
	NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time);
	NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time);
	NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay);
	NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay);
	NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime);

	return nla_nest_end(skb, nest);

nla_put_failure:
	return nla_nest_cancel(skb, nest);
}

static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	NLA_PUT_STRING(skb, NDTA_NAME, tbl->id);
	NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval);
	NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1);
	NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2);
	NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3);

	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;

		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_hash_rnd		= tbl->hash_rnd,
			.ndtc_hash_mask		= tbl->hash_mask,
			.ndtc_hash_chain_gc	= tbl->hash_chain_gc,
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		for_each_possible_cpu(cpu) {
			struct neigh_statistics *st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst);
	}

	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int neightbl_fill_param_info(struct sk_buff *skb,
				    struct neigh_table *tbl,
				    struct neigh_parms *parms,
				    u32 pid, u32 seq, int type,
				    unsigned int flags)
{
	struct ndtmsg *ndtmsg;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
						      int ifindex)
{
	struct neigh_parms *p;

	for (p = &tbl->parms; p; p = p->next)
		if ((p->dev && p->dev->ifindex == ifindex) ||
		    (!p->dev && !ifindex))
			return p;

	return NULL;
}

static struct nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_params(tbl, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				p->queue_len = nla_get_u32(tbp[i]);
				break;
			case NDTPA_PROXY_QLEN:
				p->proxy_qlen = nla_get_u32(tbp[i]);
				break;
			case NDTPA_APP_PROBES:
				p->app_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_UCAST_PROBES:
				p->ucast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_MCAST_PROBES:
				p->mcast_probes = nla_get_u32(tbp[i]);
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				p->base_reachable_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_GC_STALETIME:
				p->gc_staletime = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_DELAY_PROBE_TIME:
				p->delay_probe_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_RETRANS_TIME:
				p->retrans_time = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_ANYCAST_DELAY:
				p->anycast_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_PROXY_DELAY:
				p->proxy_delay = nla_get_msecs(tbp[i]);
				break;
			case NDTPA_LOCKTIME:
				p->locktime = nla_get_msecs(tbp[i]);
				break;
			}
		}
	}

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
			if (nidx < neigh_skip)
				continue;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).pid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		}

		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key);

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if ((neigh->nud_state & NUD_VALID) &&
	    nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) {
		read_unlock_bh(&neigh->lock);
		goto nla_put_failure;
	}

	ci.ndm_used	 = now - neigh->used;
	ci.ndm_confirmed = now - neigh->confirmed;
	ci.ndm_updated	 = now - neigh->updated;
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes));
	NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];

	read_lock_bh(&tbl->lock);
	for (h = 0; h <= tbl->hash_mask; h++) {
		if (h < s_h)
			continue;
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
			if (idx < s_idx)
				continue;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		}
	}
	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;

	read_lock_bh(&tbl->lock);
	for (chain = 0; chain <= tbl->hash_mask; chain++) {
		struct neighbour *n;

		for (n = tbl->hash_buckets[chain]; n; n = n->next)
			cb(n, cookie);
	}
	read_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled. */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;

	for (chain = 0; chain <= tbl->hash_mask; chain++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[chain];
		while ((n = *np) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				*np = n->next;
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= tbl->hash_mask; bucket++) {
		n = tbl->hash_buckets[bucket];

		while (n) {
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
		next:
			n = n->next;
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = n->next;

	while (1) {
		while (n) {
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
		next:
			n = n->next;
		}

		if (n)
			break;

		if (++state->bucket > tbl->hash_mask)
			break;

		n = tbl->hash_buckets[state->bucket];
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	pn = pn->next;
	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;

	rc = neigh_get_idx(seq, pos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, pos);

	return rc;
}
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
{
	struct neigh_seq_state *state = seq->private;
	loff_t pos_minus_one;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	read_lock_bh(&tbl->lock);

	pos_minus_one = *pos - 1;
	return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
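/*
 * seq_file ->next() helper.  While NEIGH_SEQ_IS_PNEIGH is clear the walk
 * is still over ordinary neighbours; when that walk ends, and the caller
 * did not pass NEIGH_SEQ_NEIGH_ONLY, the iteration switches over to the
 * proxy entries.
 */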
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_idx(seq, pos);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
void neigh_seq_stop(struct seq_file *seq, void *v)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_seq_stop);
/* statistics via seq_file */
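/*
 * Neighbour table statistics are kept per CPU.  The iterators below
 * encode their position as cpu + 1 in *pos (position 0 produces the
 * header line) and skip CPUs that are not possible.
 */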
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct proc_dir_entry *pde = seq->private;
	struct neigh_table *tbl = pde->data;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct proc_dir_entry *pde = seq->private;
	struct neigh_table *tbl = pde->data;
	int cpu;

	for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
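/*
 * One row is emitted per possible CPU.  The first column (entries) is
 * the table-wide entry count and so repeats on every row; the remaining
 * columns come from the per-CPU statistics block passed in as @v.
 */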
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct proc_dir_entry *pde = seq->private;
	struct neigh_table *tbl = pde->data;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs
		   );

	return 0;
}
static struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
static int neigh_stat_seq_open(struct inode *inode, struct file *file)
{
	int ret = seq_open(file, &neigh_stat_seq_ops);

	if (!ret) {
		struct seq_file *sf = file->private_data;
		sf->private = PDE(inode);
	}
	return ret;
}
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_ARPD
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
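/*
 * Build an RTM_*NEIGH netlink message for @n and multicast it to the
 * RTNLGRP_NEIGH group.  neigh_nlmsg_size() above is a worst-case upper
 * bound, so a -EMSGSIZE return from neigh_fill_info() indicates a bug
 * in that estimate rather than a legitimate runtime failure.
 */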
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
errout:
	if (err < 0)
		rtnl_set_sk_err(RTNLGRP_NEIGH, err);
}
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}

static void neigh_app_notify(struct neighbour *n)
{
	__neigh_notify(n, RTM_NEWNEIGH, 0);
}

#endif /* CONFIG_ARPD */
#ifdef CONFIG_SYSCTL
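/*
 * Template for the sysctl tree registered for each neigh_parms
 * instance.  neigh_sysctl_register() duplicates it and chains the
 * directory levels together, yielding a hierarchy of the form
 * net/<protocol>/neigh/<device>/<variable>, e.g.
 * /proc/sys/net/ipv4/neigh/eth0/retrans_time.
 */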
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	ctl_table		neigh_vars[__NET_NEIGH_MAX];
	ctl_table		neigh_dev[2];
	ctl_table		neigh_neigh_dir[2];
	ctl_table		neigh_proto_dir[2];
	ctl_table		neigh_root_dir[2];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		{
			.ctl_name	= NET_NEIGH_MCAST_SOLICIT,
			.procname	= "mcast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_UCAST_SOLICIT,
			.procname	= "ucast_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_APP_SOLICIT,
			.procname	= "app_solicit",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_RETRANS_TIME,
			.procname	= "retrans_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_userhz_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_REACHABLE_TIME,
			.procname	= "base_reachable_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_jiffies,
			.strategy	= &sysctl_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_DELAY_PROBE_TIME,
			.procname	= "delay_first_probe_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_jiffies,
			.strategy	= &sysctl_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_GC_STALE_TIME,
			.procname	= "gc_stale_time",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_jiffies,
			.strategy	= &sysctl_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_UNRES_QLEN,
			.procname	= "unres_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_PROXY_QLEN,
			.procname	= "proxy_qlen",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_ANYCAST_DELAY,
			.procname	= "anycast_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_userhz_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_PROXY_DELAY,
			.procname	= "proxy_delay",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_userhz_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_LOCKTIME,
			.procname	= "locktime",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_userhz_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_GC_INTERVAL,
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_jiffies,
			.strategy	= &sysctl_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_GC_THRESH1,
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_GC_THRESH2,
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_GC_THRESH3,
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec,
		},
		{
			.ctl_name	= NET_NEIGH_RETRANS_TIME_MS,
			.procname	= "retrans_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_ms_jiffies,
			.strategy	= &sysctl_ms_jiffies,
		},
		{
			.ctl_name	= NET_NEIGH_REACHABLE_TIME_MS,
			.procname	= "base_reachable_time_ms",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= &proc_dointvec_ms_jiffies,
			.strategy	= &sysctl_ms_jiffies,
		},
	},
	.neigh_dev = {
		{
			.ctl_name	= NET_PROTO_CONF_DEFAULT,
			.procname	= "default",
			.mode		= 0555,
		},
	},
	.neigh_neigh_dir = {
		{
			.procname	= "neigh",
			.mode		= 0555,
		},
	},
	.neigh_proto_dir = {
		{
			.mode		= 0555,
		},
	},
	.neigh_root_dir = {
		{
			.ctl_name	= CTL_NET,
			.procname	= "net",
			.mode		= 0555,
		},
	},
};
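/*
 * Register the sysctl tree for one neigh_parms instance.  The first
 * twelve neigh_vars slots point straight into @p.  Slots 12-15
 * (gc_interval and the gc_thresh values) are dropped for per-device
 * entries and, for the per-protocol "default" entry, address the fields
 * that struct neigh_table lays out immediately after its embedded
 * neigh_parms - hence the (int *)(p + 1) arithmetic.  Slots 16-17
 * re-expose retrans_time and base_reachable_time in milliseconds.
 */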
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  int p_id, int pdev_id, char *p_name,
			  proc_handler *handler, ctl_handler *strategy)
{
	struct neigh_sysctl_table *t = kmemdup(&neigh_sysctl_template,
					       sizeof(*t), GFP_KERNEL);
	const char *dev_name_source = NULL;
	char *dev_name = NULL;
	int err = 0;

	if (!t)
		return -ENOBUFS;
	t->neigh_vars[0].data  = &p->mcast_probes;
	t->neigh_vars[1].data  = &p->ucast_probes;
	t->neigh_vars[2].data  = &p->app_probes;
	t->neigh_vars[3].data  = &p->retrans_time;
	t->neigh_vars[4].data  = &p->base_reachable_time;
	t->neigh_vars[5].data  = &p->delay_probe_time;
	t->neigh_vars[6].data  = &p->gc_staletime;
	t->neigh_vars[7].data  = &p->queue_len;
	t->neigh_vars[8].data  = &p->proxy_qlen;
	t->neigh_vars[9].data  = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;

	if (dev) {
		dev_name_source = dev->name;
		t->neigh_dev[0].ctl_name = dev->ifindex;
		t->neigh_vars[12].procname = NULL;
		t->neigh_vars[13].procname = NULL;
		t->neigh_vars[14].procname = NULL;
		t->neigh_vars[15].procname = NULL;
	} else {
		dev_name_source = t->neigh_dev[0].procname;
		t->neigh_vars[12].data = (int *)(p + 1);
		t->neigh_vars[13].data = (int *)(p + 1) + 1;
		t->neigh_vars[14].data = (int *)(p + 1) + 2;
		t->neigh_vars[15].data = (int *)(p + 1) + 3;
	}

	t->neigh_vars[16].data = &p->retrans_time;
	t->neigh_vars[17].data = &p->base_reachable_time;

	if (handler || strategy) {
		/* RetransTime */
		t->neigh_vars[3].proc_handler = handler;
		t->neigh_vars[3].strategy = strategy;
		t->neigh_vars[3].extra1 = dev;
		/* ReachableTime */
		t->neigh_vars[4].proc_handler = handler;
		t->neigh_vars[4].strategy = strategy;
		t->neigh_vars[4].extra1 = dev;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[16].proc_handler = handler;
		t->neigh_vars[16].strategy = strategy;
		t->neigh_vars[16].extra1 = dev;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[17].proc_handler = handler;
		t->neigh_vars[17].strategy = strategy;
		t->neigh_vars[17].extra1 = dev;
	}

	dev_name = kstrdup(dev_name_source, GFP_KERNEL);
	if (!dev_name) {
		err = -ENOBUFS;
		goto free;
	}

	t->neigh_dev[0].procname = dev_name;

	t->neigh_neigh_dir[0].ctl_name = pdev_id;

	t->neigh_proto_dir[0].procname = p_name;
	t->neigh_proto_dir[0].ctl_name = p_id;

	t->neigh_dev[0].child	    = t->neigh_vars;
	t->neigh_neigh_dir[0].child = t->neigh_dev;
	t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
	t->neigh_root_dir[0].child  = t->neigh_proto_dir;

	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
	if (!t->sysctl_header) {
		err = -ENOBUFS;
		goto free_procname;
	}
	p->sysctl_table = t;
	return 0;

	/* error path */
free_procname:
	kfree(dev_name);
free:
	kfree(t);

	return err;
}
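/*
 * Tear down in the reverse order of registration.  The device procname
 * was kstrdup()'d in neigh_sysctl_register() and must be freed
 * explicitly.
 */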
void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_sysctl_table(t->sysctl_header);
		kfree(t->neigh_dev[0].procname);
		kfree(t);
	}
}

#endif	/* CONFIG_SYSCTL */
EXPORT_SYMBOL(__neigh_event_send);
EXPORT_SYMBOL(neigh_changeaddr);
EXPORT_SYMBOL(neigh_compat_output);
EXPORT_SYMBOL(neigh_connected_output);
EXPORT_SYMBOL(neigh_create);
EXPORT_SYMBOL(neigh_delete);
EXPORT_SYMBOL(neigh_destroy);
EXPORT_SYMBOL(neigh_dump_info);
EXPORT_SYMBOL(neigh_event_ns);
EXPORT_SYMBOL(neigh_ifdown);
EXPORT_SYMBOL(neigh_lookup);
EXPORT_SYMBOL(neigh_lookup_nodev);
EXPORT_SYMBOL(neigh_parms_alloc);
EXPORT_SYMBOL(neigh_parms_release);
EXPORT_SYMBOL(neigh_rand_reach_time);
EXPORT_SYMBOL(neigh_resolve_output);
EXPORT_SYMBOL(neigh_table_clear);
EXPORT_SYMBOL(neigh_table_init);
EXPORT_SYMBOL(neigh_table_init_no_netlink);
EXPORT_SYMBOL(neigh_update);
EXPORT_SYMBOL(pneigh_enqueue);
EXPORT_SYMBOL(pneigh_lookup);

#ifdef CONFIG_ARPD
EXPORT_SYMBOL(neigh_app_ns);
#endif
#ifdef CONFIG_SYSCTL
EXPORT_SYMBOL(neigh_sysctl_register);
EXPORT_SYMBOL(neigh_sysctl_unregister);
#endif