/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Horst von Brand		Add #include <linux/string.h>
 */

#include <linux/config.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/sched.h>
#include <linux/netdevice.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>

/*
   NOTE. The most unpleasant question is serialization of
   accesses to resolved addresses. The problem is that addresses
   are modified by bh, but they are referenced from normal
   kernel threads. Until now no locking was done.
   My reasoning was that a corrupted address token would be copied
   to a packet with cosmologically small probability
   (it is even difficult to estimate such a small number)
   and it is very silly to waste cycles in the fast path to lock them.

   But now I have changed my mind, though not because the previous
   statement is wrong. Actually, neigh->ha MAY BE not an opaque byte
   array, but a reference to some private data. In that case even a
   negligible corruption probability becomes a bug.

   - The hh cache is protected by an rwlock. It assumes that
     the hh cache update procedure is short and fast, and that
     read_lock is cheaper than start_bh_atomic().
   - ha tokens, saved in neighbour entries, are protected
     by bh_atomic().
   - no protection is made in /proc reading. It is OK, because
     /proc is broken by design in any case, and
     corrupted output is normal behaviour there.

   --ANK (981025)
 */
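
/*
   A minimal sketch (not part of this file) of the read side the
   locking note above assumes: a sender copies the cached hardware
   header while holding the hh rwlock for reading, pairing with the
   write_lock_irq() taken in neigh_update_hhs() below.  The 16-byte
   copy length mirrors the fixed-size hh_data buffer; treat the names
   and sizes here as illustrative assumptions, not code from this file.

	static void example_copy_cached_header(struct hh_cache *hh,
					       struct sk_buff *skb)
	{
		read_lock_irq(&hh->hh_lock);
		memcpy(skb->data - 16, hh->hh_data, 16);
		read_unlock_irq(&hh->hh_lock);
	}
 */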

#define NEIGH_DEBUG 1

#define NEIGH_PRINTK(x...) printk(x)
#define NEIGH_NOPRINTK(x...) do { ; } while(0)
#define NEIGH_PRINTK0 NEIGH_PRINTK
#define NEIGH_PRINTK1 NEIGH_NOPRINTK
#define NEIGH_PRINTK2 NEIGH_NOPRINTK

#if NEIGH_DEBUG >= 1
#undef NEIGH_PRINTK1
#define NEIGH_PRINTK1 NEIGH_PRINTK
#endif
#if NEIGH_DEBUG >= 2
#undef NEIGH_PRINTK2
#define NEIGH_PRINTK2 NEIGH_PRINTK
#endif

static void neigh_timer_handler(unsigned long arg);
#ifdef CONFIG_ARPD
static void neigh_app_notify(struct neighbour *n);
#endif
static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev);

static int neigh_glbl_allocs;
static struct neigh_table *neigh_tables;

static int neigh_blackhole(struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return (net_random() % base) + (base>>1);
}
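
/* Worked example: with base == 30*HZ the result is uniform over
 * [15*HZ, 45*HZ), i.e. ReachableTime lands between 15 and 45 seconds,
 * matching the 0.5..1.5 random factor that IPv6 NUD specifies.
 */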

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;

	if (atomic_read(&tbl->lock))
		return 0;

	for (i=0; i<=NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			/* Neighbour record may be discarded if:
			   - nobody refers to it.
			   - it is not permanent.
			   - (NEW and probably wrong)
			     INCOMPLETE entries are kept at least for
			     n->parms->retrans_time, otherwise we could
			     flood the network with resolution requests.
			     It is not clear which is worse: table overflow
			     or flooding.
			 */
			if (atomic_read(&n->refcnt) == 0 &&
			    !(n->nud_state&NUD_PERMANENT) &&
			    (n->nud_state != NUD_INCOMPLETE ||
			     jiffies - n->used > n->parms->retrans_time)) {
				*np = n->next;
				n->tbl = NULL;
				tbl->entries--;
				shrunk = 1;
				neigh_destroy(n);
				continue;
			}
			np = &n->next;
		}
	}

	tbl->last_flush = jiffies;
	return shrunk;
}

int neigh_ifdown(struct neigh_table *tbl, struct device *dev)
{
	int i;

	if (atomic_read(&tbl->lock)) {
		NEIGH_PRINTK1("neigh_ifdown: impossible event 1763\n");
		return -EBUSY;
	}

	start_bh_atomic();
	for (i=0; i<=NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			*np = n->next;
			n->tbl = NULL;
			tbl->entries--;
			if (atomic_read(&n->refcnt)) {
				/* The most unpleasant situation.
				   We must destroy the neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to a safe state.
				 */
				if (n->nud_state & NUD_IN_TIMER)
					del_timer(&n->timer);
				n->parms = &tbl->parms;
				skb_queue_purge(&n->arp_queue);
				n->output = neigh_blackhole;
				if (n->nud_state&NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				NEIGH_PRINTK2("neigh %p is stray.\n", n);
			} else
				neigh_destroy(n);
		}
	}

	del_timer(&tbl->proxy_timer);
	skb_queue_purge(&tbl->proxy_queue);
	pneigh_ifdown(tbl, dev);
	end_bh_atomic();
	return 0;
}

static struct neighbour *neigh_alloc(struct neigh_table *tbl, int creat)
{
	struct neighbour *n;
	unsigned long now = jiffies;

	if (tbl->entries > tbl->gc_thresh1) {
		if (creat < 0)
			return NULL;
		/* Above gc_thresh3, or above gc_thresh2 with no flush in
		   the last 5 seconds, force a garbage collection first. */
		if (tbl->entries > tbl->gc_thresh3 ||
		    (tbl->entries > tbl->gc_thresh2 &&
		     now - tbl->last_flush > 5*HZ)) {
			if (neigh_forced_gc(tbl) == 0 &&
			    tbl->entries > tbl->gc_thresh3)
				return NULL;
		}
	}

	n = kmalloc(tbl->entry_size, GFP_ATOMIC);
	if (n == NULL)
		return NULL;

	memset(n, 0, tbl->entry_size);

	skb_queue_head_init(&n->arp_queue);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->parms = &tbl->parms;
	init_timer(&n->timer);
	n->timer.function = neigh_timer_handler;
	n->timer.data = (unsigned long)n;
	tbl->stats.allocs++;
	neigh_glbl_allocs++;
	return n;
}

struct neighbour * __neigh_lookup(struct neigh_table *tbl, const void *pkey,
				  struct device *dev, int creat)
{
	struct neighbour *n;
	u32 hash_val;
	int key_len = tbl->key_len;

	hash_val = *(u32*)(pkey + key_len - 4);
	hash_val ^= (hash_val>>16);
	hash_val ^= hash_val>>8;
	hash_val ^= hash_val>>3;
	hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK;
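	/* The hash XOR-folds the last four bytes of the key and mixes in
	   dev->ifindex, so the same address seen on different devices
	   falls into different buckets. */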

	for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
		if (dev == n->dev &&
		    memcmp(n->primary_key, pkey, key_len) == 0) {
			atomic_inc(&n->refcnt);
			return n;
		}
	}
	if (!creat)
		return NULL;

	n = neigh_alloc(tbl, creat);
	if (n == NULL)
		return NULL;

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;

	/* Protocol specific setup. */
	if (tbl->constructor && tbl->constructor(n) < 0) {
		neigh_destroy(n);
		return NULL;
	}

	/* Device specific setup. */
	if (n->parms && n->parms->neigh_setup && n->parms->neigh_setup(n) < 0) {
		neigh_destroy(n);
		return NULL;
	}

	n->confirmed = jiffies - (n->parms->base_reachable_time<<1);
	atomic_set(&n->refcnt, 1);
	tbl->entries++;
	n->next = tbl->hash_buckets[hash_val];
	tbl->hash_buckets[hash_val] = n;
	n->tbl = tbl;
	NEIGH_PRINTK2("neigh %p is created.\n", n);
	return n;
}

struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
				    struct device *dev, int creat)
{
	struct pneigh_entry *n;
	u32 hash_val;
	int key_len = tbl->key_len;

	hash_val = *(u32*)(pkey + key_len - 4);
	hash_val ^= (hash_val>>16);
	hash_val ^= hash_val>>8;
	hash_val ^= hash_val>>4;
	hash_val &= PNEIGH_HASHMASK;

	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
		if (memcmp(n->key, pkey, key_len) == 0 &&
		    (n->dev == dev || !n->dev))
			return n;
	}
	if (!creat)
		return NULL;

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (n == NULL)
		return NULL;

	memcpy(n->key, pkey, key_len);
	n->dev = dev;

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		kfree(n);
		return NULL;
	}

	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	return n;
}

int pneigh_delete(struct neigh_table *tbl, const void *pkey, struct device *dev)
{
	struct pneigh_entry *n, **np;
	u32 hash_val;
	int key_len = tbl->key_len;

	hash_val = *(u32*)(pkey + key_len - 4);
	hash_val ^= (hash_val>>16);
	hash_val ^= hash_val>>8;
	hash_val ^= hash_val>>4;
	hash_val &= PNEIGH_HASHMASK;

	for (np = &tbl->phash_buckets[hash_val]; (n=*np) != NULL; np = &n->next) {
		if (memcmp(n->key, pkey, key_len) == 0 && n->dev == dev) {
			*np = n->next;
			synchronize_bh();
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			kfree(n);
			return 0;
		}
	}
	return -ENOENT;
}

static int pneigh_ifdown(struct neigh_table *tbl, struct device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h=0; h<=PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		/* A while loop, not a for loop with an np increment:
		   after kfree(n) the old n->next must not be touched,
		   and *np already points at the successor. */
		while ((n=*np) != NULL) {
			if (n->dev == dev || dev == NULL) {
				*np = n->next;
				synchronize_bh();
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}

/*
 *	neighbour must already be out of the table.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct hh_cache *hh;

	if (neigh->tbl || atomic_read(&neigh->refcnt)) {
		NEIGH_PRINTK1("neigh_destroy: neighbour still in use, tbl=%p, ref=%d: "
			      "called from %p\n", neigh->tbl, atomic_read(&neigh->refcnt),
			      __builtin_return_address(0));
		return;
	}

	if (neigh->nud_state&NUD_IN_TIMER)
		del_timer(&neigh->timer);

	while ((hh = neigh->hh) != NULL) {
		neigh->hh = hh->hh_next;
		hh->hh_next = NULL;
		hh->hh_output = neigh_blackhole;
		if (atomic_dec_and_test(&hh->hh_refcnt))
			kfree(hh);
	}

	if (neigh->ops && neigh->ops->destructor)
		(neigh->ops->destructor)(neigh);

	skb_queue_purge(&neigh->arp_queue);

	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);

	neigh_glbl_allocs--;
	kfree(neigh);
}

/* Neighbour state is suspicious;
   disable fast path.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is suspected.\n", neigh);

	neigh->output = neigh->ops->output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->output;
}

/* Neighbour state is OK;
   enable fast path.
 */
static void neigh_connect(struct neighbour *neigh)
{
	struct hh_cache *hh;

	NEIGH_PRINTK2("neigh %p is connected.\n", neigh);

	neigh->output = neigh->ops->connected_output;

	for (hh = neigh->hh; hh; hh = hh->hh_next)
		hh->hh_output = neigh->ops->hh_output;
}

/*
   Transitions NUD_STALE <-> NUD_REACHABLE do not occur
   while the fast path is built: we have no timers associated with
   these states, and no time to check state when sending.
   neigh_periodic_timer periodically checks neigh->confirmed
   and moves NUD_REACHABLE -> NUD_STALE.

   If a routine wants to know the TRUE entry state, it calls
   neigh_sync before checking the state.
 */

static void neigh_sync(struct neighbour *n)
{
	unsigned long now = jiffies;
	u8 state = n->nud_state;

	if (state&(NUD_NOARP|NUD_PERMANENT))
		return;
	if (state&NUD_REACHABLE) {
		if (now - n->confirmed > n->parms->reachable_time) {
			n->nud_state = NUD_STALE;
			neigh_suspect(n);
		}
	} else if (state&NUD_VALID) {
		if (now - n->confirmed < n->parms->reachable_time) {
			if (state&NUD_IN_TIMER)
				del_timer(&n->timer);
			n->nud_state = NUD_REACHABLE;
			neigh_connect(n);
		}
	}
}

static void neigh_periodic_timer(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table*)arg;
	unsigned long now = jiffies;
	int i;

	if (atomic_read(&tbl->lock)) {
		tbl->gc_timer.expires = now + 1*HZ;
		add_timer(&tbl->gc_timer);
		return;
	}

	/*
	 *	Periodically recompute ReachableTime from random function.
	 */
	if (now - tbl->last_rand > 300*HZ) {
		struct neigh_parms *p;
		tbl->last_rand = now;
		for (p=&tbl->parms; p; p = p->next)
			p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
	}

	for (i=0; i <= NEIGH_HASHMASK; i++) {
		struct neighbour *n, **np;

		np = &tbl->hash_buckets[i];
		while ((n = *np) != NULL) {
			unsigned state = n->nud_state;

			if (state&(NUD_PERMANENT|NUD_IN_TIMER))
				goto next_elt;

			if ((long)(n->used - n->confirmed) < 0)
				n->used = n->confirmed;

			if (atomic_read(&n->refcnt) == 0 &&
			    (state == NUD_FAILED || now - n->used > n->parms->gc_staletime)) {
				*np = n->next;
				n->tbl = NULL;
				n->next = NULL;
				tbl->entries--;
				neigh_destroy(n);
				continue;
			}

			if (n->nud_state&NUD_REACHABLE &&
			    now - n->confirmed > n->parms->reachable_time) {
				n->nud_state = NUD_STALE;
				neigh_suspect(n);
			}

next_elt:
			np = &n->next;
		}
	}

	tbl->gc_timer.expires = now + tbl->gc_interval;
	add_timer(&tbl->gc_timer);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;
	return p->ucast_probes + p->app_probes + p->mcast_probes;
}

/* Called when a timer expires for a neighbour entry. */

static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now = jiffies;
	struct neighbour *neigh = (struct neighbour*)arg;
	unsigned state = neigh->nud_state;

	if (!(state&NUD_IN_TIMER)) {
		NEIGH_PRINTK1("neigh: timer & !nud_in_timer\n");
		return;
	}

	if ((state&NUD_VALID) &&
	    now - neigh->confirmed < neigh->parms->reachable_time) {
		neigh->nud_state = NUD_REACHABLE;
		NEIGH_PRINTK2("neigh %p is still alive.\n", neigh);
		neigh_connect(neigh);
		return;
	}
	if (state == NUD_DELAY) {
		NEIGH_PRINTK2("neigh %p is probed.\n", neigh);
		neigh->nud_state = NUD_PROBE;
		neigh->probes = 0;
	}

	if (neigh->probes >= neigh_max_probes(neigh)) {
		struct sk_buff *skb;

		neigh->nud_state = NUD_FAILED;
		neigh->tbl->stats.res_failed++;
		NEIGH_PRINTK2("neigh %p is failed.\n", neigh);

		/* This is a very thin place. report_unreachable is a very
		   complicated routine. In particular, it can hit
		   the same neighbour entry!

		   So we try to be accurate and avoid a dead loop. --ANK
		 */
		while (neigh->nud_state==NUD_FAILED &&
		       (skb=__skb_dequeue(&neigh->arp_queue)) != NULL)
			neigh->ops->error_report(neigh, skb);
		skb_queue_purge(&neigh->arp_queue);
		return;
	}

	neigh->timer.expires = now + neigh->parms->retrans_time;
	add_timer(&neigh->timer);

	neigh->ops->solicit(neigh, skb_peek(&neigh->arp_queue));
	neigh->probes++;
}

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	start_bh_atomic();
	if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) {
		if (!(neigh->nud_state&(NUD_STALE|NUD_INCOMPLETE))) {
			if (neigh->tbl == NULL) {
				NEIGH_PRINTK2("neigh %p used after death.\n", neigh);
				if (skb)
					kfree_skb(skb);
				end_bh_atomic();
				return 1;
			}
			if (neigh->parms->mcast_probes + neigh->parms->app_probes) {
				neigh->probes = neigh->parms->ucast_probes;
				neigh->nud_state = NUD_INCOMPLETE;
				neigh->timer.expires = jiffies + neigh->parms->retrans_time;
				add_timer(&neigh->timer);

				neigh->ops->solicit(neigh, skb);
				neigh->probes++;
			} else {
				neigh->nud_state = NUD_FAILED;
				if (skb)
					kfree_skb(skb);
				end_bh_atomic();
				return 1;
			}
		}
		if (neigh->nud_state == NUD_INCOMPLETE) {
			if (skb) {
				if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) {
					struct sk_buff *buff;
					buff = neigh->arp_queue.prev;
					__skb_unlink(buff, &neigh->arp_queue);
					kfree_skb(buff);
				}
				__skb_queue_head(&neigh->arp_queue, skb);
			}
			end_bh_atomic();
			return 1;
		}
		if (neigh->nud_state == NUD_STALE) {
			NEIGH_PRINTK2("neigh %p is delayed.\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->timer.expires = jiffies + neigh->parms->delay_probe_time;
			add_timer(&neigh->timer);
		}
	}
	end_bh_atomic();
	return 0;
}

static __inline__ void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, struct device*, unsigned char*) =
		neigh->dev->header_cache_update;

	if (update) {
		for (hh=neigh->hh; hh; hh=hh->hh_next) {
			write_lock_irq(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_unlock_irq(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is the new lladdr, or NULL if it is not supplied.
   -- new is the new state.
   -- override==1 allows overriding an existing lladdr if it differs.
   -- arp==0 means that the change is administrative.
 */

int neigh_update(struct neighbour *neigh, u8 *lladdr, u8 new, int override, int arp)
{
	u8 old = neigh->nud_state;
	struct device *dev = neigh->dev;

	if (arp && (old&(NUD_NOARP|NUD_PERMANENT)))
		return -EPERM;

	if (!(new&NUD_VALID)) {
		if (old&NUD_IN_TIMER)
			del_timer(&neigh->timer);
		if (old&NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		return 0;
	}

	/* Compare the new lladdr with the cached one. */
	if (dev->addr_len == 0) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check the override flag
		 */
		if (old&NUD_VALID) {
			if (memcmp(lladdr, neigh->ha, dev->addr_len) == 0)
				lladdr = neigh->ha;
			else if (!override)
				return -EPERM;
		}
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		if (!(old&NUD_VALID))
			return -EINVAL;
		lladdr = neigh->ha;
	}

	neigh_sync(neigh);
	old = neigh->nud_state;
	if (new&NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If the entry was valid and the address is unchanged,
	   do not change the entry state if the new one is STALE.
	 */
	if (old&NUD_VALID) {
		if (lladdr == neigh->ha)
			if (new == old || (new == NUD_STALE && (old&NUD_CONNECTED)))
				return 0;
	}
	if (old&NUD_IN_TIMER)
		del_timer(&neigh->timer);
	neigh->nud_state = new;
	if (lladdr != neigh->ha) {
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		neigh_update_hhs(neigh);
		neigh->confirmed = jiffies - (neigh->parms->base_reachable_time<<1);
#ifdef CONFIG_ARPD
		if (neigh->parms->app_probes)
			neigh_app_notify(neigh);
#endif
	}
	if (new == old)
		return 0;
	if (new&NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old&NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid a dead loop if something went wrong. */

		while (neigh->nud_state&NUD_VALID &&
		       (skb=__skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct neighbour *n1 = neigh;
			/* On shaper/eql skb->dst->neighbour != neigh :( */
			if (skb->dst && skb->dst->neighbour)
				n1 = skb->dst->neighbour;
			n1->output(skb);
		}
		skb_queue_purge(&neigh->arp_queue);
	}
	return 0;
}

struct neighbour * neigh_event_ns(struct neigh_table *tbl,
				  u8 *lladdr, void *saddr,
				  struct device *dev)
{
	struct neighbour *neigh;

	neigh = __neigh_lookup(tbl, saddr, dev, lladdr || !dev->addr_len);
	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE, 1, 1);
	return neigh;
}

static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, u16 protocol)
{
	struct hh_cache *hh = NULL;
	struct device *dev = dst->dev;

	for (hh=n->hh; hh; hh = hh->hh_next)
		if (hh->hh_type == protocol)
			break;

	if (!hh && (hh = kmalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) {
		memset(hh, 0, sizeof(struct hh_cache));
		hh->hh_type = protocol;
		atomic_set(&hh->hh_refcnt, 0);
		hh->hh_next = NULL;
		if (dev->hard_header_cache(n, hh)) {
			kfree(hh);
			hh = NULL;
		} else {
			atomic_inc(&hh->hh_refcnt);
			hh->hh_next = n->hh;
			n->hh = hh;
			if (n->nud_state&NUD_CONNECTED)
				hh->hh_output = n->ops->hh_output;
			else
				hh->hh_output = n->ops->output;
		}
	}
	if (hh) {
		atomic_inc(&hh->hh_refcnt);
		dst->hh = hh;
	}
}

/* This function can be used in contexts where only the old dev_queue_xmit
   worked, e.g. if you want to override the normal output path (eql, shaper),
   but resolution is not made yet.
 */

int neigh_compat_output(struct sk_buff *skb)
{
	struct device *dev = skb->dev;

	__skb_pull(skb, skb->nh.raw - skb->data);

	if (dev->hard_header &&
	    dev->hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 &&
	    dev->rebuild_header(skb))
		return 0;

	return dev_queue_xmit(skb);
}

/* Slow and careful. */

int neigh_resolve_output(struct sk_buff *skb)
{
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh;

	if (!dst || !(neigh = dst->neighbour))
		goto discard;

	__skb_pull(skb, skb->nh.raw - skb->data);

	if (neigh_event_send(neigh, skb) == 0) {
		int err;
		struct device *dev = neigh->dev;
		if (dev->hard_header_cache && dst->hh == NULL) {
			start_bh_atomic();
			if (dst->hh == NULL)
				neigh_hh_init(neigh, dst, dst->ops->protocol);
			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
			end_bh_atomic();
		} else {
			start_bh_atomic();
			err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
			end_bh_atomic();
		}
		if (err >= 0)
			return neigh->ops->queue_xmit(skb);
		kfree_skb(skb);
		return -EINVAL;
	}
	return 0;

discard:
	NEIGH_PRINTK1("neigh_resolve_output: dst=%p neigh=%p\n", dst, dst ? dst->neighbour : NULL);
	kfree_skb(skb);
	return -EINVAL;
}

/* As fast as possible without hh cache */

int neigh_connected_output(struct sk_buff *skb)
{
	int err;
	struct dst_entry *dst = skb->dst;
	struct neighbour *neigh = dst->neighbour;
	struct device *dev = neigh->dev;

	__skb_pull(skb, skb->nh.raw - skb->data);

	start_bh_atomic();
	err = dev->hard_header(skb, dev, ntohs(skb->protocol), neigh->ha, NULL, skb->len);
	end_bh_atomic();
	if (err >= 0)
		return neigh->ops->queue_xmit(skb);
	kfree_skb(skb);
	return -EINVAL;
}

static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb = tbl->proxy_queue.next;

	while (skb != (struct sk_buff*)&tbl->proxy_queue) {
		struct sk_buff *back = skb;
		/* stamp.tv_usec carries the expiry time in jiffies,
		   set by pneigh_enqueue() below. */
		long tdif = back->stamp.tv_usec - now;

		skb = skb->next;
		if (tdif <= 0) {
			__skb_unlink(back, &tbl->proxy_queue);
			if (tbl->proxy_redo)
				tbl->proxy_redo(back);
			else
				kfree_skb(back);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next) {
		tbl->proxy_timer.expires = jiffies + sched_next;
		add_timer(&tbl->proxy_timer);
	}
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	long sched_next = net_random()%p->proxy_delay;

	if (tbl->proxy_queue.qlen > p->proxy_qlen) {
		kfree_skb(skb);
		return;
	}
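	/* The skb timestamp is reused as queue metadata: tv_usec holds
	   the absolute expiry time in jiffies, which neigh_proxy_process()
	   above compares against the current jiffies value. */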
	skb->stamp.tv_sec = 0;
	skb->stamp.tv_usec = now + sched_next;
	if (del_timer(&tbl->proxy_timer)) {
		long tval = tbl->proxy_timer.expires - now;
		if (tval < sched_next)
			sched_next = tval;
	}
	tbl->proxy_timer.expires = now + sched_next;
	dst_release(skb->dst);
	skb->dst = NULL;
	__skb_queue_tail(&tbl->proxy_queue, skb);
	add_timer(&tbl->proxy_timer);
}

struct neigh_parms *neigh_parms_alloc(struct device *dev, struct neigh_table *tbl)
{
	struct neigh_parms *p;
	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (p) {
		memcpy(p, &tbl->parms, sizeof(*p));
		p->tbl = tbl;
		p->reachable_time = neigh_rand_reach_time(p->base_reachable_time);
		if (dev && dev->neigh_setup) {
			if (dev->neigh_setup(dev, p)) {
				kfree(p);
				return NULL;
			}
		}
		p->next = tbl->parms.next;
		tbl->parms.next = p;
	}
	return p;
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (parms == NULL || parms == &tbl->parms)
		return;
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			synchronize_bh();
#ifdef CONFIG_SYSCTL
			neigh_sysctl_unregister(parms);
#endif
			kfree(parms);
			return;
		}
	}
	NEIGH_PRINTK1("neigh_parms_release: not found\n");
}

void neigh_table_init(struct neigh_table *tbl)
{
	unsigned long now = jiffies;

	tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time);

	init_timer(&tbl->gc_timer);
	tbl->gc_timer.data = (unsigned long)tbl;
	tbl->gc_timer.function = neigh_periodic_timer;
	tbl->gc_timer.expires = now + tbl->gc_interval + tbl->parms.reachable_time;
	add_timer(&tbl->gc_timer);

	init_timer(&tbl->proxy_timer);
	tbl->proxy_timer.data = (unsigned long)tbl;
	tbl->proxy_timer.function = neigh_proxy_process;
	skb_queue_head_init(&tbl->proxy_queue);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time*20;
	tbl->next = neigh_tables;
	neigh_tables = tbl;
}

int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	start_bh_atomic();
	del_timer(&tbl->gc_timer);
	del_timer(&tbl->proxy_timer);
	skb_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	end_bh_atomic();
	if (tbl->entries)
		printk(KERN_CRIT "neighbour leakage\n");
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			synchronize_bh();
			break;
		}
	}
#ifdef CONFIG_SYSCTL
	neigh_sysctl_unregister(&tbl->parms);
#endif
	return 0;
}

#ifdef CONFIG_RTNETLINK

int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm = NLMSG_DATA(nlh);
	struct rtattr **nda = arg;
	struct neigh_table *tbl;
	struct device *dev = NULL;

	if (ndm->ndm_ifindex) {
		if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
			return -ENODEV;
	}

	for (tbl=neigh_tables; tbl; tbl = tbl->next) {
		int err = 0;
		struct neighbour *n;

		if (tbl->family != ndm->ndm_family)
			continue;

		if (nda[NDA_DST-1] == NULL ||
		    nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
			return -EINVAL;

		if (ndm->ndm_flags&NTF_PROXY)
			return pneigh_delete(tbl, RTA_DATA(nda[NDA_DST-1]), dev);

		if (dev == NULL)
			return -EINVAL;

		start_bh_atomic();
		n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0);
		if (n) {
			err = neigh_update(n, NULL, NUD_FAILED, 1, 0);
			neigh_release(n);
		}
		end_bh_atomic();
		return err;
	}

	return -EADDRNOTAVAIL;
}

int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
{
	struct ndmsg *ndm = NLMSG_DATA(nlh);
	struct rtattr **nda = arg;
	struct neigh_table *tbl;
	struct device *dev = NULL;

	if (ndm->ndm_ifindex) {
		if ((dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL)
			return -ENODEV;
	}

	for (tbl=neigh_tables; tbl; tbl = tbl->next) {
		int err = 0;
		struct neighbour *n;

		if (tbl->family != ndm->ndm_family)
			continue;
		if (nda[NDA_DST-1] == NULL ||
		    nda[NDA_DST-1]->rta_len != RTA_LENGTH(tbl->key_len))
			return -EINVAL;
		if (ndm->ndm_flags&NTF_PROXY) {
			if (pneigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1))
				return 0;
			return -ENOBUFS;
		}
		if (dev == NULL)
			return -EINVAL;
		if (nda[NDA_LLADDR-1] != NULL &&
		    nda[NDA_LLADDR-1]->rta_len != RTA_LENGTH(dev->addr_len))
			return -EINVAL;
		start_bh_atomic();
		n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 0);
		if (n) {
			if (nlh->nlmsg_flags&NLM_F_EXCL)
				err = -EEXIST;
		} else if (!(nlh->nlmsg_flags&NLM_F_CREATE))
			err = -ENOENT;
		else {
			n = __neigh_lookup(tbl, RTA_DATA(nda[NDA_DST-1]), dev, 1);
			if (n == NULL)
				err = -ENOBUFS;
		}
		if (err == 0) {
			err = neigh_update(n, nda[NDA_LLADDR-1] ? RTA_DATA(nda[NDA_LLADDR-1]) : NULL,
					   ndm->ndm_state,
					   nlh->nlmsg_flags&NLM_F_REPLACE, 0);
		}
		if (n)
			neigh_release(n);
		end_bh_atomic();
		return err;
	}

	return -EADDRNOTAVAIL;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n,
			   u32 pid, u32 seq, int event)
{
	unsigned long now = jiffies;
	struct ndmsg *ndm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb->tail;
	struct nda_cacheinfo ci;

	nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(*ndm));
	ndm = NLMSG_DATA(nlh);
	ndm->ndm_family = n->ops->family;
	ndm->ndm_flags = n->flags;
	ndm->ndm_type = n->type;
	ndm->ndm_state = n->nud_state;
	ndm->ndm_ifindex = n->dev->ifindex;
	RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key);
	if (n->nud_state&NUD_VALID)
		RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha);
	ci.ndm_used = now - n->used;
	ci.ndm_confirmed = now - n->confirmed;
	ci.ndm_updated = now - n->updated;
	ci.ndm_refcnt = atomic_read(&n->refcnt);
	RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci);
	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
rtattr_failure:
	skb_trim(skb, b - skb->data);
	return -1;
}

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct neighbour *n;
	int h, s_h;
	int idx, s_idx;

	s_h = cb->args[1];
	s_idx = idx = cb->args[2];
	for (h=0; h <= NEIGH_HASHMASK; h++) {
		if (h < s_h) continue;
		if (h > s_h)
			s_idx = 0;
		start_bh_atomic();
		for (n = tbl->hash_buckets[h], idx = 0; n;
		     n = n->next, idx++) {
			if (idx < s_idx)
				continue;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
					    cb->nlh->nlmsg_seq, RTM_NEWNEIGH) <= 0) {
				end_bh_atomic();
				cb->args[1] = h;
				cb->args[2] = idx;
				return -1;
			}
		}
		end_bh_atomic();
	}

	cb->args[1] = h;
	cb->args[2] = idx;
	return skb->len;
}

int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	int t;
	int s_t;
	struct neigh_table *tbl;
	int family = ((struct rtgenmsg*)NLMSG_DATA(cb->nlh))->rtgen_family;

	s_t = cb->args[0];

	for (tbl=neigh_tables, t=0; tbl; tbl = tbl->next, t++) {
		if (t < s_t) continue;
		if (family && tbl->family != family)
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		if (neigh_dump_table(tbl, skb, cb) < 0)
			break;
	}

	cb->args[0] = t;

	return skb->len;
}

#ifdef CONFIG_ARPD
void neigh_app_ns(struct neighbour *n)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		return;

	if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH) < 0) {
		kfree_skb(skb);
		return;
	}
	nlh = (struct nlmsghdr*)skb->data;
	nlh->nlmsg_flags = NLM_F_REQUEST;
	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}

static void neigh_app_notify(struct neighbour *n)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int size = NLMSG_SPACE(sizeof(struct ndmsg)+256);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		return;

	if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH) < 0) {
		kfree_skb(skb);
		return;
	}
	nlh = (struct nlmsghdr*)skb->data;
	NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH;
	netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC);
}

#endif /* CONFIG_ARPD */

#endif /* CONFIG_RTNETLINK */

#ifdef CONFIG_SYSCTL

struct neigh_sysctl_table
{
	struct ctl_table_header *sysctl_header;
	ctl_table neigh_vars[17];
	ctl_table neigh_dev[2];
	ctl_table neigh_neigh_dir[2];
	ctl_table neigh_proto_dir[2];
	ctl_table neigh_root_dir[2];
} neigh_sysctl_template = {
	NULL,
	{{NET_NEIGH_MCAST_SOLICIT, "mcast_solicit",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_UCAST_SOLICIT, "ucast_solicit",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_APP_SOLICIT, "app_solicit",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_RETRANS_TIME, "retrans_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_REACHABLE_TIME, "base_reachable_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_DELAY_PROBE_TIME, "delay_first_probe_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_GC_STALE_TIME, "gc_stale_time",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_UNRES_QLEN, "unres_qlen",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_PROXY_QLEN, "proxy_qlen",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_ANYCAST_DELAY, "anycast_delay",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_PROXY_DELAY, "proxy_delay",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_LOCKTIME, "locktime",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_GC_INTERVAL, "gc_interval",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec_jiffies},
	 {NET_NEIGH_GC_THRESH1, "gc_thresh1",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_GC_THRESH2, "gc_thresh2",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {NET_NEIGH_GC_THRESH3, "gc_thresh3",
	  NULL, sizeof(int), 0644, NULL,
	  &proc_dointvec},
	 {0}},

	{{NET_PROTO_CONF_DEFAULT, "default", NULL, 0, 0555, NULL},{0}},
	{{0, "neigh", NULL, 0, 0555, NULL},{0}},
	{{0, NULL, NULL, 0, 0555, NULL},{0}},
	{{CTL_NET, "net", NULL, 0, 0555, NULL},{0}}
};

int neigh_sysctl_register(struct device *dev, struct neigh_parms *p,
			  int p_id, int pdev_id, char *p_name)
{
	struct neigh_sysctl_table *t;

	t = kmalloc(sizeof(*t), GFP_KERNEL);
	if (t == NULL)
		return -ENOBUFS;
	memcpy(t, &neigh_sysctl_template, sizeof(*t));
	t->neigh_vars[0].data = &p->mcast_probes;
	t->neigh_vars[1].data = &p->ucast_probes;
	t->neigh_vars[2].data = &p->app_probes;
	t->neigh_vars[3].data = &p->retrans_time;
	t->neigh_vars[4].data = &p->base_reachable_time;
	t->neigh_vars[5].data = &p->delay_probe_time;
	t->neigh_vars[6].data = &p->gc_staletime;
	t->neigh_vars[7].data = &p->queue_len;
	t->neigh_vars[8].data = &p->proxy_qlen;
	t->neigh_vars[9].data = &p->anycast_delay;
	t->neigh_vars[10].data = &p->proxy_delay;
	t->neigh_vars[11].data = &p->locktime;
	if (dev) {
		t->neigh_dev[0].procname = dev->name;
		t->neigh_dev[0].ctl_name = dev->ifindex;
		/* Terminate the table early: per-device directories
		   do not export the gc_* entries below. */
		memset(&t->neigh_vars[12], 0, sizeof(ctl_table));
	} else {
		t->neigh_vars[12].data = (int*)(p+1);
		t->neigh_vars[13].data = (int*)(p+1) + 1;
		t->neigh_vars[14].data = (int*)(p+1) + 2;
		t->neigh_vars[15].data = (int*)(p+1) + 3;
	}
	t->neigh_neigh_dir[0].ctl_name = pdev_id;

	t->neigh_proto_dir[0].procname = p_name;
	t->neigh_proto_dir[0].ctl_name = p_id;

	t->neigh_dev[0].child = t->neigh_vars;
	t->neigh_neigh_dir[0].child = t->neigh_dev;
	t->neigh_proto_dir[0].child = t->neigh_neigh_dir;
	t->neigh_root_dir[0].child = t->neigh_proto_dir;

	t->sysctl_header = register_sysctl_table(t->neigh_root_dir, 0);
	if (t->sysctl_header == NULL) {
		kfree(t);
		return -ENOBUFS;
	}
	p->sysctl_table = t;
	return 0;
}
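
/* Typical use (a sketch following net/ipv4/arp.c; treat the exact call
 * site as an assumption, not something defined in this file): a protocol
 * registers its default parms once at init time, e.g.
 *
 *	neigh_sysctl_register(NULL, &arp_tbl.parms,
 *			      NET_IPV4, NET_IPV4_NEIGH, "ipv4");
 *
 * which creates the /proc/sys/net/ipv4/neigh/default/ directory.
 */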

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;
		p->sysctl_table = NULL;
		unregister_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}

#endif /* CONFIG_SYSCTL */