/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 FIB: lookup engine and maintenance routines.
 *
 * Version:	$Id: fib_hash.c,v 1.13 2001/10/31 21:55:54 davem Exp $
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>

#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>

#include "fib_lookup.h"
static kmem_cache_t *fn_hash_kmem;
static kmem_cache_t *fn_alias_kmem;
struct fib_node {
	struct hlist_node	fn_hash;
	struct list_head	fn_alias;
	u32			fn_key;
};
struct fn_zone {
	struct fn_zone		*fz_next;	/* Next not empty zone	*/
	struct hlist_head	*fz_hash;	/* Hash table pointer	*/
	int			fz_nent;	/* Number of entries	*/

	int			fz_divisor;	/* Hash divisor		*/
	u32			fz_hashmask;	/* (fz_divisor - 1)	*/
#define FZ_HASHMASK(fz)		((fz)->fz_hashmask)

	int			fz_order;	/* Zone order		*/
	u32			fz_mask;
#define FZ_MASK(fz)		((fz)->fz_mask)
};

/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
 * can be cheaper than memory lookup, so that FZ_* macros are used.
 */
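/* One fn_zone per prefix length (0..32); fn_zone_list chains the
 * non-empty zones from the most specific mask to the least specific
 * one, which is the order in which lookups walk them.
 */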
struct fn_hash {
	struct fn_zone	*fn_zones[33];
	struct fn_zone	*fn_zone_list;
};
static inline u32 fn_hash(u32 key, struct fn_zone *fz)
{
	u32 h = ntohl(key)>>(32 - fz->fz_order);
	h ^= (h>>20);
	h ^= (h>>10);
	h ^= (h>>5);
	h &= FZ_HASHMASK(fz);
	return h;
}
static inline u32 fz_key(u32 dst, struct fn_zone *fz)
{
	return dst & FZ_MASK(fz);
}

static rwlock_t fib_hash_lock = RW_LOCK_UNLOCKED;

#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
static struct hlist_head *fz_hash_alloc(int divisor)
{
	unsigned long size = divisor * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE) {
		return kmalloc(size, GFP_KERNEL);
	} else {
		return (struct hlist_head *)
			__get_free_pages(GFP_KERNEL, get_order(size));
	}
}
/* The fib hash lock must be held when this is called. */
static inline void fn_rebuild_zone(struct fn_zone *fz,
				   struct hlist_head *old_ht,
				   int old_divisor)
{
	int i;

	for (i = 0; i < old_divisor; i++) {
		struct hlist_node *node, *n;
		struct fib_node *f;

		hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
			struct hlist_head *new_head;

			hlist_del(&f->fn_hash);

			new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
			hlist_add_head(&f->fn_hash, new_head);
		}
	}
}
static void fz_hash_free(struct hlist_head *hash, int divisor)
{
	unsigned long size = divisor * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(hash);
	else
		free_pages((unsigned long)hash, get_order(size));
}
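/* Grow a zone's hash table: 16 -> 256 -> 1024, then doubling, capped at
 * FZ_MAX_DIVISOR.  The new table is filled by fn_rebuild_zone() while
 * fib_hash_lock is write-held, then the old table is freed.
 */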
static void fn_rehash_zone(struct fn_zone *fz)
{
	struct hlist_head *ht, *old_ht;
	int old_divisor, new_divisor;
	u32 new_hashmask;

	old_divisor = fz->fz_divisor;

	switch (old_divisor) {
	case 16:
		new_divisor = 256;
		break;
	case 256:
		new_divisor = 1024;
		break;
	default:
		if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
			printk(KERN_CRIT "route.c: bad divisor %d!\n", old_divisor);
			return;
		}
		new_divisor = (old_divisor << 1);
		break;
	}

	new_hashmask = (new_divisor - 1);

#if RT_CACHE_DEBUG >= 2
	printk("fn_rehash_zone: hash for zone %d grows from %d\n", fz->fz_order, old_divisor);
#endif

	ht = fz_hash_alloc(new_divisor);

	if (ht) {
		memset(ht, 0, new_divisor * sizeof(struct hlist_head));

		write_lock_bh(&fib_hash_lock);
		old_ht = fz->fz_hash;
		fz->fz_hash = ht;
		fz->fz_hashmask = new_hashmask;
		fz->fz_divisor = new_divisor;
		fn_rebuild_zone(fz, old_ht, old_divisor);
		write_unlock_bh(&fib_hash_lock);

		fz_hash_free(old_ht, old_divisor);
	}
}
static inline void fn_free_node(struct fib_node * f)
{
	kmem_cache_free(fn_hash_kmem, f);
}

static inline void fn_free_alias(struct fib_alias *fa)
{
	fib_release_info(fa->fa_info);
	kmem_cache_free(fn_alias_kmem, fa);
}
static struct fn_zone *
fn_new_zone(struct fn_hash *table, int z)
{
	int i;
	struct fn_zone *fz = kmalloc(sizeof(struct fn_zone), GFP_KERNEL);
	if (!fz)
		return NULL;

	memset(fz, 0, sizeof(struct fn_zone));
	if (z) {
		fz->fz_divisor = 16;
	} else {
		fz->fz_divisor = 1;
	}
	fz->fz_hashmask = (fz->fz_divisor - 1);
	fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
	if (!fz->fz_hash) {
		kfree(fz);
		return NULL;
	}
	memset(fz->fz_hash, 0, fz->fz_divisor * sizeof(struct hlist_head *));
	fz->fz_order = z;
	fz->fz_mask = inet_make_mask(z);

	/* Find the first not empty zone with more specific mask */
	for (i=z+1; i<=32; i++)
		if (table->fn_zones[i])
			break;
	write_lock_bh(&fib_hash_lock);
	if (i>32) {
		/* No more specific masks, we are the first. */
		fz->fz_next = table->fn_zone_list;
		table->fn_zone_list = fz;
	} else {
		fz->fz_next = table->fn_zones[i]->fz_next;
		table->fn_zones[i]->fz_next = fz;
	}
	table->fn_zones[z] = fz;
	write_unlock_bh(&fib_hash_lock);
	return fz;
}
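/* Longest-prefix-match lookup: zones are visited from the most specific
 * mask to the least specific one, so the first alias accepted by
 * fib_semantic_match() wins.  Returns 1 when no zone holds a match.
 */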
static int
fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
	int err;
	struct fn_zone *fz;
	struct fn_hash *t = (struct fn_hash*)tb->tb_data;

	read_lock(&fib_hash_lock);
	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
		struct hlist_head *head;
		struct hlist_node *node;
		struct fib_node *f;
		u32 k = fz_key(flp->fl4_dst, fz);

		head = &fz->fz_hash[fn_hash(k, fz)];
		hlist_for_each_entry(f, node, head, fn_hash) {
			if (f->fn_key != k)
				continue;

			err = fib_semantic_match(&f->fn_alias,
						 flp, res,
						 fz->fz_order);
			if (err <= 0)
				goto out;
		}
	}
	err = 1;
out:
	read_unlock(&fib_hash_lock);
	return err;
}
static int fn_hash_last_dflt=-1;
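/* Helper for default route selection below: returns 0 when the route's
 * gateway looks alive (NUD_REACHABLE, or otherwise NUD_VALID for a route
 * other than the last default handed out), 1 otherwise; the best fallback
 * candidate is remembered in *last_resort/*last_idx.
 */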
static int fib_detect_death(struct fib_info *fi, int order,
			    struct fib_info **last_resort, int *last_idx)
{
	struct neighbour *n;
	int state = NUD_NONE;

	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
	if (n) {
		state = n->nud_state;
		neigh_release(n);
	}
	if (state==NUD_REACHABLE)
		return 0;
	if ((state&NUD_VALID) && order != fn_hash_last_dflt)
		return 0;
	if ((state&NUD_VALID) ||
	    (*last_idx<0 && order > fn_hash_last_dflt)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}
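/* tb_select_default: scan the zone-0 bucket that holds the default
 * routes and re-select one whose gateway is still alive, falling back
 * to the candidate remembered by fib_detect_death() if none is.
 */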
static void
fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
	int order, last_idx;
	struct hlist_node *node;
	struct fib_node *f;
	struct fib_info *fi = NULL;
	struct fib_info *last_resort;
	struct fn_hash *t = (struct fn_hash*)tb->tb_data;
	struct fn_zone *fz = t->fn_zones[0];

	if (fz == NULL)
		return;

	last_idx = -1;
	last_resort = NULL;
	order = -1;

	read_lock(&fib_hash_lock);
	hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
		struct fib_alias *fa;

		list_for_each_entry(fa, &f->fn_alias, fa_list) {
			struct fib_info *next_fi = fa->fa_info;

			if (fa->fa_scope != res->scope ||
			    fa->fa_type != RTN_UNICAST)
				continue;

			if (next_fi->fib_priority > res->fi->fib_priority)
				break;
			if (!next_fi->fib_nh[0].nh_gw ||
			    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
				continue;
			fa->fa_state |= FA_S_ACCESSED;

			if (fi == NULL) {
				if (next_fi != res->fi)
					break;
			} else if (!fib_detect_death(fi, order, &last_resort,
						     &last_idx)) {
				if (res->fi)
					fib_info_put(res->fi);
				res->fi = fi;
				atomic_inc(&fi->fib_clntref);
				fn_hash_last_dflt = order;
				goto out;
			}
			fi = next_fi;
			order++;
		}
	}

	if (order <= 0 || fi == NULL) {
		fn_hash_last_dflt = -1;
		goto out;
	}

	if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
		if (res->fi)
			fib_info_put(res->fi);
		res->fi = fi;
		atomic_inc(&fi->fib_clntref);
		fn_hash_last_dflt = order;
		goto out;
	}

	if (last_idx >= 0) {
		if (res->fi)
			fib_info_put(res->fi);
		res->fi = last_resort;
		if (last_resort)
			atomic_inc(&last_resort->fib_clntref);
	}
	fn_hash_last_dflt = last_idx;
out:
	read_unlock(&fib_hash_lock);
}
static void rtmsg_fib(int, struct fib_node *, struct fib_alias *,
		      int, int,
		      struct nlmsghdr *n,
		      struct netlink_skb_parms *);
/* Insert node F to FZ. */
static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
{
	struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];

	hlist_add_head(&f->fn_hash, head);
}

/* Return the node in FZ matching KEY. */
static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key)
{
	struct hlist_head *head = &fz->fz_hash[fn_hash(key, fz)];
	struct hlist_node *node;
	struct fib_node *f;

	hlist_for_each_entry(f, node, head, fn_hash) {
		if (f->fn_key == key)
			return f;
	}

	return NULL;
}

/* Return the first fib alias matching TOS with
 * priority less than or equal to PRIO.
 */
static struct fib_alias *fib_find_alias(struct fib_node *fn, u8 tos, u32 prio)
{
	if (fn) {
		struct list_head *head = &fn->fn_alias;
		struct fib_alias *fa;

		list_for_each_entry(fa, head, fa_list) {
			if (fa->fa_tos > tos)
				continue;
			if (fa->fa_info->fib_priority >= prio ||
			    fa->fa_tos < tos)
				return fa;
		}
	}
	return NULL;
}
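/* Insert a route.  Honours the netlink NLM_F_EXCL/NLM_F_REPLACE/
 * NLM_F_CREATE/NLM_F_APPEND flags, grows the zone hash once entries
 * outnumber buckets two to one, and flushes the routing cache after a
 * successful change.
 */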
static int
fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
	       struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
	struct fib_node *new_f, *f;
	struct fib_alias *fa, *new_fa;
	struct fn_zone *fz;
	struct fib_info *fi;
	int z = r->rtm_dst_len;
	int type = r->rtm_type;
	u8 tos = r->rtm_tos;
	u32 key;
	int err;

	if (z > 32)
		return -EINVAL;
	fz = table->fn_zones[z];
	if (!fz && !(fz = fn_new_zone(table, z)))
		return -ENOBUFS;

	key = 0;
	if (rta->rta_dst) {
		u32 dst;
		memcpy(&dst, rta->rta_dst, 4);
		if (dst & ~FZ_MASK(fz))
			return -EINVAL;
		key = fz_key(dst, fz);
	}

	if ((fi = fib_create_info(r, rta, n, &err)) == NULL)
		return err;

	if (fz->fz_nent > (fz->fz_divisor<<1) &&
	    fz->fz_divisor < FZ_MAX_DIVISOR &&
	    (z==32 || (1<<z) > fz->fz_divisor))
		fn_rehash_zone(fz);

	f = fib_find_node(fz, key);
	fa = fib_find_alias(f, tos, fi->fib_priority);

	/* Now fa, if non-NULL, points to the first fib alias
	 * with the same keys [prefix,tos,priority], if such key already
	 * exists or to the node before which we will insert new one.
	 *
	 * If fa is NULL, we will need to allocate a new one and
	 * insert to the head of f.
	 *
	 * If f is NULL, no fib node matched the destination key
	 * and we need to allocate a new one of those as well.
	 */

	if (fa && fa->fa_tos == tos &&
	    fa->fa_info->fib_priority == fi->fib_priority) {
		struct fib_alias *fa_orig;

		err = -EEXIST;
		if (n->nlmsg_flags & NLM_F_EXCL)
			goto out;

		if (n->nlmsg_flags & NLM_F_REPLACE) {
			struct fib_info *fi_drop;
			u8 state;

			write_lock_bh(&fib_hash_lock);
			fi_drop = fa->fa_info;
			fa->fa_info = fi;
			fa->fa_type = type;
			fa->fa_scope = r->rtm_scope;
			state = fa->fa_state;
			fa->fa_state &= ~FA_S_ACCESSED;
			write_unlock_bh(&fib_hash_lock);

			fib_release_info(fi_drop);
			if (state & FA_S_ACCESSED)
				rt_cache_flush(-1);
			return 0;
		}

		/* Error if we find a perfect match which
		 * uses the same scope, type, and nexthop
		 * information.
		 */
		fa_orig = fa;
		fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
		list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
			if (fa->fa_tos != tos)
				break;
			if (fa->fa_info->fib_priority != fi->fib_priority)
				break;
			if (fa->fa_type == type &&
			    fa->fa_scope == r->rtm_scope &&
			    fa->fa_info == fi)
				goto out;
		}
		if (!(n->nlmsg_flags & NLM_F_APPEND))
			fa = fa_orig;
	}

	err = -ENOENT;
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		goto out;

	err = -ENOBUFS;
	new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
	if (new_fa == NULL)
		goto out;

	new_f = NULL;
	if (!f) {
		new_f = kmem_cache_alloc(fn_hash_kmem, SLAB_KERNEL);
		if (new_f == NULL)
			goto out_free_new_fa;

		INIT_HLIST_NODE(&new_f->fn_hash);
		INIT_LIST_HEAD(&new_f->fn_alias);
		new_f->fn_key = key;
		f = new_f;
	}

	new_fa->fa_info = fi;
	new_fa->fa_tos = tos;
	new_fa->fa_type = type;
	new_fa->fa_scope = r->rtm_scope;
	new_fa->fa_state = 0;

	/*
	 * Insert new entry to the list.
	 */

	write_lock_bh(&fib_hash_lock);
	if (new_f)
		fib_insert_node(fz, new_f);
	list_add_tail(&new_fa->fa_list,
		      (fa ? &fa->fa_list : &f->fn_alias));
	write_unlock_bh(&fib_hash_lock);

	if (new_f)
		fz->fz_nent++;
	rt_cache_flush(-1);

	rtmsg_fib(RTM_NEWROUTE, f, new_fa, z, tb->tb_id, n, req);
	return 0;

out_free_new_fa:
	kmem_cache_free(fn_alias_kmem, new_fa);
out:
	fib_release_info(fi);
	return err;
}
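/* Delete a route.  The alias must match the prefix and TOS of the
 * request, plus whatever type, scope, protocol and nexthop information
 * the request specifies; the fib_node itself is freed once its alias
 * list becomes empty.
 */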
static int
fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
	       struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct fn_hash *table = (struct fn_hash*)tb->tb_data;
	struct fib_node *f;
	struct fib_alias *fa, *fa_to_delete;
	int z = r->rtm_dst_len;
	struct fn_zone *fz;
	u32 key;
	u8 tos = r->rtm_tos;

	if (z > 32)
		return -EINVAL;
	if ((fz = table->fn_zones[z]) == NULL)
		return -ESRCH;

	key = 0;
	if (rta->rta_dst) {
		u32 dst;
		memcpy(&dst, rta->rta_dst, 4);
		if (dst & ~FZ_MASK(fz))
			return -EINVAL;
		key = fz_key(dst, fz);
	}

	f = fib_find_node(fz, key);
	fa = fib_find_alias(f, tos, 0);
	if (!fa)
		return -ESRCH;

	fa_to_delete = NULL;
	fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
	list_for_each_entry_continue(fa, &f->fn_alias, fa_list) {
		struct fib_info *fi = fa->fa_info;

		if (fa->fa_tos != tos)
			break;

		if ((!r->rtm_type ||
		     fa->fa_type == r->rtm_type) &&
		    (r->rtm_scope == RT_SCOPE_NOWHERE ||
		     fa->fa_scope == r->rtm_scope) &&
		    (!r->rtm_protocol ||
		     fi->fib_protocol == r->rtm_protocol) &&
		    fib_nh_match(r, n, rta, fi) == 0) {
			fa_to_delete = fa;
			break;
		}
	}

	if (fa_to_delete) {
		int kill_fn;

		fa = fa_to_delete;
		rtmsg_fib(RTM_DELROUTE, f, fa, z, tb->tb_id, n, req);

		kill_fn = 0;
		write_lock_bh(&fib_hash_lock);
		list_del(&fa->fa_list);
		if (list_empty(&f->fn_alias)) {
			hlist_del(&f->fn_hash);
			kill_fn = 1;
		}
		write_unlock_bh(&fib_hash_lock);

		if (fa->fa_state & FA_S_ACCESSED)
			rt_cache_flush(-1);
		fn_free_alias(fa);
		if (kill_fn) {
			fn_free_node(f);
			fz->fz_nent--;
		}

		return 0;
	}
	return -ESRCH;
}
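/* Garbage collection: drop every alias whose fib_info is marked
 * RTNH_F_DEAD.  Returns the number of entries removed.
 */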
static int fn_flush_list(struct fn_zone *fz, int idx)
{
	struct hlist_head *head = &fz->fz_hash[idx];
	struct hlist_node *node, *n;
	struct fib_node *f;
	int found = 0;

	hlist_for_each_entry_safe(f, node, n, head, fn_hash) {
		struct fib_alias *fa, *fa_node;
		int kill_f;

		kill_f = 0;
		list_for_each_entry_safe(fa, fa_node, &f->fn_alias, fa_list) {
			struct fib_info *fi = fa->fa_info;

			if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
				write_lock_bh(&fib_hash_lock);
				list_del(&fa->fa_list);
				if (list_empty(&f->fn_alias)) {
					hlist_del(&f->fn_hash);
					kill_f = 1;
				}
				write_unlock_bh(&fib_hash_lock);

				fn_free_alias(fa);
				found++;
			}
		}
		if (kill_f) {
			fn_free_node(f);
			fz->fz_nent--;
		}
	}
	return found;
}
static int fn_hash_flush(struct fib_table *tb)
{
	struct fn_hash *table = (struct fn_hash *) tb->tb_data;
	struct fn_zone *fz;
	int found = 0;

	for (fz = table->fn_zone_list; fz; fz = fz->fz_next) {
		int i;

		for (i = fz->fz_divisor - 1; i >= 0; i--)
			found += fn_flush_list(fz, i);
	}
	return found;
}
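/* Netlink dump helpers.  cb->args[1] records the zone, cb->args[2] the
 * hash bucket and cb->args[3] the entry within the bucket, so an
 * interrupted RTM_GETROUTE dump can be resumed where it left off.
 */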
static inline int
fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
		    struct fib_table *tb,
		    struct fn_zone *fz,
		    struct hlist_head *head)
{
	struct hlist_node *node;
	struct fib_node *f;
	int i, s_i;

	s_i = cb->args[3];
	i = 0;
	hlist_for_each_entry(f, node, head, fn_hash) {
		struct fib_alias *fa;

		list_for_each_entry(fa, &f->fn_alias, fa_list) {
			if (i < s_i)
				continue;

			if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq,
					  RTM_NEWROUTE,
					  tb->tb_id,
					  fa->fa_type,
					  fa->fa_scope,
					  &f->fn_key,
					  fz->fz_order,
					  fa->fa_tos,
					  fa->fa_info) < 0) {
				cb->args[3] = i;
				return -1;
			}
			i++;
		}
	}
	cb->args[3] = i;
	return skb->len;
}
static inline int
fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
		  struct fib_table *tb,
		  struct fn_zone *fz)
{
	int h, s_h;

	s_h = cb->args[2];
	for (h=0; h < fz->fz_divisor; h++) {
		if (h < s_h) continue;
		if (h > s_h)
			memset(&cb->args[3], 0,
			       sizeof(cb->args) - 3*sizeof(cb->args[0]));
		if (fz->fz_hash == NULL ||
		    hlist_empty(&fz->fz_hash[h]))
			continue;
		if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) {
			cb->args[2] = h;
			return -1;
		}
	}
	cb->args[2] = h;
	return skb->len;
}
static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
{
	int m, s_m;
	struct fn_zone *fz;
	struct fn_hash *table = (struct fn_hash*)tb->tb_data;

	s_m = cb->args[1];
	read_lock(&fib_hash_lock);
	for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) {
		if (m < s_m) continue;
		if (m > s_m)
			memset(&cb->args[2], 0,
			       sizeof(cb->args) - 2*sizeof(cb->args[0]));
		if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
			cb->args[1] = m;
			read_unlock(&fib_hash_lock);
			return -1;
		}
	}
	read_unlock(&fib_hash_lock);
	cb->args[1] = m;
	return skb->len;
}
static void rtmsg_fib(int event, struct fib_node *f, struct fib_alias *fa,
		      int z, int tb_id,
		      struct nlmsghdr *n, struct netlink_skb_parms *req)
{
	struct sk_buff *skb;
	u32 pid = req ? req->pid : 0;
	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb)
		return;

	if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
			  fa->fa_type, fa->fa_scope, &f->fn_key, z,
			  fa->fa_tos,
			  fa->fa_info) < 0) {
		kfree_skb(skb);
		return;
	}
	NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE;
	if (n->nlmsg_flags&NLM_F_ECHO)
		atomic_inc(&skb->users);
	netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL);
	if (n->nlmsg_flags&NLM_F_ECHO)
		netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
}
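/* Allocate a fib_table with an fn_hash embedded in tb_data and wire up
 * the table operations.  The slab caches for fib_node and fib_alias are
 * created on first use.
 */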
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_table * fib_hash_init(int id)
#else
struct fib_table * __init fib_hash_init(int id)
#endif
{
	struct fib_table *tb;

	if (fn_hash_kmem == NULL)
		fn_hash_kmem = kmem_cache_create("ip_fib_hash",
						 sizeof(struct fib_node),
						 0, SLAB_HWCACHE_ALIGN,
						 NULL, NULL);

	if (fn_alias_kmem == NULL)
		fn_alias_kmem = kmem_cache_create("ip_fib_alias",
						  sizeof(struct fib_alias),
						  0, SLAB_HWCACHE_ALIGN,
						  NULL, NULL);

	tb = kmalloc(sizeof(struct fib_table) + sizeof(struct fn_hash),
		     GFP_KERNEL);
	if (tb == NULL)
		return NULL;

	tb->tb_id = id;
	tb->tb_lookup = fn_hash_lookup;
	tb->tb_insert = fn_hash_insert;
	tb->tb_delete = fn_hash_delete;
	tb->tb_flush = fn_hash_flush;
	tb->tb_select_default = fn_hash_select_default;
	tb->tb_dump = fn_hash_dump;
	memset(tb->tb_data, 0, sizeof(struct fn_hash));
	return tb;
}
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS

struct fib_iter_state {
	struct fn_zone	*zone;
	int		bucket;
	struct hlist_head *hash_head;
	struct fib_node *fn;
	struct fib_alias *fa;
};
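/* fib_get_first()/fib_get_next() walk the main table one alias at a
 * time, tracking their position (zone, bucket, node, alias) in
 * fib_iter_state; fib_hash_lock is held across the walk by the seq_file
 * start/stop hooks below.
 */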
static struct fib_alias *fib_get_first(struct seq_file *seq)
{
	struct fib_iter_state *iter = seq->private;
	struct fn_hash *table = (struct fn_hash *) ip_fib_main_table->tb_data;

	iter->bucket    = 0;
	iter->hash_head = NULL;
	iter->fn        = NULL;
	iter->fa        = NULL;

	for (iter->zone = table->fn_zone_list; iter->zone;
	     iter->zone = iter->zone->fz_next) {
		int maxslot;

		if (!iter->zone->fz_nent)
			continue;

		iter->hash_head = iter->zone->fz_hash;
		maxslot = iter->zone->fz_divisor;

		for (iter->bucket = 0; iter->bucket < maxslot;
		     ++iter->bucket, ++iter->hash_head) {
			struct hlist_node *node;
			struct fib_node *fn;

			hlist_for_each_entry(fn,node,iter->hash_head,fn_hash) {
				struct fib_alias *fa;

				list_for_each_entry(fa,&fn->fn_alias,fa_list) {
					iter->fn = fn;
					iter->fa = fa;
					goto out;
				}
			}
		}
	}
out:
	return iter->fa;
}
static struct fib_alias *fib_get_next(struct seq_file *seq)
{
	struct fib_iter_state *iter = seq->private;
	struct fib_node *fn;
	struct fib_alias *fa;

	/* Advance FA, if any. */
	fn = iter->fn;
	fa = iter->fa;
	if (fa) {
		BUG_ON(!fn);
		list_for_each_entry_continue(fa, &fn->fn_alias, fa_list) {
			iter->fa = fa;
			goto out;
		}
	}

	fa = iter->fa = NULL;

	/* Advance FN. */
	if (fn) {
		struct hlist_node *node = &fn->fn_hash;
		hlist_for_each_entry_continue(fn, node, fn_hash) {
			iter->fn = fn;

			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
				iter->fa = fa;
				goto out;
			}
		}
	}

	fn = iter->fn = NULL;

	/* Advance hash chain. */
	if (!iter->zone)
		goto out;

	for (;;) {
		struct hlist_node *node;
		int maxslot;

		maxslot = iter->zone->fz_divisor;

		while (++iter->bucket < maxslot) {
			iter->hash_head++;

			hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
				list_for_each_entry(fa, &fn->fn_alias, fa_list) {
					iter->fn = fn;
					iter->fa = fa;
					goto out;
				}
			}
		}

		iter->zone = iter->zone->fz_next;

		if (!iter->zone)
			goto out;

		iter->bucket = 0;
		iter->hash_head = iter->zone->fz_hash;

		hlist_for_each_entry(fn, node, iter->hash_head, fn_hash) {
			list_for_each_entry(fa, &fn->fn_alias, fa_list) {
				iter->fn = fn;
				iter->fa = fa;
				goto out;
			}
		}
	}
out:
	return fa;
}
static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
{
	void *v = NULL;

	read_lock(&fib_hash_lock);
	if (ip_fib_main_table)
		v = *pos ? fib_get_next(seq) : SEQ_START_TOKEN;
	return v;
}

static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return v == SEQ_START_TOKEN ? fib_get_first(seq) : fib_get_next(seq);
}

static void fib_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&fib_hash_lock);
}
static unsigned fib_flag_trans(int type, u32 mask, struct fib_info *fi)
{
	/* 7 = RTN_UNREACHABLE, 8 = RTN_PROHIBIT */
	static unsigned type2flags[RTN_MAX + 1] = {
		[7] = RTF_REJECT, [8] = RTF_REJECT,
	};
	unsigned flags = type2flags[type];

	if (fi && fi->fib_nh->nh_gw)
		flags |= RTF_GATEWAY;
	if (mask == 0xFFFFFFFF)
		flags |= RTF_HOST;
	flags |= RTF_UP;
	return flags;
}
/*
 *	This outputs /proc/net/route.
 *
 *	It always works in backward compatibility mode.
 *	The format of the file is not supposed to be changed.
 */
static int fib_seq_show(struct seq_file *seq, void *v)
{
	struct fib_iter_state *iter;
	char bf[128];
	u32 prefix, mask;
	unsigned flags;
	struct fib_node *f;
	struct fib_alias *fa;
	struct fib_info *fi;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
			   "\tWindow\tIRTT");
		goto out;
	}

	iter	= seq->private;
	f	= iter->fn;
	fa	= iter->fa;
	fi	= fa->fa_info;
	prefix	= f->fn_key;
	mask	= FZ_MASK(iter->zone);
	flags	= fib_flag_trans(fa->fa_type, mask, fi);
	if (fi)
		snprintf(bf, sizeof(bf),
			 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
			 fi->fib_dev ? fi->fib_dev->name : "*", prefix,
			 fi->fib_nh->nh_gw, flags, 0, 0, fi->fib_priority,
			 mask, (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
			 fi->fib_window,
			 fi->fib_rtt >> 3);
	else
		snprintf(bf, sizeof(bf),
			 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
			 prefix, 0, flags, 0, 0, 0, mask, 0, 0, 0);
	seq_printf(seq, "%-127s\n", bf);
out:
	return 0;
}
static struct seq_operations fib_seq_ops = {
	.start  = fib_seq_start,
	.next   = fib_seq_next,
	.stop   = fib_seq_stop,
	.show   = fib_seq_show,
};
static int fib_seq_open(struct inode *inode, struct file *file)
{
	struct seq_file *seq;
	int rc = -ENOMEM;
	struct fib_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (!s)
		goto out;

	rc = seq_open(file, &fib_seq_ops);
	if (rc)
		goto out_kfree;

	seq	     = file->private_data;
	seq->private = s;
	memset(s, 0, sizeof(*s));
out:
	return rc;
out_kfree:
	kfree(s);
	goto out;
}
static struct file_operations fib_seq_fops = {
	.owner		= THIS_MODULE,
	.open           = fib_seq_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release	= seq_release_private,
};
int __init fib_proc_init(void)
{
	if (!proc_net_fops_create("route", S_IRUGO, &fib_seq_fops))
		return -ENOMEM;
	return 0;
}

void __init fib_proc_exit(void)
{
	proc_net_remove("route");
}
#endif /* CONFIG_PROC_FS */