[IPV4]: Fix Kconfig syntax error
[firewire-audio.git] / net / ipv4 / fib_trie.c
bloba701405fab0b3413389dc1899e00b90c9eb028d3
1 /*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
15 * This work is based on the LPC-trie which is originally descibed in:
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
28 * Code from fib_hash has been reused which includes the following header:
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
35 * IPv4 FIB: lookup engine and maintenance routines.
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
46 #define VERSION "0.325"
48 #include <linux/config.h>
49 #include <asm/uaccess.h>
50 #include <asm/system.h>
51 #include <asm/bitops.h>
52 #include <linux/types.h>
53 #include <linux/kernel.h>
54 #include <linux/sched.h>
55 #include <linux/mm.h>
56 #include <linux/string.h>
57 #include <linux/socket.h>
58 #include <linux/sockios.h>
59 #include <linux/errno.h>
60 #include <linux/in.h>
61 #include <linux/inet.h>
62 #include <linux/netdevice.h>
63 #include <linux/if_arp.h>
64 #include <linux/proc_fs.h>
65 #include <linux/skbuff.h>
66 #include <linux/netlink.h>
67 #include <linux/init.h>
68 #include <linux/list.h>
69 #include <net/ip.h>
70 #include <net/protocol.h>
71 #include <net/route.h>
72 #include <net/tcp.h>
73 #include <net/sock.h>
74 #include <net/ip_fib.h>
75 #include "fib_lookup.h"
77 #undef CONFIG_IP_FIB_TRIE_STATS
78 #define MAX_CHILDS 16384
80 #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81 #define KEYLENGTH (8*sizeof(t_key))
82 #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83 #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
85 static DEFINE_RWLOCK(fib_lock);
87 typedef unsigned int t_key;
89 #define T_TNODE 0
90 #define T_LEAF 1
91 #define NODE_TYPE_MASK 0x1UL
92 #define NODE_PARENT(_node) \
93 ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94 #define NODE_SET_PARENT(_node, _ptr) \
95 ((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97 #define NODE_INIT_PARENT(_node, _type) \
98 ((_node)->_parent = (_type))
99 #define NODE_TYPE(_node) \
100 ((_node)->_parent & NODE_TYPE_MASK)
102 #define IS_TNODE(n) (!(n->_parent & T_LEAF))
103 #define IS_LEAF(n) (n->_parent & T_LEAF)
105 struct node {
106 t_key key;
107 unsigned long _parent;
110 struct leaf {
111 t_key key;
112 unsigned long _parent;
113 struct hlist_head list;
116 struct leaf_info {
117 struct hlist_node hlist;
118 int plen;
119 struct list_head falh;
122 struct tnode {
123 t_key key;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
132 #ifdef CONFIG_IP_FIB_TRIE_STATS
133 struct trie_use_stats {
134 unsigned int gets;
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
139 unsigned int resize_node_skipped;
141 #endif
143 struct trie_stat {
144 unsigned int totdepth;
145 unsigned int maxdepth;
146 unsigned int tnodes;
147 unsigned int leaves;
148 unsigned int nullpointers;
149 unsigned int nodesizes[MAX_CHILDS];
152 struct trie {
153 struct node *trie;
154 #ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156 #endif
157 int size;
158 unsigned int revision;
161 static int trie_debug = 0;
163 static int tnode_full(struct tnode *tn, struct node *n);
164 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
165 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
166 static int tnode_child_length(struct tnode *tn);
167 static struct node *resize(struct trie *t, struct tnode *tn);
168 static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
169 static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
170 static void tnode_free(struct tnode *tn);
171 static void trie_dump_seq(struct seq_file *seq, struct trie *t);
172 extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
173 extern int fib_detect_death(struct fib_info *fi, int order,
174 struct fib_info **last_resort, int *last_idx, int *dflt);
176 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
177 struct nlmsghdr *n, struct netlink_skb_parms *req);
179 static kmem_cache_t *fn_alias_kmem;
180 static struct trie *trie_local = NULL, *trie_main = NULL;
182 static void trie_bug(char *err)
184 printk("Trie Bug: %s\n", err);
185 BUG();
188 static inline struct node *tnode_get_child(struct tnode *tn, int i)
190 if (i >= 1<<tn->bits)
191 trie_bug("tnode_get_child");
193 return tn->child[i];
196 static inline int tnode_child_length(struct tnode *tn)
198 return 1<<tn->bits;
202 _________________________________________________________________
203 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
204 ----------------------------------------------------------------
205 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
207 _________________________________________________________________
208 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
209 -----------------------------------------------------------------
210 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
212 tp->pos = 7
213 tp->bits = 3
214 n->pos = 15
215 n->bits=4
216 KEYLENGTH=32
219 static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
221 if (offset < KEYLENGTH)
222 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
223 else
224 return 0;
227 static inline int tkey_equals(t_key a, t_key b)
229 return a == b;
232 static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
234 if (bits == 0 || offset >= KEYLENGTH)
235 return 1;
236 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
237 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
240 static inline int tkey_mismatch(t_key a, int offset, t_key b)
242 t_key diff = a ^ b;
243 int i = offset;
245 if (!diff)
246 return 0;
247 while ((diff << i) >> (KEYLENGTH-1) == 0)
248 i++;
249 return i;
252 /* Candiate for fib_semantics */
254 static void fn_free_alias(struct fib_alias *fa)
256 fib_release_info(fa->fa_info);
257 kmem_cache_free(fn_alias_kmem, fa);
261 To understand this stuff, an understanding of keys and all their bits is
262 necessary. Every node in the trie has a key associated with it, but not
263 all of the bits in that key are significant.
265 Consider a node 'n' and its parent 'tp'.
267 If n is a leaf, every bit in its key is significant. Its presence is
268 necessitaded by path compression, since during a tree traversal (when
269 searching for a leaf - unless we are doing an insertion) we will completely
270 ignore all skipped bits we encounter. Thus we need to verify, at the end of
271 a potentially successful search, that we have indeed been walking the
272 correct key path.
274 Note that we can never "miss" the correct key in the tree if present by
275 following the wrong path. Path compression ensures that segments of the key
276 that are the same for all keys with a given prefix are skipped, but the
277 skipped part *is* identical for each node in the subtrie below the skipped
278 bit! trie_insert() in this implementation takes care of that - note the
279 call to tkey_sub_equals() in trie_insert().
281 if n is an internal node - a 'tnode' here, the various parts of its key
282 have many different meanings.
284 Example:
285 _________________________________________________________________
286 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
287 -----------------------------------------------------------------
288 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
290 _________________________________________________________________
291 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
292 -----------------------------------------------------------------
293 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
295 tp->pos = 7
296 tp->bits = 3
297 n->pos = 15
298 n->bits=4
300 First, let's just ignore the bits that come before the parent tp, that is
301 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
302 not use them for anything.
304 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
305 index into the parent's child array. That is, they will be used to find
306 'n' among tp's children.
308 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
309 for the node n.
311 All the bits we have seen so far are significant to the node n. The rest
312 of the bits are really not needed or indeed known in n->key.
314 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
315 n's child array, and will of course be different for each child.
318 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
319 at this point.
323 static void check_tnode(struct tnode *tn)
325 if (tn && tn->pos+tn->bits > 32) {
326 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
330 static int halve_threshold = 25;
331 static int inflate_threshold = 50;
333 static struct leaf *leaf_new(void)
335 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
336 if (l) {
337 NODE_INIT_PARENT(l, T_LEAF);
338 INIT_HLIST_HEAD(&l->list);
340 return l;
343 static struct leaf_info *leaf_info_new(int plen)
345 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
346 if (li) {
347 li->plen = plen;
348 INIT_LIST_HEAD(&li->falh);
350 return li;
353 static inline void free_leaf(struct leaf *l)
355 kfree(l);
358 static inline void free_leaf_info(struct leaf_info *li)
360 kfree(li);
363 static struct tnode *tnode_alloc(unsigned int size)
365 if (size <= PAGE_SIZE) {
366 return kmalloc(size, GFP_KERNEL);
367 } else {
368 return (struct tnode *)
369 __get_free_pages(GFP_KERNEL, get_order(size));
373 static void __tnode_free(struct tnode *tn)
375 unsigned int size = sizeof(struct tnode) +
376 (1<<tn->bits) * sizeof(struct node *);
378 if (size <= PAGE_SIZE)
379 kfree(tn);
380 else
381 free_pages((unsigned long)tn, get_order(size));
384 static struct tnode* tnode_new(t_key key, int pos, int bits)
386 int nchildren = 1<<bits;
387 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
388 struct tnode *tn = tnode_alloc(sz);
390 if (tn) {
391 memset(tn, 0, sz);
392 NODE_INIT_PARENT(tn, T_TNODE);
393 tn->pos = pos;
394 tn->bits = bits;
395 tn->key = key;
396 tn->full_children = 0;
397 tn->empty_children = 1<<bits;
400 if (trie_debug > 0)
401 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
402 (unsigned int) (sizeof(struct node) * 1<<bits));
403 return tn;
406 static void tnode_free(struct tnode *tn)
408 if (!tn) {
409 trie_bug("tnode_free\n");
411 if (IS_LEAF(tn)) {
412 free_leaf((struct leaf *)tn);
413 if (trie_debug > 0 )
414 printk("FL %p \n", tn);
416 else if (IS_TNODE(tn)) {
417 __tnode_free(tn);
418 if (trie_debug > 0 )
419 printk("FT %p \n", tn);
421 else {
422 trie_bug("tnode_free\n");
427 * Check whether a tnode 'n' is "full", i.e. it is an internal node
428 * and no bits are skipped. See discussion in dyntree paper p. 6
431 static inline int tnode_full(struct tnode *tn, struct node *n)
433 if (n == NULL || IS_LEAF(n))
434 return 0;
436 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
439 static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
441 tnode_put_child_reorg(tn, i, n, -1);
445 * Add a child at position i overwriting the old value.
446 * Update the value of full_children and empty_children.
449 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
451 struct node *chi;
452 int isfull;
454 if (i >= 1<<tn->bits) {
455 printk("bits=%d, i=%d\n", tn->bits, i);
456 trie_bug("tnode_put_child_reorg bits");
458 write_lock_bh(&fib_lock);
459 chi = tn->child[i];
461 /* update emptyChildren */
462 if (n == NULL && chi != NULL)
463 tn->empty_children++;
464 else if (n != NULL && chi == NULL)
465 tn->empty_children--;
467 /* update fullChildren */
468 if (wasfull == -1)
469 wasfull = tnode_full(tn, chi);
471 isfull = tnode_full(tn, n);
472 if (wasfull && !isfull)
473 tn->full_children--;
475 else if (!wasfull && isfull)
476 tn->full_children++;
477 if (n)
478 NODE_SET_PARENT(n, tn);
480 tn->child[i] = n;
481 write_unlock_bh(&fib_lock);
484 static struct node *resize(struct trie *t, struct tnode *tn)
486 int i;
487 int err = 0;
489 if (!tn)
490 return NULL;
492 if (trie_debug)
493 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
494 tn, inflate_threshold, halve_threshold);
496 /* No children */
497 if (tn->empty_children == tnode_child_length(tn)) {
498 tnode_free(tn);
499 return NULL;
501 /* One child */
502 if (tn->empty_children == tnode_child_length(tn) - 1)
503 for (i = 0; i < tnode_child_length(tn); i++) {
505 write_lock_bh(&fib_lock);
506 if (tn->child[i] != NULL) {
508 /* compress one level */
509 struct node *n = tn->child[i];
510 if (n)
511 NODE_INIT_PARENT(n, NODE_TYPE(n));
513 write_unlock_bh(&fib_lock);
514 tnode_free(tn);
515 return n;
517 write_unlock_bh(&fib_lock);
520 * Double as long as the resulting node has a number of
521 * nonempty nodes that are above the threshold.
525 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
526 * the Helsinki University of Technology and Matti Tikkanen of Nokia
527 * Telecommunications, page 6:
528 * "A node is doubled if the ratio of non-empty children to all
529 * children in the *doubled* node is at least 'high'."
531 * 'high' in this instance is the variable 'inflate_threshold'. It
532 * is expressed as a percentage, so we multiply it with
533 * tnode_child_length() and instead of multiplying by 2 (since the
534 * child array will be doubled by inflate()) and multiplying
535 * the left-hand side by 100 (to handle the percentage thing) we
536 * multiply the left-hand side by 50.
538 * The left-hand side may look a bit weird: tnode_child_length(tn)
539 * - tn->empty_children is of course the number of non-null children
540 * in the current node. tn->full_children is the number of "full"
541 * children, that is non-null tnodes with a skip value of 0.
542 * All of those will be doubled in the resulting inflated tnode, so
543 * we just count them one extra time here.
545 * A clearer way to write this would be:
547 * to_be_doubled = tn->full_children;
548 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
549 * tn->full_children;
551 * new_child_length = tnode_child_length(tn) * 2;
553 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
554 * new_child_length;
555 * if (new_fill_factor >= inflate_threshold)
557 * ...and so on, tho it would mess up the while () loop.
559 * anyway,
560 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
561 * inflate_threshold
563 * avoid a division:
564 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
565 * inflate_threshold * new_child_length
567 * expand not_to_be_doubled and to_be_doubled, and shorten:
568 * 100 * (tnode_child_length(tn) - tn->empty_children +
569 * tn->full_children ) >= inflate_threshold * new_child_length
571 * expand new_child_length:
572 * 100 * (tnode_child_length(tn) - tn->empty_children +
573 * tn->full_children ) >=
574 * inflate_threshold * tnode_child_length(tn) * 2
576 * shorten again:
577 * 50 * (tn->full_children + tnode_child_length(tn) -
578 * tn->empty_children ) >= inflate_threshold *
579 * tnode_child_length(tn)
583 check_tnode(tn);
585 err = 0;
586 while ((tn->full_children > 0 &&
587 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
588 inflate_threshold * tnode_child_length(tn))) {
590 tn = inflate(t, tn, &err);
592 if (err) {
593 #ifdef CONFIG_IP_FIB_TRIE_STATS
594 t->stats.resize_node_skipped++;
595 #endif
596 break;
600 check_tnode(tn);
603 * Halve as long as the number of empty children in this
604 * node is above threshold.
607 err = 0;
608 while (tn->bits > 1 &&
609 100 * (tnode_child_length(tn) - tn->empty_children) <
610 halve_threshold * tnode_child_length(tn)) {
612 tn = halve(t, tn, &err);
614 if (err) {
615 #ifdef CONFIG_IP_FIB_TRIE_STATS
616 t->stats.resize_node_skipped++;
617 #endif
618 break;
623 /* Only one child remains */
625 if (tn->empty_children == tnode_child_length(tn) - 1)
626 for (i = 0; i < tnode_child_length(tn); i++) {
628 write_lock_bh(&fib_lock);
629 if (tn->child[i] != NULL) {
630 /* compress one level */
631 struct node *n = tn->child[i];
633 if (n)
634 NODE_INIT_PARENT(n, NODE_TYPE(n));
636 write_unlock_bh(&fib_lock);
637 tnode_free(tn);
638 return n;
640 write_unlock_bh(&fib_lock);
643 return (struct node *) tn;
646 static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
648 struct tnode *inode;
649 struct tnode *oldtnode = tn;
650 int olen = tnode_child_length(tn);
651 int i;
653 if (trie_debug)
654 printk("In inflate\n");
656 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
658 if (!tn) {
659 *err = -ENOMEM;
660 return oldtnode;
664 * Preallocate and store tnodes before the actual work so we
665 * don't get into an inconsistent state if memory allocation
666 * fails. In case of failure we return the oldnode and inflate
667 * of tnode is ignored.
670 for(i = 0; i < olen; i++) {
671 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
673 if (inode &&
674 IS_TNODE(inode) &&
675 inode->pos == oldtnode->pos + oldtnode->bits &&
676 inode->bits > 1) {
677 struct tnode *left, *right;
679 t_key m = TKEY_GET_MASK(inode->pos, 1);
681 left = tnode_new(inode->key&(~m), inode->pos + 1,
682 inode->bits - 1);
684 if (!left) {
685 *err = -ENOMEM;
686 break;
689 right = tnode_new(inode->key|m, inode->pos + 1,
690 inode->bits - 1);
692 if (!right) {
693 *err = -ENOMEM;
694 break;
697 put_child(t, tn, 2*i, (struct node *) left);
698 put_child(t, tn, 2*i+1, (struct node *) right);
702 if (*err) {
703 int size = tnode_child_length(tn);
704 int j;
706 for(j = 0; j < size; j++)
707 if (tn->child[j])
708 tnode_free((struct tnode *)tn->child[j]);
710 tnode_free(tn);
712 *err = -ENOMEM;
713 return oldtnode;
716 for(i = 0; i < olen; i++) {
717 struct node *node = tnode_get_child(oldtnode, i);
719 /* An empty child */
720 if (node == NULL)
721 continue;
723 /* A leaf or an internal node with skipped bits */
725 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
726 tn->pos + tn->bits - 1) {
727 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
728 1) == 0)
729 put_child(t, tn, 2*i, node);
730 else
731 put_child(t, tn, 2*i+1, node);
732 continue;
735 /* An internal node with two children */
736 inode = (struct tnode *) node;
738 if (inode->bits == 1) {
739 put_child(t, tn, 2*i, inode->child[0]);
740 put_child(t, tn, 2*i+1, inode->child[1]);
742 tnode_free(inode);
745 /* An internal node with more than two children */
746 else {
747 struct tnode *left, *right;
748 int size, j;
750 /* We will replace this node 'inode' with two new
751 * ones, 'left' and 'right', each with half of the
752 * original children. The two new nodes will have
753 * a position one bit further down the key and this
754 * means that the "significant" part of their keys
755 * (see the discussion near the top of this file)
756 * will differ by one bit, which will be "0" in
757 * left's key and "1" in right's key. Since we are
758 * moving the key position by one step, the bit that
759 * we are moving away from - the bit at position
760 * (inode->pos) - is the one that will differ between
761 * left and right. So... we synthesize that bit in the
762 * two new keys.
763 * The mask 'm' below will be a single "one" bit at
764 * the position (inode->pos)
767 /* Use the old key, but set the new significant
768 * bit to zero.
771 left = (struct tnode *) tnode_get_child(tn, 2*i);
772 put_child(t, tn, 2*i, NULL);
774 if (!left)
775 BUG();
777 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
778 put_child(t, tn, 2*i+1, NULL);
780 if (!right)
781 BUG();
783 size = tnode_child_length(left);
784 for(j = 0; j < size; j++) {
785 put_child(t, left, j, inode->child[j]);
786 put_child(t, right, j, inode->child[j + size]);
788 put_child(t, tn, 2*i, resize(t, left));
789 put_child(t, tn, 2*i+1, resize(t, right));
791 tnode_free(inode);
794 tnode_free(oldtnode);
795 return tn;
798 static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
800 struct tnode *oldtnode = tn;
801 struct node *left, *right;
802 int i;
803 int olen = tnode_child_length(tn);
805 if (trie_debug) printk("In halve\n");
807 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
809 if (!tn) {
810 *err = -ENOMEM;
811 return oldtnode;
815 * Preallocate and store tnodes before the actual work so we
816 * don't get into an inconsistent state if memory allocation
817 * fails. In case of failure we return the oldnode and halve
818 * of tnode is ignored.
821 for(i = 0; i < olen; i += 2) {
822 left = tnode_get_child(oldtnode, i);
823 right = tnode_get_child(oldtnode, i+1);
825 /* Two nonempty children */
826 if (left && right) {
827 struct tnode *newBinNode =
828 tnode_new(left->key, tn->pos + tn->bits, 1);
830 if (!newBinNode) {
831 *err = -ENOMEM;
832 break;
834 put_child(t, tn, i/2, (struct node *)newBinNode);
838 if (*err) {
839 int size = tnode_child_length(tn);
840 int j;
842 for(j = 0; j < size; j++)
843 if (tn->child[j])
844 tnode_free((struct tnode *)tn->child[j]);
846 tnode_free(tn);
848 *err = -ENOMEM;
849 return oldtnode;
852 for(i = 0; i < olen; i += 2) {
853 left = tnode_get_child(oldtnode, i);
854 right = tnode_get_child(oldtnode, i+1);
856 /* At least one of the children is empty */
857 if (left == NULL) {
858 if (right == NULL) /* Both are empty */
859 continue;
860 put_child(t, tn, i/2, right);
861 } else if (right == NULL)
862 put_child(t, tn, i/2, left);
864 /* Two nonempty children */
865 else {
866 struct tnode *newBinNode =
867 (struct tnode *) tnode_get_child(tn, i/2);
868 put_child(t, tn, i/2, NULL);
870 if (!newBinNode)
871 BUG();
873 put_child(t, newBinNode, 0, left);
874 put_child(t, newBinNode, 1, right);
875 put_child(t, tn, i/2, resize(t, newBinNode));
878 tnode_free(oldtnode);
879 return tn;
882 static void *trie_init(struct trie *t)
884 if (t) {
885 t->size = 0;
886 t->trie = NULL;
887 t->revision = 0;
888 #ifdef CONFIG_IP_FIB_TRIE_STATS
889 memset(&t->stats, 0, sizeof(struct trie_use_stats));
890 #endif
892 return t;
895 static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
897 struct hlist_node *node;
898 struct leaf_info *li;
900 hlist_for_each_entry(li, node, head, hlist) {
901 if (li->plen == plen)
902 return li;
904 return NULL;
907 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
909 struct list_head *fa_head = NULL;
910 struct leaf_info *li = find_leaf_info(&l->list, plen);
912 if (li)
913 fa_head = &li->falh;
915 return fa_head;
918 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
920 struct leaf_info *li = NULL, *last = NULL;
921 struct hlist_node *node, *tmp;
923 write_lock_bh(&fib_lock);
925 if (hlist_empty(head))
926 hlist_add_head(&new->hlist, head);
927 else {
928 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
930 if (new->plen > li->plen)
931 break;
933 last = li;
935 if (last)
936 hlist_add_after(&last->hlist, &new->hlist);
937 else
938 hlist_add_before(&new->hlist, &li->hlist);
940 write_unlock_bh(&fib_lock);
943 static struct leaf *
944 fib_find_node(struct trie *t, u32 key)
946 int pos;
947 struct tnode *tn;
948 struct node *n;
950 pos = 0;
951 n = t->trie;
953 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
954 tn = (struct tnode *) n;
956 check_tnode(tn);
958 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
959 pos=tn->pos + tn->bits;
960 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
962 else
963 break;
965 /* Case we have found a leaf. Compare prefixes */
967 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
968 struct leaf *l = (struct leaf *) n;
969 return l;
971 return NULL;
974 static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
976 int i = 0;
977 int wasfull;
978 t_key cindex, key;
979 struct tnode *tp = NULL;
981 if (!tn)
982 BUG();
984 key = tn->key;
985 i = 0;
987 while (tn != NULL && NODE_PARENT(tn) != NULL) {
989 if (i > 10) {
990 printk("Rebalance tn=%p \n", tn);
991 if (tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
993 printk("Rebalance tp=%p \n", tp);
994 if (tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
997 if (i > 12) BUG();
998 i++;
1000 tp = NODE_PARENT(tn);
1001 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1002 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
1003 tn = (struct tnode *) resize (t, (struct tnode *)tn);
1004 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
1006 if (!NODE_PARENT(tn))
1007 break;
1009 tn = NODE_PARENT(tn);
1011 /* Handle last (top) tnode */
1012 if (IS_TNODE(tn))
1013 tn = (struct tnode*) resize(t, (struct tnode *)tn);
1015 return (struct node*) tn;
1018 static struct list_head *
1019 fib_insert_node(struct trie *t, int *err, u32 key, int plen)
1021 int pos, newpos;
1022 struct tnode *tp = NULL, *tn = NULL;
1023 struct node *n;
1024 struct leaf *l;
1025 int missbit;
1026 struct list_head *fa_head = NULL;
1027 struct leaf_info *li;
1028 t_key cindex;
1030 pos = 0;
1031 n = t->trie;
1033 /* If we point to NULL, stop. Either the tree is empty and we should
1034 * just put a new leaf in if, or we have reached an empty child slot,
1035 * and we should just put our new leaf in that.
1036 * If we point to a T_TNODE, check if it matches our key. Note that
1037 * a T_TNODE might be skipping any number of bits - its 'pos' need
1038 * not be the parent's 'pos'+'bits'!
1040 * If it does match the current key, get pos/bits from it, extract
1041 * the index from our key, push the T_TNODE and walk the tree.
1043 * If it doesn't, we have to replace it with a new T_TNODE.
1045 * If we point to a T_LEAF, it might or might not have the same key
1046 * as we do. If it does, just change the value, update the T_LEAF's
1047 * value, and return it.
1048 * If it doesn't, we need to replace it with a T_TNODE.
1051 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1052 tn = (struct tnode *) n;
1054 check_tnode(tn);
1056 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
1057 tp = tn;
1058 pos=tn->pos + tn->bits;
1059 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1061 if (n && NODE_PARENT(n) != tn) {
1062 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1063 BUG();
1066 else
1067 break;
1071 * n ----> NULL, LEAF or TNODE
1073 * tp is n's (parent) ----> NULL or TNODE
1076 if (tp && IS_LEAF(tp))
1077 BUG();
1080 /* Case 1: n is a leaf. Compare prefixes */
1082 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
1083 struct leaf *l = ( struct leaf *) n;
1085 li = leaf_info_new(plen);
1087 if (!li) {
1088 *err = -ENOMEM;
1089 goto err;
1092 fa_head = &li->falh;
1093 insert_leaf_info(&l->list, li);
1094 goto done;
1096 t->size++;
1097 l = leaf_new();
1099 if (!l) {
1100 *err = -ENOMEM;
1101 goto err;
1104 l->key = key;
1105 li = leaf_info_new(plen);
1107 if (!li) {
1108 tnode_free((struct tnode *) l);
1109 *err = -ENOMEM;
1110 goto err;
1113 fa_head = &li->falh;
1114 insert_leaf_info(&l->list, li);
1116 /* Case 2: n is NULL, and will just insert a new leaf */
1117 if (t->trie && n == NULL) {
1119 NODE_SET_PARENT(l, tp);
1121 if (!tp)
1122 BUG();
1124 else {
1125 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1126 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1129 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1130 else {
1132 * Add a new tnode here
1133 * first tnode need some special handling
1136 if (tp)
1137 pos=tp->pos+tp->bits;
1138 else
1139 pos=0;
1140 if (n) {
1141 newpos = tkey_mismatch(key, pos, n->key);
1142 tn = tnode_new(n->key, newpos, 1);
1144 else {
1145 newpos = 0;
1146 tn = tnode_new(key, newpos, 1); /* First tnode */
1149 if (!tn) {
1150 free_leaf_info(li);
1151 tnode_free((struct tnode *) l);
1152 *err = -ENOMEM;
1153 goto err;
1156 NODE_SET_PARENT(tn, tp);
1158 missbit=tkey_extract_bits(key, newpos, 1);
1159 put_child(t, tn, missbit, (struct node *)l);
1160 put_child(t, tn, 1-missbit, n);
1162 if (tp) {
1163 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1164 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1166 else {
1167 t->trie = (struct node*) tn; /* First tnode */
1168 tp = tn;
1171 if (tp && tp->pos+tp->bits > 32) {
1172 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1173 tp, tp->pos, tp->bits, key, plen);
1175 /* Rebalance the trie */
1176 t->trie = trie_rebalance(t, tp);
1177 done:
1178 t->revision++;
1179 err:;
1180 return fa_head;
1183 static int
1184 fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1185 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1187 struct trie *t = (struct trie *) tb->tb_data;
1188 struct fib_alias *fa, *new_fa;
1189 struct list_head *fa_head = NULL;
1190 struct fib_info *fi;
1191 int plen = r->rtm_dst_len;
1192 int type = r->rtm_type;
1193 u8 tos = r->rtm_tos;
1194 u32 key, mask;
1195 int err;
1196 struct leaf *l;
1198 if (plen > 32)
1199 return -EINVAL;
1201 key = 0;
1202 if (rta->rta_dst)
1203 memcpy(&key, rta->rta_dst, 4);
1205 key = ntohl(key);
1207 if (trie_debug)
1208 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1210 mask = ntohl( inet_make_mask(plen) );
1212 if (key & ~mask)
1213 return -EINVAL;
1215 key = key & mask;
1217 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1218 goto err;
1220 l = fib_find_node(t, key);
1221 fa = NULL;
1223 if (l) {
1224 fa_head = get_fa_head(l, plen);
1225 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1228 /* Now fa, if non-NULL, points to the first fib alias
1229 * with the same keys [prefix,tos,priority], if such key already
1230 * exists or to the node before which we will insert new one.
1232 * If fa is NULL, we will need to allocate a new one and
1233 * insert to the head of f.
1235 * If f is NULL, no fib node matched the destination key
1236 * and we need to allocate a new one of those as well.
1239 if (fa &&
1240 fa->fa_info->fib_priority == fi->fib_priority) {
1241 struct fib_alias *fa_orig;
1243 err = -EEXIST;
1244 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1245 goto out;
1247 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1248 struct fib_info *fi_drop;
1249 u8 state;
1251 write_lock_bh(&fib_lock);
1253 fi_drop = fa->fa_info;
1254 fa->fa_info = fi;
1255 fa->fa_type = type;
1256 fa->fa_scope = r->rtm_scope;
1257 state = fa->fa_state;
1258 fa->fa_state &= ~FA_S_ACCESSED;
1260 write_unlock_bh(&fib_lock);
1262 fib_release_info(fi_drop);
1263 if (state & FA_S_ACCESSED)
1264 rt_cache_flush(-1);
1266 goto succeeded;
1268 /* Error if we find a perfect match which
1269 * uses the same scope, type, and nexthop
1270 * information.
1272 fa_orig = fa;
1273 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1274 if (fa->fa_tos != tos)
1275 break;
1276 if (fa->fa_info->fib_priority != fi->fib_priority)
1277 break;
1278 if (fa->fa_type == type &&
1279 fa->fa_scope == r->rtm_scope &&
1280 fa->fa_info == fi) {
1281 goto out;
1284 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1285 fa = fa_orig;
1287 err = -ENOENT;
1288 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1289 goto out;
1291 err = -ENOBUFS;
1292 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1293 if (new_fa == NULL)
1294 goto out;
1296 new_fa->fa_info = fi;
1297 new_fa->fa_tos = tos;
1298 new_fa->fa_type = type;
1299 new_fa->fa_scope = r->rtm_scope;
1300 new_fa->fa_state = 0;
1301 #if 0
1302 new_fa->dst = NULL;
1303 #endif
1305 * Insert new entry to the list.
1308 if (!fa_head) {
1309 fa_head = fib_insert_node(t, &err, key, plen);
1310 err = 0;
1311 if (err)
1312 goto out_free_new_fa;
1315 write_lock_bh(&fib_lock);
1317 list_add_tail(&new_fa->fa_list,
1318 (fa ? &fa->fa_list : fa_head));
1320 write_unlock_bh(&fib_lock);
1322 rt_cache_flush(-1);
1323 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1324 succeeded:
1325 return 0;
1327 out_free_new_fa:
1328 kmem_cache_free(fn_alias_kmem, new_fa);
1329 out:
1330 fib_release_info(fi);
1331 err:;
1332 return err;
1335 static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1336 struct fib_result *res, int *err)
1338 int i;
1339 t_key mask;
1340 struct leaf_info *li;
1341 struct hlist_head *hhead = &l->list;
1342 struct hlist_node *node;
1344 hlist_for_each_entry(li, node, hhead, hlist) {
1346 i = li->plen;
1347 mask = ntohl(inet_make_mask(i));
1348 if (l->key != (key & mask))
1349 continue;
1351 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1352 *plen = i;
1353 #ifdef CONFIG_IP_FIB_TRIE_STATS
1354 t->stats.semantic_match_passed++;
1355 #endif
1356 return 1;
1358 #ifdef CONFIG_IP_FIB_TRIE_STATS
1359 t->stats.semantic_match_miss++;
1360 #endif
1362 return 0;
1365 static int
1366 fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1368 struct trie *t = (struct trie *) tb->tb_data;
1369 int plen, ret = 0;
1370 struct node *n;
1371 struct tnode *pn;
1372 int pos, bits;
1373 t_key key=ntohl(flp->fl4_dst);
1374 int chopped_off;
1375 t_key cindex = 0;
1376 int current_prefix_length = KEYLENGTH;
1377 n = t->trie;
1379 read_lock(&fib_lock);
1380 if (!n)
1381 goto failed;
1383 #ifdef CONFIG_IP_FIB_TRIE_STATS
1384 t->stats.gets++;
1385 #endif
1387 /* Just a leaf? */
1388 if (IS_LEAF(n)) {
1389 if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1390 goto found;
1391 goto failed;
1393 pn = (struct tnode *) n;
1394 chopped_off = 0;
1396 while (pn) {
1398 pos = pn->pos;
1399 bits = pn->bits;
1401 if (!chopped_off)
1402 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1404 n = tnode_get_child(pn, cindex);
1406 if (n == NULL) {
1407 #ifdef CONFIG_IP_FIB_TRIE_STATS
1408 t->stats.null_node_hit++;
1409 #endif
1410 goto backtrace;
1413 if (IS_TNODE(n)) {
1414 #define HL_OPTIMIZE
1415 #ifdef HL_OPTIMIZE
1416 struct tnode *cn = (struct tnode *)n;
1417 t_key node_prefix, key_prefix, pref_mismatch;
1418 int mp;
1421 * It's a tnode, and we can do some extra checks here if we
1422 * like, to avoid descending into a dead-end branch.
1423 * This tnode is in the parent's child array at index
1424 * key[p_pos..p_pos+p_bits] but potentially with some bits
1425 * chopped off, so in reality the index may be just a
1426 * subprefix, padded with zero at the end.
1427 * We can also take a look at any skipped bits in this
1428 * tnode - everything up to p_pos is supposed to be ok,
1429 * and the non-chopped bits of the index (se previous
1430 * paragraph) are also guaranteed ok, but the rest is
1431 * considered unknown.
1433 * The skipped bits are key[pos+bits..cn->pos].
1436 /* If current_prefix_length < pos+bits, we are already doing
1437 * actual prefix matching, which means everything from
1438 * pos+(bits-chopped_off) onward must be zero along some
1439 * branch of this subtree - otherwise there is *no* valid
1440 * prefix present. Here we can only check the skipped
1441 * bits. Remember, since we have already indexed into the
1442 * parent's child array, we know that the bits we chopped of
1443 * *are* zero.
1446 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1448 if (current_prefix_length < pos+bits) {
1449 if (tkey_extract_bits(cn->key, current_prefix_length,
1450 cn->pos - current_prefix_length) != 0 ||
1451 !(cn->child[0]))
1452 goto backtrace;
1456 * If chopped_off=0, the index is fully validated and we
1457 * only need to look at the skipped bits for this, the new,
1458 * tnode. What we actually want to do is to find out if
1459 * these skipped bits match our key perfectly, or if we will
1460 * have to count on finding a matching prefix further down,
1461 * because if we do, we would like to have some way of
1462 * verifying the existence of such a prefix at this point.
1465 /* The only thing we can do at this point is to verify that
1466 * any such matching prefix can indeed be a prefix to our
1467 * key, and if the bits in the node we are inspecting that
1468 * do not match our key are not ZERO, this cannot be true.
1469 * Thus, find out where there is a mismatch (before cn->pos)
1470 * and verify that all the mismatching bits are zero in the
1471 * new tnode's key.
1474 /* Note: We aren't very concerned about the piece of the key
1475 * that precede pn->pos+pn->bits, since these have already been
1476 * checked. The bits after cn->pos aren't checked since these are
1477 * by definition "unknown" at this point. Thus, what we want to
1478 * see is if we are about to enter the "prefix matching" state,
1479 * and in that case verify that the skipped bits that will prevail
1480 * throughout this subtree are zero, as they have to be if we are
1481 * to find a matching prefix.
1484 node_prefix = MASK_PFX(cn->key, cn->pos);
1485 key_prefix = MASK_PFX(key, cn->pos);
1486 pref_mismatch = key_prefix^node_prefix;
1487 mp = 0;
1489 /* In short: If skipped bits in this node do not match the search
1490 * key, enter the "prefix matching" state.directly.
1492 if (pref_mismatch) {
1493 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1494 mp++;
1495 pref_mismatch = pref_mismatch <<1;
1497 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1499 if (key_prefix != 0)
1500 goto backtrace;
1502 if (current_prefix_length >= cn->pos)
1503 current_prefix_length=mp;
1505 #endif
1506 pn = (struct tnode *)n; /* Descend */
1507 chopped_off = 0;
1508 continue;
1510 if (IS_LEAF(n)) {
1511 if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1512 goto found;
1514 backtrace:
1515 chopped_off++;
1517 /* As zero don't change the child key (cindex) */
1518 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1519 chopped_off++;
1522 /* Decrease current_... with bits chopped off */
1523 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1524 current_prefix_length = pn->pos + pn->bits - chopped_off;
1527 * Either we do the actual chop off according or if we have
1528 * chopped off all bits in this tnode walk up to our parent.
1531 if (chopped_off <= pn->bits)
1532 cindex &= ~(1 << (chopped_off-1));
1533 else {
1534 if (NODE_PARENT(pn) == NULL)
1535 goto failed;
1537 /* Get Child's index */
1538 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1539 pn = NODE_PARENT(pn);
1540 chopped_off = 0;
1542 #ifdef CONFIG_IP_FIB_TRIE_STATS
1543 t->stats.backtrack++;
1544 #endif
1545 goto backtrace;
1548 failed:
1549 ret = 1;
1550 found:
1551 read_unlock(&fib_lock);
1552 return ret;
1555 static int trie_leaf_remove(struct trie *t, t_key key)
1557 t_key cindex;
1558 struct tnode *tp = NULL;
1559 struct node *n = t->trie;
1560 struct leaf *l;
1562 if (trie_debug)
1563 printk("entering trie_leaf_remove(%p)\n", n);
1565 /* Note that in the case skipped bits, those bits are *not* checked!
1566 * When we finish this, we will have NULL or a T_LEAF, and the
1567 * T_LEAF may or may not match our key.
1570 while (n != NULL && IS_TNODE(n)) {
1571 struct tnode *tn = (struct tnode *) n;
1572 check_tnode(tn);
1573 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1575 if (n && NODE_PARENT(n) != tn) {
1576 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1577 BUG();
1580 l = (struct leaf *) n;
1582 if (!n || !tkey_equals(l->key, key))
1583 return 0;
1586 * Key found.
1587 * Remove the leaf and rebalance the tree
1590 t->revision++;
1591 t->size--;
1593 tp = NODE_PARENT(n);
1594 tnode_free((struct tnode *) n);
1596 if (tp) {
1597 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1598 put_child(t, (struct tnode *)tp, cindex, NULL);
1599 t->trie = trie_rebalance(t, tp);
1601 else
1602 t->trie = NULL;
1604 return 1;
1607 static int
1608 fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1609 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1611 struct trie *t = (struct trie *) tb->tb_data;
1612 u32 key, mask;
1613 int plen = r->rtm_dst_len;
1614 u8 tos = r->rtm_tos;
1615 struct fib_alias *fa, *fa_to_delete;
1616 struct list_head *fa_head;
1617 struct leaf *l;
1619 if (plen > 32)
1620 return -EINVAL;
1622 key = 0;
1623 if (rta->rta_dst)
1624 memcpy(&key, rta->rta_dst, 4);
1626 key = ntohl(key);
1627 mask = ntohl( inet_make_mask(plen) );
1629 if (key & ~mask)
1630 return -EINVAL;
1632 key = key & mask;
1633 l = fib_find_node(t, key);
1635 if (!l)
1636 return -ESRCH;
1638 fa_head = get_fa_head(l, plen);
1639 fa = fib_find_alias(fa_head, tos, 0);
1641 if (!fa)
1642 return -ESRCH;
1644 if (trie_debug)
1645 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1647 fa_to_delete = NULL;
1648 fa_head = fa->fa_list.prev;
1649 list_for_each_entry(fa, fa_head, fa_list) {
1650 struct fib_info *fi = fa->fa_info;
1652 if (fa->fa_tos != tos)
1653 break;
1655 if ((!r->rtm_type ||
1656 fa->fa_type == r->rtm_type) &&
1657 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1658 fa->fa_scope == r->rtm_scope) &&
1659 (!r->rtm_protocol ||
1660 fi->fib_protocol == r->rtm_protocol) &&
1661 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1662 fa_to_delete = fa;
1663 break;
1667 if (fa_to_delete) {
1668 int kill_li = 0;
1669 struct leaf_info *li;
1671 fa = fa_to_delete;
1672 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1674 l = fib_find_node(t, key);
1675 li = find_leaf_info(&l->list, plen);
1677 write_lock_bh(&fib_lock);
1679 list_del(&fa->fa_list);
1681 if (list_empty(fa_head)) {
1682 hlist_del(&li->hlist);
1683 kill_li = 1;
1685 write_unlock_bh(&fib_lock);
1687 if (kill_li)
1688 free_leaf_info(li);
1690 if (hlist_empty(&l->list))
1691 trie_leaf_remove(t, key);
1693 if (fa->fa_state & FA_S_ACCESSED)
1694 rt_cache_flush(-1);
1696 fn_free_alias(fa);
1697 return 0;
1699 return -ESRCH;
1702 static int trie_flush_list(struct trie *t, struct list_head *head)
1704 struct fib_alias *fa, *fa_node;
1705 int found = 0;
1707 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1708 struct fib_info *fi = fa->fa_info;
1710 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1712 write_lock_bh(&fib_lock);
1713 list_del(&fa->fa_list);
1714 write_unlock_bh(&fib_lock);
1716 fn_free_alias(fa);
1717 found++;
1720 return found;
1723 static int trie_flush_leaf(struct trie *t, struct leaf *l)
1725 int found = 0;
1726 struct hlist_head *lih = &l->list;
1727 struct hlist_node *node, *tmp;
1728 struct leaf_info *li = NULL;
1730 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1732 found += trie_flush_list(t, &li->falh);
1734 if (list_empty(&li->falh)) {
1736 write_lock_bh(&fib_lock);
1737 hlist_del(&li->hlist);
1738 write_unlock_bh(&fib_lock);
1740 free_leaf_info(li);
1743 return found;
1746 static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1748 struct node *c = (struct node *) thisleaf;
1749 struct tnode *p;
1750 int idx;
1752 if (c == NULL) {
1753 if (t->trie == NULL)
1754 return NULL;
1756 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1757 return (struct leaf *) t->trie;
1759 p = (struct tnode*) t->trie; /* Start */
1761 else
1762 p = (struct tnode *) NODE_PARENT(c);
1764 while (p) {
1765 int pos, last;
1767 /* Find the next child of the parent */
1768 if (c)
1769 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1770 else
1771 pos = 0;
1773 last = 1 << p->bits;
1774 for(idx = pos; idx < last ; idx++) {
1775 if (p->child[idx]) {
1777 /* Decend if tnode */
1779 while (IS_TNODE(p->child[idx])) {
1780 p = (struct tnode*) p->child[idx];
1781 idx = 0;
1783 /* Rightmost non-NULL branch */
1784 if (p && IS_TNODE(p))
1785 while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++;
1787 /* Done with this tnode? */
1788 if (idx >= (1 << p->bits) || p->child[idx] == NULL )
1789 goto up;
1791 return (struct leaf*) p->child[idx];
1795 /* No more children go up one step */
1796 c = (struct node*) p;
1797 p = (struct tnode *) NODE_PARENT(p);
1799 return NULL; /* Ready. Root of trie */
1802 static int fn_trie_flush(struct fib_table *tb)
1804 struct trie *t = (struct trie *) tb->tb_data;
1805 struct leaf *ll = NULL, *l = NULL;
1806 int found = 0, h;
1808 t->revision++;
1810 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1811 found += trie_flush_leaf(t, l);
1813 if (ll && hlist_empty(&ll->list))
1814 trie_leaf_remove(t, ll->key);
1815 ll = l;
1818 if (ll && hlist_empty(&ll->list))
1819 trie_leaf_remove(t, ll->key);
1821 if (trie_debug)
1822 printk("trie_flush found=%d\n", found);
1823 return found;
1826 static int trie_last_dflt=-1;
1828 static void
1829 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1831 struct trie *t = (struct trie *) tb->tb_data;
1832 int order, last_idx;
1833 struct fib_info *fi = NULL;
1834 struct fib_info *last_resort;
1835 struct fib_alias *fa = NULL;
1836 struct list_head *fa_head;
1837 struct leaf *l;
1839 last_idx = -1;
1840 last_resort = NULL;
1841 order = -1;
1843 read_lock(&fib_lock);
1845 l = fib_find_node(t, 0);
1846 if (!l)
1847 goto out;
1849 fa_head = get_fa_head(l, 0);
1850 if (!fa_head)
1851 goto out;
1853 if (list_empty(fa_head))
1854 goto out;
1856 list_for_each_entry(fa, fa_head, fa_list) {
1857 struct fib_info *next_fi = fa->fa_info;
1859 if (fa->fa_scope != res->scope ||
1860 fa->fa_type != RTN_UNICAST)
1861 continue;
1863 if (next_fi->fib_priority > res->fi->fib_priority)
1864 break;
1865 if (!next_fi->fib_nh[0].nh_gw ||
1866 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1867 continue;
1868 fa->fa_state |= FA_S_ACCESSED;
1870 if (fi == NULL) {
1871 if (next_fi != res->fi)
1872 break;
1873 } else if (!fib_detect_death(fi, order, &last_resort,
1874 &last_idx, &trie_last_dflt)) {
1875 if (res->fi)
1876 fib_info_put(res->fi);
1877 res->fi = fi;
1878 atomic_inc(&fi->fib_clntref);
1879 trie_last_dflt = order;
1880 goto out;
1882 fi = next_fi;
1883 order++;
1885 if (order <= 0 || fi == NULL) {
1886 trie_last_dflt = -1;
1887 goto out;
1890 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1891 if (res->fi)
1892 fib_info_put(res->fi);
1893 res->fi = fi;
1894 atomic_inc(&fi->fib_clntref);
1895 trie_last_dflt = order;
1896 goto out;
1898 if (last_idx >= 0) {
1899 if (res->fi)
1900 fib_info_put(res->fi);
1901 res->fi = last_resort;
1902 if (last_resort)
1903 atomic_inc(&last_resort->fib_clntref);
1905 trie_last_dflt = last_idx;
1906 out:;
1907 read_unlock(&fib_lock);
1910 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1911 struct sk_buff *skb, struct netlink_callback *cb)
1913 int i, s_i;
1914 struct fib_alias *fa;
1916 u32 xkey=htonl(key);
1918 s_i=cb->args[3];
1919 i = 0;
1921 list_for_each_entry(fa, fah, fa_list) {
1922 if (i < s_i) {
1923 i++;
1924 continue;
1926 if (fa->fa_info->fib_nh == NULL) {
1927 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1928 i++;
1929 continue;
1931 if (fa->fa_info == NULL) {
1932 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1933 i++;
1934 continue;
1937 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1938 cb->nlh->nlmsg_seq,
1939 RTM_NEWROUTE,
1940 tb->tb_id,
1941 fa->fa_type,
1942 fa->fa_scope,
1943 &xkey,
1944 plen,
1945 fa->fa_tos,
1946 fa->fa_info, 0) < 0) {
1947 cb->args[3] = i;
1948 return -1;
1950 i++;
1952 cb->args[3]=i;
1953 return skb->len;
1956 static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1957 struct netlink_callback *cb)
1959 int h, s_h;
1960 struct list_head *fa_head;
1961 struct leaf *l = NULL;
1962 s_h=cb->args[2];
1964 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1966 if (h < s_h)
1967 continue;
1968 if (h > s_h)
1969 memset(&cb->args[3], 0,
1970 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1972 fa_head = get_fa_head(l, plen);
1974 if (!fa_head)
1975 continue;
1977 if (list_empty(fa_head))
1978 continue;
1980 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1981 cb->args[2]=h;
1982 return -1;
1985 cb->args[2]=h;
1986 return skb->len;
1989 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1991 int m, s_m;
1992 struct trie *t = (struct trie *) tb->tb_data;
1994 s_m = cb->args[1];
1996 read_lock(&fib_lock);
1997 for (m=0; m<=32; m++) {
1999 if (m < s_m)
2000 continue;
2001 if (m > s_m)
2002 memset(&cb->args[2], 0,
2003 sizeof(cb->args) - 2*sizeof(cb->args[0]));
2005 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
2006 cb->args[1] = m;
2007 goto out;
2010 read_unlock(&fib_lock);
2011 cb->args[1] = m;
2012 return skb->len;
2013 out:
2014 read_unlock(&fib_lock);
2015 return -1;
2018 /* Fix more generic FIB names for init later */
2020 #ifdef CONFIG_IP_MULTIPLE_TABLES
2021 struct fib_table * fib_hash_init(int id)
2022 #else
2023 struct fib_table * __init fib_hash_init(int id)
2024 #endif
2026 struct fib_table *tb;
2027 struct trie *t;
2029 if (fn_alias_kmem == NULL)
2030 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2031 sizeof(struct fib_alias),
2032 0, SLAB_HWCACHE_ALIGN,
2033 NULL, NULL);
2035 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2036 GFP_KERNEL);
2037 if (tb == NULL)
2038 return NULL;
2040 tb->tb_id = id;
2041 tb->tb_lookup = fn_trie_lookup;
2042 tb->tb_insert = fn_trie_insert;
2043 tb->tb_delete = fn_trie_delete;
2044 tb->tb_flush = fn_trie_flush;
2045 tb->tb_select_default = fn_trie_select_default;
2046 tb->tb_dump = fn_trie_dump;
2047 memset(tb->tb_data, 0, sizeof(struct trie));
2049 t = (struct trie *) tb->tb_data;
2051 trie_init(t);
2053 if (id == RT_TABLE_LOCAL)
2054 trie_local = t;
2055 else if (id == RT_TABLE_MAIN)
2056 trie_main = t;
2058 if (id == RT_TABLE_LOCAL)
2059 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2061 return tb;
2064 /* Trie dump functions */
2066 static void putspace_seq(struct seq_file *seq, int n)
2068 while (n--) seq_printf(seq, " ");
2071 static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
2073 while (bits--)
2074 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
2077 static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
2078 int pend, int cindex, int bits)
2080 putspace_seq(seq, indent);
2081 if (IS_LEAF(n))
2082 seq_printf(seq, "|");
2083 else
2084 seq_printf(seq, "+");
2085 if (bits) {
2086 seq_printf(seq, "%d/", cindex);
2087 printbin_seq(seq, cindex, bits);
2088 seq_printf(seq, ": ");
2090 else
2091 seq_printf(seq, "<root>: ");
2092 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
2094 if (IS_LEAF(n))
2095 seq_printf(seq, "key=%d.%d.%d.%d\n",
2096 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2097 else {
2098 int plen = ((struct tnode *)n)->pos;
2099 t_key prf=MASK_PFX(n->key, plen);
2100 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2101 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2103 if (IS_LEAF(n)) {
2104 struct leaf *l=(struct leaf *)n;
2105 struct fib_alias *fa;
2106 int i;
2107 for (i=32; i>=0; i--)
2108 if (find_leaf_info(&l->list, i)) {
2110 struct list_head *fa_head = get_fa_head(l, i);
2112 if (!fa_head)
2113 continue;
2115 if (list_empty(fa_head))
2116 continue;
2118 putspace_seq(seq, indent+2);
2119 seq_printf(seq, "{/%d...dumping}\n", i);
2122 list_for_each_entry(fa, fa_head, fa_list) {
2123 putspace_seq(seq, indent+2);
2124 if (fa->fa_info->fib_nh == NULL) {
2125 seq_printf(seq, "Error _fib_nh=NULL\n");
2126 continue;
2128 if (fa->fa_info == NULL) {
2129 seq_printf(seq, "Error fa_info=NULL\n");
2130 continue;
2133 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2134 fa->fa_type,
2135 fa->fa_scope,
2136 fa->fa_tos);
2140 else if (IS_TNODE(n)) {
2141 struct tnode *tn = (struct tnode *)n;
2142 putspace_seq(seq, indent); seq_printf(seq, "| ");
2143 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
2144 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2145 seq_printf(seq, "}\n");
2146 putspace_seq(seq, indent); seq_printf(seq, "| ");
2147 seq_printf(seq, "{pos=%d", tn->pos);
2148 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2149 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2150 putspace_seq(seq, indent); seq_printf(seq, "| ");
2151 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2155 static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2157 struct node *n = t->trie;
2158 int cindex=0;
2159 int indent=1;
2160 int pend=0;
2161 int depth = 0;
2163 read_lock(&fib_lock);
2165 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2166 if (n) {
2167 printnode_seq(seq, indent, n, pend, cindex, 0);
2168 if (IS_TNODE(n)) {
2169 struct tnode *tn = (struct tnode *)n;
2170 pend = tn->pos+tn->bits;
2171 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2172 indent += 3;
2173 depth++;
2175 while (tn && cindex < (1 << tn->bits)) {
2176 if (tn->child[cindex]) {
2178 /* Got a child */
2180 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2181 if (IS_LEAF(tn->child[cindex])) {
2182 cindex++;
2185 else {
2187 * New tnode. Decend one level
2190 depth++;
2191 n = tn->child[cindex];
2192 tn = (struct tnode *)n;
2193 pend = tn->pos+tn->bits;
2194 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2195 indent+=3;
2196 cindex=0;
2199 else
2200 cindex++;
2203 * Test if we are done
2206 while (cindex >= (1 << tn->bits)) {
2209 * Move upwards and test for root
2210 * pop off all traversed nodes
2213 if (NODE_PARENT(tn) == NULL) {
2214 tn = NULL;
2215 n = NULL;
2216 break;
2218 else {
2219 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2220 tn = NODE_PARENT(tn);
2221 cindex++;
2222 n = (struct node *)tn;
2223 pend = tn->pos+tn->bits;
2224 indent-=3;
2225 depth--;
2230 else n = NULL;
2232 else seq_printf(seq, "------ trie is empty\n");
2234 read_unlock(&fib_lock);
2237 static struct trie_stat *trie_stat_new(void)
2239 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2240 int i;
2242 if (s) {
2243 s->totdepth = 0;
2244 s->maxdepth = 0;
2245 s->tnodes = 0;
2246 s->leaves = 0;
2247 s->nullpointers = 0;
2249 for(i=0; i< MAX_CHILDS; i++)
2250 s->nodesizes[i] = 0;
2252 return s;
2255 static struct trie_stat *trie_collect_stats(struct trie *t)
2257 struct node *n = t->trie;
2258 struct trie_stat *s = trie_stat_new();
2259 int cindex = 0;
2260 int indent = 1;
2261 int pend = 0;
2262 int depth = 0;
2264 read_lock(&fib_lock);
2266 if (s) {
2267 if (n) {
2268 if (IS_TNODE(n)) {
2269 struct tnode *tn = (struct tnode *)n;
2270 pend = tn->pos+tn->bits;
2271 indent += 3;
2272 s->nodesizes[tn->bits]++;
2273 depth++;
2275 while (tn && cindex < (1 << tn->bits)) {
2276 if (tn->child[cindex]) {
2277 /* Got a child */
2279 if (IS_LEAF(tn->child[cindex])) {
2280 cindex++;
2282 /* stats */
2283 if (depth > s->maxdepth)
2284 s->maxdepth = depth;
2285 s->totdepth += depth;
2286 s->leaves++;
2289 else {
2291 * New tnode. Decend one level
2294 s->tnodes++;
2295 s->nodesizes[tn->bits]++;
2296 depth++;
2298 n = tn->child[cindex];
2299 tn = (struct tnode *)n;
2300 pend = tn->pos+tn->bits;
2302 indent += 3;
2303 cindex = 0;
2306 else {
2307 cindex++;
2308 s->nullpointers++;
2312 * Test if we are done
2315 while (cindex >= (1 << tn->bits)) {
2318 * Move upwards and test for root
2319 * pop off all traversed nodes
2323 if (NODE_PARENT(tn) == NULL) {
2324 tn = NULL;
2325 n = NULL;
2326 break;
2328 else {
2329 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2330 tn = NODE_PARENT(tn);
2331 cindex++;
2332 n = (struct node *)tn;
2333 pend = tn->pos+tn->bits;
2334 indent -= 3;
2335 depth--;
2340 else n = NULL;
2344 read_unlock(&fib_lock);
2345 return s;
2348 #ifdef CONFIG_PROC_FS
2350 static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2352 return NULL;
2355 static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2357 return NULL;
2360 static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2362 void *v = NULL;
2364 if (ip_fib_main_table)
2365 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2366 return v;
2369 static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2371 ++*pos;
2372 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2375 static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2381 * This outputs /proc/net/fib_triestats
2383 * It always works in backward compatibility mode.
2384 * The format of the file is not supposed to be changed.
2387 static void collect_and_show(struct trie *t, struct seq_file *seq)
2389 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2390 int i, max, pointers;
2391 struct trie_stat *stat;
2392 int avdepth;
2394 stat = trie_collect_stats(t);
2396 bytes=0;
2397 seq_printf(seq, "trie=%p\n", t);
2399 if (stat) {
2400 if (stat->leaves)
2401 avdepth=stat->totdepth*100 / stat->leaves;
2402 else
2403 avdepth=0;
2404 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2405 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2407 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2408 bytes += sizeof(struct leaf) * stat->leaves;
2409 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2410 bytes += sizeof(struct tnode) * stat->tnodes;
2412 max = MAX_CHILDS-1;
2414 while (max >= 0 && stat->nodesizes[max] == 0)
2415 max--;
2416 pointers = 0;
2418 for (i = 1; i <= max; i++)
2419 if (stat->nodesizes[i] != 0) {
2420 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2421 pointers += (1<<i) * stat->nodesizes[i];
2423 seq_printf(seq, "\n");
2424 seq_printf(seq, "Pointers: %d\n", pointers);
2425 bytes += sizeof(struct node *) * pointers;
2426 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2427 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2429 kfree(stat);
2432 #ifdef CONFIG_IP_FIB_TRIE_STATS
2433 seq_printf(seq, "Counters:\n---------\n");
2434 seq_printf(seq,"gets = %d\n", t->stats.gets);
2435 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2436 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2437 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2438 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2439 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2440 #ifdef CLEAR_STATS
2441 memset(&(t->stats), 0, sizeof(t->stats));
2442 #endif
2443 #endif /* CONFIG_IP_FIB_TRIE_STATS */
2446 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2448 char bf[128];
2450 if (v == SEQ_START_TOKEN) {
2451 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2452 sizeof(struct leaf), sizeof(struct tnode));
2453 if (trie_local)
2454 collect_and_show(trie_local, seq);
2456 if (trie_main)
2457 collect_and_show(trie_main, seq);
2459 else {
2460 snprintf(bf, sizeof(bf),
2461 "*\t%08X\t%08X", 200, 400);
2463 seq_printf(seq, "%-127s\n", bf);
2465 return 0;
2468 static struct seq_operations fib_triestat_seq_ops = {
2469 .start = fib_triestat_seq_start,
2470 .next = fib_triestat_seq_next,
2471 .stop = fib_triestat_seq_stop,
2472 .show = fib_triestat_seq_show,
2475 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2477 struct seq_file *seq;
2478 int rc = -ENOMEM;
2480 rc = seq_open(file, &fib_triestat_seq_ops);
2481 if (rc)
2482 goto out_kfree;
2484 seq = file->private_data;
2485 out:
2486 return rc;
2487 out_kfree:
2488 goto out;
2491 static struct file_operations fib_triestat_seq_fops = {
2492 .owner = THIS_MODULE,
2493 .open = fib_triestat_seq_open,
2494 .read = seq_read,
2495 .llseek = seq_lseek,
2496 .release = seq_release_private,
2499 int __init fib_stat_proc_init(void)
2501 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2502 return -ENOMEM;
2503 return 0;
2506 void __init fib_stat_proc_exit(void)
2508 proc_net_remove("fib_triestat");
2511 static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2513 return NULL;
2516 static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2518 return NULL;
2521 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2523 void *v = NULL;
2525 if (ip_fib_main_table)
2526 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2527 return v;
2530 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2532 ++*pos;
2533 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2536 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2542 * This outputs /proc/net/fib_trie.
2544 * It always works in backward compatibility mode.
2545 * The format of the file is not supposed to be changed.
2548 static int fib_trie_seq_show(struct seq_file *seq, void *v)
2550 char bf[128];
2552 if (v == SEQ_START_TOKEN) {
2553 if (trie_local)
2554 trie_dump_seq(seq, trie_local);
2556 if (trie_main)
2557 trie_dump_seq(seq, trie_main);
2560 else {
2561 snprintf(bf, sizeof(bf),
2562 "*\t%08X\t%08X", 200, 400);
2563 seq_printf(seq, "%-127s\n", bf);
2566 return 0;
2569 static struct seq_operations fib_trie_seq_ops = {
2570 .start = fib_trie_seq_start,
2571 .next = fib_trie_seq_next,
2572 .stop = fib_trie_seq_stop,
2573 .show = fib_trie_seq_show,
2576 static int fib_trie_seq_open(struct inode *inode, struct file *file)
2578 struct seq_file *seq;
2579 int rc = -ENOMEM;
2581 rc = seq_open(file, &fib_trie_seq_ops);
2582 if (rc)
2583 goto out_kfree;
2585 seq = file->private_data;
2586 out:
2587 return rc;
2588 out_kfree:
2589 goto out;
2592 static struct file_operations fib_trie_seq_fops = {
2593 .owner = THIS_MODULE,
2594 .open = fib_trie_seq_open,
2595 .read = seq_read,
2596 .llseek = seq_lseek,
2597 .release= seq_release_private,
2600 int __init fib_proc_init(void)
2602 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2603 return -ENOMEM;
2604 return 0;
2607 void __init fib_proc_exit(void)
2609 proc_net_remove("fib_trie");
2612 #endif /* CONFIG_PROC_FS */