2 * Copyright (c) 1988, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * @(#)radix.c 8.4 (Berkeley) 11/2/94
30 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.3 2002/04/28 05:40:25 suz Exp $
34 * Routines to build and maintain radix trees for routing lookups.
37 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/domain.h>
41 #include <sys/globaldata.h>
42 #include <sys/malloc.h>
43 #include <sys/queue.h>
44 #include <sys/syslog.h>
45 #include <sys/thread.h>
46 #include <net/netisr2.h>
47 #include <net/netmsg2.h>
54 #include <net/radix.h>
60 #define log(l, ...) syslog(l, __VA_ARGS__)
61 #define kprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
62 #define print_backtrace(...) /* nothing */
63 #define panic(fmt, ...) \
65 fprintf(stderr, "PANIC: " fmt "\n", ##__VA_ARGS__); \
71 * The arguments to the radix functions are really counted byte arrays with
72 * the length in the first byte. struct sockaddr's fit this type structurally.
73 * Cast the result to int as this is the dominant usage.
75 #define clen(c) (int)(*(const u_char *)(c))
78 static struct radix_mask
*rn_mkfreelist
[MAXCPU
];
79 static struct radix_node_head
*mask_rnheads
[MAXCPU
];
81 static const u_char rn_zeros
[RN_MAXKEYLEN
];
82 static const u_char rn_ones
[RN_MAXKEYLEN
] = RN_MAXKEYONES
;
85 static int rn_nodenum
;
86 static struct radix_node
*rn_clist
;
87 static bool rn_debug
= true;
91 static __inline
struct radix_mask
*
92 MKGet(struct radix_mask
**l
)
100 R_Malloc(m
, struct radix_mask
*, sizeof(*m
));
106 MKFree(struct radix_mask
**l
, struct radix_mask
*m
)
113 * The data structure for the keys is a radix tree with one way
114 * branching removed. The index rn_bit at an internal node n represents a bit
115 * position to be tested. The tree is arranged so that all descendants
116 * of a node n have keys whose bits all agree up to position rn_bit - 1.
117 * (We say the index of n is rn_bit.)
119 * There is at least one descendant which has a one bit at position rn_bit,
120 * and at least one with a zero there.
122 * A route is determined by a pair of key and mask. We require that the
123 * bit-wise logical and of the key and mask to be the key.
124 * We define the index of a route associated with the mask to be
125 * the first bit number in the mask where 0 occurs (with bit number 0
126 * representing the highest order bit).
128 * We say a mask is normal if every bit is 0, past the index of the mask.
129 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
130 * and m is a normal mask, then the route applies to every descendant of n.
131 * If the index(m) < rn_bit, this implies the trailing last few bits of k
132 * before bit rn_bit are all 0, (and hence consequently true of every
133 * descendant of n), so the route applies to all descendants of the node
136 * Similar logic shows that a non-normal mask m such that
137 * index(m) <= index(n) could potentially apply to many children of n.
138 * Thus, for each non-host route, we attach its mask to a list at an internal
139 * node as high in the tree as we can go.
141 * The present version of the code makes use of normal routes in short-
142 * circuiting an explict mask and compare operation when testing whether
143 * a key satisfies a normal route, and also in remembering the unique leaf
144 * that governs a subtree.
148 * Search key <key> in the subtree from <head> until encountering
149 * a leaf node and return it.
151 * NOTE: Will never return NULL because the embedded default root node.
153 static struct radix_node
*
154 rn_search(const void *_key
, struct radix_node
*head
)
156 struct radix_node
*x
;
161 while (x
->rn_bit
>= 0) {
162 if (x
->rn_bmask
& key
[x
->rn_offset
])
171 * Similar to rn_search() but with the netmask <mask> applied.
173 * NOTE: The netmask can be the all-zero default mask.
175 static struct radix_node
*
176 rn_search_m(const void *_key
, const void *_mask
, struct radix_node
*head
)
178 struct radix_node
*x
;
179 const u_char
*key
, *mask
;
184 while (x
->rn_bit
>= 0) {
185 if ((x
->rn_bmask
& mask
[x
->rn_offset
]) &&
186 (x
->rn_bmask
& key
[x
->rn_offset
]))
195 * Compare the two netmasks and return true if netmask <m> is strictly more
196 * specific than netmask <n>.
198 * NOTE: Non-contiguous netmask is supported.
201 rn_refines(const void *_m
, const void *_n
)
203 const u_char
*m
, *n
, *lim
, *lim2
;
209 lim2
= lim
= n
+ clen(n
);
210 longer
= clen(n
++) - clen(m
++);
222 if (*n
++) /* n is longer and more specific */
225 if (equal
&& (longer
< 0)) {
228 if (*m
++) /* m is longer and more specific */
237 * Lookup the longest-prefix match of the key <key> in the tree <head>.
238 * The netmask <mask> can be NULL; if specified, the result must have the
239 * same mask, or NULL is returned.
242 rn_lookup(const void *_key
, const void *_mask
, struct radix_node_head
*head
)
244 struct radix_node
*x
;
245 const u_char
*key
, *mask
, *netmask
;
252 x
= rn_addmask(mask
, true, head
->rnh_treetop
->rn_offset
,
254 if (x
== NULL
) /* mask doesn't exist in the mask tree */
259 x
= rn_match(key
, head
);
260 if (x
!= NULL
&& netmask
!= NULL
) {
261 /* check the duped-key chain for different masks */
262 while (x
!= NULL
&& x
->rn_mask
!= netmask
)
270 * Check whether the key <key> matches the (key, mask) of the given
271 * radix node <leaf>. The <skip> parameter gives the number of bytes
272 * to skip for the keys and mask.
275 rn_satisfies_leaf(const void *key
, struct radix_node
*leaf
, int skip
)
277 const u_char
*cp
, *cp2
, *cp3
, *cplim
;
284 length
= MIN(clen(cp
), clen(cp2
));
288 length
= MIN(length
, clen(cp3
));
293 for (cp
+= skip
; cp
< cplim
; cp
++, cp2
++, cp3
++) {
294 if ((*cp
^ *cp2
) & *cp3
)
303 * Search for the longest-prefix match of the key <key>.
306 rn_match(const void *key
, struct radix_node_head
*head
)
308 struct radix_node
*top
, *t
, *saved_t
;
309 const u_char
*cp
, *cp2
, *cplim
;
310 int klen
, matched_off
, test
, bit
, rn_bit
;
312 top
= head
->rnh_treetop
;
314 t
= rn_search(key
, top
);
316 * See if we match exactly as a host destination, or at least learn
317 * how many bits match, for normal mask finesse.
319 * It doesn't hurt to limit how many bytes to check to the length of
320 * the mask, since if it matches we had a genuine match and the leaf
321 * we have is the most specific one anyway; if it didn't match with
322 * a shorter length it would fail with a long one. This wins big
323 * for class B&C netmasks which are probably the most common case...
325 if (t
->rn_mask
!= NULL
)
326 klen
= clen(t
->rn_mask
);
329 cplim
= (const u_char
*)key
+ klen
;
330 cp
= (const u_char
*)key
+ top
->rn_offset
;
331 cp2
= t
->rn_key
+ top
->rn_offset
;
332 for (; cp
< cplim
; cp
++, cp2
++) {
338 * This extra grot is in case we are explicitly asked
339 * to look up the default (i.e., all-zero address). Ugh!
341 * Never return the root node itself, it seems to cause a
344 if (t
->rn_flags
& RNF_ROOT
)
349 /* Find the first bit that differs. */
350 test
= (*cp
^ *cp2
) & 0xff;
351 for (bit
= 7; (test
>>= 1) > 0;)
353 matched_off
= cp
- (const u_char
*)key
;
354 bit
+= matched_off
<< 3;
358 * Even if we don't match exactly as a host, we may match if the leaf
359 * we wound up at has routes to networks. Check those routes.
362 /* Skip the host route, which might only appear at the first. */
363 if (t
->rn_mask
== NULL
)
365 for (; t
!= NULL
; t
= t
->rn_dupedkey
) {
366 if (t
->rn_flags
& RNF_NORMAL
) {
367 if (rn_bit
<= t
->rn_bit
)
369 } else if (rn_satisfies_leaf(key
, t
, matched_off
))
375 * Start searching up the tree for network routes.
378 struct radix_node
*x
;
379 struct radix_mask
*m
;
384 * If non-contiguous masks ever become important
385 * we can restore the masking and open coding of
386 * the search and satisfaction test and put the
387 * calculation of "skip" back before the "do".
389 for (m
= t
->rn_mklist
; m
!= NULL
; m
= m
->rm_next
) {
390 if (m
->rm_flags
& RNF_NORMAL
) {
391 if (rn_bit
<= m
->rm_bit
)
394 skip
= MIN(t
->rn_offset
, matched_off
);
395 x
= rn_search_m(key
, m
->rm_mask
, t
);
396 while (x
!= NULL
&& x
->rn_mask
!= m
->rm_mask
)
399 rn_satisfies_leaf(key
, x
, skip
))
409 * Whenever to add a new leaf to the tree, another parent node is needed.
410 * So they are allocated as an array of two elements: the first element is
411 * the leaf, the second one is the parent node.
413 * This function initializes the given pair of nodes <nodes>, so that the
414 * leaf is the left child of the parent node.
416 static struct radix_node
*
417 rn_newpair(const void *key
, int bit
, struct radix_node nodes
[2])
419 struct radix_node
*left
, *parent
;
424 parent
->rn_bit
= bit
;
425 parent
->rn_bmask
= 0x80 >> (bit
& 0x7);
426 parent
->rn_offset
= bit
>> 3;
427 parent
->rn_left
= left
;
428 parent
->rn_flags
= RNF_ACTIVE
;
429 parent
->rn_mklist
= NULL
;
433 left
->rn_parent
= parent
;
434 left
->rn_flags
= parent
->rn_flags
;
435 left
->rn_mklist
= NULL
;
438 left
->rn_info
= rn_nodenum
++;
439 parent
->rn_info
= rn_nodenum
++;
440 left
->rn_twin
= parent
;
441 left
->rn_ybro
= rn_clist
;
449 * Insert the key <key> to the radix tree <head>.
451 * If the key already exists, then set <dupentry> to 'true' and return the
452 * node of the existing duped key. Otherwise, set <dupentry> to 'false',
453 * insert the key to the tree by making use of the given nodes <nodes>, and
454 * return the node of the inserted key (i.e., &nodes[0]).
456 static struct radix_node
*
457 rn_insert(const void *key
, struct radix_node_head
*head
, bool *dupentry
,
458 struct radix_node nodes
[2])
460 struct radix_node
*top
, *t
, *tt
;
465 top
= head
->rnh_treetop
;
466 head_off
= top
->rn_offset
;
468 cp
= (const u_char
*)key
+ head_off
;
469 t
= rn_search(key
, top
);
472 * Find the first bit where the key and t->rn_key differ.
475 const u_char
*cp2
= t
->rn_key
+ head_off
;
476 const u_char
*cplim
= (const u_char
*)key
+ klen
;
489 cmp_res
= (cp
[-1] ^ cp2
[-1]) & 0xff;
490 for (bit
= (cp
- (const u_char
*)key
) << 3; cmp_res
; bit
--)
494 struct radix_node
*p
, *x
= top
;
499 if (cp
[x
->rn_offset
] & x
->rn_bmask
)
503 } while (bit
> (unsigned int)x
->rn_bit
);
504 /* shortcut of: x->rn_bit < bit && x->rn_bit >= 0 */
507 log(LOG_DEBUG
, "%s: Going In:\n", __func__
);
511 t
= rn_newpair(key
, bit
, nodes
);
513 if ((cp
[p
->rn_offset
] & p
->rn_bmask
) == 0)
518 t
->rn_parent
= p
; /* frees x, p as temp vars below */
519 if ((cp
[t
->rn_offset
] & t
->rn_bmask
) == 0) {
527 log(LOG_DEBUG
, "%s: Coming Out:\n", __func__
);
536 * Add the netmask <mask> to the mask tree <maskhead>. If <search> is
537 * 'true', then only check the existence of the given mask but don't
540 * The <skip> parameter specifies the number of bytes to skip in <mask>
541 * to obtain the mask data. (NOTE: The length of a mask key doesn't
542 * count the trailing zero bytes.)
544 * Return a pointer to the mask node on success; otherwise NULL on error.
547 rn_addmask(const void *_mask
, bool search
, int skip
,
548 struct radix_node_head
*maskhead
)
550 struct radix_node
*x
, *saved_x
;
551 const u_char
*mask
, *cp
, *cplim
;
552 u_char
*p
, addmask_key
[RN_MAXKEYLEN
];
554 bool maskduplicated
, isnormal
;
557 if ((mlen
= clen(mask
)) > RN_MAXKEYLEN
)
562 return (maskhead
->rnh_nodes
); /* all-zero key */
564 bzero(addmask_key
, sizeof(addmask_key
));
566 bcopy(rn_ones
+ 1, addmask_key
+ 1, skip
- 1);
567 bcopy(mask
+ skip
, addmask_key
+ skip
, mlen
- skip
);
568 /* Trim trailing zeroes. */
569 for (cp
= addmask_key
+ mlen
; (cp
> addmask_key
) && cp
[-1] == 0;)
571 mlen
= cp
- addmask_key
;
573 return (maskhead
->rnh_nodes
); /* all-zero key */
576 x
= rn_search(addmask_key
, maskhead
->rnh_treetop
);
577 if (x
->rn_key
== NULL
) {
578 kprintf("WARNING: radix_node->rn_key is NULL rn=%p\n", x
);
581 } else if (bcmp(addmask_key
, x
->rn_key
, mlen
) != 0) {
584 if (x
!= NULL
|| search
)
587 R_Malloc(x
, struct radix_node
*, RN_MAXKEYLEN
+ 2 * (sizeof *x
));
588 if ((saved_x
= x
) == NULL
)
591 bzero(x
, RN_MAXKEYLEN
+ 2 * (sizeof *x
));
592 mask
= p
= (u_char
*)(x
+ 2);
593 bcopy(addmask_key
, p
, mlen
);
594 x
= rn_insert(mask
, maskhead
, &maskduplicated
, x
);
595 if (maskduplicated
) {
596 log(LOG_ERR
, "%s: mask impossibly already in tree", __func__
);
602 * Calculate the index of mask, and check for normalcy.
604 * First find the first byte with a 0 bit, then if there are more
605 * bits left (remember we already trimmed the trailing zeros),
606 * the pattern must be one of those in normal_chars[], or we have
607 * a non-contiguous mask.
612 for (cp
= mask
+ skip
; cp
< cplim
; cp
++) {
617 static const u_char normal_chars
[] = {
618 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff
622 for (j
= 0x80; (j
& *cp
) != 0; j
>>= 1)
624 if (cp
!= (cplim
- 1) || *cp
!= normal_chars
[bit
])
627 bit
+= (cp
- mask
) << 3;
628 x
->rn_bit
= -1 - bit
;
630 x
->rn_flags
|= RNF_NORMAL
;
635 * Compare the two netmasks and return true if netmask <m> is more
636 * specific than netmask <n>.
638 * NOTE: arbitrary ordering for non-contiguous masks.
641 rn_lexobetter(const void *_m
, const void *_n
)
643 const u_char
*m
, *n
, *lim
;
648 if (clen(m
) > clen(n
)) {
649 /* not really, but need to check longer one first */
653 if (clen(m
) == clen(n
)) {
654 for (lim
= m
+ clen(m
); m
< lim
; m
++, n
++) {
663 static struct radix_mask
*
664 rn_new_radix_mask(struct radix_node
*node
, struct radix_mask
*nextmask
)
666 struct radix_mask
*m
;
668 m
= MKGet(&rn_mkfreelist
[mycpuid
]);
670 log(LOG_ERR
, "Mask for route not entered\n");
674 bzero(m
, sizeof(*m
));
675 m
->rm_bit
= node
->rn_bit
;
676 m
->rm_flags
= node
->rn_flags
;
677 if (m
->rm_flags
& RNF_NORMAL
)
680 m
->rm_mask
= node
->rn_mask
;
681 m
->rm_next
= nextmask
;
688 * Add the route (key, mask) to the radix tree <head> using the given
689 * nodes <nodes>. The netmask <mask> is NULL for a host route.
691 * Return the node of the inserted route on success. Otherwise, return
692 * NULL if the following happened:
693 * - failed to add the netmask to the mask tree (e.g., out of memory)
694 * - the identical route already exists
696 * NOTE: The address <key> and netmask <mask> must be of the same data
697 * structure (e.g., both 'struct sockaddr_in') so that they have the
698 * same skip bytes and data length.
701 rn_addroute(const void *key
, const void *mask
,
702 struct radix_node_head
*head
, struct radix_node nodes
[2])
704 struct radix_node
*top
, *t
, *x
, *tt
, *saved_tt
;
705 struct radix_mask
*m
, **mp
;
710 top
= head
->rnh_treetop
;
715 * In dealing with non-contiguous masks, there may be
716 * many different routes which have the same mask.
717 * We will find it useful to have a unique pointer to
718 * the mask to speed avoiding duplicate references at
719 * nodes and possibly save time in calculating indices.
722 if ((x
= rn_addmask(mask
, false, top
->rn_offset
,
723 head
->rnh_maskhead
)) == NULL
)
725 bit_leaf
= x
->rn_bit
;
726 bit
= -1 - x
->rn_bit
;
730 * Deal with duplicated keys: attach node to previous instance
732 saved_tt
= tt
= rn_insert(key
, head
, &keyduplicated
, nodes
);
735 * Deal with duplicated key: attach node to previous instance.
737 * The masks for a duplicated key are sorted in the same way
738 * as in a mask list -- most specific to least specific.
739 * This may require the unfortunate nuisance of relocating
740 * the head of the list.
742 * If the mask is NULL (i.e., a host route), it's placed at
743 * the beginning (i.e., list head).
745 * If the mask is not duplicated, we wouldn't find it among
746 * possible duplicate key entries anyway, so the test below
749 for (t
= tt
; tt
!= NULL
; t
= tt
, tt
= tt
->rn_dupedkey
) {
750 if (tt
->rn_mask
== mask
)
751 return (NULL
); /* same route already exists */
752 if (mask
== NULL
/* host route */ ||
753 (tt
->rn_mask
!= NULL
&&
754 ((bit_leaf
< tt
->rn_bit
) /* index(mask) > node */
755 || rn_refines(mask
, tt
->rn_mask
)
756 || rn_lexobetter(mask
, tt
->rn_mask
))))
759 if (tt
== saved_tt
) {
760 struct radix_node
*xx
= x
;
761 /* link in at head of list */
762 (tt
= nodes
)->rn_dupedkey
= t
;
763 tt
->rn_flags
= t
->rn_flags
;
764 tt
->rn_parent
= x
= t
->rn_parent
;
765 t
->rn_parent
= tt
; /* parent */
770 saved_tt
= tt
; x
= xx
;
772 (tt
= nodes
)->rn_dupedkey
= t
->rn_dupedkey
;
774 tt
->rn_parent
= t
; /* parent */
775 if (tt
->rn_dupedkey
!= NULL
) /* parent */
776 tt
->rn_dupedkey
->rn_parent
= tt
; /* parent */
780 tt
->rn_flags
= RNF_ACTIVE
;
782 tt
->rn_info
= rn_nodenum
++;
783 tt
->rn_twin
= tt
+ 1;
784 tt
->rn_twin
->rn_info
= rn_nodenum
++;
785 tt
->rn_ybro
= rn_clist
;
795 tt
->rn_bit
= x
->rn_bit
;
796 tt
->rn_flags
|= x
->rn_flags
& RNF_NORMAL
;
798 t
= saved_tt
->rn_parent
;
801 bit_leaf
= -1 - t
->rn_bit
;
802 if (t
->rn_right
== saved_tt
)
806 /* Promote general routes from below */
810 if (x
->rn_mask
!= NULL
&&
811 x
->rn_bit
>= bit_leaf
&&
812 x
->rn_mklist
== NULL
) {
813 *mp
= m
= rn_new_radix_mask(x
, NULL
);
819 } else if (x
->rn_mklist
!= NULL
) {
820 /* Skip over masks whose index is > that of new node. */
821 for (mp
= &x
->rn_mklist
; (m
= *mp
) != NULL
; mp
= &m
->rm_next
) {
822 if (m
->rm_bit
>= bit_leaf
)
830 if (mask
== NULL
|| bit
> t
->rn_bit
)
831 return (tt
); /* can't lift at all */
834 * Add new route to the highest possible ancestor's list.
836 bit_leaf
= tt
->rn_bit
;
840 } while (bit
<= t
->rn_bit
&& x
!= top
);
842 * Search through routes associated with node to
843 * insert new route according to index.
844 * Need same criteria as when sorting dupedkeys to avoid
845 * double loop on deletion.
847 for (mp
= &x
->rn_mklist
; (m
= *mp
) != NULL
; mp
= &m
->rm_next
) {
848 if (m
->rm_bit
< bit_leaf
)
850 if (m
->rm_bit
> bit_leaf
)
852 if (m
->rm_flags
& RNF_NORMAL
) {
853 mmask
= m
->rm_leaf
->rn_mask
;
854 if (tt
->rn_flags
& RNF_NORMAL
) {
856 "Non-unique normal route, mask not entered\n");
866 if (rn_refines(mask
, mmask
) || rn_lexobetter(mask
, mmask
))
869 *mp
= rn_new_radix_mask(tt
, *mp
);
874 rn_delete(const void *key
, const void *mask
, struct radix_node_head
*head
)
876 struct radix_node
*top
, *t
, *p
, *x
, *tt
, *saved_tt
, *dupedkey
;
877 struct radix_mask
*m
, *saved_m
, **mp
;
878 int bit
, head_off
, klen
, cpu
;
881 x
= head
->rnh_treetop
;
882 tt
= rn_search(key
, x
);
883 head_off
= x
->rn_offset
;
888 bcmp((const u_char
*)key
+ head_off
, tt
->rn_key
+ head_off
,
889 klen
- head_off
) != 0)
893 * Delete our route from mask lists.
896 if ((x
= rn_addmask(mask
, true, head_off
,
897 head
->rnh_maskhead
)) == NULL
)
900 while (tt
->rn_mask
!= mask
) {
901 if ((tt
= tt
->rn_dupedkey
) == NULL
)
905 if (tt
->rn_mask
== NULL
|| (saved_m
= m
= tt
->rn_mklist
) == NULL
)
907 if (tt
->rn_flags
& RNF_NORMAL
) {
908 if (m
->rm_leaf
!= tt
|| m
->rm_refs
> 0) {
909 log(LOG_ERR
, "rn_delete: inconsistent annotation\n");
910 return (NULL
); /* dangling ref could cause disaster */
913 if (m
->rm_mask
!= tt
->rn_mask
) {
914 log(LOG_ERR
, "rn_delete: inconsistent annotation\n");
917 if (--m
->rm_refs
>= 0)
920 bit
= -1 - tt
->rn_bit
;
921 t
= saved_tt
->rn_parent
;
923 goto on1
; /* Wasn't lifted at all */
928 } while (bit
<= t
->rn_bit
&& x
!= top
);
929 for (mp
= &x
->rn_mklist
; (m
= *mp
) != NULL
; mp
= &m
->rm_next
)
932 MKFree(&rn_mkfreelist
[cpu
], m
);
936 log(LOG_ERR
, "rn_delete: couldn't find our annotation\n");
937 if (tt
->rn_flags
& RNF_NORMAL
)
938 return (NULL
); /* Dangling ref to us */
943 * Eliminate us from tree
945 if (tt
->rn_flags
& RNF_ROOT
)
949 /* Get us out of the creation list */
950 for (t
= rn_clist
; t
!= NULL
&& t
->rn_ybro
!= tt
; t
= t
->rn_ybro
)
953 t
->rn_ybro
= tt
->rn_ybro
;
957 dupedkey
= saved_tt
->rn_dupedkey
;
958 if (dupedkey
!= NULL
) {
960 * at this point, tt is the deletion target and saved_tt
961 * is the head of the dupekey chain
963 if (tt
== saved_tt
) {
964 /* remove from head of chain */
967 if (t
->rn_left
== tt
)
972 /* find node in front of tt on the chain */
973 for (x
= p
= saved_tt
; p
!= NULL
&& p
->rn_dupedkey
!= tt
;)
976 p
->rn_dupedkey
= tt
->rn_dupedkey
;
977 if (tt
->rn_dupedkey
) /* parent */
978 tt
->rn_dupedkey
->rn_parent
= p
;
981 log(LOG_ERR
, "rn_delete: couldn't find us\n");
985 if (t
->rn_flags
& RNF_ACTIVE
) {
999 x
->rn_left
->rn_parent
= x
;
1000 x
->rn_right
->rn_parent
= x
;
1004 if (t
->rn_left
== tt
)
1009 if (p
->rn_right
== t
)
1015 * Demote routes attached to us.
1017 if (t
->rn_mklist
!= NULL
) {
1018 if (x
->rn_bit
>= 0) {
1019 for (mp
= &x
->rn_mklist
; (m
= *mp
) != NULL
;)
1024 * If there are any (key, mask) pairs in a sibling
1025 * duped-key chain, some subset will appear sorted
1026 * in the same order attached to our mklist.
1028 for (m
= t
->rn_mklist
; m
&& x
; x
= x
->rn_dupedkey
)
1029 if (m
== x
->rn_mklist
) {
1030 struct radix_mask
*mm
= m
->rm_next
;
1032 x
->rn_mklist
= NULL
;
1033 if (--(m
->rm_refs
) < 0)
1034 MKFree(&rn_mkfreelist
[cpu
], m
);
1039 "rn_delete: Orphaned Mask %p at %p\n",
1040 (void *)m
, (void *)x
);
1045 * We may be holding an active internal node in the tree.
1056 t
->rn_left
->rn_parent
= t
;
1057 t
->rn_right
->rn_parent
= t
;
1059 if (p
->rn_left
== x
)
1066 tt
[0].rn_flags
&= ~RNF_ACTIVE
;
1067 tt
[1].rn_flags
&= ~RNF_ACTIVE
;
1072 * This is the same as rn_walktree() except for the parameters and the
1076 rn_walktree_from(struct radix_node_head
*h
, const void *_addr
,
1077 const void *_mask
, walktree_f_t
*f
, void *w
)
1079 struct radix_node
*rn
, *base
, *next
, *last
;
1080 const u_char
*addr
, *mask
;
1090 * rn_search_m() is sort-of-open-coded here. We cannot use that
1091 * function because we need to keep track of the last node seen.
1093 /* kprintf("about to search\n"); */
1094 for (rn
= h
->rnh_treetop
; rn
->rn_bit
>= 0; ) {
1096 /* kprintf("rn_bit %d, rn_bmask %x, mask[rn_offset] %x\n",
1097 rn->rn_bit, rn->rn_bmask, mask[rn->rn_offset]); */
1098 if (!(rn
->rn_bmask
& mask
[rn
->rn_offset
])) {
1101 if (rn
->rn_bmask
& addr
[rn
->rn_offset
]) {
1107 /* kprintf("done searching\n"); */
1110 * Two cases: either we stepped off the end of our mask,
1111 * in which case last == rn, or we reached a leaf, in which
1112 * case we want to start from the last node we looked at.
1113 * Either way, last is the node we want to start from.
1118 /* kprintf("rn %p, lastb %d\n", rn, lastb);*/
1121 * This gets complicated because we may delete the node
1122 * while applying the function f to it, so we need to calculate
1123 * the successor node in advance.
1125 while (rn
->rn_bit
>= 0)
1129 /* kprintf("node %p (%d)\n", rn, rn->rn_bit); */
1131 /* If at right child go back up, otherwise, go right */
1132 while (rn
->rn_parent
->rn_right
== rn
&&
1133 !(rn
->rn_flags
& RNF_ROOT
)) {
1136 /* if went up beyond last, stop */
1137 if (rn
->rn_bit
< lastb
) {
1139 /* kprintf("up too far\n"); */
1143 /* Find the next *leaf* since next node might vanish, too */
1144 for (rn
= rn
->rn_parent
->rn_right
; rn
->rn_bit
>= 0;)
1147 /* Process leaves */
1148 while ((rn
= base
) != NULL
) {
1149 base
= rn
->rn_dupedkey
;
1150 /* kprintf("leaf %p\n", rn); */
1151 if (!(rn
->rn_flags
& RNF_ROOT
) && (error
= (*f
)(rn
, w
)))
1156 if (rn
->rn_flags
& RNF_ROOT
) {
1157 /* kprintf("root, stopping"); */
1166 rn_walktree_at(struct radix_node_head
*h
, const void *addr
, const void *mask
,
1167 walktree_f_t
*f
, void *w
)
1169 struct radix_node
*rn
, *base
, *next
;
1172 rn
= h
->rnh_treetop
;
1175 * This gets complicated because we may delete the node
1176 * while applying the function f to it, so we need to calculate
1177 * the successor node in advance.
1180 /* First time through node, go left */
1181 while (rn
->rn_bit
>= 0)
1185 rn
= rn_search_m(addr
, mask
, rn
);
1187 rn
= rn_search(addr
, rn
);
1191 /* If at right child go back up, otherwise, go right */
1192 while (rn
->rn_parent
->rn_right
== rn
&&
1193 !(rn
->rn_flags
& RNF_ROOT
))
1195 /* Find the next *leaf* since next node might vanish, too */
1196 for (rn
= rn
->rn_parent
->rn_right
; rn
->rn_bit
>= 0;)
1199 /* Process leaves */
1200 while ((rn
= base
)) {
1201 base
= rn
->rn_dupedkey
;
1202 if (!(rn
->rn_flags
& RNF_ROOT
) && (error
= (*f
)(rn
, w
)))
1206 if (rn
->rn_flags
& RNF_ROOT
)
1213 rn_walktree(struct radix_node_head
*h
, walktree_f_t
*f
, void *w
)
1215 return rn_walktree_at(h
, NULL
, NULL
, f
, w
);
1219 * Allocate and initialize an empty radix tree at <head>.
1221 * The created radix_node_head embeds 3 nodes in the order of
1222 * {left,root,right}. These nodes are flagged with RNF_ROOT and thus
1223 * cannot be freed. The left and right leaves are initialized with
1224 * all-zero and all-one keys, respectively, and with the significant
1225 * byte starting at <off_bytes>.
1227 * The <maskhead> refers to another radix tree for storing the network
1228 * masks (so aka mask tree). It is also created by this function with
1229 * <maskhead>=NULL; the <off_bytes> parameter is ignored and auto set
1230 * to be zero (0). The reason of requiring <off_bytes> be zero is that
1231 * a mask tree can be shared with multiple radix trees of different
1232 * address families that have different offset bytes; e.g.,
1233 * offsetof(struct sockaddr_in, sin_addr) !=
1234 * offsetof(struct sockaddr_in6, sin6_addr).
1236 * Return 1 on success, 0 on error.
1239 rn_inithead(struct radix_node_head
**head
, struct radix_node_head
*maskhead
,
1242 struct radix_node_head
*rnh
;
1243 struct radix_node
*root
, *left
, *right
;
1245 if (*head
!= NULL
) /* already initialized */
1248 R_Malloc(rnh
, struct radix_node_head
*, sizeof *rnh
);
1252 if (maskhead
== NULL
) /* mask tree initialization */
1254 if (off_bytes
>= RN_MAXKEYLEN
) /* prevent possible misuse */
1255 panic("%s: invalid off_bytes=%d", __func__
, off_bytes
);
1257 bzero(rnh
, sizeof *rnh
);
1260 root
= rn_newpair(rn_zeros
, off_bytes
* NBBY
, rnh
->rnh_nodes
);
1261 right
= &rnh
->rnh_nodes
[2];
1262 root
->rn_parent
= root
;
1263 root
->rn_flags
= RNF_ROOT
| RNF_ACTIVE
;
1264 root
->rn_right
= right
;
1266 left
= root
->rn_left
;
1267 left
->rn_bit
= -1 - off_bytes
* NBBY
;
1268 left
->rn_flags
= root
->rn_flags
;
1271 right
->rn_key
= rn_ones
;
1273 rnh
->rnh_treetop
= root
;
1274 rnh
->rnh_maskhead
= maskhead
;
1276 rnh
->rnh_addaddr
= rn_addroute
;
1277 rnh
->rnh_deladdr
= rn_delete
;
1278 rnh
->rnh_matchaddr
= rn_match
;
1279 rnh
->rnh_lookup
= rn_lookup
;
1280 rnh
->rnh_walktree
= rn_walktree
;
1281 rnh
->rnh_walktree_from
= rn_walktree_from
;
1282 rnh
->rnh_walktree_at
= rn_walktree_at
;
1288 * Callback function to be used in rn_flush() to empty a mask tree.
1291 rn_freemask(struct radix_node
*rn
)
1293 if (rn
->rn_mask
!= NULL
)
1294 panic("%s: not a mask node", __func__
);
1299 struct rn_flush_ctx
{
1300 struct radix_node_head
*head
;
1305 rn_flush_walker(struct radix_node
*rn
, void *arg
)
1307 struct rn_flush_ctx
*ctx
= arg
;
1308 struct radix_node
*node
;
1310 node
= ctx
->head
->rnh_deladdr(rn
->rn_key
, rn
->rn_mask
, ctx
->head
);
1312 panic("%s: deleted wrong node: %p, want: %p",
1313 __func__
, node
, rn
);
1321 #define IS_EMPTY(head) \
1322 (((head)->rnh_treetop == &(head)->rnh_nodes[1]) && \
1323 ((head)->rnh_treetop->rn_left == &(head)->rnh_nodes[0]) && \
1324 ((head)->rnh_treetop->rn_right == &(head)->rnh_nodes[2]))
1327 * Flush all nodes in the radix tree at <head>.
1328 * If the callback function <f> is specified, it is called against every
1329 * flushed node to allow the caller to do extra cleanups.
1332 rn_flush(struct radix_node_head
*head
, freenode_f_t
*f
)
1334 struct rn_flush_ctx ctx
;
1336 if (f
== rn_freemask
&& head
->rnh_maskhead
!= NULL
)
1337 panic("%s: rn_freemask() used with non-mask tree", __func__
);
1341 head
->rnh_walktree(head
, rn_flush_walker
, &ctx
);
1343 if (!IS_EMPTY(head
))
1344 panic("%s: failed to flush all nodes", __func__
);
1348 * Free an empty radix tree at <head>.
1350 * NOTE: The radix tree must be first emptied by rn_flush().
1353 rn_freehead(struct radix_node_head
*head
)
1355 if (!IS_EMPTY(head
))
1356 panic("%s: radix tree not empty", __func__
);
1364 rn_init_handler(netmsg_t msg
)
1368 ASSERT_NETISR_NCPUS(cpu
);
1369 if (rn_inithead(&mask_rnheads
[cpu
], NULL
, 0) == 0)
1370 panic("%s: failed to create mask tree", __func__
);
1372 netisr_forwardmsg(&msg
->base
, cpu
+ 1);
1378 struct netmsg_base msg
;
1381 SLIST_FOREACH(dom
, &domains
, dom_next
) {
1382 if (dom
->dom_maxrtkey
> RN_MAXKEYLEN
) {
1383 panic("domain %s maxkey too big %d/%d",
1384 dom
->dom_name
, dom
->dom_maxrtkey
, RN_MAXKEYLEN
);
1388 netmsg_init(&msg
, NULL
, &curthread
->td_msgport
, 0, rn_init_handler
);
1389 netisr_domsg_global(&msg
);
1392 struct radix_node_head
*
1393 rn_cpumaskhead(int cpu
)
1395 ASSERT_NETISR_NCPUS(cpu
);
1396 KKASSERT(mask_rnheads
[cpu
] != NULL
);
1397 return mask_rnheads
[cpu
];
1400 #else /* !_KERNEL */
1405 if (rn_inithead(&mask_rnheads
[0], NULL
, 0) == 0)
1406 panic("%s: failed to create mask tree", __func__
);
1409 struct radix_node_head
*
1410 rn_cpumaskhead(int cpu __unused
)
1412 return mask_rnheads
[0];
1415 #endif /* _KERNEL */