inet6: require RTF_ANNOUNCE to proxy NS
[dragonfly.git] / sys / net / radix.c
blob1a11f109ba04e627bce0287ba3915651980f72a4
1 /*
2 * Copyright (c) 1988, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
29 * @(#)radix.c 8.4 (Berkeley) 11/2/94
30 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.3 2002/04/28 05:40:25 suz Exp $
34 * Routines to build and maintain radix trees for routing lookups.
37 #include <sys/param.h>
38 #ifdef _KERNEL
39 #include <sys/systm.h>
40 #include <sys/domain.h>
41 #include <sys/globaldata.h>
42 #include <sys/malloc.h>
43 #include <sys/queue.h>
44 #include <sys/syslog.h>
45 #include <sys/thread.h>
46 #include <net/netisr2.h>
47 #include <net/netmsg2.h>
48 #else
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <strings.h>
52 #include <syslog.h>
53 #endif
54 #include <net/radix.h>
56 #ifndef _KERNEL
57 #undef MAXCPU
58 #define MAXCPU 1
59 #define mycpuid 0
60 #define log(l, ...) syslog(l, __VA_ARGS__)
61 #define kprintf(fmt, ...) printf(fmt, ##__VA_ARGS__)
62 #define print_backtrace(...) /* nothing */
63 #define panic(fmt, ...) \
64 do { \
65 fprintf(stderr, "PANIC: " fmt "\n", ##__VA_ARGS__); \
66 abort(); \
67 } while (0)
68 #endif
71  * The arguments to the radix functions are really counted byte arrays with
72  * the length in the first byte. struct sockaddr's fit this type structurally.
73  * Cast the result to int as this is the dominant usage.
75 #define clen(c) (int)(*(const u_char *)(c))
/* Per-cpu free lists of radix_mask annotations (see MKGet()/MKFree()). */
78 static struct radix_mask *rn_mkfreelist[MAXCPU];
/* Per-cpu mask trees; NOTE(review): their creation is not visible in this chunk. */
79 static struct radix_node_head *mask_rnheads[MAXCPU];
/* Canonical all-zero / all-one keys, used for root leaves and host masks. */
81 static const u_char rn_zeros[RN_MAXKEYLEN];
82 static const u_char rn_ones[RN_MAXKEYLEN] = RN_MAXKEYONES;
84 #ifdef RN_DEBUG
/* Debug bookkeeping: node creation counter and chain of created leaves. */
85 static int rn_nodenum;
86 static struct radix_node *rn_clist;
87 static bool rn_debug = true;
88 #endif
91 static __inline struct radix_mask *
92 MKGet(struct radix_mask **l)
94 struct radix_mask *m;
96 if (*l != NULL) {
97 m = *l;
98 *l = m->rm_next;
99 } else {
100 R_Malloc(m, struct radix_mask *, sizeof(*m));
102 return m;
105 static __inline void
106 MKFree(struct radix_mask **l, struct radix_mask *m)
108 m->rm_next = *l;
109 *l = m;
113 * The data structure for the keys is a radix tree with one way
114 * branching removed. The index rn_bit at an internal node n represents a bit
115 * position to be tested. The tree is arranged so that all descendants
116 * of a node n have keys whose bits all agree up to position rn_bit - 1.
117 * (We say the index of n is rn_bit.)
119 * There is at least one descendant which has a one bit at position rn_bit,
120 * and at least one with a zero there.
122 * A route is determined by a pair of key and mask. We require that the
123 * bit-wise logical and of the key and mask to be the key.
124 * We define the index of a route associated with the mask to be
125 * the first bit number in the mask where 0 occurs (with bit number 0
126 * representing the highest order bit).
128 * We say a mask is normal if every bit is 0, past the index of the mask.
129 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
130 * and m is a normal mask, then the route applies to every descendant of n.
131 * If the index(m) < rn_bit, this implies the trailing last few bits of k
132 * before bit rn_bit are all 0, (and hence consequently true of every
133 * descendant of n), so the route applies to all descendants of the node
134 * as well.
136 * Similar logic shows that a non-normal mask m such that
137 * index(m) <= index(n) could potentially apply to many children of n.
138 * Thus, for each non-host route, we attach its mask to a list at an internal
139 * node as high in the tree as we can go.
141 * The present version of the code makes use of normal routes in short-
142 * circuiting an explict mask and compare operation when testing whether
143 * a key satisfies a normal route, and also in remembering the unique leaf
144 * that governs a subtree.
148 * Search key <key> in the subtree from <head> until encountering
149 * a leaf node and return it.
151 * NOTE: Will never return NULL because the embedded default root node.
153 static struct radix_node *
154 rn_search(const void *_key, struct radix_node *head)
156 struct radix_node *x;
157 const u_char *key;
159 key = _key;
160 x = head;
161 while (x->rn_bit >= 0) {
162 if (x->rn_bmask & key[x->rn_offset])
163 x = x->rn_right;
164 else
165 x = x->rn_left;
167 return (x);
171 * Similar to rn_search() but with the netmask <mask> applied.
173 * NOTE: The netmask can be the all-zero default mask.
175 static struct radix_node *
176 rn_search_m(const void *_key, const void *_mask, struct radix_node *head)
178 struct radix_node *x;
179 const u_char *key, *mask;
181 key = _key;
182 mask = _mask;
183 x = head;
184 while (x->rn_bit >= 0) {
185 if ((x->rn_bmask & mask[x->rn_offset]) &&
186 (x->rn_bmask & key[x->rn_offset]))
187 x = x->rn_right;
188 else
189 x = x->rn_left;
191 return (x);
195 * Compare the two netmasks and return true if netmask <m> is strictly more
196 * specific than netmask <n>.
198 * NOTE: Non-contiguous netmask is supported.
/*
 * Return true iff netmask <_m> is strictly more specific than <_n>:
 * every bit set in n must also be set in m, and the two must differ.
 * Masks are length-prefixed byte arrays whose stored length excludes
 * trailing zero bytes; non-contiguous masks are supported.
 */
bool
rn_refines(const void *_m, const void *_n)
{
	const unsigned char *m = _m, *n = _n;
	const unsigned char *n_end = n + *n;	/* one past n's last byte */
	const unsigned char *cmp_end = n_end;
	int extra = (int)*n - (int)*m;		/* n longer than m by this */
	bool identical = true;

	/* Step past the length bytes. */
	m++;
	n++;
	if (extra > 0)
		cmp_end -= extra;	/* only compare the overlapping part */

	/* Over the overlap, n must be a bit-subset of m. */
	for (; n < cmp_end; n++, m++) {
		if (*n & ~*m)
			return (false);
		if (*n != *m)
			identical = false;
	}
	/* Any set bit in n's uncompared tail makes n the more specific. */
	for (; n < n_end; n++) {
		if (*n != 0)
			return (false);
	}
	/* Equal so far and m longer: m wins on any set bit in its tail. */
	if (identical && extra < 0) {
		const unsigned char *m_end = m - extra;

		while (m < m_end) {
			if (*m++ != 0)
				return (true);
		}
	}
	return (!identical);
}
237 * Lookup the longest-prefix match of the key <key> in the tree <head>.
238 * The netmask <mask> can be NULL; if specified, the result must have the
239 * same mask, or NULL is returned.
241 struct radix_node *
242 rn_lookup(const void *_key, const void *_mask, struct radix_node_head *head)
244 struct radix_node *x;
245 const u_char *key, *mask, *netmask;
247 key = _key;
248 mask = _mask;
249 netmask = NULL;
251 if (mask != NULL) {
252 x = rn_addmask(mask, true, head->rnh_treetop->rn_offset,
253 head->rnh_maskhead);
254 if (x == NULL) /* mask doesn't exist in the mask tree */
255 return (NULL);
256 netmask = x->rn_key;
259 x = rn_match(key, head);
260 if (x != NULL && netmask != NULL) {
261 /* check the duped-key chain for different masks */
262 while (x != NULL && x->rn_mask != netmask)
263 x = x->rn_dupedkey;
266 return (x);
270 * Check whether the key <key> matches the (key, mask) of the given
271 * radix node <leaf>. The <skip> parameter gives the number of bytes
272 * to skip for the keys and mask.
274 static bool
275 rn_satisfies_leaf(const void *key, struct radix_node *leaf, int skip)
277 const u_char *cp, *cp2, *cp3, *cplim;
278 int length;
280 cp = key;
281 cp2 = leaf->rn_key;
282 cp3 = leaf->rn_mask;
284 length = MIN(clen(cp), clen(cp2));
285 if (cp3 == NULL)
286 cp3 = rn_ones;
287 else
288 length = MIN(length, clen(cp3));
290 cplim = cp + length;
291 cp2 += skip;
292 cp3 += skip;
293 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) {
294 if ((*cp ^ *cp2) & *cp3)
295 return (false);
298 return (true);
303 * Search for the longest-prefix match of the key <key>.
/*
 * Longest-prefix match of <key> in <head>.  Returns the most specific
 * matching leaf, or NULL when nothing (not even a default) covers the
 * key.  The embedded RNF_ROOT nodes are never returned.
 */
305 struct radix_node *
306 rn_match(const void *key, struct radix_node_head *head)
308 struct radix_node *top, *t, *saved_t;
309 const u_char *cp, *cp2, *cplim;
310 int klen, matched_off, test, bit, rn_bit;
312 top = head->rnh_treetop;
/* Leaf with the closest bit pattern; its key bytes may still differ. */
314 t = rn_search(key, top);
316 * See if we match exactly as a host destination, or at least learn
317 * how many bits match, for normal mask finesse.
319 * It doesn't hurt to limit how many bytes to check to the length of
320 * the mask, since if it matches we had a genuine match and the leaf
321 * we have is the most specific one anyway; if it didn't match with
322 * a shorter length it would fail with a long one. This wins big
323 * for class B&C netmasks which are probably the most common case...
325 if (t->rn_mask != NULL)
326 klen = clen(t->rn_mask);
327 else
328 klen = clen(key);
329 cplim = (const u_char *)key + klen;
330 cp = (const u_char *)key + top->rn_offset;
331 cp2 = t->rn_key + top->rn_offset;
332 for (; cp < cplim; cp++, cp2++) {
333 if (*cp != *cp2)
334 goto on1;
338 * This extra grot is in case we are explicitly asked
339 * to look up the default (i.e., all-zero address). Ugh!
341 * Never return the root node itself, it seems to cause a
342 * lot of confusion.
344 if (t->rn_flags & RNF_ROOT)
345 t = t->rn_dupedkey;
346 return (t);
348 on1:
349 /* Find the first bit that differs. */
350 test = (*cp ^ *cp2) & 0xff;
351 for (bit = 7; (test >>= 1) > 0;)
352 bit--;
/* bit := absolute index of the first mismatching bit in the key. */
353 matched_off = cp - (const u_char *)key;
354 bit += matched_off << 3;
/* rn_bit: leaf-style encoding of the match depth, for index compares. */
355 rn_bit = -1 - bit;
358 * Even if we don't match exactly as a host, we may match if the leaf
359 * we wound up at has routes to networks. Check those routes.
361 saved_t = t;
362 /* Skip the host route, which might only appear at the first. */
363 if (t->rn_mask == NULL)
364 t = t->rn_dupedkey;
365 for (; t != NULL; t = t->rn_dupedkey) {
366 if (t->rn_flags & RNF_NORMAL) {
/* Normal mask: a pure index compare suffices (no byte loop). */
367 if (rn_bit <= t->rn_bit)
368 return (t);
369 } else if (rn_satisfies_leaf(key, t, matched_off))
370 return (t);
372 t = saved_t;
375 * Start searching up the tree for network routes.
/* Climb toward the root, testing the mask annotations (rn_mklist)
 * hung on each ancestor; stop once the top node has been checked. */
377 do {
378 struct radix_node *x;
379 struct radix_mask *m;
380 int skip;
382 t = t->rn_parent;
384 * If non-contiguous masks ever become important
385 * we can restore the masking and open coding of
386 * the search and satisfaction test and put the
387 * calculation of "skip" back before the "do".
389 for (m = t->rn_mklist; m != NULL; m = m->rm_next) {
390 if (m->rm_flags & RNF_NORMAL) {
391 if (rn_bit <= m->rm_bit)
392 return (m->rm_leaf);
393 } else {
394 skip = MIN(t->rn_offset, matched_off);
/* Re-descend under the mask, then locate that exact mask
 * in the resulting duped-key chain before verifying. */
395 x = rn_search_m(key, m->rm_mask, t);
396 while (x != NULL && x->rn_mask != m->rm_mask)
397 x = x->rn_dupedkey;
398 if (x != NULL &&
399 rn_satisfies_leaf(key, x, skip))
400 return (x);
403 } while (t != top);
405 return (NULL);
409 * Whenever to add a new leaf to the tree, another parent node is needed.
410 * So they are allocated as an array of two elements: the first element is
411 * the leaf, the second one is the parent node.
413 * This function initializes the given pair of nodes <nodes>, so that the
414 * leaf is the left child of the parent node.
416 static struct radix_node *
417 rn_newpair(const void *key, int bit, struct radix_node nodes[2])
419 struct radix_node *left, *parent;
421 left = &nodes[0];
422 parent = &nodes[1];
424 parent->rn_bit = bit;
425 parent->rn_bmask = 0x80 >> (bit & 0x7);
426 parent->rn_offset = bit >> 3;
427 parent->rn_left = left;
428 parent->rn_flags = RNF_ACTIVE;
429 parent->rn_mklist = NULL;
431 left->rn_bit = -1;
432 left->rn_key = key;
433 left->rn_parent = parent;
434 left->rn_flags = parent->rn_flags;
435 left->rn_mklist = NULL;
437 #ifdef RN_DEBUG
438 left->rn_info = rn_nodenum++;
439 parent->rn_info = rn_nodenum++;
440 left->rn_twin = parent;
441 left->rn_ybro = rn_clist;
442 rn_clist = left;
443 #endif
445 return (parent);
449 * Insert the key <key> to the radix tree <head>.
451 * If the key already exists, then set <dupentry> to 'true' and return the
452 * node of the existing duped key. Otherwise, set <dupentry> to 'false',
453 * insert the key to the tree by making use of the given nodes <nodes>, and
454 * return the node of the inserted key (i.e., &nodes[0]).
/*
 * Insert <key> into <head> using the caller-supplied pair <nodes>.
 * If the key already exists, sets *dupentry to true and returns the
 * existing leaf; otherwise sets it to false, wires the new pair into
 * the tree, and returns the new leaf (&nodes[0]).
 */
456 static struct radix_node *
457 rn_insert(const void *key, struct radix_node_head *head, bool *dupentry,
458 struct radix_node nodes[2])
460 struct radix_node *top, *t, *tt;
461 const u_char *cp;
462 unsigned int bit;
463 int head_off, klen;
465 top = head->rnh_treetop;
466 head_off = top->rn_offset;
467 klen = clen(key);
468 cp = (const u_char *)key + head_off;
/* Leaf with the longest matching bit prefix for this key. */
469 t = rn_search(key, top);
472 * Find the first bit where the key and t->rn_key differ.
475 const u_char *cp2 = t->rn_key + head_off;
476 const u_char *cplim = (const u_char *)key + klen;
477 int cmp_res;
479 while (cp < cplim) {
480 if (*cp2++ != *cp++)
481 goto on1;
/* All bytes equal: the key is already present at leaf t. */
484 *dupentry = true;
485 return (t);
487 on1:
488 *dupentry = false;
/* bit := absolute index of the first bit where key and t->rn_key disagree. */
489 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
490 for (bit = (cp - (const u_char *)key) << 3; cmp_res; bit--)
491 cmp_res >>= 1;
494 struct radix_node *p, *x = top;
/* Re-descend from the top to find the attachment point: the new
 * interior node goes between p and x. */
496 cp = key;
497 do {
498 p = x;
499 if (cp[x->rn_offset] & x->rn_bmask)
500 x = x->rn_right;
501 else
502 x = x->rn_left;
503 } while (bit > (unsigned int)x->rn_bit);
504 /* shortcut of: x->rn_bit < bit && x->rn_bit >= 0 */
505 #ifdef RN_DEBUG
506 if (rn_debug) {
507 log(LOG_DEBUG, "%s: Going In:\n", __func__);
508 traverse(p);
510 #endif
/* t is the new interior node, tt the new leaf (t's left child). */
511 t = rn_newpair(key, bit, nodes);
512 tt = t->rn_left;
513 if ((cp[p->rn_offset] & p->rn_bmask) == 0)
514 p->rn_left = t;
515 else
516 p->rn_right = t;
517 x->rn_parent = t;
518 t->rn_parent = p; /* frees x, p as temp vars below */
/* Put the displaced subtree x and the new leaf tt on the correct
 * sides of t, according to the new key's bit at t's test position. */
519 if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
520 t->rn_right = x;
521 } else {
522 t->rn_right = tt;
523 t->rn_left = x;
525 #ifdef RN_DEBUG
526 if (rn_debug) {
527 log(LOG_DEBUG, "%s: Coming Out:\n", __func__);
528 traverse(p);
530 #endif
532 return (tt);
536 * Add the netmask <mask> to the mask tree <maskhead>. If <search> is
537 * 'true', then only check the existence of the given mask but don't
538 * actually add it.
540 * The <skip> parameter specifies the number of bytes to skip in <mask>
541 * to obtain the mask data. (NOTE: The length of a mask key doesn't
542 * count the trailing zero bytes.)
544 * Return a pointer to the mask node on success; otherwise NULL on error.
/*
 * Look up (search=true) or insert (search=false) netmask <_mask> in the
 * mask tree <maskhead>.  On insert, also computes the mask's index
 * (stored leaf-style in rn_bit) and flags contiguous masks RNF_NORMAL.
 * Returns the mask node, or NULL on lookup miss / allocation failure.
 */
546 struct radix_node *
547 rn_addmask(const void *_mask, bool search, int skip,
548 struct radix_node_head *maskhead)
550 struct radix_node *x, *saved_x;
551 const u_char *mask, *cp, *cplim;
552 u_char *p, addmask_key[RN_MAXKEYLEN];
553 int bit, mlen;
554 bool maskduplicated, isnormal;
556 mask = _mask;
/* Clamp the claimed length so a bogus mask cannot overrun the buffer. */
557 if ((mlen = clen(mask)) > RN_MAXKEYLEN)
558 mlen = RN_MAXKEYLEN;
559 if (skip == 0)
560 skip = 1;
561 if (mlen <= skip)
562 return (maskhead->rnh_nodes); /* all-zero key */
/* Build the canonical search key: all-ones filler over the skipped
 * header bytes, then the mask data, zero padded. */
564 bzero(addmask_key, sizeof(addmask_key));
565 if (skip > 1)
566 bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
567 bcopy(mask + skip, addmask_key + skip, mlen - skip);
568 /* Trim trailing zeroes. */
569 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
570 cp--;
571 mlen = cp - addmask_key;
572 if (mlen <= skip)
573 return (maskhead->rnh_nodes); /* all-zero key */
/* Length byte excludes the trailing zeroes just trimmed. */
575 *addmask_key = mlen;
576 x = rn_search(addmask_key, maskhead->rnh_treetop);
577 if (x->rn_key == NULL) {
578 kprintf("WARNING: radix_node->rn_key is NULL rn=%p\n", x);
579 print_backtrace(-1);
580 x = NULL;
581 } else if (bcmp(addmask_key, x->rn_key, mlen) != 0) {
582 x = NULL;
584 if (x != NULL || search)
585 return (x);
/* Not found and inserting: allocate the node pair plus key storage in
 * one chunk; the mask bytes live right after the two radix_nodes. */
587 R_Malloc(x, struct radix_node *, RN_MAXKEYLEN + 2 * (sizeof *x));
588 if ((saved_x = x) == NULL)
589 return (NULL);
591 bzero(x, RN_MAXKEYLEN + 2 * (sizeof *x));
592 mask = p = (u_char *)(x + 2);
593 bcopy(addmask_key, p, mlen);
594 x = rn_insert(mask, maskhead, &maskduplicated, x);
595 if (maskduplicated) {
596 log(LOG_ERR, "%s: mask impossibly already in tree", __func__);
597 R_Free(saved_x);
598 return (x);
602 * Calculate the index of mask, and check for normalcy.
604 * First find the first byte with a 0 bit, then if there are more
605 * bits left (remember we already trimmed the trailing zeros),
606 * the pattern must be one of those in normal_chars[], or we have
607 * a non-contiguous mask.
609 bit = 0;
610 isnormal = true;
611 cplim = mask + mlen;
612 for (cp = mask + skip; cp < cplim; cp++) {
613 if (*cp != 0xff)
614 break;
616 if (cp != cplim) {
617 static const u_char normal_chars[] = {
618 0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff
620 u_char j;
/* Count leading one bits in the first non-0xff byte. */
622 for (j = 0x80; (j & *cp) != 0; j >>= 1)
623 bit++;
624 if (cp != (cplim - 1) || *cp != normal_chars[bit])
625 isnormal = false;
/* Index of the mask: first zero bit, counted from the key start. */
627 bit += (cp - mask) << 3;
628 x->rn_bit = -1 - bit;
629 if (isnormal)
630 x->rn_flags |= RNF_NORMAL;
631 return (x);
635 * Compare the two netmasks and return true if netmask <m> is more
636 * specific than netmask <n>.
638 * NOTE: arbitrary ordering for non-contiguous masks.
/*
 * Arbitrary but stable ordering for (possibly non-contiguous) netmasks:
 * a longer mask sorts first; equal-length masks are scanned byte-wise
 * and <_m> wins as soon as one of its bytes is the larger.
 * (NOTE: deliberately not a strict lexicographic compare -- the scan
 * keeps going past a smaller byte, preserving the historic behavior.)
 */
static bool
rn_lexobetter(const void *_m, const void *_n)
{
	const unsigned char *m = _m, *n = _n;
	int mlen = *m, nlen = *n;

	if (mlen > nlen)
		return (true);	/* longer one must be checked first */
	if (mlen == nlen) {
		const unsigned char *end = m + mlen;

		for (; m < end; m++, n++) {
			if (*m > *n)
				return (true);
		}
	}
	return (false);
}
663 static struct radix_mask *
664 rn_new_radix_mask(struct radix_node *node, struct radix_mask *nextmask)
666 struct radix_mask *m;
668 m = MKGet(&rn_mkfreelist[mycpuid]);
669 if (m == NULL) {
670 log(LOG_ERR, "Mask for route not entered\n");
671 return (NULL);
674 bzero(m, sizeof(*m));
675 m->rm_bit = node->rn_bit;
676 m->rm_flags = node->rn_flags;
677 if (m->rm_flags & RNF_NORMAL)
678 m->rm_leaf = node;
679 else
680 m->rm_mask = node->rn_mask;
681 m->rm_next = nextmask;
682 node->rn_mklist = m;
684 return (m);
688 * Add the route (key, mask) to the radix tree <head> using the given
689 * nodes <nodes>. The netmask <mask> is NULL for a host route.
691 * Return the node of the inserted route on success. Otherwise, return
692 * NULL if the following happened:
693 * - failed to add the netmask to the mask tree (e.g., out of memory)
694 * - the identical route already exists
696 * NOTE: The address <key> and netmask <mask> must be of the same data
697 * structure (e.g., both 'struct sockaddr_in') so that they have the
698 * same skip bytes and data length.
/*
 * Add the route (key, mask) to <head> using the caller-supplied pair
 * <nodes>; mask == NULL means a host route.  Returns the new leaf, or
 * NULL when the identical route already exists or the mask could not
 * be entered in the mask tree.
 */
700 struct radix_node *
701 rn_addroute(const void *key, const void *mask,
702 struct radix_node_head *head, struct radix_node nodes[2])
704 struct radix_node *top, *t, *x, *tt, *saved_tt;
705 struct radix_mask *m, **mp;
706 int bit, bit_leaf;
707 bool keyduplicated;
708 const void *mmask;
710 top = head->rnh_treetop;
711 x = NULL;
712 bit = bit_leaf = 0;
715 * In dealing with non-contiguous masks, there may be
716 * many different routes which have the same mask.
717 * We will find it useful to have a unique pointer to
718 * the mask to speed avoiding duplicate references at
719 * nodes and possibly save time in calculating indices.
721 if (mask != NULL) {
/* Canonicalize: after this, 'mask' points into the mask tree and
 * equal masks compare equal by pointer. */
722 if ((x = rn_addmask(mask, false, top->rn_offset,
723 head->rnh_maskhead)) == NULL)
724 return (NULL);
725 bit_leaf = x->rn_bit;
726 bit = -1 - x->rn_bit;
727 mask = x->rn_key;
730 * Deal with duplicated keys: attach node to previous instance
732 saved_tt = tt = rn_insert(key, head, &keyduplicated, nodes);
733 if (keyduplicated) {
735 * Deal with duplicated key: attach node to previous instance.
737 * The masks for a duplicated key are sorted in the same way
738 * as in a mask list -- most specific to least specific.
739 * This may require the unfortunate nuisance of relocating
740 * the head of the list.
742 * If the mask is NULL (i.e., a host route), it's placed at
743 * the beginning (i.e., list head).
745 * If the mask is not duplicated, we wouldn't find it among
746 * possible duplicate key entries anyway, so the test below
747 * doesn't hurt.
749 for (t = tt; tt != NULL; t = tt, tt = tt->rn_dupedkey) {
750 if (tt->rn_mask == mask)
751 return (NULL); /* same route already exists */
752 if (mask == NULL /* host route */ ||
753 (tt->rn_mask != NULL &&
754 ((bit_leaf < tt->rn_bit) /* index(mask) > node */
755 || rn_refines(mask, tt->rn_mask)
756 || rn_lexobetter(mask, tt->rn_mask))))
757 break;
759 if (tt == saved_tt) {
760 struct radix_node *xx = x;
761 /* link in at head of list */
762 (tt = nodes)->rn_dupedkey = t;
763 tt->rn_flags = t->rn_flags;
764 tt->rn_parent = x = t->rn_parent;
765 t->rn_parent = tt; /* parent */
766 if (x->rn_left == t)
767 x->rn_left = tt;
768 else
769 x->rn_right = tt;
/* Restore x: it still refers to the canonical mask node. */
770 saved_tt = tt; x = xx;
771 } else {
772 (tt = nodes)->rn_dupedkey = t->rn_dupedkey;
773 t->rn_dupedkey = tt;
774 tt->rn_parent = t; /* parent */
775 if (tt->rn_dupedkey != NULL) /* parent */
776 tt->rn_dupedkey->rn_parent = tt; /* parent */
778 tt->rn_key = key;
779 tt->rn_bit = -1;
780 tt->rn_flags = RNF_ACTIVE;
781 #ifdef RN_DEBUG
782 tt->rn_info = rn_nodenum++;
783 tt->rn_twin = tt + 1;
784 tt->rn_twin->rn_info = rn_nodenum++;
785 tt->rn_ybro = rn_clist;
786 rn_clist = tt;
787 #endif
791 * Put mask in tree.
793 if (mask != NULL) {
794 tt->rn_mask = mask;
795 tt->rn_bit = x->rn_bit;
796 tt->rn_flags |= x->rn_flags & RNF_NORMAL;
/* t: parent interior node of the (possibly relocated) chain head. */
798 t = saved_tt->rn_parent;
799 if (keyduplicated)
800 goto on2;
801 bit_leaf = -1 - t->rn_bit;
802 if (t->rn_right == saved_tt)
803 x = t->rn_left;
804 else
805 x = t->rn_right;
806 /* Promote general routes from below */
807 if (x->rn_bit < 0) {
/* Sibling is a leaf chain: annotate t with any masks that also
 * cover the new interior node's subtree. */
808 mp = &t->rn_mklist;
809 while (x != NULL) {
810 if (x->rn_mask != NULL &&
811 x->rn_bit >= bit_leaf &&
812 x->rn_mklist == NULL) {
813 *mp = m = rn_new_radix_mask(x, NULL);
814 if (m != NULL)
815 mp = &m->rm_next;
817 x = x->rn_dupedkey;
819 } else if (x->rn_mklist != NULL) {
820 /* Skip over masks whose index is > that of new node. */
821 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) {
822 if (m->rm_bit >= bit_leaf)
823 break;
/* Move the remainder of the sibling's mask list up to t. */
825 t->rn_mklist = m;
826 *mp = NULL;
829 on2:
830 if (mask == NULL || bit > t->rn_bit)
831 return (tt); /* can't lift at all */
834 * Add new route to the highest possible ancestor's list.
836 bit_leaf = tt->rn_bit;
/* Walk up to the highest ancestor the new mask can still cover. */
837 do {
838 x = t;
839 t = t->rn_parent;
840 } while (bit <= t->rn_bit && x != top);
842 * Search through routes associated with node to
843 * insert new route according to index.
844 * Need same criteria as when sorting dupedkeys to avoid
845 * double loop on deletion.
847 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next) {
848 if (m->rm_bit < bit_leaf)
849 continue;
850 if (m->rm_bit > bit_leaf)
851 break;
852 if (m->rm_flags & RNF_NORMAL) {
853 mmask = m->rm_leaf->rn_mask;
854 if (tt->rn_flags & RNF_NORMAL) {
855 log(LOG_ERR,
856 "Non-unique normal route, mask not entered\n");
857 return (tt);
859 } else
860 mmask = m->rm_mask;
861 if (mmask == mask) {
/* Same mask already annotated here: share the entry. */
862 m->rm_refs++;
863 tt->rn_mklist = m;
864 return (tt);
866 if (rn_refines(mask, mmask) || rn_lexobetter(mask, mmask))
867 break;
869 *mp = rn_new_radix_mask(tt, *mp);
870 return (tt);
/*
 * Remove the route (key, mask) from <head>: drop its radix_mask
 * annotation, unlink the leaf from any duped-key chain, and splice its
 * companion interior node out of the tree.  Returns the (now inactive)
 * node pair originally passed to rn_addroute(), or NULL when the route
 * is not found or the annotations are inconsistent.
 */
873 struct radix_node *
874 rn_delete(const void *key, const void *mask, struct radix_node_head *head)
876 struct radix_node *top, *t, *p, *x, *tt, *saved_tt, *dupedkey;
877 struct radix_mask *m, *saved_m, **mp;
878 int bit, head_off, klen, cpu;
880 cpu = mycpuid;
881 x = head->rnh_treetop;
882 tt = rn_search(key, x);
883 head_off = x->rn_offset;
884 klen = clen(key);
885 saved_tt = tt;
886 top = x;
/* Key must match the found leaf byte-for-byte or there is no route. */
887 if (tt == NULL ||
888 bcmp((const u_char *)key + head_off, tt->rn_key + head_off,
889 klen - head_off) != 0)
890 return (NULL);
893 * Delete our route from mask lists.
895 if (mask != NULL) {
/* Canonicalize the mask, then find the chain entry carrying it. */
896 if ((x = rn_addmask(mask, true, head_off,
897 head->rnh_maskhead)) == NULL)
898 return (NULL);
899 mask = x->rn_key;
900 while (tt->rn_mask != mask) {
901 if ((tt = tt->rn_dupedkey) == NULL)
902 return (NULL);
905 if (tt->rn_mask == NULL || (saved_m = m = tt->rn_mklist) == NULL)
906 goto on1;
907 if (tt->rn_flags & RNF_NORMAL) {
908 if (m->rm_leaf != tt || m->rm_refs > 0) {
909 log(LOG_ERR, "rn_delete: inconsistent annotation\n");
910 return (NULL); /* dangling ref could cause disaster */
912 } else {
913 if (m->rm_mask != tt->rn_mask) {
914 log(LOG_ERR, "rn_delete: inconsistent annotation\n");
915 goto on1;
/* Shared annotation: just drop one reference if others remain. */
917 if (--m->rm_refs >= 0)
918 goto on1;
920 bit = -1 - tt->rn_bit;
921 t = saved_tt->rn_parent;
922 if (bit > t->rn_bit)
923 goto on1; /* Wasn't lifted at all */
/* Climb to the ancestor holding the annotation and unlink it. */
925 do {
926 x = t;
927 t = t->rn_parent;
928 } while (bit <= t->rn_bit && x != top);
929 for (mp = &x->rn_mklist; (m = *mp) != NULL; mp = &m->rm_next)
930 if (m == saved_m) {
931 *mp = m->rm_next;
932 MKFree(&rn_mkfreelist[cpu], m);
933 break;
935 if (m == NULL) {
936 log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
937 if (tt->rn_flags & RNF_NORMAL)
938 return (NULL); /* Dangling ref to us */
941 on1:
943 * Eliminate us from tree
945 if (tt->rn_flags & RNF_ROOT)
946 return (NULL);
948 #ifdef RN_DEBUG
949 /* Get us out of the creation list */
950 for (t = rn_clist; t != NULL && t->rn_ybro != tt; t = t->rn_ybro)
952 if (t != NULL)
953 t->rn_ybro = tt->rn_ybro;
954 #endif
956 t = tt->rn_parent;
957 dupedkey = saved_tt->rn_dupedkey;
958 if (dupedkey != NULL) {
960 * at this point, tt is the deletion target and saved_tt
961 * is the head of the dupekey chain
963 if (tt == saved_tt) {
964 /* remove from head of chain */
965 x = dupedkey;
966 x->rn_parent = t;
967 if (t->rn_left == tt)
968 t->rn_left = x;
969 else
970 t->rn_right = x;
971 } else {
972 /* find node in front of tt on the chain */
973 for (x = p = saved_tt; p != NULL && p->rn_dupedkey != tt;)
974 p = p->rn_dupedkey;
975 if (p) {
976 p->rn_dupedkey = tt->rn_dupedkey;
977 if (tt->rn_dupedkey) /* parent */
978 tt->rn_dupedkey->rn_parent = p;
979 /* parent */
980 } else {
981 log(LOG_ERR, "rn_delete: couldn't find us\n");
/* tt's companion interior node (tt+1) may still be wired into the
 * tree; if so, relocate its contents into a surviving chain node. */
984 t = tt + 1;
985 if (t->rn_flags & RNF_ACTIVE) {
986 #ifndef RN_DEBUG
987 *++x = *t;
988 p = t->rn_parent;
989 #else
990 bit = t->rn_info;
991 *++x = *t;
992 t->rn_info = bit;
993 p = t->rn_parent;
994 #endif
995 if (p->rn_left == t)
996 p->rn_left = x;
997 else
998 p->rn_right = x;
999 x->rn_left->rn_parent = x;
1000 x->rn_right->rn_parent = x;
1002 goto out;
/* No duped keys: splice tt's parent interior node out of the tree. */
1004 if (t->rn_left == tt)
1005 x = t->rn_right;
1006 else
1007 x = t->rn_left;
1008 p = t->rn_parent;
1009 if (p->rn_right == t)
1010 p->rn_right = x;
1011 else
1012 p->rn_left = x;
1013 x->rn_parent = p;
1015 * Demote routes attached to us.
1017 if (t->rn_mklist != NULL) {
1018 if (x->rn_bit >= 0) {
/* Sibling is interior: append our mask list to its list. */
1019 for (mp = &x->rn_mklist; (m = *mp) != NULL;)
1020 mp = &m->rm_next;
1021 *mp = t->rn_mklist;
1022 } else {
1024 * If there are any (key, mask) pairs in a sibling
1025 * duped-key chain, some subset will appear sorted
1026 * in the same order attached to our mklist.
1028 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
1029 if (m == x->rn_mklist) {
1030 struct radix_mask *mm = m->rm_next;
1032 x->rn_mklist = NULL;
1033 if (--(m->rm_refs) < 0)
1034 MKFree(&rn_mkfreelist[cpu], m);
1035 m = mm;
1037 if (m) {
1038 log(LOG_ERR,
1039 "rn_delete: Orphaned Mask %p at %p\n",
1040 (void *)m, (void *)x);
1045 * We may be holding an active internal node in the tree.
1047 x = tt + 1;
1048 if (t != x) {
/* Move the in-tree interior node's contents into tt's companion
 * slot so the pair tt[0..1] can be returned/freed together. */
1049 #ifndef RN_DEBUG
1050 *t = *x;
1051 #else
1052 bit = t->rn_info;
1053 *t = *x;
1054 t->rn_info = bit;
1055 #endif
1056 t->rn_left->rn_parent = t;
1057 t->rn_right->rn_parent = t;
1058 p = x->rn_parent;
1059 if (p->rn_left == x)
1060 p->rn_left = t;
1061 else
1062 p->rn_right = t;
1065 out:
1066 tt[0].rn_flags &= ~RNF_ACTIVE;
1067 tt[1].rn_flags &= ~RNF_ACTIVE;
1068 return (tt);
1072 * This is the same as rn_walktree() except for the parameters and the
1073 * exit.
/*
 * Walk the subtree of <h> selected by <_addr>/<_mask>, applying <f> to
 * every non-root leaf; a nonzero return from <f> aborts the walk and is
 * propagated.  <f> may delete the leaf it is given: the successor leaf
 * is always computed before <f> runs.
 */
1075 static int
1076 rn_walktree_from(struct radix_node_head *h, const void *_addr,
1077 const void *_mask, walktree_f_t *f, void *w)
1079 struct radix_node *rn, *base, *next, *last;
1080 const u_char *addr, *mask;
1081 bool stopping;
1082 int lastb, error;
1084 addr = _addr;
1085 mask = _mask;
1086 last = NULL;
1087 stopping = false;
1090 * rn_search_m() is sort-of-open-coded here. We cannot use that
1091 * function because we need to keep track of the last node seen.
1093 /* kprintf("about to search\n"); */
1094 for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
1095 last = rn;
1096 /* kprintf("rn_bit %d, rn_bmask %x, mask[rn_offset] %x\n",
1097 rn->rn_bit, rn->rn_bmask, mask[rn->rn_offset]); */
/* Stop descending once the tested bit falls outside the mask. */
1098 if (!(rn->rn_bmask & mask[rn->rn_offset])) {
1099 break;
1101 if (rn->rn_bmask & addr[rn->rn_offset]) {
1102 rn = rn->rn_right;
1103 } else {
1104 rn = rn->rn_left;
1107 /* kprintf("done searching\n"); */
1110 * Two cases: either we stepped off the end of our mask,
1111 * in which case last == rn, or we reached a leaf, in which
1112 * case we want to start from the last node we looked at.
1113 * Either way, last is the node we want to start from.
1115 rn = last;
/* lastb bounds the walk: climbing above this bit index ends it. */
1116 lastb = rn->rn_bit;
1118 /* kprintf("rn %p, lastb %d\n", rn, lastb);*/
1121 * This gets complicated because we may delete the node
1122 * while applying the function f to it, so we need to calculate
1123 * the successor node in advance.
/* Descend to the leftmost leaf of the selected subtree. */
1125 while (rn->rn_bit >= 0)
1126 rn = rn->rn_left;
1128 while (!stopping) {
1129 /* kprintf("node %p (%d)\n", rn, rn->rn_bit); */
1130 base = rn;
1131 /* If at right child go back up, otherwise, go right */
1132 while (rn->rn_parent->rn_right == rn &&
1133 !(rn->rn_flags & RNF_ROOT)) {
1134 rn = rn->rn_parent;
1136 /* if went up beyond last, stop */
1137 if (rn->rn_bit < lastb) {
1138 stopping = true;
1139 /* kprintf("up too far\n"); */
1143 /* Find the next *leaf* since next node might vanish, too */
1144 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
1145 rn = rn->rn_left;
1146 next = rn;
1147 /* Process leaves */
1148 while ((rn = base) != NULL) {
1149 base = rn->rn_dupedkey;
1150 /* kprintf("leaf %p\n", rn); */
1151 if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
1152 return (error);
1154 rn = next;
1156 if (rn->rn_flags & RNF_ROOT) {
1157 /* kprintf("root, stopping"); */
1158 stopping = true;
1162 return 0;
/*
 * Walk the leaves of <h> starting from <addr> (optionally under <mask>),
 * or from the leftmost leaf when addr is NULL, applying <f> to every
 * non-root leaf.  Stops at the right-hand root leaf, or on the first
 * nonzero return from <f>, which is propagated.
 */
1165 static int
1166 rn_walktree_at(struct radix_node_head *h, const void *addr, const void *mask,
1167 walktree_f_t *f, void *w)
1169 struct radix_node *rn, *base, *next;
1170 int error;
1172 rn = h->rnh_treetop;
1175 * This gets complicated because we may delete the node
1176 * while applying the function f to it, so we need to calculate
1177 * the successor node in advance.
1179 if (addr == NULL) {
1180 /* First time through node, go left */
1181 while (rn->rn_bit >= 0)
1182 rn = rn->rn_left;
1183 } else {
1184 if (mask != NULL)
1185 rn = rn_search_m(addr, mask, rn);
1186 else
1187 rn = rn_search(addr, rn);
1189 for (;;) {
/* base: current leaf chain; next is computed before f may delete it. */
1190 base = rn;
1191 /* If at right child go back up, otherwise, go right */
1192 while (rn->rn_parent->rn_right == rn &&
1193 !(rn->rn_flags & RNF_ROOT))
1194 rn = rn->rn_parent;
1195 /* Find the next *leaf* since next node might vanish, too */
1196 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
1197 rn = rn->rn_left;
1198 next = rn;
1199 /* Process leaves */
1200 while ((rn = base)) {
1201 base = rn->rn_dupedkey;
1202 if (!(rn->rn_flags & RNF_ROOT) && (error = (*f)(rn, w)))
1203 return (error);
1205 rn = next;
/* The all-ones root leaf marks the end of the in-order walk. */
1206 if (rn->rn_flags & RNF_ROOT)
1207 return (0);
1209 /* NOTREACHED */
1212 static int
1213 rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
1215 return rn_walktree_at(h, NULL, NULL, f, w);
1219 * Allocate and initialize an empty radix tree at <head>.
1221 * The created radix_node_head embeds 3 nodes in the order of
1222 * {left,root,right}. These nodes are flagged with RNF_ROOT and thus
1223 * cannot be freed. The left and right leaves are initialized with
1224 * all-zero and all-one keys, respectively, and with the significant
1225 * byte starting at <off_bytes>.
1227 * The <maskhead> refers to another radix tree for storing the network
1228 * masks (so aka mask tree). It is also created by this function with
1229 * <maskhead>=NULL; the <off_bytes> parameter is ignored and auto set
1230 * to be zero (0). The reason of requiring <off_bytes> be zero is that
1231 * a mask tree can be shared with multiple radix trees of different
1232 * address families that have different offset bytes; e.g.,
1233 * offsetof(struct sockaddr_in, sin_addr) !=
1234 * offsetof(struct sockaddr_in6, sin6_addr).
1236 * Return 1 on success, 0 on error.
1239 rn_inithead(struct radix_node_head **head, struct radix_node_head *maskhead,
1240 int off_bytes)
1242 struct radix_node_head *rnh;
1243 struct radix_node *root, *left, *right;
1245 if (*head != NULL) /* already initialized */
1246 return (1);
1248 R_Malloc(rnh, struct radix_node_head *, sizeof *rnh);
1249 if (rnh == NULL)
1250 return (0);
1252 if (maskhead == NULL) /* mask tree initialization */
1253 off_bytes = 0;
1254 if (off_bytes >= RN_MAXKEYLEN) /* prevent possible misuse */
1255 panic("%s: invalid off_bytes=%d", __func__, off_bytes);
1257 bzero(rnh, sizeof *rnh);
1258 *head = rnh;
1260 root = rn_newpair(rn_zeros, off_bytes * NBBY, rnh->rnh_nodes);
1261 right = &rnh->rnh_nodes[2];
1262 root->rn_parent = root;
1263 root->rn_flags = RNF_ROOT | RNF_ACTIVE;
1264 root->rn_right = right;
1266 left = root->rn_left;
1267 left->rn_bit = -1 - off_bytes * NBBY;
1268 left->rn_flags = root->rn_flags;
1270 *right = *left;
1271 right->rn_key = rn_ones;
1273 rnh->rnh_treetop = root;
1274 rnh->rnh_maskhead = maskhead;
1276 rnh->rnh_addaddr = rn_addroute;
1277 rnh->rnh_deladdr = rn_delete;
1278 rnh->rnh_matchaddr = rn_match;
1279 rnh->rnh_lookup = rn_lookup;
1280 rnh->rnh_walktree = rn_walktree;
1281 rnh->rnh_walktree_from = rn_walktree_from;
1282 rnh->rnh_walktree_at = rn_walktree_at;
1284 return (1);
1288 * Callback function to be used in rn_flush() to empty a mask tree.
1290 void
1291 rn_freemask(struct radix_node *rn)
1293 if (rn->rn_mask != NULL)
1294 panic("%s: not a mask node", __func__);
1296 R_Free(rn);
1299 struct rn_flush_ctx {
1300 struct radix_node_head *head;
1301 freenode_f_t *f;
1304 static int
1305 rn_flush_walker(struct radix_node *rn, void *arg)
1307 struct rn_flush_ctx *ctx = arg;
1308 struct radix_node *node;
1310 node = ctx->head->rnh_deladdr(rn->rn_key, rn->rn_mask, ctx->head);
1311 if (node != rn) {
1312 panic("%s: deleted wrong node: %p, want: %p",
1313 __func__, node, rn);
1315 if (ctx->f)
1316 ctx->f(rn);
1318 return 0;
/*
 * An empty radix tree consists solely of the 3 embedded RNF_ROOT nodes
 * created by rn_inithead(): rnh_nodes[1] as treetop with rnh_nodes[0]
 * and rnh_nodes[2] as its left and right leaves.
 */
#define IS_EMPTY(head) \
	(((head)->rnh_treetop == &(head)->rnh_nodes[1]) && \
	 ((head)->rnh_treetop->rn_left == &(head)->rnh_nodes[0]) && \
	 ((head)->rnh_treetop->rn_right == &(head)->rnh_nodes[2]))
1327 * Flush all nodes in the radix tree at <head>.
1328 * If the callback function <f> is specified, it is called against every
1329 * flushed node to allow the caller to do extra cleanups.
1331 void
1332 rn_flush(struct radix_node_head *head, freenode_f_t *f)
1334 struct rn_flush_ctx ctx;
1336 if (f == rn_freemask && head->rnh_maskhead != NULL)
1337 panic("%s: rn_freemask() used with non-mask tree", __func__);
1339 ctx.head = head;
1340 ctx.f = f;
1341 head->rnh_walktree(head, rn_flush_walker, &ctx);
1343 if (!IS_EMPTY(head))
1344 panic("%s: failed to flush all nodes", __func__);
/*
 * Free an empty radix tree at <head>.
 *
 * NOTE: The radix tree must be first emptied by rn_flush().
 */
void
rn_freehead(struct radix_node_head *head)
{
	if (!IS_EMPTY(head))
		panic("%s: radix tree not empty", __func__);

	R_Free(head);
}
1361 #ifdef _KERNEL
1363 static void
1364 rn_init_handler(netmsg_t msg)
1366 int cpu = mycpuid;
1368 ASSERT_NETISR_NCPUS(cpu);
1369 if (rn_inithead(&mask_rnheads[cpu], NULL, 0) == 0)
1370 panic("%s: failed to create mask tree", __func__);
1372 netisr_forwardmsg(&msg->base, cpu + 1);
1375 void
1376 rn_init(void)
1378 struct netmsg_base msg;
1379 struct domain *dom;
1381 SLIST_FOREACH(dom, &domains, dom_next) {
1382 if (dom->dom_maxrtkey > RN_MAXKEYLEN) {
1383 panic("domain %s maxkey too big %d/%d",
1384 dom->dom_name, dom->dom_maxrtkey, RN_MAXKEYLEN);
1388 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, rn_init_handler);
1389 netisr_domsg_global(&msg);
1392 struct radix_node_head *
1393 rn_cpumaskhead(int cpu)
1395 ASSERT_NETISR_NCPUS(cpu);
1396 KKASSERT(mask_rnheads[cpu] != NULL);
1397 return mask_rnheads[cpu];
1400 #else /* !_KERNEL */
1402 void
1403 rn_init(void)
1405 if (rn_inithead(&mask_rnheads[0], NULL, 0) == 0)
1406 panic("%s: failed to create mask tree", __func__);
1409 struct radix_node_head *
1410 rn_cpumaskhead(int cpu __unused)
1412 return mask_rnheads[0];
1415 #endif /* _KERNEL */