Install gettext-0.18.1.1.tar.gz
[msysgit.git] / mingw / share / gettext / intl / tsearch.c
blob5f2da1b44c3d26ee3087221e26196d2ecb77ea11
1 /* Copyright (C) 1995, 1996, 1997, 2000, 2006 Free Software Foundation, Inc.
2 Contributed by Bernd Schmidt <crux@Pool.Informatik.RWTH-Aachen.DE>, 1997.
4 NOTE: The canonical source of this file is maintained with the GNU C
5 Library. Bugs can be reported to bug-glibc@gnu.org.
7 This program is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Library General Public License as published
9 by the Free Software Foundation; either version 2, or (at your option)
10 any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
17 You should have received a copy of the GNU Library General Public
18 License along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
20 USA. */
22 /* Tree search for red/black trees.
23 The algorithm for adding nodes is taken from one of the many "Algorithms"
24 books by Robert Sedgewick, although the implementation differs.
25 The algorithm for deleting nodes can probably be found in a book named
26 "Introduction to Algorithms" by Cormen/Leiserson/Rivest. At least that's
27 the book that my professor took most algorithms from during the "Data
28 Structures" course...
30 Totally public domain. */
32 /* Red/black trees are binary trees in which the edges are colored either red
33 or black. They have the following properties:
34 1. The number of black edges on every path from the root to a leaf is
35 constant.
36 2. No two red edges are adjacent.
37 Therefore there is an upper bound on the length of every path, it's
38 O(log n) where n is the number of nodes in the tree. No path can be longer
39 than 1+2*P where P is the length of the shortest path in the tree.
40 Useful for the implementation:
41 3. If one of the children of a node is NULL, then the other one is red
42 (if it exists).
44 In the implementation, not the edges are colored, but the nodes. The color
45 interpreted as the color of the edge leading to this node. The color is
46 meaningless for the root node, but we color the root node black for
47 convenience. All added nodes are red initially.
49 Adding to a red/black tree is rather easy. The right place is searched
50 with a usual binary tree search. Additionally, whenever a node N is
51 reached that has two red successors, the successors are colored black and
52 the node itself colored red. This moves red edges up the tree where they
53 pose less of a problem once we get to really insert the new node. Changing
54 N's color to red may violate rule 2, however, so rotations may become
55 necessary to restore the invariants. Adding a new red leaf may violate
56 the same rule, so afterwards an additional check is run and the tree
57 possibly rotated.
59 Deleting is hairy. There are mainly two nodes involved: the node to be
60 deleted (n1), and another node that is to be unchained from the tree (n2).
61 If n1 has a successor (the node with a smallest key that is larger than
62 n1), then the successor becomes n2 and its contents are copied into n1,
63 otherwise n1 becomes n2.
64 Unchaining a node may violate rule 1: if n2 is black, one subtree is
65 missing one black edge afterwards. The algorithm must try to move this
66 error upwards towards the root, so that the subtree that does not have
67 enough black edges becomes the whole tree. Once that happens, the error
68 has disappeared. It may not be necessary to go all the way up, since it
69 is possible that rotations and recoloring can fix the error before that.
71 Although the deletion algorithm must walk upwards through the tree, we
72 do not store parent pointers in the nodes. Instead, delete allocates a
73 small array of parent pointers and fills it while descending the tree.
74 Since we know that the length of a path is O(log n), where n is the number
75 of nodes, this is likely to use less memory. */
77 /* Tree rotations look like this:
78 A C
79 / \ / \
80 B C A G
81 / \ / \ --> / \
82 D E F G B F
83 / \
84 D E
86 In this case, A has been rotated left. This preserves the ordering of the
87 binary tree. */
89 #include <config.h>
91 /* Specification. */
92 #ifdef IN_LIBINTL
93 # include "tsearch.h"
94 #else
95 # include <search.h>
96 #endif
98 #include <stdlib.h>
100 typedef int (*__compar_fn_t) (const void *, const void *);
101 typedef void (*__action_fn_t) (const void *, VISIT, int);
103 #ifndef weak_alias
104 # define __tsearch tsearch
105 # define __tfind tfind
106 # define __tdelete tdelete
107 # define __twalk twalk
108 #endif
110 #ifndef internal_function
111 /* Inside GNU libc we mark some function in a special way. In other
112 environments simply ignore the marking. */
113 # define internal_function
114 #endif
116 typedef struct node_t
118 /* Callers expect this to be the first element in the structure - do not
119 move! */
120 const void *key;
121 struct node_t *left;
122 struct node_t *right;
123 unsigned int red:1;
124 } *node;
125 typedef const struct node_t *const_node;
127 #undef DEBUGGING
129 #ifdef DEBUGGING
131 /* Routines to check tree invariants. */
133 #include <assert.h>
135 #define CHECK_TREE(a) check_tree(a)
137 static void
138 check_tree_recurse (node p, int d_sofar, int d_total)
140 if (p == NULL)
142 assert (d_sofar == d_total);
143 return;
146 check_tree_recurse (p->left, d_sofar + (p->left && !p->left->red), d_total);
147 check_tree_recurse (p->right, d_sofar + (p->right && !p->right->red), d_total);
148 if (p->left)
149 assert (!(p->left->red && p->red));
150 if (p->right)
151 assert (!(p->right->red && p->red));
154 static void
155 check_tree (node root)
157 int cnt = 0;
158 node p;
159 if (root == NULL)
160 return;
161 root->red = 0;
162 for(p = root->left; p; p = p->left)
163 cnt += !p->red;
164 check_tree_recurse (root, 0, cnt);
168 #else
170 #define CHECK_TREE(a)
172 #endif
174 /* Possibly "split" a node with two red successors, and/or fix up two red
175 edges in a row. ROOTP is a pointer to the lowest node we visited, PARENTP
176 and GPARENTP pointers to its parent/grandparent. P_R and GP_R contain the
177 comparison values that determined which way was taken in the tree to reach
178 ROOTP. MODE is 1 if we need not do the split, but must check for two red
179 edges between GPARENTP and ROOTP. */
180 static void
181 maybe_split_for_insert (node *rootp, node *parentp, node *gparentp,
182 int p_r, int gp_r, int mode)
184 node root = *rootp;
185 node *rp, *lp;
186 rp = &(*rootp)->right;
187 lp = &(*rootp)->left;
189 /* See if we have to split this node (both successors red). */
190 if (mode == 1
191 || ((*rp) != NULL && (*lp) != NULL && (*rp)->red && (*lp)->red))
193 /* This node becomes red, its successors black. */
194 root->red = 1;
195 if (*rp)
196 (*rp)->red = 0;
197 if (*lp)
198 (*lp)->red = 0;
200 /* If the parent of this node is also red, we have to do
201 rotations. */
202 if (parentp != NULL && (*parentp)->red)
204 node gp = *gparentp;
205 node p = *parentp;
206 /* There are two main cases:
207 1. The edge types (left or right) of the two red edges differ.
208 2. Both red edges are of the same type.
209 There exist two symmetries of each case, so there is a total of
210 4 cases. */
211 if ((p_r > 0) != (gp_r > 0))
213 /* Put the child at the top of the tree, with its parent
214 and grandparent as successors. */
215 p->red = 1;
216 gp->red = 1;
217 root->red = 0;
218 if (p_r < 0)
220 /* Child is left of parent. */
221 p->left = *rp;
222 *rp = p;
223 gp->right = *lp;
224 *lp = gp;
226 else
228 /* Child is right of parent. */
229 p->right = *lp;
230 *lp = p;
231 gp->left = *rp;
232 *rp = gp;
234 *gparentp = root;
236 else
238 *gparentp = *parentp;
239 /* Parent becomes the top of the tree, grandparent and
240 child are its successors. */
241 p->red = 0;
242 gp->red = 1;
243 if (p_r < 0)
245 /* Left edges. */
246 gp->left = p->right;
247 p->right = gp;
249 else
251 /* Right edges. */
252 gp->right = p->left;
253 p->left = gp;
260 /* Find or insert datum into search tree.
261 KEY is the key to be located, ROOTP is the address of tree root,
262 COMPAR the ordering function. */
263 void *
264 __tsearch (const void *key, void **vrootp, __compar_fn_t compar)
266 node q;
267 node *parentp = NULL, *gparentp = NULL;
268 node *rootp = (node *) vrootp;
269 node *nextp;
270 int r = 0, p_r = 0, gp_r = 0; /* No they might not, Mr Compiler. */
272 if (rootp == NULL)
273 return NULL;
275 /* This saves some additional tests below. */
276 if (*rootp != NULL)
277 (*rootp)->red = 0;
279 CHECK_TREE (*rootp);
281 nextp = rootp;
282 while (*nextp != NULL)
284 node root = *rootp;
285 r = (*compar) (key, root->key);
286 if (r == 0)
287 return root;
289 maybe_split_for_insert (rootp, parentp, gparentp, p_r, gp_r, 0);
290 /* If that did any rotations, parentp and gparentp are now garbage.
291 That doesn't matter, because the values they contain are never
292 used again in that case. */
294 nextp = r < 0 ? &root->left : &root->right;
295 if (*nextp == NULL)
296 break;
298 gparentp = parentp;
299 parentp = rootp;
300 rootp = nextp;
302 gp_r = p_r;
303 p_r = r;
306 q = (struct node_t *) malloc (sizeof (struct node_t));
307 if (q != NULL)
309 *nextp = q; /* link new node to old */
310 q->key = key; /* initialize new node */
311 q->red = 1;
312 q->left = q->right = NULL;
314 if (nextp != rootp)
315 /* There may be two red edges in a row now, which we must avoid by
316 rotating the tree. */
317 maybe_split_for_insert (nextp, rootp, parentp, r, p_r, 1);
320 return q;
322 #ifdef weak_alias
323 weak_alias (__tsearch, tsearch)
324 #endif
327 /* Find datum in search tree.
328 KEY is the key to be located, ROOTP is the address of tree root,
329 COMPAR the ordering function. */
330 void *
331 __tfind (key, vrootp, compar)
332 const void *key;
333 void *const *vrootp;
334 __compar_fn_t compar;
336 node *rootp = (node *) vrootp;
338 if (rootp == NULL)
339 return NULL;
341 CHECK_TREE (*rootp);
343 while (*rootp != NULL)
345 node root = *rootp;
346 int r;
348 r = (*compar) (key, root->key);
349 if (r == 0)
350 return root;
352 rootp = r < 0 ? &root->left : &root->right;
354 return NULL;
356 #ifdef weak_alias
357 weak_alias (__tfind, tfind)
358 #endif
361 /* Delete node with given key.
362 KEY is the key to be deleted, ROOTP is the address of the root of tree,
363 COMPAR the comparison function. */
364 void *
365 __tdelete (const void *key, void **vrootp, __compar_fn_t compar)
367 node p, q, r, retval;
368 int cmp;
369 node *rootp = (node *) vrootp;
370 node root, unchained;
371 /* Stack of nodes so we remember the parents without recursion. It's
372 _very_ unlikely that there are paths longer than 40 nodes. The tree
373 would need to have around 250.000 nodes. */
374 int stacksize = 100;
375 int sp = 0;
376 node *nodestack[100];
378 if (rootp == NULL)
379 return NULL;
380 p = *rootp;
381 if (p == NULL)
382 return NULL;
384 CHECK_TREE (p);
386 while ((cmp = (*compar) (key, (*rootp)->key)) != 0)
388 if (sp == stacksize)
389 abort ();
391 nodestack[sp++] = rootp;
392 p = *rootp;
393 rootp = ((cmp < 0)
394 ? &(*rootp)->left
395 : &(*rootp)->right);
396 if (*rootp == NULL)
397 return NULL;
400 /* This is bogus if the node to be deleted is the root... this routine
401 really should return an integer with 0 for success, -1 for failure
402 and errno = ESRCH or something. */
403 retval = p;
405 /* We don't unchain the node we want to delete. Instead, we overwrite
406 it with its successor and unchain the successor. If there is no
407 successor, we really unchain the node to be deleted. */
409 root = *rootp;
411 r = root->right;
412 q = root->left;
414 if (q == NULL || r == NULL)
415 unchained = root;
416 else
418 node *parent = rootp, *up = &root->right;
419 for (;;)
421 if (sp == stacksize)
422 abort ();
423 nodestack[sp++] = parent;
424 parent = up;
425 if ((*up)->left == NULL)
426 break;
427 up = &(*up)->left;
429 unchained = *up;
432 /* We know that either the left or right successor of UNCHAINED is NULL.
433 R becomes the other one, it is chained into the parent of UNCHAINED. */
434 r = unchained->left;
435 if (r == NULL)
436 r = unchained->right;
437 if (sp == 0)
438 *rootp = r;
439 else
441 q = *nodestack[sp-1];
442 if (unchained == q->right)
443 q->right = r;
444 else
445 q->left = r;
448 if (unchained != root)
449 root->key = unchained->key;
450 if (!unchained->red)
452 /* Now we lost a black edge, which means that the number of black
453 edges on every path is no longer constant. We must balance the
454 tree. */
455 /* NODESTACK now contains all parents of R. R is likely to be NULL
456 in the first iteration. */
457 /* NULL nodes are considered black throughout - this is necessary for
458 correctness. */
459 while (sp > 0 && (r == NULL || !r->red))
461 node *pp = nodestack[sp - 1];
462 p = *pp;
463 /* Two symmetric cases. */
464 if (r == p->left)
466 /* Q is R's brother, P is R's parent. The subtree with root
467 R has one black edge less than the subtree with root Q. */
468 q = p->right;
469 if (q->red)
471 /* If Q is red, we know that P is black. We rotate P left
472 so that Q becomes the top node in the tree, with P below
473 it. P is colored red, Q is colored black.
474 This action does not change the black edge count for any
475 leaf in the tree, but we will be able to recognize one
476 of the following situations, which all require that Q
477 is black. */
478 q->red = 0;
479 p->red = 1;
480 /* Left rotate p. */
481 p->right = q->left;
482 q->left = p;
483 *pp = q;
484 /* Make sure pp is right if the case below tries to use
485 it. */
486 nodestack[sp++] = pp = &q->left;
487 q = p->right;
489 /* We know that Q can't be NULL here. We also know that Q is
490 black. */
491 if ((q->left == NULL || !q->left->red)
492 && (q->right == NULL || !q->right->red))
494 /* Q has two black successors. We can simply color Q red.
495 The whole subtree with root P is now missing one black
496 edge. Note that this action can temporarily make the
497 tree invalid (if P is red). But we will exit the loop
498 in that case and set P black, which both makes the tree
499 valid and also makes the black edge count come out
500 right. If P is black, we are at least one step closer
501 to the root and we'll try again the next iteration. */
502 q->red = 1;
503 r = p;
505 else
507 /* Q is black, one of Q's successors is red. We can
508 repair the tree with one operation and will exit the
509 loop afterwards. */
510 if (q->right == NULL || !q->right->red)
512 /* The left one is red. We perform the same action as
513 in maybe_split_for_insert where two red edges are
514 adjacent but point in different directions:
515 Q's left successor (let's call it Q2) becomes the
516 top of the subtree we are looking at, its parent (Q)
517 and grandparent (P) become its successors. The former
518 successors of Q2 are placed below P and Q.
519 P becomes black, and Q2 gets the color that P had.
520 This changes the black edge count only for node R and
521 its successors. */
522 node q2 = q->left;
523 q2->red = p->red;
524 p->right = q2->left;
525 q->left = q2->right;
526 q2->right = q;
527 q2->left = p;
528 *pp = q2;
529 p->red = 0;
531 else
533 /* It's the right one. Rotate P left. P becomes black,
534 and Q gets the color that P had. Q's right successor
535 also becomes black. This changes the black edge
536 count only for node R and its successors. */
537 q->red = p->red;
538 p->red = 0;
540 q->right->red = 0;
542 /* left rotate p */
543 p->right = q->left;
544 q->left = p;
545 *pp = q;
548 /* We're done. */
549 sp = 1;
550 r = NULL;
553 else
555 /* Comments: see above. */
556 q = p->left;
557 if (q->red)
559 q->red = 0;
560 p->red = 1;
561 p->left = q->right;
562 q->right = p;
563 *pp = q;
564 nodestack[sp++] = pp = &q->right;
565 q = p->left;
567 if ((q->right == NULL || !q->right->red)
568 && (q->left == NULL || !q->left->red))
570 q->red = 1;
571 r = p;
573 else
575 if (q->left == NULL || !q->left->red)
577 node q2 = q->right;
578 q2->red = p->red;
579 p->left = q2->right;
580 q->right = q2->left;
581 q2->left = q;
582 q2->right = p;
583 *pp = q2;
584 p->red = 0;
586 else
588 q->red = p->red;
589 p->red = 0;
590 q->left->red = 0;
591 p->left = q->right;
592 q->right = p;
593 *pp = q;
595 sp = 1;
596 r = NULL;
599 --sp;
601 if (r != NULL)
602 r->red = 0;
605 free (unchained);
606 return retval;
608 #ifdef weak_alias
609 weak_alias (__tdelete, tdelete)
610 #endif
613 /* Walk the nodes of a tree.
614 ROOT is the root of the tree to be walked, ACTION the function to be
615 called at each node. LEVEL is the level of ROOT in the whole tree. */
616 static void
617 internal_function
618 trecurse (const void *vroot, __action_fn_t action, int level)
620 const_node root = (const_node) vroot;
622 if (root->left == NULL && root->right == NULL)
623 (*action) (root, leaf, level);
624 else
626 (*action) (root, preorder, level);
627 if (root->left != NULL)
628 trecurse (root->left, action, level + 1);
629 (*action) (root, postorder, level);
630 if (root->right != NULL)
631 trecurse (root->right, action, level + 1);
632 (*action) (root, endorder, level);
637 /* Walk the nodes of a tree.
638 ROOT is the root of the tree to be walked, ACTION the function to be
639 called at each node. */
640 void
641 __twalk (const void *vroot, __action_fn_t action)
643 const_node root = (const_node) vroot;
645 CHECK_TREE (root);
647 if (root != NULL && action != NULL)
648 trecurse (root, action, 0);
650 #ifdef weak_alias
651 weak_alias (__twalk, twalk)
652 #endif
655 #ifdef _LIBC
657 /* The standardized functions miss an important functionality: the
658 tree cannot be removed easily. We provide a function to do this. */
659 static void
660 internal_function
661 tdestroy_recurse (node root, __free_fn_t freefct)
663 if (root->left != NULL)
664 tdestroy_recurse (root->left, freefct);
665 if (root->right != NULL)
666 tdestroy_recurse (root->right, freefct);
667 (*freefct) ((void *) root->key);
668 /* Free the node itself. */
669 free (root);
672 void
673 __tdestroy (void *vroot, __free_fn_t freefct)
675 node root = (node) vroot;
677 CHECK_TREE (root);
679 if (root != NULL)
680 tdestroy_recurse (root, freefct);
682 weak_alias (__tdestroy, tdestroy)
684 #endif /* _LIBC */