2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
\r
4 * This file is copyright 1999-2001 Simon Tatham.
\r
6 * Permission is hereby granted, free of charge, to any person
\r
7 * obtaining a copy of this software and associated documentation
\r
8 * files (the "Software"), to deal in the Software without
\r
9 * restriction, including without limitation the rights to use,
\r
10 * copy, modify, merge, publish, distribute, sublicense, and/or
\r
11 * sell copies of the Software, and to permit persons to whom the
\r
12 * Software is furnished to do so, subject to the following
\r
15 * The above copyright notice and this permission notice shall be
\r
16 * included in all copies or substantial portions of the Software.
\r
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
\r
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
\r
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
\r
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
\r
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
\r
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
\r
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
\r
32 #include "puttymem.h"
\r
33 #include "tree234.h"
\r
36 #define LOG(x) (printf x)
\r
41 typedef struct node234_Tag node234;
\r
43 struct tree234_Tag {
\r
48 struct node234_Tag {
\r
56 * Create a 2-3-4 tree.
\r
58 tree234 *newtree234(cmpfn234 cmp)
\r
60 tree234 *ret = snew(tree234);
\r
61 LOG(("created tree %p\n", ret));
\r
68 * Free a 2-3-4 tree (not including freeing the elements).
\r
70 static void freenode234(node234 * n)
\r
74 freenode234(n->kids[0]);
\r
75 freenode234(n->kids[1]);
\r
76 freenode234(n->kids[2]);
\r
77 freenode234(n->kids[3]);
\r
81 void freetree234(tree234 * t)
\r
83 freenode234(t->root);
\r
88 * Internal function to count a node.
\r
90 static int countnode234(node234 * n)
\r
96 for (i = 0; i < 4; i++)
\r
97 count += n->counts[i];
\r
98 for (i = 0; i < 3; i++)
\r
105 * Count the elements in a tree.
\r
107 int count234(tree234 * t)
\r
110 return countnode234(t->root);
\r
116 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
\r
117 * an existing element compares equal, returns that.
\r
119 static void *add234_internal(tree234 * t, void *e, int index)
\r
121 node234 *n, **np, *left, *right;
\r
123 int c, lcount, rcount;
\r
125 LOG(("adding node %p to tree %p\n", e, t));
\r
126 if (t->root == NULL) {
\r
127 t->root = snew(node234);
\r
128 t->root->elems[1] = t->root->elems[2] = NULL;
\r
129 t->root->kids[0] = t->root->kids[1] = NULL;
\r
130 t->root->kids[2] = t->root->kids[3] = NULL;
\r
131 t->root->counts[0] = t->root->counts[1] = 0;
\r
132 t->root->counts[2] = t->root->counts[3] = 0;
\r
133 t->root->parent = NULL;
\r
134 t->root->elems[0] = e;
\r
135 LOG((" created root %p\n", t->root));
\r
139 n = NULL; /* placate gcc; will always be set below since t->root != NULL */
\r
144 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
\r
146 n->kids[0], n->counts[0], n->elems[0],
\r
147 n->kids[1], n->counts[1], n->elems[1],
\r
148 n->kids[2], n->counts[2], n->elems[2],
\r
149 n->kids[3], n->counts[3]));
\r
153 * Leaf node. We want to insert at kid position
\r
154 * equal to the index:
\r
161 * Internal node. We always descend through it (add
\r
162 * always starts at the bottom, never in the
\r
165 do { /* this is a do ... while (0) to allow `break' */
\r
166 if (index <= n->counts[0]) {
\r
170 index -= n->counts[0] + 1;
\r
171 if (index <= n->counts[1]) {
\r
175 index -= n->counts[1] + 1;
\r
176 if (index <= n->counts[2]) {
\r
180 index -= n->counts[2] + 1;
\r
181 if (index <= n->counts[3]) {
\r
185 return NULL; /* error: index out of range */
\r
189 if ((c = t->cmp(e, n->elems[0])) < 0)
\r
192 return n->elems[0]; /* already exists */
\r
193 else if (n->elems[1] == NULL
\r
194 || (c = t->cmp(e, n->elems[1])) < 0) childnum = 1;
\r
196 return n->elems[1]; /* already exists */
\r
197 else if (n->elems[2] == NULL
\r
198 || (c = t->cmp(e, n->elems[2])) < 0) childnum = 2;
\r
200 return n->elems[2]; /* already exists */
\r
204 np = &n->kids[childnum];
\r
205 LOG((" moving to child %d (%p)\n", childnum, *np));
\r
209 * We need to insert the new element in n at position np.
\r
216 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
\r
218 n->kids[0], n->counts[0], n->elems[0],
\r
219 n->kids[1], n->counts[1], n->elems[1],
\r
220 n->kids[2], n->counts[2], n->elems[2],
\r
221 n->kids[3], n->counts[3]));
\r
222 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
\r
223 left, lcount, e, right, rcount, np - n->kids));
\r
224 if (n->elems[1] == NULL) {
\r
226 * Insert in a 2-node; simple.
\r
228 if (np == &n->kids[0]) {
\r
229 LOG((" inserting on left of 2-node\n"));
\r
230 n->kids[2] = n->kids[1];
\r
231 n->counts[2] = n->counts[1];
\r
232 n->elems[1] = n->elems[0];
\r
233 n->kids[1] = right;
\r
234 n->counts[1] = rcount;
\r
237 n->counts[0] = lcount;
\r
238 } else { /* np == &n->kids[1] */
\r
239 LOG((" inserting on right of 2-node\n"));
\r
240 n->kids[2] = right;
\r
241 n->counts[2] = rcount;
\r
244 n->counts[1] = lcount;
\r
247 n->kids[0]->parent = n;
\r
249 n->kids[1]->parent = n;
\r
251 n->kids[2]->parent = n;
\r
254 } else if (n->elems[2] == NULL) {
\r
256 * Insert in a 3-node; simple.
\r
258 if (np == &n->kids[0]) {
\r
259 LOG((" inserting on left of 3-node\n"));
\r
260 n->kids[3] = n->kids[2];
\r
261 n->counts[3] = n->counts[2];
\r
262 n->elems[2] = n->elems[1];
\r
263 n->kids[2] = n->kids[1];
\r
264 n->counts[2] = n->counts[1];
\r
265 n->elems[1] = n->elems[0];
\r
266 n->kids[1] = right;
\r
267 n->counts[1] = rcount;
\r
270 n->counts[0] = lcount;
\r
271 } else if (np == &n->kids[1]) {
\r
272 LOG((" inserting in middle of 3-node\n"));
\r
273 n->kids[3] = n->kids[2];
\r
274 n->counts[3] = n->counts[2];
\r
275 n->elems[2] = n->elems[1];
\r
276 n->kids[2] = right;
\r
277 n->counts[2] = rcount;
\r
280 n->counts[1] = lcount;
\r
281 } else { /* np == &n->kids[2] */
\r
282 LOG((" inserting on right of 3-node\n"));
\r
283 n->kids[3] = right;
\r
284 n->counts[3] = rcount;
\r
287 n->counts[2] = lcount;
\r
290 n->kids[0]->parent = n;
\r
292 n->kids[1]->parent = n;
\r
294 n->kids[2]->parent = n;
\r
296 n->kids[3]->parent = n;
\r
300 node234 *m = snew(node234);
\r
301 m->parent = n->parent;
\r
302 LOG((" splitting a 4-node; created new node %p\n", m));
\r
304 * Insert in a 4-node; split into a 2-node and a
\r
305 * 3-node, and move focus up a level.
\r
307 * I don't think it matters which way round we put the
\r
308 * 2 and the 3. For simplicity, we'll put the 3 first
\r
311 if (np == &n->kids[0]) {
\r
313 m->counts[0] = lcount;
\r
315 m->kids[1] = right;
\r
316 m->counts[1] = rcount;
\r
317 m->elems[1] = n->elems[0];
\r
318 m->kids[2] = n->kids[1];
\r
319 m->counts[2] = n->counts[1];
\r
321 n->kids[0] = n->kids[2];
\r
322 n->counts[0] = n->counts[2];
\r
323 n->elems[0] = n->elems[2];
\r
324 n->kids[1] = n->kids[3];
\r
325 n->counts[1] = n->counts[3];
\r
326 } else if (np == &n->kids[1]) {
\r
327 m->kids[0] = n->kids[0];
\r
328 m->counts[0] = n->counts[0];
\r
329 m->elems[0] = n->elems[0];
\r
331 m->counts[1] = lcount;
\r
333 m->kids[2] = right;
\r
334 m->counts[2] = rcount;
\r
336 n->kids[0] = n->kids[2];
\r
337 n->counts[0] = n->counts[2];
\r
338 n->elems[0] = n->elems[2];
\r
339 n->kids[1] = n->kids[3];
\r
340 n->counts[1] = n->counts[3];
\r
341 } else if (np == &n->kids[2]) {
\r
342 m->kids[0] = n->kids[0];
\r
343 m->counts[0] = n->counts[0];
\r
344 m->elems[0] = n->elems[0];
\r
345 m->kids[1] = n->kids[1];
\r
346 m->counts[1] = n->counts[1];
\r
347 m->elems[1] = n->elems[1];
\r
349 m->counts[2] = lcount;
\r
351 n->kids[0] = right;
\r
352 n->counts[0] = rcount;
\r
353 n->elems[0] = n->elems[2];
\r
354 n->kids[1] = n->kids[3];
\r
355 n->counts[1] = n->counts[3];
\r
356 } else { /* np == &n->kids[3] */
\r
357 m->kids[0] = n->kids[0];
\r
358 m->counts[0] = n->counts[0];
\r
359 m->elems[0] = n->elems[0];
\r
360 m->kids[1] = n->kids[1];
\r
361 m->counts[1] = n->counts[1];
\r
362 m->elems[1] = n->elems[1];
\r
363 m->kids[2] = n->kids[2];
\r
364 m->counts[2] = n->counts[2];
\r
366 n->counts[0] = lcount;
\r
368 n->kids[1] = right;
\r
369 n->counts[1] = rcount;
\r
372 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
\r
373 m->counts[3] = n->counts[3] = n->counts[2] = 0;
\r
374 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
\r
376 m->kids[0]->parent = m;
\r
378 m->kids[1]->parent = m;
\r
380 m->kids[2]->parent = m;
\r
382 n->kids[0]->parent = n;
\r
384 n->kids[1]->parent = n;
\r
385 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
\r
386 m->kids[0], m->counts[0], m->elems[0],
\r
387 m->kids[1], m->counts[1], m->elems[1],
\r
388 m->kids[2], m->counts[2]));
\r
389 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
\r
390 n->kids[0], n->counts[0], n->elems[0],
\r
391 n->kids[1], n->counts[1]));
\r
393 lcount = countnode234(left);
\r
395 rcount = countnode234(right);
\r
398 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
\r
399 n->parent->kids[1] == n ? &n->parent->kids[1] :
\r
400 n->parent->kids[2] == n ? &n->parent->kids[2] :
\r
401 &n->parent->kids[3]);
\r
406 * If we've come out of here by `break', n will still be
\r
407 * non-NULL and all we need to do is go back up the tree
\r
408 * updating counts. If we've come here because n is NULL, we
\r
409 * need to create a new root for the tree because the old one
\r
410 * has just split into two. */
\r
412 while (n->parent) {
\r
413 int count = countnode234(n);
\r
415 childnum = (n->parent->kids[0] == n ? 0 :
\r
416 n->parent->kids[1] == n ? 1 :
\r
417 n->parent->kids[2] == n ? 2 : 3);
\r
418 n->parent->counts[childnum] = count;
\r
422 LOG((" root is overloaded, split into two\n"));
\r
423 t->root = snew(node234);
\r
424 t->root->kids[0] = left;
\r
425 t->root->counts[0] = lcount;
\r
426 t->root->elems[0] = e;
\r
427 t->root->kids[1] = right;
\r
428 t->root->counts[1] = rcount;
\r
429 t->root->elems[1] = NULL;
\r
430 t->root->kids[2] = NULL;
\r
431 t->root->counts[2] = 0;
\r
432 t->root->elems[2] = NULL;
\r
433 t->root->kids[3] = NULL;
\r
434 t->root->counts[3] = 0;
\r
435 t->root->parent = NULL;
\r
436 if (t->root->kids[0])
\r
437 t->root->kids[0]->parent = t->root;
\r
438 if (t->root->kids[1])
\r
439 t->root->kids[1]->parent = t->root;
\r
440 LOG((" new root is %p/%d [%p] %p/%d\n",
\r
441 t->root->kids[0], t->root->counts[0],
\r
442 t->root->elems[0], t->root->kids[1], t->root->counts[1]));
\r
448 void *add234(tree234 * t, void *e)
\r
450 if (!t->cmp) /* tree is unsorted */
\r
453 return add234_internal(t, e, -1);
\r
455 void *addpos234(tree234 * t, void *e, int index)
\r
457 if (index < 0 || /* index out of range */
\r
458 t->cmp) /* tree is sorted */
\r
459 return NULL; /* return failure */
\r
461 return add234_internal(t, e, index); /* this checks the upper bound */
\r
465 * Look up the element at a given numeric index in a 2-3-4 tree.
\r
466 * Returns NULL if the index is out of range.
\r
468 void *index234(tree234 * t, int index)
\r
473 return NULL; /* tree is empty */
\r
475 if (index < 0 || index >= countnode234(t->root))
\r
476 return NULL; /* out of range */
\r
481 if (index < n->counts[0])
\r
483 else if (index -= n->counts[0] + 1, index < 0)
\r
484 return n->elems[0];
\r
485 else if (index < n->counts[1])
\r
487 else if (index -= n->counts[1] + 1, index < 0)
\r
488 return n->elems[1];
\r
489 else if (index < n->counts[2])
\r
491 else if (index -= n->counts[2] + 1, index < 0)
\r
492 return n->elems[2];
\r
497 /* We shouldn't ever get here. I wonder how we did. */
\r
502 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
\r
503 * found. e is always passed as the first argument to cmp, so cmp
\r
504 * can be an asymmetric function if desired. cmp can also be passed
\r
505 * as NULL, in which case the compare function from the tree proper
\r
508 void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp,
\r
509 int relation, int *index)
\r
514 int idx, ecount, kcount, cmpret;
\r
516 if (t->root == NULL)
\r
524 * Attempt to find the element itself.
\r
529 * Prepare a fake `cmp' result if e is NULL.
\r
533 assert(relation == REL234_LT || relation == REL234_GT);
\r
534 if (relation == REL234_LT)
\r
535 cmpret = +1; /* e is a max: always greater */
\r
536 else if (relation == REL234_GT)
\r
537 cmpret = -1; /* e is a min: always smaller */
\r
540 for (kcount = 0; kcount < 4; kcount++) {
\r
541 if (kcount >= 3 || n->elems[kcount] == NULL ||
\r
542 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
\r
545 if (n->kids[kcount])
\r
546 idx += n->counts[kcount];
\r
555 if (n->kids[kcount])
\r
556 n = n->kids[kcount];
\r
563 * We have found the element we're looking for. It's
\r
564 * n->elems[ecount], at tree index idx. If our search
\r
565 * relation is EQ, LE or GE we can now go home.
\r
567 if (relation != REL234_LT && relation != REL234_GT) {
\r
570 return n->elems[ecount];
\r
574 * Otherwise, we'll do an indexed lookup for the previous
\r
575 * or next element. (It would be perfectly possible to
\r
576 * implement these search types in a non-counted tree by
\r
577 * going back up from where we are, but far more fiddly.)
\r
579 if (relation == REL234_LT)
\r
585 * We've found our way to the bottom of the tree and we
\r
586 * know where we would insert this node if we wanted to:
\r
587 * we'd put it in in place of the (empty) subtree
\r
588 * n->kids[kcount], and it would have index idx
\r
590 * But the actual element isn't there. So if our search
\r
591 * relation is EQ, we're doomed.
\r
593 if (relation == REL234_EQ)
\r
597 * Otherwise, we must do an index lookup for index idx-1
\r
598 * (if we're going left - LE or LT) or index idx (if we're
\r
599 * going right - GE or GT).
\r
601 if (relation == REL234_LT || relation == REL234_LE) {
\r
607 * We know the index of the element we want; just call index234
\r
608 * to do the rest. This will return NULL if the index is out of
\r
609 * bounds, which is exactly what we want.
\r
611 ret = index234(t, idx);
\r
616 void *find234(tree234 * t, void *e, cmpfn234 cmp)
\r
618 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
\r
620 void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation)
\r
622 return findrelpos234(t, e, cmp, relation, NULL);
\r
624 void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index)
\r
626 return findrelpos234(t, e, cmp, REL234_EQ, index);
\r
630 * Delete an element e in a 2-3-4 tree. Does not free the element,
\r
631 * merely removes all links to it from the tree nodes.
\r
633 static void *delpos234_internal(tree234 * t, int index)
\r
642 LOG(("deleting item %d from tree %p\n", index, t));
\r
649 (" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
\r
650 n, n->kids[0], n->counts[0], n->elems[0], n->kids[1],
\r
651 n->counts[1], n->elems[1], n->kids[2], n->counts[2],
\r
652 n->elems[2], n->kids[3], n->counts[3], index));
\r
653 if (index < n->counts[0]) {
\r
655 } else if (index -= n->counts[0] + 1, index < 0) {
\r
658 } else if (index < n->counts[1]) {
\r
660 } else if (index -= n->counts[1] + 1, index < 0) {
\r
663 } else if (index < n->counts[2]) {
\r
665 } else if (index -= n->counts[2] + 1, index < 0) {
\r
672 * Recurse down to subtree ki. If it has only one element,
\r
673 * we have to do some transformation to start with.
\r
675 LOG((" moving to subtree %d\n", ki));
\r
677 if (!sub->elems[1]) {
\r
678 LOG((" subtree has only one element!\n", ki));
\r
679 if (ki > 0 && n->kids[ki - 1]->elems[1]) {
\r
681 * Case 3a, left-handed variant. Child ki has
\r
682 * only one element, but child ki-1 has two or
\r
683 * more. So we need to move a subtree from ki-1
\r
688 * [more] a A b B c d D e [more] a A b c C d D e
\r
690 node234 *sib = n->kids[ki - 1];
\r
691 int lastelem = (sib->elems[2] ? 2 :
\r
692 sib->elems[1] ? 1 : 0);
\r
693 sub->kids[2] = sub->kids[1];
\r
694 sub->counts[2] = sub->counts[1];
\r
695 sub->elems[1] = sub->elems[0];
\r
696 sub->kids[1] = sub->kids[0];
\r
697 sub->counts[1] = sub->counts[0];
\r
698 sub->elems[0] = n->elems[ki - 1];
\r
699 sub->kids[0] = sib->kids[lastelem + 1];
\r
700 sub->counts[0] = sib->counts[lastelem + 1];
\r
702 sub->kids[0]->parent = sub;
\r
703 n->elems[ki - 1] = sib->elems[lastelem];
\r
704 sib->kids[lastelem + 1] = NULL;
\r
705 sib->counts[lastelem + 1] = 0;
\r
706 sib->elems[lastelem] = NULL;
\r
707 n->counts[ki] = countnode234(sub);
\r
708 LOG((" case 3a left\n"));
\r
710 (" index and left subtree count before adjustment: %d, %d\n",
\r
711 index, n->counts[ki - 1]));
\r
712 index += n->counts[ki - 1];
\r
713 n->counts[ki - 1] = countnode234(sib);
\r
714 index -= n->counts[ki - 1];
\r
716 (" index and left subtree count after adjustment: %d, %d\n",
\r
717 index, n->counts[ki - 1]));
\r
718 } else if (ki < 3 && n->kids[ki + 1]
\r
719 && n->kids[ki + 1]->elems[1]) {
\r
721 * Case 3a, right-handed variant. ki has only
\r
722 * one element but ki+1 has two or more. Move a
\r
723 * subtree from ki+1 to ki.
\r
727 * a A b c C d D e [more] a A b B c d D e [more]
\r
729 node234 *sib = n->kids[ki + 1];
\r
731 sub->elems[1] = n->elems[ki];
\r
732 sub->kids[2] = sib->kids[0];
\r
733 sub->counts[2] = sib->counts[0];
\r
735 sub->kids[2]->parent = sub;
\r
736 n->elems[ki] = sib->elems[0];
\r
737 sib->kids[0] = sib->kids[1];
\r
738 sib->counts[0] = sib->counts[1];
\r
739 for (j = 0; j < 2 && sib->elems[j + 1]; j++) {
\r
740 sib->kids[j + 1] = sib->kids[j + 2];
\r
741 sib->counts[j + 1] = sib->counts[j + 2];
\r
742 sib->elems[j] = sib->elems[j + 1];
\r
744 sib->kids[j + 1] = NULL;
\r
745 sib->counts[j + 1] = 0;
\r
746 sib->elems[j] = NULL;
\r
747 n->counts[ki] = countnode234(sub);
\r
748 n->counts[ki + 1] = countnode234(sib);
\r
749 LOG((" case 3a right\n"));
\r
752 * Case 3b. ki has only one element, and has no
\r
753 * neighbour with more than one. So pick a
\r
754 * neighbour and merge it with ki, taking an
\r
755 * element down from n to go in the middle.
\r
759 * a A b c C d a A b B c C d
\r
761 * (Since at all points we have avoided
\r
762 * descending to a node with only one element,
\r
763 * we can be sure that n is not reduced to
\r
764 * nothingness by this move, _unless_ it was
\r
765 * the very first node, ie the root of the
\r
766 * tree. In that case we remove the now-empty
\r
767 * root and replace it with its single large
\r
775 index += n->counts[ki] + 1;
\r
778 sub = n->kids[ki + 1];
\r
780 sub->kids[3] = sub->kids[1];
\r
781 sub->counts[3] = sub->counts[1];
\r
782 sub->elems[2] = sub->elems[0];
\r
783 sub->kids[2] = sub->kids[0];
\r
784 sub->counts[2] = sub->counts[0];
\r
785 sub->elems[1] = n->elems[ki];
\r
786 sub->kids[1] = sib->kids[1];
\r
787 sub->counts[1] = sib->counts[1];
\r
789 sub->kids[1]->parent = sub;
\r
790 sub->elems[0] = sib->elems[0];
\r
791 sub->kids[0] = sib->kids[0];
\r
792 sub->counts[0] = sib->counts[0];
\r
794 sub->kids[0]->parent = sub;
\r
796 n->counts[ki + 1] = countnode234(sub);
\r
801 * That's built the big node in sub. Now we
\r
802 * need to remove the reference to sib in n.
\r
804 for (j = ki; j < 3 && n->kids[j + 1]; j++) {
\r
805 n->kids[j] = n->kids[j + 1];
\r
806 n->counts[j] = n->counts[j + 1];
\r
807 n->elems[j] = j < 2 ? n->elems[j + 1] : NULL;
\r
812 n->elems[j] = NULL;
\r
813 LOG((" case 3b ki=%d\n", ki));
\r
815 if (!n->elems[0]) {
\r
817 * The root is empty and needs to be
\r
820 LOG((" shifting root!\n"));
\r
822 sub->parent = NULL;
\r
830 retval = n->elems[ei];
\r
833 return NULL; /* although this shouldn't happen */
\r
836 * Treat special case: this is the one remaining item in
\r
837 * the tree. n is the tree root (no parent), has one
\r
838 * element (no elems[1]), and has no kids (no kids[0]).
\r
840 if (!n->parent && !n->elems[1] && !n->kids[0]) {
\r
841 LOG((" removed last element in tree\n"));
\r
848 * Now we have the element we want, as n->elems[ei], and we
\r
849 * have also arranged for that element not to be the only
\r
850 * one in its node. So...
\r
853 if (!n->kids[0] && n->elems[1]) {
\r
855 * Case 1. n is a leaf node with more than one element,
\r
856 * so it's _really easy_. Just delete the thing and
\r
860 LOG((" case 1\n"));
\r
861 for (i = ei; i < 2 && n->elems[i + 1]; i++)
\r
862 n->elems[i] = n->elems[i + 1];
\r
863 n->elems[i] = NULL;
\r
865 * Having done that to the leaf node, we now go back up
\r
866 * the tree fixing the counts.
\r
868 while (n->parent) {
\r
870 childnum = (n->parent->kids[0] == n ? 0 :
\r
871 n->parent->kids[1] == n ? 1 :
\r
872 n->parent->kids[2] == n ? 2 : 3);
\r
873 n->parent->counts[childnum]--;
\r
876 return retval; /* finished! */
\r
877 } else if (n->kids[ei]->elems[1]) {
\r
879 * Case 2a. n is an internal node, and the root of the
\r
880 * subtree to the left of e has more than one element.
\r
881 * So find the predecessor p to e (ie the largest node
\r
882 * in that subtree), place it where e currently is, and
\r
883 * then start the deletion process over again on the
\r
884 * subtree with p as target.
\r
886 node234 *m = n->kids[ei];
\r
888 LOG((" case 2a\n"));
\r
889 while (m->kids[0]) {
\r
890 m = (m->kids[3] ? m->kids[3] :
\r
891 m->kids[2] ? m->kids[2] :
\r
892 m->kids[1] ? m->kids[1] : m->kids[0]);
\r
894 target = (m->elems[2] ? m->elems[2] :
\r
895 m->elems[1] ? m->elems[1] : m->elems[0]);
\r
896 n->elems[ei] = target;
\r
897 index = n->counts[ei] - 1;
\r
899 } else if (n->kids[ei + 1]->elems[1]) {
\r
901 * Case 2b, symmetric to 2a but s/left/right/ and
\r
902 * s/predecessor/successor/. (And s/largest/smallest/).
\r
904 node234 *m = n->kids[ei + 1];
\r
906 LOG((" case 2b\n"));
\r
907 while (m->kids[0]) {
\r
910 target = m->elems[0];
\r
911 n->elems[ei] = target;
\r
912 n = n->kids[ei + 1];
\r
916 * Case 2c. n is an internal node, and the subtrees to
\r
917 * the left and right of e both have only one element.
\r
918 * So combine the two subnodes into a single big node
\r
919 * with their own elements on the left and right and e
\r
920 * in the middle, then restart the deletion process on
\r
921 * that subtree, with e still as target.
\r
923 node234 *a = n->kids[ei], *b = n->kids[ei + 1];
\r
926 LOG((" case 2c\n"));
\r
927 a->elems[1] = n->elems[ei];
\r
928 a->kids[2] = b->kids[0];
\r
929 a->counts[2] = b->counts[0];
\r
931 a->kids[2]->parent = a;
\r
932 a->elems[2] = b->elems[0];
\r
933 a->kids[3] = b->kids[1];
\r
934 a->counts[3] = b->counts[1];
\r
936 a->kids[3]->parent = a;
\r
938 n->counts[ei] = countnode234(a);
\r
940 * That's built the big node in a, and destroyed b. Now
\r
941 * remove the reference to b (and e) in n.
\r
943 for (j = ei; j < 2 && n->elems[j + 1]; j++) {
\r
944 n->elems[j] = n->elems[j + 1];
\r
945 n->kids[j + 1] = n->kids[j + 2];
\r
946 n->counts[j + 1] = n->counts[j + 2];
\r
948 n->elems[j] = NULL;
\r
949 n->kids[j + 1] = NULL;
\r
950 n->counts[j + 1] = 0;
\r
952 * It's possible, in this case, that we've just removed
\r
953 * the only element in the root of the tree. If so,
\r
956 if (n->elems[0] == NULL) {
\r
957 LOG((" shifting root!\n"));
\r
963 * Now go round the deletion process again, with n
\r
964 * pointing at the new big node and e still the same.
\r
967 index = a->counts[0] + a->counts[1] + 1;
\r
971 void *delpos234(tree234 * t, int index)
\r
973 if (index < 0 || index >= countnode234(t->root))
\r
975 return delpos234_internal(t, index);
\r
977 void *del234(tree234 * t, void *e)
\r
980 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
\r
981 return NULL; /* it wasn't in there anyway */
\r
982 return delpos234_internal(t, index); /* it's there; delete it. */
\r
988 * Test code for the 2-3-4 tree. This code maintains an alternative
\r
989 * representation of the data in the tree, in an array (using the
\r
990 * obvious and slow insert and delete functions). After each tree
\r
991 * operation, the verify() function is called, which ensures all
\r
992 * the tree properties are preserved:
\r
993 * - node->child->parent always equals node
\r
994 * - tree->root->parent always equals NULL
\r
995 * - number of kids == 0 or number of elements + 1;
\r
996 * - tree has the same depth everywhere
\r
997 * - every node has at least one element
\r
998 * - subtree element counts are accurate
\r
999 * - any NULL kid pointer is accompanied by a zero count
\r
1000 * - in a sorted tree: ordering property between elements of a
\r
1001 * node and elements of its children is preserved
\r
1002 * and also ensures the list represented by the tree is the same
\r
1003 * list it should be. (This last check also doubly verifies the
\r
1004 * ordering properties, because the `same list it should be' is by
\r
1005 * definition correctly ordered. It also ensures all nodes are
\r
1006 * distinct, because the enum functions would get caught in a loop
\r
1010 #include <stdarg.h>
\r
1013 * Error reporting function.
\r
1015 void error(char *fmt, ...)
\r
1018 printf("ERROR: ");
\r
1019 va_start(ap, fmt);
\r
1020 vfprintf(stdout, fmt, ap);
\r
1025 /* The array representation of the data. */
\r
1027 int arraylen, arraysize;
\r
1030 /* The tree representation of the same data. */
\r
1038 int chknode(chkctx * ctx, int level, node234 * node,
\r
1039 void *lowbound, void *highbound)
\r
1041 int nkids, nelems;
\r
1045 /* Count the non-NULL kids. */
\r
1046 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
\r
1047 /* Ensure no kids beyond the first NULL are non-NULL. */
\r
1048 for (i = nkids; i < 4; i++)
\r
1049 if (node->kids[i]) {
\r
1050 error("node %p: nkids=%d but kids[%d] non-NULL",
\r
1052 } else if (node->counts[i]) {
\r
1053 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
\r
1054 node, i, i, node->counts[i]);
\r
1057 /* Count the non-NULL elements. */
\r
1058 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
\r
1059 /* Ensure no elements beyond the first NULL are non-NULL. */
\r
1060 for (i = nelems; i < 3; i++)
\r
1061 if (node->elems[i]) {
\r
1062 error("node %p: nelems=%d but elems[%d] non-NULL",
\r
1068 * If nkids==0, this is a leaf node; verify that the tree
\r
1069 * depth is the same everywhere.
\r
1071 if (ctx->treedepth < 0)
\r
1072 ctx->treedepth = level; /* we didn't know the depth yet */
\r
1073 else if (ctx->treedepth != level)
\r
1074 error("node %p: leaf at depth %d, previously seen depth %d",
\r
1075 node, level, ctx->treedepth);
\r
1078 * If nkids != 0, then it should be nelems+1, unless nelems
\r
1079 * is 0 in which case nkids should also be 0 (and so we
\r
1080 * shouldn't be in this condition at all).
\r
1082 int shouldkids = (nelems ? nelems + 1 : 0);
\r
1083 if (nkids != shouldkids) {
\r
1084 error("node %p: %d elems should mean %d kids but has %d",
\r
1085 node, nelems, shouldkids, nkids);
\r
1090 * nelems should be at least 1.
\r
1092 if (nelems == 0) {
\r
1093 error("node %p: no elems", node, nkids);
\r
1097 * Add nelems to the running element count of the whole tree.
\r
1099 ctx->elemcount += nelems;
\r
1102 * Check ordering property: all elements should be strictly >
\r
1103 * lowbound, strictly < highbound, and strictly < each other in
\r
1104 * sequence. (lowbound and highbound are NULL at edges of tree
\r
1105 * - both NULL at root node - and NULL is considered to be <
\r
1106 * everything and > everything. IYSWIM.)
\r
1109 for (i = -1; i < nelems; i++) {
\r
1110 void *lower = (i == -1 ? lowbound : node->elems[i]);
\r
1112 (i + 1 == nelems ? highbound : node->elems[i + 1]);
\r
1113 if (lower && higher && cmp(lower, higher) >= 0) {
\r
1114 error("node %p: kid comparison [%d=%s,%d=%s] failed",
\r
1115 node, i, lower, i + 1, higher);
\r
1121 * Check parent pointers: all non-NULL kids should have a
\r
1122 * parent pointer coming back to this node.
\r
1124 for (i = 0; i < nkids; i++)
\r
1125 if (node->kids[i]->parent != node) {
\r
1126 error("node %p kid %d: parent ptr is %p not %p",
\r
1127 node, i, node->kids[i]->parent, node);
\r
1132 * Now (finally!) recurse into subtrees.
\r
1136 for (i = 0; i < nkids; i++) {
\r
1137 void *lower = (i == 0 ? lowbound : node->elems[i - 1]);
\r
1138 void *higher = (i >= nelems ? highbound : node->elems[i]);
\r
1140 chknode(ctx, level + 1, node->kids[i], lower, higher);
\r
1141 if (node->counts[i] != subcount) {
\r
1142 error("node %p kid %d: count says %d, subtree really has %d",
\r
1143 node, i, node->counts[i], subcount);
\r
1145 count += subcount;
\r
1157 ctx.treedepth = -1; /* depth unknown yet */
\r
1158 ctx.elemcount = 0; /* no elements seen yet */
\r
1160 * Verify validity of tree properties.
\r
1163 if (tree->root->parent != NULL)
\r
1164 error("root->parent is %p should be null", tree->root->parent);
\r
1165 chknode(&ctx, 0, tree->root, NULL, NULL);
\r
1167 printf("tree depth: %d\n", ctx.treedepth);
\r
1169 * Enumerate the tree and ensure it matches up to the array.
\r
1171 for (i = 0; NULL != (p = index234(tree, i)); i++) {
\r
1172 if (i >= arraylen)
\r
1173 error("tree contains more than %d elements", arraylen);
\r
1174 if (array[i] != p)
\r
1175 error("enum at position %d: array says %s, tree says %s",
\r
1178 if (ctx.elemcount != i) {
\r
1179 error("tree really contains %d elements, enum gave %d",
\r
1180 ctx.elemcount, i);
\r
1182 if (i < arraylen) {
\r
1183 error("enum gave only %d elements, array has %d", i, arraylen);
\r
1185 i = count234(tree);
\r
1186 if (ctx.elemcount != i) {
\r
1187 error("tree really contains %d elements, count234 gave %d",
\r
1188 ctx.elemcount, i);
\r
1192 void internal_addtest(void *elem, int index, void *realret)
\r
1197 if (arraysize < arraylen + 1) {
\r
1198 arraysize = arraylen + 1 + 256;
\r
1199 array = sresize(array, arraysize, void *);
\r
1203 /* now i points to the first element >= elem */
\r
1204 retval = elem; /* expect elem returned (success) */
\r
1205 for (j = arraylen; j > i; j--)
\r
1206 array[j] = array[j - 1];
\r
1207 array[i] = elem; /* add elem to array */
\r
1210 if (realret != retval) {
\r
1211 error("add: retval was %p expected %p", realret, retval);
\r
1217 void addtest(void *elem)
\r
1222 realret = add234(tree, elem);
\r
1225 while (i < arraylen && cmp(elem, array[i]) > 0)
\r
1227 if (i < arraylen && !cmp(elem, array[i])) {
\r
1228 void *retval = array[i]; /* expect that returned not elem */
\r
1229 if (realret != retval) {
\r
1230 error("add: retval was %p expected %p", realret, retval);
\r
1233 internal_addtest(elem, i, realret);
\r
1236 void addpostest(void *elem, int i)
\r
1240 realret = addpos234(tree, elem, i);
\r
1242 internal_addtest(elem, i, realret);
\r
1245 void delpostest(int i)
\r
1248 void *elem = array[i], *ret;
\r
1250 /* i points to the right element */
\r
1251 while (i < arraylen - 1) {
\r
1252 array[i] = array[i + 1];
\r
1255 arraylen--; /* delete elem from array */
\r
1258 ret = del234(tree, elem);
\r
1260 ret = delpos234(tree, index);
\r
1262 if (ret != elem) {
\r
1263 error("del returned %p, expected %p", ret, elem);
\r
1269 void deltest(void *elem)
\r
1274 while (i < arraylen && cmp(elem, array[i]) > 0)
\r
1276 if (i >= arraylen || cmp(elem, array[i]) != 0)
\r
1277 return; /* don't do it! */
\r
1281 /* A sample data set and test utility. Designed for pseudo-randomness,
\r
1282 * and yet repeatability. */
\r
1285 * This random number generator uses the `portable implementation'
\r
1286 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
\r
1287 * change it if not.
\r
1289 int randomnumber(unsigned *seed)
\r
1291 *seed *= 1103515245;
\r
1293 return ((*seed) / 65536) % 32768;
\r
1296 int mycmp(void *av, void *bv)
\r
1298 char const *a = (char const *) av;
\r
1299 char const *b = (char const *) bv;
\r
1300 return strcmp(a, b);
\r
1303 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
\r
1305 char *strings[] = {
\r
1306 "a", "ab", "absque", "coram", "de",
\r
1307 "palam", "clam", "cum", "ex", "e",
\r
1308 "sine", "tenus", "pro", "prae",
\r
1309 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
\r
1310 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
\r
1311 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
\r
1312 "murfl", "spoo", "breen", "flarn", "octothorpe",
\r
1313 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
\r
1314 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
\r
1315 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
\r
1316 "wand", "ring", "amulet"
\r
1319 #define NSTR lenof(strings)
\r
1321 int findtest(void)
\r
1323 const static int rels[] = {
\r
1324 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
\r
1326 const static char *const relnames[] = {
\r
1327 "EQ", "GE", "LE", "LT", "GT"
\r
1329 int i, j, rel, index;
\r
1330 char *p, *ret, *realret, *realret2;
\r
1331 int lo, hi, mid, c;
\r
1333 for (i = 0; i < NSTR; i++) {
\r
1335 for (j = 0; j < sizeof(rels) / sizeof(*rels); j++) {
\r
1339 hi = arraylen - 1;
\r
1340 while (lo <= hi) {
\r
1341 mid = (lo + hi) / 2;
\r
1342 c = strcmp(p, array[mid]);
\r
1352 if (rel == REL234_LT)
\r
1353 ret = (mid > 0 ? array[--mid] : NULL);
\r
1354 else if (rel == REL234_GT)
\r
1355 ret = (mid < arraylen - 1 ? array[++mid] : NULL);
\r
1359 assert(lo == hi + 1);
\r
1360 if (rel == REL234_LT || rel == REL234_LE) {
\r
1362 ret = (hi >= 0 ? array[hi] : NULL);
\r
1363 } else if (rel == REL234_GT || rel == REL234_GE) {
\r
1365 ret = (lo < arraylen ? array[lo] : NULL);
\r
1370 realret = findrelpos234(tree, p, NULL, rel, &index);
\r
1371 if (realret != ret) {
\r
1372 error("find(\"%s\",%s) gave %s should be %s",
\r
1373 p, relnames[j], realret, ret);
\r
1375 if (realret && index != mid) {
\r
1376 error("find(\"%s\",%s) gave %d should be %d",
\r
1377 p, relnames[j], index, mid);
\r
1379 if (realret && rel == REL234_EQ) {
\r
1380 realret2 = index234(tree, index);
\r
1381 if (realret2 != realret) {
\r
1382 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
\r
1383 p, relnames[j], realret, index, index, realret2);
\r
1387 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
\r
1393 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
\r
1394 if (arraylen && (realret != array[0] || index != 0)) {
\r
1395 error("find(NULL,GT) gave %s(%d) should be %s(0)",
\r
1396 realret, index, array[0]);
\r
1397 } else if (!arraylen && (realret != NULL)) {
\r
1398 error("find(NULL,GT) gave %s(%d) should be NULL", realret, index);
\r
1401 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
\r
1403 && (realret != array[arraylen - 1] || index != arraylen - 1)) {
\r
1404 error("find(NULL,LT) gave %s(%d) should be %s(0)", realret, index,
\r
1405 array[arraylen - 1]);
\r
1406 } else if (!arraylen && (realret != NULL)) {
\r
1407 error("find(NULL,LT) gave %s(%d) should be NULL", realret, index);
\r
1415 unsigned seed = 0;
\r
1417 for (i = 0; i < NSTR; i++)
\r
1420 arraylen = arraysize = 0;
\r
1421 tree = newtree234(mycmp);
\r
1425 for (i = 0; i < 10000; i++) {
\r
1426 j = randomnumber(&seed);
\r
1428 printf("trial: %d\n", i);
\r
1430 printf("deleting %s (%d)\n", strings[j], j);
\r
1431 deltest(strings[j]);
\r
1434 printf("adding %s (%d)\n", strings[j], j);
\r
1435 addtest(strings[j]);
\r
1441 while (arraylen > 0) {
\r
1442 j = randomnumber(&seed);
\r
1444 deltest(array[j]);
\r
1447 freetree234(tree);
\r
1450 * Now try an unsorted tree. We don't really need to test
\r
1451 * delpos234 because we know del234 is based on it, so it's
\r
1452 * already been tested in the above sorted-tree code; but for
\r
1453 * completeness we'll use it to tear down our unsorted tree
\r
1454 * once we've built it.
\r
1456 tree = newtree234(NULL);
\r
1459 for (i = 0; i < 1000; i++) {
\r
1460 printf("trial: %d\n", i);
\r
1461 j = randomnumber(&seed);
\r
1463 k = randomnumber(&seed);
\r
1464 k %= count234(tree) + 1;
\r
1465 printf("adding string %s at index %d\n", strings[j], k);
\r
1466 addpostest(strings[j], k);
\r
1468 while (count234(tree) > 0) {
\r
1469 printf("cleanup: tree size %d\n", count234(tree));
\r
1470 j = randomnumber(&seed);
\r
1471 j %= count234(tree);
\r
1472 printf("deleting string %s from index %d\n", array[j], j);
\r