2 * tree234.c: reasonably generic counted 2-3-4 tree routines.
\r
4 * This file is copyright 1999-2001 Simon Tatham.
\r
6 * Permission is hereby granted, free of charge, to any person
\r
7 * obtaining a copy of this software and associated documentation
\r
8 * files (the "Software"), to deal in the Software without
\r
9 * restriction, including without limitation the rights to use,
\r
10 * copy, modify, merge, publish, distribute, sublicense, and/or
\r
11 * sell copies of the Software, and to permit persons to whom the
\r
12 * Software is furnished to do so, subject to the following
\r
15 * The above copyright notice and this permission notice shall be
\r
16 * included in all copies or substantial portions of the Software.
\r
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
\r
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
\r
20 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
\r
21 * NONINFRINGEMENT. IN NO EVENT SHALL SIMON TATHAM BE LIABLE FOR
\r
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
\r
23 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
\r
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
\r
32 #include "tree234.h"
\r
35 #define LOG(x) (printf x)
\r
36 #define snew(type) ((type *)malloc(sizeof(type)))
\r
37 #define snewn(n, type) ((type *)malloc((n) * sizeof(type)))
\r
38 #define sresize(ptr, n, type) \
\r
39 ((type *)realloc(sizeof((type *)0 == (ptr)) ? (ptr) : (ptr), \
\r
40 (n) * sizeof(type)))
\r
41 #define sfree(ptr) free(ptr)
\r
43 #include "puttymem.h"
\r
47 typedef struct node234_Tag node234;
\r
49 struct tree234_Tag {
\r
54 struct node234_Tag {
\r
62 * Create a 2-3-4 tree.
\r
64 tree234 *newtree234(cmpfn234 cmp)
\r
66 tree234 *ret = snew(tree234);
\r
67 LOG(("created tree %p\n", ret));
\r
74 * Free a 2-3-4 tree (not including freeing the elements).
\r
76 static void freenode234(node234 * n)
\r
80 freenode234(n->kids[0]);
\r
81 freenode234(n->kids[1]);
\r
82 freenode234(n->kids[2]);
\r
83 freenode234(n->kids[3]);
\r
87 void freetree234(tree234 * t)
\r
89 freenode234(t->root);
\r
94 * Internal function to count a node.
\r
96 static int countnode234(node234 * n)
\r
102 for (i = 0; i < 4; i++)
\r
103 count += n->counts[i];
\r
104 for (i = 0; i < 3; i++)
\r
111 * Count the elements in a tree.
\r
113 int count234(tree234 * t)
\r
116 return countnode234(t->root);
\r
122 * Add an element e to a 2-3-4 tree t. Returns e on success, or if
\r
123 * an existing element compares equal, returns that.
\r
125 static void *add234_internal(tree234 * t, void *e, int index)
\r
127 node234 *n, **np, *left, *right;
\r
129 int c, lcount, rcount;
\r
131 LOG(("adding node %p to tree %p\n", e, t));
\r
132 if (t->root == NULL) {
\r
133 t->root = snew(node234);
\r
134 t->root->elems[1] = t->root->elems[2] = NULL;
\r
135 t->root->kids[0] = t->root->kids[1] = NULL;
\r
136 t->root->kids[2] = t->root->kids[3] = NULL;
\r
137 t->root->counts[0] = t->root->counts[1] = 0;
\r
138 t->root->counts[2] = t->root->counts[3] = 0;
\r
139 t->root->parent = NULL;
\r
140 t->root->elems[0] = e;
\r
141 LOG((" created root %p\n", t->root));
\r
145 n = NULL; /* placate gcc; will always be set below since t->root != NULL */
\r
150 LOG((" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
\r
152 n->kids[0], n->counts[0], n->elems[0],
\r
153 n->kids[1], n->counts[1], n->elems[1],
\r
154 n->kids[2], n->counts[2], n->elems[2],
\r
155 n->kids[3], n->counts[3]));
\r
159 * Leaf node. We want to insert at kid position
\r
160 * equal to the index:
\r
167 * Internal node. We always descend through it (add
\r
168 * always starts at the bottom, never in the
\r
171 do { /* this is a do ... while (0) to allow `break' */
\r
172 if (index <= n->counts[0]) {
\r
176 index -= n->counts[0] + 1;
\r
177 if (index <= n->counts[1]) {
\r
181 index -= n->counts[1] + 1;
\r
182 if (index <= n->counts[2]) {
\r
186 index -= n->counts[2] + 1;
\r
187 if (index <= n->counts[3]) {
\r
191 return NULL; /* error: index out of range */
\r
195 if ((c = t->cmp(e, n->elems[0])) < 0)
\r
198 return n->elems[0]; /* already exists */
\r
199 else if (n->elems[1] == NULL
\r
200 || (c = t->cmp(e, n->elems[1])) < 0) childnum = 1;
\r
202 return n->elems[1]; /* already exists */
\r
203 else if (n->elems[2] == NULL
\r
204 || (c = t->cmp(e, n->elems[2])) < 0) childnum = 2;
\r
206 return n->elems[2]; /* already exists */
\r
210 np = &n->kids[childnum];
\r
211 LOG((" moving to child %d (%p)\n", childnum, *np));
\r
215 * We need to insert the new element in n at position np.
\r
222 LOG((" at %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d\n",
\r
224 n->kids[0], n->counts[0], n->elems[0],
\r
225 n->kids[1], n->counts[1], n->elems[1],
\r
226 n->kids[2], n->counts[2], n->elems[2],
\r
227 n->kids[3], n->counts[3]));
\r
228 LOG((" need to insert %p/%d [%p] %p/%d at position %d\n",
\r
229 left, lcount, e, right, rcount, (int)(np - n->kids)));
\r
230 if (n->elems[1] == NULL) {
\r
232 * Insert in a 2-node; simple.
\r
234 if (np == &n->kids[0]) {
\r
235 LOG((" inserting on left of 2-node\n"));
\r
236 n->kids[2] = n->kids[1];
\r
237 n->counts[2] = n->counts[1];
\r
238 n->elems[1] = n->elems[0];
\r
239 n->kids[1] = right;
\r
240 n->counts[1] = rcount;
\r
243 n->counts[0] = lcount;
\r
244 } else { /* np == &n->kids[1] */
\r
245 LOG((" inserting on right of 2-node\n"));
\r
246 n->kids[2] = right;
\r
247 n->counts[2] = rcount;
\r
250 n->counts[1] = lcount;
\r
253 n->kids[0]->parent = n;
\r
255 n->kids[1]->parent = n;
\r
257 n->kids[2]->parent = n;
\r
260 } else if (n->elems[2] == NULL) {
\r
262 * Insert in a 3-node; simple.
\r
264 if (np == &n->kids[0]) {
\r
265 LOG((" inserting on left of 3-node\n"));
\r
266 n->kids[3] = n->kids[2];
\r
267 n->counts[3] = n->counts[2];
\r
268 n->elems[2] = n->elems[1];
\r
269 n->kids[2] = n->kids[1];
\r
270 n->counts[2] = n->counts[1];
\r
271 n->elems[1] = n->elems[0];
\r
272 n->kids[1] = right;
\r
273 n->counts[1] = rcount;
\r
276 n->counts[0] = lcount;
\r
277 } else if (np == &n->kids[1]) {
\r
278 LOG((" inserting in middle of 3-node\n"));
\r
279 n->kids[3] = n->kids[2];
\r
280 n->counts[3] = n->counts[2];
\r
281 n->elems[2] = n->elems[1];
\r
282 n->kids[2] = right;
\r
283 n->counts[2] = rcount;
\r
286 n->counts[1] = lcount;
\r
287 } else { /* np == &n->kids[2] */
\r
288 LOG((" inserting on right of 3-node\n"));
\r
289 n->kids[3] = right;
\r
290 n->counts[3] = rcount;
\r
293 n->counts[2] = lcount;
\r
296 n->kids[0]->parent = n;
\r
298 n->kids[1]->parent = n;
\r
300 n->kids[2]->parent = n;
\r
302 n->kids[3]->parent = n;
\r
306 node234 *m = snew(node234);
\r
307 m->parent = n->parent;
\r
308 LOG((" splitting a 4-node; created new node %p\n", m));
\r
310 * Insert in a 4-node; split into a 2-node and a
\r
311 * 3-node, and move focus up a level.
\r
313 * I don't think it matters which way round we put the
\r
314 * 2 and the 3. For simplicity, we'll put the 3 first
\r
317 if (np == &n->kids[0]) {
\r
319 m->counts[0] = lcount;
\r
321 m->kids[1] = right;
\r
322 m->counts[1] = rcount;
\r
323 m->elems[1] = n->elems[0];
\r
324 m->kids[2] = n->kids[1];
\r
325 m->counts[2] = n->counts[1];
\r
327 n->kids[0] = n->kids[2];
\r
328 n->counts[0] = n->counts[2];
\r
329 n->elems[0] = n->elems[2];
\r
330 n->kids[1] = n->kids[3];
\r
331 n->counts[1] = n->counts[3];
\r
332 } else if (np == &n->kids[1]) {
\r
333 m->kids[0] = n->kids[0];
\r
334 m->counts[0] = n->counts[0];
\r
335 m->elems[0] = n->elems[0];
\r
337 m->counts[1] = lcount;
\r
339 m->kids[2] = right;
\r
340 m->counts[2] = rcount;
\r
342 n->kids[0] = n->kids[2];
\r
343 n->counts[0] = n->counts[2];
\r
344 n->elems[0] = n->elems[2];
\r
345 n->kids[1] = n->kids[3];
\r
346 n->counts[1] = n->counts[3];
\r
347 } else if (np == &n->kids[2]) {
\r
348 m->kids[0] = n->kids[0];
\r
349 m->counts[0] = n->counts[0];
\r
350 m->elems[0] = n->elems[0];
\r
351 m->kids[1] = n->kids[1];
\r
352 m->counts[1] = n->counts[1];
\r
353 m->elems[1] = n->elems[1];
\r
355 m->counts[2] = lcount;
\r
357 n->kids[0] = right;
\r
358 n->counts[0] = rcount;
\r
359 n->elems[0] = n->elems[2];
\r
360 n->kids[1] = n->kids[3];
\r
361 n->counts[1] = n->counts[3];
\r
362 } else { /* np == &n->kids[3] */
\r
363 m->kids[0] = n->kids[0];
\r
364 m->counts[0] = n->counts[0];
\r
365 m->elems[0] = n->elems[0];
\r
366 m->kids[1] = n->kids[1];
\r
367 m->counts[1] = n->counts[1];
\r
368 m->elems[1] = n->elems[1];
\r
369 m->kids[2] = n->kids[2];
\r
370 m->counts[2] = n->counts[2];
\r
372 n->counts[0] = lcount;
\r
374 n->kids[1] = right;
\r
375 n->counts[1] = rcount;
\r
378 m->kids[3] = n->kids[3] = n->kids[2] = NULL;
\r
379 m->counts[3] = n->counts[3] = n->counts[2] = 0;
\r
380 m->elems[2] = n->elems[2] = n->elems[1] = NULL;
\r
382 m->kids[0]->parent = m;
\r
384 m->kids[1]->parent = m;
\r
386 m->kids[2]->parent = m;
\r
388 n->kids[0]->parent = n;
\r
390 n->kids[1]->parent = n;
\r
391 LOG((" left (%p): %p/%d [%p] %p/%d [%p] %p/%d\n", m,
\r
392 m->kids[0], m->counts[0], m->elems[0],
\r
393 m->kids[1], m->counts[1], m->elems[1],
\r
394 m->kids[2], m->counts[2]));
\r
395 LOG((" right (%p): %p/%d [%p] %p/%d\n", n,
\r
396 n->kids[0], n->counts[0], n->elems[0],
\r
397 n->kids[1], n->counts[1]));
\r
399 lcount = countnode234(left);
\r
401 rcount = countnode234(right);
\r
404 np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
\r
405 n->parent->kids[1] == n ? &n->parent->kids[1] :
\r
406 n->parent->kids[2] == n ? &n->parent->kids[2] :
\r
407 &n->parent->kids[3]);
\r
412 * If we've come out of here by `break', n will still be
\r
413 * non-NULL and all we need to do is go back up the tree
\r
414 * updating counts. If we've come here because n is NULL, we
\r
415 * need to create a new root for the tree because the old one
\r
416 * has just split into two. */
\r
418 while (n->parent) {
\r
419 int count = countnode234(n);
\r
421 childnum = (n->parent->kids[0] == n ? 0 :
\r
422 n->parent->kids[1] == n ? 1 :
\r
423 n->parent->kids[2] == n ? 2 : 3);
\r
424 n->parent->counts[childnum] = count;
\r
428 LOG((" root is overloaded, split into two\n"));
\r
429 t->root = snew(node234);
\r
430 t->root->kids[0] = left;
\r
431 t->root->counts[0] = lcount;
\r
432 t->root->elems[0] = e;
\r
433 t->root->kids[1] = right;
\r
434 t->root->counts[1] = rcount;
\r
435 t->root->elems[1] = NULL;
\r
436 t->root->kids[2] = NULL;
\r
437 t->root->counts[2] = 0;
\r
438 t->root->elems[2] = NULL;
\r
439 t->root->kids[3] = NULL;
\r
440 t->root->counts[3] = 0;
\r
441 t->root->parent = NULL;
\r
442 if (t->root->kids[0])
\r
443 t->root->kids[0]->parent = t->root;
\r
444 if (t->root->kids[1])
\r
445 t->root->kids[1]->parent = t->root;
\r
446 LOG((" new root is %p/%d [%p] %p/%d\n",
\r
447 t->root->kids[0], t->root->counts[0],
\r
448 t->root->elems[0], t->root->kids[1], t->root->counts[1]));
\r
454 void *add234(tree234 * t, void *e)
\r
456 if (!t->cmp) /* tree is unsorted */
\r
459 return add234_internal(t, e, -1);
\r
461 void *addpos234(tree234 * t, void *e, int index)
\r
463 if (index < 0 || /* index out of range */
\r
464 t->cmp) /* tree is sorted */
\r
465 return NULL; /* return failure */
\r
467 return add234_internal(t, e, index); /* this checks the upper bound */
\r
471 * Look up the element at a given numeric index in a 2-3-4 tree.
\r
472 * Returns NULL if the index is out of range.
\r
474 void *index234(tree234 * t, int index)
\r
479 return NULL; /* tree is empty */
\r
481 if (index < 0 || index >= countnode234(t->root))
\r
482 return NULL; /* out of range */
\r
487 if (index < n->counts[0])
\r
489 else if (index -= n->counts[0] + 1, index < 0)
\r
490 return n->elems[0];
\r
491 else if (index < n->counts[1])
\r
493 else if (index -= n->counts[1] + 1, index < 0)
\r
494 return n->elems[1];
\r
495 else if (index < n->counts[2])
\r
497 else if (index -= n->counts[2] + 1, index < 0)
\r
498 return n->elems[2];
\r
503 /* We shouldn't ever get here. I wonder how we did. */
\r
508 * Find an element e in a sorted 2-3-4 tree t. Returns NULL if not
\r
509 * found. e is always passed as the first argument to cmp, so cmp
\r
510 * can be an asymmetric function if desired. cmp can also be passed
\r
511 * as NULL, in which case the compare function from the tree proper
\r
514 void *findrelpos234(tree234 * t, void *e, cmpfn234 cmp,
\r
515 int relation, int *index)
\r
520 int idx, ecount, kcount, cmpret;
\r
522 if (t->root == NULL)
\r
530 * Attempt to find the element itself.
\r
535 * Prepare a fake `cmp' result if e is NULL.
\r
539 assert(relation == REL234_LT || relation == REL234_GT);
\r
540 if (relation == REL234_LT)
\r
541 cmpret = +1; /* e is a max: always greater */
\r
542 else if (relation == REL234_GT)
\r
543 cmpret = -1; /* e is a min: always smaller */
\r
546 for (kcount = 0; kcount < 4; kcount++) {
\r
547 if (kcount >= 3 || n->elems[kcount] == NULL ||
\r
548 (c = cmpret ? cmpret : cmp(e, n->elems[kcount])) < 0) {
\r
551 if (n->kids[kcount])
\r
552 idx += n->counts[kcount];
\r
561 if (n->kids[kcount])
\r
562 n = n->kids[kcount];
\r
569 * We have found the element we're looking for. It's
\r
570 * n->elems[ecount], at tree index idx. If our search
\r
571 * relation is EQ, LE or GE we can now go home.
\r
573 if (relation != REL234_LT && relation != REL234_GT) {
\r
576 return n->elems[ecount];
\r
580 * Otherwise, we'll do an indexed lookup for the previous
\r
581 * or next element. (It would be perfectly possible to
\r
582 * implement these search types in a non-counted tree by
\r
583 * going back up from where we are, but far more fiddly.)
\r
585 if (relation == REL234_LT)
\r
591 * We've found our way to the bottom of the tree and we
\r
592 * know where we would insert this node if we wanted to:
\r
593 * we'd put it in in place of the (empty) subtree
\r
594 * n->kids[kcount], and it would have index idx
\r
596 * But the actual element isn't there. So if our search
\r
597 * relation is EQ, we're doomed.
\r
599 if (relation == REL234_EQ)
\r
603 * Otherwise, we must do an index lookup for index idx-1
\r
604 * (if we're going left - LE or LT) or index idx (if we're
\r
605 * going right - GE or GT).
\r
607 if (relation == REL234_LT || relation == REL234_LE) {
\r
613 * We know the index of the element we want; just call index234
\r
614 * to do the rest. This will return NULL if the index is out of
\r
615 * bounds, which is exactly what we want.
\r
617 ret = index234(t, idx);
\r
622 void *find234(tree234 * t, void *e, cmpfn234 cmp)
\r
624 return findrelpos234(t, e, cmp, REL234_EQ, NULL);
\r
626 void *findrel234(tree234 * t, void *e, cmpfn234 cmp, int relation)
\r
628 return findrelpos234(t, e, cmp, relation, NULL);
\r
630 void *findpos234(tree234 * t, void *e, cmpfn234 cmp, int *index)
\r
632 return findrelpos234(t, e, cmp, REL234_EQ, index);
\r
636 * Delete an element e in a 2-3-4 tree. Does not free the element,
\r
637 * merely removes all links to it from the tree nodes.
\r
639 static void *delpos234_internal(tree234 * t, int index)
\r
648 LOG(("deleting item %d from tree %p\n", index, t));
\r
655 (" node %p: %p/%d [%p] %p/%d [%p] %p/%d [%p] %p/%d index=%d\n",
\r
656 n, n->kids[0], n->counts[0], n->elems[0], n->kids[1],
\r
657 n->counts[1], n->elems[1], n->kids[2], n->counts[2],
\r
658 n->elems[2], n->kids[3], n->counts[3], index));
\r
659 if (index < n->counts[0]) {
\r
661 } else if (index -= n->counts[0] + 1, index < 0) {
\r
664 } else if (index < n->counts[1]) {
\r
666 } else if (index -= n->counts[1] + 1, index < 0) {
\r
669 } else if (index < n->counts[2]) {
\r
671 } else if (index -= n->counts[2] + 1, index < 0) {
\r
678 * Recurse down to subtree ki. If it has only one element,
\r
679 * we have to do some transformation to start with.
\r
681 LOG((" moving to subtree %d\n", ki));
\r
683 if (!sub->elems[1]) {
\r
684 LOG((" subtree has only one element!\n", ki));
\r
685 if (ki > 0 && n->kids[ki - 1]->elems[1]) {
\r
687 * Case 3a, left-handed variant. Child ki has
\r
688 * only one element, but child ki-1 has two or
\r
689 * more. So we need to move a subtree from ki-1
\r
694 * [more] a A b B c d D e [more] a A b c C d D e
\r
696 node234 *sib = n->kids[ki - 1];
\r
697 int lastelem = (sib->elems[2] ? 2 :
\r
698 sib->elems[1] ? 1 : 0);
\r
699 sub->kids[2] = sub->kids[1];
\r
700 sub->counts[2] = sub->counts[1];
\r
701 sub->elems[1] = sub->elems[0];
\r
702 sub->kids[1] = sub->kids[0];
\r
703 sub->counts[1] = sub->counts[0];
\r
704 sub->elems[0] = n->elems[ki - 1];
\r
705 sub->kids[0] = sib->kids[lastelem + 1];
\r
706 sub->counts[0] = sib->counts[lastelem + 1];
\r
708 sub->kids[0]->parent = sub;
\r
709 n->elems[ki - 1] = sib->elems[lastelem];
\r
710 sib->kids[lastelem + 1] = NULL;
\r
711 sib->counts[lastelem + 1] = 0;
\r
712 sib->elems[lastelem] = NULL;
\r
713 n->counts[ki] = countnode234(sub);
\r
714 LOG((" case 3a left\n"));
\r
716 (" index and left subtree count before adjustment: %d, %d\n",
\r
717 index, n->counts[ki - 1]));
\r
718 index += n->counts[ki - 1];
\r
719 n->counts[ki - 1] = countnode234(sib);
\r
720 index -= n->counts[ki - 1];
\r
722 (" index and left subtree count after adjustment: %d, %d\n",
\r
723 index, n->counts[ki - 1]));
\r
724 } else if (ki < 3 && n->kids[ki + 1]
\r
725 && n->kids[ki + 1]->elems[1]) {
\r
727 * Case 3a, right-handed variant. ki has only
\r
728 * one element but ki+1 has two or more. Move a
\r
729 * subtree from ki+1 to ki.
\r
733 * a A b c C d D e [more] a A b B c d D e [more]
\r
735 node234 *sib = n->kids[ki + 1];
\r
737 sub->elems[1] = n->elems[ki];
\r
738 sub->kids[2] = sib->kids[0];
\r
739 sub->counts[2] = sib->counts[0];
\r
741 sub->kids[2]->parent = sub;
\r
742 n->elems[ki] = sib->elems[0];
\r
743 sib->kids[0] = sib->kids[1];
\r
744 sib->counts[0] = sib->counts[1];
\r
745 for (j = 0; j < 2 && sib->elems[j + 1]; j++) {
\r
746 sib->kids[j + 1] = sib->kids[j + 2];
\r
747 sib->counts[j + 1] = sib->counts[j + 2];
\r
748 sib->elems[j] = sib->elems[j + 1];
\r
750 sib->kids[j + 1] = NULL;
\r
751 sib->counts[j + 1] = 0;
\r
752 sib->elems[j] = NULL;
\r
753 n->counts[ki] = countnode234(sub);
\r
754 n->counts[ki + 1] = countnode234(sib);
\r
755 LOG((" case 3a right\n"));
\r
758 * Case 3b. ki has only one element, and has no
\r
759 * neighbour with more than one. So pick a
\r
760 * neighbour and merge it with ki, taking an
\r
761 * element down from n to go in the middle.
\r
765 * a A b c C d a A b B c C d
\r
767 * (Since at all points we have avoided
\r
768 * descending to a node with only one element,
\r
769 * we can be sure that n is not reduced to
\r
770 * nothingness by this move, _unless_ it was
\r
771 * the very first node, ie the root of the
\r
772 * tree. In that case we remove the now-empty
\r
773 * root and replace it with its single large
\r
781 index += n->counts[ki] + 1;
\r
784 sub = n->kids[ki + 1];
\r
786 sub->kids[3] = sub->kids[1];
\r
787 sub->counts[3] = sub->counts[1];
\r
788 sub->elems[2] = sub->elems[0];
\r
789 sub->kids[2] = sub->kids[0];
\r
790 sub->counts[2] = sub->counts[0];
\r
791 sub->elems[1] = n->elems[ki];
\r
792 sub->kids[1] = sib->kids[1];
\r
793 sub->counts[1] = sib->counts[1];
\r
795 sub->kids[1]->parent = sub;
\r
796 sub->elems[0] = sib->elems[0];
\r
797 sub->kids[0] = sib->kids[0];
\r
798 sub->counts[0] = sib->counts[0];
\r
800 sub->kids[0]->parent = sub;
\r
802 n->counts[ki + 1] = countnode234(sub);
\r
807 * That's built the big node in sub. Now we
\r
808 * need to remove the reference to sib in n.
\r
810 for (j = ki; j < 3 && n->kids[j + 1]; j++) {
\r
811 n->kids[j] = n->kids[j + 1];
\r
812 n->counts[j] = n->counts[j + 1];
\r
813 n->elems[j] = j < 2 ? n->elems[j + 1] : NULL;
\r
818 n->elems[j] = NULL;
\r
819 LOG((" case 3b ki=%d\n", ki));
\r
821 if (!n->elems[0]) {
\r
823 * The root is empty and needs to be
\r
826 LOG((" shifting root!\n"));
\r
828 sub->parent = NULL;
\r
836 retval = n->elems[ei];
\r
839 return NULL; /* although this shouldn't happen */
\r
842 * Treat special case: this is the one remaining item in
\r
843 * the tree. n is the tree root (no parent), has one
\r
844 * element (no elems[1]), and has no kids (no kids[0]).
\r
846 if (!n->parent && !n->elems[1] && !n->kids[0]) {
\r
847 LOG((" removed last element in tree\n"));
\r
854 * Now we have the element we want, as n->elems[ei], and we
\r
855 * have also arranged for that element not to be the only
\r
856 * one in its node. So...
\r
859 if (!n->kids[0] && n->elems[1]) {
\r
861 * Case 1. n is a leaf node with more than one element,
\r
862 * so it's _really easy_. Just delete the thing and
\r
866 LOG((" case 1\n"));
\r
867 for (i = ei; i < 2 && n->elems[i + 1]; i++)
\r
868 n->elems[i] = n->elems[i + 1];
\r
869 n->elems[i] = NULL;
\r
871 * Having done that to the leaf node, we now go back up
\r
872 * the tree fixing the counts.
\r
874 while (n->parent) {
\r
876 childnum = (n->parent->kids[0] == n ? 0 :
\r
877 n->parent->kids[1] == n ? 1 :
\r
878 n->parent->kids[2] == n ? 2 : 3);
\r
879 n->parent->counts[childnum]--;
\r
882 return retval; /* finished! */
\r
883 } else if (n->kids[ei]->elems[1]) {
\r
885 * Case 2a. n is an internal node, and the root of the
\r
886 * subtree to the left of e has more than one element.
\r
887 * So find the predecessor p to e (ie the largest node
\r
888 * in that subtree), place it where e currently is, and
\r
889 * then start the deletion process over again on the
\r
890 * subtree with p as target.
\r
892 node234 *m = n->kids[ei];
\r
894 LOG((" case 2a\n"));
\r
895 while (m->kids[0]) {
\r
896 m = (m->kids[3] ? m->kids[3] :
\r
897 m->kids[2] ? m->kids[2] :
\r
898 m->kids[1] ? m->kids[1] : m->kids[0]);
\r
900 target = (m->elems[2] ? m->elems[2] :
\r
901 m->elems[1] ? m->elems[1] : m->elems[0]);
\r
902 n->elems[ei] = target;
\r
903 index = n->counts[ei] - 1;
\r
905 } else if (n->kids[ei + 1]->elems[1]) {
\r
907 * Case 2b, symmetric to 2a but s/left/right/ and
\r
908 * s/predecessor/successor/. (And s/largest/smallest/).
\r
910 node234 *m = n->kids[ei + 1];
\r
912 LOG((" case 2b\n"));
\r
913 while (m->kids[0]) {
\r
916 target = m->elems[0];
\r
917 n->elems[ei] = target;
\r
918 n = n->kids[ei + 1];
\r
922 * Case 2c. n is an internal node, and the subtrees to
\r
923 * the left and right of e both have only one element.
\r
924 * So combine the two subnodes into a single big node
\r
925 * with their own elements on the left and right and e
\r
926 * in the middle, then restart the deletion process on
\r
927 * that subtree, with e still as target.
\r
929 node234 *a = n->kids[ei], *b = n->kids[ei + 1];
\r
932 LOG((" case 2c\n"));
\r
933 a->elems[1] = n->elems[ei];
\r
934 a->kids[2] = b->kids[0];
\r
935 a->counts[2] = b->counts[0];
\r
937 a->kids[2]->parent = a;
\r
938 a->elems[2] = b->elems[0];
\r
939 a->kids[3] = b->kids[1];
\r
940 a->counts[3] = b->counts[1];
\r
942 a->kids[3]->parent = a;
\r
944 n->counts[ei] = countnode234(a);
\r
946 * That's built the big node in a, and destroyed b. Now
\r
947 * remove the reference to b (and e) in n.
\r
949 for (j = ei; j < 2 && n->elems[j + 1]; j++) {
\r
950 n->elems[j] = n->elems[j + 1];
\r
951 n->kids[j + 1] = n->kids[j + 2];
\r
952 n->counts[j + 1] = n->counts[j + 2];
\r
954 n->elems[j] = NULL;
\r
955 n->kids[j + 1] = NULL;
\r
956 n->counts[j + 1] = 0;
\r
958 * It's possible, in this case, that we've just removed
\r
959 * the only element in the root of the tree. If so,
\r
962 if (n->elems[0] == NULL) {
\r
963 LOG((" shifting root!\n"));
\r
969 * Now go round the deletion process again, with n
\r
970 * pointing at the new big node and e still the same.
\r
973 index = a->counts[0] + a->counts[1] + 1;
\r
977 void *delpos234(tree234 * t, int index)
\r
979 if (index < 0 || index >= countnode234(t->root))
\r
981 return delpos234_internal(t, index);
\r
983 void *del234(tree234 * t, void *e)
\r
986 if (!findrelpos234(t, e, NULL, REL234_EQ, &index))
\r
987 return NULL; /* it wasn't in there anyway */
\r
988 return delpos234_internal(t, index); /* it's there; delete it. */
\r
994 * Test code for the 2-3-4 tree. This code maintains an alternative
\r
995 * representation of the data in the tree, in an array (using the
\r
996 * obvious and slow insert and delete functions). After each tree
\r
997 * operation, the verify() function is called, which ensures all
\r
998 * the tree properties are preserved:
\r
999 * - node->child->parent always equals node
\r
1000 * - tree->root->parent always equals NULL
\r
1001 * - number of kids == 0 or number of elements + 1;
\r
1002 * - tree has the same depth everywhere
\r
1003 * - every node has at least one element
\r
1004 * - subtree element counts are accurate
\r
1005 * - any NULL kid pointer is accompanied by a zero count
\r
1006 * - in a sorted tree: ordering property between elements of a
\r
1007 * node and elements of its children is preserved
\r
1008 * and also ensures the list represented by the tree is the same
\r
1009 * list it should be. (This last check also doubly verifies the
\r
1010 * ordering properties, because the `same list it should be' is by
\r
1011 * definition correctly ordered. It also ensures all nodes are
\r
1012 * distinct, because the enum functions would get caught in a loop
\r
1016 #include <stdarg.h>
\r
1019 * Error reporting function.
\r
1021 void error(char *fmt, ...)
\r
1024 printf("ERROR: ");
\r
1025 va_start(ap, fmt);
\r
1026 vfprintf(stdout, fmt, ap);
\r
1031 /* The array representation of the data. */
\r
1033 int arraylen, arraysize;
\r
1036 /* The tree representation of the same data. */
\r
1044 int chknode(chkctx * ctx, int level, node234 * node,
\r
1045 void *lowbound, void *highbound)
\r
1047 int nkids, nelems;
\r
1051 /* Count the non-NULL kids. */
\r
1052 for (nkids = 0; nkids < 4 && node->kids[nkids]; nkids++);
\r
1053 /* Ensure no kids beyond the first NULL are non-NULL. */
\r
1054 for (i = nkids; i < 4; i++)
\r
1055 if (node->kids[i]) {
\r
1056 error("node %p: nkids=%d but kids[%d] non-NULL",
\r
1058 } else if (node->counts[i]) {
\r
1059 error("node %p: kids[%d] NULL but count[%d]=%d nonzero",
\r
1060 node, i, i, node->counts[i]);
\r
1063 /* Count the non-NULL elements. */
\r
1064 for (nelems = 0; nelems < 3 && node->elems[nelems]; nelems++);
\r
1065 /* Ensure no elements beyond the first NULL are non-NULL. */
\r
1066 for (i = nelems; i < 3; i++)
\r
1067 if (node->elems[i]) {
\r
1068 error("node %p: nelems=%d but elems[%d] non-NULL",
\r
1074 * If nkids==0, this is a leaf node; verify that the tree
\r
1075 * depth is the same everywhere.
\r
1077 if (ctx->treedepth < 0)
\r
1078 ctx->treedepth = level; /* we didn't know the depth yet */
\r
1079 else if (ctx->treedepth != level)
\r
1080 error("node %p: leaf at depth %d, previously seen depth %d",
\r
1081 node, level, ctx->treedepth);
\r
1084 * If nkids != 0, then it should be nelems+1, unless nelems
\r
1085 * is 0 in which case nkids should also be 0 (and so we
\r
1086 * shouldn't be in this condition at all).
\r
1088 int shouldkids = (nelems ? nelems + 1 : 0);
\r
1089 if (nkids != shouldkids) {
\r
1090 error("node %p: %d elems should mean %d kids but has %d",
\r
1091 node, nelems, shouldkids, nkids);
\r
1096 * nelems should be at least 1.
\r
1098 if (nelems == 0) {
\r
1099 error("node %p: no elems", node, nkids);
\r
1103 * Add nelems to the running element count of the whole tree.
\r
1105 ctx->elemcount += nelems;
\r
1108 * Check ordering property: all elements should be strictly >
\r
1109 * lowbound, strictly < highbound, and strictly < each other in
\r
1110 * sequence. (lowbound and highbound are NULL at edges of tree
\r
1111 * - both NULL at root node - and NULL is considered to be <
\r
1112 * everything and > everything. IYSWIM.)
\r
1115 for (i = -1; i < nelems; i++) {
\r
1116 void *lower = (i == -1 ? lowbound : node->elems[i]);
\r
1118 (i + 1 == nelems ? highbound : node->elems[i + 1]);
\r
1119 if (lower && higher && cmp(lower, higher) >= 0) {
\r
1120 error("node %p: kid comparison [%d=%s,%d=%s] failed",
\r
1121 node, i, lower, i + 1, higher);
\r
1127 * Check parent pointers: all non-NULL kids should have a
\r
1128 * parent pointer coming back to this node.
\r
1130 for (i = 0; i < nkids; i++)
\r
1131 if (node->kids[i]->parent != node) {
\r
1132 error("node %p kid %d: parent ptr is %p not %p",
\r
1133 node, i, node->kids[i]->parent, node);
\r
1138 * Now (finally!) recurse into subtrees.
\r
1142 for (i = 0; i < nkids; i++) {
\r
1143 void *lower = (i == 0 ? lowbound : node->elems[i - 1]);
\r
1144 void *higher = (i >= nelems ? highbound : node->elems[i]);
\r
1146 chknode(ctx, level + 1, node->kids[i], lower, higher);
\r
1147 if (node->counts[i] != subcount) {
\r
1148 error("node %p kid %d: count says %d, subtree really has %d",
\r
1149 node, i, node->counts[i], subcount);
\r
1151 count += subcount;
\r
1163 ctx.treedepth = -1; /* depth unknown yet */
\r
1164 ctx.elemcount = 0; /* no elements seen yet */
\r
1166 * Verify validity of tree properties.
\r
1169 if (tree->root->parent != NULL)
\r
1170 error("root->parent is %p should be null", tree->root->parent);
\r
1171 chknode(&ctx, 0, tree->root, NULL, NULL);
\r
1173 printf("tree depth: %d\n", ctx.treedepth);
\r
1175 * Enumerate the tree and ensure it matches up to the array.
\r
1177 for (i = 0; NULL != (p = index234(tree, i)); i++) {
\r
1178 if (i >= arraylen)
\r
1179 error("tree contains more than %d elements", arraylen);
\r
1180 if (array[i] != p)
\r
1181 error("enum at position %d: array says %s, tree says %s",
\r
1184 if (ctx.elemcount != i) {
\r
1185 error("tree really contains %d elements, enum gave %d",
\r
1186 ctx.elemcount, i);
\r
1188 if (i < arraylen) {
\r
1189 error("enum gave only %d elements, array has %d", i, arraylen);
\r
1191 i = count234(tree);
\r
1192 if (ctx.elemcount != i) {
\r
1193 error("tree really contains %d elements, count234 gave %d",
\r
1194 ctx.elemcount, i);
\r
1198 void internal_addtest(void *elem, int index, void *realret)
\r
1203 if (arraysize < arraylen + 1) {
\r
1204 arraysize = arraylen + 1 + 256;
\r
1205 array = sresize(array, arraysize, void *);
\r
1209 /* now i points to the first element >= elem */
\r
1210 retval = elem; /* expect elem returned (success) */
\r
1211 for (j = arraylen; j > i; j--)
\r
1212 array[j] = array[j - 1];
\r
1213 array[i] = elem; /* add elem to array */
\r
1216 if (realret != retval) {
\r
1217 error("add: retval was %p expected %p", realret, retval);
\r
1223 void addtest(void *elem)
\r
1228 realret = add234(tree, elem);
\r
1231 while (i < arraylen && cmp(elem, array[i]) > 0)
\r
1233 if (i < arraylen && !cmp(elem, array[i])) {
\r
1234 void *retval = array[i]; /* expect that returned not elem */
\r
1235 if (realret != retval) {
\r
1236 error("add: retval was %p expected %p", realret, retval);
\r
1239 internal_addtest(elem, i, realret);
\r
1242 void addpostest(void *elem, int i)
\r
1246 realret = addpos234(tree, elem, i);
\r
1248 internal_addtest(elem, i, realret);
\r
1251 void delpostest(int i)
\r
1254 void *elem = array[i], *ret;
\r
1256 /* i points to the right element */
\r
1257 while (i < arraylen - 1) {
\r
1258 array[i] = array[i + 1];
\r
1261 arraylen--; /* delete elem from array */
\r
1264 ret = del234(tree, elem);
\r
1266 ret = delpos234(tree, index);
\r
1268 if (ret != elem) {
\r
1269 error("del returned %p, expected %p", ret, elem);
\r
1275 void deltest(void *elem)
\r
1280 while (i < arraylen && cmp(elem, array[i]) > 0)
\r
1282 if (i >= arraylen || cmp(elem, array[i]) != 0)
\r
1283 return; /* don't do it! */
\r
1287 /* A sample data set and test utility. Designed for pseudo-randomness,
\r
1288 * and yet repeatability. */
\r
1291 * This random number generator uses the `portable implementation'
\r
1292 * given in ANSI C99 draft N869. It assumes `unsigned' is 32 bits;
\r
1293 * change it if not.
\r
1295 int randomnumber(unsigned *seed)
\r
1297 *seed *= 1103515245;
\r
1299 return ((*seed) / 65536) % 32768;
\r
1302 int mycmp(void *av, void *bv)
\r
1304 char const *a = (char const *) av;
\r
1305 char const *b = (char const *) bv;
\r
1306 return strcmp(a, b);
\r
1309 #define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
\r
1311 char *strings[] = {
\r
1312 "a", "ab", "absque", "coram", "de",
\r
1313 "palam", "clam", "cum", "ex", "e",
\r
1314 "sine", "tenus", "pro", "prae",
\r
1315 "banana", "carrot", "cabbage", "broccoli", "onion", "zebra",
\r
1316 "penguin", "blancmange", "pangolin", "whale", "hedgehog",
\r
1317 "giraffe", "peanut", "bungee", "foo", "bar", "baz", "quux",
\r
1318 "murfl", "spoo", "breen", "flarn", "octothorpe",
\r
1319 "snail", "tiger", "elephant", "octopus", "warthog", "armadillo",
\r
1320 "aardvark", "wyvern", "dragon", "elf", "dwarf", "orc", "goblin",
\r
1321 "pixie", "basilisk", "warg", "ape", "lizard", "newt", "shopkeeper",
\r
1322 "wand", "ring", "amulet"
\r
1325 #define NSTR lenof(strings)
\r
1327 int findtest(void)
\r
1329 const static int rels[] = {
\r
1330 REL234_EQ, REL234_GE, REL234_LE, REL234_LT, REL234_GT
\r
1332 const static char *const relnames[] = {
\r
1333 "EQ", "GE", "LE", "LT", "GT"
\r
1335 int i, j, rel, index;
\r
1336 char *p, *ret, *realret, *realret2;
\r
1337 int lo, hi, mid, c;
\r
1339 for (i = 0; i < NSTR; i++) {
\r
1341 for (j = 0; j < sizeof(rels) / sizeof(*rels); j++) {
\r
1345 hi = arraylen - 1;
\r
1346 while (lo <= hi) {
\r
1347 mid = (lo + hi) / 2;
\r
1348 c = strcmp(p, array[mid]);
\r
1358 if (rel == REL234_LT)
\r
1359 ret = (mid > 0 ? array[--mid] : NULL);
\r
1360 else if (rel == REL234_GT)
\r
1361 ret = (mid < arraylen - 1 ? array[++mid] : NULL);
\r
1365 assert(lo == hi + 1);
\r
1366 if (rel == REL234_LT || rel == REL234_LE) {
\r
1368 ret = (hi >= 0 ? array[hi] : NULL);
\r
1369 } else if (rel == REL234_GT || rel == REL234_GE) {
\r
1371 ret = (lo < arraylen ? array[lo] : NULL);
\r
1376 realret = findrelpos234(tree, p, NULL, rel, &index);
\r
1377 if (realret != ret) {
\r
1378 error("find(\"%s\",%s) gave %s should be %s",
\r
1379 p, relnames[j], realret, ret);
\r
1381 if (realret && index != mid) {
\r
1382 error("find(\"%s\",%s) gave %d should be %d",
\r
1383 p, relnames[j], index, mid);
\r
1385 if (realret && rel == REL234_EQ) {
\r
1386 realret2 = index234(tree, index);
\r
1387 if (realret2 != realret) {
\r
1388 error("find(\"%s\",%s) gave %s(%d) but %d -> %s",
\r
1389 p, relnames[j], realret, index, index, realret2);
\r
1393 printf("find(\"%s\",%s) gave %s(%d)\n", p, relnames[j],
\r
1399 realret = findrelpos234(tree, NULL, NULL, REL234_GT, &index);
\r
1400 if (arraylen && (realret != array[0] || index != 0)) {
\r
1401 error("find(NULL,GT) gave %s(%d) should be %s(0)",
\r
1402 realret, index, array[0]);
\r
1403 } else if (!arraylen && (realret != NULL)) {
\r
1404 error("find(NULL,GT) gave %s(%d) should be NULL", realret, index);
\r
1407 realret = findrelpos234(tree, NULL, NULL, REL234_LT, &index);
\r
1409 && (realret != array[arraylen - 1] || index != arraylen - 1)) {
\r
1410 error("find(NULL,LT) gave %s(%d) should be %s(0)", realret, index,
\r
1411 array[arraylen - 1]);
\r
1412 } else if (!arraylen && (realret != NULL)) {
\r
1413 error("find(NULL,LT) gave %s(%d) should be NULL", realret, index);
\r
1421 unsigned seed = 0;
\r
1423 for (i = 0; i < NSTR; i++)
\r
1426 arraylen = arraysize = 0;
\r
1427 tree = newtree234(mycmp);
\r
1431 for (i = 0; i < 10000; i++) {
\r
1432 j = randomnumber(&seed);
\r
1434 printf("trial: %d\n", i);
\r
1436 printf("deleting %s (%d)\n", strings[j], j);
\r
1437 deltest(strings[j]);
\r
1440 printf("adding %s (%d)\n", strings[j], j);
\r
1441 addtest(strings[j]);
\r
1447 while (arraylen > 0) {
\r
1448 j = randomnumber(&seed);
\r
1450 deltest(array[j]);
\r
1453 freetree234(tree);
\r
1456 * Now try an unsorted tree. We don't really need to test
\r
1457 * delpos234 because we know del234 is based on it, so it's
\r
1458 * already been tested in the above sorted-tree code; but for
\r
1459 * completeness we'll use it to tear down our unsorted tree
\r
1460 * once we've built it.
\r
1462 tree = newtree234(NULL);
\r
1465 for (i = 0; i < 1000; i++) {
\r
1466 printf("trial: %d\n", i);
\r
1467 j = randomnumber(&seed);
\r
1469 k = randomnumber(&seed);
\r
1470 k %= count234(tree) + 1;
\r
1471 printf("adding string %s at index %d\n", strings[j], k);
\r
1472 addpostest(strings[j], k);
\r
1474 while (count234(tree) > 0) {
\r
1475 printf("cleanup: tree size %d\n", count234(tree));
\r
1476 j = randomnumber(&seed);
\r
1477 j %= count234(tree);
\r
1478 printf("deleting string %s from index %d\n",
\r
1479 (const char *)array[j], j);
\r