string constants are of type const char * (not char *)
[suif.git] / src / basesuif / suif1 / tree_string_index.cc
blobeb59d7510ce2f3f40dda525b534bff6376c430ff
1 /* file "tree_string_index.cc" */
3 /* Copyright (c) 1995 Stanford University
5 All rights reserved.
7 This software is provided under the terms described in
8 the "suif_copyright.h" include file. */
10 #include <suif_copyright.h>
12 #define _MODULE_ "libsuif.a"
14 #pragma implementation "string_index.h"
15 #pragma implementation "tree_string_index.h"
17 #define RCS_BASE_FILE tree_string_index_cc
19 #include "suif1.h"
20 #include <string.h>
22 RCS_BASE(
23 "$Id: tree_string_index.cc,v 1.2 1999/08/25 03:29:17 brm Exp $")
25 enum tsi_node_kind { TSI_SUBSTRING, TSI_CHAR_LIST, TSI_CHAR_TABLE, TSI_LEAF };
28 class tsi_node
30 friend class tsi_substring;
31 friend class tsi_char_list;
32 friend class tsi_char_table;
33 friend class tsi_leaf;
35 private:
36 /* We make explicit copy constructor and assignment operator and
37 * make them private to foil C++'s automatic default versions. */
38 tsi_node(const tsi_node &) { assert(FALSE); }
39 void operator=(const tsi_node &) { assert(FALSE); }
41 si_entry *the_entry;
43 protected:
44 tsi_node *parent_node;
45 char parent_index;
47 tsi_node(void) : the_entry(0), parent_node(0), parent_index(0)
48 { }
50 public:
51 virtual ~tsi_node(void) { }
53 virtual tsi_node_kind kind(void) = 0;
55 void replace(tsi_node *replacement);
56 void remove(void);
58 si_entry *base_entry(void) { return the_entry; }
59 void replace_base_entry(si_entry *new_entry, tree_string_index *the_tsi);
60 tsi_node *find_branchpoint(void);
63 class tsi_substring : public tsi_node
65 private:
66 char *the_chars;
67 tsi_node *the_child;
69 public:
70 tsi_substring(unsigned long char_count, const char *init_chars,
71 tsi_node *init_child);
72 ~tsi_substring(void);
74 tsi_node_kind kind(void) { return TSI_SUBSTRING; }
76 char *chars(void) { return the_chars; }
77 tsi_node *child(void) { return the_child; }
79 void replace_child(tsi_node *new_child);
82 class tsi_char_list : public tsi_node
84 private:
85 unsigned long table_size;
86 unsigned long num_entries;
87 tsi_node **the_children;
88 char *the_chars;
90 public:
91 tsi_char_list(unsigned long init_table_size);
92 ~tsi_char_list(void);
94 tsi_node_kind kind(void) { return TSI_CHAR_LIST; }
96 unsigned long count(void) { return num_entries; }
97 tsi_node *child(unsigned long child_num)
98 { assert(child_num < num_entries); return the_children[child_num]; }
99 char which_char(unsigned long child_num)
100 { assert(child_num < num_entries); return the_chars[child_num]; }
102 void add_child(unsigned long child_num, tsi_node *new_child,
103 char this_char);
104 void remove_child(unsigned long child_num);
105 void replace_child(unsigned long child_num, tsi_node *new_child);
108 class tsi_char_table : public tsi_node
110 private:
111 tsi_node **the_children;
112 char first_char;
113 char last_char;
115 public:
116 tsi_char_table(char init_lower, char init_upper);
117 ~tsi_char_table(void);
119 tsi_node_kind kind(void) { return TSI_CHAR_TABLE; }
121 tsi_node *child(char which_char)
123 if ((which_char >= first_char) && (which_char <= last_char))
124 return the_children[which_char - (long)first_char];
125 else
126 return NULL;
128 char lower_char(void) { return first_char; }
129 char upper_char(void) { return last_char; }
131 void replace_child(char which_char, tsi_node *new_child);
134 class tsi_leaf : public tsi_node
136 public:
137 tsi_leaf(si_entry *init_entry, tree_string_index *the_tsi)
138 { replace_base_entry(init_entry, the_tsi); }
139 ~tsi_leaf(void) { }
141 tsi_node_kind kind(void) { return TSI_LEAF; }
145 void tsi_node::replace(tsi_node *replacement)
147 assert(parent_node != NULL);
148 switch (parent_node->kind())
150 case TSI_SUBSTRING:
152 tsi_substring *par_substring = (tsi_substring *)parent_node;
153 assert(par_substring->child() == this);
154 par_substring->replace_child(replacement);
155 break;
157 case TSI_CHAR_LIST:
159 tsi_char_list *par_char_list = (tsi_char_list *)parent_node;
160 unsigned long count = par_char_list->count();
161 for (unsigned long child_num = 0; child_num < count; ++child_num)
163 if (par_char_list->child(child_num) == this)
165 par_char_list->replace_child(child_num, replacement);
166 return;
169 assert(FALSE);
170 break;
172 case TSI_CHAR_TABLE:
174 tsi_char_table *par_char_table = (tsi_char_table *)parent_node;
175 assert(par_char_table->child(parent_index) == this);
176 par_char_table->replace_child(parent_index, replacement);
177 break;
179 case TSI_LEAF:
180 assert(FALSE);
181 default:
182 assert(FALSE);
186 void tsi_node::remove(void)
188 assert(parent_node != NULL);
189 switch (parent_node->kind())
191 case TSI_SUBSTRING:
193 tsi_substring *par_substring = (tsi_substring *)parent_node;
194 assert(par_substring->child() == this);
195 par_substring->replace_child(NULL);
196 break;
198 case TSI_CHAR_LIST:
200 tsi_char_list *par_char_list = (tsi_char_list *)parent_node;
201 unsigned long count = par_char_list->count();
202 for (unsigned long child_num = 0; child_num < count; ++child_num)
204 if (par_char_list->child(child_num) == this)
206 par_char_list->remove_child(child_num);
207 return;
210 assert(FALSE);
211 break;
213 case TSI_CHAR_TABLE:
215 tsi_char_table *par_char_table = (tsi_char_table *)parent_node;
216 assert(par_char_table->child(parent_index) == this);
217 par_char_table->replace_child(parent_index, NULL);
218 break;
220 case TSI_LEAF:
221 assert(FALSE);
222 default:
223 assert(FALSE);
227 void tsi_node::replace_base_entry(si_entry *new_entry,
228 tree_string_index *the_tsi)
230 if (the_entry != NULL)
232 assert(the_tsi->get_back_pointer(the_entry) == this);
233 the_tsi->set_back_pointer(the_entry, NULL);
235 the_entry = new_entry;
236 if (new_entry != NULL)
238 assert(the_tsi->get_back_pointer(new_entry) == NULL);
239 the_tsi->set_back_pointer(new_entry, this);
243 tsi_node *tsi_node::find_branchpoint(void)
245 if (parent_node == NULL)
246 return this;
247 if (parent_node->kind() != TSI_SUBSTRING)
248 return this;
249 tsi_substring *parent_substring = (tsi_substring *)parent_node;
250 if (parent_substring->base_entry() != NULL)
251 return parent_substring;
252 return parent_substring->find_branchpoint();
256 tsi_substring::tsi_substring(unsigned long char_count, const char *init_chars,
257 tsi_node *init_child)
258 : the_chars(new char[char_count + 1]), the_child(init_child)
260 assert((init_child == NULL) || (init_child->parent_node == NULL));
261 memcpy(the_chars, init_chars, char_count);
262 the_chars[char_count] = 0;
263 if (init_child != NULL)
264 init_child->parent_node = this;
267 tsi_substring::~tsi_substring(void)
269 delete[] the_chars;
270 if (the_child != NULL)
271 delete the_child;
274 void tsi_substring::replace_child(tsi_node *new_child)
276 assert((new_child == NULL) || (new_child->parent_node == NULL));
277 if (the_child != NULL)
278 the_child->parent_node = NULL;
279 the_child = new_child;
280 if (new_child != NULL)
281 new_child->parent_node = this;
285 tsi_char_list::tsi_char_list(unsigned long init_table_size)
286 : table_size(init_table_size), num_entries(0),
287 the_children(new tsi_node *[init_table_size]),
288 the_chars(new char[init_table_size])
290 table_size = init_table_size;
291 num_entries = 0;
294 tsi_char_list::~tsi_char_list(void)
296 for (unsigned long entry_num = 0; entry_num < num_entries; ++entry_num)
298 tsi_node *this_child = the_children[entry_num];
299 if (this_child != NULL)
300 delete this_child;
302 delete[] the_children;
303 delete[] the_chars;
306 void tsi_char_list::add_child(unsigned long child_num, tsi_node *new_child,
307 char this_char)
309 assert((new_child == NULL) || (new_child->parent_node == NULL));
310 assert(child_num <= num_entries);
311 if (num_entries == table_size)
313 tsi_node **new_child_list = new tsi_node *[table_size * 2];
314 char *new_char_list = new char[table_size * 2];
315 memcpy(new_child_list, the_children, table_size * sizeof(tsi_node *));
316 memcpy(new_char_list, the_chars, table_size * sizeof(char));
317 delete[] the_children;
318 delete[] the_chars;
319 the_children = new_child_list;
320 the_chars = new_char_list;
321 table_size *= 2;
323 if (child_num < num_entries)
325 memmove(&(the_children[child_num + 1]), &(the_children[child_num]),
326 (num_entries - child_num) * sizeof(tsi_node *));
327 memmove(&(the_chars[child_num + 1]), &(the_chars[child_num]),
328 (num_entries - child_num) * sizeof(char));
330 the_children[child_num] = new_child;
331 the_chars[child_num] = this_char;
332 ++num_entries;
333 if (new_child != NULL)
334 new_child->parent_node = this;
337 void tsi_char_list::remove_child(unsigned long child_num)
339 assert(child_num < num_entries);
340 if (the_children[child_num] != NULL)
341 the_children[child_num]->parent_node = NULL;
342 --num_entries;
343 if (child_num < num_entries)
345 memmove(&(the_children[child_num]), &(the_children[child_num + 1]),
346 (num_entries - child_num) * sizeof(tsi_node *));
347 memmove(&(the_chars[child_num]), &(the_chars[child_num + 1]),
348 (num_entries - child_num) * sizeof(char));
352 void tsi_char_list::replace_child(unsigned long child_num, tsi_node *new_child)
354 assert((new_child == NULL) || (new_child->parent_node == NULL));
355 assert(child_num < num_entries);
356 if (the_children[child_num] != NULL)
357 the_children[child_num]->parent_node = NULL;
358 the_children[child_num] = new_child;
359 if (new_child != NULL)
360 new_child->parent_node = this;
364 tsi_char_table::tsi_char_table(char init_lower, char init_upper)
365 : the_children(0), first_char(init_lower), last_char(init_upper)
367 assert(init_lower <= init_upper);
368 unsigned long init_size =
369 (unsigned long)(unsigned char)(init_upper - init_lower) + 1;
370 the_children = new tsi_node *[init_size];
371 for (char test_char = init_lower; test_char < init_upper; ++test_char)
372 the_children[test_char - (long)init_lower] = NULL;
373 the_children[init_upper - (long)init_lower] = NULL;
376 tsi_char_table::~tsi_char_table(void)
378 for (char test_char = first_char; test_char < last_char; ++test_char)
380 tsi_node *this_child = the_children[test_char - (long)first_char];
381 if (this_child != NULL)
382 delete this_child;
384 tsi_node *this_child = the_children[last_char - (long)first_char];
385 if (this_child != NULL)
386 delete this_child;
387 delete[] the_children;
390 void tsi_char_table::replace_child(char which_char, tsi_node *new_child)
392 assert((new_child == NULL) || (new_child->parent_node == NULL));
393 if (which_char < first_char)
395 unsigned long init_size =
396 (unsigned long)(unsigned char)(last_char - which_char) + 1;
397 tsi_node **new_child_list = new tsi_node *[init_size];
398 unsigned long extra =
399 (unsigned long)(unsigned char)(first_char - which_char) + 1;
400 unsigned long orig_size =
401 (unsigned long)(unsigned char)(last_char - first_char) + 1;
402 memcpy(&(new_child_list[extra]), the_children,
403 orig_size * sizeof(tsi_node *));
404 delete[] the_children;
405 the_children = new_child_list;
406 for (char test_char = which_char; test_char < first_char; ++test_char)
407 the_children[test_char - (long)which_char] = NULL;
408 first_char = which_char;
410 else if (which_char > last_char)
412 unsigned long init_size =
413 (unsigned long)(unsigned char)(which_char - first_char) + 1;
414 tsi_node **new_child_list = new tsi_node *[init_size];
415 unsigned long orig_size =
416 (unsigned long)(unsigned char)(last_char - first_char) + 1;
417 memcpy(new_child_list, the_children, orig_size * sizeof(tsi_node *));
418 delete[] the_children;
419 the_children = new_child_list;
420 for (char test_char = which_char; test_char > last_char; --test_char)
421 the_children[test_char - (long)first_char] = NULL;
422 last_char = which_char;
424 if (the_children[which_char - (long)first_char] != NULL)
425 the_children[which_char - (long)first_char]->parent_node = NULL;
426 the_children[which_char - (long)first_char] = new_child;
427 if (new_child != NULL)
429 new_child->parent_node = this;
430 new_child->parent_index = which_char;
435 tree_string_index::tree_string_index(char expected_lower, char expected_upper,
436 unsigned long list_size)
437 : root_node(0), the_list_size(list_size),
438 the_expected_lower(expected_lower),
439 the_expected_upper(expected_upper)
441 assert(list_size >= 2);
444 tree_string_index::~tree_string_index(void)
446 if (root_node != NULL)
448 remove_entries(root_node);
449 delete root_node;
453 tsi_node *tree_string_index::follow_prefix_match(const char *string,
454 unsigned long *prefix_length)
456 if (root_node == NULL)
457 return NULL;
458 unsigned long position = 0;
459 tsi_node *current_node = root_node;
460 while (string[position] != 0)
462 boolean done = FALSE;
463 switch (current_node->kind())
465 case TSI_SUBSTRING:
467 tsi_substring *the_tsi_substring =
468 (tsi_substring *)current_node;
469 char *the_chars = the_tsi_substring->chars();
470 unsigned long num_chars = strlen(the_chars);
471 if (strncmp(&(string[position]), the_chars, num_chars) == 0)
473 position += num_chars;
474 current_node = the_tsi_substring->child();
476 else
478 done = TRUE;
480 break;
482 case TSI_CHAR_LIST:
484 tsi_char_list *the_tsi_char_list =
485 (tsi_char_list *)current_node;
486 unsigned long count = the_tsi_char_list->count();
487 char this_char = string[position];
488 unsigned long child_num;
489 for (child_num = 0; child_num < count; ++child_num)
491 if (the_tsi_char_list->which_char(child_num) >= this_char)
492 break;
494 if ((child_num < count) &&
495 (the_tsi_char_list->which_char(child_num) == this_char))
497 ++position;
498 current_node = the_tsi_char_list->child(child_num);
500 else
502 done = TRUE;
504 break;
506 case TSI_CHAR_TABLE:
508 tsi_char_table *the_tsi_char_table =
509 (tsi_char_table *)current_node;
510 tsi_node *this_child =
511 the_tsi_char_table->child(string[position]);
512 if (this_child != NULL)
514 ++position;
515 current_node = this_child;
517 else
519 done = TRUE;
521 break;
523 case TSI_LEAF:
525 done = TRUE;
526 break;
528 default:
529 assert(FALSE);
531 if (done)
532 break;
534 *prefix_length = position;
535 return current_node;
538 void tree_string_index::remove_entries(tsi_node *base_node)
540 if (base_node == NULL)
541 return;
543 si_entry *base_entry = base_node->base_entry();
544 if (base_entry != NULL)
546 base_node->replace_base_entry(NULL, this);
547 destroy_entry(base_entry);
550 switch (base_node->kind())
552 case TSI_SUBSTRING:
554 tsi_substring *the_tsi_substring = (tsi_substring *)base_node;
555 remove_entries(the_tsi_substring->child());
556 break;
558 case TSI_CHAR_LIST:
560 tsi_char_list *the_tsi_char_list = (tsi_char_list *)base_node;
561 unsigned long count = the_tsi_char_list->count();
562 for (unsigned long child_num = 0; child_num < count; ++child_num)
563 remove_entries(the_tsi_char_list->child(child_num));
564 break;
566 case TSI_CHAR_TABLE:
568 tsi_char_table *the_tsi_char_table = (tsi_char_table *)base_node;
569 char lower_char = the_tsi_char_table->lower_char();
570 char upper_char = the_tsi_char_table->upper_char();
571 for (char which = lower_char; which < upper_char; ++which)
572 remove_entries(the_tsi_char_table->child(which));
573 remove_entries(the_tsi_char_table->child(upper_char));
574 break;
576 case TSI_LEAF:
577 break;
578 default:
579 assert(FALSE);
583 void tree_string_index::internal_development_dump(char **buffer,
584 unsigned long *buf_size, unsigned long position, tsi_node *base_node,
585 FILE *fp, void (*node_func)(void *data, FILE *fp),
586 boolean show_internals)
588 if (base_node == NULL)
589 return;
591 if (position >= *buf_size)
593 char *new_buffer = new char[*buf_size * 2];
594 memcpy(new_buffer, *buffer, *buf_size);
595 delete[] *buffer;
596 *buffer = new_buffer;
597 *buf_size *= 2;
600 if (show_internals)
602 fprintf(fp, "%*s", (int)(8 + 2 * position), "");
603 switch (base_node->kind())
605 case TSI_SUBSTRING:
607 tsi_substring *the_substring = (tsi_substring *)base_node;
608 fprintf(fp, "sub <\"%s\">", the_substring->chars());
609 break;
611 case TSI_CHAR_LIST:
613 tsi_char_list *the_char_list = (tsi_char_list *)base_node;
614 fprintf(fp, "list[%lu]", the_char_list->count());
615 break;
617 case TSI_CHAR_TABLE:
619 tsi_char_table *the_char_table = (tsi_char_table *)base_node;
620 fprintf(fp, "table['%c'..'%c']", the_char_table->lower_char(),
621 the_char_table->upper_char());
622 break;
624 case TSI_LEAF:
625 fprintf(fp, "leaf");
626 break;
627 default:
628 assert(FALSE);
630 fprintf(fp, "\n");
633 si_entry *base_entry = base_node->base_entry();
634 if (base_entry != NULL)
636 (*buffer)[position] = 0;
637 fprintf(fp, " (%p) \"%s\":", (void*)base_entry, *buffer);
638 if (node_func != NULL)
639 (*node_func)(base_entry->data_value(), fp);
640 else
641 fprintf(fp, " [%p]", base_entry->data_value());
642 fprintf(fp, "\n");
645 switch (base_node->kind())
647 case TSI_SUBSTRING:
649 tsi_substring *the_tsi_substring = (tsi_substring *)base_node;
650 char *chars = the_tsi_substring->chars();
651 unsigned long string_size = strlen(chars);
652 if (position + string_size + 1 >= *buf_size)
654 char *new_buffer =
655 new char[*buf_size + position + string_size + 1];
656 memcpy(new_buffer, *buffer, *buf_size);
657 delete[] *buffer;
658 *buffer = new_buffer;
659 *buf_size += position + string_size + 1;
661 strcpy(&((*buffer)[position]), chars);
662 internal_development_dump(buffer, buf_size, position + string_size,
663 the_tsi_substring->child(), fp,
664 node_func, show_internals);
665 break;
667 case TSI_CHAR_LIST:
669 tsi_char_list *the_tsi_char_list = (tsi_char_list *)base_node;
670 unsigned long count = the_tsi_char_list->count();
671 for (unsigned long child_num = 0; child_num < count; ++child_num)
673 (*buffer)[position] = the_tsi_char_list->which_char(child_num);
674 internal_development_dump(buffer, buf_size, position + 1,
675 the_tsi_char_list->child(child_num),
676 fp, node_func, show_internals);
678 break;
680 case TSI_CHAR_TABLE:
682 tsi_char_table *the_tsi_char_table = (tsi_char_table *)base_node;
683 char lower_char = the_tsi_char_table->lower_char();
684 char upper_char = the_tsi_char_table->upper_char();
685 for (char which = lower_char; which < upper_char; ++which)
687 (*buffer)[position] = which;
688 internal_development_dump(buffer, buf_size, position + 1,
689 the_tsi_char_table->child(which), fp,
690 node_func, show_internals);
692 (*buffer)[position] = upper_char;
693 internal_development_dump(buffer, buf_size, position + 1,
694 the_tsi_char_table->child(upper_char),
695 fp, node_func, show_internals);
696 break;
698 case TSI_LEAF:
699 break;
700 default:
701 assert(FALSE);
705 void tree_string_index::get_development_stats(tsi_node *base_node,
706 unsigned long *entry_count, unsigned long *substring_node_count,
707 unsigned long *char_list_node_count,
708 unsigned long *char_table_node_count, unsigned long *leaf_node_count,
709 unsigned long *substring_char_count,
710 unsigned long *char_list_place_count,
711 unsigned long *char_list_used_count,
712 unsigned long *char_table_place_count,
713 unsigned long *char_table_used_count)
715 assert(base_node != NULL);
716 if (base_node->base_entry() != NULL)
717 ++*entry_count;
719 switch (base_node->kind())
721 case TSI_SUBSTRING:
723 tsi_substring *the_tsi_substring = (tsi_substring *)base_node;
724 ++*substring_node_count;
725 *substring_char_count += strlen(the_tsi_substring->chars());
726 get_development_stats(the_tsi_substring->child(), entry_count,
727 substring_node_count, char_list_node_count,
728 char_table_node_count, leaf_node_count,
729 substring_char_count, char_list_place_count,
730 char_list_used_count, char_table_place_count,
731 char_table_used_count);
732 break;
734 case TSI_CHAR_LIST:
736 tsi_char_list *the_tsi_char_list = (tsi_char_list *)base_node;
737 ++*char_list_node_count;
738 unsigned long count = the_tsi_char_list->count();
739 *char_list_place_count += the_list_size;
740 *char_list_used_count += count;
741 for (unsigned long child_num = 0; child_num < count; ++child_num)
743 get_development_stats(the_tsi_char_list->child(child_num),
744 entry_count, substring_node_count,
745 char_list_node_count,
746 char_table_node_count, leaf_node_count,
747 substring_char_count,
748 char_list_place_count,
749 char_list_used_count,
750 char_table_place_count,
751 char_table_used_count);
753 break;
755 case TSI_CHAR_TABLE:
757 tsi_char_table *the_tsi_char_table = (tsi_char_table *)base_node;
758 ++*char_table_node_count;
759 char lower_char = the_tsi_char_table->lower_char();
760 char upper_char = the_tsi_char_table->upper_char();
761 *char_table_place_count +=
762 ((unsigned long)(upper_char - lower_char)) + 1;
763 for (char which = lower_char; which < upper_char; ++which)
765 tsi_node *this_child = the_tsi_char_table->child(which);
766 if (this_child != NULL)
768 get_development_stats(this_child, entry_count,
769 substring_node_count,
770 char_list_node_count,
771 char_table_node_count,
772 leaf_node_count,
773 substring_char_count,
774 char_list_place_count,
775 char_list_used_count,
776 char_table_place_count,
777 char_table_used_count);
778 ++*char_table_used_count;
781 tsi_node *this_child = the_tsi_char_table->child(upper_char);
782 if (this_child != NULL)
784 get_development_stats(this_child, entry_count,
785 substring_node_count,
786 char_list_node_count,
787 char_table_node_count, leaf_node_count,
788 substring_char_count,
789 char_list_place_count,
790 char_list_used_count,
791 char_table_place_count,
792 char_table_used_count);
793 ++*char_table_used_count;
795 break;
797 case TSI_LEAF:
798 ++*leaf_node_count;
799 break;
800 default:
801 assert(FALSE);
805 si_entry *tree_string_index::enter(const char *the_string, void *the_data)
807 assert(the_string != NULL);
809 if (root_node == NULL)
811 si_entry *result = create_entry(the_data);
812 tsi_leaf *new_leaf = new tsi_leaf(result, this);
813 root_node =
814 new tsi_substring(strlen(the_string), the_string, new_leaf);
815 return result;
818 unsigned long prefix_length;
819 tsi_node *the_node = follow_prefix_match(the_string, &prefix_length);
820 assert(the_node != NULL);
821 if (the_string[prefix_length] == 0)
823 if (the_node->base_entry() != NULL)
824 return NULL;
825 si_entry *result = create_entry(the_data);
826 the_node->replace_base_entry(result, this);
827 return result;
829 si_entry *result = create_entry(the_data);
830 si_entry *old_base = the_node->base_entry();
831 tsi_node *new_node;
832 switch (the_node->kind())
834 case TSI_SUBSTRING:
836 tsi_substring *the_tsi_substring = (tsi_substring *)the_node;
837 char *old_chars = the_tsi_substring->chars();
838 unsigned long match_length = 0;
839 while ((the_string[prefix_length + match_length] ==
840 old_chars[match_length]))
842 assert(old_chars[match_length] != 0);
843 ++match_length;
845 tsi_node *old_child = the_tsi_substring->child();
846 the_tsi_substring->replace_child(NULL);
847 unsigned long old_count = strlen(old_chars);
848 assert(match_length < old_count);
849 if (the_string[prefix_length + match_length] == 0)
851 tsi_substring *new_substring1 =
852 new tsi_substring(old_count - match_length,
853 &(old_chars[match_length]),
854 old_child);
855 new_substring1->replace_base_entry(result, this);
856 new_node =
857 new tsi_substring(match_length, old_chars,
858 new_substring1);
859 break;
861 if (match_length < (old_count - 1))
863 tsi_substring *new_substring =
864 new tsi_substring((old_count - 1) - match_length,
865 &(old_chars[match_length + 1]),
866 old_child);
867 old_child = new_substring;
869 tsi_char_list *new_char_list = new tsi_char_list(the_list_size);
870 tsi_node *new_child = new tsi_leaf(result, this);
871 const char *suffix = &(the_string[prefix_length + match_length + 1]);
872 if (suffix[0] != 0)
874 tsi_substring *new_substring =
875 new tsi_substring(strlen(suffix), suffix, new_child);
876 new_child = new_substring;
878 char old_char = old_chars[match_length];
879 char new_char = the_string[prefix_length + match_length];
880 if (old_char > new_char)
882 char temp_char = old_char;
883 old_char = new_char;
884 new_char = temp_char;
885 tsi_node *temp_child = old_child;
886 old_child = new_child;
887 new_child = temp_child;
889 new_char_list->add_child(0, old_child, old_char);
890 new_char_list->add_child(1, new_child, new_char);
891 new_node = new_char_list;
892 if (match_length > 0)
894 const char *prefix = &(the_string[prefix_length]);
895 new_node = new tsi_substring(match_length, prefix, new_node);
897 break;
899 case TSI_CHAR_LIST:
901 tsi_char_list *the_tsi_char_list = (tsi_char_list *)the_node;
902 unsigned long count = the_tsi_char_list->count();
903 tsi_node *new_child = new tsi_leaf(result, this);
904 const char *suffix = &(the_string[prefix_length + 1]);
905 if (*suffix != 0)
907 new_child =
908 new tsi_substring(strlen(suffix), suffix, new_child);
910 if (count == the_list_size)
912 tsi_char_table *new_table =
913 new tsi_char_table(the_expected_lower,
914 the_expected_upper);
915 for (unsigned child_ind = 0; child_ind < count; ++child_ind)
917 unsigned long child_num = (count - child_ind) - 1;
918 tsi_node *this_child = the_tsi_char_list->child(child_num);
919 char which_char = the_tsi_char_list->which_char(child_num);
920 the_tsi_char_list->remove_child(child_num);
921 new_table->replace_child(which_char, this_child);
923 assert(the_tsi_char_list->count() == 0);
924 new_table->replace_child(the_string[prefix_length], new_child);
925 new_node = new_table;
926 break;
928 else
930 char this_char = the_string[prefix_length];
931 unsigned child_num;
932 for (child_num = 0; child_num < count; ++child_num)
934 if (the_tsi_char_list->which_char(child_num) > this_char)
935 break;
937 the_tsi_char_list->add_child(child_num, new_child, this_char);
938 return result;
941 case TSI_CHAR_TABLE:
943 tsi_char_table *the_tsi_char_table = (tsi_char_table *)the_node;
944 assert(the_tsi_char_table->child(the_string[prefix_length]) ==
945 NULL);
946 tsi_node *new_child = new tsi_leaf(result, this);
947 const char *suffix = &(the_string[prefix_length + 1]);
948 if (*suffix != 0)
950 new_child =
951 new tsi_substring(strlen(suffix), suffix, new_child);
953 the_tsi_char_table->replace_child(the_string[prefix_length],
954 new_child);
955 return result;
957 case TSI_LEAF:
959 const char *suffix = &(the_string[prefix_length]);
960 tsi_leaf *new_leaf = new tsi_leaf(result, this);
961 new_node = new tsi_substring(strlen(suffix), suffix, new_leaf);
962 break;
964 default:
965 assert(FALSE);
966 return NULL;
968 if (the_node == root_node)
969 root_node = new_node;
970 else
971 the_node->replace(new_node);
972 the_node->replace_base_entry(NULL, this);
973 new_node->replace_base_entry(old_base, this);
974 delete the_node;
975 return result;
978 si_entry *tree_string_index::lookup_entry(const char *the_string)
980 assert(the_string != NULL);
981 unsigned long prefix_length;
982 tsi_node *the_node = follow_prefix_match(the_string, &prefix_length);
983 if ((the_node != NULL) && (the_string[prefix_length] == 0))
984 return the_node->base_entry();
985 else
986 return NULL;
989 boolean tree_string_index::exists(const char *the_string)
991 return (lookup_entry(the_string) != NULL);
994 void *tree_string_index::lookup(const char *the_string)
996 si_entry *the_entry = lookup_entry(the_string);
997 if (the_entry != NULL)
998 return the_entry->data_value();
999 else
1000 return NULL;
1003 void tree_string_index::remove_entry(si_entry *the_entry)
1005 assert(the_entry != NULL);
1006 tsi_node *back_pointer = (tsi_node *)(get_back_pointer(the_entry));
1007 assert(back_pointer != NULL);
1008 assert(back_pointer->base_entry() == the_entry);
1009 back_pointer->replace_base_entry(NULL, this);
1010 if (back_pointer->kind() == TSI_LEAF)
1012 tsi_node *branchpoint = back_pointer->find_branchpoint();
1013 si_entry *branchpoint_entry = branchpoint->base_entry();
1014 if ((branchpoint_entry != NULL) && (branchpoint_entry != the_entry))
1016 branchpoint->replace_base_entry(NULL, this);
1017 tsi_leaf *new_leaf = new tsi_leaf(branchpoint_entry, this);
1018 if (branchpoint == root_node)
1019 root_node = new_leaf;
1020 else
1021 branchpoint->replace(new_leaf);
1023 else
1025 if (branchpoint == root_node)
1026 root_node = NULL;
1027 else
1028 branchpoint->remove();
1030 delete branchpoint;
1032 destroy_entry(the_entry);
1035 void tree_string_index::development_dump(FILE *fp,
1036 void (*node_func)(void *data,
1037 FILE *fp))
1039 if (root_node == NULL)
1041 fprintf(fp, " <empty>\n");
1043 else
1045 unsigned long buf_size = 20;
1046 char *buffer = new char[buf_size];
1047 internal_development_dump(&buffer, &buf_size, 0, root_node, fp,
1048 node_func, FALSE);
1049 delete[] buffer;
1053 void tree_string_index::development_internals_dump(FILE *fp)
1055 if (root_node == NULL)
1057 fprintf(fp, " <empty>\n");
1059 else
1061 unsigned long buf_size = 20;
1062 char *buffer = new char[buf_size];
1063 internal_development_dump(&buffer, &buf_size, 0, root_node, fp, NULL,
1064 TRUE);
1065 delete[] buffer;
1069 void tree_string_index::development_stats_dump(FILE *fp)
1071 unsigned long entry_count = 0;
1073 unsigned long substring_node_count = 0;
1074 unsigned long char_list_node_count = 0;
1075 unsigned long char_table_node_count = 0;
1076 unsigned long leaf_node_count = 0;
1078 unsigned long substring_char_count = 0;
1079 unsigned long char_list_place_count = 0;
1080 unsigned long char_list_used_count = 0;
1081 unsigned long char_table_place_count = 0;
1082 unsigned long char_table_used_count = 0;
1084 if (root_node != NULL)
1086 get_development_stats(root_node, &entry_count, &substring_node_count,
1087 &char_list_node_count, &char_table_node_count,
1088 &leaf_node_count, &substring_char_count,
1089 &char_list_place_count, &char_list_used_count,
1090 &char_table_place_count, &char_table_used_count);
1093 unsigned long total_node_count =
1094 substring_node_count + char_list_node_count +
1095 char_table_node_count + leaf_node_count;
1096 unsigned long total_memory = sizeof(tree_string_index) +
1097 entry_count * sizeof(si_entry) +
1098 substring_node_count * sizeof(tsi_substring) +
1099 (substring_char_count + substring_node_count) * sizeof(char) +
1100 char_list_node_count * sizeof(tsi_char_list) +
1101 char_list_place_count * (sizeof(tsi_node *) + sizeof(char)) +
1102 char_table_node_count * sizeof(tsi_char_table) +
1103 char_table_place_count * sizeof(tsi_node *) +
1104 leaf_node_count * sizeof(tsi_leaf);
1106 fprintf(fp,
1107 " Substring nodes: %20lu\n"
1108 " Chars in substrings: %20lu\n"
1109 " Char list nodes: %20lu\n"
1110 " Char list spaces: %20lu\n"
1111 " Char list spaces occupied: %20lu (%5.1f%%)\n"
1112 " Char table nodes: %20lu\n"
1113 " Char table spaces: %20lu\n"
1114 " Char table spaces occupied: %20lu (%5.1f%%)\n"
1115 " Leaf nodes: %20lu\n"
1116 " --------------------\n"
1117 " Total nodes: %20lu\n"
1118 "\n"
1119 " Total entries: %20lu\n"
1120 "\n"
1121 " Total direct memory requirement: %20lu\n",
1122 substring_node_count, substring_char_count, char_list_node_count,
1123 char_list_place_count, char_list_used_count,
1124 (char_list_place_count == 0) ? 0.0 :
1125 ((((double)char_list_used_count) / ((double)char_list_place_count)) *
1126 100.0), char_table_node_count, char_table_place_count,
1127 char_table_used_count,
1128 (char_table_place_count == 0) ? 0.0 :
1129 ((((double)char_table_used_count) / ((double)char_table_place_count)) *
1130 100.0), leaf_node_count, total_node_count, entry_count, total_memory);
1133 // override default copy constructors and assignment ops
1134 si_entry::si_entry(const si_entry &) { assert(FALSE); }
1135 si_entry &si_entry::operator=(const si_entry &) { assert(FALSE); return *this; }
1137 string_index::string_index(const string_index &) { assert(FALSE); }
1138 string_index &string_index::operator=(const string_index &) { assert(FALSE); return *this; }
1140 tree_string_index::tree_string_index(const tree_string_index &) { assert(FALSE); }
1141 tree_string_index &tree_string_index::operator=(const tree_string_index &) { assert(FALSE); return *this; }