1 /* file "tree_string_index.cc" */
3 /* Copyright (c) 1995 Stanford University
7 This software is provided under the terms described in
8 the "suif_copyright.h" include file. */
10 #include <suif_copyright.h>
12 #define _MODULE_ "libsuif.a"
14 #pragma implementation "string_index.h"
15 #pragma implementation "tree_string_index.h"
17 #define RCS_BASE_FILE tree_string_index_cc
23 "$Id: tree_string_index.cc,v 1.2 1999/08/25 03:29:17 brm Exp $")
25 enum tsi_node_kind
{ TSI_SUBSTRING
, TSI_CHAR_LIST
, TSI_CHAR_TABLE
, TSI_LEAF
};
30 friend class tsi_substring
;
31 friend class tsi_char_list
;
32 friend class tsi_char_table
;
33 friend class tsi_leaf
;
36 /* We make explicit copy constructor and assignment operator and
37 * make them private to foil C++'s automatic default versions. */
38 tsi_node(const tsi_node
&) { assert(FALSE
); }
39 void operator=(const tsi_node
&) { assert(FALSE
); }
44 tsi_node
*parent_node
;
47 tsi_node(void) : the_entry(0), parent_node(0), parent_index(0)
51 virtual ~tsi_node(void) { }
53 virtual tsi_node_kind
kind(void) = 0;
55 void replace(tsi_node
*replacement
);
58 si_entry
*base_entry(void) { return the_entry
; }
59 void replace_base_entry(si_entry
*new_entry
, tree_string_index
*the_tsi
);
60 tsi_node
*find_branchpoint(void);
63 class tsi_substring
: public tsi_node
70 tsi_substring(unsigned long char_count
, const char *init_chars
,
71 tsi_node
*init_child
);
74 tsi_node_kind
kind(void) { return TSI_SUBSTRING
; }
76 char *chars(void) { return the_chars
; }
77 tsi_node
*child(void) { return the_child
; }
79 void replace_child(tsi_node
*new_child
);
82 class tsi_char_list
: public tsi_node
85 unsigned long table_size
;
86 unsigned long num_entries
;
87 tsi_node
**the_children
;
91 tsi_char_list(unsigned long init_table_size
);
94 tsi_node_kind
kind(void) { return TSI_CHAR_LIST
; }
96 unsigned long count(void) { return num_entries
; }
97 tsi_node
*child(unsigned long child_num
)
98 { assert(child_num
< num_entries
); return the_children
[child_num
]; }
99 char which_char(unsigned long child_num
)
100 { assert(child_num
< num_entries
); return the_chars
[child_num
]; }
102 void add_child(unsigned long child_num
, tsi_node
*new_child
,
104 void remove_child(unsigned long child_num
);
105 void replace_child(unsigned long child_num
, tsi_node
*new_child
);
108 class tsi_char_table
: public tsi_node
111 tsi_node
**the_children
;
116 tsi_char_table(char init_lower
, char init_upper
);
117 ~tsi_char_table(void);
119 tsi_node_kind
kind(void) { return TSI_CHAR_TABLE
; }
121 tsi_node
*child(char which_char
)
123 if ((which_char
>= first_char
) && (which_char
<= last_char
))
124 return the_children
[which_char
- (long)first_char
];
128 char lower_char(void) { return first_char
; }
129 char upper_char(void) { return last_char
; }
131 void replace_child(char which_char
, tsi_node
*new_child
);
134 class tsi_leaf
: public tsi_node
137 tsi_leaf(si_entry
*init_entry
, tree_string_index
*the_tsi
)
138 { replace_base_entry(init_entry
, the_tsi
); }
141 tsi_node_kind
kind(void) { return TSI_LEAF
; }
145 void tsi_node::replace(tsi_node
*replacement
)
147 assert(parent_node
!= NULL
);
148 switch (parent_node
->kind())
152 tsi_substring
*par_substring
= (tsi_substring
*)parent_node
;
153 assert(par_substring
->child() == this);
154 par_substring
->replace_child(replacement
);
159 tsi_char_list
*par_char_list
= (tsi_char_list
*)parent_node
;
160 unsigned long count
= par_char_list
->count();
161 for (unsigned long child_num
= 0; child_num
< count
; ++child_num
)
163 if (par_char_list
->child(child_num
) == this)
165 par_char_list
->replace_child(child_num
, replacement
);
174 tsi_char_table
*par_char_table
= (tsi_char_table
*)parent_node
;
175 assert(par_char_table
->child(parent_index
) == this);
176 par_char_table
->replace_child(parent_index
, replacement
);
186 void tsi_node::remove(void)
188 assert(parent_node
!= NULL
);
189 switch (parent_node
->kind())
193 tsi_substring
*par_substring
= (tsi_substring
*)parent_node
;
194 assert(par_substring
->child() == this);
195 par_substring
->replace_child(NULL
);
200 tsi_char_list
*par_char_list
= (tsi_char_list
*)parent_node
;
201 unsigned long count
= par_char_list
->count();
202 for (unsigned long child_num
= 0; child_num
< count
; ++child_num
)
204 if (par_char_list
->child(child_num
) == this)
206 par_char_list
->remove_child(child_num
);
215 tsi_char_table
*par_char_table
= (tsi_char_table
*)parent_node
;
216 assert(par_char_table
->child(parent_index
) == this);
217 par_char_table
->replace_child(parent_index
, NULL
);
227 void tsi_node::replace_base_entry(si_entry
*new_entry
,
228 tree_string_index
*the_tsi
)
230 if (the_entry
!= NULL
)
232 assert(the_tsi
->get_back_pointer(the_entry
) == this);
233 the_tsi
->set_back_pointer(the_entry
, NULL
);
235 the_entry
= new_entry
;
236 if (new_entry
!= NULL
)
238 assert(the_tsi
->get_back_pointer(new_entry
) == NULL
);
239 the_tsi
->set_back_pointer(new_entry
, this);
243 tsi_node
*tsi_node::find_branchpoint(void)
245 if (parent_node
== NULL
)
247 if (parent_node
->kind() != TSI_SUBSTRING
)
249 tsi_substring
*parent_substring
= (tsi_substring
*)parent_node
;
250 if (parent_substring
->base_entry() != NULL
)
251 return parent_substring
;
252 return parent_substring
->find_branchpoint();
256 tsi_substring::tsi_substring(unsigned long char_count
, const char *init_chars
,
257 tsi_node
*init_child
)
258 : the_chars(new char[char_count
+ 1]), the_child(init_child
)
260 assert((init_child
== NULL
) || (init_child
->parent_node
== NULL
));
261 memcpy(the_chars
, init_chars
, char_count
);
262 the_chars
[char_count
] = 0;
263 if (init_child
!= NULL
)
264 init_child
->parent_node
= this;
267 tsi_substring::~tsi_substring(void)
270 if (the_child
!= NULL
)
274 void tsi_substring::replace_child(tsi_node
*new_child
)
276 assert((new_child
== NULL
) || (new_child
->parent_node
== NULL
));
277 if (the_child
!= NULL
)
278 the_child
->parent_node
= NULL
;
279 the_child
= new_child
;
280 if (new_child
!= NULL
)
281 new_child
->parent_node
= this;
285 tsi_char_list::tsi_char_list(unsigned long init_table_size
)
286 : table_size(init_table_size
), num_entries(0),
287 the_children(new tsi_node
*[init_table_size
]),
288 the_chars(new char[init_table_size
])
290 table_size
= init_table_size
;
294 tsi_char_list::~tsi_char_list(void)
296 for (unsigned long entry_num
= 0; entry_num
< num_entries
; ++entry_num
)
298 tsi_node
*this_child
= the_children
[entry_num
];
299 if (this_child
!= NULL
)
302 delete[] the_children
;
306 void tsi_char_list::add_child(unsigned long child_num
, tsi_node
*new_child
,
309 assert((new_child
== NULL
) || (new_child
->parent_node
== NULL
));
310 assert(child_num
<= num_entries
);
311 if (num_entries
== table_size
)
313 tsi_node
**new_child_list
= new tsi_node
*[table_size
* 2];
314 char *new_char_list
= new char[table_size
* 2];
315 memcpy(new_child_list
, the_children
, table_size
* sizeof(tsi_node
*));
316 memcpy(new_char_list
, the_chars
, table_size
* sizeof(char));
317 delete[] the_children
;
319 the_children
= new_child_list
;
320 the_chars
= new_char_list
;
323 if (child_num
< num_entries
)
325 memmove(&(the_children
[child_num
+ 1]), &(the_children
[child_num
]),
326 (num_entries
- child_num
) * sizeof(tsi_node
*));
327 memmove(&(the_chars
[child_num
+ 1]), &(the_chars
[child_num
]),
328 (num_entries
- child_num
) * sizeof(char));
330 the_children
[child_num
] = new_child
;
331 the_chars
[child_num
] = this_char
;
333 if (new_child
!= NULL
)
334 new_child
->parent_node
= this;
337 void tsi_char_list::remove_child(unsigned long child_num
)
339 assert(child_num
< num_entries
);
340 if (the_children
[child_num
] != NULL
)
341 the_children
[child_num
]->parent_node
= NULL
;
343 if (child_num
< num_entries
)
345 memmove(&(the_children
[child_num
]), &(the_children
[child_num
+ 1]),
346 (num_entries
- child_num
) * sizeof(tsi_node
*));
347 memmove(&(the_chars
[child_num
]), &(the_chars
[child_num
+ 1]),
348 (num_entries
- child_num
) * sizeof(char));
352 void tsi_char_list::replace_child(unsigned long child_num
, tsi_node
*new_child
)
354 assert((new_child
== NULL
) || (new_child
->parent_node
== NULL
));
355 assert(child_num
< num_entries
);
356 if (the_children
[child_num
] != NULL
)
357 the_children
[child_num
]->parent_node
= NULL
;
358 the_children
[child_num
] = new_child
;
359 if (new_child
!= NULL
)
360 new_child
->parent_node
= this;
364 tsi_char_table::tsi_char_table(char init_lower
, char init_upper
)
365 : the_children(0), first_char(init_lower
), last_char(init_upper
)
367 assert(init_lower
<= init_upper
);
368 unsigned long init_size
=
369 (unsigned long)(unsigned char)(init_upper
- init_lower
) + 1;
370 the_children
= new tsi_node
*[init_size
];
371 for (char test_char
= init_lower
; test_char
< init_upper
; ++test_char
)
372 the_children
[test_char
- (long)init_lower
] = NULL
;
373 the_children
[init_upper
- (long)init_lower
] = NULL
;
376 tsi_char_table::~tsi_char_table(void)
378 for (char test_char
= first_char
; test_char
< last_char
; ++test_char
)
380 tsi_node
*this_child
= the_children
[test_char
- (long)first_char
];
381 if (this_child
!= NULL
)
384 tsi_node
*this_child
= the_children
[last_char
- (long)first_char
];
385 if (this_child
!= NULL
)
387 delete[] the_children
;
390 void tsi_char_table::replace_child(char which_char
, tsi_node
*new_child
)
392 assert((new_child
== NULL
) || (new_child
->parent_node
== NULL
));
393 if (which_char
< first_char
)
395 unsigned long init_size
=
396 (unsigned long)(unsigned char)(last_char
- which_char
) + 1;
397 tsi_node
**new_child_list
= new tsi_node
*[init_size
];
398 unsigned long extra
=
399 (unsigned long)(unsigned char)(first_char
- which_char
) + 1;
400 unsigned long orig_size
=
401 (unsigned long)(unsigned char)(last_char
- first_char
) + 1;
402 memcpy(&(new_child_list
[extra
]), the_children
,
403 orig_size
* sizeof(tsi_node
*));
404 delete[] the_children
;
405 the_children
= new_child_list
;
406 for (char test_char
= which_char
; test_char
< first_char
; ++test_char
)
407 the_children
[test_char
- (long)which_char
] = NULL
;
408 first_char
= which_char
;
410 else if (which_char
> last_char
)
412 unsigned long init_size
=
413 (unsigned long)(unsigned char)(which_char
- first_char
) + 1;
414 tsi_node
**new_child_list
= new tsi_node
*[init_size
];
415 unsigned long orig_size
=
416 (unsigned long)(unsigned char)(last_char
- first_char
) + 1;
417 memcpy(new_child_list
, the_children
, orig_size
* sizeof(tsi_node
*));
418 delete[] the_children
;
419 the_children
= new_child_list
;
420 for (char test_char
= which_char
; test_char
> last_char
; --test_char
)
421 the_children
[test_char
- (long)first_char
] = NULL
;
422 last_char
= which_char
;
424 if (the_children
[which_char
- (long)first_char
] != NULL
)
425 the_children
[which_char
- (long)first_char
]->parent_node
= NULL
;
426 the_children
[which_char
- (long)first_char
] = new_child
;
427 if (new_child
!= NULL
)
429 new_child
->parent_node
= this;
430 new_child
->parent_index
= which_char
;
435 tree_string_index::tree_string_index(char expected_lower
, char expected_upper
,
436 unsigned long list_size
)
437 : root_node(0), the_list_size(list_size
),
438 the_expected_lower(expected_lower
),
439 the_expected_upper(expected_upper
)
441 assert(list_size
>= 2);
444 tree_string_index::~tree_string_index(void)
446 if (root_node
!= NULL
)
448 remove_entries(root_node
);
453 tsi_node
*tree_string_index::follow_prefix_match(const char *string
,
454 unsigned long *prefix_length
)
456 if (root_node
== NULL
)
458 unsigned long position
= 0;
459 tsi_node
*current_node
= root_node
;
460 while (string
[position
] != 0)
462 boolean done
= FALSE
;
463 switch (current_node
->kind())
467 tsi_substring
*the_tsi_substring
=
468 (tsi_substring
*)current_node
;
469 char *the_chars
= the_tsi_substring
->chars();
470 unsigned long num_chars
= strlen(the_chars
);
471 if (strncmp(&(string
[position
]), the_chars
, num_chars
) == 0)
473 position
+= num_chars
;
474 current_node
= the_tsi_substring
->child();
484 tsi_char_list
*the_tsi_char_list
=
485 (tsi_char_list
*)current_node
;
486 unsigned long count
= the_tsi_char_list
->count();
487 char this_char
= string
[position
];
488 unsigned long child_num
;
489 for (child_num
= 0; child_num
< count
; ++child_num
)
491 if (the_tsi_char_list
->which_char(child_num
) >= this_char
)
494 if ((child_num
< count
) &&
495 (the_tsi_char_list
->which_char(child_num
) == this_char
))
498 current_node
= the_tsi_char_list
->child(child_num
);
508 tsi_char_table
*the_tsi_char_table
=
509 (tsi_char_table
*)current_node
;
510 tsi_node
*this_child
=
511 the_tsi_char_table
->child(string
[position
]);
512 if (this_child
!= NULL
)
515 current_node
= this_child
;
534 *prefix_length
= position
;
538 void tree_string_index::remove_entries(tsi_node
*base_node
)
540 if (base_node
== NULL
)
543 si_entry
*base_entry
= base_node
->base_entry();
544 if (base_entry
!= NULL
)
546 base_node
->replace_base_entry(NULL
, this);
547 destroy_entry(base_entry
);
550 switch (base_node
->kind())
554 tsi_substring
*the_tsi_substring
= (tsi_substring
*)base_node
;
555 remove_entries(the_tsi_substring
->child());
560 tsi_char_list
*the_tsi_char_list
= (tsi_char_list
*)base_node
;
561 unsigned long count
= the_tsi_char_list
->count();
562 for (unsigned long child_num
= 0; child_num
< count
; ++child_num
)
563 remove_entries(the_tsi_char_list
->child(child_num
));
568 tsi_char_table
*the_tsi_char_table
= (tsi_char_table
*)base_node
;
569 char lower_char
= the_tsi_char_table
->lower_char();
570 char upper_char
= the_tsi_char_table
->upper_char();
571 for (char which
= lower_char
; which
< upper_char
; ++which
)
572 remove_entries(the_tsi_char_table
->child(which
));
573 remove_entries(the_tsi_char_table
->child(upper_char
));
583 void tree_string_index::internal_development_dump(char **buffer
,
584 unsigned long *buf_size
, unsigned long position
, tsi_node
*base_node
,
585 FILE *fp
, void (*node_func
)(void *data
, FILE *fp
),
586 boolean show_internals
)
588 if (base_node
== NULL
)
591 if (position
>= *buf_size
)
593 char *new_buffer
= new char[*buf_size
* 2];
594 memcpy(new_buffer
, *buffer
, *buf_size
);
596 *buffer
= new_buffer
;
602 fprintf(fp
, "%*s", (int)(8 + 2 * position
), "");
603 switch (base_node
->kind())
607 tsi_substring
*the_substring
= (tsi_substring
*)base_node
;
608 fprintf(fp
, "sub <\"%s\">", the_substring
->chars());
613 tsi_char_list
*the_char_list
= (tsi_char_list
*)base_node
;
614 fprintf(fp
, "list[%lu]", the_char_list
->count());
619 tsi_char_table
*the_char_table
= (tsi_char_table
*)base_node
;
620 fprintf(fp
, "table['%c'..'%c']", the_char_table
->lower_char(),
621 the_char_table
->upper_char());
633 si_entry
*base_entry
= base_node
->base_entry();
634 if (base_entry
!= NULL
)
636 (*buffer
)[position
] = 0;
637 fprintf(fp
, " (%p) \"%s\":", (void*)base_entry
, *buffer
);
638 if (node_func
!= NULL
)
639 (*node_func
)(base_entry
->data_value(), fp
);
641 fprintf(fp
, " [%p]", base_entry
->data_value());
645 switch (base_node
->kind())
649 tsi_substring
*the_tsi_substring
= (tsi_substring
*)base_node
;
650 char *chars
= the_tsi_substring
->chars();
651 unsigned long string_size
= strlen(chars
);
652 if (position
+ string_size
+ 1 >= *buf_size
)
655 new char[*buf_size
+ position
+ string_size
+ 1];
656 memcpy(new_buffer
, *buffer
, *buf_size
);
658 *buffer
= new_buffer
;
659 *buf_size
+= position
+ string_size
+ 1;
661 strcpy(&((*buffer
)[position
]), chars
);
662 internal_development_dump(buffer
, buf_size
, position
+ string_size
,
663 the_tsi_substring
->child(), fp
,
664 node_func
, show_internals
);
669 tsi_char_list
*the_tsi_char_list
= (tsi_char_list
*)base_node
;
670 unsigned long count
= the_tsi_char_list
->count();
671 for (unsigned long child_num
= 0; child_num
< count
; ++child_num
)
673 (*buffer
)[position
] = the_tsi_char_list
->which_char(child_num
);
674 internal_development_dump(buffer
, buf_size
, position
+ 1,
675 the_tsi_char_list
->child(child_num
),
676 fp
, node_func
, show_internals
);
682 tsi_char_table
*the_tsi_char_table
= (tsi_char_table
*)base_node
;
683 char lower_char
= the_tsi_char_table
->lower_char();
684 char upper_char
= the_tsi_char_table
->upper_char();
685 for (char which
= lower_char
; which
< upper_char
; ++which
)
687 (*buffer
)[position
] = which
;
688 internal_development_dump(buffer
, buf_size
, position
+ 1,
689 the_tsi_char_table
->child(which
), fp
,
690 node_func
, show_internals
);
692 (*buffer
)[position
] = upper_char
;
693 internal_development_dump(buffer
, buf_size
, position
+ 1,
694 the_tsi_char_table
->child(upper_char
),
695 fp
, node_func
, show_internals
);
705 void tree_string_index::get_development_stats(tsi_node
*base_node
,
706 unsigned long *entry_count
, unsigned long *substring_node_count
,
707 unsigned long *char_list_node_count
,
708 unsigned long *char_table_node_count
, unsigned long *leaf_node_count
,
709 unsigned long *substring_char_count
,
710 unsigned long *char_list_place_count
,
711 unsigned long *char_list_used_count
,
712 unsigned long *char_table_place_count
,
713 unsigned long *char_table_used_count
)
715 assert(base_node
!= NULL
);
716 if (base_node
->base_entry() != NULL
)
719 switch (base_node
->kind())
723 tsi_substring
*the_tsi_substring
= (tsi_substring
*)base_node
;
724 ++*substring_node_count
;
725 *substring_char_count
+= strlen(the_tsi_substring
->chars());
726 get_development_stats(the_tsi_substring
->child(), entry_count
,
727 substring_node_count
, char_list_node_count
,
728 char_table_node_count
, leaf_node_count
,
729 substring_char_count
, char_list_place_count
,
730 char_list_used_count
, char_table_place_count
,
731 char_table_used_count
);
736 tsi_char_list
*the_tsi_char_list
= (tsi_char_list
*)base_node
;
737 ++*char_list_node_count
;
738 unsigned long count
= the_tsi_char_list
->count();
739 *char_list_place_count
+= the_list_size
;
740 *char_list_used_count
+= count
;
741 for (unsigned long child_num
= 0; child_num
< count
; ++child_num
)
743 get_development_stats(the_tsi_char_list
->child(child_num
),
744 entry_count
, substring_node_count
,
745 char_list_node_count
,
746 char_table_node_count
, leaf_node_count
,
747 substring_char_count
,
748 char_list_place_count
,
749 char_list_used_count
,
750 char_table_place_count
,
751 char_table_used_count
);
757 tsi_char_table
*the_tsi_char_table
= (tsi_char_table
*)base_node
;
758 ++*char_table_node_count
;
759 char lower_char
= the_tsi_char_table
->lower_char();
760 char upper_char
= the_tsi_char_table
->upper_char();
761 *char_table_place_count
+=
762 ((unsigned long)(upper_char
- lower_char
)) + 1;
763 for (char which
= lower_char
; which
< upper_char
; ++which
)
765 tsi_node
*this_child
= the_tsi_char_table
->child(which
);
766 if (this_child
!= NULL
)
768 get_development_stats(this_child
, entry_count
,
769 substring_node_count
,
770 char_list_node_count
,
771 char_table_node_count
,
773 substring_char_count
,
774 char_list_place_count
,
775 char_list_used_count
,
776 char_table_place_count
,
777 char_table_used_count
);
778 ++*char_table_used_count
;
781 tsi_node
*this_child
= the_tsi_char_table
->child(upper_char
);
782 if (this_child
!= NULL
)
784 get_development_stats(this_child
, entry_count
,
785 substring_node_count
,
786 char_list_node_count
,
787 char_table_node_count
, leaf_node_count
,
788 substring_char_count
,
789 char_list_place_count
,
790 char_list_used_count
,
791 char_table_place_count
,
792 char_table_used_count
);
793 ++*char_table_used_count
;
805 si_entry
*tree_string_index::enter(const char *the_string
, void *the_data
)
807 assert(the_string
!= NULL
);
809 if (root_node
== NULL
)
811 si_entry
*result
= create_entry(the_data
);
812 tsi_leaf
*new_leaf
= new tsi_leaf(result
, this);
814 new tsi_substring(strlen(the_string
), the_string
, new_leaf
);
818 unsigned long prefix_length
;
819 tsi_node
*the_node
= follow_prefix_match(the_string
, &prefix_length
);
820 assert(the_node
!= NULL
);
821 if (the_string
[prefix_length
] == 0)
823 if (the_node
->base_entry() != NULL
)
825 si_entry
*result
= create_entry(the_data
);
826 the_node
->replace_base_entry(result
, this);
829 si_entry
*result
= create_entry(the_data
);
830 si_entry
*old_base
= the_node
->base_entry();
832 switch (the_node
->kind())
836 tsi_substring
*the_tsi_substring
= (tsi_substring
*)the_node
;
837 char *old_chars
= the_tsi_substring
->chars();
838 unsigned long match_length
= 0;
839 while ((the_string
[prefix_length
+ match_length
] ==
840 old_chars
[match_length
]))
842 assert(old_chars
[match_length
] != 0);
845 tsi_node
*old_child
= the_tsi_substring
->child();
846 the_tsi_substring
->replace_child(NULL
);
847 unsigned long old_count
= strlen(old_chars
);
848 assert(match_length
< old_count
);
849 if (the_string
[prefix_length
+ match_length
] == 0)
851 tsi_substring
*new_substring1
=
852 new tsi_substring(old_count
- match_length
,
853 &(old_chars
[match_length
]),
855 new_substring1
->replace_base_entry(result
, this);
857 new tsi_substring(match_length
, old_chars
,
861 if (match_length
< (old_count
- 1))
863 tsi_substring
*new_substring
=
864 new tsi_substring((old_count
- 1) - match_length
,
865 &(old_chars
[match_length
+ 1]),
867 old_child
= new_substring
;
869 tsi_char_list
*new_char_list
= new tsi_char_list(the_list_size
);
870 tsi_node
*new_child
= new tsi_leaf(result
, this);
871 const char *suffix
= &(the_string
[prefix_length
+ match_length
+ 1]);
874 tsi_substring
*new_substring
=
875 new tsi_substring(strlen(suffix
), suffix
, new_child
);
876 new_child
= new_substring
;
878 char old_char
= old_chars
[match_length
];
879 char new_char
= the_string
[prefix_length
+ match_length
];
880 if (old_char
> new_char
)
882 char temp_char
= old_char
;
884 new_char
= temp_char
;
885 tsi_node
*temp_child
= old_child
;
886 old_child
= new_child
;
887 new_child
= temp_child
;
889 new_char_list
->add_child(0, old_child
, old_char
);
890 new_char_list
->add_child(1, new_child
, new_char
);
891 new_node
= new_char_list
;
892 if (match_length
> 0)
894 const char *prefix
= &(the_string
[prefix_length
]);
895 new_node
= new tsi_substring(match_length
, prefix
, new_node
);
901 tsi_char_list
*the_tsi_char_list
= (tsi_char_list
*)the_node
;
902 unsigned long count
= the_tsi_char_list
->count();
903 tsi_node
*new_child
= new tsi_leaf(result
, this);
904 const char *suffix
= &(the_string
[prefix_length
+ 1]);
908 new tsi_substring(strlen(suffix
), suffix
, new_child
);
910 if (count
== the_list_size
)
912 tsi_char_table
*new_table
=
913 new tsi_char_table(the_expected_lower
,
915 for (unsigned child_ind
= 0; child_ind
< count
; ++child_ind
)
917 unsigned long child_num
= (count
- child_ind
) - 1;
918 tsi_node
*this_child
= the_tsi_char_list
->child(child_num
);
919 char which_char
= the_tsi_char_list
->which_char(child_num
);
920 the_tsi_char_list
->remove_child(child_num
);
921 new_table
->replace_child(which_char
, this_child
);
923 assert(the_tsi_char_list
->count() == 0);
924 new_table
->replace_child(the_string
[prefix_length
], new_child
);
925 new_node
= new_table
;
930 char this_char
= the_string
[prefix_length
];
932 for (child_num
= 0; child_num
< count
; ++child_num
)
934 if (the_tsi_char_list
->which_char(child_num
) > this_char
)
937 the_tsi_char_list
->add_child(child_num
, new_child
, this_char
);
943 tsi_char_table
*the_tsi_char_table
= (tsi_char_table
*)the_node
;
944 assert(the_tsi_char_table
->child(the_string
[prefix_length
]) ==
946 tsi_node
*new_child
= new tsi_leaf(result
, this);
947 const char *suffix
= &(the_string
[prefix_length
+ 1]);
951 new tsi_substring(strlen(suffix
), suffix
, new_child
);
953 the_tsi_char_table
->replace_child(the_string
[prefix_length
],
959 const char *suffix
= &(the_string
[prefix_length
]);
960 tsi_leaf
*new_leaf
= new tsi_leaf(result
, this);
961 new_node
= new tsi_substring(strlen(suffix
), suffix
, new_leaf
);
968 if (the_node
== root_node
)
969 root_node
= new_node
;
971 the_node
->replace(new_node
);
972 the_node
->replace_base_entry(NULL
, this);
973 new_node
->replace_base_entry(old_base
, this);
978 si_entry
*tree_string_index::lookup_entry(const char *the_string
)
980 assert(the_string
!= NULL
);
981 unsigned long prefix_length
;
982 tsi_node
*the_node
= follow_prefix_match(the_string
, &prefix_length
);
983 if ((the_node
!= NULL
) && (the_string
[prefix_length
] == 0))
984 return the_node
->base_entry();
989 boolean
tree_string_index::exists(const char *the_string
)
991 return (lookup_entry(the_string
) != NULL
);
994 void *tree_string_index::lookup(const char *the_string
)
996 si_entry
*the_entry
= lookup_entry(the_string
);
997 if (the_entry
!= NULL
)
998 return the_entry
->data_value();
1003 void tree_string_index::remove_entry(si_entry
*the_entry
)
1005 assert(the_entry
!= NULL
);
1006 tsi_node
*back_pointer
= (tsi_node
*)(get_back_pointer(the_entry
));
1007 assert(back_pointer
!= NULL
);
1008 assert(back_pointer
->base_entry() == the_entry
);
1009 back_pointer
->replace_base_entry(NULL
, this);
1010 if (back_pointer
->kind() == TSI_LEAF
)
1012 tsi_node
*branchpoint
= back_pointer
->find_branchpoint();
1013 si_entry
*branchpoint_entry
= branchpoint
->base_entry();
1014 if ((branchpoint_entry
!= NULL
) && (branchpoint_entry
!= the_entry
))
1016 branchpoint
->replace_base_entry(NULL
, this);
1017 tsi_leaf
*new_leaf
= new tsi_leaf(branchpoint_entry
, this);
1018 if (branchpoint
== root_node
)
1019 root_node
= new_leaf
;
1021 branchpoint
->replace(new_leaf
);
1025 if (branchpoint
== root_node
)
1028 branchpoint
->remove();
1032 destroy_entry(the_entry
);
1035 void tree_string_index::development_dump(FILE *fp
,
1036 void (*node_func
)(void *data
,
1039 if (root_node
== NULL
)
1041 fprintf(fp
, " <empty>\n");
1045 unsigned long buf_size
= 20;
1046 char *buffer
= new char[buf_size
];
1047 internal_development_dump(&buffer
, &buf_size
, 0, root_node
, fp
,
1053 void tree_string_index::development_internals_dump(FILE *fp
)
1055 if (root_node
== NULL
)
1057 fprintf(fp
, " <empty>\n");
1061 unsigned long buf_size
= 20;
1062 char *buffer
= new char[buf_size
];
1063 internal_development_dump(&buffer
, &buf_size
, 0, root_node
, fp
, NULL
,
1069 void tree_string_index::development_stats_dump(FILE *fp
)
1071 unsigned long entry_count
= 0;
1073 unsigned long substring_node_count
= 0;
1074 unsigned long char_list_node_count
= 0;
1075 unsigned long char_table_node_count
= 0;
1076 unsigned long leaf_node_count
= 0;
1078 unsigned long substring_char_count
= 0;
1079 unsigned long char_list_place_count
= 0;
1080 unsigned long char_list_used_count
= 0;
1081 unsigned long char_table_place_count
= 0;
1082 unsigned long char_table_used_count
= 0;
1084 if (root_node
!= NULL
)
1086 get_development_stats(root_node
, &entry_count
, &substring_node_count
,
1087 &char_list_node_count
, &char_table_node_count
,
1088 &leaf_node_count
, &substring_char_count
,
1089 &char_list_place_count
, &char_list_used_count
,
1090 &char_table_place_count
, &char_table_used_count
);
1093 unsigned long total_node_count
=
1094 substring_node_count
+ char_list_node_count
+
1095 char_table_node_count
+ leaf_node_count
;
1096 unsigned long total_memory
= sizeof(tree_string_index
) +
1097 entry_count
* sizeof(si_entry
) +
1098 substring_node_count
* sizeof(tsi_substring
) +
1099 (substring_char_count
+ substring_node_count
) * sizeof(char) +
1100 char_list_node_count
* sizeof(tsi_char_list
) +
1101 char_list_place_count
* (sizeof(tsi_node
*) + sizeof(char)) +
1102 char_table_node_count
* sizeof(tsi_char_table
) +
1103 char_table_place_count
* sizeof(tsi_node
*) +
1104 leaf_node_count
* sizeof(tsi_leaf
);
1107 " Substring nodes: %20lu\n"
1108 " Chars in substrings: %20lu\n"
1109 " Char list nodes: %20lu\n"
1110 " Char list spaces: %20lu\n"
1111 " Char list spaces occupied: %20lu (%5.1f%%)\n"
1112 " Char table nodes: %20lu\n"
1113 " Char table spaces: %20lu\n"
1114 " Char table spaces occupied: %20lu (%5.1f%%)\n"
1115 " Leaf nodes: %20lu\n"
1116 " --------------------\n"
1117 " Total nodes: %20lu\n"
1119 " Total entries: %20lu\n"
1121 " Total direct memory requirement: %20lu\n",
1122 substring_node_count
, substring_char_count
, char_list_node_count
,
1123 char_list_place_count
, char_list_used_count
,
1124 (char_list_place_count
== 0) ? 0.0 :
1125 ((((double)char_list_used_count
) / ((double)char_list_place_count
)) *
1126 100.0), char_table_node_count
, char_table_place_count
,
1127 char_table_used_count
,
1128 (char_table_place_count
== 0) ? 0.0 :
1129 ((((double)char_table_used_count
) / ((double)char_table_place_count
)) *
1130 100.0), leaf_node_count
, total_node_count
, entry_count
, total_memory
);
1133 // override default copy constructors and assignment ops
1134 si_entry::si_entry(const si_entry
&) { assert(FALSE
); }
1135 si_entry
&si_entry::operator=(const si_entry
&) { assert(FALSE
); return *this; }
1137 string_index::string_index(const string_index
&) { assert(FALSE
); }
1138 string_index
&string_index::operator=(const string_index
&) { assert(FALSE
); return *this; }
1140 tree_string_index::tree_string_index(const tree_string_index
&) { assert(FALSE
); }
1141 tree_string_index
&tree_string_index::operator=(const tree_string_index
&) { assert(FALSE
); return *this; }