initial
[prop.git] / lib-src / automata / treeauto.cc
blobc065dcb76427ffa551aedf452b489398c652c082
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #include <iostream.h>
26 #include <assert.h>
27 #include <AD/automata/treeauto.h> // tree automaton definitions
28 #include <AD/automata/gentable.h> // table printer
29 #include <AD/contain/n_array.h> // multi-arrays
31 //////////////////////////////////////////////////////////////////////////////
33 // Make hidden types visible
35 //////////////////////////////////////////////////////////////////////////////
36 typedef TreeAutomaton::Functor Functor;
37 typedef TreeAutomaton::Arity Arity;
38 typedef TreeAutomaton::State State;
39 typedef TreeAutomaton::Offset Offset;
41 //////////////////////////////////////////////////////////////////////////////
43 // The delta table for one functor is actually implemented as
44 // a multi-array, whose definition is in <AD/contain/n_array.h>.
45 // We'll use inheritance for renaming.
47 //////////////////////////////////////////////////////////////////////////////
48 class DeltaTable : public MultiArray<State> {
49 public:
50 DeltaTable(int arity) : MultiArray<State>(arity,(State)0) {}
53 //////////////////////////////////////////////////////////////////////////////
55 // Constructor and destructor
57 //////////////////////////////////////////////////////////////////////////////
58 TreeAutomaton:: TreeAutomaton(Mem& m)
59 : mem(m), arity(0), mu(0), mu_equiv(0), delta(0),
60 accept(0), accept1(0), accept1_cost(0),
61 accept_base(0), accept_vector(0), accept_vector_size(0),
62 mu_used(false), mu_index_used(false), accept_used(false),
63 accept_bitmap_used(false), accept1_used(false),
64 G(0), singleton_delta(0), delta_size(0), arity_size(0),
65 mu_index(0), exhaustive(true) {}
67 TreeAutomaton::~TreeAutomaton()
68 { for (int i = 0; i < delta_size; i++) delete (delta[i]);
69 delete [] accept;
70 delete [] accept1;
71 delete [] accept1_cost;
72 delete [] accept_base;
73 delete [] accept_vector;
74 delete [] mu;
77 //////////////////////////////////////////////////////////////////////////////
79 // Method to allocate the tables given a tree grammar.
81 //////////////////////////////////////////////////////////////////////////////
82 void TreeAutomaton::compile(TreeGrammar& g)
84 ///////////////////////////////////////////////////////////////////////////
85 // Compute the arity of each functor.
86 ///////////////////////////////////////////////////////////////////////////
87 arity = (Arity*) mem.c_alloc(sizeof(Arity) * (arity_size = g.max_functor() + 1));
88 mu_equiv = (Equiv**)mem.c_alloc((g.max_functor() + 1) * sizeof(Equiv*));
89 { for (Functor f = g.min_functor(); f <= g.max_functor(); f++) {
90 arity[f] = g.arity(f);
91 mu_equiv[f] = (Equiv*)mem.c_alloc(g.arity(f) * sizeof(Equiv));
92 // for (int i = g.arity(f) - 1; i >= 0; i--)
93 // mu_equiv[f][i] = 1;
97 ///////////////////////////////////////////////////////////////////////////
98 // Allocate the delta map for non-unit functors.
99 ///////////////////////////////////////////////////////////////////////////
100 delta = (DeltaTable **)mem.c_alloc(sizeof(DeltaTable*) * (delta_size = g.max_functor() + 1));
101 { for (Functor f = g.min_functor(); f <= g.max_functor(); f++) {
102 if (arity[f] == 0) continue;
103 delta[f] = new DeltaTable(arity[f]);
107 ///////////////////////////////////////////////////////////////////////////
108 // Allocate the delta map for unit functors.
109 ///////////////////////////////////////////////////////////////////////////
110 singleton_delta = (State*)mem.c_alloc(sizeof(State) * delta_size);
112 ///////////////////////////////////////////////////////////////////////////
113 // Set the number of states to zero initially.
114 ///////////////////////////////////////////////////////////////////////////
115 state_count = max_state = 0;
116 G = &g;
119 //////////////////////////////////////////////////////////////////////////////
121 // Method to increase the number of states
123 //////////////////////////////////////////////////////////////////////////////
124 void TreeAutomaton::grow_states(int increment)
126 int s;
128 // grow the mu[] array
129 { State *** new_mu = new State ** [ max_state + increment ];
130 for (s = 0; s < max_state; s++) new_mu[s] = mu[s];
131 for ( ; s < max_state + increment; s++) {
132 new_mu[s] = (State**)mem.c_alloc(
133 sizeof(State*) * (G->max_functor() + 1));
134 for (Functor f = G->min_functor(); f <= G->max_functor(); f++) {
135 new_mu[s][f] = (State*)mem.c_alloc(sizeof(State) * G->arity(f));
138 delete [] mu;
139 mu = new_mu;
142 // grow the accept[] array
143 { BitSet ** new_accept = new BitSet * [ max_state + increment ];
144 for (s = 0; s < max_state; s++) new_accept[s] = accept[s];
145 for ( ; s < max_state + increment; s++) {
146 new_accept[s] = new (mem, G->size()) BitSet;
148 delete [] accept;
149 accept = new_accept;
152 // grow the accept1[] array
153 { Rule * new_accept1 = new Rule [ max_state + increment ];
154 for (s = 0; s < max_state; s++) new_accept1[s] = accept1[s];
155 for ( ; s < max_state + increment; s++) new_accept1[s] = -1;
156 delete [] accept1;
157 accept1 = new_accept1;
160 // grow the accept1_cost[] array
161 { Cost * new_accept1_cost = new Cost [ max_state + increment ];
162 for (s = 0; s < max_state; s++) new_accept1_cost[s] = accept1_cost[s];
163 for ( ; s < max_state + increment; s++) new_accept1_cost[s] = 0;
164 delete [] accept1_cost;
165 accept1_cost = new_accept1_cost;
168 // update the state limit
169 max_state += increment;
172 //////////////////////////////////////////////////////////////////////////////
174 // Method to add a new state to the tables
176 //////////////////////////////////////////////////////////////////////////////
177 void TreeAutomaton::new_state(State s)
178 { if (s >= max_state) {
179 int increment = 2 * (max_state + 1) - s;
180 if (increment < 256) increment = 256;
181 grow_states(increment);
183 if (s >= state_count) state_count = s + 1;
186 //////////////////////////////////////////////////////////////////////////////
188 // Method to add a transition to the automaton.
190 //////////////////////////////////////////////////////////////////////////////
191 void TreeAutomaton::add_delta(Functor f, const int inputs[], State s)
192 { (*delta[f])[inputs] = s;
193 singleton_delta[f] = s;
196 //////////////////////////////////////////////////////////////////////////////
198 // Method to return a transition of the automaton.
200 //////////////////////////////////////////////////////////////////////////////
201 State TreeAutomaton::get_delta(Functor f, const int inputs[]) const
202 { return arity[f] == 0 ? singleton_delta[f] : (*delta[f])[inputs]; }
204 //////////////////////////////////////////////////////////////////////////////
206 // Method to compute the accept table in
208 //////////////////////////////////////////////////////////////////////////////
209 void TreeAutomaton::compute_accept_tables ()
210 { if (accept_base) return; // already computed.
212 // Allocate the base
213 accept_base = new Offset [ number_of_states() ];
215 // Compute the length of the vector
216 int vector_len = number_of_states();
217 { for (State s = 0; s < number_of_states(); s++)
218 vector_len += accept_rules(s).count();
220 // Allocate the vector
221 accept_vector_size = vector_len;
222 accept_vector = new Rule [vector_len];
224 // Copy the vectors
225 { Offset offset = 0;
226 for (State s = 0; s < number_of_states(); s++)
227 { if (offset > 0 && accept1_rule(s) < 0) // no accept rules
228 { accept_base[s] = offset-1;
229 } else // some accept rules
230 { accept_base[s] = offset;
231 const BitSet& set = accept_rules(s);
232 int i;
233 foreach_bit_fast (i, set)
234 { accept_vector[offset++] = i;
236 accept_vector[offset++] = -1; // sentinel
239 assert(offset <= accept_vector_size);
240 accept_vector_size = offset;
244 //////////////////////////////////////////////////////////////////////////////
246 // Method to generate code for the accept table.
247 // All accepted rules are printed in base/vector format in two arrays.
249 //////////////////////////////////////////////////////////////////////////////
250 ostream& TreeAutomaton::gen_accept(ostream& out, const char name[]) const
252 TablePrinter P;
253 ((TreeAutomaton *)this)->compute_accept_tables();
254 P.print(out, (const char *)accept_base,
255 number_of_states(), sizeof(Offset),
256 "static const TreeTables::Offset ", name, "accept_base");
257 out << "static const ";
258 P.print(out, (const char *)accept_vector,
259 accept_vector_size, sizeof(Rule),
260 get_rule_type(),
261 name, "accept_vector");
262 ((TreeAutomaton *)this)->accept_used = true;
263 return out;
266 //////////////////////////////////////////////////////////////////////////////
268 // Another method to generate code for the accept table.
269 // The first accepted rules (or -1) are printed in array format.
271 //////////////////////////////////////////////////////////////////////////////
272 ostream& TreeAutomaton::gen_accept1(ostream& out, const char name[]) const
274 out << "static const " << get_rule_type() << name
275 << "_accept1[" << number_of_states() << "] = \n{ ";
276 Bool comma = false;
277 for (State s = 0; s < number_of_states(); s++) {
278 if (comma) out << ", ";
279 if (s != 0 && s % 16 == 0) out << "\n ";
280 out << accept1[s];
281 comma = true;
283 ((TreeAutomaton*)this)->accept1_used = true;
284 return out << "\n};\n";
287 //////////////////////////////////////////////////////////////////////////////
289 // Method to generate code for the accept table.
290 // All accepted rules are printed in bitmap format.
292 //////////////////////////////////////////////////////////////////////////////
293 ostream& TreeAutomaton::gen_bitmap_accept(ostream& out, const char name[]) const
294 { int bytes = (number_of_states() + sizeof(unsigned char) * 8 - 1)/
295 (8 * sizeof(unsigned char));
296 assert(sizeof(unsigned char) == 1);
297 out << "static const unsigned char " << name
298 << "_accept[" << number_of_states() << "]["
299 << bytes << "] = \n{ ";
300 for (State s = 0; s < number_of_states(); s++)
301 { Bool comma = false;
302 out << "\n { ";
303 for (int i = 0; i < bytes; i++)
304 { if (comma) out << ", ";
305 out << accept[s]->byte(i);
306 comma = true;
308 out << " }";
309 if (s != number_of_states() - 1) out << ", ";
311 out << "\n};\n";
312 ((TreeAutomaton *)this)->accept_bitmap_used = false;
313 return out;
316 //////////////////////////////////////////////////////////////////////////////
318 // Method to generate the index table for one functor.
320 //////////////////////////////////////////////////////////////////////////////
321 ostream& TreeAutomaton::gen_index
322 (ostream& out, Functor f, Arity i, const char name[]) const
323 { if (arity[f] != 0) {
324 out << "static const " << get_state_type()
325 << name << "_mu_" << (int)f << "_" << (int)i;
326 out << '[' << state_count << "] = {";
327 Bool comma = false;
328 for (State s = 0; s < state_count; s++) {
329 if (comma) out << ", ";
330 if ((s & 15) == 0) out << "\n ";
331 out << mu[s][f][i];
332 comma = true;
334 out << "\n};\n";
336 ((TreeAutomaton *)this)->mu_used = true;
337 return out;
340 //////////////////////////////////////////////////////////////////////////////
342 // Method to generate the type for a state.
344 //////////////////////////////////////////////////////////////////////////////
345 const char * TreeAutomaton::get_state_type() const
346 { return number_of_states() <= 256 ? "TreeTables::ShortState " :
347 "TreeTables::State ";
350 //////////////////////////////////////////////////////////////////////////////
352 // Method to generate the type for a rule.
354 //////////////////////////////////////////////////////////////////////////////
355 const char * TreeAutomaton::get_rule_type() const
356 { return G->size() < 128 ? "TreeTables::ShortRule " :
357 "TreeTables::Rule ";
360 //////////////////////////////////////////////////////////////////////////////
362 // Method to generate the transition table for one functor.
364 //////////////////////////////////////////////////////////////////////////////
365 ostream& TreeAutomaton::gen_theta
366 (ostream& out, Functor f, const char name[]) const
367 { if (arity[f] != 0) {
368 int n = arity[f];
369 const DeltaTable& delta_table = *delta[f];
370 out << "static const " << get_state_type() << name << "_theta_" << f;
371 { for (int i = 0; i < n; i++)
372 out << '[' << index_range(f,i) << ']';
374 Bool comma = false;
375 out << " = {\n ";
376 int indices[256];
377 int bounds [256];
378 int total = 1;
379 { for (int i = 0; i < n; i++) {
380 indices[i] = 0; total *= bounds[i] = index_range(f,i);
381 if (i > 0) out << "{ ";
384 { int tab = 0;
385 int i, j;
386 do {
387 if (comma) out << ", ";
388 if ((tab & 15) == 0 && comma) out << "\n ";
389 out << delta_table[indices];
390 comma = true;
391 total--;
392 tab++;
393 for (i = j = n - 1; i >= 0; i--) {
394 if (++indices[i] < bounds[i])
395 { if (total > 0 && i != j)
396 { out << ",\n "; tab = 0; comma = false; }
397 break;
399 if (total > 0 || i > 0) out << " }";
400 //if (total > 0) { if (comma) out << ",";
401 // out << "\n "; tab = 0; }
402 indices[i] = 0; comma = false;
404 if (i >= 0) for ( ; j > i; j--) out << "{ ";
405 } while (i >= 0);
407 out << "\n};\n";
409 return out;
412 //////////////////////////////////////////////////////////////////////////////
414 // Compilation and table emission methods.
416 //////////////////////////////////////////////////////////////////////////////
417 void TreeAutomaton::compile_compression_index ()
419 ///////////////////////////////////////////////////////////////////////////
420 // Compress the index maps.
421 ///////////////////////////////////////////////////////////////////////////
422 int index = 1;
423 mu_index = (int**)mem.c_alloc(sizeof(int) * (G->functors()+1));
425 ///////////////////////////////////////////////////////////////////////////
426 // Temporary buffers
427 ///////////////////////////////////////////////////////////////////////////
428 SparseDFA::State * deltas =
429 (SparseDFA::State *)mem.c_alloc
430 (sizeof(SparseDFA::State) * number_of_states());
431 SparseDFA::Symbol * symbols =
432 (SparseDFA::Symbol *)mem.c_alloc
433 (sizeof(SparseDFA::Symbol) * number_of_states());
435 ///////////////////////////////////////////////////////////////////////////
436 // Start the compression process
437 ///////////////////////////////////////////////////////////////////////////
438 dfa_compiler.create_tables(64,8,0,number_of_states()-1);
439 dfa_compiler.start();
441 ///////////////////////////////////////////////////////////////////////////
442 // Compress all index maps.
443 ///////////////////////////////////////////////////////////////////////////
444 total_index_entries = 0;
445 for (Functor f = G->min_functor(); f <= G->max_functor(); f++) {
446 mu_index[f] = (int*)mem.c_alloc(sizeof(int) * G->arity(f));
447 for (int i = 0; i < G->arity(f); i++) {
448 int fan_out = 0;
449 for (int s = number_of_states() - 1; s >= 0; s--) {
450 if (index_map(f,i,s) != 0) {
451 symbols[fan_out] = s;
452 deltas [fan_out] = index_map(f,i,s);
453 fan_out++;
456 dfa_compiler.add_state(index, fan_out, symbols, deltas);
457 mu_index[f][i] = index++;
458 total_index_entries += number_of_states();
461 ///////////////////////////////////////////////////////////////////////////
462 // Finish the compression.
463 ///////////////////////////////////////////////////////////////////////////
464 dfa_compiler.finish();
465 non_error_index_entries = dfa_compiler.size();
468 //////////////////////////////////////////////////////////////////////////////
470 // Method to compute the compression rate.
471 // The compressed form in the base/check/next format.
473 //////////////////////////////////////////////////////////////////////////////
474 double TreeAutomaton::compression_rate() const
476 double original = total_index_entries;
477 double now = dfa_compiler.size() * 2 + dfa_compiler.states();
479 return (original - now) / now;
482 //////////////////////////////////////////////////////////////////////////////
484 // Method for report generation.
486 //////////////////////////////////////////////////////////////////////////////
487 ostream& TreeAutomaton::print_report (ostream& f) const
489 // Print the canonical grammar
490 f << "\nCanonical grammar:\n" << *G << '\n';
492 // Print other statistics
493 f << "\nStatistics of the tree automaton:\n"
494 << " Number of functors = " << G->functors() << '\n'
495 << " Number of non-terminals = " << G->variables() << '\n'
496 << " Number of states = " << number_of_states() << '\n'
497 << " Memory used = "
498 << (mem.bytes_used() + 512)/1024 << " k-bytes\n";
500 // Print compression statistics
501 unsigned long total_mu_index_size = 0;
502 if (mu_index) {
503 total_mu_index_size = dfa_compiler.size() * 2;
504 f << "\n"
505 "Index compression statistics:\n"
506 << " Uncompressed index entries = " << total_index_entries << '\n'
507 << " Non-empty index entries = " << non_error_index_entries << '\n'
508 << " Compressed table entries = " << total_mu_index_size << '\n'
509 << " Compression rate = "
510 << int(compression_rate() * 100.0 + .5) << "%\n"
514 // Print mu/theta table statistics
515 unsigned long total_theta_size = 0;
516 unsigned long total_mu_size = 0;
517 { for (Functor f = G->min_functor(); f <= G->max_functor(); f++)
518 { if (arity[f] != 0) {
519 int n = arity[f];
520 int theta_size = 1;
522 for (int i = 0; i < n; i++)
523 { int dimension_size = index_range(f,i);
524 theta_size *= dimension_size;
525 if (dimension_size > 1)
526 total_mu_size += number_of_states();
528 total_theta_size += theta_size;
533 // Compute the table sizes
534 size_t state_size = number_of_states() <= 256 ?
535 sizeof(ShortState) : sizeof(State);
536 size_t rule_size = G->size() < 128 ? sizeof(ShortRule) : sizeof(Rule);
537 unsigned long total_theta_bytes = total_theta_size * state_size;
538 unsigned long total_mu_bytes = total_mu_size * state_size;
539 unsigned long total_mu_index_bytes = total_mu_index_size * state_size;
540 unsigned long total_accept_bytes =
541 accept_vector_size * rule_size +
542 number_of_states() * sizeof(Offset);
543 unsigned long total_accept_bitmap_bytes =
544 number_of_states() * (number_of_states() + 7/8);
545 unsigned long total_accept1_bytes =
546 number_of_states() * rule_size;
547 unsigned total_table_bytes = total_theta_bytes;
550 // Print various table size
551 f << "\n"
552 "Table sizes:\n";
553 f << " Using " << state_size << "-byte state representation\n";
554 f << " Using " << rule_size << "-byte rule representation\n";
555 f << " Theta tables entries = " << total_theta_size
556 << " (" << total_theta_bytes << " bytes)\n";
557 if (mu_used) {
558 f << " Mu table entries = " << total_mu_size
559 << " (" << total_mu_bytes << " bytes)\n";
560 total_table_bytes += total_mu_bytes;
563 if (mu_index_used) {
564 f << " Mu index entries = " << total_mu_index_size
565 << " (" << total_mu_index_bytes << " bytes)\n";
566 total_table_bytes += total_mu_index_bytes;
569 if (accept_used) {
570 f << " Accept table entries = "
571 << (accept_vector_size + number_of_states())
572 << " (" << total_accept_bytes << " bytes)\n";
573 total_table_bytes += total_accept_bytes;
576 if (accept_bitmap_used) {
577 f << " Accept bitmap size = "
578 << total_accept_bitmap_bytes << " bytes\n";
579 total_table_bytes += total_accept_bitmap_bytes;
582 if (accept1_used) {
583 f << " Accept1 table size = "
584 << total_accept1_bytes << " bytes\n";
585 total_table_bytes += total_accept1_bytes;
588 f << " Total table size = " << total_table_bytes << " bytes\n";
590 return f;
593 //////////////////////////////////////////////////////////////////////////////
595 // Method for printing a state.
597 //////////////////////////////////////////////////////////////////////////////
598 ostream& TreeAutomaton::print_state (ostream& f, State) const { return f; }
600 //////////////////////////////////////////////////////////////////////////////
602 // Methods for emitting the compressed index in C++ form.
604 //////////////////////////////////////////////////////////////////////////////
605 ostream& TreeAutomaton::gen_compressed_index
606 (ostream& out, const char name[]) const
607 { ((TreeAutomaton *)this)->mu_index_used = true;
608 out << "static const ";
609 dfa_compiler.gen_check_next_tables(out, name, get_state_type());
610 return out;