gcc config
[prop.git] / lib-src / automata / bottomup.cc
blob5ef8f19871523d8b4292071fb7c3bcb8dc4a8474
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 ////////////////////////////////////////////////////////////////////////////
26 // This file implements a pattern matcher compiler for bottom-up tree
27 // matching. Instead of representing states using match sets as in
28 // Hoffmann O'Donnell, we'll use most general linear unifiers as in
29 // Lippe instead. Notice that the closure set of unifiers form a complete
30 // partial order. We'll use this property to devise our algorithms.
31 ////////////////////////////////////////////////////////////////////////////
33 #include <string.h>
34 #include <stdlib.h>
35 #include <AD/automata/bottomup.h> // bottomup tree matcher/matcher-compiler
36 #include <AD/contain/dchmap.h> // Map based on direct chaining hash tables
37 #include <AD/contain/stack.h> // generic stacks
38 #include <AD/contain/vararray.h> // generic array
39 #include <AD/contain/varqueue.h> // generic queue
40 #include <AD/memory/mempool.h> // memory pool
42 ////////////////////////////////////////////////////////////////////////////
43 // Make hidden types visible
44 ////////////////////////////////////////////////////////////////////////////
45 typedef BottomUp::Arity Arity;
46 typedef BottomUp::Functor Functor;
47 typedef BottomUp::State State;
49 ////////////////////////////////////////////////////////////////////////////
50 // Constructor and destructor
51 ////////////////////////////////////////////////////////////////////////////
52 BottomUp:: BottomUp(Mem& m) : TreeAutomaton(m) {}
53 BottomUp::~BottomUp() {}
55 ////////////////////////////////////////////////////////////////////////////
56 // Class |Pair| represents a 2-tuple, of course.
57 ////////////////////////////////////////////////////////////////////////////
58 #define Pair Pair__
59 template <class A, class B>
60 class Pair {
61 public:
62 A a; B b;
63 Pair(A x, B y) : a(x), b(y) {}
64 Pair() {}
65 A fst() const { return a; }
66 B snd() const { return b; }
67 //friend Bool operator == (const Pair&, const Pair&);
70 ////////////////////////////////////////////////////////////////////////////
71 // Class |UnifierSet| represents a set of unifiers in a bit vector format
72 ////////////////////////////////////////////////////////////////////////////
73 class UnifierSet {
74 int n;
75 Byte bits[1];
76 public:
77 inline operator const Byte * () const { return bits; }
78 inline operator Byte * () { return bits; }
79 inline void setUnion (int i) { bitSet(bits,i); }
80 inline void setUnion (UnifierSet& set)
81 { for (register int i = n - 1; i >= 0; i--) bits[i] |= set.bits[i]; }
82 inline int size () const { return n; }
83 inline Bool operator [] (int i) { return bitOf(bits,i); }
84 inline void clear()
85 { for (register int i = n - 1; i >= 0; i--) bits[i] = 0; bitSet(bits,0); }
86 inline Byte operator () (int i) { return bits[i]; }
87 inline void * operator new (size_t, MemPool& pool, int n)
88 { UnifierSet * x =
89 (UnifierSet*)pool[sizeof(UnifierSet) + (n-1) * sizeof(Byte)];
90 for (register int i = n - 1; i >= 0; i--) x->bits[i] = 0;
91 x->n = n; bitSet(x->bits,0); return x;
93 inline void operator delete (void *) {}
96 ////////////////////////////////////////////////////////////////////////////
97 // A quick test to screen out non unifiable terms
98 ////////////////////////////////////////////////////////////////////////////
99 inline Bool nonunifiable(const TreePat * a, const TreePat * b)
100 { return a->tag() != b->tag(); }
102 ////////////////////////////////////////////////////////////////////////////
103 // A pair of arguments to the unification routine, and
104 // A pair of subterm and its state number
105 ////////////////////////////////////////////////////////////////////////////
106 typedef TreePat * TreePatPtr;
107 typedef Pair<TreePatPtr,TreePatPtr> TreePatPair;
108 typedef Pair<TreePatPtr,State> Unifier;
109 typedef UnifierSet * UnifierSetPtr;
111 //////////////////////////////////////////////////////////////////////////////
112 // Shallow equality comparison for a term pattern; used by the memo functions.
113 // Shallow equality(and hashing) rather than deep equality(and hashing)
114 // is sufficient since we'll be using ``hash-consing'' to share all terms
115 // maximally.
116 //////////////////////////////////////////////////////////////////////////////
117 inline Bool equal(const TreePat * a, const TreePat * b)
118 { if (isWild(a) && isWild(b)) return true;
119 if (isWild(a) || isWild(b)) return false;
120 if (a->tag() != b->tag()) return false;
121 for (register int i = a->arity() - 1; i >= 0; i--)
122 if (a->nth(i) != b->nth(i)) return false;
123 return true;
126 ////////////////////////////////////////////////////////////////////////////
127 // Shallow hash function for a term; used by the memo functions
128 ////////////////////////////////////////////////////////////////////////////
129 inline unsigned int hash(const TreePat * a)
130 { if (isWild(a)) return 73; // some prime number for better hashing
131 unsigned int hash_value;
132 int i;
133 for (hash_value = a->tag(), i = a->arity() - 1; i >= 0; i--)
134 hash_value += hash_value + (unsigned int)a->nth(i);
135 return hash_value;
138 ////////////////////////////////////////////////////////////////////////////
139 // Equality for a pair of term patterns; used in the unification memo
140 // function. Notice that since linear unification is commutitive, we'll
141 // also commute the pairs and check for equality.
142 ////////////////////////////////////////////////////////////////////////////
143 inline Bool operator == (const TreePatPair& a, const TreePatPair& b)
144 { return equal(a.fst(), b.fst()) && equal(a.snd(), b.snd()) ||
145 equal(a.fst(), b.snd()) && equal(a.snd(), b.fst()); }
147 inline Bool equal(const TreePatPair& a, const TreePatPair& b)
148 { return a == b; }
150 ////////////////////////////////////////////////////////////////////////////
151 // Hash function for a pair of term patterns
152 ////////////////////////////////////////////////////////////////////////////
153 inline unsigned int hash(const TreePatPair& a)
154 { return hash(a.fst()) + hash(a.snd()); }
156 ////////////////////////////////////////////////////////////////////////////
157 // The map that maps unifiers to its state number is called UnifiersMemo.
158 // The map that memorizes unification is called UnifyMemo.
159 ////////////////////////////////////////////////////////////////////////////
160 typedef DCHMap <TreePatPtr, State> UnifiersMemo;
161 typedef DCHMap <TreePatPair, TreePatPtr> UnifyMemo;
162 typedef UnifiersMemo ApproxMemo;
164 ////////////////////////////////////////////////////////////////////////////
165 // Compute the set of subterms bottom-up recursively and memorize them.
166 // Notice that all isomorphic subterms are shared after this construction;
167 // we'll also give each subterm each own state number.
168 ////////////////////////////////////////////////////////////////////////////
169 static TreePat * collect_subterms
170 ( TreePat * a, UnifiersMemo& memo, MemPool& pool,
171 VarArray<TreePat *>& states, State& new_state )
172 { TreePatBuf D;
173 Ix previous;
174 TreePat * a_term;
176 if (isWild(a)) return a;
178 for (int i = 0; i < a->arity(); i++)
179 D.p.subterms[i] = collect_subterms(a->nth(i),memo,pool,states,new_state);
180 D.p.f = a->tag(); D.p.n = a->arity();
181 previous = memo.lookup(&D.p);
182 if (previous) return memo.key(previous);
183 a_term = new (pool,a->tag(),a->arity(),D.p.subterms) TreePat;
184 memo.insert(a_term, new_state);
185 states.At(new_state++) = a_term;
186 return a_term;
189 ////////////////////////////////////////////////////////////////////////////
190 // Compute the linear unifier of terms a and b with memorization.
191 // Returns the variable X if the two terms a and b are not unifiable.
192 // Also, if a new unifier is computed, we'll generate a new
193 // state and enter it into the unifiers map.
194 ////////////////////////////////////////////////////////////////////////////
195 #define X (TreePat *)0
196 #define Fail (TreePat *)1
198 static TreePat * unify
199 ( TreePat * a, TreePat * b,
200 UnifiersMemo& unifiers, UnifyMemo& memo, MemPool& pool,
201 VarArray<TreePat *>& states, State& new_state )
203 TreePat * answer;
204 TreePatBuf D;
207 // We'll check for simple situations first
209 if (a->tag() != b->tag()) return Fail;
212 // If these tests do not apply, we'll lookup up the pair a and b
213 // from the map instead.
215 Ix previous;
216 if ((previous = memo.lookup(TreePatPair(a,b))))
217 return memo.value(previous);
220 // If the answer is not in the map, we'll have to compute them recursively.
222 int i;
223 for (i = a->arity() - 1; i >= 0; i--) {
224 if (isWild(a->nth(i))) { D.p.subterms[i] = b->nth(i); continue; }
225 if (isWild(b->nth(i))) { D.p.subterms[i] = a->nth(i); continue; }
226 if ((D.p.subterms[i] =
227 unify(a->nth(i),b->nth(i),unifiers,memo,pool,states,new_state))
228 == Fail)
229 return Fail;
233 // Add a new unifier if we have computed a new one.
236 D.p.f = a->tag(); D.p.n = a->arity();
237 Ix last;
238 if ((last = unifiers.lookup(&D.p)))
239 { answer = unifiers.key(last); goto update_table; }
240 answer = new (pool, a->tag(),a->arity(),D.p.subterms) TreePat;
241 unifiers[answer] = new_state;
242 states.At(new_state++) = answer;
244 update_table:
247 // Now, update the result so that we'll find them again from the table
248 // next time.
250 if (memo.size() < 500) memo[TreePatPair(a,b)] = answer;
251 return answer;
254 ////////////////////////////////////////////////////////////////////////////
255 // Compute the subset of terms at each position of each functor.
256 ////////////////////////////////////////////////////////////////////////////
257 static void collect_arguments
258 ( TreePat * p, UnifiersMemo& unifiers, UnifierSet ** relevant_sets[])
259 { if (! isWild(p)) {
261 // For each argument, compute the set of patterns that
262 // can appear at each functor
264 for (int i = p->arity() - 1; i >= 0; i--) {
265 collect_arguments(p->nth(i), unifiers, relevant_sets);
266 relevant_sets[p->tag()][i]->setUnion(unifiers[p->nth(i)]);
271 ////////////////////////////////////////////////////////////////////////////
272 // Compute the relevant set of unifiers of argument $i$ of a functor $f$
273 ////////////////////////////////////////////////////////////////////////////
274 static UnifierSet *** compute_relevant_sets
275 ( TreePatterns& patterns, UnifiersMemo& unifiers, UnifyMemo& memo,
276 MemPool& pool, VarArray<TreePat *>& states, State number_of_states )
277 { UnifierSet *** relevant_sets;
278 Functor f;
279 int i;
280 int size;
283 // Step 1. allocate storage for the relevant sets.
285 relevant_sets =
286 (UnifierSet***)pool.m_alloc(patterns.functors() * sizeof(UnifierSet **)) -
287 patterns.min_functor() * sizeof(UnifierSet **);
289 size = (number_of_states + 7) / 8; // Size of the bit vectors
291 for (f = patterns.min_functor(); f <= patterns.max_functor(); f++) {
292 relevant_sets[f] =
293 (UnifierSet**)pool.m_alloc(patterns.arity(f) * sizeof(UnifierSet*));
294 for (i = patterns.arity(f) - 1; i >= 0; i--)
295 relevant_sets[f][i] = new(pool,size) UnifierSet;
299 // Step 2. Collect the arguments for each functor at each argument.
300 // This is done concurrently for all functors at all positions.
302 for (i = patterns.size() - 1; i >= 0; i--)
303 collect_arguments(patterns[i], unifiers, relevant_sets);
306 // Step 3. Compute the unifier closure for each relevant set.
308 Stack<State> stack(number_of_states);
309 for (f = patterns.min_functor(); f <= patterns.max_functor(); f++) {
310 for (i = patterns.arity(f) - 1; i >= 0; i--) {
311 stack.clear();
312 UnifierSet * set = relevant_sets[f][i];
313 register int j, k; State s;
314 for (j = 0, s = 0; j < size; j++) {
315 if ((*set)(j) == 0) { s += 8; continue; }
316 for ( k = 0; k < 8; k++, s++)
317 if ((*set)(j) & (1 << k)) stack.push(s);
319 for (j = 2; j < stack.size(); j++) {
320 TreePat * a = states.Get(stack[j]);
321 for (k = 1; k < j; k++) {
322 TreePat * b = states.Get(stack[k]);
323 if (nonunifiable(a,b)) continue;
324 TreePat * u = unify( a, b, unifiers, memo, pool,
325 states, number_of_states );
326 if (u == Fail) continue;
327 s = unifiers[u];
328 if (! (*set).operator[](s)) { set->setUnion(s); stack.push(s); }
334 return relevant_sets;
337 ////////////////////////////////////////////////////////////////////////////
338 // Compute the subsumption ordering between two patterns a and b
339 // Returns: -1 if a < b
340 // 0 if a = b
341 // 1 if a > b
342 // 2 if a and b are incomparable
343 ////////////////////////////////////////////////////////////////////////////
344 static int subsumes(TreePat * a, TreePat * b)
345 { if (isWild(a) && isWild(b)) return 0;
346 if (isWild(a)) return -1;
347 if (isWild(b)) return 1;
348 if (a->tag() != b->tag()) return 2;
349 int pos = 0, neg = 0;
350 for (int i = a->arity() - 1; i >= 0; i--) {
351 switch (subsumes(a->nth(i),b->nth(i))) {
352 case 1: if (neg == 0) pos++; else return 2; break;
353 case -1: if (pos == 0) neg++; else return 2; break;
354 case 2: return 2;
357 if (pos > 0) return 1;
358 if (neg > 0) return -1;
359 return 0;
362 ////////////////////////////////////////////////////////////////////////////
363 // A list of states
364 ////////////////////////////////////////////////////////////////////////////
365 struct StateList {
366 State s; StateList * tl;
367 void * operator new (size_t,MemPool& pool, State a, StateList * t = 0)
368 { StateList * l = (StateList*)pool[sizeof(StateList)];
369 l->s = a; l->tl = t; return l;
373 ////////////////////////////////////////////////////////////////////////////
374 // Compute the subsumption ordering for all states.
375 // When this routine finishes the array |ordering| will contain
376 // for each state t, a list of maximal predecessors t1, t2 ... tn;
377 // that is, for each i, t > ti and there is no other s such that
378 // t > s > ti.
379 ////////////////////////////////////////////////////////////////////////////
380 static void compute_subsumption_ordering
381 ( TreePat * states[], int number_of_states,
382 StateList * ordering[], MemPool& pool )
383 { int i, j;
384 StateList * l;
385 ordering[0] = 0;
386 for (i = 1; i < number_of_states; i++) ordering[i] = 0;
387 for (i = 2; i < number_of_states; i++) {
388 TreePat * a = states[i];
389 for (j = 1; j < i; j++) {
390 TreePat * b = states[j];
391 if (nonunifiable(a,b)) continue; // skip incomparable terms early
392 switch (subsumes(a,b)) {
393 case 1: // a > b; now check if a > b > ap_i for all i
394 for (l = ordering[i]; l; l = l->tl)
395 switch (subsumes(b,states[l->s])) {
396 case 1: { l->s = j; goto next; }
397 case -1: goto next;
399 ordering[i] = new(pool,j,ordering[i]) StateList;
400 break;
401 case -1: // b > a; now check if b > a > bp_i for all i
402 for (l = ordering[j]; l; l = l->tl)
403 switch (subsumes(a,states[l->s])) {
404 case 1: { l->s = i; goto next; }
405 case -1: goto next;
407 ordering[j] = new(pool,i,ordering[j]) StateList;
408 break;
410 next: ;
415 ////////////////////////////////////////////////////////////////////////////
416 // Compute the best approximation of state t with respect to a set of
417 // unification closed states S. Use breath first search since we
418 // want to find the best, not the first, approximation.
419 // Notice that since the set S is unification closed,
420 // the best approximation is uniquely defined.
421 /////////////////////////////////////////////////////////////////////////////
422 inline State approx
423 ( State t, UnifierSet& S, StateList * ordering[],
424 VarQueue<State>& Q, TreePat * [] )
425 { // int head = 0, tail = 0;
426 Q.clear();
427 Q.insert_tail(t);
428 // TreePat * p = states[t];
429 while (! Q.is_empty()) {
430 t = Q.remove_head();
431 if (S.operator[](t)) return t;
432 for (StateList * l = ordering[t]; l; l = l->tl)
433 Q.insert_tail(l->s);
435 return 0;
438 ////////////////////////////////////////////////////////////////////////////
439 // Given a new term t, compute the best approximation within the set
440 // of states. This new state becomes the transition of the tree automata.
441 ////////////////////////////////////////////////////////////////////////////
442 inline State approx
443 ( TreePat * t, UnifiersMemo& unifiers, StateList * ordering[],
444 TreePat * states[], ApproxMemo& memo, MemPool& pool )
445 { TreePatBuf D;
447 // First, try looking up the new term to see if is it in the unifiers
449 Ix st;
450 if ((st = unifiers.lookup(t))) return unifiers.value(st);
453 // Else try looking it up from the memo function
455 if ((st = memo.lookup(t))) return memo.value(st);
458 // If it is not there, we proceed to compute the approximations
459 // one by one until it is found.
461 int i;
462 D.p.f = t->tag(); D.p.n = t->arity();
463 for (i = t->arity() - 1; i >= 0; i--) D.p.subterms[i] = t->nth(i);
464 TreePat * max = X;
465 StateList zero; zero.s = 0; zero.tl = 0;
466 for (i = t->arity() - 1; i >= 0; i--) {
467 if (isWild(t->nth(i))) continue;
468 State s = unifiers[t->nth(i)];
469 StateList * l = ordering[s];
470 if (l == 0) l = &zero;
471 for ( ; l; l = l->tl) {
472 D.p.subterms[i] = states[l->s];
473 TreePat * a =
474 states[approx(&D.p, unifiers, ordering, states, memo, pool)];
475 if (subsumes(a,max) == 1) max = a;
477 D.p.subterms[i] = t->nth(i);
479 State best = unifiers[max];
480 memo.insert(new(pool, t->tag(), t->arity(), t->subterms) TreePat, best);
481 return best;
484 ////////////////////////////////////////////////////////////////////////////
485 // The main entry point of the pattern matching compiler
486 ////////////////////////////////////////////////////////////////////////////
487 void BottomUp::compile (TreePatterns& patterns)
490 // Compute the size of the memorization tables. This number is
491 // only used as a general guide and does not have to be exact
492 // since the tables can handle overflow gracefully.
494 int functors = patterns.functors();
495 //int size = patterns.size();
496 int table_size = functors * patterns.max_arity() * 2;
497 if (table_size < 256) table_size = 256;
500 // Here are some of the auxiliary tables used during the compilation
501 // process. The algorithm makes heavy use of memo functions to cut
502 // down on the complexity of computing unifiers and match sets. These
503 // tables are implemented as |Maps| of |TreePat *|. Furthermore, all
504 // memory needs are handled by our own fast memory pool instead of
505 // C++'s new/delete to ease the demands placed on the system(note: class
506 // |MemPool| lets us free all allocated objects in one single unit when
507 // this function exits.)
509 MemPool pool(2048); // set default page size to 2K bytes
510 UnifiersMemo unifiers(table_size); // unifier to state mapping
511 UnifyMemo unifyMemo(table_size); // memorization map for unification
512 VarArray<TreePat *> states; // state to unifier mapping
513 ApproxMemo approxMemo(table_size);
516 // Step 1: compute the set of subterms of this pattern set.
518 int i;
519 unifiers.insert(X,(State)0); // initially, state 0 is the variable X.
520 State number_of_states = 1; // we'll start at state 1
521 for (i = 0; i < patterns.size(); i++)
522 patterns.update(i,
523 collect_subterms(patterns[i],unifiers,pool,states,number_of_states));
526 // Step 2: compute the closure of the unifiers set. Initially the
527 // set of unifiers is just the set of subterms. We'll generate new unifiers
528 // by pairing all two previous unifiers and stop until we
529 // reached the fixpoint. Notice that since the set of
530 // patterns is finite, we'll always terminate. (However, in pathological
531 // cases the number of unifiers is a powerset of the size of the subterms.)
532 // Also notice that we take advantage of the fact that linear unification
533 // is commutative to half the number of iterations.
535 int j;
536 for (i = 2; i < number_of_states; i++) {
537 TreePat * a = states[i];
538 for (j = 1; j < i; j++) {
539 TreePat * b = states[j];
540 if (nonunifiable(a,b)) continue;
541 unify( a, b, unifiers, unifyMemo, pool, states, number_of_states );
546 // Step 3: Generate the relevant set of unifier closures for
547 // each argument of each functor.
549 UnifierSet *** relevant_sets =
550 compute_relevant_sets
551 ( patterns, unifiers, unifyMemo, pool, states, number_of_states );
554 // Step 4: Compute the subsumption ordering for each state.
555 // When this is done the array ordering will contain
556 // a list maximal approximations for each state.
558 StateList ** ordering =
559 (StateList**)pool[number_of_states * sizeof(StateList*)];
561 compute_subsumption_ordering(states, number_of_states, ordering, pool);
564 // Step 5. Compute the approximation mu(f,i) for each functor f and arity i
565 // mus is a mapping from offset -> state
566 // offset is its inverse from state -> offset
568 State ** mus = (State **)pool[ patterns.max_arity() * sizeof(State*)];
569 State ** offset = (State **)pool[ patterns.max_arity() * sizeof(State*)];
570 int bounds [TreeGrammar::Max_arity];
571 for (i = patterns.max_arity() - 1; i >= 0; i--) {
572 mus[i] = (State*)pool[ number_of_states * sizeof(State) ];
573 offset[i] = (State*)pool[ number_of_states * sizeof(State) ];
574 mus[i][0] = 0;
575 offset[i][0] = 0;
578 VarQueue<State> queue(number_of_states, number_of_states);
579 for (Functor f = patterns.min_functor(); f <= patterns.max_functor(); f++) {
580 int top = patterns.arity(f) - 1;
581 for (i = top; i >= 0; i--) {
582 State * mu_i = mus[i];
583 State * offset_i = offset[i];
584 int& bounds_i = bounds[i];
585 UnifierSet& S = *relevant_sets[f][i];
586 mu_i[0] = 0; bounds_i = 1;
587 for (j = number_of_states - 1; j >= 1; j--) {
588 State a = approx(j, S, ordering, queue, states);
589 State off = offset_i[a];
590 if (off < bounds_i && mu_i[off] == a) offset_i[a] = off;
591 else { mu_i[bounds[i]] = a; offset_i[a] = bounds_i++; }
595 // Step 6. Compute the transition table based on the approximations.
596 // At this point the table |bounds| will contain the compressed
597 // arities of the functor; the table |mu| will contain the list
598 // of (unique) relevant states and table |offset| will contain the
599 // offset mapping(suitable for emission.)
601 TreePatBuf D; D.p.f = f; D.p.n = patterns.arity(f);
602 State indices [TreeGrammar::Max_arity];
603 for (i = top; i >= 0; i--)
604 { indices[i] = bounds[i] - 1;
605 D.p.subterms[i] = states.Get(mus[i][bounds[i] - 1]); }
606 do {
607 //State d = approx(&D.p, unifiers, ordering, states, approxMemo, pool);
608 for (i = top; i >= 0; i--) {
609 if (indices[i] > 0) {
610 D.p.subterms[i] = states.Get(mus[i][--indices[i]]); break;
611 } else {
612 D.p.subterms[i] = states.Get(mus[i][indices[i] = bounds[i] - 1]);
615 } while (i >= 0);
619 ////////////////////////////////////////////////////////////////////////////
621 // Algorithm name
623 ////////////////////////////////////////////////////////////////////////////
624 const char * BottomUp::algorithm () const { return "Bottomup"; }