initial
[prop.git] / include / AD / automata / nfa.h
blob12265a23a269c51885ac4655132f61519329037c
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef nondeterministic_finite_automata_h
26 #define nondeterministic_finite_automata_h
28 #include <AD/generic/generic.h> // Generic definitions
29 #include <AD/automata/dfatable.h> // DFA tables internals
31 //////////////////////////////////////////////////////////////////////////////
32 // Opaque definitions
33 //////////////////////////////////////////////////////////////////////////////
34 class MemPool;
35 class NFA_Node;
36 class NFA_Delta;
37 class FastBitSet;
39 //////////////////////////////////////////////////////////////////////////////
41 // Class NFA represents an NFA. Used in lexer generation.
43 //////////////////////////////////////////////////////////////////////////////
44 class NFA : public DFATables {
45 NFA(const NFA&); // no copy constructor
46 void operator = (const NFA&); // no assignment
47 public:
48 enum { MAX_STAR_HEIGHT = 256,
49 MAX_REGEXP_LENGTH = 4096,
50 MAX_CONTEXTS = 256,
51 MAX_CONTEXT_LENGTH = 1024
53 enum { RANGE_BIT = 0x80000000,
54 COMPLEMENT = 0x40000000,
55 END_CHAR_CLASS = 0x20000000,
56 MASK_BITS = 0x0000ffff
59 protected:
60 ///////////////////////////////////////////////////////////////////////////
61 // Interal members
62 ///////////////////////////////////////////////////////////////////////////
63 MemPool& mem; // current memory pool
64 Bool own_mem; // memory owned by us?
65 NFA_Node * nfa; // root of the nfa
66 const char * const * contexts; // current set of contexts
67 Symbol max_char; // maximum character encoding
68 State number_of_states; // number of states
69 char * is_singleton; // is character a singleton?
70 Bool case_insensitive; // case insensitive?
71 NFA_Node * save_top;
72 NFA_Node * save_root;
73 NFA_Node * save_next;
74 NFA_Delta ** nfa_states;
75 const char * regexp_begin;
76 Bool anchored;
78 ///////////////////////////////////////////////////////////////////////////
79 // Error reporting
80 ///////////////////////////////////////////////////////////////////////////
81 int number_of_errors;
82 Rule bad_rule;
83 const char * error_message;
85 ///////////////////////////////////////////////////////////////////////////
86 // Context stuff
87 ///////////////////////////////////////////////////////////////////////////
88 NFA_Node ** context_nfas;
89 int number_of_contexts;
91 ///////////////////////////////////////////////////////////////////////////
92 // Character class stuff
93 ///////////////////////////////////////////////////////////////////////////
94 Symbol * char_classes;
95 Symbol ** char_class_table;
96 char * partitions;
97 int number_of_char_classes;
98 FastBitSet ** char_class_maps;
100 ///////////////////////////////////////////////////////////////////////////
101 // Equivalence class stuff
102 ///////////////////////////////////////////////////////////////////////////
103 Symbol * equiv_classes;
104 int number_of_equiv_classes;
106 ///////////////////////////////////////////////////////////////////////////
107 // Helper methods
108 ///////////////////////////////////////////////////////////////////////////
109 virtual void error (const char * message);
110 virtual NFA_Node * parse (Rule rule, const char * regexp);
111 Symbol parse_dot ();
112 const char * parse_char_class (const char *, Symbol&);
113 const char * parse_context (const char *, Bool []);
114 NFA_Node * construct (const char *&, NFA_Node *&);
115 NFA_Node * construct_concat (const char *&, NFA_Node *&);
116 NFA_Node * construct_one (const char *&, NFA_Node *&);
117 void compute_equiv_classes();
118 void compute_char_class_map();
120 public:
121 ///////////////////////////////////////////////////////////////////////////
122 // Constructors and destructor
123 ///////////////////////////////////////////////////////////////////////////
124 NFA();
125 NFA(MemPool&);
126 virtual ~NFA();
128 ///////////////////////////////////////////////////////////////////////////
129 // Method to compile a set of regular expressions into an NFA.
130 ///////////////////////////////////////////////////////////////////////////
131 virtual void compile
132 ( int number_of_rules,
133 const char * const * regexps,
134 const char * const * contexts,
135 Symbol max_character = 255,
136 Bool case_insensitive = false
139 ///////////////////////////////////////////////////////////////////////////
140 // Method to compute the equiv class table.
141 ///////////////////////////////////////////////////////////////////////////
143 ///////////////////////////////////////////////////////////////////////////
144 // Selectors
145 ///////////////////////////////////////////////////////////////////////////
146 inline int errors () const { return number_of_errors; }
147 inline Rule bad() const { return bad_rule; }
148 inline const char * error_msg () const { return error_message; }
149 inline int state_count () const { return number_of_states; }
150 inline int context_count () const { return number_of_contexts; }
151 inline int equiv_class_count () const { return number_of_equiv_classes; }
152 inline int char_class_count () const { return number_of_char_classes; }
153 inline NFA_Node * nfa_unanchored(int i) const { return context_nfas[i*2]; }
154 inline NFA_Node * nfa_anchored(int i) const { return context_nfas[i*2+1];}
155 inline NFA_Node * context_nfa(int i) const { return context_nfas[i];}
156 inline NFA_Node * root () const { return nfa; }
157 inline NFA_Delta * operator [] (int i) { return nfa_states[i]; }
158 inline Symbol * get_equiv_classes () { return equiv_classes; }
159 inline const Symbol * char_class(int i) const
160 { return char_class_table[-i-1];}
161 inline const FastBitSet * char_class_map(int i) const
162 { return char_class_maps[-i-1]; }
165 #endif