1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef nondeterministic_finite_automata_h
26 #define nondeterministic_finite_automata_h
28 #include <AD/generic/generic.h> // Generic definitions
29 #include <AD/automata/dfatable.h> // DFA tables internals
31 //////////////////////////////////////////////////////////////////////////////
33 //////////////////////////////////////////////////////////////////////////////
39 //////////////////////////////////////////////////////////////////////////////
41 // Class NFA represents an NFA. Used in lexer generation.
43 //////////////////////////////////////////////////////////////////////////////
44 class NFA
: public DFATables
{
45 NFA(const NFA
&); // no copy constructor
46 void operator = (const NFA
&); // no assignment
48 enum { MAX_STAR_HEIGHT
= 256,
49 MAX_REGEXP_LENGTH
= 4096,
51 MAX_CONTEXT_LENGTH
= 1024
53 enum { RANGE_BIT
= 0x80000000,
54 COMPLEMENT
= 0x40000000,
55 END_CHAR_CLASS
= 0x20000000,
56 MASK_BITS
= 0x0000ffff
60 ///////////////////////////////////////////////////////////////////////////
62 ///////////////////////////////////////////////////////////////////////////
63 MemPool
& mem
; // current memory pool
64 Bool own_mem
; // memory owned by us?
65 NFA_Node
* nfa
; // root of the nfa
66 const char * const * contexts
; // current set of contexts
67 Symbol max_char
; // maximum character encoding
68 State number_of_states
; // number of states
69 char * is_singleton
; // is character a singleton?
70 Bool case_insensitive
; // case insensitive?
74 NFA_Delta
** nfa_states
;
75 const char * regexp_begin
;
78 ///////////////////////////////////////////////////////////////////////////
80 ///////////////////////////////////////////////////////////////////////////
83 const char * error_message
;
85 ///////////////////////////////////////////////////////////////////////////
87 ///////////////////////////////////////////////////////////////////////////
88 NFA_Node
** context_nfas
;
89 int number_of_contexts
;
91 ///////////////////////////////////////////////////////////////////////////
92 // Character class stuff
93 ///////////////////////////////////////////////////////////////////////////
94 Symbol
* char_classes
;
95 Symbol
** char_class_table
;
97 int number_of_char_classes
;
98 FastBitSet
** char_class_maps
;
100 ///////////////////////////////////////////////////////////////////////////
101 // Equivalence class stuff
102 ///////////////////////////////////////////////////////////////////////////
103 Symbol
* equiv_classes
;
104 int number_of_equiv_classes
;
106 ///////////////////////////////////////////////////////////////////////////
108 ///////////////////////////////////////////////////////////////////////////
109 virtual void error (const char * message
);
110 virtual NFA_Node
* parse (Rule rule
, const char * regexp
);
112 const char * parse_char_class (const char *, Symbol
&);
113 const char * parse_context (const char *, Bool
[]);
114 NFA_Node
* construct (const char *&, NFA_Node
*&);
115 NFA_Node
* construct_concat (const char *&, NFA_Node
*&);
116 NFA_Node
* construct_one (const char *&, NFA_Node
*&);
117 void compute_equiv_classes();
118 void compute_char_class_map();
121 ///////////////////////////////////////////////////////////////////////////
122 // Constructors and destructor
123 ///////////////////////////////////////////////////////////////////////////
128 ///////////////////////////////////////////////////////////////////////////
129 // Method to compile a set of regular expressions into an NFA.
130 ///////////////////////////////////////////////////////////////////////////
132 ( int number_of_rules
,
133 const char * const * regexps
,
134 const char * const * contexts
,
135 Symbol max_character
= 255,
136 Bool case_insensitive
= false
139 ///////////////////////////////////////////////////////////////////////////
140 // Method to compute the equiv class table.
141 ///////////////////////////////////////////////////////////////////////////
143 ///////////////////////////////////////////////////////////////////////////
145 ///////////////////////////////////////////////////////////////////////////
146 inline int errors () const { return number_of_errors
; }
147 inline Rule
bad() const { return bad_rule
; }
148 inline const char * error_msg () const { return error_message
; }
149 inline int state_count () const { return number_of_states
; }
150 inline int context_count () const { return number_of_contexts
; }
151 inline int equiv_class_count () const { return number_of_equiv_classes
; }
152 inline int char_class_count () const { return number_of_char_classes
; }
153 inline NFA_Node
* nfa_unanchored(int i
) const { return context_nfas
[i
*2]; }
154 inline NFA_Node
* nfa_anchored(int i
) const { return context_nfas
[i
*2+1];}
155 inline NFA_Node
* context_nfa(int i
) const { return context_nfas
[i
];}
156 inline NFA_Node
* root () const { return nfa
; }
157 inline NFA_Delta
* operator [] (int i
) { return nfa_states
[i
]; }
158 inline Symbol
* get_equiv_classes () { return equiv_classes
; }
159 inline const Symbol
* char_class(int i
) const
160 { return char_class_table
[-i
-1];}
161 inline const FastBitSet
* char_class_map(int i
) const
162 { return char_class_maps
[-i
-1]; }