Merge from google/integration.
[official-gcc.git] / libstdc++-v3 / include / bits / regex_nfa.h
blobb1ae3fb3ea0d7415af5176835ab62b641f21b918
1 // class template regex -*- C++ -*-
3 // Copyright (C) 2010 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
25 /**
26 * @file bits/regex_nfa.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 namespace std _GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
35 namespace __regex
38 // Base class for, um, automata. Could be an NFA or a DFA. Your choice.
39 class _Automaton
41 public:
42 typedef unsigned int _SizeT;
44 public:
45 virtual
46 ~_Automaton()
47 { }
49 virtual _SizeT
50 _M_sub_count() const = 0;
52 #ifdef _GLIBCXX_DEBUG
53 virtual std::ostream&
54 _M_dot(std::ostream& __ostr) const = 0;
55 #endif
58 // Generic shred pointer to an automaton.
59 typedef std::shared_ptr<_Automaton> _AutomatonPtr;
61 // Operation codes that define the type of transitions within the base NFA
62 // that represents the regular expression.
63 enum _Opcode
65 _S_opcode_unknown = 0,
66 _S_opcode_alternative = 1,
67 _S_opcode_subexpr_begin = 4,
68 _S_opcode_subexpr_end = 5,
69 _S_opcode_match = 100,
70 _S_opcode_accept = 255
73 // Provides a generic facade for a templated match_results.
74 struct _Results
76 virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
77 virtual void _M_set_matched(int __i, bool __is_matched) = 0;
80 // Tags current state (for subexpr begin/end).
81 typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
83 template<typename _FwdIterT, typename _TraitsT>
84 struct _StartTagger
85 : public _Tagger
87 explicit
88 _StartTagger(int __i)
89 : _M_index(__i)
90 { }
92 void
93 operator()(const _PatternCursor& __pc, _Results& __r)
94 { __r._M_set_pos(_M_index, 0, __pc); }
96 int _M_index;
99 template<typename _FwdIterT, typename _TraitsT>
100 struct _EndTagger
101 : public _Tagger
103 explicit
104 _EndTagger(int __i)
105 : _M_index(__i)
108 void
109 operator()(const _PatternCursor& __pc, _Results& __r)
110 { __r._M_set_pos(_M_index, 1, __pc); }
112 int _M_index;
113 _FwdIterT _M_pos;
115 // Indicates if current state matches cursor current.
116 typedef std::function<bool (const _PatternCursor&)> _Matcher;
118 // Matches any character
119 inline bool
120 _AnyMatcher(const _PatternCursor&)
121 { return true; }
123 // Matches a single character
124 template<typename _InIterT, typename _TraitsT>
125 struct _CharMatcher
126 : public _Matcher
128 typedef typename _TraitsT::char_type char_type;
130 explicit
131 _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
132 : _M_traits(__t), _M_c(_M_traits.translate(__c))
135 bool
136 operator()(const _PatternCursor& __pc) const
138 typedef const _SpecializedCursor<_InIterT>& _CursorT;
139 _CursorT __c = static_cast<_CursorT>(__pc);
140 return _M_traits.translate(__c._M_current()) == _M_c;
143 const _TraitsT& _M_traits;
144 char_type _M_c;
147 // Matches a character range (bracket expression)
148 template<typename _InIterT, typename _TraitsT>
149 struct _RangeMatcher
150 : public _Matcher
152 typedef typename _TraitsT::char_type _CharT;
153 typedef std::basic_string<_CharT> _StringT;
155 explicit
156 _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
157 : _M_traits(__t), _M_is_non_matching(__is_non_matching)
160 bool
161 operator()(const _PatternCursor& __pc) const
163 typedef const _SpecializedCursor<_InIterT>& _CursorT;
164 _CursorT __c = static_cast<_CursorT>(__pc);
165 return true;
168 void
169 _M_add_char(_CharT __c)
172 void
173 _M_add_collating_element(const _StringT& __s)
176 void
177 _M_add_equivalence_class(const _StringT& __s)
180 void
181 _M_add_character_class(const _StringT& __s)
184 void
185 _M_make_range()
188 const _TraitsT& _M_traits;
189 bool _M_is_non_matching;
192 // Identifies a state in the NFA.
193 typedef int _StateIdT;
195 // The special case in which a state identifier is not an index.
196 static const _StateIdT _S_invalid_state_id = -1;
199 // An individual state in an NFA
201 // In this case a "state" is an entry in the NFA definition coupled with its
202 // outgoing transition(s). All states have a single outgoing transition,
203 // except for accepting states (which have no outgoing transitions) and alt
204 // states, which have two outgoing transitions.
206 struct _State
208 typedef int _OpcodeT;
210 _OpcodeT _M_opcode; // type of outgoing transition
211 _StateIdT _M_next; // outgoing tranition
212 _StateIdT _M_alt; // for _S_opcode_alternative
213 unsigned int _M_subexpr; // for _S_opcode_subexpr_*
214 _Tagger _M_tagger; // for _S_opcode_subexpr_*
215 _Matcher _M_matches; // for _S_opcode_match
217 explicit _State(_OpcodeT __opcode)
218 : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
221 _State(const _Matcher& __m)
222 : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
225 _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
226 : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
227 _M_tagger(__t)
230 _State(_StateIdT __next, _StateIdT __alt)
231 : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
234 #ifdef _GLIBCXX_DEBUG
235 std::ostream&
236 _M_print(std::ostream& ostr) const;
238 // Prints graphviz dot commands for state.
239 std::ostream&
240 _M_dot(std::ostream& __ostr, _StateIdT __id) const;
241 #endif
245 // The Grep Matcher works on sets of states. Here are sets of states.
246 typedef std::set<_StateIdT> _StateSet;
248 // A collection of all states making up an NFA
250 // An NFA is a 4-tuple M = (K, S, s, F), where
251 // K is a finite set of states,
252 // S is the alphabet of the NFA,
253 // s is the initial state,
254 // F is a set of final (accepting) states.
256 // This NFA class is templated on S, a type that will hold values of the
257 // underlying alphabet (without regard to semantics of that alphabet). The
258 // other elements of the tuple are generated during construction of the NFA
259 // and are available through accessor member functions.
261 class _Nfa
262 : public _Automaton, public std::vector<_State>
264 public:
265 typedef _State _StateT;
266 typedef unsigned int _SizeT;
267 typedef regex_constants::syntax_option_type _FlagT;
269 public:
270 _Nfa(_FlagT __f)
271 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
274 ~_Nfa()
277 _FlagT
278 _M_options() const
279 { return _M_flags; }
281 _StateIdT
282 _M_start() const
283 { return _M_start_state; }
285 const _StateSet&
286 _M_final_states() const
287 { return _M_accepting_states; }
289 _SizeT
290 _M_sub_count() const
291 { return _M_subexpr_count; }
293 _StateIdT
294 _M_insert_accept()
296 this->push_back(_StateT(_S_opcode_accept));
297 _M_accepting_states.insert(this->size()-1);
298 return this->size()-1;
301 _StateIdT
302 _M_insert_alt(_StateIdT __next, _StateIdT __alt)
304 this->push_back(_StateT(__next, __alt));
305 return this->size()-1;
308 _StateIdT
309 _M_insert_matcher(_Matcher __m)
311 this->push_back(_StateT(__m));
312 return this->size()-1;
315 _StateIdT
316 _M_insert_subexpr_begin(const _Tagger& __t)
318 this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, __t));
319 return this->size()-1;
322 _StateIdT
323 _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
325 this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
326 return this->size()-1;
329 #ifdef _GLIBCXX_DEBUG
330 std::ostream&
331 _M_dot(std::ostream& __ostr) const;
332 #endif
334 private:
335 _FlagT _M_flags;
336 _StateIdT _M_start_state;
337 _StateSet _M_accepting_states;
338 _SizeT _M_subexpr_count;
341 // Describes a sequence of one or more %_State, its current start and end(s).
343 // This structure contains fragments of an NFA during construction.
344 class _StateSeq
346 public:
347 // Constructs a single-node sequence
348 _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
349 : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
351 // Constructs a split sequence from two other sequencces
352 _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
353 : _M_nfa(__e1._M_nfa),
354 _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
355 _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
358 // Constructs a split sequence from a single sequence
359 _StateSeq(const _StateSeq& __e, _StateIdT __id)
360 : _M_nfa(__e._M_nfa),
361 _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
362 _M_end1(__id), _M_end2(__e._M_end1)
365 // Constructs a copy of a %_StateSeq
366 _StateSeq(const _StateSeq& __rhs)
367 : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
368 _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
372 _StateSeq& operator=(const _StateSeq& __rhs);
374 _StateIdT
375 _M_front() const
376 { return _M_start; }
378 // Extends a sequence by one.
379 void
380 _M_push_back(_StateIdT __id);
382 // Extends and maybe joins a sequence.
383 void
384 _M_append(_StateIdT __id);
386 void
387 _M_append(_StateSeq& __rhs);
389 // Clones an entire sequence.
390 _StateIdT
391 _M_clone();
393 private:
394 _Nfa& _M_nfa;
395 _StateIdT _M_start;
396 _StateIdT _M_end1;
397 _StateIdT _M_end2;
401 } // namespace __regex
403 _GLIBCXX_END_NAMESPACE_VERSION
404 } // namespace
406 #include <bits/regex_nfa.tcc>