1 // class template regex -*- C++ -*-
3 // Copyright (C) 2010 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_nfa.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 namespace std
_GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38 // Base class for, um, automata. Could be an NFA or a DFA. Your choice.
42 typedef unsigned int _SizeT
;
50 _M_sub_count() const = 0;
54 _M_dot(std::ostream
& __ostr
) const = 0;
58 // Generic shred pointer to an automaton.
59 typedef std::shared_ptr
<_Automaton
> _AutomatonPtr
;
61 // Operation codes that define the type of transitions within the base NFA
62 // that represents the regular expression.
65 _S_opcode_unknown
= 0,
66 _S_opcode_alternative
= 1,
67 _S_opcode_subexpr_begin
= 4,
68 _S_opcode_subexpr_end
= 5,
69 _S_opcode_match
= 100,
70 _S_opcode_accept
= 255
73 // Provides a generic facade for a templated match_results.
76 virtual void _M_set_pos(int __i
, int __j
, const _PatternCursor
& __p
) = 0;
77 virtual void _M_set_matched(int __i
, bool __is_matched
) = 0;
80 // Tags current state (for subexpr begin/end).
81 typedef std::function
<void (const _PatternCursor
&, _Results
&)> _Tagger
;
83 template<typename _FwdIterT
, typename _TraitsT
>
93 operator()(const _PatternCursor
& __pc
, _Results
& __r
)
94 { __r
._M_set_pos(_M_index
, 0, __pc
); }
99 template<typename _FwdIterT
, typename _TraitsT
>
109 operator()(const _PatternCursor
& __pc
, _Results
& __r
)
110 { __r
._M_set_pos(_M_index
, 1, __pc
); }
115 // Indicates if current state matches cursor current.
116 typedef std::function
<bool (const _PatternCursor
&)> _Matcher
;
118 // Matches any character
120 _AnyMatcher(const _PatternCursor
&)
123 // Matches a single character
124 template<typename _InIterT
, typename _TraitsT
>
128 typedef typename
_TraitsT::char_type char_type
;
131 _CharMatcher(char_type __c
, const _TraitsT
& __t
= _TraitsT())
132 : _M_traits(__t
), _M_c(_M_traits
.translate(__c
))
136 operator()(const _PatternCursor
& __pc
) const
138 typedef const _SpecializedCursor
<_InIterT
>& _CursorT
;
139 _CursorT __c
= static_cast<_CursorT
>(__pc
);
140 return _M_traits
.translate(__c
._M_current()) == _M_c
;
143 const _TraitsT
& _M_traits
;
147 // Matches a character range (bracket expression)
148 template<typename _InIterT
, typename _TraitsT
>
152 typedef typename
_TraitsT::char_type _CharT
;
153 typedef std::basic_string
<_CharT
> _StringT
;
156 _RangeMatcher(bool __is_non_matching
, const _TraitsT
& __t
= _TraitsT())
157 : _M_traits(__t
), _M_is_non_matching(__is_non_matching
)
161 operator()(const _PatternCursor
& __pc
) const
163 typedef const _SpecializedCursor
<_InIterT
>& _CursorT
;
164 _CursorT __c
= static_cast<_CursorT
>(__pc
);
169 _M_add_char(_CharT __c
)
173 _M_add_collating_element(const _StringT
& __s
)
177 _M_add_equivalence_class(const _StringT
& __s
)
181 _M_add_character_class(const _StringT
& __s
)
188 const _TraitsT
& _M_traits
;
189 bool _M_is_non_matching
;
192 // Identifies a state in the NFA.
193 typedef int _StateIdT
;
195 // The special case in which a state identifier is not an index.
196 static const _StateIdT _S_invalid_state_id
= -1;
199 // An individual state in an NFA
201 // In this case a "state" is an entry in the NFA definition coupled with its
202 // outgoing transition(s). All states have a single outgoing transition,
203 // except for accepting states (which have no outgoing transitions) and alt
204 // states, which have two outgoing transitions.
208 typedef int _OpcodeT
;
210 _OpcodeT _M_opcode
; // type of outgoing transition
211 _StateIdT _M_next
; // outgoing tranition
212 _StateIdT _M_alt
; // for _S_opcode_alternative
213 unsigned int _M_subexpr
; // for _S_opcode_subexpr_*
214 _Tagger _M_tagger
; // for _S_opcode_subexpr_*
215 _Matcher _M_matches
; // for _S_opcode_match
217 explicit _State(_OpcodeT __opcode
)
218 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
)
221 _State(const _Matcher
& __m
)
222 : _M_opcode(_S_opcode_match
), _M_next(_S_invalid_state_id
), _M_matches(__m
)
225 _State(_OpcodeT __opcode
, unsigned int __s
, const _Tagger
& __t
)
226 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
), _M_subexpr(__s
),
230 _State(_StateIdT __next
, _StateIdT __alt
)
231 : _M_opcode(_S_opcode_alternative
), _M_next(__next
), _M_alt(__alt
)
234 #ifdef _GLIBCXX_DEBUG
236 _M_print(std::ostream
& ostr
) const;
238 // Prints graphviz dot commands for state.
240 _M_dot(std::ostream
& __ostr
, _StateIdT __id
) const;
245 // The Grep Matcher works on sets of states. Here are sets of states.
246 typedef std::set
<_StateIdT
> _StateSet
;
248 // A collection of all states making up an NFA
250 // An NFA is a 4-tuple M = (K, S, s, F), where
251 // K is a finite set of states,
252 // S is the alphabet of the NFA,
253 // s is the initial state,
254 // F is a set of final (accepting) states.
256 // This NFA class is templated on S, a type that will hold values of the
257 // underlying alphabet (without regard to semantics of that alphabet). The
258 // other elements of the tuple are generated during construction of the NFA
259 // and are available through accessor member functions.
262 : public _Automaton
, public std::vector
<_State
>
265 typedef _State _StateT
;
266 typedef unsigned int _SizeT
;
267 typedef regex_constants::syntax_option_type _FlagT
;
271 : _M_flags(__f
), _M_start_state(0), _M_subexpr_count(0)
283 { return _M_start_state
; }
286 _M_final_states() const
287 { return _M_accepting_states
; }
291 { return _M_subexpr_count
; }
296 this->push_back(_StateT(_S_opcode_accept
));
297 _M_accepting_states
.insert(this->size()-1);
298 return this->size()-1;
302 _M_insert_alt(_StateIdT __next
, _StateIdT __alt
)
304 this->push_back(_StateT(__next
, __alt
));
305 return this->size()-1;
309 _M_insert_matcher(_Matcher __m
)
311 this->push_back(_StateT(__m
));
312 return this->size()-1;
316 _M_insert_subexpr_begin(const _Tagger
& __t
)
318 this->push_back(_StateT(_S_opcode_subexpr_begin
, _M_subexpr_count
++, __t
));
319 return this->size()-1;
323 _M_insert_subexpr_end(unsigned int __i
, const _Tagger
& __t
)
325 this->push_back(_StateT(_S_opcode_subexpr_end
, __i
, __t
));
326 return this->size()-1;
329 #ifdef _GLIBCXX_DEBUG
331 _M_dot(std::ostream
& __ostr
) const;
336 _StateIdT _M_start_state
;
337 _StateSet _M_accepting_states
;
338 _SizeT _M_subexpr_count
;
341 // Describes a sequence of one or more %_State, its current start and end(s).
343 // This structure contains fragments of an NFA during construction.
347 // Constructs a single-node sequence
348 _StateSeq(_Nfa
& __ss
, _StateIdT __s
, _StateIdT __e
= _S_invalid_state_id
)
349 : _M_nfa(__ss
), _M_start(__s
), _M_end1(__s
), _M_end2(__e
)
351 // Constructs a split sequence from two other sequencces
352 _StateSeq(const _StateSeq
& __e1
, const _StateSeq
& __e2
)
353 : _M_nfa(__e1
._M_nfa
),
354 _M_start(_M_nfa
._M_insert_alt(__e1
._M_start
, __e2
._M_start
)),
355 _M_end1(__e1
._M_end1
), _M_end2(__e2
._M_end1
)
358 // Constructs a split sequence from a single sequence
359 _StateSeq(const _StateSeq
& __e
, _StateIdT __id
)
360 : _M_nfa(__e
._M_nfa
),
361 _M_start(_M_nfa
._M_insert_alt(__id
, __e
._M_start
)),
362 _M_end1(__id
), _M_end2(__e
._M_end1
)
365 // Constructs a copy of a %_StateSeq
366 _StateSeq(const _StateSeq
& __rhs
)
367 : _M_nfa(__rhs
._M_nfa
), _M_start(__rhs
._M_start
),
368 _M_end1(__rhs
._M_end1
), _M_end2(__rhs
._M_end2
)
372 _StateSeq
& operator=(const _StateSeq
& __rhs
);
378 // Extends a sequence by one.
380 _M_push_back(_StateIdT __id
);
382 // Extends and maybe joins a sequence.
384 _M_append(_StateIdT __id
);
387 _M_append(_StateSeq
& __rhs
);
389 // Clones an entire sequence.
401 } // namespace __regex
403 _GLIBCXX_END_NAMESPACE_VERSION
406 #include <bits/regex_nfa.tcc>