1 // class template regex -*- C++ -*-
3 // Copyright (C) 2010 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_nfa.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 _GLIBCXX_BEGIN_NAMESPACE(std
)
36 // Base class for, um, automata. Could be an NFA or a DFA. Your choice.
40 typedef unsigned int _SizeT
;
48 _M_sub_count() const = 0;
52 _M_dot(std::ostream
& __ostr
) const = 0;
56 // Generic shred pointer to an automaton.
57 typedef std::shared_ptr
<_Automaton
> _AutomatonPtr
;
59 // Operation codes that define the type of transitions within the base NFA
60 // that represents the regular expression.
63 _S_opcode_unknown
= 0,
64 _S_opcode_alternative
= 1,
65 _S_opcode_subexpr_begin
= 4,
66 _S_opcode_subexpr_end
= 5,
67 _S_opcode_match
= 100,
68 _S_opcode_accept
= 255
71 // Provides a generic facade for a templated match_results.
74 virtual void _M_set_pos(int __i
, int __j
, const _PatternCursor
& __p
) = 0;
75 virtual void _M_set_matched(int __i
, bool __is_matched
) = 0;
78 // Tags current state (for subexpr begin/end).
79 typedef std::function
<void (const _PatternCursor
&, _Results
&)> _Tagger
;
81 template<typename _FwdIterT
, typename _TraitsT
>
91 operator()(const _PatternCursor
& __pc
, _Results
& __r
)
92 { __r
._M_set_pos(_M_index
, 0, __pc
); }
97 template<typename _FwdIterT
, typename _TraitsT
>
107 operator()(const _PatternCursor
& __pc
, _Results
& __r
)
108 { __r
._M_set_pos(_M_index
, 1, __pc
); }
113 // Indicates if current state matches cursor current.
114 typedef std::function
<bool (const _PatternCursor
&)> _Matcher
;
116 // Matches any character
118 _AnyMatcher(const _PatternCursor
&)
121 // Matches a single character
122 template<typename _InIterT
, typename _TraitsT
>
126 typedef typename
_TraitsT::char_type char_type
;
129 _CharMatcher(char_type __c
, const _TraitsT
& __t
= _TraitsT())
130 : _M_traits(__t
), _M_c(_M_traits
.translate(__c
))
134 operator()(const _PatternCursor
& __pc
) const
136 typedef const _SpecializedCursor
<_InIterT
>& _CursorT
;
137 _CursorT __c
= static_cast<_CursorT
>(__pc
);
138 return _M_traits
.translate(__c
._M_current()) == _M_c
;
141 const _TraitsT
& _M_traits
;
145 // Matches a character range (bracket expression)
146 template<typename _InIterT
, typename _TraitsT
>
150 typedef typename
_TraitsT::char_type _CharT
;
151 typedef std::basic_string
<_CharT
> _StringT
;
154 _RangeMatcher(bool __is_non_matching
, const _TraitsT
& __t
= _TraitsT())
155 : _M_traits(__t
), _M_is_non_matching(__is_non_matching
)
159 operator()(const _PatternCursor
& __pc
) const
161 typedef const _SpecializedCursor
<_InIterT
>& _CursorT
;
162 _CursorT __c
= static_cast<_CursorT
>(__pc
);
167 _M_add_char(_CharT __c
)
171 _M_add_collating_element(const _StringT
& __s
)
175 _M_add_equivalence_class(const _StringT
& __s
)
179 _M_add_character_class(const _StringT
& __s
)
186 const _TraitsT
& _M_traits
;
187 bool _M_is_non_matching
;
190 // Identifies a state in the NFA.
191 typedef int _StateIdT
;
193 // The special case in which a state identifier is not an index.
194 static const _StateIdT _S_invalid_state_id
= -1;
197 // An individual state in an NFA
199 // In this case a "state" is an entry in the NFA definition coupled with its
200 // outgoing transition(s). All states have a single outgoing transition,
201 // except for accepting states (which have no outgoing transitions) and alt
202 // states, which have two outgoing transitions.
206 typedef int _OpcodeT
;
208 _OpcodeT _M_opcode
; // type of outgoing transition
209 _StateIdT _M_next
; // outgoing tranition
210 _StateIdT _M_alt
; // for _S_opcode_alternative
211 unsigned int _M_subexpr
; // for _S_opcode_subexpr_*
212 _Tagger _M_tagger
; // for _S_opcode_subexpr_*
213 _Matcher _M_matches
; // for _S_opcode_match
215 explicit _State(_OpcodeT __opcode
)
216 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
)
219 _State(const _Matcher
& __m
)
220 : _M_opcode(_S_opcode_match
), _M_next(_S_invalid_state_id
), _M_matches(__m
)
223 _State(_OpcodeT __opcode
, unsigned int __s
, const _Tagger
& __t
)
224 : _M_opcode(__opcode
), _M_next(_S_invalid_state_id
), _M_subexpr(__s
),
228 _State(_StateIdT __next
, _StateIdT __alt
)
229 : _M_opcode(_S_opcode_alternative
), _M_next(__next
), _M_alt(__alt
)
232 #ifdef _GLIBCXX_DEBUG
234 _M_print(std::ostream
& ostr
) const;
236 // Prints graphviz dot commands for state.
238 _M_dot(std::ostream
& __ostr
, _StateIdT __id
) const;
243 // The Grep Matcher works on sets of states. Here are sets of states.
244 typedef std::set
<_StateIdT
> _StateSet
;
246 // A collection of all states making up an NFA
248 // An NFA is a 4-tuple M = (K, S, s, F), where
249 // K is a finite set of states,
250 // S is the alphabet of the NFA,
251 // s is the initial state,
252 // F is a set of final (accepting) states.
254 // This NFA class is templated on S, a type that will hold values of the
255 // underlying alphabet (without regard to semantics of that alphabet). The
256 // other elements of the tuple are generated during construction of the NFA
257 // and are available through accessor member functions.
260 : public _Automaton
, public std::vector
<_State
>
263 typedef _State _StateT
;
264 typedef unsigned int _SizeT
;
265 typedef regex_constants::syntax_option_type _FlagT
;
269 : _M_flags(__f
), _M_start_state(0), _M_subexpr_count(0)
281 { return _M_start_state
; }
284 _M_final_states() const
285 { return _M_accepting_states
; }
289 { return _M_subexpr_count
; }
294 this->push_back(_StateT(_S_opcode_accept
));
295 _M_accepting_states
.insert(this->size()-1);
296 return this->size()-1;
300 _M_insert_alt(_StateIdT __next
, _StateIdT __alt
)
302 this->push_back(_StateT(__next
, __alt
));
303 return this->size()-1;
307 _M_insert_matcher(_Matcher __m
)
309 this->push_back(_StateT(__m
));
310 return this->size()-1;
314 _M_insert_subexpr_begin(const _Tagger
& __t
)
316 this->push_back(_StateT(_S_opcode_subexpr_begin
, _M_subexpr_count
++, __t
));
317 return this->size()-1;
321 _M_insert_subexpr_end(unsigned int __i
, const _Tagger
& __t
)
323 this->push_back(_StateT(_S_opcode_subexpr_end
, __i
, __t
));
324 return this->size()-1;
327 #ifdef _GLIBCXX_DEBUG
329 _M_dot(std::ostream
& __ostr
) const;
334 _StateIdT _M_start_state
;
335 _StateSet _M_accepting_states
;
336 _SizeT _M_subexpr_count
;
339 // Describes a sequence of one or more %_State, its current start and end(s).
341 // This structure contains fragments of an NFA during construction.
345 // Constructs a single-node sequence
346 _StateSeq(_Nfa
& __ss
, _StateIdT __s
, _StateIdT __e
= _S_invalid_state_id
)
347 : _M_nfa(__ss
), _M_start(__s
), _M_end1(__s
), _M_end2(__e
)
349 // Constructs a split sequence from two other sequencces
350 _StateSeq(const _StateSeq
& __e1
, const _StateSeq
& __e2
)
351 : _M_nfa(__e1
._M_nfa
),
352 _M_start(_M_nfa
._M_insert_alt(__e1
._M_start
, __e2
._M_start
)),
353 _M_end1(__e1
._M_end1
), _M_end2(__e2
._M_end1
)
356 // Constructs a split sequence from a single sequence
357 _StateSeq(const _StateSeq
& __e
, _StateIdT __id
)
358 : _M_nfa(__e
._M_nfa
),
359 _M_start(_M_nfa
._M_insert_alt(__id
, __e
._M_start
)),
360 _M_end1(__id
), _M_end2(__e
._M_end1
)
363 // Constructs a copy of a %_StateSeq
364 _StateSeq(const _StateSeq
& __rhs
)
365 : _M_nfa(__rhs
._M_nfa
), _M_start(__rhs
._M_start
),
366 _M_end1(__rhs
._M_end1
), _M_end2(__rhs
._M_end2
)
370 _StateSeq
& operator=(const _StateSeq
& __rhs
);
376 // Extends a sequence by one.
378 _M_push_back(_StateIdT __id
);
380 // Extends and maybe joins a sequence.
382 _M_append(_StateIdT __id
);
385 _M_append(_StateSeq
& __rhs
);
387 // Clones an entire sequence.
399 } // namespace __regex
401 _GLIBCXX_END_NAMESPACE
403 #include <bits/regex_nfa.tcc>