1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2024 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_executor.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 // FIXME convert comments to doxygen format.
33 namespace std
_GLIBCXX_VISIBILITY(default)
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
40 * @addtogroup regex-detail
45 * @brief Takes a regex and an input string and does the matching.
47 * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48 * by the template parameter %__dfs_mode.
50 template<typename _BiIter
, typename _Alloc
, typename _TraitsT
,
// Tag type that lifts the class template's boolean parameter __dfs_mode
// into a type, so that overloads can be selected on the search strategy.
54 using __search_mode
= integral_constant
<bool, __dfs_mode
>;
// Tag naming the case __search_mode == true_type (DFS mode).
55 using __dfs
= true_type
;
// Tag naming the case __search_mode == false_type (BFS mode).
56 using __bfs
= false_type
;
// Selects what a successful run must cover.  The entry points in this class
// pass _Exact for "string exactly matches the pattern" and _Prefix for
// "some prefix of the string matches the pattern".
58 enum class _Match_mode
: unsigned char { _Exact
, _Prefix
};
61 typedef typename iterator_traits
<_BiIter
>::value_type _CharT
;
62 typedef basic_regex
<_CharT
, _TraitsT
> _RegexT
;
63 typedef _GLIBCXX_STD_C::vector
<sub_match
<_BiIter
>, _Alloc
> _ResultsVec
;
64 typedef regex_constants::match_flag_type _FlagT
;
65 typedef typename
_TraitsT::char_class_type _ClassT
;
66 typedef _NFA
<_TraitsT
> _NFAT
;
// Constructor.  From the initializers visible here:
//  - _M_cur_results is built with the allocator of the caller's __results;
//  - _M_nfa is initialized from *__re._M_automaton;
//  - _M_results is initialized from __results;
//  - _M_rep_count is constructed with _M_nfa.size();
//  - _M_states is constructed with the NFA start state and _M_nfa.size().
// NOTE(review): some parameters and initializers are not visible in this
// excerpt; confirm the full list against the complete header.
69 _Executor(_BiIter __begin
,
71 _ResultsVec
& __results
,
74 : _M_cur_results(__results
.get_allocator()),
78 _M_nfa(*__re
._M_automaton
),
79 _M_results(__results
),
80 _M_rep_count(_M_nfa
.size()),
81 _M_states(_M_nfa
._M_start(), _M_nfa
.size()),
84 using namespace regex_constants
;
// When the caller passed match_prev_avail, clear match_not_bol and
// match_not_bow from the stored flags.
85 if (__flags
& match_prev_avail
) // ignore not_bol and not_bow
86 _M_flags
&= ~(match_not_bol
| match_not_bow
);
89 // Set matched when string exactly matches the pattern.
93 _M_current
= _M_begin
;
94 return _M_main(_Match_mode::_Exact
);
97 // Set matched when some prefix of the string matches the pattern.
// Resets the current position to _M_begin and returns the result of
// _M_main run in _Match_mode::_Prefix.
99 _M_search_from_first()
101 _M_current
= _M_begin
;
102 return _M_main(_Match_mode::_Prefix
);
// Declarations of the per-state helper functions.  Each takes the match mode
// and an NFA state id.  No bodies appear in this header.
// NOTE(review): the definitions are presumably in <bits/regex_executor.tcc>,
// which this header includes at its end -- confirm.
110 _M_rep_once_more(_Match_mode __match_mode
, _StateIdT
);
113 _M_handle_repeat(_Match_mode
, _StateIdT
);
116 _M_handle_subexpr_begin(_Match_mode
, _StateIdT
);
119 _M_handle_subexpr_end(_Match_mode
, _StateIdT
);
122 _M_handle_line_begin_assertion(_Match_mode
, _StateIdT
);
125 _M_handle_line_end_assertion(_Match_mode
, _StateIdT
);
128 _M_handle_word_boundary(_Match_mode
, _StateIdT
);
131 _M_handle_subexpr_lookahead(_Match_mode
, _StateIdT
);
134 _M_handle_match(_Match_mode
, _StateIdT
);
137 _M_handle_backref(_Match_mode
, _StateIdT
);
140 _M_handle_accept(_Match_mode
, _StateIdT
);
143 _M_handle_alternative(_Match_mode
, _StateIdT
);
// Declared with a match mode and a starting state id; body not in this header.
146 _M_dfs(_Match_mode __match_mode
, _StateIdT __start
);
149 _M_main(_Match_mode __match_mode
)
150 { return _M_main_dispatch(__match_mode
, __search_mode
{}); }
// Overload selected when the tag argument is __dfs; body not in this header.
153 _M_main_dispatch(_Match_mode __match_mode
, __dfs
);
// Overload selected when the tag argument is __bfs; body not in this header.
156 _M_main_dispatch(_Match_mode __match_mode
, __bfs
);
159 _M_is_word(_CharT __ch
) const
161 static const _CharT __s
[2] = { 'w' };
162 return _M_re
._M_automaton
->_M_traits
.isctype
163 (__ch
, _M_re
._M_automaton
->_M_traits
.lookup_classname(__s
, __s
+1));
169 if (_M_current
== _M_begin
)
171 // match_not_bol means ^ does not match [_M_begin,_M_begin)
172 if (_M_flags
& regex_constants::match_not_bol
)
174 // match_prev_avail means _M_begin is not the start of the input.
175 if (_M_flags
& regex_constants::match_prev_avail
)
177 // For ECMAScript multiline matches, check if the previous
178 // character is a line terminator.
179 if (_M_match_multiline())
180 return _M_is_line_terminator(*std::prev(_M_current
));
184 else // ^ matches at _M_begin
187 else if (_M_match_multiline())
188 return _M_is_line_terminator(*std::prev(_M_current
));
196 if (_M_current
== _M_end
)
197 return !(_M_flags
& regex_constants::match_not_eol
);
198 else if (_M_match_multiline())
199 return _M_is_line_terminator(*_M_current
);
// Const member taking no arguments; declaration only, body not in this header.
205 _M_word_boundary() const;
// Takes the id of an NFA state (__next); declaration only, body not in this
// header.
208 _M_lookahead(_StateIdT __next
);
// Classifies the character __c as a line terminator or not.
// NOTE(review): the comparison and return statements are not visible in this
// excerpt; only the setup and the ECMAScript check are shown.
211 _M_is_line_terminator(_CharT __c
) const
// Use the traits object stored in the regex's automaton.
213 const auto& __traits
= _M_re
._M_automaton
->_M_traits
;
// ctype facet for _CharT from the traits object's locale.
214 const auto& __ct
= use_facet
<ctype
<_CharT
>>(__traits
.getloc());
// Narrow __c to char; ' ' is the fallback passed to narrow().
215 const char __n
{ __ct
.narrow(__c
, ' ') };
// Branch taken only when the automaton's options include ECMAScript.
218 if (_M_re
._M_automaton
->_M_options() & regex_constants::ECMAScript
)
222 // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
// Reports whether the automaton's options contain every bit of the mask __m.
// NOTE(review): the declaration of __m is not visible in this excerpt; the
// initializer shown below (ECMAScript | __multiline) presumably belongs to
// it -- confirm.
228 _M_match_multiline() const noexcept
231 = regex_constants::ECMAScript
| regex_constants::__multiline
;
// Compare against __m itself so that all bits of the mask must be set.
232 return (_M_re
._M_automaton
->_M_options() & __m
) == __m
;
235 // Holds additional information used in BFS-mode.
236 template<typename _SearchMode
, typename _ResultsVec
>
239 template<typename _ResultsVec
>
240 struct _State_info
<__bfs
, _ResultsVec
>
// Allocates an array of __n bools with new[]; the trailing () value-
// initializes it, so every entry starts out false.  Also records the start
// state in _M_start.
243 _State_info(_StateIdT __start
, size_t __n
)
244 : _M_visited_states(new bool[__n
]()), _M_start(__start
)
// Releases the array allocated in the constructor.
247 ~_State_info() { delete[] _M_visited_states
; }
// Copying is deleted: the object holds a raw owning pointer that the
// destructor frees.
249 _State_info(const _State_info
&) = delete;
250 _State_info
& operator=(const _State_info
&) = delete;
// Tests the visited flag of state __i and sets that flag to true.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably the result tells whether the state had already been visited --
// confirm.
252 bool _M_visited(_StateIdT __i
)
254 if (_M_visited_states
[__i
])
256 _M_visited_states
[__i
] = true;
260 void _M_queue(_StateIdT __i
, const _ResultsVec
& __res
)
261 { _M_match_queue
.emplace_back(__i
, __res
); }
263 // Dummy implementations for BFS mode.
264 _BiIter
* _M_get_sol_pos() { return nullptr; }
266 // Saves states that need to be considered for the next character.
// Each entry pairs a state id with its own copy of a results vector;
// entries are appended by _M_queue().
267 _GLIBCXX_STD_C::vector
<pair
<_StateIdT
, _ResultsVec
>> _M_match_queue
;
268 // Indicates which states are already visited.
// Raw owning pointer: allocated with new[] in the constructor and freed with
// delete[] in the destructor.
269 bool* _M_visited_states
;
270 // To record current solution.
274 template<typename _ResultsVec
>
275 struct _State_info
<__dfs
, _ResultsVec
>
// Records the start state in _M_start.  The second parameter (a size_t) is
// unnamed and is not used by the initializer shown here.
278 _State_info(_StateIdT __start
, size_t) : _M_start(__start
)
281 // Dummy implementations for DFS mode.
282 bool _M_visited(_StateIdT
) const { return false; }
283 void _M_queue(_StateIdT
, const _ResultsVec
&) { }
285 _BiIter
* _M_get_sol_pos() { return &_M_sol_pos
; }
287 // To record current solution.
// Executor-owned results vector; the constructor builds it with the
// allocator of the caller's results vector.
293 _ResultsVec _M_cur_results
;
// Const iterator member; compared against _M_current in the end-of-line
// check.
296 const _BiIter _M_end
;
// The regex being executed, held by const reference; its _M_automaton is
// consulted for traits and options.
297 const _RegexT
& _M_re
;
// Reference to the caller's results vector passed to the constructor.
299 _ResultsVec
& _M_results
;
// Vector of (iterator, int) pairs; the constructor sizes it with
// _M_nfa.size().
300 _GLIBCXX_STD_C::vector
<pair
<_BiIter
, int>> _M_rep_count
;
// Mode-specific state, specialized on __search_mode; the constructor passes
// it the NFA start state and _M_nfa.size().
301 _State_info
<__search_mode
, _ResultsVec
> _M_states
;
303 // Do we have a solution so far?
308 } // namespace __detail
309 _GLIBCXX_END_NAMESPACE_VERSION
312 #include <bits/regex_executor.tcc>