// class template regex -*- C++ -*-

// Copyright (C) 2010-2015 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
/**
 *  @file bits/regex_compiler.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
31 namespace std
_GLIBCXX_VISIBILITY(default)
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38 * @addtogroup regex-detail
// Forward declaration of the bracket-expression matcher so it can be named
// before it is defined.  The three parameters correspond, in order, to the
// _TraitsT, __icase and __collate parameters of the definition further down
// in this header.
template<typename, bool, bool>
  struct _BracketMatcher;
/**
 * @brief Builds an NFA from an input iterator range.
 *
 * The %_TraitsT type should fulfill requirements [28.3].
 */
50 template<typename _TraitsT
>
54 typedef typename
_TraitsT::char_type _CharT
;
55 typedef const _CharT
* _IterT
;
56 typedef _NFA
<_TraitsT
> _RegexT
;
57 typedef regex_constants::syntax_option_type _FlagT
;
59 _Compiler(_IterT __b
, _IterT __e
,
60 const typename
_TraitsT::locale_type
& __traits
, _FlagT __flags
);
62 shared_ptr
<const _RegexT
>
64 { return std::move(_M_nfa
); }
67 typedef _Scanner
<_CharT
> _ScannerT
;
68 typedef typename
_TraitsT::string_type _StringT
;
69 typedef typename
_ScannerT::_TokenT _TokenT
;
70 typedef _StateSeq
<_TraitsT
> _StateSeqT
;
71 typedef std::stack
<_StateSeqT
> _StackT
;
72 typedef std::ctype
<_CharT
> _CtypeT
;
74 // accepts a specific token or returns false.
76 _M_match_token(_TokenT __token
);
97 _M_bracket_expression();
99 template<bool __icase
, bool __collate
>
101 _M_insert_any_matcher_ecma();
103 template<bool __icase
, bool __collate
>
105 _M_insert_any_matcher_posix();
107 template<bool __icase
, bool __collate
>
109 _M_insert_char_matcher();
111 template<bool __icase
, bool __collate
>
113 _M_insert_character_class_matcher();
115 template<bool __icase
, bool __collate
>
117 _M_insert_bracket_matcher(bool __neg
);
119 // Returns true if successfully matched one term and should continue.
120 // Returns false if the compiler should move on.
121 template<bool __icase
, bool __collate
>
123 _M_expression_term(pair
<bool, _CharT
>& __last_char
,
124 _BracketMatcher
<_TraitsT
, __icase
, __collate
>&
128 _M_cur_int_value(int __radix
);
136 auto ret
= _M_stack
.top();
142 _ScannerT _M_scanner
;
143 shared_ptr
<_RegexT
> _M_nfa
;
146 const _TraitsT
& _M_traits
;
147 const _CtypeT
& _M_ctype
;
// Trait answering "does the container type _Tp hand out iterators over
// contiguous storage?".  The primary template says no; specializations for
// particular containers override that answer.
template<typename _Tp>
  struct __has_contiguous_iter : std::false_type { };
153 template<typename _Ch
, typename _Tr
, typename _Alloc
>
154 struct __has_contiguous_iter
<std::basic_string
<_Ch
, _Tr
, _Alloc
>>
158 template<typename _Tp
, typename _Alloc
>
159 struct __has_contiguous_iter
<std::vector
<_Tp
, _Alloc
>>
// Trait answering "is _Tp an iterator known to walk contiguous storage?".
// Arbitrary types are rejected by the primary template.
template<typename _Tp>
  struct __is_contiguous_normal_iter : std::false_type { };

// Raw pointers (to any, possibly cv-qualified, element type) always qualify.
template<typename _CharT>
  struct __is_contiguous_normal_iter<_CharT*> : std::true_type { };
169 template<typename _Tp
, typename _Cont
>
171 __is_contiguous_normal_iter
<__gnu_cxx::__normal_iterator
<_Tp
, _Cont
>>
172 : __has_contiguous_iter
<_Cont
>::type
175 template<typename _Iter
, typename _TraitsT
>
176 using __enable_if_contiguous_normal_iter
177 = typename enable_if
< __is_contiguous_normal_iter
<_Iter
>::value
,
178 std::shared_ptr
<const _NFA
<_TraitsT
>> >::type
;
180 template<typename _Iter
, typename _TraitsT
>
181 using __disable_if_contiguous_normal_iter
182 = typename enable_if
< !__is_contiguous_normal_iter
<_Iter
>::value
,
183 std::shared_ptr
<const _NFA
<_TraitsT
>> >::type
;
185 template<typename _FwdIter
, typename _TraitsT
>
186 inline __enable_if_contiguous_normal_iter
<_FwdIter
, _TraitsT
>
187 __compile_nfa(_FwdIter __first
, _FwdIter __last
,
188 const typename
_TraitsT::locale_type
& __loc
,
189 regex_constants::syntax_option_type __flags
)
191 size_t __len
= __last
- __first
;
192 const auto* __cfirst
= __len
? std::__addressof(*__first
) : nullptr;
193 using _Cmplr
= _Compiler
<_TraitsT
>;
194 return _Cmplr(__cfirst
, __cfirst
+ __len
, __loc
, __flags
)._M_get_nfa();
197 template<typename _FwdIter
, typename _TraitsT
>
198 inline __disable_if_contiguous_normal_iter
<_FwdIter
, _TraitsT
>
199 __compile_nfa(_FwdIter __first
, _FwdIter __last
,
200 const typename
_TraitsT::locale_type
& __loc
,
201 regex_constants::syntax_option_type __flags
)
203 basic_string
<typename
_TraitsT::char_type
> __str(__first
, __last
);
204 return __compile_nfa(__str
.data(), __str
.data() + __str
.size(), __loc
,
209 template<typename _TraitsT
, bool __icase
, bool __collate
>
210 class _RegexTranslator
213 typedef typename
_TraitsT::char_type _CharT
;
214 typedef typename
_TraitsT::string_type _StringT
;
215 typedef typename
std::conditional
<__collate
,
217 _CharT
>::type _StrTransT
;
220 _RegexTranslator(const _TraitsT
& __traits
)
221 : _M_traits(__traits
)
225 _M_translate(_CharT __ch
) const
228 return _M_traits
.translate_nocase(__ch
);
230 return _M_traits
.translate(__ch
);
236 _M_transform(_CharT __ch
) const
238 return _M_transform_impl(__ch
, typename integral_constant
<bool,
244 _M_transform_impl(_CharT __ch
, false_type
) const
248 _M_transform_impl(_CharT __ch
, true_type
) const
250 _StrTransT __str
= _StrTransT(1, _M_translate(__ch
));
251 return _M_traits
.transform(__str
.begin(), __str
.end());
254 const _TraitsT
& _M_traits
;
257 template<typename _TraitsT
>
258 class _RegexTranslator
<_TraitsT
, false, false>
261 typedef typename
_TraitsT::char_type _CharT
;
262 typedef _CharT _StrTransT
;
265 _RegexTranslator(const _TraitsT
&)
269 _M_translate(_CharT __ch
) const
273 _M_transform(_CharT __ch
) const
277 template<typename _TraitsT
, bool __is_ecma
, bool __icase
, bool __collate
>
280 template<typename _TraitsT
, bool __icase
, bool __collate
>
281 struct _AnyMatcher
<_TraitsT
, false, __icase
, __collate
>
283 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
284 typedef typename
_TransT::_CharT _CharT
;
287 _AnyMatcher(const _TraitsT
& __traits
)
288 : _M_translator(__traits
)
292 operator()(_CharT __ch
) const
294 static auto __nul
= _M_translator
._M_translate('\0');
295 return _M_translator
._M_translate(__ch
) != __nul
;
298 _TransT _M_translator
;
301 template<typename _TraitsT
, bool __icase
, bool __collate
>
302 struct _AnyMatcher
<_TraitsT
, true, __icase
, __collate
>
304 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
305 typedef typename
_TransT::_CharT _CharT
;
308 _AnyMatcher(const _TraitsT
& __traits
)
309 : _M_translator(__traits
)
313 operator()(_CharT __ch
) const
314 { return _M_apply(__ch
, typename is_same
<_CharT
, char>::type()); }
317 _M_apply(_CharT __ch
, true_type
) const
319 auto __c
= _M_translator
._M_translate(__ch
);
320 auto __n
= _M_translator
._M_translate('\n');
321 auto __r
= _M_translator
._M_translate('\r');
322 return __c
!= __n
&& __c
!= __r
;
326 _M_apply(_CharT __ch
, false_type
) const
328 auto __c
= _M_translator
._M_translate(__ch
);
329 auto __n
= _M_translator
._M_translate('\n');
330 auto __r
= _M_translator
._M_translate('\r');
331 auto __u2028
= _M_translator
._M_translate(u
'\u2028');
332 auto __u2029
= _M_translator
._M_translate(u
'\u2029');
333 return __c
!= __n
&& __c
!= __r
&& __c
!= __u2028
&& __c
!= __u2029
;
336 _TransT _M_translator
;
339 template<typename _TraitsT
, bool __icase
, bool __collate
>
342 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
343 typedef typename
_TransT::_CharT _CharT
;
345 _CharMatcher(_CharT __ch
, const _TraitsT
& __traits
)
346 : _M_translator(__traits
), _M_ch(_M_translator
._M_translate(__ch
))
350 operator()(_CharT __ch
) const
351 { return _M_ch
== _M_translator
._M_translate(__ch
); }
353 _TransT _M_translator
;
357 /// Matches a character range (bracket expression)
358 template<typename _TraitsT
, bool __icase
, bool __collate
>
359 struct _BracketMatcher
362 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
363 typedef typename
_TransT::_CharT _CharT
;
364 typedef typename
_TransT::_StrTransT _StrTransT
;
365 typedef typename
_TraitsT::string_type _StringT
;
366 typedef typename
_TraitsT::char_class_type _CharClassT
;
369 _BracketMatcher(bool __is_non_matching
,
370 const _TraitsT
& __traits
)
371 : _M_class_set(0), _M_translator(__traits
), _M_traits(__traits
),
372 _M_is_non_matching(__is_non_matching
)
373 #ifdef _GLIBCXX_DEBUG
379 operator()(_CharT __ch
) const
381 _GLIBCXX_DEBUG_ASSERT(_M_is_ready
);
382 return _M_apply(__ch
, _UseCache());
386 _M_add_char(_CharT __c
)
388 _M_char_set
.push_back(_M_translator
._M_translate(__c
));
389 #ifdef _GLIBCXX_DEBUG
395 _M_add_collate_element(const _StringT
& __s
)
397 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
398 __s
.data() + __s
.size());
400 __throw_regex_error(regex_constants::error_collate
);
401 _M_char_set
.push_back(_M_translator
._M_translate(__st
[0]));
402 #ifdef _GLIBCXX_DEBUG
409 _M_add_equivalence_class(const _StringT
& __s
)
411 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
412 __s
.data() + __s
.size());
414 __throw_regex_error(regex_constants::error_collate
);
415 __st
= _M_traits
.transform_primary(__st
.data(),
416 __st
.data() + __st
.size());
417 _M_equiv_set
.push_back(__st
);
418 #ifdef _GLIBCXX_DEBUG
423 // __neg should be true for \D, \S and \W only.
425 _M_add_character_class(const _StringT
& __s
, bool __neg
)
427 auto __mask
= _M_traits
.lookup_classname(__s
.data(),
428 __s
.data() + __s
.size(),
431 __throw_regex_error(regex_constants::error_ctype
);
433 _M_class_set
|= __mask
;
435 _M_neg_class_set
.push_back(__mask
);
436 #ifdef _GLIBCXX_DEBUG
442 _M_make_range(_CharT __l
, _CharT __r
)
445 __throw_regex_error(regex_constants::error_range
);
446 _M_range_set
.push_back(make_pair(_M_translator
._M_transform(__l
),
447 _M_translator
._M_transform(__r
)));
448 #ifdef _GLIBCXX_DEBUG
456 std::sort(_M_char_set
.begin(), _M_char_set
.end());
457 auto __end
= std::unique(_M_char_set
.begin(), _M_char_set
.end());
458 _M_char_set
.erase(__end
, _M_char_set
.end());
459 _M_make_cache(_UseCache());
460 #ifdef _GLIBCXX_DEBUG
466 // Currently we only use the cache for char
467 typedef typename
std::is_same
<_CharT
, char>::type _UseCache
;
469 static constexpr size_t
472 return 1ul << (sizeof(_CharT
) * __CHAR_BIT__
* int(_UseCache::value
));
476 typedef typename
std::conditional
<_UseCache::value
,
477 std::bitset
<_S_cache_size()>,
478 _Dummy
>::type _CacheT
;
479 typedef typename
std::make_unsigned
<_CharT
>::type _UnsignedCharT
;
482 _M_apply(_CharT __ch
, false_type
) const;
485 _M_apply(_CharT __ch
, true_type
) const
486 { return _M_cache
[static_cast<_UnsignedCharT
>(__ch
)]; }
489 _M_make_cache(true_type
)
491 for (unsigned __i
= 0; __i
< _M_cache
.size(); __i
++)
492 _M_cache
[__i
] = _M_apply(static_cast<_CharT
>(__i
), false_type());
496 _M_make_cache(false_type
)
500 std::vector
<_CharT
> _M_char_set
;
501 std::vector
<_StringT
> _M_equiv_set
;
502 std::vector
<pair
<_StrTransT
, _StrTransT
>> _M_range_set
;
503 std::vector
<_CharClassT
> _M_neg_class_set
;
504 _CharClassT _M_class_set
;
505 _TransT _M_translator
;
506 const _TraitsT
& _M_traits
;
507 bool _M_is_non_matching
;
509 #ifdef _GLIBCXX_DEBUG
515 _GLIBCXX_END_NAMESPACE_VERSION
516 } // namespace __detail
519 #include <bits/regex_compiler.tcc>