// class template regex -*- C++ -*-

// Copyright (C) 2010-2018 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
/**
 *  @file bits/regex_compiler.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
31 namespace std
_GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 _GLIBCXX_BEGIN_NAMESPACE_CXX11
39 _GLIBCXX_END_NAMESPACE_CXX11
44 * @addtogroup regex-detail
48 template<typename
, bool, bool>
49 struct _BracketMatcher
;
52 * @brief Builds an NFA from an input iterator range.
54 * The %_TraitsT type should fulfill requirements [28.3].
56 template<typename _TraitsT
>
60 typedef typename
_TraitsT::char_type _CharT
;
61 typedef const _CharT
* _IterT
;
62 typedef _NFA
<_TraitsT
> _RegexT
;
63 typedef regex_constants::syntax_option_type _FlagT
;
65 _Compiler(_IterT __b
, _IterT __e
,
66 const typename
_TraitsT::locale_type
& __traits
, _FlagT __flags
);
68 shared_ptr
<const _RegexT
>
70 { return std::move(_M_nfa
); }
73 typedef _Scanner
<_CharT
> _ScannerT
;
74 typedef typename
_TraitsT::string_type _StringT
;
75 typedef typename
_ScannerT::_TokenT _TokenT
;
76 typedef _StateSeq
<_TraitsT
> _StateSeqT
;
77 typedef std::stack
<_StateSeqT
> _StackT
;
78 typedef std::ctype
<_CharT
> _CtypeT
;
80 // accepts a specific token or returns false.
82 _M_match_token(_TokenT __token
);
103 _M_bracket_expression();
105 template<bool __icase
, bool __collate
>
107 _M_insert_any_matcher_ecma();
109 template<bool __icase
, bool __collate
>
111 _M_insert_any_matcher_posix();
113 template<bool __icase
, bool __collate
>
115 _M_insert_char_matcher();
117 template<bool __icase
, bool __collate
>
119 _M_insert_character_class_matcher();
121 template<bool __icase
, bool __collate
>
123 _M_insert_bracket_matcher(bool __neg
);
125 // Returns true if successfully matched one term and should continue.
126 // Returns false if the compiler should move on.
127 template<bool __icase
, bool __collate
>
129 _M_expression_term(pair
<bool, _CharT
>& __last_char
,
130 _BracketMatcher
<_TraitsT
, __icase
, __collate
>&
134 _M_cur_int_value(int __radix
);
142 auto ret
= _M_stack
.top();
148 _ScannerT _M_scanner
;
149 shared_ptr
<_RegexT
> _M_nfa
;
152 const _TraitsT
& _M_traits
;
153 const _CtypeT
& _M_ctype
;
/// Trait that is true for iterator types this header treats as
/// addressing contiguous storage.  The primary template accepts raw
/// pointers only.
template<typename _Tp>
  struct __is_contiguous_iter : std::is_pointer<_Tp>::type { };

/// A __normal_iterator that wraps a raw pointer also counts as
/// contiguous, whatever its container tag is.
template<typename _Tp, typename _Cont>
  struct
  __is_contiguous_iter<__gnu_cxx::__normal_iterator<_Tp*, _Cont>>
  : std::true_type { };
164 template<typename _Iter
, typename _TraitsT
>
165 using __enable_if_contiguous_iter
166 = __enable_if_t
< __is_contiguous_iter
<_Iter
>::value
,
167 std::shared_ptr
<const _NFA
<_TraitsT
>> >;
169 template<typename _Iter
, typename _TraitsT
>
170 using __disable_if_contiguous_iter
171 = __enable_if_t
< !__is_contiguous_iter
<_Iter
>::value
,
172 std::shared_ptr
<const _NFA
<_TraitsT
>> >;
174 template<typename _TraitsT
, typename _FwdIter
>
175 inline __enable_if_contiguous_iter
<_FwdIter
, _TraitsT
>
176 __compile_nfa(_FwdIter __first
, _FwdIter __last
,
177 const typename
_TraitsT::locale_type
& __loc
,
178 regex_constants::syntax_option_type __flags
)
180 size_t __len
= __last
- __first
;
181 const auto* __cfirst
= __len
? std::__addressof(*__first
) : nullptr;
182 using _Cmplr
= _Compiler
<_TraitsT
>;
183 return _Cmplr(__cfirst
, __cfirst
+ __len
, __loc
, __flags
)._M_get_nfa();
186 template<typename _TraitsT
, typename _FwdIter
>
187 inline __disable_if_contiguous_iter
<_FwdIter
, _TraitsT
>
188 __compile_nfa(_FwdIter __first
, _FwdIter __last
,
189 const typename
_TraitsT::locale_type
& __loc
,
190 regex_constants::syntax_option_type __flags
)
192 const basic_string
<typename
_TraitsT::char_type
> __str(__first
, __last
);
193 return __compile_nfa
<_TraitsT
>(__str
.data(), __str
.data() + __str
.size(),
/// Shared implementation of the character translation policy selected
/// by the __icase and __collate flags.  Holds a reference to the traits
/// object, so the traits object must outlive the translator.
template<typename _TraitsT, bool __icase, bool __collate>
  class _RegexTranslatorBase
  {
  public:
    typedef typename _TraitsT::char_type   _CharT;
    typedef typename _TraitsT::string_type _StringT;
    typedef _StringT                       _StrTransT;

    explicit
    _RegexTranslatorBase(const _TraitsT& __traits)
    : _M_traits(__traits)
    { }

    /// Maps __ch through translate_nocase() when __icase is set, through
    /// translate() when only __collate is set, and returns it unchanged
    /// otherwise.
    _CharT
    _M_translate(_CharT __ch) const
    {
      if (__icase)
        return _M_traits.translate_nocase(__ch);
      else if (__collate)
        return _M_traits.translate(__ch);
      else
        return __ch;
    }

    /// Returns the traits' transform() of the one-character string
    /// made from __ch.
    _StrTransT
    _M_transform(_CharT __ch) const
    {
      _StrTransT __str(1, __ch);
      return _M_traits.transform(__str.begin(), __str.end());
    }

    // See LWG 523. It's not efficiently implementable when _TraitsT is not
    // std::regex_traits<>, and __collate is true. See specializations for
    // implementations of other cases.
    bool
    _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
                   const _StrTransT& __s) const
    { return __first <= __s && __s <= __last; }

  protected:
    /// True when either the lower-case or the upper-case form of __ch,
    /// taken from the ctype facet of the traits' locale, lies in the
    /// closed range [__first, __last].
    bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
    {
      typedef std::ctype<_CharT> __ctype_type;
      const auto& __fctyp
        = std::use_facet<__ctype_type>(this->_M_traits.getloc());
      auto __lower = __fctyp.tolower(__ch);
      auto __upper = __fctyp.toupper(__ch);
      return (__first <= __lower && __lower <= __last)
        || (__first <= __upper && __upper <= __last);
    }

    const _TraitsT& _M_traits;
  };
251 template<typename _TraitsT
, bool __icase
, bool __collate
>
252 class _RegexTranslator
253 : public _RegexTranslatorBase
<_TraitsT
, __icase
, __collate
>
256 typedef _RegexTranslatorBase
<_TraitsT
, __icase
, __collate
> _Base
;
260 template<typename _TraitsT
, bool __icase
>
261 class _RegexTranslator
<_TraitsT
, __icase
, false>
262 : public _RegexTranslatorBase
<_TraitsT
, __icase
, false>
265 typedef _RegexTranslatorBase
<_TraitsT
, __icase
, false> _Base
;
266 typedef typename
_Base::_CharT _CharT
;
267 typedef _CharT _StrTransT
;
272 _M_transform(_CharT __ch
) const
276 _M_match_range(_CharT __first
, _CharT __last
, _CharT __ch
) const
279 return __first
<= __ch
&& __ch
<= __last
;
280 return this->_M_in_range_icase(__first
, __last
, __ch
);
284 template<typename _CharType
>
285 class _RegexTranslator
<std::regex_traits
<_CharType
>, true, true>
286 : public _RegexTranslatorBase
<std::regex_traits
<_CharType
>, true, true>
289 typedef _RegexTranslatorBase
<std::regex_traits
<_CharType
>, true, true>
291 typedef typename
_Base::_CharT _CharT
;
292 typedef typename
_Base::_StrTransT _StrTransT
;
297 _M_match_range(const _StrTransT
& __first
, const _StrTransT
& __last
,
298 const _StrTransT
& __str
) const
300 __glibcxx_assert(__first
.size() == 1);
301 __glibcxx_assert(__last
.size() == 1);
302 __glibcxx_assert(__str
.size() == 1);
303 return this->_M_in_range_icase(__first
[0], __last
[0], __str
[0]);
307 template<typename _TraitsT
>
308 class _RegexTranslator
<_TraitsT
, false, false>
311 typedef typename
_TraitsT::char_type _CharT
;
312 typedef _CharT _StrTransT
;
315 _RegexTranslator(const _TraitsT
&)
319 _M_translate(_CharT __ch
) const
323 _M_transform(_CharT __ch
) const
327 _M_match_range(_CharT __first
, _CharT __last
, _CharT __ch
) const
328 { return __first
<= __ch
&& __ch
<= __last
; }
331 template<typename _TraitsT
, bool __is_ecma
, bool __icase
, bool __collate
>
334 template<typename _TraitsT
, bool __icase
, bool __collate
>
335 struct _AnyMatcher
<_TraitsT
, false, __icase
, __collate
>
337 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
338 typedef typename
_TransT::_CharT _CharT
;
341 _AnyMatcher(const _TraitsT
& __traits
)
342 : _M_translator(__traits
)
346 operator()(_CharT __ch
) const
348 static auto __nul
= _M_translator
._M_translate('\0');
349 return _M_translator
._M_translate(__ch
) != __nul
;
352 _TransT _M_translator
;
355 template<typename _TraitsT
, bool __icase
, bool __collate
>
356 struct _AnyMatcher
<_TraitsT
, true, __icase
, __collate
>
358 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
359 typedef typename
_TransT::_CharT _CharT
;
362 _AnyMatcher(const _TraitsT
& __traits
)
363 : _M_translator(__traits
)
367 operator()(_CharT __ch
) const
368 { return _M_apply(__ch
, typename is_same
<_CharT
, char>::type()); }
371 _M_apply(_CharT __ch
, true_type
) const
373 auto __c
= _M_translator
._M_translate(__ch
);
374 auto __n
= _M_translator
._M_translate('\n');
375 auto __r
= _M_translator
._M_translate('\r');
376 return __c
!= __n
&& __c
!= __r
;
380 _M_apply(_CharT __ch
, false_type
) const
382 auto __c
= _M_translator
._M_translate(__ch
);
383 auto __n
= _M_translator
._M_translate('\n');
384 auto __r
= _M_translator
._M_translate('\r');
385 auto __u2028
= _M_translator
._M_translate(u
'\u2028');
386 auto __u2029
= _M_translator
._M_translate(u
'\u2029');
387 return __c
!= __n
&& __c
!= __r
&& __c
!= __u2028
&& __c
!= __u2029
;
390 _TransT _M_translator
;
393 template<typename _TraitsT
, bool __icase
, bool __collate
>
396 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
397 typedef typename
_TransT::_CharT _CharT
;
399 _CharMatcher(_CharT __ch
, const _TraitsT
& __traits
)
400 : _M_translator(__traits
), _M_ch(_M_translator
._M_translate(__ch
))
404 operator()(_CharT __ch
) const
405 { return _M_ch
== _M_translator
._M_translate(__ch
); }
407 _TransT _M_translator
;
411 /// Matches a character range (bracket expression)
412 template<typename _TraitsT
, bool __icase
, bool __collate
>
413 struct _BracketMatcher
416 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
417 typedef typename
_TransT::_CharT _CharT
;
418 typedef typename
_TransT::_StrTransT _StrTransT
;
419 typedef typename
_TraitsT::string_type _StringT
;
420 typedef typename
_TraitsT::char_class_type _CharClassT
;
423 _BracketMatcher(bool __is_non_matching
,
424 const _TraitsT
& __traits
)
425 : _M_class_set(0), _M_translator(__traits
), _M_traits(__traits
),
426 _M_is_non_matching(__is_non_matching
)
430 operator()(_CharT __ch
) const
432 _GLIBCXX_DEBUG_ASSERT(_M_is_ready
);
433 return _M_apply(__ch
, _UseCache());
437 _M_add_char(_CharT __c
)
439 _M_char_set
.push_back(_M_translator
._M_translate(__c
));
440 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
444 _M_add_collate_element(const _StringT
& __s
)
446 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
447 __s
.data() + __s
.size());
449 __throw_regex_error(regex_constants::error_collate
,
450 "Invalid collate element.");
451 _M_char_set
.push_back(_M_translator
._M_translate(__st
[0]));
452 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
457 _M_add_equivalence_class(const _StringT
& __s
)
459 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
460 __s
.data() + __s
.size());
462 __throw_regex_error(regex_constants::error_collate
,
463 "Invalid equivalence class.");
464 __st
= _M_traits
.transform_primary(__st
.data(),
465 __st
.data() + __st
.size());
466 _M_equiv_set
.push_back(__st
);
467 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
470 // __neg should be true for \D, \S and \W only.
472 _M_add_character_class(const _StringT
& __s
, bool __neg
)
474 auto __mask
= _M_traits
.lookup_classname(__s
.data(),
475 __s
.data() + __s
.size(),
478 __throw_regex_error(regex_constants::error_collate
,
479 "Invalid character class.");
481 _M_class_set
|= __mask
;
483 _M_neg_class_set
.push_back(__mask
);
484 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
488 _M_make_range(_CharT __l
, _CharT __r
)
491 __throw_regex_error(regex_constants::error_range
,
492 "Invalid range in bracket expression.");
493 _M_range_set
.push_back(make_pair(_M_translator
._M_transform(__l
),
494 _M_translator
._M_transform(__r
)));
495 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
501 std::sort(_M_char_set
.begin(), _M_char_set
.end());
502 auto __end
= std::unique(_M_char_set
.begin(), _M_char_set
.end());
503 _M_char_set
.erase(__end
, _M_char_set
.end());
504 _M_make_cache(_UseCache());
505 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= true);
509 // Currently we only use the cache for char
510 typedef typename
std::is_same
<_CharT
, char>::type _UseCache
;
512 static constexpr size_t
514 1ul << (sizeof(_CharT
) * __CHAR_BIT__
* int(_UseCache::value
));
517 typedef typename
std::conditional
<_UseCache::value
,
518 std::bitset
<_S_cache_size
>,
519 _Dummy
>::type _CacheT
;
520 typedef typename
std::make_unsigned
<_CharT
>::type _UnsignedCharT
;
523 _M_apply(_CharT __ch
, false_type
) const;
526 _M_apply(_CharT __ch
, true_type
) const
527 { return _M_cache
[static_cast<_UnsignedCharT
>(__ch
)]; }
530 _M_make_cache(true_type
)
532 for (unsigned __i
= 0; __i
< _M_cache
.size(); __i
++)
533 _M_cache
[__i
] = _M_apply(static_cast<_CharT
>(__i
), false_type());
537 _M_make_cache(false_type
)
541 std::vector
<_CharT
> _M_char_set
;
542 std::vector
<_StringT
> _M_equiv_set
;
543 std::vector
<pair
<_StrTransT
, _StrTransT
>> _M_range_set
;
544 std::vector
<_CharClassT
> _M_neg_class_set
;
545 _CharClassT _M_class_set
;
546 _TransT _M_translator
;
547 const _TraitsT
& _M_traits
;
548 bool _M_is_non_matching
;
550 #ifdef _GLIBCXX_DEBUG
551 bool _M_is_ready
= false;
556 } // namespace __detail
557 _GLIBCXX_END_NAMESPACE_VERSION
560 #include <bits/regex_compiler.tcc>