1 // class template regex -*- C++ -*-
3 // Copyright (C) 2010-2018 Free Software Foundation, Inc.
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
26 * @file bits/regex_compiler.h
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
31 namespace std
_GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 _GLIBCXX_BEGIN_NAMESPACE_CXX11
39 _GLIBCXX_END_NAMESPACE_CXX11
44 * @addtogroup regex-detail
// Forward declaration of the bracket-expression matcher (one type parameter
// followed by two bool parameters).  It is needed early because the
// _M_expression_term() declaration below takes a reference to it; the full
// definition appears further down in this file.
48 template<typename
, bool, bool>
49 struct _BracketMatcher
;
52 * @brief Builds an NFA from an input iterator range.
54 * The %_TraitsT type should fulfill requirements [28.3].
// _Compiler<_TraitsT>: the regex compiler.  NOTE(review): the class-head line
// and the brace/access-specifier lines are not visible in this listing, so
// the public/private split below is not shown.
56 template<typename _TraitsT
>
// Basic types: the character type comes from the traits, the input is
// consumed as a plain pointer range (_IterT is const _CharT*), _RegexT is the
// NFA type being built and _FlagT is the syntax option bitmask.
60 typedef typename
_TraitsT::char_type _CharT
;
61 typedef const _CharT
* _IterT
;
62 typedef _NFA
<_TraitsT
> _RegexT
;
63 typedef regex_constants::syntax_option_type _FlagT
;
// Constructor: takes the pattern as the pointer pair (__b, __e), a locale
// (the parameter is named __traits but its type is _TraitsT::locale_type) and
// the syntax flags.  __compile_nfa() below passes (first, first + length),
// i.e. __e is one past the last pattern character.
65 _Compiler(_IterT __b
, _IterT __e
,
66 const typename
_TraitsT::locale_type
& __traits
, _FlagT __flags
);
// Accessor returning the compiled automaton as shared_ptr<const _RegexT>.
// It moves out of _M_nfa, so _M_nfa is left moved-from after the call.
// NOTE(review): the declarator line is not visible here; __compile_nfa()
// below invokes it as _M_get_nfa().
68 shared_ptr
<const _RegexT
>
70 { return std::move(_M_nfa
); }
// Helper types: scanner over _CharT and its token type, the traits' string
// type, a state-sequence type with a std::stack of them, and the ctype facet
// type for _CharT.
73 typedef _Scanner
<_CharT
> _ScannerT
;
74 typedef typename
_TraitsT::string_type _StringT
;
75 typedef typename
_ScannerT::_TokenT _TokenT
;
76 typedef _StateSeq
<_TraitsT
> _StateSeqT
;
77 typedef std::stack
<_StateSeqT
> _StackT
;
78 typedef std::ctype
<_CharT
> _CtypeT
;
80 // accepts a specific token or returns false.
82 _M_match_token(_TokenT __token
);
// Declaration only; the definition is not in this header (presumably in
// bits/regex_compiler.tcc, which is included at the end of the file).
103 _M_bracket_expression();
// Matcher-insertion helpers.  Each is a member template on two bools,
// __icase and __collate, which are the same two bools forwarded to
// _BracketMatcher in _M_expression_term() below.  Only declarations are
// visible here.
105 template<bool __icase
, bool __collate
>
107 _M_insert_any_matcher_ecma();
109 template<bool __icase
, bool __collate
>
111 _M_insert_any_matcher_posix();
113 template<bool __icase
, bool __collate
>
115 _M_insert_char_matcher();
117 template<bool __icase
, bool __collate
>
119 _M_insert_character_class_matcher();
121 template<bool __icase
, bool __collate
>
123 _M_insert_bracket_matcher(bool __neg
);
125 // Returns true if successfully matched one term and should continue.
126 // Returns false if the compiler should move on.
127 template<bool __icase
, bool __collate
>
// Takes the pair<bool, _CharT> __last_char by non-const reference and a
// reference to the bracket matcher being populated.  The remainder of the
// parameter list is not visible in this listing.
129 _M_expression_term(pair
<bool, _CharT
>& __last_char
,
130 _BracketMatcher
<_TraitsT
, __icase
, __collate
>&
// Declaration only.  NOTE(review): presumably converts the current token's
// text to an int in base __radix -- confirm against the definition.
134 _M_cur_int_value(int __radix
);
// Fragment of a helper that reads the top of _M_stack; its declarator, the
// rest of its body and the _M_stack declaration are not visible here.
142 auto ret
= _M_stack
.top();
// Data members: the scanner, the NFA under construction (non-const while it
// is being built) and references to the traits object and the ctype facet.
148 _ScannerT _M_scanner
;
149 shared_ptr
<_RegexT
> _M_nfa
;
152 const _TraitsT
& _M_traits
;
153 const _CtypeT
& _M_ctype
;
// Trait over container types.  The primary template derives from
// std::false_type, so an arbitrary type reports "no contiguous iterators".
156 template<typename _Tp
>
157 struct __has_contiguous_iter
: std::false_type
{ };
// Partial specialization for std::basic_string.  NOTE(review): its base
// clause/body is not visible in this listing; presumably std::true_type.
159 template<typename _Ch
, typename _Tr
, typename _Alloc
>
160 struct __has_contiguous_iter
<std::basic_string
<_Ch
, _Tr
, _Alloc
>>
// Partial specialization for std::vector.  NOTE(review): base clause/body not
// visible in this listing either; presumably std::true_type.
164 template<typename _Tp
, typename _Alloc
>
165 struct __has_contiguous_iter
<std::vector
<_Tp
, _Alloc
>>
// Trait over iterator types, used to select the __compile_nfa() overload.
// Primary template: false for any iterator type not matched below.
169 template<typename _Tp
>
170 struct __is_contiguous_normal_iter
: std::false_type
{ };
// Raw pointers are always accepted.
172 template<typename _CharT
>
173 struct __is_contiguous_normal_iter
<_CharT
*> : std::true_type
{ };
// __gnu_cxx::__normal_iterator<_Tp, _Cont> is accepted exactly when its
// container type satisfies __has_contiguous_iter (the result is inherited
// through ::type).
175 template<typename _Tp
, typename _Cont
>
177 __is_contiguous_normal_iter
<__gnu_cxx::__normal_iterator
<_Tp
, _Cont
>>
178 : __has_contiguous_iter
<_Cont
>::type
// SFINAE helper: names std::shared_ptr<const _NFA<_TraitsT>> when _Iter
// satisfies __is_contiguous_normal_iter, and is ill-formed otherwise.  Used
// as the return type of the pointer-based __compile_nfa() overload.
181 template<typename _Iter
, typename _TraitsT
>
182 using __enable_if_contiguous_normal_iter
183 = typename enable_if
< __is_contiguous_normal_iter
<_Iter
>::value
,
184 std::shared_ptr
<const _NFA
<_TraitsT
>> >::type
;
// SFINAE helper with the inverted condition: names
// std::shared_ptr<const _NFA<_TraitsT>> only when _Iter does NOT satisfy
// __is_contiguous_normal_iter.  Used as the return type of the generic
// __compile_nfa() overload, so exactly one of the two overloads is viable.
186 template<typename _Iter
, typename _TraitsT
>
187 using __disable_if_contiguous_normal_iter
188 = typename enable_if
< !__is_contiguous_normal_iter
<_Iter
>::value
,
189 std::shared_ptr
<const _NFA
<_TraitsT
>> >::type
;
// __compile_nfa, overload for iterators that satisfy
// __is_contiguous_normal_iter.  Converts the iterator range to a raw pointer
// range and runs _Compiler on it directly, without copying the pattern.
//   __first, __last  pattern range
//   __loc            locale forwarded to the compiler
//   __flags          syntax options forwarded to the compiler
// Returns the compiler's _M_get_nfa() result.
191 template<typename _TraitsT
, typename _FwdIter
>
192 inline __enable_if_contiguous_normal_iter
<_FwdIter
, _TraitsT
>
193 __compile_nfa(_FwdIter __first
, _FwdIter __last
,
194 const typename
_TraitsT::locale_type
& __loc
,
195 regex_constants::syntax_option_type __flags
)
197 size_t __len
= __last
- __first
;
// Only dereference __first when the range is non-empty; an empty range is
// represented by a null pointer pair.
198 const auto* __cfirst
= __len
? std::__addressof(*__first
) : nullptr;
199 using _Cmplr
= _Compiler
<_TraitsT
>;
200 return _Cmplr(__cfirst
, __cfirst
+ __len
, __loc
, __flags
)._M_get_nfa();
// __compile_nfa, fallback overload for every other iterator type.  Copies
// the pattern into a temporary basic_string and re-invokes __compile_nfa
// with that string's data() pointers, which selects the pointer-based
// overload.  NOTE(review): the tail of the forwarding call is not visible in
// this listing; presumably it passes __loc and __flags through unchanged.
203 template<typename _TraitsT
, typename _FwdIter
>
204 inline __disable_if_contiguous_normal_iter
<_FwdIter
, _TraitsT
>
205 __compile_nfa(_FwdIter __first
, _FwdIter __last
,
206 const typename
_TraitsT::locale_type
& __loc
,
207 regex_constants::syntax_option_type __flags
)
209 const basic_string
<typename
_TraitsT::char_type
> __str(__first
, __last
);
210 return __compile_nfa
<_TraitsT
>(__str
.data(), __str
.data() + __str
.size(),
// Shared implementation for the _RegexTranslator family: wraps a reference
// to the traits object and provides character translation, single-character
// collation transforms and range tests.  In this base the transformed form
// of a character (_StrTransT) is the traits' string type.
215 template<typename _TraitsT
, bool __icase
, bool __collate
>
216 class _RegexTranslatorBase
219 typedef typename
_TraitsT::char_type _CharT
;
220 typedef typename
_TraitsT::string_type _StringT
;
221 typedef _StringT _StrTransT
;
// Stores a reference to __traits; the caller must keep it alive for as long
// as the translator is used.
224 _RegexTranslatorBase(const _TraitsT
& __traits
)
225 : _M_traits(__traits
)
// Translates one character through the traits.  Two return paths exist:
// translate_nocase() and translate().  NOTE(review): the condition choosing
// between them is not visible in this listing; presumably it is __icase.
229 _M_translate(_CharT __ch
) const
232 return _M_traits
.translate_nocase(__ch
);
234 return _M_traits
.translate(__ch
);
// Builds a one-character string from __ch and returns the traits' transform()
// of it.
240 _M_transform(_CharT __ch
) const
242 _StrTransT
__str(1, __ch
);
243 return _M_traits
.transform(__str
.begin(), __str
.end());
246 // See LWG 523. It's not efficiently implementable when _TraitsT is not
247 // std::regex_traits<>, and __collate is true. See specializations for
248 // implementations of other cases.
// Inclusive range test on already-transformed strings, using the string
// type's own <= comparison.
250 _M_match_range(const _StrTransT
& __first
, const _StrTransT
& __last
,
251 const _StrTransT
& __s
) const
252 { return __first
<= __s
&& __s
<= __last
; }
// Case-insensitive inclusive range test: converts __ch to lower and to upper
// case with the ctype facet of the traits' locale and succeeds if either
// form lies within [__first, __last].
255 bool _M_in_range_icase(_CharT __first
, _CharT __last
, _CharT __ch
) const
257 typedef std::ctype
<_CharT
> __ctype_type
;
258 const auto& __fctyp
= use_facet
<__ctype_type
>(this->_M_traits
.getloc());
259 auto __lower
= __fctyp
.tolower(__ch
);
260 auto __upper
= __fctyp
.toupper(__ch
);
261 return (__first
<= __lower
&& __lower
<= __last
)
262 || (__first
<= __upper
&& __upper
<= __last
);
// Non-owning reference to the traits supplied at construction.
265 const _TraitsT
& _M_traits
;
// Primary _RegexTranslator template: publicly derives from
// _RegexTranslatorBase with the same arguments and names that base _Base.
// The specializations that follow cover (__icase, false),
// (std::regex_traits, true, true) and (false, false).  NOTE(review): any
// further members of this primary template are not visible in this listing.
268 template<typename _TraitsT
, bool __icase
, bool __collate
>
269 class _RegexTranslator
270 : public _RegexTranslatorBase
<_TraitsT
, __icase
, __collate
>
273 typedef _RegexTranslatorBase
<_TraitsT
, __icase
, __collate
> _Base
;
// Specialization for __collate == false.  Without collation the transformed
// form of a character is the character itself, so _StrTransT is redefined as
// _CharT and ranges are compared on plain character values.
277 template<typename _TraitsT
, bool __icase
>
278 class _RegexTranslator
<_TraitsT
, __icase
, false>
279 : public _RegexTranslatorBase
<_TraitsT
, __icase
, false>
282 typedef _RegexTranslatorBase
<_TraitsT
, __icase
, false> _Base
;
283 typedef typename
_Base::_CharT _CharT
;
284 typedef _CharT _StrTransT
;
// NOTE(review): the body is not visible in this listing; since _StrTransT is
// _CharT here it presumably returns __ch unchanged -- confirm.
289 _M_transform(_CharT __ch
) const
// Inclusive range test with two return paths: a direct character comparison
// and the case-insensitive _M_in_range_icase().  NOTE(review): the selecting
// condition is not visible here; presumably the direct path is taken when
// __icase is false.
293 _M_match_range(_CharT __first
, _CharT __last
, _CharT __ch
) const
296 return __first
<= __ch
&& __ch
<= __last
;
297 return this->_M_in_range_icase(__first
, __last
, __ch
);
// Specialization for std::regex_traits with both __icase and __collate true.
// It keeps the base's string-typed _StrTransT but performs the range test on
// the single characters the strings contain.
301 template<typename _CharType
>
302 class _RegexTranslator
<std::regex_traits
<_CharType
>, true, true>
303 : public _RegexTranslatorBase
<std::regex_traits
<_CharType
>, true, true>
306 typedef _RegexTranslatorBase
<std::regex_traits
<_CharType
>, true, true>
308 typedef typename
_Base::_CharT _CharT
;
309 typedef typename
_Base::_StrTransT _StrTransT
;
// All three strings are asserted to hold exactly one character; the test is
// then delegated to _M_in_range_icase() on those characters.
314 _M_match_range(const _StrTransT
& __first
, const _StrTransT
& __last
,
315 const _StrTransT
& __str
) const
317 __glibcxx_assert(__first
.size() == 1);
318 __glibcxx_assert(__last
.size() == 1);
319 __glibcxx_assert(__str
.size() == 1);
320 return this->_M_in_range_icase(__first
[0], __last
[0], __str
[0]);
// Specialization for __icase == false and __collate == false.  No base class
// appears in this listing, and the constructor's traits parameter is
// unnamed, i.e. the traits object is accepted only for interface
// compatibility.
324 template<typename _TraitsT
>
325 class _RegexTranslator
<_TraitsT
, false, false>
328 typedef typename
_TraitsT::char_type _CharT
;
329 typedef _CharT _StrTransT
;
332 _RegexTranslator(const _TraitsT
&)
// NOTE(review): bodies of _M_translate and _M_transform are not visible in
// this listing; presumably both return __ch unchanged -- confirm.
336 _M_translate(_CharT __ch
) const
340 _M_transform(_CharT __ch
) const
// Inclusive range test on raw character values.
344 _M_match_range(_CharT __first
, _CharT __last
, _CharT __ch
) const
345 { return __first
<= __ch
&& __ch
<= __last
; }
348 template<typename _TraitsT
, bool __is_ecma
, bool __icase
, bool __collate
>
// _AnyMatcher specialization whose second template argument (named __is_ecma
// in the primary template's parameter list) is false.  Matches any character
// whose translation differs from the translation of '\0'.
351 template<typename _TraitsT
, bool __icase
, bool __collate
>
352 struct _AnyMatcher
<_TraitsT
, false, __icase
, __collate
>
354 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
355 typedef typename
_TransT::_CharT _CharT
;
358 _AnyMatcher(const _TraitsT
& __traits
)
359 : _M_translator(__traits
)
// __nul is a function-local static: it is computed once, by whichever
// instance is called first, and then reused by every instance of this
// specialization.
363 operator()(_CharT __ch
) const
365 static auto __nul
= _M_translator
._M_translate('\0');
366 return _M_translator
._M_translate(__ch
) != __nul
;
369 _TransT _M_translator
;
// _AnyMatcher specialization whose second template argument (named __is_ecma
// in the primary template's parameter list) is true.  Matches any character
// except line terminators; the set of terminators depends on whether _CharT
// is char.
372 template<typename _TraitsT
, bool __icase
, bool __collate
>
373 struct _AnyMatcher
<_TraitsT
, true, __icase
, __collate
>
375 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
376 typedef typename
_TransT::_CharT _CharT
;
379 _AnyMatcher(const _TraitsT
& __traits
)
380 : _M_translator(__traits
)
// Tag-dispatches on is_same<_CharT, char>: true_type selects the narrow
// version, false_type the version that also knows U+2028 / U+2029.
384 operator()(_CharT __ch
) const
385 { return _M_apply(__ch
, typename is_same
<_CharT
, char>::type()); }
// _CharT is char: reject only '\n' and '\r'.  Both the input and the
// terminators are compared after translation.
388 _M_apply(_CharT __ch
, true_type
) const
390 auto __c
= _M_translator
._M_translate(__ch
);
391 auto __n
= _M_translator
._M_translate('\n');
392 auto __r
= _M_translator
._M_translate('\r');
393 return __c
!= __n
&& __c
!= __r
;
// _CharT is not char: additionally reject U+2028 and U+2029.
397 _M_apply(_CharT __ch
, false_type
) const
399 auto __c
= _M_translator
._M_translate(__ch
);
400 auto __n
= _M_translator
._M_translate('\n');
401 auto __r
= _M_translator
._M_translate('\r');
402 auto __u2028
= _M_translator
._M_translate(u
'\u2028');
403 auto __u2029
= _M_translator
._M_translate(u
'\u2029');
404 return __c
!= __n
&& __c
!= __r
&& __c
!= __u2028
&& __c
!= __u2029
;
407 _TransT _M_translator
;
// _CharMatcher: matches one specific character.  The character is translated
// once at construction and stored in _M_ch; each candidate is translated the
// same way before comparison.  NOTE(review): the struct-head line and the
// _M_ch declaration are not visible in this listing.
410 template<typename _TraitsT
, bool __icase
, bool __collate
>
413 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
414 typedef typename
_TransT::_CharT _CharT
;
// _M_translator is initialised first because _M_ch's initialiser uses it.
// NOTE(review): this relies on _M_translator being declared before _M_ch;
// the _M_ch declaration is not visible here -- confirm the member order.
416 _CharMatcher(_CharT __ch
, const _TraitsT
& __traits
)
417 : _M_translator(__traits
), _M_ch(_M_translator
._M_translate(__ch
))
421 operator()(_CharT __ch
) const
422 { return _M_ch
== _M_translator
._M_translate(__ch
); }
424 _TransT _M_translator
;
428 /// Matches a character range (bracket expression)
// _BracketMatcher collects the contents of a bracket expression: single
// characters, equivalence classes, ranges and character classes.  The
// visible _M_add_* / _M_make_range members populate it; a finalising step
// then sorts and de-duplicates the character set and builds the lookup
// cache, after which operator() answers whether a character matches.
// NOTE(review): lines holding only braces, conditions or return types are
// absent from this listing; comments below flag where that matters.
429 template<typename _TraitsT
, bool __icase
, bool __collate
>
430 struct _BracketMatcher
433 typedef _RegexTranslator
<_TraitsT
, __icase
, __collate
> _TransT
;
434 typedef typename
_TransT::_CharT _CharT
;
435 typedef typename
_TransT::_StrTransT _StrTransT
;
436 typedef typename
_TraitsT::string_type _StringT
;
437 typedef typename
_TraitsT::char_class_type _CharClassT
;
// Starts with an empty class mask, builds the translator from __traits,
// keeps a reference to __traits and records whether the expression is
// negated.
440 _BracketMatcher(bool __is_non_matching
,
441 const _TraitsT
& __traits
)
442 : _M_class_set(0), _M_translator(__traits
), _M_traits(__traits
),
443 _M_is_non_matching(__is_non_matching
)
// Match test.  Debug builds assert that the matcher has been finalised; the
// work is dispatched on _UseCache to the cached or uncached _M_apply().
447 operator()(_CharT __ch
) const
449 _GLIBCXX_DEBUG_ASSERT(_M_is_ready
);
450 return _M_apply(__ch
, _UseCache());
// Adds a single character, stored in translated form.  Like every mutator
// below, it clears the debug-only ready flag.
454 _M_add_char(_CharT __c
)
456 _M_char_set
.push_back(_M_translator
._M_translate(__c
));
457 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
// Adds a collating element by name.  Only the first character of the
// looked-up result (__st[0]) is stored, after translation.  Throws
// regex_error(error_collate); NOTE(review): the guarding condition is not
// visible in this listing -- presumably an empty lookup result.
461 _M_add_collate_element(const _StringT
& __s
)
463 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
464 __s
.data() + __s
.size());
466 __throw_regex_error(regex_constants::error_collate
,
467 "Invalid collate element.");
468 _M_char_set
.push_back(_M_translator
._M_translate(__st
[0]));
469 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
// Adds an equivalence class: looks the name up as a collating element, then
// stores its transform_primary() key in _M_equiv_set.  Throws
// regex_error(error_collate); NOTE(review): guarding condition not visible
// here -- presumably an empty lookup result.
474 _M_add_equivalence_class(const _StringT
& __s
)
476 auto __st
= _M_traits
.lookup_collatename(__s
.data(),
477 __s
.data() + __s
.size());
479 __throw_regex_error(regex_constants::error_collate
,
480 "Invalid equivalence class.");
481 __st
= _M_traits
.transform_primary(__st
.data(),
482 __st
.data() + __st
.size());
483 _M_equiv_set
.push_back(__st
);
484 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
487 // __neg should be true for \D, \S and \W only.
// Adds a named character class.  The mask from lookup_classname() is either
// OR-ed into _M_class_set or appended to _M_neg_class_set.  NOTE(review):
// the trailing lookup_classname() argument and the conditions selecting the
// throw and the two storage paths are not visible in this listing;
// presumably __neg routes the mask to _M_neg_class_set.
// NOTE(review): an invalid class name is reported with error_collate, while
// regex_constants also defines error_ctype for invalid character class
// names -- confirm which code is intended.
489 _M_add_character_class(const _StringT
& __s
, bool __neg
)
491 auto __mask
= _M_traits
.lookup_classname(__s
.data(),
492 __s
.data() + __s
.size(),
495 __throw_regex_error(regex_constants::error_collate
,
496 "Invalid character class.");
498 _M_class_set
|= __mask
;
500 _M_neg_class_set
.push_back(__mask
);
501 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
// Adds the range [__l, __r]; both endpoints are stored in the translator's
// transformed form.  Throws regex_error(error_range); NOTE(review): the
// guarding condition is not visible here -- presumably __l > __r.
505 _M_make_range(_CharT __l
, _CharT __r
)
508 __throw_regex_error(regex_constants::error_range
,
509 "Invalid range in bracket expression.");
510 _M_range_set
.push_back(make_pair(_M_translator
._M_transform(__l
),
511 _M_translator
._M_transform(__r
)));
512 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= false);
// Finalisation step (its declarator line is not visible in this listing):
// sorts and de-duplicates _M_char_set, builds the cache when one is in use,
// and sets the debug-only ready flag.
518 std::sort(_M_char_set
.begin(), _M_char_set
.end());
519 auto __end
= std::unique(_M_char_set
.begin(), _M_char_set
.end());
520 _M_char_set
.erase(__end
, _M_char_set
.end());
521 _M_make_cache(_UseCache());
522 _GLIBCXX_DEBUG_ONLY(_M_is_ready
= true);
526 // Currently we only use the cache for char
527 typedef typename
std::is_same
<_CharT
, char>::type _UseCache
;
// Number of cache slots: 2^(bits in _CharT) when caching is enabled.  When
// it is disabled the shift amount is 0, which yields 1.  The function name
// line is not visible here; it is referenced below as _S_cache_size().
529 static constexpr size_t
532 return 1ul << (sizeof(_CharT
) * __CHAR_BIT__
* int(_UseCache::value
));
// Cache storage type: a bitset with one bit per character value when caching
// is enabled, otherwise the placeholder type _Dummy (whose declaration is
// not visible in this listing).
536 typedef typename
std::conditional
<_UseCache::value
,
537 std::bitset
<_S_cache_size()>,
538 _Dummy
>::type _CacheT
;
539 typedef typename
std::make_unsigned
<_CharT
>::type _UnsignedCharT
;
// Uncached match test; declared here only, defined elsewhere.
542 _M_apply(_CharT __ch
, false_type
) const;
// Cached match test: one bitset lookup, indexed by the character converted
// to its unsigned type so that negative char values do not index out of
// range.
545 _M_apply(_CharT __ch
, true_type
) const
546 { return _M_cache
[static_cast<_UnsignedCharT
>(__ch
)]; }
// Fills every cache slot by running the uncached test on the corresponding
// character value.
549 _M_make_cache(true_type
)
551 for (unsigned __i
= 0; __i
< _M_cache
.size(); __i
++)
552 _M_cache
[__i
] = _M_apply(static_cast<_CharT
>(__i
), false_type());
// No-cache overload.  NOTE(review): body not visible in this listing;
// presumably empty.
556 _M_make_cache(false_type
)
// State: translated single characters, equivalence-class keys, transformed
// range endpoints, class masks kept separately in _M_neg_class_set, the
// combined class mask, the translator, the traits reference and the
// negation flag.
560 std::vector
<_CharT
> _M_char_set
;
561 std::vector
<_StringT
> _M_equiv_set
;
562 std::vector
<pair
<_StrTransT
, _StrTransT
>> _M_range_set
;
563 std::vector
<_CharClassT
> _M_neg_class_set
;
564 _CharClassT _M_class_set
;
565 _TransT _M_translator
;
566 const _TraitsT
& _M_traits
;
567 bool _M_is_non_matching
;
// Debug-only flag tracking whether the finalisation step has run since the
// last mutation.
569 #ifdef _GLIBCXX_DEBUG
570 bool _M_is_ready
= false;
575 } // namespace __detail
576 _GLIBCXX_END_NAMESPACE_VERSION
579 #include <bits/regex_compiler.tcc>