PR debug/81307
[official-gcc.git] / libstdc++-v3 / include / bits / regex.tcc
blobfc2d5846c4675b33fbbc9f02d1a3a66b589d4769
1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2017 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
31 namespace std _GLIBCXX_VISIBILITY(default)
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
35 namespace __detail
37   // Result of merging regex_match and regex_search.
38   //
39   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40   // the other one if possible, for test purpose).
41   //
42   // That __match_mode is true means regex_match, else regex_search.
43   template<typename _BiIter, typename _Alloc,
44            typename _CharT, typename _TraitsT,
45            _RegexExecutorPolicy __policy,
46            bool __match_mode>
47     bool
48     __regex_algo_impl(_BiIter                              __s,
49                       _BiIter                              __e,
50                       match_results<_BiIter, _Alloc>&      __m,
51                       const basic_regex<_CharT, _TraitsT>& __re,
52                       regex_constants::match_flag_type     __flags)
53     {
54       if (__re._M_automaton == nullptr)
55         return false;
57       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58       __m._M_begin = __s;
59       __m._M_resize(__re._M_automaton->_M_sub_count());
60       for (auto& __it : __res)
61         __it.matched = false;
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65           || (__policy == _RegexExecutorPolicy::_S_alternate
66               && !__re._M_automaton->_M_has_backref))
67         {
68           _Executor<_BiIter, _Alloc, _TraitsT, false>
69             __executor(__s, __e, __m, __re, __flags);
70           if (__match_mode)
71             __ret = __executor._M_match();
72           else
73             __ret = __executor._M_search();
74         }
75       else
76         {
77           _Executor<_BiIter, _Alloc, _TraitsT, true>
78             __executor(__s, __e, __m, __re, __flags);
79           if (__match_mode)
80             __ret = __executor._M_match();
81           else
82             __ret = __executor._M_search();
83         }
84       if (__ret)
85         {
86           for (auto& __it : __res)
87             if (!__it.matched)
88               __it.first = __it.second = __e;
89           auto& __pre = __m._M_prefix();
90           auto& __suf = __m._M_suffix();
91           if (__match_mode)
92             {
93               __pre.matched = false;
94               __pre.first = __s;
95               __pre.second = __s;
96               __suf.matched = false;
97               __suf.first = __e;
98               __suf.second = __e;
99             }
100           else
101             {
102               __pre.first = __s;
103               __pre.second = __res[0].first;
104               __pre.matched = (__pre.first != __pre.second);
105               __suf.first = __res[0].second;
106               __suf.second = __e;
107               __suf.matched = (__suf.first != __suf.second);
108             }
109         }
110       else
111         {
112           __m._M_resize(0);
113           for (auto& __it : __res)
114             {
115               __it.matched = false;
116               __it.first = __it.second = __e;
117             }
118         }
119       return __ret;
120     }
123   template<typename _Ch_type>
124   template<typename _Fwd_iter>
125     typename regex_traits<_Ch_type>::string_type
126     regex_traits<_Ch_type>::
127     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
128     {
129       typedef std::ctype<char_type> __ctype_type;
130       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
132       static const char* __collatenames[] =
133         {
134           "NUL",
135           "SOH",
136           "STX",
137           "ETX",
138           "EOT",
139           "ENQ",
140           "ACK",
141           "alert",
142           "backspace",
143           "tab",
144           "newline",
145           "vertical-tab",
146           "form-feed",
147           "carriage-return",
148           "SO",
149           "SI",
150           "DLE",
151           "DC1",
152           "DC2",
153           "DC3",
154           "DC4",
155           "NAK",
156           "SYN",
157           "ETB",
158           "CAN",
159           "EM",
160           "SUB",
161           "ESC",
162           "IS4",
163           "IS3",
164           "IS2",
165           "IS1",
166           "space",
167           "exclamation-mark",
168           "quotation-mark",
169           "number-sign",
170           "dollar-sign",
171           "percent-sign",
172           "ampersand",
173           "apostrophe",
174           "left-parenthesis",
175           "right-parenthesis",
176           "asterisk",
177           "plus-sign",
178           "comma",
179           "hyphen",
180           "period",
181           "slash",
182           "zero",
183           "one",
184           "two",
185           "three",
186           "four",
187           "five",
188           "six",
189           "seven",
190           "eight",
191           "nine",
192           "colon",
193           "semicolon",
194           "less-than-sign",
195           "equals-sign",
196           "greater-than-sign",
197           "question-mark",
198           "commercial-at",
199           "A",
200           "B",
201           "C",
202           "D",
203           "E",
204           "F",
205           "G",
206           "H",
207           "I",
208           "J",
209           "K",
210           "L",
211           "M",
212           "N",
213           "O",
214           "P",
215           "Q",
216           "R",
217           "S",
218           "T",
219           "U",
220           "V",
221           "W",
222           "X",
223           "Y",
224           "Z",
225           "left-square-bracket",
226           "backslash",
227           "right-square-bracket",
228           "circumflex",
229           "underscore",
230           "grave-accent",
231           "a",
232           "b",
233           "c",
234           "d",
235           "e",
236           "f",
237           "g",
238           "h",
239           "i",
240           "j",
241           "k",
242           "l",
243           "m",
244           "n",
245           "o",
246           "p",
247           "q",
248           "r",
249           "s",
250           "t",
251           "u",
252           "v",
253           "w",
254           "x",
255           "y",
256           "z",
257           "left-curly-bracket",
258           "vertical-line",
259           "right-curly-bracket",
260           "tilde",
261           "DEL",
262         };
264       string __s;
265       for (; __first != __last; ++__first)
266         __s += __fctyp.narrow(*__first, 0);
268       for (const auto& __it : __collatenames)
269         if (__s == __it)
270           return string_type(1, __fctyp.widen(
271             static_cast<char>(&__it - __collatenames)));
273       // TODO Add digraph support:
274       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
276       return string_type();
277     }
279   template<typename _Ch_type>
280   template<typename _Fwd_iter>
281     typename regex_traits<_Ch_type>::char_class_type
282     regex_traits<_Ch_type>::
283     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
284     {
285       typedef std::ctype<char_type> __ctype_type;
286       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
288       // Mappings from class name to class mask.
289       static const pair<const char*, char_class_type> __classnames[] =
290       {
291         {"d", ctype_base::digit},
292         {"w", {ctype_base::alnum, _RegexMask::_S_under}},
293         {"s", ctype_base::space},
294         {"alnum", ctype_base::alnum},
295         {"alpha", ctype_base::alpha},
296         {"blank", ctype_base::blank},
297         {"cntrl", ctype_base::cntrl},
298         {"digit", ctype_base::digit},
299         {"graph", ctype_base::graph},
300         {"lower", ctype_base::lower},
301         {"print", ctype_base::print},
302         {"punct", ctype_base::punct},
303         {"space", ctype_base::space},
304         {"upper", ctype_base::upper},
305         {"xdigit", ctype_base::xdigit},
306       };
308       string __s;
309       for (; __first != __last; ++__first)
310         __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
312       for (const auto& __it : __classnames)
313         if (__s == __it.first)
314           {
315             if (__icase
316                 && ((__it.second
317                      & (ctype_base::lower | ctype_base::upper)) != 0))
318               return ctype_base::alpha;
319             return __it.second;
320           }
321       return 0;
322     }
324   template<typename _Ch_type>
325     bool
326     regex_traits<_Ch_type>::
327     isctype(_Ch_type __c, char_class_type __f) const
328     {
329       typedef std::ctype<char_type> __ctype_type;
330       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
332       return __fctyp.is(__f._M_base, __c)
333         // [[:w:]]
334         || ((__f._M_extended & _RegexMask::_S_under)
335             && __c == __fctyp.widen('_'));
336     }
338   template<typename _Ch_type>
339     int
340     regex_traits<_Ch_type>::
341     value(_Ch_type __ch, int __radix) const
342     {
343       std::basic_istringstream<char_type> __is(string_type(1, __ch));
344       long __v;
345       if (__radix == 8)
346         __is >> std::oct;
347       else if (__radix == 16)
348         __is >> std::hex;
349       __is >> __v;
350       return __is.fail() ? -1 : __v;
351     }
353   template<typename _Bi_iter, typename _Alloc>
354   template<typename _Out_iter>
355     _Out_iter match_results<_Bi_iter, _Alloc>::
356     format(_Out_iter __out,
357            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
358            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
359            match_flag_type __flags) const
360     {
361       __glibcxx_assert( ready() );
362       regex_traits<char_type> __traits;
363       typedef std::ctype<char_type> __ctype_type;
364       const __ctype_type&
365         __fctyp(use_facet<__ctype_type>(__traits.getloc()));
367       auto __output = [&](size_t __idx)
368         {
369           auto& __sub = (*this)[__idx];
370           if (__sub.matched)
371             __out = std::copy(__sub.first, __sub.second, __out);
372         };
374       if (__flags & regex_constants::format_sed)
375         {
376           for (; __fmt_first != __fmt_last;)
377             if (*__fmt_first == '&')
378               {
379                 __output(0);
380                 ++__fmt_first;
381               }
382             else if (*__fmt_first == '\\')
383               {
384                 if (++__fmt_first != __fmt_last
385                     && __fctyp.is(__ctype_type::digit, *__fmt_first))
386                   __output(__traits.value(*__fmt_first++, 10));
387                 else
388                   *__out++ = '\\';
389               }
390             else
391               *__out++ = *__fmt_first++;
392         }
393       else
394         {
395           while (1)
396             {
397               auto __next = std::find(__fmt_first, __fmt_last, '$');
398               if (__next == __fmt_last)
399                 break;
401               __out = std::copy(__fmt_first, __next, __out);
403               auto __eat = [&](char __ch) -> bool
404                 {
405                   if (*__next == __ch)
406                     {
407                       ++__next;
408                       return true;
409                     }
410                   return false;
411                 };
413               if (++__next == __fmt_last)
414                 *__out++ = '$';
415               else if (__eat('$'))
416                 *__out++ = '$';
417               else if (__eat('&'))
418                 __output(0);
419               else if (__eat('`'))
420                 {
421                   auto& __sub = _M_prefix();
422                   if (__sub.matched)
423                     __out = std::copy(__sub.first, __sub.second, __out);
424                 }
425               else if (__eat('\''))
426                 {
427                   auto& __sub = _M_suffix();
428                   if (__sub.matched)
429                     __out = std::copy(__sub.first, __sub.second, __out);
430                 }
431               else if (__fctyp.is(__ctype_type::digit, *__next))
432                 {
433                   long __num = __traits.value(*__next, 10);
434                   if (++__next != __fmt_last
435                       && __fctyp.is(__ctype_type::digit, *__next))
436                     {
437                       __num *= 10;
438                       __num += __traits.value(*__next++, 10);
439                     }
440                   if (0 <= __num && __num < this->size())
441                     __output(__num);
442                 }
443               else
444                 *__out++ = '$';
445               __fmt_first = __next;
446             }
447           __out = std::copy(__fmt_first, __fmt_last, __out);
448         }
449       return __out;
450     }
// Copy [__first, __last) to __out, replacing each match of __e with the
// expansion of the null-terminated format string __fmt.
//
// __flags is passed on to the matching and to match_results::format.
// In addition:
//   format_no_copy     suppresses copying of the text that did not match,
//   format_first_only  stops after the first match has been replaced.
//
// Returns the advanced output iterator.
template<typename _Out_iter, typename _Bi_iter,
         typename _Rx_traits, typename _Ch_type>
  _Out_iter
  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
                const basic_regex<_Ch_type, _Rx_traits>& __e,
                const _Ch_type* __fmt,
                regex_constants::match_flag_type __flags)
  {
    typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
    _IterT __i(__first, __last, __e, __flags);
    _IterT __end;

    const bool __keep_unmatched
      = !(__flags & regex_constants::format_no_copy);

    // Nothing matched: the input passes through unchanged (or vanishes).
    if (__i == __end)
      {
        if (__keep_unmatched)
          __out = std::copy(__first, __last, __out);
        return __out;
      }

    // Text after the most recently replaced match.
    sub_match<_Bi_iter> __tail;
    auto __len = char_traits<_Ch_type>::length(__fmt);
    for (; __i != __end; ++__i)
      {
        if (__keep_unmatched)
          __out = std::copy(__i->prefix().first, __i->prefix().second,
                            __out);
        __out = __i->format(__out, __fmt, __fmt + __len, __flags);
        __tail = __i->suffix();
        if (__flags & regex_constants::format_first_only)
          break;
      }
    if (__keep_unmatched)
      __out = std::copy(__tail.first, __tail.second, __out);
    return __out;
  }
488   template<typename _Bi_iter,
489            typename _Ch_type,
490            typename _Rx_traits>
491     bool
492     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
493     operator==(const regex_iterator& __rhs) const
494     {
495       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
496         return true;
497       return _M_pregex == __rhs._M_pregex
498           && _M_begin == __rhs._M_begin
499           && _M_end == __rhs._M_end
500           && _M_flags == __rhs._M_flags
501           && _M_match[0] == __rhs._M_match[0];
502     }
504   template<typename _Bi_iter,
505            typename _Ch_type,
506            typename _Rx_traits>
507     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
508     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
509     operator++()
510     {
511       // In all cases in which the call to regex_search returns true,
512       // match.prefix().first shall be equal to the previous value of
513       // match[0].second, and for each index i in the half-open range
514       // [0, match.size()) for which match[i].matched is true,
515       // match[i].position() shall return distance(begin, match[i].first).
516       // [28.12.1.4.5]
517       if (_M_match[0].matched)
518         {
519           auto __start = _M_match[0].second;
520           auto __prefix_first = _M_match[0].second;
521           if (_M_match[0].first == _M_match[0].second)
522             {
523               if (__start == _M_end)
524                 {
525                   _M_pregex = nullptr;
526                   return *this;
527                 }
528               else
529                 {
530                   if (regex_search(__start, _M_end, _M_match, *_M_pregex,
531                                    _M_flags
532                                    | regex_constants::match_not_null
533                                    | regex_constants::match_continuous))
534                     {
535                       __glibcxx_assert(_M_match[0].matched);
536                       auto& __prefix = _M_match._M_prefix();
537                       __prefix.first = __prefix_first;
538                       __prefix.matched = __prefix.first != __prefix.second;
539                       // [28.12.1.4.5]
540                       _M_match._M_begin = _M_begin;
541                       return *this;
542                     }
543                   else
544                     ++__start;
545                 }
546             }
547           _M_flags |= regex_constants::match_prev_avail;
548           if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
549             {
550               __glibcxx_assert(_M_match[0].matched);
551               auto& __prefix = _M_match._M_prefix();
552               __prefix.first = __prefix_first;
553               __prefix.matched = __prefix.first != __prefix.second;
554               // [28.12.1.4.5]
555               _M_match._M_begin = _M_begin;
556             }
557           else
558             _M_pregex = nullptr;
559         }
560       return *this;
561     }
563   template<typename _Bi_iter,
564            typename _Ch_type,
565            typename _Rx_traits>
566     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
567     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
568     operator=(const regex_token_iterator& __rhs)
569     {
570       _M_position = __rhs._M_position;
571       _M_subs = __rhs._M_subs;
572       _M_n = __rhs._M_n;
573       _M_suffix = __rhs._M_suffix;
574       _M_has_m1 = __rhs._M_has_m1;
575       _M_normalize_result();
576       return *this;
577     }
579   template<typename _Bi_iter,
580            typename _Ch_type,
581            typename _Rx_traits>
582     bool
583     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
584     operator==(const regex_token_iterator& __rhs) const
585     {
586       if (_M_end_of_seq() && __rhs._M_end_of_seq())
587         return true;
588       if (_M_suffix.matched && __rhs._M_suffix.matched
589           && _M_suffix == __rhs._M_suffix)
590         return true;
591       if (_M_end_of_seq() || _M_suffix.matched
592           || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
593         return false;
594       return _M_position == __rhs._M_position
595         && _M_n == __rhs._M_n
596         && _M_subs == __rhs._M_subs;
597     }
599   template<typename _Bi_iter,
600            typename _Ch_type,
601            typename _Rx_traits>
602     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
603     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
604     operator++()
605     {
606       _Position __prev = _M_position;
607       if (_M_suffix.matched)
608         *this = regex_token_iterator();
609       else if (_M_n + 1 < _M_subs.size())
610         {
611           _M_n++;
612           _M_result = &_M_current_match();
613         }
614       else
615         {
616           _M_n = 0;
617           ++_M_position;
618           if (_M_position != _Position())
619             _M_result = &_M_current_match();
620           else if (_M_has_m1 && __prev->suffix().length() != 0)
621             {
622               _M_suffix.matched = true;
623               _M_suffix.first = __prev->suffix().first;
624               _M_suffix.second = __prev->suffix().second;
625               _M_result = &_M_suffix;
626             }
627           else
628             *this = regex_token_iterator();
629         }
630       return *this;
631     }
633   template<typename _Bi_iter,
634            typename _Ch_type,
635            typename _Rx_traits>
636     void
637     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
638     _M_init(_Bi_iter __a, _Bi_iter __b)
639     {
640       _M_has_m1 = false;
641       for (auto __it : _M_subs)
642         if (__it == -1)
643           {
644             _M_has_m1 = true;
645             break;
646           }
647       if (_M_position != _Position())
648         _M_result = &_M_current_match();
649       else if (_M_has_m1)
650         {
651           _M_suffix.matched = true;
652           _M_suffix.first = __a;
653           _M_suffix.second = __b;
654           _M_result = &_M_suffix;
655         }
656       else
657         _M_result = nullptr;
658     }
660 _GLIBCXX_END_NAMESPACE_VERSION
661 } // namespace