2015-05-05 Yvan Roux <yvan.roux@linaro.org>
[official-gcc.git] / libstdc++-v3 / include / bits / regex.tcc
blobfedc2b9edff898f33177750e51d4886e89deaf08
1 // class template regex -*- C++ -*-
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
31 namespace std _GLIBCXX_VISIBILITY(default)
33 namespace __detail
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
37   // Result of merging regex_match and regex_search.
38   //
39   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40   // the other one if possible, for test purpose).
41   //
42   // That __match_mode is true means regex_match, else regex_search.
43   template<typename _BiIter, typename _Alloc,
44            typename _CharT, typename _TraitsT,
45            _RegexExecutorPolicy __policy,
46            bool __match_mode>
47     bool
48     __regex_algo_impl(_BiIter                              __s,
49                       _BiIter                              __e,
50                       match_results<_BiIter, _Alloc>&      __m,
51                       const basic_regex<_CharT, _TraitsT>& __re,
52                       regex_constants::match_flag_type     __flags)
53     {
54       if (__re._M_automaton == nullptr)
55         return false;
57       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58       __m._M_begin = __s;
59       __m._M_resize(__re._M_automaton->_M_sub_count());
60       for (auto& __it : __res)
61         __it.matched = false;
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65           || (__policy == _RegexExecutorPolicy::_S_alternate
66               && !__re._M_automaton->_M_has_backref))
67         {
68           _Executor<_BiIter, _Alloc, _TraitsT, false>
69             __executor(__s, __e, __m, __re, __flags);
70           if (__match_mode)
71             __ret = __executor._M_match();
72           else
73             __ret = __executor._M_search();
74         }
75       else
76         {
77           _Executor<_BiIter, _Alloc, _TraitsT, true>
78             __executor(__s, __e, __m, __re, __flags);
79           if (__match_mode)
80             __ret = __executor._M_match();
81           else
82             __ret = __executor._M_search();
83         }
84       if (__ret)
85         {
86           for (auto& __it : __res)
87             if (!__it.matched)
88               __it.first = __it.second = __e;
89           auto& __pre = __m._M_prefix();
90           auto& __suf = __m._M_suffix();
91           if (__match_mode)
92             {
93               __pre.matched = false;
94               __pre.first = __s;
95               __pre.second = __s;
96               __suf.matched = false;
97               __suf.first = __e;
98               __suf.second = __e;
99             }
100           else
101             {
102               __pre.first = __s;
103               __pre.second = __res[0].first;
104               __pre.matched = (__pre.first != __pre.second);
105               __suf.first = __res[0].second;
106               __suf.second = __e;
107               __suf.matched = (__suf.first != __suf.second);
108             }
109         }
110       else
111         {
112           __m._M_resize(0);
113           for (auto& __it : __res)
114             {
115               __it.matched = false;
116               __it.first = __it.second = __e;
117             }
118         }
119       return __ret;
120     }
122 _GLIBCXX_END_NAMESPACE_VERSION
125 _GLIBCXX_BEGIN_NAMESPACE_VERSION
127   template<typename _Ch_type>
128   template<typename _Fwd_iter>
129     typename regex_traits<_Ch_type>::string_type
130     regex_traits<_Ch_type>::
131     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
132     {
133       typedef std::ctype<char_type> __ctype_type;
134       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
136       static const char* __collatenames[] =
137         {
138           "NUL",
139           "SOH",
140           "STX",
141           "ETX",
142           "EOT",
143           "ENQ",
144           "ACK",
145           "alert",
146           "backspace",
147           "tab",
148           "newline",
149           "vertical-tab",
150           "form-feed",
151           "carriage-return",
152           "SO",
153           "SI",
154           "DLE",
155           "DC1",
156           "DC2",
157           "DC3",
158           "DC4",
159           "NAK",
160           "SYN",
161           "ETB",
162           "CAN",
163           "EM",
164           "SUB",
165           "ESC",
166           "IS4",
167           "IS3",
168           "IS2",
169           "IS1",
170           "space",
171           "exclamation-mark",
172           "quotation-mark",
173           "number-sign",
174           "dollar-sign",
175           "percent-sign",
176           "ampersand",
177           "apostrophe",
178           "left-parenthesis",
179           "right-parenthesis",
180           "asterisk",
181           "plus-sign",
182           "comma",
183           "hyphen",
184           "period",
185           "slash",
186           "zero",
187           "one",
188           "two",
189           "three",
190           "four",
191           "five",
192           "six",
193           "seven",
194           "eight",
195           "nine",
196           "colon",
197           "semicolon",
198           "less-than-sign",
199           "equals-sign",
200           "greater-than-sign",
201           "question-mark",
202           "commercial-at",
203           "A",
204           "B",
205           "C",
206           "D",
207           "E",
208           "F",
209           "G",
210           "H",
211           "I",
212           "J",
213           "K",
214           "L",
215           "M",
216           "N",
217           "O",
218           "P",
219           "Q",
220           "R",
221           "S",
222           "T",
223           "U",
224           "V",
225           "W",
226           "X",
227           "Y",
228           "Z",
229           "left-square-bracket",
230           "backslash",
231           "right-square-bracket",
232           "circumflex",
233           "underscore",
234           "grave-accent",
235           "a",
236           "b",
237           "c",
238           "d",
239           "e",
240           "f",
241           "g",
242           "h",
243           "i",
244           "j",
245           "k",
246           "l",
247           "m",
248           "n",
249           "o",
250           "p",
251           "q",
252           "r",
253           "s",
254           "t",
255           "u",
256           "v",
257           "w",
258           "x",
259           "y",
260           "z",
261           "left-curly-bracket",
262           "vertical-line",
263           "right-curly-bracket",
264           "tilde",
265           "DEL",
266         };
268       string __s;
269       for (; __first != __last; ++__first)
270         __s += __fctyp.narrow(*__first, 0);
272       for (const auto& __it : __collatenames)
273         if (__s == __it)
274           return string_type(1, __fctyp.widen(
275             static_cast<char>(&__it - __collatenames)));
277       // TODO Add digraph support:
278       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
280       return string_type();
281     }
283   template<typename _Ch_type>
284   template<typename _Fwd_iter>
285     typename regex_traits<_Ch_type>::char_class_type
286     regex_traits<_Ch_type>::
287     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
288     {
289       typedef std::ctype<char_type> __ctype_type;
290       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
292       // Mappings from class name to class mask.
293       static const pair<const char*, char_class_type> __classnames[] =
294       {
295         {"d", ctype_base::digit},
296         {"w", {ctype_base::alnum, _RegexMask::_S_under}},
297         {"s", ctype_base::space},
298         {"alnum", ctype_base::alnum},
299         {"alpha", ctype_base::alpha},
300         {"blank", ctype_base::blank},
301         {"cntrl", ctype_base::cntrl},
302         {"digit", ctype_base::digit},
303         {"graph", ctype_base::graph},
304         {"lower", ctype_base::lower},
305         {"print", ctype_base::print},
306         {"punct", ctype_base::punct},
307         {"space", ctype_base::space},
308         {"upper", ctype_base::upper},
309         {"xdigit", ctype_base::xdigit},
310       };
312       string __s;
313       for (; __first != __last; ++__first)
314         __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
316       for (const auto& __it : __classnames)
317         if (__s == __it.first)
318           {
319             if (__icase
320                 && ((__it.second
321                      & (ctype_base::lower | ctype_base::upper)) != 0))
322               return ctype_base::alpha;
323             return __it.second;
324           }
325       return 0;
326     }
328   template<typename _Ch_type>
329     bool
330     regex_traits<_Ch_type>::
331     isctype(_Ch_type __c, char_class_type __f) const
332     {
333       typedef std::ctype<char_type> __ctype_type;
334       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
336       return __fctyp.is(__f._M_base, __c)
337         // [[:w:]]
338         || ((__f._M_extended & _RegexMask::_S_under)
339             && __c == __fctyp.widen('_'));
340     }
342   template<typename _Ch_type>
343     int
344     regex_traits<_Ch_type>::
345     value(_Ch_type __ch, int __radix) const
346     {
347       std::basic_istringstream<char_type> __is(string_type(1, __ch));
348       long __v;
349       if (__radix == 8)
350         __is >> std::oct;
351       else if (__radix == 16)
352         __is >> std::hex;
353       __is >> __v;
354       return __is.fail() ? -1 : __v;
355     }
357   template<typename _Bi_iter, typename _Alloc>
358   template<typename _Out_iter>
359     _Out_iter match_results<_Bi_iter, _Alloc>::
360     format(_Out_iter __out,
361            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
362            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
363            match_flag_type __flags) const
364     {
365       _GLIBCXX_DEBUG_ASSERT( ready() );
366       regex_traits<char_type> __traits;
367       typedef std::ctype<char_type> __ctype_type;
368       const __ctype_type&
369         __fctyp(use_facet<__ctype_type>(__traits.getloc()));
371       auto __output = [&](size_t __idx)
372         {
373           auto& __sub = (*this)[__idx];
374           if (__sub.matched)
375             __out = std::copy(__sub.first, __sub.second, __out);
376         };
378       if (__flags & regex_constants::format_sed)
379         {
380           for (; __fmt_first != __fmt_last;)
381             if (*__fmt_first == '&')
382               {
383                 __output(0);
384                 ++__fmt_first;
385               }
386             else if (*__fmt_first == '\\')
387               {
388                 if (++__fmt_first != __fmt_last
389                     && __fctyp.is(__ctype_type::digit, *__fmt_first))
390                   __output(__traits.value(*__fmt_first++, 10));
391                 else
392                   *__out++ = '\\';
393               }
394             else
395               *__out++ = *__fmt_first++;
396         }
397       else
398         {
399           while (1)
400             {
401               auto __next = std::find(__fmt_first, __fmt_last, '$');
402               if (__next == __fmt_last)
403                 break;
405               __out = std::copy(__fmt_first, __next, __out);
407               auto __eat = [&](char __ch) -> bool
408                 {
409                   if (*__next == __ch)
410                     {
411                       ++__next;
412                       return true;
413                     }
414                   return false;
415                 };
417               if (++__next == __fmt_last)
418                 *__out++ = '$';
419               else if (__eat('$'))
420                 *__out++ = '$';
421               else if (__eat('&'))
422                 __output(0);
423               else if (__eat('`'))
424                 {
425                   auto& __sub = _M_prefix();
426                   if (__sub.matched)
427                     __out = std::copy(__sub.first, __sub.second, __out);
428                 }
429               else if (__eat('\''))
430                 {
431                   auto& __sub = _M_suffix();
432                   if (__sub.matched)
433                     __out = std::copy(__sub.first, __sub.second, __out);
434                 }
435               else if (__fctyp.is(__ctype_type::digit, *__next))
436                 {
437                   long __num = __traits.value(*__next, 10);
438                   if (++__next != __fmt_last
439                       && __fctyp.is(__ctype_type::digit, *__next))
440                     {
441                       __num *= 10;
442                       __num += __traits.value(*__next++, 10);
443                     }
444                   if (0 <= __num && __num < this->size())
445                     __output(__num);
446                 }
447               else
448                 *__out++ = '$';
449               __fmt_first = __next;
450             }
451           __out = std::copy(__fmt_first, __fmt_last, __out);
452         }
453       return __out;
454     }
456   template<typename _Out_iter, typename _Bi_iter,
457            typename _Rx_traits, typename _Ch_type>
458     _Out_iter
459     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
460                   const basic_regex<_Ch_type, _Rx_traits>& __e,
461                   const _Ch_type* __fmt,
462                   regex_constants::match_flag_type __flags)
463     {
464       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
465       _IterT __i(__first, __last, __e, __flags);
466       _IterT __end;
467       if (__i == __end)
468         {
469           if (!(__flags & regex_constants::format_no_copy))
470             __out = std::copy(__first, __last, __out);
471         }
472       else
473         {
474           sub_match<_Bi_iter> __last;
475           auto __len = char_traits<_Ch_type>::length(__fmt);
476           for (; __i != __end; ++__i)
477             {
478               if (!(__flags & regex_constants::format_no_copy))
479                 __out = std::copy(__i->prefix().first, __i->prefix().second,
480                                   __out);
481               __out = __i->format(__out, __fmt, __fmt + __len, __flags);
482               __last = __i->suffix();
483               if (__flags & regex_constants::format_first_only)
484                 break;
485             }
486           if (!(__flags & regex_constants::format_no_copy))
487             __out = std::copy(__last.first, __last.second, __out);
488         }
489       return __out;
490     }
492   template<typename _Bi_iter,
493            typename _Ch_type,
494            typename _Rx_traits>
495     bool
496     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
497     operator==(const regex_iterator& __rhs) const
498     {
499       return (_M_match.empty() && __rhs._M_match.empty())
500         || (_M_begin == __rhs._M_begin
501             && _M_end == __rhs._M_end
502             && _M_pregex == __rhs._M_pregex
503             && _M_flags == __rhs._M_flags
504             && _M_match[0] == __rhs._M_match[0]);
505     }
507   template<typename _Bi_iter,
508            typename _Ch_type,
509            typename _Rx_traits>
510     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
511     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
512     operator++()
513     {
514       // In all cases in which the call to regex_search returns true,
515       // match.prefix().first shall be equal to the previous value of
516       // match[0].second, and for each index i in the half-open range
517       // [0, match.size()) for which match[i].matched is true,
518       // match[i].position() shall return distance(begin, match[i].first).
519       // [28.12.1.4.5]
520       if (_M_match[0].matched)
521         {
522           auto __start = _M_match[0].second;
523           auto __prefix_first = _M_match[0].second;
524           if (_M_match[0].first == _M_match[0].second)
525             {
526               if (__start == _M_end)
527                 {
528                   _M_match = value_type();
529                   return *this;
530                 }
531               else
532                 {
533                   if (regex_search(__start, _M_end, _M_match, *_M_pregex,
534                                    _M_flags
535                                    | regex_constants::match_not_null
536                                    | regex_constants::match_continuous))
537                     {
538                       _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
539                       auto& __prefix = _M_match._M_prefix();
540                       __prefix.first = __prefix_first;
541                       __prefix.matched = __prefix.first != __prefix.second;
542                       // [28.12.1.4.5]
543                       _M_match._M_begin = _M_begin;
544                       return *this;
545                     }
546                   else
547                     ++__start;
548                 }
549             }
550           _M_flags |= regex_constants::match_prev_avail;
551           if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
552             {
553               _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
554               auto& __prefix = _M_match._M_prefix();
555               __prefix.first = __prefix_first;
556               __prefix.matched = __prefix.first != __prefix.second;
557               // [28.12.1.4.5]
558               _M_match._M_begin = _M_begin;
559             }
560           else
561             _M_match = value_type();
562         }
563       return *this;
564     }
566   template<typename _Bi_iter,
567            typename _Ch_type,
568            typename _Rx_traits>
569     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
570     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
571     operator=(const regex_token_iterator& __rhs)
572     {
573       _M_position = __rhs._M_position;
574       _M_subs = __rhs._M_subs;
575       _M_n = __rhs._M_n;
576       _M_suffix = __rhs._M_suffix;
577       _M_has_m1 = __rhs._M_has_m1;
578       _M_normalize_result();
579       return *this;
580     }
582   template<typename _Bi_iter,
583            typename _Ch_type,
584            typename _Rx_traits>
585     bool
586     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
587     operator==(const regex_token_iterator& __rhs) const
588     {
589       if (_M_end_of_seq() && __rhs._M_end_of_seq())
590         return true;
591       if (_M_suffix.matched && __rhs._M_suffix.matched
592           && _M_suffix == __rhs._M_suffix)
593         return true;
594       if (_M_end_of_seq() || _M_suffix.matched
595           || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
596         return false;
597       return _M_position == __rhs._M_position
598         && _M_n == __rhs._M_n
599         && _M_subs == __rhs._M_subs;
600     }
602   template<typename _Bi_iter,
603            typename _Ch_type,
604            typename _Rx_traits>
605     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
606     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
607     operator++()
608     {
609       _Position __prev = _M_position;
610       if (_M_suffix.matched)
611         *this = regex_token_iterator();
612       else if (_M_n + 1 < _M_subs.size())
613         {
614           _M_n++;
615           _M_result = &_M_current_match();
616         }
617       else
618         {
619           _M_n = 0;
620           ++_M_position;
621           if (_M_position != _Position())
622             _M_result = &_M_current_match();
623           else if (_M_has_m1 && __prev->suffix().length() != 0)
624             {
625               _M_suffix.matched = true;
626               _M_suffix.first = __prev->suffix().first;
627               _M_suffix.second = __prev->suffix().second;
628               _M_result = &_M_suffix;
629             }
630           else
631             *this = regex_token_iterator();
632         }
633       return *this;
634     }
636   template<typename _Bi_iter,
637            typename _Ch_type,
638            typename _Rx_traits>
639     void
640     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
641     _M_init(_Bi_iter __a, _Bi_iter __b)
642     {
643       _M_has_m1 = false;
644       for (auto __it : _M_subs)
645         if (__it == -1)
646           {
647             _M_has_m1 = true;
648             break;
649           }
650       if (_M_position != _Position())
651         _M_result = &_M_current_match();
652       else if (_M_has_m1)
653         {
654           _M_suffix.matched = true;
655           _M_suffix.first = __a;
656           _M_suffix.second = __b;
657           _M_result = &_M_suffix;
658         }
659       else
660         _M_result = nullptr;
661     }
663 _GLIBCXX_END_NAMESPACE_VERSION
664 } // namespace