Cleanup release tags.
[boost.git] / Version_1_18_3 / boost / boost / regex.hpp
blob1da96a8c9baa3075d0eed1af455c68b9e14bae2f
1 /*
3 * Copyright (c) 1998-2000
4 * Dr John Maddock
6 * Permission to use, copy, modify, distribute and sell this software
7 * and its documentation for any purpose is hereby granted without fee,
8 * provided that the above copyright notice appear in all copies and
9 * that both that copyright notice and this permission notice appear
10 * in supporting documentation. Dr John Maddock makes no representations
11 * about the suitability of this software for any purpose.
12 * It is provided "as is" without express or implied warranty.
17 * LOCATION: see http://www.boost.org for most recent version.
18 * FILE regex.hpp
19 * VERSION 3.02
20 * DESCRIPTION: Declares boost::reg_expression<> and associated
21 * functions and classes. This header is the main
22 * entry point for the template regex code.
26 /* start with C compatibility API */
28 #ifndef BOOST_RE_REGEX_HPP
29 #define BOOST_RE_REGEX_HPP
31 #include <boost/cregex.hpp>
33 #ifdef __cplusplus
35 // what follows is all C++ don't include in C builds!!
37 #ifdef BOOST_RE_DEBUG
38 # include <iosfwd>
39 #endif
41 #include <new>
42 #include <boost/re_detail/regex_config.hpp>
43 #if !defined(BOOST_RE_NO_TYPEINFO)
44 #include <typeinfo>
45 #endif
46 #include <cstring>
47 #include <boost/re_detail/regex_stack.hpp>
48 #include <boost/re_detail/regex_raw_buffer.hpp>
49 #include <boost/re_detail/regex_kmp.hpp>
50 #include <boost/pattern_except.hpp>
51 #include <boost/regex_traits.hpp>
52 #include <boost/type_traits.hpp>
55 namespace boost{
57 #ifdef __BORLANDC__
58 #if __BORLANDC__ == 0x530
59 #pragma option push -a4 -b -Ve
60 #elif __BORLANDC__ > 0x530
61 #pragma option push -a8 -b -Ve
62 #endif
63 #endif
65 namespace re_detail{
67 struct re_set_long;
68 struct re_syntax_base;
70 } // namespace re_detail
72 namespace deprecated{
74 // class char_regex_traits_i
75 // provides case insensitive traits classes (deprecated):
76 template <class charT>
77 class char_regex_traits_i : public regex_traits<charT> {};
79 template<>
80 class char_regex_traits_i<char> : public regex_traits<char>
82 public:
83 typedef char char_type;
84 typedef unsigned char uchar_type;
85 typedef unsigned int size_type;
86 typedef regex_traits<char> base_type;
88 char BOOST_RE_CALL translate(char c, bool)const
90 return static_cast<const regex_traits<char>*>(this)->translate(c, true);
94 #ifndef BOOST_RE_NO_WCSTRING
95 template<>
96 class char_regex_traits_i<wchar_t> : public regex_traits<wchar_t>
98 public:
99 typedef wchar_t char_type;
100 typedef unsigned short uchar_type;
101 typedef unsigned int size_type;
102 typedef regex_traits<wchar_t> base_type;
104 wchar_t BOOST_RE_CALL translate(wchar_t c, bool)const
106 return static_cast<const regex_traits<wchar_t>*>(this)->translate(c, true);
108 jm_uintfast32_t BOOST_RE_CALL lookup_classname(const wchar_t* first, const wchar_t* last)const
110 jm_uintfast32_t result = static_cast<const regex_traits<wchar_t>*>(this)->lookup_classname(first, last);
111 if((result & base_type::char_class_upper) == base_type::char_class_upper)
112 result |= base_type::char_class_alpha;
113 return result;
116 #endif
117 } // namespace deprecated
120 namespace re_detail{
// Bit flags: mask_take and mask_skip occupy one bit each, mask_any is the
// union of both and mask_all is an alias for mask_any.
enum mask_type
{
   mask_take = 1 << 0,
   mask_skip = 1 << 1,
   mask_any  = mask_take | mask_skip,
   mask_all  = mask_any
};
// Empty tag types naming the two character widths.
struct _narrow_type{};
struct _wide_type{};

// is_byte<charT>::width_type is _wide_type by default ...
template <class charT>
class is_byte
{
public:
   typedef _wide_type width_type;
};

// ... and _narrow_type for char, unsigned char and signed char.
template<>
class is_byte<char>
{
public:
   typedef _narrow_type width_type;
};

template<>
class is_byte<unsigned char>
{
public:
   typedef _narrow_type width_type;
};

template<>
class is_byte<signed char>
{
public:
   typedef _narrow_type width_type;
};
166 // compiled structures
168 // the following defs describe the format of the compiled string
172 // enum syntax_element_type
173 // describes the type of a record
// Type tag of one record in the compiled expression.  The enumerators are
// consecutive integers starting at zero, in the order listed.
enum syntax_element_type
{
   syntax_element_startmark = 0,
   syntax_element_endmark,            // 1
   syntax_element_literal,            // 2
   syntax_element_start_line,         // 3
   syntax_element_end_line,           // 4
   syntax_element_wild,               // 5
   syntax_element_match,              // 6
   syntax_element_word_boundary,      // 7
   syntax_element_within_word,        // 8
   syntax_element_word_start,         // 9
   syntax_element_word_end,           // 10
   syntax_element_buffer_start,       // 11
   syntax_element_buffer_end,         // 12
   syntax_element_backref,            // 13
   syntax_element_long_set,           // 14
   syntax_element_set,                // 15
   syntax_element_jump,               // 16
   syntax_element_alt,                // 17
   syntax_element_rep,                // 18
   syntax_element_combining,          // 19
   syntax_element_soft_buffer_end,    // 20
   syntax_element_restart_continue    // 21
};
200 #ifdef BOOST_RE_DEBUG
201 // dwa 09/26/00 - This is needed to suppress warnings about an ambiguous conversion
202 std::ostream& operator<<(std::ostream&, syntax_element_type);
203 #endif
205 union offset_type
207 re_syntax_base* p;
208 unsigned i;
212 // struct re_syntax_base
213 // base class for all syntax types:
214 struct re_syntax_base
216 syntax_element_type type;
217 offset_type next;
218 unsigned int can_be_null;
222 // struct re_brace
223 // marks start or end of (...)
224 struct re_brace : public re_syntax_base
226 unsigned int index;
230 // struct re_literal
231 // marks a literal string and
232 // is followed by an array of charT[length]:
233 struct re_literal : public re_syntax_base
235 unsigned int length;
239 // struct re_long_set
240 // provides data for sets [...] containing
241 // wide characters
242 struct re_set_long : public re_syntax_base
244 unsigned int csingles, cranges, cequivalents;
245 jm_uintfast32_t cclasses;
246 bool isnot;
250 // struct re_set
251 // provides a map of bools for sets containing
252 // narrow, single byte characters.
253 struct re_set : public re_syntax_base
255 unsigned char _map[256];
259 // struct re_jump
260 // provides alternative next destination
261 struct re_jump : public re_syntax_base
263 offset_type alt;
264 unsigned char _map[256];
268 // struct re_repeat
269 // provides repeat expressions
270 struct re_repeat : public re_jump
272 unsigned min, max;
273 int id;
274 bool leading;
275 bool greedy;
280 // enum re_jump_size_type
281 // provides compiled size of re_jump
282 // allowing for trailing alignment
283 // provide this so we know how many
284 // bytes to insert
285 enum re_jump_size_type
287 re_jump_size = (sizeof(re_jump) + padding_mask) & ~(padding_mask),
288 re_repeater_size = (sizeof(re_repeat) + padding_mask) & ~(padding_mask)
291 } // namespace re_detail
294 // class regbase
295 // handles error codes and flags
297 class BOOST_RE_IX_DECL regbase
299 public:
300 enum flag_type_
302 escape_in_lists = 1, // '\' special inside [...]
303 char_classes = escape_in_lists << 1, // [[:CLASS:]] allowed
304 intervals = char_classes << 1, // {x,y} allowed
305 limited_ops = intervals << 1, // all of + ? and | are normal characters
306 newline_alt = limited_ops << 1, // \n is the same as |
307 bk_plus_qm = newline_alt << 1, // uses \+ and \?
308 bk_braces = bk_plus_qm << 1, // uses \{ and \}
309 bk_parens = bk_braces << 1, // uses \( and \)
310 bk_refs = bk_parens << 1, // \d allowed
311 bk_vbar = bk_refs << 1, // uses \|
313 use_except = bk_vbar << 1, // exception on error
314 failbit = use_except << 1, // error flag
315 literal = failbit << 1, // all characters are literals
316 icase = literal << 1, // characters are matched regardless of case
317 nocollate = icase << 1, // don't use locale specific collation
319 basic = char_classes | intervals | limited_ops | bk_braces | bk_parens | bk_refs,
320 extended = char_classes | intervals | bk_refs,
321 normal = escape_in_lists | char_classes | intervals | bk_refs | nocollate,
322 emacs = bk_braces | bk_parens | bk_refs | bk_vbar,
323 awk = extended | escape_in_lists,
324 grep = basic | newline_alt,
325 egrep = extended | newline_alt,
326 sed = basic,
327 perl = normal
329 typedef unsigned int flag_type;
331 enum restart_info
333 restart_any = 0,
334 restart_word = 1,
335 restart_line = 2,
336 restart_buf = 3,
337 restart_continue = 4,
338 restart_lit = 5,
339 restart_fixed_lit = 6
342 flag_type BOOST_RE_CALL flags()const
344 return _flags;
347 regbase();
348 regbase(const regbase& b);
349 protected:
350 flag_type _flags;
354 // some forward declarations:
355 namespace re_detail{
356 template <class iterator, class Allocator>
357 class _priv_match_data;
359 #if defined(BOOST_NO_STD_ITERATOR_TRAITS) || defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
// Fallback traits used when std::iterator_traits is not usable: the types are
// read from typedefs nested in the iterator type T.  Under BOOST_MSVC only
// iterator_category and value_type come from T; the remaining three are
// derived from value_type.
template <class T>
struct regex_iterator_traits
{
   typedef typename T::iterator_category iterator_category;
   typedef typename T::value_type value_type;
#ifdef BOOST_MSVC
   typedef std::ptrdiff_t difference_type;
   typedef value_type* pointer;
   typedef value_type& reference;
#else
   typedef typename T::difference_type difference_type;
   typedef typename T::pointer pointer;
   typedef typename T::reference reference;
#endif
};
// Iterator traits for a mutable pointer T*: a random access iterator whose
// value type is T.
template <class T>
struct pointer_iterator_traits
{
   typedef std::random_access_iterator_tag iterator_category;
   typedef T value_type;
   typedef std::ptrdiff_t difference_type;
   typedef T* pointer;
   typedef T& reference;
};
// Iterator traits for a pointer to const, const T*: a random access iterator
// whose value type is the unqualified T.
template <class T>
struct const_pointer_iterator_traits
{
   typedef std::random_access_iterator_tag iterator_category;
   typedef T value_type;
   typedef std::ptrdiff_t difference_type;
   typedef const T* pointer;
   typedef const T& reference;
};
396 template<>
397 struct regex_iterator_traits<char*> : pointer_iterator_traits<char>{};
398 template<>
399 struct regex_iterator_traits<const char*> : const_pointer_iterator_traits<char>{};
400 template<>
401 struct regex_iterator_traits<wchar_t*> : pointer_iterator_traits<wchar_t>{};
402 template<>
403 struct regex_iterator_traits<const wchar_t*> : const_pointer_iterator_traits<wchar_t>{};
405 #if defined(__SGI_STL_PORT) && defined(__STL_DEBUG)
406 template<>
407 struct regex_iterator_traits<std::string::iterator> : pointer_iterator_traits<char>{};
408 template<>
409 struct regex_iterator_traits<std::string::const_iterator> : const_pointer_iterator_traits<char>{};
410 #ifndef BOOST_NO_WSTRING
411 template<>
412 struct regex_iterator_traits<std::wstring::iterator> : pointer_iterator_traits<wchar_t>{};
413 template<>
414 struct regex_iterator_traits<std::wstring::const_iterator> : const_pointer_iterator_traits<wchar_t>{};
415 #endif // BOOST_NO_WSTRING
416 #endif // stport
418 #else
420 template <class T>
421 struct regex_iterator_traits : public std::iterator_traits<T> {};
423 #endif
425 template <class I>
426 struct def_alloc_param_traits
428 typedef typename regex_iterator_traits<I>::value_type const_value_type;
429 typedef typename remove_cv<const_value_type>::type type;
434 template <class iterator, class Allocator BOOST_RE_DEF_ALLOC_PARAM(typename re_detail::def_alloc_param_traits<iterator>::type) >
435 class match_results;
438 // class reg_expression
439 // represents the compiled
440 // regular expression:
443 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
445 // Ugly ugly hack,
446 // template don't merge if they contain switch statements so declare these
447 // templates in unnamed namespace (ie with internal linkage), each translation
448 // unit then gets its own local copy, it works seamlessly but bloats the app.
449 namespace{
450 #endif
452 template <class charT, class traits BOOST_RE_TRICKY_DEFAULT_PARAM(regex_traits<charT>), class Allocator BOOST_RE_DEF_ALLOC_PARAM(charT) >
453 class reg_expression : public regbase
455 typedef typename traits::size_type traits_size_type;
456 typedef typename traits::uchar_type traits_uchar_type;
457 typedef typename traits::string_type traits_string_type;
458 public:
459 // typedefs:
460 typedef charT char_type;
461 typedef traits traits_type;
463 // locale_type
464 // placeholder for actual locale type used by the
465 // traits class to localise *this.
466 typedef typename traits::locale_type locale_type;
467 // value_type
468 typedef charT value_type;
469 // reference, const_reference
470 typedef charT& reference;
471 typedef const charT& const_reference;
472 // iterator, const_iterator
473 typedef const charT* const_iterator;
474 typedef const_iterator iterator;
475 // difference_type
476 typedef typename Allocator::difference_type difference_type;
477 // size_type
478 typedef typename Allocator::size_type size_type;
479 // allocator_type
480 typedef Allocator allocator_type;
481 typedef Allocator alloc_type;
482 // flag_type
483 typedef regbase::flag_type flag_type;
485 public:
486 explicit reg_expression(const Allocator& a = Allocator());
487 explicit reg_expression(const charT* p, flag_type f = regbase::normal, const Allocator& a = Allocator());
488 reg_expression(const charT* p1, const charT* p2, flag_type f = regbase::normal, const Allocator& a = Allocator());
489 reg_expression(const charT* p, size_type len, flag_type f, const Allocator& a = Allocator());
490 reg_expression(const reg_expression&);
491 ~reg_expression();
492 reg_expression& BOOST_RE_CALL operator=(const reg_expression&);
493 reg_expression& BOOST_RE_CALL operator=(const charT* ptr)
495 set_expression(ptr, regbase::normal | regbase::use_except);
496 return *this;
500 // assign:
501 reg_expression& assign(const reg_expression& that)
502 { return *this = that; }
503 reg_expression& assign(const charT* ptr, flag_type f = regbase::normal)
505 set_expression(ptr, f | regbase::use_except);
506 return *this;
509 reg_expression& assign(const charT* first,
510 const charT* last,
511 flag_type f = regbase::normal)
513 set_expression(first, last, f | regbase::use_except);
514 return *this;
516 #ifndef BOOST_RE_NO_MEMBER_TEMPLATES
518 template <class ST, class SA>
519 unsigned int BOOST_RE_CALL set_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regbase::normal)
520 { return set_expression(p.data(), p.data() + p.size(), f); }
522 template <class ST, class SA>
523 explicit reg_expression(const std::basic_string<charT, ST, SA>& p, flag_type f = regbase::normal, const Allocator& a = Allocator())
524 : data(a), pkmp(0) { set_expression(p, f); }
526 template <class I>
527 reg_expression(I first, I last, flag_type f = regbase::normal, const Allocator& a = Allocator())
528 : data(a), pkmp(0)
530 size_type len = last-first;
531 scoped_array<charT> a(new charT[len]);
532 std::copy(first, last, a.get());
533 set_expression(a.get(), a.get() + len, f | regbase::use_except);
536 template <class ST, class SA>
537 reg_expression& BOOST_RE_CALL operator=(const std::basic_string<charT, ST, SA>& p)
539 set_expression(p.c_str(), p.c_str() + p.size(), regbase::normal | regbase::use_except);
540 return *this;
543 template <class string_traits, class A>
544 reg_expression& BOOST_RE_CALL assign(
545 const std::basic_string<charT, string_traits, A>& s,
546 flag_type f = regbase::normal)
548 set_expression(p.c_str(), p.c_str() + p.size(), f | regbase::use_except);
549 return *this;
552 template <class fwd_iterator>
553 reg_expression& BOOST_RE_CALL assign(fwd_iterator first,
554 fwd_iterator last,
555 flag_type f = regbase::normal)
557 size_type len = last-first;
558 scoped_array<charT> a(new charT[len]);
559 std::copy(first, last, a.get());
560 set_expression(a.get(), a.get() + len, f | regbase::use_except);
561 return *this;
563 #elif !defined(BOOST_RE_NO_STRING_DEF_ARGS)
564 unsigned int BOOST_RE_CALL set_expression(const std::basic_string<charT>& p, flag_type f = regbase::normal)
565 { return set_expression(p.data(), p.data() + p.size(), f); }
567 reg_expression(const std::basic_string<charT>& p, flag_type f = regbase::normal, const Allocator& a = Allocator())
568 : data(a), pkmp(0) { set_expression(p, f); }
570 reg_expression& BOOST_RE_CALL operator=(const std::basic_string<charT>& p)
572 set_expression(p.c_str(), p.c_str() + p.size(), regbase::normal | regbase::use_except);
573 return *this;
576 reg_expression& BOOST_RE_CALL assign(
577 const std::basic_string<charT>& s,
578 flag_type f = regbase::normal)
580 set_expression(s.c_str(), s.c_str() + s.size(), f | regbase::use_except);
581 return *this;
584 #endif
588 // allocator access:
589 Allocator BOOST_RE_CALL get_allocator()const;
591 // locale:
592 locale_type BOOST_RE_CALL imbue(locale_type l){ return traits_inst.imbue(l); }
593 locale_type BOOST_RE_CALL getloc()const{ return traits_inst.getloc(); }
595 // flags:
596 flag_type BOOST_RE_CALL getflags()const
597 { return flags(); }
599 // str:
600 std::basic_string<charT> BOOST_RE_CALL str()const
601 { return std::basic_string<charT>(_expression, _expression_len); }
603 // begin, end:
604 const_iterator BOOST_RE_CALL begin()const
605 { return _expression; }
606 const_iterator BOOST_RE_CALL end()const
607 { return _expression + _expression_len; }
609 // swap:
610 void BOOST_RE_CALL swap(reg_expression&)throw();
612 // size:
613 size_type BOOST_RE_CALL size()const
614 { return _expression_len; }
616 // max_size:
617 size_type BOOST_RE_CALL max_size()const
618 { return UINT_MAX; }
620 // empty:
621 bool BOOST_RE_CALL empty()const
622 { return this->error_code(); }
624 unsigned BOOST_RE_CALL mark_count()const { return marks; }
625 bool BOOST_RE_CALL operator==(const reg_expression&)const;
626 bool BOOST_RE_CALL operator<(const reg_expression&)const;
628 // The following are deprecated as public interfaces
629 // but are available for compatibility with earlier versions.
630 allocator_type BOOST_RE_CALL allocator()const;
631 const charT* BOOST_RE_CALL expression()const { return _expression; }
632 unsigned int BOOST_RE_CALL set_expression(const charT* p, const charT* end, flag_type f = regbase::normal);
633 unsigned int BOOST_RE_CALL set_expression(const charT* p, flag_type f = regbase::normal) { return set_expression(p, p + traits_type::length(p), f); }
635 // this should be private but template friends don't work:
636 const traits_type& get_traits()const { return traits_inst; }
637 unsigned int BOOST_RE_CALL error_code()const
639 return error_code_;
642 private:
643 re_detail::raw_storage<Allocator> data;
644 unsigned _restart_type;
645 unsigned marks;
646 int repeats;
647 unsigned char* startmap;
648 charT* _expression;
649 unsigned _expression_len;
650 unsigned int _leading_len;
651 const charT* _leading_string;
652 unsigned int _leading_string_len;
653 re_detail::kmp_info<charT>* pkmp;
654 traits_type traits_inst;
655 unsigned error_code_;
657 void BOOST_RE_CALL compile_maps();
658 void BOOST_RE_CALL compile_map(re_detail::re_syntax_base* node, unsigned char* _map, unsigned int* pnull, unsigned char mask, re_detail::re_syntax_base* terminal = NULL)const;
659 bool BOOST_RE_CALL probe_start(re_detail::re_syntax_base* node, charT c, re_detail::re_syntax_base* terminal)const;
660 bool BOOST_RE_CALL probe_start_null(re_detail::re_syntax_base* node, re_detail::re_syntax_base* terminal)const;
661 void BOOST_RE_CALL fixup_apply(re_detail::re_syntax_base* b, unsigned cbraces);
662 void BOOST_RE_CALL move_offsets(re_detail::re_syntax_base* j, unsigned size);
663 re_detail::re_syntax_base* BOOST_RE_CALL compile_set(const charT*& first, const charT* last);
664 re_detail::re_syntax_base* BOOST_RE_CALL compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<jm_uintfast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_narrow_type&);
665 re_detail::re_syntax_base* BOOST_RE_CALL compile_set_aux(re_detail::jstack<traits_string_type, Allocator>& singles, re_detail::jstack<traits_string_type, Allocator>& ranges, re_detail::jstack<jm_uintfast32_t, Allocator>& classes, re_detail::jstack<traits_string_type, Allocator>& equivalents, bool isnot, const re_detail::_wide_type&);
666 re_detail::re_syntax_base* BOOST_RE_CALL compile_set_simple(re_detail::re_syntax_base* dat, unsigned long cls, bool isnot = false);
667 unsigned int BOOST_RE_CALL parse_inner_set(const charT*& first, const charT* last);
669 re_detail::re_syntax_base* BOOST_RE_CALL add_simple(re_detail::re_syntax_base* dat, re_detail::syntax_element_type type, unsigned int size = sizeof(re_detail::re_syntax_base));
670 re_detail::re_syntax_base* BOOST_RE_CALL add_literal(re_detail::re_syntax_base* dat, charT c);
671 charT BOOST_RE_CALL parse_escape(const charT*& first, const charT* last);
672 void BOOST_RE_CALL parse_range(const charT*& first, const charT* last, unsigned& min, unsigned& max);
673 bool BOOST_RE_CALL skip_space(const charT*& first, const charT* last);
674 unsigned int BOOST_RE_CALL probe_restart(re_detail::re_syntax_base* dat);
675 unsigned int BOOST_RE_CALL fixup_leading_rep(re_detail::re_syntax_base* dat, re_detail::re_syntax_base* end);
676 void BOOST_RE_CALL fail(unsigned int err);
678 protected:
679 static int BOOST_RE_CALL repeat_count(const reg_expression& e)
680 { return e.repeats; }
681 static unsigned int BOOST_RE_CALL restart_type(const reg_expression& e)
682 { return e._restart_type; }
683 static const re_detail::re_syntax_base* BOOST_RE_CALL first(const reg_expression& e)
684 { return (const re_detail::re_syntax_base*)e.data.data(); }
685 static const unsigned char* BOOST_RE_CALL get_map(const reg_expression& e)
686 { return e.startmap; }
687 static unsigned int BOOST_RE_CALL leading_length(const reg_expression& e)
688 { return e._leading_len; }
689 static const re_detail::kmp_info<charT>* get_kmp(const reg_expression& e)
690 { return e.pkmp; }
691 static bool BOOST_RE_CALL can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_wide_type&);
692 static bool BOOST_RE_CALL can_start(charT c, const unsigned char* _map, unsigned char mask, const re_detail::_narrow_type&);
695 template <class charT, class traits, class Allocator>
696 void BOOST_RE_CALL reg_expression<charT, traits, Allocator>::swap(reg_expression& that)throw()
698 // this is not as efficient as it should be,
699 // however swapping traits classes is problematic
700 // so just use 'brute force' method for now:
701 reg_expression<charT, traits, Allocator> e(that);
702 that = *this;
703 *this = e;
707 #if defined(BOOST_RE_NO_TEMPLATE_SWITCH_MERGE) && !defined(BOOST_RE_NO_NAMESPACES)
708 } // namespace
709 #endif
712 // class match_results and match_results_base
713 // handles what matched where
715 template <class iterator>
716 struct sub_match
718 typedef typename re_detail::regex_iterator_traits<iterator>::value_type value_type;
719 #if defined(BOOST_NO_STD_ITERATOR_TRAITS) || defined(BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION)
720 typedef std::ptrdiff_t difference_type;
721 #else
722 typedef typename re_detail::regex_iterator_traits<iterator>::difference_type difference_type;
723 #endif
724 typedef iterator iterator_type;
726 iterator first;
727 iterator second;
728 bool matched;
730 operator std::basic_string<value_type> ()const
732 std::basic_string<value_type> result;
733 unsigned len;
734 BOOST_RE_DISTANCE((iterator)first, (iterator)second, len);
735 result.reserve(len);
736 iterator i = first;
737 while(i != second)
739 result.append(1, *i);
740 ++i;
742 return result;
744 #ifdef BOOST_OLD_REGEX_H
746 // the following are deprecated, do not use!!
748 operator int()const;
749 operator unsigned int()const;
750 operator short()const
752 return (short)(int)(*this);
754 operator unsigned short()const
756 return (unsigned short)(unsigned int)(*this);
758 #endif
759 sub_match() { matched = false; }
760 sub_match(iterator i) : first(i), second(i), matched(false) {}
762 bool operator==(const sub_match& that)const
764 return (first == that.first) && (second == that.second) && (matched == that.matched);
766 bool BOOST_RE_CALL operator !=(const sub_match& that)const
767 { return !(*this == that); }
769 difference_type BOOST_RE_CALL length()const
771 difference_type n;
772 BOOST_RE_DISTANCE((iterator)first, (iterator)second, n);
773 return n;
777 #ifdef BOOST_OLD_REGEX_H
778 namespace re_detail{
779 template <class iterator, class charT>
780 int do_toi(iterator i, iterator j, char c, int radix)
782 std::string s(i, j);
783 char* p;
784 int result = std::strtol(s.c_str(), &p, radix);
785 if(*p)throw bad_pattern("Bad sub-expression");
786 return result;
790 // helper:
//
// helper:
// Reads decimal digits starting at i and stops at j or at the first
// character that is not a digit; i is left on that position.  Returns the
// accumulated value, which is 0 when no digit was read.  The third argument
// only serves to deduce charT.
//
// Digits are recognised by a range comparison rather than isdigit(): passing
// a negative char, or a wide character outside the unsigned char range, to
// isdigit() is undefined behaviour.
template <class I, class charT>
int do_toi(I& i, I j, charT c)
{
   int result = 0;
   while((i != j) && (*i >= '0') && (*i <= '9'))
   {
      result = result*10 + (*i - '0');
      ++i;
   }
   return result;
}
805 template <class iterator>
806 sub_match<iterator>::operator int()const
808 iterator i = first;
809 iterator j = second;
810 if(i == j)throw bad_pattern("Bad sub-expression");
811 int neg = 1;
812 if((i != j) && (*i == '-'))
814 neg = -1;
815 ++i;
817 neg *= re_detail::do_toi(i, j, *i);
818 if(i != j)throw bad_pattern("Bad sub-expression");
819 return neg;
821 template <class iterator>
822 sub_match<iterator>::operator unsigned int()const
824 iterator i = first;
825 iterator j = second;
826 if(i == j)
827 throw bad_pattern("Bad sub-expression");
828 return re_detail::do_toi(i, j, *first);
830 #endif
832 namespace re_detail{
834 template <class iterator, class Allocator BOOST_RE_DEF_ALLOC_PARAM(typename def_alloc_param_traits<iterator>::type) >
835 class match_results_base
837 public:
838 typedef Allocator alloc_type;
839 typedef typename REBIND_TYPE(iterator, Allocator)::size_type size_type;
840 #ifndef BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
841 typedef typename std::iterator_traits<iterator>::difference_type difference_type;
842 typedef typename std::iterator_traits<iterator>::value_type char_type;
843 #else
844 typedef std::ptrdiff_t difference_type;
845 #endif
846 typedef sub_match<iterator> value_type;
847 typedef iterator iterator_type;
849 protected:
850 typedef BOOST_RE_MAYBE_TYPENAME REBIND_TYPE(char, Allocator) c_alloc;
852 struct c_reference : public c_alloc
854 unsigned int cmatches;
855 unsigned count;
856 sub_match<iterator> head, tail, null;
857 unsigned int lines;
858 iterator line_pos, base;
859 c_reference(const Allocator& a) : c_alloc(a) { }
861 bool operator==(const c_reference& that)const
863 return (cmatches == that.cmatches) &&
864 (count == that.count) &&
865 (head == that.head) &&
866 (tail == that.tail) &&
867 (lines == that.lines) &&
868 (base == that.base);
870 bool operator!=(const c_reference& that)const
871 { return !(*this == that); }
874 c_reference* ref;
876 void BOOST_RE_CALL cow();
878 // protected constructor for derived class...
879 match_results_base(bool){}
880 void BOOST_RE_CALL free();
882 public:
884 match_results_base(const Allocator& a = Allocator());
886 match_results_base(const match_results_base& m)
888 ref = m.ref;
889 ++(ref->count);
892 match_results_base& BOOST_RE_CALL operator=(const match_results_base& m);
894 ~match_results_base()
896 free();
899 size_type BOOST_RE_CALL size()const
901 //return (*this)[0].matched ? ref->cmatches : 0;
902 return ref->cmatches;
905 const sub_match<iterator>& BOOST_RE_CALL operator[](int n) const
907 if((n >= 0) && ((unsigned int)n < ref->cmatches))
908 return *(sub_match<iterator>*)((char*)ref + sizeof(c_reference) + sizeof(sub_match<iterator>)*n);
909 return (n == -1) ? ref->head : (n == -2) ? ref->tail : ref->null;
912 Allocator BOOST_RE_CALL allocator()const;
914 difference_type BOOST_RE_CALL length(unsigned int sub = 0)const
916 jm_assert(ref->cmatches);
917 const sub_match<iterator>& m = (*this)[sub];
918 if(m.matched == false)
919 return 0;
920 difference_type n;
921 BOOST_RE_DISTANCE((iterator)m.first, (iterator)m.second, n);
922 return n;
925 #ifndef BOOST_NO_TEMPLATE_PARTIAL_SPECIALIZATION
926 std::basic_string<value_type> str(int i)const
928 return static_cast<std::basic_string<value_type> >((*this)[i]);
930 #else
931 std::basic_string<char> str(int i)const
933 return static_cast<std::basic_string<char> >((*this)[i]);
935 #endif
937 unsigned int BOOST_RE_CALL line()const
939 return ref->lines;
942 difference_type BOOST_RE_CALL position(unsigned int sub = 0)const
944 jm_assert(ref->cmatches);
945 const sub_match<iterator>& s = (*this)[sub];
946 if(s.matched == false)
947 return -1;
948 difference_type n;
949 BOOST_RE_DISTANCE((iterator)ref->base, (iterator)s.first, n);
950 return n;
953 iterator BOOST_RE_CALL line_start()const
955 return ref->line_pos;
958 void swap(match_results_base& that)
960 c_reference* t = that.ref;
961 that.ref = ref;
962 ref = t;
965 bool operator==(const match_results_base& that)const;
966 bool operator<(const match_results_base& that)const
967 { return position() < that.position(); }
969 friend class match_results<iterator, Allocator>;
971 void BOOST_RE_CALL set_size(size_type n);
972 void BOOST_RE_CALL set_size(size_type n, iterator i, iterator j);
973 void BOOST_RE_CALL maybe_assign(const match_results_base& m);
974 void BOOST_RE_CALL init_fail(iterator i, iterator j);
976 void BOOST_RE_CALL set_first(iterator i);
977 void BOOST_RE_CALL set_first(iterator i, size_t pos);
979 void BOOST_RE_CALL set_second(iterator i)
981 cow();
982 ((sub_match<iterator>*)(ref+1))->second = i;
983 ((sub_match<iterator>*)(ref+1))->matched = true;
984 ref->tail.first = i;
985 ref->tail.matched = (ref->tail.first == ref->tail.second) ? false : true;
988 void BOOST_RE_CALL set_second(iterator i, size_t pos)
990 cow();
991 ((sub_match<iterator>*)((char*)ref + sizeof(c_reference) + sizeof(sub_match<iterator>) * pos))->second = i;
992 ((sub_match<iterator>*)((char*)ref + sizeof(c_reference) + sizeof(sub_match<iterator>) * pos))->matched = true;
993 if(pos == 0)
995 ref->tail.first = i;
996 ref->tail.matched = (ref->tail.first == ref->tail.second) ? false : true;
1000 void BOOST_RE_CALL set_line(unsigned int i, iterator pos)
1002 ref->lines = i;
1003 ref->line_pos = pos;
1006 void BOOST_RE_CALL set_base(iterator pos)
1008 ref->base = pos;
1012 template <class iterator, class Allocator>
1013 void BOOST_RE_CALL match_results_base<iterator, Allocator>::set_first(iterator i)
1015 cow();
1016 ref->head.second = i;
1017 ref->head.matched = (ref->head.first == ref->head.second) ? false : true;
1018 sub_match<iterator>* p1 = (sub_match<iterator>*)(ref+1);
1019 sub_match<iterator>* p2 = p1 + ref->cmatches;
1020 p1->first = i;
1021 p1->matched = false;
1022 ++p1;
1023 while(p1 != p2)
1025 p1->matched = false;
1026 p1->first = ref->tail.second;
1027 p1->second = ref->tail.second;
1028 ++p1;
1032 template <class iterator, class Allocator>
1033 void BOOST_RE_CALL match_results_base<iterator, Allocator>::set_first(iterator i, size_t pos)
1035 cow();
1036 ((sub_match<iterator>*)((char*)ref + sizeof(c_reference) + sizeof(sub_match<iterator>) * pos))->first = i;
1037 if(pos == 0)
1039 ref->head.second = i;
1040 ref->head.matched = (ref->head.first == ref->head.second) ? false : true;
1041 sub_match<iterator>* p1 = (sub_match<iterator>*)(ref+1);
1042 sub_match<iterator>* p2 = p1 + ref->cmatches;
1043 p1->first = i;
1044 p1->matched = false;
1045 ++p1;
1046 while(p1 != p2)
1048 p1->matched = false;
1049 p1->first = ref->tail.second;
1050 p1->second = ref->tail.second;
1051 ++p1;
1057 template <class iterator, class Allocator>
1058 match_results_base<iterator, Allocator>::match_results_base(const Allocator& a)
1060 ref = (c_reference*)c_alloc(a).allocate(sizeof(sub_match<iterator>) + sizeof(c_reference));
1063 new (ref) c_reference(a);
1064 ref->cmatches = 1;
1065 ref->count = 1;
1066 // construct the sub_match<iterator>:
1069 new ((sub_match<iterator>*)(ref+1)) sub_match<iterator>();
1071 catch(...)
1073 jm_destroy(ref);
1074 throw;
1077 catch(...)
1079 c_alloc(a).deallocate((char*)(void*)ref, sizeof(sub_match<iterator>) + sizeof(c_reference));
1080 throw;
1084 template <class iterator, class Allocator>
1085 Allocator BOOST_RE_CALL match_results_base<iterator, Allocator>::allocator()const
1087 return *((c_alloc*)ref);
1090 template <class iterator, class Allocator>
1091 inline match_results_base<iterator, Allocator>& BOOST_RE_CALL match_results_base<iterator, Allocator>::operator=(const match_results_base<iterator, Allocator>& m)
1093 if(ref != m.ref)
1095 free();
1096 ref = m.ref;
1097 ++(ref->count);
1099 return *this;
1103 template <class iterator, class Allocator>
1104 void BOOST_RE_CALL match_results_base<iterator, Allocator>::free()
1106 if(--(ref->count) == 0)
1108 c_alloc a(*ref);
1109 sub_match<iterator>* p1, *p2;
1110 p1 = (sub_match<iterator>*)(ref+1);
1111 p2 = p1 + ref->cmatches;
1112 while(p1 != p2)
1114 jm_destroy(p1);
1115 ++p1;
1117 jm_destroy(ref);
1118 a.deallocate((char*)(void*)ref, sizeof(sub_match<iterator>) * ref->cmatches + sizeof(c_reference));
1122 template <class iterator, class Allocator>
1123 bool match_results_base<iterator, Allocator>::operator==(const match_results_base<iterator, Allocator>& that)const
1125 if(*ref != *(that.ref))
1126 return false;
1127 const sub_match<iterator>* p1 = (sub_match<iterator>*)(ref+1);
1128 const sub_match<iterator>* p2 = p1 + ref->cmatches;
1129 const sub_match<iterator>* p3 = (sub_match<iterator>*)(that.ref+1);
1130 while(p1 != p2)
1132 if(*p1 != *p3)
1133 return false;
1134 ++p1;
1135 ++p3;
1137 return true;
1140 template <class iterator, class Allocator>
1141 void BOOST_RE_CALL match_results_base<iterator, Allocator>::set_size(size_type n)
1143 if(ref->cmatches != n)
1145 c_reference* newref = (c_reference*)ref->allocate(sizeof(sub_match<iterator>) * n + sizeof(c_reference));
1148 new (newref) c_reference(*ref);
1149 newref->count = 1;
1150 newref->cmatches = n;
1151 sub_match<iterator>* p1, *p2;
1152 p1 = (sub_match<iterator>*)(newref+1);
1153 p2 = p1 + newref->cmatches;
1156 while(p1 != p2)
1158 new (p1) sub_match<iterator>();
1159 ++p1;
1161 free();
1163 catch(...)
1165 p2 = (sub_match<iterator>*)(newref+1);
1166 while(p2 != p1)
1168 jm_destroy(p2);
1169 ++p2;
1171 jm_destroy(ref);
1172 throw;
1174 ref = newref;
1176 catch(...)
1178 ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * n + sizeof(c_reference));
1179 throw;
1184 template <class iterator, class Allocator>
1185 void BOOST_RE_CALL match_results_base<iterator, Allocator>::set_size(size_type n, iterator i, iterator j)
1187 if(ref->cmatches != n)
1189 c_reference* newref = (c_reference*)ref->allocate(sizeof(sub_match<iterator>) * n + sizeof(c_reference));;
1190 try{
1191 new (newref) c_reference(*ref);
1192 newref->count = 1;
1193 newref->cmatches = n;
1194 sub_match<iterator>* p1 = (sub_match<iterator>*)(newref+1);
1195 sub_match<iterator>* p2 = p1 + newref->cmatches;
1198 while(p1 != p2)
1200 new (p1) sub_match<iterator>(j);
1201 ++p1;
1203 free();
1205 catch(...)
1207 p2 = (sub_match<iterator>*)(newref+1);
1208 while(p2 != p1)
1210 jm_destroy(p2);
1211 ++p2;
1213 jm_destroy(ref);
1214 throw;
1216 ref = newref;
1218 catch(...)
1220 ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * n + sizeof(c_reference));
1221 throw;
1224 else
1226 cow();
1227 // set iterators to be i, matched to false:
1228 sub_match<iterator>* p1, *p2;
1229 p1 = (sub_match<iterator>*)(ref+1);
1230 p2 = p1 + ref->cmatches;
1231 while(p1 != p2)
1233 p1->first = j;
1234 p1->second = j;
1235 p1->matched = false;
1236 ++p1;
1239 ref->head.first = i;
1240 ref->tail.second = j;
1241 ref->head.matched = ref->tail.matched = true;
1242 ref->null.first = ref->null.second = j;
1243 ref->null.matched = false;
1246 template <class iterator, class Allocator>
1247 inline void BOOST_RE_CALL match_results_base<iterator, Allocator>::init_fail(iterator i, iterator j)
1249 set_size(ref->cmatches, i, j);
1252 template <class iterator, class Allocator>
1253 void BOOST_RE_CALL match_results_base<iterator, Allocator>::maybe_assign(const match_results_base<iterator, Allocator>& m)
1255 sub_match<iterator>* p1, *p2;
1256 p1 = (sub_match<iterator>*)(ref+1);
1257 p2 = (sub_match<iterator>*)(m.ref+1);
1258 iterator base = (*this)[-1].first;
1259 unsigned int len1 = 0;
1260 unsigned int len2 = 0;
1261 unsigned int base1 = 0;
1262 unsigned int base2 = 0;
1263 unsigned int i;
1264 for(i = 0; i < ref->cmatches; ++i)
1267 // leftmost takes priority over longest:
1268 BOOST_RE_DISTANCE(base, p1->first, base1);
1269 BOOST_RE_DISTANCE(base, p2->first, base2);
1270 if(base1 < base2) return;
1271 if(base2 < base1) break;
1273 BOOST_RE_DISTANCE(p1->first, p1->second, len1);
1274 BOOST_RE_DISTANCE(p2->first, p2->second, len2);
1275 if((len1 != len2) || ((p1->matched == false) && (p2->matched == true)))
1276 break;
1277 if((p1->matched == true) && (p2->matched == false))
1278 return;
1279 ++p1;
1280 ++p2;
1282 if(i == ref->cmatches)
1283 return;
1284 if(base2 < base1)
1285 *this = m;
1286 else if((len2 > len1) || ((p1->matched == false) && (p2->matched == true)) )
1287 *this = m;
1290 template <class iterator, class Allocator>
1291 void BOOST_RE_CALL match_results_base<iterator, Allocator>::cow()
1293 if(ref->count > 1)
1295 c_reference* newref = (c_reference*)ref->allocate(sizeof(sub_match<iterator>) * ref->cmatches + sizeof(c_reference));
1296 try{
1297 new (newref) c_reference(*ref);
1298 newref->count = 1;
1299 sub_match<iterator>* p1 = (sub_match<iterator>*)(newref+1);
1300 sub_match<iterator>* p2 = p1 + newref->cmatches;
1301 sub_match<iterator>* p3 = (sub_match<iterator>*)(ref+1);
1302 try{
1303 while(p1 != p2)
1305 new (p1) sub_match<iterator>(*p3);
1306 ++p1;
1307 ++p3;
1310 catch(...)
1312 p2 = (sub_match<iterator>*)(newref+1);
1313 while(p2 != p1)
1315 jm_destroy(p2);
1316 ++p2;
1318 jm_destroy(ref);
1319 throw;
1321 --(ref->count);
1322 ref = newref;
1324 catch(...)
1326 ref->deallocate((char*)(void*)newref, sizeof(sub_match<iterator>) * ref->cmatches + sizeof(c_reference));
1327 throw;
1332 } // namespace re_detail
1335 // class match_results
1336 // encapsulates match_results_base, does a deep copy rather than
1337 // reference counting to ensure thread safety when copying
1338 // other match_results instances
1340 template <class iterator, class Allocator>
1341 class match_results : public re_detail::match_results_base<iterator, Allocator>
1343 typedef re_detail::match_results_base<iterator, Allocator> base_type;
1344 public:
1345 explicit match_results(const Allocator& a = Allocator())
1346 : re_detail::match_results_base<iterator, Allocator>(a){}
1348 match_results(const re_detail::match_results_base<iterator, Allocator>& m)
1349 : re_detail::match_results_base<iterator, Allocator>(m){}
1351 match_results& operator=(const re_detail::match_results_base<iterator, Allocator>& m)
1353 // shallow copy
1354 base_type::operator=(m);
1355 return *this;
1358 match_results(const match_results& m);
1359 match_results& operator=(const match_results& m);
1361 bool operator==(const match_results& that)const
1362 { return re_detail::match_results_base<iterator, Allocator>::operator==(that); }
1363 bool operator<(const match_results& that) const
1364 { return position() < that.position(); }
1368 template <class iterator, class Allocator>
1369 match_results<iterator, Allocator>::match_results(const match_results<iterator, Allocator>& m)
1370 : re_detail::match_results_base<iterator, Allocator>(false)
1372 this->ref =
1373 reinterpret_cast<typename re_detail::match_results_base<iterator, Allocator>::c_reference *>
1374 (m.ref->allocate(sizeof(sub_match<iterator>) * m.ref->cmatches +
1375 sizeof(typename re_detail::match_results_base<iterator, Allocator>::c_reference)));
1376 try{
1377 new (this->ref) typename re_detail::match_results_base<iterator, Allocator>::c_reference(*m.ref);
1378 this->ref->count = 1;
1379 sub_match<iterator>* p1 = (sub_match<iterator>*)(this->ref+1);
1380 sub_match<iterator>* p2 = p1 + this->ref->cmatches;
1381 sub_match<iterator>* p3 = (sub_match<iterator>*)(m.ref+1);
1382 try{
1383 while(p1 != p2)
1385 new (p1) sub_match<iterator>(*p3);
1386 ++p1;
1387 ++p3;
1390 catch(...)
1392 p2 = (sub_match<iterator>*)(this->ref+1);
1393 while(p2 != p1)
1395 re_detail::jm_destroy(p2);
1396 ++p2;
1398 re_detail::jm_destroy(ref);
1399 throw;
1402 catch(...)
1404 m.ref->deallocate((char*)(void*)this->ref, sizeof(sub_match<iterator>) * m.ref->cmatches + sizeof(typename re_detail::match_results_base<iterator, Allocator>::c_reference));
1405 throw;
1409 template <class iterator, class Allocator>
1410 match_results<iterator, Allocator>& match_results<iterator, Allocator>::operator=(const match_results<iterator, Allocator>& m)
1412 match_results<iterator, Allocator> t(m);
1413 this->swap(t);
1414 return *this;
1417 namespace re_detail{
// Forward declaration only: re_is_set_member is declared here, taking the
// range [next, last), a pointer to a re_set_long and the expression e, and
// returning an iterator. Its definition is not in this part of the file.
1418 template <class iterator, class charT, class traits_type, class Allocator>
1419 iterator BOOST_RE_CALL re_is_set_member(iterator next,
1420 iterator last,
1421 re_set_long* set_,
1422 const reg_expression<charT, traits_type, Allocator>& e);
1423 } // namespace re_detail
1425 #ifdef __BORLANDC__
1426 #if __BORLANDC__ > 0x520
1427 #pragma option pop
1428 #endif
1429 #endif
1431 } // namespace boost
1433 #include <boost/re_detail/regex_compile.hpp>
1435 namespace boost{
// Convenience names for the common instantiations.
// regex: reg_expression over char, using regex_traits<char> and the
// allocator selected by BOOST_RE_DEF_ALLOC(char).
1437 typedef reg_expression<char, regex_traits<char>, BOOST_RE_DEF_ALLOC(char)> regex;
// wregex: the wchar_t counterpart; omitted when BOOST_RE_NO_WCSTRING is defined.
1438 #ifndef BOOST_RE_NO_WCSTRING
1439 typedef reg_expression<wchar_t, regex_traits<wchar_t>, BOOST_RE_DEF_ALLOC(wchar_t)> wregex;
1440 #endif
// cmatch: match_results over const char* iterators.
1442 typedef match_results<const char*> cmatch;
// wcmatch: match_results over const wchar_t* iterators; omitted when
// BOOST_RE_NO_WCSTRING is defined.
1443 #ifndef BOOST_RE_NO_WCSTRING
1444 typedef match_results<const wchar_t*> wcmatch;
1445 #endif
1447 } // namespace boost
1449 #include <boost/re_detail/regex_match.hpp>
1450 #include <boost/re_detail/regex_format.hpp>
1451 #include <boost/re_detail/regex_split.hpp>
1454 #endif // __cplusplus
1456 #endif // include