1 #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
2 #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
4 /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
5 * Use, modification and distribution is subject to the
6 * Boost Software License, Version 1.0. (See accompanying
7 * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
8 * Author: Jeff Garland, Bart Garst
13 #include "boost/lexical_cast.hpp" //error without?
14 #include "boost/algorithm/string/case_conv.hpp"
20 namespace boost
{ namespace date_time
{
23 template<typename charT
>
24 struct parse_match_result
26 parse_match_result() :
28 current_match(-1)// -1 is match_not-found value
30 typedef std::basic_string
<charT
> string_type
;
31 string_type
remaining() const
33 if (match_depth
== cache
.size()) {
36 if (current_match
== -1) {
39 //some of the cache was used return the rest
40 return string_type(cache
, match_depth
);
42 charT
last_char() const
44 return cache
[cache
.size()-1];
46 //! Returns true if more characters were parsed than was necessary
47 /*! Should be used in conjunction with last_char()
48 * to get the remaining character.
50 bool has_remaining() const
52 return (cache
.size() > match_depth
);
55 // cache will hold characters that have been read from the stream
57 unsigned short match_depth
;
59 enum PARSE_STATE
{ PARSE_ERROR
= -1 };
62 //for debug -- really only char streams...
63 template<typename charT
>
64 std::basic_ostream
<charT
>&
65 operator<<(std::basic_ostream
<charT
>& os
, parse_match_result
<charT
>& mr
)
67 os
<< "cm: " << mr
.current_match
68 << " C: '" << mr
.cache
69 << "' md: " << mr
.match_depth
70 << " R: " << mr
.remaining();
76 //! Recursive data structure to allow efficient parsing of various strings
77 /*! This class provides a quick lookup by building what amounts to a
78 * tree data structure. It also features a match function which can
79 * can handle nasty input interators by caching values as it recurses
80 * the tree so that it can backtrack as needed.
82 template<typename charT
>
83 struct string_parse_tree
85 #if BOOST_WORKAROUND( __BORLANDC__, BOOST_TESTED_AT(0x581) )
86 typedef std::multimap
<charT
, string_parse_tree
< charT
> > ptree_coll
;
88 typedef std::multimap
<charT
, string_parse_tree
> ptree_coll
;
90 typedef typename
ptree_coll::value_type value_type
;
91 typedef typename
ptree_coll::iterator iterator
;
92 typedef typename
ptree_coll::const_iterator const_iterator
;
93 typedef std::basic_string
<charT
> string_type
;
94 typedef std::vector
<std::basic_string
<charT
> > collection_type
;
95 typedef parse_match_result
<charT
> parse_match_result_type
;
97 /*! Parameter "starting_point" designates where the numbering begins.
98 * A starting_point of zero will start the numbering at zero
99 * (Sun=0, Mon=1, ...) were a starting_point of one starts the
100 * numbering at one (Jan=1, Feb=2, ...). The default is zero,
101 * negative vaules are not allowed */
102 string_parse_tree(collection_type names
, unsigned int starting_point
=0)
104 // iterate thru all the elements and build the tree
105 unsigned short index
= 0;
106 while (index
!= names
.size() ) {
107 string_type s
= boost::algorithm::to_lower_copy(names
[index
]);
108 insert(s
, static_cast<unsigned short>(index
+ starting_point
));
111 //set the last tree node = index+1 indicating a value
116 string_parse_tree(short value
= -1) :
119 ptree_coll m_next_chars
;
122 void insert(const string_type
& s
, unsigned short value
)
126 while(i
< s
.size()) {
128 if (i
== (s
.size()-1)) {
129 ti
= m_next_chars
.insert(value_type(s
[i
],
130 string_parse_tree
<charT
>(value
)));
133 ti
= m_next_chars
.insert(value_type(s
[i
],
134 string_parse_tree
<charT
>()));
138 if (i
== (s
.size()-1)) {
139 ti
= ti
->second
.m_next_chars
.insert(value_type(s
[i
],
140 string_parse_tree
<charT
>(value
)));
144 ti
= ti
->second
.m_next_chars
.insert(value_type(s
[i
],
145 string_parse_tree
<charT
>()));
154 //! Recursive function that finds a matching string in the tree.
155 /*! Must check match_results::has_remaining() after match() is
156 * called. This is required so the user can determine if
157 * stream iterator is already pointing to the expected
158 * character or not (match() might advance sitr to next char in stream).
160 * A parse_match_result that has been returned from a failed match
161 * attempt can be sent in to the match function of a different
162 * string_parse_tree to attempt a match there. Use the iterators
163 * for the partially consumed stream, the parse_match_result object,
164 * and '0' for the level parameter. */
166 match(std::istreambuf_iterator
<charT
>& sitr
,
167 std::istreambuf_iterator
<charT
>& stream_end
,
168 parse_match_result_type
& result
,
169 unsigned int& level
) const
174 // if we conditionally advance sitr, we won't have
175 // to consume the next character past the input
177 if (level
> result
.cache
.size()) {
178 if (sitr
== stream_end
) return 0; //bail - input exhausted
179 c
= static_cast<charT
>(std::tolower(*sitr
));
184 // if we're looking for characters from the cache,
185 // we don't want to increment sitr
187 c
= static_cast<charT
>(std::tolower(result
.cache
[level
-1]));
189 const_iterator litr
= m_next_chars
.lower_bound(c
);
190 const_iterator uitr
= m_next_chars
.upper_bound(c
);
191 while (litr
!= uitr
) { // equal if not found
196 if (litr
->second
.m_value
!= -1) { // -1 is default value
197 if (result
.match_depth
< level
) {
198 result
.current_match
= litr
->second
.m_value
;
199 result
.match_depth
= static_cast<unsigned short>(level
);
201 litr
->second
.match(sitr
, stream_end
,
206 litr
->second
.match(sitr
, stream_end
,
211 if(level
<= result
.cache
.size()) {
217 return result
.current_match
;
221 /*! Must check match_results::has_remaining() after match() is
222 * called. This is required so the user can determine if
223 * stream iterator is already pointing to the expected
224 * character or not (match() might advance sitr to next char in stream).
226 parse_match_result_type
227 match(std::istreambuf_iterator
<charT
>& sitr
,
228 std::istreambuf_iterator
<charT
>& stream_end
) const
230 // lookup to_lower of char in tree.
231 unsigned int level
= 0;
232 // string_type cache;
233 parse_match_result_type result
;
234 match(sitr
, stream_end
, result
, level
);
238 void printme(std::ostream
& os
, int& level
)
241 iterator itr
= m_next_chars
.begin();
242 iterator end
= m_next_chars
.end();
243 // os << "starting level: " << level << std::endl;
245 os
<< "level: " << level
246 << " node: " << itr
->first
247 << " value: " << itr
->second
.m_value
249 itr
->second
.printme(os
, level
);
255 void print(std::ostream
& os
)
261 void printmatch(std::ostream
& os
, charT c
)
263 iterator litr
= m_next_chars
.lower_bound(c
);
264 iterator uitr
= m_next_chars
.upper_bound(c
);
265 os
<< "matches for: " << c
<< std::endl
;
266 while (litr
!= uitr
) {
267 os
<< " node: " << litr
->first
268 << " value: " << litr
->second
.m_value