6 #include "map-pointer.hpp"
9 #include <boost/regex.hpp>
10 namespace regex_ns
= boost
;
13 namespace regex_ns
= std
;
17 std::string
strip_CR(const std::string
& str
)
24 void istrip_CR(std::string
& str
)
27 size_t xl
= str
.length();
29 char y
= str
[xl
- crc
- 1];
30 if(y
!= '\r' && y
!= '\n')
34 str
= str
.substr(0, xl
- crc
);
37 int firstchar(const std::string
& str
)
40 return static_cast<unsigned char>(str
[0]);
45 int string_to_bool(const std::string
& x
)
48 for(size_t i
= 0; i
< y
.length(); i
++)
50 if(y
== "on" || y
== "true" || y
== "yes" || y
== "1" || y
== "enable" || y
== "enabled")
52 if(y
== "off" || y
== "false" || y
== "no" || y
== "0" || y
== "disable" || y
== "disabled")
57 regex_results::regex_results()
62 regex_results::regex_results(std::vector
<std::string
> res
, std::vector
<std::pair
<size_t, size_t>> mch
)
69 regex_results::operator bool() const
74 bool regex_results::operator!() const
79 size_t regex_results::size() const
81 return results
.size();
84 const std::string
& regex_results::operator[](size_t i
) const
89 std::pair
<size_t, size_t> regex_results::match(size_t i
) const
94 regex_results
regex(const std::string
& regexp
, const std::string
& str
, const char* ex
) throw(std::bad_alloc
,
97 static threads::lock m
;
99 static std::map
<std::string
, map_pointer
<regex_ns::regex
>> regexps
;
100 if(!regexps
.count(regexp
)) {
101 regex_ns::regex
* y
= NULL
;
103 y
= new regex_ns::regex(regexp
, regex_ns::regex::extended
& ~regex_ns::regex::collate
);
105 } catch(std::bad_alloc
& e
) {
108 } catch(std::exception
& e
) {
109 throw std::runtime_error(e
.what());
113 regex_ns::smatch matches
;
114 bool x
= regex_ns::regex_match(str
.begin(), str
.end(), matches
, *(regexps
[regexp
]));
116 std::vector
<std::string
> res
;
117 std::vector
<std::pair
<size_t, size_t>> mch
;
118 for(size_t i
= 0; i
< matches
.size(); i
++) {
119 res
.push_back(matches
.str(i
));
120 mch
.push_back(std::make_pair(matches
[i
].first
- str
.begin(),
121 matches
[i
].second
- matches
[i
].first
));
123 return regex_results(res
, mch
);
125 throw std::runtime_error(ex
);
127 return regex_results();
130 bool regex_match(const std::string
& regexp
, const std::string
& str
, enum regex_match_mode mode
)
131 throw(std::bad_alloc
, std::runtime_error
)
133 static threads::lock m
;
134 static std::map
<std::string
, map_pointer
<regex_ns::regex
>> regexps
;
135 static std::map
<std::pair
<regex_match_mode
, std::string
> , std::pair
<std::string
, bool>> transform_cache
;
138 std::ostringstream y
;
140 //See if we have cached transform.
142 auto key
= std::make_pair(mode
, regexp
);
143 if(transform_cache
.count(key
)) {
144 auto entry
= transform_cache
[key
];
145 _regexp
= entry
.first
;
146 icase
= entry
.second
;
151 case REGEX_MATCH_REGEX
:
155 case REGEX_MATCH_IWILDCARDS
:
156 case REGEX_MATCH_LITERIAL
:
157 for(size_t i
= 0; i
< regexp
.length(); i
++)
158 if(regexp
[i
] == '?' && mode
== REGEX_MATCH_IWILDCARDS
)
160 else if(regexp
[i
] == '*' && mode
== REGEX_MATCH_IWILDCARDS
)
162 else if(regexp
[i
] >= 'A' && regexp
[i
] <= 'Z')
164 else if(regexp
[i
] >= 'a' && regexp
[i
] <= 'z')
166 else if(regexp
[i
] >= '0' && regexp
[i
] <= '9')
168 else if((unsigned char)regexp
[i
] > 127) //UTF-8.
171 y
<< "\\" << regexp
[i
];
172 _regexp
= ".*" + y
.str() + ".*";
175 case REGEX_MATCH_IREGEX
:
177 _regexp
= ".*" + regexp
+ ".*";
182 auto key
= std::make_pair(mode
, regexp
);
183 if(!transform_cache
.count(key
))
184 transform_cache
[key
] = std::make_pair(_regexp
, icase
);
186 if(!regexps
.count(regexp
)) {
187 regex_ns::regex
* y
= NULL
;
188 auto flags
= regex_ns::regex::extended
& ~regex_ns::regex::collate
;
189 flags
|= regex_ns::regex::nosubs
;
190 if(icase
) flags
|= regex_ns::regex::icase
;
192 y
= new regex_ns::regex(_regexp
, flags
);
193 regexps
[_regexp
] = y
;
194 } catch(std::bad_alloc
& e
) {
197 } catch(std::exception
& e
) {
198 throw std::runtime_error(e
.what());
201 return regex_ns::regex_match(str
.begin(), str
.end(), *(regexps
[_regexp
]));
206 template<typename ch
>
207 std::list
<std::basic_string
<ch
>> _split_on_codepoint(const std::basic_string
<ch
>& s
,
208 const std::basic_string
<ch
>& cp
)
210 std::list
<std::basic_string
<ch
>> ret
;
213 size_t len
= s
.length();
215 end
= s
.find(cp
, start
);
216 std::basic_string
<ch
> x
;
218 x
.resize(end
- start
);
219 std::copy(s
.begin() + start
, s
.begin() + end
, x
.begin());
220 start
= end
+ cp
.length();
222 x
.resize(len
- start
);
223 std::copy(s
.begin() + start
, s
.end(), x
.begin());
232 string_list
<T
>::string_list()
237 string_list
<T
>::string_list(const std::list
<std::basic_string
<T
>>& list
)
239 v
.resize(list
.size());
240 std::copy(list
.begin(), list
.end(), v
.begin());
244 bool string_list
<T
>::empty()
246 return (v
.size() == 0);
250 string_list
<T
> string_list
<T
>::strip_one() const
252 return string_list
<T
>(&v
[0], (v
.size() > 0) ? (v
.size() - 1) : 0);
256 size_t string_list
<T
>::size() const
262 const std::basic_string
<T
>& string_list
<T
>::operator[](size_t idx
) const
265 throw std::runtime_error("Index out of range");
270 string_list
<T
>::string_list(const std::basic_string
<T
>* array
, size_t arrsize
)
273 std::copy(array
, array
+ arrsize
, v
.begin());
277 bool string_list
<T
>::operator<(const string_list
<T
>& x
) const
279 for(size_t i
= 0; i
< v
.size() && i
< x
.v
.size(); i
++)
282 else if(v
[i
] > x
.v
[i
])
284 return (v
.size() < x
.v
.size());
288 bool string_list
<T
>::operator==(const string_list
<T
>& x
) const
290 if(v
.size() != x
.v
.size())
292 for(size_t i
= 0; i
< v
.size(); i
++)
299 bool string_list
<T
>::prefix_of(const string_list
<T
>& x
) const
301 if(v
.size() > x
.v
.size())
303 for(size_t i
= 0; i
< v
.size(); i
++)
311 template<typename T
> std::basic_string
<T
> separator();
312 template<> std::basic_string
<char> separator()
314 return utf8::to8(U
"\u2023");
317 template<> std::basic_string
<char16_t
> separator()
322 template<> std::basic_string
<char32_t
> separator()
327 template<> std::basic_string
<wchar_t> separator()
334 std::basic_string
<T
> string_list
<T
>::debug_name() const
336 std::basic_stringstream
<T
> x
;
337 for(size_t i
= 0; i
< v
.size(); i
++)
339 x
<< separator
<T
>() << v
[i
];
345 template class string_list
<char>;
346 template class string_list
<wchar_t>;
347 template class string_list
<char16_t
>;
348 template class string_list
<char32_t
>;
351 string_list
<char> split_on_codepoint(const std::string
& s
, char32_t cp
)
353 std::string _cp
= utf8::to8(std::u32string(1, cp
));
354 return _split_on_codepoint
<char>(s
, _cp
);
357 string_list
<char32_t
> split_on_codepoint(const std::u32string
& s
, char32_t cp
)
359 std::u32string
_cp(1, cp
);
360 return _split_on_codepoint
<char32_t
>(s
, _cp
);
363 template<typename T
> void token_iterator
<T
>::ctor_eos()
365 is_end_iterator
= true;
368 template<typename T
> void token_iterator
<T
>::ctor_itr(std::initializer_list
<const T
*> sep
, bool whole_sequence
)
369 throw(std::bad_alloc
)
371 whole_seq
= whole_sequence
;
372 is_end_iterator
= false;
380 template<typename T
> bool token_iterator
<T
>::equals_op(const token_iterator
<T
>& itr
) const throw()
382 bool is_end_a
= is_end_iterator
|| (bidx
>= str
.length());
383 bool is_end_b
= itr
.is_end_iterator
|| (itr
.bidx
>= itr
.str
.length());
393 return bidx
== itr
.bidx
;
396 template<typename T
> const std::basic_string
<T
>& token_iterator
<T
>::dereference() const throw()
401 template<typename T
> token_iterator
<T
> token_iterator
<T
>::postincrement() throw(std::bad_alloc
)
403 token_iterator
<T
> t
= *this;
408 template<typename T
> token_iterator
<T
>& token_iterator
<T
>::preincrement() throw(std::bad_alloc
)
410 bidx
= eidx
+ is_sep(eidx
);
415 template<typename T
> void token_iterator
<T
>::load_helper()
419 while(bidx
< str
.length() && (t
= is_sep(bidx
)))
422 while(eidx
< str
.length() && !is_sep(eidx
))
424 tmp
.resize(eidx
- bidx
);
425 std::copy(str
.begin() + bidx
, str
.begin() + eidx
, tmp
.begin());
428 template<typename T
> size_t token_iterator
<T
>::is_sep(size_t pos
)
430 if(pos
>= str
.length())
432 std::basic_string
<T
> h(1, str
[pos
++]);
436 auto i
= spliton
.lower_bound(h
);
437 //If string at i is end-of-set or does not start with h, there can't be a match.
438 if(i
== spliton
.end())
440 std::basic_string
<T
> i2
= *i
;
441 if(i2
.length() < h
.length() || (i2
.substr(0, h
.length()) != h
))
443 h
= h
+ std::basic_string
<T
>(1, str
[pos
++]);
447 template<typename T
> void token_iterator
<T
>::pull_fn()
449 eat_argument(&token_iterator
<T
>::ctor_itr
);
450 eat_argument(&token_iterator
<T
>::ctor_eos
);
451 eat_argument(&token_iterator
<T
>::postincrement
);
452 eat_argument(&token_iterator
<T
>::preincrement
);
453 eat_argument(&token_iterator
<T
>::dereference
);
454 eat_argument(&token_iterator
<T
>::equals_op
);
455 eat_argument(&token_iterator
<T
>::is_sep
);
456 eat_argument(&token_iterator
<T
>::load_helper
);
461 template<typename T
> void pull_token_itr()
463 token_iterator
<T
>::pull_fn();
466 void pull_token_itr2()
468 pull_token_itr
<char>();
469 pull_token_itr
<char32_t
>();
473 void _dummy_63263896236732867328673826783276283673867()