lsnes rr2-β24
[lsnes.git] / src / library / string.cpp
bloba06e31a3dd215937b92cfc38e4ca8fa8fe6c7e84
1 #include "string.hpp"
2 #include "minmax.hpp"
3 #include "threads.hpp"
4 #include "eatarg.hpp"
5 #include <cctype>
6 #include "map-pointer.hpp"
8 #ifdef USE_BOOST_REGEX
9 #include <boost/regex.hpp>
10 namespace regex_ns = boost;
11 #else
12 #include <regex>
13 namespace regex_ns = std;
14 #endif
17 std::string strip_CR(const std::string& str)
19 std::string x = str;
20 istrip_CR(x);
21 return x;
//Remove, in place, the run of CR ('\r') and LF ('\n') characters at the end of str.
//CR/LF characters that are followed by any other character are kept.
void istrip_CR(std::string& str)
{
	const size_t last_kept = str.find_last_not_of("\r\n");
	if(last_kept == std::string::npos)
		str.clear();			//Empty, or nothing but line terminators.
	else
		str.erase(last_kept + 1);
}
//Return the first byte of str as a non-negative value (0-255), or -1 if str is empty.
int firstchar(const std::string& str)
{
	return str.empty() ? -1 : static_cast<unsigned char>(str[0]);
}
//Interpret a textual boolean, ignoring ASCII case.
//
//Returns 1 for "on", "true", "yes", "1", "enable" or "enabled";
//returns 0 for "off", "false", "no", "0", "disable" or "disabled";
//returns -1 for anything else (including the empty string).
int string_to_bool(const std::string& x)
{
	std::string y = x;
	for(size_t i = 0; i < y.length(); i++) {
		//tolower() requires a value representable as unsigned char (or EOF); passing a plain char
		//is undefined for bytes >= 0x80 when char is signed, so convert first.
		y[i] = static_cast<char>(std::tolower(static_cast<unsigned char>(y[i])));
	}
	if(y == "on" || y == "true" || y == "yes" || y == "1" || y == "enable" || y == "enabled")
		return 1;
	if(y == "off" || y == "false" || y == "no" || y == "0" || y == "disable" || y == "disabled")
		return 0;
	return -1;
}
57 regex_results::regex_results()
59 matched = false;
62 regex_results::regex_results(std::vector<std::string> res, std::vector<std::pair<size_t, size_t>> mch)
64 matched = true;
65 matches = mch;
66 results = res;
69 regex_results::operator bool() const
71 return matched;
74 bool regex_results::operator!() const
76 return !matched;
79 size_t regex_results::size() const
81 return results.size();
84 const std::string& regex_results::operator[](size_t i) const
86 return results[i];
89 std::pair<size_t, size_t> regex_results::match(size_t i) const
91 return matches[i];
94 regex_results regex(const std::string& regexp, const std::string& str, const char* ex) throw(std::bad_alloc,
95 std::runtime_error)
97 static threads::lock m;
98 threads::alock h(m);
99 static std::map<std::string, map_pointer<regex_ns::regex>> regexps;
100 if(!regexps.count(regexp)) {
101 regex_ns::regex* y = NULL;
102 try {
103 y = new regex_ns::regex(regexp, regex_ns::regex::extended & ~regex_ns::regex::collate);
104 regexps[regexp] = y;
105 } catch(std::bad_alloc& e) {
106 delete y;
107 throw;
108 } catch(std::exception& e) {
109 throw std::runtime_error(e.what());
113 regex_ns::smatch matches;
114 bool x = regex_ns::regex_match(str.begin(), str.end(), matches, *(regexps[regexp]));
115 if(x) {
116 std::vector<std::string> res;
117 std::vector<std::pair<size_t, size_t>> mch;
118 for(size_t i = 0; i < matches.size(); i++) {
119 res.push_back(matches.str(i));
120 mch.push_back(std::make_pair(matches[i].first - str.begin(),
121 matches[i].second - matches[i].first));
123 return regex_results(res, mch);
124 } else if(ex)
125 throw std::runtime_error(ex);
126 else
127 return regex_results();
130 bool regex_match(const std::string& regexp, const std::string& str, enum regex_match_mode mode)
131 throw(std::bad_alloc, std::runtime_error)
133 static threads::lock m;
134 static std::map<std::string, map_pointer<regex_ns::regex>> regexps;
135 static std::map<std::pair<regex_match_mode, std::string> , std::pair<std::string, bool>> transform_cache;
136 std::string _regexp;
137 bool icase = false;
138 std::ostringstream y;
140 //See if we have cached transform.
141 threads::alock h(m);
142 auto key = std::make_pair(mode, regexp);
143 if(transform_cache.count(key)) {
144 auto entry = transform_cache[key];
145 _regexp = entry.first;
146 icase = entry.second;
147 goto transformed;
150 switch(mode) {
151 case REGEX_MATCH_REGEX:
152 icase = false;
153 _regexp = regexp;
154 break;
155 case REGEX_MATCH_IWILDCARDS:
156 case REGEX_MATCH_LITERIAL:
157 for(size_t i = 0; i < regexp.length(); i++)
158 if(regexp[i] == '?' && mode == REGEX_MATCH_IWILDCARDS)
159 y << ".";
160 else if(regexp[i] == '*' && mode == REGEX_MATCH_IWILDCARDS)
161 y << ".*";
162 else if(regexp[i] >= 'A' && regexp[i] <= 'Z')
163 y << regexp[i];
164 else if(regexp[i] >= 'a' && regexp[i] <= 'z')
165 y << regexp[i];
166 else if(regexp[i] >= '0' && regexp[i] <= '9')
167 y << regexp[i];
168 else if((unsigned char)regexp[i] > 127) //UTF-8.
169 y << regexp[i];
170 else
171 y << "\\" << regexp[i];
172 _regexp = ".*" + y.str() + ".*";
173 icase = true;
174 break;
175 case REGEX_MATCH_IREGEX:
176 icase = true;
177 _regexp = ".*" + regexp + ".*";
178 break;
180 transformed:
181 threads::alock h(m);
182 auto key = std::make_pair(mode, regexp);
183 if(!transform_cache.count(key))
184 transform_cache[key] = std::make_pair(_regexp, icase);
186 if(!regexps.count(regexp)) {
187 regex_ns::regex* y = NULL;
188 auto flags = regex_ns::regex::extended & ~regex_ns::regex::collate;
189 flags |= regex_ns::regex::nosubs;
190 if(icase) flags |= regex_ns::regex::icase;
191 try {
192 y = new regex_ns::regex(_regexp, flags);
193 regexps[_regexp] = y;
194 } catch(std::bad_alloc& e) {
195 delete y;
196 throw;
197 } catch(std::exception& e) {
198 throw std::runtime_error(e.what());
201 return regex_ns::regex_match(str.begin(), str.end(), *(regexps[_regexp]));
namespace
{
	//Split s at every occurrence of the separator string cp and return the pieces in order.
	//
	//An empty s yields an empty list. Otherwise there is one more piece than there are separators, so
	//leading, trailing and adjacent separators produce empty pieces. cp must not be empty.
	template<typename ch>
	std::list<std::basic_string<ch>> _split_on_codepoint(const std::basic_string<ch>& s,
		const std::basic_string<ch>& cp)
	{
		std::list<std::basic_string<ch>> pieces;
		const size_t total = s.length();
		size_t from = 0;
		size_t hit = 0;
		while(hit < total) {
			hit = s.find(cp, from);
			if(hit >= total) {
				//No further separator: the remainder is the last piece.
				pieces.push_back(s.substr(from));
				break;
			}
			pieces.push_back(s.substr(from, hit - from));
			from = hit + cp.length();
		}
		return pieces;
	}
}
231 template<typename T>
232 string_list<T>::string_list()
236 template<typename T>
237 string_list<T>::string_list(const std::list<std::basic_string<T>>& list)
239 v.resize(list.size());
240 std::copy(list.begin(), list.end(), v.begin());
243 template<typename T>
244 bool string_list<T>::empty()
246 return (v.size() == 0);
249 template<typename T>
250 string_list<T> string_list<T>::strip_one() const
252 return string_list<T>(&v[0], (v.size() > 0) ? (v.size() - 1) : 0);
255 template<typename T>
256 size_t string_list<T>::size() const
258 return v.size();
261 template<typename T>
262 const std::basic_string<T>& string_list<T>::operator[](size_t idx) const
264 if(idx >= v.size())
265 throw std::runtime_error("Index out of range");
266 return v[idx];
269 template<typename T>
270 string_list<T>::string_list(const std::basic_string<T>* array, size_t arrsize)
272 v.resize(arrsize);
273 std::copy(array, array + arrsize, v.begin());
276 template<typename T>
277 bool string_list<T>::operator<(const string_list<T>& x) const
279 for(size_t i = 0; i < v.size() && i < x.v.size(); i++)
280 if(v[i] < x.v[i])
281 return true;
282 else if(v[i] > x.v[i])
283 return false;
284 return (v.size() < x.v.size());
287 template<typename T>
288 bool string_list<T>::operator==(const string_list<T>& x) const
290 if(v.size() != x.v.size())
291 return false;
292 for(size_t i = 0; i < v.size(); i++)
293 if(v[i] != x.v[i])
294 return false;
295 return true;
298 template<typename T>
299 bool string_list<T>::prefix_of(const string_list<T>& x) const
301 if(v.size() > x.v.size())
302 return false;
303 for(size_t i = 0; i < v.size(); i++)
304 if(v[i] != x.v[i])
305 return false;
306 return true;
309 namespace
311 template<typename T> std::basic_string<T> separator();
312 template<> std::basic_string<char> separator()
314 return utf8::to8(U"\u2023");
317 template<> std::basic_string<char16_t> separator()
319 return u"\u2023";
322 template<> std::basic_string<char32_t> separator()
324 return U"\u2023";
327 template<> std::basic_string<wchar_t> separator()
329 return L"->";
333 template<typename T>
334 std::basic_string<T> string_list<T>::debug_name() const
336 std::basic_stringstream<T> x;
337 for(size_t i = 0; i < v.size(); i++)
338 if(i != 0)
339 x << separator<T>() << v[i];
340 else
341 x << v[i];
342 return x.str();
345 template class string_list<char>;
346 template class string_list<wchar_t>;
347 template class string_list<char16_t>;
348 template class string_list<char32_t>;
351 string_list<char> split_on_codepoint(const std::string& s, char32_t cp)
353 std::string _cp = utf8::to8(std::u32string(1, cp));
354 return _split_on_codepoint<char>(s, _cp);
357 string_list<char32_t> split_on_codepoint(const std::u32string& s, char32_t cp)
359 std::u32string _cp(1, cp);
360 return _split_on_codepoint<char32_t>(s, _cp);
363 template<typename T> void token_iterator<T>::ctor_eos()
365 is_end_iterator = true;
368 template<typename T> void token_iterator<T>::ctor_itr(std::initializer_list<const T*> sep, bool whole_sequence)
369 throw(std::bad_alloc)
371 whole_seq = whole_sequence;
372 is_end_iterator = false;
373 bidx = 0;
374 eidx = 0;
375 for(auto i : sep)
376 spliton.insert(i);
377 load_helper();
380 template<typename T> bool token_iterator<T>::equals_op(const token_iterator<T>& itr) const throw()
382 bool is_end_a = is_end_iterator || (bidx >= str.length());
383 bool is_end_b = itr.is_end_iterator || (itr.bidx >= itr.str.length());
384 if(is_end_a)
385 if(is_end_b)
386 return true;
387 else
388 return false;
389 else
390 if(is_end_b)
391 return false;
392 else
393 return bidx == itr.bidx;
396 template<typename T> const std::basic_string<T>& token_iterator<T>::dereference() const throw()
398 return tmp;
401 template<typename T> token_iterator<T> token_iterator<T>::postincrement() throw(std::bad_alloc)
403 token_iterator<T> t = *this;
404 ++*this;
405 return t;
408 template<typename T> token_iterator<T>& token_iterator<T>::preincrement() throw(std::bad_alloc)
410 bidx = eidx + is_sep(eidx);
411 load_helper();
412 return *this;
415 template<typename T> void token_iterator<T>::load_helper()
417 size_t t;
418 if(whole_seq)
419 while(bidx < str.length() && (t = is_sep(bidx)))
420 bidx += t;
421 eidx = bidx;
422 while(eidx < str.length() && !is_sep(eidx))
423 eidx++;
424 tmp.resize(eidx - bidx);
425 std::copy(str.begin() + bidx, str.begin() + eidx, tmp.begin());
428 template<typename T> size_t token_iterator<T>::is_sep(size_t pos)
430 if(pos >= str.length())
431 return 0;
432 std::basic_string<T> h(1, str[pos++]);
433 while(true) {
434 if(spliton.count(h))
435 return h.length();
436 auto i = spliton.lower_bound(h);
437 //If string at i is end-of-set or does not start with h, there can't be a match.
438 if(i == spliton.end())
439 return 0;
440 std::basic_string<T> i2 = *i;
441 if(i2.length() < h.length() || (i2.substr(0, h.length()) != h))
442 return 0;
443 h = h + std::basic_string<T>(1, str[pos++]);
447 template<typename T> void token_iterator<T>::pull_fn()
449 eat_argument(&token_iterator<T>::ctor_itr);
450 eat_argument(&token_iterator<T>::ctor_eos);
451 eat_argument(&token_iterator<T>::postincrement);
452 eat_argument(&token_iterator<T>::preincrement);
453 eat_argument(&token_iterator<T>::dereference);
454 eat_argument(&token_iterator<T>::equals_op);
455 eat_argument(&token_iterator<T>::is_sep);
456 eat_argument(&token_iterator<T>::load_helper);
459 namespace
461 template<typename T> void pull_token_itr()
463 token_iterator<T>::pull_fn();
466 void pull_token_itr2()
468 pull_token_itr<char>();
469 pull_token_itr<char32_t>();
473 void _dummy_63263896236732867328673826783276283673867()
475 pull_token_itr2();