src/util/string.h

   1 /*
   2 Minetest
   3 Copyright (C) 2010-2013 celeron55, Perttu Ahola <celeron55@gmail.com>
   4
   5 This program is free software; you can redistribute it and/or modify
   6 it under the terms of the GNU Lesser General Public License as published by
   7 the Free Software Foundation; either version 2.1 of the License, or
   8 (at your option) any later version.
   9
  10 This program is distributed in the hope that it will be useful,
  11 but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 GNU Lesser General Public License for more details.
  14
  15 You should have received a copy of the GNU Lesser General Public License along
  16 with this program; if not, write to the Free Software Foundation, Inc.,
  17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18 */
  19
  20 #pragma once
  21
  22 #include "irrlichttypes_bloated.h"
  23 #include "irrString.h"
  24 #include <cstdlib>
  25 #include <string>
  26 #include <cstring>
  27 #include <vector>
  28 #include <map>
  29 #include <sstream>
  30 #include <iomanip>
  31 #include <cctype>
  32 #include <unordered_map>
  33
  34 class Translations;
  35
  36 #define STRINGIFY(x) #x
  37 #define TOSTRING(x) STRINGIFY(x)
  38
  39 // Checks whether a value is an ASCII printable character
  40 #define IS_ASCII_PRINTABLE_CHAR(x)   \
  41         (((unsigned int)(x) >= 0x20) &&  \
  42         ( (unsigned int)(x) <= 0x7e))
  43
  44 // Checks whether a byte is an inner byte for an utf-8 multibyte sequence
  45 #define IS_UTF8_MULTB_INNER(x)       \
  46         (((unsigned char)(x) >= 0x80) && \
  47         ( (unsigned char)(x) <= 0xbf))
  48
  49 // Checks whether a byte is a start byte for an utf-8 multibyte sequence
  50 #define IS_UTF8_MULTB_START(x)       \
  51         (((unsigned char)(x) >= 0xc2) && \
  52         ( (unsigned char)(x) <= 0xf4))
  53
  54 // Given a start byte x for an utf-8 multibyte sequence
  55 // it gives the length of the whole sequence in bytes.
  56 #define UTF8_MULTB_START_LEN(x)            \
  57         (((unsigned char)(x) < 0xe0) ? 2 :     \
  58         (((unsigned char)(x) < 0xf0) ? 3 : 4))
  59
  60 typedef std::unordered_map<std::string, std::string> StringMap;
  61
// Pairs a flag's textual name with its numeric value.
// NOTE(review): presumably used as the entry type of the tables passed to
// readFlagString()/writeFlagString(); their definitions are not visible in
// this header, so how such a table is terminated is unknown - confirm there.
struct FlagDesc {
        const char *name; // textual name of the flag
        u32 flag;         // numeric value of the flag
};
  66
// try not to convert between wide/utf8 encodings; this can result in data loss
// try to only convert between them when you need to input/output stuff via Irrlicht
// (Declarations only; the implementations are not part of this header.)
std::wstring utf8_to_wide(const std::string &input);
std::string wide_to_utf8(const std::wstring &input);

// NOTE(review): returns a raw wchar_t pointer; ownership is not visible from
// this header - confirm whether the caller has to release it.
wchar_t *utf8_to_wide_c(const char *str);
  73
// NEVER use those two functions unless you have a VERY GOOD reason to
// they just convert between wide and multibyte encoding
// multibyte encoding depends on current locale, this is no good, especially on Windows
// NOTE(review): three declarations follow although the text above says "two";
// presumably the warning applies to all of them - confirm.

// You must free the returned string!
// The returned string is allocated using new
wchar_t *narrow_to_wide_c(const char *str);
std::wstring narrow_to_wide(const std::string &mbs);
std::string wide_to_narrow(const std::wstring &wcs);
  83
// Miscellaneous string helpers. These are declarations only; the
// implementations are not visible in this header, so the notes below are
// assumptions drawn from the names and signatures - confirm against the
// definitions.

// presumably URL percent-encoding / decoding of str - TODO confirm
std::string urlencode(const std::string &str);
std::string urldecode(const std::string &str);
// presumably parse / serialize a set of flags described by a FlagDesc table;
// the role of flagmask is not visible here - TODO confirm
u32 readFlagString(std::string str, const FlagDesc *flagdesc, u32 *flagmask);
std::string writeFlagString(u32 flags, const FlagDesc *flagdesc, u32 flagmask);
// presumably project-local counterparts of strlcpy() / strtok_r() - TODO confirm
size_t mystrlcpy(char *dst, const char *src, size_t size);
char *mystrtok_r(char *s, const char *sep, char **lasts);
// presumably derives a 64-bit seed from a string - TODO confirm
u64 read_seed(const char *str);
// Writes into color and reports success via the bool result (judging by the
// signature); default_alpha defaults to 0xff. Accepted formats and the effect
// of quiet are not visible here - TODO confirm.
bool parseColorString(const std::string &value, video::SColor &color, bool quiet,
                unsigned char default_alpha = 0xff);
  93
  94
  95 /**
  96  * Returns a copy of \p str with spaces inserted at the right hand side to ensure
  97  * that the string is \p len characters in length. If \p str is <= \p len then the
  98  * returned string will be identical to str.
  99  */
 100 inline std::string padStringRight(std::string str, size_t len)
 101 {
 102         if (len > str.size())
 103                 str.insert(str.end(), len - str.size(), ' ');
 104
 105         return str;
 106 }
 107
 108 /**
 109  * Returns a version of \p str with the first occurrence of a string
 110  * contained within ends[] removed from the end of the string.
 111  *
 112  * @param str
 113  * @param ends A NULL- or ""- terminated array of strings to remove from s in
 114  *      the copy produced.  Note that once one of these strings is removed
 115  *      that no further postfixes contained within this array are removed.
 116  *
 117  * @return If no end could be removed then "" is returned.
 118  */
 119 inline std::string removeStringEnd(const std::string &str,
 120                 const char *ends[])
 121 {
 122         const char **p = ends;
 123
 124         for (; *p && (*p)[0] != '\0'; p++) {
 125                 std::string end = *p;
 126                 if (str.size() < end.size())
 127                         continue;
 128                 if (str.compare(str.size() - end.size(), end.size(), end) == 0)
 129                         return str.substr(0, str.size() - end.size());
 130         }
 131
 132         return "";
 133 }
 134
 135
 136 /**
 137  * Check two strings for equivalence.  If \p case_insensitive is true
 138  * then the case of the strings is ignored (default is false).
 139  *
 140  * @param s1
 141  * @param s2
 142  * @param case_insensitive
 143  * @return true if the strings match
 144  */
 145 template <typename T>
 146 inline bool str_equal(const std::basic_string<T> &s1,
 147                 const std::basic_string<T> &s2,
 148                 bool case_insensitive = false)
 149 {
 150         if (!case_insensitive)
 151                 return s1 == s2;
 152
 153         if (s1.size() != s2.size())
 154                 return false;
 155
 156         for (size_t i = 0; i < s1.size(); ++i)
 157                 if(tolower(s1[i]) != tolower(s2[i]))
 158                         return false;
 159
 160         return true;
 161 }
 162
 163
 164 /**
 165  * Check whether \p str begins with the string prefix. If \p case_insensitive
 166  * is true then the check is case insensitve (default is false; i.e. case is
 167  * significant).
 168  *
 169  * @param str
 170  * @param prefix
 171  * @param case_insensitive
 172  * @return true if the str begins with prefix
 173  */
 174 template <typename T>
 175 inline bool str_starts_with(const std::basic_string<T> &str,
 176                 const std::basic_string<T> &prefix,
 177                 bool case_insensitive = false)
 178 {
 179         if (str.size() < prefix.size())
 180                 return false;
 181
 182         if (!case_insensitive)
 183                 return str.compare(0, prefix.size(), prefix) == 0;
 184
 185         for (size_t i = 0; i < prefix.size(); ++i)
 186                 if (tolower(str[i]) != tolower(prefix[i]))
 187                         return false;
 188         return true;
 189 }
 190
 191 /**
 192  * Check whether \p str begins with the string prefix. If \p case_insensitive
 193  * is true then the check is case insensitve (default is false; i.e. case is
 194  * significant).
 195  *
 196  * @param str
 197  * @param prefix
 198  * @param case_insensitive
 199  * @return true if the str begins with prefix
 200  */
 201 template <typename T>
 202 inline bool str_starts_with(const std::basic_string<T> &str,
 203                 const T *prefix,
 204                 bool case_insensitive = false)
 205 {
 206         return str_starts_with(str, std::basic_string<T>(prefix),
 207                         case_insensitive);
 208 }
 209
 210
 211 /**
 212  * Check whether \p str ends with the string suffix. If \p case_insensitive
 213  * is true then the check is case insensitve (default is false; i.e. case is
 214  * significant).
 215  *
 216  * @param str
 217  * @param suffix
 218  * @param case_insensitive
 219  * @return true if the str begins with suffix
 220  */
 221 template <typename T>
 222 inline bool str_ends_with(const std::basic_string<T> &str,
 223                 const std::basic_string<T> &suffix,
 224                 bool case_insensitive = false)
 225 {
 226         if (str.size() < suffix.size())
 227                 return false;
 228
 229         size_t start = str.size() - suffix.size();
 230         if (!case_insensitive)
 231                 return str.compare(start, suffix.size(), suffix) == 0;
 232
 233         for (size_t i = 0; i < suffix.size(); ++i)
 234                 if (tolower(str[start + i]) != tolower(suffix[i]))
 235                         return false;
 236         return true;
 237 }
 238
 239
 240 /**
 241  * Check whether \p str ends with the string suffix. If \p case_insensitive
 242  * is true then the check is case insensitve (default is false; i.e. case is
 243  * significant).
 244  *
 245  * @param str
 246  * @param suffix
 247  * @param case_insensitive
 248  * @return true if the str begins with suffix
 249  */
 250 template <typename T>
 251 inline bool str_ends_with(const std::basic_string<T> &str,
 252                 const T *suffix,
 253                 bool case_insensitive = false)
 254 {
 255         return str_ends_with(str, std::basic_string<T>(suffix),
 256                         case_insensitive);
 257 }
 258
 259
 260 /**
 261  * Splits a string into its component parts separated by the character
 262  * \p delimiter.
 263  *
 264  * @return An std::vector<std::basic_string<T> > of the component parts
 265  */
 266 template <typename T>
 267 inline std::vector<std::basic_string<T> > str_split(
 268                 const std::basic_string<T> &str,
 269                 T delimiter)
 270 {
 271         std::vector<std::basic_string<T> > parts;
 272         std::basic_stringstream<T> sstr(str);
 273         std::basic_string<T> part;
 274
 275         while (std::getline(sstr, part, delimiter))
 276                 parts.push_back(part);
 277
 278         return parts;
 279 }
 280
 281
 282 /**
 283  * @param str
 284  * @return A copy of \p str converted to all lowercase characters.
 285  */
 286 inline std::string lowercase(const std::string &str)
 287 {
 288         std::string s2;
 289
 290         s2.reserve(str.size());
 291
 292         for (char i : str)
 293                 s2 += tolower(i);
 294
 295         return s2;
 296 }
 297
 298
 299 /**
 300  * @param str
 301  * @return A copy of \p str with leading and trailing whitespace removed.
 302  */
 303 inline std::string trim(const std::string &str)
 304 {
 305         size_t front = 0;
 306
 307         while (std::isspace(str[front]))
 308                 ++front;
 309
 310         size_t back = str.size();
 311         while (back > front && std::isspace(str[back - 1]))
 312                 --back;
 313
 314         return str.substr(front, back - front);
 315 }
 316
 317
 318 /**
 319  * Returns whether \p str should be regarded as (bool) true.  Case and leading
 320  * and trailing whitespace are ignored.  Values that will return
 321  * true are "y", "yes", "true" and any number that is not 0.
 322  * @param str
 323  */
 324 inline bool is_yes(const std::string &str)
 325 {
 326         std::string s2 = lowercase(trim(str));
 327
 328         return s2 == "y" || s2 == "yes" || s2 == "true" || atoi(s2.c_str()) != 0;
 329 }
 330
 331
 332 /**
 333  * Converts the string \p str to a signed 32-bit integer. The converted value
 334  * is constrained so that min <= value <= max.
 335  *
 336  * @see atoi(3) for limitations
 337  *
 338  * @param str
 339  * @param min Range minimum
 340  * @param max Range maximum
 341  * @return The value converted to a signed 32-bit integer and constrained
 342  *      within the range defined by min and max (inclusive)
 343  */
 344 inline s32 mystoi(const std::string &str, s32 min, s32 max)
 345 {
 346         s32 i = atoi(str.c_str());
 347
 348         if (i < min)
 349                 i = min;
 350         if (i > max)
 351                 i = max;
 352
 353         return i;
 354 }
 355
 356
// MSVC2010 includes its own versions of these
 358 //#if !defined(_MSC_VER) || _MSC_VER < 1600
 359
 360
 361 /**
 362  * Returns a 32-bit value reprensented by the string \p str (decimal).
 363  * @see atoi(3) for further limitations
 364  */
 365 inline s32 mystoi(const std::string &str)
 366 {
 367         return atoi(str.c_str());
 368 }
 369
 370
 371 /**
 372  * Returns s 32-bit value represented by the wide string \p str (decimal).
 373  * @see atoi(3) for further limitations
 374  */
 375 inline s32 mystoi(const std::wstring &str)
 376 {
 377         return mystoi(wide_to_narrow(str));
 378 }
 379
 380
 381 /**
 382  * Returns a float reprensented by the string \p str (decimal).
 383  * @see atof(3)
 384  */
 385 inline float mystof(const std::string &str)
 386 {
 387         return atof(str.c_str());
 388 }
 389
 390 //#endif
 391
// Redirect the names stoi / stof to the mystoi() / mystof() helpers above.
// NOTE(review): these are object-like macros, so every later `stoi` / `stof`
// token in a translation unit that includes this header is rewritten, even
// when qualified (`std::stoi` becomes `std::mystoi`) - confirm that no
// includer relies on the standard library versions.
#define stoi mystoi
#define stof mystof
 394
 395 /// Returns a value represented by the string \p val.
 396 template <typename T>
 397 inline T from_string(const std::string &str)
 398 {
 399         std::stringstream tmp(str);
 400         T t;
 401         tmp >> t;
 402         return t;
 403 }
 404
 405 /// Returns a 64-bit signed value represented by the string \p str (decimal).
 406 inline s64 stoi64(const std::string &str) { return from_string<s64>(str); }
 407
 408 #if __cplusplus < 201103L
 409 namespace std {
 410
 411 /// Returns a string representing the value \p val.
 412 template <typename T>
 413 inline string to_string(T val)
 414 {
 415         ostringstream oss;
 416         oss << val;
 417         return oss.str();
 418 }
 419 #define DEFINE_STD_TOSTRING_FLOATINGPOINT(T)            \
 420         template <>                                     \
 421         inline string to_string<T>(T val)               \
 422         {                                               \
 423                 ostringstream oss;                      \
 424                 oss << std::fixed                       \
 425                         << std::setprecision(6)         \
 426                         << val;                         \
 427                 return oss.str();                       \
 428         }
 429 DEFINE_STD_TOSTRING_FLOATINGPOINT(float)
 430 DEFINE_STD_TOSTRING_FLOATINGPOINT(double)
 431 DEFINE_STD_TOSTRING_FLOATINGPOINT(long double)
 432
 433 #undef DEFINE_STD_TOSTRING_FLOATINGPOINT
 434
 435 /// Returns a wide string representing the value \p val
 436 template <typename T>
 437 inline wstring to_wstring(T val)
 438 {
 439       return utf8_to_wide(to_string(val));
 440 }
 441 }
 442 #endif
 443
 444 /// Returns a string representing the decimal value of the 32-bit value \p i.
 445 inline std::string itos(s32 i) { return std::to_string(i); }
 446 /// Returns a string representing the decimal value of the 64-bit value \p i.
 447 inline std::string i64tos(s64 i) { return std::to_string(i); }
 448
 449 // std::to_string uses the '%.6f' conversion, which is inconsistent with
 450 // std::ostream::operator<<() and impractical too.  ftos() uses the
 451 // more generic and std::ostream::operator<<()-compatible '%G' format.
 452 /// Returns a string representing the decimal value of the float value \p f.
 453 inline std::string ftos(float f)
 454 {
 455         std::ostringstream oss;
 456         oss << f;
 457         return oss.str();
 458 }
 459
 460
 461 /**
 462  * Replace all occurrences of \p pattern in \p str with \p replacement.
 463  *
 464  * @param str String to replace pattern with replacement within.
 465  * @param pattern The pattern to replace.
 466  * @param replacement What to replace the pattern with.
 467  */
 468 inline void str_replace(std::string &str, const std::string &pattern,
 469                 const std::string &replacement)
 470 {
 471         std::string::size_type start = str.find(pattern, 0);
 472         while (start != str.npos) {
 473                 str.replace(start, pattern.size(), replacement);
 474                 start = str.find(pattern, start + replacement.size());
 475         }
 476 }
 477
 478 /**
 479  * Escapes characters [ ] \ , ; that can not be used in formspecs
 480  */
 481 inline void str_formspec_escape(std::string &str)
 482 {
 483         str_replace(str, "\\", "\\\\");
 484         str_replace(str, "]", "\\]");
 485         str_replace(str, "[", "\\[");
 486         str_replace(str, ";", "\\;");
 487         str_replace(str, ",", "\\,");
 488 }
 489
/**
 * Replace all occurrences of the character \p from in \p str with \p to.
 *
 * Declaration only; the implementation is not part of this header.
 *
 * @param str The string to (potentially) modify.
 * @param from The character in str to replace.
 * @param to The replacement character.
 */
void str_replace(std::string &str, char from, char to);
 498
 499
 500 /**
 501  * Check that a string only contains whitelisted characters. This is the
 502  * opposite of string_allowed_blacklist().
 503  *
 504  * @param str The string to be checked.
 505  * @param allowed_chars A string containing permitted characters.
 506  * @return true if the string is allowed, otherwise false.
 507  *
 508  * @see string_allowed_blacklist()
 509  */
 510 inline bool string_allowed(const std::string &str, const std::string &allowed_chars)
 511 {
 512         return str.find_first_not_of(allowed_chars) == str.npos;
 513 }
 514
 515
 516 /**
 517  * Check that a string contains no blacklisted characters. This is the
 518  * opposite of string_allowed().
 519  *
 520  * @param str The string to be checked.
 521  * @param blacklisted_chars A string containing prohibited characters.
 522  * @return true if the string is allowed, otherwise false.
 523
 524  * @see string_allowed()
 525  */
 526 inline bool string_allowed_blacklist(const std::string &str,
 527                 const std::string &blacklisted_chars)
 528 {
 529         return str.find_first_of(blacklisted_chars) == str.npos;
 530 }
 531
 532
 533 /**
 534  * Create a string based on \p from where a newline is forcefully inserted
 535  * every \p row_len characters.
 536  *
 537  * @note This function does not honour word wraps and blindy inserts a newline
 538  *      every \p row_len characters whether it breaks a word or not.  It is
 539  *      intended to be used for, for example, showing paths in the GUI.
 540  *
 541  * @note This function doesn't wrap inside utf-8 multibyte sequences and also
 542  *      counts multibyte sequences correcly as single characters.
 543  *
 544  * @param from The (utf-8) string to be wrapped into rows.
 545  * @param row_len The row length (in characters).
 546  * @return A new string with the wrapping applied.
 547  */
 548 inline std::string wrap_rows(const std::string &from,
 549                 unsigned row_len)
 550 {
 551         std::string to;
 552
 553         size_t character_idx = 0;
 554         for (size_t i = 0; i < from.size(); i++) {
 555                 if (!IS_UTF8_MULTB_INNER(from[i])) {
 556                         // Wrap string after last inner byte of char
 557                         if (character_idx > 0 && character_idx % row_len == 0)
 558                                 to += '\n';
 559                         character_idx++;
 560                 }
 561                 to += from[i];
 562         }
 563
 564         return to;
 565 }
 566
 567
 568 /**
 569  * Removes backslashes from an escaped string (FormSpec strings)
 570  */
 571 template <typename T>
 572 inline std::basic_string<T> unescape_string(const std::basic_string<T> &s)
 573 {
 574         std::basic_string<T> res;
 575
 576         for (size_t i = 0; i < s.length(); i++) {
 577                 if (s[i] == '\\') {
 578                         i++;
 579                         if (i >= s.length())
 580                                 break;
 581                 }
 582                 res += s[i];
 583         }
 584
 585         return res;
 586 }
 587
 588 /**
 589  * Remove all escape sequences in \p s.
 590  *
 591  * @param s The string in which to remove escape sequences.
 592  * @return \p s, with escape sequences removed.
 593  */
 594 template <typename T>
 595 std::basic_string<T> unescape_enriched(const std::basic_string<T> &s)
 596 {
 597         std::basic_string<T> output;
 598         size_t i = 0;
 599         while (i < s.length()) {
 600                 if (s[i] == '\x1b') {
 601                         ++i;
 602                         if (i == s.length()) continue;
 603                         if (s[i] == '(') {
 604                                 ++i;
 605                                 while (i < s.length() && s[i] != ')') {
 606                                         if (s[i] == '\\') {
 607                                                 ++i;
 608                                         }
 609                                         ++i;
 610                                 }
 611                                 ++i;
 612                         } else {
 613                                 ++i;
 614                         }
 615                         continue;
 616                 }
 617                 output += s[i];
 618                 ++i;
 619         }
 620         return output;
 621 }
 622
 623 template <typename T>
 624 std::vector<std::basic_string<T> > split(const std::basic_string<T> &s, T delim)
 625 {
 626         std::vector<std::basic_string<T> > tokens;
 627
 628         std::basic_string<T> current;
 629         bool last_was_escape = false;
 630         for (size_t i = 0; i < s.length(); i++) {
 631                 T si = s[i];
 632                 if (last_was_escape) {
 633                         current += '\\';
 634                         current += si;
 635                         last_was_escape = false;
 636                 } else {
 637                         if (si == delim) {
 638                                 tokens.push_back(current);
 639                                 current = std::basic_string<T>();
 640                                 last_was_escape = false;
 641                         } else if (si == '\\') {
 642                                 last_was_escape = true;
 643                         } else {
 644                                 current += si;
 645                                 last_was_escape = false;
 646                         }
 647                 }
 648         }
 649         //push last element
 650         tokens.push_back(current);
 651
 652         return tokens;
 653 }
 654
// Declarations only; the implementations are not part of this header.
// NOTE(review): presumably returns \p s with its translatable parts replaced
// using the given Translations object - confirm against the definition.
std::wstring translate_string(const std::wstring &s, Translations *translations);

// NOTE(review): overload without an explicit Translations object; which
// translations it uses is not visible here - confirm against the definition.
std::wstring translate_string(const std::wstring &s);
 658
 659 inline std::wstring unescape_translate(const std::wstring &s) {
 660         return unescape_enriched(translate_string(s));
 661 }
 662
 663 /**
 664  * Checks that all characters in \p to_check are a decimal digits.
 665  *
 666  * @param to_check
 667  * @return true if to_check is not empty and all characters in to_check are
 668  *      decimal digits, otherwise false
 669  */
 670 inline bool is_number(const std::string &to_check)
 671 {
 672         for (char i : to_check)
 673                 if (!std::isdigit(i))
 674                         return false;
 675
 676         return !to_check.empty();
 677 }
 678
 679
 680 /**
 681  * Returns a C-string, either "true" or "false", corresponding to \p val.
 682  *
 683  * @return If \p val is true, then "true" is returned, otherwise "false".
 684  */
 685 inline const char *bool_to_cstr(bool val)
 686 {
 687         return val ? "true" : "false";
 688 }
 689
 690 inline const std::string duration_to_string(int sec)
 691 {
 692         int min = sec / 60;
 693         sec %= 60;
 694         int hour = min / 60;
 695         min %= 60;
 696
 697         std::stringstream ss;
 698         if (hour > 0) {
 699                 ss << hour << "h ";
 700         }
 701
 702         if (min > 0) {
 703                 ss << min << "m ";
 704         }
 705
 706         if (sec > 0) {
 707                 ss << sec << "s ";
 708         }
 709
 710         return ss.str();
 711 }
 712
 713 /**
 714  * Joins a vector of strings by the string \p delimiter.
 715  *
 716  * @return A std::string
 717  */
 718 inline std::string str_join(const std::vector<std::string> &list,
 719                 const std::string &delimiter)
 720 {
 721         std::ostringstream oss;
 722         bool first = true;
 723         for (const auto &part : list) {
 724                 if (!first)
 725                         oss << delimiter;
 726                 oss << part;
 727                 first = false;
 728         }
 729         return oss.str();
 730 }
 731
 732 /**
 733  * Create a UTF8 std::string from a irr::core::stringw.
 734  */
 735 inline std::string stringw_to_utf8(const irr::core::stringw &input)
 736 {
 737         std::wstring str(input.c_str());
 738         return wide_to_utf8(str);
 739 }
 740
 741  /**
 742   * Create a irr::core:stringw from a UTF8 std::string.
 743   */
 744 inline irr::core::stringw utf8_to_stringw(const std::string &input)
 745 {
 746         std::wstring str = utf8_to_wide(input);
 747         return irr::core::stringw(str.c_str());
 748 }
 749
/**
 * Sanitize the name of a new directory. This consists of two stages:
 * 1. Check for 'reserved filenames' that can't be used on some filesystems
 *    and prefix them
 * 2. Remove 'unsafe' characters from the name by replacing them with '_'
 *
 * Declaration only; the implementation is not part of this header.
 *
 * @param str The directory name to sanitize.
 * @param optional_prefix NOTE(review): presumably the prefix applied in
 *      stage 1 - confirm against the definition.
 * @return The sanitized directory name.
 */
std::string sanitizeDirName(const std::string &str, const std::string &optional_prefix);