Turn a conditional into an assertion in ConLoad
[openttd/fttd.git] / src / string.h
blob765c384ab7f8b491bbce6a7b65911273233c219e
1 /* $Id$ */
3 /*
4 * This file is part of OpenTTD.
5 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8 */
10 /** @file string.h Types and functions related to low-level strings. */
12 #ifndef STRING_H
13 #define STRING_H
15 #include <stdarg.h>
17 /* Needed for NetBSD version (so feature) testing */
18 #if defined(__NetBSD__) || defined(__FreeBSD__)
19 #include <sys/param.h>
20 #endif
22 #include "core/bitmath_func.hpp"
23 #include "core/enum_type.hpp"
24 #include "core/alloc_func.hpp"
27 /** Allocate dynamic memory with a copy of given data, and error out on failure. */
28 static inline void *xmemdup (const void *src, size_t size)
30 return memcpy (xmalloc(size), src, size);
33 /** Allocate dynamic memory with a copy of given type data, and error out on failure. */
34 template <typename T>
35 static inline T *xmemdupt (const T *src, size_t size = 1)
37 return (T*) memcpy (xmalloct<T>(size), src, size * sizeof(T));
41 #ifdef _GNU_SOURCE
42 #define ttd_strnlen strnlen
43 #else
/**
 * Get the length of a string, within a limited buffer.
 *
 * Scanning stops at the first terminating null-character or after
 * maxlen characters, whichever comes first.
 *
 * @param str The pointer to the first element of the buffer
 * @param maxlen The maximum size of the buffer
 * @return The length of the string, at most maxlen
 */
static inline size_t ttd_strnlen(const char *str, size_t maxlen)
{
	size_t len = 0;
	while (len < maxlen && str[len] != '\0') {
		len++;
	}
	return len;
}
57 #endif
59 void ttd_strlcpy(char *dst, const char *src, size_t size);
62 char *xstrdup (const char *s);
63 char *xstrmemdup (const char *s, size_t n);
64 char *xstrndup (const char *s, size_t n);
66 char *str_vfmt(const char *str, va_list args) WARN_FORMAT(1, 0);
67 char *CDECL str_fmt(const char *str, ...) WARN_FORMAT(1, 2);
70 /* strcasestr is available for _GNU_SOURCE, BSD and some Apple */
71 #if defined(_GNU_SOURCE) || (defined(__BSD_VISIBLE) && __BSD_VISIBLE) || (defined(__APPLE__) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))) || defined(_NETBSD_SOURCE)
72 # undef DEFINE_STRCASESTR
73 #else
74 # define DEFINE_STRCASESTR
75 char *strcasestr(const char *haystack, const char *needle);
76 #endif /* strcasestr is available */
78 int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front = false);
80 bool strtolower(char *str);
/**
 * Check if a string buffer is empty.
 *
 * @param s The pointer to the first element of the buffer, may be NULL
 * @return true if the pointer is NULL or the buffer starts with the
 *         terminating null-character, false otherwise
 */
static inline bool StrEmpty(const char *s)
{
	if (s == NULL) return true;
	return *s == '\0';
}
95 /* UTF-8 handling */
97 /** Type for wide characters, i.e. non-UTF8 encoded unicode characters. */
98 typedef uint32 WChar;
100 /** Max. length of UTF-8 encoded unicode character. */
101 static const uint MAX_CHAR_LENGTH = 4;
103 /* The following are directional formatting codes used to get the LTR and RTL strings right:
104 * http://www.unicode.org/unicode/reports/tr9/#Directional_Formatting_Codes */
105 static const WChar CHAR_TD_LRM = 0x200E; ///< The next character acts like a left-to-right character.
106 static const WChar CHAR_TD_RLM = 0x200F; ///< The next character acts like a right-to-left character.
107 static const WChar CHAR_TD_LRE = 0x202A; ///< The following text is embedded left-to-right.
108 static const WChar CHAR_TD_RLE = 0x202B; ///< The following text is embedded right-to-left.
109 static const WChar CHAR_TD_LRO = 0x202D; ///< Force the following characters to be treated as left-to-right characters.
110 static const WChar CHAR_TD_RLO = 0x202E; ///< Force the following characters to be treated as right-to-left characters.
111 static const WChar CHAR_TD_PDF = 0x202C; ///< Restore the text-direction state to before the last LRE, RLE, LRO or RLO.
113 /** A non-breaking space. */
114 #define NBSP "\xC2\xA0"
116 /** A left-to-right marker, marks the next character as left-to-right. */
117 #define LRM "\xE2\x80\x8E"
120 * Return the length of a UTF-8 encoded character.
121 * @param c Unicode character.
122 * @return Length of UTF-8 encoding for character.
124 static inline int8 Utf8CharLen(WChar c)
126 if (c < 0x80) return 1;
127 if (c < 0x800) return 2;
128 if (c < 0x10000) return 3;
129 if (c < 0x110000) return 4;
131 /* Invalid valid, we encode as a '?' */
132 return 1;
136 * Return the length of an UTF-8 encoded value based on a single char. This
137 * char should be the first byte of the UTF-8 encoding. If not, or encoding
138 * is invalid, return value is 0
139 * @param c char to query length of
140 * @return requested size
142 static inline int8 Utf8EncodedCharLen(char c)
144 if (GB(c, 3, 5) == 0x1E) return 4;
145 if (GB(c, 4, 4) == 0x0E) return 3;
146 if (GB(c, 5, 3) == 0x06) return 2;
147 if (GB(c, 7, 1) == 0x00) return 1;
149 /* Invalid UTF8 start encoding */
150 return 0;
153 /* Check if the given character is part of a UTF8 sequence */
154 static inline bool IsUtf8Part(char c)
156 return GB(c, 6, 2) == 2;
160 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
161 * @param s char pointer pointing to (the first char of) the next character
162 * @return a pointer in 's' to the previous UNICODE character's first byte
163 * @note The function should not be used to determine the length of the previous
164 * encoded char because it might be an invalid/corrupt start-sequence
166 static inline char *Utf8PrevChar(char *s)
168 char *ret = s;
169 while (IsUtf8Part(*--ret)) {}
170 return ret;
173 static inline const char *Utf8PrevChar(const char *s)
175 const char *ret = s;
176 while (IsUtf8Part(*--ret)) {}
177 return ret;
180 size_t Utf8Decode(WChar *c, const char *s);
181 size_t Utf8Encode(char *buf, WChar c);
182 size_t Utf8TrimString(char *s, size_t maxlen);
184 static inline WChar Utf8Consume(const char **s)
186 WChar c;
187 *s += Utf8Decode(&c, *s);
188 return c;
191 size_t Utf8StringLength(const char *s);
194 * Is the given character a text direction character.
195 * @param c The character to test.
196 * @return true iff the character is used to influence
197 * the text direction.
199 static inline bool IsTextDirectionChar(WChar c)
201 switch (c) {
202 case CHAR_TD_LRM:
203 case CHAR_TD_RLM:
204 case CHAR_TD_LRE:
205 case CHAR_TD_RLE:
206 case CHAR_TD_LRO:
207 case CHAR_TD_RLO:
208 case CHAR_TD_PDF:
209 return true;
211 default:
212 return false;
216 static inline bool IsPrintable(WChar c)
218 if (c < 0x20) return false;
219 if (c < 0xE000) return true;
220 if (c < 0xE200) return false;
221 return true;
225 * Check whether UNICODE character is whitespace or not, i.e. whether
226 * this is a potential line-break character.
227 * @param c UNICODE character to check
228 * @return a boolean value whether 'c' is a whitespace character or not
229 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
231 static inline bool IsWhitespace(WChar c)
233 return c == 0x0020 /* SPACE */ || c == 0x3000; /* IDEOGRAPHIC SPACE */
/**
 * Valid filter types for IsValidChar.
 */
enum CharSetFilter {
	CS_ALPHANUMERAL  = 0, ///< Both numeric and alphabetic and spaces and stuff
	CS_NUMERAL       = 1, ///< Only numeric ones
	CS_NUMERAL_SPACE = 2, ///< Only numbers and spaces
	CS_ALPHA         = 3, ///< Only alphabetic values
	CS_HEXADECIMAL   = 4, ///< Only hexadecimal characters
};
247 bool IsValidChar(WChar key, CharSetFilter afilter);
/** Settings for the string validation; the values are single bits that can be combined. */
enum StringValidationSettings {
	SVS_NONE                       = 0x0, ///< Allow nothing and replace nothing.
	SVS_REPLACE_WITH_QUESTION_MARK = 0x1, ///< Replace the unknown/bad bits with question marks.
	SVS_ALLOW_NEWLINE              = 0x2, ///< Allow newlines.
	SVS_ALLOW_CONTROL_CODE         = 0x4, ///< Allow the special control codes.
};
256 DECLARE_ENUM_AS_BIT_SET(StringValidationSettings)
258 bool StrValid(const char *str, const char *last);
259 void str_validate(char *str, const char *last, StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK);
260 void ValidateString(const char *str);
262 void str_fix_scc_encoded(char *str, const char *last);
263 void str_strip_colours(char *str);
266 * Is the given character a lead surrogate code point?
267 * @param c The character to test.
268 * @return True if the character is a lead surrogate code point.
270 static inline bool Utf16IsLeadSurrogate(uint c)
272 return c >= 0xD800 && c <= 0xDBFF;
276 * Is the given character a lead surrogate code point?
277 * @param c The character to test.
278 * @return True if the character is a lead surrogate code point.
280 static inline bool Utf16IsTrailSurrogate(uint c)
282 return c >= 0xDC00 && c <= 0xDFFF;
286 * Convert an UTF-16 surrogate pair to the corresponding Unicode character.
287 * @param lead Lead surrogate code point.
288 * @param trail Trail surrogate code point.
289 * @return Decoded Unicode character.
291 static inline WChar Utf16DecodeSurrogate(uint lead, uint trail)
293 return 0x10000 + (((lead - 0xD800) << 10) | (trail - 0xDC00));
297 * Decode an UTF-16 character.
298 * @param c Pointer to one or two UTF-16 code points.
299 * @return Decoded Unicode character.
301 static inline WChar Utf16DecodeChar(const uint16 *c)
303 if (Utf16IsLeadSurrogate(c[0])) {
304 return Utf16DecodeSurrogate(c[0], c[1]);
305 } else {
306 return *c;
311 /* buffer-aware string functions */
313 /** Copy a string, pointer version. */
314 template <uint N>
315 static inline void bstrcpy (char (*dest) [N], const char *src)
317 snprintf (&(*dest)[0], N, "%s", src);
320 /** Copy a string, reference version. */
321 template <uint N>
322 static inline void bstrcpy (char (&dest) [N], const char *src)
324 bstrcpy (&dest, src);
327 /** Format a string from a va_list, pointer version. */
328 template <uint N>
329 static inline void bstrvfmt (char (*dest) [N], const char *fmt, va_list args)
331 vsnprintf (&(*dest)[0], N, fmt, args);
334 /** Format a string from a va_list, reference version. */
335 template <uint N>
336 static inline void bstrvfmt (char (&dest) [N], const char *fmt, va_list args)
338 bstrvfmt (&dest, fmt, args);
341 /* bstrfmt (below) must be a macro because there is no variadic template
342 * support in MSVC; bstrptr hands it the buffer pointer and size. */
344 /** Get the pointer and size to use for a static buffer, pointer version. */
345 template <uint N>
346 static inline void bstrptr (char (*dest) [N], char **buffer, uint *size)
348 *buffer = &(*dest)[0];
349 *size = N;
352 /** Get the pointer and size to use for a static buffer, reference version. */
353 template <uint N>
354 static inline void bstrptr (char (&dest) [N], char **buffer, uint *size)
356 *buffer = &(dest)[0];
357 *size = N;
/**
 * Format a string into a fixed-size buffer.
 * 'dest' is anything bstrptr accepts (an array of chars or a pointer to
 * one); the remaining arguments are passed on to snprintf.
 */
#define bstrfmt(dest, ...) do { \
		char *bstrfmt__ptr = NULL; \
		uint bstrfmt__len = 0; \
		bstrptr (dest, &bstrfmt__ptr, &bstrfmt__len); \
		snprintf (bstrfmt__ptr, bstrfmt__len, __VA_ARGS__); \
	} while(0)
369 /** Fixed buffer string template class. */
370 template <class T>
371 struct stringt : T {
372 stringt (void) : T() { }
374 template <class T1>
375 stringt (T1 t1) : T (t1) { }
377 template <class T1, class T2>
378 stringt (T1 t1, T2 t2) : T (t1, t2) { }
380 /** Get the storage size. */
381 size_t get__capacity (void) const
383 return T::get_capacity();
386 /** Get the storage buffer. */
387 char *get__buffer (void)
389 return T::get_buffer();
392 /** Get the storage buffer, const version. */
393 const char *get__buffer (void) const
395 return const_cast<stringt*>(this)->get__buffer();
398 const char *c_str() const
400 return get__buffer();
403 /** Get the current length of the string. */
404 size_t length (void) const
406 return T::len;
409 /** Check if this string is empty. */
410 bool empty (void) const
412 return length() == 0;
415 /** Get the current length of the string in utf8 chars. */
416 size_t utf8length (void) const
418 return Utf8StringLength (c_str());
421 /** Check if this string is full. */
422 bool full (void) const
424 return length() == get__capacity() - 1;
427 /** Reset the string. */
428 void clear (void)
430 T::len = 0;
431 get__buffer()[0] = '\0';
434 /** Fill the string with zeroes (to avoid undefined contents). */
435 void zerofill (void)
437 T::len = 0;
438 memset (get__buffer(), 0, get__capacity());
441 /** Truncate the string to a given length. */
442 void truncate (size_t newlen)
444 assert (newlen <= T::len);
445 T::len = newlen;
446 get__buffer()[T::len] = '\0';
449 /** Set string length and provide return value. */
450 bool set__return (uint n)
452 const size_t m = get__capacity();
453 if (n < m) {
454 T::len = n;
455 return true;
456 } else {
457 T::len = m - 1;
458 return false;
462 /** Copy a given string into this one. */
463 bool copy (const char *src)
465 uint n = snprintf (get__buffer(), get__capacity(), "%s", src);
466 return set__return (n);
469 /** Set this string according to a format and args. */
470 bool vfmt (const char *fmt, va_list args) WARN_FORMAT(2, 0)
472 uint n = vsnprintf (get__buffer(), get__capacity(), fmt, args);
473 return set__return (n);
476 /** Append a single char to the string. */
477 bool append (char c)
479 assert (T::len < get__capacity());
480 if (full()) return false;
481 char *data = get__buffer();
482 data[T::len++] = c;
483 data[T::len] = '\0';
484 return true;
487 /** Update string length and provide return value when appending. */
488 bool append__return (uint n)
490 const size_t m = get__capacity();
491 if (n < m - T::len) {
492 T::len += n;
493 return true;
494 } else {
495 T::len = m - 1;
496 return false;
500 /** Append a given string to this one. */
501 bool append (const char *src)
503 assert (T::len < get__capacity());
504 uint n = snprintf (get__buffer() + T::len,
505 get__capacity() - T::len, "%s", src);
506 return append__return (n);
509 /** Append to this string according to a format and args. */
510 bool append_vfmt (const char *fmt, va_list args) WARN_FORMAT(2, 0)
512 assert (T::len < get__capacity());
513 uint n = vsnprintf (get__buffer() + T::len,
514 get__capacity() - T::len, fmt, args);
515 return append__return (n);
518 /** Replace invalid chars in string. */
519 void validate (StringValidationSettings settings = SVS_REPLACE_WITH_QUESTION_MARK)
521 assert (T::len < get__capacity());
522 char *buffer = get__buffer();
523 str_validate (buffer, buffer + T::len, settings);
526 /** Convert string to lowercase. */
527 void tolower (void)
529 strtolower (get__buffer());
533 /** Fixed buffer string base class. */
534 struct stringb_ {
535 size_t len; ///< current string length
536 const size_t capacity; ///< allocated storage capacity
537 char * const buffer; ///< allocated storage buffer
539 size_t get_capacity (void) const
541 return capacity;
544 char *get_buffer (void)
546 return buffer;
549 stringb_ (size_t capacity, char *buffer)
550 : len(0), capacity(capacity), buffer(buffer)
552 assert (capacity > 0);
553 buffer[0] = '\0';
556 stringb_ (const stringb_ &) : len(0), capacity(0), buffer(NULL)
558 NOT_REACHED();
562 /** Fixed buffer string class. */
563 struct stringb : stringt<stringb_> {
564 stringb (size_t capacity, char *buffer)
565 : stringt<stringb_> (capacity, buffer)
569 template <uint N>
570 stringb (char (*buffer) [N]) : stringt<stringb_> (N, &(*buffer)[0])
574 template <uint N>
575 stringb (char (&buffer) [N]) : stringt<stringb_> (N, &buffer[0])
579 /* Set this string according to a format and args. */
580 bool fmt (const char *fmt, ...) WARN_FORMAT(2, 3);
582 /* Append to this string according to a format and args. */
583 bool append_fmt (const char *fmt, ...) WARN_FORMAT(2, 3);
585 /* Append a unicode character encoded as utf-8 to the string. */
586 bool append_utf8 (WChar c);
588 /* Append the hexadecimal representation of an md5sum. */
589 bool append_md5sum (const uint8 md5sum [16]);
592 /** Static string with (some) built-in bounds checking. */
593 template <uint N>
594 struct sstring_ : stringb {
595 char data[N]; ///< string storage
597 sstring_ (void) : stringb (N, data)
599 assert_tcompile (N > 0);
600 assert (data[0] == '\0'); // should have been set by stringb constructor
603 static inline size_t get_capacity (void)
605 return N;
608 inline char *get_buffer (void)
610 return data;
614 /** Static string with (some) built-in bounds checking. */
615 template <uint N>
616 struct sstring : stringt<sstring_<N> > {
620 /** Convert the md5sum to a hexadecimal string representation, pointer version. */
621 template <uint N>
622 static inline void md5sumToString (char (*buf) [N], const uint8 md5sum [16])
624 assert_tcompile (N > 2 * 16);
625 stringb tmp (N, &(*buf)[0]);
626 tmp.append_md5sum (md5sum);
629 /** Convert the md5sum to a hexadecimal string representation, reference version. */
630 template <uint N>
631 static inline void md5sumToString (char (&buf) [N], const uint8 md5sum [16])
633 md5sumToString (&buf, md5sum);
636 #endif /* STRING_H */