4 * This file is part of OpenTTD.
5 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
/** @file string.h Types and functions related to low-level strings. */
17 /* Needed for NetBSD version (so feature) testing */
18 #if defined(__NetBSD__) || defined(__FreeBSD__)
19 #include <sys/param.h>
22 #include "core/bitmath_func.hpp"
23 #include "core/enum_type.hpp"
24 #include "core/alloc_func.hpp"
27 /** Allocate dynamic memory with a copy of given data, and error out on failure. */
28 static inline void *xmemdup (const void *src
, size_t size
)
30 return memcpy (xmalloc(size
), src
, size
);
33 /** Allocate dynamic memory with a copy of given type data, and error out on failure. */
35 static inline T
*xmemdupt (const T
*src
, size_t size
= 1)
37 return (T
*) memcpy (xmalloct
<T
>(size
), src
, size
* sizeof(T
));
42 #define ttd_strnlen strnlen
/**
 * Get the length of a string, within a limited buffer.
 *
 * @param str The pointer to the first element of the buffer
 * @param maxlen The maximum size of the buffer
 * @return The length of the string: the number of characters before the
 *         first '\0', or \a maxlen if no terminator is found in that range
 */
static inline size_t ttd_strnlen(const char *str, size_t maxlen)
{
	const char *t;
	/* Never read at or beyond str + maxlen, even if the buffer is unterminated. */
	for (t = str; (size_t)(t - str) < maxlen && *t != '\0'; t++) {}
	return (size_t)(t - str);
}
59 void ttd_strlcpy(char *dst
, const char *src
, size_t size
);
62 char *xstrdup (const char *s
);
63 char *xstrmemdup (const char *s
, size_t n
);
64 char *xstrndup (const char *s
, size_t n
);
66 char *str_vfmt(const char *str
, va_list args
) WARN_FORMAT(1, 0);
67 char *CDECL
str_fmt(const char *str
, ...) WARN_FORMAT(1, 2);
/* strcasestr is available for _GNU_SOURCE, BSD and some Apple */
#if defined(_GNU_SOURCE) || (defined(__BSD_VISIBLE) && __BSD_VISIBLE) || (defined(__APPLE__) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))) || defined(_NETBSD_SOURCE)
#	undef DEFINE_STRCASESTR
#else
	/* No platform strcasestr: flag that one must be defined and declare it here. */
#	define DEFINE_STRCASESTR
char *strcasestr(const char *haystack, const char *needle);
#endif /* strcasestr is available */
/* String comparison; implemented elsewhere. */
int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front = false);

/* In-place lowercase conversion; implemented elsewhere.
 * NOTE(review): the meaning of the bool result is not visible here — confirm. */
bool strtolower(char *str);
/**
 * Check if a string buffer is empty.
 *
 * @param s The pointer to the first element of the buffer
 * @return true if the buffer starts with the terminating null-character or
 *         if the given pointer points to NULL else return false
 */
static inline bool StrEmpty(const char *s)
{
	/* The NULL test must come first so that s[0] is never read through NULL. */
	return s == NULL || s[0] == '\0';
}
97 /** Type for wide characters, i.e. non-UTF8 encoded unicode characters. */
100 /** Max. length of UTF-8 encoded unicode character. */
101 static const uint MAX_CHAR_LENGTH
= 4;
103 /* The following are directional formatting codes used to get the LTR and RTL strings right:
104 * http://www.unicode.org/unicode/reports/tr9/#Directional_Formatting_Codes */
105 static const WChar CHAR_TD_LRM
= 0x200E; ///< The next character acts like a left-to-right character.
106 static const WChar CHAR_TD_RLM
= 0x200F; ///< The next character acts like a right-to-left character.
107 static const WChar CHAR_TD_LRE
= 0x202A; ///< The following text is embedded left-to-right.
108 static const WChar CHAR_TD_RLE
= 0x202B; ///< The following text is embedded right-to-left.
109 static const WChar CHAR_TD_LRO
= 0x202D; ///< Force the following characters to be treated as left-to-right characters.
110 static const WChar CHAR_TD_RLO
= 0x202E; ///< Force the following characters to be treated as right-to-left characters.
111 static const WChar CHAR_TD_PDF
= 0x202C; ///< Restore the text-direction state to before the last LRE, RLE, LRO or RLO.
/** A non-breaking space (U+00A0), as a UTF-8 encoded string literal. */
#define NBSP "\xC2\xA0"

/** A left-to-right marker (U+200E), marks the next character as left-to-right; UTF-8 encoded string literal. */
#define LRM "\xE2\x80\x8E"
120 * Return the length of a UTF-8 encoded character.
121 * @param c Unicode character.
122 * @return Length of UTF-8 encoding for character.
124 static inline int8
Utf8CharLen(WChar c
)
126 if (c
< 0x80) return 1;
127 if (c
< 0x800) return 2;
128 if (c
< 0x10000) return 3;
129 if (c
< 0x110000) return 4;
131 /* Invalid valid, we encode as a '?' */
136 * Return the length of an UTF-8 encoded value based on a single char. This
137 * char should be the first byte of the UTF-8 encoding. If not, or encoding
138 * is invalid, return value is 0
139 * @param c char to query length of
140 * @return requested size
142 static inline int8
Utf8EncodedCharLen(char c
)
144 if (GB(c
, 3, 5) == 0x1E) return 4;
145 if (GB(c
, 4, 4) == 0x0E) return 3;
146 if (GB(c
, 5, 3) == 0x06) return 2;
147 if (GB(c
, 7, 1) == 0x00) return 1;
149 /* Invalid UTF8 start encoding */
153 /* Check if the given character is part of a UTF8 sequence */
154 static inline bool IsUtf8Part(char c
)
156 return GB(c
, 6, 2) == 2;
160 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
161 * @param s char pointer pointing to (the first char of) the next character
162 * @return a pointer in 's' to the previous UNICODE character's first byte
163 * @note The function should not be used to determine the length of the previous
164 * encoded char because it might be an invalid/corrupt start-sequence
166 static inline char *Utf8PrevChar(char *s
)
169 while (IsUtf8Part(*--ret
)) {}
173 static inline const char *Utf8PrevChar(const char *s
)
176 while (IsUtf8Part(*--ret
)) {}
180 size_t Utf8Decode(WChar
*c
, const char *s
);
181 size_t Utf8Encode(char *buf
, WChar c
);
182 size_t Utf8TrimString(char *s
, size_t maxlen
);
184 static inline WChar
Utf8Consume(const char **s
)
187 *s
+= Utf8Decode(&c
, *s
);
/* Implemented elsewhere. NOTE(review): presumably counts UTF-8 characters rather than bytes — confirm. */
size_t Utf8StringLength(const char *s);
194 * Is the given character a text direction character.
195 * @param c The character to test.
196 * @return true iff the character is used to influence
197 * the text direction.
199 static inline bool IsTextDirectionChar(WChar c
)
216 static inline bool IsPrintable(WChar c
)
218 if (c
< 0x20) return false;
219 if (c
< 0xE000) return true;
220 if (c
< 0xE200) return false;
225 * Check whether UNICODE character is whitespace or not, i.e. whether
226 * this is a potential line-break character.
227 * @param c UNICODE character to check
228 * @return a boolean value whether 'c' is a whitespace character or not
229 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
231 static inline bool IsWhitespace(WChar c
)
233 return c
== 0x0020 /* SPACE */ || c
== 0x3000; /* IDEOGRAPHIC SPACE */
/**
 * Valid filter types for IsValidChar.
 */
enum CharSetFilter {
	CS_ALPHANUMERAL,  ///< Both numeric and alphabetic and spaces and stuff
	CS_NUMERAL,       ///< Only numeric ones
	CS_NUMERAL_SPACE, ///< Only numbers and spaces
	CS_ALPHA,         ///< Only alphabetic values
	CS_HEXADECIMAL,   ///< Only hexadecimal characters
};
247 bool IsValidChar(WChar key
, CharSetFilter afilter
);
/** Settings for the string validation. Values are single bits so they can be combined. */
enum StringValidationSettings {
	SVS_NONE                       = 0,      ///< Allow nothing and replace nothing.
	SVS_REPLACE_WITH_QUESTION_MARK = 1 << 0, ///< Replace the unknown/bad bits with question marks.
	SVS_ALLOW_NEWLINE              = 1 << 1, ///< Allow newlines.
	SVS_ALLOW_CONTROL_CODE         = 1 << 2, ///< Allow the special control codes.
};
256 DECLARE_ENUM_AS_BIT_SET(StringValidationSettings
)
258 bool StrValid(const char *str
, const char *last
);
259 void str_validate(char *str
, const char *last
, StringValidationSettings settings
= SVS_REPLACE_WITH_QUESTION_MARK
);
260 void ValidateString(const char *str
);
262 void str_fix_scc_encoded(char *str
, const char *last
);
263 void str_strip_colours(char *str
);
266 * Is the given character a lead surrogate code point?
267 * @param c The character to test.
268 * @return True if the character is a lead surrogate code point.
270 static inline bool Utf16IsLeadSurrogate(uint c
)
272 return c
>= 0xD800 && c
<= 0xDBFF;
276 * Is the given character a lead surrogate code point?
277 * @param c The character to test.
278 * @return True if the character is a lead surrogate code point.
280 static inline bool Utf16IsTrailSurrogate(uint c
)
282 return c
>= 0xDC00 && c
<= 0xDFFF;
286 * Convert an UTF-16 surrogate pair to the corresponding Unicode character.
287 * @param lead Lead surrogate code point.
288 * @param trail Trail surrogate code point.
289 * @return Decoded Unicode character.
291 static inline WChar
Utf16DecodeSurrogate(uint lead
, uint trail
)
293 return 0x10000 + (((lead
- 0xD800) << 10) | (trail
- 0xDC00));
297 * Decode an UTF-16 character.
298 * @param c Pointer to one or two UTF-16 code points.
299 * @return Decoded Unicode character.
301 static inline WChar
Utf16DecodeChar(const uint16
*c
)
303 if (Utf16IsLeadSurrogate(c
[0])) {
304 return Utf16DecodeSurrogate(c
[0], c
[1]);
311 /* buffer-aware string functions */
/**
 * Copy a string, pointer version.
 * @tparam N  Size of the destination array, deduced from the argument.
 * @param dest Pointer to the destination array.
 * @param src  String to copy; truncated to N - 1 characters by snprintf.
 */
template <size_t N>
static inline void bstrcpy (char (*dest) [N], const char *src)
{
	snprintf (&(*dest)[0], N, "%s", src);
}
/**
 * Copy a string, reference version.
 * @tparam N  Size of the destination array, deduced from the argument.
 * @param dest Destination array.
 * @param src  String to copy.
 */
template <size_t N>
static inline void bstrcpy (char (&dest) [N], const char *src)
{
	/* Forward to the pointer overload. */
	bstrcpy (&dest, src);
}
/**
 * Format a string from a va_list, pointer version.
 * @tparam N  Size of the destination array, deduced from the argument.
 * @param dest Pointer to the destination array.
 * @param fmt  printf-style format string.
 * @param args Arguments for the format; output is truncated to N - 1 characters by vsnprintf.
 */
template <size_t N>
static inline void bstrvfmt (char (*dest) [N], const char *fmt, va_list args)
{
	vsnprintf (&(*dest)[0], N, fmt, args);
}
/**
 * Format a string from a va_list, reference version.
 * @tparam N  Size of the destination array, deduced from the argument.
 * @param dest Destination array.
 * @param fmt  printf-style format string.
 * @param args Arguments for the format.
 */
template <size_t N>
static inline void bstrvfmt (char (&dest) [N], const char *fmt, va_list args)
{
	/* Forward to the pointer overload. */
	bstrvfmt (&dest, fmt, args);
}
341 /* The following one must be a macro because there is no variadic template
342 * support in MSVC. */
344 /** Get the pointer and size to use for a static buffer, pointer version. */
346 static inline void bstrptr (char (*dest
) [N
], char **buffer
, uint
*size
)
348 *buffer
= &(*dest
)[0];
352 /** Get the pointer and size to use for a static buffer, reference version. */
354 static inline void bstrptr (char (&dest
) [N
], char **buffer
, uint
*size
)
356 *buffer
= &(dest
)[0];
/**
 * Format a string into a static buffer.
 * @param dest Array (or pointer to array) accepted by bstrptr.
 * @param ...  printf-style format string followed by its arguments.
 * The do/while wrapper makes the expansion behave as a single statement.
 */
#define bstrfmt(dest, ...) do { \
		char *bstrfmt__buffer; \
		uint bstrfmt__size; \
		bstrptr (dest, &bstrfmt__buffer, &bstrfmt__size); \
		snprintf (bstrfmt__buffer, bstrfmt__size, __VA_ARGS__); \
	} while (0)
369 /** Fixed buffer string template class. */
372 stringt (void) : T() { }
375 stringt (T1 t1
) : T (t1
) { }
377 template <class T1
, class T2
>
378 stringt (T1 t1
, T2 t2
) : T (t1
, t2
) { }
380 /** Get the storage size. */
381 size_t get__capacity (void) const
383 return T::get_capacity();
386 /** Get the storage buffer. */
387 char *get__buffer (void)
389 return T::get_buffer();
392 /** Get the storage buffer, const version. */
393 const char *get__buffer (void) const
395 return const_cast<stringt
*>(this)->get__buffer();
398 const char *c_str() const
400 return get__buffer();
403 /** Get the current length of the string. */
404 size_t length (void) const
409 /** Check if this string is empty. */
410 bool empty (void) const
412 return length() == 0;
415 /** Get the current length of the string in utf8 chars. */
416 size_t utf8length (void) const
418 return Utf8StringLength (c_str());
421 /** Check if this string is full. */
422 bool full (void) const
424 return length() == get__capacity() - 1;
427 /** Reset the string. */
431 get__buffer()[0] = '\0';
434 /** Fill the string with zeroes (to avoid undefined contents). */
438 memset (get__buffer(), 0, get__capacity());
441 /** Truncate the string to a given length. */
442 void truncate (size_t newlen
)
444 assert (newlen
<= T::len
);
446 get__buffer()[T::len
] = '\0';
449 /** Set string length and provide return value. */
450 bool set__return (uint n
)
452 const size_t m
= get__capacity();
462 /** Copy a given string into this one. */
463 bool copy (const char *src
)
465 uint n
= snprintf (get__buffer(), get__capacity(), "%s", src
);
466 return set__return (n
);
469 /** Set this string according to a format and args. */
470 bool vfmt (const char *fmt
, va_list args
) WARN_FORMAT(2, 0)
472 uint n
= vsnprintf (get__buffer(), get__capacity(), fmt
, args
);
473 return set__return (n
);
476 /** Append a single char to the string. */
479 assert (T::len
< get__capacity());
480 if (full()) return false;
481 char *data
= get__buffer();
487 /** Update string length and provide return value when appending. */
488 bool append__return (uint n
)
490 const size_t m
= get__capacity();
491 if (n
< m
- T::len
) {
500 /** Append a given string to this one. */
501 bool append (const char *src
)
503 assert (T::len
< get__capacity());
504 uint n
= snprintf (get__buffer() + T::len
,
505 get__capacity() - T::len
, "%s", src
);
506 return append__return (n
);
509 /** Append to this string according to a format and args. */
510 bool append_vfmt (const char *fmt
, va_list args
) WARN_FORMAT(2, 0)
512 assert (T::len
< get__capacity());
513 uint n
= vsnprintf (get__buffer() + T::len
,
514 get__capacity() - T::len
, fmt
, args
);
515 return append__return (n
);
518 /** Replace invalid chars in string. */
519 void validate (StringValidationSettings settings
= SVS_REPLACE_WITH_QUESTION_MARK
)
521 assert (T::len
< get__capacity());
522 char *buffer
= get__buffer();
523 str_validate (buffer
, buffer
+ T::len
, settings
);
526 /** Convert string to lowercase. */
529 strtolower (get__buffer());
533 /** Fixed buffer string base class. */
535 size_t len
; ///< current string length
536 const size_t capacity
; ///< allocated storage capacity
537 char * const buffer
; ///< allocated storage buffer
539 size_t get_capacity (void) const
544 char *get_buffer (void)
549 stringb_ (size_t capacity
, char *buffer
)
550 : len(0), capacity(capacity
), buffer(buffer
)
552 assert (capacity
> 0);
556 stringb_ (const stringb_
&) : len(0), capacity(0), buffer(NULL
)
562 /** Fixed buffer string class. */
563 struct stringb
: stringt
<stringb_
> {
564 stringb (size_t capacity
, char *buffer
)
565 : stringt
<stringb_
> (capacity
, buffer
)
570 stringb (char (*buffer
) [N
]) : stringt
<stringb_
> (N
, &(*buffer
)[0])
575 stringb (char (&buffer
) [N
]) : stringt
<stringb_
> (N
, &buffer
[0])
579 /* Set this string according to a format and args. */
580 bool fmt (const char *fmt
, ...) WARN_FORMAT(2, 3);
582 /* Append to this string according to a format and args. */
583 bool append_fmt (const char *fmt
, ...) WARN_FORMAT(2, 3);
585 /* Append a unicode character encoded as utf-8 to the string. */
586 bool append_utf8 (WChar c
);
588 /* Append the hexadecimal representation of an md5sum. */
589 bool append_md5sum (const uint8 md5sum
[16]);
592 /** Static string with (some) built-in bounds checking. */
594 struct sstring_
: stringb
{
595 char data
[N
]; ///< string storage
597 sstring_ (void) : stringb (N
, data
)
599 assert_tcompile (N
> 0);
600 assert (data
[0] == '\0'); // should have been set by stringb constructor
603 static inline size_t get_capacity (void)
608 inline char *get_buffer (void)
614 /** Static string with (some) built-in bounds checking. */
616 struct sstring
: stringt
<sstring_
<N
> > {
620 /** Convert the md5sum to a hexadecimal string representation, pointer version. */
622 static inline void md5sumToString (char (*buf
) [N
], const uint8 md5sum
[16])
624 assert_tcompile (N
> 2 * 16);
625 stringb
tmp (N
, &(*buf
)[0]);
626 tmp
.append_md5sum (md5sum
);
629 /** Convert the md5sum to a hexadecimal string representation, reference version. */
631 static inline void md5sumToString (char (&buf
) [N
], const uint8 md5sum
[16])
633 md5sumToString (&buf
, md5sum
);
636 #endif /* STRING_H */