4 * This file is part of OpenTTD.
5 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
/** @file string.h Types and functions related to low-level strings. */
17 /* Needed for NetBSD version (so feature) testing */
18 #if defined(__NetBSD__) || defined(__FreeBSD__)
19 #include <sys/param.h>
22 #include "core/bitmath_func.hpp"
23 #include "core/enum_type.hpp"
24 #include "core/alloc_func.hpp"
27 /** Allocate dynamic memory with a copy of given data, and error out on failure. */
28 static inline void *xmemdup (const void *src
, size_t size
)
30 return memcpy (xmalloc(size
), src
, size
);
33 /** Allocate dynamic memory with a copy of given type data, and error out on failure. */
35 static inline T
*xmemdupt (const T
*src
, size_t size
= 1)
37 return (T
*) memcpy (xmalloct
<T
>(size
), src
, size
* sizeof(T
));
42 #define ttd_strnlen strnlen
45 * Get the length of a string, within a limited buffer.
47 * @param str The pointer to the first element of the buffer
48 * @param maxlen The maximum size of the buffer
49 * @return The length of the string
51 static inline size_t ttd_strnlen(const char *str
, size_t maxlen
)
54 for (t
= str
; (size_t)(t
- str
) < maxlen
&& *t
!= '\0'; t
++) {}
59 void ttd_strlcpy(char *dst
, const char *src
, size_t size
);
62 char *xstrdup (const char *s
);
63 char *xstrmemdup (const char *s
, size_t n
);
64 char *xstrndup (const char *s
, size_t n
);
66 char *str_vfmt(const char *str
, va_list args
) WARN_FORMAT(1, 0);
67 char *CDECL
str_fmt(const char *str
, ...) WARN_FORMAT(1, 2);
70 /* strcasestr is available for _GNU_SOURCE, BSD and some Apple */
71 #if defined(_GNU_SOURCE) || (defined(__BSD_VISIBLE) && __BSD_VISIBLE) || (defined(__APPLE__) && (!defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE))) || defined(_NETBSD_SOURCE)
72 # undef DEFINE_STRCASESTR
74 # define DEFINE_STRCASESTR
75 char *strcasestr(const char *haystack
, const char *needle
);
76 #endif /* strcasestr is available */
78 int strnatcmp(const char *s1
, const char *s2
, bool ignore_garbage_at_front
= false);
80 bool strtolower(char *str
);
/**
 * Check if a string buffer is empty.
 *
 * @param s The pointer to the first element of the buffer
 * @return true if the given pointer is NULL or the buffer starts with the
 *         terminating null-character, else false
 */
static inline bool StrEmpty(const char *s)
{
	return s == NULL || s[0] == '\0';
}
97 /** Type for wide characters, i.e. non-UTF8 encoded unicode characters. */
100 /** Max. length of UTF-8 encoded unicode character. */
101 static const uint MAX_CHAR_LENGTH
= 4;
103 /* The following are directional formatting codes used to get the LTR and RTL strings right:
104 * http://www.unicode.org/unicode/reports/tr9/#Directional_Formatting_Codes */
105 static const WChar CHAR_TD_LRM
= 0x200E; ///< The next character acts like a left-to-right character.
106 static const WChar CHAR_TD_RLM
= 0x200F; ///< The next character acts like a right-to-left character.
107 static const WChar CHAR_TD_LRE
= 0x202A; ///< The following text is embedded left-to-right.
108 static const WChar CHAR_TD_RLE
= 0x202B; ///< The following text is embedded right-to-left.
109 static const WChar CHAR_TD_LRO
= 0x202D; ///< Force the following characters to be treated as left-to-right characters.
110 static const WChar CHAR_TD_RLO
= 0x202E; ///< Force the following characters to be treated as right-to-left characters.
111 static const WChar CHAR_TD_PDF
= 0x202C; ///< Restore the text-direction state to before the last LRE, RLE, LRO or RLO.
113 /** A non-breaking space. */
114 #define NBSP "\xC2\xA0"
116 /** A left-to-right marker, marks the next character as left-to-right. */
117 #define LRM "\xE2\x80\x8E"
120 * Return the length of a UTF-8 encoded character.
121 * @param c Unicode character.
122 * @return Length of UTF-8 encoding for character.
124 static inline int8
Utf8CharLen(WChar c
)
126 if (c
< 0x80) return 1;
127 if (c
< 0x800) return 2;
128 if (c
< 0x10000) return 3;
129 if (c
< 0x110000) return 4;
131 /* Invalid valid, we encode as a '?' */
136 * Return the length of an UTF-8 encoded value based on a single char. This
137 * char should be the first byte of the UTF-8 encoding. If not, or encoding
138 * is invalid, return value is 0
139 * @param c char to query length of
140 * @return requested size
142 static inline int8
Utf8EncodedCharLen(char c
)
144 if (GB(c
, 3, 5) == 0x1E) return 4;
145 if (GB(c
, 4, 4) == 0x0E) return 3;
146 if (GB(c
, 5, 3) == 0x06) return 2;
147 if (GB(c
, 7, 1) == 0x00) return 1;
149 /* Invalid UTF8 start encoding */
153 /* Check if the given character is part of a UTF8 sequence */
154 static inline bool IsUtf8Part(char c
)
156 return GB(c
, 6, 2) == 2;
160 * Retrieve the previous UNICODE character in an UTF-8 encoded string.
161 * @param s char pointer pointing to (the first char of) the next character
162 * @return a pointer in 's' to the previous UNICODE character's first byte
163 * @note The function should not be used to determine the length of the previous
164 * encoded char because it might be an invalid/corrupt start-sequence
166 static inline char *Utf8PrevChar(char *s
)
169 while (IsUtf8Part(*--ret
)) {}
173 static inline const char *Utf8PrevChar(const char *s
)
176 while (IsUtf8Part(*--ret
)) {}
180 size_t Utf8Decode(WChar
*c
, const char *s
);
181 size_t Utf8Encode(char *buf
, WChar c
);
182 size_t Utf8TrimString(char *s
, size_t maxlen
);
184 static inline WChar
Utf8Consume(const char **s
)
187 *s
+= Utf8Decode(&c
, *s
);
191 size_t Utf8StringLength(const char *s
);
194 * Is the given character a text direction character.
195 * @param c The character to test.
196 * @return true iff the character is used to influence
197 * the text direction.
199 static inline bool IsTextDirectionChar(WChar c
)
216 static inline bool IsPrintable(WChar c
)
218 if (c
< 0x20) return false;
219 if (c
< 0xE000) return true;
220 if (c
< 0xE200) return false;
225 * Check whether UNICODE character is whitespace or not, i.e. whether
226 * this is a potential line-break character.
227 * @param c UNICODE character to check
228 * @return a boolean value whether 'c' is a whitespace character or not
229 * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
231 static inline bool IsWhitespace(WChar c
)
233 return c
== 0x0020 /* SPACE */ || c
== 0x3000; /* IDEOGRAPHIC SPACE */
236 /** Settings for the string validation. */
237 enum StringValidationSettings
{
238 SVS_NONE
= 0, ///< Allow nothing and replace nothing.
239 SVS_REPLACE_WITH_QUESTION_MARK
= 1 << 0, ///< Replace the unknown/bad bits with question marks.
240 SVS_ALLOW_NEWLINE
= 1 << 1, ///< Allow newlines.
241 SVS_ALLOW_CONTROL_CODE
= 1 << 2, ///< Allow the special control codes.
243 DECLARE_ENUM_AS_BIT_SET(StringValidationSettings
)
245 bool StrValid(const char *str
, const char *last
);
246 void str_validate(char *str
, const char *last
, StringValidationSettings settings
= SVS_REPLACE_WITH_QUESTION_MARK
);
247 void ValidateString(const char *str
);
249 void str_fix_scc_encoded(char *str
, const char *last
);
250 void str_strip_colours(char *str
);
253 * Is the given character a lead surrogate code point?
254 * @param c The character to test.
255 * @return True if the character is a lead surrogate code point.
257 static inline bool Utf16IsLeadSurrogate(uint c
)
259 return c
>= 0xD800 && c
<= 0xDBFF;
263 * Is the given character a lead surrogate code point?
264 * @param c The character to test.
265 * @return True if the character is a lead surrogate code point.
267 static inline bool Utf16IsTrailSurrogate(uint c
)
269 return c
>= 0xDC00 && c
<= 0xDFFF;
273 * Convert an UTF-16 surrogate pair to the corresponding Unicode character.
274 * @param lead Lead surrogate code point.
275 * @param trail Trail surrogate code point.
276 * @return Decoded Unicode character.
278 static inline WChar
Utf16DecodeSurrogate(uint lead
, uint trail
)
280 return 0x10000 + (((lead
- 0xD800) << 10) | (trail
- 0xDC00));
284 * Decode an UTF-16 character.
285 * @param c Pointer to one or two UTF-16 code points.
286 * @return Decoded Unicode character.
288 static inline WChar
Utf16DecodeChar(const uint16
*c
)
290 if (Utf16IsLeadSurrogate(c
[0])) {
291 return Utf16DecodeSurrogate(c
[0], c
[1]);
298 /* buffer-aware string functions */
300 /** Copy a string, pointer version. */
302 static inline void bstrcpy (char (*dest
) [N
], const char *src
)
304 snprintf (&(*dest
)[0], N
, "%s", src
);
307 /** Copy a string, reference version. */
309 static inline void bstrcpy (char (&dest
) [N
], const char *src
)
311 bstrcpy (&dest
, src
);
314 /** Format a string from a va_list, pointer version. */
316 static inline void bstrvfmt (char (*dest
) [N
], const char *fmt
, va_list args
)
318 vsnprintf (&(*dest
)[0], N
, fmt
, args
);
321 /** Format a string from a va_list, reference version. */
323 static inline void bstrvfmt (char (&dest
) [N
], const char *fmt
, va_list args
)
325 bstrvfmt (&dest
, fmt
, args
);
328 /* The following one must be a macro because there is no variadic template
329 * support in MSVC. */
331 /** Get the pointer and size to use for a static buffer, pointer version. */
333 static inline void bstrptr (char (*dest
) [N
], char **buffer
, uint
*size
)
335 *buffer
= &(*dest
)[0];
339 /** Get the pointer and size to use for a static buffer, reference version. */
341 static inline void bstrptr (char (&dest
) [N
], char **buffer
, uint
*size
)
343 *buffer
= &(dest
)[0];
347 /** Format a string. */
348 #define bstrfmt(dest, ...) do { \
349 char *bstrfmt__buffer; \
350 uint bstrfmt__size; \
351 bstrptr (dest, &bstrfmt__buffer, &bstrfmt__size); \
352 snprintf (bstrfmt__buffer, bstrfmt__size, __VA_ARGS__); \
356 /** Fixed buffer string template class. */
359 stringt (void) : T() { }
362 stringt (T1 t1
) : T (t1
) { }
364 template <class T1
, class T2
>
365 stringt (T1 t1
, T2 t2
) : T (t1
, t2
) { }
367 /** Get the storage size. */
368 size_t get__capacity (void) const
370 return T::get_capacity();
373 /** Get the storage buffer. */
374 char *get__buffer (void)
376 return T::get_buffer();
379 /** Get the storage buffer, const version. */
380 const char *get__buffer (void) const
382 return const_cast<stringt
*>(this)->get__buffer();
385 const char *c_str() const
387 return get__buffer();
390 /** Get the current length of the string. */
391 size_t length (void) const
396 /** Check if this string is empty. */
397 bool empty (void) const
399 return length() == 0;
402 /** Get the current length of the string in utf8 chars. */
403 size_t utf8length (void) const
405 return Utf8StringLength (c_str());
408 /** Check if this string is full. */
409 bool full (void) const
411 return length() == get__capacity() - 1;
414 /** Reset the string. */
418 get__buffer()[0] = '\0';
421 /** Fill the string with zeroes (to avoid undefined contents). */
425 memset (get__buffer(), 0, get__capacity());
428 /** Truncate the string to a given length. */
429 void truncate (size_t newlen
)
431 assert (newlen
<= T::len
);
433 get__buffer()[T::len
] = '\0';
436 /** Set string length and provide return value. */
437 bool set__return (uint n
)
439 const size_t m
= get__capacity();
449 /** Copy a given string into this one. */
450 bool copy (const char *src
)
452 uint n
= snprintf (get__buffer(), get__capacity(), "%s", src
);
453 return set__return (n
);
456 /** Set this string according to a format and args. */
457 bool vfmt (const char *fmt
, va_list args
) WARN_FORMAT(2, 0)
459 uint n
= vsnprintf (get__buffer(), get__capacity(), fmt
, args
);
460 return set__return (n
);
463 /** Append a single char to the string. */
466 assert (T::len
< get__capacity());
467 if (full()) return false;
468 char *data
= get__buffer();
474 /** Update string length and provide return value when appending. */
475 bool append__return (uint n
)
477 const size_t m
= get__capacity();
478 if (n
< m
- T::len
) {
487 /** Append a given string to this one. */
488 bool append (const char *src
)
490 assert (T::len
< get__capacity());
491 uint n
= snprintf (get__buffer() + T::len
,
492 get__capacity() - T::len
, "%s", src
);
493 return append__return (n
);
496 /** Append to this string according to a format and args. */
497 bool append_vfmt (const char *fmt
, va_list args
) WARN_FORMAT(2, 0)
499 assert (T::len
< get__capacity());
500 uint n
= vsnprintf (get__buffer() + T::len
,
501 get__capacity() - T::len
, fmt
, args
);
502 return append__return (n
);
505 /** Replace invalid chars in string. */
506 void validate (StringValidationSettings settings
= SVS_REPLACE_WITH_QUESTION_MARK
)
508 assert (T::len
< get__capacity());
509 char *buffer
= get__buffer();
510 str_validate (buffer
, buffer
+ T::len
, settings
);
513 /** Convert string to lowercase. */
516 strtolower (get__buffer());
520 /** Fixed buffer string base class. */
522 size_t len
; ///< current string length
523 const size_t capacity
; ///< allocated storage capacity
524 char * const buffer
; ///< allocated storage buffer
526 size_t get_capacity (void) const
531 char *get_buffer (void)
536 stringb_ (size_t capacity
, char *buffer
)
537 : len(0), capacity(capacity
), buffer(buffer
)
539 assert (capacity
> 0);
543 stringb_ (const stringb_
&) : len(0), capacity(0), buffer(NULL
)
549 /** Fixed buffer string class. */
550 struct stringb
: stringt
<stringb_
> {
551 stringb (size_t capacity
, char *buffer
)
552 : stringt
<stringb_
> (capacity
, buffer
)
557 stringb (char (*buffer
) [N
]) : stringt
<stringb_
> (N
, &(*buffer
)[0])
562 stringb (char (&buffer
) [N
]) : stringt
<stringb_
> (N
, &buffer
[0])
566 /* Set this string according to a format and args. */
567 bool fmt (const char *fmt
, ...) WARN_FORMAT(2, 3);
569 /* Append to this string according to a format and args. */
570 bool append_fmt (const char *fmt
, ...) WARN_FORMAT(2, 3);
572 /* Append a unicode character encoded as utf-8 to the string. */
573 bool append_utf8 (WChar c
);
575 /* Append the hexadecimal representation of an md5sum. */
576 bool append_md5sum (const uint8 md5sum
[16]);
579 /** Static string with (some) built-in bounds checking. */
581 struct sstring_
: stringb
{
582 char data
[N
]; ///< string storage
584 sstring_ (void) : stringb (N
, data
)
586 assert_tcompile (N
> 0);
587 assert (data
[0] == '\0'); // should have been set by stringb constructor
590 static inline size_t get_capacity (void)
595 inline char *get_buffer (void)
601 /** Static string with (some) built-in bounds checking. */
603 struct sstring
: stringt
<sstring_
<N
> > {
607 /** Convert the md5sum to a hexadecimal string representation, pointer version. */
609 static inline void md5sumToString (char (*buf
) [N
], const uint8 md5sum
[16])
611 assert_tcompile (N
> 2 * 16);
612 stringb
tmp (N
, &(*buf
)[0]);
613 tmp
.append_md5sum (md5sum
);
616 /** Convert the md5sum to a hexadecimal string representation, reference version. */
618 static inline void md5sumToString (char (&buf
) [N
], const uint8 md5sum
[16])
620 md5sumToString (&buf
, md5sum
);
623 #endif /* STRING_H */