Let HandleWindowDragging return a boolean status
[openttd/fttd.git] / src / string.cpp
blobf803a9d0119f12e22cf5921823bfed4b7ba8ce95
1 /* $Id$ */
3 /*
4 * This file is part of OpenTTD.
5 * OpenTTD is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2.
6 * OpenTTD is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
7 * See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with OpenTTD. If not, see <http://www.gnu.org/licenses/>.
8 */
10 /** @file string.cpp Handling of C-type strings (char*). */
12 #include "stdafx.h"
13 #include "debug.h"
14 #include "core/alloc_func.hpp"
15 #include "core/math_func.hpp"
16 #include "string.h"
18 #include "table/control_codes.h"
20 #include <stdarg.h>
21 #include <ctype.h> /* required for tolower() */
23 #ifdef _MSC_VER
24 #include <errno.h> // required by vsnprintf implementation for MSVC
25 #endif
27 #ifdef WITH_ICU_SORT
28 /* Required by strnatcmp. */
29 #include <unicode/ustring.h>
30 #include "language.h"
31 #include "gfx_func.h"
32 #endif /* WITH_ICU_SORT */
35 #ifdef WIN32
36 /* Since version 3.14, MinGW Runtime has snprintf() and vsnprintf() conform to C99 but it's not the case for older versions */
37 #if (__MINGW32_MAJOR_VERSION < 3) || ((__MINGW32_MAJOR_VERSION == 3) && (__MINGW32_MINOR_VERSION < 14))
38 int CDECL snprintf(char *str, size_t size, const char *format, ...)
40 va_list ap;
41 int ret;
43 va_start(ap, format);
44 ret = vsnprintf(str, size, format, ap);
45 va_end(ap);
46 return ret;
48 #endif /* MinGW Runtime < 3.14 */
50 #if defined(_MSC_VER) && _MSC_VER < 1900
51 /**
52 * Almost POSIX compliant implementation of \c vsnprintf for VC compiler.
53 * The difference is in the value returned on output truncation. This
54 * implementation returns size whereas a POSIX implementation returns
55 * size or more (the number of bytes that would be written to str
56 * had size been sufficiently large excluding the terminating null byte).
58 int CDECL vsnprintf(char *str, size_t size, const char *format, va_list ap)
60 if (size == 0) return 0;
62 errno = 0;
63 int ret = _vsnprintf(str, size, format, ap);
65 if (ret < 0) {
66 if (errno != ERANGE) {
67 /* There's a formatting error, better get that looked
68 * at properly instead of ignoring it. */
69 NOT_REACHED();
71 } else if ((size_t)ret < size) {
72 /* The buffer is big enough for the number of
73 * characters stored (excluding null), i.e.
74 * the string has been null-terminated. */
75 return ret;
78 /* The buffer is too small for _vsnprintf to write the
79 * null-terminator at its end and return size. */
80 str[size - 1] = '\0';
81 return (int)size;
83 #endif /* _MSC_VER */
85 #endif /* WIN32 */
/**
 * Copy a string into a bounded buffer, always null-terminating the result.
 *
 * At most \a size - 1 characters are taken from \a src; copying also stops
 * at the terminating null-character of \a src.
 *
 * @note usage ttd_strlcpy(dst, src, lengthof(dst));
 * @note lengthof() applies only to fixed size arrays
 *
 * @param dst The destination buffer
 * @param src The buffer containing the string to copy
 * @param size The maximum size of the destination buffer; must be non-zero
 */
void ttd_strlcpy(char *dst, const char *src, size_t size)
{
	assert(size > 0);

	/* One byte of the destination is reserved for the terminator. */
	for (size_t room = size - 1; room != 0 && *src != '\0'; room--) {
		*dst++ = *src++;
	}

	*dst = '\0';
}
111 /** Allocate a copy of a given string, and error out on failure. */
112 char *xstrdup (const char *s)
114 return (char*) xmemdup (s, strlen(s) + 1);
118 * Allocate a copy of a given string, with bounded size, and error out
119 * on failure.
121 * Note! This is not the same as strndup, because it assumes that the
122 * string passed in is at least of the required size, unlike strndup,
123 * which will check if there is a null in the requested initial segment.
125 char *xstrmemdup (const char *s, size_t n)
127 char *p = xmalloc (n + 1);
128 memcpy (p, s, n);
129 p[n] = '\0';
130 return p;
133 /** Allocate a copy of a given string, with bounded size, and error out on failure. */
134 char *xstrndup (const char *s, size_t n)
136 return xstrmemdup (s, ttd_strnlen (s, n));
139 /** Allocate a formatted string. */
140 char *str_vfmt (const char *fmt, va_list args)
142 #ifdef _GNU_SOURCE
143 char *s;
144 if (vasprintf (&s, fmt, args) == -1) out_of_memory();
145 return s;
146 #else
147 char buf[4096];
148 int len = vsnprintf (buf, lengthof(buf), fmt, args);
149 return (char*) xmemdup (buf, len + 1);
150 #endif
154 * Format, "printf", into a newly allocated string.
155 * @param str The formatting string.
156 * @return The formatted string. You must free this!
158 char *CDECL str_fmt(const char *str, ...)
160 va_list va;
161 va_start(va, str);
162 char *s = str_vfmt (str, va);
163 va_end(va);
164 return s;
168 #ifdef DEFINE_STRCASESTR
169 char *strcasestr(const char *haystack, const char *needle)
171 size_t hay_len = strlen(haystack);
172 size_t needle_len = strlen(needle);
173 while (hay_len >= needle_len) {
174 if (strncasecmp(haystack, needle, needle_len) == 0) return const_cast<char *>(haystack);
176 haystack++;
177 hay_len--;
180 return NULL;
182 #endif /* DEFINE_STRCASESTR */
185 * Skip some of the 'garbage' in the string that we don't want to use
186 * to sort on. This way the alphabetical sorting will work better as
187 * we would be actually using those characters instead of some other
188 * characters such as spaces and tildes at the begin of the name.
189 * @param str The string to skip the initial garbage of.
190 * @return The string with the garbage skipped.
192 static const char *SkipGarbage(const char *str)
194 while (*str != '\0' && (*str < '0' || IsInsideMM(*str, ';', '@' + 1) || IsInsideMM(*str, '[', '`' + 1) || IsInsideMM(*str, '{', '~' + 1))) str++;
195 return str;
199 * Compares two strings using case insensitive natural sort.
201 * @param s1 First string to compare.
202 * @param s2 Second string to compare.
203 * @param ignore_garbage_at_front Skip punctuation characters in the front
204 * @return Less than zero if s1 < s2, zero if s1 == s2, greater than zero if s1 > s2.
206 int strnatcmp(const char *s1, const char *s2, bool ignore_garbage_at_front)
208 if (ignore_garbage_at_front) {
209 s1 = SkipGarbage(s1);
210 s2 = SkipGarbage(s2);
212 #ifdef WITH_ICU_SORT
213 if (_current_collator != NULL) {
214 UErrorCode status = U_ZERO_ERROR;
215 int result = _current_collator->compareUTF8(s1, s2, status);
216 if (U_SUCCESS(status)) return result;
219 #endif /* WITH_ICU_SORT */
221 /* Do a normal comparison if ICU is missing or if we cannot create a collator. */
222 return strcasecmp(s1, s2);
/**
 * Convert a given ASCII string to lowercase, in place.
 * NOTE: only support ASCII characters, no UTF8 fancy. As currently
 * the function is only used to lowercase data-filenames if they are
 * not found, this is sufficient. If more, or general functionality is
 * needed, look to r7271 where it was removed because it was broken when
 * using certain locales: eg in Turkish the uppercase 'I' was converted to
 * '?', so just revert to the old functionality
 * @param str string to convert
 * @return String has changed.
 */
bool strtolower(char *str)
{
	bool changed = false;
	for (; *str != '\0'; str++) {
		/* tolower() may only be given values representable as unsigned char
		 * (or EOF); a plain char can be negative for bytes >= 0x80, so
		 * convert through unsigned char first. */
		const char lowered = (char)tolower((unsigned char)*str);
		changed |= lowered != *str;
		*str = lowered;
	}
	return changed;
}
248 /* UTF-8 handling */
251 * Decode and consume the next UTF-8 encoded character.
252 * @param c Buffer to place decoded character.
253 * @param s Character stream to retrieve character from.
254 * @return Number of characters in the sequence.
256 size_t Utf8Decode(WChar *c, const char *s)
258 assert(c != NULL);
260 if (!HasBit(s[0], 7)) {
261 /* Single byte character: 0xxxxxxx */
262 *c = s[0];
263 return 1;
264 } else if (GB(s[0], 5, 3) == 6) {
265 if (IsUtf8Part(s[1])) {
266 /* Double byte character: 110xxxxx 10xxxxxx */
267 *c = GB(s[0], 0, 5) << 6 | GB(s[1], 0, 6);
268 if (*c >= 0x80) return 2;
270 } else if (GB(s[0], 4, 4) == 14) {
271 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2])) {
272 /* Triple byte character: 1110xxxx 10xxxxxx 10xxxxxx */
273 *c = GB(s[0], 0, 4) << 12 | GB(s[1], 0, 6) << 6 | GB(s[2], 0, 6);
274 if (*c >= 0x800) return 3;
276 } else if (GB(s[0], 3, 5) == 30) {
277 if (IsUtf8Part(s[1]) && IsUtf8Part(s[2]) && IsUtf8Part(s[3])) {
278 /* 4 byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
279 *c = GB(s[0], 0, 3) << 18 | GB(s[1], 0, 6) << 12 | GB(s[2], 0, 6) << 6 | GB(s[3], 0, 6);
280 if (*c >= 0x10000 && *c <= 0x10FFFF) return 4;
284 /* DEBUG(misc, 1, "[utf8] invalid UTF-8 sequence"); */
285 *c = '?';
286 return 1;
291 * Encode a unicode character and place it in the buffer.
292 * @param buf Buffer to place character.
293 * @param c Unicode character to encode.
294 * @return Number of characters in the encoded sequence.
296 size_t Utf8Encode(char *buf, WChar c)
298 if (c < 0x80) {
299 *buf = c;
300 return 1;
301 } else if (c < 0x800) {
302 *buf++ = 0xC0 + GB(c, 6, 5);
303 *buf = 0x80 + GB(c, 0, 6);
304 return 2;
305 } else if (c < 0x10000) {
306 *buf++ = 0xE0 + GB(c, 12, 4);
307 *buf++ = 0x80 + GB(c, 6, 6);
308 *buf = 0x80 + GB(c, 0, 6);
309 return 3;
310 } else if (c < 0x110000) {
311 *buf++ = 0xF0 + GB(c, 18, 3);
312 *buf++ = 0x80 + GB(c, 12, 6);
313 *buf++ = 0x80 + GB(c, 6, 6);
314 *buf = 0x80 + GB(c, 0, 6);
315 return 4;
318 /* DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c); */
319 *buf = '?';
320 return 1;
324 * Properly terminate an UTF8 string to some maximum length
325 * @param s string to check if it needs additional trimming
326 * @param maxlen the maximum length the buffer can have.
327 * @return the new length in bytes of the string (eg. strlen(new_string))
328 * @note maxlen is the string length _INCLUDING_ the terminating '\0'
330 size_t Utf8TrimString(char *s, size_t maxlen)
332 size_t length = 0;
334 for (const char *ptr = strchr(s, '\0'); *s != '\0';) {
335 size_t len = Utf8EncodedCharLen(*s);
336 /* Silently ignore invalid UTF8 sequences, our only concern trimming */
337 if (len == 0) len = 1;
339 /* Take care when a hard cutoff was made for the string and
340 * the last UTF8 sequence is invalid */
341 if (length + len >= maxlen || (s + len > ptr)) break;
342 s += len;
343 length += len;
346 *s = '\0';
347 return length;
351 * Get the length of an UTF-8 encoded string in number of characters
352 * and thus not the number of bytes that the encoded string contains.
353 * @param s The string to get the length for.
354 * @return The length of the string in characters.
356 size_t Utf8StringLength(const char *s)
358 size_t len = 0;
359 const char *t = s;
360 while (Utf8Consume(&t) != 0) len++;
361 return len;
365 * Checks whether the given string is valid, i.e. contains only
366 * valid (printable) characters and is properly terminated.
367 * @param str The string to validate.
368 * @param last The last character of the string, i.e. the string
369 * must be terminated here or earlier.
371 bool StrValid(const char *str, const char *last)
373 /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
375 while (str <= last && *str != '\0') {
376 size_t len = Utf8EncodedCharLen(*str);
377 /* Encoded length is 0 if the character isn't known.
378 * The length check is needed to prevent Utf8Decode to read
379 * over the terminating '\0' if that happens to be placed
380 * within the encoding of an UTF8 character. */
381 if (len == 0 || str + len > last) return false;
383 WChar c;
384 len = Utf8Decode(&c, str);
385 if (!IsPrintable(c) || (c >= SCC_SPRITE_START && c <= SCC_SPRITE_END)) {
386 return false;
389 str += len;
392 return *str == '\0';
396 * Scans the string for valid characters and if it finds invalid ones,
397 * replaces them with a question mark '?' (if not ignored)
398 * @param str the string to validate
399 * @param last the last valid character of str
400 * @param settings the settings for the string validation.
402 void str_validate(char *str, const char *last, StringValidationSettings settings)
404 /* Assume the ABSOLUTE WORST to be in str as it comes from the outside. */
406 char *dst = str;
407 while (str <= last && *str != '\0') {
408 size_t len = Utf8EncodedCharLen(*str);
409 /* If the character is unknown, i.e. encoded length is 0
410 * we assume worst case for the length check.
411 * The length check is needed to prevent Utf8Decode to read
412 * over the terminating '\0' if that happens to be placed
413 * within the encoding of an UTF8 character. */
414 if ((len == 0 && str + 4 > last) || str + len > last) break;
416 WChar c;
417 len = Utf8Decode(&c, str);
418 /* It's possible to encode the string termination character
419 * into a multiple bytes. This prevents those termination
420 * characters to be skipped */
421 if (c == '\0') break;
423 if ((IsPrintable(c) && (c < SCC_SPRITE_START || c > SCC_SPRITE_END)) || ((settings & SVS_ALLOW_CONTROL_CODE) != 0 && c == SCC_ENCODED)) {
424 /* Copy the character back. Even if dst is current the same as str
425 * (i.e. no characters have been changed) this is quicker than
426 * moving the pointers ahead by len */
427 do {
428 *dst++ = *str++;
429 } while (--len != 0);
430 } else if ((settings & SVS_ALLOW_NEWLINE) != 0 && c == '\n') {
431 *dst++ = *str++;
432 } else {
433 if ((settings & SVS_ALLOW_NEWLINE) != 0 && c == '\r' && str[1] == '\n') {
434 str += len;
435 continue;
437 /* Replace the undesirable character with a question mark */
438 str += len;
439 if ((settings & SVS_REPLACE_WITH_QUESTION_MARK) != 0) *dst++ = '?';
443 *dst = '\0';
447 * Scans the string for valid characters and if it finds invalid ones,
448 * replaces them with a question mark '?'.
449 * @param str the string to validate
451 void ValidateString(const char *str)
453 /* We know it is '\0' terminated. */
454 str_validate(const_cast<char *>(str), str + strlen(str) + 1);
458 * Scan the string for old values of SCC_ENCODED and fix it to
459 * it's new, static value.
460 * @param str the string to scan
461 * @param last the last valid character of str
463 void str_fix_scc_encoded(char *str, const char *last)
465 while (str <= last && *str != '\0') {
466 size_t len = Utf8EncodedCharLen(*str);
467 if ((len == 0 && str + 4 > last) || str + len > last) break;
469 WChar c;
470 len = Utf8Decode(&c, str);
471 if (c == '\0') break;
473 if (c == 0xE028 || c == 0xE02A) {
474 c = SCC_ENCODED;
476 str += Utf8Encode(str, c);
478 *str = '\0';
481 /** Scans the string for colour codes and strips them */
482 void str_strip_colours(char *str)
484 char *dst = str;
485 WChar c;
486 size_t len;
488 for (len = Utf8Decode(&c, str); c != '\0'; len = Utf8Decode(&c, str)) {
489 if (c < SCC_BLUE || c > SCC_BLACK) {
490 /* Copy the character back. Even if dst is current the same as str
491 * (i.e. no characters have been changed) this is quicker than
492 * moving the pointers ahead by len */
493 do {
494 *dst++ = *str++;
495 } while (--len != 0);
496 } else {
497 /* Just skip (strip) the colour codes */
498 str += len;
501 *dst = '\0';
505 /* buffer-aware string functions */
507 /** Set this string according to a format and args. */
508 bool stringb::fmt (const char *fmt, ...)
510 va_list args;
511 va_start (args, fmt);
512 bool r = vfmt (fmt, args);
513 va_end (args);
514 return r;
517 /** Append to this string according to a format and args. */
518 bool stringb::append_fmt (const char *fmt, ...)
520 va_list args;
521 va_start (args, fmt);
522 bool r = append_vfmt (fmt, args);
523 va_end (args);
524 return r;
527 /** Append a unicode character encoded as utf-8 to the string. */
528 bool stringb::append_utf8 (WChar c)
530 assert (len < capacity);
531 size_t left = capacity - len;
533 if (c < 0x80) {
534 if (left <= 1) return false;
535 buffer[len++] = c;
536 } else if (c < 0x800) {
537 if (left <= 2) return false;
538 buffer[len++] = 0xC0 + GB(c, 6, 5);
539 buffer[len++] = 0x80 + GB(c, 0, 6);
540 } else if (c < 0x10000) {
541 if (left <= 3) return false;
542 buffer[len++] = 0xE0 + GB(c, 12, 4);
543 buffer[len++] = 0x80 + GB(c, 6, 6);
544 buffer[len++] = 0x80 + GB(c, 0, 6);
545 } else if (c < 0x110000) {
546 if (left <= 4) return false;
547 buffer[len++] = 0xF0 + GB(c, 18, 3);
548 buffer[len++] = 0x80 + GB(c, 12, 6);
549 buffer[len++] = 0x80 + GB(c, 6, 6);
550 buffer[len++] = 0x80 + GB(c, 0, 6);
551 } else {
552 /* DEBUG(misc, 1, "[utf8] can't UTF-8 encode value 0x%X", c); */
553 if (left <= 1) return false;
554 buffer[len++] = '?';
557 buffer[len] = '\0';
558 return true;
561 /** Append the hexadecimal representation of an md5sum. */
562 bool stringb::append_md5sum (const uint8 md5sum [16])
564 for (uint i = 0; i < 16; i++) {
565 if (!append_fmt ("%02X", md5sum[i])) return false;
568 return true;