Removed mention of gtk3x-client from README.msys2
[freeciv.git] / utility / fc_utf8.c
blob15d159532d8f79c4666d7a409c885ea6f0623bd4
1 /**********************************************************************
2 Freeciv - Copyright (C) 1996 - A Kjeldberg, L Gregersen, P Unold
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2, or (at your option)
6 any later version.
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12 ***********************************************************************/
14 #ifdef HAVE_CONFIG_H
15 #include <fc_config.h>
16 #endif
18 #include <stdarg.h>
19 #include <string.h>
21 /* utility */
22 #include "log.h"
23 #include "mem.h"
24 #include "support.h"
26 #include "fc_utf8.h"
/* Number of bytes to skip for each possible first byte of a character, for
 * external use. Every entry is at least 1, so that walking a string always
 * progresses, even over an invalid byte. See also fc_ut8_next_char(). */
const char fc_utf8_skip[256] = {
  /* 0x00 - 0x7F: single byte characters. */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 - 0xBF: continuation bytes, skipped one at a time. */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 - 0xDF: first byte of a 2 bytes character. */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 - 0xEF: first byte of a 3 bytes character. */
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 - 0xFF: first byte of a 4 (or more) bytes character, or invalid. */
#ifdef USE_6_BYTES_CHAR
  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
#else
  4, 4, 4, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1
#endif /* USE_6_BYTES_CHAR */
};
/* Byte length of a character, indexed by its first byte, for internal use.
 * 0 means that the byte cannot start a character (continuation bytes
 * 0x80 - 0xBF, and the upper values not used as a start byte). */
static const char fc_utf8_char_size[256] = {
  /* 0x00 - 0x7F: single byte characters. */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x80 - 0xBF: continuation bytes, never a valid start. */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 - 0xDF: first byte of a 2 bytes character. */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xE0 - 0xEF: first byte of a 3 bytes character. */
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xF0 - 0xFF: first byte of a 4 (or more) bytes character, or invalid. */
#ifdef USE_6_BYTES_CHAR
  4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
#else
  4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0
#endif /* USE_6_BYTES_CHAR */
};

/* Size in bytes of the character starting at 'utf8_char', or 0 if the byte
 * cannot start a character. The argument is parenthesized so that pointer
 * expressions such as 'p + 1' index the table with the right byte, and the
 * cast keeps the 'const' qualifier of the pointed data. */
#define FC_UTF8_CHAR_SIZE(utf8_char) \
  (fc_utf8_char_size[*(const unsigned char *) (utf8_char)])

#define FC_UTF8_REP_CHAR "\xef\xbf\xbd" /* U+FFFD. */
/****************************************************************************
  Checks the character starting at 'utf8_char', whose expected length in
  bytes is 'size' (as given by FC_UTF8_CHAR_SIZE()). Returns TRUE if 'size'
  is 1, or if every one of the 'size - 1' following bytes is a continuation
  byte (10xxxxxx). Returns FALSE otherwise, including when 'size' is 0.
****************************************************************************/
static inline bool base_fc_utf8_char_validate(const char *utf8_char,
                                              char size)
{
  const unsigned char *byte = (const unsigned char *) utf8_char;
  char remaining;

  if (1 >= size) {
    /* A single byte needs no further check; 0 marks an invalid start. */
    return (1 == size);
  }

  for (remaining = size; 1 < remaining; remaining--) {
    byte++;
    if (0x80 != (0xC0 & *byte)) {
      /* Not a valid byte of the sequence: stop reading here. */
      break;
    }
  }

  /* All the continuation bytes were consumed only if nothing remains. */
  return (1 == remaining);
}
/****************************************************************************
  UTF-8-safe variant of fc_strlcpy() base function, truncating version.

  Copies the leading valid UTF-8 part of 'src' to 'dest', which has room
  for 'n' bytes, terminating '\0' included. Only whole characters are
  copied, and the copy stops at the first invalid character. 'dest' is
  always terminated when 'n' is not 0.

  Returns the length of 'src' in bytes.
****************************************************************************/
static inline size_t base_fc_utf8_strlcpy_trunc(char *dest, const char *src,
                                                size_t n)
{
  const char *end;
  size_t len;

  if (0 == n) {
    /* No room at all, not even for the terminating '\0'. */
    return strlen(src);
  }

  /* Validate within 'n - 1' bytes only: one byte must stay free for the
   * terminating '\0', else 'dest[len]' would be written past the end of
   * the buffer whenever 'src' doesn't fit. */
  (void) fc_utf8_validate_len(src, n - 1, &end);
  len = end - src;
  fc_assert(len < n);
  if (0 < len) {
    memcpy(dest, src, len);
  }
  dest[len] = '\0';
  return strlen(src);
}
126 /****************************************************************************
127 UTF-8-safe variant of fc_strlcpy() base function.
128 ****************************************************************************/
129 static inline size_t base_fc_utf8_strlcpy_rep(char *dest, const char *src,
130 size_t n)
132 const char *end;
133 size_t src_len, len;
135 fc_assert_ret_val(NULL != src, 0);
137 src_len = strlen(src);
138 while (TRUE) {
139 if (fc_utf8_validate_len(src, n, &end)) {
140 /* Valid UTF-8. */
141 len = end - src;
143 fc_assert(len < n);
145 if (0 < len) {
146 memcpy(dest, src, len);
148 dest[len] = '\0'; /* Valid UTF-8 string part. */
149 return src_len;
150 } else {
151 /* '*end' is not a valid UTF-8 character. */
152 len = end - src;
154 fc_assert(len < n);
156 if (0 < len) {
157 memcpy(dest, src, len);
160 n -= len;
161 dest += len;
163 /* Try to insert the replacement character. */
164 len = sizeof(FC_UTF8_REP_CHAR);
165 if (n > len) {
166 memcpy(dest, FC_UTF8_REP_CHAR, len);
167 n -= len;
168 dest += len;
171 if (1 == n) {
172 *dest = '\0';
173 return src_len; /* End of 'dest' reached. */
176 /* Jump to next character in src. */
177 src = fc_utf8_find_next_char(end);
178 if ('\0' == *src) {
179 *dest = '\0';
180 return src_len; /* End of 'src' reached. */
184 fc_assert(FALSE); /* Shouldn't occur! */
185 return src_len;
189 /****************************************************************************
190 Returns TRUE if the character beginning at the pointer 'utf8_char' is
191 a valid UTF-8 character.
192 ****************************************************************************/
193 bool fc_utf8_char_validate(const char *utf8_char)
195 fc_assert_ret_val(NULL != utf8_char, FALSE);
197 return base_fc_utf8_char_validate(utf8_char, FC_UTF8_CHAR_SIZE(utf8_char));
200 /****************************************************************************
201 Jump to next UTF-8 character start.
203 NB: This function can return a invalid UTF-8 character. Check with
204 fc_utf8_char_validate() to unsure.
205 ****************************************************************************/
206 char *fc_utf8_find_next_char(const char *utf8_char)
208 fc_assert_ret_val(NULL != utf8_char, NULL);
210 do {
211 utf8_char++;
212 } while (0 == FC_UTF8_CHAR_SIZE(utf8_char));
213 return (char *) utf8_char;
216 /****************************************************************************
217 Jump to previous UTF-8 character start in the limit of the 'utf8_string'
218 pointer. If no character is found, returns 'utf8_string'.
220 NB: This function can return a invalid UTF-8 character. Check with
221 fc_utf8_char_validate() to unsure.
222 ****************************************************************************/
223 char *fc_utf8_find_prev_char(const char *utf8_char, const char *utf8_string)
225 fc_assert_ret_val(NULL != utf8_char, NULL);
227 for (utf8_char--; utf8_char > utf8_string; utf8_char--) {
228 if (0 != FC_UTF8_CHAR_SIZE(utf8_char)) {
229 return (char *) utf8_char;
232 return (char *) utf8_string;
236 /****************************************************************************
237 Returns TRUE if the string 'utf8_string' contains only valid UTF-8
238 characters. If 'end' is not NULL, the end of the valid string will be
239 stored there, even if it returns TRUE.
241 See also fc_utf8_validate_len().
242 ****************************************************************************/
243 bool fc_utf8_validate(const char *utf8_string, const char **end)
245 char size;
247 fc_assert_ret_val(NULL != utf8_string, FALSE);
249 while ('\0' != *utf8_string) {
250 size = FC_UTF8_CHAR_SIZE(utf8_string);
251 if (!base_fc_utf8_char_validate(utf8_string, size)) {
252 if (NULL != end) {
253 *end = utf8_string;
255 return FALSE;
257 utf8_string += size;
259 if (NULL != end) {
260 *end = utf8_string;
262 return TRUE;
265 /****************************************************************************
266 Returns TRUE if the string 'utf8_string' contains only valid UTF-8
267 characters in the limit of the length (in bytes) 'byte_len'. If 'end' is
268 not NULL, the end of the valid string will be stored there, even if it
269 returns TRUE.
271 See also fc_utf8_validate().
272 ****************************************************************************/
273 bool fc_utf8_validate_len(const char *utf8_string, size_t byte_len,
274 const char **end)
276 char size;
278 fc_assert_ret_val(NULL != utf8_string, FALSE);
280 while ('\0' != *utf8_string) {
281 size = FC_UTF8_CHAR_SIZE(utf8_string);
283 if (!base_fc_utf8_char_validate(utf8_string, size)) {
284 if (NULL != end) {
285 *end = utf8_string;
287 return FALSE;
290 if (size > byte_len) {
291 if (NULL != end) {
292 *end = utf8_string;
294 return FALSE;
295 } else {
296 byte_len -= size;
299 utf8_string += size;
301 if (NULL != end) {
302 *end = utf8_string;
304 return TRUE;
/****************************************************************************
  Cuts the string 'utf8_string', in place, at its first invalid UTF-8
  character. A fully valid string is left untouched. Returns 'utf8_string'.

  See also fc_utf8_validate(), fc_utf8_validate_trunc_len(),
  and fc_utf8_validate_trunc_dup().
****************************************************************************/
char *fc_utf8_validate_trunc(char *utf8_string)
{
  const char *valid_end;

  fc_assert_ret_val(NULL != utf8_string, NULL);

  if (!fc_utf8_validate(utf8_string, &valid_end)) {
    /* Terminate the string where the valid part stops. */
    utf8_string[valid_end - utf8_string] = '\0';
  }
  return utf8_string;
}
/****************************************************************************
  Cuts the string 'utf8_string', in place, at its first invalid UTF-8
  character, or at the first character which doesn't fit in 'byte_len'
  bytes. Returns 'utf8_string'.

  See also fc_utf8_validate_trunc(), fc_utf8_validate_trunc_dup(),
  and fc_utf8_validate_rep_len().
****************************************************************************/
char *fc_utf8_validate_trunc_len(char *utf8_string, size_t byte_len)
{
  const char *valid_end;

  fc_assert_ret_val(NULL != utf8_string, NULL);

  if (!fc_utf8_validate_len(utf8_string, byte_len, &valid_end)) {
    /* Terminate the string where the accepted part stops. */
    utf8_string[valid_end - utf8_string] = '\0';
  }
  return utf8_string;
}
/****************************************************************************
  Returns a newly allocated copy of the string 'utf8_string', cut at its
  first invalid UTF-8 character. The copy is allocated with fc_malloc().

  See also fc_utf8_validate_trunc(), fc_utf8_validate_trunc_len(),
  and fc_utf8_validate_rep_dup().
****************************************************************************/
char *fc_utf8_validate_trunc_dup(const char *utf8_string)
{
  const char *valid_end;
  size_t valid_len;
  char *copy;

  fc_assert_ret_val(NULL != utf8_string, NULL);

  /* Only the end of the valid part matters here, not the result. */
  (void) fc_utf8_validate(utf8_string, &valid_end);
  valid_len = valid_end - utf8_string;

  copy = fc_malloc(valid_len + 1); /* One more byte for '\0'. */
  memcpy(copy, utf8_string, valid_len);
  copy[valid_len] = '\0';

  return copy;
}
/****************************************************************************
  Rewrites 'utf8_string', in place, with all its invalid characters replaced
  by the replacement character. The result is limited to 'byte_len' (it is
  the size passed to the copy functions), so the end of the string may be
  truncated. Nothing is done if 'byte_len' is 0. Returns 'utf8_string'.

  See also fc_utf8_validate_len(), fc_utf8_validate_trunc(),
  and fc_utf8_validate_rep_dup().
****************************************************************************/
char *fc_utf8_validate_rep_len(char *utf8_string, size_t byte_len)
{
  fc_assert_ret_val(NULL != utf8_string, NULL);

  if (0 == byte_len) {
    return utf8_string;
  }

  /* The replacement can make the string grow, so it cannot be done while
   * reading the same buffer: work from a temporary copy. */
  char snapshot[byte_len];

  fc_strlcpy(snapshot, utf8_string, byte_len);
  base_fc_utf8_strlcpy_rep(utf8_string, snapshot, byte_len);

  return utf8_string;
}
390 /****************************************************************************
391 Duplicate 'utf8_string' and replace all invalid characters with the
392 replacement character.
394 See also fc_utf8_validate_rep_len(), and fc_utf8_validate_trunc_dup().
395 ****************************************************************************/
396 char *fc_utf8_validate_rep_dup(const char *utf8_string)
398 char *ret;
399 const char *utf8_char;
400 size_t size = 1; /* '\0'. */
401 char char_size;
403 fc_assert_ret_val(NULL != utf8_string, NULL);
405 /* Check needed size. */
406 utf8_char = utf8_string;
407 while ('\0' != *utf8_char) {
408 char_size = FC_UTF8_CHAR_SIZE(utf8_char);
409 if (base_fc_utf8_char_validate(utf8_char, char_size)) {
410 /* Normal valid character. */
411 size += char_size;
412 utf8_char += char_size;
413 } else {
414 /* Replacement character. */
415 size += sizeof(FC_UTF8_REP_CHAR);
416 /* Find next character. */
417 do {
418 utf8_char++;
419 } while (0 == FC_UTF8_CHAR_SIZE(utf8_char));
423 /* Do the allocation. */
424 ret = fc_malloc(size);
425 base_fc_utf8_strlcpy_rep(ret, utf8_string, size);
427 return ret;
/****************************************************************************
  Returns the number of characters in the string 'utf8_string'. To know the
  number of used bytes, use strlen() instead.

  NB: 'utf8_string' must be UTF-8 valid (see fc_utf8_validate()), or the
  behaviour of this function will be unknown.
****************************************************************************/
size_t fc_utf8_strlen(const char *utf8_string)
{
  size_t count = 0;

  fc_assert_ret_val(NULL != utf8_string, 0);

  while ('\0' != *utf8_string) {
    utf8_string = fc_ut8_next_char(utf8_string);
    count++;
  }
  return count;
}
/****************************************************************************
  This is a variant of fc_strlcpy() to ensure the result will be a valid
  UTF-8 string. It truncates the string at the first UTF-8 invalid
  character. Returns -1 (as a size_t) if an argument is NULL or if 'n'
  is 0.

  See also fc_strlcpy(), fc_utf8_strlcpy_rep().
****************************************************************************/
size_t fc_utf8_strlcpy_trunc(char *dest, const char *src, size_t n)
{
  size_t src_len;

  fc_assert_ret_val(NULL != dest, -1);
  fc_assert_ret_val(NULL != src, -1);
  fc_assert_ret_val(0 < n, -1);

  src_len = base_fc_utf8_strlcpy_trunc(dest, src, n);
  return src_len;
}
/****************************************************************************
  This is a variant of fc_strlcpy() to ensure the result will be a valid
  UTF-8 string. Unlike fc_utf8_strlcpy_trunc(), it replaces the invalid
  characters by the replacement character, instead of truncating the string.
  Returns -1 (as a size_t) if an argument is NULL or if 'n' is 0.

  See also fc_strlcpy(), fc_utf8_strlcpy_trunc().
****************************************************************************/
size_t fc_utf8_strlcpy_rep(char *dest, const char *src, size_t n)
{
  size_t src_len;

  fc_assert_ret_val(NULL != dest, -1);
  fc_assert_ret_val(NULL != src, -1);
  fc_assert_ret_val(0 < n, -1);

  src_len = base_fc_utf8_strlcpy_rep(dest, src, n);
  return src_len;
}
/****************************************************************************
  This is a variant of fc_strlcat() to ensure the appended part will be a
  valid UTF-8 string. It truncates the appended string at the first UTF-8
  invalid character. 'n' is the whole size of 'dest'. Returns the length
  already used in 'dest' plus the length of 'src', or -1 (as a size_t) if
  an argument is NULL, if 'n' is 0, or if 'dest' already fills 'n' bytes.

  NB: This function doesn't perform anything on the already edited part of
  the string 'dest', which can contain invalid UTF-8 characters.

  See also fc_strlcat(), fc_utf8_strlcat_rep().
****************************************************************************/
size_t fc_utf8_strlcat_trunc(char *dest, const char *src, size_t n)
{
  size_t used;

  fc_assert_ret_val(NULL != dest, -1);
  fc_assert_ret_val(NULL != src, -1);
  fc_assert_ret_val(0 < n, -1);

  used = strlen(dest);
  fc_assert_ret_val(used < n, -1);

  /* Append right after the existing content, in the room left. */
  dest += used;
  n -= used;
  return used + base_fc_utf8_strlcpy_trunc(dest, src, n);
}
/****************************************************************************
  This is a variant of fc_strlcat() to ensure the appended part will be a
  valid UTF-8 string. Unlike fc_utf8_strlcat_trunc(), it replaces the
  invalid characters by the replacement character, instead of truncating
  the string. 'n' is the whole size of 'dest'. Returns the length already
  used in 'dest' plus the length of 'src', or -1 (as a size_t) if an
  argument is NULL, if 'n' is 0, or if 'dest' already fills 'n' bytes.

  NB: This function doesn't perform anything on the already edited part of
  the string 'dest', which can contain invalid UTF-8 characters.

  See also fc_strlcat(), fc_utf8_strlcat_trunc().
****************************************************************************/
size_t fc_utf8_strlcat_rep(char *dest, const char *src, size_t n)
{
  size_t used;

  fc_assert_ret_val(NULL != dest, -1);
  fc_assert_ret_val(NULL != src, -1);
  fc_assert_ret_val(0 < n, -1);

  used = strlen(dest);
  fc_assert_ret_val(used < n, -1);

  /* Append right after the existing content, in the room left. */
  dest += used;
  n -= used;
  return used + base_fc_utf8_strlcpy_rep(dest, src, n);
}
/****************************************************************************
  This is a variant of fc_snprintf() to ensure the result will be a valid
  UTF-8 string. It truncates the string at the first UTF-8 invalid
  character. The work is done by fc_utf8_vsnprintf_trunc(), whose result is
  returned.

  See also fc_snprintf(), fc_utf8_snprintf_rep().
****************************************************************************/
int fc_utf8_snprintf_trunc(char *str, size_t n, const char *format, ...)
{
  va_list ap;
  int result;

  va_start(ap, format);
  result = fc_utf8_vsnprintf_trunc(str, n, format, ap);
  va_end(ap);

  return result;
}
/****************************************************************************
  This is a variant of fc_snprintf() to ensure the result will be a valid
  UTF-8 string. Unlike fc_utf8_snprintf_trunc(), it replaces the invalid
  characters by the replacement character, instead of truncating the string.
  The work is done by fc_utf8_vsnprintf_rep(), whose result is returned.

  See also fc_snprintf(), fc_utf8_snprintf_trunc().
****************************************************************************/
int fc_utf8_snprintf_rep(char *str, size_t n, const char *format, ...)
{
  va_list ap;
  int result;

  va_start(ap, format);
  result = fc_utf8_vsnprintf_rep(str, n, format, ap);
  va_end(ap);

  return result;
}
/****************************************************************************
  This is a variant of fc_vsnprintf() to ensure the result will be a valid
  UTF-8 string. It truncates the string at the first UTF-8 invalid
  character.

  Returns the result of fc_vsnprintf() if the output is already valid.
  Otherwise, returns the length of the truncated string, or -1 if
  fc_vsnprintf() returned -1. Also returns -1 if 'str' or 'format' is NULL,
  or if 'n' is 0.

  See also fc_vsnprintf(), fc_utf8_vsnprintf_rep().
****************************************************************************/
int fc_utf8_vsnprintf_trunc(char *str, size_t n, const char *format,
                            va_list args)
{
  const char *valid_end;
  size_t valid_len;
  int printed;

  fc_assert_ret_val(NULL != str, -1);
  fc_assert_ret_val(0 < n, -1);
  fc_assert_ret_val(NULL != format, -1);

  printed = fc_vsnprintf(str, n, format, args);
  if (fc_utf8_validate(str, &valid_end)) {
    /* Already valid UTF-8. */
    return printed;
  }

  /* Truncate at last valid UTF-8 character. */
  valid_len = valid_end - str;
  str[valid_len] = '\0';
  if (-1 == printed) {
    return -1;
  }
  return (int) valid_len;
}
/****************************************************************************
  This is a variant of fc_vsnprintf() to ensure the result will be a valid
  UTF-8 string. Unlike fc_utf8_vsnprintf_trunc(), it replaces the invalid
  characters by the replacement character, instead of truncating the string.

  Returns the result of fc_vsnprintf() if the output is already valid.
  Otherwise, returns the length of the fixed string, or -1 if
  fc_vsnprintf() returned -1. Also returns -1 if 'str' or 'format' is NULL,
  or if 'n' is 0.

  See also fc_vsnprintf(), fc_utf8_vsnprintf_trunc().
****************************************************************************/
int fc_utf8_vsnprintf_rep(char *str, size_t n, const char *format,
                          va_list args)
{
  const char *valid_end;
  int printed;

  fc_assert_ret_val(NULL != str, -1);
  fc_assert_ret_val(0 < n, -1);
  fc_assert_ret_val(NULL != format, -1);

  printed = fc_vsnprintf(str, n, format, args);
  if (!fc_utf8_validate(str, &valid_end)) {
    /* The start of the string is valid: only fix what follows, in the
     * room left after it. */
    size_t valid_len = valid_end - str;

    (void) fc_utf8_validate_rep_len(str + valid_len, n - valid_len);
    if (-1 != printed) {
      printed = (int) strlen(str);
    }
  }
  return printed;
}
/****************************************************************************
  This is a variant of cat_snprintf() to ensure the appended part will be a
  valid UTF-8 string. It truncates the appended string at the first UTF-8
  invalid character. 'n' is the whole size of 'str'.

  Returns the length already used in 'str' plus the result of
  fc_utf8_vsnprintf_trunc(), or -1 if this one returned -1. Also returns -1
  if 'str' or 'format' is NULL, if 'n' is 0, or if 'str' already fills 'n'
  bytes.

  NB: This function doesn't perform anything on the already edited part of
  the string 'str', which can contain invalid UTF-8 characters.

  See also cat_snprintf(), cat_utf8_snprintf_rep().
****************************************************************************/
int cat_utf8_snprintf_trunc(char *str, size_t n, const char *format, ...)
{
  va_list ap;
  size_t used;
  int appended;

  fc_assert_ret_val(NULL != format, -1);
  fc_assert_ret_val(NULL != str, -1);
  fc_assert_ret_val(0 < n, -1);

  used = strlen(str);
  fc_assert_ret_val(used < n, -1);

  va_start(ap, format);
  appended = fc_utf8_vsnprintf_trunc(str + used, n - used, format, ap);
  va_end(ap);

  if (-1 == appended) {
    return -1;
  }
  return appended + used;
}
/****************************************************************************
  This is a variant of cat_snprintf() to ensure the appended part will be a
  valid UTF-8 string. Unlike cat_utf8_snprintf_trunc(), it replaces the
  invalid characters by the replacement character, instead of truncating
  the string. 'n' is the whole size of 'str'.

  Returns the length already used in 'str' plus the result of
  fc_utf8_vsnprintf_rep(), or -1 if this one returned -1. Also returns -1
  if 'str' or 'format' is NULL, if 'n' is 0, or if 'str' already fills 'n'
  bytes.

  NB: This function doesn't perform anything on the already edited part of
  the string 'str', which can contain invalid UTF-8 characters.

  See also cat_snprintf(), cat_utf8_snprintf_trunc().
****************************************************************************/
int cat_utf8_snprintf_rep(char *str, size_t n, const char *format, ...)
{
  va_list ap;
  size_t used;
  int appended;

  fc_assert_ret_val(NULL != format, -1);
  fc_assert_ret_val(NULL != str, -1);
  fc_assert_ret_val(0 < n, -1);

  used = strlen(str);
  fc_assert_ret_val(used < n, -1);

  va_start(ap, format);
  appended = fc_utf8_vsnprintf_rep(str + used, n - used, format, ap);
  va_end(ap);

  if (-1 == appended) {
    return -1;
  }
  return appended + used;
}