unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / lib / c32rtomb.c
blob8e4fb56761118be75c1304f6e7bcf82158ca8f16
1 /* Convert 32-bit wide character to multibyte character.
2 Copyright (C) 2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2020. */
19 #include <config.h>
21 /* Specification. */
22 #include <uchar.h>
24 #include <errno.h>
25 #include <wchar.h>
27 #include "attribute.h" /* FALLTHROUGH */
28 #include "localcharset.h"
29 #include "streq.h"
/* Convert the 32-bit wide character WC to a multibyte character.

   S   is the output buffer, or NULL.  When S is NULL and the
       _GL_LARGE_CHAR32_T branch is compiled, the call is forwarded as
       wcrtomb (NULL, 0, ps).
   WC  is the 32-bit wide character to convert.
   PS  is handed unchanged to the underlying c32rtomb / wcrtomb call.
       The hand-written UTF-8 encoder below does not look at it.

   Returns the number of bytes produced, or (size_t)(-1) with errno set
   to EILSEQ when WC fails one of the explicit range checks below.
   Failures reported by the underlying library call are returned as-is.

   Exactly one of three implementations is compiled, selected by the
   configuration macros HAVE_WORKING_MBRTOC32 and _GL_LARGE_CHAR32_T.  */
size_t
c32rtomb (char *s, char32_t wc, mbstate_t *ps)
/* NOTE(review): presumably <uchar.h> maps c32rtomb to a replacement
   name for this definition, and the #undef lets the call below reach
   the system function -- confirm against the header.  */
#undef c32rtomb
{
#if HAVE_WORKING_MBRTOC32

# if C32RTOMB_RETVAL_BUG
  if (s == NULL)
    /* We know the NUL wide character corresponds to the NUL character.  */
    return 1;
# endif

  return c32rtomb (s, wc, ps);

#elif _GL_LARGE_CHAR32_T

  if (s == NULL)
    return wcrtomb (NULL, 0, ps);
  else
    {
      /* Special-case all encodings that may produce wide character values
         > WCHAR_MAX.  */
      const char *encoding = locale_charset ();
      if (STREQ_OPT (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0, 0))
        {
          /* Special-case the UTF-8 encoding.  Assume that the wide-character
             encoding in a UTF-8 locale is UCS-2 or, equivalently, UTF-16.  */
          if (wc < 0x80)
            {
              /* Single-byte (ASCII) case.  */
              s[0] = (unsigned char) wc;
              return 1;
            }
          else
            {
              int count;

              /* Determine the length of the UTF-8 sequence; reject the
                 surrogate range 0xD800..0xDFFF and values >= 0x110000.  */
              if (wc < 0x800)
                count = 2;
              else if (wc < 0x10000)
                {
                  if (wc < 0xd800 || wc >= 0xe000)
                    count = 3;
                  else
                    {
                      errno = EILSEQ;
                      return (size_t)(-1);
                    }
                }
              else if (wc < 0x110000)
                count = 4;
              else
                {
                  errno = EILSEQ;
                  return (size_t)(-1);
                }

              /* Emit the continuation bytes from last to first.  Each step
                 stores the low 6 bits, shifts them out, and ORs in a marker
                 so that, once the cascade ends, the remaining value of WC
                 is the lead byte.  */
              switch (count) /* note: code falls through cases! */
                {
                case 4: s[3] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x10000;
                  FALLTHROUGH;
                case 3: s[2] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0x800;
                  FALLTHROUGH;
                case 2: s[1] = 0x80 | (wc & 0x3f); wc = wc >> 6; wc |= 0xc0;
                /*case 1:*/ s[0] = wc;
                }
              return count;
            }
        }
      else
        {
          /* Other encodings: only values that survive the round trip
             through wchar_t can be handed to wcrtomb.  */
          if ((wchar_t) wc == wc)
            return wcrtomb (s, (wchar_t) wc, ps);
          else
            {
              errno = EILSEQ;
              return (size_t)(-1);
            }
        }
    }

#else

  /* char32_t and wchar_t are equivalent.  */
  return wcrtomb (s, (wchar_t) wc, ps);

#endif
}