unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / lib / mbrtowc.c
bloba71d3943034aac56a45f8de5768b50d6ec50067a
/* Convert multibyte character to wide character.
   Copyright (C) 1999-2002, 2005-2020 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2008.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 #include <config.h>
20 /* Specification. */
21 #include <wchar.h>
23 #if GNULIB_defined_mbstate_t
/* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
   and directly for the UTF-8 locales.  */
27 # include <errno.h>
28 # include <stdint.h>
29 # include <stdlib.h>
31 # if defined _WIN32 && !defined __CYGWIN__
33 # define WIN32_LEAN_AND_MEAN /* avoid including junk */
34 # include <windows.h>
36 # elif HAVE_PTHREAD_API
38 # include <pthread.h>
39 # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
40 # include <threads.h>
41 # pragma weak thrd_exit
42 # define c11_threads_in_use() (thrd_exit != NULL)
43 # else
44 # define c11_threads_in_use() 0
45 # endif
47 # elif HAVE_THREADS_H
49 # include <threads.h>
51 # endif
53 # include "attribute.h"
54 # include "verify.h"
55 # include "lc-charset-dispatch.h"
56 # include "mbtowc-lock.h"
58 verify (sizeof (mbstate_t) >= 4);
59 static char internal_state[4];
61 size_t
62 mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
64 # define FITS_IN_CHAR_TYPE(wc) ((wc) <= WCHAR_MAX)
65 # include "mbrtowc-impl.h"
68 #else
69 /* Override the system's mbrtowc() function. */
71 # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
72 # include "hard-locale.h"
73 # include <locale.h>
74 # endif
76 # undef mbrtowc
/* Replacement for the system's mbrtowc().  It forwards to the system
   function and applies only the workarounds selected by the MBRTOWC_*
   configuration macros:

     MBRTOWC_NULL_ARG2_BUG, MBRTOWC_RETVAL_BUG, MBRTOWC_EMPTY_INPUT_BUG
       A null S is rewritten into the call (NULL, "", 1, PS).
     MBRTOWC_EMPTY_INPUT_BUG
       N == 0 yields (size_t) -2 without calling the system function.
     MBRTOWC_RETVAL_BUG
       When the state is not initial, the rest of the character is parsed
       one byte at a time so that the consumed bytes can be counted here.
     MBRTOWC_STORES_INCOMPLETE_BUG
       *PWC is written only when the system function returned a value
       below (size_t) -2.
     MBRTOWC_NUL_RETVAL_BUG
       0 is returned whenever the converted character is the null wide
       character.
     MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
       When hard_locale (LC_CTYPE) is false and N is nonzero, a
       (size_t) -1 or (size_t) -2 result is replaced by converting the
       single byte *S, taken as an unsigned char, and returning 1.

   PWC may be null; the converted character is then stored in a local
   scratch variable and discarded.  */
size_t
rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  size_t ret;
  wchar_t wc;

# if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
  if (s == NULL)
    {
      pwc = NULL;
      s = "";
      n = 1;
    }
# endif

# if MBRTOWC_EMPTY_INPUT_BUG
  if (n == 0)
    return (size_t) -2;
# endif

  /* From here on PWC is never null, so later code may store through it
     unconditionally.  */
  if (! pwc)
    pwc = &wc;

# if MBRTOWC_RETVAL_BUG
  {
    static mbstate_t internal_state;

    /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
       hidden internal state, but we can call it on our variable.  */
    if (ps == NULL)
      ps = &internal_state;

    if (!mbsinit (ps))
      {
        /* Parse the rest of the multibyte character byte for byte.  */
        size_t count = 0;
        for (; n > 0; s++, n--)
          {
            ret = mbrtowc (&wc, s, 1, ps);

            if (ret == (size_t)(-1))
              return (size_t)(-1);
            count++;
            if (ret != (size_t)(-2))
              {
                /* The multibyte character has been completed.  */
                *pwc = wc;
                return (wc == 0 ? 0 : count);
              }
          }
        /* Input exhausted while the character is still incomplete.  */
        return (size_t)(-2);
      }
  }
# endif

# if MBRTOWC_STORES_INCOMPLETE_BUG
  /* Convert into the scratch variable and copy out only on success, so
     that an incomplete or invalid sequence leaves *PWC untouched.  */
  ret = mbrtowc (&wc, s, n, ps);
  if (ret < (size_t) -2)
    *pwc = wc;
# else
  ret = mbrtowc (pwc, s, n, ps);
# endif

# if MBRTOWC_NUL_RETVAL_BUG
  if (ret < (size_t) -2 && !*pwc)
    return 0;
# endif

# if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  if ((size_t) -2 <= ret && n != 0 && ! hard_locale (LC_CTYPE))
    {
      unsigned char uc = *s;
      *pwc = uc;
      return 1;
    }
# endif

  return ret;
}
158 #endif