xmllite: Update prefix when moving to first attribute.
[wine.git] / libs / port / mbtowc.c
blob4977c82d8b12e06fdfa918c6f3ec2fb6b9db0a45
/*
 * MultiByteToWideChar implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #include <string.h>
23 #include "wine/unicode.h"
25 extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen ) DECLSPEC_HIDDEN;
27 /* check the code whether it is in Unicode Private Use Area (PUA). */
28 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
29 static inline int is_private_use_area_char(WCHAR code)
31 return (code >= 0xe000 && code <= 0xf8ff);
34 /* check src string for invalid chars; return non-zero if invalid char found */
35 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
36 const unsigned char *src, unsigned int srclen )
38 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
39 const WCHAR def_unicode_char = table->info.def_unicode_char;
40 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
41 + (def_unicode_char & 0xff)];
42 while (srclen)
44 if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
45 is_private_use_area_char(cp2uni[*src])) break;
46 src++;
47 srclen--;
49 return srclen;
52 /* mbstowcs for single-byte code page */
53 /* all lengths are in characters, not bytes */
54 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
55 const unsigned char *src, unsigned int srclen,
56 WCHAR *dst, unsigned int dstlen )
58 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
59 int ret = srclen;
61 if (dstlen < srclen)
63 /* buffer too small: fill it up to dstlen and return error */
64 srclen = dstlen;
65 ret = -1;
68 for (;;)
70 switch(srclen)
72 default:
73 case 16: dst[15] = cp2uni[src[15]];
74 case 15: dst[14] = cp2uni[src[14]];
75 case 14: dst[13] = cp2uni[src[13]];
76 case 13: dst[12] = cp2uni[src[12]];
77 case 12: dst[11] = cp2uni[src[11]];
78 case 11: dst[10] = cp2uni[src[10]];
79 case 10: dst[9] = cp2uni[src[9]];
80 case 9: dst[8] = cp2uni[src[8]];
81 case 8: dst[7] = cp2uni[src[7]];
82 case 7: dst[6] = cp2uni[src[6]];
83 case 6: dst[5] = cp2uni[src[5]];
84 case 5: dst[4] = cp2uni[src[4]];
85 case 4: dst[3] = cp2uni[src[3]];
86 case 3: dst[2] = cp2uni[src[2]];
87 case 2: dst[1] = cp2uni[src[1]];
88 case 1: dst[0] = cp2uni[src[0]];
89 case 0: break;
91 if (srclen < 16) return ret;
92 dst += 16;
93 src += 16;
94 srclen -= 16;
98 /* mbstowcs for single-byte code page with char decomposition */
99 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
100 const unsigned char *src, unsigned int srclen,
101 WCHAR *dst, unsigned int dstlen )
103 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
104 unsigned int len;
106 if (!dstlen) /* compute length */
108 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
109 for (len = 0; srclen; srclen--, src++)
110 len += wine_decompose( cp2uni[*src], dummy, 4 );
111 return len;
114 for (len = dstlen; srclen && len; srclen--, src++)
116 unsigned int res = wine_decompose( cp2uni[*src], dst, len );
117 if (!res) break;
118 len -= res;
119 dst += res;
121 if (srclen) return -1; /* overflow */
122 return dstlen - len;
125 /* query necessary dst length for src string */
126 static inline int get_length_dbcs( const struct dbcs_table *table,
127 const unsigned char *src, unsigned int srclen )
129 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
130 int len;
132 for (len = 0; srclen; srclen--, src++, len++)
134 if (cp2uni_lb[*src] && srclen > 1 && src[1])
136 src++;
137 srclen--;
140 return len;
143 /* check src string for invalid chars; return non-zero if invalid char found */
144 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
145 const unsigned char *src, unsigned int srclen )
147 const WCHAR * const cp2uni = table->cp2uni;
148 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
149 const WCHAR def_unicode_char = table->info.def_unicode_char;
150 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
151 + (def_unicode_char & 0xff)];
152 while (srclen)
154 unsigned char off = cp2uni_lb[*src];
155 if (off) /* multi-byte char */
157 if (srclen == 1) break; /* partial char, error */
158 if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
159 ((src[0] << 8) | src[1]) != def_char) break;
160 src++;
161 srclen--;
163 else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
164 is_private_use_area_char(cp2uni[*src])) break;
165 src++;
166 srclen--;
168 return srclen;
171 /* mbstowcs for double-byte code page */
172 /* all lengths are in characters, not bytes */
173 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
174 const unsigned char *src, unsigned int srclen,
175 WCHAR *dst, unsigned int dstlen )
177 const WCHAR * const cp2uni = table->cp2uni;
178 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
179 unsigned int len;
181 if (!dstlen) return get_length_dbcs( table, src, srclen );
183 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
185 unsigned char off = cp2uni_lb[*src];
186 if (off && srclen > 1 && src[1])
188 src++;
189 srclen--;
190 *dst = cp2uni[(off << 8) + *src];
192 else *dst = cp2uni[*src];
194 if (srclen) return -1; /* overflow */
195 return dstlen - len;
199 /* mbstowcs for double-byte code page with character decomposition */
200 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
201 const unsigned char *src, unsigned int srclen,
202 WCHAR *dst, unsigned int dstlen )
204 const WCHAR * const cp2uni = table->cp2uni;
205 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
206 unsigned int len, res;
207 WCHAR ch;
209 if (!dstlen) /* compute length */
211 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
212 for (len = 0; srclen; srclen--, src++)
214 unsigned char off = cp2uni_lb[*src];
215 if (off && srclen > 1 && src[1])
217 src++;
218 srclen--;
219 ch = cp2uni[(off << 8) + *src];
221 else ch = cp2uni[*src];
222 len += wine_decompose( ch, dummy, 4 );
224 return len;
227 for (len = dstlen; srclen && len; srclen--, src++)
229 unsigned char off = cp2uni_lb[*src];
230 if (off && srclen > 1 && src[1])
232 src++;
233 srclen--;
234 ch = cp2uni[(off << 8) + *src];
236 else ch = cp2uni[*src];
237 if (!(res = wine_decompose( ch, dst, len ))) break;
238 dst += res;
239 len -= res;
241 if (srclen) return -1; /* overflow */
242 return dstlen - len;
246 /* return -1 on dst buffer overflow, -2 on invalid input char */
247 int wine_cp_mbstowcs( const union cptable *table, int flags,
248 const char *s, int srclen,
249 WCHAR *dst, int dstlen )
251 const unsigned char *src = (const unsigned char*) s;
253 if (table->info.char_size == 1)
255 if (flags & MB_ERR_INVALID_CHARS)
257 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
259 if (!(flags & MB_COMPOSITE))
261 if (!dstlen) return srclen;
262 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
264 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
266 else /* mbcs */
268 if (flags & MB_ERR_INVALID_CHARS)
270 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
272 if (!(flags & MB_COMPOSITE))
273 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
274 else
275 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );