Added ability to turn on/off debug channels.
[wine.git] / unicode / mbtowc.c
blob71581a8ebb8dec90602645348928054d0bf738dc
1 /*
2 * MultiByteToWideChar implementation
4 * Copyright 2000 Alexandre Julliard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 #include <string.h>
23 #include "winnls.h"
24 #include "wine/unicode.h"
26 /* get the decomposition of a Unicode char */
27 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
29 extern const WCHAR unicode_decompose_table[];
30 const WCHAR *ptr = unicode_decompose_table;
31 int res;
33 *dst = src;
34 ptr = unicode_decompose_table + ptr[src >> 8];
35 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
36 if (!*ptr) return 1;
37 if (dstlen <= 1) return 0;
38 /* apply the decomposition recursively to the first char */
39 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
40 return res;
43 /* check src string for invalid chars; return non-zero if invalid char found */
44 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
45 const unsigned char *src, unsigned int srclen )
47 const WCHAR * const cp2uni = table->cp2uni;
48 while (srclen)
50 if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
51 break;
52 src++;
53 srclen--;
55 return srclen;
58 /* mbstowcs for single-byte code page */
59 /* all lengths are in characters, not bytes */
60 static inline int mbstowcs_sbcs( const struct sbcs_table *table,
61 const unsigned char *src, unsigned int srclen,
62 WCHAR *dst, unsigned int dstlen )
64 const WCHAR * const cp2uni = table->cp2uni;
65 int ret = srclen;
67 if (dstlen < srclen)
69 /* buffer too small: fill it up to dstlen and return error */
70 srclen = dstlen;
71 ret = -1;
74 for (;;)
76 switch(srclen)
78 default:
79 case 16: dst[15] = cp2uni[src[15]];
80 case 15: dst[14] = cp2uni[src[14]];
81 case 14: dst[13] = cp2uni[src[13]];
82 case 13: dst[12] = cp2uni[src[12]];
83 case 12: dst[11] = cp2uni[src[11]];
84 case 11: dst[10] = cp2uni[src[10]];
85 case 10: dst[9] = cp2uni[src[9]];
86 case 9: dst[8] = cp2uni[src[8]];
87 case 8: dst[7] = cp2uni[src[7]];
88 case 7: dst[6] = cp2uni[src[6]];
89 case 6: dst[5] = cp2uni[src[5]];
90 case 5: dst[4] = cp2uni[src[4]];
91 case 4: dst[3] = cp2uni[src[3]];
92 case 3: dst[2] = cp2uni[src[2]];
93 case 2: dst[1] = cp2uni[src[1]];
94 case 1: dst[0] = cp2uni[src[0]];
95 case 0: break;
97 if (srclen < 16) return ret;
98 dst += 16;
99 src += 16;
100 srclen -= 16;
104 /* mbstowcs for single-byte code page with char decomposition */
105 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
106 const unsigned char *src, unsigned int srclen,
107 WCHAR *dst, unsigned int dstlen )
109 const WCHAR * const cp2uni = table->cp2uni;
110 unsigned int len;
112 if (!dstlen) /* compute length */
114 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
115 for (len = 0; srclen; srclen--, src++)
116 len += get_decomposition( cp2uni[*src], dummy, 4 );
117 return len;
120 for (len = dstlen; srclen && len; srclen--, src++)
122 int res = get_decomposition( cp2uni[*src], dst, len );
123 if (!res) break;
124 len -= res;
125 dst += res;
127 if (srclen) return -1; /* overflow */
128 return dstlen - len;
131 /* query necessary dst length for src string */
132 static inline int get_length_dbcs( const struct dbcs_table *table,
133 const unsigned char *src, unsigned int srclen )
135 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
136 int len;
138 for (len = 0; srclen; srclen--, src++, len++)
140 if (cp2uni_lb[*src])
142 if (!--srclen) break; /* partial char, ignore it */
143 src++;
146 return len;
149 /* check src string for invalid chars; return non-zero if invalid char found */
150 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
151 const unsigned char *src, unsigned int srclen )
153 const WCHAR * const cp2uni = table->cp2uni;
154 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
156 while (srclen)
158 unsigned char off = cp2uni_lb[*src];
159 if (off) /* multi-byte char */
161 if (srclen == 1) break; /* partial char, error */
162 if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
163 ((src[0] << 8) | src[1]) != table->info.def_char) break;
164 src++;
165 srclen--;
167 else if (cp2uni[*src] == table->info.def_unicode_char &&
168 *src != table->info.def_char) break;
169 src++;
170 srclen--;
172 return srclen;
175 /* mbstowcs for double-byte code page */
176 /* all lengths are in characters, not bytes */
177 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
178 const unsigned char *src, unsigned int srclen,
179 WCHAR *dst, unsigned int dstlen )
181 const WCHAR * const cp2uni = table->cp2uni;
182 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
183 unsigned int len;
185 if (!dstlen) return get_length_dbcs( table, src, srclen );
187 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
189 unsigned char off = cp2uni_lb[*src];
190 if (off)
192 if (!--srclen) break; /* partial char, ignore it */
193 src++;
194 *dst = cp2uni[(off << 8) + *src];
196 else *dst = cp2uni[*src];
198 if (srclen) return -1; /* overflow */
199 return dstlen - len;
203 /* mbstowcs for double-byte code page with character decomposition */
204 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
205 const unsigned char *src, unsigned int srclen,
206 WCHAR *dst, unsigned int dstlen )
208 const WCHAR * const cp2uni = table->cp2uni;
209 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
210 unsigned int len;
211 WCHAR ch;
212 int res;
214 if (!dstlen) /* compute length */
216 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
217 for (len = 0; srclen; srclen--, src++)
219 unsigned char off = cp2uni_lb[*src];
220 if (off)
222 if (!--srclen) break; /* partial char, ignore it */
223 src++;
224 ch = cp2uni[(off << 8) + *src];
226 else ch = cp2uni[*src];
227 len += get_decomposition( ch, dummy, 4 );
229 return len;
232 for (len = dstlen; srclen && len; srclen--, src++)
234 unsigned char off = cp2uni_lb[*src];
235 if (off)
237 if (!--srclen) break; /* partial char, ignore it */
238 src++;
239 ch = cp2uni[(off << 8) + *src];
241 else ch = cp2uni[*src];
242 if (!(res = get_decomposition( ch, dst, len ))) break;
243 dst += res;
244 len -= res;
246 if (srclen) return -1; /* overflow */
247 return dstlen - len;
251 /* return -1 on dst buffer overflow, -2 on invalid input char */
252 int cp_mbstowcs( const union cptable *table, int flags,
253 const char *src, int srclen,
254 WCHAR *dst, int dstlen )
256 if (table->info.char_size == 1)
258 if (flags & MB_ERR_INVALID_CHARS)
260 if (check_invalid_chars_sbcs( &table->sbcs, src, srclen )) return -2;
262 if (!(flags & MB_COMPOSITE))
264 if (!dstlen) return srclen;
265 return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
267 return mbstowcs_sbcs_decompose( &table->sbcs, src, srclen, dst, dstlen );
269 else /* mbcs */
271 if (flags & MB_ERR_INVALID_CHARS)
273 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
275 if (!(flags & MB_COMPOSITE))
276 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
277 else
278 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );