Don't call strtok twice if it failed the first time.
[wine.git] / unicode / mbtowc.c
blob9f6407b2f5ff954b07660f0d7b2bacd7693dbc84
1 /*
2 * MultiByteToWideChar implementation
4 * Copyright 2000 Alexandre Julliard
5 */
7 #include <string.h>
9 #include "winnls.h"
10 #include "wine/unicode.h"
12 /* get the decomposition of a Unicode char */
13 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
15 extern const WCHAR unicode_decompose_table[];
16 const WCHAR *ptr = unicode_decompose_table;
17 int res;
19 *dst = src;
20 ptr = unicode_decompose_table + ptr[src >> 8];
21 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
22 if (!*ptr) return 1;
23 if (dstlen <= 1) return 0;
24 /* apply the decomposition recursively to the first char */
25 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
26 return res;
29 /* check src string for invalid chars; return non-zero if invalid char found */
30 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table,
31 const unsigned char *src, unsigned int srclen )
33 const WCHAR * const cp2uni = table->cp2uni;
34 while (srclen)
36 if (cp2uni[*src] == table->info.def_unicode_char && *src != table->info.def_char)
37 break;
38 src++;
39 srclen--;
41 return srclen;
44 /* mbstowcs for single-byte code page */
45 /* all lengths are in characters, not bytes */
46 static inline int mbstowcs_sbcs( const struct sbcs_table *table,
47 const unsigned char *src, unsigned int srclen,
48 WCHAR *dst, unsigned int dstlen )
50 const WCHAR * const cp2uni = table->cp2uni;
51 int ret = srclen;
53 if (dstlen < srclen)
55 /* buffer too small: fill it up to dstlen and return error */
56 srclen = dstlen;
57 ret = -1;
60 for (;;)
62 switch(srclen)
64 default:
65 case 16: dst[15] = cp2uni[src[15]];
66 case 15: dst[14] = cp2uni[src[14]];
67 case 14: dst[13] = cp2uni[src[13]];
68 case 13: dst[12] = cp2uni[src[12]];
69 case 12: dst[11] = cp2uni[src[11]];
70 case 11: dst[10] = cp2uni[src[10]];
71 case 10: dst[9] = cp2uni[src[9]];
72 case 9: dst[8] = cp2uni[src[8]];
73 case 8: dst[7] = cp2uni[src[7]];
74 case 7: dst[6] = cp2uni[src[6]];
75 case 6: dst[5] = cp2uni[src[5]];
76 case 5: dst[4] = cp2uni[src[4]];
77 case 4: dst[3] = cp2uni[src[3]];
78 case 3: dst[2] = cp2uni[src[2]];
79 case 2: dst[1] = cp2uni[src[1]];
80 case 1: dst[0] = cp2uni[src[0]];
81 case 0: break;
83 if (srclen < 16) return ret;
84 dst += 16;
85 src += 16;
86 srclen -= 16;
90 /* mbstowcs for single-byte code page with char decomposition */
91 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table,
92 const unsigned char *src, unsigned int srclen,
93 WCHAR *dst, unsigned int dstlen )
95 const WCHAR * const cp2uni = table->cp2uni;
96 unsigned int len;
98 if (!dstlen) /* compute length */
100 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
101 for (len = 0; srclen; srclen--, src++)
102 len += get_decomposition( cp2uni[*src], dummy, 4 );
103 return len;
106 for (len = dstlen; srclen && len; srclen--, src++)
108 int res = get_decomposition( cp2uni[*src], dst, len );
109 if (!res) break;
110 len -= res;
111 dst += res;
113 if (srclen) return -1; /* overflow */
114 return dstlen - len;
117 /* query necessary dst length for src string */
118 static inline int get_length_dbcs( const struct dbcs_table *table,
119 const unsigned char *src, unsigned int srclen )
121 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
122 int len;
124 for (len = 0; srclen; srclen--, src++, len++)
126 if (cp2uni_lb[*src])
128 if (!--srclen) break; /* partial char, ignore it */
129 src++;
132 return len;
135 /* check src string for invalid chars; return non-zero if invalid char found */
136 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
137 const unsigned char *src, unsigned int srclen )
139 const WCHAR * const cp2uni = table->cp2uni;
140 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
142 while (srclen)
144 unsigned char off = cp2uni_lb[*src];
145 if (off) /* multi-byte char */
147 if (srclen == 1) break; /* partial char, error */
148 if (cp2uni[(off << 8) + src[1]] == table->info.def_unicode_char &&
149 ((src[0] << 8) | src[1]) != table->info.def_char) break;
150 src++;
151 srclen--;
153 else if (cp2uni[*src] == table->info.def_unicode_char &&
154 *src != table->info.def_char) break;
155 src++;
156 srclen--;
158 return srclen;
161 /* mbstowcs for double-byte code page */
162 /* all lengths are in characters, not bytes */
163 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
164 const unsigned char *src, unsigned int srclen,
165 WCHAR *dst, unsigned int dstlen )
167 const WCHAR * const cp2uni = table->cp2uni;
168 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
169 unsigned int len;
171 if (!dstlen) return get_length_dbcs( table, src, srclen );
173 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
175 unsigned char off = cp2uni_lb[*src];
176 if (off)
178 if (!--srclen) break; /* partial char, ignore it */
179 src++;
180 *dst = cp2uni[(off << 8) + *src];
182 else *dst = cp2uni[*src];
184 if (srclen) return -1; /* overflow */
185 return dstlen - len;
189 /* mbstowcs for double-byte code page with character decomposition */
190 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
191 const unsigned char *src, unsigned int srclen,
192 WCHAR *dst, unsigned int dstlen )
194 const WCHAR * const cp2uni = table->cp2uni;
195 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
196 unsigned int len;
197 WCHAR ch;
198 int res;
200 if (!dstlen) /* compute length */
202 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
203 for (len = 0; srclen; srclen--, src++)
205 unsigned char off = cp2uni_lb[*src];
206 if (off)
208 if (!--srclen) break; /* partial char, ignore it */
209 src++;
210 ch = cp2uni[(off << 8) + *src];
212 else ch = cp2uni[*src];
213 len += get_decomposition( ch, dummy, 4 );
215 return len;
218 for (len = dstlen; srclen && len; srclen--, src++)
220 unsigned char off = cp2uni_lb[*src];
221 if (off)
223 if (!--srclen) break; /* partial char, ignore it */
224 src++;
225 ch = cp2uni[(off << 8) + *src];
227 else ch = cp2uni[*src];
228 if (!(res = get_decomposition( ch, dst, len ))) break;
229 dst += res;
230 len -= res;
232 if (srclen) return -1; /* overflow */
233 return dstlen - len;
237 /* return -1 on dst buffer overflow, -2 on invalid input char */
238 int cp_mbstowcs( const union cptable *table, int flags,
239 const char *src, int srclen,
240 WCHAR *dst, int dstlen )
242 if (table->info.char_size == 1)
244 if (flags & MB_ERR_INVALID_CHARS)
246 if (check_invalid_chars_sbcs( &table->sbcs, src, srclen )) return -2;
248 if (!(flags & MB_COMPOSITE))
250 if (!dstlen) return srclen;
251 return mbstowcs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
253 return mbstowcs_sbcs_decompose( &table->sbcs, src, srclen, dst, dstlen );
255 else /* mbcs */
257 if (flags & MB_ERR_INVALID_CHARS)
259 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
261 if (!(flags & MB_COMPOSITE))
262 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
263 else
264 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );