Don't call strtok twice if it failed the first time.
[wine.git] / unicode / wctomb.c
blobe80c253b89e9ee336bcfe5d4cd9fe44bf457dd71
1 /*
2 * WideCharToMultiByte implementation
4 * Copyright 2000 Alexandre Julliard
5 */
7 #include <string.h>
9 #include "winnls.h"
10 #include "wine/unicode.h"
12 /* search for a character in the unicode_compose_table; helper for compose() */
13 static inline int binary_search( WCHAR ch, int low, int high )
15 extern const WCHAR unicode_compose_table[];
16 while (low <= high)
18 int pos = (low + high) / 2;
19 if (unicode_compose_table[2*pos] < ch)
21 low = pos + 1;
22 continue;
24 if (unicode_compose_table[2*pos] > ch)
26 high = pos - 1;
27 continue;
29 return pos;
31 return -1;
34 /* return the result of the composition of two Unicode chars, or 0 if none */
35 static WCHAR compose( const WCHAR *str )
37 extern const WCHAR unicode_compose_table[];
38 extern const unsigned int unicode_compose_table_size;
40 int idx = 1, low = 0, high = unicode_compose_table_size - 1;
41 for (;;)
43 int pos = binary_search( str[idx], low, high );
44 if (pos == -1) return 0;
45 if (!idx--) return unicode_compose_table[2*pos+1];
46 low = unicode_compose_table[2*pos+1];
47 high = unicode_compose_table[2*pos+3] - 1;
52 /****************************************************************/
53 /* sbcs support */
55 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
56 static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
57 WCHAR wch, unsigned char ch )
59 if (flags & WC_NO_BEST_FIT_CHARS) return (table->cp2uni[ch] == wch);
60 if (ch != (unsigned char)table->info.def_char) return 1;
61 return (wch == table->info.def_unicode_char);
64 /* query necessary dst length for src string */
65 static inline int get_length_sbcs( const struct sbcs_table *table, int flags,
66 const WCHAR *src, unsigned int srclen )
68 unsigned int ret = srclen;
70 if (flags & WC_COMPOSITECHECK)
72 const unsigned char * const uni2cp_low = table->uni2cp_low;
73 const unsigned short * const uni2cp_high = table->uni2cp_high;
74 WCHAR composed;
76 for (ret = 0; srclen > 1; ret++, srclen--, src++)
78 if (!(composed = compose(src))) continue;
79 /* check if we should skip the next char */
81 /* in WC_DEFAULTCHAR and WC_DISCARDNS mode, we always skip */
82 /* the next char no matter if the composition is valid or not */
83 if (!(flags & (WC_DEFAULTCHAR|WC_DISCARDNS)))
85 unsigned char ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
86 if (!is_valid_sbcs_mapping( table, flags, composed, ch )) continue;
88 src++;
89 srclen--;
91 if (srclen) ret++; /* last char */
93 return ret;
96 /* wcstombs for single-byte code page */
97 static inline int wcstombs_sbcs( const struct sbcs_table *table,
98 const WCHAR *src, unsigned int srclen,
99 char *dst, unsigned int dstlen )
101 const unsigned char * const uni2cp_low = table->uni2cp_low;
102 const unsigned short * const uni2cp_high = table->uni2cp_high;
103 int ret = srclen;
105 if (dstlen < srclen)
107 /* buffer too small: fill it up to dstlen and return error */
108 srclen = dstlen;
109 ret = -1;
112 for (;;)
114 switch(srclen)
116 default:
117 case 16: dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
118 case 15: dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
119 case 14: dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
120 case 13: dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
121 case 12: dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
122 case 11: dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
123 case 10: dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
124 case 9: dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
125 case 8: dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
126 case 7: dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
127 case 6: dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
128 case 5: dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
129 case 4: dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
130 case 3: dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
131 case 2: dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
132 case 1: dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
133 case 0: break;
135 if (srclen < 16) return ret;
136 dst += 16;
137 src += 16;
138 srclen -= 16;
142 /* slow version of wcstombs_sbcs that handles the various flags */
143 static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
144 const WCHAR *src, unsigned int srclen,
145 char *dst, unsigned int dstlen,
146 const char *defchar, int *used )
148 const unsigned char * const uni2cp_low = table->uni2cp_low;
149 const unsigned short * const uni2cp_high = table->uni2cp_high;
150 const unsigned char table_default = table->info.def_char & 0xff;
151 unsigned int len;
152 int tmp;
153 WCHAR composed;
155 if (!defchar) defchar = &table_default;
156 if (!used) used = &tmp; /* avoid checking on every char */
158 for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
160 WCHAR wch = *src;
162 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
164 /* now check if we can use the composed char */
165 *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
166 if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
168 /* we have a good mapping, use it */
169 src++;
170 srclen--;
171 continue;
173 /* no mapping for the composed char, check the other flags */
174 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
176 *dst = *defchar;
177 *used = 1;
178 src++; /* skip the non-spacing char */
179 srclen--;
180 continue;
182 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
184 src++;
185 srclen--;
187 /* WC_SEPCHARS is the default */
190 *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
191 if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
193 *dst = *defchar;
194 *used = 1;
197 if (srclen) return -1; /* overflow */
198 return dstlen - len;
202 /****************************************************************/
203 /* dbcs support */
205 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
206 static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
207 WCHAR wch, unsigned short ch )
209 if (ch == table->info.def_char && wch != table->info.def_unicode_char) return 0;
210 if (flags & WC_NO_BEST_FIT_CHARS)
212 /* check if char maps back to the same Unicode value */
213 if (ch & 0xff00)
215 unsigned char off = table->cp2uni_leadbytes[ch >> 8];
216 return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
218 return (table->cp2uni[ch & 0xff] == wch);
220 return 1;
223 /* query necessary dst length for src string */
224 static int get_length_dbcs( const struct dbcs_table *table, int flags,
225 const WCHAR *src, unsigned int srclen,
226 const char *defchar )
228 const unsigned short * const uni2cp_low = table->uni2cp_low;
229 const unsigned short * const uni2cp_high = table->uni2cp_high;
230 WCHAR defchar_value = table->info.def_char;
231 WCHAR composed;
232 int len;
234 if (!defchar && !(flags & WC_COMPOSITECHECK))
236 for (len = 0; srclen; srclen--, src++, len++)
238 if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
240 return len;
243 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
244 for (len = 0; srclen; len++, srclen--, src++)
246 unsigned short res;
247 WCHAR wch = *src;
249 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
251 /* now check if we can use the composed char */
252 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
254 if (is_valid_dbcs_mapping( table, flags, composed, res ))
256 /* we have a good mapping for the composed char, use it */
257 if (res & 0xff00) len++;
258 src++;
259 srclen--;
260 continue;
262 /* no mapping for the composed char, check the other flags */
263 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
265 if (defchar_value & 0xff00) len++;
266 src++; /* skip the non-spacing char */
267 srclen--;
268 continue;
270 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
272 src++;
273 srclen--;
275 /* WC_SEPCHARS is the default */
278 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
279 if (!is_valid_dbcs_mapping( table, flags, wch, res )) res = defchar_value;
280 if (res & 0xff00) len++;
282 return len;
285 /* wcstombs for double-byte code page */
286 static inline int wcstombs_dbcs( const struct dbcs_table *table,
287 const WCHAR *src, unsigned int srclen,
288 char *dst, unsigned int dstlen )
290 const unsigned short * const uni2cp_low = table->uni2cp_low;
291 const unsigned short * const uni2cp_high = table->uni2cp_high;
292 int len;
294 for (len = dstlen; srclen && len; len--, srclen--, src++)
296 unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
297 if (res & 0xff00)
299 if (len == 1) break; /* do not output a partial char */
300 len--;
301 *dst++ = res >> 8;
303 *dst++ = (char)res;
305 if (srclen) return -1; /* overflow */
306 return dstlen - len;
309 /* slow version of wcstombs_dbcs that handles the various flags */
310 static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
311 const WCHAR *src, unsigned int srclen,
312 char *dst, unsigned int dstlen,
313 const char *defchar, int *used )
315 const unsigned short * const uni2cp_low = table->uni2cp_low;
316 const unsigned short * const uni2cp_high = table->uni2cp_high;
317 WCHAR defchar_value = table->info.def_char;
318 WCHAR composed;
319 int len, tmp;
321 if (defchar) defchar_value = defchar[1] ? ((defchar[0] << 8) | defchar[1]) : defchar[0];
322 if (!used) used = &tmp; /* avoid checking on every char */
324 for (len = dstlen; srclen && len; len--, srclen--, src++)
326 unsigned short res;
327 WCHAR wch = *src;
329 if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = compose(src)))
331 /* now check if we can use the composed char */
332 res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
334 if (is_valid_dbcs_mapping( table, flags, composed, res ))
336 /* we have a good mapping for the composed char, use it */
337 src++;
338 srclen--;
339 goto output_char;
341 /* no mapping for the composed char, check the other flags */
342 if (flags & WC_DEFAULTCHAR) /* use the default char instead */
344 res = defchar_value;
345 *used = 1;
346 src++; /* skip the non-spacing char */
347 srclen--;
348 goto output_char;
350 if (flags & WC_DISCARDNS) /* skip the second char of the composition */
352 src++;
353 srclen--;
355 /* WC_SEPCHARS is the default */
358 res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
359 if (!is_valid_dbcs_mapping( table, flags, wch, res ))
361 res = defchar_value;
362 *used = 1;
365 output_char:
366 if (res & 0xff00)
368 if (len == 1) break; /* do not output a partial char */
369 len--;
370 *dst++ = res >> 8;
372 *dst++ = (char)res;
374 if (srclen) return -1; /* overflow */
375 return dstlen - len;
378 /* wide char to multi byte string conversion */
379 /* return -1 on dst buffer overflow */
380 int cp_wcstombs( const union cptable *table, int flags,
381 const WCHAR *src, int srclen,
382 char *dst, int dstlen, const char *defchar, int *used )
384 if (table->info.char_size == 1)
386 if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen );
387 if (flags || defchar || used)
388 return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
389 dst, dstlen, defchar, used );
390 return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
392 else /* mbcs */
394 if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar );
395 if (flags || defchar || used)
396 return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
397 dst, dstlen, defchar, used );
398 return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );