/*
 * WideCharToMultiByte implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
23 #include "wine/unicode.h"
25 /* search for a character in the unicode_compose_table; helper for compose() */
26 static inline int binary_search( WCHAR ch
, int low
, int high
)
28 extern const WCHAR unicode_compose_table
[];
31 int pos
= (low
+ high
) / 2;
32 if (unicode_compose_table
[2*pos
] < ch
)
37 if (unicode_compose_table
[2*pos
] > ch
)
47 /* return the result of the composition of two Unicode chars, or 0 if none */
48 WCHAR
compose( const WCHAR
*str
)
50 extern const WCHAR unicode_compose_table
[];
51 extern const unsigned int unicode_compose_table_size
;
53 int idx
= 1, low
= 0, high
= unicode_compose_table_size
- 1;
56 int pos
= binary_search( str
[idx
], low
, high
);
57 if (pos
== -1) return 0;
58 if (!idx
--) return unicode_compose_table
[2*pos
+1];
59 low
= unicode_compose_table
[2*pos
+1];
60 high
= unicode_compose_table
[2*pos
+3] - 1;
/****************************************************************/
68 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
69 static inline int is_valid_sbcs_mapping( const struct sbcs_table
*table
, int flags
,
70 WCHAR wch
, unsigned char ch
)
72 if ((flags
& WC_NO_BEST_FIT_CHARS
) || ch
== (unsigned char)table
->info
.def_char
)
73 return (table
->cp2uni
[ch
] == wch
);
77 /* query necessary dst length for src string */
78 static int get_length_sbcs( const struct sbcs_table
*table
, int flags
,
79 const WCHAR
*src
, unsigned int srclen
, int *used
)
81 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
82 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
86 if (!used
) used
= &tmp
; /* avoid checking on every char */
89 for (ret
= 0; srclen
; ret
++, src
++, srclen
--)
94 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
96 /* now check if we can use the composed char */
97 ch
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
98 if (is_valid_sbcs_mapping( table
, flags
, composed
, ch
))
100 /* we have a good mapping, use it */
105 /* no mapping for the composed char, check the other flags */
106 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
109 src
++; /* skip the non-spacing char */
113 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
118 /* WC_SEPCHARS is the default */
122 ch
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
123 *used
= !is_valid_sbcs_mapping( table
, flags
, wch
, ch
);
129 /* wcstombs for single-byte code page */
130 static inline int wcstombs_sbcs( const struct sbcs_table
*table
,
131 const WCHAR
*src
, unsigned int srclen
,
132 char *dst
, unsigned int dstlen
)
134 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
135 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
140 /* buffer too small: fill it up to dstlen and return error */
147 dst
[0] = uni2cp_low
[uni2cp_high
[src
[0] >> 8] + (src
[0] & 0xff)];
148 dst
[1] = uni2cp_low
[uni2cp_high
[src
[1] >> 8] + (src
[1] & 0xff)];
149 dst
[2] = uni2cp_low
[uni2cp_high
[src
[2] >> 8] + (src
[2] & 0xff)];
150 dst
[3] = uni2cp_low
[uni2cp_high
[src
[3] >> 8] + (src
[3] & 0xff)];
151 dst
[4] = uni2cp_low
[uni2cp_high
[src
[4] >> 8] + (src
[4] & 0xff)];
152 dst
[5] = uni2cp_low
[uni2cp_high
[src
[5] >> 8] + (src
[5] & 0xff)];
153 dst
[6] = uni2cp_low
[uni2cp_high
[src
[6] >> 8] + (src
[6] & 0xff)];
154 dst
[7] = uni2cp_low
[uni2cp_high
[src
[7] >> 8] + (src
[7] & 0xff)];
155 dst
[8] = uni2cp_low
[uni2cp_high
[src
[8] >> 8] + (src
[8] & 0xff)];
156 dst
[9] = uni2cp_low
[uni2cp_high
[src
[9] >> 8] + (src
[9] & 0xff)];
157 dst
[10] = uni2cp_low
[uni2cp_high
[src
[10] >> 8] + (src
[10] & 0xff)];
158 dst
[11] = uni2cp_low
[uni2cp_high
[src
[11] >> 8] + (src
[11] & 0xff)];
159 dst
[12] = uni2cp_low
[uni2cp_high
[src
[12] >> 8] + (src
[12] & 0xff)];
160 dst
[13] = uni2cp_low
[uni2cp_high
[src
[13] >> 8] + (src
[13] & 0xff)];
161 dst
[14] = uni2cp_low
[uni2cp_high
[src
[14] >> 8] + (src
[14] & 0xff)];
162 dst
[15] = uni2cp_low
[uni2cp_high
[src
[15] >> 8] + (src
[15] & 0xff)];
168 /* now handle remaining characters */
173 case 15: dst
[-15] = uni2cp_low
[uni2cp_high
[src
[-15] >> 8] + (src
[-15] & 0xff)];
174 case 14: dst
[-14] = uni2cp_low
[uni2cp_high
[src
[-14] >> 8] + (src
[-14] & 0xff)];
175 case 13: dst
[-13] = uni2cp_low
[uni2cp_high
[src
[-13] >> 8] + (src
[-13] & 0xff)];
176 case 12: dst
[-12] = uni2cp_low
[uni2cp_high
[src
[-12] >> 8] + (src
[-12] & 0xff)];
177 case 11: dst
[-11] = uni2cp_low
[uni2cp_high
[src
[-11] >> 8] + (src
[-11] & 0xff)];
178 case 10: dst
[-10] = uni2cp_low
[uni2cp_high
[src
[-10] >> 8] + (src
[-10] & 0xff)];
179 case 9: dst
[-9] = uni2cp_low
[uni2cp_high
[src
[-9] >> 8] + (src
[-9] & 0xff)];
180 case 8: dst
[-8] = uni2cp_low
[uni2cp_high
[src
[-8] >> 8] + (src
[-8] & 0xff)];
181 case 7: dst
[-7] = uni2cp_low
[uni2cp_high
[src
[-7] >> 8] + (src
[-7] & 0xff)];
182 case 6: dst
[-6] = uni2cp_low
[uni2cp_high
[src
[-6] >> 8] + (src
[-6] & 0xff)];
183 case 5: dst
[-5] = uni2cp_low
[uni2cp_high
[src
[-5] >> 8] + (src
[-5] & 0xff)];
184 case 4: dst
[-4] = uni2cp_low
[uni2cp_high
[src
[-4] >> 8] + (src
[-4] & 0xff)];
185 case 3: dst
[-3] = uni2cp_low
[uni2cp_high
[src
[-3] >> 8] + (src
[-3] & 0xff)];
186 case 2: dst
[-2] = uni2cp_low
[uni2cp_high
[src
[-2] >> 8] + (src
[-2] & 0xff)];
187 case 1: dst
[-1] = uni2cp_low
[uni2cp_high
[src
[-1] >> 8] + (src
[-1] & 0xff)];
193 /* slow version of wcstombs_sbcs that handles the various flags */
194 static int wcstombs_sbcs_slow( const struct sbcs_table
*table
, int flags
,
195 const WCHAR
*src
, unsigned int srclen
,
196 char *dst
, unsigned int dstlen
,
197 const char *defchar
, int *used
)
199 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
200 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
207 def
= table
->info
.def_char
& 0xff;
211 if (!used
) used
= &tmp
; /* avoid checking on every char */
214 for (len
= dstlen
; srclen
&& len
; dst
++, len
--, src
++, srclen
--)
218 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
220 /* now check if we can use the composed char */
221 *dst
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
222 if (is_valid_sbcs_mapping( table
, flags
, composed
, *dst
))
224 /* we have a good mapping, use it */
229 /* no mapping for the composed char, check the other flags */
230 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
234 src
++; /* skip the non-spacing char */
238 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
243 /* WC_SEPCHARS is the default */
246 *dst
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
247 if (!is_valid_sbcs_mapping( table
, flags
, wch
, *dst
))
253 if (srclen
) return -1; /* overflow */
/****************************************************************/
261 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
262 static inline int is_valid_dbcs_mapping( const struct dbcs_table
*table
, int flags
,
263 WCHAR wch
, unsigned short ch
)
265 if ((flags
& WC_NO_BEST_FIT_CHARS
) || ch
== table
->info
.def_char
)
267 /* check if char maps back to the same Unicode value */
270 unsigned char off
= table
->cp2uni_leadbytes
[ch
>> 8];
271 return (table
->cp2uni
[(off
<< 8) + (ch
& 0xff)] == wch
);
273 return (table
->cp2uni
[ch
& 0xff] == wch
);
278 /* compute the default char for the dbcs case */
279 static inline WCHAR
get_defchar_dbcs( const struct dbcs_table
*table
, const char *defchar
)
281 if (!defchar
) return table
->info
.def_char
;
282 if (!defchar
[1]) return (unsigned char)defchar
[0];
283 return ((unsigned char)defchar
[0] << 8) | (unsigned char)defchar
[1];
286 /* query necessary dst length for src string */
287 static int get_length_dbcs( const struct dbcs_table
*table
, int flags
,
288 const WCHAR
*src
, unsigned int srclen
,
289 const char *defchar
, int *used
)
291 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
292 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
293 WCHAR defchar_value
, composed
;
296 if (!defchar
&& !used
&& !(flags
& WC_COMPOSITECHECK
))
298 for (len
= 0; srclen
; srclen
--, src
++, len
++)
300 if (uni2cp_low
[uni2cp_high
[*src
>> 8] + (*src
& 0xff)] & 0xff00) len
++;
305 defchar_value
= get_defchar_dbcs( table
, defchar
);
306 if (!used
) used
= &tmp
; /* avoid checking on every char */
308 for (len
= 0; srclen
; len
++, srclen
--, src
++)
313 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
315 /* now check if we can use the composed char */
316 res
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
318 if (is_valid_dbcs_mapping( table
, flags
, composed
, res
))
320 /* we have a good mapping for the composed char, use it */
321 if (res
& 0xff00) len
++;
326 /* no mapping for the composed char, check the other flags */
327 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
329 if (defchar_value
& 0xff00) len
++;
331 src
++; /* skip the non-spacing char */
335 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
340 /* WC_SEPCHARS is the default */
343 res
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
344 if (!is_valid_dbcs_mapping( table
, flags
, wch
, res
))
349 if (res
& 0xff00) len
++;
354 /* wcstombs for double-byte code page */
355 static inline int wcstombs_dbcs( const struct dbcs_table
*table
,
356 const WCHAR
*src
, unsigned int srclen
,
357 char *dst
, unsigned int dstlen
)
359 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
360 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
363 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++)
365 unsigned short res
= uni2cp_low
[uni2cp_high
[*src
>> 8] + (*src
& 0xff)];
368 if (len
== 1) break; /* do not output a partial char */
374 if (srclen
) return -1; /* overflow */
378 /* slow version of wcstombs_dbcs that handles the various flags */
379 static int wcstombs_dbcs_slow( const struct dbcs_table
*table
, int flags
,
380 const WCHAR
*src
, unsigned int srclen
,
381 char *dst
, unsigned int dstlen
,
382 const char *defchar
, int *used
)
384 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
385 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
386 WCHAR defchar_value
= get_defchar_dbcs( table
, defchar
);
390 if (!used
) used
= &tmp
; /* avoid checking on every char */
393 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++)
398 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
400 /* now check if we can use the composed char */
401 res
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
403 if (is_valid_dbcs_mapping( table
, flags
, composed
, res
))
405 /* we have a good mapping for the composed char, use it */
410 /* no mapping for the composed char, check the other flags */
411 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
415 src
++; /* skip the non-spacing char */
419 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
424 /* WC_SEPCHARS is the default */
427 res
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
428 if (!is_valid_dbcs_mapping( table
, flags
, wch
, res
))
437 if (len
== 1) break; /* do not output a partial char */
443 if (srclen
) return -1; /* overflow */
447 /* wide char to multi byte string conversion */
448 /* return -1 on dst buffer overflow */
449 int wine_cp_wcstombs( const union cptable
*table
, int flags
,
450 const WCHAR
*src
, int srclen
,
451 char *dst
, int dstlen
, const char *defchar
, int *used
)
453 if (table
->info
.char_size
== 1)
455 if (flags
|| defchar
|| used
)
457 if (!dstlen
) return get_length_sbcs( &table
->sbcs
, flags
, src
, srclen
, used
);
458 return wcstombs_sbcs_slow( &table
->sbcs
, flags
, src
, srclen
,
459 dst
, dstlen
, defchar
, used
);
461 if (!dstlen
) return srclen
;
462 return wcstombs_sbcs( &table
->sbcs
, src
, srclen
, dst
, dstlen
);
466 if (!dstlen
) return get_length_dbcs( &table
->dbcs
, flags
, src
, srclen
, defchar
, used
);
467 if (flags
|| defchar
|| used
)
468 return wcstombs_dbcs_slow( &table
->dbcs
, flags
, src
, srclen
,
469 dst
, dstlen
, defchar
, used
);
470 return wcstombs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
474 /* CP_SYMBOL implementation */
475 /* return -1 on dst buffer overflow, -2 on invalid character */
476 int wine_cpsymbol_wcstombs( const WCHAR
*src
, int srclen
, char *dst
, int dstlen
)
479 if( dstlen
== 0) return srclen
;
480 len
= dstlen
> srclen
? srclen
: dstlen
;
481 for( i
= 0; i
< len
; i
++)
486 else if( w
>= 0xf020 && w
< 0xf100)
491 if( srclen
> len
) return -1;