/*
 * WideCharToMultiByte implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
24 #include "wine/unicode.h"
26 /* search for a character in the unicode_compose_table; helper for compose() */
27 static inline int binary_search( WCHAR ch
, int low
, int high
)
29 extern const WCHAR unicode_compose_table
[];
32 int pos
= (low
+ high
) / 2;
33 if (unicode_compose_table
[2*pos
] < ch
)
38 if (unicode_compose_table
[2*pos
] > ch
)
48 /* return the result of the composition of two Unicode chars, or 0 if none */
49 static WCHAR
compose( const WCHAR
*str
)
51 extern const WCHAR unicode_compose_table
[];
52 extern const unsigned int unicode_compose_table_size
;
54 int idx
= 1, low
= 0, high
= unicode_compose_table_size
- 1;
57 int pos
= binary_search( str
[idx
], low
, high
);
58 if (pos
== -1) return 0;
59 if (!idx
--) return unicode_compose_table
[2*pos
+1];
60 low
= unicode_compose_table
[2*pos
+1];
61 high
= unicode_compose_table
[2*pos
+3] - 1;
66 /****************************************************************/
69 /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
70 static inline int is_valid_sbcs_mapping( const struct sbcs_table
*table
, int flags
,
71 WCHAR wch
, unsigned char ch
)
73 if (flags
& WC_NO_BEST_FIT_CHARS
) return (table
->cp2uni
[ch
] == wch
);
74 if (ch
!= (unsigned char)table
->info
.def_char
) return 1;
75 return (wch
== table
->info
.def_unicode_char
);
78 /* query necessary dst length for src string */
79 static int get_length_sbcs( const struct sbcs_table
*table
, int flags
,
80 const WCHAR
*src
, unsigned int srclen
, int *used
)
82 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
83 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
87 if (!used
) used
= &tmp
; /* avoid checking on every char */
90 for (ret
= 0; srclen
; ret
++, src
++, srclen
--)
95 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
97 /* now check if we can use the composed char */
98 ch
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
99 if (is_valid_sbcs_mapping( table
, flags
, composed
, ch
))
101 /* we have a good mapping, use it */
106 /* no mapping for the composed char, check the other flags */
107 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
110 src
++; /* skip the non-spacing char */
114 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
119 /* WC_SEPCHARS is the default */
123 ch
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
124 *used
= !is_valid_sbcs_mapping( table
, flags
, wch
, ch
);
130 /* wcstombs for single-byte code page */
131 static inline int wcstombs_sbcs( const struct sbcs_table
*table
,
132 const WCHAR
*src
, unsigned int srclen
,
133 char *dst
, unsigned int dstlen
)
135 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
136 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
141 /* buffer too small: fill it up to dstlen and return error */
151 case 16: dst
[15] = uni2cp_low
[uni2cp_high
[src
[15] >> 8] + (src
[15] & 0xff)];
152 case 15: dst
[14] = uni2cp_low
[uni2cp_high
[src
[14] >> 8] + (src
[14] & 0xff)];
153 case 14: dst
[13] = uni2cp_low
[uni2cp_high
[src
[13] >> 8] + (src
[13] & 0xff)];
154 case 13: dst
[12] = uni2cp_low
[uni2cp_high
[src
[12] >> 8] + (src
[12] & 0xff)];
155 case 12: dst
[11] = uni2cp_low
[uni2cp_high
[src
[11] >> 8] + (src
[11] & 0xff)];
156 case 11: dst
[10] = uni2cp_low
[uni2cp_high
[src
[10] >> 8] + (src
[10] & 0xff)];
157 case 10: dst
[9] = uni2cp_low
[uni2cp_high
[src
[9] >> 8] + (src
[9] & 0xff)];
158 case 9: dst
[8] = uni2cp_low
[uni2cp_high
[src
[8] >> 8] + (src
[8] & 0xff)];
159 case 8: dst
[7] = uni2cp_low
[uni2cp_high
[src
[7] >> 8] + (src
[7] & 0xff)];
160 case 7: dst
[6] = uni2cp_low
[uni2cp_high
[src
[6] >> 8] + (src
[6] & 0xff)];
161 case 6: dst
[5] = uni2cp_low
[uni2cp_high
[src
[5] >> 8] + (src
[5] & 0xff)];
162 case 5: dst
[4] = uni2cp_low
[uni2cp_high
[src
[4] >> 8] + (src
[4] & 0xff)];
163 case 4: dst
[3] = uni2cp_low
[uni2cp_high
[src
[3] >> 8] + (src
[3] & 0xff)];
164 case 3: dst
[2] = uni2cp_low
[uni2cp_high
[src
[2] >> 8] + (src
[2] & 0xff)];
165 case 2: dst
[1] = uni2cp_low
[uni2cp_high
[src
[1] >> 8] + (src
[1] & 0xff)];
166 case 1: dst
[0] = uni2cp_low
[uni2cp_high
[src
[0] >> 8] + (src
[0] & 0xff)];
169 if (srclen
< 16) return ret
;
176 /* slow version of wcstombs_sbcs that handles the various flags */
177 static int wcstombs_sbcs_slow( const struct sbcs_table
*table
, int flags
,
178 const WCHAR
*src
, unsigned int srclen
,
179 char *dst
, unsigned int dstlen
,
180 const char *defchar
, int *used
)
182 const unsigned char * const uni2cp_low
= table
->uni2cp_low
;
183 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
184 const unsigned char table_default
= table
->info
.def_char
& 0xff;
189 if (!defchar
) defchar
= &table_default
;
190 if (!used
) used
= &tmp
; /* avoid checking on every char */
193 for (len
= dstlen
; srclen
&& len
; dst
++, len
--, src
++, srclen
--)
197 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
199 /* now check if we can use the composed char */
200 *dst
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
201 if (is_valid_sbcs_mapping( table
, flags
, composed
, *dst
))
203 /* we have a good mapping, use it */
208 /* no mapping for the composed char, check the other flags */
209 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
213 src
++; /* skip the non-spacing char */
217 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
222 /* WC_SEPCHARS is the default */
225 *dst
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
226 if (!is_valid_sbcs_mapping( table
, flags
, wch
, *dst
))
232 if (srclen
) return -1; /* overflow */
237 /****************************************************************/
240 /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
241 static inline int is_valid_dbcs_mapping( const struct dbcs_table
*table
, int flags
,
242 WCHAR wch
, unsigned short ch
)
244 if (ch
== table
->info
.def_char
&& wch
!= table
->info
.def_unicode_char
) return 0;
245 if (flags
& WC_NO_BEST_FIT_CHARS
)
247 /* check if char maps back to the same Unicode value */
250 unsigned char off
= table
->cp2uni_leadbytes
[ch
>> 8];
251 return (table
->cp2uni
[(off
<< 8) + (ch
& 0xff)] == wch
);
253 return (table
->cp2uni
[ch
& 0xff] == wch
);
258 /* query necessary dst length for src string */
259 static int get_length_dbcs( const struct dbcs_table
*table
, int flags
,
260 const WCHAR
*src
, unsigned int srclen
,
261 const char *defchar
, int *used
)
263 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
264 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
265 WCHAR defchar_value
= table
->info
.def_char
;
269 if (!defchar
&& !used
&& !(flags
& WC_COMPOSITECHECK
))
271 for (len
= 0; srclen
; srclen
--, src
++, len
++)
273 if (uni2cp_low
[uni2cp_high
[*src
>> 8] + (*src
& 0xff)] & 0xff00) len
++;
278 if (defchar
) defchar_value
= defchar
[1] ? ((defchar
[0] << 8) | defchar
[1]) : defchar
[0];
279 if (!used
) used
= &tmp
; /* avoid checking on every char */
281 for (len
= 0; srclen
; len
++, srclen
--, src
++)
286 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
288 /* now check if we can use the composed char */
289 res
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
291 if (is_valid_dbcs_mapping( table
, flags
, composed
, res
))
293 /* we have a good mapping for the composed char, use it */
294 if (res
& 0xff00) len
++;
299 /* no mapping for the composed char, check the other flags */
300 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
302 if (defchar_value
& 0xff00) len
++;
304 src
++; /* skip the non-spacing char */
308 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
313 /* WC_SEPCHARS is the default */
316 res
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
317 if (!is_valid_dbcs_mapping( table
, flags
, wch
, res
))
322 if (res
& 0xff00) len
++;
327 /* wcstombs for double-byte code page */
328 static inline int wcstombs_dbcs( const struct dbcs_table
*table
,
329 const WCHAR
*src
, unsigned int srclen
,
330 char *dst
, unsigned int dstlen
)
332 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
333 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
336 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++)
338 unsigned short res
= uni2cp_low
[uni2cp_high
[*src
>> 8] + (*src
& 0xff)];
341 if (len
== 1) break; /* do not output a partial char */
347 if (srclen
) return -1; /* overflow */
351 /* slow version of wcstombs_dbcs that handles the various flags */
352 static int wcstombs_dbcs_slow( const struct dbcs_table
*table
, int flags
,
353 const WCHAR
*src
, unsigned int srclen
,
354 char *dst
, unsigned int dstlen
,
355 const char *defchar
, int *used
)
357 const unsigned short * const uni2cp_low
= table
->uni2cp_low
;
358 const unsigned short * const uni2cp_high
= table
->uni2cp_high
;
359 WCHAR defchar_value
= table
->info
.def_char
;
363 if (defchar
) defchar_value
= defchar
[1] ? ((defchar
[0] << 8) | defchar
[1]) : defchar
[0];
364 if (!used
) used
= &tmp
; /* avoid checking on every char */
367 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++)
372 if ((flags
& WC_COMPOSITECHECK
) && (srclen
> 1) && (composed
= compose(src
)))
374 /* now check if we can use the composed char */
375 res
= uni2cp_low
[uni2cp_high
[composed
>> 8] + (composed
& 0xff)];
377 if (is_valid_dbcs_mapping( table
, flags
, composed
, res
))
379 /* we have a good mapping for the composed char, use it */
384 /* no mapping for the composed char, check the other flags */
385 if (flags
& WC_DEFAULTCHAR
) /* use the default char instead */
389 src
++; /* skip the non-spacing char */
393 if (flags
& WC_DISCARDNS
) /* skip the second char of the composition */
398 /* WC_SEPCHARS is the default */
401 res
= uni2cp_low
[uni2cp_high
[wch
>> 8] + (wch
& 0xff)];
402 if (!is_valid_dbcs_mapping( table
, flags
, wch
, res
))
411 if (len
== 1) break; /* do not output a partial char */
417 if (srclen
) return -1; /* overflow */
421 /* wide char to multi byte string conversion */
422 /* return -1 on dst buffer overflow */
423 int cp_wcstombs( const union cptable
*table
, int flags
,
424 const WCHAR
*src
, int srclen
,
425 char *dst
, int dstlen
, const char *defchar
, int *used
)
427 if (table
->info
.char_size
== 1)
429 if (flags
|| defchar
|| used
)
431 if (!dstlen
) return get_length_sbcs( &table
->sbcs
, flags
, src
, srclen
, used
);
432 return wcstombs_sbcs_slow( &table
->sbcs
, flags
, src
, srclen
,
433 dst
, dstlen
, defchar
, used
);
435 if (!dstlen
) return srclen
;
436 return wcstombs_sbcs( &table
->sbcs
, src
, srclen
, dst
, dstlen
);
440 if (!dstlen
) return get_length_dbcs( &table
->dbcs
, flags
, src
, srclen
, defchar
, used
);
441 if (flags
|| defchar
|| used
)
442 return wcstombs_dbcs_slow( &table
->dbcs
, flags
, src
, srclen
,
443 dst
, dstlen
, defchar
, used
);
444 return wcstombs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);