/*
 * MultiByteToWideChar implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
23 #include "wine/unicode.h"
25 extern unsigned int wine_decompose( WCHAR ch
, WCHAR
*dst
, unsigned int dstlen
) DECLSPEC_HIDDEN
;
27 /* check the code whether it is in Unicode Private Use Area (PUA). */
28 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
29 static inline int is_private_use_area_char(WCHAR code
)
31 return (code
>= 0xe000 && code
<= 0xf8ff);
34 /* check src string for invalid chars; return non-zero if invalid char found */
35 static inline int check_invalid_chars_sbcs( const struct sbcs_table
*table
, int flags
,
36 const unsigned char *src
, unsigned int srclen
)
38 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
39 const WCHAR def_unicode_char
= table
->info
.def_unicode_char
;
40 const unsigned char def_char
= table
->uni2cp_low
[table
->uni2cp_high
[def_unicode_char
>> 8]
41 + (def_unicode_char
& 0xff)];
44 if ((cp2uni
[*src
] == def_unicode_char
&& *src
!= def_char
) ||
45 is_private_use_area_char(cp2uni
[*src
])) break;
52 /* mbstowcs for single-byte code page */
53 /* all lengths are in characters, not bytes */
54 static inline int mbstowcs_sbcs( const struct sbcs_table
*table
, int flags
,
55 const unsigned char *src
, unsigned int srclen
,
56 WCHAR
*dst
, unsigned int dstlen
)
58 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
63 /* buffer too small: fill it up to dstlen and return error */
73 case 16: dst
[15] = cp2uni
[src
[15]];
74 case 15: dst
[14] = cp2uni
[src
[14]];
75 case 14: dst
[13] = cp2uni
[src
[13]];
76 case 13: dst
[12] = cp2uni
[src
[12]];
77 case 12: dst
[11] = cp2uni
[src
[11]];
78 case 11: dst
[10] = cp2uni
[src
[10]];
79 case 10: dst
[9] = cp2uni
[src
[9]];
80 case 9: dst
[8] = cp2uni
[src
[8]];
81 case 8: dst
[7] = cp2uni
[src
[7]];
82 case 7: dst
[6] = cp2uni
[src
[6]];
83 case 6: dst
[5] = cp2uni
[src
[5]];
84 case 5: dst
[4] = cp2uni
[src
[4]];
85 case 4: dst
[3] = cp2uni
[src
[3]];
86 case 3: dst
[2] = cp2uni
[src
[2]];
87 case 2: dst
[1] = cp2uni
[src
[1]];
88 case 1: dst
[0] = cp2uni
[src
[0]];
91 if (srclen
< 16) return ret
;
98 /* mbstowcs for single-byte code page with char decomposition */
99 static int mbstowcs_sbcs_decompose( const struct sbcs_table
*table
, int flags
,
100 const unsigned char *src
, unsigned int srclen
,
101 WCHAR
*dst
, unsigned int dstlen
)
103 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
106 if (!dstlen
) /* compute length */
108 WCHAR dummy
[4]; /* no decomposition is larger than 4 chars */
109 for (len
= 0; srclen
; srclen
--, src
++)
110 len
+= wine_decompose( cp2uni
[*src
], dummy
, 4 );
114 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++)
116 unsigned int res
= wine_decompose( cp2uni
[*src
], dst
, len
);
121 if (srclen
) return -1; /* overflow */
125 /* query necessary dst length for src string */
126 static inline int get_length_dbcs( const struct dbcs_table
*table
,
127 const unsigned char *src
, unsigned int srclen
)
129 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
132 for (len
= 0; srclen
; srclen
--, src
++, len
++)
134 if (cp2uni_lb
[*src
] && srclen
> 1 && src
[1])
143 /* check src string for invalid chars; return non-zero if invalid char found */
144 static inline int check_invalid_chars_dbcs( const struct dbcs_table
*table
,
145 const unsigned char *src
, unsigned int srclen
)
147 const WCHAR
* const cp2uni
= table
->cp2uni
;
148 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
149 const WCHAR def_unicode_char
= table
->info
.def_unicode_char
;
150 const unsigned short def_char
= table
->uni2cp_low
[table
->uni2cp_high
[def_unicode_char
>> 8]
151 + (def_unicode_char
& 0xff)];
154 unsigned char off
= cp2uni_lb
[*src
];
155 if (off
) /* multi-byte char */
157 if (srclen
== 1) break; /* partial char, error */
158 if (cp2uni
[(off
<< 8) + src
[1]] == def_unicode_char
&&
159 ((src
[0] << 8) | src
[1]) != def_char
) break;
163 else if ((cp2uni
[*src
] == def_unicode_char
&& *src
!= def_char
) ||
164 is_private_use_area_char(cp2uni
[*src
])) break;
171 /* mbstowcs for double-byte code page */
172 /* all lengths are in characters, not bytes */
173 static inline int mbstowcs_dbcs( const struct dbcs_table
*table
,
174 const unsigned char *src
, unsigned int srclen
,
175 WCHAR
*dst
, unsigned int dstlen
)
177 const WCHAR
* const cp2uni
= table
->cp2uni
;
178 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
181 if (!dstlen
) return get_length_dbcs( table
, src
, srclen
);
183 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++, dst
++)
185 unsigned char off
= cp2uni_lb
[*src
];
186 if (off
&& srclen
> 1 && src
[1])
190 *dst
= cp2uni
[(off
<< 8) + *src
];
192 else *dst
= cp2uni
[*src
];
194 if (srclen
) return -1; /* overflow */
199 /* mbstowcs for double-byte code page with character decomposition */
200 static int mbstowcs_dbcs_decompose( const struct dbcs_table
*table
,
201 const unsigned char *src
, unsigned int srclen
,
202 WCHAR
*dst
, unsigned int dstlen
)
204 const WCHAR
* const cp2uni
= table
->cp2uni
;
205 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
206 unsigned int len
, res
;
209 if (!dstlen
) /* compute length */
211 WCHAR dummy
[4]; /* no decomposition is larger than 4 chars */
212 for (len
= 0; srclen
; srclen
--, src
++)
214 unsigned char off
= cp2uni_lb
[*src
];
215 if (off
&& srclen
> 1 && src
[1])
219 ch
= cp2uni
[(off
<< 8) + *src
];
221 else ch
= cp2uni
[*src
];
222 len
+= wine_decompose( ch
, dummy
, 4 );
227 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++)
229 unsigned char off
= cp2uni_lb
[*src
];
230 if (off
&& srclen
> 1 && src
[1])
234 ch
= cp2uni
[(off
<< 8) + *src
];
236 else ch
= cp2uni
[*src
];
237 if (!(res
= wine_decompose( ch
, dst
, len
))) break;
241 if (srclen
) return -1; /* overflow */
246 /* return -1 on dst buffer overflow, -2 on invalid input char */
247 int wine_cp_mbstowcs( const union cptable
*table
, int flags
,
248 const char *s
, int srclen
,
249 WCHAR
*dst
, int dstlen
)
251 const unsigned char *src
= (const unsigned char*) s
;
253 if (table
->info
.char_size
== 1)
255 if (flags
& MB_ERR_INVALID_CHARS
)
257 if (check_invalid_chars_sbcs( &table
->sbcs
, flags
, src
, srclen
)) return -2;
259 if (!(flags
& MB_COMPOSITE
))
261 if (!dstlen
) return srclen
;
262 return mbstowcs_sbcs( &table
->sbcs
, flags
, src
, srclen
, dst
, dstlen
);
264 return mbstowcs_sbcs_decompose( &table
->sbcs
, flags
, src
, srclen
, dst
, dstlen
);
268 if (flags
& MB_ERR_INVALID_CHARS
)
270 if (check_invalid_chars_dbcs( &table
->dbcs
, src
, srclen
)) return -2;
272 if (!(flags
& MB_COMPOSITE
))
273 return mbstowcs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
275 return mbstowcs_dbcs_decompose( &table
->dbcs
, src
, srclen
, dst
, dstlen
);