2 * MultiByteToWideChar implementation
4 * Copyright 2000 Alexandre Julliard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23 #include "wine/unicode.h"
25 /* get the decomposition of a Unicode char */
/* Looks up src in unicode_decompose_table and stores the result in dst,
 * which has room for dstlen characters.  Callers in this file add the
 * return value to their output length and treat a return of 0 as
 * "does not fit".
 * NOTE(review): the braces, the declaration of res and the remaining
 * statements of this function are not visible in this excerpt. */
26 static int get_decomposition( WCHAR src
, WCHAR
*dst
, unsigned int dstlen
)
28 extern const WCHAR unicode_decompose_table
[];
29 const WCHAR
*ptr
= unicode_decompose_table
;
/* first level: the entry indexed by the high byte of src is an offset
 * back into the same table */
33 ptr
= unicode_decompose_table
+ ptr
[src
>> 8];
/* second level: bits 4-7 of src select another offset, then the low
 * nibble selects an entry made of two WCHARs (read below as *ptr and ptr[1]) */
34 ptr
= unicode_decompose_table
+ ptr
[(src
>> 4) & 0x0f] + 2 * (src
& 0x0f);
/* the code below stores one more character after the recursive result,
 * so give up when at most one slot is available */
36 if (dstlen
<= 1) return 0;
37 /* apply the decomposition recursively to the first char */
/* on success the second char of the entry is appended after the
 * recursively decomposed first char and the count is bumped */
38 if ((res
= get_decomposition( *ptr
, dst
, dstlen
-1 ))) dst
[res
++] = ptr
[1];
42 /* check src string for invalid chars; return non-zero if invalid char found */
/* table is the single-byte code page, src/srclen the input bytes.  When
 * MB_USEGLYPHCHARS is set in flags the glyph mapping table is consulted
 * instead of the regular one.
 * NOTE(review): the loop around the test and the return statement are not
 * visible in this excerpt. */
43 static inline int check_invalid_chars_sbcs( const struct sbcs_table
*table
, int flags
,
44 const unsigned char *src
, unsigned int srclen
)
46 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
47 const WCHAR def_unicode_char
= table
->info
.def_unicode_char
;
/* def_char is the code page byte that def_unicode_char maps back to,
 * found through the two-level uni2cp_high / uni2cp_low lookup */
48 const unsigned char def_char
= table
->uni2cp_low
[table
->uni2cp_high
[def_unicode_char
>> 8]
49 + (def_unicode_char
& 0xff)];
/* a byte is rejected when it maps to the default Unicode char without
 * being the encoding of that default char itself */
52 if (cp2uni
[*src
] == def_unicode_char
&& *src
!= def_char
) break;
59 /* mbstowcs for single-byte code page */
60 /* all lengths are in characters, not bytes */
/* Converts src to dst through a direct table lookup, one WCHAR per
 * source byte.  MB_USEGLYPHCHARS in flags selects the glyph mapping table.
 * NOTE(review): the declaration and setup of ret, the enclosing loop and
 * the switch header are not visible in this excerpt. */
61 static inline int mbstowcs_sbcs( const struct sbcs_table
*table
, int flags
,
62 const unsigned char *src
, unsigned int srclen
,
63 WCHAR
*dst
, unsigned int dstlen
)
65 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
70 /* buffer too small: fill it up to dstlen and return error */
/* unrolled conversion: the cases fall through on purpose, so entering at
 * case N converts characters N-1 down to 0 */
80 case 16: dst
[15] = cp2uni
[src
[15]];
81 case 15: dst
[14] = cp2uni
[src
[14]];
82 case 14: dst
[13] = cp2uni
[src
[13]];
83 case 13: dst
[12] = cp2uni
[src
[12]];
84 case 12: dst
[11] = cp2uni
[src
[11]];
85 case 11: dst
[10] = cp2uni
[src
[10]];
86 case 10: dst
[9] = cp2uni
[src
[9]];
87 case 9: dst
[8] = cp2uni
[src
[8]];
88 case 8: dst
[7] = cp2uni
[src
[7]];
89 case 7: dst
[6] = cp2uni
[src
[6]];
90 case 6: dst
[5] = cp2uni
[src
[5]];
91 case 5: dst
[4] = cp2uni
[src
[4]];
92 case 4: dst
[3] = cp2uni
[src
[3]];
93 case 3: dst
[2] = cp2uni
[src
[2]];
94 case 2: dst
[1] = cp2uni
[src
[1]];
95 case 1: dst
[0] = cp2uni
[src
[0]];
/* fewer than 16 characters were pending, so nothing is left to convert;
 * presumably the loop otherwise advances by 16 (not visible here) */
98 if (srclen
< 16) return ret
;
105 /* mbstowcs for single-byte code page with char decomposition */
/* Each source byte is mapped through cp2uni and the resulting character
 * is passed to get_decomposition().  A dstlen of 0 requests a length
 * computation only; otherwise -1 is returned when source characters
 * remain after the conversion loop stops.
 * NOTE(review): the declaration of len, the loop bookkeeping after the
 * get_decomposition() call and the final return are not visible in this
 * excerpt. */
106 static int mbstowcs_sbcs_decompose( const struct sbcs_table
*table
, int flags
,
107 const unsigned char *src
, unsigned int srclen
,
108 WCHAR
*dst
, unsigned int dstlen
)
110 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
113 if (!dstlen
) /* compute length */
/* decompose into a scratch buffer only to learn each result length */
115 WCHAR dummy
[4]; /* no decomposition is larger than 4 chars */
116 for (len
= 0; srclen
; srclen
--, src
++)
117 len
+= get_decomposition( cp2uni
[*src
], dummy
, 4 );
/* conversion pass: len starts at dstlen and is passed on as the room
 * available to get_decomposition(); the loop stops once either the
 * source or that room is exhausted */
121 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++)
123 int res
= get_decomposition( cp2uni
[*src
], dst
, len
);
/* leftover source characters mean dst was too small */
128 if (srclen
) return -1; /* overflow */
132 /* query necessary dst length for src string */
/* The loop header counts one output character per iteration.
 * NOTE(review): the declaration of len, the test that guards the extra
 * srclen decrement (presumably a lead byte check through cp2uni_lb) and
 * the return statement are not visible in this excerpt. */
133 static inline int get_length_dbcs( const struct dbcs_table
*table
,
134 const unsigned char *src
, unsigned int srclen
)
136 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
139 for (len
= 0; srclen
; srclen
--, src
++, len
++)
/* one more source byte is consumed here; if that leaves nothing, the
 * loop is left before len is incremented for this character */
143 if (!--srclen
) break; /* partial char, ignore it */
150 /* check src string for invalid chars; return non-zero if invalid char found */
/* Double-byte variant: a non-zero cp2uni_leadbytes entry marks the start
 * of a two-byte character, anything else is checked as a single byte.
 * NOTE(review): the enclosing loop, the pointer/length advance and the
 * return statement are not visible in this excerpt. */
151 static inline int check_invalid_chars_dbcs( const struct dbcs_table
*table
,
152 const unsigned char *src
, unsigned int srclen
)
154 const WCHAR
* const cp2uni
= table
->cp2uni
;
155 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
156 const WCHAR def_unicode_char
= table
->info
.def_unicode_char
;
/* code page encoding of the default Unicode char; held in an unsigned
 * short and compared below against both one-byte and two-byte values */
157 const unsigned short def_char
= table
->uni2cp_low
[table
->uni2cp_high
[def_unicode_char
>> 8]
158 + (def_unicode_char
& 0xff)];
161 unsigned char off
= cp2uni_lb
[*src
];
162 if (off
) /* multi-byte char */
164 if (srclen
== 1) break; /* partial char, error */
/* the two-byte char is looked up at (off << 8) + trail byte; it is
 * rejected when it maps to the default Unicode char without being the
 * two-byte encoding of that default char */
165 if (cp2uni
[(off
<< 8) + src
[1]] == def_unicode_char
&&
166 ((src
[0] << 8) | src
[1]) != def_char
) break;
/* single-byte char: same test as for single-byte code pages */
170 else if (cp2uni
[*src
] == def_unicode_char
&& *src
!= def_char
) break;
177 /* mbstowcs for double-byte code page */
178 /* all lengths are in characters, not bytes */
/* Writes one WCHAR to dst per source character.  A dstlen of 0 turns the
 * call into a length query; -1 is returned when source characters remain
 * after the loop stops.
 * NOTE(review): the declaration of len, the lead byte test, the advance
 * to the trail byte and the final return are not visible in this excerpt. */
179 static inline int mbstowcs_dbcs( const struct dbcs_table
*table
,
180 const unsigned char *src
, unsigned int srclen
,
181 WCHAR
*dst
, unsigned int dstlen
)
183 const WCHAR
* const cp2uni
= table
->cp2uni
;
184 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
/* no destination buffer: only report the required length */
187 if (!dstlen
) return get_length_dbcs( table
, src
, srclen
);
/* len counts the room left in dst; every iteration produces one WCHAR */
189 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++, dst
++)
191 unsigned char off
= cp2uni_lb
[*src
];
194 if (!--srclen
) break; /* partial char, ignore it */
/* two-byte char: off combined with the current source byte indexes
 * cp2uni; presumably src was advanced to the trail byte just before
 * (not visible here) */
196 *dst
= cp2uni
[(off
<< 8) + *src
];
/* single-byte char: direct lookup */
198 else *dst
= cp2uni
[*src
];
/* leftover source characters mean dst was too small */
200 if (srclen
) return -1; /* overflow */
205 /* mbstowcs for double-byte code page with character decomposition */
/* Each source character is mapped through cp2uni and then passed to
 * get_decomposition().  A dstlen of 0 requests a length computation only;
 * otherwise -1 is returned when source characters remain after the
 * conversion loop stops.
 * NOTE(review): the declarations of len, ch and res, the lead byte tests,
 * the bookkeeping after get_decomposition() and the final returns are not
 * visible in this excerpt. */
206 static int mbstowcs_dbcs_decompose( const struct dbcs_table
*table
,
207 const unsigned char *src
, unsigned int srclen
,
208 WCHAR
*dst
, unsigned int dstlen
)
210 const WCHAR
* const cp2uni
= table
->cp2uni
;
211 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
216 if (!dstlen
) /* compute length */
/* decompose into a scratch buffer only to learn each result length */
218 WCHAR dummy
[4]; /* no decomposition is larger than 4 chars */
219 for (len
= 0; srclen
; srclen
--, src
++)
221 unsigned char off
= cp2uni_lb
[*src
];
224 if (!--srclen
) break; /* partial char, ignore it */
/* two-byte char: off combined with the current source byte indexes cp2uni */
226 ch
= cp2uni
[(off
<< 8) + *src
];
228 else ch
= cp2uni
[*src
];
229 len
+= get_decomposition( ch
, dummy
, 4 );
/* conversion pass: len starts at dstlen and is passed on as the room
 * available to get_decomposition() */
234 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++)
236 unsigned char off
= cp2uni_lb
[*src
];
239 if (!--srclen
) break; /* partial char, ignore it */
241 ch
= cp2uni
[(off
<< 8) + *src
];
243 else ch
= cp2uni
[*src
];
/* a zero result means the decomposition did not fit in the room left */
244 if (!(res
= get_decomposition( ch
, dst
, len
))) break;
/* leftover source characters mean dst was too small */
248 if (srclen
) return -1; /* overflow */
253 /* return -1 on dst buffer overflow, -2 on invalid input char */
/* Entry point: converts the multibyte string s (srclen characters) to
 * wide characters using the code page described by table, dispatching to
 * the single-byte or double-byte helpers.
 *   MB_ERR_INVALID_CHARS  validate the input first, return -2 on failure
 *   MB_COMPOSITE          use the decomposing helpers
 * NOTE(review): the braces and the branch that separates the single-byte
 * path from the double-byte path are not visible in this excerpt. */
254 int wine_cp_mbstowcs( const union cptable
*table
, int flags
,
255 const char *s
, int srclen
,
256 WCHAR
*dst
, int dstlen
)
/* the helpers take the input as unsigned bytes */
258 const unsigned char *src
= (const unsigned char*) s
;
260 if (table
->info
.char_size
== 1)
262 if (flags
& MB_ERR_INVALID_CHARS
)
264 if (check_invalid_chars_sbcs( &table
->sbcs
, flags
, src
, srclen
)) return -2;
266 if (!(flags
& MB_COMPOSITE
))
/* without decomposition a single-byte string needs exactly one WCHAR
 * per source byte, so a length query is answered directly */
268 if (!dstlen
) return srclen
;
269 return mbstowcs_sbcs( &table
->sbcs
, flags
, src
, srclen
, dst
, dstlen
);
271 return mbstowcs_sbcs_decompose( &table
->sbcs
, flags
, src
, srclen
, dst
, dstlen
);
/* the statements below use the double-byte table and helpers */
275 if (flags
& MB_ERR_INVALID_CHARS
)
277 if (check_invalid_chars_dbcs( &table
->dbcs
, src
, srclen
)) return -2;
279 if (!(flags
& MB_COMPOSITE
))
280 return mbstowcs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
282 return mbstowcs_dbcs_decompose( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
286 /* CP_SYMBOL implementation */
287 /* return -1 on dst buffer overflow */
/* Processes at most dstlen characters of src; a dstlen of 0 is a length
 * query answered with srclen.
 * NOTE(review): the declarations of len and i, the per-character mapping
 * into dst and the final return are not visible in this excerpt. */
288 int wine_cpsymbol_mbstowcs( const char *src
, int srclen
, WCHAR
*dst
, int dstlen
)
291 if( dstlen
== 0) return srclen
;
/* len is the smaller of srclen and dstlen */
292 len
= dstlen
> srclen
? srclen
: dstlen
;
293 for( i
= 0; i
< len
; i
++)
/* read the source char as an unsigned byte */
295 unsigned char c
= src
[ i
];
/* not every source character fit into dst */
301 if( srclen
> len
) return -1;