2 * MultiByteToWideChar implementation
4 * Copyright 2000 Alexandre Julliard
10 #include "wine/unicode.h"
12 /* Get the decomposition of a Unicode char.
 *
 * src    - character to look up in unicode_decompose_table
 * dst    - buffer that receives the decomposed characters
 * dstlen - room available in dst; each recursive call is given dstlen-1,
 *          and the visible check returns 0 once it is <= 1
 *
 * NOTE(review): this listing has gaps. `res` is used below but its
 * declaration, the function braces and the final return statement are not
 * visible. Presumably the function returns the number of WCHARs written
 * (callers add the result to a running length) - confirm against the
 * complete file before changing anything here.
 */
13 static int get_decomposition( WCHAR src
, WCHAR
*dst
, unsigned int dstlen
)
15 extern const WCHAR unicode_decompose_table
[];
16 const WCHAR
*ptr
= unicode_decompose_table
;
/* first-level lookup: the high byte of src selects an offset into the table */
20 ptr
= unicode_decompose_table
+ ptr
[src
>> 8];
/* second-level lookup: bits 4-7 of src select a group, then the low 4 bits
 * pick an entry inside it; entries are two WCHARs wide (hence the 2 *),
 * read below as *ptr and ptr[1] */
21 ptr
= unicode_decompose_table
+ ptr
[(src
>> 4) & 0x0f] + 2 * (src
& 0x0f);
/* give up (0) when dst cannot hold more than one char */
23 if (dstlen
<= 1) return 0;
24 /* apply the decomposition recursively to the first char (*ptr); when that
 * succeeds (non-zero res) store the second char ptr[1] right after it and
 * count it in res */
25 if ((res
= get_decomposition( *ptr
, dst
, dstlen
-1 ))) dst
[res
++] = ptr
[1];
29 /* Check src string for invalid chars; return non-zero if invalid char found.
 *
 * table  - single-byte code page table (uses cp2uni, info.def_unicode_char
 *          and info.def_char)
 * src    - input bytes
 * srclen - length of src
 *
 * NOTE(review): only the per-byte test is visible in this listing; the loop
 * over srclen, the action taken when the test matches and the return
 * statement are missing - confirm against the complete file.
 */
30 static inline int check_invalid_chars_sbcs( const struct sbcs_table
*table
,
31 const unsigned char *src
, unsigned int srclen
)
33 const WCHAR
* const cp2uni
= table
->cp2uni
;
/* the byte maps to the default Unicode char although it is not the code
 * page's own default char */
36 if (cp2uni
[*src
] == table
->info
.def_unicode_char
&& *src
!= table
->info
.def_char
)
44 /* mbstowcs for single-byte code page */
45 /* all lengths are in characters, not bytes.
 *
 * table  - single-byte code page table; each input byte is translated
 *          through table->cp2uni
 * src    - input bytes
 * srclen - length of src
 * dst    - output buffer
 * dstlen - capacity of dst
 *
 * NOTE(review): this listing has gaps. The declaration/initialisation of
 * `ret`, the "buffer too small" test itself, the switch header and the code
 * that advances src/dst are not visible - confirm against the complete file.
 */
46 static inline int mbstowcs_sbcs( const struct sbcs_table
*table
,
47 const unsigned char *src
, unsigned int srclen
,
48 WCHAR
*dst
, unsigned int dstlen
)
50 const WCHAR
* const cp2uni
= table
->cp2uni
;
55 /* buffer too small: fill it up to dstlen and return error */
/* unrolled copy: the case labels fall through (there is no break between
 * them), so entering at case N translates elements N-1 down to 0.
 * NOTE(review): presumably this sits in switch(srclen) inside a loop that
 * handles 16 elements per pass - the header is not visible here. */
65 case 16: dst
[15] = cp2uni
[src
[15]];
66 case 15: dst
[14] = cp2uni
[src
[14]];
67 case 14: dst
[13] = cp2uni
[src
[13]];
68 case 13: dst
[12] = cp2uni
[src
[12]];
69 case 12: dst
[11] = cp2uni
[src
[11]];
70 case 11: dst
[10] = cp2uni
[src
[10]];
71 case 10: dst
[9] = cp2uni
[src
[9]];
72 case 9: dst
[8] = cp2uni
[src
[8]];
73 case 8: dst
[7] = cp2uni
[src
[7]];
74 case 7: dst
[6] = cp2uni
[src
[6]];
75 case 6: dst
[5] = cp2uni
[src
[5]];
76 case 5: dst
[4] = cp2uni
[src
[4]];
77 case 4: dst
[3] = cp2uni
[src
[3]];
78 case 3: dst
[2] = cp2uni
[src
[2]];
79 case 2: dst
[1] = cp2uni
[src
[1]];
80 case 1: dst
[0] = cp2uni
[src
[0]];
/* fewer than 16 elements were left, so this pass finished the string */
83 if (srclen
< 16) return ret
;
90 /* mbstowcs for single-byte code page with char decomposition.
 *
 * table  - single-byte code page table; bytes are translated through
 *          table->cp2uni and the result is fed to get_decomposition()
 * src    - input bytes
 * srclen - length of src
 * dst    - output buffer
 * dstlen - capacity of dst; 0 selects the length-computation branch
 *
 * Returns -1 when input is left over after the conversion loop (overflow).
 *
 * NOTE(review): this listing has gaps. The declaration of `len`, the use of
 * `res` inside the second loop (presumably advancing dst and reducing len)
 * and the non-error return statements are not visible - confirm against the
 * complete file.
 */
91 static int mbstowcs_sbcs_decompose( const struct sbcs_table
*table
,
92 const unsigned char *src
, unsigned int srclen
,
93 WCHAR
*dst
, unsigned int dstlen
)
95 const WCHAR
* const cp2uni
= table
->cp2uni
;
98 if (!dstlen
) /* compute length */
100 WCHAR dummy
[4]; /* no decomposition is larger than 4 chars */
/* sum the decomposed length of every input char, writing into the scratch
 * buffer only */
101 for (len
= 0; srclen
; srclen
--, src
++)
102 len
+= get_decomposition( cp2uni
[*src
], dummy
, 4 );
/* real conversion: len starts as the room in dst; stop when either the input
 * or the room runs out */
106 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++)
108 int res
= get_decomposition( cp2uni
[*src
], dst
, len
);
113 if (srclen
) return -1; /* overflow */
117 /* Query necessary dst length for src string.
 *
 * table  - double-byte code page table (uses cp2uni_leadbytes)
 * src    - input bytes
 * srclen - length of src
 *
 * NOTE(review): this listing has gaps. The declaration of `len`, the
 * lead-byte test that guards the `--srclen` below (presumably on
 * cp2uni_lb[*src]), the skip over the trail byte and the return statement
 * are not visible - confirm against the complete file.
 */
118 static inline int get_length_dbcs( const struct dbcs_table
*table
,
119 const unsigned char *src
, unsigned int srclen
)
121 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
/* one output char is counted per loop pass */
124 for (len
= 0; srclen
; srclen
--, src
++, len
++)
128 if (!--srclen
) break; /* partial char, ignore it */
135 /* Check src string for invalid chars; return non-zero if invalid char found.
 *
 * table  - double-byte code page table (uses cp2uni, cp2uni_leadbytes,
 *          info.def_unicode_char and info.def_char)
 * src    - input bytes
 * srclen - length of src
 *
 * NOTE(review): this listing has gaps. The loop that the `break` statements
 * leave, the statements advancing src/srclen and the return statement are
 * not visible - confirm against the complete file.
 */
136 static inline int check_invalid_chars_dbcs( const struct dbcs_table
*table
,
137 const unsigned char *src
, unsigned int srclen
)
139 const WCHAR
* const cp2uni
= table
->cp2uni
;
140 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
/* a non-zero entry marks *src as a lead byte; the value is used as the
 * high part of the cp2uni index for the two-byte char */
144 unsigned char off
= cp2uni_lb
[*src
];
145 if (off
) /* multi-byte char */
147 if (srclen
== 1) break; /* partial char, error */
/* invalid when the pair maps to the default Unicode char although the
 * two bytes are not the code page's own default char */
148 if (cp2uni
[(off
<< 8) + src
[1]] == table
->info
.def_unicode_char
&&
149 ((src
[0] << 8) | src
[1]) != table
->info
.def_char
) break;
/* single-byte char: same test on one byte */
153 else if (cp2uni
[*src
] == table
->info
.def_unicode_char
&&
154 *src
!= table
->info
.def_char
) break;
161 /* mbstowcs for double-byte code page */
162 /* all lengths are in characters, not bytes.
 *
 * table  - double-byte code page table (uses cp2uni and cp2uni_leadbytes)
 * src    - input bytes
 * srclen - length of src
 * dst    - output buffer
 * dstlen - capacity of dst; 0 returns get_length_dbcs() instead of converting
 *
 * Returns -1 when input is left over after the conversion loop (overflow).
 *
 * NOTE(review): this listing has gaps. The declaration of `len`, the
 * lead-byte test on `off`, the advance to the trail byte (the index below
 * uses *src, so src has presumably been incremented) and the success return
 * are not visible - confirm against the complete file.
 */
163 static inline int mbstowcs_dbcs( const struct dbcs_table
*table
,
164 const unsigned char *src
, unsigned int srclen
,
165 WCHAR
*dst
, unsigned int dstlen
)
167 const WCHAR
* const cp2uni
= table
->cp2uni
;
168 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
171 if (!dstlen
) return get_length_dbcs( table
, src
, srclen
);
/* len counts the room left in dst; one output char is produced per pass */
173 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++, dst
++)
175 unsigned char off
= cp2uni_lb
[*src
];
178 if (!--srclen
) break; /* partial char, ignore it */
/* two-byte char: off supplies the high part of the table index */
180 *dst
= cp2uni
[(off
<< 8) + *src
];
182 else *dst
= cp2uni
[*src
];
184 if (srclen
) return -1; /* overflow */
189 /* mbstowcs for double-byte code page with character decomposition.
 *
 * table  - double-byte code page table (uses cp2uni and cp2uni_leadbytes)
 * src    - input bytes
 * srclen - length of src
 * dst    - output buffer
 * dstlen - capacity of dst; 0 selects the length-computation branch
 *
 * Returns -1 when input is left over after the conversion loop (overflow).
 *
 * NOTE(review): this listing has gaps. The declarations of `len`, `res` and
 * `ch`, the lead-byte tests on `off`, the advance to the trail byte, the
 * update of dst/len after a successful decomposition and the non-error
 * returns are not visible - confirm against the complete file.
 */
190 static int mbstowcs_dbcs_decompose( const struct dbcs_table
*table
,
191 const unsigned char *src
, unsigned int srclen
,
192 WCHAR
*dst
, unsigned int dstlen
)
194 const WCHAR
* const cp2uni
= table
->cp2uni
;
195 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
200 if (!dstlen
) /* compute length */
202 WCHAR dummy
[4]; /* no decomposition is larger than 4 chars */
/* sum the decomposed length of every input char, writing into the scratch
 * buffer only */
203 for (len
= 0; srclen
; srclen
--, src
++)
205 unsigned char off
= cp2uni_lb
[*src
];
208 if (!--srclen
) break; /* partial char, ignore it */
210 ch
= cp2uni
[(off
<< 8) + *src
];
212 else ch
= cp2uni
[*src
];
213 len
+= get_decomposition( ch
, dummy
, 4 );
/* real conversion: len starts as the room in dst; stop when either the input
 * or the room runs out */
218 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++)
220 unsigned char off
= cp2uni_lb
[*src
];
223 if (!--srclen
) break; /* partial char, ignore it */
225 ch
= cp2uni
[(off
<< 8) + *src
];
227 else ch
= cp2uni
[*src
];
/* a zero result from get_decomposition() ends the loop; any input still
 * pending is then reported as overflow below */
228 if (!(res
= get_decomposition( ch
, dst
, len
))) break;
232 if (srclen
) return -1; /* overflow */
237 /* return -1 on dst buffer overflow, -2 on invalid input char */
238 int cp_mbstowcs( const union cptable
*table
, int flags
,
239 const char *src
, int srclen
,
240 WCHAR
*dst
, int dstlen
)
242 if (table
->info
.char_size
== 1)
244 if (flags
& MB_ERR_INVALID_CHARS
)
246 if (check_invalid_chars_sbcs( &table
->sbcs
, src
, srclen
)) return -2;
248 if (!(flags
& MB_COMPOSITE
))
250 if (!dstlen
) return srclen
;
251 return mbstowcs_sbcs( &table
->sbcs
, src
, srclen
, dst
, dstlen
);
253 return mbstowcs_sbcs_decompose( &table
->sbcs
, src
, srclen
, dst
, dstlen
);
257 if (flags
& MB_ERR_INVALID_CHARS
)
259 if (check_invalid_chars_dbcs( &table
->dbcs
, src
, srclen
)) return -2;
261 if (!(flags
& MB_COMPOSITE
))
262 return mbstowcs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
264 return mbstowcs_dbcs_decompose( &table
->dbcs
, src
, srclen
, dst
, dstlen
);