/*
 * MultiByteToWideChar implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
29 extern const unsigned short nfd_table
[] DECLSPEC_HIDDEN
;
31 static const WCHAR
*get_decomposition( WCHAR ch
, unsigned int *len
)
33 unsigned short offset
= nfd_table
[nfd_table
[ch
>> 8] + ((ch
>> 4) & 0xf)] + (ch
& 0xf);
34 unsigned short start
= nfd_table
[offset
];
35 unsigned short end
= nfd_table
[offset
+ 1];
37 if ((*len
= end
- start
)) return nfd_table
+ start
;
42 /* check the code whether it is in Unicode Private Use Area (PUA). */
43 /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
44 static inline int is_private_use_area_char(WCHAR code
)
46 return (code
>= 0xe000 && code
<= 0xf8ff);
49 /* check src string for invalid chars; return non-zero if invalid char found */
50 static inline int check_invalid_chars_sbcs( const struct sbcs_table
*table
, int flags
,
51 const unsigned char *src
, unsigned int srclen
)
53 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
54 const WCHAR def_unicode_char
= table
->info
.def_unicode_char
;
55 const unsigned char def_char
= table
->uni2cp_low
[table
->uni2cp_high
[def_unicode_char
>> 8]
56 + (def_unicode_char
& 0xff)];
59 if ((cp2uni
[*src
] == def_unicode_char
&& *src
!= def_char
) ||
60 is_private_use_area_char(cp2uni
[*src
])) break;
67 /* mbstowcs for single-byte code page */
68 /* all lengths are in characters, not bytes */
69 static inline int mbstowcs_sbcs( const struct sbcs_table
*table
, int flags
,
70 const unsigned char *src
, unsigned int srclen
,
71 WCHAR
*dst
, unsigned int dstlen
)
73 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
78 /* buffer too small: fill it up to dstlen and return error */
85 dst
[0] = cp2uni
[src
[0]];
86 dst
[1] = cp2uni
[src
[1]];
87 dst
[2] = cp2uni
[src
[2]];
88 dst
[3] = cp2uni
[src
[3]];
89 dst
[4] = cp2uni
[src
[4]];
90 dst
[5] = cp2uni
[src
[5]];
91 dst
[6] = cp2uni
[src
[6]];
92 dst
[7] = cp2uni
[src
[7]];
93 dst
[8] = cp2uni
[src
[8]];
94 dst
[9] = cp2uni
[src
[9]];
95 dst
[10] = cp2uni
[src
[10]];
96 dst
[11] = cp2uni
[src
[11]];
97 dst
[12] = cp2uni
[src
[12]];
98 dst
[13] = cp2uni
[src
[13]];
99 dst
[14] = cp2uni
[src
[14]];
100 dst
[15] = cp2uni
[src
[15]];
106 /* now handle the remaining characters */
111 case 15: dst
[-15] = cp2uni
[src
[-15]];
112 case 14: dst
[-14] = cp2uni
[src
[-14]];
113 case 13: dst
[-13] = cp2uni
[src
[-13]];
114 case 12: dst
[-12] = cp2uni
[src
[-12]];
115 case 11: dst
[-11] = cp2uni
[src
[-11]];
116 case 10: dst
[-10] = cp2uni
[src
[-10]];
117 case 9: dst
[-9] = cp2uni
[src
[-9]];
118 case 8: dst
[-8] = cp2uni
[src
[-8]];
119 case 7: dst
[-7] = cp2uni
[src
[-7]];
120 case 6: dst
[-6] = cp2uni
[src
[-6]];
121 case 5: dst
[-5] = cp2uni
[src
[-5]];
122 case 4: dst
[-4] = cp2uni
[src
[-4]];
123 case 3: dst
[-3] = cp2uni
[src
[-3]];
124 case 2: dst
[-2] = cp2uni
[src
[-2]];
125 case 1: dst
[-1] = cp2uni
[src
[-1]];
131 /* mbstowcs for single-byte code page with char decomposition */
132 static int mbstowcs_sbcs_decompose( const struct sbcs_table
*table
, int flags
,
133 const unsigned char *src
, unsigned int srclen
,
134 WCHAR
*dst
, unsigned int dstlen
)
136 const WCHAR
* const cp2uni
= (flags
& MB_USEGLYPHCHARS
) ? table
->cp2uni_glyphs
: table
->cp2uni
;
138 unsigned int len
, decomp_len
;
140 if (!dstlen
) /* compute length */
142 for (len
= 0; srclen
; srclen
--, src
++, len
+= decomp_len
)
143 get_decomposition( cp2uni
[*src
], &decomp_len
);
147 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++, dst
+= decomp_len
, len
-= decomp_len
)
149 if ((decomp
= get_decomposition( cp2uni
[*src
], &decomp_len
)))
151 if (len
< decomp_len
) break;
152 memcpy( dst
, decomp
, decomp_len
* sizeof(WCHAR
) );
154 else *dst
= cp2uni
[*src
];
156 if (srclen
) return -1; /* overflow */
160 /* query necessary dst length for src string */
161 static inline int get_length_dbcs( const struct dbcs_table
*table
,
162 const unsigned char *src
, unsigned int srclen
)
164 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
167 for (len
= 0; srclen
; srclen
--, src
++, len
++)
169 if (cp2uni_lb
[*src
] && srclen
> 1 && src
[1])
178 /* check src string for invalid chars; return non-zero if invalid char found */
179 static inline int check_invalid_chars_dbcs( const struct dbcs_table
*table
,
180 const unsigned char *src
, unsigned int srclen
)
182 const WCHAR
* const cp2uni
= table
->cp2uni
;
183 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
184 const WCHAR def_unicode_char
= table
->info
.def_unicode_char
;
185 const unsigned short def_char
= table
->uni2cp_low
[table
->uni2cp_high
[def_unicode_char
>> 8]
186 + (def_unicode_char
& 0xff)];
189 unsigned char off
= cp2uni_lb
[*src
];
190 if (off
) /* multi-byte char */
192 if (srclen
== 1) break; /* partial char, error */
193 if (cp2uni
[(off
<< 8) + src
[1]] == def_unicode_char
&&
194 ((src
[0] << 8) | src
[1]) != def_char
) break;
198 else if ((cp2uni
[*src
] == def_unicode_char
&& *src
!= def_char
) ||
199 is_private_use_area_char(cp2uni
[*src
])) break;
206 /* mbstowcs for double-byte code page */
207 /* all lengths are in characters, not bytes */
208 static inline int mbstowcs_dbcs( const struct dbcs_table
*table
,
209 const unsigned char *src
, unsigned int srclen
,
210 WCHAR
*dst
, unsigned int dstlen
)
212 const WCHAR
* const cp2uni
= table
->cp2uni
;
213 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
216 if (!dstlen
) return get_length_dbcs( table
, src
, srclen
);
218 for (len
= dstlen
; srclen
&& len
; len
--, srclen
--, src
++, dst
++)
220 unsigned char off
= cp2uni_lb
[*src
];
221 if (off
&& srclen
> 1 && src
[1])
225 *dst
= cp2uni
[(off
<< 8) + *src
];
227 else *dst
= cp2uni
[*src
];
229 if (srclen
) return -1; /* overflow */
234 /* mbstowcs for double-byte code page with character decomposition */
235 static int mbstowcs_dbcs_decompose( const struct dbcs_table
*table
,
236 const unsigned char *src
, unsigned int srclen
,
237 WCHAR
*dst
, unsigned int dstlen
)
239 const WCHAR
* const cp2uni
= table
->cp2uni
;
240 const unsigned char * const cp2uni_lb
= table
->cp2uni_leadbytes
;
242 unsigned int len
, decomp_len
;
245 if (!dstlen
) /* compute length */
247 for (len
= 0; srclen
; srclen
--, src
++, len
+= decomp_len
)
249 unsigned char off
= cp2uni_lb
[*src
];
250 if (off
&& srclen
> 1 && src
[1])
254 ch
= cp2uni
[(off
<< 8) + *src
];
256 else ch
= cp2uni
[*src
];
257 get_decomposition( ch
, &decomp_len
);
262 for (len
= dstlen
; srclen
&& len
; srclen
--, src
++, dst
+= decomp_len
, len
-= decomp_len
)
264 unsigned char off
= cp2uni_lb
[*src
];
265 if (off
&& srclen
> 1 && src
[1])
269 ch
= cp2uni
[(off
<< 8) + *src
];
271 else ch
= cp2uni
[*src
];
273 if ((decomp
= get_decomposition( ch
, &decomp_len
)))
275 if (len
< decomp_len
) break;
276 memcpy( dst
, decomp
, decomp_len
* sizeof(WCHAR
) );
280 if (srclen
) return -1; /* overflow */
285 /* return -1 on dst buffer overflow, -2 on invalid input char */
286 int wine_cp_mbstowcs_obsolete( const union cptable
*table
, int flags
,
287 const char *s
, int srclen
, WCHAR
*dst
, int dstlen
)
289 const unsigned char *src
= (const unsigned char*) s
;
291 if (table
->info
.char_size
== 1)
293 if (flags
& MB_ERR_INVALID_CHARS
)
295 if (check_invalid_chars_sbcs( &table
->sbcs
, flags
, src
, srclen
)) return -2;
297 if (!(flags
& MB_COMPOSITE
))
299 if (!dstlen
) return srclen
;
300 return mbstowcs_sbcs( &table
->sbcs
, flags
, src
, srclen
, dst
, dstlen
);
302 return mbstowcs_sbcs_decompose( &table
->sbcs
, flags
, src
, srclen
, dst
, dstlen
);
306 if (flags
& MB_ERR_INVALID_CHARS
)
308 if (check_invalid_chars_dbcs( &table
->dbcs
, src
, srclen
)) return -2;
310 if (!(flags
& MB_COMPOSITE
))
311 return mbstowcs_dbcs( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
313 return mbstowcs_dbcs_decompose( &table
->dbcs
, src
, srclen
, dst
, dstlen
);
317 __ASM_OBSOLETE(wine_cp_mbstowcs
);
319 #endif /* __ASM_OBSOLETE */