oleacc: Added Ukrainian translation.
[wine/hacks.git] / libs / wine / mbtowc.c
blob8b06a89d8ec360855d1fea83c2185ae3513b5859
/*
 * MultiByteToWideChar implementation
 *
 * Copyright 2000 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
21 #include <string.h>
23 #include "wine/unicode.h"
25 /* get the decomposition of a Unicode char */
26 static int get_decomposition( WCHAR src, WCHAR *dst, unsigned int dstlen )
28 extern const WCHAR unicode_decompose_table[];
29 const WCHAR *ptr = unicode_decompose_table;
30 int res;
32 *dst = src;
33 ptr = unicode_decompose_table + ptr[src >> 8];
34 ptr = unicode_decompose_table + ptr[(src >> 4) & 0x0f] + 2 * (src & 0x0f);
35 if (!*ptr) return 1;
36 if (dstlen <= 1) return 0;
37 /* apply the decomposition recursively to the first char */
38 if ((res = get_decomposition( *ptr, dst, dstlen-1 ))) dst[res++] = ptr[1];
39 return res;
42 /* check src string for invalid chars; return non-zero if invalid char found */
43 static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
44 const unsigned char *src, unsigned int srclen )
46 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
47 const WCHAR def_unicode_char = table->info.def_unicode_char;
48 const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
49 + (def_unicode_char & 0xff)];
50 while (srclen)
52 if (cp2uni[*src] == def_unicode_char && *src != def_char) break;
53 src++;
54 srclen--;
56 return srclen;
59 /* mbstowcs for single-byte code page */
60 /* all lengths are in characters, not bytes */
61 static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
62 const unsigned char *src, unsigned int srclen,
63 WCHAR *dst, unsigned int dstlen )
65 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
66 int ret = srclen;
68 if (dstlen < srclen)
70 /* buffer too small: fill it up to dstlen and return error */
71 srclen = dstlen;
72 ret = -1;
75 for (;;)
77 switch(srclen)
79 default:
80 case 16: dst[15] = cp2uni[src[15]];
81 case 15: dst[14] = cp2uni[src[14]];
82 case 14: dst[13] = cp2uni[src[13]];
83 case 13: dst[12] = cp2uni[src[12]];
84 case 12: dst[11] = cp2uni[src[11]];
85 case 11: dst[10] = cp2uni[src[10]];
86 case 10: dst[9] = cp2uni[src[9]];
87 case 9: dst[8] = cp2uni[src[8]];
88 case 8: dst[7] = cp2uni[src[7]];
89 case 7: dst[6] = cp2uni[src[6]];
90 case 6: dst[5] = cp2uni[src[5]];
91 case 5: dst[4] = cp2uni[src[4]];
92 case 4: dst[3] = cp2uni[src[3]];
93 case 3: dst[2] = cp2uni[src[2]];
94 case 2: dst[1] = cp2uni[src[1]];
95 case 1: dst[0] = cp2uni[src[0]];
96 case 0: break;
98 if (srclen < 16) return ret;
99 dst += 16;
100 src += 16;
101 srclen -= 16;
105 /* mbstowcs for single-byte code page with char decomposition */
106 static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
107 const unsigned char *src, unsigned int srclen,
108 WCHAR *dst, unsigned int dstlen )
110 const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
111 unsigned int len;
113 if (!dstlen) /* compute length */
115 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
116 for (len = 0; srclen; srclen--, src++)
117 len += get_decomposition( cp2uni[*src], dummy, 4 );
118 return len;
121 for (len = dstlen; srclen && len; srclen--, src++)
123 int res = get_decomposition( cp2uni[*src], dst, len );
124 if (!res) break;
125 len -= res;
126 dst += res;
128 if (srclen) return -1; /* overflow */
129 return dstlen - len;
132 /* query necessary dst length for src string */
133 static inline int get_length_dbcs( const struct dbcs_table *table,
134 const unsigned char *src, unsigned int srclen )
136 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
137 int len;
139 for (len = 0; srclen; srclen--, src++, len++)
141 if (cp2uni_lb[*src])
143 if (!--srclen) break; /* partial char, ignore it */
144 src++;
147 return len;
150 /* check src string for invalid chars; return non-zero if invalid char found */
151 static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
152 const unsigned char *src, unsigned int srclen )
154 const WCHAR * const cp2uni = table->cp2uni;
155 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
156 const WCHAR def_unicode_char = table->info.def_unicode_char;
157 const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
158 + (def_unicode_char & 0xff)];
159 while (srclen)
161 unsigned char off = cp2uni_lb[*src];
162 if (off) /* multi-byte char */
164 if (srclen == 1) break; /* partial char, error */
165 if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
166 ((src[0] << 8) | src[1]) != def_char) break;
167 src++;
168 srclen--;
170 else if (cp2uni[*src] == def_unicode_char && *src != def_char) break;
171 src++;
172 srclen--;
174 return srclen;
177 /* mbstowcs for double-byte code page */
178 /* all lengths are in characters, not bytes */
179 static inline int mbstowcs_dbcs( const struct dbcs_table *table,
180 const unsigned char *src, unsigned int srclen,
181 WCHAR *dst, unsigned int dstlen )
183 const WCHAR * const cp2uni = table->cp2uni;
184 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
185 unsigned int len;
187 if (!dstlen) return get_length_dbcs( table, src, srclen );
189 for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
191 unsigned char off = cp2uni_lb[*src];
192 if (off)
194 if (!--srclen) break; /* partial char, ignore it */
195 src++;
196 *dst = cp2uni[(off << 8) + *src];
198 else *dst = cp2uni[*src];
200 if (srclen) return -1; /* overflow */
201 return dstlen - len;
205 /* mbstowcs for double-byte code page with character decomposition */
206 static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
207 const unsigned char *src, unsigned int srclen,
208 WCHAR *dst, unsigned int dstlen )
210 const WCHAR * const cp2uni = table->cp2uni;
211 const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
212 unsigned int len;
213 WCHAR ch;
214 int res;
216 if (!dstlen) /* compute length */
218 WCHAR dummy[4]; /* no decomposition is larger than 4 chars */
219 for (len = 0; srclen; srclen--, src++)
221 unsigned char off = cp2uni_lb[*src];
222 if (off)
224 if (!--srclen) break; /* partial char, ignore it */
225 src++;
226 ch = cp2uni[(off << 8) + *src];
228 else ch = cp2uni[*src];
229 len += get_decomposition( ch, dummy, 4 );
231 return len;
234 for (len = dstlen; srclen && len; srclen--, src++)
236 unsigned char off = cp2uni_lb[*src];
237 if (off)
239 if (!--srclen) break; /* partial char, ignore it */
240 src++;
241 ch = cp2uni[(off << 8) + *src];
243 else ch = cp2uni[*src];
244 if (!(res = get_decomposition( ch, dst, len ))) break;
245 dst += res;
246 len -= res;
248 if (srclen) return -1; /* overflow */
249 return dstlen - len;
253 /* return -1 on dst buffer overflow, -2 on invalid input char */
254 int wine_cp_mbstowcs( const union cptable *table, int flags,
255 const char *s, int srclen,
256 WCHAR *dst, int dstlen )
258 const unsigned char *src = (const unsigned char*) s;
260 if (table->info.char_size == 1)
262 if (flags & MB_ERR_INVALID_CHARS)
264 if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
266 if (!(flags & MB_COMPOSITE))
268 if (!dstlen) return srclen;
269 return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
271 return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
273 else /* mbcs */
275 if (flags & MB_ERR_INVALID_CHARS)
277 if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
279 if (!(flags & MB_COMPOSITE))
280 return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
281 else
282 return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
286 /* CP_SYMBOL implementation */
287 /* return -1 on dst buffer overflow */
288 int wine_cpsymbol_mbstowcs( const char *src, int srclen, WCHAR *dst, int dstlen)
290 int len, i;
291 if( dstlen == 0) return srclen;
292 len = dstlen > srclen ? srclen : dstlen;
293 for( i = 0; i < len; i++)
295 unsigned char c = src [ i ];
296 if( c < 0x20 )
297 dst[i] = c;
298 else
299 dst[i] = c + 0xf000;
301 if( srclen > len) return -1;
302 return len;