wmc: Directly implement UTF-8 conversions.
[wine.git] / tools / wmc / utils.c
blobe3da5422d7e45fbbae416a8fe4666e41ceb897f2
/*
 * Utility routines
 *
 * Copyright 1998,2000 Bertho A. Stultiens
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
#include "config.h"
#include "wine/port.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>

#include "wmctypes.h"
#include "utils.h"
#include "wmc.h"
/* While this is defined, mcy_error() prints nothing and only returns 1. */
#define SUPPRESS_YACC_ERROR_MESSAGE
37 static void generic_msg(const char *s, const char *t, va_list ap)
39 fprintf(stderr, "%s:%d:%d: %s: ", input_name ? input_name : "stdin", line_number, char_number, t);
40 vfprintf(stderr, s, ap);
/*
 * The yyerror routine should not exit, because the error token is used to
 * determine the syntactic error in the source. However, YACC calls this
 * same routine to print an error just before the error token is reduced,
 * so the message is only emitted when SUPPRESS_YACC_ERROR_MESSAGE is not
 * defined.
 * The extra routine 'xyyerror' is used to exit after giving a real
 * message.
 *
 * Always returns 1.
 */
int mcy_error(const char *s, ...)
{
#ifndef SUPPRESS_YACC_ERROR_MESSAGE
    va_list args;

    va_start(args, s);
    generic_msg(s, "Yacc error", args);
    va_end(args);
#endif
    return 1;
}
/*
 * Report a printf-style message with the "Error" label through
 * generic_msg() and terminate the process with exit status 1.
 */
int xyyerror(const char *s, ...)
{
    va_list args;

    va_start(args, s);
    generic_msg(s, "Error", args);
    va_end(args);

    exit(1);
    return 1; /* not reached; present because the function returns int */
}
/*
 * Report a printf-style message with the "Warning" label through
 * generic_msg(). Processing continues; always returns 0.
 */
int mcy_warning(const char *s, ...)
{
    va_list args;

    va_start(args, s);
    generic_msg(s, "Warning", args);
    va_end(args);

    return 0;
}
/*
 * Report an internal error to stderr and terminate with exit status 3.
 *
 *  file, line  location printed in the message prefix
 *  s, ...      printf-style description of the problem
 */
void internal_error(const char *file, int line, const char *s, ...)
{
    va_list args;

    fprintf(stderr, "Internal error (please report) %s %d: ", file, line);

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);

    exit(3);
}
/*
 * Print "Error: ", the printf-style message, and then the description of
 * the current errno value (via perror), and terminate with exit status 2.
 *
 * errno is captured on entry and restored just before perror(): library
 * functions such as fprintf() are allowed to modify errno even when they
 * succeed, which would otherwise make perror() describe the wrong error.
 */
void fatal_perror( const char *msg, ... )
{
    int saved_errno = errno;
    va_list valist;

    fprintf(stderr, "Error: ");

    va_start( valist, msg );
    vfprintf( stderr, msg, valist );
    va_end( valist );

    errno = saved_errno;
    perror( " " );
    exit(2);
}
/*
 * Print "Error: " followed by the printf-style message to stderr and
 * terminate with exit status 2. This function does not return.
 */
void error(const char *s, ...)
{
    va_list args;

    fprintf(stderr, "Error: ");

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);

    exit(2);
}
/*
 * Print "Warning: " followed by the printf-style message to stderr.
 * Unlike error(), execution continues afterwards.
 */
void warning(const char *s, ...)
{
    va_list args;

    fprintf(stderr, "Warning: ");

    va_start(args, s);
    vfprintf(stderr, s, args);
    va_end(args);
}
/*
 * Return a newly allocated copy of the file-name part of 'name' (everything
 * after the last '/'), with a trailing 'ext' removed when present (compared
 * case-insensitively).
 *
 *  name  path to take the base name from; "wmc.tab" is used when NULL
 *  ext   extension to strip, e.g. ".mc"; must not be NULL
 *
 * The buffer comes from xmalloc() and is owned by the caller. It is sized
 * with 4 spare characters so that an extension can be appended later.
 */
char *dup_basename(const char *name, const char *ext)
{
    size_t namelen;
    size_t extlen = strlen(ext);
    char *base;
    const char *slash;

    if (!name)
        name = "wmc.tab";

    slash = strrchr(name, '/');
    if (slash)
        name = slash + 1;

    namelen = strlen(name);

    /* +4 for later extension and +1 for '\0' */
    base = xmalloc(namelen + 4 + 1);
    strcpy(base, name);

    /*
     * Only compare the tail when the name is at least as long as the
     * extension; otherwise name + namelen - extlen would point before the
     * start of the string.
     */
    if (namelen >= extlen && !strcasecmp(name + namelen - extlen, ext))
        base[namelen - extlen] = '\0';

    return base;
}
/*
 * malloc() wrapper that never returns NULL: on allocation failure it
 * reports "Virtual memory exhausted." through error().
 *
 * 'size' must be non-zero (checked with assert). The returned block is
 * filled with the byte pattern 0x55.
 */
void *xmalloc(size_t size)
{
    void *block;

    assert(size > 0);

    block = malloc(size);
    if (!block)
    {
        error("Virtual memory exhausted.\n");
    }

    memset(block, 0x55, size);
    return block;
}
/*
 * realloc() wrapper that never returns NULL: on allocation failure it
 * reports "Virtual memory exhausted." through error().
 *
 * 'size' must be non-zero (checked with assert).
 */
void *xrealloc(void *p, size_t size)
{
    void *block;

    assert(size > 0);

    block = realloc(p, size);
    if (!block)
    {
        error("Virtual memory exhausted.\n");
    }

    return block;
}
/*
 * Duplicate a NUL-terminated string into memory obtained from xmalloc().
 * 'str' must not be NULL (checked with assert). The caller owns the copy.
 */
char *xstrdup(const char *str)
{
    char *copy;

    assert(str != NULL);

    copy = xmalloc(strlen(str) + 1);
    strcpy(copy, str);
    return copy;
}
/*
 * Format a printf-style message into a freshly allocated string.
 *
 * Starts with a 100 byte buffer and retries with a larger one until the
 * whole result fits. The returned buffer comes from xmalloc() and is owned
 * by the caller.
 */
char *strmake( const char* fmt, ... )
{
    size_t size = 100;

    for (;;)
    {
        va_list ap;
        int n;
        char *p = xmalloc( size );

        va_start( ap, fmt );
        n = vsnprintf( p, size, fmt, ap );
        va_end( ap );

        if (n == -1)
        {
            /* no length reported by vsnprintf: just try twice the size */
            size *= 2;
        }
        else if ((size_t)n < size)
        {
            return p;
        }
        else
        {
            /* required length is known: room for it plus the terminator */
            size = n + 1;
        }
        free( p );
    }
}
203 int unistrlen(const WCHAR *s)
205 int n;
206 for(n = 0; *s; n++, s++)
208 return n;
211 WCHAR *unistrcpy(WCHAR *dst, const WCHAR *src)
213 WCHAR *t = dst;
214 while(*src)
215 *t++ = *src++;
216 *t = 0;
217 return dst;
220 WCHAR *xunistrdup(const WCHAR * str)
222 WCHAR *s;
224 assert(str != NULL);
225 s = xmalloc((unistrlen(str)+1) * sizeof(WCHAR));
226 return unistrcpy(s, str);
229 int unistricmp(const WCHAR *s1, const WCHAR *s2)
231 int i;
232 int once = 0;
233 static const char warn[] = "Don't know the uppercase equivalent of non ascii characters;"
234 "comparison might yield wrong results";
235 while(*s1 && *s2)
237 if((*s1 & 0xffff) > 0x7f || (*s2 & 0xffff) > 0x7f)
239 if(!once)
241 once++;
242 mcy_warning(warn);
244 i = *s1++ - *s2++;
246 else
247 i = toupper(*s1++) - toupper(*s2++);
248 if(i)
249 return i;
252 if((*s1 & 0xffff) > 0x7f || (*s2 & 0xffff) > 0x7f)
254 if(!once)
255 mcy_warning(warn);
256 return *s1 - *s2;
258 else
259 return toupper(*s1) - toupper(*s2);
262 int unistrcmp(const WCHAR *s1, const WCHAR *s2)
264 int i;
265 while(*s1 && *s2)
267 i = *s1++ - *s2++;
268 if(i)
269 return i;
272 return *s1 - *s2;
275 WCHAR *utf8_to_unicode( const char *src, int srclen, int *dstlen )
277 static const char utf8_length[128] =
279 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x8f */
280 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x90-0x9f */
281 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xa0-0xaf */
282 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xb0-0xbf */
283 0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xc0-0xcf */
284 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xd0-0xdf */
285 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, /* 0xe0-0xef */
286 3,3,3,3,3,0,0,0,0,0,0,0,0,0,0,0 /* 0xf0-0xff */
288 static const unsigned char utf8_mask[4] = { 0x7f, 0x1f, 0x0f, 0x07 };
290 const char *srcend = src + srclen;
291 int len, res;
292 WCHAR *ret, *dst;
294 dst = ret = xmalloc( (srclen + 1) * sizeof(WCHAR) );
295 while (src < srcend)
297 unsigned char ch = *src++;
298 if (ch < 0x80) /* special fast case for 7-bit ASCII */
300 *dst++ = ch;
301 continue;
303 len = utf8_length[ch - 0x80];
304 if (len && src + len <= srcend)
306 res = ch & utf8_mask[len];
307 switch (len)
309 case 3:
310 if ((ch = *src ^ 0x80) >= 0x40) break;
311 res = (res << 6) | ch;
312 src++;
313 if (res < 0x10) break;
314 case 2:
315 if ((ch = *src ^ 0x80) >= 0x40) break;
316 res = (res << 6) | ch;
317 if (res >= 0x110000 >> 6) break;
318 src++;
319 if (res < 0x20) break;
320 if (res >= 0xd800 >> 6 && res <= 0xdfff >> 6) break;
321 case 1:
322 if ((ch = *src ^ 0x80) >= 0x40) break;
323 res = (res << 6) | ch;
324 src++;
325 if (res < 0x80) break;
326 if (res <= 0xffff) *dst++ = res;
327 else
329 res -= 0x10000;
330 *dst++ = 0xd800 | (res >> 10);
331 *dst++ = 0xdc00 | (res & 0x3ff);
333 continue;
336 *dst++ = 0xfffd;
338 *dst = 0;
339 *dstlen = dst - ret;
340 return ret;
343 char *unicode_to_utf8( const WCHAR *src, int srclen, int *dstlen )
345 char *ret, *dst;
347 dst = ret = xmalloc( srclen * 3 + 1 );
348 for ( ; srclen; srclen--, src++)
350 unsigned int ch = *src;
352 if (ch < 0x80) /* 0x00-0x7f: 1 byte */
354 *dst++ = ch;
355 continue;
357 if (ch < 0x800) /* 0x80-0x7ff: 2 bytes */
359 dst[1] = 0x80 | (ch & 0x3f);
360 ch >>= 6;
361 dst[0] = 0xc0 | ch;
362 dst += 2;
363 continue;
365 if (ch >= 0xd800 && ch <= 0xdbff && srclen > 1 && src[1] >= 0xdc00 && src[1] <= 0xdfff)
367 /* 0x10000-0x10ffff: 4 bytes */
368 ch = 0x10000 + ((ch & 0x3ff) << 10) + (src[1] & 0x3ff);
369 dst[3] = 0x80 | (ch & 0x3f);
370 ch >>= 6;
371 dst[2] = 0x80 | (ch & 0x3f);
372 ch >>= 6;
373 dst[1] = 0x80 | (ch & 0x3f);
374 ch >>= 6;
375 dst[0] = 0xf0 | ch;
376 dst += 4;
377 src++;
378 srclen--;
379 continue;
381 if (ch >= 0xd800 && ch <= 0xdfff) ch = 0xfffd; /* invalid surrogate pair */
383 /* 0x800-0xffff: 3 bytes */
384 dst[2] = 0x80 | (ch & 0x3f);
385 ch >>= 6;
386 dst[1] = 0x80 | (ch & 0x3f);
387 ch >>= 6;
388 dst[0] = 0xe0 | ch;
389 dst += 3;
391 *dst = 0;
392 *dstlen = dst - ret;
393 return ret;
/*******************************************************************
 *         buffer management
 *
 * Functions for writing to a memory buffer.
 */

/* non-zero makes put_word()/put_dword() reverse the byte order of values */
int byte_swapped = 0;

/* start of the output data, allocated by init_output_buffer() */
unsigned char *output_buffer;

/* number of bytes stored in output_buffer so far */
size_t output_buffer_pos;

/* number of bytes currently allocated for output_buffer */
size_t output_buffer_size;
407 static void check_output_buffer_space( size_t size )
409 if (output_buffer_pos + size >= output_buffer_size)
411 output_buffer_size = max( output_buffer_size * 2, output_buffer_pos + size );
412 output_buffer = xrealloc( output_buffer, output_buffer_size );
416 void init_output_buffer(void)
418 output_buffer_size = 1024;
419 output_buffer_pos = 0;
420 output_buffer = xmalloc( output_buffer_size );
423 void flush_output_buffer( const char *name )
425 int fd = open( name, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666 );
426 if (fd == -1) error( "Error creating %s\n", name );
427 if (write( fd, output_buffer, output_buffer_pos ) != output_buffer_pos)
428 error( "Error writing to %s\n", name );
429 close( fd );
430 free( output_buffer );
433 void put_data( const void *data, size_t size )
435 check_output_buffer_space( size );
436 memcpy( output_buffer + output_buffer_pos, data, size );
437 output_buffer_pos += size;
440 void put_byte( unsigned char val )
442 check_output_buffer_space( 1 );
443 output_buffer[output_buffer_pos++] = val;
446 void put_word( unsigned short val )
448 if (byte_swapped) val = (val << 8) | (val >> 8);
449 put_data( &val, sizeof(val) );
452 void put_dword( unsigned int val )
454 if (byte_swapped)
455 val = ((val << 24) | ((val << 8) & 0x00ff0000) | ((val >> 8) & 0x0000ff00) | (val >> 24));
456 put_data( &val, sizeof(val) );
459 void align_output( unsigned int align )
461 size_t size = align - (output_buffer_pos % align);
463 if (size == align) return;
464 check_output_buffer_space( size );
465 memset( output_buffer + output_buffer_pos, 0, size );
466 output_buffer_pos += size;