4 * Copyright 1998 Bertho A. Stultiens
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 #include "wine/port.h"
35 /* #define WANT_NEAR_INDICATION */
37 #ifdef WANT_NEAR_INDICATION
38 void make_print(char *str
)
/*
 * Shared backend for the parser diagnostics.
 *
 * Writes "<input>:<line>:<char>: <t>: " to stderr, followed by the
 * printf-style message `s` expanded with the argument list `ap`.
 *   s  - printf-style format string
 *   t  - severity tag placed in the prefix (callers pass "Error"/"Warning")
 *   n  - text used for the optional " near '...'" suffix
 *   ap - arguments for `s`
 * When WANT_NEAR_INDICATION is defined a " near '<cpy>'" suffix is appended;
 * the lines that derive `cpy` (presumably from `n`) are not visible in this
 * excerpt -- TODO confirm.
 */
49 static void generic_msg(const char *s
, const char *t
, const char *n
, va_list ap
)
/* input_name may be null; "stdin" is printed in that case. */
51 fprintf(stderr
, "%s:%d:%d: %s: ", input_name
? input_name
: "stdin", line_number
, char_number
, t
);
52 vfprintf(stderr
, s
, ap
);
53 #ifdef WANT_NEAR_INDICATION
60 fprintf(stderr
, " near '%s'", cpy
);
/*
 * Report a parser error.
 *
 * Forwards the printf-style format `s` and its arguments to generic_msg()
 * with the tag "Error" and the current parser_text as the "near" context,
 * then terminates the message line with a newline on stderr.
 * The va_list setup and whatever follows the newline (return value, any
 * termination of the program) are not visible in this excerpt.
 */
68 int parser_error(const char *s
, ...)
72 generic_msg(s
, "Error", parser_text
, ap
);
73 fputc( '\n', stderr
);
/*
 * Report a parser warning.
 *
 * Forwards the printf-style format `s` and its arguments to generic_msg()
 * with the tag "Warning" and the current parser_text as the "near" context.
 * The va_list setup and the return value are not visible in this excerpt.
 */
79 int parser_warning(const char *s
, ...)
83 generic_msg(s
, "Warning", parser_text
, ap
);
/*
 * Report an internal (tool) error.
 *
 * Prints "Internal error (please report) <file> <line>: " to stderr followed
 * by the printf-style message `s` expanded with the variadic arguments.
 *   file, line - source location of the failing check (callers in this file
 *                pass __FILE__ and __LINE__)
 *   s          - printf-style format string
 * What happens after the message is printed is not visible in this excerpt.
 */
88 void internal_error(const char *file
, int line
, const char *s
, ...)
92 fprintf(stderr
, "Internal error (please report) %s %d: ", file
, line
);
93 vfprintf(stderr
, s
, ap
);
/*
 * Print "Error: " followed by the printf-style message `msg` to stderr.
 *
 * NOTE(review): the name suggests that a description of errno and a fatal
 * exit follow the message; those lines are not visible in this excerpt --
 * confirm against the full file.
 */
98 void fatal_perror( const char *msg
, ... )
101 va_start( valist
, msg
);
102 fprintf(stderr
, "Error: ");
103 vfprintf( stderr
, msg
, valist
);
/*
 * Print "Error: " followed by the printf-style message `s` to stderr.
 * No newline is added here, so callers supply their own in `s`.
 * NOTE(review): callers such as xmalloc() continue as if this function does
 * not return; the terminating call is not visible in this excerpt -- confirm.
 */
109 void error(const char *s
, ...)
113 fprintf(stderr
, "Error: ");
114 vfprintf(stderr
, s
, ap
);
/*
 * Print "Warning: " followed by the printf-style message `s` to stderr.
 * No newline is added here, so callers supply their own in `s`.
 */
119 void warning(const char *s
, ...)
123 fprintf(stderr
, "Warning: ");
124 vfprintf(stderr
, s
, ap
);
/*
 * Informational chatter: when the DEBUGLEVEL_CHAT bit is set in debuglevel,
 * print "FYI: " followed by the printf-style message `s` to stderr.
 */
128 void chat(const char *s
, ...)
130 if(debuglevel
& DEBUGLEVEL_CHAT
)
134 fprintf(stderr
, "FYI: ");
135 vfprintf(stderr
, s
, ap
);
/*
 * Build a freshly allocated copy of the base name of `name` with the
 * extension `ext` removed when `name` ends in it (compared
 * case-insensitively).
 *
 * The buffer is sized namelen + 4 + 1 so that a caller can append a new
 * short extension later. The position of the last '/' is looked up; the
 * lines that use it, the copy of `name` into `base`, and the return
 * statement are not visible in this excerpt.
 */
140 char *dup_basename(const char *name
, const char *ext
)
143 int extlen
= strlen(ext
);
150 slash
= strrchr(name
, '/');
154 namelen
= strlen(name
);
156 /* +4 for later extension and +1 for '\0' */
157 base
= xmalloc(namelen
+4 +1);
/*
 * NOTE(review): name + namelen - extlen points before the start of `name`
 * when namelen < extlen, and no guard is visible in this excerpt. Consider
 * checking namelen >= extlen before the comparison.
 */
159 if(!strcasecmp(name
+ namelen
-extlen
, ext
))
/* Cut the matching extension off the copy. */
161 base
[namelen
- extlen
] = '\0';
/*
 * Allocation wrapper: reports "Virtual memory exhausted." through error()
 * on the failure path and fills the new block with the byte 0x55.
 * The allocation call, the failure test and the return statement are not
 * visible in this excerpt.
 */
166 void *xmalloc(size_t size
)
174 error("Virtual memory exhausted.\n");
/*
 * The block is filled with 0x55 rather than zero, so callers must not rely
 * on zero-initialised memory.
 * NOTE(review): this would write through a null `res` if error() returned;
 * error() is presumably fatal -- confirm.
 */
176 memset(res
, 0x55, size
);
/*
 * Reallocation wrapper: resizes `p` to `size` bytes with realloc() and
 * reports "Virtual memory exhausted." through error() on the failure path.
 * The failure test and the return statement are not visible in this excerpt.
 */
181 void *xrealloc(void *p
, size_t size
)
186 res
= realloc(p
, size
);
189 error("Virtual memory exhausted.\n");
/*
 * printf-style formatting into a buffer obtained from xmalloc().
 *
 * The visible lines format into a buffer of `size` bytes and, when the
 * result did not fit, pick a larger `size`. The enclosing retry loop, the
 * release of the too-small buffer and the return statement are not visible
 * in this excerpt.
 */
194 char *strmake( const char* fmt
, ... )
202 char *p
= xmalloc( size
);
204 n
= vsnprintf( p
, size
, fmt
, ap
);
/*
 * n == -1: no length information available (pre-C99 vsnprintf behaviour on
 * truncation), so the size is simply doubled.
 * n >= size: vsnprintf reported the required length, so the next attempt
 * uses exactly n + 1 bytes.
 */
206 if (n
== -1) size
*= 2;
207 else if ((size_t)n
>= size
) size
= n
+ 1;
/*
 * Duplicate the NUL-terminated string `str` into memory obtained from
 * xmalloc() (strlen(str) + 1 bytes) and return the copy.
 */
213 char *xstrdup(const char *str
)
218 s
= xmalloc(strlen(str
)+1);
219 return strcpy(s
, str
);
/*
 * Case-insensitive comparison of two NUL-terminated char strings in which
 * only the ASCII letters a-z are folded (to upper case); every other
 * character is compared as-is.
 *
 * Returns ch1 - ch2 for the first position where the folded characters
 * differ or where str1 ends, which is 0 when both strings end together.
 * The loop that advances str1/str2 is not visible in this excerpt.
 */
222 int compare_striA( const char *str1
, const char *str2
)
226 /* only the A-Z range is case-insensitive */
227 char ch1
= (*str1
>= 'a' && *str1
<= 'z') ? *str1
+ 'A' - 'a' : *str1
;
228 char ch2
= (*str2
>= 'a' && *str2
<= 'z') ? *str2
+ 'A' - 'a' : *str2
;
/* Stop at the end of str1 or at the first mismatch. */
229 if (!ch1
|| ch1
!= ch2
) return ch1
- ch2
;
/*
 * WCHAR counterpart of compare_striA(): case-insensitive comparison of two
 * 0-terminated WCHAR strings in which only the ASCII letters a-z are folded
 * (to upper case).
 *
 * Returns ch1 - ch2 for the first position where the folded characters
 * differ or where str1 ends, which is 0 when both strings end together.
 * The loop that advances str1/str2 is not visible in this excerpt.
 */
235 int compare_striW( const WCHAR
*str1
, const WCHAR
*str2
)
239 /* only the A-Z range is case-insensitive */
240 WCHAR ch1
= (*str1
>= 'a' && *str1
<= 'z') ? *str1
+ 'A' - 'a' : *str1
;
241 WCHAR ch2
= (*str2
>= 'a' && *str2
<= 'z') ? *str2
+ 'A' - 'a' : *str2
;
/* Stop at the end of str1 or at the first mismatch. */
242 if (!ch1
|| ch1
!= ch2
) return ch1
- ch2
;
249 *****************************************************************************
250 * Function : compare_name_id
251 * Syntax : int compare_name_id(const name_id_t *n1, const name_id_t *n2)
256 *****************************************************************************
/*
 * Order two name_id_t values.
 *
 *  - both ordinals: difference of the two i_name values
 *  - both strings of type str_char: compare_striA() on the cstr buffers
 *  - both strings of type str_unicode: compare_striW() on the wstr buffers
 *  - strings of differing types: internal_error()
 *  - one ordinal and one string: handled by two dedicated branches whose
 *    results are not visible in this excerpt
 *  - any other type combination: internal_error()
 */
258 int compare_name_id(const name_id_t
*n1
, const name_id_t
*n2
)
260 if(n1
->type
== name_ord
&& n2
->type
== name_ord
)
262 return n1
->name
.i_name
- n2
->name
.i_name
;
264 else if(n1
->type
== name_str
&& n2
->type
== name_str
)
266 if(n1
->name
.s_name
->type
== str_char
267 && n2
->name
.s_name
->type
== str_char
)
269 return compare_striA(n1
->name
.s_name
->str
.cstr
, n2
->name
.s_name
->str
.cstr
);
271 else if(n1
->name
.s_name
->type
== str_unicode
272 && n2
->name
.s_name
->type
== str_unicode
)
274 return compare_striW(n1
->name
.s_name
->str
.wstr
, n2
->name
.s_name
->str
.wstr
);
/* One char string and one unicode string: not supported. */
278 internal_error(__FILE__
, __LINE__
, "Can't yet compare strings of mixed type\n");
/* Mixed ordinal/string pairs; the values returned are not visible here. */
281 else if(n1
->type
== name_ord
&& n2
->type
== name_str
)
283 else if(n1
->type
== name_str
&& n2
->type
== name_ord
)
286 internal_error(__FILE__
, __LINE__
, "Comparing name-ids with unknown types (%d, %d)\n",
289 return 0; /* Keep the compiler happy */
/*
 * Create a new string_t holding the contents of `str` in the representation
 * requested by `type` (str_char or str_unicode).
 *
 *   str      - source string; it is only read
 *   type     - representation wanted for the result
 *   codepage - codepage for char <-> unicode conversion; 0 selects the
 *              wine_utf8_* routines because no codepage table is looked up
 *
 * The result structure and its character buffer both come from xmalloc(),
 * and the buffer always gets one extra terminating 0 element beyond
 * ret->size. A conversion request with codepage 0 and an invalid input
 * character are both reported through parser_error().
 * The return statement is not visible in this excerpt.
 */
292 string_t
*convert_string(const string_t
*str
, enum str_e type
, int codepage
)
294 const union cptable
*cptable
= codepage
? wine_cp_get_table( codepage
) : NULL
;
295 string_t
*ret
= xmalloc(sizeof(*ret
));
/* Without a codepage only a same-type copy is legal. */
300 if (!codepage
&& str
->type
!= type
)
301 parser_error( "Current language is Unicode only, cannot convert string" );
/* char -> unicode */
303 if((str
->type
== str_char
) && (type
== str_unicode
))
305 ret
->type
= str_unicode
;
/* First pass with a NULL destination only computes the required length. */
306 ret
->size
= cptable
? wine_cp_mbstowcs( cptable
, 0, str
->str
.cstr
, str
->size
, NULL
, 0 )
307 : wine_utf8_mbstowcs( 0, str
->str
.cstr
, str
->size
, NULL
, 0 );
308 ret
->str
.wstr
= xmalloc( (ret
->size
+1) * sizeof(WCHAR
) );
/*
 * Second pass converts for real and rejects invalid input characters.
 * The test selecting the codepage or UTF-8 variant is not visible here.
 */
310 res
= wine_cp_mbstowcs( cptable
, MB_ERR_INVALID_CHARS
, str
->str
.cstr
, str
->size
,
311 ret
->str
.wstr
, ret
->size
);
313 res
= wine_utf8_mbstowcs( MB_ERR_INVALID_CHARS
, str
->str
.cstr
, str
->size
,
314 ret
->str
.wstr
, ret
->size
);
/*
 * Reported when the conversion failed (the test on `res` is not visible).
 * NOTE(review): `codepage` is an int but is printed with %u; "%.*s" also
 * expects an int precision -- confirm the type of str->size.
 */
316 parser_error( "Invalid character in string '%.*s' for codepage %u",
317 str
->size
, str
->str
.cstr
, codepage
);
318 ret
->str
.wstr
[ret
->size
] = 0;
/* unicode -> char */
320 else if((str
->type
== str_unicode
) && (type
== str_char
))
322 ret
->type
= str_char
;
/* First pass with a NULL destination only computes the required length. */
323 ret
->size
= cptable
? wine_cp_wcstombs( cptable
, 0, str
->str
.wstr
, str
->size
, NULL
, 0, NULL
, NULL
)
324 : wine_utf8_wcstombs( 0, str
->str
.wstr
, str
->size
, NULL
, 0 );
325 ret
->str
.cstr
= xmalloc( ret
->size
+ 1 );
/* Second pass fills the buffer; the results of these calls are not checked. */
327 wine_cp_wcstombs( cptable
, 0, str
->str
.wstr
, str
->size
, ret
->str
.cstr
, ret
->size
, NULL
, NULL
);
329 wine_utf8_wcstombs( 0, str
->str
.wstr
, str
->size
, ret
->str
.cstr
, ret
->size
);
330 ret
->str
.cstr
[ret
->size
] = 0;
/* unicode -> unicode: plain copy plus terminator */
332 else if(str
->type
== str_unicode
)
334 ret
->type
= str_unicode
;
335 ret
->size
= str
->size
;
336 ret
->str
.wstr
= xmalloc(sizeof(WCHAR
)*(ret
->size
+1));
337 memcpy( ret
->str
.wstr
, str
->str
.wstr
, ret
->size
* sizeof(WCHAR
) );
338 ret
->str
.wstr
[ret
->size
] = 0;
/* char -> char: plain copy plus terminator */
340 else /* str->type == str_char */
342 ret
->type
= str_char
;
343 ret
->size
= str
->size
;
344 ret
->str
.cstr
= xmalloc( ret
->size
+ 1 );
345 memcpy( ret
->str
.cstr
, str
->str
.cstr
, ret
->size
);
346 ret
->str
.cstr
[ret
->size
] = 0;
/*
 * Release the character buffer owned by `str`: the wstr member for unicode
 * strings, the cstr member otherwise.
 * Whether the string_t structure itself is released as well is not visible
 * in this excerpt -- confirm against the full file.
 */
352 void free_string(string_t
*str
)
354 if (str
->type
== str_unicode
) free( str
->str
.wstr
);
355 else free( str
->str
.cstr
);
/*
 * Returns 1 when `str` contains 8-bit characters and decodes cleanly as
 * UTF-8 although `codepage` names a real (table-backed) codepage.
 * Returns 0 when the check is disabled (check_utf8 == 0), when codepage is
 * 0, when no table exists for the codepage, or when the string has no 8-bit
 * characters at all.
 * Once a string with 8-bit characters that cannot be UTF-8 is seen,
 * check_utf8 is cleared so that later calls return 0 immediately. The
 * `done` label and the final return statement are not visible in this
 * excerpt.
 */
359 /* check if the string is valid utf8 despite a different codepage being in use */
360 int check_valid_utf8( const string_t
*str
, int codepage
)
364 if (!check_utf8
) return 0;
365 if (!codepage
) return 0;
366 if (!wine_cp_get_table( codepage
)) return 0;
/*
 * Quick scan for the first 8-bit byte:
 *   0xf5..0xff never occur in UTF-8                      -> give up
 *   0xc2..0xf4 are the valid lead bytes                  -> full decode below
 *   0x80..0xc1 cannot start a sequence (continuation
 *              bytes and overlong lead bytes)            -> give up
 */
368 for (i
= 0; i
< str
->size
; i
++)
370 if ((unsigned char)str
->str
.cstr
[i
] >= 0xf5) goto done
;
371 if ((unsigned char)str
->str
.cstr
[i
] >= 0xc2) break;
372 if ((unsigned char)str
->str
.cstr
[i
] >= 0x80) goto done
;
374 if (i
== str
->size
) return 0; /* no 8-bit chars at all */
/* Strict decode of the whole string; a negative result means invalid UTF-8. */
376 if (wine_utf8_mbstowcs( MB_ERR_INVALID_CHARS
, str
->str
.cstr
, str
->size
, NULL
, 0 ) >= 0) return 1;
379 check_utf8
= 0; /* at least one 8-bit non-utf8 string found, stop checking */
/*
 * Round-trip check: converts the unicode string `str_w` back to a char
 * string with `codepage` and compares the result (length and bytes) with
 * the original char string `str_a`.
 *
 * A diagnostic dump of all three strings (text plus hex values) is written
 * to stderr; the condition guarding the dump (presumably !ok) and the
 * return statement are not visible in this excerpt. The temporary string
 * is released with free_string().
 */
383 int check_unicode_conversion( const string_t
*str_a
, const string_t
*str_w
, int codepage
)
386 string_t
*teststr
= convert_string( str_w
, str_char
, codepage
);
388 ok
= (teststr
->size
== str_a
->size
&& !memcmp( teststr
->str
.cstr
, str_a
->str
.cstr
, str_a
->size
));
/*
 * NOTE(review): "%s" relies on str_a->str.cstr being NUL-terminated;
 * strings built by convert_string() are, other producers need confirming.
 */
394 fprintf( stderr
, "Source: %s", str_a
->str
.cstr
);
395 for (i
= 0; i
< str_a
->size
; i
++)
396 fprintf( stderr
, " %02x", (unsigned char)str_a
->str
.cstr
[i
] );
397 fprintf( stderr
, "\nUnicode: " );
398 for (i
= 0; i
< str_w
->size
; i
++)
399 fprintf( stderr
, " %04x", str_w
->str
.wstr
[i
] );
400 fprintf( stderr
, "\nBack: %s", teststr
->str
.cstr
);
401 for (i
= 0; i
< teststr
->size
; i
++)
402 fprintf( stderr
, " %02x", (unsigned char)teststr
->str
.cstr
[i
] );
403 fprintf( stderr
, "\n" );
405 free_string( teststr
);
413 unsigned short sublang
;
417 /* language to codepage conversion table */
418 /* specific sublanguages need only be specified if their codepage */
419 /* differs from the default (SUBLANG_NEUTRAL) */
420 static const struct lang2cp lang2cps
[] =
422 { LANG_AFRIKAANS
, SUBLANG_NEUTRAL
, 1252 },
423 { LANG_ALBANIAN
, SUBLANG_NEUTRAL
, 1250 },
424 { LANG_ALSATIAN
, SUBLANG_NEUTRAL
, 1252 },
425 { LANG_AMHARIC
, SUBLANG_NEUTRAL
, 0 },
426 { LANG_ARABIC
, SUBLANG_NEUTRAL
, 1256 },
427 { LANG_ARMENIAN
, SUBLANG_NEUTRAL
, 0 },
428 { LANG_ASSAMESE
, SUBLANG_NEUTRAL
, 0 },
429 { LANG_AZERI
, SUBLANG_NEUTRAL
, 1254 },
430 { LANG_AZERI
, SUBLANG_AZERI_CYRILLIC
, 1251 },
431 { LANG_BASHKIR
, SUBLANG_NEUTRAL
, 1251 },
432 { LANG_BASQUE
, SUBLANG_NEUTRAL
, 1252 },
433 { LANG_BELARUSIAN
, SUBLANG_NEUTRAL
, 1251 },
434 { LANG_BENGALI
, SUBLANG_NEUTRAL
, 0 },
435 { LANG_BOSNIAN
, SUBLANG_NEUTRAL
, 1250 },
436 { LANG_BOSNIAN
, SUBLANG_BOSNIAN_BOSNIA_HERZEGOVINA_CYRILLIC
, 1251 },
437 { LANG_BRETON
, SUBLANG_NEUTRAL
, 1252 },
438 { LANG_BULGARIAN
, SUBLANG_NEUTRAL
, 1251 },
439 { LANG_CATALAN
, SUBLANG_NEUTRAL
, 1252 },
440 { LANG_CHINESE
, SUBLANG_NEUTRAL
, 950 },
441 { LANG_CHINESE
, SUBLANG_CHINESE_SIMPLIFIED
, 936 },
442 { LANG_CHINESE
, SUBLANG_CHINESE_SINGAPORE
, 936 },
444 { LANG_CORNISH
, SUBLANG_NEUTRAL
, 1252 },
445 #endif /* LANG_CORNISH */
446 { LANG_CORSICAN
, SUBLANG_NEUTRAL
, 1252 },
447 { LANG_CROATIAN
, SUBLANG_NEUTRAL
, 1250 },
448 { LANG_CZECH
, SUBLANG_NEUTRAL
, 1250 },
449 { LANG_DANISH
, SUBLANG_NEUTRAL
, 1252 },
450 { LANG_DARI
, SUBLANG_NEUTRAL
, 1256 },
451 { LANG_DIVEHI
, SUBLANG_NEUTRAL
, 0 },
452 { LANG_DUTCH
, SUBLANG_NEUTRAL
, 1252 },
453 { LANG_ENGLISH
, SUBLANG_NEUTRAL
, 1252 },
454 #ifdef LANG_ESPERANTO
455 { LANG_ESPERANTO
, SUBLANG_NEUTRAL
, 1252 },
456 #endif /* LANG_ESPERANTO */
457 { LANG_ESTONIAN
, SUBLANG_NEUTRAL
, 1257 },
458 { LANG_FAEROESE
, SUBLANG_NEUTRAL
, 1252 },
459 { LANG_FILIPINO
, SUBLANG_NEUTRAL
, 1252 },
460 { LANG_FINNISH
, SUBLANG_NEUTRAL
, 1252 },
461 { LANG_FRENCH
, SUBLANG_NEUTRAL
, 1252 },
462 { LANG_FRISIAN
, SUBLANG_NEUTRAL
, 1252 },
463 #ifdef LANG_MANX_GAELIC
464 { LANG_MANX_GAELIC
, SUBLANG_NEUTRAL
, 1252 },
465 #endif /* LANG_MANX_GAELIC */
466 { LANG_GALICIAN
, SUBLANG_NEUTRAL
, 1252 },
467 { LANG_GEORGIAN
, SUBLANG_NEUTRAL
, 0 },
468 { LANG_GERMAN
, SUBLANG_NEUTRAL
, 1252 },
469 { LANG_GREEK
, SUBLANG_NEUTRAL
, 1253 },
470 { LANG_GREENLANDIC
, SUBLANG_NEUTRAL
, 1252 },
471 { LANG_GUJARATI
, SUBLANG_NEUTRAL
, 0 },
472 { LANG_HAUSA
, SUBLANG_NEUTRAL
, 1252 },
473 { LANG_HEBREW
, SUBLANG_NEUTRAL
, 1255 },
474 { LANG_HINDI
, SUBLANG_NEUTRAL
, 0 },
475 { LANG_HUNGARIAN
, SUBLANG_NEUTRAL
, 1250 },
476 { LANG_ICELANDIC
, SUBLANG_NEUTRAL
, 1252 },
477 { LANG_IGBO
, SUBLANG_NEUTRAL
, 1252 },
478 { LANG_INDONESIAN
, SUBLANG_NEUTRAL
, 1252 },
479 { LANG_INUKTITUT
, SUBLANG_NEUTRAL
, 0 },
480 { LANG_INUKTITUT
, SUBLANG_INUKTITUT_CANADA_LATIN
, 0 },
481 { LANG_INVARIANT
, SUBLANG_NEUTRAL
, 0 },
482 { LANG_IRISH
, SUBLANG_NEUTRAL
, 1252 },
483 { LANG_ITALIAN
, SUBLANG_NEUTRAL
, 1252 },
484 { LANG_JAPANESE
, SUBLANG_NEUTRAL
, 932 },
485 { LANG_KANNADA
, SUBLANG_NEUTRAL
, 0 },
486 { LANG_KAZAK
, SUBLANG_NEUTRAL
, 1251 },
487 { LANG_KHMER
, SUBLANG_NEUTRAL
, 0 },
488 { LANG_KICHE
, SUBLANG_NEUTRAL
, 1252 },
489 { LANG_KINYARWANDA
, SUBLANG_NEUTRAL
, 1252 },
490 { LANG_KONKANI
, SUBLANG_NEUTRAL
, 0 },
491 { LANG_KOREAN
, SUBLANG_NEUTRAL
, 949 },
492 { LANG_KYRGYZ
, SUBLANG_NEUTRAL
, 1251 },
493 { LANG_LAO
, SUBLANG_NEUTRAL
, 0 },
494 { LANG_LATVIAN
, SUBLANG_NEUTRAL
, 1257 },
495 { LANG_LITHUANIAN
, SUBLANG_NEUTRAL
, 1257 },
496 { LANG_LOWER_SORBIAN
, SUBLANG_NEUTRAL
, 1252 },
497 { LANG_LUXEMBOURGISH
, SUBLANG_NEUTRAL
, 1252 },
498 { LANG_MACEDONIAN
, SUBLANG_NEUTRAL
, 1251 },
499 { LANG_MALAY
, SUBLANG_NEUTRAL
, 1252 },
500 { LANG_MALAYALAM
, SUBLANG_NEUTRAL
, 0 },
501 { LANG_MALTESE
, SUBLANG_NEUTRAL
, 0 },
502 { LANG_MAORI
, SUBLANG_NEUTRAL
, 0 },
503 { LANG_MAPUDUNGUN
, SUBLANG_NEUTRAL
, 1252 },
504 { LANG_MARATHI
, SUBLANG_NEUTRAL
, 0 },
505 { LANG_MOHAWK
, SUBLANG_NEUTRAL
, 1252 },
506 { LANG_MONGOLIAN
, SUBLANG_NEUTRAL
, 1251 },
507 { LANG_NEPALI
, SUBLANG_NEUTRAL
, 0 },
508 { LANG_NEUTRAL
, SUBLANG_NEUTRAL
, 1252 },
509 { LANG_NORWEGIAN
, SUBLANG_NEUTRAL
, 1252 },
510 { LANG_OCCITAN
, SUBLANG_NEUTRAL
, 1252 },
511 { LANG_ORIYA
, SUBLANG_NEUTRAL
, 0 },
512 { LANG_PASHTO
, SUBLANG_NEUTRAL
, 0 },
513 { LANG_PERSIAN
, SUBLANG_NEUTRAL
, 1256 },
514 { LANG_POLISH
, SUBLANG_NEUTRAL
, 1250 },
515 { LANG_PORTUGUESE
, SUBLANG_NEUTRAL
, 1252 },
516 { LANG_PUNJABI
, SUBLANG_NEUTRAL
, 0 },
517 { LANG_QUECHUA
, SUBLANG_NEUTRAL
, 1252 },
518 { LANG_ROMANIAN
, SUBLANG_NEUTRAL
, 1250 },
519 { LANG_ROMANSH
, SUBLANG_NEUTRAL
, 1252 },
520 { LANG_RUSSIAN
, SUBLANG_NEUTRAL
, 1251 },
521 { LANG_SAMI
, SUBLANG_NEUTRAL
, 1252 },
522 { LANG_SANSKRIT
, SUBLANG_NEUTRAL
, 0 },
523 { LANG_SCOTTISH_GAELIC
,SUBLANG_NEUTRAL
, 1252 },
524 { LANG_SERBIAN
, SUBLANG_NEUTRAL
, 1250 },
525 { LANG_SERBIAN
, SUBLANG_SERBIAN_CYRILLIC
, 1251 },
526 { LANG_SINHALESE
, SUBLANG_NEUTRAL
, 0 },
527 { LANG_SLOVAK
, SUBLANG_NEUTRAL
, 1250 },
528 { LANG_SLOVENIAN
, SUBLANG_NEUTRAL
, 1250 },
529 { LANG_SOTHO
, SUBLANG_NEUTRAL
, 1252 },
530 { LANG_SPANISH
, SUBLANG_NEUTRAL
, 1252 },
531 { LANG_SWAHILI
, SUBLANG_NEUTRAL
, 1252 },
532 { LANG_SWEDISH
, SUBLANG_NEUTRAL
, 1252 },
533 { LANG_SYRIAC
, SUBLANG_NEUTRAL
, 0 },
534 { LANG_TAJIK
, SUBLANG_NEUTRAL
, 1251 },
535 { LANG_TAMAZIGHT
, SUBLANG_NEUTRAL
, 1252 },
536 { LANG_TAMIL
, SUBLANG_NEUTRAL
, 0 },
537 { LANG_TATAR
, SUBLANG_NEUTRAL
, 1251 },
538 { LANG_TELUGU
, SUBLANG_NEUTRAL
, 0 },
539 { LANG_THAI
, SUBLANG_NEUTRAL
, 874 },
540 { LANG_TIBETAN
, SUBLANG_NEUTRAL
, 0 },
541 { LANG_TSWANA
, SUBLANG_NEUTRAL
, 1252 },
542 { LANG_TURKISH
, SUBLANG_NEUTRAL
, 1254 },
543 { LANG_TURKMEN
, SUBLANG_NEUTRAL
, 1250 },
544 { LANG_UIGHUR
, SUBLANG_NEUTRAL
, 1256 },
545 { LANG_UKRAINIAN
, SUBLANG_NEUTRAL
, 1251 },
546 { LANG_UPPER_SORBIAN
, SUBLANG_NEUTRAL
, 1252 },
547 { LANG_URDU
, SUBLANG_NEUTRAL
, 1256 },
548 { LANG_UZBEK
, SUBLANG_NEUTRAL
, 1254 },
549 { LANG_UZBEK
, SUBLANG_UZBEK_CYRILLIC
, 1251 },
550 { LANG_VIETNAMESE
, SUBLANG_NEUTRAL
, 1258 },
552 { LANG_WALON
, SUBLANG_NEUTRAL
, 1252 },
553 #endif /* LANG_WALON */
554 { LANG_WELSH
, SUBLANG_NEUTRAL
, 1252 },
555 { LANG_WOLOF
, SUBLANG_NEUTRAL
, 1252 },
556 { LANG_XHOSA
, SUBLANG_NEUTRAL
, 1252 },
557 { LANG_YAKUT
, SUBLANG_NEUTRAL
, 1251 },
558 { LANG_YI
, SUBLANG_NEUTRAL
, 0 },
559 { LANG_YORUBA
, SUBLANG_NEUTRAL
, 1252 },
560 { LANG_ZULU
, SUBLANG_NEUTRAL
, 1252 }
/*
 * Look up the codepage for a language/sublanguage pair in lang2cps.
 *
 * Only entries whose language equals `lang` are considered. An entry with
 * the exact `sublang` is handled by a branch whose body is not visible in
 * this excerpt (presumably it records the entry's cp -- TODO confirm); the
 * SUBLANG_NEUTRAL entry of the language supplies the default. When no exact
 * match set `cp`, the default is used; both stay -1 when the language is
 * not in the table at all.
 * The return statement is not visible in this excerpt.
 */
563 int get_language_codepage( unsigned short lang
, unsigned short sublang
)
566 int cp
= -1, defcp
= -1;
568 for (i
= 0; i
< sizeof(lang2cps
)/sizeof(lang2cps
[0]); i
++)
570 if (lang2cps
[i
].lang
!= lang
) continue;
571 if (lang2cps
[i
].sublang
== sublang
)
576 if (lang2cps
[i
].sublang
== SUBLANG_NEUTRAL
) defcp
= lang2cps
[i
].cp
;
579 if (cp
== -1) cp
= defcp
;
/* Any positive codepage in the table must be backed by a codepage table. */
580 assert( cp
<= 0 || wine_cp_get_table(cp
) );