1 /* winduni.c -- unicode support for the windres program.
2 Copyright (C) 1997-2015 Free Software Foundation, Inc.
3 Written by Ian Lance Taylor, Cygnus Support.
4 Rewritten by Kai Tietz, Onevision.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
34 #include "libiberty.h" /* for xstrdup */
36 /* Must be included before windows.h and winnls.h. */
37 #if defined (_WIN32) || defined (__CYGWIN__)
42 #include "safe-ctype.h"
/* Forward declarations of the file-local conversion helpers and of the
   ASCII test; their definitions appear further down in this file.  */
48 static rc_uint_type
wind_WideCharToMultiByte (rc_uint_type
, const unichar
*, char *, rc_uint_type
);
49 static rc_uint_type
wind_MultiByteToWideChar (rc_uint_type
, const char *, unichar
*, rc_uint_type
);
50 static int unichar_isascii (const unichar
*, rc_uint_type
);
52 /* Convert an ASCII string to a unicode string. We just copy it,
53 expanding chars to shorts, rather than doing something intelligent. */
55 #if !defined (_WIN32) && !defined (__CYGWIN__)
57 /* Codepages mapped. Each entry pairs a Windows codepage number with an
encoding name string (presumably the iconv_name that wind_iconv_cp hands
to iconv_open; the struct itself is declared elsewhere). The list is
closed by a (rc_uint_type) -1 / NULL sentinel, which is what the scan in
wind_find_codepage_info stops on. */
58 static local_iconv_map codepages
[] =
61 { 1, "WINDOWS-1252" },
64 { 775, "WINBALTRIM" },
71 { 874, "WINDOWS-874" },
76 { 1250, "WINDOWS-1250" },
77 { 1251, "WINDOWS-1251" },
78 { 1252, "WINDOWS-1252" },
79 { 1253, "WINDOWS-1253" },
80 { 1254, "WINDOWS-1254" },
81 { 1255, "WINDOWS-1255" },
82 { 1256, "WINDOWS-1256" },
83 { 1257, "WINDOWS-1257" },
84 { 1258, "WINDOWS-1258" },
87 { CP_UTF16
, "UTF-16LE" },
88 { (rc_uint_type
) -1, NULL
}
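91 /* Languages supported. Each entry appears to hold, in order, the language id, the DOS codepage, the Windows codepage, the language name and the country (field order to be confirmed against the wind_language_t declaration). The list ends with an entry whose id is (unsigned) -1. */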
92 static const wind_language_t languages
[] =
94 { 0x0000, 437, 1252, "Neutral", "Neutral" },
95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115 { 0x042D, 850, 1252, "Basque", "Spain" },
116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119 { 0x043C, 437, 1252, "Irish", "Ireland" },
120 { 0x043E, 850, 1252, "Malay", "Malaysia" },
121 { 0x0801, 864, 1256, "Arabic", "Iraq" },
122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123 { 0x0807, 850, 1252, "German", "Switzerland" },
124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125 { 0x080C, 850, 1252, "French", "Belgium" },
126 { 0x0810, 850, 1252, "Italian", "Switzerland" },
127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133 { 0x0C07, 850, 1252, "German", "Austria" },
134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135 { 0x0C0C, 850, 1252, "French", "Canada"},
136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137 { 0x1001, 864, 1256, "Arabic", "Libya" },
138 { 0x1004, 936, 936, "Chinese", "Singapore" },
139 { 0x1007, 850, 1252, "German", "Luxembourg" },
140 { 0x1009, 850, 1252, "English", "Canada" },
141 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142 { 0x100C, 850, 1252, "French", "Switzerland" },
143 { 0x1401, 864, 1256, "Arabic", "Algeria" },
144 { 0x1407, 850, 1252, "German", "Liechtenstein" },
145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146 { 0x140C, 850, 1252, "French", "Luxembourg" },
147 { 0x1801, 864, 1256, "Arabic", "Morocco" },
148 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149 { 0x180C, 850, 1252, "French", "Monaco" },
150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152 { 0x2001, 864, 1256, "Arabic", "Oman" },
153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154 { 0x2401, 864, 1256, "Arabic", "Yemen" },
155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156 { 0x2801, 864, 1256, "Arabic", "Syria" },
157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168 { 0x4001, 864, 1256, "Arabic", "Qatar" },
169 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171 { 0x480A, 850, 1252, "Spanish", "Honduras" },
172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174 { (unsigned) -1, 0, 0, NULL
, NULL
}
179 /* Specifies the default codepage to be used for unicode
180 transformations. By default this is CP_ACP. */
181 rc_uint_type wind_default_codepage
= CP_ACP
;
183 /* Specifies the currently used codepage for unicode
184 transformations. By default this is CP_ACP. This is the codepage that
unicode_from_ascii, unicode_from_ascii_len and ascii_from_unicode pass
on to the conversion helpers. */
185 rc_uint_type wind_current_codepage
= CP_ACP
;
187 /* Convert the string ASCII to a unicode string. Despite the older
188 "just copy it" wording, this is a thin wrapper: it forwards LENGTH,
UNICODE and ASCII to unicode_from_codepage, using wind_current_codepage
as the source codepage. */
191 unicode_from_ascii (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
)
193 unicode_from_codepage (length
, unicode
, ascii
, wind_current_codepage
);
196 /* Convert an ASCII string with length A_LENGTH to a unicode string.
197 Pieces are converted with wind_MultiByteToWideChar in wind_current_codepage;
198 one visible path instead widens a single byte (masked with 0xff).
This routine also converts \0 within a string. */
201 unicode_from_ascii_len (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
, rc_uint_type a_length
)
204 rc_uint_type tlen
, elen
, idx
= 0;
215 /* Make sure we have a zero-terminated string.
NOTE(review): the copy lives on the stack (alloca) and its size comes
from the caller; very large A_LENGTH values could exhaust the stack. */
216 p
= tmp
= (char *) alloca (a_length
+ 1);
217 memcpy (tmp
, ascii
, a_length
);
230 /* Make room for one more character. */
231 utmp
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ 1));
/* Carry over the IDX unichars accumulated so far. */
234 memcpy (utmp
, *unicode
, idx
* sizeof (unichar
));
/* Sizing call: with a NULL buffer the helper reports the byte count. */
244 elen
= wind_MultiByteToWideChar (wind_current_codepage
, p
, NULL
, 0);
/* ELEN is in bytes here; two extra unichars of slack are allocated. */
247 utmp
= ((unichar
*) res_alloc (elen
+ sizeof (unichar
) * 2));
248 wind_MultiByteToWideChar (wind_current_codepage
, p
, utmp
, elen
);
/* From bytes to a count of unichars. */
249 elen
/= sizeof (unichar
);
254 /* Make room for one more character. */
255 utmp
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ 1));
258 memcpy (utmp
, *unicode
, idx
* sizeof (unichar
));
/* Plain widening of one byte; the mask keeps a signed char from
   sign-extending into the upper bits. */
261 utmp
[idx
++] = ((unichar
) *p
) & 0xff;
/* New buffer for the old IDX unichars plus the ELEN just converted. */
269 up
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ elen
));
271 memcpy (up
, *unicode
, idx
* sizeof (unichar
));
275 memcpy (&up
[idx
], utmp
, sizeof (unichar
) * elen
);
284 /* Convert a unicode string to an ASCII string. This is a thin
285 wrapper: LENGTH, UNICODE and ASCII are forwarded to
286 codepage_from_unicode with wind_current_codepage as the target codepage. */
289 ascii_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
)
291 codepage_from_unicode (length
, unicode
, ascii
, wind_current_codepage
);
294 /* Print the unicode string UNICODE to the file E. LENGTH is the
295 number of characters to print, or -1 if we should print until the
296 end of the string. FIXME: On a Windows host, we should be calling
297 some Windows function, probably WideCharToMultiByte. */
300 unicode_print (FILE *e
, const unichar
*unicode
, rc_uint_type length
)
/* LENGTH is unsigned, so it is viewed through a signed type to tell a
   real count from the "-1 = until terminator" convention. */
308 if ((bfd_signed_vma
) length
> 0)
/* A zero unit is special-cased only when LENGTH was negative. */
313 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
/* Units that fit in seven bits. */
318 if ((ch
& 0x7f) == ch
)
324 else if (ISPRINT (ch
))
/* Three-digit octal escape. */
359 fprintf (e
, "\\%03o", (unsigned int) ch
);
/* Units that fit in eight bits also get a three-digit octal escape. */
364 else if ((ch
& 0xff) == ch
)
365 fprintf (e
, "\\%03o", (unsigned int) ch
);
/* Four-digit hexadecimal escape. */
367 fprintf (e
, "\\x%04x", (unsigned int) ch
);
371 /* Print the ASCII (char) string S to the file E. LENGTH is handled as in unicode_print: it is viewed as a signed value, and a negative LENGTH makes a zero byte special. */
374 ascii_print (FILE *e
, const char *s
, rc_uint_type length
)
382 if ((bfd_signed_vma
) length
> 0)
387 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
/* Bytes that fit in seven bits. */
392 if ((ch
& 0x7f) == ch
)
398 else if (ISPRINT (ch
))
/* Three-digit octal escape. */
433 fprintf (e
, "\\%03o", (unsigned int) ch
);
/* Same escape, with the value masked to eight bits first. */
439 fprintf (e
, "\\%03o", (unsigned int) ch
& 0xff);
/* Walk UNICODE with the index R until the first zero unit is found.
   Presumably R is then returned as the length; the return statement is
   not visible in this view. */
444 unichar_len (const unichar
*unicode
)
449 while (unicode
[r
] != 0)
/* Duplicate the zero-terminated string UNICODE into memory obtained from
   res_alloc. The loop counts the units before the terminator; LEN units
   are then allocated and copied.
   NOTE(review): for the terminator to be part of the copy, LEN has to be
   bumped by one between the loop and the allocation; that line is not
   visible here, so confirm. */
457 unichar_dup (const unichar
*unicode
)
464 for (len
= 0; unicode
[len
] != 0; ++len
)
467 r
= ((unichar
*) res_alloc (len
* sizeof (unichar
)));
468 memcpy (r
, unicode
, len
* sizeof (unichar
));
/* Make a copy of U with unichar_dup and scan the copy up to its zero
   terminator, singling out the units in the range 'a'..'z'. Presumably
   those are replaced by their upper-case form and the copy is returned;
   neither line is visible here. */
473 unichar_dup_uppercase (const unichar
*u
)
475 unichar
*r
= unichar_dup (u
);
481 for (i
= 0; r
[i
] != 0; ++i
)
/* Only the plain ASCII lower-case letters are considered. */
483 if (r
[i
] >= 'a' && r
[i
] <= 'z')
/* Check the first LEN units of U for values outside seven-bit ASCII.
   A LEN that is negative when viewed as signed means "use the
   zero-terminated length", obtained from unichar_len. */
490 unichar_isascii (const unichar
*u
, rc_uint_type len
)
494 if ((bfd_signed_vma
) len
< 0)
497 len
= (rc_uint_type
) unichar_len (u
);
502 for (i
= 0; i
< len
; i
++)
/* Any bit above the low seven marks a non-ASCII unit. */
503 if ((u
[i
] & 0xff80) != 0)
/* Print the LEN units of U to E via unicode_print, treating strings that
   unichar_isascii rejects specially. What is emitted for the non-ASCII
   case, and the surrounding quoting, are on lines not visible here. */
509 unicode_print_quoted (FILE *e
, const unichar
*u
, rc_uint_type len
)
511 if (! unichar_isascii (u
, len
))
514 unicode_print (e
, u
, len
);
/* Decide whether CP is a usable codepage number. The visible checks are,
   in order: CP must fit in 16 bits; CP_UTF16 and CP_ACP are singled out;
   on non-Windows hosts CP must be known to wind_find_codepage_info; on
   Windows hosts the answer comes from IsValidCodePage, normalised with !!.
   The values returned by the early checks are on lines not visible here. */
519 unicode_is_valid_codepage (rc_uint_type cp
)
521 if ((cp
& 0xffff) != cp
)
523 if (cp
== CP_UTF16
|| cp
== CP_ACP
)
526 #if !defined (_WIN32) && !defined (__CYGWIN__)
527 if (! wind_find_codepage_info (cp
))
531 return !! IsValidCodePage ((UINT
) cp
);
535 #if defined (_WIN32) || defined (__CYGWIN__)
537 #define max_cp_string_len 6
/* Ask GetLocaleInfoA for the LOCALE_IDEFAULTANSICODEPAGE string of LANGID
   (with SORT_DEFAULT) and return it parsed as a base-10 number.
   NOTE(review): wind_find_language_by_id stores this result in the doscp
   field although the LCType queried is the ANSI one; confirm that this is
   intended. */
540 codepage_from_langid (unsigned short langid
)
542 char cp_string
[max_cp_string_len
];
545 memset (cp_string
, 0, max_cp_string_len
);
546 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
547 but is unavailable on Win95. */
548 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
549 LOCALE_IDEFAULTANSICODEPAGE
,
550 cp_string
, max_cp_string_len
);
551 /* If codepage data for an LCID is not installed on users's system,
552 GetLocaleInfo returns an empty string. Fall back to system ANSI
556 return strtoul (cp_string
, 0, 10);
/* Ask GetLocaleInfoA for the LOCALE_IDEFAULTCODEPAGE string of LANGID
   (with SORT_DEFAULT) and return it parsed as a base-10 number.
   NOTE(review): wind_find_language_by_id stores this result in the wincp
   field; Microsoft documents LOCALE_IDEFAULTCODEPAGE as the OEM code
   page, so the two helpers may be swapped. Confirm. */
560 wincodepage_from_langid (unsigned short langid
)
562 char cp_string
[max_cp_string_len
];
565 memset (cp_string
, 0, max_cp_string_len
);
566 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
567 but is unavailable on Win95. */
568 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
569 LOCALE_IDEFAULTCODEPAGE
,
570 cp_string
, max_cp_string_len
);
571 /* If codepage data for an LCID is not installed on users's system,
572 GetLocaleInfo returns an empty string. Fall back to system ANSI
576 return strtoul (cp_string
, 0, 10);
/* Fetch a locale string for LANGID with GetLocaleInfoA into a 261-byte
   buffer and return a copy made with xstrdup. "Neutral" is written into
   the buffer on a path whose condition is not visible here (presumably
   when the query returns nothing). The LCType requested is also on a line
   not shown; judging by the name it is the language name. */
580 lang_from_langid (unsigned short langid
)
585 memset (cp_string
, 0, 261);
586 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
589 /* If codepage data for an LCID is not installed on users's system,
590 GetLocaleInfo returns an empty string. Fall back to system ANSI
593 strcpy (cp_string
, "Neutral");
594 return xstrdup (cp_string
);
/* Fetch a locale string for LANGID with GetLocaleInfoA into a 261-byte
   buffer and return a copy made with xstrdup. "Neutral" is written into
   the buffer on a path whose condition is not visible here (presumably
   when the query returns nothing). The LCType requested is also on a line
   not shown; judging by the name it is the country name. */
598 country_from_langid (unsigned short langid
)
603 memset (cp_string
, 0, 261);
604 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
607 /* If codepage data for an LCID is not installed on users's system,
608 GetLocaleInfo returns an empty string. Fall back to system ANSI
611 strcpy (cp_string
, "Neutral");
612 return xstrdup (cp_string
);
/* Look up the language record for ID.
   On non-Windows hosts the static languages table is scanned until either
   the (unsigned) -1 sentinel or a matching id is reached, and a pointer to
   the matching entry is returned.
   On Windows hosts a function-local static record is filled in from the
   *_from_langid helpers. Because that record is static, a later call
   overwrites what an earlier caller was handed.
   NOTE(review): name and country receive fresh xstrdup results on each
   call and nothing visible here frees the previous ones.
   NOTE(review): doscp is fed from the helper that queries
   LOCALE_IDEFAULTANSICODEPAGE and wincp from the one that queries
   LOCALE_IDEFAULTCODEPAGE (documented as the OEM code page); this looks
   swapped, so confirm. */
617 const wind_language_t
*
618 wind_find_language_by_id (unsigned id
)
620 #if !defined (_WIN32) && !defined (__CYGWIN__)
625 for (i
= 0; languages
[i
].id
!= (unsigned) -1 && languages
[i
].id
!= id
; i
++)
/* The loop also stops on the sentinel, so the hit must be re-checked. */
627 if (languages
[i
].id
== id
)
628 return &languages
[i
];
631 static wind_language_t wl
;
634 wl
.doscp
= codepage_from_langid ((unsigned short) id
);
635 wl
.wincp
= wincodepage_from_langid ((unsigned short) id
);
636 wl
.name
= lang_from_langid ((unsigned short) id
);
637 wl
.country
= country_from_langid ((unsigned short) id
);
/* Look up the mapping record for codepage CP.
   On non-Windows hosts the codepages table is scanned until either its
   (rc_uint_type) -1 sentinel or an entry for CP is reached; landing on the
   sentinel is tested separately (presumably "not found"; the statement it
   guards is not visible), otherwise a pointer to the entry is returned.
   On Windows hosts a function-local static record is used after CP has
   passed unicode_is_valid_codepage; how that record is filled is on lines
   not visible here. */
643 const local_iconv_map
*
644 wind_find_codepage_info (unsigned cp
)
646 #if !defined (_WIN32) && !defined (__CYGWIN__)
649 for (i
= 0; codepages
[i
].codepage
!= (rc_uint_type
) -1 && codepages
[i
].codepage
!= cp
; i
++)
651 if (codepages
[i
].codepage
== (rc_uint_type
) -1)
653 return &codepages
[i
];
655 static local_iconv_map lim
;
656 if (!unicode_is_valid_codepage (cp
))
664 /* Convert the string SRC, encoded in codepage CP, to a unicode string. A first call to wind_MultiByteToWideChar with a NULL buffer yields the size in bytes, *U is obtained from res_alloc with that size and filled by a second call, and *LENGTH is set to the number of unichars without the terminator. */
667 unicode_from_codepage (rc_uint_type
*length
, unichar
**u
, const char *src
, rc_uint_type cp
)
/* Sizing call: NULL buffer, zero capacity. */
671 len
= wind_MultiByteToWideChar (cp
, src
, NULL
, 0);
/* LEN is already a byte count, so it is passed to res_alloc as is. */
674 *u
= ((unichar
*) res_alloc (len
));
675 wind_MultiByteToWideChar (cp
, src
, *u
, len
);
677 /* Discount the trailing '\0'. If MultiByteToWideChar failed,
678 this will set *length to -1.
NOTE(review): the declaration of len is not visible here; if it is the
unsigned rc_uint_type, subtracting from 0 wraps and the division below
yields a large positive value rather than -1. Confirm. */
679 len
-= sizeof (unichar
);
/* From bytes to a count of unichars. */
682 *length
= len
/ sizeof (unichar
);
685 /* Convert the unicode string UNICODE to a string in codepage CP. A first call to wind_WideCharToMultiByte with a NULL buffer yields the required size, *ASCII is obtained from res_alloc with that size and filled by a second call. */
688 codepage_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
, rc_uint_type cp
)
/* Sizing call: NULL buffer, zero capacity. */
692 len
= wind_WideCharToMultiByte (cp
, unicode
, NULL
, 0);
695 *ascii
= (char *) res_alloc (len
* sizeof (char));
696 wind_WideCharToMultiByte (cp
, unicode
, *ascii
, len
);
698 /* Discount the trailing '\0'. If WideCharToMultiByte failed,
699 this will set *length to -1. */
706 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
/* Helper for converting one piece of input with the iconv descriptor CD.
   iconv is given the first I bytes of S for I = 1 .. 32, restarting from
   S on every attempt, with D_LEN bytes of output room, and each result is
   compared against (size_t) -1. Presumably the first attempt that does
   not fail ends the loop and N_S / N_D receive the positions after the
   consumed input and the produced output; those lines, and the setup of
   tmp_d, are not visible here. */
708 iconv_onechar (iconv_t cd
, ICONV_CONST
char *s
, char *d
, int d_len
, const char **n_s
, char **n_d
)
712 for (i
= 1; i
<= 32; i
++)
/* Fresh cursors for every attempt, so a failed try leaves S untouched. */
715 ICONV_CONST
char *tmp_s
= s
;
717 size_t s_left
= (size_t) i
;
718 size_t d_left
= (size_t) d_len
;
720 ret
= iconv (cd
, & tmp_s
, & s_left
, & tmp_d
, & d_left
);
/* (size_t) -1 is iconv's error return. */
722 if (ret
!= (size_t) -1)
/* Return the iconv_name of the mapping record that
   wind_find_codepage_info yields for codepage CP.
   NOTE(review): a NULL check on the lookup result is presumably on the
   lines missing between the lookup and the return; confirm. */
734 wind_iconv_cp (rc_uint_type cp
)
736 const local_iconv_map
*lim
= wind_find_codepage_info (cp
);
740 return lim
->iconv_name
;
742 #endif /* HAVE_ICONV */
/* Convert the multibyte string MB, encoded in codepage CP, to unichars
   stored at U. U_LEN is the room available at U; the callers in this file
   pass a byte count, and pass U = NULL with U_LEN = 0 to ask only for the
   size. RET holds the size in bytes and is presumably the return value
   (the return statement is not visible here).
   Three build variants are visible:
   - Windows hosts call MultiByteToWideChar and scale its result to bytes;
   - with HAVE_ICONV, input is converted to UTF-16LE piece by piece through
     iconv_onechar, using the encoding name from wind_iconv_cp;
   - otherwise each byte of MB is widened, masked with 0xff. */
745 wind_MultiByteToWideChar (rc_uint_type cp
, const char *mb
,
746 unichar
*u
, rc_uint_type u_len
)
748 rc_uint_type ret
= 0;
750 #if defined (_WIN32) || defined (__CYGWIN__)
751 rc_uint_type conv_flags
= MB_PRECOMPOSED
;
753 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
754 MultiByteToWideChar will set the last error to
755 ERROR_INVALID_FLAGS if we do. */
756 if (cp
== CP_UTF8
|| cp
== CP_UTF7
)
759 ret
= (rc_uint_type
) MultiByteToWideChar (cp
, conv_flags
,
761 /* Convert to bytes. */
762 ret
*= sizeof (unichar
);
764 #elif defined (HAVE_ICONV)
768 const char *iconv_name
= wind_iconv_cp (cp
);
/* Nothing can be converted without input or without a known encoding. */
770 if (!mb
|| !iconv_name
)
772 iconv_t cd
= iconv_open ("UTF-16LE", iconv_name
);
777 const char *n_mb
= "";
/* One piece at a time, into a scratch buffer with 32 bytes of room. */
781 iret
= iconv_onechar (cd
, (ICONV_CONST
char *) mb
, p_tmp
, 32, & n_mb
, & n_tmp
);
/* Number of output bytes produced for this piece. */
789 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
/* U_LEN is compared in bytes here. */
793 if ((size_t) u_len
< l_tmp
)
795 memcpy (u
, tmp
, l_tmp
);
/* Two zero bytes form a UTF-16 terminator. */
803 if (tmp
[0] == 0 && tmp
[1] == 0)
/* Fallback sizing: one unichar per input byte plus the terminator. */
811 ret
= strlen (mb
) + 1;
812 ret
*= sizeof (unichar
);
813 if (u
!= NULL
&& u_len
!= 0)
/* The mask keeps a signed char from sign-extending. */
817 *u
++ = ((unichar
) *mb
) & 0xff;
/* mb[-1] is the byte just handled, so the loop ends after the
   terminator has been copied or when the room runs out. */
820 while (u_len
!= 0 && mb
[-1] != 0);
822 if (u
!= NULL
&& u_len
!= 0)
829 wind_WideCharToMultiByte (rc_uint_type cp
, const unichar
*u
, char *mb
, rc_uint_type mb_len
)
831 rc_uint_type ret
= 0;
832 #if defined (_WIN32) || defined (__CYGWIN__)
833 WINBOOL used_def
= FALSE
;
835 ret
= (rc_uint_type
) WideCharToMultiByte (cp
, 0, u
, -1, mb
, mb_len
,
837 #elif defined (HAVE_ICONV)
841 const char *iconv_name
= wind_iconv_cp (cp
);
843 if (!u
|| !iconv_name
)
845 iconv_t cd
= iconv_open (iconv_name
, "UTF-16LE");
850 const char *n_u
= "";
854 iret
= iconv_onechar (cd
, (ICONV_CONST
char *) u
, p_tmp
, 32, &n_u
, & n_tmp
);
862 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
866 if ((size_t) mb_len
< l_tmp
)
868 memcpy (mb
, tmp
, l_tmp
);
878 u
= (const unichar
*) n_u
;
892 while (*u
!= 0 && mb_len
!= 0)
894 if (u
[0] == (u
[0] & 0x7f))