1 /* winduni.c -- unicode support for the windres program.
2 Copyright (C) 1997-2023 Free Software Foundation, Inc.
3 Written by Ian Lance Taylor, Cygnus Support.
4 Rewritten by Kai Tietz, Onevision.
6 This file is part of GNU Binutils.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
24 /* This file contains unicode support routines for the windres
25 program. Ideally, we would have generic unicode support which
26 would work on all systems. However, we don't. Instead, on a
27 Windows host, we are prepared to call some Windows routines. This
28 means that we will generate different output on Windows and Unix
29 hosts, but that seems better than not really supporting unicode at
34 #include "libiberty.h" /* for xstrdup */
36 /* Must be included before windows.h and winnls.h. */
37 #if defined (_WIN32) || defined (__CYGWIN__)
42 #include "safe-ctype.h"
48 static rc_uint_type
wind_WideCharToMultiByte (rc_uint_type
, const unichar
*, char *, rc_uint_type
);
49 static rc_uint_type
wind_MultiByteToWideChar (rc_uint_type
, const char *, unichar
*, rc_uint_type
);
50 static int unichar_isascii (const unichar
*, rc_uint_type
);
52 /* Convert an ASCII string to a unicode string. We just copy it,
53 expanding chars to shorts, rather than doing something intelligent. */
55 #if !defined (_WIN32) && !defined (__CYGWIN__)
57 /* Codepages mapped. */
58 static local_iconv_map codepages
[] =
61 { 1, "WINDOWS-1252" },
64 { 775, "WINBALTRIM" },
71 { 874, "WINDOWS-874" },
76 { 1250, "WINDOWS-1250" },
77 { 1251, "WINDOWS-1251" },
78 { 1252, "WINDOWS-1252" },
79 { 1253, "WINDOWS-1253" },
80 { 1254, "WINDOWS-1254" },
81 { 1255, "WINDOWS-1255" },
82 { 1256, "WINDOWS-1256" },
83 { 1257, "WINDOWS-1257" },
84 { 1258, "WINDOWS-1258" },
87 { CP_UTF16
, "UTF-16LE" },
88 { (rc_uint_type
) -1, NULL
}
91 /* Languages supported. */
92 static const wind_language_t languages
[] =
94 { 0x0000, 437, 1252, "Neutral", "Neutral" },
95 { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
96 { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
97 { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
98 { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
99 { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
100 { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
101 { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
102 { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
103 { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
104 { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
105 { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
106 { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
107 { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
108 { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
109 { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
110 { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
111 { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
112 { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
113 { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
114 { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
115 { 0x042D, 850, 1252, "Basque", "Spain" },
116 { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
117 { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
118 { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
119 { 0x043C, 437, 1252, "Irish", "Ireland" },
120 { 0x043E, 850, 1252, "Malay", "Malaysia" },
121 { 0x0801, 864, 1256, "Arabic", "Iraq" },
122 { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
123 { 0x0807, 850, 1252, "German", "Switzerland" },
124 { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
125 { 0x080C, 850, 1252, "French", "Belgium" },
126 { 0x0810, 850, 1252, "Italian", "Switzerland" },
127 { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
128 { 0x0816, 850, 1252, "Portuguese", "Portugal" },
129 { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
130 { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
131 { 0x0C01, 864, 1256, "Arabic", "Egypt" },
132 { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
133 { 0x0C07, 850, 1252, "German", "Austria" },
134 { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
135 { 0x0C0C, 850, 1252, "French", "Canada"},
136 { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
137 { 0x1001, 864, 1256, "Arabic", "Libya" },
138 { 0x1004, 936, 936, "Chinese", "Singapore" },
139 { 0x1007, 850, 1252, "German", "Luxembourg" },
140 { 0x1009, 850, 1252, "English", "Canada" },
141 { 0x100A, 850, 1252, "Spanish", "Guatemala" },
142 { 0x100C, 850, 1252, "French", "Switzerland" },
143 { 0x1401, 864, 1256, "Arabic", "Algeria" },
144 { 0x1407, 850, 1252, "German", "Liechtenstein" },
145 { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
146 { 0x140C, 850, 1252, "French", "Luxembourg" },
147 { 0x1801, 864, 1256, "Arabic", "Morocco" },
148 { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
149 { 0x180C, 850, 1252, "French", "Monaco" },
150 { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
151 { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
152 { 0x2001, 864, 1256, "Arabic", "Oman" },
153 { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
154 { 0x2401, 864, 1256, "Arabic", "Yemen" },
155 { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
156 { 0x2801, 864, 1256, "Arabic", "Syria" },
157 { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
158 { 0x2C01, 864, 1256, "Arabic", "Jordan" },
159 { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
160 { 0x3001, 864, 1256, "Arabic", "Lebanon" },
161 { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
162 { 0x3401, 864, 1256, "Arabic", "Kuwait" },
163 { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
164 { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
165 { 0x380A, 850, 1252, "Spanish", "Uruguay" },
166 { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
167 { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
168 { 0x4001, 864, 1256, "Arabic", "Qatar" },
169 { 0x400A, 850, 1252, "Spanish", "Bolivia" },
170 { 0x440A, 850, 1252, "Spanish", "El Salvador" },
171 { 0x480A, 850, 1252, "Spanish", "Honduras" },
172 { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
173 { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
174 { (unsigned) -1, 0, 0, NULL
, NULL
}
179 /* Specifies the default codepage to be used for unicode
180 transformations. By default this is CP_ACP. */
181 rc_uint_type wind_default_codepage
= CP_ACP
;
183 /* Specifies the currently used codepage for unicode
184 transformations. By default this is CP_ACP. */
185 rc_uint_type wind_current_codepage
= CP_ACP
;
187 /* Convert an ASCII string to a unicode string. We just copy it,
188 expanding chars to shorts, rather than doing something intelligent. */
191 unicode_from_ascii (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
)
193 unicode_from_codepage (length
, unicode
, ascii
, wind_current_codepage
);
196 /* Convert an ASCII string with length A_LENGTH to a unicode string. We just
197 copy it, expanding chars to shorts, rather than doing something intelligent.
198 This routine also converts \0 within a string. */
201 unicode_from_ascii_len (rc_uint_type
*length
, unichar
**unicode
, const char *ascii
, rc_uint_type a_length
)
204 rc_uint_type tlen
, elen
, idx
= 0;
215 /* Make sure we have zero terminated string. */
216 p
= tmp
= (char *) xmalloc (a_length
+ 1);
217 memcpy (tmp
, ascii
, a_length
);
230 /* Make room for one more character. */
231 utmp
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ 1));
234 memcpy (utmp
, *unicode
, idx
* sizeof (unichar
));
244 elen
= wind_MultiByteToWideChar (wind_current_codepage
, p
, NULL
, 0);
247 utmp
= ((unichar
*) res_alloc (elen
+ sizeof (unichar
) * 2));
248 wind_MultiByteToWideChar (wind_current_codepage
, p
, utmp
, elen
);
249 elen
/= sizeof (unichar
);
254 /* Make room for one more character. */
255 utmp
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ 1));
258 memcpy (utmp
, *unicode
, idx
* sizeof (unichar
));
261 utmp
[idx
++] = ((unichar
) *p
) & 0xff;
269 up
= (unichar
*) res_alloc (sizeof (unichar
) * (idx
+ elen
));
271 memcpy (up
, *unicode
, idx
* sizeof (unichar
));
275 memcpy (&up
[idx
], utmp
, sizeof (unichar
) * elen
);
286 /* Convert a unicode string to an ASCII string. We just copy it,
287 shrinking shorts to chars, rather than doing something intelligent.
288 Shorts not within the char range are replaced by '_'. */
291 ascii_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
)
293 codepage_from_unicode (length
, unicode
, ascii
, wind_current_codepage
);
296 /* Print the unicode string UNICODE to the file E. LENGTH is the
297 number of characters to print, or -1 if we should print until the
298 end of the string. FIXME: On a Windows host, we should be calling
299 some Windows function, probably WideCharToMultiByte. */
302 unicode_print (FILE *e
, const unichar
*unicode
, rc_uint_type length
)
310 if ((bfd_signed_vma
) length
> 0)
315 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
320 if ((ch
& 0x7f) == ch
)
326 else if (ISPRINT (ch
))
361 fprintf (e
, "\\%03o", (unsigned int) ch
);
366 else if ((ch
& 0xff) == ch
)
367 fprintf (e
, "\\%03o", (unsigned int) ch
);
369 fprintf (e
, "\\x%04x", (unsigned int) ch
);
373 /* Print an ASCII string to a file. */
376 ascii_print (FILE *e
, const char *s
, rc_uint_type length
)
384 if ((bfd_signed_vma
) length
> 0)
389 if (ch
== 0 && (bfd_signed_vma
) length
< 0)
394 if ((ch
& 0x7f) == ch
)
400 else if (ISPRINT (ch
))
435 fprintf (e
, "\\%03o", (unsigned int) ch
);
441 fprintf (e
, "\\%03o", (unsigned int) ch
& 0xff);
446 unichar_len (const unichar
*unicode
)
451 while (unicode
[r
] != 0)
459 unichar_dup (const unichar
*unicode
)
466 for (len
= 0; unicode
[len
] != 0; ++len
)
469 r
= ((unichar
*) res_alloc (len
* sizeof (unichar
)));
470 memcpy (r
, unicode
, len
* sizeof (unichar
));
475 unichar_dup_uppercase (const unichar
*u
)
477 unichar
*r
= unichar_dup (u
);
483 for (i
= 0; r
[i
] != 0; ++i
)
485 if (r
[i
] >= 'a' && r
[i
] <= 'z')
492 unichar_isascii (const unichar
*u
, rc_uint_type len
)
496 if ((bfd_signed_vma
) len
< 0)
499 len
= (rc_uint_type
) unichar_len (u
);
504 for (i
= 0; i
< len
; i
++)
505 if ((u
[i
] & 0xff80) != 0)
511 unicode_print_quoted (FILE *e
, const unichar
*u
, rc_uint_type len
)
513 if (! unichar_isascii (u
, len
))
516 unicode_print (e
, u
, len
);
521 unicode_is_valid_codepage (rc_uint_type cp
)
523 if ((cp
& 0xffff) != cp
)
525 if (cp
== CP_UTF16
|| cp
== CP_ACP
)
528 #if !defined (_WIN32) && !defined (__CYGWIN__)
529 if (! wind_find_codepage_info (cp
))
533 return !! IsValidCodePage ((UINT
) cp
);
537 #if defined (_WIN32) || defined (__CYGWIN__)
539 #define max_cp_string_len 6
542 codepage_from_langid (unsigned short langid
)
544 char cp_string
[max_cp_string_len
];
547 memset (cp_string
, 0, max_cp_string_len
);
548 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
549 but is unavailable on Win95. */
550 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
551 LOCALE_IDEFAULTANSICODEPAGE
,
552 cp_string
, max_cp_string_len
);
553 /* If codepage data for an LCID is not installed on the user's system,
554 GetLocaleInfo returns an empty string. Fall back to system ANSI
558 return strtoul (cp_string
, 0, 10);
562 wincodepage_from_langid (unsigned short langid
)
564 char cp_string
[max_cp_string_len
];
567 memset (cp_string
, 0, max_cp_string_len
);
568 /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
569 but is unavailable on Win95. */
570 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
571 LOCALE_IDEFAULTCODEPAGE
,
572 cp_string
, max_cp_string_len
);
573 /* If codepage data for an LCID is not installed on the user's system,
574 GetLocaleInfo returns an empty string. Fall back to system ANSI
578 return strtoul (cp_string
, 0, 10);
582 lang_from_langid (unsigned short langid
)
587 memset (cp_string
, 0, 261);
588 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
591 /* If codepage data for an LCID is not installed on the user's system,
592 GetLocaleInfo returns an empty string. Fall back to system ANSI
595 strcpy (cp_string
, "Neutral");
596 return xstrdup (cp_string
);
600 country_from_langid (unsigned short langid
)
605 memset (cp_string
, 0, 261);
606 c
= GetLocaleInfoA (MAKELCID (langid
, SORT_DEFAULT
),
609 /* If codepage data for an LCID is not installed on the user's system,
610 GetLocaleInfo returns an empty string. Fall back to system ANSI
613 strcpy (cp_string
, "Neutral");
614 return xstrdup (cp_string
);
619 const wind_language_t
*
620 wind_find_language_by_id (unsigned id
)
622 #if !defined (_WIN32) && !defined (__CYGWIN__)
627 for (i
= 0; languages
[i
].id
!= (unsigned) -1 && languages
[i
].id
!= id
; i
++)
629 if (languages
[i
].id
== id
)
630 return &languages
[i
];
633 static wind_language_t wl
;
636 wl
.doscp
= codepage_from_langid ((unsigned short) id
);
637 wl
.wincp
= wincodepage_from_langid ((unsigned short) id
);
638 wl
.name
= lang_from_langid ((unsigned short) id
);
639 wl
.country
= country_from_langid ((unsigned short) id
);
645 const local_iconv_map
*
646 wind_find_codepage_info (unsigned cp
)
648 #if !defined (_WIN32) && !defined (__CYGWIN__)
651 for (i
= 0; codepages
[i
].codepage
!= (rc_uint_type
) -1 && codepages
[i
].codepage
!= cp
; i
++)
653 if (codepages
[i
].codepage
== (rc_uint_type
) -1)
655 return &codepages
[i
];
657 static local_iconv_map lim
;
658 if (!unicode_is_valid_codepage (cp
))
666 /* Convert a codepage string to a unicode string. */
669 unicode_from_codepage (rc_uint_type
*length
, unichar
**u
, const char *src
, rc_uint_type cp
)
673 len
= wind_MultiByteToWideChar (cp
, src
, NULL
, 0);
676 *u
= ((unichar
*) res_alloc (len
));
677 wind_MultiByteToWideChar (cp
, src
, *u
, len
);
679 /* Discount the trailing '\0'. If MultiByteToWideChar failed,
680 this will set *length to -1. */
681 len
-= sizeof (unichar
);
684 *length
= len
/ sizeof (unichar
);
687 /* Convert a unicode string to a codepage string. */
690 codepage_from_unicode (rc_uint_type
*length
, const unichar
*unicode
, char **ascii
, rc_uint_type cp
)
694 len
= wind_WideCharToMultiByte (cp
, unicode
, NULL
, 0);
697 *ascii
= (char *) res_alloc (len
* sizeof (char));
698 wind_WideCharToMultiByte (cp
, unicode
, *ascii
, len
);
700 /* Discount the trailing '\0'. If WideCharToMultiByte failed,
701 this will set *length to -1. */
708 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
710 iconv_onechar (iconv_t cd
, ICONV_CONST
char *s
, char *d
, int d_len
, const char **n_s
, char **n_d
)
714 for (i
= 1; i
<= 32; i
++)
717 ICONV_CONST
char *tmp_s
= s
;
719 size_t s_left
= (size_t) i
;
720 size_t d_left
= (size_t) d_len
;
722 ret
= iconv (cd
, & tmp_s
, & s_left
, & tmp_d
, & d_left
);
724 if (ret
!= (size_t) -1)
736 wind_iconv_cp (rc_uint_type cp
)
738 const local_iconv_map
*lim
= wind_find_codepage_info (cp
);
742 return lim
->iconv_name
;
744 #endif /* HAVE_ICONV */
747 wind_MultiByteToWideChar (rc_uint_type cp
, const char *mb
,
748 unichar
*u
, rc_uint_type u_len
)
750 rc_uint_type ret
= 0;
752 #if defined (_WIN32) || defined (__CYGWIN__)
753 rc_uint_type conv_flags
= MB_PRECOMPOSED
;
755 /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
756 MultiByteToWideChar will set the last error to
757 ERROR_INVALID_FLAGS if we do. */
758 if (cp
== CP_UTF8
|| cp
== CP_UTF7
)
761 ret
= (rc_uint_type
) MultiByteToWideChar (cp
, conv_flags
,
763 /* Convert to bytes. */
764 ret
*= sizeof (unichar
);
766 #elif defined (HAVE_ICONV)
770 const char *iconv_name
= wind_iconv_cp (cp
);
772 if (!mb
|| !iconv_name
)
774 iconv_t cd
= iconv_open ("UTF-16LE", iconv_name
);
779 const char *n_mb
= "";
783 iret
= iconv_onechar (cd
, (ICONV_CONST
char *) mb
, p_tmp
, 32, & n_mb
, & n_tmp
);
791 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
795 if ((size_t) u_len
< l_tmp
)
797 memcpy (u
, tmp
, l_tmp
);
805 if (tmp
[0] == 0 && tmp
[1] == 0)
813 ret
= strlen (mb
) + 1;
814 ret
*= sizeof (unichar
);
815 if (u
!= NULL
&& u_len
!= 0)
819 *u
++ = ((unichar
) *mb
) & 0xff;
822 while (u_len
!= 0 && mb
[-1] != 0);
824 if (u
!= NULL
&& u_len
!= 0)
831 wind_WideCharToMultiByte (rc_uint_type cp
, const unichar
*u
, char *mb
, rc_uint_type mb_len
)
833 rc_uint_type ret
= 0;
834 #if defined (_WIN32) || defined (__CYGWIN__)
835 WINBOOL used_def
= false;
837 ret
= (rc_uint_type
) WideCharToMultiByte (cp
, 0, u
, -1, mb
, mb_len
,
839 #elif defined (HAVE_ICONV)
843 const char *iconv_name
= wind_iconv_cp (cp
);
845 if (!u
|| !iconv_name
)
847 iconv_t cd
= iconv_open (iconv_name
, "UTF-16LE");
852 const char *n_u
= "";
856 iret
= iconv_onechar (cd
, (ICONV_CONST
char *) u
, p_tmp
, 32, &n_u
, & n_tmp
);
864 size_t l_tmp
= (size_t) (n_tmp
- p_tmp
);
868 if ((size_t) mb_len
< l_tmp
)
870 memcpy (mb
, tmp
, l_tmp
);
880 u
= (const unichar
*) n_u
;
894 while (*u
!= 0 && mb_len
!= 0)
896 if (u
[0] == (u
[0] & 0x7f))