1 /****************************************************************************
3 * GNAT COMPILER COMPONENTS *
7 * C Implementation File *
9 * Copyright (C) 2010-2023, Free Software Foundation, Inc. *
11 * GNAT is free software; you can redistribute it and/or modify it under *
12 * terms of the GNU General Public License as published by the Free Soft- *
13 * ware Foundation; either version 3, or (at your option) any later ver- *
14 * sion. GNAT is distributed in the hope that it will be useful, but WITH- *
15 * OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *
16 * or FITNESS FOR A PARTICULAR PURPOSE. *
18 * As a special exception under Section 7 of GPL version 3, you are granted *
19 * additional permissions described in the GCC Runtime Library Exception, *
20 * version 3.1, as published by the Free Software Foundation. *
22 * You should have received a copy of the GNU General Public License and *
23 * a copy of the GCC Runtime Library Exception along with this program; *
24 * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see *
25 * <http://www.gnu.org/licenses/>. *
27 * GNAT was originally developed by the GNAT team at New York University. *
28 * Extensive contributions were provided by Ada Core Technologies Inc. *
30 ****************************************************************************/
32 /* This file provides OS-dependent support for the Ada.Locales package. */
/* Number of elements of the array A. A must be an actual array object; the
   result is meaningless when A is a pointer. */

#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))

/* Four-character buffer type taken by c_get_language_code and
   c_get_country_code. */

typedef char char4 [4];
/* Table containing equivalences between ISO_639_1 codes and their ISO_639_3
   alpha-3 code plus their language name. Every entry occupies three
   consecutive slots (ISO-639-1 code, ISO-639-3 code, language name) and the
   entries are sorted by ISO-639-1 code. */

static char* iso_639[] =
{
  "aa", "aar", "Afar",
  "ab", "abk", "Abkhazian",
  "ae", "ave", "Avestan",
  "af", "afr", "Afrikaans",
  "ak", "aka", "Akan",
  "am", "amh", "Amharic",
  "an", "arg", "Aragonese",
  "ar", "ara", "Arabic",
  "as", "asm", "Assamese",
  "av", "ava", "Avaric",
  "ay", "aym", "Aymara",
  "az", "aze", "Azerbaijani",

  "ba", "bak", "Bashkir",
  "be", "bel", "Belarusian",
  "bg", "bul", "Bulgarian",
  "bi", "bis", "Bislama",
  "bm", "bam", "Bambara",
  "bn", "ben", "Bengali",
  "bo", "bod", "Tibetan",
  "br", "bre", "Breton",
  "bs", "bos", "Bosnian",

  "ca", "cat", "Catalan",
  "ce", "che", "Chechen",
  "ch", "cha", "Chamorro",
  "co", "cos", "Corsican",
  "cr", "cre", "Cree",
  "cs", "ces", "Czech",
  "cu", "chu", "Church Slavic",
  "cv", "chv", "Chuvash",
  "cy", "cym", "Welsh",

  "da", "dan", "Danish",
  "de", "deu", "German",
  "dv", "div", "Divehi",
  "dz", "dzo", "Dzongkha",

  "ee", "ewe", "Ewe",
  "el", "ell", "Modern Greek",
  "en", "eng", "English",
  "eo", "epo", "Esperanto",
  "es", "spa", "Spanish",
  "et", "est", "Estonian",
  "eu", "eus", "Basque",

  "fa", "fas", "Persian",
  "ff", "ful", "Fulah",
  "fi", "fin", "Finnish",
  "fj", "fij", "Fijian",
  "fo", "fao", "Faroese",
  "fr", "fra", "French",
  "fy", "fry", "Western Frisian",

  "ga", "gle", "Irish",
  "gd", "gla", "Scottish Gaelic",
  "gl", "glg", "Galician",
  "gn", "grn", "Guarani",
  "gu", "guj", "Gujarati",
  "gv", "glv", "Manx",

  "ha", "hau", "Hausa",
  "he", "heb", "Hebrew",
  "hi", "hin", "Hindi",
  "ho", "hmo", "Hiri Motu",
  "hr", "hrv", "Croatian",
  "ht", "hat", "Haitian",
  "hu", "hun", "Hungarian",
  "hy", "hye", "Armenian",
  "hz", "her", "Herero",

  "ia", "ina", "Interlingua",
  "id", "ind", "Indonesian",
  "ie", "ile", "Interlingue",
  "ig", "ibo", "Igbo",
  "ii", "iii", "Sichuan Yi",
  "ik", "ipk", "Inupiaq",
  "io", "ido", "Ido",
  "is", "isl", "Icelandic",
  "it", "ita", "Italian",
  "iu", "iku", "Inuktitut",

  "ja", "jpn", "Japanese",
  "jv", "jav", "Javanese",

  "ka", "kat", "Georgian",
  "kg", "kon", "Kongo",
  "ki", "kik", "Kikuyu",
  "kj", "kua", "Kuanyama",
  "kk", "kaz", "Kazakh",
  "kl", "kal", "Kalaallisut",
  "km", "khm", "Central Khmer",
  "kn", "kan", "Kannada",
  "ko", "kor", "Korean",
  "kr", "kau", "Kanuri",
  "ks", "kas", "Kashmiri",
  "ku", "kur", "Kurdish",
  "kv", "kom", "Komi",
  "kw", "cor", "Cornish",
  "ky", "kir", "Kirghiz",

  "la", "lat", "Latin",
  "lb", "ltz", "Luxembourgish",
  "lg", "lug", "Ganda",
  "li", "lim", "Limburgan",
  "ln", "lin", "Lingala",
  "lo", "lao", "Lao",
  "lt", "lit", "Lithuanian",
  "lu", "lub", "Luba-Katanga",
  "lv", "lav", "Latvian",

  "mg", "mlg", "Malagasy",
  "mh", "mah", "Marshallese",
  "mi", "mri", "Maori",
  "mk", "mkd", "Macedonian",
  "ml", "mal", "Malayalam",
  "mn", "mon", "Mongolian",
  "mr", "mar", "Marathi",
  "ms", "msa", "Malay",
  "mt", "mlt", "Maltese",
  "my", "mya", "Burmese",

  "na", "nau", "Nauru",
  "nb", "nob", "Norwegian Bokmal",
  "nd", "nde", "North Ndebele",
  "ne", "nep", "Nepali",
  "ng", "ndo", "Ndonga",
  "nl", "nld", "Dutch",
  "nn", "nno", "Norwegian Nynorsk",
  "no", "nor", "Norwegian",
  "nr", "nbl", "South Ndebele",
  "nv", "nav", "Navajo",
  "ny", "nya", "Nyanja",

  "oc", "oci", "Occitan",
  "oj", "oji", "Ojibwa",
  "om", "orm", "Oromo",
  "or", "ori", "Oriya",
  "os", "oss", "Ossetian",

  "pa", "pan", "Panjabi",
  "pi", "pli", "Pali",
  "pl", "pol", "Polish",
  "ps", "pus", "Pushto",
  "pt", "por", "Portuguese",

  "qu", "que", "Quechua",

  "rm", "roh", "Romansh",
  "rn", "run", "Rundi",
  "ro", "ron", "Romanian",
  "ru", "rus", "Russian",
  "rw", "kin", "Kinyarwanda",

  "sa", "san", "Sanskrit",
  "sc", "srd", "Sardinian",
  "sd", "snd", "Sindhi",
  "se", "sme", "Northern Sami",
  "sg", "sag", "Sango",
  "sh", "hbs", "Serbo-Croatian",
  "si", "sin", "Sinhala",
  "sk", "slk", "Slovak",
  "sl", "slv", "Slovenian",
  "sm", "smo", "Samoan",
  "sn", "sna", "Shona",
  "so", "som", "Somali",
  "sq", "sqi", "Albanian",
  "sr", "srp", "Serbian",
  "ss", "ssw", "Swati",
  "st", "sot", "Southern Sotho",
  "su", "sun", "Sundanese",
  "sv", "swe", "Swedish",
  "sw", "swa", "Swahili",

  "ta", "tam", "Tamil",
  "te", "tel", "Telugu",
  "tg", "tgk", "Tajik",
  "th", "tha", "Thai",
  "ti", "tir", "Tigrinya",
  "tk", "tuk", "Turkmen",
  "tl", "tgl", "Tagalog",
  "tn", "tsn", "Tswana",
  "to", "ton", "Tonga",
  "tr", "tur", "Turkish",
  "ts", "tso", "Tsonga",
  "tt", "tat", "Tatar",
  "tw", "twi", "Twi",
  "ty", "tah", "Tahitian",

  "ug", "uig", "Uighur",
  "uk", "ukr", "Ukrainian",
  "ur", "urd", "Urdu",
  "uz", "uzb", "Uzbek",

  "ve", "ven", "Venda",
  "vi", "vie", "Vietnamese",
  "vo", "vol", "Volapuk",

  "wa", "wln", "Walloon",
  "wo", "wol", "Wolof",

  "xh", "xho", "Xhosa",

  "yi", "yid", "Yiddish",
  "yo", "yor", "Yoruba",

  "za", "zha", "Zhuang",
  "zh", "zho", "Chinese",
  "zu", "zul", "Zulu"
};
/* Table containing equivalences between ISO_3166 alpha-2 codes and country
   names. This table has several entries for codes that have several valid
   country names. */
262 static char* iso_3166
[] =
266 "AE", "United Arab Emirates",
268 "AG", "Antigua and Barbuda",
272 "AN", "Netherlands Antilles",
276 "AS", "American Samoa",
280 "AX", "Aland Islands",
283 "BA", "Bosnia and Herzegovina",
287 "BF", "Burkina Faso",
292 "BL", "Saint Barthélemy",
294 "BN", "Brunei Darussalam",
295 "BO", "Bolivia, Plurinational State of",
296 "BQ", "Bonaire, Sint Eustatius and Saba",
300 "BV", "Bouvet Island",
306 "CC", "Cocos (Keeling) Islands",
307 "CD", "Congo, Democratic Republic of the",
308 "CF", "Central African Republic",
311 "CI", "Côte d'Ivoire",
312 "CK", "Cook Islands",
316 "CN", "People’s Republic of China",
321 "CS", "Czechoslovakia",
325 "CX", "Christmas Island",
327 "CZ", "Czech Republic",
333 "DO", "Dominican Republic",
339 "EH", "Western Sahara",
346 "FK", "Falkland Islands (Malvinas)",
347 "FM", "Micronesia, Federated States of",
348 "FO", "Faroe Islands",
352 "GB", "United Kingdom",
353 "GB", "United-Kingdom",
356 "GB", "Great Britain",
359 "GF", "French Guiana",
367 "GQ", "Equatorial Guinea",
369 "GS", "South Georgia and the South Sandwich Islands",
372 "GW", "Guinea-Bissau",
377 "HM", "Heard Island and McDonald Islands",
388 "IO", "British Indian Ocean Territory",
391 "IR", "Iran, Islamic Republic of",
405 "KN", "Saint Kitts and Nevis",
406 "KP", "Korea, Democratic People's Republic of",
407 "KR", "Korea, Republic of",
409 "KY", "Cayman Islands",
412 "LA", "Lao People's Democratic Republic",
415 "LI", "Liechtenstein",
426 "MD", "Moldova, Republic of",
428 "MF", "Saint Martin",
430 "MH", "Marshall Islands",
436 "MP", "Northern Mariana Islands",
449 "NC", "New Caledonia",
451 "NF", "Norfolk Island",
467 "PF", "French Polynesia",
468 "PG", "Papua New Guinea",
472 "PM", "Saint Pierre and Miquelon",
475 "PS", "Palestine, State of",
485 "RU", "Russian Federation",
488 "SA", "Saudi Arabia",
489 "SB", "Solomon Islands",
494 "SH", "Saint Helena, Ascension and Tristan da Cunha",
496 "SJ", "Svalbard and Jan Mayen",
498 "SL", "Sierra Leone",
505 "SX", "Sint Maarten (Dutch part)",
506 "SY", "Syrian Arab Republic",
509 "TC", "Turks and Caicos Islands",
511 "TF", "French Southern Territories",
517 "TM", "Turkmenistan",
522 "TT", "Trinidad and Tobago",
525 "TW", "Taiwan, Province of China",
527 "TZ", "Tanzania, United Republic of",
531 "UM", "United States Minor Outlying Islands",
532 "US", "United States",
533 "US", "United States of America",
534 "US", "United-States",
538 "VA", "Holy See (Vatican City State)",
539 "VC", "Saint Vincent and the Grenadines",
541 "VE", "Venezuela, Bolivarian Republic of",
542 "VG", "Virgin Islands, British",
543 "VI", "Virgin Islands, U.S.",
546 "WF", "Wallis and Futuna",
553 "ZA", "South Africa",
/* Utility function to perform case insensitive string comparison. Returns 1
   if both strings are equal and 0 otherwise. Both arguments must be
   null-terminated strings. */

static int
str_case_equals (const char *s1, const char *s2) {

  /* tolower is only defined for EOF and for values representable as an
     unsigned char, and a plain char may be negative for bytes outside the
     ASCII range, so convert each character before handing it over. */

  while (*s1 != '\0' && *s2 != '\0'
         && tolower((unsigned char) *s1) == tolower((unsigned char) *s2)) {
    s1++;
    s2++;
  }

  /* The strings are equal only if both were consumed completely */

  return (*s1 == '\0') && (*s2 == '\0');
}
/* Utility function to copy length characters of a string. The target string
   must have space to store the extra string null terminator, that is, room
   for length + 1 characters. Nothing is read from source when length is
   zero or negative; target then receives only the null terminator. */

static void
str_copy (char *target, const char *source, int length) {
  for (; length > 0; source++, target++, length--) {
    *target = *source;
  }

  *target = '\0';
}
/* Utility function to search for the last byte of the lc_all string to be
   processed. Required because in some targets (for example, AIX), the
   string returned by setlocale() has duplicates.

   Returns a pointer to the null terminator of lc_all, except when lc_all
   holds at least two spaces and the text before the first space is
   identical to the text between the first and the second space; in that
   case a pointer to the first space is returned so that the duplicate is
   left out. Returns NULL if lc_all is NULL. */

static char*
str_get_last_byte (char *lc_all) {
  char* first_space = NULL;
  char* second_space = NULL;
  char* last_byte = NULL;
  char* s1 = lc_all;

  if (lc_all == NULL)
    return NULL;

  /* Search for the 1st space (if any) */

  while (*s1 != ' ' && *s1 != '\0')
    s1++;

  if (*s1 == '\0')
    return s1;

  first_space = s1;

  /* Skip this space and search for the 2nd one (if available) */

  s1++;
  while (*s1 != ' ' && *s1 != '\0')
    s1++;

  if (*s1 == '\0')
    return s1;

  second_space = s1;

  /* Search for the last byte of lc_all */

  while (*s1 != '\0')
    s1++;

  last_byte = s1;

  /* Check if the two strings match */

  {
    char* p1 = lc_all;
    char* p2 = first_space + 1;

    /* Compare their contents; stopping at either delimiter makes a
       separate length comparison unnecessary. */

    while (p1 != first_space && p2 != second_space && *p1 == *p2) {
      p1++;
      p2++;
    }

    /* if the two strings match then update the last byte */

    if (p1 == first_space && p2 == second_space) {
      last_byte = first_space;
    }
  }

  return last_byte;
}
649 /* Utility function to search in the iso_639_1 table for an iso-639-1 code;
650 returns the corresponding iso-639-3 code or NULL if not found. */
653 iso_639_1_to_639_3(char* iso_639_1_code
) {
654 int len
= ARRAY_SIZE (iso_639
);
658 for (j
=0; j
< len
/3; j
++) {
659 char* s1
= iso_639_1_code
;
662 if (s1
[0]==s2
[0] && s1
[1]==s2
[1]) {
673 /* Utility function to search in the iso_639_1 table for a language name;
674 returns the corresponding iso-639-3 code or NULL if not found. */
677 language_name_to_639_3(char* name
) {
678 int len
= ARRAY_SIZE (iso_639
);
683 for (j
=0; j
< len
/3; j
++) {
684 if (str_case_equals(name
, *p
)) {
695 /* Utility function to search in the iso_3166 table for a country name;
696 returns the corresponding iso-3166 code or NULL if not found. */
699 country_name_to_3166 (char* name
) {
700 int len
= ARRAY_SIZE (iso_3166
);
705 for (j
=0; j
< len
/2; j
++) {
706 if (str_case_equals(name
, *p
)) {
718 c_get_language_code needs to fill in the Alpha-3 encoding of the
719 language code (3 lowercase letters). That should be "und" if the
720 language is unknown. [see Ada.Locales]
723 c_get_language_code (char4 p
) {
724 char* Saved_Locale
= setlocale(LC_ALL
, NULL
);
725 char iso_639_3_code
[] = "und"; /* Language Unknown */
729 /* Get locales set in the environment */
731 setlocale(LC_ALL
, "");
732 lc_all
= setlocale(LC_ALL
, NULL
);
734 /* The string returned by setlocale has the following format:
736 language[_territory][.code-set][@modifier]
738 where language is an ISO 639 language code, territory is an ISO 3166
739 country code, and codeset is a character set or encoding identifier
740 like ISO-8859-1 or UTF-8.
743 if (lc_all
!= NULL
) {
747 /* Copy the language part (which may be an ISO-639-1 code, an ISO-639-3
748 code, or a language name) adding a string terminator */
750 while (*s
!= '_' && *s
!= '.' && *s
!= '@' && *s
!= '\0')
753 lang_length
= s
- lc_all
;
755 /* Handle conversion of ISO-639-1 to ISO-639-3 */
757 if (lang_length
== 2) {
761 /* Duplicate the ISO-639-1 code adding the null terminator required to
762 search for the equivalent ISO-639-3 code; we cannot just append the
763 null terminator since the pointer may reference non-writable memory.
766 str_copy(iso_639_1
, lc_all
, lang_length
);
767 to_iso_639_3
= iso_639_1_to_639_3(iso_639_1
);
770 str_copy(iso_639_3_code
, to_iso_639_3
, 3);
772 /* Copy the ISO-639-3 code (adding a null terminator) */
774 } else if (lang_length
== 3) {
775 str_copy(iso_639_3_code
, lc_all
, lang_length
);
777 /* Handle conversion of language name to ISO-639-3 */
779 } else if (lang_length
> 3) {
780 char name_copy
[lang_length
+ 1];
783 /* Duplicate the ISO-639-1 code adding the null terminator required to
784 search for the equivalent ISO-639-3 code; we cannot just append the
785 null terminator since the pointer may reference non-writable memory.
788 str_copy(name_copy
, lc_all
, lang_length
);
789 to_iso_639_3
= language_name_to_639_3(name_copy
);
792 str_copy(iso_639_3_code
, to_iso_639_3
, 3);
796 /* Copy out the computed ISO_639_3 code */
798 result
= iso_639_3_code
;
799 for (; *result
!= '\0'; p
++, result
++)
802 /* Restore the original locale settings */
804 setlocale(LC_ALL
, Saved_Locale
);
810 c_get_country_code needs to fill in the Alpha-2 encoding of the
811 country code (2 uppercase letters). That should be "ZZ" if the
812 country is unknown. [see Ada.Locales]
815 c_get_country_code (char4 p
) {
816 char* Saved_Locale
= setlocale(LC_ALL
, NULL
);
817 char iso_3166_code
[] = "ZZ"; /* Country Unknown */
821 /* Get locales set in the environment */
823 setlocale(LC_ALL
, "");
824 lc_all
= setlocale(LC_ALL
, NULL
);
826 /* The string returned by setlocale has the following format:
828 language[_territory][.code-set][@modifier]
830 where language is an ISO 639 language code, territory is an ISO 3166
831 country code, and codeset is a character set or encoding identifier
832 like ISO-8859-1 or UTF-8.
835 if (lc_all
!= NULL
) {
838 char* last_byte
= str_get_last_byte(lc_all
);
839 int country_length
= 0;
841 /* Search for the beginning of the country code */
844 while (*s1
!= '_' && *s1
!= '.' && *s1
!= '@' && s1
!= last_byte
)
851 while (*s2
!= '.' && *s2
!= '@' && s2
!= last_byte
)
854 country_length
= s2
- s1
;
856 if (country_length
== 2) {
857 str_copy(iso_3166_code
, s1
, country_length
);
859 /* setlocale returned us the country name */
861 } else if (country_length
> 3) {
862 char name_copy
[country_length
+ 1];
865 str_copy(name_copy
, s1
, country_length
);
866 to_3166
= country_name_to_3166(name_copy
);
869 str_copy(iso_3166_code
, to_3166
, 2);
874 /* Copy out the computed ISO_3166 code */
876 result
= iso_3166_code
;
877 for (; *result
!= '\0'; p
++, result
++)
880 /* Restore the original locale settings */
882 setlocale(LC_ALL
, Saved_Locale
);