Update LOCAL_PATCHES after libsanitizer merge.
[official-gcc.git] / gcc / ada / locales.c
blobdd549e181dfaef103dcc320811f58ca3049b516a
1 /****************************************************************************
2 * *
3 * GNAT COMPILER COMPONENTS *
4 * *
5 * L O C A L E S *
6 * *
7 * C Implementation File *
8 * *
9 * Copyright (C) 2010-2018, Free Software Foundation, Inc. *
10 * *
11 * GNAT is free software; you can redistribute it and/or modify it under *
12 * terms of the GNU General Public License as published by the Free Soft- *
13 * ware Foundation; either version 3, or (at your option) any later ver- *
14 * sion. GNAT is distributed in the hope that it will be useful, but WITH- *
15 * OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY *
16 * or FITNESS FOR A PARTICULAR PURPOSE. *
17 * *
18 * As a special exception under Section 7 of GPL version 3, you are granted *
19 * additional permissions described in the GCC Runtime Library Exception, *
20 * version 3.1, as published by the Free Software Foundation. *
21 * *
22 * You should have received a copy of the GNU General Public License and *
23 * a copy of the GCC Runtime Library Exception along with this program; *
24 * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see *
25 * <http://www.gnu.org/licenses/>. *
26 * *
27 * GNAT was originally developed by the GNAT team at New York University. *
28 * Extensive contributions were provided by Ada Core Technologies Inc. *
29 * *
30 ****************************************************************************/
32 /* This file provides OS-dependent support for the Ada.Locales package. */
#include <ctype.h>
#include <locale.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
/* Four-character buffer type; this is the type of the output parameter taken
   by c_get_language_code and c_get_country_code.  */
typedef char char4 [4];
/* ISO 639 language table, stored as a flat sequence of string triples:

     ISO 639-1 alpha-2 code, ISO 639-3 alpha-3 code, English language name

   The lookup routines in this file walk the array three entries at a time,
   so every row must contain exactly three strings.  */

static char* iso_639[] =
{
  "aa", "aar", "Afar",
  "ab", "abk", "Abkhazian",
  "ae", "ave", "Avestan",
  "af", "afr", "Afrikaans",
  "ak", "aka", "Akan",
  "am", "amh", "Amharic",
  "an", "arg", "Aragonese",
  "ar", "ara", "Arabic",
  "as", "asm", "Assamese",
  "av", "ava", "Avaric",
  "ay", "aym", "Aymara",
  "az", "aze", "Azerbaijani",

  "ba", "bak", "Bashkir",
  "be", "bel", "Belarusian",
  "bg", "bul", "Bulgarian",
  "bi", "bis", "Bislama",
  "bm", "bam", "Bambara",
  "bn", "ben", "Bengali",
  "bo", "bod", "Tibetan",
  "br", "bre", "Breton",
  "bs", "bos", "Bosnian",

  "ca", "cat", "Catalan",
  "ce", "che", "Chechen",
  "ch", "cha", "Chamorro",
  "co", "cos", "Corsican",
  "cr", "cre", "Cree",
  "cs", "ces", "Czech",
  "cu", "chu", "Church Slavic",
  "cv", "chv", "Chuvash",
  "cy", "cym", "Welsh",

  "da", "dan", "Danish",
  "de", "deu", "German",
  "dv", "div", "Divehi",
  "dz", "dzo", "Dzongkha",

  "ee", "ewe", "Ewe",
  "el", "ell", "Modern Greek",
  "en", "eng", "English",
  "eo", "epo", "Esperanto",
  "es", "spa", "Spanish",
  "et", "est", "Estonian",
  "eu", "eus", "Basque",

  "fa", "fas", "Persian",
  "ff", "ful", "Fulah",
  "fi", "fin", "Finnish",
  "fj", "fij", "Fijian",
  "fo", "fao", "Faroese",
  "fr", "fra", "French",
  "fy", "fry", "Western Frisian",

  "ga", "gle", "Irish",
  "gd", "gla", "Scottish Gaelic",
  "gl", "glg", "Galician",
  "gn", "grn", "Guarani",
  "gu", "guj", "Gujarati",
  "gv", "glv", "Manx",

  "ha", "hau", "Hausa",
  "he", "heb", "Hebrew",
  "hi", "hin", "Hindi",
  "ho", "hmo", "Hiri Motu",
  "hr", "hrv", "Croatian",
  "ht", "hat", "Haitian",
  "hu", "hun", "Hungarian",
  "hy", "hye", "Armenian",
  "hz", "her", "Herero",

  "ia", "ina", "Interlingua",
  "id", "ind", "Indonesian",
  "ie", "ile", "Interlingue",
  "ig", "ibo", "Igbo",
  "ii", "iii", "Sichuan Yi",
  "ik", "ipk", "Inupiaq",
  "io", "ido", "Ido",
  "is", "isl", "Icelandic",
  "it", "ita", "Italian",
  "iu", "iku", "Inuktitut",

  "ja", "jpn", "Japanese",
  "jv", "jav", "Javanese",

  "ka", "kat", "Georgian",
  "kg", "kon", "Kongo",
  "ki", "kik", "Kikuyu",
  "kj", "kua", "Kuanyama",
  "kk", "kaz", "Kazakh",
  "kl", "kal", "Kalaallisut",
  "km", "khm", "Central Khmer",
  "kn", "kan", "Kannada",
  "ko", "kor", "Korean",
  "kr", "kau", "Kanuri",
  "ks", "kas", "Kashmiri",
  "ku", "kur", "Kurdish",
  "kv", "kom", "Komi",
  "kw", "cor", "Cornish",
  "ky", "kir", "Kirghiz",

  "la", "lat", "Latin",
  "lb", "ltz", "Luxembourgish",
  "lg", "lug", "Ganda",
  "li", "lim", "Limburgan",
  "ln", "lin", "Lingala",
  "lo", "lao", "Lao",
  "lt", "lit", "Lithuanian",
  "lu", "lub", "Luba-Katanga",
  "lv", "lav", "Latvian",

  "mg", "mlg", "Malagasy",
  "mh", "mah", "Marshallese",
  "mi", "mri", "Maori",
  "mk", "mkd", "Macedonian",
  "ml", "mal", "Malayalam",
  "mn", "mon", "Mongolian",
  "mr", "mar", "Marathi",
  "ms", "msa", "Malay",
  "mt", "mlt", "Maltese",
  "my", "mya", "Burmese",

  "na", "nau", "Nauru",
  "nb", "nob", "Norwegian Bokmal",
  "nd", "nde", "North Ndebele",
  "ne", "nep", "Nepali",
  "ng", "ndo", "Ndonga",
  "nl", "nld", "Dutch",
  "nn", "nno", "Norwegian Nynorsk",
  "no", "nor", "Norwegian",
  "nr", "nbl", "South Ndebele",
  "nv", "nav", "Navajo",
  "ny", "nya", "Nyanja",

  "oc", "oci", "Occitan",
  "oj", "oji", "Ojibwa",
  "om", "orm", "Oromo",
  "or", "ori", "Oriya",
  "os", "oss", "Ossetian",

  "pa", "pan", "Panjabi",
  "pi", "pli", "Pali",
  "pl", "pol", "Polish",
  "ps", "pus", "Pushto",
  "pt", "por", "Portuguese",

  "qu", "que", "Quechua",

  "rm", "roh", "Romansh",
  "rn", "run", "Rundi",
  "ro", "ron", "Romanian",
  "ru", "rus", "Russian",
  "rw", "kin", "Kinyarwanda",

  "sa", "san", "Sanskrit",
  "sc", "srd", "Sardinian",
  "sd", "snd", "Sindhi",
  "se", "sme", "Northern Sami",
  "sg", "sag", "Sango",
  "sh", "hbs", "Serbo-Croatian",
  "si", "sin", "Sinhala",
  "sk", "slk", "Slovak",
  "sl", "slv", "Slovenian",
  "sm", "smo", "Samoan",
  "sn", "sna", "Shona",
  "so", "som", "Somali",
  "sq", "sqi", "Albanian",
  "sr", "srp", "Serbian",
  "ss", "ssw", "Swati",
  "st", "sot", "Southern Sotho",
  "su", "sun", "Sundanese",
  "sv", "swe", "Swedish",
  "sw", "swa", "Swahili",

  "ta", "tam", "Tamil",
  "te", "tel", "Telugu",
  "tg", "tgk", "Tajik",
  "th", "tha", "Thai",
  "ti", "tir", "Tigrinya",
  "tk", "tuk", "Turkmen",
  "tl", "tgl", "Tagalog",
  "tn", "tsn", "Tswana",
  "to", "ton", "Tonga",
  "tr", "tur", "Turkish",
  "ts", "tso", "Tsonga",
  "tt", "tat", "Tatar",
  "tw", "twi", "Twi",
  "ty", "tah", "Tahitian",

  "ug", "uig", "Uighur",
  "uk", "ukr", "Ukrainian",
  "ur", "urd", "Urdu",
  "uz", "uzb", "Uzbek",

  "ve", "ven", "Venda",
  "vi", "vie", "Vietnamese",
  "vo", "vol", "Volapuk",

  "wa", "wln", "Walloon",
  "wo", "wol", "Wolof",

  "xh", "xho", "Xhosa",

  "yi", "yid", "Yiddish",
  "yo", "yor", "Yoruba",

  "za", "zha", "Zhuang",
  "zh", "zho", "Chinese",
  "zu", "zul", "Zulu"
};
/* Table containing equivalences between ISO_3166 alpha-2 codes and country
   names, stored as a flat sequence of (code, name) string pairs.  This table
   has several entries for codes that have several valid country names.

   Every string must be followed by a comma: adjacent string literals are
   concatenated by the compiler, so a missing comma silently merges two
   entries and shifts every following code/name pair by one slot.  */

static char* iso_3166[] =
{
  "AU", "Australia",
  "AD", "Andorra",
  "AE", "United Arab Emirates",
  "AF", "Afghanistan",
  "AG", "Antigua and Barbuda",
  "AI", "Anguilla",
  "AL", "Albania",
  "AM", "Armenia",
  "AN", "Netherlands Antilles",
  "AO", "Angola",
  "AQ", "Antarctica",
  "AR", "Argentina",
  "AS", "American Samoa",
  "AT", "Austria",
  "AU", "Australia",
  "AW", "Aruba",
  "AX", "Aland Islands",
  "AZ", "Azerbaijan",

  "BA", "Bosnia and Herzegovina",
  "BB", "Barbados",
  "BD", "Bangladesh",
  "BE", "Belgium",
  "BF", "Burkina Faso",
  "BG", "Bulgaria",
  "BH", "Bahrain",
  "BI", "Burundi",
  "BJ", "Benin",
  "BL", "Saint Barthélemy",
  "BM", "Bermuda",
  "BN", "Brunei Darussalam",
  "BO", "Bolivia, Plurinational State of",
  "BQ", "Bonaire, Sint Eustatius and Saba",
  "BR", "Brazil",
  "BS", "Bahamas",
  "BT", "Bhutan",
  "BV", "Bouvet Island",
  "BW", "Botswana",
  "BY", "Belarus",
  "BZ", "Belize",

  "CA", "Canada",
  "CC", "Cocos (Keeling) Islands",
  "CD", "Congo, Democratic Republic of the",
  "CF", "Central African Republic",
  "CG", "Congo",
  "CH", "Switzerland",
  "CI", "Côte d'Ivoire",
  "CK", "Cook Islands",
  "CL", "Chile",
  "CM", "Cameroon",
  "CN", "China",
  "CN", "People’s Republic of China",
  "CN", "PR China",
  "CN", "PR-China",
  "CO", "Colombia",
  "CR", "Costa Rica",
  "CS", "Czechoslovakia",
  "CU", "Cuba",
  "CV", "Cape Verde",
  "CW", "Curaçao",
  "CX", "Christmas Island",
  "CY", "Cyprus",
  "CZ", "Czech Republic",

  "DE", "Germany",
  "DJ", "Djibouti",
  "DK", "Denmark",
  "DM", "Dominica",
  "DO", "Dominican Republic",
  "DZ", "Algeria",

  "EC", "Ecuador",
  "EE", "Estonia",
  "EG", "Egypt",
  "EH", "Western Sahara",
  "ER", "Eritrea",
  "ES", "Spain",
  "ET", "Ethiopia",

  "FI", "Finland",
  "FJ", "Fiji",
  "FK", "Falkland Islands (Malvinas)",
  "FM", "Micronesia, Federated States of",
  "FO", "Faroe Islands",
  "FR", "France",

  "GA", "Gabon",
  "GB", "United Kingdom",
  "GB", "United-Kingdom",
  "GB", "England",
  "GB", "Britain",
  "GB", "Great Britain",
  "GD", "Grenada",
  "GE", "Georgia",
  "GF", "French Guiana",
  "GG", "Guernsey",
  "GH", "Ghana",
  "GI", "Gibraltar",
  "GL", "Greenland",
  "GM", "Gambia",
  "GN", "Guinea",
  "GP", "Guadeloupe",
  "GQ", "Equatorial Guinea",
  "GR", "Greece",
  "GS", "South Georgia and the South Sandwich Islands",
  "GT", "Guatemala",
  "GU", "Guam",
  "GW", "Guinea-Bissau",
  "GY", "Guyana",

  "HK", "Hong Kong",
  "HK", "Hong-Kong",
  "HM", "Heard Island and McDonald Islands",
  "HN", "Honduras",
  "HR", "Croatia",
  "HT", "Haiti",
  "HU", "Hungary",

  "ID", "Indonesia",
  "IE", "Ireland",
  "IL", "Israel",
  "IM", "Isle of Man",
  "IN", "India",
  "IO", "British Indian Ocean Territory",
  "IQ", "Iraq",
  "IR", "Iran",
  "IR", "Iran, Islamic Republic of",
  "IS", "Iceland",
  "IT", "Italy",

  "JE", "Jersey",
  "JM", "Jamaica",
  "JO", "Jordan",
  "JP", "Japan",

  "KE", "Kenya",
  "KG", "Kyrgyzstan",
  "KH", "Cambodia",
  "KI", "Kiribati",
  "KM", "Comoros",
  "KN", "Saint Kitts and Nevis",
  "KP", "Korea, Democratic People's Republic of",
  "KR", "Korea, Republic of",
  "KW", "Kuwait",
  "KY", "Cayman Islands",
  "KZ", "Kazakhstan",

  "LA", "Lao People's Democratic Republic",
  "LB", "Lebanon",
  "LC", "Saint Lucia",
  "LI", "Liechtenstein",
  "LK", "Sri Lanka",
  "LR", "Liberia",
  "LS", "Lesotho",
  "LT", "Lithuania",
  "LU", "Luxembourg",
  "LV", "Latvia",
  "LY", "Libya",

  "MA", "Morocco",
  "MC", "Monaco",
  "MD", "Moldova, Republic of",
  "ME", "Montenegro",
  "MF", "Saint Martin",
  "MG", "Madagascar",
  "MH", "Marshall Islands",
  "MK", "Macedonia",
  "ML", "Mali",
  "MM", "Myanmar",
  "MN", "Mongolia",
  "MO", "Macao",
  "MP", "Northern Mariana Islands",
  "MQ", "Martinique",
  "MR", "Mauritania",
  "MS", "Montserrat",
  "MT", "Malta",
  "MU", "Mauritius",
  "MV", "Maldives",
  "MW", "Malawi",
  "MX", "Mexico",
  "MY", "Malaysia",
  "MZ", "Mozambique",

  "NA", "Namibia",
  "NC", "New Caledonia",
  "NE", "Niger",
  "NF", "Norfolk Island",
  "NG", "Nigeria",
  "NI", "Nicaragua",
  "NL", "Netherlands",
  "NL", "Holland",
  "NO", "Norway",
  "NP", "Nepal",
  "NR", "Nauru",
  "NU", "Niue",
  "NZ", "New Zealand",
  "NZ", "New-Zealand",

  "OM", "Oman",

  "PA", "Panama",
  "PE", "Peru",
  "PF", "French Polynesia",
  "PG", "Papua New Guinea",
  "PH", "Philippines",
  "PK", "Pakistan",
  "PL", "Poland",
  "PM", "Saint Pierre and Miquelon",
  "PN", "Pitcairn",
  "PR", "Puerto Rico",
  "PS", "Palestine, State of",
  "PT", "Portugal",
  "PW", "Palau",
  "PY", "Paraguay",

  "QA", "Qatar",

  "RE", "Réunion",
  "RO", "Romania",
  "RS", "Serbia",
  "RU", "Russian Federation",
  "RW", "Rwanda",

  "SA", "Saudi Arabia",
  "SB", "Solomon Islands",
  "SC", "Seychelles",
  "SD", "Sudan",
  "SE", "Sweden",
  "SG", "Singapore",
  "SH", "Saint Helena, Ascension and Tristan da Cunha",
  "SI", "Slovenia",
  "SJ", "Svalbard and Jan Mayen",
  "SK", "Slovakia",
  "SL", "Sierra Leone",
  "SM", "San Marino",
  "SN", "Senegal",
  "SO", "Somalia",
  "SR", "Suriname",
  "SS", "South Sudan",
  "SV", "El Salvador",
  "SX", "Sint Maarten (Dutch part)",
  "SY", "Syrian Arab Republic",
  "SZ", "Swaziland",

  "TC", "Turks and Caicos Islands",
  "TD", "Chad",
  "TF", "French Southern Territories",
  "TG", "Togo",
  "TH", "Thailand",
  "TJ", "Tajikistan",
  "TK", "Tokelau",
  "TL", "Timor-Leste",
  "TM", "Turkmenistan",
  "TN", "Tunisia",
  "TO", "Tonga",
  "TP", "East Timor",
  "TR", "Turkey",
  "TT", "Trinidad and Tobago",
  "TV", "Tuvalu",
  "TW", "Taiwan",
  "TW", "Taiwan, Province of China",
  "TZ", "Tanzania",
  "TZ", "Tanzania, United Republic of",

  "UA", "Ukraine",
  "UG", "Uganda",
  "UM", "United States Minor Outlying Islands",
  "US", "United States",
  "US", "United States of America",
  "US", "United-States",
  "UY", "Uruguay",
  "UZ", "Uzbekistan",

  "VA", "Holy See (Vatican City State)",
  "VC", "Saint Vincent and the Grenadines",
  "VE", "Venezuela",
  "VE", "Venezuela, Bolivarian Republic of",
  "VG", "Virgin Islands, British",
  "VI", "Virgin Islands, U.S.",
  "VN", "Viet Nam",
  "VU", "Vanuatu",
  "WF", "Wallis and Futuna",
  "WS", "Samoa",

  "YE", "Yemen",
  "YT", "Mayotte",
  "YU", "Yugoslavia",

  "ZA", "South Africa",
  "ZM", "Zambia",
  "ZW", "Zimbabwe"
};
/* Utility function to perform case insensitive string comparison.  Returns 1
   if both strings are equal (ignoring case) and 0 otherwise.  Both S1 and S2
   must be null-terminated strings.  */

static int
str_case_equals (const char *s1, const char *s2) {
  /* The <ctype.h> functions require an argument that is representable as an
     unsigned char (or EOF).  Plain char may be signed, so bytes above 0x7F
     would otherwise be passed as negative values; convert first.  */
  while (*s1 != '\0' && *s2 != '\0'
         && tolower ((unsigned char) *s1) == tolower ((unsigned char) *s2)) {
    s1++;
    s2++;
  }

  /* Equal only if both strings ended at the same position */
  return (*s1 == '\0') && (*s2 == '\0');
}
/* Copy the first LENGTH characters of SOURCE into TARGET and then append a
   string terminator.  TARGET must therefore provide room for LENGTH + 1
   characters.  A LENGTH that is zero or negative stores only the
   terminator.  */

static void
str_copy (char *target, char *source, int length) {
  int remaining = length;

  while (remaining > 0) {
    *target++ = *source++;
    remaining--;
  }

  *target = '\0';
}
/* Utility function to locate the end of the part of the lc_all string that
   has to be processed.  Required because in some targets (for example, AIX),
   the string returned by setlocale() has duplicates.

   The result points at the first space of LC_ALL when the string holds at
   least two spaces and the text before the first space is identical to the
   text between the first and the second space.  In every other case the
   result points at the string terminator of LC_ALL.  */

static char*
str_get_last_byte (char *lc_all) {
  char* first_sep;
  char* second_sep;
  char* terminator;
  char* lhs;
  char* rhs;

  /* Locate the 1st space; without one the whole string is significant */
  for (first_sep = lc_all; *first_sep != ' ' && *first_sep != '\0'; first_sep++)
    ;

  if (*first_sep == '\0')
    return first_sep;

  /* Locate the 2nd space; without one the whole string is significant */
  for (second_sep = first_sep + 1;
       *second_sep != ' ' && *second_sep != '\0';
       second_sep++)
    ;

  if (*second_sep == '\0')
    return second_sep;

  /* Locate the string terminator */
  for (terminator = second_sep; *terminator != '\0'; terminator++)
    ;

  /* Words of different length cannot be duplicates */
  if ((first_sep - lc_all) != (second_sep - first_sep - 1))
    return terminator;

  /* Same length: compare the first word against the second one */
  lhs = lc_all;
  rhs = first_sep + 1;
  while (rhs != second_sep && *lhs == *rhs) {
    lhs++;
    rhs++;
  }

  /* Reaching the 2nd space means that every character matched */
  return (rhs == second_sep) ? first_sep : terminator;
}
647 /* Utility function to search in the iso_639_1 table for an iso-639-1 code;
648 returns the corresponding iso-639-3 code or NULL if not found. */
650 static char*
651 iso_639_1_to_639_3(char* iso_639_1_code) {
652 int len = sizeof(iso_639)/sizeof(iso_639[0]);
653 char **p = iso_639;
654 int j;
656 for (j=0; j < len/3; j++) {
657 char* s1 = iso_639_1_code;
658 char* s2 = *p;
660 if (s1[0]==s2[0] && s1[1]==s2[1]) {
661 p++;
662 return *p;
665 p = p + 3;
668 return NULL;
671 /* Utility function to search in the iso_639_1 table for a language name;
672 returns the corresponding iso-639-3 code or NULL if not found. */
674 static char*
675 language_name_to_639_3(char* name) {
676 int len = sizeof(iso_639)/sizeof(iso_639[0]);
677 char **p = iso_639;
678 int j;
680 p = p + 2;
681 for (j=0; j < len/3; j++) {
682 if (str_case_equals(name, *p)) {
683 p--;
684 return *p;
687 p = p + 3;
690 return NULL;
693 /* Utility function to search in the iso_3166 table for a country name;
694 returns the corresponding iso-3166 code or NULL if not found. */
696 static char*
697 country_name_to_3166 (char* name) {
698 int len = sizeof(iso_3166)/sizeof(iso_3166[0]);
699 char **p = iso_3166;
700 int j;
702 p++;
703 for (j=0; j < len/2; j++) {
704 if (str_case_equals(name, *p)) {
705 p--;
706 return *p;
709 p = p + 2;
712 return NULL;
716 c_get_language_code needs to fill in the Alpha-3 encoding of the
717 language code (3 lowercase letters). That should be "und" if the
718 language is unknown. [see Ada.Locales]
720 void
721 c_get_language_code (char4 p) {
722 char* Saved_Locale = setlocale(LC_ALL, NULL);
723 char iso_639_3_code[] = "und"; /* Language Unknown */
724 char* lc_all;
725 char* result;
727 /* Get locales set in the environment */
729 setlocale(LC_ALL, "");
730 lc_all = setlocale(LC_ALL, NULL);
732 /* The string returned by setlocale has the following format:
734 language[_territory][.code-set][@modifier]
736 where language is an ISO 639 language code, territory is an ISO 3166
737 country code, and codeset is a character set or encoding identifier
738 like ISO-8859-1 or UTF-8.
741 if (lc_all != NULL) {
742 char* s = lc_all;
743 int lang_length = 0;
745 /* Copy the language part (which may be an ISO-639-1 code, an ISO-639-3
746 code, or a language name) adding a string terminator */
748 while (*s != '_' && *s != '.' && *s != '@' && *s != '\0')
749 s++;
751 lang_length = s - lc_all;
753 /* Handle conversion of ISO-639-1 to ISO-639-3 */
755 if (lang_length == 2) {
756 char iso_639_1[3];
757 char* to_iso_639_3;
759 /* Duplicate the ISO-639-1 code adding the null terminator required to
760 search for the equivalent ISO-639-3 code; we cannot just append the
761 null terminator since the pointer may reference non-writable memory.
764 str_copy(iso_639_1, lc_all, lang_length);
765 to_iso_639_3 = iso_639_1_to_639_3(iso_639_1);
767 if (to_iso_639_3)
768 str_copy(iso_639_3_code, to_iso_639_3, 3);
770 /* Copy the ISO-639-3 code (adding a null terminator) */
772 } else if (lang_length == 3) {
773 str_copy(iso_639_3_code, lc_all, lang_length);
775 /* Handle conversion of language name to ISO-639-3 */
777 } else if (lang_length > 3) {
778 char name_copy[lang_length + 1];
779 char* to_iso_639_3;
781 /* Duplicate the ISO-639-1 code adding the null terminator required to
782 search for the equivalent ISO-639-3 code; we cannot just append the
783 null terminator since the pointer may reference non-writable memory.
786 str_copy(name_copy, lc_all, lang_length);
787 to_iso_639_3 = language_name_to_639_3(name_copy);
789 if (to_iso_639_3)
790 str_copy(iso_639_3_code, to_iso_639_3, 3);
794 /* Copy out the computed ISO_639_3 code */
796 result = iso_639_3_code;
797 for (; *result != '\0'; p++, result++)
798 *p = *result;
800 /* Restore the original locale settings */
802 setlocale(LC_ALL, Saved_Locale);
804 return;
808 c_get_country_code needs to fill in the Alpha-2 encoding of the
809 country code (2 uppercase letters). That should be "ZZ" if the
810 country is unknown. [see Ada.Locales]
812 void
813 c_get_country_code (char4 p) {
814 char* Saved_Locale = setlocale(LC_ALL, NULL);
815 char iso_3166_code[] = "ZZ"; /* Country Unknown */
816 char* lc_all;
817 char* result;
819 /* Get locales set in the environment */
821 setlocale(LC_ALL, "");
822 lc_all = setlocale(LC_ALL, NULL);
824 /* The string returned by setlocale has the following format:
826 language[_territory][.code-set][@modifier]
828 where language is an ISO 639 language code, territory is an ISO 3166
829 country code, and codeset is a character set or encoding identifier
830 like ISO-8859-1 or UTF-8.
833 if (lc_all != NULL) {
834 char* s1 = lc_all;
835 char* s2 = NULL;
836 char* last_byte = str_get_last_byte(lc_all);
837 int country_length = 0;
839 /* Search for the beginning of the country code */
841 s1 = lc_all;
842 while (*s1 != '_' && *s1 != '.' && *s1 != '@' && s1 != last_byte)
843 s1++;
845 if (*s1 == '_') {
846 s1++;
847 s2 = s1;
849 while (*s2 != '.' && *s2 != '@' && s2 != last_byte)
850 s2++;
852 country_length = s2 - s1;
854 if (country_length == 2) {
855 str_copy(iso_3166_code, s1, country_length);
857 /* setlocale returned us the country name */
859 } else if (country_length > 3) {
860 char name_copy[country_length + 1];
861 char* to_3166;
863 str_copy(name_copy, s1, country_length);
864 to_3166 = country_name_to_3166(name_copy);
866 if (to_3166)
867 str_copy(iso_3166_code, to_3166, 2);
872 /* Copy out the computed ISO_3166 code */
874 result = iso_3166_code;
875 for (; *result != '\0'; p++, result++)
876 *p = *result;
878 /* Restore the original locale settings */
880 setlocale(LC_ALL, Saved_Locale);
882 return;