gcc/ada/locales.c

   1 /****************************************************************************
   2  *                                                                          *
   3  *                         GNAT COMPILER COMPONENTS                         *
   4  *                                                                          *
   5  *                             L O C A L E S                                *
   6  *                                                                          *
   7  *                          C Implementation File                           *
   8  *                                                                          *
   9  *             Copyright (C) 2010-2018, Free Software Foundation, Inc.      *
  10  *                                                                          *
  11  * GNAT is free software;  you can  redistribute it  and/or modify it under *
  12  * terms of the  GNU General Public License as published  by the Free Soft- *
  13  * ware  Foundation;  either version 3,  or (at your option) any later ver- *
  14  * sion.  GNAT is distributed in the hope that it will be useful, but WITH- *
  15  * OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY *
  16  * or FITNESS FOR A PARTICULAR PURPOSE.                                     *
  17  *                                                                          *
  18  * As a special exception under Section 7 of GPL version 3, you are granted *
  19  * additional permissions described in the GCC Runtime Library Exception,   *
  20  * version 3.1, as published by the Free Software Foundation.               *
  21  *                                                                          *
  22  * You should have received a copy of the GNU General Public License and    *
  23  * a copy of the GCC Runtime Library Exception along with this program;     *
  24  * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    *
  25  * <http://www.gnu.org/licenses/>.                                          *
  26  *                                                                          *
  27  * GNAT was originally developed  by the GNAT team at  New York University. *
  28  * Extensive contributions were provided by Ada Core Technologies Inc.      *
  29  *                                                                          *
  30  ****************************************************************************/
  31
  32 /*  This file provides OS-dependent support for the Ada.Locales package.    */
  33
#include <ctype.h>
#include <locale.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
  37
  38 typedef char char4 [4];
  39
  40 /* Table containing equivalences between ISO_639_1 codes and their ISO_639_3
  41    alpha-3 code plus their language name. */
  42
  43 static char* iso_639[] =
  44 {
  45   "aa", "aar", "Afar",
  46   "ab", "abk", "Abkhazian",
  47   "ae", "ave", "Avestan",
  48   "af", "afr", "Afrikaans",
  49   "ak", "aka", "Akan",
  50   "am", "amh", "Amharic",
  51   "an", "arg", "Aragonese",
  52   "ar", "ara", "Arabic",
  53   "as", "asm", "Assamese",
  54   "av", "ava", "Avaric",
  55   "ay", "aym", "Aymara",
  56   "az", "aze", "Azerbaijani",
  57
  58   "ba", "bak", "Bashkir",
  59   "be", "bel", "Belarusian",
  60   "bg", "bul", "Bulgarian",
  61   "bi", "bis", "Bislama",
  62   "bm", "bam", "Bambara",
  63   "bn", "ben", "Bengali",
  64   "bo", "bod", "Tibetan",
  65   "br", "bre", "Breton",
  66   "bs", "bos", "Bosnian",
  67
  68   "ca", "cat", "Catalan",
  69   "ce", "che", "Chechen",
  70   "ch", "cha", "Chamorro",
  71   "co", "cos", "Corsican",
  72   "cr", "cre", "Cree",
  73   "cs", "ces", "Czech",
  74   "cu", "chu", "Church Slavic",
  75   "cv", "chv", "Chuvash",
  76   "cy", "cym", "Welsh",
  77
  78   "da", "dan", "Danish",
  79   "de", "deu", "German",
  80   "dv", "div", "Divehi",
  81   "dz", "dzo", "Dzongkha",
  82
  83   "ee", "ewe", "Ewe",
  84   "el", "ell", "Modern Greek",
  85   "en", "eng", "English",
  86   "eo", "epo", "Esperanto",
  87   "es", "spa", "Spanish",
  88   "et", "est", "Estonian",
  89   "eu", "eus", "Basque",
  90
  91   "fa", "fas", "Persian",
  92   "ff", "ful", "Fulah",
  93   "fi", "fin", "Finnish",
  94   "fj", "fij", "Fijian",
  95   "fo", "fao", "Faroese",
  96   "fr", "fra", "French",
  97   "fy", "fry", "Western Frisian",
  98
  99   "ga", "gle", "Irish",
 100   "gd", "gla", "Scottish Gaelic",
 101   "gl", "glg", "Galician",
 102   "gn", "grn", "Guarani",
 103   "gu", "guj", "Gujarati",
 104   "gv", "glv", "Manx",
 105
 106   "ha", "hau", "Hausa",
 107   "he", "heb", "Hebrew",
 108   "hi", "hin", "Hindi",
 109   "ho", "hmo", "Hiri Motu",
 110   "hr", "hrv", "Croatian",
 111   "ht", "hat", "Haitian",
 112   "hu", "hun", "Hungarian",
 113   "hy", "hye", "Armenian",
 114   "hz", "her", "Herero",
 115
 116   "ia", "ina", "Interlingua",
 117   "id", "ind", "Indonesian",
 118   "ie", "ile", "Interlingue",
 119   "ig", "ibo", "Igbo",
 120   "ii", "iii", "Sichuan Yi",
 121   "ik", "ipk", "Inupiaq",
 122   "io", "ido", "Ido",
 123   "is", "isl", "Icelandic",
 124   "it", "ita", "Italian",
 125   "iu", "iku", "Inuktitut",
 126
 127   "ja", "jpn", "Japanese",
 128   "jv", "jav", "Javanese",
 129
 130   "ka", "kat", "Georgian",
 131   "kg", "kon", "Kongo",
 132   "ki", "kik", "Kikuyu",
 133   "kj", "kua", "Kuanyama",
 134   "kk", "kaz", "Kazakh",
 135   "kl", "kal", "Kalaallisut",
 136   "km", "khm", "Central Khmer",
 137   "kn", "kan", "Kannada",
 138   "ko", "kor", "Korean",
 139   "kr", "kau", "Kanuri",
 140   "ks", "kas", "Kashmiri",
 141   "ku", "kur", "Kurdish",
 142   "kv", "kom", "Komi",
 143   "kw", "cor", "Cornish",
 144   "ky", "kir", "Kirghiz",
 145
 146   "la", "lat", "Latin",
 147   "lb", "ltz", "Luxembourgish",
 148   "lg", "lug", "Ganda",
 149   "li", "lim", "Limburgan",
 150   "ln", "lin", "Lingala",
 151   "lo", "lao", "Lao",
 152   "lt", "lit", "Lithuanian",
 153   "lu", "lub", "Luba-Katanga",
 154   "lv", "lav", "Latvian",
 155
 156   "mg", "mlg", "Malagasy",
 157   "mh", "mah", "Marshallese",
 158   "mi", "mri", "Maori",
 159   "mk", "mkd", "Macedonian",
 160   "ml", "mal", "Malayalam",
 161   "mn", "mon", "Mongolian",
 162   "mr", "mar", "Marathi",
 163   "ms", "msa", "Malay",
 164   "mt", "mlt", "Maltese",
 165   "my", "mya", "Burmese",
 166
 167   "na", "nau", "Nauru",
 168   "nb", "nob", "Norwegian Bokmal",
 169   "nd", "nde", "North Ndebele",
 170   "ne", "nep", "Nepali",
 171   "ng", "ndo", "Ndonga",
 172   "nl", "nld", "Dutch",
 173   "nn", "nno", "Norwegian Nynorsk",
 174   "no", "nor", "Norwegian",
 175   "nr", "nbl", "South Ndebele",
 176   "nv", "nav", "Navajo",
 177   "ny", "nya", "Nyanja",
 178
 179   "oc", "oci", "Occitan",
 180   "oj", "oji", "Ojibwa",
 181   "om", "orm", "Oromo",
 182   "or", "ori", "Oriya",
 183   "os", "oss", "Ossetian",
 184
 185   "pa", "pan", "Panjabi",
 186   "pi", "pli", "Pali",
 187   "pl", "pol", "Polish",
 188   "ps", "pus", "Pushto",
 189   "pt", "por", "Portuguese",
 190
 191   "qu", "que", "Quechua",
 192
 193   "rm", "roh", "Romansh",
 194   "rn", "run", "Rundi",
 195   "ro", "ron", "Romanian",
 196   "ru", "rus", "Russian",
 197   "rw", "kin", "Kinyarwanda",
 198
 199   "sa", "san", "Sanskrit",
 200   "sc", "srd", "Sardinian",
 201   "sd", "snd", "Sindhi",
 202   "se", "sme", "Northern Sami",
 203   "sg", "sag", "Sango",
 204   "sh", "hbs", "Serbo-Croatian",
 205   "si", "sin", "Sinhala",
 206   "sk", "slk", "Slovak",
 207   "sl", "slv", "Slovenian",
 208   "sm", "smo", "Samoan",
 209   "sn", "sna", "Shona",
 210   "so", "som", "Somali",
 211   "sq", "sqi", "Albanian",
 212   "sr", "srp", "Serbian",
 213   "ss", "ssw", "Swati",
 214   "st", "sot", "Southern Sotho",
 215   "su", "sun", "Sundanese",
 216   "sv", "swe", "Swedish",
 217   "sw", "swa", "Swahili",
 218
 219   "ta", "tam", "Tamil",
 220   "te", "tel", "Telugu",
 221   "tg", "tgk", "Tajik",
 222   "th", "tha", "Thai",
 223   "ti", "tir", "Tigrinya",
 224   "tk", "tuk", "Turkmen",
 225   "tl", "tgl", "Tagalog",
 226   "tn", "tsn", "Tswana",
 227   "to", "ton", "Tonga",
 228   "tr", "tur", "Turkish",
 229   "ts", "tso", "Tsonga",
 230   "tt", "tat", "Tatar",
 231   "tw", "twi", "Twi",
 232   "ty", "tah", "Tahitian",
 233
 234   "ug", "uig", "Uighur",
 235   "uk", "ukr", "Ukrainian",
 236   "ur", "urd", "Urdu",
 237   "uz", "uzb", "Uzbek",
 238
 239   "ve", "ven", "Venda",
 240   "vi", "vie", "Vietnamese",
 241   "vo", "vol", "Volapuk",
 242
 243   "wa", "wln", "Walloon",
 244   "wo", "wol", "Wolof",
 245
 246   "xh", "xho", "Xhosa",
 247
 248   "yi", "yid", "Yiddish",
 249   "yo", "yor", "Yoruba",
 250
 251   "za", "zha", "Zhuang",
 252   "zh", "zho", "Chinese",
 253   "zu", "zul", "Zulu"
 254 };
 255
 256 /* Table containing equivalences between ISO_3166 alpha-2 codes and country
 257    names. This table has several entries for codes that have several valid
 258    country names. */
 259
 260 static char* iso_3166[] =
 261 {
 262   "AU", "Australia",
 263   "AD", "Andorra",
 264   "AE", "United Arab Emirates",
 265   "AF", "Afghanistan",
 266   "AG", "Antigua and Barbuda",
 267   "AI", "Anguilla",
 268   "AL", "Albania",
 269   "AM", "Armenia",
 270   "AN", "Netherlands Antilles",
 271   "AO", "Angola",
 272   "AQ", "Antarctica",
 273   "AR", "Argentina",
 274   "AS", "American Samoa",
 275   "AT", "Austria",
 276   "AU", "Australia",
 277   "AW", "Aruba",
 278   "AX", "Aland Islands",
 279   "AZ", "Azerbaijan",
 280
 281   "BA", "Bosnia and Herzegovina",
 282   "BB", "Barbados",
 283   "BD", "Bangladesh",
 284   "BE", "Belgium",
 285   "BF", "Burkina Faso",
 286   "BG", "Bulgaria",
 287   "BH", "Bahrain",
 288   "BI", "Burundi",
 289   "BJ", "Benin",
 290   "BL", "Saint Barthélemy",
 291   "BM", "Bermuda",
 292   "BN", "Brunei Darussalam",
 293   "BO", "Bolivia, Plurinational State of",
 294   "BQ", "Bonaire, Sint Eustatius and Saba",
 295   "BR", "Brazil",
 296   "BS", "Bahamas",
 297   "BT", "Bhutan",
 298   "BV", "Bouvet Island",
 299   "BW", "Botswana",
 300   "BY", "Belarus",
 301   "BZ", "Belize",
 302
 303   "CA", "Canada",
 304   "CC", "Cocos (Keeling) Islands",
 305   "CD", "Congo, Democratic Republic of the",
 306   "CF", "Central African Republic",
 307   "CG", "Congo",
 308   "CH", "Switzerland",
 309   "CI", "Côte d'Ivoire",
 310   "CK", "Cook Islands",
 311   "CL", "Chile",
 312   "CM", "Cameroon",
 313   "CN", "China",
 314   "CN", "People’s Republic of China",
 315   "CN", "PR China",
 316   "CN", "PR-China",
 317   "CO", "Colombia",
 318   "CR", "Costa Rica",
 319   "CS", "Czechoslovakia",
 320   "CU", "Cuba",
 321   "CV", "Cape Verde",
 322   "CW", "Curaçao",
 323   "CX", "Christmas Island",
 324   "CY", "Cyprus",
 325   "CZ", "Czech Republic",
 326
 327   "DE", "Germany",
 328   "DJ", "Djibouti",
 329   "DK", "Denmark",
 330   "DM", "Dominica",
 331   "DO", "Dominican Republic",
 332   "DZ", "Algeria",
 333
 334   "EC", "Ecuador",
 335   "EE", "Estonia",
 336   "EG", "Egypt",
 337   "EH", "Western Sahara",
 338   "ER", "Eritrea",
 339   "ES", "Spain",
 340   "ET", "Ethiopia",
 341
 342   "FI", "Finland",
 343   "FG", "Fiji",
 344   "FK", "Falkland Islands (Malvinas)",
 345   "FM", "Micronesia, Federated States of",
 346   "FO", "Faroe Islands",
 347   "FR", "France",
 348
 349   "GA", "Gabon",
 350   "GB", "United Kingdom",
 351   "GB", "United-Kingdom",
 352   "GB", "England",
 353   "GB", "Britain",
 354   "GB", "Great Britain",
 355   "GD", "Grenada",
 356   "GE", "Georgia",
 357   "GF", "French Guiana",
 358   "GG", "Guernsey",
 359   "GH", "Ghana",
 360   "GI", "Gibraltar",
 361   "GL", "Greenland",
 362   "GM", "Gambia",
 363   "GN", "Guinea",
 364   "GP", "Guadeloupe",
 365   "GQ", "Equatorial Guinea",
 366   "GR", "Greece",
 367   "GS", "South Georgia and the South Sandwich Islands",
 368   "GT", "Guatemala",
 369   "GU", "Guam",
 370   "GW", "Guinea-Bissau",
 371   "GY", "Guyana",
 372
 373   "HK", "Hong Kong",
 374   "HK", "Hong-Kong",
 375   "HM", "Heard Island and McDonald Islands",
 376   "HN", "Honduras",
 377   "HR", "Croatia",
 378   "HT", "Haiti",
 379   "HU", "Hungary",
 380
 381   "ID", "Indonesia",
 382   "IE", "Ireland",
 383   "IL", "Israel",
 384   "IM", "Isle of Man",
 385   "IN", "India",
 386   "IO", "British Indian Ocean Territory",
 387   "IQ", "Iraq",
 388   "IR", "Iran",
 389   "IR", "Iran, Islamic Republic of",
 390   "IS", "Iceland",
 391   "IT", "Italy",
 392
 393   "JE", "Jersey",
 394   "JM", "Jamaica",
 395   "JO", "Jordan",
 396   "JP", "Japan",
 397
 398   "KE", "Kenya",
 399   "KG", "Kyrgyzstan",
 400   "KH", "Cambodia",
 401   "KI", "Kiribati",
 402   "KM", "Comoros",
 403   "KN", "Saint Kitts and Nevis",
 404   "KP", "Korea, Democratic People's Republic of",
 405   "KR", "Korea, Republic of",
 406   "KW", "Kuwait",
 407   "KY", "Cayman Islands",
 408   "KZ", "Kazakhstan",
 409
 410   "LA", "Lao People's Democratic Republic",
 411   "LB", "Lebanon",
 412   "LC", "Saint Lucia",
 413   "LI", "Liechtenstein",
 414   "LK", "Sri Lanka",
 415   "LR", "Liberia",
 416   "LS", "Lesotho",
 417   "LT", "Lithuania",
 418   "LU", "Luxembourg",
 419   "LV", "Latvia",
 420   "LY", "Libya",
 421
 422   "MA", "Morocco",
 423   "MC", "Monaco",
 424   "MD", "Moldova, Republic of",
 425   "ME", "Montenegro",
 426   "MF", "Saint Martin",
 427   "MG", "Madagascar",
 428   "MH", "Marshall Islands",
 429   "MK", "Macedonia",
 430   "ML", "Mali",
 431   "MM", "Myanmar",
 432   "MN", "Mongolia",
 433   "MO", "Macao",
 434   "MP", "Northern Mariana Islands",
 435   "MQ", "Martinique",
 436   "MR", "Mauritania",
 437   "MS", "Montserrat",
 438   "MT", "Malta",
 439   "MU", "Mauritius",
 440   "MV", "Maldives",
 441   "MW", "Malawi",
 442   "MX", "Mexico",
 443   "MY", "Malaysia",
 444   "MZ", "Mozambique",
 445
 446   "NA", "Namibia",
 447   "NC", "New Caledonia",
 448   "NE", "Niger",
 449   "NF", "Norfolk Island",
 450   "NG", "Nigeria",
 451   "NI", "Nicaragua",
 452   "NL", "Netherlands",
 453   "NL", "Holland",
 454   "NO", "Norway",
 455   "NP", "Nepal",
 456   "NR", "Nauru",
 457   "NU", "Niue",
 458   "NZ", "New Zealand",
 459   "NZ", "New-Zealand",
 460
 461   "OM", "Oman",
 462
 463   "PA", "Panama",
 464   "PE", "Peru",
 465   "PF", "French Polynesia",
 466   "PG", "Papua New Guinea",
 467   "PH", "Philippines",
 468   "PK", "Pakistan",
 469   "PL", "Poland",
 470   "PM", "Saint Pierre and Miquelon",
 471   "PN", "Pitcairn",
 472   "PR", "Puerto Rico",
 473   "PS", "Palestine, State of",
 474   "PT", "Portugal",
 475   "PW", "Palau",
 476   "PY", "Paraguay",
 477
 478   "QA", "Qatar",
 479
 480   "RE", "Réunion",
 481   "RO", "Romania",
 482   "RS", "Serbia",
 483   "RU", "Russian Federation",
 484   "RW", "Rwanda",
 485
 486   "SA", "Saudi Arabia",
 487   "SB", "Solomon Islands",
 488   "SC", "Seychelles",
 489   "SD", "Sudan",
 490   "SE", "Sweden",
 491   "SG", "Singapore",
 492   "SH", "Saint Helena, Ascension and Tristan da Cunha",
 493   "SI", "Slovenia",
 494   "SJ", "Svalbard and Jan Mayen",
 495   "SK", "Slovakia",
 496   "SL", "Sierra Leone",
 497   "SM", "San Marino",
 498   "SN", "Senegal",
 499   "SO", "Somalia",
 500   "SR", "Suriname",
 501   "SS", "South Sudan",
 502   "SV", "El Salvador",
 503   "SX", "Sint Maarten (Dutch part)",
 504   "SY", "Syrian Arab Republic",
 505   "SZ", "Swaziland",
 506
 507   "TC", "Turks and Caicos Islands",
 508   "TD", "Chad",
 509   "TF", "French Southern Territories",
 510   "TG", "Togo",
 511   "TH", "Thailand",
 512   "TJ", "Tajikistan",
 513   "TK", "Tokelau",
 514   "TL", "Timor-Leste",
 515   "TM", "Turkmenistan",
 516   "TN", "Tunisia",
 517   "TO", "Tonga",
 518   "TP", "East Timor",
 519   "TR", "Turkey",
 520   "TT", "Trinidad and Tobago",
 521   "TV", "Tuvalu",
 522   "TW", "Taiwan",
 523   "TW", "Taiwan, Province of China",
 524   "TZ", "Tanzania",
 525   "TZ", "Tanzania, United Republic of",
 526
 527   "UA", "Ukraine",
 528   "UG", "Uganda",
 529   "UM", "United States Minor Outlying Islands",
 530   "US", "United States",
 531   "US", "United States of America",
 532   "US", "United-States"
 533   "UY", "Uruguay",
 534   "UZ", "Uzbekistan",
 535
 536   "VA", "Holy See (Vatican City State)",
 537   "VC", "Saint Vincent and the Grenadines",
 538   "VE", "Venezuela",
 539   "VE", "Venezuela, Bolivarian Republic of",
 540   "VG", "Virgin Islands, British",
 541   "VI", "Virgin Islands, U.S.",
 542   "VN", "Viet Nam",
 543   "VU", "Vanuatu",
 544   "WF", "Wallis and Futuna",
 545   "WS", "Samoa",
 546
 547   "YE", "Yemen",
 548   "YT", "Mayotte",
 549   "YU", "Yugoslavia",
 550
 551   "ZA", "South Africa",
 552   "ZM", "Zambia",
 553   "ZW", "Zimbabwe"
 554 };
 555
 556 /* Utility function to perform case insensitive string comparison. Returns 1
 557    if both strings are equal and 0 otherwise. */
 558
 559 static int
 560 str_case_equals (const char *s1, const char *s2) {
 561   while (*s1 != '\0' && *s2 != '\0' && tolower(*s1) == tolower(*s2)) {
 562     s1++;
 563     s2++;
 564   }
 565
 566   return (*s1 == '\0') && (*s2 == '\0');
 567 }
 568
 569 /* Utility function to copy length characters of a string. The target string
 570    must have space to store the extra string null terminator. */
 571
 572 static void
 573 str_copy (char *target, char *source, int length) {
 574   for (; length > 0; source++, target++, length--) {
 575     *target = *source;
 576   }
 577
 578   *target = '\0';
 579 }
 580
 581 /* Utility function to search for the last byte of the lc_all string to be
 582    processed. Required because in some targets (for example, AIX), the
 583    string returned by setlocale() has duplicates. */
 584
 585 static char*
 586 str_get_last_byte (char *lc_all) {
 587   char* first_space = NULL;
 588   char* second_space = NULL;
 589   char* last_byte = NULL;
 590   char* s1 = lc_all;
 591
 592   /* Search for the 1st space (if any) */
 593   while (*s1 != ' ' && *s1 != '\0')
 594     s1++;
 595
 596   if (*s1 == '\0') {
 597     last_byte = s1;
 598
 599   } else {
 600     first_space = s1;
 601
 602     /* Skip this space and search for the 2nd one (if available) */
 603     s1++;
 604     while (*s1 != ' ' && *s1 != '\0')
 605       s1++;
 606
 607     if (*s1 == '\0') {
 608       last_byte = s1;
 609
 610     } else {
 611       second_space=s1;
 612
 613       /* Search for the last byte of lc_all */
 614       while (*s1 != '\0')
 615         s1++;
 616
 617       last_byte = s1;
 618
 619       /* Check if the two strings match */
 620       {
 621         int len1 = first_space - lc_all;
 622         int len2 = second_space - first_space - 1;
 623
 624         if (len1 == len2) {
 625           char* p1 = lc_all;
 626           char* p2 = first_space + 1;
 627
 628           /* Compare their contents */
 629           while (*p1 == *p2 && p2 != second_space) {
 630             p1++;
 631             p2++;
 632           }
 633
 634           /* if the two strings match then update the last byte */
 635
 636           if (p2 == second_space) {
 637             last_byte = first_space;
 638           }
 639         }
 640       }
 641     }
 642   }
 643
 644   return last_byte;
 645 }
 646
 647 /* Utility function to search in the iso_639_1 table for an iso-639-1 code;
 648    returns the corresponding iso-639-3 code or NULL if not found. */
 649
 650 static char*
 651 iso_639_1_to_639_3(char* iso_639_1_code) {
 652   int len = sizeof(iso_639)/sizeof(iso_639[0]);
 653   char **p = iso_639;
 654   int j;
 655
 656   for (j=0; j < len/3; j++) {
 657     char* s1 = iso_639_1_code;
 658     char* s2 = *p;
 659
 660     if (s1[0]==s2[0] && s1[1]==s2[1]) {
 661       p++;
 662       return *p;
 663     }
 664
 665     p = p + 3;
 666   }
 667
 668   return NULL;
 669 }
 670
 671 /* Utility function to search in the iso_639_1 table for a language name;
 672    returns the corresponding iso-639-3 code or NULL if not found. */
 673
 674 static char*
 675 language_name_to_639_3(char* name) {
 676   int len = sizeof(iso_639)/sizeof(iso_639[0]);
 677   char **p = iso_639;
 678   int j;
 679
 680   p = p + 2;
 681   for (j=0; j < len/3; j++) {
 682     if (str_case_equals(name, *p)) {
 683       p--;
 684       return *p;
 685     }
 686
 687     p = p + 3;
 688   }
 689
 690   return NULL;
 691 }
 692
 693 /* Utility function to search in the iso_3166 table for a country name;
 694    returns the corresponding iso-3166 code or NULL if not found. */
 695
 696 static char*
 697 country_name_to_3166 (char* name) {
 698   int len = sizeof(iso_3166)/sizeof(iso_3166[0]);
 699   char **p = iso_3166;
 700   int j;
 701
 702   p++;
 703   for (j=0; j < len/2; j++) {
 704     if (str_case_equals(name, *p)) {
 705       p--;
 706       return *p;
 707     }
 708
 709     p = p + 2;
 710   }
 711
 712   return NULL;
 713 }
 714
 715 /*
 716   c_get_language_code needs to fill in the Alpha-3 encoding of the
 717   language code (3 lowercase letters). That should be "und" if the
 718   language is unknown. [see Ada.Locales]
 719 */
 720 void
 721 c_get_language_code (char4 p) {
 722   char* Saved_Locale = setlocale(LC_ALL, NULL);
 723   char  iso_639_3_code[] = "und";                        /* Language Unknown */
 724   char* lc_all;
 725   char* result;
 726
 727   /* Get locales set in the environment */
 728
 729   setlocale(LC_ALL, "");
 730   lc_all = setlocale(LC_ALL, NULL);
 731
 732   /* The string returned by setlocale has the following format:
 733
 734            language[_territory][.code-set][@modifier]
 735
 736      where language is an ISO 639 language code, territory is an ISO 3166
 737      country code, and codeset is a character set or encoding identifier
 738      like ISO-8859-1 or UTF-8.
 739   */
 740
 741   if (lc_all != NULL) {
 742     char* s = lc_all;
 743     int lang_length = 0;
 744
 745     /* Copy the language part (which may be an ISO-639-1 code, an ISO-639-3
 746        code, or a language name) adding a string terminator */
 747
 748     while (*s != '_' && *s != '.' && *s != '@' && *s != '\0')
 749       s++;
 750
 751     lang_length = s - lc_all;
 752
 753     /* Handle conversion of ISO-639-1 to ISO-639-3 */
 754
 755     if (lang_length == 2) {
 756       char  iso_639_1[3];
 757       char* to_iso_639_3;
 758
 759       /* Duplicate the ISO-639-1 code adding the null terminator required to
 760          search for the equivalent ISO-639-3 code; we cannot just append the
 761          null terminator since the pointer may reference non-writable memory.
 762       */
 763
 764       str_copy(iso_639_1, lc_all, lang_length);
 765       to_iso_639_3 = iso_639_1_to_639_3(iso_639_1);
 766
 767       if (to_iso_639_3)
 768         str_copy(iso_639_3_code, to_iso_639_3, 3);
 769
 770     /* Copy the ISO-639-3 code (adding a null terminator) */
 771
 772     } else if (lang_length == 3) {
 773       str_copy(iso_639_3_code, lc_all, lang_length);
 774
 775     /* Handle conversion of language name to ISO-639-3 */
 776
 777     } else if (lang_length > 3) {
 778       char  name_copy[lang_length + 1];
 779       char* to_iso_639_3;
 780
 781       /* Duplicate the ISO-639-1 code adding the null terminator required to
 782          search for the equivalent ISO-639-3 code; we cannot just append the
 783          null terminator since the pointer may reference non-writable memory.
 784       */
 785
 786       str_copy(name_copy, lc_all, lang_length);
 787       to_iso_639_3 = language_name_to_639_3(name_copy);
 788
 789       if (to_iso_639_3)
 790         str_copy(iso_639_3_code, to_iso_639_3, 3);
 791     }
 792   }
 793
 794   /* Copy out the computed ISO_639_3 code */
 795
 796   result = iso_639_3_code;
 797   for (; *result != '\0'; p++, result++)
 798     *p = *result;
 799
 800   /* Restore the original locale settings */
 801
 802   setlocale(LC_ALL, Saved_Locale);
 803
 804   return;
 805 }
 806
 807 /*
 808   c_get_country_code needs to fill in the Alpha-2 encoding of the
 809   country code (2 uppercase letters). That should be "ZZ" if the
 810   country is unknown. [see Ada.Locales]
 811 */
 812 void
 813 c_get_country_code (char4 p) {
 814   char* Saved_Locale = setlocale(LC_ALL, NULL);
 815   char  iso_3166_code[] = "ZZ";                           /* Country Unknown */
 816   char* lc_all;
 817   char* result;
 818
 819   /* Get locales set in the environment */
 820
 821   setlocale(LC_ALL, "");
 822   lc_all = setlocale(LC_ALL, NULL);
 823
 824   /* The string returned by setlocale has the following format:
 825
 826            language[_territory][.code-set][@modifier]
 827
 828      where language is an ISO 639 language code, territory is an ISO 3166
 829      country code, and codeset is a character set or encoding identifier
 830      like ISO-8859-1 or UTF-8.
 831   */
 832
 833   if (lc_all != NULL) {
 834     char* s1 = lc_all;
 835     char* s2 = NULL;
 836     char* last_byte = str_get_last_byte(lc_all);
 837     int country_length = 0;
 838
 839     /* Search for the beginning of the country code */
 840
 841     s1 = lc_all;
 842     while (*s1 != '_' && *s1 != '.' && *s1 != '@' && s1 != last_byte)
 843       s1++;
 844
 845     if (*s1 == '_') {
 846       s1++;
 847       s2 = s1;
 848
 849       while (*s2 != '.' && *s2 != '@' && s2 != last_byte)
 850         s2++;
 851
 852       country_length = s2 - s1;
 853
 854       if (country_length == 2) {
 855         str_copy(iso_3166_code, s1, country_length);
 856
 857       /* setlocale returned us the country name */
 858
 859       } else if (country_length > 3) {
 860         char  name_copy[country_length + 1];
 861         char* to_3166;
 862
 863         str_copy(name_copy, s1, country_length);
 864         to_3166 = country_name_to_3166(name_copy);
 865
 866         if (to_3166)
 867           str_copy(iso_3166_code, to_3166, 2);
 868       }
 869     }
 870   }
 871
 872   /* Copy out the computed ISO_3166 code */
 873
 874   result = iso_3166_code;
 875   for (; *result != '\0'; p++, result++)
 876     *p = *result;
 877
 878   /* Restore the original locale settings */
 879
 880   setlocale(LC_ALL, Saved_Locale);
 881
 882   return;
 883 }