gcc/ada/locales.c

   1 /****************************************************************************
   2  *                                                                          *
   3  *                         GNAT COMPILER COMPONENTS                         *
   4  *                                                                          *
   5  *                             L O C A L E S                                *
   6  *                                                                          *
   7  *                          C Implementation File                           *
   8  *                                                                          *
   9  *             Copyright (C) 2010-2023, Free Software Foundation, Inc.      *
  10  *                                                                          *
  11  * GNAT is free software;  you can  redistribute it  and/or modify it under *
  12  * terms of the  GNU General Public License as published  by the Free Soft- *
  13  * ware  Foundation;  either version 3,  or (at your option) any later ver- *
  14  * sion.  GNAT is distributed in the hope that it will be useful, but WITH- *
  15  * OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY *
  16  * or FITNESS FOR A PARTICULAR PURPOSE.                                     *
  17  *                                                                          *
  18  * As a special exception under Section 7 of GPL version 3, you are granted *
  19  * additional permissions described in the GCC Runtime Library Exception,   *
  20  * version 3.1, as published by the Free Software Foundation.               *
  21  *                                                                          *
  22  * You should have received a copy of the GNU General Public License and    *
  23  * a copy of the GCC Runtime Library Exception along with this program;     *
  24  * see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    *
  25  * <http://www.gnu.org/licenses/>.                                          *
  26  *                                                                          *
  27  * GNAT was originally developed  by the GNAT team at  New York University. *
  28  * Extensive contributions were provided by Ada Core Technologies Inc.      *
  29  *                                                                          *
  30  ****************************************************************************/
  31
  32 /*  This file provides OS-dependent support for the Ada.Locales package.    */
  33
  34 #include <locale.h>
  35 #include <ctype.h>
  36 #include <stddef.h>
  37
/* Number of elements of the array A.  A must be a real array object; the
   computation is meaningless for a pointer.  */
#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))

/* Buffer type through which c_get_language_code and c_get_country_code
   hand their result back to the caller.  */
typedef char char4 [4];
  41
/* Table containing equivalences between ISO 639-1 codes and their ISO 639-3
   alpha-3 code plus their language name.

   The array is a flat sequence of triples:

        ISO 639-1 alpha-2 code, ISO 639-3 alpha-3 code, language name

   iso_639_1_to_639_3 and language_name_to_639_3 walk it three elements at a
   time, so the number of elements must remain a multiple of three.  */

static char* iso_639[] =
{
  "aa", "aar", "Afar",
  "ab", "abk", "Abkhazian",
  "ae", "ave", "Avestan",
  "af", "afr", "Afrikaans",
  "ak", "aka", "Akan",
  "am", "amh", "Amharic",
  "an", "arg", "Aragonese",
  "ar", "ara", "Arabic",
  "as", "asm", "Assamese",
  "av", "ava", "Avaric",
  "ay", "aym", "Aymara",
  "az", "aze", "Azerbaijani",

  "ba", "bak", "Bashkir",
  "be", "bel", "Belarusian",
  "bg", "bul", "Bulgarian",
  "bi", "bis", "Bislama",
  "bm", "bam", "Bambara",
  "bn", "ben", "Bengali",
  "bo", "bod", "Tibetan",
  "br", "bre", "Breton",
  "bs", "bos", "Bosnian",

  "ca", "cat", "Catalan",
  "ce", "che", "Chechen",
  "ch", "cha", "Chamorro",
  "co", "cos", "Corsican",
  "cr", "cre", "Cree",
  "cs", "ces", "Czech",
  "cu", "chu", "Church Slavic",
  "cv", "chv", "Chuvash",
  "cy", "cym", "Welsh",

  "da", "dan", "Danish",
  "de", "deu", "German",
  "dv", "div", "Divehi",
  "dz", "dzo", "Dzongkha",

  "ee", "ewe", "Ewe",
  "el", "ell", "Modern Greek",
  "en", "eng", "English",
  "eo", "epo", "Esperanto",
  "es", "spa", "Spanish",
  "et", "est", "Estonian",
  "eu", "eus", "Basque",

  "fa", "fas", "Persian",
  "ff", "ful", "Fulah",
  "fi", "fin", "Finnish",
  "fj", "fij", "Fijian",
  "fo", "fao", "Faroese",
  "fr", "fra", "French",
  "fy", "fry", "Western Frisian",

  "ga", "gle", "Irish",
  "gd", "gla", "Scottish Gaelic",
  "gl", "glg", "Galician",
  "gn", "grn", "Guarani",
  "gu", "guj", "Gujarati",
  "gv", "glv", "Manx",

  "ha", "hau", "Hausa",
  "he", "heb", "Hebrew",
  "hi", "hin", "Hindi",
  "ho", "hmo", "Hiri Motu",
  "hr", "hrv", "Croatian",
  "ht", "hat", "Haitian",
  "hu", "hun", "Hungarian",
  "hy", "hye", "Armenian",
  "hz", "her", "Herero",

  "ia", "ina", "Interlingua",
  "id", "ind", "Indonesian",
  "ie", "ile", "Interlingue",
  "ig", "ibo", "Igbo",
  "ii", "iii", "Sichuan Yi",
  "ik", "ipk", "Inupiaq",
  "io", "ido", "Ido",
  "is", "isl", "Icelandic",
  "it", "ita", "Italian",
  "iu", "iku", "Inuktitut",

  "ja", "jpn", "Japanese",
  "jv", "jav", "Javanese",

  "ka", "kat", "Georgian",
  "kg", "kon", "Kongo",
  "ki", "kik", "Kikuyu",
  "kj", "kua", "Kuanyama",
  "kk", "kaz", "Kazakh",
  "kl", "kal", "Kalaallisut",
  "km", "khm", "Central Khmer",
  "kn", "kan", "Kannada",
  "ko", "kor", "Korean",
  "kr", "kau", "Kanuri",
  "ks", "kas", "Kashmiri",
  "ku", "kur", "Kurdish",
  "kv", "kom", "Komi",
  "kw", "cor", "Cornish",
  "ky", "kir", "Kirghiz",

  "la", "lat", "Latin",
  "lb", "ltz", "Luxembourgish",
  "lg", "lug", "Ganda",
  "li", "lim", "Limburgan",
  "ln", "lin", "Lingala",
  "lo", "lao", "Lao",
  "lt", "lit", "Lithuanian",
  "lu", "lub", "Luba-Katanga",
  "lv", "lav", "Latvian",

  "mg", "mlg", "Malagasy",
  "mh", "mah", "Marshallese",
  "mi", "mri", "Maori",
  "mk", "mkd", "Macedonian",
  "ml", "mal", "Malayalam",
  "mn", "mon", "Mongolian",
  "mr", "mar", "Marathi",
  "ms", "msa", "Malay",
  "mt", "mlt", "Maltese",
  "my", "mya", "Burmese",

  "na", "nau", "Nauru",
  "nb", "nob", "Norwegian Bokmal",
  "nd", "nde", "North Ndebele",
  "ne", "nep", "Nepali",
  "ng", "ndo", "Ndonga",
  "nl", "nld", "Dutch",
  "nn", "nno", "Norwegian Nynorsk",
  "no", "nor", "Norwegian",
  "nr", "nbl", "South Ndebele",
  "nv", "nav", "Navajo",
  "ny", "nya", "Nyanja",

  "oc", "oci", "Occitan",
  "oj", "oji", "Ojibwa",
  "om", "orm", "Oromo",
  "or", "ori", "Oriya",
  "os", "oss", "Ossetian",

  "pa", "pan", "Panjabi",
  "pi", "pli", "Pali",
  "pl", "pol", "Polish",
  "ps", "pus", "Pushto",
  "pt", "por", "Portuguese",

  "qu", "que", "Quechua",

  "rm", "roh", "Romansh",
  "rn", "run", "Rundi",
  "ro", "ron", "Romanian",
  "ru", "rus", "Russian",
  "rw", "kin", "Kinyarwanda",

  "sa", "san", "Sanskrit",
  "sc", "srd", "Sardinian",
  "sd", "snd", "Sindhi",
  "se", "sme", "Northern Sami",
  "sg", "sag", "Sango",
  "sh", "hbs", "Serbo-Croatian",
  "si", "sin", "Sinhala",
  "sk", "slk", "Slovak",
  "sl", "slv", "Slovenian",
  "sm", "smo", "Samoan",
  "sn", "sna", "Shona",
  "so", "som", "Somali",
  "sq", "sqi", "Albanian",
  "sr", "srp", "Serbian",
  "ss", "ssw", "Swati",
  "st", "sot", "Southern Sotho",
  "su", "sun", "Sundanese",
  "sv", "swe", "Swedish",
  "sw", "swa", "Swahili",

  "ta", "tam", "Tamil",
  "te", "tel", "Telugu",
  "tg", "tgk", "Tajik",
  "th", "tha", "Thai",
  "ti", "tir", "Tigrinya",
  "tk", "tuk", "Turkmen",
  "tl", "tgl", "Tagalog",
  "tn", "tsn", "Tswana",
  "to", "ton", "Tonga",
  "tr", "tur", "Turkish",
  "ts", "tso", "Tsonga",
  "tt", "tat", "Tatar",
  "tw", "twi", "Twi",
  "ty", "tah", "Tahitian",

  "ug", "uig", "Uighur",
  "uk", "ukr", "Ukrainian",
  "ur", "urd", "Urdu",
  "uz", "uzb", "Uzbek",

  "ve", "ven", "Venda",
  "vi", "vie", "Vietnamese",
  "vo", "vol", "Volapuk",

  "wa", "wln", "Walloon",
  "wo", "wol", "Wolof",

  "xh", "xho", "Xhosa",

  "yi", "yid", "Yiddish",
  "yo", "yor", "Yoruba",

  "za", "zha", "Zhuang",
  "zh", "zho", "Chinese",
  "zu", "zul", "Zulu"
};
 257
 258 /* Table containing equivalences between ISO_3166 alpha-2 codes and country
 259    names. This table has several entries for codes that have several valid
 260    country names. */
 261
 262 static char* iso_3166[] =
 263 {
 264   "AU", "Australia",
 265   "AD", "Andorra",
 266   "AE", "United Arab Emirates",
 267   "AF", "Afghanistan",
 268   "AG", "Antigua and Barbuda",
 269   "AI", "Anguilla",
 270   "AL", "Albania",
 271   "AM", "Armenia",
 272   "AN", "Netherlands Antilles",
 273   "AO", "Angola",
 274   "AQ", "Antarctica",
 275   "AR", "Argentina",
 276   "AS", "American Samoa",
 277   "AT", "Austria",
 278   "AU", "Australia",
 279   "AW", "Aruba",
 280   "AX", "Aland Islands",
 281   "AZ", "Azerbaijan",
 282
 283   "BA", "Bosnia and Herzegovina",
 284   "BB", "Barbados",
 285   "BD", "Bangladesh",
 286   "BE", "Belgium",
 287   "BF", "Burkina Faso",
 288   "BG", "Bulgaria",
 289   "BH", "Bahrain",
 290   "BI", "Burundi",
 291   "BJ", "Benin",
 292   "BL", "Saint Barthélemy",
 293   "BM", "Bermuda",
 294   "BN", "Brunei Darussalam",
 295   "BO", "Bolivia, Plurinational State of",
 296   "BQ", "Bonaire, Sint Eustatius and Saba",
 297   "BR", "Brazil",
 298   "BS", "Bahamas",
 299   "BT", "Bhutan",
 300   "BV", "Bouvet Island",
 301   "BW", "Botswana",
 302   "BY", "Belarus",
 303   "BZ", "Belize",
 304
 305   "CA", "Canada",
 306   "CC", "Cocos (Keeling) Islands",
 307   "CD", "Congo, Democratic Republic of the",
 308   "CF", "Central African Republic",
 309   "CG", "Congo",
 310   "CH", "Switzerland",
 311   "CI", "Côte d'Ivoire",
 312   "CK", "Cook Islands",
 313   "CL", "Chile",
 314   "CM", "Cameroon",
 315   "CN", "China",
 316   "CN", "People’s Republic of China",
 317   "CN", "PR China",
 318   "CN", "PR-China",
 319   "CO", "Colombia",
 320   "CR", "Costa Rica",
 321   "CS", "Czechoslovakia",
 322   "CU", "Cuba",
 323   "CV", "Cape Verde",
 324   "CW", "Curaçao",
 325   "CX", "Christmas Island",
 326   "CY", "Cyprus",
 327   "CZ", "Czech Republic",
 328
 329   "DE", "Germany",
 330   "DJ", "Djibouti",
 331   "DK", "Denmark",
 332   "DM", "Dominica",
 333   "DO", "Dominican Republic",
 334   "DZ", "Algeria",
 335
 336   "EC", "Ecuador",
 337   "EE", "Estonia",
 338   "EG", "Egypt",
 339   "EH", "Western Sahara",
 340   "ER", "Eritrea",
 341   "ES", "Spain",
 342   "ET", "Ethiopia",
 343
 344   "FI", "Finland",
 345   "FG", "Fiji",
 346   "FK", "Falkland Islands (Malvinas)",
 347   "FM", "Micronesia, Federated States of",
 348   "FO", "Faroe Islands",
 349   "FR", "France",
 350
 351   "GA", "Gabon",
 352   "GB", "United Kingdom",
 353   "GB", "United-Kingdom",
 354   "GB", "England",
 355   "GB", "Britain",
 356   "GB", "Great Britain",
 357   "GD", "Grenada",
 358   "GE", "Georgia",
 359   "GF", "French Guiana",
 360   "GG", "Guernsey",
 361   "GH", "Ghana",
 362   "GI", "Gibraltar",
 363   "GL", "Greenland",
 364   "GM", "Gambia",
 365   "GN", "Guinea",
 366   "GP", "Guadeloupe",
 367   "GQ", "Equatorial Guinea",
 368   "GR", "Greece",
 369   "GS", "South Georgia and the South Sandwich Islands",
 370   "GT", "Guatemala",
 371   "GU", "Guam",
 372   "GW", "Guinea-Bissau",
 373   "GY", "Guyana",
 374
 375   "HK", "Hong Kong",
 376   "HK", "Hong-Kong",
 377   "HM", "Heard Island and McDonald Islands",
 378   "HN", "Honduras",
 379   "HR", "Croatia",
 380   "HT", "Haiti",
 381   "HU", "Hungary",
 382
 383   "ID", "Indonesia",
 384   "IE", "Ireland",
 385   "IL", "Israel",
 386   "IM", "Isle of Man",
 387   "IN", "India",
 388   "IO", "British Indian Ocean Territory",
 389   "IQ", "Iraq",
 390   "IR", "Iran",
 391   "IR", "Iran, Islamic Republic of",
 392   "IS", "Iceland",
 393   "IT", "Italy",
 394
 395   "JE", "Jersey",
 396   "JM", "Jamaica",
 397   "JO", "Jordan",
 398   "JP", "Japan",
 399
 400   "KE", "Kenya",
 401   "KG", "Kyrgyzstan",
 402   "KH", "Cambodia",
 403   "KI", "Kiribati",
 404   "KM", "Comoros",
 405   "KN", "Saint Kitts and Nevis",
 406   "KP", "Korea, Democratic People's Republic of",
 407   "KR", "Korea, Republic of",
 408   "KW", "Kuwait",
 409   "KY", "Cayman Islands",
 410   "KZ", "Kazakhstan",
 411
 412   "LA", "Lao People's Democratic Republic",
 413   "LB", "Lebanon",
 414   "LC", "Saint Lucia",
 415   "LI", "Liechtenstein",
 416   "LK", "Sri Lanka",
 417   "LR", "Liberia",
 418   "LS", "Lesotho",
 419   "LT", "Lithuania",
 420   "LU", "Luxembourg",
 421   "LV", "Latvia",
 422   "LY", "Libya",
 423
 424   "MA", "Morocco",
 425   "MC", "Monaco",
 426   "MD", "Moldova, Republic of",
 427   "ME", "Montenegro",
 428   "MF", "Saint Martin",
 429   "MG", "Madagascar",
 430   "MH", "Marshall Islands",
 431   "MK", "Macedonia",
 432   "ML", "Mali",
 433   "MM", "Myanmar",
 434   "MN", "Mongolia",
 435   "MO", "Macao",
 436   "MP", "Northern Mariana Islands",
 437   "MQ", "Martinique",
 438   "MR", "Mauritania",
 439   "MS", "Montserrat",
 440   "MT", "Malta",
 441   "MU", "Mauritius",
 442   "MV", "Maldives",
 443   "MW", "Malawi",
 444   "MX", "Mexico",
 445   "MY", "Malaysia",
 446   "MZ", "Mozambique",
 447
 448   "NA", "Namibia",
 449   "NC", "New Caledonia",
 450   "NE", "Niger",
 451   "NF", "Norfolk Island",
 452   "NG", "Nigeria",
 453   "NI", "Nicaragua",
 454   "NL", "Netherlands",
 455   "NL", "Holland",
 456   "NO", "Norway",
 457   "NP", "Nepal",
 458   "NR", "Nauru",
 459   "NU", "Niue",
 460   "NZ", "New Zealand",
 461   "NZ", "New-Zealand",
 462
 463   "OM", "Oman",
 464
 465   "PA", "Panama",
 466   "PE", "Peru",
 467   "PF", "French Polynesia",
 468   "PG", "Papua New Guinea",
 469   "PH", "Philippines",
 470   "PK", "Pakistan",
 471   "PL", "Poland",
 472   "PM", "Saint Pierre and Miquelon",
 473   "PN", "Pitcairn",
 474   "PR", "Puerto Rico",
 475   "PS", "Palestine, State of",
 476   "PT", "Portugal",
 477   "PW", "Palau",
 478   "PY", "Paraguay",
 479
 480   "QA", "Qatar",
 481
 482   "RE", "Réunion",
 483   "RO", "Romania",
 484   "RS", "Serbia",
 485   "RU", "Russian Federation",
 486   "RW", "Rwanda",
 487
 488   "SA", "Saudi Arabia",
 489   "SB", "Solomon Islands",
 490   "SC", "Seychelles",
 491   "SD", "Sudan",
 492   "SE", "Sweden",
 493   "SG", "Singapore",
 494   "SH", "Saint Helena, Ascension and Tristan da Cunha",
 495   "SI", "Slovenia",
 496   "SJ", "Svalbard and Jan Mayen",
 497   "SK", "Slovakia",
 498   "SL", "Sierra Leone",
 499   "SM", "San Marino",
 500   "SN", "Senegal",
 501   "SO", "Somalia",
 502   "SR", "Suriname",
 503   "SS", "South Sudan",
 504   "SV", "El Salvador",
 505   "SX", "Sint Maarten (Dutch part)",
 506   "SY", "Syrian Arab Republic",
 507   "SZ", "Swaziland",
 508
 509   "TC", "Turks and Caicos Islands",
 510   "TD", "Chad",
 511   "TF", "French Southern Territories",
 512   "TG", "Togo",
 513   "TH", "Thailand",
 514   "TJ", "Tajikistan",
 515   "TK", "Tokelau",
 516   "TL", "Timor-Leste",
 517   "TM", "Turkmenistan",
 518   "TN", "Tunisia",
 519   "TO", "Tonga",
 520   "TP", "East Timor",
 521   "TR", "Turkey",
 522   "TT", "Trinidad and Tobago",
 523   "TV", "Tuvalu",
 524   "TW", "Taiwan",
 525   "TW", "Taiwan, Province of China",
 526   "TZ", "Tanzania",
 527   "TZ", "Tanzania, United Republic of",
 528
 529   "UA", "Ukraine",
 530   "UG", "Uganda",
 531   "UM", "United States Minor Outlying Islands",
 532   "US", "United States",
 533   "US", "United States of America",
 534   "US", "United-States",
 535   "UY", "Uruguay",
 536   "UZ", "Uzbekistan",
 537
 538   "VA", "Holy See (Vatican City State)",
 539   "VC", "Saint Vincent and the Grenadines",
 540   "VE", "Venezuela",
 541   "VE", "Venezuela, Bolivarian Republic of",
 542   "VG", "Virgin Islands, British",
 543   "VI", "Virgin Islands, U.S.",
 544   "VN", "Viet Nam",
 545   "VU", "Vanuatu",
 546   "WF", "Wallis and Futuna",
 547   "WS", "Samoa",
 548
 549   "YE", "Yemen",
 550   "YT", "Mayotte",
 551   "YU", "Yugoslavia",
 552
 553   "ZA", "South Africa",
 554   "ZM", "Zambia",
 555   "ZW", "Zimbabwe"
 556 };
 557
 558 /* Utility function to perform case insensitive string comparison. Returns 1
 559    if both strings are equal and 0 otherwise. */
 560
 561 static int
 562 str_case_equals (const char *s1, const char *s2) {
 563   while (*s1 != '\0' && *s2 != '\0' && tolower(*s1) == tolower(*s2)) {
 564     s1++;
 565     s2++;
 566   }
 567
 568   return (*s1 == '\0') && (*s2 == '\0');
 569 }
 570
 571 /* Utility function to copy length characters of a string. The target string
 572    must have space to store the extra string null terminator. */
 573
 574 static void
 575 str_copy (char *target, char *source, int length) {
 576   for (; length > 0; source++, target++, length--) {
 577     *target = *source;
 578   }
 579
 580   *target = '\0';
 581 }
 582
 583 /* Utility function to search for the last byte of the lc_all string to be
 584    processed. Required because in some targets (for example, AIX), the
 585    string returned by setlocale() has duplicates. */
 586
 587 static char*
 588 str_get_last_byte (char *lc_all) {
 589   char* first_space = NULL;
 590   char* second_space = NULL;
 591   char* last_byte = NULL;
 592   char* s1 = lc_all;
 593
 594   /* Search for the 1st space (if any) */
 595   while (*s1 != ' ' && *s1 != '\0')
 596     s1++;
 597
 598   if (*s1 == '\0') {
 599     last_byte = s1;
 600
 601   } else {
 602     first_space = s1;
 603
 604     /* Skip this space and search for the 2nd one (if available) */
 605     s1++;
 606     while (*s1 != ' ' && *s1 != '\0')
 607       s1++;
 608
 609     if (*s1 == '\0') {
 610       last_byte = s1;
 611
 612     } else {
 613       second_space=s1;
 614
 615       /* Search for the last byte of lc_all */
 616       while (*s1 != '\0')
 617         s1++;
 618
 619       last_byte = s1;
 620
 621       /* Check if the two strings match */
 622       {
 623         int len1 = first_space - lc_all;
 624         int len2 = second_space - first_space - 1;
 625
 626         if (len1 == len2) {
 627           char* p1 = lc_all;
 628           char* p2 = first_space + 1;
 629
 630           /* Compare their contents */
 631           while (*p1 == *p2 && p2 != second_space) {
 632             p1++;
 633             p2++;
 634           }
 635
 636           /* if the two strings match then update the last byte */
 637
 638           if (p2 == second_space) {
 639             last_byte = first_space;
 640           }
 641         }
 642       }
 643     }
 644   }
 645
 646   return last_byte;
 647 }
 648
 649 /* Utility function to search in the iso_639_1 table for an iso-639-1 code;
 650    returns the corresponding iso-639-3 code or NULL if not found. */
 651
 652 static char*
 653 iso_639_1_to_639_3(char* iso_639_1_code) {
 654   int len = ARRAY_SIZE (iso_639);
 655   char **p = iso_639;
 656   int j;
 657
 658   for (j=0; j < len/3; j++) {
 659     char* s1 = iso_639_1_code;
 660     char* s2 = *p;
 661
 662     if (s1[0]==s2[0] && s1[1]==s2[1]) {
 663       p++;
 664       return *p;
 665     }
 666
 667     p = p + 3;
 668   }
 669
 670   return NULL;
 671 }
 672
 673 /* Utility function to search in the iso_639_1 table for a language name;
 674    returns the corresponding iso-639-3 code or NULL if not found. */
 675
 676 static char*
 677 language_name_to_639_3(char* name) {
 678   int len = ARRAY_SIZE (iso_639);
 679   char **p = iso_639;
 680   int j;
 681
 682   p = p + 2;
 683   for (j=0; j < len/3; j++) {
 684     if (str_case_equals(name, *p)) {
 685       p--;
 686       return *p;
 687     }
 688
 689     p = p + 3;
 690   }
 691
 692   return NULL;
 693 }
 694
 695 /* Utility function to search in the iso_3166 table for a country name;
 696    returns the corresponding iso-3166 code or NULL if not found. */
 697
 698 static char*
 699 country_name_to_3166 (char* name) {
 700   int len = ARRAY_SIZE (iso_3166);
 701   char **p = iso_3166;
 702   int j;
 703
 704   p++;
 705   for (j=0; j < len/2; j++) {
 706     if (str_case_equals(name, *p)) {
 707       p--;
 708       return *p;
 709     }
 710
 711     p = p + 2;
 712   }
 713
 714   return NULL;
 715 }
 716
 717 /*
 718   c_get_language_code needs to fill in the Alpha-3 encoding of the
 719   language code (3 lowercase letters). That should be "und" if the
 720   language is unknown. [see Ada.Locales]
 721 */
 722 void
 723 c_get_language_code (char4 p) {
 724   char* Saved_Locale = setlocale(LC_ALL, NULL);
 725   char  iso_639_3_code[] = "und";                        /* Language Unknown */
 726   char* lc_all;
 727   char* result;
 728
 729   /* Get locales set in the environment */
 730
 731   setlocale(LC_ALL, "");
 732   lc_all = setlocale(LC_ALL, NULL);
 733
 734   /* The string returned by setlocale has the following format:
 735
 736            language[_territory][.code-set][@modifier]
 737
 738      where language is an ISO 639 language code, territory is an ISO 3166
 739      country code, and codeset is a character set or encoding identifier
 740      like ISO-8859-1 or UTF-8.
 741   */
 742
 743   if (lc_all != NULL) {
 744     char* s = lc_all;
 745     int lang_length = 0;
 746
 747     /* Copy the language part (which may be an ISO-639-1 code, an ISO-639-3
 748        code, or a language name) adding a string terminator */
 749
 750     while (*s != '_' && *s != '.' && *s != '@' && *s != '\0')
 751       s++;
 752
 753     lang_length = s - lc_all;
 754
 755     /* Handle conversion of ISO-639-1 to ISO-639-3 */
 756
 757     if (lang_length == 2) {
 758       char  iso_639_1[3];
 759       char* to_iso_639_3;
 760
 761       /* Duplicate the ISO-639-1 code adding the null terminator required to
 762          search for the equivalent ISO-639-3 code; we cannot just append the
 763          null terminator since the pointer may reference non-writable memory.
 764       */
 765
 766       str_copy(iso_639_1, lc_all, lang_length);
 767       to_iso_639_3 = iso_639_1_to_639_3(iso_639_1);
 768
 769       if (to_iso_639_3)
 770         str_copy(iso_639_3_code, to_iso_639_3, 3);
 771
 772     /* Copy the ISO-639-3 code (adding a null terminator) */
 773
 774     } else if (lang_length == 3) {
 775       str_copy(iso_639_3_code, lc_all, lang_length);
 776
 777     /* Handle conversion of language name to ISO-639-3 */
 778
 779     } else if (lang_length > 3) {
 780       char  name_copy[lang_length + 1];
 781       char* to_iso_639_3;
 782
 783       /* Duplicate the ISO-639-1 code adding the null terminator required to
 784          search for the equivalent ISO-639-3 code; we cannot just append the
 785          null terminator since the pointer may reference non-writable memory.
 786       */
 787
 788       str_copy(name_copy, lc_all, lang_length);
 789       to_iso_639_3 = language_name_to_639_3(name_copy);
 790
 791       if (to_iso_639_3)
 792         str_copy(iso_639_3_code, to_iso_639_3, 3);
 793     }
 794   }
 795
 796   /* Copy out the computed ISO_639_3 code */
 797
 798   result = iso_639_3_code;
 799   for (; *result != '\0'; p++, result++)
 800     *p = *result;
 801
 802   /* Restore the original locale settings */
 803
 804   setlocale(LC_ALL, Saved_Locale);
 805
 806   return;
 807 }
 808
 809 /*
 810   c_get_country_code needs to fill in the Alpha-2 encoding of the
 811   country code (2 uppercase letters). That should be "ZZ" if the
 812   country is unknown. [see Ada.Locales]
 813 */
 814 void
 815 c_get_country_code (char4 p) {
 816   char* Saved_Locale = setlocale(LC_ALL, NULL);
 817   char  iso_3166_code[] = "ZZ";                           /* Country Unknown */
 818   char* lc_all;
 819   char* result;
 820
 821   /* Get locales set in the environment */
 822
 823   setlocale(LC_ALL, "");
 824   lc_all = setlocale(LC_ALL, NULL);
 825
 826   /* The string returned by setlocale has the following format:
 827
 828            language[_territory][.code-set][@modifier]
 829
 830      where language is an ISO 639 language code, territory is an ISO 3166
 831      country code, and codeset is a character set or encoding identifier
 832      like ISO-8859-1 or UTF-8.
 833   */
 834
 835   if (lc_all != NULL) {
 836     char* s1 = lc_all;
 837     char* s2 = NULL;
 838     char* last_byte = str_get_last_byte(lc_all);
 839     int country_length = 0;
 840
 841     /* Search for the beginning of the country code */
 842
 843     s1 = lc_all;
 844     while (*s1 != '_' && *s1 != '.' && *s1 != '@' && s1 != last_byte)
 845       s1++;
 846
 847     if (*s1 == '_') {
 848       s1++;
 849       s2 = s1;
 850
 851       while (*s2 != '.' && *s2 != '@' && s2 != last_byte)
 852         s2++;
 853
 854       country_length = s2 - s1;
 855
 856       if (country_length == 2) {
 857         str_copy(iso_3166_code, s1, country_length);
 858
 859       /* setlocale returned us the country name */
 860
 861       } else if (country_length > 3) {
 862         char  name_copy[country_length + 1];
 863         char* to_3166;
 864
 865         str_copy(name_copy, s1, country_length);
 866         to_3166 = country_name_to_3166(name_copy);
 867
 868         if (to_3166)
 869           str_copy(iso_3166_code, to_3166, 2);
 870       }
 871     }
 872   }
 873
 874   /* Copy out the computed ISO_3166 code */
 875
 876   result = iso_3166_code;
 877   for (; *result != '\0'; p++, result++)
 878     *p = *result;
 879
 880   /* Restore the original locale settings */
 881
 882   setlocale(LC_ALL, Saved_Locale);
 883
 884   return;
 885 }