lib/setlocale.c

   1 /* Set the current locale.  -*- coding: utf-8 -*-
   2    Copyright (C) 2009, 2011-2024 Free Software Foundation, Inc.
   3
   4    This file is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU Lesser General Public License as
   6    published by the Free Software Foundation, either version 3 of the
   7    License, or (at your option) any later version.
   8
   9    This file is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU Lesser General Public License for more details.
  13
  14    You should have received a copy of the GNU Lesser General Public License
  15    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  16
  17 /* Written by Bruno Haible <bruno@clisp.org>, 2009.  */
  18
  19 #include <config.h>
  20
  21 /* Override setlocale() so that when the default locale is requested
  22    (locale = ""), the environment variables LC_ALL, LC_*, and LANG are
  23    considered.
  24    Also include all the functionality from libintl's setlocale() override.  */
  25
  26 /* Please keep this file in sync with
  27    gettext/gettext-runtime/intl/setlocale.c !  */
  28
  29 /* Specification.  */
  30 #include <locale.h>
  31
  32 #include <stdio.h>
  33 #include <stdlib.h>
  34 #include <string.h>
  35
  36 #include "localename.h"
  37
  38 #if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE
  39 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES
  40 #  include <CoreFoundation/CFLocale.h>
  41 # elif HAVE_CFPREFERENCESCOPYAPPVALUE
  42 #  include <CoreFoundation/CFPreferences.h>
  43 # endif
  44 # include <CoreFoundation/CFPropertyList.h>
  45 # include <CoreFoundation/CFArray.h>
  46 # include <CoreFoundation/CFString.h>
  47 extern void gl_locale_name_canonicalize (char *name);
  48 #endif
  49
  50 #if 1
  51
  52 # undef setlocale
  53
  54 /* Which of the replacements to activate?  */
  55 # if NEED_SETLOCALE_IMPROVED
  56 #  define setlocale_improved rpl_setlocale
  57 # elif NEED_SETLOCALE_MTSAFE
  58 #  define setlocale_mtsafe rpl_setlocale
  59 # else
  60 #  error "This file should only be compiled if NEED_SETLOCALE_IMPROVED || NEED_SETLOCALE_MTSAFE."
  61 # endif
  62
  63 /* Like setlocale, but guaranteed to be multithread-safe if LOCALE == NULL.  */
  64 # if !SETLOCALE_NULL_ALL_MTSAFE || !SETLOCALE_NULL_ONE_MTSAFE /* i.e. if NEED_SETLOCALE_MTSAFE */
  65
  66 #  if NEED_SETLOCALE_IMPROVED
  67 static
  68 #  endif
  69 char *
  70 setlocale_mtsafe (int category, const char *locale)
  71 {
  72   if (locale == NULL)
  73     return (char *) setlocale_null (category);
  74   else
  75     return setlocale (category, locale);
  76 }
  77 # else /* !NEED_SETLOCALE_MTSAFE */
  78
  79 #  define setlocale_mtsafe setlocale
  80
  81 # endif /* NEED_SETLOCALE_MTSAFE */
  82
  83 # if NEED_SETLOCALE_IMPROVED
  84
  85 /* Return string representation of locale category CATEGORY.  */
  86 static const char *
  87 category_to_name (int category)
  88 {
  89   const char *retval;
  90
  91   switch (category)
  92   {
  93   case LC_COLLATE:
  94     retval = "LC_COLLATE";
  95     break;
  96   case LC_CTYPE:
  97     retval = "LC_CTYPE";
  98     break;
  99   case LC_MONETARY:
 100     retval = "LC_MONETARY";
 101     break;
 102   case LC_NUMERIC:
 103     retval = "LC_NUMERIC";
 104     break;
 105   case LC_TIME:
 106     retval = "LC_TIME";
 107     break;
 108   case LC_MESSAGES:
 109     retval = "LC_MESSAGES";
 110     break;
 111   default:
 112     /* If you have a better idea for a default value let me know.  */
 113     retval = "LC_XXX";
 114   }
 115
 116   return retval;
 117 }
 118
 119 #  if defined _WIN32 && ! defined __CYGWIN__
 120
 121 /* The native Windows setlocale() function expects locale names of the form
 122    "German" or "German_Germany" or "DEU", but not "de" or "de_DE".  We need
 123    to convert the names from the form with ISO 639 language code and ISO 3166
 124    country code to the form with English names or with three-letter identifier.
 125    The three-letter identifiers known by a Windows XP SP2 or SP3 are:
 126      AFK  Afrikaans_South Africa.1252
 127      ARA  Arabic_Saudi Arabia.1256
 128      ARB  Arabic_Lebanon.1256
 129      ARE  Arabic_Egypt.1256
 130      ARG  Arabic_Algeria.1256
 131      ARH  Arabic_Bahrain.1256
 132      ARI  Arabic_Iraq.1256
 133      ARJ  Arabic_Jordan.1256
 134      ARK  Arabic_Kuwait.1256
 135      ARL  Arabic_Libya.1256
 136      ARM  Arabic_Morocco.1256
 137      ARO  Arabic_Oman.1256
 138      ARQ  Arabic_Qatar.1256
 139      ARS  Arabic_Syria.1256
 140      ART  Arabic_Tunisia.1256
 141      ARU  Arabic_U.A.E..1256
 142      ARY  Arabic_Yemen.1256
 143      AZE  Azeri (Latin)_Azerbaijan.1254
 144      BEL  Belarusian_Belarus.1251
 145      BGR  Bulgarian_Bulgaria.1251
 146      BSB  Bosnian_Bosnia and Herzegovina.1250
 147      BSC  Bosnian (Cyrillic)_Bosnia and Herzegovina.1250  (wrong encoding!)
 148      CAT  Catalan_Spain.1252
 149      CHH  Chinese_Hong Kong S.A.R..950
 150      CHI  Chinese_Singapore.936
 151      CHS  Chinese_People's Republic of China.936
 152      CHT  Chinese_Taiwan.950
 153      CSY  Czech_Czech Republic.1250
 154      CYM  Welsh_United Kingdom.1252
 155      DAN  Danish_Denmark.1252
 156      DEA  German_Austria.1252
 157      DEC  German_Liechtenstein.1252
 158      DEL  German_Luxembourg.1252
 159      DES  German_Switzerland.1252
 160      DEU  German_Germany.1252
 161      ELL  Greek_Greece.1253
 162      ENA  English_Australia.1252
 163      ENB  English_Caribbean.1252
 164      ENC  English_Canada.1252
 165      ENG  English_United Kingdom.1252
 166      ENI  English_Ireland.1252
 167      ENJ  English_Jamaica.1252
 168      ENL  English_Belize.1252
 169      ENP  English_Republic of the Philippines.1252
 170      ENS  English_South Africa.1252
 171      ENT  English_Trinidad and Tobago.1252
 172      ENU  English_United States.1252
 173      ENW  English_Zimbabwe.1252
 174      ENZ  English_New Zealand.1252
 175      ESA  Spanish_Panama.1252
 176      ESB  Spanish_Bolivia.1252
 177      ESC  Spanish_Costa Rica.1252
 178      ESD  Spanish_Dominican Republic.1252
 179      ESE  Spanish_El Salvador.1252
 180      ESF  Spanish_Ecuador.1252
 181      ESG  Spanish_Guatemala.1252
 182      ESH  Spanish_Honduras.1252
 183      ESI  Spanish_Nicaragua.1252
 184      ESL  Spanish_Chile.1252
 185      ESM  Spanish_Mexico.1252
 186      ESN  Spanish_Spain.1252
 187      ESO  Spanish_Colombia.1252
 188      ESP  Spanish_Spain.1252
 189      ESR  Spanish_Peru.1252
 190      ESS  Spanish_Argentina.1252
 191      ESU  Spanish_Puerto Rico.1252
 192      ESV  Spanish_Venezuela.1252
 193      ESY  Spanish_Uruguay.1252
 194      ESZ  Spanish_Paraguay.1252
 195      ETI  Estonian_Estonia.1257
 196      EUQ  Basque_Spain.1252
 197      FAR  Farsi_Iran.1256
 198      FIN  Finnish_Finland.1252
 199      FOS  Faroese_Faroe Islands.1252
 200      FPO  Filipino_Philippines.1252
 201      FRA  French_France.1252
 202      FRB  French_Belgium.1252
 203      FRC  French_Canada.1252
 204      FRL  French_Luxembourg.1252
 205      FRM  French_Principality of Monaco.1252
 206      FRS  French_Switzerland.1252
 207      FYN  Frisian_Netherlands.1252
 208      GLC  Galician_Spain.1252
 209      HEB  Hebrew_Israel.1255
 210      HRB  Croatian_Bosnia and Herzegovina.1250
 211      HRV  Croatian_Croatia.1250
 212      HUN  Hungarian_Hungary.1250
 213      IND  Indonesian_Indonesia.1252
 214      IRE  Irish_Ireland.1252
 215      ISL  Icelandic_Iceland.1252
 216      ITA  Italian_Italy.1252
 217      ITS  Italian_Switzerland.1252
 218      IUK  Inuktitut (Latin)_Canada.1252
 219      JPN  Japanese_Japan.932
 220      KKZ  Kazakh_Kazakhstan.1251
 221      KOR  Korean_Korea.949
 222      KYR  Kyrgyz_Kyrgyzstan.1251
 223      LBX  Luxembourgish_Luxembourg.1252
 224      LTH  Lithuanian_Lithuania.1257
 225      LVI  Latvian_Latvia.1257
 226      MKI  FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251
 227      MON  Mongolian_Mongolia.1251
 228      MPD  Mapudungun_Chile.1252
 229      MSB  Malay_Brunei Darussalam.1252
 230      MSL  Malay_Malaysia.1252
 231      MWK  Mohawk_Canada.1252
 232      NLB  Dutch_Belgium.1252
 233      NLD  Dutch_Netherlands.1252
 234      NON  Norwegian-Nynorsk_Norway.1252
 235      NOR  Norwegian (Bokmål)_Norway.1252
 236      NSO  Northern Sotho_South Africa.1252
 237      PLK  Polish_Poland.1250
 238      PTB  Portuguese_Brazil.1252
 239      PTG  Portuguese_Portugal.1252
 240      QUB  Quechua_Bolivia.1252
 241      QUE  Quechua_Ecuador.1252
 242      QUP  Quechua_Peru.1252
 243      RMC  Romansh_Switzerland.1252
 244      ROM  Romanian_Romania.1250
 245      RUS  Russian_Russia.1251
 246      SKY  Slovak_Slovakia.1250
 247      SLV  Slovenian_Slovenia.1250
 248      SMA  Sami (Southern)_Norway.1252
 249      SMB  Sami (Southern)_Sweden.1252
 250      SME  Sami (Northern)_Norway.1252
 251      SMF  Sami (Northern)_Sweden.1252
 252      SMG  Sami (Northern)_Finland.1252
 253      SMJ  Sami (Lule)_Norway.1252
 254      SMK  Sami (Lule)_Sweden.1252
 255      SMN  Sami (Inari)_Finland.1252
 256      SMS  Sami (Skolt)_Finland.1252
 257      SQI  Albanian_Albania.1250
 258      SRB  Serbian (Cyrillic)_Serbia and Montenegro.1251
 259      SRL  Serbian (Latin)_Serbia and Montenegro.1250
 260      SRN  Serbian (Cyrillic)_Bosnia and Herzegovina.1251
 261      SRS  Serbian (Latin)_Bosnia and Herzegovina.1250
 262      SVE  Swedish_Sweden.1252
 263      SVF  Swedish_Finland.1252
 264      SWK  Swahili_Kenya.1252
 265      THA  Thai_Thailand.874
 266      TRK  Turkish_Turkey.1254
 267      TSN  Tswana_South Africa.1252
 268      TTT  Tatar_Russia.1251
 269      UKR  Ukrainian_Ukraine.1251
 270      URD  Urdu_Islamic Republic of Pakistan.1256
 271      USA  English_United States.1252
 272      UZB  Uzbek (Latin)_Uzbekistan.1254
 273      VIT  Vietnamese_Viet Nam.1258
 274      XHO  Xhosa_South Africa.1252
 275      ZHH  Chinese_Hong Kong S.A.R..950
 276      ZHI  Chinese_Singapore.936
 277      ZHM  Chinese_Macau S.A.R..950
 278      ZUL  Zulu_South Africa.1252
 279  */
 280
  281 /* Table from ISO 639 language code, optionally with country or script suffix,
  282    to English name.  Sorted by code (strcmp order), as search() requires.
  283    Keep in sync with the gl_locale_name_from_win32_LANGID function in
  284    localename.c!  */
  285 struct table_entry
  286 {
  287   const char *code;    /* Lookup key: the string that search() compares.  */
  288   const char *english; /* Replacement name that is passed to setlocale.  */
  289 };
  290 static const struct table_entry language_table[] =
  291   { /* Entries sharing a code are alternatives, tried in table order.  */
  292     { "af", "Afrikaans" },
  293     { "am", "Amharic" },
  294     { "ar", "Arabic" },
  295     { "arn", "Mapudungun" },
  296     { "as", "Assamese" },
  297     { "az@cyrillic", "Azeri (Cyrillic)" },
  298     { "az@latin", "Azeri (Latin)" },
  299     { "ba", "Bashkir" },
  300     { "be", "Belarusian" },
  301     { "ber", "Tamazight" },
  302     { "ber@arabic", "Tamazight (Arabic)" },
  303     { "ber@latin", "Tamazight (Latin)" },
  304     { "bg", "Bulgarian" },
  305     { "bin", "Edo" },
  306     { "bn", "Bengali" },
  307     { "bn_BD", "Bengali (Bangladesh)" },
  308     { "bn_IN", "Bengali (India)" },
  309     { "bnt", "Sutu" },
  310     { "bo", "Tibetan" },
  311     { "br", "Breton" },
  312     { "bs", "BSB" }, /* "Bosnian (Latin)" */
  313     { "bs@cyrillic", "BSC" }, /* Bosnian (Cyrillic) */
  314     { "ca", "Catalan" },
  315     { "chr", "Cherokee" },
  316     { "co", "Corsican" },
  317     { "cpe", "Hawaiian" },
  318     { "cs", "Czech" },
  319     { "cy", "Welsh" },
  320     { "da", "Danish" },
  321     { "de", "German" },
  322     { "dsb", "Lower Sorbian" },
  323     { "dv", "Divehi" },
  324     { "el", "Greek" },
  325     { "en", "English" },
  326     { "es", "Spanish" },
  327     { "et", "Estonian" },
  328     { "eu", "Basque" },
  329     { "fa", "Farsi" },
  330     { "ff", "Fulfulde" },
  331     { "fi", "Finnish" },
  332     { "fo", "Faroese" }, /* "Faeroese" does not work */
  333     { "fr", "French" },
  334     { "fy", "Frisian" },
  335     { "ga", "IRE" }, /* Gaelic (Ireland) */
  336     { "gd", "Gaelic (Scotland)" },
  337     { "gd", "Scottish Gaelic" },
  338     { "gl", "Galician" },
  339     { "gn", "Guarani" },
  340     { "gsw", "Alsatian" },
  341     { "gu", "Gujarati" },
  342     { "ha", "Hausa" },
  343     { "he", "Hebrew" },
  344     { "hi", "Hindi" },
  345     { "hr", "Croatian" },
  346     { "hsb", "Upper Sorbian" },
  347     { "hu", "Hungarian" },
  348     { "hy", "Armenian" },
  349     { "id", "Indonesian" },
  350     { "ig", "Igbo" },
  351     { "ii", "Yi" },
  352     { "is", "Icelandic" },
  353     { "it", "Italian" },
  354     { "iu", "IUK" }, /* Inuktitut */
  355     { "ja", "Japanese" },
  356     { "ka", "Georgian" },
  357     { "kk", "Kazakh" },
  358     { "kl", "Greenlandic" },
  359     { "km", "Cambodian" },
  360     { "km", "Khmer" },
  361     { "kn", "Kannada" },
  362     { "ko", "Korean" },
  363     { "kok", "Konkani" },
  364     { "kr", "Kanuri" },
  365     { "ks", "Kashmiri" },
  366     { "ks_IN", "Kashmiri_India" },
  367     { "ks_PK", "Kashmiri (Arabic)_Pakistan" },
  368     { "ky", "Kyrgyz" },
  369     { "la", "Latin" },
  370     { "lb", "Luxembourgish" },
  371     { "lo", "Lao" },
  372     { "lt", "Lithuanian" },
  373     { "lv", "Latvian" },
  374     { "mi", "Maori" },
  375     { "mk", "FYRO Macedonian" },
  376     { "mk", "Macedonian" },
  377     { "ml", "Malayalam" },
  378     { "mn", "Mongolian" },
  379     { "mni", "Manipuri" },
  380     { "moh", "Mohawk" },
  381     { "mr", "Marathi" },
  382     { "ms", "Malay" },
  383     { "mt", "Maltese" },
  384     { "my", "Burmese" },
  385     { "nb", "NOR" }, /* Norwegian Bokmål */
  386     { "ne", "Nepali" },
  387     { "nic", "Ibibio" },
  388     { "nl", "Dutch" },
  389     { "nn", "NON" }, /* Norwegian Nynorsk */
  390     { "no", "Norwegian" },
  391     { "nso", "Northern Sotho" },
  392     { "nso", "Sepedi" },
  393     { "oc", "Occitan" },
  394     { "om", "Oromo" },
  395     { "or", "Oriya" },
  396     { "pa", "Punjabi" },
  397     { "pap", "Papiamentu" },
  398     { "pl", "Polish" },
  399     { "prs", "Dari" },
  400     { "ps", "Pashto" },
  401     { "pt", "Portuguese" },
  402     { "qu", "Quechua" },
  403     { "qut", "K'iche'" },
  404     { "rm", "Romansh" },
  405     { "ro", "Romanian" },
  406     { "ru", "Russian" },
  407     { "rw", "Kinyarwanda" },
  408     { "sa", "Sanskrit" },
  409     { "sah", "Yakut" },
  410     { "sd", "Sindhi" },
  411     { "se", "Sami (Northern)" },
  412     { "se", "Northern Sami" },
  413     { "si", "Sinhalese" },
  414     { "sk", "Slovak" },
  415     { "sl", "Slovenian" },
  416     { "sma", "Sami (Southern)" },
  417     { "sma", "Southern Sami" },
  418     { "smj", "Sami (Lule)" },
  419     { "smj", "Lule Sami" },
  420     { "smn", "Sami (Inari)" },
  421     { "smn", "Inari Sami" },
  422     { "sms", "Sami (Skolt)" },
  423     { "sms", "Skolt Sami" },
  424     { "so", "Somali" },
  425     { "sq", "Albanian" },
  426     { "sr", "Serbian (Latin)" },
  427     { "sr@cyrillic", "SRB" }, /* Serbian (Cyrillic) */
  428     { "sv", "Swedish" },
  429     { "sw", "Swahili" },
  430     { "syr", "Syriac" },
  431     { "ta", "Tamil" },
  432     { "te", "Telugu" },
  433     { "tg", "Tajik" },
  434     { "th", "Thai" },
  435     { "ti", "Tigrinya" },
  436     { "tk", "Turkmen" },
  437     { "tl", "Filipino" },
  438     { "tn", "Tswana" },
  439     { "tr", "Turkish" },
  440     { "ts", "Tsonga" },
  441     { "tt", "Tatar" },
  442     { "ug", "Uighur" },
  443     { "uk", "Ukrainian" },
  444     { "ur", "Urdu" },
  445     { "uz", "Uzbek" },
  446     { "uz", "Uzbek (Latin)" },
  447     { "uz@cyrillic", "Uzbek (Cyrillic)" },
  448     { "ve", "Venda" },
  449     { "vi", "Vietnamese" },
  450     { "wen", "Sorbian" },
  451     { "wo", "Wolof" },
  452     { "xh", "Xhosa" },
  453     { "yi", "Yiddish" },
  454     { "yo", "Yoruba" },
  455     { "zh", "Chinese" },
  456     { "zu", "Zulu" }
  457   };
 458
  459 /* Table from ISO 3166 country code to English name; the name is appended to
  460    a language name as "Language_Country".  Sorted by code for search().  Keep
  461    in sync with the gl_locale_name_from_win32_LANGID function in localename.c!  */
  462 static const struct table_entry country_table[] =
  463   { /* Entries sharing a code are alternatives, tried in table order.  */
  464     { "AE", "U.A.E." },
  465     { "AF", "Afghanistan" },
  466     { "AL", "Albania" },
  467     { "AM", "Armenia" },
  468     { "AN", "Netherlands Antilles" },
  469     { "AR", "Argentina" },
  470     { "AT", "Austria" },
  471     { "AU", "Australia" },
  472     { "AZ", "Azerbaijan" },
  473     { "BA", "Bosnia and Herzegovina" },
  474     { "BD", "Bangladesh" },
  475     { "BE", "Belgium" },
  476     { "BG", "Bulgaria" },
  477     { "BH", "Bahrain" },
  478     { "BN", "Brunei Darussalam" },
  479     { "BO", "Bolivia" },
  480     { "BR", "Brazil" },
  481     { "BT", "Bhutan" },
  482     { "BY", "Belarus" },
  483     { "BZ", "Belize" },
  484     { "CA", "Canada" },
  485     { "CG", "Congo" },
  486     { "CH", "Switzerland" },
  487     { "CI", "Cote d'Ivoire" },
  488     { "CL", "Chile" },
  489     { "CM", "Cameroon" },
  490     { "CN", "People's Republic of China" },
  491     { "CO", "Colombia" },
  492     { "CR", "Costa Rica" },
  493     { "CS", "Serbia and Montenegro" },
  494     { "CZ", "Czech Republic" },
  495     { "DE", "Germany" },
  496     { "DK", "Denmark" },
  497     { "DO", "Dominican Republic" },
  498     { "DZ", "Algeria" },
  499     { "EC", "Ecuador" },
  500     { "EE", "Estonia" },
  501     { "EG", "Egypt" },
  502     { "ER", "Eritrea" },
  503     { "ES", "Spain" },
  504     { "ET", "Ethiopia" },
  505     { "FI", "Finland" },
  506     { "FO", "Faroe Islands" },
  507     { "FR", "France" },
  508     { "GB", "United Kingdom" },
  509     { "GD", "Caribbean" },
  510     { "GE", "Georgia" },
  511     { "GL", "Greenland" },
  512     { "GR", "Greece" },
  513     { "GT", "Guatemala" },
  514     { "HK", "Hong Kong" },
  515     { "HK", "Hong Kong S.A.R." },
  516     { "HN", "Honduras" },
  517     { "HR", "Croatia" },
  518     { "HT", "Haiti" },
  519     { "HU", "Hungary" },
  520     { "ID", "Indonesia" },
  521     { "IE", "Ireland" },
  522     { "IL", "Israel" },
  523     { "IN", "India" },
  524     { "IQ", "Iraq" },
  525     { "IR", "Iran" },
  526     { "IS", "Iceland" },
  527     { "IT", "Italy" },
  528     { "JM", "Jamaica" },
  529     { "JO", "Jordan" },
  530     { "JP", "Japan" },
  531     { "KE", "Kenya" },
  532     { "KG", "Kyrgyzstan" },
  533     { "KH", "Cambodia" },
  534     { "KR", "South Korea" },
  535     { "KW", "Kuwait" },
  536     { "KZ", "Kazakhstan" },
  537     { "LA", "Laos" },
  538     { "LB", "Lebanon" },
  539     { "LI", "Liechtenstein" },
  540     { "LK", "Sri Lanka" },
  541     { "LT", "Lithuania" },
  542     { "LU", "Luxembourg" },
  543     { "LV", "Latvia" },
  544     { "LY", "Libya" },
  545     { "MA", "Morocco" },
  546     { "MC", "Principality of Monaco" },
  547     { "MD", "Moldava" },
  548     { "MD", "Moldova" },
  549     { "ME", "Montenegro" },
  550     { "MK", "Former Yugoslav Republic of Macedonia" },
  551     { "ML", "Mali" },
  552     { "MM", "Myanmar" },
  553     { "MN", "Mongolia" },
  554     { "MO", "Macau S.A.R." },
  555     { "MT", "Malta" },
  556     { "MV", "Maldives" },
  557     { "MX", "Mexico" },
  558     { "MY", "Malaysia" },
  559     { "NG", "Nigeria" },
  560     { "NI", "Nicaragua" },
  561     { "NL", "Netherlands" },
  562     { "NO", "Norway" },
  563     { "NP", "Nepal" },
  564     { "NZ", "New Zealand" },
  565     { "OM", "Oman" },
  566     { "PA", "Panama" },
  567     { "PE", "Peru" },
  568     { "PH", "Philippines" },
  569     { "PK", "Islamic Republic of Pakistan" },
  570     { "PL", "Poland" },
  571     { "PR", "Puerto Rico" },
  572     { "PT", "Portugal" },
  573     { "PY", "Paraguay" },
  574     { "QA", "Qatar" },
  575     { "RE", "Reunion" },
  576     { "RO", "Romania" },
  577     { "RS", "Serbia" },
  578     { "RU", "Russia" },
  579     { "RW", "Rwanda" },
  580     { "SA", "Saudi Arabia" },
  581     { "SE", "Sweden" },
  582     { "SG", "Singapore" },
  583     { "SI", "Slovenia" },
  584     { "SK", "Slovak" },
  585     { "SN", "Senegal" },
  586     { "SO", "Somalia" },
  587     { "SR", "Suriname" },
  588     { "SV", "El Salvador" },
  589     { "SY", "Syria" },
  590     { "TH", "Thailand" },
  591     { "TJ", "Tajikistan" },
  592     { "TM", "Turkmenistan" },
  593     { "TN", "Tunisia" },
  594     { "TR", "Turkey" },
  595     { "TT", "Trinidad and Tobago" },
  596     { "TW", "Taiwan" },
  597     { "TZ", "Tanzania" },
  598     { "UA", "Ukraine" },
  599     { "US", "United States" },
  600     { "UY", "Uruguay" },
  601     { "VA", "Vatican" },
  602     { "VE", "Venezuela" },
  603     { "VN", "Viet Nam" },
  604     { "YE", "Yemen" },
  605     { "ZA", "South Africa" },
  606     { "ZW", "Zimbabwe" }
  607   };
 608
 609 /* Given a string STRING, find the set of indices i such that TABLE[i].code is
 610    the given STRING.  It is a range [lo,hi-1].  */
 611 typedef struct { size_t lo; size_t hi; } range_t;
 612 static void
 613 search (const struct table_entry *table, size_t table_size, const char *string,
 614         range_t *result)
 615 {
 616   /* The table is sorted.  Perform a binary search.  */
 617   size_t hi = table_size;
 618   size_t lo = 0;
 619   while (lo < hi)
 620     {
 621       /* Invariant:
 622          for i < lo, strcmp (table[i].code, string) < 0,
 623          for i >= hi, strcmp (table[i].code, string) > 0.  */
 624       size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
 625       int cmp = strcmp (table[mid].code, string);
 626       if (cmp < 0)
 627         lo = mid + 1;
 628       else if (cmp > 0)
 629         hi = mid;
 630       else
 631         {
 632           /* Found an i with
 633                strcmp (language_table[i].code, string) == 0.
 634              Find the entire interval of such i.  */
 635           {
 636             size_t i;
 637
 638             for (i = mid; i > lo; )
 639               {
 640                 i--;
 641                 if (strcmp (table[i].code, string) < 0)
 642                   {
 643                     lo = i + 1;
 644                     break;
 645                   }
 646               }
 647           }
 648           {
 649             size_t i;
 650
 651             for (i = mid + 1; i < hi; i++)
 652               {
 653                 if (strcmp (table[i].code, string) > 0)
 654                   {
 655                     hi = i;
 656                     break;
 657                   }
 658               }
 659           }
 660           /* The set of i with
 661                strcmp (language_table[i].code, string) == 0
 662              is the interval [lo, hi-1].  */
 663           break;
 664         }
 665     }
 666   result->lo = lo;
 667   result->hi = hi;
 668 }
 669
 670 /* Like setlocale, but accept also locale names in the form ll or ll_CC,
 671    where ll is an ISO 639 language code and CC is an ISO 3166 country code.  */
 672 static char *
 673 setlocale_unixlike (int category, const char *locale)
 674 {
 675   char *result;
 676   char llCC_buf[64];
 677   char ll_buf[64];
 678   char CC_buf[64];
 679
 680   /* The native Windows implementation of setlocale understands the special
 681      locale name "C", but not "POSIX".  Therefore map "POSIX" to "C".  */
 682   if (locale != NULL && strcmp (locale, "POSIX") == 0)
 683     locale = "C";
 684
 685   /* First, try setlocale with the original argument unchanged.  */
 686   result = setlocale_mtsafe (category, locale);
 687   if (result != NULL)
 688     return result;
 689
 690   /* Otherwise, assume the argument is in the form
 691        language[_territory][.codeset][@modifier]
 692      and try to map it using the tables.  */
 693   if (strlen (locale) < sizeof (llCC_buf))
 694     {
 695       /* Second try: Remove the codeset part.  */
 696       {
 697         const char *p = locale;
 698         char *q = llCC_buf;
 699
 700         /* Copy the part before the dot.  */
 701         for (; *p != '\0' && *p != '.'; p++, q++)
 702           *q = *p;
 703         if (*p == '.')
 704           /* Skip the part up to the '@', if any.  */
 705           for (; *p != '\0' && *p != '@'; p++)
 706             ;
 707         /* Copy the part starting with '@', if any.  */
 708         for (; *p != '\0'; p++, q++)
 709           *q = *p;
 710         *q = '\0';
 711       }
 712       /* llCC_buf now contains
 713            language[_territory][@modifier]
 714        */
 715       if (strcmp (llCC_buf, locale) != 0)
 716         {
 717           result = setlocale (category, llCC_buf);
 718           if (result != NULL)
 719             return result;
 720         }
 721       /* Look it up in language_table.  */
 722       {
 723         range_t range;
 724         size_t i;
 725
 726         search (language_table,
 727                 sizeof (language_table) / sizeof (language_table[0]),
 728                 llCC_buf,
 729                 &range);
 730
 731         for (i = range.lo; i < range.hi; i++)
 732           {
 733             /* Try the replacement in language_table[i].  */
 734             result = setlocale (category, language_table[i].english);
 735             if (result != NULL)
 736               return result;
 737           }
 738       }
 739       /* Split language[_territory][@modifier]
 740          into  ll_buf = language[@modifier]
 741          and   CC_buf = territory
 742        */
 743       {
 744         const char *underscore = strchr (llCC_buf, '_');
 745         if (underscore != NULL)
 746           {
 747             const char *territory_start = underscore + 1;
 748             const char *territory_end = strchr (territory_start, '@');
 749             if (territory_end == NULL)
 750               territory_end = territory_start + strlen (territory_start);
 751
 752             memcpy (ll_buf, llCC_buf, underscore - llCC_buf);
 753             strcpy (ll_buf + (underscore - llCC_buf), territory_end);
 754
 755             memcpy (CC_buf, territory_start, territory_end - territory_start);
 756             CC_buf[territory_end - territory_start] = '\0';
 757
 758             {
 759               /* Look up ll_buf in language_table
 760                  and CC_buf in country_table.  */
 761               range_t language_range;
 762
 763               search (language_table,
 764                       sizeof (language_table) / sizeof (language_table[0]),
 765                       ll_buf,
 766                       &language_range);
 767               if (language_range.lo < language_range.hi)
 768                 {
 769                   range_t country_range;
 770
 771                   search (country_table,
 772                           sizeof (country_table) / sizeof (country_table[0]),
 773                           CC_buf,
 774                           &country_range);
 775                   if (country_range.lo < country_range.hi)
 776                     {
 777                       size_t i;
 778                       size_t j;
 779
 780                       for (i = language_range.lo; i < language_range.hi; i++)
 781                         for (j = country_range.lo; j < country_range.hi; j++)
 782                           {
 783                             /* Concatenate the replacements.  */
 784                             const char *part1 = language_table[i].english;
 785                             size_t part1_len = strlen (part1);
 786                             const char *part2 = country_table[j].english;
 787                             size_t part2_len = strlen (part2) + 1;
 788                             char buf[64+64];
 789
 790                             if (!(part1_len + 1 + part2_len <= sizeof (buf)))
 791                               abort ();
 792                             memcpy (buf, part1, part1_len);
 793                             buf[part1_len] = '_';
 794                             memcpy (buf + part1_len + 1, part2, part2_len);
 795
 796                             /* Try the concatenated replacements.  */
 797                             result = setlocale (category, buf);
 798                             if (result != NULL)
 799                               return result;
 800                           }
 801                     }
 802
 803                   /* Try omitting the country entirely.  This may set a locale
 804                      corresponding to the wrong country, but is better than
 805                      failing entirely.  */
 806                   {
 807                     size_t i;
 808
 809                     for (i = language_range.lo; i < language_range.hi; i++)
 810                       {
 811                         /* Try only the language replacement.  */
 812                         result =
 813                           setlocale (category, language_table[i].english);
 814                         if (result != NULL)
 815                           return result;
 816                       }
 817                   }
 818                 }
 819             }
 820           }
 821       }
 822     }
 823
 824   /* Failed.  */
 825   return NULL;
 826 }
 827
 828 #  elif defined __ANDROID__
 829
 830 /* Like setlocale, but accept also the locale names "C" and "POSIX".  */
 831 static char *
 832 setlocale_unixlike (int category, const char *locale)
 833 {
 834   char *result = setlocale_mtsafe (category, locale);
 835   if (result == NULL)
 836     switch (category)
 837       {
 838       case LC_CTYPE:
 839       case LC_NUMERIC:
 840       case LC_TIME:
 841       case LC_COLLATE:
 842       case LC_MONETARY:
 843       case LC_MESSAGES:
 844       case LC_ALL:
 845       case LC_PAPER:
 846       case LC_NAME:
 847       case LC_ADDRESS:
 848       case LC_TELEPHONE:
 849       case LC_MEASUREMENT:
 850         if (locale == NULL
 851             || strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
 852           result = (char *) "C";
 853         break;
 854       default:
 855         break;
 856       }
 857   return result;
 858 }
 859 #   define setlocale setlocale_unixlike
 860
 861 #  else
 862 #   define setlocale_unixlike setlocale_mtsafe
 863 #  endif
 864
 865 #  if LC_MESSAGES == 1729
 866
 867 /* The system does not store an LC_MESSAGES locale category.  Do it here.  */
 868 static char lc_messages_name[64] = "C";
 869
 870 /* Like setlocale, but support also LC_MESSAGES.  */
 871 static char *
 872 setlocale_single (int category, const char *locale)
 873 {
 874   if (category == LC_MESSAGES)
 875     {
 876       if (locale != NULL)
 877         {
 878           lc_messages_name[sizeof (lc_messages_name) - 1] = '\0';
 879           strncpy (lc_messages_name, locale, sizeof (lc_messages_name) - 1);
 880         }
 881       return lc_messages_name;
 882     }
 883   else
 884     return setlocale_unixlike (category, locale);
 885 }
 886
 887 #  else
 888 #   define setlocale_single setlocale_unixlike
 889 #  endif
 890
 891 #  if defined __APPLE__ && defined __MACH__
 892
 893 /* Mapping from language to main territory where that language is spoken.  */
 894 static char const locales_with_principal_territory[][6 + 1] =
 895   {
 896                 /* Language     Main territory */
 897     "ace_ID",   /* Achinese     Indonesia */
 898     "af_ZA",    /* Afrikaans    South Africa */
 899     "ak_GH",    /* Akan         Ghana */
 900     "am_ET",    /* Amharic      Ethiopia */
 901     "an_ES",    /* Aragonese    Spain */
 902     "ang_GB",   /* Old English  Britain */
 903     "arn_CL",   /* Mapudungun   Chile */
 904     "as_IN",    /* Assamese     India */
 905     "ast_ES",   /* Asturian     Spain */
 906     "av_RU",    /* Avaric       Russia */
 907     "awa_IN",   /* Awadhi       India */
 908     "az_AZ",    /* Azerbaijani  Azerbaijan */
 909     "ban_ID",   /* Balinese     Indonesia */
 910     "be_BY",    /* Belarusian   Belarus */
 911     "bej_SD",   /* Beja         Sudan */
 912     "bem_ZM",   /* Bemba        Zambia */
 913     "bg_BG",    /* Bulgarian    Bulgaria */
 914     "bho_IN",   /* Bhojpuri     India */
 915     "bi_VU",    /* Bislama      Vanuatu */
 916     "bik_PH",   /* Bikol        Philippines */
 917     "bin_NG",   /* Bini         Nigeria */
 918     "bm_ML",    /* Bambara      Mali */
 919     "bn_IN",    /* Bengali      India */
 920     "bo_CN",    /* Tibetan      China */
 921     "br_FR",    /* Breton       France */
 922     "bs_BA",    /* Bosnian      Bosnia */
 923     "bug_ID",   /* Buginese     Indonesia */
 924     "ca_ES",    /* Catalan      Spain */
 925     "ce_RU",    /* Chechen      Russia */
 926     "ceb_PH",   /* Cebuano      Philippines */
 927     "co_FR",    /* Corsican     France */
 928     "cr_CA",    /* Cree         Canada */
 929     /* Don't put "crh_UZ" or "crh_UA" here.  That would be asking for fruitless
 930        political discussion.  */
 931     "cs_CZ",    /* Czech        Czech Republic */
 932     "csb_PL",   /* Kashubian    Poland */
 933     "cy_GB",    /* Welsh        Britain */
 934     "da_DK",    /* Danish       Denmark */
 935     "de_DE",    /* German       Germany */
 936     "din_SD",   /* Dinka        Sudan */
 937     "doi_IN",   /* Dogri        India */
 938     "dsb_DE",   /* Lower Sorbian        Germany */
 939     "dv_MV",    /* Divehi       Maldives */
 940     "dz_BT",    /* Dzongkha     Bhutan */
 941     "ee_GH",    /* Éwé          Ghana */
 942     "el_GR",    /* Greek        Greece */
 943     /* Don't put "en_GB" or "en_US" here.  That would be asking for fruitless
 944        political discussion.  */
 945     "es_ES",    /* Spanish      Spain */
 946     "et_EE",    /* Estonian     Estonia */
 947     "fa_IR",    /* Persian      Iran */
 948     "fi_FI",    /* Finnish      Finland */
 949     "fil_PH",   /* Filipino     Philippines */
 950     "fj_FJ",    /* Fijian       Fiji */
 951     "fo_FO",    /* Faroese      Faeroe Islands */
 952     "fon_BJ",   /* Fon          Benin */
 953     "fr_FR",    /* French       France */
 954     "fur_IT",   /* Friulian     Italy */
 955     "fy_NL",    /* Western Frisian      Netherlands */
 956     "ga_IE",    /* Irish        Ireland */
 957     "gd_GB",    /* Scottish Gaelic      Britain */
 958     "gon_IN",   /* Gondi        India */
 959     "gsw_CH",   /* Swiss German Switzerland */
 960     "gu_IN",    /* Gujarati     India */
 961     "he_IL",    /* Hebrew       Israel */
 962     "hi_IN",    /* Hindi        India */
 963     "hil_PH",   /* Hiligaynon   Philippines */
 964     "hr_HR",    /* Croatian     Croatia */
 965     "hsb_DE",   /* Upper Sorbian        Germany */
 966     "ht_HT",    /* Haitian      Haiti */
 967     "hu_HU",    /* Hungarian    Hungary */
 968     "hy_AM",    /* Armenian     Armenia */
 969     "id_ID",    /* Indonesian   Indonesia */
 970     "ig_NG",    /* Igbo         Nigeria */
 971     "ii_CN",    /* Sichuan Yi   China */
 972     "ilo_PH",   /* Iloko        Philippines */
 973     "is_IS",    /* Icelandic    Iceland */
 974     "it_IT",    /* Italian      Italy */
 975     "ja_JP",    /* Japanese     Japan */
 976     "jab_NG",   /* Hyam         Nigeria */
 977     "jv_ID",    /* Javanese     Indonesia */
 978     "ka_GE",    /* Georgian     Georgia */
 979     "kab_DZ",   /* Kabyle       Algeria */
 980     "kaj_NG",   /* Jju          Nigeria */
 981     "kam_KE",   /* Kamba        Kenya */
 982     "kmb_AO",   /* Kimbundu     Angola */
 983     "kcg_NG",   /* Tyap         Nigeria */
 984     "kdm_NG",   /* Kagoma       Nigeria */
 985     "kg_CD",    /* Kongo        Democratic Republic of Congo */
 986     "kk_KZ",    /* Kazakh       Kazakhstan */
 987     "kl_GL",    /* Kalaallisut  Greenland */
 988     "km_KH",    /* Central Khmer        Cambodia */
 989     "kn_IN",    /* Kannada      India */
 990     "ko_KR",    /* Korean       Korea (South) */
 991     "kok_IN",   /* Konkani      India */
 992     "kr_NG",    /* Kanuri       Nigeria */
 993     "kru_IN",   /* Kurukh       India */
 994     "ky_KG",    /* Kyrgyz       Kyrgyzstan */
 995     "lg_UG",    /* Ganda        Uganda */
 996     "li_BE",    /* Limburgish   Belgium */
 997     "lo_LA",    /* Laotian      Laos */
 998     "lt_LT",    /* Lithuanian   Lithuania */
 999     "lu_CD",    /* Luba-Katanga Democratic Republic of Congo */
1000     "lua_CD",   /* Luba-Lulua   Democratic Republic of Congo */
1001     "luo_KE",   /* Luo          Kenya */
1002     "lv_LV",    /* Latvian      Latvia */
1003     "mad_ID",   /* Madurese     Indonesia */
1004     "mag_IN",   /* Magahi       India */
1005     "mai_IN",   /* Maithili     India */
1006     "mak_ID",   /* Makasar      Indonesia */
1007     "man_ML",   /* Mandingo     Mali */
1008     "men_SL",   /* Mende        Sierra Leone */
1009     "mfe_MU",   /* Mauritian Creole     Mauritius */
1010     "mg_MG",    /* Malagasy     Madagascar */
1011     "mi_NZ",    /* Maori        New Zealand */
1012     "min_ID",   /* Minangkabau  Indonesia */
1013     "mk_MK",    /* Macedonian   North Macedonia */
1014     "ml_IN",    /* Malayalam    India */
1015     "mn_MN",    /* Mongolian    Mongolia */
1016     "mni_IN",   /* Manipuri     India */
1017     "mos_BF",   /* Mossi        Burkina Faso */
1018     "mr_IN",    /* Marathi      India */
1019     "ms_MY",    /* Malay        Malaysia */
1020     "mt_MT",    /* Maltese      Malta */
1021     "mwr_IN",   /* Marwari      India */
1022     "my_MM",    /* Burmese      Myanmar */
1023     "na_NR",    /* Nauru        Nauru */
1024     "nah_MX",   /* Nahuatl      Mexico */
1025     "nap_IT",   /* Neapolitan   Italy */
1026     "nb_NO",    /* Norwegian Bokmål    Norway */
1027     "nds_DE",   /* Low Saxon    Germany */
1028     "ne_NP",    /* Nepali       Nepal */
1029     "nl_NL",    /* Dutch        Netherlands */
1030     "nn_NO",    /* Norwegian Nynorsk    Norway */
1031     "no_NO",    /* Norwegian    Norway */
1032     "nr_ZA",    /* South Ndebele        South Africa */
1033     "nso_ZA",   /* Northern Sotho       South Africa */
1034     "ny_MW",    /* Chichewa     Malawi */
1035     "nym_TZ",   /* Nyamwezi     Tanzania */
1036     "nyn_UG",   /* Nyankole     Uganda */
1037     "oc_FR",    /* Occitan      France */
1038     "oj_CA",    /* Ojibwa       Canada */
1039     "or_IN",    /* Oriya        India */
1040     "pa_IN",    /* Punjabi      India */
1041     "pag_PH",   /* Pangasinan   Philippines */
1042     "pam_PH",   /* Pampanga     Philippines */
1043     "pap_AN",   /* Papiamento   Netherlands Antilles - this line can be removed in 2018 */
1044     "pbb_CO",   /* Páez         Colombia */
1045     "pl_PL",    /* Polish       Poland */
1046     "ps_AF",    /* Pashto       Afghanistan */
1047     "pt_PT",    /* Portuguese   Portugal */
1048     "raj_IN",   /* Rajasthani   India */
1049     "rm_CH",    /* Romansh      Switzerland */
1050     "rn_BI",    /* Kirundi      Burundi */
1051     "ro_RO",    /* Romanian     Romania */
1052     "ru_RU",    /* Russian      Russia */
1053     "rw_RW",    /* Kinyarwanda  Rwanda */
1054     "sa_IN",    /* Sanskrit     India */
1055     "sah_RU",   /* Yakut        Russia */
1056     "sas_ID",   /* Sasak        Indonesia */
1057     "sat_IN",   /* Santali      India */
1058     "sc_IT",    /* Sardinian    Italy */
1059     "scn_IT",   /* Sicilian     Italy */
1060     "sg_CF",    /* Sango        Central African Republic */
1061     "shn_MM",   /* Shan         Myanmar */
1062     "si_LK",    /* Sinhala      Sri Lanka */
1063     "sid_ET",   /* Sidamo       Ethiopia */
1064     "sk_SK",    /* Slovak       Slovakia */
1065     "sl_SI",    /* Slovenian    Slovenia */
1066     "sm_WS",    /* Samoan       Samoa */
1067     "smn_FI",   /* Inari Sami   Finland */
1068     "sms_FI",   /* Skolt Sami   Finland */
1069     "so_SO",    /* Somali       Somalia */
1070     "sq_AL",    /* Albanian     Albania */
1071     "sr_RS",    /* Serbian      Serbia */
1072     "srr_SN",   /* Serer        Senegal */
1073     "suk_TZ",   /* Sukuma       Tanzania */
1074     "sus_GN",   /* Susu         Guinea */
1075     "sv_SE",    /* Swedish      Sweden */
1076     "te_IN",    /* Telugu       India */
1077     "tem_SL",   /* Timne        Sierra Leone */
1078     "tet_ID",   /* Tetum        Indonesia */
1079     "tg_TJ",    /* Tajik        Tajikistan */
1080     "th_TH",    /* Thai         Thailand */
1081     "ti_ER",    /* Tigrinya     Eritrea */
1082     "tiv_NG",   /* Tiv          Nigeria */
1083     "tk_TM",    /* Turkmen      Turkmenistan */
1084     "tl_PH",    /* Tagalog      Philippines */
1085     "to_TO",    /* Tonga        Tonga */
1086     "tpi_PG",   /* Tok Pisin    Papua New Guinea */
1087     "tr_TR",    /* Turkish      Türkiye */
1088     "tum_MW",   /* Tumbuka      Malawi */
1089     "ug_CN",    /* Uighur       China */
1090     "uk_UA",    /* Ukrainian    Ukraine */
1091     "umb_AO",   /* Umbundu      Angola */
1092     "ur_PK",    /* Urdu         Pakistan */
1093     "uz_UZ",    /* Uzbek        Uzbekistan */
1094     "ve_ZA",    /* Venda        South Africa */
1095     "vi_VN",    /* Vietnamese   Vietnam */
1096     "wa_BE",    /* Walloon      Belgium */
1097     "wal_ET",   /* Walamo       Ethiopia */
1098     "war_PH",   /* Waray        Philippines */
1099     "wen_DE",   /* Sorbian      Germany */
1100     "yao_MW",   /* Yao          Malawi */
1101     "zap_MX"    /* Zapotec      Mexico */
1102   };
1103
/* Three-way comparison of the language parts of two locale names.
   The language part is everything before the first '_', or the whole
   string when there is no '_'.  When one language part is a proper prefix
   of the other, the shorter one sorts first (-1 / 1); otherwise the result
   is that of memcmp on the common length.  */
static int
langcmp (const char *locale1, const char *locale2)
{
  const char *sep1 = strchr (locale1, '_');
  const char *sep2 = strchr (locale2, '_');
  size_t len1 = (sep1 != NULL ? (size_t) (sep1 - locale1) : strlen (locale1));
  size_t len2 = (sep2 != NULL ? (size_t) (sep2 - locale2) : strlen (locale2));
  size_t common = (len1 < len2 ? len1 : len2);
  int result = memcmp (locale1, locale2, common);

  /* Equal on the common prefix: the length decides.  */
  if (result == 0 && len1 != len2)
    result = (len1 < len2 ? -1 : 1);

  return result;
}
1142
1143 /* Given a locale name, return the main locale with the same language,
1144    or NULL if not found.
1145    For example: "fr_DE" -> "fr_FR".  */
1146 static const char *
1147 get_main_locale_with_same_language (const char *locale)
1148 {
1149 #   define table locales_with_principal_territory
1150   /* The table is sorted.  Perform a binary search.  */
1151   size_t hi = sizeof (table) / sizeof (table[0]);
1152   size_t lo = 0;
1153   while (lo < hi)
1154     {
1155       /* Invariant:
1156          for i < lo, langcmp (table[i], locale) < 0,
1157          for i >= hi, langcmp (table[i], locale) > 0.  */
1158       size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1159       int cmp = langcmp (table[mid], locale);
1160       if (cmp < 0)
1161         lo = mid + 1;
1162       else if (cmp > 0)
1163         hi = mid;
1164       else
1165         {
1166           /* Found an i with
1167                langcmp (language_table[i], locale) == 0.
1168              Verify that it is the only such i.  */
1169           if (mid > lo && langcmp (table[mid - 1], locale) >= 0)
1170             abort ();
1171           if (mid + 1 < hi && langcmp (table[mid + 1], locale) <= 0)
1172             abort ();
1173           return table[mid];
1174         }
1175     }
1176 #   undef table
1177   return NULL;
1178 }
1179
/* Mapping from territory to the main language spoken in that territory.
   Each entry is a locale name "ll_CC" or "lll_CC"; the entries are listed
   in ascending order of the territory code CC, which is what the binary
   search in get_main_locale_with_same_territory relies on.  */
static char const locales_with_principal_language[][sizeof "lll_CC"] =
  {
    /* Sources: the set of locales that exist in glibc (duplicates removed)
       and the Wikipedia pages named "Languages of <territory>".
       If in doubt, use the locale that exists in macOS.  For example, the only
       "*_IN" locale in macOS 10.13 is "hi_IN", so use that.  */
    /* A shell function that helps producing a line of this table:
         func_line ()
         {
           # Usage: func_line ll_CC
           ll=`echo "$1" | sed -e 's|_.*||'`
           cc=`echo "$1" | sed -e 's|^.*_||'`
           llx=`sed -n -e "s|^${ll} ||p" < gettext-tools/doc/ISO_639`
           ccx=`expand gettext-tools/doc/ISO_3166 | sed -n -e "s|^${cc}  *||p"`
           echo "    \"$1\",    /$X* ${llx} ${ccx} *$X/"
         }
     */
    /* Locale      Territory: main language */
    "ca_AD",    /* Andorra: Catalan */
    "ar_AE",    /* United Arab Emirates: Arabic */
    "ps_AF",    /* Afghanistan: Pashto */
    "en_AG",    /* Antigua and Barbuda: English */
    "sq_AL",    /* Albania: Albanian */
    "hy_AM",    /* Armenia: Armenian */
    "pap_AN",   /* Netherlands Antilles: Papiamento - this line can be removed in 2018 */
    "pt_AO",    /* Angola: Portuguese */
    "es_AR",    /* Argentina: Spanish */
    "de_AT",    /* Austria: German */
    "en_AU",    /* Australia: English */
    /* Aruba has two official languages: "nl_AW", "pap_AW".  */
    "az_AZ",    /* Azerbaijan: Azerbaijani */
    "bs_BA",    /* Bosnia: Bosnian */
    "bn_BD",    /* Bangladesh: Bengali */
    "nl_BE",    /* Belgium: Dutch */
    "fr_BF",    /* Burkina Faso: French */
    "bg_BG",    /* Bulgaria: Bulgarian */
    "ar_BH",    /* Bahrain: Arabic */
    "rn_BI",    /* Burundi: Kirundi */
    "fr_BJ",    /* Benin: French */
    "es_BO",    /* Bolivia: Spanish */
    "pt_BR",    /* Brazil: Portuguese */
    "dz_BT",    /* Bhutan: Dzongkha */
    "en_BW",    /* Botswana: English */
    "be_BY",    /* Belarus: Belarusian */
    "en_CA",    /* Canada: English */
    "fr_CD",    /* Democratic Republic of Congo: French */
    "sg_CF",    /* Central African Republic: Sango */
    "de_CH",    /* Switzerland: German */
    "es_CL",    /* Chile: Spanish */
    "zh_CN",    /* China: Chinese */
    "es_CO",    /* Colombia: Spanish */
    "es_CR",    /* Costa Rica: Spanish */
    "es_CU",    /* Cuba: Spanish */
    /* Curaçao has three official languages: "nl_CW", "pap_CW", "en_CW".  */
    "el_CY",    /* Cyprus: Greek */
    "cs_CZ",    /* Czech Republic: Czech */
    "de_DE",    /* Germany: German */
    /* Djibouti has two official languages: "ar_DJ" and "fr_DJ".  */
    "da_DK",    /* Denmark: Danish */
    "es_DO",    /* Dominican Republic: Spanish */
    "ar_DZ",    /* Algeria: Arabic */
    "es_EC",    /* Ecuador: Spanish */
    "et_EE",    /* Estonia: Estonian */
    "ar_EG",    /* Egypt: Arabic */
    "ti_ER",    /* Eritrea: Tigrinya */
    "es_ES",    /* Spain: Spanish */
    "am_ET",    /* Ethiopia: Amharic */
    "fi_FI",    /* Finland: Finnish */
    /* Fiji has three official languages: "en_FJ", "fj_FJ", "hif_FJ".  */
    "fo_FO",    /* Faeroe Islands: Faroese */
    "fr_FR",    /* France: French */
    "en_GB",    /* Britain: English */
    "ka_GE",    /* Georgia: Georgian */
    "en_GH",    /* Ghana: English */
    "kl_GL",    /* Greenland: Kalaallisut */
    "fr_GN",    /* Guinea: French */
    "el_GR",    /* Greece: Greek */
    "es_GT",    /* Guatemala: Spanish */
    "zh_HK",    /* Hong Kong: Chinese */
    "es_HN",    /* Honduras: Spanish */
    "hr_HR",    /* Croatia: Croatian */
    "ht_HT",    /* Haiti: Haitian */
    "hu_HU",    /* Hungary: Hungarian */
    "id_ID",    /* Indonesia: Indonesian */
    "en_IE",    /* Ireland: English */
    "he_IL",    /* Israel: Hebrew */
    "hi_IN",    /* India: Hindi */
    "ar_IQ",    /* Iraq: Arabic */
    "fa_IR",    /* Iran: Persian */
    "is_IS",    /* Iceland: Icelandic */
    "it_IT",    /* Italy: Italian */
    "ar_JO",    /* Jordan: Arabic */
    "ja_JP",    /* Japan: Japanese */
    "sw_KE",    /* Kenya: Swahili */
    "ky_KG",    /* Kyrgyzstan: Kyrgyz */
    "km_KH",    /* Cambodia: Central Khmer */
    "ko_KR",    /* Korea (South): Korean */
    "ar_KW",    /* Kuwait: Arabic */
    "kk_KZ",    /* Kazakhstan: Kazakh */
    "lo_LA",    /* Laos: Laotian */
    "ar_LB",    /* Lebanon: Arabic */
    "de_LI",    /* Liechtenstein: German */
    "si_LK",    /* Sri Lanka: Sinhala */
    "lt_LT",    /* Lithuania: Lithuanian */
    /* Luxembourg has three official languages: "lb_LU", "fr_LU", "de_LU".  */
    "lv_LV",    /* Latvia: Latvian */
    "ar_LY",    /* Libya: Arabic */
    "ar_MA",    /* Morocco: Arabic */
    "sr_ME",    /* Montenegro: Serbian */
    "mg_MG",    /* Madagascar: Malagasy */
    "mk_MK",    /* North Macedonia: Macedonian */
    "fr_ML",    /* Mali: French */
    "my_MM",    /* Myanmar: Burmese */
    "mn_MN",    /* Mongolia: Mongolian */
    "mt_MT",    /* Malta: Maltese */
    "mfe_MU",   /* Mauritius: Mauritian Creole */
    "dv_MV",    /* Maldives: Divehi */
    "ny_MW",    /* Malawi: Chichewa */
    "es_MX",    /* Mexico: Spanish */
    "ms_MY",    /* Malaysia: Malay */
    "en_NG",    /* Nigeria: English */
    "es_NI",    /* Nicaragua: Spanish */
    "nl_NL",    /* Netherlands: Dutch */
    "no_NO",    /* Norway: Norwegian */
    "ne_NP",    /* Nepal: Nepali */
    "na_NR",    /* Nauru: Nauru */
    "niu_NU",   /* Niue: Niuean */
    "en_NZ",    /* New Zealand: English */
    "ar_OM",    /* Oman: Arabic */
    "es_PA",    /* Panama: Spanish */
    "es_PE",    /* Peru: Spanish */
    "tpi_PG",   /* Papua New Guinea: Tok Pisin */
    "fil_PH",   /* Philippines: Filipino */
    "pa_PK",    /* Pakistan: Punjabi */
    "pl_PL",    /* Poland: Polish */
    "es_PR",    /* Puerto Rico: Spanish */
    "pt_PT",    /* Portugal: Portuguese */
    "es_PY",    /* Paraguay: Spanish */
    "ar_QA",    /* Qatar: Arabic */
    "ro_RO",    /* Romania: Romanian */
    "sr_RS",    /* Serbia: Serbian */
    "ru_RU",    /* Russia: Russian */
    "rw_RW",    /* Rwanda: Kinyarwanda */
    "ar_SA",    /* Saudi Arabia: Arabic */
    "en_SC",    /* Seychelles: English */
    "ar_SD",    /* Sudan: Arabic */
    "sv_SE",    /* Sweden: Swedish */
    "en_SG",    /* Singapore: English */
    "sl_SI",    /* Slovenia: Slovenian */
    "sk_SK",    /* Slovakia: Slovak */
    "en_SL",    /* Sierra Leone: English */
    "fr_SN",    /* Senegal: French */
    "so_SO",    /* Somalia: Somali */
    "ar_SS",    /* South Sudan: Arabic */
    "es_SV",    /* El Salvador: Spanish */
    "ar_SY",    /* Syria: Arabic */
    "th_TH",    /* Thailand: Thai */
    "tg_TJ",    /* Tajikistan: Tajik */
    "tk_TM",    /* Turkmenistan: Turkmen */
    "ar_TN",    /* Tunisia: Arabic */
    "to_TO",    /* Tonga: Tonga */
    "tr_TR",    /* Türkiye: Turkish */
    "zh_TW",    /* Taiwan: Chinese */
    "sw_TZ",    /* Tanzania: Swahili */
    "uk_UA",    /* Ukraine: Ukrainian */
    "lg_UG",    /* Uganda: Ganda */
    "en_US",    /* United States of America: English */
    "es_UY",    /* Uruguay: Spanish */
    "uz_UZ",    /* Uzbekistan: Uzbek */
    "es_VE",    /* Venezuela: Spanish */
    "vi_VN",    /* Vietnam: Vietnamese */
    "bi_VU",    /* Vanuatu: Bislama */
    "sm_WS",    /* Samoa: Samoan */
    "ar_YE",    /* Yemen: Arabic */
    "en_ZA",    /* South Africa: English */
    "en_ZM",    /* Zambia: English */
    "en_ZW"     /* Zimbabwe: English */
  };
1359
/* Three-way comparison of the territory parts of two locale names.
   The territory part is everything after the last '_'.
   Both LOCALE1 and LOCALE2 must contain a '_'; the caller is responsible
   for checking that.  */
static int
terrcmp (const char *locale1, const char *locale2)
{
  const char *sep1 = strrchr (locale1, '_');
  const char *sep2 = strrchr (locale2, '_');

  return strcmp (sep1 + 1, sep2 + 1);
}
1369
1370 /* Given a locale name, return the locale corresponding to the main language
1371    with the same territory, or NULL if not found.
1372    For example: "fr_DE" -> "de_DE".  */
1373 static const char *
1374 get_main_locale_with_same_territory (const char *locale)
1375 {
1376   if (strrchr (locale, '_') != NULL)
1377     {
1378 #   define table locales_with_principal_language
1379       /* The table is sorted.  Perform a binary search.  */
1380       size_t hi = sizeof (table) / sizeof (table[0]);
1381       size_t lo = 0;
1382       while (lo < hi)
1383         {
1384           /* Invariant:
1385              for i < lo, terrcmp (table[i], locale) < 0,
1386              for i >= hi, terrcmp (table[i], locale) > 0.  */
1387           size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1388           int cmp = terrcmp (table[mid], locale);
1389           if (cmp < 0)
1390             lo = mid + 1;
1391           else if (cmp > 0)
1392             hi = mid;
1393           else
1394             {
1395               /* Found an i with
1396                    terrcmp (language_table[i], locale) == 0.
1397                  Verify that it is the only such i.  */
1398               if (mid > lo && terrcmp (table[mid - 1], locale) >= 0)
1399                 abort ();
1400               if (mid + 1 < hi && terrcmp (table[mid + 1], locale) <= 0)
1401                 abort ();
1402               return table[mid];
1403             }
1404         }
1405 #   undef table
1406     }
1407   return NULL;
1408 }
1409
1410 #  endif
1411
1412 char *
1413 setlocale_improved (int category, const char *locale)
1414 {
1415   if (locale != NULL && locale[0] == '\0')
1416     {
1417       /* A request to set the current locale to the default locale.  */
1418       if (category == LC_ALL)
1419         {
1420           /* Set LC_CTYPE first.  Then the other categories.  */
1421           static int const categories[] =
1422             {
1423               LC_CTYPE,
1424               LC_NUMERIC,
1425               LC_TIME,
1426               LC_COLLATE,
1427               LC_MONETARY,
1428               LC_MESSAGES
1429             };
1430           char *saved_locale;
1431           const char *base_name;
1432           unsigned int i;
1433
1434           /* Back up the old locale, in case one of the steps fails.  */
1435           saved_locale = setlocale (LC_ALL, NULL);
1436           if (saved_locale == NULL)
1437             return NULL;
1438           saved_locale = strdup (saved_locale);
1439           if (saved_locale == NULL)
1440             return NULL;
1441
1442           /* Set LC_CTYPE category.  Set all other categories (except possibly
1443              LC_MESSAGES) to the same value in the same call; this is likely to
1444              save calls.  */
1445           base_name =
1446             gl_locale_name_environ (LC_CTYPE, category_to_name (LC_CTYPE));
1447           if (base_name == NULL)
1448             base_name = gl_locale_name_default ();
1449
1450           if (setlocale_unixlike (LC_ALL, base_name) != NULL)
1451             {
1452               /* LC_CTYPE category already set.  */
1453               i = 1;
1454             }
1455           else
1456             {
1457               /* On Mac OS X, "UTF-8" is a valid locale name for LC_CTYPE but
1458                  not for LC_ALL.  Therefore this call may fail.  So, try
1459                  another base_name.  */
1460               base_name = "C";
1461               if (setlocale_unixlike (LC_ALL, base_name) == NULL)
1462                 goto fail;
1463               i = 0;
1464             }
1465 #  if defined _WIN32 && ! defined __CYGWIN__
1466           /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
1467              LC_CTYPE category to an invalid value ("C") when it does not
1468              support the specified encoding.  Report a failure instead.  */
1469           if (strchr (base_name, '.') != NULL
1470               && strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
1471             goto fail;
1472 #  endif
1473
1474           for (; i < sizeof (categories) / sizeof (categories[0]); i++)
1475             {
1476               int cat = categories[i];
1477               const char *name;
1478
1479               name = gl_locale_name_environ (cat, category_to_name (cat));
1480               if (name == NULL)
1481                 name = gl_locale_name_default ();
1482
1483               /* If name is the same as base_name, it has already been set
1484                  through the setlocale call before the loop.  */
1485               if (strcmp (name, base_name) != 0
1486 #  if LC_MESSAGES == 1729
1487                   || cat == LC_MESSAGES
1488 #  endif
1489                  )
1490                 if (setlocale_single (cat, name) == NULL)
1491 #  if defined __APPLE__ && defined __MACH__
1492                   {
1493                     /* On Mac OS X 10.13, some locales can be set through
1494                        System Preferences > Language & Region, that are not
1495                        supported by libc.  The system's setlocale() falls
1496                        back to "C" for these locale categories.  We can do
1497                        better, by trying an existing locale with the same
1498                        language or an existing locale with the same territory.
1499                        If we can't, print a warning, to limit user
1500                        expectations.  */
1501                     int warn = 0;
1502
1503                     if (cat == LC_CTYPE)
1504                       warn = (setlocale_single (cat, "UTF-8") == NULL);
1505                     else if (cat == LC_MESSAGES)
1506                       {
1507 #   if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1508                         /* Take the primary language preference.  */
1509 #    if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
1510                         CFArrayRef prefArray = CFLocaleCopyPreferredLanguages ();
1511 #    elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1512                         CFTypeRef preferences =
1513                           CFPreferencesCopyAppValue (CFSTR ("AppleLanguages"),
1514                                                      kCFPreferencesCurrentApplication);
1515                         if (preferences != NULL
1516                             && CFGetTypeID (preferences) == CFArrayGetTypeID ())
1517                           {
1518                             CFArrayRef prefArray = (CFArrayRef)preferences;
1519 #    endif
1520                             int n = CFArrayGetCount (prefArray);
1521                             if (n > 0)
1522                               {
1523                                 char buf[256];
1524                                 CFTypeRef element = CFArrayGetValueAtIndex (prefArray, 0);
1525                                 if (element != NULL
1526                                     && CFGetTypeID (element) == CFStringGetTypeID ()
1527                                     && CFStringGetCString ((CFStringRef)element,
1528                                                            buf, sizeof (buf),
1529                                                            kCFStringEncodingASCII))
1530                                   {
1531                                     /* Remove the country.
1532                                        E.g. "zh-Hans-DE" -> "zh-Hans".  */
1533                                     char *last_minus = strrchr (buf, '-');
1534                                     if (last_minus != NULL)
1535                                       *last_minus = '\0';
1536
1537                                     /* Convert to Unix locale name.
1538                                        E.g. "zh-Hans" -> "zh_CN".  */
1539                                     gl_locale_name_canonicalize (buf);
1540
1541                                     /* Try setlocale with this value.  */
1542                                     if (setlocale_single (cat, buf) == NULL)
1543                                       {
1544                                         const char *last_try =
1545                                           get_main_locale_with_same_language (buf);
1546
1547                                         if (last_try == NULL
1548                                             || setlocale_single (cat, last_try) == NULL)
1549                                           warn = 1;
1550                                       }
1551                                   }
1552                               }
1553 #    if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
1554                         CFRelease (prefArray);
1555 #    elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
1556                           }
1557 #    endif
1558 #   else
1559                         const char *last_try =
1560                           get_main_locale_with_same_language (name);
1561
1562                         if (last_try == NULL
1563                             || setlocale_single (cat, last_try) == NULL)
1564                           warn = 1;
1565 #   endif
1566                       }
1567                     else
1568                       {
1569                         /* For LC_NUMERIC, the application should use the locale
1570                            properties kCFLocaleDecimalSeparator,
1571                            kCFLocaleGroupingSeparator.
1572                            For LC_TIME, the application should use the locale
1573                            property kCFLocaleCalendarIdentifier.
1574                            For LC_COLLATE, the application should use the locale
1575                            properties kCFLocaleCollationIdentifier,
1576                            kCFLocaleCollatorIdentifier.
1577                            For LC_MONETARY, the application should use the locale
1578                            properties kCFLocaleCurrencySymbol,
1579                            kCFLocaleCurrencyCode.
1580                            But since most applications don't have macOS specific
1581                            code like this, try an existing locale with the same
1582                            territory.  */
1583                         const char *last_try =
1584                           get_main_locale_with_same_territory (name);
1585
1586                         if (last_try == NULL
1587                             || setlocale_single (cat, last_try) == NULL)
1588                           warn = 1;
1589                       }
1590
1591                     if (warn)
1592                       {
1593                         /* Warn only if the environment variable
1594                            SETLOCALE_VERBOSE is set.  Otherwise these warnings
1595                            are just annoyances, since normal users won't invoke
1596                            'localedef'.  */
1597                         const char *verbose = getenv ("SETLOCALE_VERBOSE");
1598                         if (verbose != NULL && verbose[0] != '\0')
1599                           fprintf (stderr,
1600                                    "Warning: Failed to set locale category %s to %s.\n",
1601                                    category_to_name (cat), name);
1602                       }
1603                   }
1604 #  else
1605                   goto fail;
1606 #  endif
1607             }
1608
1609           /* All steps were successful.  */
1610           free (saved_locale);
1611           return setlocale (LC_ALL, NULL);
1612
1613         fail:
1614           if (saved_locale[0] != '\0') /* don't risk an endless recursion */
1615             setlocale (LC_ALL, saved_locale);
1616           free (saved_locale);
1617           return NULL;
1618         }
1619       else
1620         {
1621           const char *name =
1622             gl_locale_name_environ (category, category_to_name (category));
1623           if (name == NULL)
1624             name = gl_locale_name_default ();
1625
1626           return setlocale_single (category, name);
1627         }
1628     }
1629   else
1630     {
1631 #  if defined _WIN32 && ! defined __CYGWIN__
1632       if (category == LC_ALL && locale != NULL && strchr (locale, '.') != NULL)
1633         {
1634           char *saved_locale;
1635
1636           /* Back up the old locale.  */
1637           saved_locale = setlocale (LC_ALL, NULL);
1638           if (saved_locale == NULL)
1639             return NULL;
1640           saved_locale = strdup (saved_locale);
1641           if (saved_locale == NULL)
1642             return NULL;
1643
1644           if (setlocale_unixlike (LC_ALL, locale) == NULL)
1645             {
1646               free (saved_locale);
1647               return NULL;
1648             }
1649
1650           /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
1651              LC_CTYPE category to an invalid value ("C") when it does not
1652              support the specified encoding.  Report a failure instead.  */
1653           if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
1654             {
1655               if (saved_locale[0] != '\0') /* don't risk an endless recursion */
1656                 setlocale (LC_ALL, saved_locale);
1657               free (saved_locale);
1658               return NULL;
1659             }
1660
1661           /* It was really successful.  */
1662           free (saved_locale);
1663           return setlocale (LC_ALL, NULL);
1664         }
1665       else
1666 #  endif
1667         return setlocale_single (category, locale);
1668     }
1669 }
1670
1671 # endif /* NEED_SETLOCALE_IMPROVED */
1672
1673 #endif