exp2l: Work around a NetBSD 10.0/i386 bug.
[gnulib.git] / lib / setlocale.c
blobeb263617dc8c83a4aadf6f6620a9682e1b020731
1 /* Set the current locale. -*- coding: utf-8 -*-
2 Copyright (C) 2009, 2011-2024 Free Software Foundation, Inc.
4 This file is free software: you can redistribute it and/or modify
5 it under the terms of the GNU Lesser General Public License as
6 published by the Free Software Foundation, either version 3 of the
7 License, or (at your option) any later version.
9 This file is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public License
15 along with this program. If not, see <https://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
19 #include <config.h>
21 /* Override setlocale() so that when the default locale is requested
22 (locale = ""), the environment variables LC_ALL, LC_*, and LANG are
23 considered.
24 Also include all the functionality from libintl's setlocale() override. */
26 /* Please keep this file in sync with
27 gettext/gettext-runtime/intl/setlocale.c ! */
29 /* Specification. */
30 #include <locale.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
36 #include "localename.h"
38 #if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE
39 # if HAVE_CFLOCALECOPYPREFERREDLANGUAGES
40 # include <CoreFoundation/CFLocale.h>
41 # elif HAVE_CFPREFERENCESCOPYAPPVALUE
42 # include <CoreFoundation/CFPreferences.h>
43 # endif
44 # include <CoreFoundation/CFPropertyList.h>
45 # include <CoreFoundation/CFArray.h>
46 # include <CoreFoundation/CFString.h>
47 extern void gl_locale_name_canonicalize (char *name);
48 #endif
50 #if 1
52 # undef setlocale
54 /* Which of the replacements to activate? */
55 # if NEED_SETLOCALE_IMPROVED
56 # define setlocale_improved rpl_setlocale
57 # elif NEED_SETLOCALE_MTSAFE
58 # define setlocale_mtsafe rpl_setlocale
59 # else
60 # error "This file should only be compiled if NEED_SETLOCALE_IMPROVED || NEED_SETLOCALE_MTSAFE."
61 # endif
63 /* Like setlocale, but guaranteed to be multithread-safe if LOCALE == NULL. */
64 # if !SETLOCALE_NULL_ALL_MTSAFE || !SETLOCALE_NULL_ONE_MTSAFE /* i.e. if NEED_SETLOCALE_MTSAFE */
/* Wrapper around the system's setlocale.
   A pure query (LOCALE == NULL) is answered through setlocale_null;
   every other request is handed to setlocale unchanged.
   The function is static when NEED_SETLOCALE_IMPROVED is set.  */
#  if NEED_SETLOCALE_IMPROVED
static
#  endif
char *
setlocale_mtsafe (int category, const char *locale)
{
  return (locale != NULL
          ? setlocale (category, locale)
          : (char *) setlocale_null (category));
}
77 # else /* !NEED_SETLOCALE_MTSAFE */
79 # define setlocale_mtsafe setlocale
81 # endif /* NEED_SETLOCALE_MTSAFE */
83 # if NEED_SETLOCALE_IMPROVED
/* Map a locale category constant to its symbolic name, for example
   LC_TIME -> "LC_TIME".
   Any value that is not one of the six categories listed below yields the
   placeholder "LC_XXX".  */
static const char *
category_to_name (int category)
{
  static const struct
  {
    int value;
    const char *name;
  } known_categories[] =
    {
      { LC_COLLATE,  "LC_COLLATE" },
      { LC_CTYPE,    "LC_CTYPE" },
      { LC_MONETARY, "LC_MONETARY" },
      { LC_NUMERIC,  "LC_NUMERIC" },
      { LC_TIME,     "LC_TIME" },
      { LC_MESSAGES, "LC_MESSAGES" }
    };
  size_t k;

  for (k = 0; k < sizeof (known_categories) / sizeof (known_categories[0]); k++)
    if (known_categories[k].value == category)
      return known_categories[k].name;

  /* If you have a better idea for a default value let me know.  */
  return "LC_XXX";
}
119 # if defined _WIN32 && ! defined __CYGWIN__
121 /* The native Windows setlocale() function expects locale names of the form
122 "German" or "German_Germany" or "DEU", but not "de" or "de_DE". We need
123 to convert the names from the form with ISO 639 language code and ISO 3166
124 country code to the form with English names or with three-letter identifier.
125 The three-letter identifiers known by a Windows XP SP2 or SP3 are:
126 AFK Afrikaans_South Africa.1252
127 ARA Arabic_Saudi Arabia.1256
128 ARB Arabic_Lebanon.1256
129 ARE Arabic_Egypt.1256
130 ARG Arabic_Algeria.1256
131 ARH Arabic_Bahrain.1256
132 ARI Arabic_Iraq.1256
133 ARJ Arabic_Jordan.1256
134 ARK Arabic_Kuwait.1256
135 ARL Arabic_Libya.1256
136 ARM Arabic_Morocco.1256
137 ARO Arabic_Oman.1256
138 ARQ Arabic_Qatar.1256
139 ARS Arabic_Syria.1256
140 ART Arabic_Tunisia.1256
141 ARU Arabic_U.A.E..1256
142 ARY Arabic_Yemen.1256
143 AZE Azeri (Latin)_Azerbaijan.1254
144 BEL Belarusian_Belarus.1251
145 BGR Bulgarian_Bulgaria.1251
146 BSB Bosnian_Bosnia and Herzegovina.1250
147 BSC Bosnian (Cyrillic)_Bosnia and Herzegovina.1250 (wrong encoding!)
148 CAT Catalan_Spain.1252
149 CHH Chinese_Hong Kong S.A.R..950
150 CHI Chinese_Singapore.936
151 CHS Chinese_People's Republic of China.936
152 CHT Chinese_Taiwan.950
153 CSY Czech_Czech Republic.1250
154 CYM Welsh_United Kingdom.1252
155 DAN Danish_Denmark.1252
156 DEA German_Austria.1252
157 DEC German_Liechtenstein.1252
158 DEL German_Luxembourg.1252
159 DES German_Switzerland.1252
160 DEU German_Germany.1252
161 ELL Greek_Greece.1253
162 ENA English_Australia.1252
163 ENB English_Caribbean.1252
164 ENC English_Canada.1252
165 ENG English_United Kingdom.1252
166 ENI English_Ireland.1252
167 ENJ English_Jamaica.1252
168 ENL English_Belize.1252
169 ENP English_Republic of the Philippines.1252
170 ENS English_South Africa.1252
171 ENT English_Trinidad and Tobago.1252
172 ENU English_United States.1252
173 ENW English_Zimbabwe.1252
174 ENZ English_New Zealand.1252
175 ESA Spanish_Panama.1252
176 ESB Spanish_Bolivia.1252
177 ESC Spanish_Costa Rica.1252
178 ESD Spanish_Dominican Republic.1252
179 ESE Spanish_El Salvador.1252
180 ESF Spanish_Ecuador.1252
181 ESG Spanish_Guatemala.1252
182 ESH Spanish_Honduras.1252
183 ESI Spanish_Nicaragua.1252
184 ESL Spanish_Chile.1252
185 ESM Spanish_Mexico.1252
186 ESN Spanish_Spain.1252
187 ESO Spanish_Colombia.1252
188 ESP Spanish_Spain.1252
189 ESR Spanish_Peru.1252
190 ESS Spanish_Argentina.1252
191 ESU Spanish_Puerto Rico.1252
192 ESV Spanish_Venezuela.1252
193 ESY Spanish_Uruguay.1252
194 ESZ Spanish_Paraguay.1252
195 ETI Estonian_Estonia.1257
196 EUQ Basque_Spain.1252
197 FAR Farsi_Iran.1256
198 FIN Finnish_Finland.1252
199 FOS Faroese_Faroe Islands.1252
200 FPO Filipino_Philippines.1252
201 FRA French_France.1252
202 FRB French_Belgium.1252
203 FRC French_Canada.1252
204 FRL French_Luxembourg.1252
205 FRM French_Principality of Monaco.1252
206 FRS French_Switzerland.1252
207 FYN Frisian_Netherlands.1252
208 GLC Galician_Spain.1252
209 HEB Hebrew_Israel.1255
210 HRB Croatian_Bosnia and Herzegovina.1250
211 HRV Croatian_Croatia.1250
212 HUN Hungarian_Hungary.1250
213 IND Indonesian_Indonesia.1252
214 IRE Irish_Ireland.1252
215 ISL Icelandic_Iceland.1252
216 ITA Italian_Italy.1252
217 ITS Italian_Switzerland.1252
218 IUK Inuktitut (Latin)_Canada.1252
219 JPN Japanese_Japan.932
220 KKZ Kazakh_Kazakhstan.1251
221 KOR Korean_Korea.949
222 KYR Kyrgyz_Kyrgyzstan.1251
223 LBX Luxembourgish_Luxembourg.1252
224 LTH Lithuanian_Lithuania.1257
225 LVI Latvian_Latvia.1257
226 MKI FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251
227 MON Mongolian_Mongolia.1251
228 MPD Mapudungun_Chile.1252
229 MSB Malay_Brunei Darussalam.1252
230 MSL Malay_Malaysia.1252
231 MWK Mohawk_Canada.1252
232 NLB Dutch_Belgium.1252
233 NLD Dutch_Netherlands.1252
234 NON Norwegian-Nynorsk_Norway.1252
235 NOR Norwegian (Bokmål)_Norway.1252
236 NSO Northern Sotho_South Africa.1252
237 PLK Polish_Poland.1250
238 PTB Portuguese_Brazil.1252
239 PTG Portuguese_Portugal.1252
240 QUB Quechua_Bolivia.1252
241 QUE Quechua_Ecuador.1252
242 QUP Quechua_Peru.1252
243 RMC Romansh_Switzerland.1252
244 ROM Romanian_Romania.1250
245 RUS Russian_Russia.1251
246 SKY Slovak_Slovakia.1250
247 SLV Slovenian_Slovenia.1250
248 SMA Sami (Southern)_Norway.1252
249 SMB Sami (Southern)_Sweden.1252
250 SME Sami (Northern)_Norway.1252
251 SMF Sami (Northern)_Sweden.1252
252 SMG Sami (Northern)_Finland.1252
253 SMJ Sami (Lule)_Norway.1252
254 SMK Sami (Lule)_Sweden.1252
255 SMN Sami (Inari)_Finland.1252
256 SMS Sami (Skolt)_Finland.1252
257 SQI Albanian_Albania.1250
258 SRB Serbian (Cyrillic)_Serbia and Montenegro.1251
259 SRL Serbian (Latin)_Serbia and Montenegro.1250
260 SRN Serbian (Cyrillic)_Bosnia and Herzegovina.1251
261 SRS Serbian (Latin)_Bosnia and Herzegovina.1250
262 SVE Swedish_Sweden.1252
263 SVF Swedish_Finland.1252
264 SWK Swahili_Kenya.1252
265 THA Thai_Thailand.874
266 TRK Turkish_Turkey.1254
267 TSN Tswana_South Africa.1252
268 TTT Tatar_Russia.1251
269 UKR Ukrainian_Ukraine.1251
270 URD Urdu_Islamic Republic of Pakistan.1256
271 USA English_United States.1252
272 UZB Uzbek (Latin)_Uzbekistan.1254
273 VIT Vietnamese_Viet Nam.1258
274 XHO Xhosa_South Africa.1252
275 ZHH Chinese_Hong Kong S.A.R..950
276 ZHI Chinese_Singapore.936
277 ZHM Chinese_Macau S.A.R..950
278 ZUL Zulu_South Africa.1252
281 /* Table from ISO 639 language code, optionally with country or script suffix,
282 to English name.
283 Keep in sync with the gl_locale_name_from_win32_LANGID function in
284 localename.c! */
struct table_entry
{
  /* Lookup key.  The tables are ordered by strcmp on this field.  */
  const char *code;
  /* Replacement name (or name component) that is passed to setlocale.  */
  const char *english;
};
290 static const struct table_entry language_table[] =
292 { "af", "Afrikaans" },
293 { "am", "Amharic" },
294 { "ar", "Arabic" },
295 { "arn", "Mapudungun" },
296 { "as", "Assamese" },
297 { "az@cyrillic", "Azeri (Cyrillic)" },
298 { "az@latin", "Azeri (Latin)" },
299 { "ba", "Bashkir" },
300 { "be", "Belarusian" },
301 { "ber", "Tamazight" },
302 { "ber@arabic", "Tamazight (Arabic)" },
303 { "ber@latin", "Tamazight (Latin)" },
304 { "bg", "Bulgarian" },
305 { "bin", "Edo" },
306 { "bn", "Bengali" },
307 { "bn_BD", "Bengali (Bangladesh)" },
308 { "bn_IN", "Bengali (India)" },
309 { "bnt", "Sutu" },
310 { "bo", "Tibetan" },
311 { "br", "Breton" },
312 { "bs", "BSB" }, /* "Bosnian (Latin)" */
313 { "bs@cyrillic", "BSC" }, /* Bosnian (Cyrillic) */
314 { "ca", "Catalan" },
315 { "chr", "Cherokee" },
316 { "co", "Corsican" },
317 { "cpe", "Hawaiian" },
318 { "cs", "Czech" },
319 { "cy", "Welsh" },
320 { "da", "Danish" },
321 { "de", "German" },
322 { "dsb", "Lower Sorbian" },
323 { "dv", "Divehi" },
324 { "el", "Greek" },
325 { "en", "English" },
326 { "es", "Spanish" },
327 { "et", "Estonian" },
328 { "eu", "Basque" },
329 { "fa", "Farsi" },
330 { "ff", "Fulfulde" },
331 { "fi", "Finnish" },
332 { "fo", "Faroese" }, /* "Faeroese" does not work */
333 { "fr", "French" },
334 { "fy", "Frisian" },
335 { "ga", "IRE" }, /* Gaelic (Ireland) */
336 { "gd", "Gaelic (Scotland)" },
337 { "gd", "Scottish Gaelic" },
338 { "gl", "Galician" },
339 { "gn", "Guarani" },
340 { "gsw", "Alsatian" },
341 { "gu", "Gujarati" },
342 { "ha", "Hausa" },
343 { "he", "Hebrew" },
344 { "hi", "Hindi" },
345 { "hr", "Croatian" },
346 { "hsb", "Upper Sorbian" },
347 { "hu", "Hungarian" },
348 { "hy", "Armenian" },
349 { "id", "Indonesian" },
350 { "ig", "Igbo" },
351 { "ii", "Yi" },
352 { "is", "Icelandic" },
353 { "it", "Italian" },
354 { "iu", "IUK" }, /* Inuktitut */
355 { "ja", "Japanese" },
356 { "ka", "Georgian" },
357 { "kk", "Kazakh" },
358 { "kl", "Greenlandic" },
359 { "km", "Cambodian" },
360 { "km", "Khmer" },
361 { "kn", "Kannada" },
362 { "ko", "Korean" },
363 { "kok", "Konkani" },
364 { "kr", "Kanuri" },
365 { "ks", "Kashmiri" },
366 { "ks_IN", "Kashmiri_India" },
367 { "ks_PK", "Kashmiri (Arabic)_Pakistan" },
368 { "ky", "Kyrgyz" },
369 { "la", "Latin" },
370 { "lb", "Luxembourgish" },
371 { "lo", "Lao" },
372 { "lt", "Lithuanian" },
373 { "lv", "Latvian" },
374 { "mi", "Maori" },
375 { "mk", "FYRO Macedonian" },
376 { "mk", "Macedonian" },
377 { "ml", "Malayalam" },
378 { "mn", "Mongolian" },
379 { "mni", "Manipuri" },
380 { "moh", "Mohawk" },
381 { "mr", "Marathi" },
382 { "ms", "Malay" },
383 { "mt", "Maltese" },
384 { "my", "Burmese" },
385 { "nb", "NOR" }, /* Norwegian Bokmål */
386 { "ne", "Nepali" },
387 { "nic", "Ibibio" },
388 { "nl", "Dutch" },
389 { "nn", "NON" }, /* Norwegian Nynorsk */
390 { "no", "Norwegian" },
391 { "nso", "Northern Sotho" },
392 { "nso", "Sepedi" },
393 { "oc", "Occitan" },
394 { "om", "Oromo" },
395 { "or", "Oriya" },
396 { "pa", "Punjabi" },
397 { "pap", "Papiamentu" },
398 { "pl", "Polish" },
399 { "prs", "Dari" },
400 { "ps", "Pashto" },
401 { "pt", "Portuguese" },
402 { "qu", "Quechua" },
403 { "qut", "K'iche'" },
404 { "rm", "Romansh" },
405 { "ro", "Romanian" },
406 { "ru", "Russian" },
407 { "rw", "Kinyarwanda" },
408 { "sa", "Sanskrit" },
409 { "sah", "Yakut" },
410 { "sd", "Sindhi" },
411 { "se", "Sami (Northern)" },
412 { "se", "Northern Sami" },
413 { "si", "Sinhalese" },
414 { "sk", "Slovak" },
415 { "sl", "Slovenian" },
416 { "sma", "Sami (Southern)" },
417 { "sma", "Southern Sami" },
418 { "smj", "Sami (Lule)" },
419 { "smj", "Lule Sami" },
420 { "smn", "Sami (Inari)" },
421 { "smn", "Inari Sami" },
422 { "sms", "Sami (Skolt)" },
423 { "sms", "Skolt Sami" },
424 { "so", "Somali" },
425 { "sq", "Albanian" },
426 { "sr", "Serbian (Latin)" },
427 { "sr@cyrillic", "SRB" }, /* Serbian (Cyrillic) */
428 { "sv", "Swedish" },
429 { "sw", "Swahili" },
430 { "syr", "Syriac" },
431 { "ta", "Tamil" },
432 { "te", "Telugu" },
433 { "tg", "Tajik" },
434 { "th", "Thai" },
435 { "ti", "Tigrinya" },
436 { "tk", "Turkmen" },
437 { "tl", "Filipino" },
438 { "tn", "Tswana" },
439 { "tr", "Turkish" },
440 { "ts", "Tsonga" },
441 { "tt", "Tatar" },
442 { "ug", "Uighur" },
443 { "uk", "Ukrainian" },
444 { "ur", "Urdu" },
445 { "uz", "Uzbek" },
446 { "uz", "Uzbek (Latin)" },
447 { "uz@cyrillic", "Uzbek (Cyrillic)" },
448 { "ve", "Venda" },
449 { "vi", "Vietnamese" },
450 { "wen", "Sorbian" },
451 { "wo", "Wolof" },
452 { "xh", "Xhosa" },
453 { "yi", "Yiddish" },
454 { "yo", "Yoruba" },
455 { "zh", "Chinese" },
456 { "zu", "Zulu" }
459 /* Table from ISO 3166 country code to English name.
460 Keep in sync with the gl_locale_name_from_win32_LANGID function in
461 localename.c! */
462 static const struct table_entry country_table[] =
464 { "AE", "U.A.E." },
465 { "AF", "Afghanistan" },
466 { "AL", "Albania" },
467 { "AM", "Armenia" },
468 { "AN", "Netherlands Antilles" },
469 { "AR", "Argentina" },
470 { "AT", "Austria" },
471 { "AU", "Australia" },
472 { "AZ", "Azerbaijan" },
473 { "BA", "Bosnia and Herzegovina" },
474 { "BD", "Bangladesh" },
475 { "BE", "Belgium" },
476 { "BG", "Bulgaria" },
477 { "BH", "Bahrain" },
478 { "BN", "Brunei Darussalam" },
479 { "BO", "Bolivia" },
480 { "BR", "Brazil" },
481 { "BT", "Bhutan" },
482 { "BY", "Belarus" },
483 { "BZ", "Belize" },
484 { "CA", "Canada" },
485 { "CG", "Congo" },
486 { "CH", "Switzerland" },
487 { "CI", "Cote d'Ivoire" },
488 { "CL", "Chile" },
489 { "CM", "Cameroon" },
490 { "CN", "People's Republic of China" },
491 { "CO", "Colombia" },
492 { "CR", "Costa Rica" },
493 { "CS", "Serbia and Montenegro" },
494 { "CZ", "Czech Republic" },
495 { "DE", "Germany" },
496 { "DK", "Denmark" },
497 { "DO", "Dominican Republic" },
498 { "DZ", "Algeria" },
499 { "EC", "Ecuador" },
500 { "EE", "Estonia" },
501 { "EG", "Egypt" },
502 { "ER", "Eritrea" },
503 { "ES", "Spain" },
504 { "ET", "Ethiopia" },
505 { "FI", "Finland" },
506 { "FO", "Faroe Islands" },
507 { "FR", "France" },
508 { "GB", "United Kingdom" },
509 { "GD", "Caribbean" },
510 { "GE", "Georgia" },
511 { "GL", "Greenland" },
512 { "GR", "Greece" },
513 { "GT", "Guatemala" },
514 { "HK", "Hong Kong" },
515 { "HK", "Hong Kong S.A.R." },
516 { "HN", "Honduras" },
517 { "HR", "Croatia" },
518 { "HT", "Haiti" },
519 { "HU", "Hungary" },
520 { "ID", "Indonesia" },
521 { "IE", "Ireland" },
522 { "IL", "Israel" },
523 { "IN", "India" },
524 { "IQ", "Iraq" },
525 { "IR", "Iran" },
526 { "IS", "Iceland" },
527 { "IT", "Italy" },
528 { "JM", "Jamaica" },
529 { "JO", "Jordan" },
530 { "JP", "Japan" },
531 { "KE", "Kenya" },
532 { "KG", "Kyrgyzstan" },
533 { "KH", "Cambodia" },
534 { "KR", "South Korea" },
535 { "KW", "Kuwait" },
536 { "KZ", "Kazakhstan" },
537 { "LA", "Laos" },
538 { "LB", "Lebanon" },
539 { "LI", "Liechtenstein" },
540 { "LK", "Sri Lanka" },
541 { "LT", "Lithuania" },
542 { "LU", "Luxembourg" },
543 { "LV", "Latvia" },
544 { "LY", "Libya" },
545 { "MA", "Morocco" },
546 { "MC", "Principality of Monaco" },
547 { "MD", "Moldava" },
548 { "MD", "Moldova" },
549 { "ME", "Montenegro" },
550 { "MK", "Former Yugoslav Republic of Macedonia" },
551 { "ML", "Mali" },
552 { "MM", "Myanmar" },
553 { "MN", "Mongolia" },
554 { "MO", "Macau S.A.R." },
555 { "MT", "Malta" },
556 { "MV", "Maldives" },
557 { "MX", "Mexico" },
558 { "MY", "Malaysia" },
559 { "NG", "Nigeria" },
560 { "NI", "Nicaragua" },
561 { "NL", "Netherlands" },
562 { "NO", "Norway" },
563 { "NP", "Nepal" },
564 { "NZ", "New Zealand" },
565 { "OM", "Oman" },
566 { "PA", "Panama" },
567 { "PE", "Peru" },
568 { "PH", "Philippines" },
569 { "PK", "Islamic Republic of Pakistan" },
570 { "PL", "Poland" },
571 { "PR", "Puerto Rico" },
572 { "PT", "Portugal" },
573 { "PY", "Paraguay" },
574 { "QA", "Qatar" },
575 { "RE", "Reunion" },
576 { "RO", "Romania" },
577 { "RS", "Serbia" },
578 { "RU", "Russia" },
579 { "RW", "Rwanda" },
580 { "SA", "Saudi Arabia" },
581 { "SE", "Sweden" },
582 { "SG", "Singapore" },
583 { "SI", "Slovenia" },
584 { "SK", "Slovak" },
585 { "SN", "Senegal" },
586 { "SO", "Somalia" },
587 { "SR", "Suriname" },
588 { "SV", "El Salvador" },
589 { "SY", "Syria" },
590 { "TH", "Thailand" },
591 { "TJ", "Tajikistan" },
592 { "TM", "Turkmenistan" },
593 { "TN", "Tunisia" },
594 { "TR", "Turkey" },
595 { "TT", "Trinidad and Tobago" },
596 { "TW", "Taiwan" },
597 { "TZ", "Tanzania" },
598 { "UA", "Ukraine" },
599 { "US", "United States" },
600 { "UY", "Uruguay" },
601 { "VA", "Vatican" },
602 { "VE", "Venezuela" },
603 { "VN", "Viet Nam" },
604 { "YE", "Yemen" },
605 { "ZA", "South Africa" },
606 { "ZW", "Zimbabwe" }
609 /* Given a string STRING, find the set of indices i such that TABLE[i].code is
610 the given STRING. It is a range [lo,hi-1]. */
611 typedef struct { size_t lo; size_t hi; } range_t;
612 static void
613 search (const struct table_entry *table, size_t table_size, const char *string,
614 range_t *result)
616 /* The table is sorted. Perform a binary search. */
617 size_t hi = table_size;
618 size_t lo = 0;
619 while (lo < hi)
621 /* Invariant:
622 for i < lo, strcmp (table[i].code, string) < 0,
623 for i >= hi, strcmp (table[i].code, string) > 0. */
624 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
625 int cmp = strcmp (table[mid].code, string);
626 if (cmp < 0)
627 lo = mid + 1;
628 else if (cmp > 0)
629 hi = mid;
630 else
632 /* Found an i with
633 strcmp (language_table[i].code, string) == 0.
634 Find the entire interval of such i. */
636 size_t i;
638 for (i = mid; i > lo; )
640 i--;
641 if (strcmp (table[i].code, string) < 0)
643 lo = i + 1;
644 break;
649 size_t i;
651 for (i = mid + 1; i < hi; i++)
653 if (strcmp (table[i].code, string) > 0)
655 hi = i;
656 break;
660 /* The set of i with
661 strcmp (language_table[i].code, string) == 0
662 is the interval [lo, hi-1]. */
663 break;
666 result->lo = lo;
667 result->hi = hi;
670 /* Like setlocale, but accept also locale names in the form ll or ll_CC,
671 where ll is an ISO 639 language code and CC is an ISO 3166 country code. */
672 static char *
673 setlocale_unixlike (int category, const char *locale)
675 char *result;
676 char llCC_buf[64];
677 char ll_buf[64];
678 char CC_buf[64];
680 /* The native Windows implementation of setlocale understands the special
681 locale name "C", but not "POSIX". Therefore map "POSIX" to "C". */
682 if (locale != NULL && strcmp (locale, "POSIX") == 0)
683 locale = "C";
685 /* First, try setlocale with the original argument unchanged. */
686 result = setlocale_mtsafe (category, locale);
687 if (result != NULL)
688 return result;
690 /* Otherwise, assume the argument is in the form
691 language[_territory][.codeset][@modifier]
692 and try to map it using the tables. */
693 if (strlen (locale) < sizeof (llCC_buf))
695 /* Second try: Remove the codeset part. */
697 const char *p = locale;
698 char *q = llCC_buf;
700 /* Copy the part before the dot. */
701 for (; *p != '\0' && *p != '.'; p++, q++)
702 *q = *p;
703 if (*p == '.')
704 /* Skip the part up to the '@', if any. */
705 for (; *p != '\0' && *p != '@'; p++)
707 /* Copy the part starting with '@', if any. */
708 for (; *p != '\0'; p++, q++)
709 *q = *p;
710 *q = '\0';
712 /* llCC_buf now contains
713 language[_territory][@modifier]
715 if (strcmp (llCC_buf, locale) != 0)
717 result = setlocale (category, llCC_buf);
718 if (result != NULL)
719 return result;
721 /* Look it up in language_table. */
723 range_t range;
724 size_t i;
726 search (language_table,
727 sizeof (language_table) / sizeof (language_table[0]),
728 llCC_buf,
729 &range);
731 for (i = range.lo; i < range.hi; i++)
733 /* Try the replacement in language_table[i]. */
734 result = setlocale (category, language_table[i].english);
735 if (result != NULL)
736 return result;
739 /* Split language[_territory][@modifier]
740 into ll_buf = language[@modifier]
741 and CC_buf = territory
744 const char *underscore = strchr (llCC_buf, '_');
745 if (underscore != NULL)
747 const char *territory_start = underscore + 1;
748 const char *territory_end = strchr (territory_start, '@');
749 if (territory_end == NULL)
750 territory_end = territory_start + strlen (territory_start);
752 memcpy (ll_buf, llCC_buf, underscore - llCC_buf);
753 strcpy (ll_buf + (underscore - llCC_buf), territory_end);
755 memcpy (CC_buf, territory_start, territory_end - territory_start);
756 CC_buf[territory_end - territory_start] = '\0';
759 /* Look up ll_buf in language_table
760 and CC_buf in country_table. */
761 range_t language_range;
763 search (language_table,
764 sizeof (language_table) / sizeof (language_table[0]),
765 ll_buf,
766 &language_range);
767 if (language_range.lo < language_range.hi)
769 range_t country_range;
771 search (country_table,
772 sizeof (country_table) / sizeof (country_table[0]),
773 CC_buf,
774 &country_range);
775 if (country_range.lo < country_range.hi)
777 size_t i;
778 size_t j;
780 for (i = language_range.lo; i < language_range.hi; i++)
781 for (j = country_range.lo; j < country_range.hi; j++)
783 /* Concatenate the replacements. */
784 const char *part1 = language_table[i].english;
785 size_t part1_len = strlen (part1);
786 const char *part2 = country_table[j].english;
787 size_t part2_len = strlen (part2) + 1;
788 char buf[64+64];
790 if (!(part1_len + 1 + part2_len <= sizeof (buf)))
791 abort ();
792 memcpy (buf, part1, part1_len);
793 buf[part1_len] = '_';
794 memcpy (buf + part1_len + 1, part2, part2_len);
796 /* Try the concatenated replacements. */
797 result = setlocale (category, buf);
798 if (result != NULL)
799 return result;
803 /* Try omitting the country entirely. This may set a locale
804 corresponding to the wrong country, but is better than
805 failing entirely. */
807 size_t i;
809 for (i = language_range.lo; i < language_range.hi; i++)
811 /* Try only the language replacement. */
812 result =
813 setlocale (category, language_table[i].english);
814 if (result != NULL)
815 return result;
824 /* Failed. */
825 return NULL;
828 # elif defined __ANDROID__
/* Like setlocale, but accept also the locale names "C" and "POSIX".
   When the underlying call fails for one of the categories listed below and
   LOCALE is NULL, "C" or "POSIX", the string "C" is returned instead of
   NULL.  For any other category or locale name the failure is passed on.  */
static char *
setlocale_unixlike (int category, const char *locale)
{
  char *result = setlocale_mtsafe (category, locale);
  int known_category;

  if (result != NULL)
    return result;

  switch (category)
    {
    case LC_CTYPE:
    case LC_NUMERIC:
    case LC_TIME:
    case LC_COLLATE:
    case LC_MONETARY:
    case LC_MESSAGES:
    case LC_ALL:
    case LC_PAPER:
    case LC_NAME:
    case LC_ADDRESS:
    case LC_TELEPHONE:
    case LC_MEASUREMENT:
      known_category = 1;
      break;
    default:
      known_category = 0;
      break;
    }

  if (known_category
      && (locale == NULL
          || strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0))
    return (char *) "C";

  return NULL;
}
859 # define setlocale setlocale_unixlike
861 # else
862 # define setlocale_unixlike setlocale_mtsafe
863 # endif
865 # if LC_MESSAGES == 1729
/* The system does not store an LC_MESSAGES locale category.  Do it here.
   Holds the name most recently set for LC_MESSAGES; initially "C".  */
static char lc_messages_name[64] = "C";

/* Like setlocale, but support also LC_MESSAGES.
   For LC_MESSAGES, a non-NULL LOCALE is copied into lc_messages_name
   (truncated to 63 bytes) and the stored name is returned; the name itself
   is not checked.  All other categories are handled by
   setlocale_unixlike.  */
static char *
setlocale_single (int category, const char *locale)
{
  if (category != LC_MESSAGES)
    return setlocale_unixlike (category, locale);

  if (locale != NULL)
    {
      strncpy (lc_messages_name, locale, sizeof (lc_messages_name) - 1);
      /* strncpy does not terminate the copy when LOCALE is too long.  */
      lc_messages_name[sizeof (lc_messages_name) - 1] = '\0';
    }
  return lc_messages_name;
}
887 # else
888 # define setlocale_single setlocale_unixlike
889 # endif
891 # if defined __APPLE__ && defined __MACH__
/* Mapping from language to main territory where that language is spoken.
   Each element is a locale name of the form ll_CC or lll_CC.
   The table is looked up through a binary search on the language part
   (see langcmp), therefore the entries MUST be kept sorted by language
   code, where a language code sorts before any longer code that it is a
   prefix of (for example "km" < "kmb" < "kn").  */
static char const locales_with_principal_territory[][6 + 1] =
  {
                /* Language     Main territory */
    "ace_ID",   /* Achinese     Indonesia */
    "af_ZA",    /* Afrikaans    South Africa */
    "ak_GH",    /* Akan         Ghana */
    "am_ET",    /* Amharic      Ethiopia */
    "an_ES",    /* Aragonese    Spain */
    "ang_GB",   /* Old English  Britain */
    "arn_CL",   /* Mapudungun   Chile */
    "as_IN",    /* Assamese     India */
    "ast_ES",   /* Asturian     Spain */
    "av_RU",    /* Avaric       Russia */
    "awa_IN",   /* Awadhi       India */
    "az_AZ",    /* Azerbaijani  Azerbaijan */
    "ban_ID",   /* Balinese     Indonesia */
    "be_BY",    /* Belarusian   Belarus */
    "bej_SD",   /* Beja         Sudan */
    "bem_ZM",   /* Bemba        Zambia */
    "bg_BG",    /* Bulgarian    Bulgaria */
    "bho_IN",   /* Bhojpuri     India */
    "bi_VU",    /* Bislama      Vanuatu */
    "bik_PH",   /* Bikol        Philippines */
    "bin_NG",   /* Bini         Nigeria */
    "bm_ML",    /* Bambara      Mali */
    "bn_IN",    /* Bengali      India */
    "bo_CN",    /* Tibetan      China */
    "br_FR",    /* Breton       France */
    "bs_BA",    /* Bosnian      Bosnia */
    "bug_ID",   /* Buginese     Indonesia */
    "ca_ES",    /* Catalan      Spain */
    "ce_RU",    /* Chechen      Russia */
    "ceb_PH",   /* Cebuano      Philippines */
    "co_FR",    /* Corsican     France */
    "cr_CA",    /* Cree         Canada */
    /* Don't put "crh_UZ" or "crh_UA" here.  That would be asking for fruitless
       political discussion.  */
    "cs_CZ",    /* Czech        Czech Republic */
    "csb_PL",   /* Kashubian    Poland */
    "cy_GB",    /* Welsh        Britain */
    "da_DK",    /* Danish       Denmark */
    "de_DE",    /* German       Germany */
    "din_SD",   /* Dinka        Sudan */
    "doi_IN",   /* Dogri        India */
    "dsb_DE",   /* Lower Sorbian        Germany */
    "dv_MV",    /* Divehi       Maldives */
    "dz_BT",    /* Dzongkha     Bhutan */
    "ee_GH",    /* Éwé          Ghana */
    "el_GR",    /* Greek        Greece */
    /* Don't put "en_GB" or "en_US" here.  That would be asking for fruitless
       political discussion.  */
    "es_ES",    /* Spanish      Spain */
    "et_EE",    /* Estonian     Estonia */
    "fa_IR",    /* Persian      Iran */
    "fi_FI",    /* Finnish      Finland */
    "fil_PH",   /* Filipino     Philippines */
    "fj_FJ",    /* Fijian       Fiji */
    "fo_FO",    /* Faroese      Faeroe Islands */
    "fon_BJ",   /* Fon          Benin */
    "fr_FR",    /* French       France */
    "fur_IT",   /* Friulian     Italy */
    "fy_NL",    /* Western Frisian      Netherlands */
    "ga_IE",    /* Irish        Ireland */
    "gd_GB",    /* Scottish Gaelic      Britain */
    "gon_IN",   /* Gondi        India */
    "gsw_CH",   /* Swiss German Switzerland */
    "gu_IN",    /* Gujarati     India */
    "he_IL",    /* Hebrew       Israel */
    "hi_IN",    /* Hindi        India */
    "hil_PH",   /* Hiligaynon   Philippines */
    "hr_HR",    /* Croatian     Croatia */
    "hsb_DE",   /* Upper Sorbian        Germany */
    "ht_HT",    /* Haitian      Haiti */
    "hu_HU",    /* Hungarian    Hungary */
    "hy_AM",    /* Armenian     Armenia */
    "id_ID",    /* Indonesian   Indonesia */
    "ig_NG",    /* Igbo         Nigeria */
    "ii_CN",    /* Sichuan Yi   China */
    "ilo_PH",   /* Iloko        Philippines */
    "is_IS",    /* Icelandic    Iceland */
    "it_IT",    /* Italian      Italy */
    "ja_JP",    /* Japanese     Japan */
    "jab_NG",   /* Hyam         Nigeria */
    "jv_ID",    /* Javanese     Indonesia */
    "ka_GE",    /* Georgian     Georgia */
    "kab_DZ",   /* Kabyle       Algeria */
    "kaj_NG",   /* Jju          Nigeria */
    "kam_KE",   /* Kamba        Kenya */
    "kcg_NG",   /* Tyap         Nigeria */
    "kdm_NG",   /* Kagoma       Nigeria */
    "kg_CD",    /* Kongo        Democratic Republic of Congo */
    "kk_KZ",    /* Kazakh       Kazakhstan */
    "kl_GL",    /* Kalaallisut  Greenland */
    "km_KH",    /* Central Khmer        Cambodia */
    "kmb_AO",   /* Kimbundu     Angola */
    "kn_IN",    /* Kannada      India */
    "ko_KR",    /* Korean       Korea (South) */
    "kok_IN",   /* Konkani      India */
    "kr_NG",    /* Kanuri       Nigeria */
    "kru_IN",   /* Kurukh       India */
    "ky_KG",    /* Kyrgyz       Kyrgyzstan */
    "lg_UG",    /* Ganda        Uganda */
    "li_BE",    /* Limburgish   Belgium */
    "lo_LA",    /* Laotian      Laos */
    "lt_LT",    /* Lithuanian   Lithuania */
    "lu_CD",    /* Luba-Katanga Democratic Republic of Congo */
    "lua_CD",   /* Luba-Lulua   Democratic Republic of Congo */
    "luo_KE",   /* Luo          Kenya */
    "lv_LV",    /* Latvian      Latvia */
    "mad_ID",   /* Madurese     Indonesia */
    "mag_IN",   /* Magahi       India */
    "mai_IN",   /* Maithili     India */
    "mak_ID",   /* Makasar      Indonesia */
    "man_ML",   /* Mandingo     Mali */
    "men_SL",   /* Mende        Sierra Leone */
    "mfe_MU",   /* Mauritian Creole     Mauritius */
    "mg_MG",    /* Malagasy     Madagascar */
    "mi_NZ",    /* Maori        New Zealand */
    "min_ID",   /* Minangkabau  Indonesia */
    "mk_MK",    /* Macedonian   North Macedonia */
    "ml_IN",    /* Malayalam    India */
    "mn_MN",    /* Mongolian    Mongolia */
    "mni_IN",   /* Manipuri     India */
    "mos_BF",   /* Mossi        Burkina Faso */
    "mr_IN",    /* Marathi      India */
    "ms_MY",    /* Malay        Malaysia */
    "mt_MT",    /* Maltese      Malta */
    "mwr_IN",   /* Marwari      India */
    "my_MM",    /* Burmese      Myanmar */
    "na_NR",    /* Nauru        Nauru */
    "nah_MX",   /* Nahuatl      Mexico */
    "nap_IT",   /* Neapolitan   Italy */
    "nb_NO",    /* Norwegian Bokmål    Norway */
    "nds_DE",   /* Low Saxon    Germany */
    "ne_NP",    /* Nepali       Nepal */
    "nl_NL",    /* Dutch        Netherlands */
    "nn_NO",    /* Norwegian Nynorsk    Norway */
    "no_NO",    /* Norwegian    Norway */
    "nr_ZA",    /* South Ndebele        South Africa */
    "nso_ZA",   /* Northern Sotho       South Africa */
    "ny_MW",    /* Chichewa     Malawi */
    "nym_TZ",   /* Nyamwezi     Tanzania */
    "nyn_UG",   /* Nyankole     Uganda */
    "oc_FR",    /* Occitan      France */
    "oj_CA",    /* Ojibwa       Canada */
    "or_IN",    /* Oriya        India */
    "pa_IN",    /* Punjabi      India */
    "pag_PH",   /* Pangasinan   Philippines */
    "pam_PH",   /* Pampanga     Philippines */
    "pap_AN",   /* Papiamento   Netherlands Antilles - this line can be removed in 2018 */
    "pbb_CO",   /* Páez         Colombia */
    "pl_PL",    /* Polish       Poland */
    "ps_AF",    /* Pashto       Afghanistan */
    "pt_PT",    /* Portuguese   Portugal */
    "raj_IN",   /* Rajasthani   India */
    "rm_CH",    /* Romansh      Switzerland */
    "rn_BI",    /* Kirundi      Burundi */
    "ro_RO",    /* Romanian     Romania */
    "ru_RU",    /* Russian      Russia */
    "rw_RW",    /* Kinyarwanda  Rwanda */
    "sa_IN",    /* Sanskrit     India */
    "sah_RU",   /* Yakut        Russia */
    "sas_ID",   /* Sasak        Indonesia */
    "sat_IN",   /* Santali      India */
    "sc_IT",    /* Sardinian    Italy */
    "scn_IT",   /* Sicilian     Italy */
    "sg_CF",    /* Sango        Central African Republic */
    "shn_MM",   /* Shan         Myanmar */
    "si_LK",    /* Sinhala      Sri Lanka */
    "sid_ET",   /* Sidamo       Ethiopia */
    "sk_SK",    /* Slovak       Slovakia */
    "sl_SI",    /* Slovenian    Slovenia */
    "sm_WS",    /* Samoan       Samoa */
    "smn_FI",   /* Inari Sami   Finland */
    "sms_FI",   /* Skolt Sami   Finland */
    "so_SO",    /* Somali       Somalia */
    "sq_AL",    /* Albanian     Albania */
    "sr_RS",    /* Serbian      Serbia */
    "srr_SN",   /* Serer        Senegal */
    "suk_TZ",   /* Sukuma       Tanzania */
    "sus_GN",   /* Susu         Guinea */
    "sv_SE",    /* Swedish      Sweden */
    "te_IN",    /* Telugu       India */
    "tem_SL",   /* Timne        Sierra Leone */
    "tet_ID",   /* Tetum        Indonesia */
    "tg_TJ",    /* Tajik        Tajikistan */
    "th_TH",    /* Thai         Thailand */
    "ti_ER",    /* Tigrinya     Eritrea */
    "tiv_NG",   /* Tiv          Nigeria */
    "tk_TM",    /* Turkmen      Turkmenistan */
    "tl_PH",    /* Tagalog      Philippines */
    "to_TO",    /* Tonga        Tonga */
    "tpi_PG",   /* Tok Pisin    Papua New Guinea */
    "tr_TR",    /* Turkish      Türkiye */
    "tum_MW",   /* Tumbuka      Malawi */
    "ug_CN",    /* Uighur       China */
    "uk_UA",    /* Ukrainian    Ukraine */
    "umb_AO",   /* Umbundu      Angola */
    "ur_PK",    /* Urdu         Pakistan */
    "uz_UZ",    /* Uzbek        Uzbekistan */
    "ve_ZA",    /* Venda        South Africa */
    "vi_VN",    /* Vietnamese   Vietnam */
    "wa_BE",    /* Walloon      Belgium */
    "wal_ET",   /* Walamo       Ethiopia */
    "war_PH",   /* Waray        Philippines */
    "wen_DE",   /* Sorbian      Germany */
    "yao_MW",   /* Yao          Malawi */
    "zap_MX"    /* Zapotec      Mexico */
  };
/* Compare two locale names, looking only at their language part, that is,
   the characters that precede the first '_' (or the entire name when it
   contains no '_').
   Return a negative value, zero, or a positive value, depending on whether
   the language of LOCALE1 sorts before, equal to, or after the language of
   LOCALE2.  A language that is a proper prefix of the other sorts first.  */
static int
langcmp (const char *locale1, const char *locale2)
{
  /* strcspn yields the offset of the first '_', or the string length if
     there is none.  */
  size_t len1 = strcspn (locale1, "_");
  size_t len2 = strcspn (locale2, "_");
  size_t common = (len1 < len2 ? len1 : len2);
  int result = memcmp (locale1, locale2, common);

  /* Equal on the common prefix: the shorter language part comes first.  */
  if (result == 0 && len1 != len2)
    result = (len1 < len2 ? -1 : 1);

  return result;
}
1143 /* Given a locale name, return the main locale with the same language,
1144 or NULL if not found.
1145 For example: "fr_DE" -> "fr_FR". */
1146 static const char *
1147 get_main_locale_with_same_language (const char *locale)
1149 # define table locales_with_principal_territory
1150 /* The table is sorted. Perform a binary search. */
1151 size_t hi = sizeof (table) / sizeof (table[0]);
1152 size_t lo = 0;
1153 while (lo < hi)
1155 /* Invariant:
1156 for i < lo, langcmp (table[i], locale) < 0,
1157 for i >= hi, langcmp (table[i], locale) > 0. */
1158 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1159 int cmp = langcmp (table[mid], locale);
1160 if (cmp < 0)
1161 lo = mid + 1;
1162 else if (cmp > 0)
1163 hi = mid;
1164 else
1166 /* Found an i with
1167 langcmp (language_table[i], locale) == 0.
1168 Verify that it is the only such i. */
1169 if (mid > lo && langcmp (table[mid - 1], locale) >= 0)
1170 abort ();
1171 if (mid + 1 < hi && langcmp (table[mid + 1], locale) <= 0)
1172 abort ();
1173 return table[mid];
1176 # undef table
1177 return NULL;
/* For each territory, the locale of the main language spoken there.
   Each entry has the form "ll_CC" or "lll_CC", hence at most 6 characters
   plus the terminating NUL.
   The entries are sorted by territory code, because
   get_main_locale_with_same_territory does a binary search on them.  */
static const char locales_with_principal_language[][6 + 1] =
  {
    /* Sources: the set of existing locales in glibc, with duplicates
       removed, and the Wikipedia pages named "Languages of <territory>".
       If in doubt, use the locale that exists in macOS.  For example, the
       only "*_IN" locale in macOS 10.13 is "hi_IN", so use that.  */
    /* A useful shell function for producing a line of this table is:
         func_line ()
         {
           # Usage: func_line ll_CC
           ll=`echo "$1" | sed -e 's|_.*||'`
           cc=`echo "$1" | sed -e 's|^.*_||'`
           llx=`sed -n -e "s|^${ll} ||p" < gettext-tools/doc/ISO_639`
           ccx=`expand gettext-tools/doc/ISO_3166 | sed -n -e "s|^${cc} *||p"`
           echo "    \"$1\",    /$X* ${llx} ${ccx} *$X/"
         }
     */
    /*             Main language     Territory */
    "ca_AD",    /* Catalan           Andorra */
    "ar_AE",    /* Arabic            United Arab Emirates */
    "ps_AF",    /* Pashto            Afghanistan */
    "en_AG",    /* English           Antigua and Barbuda */
    "sq_AL",    /* Albanian          Albania */
    "hy_AM",    /* Armenian          Armenia */
    "pap_AN",   /* Papiamento        Netherlands Antilles - this line can be removed in 2018 */
    "pt_AO",    /* Portuguese        Angola */
    "es_AR",    /* Spanish           Argentina */
    "de_AT",    /* German            Austria */
    "en_AU",    /* English           Australia */
    /* Aruba has two official languages: "nl_AW", "pap_AW".  */
    "az_AZ",    /* Azerbaijani       Azerbaijan */
    "bs_BA",    /* Bosnian           Bosnia */
    "bn_BD",    /* Bengali           Bangladesh */
    "nl_BE",    /* Dutch             Belgium */
    "fr_BF",    /* French            Burkina Faso */
    "bg_BG",    /* Bulgarian         Bulgaria */
    "ar_BH",    /* Arabic            Bahrain */
    "rn_BI",    /* Kirundi           Burundi */
    "fr_BJ",    /* French            Benin */
    "es_BO",    /* Spanish           Bolivia */
    "pt_BR",    /* Portuguese        Brazil */
    "dz_BT",    /* Dzongkha          Bhutan */
    "en_BW",    /* English           Botswana */
    "be_BY",    /* Belarusian        Belarus */
    "en_CA",    /* English           Canada */
    "fr_CD",    /* French            Democratic Republic of Congo */
    "sg_CF",    /* Sango             Central African Republic */
    "de_CH",    /* German            Switzerland */
    "es_CL",    /* Spanish           Chile */
    "zh_CN",    /* Chinese           China */
    "es_CO",    /* Spanish           Colombia */
    "es_CR",    /* Spanish           Costa Rica */
    "es_CU",    /* Spanish           Cuba */
    /* Curaçao has three official languages: "nl_CW", "pap_CW", "en_CW".  */
    "el_CY",    /* Greek             Cyprus */
    "cs_CZ",    /* Czech             Czech Republic */
    "de_DE",    /* German            Germany */
    /* Djibouti has two official languages: "ar_DJ" and "fr_DJ".  */
    "da_DK",    /* Danish            Denmark */
    "es_DO",    /* Spanish           Dominican Republic */
    "ar_DZ",    /* Arabic            Algeria */
    "es_EC",    /* Spanish           Ecuador */
    "et_EE",    /* Estonian          Estonia */
    "ar_EG",    /* Arabic            Egypt */
    "ti_ER",    /* Tigrinya          Eritrea */
    "es_ES",    /* Spanish           Spain */
    "am_ET",    /* Amharic           Ethiopia */
    "fi_FI",    /* Finnish           Finland */
    /* Fiji has three official languages: "en_FJ", "fj_FJ", "hif_FJ".  */
    "fo_FO",    /* Faroese           Faeroe Islands */
    "fr_FR",    /* French            France */
    "en_GB",    /* English           Britain */
    "ka_GE",    /* Georgian          Georgia */
    "en_GH",    /* English           Ghana */
    "kl_GL",    /* Kalaallisut       Greenland */
    "fr_GN",    /* French            Guinea */
    "el_GR",    /* Greek             Greece */
    "es_GT",    /* Spanish           Guatemala */
    "zh_HK",    /* Chinese           Hong Kong */
    "es_HN",    /* Spanish           Honduras */
    "hr_HR",    /* Croatian          Croatia */
    "ht_HT",    /* Haitian           Haiti */
    "hu_HU",    /* Hungarian         Hungary */
    "id_ID",    /* Indonesian        Indonesia */
    "en_IE",    /* English           Ireland */
    "he_IL",    /* Hebrew            Israel */
    "hi_IN",    /* Hindi             India */
    "ar_IQ",    /* Arabic            Iraq */
    "fa_IR",    /* Persian           Iran */
    "is_IS",    /* Icelandic         Iceland */
    "it_IT",    /* Italian           Italy */
    "ar_JO",    /* Arabic            Jordan */
    "ja_JP",    /* Japanese          Japan */
    "sw_KE",    /* Swahili           Kenya */
    "ky_KG",    /* Kyrgyz            Kyrgyzstan */
    "km_KH",    /* Central Khmer     Cambodia */
    "ko_KR",    /* Korean            Korea (South) */
    "ar_KW",    /* Arabic            Kuwait */
    "kk_KZ",    /* Kazakh            Kazakhstan */
    "lo_LA",    /* Laotian           Laos */
    "ar_LB",    /* Arabic            Lebanon */
    "de_LI",    /* German            Liechtenstein */
    "si_LK",    /* Sinhala           Sri Lanka */
    "lt_LT",    /* Lithuanian        Lithuania */
    /* Luxembourg has three official languages: "lb_LU", "fr_LU", "de_LU".  */
    "lv_LV",    /* Latvian           Latvia */
    "ar_LY",    /* Arabic            Libya */
    "ar_MA",    /* Arabic            Morocco */
    "sr_ME",    /* Serbian           Montenegro */
    "mg_MG",    /* Malagasy          Madagascar */
    "mk_MK",    /* Macedonian        North Macedonia */
    "fr_ML",    /* French            Mali */
    "my_MM",    /* Burmese           Myanmar */
    "mn_MN",    /* Mongolian         Mongolia */
    "mt_MT",    /* Maltese           Malta */
    "mfe_MU",   /* Mauritian Creole  Mauritius */
    "dv_MV",    /* Divehi            Maldives */
    "ny_MW",    /* Chichewa          Malawi */
    "es_MX",    /* Spanish           Mexico */
    "ms_MY",    /* Malay             Malaysia */
    "en_NG",    /* English           Nigeria */
    "es_NI",    /* Spanish           Nicaragua */
    "nl_NL",    /* Dutch             Netherlands */
    "no_NO",    /* Norwegian         Norway */
    "ne_NP",    /* Nepali            Nepal */
    "na_NR",    /* Nauru             Nauru */
    "niu_NU",   /* Niuean            Niue */
    "en_NZ",    /* English           New Zealand */
    "ar_OM",    /* Arabic            Oman */
    "es_PA",    /* Spanish           Panama */
    "es_PE",    /* Spanish           Peru */
    "tpi_PG",   /* Tok Pisin         Papua New Guinea */
    "fil_PH",   /* Filipino          Philippines */
    "pa_PK",    /* Punjabi           Pakistan */
    "pl_PL",    /* Polish            Poland */
    "es_PR",    /* Spanish           Puerto Rico */
    "pt_PT",    /* Portuguese        Portugal */
    "es_PY",    /* Spanish           Paraguay */
    "ar_QA",    /* Arabic            Qatar */
    "ro_RO",    /* Romanian          Romania */
    "sr_RS",    /* Serbian           Serbia */
    "ru_RU",    /* Russian           Russia */
    "rw_RW",    /* Kinyarwanda       Rwanda */
    "ar_SA",    /* Arabic            Saudi Arabia */
    "en_SC",    /* English           Seychelles */
    "ar_SD",    /* Arabic            Sudan */
    "sv_SE",    /* Swedish           Sweden */
    "en_SG",    /* English           Singapore */
    "sl_SI",    /* Slovenian         Slovenia */
    "sk_SK",    /* Slovak            Slovakia */
    "en_SL",    /* English           Sierra Leone */
    "fr_SN",    /* French            Senegal */
    "so_SO",    /* Somali            Somalia */
    "ar_SS",    /* Arabic            South Sudan */
    "es_SV",    /* Spanish           El Salvador */
    "ar_SY",    /* Arabic            Syria */
    "th_TH",    /* Thai              Thailand */
    "tg_TJ",    /* Tajik             Tajikistan */
    "tk_TM",    /* Turkmen           Turkmenistan */
    "ar_TN",    /* Arabic            Tunisia */
    "to_TO",    /* Tonga             Tonga */
    "tr_TR",    /* Turkish           Türkiye */
    "zh_TW",    /* Chinese           Taiwan */
    "sw_TZ",    /* Swahili           Tanzania */
    "uk_UA",    /* Ukrainian         Ukraine */
    "lg_UG",    /* Ganda             Uganda */
    "en_US",    /* English           United States of America */
    "es_UY",    /* Spanish           Uruguay */
    "uz_UZ",    /* Uzbek             Uzbekistan */
    "es_VE",    /* Spanish           Venezuela */
    "vi_VN",    /* Vietnamese        Vietnam */
    "bi_VU",    /* Bislama           Vanuatu */
    "sm_WS",    /* Samoan            Samoa */
    "ar_YE",    /* Arabic            Yemen */
    "en_ZA",    /* English           South Africa */
    "en_ZM",    /* English           Zambia */
    "en_ZW"     /* English           Zimbabwe */
  };
/* Compare two locale names, looking only at their territory part, that is,
   everything that follows the last '_'.
   Both LOCALE1 and LOCALE2 must contain a '_'.
   Return a negative value, zero, or a positive value, like strcmp.  */
static int
terrcmp (const char *locale1, const char *locale2)
{
  const char *separator1 = strrchr (locale1, '_');
  const char *separator2 = strrchr (locale2, '_');

  /* The territory starts right after the separator.  */
  return strcmp (separator1 + 1, separator2 + 1);
}
1370 /* Given a locale name, return the locale corresponding to the main language
1371 with the same territory, or NULL if not found.
1372 For example: "fr_DE" -> "de_DE". */
1373 static const char *
1374 get_main_locale_with_same_territory (const char *locale)
1376 if (strrchr (locale, '_') != NULL)
1378 # define table locales_with_principal_language
1379 /* The table is sorted. Perform a binary search. */
1380 size_t hi = sizeof (table) / sizeof (table[0]);
1381 size_t lo = 0;
1382 while (lo < hi)
1384 /* Invariant:
1385 for i < lo, terrcmp (table[i], locale) < 0,
1386 for i >= hi, terrcmp (table[i], locale) > 0. */
1387 size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1388 int cmp = terrcmp (table[mid], locale);
1389 if (cmp < 0)
1390 lo = mid + 1;
1391 else if (cmp > 0)
1392 hi = mid;
1393 else
1395 /* Found an i with
1396 terrcmp (language_table[i], locale) == 0.
1397 Verify that it is the only such i. */
1398 if (mid > lo && terrcmp (table[mid - 1], locale) >= 0)
1399 abort ();
1400 if (mid + 1 < hi && terrcmp (table[mid + 1], locale) <= 0)
1401 abort ();
1402 return table[mid];
1405 # undef table
1407 return NULL;
1410 # endif
/* Replacement for setlocale() that handles a request for the default locale
   (LOCALE = "") itself instead of leaving it to the system.

   - LOCALE = "", CATEGORY = LC_ALL: the previous locale is saved, then the
     categories are set one by one, LC_CTYPE first.  The name of each
     category comes from gl_locale_name_environ or, if that returns NULL,
     from gl_locale_name_default.  If a step fails, the saved locale is
     restored and NULL is returned; except on macOS, where fallback locales
     are tried and a failure at most produces a warning on stderr (only if
     the environment variable SETLOCALE_VERBOSE is set and non-empty).
   - LOCALE = "", other CATEGORY: the name is determined the same way and
     passed to setlocale_single.
   - Otherwise: the request is passed to setlocale_single.  On native
     Windows, an LC_ALL request whose name contains a '.' is additionally
     checked, and undone if LC_CTYPE ended up as "C".

   Return the resulting locale name as returned by the underlying setlocale
   functions, or NULL upon failure.  */
char *
setlocale_improved (int category, const char *locale)
{
  if (locale != NULL && locale[0] == '\0')
    {
      /* A request to set the current locale to the default locale.  */
      if (category == LC_ALL)
        {
          /* Set LC_CTYPE first.  Then the other categories.  */
          static int const categories[] =
            {
              LC_CTYPE,
              LC_NUMERIC,
              LC_TIME,
              LC_COLLATE,
              LC_MONETARY,
              LC_MESSAGES
            };
          char *saved_locale;
          const char *base_name;
          unsigned int i;

          /* Back up the old locale, in case one of the steps fails.
             The string is duplicated so that it stays available across the
             following setlocale calls.  */
          saved_locale = setlocale (LC_ALL, NULL);
          if (saved_locale == NULL)
            return NULL;
          saved_locale = strdup (saved_locale);
          if (saved_locale == NULL)
            return NULL;

          /* Set LC_CTYPE category.  Set all other categories (except possibly
             LC_MESSAGES) to the same value in the same call; this is likely to
             save calls.  */
          base_name =
            gl_locale_name_environ (LC_CTYPE, category_to_name (LC_CTYPE));
          if (base_name == NULL)
            base_name = gl_locale_name_default ();

          if (setlocale_unixlike (LC_ALL, base_name) != NULL)
            {
              /* LC_CTYPE category already set.  Start the loop below at the
                 category after LC_CTYPE.  */
              i = 1;
            }
          else
            {
              /* On Mac OS X, "UTF-8" is a valid locale name for LC_CTYPE but
                 not for LC_ALL.  Therefore this call may fail.  So, try
                 another base_name.  */
              base_name = "C";
              if (setlocale_unixlike (LC_ALL, base_name) == NULL)
                goto fail;
              /* LC_CTYPE still needs to be set by the loop below.  */
              i = 0;
            }
# if defined _WIN32 && ! defined __CYGWIN__
          /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
             LC_CTYPE category to an invalid value ("C") when it does not
             support the specified encoding.  Report a failure instead.  */
          if (strchr (base_name, '.') != NULL
              && strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
            goto fail;
# endif

          for (; i < sizeof (categories) / sizeof (categories[0]); i++)
            {
              int cat = categories[i];
              const char *name;

              name = gl_locale_name_environ (cat, category_to_name (cat));
              if (name == NULL)
                name = gl_locale_name_default ();

              /* If name is the same as base_name, it has already been set
                 through the setlocale call before the loop.
                 NOTE(review): 1729 is presumably the placeholder value that
                 LC_MESSAGES gets when the system does not define it, in
                 which case LC_ALL does not cover it - confirm against the
                 <locale.h> replacement.  */
              if (strcmp (name, base_name) != 0
# if LC_MESSAGES == 1729
                  || cat == LC_MESSAGES
# endif
                 )
                if (setlocale_single (cat, name) == NULL)
# if defined __APPLE__ && defined __MACH__
                  {
                    /* On Mac OS X 10.13, some locales can be set through
                       System Preferences > Language & Region, that are not
                       supported by libc.  The system's setlocale() falls
                       back to "C" for these locale categories.  We can do
                       better, by trying an existing locale with the same
                       language or an existing locale with the same territory.
                       If we can't, print a warning, to limit user
                       expectations.  */
                    int warn = 0;

                    if (cat == LC_CTYPE)
                      warn = (setlocale_single (cat, "UTF-8") == NULL);
                    else if (cat == LC_MESSAGES)
                      {
#  if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
                        /* Take the primary language preference.  */
#   if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
                        CFArrayRef prefArray = CFLocaleCopyPreferredLanguages ();
#   elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
                        CFTypeRef preferences =
                          CFPreferencesCopyAppValue (CFSTR ("AppleLanguages"),
                                                     kCFPreferencesCurrentApplication);
                        if (preferences != NULL
                            && CFGetTypeID (preferences) == CFArrayGetTypeID ())
                          {
                            CFArrayRef prefArray = (CFArrayRef)preferences;
#   endif
                            /* CFArrayGetCount returns a CFIndex.  */
                            CFIndex n = CFArrayGetCount (prefArray);
                            if (n > 0)
                              {
                                char buf[256];
                                CFTypeRef element = CFArrayGetValueAtIndex (prefArray, 0);
                                if (element != NULL
                                    && CFGetTypeID (element) == CFStringGetTypeID ()
                                    && CFStringGetCString ((CFStringRef)element,
                                                           buf, sizeof (buf),
                                                           kCFStringEncodingASCII))
                                  {
                                    /* Remove the country.
                                       E.g. "zh-Hans-DE" -> "zh-Hans".  */
                                    char *last_minus = strrchr (buf, '-');
                                    if (last_minus != NULL)
                                      *last_minus = '\0';

                                    /* Convert to Unix locale name.
                                       E.g. "zh-Hans" -> "zh_CN".  */
                                    gl_locale_name_canonicalize (buf);

                                    /* Try setlocale with this value.  */
                                    if (setlocale_single (cat, buf) == NULL)
                                      {
                                        const char *last_try =
                                          get_main_locale_with_same_language (buf);

                                        if (last_try == NULL
                                            || setlocale_single (cat, last_try) == NULL)
                                          warn = 1;
                                      }
                                  }
                              }
#   if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */
                        CFRelease (prefArray);
#   elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */
                          }
                        /* The value was obtained from a "Copy" function, so
                           we own it and must release it, whatever its type.
                           CFRelease must not be passed NULL.  */
                        if (preferences != NULL)
                          CFRelease (preferences);
#   endif
#  else
                        const char *last_try =
                          get_main_locale_with_same_language (name);

                        if (last_try == NULL
                            || setlocale_single (cat, last_try) == NULL)
                          warn = 1;
#  endif
                      }
                    else
                      {
                        /* For LC_NUMERIC, the application should use the locale
                           properties kCFLocaleDecimalSeparator,
                           kCFLocaleGroupingSeparator.
                           For LC_TIME, the application should use the locale
                           property kCFLocaleCalendarIdentifier.
                           For LC_COLLATE, the application should use the locale
                           properties kCFLocaleCollationIdentifier,
                           kCFLocaleCollatorIdentifier.
                           For LC_MONETARY, the application should use the locale
                           properties kCFLocaleCurrencySymbol,
                           kCFLocaleCurrencyCode.
                           But since most applications don't have macOS specific
                           code like this, try an existing locale with the same
                           territory.  */
                        const char *last_try =
                          get_main_locale_with_same_territory (name);

                        if (last_try == NULL
                            || setlocale_single (cat, last_try) == NULL)
                          warn = 1;
                      }

                    if (warn)
                      {
                        /* Warn only if the environment variable
                           SETLOCALE_VERBOSE is set.  Otherwise these warnings
                           are just annoyances, since normal users won't invoke
                           'localedef'.  */
                        const char *verbose = getenv ("SETLOCALE_VERBOSE");
                        if (verbose != NULL && verbose[0] != '\0')
                          fprintf (stderr,
                                   "Warning: Failed to set locale category %s to %s.\n",
                                   category_to_name (cat), name);
                      }
                  }
# else
                  goto fail;
# endif
            }

          /* All steps were successful.  */
          free (saved_locale);
          return setlocale (LC_ALL, NULL);

        fail:
          /* Restore the locale that was in effect on entry.  */
          if (saved_locale[0] != '\0') /* don't risk an endless recursion */
            setlocale (LC_ALL, saved_locale);
          free (saved_locale);
          return NULL;
        }
      else
        {
          /* A single category: determine its default name and set it.  */
          const char *name =
            gl_locale_name_environ (category, category_to_name (category));
          if (name == NULL)
            name = gl_locale_name_default ();

          return setlocale_single (category, name);
        }
    }
  else
    {
# if defined _WIN32 && ! defined __CYGWIN__
      if (category == LC_ALL && locale != NULL && strchr (locale, '.') != NULL)
        {
          char *saved_locale;

          /* Back up the old locale.  */
          saved_locale = setlocale (LC_ALL, NULL);
          if (saved_locale == NULL)
            return NULL;
          saved_locale = strdup (saved_locale);
          if (saved_locale == NULL)
            return NULL;

          if (setlocale_unixlike (LC_ALL, locale) == NULL)
            {
              free (saved_locale);
              return NULL;
            }

          /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
             LC_CTYPE category to an invalid value ("C") when it does not
             support the specified encoding.  Report a failure instead.  */
          if (strcmp (setlocale (LC_CTYPE, NULL), "C") == 0)
            {
              if (saved_locale[0] != '\0') /* don't risk an endless recursion */
                setlocale (LC_ALL, saved_locale);
              free (saved_locale);
              return NULL;
            }

          /* It was really successful.  */
          free (saved_locale);
          return setlocale (LC_ALL, NULL);
        }
      else
# endif
        return setlocale_single (category, locale);
    }
}
1671 # endif /* NEED_SETLOCALE_IMPROVED */
1673 #endif