1 /* Set the current locale. -*- coding: utf-8 -*-
2 Copyright (C) 2009, 2011-2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 /* Written by Bruno Haible <bruno@clisp.org>, 2009. */
21 /* Override setlocale() so that when the default locale is requested
22 (locale = ""), the environment variables LC_ALL, LC_*, and LANG are considered.
24 Also include all the functionality from libintl's setlocale() override. */
26 /* Please keep this file in sync with
27 gettext/gettext-runtime/intl/setlocale.c ! */
35 #include "localename.h"
41 /* Return string representation of locale category CATEGORY. */
/* The result is the name of the category spelled as a string literal.  In this
   file it is passed as the second argument of gl_locale_name_environ.  */
43 category_to_name (int category
)
/* Each assignment below stores the literal name of one category in retval.  */
50 retval
= "LC_COLLATE";
56 retval
= "LC_MONETARY";
59 retval
= "LC_NUMERIC";
65 retval
= "LC_MESSAGES";
68 /* If you have a better idea for a default value let me know. */
75 # if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
77 /* The native Windows setlocale() function expects locale names of the form
78 "German" or "German_Germany" or "DEU", but not "de" or "de_DE". We need
79 to convert the names from the form with ISO 639 language code and ISO 3166
80 country code to the form with English names or with three-letter identifier.
81 The three-letter identifiers known by a Windows XP SP2 or SP3 are:
82 AFK Afrikaans_South Africa.1252
83 ARA Arabic_Saudi Arabia.1256
84 ARB Arabic_Lebanon.1256
86 ARG Arabic_Algeria.1256
87 ARH Arabic_Bahrain.1256
89 ARJ Arabic_Jordan.1256
90 ARK Arabic_Kuwait.1256
92 ARM Arabic_Morocco.1256
96 ART Arabic_Tunisia.1256
97 ARU Arabic_U.A.E..1256
99 AZE Azeri (Latin)_Azerbaijan.1254
100 BEL Belarusian_Belarus.1251
101 BGR Bulgarian_Bulgaria.1251
102 BSB Bosnian_Bosnia and Herzegovina.1250
103 BSC Bosnian (Cyrillic)_Bosnia and Herzegovina.1250 (wrong encoding!)
104 CAT Catalan_Spain.1252
105 CHH Chinese_Hong Kong S.A.R..950
106 CHI Chinese_Singapore.936
107 CHS Chinese_People's Republic of China.936
108 CHT Chinese_Taiwan.950
109 CSY Czech_Czech Republic.1250
110 CYM Welsh_United Kingdom.1252
111 DAN Danish_Denmark.1252
112 DEA German_Austria.1252
113 DEC German_Liechtenstein.1252
114 DEL German_Luxembourg.1252
115 DES German_Switzerland.1252
116 DEU German_Germany.1252
117 ELL Greek_Greece.1253
118 ENA English_Australia.1252
119 ENB English_Caribbean.1252
120 ENC English_Canada.1252
121 ENG English_United Kingdom.1252
122 ENI English_Ireland.1252
123 ENJ English_Jamaica.1252
124 ENL English_Belize.1252
125 ENP English_Republic of the Philippines.1252
126 ENS English_South Africa.1252
127 ENT English_Trinidad and Tobago.1252
128 ENU English_United States.1252
129 ENW English_Zimbabwe.1252
130 ENZ English_New Zealand.1252
131 ESA Spanish_Panama.1252
132 ESB Spanish_Bolivia.1252
133 ESC Spanish_Costa Rica.1252
134 ESD Spanish_Dominican Republic.1252
135 ESE Spanish_El Salvador.1252
136 ESF Spanish_Ecuador.1252
137 ESG Spanish_Guatemala.1252
138 ESH Spanish_Honduras.1252
139 ESI Spanish_Nicaragua.1252
140 ESL Spanish_Chile.1252
141 ESM Spanish_Mexico.1252
142 ESN Spanish_Spain.1252
143 ESO Spanish_Colombia.1252
144 ESP Spanish_Spain.1252
145 ESR Spanish_Peru.1252
146 ESS Spanish_Argentina.1252
147 ESU Spanish_Puerto Rico.1252
148 ESV Spanish_Venezuela.1252
149 ESY Spanish_Uruguay.1252
150 ESZ Spanish_Paraguay.1252
151 ETI Estonian_Estonia.1257
152 EUQ Basque_Spain.1252
154 FIN Finnish_Finland.1252
155 FOS Faroese_Faroe Islands.1252
156 FPO Filipino_Philippines.1252
157 FRA French_France.1252
158 FRB French_Belgium.1252
159 FRC French_Canada.1252
160 FRL French_Luxembourg.1252
161 FRM French_Principality of Monaco.1252
162 FRS French_Switzerland.1252
163 FYN Frisian_Netherlands.1252
164 GLC Galician_Spain.1252
165 HEB Hebrew_Israel.1255
166 HRB Croatian_Bosnia and Herzegovina.1250
167 HRV Croatian_Croatia.1250
168 HUN Hungarian_Hungary.1250
169 IND Indonesian_Indonesia.1252
170 IRE Irish_Ireland.1252
171 ISL Icelandic_Iceland.1252
172 ITA Italian_Italy.1252
173 ITS Italian_Switzerland.1252
174 IUK Inuktitut (Latin)_Canada.1252
175 JPN Japanese_Japan.932
176 KKZ Kazakh_Kazakhstan.1251
178 KYR Kyrgyz_Kyrgyzstan.1251
179 LBX Luxembourgish_Luxembourg.1252
180 LTH Lithuanian_Lithuania.1257
181 LVI Latvian_Latvia.1257
182 MKI FYRO Macedonian_Former Yugoslav Republic of Macedonia.1251
183 MON Mongolian_Mongolia.1251
184 MPD Mapudungun_Chile.1252
185 MSB Malay_Brunei Darussalam.1252
186 MSL Malay_Malaysia.1252
187 MWK Mohawk_Canada.1252
188 NLB Dutch_Belgium.1252
189 NLD Dutch_Netherlands.1252
190 NON Norwegian-Nynorsk_Norway.1252
191 NOR Norwegian (Bokmål)_Norway.1252
192 NSO Northern Sotho_South Africa.1252
193 PLK Polish_Poland.1250
194 PTB Portuguese_Brazil.1252
195 PTG Portuguese_Portugal.1252
196 QUB Quechua_Bolivia.1252
197 QUE Quechua_Ecuador.1252
198 QUP Quechua_Peru.1252
199 RMC Romansh_Switzerland.1252
200 ROM Romanian_Romania.1250
201 RUS Russian_Russia.1251
202 SKY Slovak_Slovakia.1250
203 SLV Slovenian_Slovenia.1250
204 SMA Sami (Southern)_Norway.1252
205 SMB Sami (Southern)_Sweden.1252
206 SME Sami (Northern)_Norway.1252
207 SMF Sami (Northern)_Sweden.1252
208 SMG Sami (Northern)_Finland.1252
209 SMJ Sami (Lule)_Norway.1252
210 SMK Sami (Lule)_Sweden.1252
211 SMN Sami (Inari)_Finland.1252
212 SMS Sami (Skolt)_Finland.1252
213 SQI Albanian_Albania.1250
214 SRB Serbian (Cyrillic)_Serbia and Montenegro.1251
215 SRL Serbian (Latin)_Serbia and Montenegro.1250
216 SRN Serbian (Cyrillic)_Bosnia and Herzegovina.1251
217 SRS Serbian (Latin)_Bosnia and Herzegovina.1250
218 SVE Swedish_Sweden.1252
219 SVF Swedish_Finland.1252
220 SWK Swahili_Kenya.1252
221 THA Thai_Thailand.874
222 TRK Turkish_Turkey.1254
223 TSN Tswana_South Africa.1252
224 TTT Tatar_Russia.1251
225 UKR Ukrainian_Ukraine.1251
226 URD Urdu_Islamic Republic of Pakistan.1256
227 USA English_United States.1252
228 UZB Uzbek (Latin)_Uzbekistan.1254
229 VIT Vietnamese_Viet Nam.1258
230 XHO Xhosa_South Africa.1252
231 ZHH Chinese_Hong Kong S.A.R..950
232 ZHI Chinese_Singapore.936
233 ZHM Chinese_Macau S.A.R..950
234 ZUL Zulu_South Africa.1252 */
237 /* Table from ISO 639 language code, optionally with country or script suffix,
to English name.
239 Keep in sync with the gl_locale_name_from_win32_LANGID function in
localename.c! */
/* Each entry pairs a code (first string) with the name handed to setlocale
   (second string, read as the english field).  The entries must stay in
   strcmp order of the code, because search performs a binary search on it.
   A code may occur several times in a row (for example gd, mk and se); search
   then reports the whole run and setlocale_unixlike tries the names in table
   order.  */
246 static const struct table_entry language_table
[] =
248 { "af", "Afrikaans" },
251 { "arn", "Mapudungun" },
252 { "as", "Assamese" },
253 { "az@cyrillic", "Azeri (Cyrillic)" },
254 { "az@latin", "Azeri (Latin)" },
256 { "be", "Belarusian" },
257 { "ber", "Tamazight" },
258 { "ber@arabic", "Tamazight (Arabic)" },
259 { "ber@latin", "Tamazight (Latin)" },
260 { "bg", "Bulgarian" },
263 { "bn_BD", "Bengali (Bangladesh)" },
264 { "bn_IN", "Bengali (India)" },
268 { "bs", "BSB" }, /* "Bosnian (Latin)" */
269 { "bs@cyrillic", "BSC" }, /* Bosnian (Cyrillic) */
271 { "chr", "Cherokee" },
272 { "co", "Corsican" },
273 { "cpe", "Hawaiian" },
278 { "dsb", "Lower Sorbian" },
283 { "et", "Estonian" },
286 { "ff", "Fulfulde" },
288 { "fo", "Faroese" }, /* "Faeroese" does not work */
291 { "ga", "IRE" }, /* Gaelic (Ireland) */
292 { "gd", "Gaelic (Scotland)" },
293 { "gd", "Scottish Gaelic" },
294 { "gl", "Galician" },
296 { "gsw", "Alsatian" },
297 { "gu", "Gujarati" },
301 { "hr", "Croatian" },
302 { "hsb", "Upper Sorbian" },
303 { "hu", "Hungarian" },
304 { "hy", "Armenian" },
305 { "id", "Indonesian" },
308 { "is", "Icelandic" },
310 { "iu", "IUK" }, /* Inuktitut */
311 { "ja", "Japanese" },
312 { "ka", "Georgian" },
314 { "kl", "Greenlandic" },
315 { "km", "Cambodian" },
319 { "kok", "Konkani" },
321 { "ks", "Kashmiri" },
322 { "ks_IN", "Kashmiri_India" },
323 { "ks_PK", "Kashmiri (Arabic)_Pakistan" },
326 { "lb", "Luxembourgish" },
328 { "lt", "Lithuanian" },
331 { "mk", "FYRO Macedonian" },
332 { "mk", "Macedonian" },
333 { "ml", "Malayalam" },
334 { "mn", "Mongolian" },
335 { "mni", "Manipuri" },
341 { "nb", "NOR" }, /* Norwegian Bokmål */
345 { "nn", "NON" }, /* Norwegian Nynorsk */
346 { "no", "Norwegian" },
347 { "nso", "Northern Sotho" },
353 { "pap", "Papiamentu" },
357 { "pt", "Portuguese" },
359 { "qut", "K'iche'" },
361 { "ro", "Romanian" },
363 { "rw", "Kinyarwanda" },
364 { "sa", "Sanskrit" },
367 { "se", "Sami (Northern)" },
368 { "se", "Northern Sami" },
369 { "si", "Sinhalese" },
371 { "sl", "Slovenian" },
372 { "sma", "Sami (Southern)" },
373 { "sma", "Southern Sami" },
374 { "smj", "Sami (Lule)" },
375 { "smj", "Lule Sami" },
376 { "smn", "Sami (Inari)" },
377 { "smn", "Inari Sami" },
378 { "sms", "Sami (Skolt)" },
379 { "sms", "Skolt Sami" },
381 { "sq", "Albanian" },
382 { "sr", "Serbian (Latin)" },
383 { "sr@cyrillic", "SRB" }, /* Serbian (Cyrillic) */
391 { "ti", "Tigrinya" },
393 { "tl", "Filipino" },
399 { "uk", "Ukrainian" },
402 { "uz", "Uzbek (Latin)" },
403 { "uz@cyrillic", "Uzbek (Cyrillic)" },
405 { "vi", "Vietnamese" },
406 { "wen", "Sorbian" },
415 /* Table from ISO 3166 country code to English name.
416 Keep in sync with the gl_locale_name_from_win32_LANGID function in
localename.c! */
/* Each entry pairs a two-letter country code with the name that
   setlocale_unixlike appends, after an underscore, to a language name from
   language_table.  The entries must stay in strcmp order of the code, because
   search performs a binary search on it.  A code may occur more than once in
   a row (HK does); every name of such a run is tried.  */
418 static const struct table_entry country_table
[] =
421 { "AF", "Afghanistan" },
424 { "AN", "Netherlands Antilles" },
425 { "AR", "Argentina" },
427 { "AU", "Australia" },
428 { "AZ", "Azerbaijan" },
429 { "BA", "Bosnia and Herzegovina" },
430 { "BD", "Bangladesh" },
432 { "BG", "Bulgaria" },
434 { "BN", "Brunei Darussalam" },
442 { "CH", "Switzerland" },
443 { "CI", "Cote d'Ivoire" },
445 { "CM", "Cameroon" },
446 { "CN", "People's Republic of China" },
447 { "CO", "Colombia" },
448 { "CR", "Costa Rica" },
449 { "CS", "Serbia and Montenegro" },
450 { "CZ", "Czech Republic" },
453 { "DO", "Dominican Republic" },
460 { "ET", "Ethiopia" },
462 { "FO", "Faroe Islands" },
464 { "GB", "United Kingdom" },
465 { "GD", "Caribbean" },
467 { "GL", "Greenland" },
469 { "GT", "Guatemala" },
470 { "HK", "Hong Kong" },
471 { "HK", "Hong Kong S.A.R." },
472 { "HN", "Honduras" },
476 { "ID", "Indonesia" },
488 { "KG", "Kyrgyzstan" },
489 { "KH", "Cambodia" },
490 { "KR", "South Korea" },
492 { "KZ", "Kazakhstan" },
495 { "LI", "Liechtenstein" },
496 { "LK", "Sri Lanka" },
497 { "LT", "Lithuania" },
498 { "LU", "Luxembourg" },
502 { "MC", "Principality of Monaco" },
505 { "ME", "Montenegro" },
506 { "MK", "Former Yugoslav Republic of Macedonia" },
509 { "MN", "Mongolia" },
510 { "MO", "Macau S.A.R." },
512 { "MV", "Maldives" },
514 { "MY", "Malaysia" },
516 { "NI", "Nicaragua" },
517 { "NL", "Netherlands" },
520 { "NZ", "New Zealand" },
524 { "PH", "Philippines" },
525 { "PK", "Islamic Republic of Pakistan" },
527 { "PR", "Puerto Rico" },
528 { "PT", "Portugal" },
529 { "PY", "Paraguay" },
536 { "SA", "Saudi Arabia" },
538 { "SG", "Singapore" },
539 { "SI", "Slovenia" },
543 { "SR", "Suriname" },
544 { "SV", "El Salvador" },
546 { "TH", "Thailand" },
547 { "TJ", "Tajikistan" },
548 { "TM", "Turkmenistan" },
551 { "TT", "Trinidad and Tobago" },
553 { "TZ", "Tanzania" },
555 { "US", "United States" },
558 { "VE", "Venezuela" },
559 { "VN", "Viet Nam" },
561 { "ZA", "South Africa" },
565 /* Given a string STRING, find the set of indices i such that TABLE[i].code is
566 the given STRING. It is a range [lo,hi-1]. */
/* A run of consecutive table indices.  LO is the first index of the run and HI
   is one past its last index, so the run is empty exactly when lo == hi.  */
567 typedef struct { size_t lo
; size_t hi
; } range_t
;
/* Look up STRING in TABLE, an array of TABLE_SIZE entries sorted by their code
   field, comparing with strcmp.  A binary search first locates one matching
   entry; the two scans further down then widen that hit to the complete run
   of entries whose code equals STRING.  */
569 search (const struct table_entry
*table
, size_t table_size
, const char *string
,
572 /* The table is sorted. Perform a binary search. */
573 size_t hi
= table_size
;
578 for i < lo, strcmp (table[i].code, string) < 0,
579 for i >= hi, strcmp (table[i].code, string) > 0. */
/* Probe the entry in the middle of the window delimited by lo and hi.  */
580 size_t mid
= (hi
+ lo
) >> 1; /* >= lo, < hi */
581 int cmp
= strcmp (table
[mid
].code
, string
);
589 strcmp (language_table[i].code, string) == 0.
590 Find the entire interval of such i. */
/* Scan from MID towards LO; an entry that compares less than STRING marks the
   lower end of the run.  */
594 for (i
= mid
; i
> lo
; )
597 if (strcmp (table
[i
].code
, string
) < 0)
/* Scan from MID towards HI; an entry that compares greater than STRING marks
   the upper end of the run.  */
607 for (i
= mid
; i
< hi
; i
++)
609 if (strcmp (table
[i
].code
, string
) > 0)
617 strcmp (language_table[i].code, string) == 0
618 is the interval [lo, hi-1]. */
626 /* Like setlocale, but accept also locale names in the form ll or ll_CC,
627 where ll is an ISO 639 language code and CC is an ISO 3166 country code. */
/* The attempts are made in this order, each one a call to setlocale with
   CATEGORY:
     1. LOCALE unchanged;
     2. LOCALE with the codeset part removed;
     3. each name that language_table lists for that shortened string;
     4. after splitting off the territory, each language name joined by an
        underscore with each country name from country_table;
     5. each language name alone, without any country.  */
629 setlocale_unixlike (int category
, const char *locale
)
636 /* The native Windows implementation of setlocale understands the special
637 locale name "C", but not "POSIX". Therefore map "POSIX" to "C". */
638 #if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
639 if (locale
!= NULL
&& strcmp (locale
, "POSIX") == 0)
643 /* First, try setlocale with the original argument unchanged. */
644 result
= setlocale (category
, locale
);
648 /* Otherwise, assume the argument is in the form
649 language[_territory][.codeset][@modifier]
650 and try to map it using the tables. */
/* The mapping is attempted only for names short enough to fit, together with
   their terminating NUL, into llCC_buf.  */
651 if (strlen (locale
) < sizeof (llCC_buf
))
653 /* Second try: Remove the codeset part. */
655 const char *p
= locale
;
658 /* Copy the part before the dot. */
659 for (; *p
!= '\0' && *p
!= '.'; p
++, q
++)
662 /* Skip the part up to the '@', if any. */
663 for (; *p
!= '\0' && *p
!= '@'; p
++)
665 /* Copy the part starting with '@', if any. */
666 for (; *p
!= '\0'; p
++, q
++)
670 /* llCC_buf now contains
671 language[_territory][@modifier]
673 if (strcmp (llCC_buf
, locale
) != 0)
675 result
= setlocale (category
, llCC_buf
);
679 /* Look it up in language_table. */
684 search (language_table
,
685 sizeof (language_table
) / sizeof (language_table
[0]),
/* The run found by search may be empty; otherwise every name in it is tried in
   table order.  */
689 for (i
= range
.lo
; i
< range
.hi
; i
++)
691 /* Try the replacement in language_table[i]. */
692 result
= setlocale (category
, language_table
[i
].english
);
697 /* Split language[_territory][@modifier]
698 into ll_buf = language[@modifier]
699 and CC_buf = territory
702 const char *underscore
= strchr (llCC_buf
, '_');
703 if (underscore
!= NULL
)
705 const char *territory_start
= underscore
+ 1;
706 const char *territory_end
= strchr (territory_start
, '@');
707 if (territory_end
== NULL
)
708 territory_end
= territory_start
+ strlen (territory_start
);
710 memcpy (ll_buf
, llCC_buf
, underscore
- llCC_buf
);
711 strcpy (ll_buf
+ (underscore
- llCC_buf
), territory_end
);
713 memcpy (CC_buf
, territory_start
, territory_end
- territory_start
);
714 CC_buf
[territory_end
- territory_start
] = '\0';
717 /* Look up ll_buf in language_table
718 and CC_buf in country_table. */
719 range_t language_range
;
721 search (language_table
,
722 sizeof (language_table
) / sizeof (language_table
[0]),
/* Continue only when at least one language entry matched.  */
725 if (language_range
.lo
< language_range
.hi
)
727 range_t country_range
;
729 search (country_table
,
730 sizeof (country_table
) / sizeof (country_table
[0]),
/* Combinations are tried only when at least one country entry matched.  */
733 if (country_range
.lo
< country_range
.hi
)
738 for (i
= language_range
.lo
; i
< language_range
.hi
; i
++)
739 for (j
= country_range
.lo
; j
< country_range
.hi
; j
++)
741 /* Concatenate the replacements. */
742 const char *part1
= language_table
[i
].english
;
743 size_t part1_len
= strlen (part1
);
744 const char *part2
= country_table
[j
].english
;
/* part2_len counts the terminating NUL of part2 as well, so that the second
   memcpy below also terminates buf.  */
745 size_t part2_len
= strlen (part2
) + 1;
/* Space needed in buf: part1, one underscore, then part2 with its NUL.  */
748 if (!(part1_len
+ 1 + part2_len
<= sizeof (buf
)))
750 memcpy (buf
, part1
, part1_len
);
751 buf
[part1_len
] = '_';
752 memcpy (buf
+ part1_len
+ 1, part2
, part2_len
);
754 /* Try the concatenated replacements. */
755 result
= setlocale (category
, buf
);
761 /* Try omitting the country entirely. This may set a locale
762 corresponding to the wrong country, but is better than
767 for (i
= language_range
.lo
; i
< language_range
.hi
; i
++)
769 /* Try only the language replacement. */
771 setlocale (category
, language_table
[i
].english
);
/* Here setlocale_unixlike is just another name for setlocale.
   NOTE(review): presumably this is the branch taken when the table-driven
   function of the same name is not compiled -- confirm against the enclosing
   conditional.  */
787 # define setlocale_unixlike setlocale
790 # if LC_MESSAGES == 1729
792 /* The system does not store an LC_MESSAGES locale category. Do it here. */
/* Holds the name stored for LC_MESSAGES by setlocale_single; its initial value
   is the C locale.  The buffer has room for 63 characters plus the NUL.  */
793 static char lc_messages_name
[64] = "C";
795 /* Like setlocale, but support also LC_MESSAGES. */
/* For LC_MESSAGES the name is kept in lc_messages_name and that buffer is
   returned.  Every other category is forwarded to setlocale_unixlike.  */
797 setlocale_single (int category
, const char *locale
)
799 if (category
== LC_MESSAGES
)
/* Store the terminating NUL in the last byte first.  The strncpy that follows
   copies at most sizeof - 1 bytes, so it never overwrites that NUL, and a name
   longer than the buffer is truncated rather than left unterminated.  */
803 lc_messages_name
[sizeof (lc_messages_name
) - 1] = '\0';
804 strncpy (lc_messages_name
, locale
, sizeof (lc_messages_name
) - 1);
806 return lc_messages_name
;
/* Not LC_MESSAGES: delegate to setlocale_unixlike.  */
809 return setlocale_unixlike (category
, locale
);
/* Here setlocale_single is just another name for setlocale_unixlike.
   NOTE(review): presumably this is the branch taken when LC_MESSAGES is not
   the 1729 placeholder -- confirm against the enclosing conditional.  */
813 # define setlocale_single setlocale_unixlike
/* Replacement for setlocale.

   When LOCALE is the empty string, the locale name is taken from
   gl_locale_name_environ, falling back to gl_locale_name_default:
     - for LC_ALL, the current locale is backed up, the name found for
       LC_CTYPE is applied to LC_ALL through setlocale_unixlike, and then each
       category listed in categories is set through setlocale_single when its
       name differs; the backed-up locale is set again further down when its
       name is not empty;
     - for any other category, the name found for it is passed to
       setlocale_single.

   On native Windows, a request for LC_ALL whose LOCALE contains a dot is
   applied through setlocale_unixlike and then checked: if LC_CTYPE ended up
   as the C locale, the previous locale is put back.

   Every other request is passed to setlocale_single unchanged.  */
817 rpl_setlocale (int category
, const char *locale
)
819 if (locale
!= NULL
&& locale
[0] == '\0')
821 /* A request to set the current locale to the default locale. */
822 if (category
== LC_ALL
)
824 /* Set LC_CTYPE first. Then the other categories. */
825 static int const categories
[] =
834 const char *base_name
;
837 /* Back up the old locale, in case one of the steps fails. */
838 saved_locale
= setlocale (LC_ALL
, NULL
);
839 if (saved_locale
== NULL
)
/* Keep a private copy of the string returned by setlocale, so that it is still
   available after the setlocale calls that follow.  */
841 saved_locale
= strdup (saved_locale
);
842 if (saved_locale
== NULL
)
845 /* Set LC_CTYPE category. Set all other categories (except possibly
846 LC_MESSAGES) to the same value in the same call; this is likely to
849 gl_locale_name_environ (LC_CTYPE
, category_to_name (LC_CTYPE
));
850 if (base_name
== NULL
)
851 base_name
= gl_locale_name_default ();
853 if (setlocale_unixlike (LC_ALL
, base_name
) == NULL
)
855 # if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
856 /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
857 LC_CTYPE category to an invalid value ("C") when it does not
858 support the specified encoding. Report a failure instead. */
859 if (strchr (base_name
, '.') != NULL
860 && strcmp (setlocale (LC_CTYPE
, NULL
), "C") == 0)
864 for (i
= 0; i
< sizeof (categories
) / sizeof (categories
[0]); i
++)
866 int cat
= categories
[i
];
869 name
= gl_locale_name_environ (cat
, category_to_name (cat
));
871 name
= gl_locale_name_default ();
873 /* If name is the same as base_name, it has already been set
874 through the setlocale call before the loop. */
/* When LC_MESSAGES is the 1729 placeholder, that category is handed to
   setlocale_single regardless of whether its name equals base_name.  */
875 if (strcmp (name
, base_name
) != 0
876 # if LC_MESSAGES == 1729
877 || cat
== LC_MESSAGES
880 if (setlocale_single (cat
, name
) == NULL
)
884 /* All steps were successful. */
886 return setlocale (LC_ALL
, NULL
);
/* Put the backed-up locale in place again, unless its name is empty.  */
889 if (saved_locale
[0] != '\0') /* don't risk an endless recursion */
890 setlocale (LC_ALL
, saved_locale
);
897 gl_locale_name_environ (category
, category_to_name (category
));
899 name
= gl_locale_name_default ();
901 return setlocale_single (category
, name
);
906 # if (defined _WIN32 || defined __WIN32__) && ! defined __CYGWIN__
907 if (category
== LC_ALL
&& locale
!= NULL
&& strchr (locale
, '.') != NULL
)
911 /* Back up the old locale. */
912 saved_locale
= setlocale (LC_ALL
, NULL
);
913 if (saved_locale
== NULL
)
/* As above, work on a private copy of the string returned by setlocale.  */
915 saved_locale
= strdup (saved_locale
);
916 if (saved_locale
== NULL
)
919 if (setlocale_unixlike (LC_ALL
, locale
) == NULL
)
925 /* On native Windows, setlocale(LC_ALL,...) may succeed but set the
926 LC_CTYPE category to an invalid value ("C") when it does not
927 support the specified encoding. Report a failure instead. */
928 if (strcmp (setlocale (LC_CTYPE
, NULL
), "C") == 0)
930 if (saved_locale
[0] != '\0') /* don't risk an endless recursion */
931 setlocale (LC_ALL
, saved_locale
);
936 /* It was really successful. */
938 return setlocale (LC_ALL
, NULL
);
/* All remaining requests need no special handling.  */
942 return setlocale_single (category
, locale
);