Backed out 2 changesets (bug 903746) for causing non-unified build bustages on nsIPri...
[gecko.git] / intl / locale / nsLanguageAtomService.cpp
blob6c57fb8743bc1e5869bbf45c0851a5239dca67ff
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLanguageAtomService.h"
7 #include "nsUConvPropertySearch.h"
8 #include "nsUnicharUtils.h"
9 #include "nsAtom.h"
10 #include "nsGkAtoms.h"
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/ClearOnShutdown.h"
13 #include "mozilla/Encoding.h"
14 #include "mozilla/intl/Locale.h"
15 #include "mozilla/intl/OSPreferences.h"
16 #include "mozilla/ServoBindings.h"
17 #include "mozilla/ServoUtils.h"
18 #include "mozilla/StaticPtr.h"
20 using namespace mozilla;
21 using mozilla::intl::OSPreferences;
23 static constexpr nsUConvProp encodingsGroups[] = {
24 #include "encodingsgroups.properties.h"
27 // List of mozilla internal x-* tags that map to themselves (see bug 256257)
28 static constexpr nsStaticAtom* kLangGroups[] = {
29 // This list must be sorted!
30 nsGkAtoms::x_armn, nsGkAtoms::x_cyrillic, nsGkAtoms::x_devanagari,
31 nsGkAtoms::x_geor, nsGkAtoms::x_math, nsGkAtoms::x_tamil,
32 nsGkAtoms::Unicode, nsGkAtoms::x_western
33 // These self-mappings are not necessary unless somebody use them to specify
34 // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257)
35 // x-beng=x-beng
36 // x-cans=x-cans
37 // x-ethi=x-ethi
38 // x-guru=x-guru
39 // x-gujr=x-gujr
40 // x-khmr=x-khmr
41 // x-mlym=x-mlym
44 // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups.
45 static constexpr struct {
46 const char* mTag;
47 nsStaticAtom* mAtom;
48 } kScriptLangGroup[] = {
49 // This list must be sorted by script code!
50 {"Arab", nsGkAtoms::ar},
51 {"Armn", nsGkAtoms::x_armn},
52 {"Beng", nsGkAtoms::x_beng},
53 {"Cans", nsGkAtoms::x_cans},
54 {"Cyrl", nsGkAtoms::x_cyrillic},
55 {"Deva", nsGkAtoms::x_devanagari},
56 {"Ethi", nsGkAtoms::x_ethi},
57 {"Geok", nsGkAtoms::x_geor},
58 {"Geor", nsGkAtoms::x_geor},
59 {"Grek", nsGkAtoms::el},
60 {"Gujr", nsGkAtoms::x_gujr},
61 {"Guru", nsGkAtoms::x_guru},
62 {"Hang", nsGkAtoms::ko},
63 // Hani is not mapped to a specific langGroup, we prefer to look at the
64 // primary language subtag in this case
65 {"Hans", nsGkAtoms::Chinese},
66 // Hant is special-cased in code
67 // Hant=zh-HK
68 // Hant=zh-TW
69 {"Hebr", nsGkAtoms::he},
70 {"Hira", nsGkAtoms::Japanese},
71 {"Jpan", nsGkAtoms::Japanese},
72 {"Kana", nsGkAtoms::Japanese},
73 {"Khmr", nsGkAtoms::x_khmr},
74 {"Knda", nsGkAtoms::x_knda},
75 {"Kore", nsGkAtoms::ko},
76 {"Latn", nsGkAtoms::x_western},
77 {"Mlym", nsGkAtoms::x_mlym},
78 {"Orya", nsGkAtoms::x_orya},
79 {"Sinh", nsGkAtoms::x_sinh},
80 {"Taml", nsGkAtoms::x_tamil},
81 {"Telu", nsGkAtoms::x_telu},
82 {"Thai", nsGkAtoms::th},
83 {"Tibt", nsGkAtoms::x_tibt}};
85 static StaticAutoPtr<nsLanguageAtomService> gLangAtomService;
87 // static
88 nsLanguageAtomService* nsLanguageAtomService::GetService() {
89 if (!gLangAtomService) {
90 gLangAtomService = new nsLanguageAtomService();
92 return gLangAtomService.get();
95 // static
96 void nsLanguageAtomService::Shutdown() { gLangAtomService = nullptr; }
98 nsStaticAtom* nsLanguageAtomService::LookupLanguage(
99 const nsACString& aLanguage) {
100 nsAutoCString lowered(aLanguage);
101 ToLowerCase(lowered);
103 RefPtr<nsAtom> lang = NS_Atomize(lowered);
104 return GetLanguageGroup(lang);
107 already_AddRefed<nsAtom> nsLanguageAtomService::LookupCharSet(
108 NotNull<const Encoding*> aEncoding) {
109 nsAutoCString charset;
110 aEncoding->Name(charset);
111 nsAutoCString group;
112 if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
113 encodingsGroups, ArrayLength(encodingsGroups), charset, group))) {
114 return RefPtr<nsAtom>(nsGkAtoms::Unicode).forget();
116 return NS_Atomize(group);
119 nsAtom* nsLanguageAtomService::GetLocaleLanguage() {
120 do {
121 if (!mLocaleLanguage) {
122 AutoTArray<nsCString, 10> regionalPrefsLocales;
123 if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales(
124 regionalPrefsLocales))) {
125 // use lowercase for all language atoms
126 ToLowerCase(regionalPrefsLocales[0]);
127 mLocaleLanguage = NS_Atomize(regionalPrefsLocales[0]);
128 } else {
129 nsAutoCString locale;
130 OSPreferences::GetInstance()->GetSystemLocale(locale);
132 ToLowerCase(locale); // use lowercase for all language atoms
133 mLocaleLanguage = NS_Atomize(locale);
136 } while (0);
138 return mLocaleLanguage;
141 nsStaticAtom* nsLanguageAtomService::GetLanguageGroup(nsAtom* aLanguage,
142 bool* aNeedsToCache) {
143 if (aNeedsToCache) {
144 if (nsStaticAtom* atom = mLangToGroup.Get(aLanguage)) {
145 return atom;
147 *aNeedsToCache = true;
148 return nullptr;
151 return mLangToGroup.LookupOrInsertWith(aLanguage, [&] {
152 AssertIsMainThreadOrServoFontMetricsLocked();
153 return GetUncachedLanguageGroup(aLanguage);
157 nsStaticAtom* nsLanguageAtomService::GetUncachedLanguageGroup(
158 nsAtom* aLanguage) const {
159 nsAutoCString langStr;
160 aLanguage->ToUTF8String(langStr);
161 ToLowerCase(langStr);
163 if (langStr[0] == 'x' && langStr[1] == '-') {
164 // Internal x-* langGroup codes map to themselves (see bug 256257)
165 for (nsStaticAtom* langGroup : kLangGroups) {
166 if (langGroup == aLanguage) {
167 return langGroup;
169 if (aLanguage->IsAsciiLowercase()) {
170 continue;
172 // Do the slow ascii-case-insensitive comparison just if needed.
173 nsDependentAtomString string(langGroup);
174 if (string.EqualsASCII(langStr.get(), langStr.Length())) {
175 return langGroup;
178 } else {
179 // If the lang code can be parsed as BCP47, look up its (likely) script.
181 // https://bugzilla.mozilla.org/show_bug.cgi?id=1618034:
182 // First strip any private subtags that would cause Locale to reject the
183 // tag as non-wellformed.
184 nsACString::const_iterator start, end;
185 langStr.BeginReading(start);
186 langStr.EndReading(end);
187 if (FindInReadable("-x-"_ns, start, end)) {
188 // The substring we want ends at the beginning of the "-x-" subtag.
189 langStr.Truncate(start.get() - langStr.BeginReading());
192 intl::Locale loc;
193 auto result = intl::LocaleParser::TryParse(langStr, loc);
194 if (!result.isOk()) {
195 // Did the author (wrongly) use '_' instead of '-' to separate subtags?
196 // If so, fix it up and re-try parsing.
197 if (langStr.Contains('_')) {
198 langStr.ReplaceChar('_', '-');
200 // Throw away the partially parsed locale and re-start parsing.
201 loc = {};
202 result = intl::LocaleParser::TryParse(langStr, loc);
205 if (result.isOk() && loc.Canonicalize().isOk()) {
206 // Fill in script subtag if not present.
207 if (loc.Script().Missing()) {
208 if (loc.AddLikelySubtags().isErr()) {
209 // Fall back to x-unicode if no match was found
210 return nsGkAtoms::Unicode;
213 // Traditional Chinese has separate prefs for Hong Kong / Taiwan;
214 // check the region subtag.
215 if (loc.Script().EqualTo("Hant")) {
216 if (loc.Region().EqualTo("HK")) {
217 return nsGkAtoms::HongKongChinese;
219 return nsGkAtoms::Taiwanese;
221 // Search list of known script subtags that map to langGroup codes.
222 size_t foundIndex;
223 Span<const char> scriptAsSpan = loc.Script().Span();
224 nsDependentCSubstring script(scriptAsSpan.data(), scriptAsSpan.size());
225 if (BinarySearchIf(
226 kScriptLangGroup, 0, ArrayLength(kScriptLangGroup),
227 [script](const auto& entry) -> int {
228 return Compare(script, nsDependentCString(entry.mTag));
230 &foundIndex)) {
231 return kScriptLangGroup[foundIndex].mAtom;
233 // Script subtag was not recognized (includes "Hani"); check the language
234 // subtag for CJK possibilities so that we'll prefer the appropriate font
235 // rather than falling back to the browser's hardcoded preference.
236 if (loc.Language().EqualTo("zh")) {
237 if (loc.Region().EqualTo("HK")) {
238 return nsGkAtoms::HongKongChinese;
240 if (loc.Region().EqualTo("TW")) {
241 return nsGkAtoms::Taiwanese;
243 return nsGkAtoms::Chinese;
245 if (loc.Language().EqualTo("ja")) {
246 return nsGkAtoms::Japanese;
248 if (loc.Language().EqualTo("ko")) {
249 return nsGkAtoms::ko;
254 // Fall back to x-unicode if no match was found
255 return nsGkAtoms::Unicode;