Bug 1849098 - Disable browser_searchMode_sessionStore.js for frequent failures. r...
[gecko.git] / intl / locale / nsLanguageAtomService.cpp
blob4a9d9b92d28918683a5fcb5a9bbe797842457049
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLanguageAtomService.h"
7 #include "nsUConvPropertySearch.h"
8 #include "nsUnicharUtils.h"
9 #include "nsAtom.h"
10 #include "nsGkAtoms.h"
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/ClearOnShutdown.h"
13 #include "mozilla/Encoding.h"
14 #include "mozilla/intl/Locale.h"
15 #include "mozilla/intl/OSPreferences.h"
16 #include "mozilla/ServoBindings.h"
17 #include "mozilla/ServoUtils.h"
19 using namespace mozilla;
20 using mozilla::intl::OSPreferences;
22 static constexpr nsUConvProp encodingsGroups[] = {
23 #include "encodingsgroups.properties.h"
26 // List of mozilla internal x-* tags that map to themselves (see bug 256257)
27 static constexpr nsStaticAtom* kLangGroups[] = {
28 // This list must be sorted!
29 nsGkAtoms::x_armn, nsGkAtoms::x_cyrillic, nsGkAtoms::x_devanagari,
30 nsGkAtoms::x_geor, nsGkAtoms::x_math, nsGkAtoms::x_tamil,
31 nsGkAtoms::Unicode, nsGkAtoms::x_western
32 // These self-mappings are not necessary unless somebody use them to specify
33 // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257)
34 // x-beng=x-beng
35 // x-cans=x-cans
36 // x-ethi=x-ethi
37 // x-guru=x-guru
38 // x-gujr=x-gujr
39 // x-khmr=x-khmr
40 // x-mlym=x-mlym
43 // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups.
44 static constexpr struct {
45 const char* mTag;
46 nsStaticAtom* mAtom;
47 } kScriptLangGroup[] = {
48 // This list must be sorted by script code!
49 {"Arab", nsGkAtoms::ar},
50 {"Armn", nsGkAtoms::x_armn},
51 {"Beng", nsGkAtoms::x_beng},
52 {"Cans", nsGkAtoms::x_cans},
53 {"Cyrl", nsGkAtoms::x_cyrillic},
54 {"Deva", nsGkAtoms::x_devanagari},
55 {"Ethi", nsGkAtoms::x_ethi},
56 {"Geok", nsGkAtoms::x_geor},
57 {"Geor", nsGkAtoms::x_geor},
58 {"Grek", nsGkAtoms::el},
59 {"Gujr", nsGkAtoms::x_gujr},
60 {"Guru", nsGkAtoms::x_guru},
61 {"Hang", nsGkAtoms::ko},
62 // Hani is not mapped to a specific langGroup, we prefer to look at the
63 // primary language subtag in this case
64 {"Hans", nsGkAtoms::Chinese},
65 // Hant is special-cased in code
66 // Hant=zh-HK
67 // Hant=zh-TW
68 {"Hebr", nsGkAtoms::he},
69 {"Hira", nsGkAtoms::Japanese},
70 {"Jpan", nsGkAtoms::Japanese},
71 {"Kana", nsGkAtoms::Japanese},
72 {"Khmr", nsGkAtoms::x_khmr},
73 {"Knda", nsGkAtoms::x_knda},
74 {"Kore", nsGkAtoms::ko},
75 {"Latn", nsGkAtoms::x_western},
76 {"Mlym", nsGkAtoms::x_mlym},
77 {"Orya", nsGkAtoms::x_orya},
78 {"Sinh", nsGkAtoms::x_sinh},
79 {"Taml", nsGkAtoms::x_tamil},
80 {"Telu", nsGkAtoms::x_telu},
81 {"Thai", nsGkAtoms::th},
82 {"Tibt", nsGkAtoms::x_tibt}};
84 static UniquePtr<nsLanguageAtomService> gLangAtomService;
86 // static
87 nsLanguageAtomService* nsLanguageAtomService::GetService() {
88 if (!gLangAtomService) {
89 gLangAtomService = MakeUnique<nsLanguageAtomService>();
91 return gLangAtomService.get();
94 // static
95 void nsLanguageAtomService::Shutdown() { gLangAtomService = nullptr; }
97 nsStaticAtom* nsLanguageAtomService::LookupLanguage(
98 const nsACString& aLanguage) {
99 nsAutoCString lowered(aLanguage);
100 ToLowerCase(lowered);
102 RefPtr<nsAtom> lang = NS_Atomize(lowered);
103 return GetLanguageGroup(lang);
106 already_AddRefed<nsAtom> nsLanguageAtomService::LookupCharSet(
107 NotNull<const Encoding*> aEncoding) {
108 nsAutoCString charset;
109 aEncoding->Name(charset);
110 nsAutoCString group;
111 if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
112 encodingsGroups, ArrayLength(encodingsGroups), charset, group))) {
113 return RefPtr<nsAtom>(nsGkAtoms::Unicode).forget();
115 return NS_Atomize(group);
118 nsAtom* nsLanguageAtomService::GetLocaleLanguage() {
119 do {
120 if (!mLocaleLanguage) {
121 AutoTArray<nsCString, 10> regionalPrefsLocales;
122 if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales(
123 regionalPrefsLocales))) {
124 // use lowercase for all language atoms
125 ToLowerCase(regionalPrefsLocales[0]);
126 mLocaleLanguage = NS_Atomize(regionalPrefsLocales[0]);
127 } else {
128 nsAutoCString locale;
129 OSPreferences::GetInstance()->GetSystemLocale(locale);
131 ToLowerCase(locale); // use lowercase for all language atoms
132 mLocaleLanguage = NS_Atomize(locale);
135 } while (0);
137 return mLocaleLanguage;
140 nsStaticAtom* nsLanguageAtomService::GetLanguageGroup(nsAtom* aLanguage,
141 bool* aNeedsToCache) {
142 if (aNeedsToCache) {
143 if (nsStaticAtom* atom = mLangToGroup.Get(aLanguage)) {
144 return atom;
146 *aNeedsToCache = true;
147 return nullptr;
150 return mLangToGroup.LookupOrInsertWith(aLanguage, [&] {
151 AssertIsMainThreadOrServoFontMetricsLocked();
152 return GetUncachedLanguageGroup(aLanguage);
156 nsStaticAtom* nsLanguageAtomService::GetUncachedLanguageGroup(
157 nsAtom* aLanguage) const {
158 nsAutoCString langStr;
159 aLanguage->ToUTF8String(langStr);
160 ToLowerCase(langStr);
162 if (langStr[0] == 'x' && langStr[1] == '-') {
163 // Internal x-* langGroup codes map to themselves (see bug 256257)
164 for (nsStaticAtom* langGroup : kLangGroups) {
165 if (langGroup == aLanguage) {
166 return langGroup;
168 if (aLanguage->IsAsciiLowercase()) {
169 continue;
171 // Do the slow ascii-case-insensitive comparison just if needed.
172 nsDependentAtomString string(langGroup);
173 if (string.EqualsASCII(langStr.get(), langStr.Length())) {
174 return langGroup;
177 } else {
178 // If the lang code can be parsed as BCP47, look up its (likely) script.
180 // https://bugzilla.mozilla.org/show_bug.cgi?id=1618034:
181 // First strip any private subtags that would cause Locale to reject the
182 // tag as non-wellformed.
183 nsACString::const_iterator start, end;
184 langStr.BeginReading(start);
185 langStr.EndReading(end);
186 if (FindInReadable("-x-"_ns, start, end)) {
187 // The substring we want ends at the beginning of the "-x-" subtag.
188 langStr.Truncate(start.get() - langStr.BeginReading());
191 intl::Locale loc;
192 auto result = intl::LocaleParser::TryParse(langStr, loc);
193 if (!result.isOk()) {
194 // Did the author (wrongly) use '_' instead of '-' to separate subtags?
195 // If so, fix it up and re-try parsing.
196 if (langStr.Contains('_')) {
197 langStr.ReplaceChar('_', '-');
199 // Throw away the partially parsed locale and re-start parsing.
200 loc = {};
201 result = intl::LocaleParser::TryParse(langStr, loc);
204 if (result.isOk() && loc.Canonicalize().isOk()) {
205 // Fill in script subtag if not present.
206 if (loc.Script().Missing()) {
207 if (loc.AddLikelySubtags().isErr()) {
208 // Fall back to x-unicode if no match was found
209 return nsGkAtoms::Unicode;
212 // Traditional Chinese has separate prefs for Hong Kong / Taiwan;
213 // check the region subtag.
214 if (loc.Script().EqualTo("Hant")) {
215 if (loc.Region().EqualTo("HK")) {
216 return nsGkAtoms::HongKongChinese;
218 return nsGkAtoms::Taiwanese;
220 // Search list of known script subtags that map to langGroup codes.
221 size_t foundIndex;
222 Span<const char> scriptAsSpan = loc.Script().Span();
223 nsDependentCSubstring script(scriptAsSpan.data(), scriptAsSpan.size());
224 if (BinarySearchIf(
225 kScriptLangGroup, 0, ArrayLength(kScriptLangGroup),
226 [script](const auto& entry) -> int {
227 return Compare(script, nsDependentCString(entry.mTag));
229 &foundIndex)) {
230 return kScriptLangGroup[foundIndex].mAtom;
232 // Script subtag was not recognized (includes "Hani"); check the language
233 // subtag for CJK possibilities so that we'll prefer the appropriate font
234 // rather than falling back to the browser's hardcoded preference.
235 if (loc.Language().EqualTo("zh")) {
236 if (loc.Region().EqualTo("HK")) {
237 return nsGkAtoms::HongKongChinese;
239 if (loc.Region().EqualTo("TW")) {
240 return nsGkAtoms::Taiwanese;
242 return nsGkAtoms::Chinese;
244 if (loc.Language().EqualTo("ja")) {
245 return nsGkAtoms::Japanese;
247 if (loc.Language().EqualTo("ko")) {
248 return nsGkAtoms::ko;
253 // Fall back to x-unicode if no match was found
254 return nsGkAtoms::Unicode;