1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLanguageAtomService.h"
7 #include "nsUConvPropertySearch.h"
8 #include "nsUnicharUtils.h"
10 #include "nsGkAtoms.h"
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/ClearOnShutdown.h"
13 #include "mozilla/Encoding.h"
14 #include "mozilla/intl/Locale.h"
15 #include "mozilla/intl/OSPreferences.h"
16 #include "mozilla/ServoBindings.h"
17 #include "mozilla/ServoUtils.h"
19 using namespace mozilla
;
20 using mozilla::intl::OSPreferences
;
22 static constexpr nsUConvProp encodingsGroups
[] = {
23 #include "encodingsgroups.properties.h"
// Language-group atoms that GetUncachedLanguageGroup() walks when a language
// tag starts with "x-"; a tag that matches one of these entries is returned as
// its own group.
// NOTE(review): the array's closing "};" and several original lines that
// followed the trailing comment are not visible in this view -- confirm
// against the full file before editing the list.
26 // List of mozilla internal x-* tags that map to themselves (see bug 256257)
27 static constexpr nsStaticAtom
* kLangGroups
[] = {
28 // This list must be sorted!
// (The only consumer visible in this file iterates the array linearly.
// nsGkAtoms::Unicode is referred to as "x-unicode" by the fallback comments
// in GetUncachedLanguageGroup(), which is why it sits between x_tamil and
// x_western.)
29 nsGkAtoms::x_armn
, nsGkAtoms::x_cyrillic
, nsGkAtoms::x_devanagari
,
30 nsGkAtoms::x_geor
, nsGkAtoms::x_math
, nsGkAtoms::x_tamil
,
31 nsGkAtoms::Unicode
, nsGkAtoms::x_western
32 // These self-mappings are not necessary unless somebody uses them to specify
33 // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257)
// Lookup table from a four-letter script code to the language-group atom used
// for it. GetUncachedLanguageGroup() searches the table by script code and
// returns the matching entry's atom.
// NOTE(review): the struct's member declarations are not visible in this
// view; the lookup code reads them as entry.mTag (the script-code string) and
// .mAtom (the language-group atom).
43 // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups.
44 static constexpr struct {
47 } kScriptLangGroup
[] = {
48 // This list must be sorted by script code!
// (The lookup passes an ordering comparator over mTag, so new entries must be
// inserted in ascending script-code order.)
49 {"Arab", nsGkAtoms::ar
},
50 {"Armn", nsGkAtoms::x_armn
},
51 {"Beng", nsGkAtoms::x_beng
},
52 {"Cans", nsGkAtoms::x_cans
},
53 {"Cyrl", nsGkAtoms::x_cyrillic
},
54 {"Deva", nsGkAtoms::x_devanagari
},
55 {"Ethi", nsGkAtoms::x_ethi
},
// Both Georgian script codes share the same language group.
56 {"Geok", nsGkAtoms::x_geor
},
57 {"Geor", nsGkAtoms::x_geor
},
58 {"Grek", nsGkAtoms::el
},
59 {"Gujr", nsGkAtoms::x_gujr
},
60 {"Guru", nsGkAtoms::x_guru
},
// Hang and Kore (further down) both resolve to the Korean group.
61 {"Hang", nsGkAtoms::ko
},
62 // Hani is not mapped to a specific langGroup, we prefer to look at the
63 // primary language subtag in this case
64 {"Hans", nsGkAtoms::Chinese
},
65 // Hant is special-cased in code
// (GetUncachedLanguageGroup() picks the Hong Kong or Taiwan group from the
// region subtag before this table is consulted.)
68 {"Hebr", nsGkAtoms::he
},
// Hira, Jpan and Kana all resolve to the Japanese group.
69 {"Hira", nsGkAtoms::Japanese
},
70 {"Jpan", nsGkAtoms::Japanese
},
71 {"Kana", nsGkAtoms::Japanese
},
72 {"Khmr", nsGkAtoms::x_khmr
},
73 {"Knda", nsGkAtoms::x_knda
},
74 {"Kore", nsGkAtoms::ko
},
75 {"Latn", nsGkAtoms::x_western
},
76 {"Mlym", nsGkAtoms::x_mlym
},
77 {"Orya", nsGkAtoms::x_orya
},
78 {"Sinh", nsGkAtoms::x_sinh
},
79 {"Taml", nsGkAtoms::x_tamil
},
80 {"Telu", nsGkAtoms::x_telu
},
81 {"Thai", nsGkAtoms::th
},
82 {"Tibt", nsGkAtoms::x_tibt
}};
84 static UniquePtr
<nsLanguageAtomService
> gLangAtomService
;
87 nsLanguageAtomService
* nsLanguageAtomService::GetService() {
88 if (!gLangAtomService
) {
89 gLangAtomService
= MakeUnique
<nsLanguageAtomService
>();
91 return gLangAtomService
.get();
95 void nsLanguageAtomService::Shutdown() { gLangAtomService
= nullptr; }
97 nsStaticAtom
* nsLanguageAtomService::LookupLanguage(
98 const nsACString
& aLanguage
) {
99 nsAutoCString
lowered(aLanguage
);
100 ToLowerCase(lowered
);
102 RefPtr
<nsAtom
> lang
= NS_Atomize(lowered
);
103 return GetLanguageGroup(lang
);
// Maps a character encoding to a language-group atom.
//
// The encoding's name is looked up in the encodingsGroups property table; on
// success the resulting group name is atomized and returned, otherwise the
// nsGkAtoms::Unicode atom is returned as the fallback. The caller receives an
// owning reference in both cases.
// NOTE(review): the declaration of `group` (the output string filled in by
// SearchPropertyValue) and the closing braces are not visible in this view.
106 already_AddRefed
<nsAtom
> nsLanguageAtomService::LookupCharSet(
107 NotNull
<const Encoding
*> aEncoding
) {
108 nsAutoCString charset
;
109 aEncoding
->Name(charset
);
// Search the table by encoding name; `group` receives the matching value.
111 if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
112 encodingsGroups
, ArrayLength(encodingsGroups
), charset
, group
))) {
// No entry for this encoding: fall back to the Unicode group.
113 return RefPtr
<nsAtom
>(nsGkAtoms::Unicode
).forget();
115 return NS_Atomize(group
);
// Returns the atom for the OS locale's language, computing it on first use and
// keeping it in mLocaleLanguage for subsequent calls.
//
// The first regional-preferences locale reported by OSPreferences is used when
// that query succeeds; the system locale is used in the other visible branch.
// Either way the tag is lowercased before being atomized.
// NOTE(review): several original lines of this function (including whatever
// separates the two branches, presumably an `else`, and the closing braces)
// are not visible in this view -- confirm the control flow against the full
// file.
118 nsAtom
* nsLanguageAtomService::GetLocaleLanguage() {
// Only query the OS when nothing has been cached yet.
120 if (!mLocaleLanguage
) {
121 AutoTArray
<nsCString
, 10> regionalPrefsLocales
;
122 if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales(
123 regionalPrefsLocales
))) {
// NOTE(review): element 0 is read without a visible emptiness check; this
// assumes a successful call always yields at least one locale -- TODO confirm.
124 // use lowercase for all language atoms
125 ToLowerCase(regionalPrefsLocales
[0]);
126 mLocaleLanguage
= NS_Atomize(regionalPrefsLocales
[0]);
// System-locale path.
128 nsAutoCString locale
;
129 OSPreferences::GetInstance()->GetSystemLocale(locale
);
131 ToLowerCase(locale
); // use lowercase for all language atoms
132 mLocaleLanguage
= NS_Atomize(locale
);
137 return mLocaleLanguage
;
// Returns the language-group atom for aLanguage, using the mLangToGroup cache.
//
// Two paths are visible:
//  - a read-only probe of mLangToGroup via Get(), next to code that sets
//    *aNeedsToCache to true;
//  - a LookupOrInsertWith() call that computes a missing entry with
//    GetUncachedLanguageGroup() and stores it in the cache.
// NOTE(review): the condition selecting between these paths (presumably a
// null check on aNeedsToCache) and the return statements of the probe path
// are not visible in this view -- confirm against the full file.
140 nsStaticAtom
* nsLanguageAtomService::GetLanguageGroup(nsAtom
* aLanguage
,
141 bool* aNeedsToCache
) {
// Cache probe that does not insert anything.
143 if (nsStaticAtom
* atom
= mLangToGroup
.Get(aLanguage
)) {
// Report to the caller that this language has no cached entry yet.
146 *aNeedsToCache
= true;
// Inserting path: the callback runs only when aLanguage is not cached, and
// asserts the threading precondition before computing the group.
150 return mLangToGroup
.LookupOrInsertWith(aLanguage
, [&] {
151 AssertIsMainThreadOrServoFontMetricsLocked();
152 return GetUncachedLanguageGroup(aLanguage
);
// Computes the language-group atom for aLanguage without consulting the cache.
//
// Visible strategy, in order:
//  1. Tags beginning with "x-" are compared against kLangGroups, first by atom
//     identity and then ASCII-case-insensitively.
//  2. The tag is parsed as a BCP47 locale (after trimming any "-x-" private-use
//     suffix and, on a parse failure, retrying with '_' replaced by '-'). Its
//     script -- filled in from likely subtags when absent -- is then mapped:
//     "Hant" by region, everything else through kScriptLangGroup.
//  3. If the script is not in the table, the language subtags zh / ja / ko
//     select the corresponding CJK group.
//  4. Anything else yields nsGkAtoms::Unicode.
// NOTE(review): many original lines of this function are not visible in this
// view (several return/continue statements, the declaration of `loc`, the head
// of the table-search call, closing braces). The comments below describe only
// what the visible lines show; confirm details against the full file.
156 nsStaticAtom
* nsLanguageAtomService::GetUncachedLanguageGroup(
157 nsAtom
* aLanguage
) const {
// Work on a lowercased UTF-8 copy of the tag.
158 nsAutoCString langStr
;
159 aLanguage
->ToUTF8String(langStr
);
160 ToLowerCase(langStr
);
// NOTE(review): indices 0 and 1 are read without a visible length check;
// presumably this relies on the string buffer being NUL-terminated for tags
// shorter than two characters -- TODO confirm.
162 if (langStr
[0] == 'x' && langStr
[1] == '-') {
163 // Internal x-* langGroup codes map to themselves (see bug 256257)
164 for (nsStaticAtom
* langGroup
: kLangGroups
) {
// Fast path: same atom.
165 if (langGroup
== aLanguage
) {
// An atom that is already all-lowercase ASCII cannot differ from a table entry
// by case only, so the string comparison below is not needed for it.
168 if (aLanguage
->IsAsciiLowercase()) {
171 // Do the slow ascii-case-insensitive comparison just if needed.
172 nsDependentAtomString
string(langGroup
);
173 if (string
.EqualsASCII(langStr
.get(), langStr
.Length())) {
178 // If the lang code can be parsed as BCP47, look up its (likely) script.
180 // https://bugzilla.mozilla.org/show_bug.cgi?id=1618034:
181 // First strip any private subtags that would cause Locale to reject the
182 // tag as non-wellformed.
183 nsACString::const_iterator start
, end
;
184 langStr
.BeginReading(start
);
185 langStr
.EndReading(end
);
// On a match, `start` is left at the beginning of the "-x-" occurrence.
186 if (FindInReadable("-x-"_ns
, start
, end
)) {
187 // The substring we want ends at the beginning of the "-x-" subtag.
188 langStr
.Truncate(start
.get() - langStr
.BeginReading());
// `loc` receives the parsed locale; its declaration is not visible here.
192 auto result
= intl::LocaleParser::TryParse(langStr
, loc
);
193 if (!result
.isOk()) {
194 // Did the author (wrongly) use '_' instead of '-' to separate subtags?
195 // If so, fix it up and re-try parsing.
196 if (langStr
.Contains('_')) {
197 langStr
.ReplaceChar('_', '-');
199 // Throw away the partially parsed locale and re-start parsing.
201 result
= intl::LocaleParser::TryParse(langStr
, loc
);
// Proceed only when both parsing and canonicalization succeeded.
// NOTE(review): the subtag comparisons below use "Hant", "HK" and "TW" although
// langStr was lowercased; presumably Canonicalize() normalizes subtag case --
// TODO confirm.
204 if (result
.isOk() && loc
.Canonicalize().isOk()) {
205 // Fill in script subtag if not present.
206 if (loc
.Script().Missing()) {
207 if (loc
.AddLikelySubtags().isErr()) {
208 // Fall back to x-unicode if no match was found
209 return nsGkAtoms::Unicode
;
212 // Traditional Chinese has separate prefs for Hong Kong / Taiwan;
213 // check the region subtag.
214 if (loc
.Script().EqualTo("Hant")) {
215 if (loc
.Region().EqualTo("HK")) {
216 return nsGkAtoms::HongKongChinese
;
// Every non-HK region with a Hant script is treated as Taiwanese.
218 return nsGkAtoms::Taiwanese
;
220 // Search list of known script subtags that map to langGroup codes.
// Wrap the script subtag in a string type that Compare() accepts, without
// copying the characters.
222 Span
<const char> scriptAsSpan
= loc
.Script().Span();
223 nsDependentCSubstring
script(scriptAsSpan
.data(), scriptAsSpan
.size());
// NOTE(review): the head of this call and the declaration of `foundIndex` are
// not visible; the arguments (table, 0, length, ordering comparator) look like
// a binary search over kScriptLangGroup -- confirm.
225 kScriptLangGroup
, 0, ArrayLength(kScriptLangGroup
),
226 [script
](const auto& entry
) -> int {
227 return Compare(script
, nsDependentCString(entry
.mTag
));
230 return kScriptLangGroup
[foundIndex
].mAtom
;
232 // Script subtag was not recognized (includes "Hani"); check the language
233 // subtag for CJK possibilities so that we'll prefer the appropriate font
234 // rather than falling back to the browser's hardcoded preference.
235 if (loc
.Language().EqualTo("zh")) {
236 if (loc
.Region().EqualTo("HK")) {
237 return nsGkAtoms::HongKongChinese
;
239 if (loc
.Region().EqualTo("TW")) {
240 return nsGkAtoms::Taiwanese
;
// Chinese with any other (or no) region.
242 return nsGkAtoms::Chinese
;
244 if (loc
.Language().EqualTo("ja")) {
245 return nsGkAtoms::Japanese
;
247 if (loc
.Language().EqualTo("ko")) {
248 return nsGkAtoms::ko
;
253 // Fall back to x-unicode if no match was found
254 return nsGkAtoms::Unicode
;