1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLanguageAtomService.h"
7 #include "nsUConvPropertySearch.h"
8 #include "nsUnicharUtils.h"
10 #include "nsGkAtoms.h"
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/ClearOnShutdown.h"
13 #include "mozilla/Encoding.h"
14 #include "mozilla/intl/OSPreferences.h"
15 #include "mozilla/ServoBindings.h"
16 #include "mozilla/ServoUtils.h"
// Make names from the mozilla namespace, and OSPreferences in particular,
// usable without qualification in the rest of this file (for example the
// OSPreferences::GetInstance() calls in GetLocaleLanguage()).
18 using namespace mozilla
;
19 using mozilla::intl::OSPreferences
;
// Table of nsUConvProp entries; its initializer list comes from the included
// "encodingsgroups.properties.h". LookupCharSet() hands this table, together
// with an encoding name, to nsUConvPropertySearch::SearchPropertyValue().
// NOTE(review): the end of the initializer is not visible in this copy.
21 static constexpr nsUConvProp encodingsGroups
[] = {
22 #include "encodingsgroups.properties.h"
25 // List of mozilla internal x-* tags that map to themselves (see bug 256257)
// GetUncachedLanguageGroup() iterates over this array, comparing each entry
// against the requested language, when the language string begins with "x-".
// NOTE(review): the end of the initializer is not visible in this copy.
26 static constexpr nsStaticAtom
* kLangGroups
[] = {
27 // This list must be sorted!
28 nsGkAtoms::x_armn
, nsGkAtoms::x_cyrillic
, nsGkAtoms::x_devanagari
,
29 nsGkAtoms::x_geor
, nsGkAtoms::x_math
, nsGkAtoms::x_tamil
,
30 nsGkAtoms::Unicode
, nsGkAtoms::x_western
31 // These self-mappings are not necessary unless somebody uses them to specify
32 // lang in (X)HTML/XML documents, which they shouldn't. (see bug 256257)
42 // Map ISO 15924 script codes from BCP47 lang tag to mozilla's langGroups.
// Each entry pairs a four-letter script tag with an nsGkAtoms atom.
// GetUncachedLanguageGroup() reads the two members as `mTag` (compared against
// the locale's script) and `mAtom` (the value it returns on a match).
// NOTE(review): the member declarations of the anonymous struct are not
// visible in this copy.
43 static constexpr struct {
46 } kScriptLangGroup
[] = {
47 // This list must be sorted by script code!
48 {"Arab", nsGkAtoms::ar
},
49 {"Armn", nsGkAtoms::x_armn
},
50 {"Beng", nsGkAtoms::x_beng
},
51 {"Cans", nsGkAtoms::x_cans
},
52 {"Cyrl", nsGkAtoms::x_cyrillic
},
53 {"Deva", nsGkAtoms::x_devanagari
},
54 {"Ethi", nsGkAtoms::x_ethi
},
// "Geok" and "Geor" both map to the same atom.
55 {"Geok", nsGkAtoms::x_geor
},
56 {"Geor", nsGkAtoms::x_geor
},
57 {"Grek", nsGkAtoms::el
},
58 {"Gujr", nsGkAtoms::x_gujr
},
59 {"Guru", nsGkAtoms::x_guru
},
60 {"Hang", nsGkAtoms::ko
},
61 {"Hani", nsGkAtoms::Japanese
},
62 {"Hans", nsGkAtoms::Chinese
},
63 // Hant is special-cased in code
// (GetUncachedLanguageGroup() checks for "Hant" before consulting this table
// and picks an atom based on whether the region is "HK".)
66 {"Hebr", nsGkAtoms::he
},
67 {"Hira", nsGkAtoms::Japanese
},
68 {"Jpan", nsGkAtoms::Japanese
},
69 {"Kana", nsGkAtoms::Japanese
},
70 {"Khmr", nsGkAtoms::x_khmr
},
71 {"Knda", nsGkAtoms::x_knda
},
72 {"Kore", nsGkAtoms::ko
},
73 {"Latn", nsGkAtoms::x_western
},
74 {"Mlym", nsGkAtoms::x_mlym
},
75 {"Orya", nsGkAtoms::x_orya
},
76 {"Sinh", nsGkAtoms::x_sinh
},
77 {"Taml", nsGkAtoms::x_tamil
},
78 {"Telu", nsGkAtoms::x_telu
},
79 {"Thai", nsGkAtoms::th
},
80 {"Tibt", nsGkAtoms::x_tibt
}};
// Accessor for the shared nsLanguageAtomService instance.
//
// The instance is owned by a function-local static UniquePtr. When that
// pointer is null it is filled with MakeUnique<nsLanguageAtomService>(), and
// the pointer is passed to ClearOnShutdown(). The raw pointer is returned; the
// static UniquePtr keeps ownership.
//
// NOTE(review): the closing braces of the `if` and of the function are not
// visible in this copy, so the exact scope of the ClearOnShutdown() call
// cannot be confirmed from here.
83 nsLanguageAtomService
* nsLanguageAtomService::GetService() {
84 static UniquePtr
<nsLanguageAtomService
> gLangAtomService
;
// Create the instance only if the pointer is still null.
85 if (!gLangAtomService
) {
86 gLangAtomService
= MakeUnique
<nsLanguageAtomService
>();
87 ClearOnShutdown(&gLangAtomService
);
89 return gLangAtomService
.get();
// Resolves a language given as a string to its language-group atom.
//
// aLanguage: the language string to look up.
// Returns whatever GetLanguageGroup() returns for the atom built from the
// string.
//
// NOTE(review): the local is named `lowered`, but no lower-casing call is
// visible between its construction and NS_Atomize() in this copy (source lines
// appear to be missing here) - confirm against the full file.
92 nsStaticAtom
* nsLanguageAtomService::LookupLanguage(
93 const nsACString
& aLanguage
) {
// Work on a local copy of the caller's string.
94 nsAutoCString
lowered(aLanguage
);
// Atomize the copy and delegate to the atom-based lookup.
97 RefPtr
<nsAtom
> lang
= NS_Atomize(lowered
);
98 return GetLanguageGroup(lang
);
// Maps a character encoding to a language-group atom.
//
// aEncoding: the encoding to look up (NotNull pointer).
// Returns an already_AddRefed<nsAtom>: nsGkAtoms::Unicode when the search of
// the encodingsGroups table fails, otherwise the atom made from the `group`
// string that the search filled in.
//
// NOTE(review): `group` is used below but its declaration is not visible in
// this copy; it is presumably a local string declared before the search call -
// confirm against the full file.
101 already_AddRefed
<nsAtom
> nsLanguageAtomService::LookupCharSet(
102 NotNull
<const Encoding
*> aEncoding
) {
// Fetch the encoding's name; it is the key for the table search.
103 nsAutoCString charset
;
104 aEncoding
->Name(charset
);
// Search encodingsGroups for the charset name; the result goes into `group`.
106 if (NS_FAILED(nsUConvPropertySearch::SearchPropertyValue(
107 encodingsGroups
, ArrayLength(encodingsGroups
), charset
, group
))) {
// Search failed: fall back to nsGkAtoms::Unicode.
108 return RefPtr
<nsAtom
>(nsGkAtoms::Unicode
).forget();
110 return NS_Atomize(group
);
// Returns the atom for the locale language, computing and storing it in
// mLocaleLanguage when that member is still null.
//
// Two sources are visible: the first entry of the regional-prefs locales
// reported by OSPreferences, and the system locale from
// OSPreferences::GetSystemLocale(). Either string is lower-cased before being
// atomized.
//
// NOTE(review): the lines joining the two branches (presumably an `else`) and
// the closing braces are not visible in this copy - confirm against the full
// file.
// NOTE(review): regionalPrefsLocales[0] is indexed with no visible emptiness
// check; confirm that GetRegionalPrefsLocales() never succeeds with an empty
// array.
113 nsAtom
* nsLanguageAtomService::GetLocaleLanguage() {
// Only compute the value when nothing has been stored yet.
115 if (!mLocaleLanguage
) {
116 AutoTArray
<nsCString
, 10> regionalPrefsLocales
;
117 if (NS_SUCCEEDED(OSPreferences::GetInstance()->GetRegionalPrefsLocales(
118 regionalPrefsLocales
))) {
119 // use lowercase for all language atoms
120 ToLowerCase(regionalPrefsLocales
[0]);
121 mLocaleLanguage
= NS_Atomize(regionalPrefsLocales
[0]);
// System-locale path.
123 nsAutoCString locale
;
124 OSPreferences::GetInstance()->GetSystemLocale(locale
);
126 ToLowerCase(locale
); // use lowercase for all language atoms
127 mLocaleLanguage
= NS_Atomize(locale
);
132 return mLocaleLanguage
;
// Looks up the language group for aLanguage, consulting the mLangToGroup
// cache first.
//
// aLanguage:     the language atom to resolve.
// aNeedsToCache: out-flag; the visible code sets *aNeedsToCache to true on a
//                path reached after the cache lookup.
//
// On the remaining path the function asserts (via
// AssertIsMainThreadOrServoFontMetricsLocked()) before it computes the group
// with GetUncachedLanguageGroup() and stores it in mLangToGroup.
//
// NOTE(review): the `return` statements and the condition guarding the
// *aNeedsToCache write (presumably a null check on aNeedsToCache) are not
// visible in this copy - confirm against the full file.
135 nsStaticAtom
* nsLanguageAtomService::GetLanguageGroup(nsAtom
* aLanguage
,
136 bool* aNeedsToCache
) {
// Cache hit: a non-null entry already exists for this language.
137 if (nsStaticAtom
* group
= mLangToGroup
.Get(aLanguage
)) {
// Tell the caller that the result is not cached yet.
141 *aNeedsToCache
= true;
// Compute the group and record it in the cache.
144 AssertIsMainThreadOrServoFontMetricsLocked();
145 nsStaticAtom
* group
= GetUncachedLanguageGroup(aLanguage
);
146 mLangToGroup
.Put(aLanguage
, group
);
// Computes the language group for aLanguage without touching the cache.
//
// aLanguage: the language atom to resolve.
// Returns a static atom naming the language group. nsGkAtoms::Unicode is the
// final fallback when nothing matched.
//
// Visible strategy:
//   * The atom is converted to a lower-cased UTF-8 string.
//   * If the string starts with "x-", the kLangGroups entries are compared
//     against it: first by atom identity, then (see the existing comment) by an
//     ASCII comparison against the lower-cased string.
//   * A locale object `loc` is checked for well-formedness; when its script is
//     empty, AddLikelySubtags() is called. A script of "Hant" yields
//     HongKongChinese for region "HK" and Taiwanese otherwise. Other scripts
//     are looked up in kScriptLangGroup by comparing against each entry's mTag.
//
// NOTE(review): several lines are not visible in this copy - the declarations
// of `loc` and `foundIndex`, the name of the search routine that takes the
// lambda, the `return`/`continue` statements inside the loop, and all closing
// braces. Confirm the control flow against the full file.
// NOTE(review): langStr[0] and langStr[1] are indexed with no visible length
// check; confirm the behaviour for strings shorter than two characters.
150 nsStaticAtom
* nsLanguageAtomService::GetUncachedLanguageGroup(
151 nsAtom
* aLanguage
) const {
// Lower-cased UTF-8 copy of the atom's text, used for the comparisons below.
152 nsAutoCString langStr
;
153 aLanguage
->ToUTF8String(langStr
);
154 ToLowerCase(langStr
);
156 if (langStr
[0] == 'x' && langStr
[1] == '-') {
157 // Internal x-* langGroup codes map to themselves (see bug 256257)
158 for (nsStaticAtom
* langGroup
: kLangGroups
) {
// Cheap check first: pointer identity between the two atoms.
159 if (langGroup
== aLanguage
) {
// The atom's text is already ASCII-lowercase in this branch.
162 if (aLanguage
->IsAsciiLowercase()) {
165 // Do the slow ascii-case-insensitive comparison just if needed.
166 nsDependentAtomString
string(langGroup
);
167 if (string
.EqualsASCII(langStr
.get(), langStr
.Length())) {
172 // If the lang code can be parsed as BCP47, look up its (likely) script
174 if (loc
.IsWellFormed()) {
// No explicit script in the tag: let AddLikelySubtags() fill one in.
175 if (loc
.GetScript().IsEmpty()) {
176 loc
.AddLikelySubtags();
// "Hant" is resolved by region rather than through kScriptLangGroup.
178 if (loc
.GetScript().EqualsLiteral("Hant")) {
179 if (loc
.GetRegion().EqualsLiteral("HK")) {
180 return nsGkAtoms::HongKongChinese
;
182 return nsGkAtoms::Taiwanese
;
185 const nsCString
& script
= loc
.GetScript();
// Search kScriptLangGroup with a three-way comparison of the script against
// each entry's mTag.
// NOTE(review): the lambda captures `script` by value ([script]), i.e. it
// copies the string; a by-reference capture may have been intended.
187 kScriptLangGroup
, 0, ArrayLength(kScriptLangGroup
),
188 [script
](const auto& entry
) -> int {
189 return script
.Compare(entry
.mTag
);
192 return kScriptLangGroup
[foundIndex
].mAtom
;
198 // Fall back to x-unicode if no match was found
199 return nsGkAtoms::Unicode
;