no bug - Import translations from android-l10n r=release a=l10n CLOSED TREE
[gecko.git] / netwerk / dns / nsIDNService.cpp
blobc13671a5810b6c83dc6331af31a50d37e6da44aa
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "MainThreadUtils.h"
7 #include "mozilla/ClearOnShutdown.h"
8 #include "mozilla/Preferences.h"
9 #include "nsIDNService.h"
10 #include "nsReadableUtils.h"
11 #include "nsCRT.h"
12 #include "nsServiceManagerUtils.h"
13 #include "nsString.h"
14 #include "nsStringFwd.h"
15 #include "nsUnicharUtils.h"
16 #include "nsUnicodeProperties.h"
17 #include "harfbuzz/hb.h"
18 #include "mozilla/ArrayUtils.h"
19 #include "mozilla/Casting.h"
20 #include "mozilla/StaticPrefs_network.h"
21 #include "mozilla/TextUtils.h"
22 #include "mozilla/Utf8.h"
23 #include "mozilla/intl/UnicodeProperties.h"
24 #include "mozilla/intl/UnicodeScriptCodes.h"
25 #include "nsNetUtil.h"
26 #include "nsStandardURL.h"
28 using namespace mozilla;
29 using namespace mozilla::intl;
30 using namespace mozilla::unicode;
31 using namespace mozilla::net;
32 using mozilla::Preferences;
//-----------------------------------------------------------------------------
// Names of the preferences this file reacts to. prefsChanged() rebuilds the
// IDN blocklist when either of the first two changes, and re-reads the
// restriction profile when the third one changes.

#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
40 template <int N>
41 static inline bool TLDEqualsLiteral(mozilla::Span<const char32_t> aTLD,
42 const char (&aStr)[N]) {
43 if (aTLD.Length() != N - 1) {
44 return false;
46 const char* a = aStr;
47 for (const char32_t c : aTLD) {
48 if (c != char32_t(*a)) {
49 return false;
51 ++a;
53 return true;
56 static inline bool isOnlySafeChars(mozilla::Span<const char32_t> aLabel,
57 const nsTArray<BlocklistRange>& aBlocklist) {
58 if (aBlocklist.IsEmpty()) {
59 return true;
61 for (const char32_t c : aLabel) {
62 if (c > 0xFFFF) {
63 // The blocklist only support BMP!
64 continue;
66 if (CharInBlocklist(char16_t(c), aBlocklist)) {
67 return false;
70 return true;
73 //-----------------------------------------------------------------------------
74 // nsIDNService
75 //-----------------------------------------------------------------------------
77 /* Implementation file */
78 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
80 static const char* gCallbackPrefs[] = {
81 NS_NET_PREF_EXTRAALLOWED,
82 NS_NET_PREF_EXTRABLOCKED,
83 NS_NET_PREF_IDNRESTRICTION,
84 nullptr,
87 nsresult nsIDNService::Init() {
88 MOZ_ASSERT(NS_IsMainThread());
89 // Take a strong reference for our listener with the preferences service,
90 // which we will release on shutdown.
91 // It's OK if we remove the observer a bit early, as it just means we won't
92 // respond to `network.IDN.extra_{allowed,blocked}_chars` and
93 // `network.IDN.restriction_profile` pref changes during shutdown.
94 Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
95 RunOnShutdown(
96 [self = RefPtr{this}]() mutable {
97 Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
98 self.get());
99 self = nullptr;
101 ShutdownPhase::XPCOMWillShutdown);
102 prefsChanged(nullptr);
104 return NS_OK;
107 void nsIDNService::prefsChanged(const char* pref) {
108 MOZ_ASSERT(NS_IsMainThread());
109 AutoWriteLock lock(mLock);
111 if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
112 nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
113 InitializeBlocklist(mIDNBlocklist);
115 if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
116 nsAutoCString profile;
117 if (NS_FAILED(
118 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
119 profile.Truncate();
121 if (profile.EqualsLiteral("moderate")) {
122 mRestrictionProfile = eModeratelyRestrictiveProfile;
123 } else if (profile.EqualsLiteral("high")) {
124 mRestrictionProfile = eHighlyRestrictiveProfile;
125 } else {
126 mRestrictionProfile = eASCIIOnlyProfile;
131 nsIDNService::nsIDNService() { MOZ_ASSERT(NS_IsMainThread()); }
133 nsIDNService::~nsIDNService() = default;
135 NS_IMETHODIMP nsIDNService::DomainToASCII(const nsACString& input,
136 nsACString& ace) {
137 return NS_DomainToASCII(input, ace);
140 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
141 nsACString& ace) {
142 return NS_DomainToASCIIAllowAnyGlyphfulASCII(input, ace);
145 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
146 nsACString& _retval) {
147 return NS_DomainToUnicodeAllowAnyGlyphfulASCII(input, _retval);
150 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
151 // look for the ACE prefix in the input string. it may occur
152 // at the beginning of any segment in the domain name. for
153 // example: "www.xn--ENCODED.com"
155 if (!IsAscii(input)) {
156 *_retval = false;
157 return NS_OK;
160 auto stringContains = [](const nsACString& haystack,
161 const nsACString& needle) {
162 return std::search(haystack.BeginReading(), haystack.EndReading(),
163 needle.BeginReading(), needle.EndReading(),
164 [](unsigned char ch1, unsigned char ch2) {
165 return tolower(ch1) == tolower(ch2);
166 }) != haystack.EndReading();
169 *_retval =
170 StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
171 (!input.IsEmpty() && input[0] != '.' &&
172 stringContains(input, ".xn--"_ns));
173 return NS_OK;
176 NS_IMETHODIMP nsIDNService::DomainToDisplay(const nsACString& input,
177 nsACString& _retval) {
178 nsresult rv = NS_DomainToDisplay(input, _retval);
179 return rv;
182 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
183 nsACString& _retval) {
184 nsresult rv = NS_DomainToDisplayAllowAnyGlyphfulASCII(input, _retval);
185 return rv;
188 //-----------------------------------------------------------------------------
namespace mozilla::net {

// State of the mixed-script check performed on a label.
//
// The numeric values matter: BOPO..HNLT (0..12) are used as row indices and
// BOPO..OTHR (0..8) as column indices of scriptComboTable, so they must not
// be renumbered.
enum ScriptCombo : int32_t {
  // No script has been recorded yet.
  UNSET = -1,

  // Single scripts.
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,

  // Combinations of scripts.
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)

  // The combination seen so far is not allowed.
  FAIL = 13,
};

}  // namespace mozilla::net
212 bool nsIDNService::IsLabelSafe(mozilla::Span<const char32_t> aLabel,
213 mozilla::Span<const char32_t> aTLD) {
214 restrictionProfile profile{eASCIIOnlyProfile};
216 AutoReadLock lock(mLock);
218 if (!isOnlySafeChars(aLabel, mIDNBlocklist)) {
219 return false;
222 // We should never get here if the label is ASCII
223 if (mRestrictionProfile == eASCIIOnlyProfile) {
224 return false;
226 profile = mRestrictionProfile;
229 mozilla::Span<const char32_t>::const_iterator current = aLabel.cbegin();
230 mozilla::Span<const char32_t>::const_iterator end = aLabel.cend();
232 Script lastScript = Script::INVALID;
233 char32_t previousChar = 0;
234 char32_t baseChar = 0; // last non-diacritic seen (base char for marks)
235 char32_t savedNumberingSystem = 0;
236 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
237 #if 0
238 HanVariantType savedHanVariant = HVT_NotHan;
239 #endif
241 ScriptCombo savedScript = ScriptCombo::UNSET;
243 while (current != end) {
244 char32_t ch = *current++;
246 IdentifierType idType = GetIdentifierType(ch);
247 if (idType == IDTYPE_RESTRICTED) {
248 return false;
250 MOZ_ASSERT(idType == IDTYPE_ALLOWED);
252 // Check for mixed script
253 Script script = UnicodeProperties::GetScriptCode(ch);
254 if (script != Script::COMMON && script != Script::INHERITED &&
255 script != lastScript) {
256 if (illegalScriptCombo(profile, script, savedScript)) {
257 return false;
261 // U+30FC should be preceded by a Hiragana/Katakana.
262 if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
263 lastScript != Script::KATAKANA) {
264 return false;
267 Script nextScript = Script::INVALID;
268 if (current != end) {
269 nextScript = UnicodeProperties::GetScriptCode(*current);
272 if (ch == 0x30FB &&
273 (lastScript == Script::LATIN || nextScript == Script::LATIN)) {
274 return false;
277 if (ch == 0x307 &&
278 (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
279 return false;
282 // U+00B7 is only allowed on Catalan domains between two l's.
283 if (ch == 0xB7 && (!TLDEqualsLiteral(aTLD, "cat") || previousChar != 'l' ||
284 current == end || *current != 'l')) {
285 return false;
288 // Disallow Icelandic confusables for domains outside Icelandic and Faroese
289 // ccTLD (.is, .fo)
290 if ((ch == 0xFE || ch == 0xF0) && !TLDEqualsLiteral(aTLD, "is") &&
291 !TLDEqualsLiteral(aTLD, "fo")) {
292 return false;
295 // Block single/double-quote-like characters.
296 if (ch == 0x2BB || ch == 0x2BC) {
297 return false;
300 // Check for mixed numbering systems
301 auto genCat = GetGeneralCategory(ch);
302 if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
303 uint32_t zeroCharacter =
304 ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
305 if (savedNumberingSystem == 0) {
306 // If we encounter a decimal number, save the zero character from that
307 // numbering system.
308 savedNumberingSystem = zeroCharacter;
309 } else if (zeroCharacter != savedNumberingSystem) {
310 return false;
314 if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
315 // Check for consecutive non-spacing marks.
316 if (previousChar != 0 && previousChar == ch) {
317 return false;
319 // Check for marks whose expected script doesn't match the base script.
320 if (lastScript != Script::INVALID) {
321 UnicodeProperties::ScriptExtensionVector scripts;
322 auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
323 MOZ_ASSERT(extResult.isOk());
324 if (extResult.isErr()) {
325 return false;
328 int nScripts = AssertedCast<int>(scripts.length());
330 // nScripts will always be >= 1, because even for undefined characters
331 // it will return Script::INVALID.
332 // If the mark just has script=COMMON or INHERITED, we can't check any
333 // more carefully, but if it has specific scriptExtension codes, then
334 // assume those are the only valid scripts to use it with.
335 if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
336 Script(scripts[0]) != Script::INHERITED)) {
337 while (--nScripts >= 0) {
338 if (Script(scripts[nScripts]) == lastScript) {
339 break;
342 if (nScripts == -1) {
343 return false;
347 // Check for diacritics on dotless-i, which would be indistinguishable
348 // from normal accented letter i.
349 if (baseChar == 0x0131 &&
350 ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
351 return false;
353 } else {
354 baseChar = ch;
357 if (script != Script::COMMON && script != Script::INHERITED) {
358 lastScript = script;
361 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
362 #if 0
364 // Check for both simplified-only and traditional-only Chinese characters
365 HanVariantType hanVariant = GetHanVariant(ch);
366 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
367 if (savedHanVariant == HVT_NotHan) {
368 savedHanVariant = hanVariant;
369 } else if (hanVariant != savedHanVariant) {
370 return false;
373 #endif
375 previousChar = ch;
377 return true;
380 // Scripts that we care about in illegalScriptCombo
381 static inline ScriptCombo findScriptIndex(Script aScript) {
382 switch (aScript) {
383 case Script::BOPOMOFO:
384 return ScriptCombo::BOPO;
385 case Script::CYRILLIC:
386 return ScriptCombo::CYRL;
387 case Script::GREEK:
388 return ScriptCombo::GREK;
389 case Script::HANGUL:
390 return ScriptCombo::HANG;
391 case Script::HAN:
392 return ScriptCombo::HANI;
393 case Script::HIRAGANA:
394 return ScriptCombo::HIRA;
395 case Script::KATAKANA:
396 return ScriptCombo::KATA;
397 case Script::LATIN:
398 return ScriptCombo::LATN;
399 default:
400 return ScriptCombo::OTHR;
404 static const ScriptCombo scriptComboTable[13][9] = {
405 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
406 * savedScript */
407 /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
408 /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
409 /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
410 /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
411 /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
412 /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
413 /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
414 /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
415 /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
416 /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
417 /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
418 /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
419 /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
421 bool nsIDNService::illegalScriptCombo(restrictionProfile profile, Script script,
422 ScriptCombo& savedScript) {
423 if (savedScript == ScriptCombo::UNSET) {
424 savedScript = findScriptIndex(script);
425 return false;
428 savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
430 * Special case combinations that depend on which profile is in use
431 * In the Highly Restrictive profile Latin is not allowed with any
432 * other script
434 * In the Moderately Restrictive profile Latin mixed with any other
435 * single script is allowed.
437 return ((savedScript == OTHR && profile == eHighlyRestrictiveProfile) ||
438 savedScript == FAIL);
441 extern "C" MOZ_EXPORT bool mozilla_net_is_label_safe(const char32_t* aLabel,
442 size_t aLabelLen,
443 const char32_t* aTld,
444 size_t aTldLen) {
445 return static_cast<nsIDNService*>(nsStandardURL::GetIDNService())
446 ->IsLabelSafe(mozilla::Span<const char32_t>(aLabel, aLabelLen),
447 mozilla::Span<const char32_t>(aTld, aTldLen));