1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "MainThreadUtils.h"
7 #include "mozilla/ClearOnShutdown.h"
8 #include "mozilla/Preferences.h"
9 #include "nsIDNService.h"
10 #include "nsReadableUtils.h"
12 #include "nsServiceManagerUtils.h"
14 #include "nsStringFwd.h"
15 #include "nsUnicharUtils.h"
16 #include "nsUnicodeProperties.h"
17 #include "harfbuzz/hb.h"
18 #include "mozilla/ArrayUtils.h"
19 #include "mozilla/Casting.h"
20 #include "mozilla/StaticPrefs_network.h"
21 #include "mozilla/TextUtils.h"
22 #include "mozilla/Utf8.h"
23 #include "mozilla/intl/UnicodeProperties.h"
24 #include "mozilla/intl/UnicodeScriptCodes.h"
25 #include "nsNetUtil.h"
26 #include "nsStandardURL.h"
28 using namespace mozilla
;
29 using namespace mozilla::intl
;
30 using namespace mozilla::unicode
;
31 using namespace mozilla::net
;
32 using mozilla::Preferences
;
34 //-----------------------------------------------------------------------------
// Names of the preferences this file reacts to. They are listed in
// gCallbackPrefs and compared against the changed pref name in
// nsIDNService::prefsChanged.
// Extra characters to allow / block (both trigger InitializeBlocklist).
36 #define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
37 #define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
// Restriction profile name; prefsChanged recognises "moderate" and "high".
38 #define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
// Compares the code points in aTLD with the character array aStr.
//
// Visible logic: the span length is first checked against N - 1 (the array
// extent minus one, which for a string literal excludes the terminating NUL),
// then every element of aTLD is compared with char32_t(*a).
//
// NOTE(review): the declarations of N and `a`, the return statements, the
// advance of `a` and the closing braces are not visible in this copy of the
// file. Presumably N is a template parameter and `a` walks aStr -- confirm
// against the complete source before relying on this description.
41 static inline bool TLDEqualsLiteral(mozilla::Span
<const char32_t
> aTLD
,
42 const char (&aStr
)[N
]) {
43 if (aTLD
.Length() != N
- 1) {
47 for (const char32_t c
: aTLD
) {
48 if (c
!= char32_t(*a
)) {
// Checks the code points of aLabel against aBlocklist.
//
// Visible logic: an empty blocklist is special-cased up front; otherwise each
// code point is narrowed to char16_t and tested with CharInBlocklist.
//
// NOTE(review): the return statements and the guard that precedes the
// char16_t narrowing are not visible in this copy of the file. Judging by the
// function name and the BMP comment below, a blocklisted character makes the
// label unsafe and non-BMP code points are skipped -- confirm against the
// complete source.
56 static inline bool isOnlySafeChars(mozilla::Span
<const char32_t
> aLabel
,
57 const nsTArray
<BlocklistRange
>& aBlocklist
) {
58 if (aBlocklist
.IsEmpty()) {
61 for (const char32_t c
: aLabel
) {
63 // The blocklist only supports BMP!
66 if (CharInBlocklist(char16_t(c
), aBlocklist
)) {
73 //-----------------------------------------------------------------------------
75 //-----------------------------------------------------------------------------
77 /* Implementation file */
// Expands the NS_IMPL_ISUPPORTS macro for nsIDNService, listing nsIIDNService
// as the interface the class implements.
78 NS_IMPL_ISUPPORTS(nsIDNService
, nsIIDNService
)
// Preference names passed to Preferences::RegisterPrefixCallbacks and
// Preferences::UnregisterPrefixCallbacks in nsIDNService::Init.
// NOTE(review): the end of the initializer list is not visible in this copy
// of the file; the prefix-callback API presumably expects a terminating
// entry -- confirm against the complete source.
80 static const char* gCallbackPrefs
[] = {
81 NS_NET_PREF_EXTRAALLOWED
,
82 NS_NET_PREF_EXTRABLOCKED
,
83 NS_NET_PREF_IDNRESTRICTION
,
// Main-thread initialisation of the service.
//
// Visible steps:
//  1. Asserts that the caller is on the main thread.
//  2. Registers PrefChanged for every pref named in gCallbackPrefs, passing
//     `this` as the closure.
//  3. Builds a lambda that owns a RefPtr to this object and unregisters the
//     same callbacks; ShutdownPhase::XPCOMWillShutdown appears as an argument
//     alongside it.
//  4. Calls prefsChanged(nullptr), which loads every setting at once (a null
//     pref name matches all branches in prefsChanged).
//
// NOTE(review): the call that receives the lambda, the remaining arguments of
// UnregisterPrefixCallbacks and the return statement are not visible in this
// copy of the file; presumably the lambda is scheduled to run at the named
// shutdown phase -- confirm against the complete source.
87 nsresult
nsIDNService::Init() {
88 MOZ_ASSERT(NS_IsMainThread());
89 // Take a strong reference for our listener with the preferences service,
90 // which we will release on shutdown.
91 // It's OK if we remove the observer a bit early, as it just means we won't
92 // respond to `network.IDN.extra_{allowed,blocked}_chars` and
93 // `network.IDN.restriction_profile` pref changes during shutdown.
94 Preferences::RegisterPrefixCallbacks(PrefChanged
, gCallbackPrefs
, this);
96 [self
= RefPtr
{this}]() mutable {
97 Preferences::UnregisterPrefixCallbacks(PrefChanged
, gCallbackPrefs
,
101 ShutdownPhase::XPCOMWillShutdown
);
102 prefsChanged(nullptr);
// Reloads cached preference state. `pref` is the name of the preference that
// changed; a null `pref` matches every branch below and so reloads everything
// (this is how Init uses it).
//
// Runs on the main thread (asserted) and holds the write lock on mLock while
// updating members.
//
//  - extra allowed / extra blocked chars: rebuilds mIDNBlocklist through
//    InitializeBlocklist.
//  - restriction profile: reads the pref as a string and sets
//    mRestrictionProfile to eModeratelyRestrictiveProfile for "moderate" and
//    eHighlyRestrictiveProfile for "high"; an assignment of eASCIIOnlyProfile
//    is also visible.
//
// NOTE(review): the condition wrapping the GetCString call and the `else`
// leading to the eASCIIOnlyProfile assignment are not visible in this copy of
// the file; presumably ASCII-only is the fallback for any other value --
// confirm against the complete source.
107 void nsIDNService::prefsChanged(const char* pref
) {
108 MOZ_ASSERT(NS_IsMainThread());
109 AutoWriteLock
lock(mLock
);
111 if (!pref
|| nsLiteralCString(NS_NET_PREF_EXTRAALLOWED
).Equals(pref
) ||
112 nsLiteralCString(NS_NET_PREF_EXTRABLOCKED
).Equals(pref
)) {
113 InitializeBlocklist(mIDNBlocklist
);
115 if (!pref
|| nsLiteralCString(NS_NET_PREF_IDNRESTRICTION
).Equals(pref
)) {
116 nsAutoCString profile
;
118 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION
, profile
))) {
121 if (profile
.EqualsLiteral("moderate")) {
122 mRestrictionProfile
= eModeratelyRestrictiveProfile
;
123 } else if (profile
.EqualsLiteral("high")) {
124 mRestrictionProfile
= eHighlyRestrictiveProfile
;
126 mRestrictionProfile
= eASCIIOnlyProfile
;
131 nsIDNService::nsIDNService() { MOZ_ASSERT(NS_IsMainThread()); }
// Compiler-generated destructor, defined out of line in this file.
133 nsIDNService::~nsIDNService() = default;
// Thin wrapper: forwards `input` and the output string `ace` to
// NS_DomainToASCII and returns its nsresult unchanged.
// NOTE(review): the declaration of the `ace` parameter is not visible in this
// copy of the file.
135 NS_IMETHODIMP
nsIDNService::DomainToASCII(const nsACString
& input
,
137 return NS_DomainToASCII(input
, ace
);
// Thin wrapper: forwards `input` and the output string `ace` to
// NS_DomainToASCIIAllowAnyGlyphfulASCII and returns its nsresult unchanged.
// NOTE(review): the declaration of the `ace` parameter is not visible in this
// copy of the file.
140 NS_IMETHODIMP
nsIDNService::ConvertUTF8toACE(const nsACString
& input
,
142 return NS_DomainToASCIIAllowAnyGlyphfulASCII(input
, ace
);
// Thin wrapper: forwards `input` and the output string `_retval` to
// NS_DomainToUnicodeAllowAnyGlyphfulASCII and returns its nsresult unchanged.
145 NS_IMETHODIMP
nsIDNService::ConvertACEtoUTF8(const nsACString
& input
,
146 nsACString
& _retval
) {
147 return NS_DomainToUnicodeAllowAnyGlyphfulASCII(input
, _retval
);
// Reports through *_retval whether `input` contains an ACE ("xn--") label.
//
// Visible logic:
//  - Non-ASCII input is handled by an early branch (its body is not visible).
//  - `stringContains` is a local case-insensitive substring search built on
//    std::search, comparing bytes after tolower(); the bytes are taken as
//    unsigned char so tolower() never receives a negative value.
//  - The final expression is true when the input starts with "xn--"
//    (case-insensitively), or when the input is non-empty, does not start
//    with '.', and contains ".xn--" anywhere.
//
// NOTE(review): the statements that store into *_retval and the return
// statements are not visible in this copy of the file.
150 NS_IMETHODIMP
nsIDNService::IsACE(const nsACString
& input
, bool* _retval
) {
151 // look for the ACE prefix in the input string. it may occur
152 // at the beginning of any segment in the domain name. for
153 // example: "www.xn--ENCODED.com"
155 if (!IsAscii(input
)) {
160 auto stringContains
= [](const nsACString
& haystack
,
161 const nsACString
& needle
) {
162 return std::search(haystack
.BeginReading(), haystack
.EndReading(),
163 needle
.BeginReading(), needle
.EndReading(),
164 [](unsigned char ch1
, unsigned char ch2
) {
165 return tolower(ch1
) == tolower(ch2
);
166 }) != haystack
.EndReading();
170 StringBeginsWith(input
, "xn--"_ns
, nsCaseInsensitiveCStringComparator
) ||
171 (!input
.IsEmpty() && input
[0] != '.' &&
172 stringContains(input
, ".xn--"_ns
));
// Calls NS_DomainToDisplay(input, _retval) and captures its result in `rv`.
// NOTE(review): what is done with `rv` after the call is not visible in this
// copy of the file.
176 NS_IMETHODIMP
nsIDNService::DomainToDisplay(const nsACString
& input
,
177 nsACString
& _retval
) {
178 nsresult rv
= NS_DomainToDisplay(input
, _retval
);
// Calls NS_DomainToDisplayAllowAnyGlyphfulASCII(input, _retval) and captures
// its result in `rv`.
// NOTE(review): what is done with `rv` after the call is not visible in this
// copy of the file.
182 NS_IMETHODIMP
nsIDNService::ConvertToDisplayIDN(const nsACString
& input
,
183 nsACString
& _retval
) {
184 nsresult rv
= NS_DomainToDisplayAllowAnyGlyphfulASCII(input
, _retval
);
188 //-----------------------------------------------------------------------------
// ScriptCombo records which script, or permitted combination of scripts, has
// been seen so far in a label. Its values are used as indices into
// scriptComboTable (see nsIDNService::illegalScriptCombo), so the numeric
// values must stay in step with the row order of that table.
// NOTE(review): only the combined-script enumerators (9..12) are visible in
// this copy of the file; the single-script values and the UNSET / FAIL
// enumerators referenced elsewhere in the file are declared in lines not
// shown here.
190 namespace mozilla::net
{
192 enum ScriptCombo
: int32_t {
203 JPAN
= 9, // Latin + Han + Hiragana + Katakana
204 CHNA
= 10, // Latin + Han + Bopomofo
205 KORE
= 11, // Latin + Han + Hangul
206 HNLT
= 12, // Latin + Han (could be any of the above combinations)
210 } // namespace mozilla::net
// Decides whether a single label (aLabel, as UTF-32 code points) may be shown
// in Unicode form, given the top-level domain aTLD.
//
// Checks that are visible in this function, in order:
//  - Under a read lock on mLock: the label is tested against mIDNBlocklist
//    with isOnlySafeChars, the eASCIIOnlyProfile case is handled, and the
//    current mRestrictionProfile is copied into the local `profile`.
//  - Per code point:
//      * identifier type must not be IDTYPE_RESTRICTED (anything else is
//        asserted to be IDTYPE_ALLOWED);
//      * when the script changes (ignoring COMMON / INHERITED), the new script
//        is fed to illegalScriptCombo together with the running savedScript;
//      * U+30FC is checked against the preceding script (Hiragana/Katakana);
//      * a check involving a Latin script on either side of the character;
//      * a check involving a preceding 'i', 'j' or 'l';
//      * U+00B7 is only accepted for the "cat" TLD between two 'l's;
//      * U+00FE / U+00F0 are checked against the "is" and "fo" TLDs;
//      * U+02BB / U+02BC (quote-like characters) are singled out;
//      * decimal digits must all come from one numbering system, identified
//        by the code point of its zero digit (ch minus its numeric value);
//      * non-spacing marks: an immediately repeated mark, a mark whose script
//        extensions do not include the base script, and certain diacritics
//        (U+0300..U+0314, U+031A) on dotless i (U+0131) are singled out;
//      * simplified-only versus traditional-only Han variants are compared
//        against the first variant seen.
//
// NOTE(review): the return statements, several condition prefixes, the
// bookkeeping updates of lastScript / previousChar / baseChar and any
// preprocessor guards are not visible in this copy of the file. In particular
// the comments below say the Simplified/Traditional Chinese check is
// temporarily disabled (bug 857481), so that code is presumably compiled out
// -- confirm against the complete source.
212 bool nsIDNService::IsLabelSafe(mozilla::Span
<const char32_t
> aLabel
,
213 mozilla::Span
<const char32_t
> aTLD
) {
214 restrictionProfile profile
{eASCIIOnlyProfile
};
216 AutoReadLock
lock(mLock
);
218 if (!isOnlySafeChars(aLabel
, mIDNBlocklist
)) {
222 // We should never get here if the label is ASCII
223 if (mRestrictionProfile
== eASCIIOnlyProfile
) {
226 profile
= mRestrictionProfile
;
229 mozilla::Span
<const char32_t
>::const_iterator current
= aLabel
.cbegin();
230 mozilla::Span
<const char32_t
>::const_iterator end
= aLabel
.cend();
232 Script lastScript
= Script::INVALID
;
233 char32_t previousChar
= 0;
234 char32_t baseChar
= 0; // last non-diacritic seen (base char for marks)
235 char32_t savedNumberingSystem
= 0;
236 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
238 HanVariantType savedHanVariant
= HVT_NotHan
;
241 ScriptCombo savedScript
= ScriptCombo::UNSET
;
243 while (current
!= end
) {
244 char32_t ch
= *current
++;
246 IdentifierType idType
= GetIdentifierType(ch
);
247 if (idType
== IDTYPE_RESTRICTED
) {
250 MOZ_ASSERT(idType
== IDTYPE_ALLOWED
);
252 // Check for mixed script
253 Script script
= UnicodeProperties::GetScriptCode(ch
);
254 if (script
!= Script::COMMON
&& script
!= Script::INHERITED
&&
255 script
!= lastScript
) {
256 if (illegalScriptCombo(profile
, script
, savedScript
)) {
261 // U+30FC should be preceded by a Hiragana/Katakana.
262 if (ch
== 0x30fc && lastScript
!= Script::HIRAGANA
&&
263 lastScript
!= Script::KATAKANA
) {
267 Script nextScript
= Script::INVALID
;
268 if (current
!= end
) {
269 nextScript
= UnicodeProperties::GetScriptCode(*current
);
273 (lastScript
== Script::LATIN
|| nextScript
== Script::LATIN
)) {
278 (previousChar
== 'i' || previousChar
== 'j' || previousChar
== 'l')) {
282 // U+00B7 is only allowed on Catalan domains between two l's.
283 if (ch
== 0xB7 && (!TLDEqualsLiteral(aTLD
, "cat") || previousChar
!= 'l' ||
284 current
== end
|| *current
!= 'l')) {
288 // Disallow Icelandic confusables for domains outside Icelandic and Faroese
290 if ((ch
== 0xFE || ch
== 0xF0) && !TLDEqualsLiteral(aTLD
, "is") &&
291 !TLDEqualsLiteral(aTLD
, "fo")) {
295 // Block single/double-quote-like characters.
296 if (ch
== 0x2BB || ch
== 0x2BC) {
300 // Check for mixed numbering systems
301 auto genCat
= GetGeneralCategory(ch
);
302 if (genCat
== HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER
) {
303 uint32_t zeroCharacter
=
304 ch
- mozilla::intl::UnicodeProperties::GetNumericValue(ch
);
305 if (savedNumberingSystem
== 0) {
306 // If we encounter a decimal number, save the zero character from that
308 savedNumberingSystem
= zeroCharacter
;
309 } else if (zeroCharacter
!= savedNumberingSystem
) {
314 if (genCat
== HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
) {
315 // Check for consecutive non-spacing marks.
316 if (previousChar
!= 0 && previousChar
== ch
) {
319 // Check for marks whose expected script doesn't match the base script.
320 if (lastScript
!= Script::INVALID
) {
321 UnicodeProperties::ScriptExtensionVector scripts
;
322 auto extResult
= UnicodeProperties::GetExtensions(ch
, scripts
);
323 MOZ_ASSERT(extResult
.isOk());
324 if (extResult
.isErr()) {
328 int nScripts
= AssertedCast
<int>(scripts
.length());
330 // nScripts will always be >= 1, because even for undefined characters
331 // it will return Script::INVALID.
332 // If the mark just has script=COMMON or INHERITED, we can't check any
333 // more carefully, but if it has specific scriptExtension codes, then
334 // assume those are the only valid scripts to use it with.
335 if (nScripts
> 1 || (Script(scripts
[0]) != Script::COMMON
&&
336 Script(scripts
[0]) != Script::INHERITED
)) {
337 while (--nScripts
>= 0) {
338 if (Script(scripts
[nScripts
]) == lastScript
) {
342 if (nScripts
== -1) {
347 // Check for diacritics on dotless-i, which would be indistinguishable
348 // from normal accented letter i.
349 if (baseChar
== 0x0131 &&
350 ((ch
>= 0x0300 && ch
<= 0x0314) || ch
== 0x031a)) {
357 if (script
!= Script::COMMON
&& script
!= Script::INHERITED
) {
361 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
364 // Check for both simplified-only and traditional-only Chinese characters
365 HanVariantType hanVariant
= GetHanVariant(ch
);
366 if (hanVariant
== HVT_SimplifiedOnly
|| hanVariant
== HVT_TraditionalOnly
) {
367 if (savedHanVariant
== HVT_NotHan
) {
368 savedHanVariant
= hanVariant
;
369 } else if (hanVariant
!= savedHanVariant
) {
// Maps a Script code to the ScriptCombo value used as the column index into
// scriptComboTable by nsIDNService::illegalScriptCombo.
// Visible mappings: BOPOMOFO -> BOPO, CYRILLIC -> CYRL, HIRAGANA -> HIRA,
// KATAKANA -> KATA. Returns of GREK, HANG, HANI, LATN and OTHR are also
// visible.
// NOTE(review): the switch header and the case labels for the GREK, HANG,
// HANI, LATN and OTHR returns are not visible in this copy of the file;
// presumably OTHR is the default for every script not listed -- confirm
// against the complete source.
380 // Scripts that we care about in illegalScriptCombo
381 static inline ScriptCombo
findScriptIndex(Script aScript
) {
383 case Script::BOPOMOFO
:
384 return ScriptCombo::BOPO
;
385 case Script::CYRILLIC
:
386 return ScriptCombo::CYRL
;
388 return ScriptCombo::GREK
;
390 return ScriptCombo::HANG
;
392 return ScriptCombo::HANI
;
393 case Script::HIRAGANA
:
394 return ScriptCombo::HIRA
;
395 case Script::KATAKANA
:
396 return ScriptCombo::KATA
;
398 return ScriptCombo::LATN
;
400 return ScriptCombo::OTHR
;
404 static const ScriptCombo scriptComboTable
[13][9] = {
405 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
407 /* BOPO */ {BOPO
, FAIL
, FAIL
, FAIL
, CHNA
, FAIL
, FAIL
, CHNA
, FAIL
},
408 /* CYRL */ {FAIL
, CYRL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
},
409 /* GREK */ {FAIL
, FAIL
, GREK
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
},
410 /* HANG */ {FAIL
, FAIL
, FAIL
, HANG
, KORE
, FAIL
, FAIL
, KORE
, FAIL
},
411 /* HANI */ {CHNA
, FAIL
, FAIL
, KORE
, HANI
, JPAN
, JPAN
, HNLT
, FAIL
},
412 /* HIRA */ {FAIL
, FAIL
, FAIL
, FAIL
, JPAN
, HIRA
, JPAN
, JPAN
, FAIL
},
413 /* KATA */ {FAIL
, FAIL
, FAIL
, FAIL
, JPAN
, JPAN
, KATA
, JPAN
, FAIL
},
414 /* LATN */ {CHNA
, FAIL
, FAIL
, KORE
, HNLT
, JPAN
, JPAN
, LATN
, OTHR
},
415 /* OTHR */ {FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, OTHR
, FAIL
},
416 /* JPAN */ {FAIL
, FAIL
, FAIL
, FAIL
, JPAN
, JPAN
, JPAN
, JPAN
, FAIL
},
417 /* CHNA */ {CHNA
, FAIL
, FAIL
, FAIL
, CHNA
, FAIL
, FAIL
, CHNA
, FAIL
},
418 /* KORE */ {FAIL
, FAIL
, FAIL
, KORE
, KORE
, FAIL
, FAIL
, KORE
, FAIL
},
419 /* HNLT */ {CHNA
, FAIL
, FAIL
, KORE
, HNLT
, JPAN
, JPAN
, HNLT
, FAIL
}};
// Folds `script` into the running combination `savedScript` (an in/out
// parameter) and reports whether the result is disallowed.
//
// Visible logic:
//  - If savedScript is still UNSET it is initialised from
//    findScriptIndex(script).
//  - savedScript is replaced by the scriptComboTable entry for
//    (savedScript, findScriptIndex(script)).
//  - The result is true when the new state is FAIL, or when it is OTHR and
//    `profile` is eHighlyRestrictiveProfile.
//
// NOTE(review): the statements between the UNSET initialisation and the table
// lookup (for example an early return) and the opening of the block comment
// below are not visible in this copy of the file.
421 bool nsIDNService::illegalScriptCombo(restrictionProfile profile
, Script script
,
422 ScriptCombo
& savedScript
) {
423 if (savedScript
== ScriptCombo::UNSET
) {
424 savedScript
= findScriptIndex(script
);
428 savedScript
= scriptComboTable
[savedScript
][findScriptIndex(script
)];
430 * Special case combinations that depend on which profile is in use
431 * In the Highly Restrictive profile Latin is not allowed with any
434 * In the Moderately Restrictive profile Latin mixed with any other
435 * single script is allowed.
437 return ((savedScript
== OTHR
&& profile
== eHighlyRestrictiveProfile
) ||
438 savedScript
== FAIL
);
// C-linkage, exported entry point that exposes nsIDNService::IsLabelSafe to
// callers outside this class.
//
// Obtains the service through nsStandardURL::GetIDNService(), casts it to
// nsIDNService*, wraps (aLabel, aLabelLen) and (aTld, aTldLen) in spans and
// forwards them to IsLabelSafe. No null check on the service pointer is
// visible here.
// NOTE(review): the declarations of aLabelLen and aTldLen are not visible in
// this copy of the file.
441 extern "C" MOZ_EXPORT
bool mozilla_net_is_label_safe(const char32_t
* aLabel
,
443 const char32_t
* aTld
,
445 return static_cast<nsIDNService
*>(nsStandardURL::GetIDNService())
446 ->IsLabelSafe(mozilla::Span
<const char32_t
>(aLabel
, aLabelLen
),
447 mozilla::Span
<const char32_t
>(aTld
, aTldLen
));