1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "MainThreadUtils.h"
7 #include "mozilla/ClearOnShutdown.h"
8 #include "mozilla/Preferences.h"
9 #include "nsIDNService.h"
10 #include "nsReadableUtils.h"
12 #include "nsServiceManagerUtils.h"
14 #include "nsStringFwd.h"
15 #include "nsUnicharUtils.h"
16 #include "nsUnicodeProperties.h"
17 #include "harfbuzz/hb.h"
19 #include "mozilla/ArrayUtils.h"
20 #include "mozilla/Casting.h"
21 #include "mozilla/StaticPrefs_network.h"
22 #include "mozilla/TextUtils.h"
23 #include "mozilla/Utf8.h"
24 #include "mozilla/intl/FormatBuffer.h"
25 #include "mozilla/intl/UnicodeProperties.h"
26 #include "mozilla/intl/UnicodeScriptCodes.h"
30 using namespace mozilla
;
31 using namespace mozilla::intl
;
32 using namespace mozilla::unicode
;
33 using namespace mozilla::net
;
34 using mozilla::Preferences
;
36 // Currently we use the non-transitional processing option -- see
37 // http://unicode.org/reports/tr46/
38 // To switch to transitional processing, change the value of this flag
39 // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
40 // (revert bug 1218179).
// This value is what the nsIDNService constructor passes to
// mozilla::intl::IDNA::TryCreate().
41 const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType
=
42 intl::IDNA::ProcessingType::NonTransitional
;
44 //-----------------------------------------------------------------------------
45 // According to RFC 1034 - 3.1. Name space specifications and terminology
46 // the maximum label size would be 63. However, this is enforced at the DNS
47 // level and none of the other browsers seem to enforce the VerifyDnsLength
48 // check in https://unicode.org/reports/tr46/#ToASCII
49 // Instead, we choose a rather arbitrary but larger size.
// kMaxULabelSize bounds the UCS-4 scratch buffer used by punycode().
50 static const uint32_t kMaxULabelSize
= 256;
51 // RFC 3490 - 5. ACE prefix
52 static const char kACEPrefix
[] = "xn--";
54 //-----------------------------------------------------------------------------
// Names of the preferences this service reads; they are also listed in
// gCallbackPrefs so that pref changes reach nsIDNService::prefsChanged().
56 #define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
57 #define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
58 #define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
// Tests the UTF-16 code units of `in` against `aBlocklist`: an empty
// blocklist is special-cased up front, otherwise every code unit from
// BeginReading() to EndReading() is passed to CharInBlocklist().
// NOTE(review): the statements inside both `if` bodies and the final return
// are not visible in this listing; presumably the result is true when the
// blocklist is empty or no code unit is blocked, and false otherwise --
// confirm against the complete file.
60 static inline bool isOnlySafeChars(const nsString
& in
,
61 const nsTArray
<BlocklistRange
>& aBlocklist
) {
62 if (aBlocklist
.IsEmpty()) {
65 const char16_t
* cur
= in
.BeginReading();
66 const char16_t
* end
= in
.EndReading();
68 for (; cur
< end
; ++cur
) {
69 if (CharInBlocklist(*cur
, aBlocklist
)) {
76 //-----------------------------------------------------------------------------
78 //-----------------------------------------------------------------------------
80 /* Implementation file */
// nsIDNService implements the nsIIDNService XPCOM interface.
81 NS_IMPL_ISUPPORTS(nsIDNService
, nsIIDNService
)
// Preference names handed to Preferences::RegisterPrefixCallbacks() and
// Preferences::UnregisterPrefixCallbacks() in nsIDNService::Init().
// NOTE(review): the end of this initializer list is not visible in this
// listing.
83 static const char* gCallbackPrefs
[] = {
84 NS_NET_PREF_EXTRAALLOWED
,
85 NS_NET_PREF_EXTRABLOCKED
,
86 NS_NET_PREF_IDNRESTRICTION
,
// Main-thread-only initialization: registers PrefChanged for every pref in
// gCallbackPrefs, sets up a callback that unregisters it again, and then
// loads the initial pref state.
// NOTE(review): the call that receives the lambda together with
// ShutdownPhase::XPCOMWillShutdown, and the function's return statement, are
// not visible in this listing.
90 nsresult
nsIDNService::Init() {
91 MOZ_ASSERT(NS_IsMainThread());
92 // Take a strong reference for our listener with the preferences service,
93 // which we will release on shutdown.
94 // It's OK if we remove the observer a bit early, as it just means we won't
95 // respond to `network.IDN.extra_{allowed,blocked}_chars` and
96 // `network.IDN.restriction_profile` pref changes during shutdown.
97 Preferences::RegisterPrefixCallbacks(PrefChanged
, gCallbackPrefs
, this);
// The lambda captures a RefPtr to this object, so the service stays alive
// for as long as the lambda holds that reference.
99 [self
= RefPtr
{this}]() mutable {
100 Preferences::UnregisterPrefixCallbacks(PrefChanged
, gCallbackPrefs
,
104 ShutdownPhase::XPCOMWillShutdown
);
// A null pref name makes prefsChanged() refresh every setting it handles.
105 prefsChanged(nullptr);
// Re-reads IDN preferences under the write lock on mLock. Main thread only.
// `pref` is the name of the changed preference, or nullptr to refresh
// everything (both conditions below test `!pref` first).
110 void nsIDNService::prefsChanged(const char* pref
) {
111 MOZ_ASSERT(NS_IsMainThread());
112 AutoWriteLock
lock(mLock
);
// Either extra-chars pref changing rebuilds the blocklist.
114 if (!pref
|| nsLiteralCString(NS_NET_PREF_EXTRAALLOWED
).Equals(pref
) ||
115 nsLiteralCString(NS_NET_PREF_EXTRABLOCKED
).Equals(pref
)) {
116 InitializeBlocklist(mIDNBlocklist
);
// The restriction profile pref is a string: "moderate", "high", or anything
// else, which selects the ASCII-only profile.
// NOTE(review): the handling of a failed GetCString() call is not visible
// in this listing.
118 if (!pref
|| nsLiteralCString(NS_NET_PREF_IDNRESTRICTION
).Equals(pref
)) {
119 nsAutoCString profile
;
121 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION
, profile
))) {
124 if (profile
.EqualsLiteral("moderate")) {
125 mRestrictionProfile
= eModeratelyRestrictiveProfile
;
126 } else if (profile
.EqualsLiteral("high")) {
127 mRestrictionProfile
= eHighlyRestrictiveProfile
;
129 mRestrictionProfile
= eASCIIOnlyProfile
;
// Constructor (main thread only): creates the IDNA converter with
// kIDNA2008_DefaultProcessingType and stores it in mIDNA. Creation failure is
// only checked by MOZ_ASSERT before the result is unwrapped.
// NOTE(review): the declaration of `createResult` is not visible in this
// listing.
134 nsIDNService::nsIDNService() {
135 MOZ_ASSERT(NS_IsMainThread());
138 mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType
);
139 MOZ_ASSERT(createResult
.isOk());
140 mIDNA
= createResult
.unwrap();
// Destructor: compiler-generated.
143 nsIDNService::~nsIDNService() = default;
// Converts one UTF-8 label to Unicode with mIDNA->LabelToUnicode(), writing
// UTF-16 into `output`.
// A failed conversion is mapped through ICUUtils::ICUErrorToNsResult(), with
// NS_ERROR_FAILURE replaced by NS_ERROR_MALFORMED_URI, and propagated by
// NS_ENSURE_SUCCESS. A conversion that succeeds but reports errors in its
// Info sets rv to NS_ERROR_MALFORMED_URI.
// NOTE(review): the second parameter, the declaration of `rv` and the final
// return are not visible in this listing.
145 nsresult
nsIDNService::IDNA2008ToUnicode(const nsACString
& input
,
147 NS_ConvertUTF8toUTF16
inputStr(input
);
149 Span
<const char16_t
> inputSpan
{inputStr
};
150 intl::nsTStringToBufferAdapter
buffer(output
);
151 auto result
= mIDNA
->LabelToUnicode(inputSpan
, buffer
);
154 if (result
.isErr()) {
155 rv
= ICUUtils::ICUErrorToNsResult(result
.unwrapErr());
156 if (rv
== NS_ERROR_FAILURE
) {
157 rv
= NS_ERROR_MALFORMED_URI
;
160 NS_ENSURE_SUCCESS(rv
, rv
);
162 intl::IDNA::Info info
= result
.unwrap();
163 if (info
.HasErrors()) {
164 rv
= NS_ERROR_MALFORMED_URI
;
// Runs one UTF-16 label through mIDNA->LabelToUnicode() into `output`, then
// decides from `flag` which reported errors turn into NS_ERROR_MALFORMED_URI.
// A hard conversion failure is mapped through ICUUtils::ICUErrorToNsResult()
// (NS_ERROR_FAILURE becomes NS_ERROR_MALFORMED_URI) and propagated.
// NOTE(review): the `output` parameter, the declaration of `rv`, the body of
// the eStringPrepIgnoreErrors branch and the final return are not visible in
// this listing.
170 nsresult
nsIDNService::IDNA2008StringPrep(const nsAString
& input
,
172 stringPrepFlag flag
) {
173 Span
<const char16_t
> inputSpan
{input
};
174 intl::nsTStringToBufferAdapter
buffer(output
);
175 auto result
= mIDNA
->LabelToUnicode(inputSpan
, buffer
);
178 if (result
.isErr()) {
179 rv
= ICUUtils::ICUErrorToNsResult(result
.unwrapErr());
180 if (rv
== NS_ERROR_FAILURE
) {
181 rv
= NS_ERROR_MALFORMED_URI
;
184 NS_ENSURE_SUCCESS(rv
, rv
);
186 intl::IDNA::Info info
= result
.unwrap();
188 // Output the result of nameToUnicode even if there were errors.
189 // But in the case of invalid punycode, the uidna_labelToUnicode result
190 // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
191 // confuse our subsequent processing, so we drop that.
192 // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
193 if ((info
.HasInvalidPunycode() || info
.HasInvalidAceLabel()) &&
194 !output
.IsEmpty() && output
.Last() == 0xfffd) {
195 output
.Truncate(output
.Length() - 1);
198 if (flag
== eStringPrepIgnoreErrors
) {
// For DNS use, a non-empty result is rejected only when it has errors other
// than invalid hyphens.
202 if (flag
== eStringPrepForDNS
) {
203 // We ignore errors if the result is empty, or if the errors were just
204 // invalid hyphens (not punycode-decoding failure or invalid chars).
205 if (!output
.IsEmpty()) {
206 if (info
.HasErrorsIgnoringInvalidHyphen()) {
208 rv
= NS_ERROR_MALFORMED_URI
;
// Any reported error is fatal on this path.
212 if (info
.HasErrors()) {
213 rv
= NS_ERROR_MALFORMED_URI
;
// XPCOM entry point: forwards to UTF8toACE() with eStringPrepForDNS.
// NOTE(review): the `ace` out-parameter declaration is not visible in this
// listing.
220 NS_IMETHODIMP
nsIDNService::ConvertUTF8toACE(const nsACString
& input
,
222 return UTF8toACE(input
, ace
, eStringPrepForDNS
);
// Converts a UTF-8 host name to ACE form label by label.
// The input is first copied to UTF-16 and its full-stop variants are
// normalized. Each '.'-terminated label, and then the trailing label, goes
// through stringPrepAndACE() and the result is appended to `ace`. Any failure
// from stringPrepAndACE() is returned immediately via NS_ENSURE_SUCCESS.
// NOTE(review): the initialization and updating of `len`/`offset`, the
// declaration of `rv`, the handling of the '.' separators in the output and
// the final return are not visible in this listing.
225 nsresult
nsIDNService::UTF8toACE(const nsACString
& input
, nsACString
& ace
,
226 stringPrepFlag flag
) {
228 NS_ConvertUTF8toUTF16
ustr(input
);
230 // map ideographic period to ASCII period etc.
231 normalizeFullStops(ustr
);
233 uint32_t len
, offset
;
236 nsAutoCString encodedBuf
;
238 nsAString::const_iterator start
, end
;
239 ustr
.BeginReading(start
);
240 ustr
.EndReading(end
);
243 // encode nodes if non ASCII
244 while (start
!= end
) {
// `len - 1` leaves the '.' that was just consumed out of the label.
246 if (*start
++ == (char16_t
)'.') {
247 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
- 1), encodedBuf
, flag
);
248 NS_ENSURE_SUCCESS(rv
, rv
);
250 ace
.Append(encodedBuf
);
257 // encode the last node if non ASCII
259 rv
= stringPrepAndACE(Substring(ustr
, offset
, len
), encodedBuf
, flag
);
260 NS_ENSURE_SUCCESS(rv
, rv
);
262 ace
.Append(encodedBuf
);
// XPCOM entry point: forwards to ACEtoUTF8() with eStringPrepForDNS and
// returns its result.
268 NS_IMETHODIMP
nsIDNService::ConvertACEtoUTF8(const nsACString
& input
,
269 nsACString
& _retval
) {
270 return ACEtoUTF8(input
, _retval
, eStringPrepForDNS
);
// Decodes an ACE host name to UTF-8 label by label.
// The top-level domain is extracted first (skipping a trailing '.' of an
// FQDN) and passed to decodeACE() for every label. A label that fails to
// decode is appended to `_retval` unchanged.
// NOTE(review): the empty-input branch body, the declaration of `tld`, the
// iterator adjustments around the TLD search, the `len`/`offset` updates, the
// emission of '.' separators and the final return are not visible in this
// listing.
273 nsresult
nsIDNService::ACEtoUTF8(const nsACString
& input
, nsACString
& _retval
,
274 stringPrepFlag flag
) {
275 // RFC 3490 - 4.2 ToUnicode
276 // ToUnicode never fails. If any step fails, then the original input
277 // sequence is returned immediately in that step.
279 // Note that this refers to the decoding of a single label.
280 // ACEtoUTF8 may be called with a sequence of labels separated by dots;
281 // this test applies individually to each label.
283 uint32_t len
= 0, offset
= 0;
284 nsAutoCString decodedBuf
;
286 nsACString::const_iterator start
, end
;
287 input
.BeginReading(start
);
288 input
.EndReading(end
);
291 if (input
.IsEmpty()) {
296 nsCString::const_iterator it
= end
, tldEnd
= end
;
298 if (it
!= start
&& *it
== (char16_t
)'.') {
299 // This is an FQDN (ends in .)
300 // Skip this dot to extract the TLD
304 // Find last . and compute TLD
305 while (it
!= start
) {
306 if (*it
== (char16_t
)'.') {
308 tld
.Assign(Substring(it
, tldEnd
));
314 // loop and decode nodes
315 while (start
!= end
) {
// `len - 1` leaves the '.' that was just consumed out of the label.
317 if (*start
++ == '.') {
318 nsDependentCSubstring
origLabel(input
, offset
, len
- 1);
319 if (NS_FAILED(decodeACE(origLabel
, decodedBuf
, flag
, tld
))) {
320 // If decoding failed, use the original input sequence
322 _retval
.Append(origLabel
);
324 _retval
.Append(decodedBuf
);
332 // decode the last node
334 nsDependentCSubstring
origLabel(input
, offset
, len
);
335 if (NS_FAILED(decodeACE(origLabel
, decodedBuf
, flag
, tld
))) {
336 _retval
.Append(origLabel
);
338 _retval
.Append(decodedBuf
);
// Checks whether `input` contains an ACE label: it either begins with "xn--"
// (case-insensitive) or, when non-empty and not starting with '.', contains
// ".xn--" (also matched case-insensitively, via tolower()).
// NOTE(review): the body of the non-ASCII branch, the assignment of the
// expression below to *_retval and the return are not visible in this
// listing.
345 NS_IMETHODIMP
nsIDNService::IsACE(const nsACString
& input
, bool* _retval
) {
346 // look for the ACE prefix in the input string. it may occur
347 // at the beginning of any segment in the domain name. for
348 // example: "www.xn--ENCODED.com"
350 if (!IsAscii(input
)) {
// Case-insensitive substring search built on std::search; the characters are
// taken as unsigned char before being passed to tolower().
355 auto stringContains
= [](const nsACString
& haystack
,
356 const nsACString
& needle
) {
357 return std::search(haystack
.BeginReading(), haystack
.EndReading(),
358 needle
.BeginReading(), needle
.EndReading(),
359 [](unsigned char ch1
, unsigned char ch2
) {
360 return tolower(ch1
) == tolower(ch2
);
361 }) != haystack
.EndReading();
365 StringBeginsWith(input
, "xn--"_ns
, nsCaseInsensitiveCStringComparator
) ||
366 (!input
.IsEmpty() && input
[0] != '.' &&
367 stringContains(input
, ".xn--"_ns
));
// Normalizes a UTF-8 host name label by label.
// Input that is not valid UTF-8 is rejected with NS_ERROR_UNEXPECTED. The
// name is converted to UTF-16, its full-stop variants are normalized, and
// each label is passed to stringPrep() with eStringPrepIgnoreErrors. The
// prepared labels are joined with '.' and converted back to UTF-8 in
// `output`.
// NOTE(review): the declaration of `rv`, the `len`/`offset` updates and the
// final return are not visible in this listing.
371 nsresult
nsIDNService::Normalize(const nsACString
& input
, nsACString
& output
) {
372 // protect against bogus input
373 NS_ENSURE_TRUE(IsUtf8(input
), NS_ERROR_UNEXPECTED
);
375 NS_ConvertUTF8toUTF16
inUTF16(input
);
376 normalizeFullStops(inUTF16
);
378 // pass the domain name to stringprep label by label
379 nsAutoString outUTF16
, outLabel
;
381 uint32_t len
= 0, offset
= 0;
383 nsAString::const_iterator start
, end
;
384 inUTF16
.BeginReading(start
);
385 inUTF16
.EndReading(end
);
387 while (start
!= end
) {
// `len - 1` leaves the '.' that was just consumed out of the label.
389 if (*start
++ == char16_t('.')) {
390 rv
= stringPrep(Substring(inUTF16
, offset
, len
- 1), outLabel
,
391 eStringPrepIgnoreErrors
);
392 NS_ENSURE_SUCCESS(rv
, rv
);
394 outUTF16
.Append(outLabel
);
395 outUTF16
.Append(char16_t('.'));
// Trailing label (the part after the last '.').
401 rv
= stringPrep(Substring(inUTF16
, offset
, len
), outLabel
,
402 eStringPrepIgnoreErrors
);
403 NS_ENSURE_SUCCESS(rv
, rv
);
405 outUTF16
.Append(outLabel
);
408 CopyUTF16toUTF8(outUTF16
, output
);
// Produces the form of a host name to show in the UI and reports through
// *_isASCII whether that form is pure ASCII.
// Visible behavior: an ASCII input is lowercased and, if it contains ACE
// labels and the network.IDN.show_punycode pref is off, decoded with
// ACEtoUTF8(..., eStringPrepForUI). A non-ASCII input is normalized (after
// decoding any ACE labels) and then re-encoded with UTF8toACE(), either fully
// (show_punycode on) or only for labels not considered safe
// (eStringPrepForUI).
// NOTE(review): many lines of this function (declarations, else branches,
// early returns) are not visible in this listing, so the exact control flow
// must be checked against the complete file. The "whitelist" wording in the
// existing comments has no matching lookup in the visible code -- TODO
// confirm whether it is stale.
412 NS_IMETHODIMP
nsIDNService::ConvertToDisplayIDN(const nsACString
& input
,
414 nsACString
& _retval
) {
415 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
416 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
420 // Even if the hostname is not ASCII, individual labels may still be ACE, so
421 // test IsACE before testing IsASCII
423 IsACE(input
, &isACE
);
425 if (IsAscii(input
)) {
426 // first, canonicalize the host to lowercase, for whitelist lookup
428 ToLowerCase(_retval
);
430 if (isACE
&& !StaticPrefs::network_IDN_show_punycode()) {
431 // ACEtoUTF8() can't fail, but might return the original ACE string
432 nsAutoCString
temp(_retval
);
433 // Convert from ACE to UTF8 only those labels which are considered safe
435 ACEtoUTF8(temp
, _retval
, eStringPrepForUI
);
436 *_isASCII
= IsAscii(_retval
);
441 // We have to normalize the hostname before testing against the domain
442 // whitelist (see bug 315411), and to ensure the entire string gets
445 // Normalization and the tests for safe display below, assume that the
446 // input is Unicode, so first convert any ACE labels to UTF8
449 ACEtoUTF8(input
, temp
, eStringPrepIgnoreErrors
);
450 rv
= Normalize(temp
, _retval
);
452 rv
= Normalize(input
, _retval
);
// Here _retval is passed to UTF8toACE() as both input and output.
458 if (StaticPrefs::network_IDN_show_punycode() &&
459 NS_SUCCEEDED(UTF8toACE(_retval
, _retval
, eStringPrepIgnoreErrors
))) {
464 // normalization could result in an ASCII-only hostname. alternatively, if
465 // the host is converted to ACE by the normalizer, then the host may contain
466 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
468 *_isASCII
= IsAscii(_retval
);
470 // UTF8toACE with eStringPrepForUI may return a domain name where
471 // some labels are in UTF-8 and some are in ACE, depending on
472 // whether they are considered safe for display
473 rv
= UTF8toACE(_retval
, _retval
, eStringPrepForUI
);
474 *_isASCII
= IsAscii(_retval
);
480 } // Will generate a mutex still-held warning
482 //-----------------------------------------------------------------------------
// Converts the UTF-16 string `in` to UCS-4 code points in `out`, combining
// surrogate pairs with SURROGATE_TO_UCS4. Returns NS_ERROR_MALFORMED_URI once
// the element count reaches `outBufLen`; otherwise writes a 0 terminator
// after the last element.
// NOTE(review): the declarations of `i` and `curChar`, the EndReading() call,
// the non-surrogate branch, the increment of `i`, the store to *outLen and
// the success return are not visible in this listing. Because out[i] is
// written after the `i >= outBufLen` check passes, callers presumably need
// room for outBufLen + 1 elements -- punycode() sizes its buffer that way.
484 static nsresult
utf16ToUcs4(const nsAString
& in
, uint32_t* out
,
485 uint32_t outBufLen
, uint32_t* outLen
) {
487 nsAString::const_iterator start
, end
;
488 in
.BeginReading(start
);
491 while (start
!= end
) {
496 if (start
!= end
&& NS_IS_SURROGATE_PAIR(curChar
, *start
)) {
497 out
[i
] = SURROGATE_TO_UCS4(curChar
, *start
);
504 if (i
>= outBufLen
) {
505 return NS_ERROR_MALFORMED_URI
;
508 out
[i
] = (uint32_t)'\0';
// Punycode-encodes the UTF-16 label `in` and assigns kACEPrefix followed by
// the encoded text to `out`.
// The label is first converted to UCS-4 (at most kMaxULabelSize code points;
// the buffer has one extra slot for the terminator utf16ToUcs4() writes).
// Returns NS_ERROR_MALFORMED_URI if punycode_encode() does not report
// success or the encoded length leaves no room for the NUL terminator; a
// utf16ToUcs4() failure is propagated as-is.
// NOTE(review): the success return is not visible in this listing.
513 static nsresult
punycode(const nsAString
& in
, nsACString
& out
) {
514 uint32_t ucs4Buf
[kMaxULabelSize
+ 1];
515 uint32_t ucs4Len
= 0u;
516 nsresult rv
= utf16ToUcs4(in
, ucs4Buf
, kMaxULabelSize
, &ucs4Len
);
517 NS_ENSURE_SUCCESS(rv
, rv
);
519 // need maximum 20 bits to encode 16 bit Unicode character
520 // (include null terminator)
521 const uint32_t kEncodedBufSize
= kMaxULabelSize
* 20 / 8 + 1 + 1;
522 char encodedBuf
[kEncodedBufSize
];
// encodedLength is passed by address and is read back after the call.
523 punycode_uint encodedLength
= kEncodedBufSize
;
525 enum punycode_status status
=
526 punycode_encode(ucs4Len
, ucs4Buf
, nullptr, &encodedLength
, encodedBuf
);
528 if (punycode_success
!= status
|| encodedLength
>= kEncodedBufSize
) {
529 return NS_ERROR_MALFORMED_URI
;
// NUL-terminate so the buffer can be wrapped in an nsDependentCString.
532 encodedBuf
[encodedLength
] = '\0';
533 out
.Assign(nsDependentCString(kACEPrefix
) + nsDependentCString(encodedBuf
));
540 // 1) Map -- For each character in the input, check if it has a mapping
541 // and, if so, replace it with its mapping. This is described in section 3.
543 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
544 // normalization. This is described in section 4.
546 // 3) Prohibit -- Check for any characters that are not allowed in the
547 // output. If any are found, return an error. This is described in section
550 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
551 // are found, make sure that the whole string satisfies the requirements
552 // for bidirectional strings. If the string does not satisfy the requirements
553 // for bidirectional strings, return an error. This is described in section 6.
555 // 5) Check unassigned code points -- If allowUnassigned is false, check for
556 // any unassigned Unicode points and if any are found return an error.
557 // This is described in section 7.
// In this implementation stringPrep() is a thin wrapper: it forwards its
// arguments unchanged to IDNA2008StringPrep() and returns that result.
559 nsresult
nsIDNService::stringPrep(const nsAString
& in
, nsAString
& out
,
560 stringPrepFlag flag
) {
561 return IDNA2008StringPrep(in
, out
, flag
);
// Prepares a single UTF-16 label and encodes it for `out`.
// Visible behavior: the label is run through stringPrep(); for
// eStringPrepForDNS a stringPrep() failure is returned. An all-ASCII result
// is copied to `out` directly. For eStringPrepForUI, a successfully prepared
// label that isLabelSafe() accepts is emitted as UTF-8. Anything else is
// punycode-encoded.
// NOTE(review): the declaration of `rv`, the condition guarding the first
// LossyCopyUTF16toASCII() call and the returns inside the individual
// branches are not visible in this listing.
564 nsresult
nsIDNService::stringPrepAndACE(const nsAString
& in
, nsACString
& out
,
565 stringPrepFlag flag
) {
571 LossyCopyUTF16toASCII(in
, out
);
572 // If label begins with xn-- we still want to check its validity
573 if (!StringBeginsWith(in
, u
"xn--"_ns
, nsCaseInsensitiveStringComparator
)) {
578 nsAutoString strPrep
;
579 rv
= stringPrep(in
, strPrep
, flag
);
580 if (flag
== eStringPrepForDNS
) {
581 NS_ENSURE_SUCCESS(rv
, rv
);
584 if (IsAscii(strPrep
)) {
585 LossyCopyUTF16toASCII(strPrep
, out
);
// isLabelSafe() is called with the original label and an empty TLD.
589 if (flag
== eStringPrepForUI
&& NS_SUCCEEDED(rv
) && isLabelSafe(in
, u
""_ns
)) {
590 CopyUTF16toUTF8(strPrep
, out
);
594 return punycode(strPrep
, out
);
598 // 1) Whenever dots are used as label separators, the following characters
599 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
600 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
// Rewrites `s` in place, replacing single characters with an ASCII "." via
// ReplaceLiteral(index, 1, u".").
// NOTE(review): the EndReading() call, the declaration and updating of
// `index`, and the test that selects which characters to replace are not
// visible in this listing; presumably it matches the dot variants listed
// above -- confirm against the complete file.
603 void nsIDNService::normalizeFullStops(nsAString
& s
) {
604 nsAString::const_iterator start
, end
;
605 s
.BeginReading(start
);
609 while (start
!= end
) {
614 s
.ReplaceLiteral(index
, 1, u
".");
// Decodes a single ACE label `in` into UTF-8 `out`.
// The label is converted with IDNA2008ToUnicode(), and a failure there is
// returned. The Unicode form is emitted when `flag` is not eStringPrepForUI
// or when isLabelSafe() accepts it for the given TLD. The result is then
// re-encoded with UTF8toACE() and, for eStringPrepForDNS, must match `in`
// case-insensitively or NS_ERROR_MALFORMED_URI is returned.
// NOTE(review): the declarations of `utf16` and `ace`, the branch taken when
// the label is not considered safe, and the success return are not visible
// in this listing.
624 nsresult
nsIDNService::decodeACE(const nsACString
& in
, nsACString
& out
,
625 stringPrepFlag flag
, const nsACString
& aTLD
) {
634 nsresult result
= IDNA2008ToUnicode(in
, utf16
);
635 NS_ENSURE_SUCCESS(result
, result
);
637 NS_ConvertUTF8toUTF16
tld(aTLD
);
639 if (flag
!= eStringPrepForUI
|| isLabelSafe(utf16
, tld
)) {
640 CopyUTF16toUTF8(utf16
, out
);
646 // Validation: encode back to ACE and compare the strings
648 nsresult rv
= UTF8toACE(out
, ace
, flag
);
649 NS_ENSURE_SUCCESS(rv
, rv
);
651 if (flag
== eStringPrepForDNS
&&
652 !ace
.Equals(in
, nsCaseInsensitiveCStringComparator
)) {
653 return NS_ERROR_MALFORMED_URI
;
// ScriptCombo tracks which script, or allowed combination of scripts, has
// been seen so far in a label. Its values are used as the row index into
// scriptComboTable by illegalScriptCombo().
// NOTE(review): the enumerators before JPAN and the closing of the enum are
// not visible in this listing.
659 namespace mozilla::net
{
661 enum ScriptCombo
: int32_t {
672 JPAN
= 9, // Latin + Han + Hiragana + Katakana
673 CHNA
= 10, // Latin + Han + Bopomofo
674 KORE
= 11, // Latin + Han + Hangul
675 HNLT
= 12, // Latin + Han (could be any of the above combinations)
679 } // namespace mozilla::net
// Decides whether a Unicode label may be displayed as-is, given the
// top-level domain `tld`.
// Visible checks, in order: the blocklist test (isOnlySafeChars) and the
// restriction profile, both read under a read lock on mLock; then, per code
// point (surrogate pairs are combined first): restricted identifier type,
// illegal script mixing (illegalScriptCombo), the placement of U+30FC,
// U+00B7 outside the Catalan "l·l" context, Icelandic letters U+00FE/U+00F0
// outside the "is"/"fo" TLDs, the quote-like U+02BB/U+02BC, mixed numbering
// systems, repeated or script-mismatched non-spacing marks, and diacritics on
// dotless i (U+0131).
// NOTE(review): most branch bodies (presumably `return false;`), several
// conditions, the per-iteration bookkeeping of lastScript / previousChar /
// baseChar and the final return are not visible in this listing -- confirm
// against the complete file before relying on this summary.
681 bool nsIDNService::isLabelSafe(const nsAString
& label
, const nsAString
& tld
) {
682 restrictionProfile profile
{eASCIIOnlyProfile
};
684 AutoReadLock
lock(mLock
);
686 if (!isOnlySafeChars(PromiseFlatString(label
), mIDNBlocklist
)) {
690 // We should never get here if the label is ASCII
691 NS_ASSERTION(!IsAscii(label
), "ASCII label in IDN checking");
692 if (mRestrictionProfile
== eASCIIOnlyProfile
) {
// Copy the profile into a local so the rest of the function uses that copy.
695 profile
= mRestrictionProfile
;
698 nsAString::const_iterator current
, end
;
699 label
.BeginReading(current
);
700 label
.EndReading(end
);
702 Script lastScript
= Script::INVALID
;
703 uint32_t previousChar
= 0;
704 uint32_t baseChar
= 0; // last non-diacritic seen (base char for marks)
705 uint32_t savedNumberingSystem
= 0;
706 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
708 HanVariantType savedHanVariant
= HVT_NotHan
;
711 ScriptCombo savedScript
= ScriptCombo::UNSET
;
713 while (current
!= end
) {
714 uint32_t ch
= *current
++;
// Combine a surrogate pair into a single code point.
716 if (current
!= end
&& NS_IS_SURROGATE_PAIR(ch
, *current
)) {
717 ch
= SURROGATE_TO_UCS4(ch
, *current
++);
720 IdentifierType idType
= GetIdentifierType(ch
);
721 if (idType
== IDTYPE_RESTRICTED
) {
724 MOZ_ASSERT(idType
== IDTYPE_ALLOWED
);
726 // Check for mixed script
727 Script script
= UnicodeProperties::GetScriptCode(ch
);
728 if (script
!= Script::COMMON
&& script
!= Script::INHERITED
&&
729 script
!= lastScript
) {
730 if (illegalScriptCombo(profile
, script
, savedScript
)) {
735 // U+30FC should be preceded by a Hiragana/Katakana.
736 if (ch
== 0x30fc && lastScript
!= Script::HIRAGANA
&&
737 lastScript
!= Script::KATAKANA
) {
// Look ahead at the script of the next UTF-16 code unit, if there is one.
741 Script nextScript
= Script::INVALID
;
742 if (current
!= end
) {
743 nextScript
= UnicodeProperties::GetScriptCode(*current
);
747 (lastScript
== Script::LATIN
|| nextScript
== Script::LATIN
)) {
752 (previousChar
== 'i' || previousChar
== 'j' || previousChar
== 'l')) {
756 // U+00B7 is only allowed on Catalan domains between two l's.
757 if (ch
== 0xB7 && (!tld
.EqualsLiteral("cat") || previousChar
!= 'l' ||
758 current
== end
|| *current
!= 'l')) {
762 // Disallow Icelandic confusables for domains outside Icelandic and Faroese
764 if ((ch
== 0xFE || ch
== 0xF0) && !tld
.EqualsLiteral("is") &&
765 !tld
.EqualsLiteral("fo")) {
769 // Block single/double-quote-like characters.
770 if (ch
== 0x2BB || ch
== 0x2BC) {
774 // Check for mixed numbering systems
775 auto genCat
= GetGeneralCategory(ch
);
776 if (genCat
== HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER
) {
// Subtracting the digit's numeric value yields the zero digit of its
// numbering system, which identifies that system.
777 uint32_t zeroCharacter
=
778 ch
- mozilla::intl::UnicodeProperties::GetNumericValue(ch
);
779 if (savedNumberingSystem
== 0) {
780 // If we encounter a decimal number, save the zero character from that
782 savedNumberingSystem
= zeroCharacter
;
783 } else if (zeroCharacter
!= savedNumberingSystem
) {
788 if (genCat
== HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
) {
789 // Check for consecutive non-spacing marks.
790 if (previousChar
!= 0 && previousChar
== ch
) {
793 // Check for marks whose expected script doesn't match the base script.
794 if (lastScript
!= Script::INVALID
) {
795 UnicodeProperties::ScriptExtensionVector scripts
;
796 auto extResult
= UnicodeProperties::GetExtensions(ch
, scripts
);
797 MOZ_ASSERT(extResult
.isOk());
798 if (extResult
.isErr()) {
802 int nScripts
= AssertedCast
<int>(scripts
.length());
804 // nScripts will always be >= 1, because even for undefined characters
805 // it will return Script::INVALID.
806 // If the mark just has script=COMMON or INHERITED, we can't check any
807 // more carefully, but if it has specific scriptExtension codes, then
808 // assume those are the only valid scripts to use it with.
809 if (nScripts
> 1 || (Script(scripts
[0]) != Script::COMMON
&&
810 Script(scripts
[0]) != Script::INHERITED
)) {
// Search the extensions from last to first for lastScript; nScripts reaches
// -1 only when the loop runs to completion.
811 while (--nScripts
>= 0) {
812 if (Script(scripts
[nScripts
]) == lastScript
) {
816 if (nScripts
== -1) {
821 // Check for diacritics on dotless-i, which would be indistinguishable
822 // from normal accented letter i.
823 if (baseChar
== 0x0131 &&
824 ((ch
>= 0x0300 && ch
<= 0x0314) || ch
== 0x031a)) {
831 if (script
!= Script::COMMON
&& script
!= Script::INHERITED
) {
835 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
838 // Check for both simplified-only and traditional-only Chinese characters
839 HanVariantType hanVariant
= GetHanVariant(ch
);
840 if (hanVariant
== HVT_SimplifiedOnly
|| hanVariant
== HVT_TraditionalOnly
) {
841 if (savedHanVariant
== HVT_NotHan
) {
842 savedHanVariant
= hanVariant
;
843 } else if (hanVariant
!= savedHanVariant
) {
854 // Scripts that we care about in illegalScriptCombo
// Maps a Unicode Script code to its ScriptCombo value; the result is used as
// the column index into scriptComboTable.
// NOTE(review): the switch header and several `case` labels are not visible
// in this listing; presumably the OTHR return is the default case -- confirm
// against the complete file.
855 static inline ScriptCombo
findScriptIndex(Script aScript
) {
857 case Script::BOPOMOFO
:
858 return ScriptCombo::BOPO
;
859 case Script::CYRILLIC
:
860 return ScriptCombo::CYRL
;
862 return ScriptCombo::GREK
;
864 return ScriptCombo::HANG
;
866 return ScriptCombo::HANI
;
867 case Script::HIRAGANA
:
868 return ScriptCombo::HIRA
;
869 case Script::KATAKANA
:
870 return ScriptCombo::KATA
;
872 return ScriptCombo::LATN
;
874 return ScriptCombo::OTHR
;
// State-transition table for the mixed-script check.
// illegalScriptCombo() indexes it as
// scriptComboTable[savedScript][findScriptIndex(script)]: the row is the
// combination seen so far (13 possible states), the column is the script of
// the current character (9 script classes), and the entry is the new
// combination, or FAIL when the mix is not allowed.
878 static const ScriptCombo scriptComboTable
[13][9] = {
879 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
881 /* BOPO */ {BOPO
, FAIL
, FAIL
, FAIL
, CHNA
, FAIL
, FAIL
, CHNA
, FAIL
},
882 /* CYRL */ {FAIL
, CYRL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
},
883 /* GREK */ {FAIL
, FAIL
, GREK
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
},
884 /* HANG */ {FAIL
, FAIL
, FAIL
, HANG
, KORE
, FAIL
, FAIL
, KORE
, FAIL
},
885 /* HANI */ {CHNA
, FAIL
, FAIL
, KORE
, HANI
, JPAN
, JPAN
, HNLT
, FAIL
},
886 /* HIRA */ {FAIL
, FAIL
, FAIL
, FAIL
, JPAN
, HIRA
, JPAN
, JPAN
, FAIL
},
887 /* KATA */ {FAIL
, FAIL
, FAIL
, FAIL
, JPAN
, JPAN
, KATA
, JPAN
, FAIL
},
888 /* LATN */ {CHNA
, FAIL
, FAIL
, KORE
, HNLT
, JPAN
, JPAN
, LATN
, OTHR
},
889 /* OTHR */ {FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, FAIL
, OTHR
, FAIL
},
890 /* JPAN */ {FAIL
, FAIL
, FAIL
, FAIL
, JPAN
, JPAN
, JPAN
, JPAN
, FAIL
},
891 /* CHNA */ {CHNA
, FAIL
, FAIL
, FAIL
, CHNA
, FAIL
, FAIL
, CHNA
, FAIL
},
892 /* KORE */ {FAIL
, FAIL
, FAIL
, KORE
, KORE
, FAIL
, FAIL
, KORE
, FAIL
},
893 /* HNLT */ {CHNA
, FAIL
, FAIL
, KORE
, HNLT
, JPAN
, JPAN
, HNLT
, FAIL
}};
// Updates `savedScript` (in/out) with the script of the current character and
// reports whether the resulting combination is disallowed.
// On the first script seen (savedScript == UNSET) the state is simply
// initialized from findScriptIndex(script). Otherwise the state advances
// through scriptComboTable. The combination is illegal when the new state is
// FAIL, or when it is OTHR and `profile` is eHighlyRestrictiveProfile.
// NOTE(review): the return inside the UNSET branch is not visible in this
// listing; presumably it returns false -- confirm against the complete file.
895 bool nsIDNService::illegalScriptCombo(restrictionProfile profile
, Script script
,
896 ScriptCombo
& savedScript
) {
897 if (savedScript
== ScriptCombo::UNSET
) {
898 savedScript
= findScriptIndex(script
);
902 savedScript
= scriptComboTable
[savedScript
][findScriptIndex(script
)];
904 * Special case combinations that depend on which profile is in use
905 * In the Highly Restrictive profile Latin is not allowed with any
908 * In the Moderately Restrictive profile Latin mixed with any other
909 * single script is allowed.
911 return ((savedScript
== OTHR
&& profile
== eHighlyRestrictiveProfile
) ||
912 savedScript
== FAIL
);