// Web-view residue retained from extraction (not part of the source file):
//   Bug 1807268 - Re-enable verifyShowClipboardSuggestionsToggleTest UI test r=jajohnson
//   [gecko.git] / netwerk / dns / nsIDNService.cpp
//   blob 6a87c8ee068c296e4d75217e22a8bf2e7186d7d5
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "MainThreadUtils.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/Preferences.h"
#include "nsIDNService.h"
#include "nsReadableUtils.h"
#include "nsCRT.h"
#include "nsServiceManagerUtils.h"
#include "nsString.h"
#include "nsStringFwd.h"
#include "nsUnicharUtils.h"
#include "nsUnicodeProperties.h"
#include "harfbuzz/hb.h"
#include "punycode.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Casting.h"
#include "mozilla/StaticPrefs_network.h"
#include "mozilla/TextUtils.h"
#include "mozilla/Utf8.h"
#include "mozilla/intl/FormatBuffer.h"
#include "mozilla/intl/UnicodeProperties.h"
#include "mozilla/intl/UnicodeScriptCodes.h"

#include "ICUUtils.h"

#include <algorithm>
#include <cctype>

using namespace mozilla;
using namespace mozilla::intl;
using namespace mozilla::unicode;
using namespace mozilla::net;
using mozilla::Preferences;

36 // Currently we use the non-transitional processing option -- see
37 // http://unicode.org/reports/tr46/
38 // To switch to transitional processing, change the value of this flag
39 // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
40 // (revert bug 1218179).
41 const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType =
42 intl::IDNA::ProcessingType::NonTransitional;
//-----------------------------------------------------------------------------
// According to RFC 1034 - 3.1. Name space specifications and terminology
// the maximum label size would be 63. However, this is enforced at the DNS
// level and none of the other browsers seem to enforce the VerifyDnsLength
// check in https://unicode.org/reports/tr46/#ToASCII
// Instead, we choose a rather arbitrary but larger size.
static const uint32_t kMaxULabelSize = 256;
// RFC 3490 - 5. ACE prefix
static const char kACEPrefix[] = "xn--";

54 //-----------------------------------------------------------------------------
// Names of the preferences this service observes (see gCallbackPrefs).
#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"

60 static inline bool isOnlySafeChars(const nsString& in,
61 const nsTArray<BlocklistRange>& aBlocklist) {
62 if (aBlocklist.IsEmpty()) {
63 return true;
65 const char16_t* cur = in.BeginReading();
66 const char16_t* end = in.EndReading();
68 for (; cur < end; ++cur) {
69 if (CharInBlocklist(*cur, aBlocklist)) {
70 return false;
73 return true;
76 //-----------------------------------------------------------------------------
77 // nsIDNService
78 //-----------------------------------------------------------------------------
80 /* Implementation file */
81 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
83 static const char* gCallbackPrefs[] = {
84 NS_NET_PREF_EXTRAALLOWED,
85 NS_NET_PREF_EXTRABLOCKED,
86 NS_NET_PREF_IDNRESTRICTION,
87 nullptr,
90 nsresult nsIDNService::Init() {
91 MOZ_ASSERT(NS_IsMainThread());
92 // Take a strong reference for our listener with the preferences service,
93 // which we will release on shutdown.
94 // It's OK if we remove the observer a bit early, as it just means we won't
95 // respond to `network.IDN.extra_{allowed,blocked}_chars` and
96 // `network.IDN.restriction_profile` pref changes during shutdown.
97 Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
98 RunOnShutdown(
99 [self = RefPtr{this}]() mutable {
100 Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
101 self.get());
102 self = nullptr;
104 ShutdownPhase::XPCOMWillShutdown);
105 prefsChanged(nullptr);
107 return NS_OK;
110 void nsIDNService::prefsChanged(const char* pref) {
111 MOZ_ASSERT(NS_IsMainThread());
112 AutoWriteLock lock(mLock);
114 if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
115 nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
116 InitializeBlocklist(mIDNBlocklist);
118 if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
119 nsAutoCString profile;
120 if (NS_FAILED(
121 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
122 profile.Truncate();
124 if (profile.EqualsLiteral("moderate")) {
125 mRestrictionProfile = eModeratelyRestrictiveProfile;
126 } else if (profile.EqualsLiteral("high")) {
127 mRestrictionProfile = eHighlyRestrictiveProfile;
128 } else {
129 mRestrictionProfile = eASCIIOnlyProfile;
134 nsIDNService::nsIDNService() {
135 MOZ_ASSERT(NS_IsMainThread());
137 auto createResult =
138 mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType);
139 MOZ_ASSERT(createResult.isOk());
140 mIDNA = createResult.unwrap();
143 nsIDNService::~nsIDNService() = default;
145 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
146 nsAString& output) {
147 NS_ConvertUTF8toUTF16 inputStr(input);
149 Span<const char16_t> inputSpan{inputStr};
150 intl::nsTStringToBufferAdapter buffer(output);
151 auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
153 nsresult rv = NS_OK;
154 if (result.isErr()) {
155 rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
156 if (rv == NS_ERROR_FAILURE) {
157 rv = NS_ERROR_MALFORMED_URI;
160 NS_ENSURE_SUCCESS(rv, rv);
162 intl::IDNA::Info info = result.unwrap();
163 if (info.HasErrors()) {
164 rv = NS_ERROR_MALFORMED_URI;
167 return rv;
170 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
171 nsAString& output,
172 stringPrepFlag flag) {
173 Span<const char16_t> inputSpan{input};
174 intl::nsTStringToBufferAdapter buffer(output);
175 auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
177 nsresult rv = NS_OK;
178 if (result.isErr()) {
179 rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
180 if (rv == NS_ERROR_FAILURE) {
181 rv = NS_ERROR_MALFORMED_URI;
184 NS_ENSURE_SUCCESS(rv, rv);
186 intl::IDNA::Info info = result.unwrap();
188 // Output the result of nameToUnicode even if there were errors.
189 // But in the case of invalid punycode, the uidna_labelToUnicode result
190 // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
191 // confuse our subsequent processing, so we drop that.
192 // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
193 if ((info.HasInvalidPunycode() || info.HasInvalidAceLabel()) &&
194 !output.IsEmpty() && output.Last() == 0xfffd) {
195 output.Truncate(output.Length() - 1);
198 if (flag == eStringPrepIgnoreErrors) {
199 return NS_OK;
202 if (flag == eStringPrepForDNS) {
203 // We ignore errors if the result is empty, or if the errors were just
204 // invalid hyphens (not punycode-decoding failure or invalid chars).
205 if (!output.IsEmpty()) {
206 if (info.HasErrorsIgnoringInvalidHyphen()) {
207 output.Truncate();
208 rv = NS_ERROR_MALFORMED_URI;
211 } else {
212 if (info.HasErrors()) {
213 rv = NS_ERROR_MALFORMED_URI;
217 return rv;
220 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
221 nsACString& ace) {
222 return UTF8toACE(input, ace, eStringPrepForDNS);
225 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
226 stringPrepFlag flag) {
227 nsresult rv;
228 NS_ConvertUTF8toUTF16 ustr(input);
230 // map ideographic period to ASCII period etc.
231 normalizeFullStops(ustr);
233 uint32_t len, offset;
234 len = 0;
235 offset = 0;
236 nsAutoCString encodedBuf;
238 nsAString::const_iterator start, end;
239 ustr.BeginReading(start);
240 ustr.EndReading(end);
241 ace.Truncate();
243 // encode nodes if non ASCII
244 while (start != end) {
245 len++;
246 if (*start++ == (char16_t)'.') {
247 rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
248 NS_ENSURE_SUCCESS(rv, rv);
250 ace.Append(encodedBuf);
251 ace.Append('.');
252 offset += len;
253 len = 0;
257 // encode the last node if non ASCII
258 if (len) {
259 rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
260 NS_ENSURE_SUCCESS(rv, rv);
262 ace.Append(encodedBuf);
265 return NS_OK;
268 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
269 nsACString& _retval) {
270 return ACEtoUTF8(input, _retval, eStringPrepForDNS);
273 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
274 stringPrepFlag flag) {
275 // RFC 3490 - 4.2 ToUnicode
276 // ToUnicode never fails. If any step fails, then the original input
277 // sequence is returned immediately in that step.
279 // Note that this refers to the decoding of a single label.
280 // ACEtoUTF8 may be called with a sequence of labels separated by dots;
281 // this test applies individually to each label.
283 uint32_t len = 0, offset = 0;
284 nsAutoCString decodedBuf;
286 nsACString::const_iterator start, end;
287 input.BeginReading(start);
288 input.EndReading(end);
289 _retval.Truncate();
291 if (input.IsEmpty()) {
292 return NS_OK;
295 nsAutoCString tld;
296 nsCString::const_iterator it = end, tldEnd = end;
297 --it;
298 if (it != start && *it == (char16_t)'.') {
299 // This is an FQDN (ends in .)
300 // Skip this dot to extract the TLD
301 tldEnd = it;
302 --it;
304 // Find last . and compute TLD
305 while (it != start) {
306 if (*it == (char16_t)'.') {
307 ++it;
308 tld.Assign(Substring(it, tldEnd));
309 break;
311 --it;
314 // loop and decode nodes
315 while (start != end) {
316 len++;
317 if (*start++ == '.') {
318 nsDependentCSubstring origLabel(input, offset, len - 1);
319 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag, tld))) {
320 // If decoding failed, use the original input sequence
321 // for this label.
322 _retval.Append(origLabel);
323 } else {
324 _retval.Append(decodedBuf);
327 _retval.Append('.');
328 offset += len;
329 len = 0;
332 // decode the last node
333 if (len) {
334 nsDependentCSubstring origLabel(input, offset, len);
335 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag, tld))) {
336 _retval.Append(origLabel);
337 } else {
338 _retval.Append(decodedBuf);
342 return NS_OK;
345 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
346 // look for the ACE prefix in the input string. it may occur
347 // at the beginning of any segment in the domain name. for
348 // example: "www.xn--ENCODED.com"
350 if (!IsAscii(input)) {
351 *_retval = false;
352 return NS_OK;
355 auto stringContains = [](const nsACString& haystack,
356 const nsACString& needle) {
357 return std::search(haystack.BeginReading(), haystack.EndReading(),
358 needle.BeginReading(), needle.EndReading(),
359 [](unsigned char ch1, unsigned char ch2) {
360 return tolower(ch1) == tolower(ch2);
361 }) != haystack.EndReading();
364 *_retval =
365 StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
366 (!input.IsEmpty() && input[0] != '.' &&
367 stringContains(input, ".xn--"_ns));
368 return NS_OK;
371 nsresult nsIDNService::Normalize(const nsACString& input, nsACString& output) {
372 // protect against bogus input
373 NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
375 NS_ConvertUTF8toUTF16 inUTF16(input);
376 normalizeFullStops(inUTF16);
378 // pass the domain name to stringprep label by label
379 nsAutoString outUTF16, outLabel;
381 uint32_t len = 0, offset = 0;
382 nsresult rv;
383 nsAString::const_iterator start, end;
384 inUTF16.BeginReading(start);
385 inUTF16.EndReading(end);
387 while (start != end) {
388 len++;
389 if (*start++ == char16_t('.')) {
390 rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
391 eStringPrepIgnoreErrors);
392 NS_ENSURE_SUCCESS(rv, rv);
394 outUTF16.Append(outLabel);
395 outUTF16.Append(char16_t('.'));
396 offset += len;
397 len = 0;
400 if (len) {
401 rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
402 eStringPrepIgnoreErrors);
403 NS_ENSURE_SUCCESS(rv, rv);
405 outUTF16.Append(outLabel);
408 CopyUTF16toUTF8(outUTF16, output);
409 return NS_OK;
412 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
413 bool* _isASCII,
414 nsACString& _retval) {
415 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
416 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
418 nsresult rv = NS_OK;
420 // Even if the hostname is not ASCII, individual labels may still be ACE, so
421 // test IsACE before testing IsASCII
422 bool isACE;
423 IsACE(input, &isACE);
425 if (IsAscii(input)) {
426 // first, canonicalize the host to lowercase, for whitelist lookup
427 _retval = input;
428 ToLowerCase(_retval);
430 if (isACE && !StaticPrefs::network_IDN_show_punycode()) {
431 // ACEtoUTF8() can't fail, but might return the original ACE string
432 nsAutoCString temp(_retval);
433 // Convert from ACE to UTF8 only those labels which are considered safe
434 // for display
435 ACEtoUTF8(temp, _retval, eStringPrepForUI);
436 *_isASCII = IsAscii(_retval);
437 } else {
438 *_isASCII = true;
440 } else {
441 // We have to normalize the hostname before testing against the domain
442 // whitelist (see bug 315411), and to ensure the entire string gets
443 // normalized.
445 // Normalization and the tests for safe display below, assume that the
446 // input is Unicode, so first convert any ACE labels to UTF8
447 if (isACE) {
448 nsAutoCString temp;
449 ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
450 rv = Normalize(temp, _retval);
451 } else {
452 rv = Normalize(input, _retval);
454 if (NS_FAILED(rv)) {
455 return rv;
458 if (StaticPrefs::network_IDN_show_punycode() &&
459 NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
460 *_isASCII = true;
461 return NS_OK;
464 // normalization could result in an ASCII-only hostname. alternatively, if
465 // the host is converted to ACE by the normalizer, then the host may contain
466 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
467 // and bug 309311.
468 *_isASCII = IsAscii(_retval);
469 if (!*_isASCII) {
470 // UTF8toACE with eStringPrepForUI may return a domain name where
471 // some labels are in UTF-8 and some are in ACE, depending on
472 // whether they are considered safe for display
473 rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
474 *_isASCII = IsAscii(_retval);
475 return rv;
479 return NS_OK;
480 } // Will generate a mutex still-held warning
482 //-----------------------------------------------------------------------------
484 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
485 uint32_t outBufLen, uint32_t* outLen) {
486 uint32_t i = 0;
487 nsAString::const_iterator start, end;
488 in.BeginReading(start);
489 in.EndReading(end);
491 while (start != end) {
492 char16_t curChar;
494 curChar = *start++;
496 if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
497 out[i] = SURROGATE_TO_UCS4(curChar, *start);
498 ++start;
499 } else {
500 out[i] = curChar;
503 i++;
504 if (i >= outBufLen) {
505 return NS_ERROR_MALFORMED_URI;
508 out[i] = (uint32_t)'\0';
509 *outLen = i;
510 return NS_OK;
513 static nsresult punycode(const nsAString& in, nsACString& out) {
514 uint32_t ucs4Buf[kMaxULabelSize + 1];
515 uint32_t ucs4Len = 0u;
516 nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxULabelSize, &ucs4Len);
517 NS_ENSURE_SUCCESS(rv, rv);
519 // need maximum 20 bits to encode 16 bit Unicode character
520 // (include null terminator)
521 const uint32_t kEncodedBufSize = kMaxULabelSize * 20 / 8 + 1 + 1;
522 char encodedBuf[kEncodedBufSize];
523 punycode_uint encodedLength = kEncodedBufSize;
525 enum punycode_status status =
526 punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
528 if (punycode_success != status || encodedLength >= kEncodedBufSize) {
529 return NS_ERROR_MALFORMED_URI;
532 encodedBuf[encodedLength] = '\0';
533 out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
535 return rv;
538 // RFC 3454
540 // 1) Map -- For each character in the input, check if it has a mapping
541 // and, if so, replace it with its mapping. This is described in section 3.
543 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
544 // normalization. This is described in section 4.
546 // 3) Prohibit -- Check for any characters that are not allowed in the
547 // output. If any are found, return an error. This is described in section
548 // 5.
550 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
551 // are found, make sure that the whole string satisfies the requirements
552 // for bidirectional strings. If the string does not satisfy the requirements
553 // for bidirectional strings, return an error. This is described in section 6.
555 // 5) Check unassigned code points -- If allowUnassigned is false, check for
556 // any unassigned Unicode points and if any are found return an error.
557 // This is described in section 7.
559 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
560 stringPrepFlag flag) {
561 return IDNA2008StringPrep(in, out, flag);
564 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
565 stringPrepFlag flag) {
566 nsresult rv = NS_OK;
568 out.Truncate();
570 if (IsAscii(in)) {
571 LossyCopyUTF16toASCII(in, out);
572 // If label begins with xn-- we still want to check its validity
573 if (!StringBeginsWith(in, u"xn--"_ns, nsCaseInsensitiveStringComparator)) {
574 return NS_OK;
578 nsAutoString strPrep;
579 rv = stringPrep(in, strPrep, flag);
580 if (flag == eStringPrepForDNS) {
581 NS_ENSURE_SUCCESS(rv, rv);
584 if (IsAscii(strPrep)) {
585 LossyCopyUTF16toASCII(strPrep, out);
586 return NS_OK;
589 if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in, u""_ns)) {
590 CopyUTF16toUTF8(strPrep, out);
591 return NS_OK;
594 return punycode(strPrep, out);
597 // RFC 3490
598 // 1) Whenever dots are used as label separators, the following characters
599 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
600 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
601 // stop).
603 void nsIDNService::normalizeFullStops(nsAString& s) {
604 nsAString::const_iterator start, end;
605 s.BeginReading(start);
606 s.EndReading(end);
607 int32_t index = 0;
609 while (start != end) {
610 switch (*start) {
611 case 0x3002:
612 case 0xFF0E:
613 case 0xFF61:
614 s.ReplaceLiteral(index, 1, u".");
615 break;
616 default:
617 break;
619 start++;
620 index++;
624 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
625 stringPrepFlag flag, const nsACString& aTLD) {
626 bool isAce;
627 IsACE(in, &isAce);
628 if (!isAce) {
629 out.Assign(in);
630 return NS_OK;
633 nsAutoString utf16;
634 nsresult result = IDNA2008ToUnicode(in, utf16);
635 NS_ENSURE_SUCCESS(result, result);
637 NS_ConvertUTF8toUTF16 tld(aTLD);
639 if (flag != eStringPrepForUI || isLabelSafe(utf16, tld)) {
640 CopyUTF16toUTF8(utf16, out);
641 } else {
642 out.Assign(in);
643 return NS_OK;
646 // Validation: encode back to ACE and compare the strings
647 nsAutoCString ace;
648 nsresult rv = UTF8toACE(out, ace, flag);
649 NS_ENSURE_SUCCESS(rv, rv);
651 if (flag == eStringPrepForDNS &&
652 !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
653 return NS_ERROR_MALFORMED_URI;
656 return NS_OK;
namespace mozilla::net {

// State of the mixed-script check performed by illegalScriptCombo().
// Values BOPO..OTHR double as column indices and BOPO..HNLT as row indices of
// scriptComboTable, so the numeric values must not change.
enum ScriptCombo : int32_t {
  UNSET = -1,  // no script recorded yet
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)
  FAIL = 13,
};

}  // namespace mozilla::net

681 bool nsIDNService::isLabelSafe(const nsAString& label, const nsAString& tld) {
682 restrictionProfile profile{eASCIIOnlyProfile};
684 AutoReadLock lock(mLock);
686 if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
687 return false;
690 // We should never get here if the label is ASCII
691 NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
692 if (mRestrictionProfile == eASCIIOnlyProfile) {
693 return false;
695 profile = mRestrictionProfile;
698 nsAString::const_iterator current, end;
699 label.BeginReading(current);
700 label.EndReading(end);
702 Script lastScript = Script::INVALID;
703 uint32_t previousChar = 0;
704 uint32_t baseChar = 0; // last non-diacritic seen (base char for marks)
705 uint32_t savedNumberingSystem = 0;
706 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
707 #if 0
708 HanVariantType savedHanVariant = HVT_NotHan;
709 #endif
711 ScriptCombo savedScript = ScriptCombo::UNSET;
713 while (current != end) {
714 uint32_t ch = *current++;
716 if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
717 ch = SURROGATE_TO_UCS4(ch, *current++);
720 IdentifierType idType = GetIdentifierType(ch);
721 if (idType == IDTYPE_RESTRICTED) {
722 return false;
724 MOZ_ASSERT(idType == IDTYPE_ALLOWED);
726 // Check for mixed script
727 Script script = UnicodeProperties::GetScriptCode(ch);
728 if (script != Script::COMMON && script != Script::INHERITED &&
729 script != lastScript) {
730 if (illegalScriptCombo(profile, script, savedScript)) {
731 return false;
735 // U+30FC should be preceded by a Hiragana/Katakana.
736 if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
737 lastScript != Script::KATAKANA) {
738 return false;
741 Script nextScript = Script::INVALID;
742 if (current != end) {
743 nextScript = UnicodeProperties::GetScriptCode(*current);
746 if (ch == 0x30FB &&
747 (lastScript == Script::LATIN || nextScript == Script::LATIN)) {
748 return false;
751 if (ch == 0x307 &&
752 (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
753 return false;
756 // U+00B7 is only allowed on Catalan domains between two l's.
757 if (ch == 0xB7 && (!tld.EqualsLiteral("cat") || previousChar != 'l' ||
758 current == end || *current != 'l')) {
759 return false;
762 // Disallow Icelandic confusables for domains outside Icelandic and Faroese
763 // ccTLD (.is, .fo)
764 if ((ch == 0xFE || ch == 0xF0) && !tld.EqualsLiteral("is") &&
765 !tld.EqualsLiteral("fo")) {
766 return false;
769 // Block single/double-quote-like characters.
770 if (ch == 0x2BB || ch == 0x2BC) {
771 return false;
774 // Check for mixed numbering systems
775 auto genCat = GetGeneralCategory(ch);
776 if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
777 uint32_t zeroCharacter =
778 ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
779 if (savedNumberingSystem == 0) {
780 // If we encounter a decimal number, save the zero character from that
781 // numbering system.
782 savedNumberingSystem = zeroCharacter;
783 } else if (zeroCharacter != savedNumberingSystem) {
784 return false;
788 if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
789 // Check for consecutive non-spacing marks.
790 if (previousChar != 0 && previousChar == ch) {
791 return false;
793 // Check for marks whose expected script doesn't match the base script.
794 if (lastScript != Script::INVALID) {
795 UnicodeProperties::ScriptExtensionVector scripts;
796 auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
797 MOZ_ASSERT(extResult.isOk());
798 if (extResult.isErr()) {
799 return false;
802 int nScripts = AssertedCast<int>(scripts.length());
804 // nScripts will always be >= 1, because even for undefined characters
805 // it will return Script::INVALID.
806 // If the mark just has script=COMMON or INHERITED, we can't check any
807 // more carefully, but if it has specific scriptExtension codes, then
808 // assume those are the only valid scripts to use it with.
809 if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
810 Script(scripts[0]) != Script::INHERITED)) {
811 while (--nScripts >= 0) {
812 if (Script(scripts[nScripts]) == lastScript) {
813 break;
816 if (nScripts == -1) {
817 return false;
821 // Check for diacritics on dotless-i, which would be indistinguishable
822 // from normal accented letter i.
823 if (baseChar == 0x0131 &&
824 ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
825 return false;
827 } else {
828 baseChar = ch;
831 if (script != Script::COMMON && script != Script::INHERITED) {
832 lastScript = script;
835 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
836 #if 0
838 // Check for both simplified-only and traditional-only Chinese characters
839 HanVariantType hanVariant = GetHanVariant(ch);
840 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
841 if (savedHanVariant == HVT_NotHan) {
842 savedHanVariant = hanVariant;
843 } else if (hanVariant != savedHanVariant) {
844 return false;
847 #endif
849 previousChar = ch;
851 return true;
854 // Scripts that we care about in illegalScriptCombo
855 static inline ScriptCombo findScriptIndex(Script aScript) {
856 switch (aScript) {
857 case Script::BOPOMOFO:
858 return ScriptCombo::BOPO;
859 case Script::CYRILLIC:
860 return ScriptCombo::CYRL;
861 case Script::GREEK:
862 return ScriptCombo::GREK;
863 case Script::HANGUL:
864 return ScriptCombo::HANG;
865 case Script::HAN:
866 return ScriptCombo::HANI;
867 case Script::HIRAGANA:
868 return ScriptCombo::HIRA;
869 case Script::KATAKANA:
870 return ScriptCombo::KATA;
871 case Script::LATIN:
872 return ScriptCombo::LATN;
873 default:
874 return ScriptCombo::OTHR;
878 static const ScriptCombo scriptComboTable[13][9] = {
879 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
880 * savedScript */
881 /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
882 /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
883 /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
884 /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
885 /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
886 /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
887 /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
888 /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
889 /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
890 /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
891 /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
892 /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
893 /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
895 bool nsIDNService::illegalScriptCombo(restrictionProfile profile, Script script,
896 ScriptCombo& savedScript) {
897 if (savedScript == ScriptCombo::UNSET) {
898 savedScript = findScriptIndex(script);
899 return false;
902 savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
904 * Special case combinations that depend on which profile is in use
905 * In the Highly Restrictive profile Latin is not allowed with any
906 * other script
908 * In the Moderately Restrictive profile Latin mixed with any other
909 * single script is allowed.
911 return ((savedScript == OTHR && profile == eHighlyRestrictiveProfile) ||
912 savedScript == FAIL);