Bug 1810189 - Update MOTS for WebGPU: +jimb,+egubler,+nical,+teoxoy. DONTBUILD r...
[gecko.git] / netwerk / dns / nsIDNService.cpp
blob60bb6f201f3523b2cda8a97da822ebff29bc8300
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "MainThreadUtils.h"
7 #include "mozilla/ClearOnShutdown.h"
8 #include "mozilla/Preferences.h"
9 #include "nsIDNService.h"
10 #include "nsReadableUtils.h"
11 #include "nsCRT.h"
12 #include "nsServiceManagerUtils.h"
13 #include "nsUnicharUtils.h"
14 #include "nsUnicodeProperties.h"
15 #include "harfbuzz/hb.h"
16 #include "punycode.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/Casting.h"
19 #include "mozilla/StaticPrefs_network.h"
20 #include "mozilla/TextUtils.h"
21 #include "mozilla/Utf8.h"
22 #include "mozilla/intl/FormatBuffer.h"
23 #include "mozilla/intl/UnicodeProperties.h"
24 #include "mozilla/intl/UnicodeScriptCodes.h"
26 #include "ICUUtils.h"
28 using namespace mozilla;
29 using namespace mozilla::intl;
30 using namespace mozilla::unicode;
31 using namespace mozilla::net;
32 using mozilla::Preferences;
34 // Currently we use the non-transitional processing option -- see
35 // http://unicode.org/reports/tr46/
36 // To switch to transitional processing, change the value of this flag
37 // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
38 // (revert bug 1218179).
39 const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType =
40 intl::IDNA::ProcessingType::NonTransitional;
42 //-----------------------------------------------------------------------------
// According to RFC 1034 - 3.1. Name space specifications and terminology
// the maximum label size would be 63. However, this is enforced at the DNS
// level and none of the other browsers seem to enforce the VerifyDnsLength
// check in https://unicode.org/reports/tr46/#ToASCII
// Instead, we choose a rather arbitrary but larger size.
// Largest number of code points accepted in one label by punycode(); also
// used there to size the on-stack conversion buffers.
static const uint32_t kMaxULabelSize = 256;

// RFC 3490 - 5. ACE prefix
static const char kACEPrefix[] = "xn--";
52 //-----------------------------------------------------------------------------
// Names of the preferences this service reads and observes.
#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
58 static inline bool isOnlySafeChars(const nsString& in,
59 const nsTArray<BlocklistRange>& aBlocklist) {
60 if (aBlocklist.IsEmpty()) {
61 return true;
63 const char16_t* cur = in.BeginReading();
64 const char16_t* end = in.EndReading();
66 for (; cur < end; ++cur) {
67 if (CharInBlocklist(*cur, aBlocklist)) {
68 return false;
71 return true;
74 //-----------------------------------------------------------------------------
75 // nsIDNService
76 //-----------------------------------------------------------------------------
78 /* Implementation file */
79 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
81 static const char* gCallbackPrefs[] = {
82 NS_NET_PREF_EXTRAALLOWED,
83 NS_NET_PREF_EXTRABLOCKED,
84 NS_NET_PREF_IDNRESTRICTION,
85 nullptr,
88 nsresult nsIDNService::Init() {
89 MOZ_ASSERT(NS_IsMainThread());
90 // Take a strong reference for our listener with the preferences service,
91 // which we will release on shutdown.
92 // It's OK if we remove the observer a bit early, as it just means we won't
93 // respond to `network.IDN.extra_{allowed,blocked}_chars` and
94 // `network.IDN.restriction_profile` pref changes during shutdown.
95 Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
96 RunOnShutdown(
97 [self = RefPtr{this}]() mutable {
98 Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
99 self.get());
100 self = nullptr;
102 ShutdownPhase::XPCOMWillShutdown);
103 prefsChanged(nullptr);
105 return NS_OK;
108 void nsIDNService::prefsChanged(const char* pref) {
109 MOZ_ASSERT(NS_IsMainThread());
110 AutoWriteLock lock(mLock);
112 if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
113 nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
114 InitializeBlocklist(mIDNBlocklist);
116 if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
117 nsAutoCString profile;
118 if (NS_FAILED(
119 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
120 profile.Truncate();
122 if (profile.EqualsLiteral("moderate")) {
123 mRestrictionProfile = eModeratelyRestrictiveProfile;
124 } else if (profile.EqualsLiteral("high")) {
125 mRestrictionProfile = eHighlyRestrictiveProfile;
126 } else {
127 mRestrictionProfile = eASCIIOnlyProfile;
132 nsIDNService::nsIDNService() {
133 MOZ_ASSERT(NS_IsMainThread());
135 auto createResult =
136 mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType);
137 MOZ_ASSERT(createResult.isOk());
138 mIDNA = createResult.unwrap();
141 nsIDNService::~nsIDNService() = default;
143 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
144 nsAString& output) {
145 NS_ConvertUTF8toUTF16 inputStr(input);
147 Span<const char16_t> inputSpan{inputStr};
148 intl::nsTStringToBufferAdapter buffer(output);
149 auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
151 nsresult rv = NS_OK;
152 if (result.isErr()) {
153 rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
154 if (rv == NS_ERROR_FAILURE) {
155 rv = NS_ERROR_MALFORMED_URI;
158 NS_ENSURE_SUCCESS(rv, rv);
160 intl::IDNA::Info info = result.unwrap();
161 if (info.HasErrors()) {
162 rv = NS_ERROR_MALFORMED_URI;
165 return rv;
168 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
169 nsAString& output,
170 stringPrepFlag flag) {
171 Span<const char16_t> inputSpan{input};
172 intl::nsTStringToBufferAdapter buffer(output);
173 auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
175 nsresult rv = NS_OK;
176 if (result.isErr()) {
177 rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
178 if (rv == NS_ERROR_FAILURE) {
179 rv = NS_ERROR_MALFORMED_URI;
182 NS_ENSURE_SUCCESS(rv, rv);
184 intl::IDNA::Info info = result.unwrap();
186 // Output the result of nameToUnicode even if there were errors.
187 // But in the case of invalid punycode, the uidna_labelToUnicode result
188 // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
189 // confuse our subsequent processing, so we drop that.
190 // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
191 if ((info.HasInvalidPunycode() || info.HasInvalidAceLabel()) &&
192 !output.IsEmpty() && output.Last() == 0xfffd) {
193 output.Truncate(output.Length() - 1);
196 if (flag == eStringPrepIgnoreErrors) {
197 return NS_OK;
200 bool hasError = flag == eStringPrepForDNS
201 ? info.HasErrors() && !info.HasInvalidHyphen()
202 : info.HasErrors();
204 if (hasError) {
205 if (flag == eStringPrepForDNS) {
206 output.Truncate();
208 rv = NS_ERROR_MALFORMED_URI;
211 return rv;
214 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
215 nsACString& ace) {
216 return UTF8toACE(input, ace, eStringPrepForDNS);
219 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
220 stringPrepFlag flag) {
221 nsresult rv;
222 NS_ConvertUTF8toUTF16 ustr(input);
224 // map ideographic period to ASCII period etc.
225 normalizeFullStops(ustr);
227 uint32_t len, offset;
228 len = 0;
229 offset = 0;
230 nsAutoCString encodedBuf;
232 nsAString::const_iterator start, end;
233 ustr.BeginReading(start);
234 ustr.EndReading(end);
235 ace.Truncate();
237 // encode nodes if non ASCII
238 while (start != end) {
239 len++;
240 if (*start++ == (char16_t)'.') {
241 rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
242 NS_ENSURE_SUCCESS(rv, rv);
244 ace.Append(encodedBuf);
245 ace.Append('.');
246 offset += len;
247 len = 0;
251 // encode the last node if non ASCII
252 if (len) {
253 rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
254 NS_ENSURE_SUCCESS(rv, rv);
256 ace.Append(encodedBuf);
259 return NS_OK;
262 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
263 nsACString& _retval) {
264 return ACEtoUTF8(input, _retval, eStringPrepForDNS);
267 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
268 stringPrepFlag flag) {
269 // RFC 3490 - 4.2 ToUnicode
270 // ToUnicode never fails. If any step fails, then the original input
271 // sequence is returned immediately in that step.
273 // Note that this refers to the decoding of a single label.
274 // ACEtoUTF8 may be called with a sequence of labels separated by dots;
275 // this test applies individually to each label.
277 uint32_t len = 0, offset = 0;
278 nsAutoCString decodedBuf;
280 nsACString::const_iterator start, end;
281 input.BeginReading(start);
282 input.EndReading(end);
283 _retval.Truncate();
285 // loop and decode nodes
286 while (start != end) {
287 len++;
288 if (*start++ == '.') {
289 nsDependentCSubstring origLabel(input, offset, len - 1);
290 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
291 // If decoding failed, use the original input sequence
292 // for this label.
293 _retval.Append(origLabel);
294 } else {
295 _retval.Append(decodedBuf);
298 _retval.Append('.');
299 offset += len;
300 len = 0;
303 // decode the last node
304 if (len) {
305 nsDependentCSubstring origLabel(input, offset, len);
306 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
307 _retval.Append(origLabel);
308 } else {
309 _retval.Append(decodedBuf);
313 return NS_OK;
316 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
317 // look for the ACE prefix in the input string. it may occur
318 // at the beginning of any segment in the domain name. for
319 // example: "www.xn--ENCODED.com"
321 if (!IsAscii(input)) {
322 *_retval = false;
323 return NS_OK;
326 auto stringContains = [](const nsACString& haystack,
327 const nsACString& needle) {
328 return std::search(haystack.BeginReading(), haystack.EndReading(),
329 needle.BeginReading(), needle.EndReading(),
330 [](unsigned char ch1, unsigned char ch2) {
331 return tolower(ch1) == tolower(ch2);
332 }) != haystack.EndReading();
335 *_retval =
336 StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
337 (!input.IsEmpty() && input[0] != '.' &&
338 stringContains(input, ".xn--"_ns));
339 return NS_OK;
342 NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
343 nsACString& output) {
344 // protect against bogus input
345 NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
347 NS_ConvertUTF8toUTF16 inUTF16(input);
348 normalizeFullStops(inUTF16);
350 // pass the domain name to stringprep label by label
351 nsAutoString outUTF16, outLabel;
353 uint32_t len = 0, offset = 0;
354 nsresult rv;
355 nsAString::const_iterator start, end;
356 inUTF16.BeginReading(start);
357 inUTF16.EndReading(end);
359 while (start != end) {
360 len++;
361 if (*start++ == char16_t('.')) {
362 rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
363 eStringPrepIgnoreErrors);
364 NS_ENSURE_SUCCESS(rv, rv);
366 outUTF16.Append(outLabel);
367 outUTF16.Append(char16_t('.'));
368 offset += len;
369 len = 0;
372 if (len) {
373 rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
374 eStringPrepIgnoreErrors);
375 NS_ENSURE_SUCCESS(rv, rv);
377 outUTF16.Append(outLabel);
380 CopyUTF16toUTF8(outUTF16, output);
381 return NS_OK;
384 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
385 bool* _isASCII,
386 nsACString& _retval) {
387 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
388 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
390 nsresult rv = NS_OK;
392 // Even if the hostname is not ASCII, individual labels may still be ACE, so
393 // test IsACE before testing IsASCII
394 bool isACE;
395 IsACE(input, &isACE);
397 if (IsAscii(input)) {
398 // first, canonicalize the host to lowercase, for whitelist lookup
399 _retval = input;
400 ToLowerCase(_retval);
402 if (isACE && !StaticPrefs::network_IDN_show_punycode()) {
403 // ACEtoUTF8() can't fail, but might return the original ACE string
404 nsAutoCString temp(_retval);
405 // Convert from ACE to UTF8 only those labels which are considered safe
406 // for display
407 ACEtoUTF8(temp, _retval, eStringPrepForUI);
408 *_isASCII = IsAscii(_retval);
409 } else {
410 *_isASCII = true;
412 } else {
413 // We have to normalize the hostname before testing against the domain
414 // whitelist (see bug 315411), and to ensure the entire string gets
415 // normalized.
417 // Normalization and the tests for safe display below, assume that the
418 // input is Unicode, so first convert any ACE labels to UTF8
419 if (isACE) {
420 nsAutoCString temp;
421 ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
422 rv = Normalize(temp, _retval);
423 } else {
424 rv = Normalize(input, _retval);
426 if (NS_FAILED(rv)) {
427 return rv;
430 if (StaticPrefs::network_IDN_show_punycode() &&
431 NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
432 *_isASCII = true;
433 return NS_OK;
436 // normalization could result in an ASCII-only hostname. alternatively, if
437 // the host is converted to ACE by the normalizer, then the host may contain
438 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
439 // and bug 309311.
440 *_isASCII = IsAscii(_retval);
441 if (!*_isASCII) {
442 // UTF8toACE with eStringPrepForUI may return a domain name where
443 // some labels are in UTF-8 and some are in ACE, depending on
444 // whether they are considered safe for display
445 rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
446 *_isASCII = IsAscii(_retval);
447 return rv;
451 return NS_OK;
452 } // Will generate a mutex still-held warning
454 //-----------------------------------------------------------------------------
456 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
457 uint32_t outBufLen, uint32_t* outLen) {
458 uint32_t i = 0;
459 nsAString::const_iterator start, end;
460 in.BeginReading(start);
461 in.EndReading(end);
463 while (start != end) {
464 char16_t curChar;
466 curChar = *start++;
468 if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
469 out[i] = SURROGATE_TO_UCS4(curChar, *start);
470 ++start;
471 } else {
472 out[i] = curChar;
475 i++;
476 if (i >= outBufLen) {
477 return NS_ERROR_MALFORMED_URI;
480 out[i] = (uint32_t)'\0';
481 *outLen = i;
482 return NS_OK;
485 static nsresult punycode(const nsAString& in, nsACString& out) {
486 uint32_t ucs4Buf[kMaxULabelSize + 1];
487 uint32_t ucs4Len = 0u;
488 nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxULabelSize, &ucs4Len);
489 NS_ENSURE_SUCCESS(rv, rv);
491 // need maximum 20 bits to encode 16 bit Unicode character
492 // (include null terminator)
493 const uint32_t kEncodedBufSize = kMaxULabelSize * 20 / 8 + 1 + 1;
494 char encodedBuf[kEncodedBufSize];
495 punycode_uint encodedLength = kEncodedBufSize;
497 enum punycode_status status =
498 punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
500 if (punycode_success != status || encodedLength >= kEncodedBufSize) {
501 return NS_ERROR_MALFORMED_URI;
504 encodedBuf[encodedLength] = '\0';
505 out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
507 return rv;
510 // RFC 3454
512 // 1) Map -- For each character in the input, check if it has a mapping
513 // and, if so, replace it with its mapping. This is described in section 3.
515 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
516 // normalization. This is described in section 4.
518 // 3) Prohibit -- Check for any characters that are not allowed in the
519 // output. If any are found, return an error. This is described in section
520 // 5.
522 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
523 // are found, make sure that the whole string satisfies the requirements
524 // for bidirectional strings. If the string does not satisfy the requirements
525 // for bidirectional strings, return an error. This is described in section 6.
527 // 5) Check unassigned code points -- If allowUnassigned is false, check for
528 // any unassigned Unicode points and if any are found return an error.
529 // This is described in section 7.
531 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
532 stringPrepFlag flag) {
533 return IDNA2008StringPrep(in, out, flag);
536 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
537 stringPrepFlag flag) {
538 nsresult rv = NS_OK;
540 out.Truncate();
542 if (IsAscii(in)) {
543 LossyCopyUTF16toASCII(in, out);
544 // If label begins with xn-- we still want to check its validity
545 if (!StringBeginsWith(in, u"xn--"_ns, nsCaseInsensitiveStringComparator)) {
546 return NS_OK;
550 nsAutoString strPrep;
551 rv = stringPrep(in, strPrep, flag);
552 if (flag == eStringPrepForDNS) {
553 NS_ENSURE_SUCCESS(rv, rv);
556 if (IsAscii(strPrep)) {
557 LossyCopyUTF16toASCII(strPrep, out);
558 return NS_OK;
561 if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
562 CopyUTF16toUTF8(strPrep, out);
563 return NS_OK;
566 return punycode(strPrep, out);
569 // RFC 3490
570 // 1) Whenever dots are used as label separators, the following characters
571 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
572 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
573 // stop).
575 void nsIDNService::normalizeFullStops(nsAString& s) {
576 nsAString::const_iterator start, end;
577 s.BeginReading(start);
578 s.EndReading(end);
579 int32_t index = 0;
581 while (start != end) {
582 switch (*start) {
583 case 0x3002:
584 case 0xFF0E:
585 case 0xFF61:
586 s.ReplaceLiteral(index, 1, u".");
587 break;
588 default:
589 break;
591 start++;
592 index++;
596 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
597 stringPrepFlag flag) {
598 bool isAce;
599 IsACE(in, &isAce);
600 if (!isAce) {
601 out.Assign(in);
602 return NS_OK;
605 nsAutoString utf16;
606 nsresult result = IDNA2008ToUnicode(in, utf16);
607 NS_ENSURE_SUCCESS(result, result);
609 if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
610 CopyUTF16toUTF8(utf16, out);
611 } else {
612 out.Assign(in);
613 return NS_OK;
616 // Validation: encode back to ACE and compare the strings
617 nsAutoCString ace;
618 nsresult rv = UTF8toACE(out, ace, flag);
619 NS_ENSURE_SUCCESS(rv, rv);
621 if (flag == eStringPrepForDNS &&
622 !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
623 return NS_ERROR_MALFORMED_URI;
626 return NS_OK;
namespace mozilla::net {

// State of the mixed-script check performed on a label.
// BOPO..OTHR identify the script of a single character and index the columns
// of scriptComboTable; BOPO..HNLT also index its rows (the combination seen
// so far). UNSET means no script has been recorded yet and FAIL marks a
// disallowed combination; neither is a valid table index.
enum ScriptCombo : int32_t {
  UNSET = -1,
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)
  FAIL = 13,
};

// scriptComboTable is declared as [13][9]; keep the enum in step with it.
static_assert(OTHR + 1 == 9, "scriptComboTable has one column per script");
static_assert(FAIL == 13, "scriptComboTable has one row per non-FAIL combo");

}  // namespace mozilla::net
651 bool nsIDNService::isLabelSafe(const nsAString& label) {
652 AutoReadLock lock(mLock);
654 if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
655 return false;
658 // We should never get here if the label is ASCII
659 NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
660 if (mRestrictionProfile == eASCIIOnlyProfile) {
661 return false;
664 nsAString::const_iterator current, end;
665 label.BeginReading(current);
666 label.EndReading(end);
668 Script lastScript = Script::INVALID;
669 uint32_t previousChar = 0;
670 uint32_t baseChar = 0; // last non-diacritic seen (base char for marks)
671 uint32_t savedNumberingSystem = 0;
672 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
673 #if 0
674 HanVariantType savedHanVariant = HVT_NotHan;
675 #endif
677 ScriptCombo savedScript = ScriptCombo::UNSET;
679 while (current != end) {
680 uint32_t ch = *current++;
682 if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
683 ch = SURROGATE_TO_UCS4(ch, *current++);
686 IdentifierType idType = GetIdentifierType(ch);
687 if (idType == IDTYPE_RESTRICTED) {
688 return false;
690 MOZ_ASSERT(idType == IDTYPE_ALLOWED);
692 // Check for mixed script
693 Script script = UnicodeProperties::GetScriptCode(ch);
694 if (script != Script::COMMON && script != Script::INHERITED &&
695 script != lastScript) {
696 if (illegalScriptCombo(script, savedScript)) {
697 return false;
701 // U+30FC should be preceded by a Hiragana/Katakana.
702 if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
703 lastScript != Script::KATAKANA) {
704 return false;
707 if (ch == 0x307 &&
708 (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
709 return false;
712 // Check for mixed numbering systems
713 auto genCat = GetGeneralCategory(ch);
714 if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
715 uint32_t zeroCharacter =
716 ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
717 if (savedNumberingSystem == 0) {
718 // If we encounter a decimal number, save the zero character from that
719 // numbering system.
720 savedNumberingSystem = zeroCharacter;
721 } else if (zeroCharacter != savedNumberingSystem) {
722 return false;
726 if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
727 // Check for consecutive non-spacing marks.
728 if (previousChar != 0 && previousChar == ch) {
729 return false;
731 // Check for marks whose expected script doesn't match the base script.
732 if (lastScript != Script::INVALID) {
733 UnicodeProperties::ScriptExtensionVector scripts;
734 auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
735 MOZ_ASSERT(extResult.isOk());
736 if (extResult.isErr()) {
737 return false;
740 int nScripts = AssertedCast<int>(scripts.length());
742 // nScripts will always be >= 1, because even for undefined characters
743 // it will return Script::INVALID.
744 // If the mark just has script=COMMON or INHERITED, we can't check any
745 // more carefully, but if it has specific scriptExtension codes, then
746 // assume those are the only valid scripts to use it with.
747 if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
748 Script(scripts[0]) != Script::INHERITED)) {
749 while (--nScripts >= 0) {
750 if (Script(scripts[nScripts]) == lastScript) {
751 break;
754 if (nScripts == -1) {
755 return false;
759 // Check for diacritics on dotless-i, which would be indistinguishable
760 // from normal accented letter i.
761 if (baseChar == 0x0131 &&
762 ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
763 return false;
765 } else {
766 baseChar = ch;
769 if (script != Script::COMMON && script != Script::INHERITED) {
770 lastScript = script;
773 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
774 #if 0
776 // Check for both simplified-only and traditional-only Chinese characters
777 HanVariantType hanVariant = GetHanVariant(ch);
778 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
779 if (savedHanVariant == HVT_NotHan) {
780 savedHanVariant = hanVariant;
781 } else if (hanVariant != savedHanVariant) {
782 return false;
785 #endif
787 previousChar = ch;
789 return true;
792 // Scripts that we care about in illegalScriptCombo
793 static inline ScriptCombo findScriptIndex(Script aScript) {
794 switch (aScript) {
795 case Script::BOPOMOFO:
796 return ScriptCombo::BOPO;
797 case Script::CYRILLIC:
798 return ScriptCombo::CYRL;
799 case Script::GREEK:
800 return ScriptCombo::GREK;
801 case Script::HANGUL:
802 return ScriptCombo::HANG;
803 case Script::HAN:
804 return ScriptCombo::HANI;
805 case Script::HIRAGANA:
806 return ScriptCombo::HIRA;
807 case Script::KATAKANA:
808 return ScriptCombo::KATA;
809 case Script::LATIN:
810 return ScriptCombo::LATN;
811 default:
812 return ScriptCombo::OTHR;
816 static const ScriptCombo scriptComboTable[13][9] = {
817 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
818 * savedScript */
819 /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
820 /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
821 /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
822 /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
823 /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
824 /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
825 /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
826 /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
827 /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
828 /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
829 /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
830 /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
831 /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
833 bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) {
834 if (savedScript == ScriptCombo::UNSET) {
835 savedScript = findScriptIndex(script);
836 return false;
839 savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
841 * Special case combinations that depend on which profile is in use
842 * In the Highly Restrictive profile Latin is not allowed with any
843 * other script
845 * In the Moderately Restrictive profile Latin mixed with any other
846 * single script is allowed.
848 return ((savedScript == OTHR &&
849 mRestrictionProfile == eHighlyRestrictiveProfile) ||
850 savedScript == FAIL);