Backed out 10 changesets (bug 1803810) for xpcshell failures on test_import_global...
[gecko.git] / netwerk / dns / nsIDNService.cpp
blob3db169d3afb7cede75d056195068276e444fefe3
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "MainThreadUtils.h"
7 #include "mozilla/ClearOnShutdown.h"
8 #include "mozilla/Preferences.h"
9 #include "nsIDNService.h"
10 #include "nsReadableUtils.h"
11 #include "nsCRT.h"
12 #include "nsServiceManagerUtils.h"
13 #include "nsUnicharUtils.h"
14 #include "nsUnicodeProperties.h"
15 #include "harfbuzz/hb.h"
16 #include "punycode.h"
17 #include "mozilla/ArrayUtils.h"
18 #include "mozilla/Casting.h"
19 #include "mozilla/StaticPrefs_network.h"
20 #include "mozilla/TextUtils.h"
21 #include "mozilla/Utf8.h"
22 #include "mozilla/intl/FormatBuffer.h"
23 #include "mozilla/intl/UnicodeProperties.h"
24 #include "mozilla/intl/UnicodeScriptCodes.h"
26 #include "ICUUtils.h"
28 using namespace mozilla;
29 using namespace mozilla::intl;
30 using namespace mozilla::unicode;
31 using namespace mozilla::net;
32 using mozilla::Preferences;
34 // Currently we use the non-transitional processing option -- see
35 // http://unicode.org/reports/tr46/
36 // To switch to transitional processing, change the value of this flag
37 // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
38 // (revert bug 1218179).
39 const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType =
40 intl::IDNA::ProcessingType::NonTransitional;
42 //-----------------------------------------------------------------------------
43 // According to RFC 1034 - 3.1. Name space specifications and terminology
44 // the maximum label size would be 63. However, this is enforced at the DNS
45 // level and none of the other browsers seem to not enforce the VerifyDnsLength
46 // check in https://unicode.org/reports/tr46/#ToASCII
47 // Instead, we choose a rather arbitrary but larger size.
48 static const uint32_t kMaxULabelSize = 256;
49 // RFC 3490 - 5. ACE prefix
50 static const char kACEPrefix[] = "xn--";
52 //-----------------------------------------------------------------------------
54 #define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
55 #define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
56 #define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
58 static inline bool isOnlySafeChars(const nsString& in,
59 const nsTArray<BlocklistRange>& aBlocklist) {
60 if (aBlocklist.IsEmpty()) {
61 return true;
63 const char16_t* cur = in.BeginReading();
64 const char16_t* end = in.EndReading();
66 for (; cur < end; ++cur) {
67 if (CharInBlocklist(*cur, aBlocklist)) {
68 return false;
71 return true;
74 //-----------------------------------------------------------------------------
75 // nsIDNService
76 //-----------------------------------------------------------------------------
78 /* Implementation file */
79 NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
81 static const char* gCallbackPrefs[] = {
82 NS_NET_PREF_EXTRAALLOWED,
83 NS_NET_PREF_EXTRABLOCKED,
84 NS_NET_PREF_IDNRESTRICTION,
85 nullptr,
88 nsresult nsIDNService::Init() {
89 MOZ_ASSERT(NS_IsMainThread());
90 // Take a strong reference for our listener with the preferences service,
91 // which we will release on shutdown.
92 // It's OK if we remove the observer a bit early, as it just means we won't
93 // respond to `network.IDN.extra_{allowed,blocked}_chars` and
94 // `network.IDN.restriction_profile` pref changes during shutdown.
95 Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
96 RunOnShutdown(
97 [self = RefPtr{this}]() mutable {
98 Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
99 self.get());
100 self = nullptr;
102 ShutdownPhase::XPCOMWillShutdown);
103 prefsChanged(nullptr);
105 return NS_OK;
108 void nsIDNService::prefsChanged(const char* pref) {
109 MOZ_ASSERT(NS_IsMainThread());
110 AutoWriteLock lock(mLock);
112 if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
113 nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
114 InitializeBlocklist(mIDNBlocklist);
116 if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
117 nsAutoCString profile;
118 if (NS_FAILED(
119 Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
120 profile.Truncate();
122 if (profile.EqualsLiteral("moderate")) {
123 mRestrictionProfile = eModeratelyRestrictiveProfile;
124 } else if (profile.EqualsLiteral("high")) {
125 mRestrictionProfile = eHighlyRestrictiveProfile;
126 } else {
127 mRestrictionProfile = eASCIIOnlyProfile;
132 nsIDNService::nsIDNService() {
133 MOZ_ASSERT(NS_IsMainThread());
135 auto createResult =
136 mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType);
137 MOZ_ASSERT(createResult.isOk());
138 mIDNA = createResult.unwrap();
141 nsIDNService::~nsIDNService() = default;
143 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
144 nsAString& output) {
145 NS_ConvertUTF8toUTF16 inputStr(input);
147 Span<const char16_t> inputSpan{inputStr};
148 intl::nsTStringToBufferAdapter buffer(output);
149 auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
151 nsresult rv = NS_OK;
152 if (result.isErr()) {
153 rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
154 if (rv == NS_ERROR_FAILURE) {
155 rv = NS_ERROR_MALFORMED_URI;
158 NS_ENSURE_SUCCESS(rv, rv);
160 intl::IDNA::Info info = result.unwrap();
161 if (info.HasErrors()) {
162 rv = NS_ERROR_MALFORMED_URI;
165 return rv;
168 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
169 nsAString& output,
170 stringPrepFlag flag) {
171 Span<const char16_t> inputSpan{input};
172 intl::nsTStringToBufferAdapter buffer(output);
173 auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
175 nsresult rv = NS_OK;
176 if (result.isErr()) {
177 rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
178 if (rv == NS_ERROR_FAILURE) {
179 rv = NS_ERROR_MALFORMED_URI;
182 NS_ENSURE_SUCCESS(rv, rv);
184 intl::IDNA::Info info = result.unwrap();
186 // Output the result of nameToUnicode even if there were errors.
187 // But in the case of invalid punycode, the uidna_labelToUnicode result
188 // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
189 // confuse our subsequent processing, so we drop that.
190 // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
191 if ((info.HasInvalidPunycode() || info.HasInvalidAceLabel()) &&
192 !output.IsEmpty() && output.Last() == 0xfffd) {
193 output.Truncate(output.Length() - 1);
196 if (flag == eStringPrepIgnoreErrors) {
197 return NS_OK;
200 if (flag == eStringPrepForDNS) {
201 // We ignore errors if the result is empty, or if the errors were just
202 // invalid hyphens (not punycode-decoding failure or invalid chars).
203 if (!output.IsEmpty()) {
204 if (info.HasErrorsIgnoringInvalidHyphen()) {
205 output.Truncate();
206 rv = NS_ERROR_MALFORMED_URI;
209 } else {
210 if (info.HasErrors()) {
211 rv = NS_ERROR_MALFORMED_URI;
215 return rv;
218 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
219 nsACString& ace) {
220 return UTF8toACE(input, ace, eStringPrepForDNS);
223 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
224 stringPrepFlag flag) {
225 nsresult rv;
226 NS_ConvertUTF8toUTF16 ustr(input);
228 // map ideographic period to ASCII period etc.
229 normalizeFullStops(ustr);
231 uint32_t len, offset;
232 len = 0;
233 offset = 0;
234 nsAutoCString encodedBuf;
236 nsAString::const_iterator start, end;
237 ustr.BeginReading(start);
238 ustr.EndReading(end);
239 ace.Truncate();
241 // encode nodes if non ASCII
242 while (start != end) {
243 len++;
244 if (*start++ == (char16_t)'.') {
245 rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
246 NS_ENSURE_SUCCESS(rv, rv);
248 ace.Append(encodedBuf);
249 ace.Append('.');
250 offset += len;
251 len = 0;
255 // encode the last node if non ASCII
256 if (len) {
257 rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
258 NS_ENSURE_SUCCESS(rv, rv);
260 ace.Append(encodedBuf);
263 return NS_OK;
266 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
267 nsACString& _retval) {
268 return ACEtoUTF8(input, _retval, eStringPrepForDNS);
271 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
272 stringPrepFlag flag) {
273 // RFC 3490 - 4.2 ToUnicode
274 // ToUnicode never fails. If any step fails, then the original input
275 // sequence is returned immediately in that step.
277 // Note that this refers to the decoding of a single label.
278 // ACEtoUTF8 may be called with a sequence of labels separated by dots;
279 // this test applies individually to each label.
281 uint32_t len = 0, offset = 0;
282 nsAutoCString decodedBuf;
284 nsACString::const_iterator start, end;
285 input.BeginReading(start);
286 input.EndReading(end);
287 _retval.Truncate();
289 // loop and decode nodes
290 while (start != end) {
291 len++;
292 if (*start++ == '.') {
293 nsDependentCSubstring origLabel(input, offset, len - 1);
294 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
295 // If decoding failed, use the original input sequence
296 // for this label.
297 _retval.Append(origLabel);
298 } else {
299 _retval.Append(decodedBuf);
302 _retval.Append('.');
303 offset += len;
304 len = 0;
307 // decode the last node
308 if (len) {
309 nsDependentCSubstring origLabel(input, offset, len);
310 if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
311 _retval.Append(origLabel);
312 } else {
313 _retval.Append(decodedBuf);
317 return NS_OK;
320 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
321 // look for the ACE prefix in the input string. it may occur
322 // at the beginning of any segment in the domain name. for
323 // example: "www.xn--ENCODED.com"
325 if (!IsAscii(input)) {
326 *_retval = false;
327 return NS_OK;
330 auto stringContains = [](const nsACString& haystack,
331 const nsACString& needle) {
332 return std::search(haystack.BeginReading(), haystack.EndReading(),
333 needle.BeginReading(), needle.EndReading(),
334 [](unsigned char ch1, unsigned char ch2) {
335 return tolower(ch1) == tolower(ch2);
336 }) != haystack.EndReading();
339 *_retval =
340 StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
341 (!input.IsEmpty() && input[0] != '.' &&
342 stringContains(input, ".xn--"_ns));
343 return NS_OK;
346 NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
347 nsACString& output) {
348 // protect against bogus input
349 NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
351 NS_ConvertUTF8toUTF16 inUTF16(input);
352 normalizeFullStops(inUTF16);
354 // pass the domain name to stringprep label by label
355 nsAutoString outUTF16, outLabel;
357 uint32_t len = 0, offset = 0;
358 nsresult rv;
359 nsAString::const_iterator start, end;
360 inUTF16.BeginReading(start);
361 inUTF16.EndReading(end);
363 while (start != end) {
364 len++;
365 if (*start++ == char16_t('.')) {
366 rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
367 eStringPrepIgnoreErrors);
368 NS_ENSURE_SUCCESS(rv, rv);
370 outUTF16.Append(outLabel);
371 outUTF16.Append(char16_t('.'));
372 offset += len;
373 len = 0;
376 if (len) {
377 rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
378 eStringPrepIgnoreErrors);
379 NS_ENSURE_SUCCESS(rv, rv);
381 outUTF16.Append(outLabel);
384 CopyUTF16toUTF8(outUTF16, output);
385 return NS_OK;
388 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
389 bool* _isASCII,
390 nsACString& _retval) {
391 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
392 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
394 nsresult rv = NS_OK;
396 // Even if the hostname is not ASCII, individual labels may still be ACE, so
397 // test IsACE before testing IsASCII
398 bool isACE;
399 IsACE(input, &isACE);
401 if (IsAscii(input)) {
402 // first, canonicalize the host to lowercase, for whitelist lookup
403 _retval = input;
404 ToLowerCase(_retval);
406 if (isACE && !StaticPrefs::network_IDN_show_punycode()) {
407 // ACEtoUTF8() can't fail, but might return the original ACE string
408 nsAutoCString temp(_retval);
409 // Convert from ACE to UTF8 only those labels which are considered safe
410 // for display
411 ACEtoUTF8(temp, _retval, eStringPrepForUI);
412 *_isASCII = IsAscii(_retval);
413 } else {
414 *_isASCII = true;
416 } else {
417 // We have to normalize the hostname before testing against the domain
418 // whitelist (see bug 315411), and to ensure the entire string gets
419 // normalized.
421 // Normalization and the tests for safe display below, assume that the
422 // input is Unicode, so first convert any ACE labels to UTF8
423 if (isACE) {
424 nsAutoCString temp;
425 ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
426 rv = Normalize(temp, _retval);
427 } else {
428 rv = Normalize(input, _retval);
430 if (NS_FAILED(rv)) {
431 return rv;
434 if (StaticPrefs::network_IDN_show_punycode() &&
435 NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
436 *_isASCII = true;
437 return NS_OK;
440 // normalization could result in an ASCII-only hostname. alternatively, if
441 // the host is converted to ACE by the normalizer, then the host may contain
442 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
443 // and bug 309311.
444 *_isASCII = IsAscii(_retval);
445 if (!*_isASCII) {
446 // UTF8toACE with eStringPrepForUI may return a domain name where
447 // some labels are in UTF-8 and some are in ACE, depending on
448 // whether they are considered safe for display
449 rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
450 *_isASCII = IsAscii(_retval);
451 return rv;
455 return NS_OK;
456 } // Will generate a mutex still-held warning
458 //-----------------------------------------------------------------------------
460 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
461 uint32_t outBufLen, uint32_t* outLen) {
462 uint32_t i = 0;
463 nsAString::const_iterator start, end;
464 in.BeginReading(start);
465 in.EndReading(end);
467 while (start != end) {
468 char16_t curChar;
470 curChar = *start++;
472 if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
473 out[i] = SURROGATE_TO_UCS4(curChar, *start);
474 ++start;
475 } else {
476 out[i] = curChar;
479 i++;
480 if (i >= outBufLen) {
481 return NS_ERROR_MALFORMED_URI;
484 out[i] = (uint32_t)'\0';
485 *outLen = i;
486 return NS_OK;
489 static nsresult punycode(const nsAString& in, nsACString& out) {
490 uint32_t ucs4Buf[kMaxULabelSize + 1];
491 uint32_t ucs4Len = 0u;
492 nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxULabelSize, &ucs4Len);
493 NS_ENSURE_SUCCESS(rv, rv);
495 // need maximum 20 bits to encode 16 bit Unicode character
496 // (include null terminator)
497 const uint32_t kEncodedBufSize = kMaxULabelSize * 20 / 8 + 1 + 1;
498 char encodedBuf[kEncodedBufSize];
499 punycode_uint encodedLength = kEncodedBufSize;
501 enum punycode_status status =
502 punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
504 if (punycode_success != status || encodedLength >= kEncodedBufSize) {
505 return NS_ERROR_MALFORMED_URI;
508 encodedBuf[encodedLength] = '\0';
509 out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
511 return rv;
514 // RFC 3454
516 // 1) Map -- For each character in the input, check if it has a mapping
517 // and, if so, replace it with its mapping. This is described in section 3.
519 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
520 // normalization. This is described in section 4.
522 // 3) Prohibit -- Check for any characters that are not allowed in the
523 // output. If any are found, return an error. This is described in section
524 // 5.
526 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
527 // are found, make sure that the whole string satisfies the requirements
528 // for bidirectional strings. If the string does not satisfy the requirements
529 // for bidirectional strings, return an error. This is described in section 6.
531 // 5) Check unassigned code points -- If allowUnassigned is false, check for
532 // any unassigned Unicode points and if any are found return an error.
533 // This is described in section 7.
535 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
536 stringPrepFlag flag) {
537 return IDNA2008StringPrep(in, out, flag);
540 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
541 stringPrepFlag flag) {
542 nsresult rv = NS_OK;
544 out.Truncate();
546 if (IsAscii(in)) {
547 LossyCopyUTF16toASCII(in, out);
548 // If label begins with xn-- we still want to check its validity
549 if (!StringBeginsWith(in, u"xn--"_ns, nsCaseInsensitiveStringComparator)) {
550 return NS_OK;
554 nsAutoString strPrep;
555 rv = stringPrep(in, strPrep, flag);
556 if (flag == eStringPrepForDNS) {
557 NS_ENSURE_SUCCESS(rv, rv);
560 if (IsAscii(strPrep)) {
561 LossyCopyUTF16toASCII(strPrep, out);
562 return NS_OK;
565 if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
566 CopyUTF16toUTF8(strPrep, out);
567 return NS_OK;
570 return punycode(strPrep, out);
573 // RFC 3490
574 // 1) Whenever dots are used as label separators, the following characters
575 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
576 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
577 // stop).
579 void nsIDNService::normalizeFullStops(nsAString& s) {
580 nsAString::const_iterator start, end;
581 s.BeginReading(start);
582 s.EndReading(end);
583 int32_t index = 0;
585 while (start != end) {
586 switch (*start) {
587 case 0x3002:
588 case 0xFF0E:
589 case 0xFF61:
590 s.ReplaceLiteral(index, 1, u".");
591 break;
592 default:
593 break;
595 start++;
596 index++;
600 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
601 stringPrepFlag flag) {
602 bool isAce;
603 IsACE(in, &isAce);
604 if (!isAce) {
605 out.Assign(in);
606 return NS_OK;
609 nsAutoString utf16;
610 nsresult result = IDNA2008ToUnicode(in, utf16);
611 NS_ENSURE_SUCCESS(result, result);
613 if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
614 CopyUTF16toUTF8(utf16, out);
615 } else {
616 out.Assign(in);
617 return NS_OK;
620 // Validation: encode back to ACE and compare the strings
621 nsAutoCString ace;
622 nsresult rv = UTF8toACE(out, ace, flag);
623 NS_ENSURE_SUCCESS(rv, rv);
625 if (flag == eStringPrepForDNS &&
626 !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
627 return NS_ERROR_MALFORMED_URI;
630 return NS_OK;
633 namespace mozilla::net {
635 enum ScriptCombo : int32_t {
636 UNSET = -1,
637 BOPO = 0,
638 CYRL = 1,
639 GREK = 2,
640 HANG = 3,
641 HANI = 4,
642 HIRA = 5,
643 KATA = 6,
644 LATN = 7,
645 OTHR = 8,
646 JPAN = 9, // Latin + Han + Hiragana + Katakana
647 CHNA = 10, // Latin + Han + Bopomofo
648 KORE = 11, // Latin + Han + Hangul
649 HNLT = 12, // Latin + Han (could be any of the above combinations)
650 FAIL = 13,
653 } // namespace mozilla::net
655 bool nsIDNService::isLabelSafe(const nsAString& label) {
656 AutoReadLock lock(mLock);
658 if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
659 return false;
662 // We should never get here if the label is ASCII
663 NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
664 if (mRestrictionProfile == eASCIIOnlyProfile) {
665 return false;
668 nsAString::const_iterator current, end;
669 label.BeginReading(current);
670 label.EndReading(end);
672 Script lastScript = Script::INVALID;
673 uint32_t previousChar = 0;
674 uint32_t baseChar = 0; // last non-diacritic seen (base char for marks)
675 uint32_t savedNumberingSystem = 0;
676 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
677 #if 0
678 HanVariantType savedHanVariant = HVT_NotHan;
679 #endif
681 ScriptCombo savedScript = ScriptCombo::UNSET;
683 while (current != end) {
684 uint32_t ch = *current++;
686 if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
687 ch = SURROGATE_TO_UCS4(ch, *current++);
690 IdentifierType idType = GetIdentifierType(ch);
691 if (idType == IDTYPE_RESTRICTED) {
692 return false;
694 MOZ_ASSERT(idType == IDTYPE_ALLOWED);
696 // Check for mixed script
697 Script script = UnicodeProperties::GetScriptCode(ch);
698 if (script != Script::COMMON && script != Script::INHERITED &&
699 script != lastScript) {
700 if (illegalScriptCombo(script, savedScript)) {
701 return false;
705 // U+30FC should be preceded by a Hiragana/Katakana.
706 if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
707 lastScript != Script::KATAKANA) {
708 return false;
711 if (ch == 0x307 &&
712 (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
713 return false;
716 // Check for mixed numbering systems
717 auto genCat = GetGeneralCategory(ch);
718 if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
719 uint32_t zeroCharacter =
720 ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
721 if (savedNumberingSystem == 0) {
722 // If we encounter a decimal number, save the zero character from that
723 // numbering system.
724 savedNumberingSystem = zeroCharacter;
725 } else if (zeroCharacter != savedNumberingSystem) {
726 return false;
730 if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
731 // Check for consecutive non-spacing marks.
732 if (previousChar != 0 && previousChar == ch) {
733 return false;
735 // Check for marks whose expected script doesn't match the base script.
736 if (lastScript != Script::INVALID) {
737 UnicodeProperties::ScriptExtensionVector scripts;
738 auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
739 MOZ_ASSERT(extResult.isOk());
740 if (extResult.isErr()) {
741 return false;
744 int nScripts = AssertedCast<int>(scripts.length());
746 // nScripts will always be >= 1, because even for undefined characters
747 // it will return Script::INVALID.
748 // If the mark just has script=COMMON or INHERITED, we can't check any
749 // more carefully, but if it has specific scriptExtension codes, then
750 // assume those are the only valid scripts to use it with.
751 if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
752 Script(scripts[0]) != Script::INHERITED)) {
753 while (--nScripts >= 0) {
754 if (Script(scripts[nScripts]) == lastScript) {
755 break;
758 if (nScripts == -1) {
759 return false;
763 // Check for diacritics on dotless-i, which would be indistinguishable
764 // from normal accented letter i.
765 if (baseChar == 0x0131 &&
766 ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
767 return false;
769 } else {
770 baseChar = ch;
773 if (script != Script::COMMON && script != Script::INHERITED) {
774 lastScript = script;
777 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
778 #if 0
780 // Check for both simplified-only and traditional-only Chinese characters
781 HanVariantType hanVariant = GetHanVariant(ch);
782 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
783 if (savedHanVariant == HVT_NotHan) {
784 savedHanVariant = hanVariant;
785 } else if (hanVariant != savedHanVariant) {
786 return false;
789 #endif
791 previousChar = ch;
793 return true;
796 // Scripts that we care about in illegalScriptCombo
797 static inline ScriptCombo findScriptIndex(Script aScript) {
798 switch (aScript) {
799 case Script::BOPOMOFO:
800 return ScriptCombo::BOPO;
801 case Script::CYRILLIC:
802 return ScriptCombo::CYRL;
803 case Script::GREEK:
804 return ScriptCombo::GREK;
805 case Script::HANGUL:
806 return ScriptCombo::HANG;
807 case Script::HAN:
808 return ScriptCombo::HANI;
809 case Script::HIRAGANA:
810 return ScriptCombo::HIRA;
811 case Script::KATAKANA:
812 return ScriptCombo::KATA;
813 case Script::LATIN:
814 return ScriptCombo::LATN;
815 default:
816 return ScriptCombo::OTHR;
820 static const ScriptCombo scriptComboTable[13][9] = {
821 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
822 * savedScript */
823 /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
824 /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
825 /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
826 /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
827 /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
828 /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
829 /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
830 /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
831 /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
832 /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
833 /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
834 /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
835 /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
837 bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) {
838 if (savedScript == ScriptCombo::UNSET) {
839 savedScript = findScriptIndex(script);
840 return false;
843 savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
845 * Special case combinations that depend on which profile is in use
846 * In the Highly Restrictive profile Latin is not allowed with any
847 * other script
849 * In the Moderately Restrictive profile Latin mixed with any other
850 * single script is allowed.
852 return ((savedScript == OTHR &&
853 mRestrictionProfile == eHighlyRestrictiveProfile) ||
854 savedScript == FAIL);