netwerk/dns/nsIDNService.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "MainThreadUtils.h"
   7 #include "mozilla/ClearOnShutdown.h"
   8 #include "mozilla/Preferences.h"
   9 #include "nsIDNService.h"
  10 #include "nsReadableUtils.h"
  11 #include "nsCRT.h"
  12 #include "nsServiceManagerUtils.h"
  13 #include "nsUnicharUtils.h"
  14 #include "nsUnicodeProperties.h"
  15 #include "harfbuzz/hb.h"
  16 #include "punycode.h"
  17 #include "mozilla/ArrayUtils.h"
  18 #include "mozilla/Casting.h"
  19 #include "mozilla/StaticPrefs_network.h"
  20 #include "mozilla/TextUtils.h"
  21 #include "mozilla/Utf8.h"
  22 #include "mozilla/intl/FormatBuffer.h"
  23 #include "mozilla/intl/UnicodeProperties.h"
  24 #include "mozilla/intl/UnicodeScriptCodes.h"
  25
  26 #include "ICUUtils.h"
  27
  28 using namespace mozilla;
  29 using namespace mozilla::intl;
  30 using namespace mozilla::unicode;
  31 using namespace mozilla::net;
  32 using mozilla::Preferences;
  33
// Currently we use the non-transitional processing option -- see
// http://unicode.org/reports/tr46/
// To switch to transitional processing, change the value of this flag
// and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
// (revert bug 1218179).
// This value is handed to intl::IDNA::TryCreate() in the nsIDNService
// constructor.
const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType =
    intl::IDNA::ProcessingType::NonTransitional;
  41
  42 //-----------------------------------------------------------------------------
// According to RFC 1034 - 3.1. Name space specifications and terminology
// the maximum label size would be 63. However, this is enforced at the DNS
// level, and none of the other browsers seem to enforce the VerifyDnsLength
// check in https://unicode.org/reports/tr46/#ToASCII
// Instead, we choose a rather arbitrary but larger size.
// Used by punycode() below to size its on-stack UCS-4 and output buffers.
static const uint32_t kMaxULabelSize = 256;
// RFC 3490 - 5.   ACE prefix
static const char kACEPrefix[] = "xn--";

//-----------------------------------------------------------------------------

// Preference names watched by nsIDNService (see gCallbackPrefs).
// A change to either of the first two makes prefsChanged() rebuild the
// character blocklist; a change to the third makes it re-read the
// restriction profile ("moderate", "high", anything else => ASCII only).
#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
  57
  58 static inline bool isOnlySafeChars(const nsString& in,
  59                                    const nsTArray<BlocklistRange>& aBlocklist) {
  60   if (aBlocklist.IsEmpty()) {
  61     return true;
  62   }
  63   const char16_t* cur = in.BeginReading();
  64   const char16_t* end = in.EndReading();
  65
  66   for (; cur < end; ++cur) {
  67     if (CharInBlocklist(*cur, aBlocklist)) {
  68       return false;
  69     }
  70   }
  71   return true;
  72 }
  73
  74 //-----------------------------------------------------------------------------
  75 // nsIDNService
  76 //-----------------------------------------------------------------------------
  77
  78 /* Implementation file */
NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)

// Null-terminated list of preference names handed to
// Preferences::RegisterPrefixCallbacks() / UnregisterPrefixCallbacks() in
// nsIDNService::Init().
static const char* gCallbackPrefs[] = {
    NS_NET_PREF_EXTRAALLOWED,
    NS_NET_PREF_EXTRABLOCKED,
    NS_NET_PREF_IDNRESTRICTION,
    nullptr,
};
  87
  88 nsresult nsIDNService::Init() {
  89   MOZ_ASSERT(NS_IsMainThread());
  90   // Take a strong reference for our listener with the preferences service,
  91   // which we will release on shutdown.
  92   // It's OK if we remove the observer a bit early, as it just means we won't
  93   // respond to `network.IDN.extra_{allowed,blocked}_chars` and
  94   // `network.IDN.restriction_profile` pref changes during shutdown.
  95   Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
  96   RunOnShutdown(
  97       [self = RefPtr{this}]() mutable {
  98         Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
  99                                                self.get());
 100         self = nullptr;
 101       },
 102       ShutdownPhase::XPCOMWillShutdown);
 103   prefsChanged(nullptr);
 104
 105   return NS_OK;
 106 }
 107
 108 void nsIDNService::prefsChanged(const char* pref) {
 109   MOZ_ASSERT(NS_IsMainThread());
 110   AutoWriteLock lock(mLock);
 111
 112   if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
 113       nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
 114     InitializeBlocklist(mIDNBlocklist);
 115   }
 116   if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
 117     nsAutoCString profile;
 118     if (NS_FAILED(
 119             Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
 120       profile.Truncate();
 121     }
 122     if (profile.EqualsLiteral("moderate")) {
 123       mRestrictionProfile = eModeratelyRestrictiveProfile;
 124     } else if (profile.EqualsLiteral("high")) {
 125       mRestrictionProfile = eHighlyRestrictiveProfile;
 126     } else {
 127       mRestrictionProfile = eASCIIOnlyProfile;
 128     }
 129   }
 130 }
 131
// Creates the IDNA helper stored in mIDNA, configured with
// kIDNA2008_DefaultProcessingType. Asserts that it runs on the main thread.
nsIDNService::nsIDNService() {
  MOZ_ASSERT(NS_IsMainThread());

  auto createResult =
      mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType);
  // Creation failure is only checked by this debug assertion; the unwrap()
  // below assumes success.
  MOZ_ASSERT(createResult.isOk());
  mIDNA = createResult.unwrap();
}

nsIDNService::~nsIDNService() = default;
 142
 143 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
 144                                          nsAString& output) {
 145   NS_ConvertUTF8toUTF16 inputStr(input);
 146
 147   Span<const char16_t> inputSpan{inputStr};
 148   intl::nsTStringToBufferAdapter buffer(output);
 149   auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
 150
 151   nsresult rv = NS_OK;
 152   if (result.isErr()) {
 153     rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
 154     if (rv == NS_ERROR_FAILURE) {
 155       rv = NS_ERROR_MALFORMED_URI;
 156     }
 157   }
 158   NS_ENSURE_SUCCESS(rv, rv);
 159
 160   intl::IDNA::Info info = result.unwrap();
 161   if (info.HasErrors()) {
 162     rv = NS_ERROR_MALFORMED_URI;
 163   }
 164
 165   return rv;
 166 }
 167
 168 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
 169                                           nsAString& output,
 170                                           stringPrepFlag flag) {
 171   Span<const char16_t> inputSpan{input};
 172   intl::nsTStringToBufferAdapter buffer(output);
 173   auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
 174
 175   nsresult rv = NS_OK;
 176   if (result.isErr()) {
 177     rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
 178     if (rv == NS_ERROR_FAILURE) {
 179       rv = NS_ERROR_MALFORMED_URI;
 180     }
 181   }
 182   NS_ENSURE_SUCCESS(rv, rv);
 183
 184   intl::IDNA::Info info = result.unwrap();
 185
 186   // Output the result of nameToUnicode even if there were errors.
 187   // But in the case of invalid punycode, the uidna_labelToUnicode result
 188   // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
 189   // confuse our subsequent processing, so we drop that.
 190   // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
 191   if ((info.HasInvalidPunycode() || info.HasInvalidAceLabel()) &&
 192       !output.IsEmpty() && output.Last() == 0xfffd) {
 193     output.Truncate(output.Length() - 1);
 194   }
 195
 196   if (flag == eStringPrepIgnoreErrors) {
 197     return NS_OK;
 198   }
 199
 200   if (flag == eStringPrepForDNS) {
 201     // We ignore errors if the result is empty, or if the errors were just
 202     // invalid hyphens (not punycode-decoding failure or invalid chars).
 203     if (!output.IsEmpty()) {
 204       if (info.HasErrorsIgnoringInvalidHyphen()) {
 205         output.Truncate();
 206         rv = NS_ERROR_MALFORMED_URI;
 207       }
 208     }
 209   } else {
 210     if (info.HasErrors()) {
 211       rv = NS_ERROR_MALFORMED_URI;
 212     }
 213   }
 214
 215   return rv;
 216 }
 217
// nsIIDNService entry point: forwards to UTF8toACE() with the
// eStringPrepForDNS flag and returns its result.
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
                                             nsACString& ace) {
  return UTF8toACE(input, ace, eStringPrepForDNS);
}
 222
 223 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
 224                                  stringPrepFlag flag) {
 225   nsresult rv;
 226   NS_ConvertUTF8toUTF16 ustr(input);
 227
 228   // map ideographic period to ASCII period etc.
 229   normalizeFullStops(ustr);
 230
 231   uint32_t len, offset;
 232   len = 0;
 233   offset = 0;
 234   nsAutoCString encodedBuf;
 235
 236   nsAString::const_iterator start, end;
 237   ustr.BeginReading(start);
 238   ustr.EndReading(end);
 239   ace.Truncate();
 240
 241   // encode nodes if non ASCII
 242   while (start != end) {
 243     len++;
 244     if (*start++ == (char16_t)'.') {
 245       rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
 246       NS_ENSURE_SUCCESS(rv, rv);
 247
 248       ace.Append(encodedBuf);
 249       ace.Append('.');
 250       offset += len;
 251       len = 0;
 252     }
 253   }
 254
 255   // encode the last node if non ASCII
 256   if (len) {
 257     rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
 258     NS_ENSURE_SUCCESS(rv, rv);
 259
 260     ace.Append(encodedBuf);
 261   }
 262
 263   return NS_OK;
 264 }
 265
// nsIIDNService entry point: forwards to ACEtoUTF8() with the
// eStringPrepForDNS flag and returns its result.
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
                                             nsACString& _retval) {
  return ACEtoUTF8(input, _retval, eStringPrepForDNS);
}
 270
 271 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
 272                                  stringPrepFlag flag) {
 273   // RFC 3490 - 4.2 ToUnicode
 274   // ToUnicode never fails.  If any step fails, then the original input
 275   // sequence is returned immediately in that step.
 276   //
 277   // Note that this refers to the decoding of a single label.
 278   // ACEtoUTF8 may be called with a sequence of labels separated by dots;
 279   // this test applies individually to each label.
 280
 281   uint32_t len = 0, offset = 0;
 282   nsAutoCString decodedBuf;
 283
 284   nsACString::const_iterator start, end;
 285   input.BeginReading(start);
 286   input.EndReading(end);
 287   _retval.Truncate();
 288
 289   // loop and decode nodes
 290   while (start != end) {
 291     len++;
 292     if (*start++ == '.') {
 293       nsDependentCSubstring origLabel(input, offset, len - 1);
 294       if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
 295         // If decoding failed, use the original input sequence
 296         // for this label.
 297         _retval.Append(origLabel);
 298       } else {
 299         _retval.Append(decodedBuf);
 300       }
 301
 302       _retval.Append('.');
 303       offset += len;
 304       len = 0;
 305     }
 306   }
 307   // decode the last node
 308   if (len) {
 309     nsDependentCSubstring origLabel(input, offset, len);
 310     if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
 311       _retval.Append(origLabel);
 312     } else {
 313       _retval.Append(decodedBuf);
 314     }
 315   }
 316
 317   return NS_OK;
 318 }
 319
 320 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
 321   // look for the ACE prefix in the input string.  it may occur
 322   // at the beginning of any segment in the domain name.  for
 323   // example: "www.xn--ENCODED.com"
 324
 325   if (!IsAscii(input)) {
 326     *_retval = false;
 327     return NS_OK;
 328   }
 329
 330   auto stringContains = [](const nsACString& haystack,
 331                            const nsACString& needle) {
 332     return std::search(haystack.BeginReading(), haystack.EndReading(),
 333                        needle.BeginReading(), needle.EndReading(),
 334                        [](unsigned char ch1, unsigned char ch2) {
 335                          return tolower(ch1) == tolower(ch2);
 336                        }) != haystack.EndReading();
 337   };
 338
 339   *_retval =
 340       StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
 341       (!input.IsEmpty() && input[0] != '.' &&
 342        stringContains(input, ".xn--"_ns));
 343   return NS_OK;
 344 }
 345
 346 NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
 347                                       nsACString& output) {
 348   // protect against bogus input
 349   NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
 350
 351   NS_ConvertUTF8toUTF16 inUTF16(input);
 352   normalizeFullStops(inUTF16);
 353
 354   // pass the domain name to stringprep label by label
 355   nsAutoString outUTF16, outLabel;
 356
 357   uint32_t len = 0, offset = 0;
 358   nsresult rv;
 359   nsAString::const_iterator start, end;
 360   inUTF16.BeginReading(start);
 361   inUTF16.EndReading(end);
 362
 363   while (start != end) {
 364     len++;
 365     if (*start++ == char16_t('.')) {
 366       rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
 367                       eStringPrepIgnoreErrors);
 368       NS_ENSURE_SUCCESS(rv, rv);
 369
 370       outUTF16.Append(outLabel);
 371       outUTF16.Append(char16_t('.'));
 372       offset += len;
 373       len = 0;
 374     }
 375   }
 376   if (len) {
 377     rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
 378                     eStringPrepIgnoreErrors);
 379     NS_ENSURE_SUCCESS(rv, rv);
 380
 381     outUTF16.Append(outLabel);
 382   }
 383
 384   CopyUTF16toUTF8(outUTF16, output);
 385   return NS_OK;
 386 }
 387
 388 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
 389                                                 bool* _isASCII,
 390                                                 nsACString& _retval) {
 391   // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
 392   // Else, if host is already UTF-8, then make sure it is normalized per IDN.
 393
 394   nsresult rv = NS_OK;
 395
 396   // Even if the hostname is not ASCII, individual labels may still be ACE, so
 397   // test IsACE before testing IsASCII
 398   bool isACE;
 399   IsACE(input, &isACE);
 400
 401   if (IsAscii(input)) {
 402     // first, canonicalize the host to lowercase, for whitelist lookup
 403     _retval = input;
 404     ToLowerCase(_retval);
 405
 406     if (isACE && !StaticPrefs::network_IDN_show_punycode()) {
 407       // ACEtoUTF8() can't fail, but might return the original ACE string
 408       nsAutoCString temp(_retval);
 409       // Convert from ACE to UTF8 only those labels which are considered safe
 410       // for display
 411       ACEtoUTF8(temp, _retval, eStringPrepForUI);
 412       *_isASCII = IsAscii(_retval);
 413     } else {
 414       *_isASCII = true;
 415     }
 416   } else {
 417     // We have to normalize the hostname before testing against the domain
 418     // whitelist (see bug 315411), and to ensure the entire string gets
 419     // normalized.
 420     //
 421     // Normalization and the tests for safe display below, assume that the
 422     // input is Unicode, so first convert any ACE labels to UTF8
 423     if (isACE) {
 424       nsAutoCString temp;
 425       ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
 426       rv = Normalize(temp, _retval);
 427     } else {
 428       rv = Normalize(input, _retval);
 429     }
 430     if (NS_FAILED(rv)) {
 431       return rv;
 432     }
 433
 434     if (StaticPrefs::network_IDN_show_punycode() &&
 435         NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
 436       *_isASCII = true;
 437       return NS_OK;
 438     }
 439
 440     // normalization could result in an ASCII-only hostname. alternatively, if
 441     // the host is converted to ACE by the normalizer, then the host may contain
 442     // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
 443     // and bug 309311.
 444     *_isASCII = IsAscii(_retval);
 445     if (!*_isASCII) {
 446       // UTF8toACE with eStringPrepForUI may return a domain name where
 447       // some labels are in UTF-8 and some are in ACE, depending on
 448       // whether they are considered safe for display
 449       rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
 450       *_isASCII = IsAscii(_retval);
 451       return rv;
 452     }
 453   }
 454
 455   return NS_OK;
 456 }  // Will generate a mutex still-held warning
 457
 458 //-----------------------------------------------------------------------------
 459
 460 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
 461                             uint32_t outBufLen, uint32_t* outLen) {
 462   uint32_t i = 0;
 463   nsAString::const_iterator start, end;
 464   in.BeginReading(start);
 465   in.EndReading(end);
 466
 467   while (start != end) {
 468     char16_t curChar;
 469
 470     curChar = *start++;
 471
 472     if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
 473       out[i] = SURROGATE_TO_UCS4(curChar, *start);
 474       ++start;
 475     } else {
 476       out[i] = curChar;
 477     }
 478
 479     i++;
 480     if (i >= outBufLen) {
 481       return NS_ERROR_MALFORMED_URI;
 482     }
 483   }
 484   out[i] = (uint32_t)'\0';
 485   *outLen = i;
 486   return NS_OK;
 487 }
 488
 489 static nsresult punycode(const nsAString& in, nsACString& out) {
 490   uint32_t ucs4Buf[kMaxULabelSize + 1];
 491   uint32_t ucs4Len = 0u;
 492   nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxULabelSize, &ucs4Len);
 493   NS_ENSURE_SUCCESS(rv, rv);
 494
 495   // need maximum 20 bits to encode 16 bit Unicode character
 496   // (include null terminator)
 497   const uint32_t kEncodedBufSize = kMaxULabelSize * 20 / 8 + 1 + 1;
 498   char encodedBuf[kEncodedBufSize];
 499   punycode_uint encodedLength = kEncodedBufSize;
 500
 501   enum punycode_status status =
 502       punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
 503
 504   if (punycode_success != status || encodedLength >= kEncodedBufSize) {
 505     return NS_ERROR_MALFORMED_URI;
 506   }
 507
 508   encodedBuf[encodedLength] = '\0';
 509   out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
 510
 511   return rv;
 512 }
 513
 514 // RFC 3454
 515 //
 516 // 1) Map -- For each character in the input, check if it has a mapping
 517 // and, if so, replace it with its mapping. This is described in section 3.
 518 //
 519 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
 520 // normalization. This is described in section 4.
 521 //
 522 // 3) Prohibit -- Check for any characters that are not allowed in the
 523 // output. If any are found, return an error. This is described in section
 524 // 5.
 525 //
 526 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
 527 // are found, make sure that the whole string satisfies the requirements
 528 // for bidirectional strings. If the string does not satisfy the requirements
 529 // for bidirectional strings, return an error. This is described in section 6.
 530 //
 531 // 5) Check unassigned code points -- If allowUnassigned is false, check for
 532 // any unassigned Unicode points and if any are found return an error.
 533 // This is described in section 7.
 534 //
// Thin forwarder: the actual processing of the label |in| into |out| is done
// by IDNA2008StringPrep(), whose result is returned unchanged.
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
                                  stringPrepFlag flag) {
  return IDNA2008StringPrep(in, out, flag);
}
 539
 540 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
 541                                         stringPrepFlag flag) {
 542   nsresult rv = NS_OK;
 543
 544   out.Truncate();
 545
 546   if (IsAscii(in)) {
 547     LossyCopyUTF16toASCII(in, out);
 548     // If label begins with xn-- we still want to check its validity
 549     if (!StringBeginsWith(in, u"xn--"_ns, nsCaseInsensitiveStringComparator)) {
 550       return NS_OK;
 551     }
 552   }
 553
 554   nsAutoString strPrep;
 555   rv = stringPrep(in, strPrep, flag);
 556   if (flag == eStringPrepForDNS) {
 557     NS_ENSURE_SUCCESS(rv, rv);
 558   }
 559
 560   if (IsAscii(strPrep)) {
 561     LossyCopyUTF16toASCII(strPrep, out);
 562     return NS_OK;
 563   }
 564
 565   if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
 566     CopyUTF16toUTF8(strPrep, out);
 567     return NS_OK;
 568   }
 569
 570   return punycode(strPrep, out);
 571 }
 572
 573 // RFC 3490
 574 // 1) Whenever dots are used as label separators, the following characters
 575 //    MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
 576 //    stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
 577 //    stop).
 578
 579 void nsIDNService::normalizeFullStops(nsAString& s) {
 580   nsAString::const_iterator start, end;
 581   s.BeginReading(start);
 582   s.EndReading(end);
 583   int32_t index = 0;
 584
 585   while (start != end) {
 586     switch (*start) {
 587       case 0x3002:
 588       case 0xFF0E:
 589       case 0xFF61:
 590         s.ReplaceLiteral(index, 1, u".");
 591         break;
 592       default:
 593         break;
 594     }
 595     start++;
 596     index++;
 597   }
 598 }
 599
 600 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
 601                                  stringPrepFlag flag) {
 602   bool isAce;
 603   IsACE(in, &isAce);
 604   if (!isAce) {
 605     out.Assign(in);
 606     return NS_OK;
 607   }
 608
 609   nsAutoString utf16;
 610   nsresult result = IDNA2008ToUnicode(in, utf16);
 611   NS_ENSURE_SUCCESS(result, result);
 612
 613   if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
 614     CopyUTF16toUTF8(utf16, out);
 615   } else {
 616     out.Assign(in);
 617     return NS_OK;
 618   }
 619
 620   // Validation: encode back to ACE and compare the strings
 621   nsAutoCString ace;
 622   nsresult rv = UTF8toACE(out, ace, flag);
 623   NS_ENSURE_SUCCESS(rv, rv);
 624
 625   if (flag == eStringPrepForDNS &&
 626       !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
 627     return NS_ERROR_MALFORMED_URI;
 628   }
 629
 630   return NS_OK;
 631 }
 632
namespace mozilla::net {

// State of the mixed-script check performed by
// nsIDNService::illegalScriptCombo(). BOPO..OTHR are also the values returned
// by findScriptIndex() and index the columns of scriptComboTable; BOPO..HNLT
// index its rows. UNSET means no script has been recorded yet, and FAIL marks
// a combination that is not allowed.
enum ScriptCombo : int32_t {
  UNSET = -1,
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)
  FAIL = 13,
};

}  // namespace mozilla::net
 654
// Decides whether |label| may be displayed as Unicode. Returns false as soon
// as any of the checks below fails, true when the whole label passes.
// mLock is held for reading for the duration of the call, since the
// blocklist and restriction profile are rewritten by prefsChanged().
bool nsIDNService::isLabelSafe(const nsAString& label) {
  AutoReadLock lock(mLock);

  // Any character on the blocklist makes the label unsafe.
  if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
    return false;
  }

  // We should never get here if the label is ASCII
  NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
  // Under the ASCII-only profile no (non-ASCII) label is displayable.
  if (mRestrictionProfile == eASCIIOnlyProfile) {
    return false;
  }

  nsAString::const_iterator current, end;
  label.BeginReading(current);
  label.EndReading(end);

  // State carried from one character to the next:
  Script lastScript = Script::INVALID;  // last script other than COMMON/INHERITED
  uint32_t previousChar = 0;            // previous code point (0 before the first)
  uint32_t baseChar = 0;  // last non-diacritic seen (base char for marks)
  uint32_t savedNumberingSystem = 0;  // zero digit of the first decimal digit seen
// Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0
  HanVariantType savedHanVariant = HVT_NotHan;
#endif

  // Accumulated script combination, updated by illegalScriptCombo().
  ScriptCombo savedScript = ScriptCombo::UNSET;

  while (current != end) {
    uint32_t ch = *current++;

    // Combine a valid surrogate pair into a single code point.
    if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
      ch = SURROGATE_TO_UCS4(ch, *current++);
    }

    IdentifierType idType = GetIdentifierType(ch);
    if (idType == IDTYPE_RESTRICTED) {
      return false;
    }
    MOZ_ASSERT(idType == IDTYPE_ALLOWED);

    // Check for mixed script
    // (only evaluated when the script differs from the last specific one).
    Script script = UnicodeProperties::GetScriptCode(ch);
    if (script != Script::COMMON && script != Script::INHERITED &&
        script != lastScript) {
      if (illegalScriptCombo(script, savedScript)) {
        return false;
      }
    }

    // U+30FC should be preceded by a Hiragana/Katakana.
    if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
        lastScript != Script::KATAKANA) {
      return false;
    }

    // Reject U+0307 (combining dot above) directly after 'i', 'j' or 'l'.
    if (ch == 0x307 &&
        (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
      return false;
    }

    // Check for mixed numbering systems
    auto genCat = GetGeneralCategory(ch);
    if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
      // Subtracting the digit's numeric value gives the code point of that
      // numbering system's zero, which identifies the system.
      uint32_t zeroCharacter =
          ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
      if (savedNumberingSystem == 0) {
        // If we encounter a decimal number, save the zero character from that
        // numbering system.
        savedNumberingSystem = zeroCharacter;
      } else if (zeroCharacter != savedNumberingSystem) {
        return false;
      }
    }

    if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
      // Check for consecutive non-spacing marks.
      // (The test below rejects the same mark appearing twice in a row.)
      if (previousChar != 0 && previousChar == ch) {
        return false;
      }
      // Check for marks whose expected script doesn't match the base script.
      if (lastScript != Script::INVALID) {
        UnicodeProperties::ScriptExtensionVector scripts;
        auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
        MOZ_ASSERT(extResult.isOk());
        if (extResult.isErr()) {
          return false;
        }

        int nScripts = AssertedCast<int>(scripts.length());

        // nScripts will always be >= 1, because even for undefined characters
        // it will return Script::INVALID.
        // If the mark just has script=COMMON or INHERITED, we can't check any
        // more carefully, but if it has specific scriptExtension codes, then
        // assume those are the only valid scripts to use it with.
        if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
                             Script(scripts[0]) != Script::INHERITED)) {
          // Search the extensions backwards for lastScript; nScripts ends at
          // -1 only when no entry matched.
          while (--nScripts >= 0) {
            if (Script(scripts[nScripts]) == lastScript) {
              break;
            }
          }
          if (nScripts == -1) {
            return false;
          }
        }
      }
      // Check for diacritics on dotless-i, which would be indistinguishable
      // from normal accented letter i.
      if (baseChar == 0x0131 &&
          ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
        return false;
      }
    } else {
      baseChar = ch;
    }

    // COMMON and INHERITED characters do not change the remembered script.
    if (script != Script::COMMON && script != Script::INHERITED) {
      lastScript = script;
    }

    // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0

    // Check for both simplified-only and traditional-only Chinese characters
    HanVariantType hanVariant = GetHanVariant(ch);
    if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
      if (savedHanVariant == HVT_NotHan) {
        savedHanVariant = hanVariant;
      } else if (hanVariant != savedHanVariant)  {
        return false;
      }
    }
#endif

    previousChar = ch;
  }
  return true;
}
 795
 796 // Scripts that we care about in illegalScriptCombo
 797 static inline ScriptCombo findScriptIndex(Script aScript) {
 798   switch (aScript) {
 799     case Script::BOPOMOFO:
 800       return ScriptCombo::BOPO;
 801     case Script::CYRILLIC:
 802       return ScriptCombo::CYRL;
 803     case Script::GREEK:
 804       return ScriptCombo::GREK;
 805     case Script::HANGUL:
 806       return ScriptCombo::HANG;
 807     case Script::HAN:
 808       return ScriptCombo::HANI;
 809     case Script::HIRAGANA:
 810       return ScriptCombo::HIRA;
 811     case Script::KATAKANA:
 812       return ScriptCombo::KATA;
 813     case Script::LATIN:
 814       return ScriptCombo::LATN;
 815     default:
 816       return ScriptCombo::OTHR;
 817   }
 818 }
 819
// Transition table for the mixed-script check: illegalScriptCombo() looks up
// scriptComboTable[savedScript][findScriptIndex(script)] to obtain the new
// saved state. Rows are the states BOPO..HNLT (13), columns the script
// indices BOPO..OTHR (9). FAIL entries mark combinations that are rejected;
// FAIL itself has no row.
static const ScriptCombo scriptComboTable[13][9] = {
    /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
     * savedScript */
    /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
    /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
    /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
    /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
    /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
    /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
    /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
    /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
    /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
 836
 837 bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) {
 838   if (savedScript == ScriptCombo::UNSET) {
 839     savedScript = findScriptIndex(script);
 840     return false;
 841   }
 842
 843   savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
 844   /*
 845    * Special case combinations that depend on which profile is in use
 846    * In the Highly Restrictive profile Latin is not allowed with any
 847    *  other script
 848    *
 849    * In the Moderately Restrictive profile Latin mixed with any other
 850    *  single script is allowed.
 851    */
 852   return ((savedScript == OTHR &&
 853            mRestrictionProfile == eHighlyRestrictiveProfile) ||
 854           savedScript == FAIL);
 855 }