netwerk/dns/nsIDNService.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "MainThreadUtils.h"
   7 #include "mozilla/ClearOnShutdown.h"
   8 #include "mozilla/Preferences.h"
   9 #include "nsIDNService.h"
  10 #include "nsReadableUtils.h"
  11 #include "nsCRT.h"
  12 #include "nsServiceManagerUtils.h"
  13 #include "nsUnicharUtils.h"
  14 #include "nsUnicodeProperties.h"
  15 #include "harfbuzz/hb.h"
  16 #include "punycode.h"
  17 #include "mozilla/ArrayUtils.h"
  18 #include "mozilla/Casting.h"
  19 #include "mozilla/StaticPrefs_network.h"
  20 #include "mozilla/TextUtils.h"
  21 #include "mozilla/Utf8.h"
  22 #include "mozilla/intl/FormatBuffer.h"
  23 #include "mozilla/intl/UnicodeProperties.h"
  24 #include "mozilla/intl/UnicodeScriptCodes.h"
  25
  26 #include "ICUUtils.h"
  27
  28 using namespace mozilla;
  29 using namespace mozilla::intl;
  30 using namespace mozilla::unicode;
  31 using namespace mozilla::net;
  32 using mozilla::Preferences;
  33
// Currently we use the non-transitional processing option -- see
// http://unicode.org/reports/tr46/
// To switch to transitional processing, change the value of this flag
// and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
// (revert bug 1218179).
// This value is handed to intl::IDNA::TryCreate() in the constructor.
const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType =
    intl::IDNA::ProcessingType::NonTransitional;

//-----------------------------------------------------------------------------
// According to RFC 1034 - 3.1. Name space specifications and terminology
// the maximum label size would be 63. However, this is enforced at the DNS
// level and none of the other browsers seem to enforce the VerifyDnsLength
// check in https://unicode.org/reports/tr46/#ToASCII
// Instead, we choose a rather arbitrary but larger size.
// kMaxULabelSize sizes the UCS-4 scratch buffer used by punycode() and is the
// capacity passed to utf16ToUcs4().
static const uint32_t kMaxULabelSize = 256;
// RFC 3490 - 5.   ACE prefix
// Prepended by punycode() to every encoded label.
static const char kACEPrefix[] = "xn--";

//-----------------------------------------------------------------------------

// Names of the preferences this service reacts to (see gCallbackPrefs and
// prefsChanged()).
#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
  57
  58 static inline bool isOnlySafeChars(const nsString& in,
  59                                    const nsTArray<BlocklistRange>& aBlocklist) {
  60   if (aBlocklist.IsEmpty()) {
  61     return true;
  62   }
  63   const char16_t* cur = in.BeginReading();
  64   const char16_t* end = in.EndReading();
  65
  66   for (; cur < end; ++cur) {
  67     if (CharInBlocklist(*cur, aBlocklist)) {
  68       return false;
  69     }
  70   }
  71   return true;
  72 }
  73
  74 //-----------------------------------------------------------------------------
  75 // nsIDNService
  76 //-----------------------------------------------------------------------------
  77
/* Implementation file */
NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)

// nullptr-terminated list of preference names passed to
// Preferences::RegisterPrefixCallbacks()/UnregisterPrefixCallbacks() in
// Init(); a change to any of them is handled by prefsChanged().
static const char* gCallbackPrefs[] = {
    NS_NET_PREF_EXTRAALLOWED,
    NS_NET_PREF_EXTRABLOCKED,
    NS_NET_PREF_IDNRESTRICTION,
    nullptr,
};
  87
// Main-thread initialization: registers the pref callbacks for
// gCallbackPrefs, schedules their removal at XPCOMWillShutdown, and loads the
// initial pref-derived state via prefsChanged(nullptr). Always returns NS_OK.
nsresult nsIDNService::Init() {
  MOZ_ASSERT(NS_IsMainThread());
  // Take a strong reference for our listener with the preferences service,
  // which we will release on shutdown.
  // It's OK if we remove the observer a bit early, as it just means we won't
  // respond to `network.IDN.extra_{allowed,blocked}_chars` and
  // `network.IDN.restriction_profile` pref changes during shutdown.
  Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
  RunOnShutdown(
      // The lambda owns a RefPtr to this service and drops it once the
      // callbacks have been unregistered.
      [self = RefPtr{this}]() mutable {
        Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
                                               self.get());
        self = nullptr;
      },
      ShutdownPhase::XPCOMWillShutdown);
  // A null pref name makes prefsChanged() (re)load everything.
  prefsChanged(nullptr);

  return NS_OK;
}
 107
 108 void nsIDNService::prefsChanged(const char* pref) {
 109   MOZ_ASSERT(NS_IsMainThread());
 110   AutoWriteLock lock(mLock);
 111
 112   if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
 113       nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
 114     InitializeBlocklist(mIDNBlocklist);
 115   }
 116   if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
 117     nsAutoCString profile;
 118     if (NS_FAILED(
 119             Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
 120       profile.Truncate();
 121     }
 122     if (profile.EqualsLiteral("moderate")) {
 123       mRestrictionProfile = eModeratelyRestrictiveProfile;
 124     } else if (profile.EqualsLiteral("high")) {
 125       mRestrictionProfile = eHighlyRestrictiveProfile;
 126     } else {
 127       mRestrictionProfile = eASCIIOnlyProfile;
 128     }
 129   }
 130 }
 131
// Creates the IDNA converter used by IDNA2008ToUnicode() and
// IDNA2008StringPrep(), configured with kIDNA2008_DefaultProcessingType.
// Must run on the main thread.
nsIDNService::nsIDNService() {
  MOZ_ASSERT(NS_IsMainThread());

  auto createResult =
      mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType);
  // Creation failure is only checked via this assertion; the result is
  // unwrapped unconditionally below.
  MOZ_ASSERT(createResult.isOk());
  mIDNA = createResult.unwrap();
}

nsIDNService::~nsIDNService() = default;
 142
 143 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
 144                                          nsAString& output) {
 145   NS_ConvertUTF8toUTF16 inputStr(input);
 146
 147   Span<const char16_t> inputSpan{inputStr};
 148   intl::nsTStringToBufferAdapter buffer(output);
 149   auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
 150
 151   nsresult rv = NS_OK;
 152   if (result.isErr()) {
 153     rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
 154     if (rv == NS_ERROR_FAILURE) {
 155       rv = NS_ERROR_MALFORMED_URI;
 156     }
 157   }
 158   NS_ENSURE_SUCCESS(rv, rv);
 159
 160   intl::IDNA::Info info = result.unwrap();
 161   if (info.HasErrors()) {
 162     rv = NS_ERROR_MALFORMED_URI;
 163   }
 164
 165   return rv;
 166 }
 167
 168 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
 169                                           nsAString& output,
 170                                           stringPrepFlag flag) {
 171   Span<const char16_t> inputSpan{input};
 172   intl::nsTStringToBufferAdapter buffer(output);
 173   auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
 174
 175   nsresult rv = NS_OK;
 176   if (result.isErr()) {
 177     rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
 178     if (rv == NS_ERROR_FAILURE) {
 179       rv = NS_ERROR_MALFORMED_URI;
 180     }
 181   }
 182   NS_ENSURE_SUCCESS(rv, rv);
 183
 184   intl::IDNA::Info info = result.unwrap();
 185
 186   // Output the result of nameToUnicode even if there were errors.
 187   // But in the case of invalid punycode, the uidna_labelToUnicode result
 188   // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
 189   // confuse our subsequent processing, so we drop that.
 190   // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
 191   if ((info.HasInvalidPunycode() || info.HasInvalidAceLabel()) &&
 192       !output.IsEmpty() && output.Last() == 0xfffd) {
 193     output.Truncate(output.Length() - 1);
 194   }
 195
 196   if (flag == eStringPrepIgnoreErrors) {
 197     return NS_OK;
 198   }
 199
 200   bool hasError = flag == eStringPrepForDNS
 201                       ? info.HasErrors() && !info.HasInvalidHyphen()
 202                       : info.HasErrors();
 203
 204   if (hasError) {
 205     if (flag == eStringPrepForDNS) {
 206       output.Truncate();
 207     }
 208     rv = NS_ERROR_MALFORMED_URI;
 209   }
 210
 211   return rv;
 212 }
 213
// nsIIDNService entry point: encodes a UTF-8 hostname to ACE by calling
// UTF8toACE() with the eStringPrepForDNS flag and returning its result.
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
                                             nsACString& ace) {
  return UTF8toACE(input, ace, eStringPrepForDNS);
}
 218
 219 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
 220                                  stringPrepFlag flag) {
 221   nsresult rv;
 222   NS_ConvertUTF8toUTF16 ustr(input);
 223
 224   // map ideographic period to ASCII period etc.
 225   normalizeFullStops(ustr);
 226
 227   uint32_t len, offset;
 228   len = 0;
 229   offset = 0;
 230   nsAutoCString encodedBuf;
 231
 232   nsAString::const_iterator start, end;
 233   ustr.BeginReading(start);
 234   ustr.EndReading(end);
 235   ace.Truncate();
 236
 237   // encode nodes if non ASCII
 238   while (start != end) {
 239     len++;
 240     if (*start++ == (char16_t)'.') {
 241       rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
 242       NS_ENSURE_SUCCESS(rv, rv);
 243
 244       ace.Append(encodedBuf);
 245       ace.Append('.');
 246       offset += len;
 247       len = 0;
 248     }
 249   }
 250
 251   // encode the last node if non ASCII
 252   if (len) {
 253     rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
 254     NS_ENSURE_SUCCESS(rv, rv);
 255
 256     ace.Append(encodedBuf);
 257   }
 258
 259   return NS_OK;
 260 }
 261
// nsIIDNService entry point: decodes an ACE hostname to UTF-8 by calling
// ACEtoUTF8() with the eStringPrepForDNS flag and returning its result.
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
                                             nsACString& _retval) {
  return ACEtoUTF8(input, _retval, eStringPrepForDNS);
}
 266
 267 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
 268                                  stringPrepFlag flag) {
 269   // RFC 3490 - 4.2 ToUnicode
 270   // ToUnicode never fails.  If any step fails, then the original input
 271   // sequence is returned immediately in that step.
 272   //
 273   // Note that this refers to the decoding of a single label.
 274   // ACEtoUTF8 may be called with a sequence of labels separated by dots;
 275   // this test applies individually to each label.
 276
 277   uint32_t len = 0, offset = 0;
 278   nsAutoCString decodedBuf;
 279
 280   nsACString::const_iterator start, end;
 281   input.BeginReading(start);
 282   input.EndReading(end);
 283   _retval.Truncate();
 284
 285   // loop and decode nodes
 286   while (start != end) {
 287     len++;
 288     if (*start++ == '.') {
 289       nsDependentCSubstring origLabel(input, offset, len - 1);
 290       if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
 291         // If decoding failed, use the original input sequence
 292         // for this label.
 293         _retval.Append(origLabel);
 294       } else {
 295         _retval.Append(decodedBuf);
 296       }
 297
 298       _retval.Append('.');
 299       offset += len;
 300       len = 0;
 301     }
 302   }
 303   // decode the last node
 304   if (len) {
 305     nsDependentCSubstring origLabel(input, offset, len);
 306     if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag))) {
 307       _retval.Append(origLabel);
 308     } else {
 309       _retval.Append(decodedBuf);
 310     }
 311   }
 312
 313   return NS_OK;
 314 }
 315
 316 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
 317   // look for the ACE prefix in the input string.  it may occur
 318   // at the beginning of any segment in the domain name.  for
 319   // example: "www.xn--ENCODED.com"
 320
 321   if (!IsAscii(input)) {
 322     *_retval = false;
 323     return NS_OK;
 324   }
 325
 326   auto stringContains = [](const nsACString& haystack,
 327                            const nsACString& needle) {
 328     return std::search(haystack.BeginReading(), haystack.EndReading(),
 329                        needle.BeginReading(), needle.EndReading(),
 330                        [](unsigned char ch1, unsigned char ch2) {
 331                          return tolower(ch1) == tolower(ch2);
 332                        }) != haystack.EndReading();
 333   };
 334
 335   *_retval =
 336       StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
 337       (!input.IsEmpty() && input[0] != '.' &&
 338        stringContains(input, ".xn--"_ns));
 339   return NS_OK;
 340 }
 341
 342 NS_IMETHODIMP nsIDNService::Normalize(const nsACString& input,
 343                                       nsACString& output) {
 344   // protect against bogus input
 345   NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
 346
 347   NS_ConvertUTF8toUTF16 inUTF16(input);
 348   normalizeFullStops(inUTF16);
 349
 350   // pass the domain name to stringprep label by label
 351   nsAutoString outUTF16, outLabel;
 352
 353   uint32_t len = 0, offset = 0;
 354   nsresult rv;
 355   nsAString::const_iterator start, end;
 356   inUTF16.BeginReading(start);
 357   inUTF16.EndReading(end);
 358
 359   while (start != end) {
 360     len++;
 361     if (*start++ == char16_t('.')) {
 362       rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
 363                       eStringPrepIgnoreErrors);
 364       NS_ENSURE_SUCCESS(rv, rv);
 365
 366       outUTF16.Append(outLabel);
 367       outUTF16.Append(char16_t('.'));
 368       offset += len;
 369       len = 0;
 370     }
 371   }
 372   if (len) {
 373     rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
 374                     eStringPrepIgnoreErrors);
 375     NS_ENSURE_SUCCESS(rv, rv);
 376
 377     outUTF16.Append(outLabel);
 378   }
 379
 380   CopyUTF16toUTF8(outUTF16, output);
 381   return NS_OK;
 382 }
 383
 384 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
 385                                                 bool* _isASCII,
 386                                                 nsACString& _retval) {
 387   // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
 388   // Else, if host is already UTF-8, then make sure it is normalized per IDN.
 389
 390   nsresult rv = NS_OK;
 391
 392   // Even if the hostname is not ASCII, individual labels may still be ACE, so
 393   // test IsACE before testing IsASCII
 394   bool isACE;
 395   IsACE(input, &isACE);
 396
 397   if (IsAscii(input)) {
 398     // first, canonicalize the host to lowercase, for whitelist lookup
 399     _retval = input;
 400     ToLowerCase(_retval);
 401
 402     if (isACE && !StaticPrefs::network_IDN_show_punycode()) {
 403       // ACEtoUTF8() can't fail, but might return the original ACE string
 404       nsAutoCString temp(_retval);
 405       // Convert from ACE to UTF8 only those labels which are considered safe
 406       // for display
 407       ACEtoUTF8(temp, _retval, eStringPrepForUI);
 408       *_isASCII = IsAscii(_retval);
 409     } else {
 410       *_isASCII = true;
 411     }
 412   } else {
 413     // We have to normalize the hostname before testing against the domain
 414     // whitelist (see bug 315411), and to ensure the entire string gets
 415     // normalized.
 416     //
 417     // Normalization and the tests for safe display below, assume that the
 418     // input is Unicode, so first convert any ACE labels to UTF8
 419     if (isACE) {
 420       nsAutoCString temp;
 421       ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
 422       rv = Normalize(temp, _retval);
 423     } else {
 424       rv = Normalize(input, _retval);
 425     }
 426     if (NS_FAILED(rv)) {
 427       return rv;
 428     }
 429
 430     if (StaticPrefs::network_IDN_show_punycode() &&
 431         NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
 432       *_isASCII = true;
 433       return NS_OK;
 434     }
 435
 436     // normalization could result in an ASCII-only hostname. alternatively, if
 437     // the host is converted to ACE by the normalizer, then the host may contain
 438     // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
 439     // and bug 309311.
 440     *_isASCII = IsAscii(_retval);
 441     if (!*_isASCII) {
 442       // UTF8toACE with eStringPrepForUI may return a domain name where
 443       // some labels are in UTF-8 and some are in ACE, depending on
 444       // whether they are considered safe for display
 445       rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
 446       *_isASCII = IsAscii(_retval);
 447       return rv;
 448     }
 449   }
 450
 451   return NS_OK;
 452 }  // Will generate a mutex still-held warning
 453
 454 //-----------------------------------------------------------------------------
 455
 456 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
 457                             uint32_t outBufLen, uint32_t* outLen) {
 458   uint32_t i = 0;
 459   nsAString::const_iterator start, end;
 460   in.BeginReading(start);
 461   in.EndReading(end);
 462
 463   while (start != end) {
 464     char16_t curChar;
 465
 466     curChar = *start++;
 467
 468     if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
 469       out[i] = SURROGATE_TO_UCS4(curChar, *start);
 470       ++start;
 471     } else {
 472       out[i] = curChar;
 473     }
 474
 475     i++;
 476     if (i >= outBufLen) {
 477       return NS_ERROR_MALFORMED_URI;
 478     }
 479   }
 480   out[i] = (uint32_t)'\0';
 481   *outLen = i;
 482   return NS_OK;
 483 }
 484
 485 static nsresult punycode(const nsAString& in, nsACString& out) {
 486   uint32_t ucs4Buf[kMaxULabelSize + 1];
 487   uint32_t ucs4Len = 0u;
 488   nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxULabelSize, &ucs4Len);
 489   NS_ENSURE_SUCCESS(rv, rv);
 490
 491   // need maximum 20 bits to encode 16 bit Unicode character
 492   // (include null terminator)
 493   const uint32_t kEncodedBufSize = kMaxULabelSize * 20 / 8 + 1 + 1;
 494   char encodedBuf[kEncodedBufSize];
 495   punycode_uint encodedLength = kEncodedBufSize;
 496
 497   enum punycode_status status =
 498       punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
 499
 500   if (punycode_success != status || encodedLength >= kEncodedBufSize) {
 501     return NS_ERROR_MALFORMED_URI;
 502   }
 503
 504   encodedBuf[encodedLength] = '\0';
 505   out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
 506
 507   return rv;
 508 }
 509
 510 // RFC 3454
 511 //
 512 // 1) Map -- For each character in the input, check if it has a mapping
 513 // and, if so, replace it with its mapping. This is described in section 3.
 514 //
 515 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
 516 // normalization. This is described in section 4.
 517 //
 518 // 3) Prohibit -- Check for any characters that are not allowed in the
 519 // output. If any are found, return an error. This is described in section
 520 // 5.
 521 //
 522 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
 523 // are found, make sure that the whole string satisfies the requirements
 524 // for bidirectional strings. If the string does not satisfy the requirements
 525 // for bidirectional strings, return an error. This is described in section 6.
 526 //
 527 // 5) Check unassigned code points -- If allowUnassigned is false, check for
 528 // any unassigned Unicode points and if any are found return an error.
 529 // This is described in section 7.
 530 //
// In this file the preparation of a label is delegated unchanged to
// IDNA2008StringPrep(), whose result is returned as is.
nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
                                  stringPrepFlag flag) {
  return IDNA2008StringPrep(in, out, flag);
}
 535
 536 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
 537                                         stringPrepFlag flag) {
 538   nsresult rv = NS_OK;
 539
 540   out.Truncate();
 541
 542   if (IsAscii(in)) {
 543     LossyCopyUTF16toASCII(in, out);
 544     // If label begins with xn-- we still want to check its validity
 545     if (!StringBeginsWith(in, u"xn--"_ns, nsCaseInsensitiveStringComparator)) {
 546       return NS_OK;
 547     }
 548   }
 549
 550   nsAutoString strPrep;
 551   rv = stringPrep(in, strPrep, flag);
 552   if (flag == eStringPrepForDNS) {
 553     NS_ENSURE_SUCCESS(rv, rv);
 554   }
 555
 556   if (IsAscii(strPrep)) {
 557     LossyCopyUTF16toASCII(strPrep, out);
 558     return NS_OK;
 559   }
 560
 561   if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in)) {
 562     CopyUTF16toUTF8(strPrep, out);
 563     return NS_OK;
 564   }
 565
 566   return punycode(strPrep, out);
 567 }
 568
 569 // RFC 3490
 570 // 1) Whenever dots are used as label separators, the following characters
 571 //    MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
 572 //    stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
 573 //    stop).
 574
 575 void nsIDNService::normalizeFullStops(nsAString& s) {
 576   nsAString::const_iterator start, end;
 577   s.BeginReading(start);
 578   s.EndReading(end);
 579   int32_t index = 0;
 580
 581   while (start != end) {
 582     switch (*start) {
 583       case 0x3002:
 584       case 0xFF0E:
 585       case 0xFF61:
 586         s.ReplaceLiteral(index, 1, u".");
 587         break;
 588       default:
 589         break;
 590     }
 591     start++;
 592     index++;
 593   }
 594 }
 595
 596 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
 597                                  stringPrepFlag flag) {
 598   bool isAce;
 599   IsACE(in, &isAce);
 600   if (!isAce) {
 601     out.Assign(in);
 602     return NS_OK;
 603   }
 604
 605   nsAutoString utf16;
 606   nsresult result = IDNA2008ToUnicode(in, utf16);
 607   NS_ENSURE_SUCCESS(result, result);
 608
 609   if (flag != eStringPrepForUI || isLabelSafe(utf16)) {
 610     CopyUTF16toUTF8(utf16, out);
 611   } else {
 612     out.Assign(in);
 613     return NS_OK;
 614   }
 615
 616   // Validation: encode back to ACE and compare the strings
 617   nsAutoCString ace;
 618   nsresult rv = UTF8toACE(out, ace, flag);
 619   NS_ENSURE_SUCCESS(rv, rv);
 620
 621   if (flag == eStringPrepForDNS &&
 622       !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
 623     return NS_ERROR_MALFORMED_URI;
 624   }
 625
 626   return NS_OK;
 627 }
 628
namespace mozilla::net {

// State used by nsIDNService::illegalScriptCombo() to track which scripts
// have been seen in a label so far.
// The numeric values matter: BOPO..OTHR (0..8) are the column indices of
// scriptComboTable and are what findScriptIndex() returns; BOPO..HNLT (0..12)
// are its row indices. UNSET marks "no script seen yet" and FAIL marks an
// illegal combination; neither has a row in the table.
enum ScriptCombo : int32_t {
  UNSET = -1,
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)
  FAIL = 13,
};

}  // namespace mozilla::net
 650
// Decides whether a (non-ASCII) label may be shown in Unicode form.
// Holds the read lock while consulting mIDNBlocklist and mRestrictionProfile.
// Returns false when any of the following holds:
//  - the label contains a blocklisted character;
//  - the restriction profile is eASCIIOnlyProfile;
//  - a character has identifier type IDTYPE_RESTRICTED;
//  - illegalScriptCombo() rejects the mix of scripts;
//  - U+30FC appears without a preceding Hiragana/Katakana script;
//  - U+0307 directly follows 'i', 'j' or 'l';
//  - decimal digits from more than one numbering system are mixed;
//  - the same non-spacing mark occurs twice in a row, a mark's script
//    extensions do not include the script seen before it, or certain marks
//    are applied to dotless i (U+0131).
// Otherwise returns true.
bool nsIDNService::isLabelSafe(const nsAString& label) {
  AutoReadLock lock(mLock);

  if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
    return false;
  }

  // We should never get here if the label is ASCII
  NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
  if (mRestrictionProfile == eASCIIOnlyProfile) {
    return false;
  }

  nsAString::const_iterator current, end;
  label.BeginReading(current);
  label.EndReading(end);

  // Per-label state carried from one character to the next.
  Script lastScript = Script::INVALID;  // last script other than
                                        // COMMON/INHERITED
  uint32_t previousChar = 0;
  uint32_t baseChar = 0;  // last non-diacritic seen (base char for marks)
  uint32_t savedNumberingSystem = 0;  // zero digit of the first numbering
                                      // system seen; 0 while none seen
// Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0
  HanVariantType savedHanVariant = HVT_NotHan;
#endif

  ScriptCombo savedScript = ScriptCombo::UNSET;

  while (current != end) {
    uint32_t ch = *current++;

    // Combine a surrogate pair into a single code point.
    if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
      ch = SURROGATE_TO_UCS4(ch, *current++);
    }

    IdentifierType idType = GetIdentifierType(ch);
    if (idType == IDTYPE_RESTRICTED) {
      return false;
    }
    MOZ_ASSERT(idType == IDTYPE_ALLOWED);

    // Check for mixed script
    Script script = UnicodeProperties::GetScriptCode(ch);
    if (script != Script::COMMON && script != Script::INHERITED &&
        script != lastScript) {
      // illegalScriptCombo() also updates savedScript.
      if (illegalScriptCombo(script, savedScript)) {
        return false;
      }
    }

    // U+30FC should be preceded by a Hiragana/Katakana.
    if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
        lastScript != Script::KATAKANA) {
      return false;
    }

    // Reject U+0307 immediately after 'i', 'j' or 'l'.
    if (ch == 0x307 &&
        (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
      return false;
    }

    // Check for mixed numbering systems
    auto genCat = GetGeneralCategory(ch);
    if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
      // A numbering system is identified by the code point of its zero digit.
      uint32_t zeroCharacter =
          ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
      if (savedNumberingSystem == 0) {
        // If we encounter a decimal number, save the zero character from that
        // numbering system.
        savedNumberingSystem = zeroCharacter;
      } else if (zeroCharacter != savedNumberingSystem) {
        return false;
      }
    }

    if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
      // Check for consecutive non-spacing marks.
      if (previousChar != 0 && previousChar == ch) {
        return false;
      }
      // Check for marks whose expected script doesn't match the base script.
      if (lastScript != Script::INVALID) {
        UnicodeProperties::ScriptExtensionVector scripts;
        auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
        MOZ_ASSERT(extResult.isOk());
        if (extResult.isErr()) {
          return false;
        }

        int nScripts = AssertedCast<int>(scripts.length());

        // nScripts will always be >= 1, because even for undefined characters
        // it will return Script::INVALID.
        // If the mark just has script=COMMON or INHERITED, we can't check any
        // more carefully, but if it has specific scriptExtension codes, then
        // assume those are the only valid scripts to use it with.
        if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
                             Script(scripts[0]) != Script::INHERITED)) {
          // Search the extensions from last to first for lastScript; nScripts
          // ends at -1 only when no entry matched.
          while (--nScripts >= 0) {
            if (Script(scripts[nScripts]) == lastScript) {
              break;
            }
          }
          if (nScripts == -1) {
            return false;
          }
        }
      }
      // Check for diacritics on dotless-i, which would be indistinguishable
      // from normal accented letter i.
      if (baseChar == 0x0131 &&
          ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
        return false;
      }
    } else {
      baseChar = ch;
    }

    if (script != Script::COMMON && script != Script::INHERITED) {
      lastScript = script;
    }

    // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0

    // Check for both simplified-only and traditional-only Chinese characters
    HanVariantType hanVariant = GetHanVariant(ch);
    if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
      if (savedHanVariant == HVT_NotHan) {
        savedHanVariant = hanVariant;
      } else if (hanVariant != savedHanVariant)  {
        return false;
      }
    }
#endif

    previousChar = ch;
  }
  return true;
}
 791
 792 // Scripts that we care about in illegalScriptCombo
 793 static inline ScriptCombo findScriptIndex(Script aScript) {
 794   switch (aScript) {
 795     case Script::BOPOMOFO:
 796       return ScriptCombo::BOPO;
 797     case Script::CYRILLIC:
 798       return ScriptCombo::CYRL;
 799     case Script::GREEK:
 800       return ScriptCombo::GREK;
 801     case Script::HANGUL:
 802       return ScriptCombo::HANG;
 803     case Script::HAN:
 804       return ScriptCombo::HANI;
 805     case Script::HIRAGANA:
 806       return ScriptCombo::HIRA;
 807     case Script::KATAKANA:
 808       return ScriptCombo::KATA;
 809     case Script::LATIN:
 810       return ScriptCombo::LATN;
 811     default:
 812       return ScriptCombo::OTHR;
 813   }
 814 }
 815
// Transition table used by nsIDNService::illegalScriptCombo().
// Row: the combination accumulated so far (savedScript, BOPO..HNLT).
// Column: the script of the current character as returned by
// findScriptIndex() (BOPO..OTHR).
// Entry: the new accumulated combination, or FAIL when the scripts may not be
// mixed.
static const ScriptCombo scriptComboTable[13][9] = {
    /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
     * savedScript */
    /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
    /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
    /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
    /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
    /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
    /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
    /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
    /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
    /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
 832
 833 bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) {
 834   if (savedScript == ScriptCombo::UNSET) {
 835     savedScript = findScriptIndex(script);
 836     return false;
 837   }
 838
 839   savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
 840   /*
 841    * Special case combinations that depend on which profile is in use
 842    * In the Highly Restrictive profile Latin is not allowed with any
 843    *  other script
 844    *
 845    * In the Moderately Restrictive profile Latin mixed with any other
 846    *  single script is allowed.
 847    */
 848   return ((savedScript == OTHR &&
 849            mRestrictionProfile == eHighlyRestrictiveProfile) ||
 850           savedScript == FAIL);
 851 }