netwerk/dns/nsIDNService.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "MainThreadUtils.h"
   7 #include "mozilla/ClearOnShutdown.h"
   8 #include "mozilla/Preferences.h"
   9 #include "nsIDNService.h"
  10 #include "nsReadableUtils.h"
  11 #include "nsCRT.h"
  12 #include "nsServiceManagerUtils.h"
  13 #include "nsString.h"
  14 #include "nsStringFwd.h"
  15 #include "nsUnicharUtils.h"
  16 #include "nsUnicodeProperties.h"
  17 #include "harfbuzz/hb.h"
  18 #include "punycode.h"
  19 #include "mozilla/ArrayUtils.h"
  20 #include "mozilla/Casting.h"
  21 #include "mozilla/StaticPrefs_network.h"
  22 #include "mozilla/TextUtils.h"
  23 #include "mozilla/Utf8.h"
  24 #include "mozilla/intl/FormatBuffer.h"
  25 #include "mozilla/intl/UnicodeProperties.h"
  26 #include "mozilla/intl/UnicodeScriptCodes.h"
  27
  28 #include "ICUUtils.h"
  29
  30 using namespace mozilla;
  31 using namespace mozilla::intl;
  32 using namespace mozilla::unicode;
  33 using namespace mozilla::net;
  34 using mozilla::Preferences;
  35
  36 // Currently we use the non-transitional processing option -- see
  37 // http://unicode.org/reports/tr46/
  38 // To switch to transitional processing, change the value of this flag
  39 // and kTransitionalProcessing in netwerk/test/unit/test_idna2008.js to true
  40 // (revert bug 1218179).
  41 const intl::IDNA::ProcessingType kIDNA2008_DefaultProcessingType =
  42     intl::IDNA::ProcessingType::NonTransitional;
  43
  44 //-----------------------------------------------------------------------------
// According to RFC 1034 - 3.1. Name space specifications and terminology
// the maximum label size would be 63. However, this is enforced at the DNS
// level and none of the other browsers seem to enforce the VerifyDnsLength
// check in https://unicode.org/reports/tr46/#ToASCII
// Instead, we choose a rather arbitrary but larger size.
  50 static const uint32_t kMaxULabelSize = 256;
  51 // RFC 3490 - 5.   ACE prefix
  52 static const char kACEPrefix[] = "xn--";
  53
  54 //-----------------------------------------------------------------------------
  55
// Names of the preferences this service observes (they are collected in
// gCallbackPrefs). prefsChanged() rebuilds mIDNBlocklist when either of the
// two *_chars prefs changes, and re-reads mRestrictionProfile when the
// restriction_profile pref changes.
#define NS_NET_PREF_EXTRAALLOWED "network.IDN.extra_allowed_chars"
#define NS_NET_PREF_EXTRABLOCKED "network.IDN.extra_blocked_chars"
#define NS_NET_PREF_IDNRESTRICTION "network.IDN.restriction_profile"
  59
  60 static inline bool isOnlySafeChars(const nsString& in,
  61                                    const nsTArray<BlocklistRange>& aBlocklist) {
  62   if (aBlocklist.IsEmpty()) {
  63     return true;
  64   }
  65   const char16_t* cur = in.BeginReading();
  66   const char16_t* end = in.EndReading();
  67
  68   for (; cur < end; ++cur) {
  69     if (CharInBlocklist(*cur, aBlocklist)) {
  70       return false;
  71     }
  72   }
  73   return true;
  74 }
  75
  76 //-----------------------------------------------------------------------------
  77 // nsIDNService
  78 //-----------------------------------------------------------------------------
  79
/* nsISupports implementation for nsIDNService, exposing nsIIDNService. */
NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
  82
// Null-terminated list of the pref names that Init() passes to
// Preferences::RegisterPrefixCallbacks / UnregisterPrefixCallbacks.
static const char* gCallbackPrefs[] = {
    NS_NET_PREF_EXTRAALLOWED,
    NS_NET_PREF_EXTRABLOCKED,
    NS_NET_PREF_IDNRESTRICTION,
    nullptr,
};
  89
  90 nsresult nsIDNService::Init() {
  91   MOZ_ASSERT(NS_IsMainThread());
  92   // Take a strong reference for our listener with the preferences service,
  93   // which we will release on shutdown.
  94   // It's OK if we remove the observer a bit early, as it just means we won't
  95   // respond to `network.IDN.extra_{allowed,blocked}_chars` and
  96   // `network.IDN.restriction_profile` pref changes during shutdown.
  97   Preferences::RegisterPrefixCallbacks(PrefChanged, gCallbackPrefs, this);
  98   RunOnShutdown(
  99       [self = RefPtr{this}]() mutable {
 100         Preferences::UnregisterPrefixCallbacks(PrefChanged, gCallbackPrefs,
 101                                                self.get());
 102         self = nullptr;
 103       },
 104       ShutdownPhase::XPCOMWillShutdown);
 105   prefsChanged(nullptr);
 106
 107   return NS_OK;
 108 }
 109
 110 void nsIDNService::prefsChanged(const char* pref) {
 111   MOZ_ASSERT(NS_IsMainThread());
 112   AutoWriteLock lock(mLock);
 113
 114   if (!pref || nsLiteralCString(NS_NET_PREF_EXTRAALLOWED).Equals(pref) ||
 115       nsLiteralCString(NS_NET_PREF_EXTRABLOCKED).Equals(pref)) {
 116     InitializeBlocklist(mIDNBlocklist);
 117   }
 118   if (!pref || nsLiteralCString(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
 119     nsAutoCString profile;
 120     if (NS_FAILED(
 121             Preferences::GetCString(NS_NET_PREF_IDNRESTRICTION, profile))) {
 122       profile.Truncate();
 123     }
 124     if (profile.EqualsLiteral("moderate")) {
 125       mRestrictionProfile = eModeratelyRestrictiveProfile;
 126     } else if (profile.EqualsLiteral("high")) {
 127       mRestrictionProfile = eHighlyRestrictiveProfile;
 128     } else {
 129       mRestrictionProfile = eASCIIOnlyProfile;
 130     }
 131   }
 132 }
 133
 134 nsIDNService::nsIDNService() {
 135   MOZ_ASSERT(NS_IsMainThread());
 136
 137   auto createResult =
 138       mozilla::intl::IDNA::TryCreate(kIDNA2008_DefaultProcessingType);
 139   MOZ_ASSERT(createResult.isOk());
 140   mIDNA = createResult.unwrap();
 141 }
 142
// Nothing to release by hand; the members' own destructors do the work.
nsIDNService::~nsIDNService() = default;
 144
 145 nsresult nsIDNService::IDNA2008ToUnicode(const nsACString& input,
 146                                          nsAString& output) {
 147   NS_ConvertUTF8toUTF16 inputStr(input);
 148
 149   Span<const char16_t> inputSpan{inputStr};
 150   intl::nsTStringToBufferAdapter buffer(output);
 151   auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
 152
 153   nsresult rv = NS_OK;
 154   if (result.isErr()) {
 155     rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
 156     if (rv == NS_ERROR_FAILURE) {
 157       rv = NS_ERROR_MALFORMED_URI;
 158     }
 159   }
 160   NS_ENSURE_SUCCESS(rv, rv);
 161
 162   intl::IDNA::Info info = result.unwrap();
 163   if (info.HasErrors()) {
 164     rv = NS_ERROR_MALFORMED_URI;
 165   }
 166
 167   return rv;
 168 }
 169
 170 nsresult nsIDNService::IDNA2008StringPrep(const nsAString& input,
 171                                           nsAString& output,
 172                                           stringPrepFlag flag) {
 173   Span<const char16_t> inputSpan{input};
 174   intl::nsTStringToBufferAdapter buffer(output);
 175   auto result = mIDNA->LabelToUnicode(inputSpan, buffer);
 176
 177   nsresult rv = NS_OK;
 178   if (result.isErr()) {
 179     rv = ICUUtils::ICUErrorToNsResult(result.unwrapErr());
 180     if (rv == NS_ERROR_FAILURE) {
 181       rv = NS_ERROR_MALFORMED_URI;
 182     }
 183   }
 184   NS_ENSURE_SUCCESS(rv, rv);
 185
 186   intl::IDNA::Info info = result.unwrap();
 187
 188   // Output the result of nameToUnicode even if there were errors.
 189   // But in the case of invalid punycode, the uidna_labelToUnicode result
 190   // appears to get an appended U+FFFD REPLACEMENT CHARACTER, which will
 191   // confuse our subsequent processing, so we drop that.
 192   // (https://bugzilla.mozilla.org/show_bug.cgi?id=1399540#c9)
 193   if ((info.HasInvalidPunycode() || info.HasInvalidAceLabel()) &&
 194       !output.IsEmpty() && output.Last() == 0xfffd) {
 195     output.Truncate(output.Length() - 1);
 196   }
 197
 198   if (flag == eStringPrepIgnoreErrors) {
 199     return NS_OK;
 200   }
 201
 202   if (flag == eStringPrepForDNS) {
 203     // We ignore errors if the result is empty, or if the errors were just
 204     // invalid hyphens (not punycode-decoding failure or invalid chars).
 205     if (!output.IsEmpty()) {
 206       if (info.HasErrorsIgnoringInvalidHyphen()) {
 207         output.Truncate();
 208         rv = NS_ERROR_MALFORMED_URI;
 209       }
 210     }
 211   } else {
 212     if (info.HasErrors()) {
 213       rv = NS_ERROR_MALFORMED_URI;
 214     }
 215   }
 216
 217   return rv;
 218 }
 219
 220 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
 221                                              nsACString& ace) {
 222   return UTF8toACE(input, ace, eStringPrepForDNS);
 223 }
 224
 225 nsresult nsIDNService::UTF8toACE(const nsACString& input, nsACString& ace,
 226                                  stringPrepFlag flag) {
 227   nsresult rv;
 228   NS_ConvertUTF8toUTF16 ustr(input);
 229
 230   // map ideographic period to ASCII period etc.
 231   normalizeFullStops(ustr);
 232
 233   uint32_t len, offset;
 234   len = 0;
 235   offset = 0;
 236   nsAutoCString encodedBuf;
 237
 238   nsAString::const_iterator start, end;
 239   ustr.BeginReading(start);
 240   ustr.EndReading(end);
 241   ace.Truncate();
 242
 243   // encode nodes if non ASCII
 244   while (start != end) {
 245     len++;
 246     if (*start++ == (char16_t)'.') {
 247       rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf, flag);
 248       NS_ENSURE_SUCCESS(rv, rv);
 249
 250       ace.Append(encodedBuf);
 251       ace.Append('.');
 252       offset += len;
 253       len = 0;
 254     }
 255   }
 256
 257   // encode the last node if non ASCII
 258   if (len) {
 259     rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf, flag);
 260     NS_ENSURE_SUCCESS(rv, rv);
 261
 262     ace.Append(encodedBuf);
 263   }
 264
 265   return NS_OK;
 266 }
 267
 268 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
 269                                              nsACString& _retval) {
 270   return ACEtoUTF8(input, _retval, eStringPrepForDNS);
 271 }
 272
 273 nsresult nsIDNService::ACEtoUTF8(const nsACString& input, nsACString& _retval,
 274                                  stringPrepFlag flag) {
 275   // RFC 3490 - 4.2 ToUnicode
 276   // ToUnicode never fails.  If any step fails, then the original input
 277   // sequence is returned immediately in that step.
 278   //
 279   // Note that this refers to the decoding of a single label.
 280   // ACEtoUTF8 may be called with a sequence of labels separated by dots;
 281   // this test applies individually to each label.
 282
 283   uint32_t len = 0, offset = 0;
 284   nsAutoCString decodedBuf;
 285
 286   nsACString::const_iterator start, end;
 287   input.BeginReading(start);
 288   input.EndReading(end);
 289   _retval.Truncate();
 290
 291   if (input.IsEmpty()) {
 292     return NS_OK;
 293   }
 294
 295   nsAutoCString tld;
 296   nsCString::const_iterator it = end, tldEnd = end;
 297   --it;
 298   if (it != start && *it == (char16_t)'.') {
 299     // This is an FQDN (ends in .)
 300     // Skip this dot to extract the TLD
 301     tldEnd = it;
 302     --it;
 303   }
 304   // Find last . and compute TLD
 305   while (it != start) {
 306     if (*it == (char16_t)'.') {
 307       ++it;
 308       tld.Assign(Substring(it, tldEnd));
 309       break;
 310     }
 311     --it;
 312   }
 313
 314   // loop and decode nodes
 315   while (start != end) {
 316     len++;
 317     if (*start++ == '.') {
 318       nsDependentCSubstring origLabel(input, offset, len - 1);
 319       if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag, tld))) {
 320         // If decoding failed, use the original input sequence
 321         // for this label.
 322         _retval.Append(origLabel);
 323       } else {
 324         _retval.Append(decodedBuf);
 325       }
 326
 327       _retval.Append('.');
 328       offset += len;
 329       len = 0;
 330     }
 331   }
 332   // decode the last node
 333   if (len) {
 334     nsDependentCSubstring origLabel(input, offset, len);
 335     if (NS_FAILED(decodeACE(origLabel, decodedBuf, flag, tld))) {
 336       _retval.Append(origLabel);
 337     } else {
 338       _retval.Append(decodedBuf);
 339     }
 340   }
 341
 342   return NS_OK;
 343 }
 344
 345 NS_IMETHODIMP nsIDNService::IsACE(const nsACString& input, bool* _retval) {
 346   // look for the ACE prefix in the input string.  it may occur
 347   // at the beginning of any segment in the domain name.  for
 348   // example: "www.xn--ENCODED.com"
 349
 350   if (!IsAscii(input)) {
 351     *_retval = false;
 352     return NS_OK;
 353   }
 354
 355   auto stringContains = [](const nsACString& haystack,
 356                            const nsACString& needle) {
 357     return std::search(haystack.BeginReading(), haystack.EndReading(),
 358                        needle.BeginReading(), needle.EndReading(),
 359                        [](unsigned char ch1, unsigned char ch2) {
 360                          return tolower(ch1) == tolower(ch2);
 361                        }) != haystack.EndReading();
 362   };
 363
 364   *_retval =
 365       StringBeginsWith(input, "xn--"_ns, nsCaseInsensitiveCStringComparator) ||
 366       (!input.IsEmpty() && input[0] != '.' &&
 367        stringContains(input, ".xn--"_ns));
 368   return NS_OK;
 369 }
 370
 371 nsresult nsIDNService::Normalize(const nsACString& input, nsACString& output) {
 372   // protect against bogus input
 373   NS_ENSURE_TRUE(IsUtf8(input), NS_ERROR_UNEXPECTED);
 374
 375   NS_ConvertUTF8toUTF16 inUTF16(input);
 376   normalizeFullStops(inUTF16);
 377
 378   // pass the domain name to stringprep label by label
 379   nsAutoString outUTF16, outLabel;
 380
 381   uint32_t len = 0, offset = 0;
 382   nsresult rv;
 383   nsAString::const_iterator start, end;
 384   inUTF16.BeginReading(start);
 385   inUTF16.EndReading(end);
 386
 387   while (start != end) {
 388     len++;
 389     if (*start++ == char16_t('.')) {
 390       rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel,
 391                       eStringPrepIgnoreErrors);
 392       NS_ENSURE_SUCCESS(rv, rv);
 393
 394       outUTF16.Append(outLabel);
 395       outUTF16.Append(char16_t('.'));
 396       offset += len;
 397       len = 0;
 398     }
 399   }
 400   if (len) {
 401     rv = stringPrep(Substring(inUTF16, offset, len), outLabel,
 402                     eStringPrepIgnoreErrors);
 403     NS_ENSURE_SUCCESS(rv, rv);
 404
 405     outUTF16.Append(outLabel);
 406   }
 407
 408   CopyUTF16toUTF8(outUTF16, output);
 409   return NS_OK;
 410 }
 411
 412 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
 413                                                 bool* _isASCII,
 414                                                 nsACString& _retval) {
 415   // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
 416   // Else, if host is already UTF-8, then make sure it is normalized per IDN.
 417
 418   nsresult rv = NS_OK;
 419
 420   // Even if the hostname is not ASCII, individual labels may still be ACE, so
 421   // test IsACE before testing IsASCII
 422   bool isACE;
 423   IsACE(input, &isACE);
 424
 425   if (IsAscii(input)) {
 426     // first, canonicalize the host to lowercase, for whitelist lookup
 427     _retval = input;
 428     ToLowerCase(_retval);
 429
 430     if (isACE && !StaticPrefs::network_IDN_show_punycode()) {
 431       // ACEtoUTF8() can't fail, but might return the original ACE string
 432       nsAutoCString temp(_retval);
 433       // Convert from ACE to UTF8 only those labels which are considered safe
 434       // for display
 435       ACEtoUTF8(temp, _retval, eStringPrepForUI);
 436       *_isASCII = IsAscii(_retval);
 437     } else {
 438       *_isASCII = true;
 439     }
 440   } else {
 441     // We have to normalize the hostname before testing against the domain
 442     // whitelist (see bug 315411), and to ensure the entire string gets
 443     // normalized.
 444     //
 445     // Normalization and the tests for safe display below, assume that the
 446     // input is Unicode, so first convert any ACE labels to UTF8
 447     if (isACE) {
 448       nsAutoCString temp;
 449       ACEtoUTF8(input, temp, eStringPrepIgnoreErrors);
 450       rv = Normalize(temp, _retval);
 451     } else {
 452       rv = Normalize(input, _retval);
 453     }
 454     if (NS_FAILED(rv)) {
 455       return rv;
 456     }
 457
 458     if (StaticPrefs::network_IDN_show_punycode() &&
 459         NS_SUCCEEDED(UTF8toACE(_retval, _retval, eStringPrepIgnoreErrors))) {
 460       *_isASCII = true;
 461       return NS_OK;
 462     }
 463
 464     // normalization could result in an ASCII-only hostname. alternatively, if
 465     // the host is converted to ACE by the normalizer, then the host may contain
 466     // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694,
 467     // and bug 309311.
 468     *_isASCII = IsAscii(_retval);
 469     if (!*_isASCII) {
 470       // UTF8toACE with eStringPrepForUI may return a domain name where
 471       // some labels are in UTF-8 and some are in ACE, depending on
 472       // whether they are considered safe for display
 473       rv = UTF8toACE(_retval, _retval, eStringPrepForUI);
 474       *_isASCII = IsAscii(_retval);
 475       return rv;
 476     }
 477   }
 478
 479   return NS_OK;
 480 }  // Will generate a mutex still-held warning
 481
 482 //-----------------------------------------------------------------------------
 483
 484 static nsresult utf16ToUcs4(const nsAString& in, uint32_t* out,
 485                             uint32_t outBufLen, uint32_t* outLen) {
 486   uint32_t i = 0;
 487   nsAString::const_iterator start, end;
 488   in.BeginReading(start);
 489   in.EndReading(end);
 490
 491   while (start != end) {
 492     char16_t curChar;
 493
 494     curChar = *start++;
 495
 496     if (start != end && NS_IS_SURROGATE_PAIR(curChar, *start)) {
 497       out[i] = SURROGATE_TO_UCS4(curChar, *start);
 498       ++start;
 499     } else {
 500       out[i] = curChar;
 501     }
 502
 503     i++;
 504     if (i >= outBufLen) {
 505       return NS_ERROR_MALFORMED_URI;
 506     }
 507   }
 508   out[i] = (uint32_t)'\0';
 509   *outLen = i;
 510   return NS_OK;
 511 }
 512
 513 static nsresult punycode(const nsAString& in, nsACString& out) {
 514   uint32_t ucs4Buf[kMaxULabelSize + 1];
 515   uint32_t ucs4Len = 0u;
 516   nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxULabelSize, &ucs4Len);
 517   NS_ENSURE_SUCCESS(rv, rv);
 518
 519   // need maximum 20 bits to encode 16 bit Unicode character
 520   // (include null terminator)
 521   const uint32_t kEncodedBufSize = kMaxULabelSize * 20 / 8 + 1 + 1;
 522   char encodedBuf[kEncodedBufSize];
 523   punycode_uint encodedLength = kEncodedBufSize;
 524
 525   enum punycode_status status =
 526       punycode_encode(ucs4Len, ucs4Buf, nullptr, &encodedLength, encodedBuf);
 527
 528   if (punycode_success != status || encodedLength >= kEncodedBufSize) {
 529     return NS_ERROR_MALFORMED_URI;
 530   }
 531
 532   encodedBuf[encodedLength] = '\0';
 533   out.Assign(nsDependentCString(kACEPrefix) + nsDependentCString(encodedBuf));
 534
 535   return rv;
 536 }
 537
 538 // RFC 3454
 539 //
 540 // 1) Map -- For each character in the input, check if it has a mapping
 541 // and, if so, replace it with its mapping. This is described in section 3.
 542 //
 543 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
 544 // normalization. This is described in section 4.
 545 //
 546 // 3) Prohibit -- Check for any characters that are not allowed in the
 547 // output. If any are found, return an error. This is described in section
 548 // 5.
 549 //
 550 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
 551 // are found, make sure that the whole string satisfies the requirements
 552 // for bidirectional strings. If the string does not satisfy the requirements
 553 // for bidirectional strings, return an error. This is described in section 6.
 554 //
 555 // 5) Check unassigned code points -- If allowUnassigned is false, check for
 556 // any unassigned Unicode points and if any are found return an error.
 557 // This is described in section 7.
 558 //
 559 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
 560                                   stringPrepFlag flag) {
 561   return IDNA2008StringPrep(in, out, flag);
 562 }
 563
 564 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
 565                                         stringPrepFlag flag) {
 566   nsresult rv = NS_OK;
 567
 568   out.Truncate();
 569
 570   if (IsAscii(in)) {
 571     LossyCopyUTF16toASCII(in, out);
 572     // If label begins with xn-- we still want to check its validity
 573     if (!StringBeginsWith(in, u"xn--"_ns, nsCaseInsensitiveStringComparator)) {
 574       return NS_OK;
 575     }
 576   }
 577
 578   nsAutoString strPrep;
 579   rv = stringPrep(in, strPrep, flag);
 580   if (flag == eStringPrepForDNS) {
 581     NS_ENSURE_SUCCESS(rv, rv);
 582   }
 583
 584   if (IsAscii(strPrep)) {
 585     LossyCopyUTF16toASCII(strPrep, out);
 586     return NS_OK;
 587   }
 588
 589   if (flag == eStringPrepForUI && NS_SUCCEEDED(rv) && isLabelSafe(in, u""_ns)) {
 590     CopyUTF16toUTF8(strPrep, out);
 591     return NS_OK;
 592   }
 593
 594   return punycode(strPrep, out);
 595 }
 596
 597 // RFC 3490
 598 // 1) Whenever dots are used as label separators, the following characters
 599 //    MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
 600 //    stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
 601 //    stop).
 602
 603 void nsIDNService::normalizeFullStops(nsAString& s) {
 604   nsAString::const_iterator start, end;
 605   s.BeginReading(start);
 606   s.EndReading(end);
 607   int32_t index = 0;
 608
 609   while (start != end) {
 610     switch (*start) {
 611       case 0x3002:
 612       case 0xFF0E:
 613       case 0xFF61:
 614         s.ReplaceLiteral(index, 1, u".");
 615         break;
 616       default:
 617         break;
 618     }
 619     start++;
 620     index++;
 621   }
 622 }
 623
 624 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
 625                                  stringPrepFlag flag, const nsACString& aTLD) {
 626   bool isAce;
 627   IsACE(in, &isAce);
 628   if (!isAce) {
 629     out.Assign(in);
 630     return NS_OK;
 631   }
 632
 633   nsAutoString utf16;
 634   nsresult result = IDNA2008ToUnicode(in, utf16);
 635   NS_ENSURE_SUCCESS(result, result);
 636
 637   NS_ConvertUTF8toUTF16 tld(aTLD);
 638
 639   if (flag != eStringPrepForUI || isLabelSafe(utf16, tld)) {
 640     CopyUTF16toUTF8(utf16, out);
 641   } else {
 642     out.Assign(in);
 643     return NS_OK;
 644   }
 645
 646   // Validation: encode back to ACE and compare the strings
 647   nsAutoCString ace;
 648   nsresult rv = UTF8toACE(out, ace, flag);
 649   NS_ENSURE_SUCCESS(rv, rv);
 650
 651   if (flag == eStringPrepForDNS &&
 652       !ace.Equals(in, nsCaseInsensitiveCStringComparator)) {
 653     return NS_ERROR_MALFORMED_URI;
 654   }
 655
 656   return NS_OK;
 657 }
 658
namespace mozilla::net {

// State tracked by illegalScriptCombo() while a label is scanned.
//
// The numeric values matter: BOPO..HNLT are used directly as row indices and
// BOPO..OTHR as column indices of scriptComboTable, so they must stay in step
// with that table's layout.
enum ScriptCombo : int32_t {
  UNSET = -1,  // No script has been recorded for the label yet
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,   // What findScriptIndex() returns for every other script
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)
  FAIL = 13,  // Table result for a combination that is not allowed
};

}  // namespace mozilla::net
 680
 681 bool nsIDNService::isLabelSafe(const nsAString& label, const nsAString& tld) {
 682   restrictionProfile profile{eASCIIOnlyProfile};
 683   {
 684     AutoReadLock lock(mLock);
 685
 686     if (!isOnlySafeChars(PromiseFlatString(label), mIDNBlocklist)) {
 687       return false;
 688     }
 689
 690     // We should never get here if the label is ASCII
 691     NS_ASSERTION(!IsAscii(label), "ASCII label in IDN checking");
 692     if (mRestrictionProfile == eASCIIOnlyProfile) {
 693       return false;
 694     }
 695     profile = mRestrictionProfile;
 696   }
 697
 698   nsAString::const_iterator current, end;
 699   label.BeginReading(current);
 700   label.EndReading(end);
 701
 702   Script lastScript = Script::INVALID;
 703   uint32_t previousChar = 0;
 704   uint32_t baseChar = 0;  // last non-diacritic seen (base char for marks)
 705   uint32_t savedNumberingSystem = 0;
 706 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
 707 #if 0
 708   HanVariantType savedHanVariant = HVT_NotHan;
 709 #endif
 710
 711   ScriptCombo savedScript = ScriptCombo::UNSET;
 712
 713   while (current != end) {
 714     uint32_t ch = *current++;
 715
 716     if (current != end && NS_IS_SURROGATE_PAIR(ch, *current)) {
 717       ch = SURROGATE_TO_UCS4(ch, *current++);
 718     }
 719
 720     IdentifierType idType = GetIdentifierType(ch);
 721     if (idType == IDTYPE_RESTRICTED) {
 722       return false;
 723     }
 724     MOZ_ASSERT(idType == IDTYPE_ALLOWED);
 725
 726     // Check for mixed script
 727     Script script = UnicodeProperties::GetScriptCode(ch);
 728     if (script != Script::COMMON && script != Script::INHERITED &&
 729         script != lastScript) {
 730       if (illegalScriptCombo(profile, script, savedScript)) {
 731         return false;
 732       }
 733     }
 734
 735     // U+30FC should be preceded by a Hiragana/Katakana.
 736     if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
 737         lastScript != Script::KATAKANA) {
 738       return false;
 739     }
 740
 741     Script nextScript = Script::INVALID;
 742     if (current != end) {
 743       nextScript = UnicodeProperties::GetScriptCode(*current);
 744     }
 745
 746     if (ch == 0x30FB &&
 747         (lastScript == Script::LATIN || nextScript == Script::LATIN)) {
 748       return false;
 749     }
 750
 751     if (ch == 0x307 &&
 752         (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
 753       return false;
 754     }
 755
 756     // U+00B7 is only allowed on Catalan domains between two l's.
 757     if (ch == 0xB7 && (!tld.EqualsLiteral("cat") || previousChar != 'l' ||
 758                        current == end || *current != 'l')) {
 759       return false;
 760     }
 761
 762     // Disallow Icelandic confusables for domains outside Icelandic and Faroese
 763     // ccTLD (.is, .fo)
 764     if ((ch == 0xFE || ch == 0xF0) && !tld.EqualsLiteral("is") &&
 765         !tld.EqualsLiteral("fo")) {
 766       return false;
 767     }
 768
 769     // Block single/double-quote-like characters.
 770     if (ch == 0x2BB || ch == 0x2BC) {
 771       return false;
 772     }
 773
 774     // Check for mixed numbering systems
 775     auto genCat = GetGeneralCategory(ch);
 776     if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
 777       uint32_t zeroCharacter =
 778           ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
 779       if (savedNumberingSystem == 0) {
 780         // If we encounter a decimal number, save the zero character from that
 781         // numbering system.
 782         savedNumberingSystem = zeroCharacter;
 783       } else if (zeroCharacter != savedNumberingSystem) {
 784         return false;
 785       }
 786     }
 787
 788     if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
 789       // Check for consecutive non-spacing marks.
 790       if (previousChar != 0 && previousChar == ch) {
 791         return false;
 792       }
 793       // Check for marks whose expected script doesn't match the base script.
 794       if (lastScript != Script::INVALID) {
 795         UnicodeProperties::ScriptExtensionVector scripts;
 796         auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
 797         MOZ_ASSERT(extResult.isOk());
 798         if (extResult.isErr()) {
 799           return false;
 800         }
 801
 802         int nScripts = AssertedCast<int>(scripts.length());
 803
 804         // nScripts will always be >= 1, because even for undefined characters
 805         // it will return Script::INVALID.
 806         // If the mark just has script=COMMON or INHERITED, we can't check any
 807         // more carefully, but if it has specific scriptExtension codes, then
 808         // assume those are the only valid scripts to use it with.
 809         if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
 810                              Script(scripts[0]) != Script::INHERITED)) {
 811           while (--nScripts >= 0) {
 812             if (Script(scripts[nScripts]) == lastScript) {
 813               break;
 814             }
 815           }
 816           if (nScripts == -1) {
 817             return false;
 818           }
 819         }
 820       }
 821       // Check for diacritics on dotless-i, which would be indistinguishable
 822       // from normal accented letter i.
 823       if (baseChar == 0x0131 &&
 824           ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
 825         return false;
 826       }
 827     } else {
 828       baseChar = ch;
 829     }
 830
 831     if (script != Script::COMMON && script != Script::INHERITED) {
 832       lastScript = script;
 833     }
 834
 835     // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
 836 #if 0
 837
 838     // Check for both simplified-only and traditional-only Chinese characters
 839     HanVariantType hanVariant = GetHanVariant(ch);
 840     if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
 841       if (savedHanVariant == HVT_NotHan) {
 842         savedHanVariant = hanVariant;
 843       } else if (hanVariant != savedHanVariant)  {
 844         return false;
 845       }
 846     }
 847 #endif
 848
 849     previousChar = ch;
 850   }
 851   return true;
 852 }
 853
 854 // Scripts that we care about in illegalScriptCombo
 855 static inline ScriptCombo findScriptIndex(Script aScript) {
 856   switch (aScript) {
 857     case Script::BOPOMOFO:
 858       return ScriptCombo::BOPO;
 859     case Script::CYRILLIC:
 860       return ScriptCombo::CYRL;
 861     case Script::GREEK:
 862       return ScriptCombo::GREK;
 863     case Script::HANGUL:
 864       return ScriptCombo::HANG;
 865     case Script::HAN:
 866       return ScriptCombo::HANI;
 867     case Script::HIRAGANA:
 868       return ScriptCombo::HIRA;
 869     case Script::KATAKANA:
 870       return ScriptCombo::KATA;
 871     case Script::LATIN:
 872       return ScriptCombo::LATN;
 873     default:
 874       return ScriptCombo::OTHR;
 875   }
 876 }
 877
// Transition table used by illegalScriptCombo(). The row is the combination
// accumulated so far (savedScript, BOPO..HNLT), the column is the
// findScriptIndex() value of the newly seen script (BOPO..OTHR), and the cell
// is the new accumulated combination, or FAIL when the mix is not allowed.
static const ScriptCombo scriptComboTable[13][9] = {
    /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
     * savedScript */
    /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
    /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
    /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
    /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
    /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
    /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
    /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
    /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
    /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
 894
 895 bool nsIDNService::illegalScriptCombo(restrictionProfile profile, Script script,
 896                                       ScriptCombo& savedScript) {
 897   if (savedScript == ScriptCombo::UNSET) {
 898     savedScript = findScriptIndex(script);
 899     return false;
 900   }
 901
 902   savedScript = scriptComboTable[savedScript][findScriptIndex(script)];
 903   /*
 904    * Special case combinations that depend on which profile is in use
 905    * In the Highly Restrictive profile Latin is not allowed with any
 906    *  other script
 907    *
 908    * In the Moderately Restrictive profile Latin mixed with any other
 909    *  single script is allowed.
 910    */
 911   return ((savedScript == OTHR && profile == eHighlyRestrictiveProfile) ||
 912           savedScript == FAIL);
 913 }