Bug 1866777 - Disable test_race_cache_with_network.js on windows opt for frequent...
[gecko.git] / netwerk / dns / nsEffectiveTLDService.cpp
blobfafbc296d5f3f45f5d5aece5676de978fb3eca41
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/HashFunctions.h"
13 #include "mozilla/MemoryReporting.h"
14 #include "mozilla/ResultExtensions.h"
15 #include "mozilla/TextUtils.h"
16 #include "mozilla/Try.h"
18 #include "MainThreadUtils.h"
19 #include "nsContentUtils.h"
20 #include "nsCRT.h"
21 #include "nsEffectiveTLDService.h"
22 #include "nsIFile.h"
23 #include "nsIIDNService.h"
24 #include "nsIObserverService.h"
25 #include "nsIURI.h"
26 #include "nsNetCID.h"
27 #include "nsNetUtil.h"
28 #include "nsServiceManagerUtils.h"
29 #include "mozilla/net/DNS.h"
31 namespace etld_dafsa {
33 // Generated file that includes kDafsa
34 #include "etld_data.inc"
36 } // namespace etld_dafsa
38 using namespace mozilla;
40 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
41 nsIMemoryReporter, nsIObserver)
43 // ----------------------------------------------------------------------
45 static nsEffectiveTLDService* gService = nullptr;
47 nsEffectiveTLDService::nsEffectiveTLDService()
48 : mGraphLock("nsEffectiveTLDService::mGraph") {
49 mGraph.emplace(etld_dafsa::kDafsa);
52 nsresult nsEffectiveTLDService::Init() {
53 MOZ_ASSERT(NS_IsMainThread());
54 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
55 obs->AddObserver(this, "public-suffix-list-updated", false);
57 if (gService) {
58 return NS_ERROR_ALREADY_INITIALIZED;
61 nsresult rv;
62 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
63 if (NS_FAILED(rv)) {
64 return rv;
67 gService = this;
68 RegisterWeakMemoryReporter(this);
70 return NS_OK;
73 NS_IMETHODIMP nsEffectiveTLDService::Observe(nsISupports* aSubject,
74 const char* aTopic,
75 const char16_t* aData) {
76 /**
77 * Signal sent from netwerk/dns/PublicSuffixList.jsm
78 * aSubject is the nsIFile object for dafsa.bin
79 * aData is the absolute path to the dafsa.bin file (not used)
81 if (aSubject && (nsCRT::strcmp(aTopic, "public-suffix-list-updated") == 0)) {
82 nsCOMPtr<nsIFile> mDafsaBinFile(do_QueryInterface(aSubject));
83 NS_ENSURE_TRUE(mDafsaBinFile, NS_ERROR_ILLEGAL_VALUE);
85 AutoWriteLock lock(mGraphLock);
86 // Reset mGraph with kDafsa in case reassigning to mDafsaMap fails
87 mGraph.reset();
88 mGraph.emplace(etld_dafsa::kDafsa);
90 mDafsaMap.reset();
91 mMruTable.Clear();
93 MOZ_TRY(mDafsaMap.init(mDafsaBinFile));
95 size_t size = mDafsaMap.size();
96 const uint8_t* remoteDafsaPtr = mDafsaMap.get<uint8_t>().get();
98 auto remoteDafsa = mozilla::Span(remoteDafsaPtr, size);
100 mGraph.reset();
101 mGraph.emplace(remoteDafsa);
103 return NS_OK;
106 nsEffectiveTLDService::~nsEffectiveTLDService() {
107 UnregisterWeakMemoryReporter(this);
108 if (mIDNService) {
109 // Only clear gService if Init() finished successfully.
110 gService = nullptr;
114 // static
115 nsEffectiveTLDService* nsEffectiveTLDService::GetInstance() {
116 if (gService) {
117 return gService;
119 nsCOMPtr<nsIEffectiveTLDService> tldService =
120 do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID);
121 if (!tldService) {
122 return nullptr;
124 MOZ_ASSERT(
125 gService,
126 "gService must have been initialized in nsEffectiveTLDService::Init");
127 return gService;
130 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
132 // The amount of heap memory measured here is tiny. It used to be bigger when
133 // nsEffectiveTLDService used a separate hash table instead of binary search.
134 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
135 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
136 NS_IMETHODIMP
137 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
138 nsISupports* aData, bool aAnonymize) {
139 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP,
140 UNITS_BYTES,
141 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
142 "Memory used by the effective TLD service.");
144 return NS_OK;
147 size_t nsEffectiveTLDService::SizeOfIncludingThis(
148 mozilla::MallocSizeOf aMallocSizeOf) {
149 size_t n = aMallocSizeOf(this);
151 // Measurement of the following members may be added later if DMD finds it is
152 // worthwhile:
153 // - mIDNService
155 return n;
158 // External function for dealing with URI's correctly.
159 // Pulls out the host portion from an nsIURI, and calls through to
160 // GetPublicSuffixFromHost().
161 NS_IMETHODIMP
162 nsEffectiveTLDService::GetPublicSuffix(nsIURI* aURI,
163 nsACString& aPublicSuffix) {
164 NS_ENSURE_ARG_POINTER(aURI);
166 nsAutoCString host;
167 nsresult rv = NS_GetInnermostURIHost(aURI, host);
168 if (NS_FAILED(rv)) {
169 return rv;
172 return GetBaseDomainInternal(host, 0, false, aPublicSuffix);
175 NS_IMETHODIMP
176 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI* aURI,
177 nsACString& aPublicSuffix) {
178 NS_ENSURE_ARG_POINTER(aURI);
180 nsAutoCString host;
181 nsresult rv = NS_GetInnermostURIHost(aURI, host);
182 if (NS_FAILED(rv)) {
183 return rv;
186 return GetBaseDomainInternal(host, 0, true, aPublicSuffix);
189 // External function for dealing with URI's correctly.
190 // Pulls out the host portion from an nsIURI, and calls through to
191 // GetBaseDomainFromHost().
192 NS_IMETHODIMP
193 nsEffectiveTLDService::GetBaseDomain(nsIURI* aURI, uint32_t aAdditionalParts,
194 nsACString& aBaseDomain) {
195 NS_ENSURE_ARG_POINTER(aURI);
196 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
198 nsAutoCString host;
199 nsresult rv = NS_GetInnermostURIHost(aURI, host);
200 if (NS_FAILED(rv)) {
201 return rv;
204 return GetBaseDomainInternal(host, aAdditionalParts + 1, false, aBaseDomain);
207 // External function for dealing with URIs to get a schemeless site.
208 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
209 // just returning their serialized host.
210 NS_IMETHODIMP
211 nsEffectiveTLDService::GetSchemelessSite(nsIURI* aURI, nsACString& aSite) {
212 NS_ENSURE_ARG_POINTER(aURI);
214 nsresult rv = GetBaseDomain(aURI, 0, aSite);
215 if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
216 rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
217 rv = nsContentUtils::GetHostOrIPv6WithBrackets(aURI, aSite);
219 return rv;
222 // External function for dealing with URIs to get site correctly.
223 // Calls through to GetSchemelessSite(), and serializes with the scheme and
224 // "://" prepended.
225 NS_IMETHODIMP
226 nsEffectiveTLDService::GetSite(nsIURI* aURI, nsACString& aSite) {
227 NS_ENSURE_ARG_POINTER(aURI);
229 nsAutoCString scheme;
230 nsresult rv = aURI->GetScheme(scheme);
231 NS_ENSURE_SUCCESS(rv, rv);
233 nsAutoCString schemeless;
234 rv = GetSchemelessSite(aURI, schemeless);
235 NS_ENSURE_SUCCESS(rv, rv);
237 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
238 if (schemeless.Length() == 1 && schemeless.Last() == '.') {
239 return NS_ERROR_INVALID_ARG;
242 // Reject any URIs without a host that aren't file:// URIs.
243 if (schemeless.IsEmpty() && !aURI->SchemeIs("file")) {
244 return NS_ERROR_INVALID_ARG;
247 aSite.SetCapacity(scheme.Length() + 3 + schemeless.Length());
248 aSite.Append(scheme);
249 aSite.Append("://"_ns);
250 aSite.Append(schemeless);
252 return NS_OK;
255 // External function for dealing with a host string directly: finds the public
256 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
257 NS_IMETHODIMP
258 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString& aHostname,
259 nsACString& aPublicSuffix) {
260 // Create a mutable copy of the hostname and normalize it to ACE.
261 // This will fail if the hostname includes invalid characters.
262 nsAutoCString normHostname(aHostname);
263 nsresult rv = NormalizeHostname(normHostname);
264 if (NS_FAILED(rv)) {
265 return rv;
268 return GetBaseDomainInternal(normHostname, 0, false, aPublicSuffix);
271 NS_IMETHODIMP
272 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString& aHostname,
273 nsACString& aPublicSuffix) {
274 // Create a mutable copy of the hostname and normalize it to ACE.
275 // This will fail if the hostname includes invalid characters.
276 nsAutoCString normHostname(aHostname);
277 nsresult rv = NormalizeHostname(normHostname);
278 if (NS_FAILED(rv)) {
279 return rv;
282 return GetBaseDomainInternal(normHostname, 0, true, aPublicSuffix);
285 // External function for dealing with a host string directly: finds the base
286 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
287 // requested. See GetBaseDomainInternal().
288 NS_IMETHODIMP
289 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString& aHostname,
290 uint32_t aAdditionalParts,
291 nsACString& aBaseDomain) {
292 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
294 // Create a mutable copy of the hostname and normalize it to ACE.
295 // This will fail if the hostname includes invalid characters.
296 nsAutoCString normHostname(aHostname);
297 nsresult rv = NormalizeHostname(normHostname);
298 if (NS_FAILED(rv)) {
299 return rv;
302 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, false,
303 aBaseDomain);
306 NS_IMETHODIMP
307 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
308 nsACString& aBaseDomain) {
309 // Create a mutable copy of the hostname and normalize it to ACE.
310 // This will fail if the hostname includes invalid characters.
311 nsAutoCString normHostname(aHostname);
312 nsresult rv = NormalizeHostname(normHostname);
313 NS_ENSURE_SUCCESS(rv, rv);
315 return GetBaseDomainInternal(normHostname, -1, false, aBaseDomain);
318 // Finds the base domain for a host, with requested number of additional parts.
319 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
320 // if more subdomain parts are requested than are available, or if the hostname
321 // includes characters that are not valid in a URL. Normalization is performed
322 // on the host string and the result will be in UTF8.
323 nsresult nsEffectiveTLDService::GetBaseDomainInternal(
324 nsCString& aHostname, int32_t aAdditionalParts, bool aOnlyKnownPublicSuffix,
325 nsACString& aBaseDomain) {
326 const int kExceptionRule = 1;
327 const int kWildcardRule = 2;
329 if (aHostname.IsEmpty()) {
330 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
333 // chomp any trailing dot, and keep track of it for later
334 bool trailingDot = aHostname.Last() == '.';
335 if (trailingDot) {
336 aHostname.Truncate(aHostname.Length() - 1);
339 // check the edge cases of the host being '.' or having a second trailing '.',
340 // since subsequent checks won't catch it.
341 if (aHostname.IsEmpty() || aHostname.Last() == '.') {
342 return NS_ERROR_INVALID_ARG;
345 // Lookup in the cache if this is a normal query. This is restricted to
346 // main thread-only as the cache is not thread-safe.
347 Maybe<TldCache::Entry> entry;
348 if (aAdditionalParts == 1 && NS_IsMainThread()) {
349 auto p = mMruTable.Lookup(aHostname);
350 if (p) {
351 if (NS_FAILED(p.Data().mResult)) {
352 return p.Data().mResult;
355 // There was a match, just return the cached value.
356 aBaseDomain = p.Data().mBaseDomain;
357 if (trailingDot) {
358 aBaseDomain.Append('.');
361 return NS_OK;
364 entry = Some(p);
367 // Check if we're dealing with an IPv4/IPv6 hostname, and return
368 if (mozilla::net::HostIsIPLiteral(aHostname)) {
369 // Update the MRU table if in use.
370 if (entry) {
371 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_HOST_IS_IP_ADDRESS});
374 return NS_ERROR_HOST_IS_IP_ADDRESS;
377 // Walk up the domain tree, most specific to least specific,
378 // looking for matches at each level. Note that a given level may
379 // have multiple attributes (e.g. IsWild() and IsNormal()).
380 const char* prevDomain = nullptr;
381 const char* currDomain = aHostname.get();
382 const char* nextDot = strchr(currDomain, '.');
383 const char* end = currDomain + aHostname.Length();
384 // Default value of *eTLD is currDomain as set in the while loop below
385 const char* eTLD = nullptr;
386 bool hasKnownPublicSuffix = false;
387 while (true) {
388 // sanity check the string we're about to look up: it should not begin
389 // with a '.'; this would mean the hostname began with a '.' or had an
390 // embedded '..' sequence.
391 if (*currDomain == '.') {
392 // Update the MRU table if in use.
393 if (entry) {
394 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INVALID_ARG});
397 return NS_ERROR_INVALID_ARG;
400 int result;
402 AutoReadLock lock(mGraphLock);
403 // Perform the lookup.
404 result = mGraph->Lookup(Substring(currDomain, end));
406 if (result != Dafsa::kKeyNotFound) {
407 hasKnownPublicSuffix = true;
408 if (result == kWildcardRule && prevDomain) {
409 // wildcard rules imply an eTLD one level inferior to the match.
410 eTLD = prevDomain;
411 break;
413 if (result != kExceptionRule || !nextDot) {
414 // specific match, or we've hit the top domain level
415 eTLD = currDomain;
416 break;
418 if (result == kExceptionRule) {
419 // exception rules imply an eTLD one level superior to the match.
420 eTLD = nextDot + 1;
421 break;
425 if (!nextDot) {
426 // we've hit the top domain level; use it by default.
427 eTLD = currDomain;
428 break;
431 prevDomain = currDomain;
432 currDomain = nextDot + 1;
433 nextDot = strchr(currDomain, '.');
436 if (aOnlyKnownPublicSuffix && !hasKnownPublicSuffix) {
437 aBaseDomain.Truncate();
438 return NS_OK;
441 const char *begin, *iter;
442 if (aAdditionalParts < 0) {
443 NS_ASSERTION(aAdditionalParts == -1,
444 "aAdditionalParts can't be negative and different from -1");
446 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++) {
450 if (iter != eTLD) {
451 iter++;
453 if (iter != eTLD) {
454 aAdditionalParts = 0;
456 } else {
457 // count off the number of requested domains.
458 begin = aHostname.get();
459 iter = eTLD;
461 while (true) {
462 if (iter == begin) {
463 break;
466 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
467 ++iter;
468 ++aAdditionalParts;
469 break;
474 if (aAdditionalParts != 0) {
475 // Update the MRU table if in use.
476 if (entry) {
477 entry->Set(
478 TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS});
481 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
484 aBaseDomain = Substring(iter, end);
486 // Update the MRU table if in use.
487 if (entry) {
488 entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain), NS_OK});
491 // add on the trailing dot, if applicable
492 if (trailingDot) {
493 aBaseDomain.Append('.');
496 return NS_OK;
499 // Normalizes the given hostname, component by component. ASCII/ACE
500 // components are lower-cased, and UTF-8 components are normalized per
501 // RFC 3454 and converted to ACE.
502 nsresult nsEffectiveTLDService::NormalizeHostname(nsCString& aHostname) {
503 if (!IsAscii(aHostname)) {
504 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
505 if (NS_FAILED(rv)) {
506 return rv;
510 ToLowerCase(aHostname);
511 return NS_OK;
514 NS_IMETHODIMP
515 nsEffectiveTLDService::HasRootDomain(const nsACString& aInput,
516 const nsACString& aHost, bool* aResult) {
517 return net::HasRootDomain(aInput, aHost, aResult);