Bug 1897246 - Implement "Add to Home screen" menu functionality. r=android-reviewers,gl
[gecko.git] / netwerk / dns / nsEffectiveTLDService.cpp
blob1821678f2b2b54dfb1ebc910cf52139d7e4060ad
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/Components.h"
13 #include "mozilla/HashFunctions.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/ResultExtensions.h"
16 #include "mozilla/TextUtils.h"
17 #include "mozilla/Try.h"
19 #include "MainThreadUtils.h"
20 #include "nsContentUtils.h"
21 #include "nsCRT.h"
22 #include "nsEffectiveTLDService.h"
23 #include "nsIFile.h"
24 #include "nsIIDNService.h"
25 #include "nsIObserverService.h"
26 #include "nsIURI.h"
27 #include "nsNetCID.h"
28 #include "nsNetUtil.h"
29 #include "nsServiceManagerUtils.h"
30 #include "mozilla/net/DNS.h"
32 namespace etld_dafsa {
34 // Generated file that includes kDafsa
35 #include "etld_data.inc"
37 } // namespace etld_dafsa
39 using namespace mozilla;
41 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
42 nsIMemoryReporter, nsIObserver)
44 // ----------------------------------------------------------------------
46 static nsEffectiveTLDService* gService = nullptr;
48 nsEffectiveTLDService::nsEffectiveTLDService()
49 : mGraphLock("nsEffectiveTLDService::mGraph") {
50 mGraph.emplace(etld_dafsa::kDafsa);
53 nsresult nsEffectiveTLDService::Init() {
54 MOZ_ASSERT(NS_IsMainThread());
55 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
56 obs->AddObserver(this, "public-suffix-list-updated", false);
58 if (gService) {
59 return NS_ERROR_ALREADY_INITIALIZED;
62 nsresult rv;
63 mIDNService = mozilla::components::IDN::Service(&rv);
64 if (NS_FAILED(rv)) {
65 return rv;
68 gService = this;
69 RegisterWeakMemoryReporter(this);
71 return NS_OK;
74 NS_IMETHODIMP nsEffectiveTLDService::Observe(nsISupports* aSubject,
75 const char* aTopic,
76 const char16_t* aData) {
77 /**
78 * Signal sent from netwerk/dns/PublicSuffixList.sys.mjs
79 * aSubject is the nsIFile object for dafsa.bin
80 * aData is the absolute path to the dafsa.bin file (not used)
82 if (aSubject && (nsCRT::strcmp(aTopic, "public-suffix-list-updated") == 0)) {
83 nsCOMPtr<nsIFile> mDafsaBinFile(do_QueryInterface(aSubject));
84 NS_ENSURE_TRUE(mDafsaBinFile, NS_ERROR_ILLEGAL_VALUE);
86 AutoWriteLock lock(mGraphLock);
87 // Reset mGraph with kDafsa in case reassigning to mDafsaMap fails
88 mGraph.reset();
89 mGraph.emplace(etld_dafsa::kDafsa);
91 mDafsaMap.reset();
92 mMruTable.Clear();
94 MOZ_TRY(mDafsaMap.init(mDafsaBinFile));
96 size_t size = mDafsaMap.size();
97 const uint8_t* remoteDafsaPtr = mDafsaMap.get<uint8_t>().get();
99 auto remoteDafsa = mozilla::Span(remoteDafsaPtr, size);
101 mGraph.reset();
102 mGraph.emplace(remoteDafsa);
104 return NS_OK;
107 nsEffectiveTLDService::~nsEffectiveTLDService() {
108 UnregisterWeakMemoryReporter(this);
109 if (mIDNService) {
110 // Only clear gService if Init() finished successfully.
111 gService = nullptr;
115 // static
116 nsEffectiveTLDService* nsEffectiveTLDService::GetInstance() {
117 if (gService) {
118 return gService;
120 nsCOMPtr<nsIEffectiveTLDService> tldService;
121 tldService = mozilla::components::EffectiveTLD::Service();
122 if (!tldService) {
123 return nullptr;
125 MOZ_ASSERT(
126 gService,
127 "gService must have been initialized in nsEffectiveTLDService::Init");
128 return gService;
131 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
133 // The amount of heap memory measured here is tiny. It used to be bigger when
134 // nsEffectiveTLDService used a separate hash table instead of binary search.
135 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
136 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
137 NS_IMETHODIMP
138 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
139 nsISupports* aData, bool aAnonymize) {
140 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP,
141 UNITS_BYTES,
142 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
143 "Memory used by the effective TLD service.");
145 return NS_OK;
148 size_t nsEffectiveTLDService::SizeOfIncludingThis(
149 mozilla::MallocSizeOf aMallocSizeOf) {
150 size_t n = aMallocSizeOf(this);
152 // Measurement of the following members may be added later if DMD finds it is
153 // worthwhile:
154 // - mIDNService
156 return n;
159 // External function for dealing with URI's correctly.
160 // Pulls out the host portion from an nsIURI, and calls through to
161 // GetPublicSuffixFromHost().
162 NS_IMETHODIMP
163 nsEffectiveTLDService::GetPublicSuffix(nsIURI* aURI,
164 nsACString& aPublicSuffix) {
165 NS_ENSURE_ARG_POINTER(aURI);
167 nsAutoCString host;
168 nsresult rv = NS_GetInnermostURIHost(aURI, host);
169 if (NS_FAILED(rv)) {
170 return rv;
173 return GetBaseDomainInternal(host, 0, false, aPublicSuffix);
176 NS_IMETHODIMP
177 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI* aURI,
178 nsACString& aPublicSuffix) {
179 NS_ENSURE_ARG_POINTER(aURI);
181 nsAutoCString host;
182 nsresult rv = NS_GetInnermostURIHost(aURI, host);
183 if (NS_FAILED(rv)) {
184 return rv;
187 return GetBaseDomainInternal(host, 0, true, aPublicSuffix);
190 // External function for dealing with URI's correctly.
191 // Pulls out the host portion from an nsIURI, and calls through to
192 // GetBaseDomainFromHost().
193 NS_IMETHODIMP
194 nsEffectiveTLDService::GetBaseDomain(nsIURI* aURI, uint32_t aAdditionalParts,
195 nsACString& aBaseDomain) {
196 NS_ENSURE_ARG_POINTER(aURI);
197 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
199 nsAutoCString host;
200 nsresult rv = NS_GetInnermostURIHost(aURI, host);
201 if (NS_FAILED(rv)) {
202 return rv;
205 return GetBaseDomainInternal(host, aAdditionalParts + 1, false, aBaseDomain);
208 // External function for dealing with URIs to get a schemeless site.
209 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
210 // just returning their serialized host.
211 NS_IMETHODIMP
212 nsEffectiveTLDService::GetSchemelessSite(nsIURI* aURI, nsACString& aSite) {
213 NS_ENSURE_ARG_POINTER(aURI);
215 nsresult rv = GetBaseDomain(aURI, 0, aSite);
216 if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
217 rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
218 rv = nsContentUtils::GetHostOrIPv6WithBrackets(aURI, aSite);
220 return rv;
223 // External function for dealing with URIs to get site correctly.
224 // Calls through to GetSchemelessSite(), and serializes with the scheme and
225 // "://" prepended.
226 NS_IMETHODIMP
227 nsEffectiveTLDService::GetSite(nsIURI* aURI, nsACString& aSite) {
228 NS_ENSURE_ARG_POINTER(aURI);
230 nsAutoCString scheme;
231 nsresult rv = aURI->GetScheme(scheme);
232 NS_ENSURE_SUCCESS(rv, rv);
234 nsAutoCString schemeless;
235 rv = GetSchemelessSite(aURI, schemeless);
236 NS_ENSURE_SUCCESS(rv, rv);
238 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
239 if (schemeless.Length() == 1 && schemeless.Last() == '.') {
240 return NS_ERROR_INVALID_ARG;
243 // Reject any URIs without a host that aren't file:// URIs.
244 if (schemeless.IsEmpty() && !aURI->SchemeIs("file")) {
245 return NS_ERROR_INVALID_ARG;
248 aSite.SetCapacity(scheme.Length() + 3 + schemeless.Length());
249 aSite.Append(scheme);
250 aSite.Append("://"_ns);
251 aSite.Append(schemeless);
253 return NS_OK;
256 // External function for dealing with a host string directly: finds the public
257 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
258 NS_IMETHODIMP
259 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString& aHostname,
260 nsACString& aPublicSuffix) {
261 // Create a mutable copy of the hostname and normalize it to ACE.
262 // This will fail if the hostname includes invalid characters.
263 nsAutoCString normHostname(aHostname);
264 nsresult rv = NormalizeHostname(normHostname);
265 if (NS_FAILED(rv)) {
266 return rv;
269 return GetBaseDomainInternal(normHostname, 0, false, aPublicSuffix);
272 NS_IMETHODIMP
273 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString& aHostname,
274 nsACString& aPublicSuffix) {
275 // Create a mutable copy of the hostname and normalize it to ACE.
276 // This will fail if the hostname includes invalid characters.
277 nsAutoCString normHostname(aHostname);
278 nsresult rv = NormalizeHostname(normHostname);
279 if (NS_FAILED(rv)) {
280 return rv;
283 return GetBaseDomainInternal(normHostname, 0, true, aPublicSuffix);
286 // External function for dealing with a host string directly: finds the base
287 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
288 // requested. See GetBaseDomainInternal().
289 NS_IMETHODIMP
290 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString& aHostname,
291 uint32_t aAdditionalParts,
292 nsACString& aBaseDomain) {
293 NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
295 // Create a mutable copy of the hostname and normalize it to ACE.
296 // This will fail if the hostname includes invalid characters.
297 nsAutoCString normHostname(aHostname);
298 nsresult rv = NormalizeHostname(normHostname);
299 if (NS_FAILED(rv)) {
300 return rv;
303 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, false,
304 aBaseDomain);
307 NS_IMETHODIMP
308 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
309 nsACString& aBaseDomain) {
310 // Create a mutable copy of the hostname and normalize it to ACE.
311 // This will fail if the hostname includes invalid characters.
312 nsAutoCString normHostname(aHostname);
313 nsresult rv = NormalizeHostname(normHostname);
314 NS_ENSURE_SUCCESS(rv, rv);
316 return GetBaseDomainInternal(normHostname, -1, false, aBaseDomain);
319 // Finds the base domain for a host, with requested number of additional parts.
320 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
321 // if more subdomain parts are requested than are available, or if the hostname
322 // includes characters that are not valid in a URL. Normalization is performed
323 // on the host string and the result will be in UTF8.
324 nsresult nsEffectiveTLDService::GetBaseDomainInternal(
325 nsCString& aHostname, int32_t aAdditionalParts, bool aOnlyKnownPublicSuffix,
326 nsACString& aBaseDomain) {
327 const int kExceptionRule = 1;
328 const int kWildcardRule = 2;
330 if (aHostname.IsEmpty()) {
331 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
334 // chomp any trailing dot, and keep track of it for later
335 bool trailingDot = aHostname.Last() == '.';
336 if (trailingDot) {
337 aHostname.Truncate(aHostname.Length() - 1);
340 // check the edge cases of the host being '.' or having a second trailing '.',
341 // since subsequent checks won't catch it.
342 if (aHostname.IsEmpty() || aHostname.Last() == '.') {
343 return NS_ERROR_INVALID_ARG;
346 // Lookup in the cache if this is a normal query. This is restricted to
347 // main thread-only as the cache is not thread-safe.
348 Maybe<TldCache::Entry> entry;
349 if (aAdditionalParts == 1 && NS_IsMainThread()) {
350 auto p = mMruTable.Lookup(aHostname);
351 if (p) {
352 if (NS_FAILED(p.Data().mResult)) {
353 return p.Data().mResult;
356 // There was a match, just return the cached value.
357 aBaseDomain = p.Data().mBaseDomain;
358 if (trailingDot) {
359 aBaseDomain.Append('.');
362 return NS_OK;
365 entry = Some(p);
368 // Check if we're dealing with an IPv4/IPv6 hostname, and return
369 if (mozilla::net::HostIsIPLiteral(aHostname)) {
370 // Update the MRU table if in use.
371 if (entry) {
372 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_HOST_IS_IP_ADDRESS});
375 return NS_ERROR_HOST_IS_IP_ADDRESS;
378 // Walk up the domain tree, most specific to least specific,
379 // looking for matches at each level. Note that a given level may
380 // have multiple attributes (e.g. IsWild() and IsNormal()).
381 const char* prevDomain = nullptr;
382 const char* currDomain = aHostname.get();
383 const char* nextDot = strchr(currDomain, '.');
384 const char* end = currDomain + aHostname.Length();
385 // Default value of *eTLD is currDomain as set in the while loop below
386 const char* eTLD = nullptr;
387 bool hasKnownPublicSuffix = false;
388 while (true) {
389 // sanity check the string we're about to look up: it should not begin
390 // with a '.'; this would mean the hostname began with a '.' or had an
391 // embedded '..' sequence.
392 if (*currDomain == '.') {
393 // Update the MRU table if in use.
394 if (entry) {
395 entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INVALID_ARG});
398 return NS_ERROR_INVALID_ARG;
401 int result;
403 AutoReadLock lock(mGraphLock);
404 // Perform the lookup.
405 result = mGraph->Lookup(Substring(currDomain, end));
407 if (result != Dafsa::kKeyNotFound) {
408 hasKnownPublicSuffix = true;
409 if (result == kWildcardRule && prevDomain) {
410 // wildcard rules imply an eTLD one level inferior to the match.
411 eTLD = prevDomain;
412 break;
414 if (result != kExceptionRule || !nextDot) {
415 // specific match, or we've hit the top domain level
416 eTLD = currDomain;
417 break;
419 if (result == kExceptionRule) {
420 // exception rules imply an eTLD one level superior to the match.
421 eTLD = nextDot + 1;
422 break;
426 if (!nextDot) {
427 // we've hit the top domain level; use it by default.
428 eTLD = currDomain;
429 break;
432 prevDomain = currDomain;
433 currDomain = nextDot + 1;
434 nextDot = strchr(currDomain, '.');
437 if (aOnlyKnownPublicSuffix && !hasKnownPublicSuffix) {
438 aBaseDomain.Truncate();
439 return NS_OK;
442 const char *begin, *iter;
443 if (aAdditionalParts < 0) {
444 NS_ASSERTION(aAdditionalParts == -1,
445 "aAdditionalParts can't be negative and different from -1");
447 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++) {
451 if (iter != eTLD) {
452 iter++;
454 if (iter != eTLD) {
455 aAdditionalParts = 0;
457 } else {
458 // count off the number of requested domains.
459 begin = aHostname.get();
460 iter = eTLD;
462 while (true) {
463 if (iter == begin) {
464 break;
467 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
468 ++iter;
469 ++aAdditionalParts;
470 break;
475 if (aAdditionalParts != 0) {
476 // Update the MRU table if in use.
477 if (entry) {
478 entry->Set(
479 TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS});
482 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
485 aBaseDomain = Substring(iter, end);
487 // Update the MRU table if in use.
488 if (entry) {
489 entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain), NS_OK});
492 // add on the trailing dot, if applicable
493 if (trailingDot) {
494 aBaseDomain.Append('.');
497 return NS_OK;
500 // Normalizes the given hostname, component by component. ASCII/ACE
501 // components are lower-cased, and UTF-8 components are normalized per
502 // RFC 3454 and converted to ACE.
503 nsresult nsEffectiveTLDService::NormalizeHostname(nsCString& aHostname) {
504 if (!IsAscii(aHostname)) {
505 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
506 if (NS_FAILED(rv)) {
507 return rv;
511 ToLowerCase(aHostname);
512 return NS_OK;
515 NS_IMETHODIMP
516 nsEffectiveTLDService::HasRootDomain(const nsACString& aInput,
517 const nsACString& aHost, bool* aResult) {
518 return net::HasRootDomain(aInput, aHost, aResult);
521 NS_IMETHODIMP
522 nsEffectiveTLDService::HasKnownPublicSuffix(nsIURI* aURI, bool* aResult) {
523 NS_ENSURE_ARG_POINTER(aURI);
525 nsAutoCString host;
526 nsresult rv = NS_GetInnermostURIHost(aURI, host);
527 if (NS_FAILED(rv)) {
528 return rv;
531 return HasKnownPublicSuffixFromHost(host, aResult);
534 NS_IMETHODIMP
535 nsEffectiveTLDService::HasKnownPublicSuffixFromHost(const nsACString& aHostname,
536 bool* aResult) {
537 // Create a mutable copy of the hostname and normalize it to ACE.
538 // This will fail if the hostname includes invalid characters.
539 nsCString hostname(aHostname);
540 nsresult rv = NormalizeHostname(hostname);
541 if (NS_FAILED(rv)) {
542 return rv;
545 if (hostname.IsEmpty() || hostname == ".") {
546 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
549 // Remove any trailing dot ("example.com." should have a valid suffix)
550 if (hostname.Last() == '.') {
551 hostname.Truncate(hostname.Length() - 1);
554 AutoReadLock lock(mGraphLock);
556 // Check if we can find a suffix on the PSL. Start with the top level domain
557 // (for example "com" in "example.com"). If that isn't on the PSL, continue to
558 // add domain segments from the end (for example for "example.co.za", "za" is
559 // not on the PSL, but "co.za" is).
560 int32_t dotBeforeSuffix = -1;
561 int8_t i = 0;
562 do {
563 dotBeforeSuffix = Substring(hostname, 0, dotBeforeSuffix).RFindChar('.');
565 const nsACString& suffix = Substring(
566 hostname, dotBeforeSuffix == kNotFound ? 0 : dotBeforeSuffix + 1);
568 if (mGraph->Lookup(suffix) != Dafsa::kKeyNotFound) {
569 *aResult = true;
570 return NS_OK;
573 // To save time, only check up to 9 segments. We can be certain at that
574 // point that the PSL doesn't contain a suffix with that many segments if we
575 // didn't find a suffix earlier.
576 i++;
577 } while (dotBeforeSuffix != kNotFound && i < 10);
579 *aResult = false;
580 return NS_OK;