1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/Components.h"
13 #include "mozilla/HashFunctions.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/ResultExtensions.h"
16 #include "mozilla/TextUtils.h"
17 #include "mozilla/Try.h"
19 #include "MainThreadUtils.h"
20 #include "nsContentUtils.h"
22 #include "nsEffectiveTLDService.h"
24 #include "nsIIDNService.h"
25 #include "nsIObserverService.h"
28 #include "nsNetUtil.h"
29 #include "nsServiceManagerUtils.h"
30 #include "mozilla/net/DNS.h"
// Keeps the generated data in its own namespace; the rest of this file refers
// to it as etld_dafsa::kDafsa.
32 namespace etld_dafsa
{
34 // Generated file that includes kDafsa
35 #include "etld_data.inc"
37 } // namespace etld_dafsa
39 using namespace mozilla
;
// Interfaces exposed by nsEffectiveTLDService: nsIEffectiveTLDService,
// nsIMemoryReporter (see CollectReports()) and nsIObserver (see Observe()).
41 NS_IMPL_ISUPPORTS(nsEffectiveTLDService
, nsIEffectiveTLDService
,
42 nsIMemoryReporter
, nsIObserver
)
44 // ----------------------------------------------------------------------
// Raw, file-local pointer to the service instance. GetInstance() expects
// Init() to have set it (see the assertion message there).
46 static nsEffectiveTLDService
* gService
= nullptr;
// Constructor: names the lock that is taken around every use of mGraph in
// this file and seeds mGraph from the built-in etld_dafsa::kDafsa data.
48 nsEffectiveTLDService::nsEffectiveTLDService()
49 : mGraphLock("nsEffectiveTLDService::mGraph") {
50 mGraph
.emplace(etld_dafsa::kDafsa
);
// One-time initialisation; asserted to run on the main thread.
// Registers this object as an observer of "public-suffix-list-updated",
// fetches the IDN service into mIDNService (used by NormalizeHostname()) and
// registers this object as a weak memory reporter.
53 nsresult
nsEffectiveTLDService::Init() {
54 MOZ_ASSERT(NS_IsMainThread());
55 nsCOMPtr
<nsIObserverService
> obs
= mozilla::services::GetObserverService();
// The third argument (false) is passed as the last AddObserver() parameter.
56 obs
->AddObserver(this, "public-suffix-list-updated", false);
// NOTE(review): the condition guarding this early return is not part of the
// lines shown here; presumably it fires when gService is already set - confirm.
59 return NS_ERROR_ALREADY_INITIALIZED
;
// rv receives the status of the service lookup.
63 mIDNService
= mozilla::components::IDN::Service(&rv
);
69 RegisterWeakMemoryReporter(this);
// nsIObserver callback.
// Acts only when aSubject is non-null and aTopic equals
// "public-suffix-list-updated". In that case aSubject is queried for nsIFile
// (NS_ERROR_ILLEGAL_VALUE if that yields null) and, with mGraphLock held for
// writing, mGraph is first re-seeded from the built-in etld_dafsa::kDafsa and
// then re-created over the bytes exposed by mDafsaMap once mDafsaMap.init()
// has been run on that file.
74 NS_IMETHODIMP
nsEffectiveTLDService::Observe(nsISupports
* aSubject
,
76 const char16_t
* aData
) {
78 * Signal sent from netwerk/dns/PublicSuffixList.sys.mjs
79 * aSubject is the nsIFile object for dafsa.bin
80 * aData is the absolute path to the dafsa.bin file (not used)
82 if (aSubject
&& (nsCRT::strcmp(aTopic
, "public-suffix-list-updated") == 0)) {
// NOTE(review): despite the m-prefix, mDafsaBinFile is a local declared
// here, not a member.
83 nsCOMPtr
<nsIFile
> mDafsaBinFile(do_QueryInterface(aSubject
));
84 NS_ENSURE_TRUE(mDafsaBinFile
, NS_ERROR_ILLEGAL_VALUE
);
// Write lock: the lookup paths take the same lock for reading before they
// call mGraph->Lookup().
86 AutoWriteLock
lock(mGraphLock
);
87 // Reset mGraph with kDafsa in case reassigning to mDafsaMap fails
89 mGraph
.emplace(etld_dafsa::kDafsa
);
// Presumably returns early on failure, leaving the built-in data installed
// above in place - confirm against MOZ_TRY.
94 MOZ_TRY(mDafsaMap
.init(mDafsaBinFile
));
96 size_t size
= mDafsaMap
.size();
97 const uint8_t* remoteDafsaPtr
= mDafsaMap
.get
<uint8_t>().get();
// View over the mapped bytes; mGraph is rebuilt on top of this span.
99 auto remoteDafsa
= mozilla::Span(remoteDafsaPtr
, size
);
102 mGraph
.emplace(remoteDafsa
);
// Destructor: undoes the RegisterWeakMemoryReporter(this) call made in Init().
107 nsEffectiveTLDService::~nsEffectiveTLDService() {
108 UnregisterWeakMemoryReporter(this);
110 // Only clear gService if Init() finished successfully.
// Returns the service as a raw nsEffectiveTLDService pointer.
// The lines shown here request the service through
// mozilla::components::EffectiveTLD::Service(); the assertion message below
// records the expectation that Init() has set gService by then.
116 nsEffectiveTLDService
* nsEffectiveTLDService::GetInstance() {
120 nsCOMPtr
<nsIEffectiveTLDService
> tldService
;
121 tldService
= mozilla::components::EffectiveTLD::Service();
127 "gService must have been initialized in nsEffectiveTLDService::Init");
// Provides EffectiveTLDServiceMallocSizeOf, which CollectReports() hands to
// SizeOfIncludingThis().
131 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf
)
133 // The amount of heap memory measured here is tiny. It used to be bigger when
134 // nsEffectiveTLDService used a separate hash table instead of binary search.
135 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
136 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
//
// nsIMemoryReporter callback: reports the value of
// SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf) as KIND_HEAP under the
// path "explicit/network/effective-TLD-service".
138 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback
* aHandleReport
,
139 nsISupports
* aData
, bool aAnonymize
) {
140 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP
,
142 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf
),
143 "Memory used by the effective TLD service.");
// Measures this object with the supplied aMallocSizeOf callback. Only the
// object itself is measured in the lines shown here; no member is followed.
148 size_t nsEffectiveTLDService::SizeOfIncludingThis(
149 mozilla::MallocSizeOf aMallocSizeOf
) {
150 size_t n
= aMallocSizeOf(this);
152 // Measurement of the following members may be added later if DMD finds it is
159 // External function for dealing with URIs correctly.
160 // Pulls out the host portion from an nsIURI, and calls through to
161 // GetBaseDomainInternal() with no additional parts requested.
//
// aURI           must be non-null (NS_ENSURE_ARG_POINTER).
// aPublicSuffix  receives the result written by GetBaseDomainInternal().
// The host is taken from NS_GetInnermostURIHost() and passed on as-is;
// NormalizeHostname() is not called on this path.
163 nsEffectiveTLDService::GetPublicSuffix(nsIURI
* aURI
,
164 nsACString
& aPublicSuffix
) {
165 NS_ENSURE_ARG_POINTER(aURI
);
168 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
// 0 additional parts, aOnlyKnownPublicSuffix = false.
173 return GetBaseDomainInternal(host
, 0, false, aPublicSuffix
);
// Same as GetPublicSuffix(), except that aOnlyKnownPublicSuffix is passed as
// true: GetBaseDomainInternal() then truncates the output string when none of
// its graph lookups found a match.
//
// aURI           must be non-null (NS_ENSURE_ARG_POINTER).
// aPublicSuffix  receives the result written by GetBaseDomainInternal().
177 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI
* aURI
,
178 nsACString
& aPublicSuffix
) {
179 NS_ENSURE_ARG_POINTER(aURI
);
182 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
// 0 additional parts, aOnlyKnownPublicSuffix = true.
187 return GetBaseDomainInternal(host
, 0, true, aPublicSuffix
);
190 // External function for dealing with URIs correctly.
191 // Pulls out the host portion from an nsIURI, and calls through to
192 // GetBaseDomainInternal() with aAdditionalParts + 1.
//
// aURI              must be non-null (NS_ENSURE_ARG_POINTER).
// aAdditionalParts  extra parts requested on top of the base domain; values
//                   that are negative when viewed as int32_t are rejected
//                   with NS_ERROR_INVALID_ARG.
// aBaseDomain       receives the result written by GetBaseDomainInternal().
194 nsEffectiveTLDService::GetBaseDomain(nsIURI
* aURI
, uint32_t aAdditionalParts
,
195 nsACString
& aBaseDomain
) {
196 NS_ENSURE_ARG_POINTER(aURI
);
// GetBaseDomainInternal() takes the count as int32_t, hence the sign check.
197 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
200 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
// NOTE(review): aAdditionalParts == INT32_MAX passes the check above, but
// aAdditionalParts + 1 then no longer fits the int32_t parameter - confirm
// whether that value should be rejected as well.
205 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, false, aBaseDomain
);
208 // External function for dealing with URIs to get a schemeless site.
209 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
210 // just returning their serialized host.
//
// aURI   must be non-null (NS_ENSURE_ARG_POINTER).
// aSite  receives the base domain, or the serialized host in the fallback case.
212 nsEffectiveTLDService::GetSchemelessSite(nsIURI
* aURI
, nsACString
& aSite
) {
213 NS_ENSURE_ARG_POINTER(aURI
);
215 nsresult rv
= GetBaseDomain(aURI
, 0, aSite
);
// Fallback: for IP-address hosts and hosts with too few domain levels, use the
// host as produced by nsContentUtils::GetHostOrIPv6WithBrackets() and take
// that call's status as rv.
216 if (rv
== NS_ERROR_HOST_IS_IP_ADDRESS
||
217 rv
== NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
) {
218 rv
= nsContentUtils::GetHostOrIPv6WithBrackets(aURI
, aSite
);
223 // External function for dealing with URIs to get site correctly.
224 // Calls through to GetSchemelessSite(), and serializes with the scheme and
// "://" prepended.
//
// aURI   must be non-null (NS_ENSURE_ARG_POINTER).
// aSite  has scheme, "://" and the schemeless site appended to it, in that
//        order.
// Fails with NS_ERROR_INVALID_ARG when the schemeless site is exactly "." or
// when it is empty and the scheme is not "file"; failures of GetScheme() and
// GetSchemelessSite() are propagated through NS_ENSURE_SUCCESS.
227 nsEffectiveTLDService::GetSite(nsIURI
* aURI
, nsACString
& aSite
) {
228 NS_ENSURE_ARG_POINTER(aURI
);
230 nsAutoCString scheme
;
231 nsresult rv
= aURI
->GetScheme(scheme
);
232 NS_ENSURE_SUCCESS(rv
, rv
);
234 nsAutoCString schemeless
;
235 rv
= GetSchemelessSite(aURI
, schemeless
);
236 NS_ENSURE_SUCCESS(rv
, rv
);
238 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
239 if (schemeless
.Length() == 1 && schemeless
.Last() == '.') {
240 return NS_ERROR_INVALID_ARG
;
243 // Reject any URIs without a host that aren't file:// URIs.
244 if (schemeless
.IsEmpty() && !aURI
->SchemeIs("file")) {
245 return NS_ERROR_INVALID_ARG
;
// Size the output once for scheme + "://" (3 characters) + schemeless site.
248 aSite
.SetCapacity(scheme
.Length() + 3 + schemeless
.Length());
249 aSite
.Append(scheme
);
250 aSite
.Append("://"_ns
);
251 aSite
.Append(schemeless
);
256 // External function for dealing with a host string directly: finds the public
257 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
//
// aHostname      input host; it is copied, so the caller's string is not
//                modified.
// aPublicSuffix  receives the result written by GetBaseDomainInternal().
259 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
& aHostname
,
260 nsACString
& aPublicSuffix
) {
261 // Create a mutable copy of the hostname and normalize it to ACE.
262 // This will fail if the hostname includes invalid characters.
263 nsAutoCString
normHostname(aHostname
);
264 nsresult rv
= NormalizeHostname(normHostname
);
// 0 additional parts, aOnlyKnownPublicSuffix = false.
269 return GetBaseDomainInternal(normHostname
, 0, false, aPublicSuffix
);
// Same as GetPublicSuffixFromHost(), except that aOnlyKnownPublicSuffix is
// passed as true: GetBaseDomainInternal() then truncates the output string
// when none of its graph lookups found a match.
//
// aHostname      input host; it is copied, so the caller's string is not
//                modified.
// aPublicSuffix  receives the result written by GetBaseDomainInternal().
273 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString
& aHostname
,
274 nsACString
& aPublicSuffix
) {
275 // Create a mutable copy of the hostname and normalize it to ACE.
276 // This will fail if the hostname includes invalid characters.
277 nsAutoCString
normHostname(aHostname
);
278 nsresult rv
= NormalizeHostname(normHostname
);
// 0 additional parts, aOnlyKnownPublicSuffix = true.
283 return GetBaseDomainInternal(normHostname
, 0, true, aPublicSuffix
);
286 // External function for dealing with a host string directly: finds the base
287 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
288 // requested. See GetBaseDomainInternal().
//
// aHostname         input host; it is copied, so the caller's string is not
//                   modified.
// aAdditionalParts  extra parts requested on top of the base domain; values
//                   that are negative when viewed as int32_t are rejected
//                   with NS_ERROR_INVALID_ARG.
// aBaseDomain       output string named in the signature.
290 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
& aHostname
,
291 uint32_t aAdditionalParts
,
292 nsACString
& aBaseDomain
) {
// GetBaseDomainInternal() takes the count as int32_t, hence the sign check.
293 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
295 // Create a mutable copy of the hostname and normalize it to ACE.
296 // This will fail if the hostname includes invalid characters.
297 nsAutoCString
normHostname(aHostname
);
298 nsresult rv
= NormalizeHostname(normHostname
);
// NOTE(review): aAdditionalParts == INT32_MAX passes the check above, but
// aAdditionalParts + 1 then no longer fits the int32_t parameter - confirm
// whether that value should be rejected as well.
303 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, false,
// Normalizes a copy of aHostname and calls GetBaseDomainInternal() with
// aAdditionalParts = -1, the only negative value that function's assertion
// allows.
// NOTE(review): presumably the result is the host with its leftmost label
// removed - confirm against the negative-count branch of
// GetBaseDomainInternal().
//
// aHostname    input host; it is copied, so the caller's string is not
//              modified.
// aBaseDomain  receives the result written by GetBaseDomainInternal().
308 nsEffectiveTLDService::GetNextSubDomain(const nsACString
& aHostname
,
309 nsACString
& aBaseDomain
) {
310 // Create a mutable copy of the hostname and normalize it to ACE.
311 // This will fail if the hostname includes invalid characters.
312 nsAutoCString
normHostname(aHostname
);
313 nsresult rv
= NormalizeHostname(normHostname
);
314 NS_ENSURE_SUCCESS(rv
, rv
);
316 return GetBaseDomainInternal(normHostname
, -1, false, aBaseDomain
);
319 // Finds the base domain for a host, with requested number of additional parts.
320 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
321 // if more subdomain parts are requested than are available, or if the hostname
322 // includes characters that are not valid in a URL. Normalization is performed
323 // on the host string and the result will be in UTF8.
// NOTE(review): no normalization call appears in the lines of this function
// shown here. The *FromHost() callers run NormalizeHostname() before calling
// in, while the URI-based callers pass the URI host straight through - confirm
// and reword the sentence above if that is the whole story.
//
// aHostname               in/out: a single trailing '.' is truncated in place.
// aAdditionalParts        0 from the GetPublicSuffix*() callers,
//                         aAdditionalParts + 1 from the GetBaseDomain*()
//                         callers, -1 from GetNextSubDomain(); any other
//                         negative value trips the NS_ASSERTION below.
// aOnlyKnownPublicSuffix  when true and no graph lookup matched, aBaseDomain
//                         is truncated.
// aBaseDomain             out: Substring(iter, end) of the host, with '.'
//                         appended again in the trailing-dot case.
//
// Error results visible here:
//   NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS  empty host, or aAdditionalParts is
//                                        still non-zero after counting.
//   NS_ERROR_INVALID_ARG                 host is "." or ends in "..", or the
//                                        domain being looked up starts with '.'.
//   NS_ERROR_HOST_IS_IP_ADDRESS          host is an IP literal.
324 nsresult
nsEffectiveTLDService::GetBaseDomainInternal(
325 nsCString
& aHostname
, int32_t aAdditionalParts
, bool aOnlyKnownPublicSuffix
,
326 nsACString
& aBaseDomain
) {
// Values compared against the result of mGraph->Lookup() below.
327 const int kExceptionRule
= 1;
328 const int kWildcardRule
= 2;
330 if (aHostname
.IsEmpty()) {
331 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
334 // chomp any trailing dot, and keep track of it for later
335 bool trailingDot
= aHostname
.Last() == '.';
337 aHostname
.Truncate(aHostname
.Length() - 1);
340 // check the edge cases of the host being '.' or having a second trailing '.',
341 // since subsequent checks won't catch it.
342 if (aHostname
.IsEmpty() || aHostname
.Last() == '.') {
343 return NS_ERROR_INVALID_ARG
;
346 // Lookup in the cache if this is a normal query. This is restricted to
347 // main thread-only as the cache is not thread-safe.
348 Maybe
<TldCache::Entry
> entry
;
// "Normal query" here means aAdditionalParts == 1, which is what
// GetBaseDomain*() passes for a caller-supplied count of 0.
349 if (aAdditionalParts
== 1 && NS_IsMainThread()) {
350 auto p
= mMruTable
.Lookup(aHostname
);
// Cached failures are returned as-is.
352 if (NS_FAILED(p
.Data().mResult
)) {
353 return p
.Data().mResult
;
356 // There was a match, just return the cached value.
357 aBaseDomain
= p
.Data().mBaseDomain
;
359 aBaseDomain
.Append('.');
368 // Check if we're dealing with an IPv4/IPv6 hostname, and return
369 if (mozilla::net::HostIsIPLiteral(aHostname
)) {
370 // Update the MRU table if in use.
372 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_HOST_IS_IP_ADDRESS
});
375 return NS_ERROR_HOST_IS_IP_ADDRESS
;
378 // Walk up the domain tree, most specific to least specific,
379 // looking for matches at each level. Note that a given level may
380 // have multiple attributes (e.g. IsWild() and IsNormal()).
381 const char* prevDomain
= nullptr;
382 const char* currDomain
= aHostname
.get();
383 const char* nextDot
= strchr(currDomain
, '.');
384 const char* end
= currDomain
+ aHostname
.Length();
385 // Default value of *eTLD is currDomain as set in the while loop below
386 const char* eTLD
= nullptr;
387 bool hasKnownPublicSuffix
= false;
389 // sanity check the string we're about to look up: it should not begin
390 // with a '.'; this would mean the hostname began with a '.' or had an
391 // embedded '..' sequence.
392 if (*currDomain
== '.') {
393 // Update the MRU table if in use.
395 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INVALID_ARG
});
398 return NS_ERROR_INVALID_ARG
;
// Read lock: Observe() takes the same lock for writing while it swaps mGraph.
403 AutoReadLock
lock(mGraphLock
);
404 // Perform the lookup.
405 result
= mGraph
->Lookup(Substring(currDomain
, end
));
407 if (result
!= Dafsa::kKeyNotFound
) {
408 hasKnownPublicSuffix
= true;
409 if (result
== kWildcardRule
&& prevDomain
) {
410 // wildcard rules imply an eTLD one level inferior to the match.
414 if (result
!= kExceptionRule
|| !nextDot
) {
415 // specific match, or we've hit the top domain level
419 if (result
== kExceptionRule
) {
420 // exception rules imply an eTLD one level superior to the match.
427 // we've hit the top domain level; use it by default.
// No match at this level: step to the part after the next '.'.
432 prevDomain
= currDomain
;
433 currDomain
= nextDot
+ 1;
434 nextDot
= strchr(currDomain
, '.');
437 if (aOnlyKnownPublicSuffix
&& !hasKnownPublicSuffix
) {
438 aBaseDomain
.Truncate();
442 const char *begin
, *iter
;
443 if (aAdditionalParts
< 0) {
444 NS_ASSERTION(aAdditionalParts
== -1,
445 "aAdditionalParts can't be negative and different from -1");
// Advance iter from the start of the host to the first '.', or to eTLD if
// that comes first.
447 for (iter
= aHostname
.get(); iter
!= eTLD
&& *iter
!= '.'; iter
++) {
455 aAdditionalParts
= 0;
458 // count off the number of requested domains.
459 begin
= aHostname
.get();
// Step iter backwards one character at a time; each '.' found consumes one
// requested part.
467 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
475 if (aAdditionalParts
!= 0) {
476 // Update the MRU table if in use.
479 TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
});
482 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
485 aBaseDomain
= Substring(iter
, end
);
487 // Update the MRU table if in use.
489 entry
->Set(TLDCacheEntry
{aHostname
, nsCString(aBaseDomain
), NS_OK
});
492 // add on the trailing dot, if applicable
494 aBaseDomain
.Append('.');
500 // Normalizes the given hostname, component by component. ASCII/ACE
501 // components are lower-cased, and UTF-8 components are normalized per
502 // RFC 3454 and converted to ACE.
//
// aHostname  in/out: rewritten in place.
// In the lines shown here, a hostname that is not pure ASCII is handed as a
// whole to mIDNService->ConvertUTF8toACE() (same string as input and output),
// and ToLowerCase() is applied to the hostname.
503 nsresult
nsEffectiveTLDService::NormalizeHostname(nsCString
& aHostname
) {
504 if (!IsAscii(aHostname
)) {
505 nsresult rv
= mIDNService
->ConvertUTF8toACE(aHostname
, aHostname
);
511 ToLowerCase(aHostname
);
// Thin wrapper: forwards aInput, aHost and aResult unchanged to
// net::HasRootDomain() and returns its result.
516 nsEffectiveTLDService::HasRootDomain(const nsACString
& aInput
,
517 const nsACString
& aHost
, bool* aResult
) {
518 return net::HasRootDomain(aInput
, aHost
, aResult
);
// URI flavour of HasKnownPublicSuffixFromHost(): takes the host from
// NS_GetInnermostURIHost() and forwards it together with aResult.
//
// aURI     must be non-null (NS_ENSURE_ARG_POINTER).
// aResult  out-parameter, passed through to HasKnownPublicSuffixFromHost().
522 nsEffectiveTLDService::HasKnownPublicSuffix(nsIURI
* aURI
, bool* aResult
) {
523 NS_ENSURE_ARG_POINTER(aURI
);
526 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
531 return HasKnownPublicSuffixFromHost(host
, aResult
);
535 nsEffectiveTLDService::HasKnownPublicSuffixFromHost(const nsACString
& aHostname
,
537 // Create a mutable copy of the hostname and normalize it to ACE.
538 // This will fail if the hostname includes invalid characters.
539 nsCString
hostname(aHostname
);
540 nsresult rv
= NormalizeHostname(hostname
);
545 if (hostname
.IsEmpty() || hostname
== ".") {
546 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
549 // Remove any trailing dot ("example.com." should have a valid suffix)
550 if (hostname
.Last() == '.') {
551 hostname
.Truncate(hostname
.Length() - 1);
554 AutoReadLock
lock(mGraphLock
);
556 // Check if we can find a suffix on the PSL. Start with the top level domain
557 // (for example "com" in "example.com"). If that isn't on the PSL, continue to
558 // add domain segments from the end (for example for "example.co.za", "za" is
559 // not on the PSL, but "co.za" is).
560 int32_t dotBeforeSuffix
= -1;
563 dotBeforeSuffix
= Substring(hostname
, 0, dotBeforeSuffix
).RFindChar('.');
565 const nsACString
& suffix
= Substring(
566 hostname
, dotBeforeSuffix
== kNotFound
? 0 : dotBeforeSuffix
+ 1);
568 if (mGraph
->Lookup(suffix
) != Dafsa::kKeyNotFound
) {
573 // To save time, only check up to 9 segments. We can be certain at that
574 // point that the PSL doesn't contain a suffix with that many segments if we
575 // didn't find a suffix earlier.
577 } while (dotBeforeSuffix
!= kNotFound
&& i
< 10);