1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/HashFunctions.h"
13 #include "mozilla/MemoryReporting.h"
14 #include "mozilla/ResultExtensions.h"
15 #include "mozilla/TextUtils.h"
16 #include "mozilla/Try.h"
18 #include "MainThreadUtils.h"
19 #include "nsContentUtils.h"
21 #include "nsEffectiveTLDService.h"
23 #include "nsIIDNService.h"
24 #include "nsIObserverService.h"
27 #include "nsNetUtil.h"
28 #include "nsServiceManagerUtils.h"
29 #include "mozilla/net/DNS.h"
// Keeps the generated table in its own namespace; the rest of this file refers
// to it as etld_dafsa::kDafsa.
31 namespace etld_dafsa
{
33 // Generated file that includes kDafsa
34 #include "etld_data.inc"
36 } // namespace etld_dafsa
38 using namespace mozilla
;
// XPCOM interface map: the service exposes nsIEffectiveTLDService,
// nsIMemoryReporter and nsIObserver.
40 NS_IMPL_ISUPPORTS(nsEffectiveTLDService
, nsIEffectiveTLDService
,
41 nsIMemoryReporter
, nsIObserver
)
43 // ----------------------------------------------------------------------
// File-local pointer to the service instance; it starts out null. Per the
// assertion text in GetInstance(), it is expected to be set by Init().
45 static nsEffectiveTLDService
* gService
= nullptr;
// Constructor: initializes mGraphLock with its label string and seeds mGraph
// with the built-in etld_dafsa::kDafsa data.
47 nsEffectiveTLDService::nsEffectiveTLDService()
48 : mGraphLock("nsEffectiveTLDService::mGraph") {
49 mGraph
.emplace(etld_dafsa::kDafsa
);
// Sets the service up. Asserts that it runs on the main thread, subscribes to
// the public-suffix-list-updated topic, fetches the IDN service and registers
// this object as a weak memory reporter.
52 nsresult
nsEffectiveTLDService::Init() {
53 MOZ_ASSERT(NS_IsMainThread());
// Subscribe to the topic that Observe() reacts to.
54 nsCOMPtr
<nsIObserverService
> obs
= mozilla::services::GetObserverService();
55 obs
->AddObserver(this, "public-suffix-list-updated", false);
// NOTE(review): the condition guarding this early return is not shown on the
// surrounding lines; presumably it rejects a second initialization - confirm.
58 return NS_ERROR_ALREADY_INITIALIZED
;
// Obtain the IDN service by contract ID; rv receives the lookup status.
// NormalizeHostname() uses mIDNService for non-ASCII hostnames.
62 mIDNService
= do_GetService(NS_IDNSERVICE_CONTRACTID
, &rv
);
// Register for memory reporting (see CollectReports()).
68 RegisterWeakMemoryReporter(this);
// nsIObserver callback. When aSubject is non-null and the topic is
// public-suffix-list-updated, the data behind mGraph is replaced with the
// contents obtained from the nsIFile passed as aSubject.
73 NS_IMETHODIMP
nsEffectiveTLDService::Observe(nsISupports
* aSubject
,
75 const char16_t
* aData
) {
77 * Signal sent from netwerk/dns/PublicSuffixList.jsm
78 * aSubject is the nsIFile object for dafsa.bin
79 * aData is the absolute path to the dafsa.bin file (not used)
81 if (aSubject
&& (nsCRT::strcmp(aTopic
, "public-suffix-list-updated") == 0)) {
// The subject must implement nsIFile; otherwise NS_ENSURE_TRUE bails out with
// NS_ERROR_ILLEGAL_VALUE.
82 nsCOMPtr
<nsIFile
> mDafsaBinFile(do_QueryInterface(aSubject
));
83 NS_ENSURE_TRUE(mDafsaBinFile
, NS_ERROR_ILLEGAL_VALUE
);
// Hold the write lock on mGraphLock while mGraph is being replaced; readers in
// GetBaseDomainInternal() take the read lock on the same object.
85 AutoWriteLock
lock(mGraphLock
);
86 // Reset mGraph with kDafsa in case reassigning to mDafsaMap fails
88 mGraph
.emplace(etld_dafsa::kDafsa
);
// MOZ_TRY returns early if mDafsaMap.init() fails, which leaves mGraph on the
// built-in kDafsa data set just above.
93 MOZ_TRY(mDafsaMap
.init(mDafsaBinFile
));
// View the contents of mDafsaMap as a span of bytes.
95 size_t size
= mDafsaMap
.size();
96 const uint8_t* remoteDafsaPtr
= mDafsaMap
.get
<uint8_t>().get();
98 auto remoteDafsa
= mozilla::Span(remoteDafsaPtr
, size
);
// Point mGraph at the freshly loaded data.
101 mGraph
.emplace(remoteDafsa
);
// Destructor: undoes the RegisterWeakMemoryReporter() call made in Init().
106 nsEffectiveTLDService::~nsEffectiveTLDService() {
107 UnregisterWeakMemoryReporter(this);
109 // Only clear gService if Init() finished successfully.
// Accessor for the service instance (declared to return a
// nsEffectiveTLDService pointer). The visible code requests the service by
// contract ID; the assertion message below states that gService is expected to
// have been initialized by Init() at that point.
115 nsEffectiveTLDService
* nsEffectiveTLDService::GetInstance() {
119 nsCOMPtr
<nsIEffectiveTLDService
> tldService
=
120 do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID
);
126 "gService must have been initialized in nsEffectiveTLDService::Init");
// Defines EffectiveTLDServiceMallocSizeOf, the size-of function handed to
// SizeOfIncludingThis() in CollectReports() below.
130 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf
)
132 // The amount of heap memory measured here is tiny. It used to be bigger when
133 // nsEffectiveTLDService used a separate hash table instead of binary search.
134 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
135 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
// nsIMemoryReporter callback: reports the value computed by
// SizeOfIncludingThis() as KIND_HEAP memory under the
// explicit/network/effective-TLD-service path.
137 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback
* aHandleReport
,
138 nsISupports
* aData
, bool aAnonymize
) {
139 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP
,
141 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf
),
142 "Memory used by the effective TLD service.");
// Computes the memory attributed to this object. The measurement visible here
// starts from aMallocSizeOf(this), i.e. the allocation holding the object.
147 size_t nsEffectiveTLDService::SizeOfIncludingThis(
148 mozilla::MallocSizeOf aMallocSizeOf
) {
149 size_t n
= aMallocSizeOf(this);
151 // Measurement of the following members may be added later if DMD finds it is
158 // External function for dealing with URIs correctly.
159 // Pulls out the host portion from an nsIURI, and calls through to
160 // GetPublicSuffixFromHost().
// (Both this function and GetPublicSuffixFromHost() forward to
// GetBaseDomainInternal() with aAdditionalParts = 0 and
// aOnlyKnownPublicSuffix = false.)
162 nsEffectiveTLDService::GetPublicSuffix(nsIURI
* aURI
,
163 nsACString
& aPublicSuffix
) {
// A null aURI is rejected up front.
164 NS_ENSURE_ARG_POINTER(aURI
);
// Fill host from the URI via NS_GetInnermostURIHost().
167 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
172 return GetBaseDomainInternal(host
, 0, false, aPublicSuffix
);
// Same flow as GetPublicSuffix(), but passes aOnlyKnownPublicSuffix = true to
// GetBaseDomainInternal(), which then empties the result when no rule in the
// suffix data matched the host.
176 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI
* aURI
,
177 nsACString
& aPublicSuffix
) {
// A null aURI is rejected up front.
178 NS_ENSURE_ARG_POINTER(aURI
);
// Fill host from the URI via NS_GetInnermostURIHost().
181 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
186 return GetBaseDomainInternal(host
, 0, true, aPublicSuffix
);
189 // External function for dealing with URIs correctly.
190 // Pulls out the host portion from an nsIURI, and calls through to
191 // GetBaseDomainFromHost().
// (Both this function and GetBaseDomainFromHost() forward to
// GetBaseDomainInternal() with aAdditionalParts + 1.)
193 nsEffectiveTLDService::GetBaseDomain(nsIURI
* aURI
, uint32_t aAdditionalParts
,
194 nsACString
& aBaseDomain
) {
195 NS_ENSURE_ARG_POINTER(aURI
);
// Reject values that would turn negative once converted to the int32_t
// parameter of GetBaseDomainInternal().
196 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
// Fill host from the URI via NS_GetInnermostURIHost().
199 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
// The internal helper is called with one more part than the caller asked for.
204 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, false, aBaseDomain
);
207 // External function for dealing with URIs to get a schemeless site.
208 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
209 // just returning their serialized host.
211 nsEffectiveTLDService::GetSchemelessSite(nsIURI
* aURI
, nsACString
& aSite
) {
212 NS_ENSURE_ARG_POINTER(aURI
);
// First try the regular base-domain computation with no additional parts.
214 nsresult rv
= GetBaseDomain(aURI
, 0, aSite
);
// For IP-address hosts and hosts with too few domain levels, fall back to the
// host as produced by nsContentUtils::GetHostOrIPv6WithBrackets(); rv is
// overwritten with the status of that call.
215 if (rv
== NS_ERROR_HOST_IS_IP_ADDRESS
||
216 rv
== NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
) {
217 rv
= nsContentUtils::GetHostOrIPv6WithBrackets(aURI
, aSite
);
222 // External function for dealing with URIs to get site correctly.
223 // Calls through to GetSchemelessSite(), and serializes with the scheme and
// the scheme separator prepended (scheme, separator, schemeless site).
226 nsEffectiveTLDService::GetSite(nsIURI
* aURI
, nsACString
& aSite
) {
227 NS_ENSURE_ARG_POINTER(aURI
);
// Fetch the scheme; a failure is returned to the caller unchanged.
229 nsAutoCString scheme
;
230 nsresult rv
= aURI
->GetScheme(scheme
);
231 NS_ENSURE_SUCCESS(rv
, rv
);
// Compute the schemeless site; a failure is returned to the caller unchanged.
233 nsAutoCString schemeless
;
234 rv
= GetSchemelessSite(aURI
, schemeless
);
235 NS_ENSURE_SUCCESS(rv
, rv
);
237 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
238 if (schemeless
.Length() == 1 && schemeless
.Last() == '.') {
239 return NS_ERROR_INVALID_ARG
;
242 // Reject any URIs without a host that aren't file:// URIs.
243 if (schemeless
.IsEmpty() && !aURI
->SchemeIs("file")) {
244 return NS_ERROR_INVALID_ARG
;
// Reserve room for scheme + 3 separator characters + schemeless site, then
// append the three pieces in that order.
247 aSite
.SetCapacity(scheme
.Length() + 3 + schemeless
.Length());
248 aSite
.Append(scheme
);
249 aSite
.Append("://"_ns
);
250 aSite
.Append(schemeless
);
255 // External function for dealing with a host string directly: finds the public
256 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
// The hostname is normalized with NormalizeHostname() first, then looked up
// with aAdditionalParts = 0 and aOnlyKnownPublicSuffix = false.
258 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
& aHostname
,
259 nsACString
& aPublicSuffix
) {
260 // Create a mutable copy of the hostname and normalize it to ACE.
261 // This will fail if the hostname includes invalid characters.
262 nsAutoCString
normHostname(aHostname
);
263 nsresult rv
= NormalizeHostname(normHostname
);
268 return GetBaseDomainInternal(normHostname
, 0, false, aPublicSuffix
);
// Same flow as GetPublicSuffixFromHost(), but passes
// aOnlyKnownPublicSuffix = true to GetBaseDomainInternal(), which then empties
// the result when no rule in the suffix data matched the host.
272 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString
& aHostname
,
273 nsACString
& aPublicSuffix
) {
274 // Create a mutable copy of the hostname and normalize it to ACE.
275 // This will fail if the hostname includes invalid characters.
276 nsAutoCString
normHostname(aHostname
);
277 nsresult rv
= NormalizeHostname(normHostname
);
282 return GetBaseDomainInternal(normHostname
, 0, true, aPublicSuffix
);
285 // External function for dealing with a host string directly: finds the base
286 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
287 // requested. See GetBaseDomainInternal().
289 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
& aHostname
,
290 uint32_t aAdditionalParts
,
291 nsACString
& aBaseDomain
) {
// Reject values that would turn negative once converted to the int32_t
// parameter of GetBaseDomainInternal().
292 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
294 // Create a mutable copy of the hostname and normalize it to ACE.
295 // This will fail if the hostname includes invalid characters.
296 nsAutoCString
normHostname(aHostname
);
297 nsresult rv
= NormalizeHostname(normHostname
);
// The internal helper is called with one more part than the caller asked for.
302 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, false,
// Normalizes aHostname and calls GetBaseDomainInternal() with the special
// aAdditionalParts value -1, the only negative value that helper expects
// (see its assertion).
307 nsEffectiveTLDService::GetNextSubDomain(const nsACString
& aHostname
,
308 nsACString
& aBaseDomain
) {
309 // Create a mutable copy of the hostname and normalize it to ACE.
310 // This will fail if the hostname includes invalid characters.
311 nsAutoCString
normHostname(aHostname
);
312 nsresult rv
= NormalizeHostname(normHostname
);
313 NS_ENSURE_SUCCESS(rv
, rv
);
315 return GetBaseDomainInternal(normHostname
, -1, false, aBaseDomain
);
318 // Finds the base domain for a host, with requested number of additional parts.
319 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
320 // if more subdomain parts are requested than are available, or if the hostname
321 // includes characters that are not valid in a URL. Normalization is performed
322 // on the host string and the result will be in UTF8.
//
// aHostname is modified in place: a trailing dot is chopped off here.
// aAdditionalParts is the number of requested domain levels; the special value
// -1 is what GetNextSubDomain() passes. aOnlyKnownPublicSuffix makes the
// function empty aBaseDomain when no rule in mGraph matched the host.
// Error codes visible below: NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS,
// NS_ERROR_INVALID_ARG and NS_ERROR_HOST_IS_IP_ADDRESS.
323 nsresult
nsEffectiveTLDService::GetBaseDomainInternal(
324 nsCString
& aHostname
, int32_t aAdditionalParts
, bool aOnlyKnownPublicSuffix
,
325 nsACString
& aBaseDomain
) {
// Lookup results that get special treatment in the walk below.
326 const int kExceptionRule
= 1;
327 const int kWildcardRule
= 2;
// An empty host has no domain levels at all.
329 if (aHostname
.IsEmpty()) {
330 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
333 // chomp any trailing dot, and keep track of it for later
334 bool trailingDot
= aHostname
.Last() == '.';
336 aHostname
.Truncate(aHostname
.Length() - 1);
339 // check the edge cases of the host being '.' or having a second trailing '.',
340 // since subsequent checks won't catch it.
341 if (aHostname
.IsEmpty() || aHostname
.Last() == '.') {
342 return NS_ERROR_INVALID_ARG
;
345 // Lookup in the cache if this is a normal query. This is restricted to
346 // main thread-only as the cache is not thread-safe.
347 Maybe
<TldCache::Entry
> entry
;
348 if (aAdditionalParts
== 1 && NS_IsMainThread()) {
349 auto p
= mMruTable
.Lookup(aHostname
);
// Cached failures are replayed as-is.
351 if (NS_FAILED(p
.Data().mResult
)) {
352 return p
.Data().mResult
;
355 // There was a match, just return the cached value.
356 aBaseDomain
= p
.Data().mBaseDomain
;
358 aBaseDomain
.Append('.');
367 // Check if we're dealing with an IPv4/IPv6 hostname, and return
// NS_ERROR_HOST_IS_IP_ADDRESS if so.
368 if (mozilla::net::HostIsIPLiteral(aHostname
)) {
369 // Update the MRU table if in use.
371 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_HOST_IS_IP_ADDRESS
});
374 return NS_ERROR_HOST_IS_IP_ADDRESS
;
377 // Walk up the domain tree, most specific to least specific,
378 // looking for matches at each level. Note that a given level may
379 // have multiple attributes (e.g. IsWild() and IsNormal()).
380 const char* prevDomain
= nullptr;
381 const char* currDomain
= aHostname
.get();
382 const char* nextDot
= strchr(currDomain
, '.');
383 const char* end
= currDomain
+ aHostname
.Length();
384 // Default value of *eTLD is currDomain as set in the while loop below
385 const char* eTLD
= nullptr;
386 bool hasKnownPublicSuffix
= false;
388 // sanity check the string we're about to look up: it should not begin
389 // with a '.'; this would mean the hostname began with a '.' or had an
390 // embedded '..' sequence.
391 if (*currDomain
== '.') {
392 // Update the MRU table if in use.
394 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INVALID_ARG
});
397 return NS_ERROR_INVALID_ARG
;
// mGraph is read under the read lock on mGraphLock; Observe() replaces it
// under the write lock on the same object.
402 AutoReadLock
lock(mGraphLock
);
403 // Perform the lookup.
404 result
= mGraph
->Lookup(Substring(currDomain
, end
));
// Any result other than Dafsa::kKeyNotFound counts as a known public suffix.
406 if (result
!= Dafsa::kKeyNotFound
) {
407 hasKnownPublicSuffix
= true;
408 if (result
== kWildcardRule
&& prevDomain
) {
409 // wildcard rules imply an eTLD one level inferior to the match.
413 if (result
!= kExceptionRule
|| !nextDot
) {
414 // specific match, or we've hit the top domain level
418 if (result
== kExceptionRule
) {
419 // exception rules imply an eTLD one level superior to the match.
426 // we've hit the top domain level; use it by default.
// Advance one label: the current position becomes the previous one and the
// search continues just past the next dot.
431 prevDomain
= currDomain
;
432 currDomain
= nextDot
+ 1;
433 nextDot
= strchr(currDomain
, '.');
// Callers that only accept known suffixes get an empty result when no rule
// matched.
436 if (aOnlyKnownPublicSuffix
&& !hasKnownPublicSuffix
) {
437 aBaseDomain
.Truncate();
441 const char *begin
, *iter
;
// -1 is the only negative value expected here (see the assertion).
442 if (aAdditionalParts
< 0) {
443 NS_ASSERTION(aAdditionalParts
== -1,
444 "aAdditionalParts can't be negative and different from -1");
// Scan forward from the start of the host until eTLD or the first dot.
446 for (iter
= aHostname
.get(); iter
!= eTLD
&& *iter
!= '.'; iter
++) {
454 aAdditionalParts
= 0;
457 // count off the number of requested domains.
458 begin
= aHostname
.get();
// Step backwards one character at a time; each dot uses up one requested part.
466 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
// A non-zero remainder is reported as NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS.
474 if (aAdditionalParts
!= 0) {
475 // Update the MRU table if in use.
478 TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
});
481 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
// The answer is the tail of the host starting at iter.
484 aBaseDomain
= Substring(iter
, end
);
486 // Update the MRU table if in use.
488 entry
->Set(TLDCacheEntry
{aHostname
, nsCString(aBaseDomain
), NS_OK
});
491 // add on the trailing dot, if applicable
493 aBaseDomain
.Append('.');
499 // Normalizes the given hostname, component by component. ASCII/ACE
500 // components are lower-cased, and UTF-8 components are normalized per
501 // RFC 3454 and converted to ACE.
// aHostname is both input and output: non-ASCII input is converted in place
// via mIDNService->ConvertUTF8toACE(), and ToLowerCase() is applied to the
// hostname afterwards.
502 nsresult
nsEffectiveTLDService::NormalizeHostname(nsCString
& aHostname
) {
503 if (!IsAscii(aHostname
)) {
504 nsresult rv
= mIDNService
->ConvertUTF8toACE(aHostname
, aHostname
);
510 ToLowerCase(aHostname
);
// Thin forwarder: passes all three arguments straight to net::HasRootDomain()
// and returns its result.
515 nsEffectiveTLDService::HasRootDomain(const nsACString
& aInput
,
516 const nsACString
& aHost
, bool* aResult
) {
517 return net::HasRootDomain(aInput
, aHost
, aResult
);