1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/HashFunctions.h"
13 #include "mozilla/MemoryReporting.h"
14 #include "mozilla/ResultExtensions.h"
15 #include "mozilla/TextUtils.h"
17 #include "MainThreadUtils.h"
18 #include "nsContentUtils.h"
20 #include "nsEffectiveTLDService.h"
22 #include "nsIIDNService.h"
23 #include "nsIObserverService.h"
26 #include "nsNetUtil.h"
27 #include "nsServiceManagerUtils.h"
28 #include "mozilla/net/DNS.h"
// Keeps the symbols from the generated etld_data.inc (notably kDafsa, which
// this file uses to seed mGraph) inside their own namespace.
30 namespace etld_dafsa
{
32 // Generated file that includes kDafsa
33 #include "etld_data.inc"
35 } // namespace etld_dafsa
37 using namespace mozilla
;
// Declares the XPCOM interfaces implemented by nsEffectiveTLDService:
// nsIEffectiveTLDService, nsIMemoryReporter and nsIObserver.
39 NS_IMPL_ISUPPORTS(nsEffectiveTLDService
, nsIEffectiveTLDService
,
40 nsIMemoryReporter
, nsIObserver
)
42 // ----------------------------------------------------------------------
// File-local raw pointer to the service instance; starts out null.
// NOTE(review): comments and an assertion message elsewhere in this file say
// it is set in Init() and cleared in the destructor; the assignments
// themselves are not visible in this excerpt -- confirm.
44 static nsEffectiveTLDService
* gService
= nullptr;
// Constructs the service with a default-initialized mIDNService and a lock
// named "nsEffectiveTLDService::mGraph", then seeds mGraph with the built-in
// etld_dafsa::kDafsa data. Observe() may later replace mGraph with a table
// loaded from a file.
46 nsEffectiveTLDService::nsEffectiveTLDService()
47 : mIDNService(), mGraphLock("nsEffectiveTLDService::mGraph") {
48 mGraph
.emplace(etld_dafsa::kDafsa
);
// One-time initialization of the service. Asserts that it runs on the main
// thread.
//
// Visible steps:
//  - registers this object as an observer of "public-suffix-list-updated"
//    (third AddObserver argument is false),
//  - fetches the IDN service into mIDNService,
//  - registers this object as a weak memory reporter.
//
// Returns NS_ERROR_ALREADY_INITIALIZED on one path.
// NOTE(review): the condition guarding that return, the declaration of `rv`,
// and the final return are not visible in this excerpt.
51 nsresult
nsEffectiveTLDService::Init() {
52 MOZ_ASSERT(NS_IsMainThread());
53 nsCOMPtr
<nsIObserverService
> obs
= mozilla::services::GetObserverService();
54 obs
->AddObserver(this, "public-suffix-list-updated", false);
57 return NS_ERROR_ALREADY_INITIALIZED
;
61 mIDNService
= do_GetService(NS_IDNSERVICE_CONTRACTID
, &rv
);
67 RegisterWeakMemoryReporter(this);
// nsIObserver callback. Reacts only when aSubject is non-null and the topic
// equals "public-suffix-list-updated"; in that case aSubject must be
// convertible to an nsIFile, otherwise NS_ERROR_ILLEGAL_VALUE is returned.
//
// While holding mGraphLock for writing, mGraph is first reset to the built-in
// kDafsa, then mDafsaMap is initialized from the file, and finally mGraph is
// re-seeded with a Span over the mapped bytes. If mDafsaMap.init() fails,
// MOZ_TRY returns early and mGraph keeps the built-in data.
//
// NOTE(review): the `aTopic` parameter used below is not visible in the
// signature as extracted here, and the final return is missing.
72 NS_IMETHODIMP
nsEffectiveTLDService::Observe(nsISupports
* aSubject
,
74 const char16_t
* aData
) {
76 * Signal sent from netwerk/dns/PublicSuffixList.jsm
77 * aSubject is the nsIFile object for dafsa.bin
78 * aData is the absolute path to the dafsa.bin file (not used)
80 if (aSubject
&& (nsCRT::strcmp(aTopic
, "public-suffix-list-updated") == 0)) {
81 nsCOMPtr
<nsIFile
> mDafsaBinFile(do_QueryInterface(aSubject
));
82 NS_ENSURE_TRUE(mDafsaBinFile
, NS_ERROR_ILLEGAL_VALUE
);
// Exclusive lock: readers in GetBaseDomainInternal() take mGraphLock for
// reading around mGraph->Lookup().
84 AutoWriteLock
lock(mGraphLock
);
85 // Reset mGraph with kDafsa in case reassigning to mDafsaMap fails
87 mGraph
.emplace(etld_dafsa::kDafsa
);
92 MOZ_TRY(mDafsaMap
.init(mDafsaBinFile
));
// View the mapped file contents as a byte span and use it as the new graph.
94 size_t size
= mDafsaMap
.size();
95 const uint8_t* remoteDafsaPtr
= mDafsaMap
.get
<uint8_t>().get();
97 auto remoteDafsa
= mozilla::Span(remoteDafsaPtr
, size
);
100 mGraph
.emplace(remoteDafsa
);
// Destructor: unregisters this object as a weak memory reporter.
// NOTE(review): the code that conditionally clears gService (described by
// the comment below) is not visible in this excerpt.
105 nsEffectiveTLDService::~nsEffectiveTLDService() {
106 UnregisterWeakMemoryReporter(this);
108 // Only clear gService if Init() finished successfully.
// Returns a pointer to the service. The visible code requests the service by
// contract ID (NS_EFFECTIVETLDSERVICE_CONTRACTID) through do_GetService.
// NOTE(review): the surrounding checks, the assertion that owns the message
// string below, and the return statement are not visible here; presumably
// the result is gService -- confirm.
114 nsEffectiveTLDService
* nsEffectiveTLDService::GetInstance() {
118 nsCOMPtr
<nsIEffectiveTLDService
> tldService
=
119 do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID
);
125 "gService must have been initialized in nsEffectiveTLDService::Init");
// Defines EffectiveTLDServiceMallocSizeOf, the size-of function handed to
// SizeOfIncludingThis() by CollectReports() below.
129 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf
)
131 // The amount of heap memory measured here is tiny. It used to be bigger when
132 // nsEffectiveTLDService used a separate hash table instead of binary search.
133 // Nonetheless, we keep this code here in anticipation of bug 1083971 which will
134 // change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
//
// nsIMemoryReporter callback: reports the value of SizeOfIncludingThis()
// under the path "explicit/network/effective-TLD-service" with kind
// KIND_HEAP.
// NOTE(review): the units argument and the return statement are not visible
// in this excerpt.
136 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback
* aHandleReport
,
137 nsISupports
* aData
, bool aAnonymize
) {
138 MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP
,
140 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf
),
141 "Memory used by the effective TLD service.");
// Measures this object with the supplied aMallocSizeOf function; the visible
// code starts the total `n` with aMallocSizeOf(this).
// NOTE(review): the remainder of the body (any further additions to `n` and
// the return) is not visible in this excerpt.
146 size_t nsEffectiveTLDService::SizeOfIncludingThis(
147 mozilla::MallocSizeOf aMallocSizeOf
) {
148 size_t n
= aMallocSizeOf(this);
150 // Measurement of the following members may be added later if DMD finds it is
157 // External function for dealing with URIs correctly.
158 // Pulls out the host portion from an nsIURI, and calls through to
159 // GetPublicSuffixFromHost().
//
// aURI must be non-null (NS_ENSURE_ARG_POINTER). The host is obtained with
// NS_GetInnermostURIHost() and passed to GetBaseDomainInternal() with
// aAdditionalParts = 0 and aOnlyKnownPublicSuffix = false; the result is
// written to aPublicSuffix.
// NOTE(review): the visible code calls GetBaseDomainInternal() directly
// rather than GetPublicSuffixFromHost(); the declaration of `host` and the
// check of `rv` are not visible in this excerpt.
161 nsEffectiveTLDService::GetPublicSuffix(nsIURI
* aURI
,
162 nsACString
& aPublicSuffix
) {
163 NS_ENSURE_ARG_POINTER(aURI
);
166 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
171 return GetBaseDomainInternal(host
, 0, false, aPublicSuffix
);
// Same as GetPublicSuffix(), except that GetBaseDomainInternal() is called
// with aOnlyKnownPublicSuffix = true. In that mode GetBaseDomainInternal()
// truncates its output when no rule in the suffix graph matched the host.
//
// aURI must be non-null (NS_ENSURE_ARG_POINTER).
// NOTE(review): the declaration of `host` and the check of `rv` are not
// visible in this excerpt.
175 nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI
* aURI
,
176 nsACString
& aPublicSuffix
) {
177 NS_ENSURE_ARG_POINTER(aURI
);
180 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
185 return GetBaseDomainInternal(host
, 0, true, aPublicSuffix
);
188 // External function for dealing with URIs correctly.
189 // Pulls out the host portion from an nsIURI, and calls through to
190 // GetBaseDomainFromHost().
//
// aURI must be non-null. aAdditionalParts is rejected with
// NS_ERROR_INVALID_ARG when it does not fit in a non-negative int32_t (the
// cast below turns values above INT32_MAX negative). The host is passed to
// GetBaseDomainInternal() with aAdditionalParts + 1, i.e. the public suffix
// plus one label plus the requested extra labels.
// NOTE(review): the visible code calls GetBaseDomainInternal() directly; the
// declaration of `host` and the check of `rv` are not visible here.
192 nsEffectiveTLDService::GetBaseDomain(nsIURI
* aURI
, uint32_t aAdditionalParts
,
193 nsACString
& aBaseDomain
) {
194 NS_ENSURE_ARG_POINTER(aURI
);
195 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
198 nsresult rv
= NS_GetInnermostURIHost(aURI
, host
);
203 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, false, aBaseDomain
);
206 // External function for dealing with URIs to get a schemeless site.
207 // Calls through to GetBaseDomain(), handling IP addresses and aliases by
208 // just returning their serialized host.
//
// aURI must be non-null. GetBaseDomain() is called with no additional parts.
// When it fails with NS_ERROR_HOST_IS_IP_ADDRESS or
// NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS, aSite is instead filled by
// nsContentUtils::GetHostOrIPv6WithBrackets() and that call's result
// replaces rv.
// NOTE(review): the final return is not visible in this excerpt.
210 nsEffectiveTLDService::GetSchemelessSite(nsIURI
* aURI
, nsACString
& aSite
) {
211 NS_ENSURE_ARG_POINTER(aURI
);
213 nsresult rv
= GetBaseDomain(aURI
, 0, aSite
);
214 if (rv
== NS_ERROR_HOST_IS_IP_ADDRESS
||
215 rv
== NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
) {
216 rv
= nsContentUtils::GetHostOrIPv6WithBrackets(aURI
, aSite
);
221 // External function for dealing with URIs to get site correctly.
222 // Calls through to GetSchemelessSite(), and serializes with the scheme and
// "://" prefixed.
//
// aURI must be non-null. Failures from GetScheme() and GetSchemelessSite()
// are propagated. NS_ERROR_INVALID_ARG is returned when the schemeless site
// is exactly "." or when it is empty and the URI scheme is not "file".
// On success aSite receives scheme + "://" + schemeless site.
// NOTE(review): the final return is not visible in this excerpt.
225 nsEffectiveTLDService::GetSite(nsIURI
* aURI
, nsACString
& aSite
) {
226 NS_ENSURE_ARG_POINTER(aURI
);
228 nsAutoCString scheme
;
229 nsresult rv
= aURI
->GetScheme(scheme
);
230 NS_ENSURE_SUCCESS(rv
, rv
);
232 nsAutoCString schemeless
;
233 rv
= GetSchemelessSite(aURI
, schemeless
);
234 NS_ENSURE_SUCCESS(rv
, rv
);
236 // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
237 if (schemeless
.Length() == 1 && schemeless
.Last() == '.') {
238 return NS_ERROR_INVALID_ARG
;
241 // Reject any URIs without a host that aren't file:// URIs.
242 if (schemeless
.IsEmpty() && !aURI
->SchemeIs("file")) {
243 return NS_ERROR_INVALID_ARG
;
// Reserve the exact output size up front: scheme, the 3 characters of "://",
// and the schemeless site.
246 aSite
.SetCapacity(scheme
.Length() + 3 + schemeless
.Length());
247 aSite
.Append(scheme
);
248 aSite
.Append("://"_ns
);
249 aSite
.Append(schemeless
);
254 // External function for dealing with a host string directly: finds the public
255 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
//
// The hostname is copied and normalized with NormalizeHostname() before
// GetBaseDomainInternal() is called with aAdditionalParts = 0 and
// aOnlyKnownPublicSuffix = false.
// NOTE(review): the check of `rv` after normalization is not visible in this
// excerpt.
257 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
& aHostname
,
258 nsACString
& aPublicSuffix
) {
259 // Create a mutable copy of the hostname and normalize it to ACE.
260 // This will fail if the hostname includes invalid characters.
261 nsAutoCString
normHostname(aHostname
);
262 nsresult rv
= NormalizeHostname(normHostname
);
267 return GetBaseDomainInternal(normHostname
, 0, false, aPublicSuffix
);
// Same as GetPublicSuffixFromHost(), except that GetBaseDomainInternal() is
// called with aOnlyKnownPublicSuffix = true. In that mode
// GetBaseDomainInternal() truncates its output when no rule in the suffix
// graph matched the host.
// NOTE(review): the check of `rv` after normalization is not visible in this
// excerpt.
271 nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString
& aHostname
,
272 nsACString
& aPublicSuffix
) {
273 // Create a mutable copy of the hostname and normalize it to ACE.
274 // This will fail if the hostname includes invalid characters.
275 nsAutoCString
normHostname(aHostname
);
276 nsresult rv
= NormalizeHostname(normHostname
);
281 return GetBaseDomainInternal(normHostname
, 0, true, aPublicSuffix
);
284 // External function for dealing with a host string directly: finds the base
285 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
286 // requested. See GetBaseDomainInternal().
//
// aAdditionalParts is rejected with NS_ERROR_INVALID_ARG when it does not fit
// in a non-negative int32_t. The hostname is copied, normalized with
// NormalizeHostname(), and passed to GetBaseDomainInternal() with
// aAdditionalParts + 1 and aOnlyKnownPublicSuffix = false.
// NOTE(review): the check of `rv` and the last argument of the final call
// are not visible in this excerpt.
288 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
& aHostname
,
289 uint32_t aAdditionalParts
,
290 nsACString
& aBaseDomain
) {
291 NS_ENSURE_TRUE(((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
293 // Create a mutable copy of the hostname and normalize it to ACE.
294 // This will fail if the hostname includes invalid characters.
295 nsAutoCString
normHostname(aHostname
);
296 nsresult rv
= NormalizeHostname(normHostname
);
301 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, false,
// Normalizes a copy of aHostname and calls GetBaseDomainInternal() with
// aAdditionalParts = -1. GetBaseDomainInternal() treats -1 as a special mode:
// it asserts the value and scans aHostname up to its first '.'; the rest of
// that branch is not visible in this file excerpt. Normalization failures
// are propagated.
306 nsEffectiveTLDService::GetNextSubDomain(const nsACString
& aHostname
,
307 nsACString
& aBaseDomain
) {
308 // Create a mutable copy of the hostname and normalize it to ACE.
309 // This will fail if the hostname includes invalid characters.
310 nsAutoCString
normHostname(aHostname
);
311 nsresult rv
= NormalizeHostname(normHostname
);
312 NS_ENSURE_SUCCESS(rv
, rv
);
314 return GetBaseDomainInternal(normHostname
, -1, false, aBaseDomain
);
317 // Finds the base domain for a host, with requested number of additional parts.
318 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
319 // if more subdomain parts are requested than are available, or if the hostname
320 // includes characters that are not valid in a URL. Normalization is performed
321 // on the host string and the result will be in UTF8.
// NOTE(review): no normalization call is visible in this function as
// extracted; the *FromHost callers run NormalizeHostname() before calling in.
// Confirm whether the sentence above is still accurate.
//
// Parameters:
//   aHostname              in/out; a trailing character may be truncated
//                          below (trailing-dot handling).
//   aAdditionalParts       callers in this file pass 0 for the public suffix,
//                          N + 1 for a base domain with N extra labels, and
//                          -1 from GetNextSubDomain().
//   aOnlyKnownPublicSuffix when true and no rule in mGraph matched, the
//                          output is truncated.
//   aBaseDomain            out; receives the computed domain.
//
// Visible error returns: NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS,
// NS_ERROR_INVALID_ARG, NS_ERROR_HOST_IS_IP_ADDRESS.
322 nsresult
nsEffectiveTLDService::GetBaseDomainInternal(
323 nsCString
& aHostname
, int32_t aAdditionalParts
, bool aOnlyKnownPublicSuffix
,
324 nsACString
& aBaseDomain
) {
// Values compared against the result of mGraph->Lookup() below.
325 const int kExceptionRule
= 1;
326 const int kWildcardRule
= 2;
328 if (aHostname
.IsEmpty()) {
329 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
332 // chomp any trailing dot, and keep track of it for later
333 bool trailingDot
= aHostname
.Last() == '.';
// NOTE(review): the guard around this Truncate() is not visible here;
// presumably it only runs when trailingDot is true -- confirm.
335 aHostname
.Truncate(aHostname
.Length() - 1);
338 // check the edge cases of the host being '.' or having a second trailing '.',
339 // since subsequent checks won't catch it.
340 if (aHostname
.IsEmpty() || aHostname
.Last() == '.') {
341 return NS_ERROR_INVALID_ARG
;
344 // Lookup in the cache if this is a normal query. This is restricted to
345 // main thread-only as the cache is not thread-safe.
// "Normal query" here means aAdditionalParts == 1, which is what
// GetBaseDomain()/GetBaseDomainFromHost() pass for zero extra parts.
346 Maybe
<TldCache::Entry
> entry
;
347 if (aAdditionalParts
== 1 && NS_IsMainThread()) {
348 auto p
= mMruTable
.Lookup(aHostname
);
// Cached failures are replayed as-is.
350 if (NS_FAILED(p
.Data().mResult
)) {
351 return p
.Data().mResult
;
354 // There was a match, just return the cached value.
355 aBaseDomain
= p
.Data().mBaseDomain
;
357 aBaseDomain
.Append('.');
366 // Check if we're dealing with an IPv4/IPv6 hostname, and return
// NS_ERROR_HOST_IS_IP_ADDRESS if so.
367 if (mozilla::net::HostIsIPLiteral(aHostname
)) {
368 // Update the MRU table if in use.
370 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_HOST_IS_IP_ADDRESS
});
373 return NS_ERROR_HOST_IS_IP_ADDRESS
;
376 // Walk up the domain tree, most specific to least specific,
377 // looking for matches at each level. Note that a given level may
378 // have multiple attributes (e.g. IsWild() and IsNormal()).
379 const char* prevDomain
= nullptr;
380 const char* currDomain
= aHostname
.get();
381 const char* nextDot
= strchr(currDomain
, '.');
382 const char* end
= currDomain
+ aHostname
.Length();
383 // Default value of *eTLD is currDomain as set in the while loop below
384 const char* eTLD
= nullptr;
385 bool hasKnownPublicSuffix
= false;
387 // sanity check the string we're about to look up: it should not begin
388 // with a '.'; this would mean the hostname began with a '.' or had an
389 // embedded '..' sequence.
390 if (*currDomain
== '.') {
391 // Update the MRU table if in use.
393 entry
->Set(TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INVALID_ARG
});
396 return NS_ERROR_INVALID_ARG
;
// Shared lock: Observe() takes mGraphLock for writing while it swaps mGraph.
401 AutoReadLock
lock(mGraphLock
);
402 // Perform the lookup.
403 result
= mGraph
->Lookup(Substring(currDomain
, end
));
405 if (result
!= Dafsa::kKeyNotFound
) {
406 hasKnownPublicSuffix
= true;
407 if (result
== kWildcardRule
&& prevDomain
) {
408 // wildcard rules imply an eTLD one level inferior to the match.
412 if (result
!= kExceptionRule
|| !nextDot
) {
413 // specific match, or we've hit the top domain level
417 if (result
== kExceptionRule
) {
418 // exception rules imply an eTLD one level superior to the match.
425 // we've hit the top domain level; use it by default.
// Advance one label to the right for the next iteration.
430 prevDomain
= currDomain
;
431 currDomain
= nextDot
+ 1;
432 nextDot
= strchr(currDomain
, '.');
// NOTE(review): the return value on this path is not visible in this excerpt.
435 if (aOnlyKnownPublicSuffix
&& !hasKnownPublicSuffix
) {
436 aBaseDomain
.Truncate();
440 const char *begin
, *iter
;
// aAdditionalParts == -1 is the GetNextSubDomain() mode: scan forward from
// the start of the host to the first '.' (or to eTLD).
441 if (aAdditionalParts
< 0) {
442 NS_ASSERTION(aAdditionalParts
== -1,
443 "aAdditionalParts can't be negative and different from -1");
445 for (iter
= aHostname
.get(); iter
!= eTLD
&& *iter
!= '.'; iter
++) {
453 aAdditionalParts
= 0;
456 // count off the number of requested domains.
457 begin
= aHostname
.get();
// Step backwards one character at a time; each '.' consumes one requested
// part until the count reaches zero.
465 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
// A non-zero remainder means the host had fewer labels than requested.
473 if (aAdditionalParts
!= 0) {
474 // Update the MRU table if in use.
477 TLDCacheEntry
{aHostname
, ""_ns
, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
});
480 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
483 aBaseDomain
= Substring(iter
, end
);
485 // Update the MRU table if in use.
487 entry
->Set(TLDCacheEntry
{aHostname
, nsCString(aBaseDomain
), NS_OK
});
490 // add on the trailing dot, if applicable
492 aBaseDomain
.Append('.');
498 // Normalizes the given hostname, component by component. ASCII/ACE
499 // components are lower-cased, and UTF-8 components are normalized per
500 // RFC 3454 and converted to ACE.
//
// aHostname is modified in place. When it contains any non-ASCII character,
// the whole string is passed to mIDNService->ConvertUTF8toACE() with
// aHostname as both input and output; the visible code then lower-cases it
// with ToLowerCase().
// NOTE(review): the handling of `rv` and the final return are not visible in
// this excerpt.
501 nsresult
nsEffectiveTLDService::NormalizeHostname(nsCString
& aHostname
) {
502 if (!IsAscii(aHostname
)) {
503 nsresult rv
= mIDNService
->ConvertUTF8toACE(aHostname
, aHostname
);
509 ToLowerCase(aHostname
);
514 nsEffectiveTLDService::HasRootDomain(const nsACString
& aInput
,
515 const nsACString
& aHost
, bool* aResult
) {
516 return net::HasRootDomain(aInput
, aHost
, aResult
);