1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/MemoryReporting.h"
14 #include "nsEffectiveTLDService.h"
15 #include "nsIIDNService.h"
16 #include "nsNetUtil.h"
19 using namespace mozilla
;
// NS_IMPL_ISUPPORTS invocation for nsEffectiveTLDService. Only
// nsIEffectiveTLDService is visible in the interface list here; the trailing
// comma shows that the list continues on a line missing from this listing.
21 NS_IMPL_ISUPPORTS(nsEffectiveTLDService
, nsIEffectiveTLDService
,
24 // ----------------------------------------------------------------------
// Helpers for locating an eTLD string inside the string table.
// ETLD_STR_NUM_1 pastes `str` onto its argument; ETLD_STR_NUM adds one level
// of macro indirection so that an argument such as __LINE__ is expanded
// before the paste happens.
26 #define ETLD_STR_NUM_1(line) str##line
27 #define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
// Byte offset of member str<__LINE__> within struct etld_string_list. The
// `name` argument is not used by the expansion; the member is selected solely
// by the line on which the macro is expanded.
28 #define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
// Table of ETLDEntry records. etld_data.inc is included with ETLD_ENTRY
// defined so that each rule expands to a `{ string-table offset, ex, wild }`
// initializer.
// NOTE(review): the end of this initializer is not present in this listing --
// confirm against the canonical source.
30 const ETLDEntry
nsDomainEntry::entries
[] = {
31 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
32 #include "etld_data.inc"
// String table for the rules: etld_data.inc is included again, this time with
// ETLD_ENTRY defined to emit only each rule's `name`.
// NOTE(review): the end of this initializer is not present in this listing --
// confirm against the canonical source.
36 const union nsDomainEntry::etld_strings
nsDomainEntry::strings
= {
38 #define ETLD_ENTRY(name, ex, wild) name,
39 #include "etld_data.inc"
44 // Dummy function to statically ensure that our indices don't overflow
45 // the storage provided for them.
// Each rule in etld_data.inc expands to a static_assert that its string-table
// offset is below (1 << ETLD_ENTRY_N_INDEX_BITS).
47 nsDomainEntry::FuncForStaticAsserts(void)
49 #define ETLD_ENTRY(name, ex, wild) \
50 static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
51 "invalid strtab index");
52 #include "etld_data.inc"
// Retire the offset helper macro.
56 #undef ETLD_ENTRY_OFFSET
60 // ----------------------------------------------------------------------
// File-local pointer to the service instance. It starts out null, and Init()
// asserts that it is still null.
// NOTE(review): no assignment to gService is visible in this listing --
// presumably it happens on a line that was dropped; confirm.
62 static nsEffectiveTLDService
*gService
= nullptr;
// Constructor: passes the number of elements in nsDomainEntry::entries to
// mHash's constructor (presumably as an initial size hint -- confirm against
// the hash table type). The table itself is populated in Init().
64 nsEffectiveTLDService::nsEffectiveTLDService()
65 : mHash(ArrayLength(nsDomainEntry::entries
))
// Initializes the service: obtains the IDN service, loads every static eTLD
// rule into mHash, and registers this object as a weak memory reporter.
// Returns the failure code from do_GetService if the IDN service cannot be
// obtained, and NS_ERROR_OUT_OF_MEMORY if a hash entry cannot be created.
// NOTE(review): this listing has gaps in its embedded numbering (the return
// type, braces, the declaration of `rv` and the final return are not
// visible) -- confirm against the canonical source.
70 nsEffectiveTLDService::Init()
72 const ETLDEntry
*entries
= nsDomainEntry::entries
;
// The IDN service is what NormalizeHostname() uses for non-ASCII hostnames.
75 mIDNService
= do_GetService(NS_IDNSERVICE_CONTRACTID
, &rv
);
76 if (NS_FAILED(rv
)) return rv
;
78 // Initialize eTLD hash from static array
79 for (uint32_t i
= 0; i
< ArrayLength(nsDomainEntry::entries
); i
++) {
// Resolve the entry's string-table index to its domain name.
80 const char *domain
= nsDomainEntry::GetEffectiveTLDName(entries
[i
].strtab_index
);
// Sanity check: the static data is expected to be normalized already.
// NormalizeHostname() is only invoked as an assertion argument here, so
// (presumably) it does not run in builds where NS_ASSERTION is compiled out.
82 nsDependentCString
name(domain
);
83 nsAutoCString
normalizedName(domain
);
84 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName
)),
85 "normalization failure!");
86 NS_ASSERTION(name
.Equals(normalizedName
), "domain not normalized!");
// Insert the domain into the hash and attach the static rule record to it.
88 nsDomainEntry
*entry
= mHash
.PutEntry(domain
);
89 NS_ENSURE_TRUE(entry
, NS_ERROR_OUT_OF_MEMORY
);
90 entry
->SetData(&entries
[i
]);
// No service instance may have been recorded in gService before this point.
93 MOZ_ASSERT(!gService
);
95 RegisterWeakMemoryReporter(this);
// Destructor: undoes the RegisterWeakMemoryReporter(this) call made in Init().
// NOTE(review): any further teardown is not visible in this listing.
100 nsEffectiveTLDService::~nsEffectiveTLDService()
102 UnregisterWeakMemoryReporter(this);
// Defines EffectiveTLDServiceMallocSizeOf, the size-of function that
// CollectReports() passes to SizeOfIncludingThis().
106 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf
)
// Memory reporter callback. Reports a single heap measurement, in bytes, under
// the path "explicit/xpcom/effective-TLD-service"; the amount is
// SizeOfIncludingThis() measured with EffectiveTLDServiceMallocSizeOf.
// The parameters are not referenced directly in the visible body; presumably
// MOZ_COLLECT_REPORT picks up aHandleReport/aData by name -- confirm against
// the macro's definition.
109 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback
* aHandleReport
,
110 nsISupports
* aData
, bool aAnonymize
)
112 return MOZ_COLLECT_REPORT(
113 "explicit/xpcom/effective-TLD-service", KIND_HEAP
, UNITS_BYTES
,
114 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf
),
115 "Memory used by the effective TLD service.");
// Measures this object with aMallocSizeOf and adds the size of mHash's storage
// (excluding the mHash member itself, which is already counted as part of
// `this`).
// NOTE(review): the return statement and the remainder of the trailing
// comment are not visible in this listing.
119 nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf
)
121 size_t n
= aMallocSizeOf(this);
122 n
+= mHash
.SizeOfExcludingThis(nullptr, aMallocSizeOf
);
124 // Measurement of the following members may be added later if DMD finds it is
131 // External function for dealing with URIs correctly.
132 // Pulls out the ASCII host of the innermost URI of aURI and passes it to
133 // GetBaseDomainInternal() with zero additional parts (i.e. the public suffix).
// Fails if aURI or its innermost URI is null, or if GetAsciiHost() fails;
// otherwise returns whatever GetBaseDomainInternal() returns.
// NOTE(review): the declaration of `host` is not visible in this listing.
135 nsEffectiveTLDService::GetPublicSuffix(nsIURI
*aURI
,
136 nsACString
&aPublicSuffix
)
138 NS_ENSURE_ARG_POINTER(aURI
);
// Unwrap nested URIs so the host of the innermost one is used.
140 nsCOMPtr
<nsIURI
> innerURI
= NS_GetInnermostURI(aURI
);
141 NS_ENSURE_ARG_POINTER(innerURI
);
144 nsresult rv
= innerURI
->GetAsciiHost(host
);
145 if (NS_FAILED(rv
)) return rv
;
147 return GetBaseDomainInternal(host
, 0, aPublicSuffix
);
150 // External function for dealing with URIs correctly.
151 // Pulls out the ASCII host of the innermost URI of aURI and passes it to
152 // GetBaseDomainInternal() with aAdditionalParts + 1 parts requested.
// Fails with NS_ERROR_INVALID_ARG if aAdditionalParts is negative when viewed
// as an int32_t, and with the relevant error if aURI or its innermost URI is
// null or GetAsciiHost() fails.
// NOTE(review): the declaration of `host` is not visible in this listing.
154 nsEffectiveTLDService::GetBaseDomain(nsIURI
*aURI
,
155 uint32_t aAdditionalParts
,
156 nsACString
&aBaseDomain
)
158 NS_ENSURE_ARG_POINTER(aURI
);
// GetBaseDomainInternal() takes a signed count, so reject values that would
// become negative when converted.
159 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
// Unwrap nested URIs so the host of the innermost one is used.
161 nsCOMPtr
<nsIURI
> innerURI
= NS_GetInnermostURI(aURI
);
162 NS_ENSURE_ARG_POINTER(innerURI
);
165 nsresult rv
= innerURI
->GetAsciiHost(host
);
166 if (NS_FAILED(rv
)) return rv
;
168 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, aBaseDomain
);
171 // External function for dealing with a host string directly: finds the public
172 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
// Returns the NormalizeHostname() failure code if normalization fails;
// otherwise returns the result of GetBaseDomainInternal() called with zero
// additional parts.
174 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
&aHostname
,
175 nsACString
&aPublicSuffix
)
177 // Create a mutable copy of the hostname and normalize it to ACE.
178 // This will fail if the hostname includes invalid characters.
179 nsAutoCString
normHostname(aHostname
);
180 nsresult rv
= NormalizeHostname(normHostname
);
181 if (NS_FAILED(rv
)) return rv
;
183 return GetBaseDomainInternal(normHostname
, 0, aPublicSuffix
);
186 // External function for dealing with a host string directly: finds the base
187 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
188 // requested. See GetBaseDomainInternal().
// Fails with NS_ERROR_INVALID_ARG if aAdditionalParts is negative when viewed
// as an int32_t, and with the NormalizeHostname() failure code if
// normalization fails. Otherwise requests aAdditionalParts + 1 parts from
// GetBaseDomainInternal().
190 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
&aHostname
,
191 uint32_t aAdditionalParts
,
192 nsACString
&aBaseDomain
)
194 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts
) >= 0, NS_ERROR_INVALID_ARG
);
196 // Create a mutable copy of the hostname and normalize it to ACE.
197 // This will fail if the hostname includes invalid characters.
198 nsAutoCString
normHostname(aHostname
);
199 nsresult rv
= NormalizeHostname(normHostname
);
200 if (NS_FAILED(rv
)) return rv
;
202 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, aBaseDomain
);
// External function for dealing with a host string directly: normalizes the
// hostname and calls GetBaseDomainInternal() with aAdditionalParts == -1, the
// one negative value that function accepts (its handling is in the
// aAdditionalParts < 0 branch there).
// Returns the NormalizeHostname() failure code if normalization fails.
206 nsEffectiveTLDService::GetNextSubDomain(const nsACString
& aHostname
,
207 nsACString
& aBaseDomain
)
209 // Create a mutable copy of the hostname and normalize it to ACE.
210 // This will fail if the hostname includes invalid characters.
211 nsAutoCString
normHostname(aHostname
);
212 nsresult rv
= NormalizeHostname(normHostname
);
213 NS_ENSURE_SUCCESS(rv
, rv
);
215 return GetBaseDomainInternal(normHostname
, -1, aBaseDomain
);
218 // Finds the base domain for a host, with requested number of additional parts.
219 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
220 // if more subdomain parts are requested than are available, or if the hostname
221 // includes characters that are not valid in a URL. Normalization is performed
222 // on the host string and the result will be in UTF8.
//
// aHostname is modified in place (a trailing dot is truncated). As used by the
// callers in this file, aAdditionalParts is 0 for the public suffix alone,
// N + 1 for a base domain with N additional parts, and -1 for
// GetNextSubDomain(). The result is written to aBaseDomain.
// NOTE(review): no normalization call is visible in this function in this
// listing; the host-string callers normalize before calling -- confirm whether
// the statement above about normalization still holds.
// NOTE(review): this listing has gaps in its embedded numbering (braces, loop
// headers, some declarations and guards are missing), so the comments below
// describe only the statements that are visible.
224 nsEffectiveTLDService::GetBaseDomainInternal(nsCString
&aHostname
,
225 int32_t aAdditionalParts
,
226 nsACString
&aBaseDomain
)
// An empty hostname cannot provide any domain levels.
228 if (aHostname
.IsEmpty())
229 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
231 // chomp any trailing dot, and keep track of it for later
232 bool trailingDot
= aHostname
.Last() == '.';
// NOTE(review): embedded line 233 is missing from this listing; the guard on
// trailingDot is presumably there. As listed, this truncation looks
// unconditional -- confirm.
234 aHostname
.Truncate(aHostname
.Length() - 1);
236 // check the edge cases of the host being '.' or having a second trailing '.',
237 // since subsequent checks won't catch it.
238 if (aHostname
.IsEmpty() || aHostname
.Last() == '.')
239 return NS_ERROR_INVALID_ARG
;
241 // Check if we're dealing with an IPv4/IPv6 hostname, and return
// NS_ERROR_HOST_IS_IP_ADDRESS when the host parses as a network address.
// NOTE(review): the declaration of `addr` is not visible in this listing.
243 PRStatus result
= PR_StringToNetAddr(aHostname
.get(), &addr
);
244 if (result
== PR_SUCCESS
)
245 return NS_ERROR_HOST_IS_IP_ADDRESS
;
247 // Walk up the domain tree, most specific to least specific,
248 // looking for matches at each level. Note that a given level may
249 // have multiple attributes (e.g. IsWild() and IsNormal()).
// prevDomain/currDomain/nextDot are cursors into aHostname's buffer; `end`
// points one past its last character, and eTLD starts at the whole host.
250 const char *prevDomain
= nullptr;
251 const char *currDomain
= aHostname
.get();
252 const char *nextDot
= strchr(currDomain
, '.');
253 const char *end
= currDomain
+ aHostname
.Length();
254 const char *eTLD
= currDomain
;
256 // sanity check the string we're about to look up: it should not begin with
257 // a '.'; this would mean the hostname began with a '.' or had an
258 // embedded '..' sequence.
259 if (*currDomain
== '.')
260 return NS_ERROR_INVALID_ARG
;
262 // perform the hash lookup.
263 nsDomainEntry
*entry
= mHash
.GetEntry(currDomain
);
// NOTE(review): `entry` is dereferenced below, and no null check on the
// GetEntry() result is visible in this listing (embedded line 264 is
// missing) -- confirm that one exists in the canonical source.
265 if (entry
->IsWild() && prevDomain
) {
266 // wildcard rules imply an eTLD one level inferior to the match.
270 } else if (entry
->IsNormal() || !nextDot
) {
271 // specific match, or we've hit the top domain level
275 } else if (entry
->IsException()) {
276 // exception rules imply an eTLD one level superior to the match.
283 // we've hit the top domain level; use it by default.
// Step one label up the tree: the text after the current dot becomes the
// next candidate.
288 prevDomain
= currDomain
;
289 currDomain
= nextDot
+ 1;
290 nextDot
= strchr(currDomain
, '.');
293 const char *begin
, *iter
;
// The only negative value accepted is -1, which GetNextSubDomain() passes.
294 if (aAdditionalParts
< 0) {
295 NS_ASSERTION(aAdditionalParts
== -1,
296 "aAdditionalParts can't be negative and different from -1");
// Empty-bodied loop: advance iter from the start of the host until it
// reaches either the eTLD or the first '.'.
298 for (iter
= aHostname
.get(); iter
!= eTLD
&& *iter
!= '.'; iter
++);
304 aAdditionalParts
= 0;
307 // count off the number of requested domains.
308 begin
= aHostname
.get();
// Step iter backwards; each '.' consumes one requested part, and the test
// succeeds on the dot reached when none were left to consume.
315 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
// A non-zero count here means the host did not supply the parts requested.
323 if (aAdditionalParts
!= 0)
324 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
326 aBaseDomain
= Substring(iter
, end
);
327 // add on the trailing dot, if applicable
329 aBaseDomain
.Append('.');
334 // Normalizes the given hostname, component by component. ASCII/ACE
335 // components are lower-cased, and UTF-8 components are normalized per
336 // RFC 3454 and converted to ACE.
338 nsEffectiveTLDService::NormalizeHostname(nsCString
&aHostname
)
340 if (!IsASCII(aHostname
)) {
341 nsresult rv
= mIDNService
->ConvertUTF8toACE(aHostname
, aHostname
);
346 ToLowerCase(aHostname
);