1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // This service reads a file of rules describing TLD-like domain names. For a
7 // complete description of the expected file format and parsing rules, see
8 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
10 #include "mozilla/Util.h"
12 #include "nsEffectiveTLDService.h"
13 #include "nsIIDNService.h"
14 #include "nsIMemoryReporter.h"
15 #include "nsNetUtil.h"
// Make names from the mozilla namespace usable unqualified in this file.
18 using namespace mozilla
;
// XPCOM boilerplate macro naming the class and the single interface
// (nsIEffectiveTLDService) it is declared to implement.
// NOTE(review): the macro's expansion is defined outside this file.
20 NS_IMPL_ISUPPORTS1(nsEffectiveTLDService
, nsIEffectiveTLDService
)
22 // ----------------------------------------------------------------------
// Two-step expansion so that an argument such as __LINE__ is macro-expanded
// before it is pasted onto the "str" prefix.
24 #define ETLD_STR_NUM_1(line) str##line
25 #define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
// Byte offset, inside struct etld_string_list, of the member named
// str<current line number>. The `name` argument is not used by the expansion.
26 #define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
// Entry table: etld_data.inc is included with ETLD_ENTRY expanding to one
// { string offset, ex, wild } initializer per entry.
// NOTE(review): the closing of this initializer is not visible in this
// listing.
28 const ETLDEntry
nsDomainEntry::entries
[] = {
29 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
30 #include "etld_data.inc"
// String table: the same data file is included a second time, now with
// ETLD_ENTRY expanding to just the entry's name.
34 const union nsDomainEntry::etld_strings
nsDomainEntry::strings
= {
36 #define ETLD_ENTRY(name, ex, wild) name,
37 #include "etld_data.inc"
42 // Dummy function to statically ensure that our indices don't overflow
43 // the storage provided for them.
// etld_data.inc is included once more; this time every ETLD_ENTRY expands to
// a MOZ_STATIC_ASSERT that the entry's string offset is smaller than
// 1 << ETLD_ENTRY_N_INDEX_BITS.
45 nsDomainEntry::FuncForStaticAsserts(void)
47 #define ETLD_ENTRY(name, ex, wild) \
48 MOZ_STATIC_ASSERT(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
49 "invalid strtab index");
50 #include "etld_data.inc"
// The offset helper macro is retired here.
54 #undef ETLD_ENTRY_OFFSET
58 // ----------------------------------------------------------------------
// File-local pointer to the service instance; it starts out null, and Init()
// asserts that it is still null.
60 static nsEffectiveTLDService
*gService
= nullptr;
// Declares the malloc-size-of helper that is handed to SizeOfIncludingThis()
// below.
62 NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN(EffectiveTLDServiceMallocSizeOf
)
// Reports the service's size by asking gService to measure itself.
// NOTE(review): the enclosing function's signature is not visible in this
// listing, and gService is dereferenced without a visible null check --
// confirm the reporter can only run while the service is alive.
67 return gService
->SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf
);
// Memory reporter definition: path "explicit/xpcom/effective-TLD-service",
// measured in bytes. Some macro arguments are missing from this listing.
70 NS_MEMORY_REPORTER_IMPLEMENT(
72 "explicit/xpcom/effective-TLD-service",
74 nsIMemoryReporter::UNITS_BYTES
,
76 "Memory used by the effective TLD service.")
// Initializes the service: sizes the hash table, acquires the IDN service,
// inserts one hash entry for every element of nsDomainEntry::entries, and
// registers a memory reporter.
//
// Visible failure paths: returns the failing rv if the IDN service cannot be
// obtained, and NS_ERROR_OUT_OF_MEMORY if a hash entry cannot be created.
// NOTE(review): this listing omits several lines of the function (return
// type, braces, the declaration of rv, the final return).
79 nsEffectiveTLDService::Init()
81 const ETLDEntry
*entries
= nsDomainEntry::entries
;
83 // We'll probably have to rehash at least once, since nsTHashtable doesn't
84 // use a perfect hash, but at least we'll save a few rehashes along the way.
85 // Next optimization here is to precompute the hash using something like
86 // gperf, but one step at a time. :-)
87 mHash
.Init(ArrayLength(nsDomainEntry::entries
));
90 mIDNService
= do_GetService(NS_IDNSERVICE_CONTRACTID
, &rv
);
91 if (NS_FAILED(rv
)) return rv
;
93 // Initialize eTLD hash from static array
94 for (uint32_t i
= 0; i
< ArrayLength(nsDomainEntry::entries
); i
++) {
// Resolve the entry's string-table index to its domain name.
95 const char *domain
= nsDomainEntry::GetEffectiveTLDName(entries
[i
].strtab_index
);
// Sanity check: the stored name must already equal its normalized form.
// The NormalizeHostname() call sits inside the assertion macro, and its
// result (normalizedName) is only consumed by the second assertion.
97 nsDependentCString
name(domain
);
98 nsAutoCString
normalizedName(domain
);
99 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName
)),
100 "normalization failure!");
101 NS_ASSERTION(name
.Equals(normalizedName
), "domain not normalized!");
// Insert the domain into the hash and point the new entry at its table row.
103 nsDomainEntry
*entry
= mHash
.PutEntry(domain
);
104 NS_ENSURE_TRUE(entry
, NS_ERROR_OUT_OF_MEMORY
);
105 entry
->SetData(&entries
[i
]);
// The file-level gService pointer must not have been set before this point.
108 MOZ_ASSERT(!gService
);
// Create and register the memory reporter; the registration result is
// deliberately discarded via the (void) cast.
110 mReporter
= new NS_MEMORY_REPORTER_NAME(EffectiveTLDService
);
111 (void)::NS_RegisterMemoryReporter(mReporter
);
// Destructor: unregisters the memory reporter stored in mReporter. The result
// of the unregistration is deliberately discarded via the (void) cast.
116 nsEffectiveTLDService::~nsEffectiveTLDService()
118 (void)::NS_UnregisterMemoryReporter(mReporter
);
// Measures this object with aMallocSizeOf and adds the size reported by
// mHash.SizeOfExcludingThis(). nullptr is passed as the first argument of
// that call.
// NOTE(review): the return statement and the remainder of the trailing
// comment are not visible in this listing.
124 nsEffectiveTLDService::SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf
)
126 size_t n
= aMallocSizeOf(this);
127 n
+= mHash
.SizeOfExcludingThis(nullptr, aMallocSizeOf
);
129 // Measurement of the following members may be added later if DMD finds it is
137 // External function for dealing with URIs correctly.
138 // Pulls out the host portion from an nsIURI, and calls through to
139 // GetBaseDomainInternal() with zero additional parts.
//
// aURI          - URI to inspect; a null pointer is rejected by
//                 NS_ENSURE_ARG_POINTER.
// aPublicSuffix - output string handed on to GetBaseDomainInternal().
// A failure from GetAsciiHost() is returned unchanged.
141 nsEffectiveTLDService::GetPublicSuffix(nsIURI
*aURI
,
142 nsACString
&aPublicSuffix
)
144 NS_ENSURE_ARG_POINTER(aURI
);
// Work on the innermost URI; fail if there is none.
146 nsCOMPtr
<nsIURI
> innerURI
= NS_GetInnermostURI(aURI
);
147 NS_ENSURE_ARG_POINTER(innerURI
);
150 nsresult rv
= innerURI
->GetAsciiHost(host
);
151 if (NS_FAILED(rv
)) return rv
;
153 return GetBaseDomainInternal(host
, 0, aPublicSuffix
);
156 // External function for dealing with URIs correctly.
157 // Pulls out the host portion from an nsIURI, and calls through to
158 // GetBaseDomainInternal() with aAdditionalParts + 1.
//
// aURI             - URI to inspect; a null pointer is rejected by
//                    NS_ENSURE_ARG_POINTER.
// aAdditionalParts - caller's count; one is added before it is forwarded.
// aBaseDomain      - output string handed on to GetBaseDomainInternal().
// A failure from GetAsciiHost() is returned unchanged.
160 nsEffectiveTLDService::GetBaseDomain(nsIURI
*aURI
,
161 uint32_t aAdditionalParts
,
162 nsACString
&aBaseDomain
)
164 NS_ENSURE_ARG_POINTER(aURI
);
// Work on the innermost URI; fail if there is none.
166 nsCOMPtr
<nsIURI
> innerURI
= NS_GetInnermostURI(aURI
);
167 NS_ENSURE_ARG_POINTER(innerURI
);
170 nsresult rv
= innerURI
->GetAsciiHost(host
);
171 if (NS_FAILED(rv
)) return rv
;
173 return GetBaseDomainInternal(host
, aAdditionalParts
+ 1, aBaseDomain
);
176 // External function for dealing with a host string directly: finds the public
177 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
//
// aHostname     - input host; it is copied, so the caller's string is not
//                 modified here.
// aPublicSuffix - output string handed on to GetBaseDomainInternal(), which is
//                 called with zero additional parts.
// A failure from NormalizeHostname() is returned unchanged.
179 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString
&aHostname
,
180 nsACString
&aPublicSuffix
)
182 // Create a mutable copy of the hostname and normalize it to ACE.
183 // This will fail if the hostname includes invalid characters.
184 nsAutoCString
normHostname(aHostname
);
185 nsresult rv
= NormalizeHostname(normHostname
);
186 if (NS_FAILED(rv
)) return rv
;
188 return GetBaseDomainInternal(normHostname
, 0, aPublicSuffix
);
191 // External function for dealing with a host string directly: finds the base
192 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
193 // requested. See GetBaseDomainInternal().
//
// aHostname        - input host; it is copied, so the caller's string is not
//                    modified here.
// aAdditionalParts - caller's count; one is added before it is forwarded.
// aBaseDomain      - output string handed on to GetBaseDomainInternal().
// A failure from NormalizeHostname() is returned unchanged.
195 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString
&aHostname
,
196 uint32_t aAdditionalParts
,
197 nsACString
&aBaseDomain
)
199 // Create a mutable copy of the hostname and normalize it to ACE.
200 // This will fail if the hostname includes invalid characters.
201 nsAutoCString
normHostname(aHostname
);
202 nsresult rv
= NormalizeHostname(normHostname
);
203 if (NS_FAILED(rv
)) return rv
;
205 return GetBaseDomainInternal(normHostname
, aAdditionalParts
+ 1, aBaseDomain
);
// Normalizes a copy of aHostname and forwards it to GetBaseDomainInternal()
// with aAdditionalParts set to -1, the value that function's negative-count
// branch asserts on.
//
// aHostname   - input host; it is copied, so the caller's string is not
//               modified here.
// aBaseDomain - output string handed on to GetBaseDomainInternal().
// A failure from NormalizeHostname() is propagated through NS_ENSURE_SUCCESS.
209 nsEffectiveTLDService::GetNextSubDomain(const nsACString
& aHostname
,
210 nsACString
& aBaseDomain
)
212 // Create a mutable copy of the hostname and normalize it to ACE.
213 // This will fail if the hostname includes invalid characters.
214 nsAutoCString
normHostname(aHostname
);
215 nsresult rv
= NormalizeHostname(normHostname
);
216 NS_ENSURE_SUCCESS(rv
, rv
);
218 return GetBaseDomainInternal(normHostname
, -1, aBaseDomain
);
221 // Finds the base domain for a host, with requested number of additional parts.
222 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
223 // if more subdomain parts are requested than are available, or if the hostname
224 // includes characters that are not valid in a URL. Normalization is performed
225 // on the host string and the result will be in UTF8.
// NOTE(review): no normalization call is visible in this body; the callers
// that take a raw host string run NormalizeHostname() before calling in.
// Confirm the "UTF8" statement above.
//
// aHostname        - host to examine; modified in place by the Truncate()
//                    call below.
// aAdditionalParts - the visible callers pass 0, a count plus one, or -1.
// aBaseDomain      - receives Substring(iter, end) of the host, optionally
//                    followed by '.'.
//
// Visible error returns: NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS (empty host, or
// a non-zero part count left over), NS_ERROR_INVALID_ARG (host is empty after
// truncation, ends in '.', or a label starts with '.'), and
// NS_ERROR_HOST_IS_IP_ADDRESS.
// NOTE(review): this listing omits many lines of the function (loop headers,
// branch bodies, braces); the comments below describe only what is visible.
227 nsEffectiveTLDService::GetBaseDomainInternal(nsCString
&aHostname
,
228 int32_t aAdditionalParts
,
229 nsACString
&aBaseDomain
)
231 if (aHostname
.IsEmpty())
232 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
234 // chomp any trailing dot, and keep track of it for later
235 bool trailingDot
= aHostname
.Last() == '.';
// NOTE(review): the condition guarding this Truncate() is presumably on an
// omitted line -- confirm it only runs when trailingDot is true.
237 aHostname
.Truncate(aHostname
.Length() - 1);
239 // check the edge cases of the host being '.' or having a second trailing '.',
240 // since subsequent checks won't catch it.
241 if (aHostname
.IsEmpty() || aHostname
.Last() == '.')
242 return NS_ERROR_INVALID_ARG
;
244 // Check if we're dealing with an IPv4/IPv6 hostname, and return
// NS_ERROR_HOST_IS_IP_ADDRESS if the host parses as a network address.
246 PRStatus result
= PR_StringToNetAddr(aHostname
.get(), &addr
);
247 if (result
== PR_SUCCESS
)
248 return NS_ERROR_HOST_IS_IP_ADDRESS
;
250 // Walk up the domain tree, most specific to least specific,
251 // looking for matches at each level. Note that a given level may
252 // have multiple attributes (e.g. IsWild() and IsNormal()).
// prevDomain: the level examined before the current one (null at first).
// currDomain: start of the level currently being looked up.
// nextDot:    first '.' at or after currDomain, or null if there is none.
// end:        one past the last character of the host.
// eTLD:       start of the effective TLD; initialized to the whole host.
253 const char *prevDomain
= nullptr;
254 const char *currDomain
= aHostname
.get();
255 const char *nextDot
= strchr(currDomain
, '.');
256 const char *end
= currDomain
+ aHostname
.Length();
257 const char *eTLD
= currDomain
;
259 // sanity check the string we're about to look up: it should not begin with
260 // a '.'; this would mean the hostname began with a '.' or had an
261 // embedded '..' sequence.
262 if (*currDomain
== '.')
263 return NS_ERROR_INVALID_ARG
;
265 // perform the hash lookup.
266 nsDomainEntry
*entry
= mHash
.GetEntry(currDomain
);
// NOTE(review): entry is dereferenced below; a null check is presumably on
// an omitted line -- confirm.
268 if (entry
->IsWild() && prevDomain
) {
269 // wildcard rules imply an eTLD one level inferior to the match.
273 } else if (entry
->IsNormal() || !nextDot
) {
274 // specific match, or we've hit the top domain level
278 } else if (entry
->IsException()) {
279 // exception rules imply an eTLD one level superior to the match.
286 // we've hit the top domain level; use it by default.
// No decision at this level: step to the next label after the dot.
291 prevDomain
= currDomain
;
292 currDomain
= nextDot
+ 1;
293 nextDot
= strchr(currDomain
, '.');
296 const char *begin
, *iter
;
// Negative count: GetNextSubDomain() is the visible caller that passes -1.
// NOTE(review): the assertion text below reads awkwardly ("should can't");
// it is left untouched here because it is a runtime string.
297 if (aAdditionalParts
< 0) {
298 NS_ASSERTION(aAdditionalParts
== -1,
299 "aAdditionalParts should can't be negative and different from -1");
// Empty-bodied loop: advance iter from the start of the host until it
// reaches eTLD or the first '.'.
301 for (iter
= aHostname
.get(); iter
!= eTLD
&& *iter
!= '.'; iter
++);
307 aAdditionalParts
= 0;
310 // count off the number of requested domains.
311 begin
= aHostname
.get();
// Step iter backwards one character; each '.' found uses up one requested
// part, and the branch is taken when the count was already zero.
318 if (*(--iter
) == '.' && aAdditionalParts
-- == 0) {
// A non-zero count here means the requested parts were not all found.
326 if (aAdditionalParts
!= 0)
327 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS
;
329 aBaseDomain
= Substring(iter
, end
);
330 // add on the trailing dot, if applicable
332 aBaseDomain
.Append('.');
337 // Normalizes the given hostname, component by component. ASCII/ACE
338 // components are lower-cased, and UTF-8 components are normalized per
339 // RFC 3454 and converted to ACE.
//
// aHostname - in/out: converted and lower-cased in place.
// NOTE(review): the visible code hands the whole string to the IDN service in
// a single call; any per-component handling would happen inside that service.
// The handling of rv and the final return are not visible in this listing.
341 nsEffectiveTLDService::NormalizeHostname(nsCString
&aHostname
)
// Only non-ASCII input is sent through the IDN service; the same string is
// passed as both input and output.
343 if (!IsASCII(aHostname
)) {
344 nsresult rv
= mIDNService
->ConvertUTF8toACE(aHostname
, aHostname
);
349 ToLowerCase(aHostname
);