Bumping manifests a=b2g-bump
[gecko.git] / netwerk / dns / nsEffectiveTLDService.cpp
blob4619ed5b0952cf2a1def1c16cca849fb9a337017
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/MemoryReporting.h"
14 #include "nsEffectiveTLDService.h"
15 #include "nsIIDNService.h"
16 #include "nsNetUtil.h"
17 #include "prnetdb.h"
19 using namespace mozilla;
21 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
22 nsIMemoryReporter)
24 // ----------------------------------------------------------------------
26 #define ETLD_STR_NUM_1(line) str##line
27 #define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
28 #define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
30 const ETLDEntry nsDomainEntry::entries[] = {
31 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
32 #include "etld_data.inc"
33 #undef ETLD_ENTRY
36 const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
38 #define ETLD_ENTRY(name, ex, wild) name,
39 #include "etld_data.inc"
40 #undef ETLD_ENTRY
44 // Dummy function to statically ensure that our indices don't overflow
45 // the storage provided for them.
46 void
47 nsDomainEntry::FuncForStaticAsserts(void)
49 #define ETLD_ENTRY(name, ex, wild) \
50 static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
51 "invalid strtab index");
52 #include "etld_data.inc"
53 #undef ETLD_ENTRY
56 #undef ETLD_ENTRY_OFFSET
57 #undef ETLD_STR_NUM
58 #undef ETLD_STR_NUM1
60 // ----------------------------------------------------------------------
62 static nsEffectiveTLDService *gService = nullptr;
64 nsEffectiveTLDService::nsEffectiveTLDService()
65 : mHash(ArrayLength(nsDomainEntry::entries))
69 nsresult
70 nsEffectiveTLDService::Init()
72 const ETLDEntry *entries = nsDomainEntry::entries;
74 nsresult rv;
75 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
76 if (NS_FAILED(rv)) return rv;
78 // Initialize eTLD hash from static array
79 for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
80 const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
81 #ifdef DEBUG
82 nsDependentCString name(domain);
83 nsAutoCString normalizedName(domain);
84 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
85 "normalization failure!");
86 NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
87 #endif
88 nsDomainEntry *entry = mHash.PutEntry(domain);
89 NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
90 entry->SetData(&entries[i]);
93 MOZ_ASSERT(!gService);
94 gService = this;
95 RegisterWeakMemoryReporter(this);
97 return NS_OK;
100 nsEffectiveTLDService::~nsEffectiveTLDService()
102 UnregisterWeakMemoryReporter(this);
103 gService = nullptr;
106 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
108 NS_IMETHODIMP
109 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
110 nsISupports* aData, bool aAnonymize)
112 return MOZ_COLLECT_REPORT(
113 "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES,
114 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
115 "Memory used by the effective TLD service.");
118 size_t
119 nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
121 size_t n = aMallocSizeOf(this);
122 n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf);
124 // Measurement of the following members may be added later if DMD finds it is
125 // worthwhile:
126 // - mIDNService
128 return n;
131 // External function for dealing with URI's correctly.
132 // Pulls out the host portion from an nsIURI, and calls through to
133 // GetPublicSuffixFromHost().
134 NS_IMETHODIMP
135 nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
136 nsACString &aPublicSuffix)
138 NS_ENSURE_ARG_POINTER(aURI);
140 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
141 NS_ENSURE_ARG_POINTER(innerURI);
143 nsAutoCString host;
144 nsresult rv = innerURI->GetAsciiHost(host);
145 if (NS_FAILED(rv)) return rv;
147 return GetBaseDomainInternal(host, 0, aPublicSuffix);
150 // External function for dealing with URI's correctly.
151 // Pulls out the host portion from an nsIURI, and calls through to
152 // GetBaseDomainFromHost().
153 NS_IMETHODIMP
154 nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
155 uint32_t aAdditionalParts,
156 nsACString &aBaseDomain)
158 NS_ENSURE_ARG_POINTER(aURI);
159 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
161 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
162 NS_ENSURE_ARG_POINTER(innerURI);
164 nsAutoCString host;
165 nsresult rv = innerURI->GetAsciiHost(host);
166 if (NS_FAILED(rv)) return rv;
168 return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
171 // External function for dealing with a host string directly: finds the public
172 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
173 NS_IMETHODIMP
174 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
175 nsACString &aPublicSuffix)
177 // Create a mutable copy of the hostname and normalize it to ACE.
178 // This will fail if the hostname includes invalid characters.
179 nsAutoCString normHostname(aHostname);
180 nsresult rv = NormalizeHostname(normHostname);
181 if (NS_FAILED(rv)) return rv;
183 return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
186 // External function for dealing with a host string directly: finds the base
187 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
188 // requested. See GetBaseDomainInternal().
189 NS_IMETHODIMP
190 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
191 uint32_t aAdditionalParts,
192 nsACString &aBaseDomain)
194 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
196 // Create a mutable copy of the hostname and normalize it to ACE.
197 // This will fail if the hostname includes invalid characters.
198 nsAutoCString normHostname(aHostname);
199 nsresult rv = NormalizeHostname(normHostname);
200 if (NS_FAILED(rv)) return rv;
202 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
205 NS_IMETHODIMP
206 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
207 nsACString& aBaseDomain)
209 // Create a mutable copy of the hostname and normalize it to ACE.
210 // This will fail if the hostname includes invalid characters.
211 nsAutoCString normHostname(aHostname);
212 nsresult rv = NormalizeHostname(normHostname);
213 NS_ENSURE_SUCCESS(rv, rv);
215 return GetBaseDomainInternal(normHostname, -1, aBaseDomain);
218 // Finds the base domain for a host, with requested number of additional parts.
219 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
220 // if more subdomain parts are requested than are available, or if the hostname
221 // includes characters that are not valid in a URL. Normalization is performed
222 // on the host string and the result will be in UTF8.
223 nsresult
224 nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
225 int32_t aAdditionalParts,
226 nsACString &aBaseDomain)
228 if (aHostname.IsEmpty())
229 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
231 // chomp any trailing dot, and keep track of it for later
232 bool trailingDot = aHostname.Last() == '.';
233 if (trailingDot)
234 aHostname.Truncate(aHostname.Length() - 1);
236 // check the edge cases of the host being '.' or having a second trailing '.',
237 // since subsequent checks won't catch it.
238 if (aHostname.IsEmpty() || aHostname.Last() == '.')
239 return NS_ERROR_INVALID_ARG;
241 // Check if we're dealing with an IPv4/IPv6 hostname, and return
242 PRNetAddr addr;
243 PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
244 if (result == PR_SUCCESS)
245 return NS_ERROR_HOST_IS_IP_ADDRESS;
247 // Walk up the domain tree, most specific to least specific,
248 // looking for matches at each level. Note that a given level may
249 // have multiple attributes (e.g. IsWild() and IsNormal()).
250 const char *prevDomain = nullptr;
251 const char *currDomain = aHostname.get();
252 const char *nextDot = strchr(currDomain, '.');
253 const char *end = currDomain + aHostname.Length();
254 const char *eTLD = currDomain;
255 while (1) {
256 // sanity check the string we're about to look up: it should not begin with
257 // a '.'; this would mean the hostname began with a '.' or had an
258 // embedded '..' sequence.
259 if (*currDomain == '.')
260 return NS_ERROR_INVALID_ARG;
262 // perform the hash lookup.
263 nsDomainEntry *entry = mHash.GetEntry(currDomain);
264 if (entry) {
265 if (entry->IsWild() && prevDomain) {
266 // wildcard rules imply an eTLD one level inferior to the match.
267 eTLD = prevDomain;
268 break;
270 } else if (entry->IsNormal() || !nextDot) {
271 // specific match, or we've hit the top domain level
272 eTLD = currDomain;
273 break;
275 } else if (entry->IsException()) {
276 // exception rules imply an eTLD one level superior to the match.
277 eTLD = nextDot + 1;
278 break;
282 if (!nextDot) {
283 // we've hit the top domain level; use it by default.
284 eTLD = currDomain;
285 break;
288 prevDomain = currDomain;
289 currDomain = nextDot + 1;
290 nextDot = strchr(currDomain, '.');
293 const char *begin, *iter;
294 if (aAdditionalParts < 0) {
295 NS_ASSERTION(aAdditionalParts == -1,
296 "aAdditionalParts can't be negative and different from -1");
298 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++);
300 if (iter != eTLD) {
301 iter++;
303 if (iter != eTLD) {
304 aAdditionalParts = 0;
306 } else {
307 // count off the number of requested domains.
308 begin = aHostname.get();
309 iter = eTLD;
311 while (1) {
312 if (iter == begin)
313 break;
315 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
316 ++iter;
317 ++aAdditionalParts;
318 break;
323 if (aAdditionalParts != 0)
324 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
326 aBaseDomain = Substring(iter, end);
327 // add on the trailing dot, if applicable
328 if (trailingDot)
329 aBaseDomain.Append('.');
331 return NS_OK;
334 // Normalizes the given hostname, component by component. ASCII/ACE
335 // components are lower-cased, and UTF-8 components are normalized per
336 // RFC 3454 and converted to ACE.
337 nsresult
338 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
340 if (!IsASCII(aHostname)) {
341 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
342 if (NS_FAILED(rv))
343 return rv;
346 ToLowerCase(aHostname);
347 return NS_OK;