Bug 850713 - Bump the required NDK version to 9. r=blassey.bugs,mh+mozilla
[gecko.git] / netwerk / dns / nsEffectiveTLDService.cpp
blob328283e20c4e52b3c254419275c69a20c5dd6f01
1 //* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 // This service reads a file of rules describing TLD-like domain names. For a
7 // complete description of the expected file format and parsing rules, see
8 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
10 #include "mozilla/Util.h"
12 #include "nsEffectiveTLDService.h"
13 #include "nsIIDNService.h"
14 #include "nsIMemoryReporter.h"
15 #include "nsNetUtil.h"
16 #include "prnetdb.h"
18 using namespace mozilla;
// Instantiates the XPCOM ISupports implementation macro for
// nsEffectiveTLDService with nsIEffectiveTLDService as its single interface.
// NOTE(review): the generated members come from the XPCOM headers, not from
// this file -- confirm the exact expansion there.
20 NS_IMPL_ISUPPORTS1(nsEffectiveTLDService, nsIEffectiveTLDService)
22 // ----------------------------------------------------------------------
// Helper macros used while expanding etld_data.inc below.
// ETLD_STR_NUM_1 pastes "str" onto its argument; ETLD_STR_NUM adds one level
// of indirection so that an argument such as __LINE__ is macro-expanded
// before the paste happens.
24 #define ETLD_STR_NUM_1(line) str##line
25 #define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
// Byte offset, inside struct etld_string_list, of the member named
// str<current line number>. The `name` argument is not used by the expansion.
// NOTE(review): this presumably relies on etld_string_list declaring one
// str<line> member per line of etld_data.inc -- confirm against the header.
26 #define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
// Static table of eTLD entries. etld_data.inc is included with ETLD_ENTRY
// temporarily defined so that every ETLD_ENTRY(name, ex, wild) line becomes
// the initializer { <offset of the entry's string>, ex, wild }.
28 const ETLDEntry nsDomainEntry::entries[] = {
29 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
30 #include "etld_data.inc"
31 #undef ETLD_ENTRY
// String storage for the entries. etld_data.inc is expanded again, this time
// with ETLD_ENTRY keeping only the `name` argument of each entry, so the
// initializer is the list of entry names in file order.
34 const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
36 #define ETLD_ENTRY(name, ex, wild) name,
37 #include "etld_data.inc"
38 #undef ETLD_ENTRY
42 // Dummy function to statically ensure that our indices don't overflow
43 // the storage provided for them.
// etld_data.inc is expanded once more here: each ETLD_ENTRY becomes a
// MOZ_STATIC_ASSERT that the entry's string offset is smaller than
// 1 << ETLD_ENTRY_N_INDEX_BITS. The function body contains nothing else.
44 void
45 nsDomainEntry::FuncForStaticAsserts(void)
47 #define ETLD_ENTRY(name, ex, wild) \
48 MOZ_STATIC_ASSERT(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
49 "invalid strtab index");
50 #include "etld_data.inc"
51 #undef ETLD_ENTRY
// Drop the table-building helper macros so they do not leak into the rest of
// this translation unit. The last name must match the definition above
// (ETLD_STR_NUM_1, with the underscore).
#undef ETLD_ENTRY_OFFSET
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM_1
58 // ----------------------------------------------------------------------
// File-local pointer to the live service instance. Init() asserts it is null
// before storing `this`, and the destructor resets it to null; the memory
// reporter callback dereferences it.
60 static nsEffectiveTLDService *gService = nullptr;
// Introduces EffectiveTLDServiceMallocSizeOf, the function passed to
// SizeOfIncludingThis() by the reporter callback.
// NOTE(review): the macro is defined outside this file -- confirm its
// expansion in nsIMemoryReporter.h.
62 NS_MEMORY_REPORTER_MALLOC_SIZEOF_FUN(EffectiveTLDServiceMallocSizeOf)
64 static int64_t
65 GetEffectiveTLDSize()
67 return gService->SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf);
// Defines the EffectiveTLDService memory reporter: a KIND_HEAP reporter at
// path "explicit/xpcom/effective-TLD-service", in UNITS_BYTES, whose amount
// is obtained from GetEffectiveTLDSize(). Init() instantiates it through
// NS_MEMORY_REPORTER_NAME(EffectiveTLDService).
70 NS_MEMORY_REPORTER_IMPLEMENT(
71 EffectiveTLDService,
72 "explicit/xpcom/effective-TLD-service",
73 KIND_HEAP,
74 nsIMemoryReporter::UNITS_BYTES,
75 GetEffectiveTLDSize,
76 "Memory used by the effective TLD service.")
78 nsresult
79 nsEffectiveTLDService::Init()
81 const ETLDEntry *entries = nsDomainEntry::entries;
83 // We'll probably have to rehash at least once, since nsTHashtable doesn't
84 // use a perfect hash, but at least we'll save a few rehashes along the way.
85 // Next optimization here is to precompute the hash using something like
86 // gperf, but one step at a time. :-)
87 mHash.Init(ArrayLength(nsDomainEntry::entries));
89 nsresult rv;
90 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
91 if (NS_FAILED(rv)) return rv;
93 // Initialize eTLD hash from static array
94 for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
95 const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
96 #ifdef DEBUG
97 nsDependentCString name(domain);
98 nsAutoCString normalizedName(domain);
99 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
100 "normalization failure!");
101 NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
102 #endif
103 nsDomainEntry *entry = mHash.PutEntry(domain);
104 NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
105 entry->SetData(&entries[i]);
108 MOZ_ASSERT(!gService);
109 gService = this;
110 mReporter = new NS_MEMORY_REPORTER_NAME(EffectiveTLDService);
111 (void)::NS_RegisterMemoryReporter(mReporter);
113 return NS_OK;
116 nsEffectiveTLDService::~nsEffectiveTLDService()
118 (void)::NS_UnregisterMemoryReporter(mReporter);
119 mReporter = nullptr;
120 gService = nullptr;
123 size_t
124 nsEffectiveTLDService::SizeOfIncludingThis(nsMallocSizeOfFun aMallocSizeOf)
126 size_t n = aMallocSizeOf(this);
127 n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf);
129 // Measurement of the following members may be added later if DMD finds it is
130 // worthwhile:
131 // - mReporter
132 // - mIDNService
134 return n;
137 // External function for dealing with URI's correctly.
138 // Pulls out the host portion from an nsIURI, and calls through to
139 // GetPublicSuffixFromHost().
140 NS_IMETHODIMP
141 nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
142 nsACString &aPublicSuffix)
144 NS_ENSURE_ARG_POINTER(aURI);
146 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
147 NS_ENSURE_ARG_POINTER(innerURI);
149 nsAutoCString host;
150 nsresult rv = innerURI->GetAsciiHost(host);
151 if (NS_FAILED(rv)) return rv;
153 return GetBaseDomainInternal(host, 0, aPublicSuffix);
156 // External function for dealing with URI's correctly.
157 // Pulls out the host portion from an nsIURI, and calls through to
158 // GetBaseDomainFromHost().
159 NS_IMETHODIMP
160 nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
161 uint32_t aAdditionalParts,
162 nsACString &aBaseDomain)
164 NS_ENSURE_ARG_POINTER(aURI);
166 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
167 NS_ENSURE_ARG_POINTER(innerURI);
169 nsAutoCString host;
170 nsresult rv = innerURI->GetAsciiHost(host);
171 if (NS_FAILED(rv)) return rv;
173 return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
176 // External function for dealing with a host string directly: finds the public
177 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
178 NS_IMETHODIMP
179 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
180 nsACString &aPublicSuffix)
182 // Create a mutable copy of the hostname and normalize it to ACE.
183 // This will fail if the hostname includes invalid characters.
184 nsAutoCString normHostname(aHostname);
185 nsresult rv = NormalizeHostname(normHostname);
186 if (NS_FAILED(rv)) return rv;
188 return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
191 // External function for dealing with a host string directly: finds the base
192 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
193 // requested. See GetBaseDomainInternal().
194 NS_IMETHODIMP
195 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
196 uint32_t aAdditionalParts,
197 nsACString &aBaseDomain)
199 // Create a mutable copy of the hostname and normalize it to ACE.
200 // This will fail if the hostname includes invalid characters.
201 nsAutoCString normHostname(aHostname);
202 nsresult rv = NormalizeHostname(normHostname);
203 if (NS_FAILED(rv)) return rv;
205 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
208 NS_IMETHODIMP
209 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
210 nsACString& aBaseDomain)
212 // Create a mutable copy of the hostname and normalize it to ACE.
213 // This will fail if the hostname includes invalid characters.
214 nsAutoCString normHostname(aHostname);
215 nsresult rv = NormalizeHostname(normHostname);
216 NS_ENSURE_SUCCESS(rv, rv);
218 return GetBaseDomainInternal(normHostname, -1, aBaseDomain);
221 // Finds the base domain for a host, with requested number of additional parts.
222 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
223 // if more subdomain parts are requested than are available, or if the hostname
224 // includes characters that are not valid in a URL. Normalization is performed
225 // on the host string and the result will be in UTF8.
226 nsresult
227 nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
228 int32_t aAdditionalParts,
229 nsACString &aBaseDomain)
231 if (aHostname.IsEmpty())
232 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
234 // chomp any trailing dot, and keep track of it for later
235 bool trailingDot = aHostname.Last() == '.';
236 if (trailingDot)
237 aHostname.Truncate(aHostname.Length() - 1);
239 // check the edge cases of the host being '.' or having a second trailing '.',
240 // since subsequent checks won't catch it.
241 if (aHostname.IsEmpty() || aHostname.Last() == '.')
242 return NS_ERROR_INVALID_ARG;
244 // Check if we're dealing with an IPv4/IPv6 hostname, and return
245 PRNetAddr addr;
246 PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
247 if (result == PR_SUCCESS)
248 return NS_ERROR_HOST_IS_IP_ADDRESS;
250 // Walk up the domain tree, most specific to least specific,
251 // looking for matches at each level. Note that a given level may
252 // have multiple attributes (e.g. IsWild() and IsNormal()).
253 const char *prevDomain = nullptr;
254 const char *currDomain = aHostname.get();
255 const char *nextDot = strchr(currDomain, '.');
256 const char *end = currDomain + aHostname.Length();
257 const char *eTLD = currDomain;
258 while (1) {
259 // sanity check the string we're about to look up: it should not begin with
260 // a '.'; this would mean the hostname began with a '.' or had an
261 // embedded '..' sequence.
262 if (*currDomain == '.')
263 return NS_ERROR_INVALID_ARG;
265 // perform the hash lookup.
266 nsDomainEntry *entry = mHash.GetEntry(currDomain);
267 if (entry) {
268 if (entry->IsWild() && prevDomain) {
269 // wildcard rules imply an eTLD one level inferior to the match.
270 eTLD = prevDomain;
271 break;
273 } else if (entry->IsNormal() || !nextDot) {
274 // specific match, or we've hit the top domain level
275 eTLD = currDomain;
276 break;
278 } else if (entry->IsException()) {
279 // exception rules imply an eTLD one level superior to the match.
280 eTLD = nextDot + 1;
281 break;
285 if (!nextDot) {
286 // we've hit the top domain level; use it by default.
287 eTLD = currDomain;
288 break;
291 prevDomain = currDomain;
292 currDomain = nextDot + 1;
293 nextDot = strchr(currDomain, '.');
296 const char *begin, *iter;
297 if (aAdditionalParts < 0) {
298 NS_ASSERTION(aAdditionalParts == -1,
299 "aAdditionalParts should can't be negative and different from -1");
301 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++);
303 if (iter != eTLD) {
304 iter++;
306 if (iter != eTLD) {
307 aAdditionalParts = 0;
309 } else {
310 // count off the number of requested domains.
311 begin = aHostname.get();
312 iter = eTLD;
314 while (1) {
315 if (iter == begin)
316 break;
318 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
319 ++iter;
320 ++aAdditionalParts;
321 break;
326 if (aAdditionalParts != 0)
327 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
329 aBaseDomain = Substring(iter, end);
330 // add on the trailing dot, if applicable
331 if (trailingDot)
332 aBaseDomain.Append('.');
334 return NS_OK;
337 // Normalizes the given hostname, component by component. ASCII/ACE
338 // components are lower-cased, and UTF-8 components are normalized per
339 // RFC 3454 and converted to ACE.
340 nsresult
341 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
343 if (!IsASCII(aHostname)) {
344 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
345 if (NS_FAILED(rv))
346 return rv;
349 ToLowerCase(aHostname);
350 return NS_OK;