1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "net/base/sdch_manager.h"
7 #include "base/base64.h"
8 #include "base/logging.h"
9 #include "base/metrics/histogram.h"
10 #include "base/string_number_conversions.h"
11 #include "base/string_util.h"
12 #include "crypto/sha2.h"
13 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
14 #include "net/url_request/url_request_http_job.h"
18 //------------------------------------------------------------------------------
20 const size_t SdchManager::kMaxDictionarySize
= 1000000;
23 const size_t SdchManager::kMaxDictionaryCount
= 20;
26 SdchManager
* SdchManager::global_
= NULL
;
29 bool SdchManager::g_sdch_enabled_
= true;
31 //------------------------------------------------------------------------------
// Constructs a Dictionary from its parsed attributes. The initializers that
// are visible build text_ from (|dictionary_text|, |offset|) -- presumably the
// text starting at |offset|, TODO confirm text_'s type -- and copy
// |client_hash| into client_hash_ and |expiration| into expiration_.
// NOTE(review): this listing is incomplete. |offset| is used below but its
// parameter declaration is absent, no initializers for |domain|, |path| or
// |ports| are present, and the constructor body is missing. Restore the
// missing lines from version control before changing this definition.
32 SdchManager::Dictionary::Dictionary(const std::string
& dictionary_text
,
34 const std::string
& client_hash
,
36 const std::string
& domain
,
37 const std::string
& path
,
38 const base::Time
& expiration
,
39 const std::set
<int>& ports
)
40 : text_(dictionary_text
, offset
),
41 client_hash_(client_hash
),
45 expiration_(expiration
),
49 SdchManager::Dictionary::~Dictionary() {
// Decides whether this dictionary may be advertised for |target_url|. The
// checks visible below are, in order: the URL must be in a supported domain,
// must domain-match domain_, must use one of ports_ when ports_ is non-empty,
// must path-match path_ when path_ is non-empty, must not use a secure
// scheme, and the current time must not be later than expiration_.
// NOTE(review): the statement guarded by each check, the end of the block
// comment and the tail of the function are missing from this listing.
// Presumably every failed check returns false and the function otherwise
// returns true -- confirm against version control.
52 bool SdchManager::Dictionary::CanAdvertise(const GURL
& target_url
) {
53 if (!SdchManager::Global()->IsInSupportedDomain(target_url
))
55 /* The specific rules of when a dictionary should be advertised in an
56 Avail-Dictionary header are modeled after the rules for cookie scoping. The
57 terms "domain-match" and "pathmatch" are defined in RFC 2965 [6]. A
58 dictionary may be advertised in the Avail-Dictionaries header exactly when
59 all of the following are true:
60 1. The server's effective host name domain-matches the Domain attribute of
62 2. If the dictionary has a Port attribute, the request port is one of the
63 ports listed in the Port attribute.
64 3. The request URI path-matches the path header of the dictionary.
65 4. The request is not an HTTPS request.
67 if (!DomainMatch(target_url
, domain_
))
69 if (!ports_
.empty() && 0 == ports_
.count(target_url
.EffectiveIntPort()))
71 if (path_
.size() && !PathMatch(target_url
.path(), path_
))
73 if (target_url
.SchemeIsSecure())
75 if (base::Time::Now() > expiration_
)
80 //------------------------------------------------------------------------------
81 // Security functions restricting loads and use of dictionaries.
// Validates the attributes of a dictionary fetched from |dictionary_url|
// before it is stored. Each visible failure path reports a distinct problem
// code through SdchErrorRecovery():
//   - |domain| is empty                      (DICTIONARY_MISSING_DOMAIN_SPECIFIER)
//   - |domain| has no registry-controlled
//     domain, i.e. it is a TLD               (DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN)
//   - |dictionary_url| does not domain-match
//     |domain|                               (DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL)
//   - the URL host ends with |domain| and
//     has a dot in the part before it        (DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX)
//   - the URL's port is not in |ports|       (DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL)
// |path| is not examined by any visible line.
// NOTE(review): several return statements, closing braces, the opening of
// the block comment and the first half of the port condition are missing
// from this listing; restore them from version control before editing.
// NOTE(review): rfind() can return npos; the postfix comparison below then
// relies on unsigned wrap-around never equalling the host size -- verify.
84 bool SdchManager::Dictionary::CanSet(const std::string
& domain
,
85 const std::string
& path
,
86 const std::set
<int>& ports
,
87 const GURL
& dictionary_url
) {
88 if (!SdchManager::Global()->IsInSupportedDomain(dictionary_url
))
91 A dictionary is invalid and must not be stored if any of the following are
93 1. The dictionary has no Domain attribute.
94 2. The effective host name that derives from the referer URL host name does
95 not domain-match the Domain attribute.
96 3. The Domain attribute is a top level domain.
97 4. The referer URL host is a host domain name (not IP address) and has the
98 form HD, where D is the value of the Domain attribute, and H is a string
99 that contains one or more dots.
100 5. If the dictionary has a Port attribute and the referer URL's port was not
104 // TODO(jar): Redirects in dictionary fetches might plausibly be problematic,
105 // and hence the conservative approach is to not allow any redirects (if there
106 // were any... then don't allow the dictionary to be set).
108 if (domain
.empty()) {
109 SdchErrorRecovery(DICTIONARY_MISSING_DOMAIN_SPECIFIER
);
110 return false; // Domain is required.
112 if (RegistryControlledDomainService::GetDomainAndRegistry(domain
).empty()) {
113 SdchErrorRecovery(DICTIONARY_SPECIFIES_TOP_LEVEL_DOMAIN
);
114 return false; // domain was a TLD.
116 if (!Dictionary::DomainMatch(dictionary_url
, domain
)) {
117 SdchErrorRecovery(DICTIONARY_DOMAIN_NOT_MATCHING_SOURCE_URL
);
121 std::string referrer_url_host
= dictionary_url
.host();
122 size_t postfix_domain_index
= referrer_url_host
.rfind(domain
);
123 // See if it is indeed a postfix, or just an internal string.
124 if (referrer_url_host
.size() == postfix_domain_index
+ domain
.size()) {
125 // It is a postfix... so check to see if there's a dot in the prefix.
126 size_t end_of_host_index
= referrer_url_host
.find_first_of('.');
127 if (referrer_url_host
.npos
!= end_of_host_index
&&
128 end_of_host_index
< postfix_domain_index
) {
129 SdchErrorRecovery(DICTIONARY_REFERER_URL_HAS_DOT_IN_PREFIX
);
135 && 0 == ports
.count(dictionary_url
.EffectiveIntPort())) {
136 SdchErrorRecovery(DICTIONARY_PORT_NOT_MATCHING_SOURCE_URL
);
// Decides whether this dictionary may be used for content requested from
// |referring_url|. Visible failure paths, each reported through
// SdchErrorRecovery(): the URL does not domain-match domain_
// (DICTIONARY_FOUND_HAS_WRONG_DOMAIN), its port is not in ports_
// (DICTIONARY_FOUND_HAS_WRONG_PORT_LIST), its path does not path-match a
// non-empty path_ (DICTIONARY_FOUND_HAS_WRONG_PATH), its scheme is secure
// (DICTIONARY_FOUND_HAS_WRONG_SCHEME), or its scheme is not "http"
// (ATTEMPT_TO_DECODE_NON_HTTP_DATA).
// NOTE(review): the return statements, closing braces, the block-comment
// delimiters and the first half of the port condition are missing from this
// listing; presumably each failure returns false -- confirm against version
// control before editing.
143 bool SdchManager::Dictionary::CanUse(const GURL
& referring_url
) {
144 if (!SdchManager::Global()->IsInSupportedDomain(referring_url
))
147 1. The request URL's host name domain-matches the Domain attribute of the
149 2. If the dictionary has a Port attribute, the request port is one of the
150 ports listed in the Port attribute.
151 3. The request URL path-matches the path attribute of the dictionary.
152 4. The request is not an HTTPS request.
154 if (!DomainMatch(referring_url
, domain_
)) {
155 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_DOMAIN
);
159 && 0 == ports_
.count(referring_url
.EffectiveIntPort())) {
160 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PORT_LIST
);
163 if (path_
.size() && !PathMatch(referring_url
.path(), path_
)) {
164 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_PATH
);
167 if (referring_url
.SchemeIsSecure()) {
168 SdchErrorRecovery(DICTIONARY_FOUND_HAS_WRONG_SCHEME
);
172 // TODO(jar): Remove overly restrictive failsafe test (added per security
173 // review) when we have a need to be more general.
174 if (!referring_url
.SchemeIs("http")) {
175 SdchErrorRecovery(ATTEMPT_TO_DECODE_NON_HTTP_DATA
);
182 bool SdchManager::Dictionary::PathMatch(const std::string
& path
,
183 const std::string
& restriction
) {
186 2. P2 is a prefix of P1 and either the final character in P2 is "/" or the
187 character following P2 in P1 is "/".
189 if (path
== restriction
)
191 size_t prefix_length
= restriction
.size();
192 if (prefix_length
> path
.size())
193 return false; // Can't be a prefix.
194 if (0 != path
.compare(0, prefix_length
, restriction
))
196 return restriction
[prefix_length
- 1] == '/' || path
[prefix_length
] == '/';
200 bool SdchManager::Dictionary::DomainMatch(const GURL
& gurl
,
201 const std::string
& restriction
) {
202 // TODO(jar): This is not precisely a domain match definition.
203 return gurl
.DomainIs(restriction
.data(), restriction
.size());
206 //------------------------------------------------------------------------------
// Constructs the manager. The only statement visible here asserts that the
// call is made on a valid thread.
// NOTE(review): statements around the DCHECK and the closing brace are
// missing from this listing. The destructor asserts |this == global_|, so
// the constructor presumably stores |this| in global_ -- confirm against
// version control.
207 SdchManager::SdchManager() {
209 DCHECK(CalledOnValidThread());
// Tears the manager down. Asserts that this instance is the one held in
// global_ and that the call is on a valid thread, then drains dictionaries_:
// each pass takes the first entry, calls Release() on its Dictionary and
// erases the entry by key, until the map is empty.
// NOTE(review): the end of the loop and whatever follows it are missing from
// this listing (presumably global_ is reset -- confirm against version
// control).
213 SdchManager::~SdchManager() {
214 DCHECK_EQ(this, global_
);
215 DCHECK(CalledOnValidThread());
216 while (!dictionaries_
.empty()) {
217 DictionaryMap::iterator it
= dictionaries_
.begin();
218 it
->second
->Release();
219 dictionaries_
.erase(it
->first
);
// Turns SDCH support off via EnableSdchSupport(false) and hands NULL to
// global_->set_sdch_fetcher(), which resets the fetcher.
// NOTE(review): lines between the two calls are missing from this listing;
// presumably they return early when global_ is NULL -- confirm against
// version control, since global_ is dereferenced unconditionally as shown.
225 void SdchManager::Shutdown() {
226 EnableSdchSupport(false);
229 global_
->set_sdch_fetcher(NULL
);
// Accessor for the manager instance used by the static entry points.
// NOTE(review): the body is missing from this listing; presumably it returns
// global_ -- confirm against version control.
233 SdchManager
* SdchManager::Global() {
238 void SdchManager::SdchErrorRecovery(ProblemCodes problem
) {
239 UMA_HISTOGRAM_ENUMERATION("Sdch3.ProblemCodes_4", problem
, MAX_PROBLEM_CODE
);
242 void SdchManager::set_sdch_fetcher(SdchFetcher
* fetcher
) {
243 DCHECK(CalledOnValidThread());
244 fetcher_
.reset(fetcher
);
248 void SdchManager::EnableSdchSupport(bool enabled
) {
249 g_sdch_enabled_
= enabled
;
// Blacklists the host of |url| with an exponentially growing count. The
// visible steps: disallow the latency experiment for |url|, lower-case the
// host, read its current entry in blacklisted_domains_, return early when
// the domain is already blacklisted, otherwise compute
// 1 + 2 * exponential_blacklist_count[domain] and store the result in both
// exponential_blacklist_count and blacklisted_domains_.
// NOTE(review): the conditions guarding the early return and the two stores
// are missing from this listing, as is any check that global_ is non-NULL.
// Presumably they also handle overflow of the doubled count -- confirm
// against version control.
// Note that operator[] on blacklisted_domains_ inserts a zero entry for a
// host that was not present.
253 void SdchManager::BlacklistDomain(const GURL
& url
) {
256 global_
->SetAllowLatencyExperiment(url
, false);
258 std::string
domain(StringToLowerASCII(url
.host()));
259 int count
= global_
->blacklisted_domains_
[domain
];
261 return; // Domain is already blacklisted.
263 count
= 1 + 2 * global_
->exponential_blacklist_count
[domain
];
265 global_
->exponential_blacklist_count
[domain
] = count
;
269 global_
->blacklisted_domains_
[domain
] = count
;
// Blacklists the host of |url| with the largest possible count: disallows
// the latency experiment for |url|, lower-cases the host and sets its entry
// in both exponential_blacklist_count and blacklisted_domains_ to INT_MAX.
// NOTE(review): lines before the first statement are missing from this
// listing; presumably they return early when global_ is NULL -- confirm
// against version control.
273 void SdchManager::BlacklistDomainForever(const GURL
& url
) {
276 global_
->SetAllowLatencyExperiment(url
, false);
278 std::string
domain(StringToLowerASCII(url
.host()));
279 global_
->exponential_blacklist_count
[domain
] = INT_MAX
;
280 global_
->blacklisted_domains_
[domain
] = INT_MAX
;
284 void SdchManager::ClearBlacklistings() {
285 Global()->blacklisted_domains_
.clear();
286 Global()->exponential_blacklist_count
.clear();
290 void SdchManager::ClearDomainBlacklisting(const std::string
& domain
) {
291 Global()->blacklisted_domains_
.erase(StringToLowerASCII(domain
));
295 int SdchManager::BlackListDomainCount(const std::string
& domain
) {
296 if (Global()->blacklisted_domains_
.end() ==
297 Global()->blacklisted_domains_
.find(domain
))
299 return Global()->blacklisted_domains_
[StringToLowerASCII(domain
)];
303 int SdchManager::BlacklistDomainExponential(const std::string
& domain
) {
304 if (Global()->exponential_blacklist_count
.end() ==
305 Global()->exponential_blacklist_count
.find(domain
))
307 return Global()->exponential_blacklist_count
[StringToLowerASCII(domain
)];
// Decides whether SDCH may be used for |url|. Visible logic: checks the
// g_sdch_enabled_ switch, short-circuits when blacklisted_domains_ is empty,
// then looks the lower-cased host up in the blacklist. For a blacklisted
// host the stored count is decremented by one and either written back or the
// entry is erased, and DOMAIN_BLACKLIST_INCLUDES_TARGET is reported, so each
// query on a blacklisted host uses up part of its blacklisting.
// NOTE(review): every return statement and the condition choosing between
// the write-back and the erase are missing from this listing. Presumably the
// entry is erased once the count reaches zero and the function returns false
// for disabled or blacklisted hosts -- confirm against version control.
310 bool SdchManager::IsInSupportedDomain(const GURL
& url
) {
311 DCHECK(CalledOnValidThread());
312 if (!g_sdch_enabled_
)
315 if (blacklisted_domains_
.empty())
318 std::string
domain(StringToLowerASCII(url
.host()));
319 DomainCounter::iterator it
= blacklisted_domains_
.find(domain
);
320 if (blacklisted_domains_
.end() == it
)
323 int count
= it
->second
- 1;
325 blacklisted_domains_
[domain
] = count
;
327 blacklisted_domains_
.erase(domain
);
328 SdchErrorRecovery(DOMAIN_BLACKLIST_INCLUDES_TARGET
);
// Schedules a fetch of |dictionary_url| on fetcher_ when the global
// manager's CanFetchDictionary(request_url, dictionary_url) allows it. Must
// be called on a valid thread.
// NOTE(review): the second operand of the && condition is missing from this
// listing; presumably it checks that fetcher_ is set before it is
// dereferenced -- confirm against version control.
332 void SdchManager::FetchDictionary(const GURL
& request_url
,
333 const GURL
& dictionary_url
) {
334 DCHECK(CalledOnValidThread());
335 if (SdchManager::Global()->CanFetchDictionary(request_url
, dictionary_url
) &&
337 fetcher_
->Schedule(dictionary_url
);
// Decides whether a dictionary at |dictionary_url| may be fetched on behalf
// of a page at |referring_url|. Visible failure paths, each reported through
// SdchErrorRecovery(): the two hosts differ
// (DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST), the referring URL's scheme
// is "https" (DICTIONARY_SELECTED_FOR_SSL), or its scheme is anything other
// than "http" (DICTIONARY_SELECTED_FROM_NON_HTTP). Only the referring URL's
// scheme is inspected by the visible code.
// NOTE(review): the return statements, closing braces and the end of the
// block comment are missing from this listing; presumably each failure
// returns false -- confirm against version control.
340 bool SdchManager::CanFetchDictionary(const GURL
& referring_url
,
341 const GURL
& dictionary_url
) const {
342 DCHECK(CalledOnValidThread());
343 /* The user agent may retrieve a dictionary from the dictionary URL if all of
344 the following are true:
345 1 The dictionary URL host name matches the referrer URL host name
346 2 The dictionary URL host name domain matches the parent domain of the
347 referrer URL host name
348 3 The parent domain of the referrer URL host name is not a top level
350 4 The dictionary URL is not an HTTPS URL.
352 // Item (1) above implies item (2). Spec should be updated.
353 // I take "host name match" to be "is identical to"
354 if (referring_url
.host() != dictionary_url
.host()) {
355 SdchErrorRecovery(DICTIONARY_LOAD_ATTEMPT_FROM_DIFFERENT_HOST
);
358 if (referring_url
.SchemeIs("https")) {
359 SdchErrorRecovery(DICTIONARY_SELECTED_FOR_SSL
);
363 // TODO(jar): Remove this failsafe conservative hack which is more restrictive
364 // than current SDCH spec when needed, and justified by security audit.
365 if (!referring_url
.SchemeIs("http")) {
366 SdchErrorRecovery(DICTIONARY_SELECTED_FROM_NON_HTTP
);
// Parses |dictionary_text| (fetched from |dictionary_url|) and, when it is
// acceptable, stores it in dictionaries_ under its server hash. Visible
// steps:
//   1. Hash the text with GenerateHash(); a server hash already present in
//      dictionaries_ is rejected (DICTIONARY_ALREADY_LOADED).
//   2. Default the expiration to 30 days from now.
//   3. Reject empty text (DICTIONARY_HAS_NO_TEXT) and text without a "\n\n"
//      header terminator (DICTIONARY_HAS_NO_HEADER).
//   4. Walk the header line by line. Each line needs a ':'
//      (DICTIONARY_HEADER_LINE_MISSING_COLON); the name before it is
//      lower-cased and the value starts at the first character after it that
//      is not a space or tab. Recognized names are "domain", "path",
//      "format-version", "max-age" (seconds from now, replacing the default
//      expiration) and "port".
//   5. Check the attributes with Dictionary::CanSet(), then enforce
//      kMaxDictionarySize (DICTIONARY_IS_TOO_LARGE) and kMaxDictionaryCount
//      (DICTIONARY_COUNT_EXCEEDED).
//   6. Record the size in a histogram, allocate the Dictionary with the
//      payload offset |header_end + 2|, AddRef() it and store the pointer.
// NOTE(review): this listing is missing lines, including the loop header,
// the declaration of |ports|, |seconds| and |port|, the assignments for
// "domain"/"path", the handling of "format-version" and of the parsed port,
// several returns and all closing braces. Restore them from version control
// before editing.
// NOTE(review): the results of StringToInt64()/StringToInt() are not checked
// in the visible lines -- verify how malformed values are handled.
373 bool SdchManager::AddSdchDictionary(const std::string
& dictionary_text
,
374 const GURL
& dictionary_url
) {
375 DCHECK(CalledOnValidThread());
376 std::string client_hash
;
377 std::string server_hash
;
378 GenerateHash(dictionary_text
, &client_hash
, &server_hash
);
379 if (dictionaries_
.find(server_hash
) != dictionaries_
.end()) {
380 SdchErrorRecovery(DICTIONARY_ALREADY_LOADED
);
381 return false; // Already loaded.
384 std::string domain
, path
;
386 base::Time
expiration(base::Time::Now() + base::TimeDelta::FromDays(30));
388 if (dictionary_text
.empty()) {
389 SdchErrorRecovery(DICTIONARY_HAS_NO_TEXT
);
390 return false; // Missing header.
393 size_t header_end
= dictionary_text
.find("\n\n");
394 if (std::string::npos
== header_end
) {
395 SdchErrorRecovery(DICTIONARY_HAS_NO_HEADER
);
396 return false; // Missing header.
398 size_t line_start
= 0; // Start of line being parsed.
400 size_t line_end
= dictionary_text
.find('\n', line_start
);
401 DCHECK(std::string::npos
!= line_end
);
402 DCHECK_LE(line_end
, header_end
);
404 size_t colon_index
= dictionary_text
.find(':', line_start
);
405 if (std::string::npos
== colon_index
) {
406 SdchErrorRecovery(DICTIONARY_HEADER_LINE_MISSING_COLON
);
407 return false; // Illegal line missing a colon.
410 if (colon_index
> line_end
)
413 size_t value_start
= dictionary_text
.find_first_not_of(" \t",
415 if (std::string::npos
!= value_start
) {
416 if (value_start
>= line_end
)
418 std::string
name(dictionary_text
, line_start
, colon_index
- line_start
);
419 std::string
value(dictionary_text
, value_start
, line_end
- value_start
);
420 name
= StringToLowerASCII(name
);
421 if (name
== "domain") {
423 } else if (name
== "path") {
425 } else if (name
== "format-version") {
428 } else if (name
== "max-age") {
430 base::StringToInt64(value
, &seconds
);
431 expiration
= base::Time::Now() + base::TimeDelta::FromSeconds(seconds
);
432 } else if (name
== "port") {
434 base::StringToInt(value
, &port
);
440 if (line_end
>= header_end
)
442 line_start
= line_end
+ 1;
445 if (!Dictionary::CanSet(domain
, path
, ports
, dictionary_url
))
448 // TODO(jar): Remove these hacks to preclude a DOS attack involving piles of
449 // useless dictionaries. We should probably have a cache eviction plan,
450 // instead of just blocking additions. For now, with the spec in flux, it
451 // is probably not worth doing eviction handling.
452 if (kMaxDictionarySize
< dictionary_text
.size()) {
453 SdchErrorRecovery(DICTIONARY_IS_TOO_LARGE
);
456 if (kMaxDictionaryCount
<= dictionaries_
.size()) {
457 SdchErrorRecovery(DICTIONARY_COUNT_EXCEEDED
);
461 UMA_HISTOGRAM_COUNTS("Sdch3.Dictionary size loaded", dictionary_text
.size());
462 DVLOG(1) << "Loaded dictionary with client hash " << client_hash
463 << " and server hash " << server_hash
;
464 Dictionary
* dictionary
=
465 new Dictionary(dictionary_text
, header_end
+ 2, client_hash
,
466 dictionary_url
, domain
, path
, expiration
, ports
);
467 dictionary
->AddRef();
468 dictionaries_
[server_hash
] = dictionary
;
// Looks up the dictionary stored under |server_hash| and, when it exists and
// its CanUse(referring_url) check passes, writes the pointer to
// |*dictionary|. Must be called on a valid thread.
// NOTE(review): the statements taken on the not-found and cannot-use paths
// are missing from this listing, as is any initialization of |*dictionary|.
// Presumably |*dictionary| is set to NULL up front and both paths return
// early -- confirm against version control.
472 void SdchManager::GetVcdiffDictionary(const std::string
& server_hash
,
473 const GURL
& referring_url
, Dictionary
** dictionary
) {
474 DCHECK(CalledOnValidThread());
476 DictionaryMap::iterator it
= dictionaries_
.find(server_hash
);
477 if (it
== dictionaries_
.end()) {
480 Dictionary
* matching_dictionary
= it
->second
;
481 if (!matching_dictionary
->CanUse(referring_url
))
483 *dictionary
= matching_dictionary
;
486 // TODO(jar): If we have evictions from the dictionaries_, then we need to
487 // change this interface to return a list of reference counted Dictionary
488 // instances that can be used if/when a server specifies one.
// Appends to |list| the client hash of every stored dictionary whose
// CanAdvertise(target_url) check applies, and records the number of
// advertised dictionaries in the "Sdch3.Advertisement_Count" histogram. Must
// be called on a valid thread.
// NOTE(review): the declaration of the |list| parameter, the |count|
// variable and its updates, the statement guarded by the CanAdvertise()
// check and any separator handling between hashes are missing from this
// listing; restore them from version control before editing.
489 void SdchManager::GetAvailDictionaryList(const GURL
& target_url
,
491 DCHECK(CalledOnValidThread());
493 for (DictionaryMap::iterator it
= dictionaries_
.begin();
494 it
!= dictionaries_
.end(); ++it
) {
495 if (!it
->second
->CanAdvertise(target_url
))
500 list
->append(it
->second
->client_hash());
502 // Watch to see if we have corrupt or numerous dictionaries.
504 UMA_HISTOGRAM_COUNTS("Sdch3.Advertisement_Count", count
);
508 void SdchManager::GenerateHash(const std::string
& dictionary_text
,
509 std::string
* client_hash
, std::string
* server_hash
) {
510 char binary_hash
[32];
511 crypto::SHA256HashString(dictionary_text
, binary_hash
, sizeof(binary_hash
));
513 std::string
first_48_bits(&binary_hash
[0], 6);
514 std::string
second_48_bits(&binary_hash
[6], 6);
515 UrlSafeBase64Encode(first_48_bits
, client_hash
);
516 UrlSafeBase64Encode(second_48_bits
, server_hash
);
518 DCHECK_EQ(server_hash
->length(), 8u);
519 DCHECK_EQ(client_hash
->length(), 8u);
522 //------------------------------------------------------------------------------
523 // Methods for supporting latency experiments.
525 bool SdchManager::AllowLatencyExperiment(const GURL
& url
) const {
526 DCHECK(CalledOnValidThread());
527 return allow_latency_experiment_
.end() !=
528 allow_latency_experiment_
.find(url
.host());
// Adds the host of |url| to, or removes it from, allow_latency_experiment_.
// On the removal path visible below, a host that is not in the set is left
// alone; otherwise LATENCY_TEST_DISALLOWED is reported and the entry is
// erased. Must be called on a valid thread.
// NOTE(review): the branch on |enable| that separates the insert from the
// removal path is missing from this listing; presumably the insert happens
// only when |enable| is true and is followed by a return -- confirm against
// version control.
531 void SdchManager::SetAllowLatencyExperiment(const GURL
& url
, bool enable
) {
532 DCHECK(CalledOnValidThread());
534 allow_latency_experiment_
.insert(url
.host());
537 ExperimentSet::iterator it
= allow_latency_experiment_
.find(url
.host());
538 if (allow_latency_experiment_
.end() == it
)
539 return; // It was already erased, or never allowed.
540 SdchErrorRecovery(LATENCY_TEST_DISALLOWED
);
541 allow_latency_experiment_
.erase(it
);
545 void SdchManager::UrlSafeBase64Encode(const std::string
& input
,
546 std::string
* output
) {
547 // Since this is only done during a dictionary load, and hashes are only 8
548 // characters, we just do the simple fixup, rather than rewriting the encoder.
549 base::Base64Encode(input
, output
);
550 for (size_t i
= 0; i
< output
->size(); ++i
) {
551 switch (output
->data()[i
]) {