1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/child/site_isolation_policy.h"
7 #include "base/basictypes.h"
8 #include "base/command_line.h"
9 #include "base/lazy_instance.h"
10 #include "base/logging.h"
11 #include "base/metrics/histogram.h"
12 #include "base/strings/string_util.h"
13 #include "content/public/common/content_switches.h"
14 #include "content/public/common/resource_response_info.h"
15 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
16 #include "net/http/http_response_headers.h"
18 using base::StringPiece
;
// The cross-site document blocking/UMA data collection is deactivated by
// default, and only activated in renderer processes.
//
// Written by SiteIsolationPolicy::SetPolicyEnabled(); read at the top of
// SiteIsolationPolicy::OnReceivedResponse() and ShouldBlockResponse(), which
// both bail out early while this is false.
static bool g_policy_enabled = false;
// MIME types classified by SiteIsolationPolicy::GetCanonicalMimeType().
// They are spelled in lowercase because they are used as the pattern
// argument of LowerCaseEqualsASCII().
const char kTextHtml[] = "text/html";
const char kTextXml[] = "text/xml";
// NOTE(review): |xAppRssXml| breaks the k-prefix convention of its siblings.
// The name is kept because GetCanonicalMimeType() refers to it; rename both
// together.
const char xAppRssXml[] = "application/rss+xml";
const char kAppXml[] = "application/xml";
const char kAppJson[] = "application/json";
const char kTextJson[] = "text/json";
const char kTextXjson[] = "text/x-json";
const char kTextPlain[] = "text/plain";
// Returns true if |status_code| is one of the HTTP status codes for which
// Chrome uses the response body as CSS/JavaScript. For images, Chrome just
// ignores the status code.
//
// TODO(dsjang): this is only needed for collecting UMA stat. Will be deleted
// when this class is used for actual blocking.
bool IsRenderableStatusCode(int status_code) {
  // A switch keeps the list of codes in one place without building an array
  // on every call.
  switch (status_code) {
    case 200:
    case 201:
    case 202:
    case 203:
    case 206:
    case 300:
    case 301:
    case 302:
    case 303:
    case 305:
    case 306:
    case 307:
      return true;
    default:
      return false;
  }
}
52 bool MatchesSignature(StringPiece data
,
53 const StringPiece signatures
[],
56 size_t offset
= data
.find_first_not_of(" \t\r\n");
57 // There is no not-whitespace character in this document.
58 if (offset
== base::StringPiece::npos
)
61 data
.remove_prefix(offset
);
62 size_t length
= data
.length();
64 for (size_t sig_index
= 0; sig_index
< arr_size
; ++sig_index
) {
65 const StringPiece
& signature
= signatures
[sig_index
];
66 size_t signature_length
= signature
.length();
67 if (length
< signature_length
)
70 if (LowerCaseEqualsASCII(
71 data
.begin(), data
.begin() + signature_length
, signature
.data()))
77 void IncrementHistogramCount(const std::string
& name
) {
78 // The default value of min, max, bucket_count are copied from histogram.h.
79 base::HistogramBase
* histogram_pointer
= base::Histogram::FactoryGet(
80 name
, 1, 100000, 50, base::HistogramBase::kUmaTargetedHistogramFlag
);
81 histogram_pointer
->Add(1);
84 void IncrementHistogramEnum(const std::string
& name
,
86 uint32 boundary_value
) {
87 // The default value of min, max, bucket_count are copied from histogram.h.
88 base::HistogramBase
* histogram_pointer
= base::LinearHistogram::FactoryGet(
93 base::HistogramBase::kUmaTargetedHistogramFlag
);
94 histogram_pointer
->Add(sample
);
97 void HistogramCountBlockedResponse(
98 const std::string
& bucket_prefix
,
99 linked_ptr
<SiteIsolationResponseMetaData
>& resp_data
,
100 bool nosniff_block
) {
101 std::string
block_label(nosniff_block
? ".NoSniffBlocked" : ".Blocked");
102 IncrementHistogramCount(bucket_prefix
+ block_label
);
104 // The content is blocked if it is sniffed as HTML/JSON/XML. When
105 // the blocked response is with an error status code, it is not
106 // disruptive for the following reasons : 1) the blocked content is
107 // not a binary object (such as an image) since it is sniffed as
108 // text; 2) then, this blocking only breaks the renderer behavior
109 // only if it is either JavaScript or CSS. However, the renderer
110 // doesn't use the contents of JS/CSS with unaffected status code
111 // (e.g, 404). 3) the renderer is expected not to use the cross-site
112 // document content for purposes other than JS/CSS (e.g, XHR).
113 bool renderable_status_code
=
114 IsRenderableStatusCode(resp_data
->http_status_code
);
116 if (renderable_status_code
) {
117 IncrementHistogramEnum(
118 bucket_prefix
+ block_label
+ ".RenderableStatusCode",
119 resp_data
->resource_type
,
120 RESOURCE_TYPE_LAST_TYPE
);
122 IncrementHistogramCount(bucket_prefix
+ block_label
+
123 ".NonRenderableStatusCode");
127 void HistogramCountNotBlockedResponse(const std::string
& bucket_prefix
,
128 bool sniffed_as_js
) {
129 IncrementHistogramCount(bucket_prefix
+ ".NotBlocked");
131 IncrementHistogramCount(bucket_prefix
+ ".NotBlocked.MaybeJS");
136 SiteIsolationResponseMetaData::SiteIsolationResponseMetaData() {}
138 void SiteIsolationPolicy::SetPolicyEnabled(bool enabled
) {
139 g_policy_enabled
= enabled
;
142 linked_ptr
<SiteIsolationResponseMetaData
>
143 SiteIsolationPolicy::OnReceivedResponse(const GURL
& frame_origin
,
144 const GURL
& response_url
,
145 ResourceType resource_type
,
147 const ResourceResponseInfo
& info
) {
148 if (!g_policy_enabled
)
149 return linked_ptr
<SiteIsolationResponseMetaData
>();
151 // if |origin_pid| is non-zero, it means that this response is for a plugin
152 // spawned from this renderer process. We exclude responses for plugins for
153 // now, but eventually, we're going to make plugin processes directly talk to
154 // the browser process so that we don't apply cross-site document blocking to
157 return linked_ptr
<SiteIsolationResponseMetaData
>();
159 UMA_HISTOGRAM_COUNTS("SiteIsolation.AllResponses", 1);
161 // See if this is for navigation. If it is, don't block it, under the
162 // assumption that we will put it in an appropriate process.
163 if (IsResourceTypeFrame(resource_type
))
164 return linked_ptr
<SiteIsolationResponseMetaData
>();
166 if (!IsBlockableScheme(response_url
))
167 return linked_ptr
<SiteIsolationResponseMetaData
>();
169 if (IsSameSite(frame_origin
, response_url
))
170 return linked_ptr
<SiteIsolationResponseMetaData
>();
172 SiteIsolationResponseMetaData::CanonicalMimeType canonical_mime_type
=
173 GetCanonicalMimeType(info
.mime_type
);
175 if (canonical_mime_type
== SiteIsolationResponseMetaData::Others
)
176 return linked_ptr
<SiteIsolationResponseMetaData
>();
178 // Every CORS request should have the Access-Control-Allow-Origin header even
179 // if it is preceded by a pre-flight request. Therefore, if this is a CORS
180 // request, it has this header. response.httpHeaderField() internally uses
181 // case-insensitive matching for the header name.
182 std::string access_control_origin
;
184 // We can use a case-insensitive header name for EnumerateHeader().
185 info
.headers
->EnumerateHeader(
186 NULL
, "access-control-allow-origin", &access_control_origin
);
187 if (IsValidCorsHeaderSet(frame_origin
, response_url
, access_control_origin
))
188 return linked_ptr
<SiteIsolationResponseMetaData
>();
190 // Real XSD data collection starts from here.
191 std::string no_sniff
;
192 info
.headers
->EnumerateHeader(NULL
, "x-content-type-options", &no_sniff
);
194 linked_ptr
<SiteIsolationResponseMetaData
> resp_data(
195 new SiteIsolationResponseMetaData
);
196 resp_data
->frame_origin
= frame_origin
.spec();
197 resp_data
->response_url
= response_url
;
198 resp_data
->resource_type
= resource_type
;
199 resp_data
->canonical_mime_type
= canonical_mime_type
;
200 resp_data
->http_status_code
= info
.headers
->response_code();
201 resp_data
->no_sniff
= LowerCaseEqualsASCII(no_sniff
, "nosniff");
206 bool SiteIsolationPolicy::ShouldBlockResponse(
207 linked_ptr
<SiteIsolationResponseMetaData
>& resp_data
,
208 const char* raw_data
,
210 std::string
* alternative_data
) {
211 if (!g_policy_enabled
)
214 DCHECK(resp_data
.get());
216 StringPiece
data(raw_data
, raw_length
);
218 // Record the length of the first received network packet to see if it's
219 // enough for sniffing.
220 UMA_HISTOGRAM_COUNTS("SiteIsolation.XSD.DataLength", raw_length
);
222 // Record the number of cross-site document responses with a specific mime
223 // type (text/html, text/xml, etc).
224 UMA_HISTOGRAM_ENUMERATION(
225 "SiteIsolation.XSD.MimeType",
226 resp_data
->canonical_mime_type
,
227 SiteIsolationResponseMetaData::MaxCanonicalMimeType
);
229 // Store the result of cross-site document blocking analysis.
230 bool is_blocked
= false;
231 bool sniffed_as_js
= SniffForJS(data
);
233 // Record the number of responses whose content is sniffed for what its mime
234 // type claims it to be. For example, we apply a HTML sniffer for a document
235 // tagged with text/html here. Whenever this check becomes true, we'll block
237 if (resp_data
->canonical_mime_type
!=
238 SiteIsolationResponseMetaData::Plain
) {
239 std::string bucket_prefix
;
240 bool sniffed_as_target_document
= false;
241 if (resp_data
->canonical_mime_type
==
242 SiteIsolationResponseMetaData::HTML
) {
243 bucket_prefix
= "SiteIsolation.XSD.HTML";
244 sniffed_as_target_document
= SniffForHTML(data
);
245 } else if (resp_data
->canonical_mime_type
==
246 SiteIsolationResponseMetaData::XML
) {
247 bucket_prefix
= "SiteIsolation.XSD.XML";
248 sniffed_as_target_document
= SniffForXML(data
);
249 } else if (resp_data
->canonical_mime_type
==
250 SiteIsolationResponseMetaData::JSON
) {
251 bucket_prefix
= "SiteIsolation.XSD.JSON";
252 sniffed_as_target_document
= SniffForJSON(data
);
254 NOTREACHED() << "Not a blockable mime type: "
255 << resp_data
->canonical_mime_type
;
258 if (sniffed_as_target_document
) {
260 HistogramCountBlockedResponse(bucket_prefix
, resp_data
, false);
262 if (resp_data
->no_sniff
) {
264 HistogramCountBlockedResponse(bucket_prefix
, resp_data
, true);
266 HistogramCountNotBlockedResponse(bucket_prefix
, sniffed_as_js
);
270 // This block is for plain text documents. We apply our HTML, XML,
271 // and JSON sniffer to a text document in the order, and block it
272 // if any of them succeeds in sniffing.
273 std::string bucket_prefix
;
274 if (SniffForHTML(data
))
275 bucket_prefix
= "SiteIsolation.XSD.Plain.HTML";
276 else if (SniffForXML(data
))
277 bucket_prefix
= "SiteIsolation.XSD.Plain.XML";
278 else if (SniffForJSON(data
))
279 bucket_prefix
= "SiteIsolation.XSD.Plain.JSON";
281 if (bucket_prefix
.size() > 0) {
283 HistogramCountBlockedResponse(bucket_prefix
, resp_data
, false);
284 } else if (resp_data
->no_sniff
) {
286 HistogramCountBlockedResponse("SiteIsolation.XSD.Plain", resp_data
, true);
288 HistogramCountNotBlockedResponse("SiteIsolation.XSD.Plain",
293 if (!CommandLine::ForCurrentProcess()->HasSwitch(
294 switches::kBlockCrossSiteDocuments
))
298 alternative_data
->erase();
299 alternative_data
->insert(0, " ");
300 LOG(ERROR
) << resp_data
->response_url
301 << " is blocked as an illegal cross-site document from "
302 << resp_data
->frame_origin
;
307 SiteIsolationResponseMetaData::CanonicalMimeType
308 SiteIsolationPolicy::GetCanonicalMimeType(const std::string
& mime_type
) {
309 if (LowerCaseEqualsASCII(mime_type
, kTextHtml
)) {
310 return SiteIsolationResponseMetaData::HTML
;
313 if (LowerCaseEqualsASCII(mime_type
, kTextPlain
)) {
314 return SiteIsolationResponseMetaData::Plain
;
317 if (LowerCaseEqualsASCII(mime_type
, kAppJson
) ||
318 LowerCaseEqualsASCII(mime_type
, kTextJson
) ||
319 LowerCaseEqualsASCII(mime_type
, kTextXjson
)) {
320 return SiteIsolationResponseMetaData::JSON
;
323 if (LowerCaseEqualsASCII(mime_type
, kTextXml
) ||
324 LowerCaseEqualsASCII(mime_type
, xAppRssXml
) ||
325 LowerCaseEqualsASCII(mime_type
, kAppXml
)) {
326 return SiteIsolationResponseMetaData::XML
;
329 return SiteIsolationResponseMetaData::Others
;
332 bool SiteIsolationPolicy::IsBlockableScheme(const GURL
& url
) {
333 // We exclude ftp:// from here. FTP doesn't provide a Content-Type
334 // header which our policy depends on, so we cannot protect any
335 // document from FTP servers.
336 return url
.SchemeIs(url::kHttpScheme
) || url
.SchemeIs(url::kHttpsScheme
);
339 bool SiteIsolationPolicy::IsSameSite(const GURL
& frame_origin
,
340 const GURL
& response_url
) {
342 if (!frame_origin
.is_valid() || !response_url
.is_valid())
345 if (frame_origin
.scheme() != response_url
.scheme())
348 // SameDomainOrHost() extracts the effective domains (public suffix plus one)
349 // from the two URLs and compare them.
350 return net::registry_controlled_domains::SameDomainOrHost(
353 net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES
);
356 // We don't use Webkit's existing CORS policy implementation since
357 // their policy works in terms of origins, not sites. For example,
358 // when frame is sub.a.com and it is not allowed to access a document
359 // with sub1.a.com. But under Site Isolation, it's allowed.
360 bool SiteIsolationPolicy::IsValidCorsHeaderSet(
361 const GURL
& frame_origin
,
362 const GURL
& website_origin
,
363 const std::string
& access_control_origin
) {
364 // Many websites are sending back "\"*\"" instead of "*". This is
365 // non-standard practice, and not supported by Chrome. Refer to
366 // CrossOriginAccessControl::passesAccessControlCheck().
368 // TODO(dsjang): * is not allowed for the response from a request
369 // with cookies. This allows for more than what the renderer will
370 // eventually be able to receive, so we won't see illegal cross-site
371 // documents allowed by this. We have to find a way to see if this
372 // response is from a cookie-tagged request or not in the future.
373 if (access_control_origin
== "*")
376 // TODO(dsjang): The CORS spec only treats a fully specified URL, except for
377 // "*", but many websites are using just a domain for access_control_origin,
378 // and this is blocked by Webkit's CORS logic here :
379 // CrossOriginAccessControl::passesAccessControlCheck(). GURL is set
380 // is_valid() to false when it is created from a URL containing * in the
383 GURL
cors_origin(access_control_origin
);
384 return IsSameSite(frame_origin
, cors_origin
);
387 // This function is a slight modification of |net::SniffForHTML|.
388 bool SiteIsolationPolicy::SniffForHTML(StringPiece data
) {
389 // The content sniffer used by Chrome and Firefox are using "<!--"
390 // as one of the HTML signatures, but it also appears in valid
391 // JavaScript, considered as well-formed JS by the browser. Since
392 // we do not want to block any JS, we exclude it from our HTML
393 // signatures. This can weaken our document block policy, but we can
394 // break less websites.
395 // TODO(dsjang): parameterize |net::SniffForHTML| with an option
396 // that decides whether to include <!-- or not, so that we can
397 // remove this function.
398 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser
399 // process, we should do single-thread checking here for the static
401 static const StringPiece kHtmlSignatures
[] = {
402 StringPiece("<!DOCTYPE html"), // HTML5 spec
403 StringPiece("<script"), // HTML5 spec, Mozilla
404 StringPiece("<html"), // HTML5 spec, Mozilla
405 StringPiece("<head"), // HTML5 spec, Mozilla
406 StringPiece("<iframe"), // Mozilla
407 StringPiece("<h1"), // Mozilla
408 StringPiece("<div"), // Mozilla
409 StringPiece("<font"), // Mozilla
410 StringPiece("<table"), // Mozilla
411 StringPiece("<a"), // Mozilla
412 StringPiece("<style"), // Mozilla
413 StringPiece("<title"), // Mozilla
414 StringPiece("<b"), // Mozilla
415 StringPiece("<body"), // Mozilla
416 StringPiece("<br"), // Mozilla
417 StringPiece("<p"), // Mozilla
418 StringPiece("<?xml") // Mozilla
421 while (data
.length() > 0) {
422 if (MatchesSignature(
423 data
, kHtmlSignatures
, arraysize(kHtmlSignatures
)))
426 // If we cannot find "<!--", we fail sniffing this as HTML.
427 static const StringPiece kCommentBegins
[] = { StringPiece("<!--") };
428 if (!MatchesSignature(data
, kCommentBegins
, arraysize(kCommentBegins
)))
431 // Search for --> and do SniffForHTML after that. If we can find the
432 // comment's end, we start HTML sniffing from there again.
433 static const char kEndComment
[] = "-->";
434 size_t offset
= data
.find(kEndComment
);
435 if (offset
== base::StringPiece::npos
)
438 // Proceed to the index next to the ending comment (-->).
439 data
.remove_prefix(offset
+ strlen(kEndComment
));
445 bool SiteIsolationPolicy::SniffForXML(base::StringPiece data
) {
446 // TODO(dsjang): Chrome's mime_sniffer is using strncasecmp() for
447 // this signature. However, XML is case-sensitive. Don't we have to
448 // be more lenient only to block documents starting with the exact
449 // string <?xml rather than <?XML ?
450 // TODO(dsjang): Once SiteIsolationPolicy is moved into the browser
451 // process, we should do single-thread checking here for the static
453 static const StringPiece kXmlSignatures
[] = { StringPiece("<?xml") };
454 return MatchesSignature(data
, kXmlSignatures
, arraysize(kXmlSignatures
));
457 bool SiteIsolationPolicy::SniffForJSON(base::StringPiece data
) {
458 // TODO(dsjang): We have to come up with a better way to sniff
459 // JSON. However, even RE cannot help us that much due to the fact
460 // that we don't do full parsing. This DFA starts with state 0, and
461 // finds {, "/' and : in that order. We're avoiding adding a
462 // dependency on a regular expression library.
469 } state
= kStartState
;
471 size_t length
= data
.length();
472 for (size_t i
= 0; i
< length
&& state
< kColonState
; ++i
) {
473 const char c
= data
[i
];
474 if (c
== ' ' || c
== '\t' || c
== '\r' || c
== '\n')
480 state
= kLeftBraceState
;
482 state
= kTerminalState
;
484 case kLeftBraceState
:
485 if (c
== '\"' || c
== '\'')
486 state
= kLeftQuoteState
;
488 state
= kTerminalState
;
490 case kLeftQuoteState
:
500 return state
== kColonState
;
503 bool SiteIsolationPolicy::SniffForJS(StringPiece data
) {
504 // TODO(dsjang): This is a real hack. The only purpose of this function is to
505 // try to see if there's any possibility that this data can be JavaScript
506 // (superset of JS). This function will be removed once UMA stats are
509 // Search for "var " for JS detection.
510 return data
.find("var ") != base::StringPiece::npos
;
513 } // namespace content