// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <string>
#include <vector>

#include "base/compiler_specific.h"
#include "base/file_path.h"
#include "base/file_util.h"
#include "base/hash_tables.h"
#include "base/string_util.h"
#include "base/utf_string_conversions.h"
#include "net/base/net_util.h"
#include "net/url_request/url_request_context.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/platform/WebCString.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/platform/WebData.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebNode.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeCollection.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebNodeList.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializer.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebPageSerializerClient.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/platform/WebString.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/platform/WebURL.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/platform/WebVector.h"
#include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
#include "webkit/glue/dom_operations.h"
#include "webkit/glue/webkit_glue.h"
#include "webkit/tools/test_shell/simple_resource_loader_bridge.h"
#include "webkit/tools/test_shell/test_shell_test.h"

using WebKit::WebCString;
using WebKit::WebData;
using WebKit::WebDocument;
using WebKit::WebElement;
using WebKit::WebFrame;
using WebKit::WebNode;
using WebKit::WebNodeCollection;
using WebKit::WebNodeList;
using WebKit::WebPageSerializer;
using WebKit::WebPageSerializerClient;
using WebKit::WebString;
using WebKit::WebURL;
using WebKit::WebView;
using WebKit::WebVector;

50 // Iterate recursively over sub-frames to find one with with a given url.
51 WebFrame
* FindSubFrameByURL(WebView
* web_view
, const GURL
& url
) {
52 if (!web_view
->mainFrame())
55 std::vector
<WebFrame
*> stack
;
56 stack
.push_back(web_view
->mainFrame());
58 while (!stack
.empty()) {
59 WebFrame
* current_frame
= stack
.back();
61 if (GURL(current_frame
->document().url()) == url
)
63 WebNodeCollection all
= current_frame
->document().all();
64 for (WebNode node
= all
.firstItem();
65 !node
.isNull(); node
= all
.nextItem()) {
66 if (!node
.isElementNode())
68 // Check frame tag and iframe tag
69 WebElement element
= node
.to
<WebElement
>();
70 if (!element
.hasTagName("frame") && !element
.hasTagName("iframe"))
72 WebFrame
* sub_frame
= WebFrame::fromFrameOwnerElement(element
);
74 stack
.push_back(sub_frame
);
80 class DomSerializerTests
: public TestShellTest
,
81 public WebPageSerializerClient
{
84 : local_directory_name_(FILE_PATH_LITERAL("./dummy_files/")) { }
86 // DomSerializerDelegate.
87 void didSerializeDataForFrame(const WebURL
& frame_web_url
,
88 const WebCString
& data
,
89 PageSerializationStatus status
) {
91 GURL
frame_url(frame_web_url
);
92 // If the all frames are finished saving, check all finish status
93 if (status
== WebPageSerializerClient::AllFramesAreFinished
) {
94 SerializationFinishStatusMap::iterator it
=
95 serialization_finish_status_
.begin();
96 for (; it
!= serialization_finish_status_
.end(); ++it
)
97 ASSERT_TRUE(it
->second
);
102 // Check finish status of current frame.
103 SerializationFinishStatusMap::iterator it
=
104 serialization_finish_status_
.find(frame_url
.spec());
105 // New frame, set initial status as false.
106 if (it
== serialization_finish_status_
.end())
107 serialization_finish_status_
[frame_url
.spec()] = false;
109 it
= serialization_finish_status_
.find(frame_url
.spec());
110 ASSERT_TRUE(it
!= serialization_finish_status_
.end());
111 // In process frame, finish status should be false.
112 ASSERT_FALSE(it
->second
);
114 // Add data to corresponding frame's content.
115 serialized_frame_map_
[frame_url
.spec()] += data
.data();
117 // Current frame is completed saving, change the finish status.
118 if (status
== WebPageSerializerClient::CurrentFrameIsFinished
)
122 bool HasSerializedFrame(const GURL
& frame_url
) {
123 return serialized_frame_map_
.find(frame_url
.spec()) !=
124 serialized_frame_map_
.end();
127 const std::string
& GetSerializedContentForFrame(
128 const GURL
& frame_url
) {
129 return serialized_frame_map_
[frame_url
.spec()];
132 // Load web page according to specific URL.
133 void LoadPageFromURL(const GURL
& page_url
) {
134 // Load the test file.
135 test_shell_
->ResetTestController();
136 test_shell_
->LoadURL(page_url
);
137 test_shell_
->WaitTestFinished();
140 // Load web page according to input content and relative URLs within
142 void LoadContents(const std::string
& contents
,
143 const GURL
& base_url
,
144 const WebString encoding_info
) {
145 test_shell_
->ResetTestController();
146 // If input encoding is empty, use UTF-8 as default encoding.
147 if (encoding_info
.isEmpty()) {
148 test_shell_
->webView()->mainFrame()->loadHTMLString(contents
, base_url
);
150 WebData
data(contents
.data(), contents
.length());
152 // Do not use WebFrame.LoadHTMLString because it assumes that input
153 // html contents use UTF-8 encoding.
154 // TODO(darin): This should use WebFrame::loadData.
155 WebFrame
* web_frame
=
156 test_shell_
->webView()->mainFrame();
158 ASSERT_TRUE(web_frame
!= NULL
);
160 web_frame
->loadData(data
, "text/html", encoding_info
, base_url
);
163 test_shell_
->WaitTestFinished();
166 // Serialize page DOM according to specific page URL. The parameter
167 // recursive_serialization indicates whether we will serialize all
169 void SerializeDomForURL(const GURL
& page_url
,
170 bool recursive_serialization
) {
171 // Find corresponding WebFrame according to page_url.
172 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(),
174 ASSERT_TRUE(web_frame
!= NULL
);
175 // Add input file URl to links_.
176 links_
.assign(&page_url
,1);
177 // Add dummy file path to local_path_.
178 WebString file_path
= webkit_glue::FilePathStringToWebString(
179 FILE_PATH_LITERAL("c:\\dummy.htm"));
180 local_paths_
.assign(&file_path
, 1);
181 // Start serializing DOM.
182 bool result
= WebPageSerializer::serialize(web_frame
,
183 recursive_serialization
,
184 static_cast<WebPageSerializerClient
*>(this),
187 webkit_glue::FilePathToWebString(local_directory_name_
));
189 ASSERT_TRUE(serialized_
);
193 // Map frame_url to corresponding serialized_content.
194 typedef base::hash_map
<std::string
, std::string
> SerializedFrameContentMap
;
195 SerializedFrameContentMap serialized_frame_map_
;
196 // Map frame_url to corresponding status of serialization finish.
197 typedef base::hash_map
<std::string
, bool> SerializationFinishStatusMap
;
198 SerializationFinishStatusMap serialization_finish_status_
;
199 // Flag indicates whether the process of serializing DOM is finished or not.
201 // The links_ contain dummy original URLs of all saved links.
202 WebVector
<WebURL
> links_
;
203 // The local_paths_ contain dummy corresponding local file paths of all saved
204 // links, which matched links_ one by one.
205 WebVector
<WebString
> local_paths_
;
206 // The local_directory_name_ is dummy relative path of directory which
207 // contain all saved auxiliary files included all sub frames and resources.
208 const FilePath local_directory_name_
;
212 virtual void SetUp() {
213 TestShellTest::SetUp();
217 virtual void TearDown() {
218 TestShellTest::TearDown();
222 // Helper function that test whether the first node in the doc is a doc type
224 bool HasDocType(const WebDocument
& doc
) {
225 WebNode node
= doc
.firstChild();
228 return node
.nodeType() == WebNode::DocumentTypeNode
;
231 // Helper function for checking whether input node is META tag. Return true
232 // means it is META element, otherwise return false. The parameter charset_info
233 // return actual charset info if the META tag has charset declaration.
234 bool IsMetaElement(const WebNode
& node
, std::string
& charset_info
) {
235 if (!node
.isElementNode())
237 const WebElement meta
= node
.toConst
<WebElement
>();
238 if (!meta
.hasTagName("meta"))
240 charset_info
.erase(0, charset_info
.length());
241 // Check the META charset declaration.
242 WebString httpEquiv
= meta
.getAttribute("http-equiv");
243 if (LowerCaseEqualsASCII(httpEquiv
, "content-type")) {
244 std::string content
= meta
.getAttribute("content").utf8();
245 int pos
= content
.find("charset", 0);
247 // Add a dummy charset declaration to charset_info, which indicates this
248 // META tag has charset declaration although we do not get correct value
250 charset_info
.append("has-charset-declaration");
251 int remaining_length
= content
.length() - pos
- 7;
252 if (!remaining_length
)
254 int start_pos
= pos
+ 7;
256 while (remaining_length
--)
257 if (content
[start_pos
++] == L
'=')
259 // Skip beginning space.
260 while (remaining_length
) {
261 if (content
[start_pos
] > 0x0020)
266 if (!remaining_length
)
268 int end_pos
= start_pos
;
269 // Now we find out the start point of charset info. Search the end point.
270 while (remaining_length
--) {
271 if (content
[end_pos
] <= 0x0020 || content
[end_pos
] == L
';')
275 // Get actual charset info.
276 charset_info
= content
.substr(start_pos
, end_pos
- start_pos
);
283 // If original contents have document type, the serialized contents also have
285 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithDocType
) {
286 FilePath page_file_path
= data_dir_
;
287 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
288 page_file_path
= page_file_path
.AppendASCII("youtube_1.htm");
289 GURL file_url
= net::FilePathToFileURL(page_file_path
);
290 ASSERT_TRUE(file_url
.SchemeIsFile());
291 // Load the test file.
292 LoadPageFromURL(file_url
);
293 // Make sure original contents have document type.
294 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
295 ASSERT_TRUE(web_frame
!= NULL
);
296 WebDocument doc
= web_frame
->document();
297 ASSERT_TRUE(HasDocType(doc
));
299 SerializeDomForURL(file_url
, false);
300 // Load the serialized contents.
301 ASSERT_TRUE(HasSerializedFrame(file_url
));
302 const std::string
& serialized_contents
=
303 GetSerializedContentForFrame(file_url
);
304 LoadContents(serialized_contents
, file_url
,
305 web_frame
->document().encoding());
306 // Make sure serialized contents still have document type.
307 web_frame
= test_shell_
->webView()->mainFrame();
308 doc
= web_frame
->document();
309 ASSERT_TRUE(HasDocType(doc
));
312 // If original contents do not have document type, the serialized contents
313 // also do not have document type.
314 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithoutDocType
) {
315 FilePath page_file_path
= data_dir_
;
316 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
317 page_file_path
= page_file_path
.AppendASCII("youtube_2.htm");
318 GURL file_url
= net::FilePathToFileURL(page_file_path
);
319 ASSERT_TRUE(file_url
.SchemeIsFile());
320 // Load the test file.
321 LoadPageFromURL(file_url
);
322 // Make sure original contents do not have document type.
323 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
324 ASSERT_TRUE(web_frame
!= NULL
);
325 WebDocument doc
= web_frame
->document();
326 ASSERT_TRUE(!HasDocType(doc
));
328 SerializeDomForURL(file_url
, false);
329 // Load the serialized contents.
330 ASSERT_TRUE(HasSerializedFrame(file_url
));
331 const std::string
& serialized_contents
=
332 GetSerializedContentForFrame(file_url
);
333 LoadContents(serialized_contents
, file_url
,
334 web_frame
->document().encoding());
335 // Make sure serialized contents do not have document type.
336 web_frame
= test_shell_
->webView()->mainFrame();
337 doc
= web_frame
->document();
338 ASSERT_TRUE(!HasDocType(doc
));
341 // Serialize XML document which has all 5 built-in entities. After
342 // finishing serialization, the serialized contents should be same
343 // with original XML document.
344 TEST_F(DomSerializerTests
, SerializeXMLDocWithBuiltInEntities
) {
345 FilePath page_file_path
= data_dir_
;
346 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
347 page_file_path
= page_file_path
.AppendASCII("note.xml");
348 // Read original contents for later comparison.
349 std::string original_contents
;
350 ASSERT_TRUE(file_util::ReadFileToString(page_file_path
, &original_contents
));
352 GURL file_url
= net::FilePathToFileURL(page_file_path
);
353 ASSERT_TRUE(file_url
.SchemeIsFile());
354 // Load the test file.
355 LoadPageFromURL(file_url
);
357 SerializeDomForURL(file_url
, false);
358 // Compare the serialized contents with original contents.
359 ASSERT_TRUE(HasSerializedFrame(file_url
));
360 const std::string
& serialized_contents
=
361 GetSerializedContentForFrame(file_url
);
362 ASSERT_EQ(original_contents
, serialized_contents
);
365 // When serializing DOM, we add MOTW declaration before html tag.
366 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithAddingMOTW
) {
367 FilePath page_file_path
= data_dir_
;
368 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
369 page_file_path
= page_file_path
.AppendASCII("youtube_2.htm");
370 // Read original contents for later comparison .
371 std::string original_contents
;
372 ASSERT_TRUE(file_util::ReadFileToString(page_file_path
, &original_contents
));
374 GURL file_url
= net::FilePathToFileURL(page_file_path
);
375 ASSERT_TRUE(file_url
.SchemeIsFile());
376 // Make sure original contents does not have MOTW;
377 std::string motw_declaration
=
378 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url
).utf8();
379 ASSERT_FALSE(motw_declaration
.empty());
380 // The encoding of original contents is ISO-8859-1, so we convert the MOTW
381 // declaration to ASCII and search whether original contents has it or not.
382 ASSERT_TRUE(std::string::npos
==
383 original_contents
.find(motw_declaration
));
384 // Load the test file.
385 LoadPageFromURL(file_url
);
387 SerializeDomForURL(file_url
, false);
388 // Make sure the serialized contents have MOTW ;
389 ASSERT_TRUE(HasSerializedFrame(file_url
));
390 const std::string
& serialized_contents
=
391 GetSerializedContentForFrame(file_url
);
392 ASSERT_FALSE(std::string::npos
==
393 serialized_contents
.find(motw_declaration
));
396 // When serializing DOM, we will add the META which have correct charset
397 // declaration as first child of HEAD element for resolving WebKit bug:
398 // http://bugs.webkit.org/show_bug.cgi?id=16621 even the original document
399 // does not have META charset declaration.
400 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithNoMetaCharsetInOriginalDoc
) {
401 FilePath page_file_path
= data_dir_
;
402 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
403 page_file_path
= page_file_path
.AppendASCII("youtube_1.htm");
405 GURL file_url
= net::FilePathToFileURL(page_file_path
);
406 ASSERT_TRUE(file_url
.SchemeIsFile());
407 // Load the test file.
408 LoadPageFromURL(file_url
);
410 // Make sure there is no META charset declaration in original document.
411 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
412 ASSERT_TRUE(web_frame
!= NULL
);
413 WebDocument doc
= web_frame
->document();
414 ASSERT_TRUE(doc
.isHTMLDocument());
415 WebElement head_element
= doc
.head();
416 ASSERT_TRUE(!head_element
.isNull());
417 // Go through all children of HEAD element.
418 for (WebNode child
= head_element
.firstChild(); !child
.isNull();
419 child
= child
.nextSibling()) {
420 std::string charset_info
;
421 if (IsMetaElement(child
, charset_info
))
422 ASSERT_TRUE(charset_info
.empty());
425 SerializeDomForURL(file_url
, false);
427 // Load the serialized contents.
428 ASSERT_TRUE(HasSerializedFrame(file_url
));
429 const std::string
& serialized_contents
=
430 GetSerializedContentForFrame(file_url
);
431 LoadContents(serialized_contents
, file_url
,
432 web_frame
->document().encoding());
433 // Make sure the first child of HEAD element is META which has charset
434 // declaration in serialized contents.
435 web_frame
= test_shell_
->webView()->mainFrame();
436 ASSERT_TRUE(web_frame
!= NULL
);
437 doc
= web_frame
->document();
438 ASSERT_TRUE(doc
.isHTMLDocument());
439 head_element
= doc
.head();
440 ASSERT_TRUE(!head_element
.isNull());
441 WebNode meta_node
= head_element
.firstChild();
442 ASSERT_TRUE(!meta_node
.isNull());
443 // Get meta charset info.
444 std::string charset_info2
;
445 ASSERT_TRUE(IsMetaElement(meta_node
, charset_info2
));
446 ASSERT_TRUE(!charset_info2
.empty());
447 ASSERT_EQ(charset_info2
,
448 std::string(web_frame
->document().encoding().utf8()));
450 // Make sure no more additional META tags which have charset declaration.
451 for (WebNode child
= meta_node
.nextSibling(); !child
.isNull();
452 child
= child
.nextSibling()) {
453 std::string charset_info
;
454 if (IsMetaElement(child
, charset_info
))
455 ASSERT_TRUE(charset_info
.empty());
459 // When serializing DOM, if the original document has multiple META charset
460 // declaration, we will add the META which have correct charset declaration
461 // as first child of HEAD element and remove all original META charset
463 TEST_F(DomSerializerTests
,
464 SerializeHTMLDOMWithMultipleMetaCharsetInOriginalDoc
) {
465 FilePath page_file_path
= data_dir_
;
466 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
467 page_file_path
= page_file_path
.AppendASCII("youtube_2.htm");
469 GURL file_url
= net::FilePathToFileURL(page_file_path
);
470 ASSERT_TRUE(file_url
.SchemeIsFile());
471 // Load the test file.
472 LoadPageFromURL(file_url
);
474 // Make sure there are multiple META charset declarations in original
476 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
477 ASSERT_TRUE(web_frame
!= NULL
);
478 WebDocument doc
= web_frame
->document();
479 ASSERT_TRUE(doc
.isHTMLDocument());
480 WebElement head_ele
= doc
.head();
481 ASSERT_TRUE(!head_ele
.isNull());
482 // Go through all children of HEAD element.
483 int charset_declaration_count
= 0;
484 for (WebNode child
= head_ele
.firstChild(); !child
.isNull();
485 child
= child
.nextSibling()) {
486 std::string charset_info
;
487 if (IsMetaElement(child
, charset_info
) && !charset_info
.empty())
488 charset_declaration_count
++;
490 // The original doc has more than META tags which have charset declaration.
491 ASSERT_TRUE(charset_declaration_count
> 1);
494 SerializeDomForURL(file_url
, false);
496 // Load the serialized contents.
497 ASSERT_TRUE(HasSerializedFrame(file_url
));
498 const std::string
& serialized_contents
=
499 GetSerializedContentForFrame(file_url
);
500 LoadContents(serialized_contents
, file_url
,
501 web_frame
->document().encoding());
502 // Make sure only first child of HEAD element is META which has charset
503 // declaration in serialized contents.
504 web_frame
= test_shell_
->webView()->mainFrame();
505 ASSERT_TRUE(web_frame
!= NULL
);
506 doc
= web_frame
->document();
507 ASSERT_TRUE(doc
.isHTMLDocument());
508 head_ele
= doc
.head();
509 ASSERT_TRUE(!head_ele
.isNull());
510 WebNode meta_node
= head_ele
.firstChild();
511 ASSERT_TRUE(!meta_node
.isNull());
512 // Get meta charset info.
513 std::string charset_info2
;
514 ASSERT_TRUE(IsMetaElement(meta_node
, charset_info2
));
515 ASSERT_TRUE(!charset_info2
.empty());
516 ASSERT_EQ(charset_info2
,
517 std::string(web_frame
->document().encoding().utf8()));
519 // Make sure no more additional META tags which have charset declaration.
520 for (WebNode child
= meta_node
.nextSibling(); !child
.isNull();
521 child
= child
.nextSibling()) {
522 std::string charset_info
;
523 if (IsMetaElement(child
, charset_info
))
524 ASSERT_TRUE(charset_info
.empty());
528 // Test situation of html entities in text when serializing HTML DOM.
529 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithEntitiesInText
) {
530 FilePath page_file_path
= data_dir_
;
531 page_file_path
= page_file_path
.AppendASCII(
532 "dom_serializer/htmlentities_in_text.htm");
533 // Get file URL. The URL is dummy URL to identify the following loading
534 // actions. The test content is in constant:original_contents.
535 GURL file_url
= net::FilePathToFileURL(page_file_path
);
536 ASSERT_TRUE(file_url
.SchemeIsFile());
538 static const char* const original_contents
=
539 "<html><body>&<>\"\'</body></html>";
540 // Load the test contents.
541 LoadContents(original_contents
, file_url
, WebString());
543 // Get BODY's text content in DOM.
544 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
545 ASSERT_TRUE(web_frame
!= NULL
);
546 WebDocument doc
= web_frame
->document();
547 ASSERT_TRUE(doc
.isHTMLDocument());
548 WebElement body_ele
= doc
.body();
549 ASSERT_TRUE(!body_ele
.isNull());
550 WebNode text_node
= body_ele
.firstChild();
551 ASSERT_TRUE(text_node
.isTextNode());
552 ASSERT_TRUE(std::string(text_node
.createMarkup().utf8()) ==
553 "&<>\"\'");
555 SerializeDomForURL(file_url
, false);
556 // Compare the serialized contents with original contents.
557 ASSERT_TRUE(HasSerializedFrame(file_url
));
558 const std::string
& serialized_contents
=
559 GetSerializedContentForFrame(file_url
);
560 // Compare the serialized contents with original contents to make sure
562 // Because we add MOTW when serializing DOM, so before comparison, we also
563 // need to add MOTW to original_contents.
564 std::string original_str
=
565 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url
).utf8();
566 original_str
+= original_contents
;
567 // Since WebCore now inserts a new HEAD element if there is no HEAD element
568 // when creating BODY element. (Please see HTMLParser::bodyCreateErrorCheck.)
569 // We need to append the HEAD content and corresponding META content if we
570 // find WebCore-generated HEAD element.
571 if (!doc
.head().isNull()) {
572 WebString encoding
= web_frame
->document().encoding();
573 std::string
htmlTag("<html>");
574 std::string::size_type pos
= original_str
.find(htmlTag
);
575 ASSERT_NE(std::string::npos
, pos
);
576 pos
+= htmlTag
.length();
577 std::string
head_part("<head>");
579 WebPageSerializer::generateMetaCharsetDeclaration(encoding
).utf8();
580 head_part
+= "</head>";
581 original_str
.insert(pos
, head_part
);
583 ASSERT_EQ(original_str
, serialized_contents
);
586 // Test situation of html entities in attribute value when serializing
588 // This test started to fail at WebKit r65388. See http://crbug.com/52279.
589 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithEntitiesInAttributeValue
) {
590 FilePath page_file_path
= data_dir_
;
591 page_file_path
= page_file_path
.AppendASCII(
592 "dom_serializer/htmlentities_in_attribute_value.htm");
593 // Get file URL. The URL is dummy URL to identify the following loading
594 // actions. The test content is in constant:original_contents.
595 GURL file_url
= net::FilePathToFileURL(page_file_path
);
596 ASSERT_TRUE(file_url
.SchemeIsFile());
598 static const char* const original_contents
=
599 "<html><body title=\"&<>"'\"></body></html>";
600 // Load the test contents.
601 LoadContents(original_contents
, file_url
, WebString());
602 // Get value of BODY's title attribute in DOM.
603 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
604 ASSERT_TRUE(web_frame
!= NULL
);
605 WebDocument doc
= web_frame
->document();
606 ASSERT_TRUE(doc
.isHTMLDocument());
607 WebElement body_ele
= doc
.body();
608 ASSERT_TRUE(!body_ele
.isNull());
609 WebString value
= body_ele
.getAttribute("title");
610 ASSERT_TRUE(std::string(value
.utf8()) == "&<>\"\'");
612 SerializeDomForURL(file_url
, false);
613 // Compare the serialized contents with original contents.
614 ASSERT_TRUE(HasSerializedFrame(file_url
));
615 const std::string
& serialized_contents
=
616 GetSerializedContentForFrame(file_url
);
617 // Compare the serialized contents with original contents to make sure
619 std::string original_str
=
620 WebPageSerializer::generateMarkOfTheWebDeclaration(file_url
).utf8();
621 original_str
+= original_contents
;
623 WebString encoding
= web_frame
->document().encoding();
624 std::string
htmlTag("<html>");
625 std::string::size_type pos
= original_str
.find(htmlTag
);
626 ASSERT_NE(std::string::npos
, pos
);
627 pos
+= htmlTag
.length();
628 std::string
head_part("<head>");
630 WebPageSerializer::generateMetaCharsetDeclaration(encoding
).utf8();
631 head_part
+= "</head>";
632 original_str
.insert(pos
, head_part
);
634 ASSERT_EQ(original_str
, serialized_contents
);
637 // Test situation of non-standard HTML entities when serializing HTML DOM.
638 // This test started to fail at WebKit r65351. See http://crbug.com/52279.
639 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithNonStandardEntities
) {
640 // Make a test file URL and load it.
641 FilePath page_file_path
= data_dir_
;
642 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
643 page_file_path
= page_file_path
.AppendASCII("nonstandard_htmlentities.htm");
644 GURL file_url
= net::FilePathToFileURL(page_file_path
);
645 LoadPageFromURL(file_url
);
647 // Get value of BODY's title attribute in DOM.
648 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
649 WebDocument doc
= web_frame
->document();
650 ASSERT_TRUE(doc
.isHTMLDocument());
651 WebElement body_element
= doc
.body();
652 // Unescaped string for "%⊅¹'".
653 static const wchar_t parsed_value
[] = {
654 '%', 0x2285, 0x00b9, '\'', 0
656 WebString value
= body_element
.getAttribute("title");
657 ASSERT_TRUE(UTF16ToWide(value
) == parsed_value
);
658 ASSERT_TRUE(UTF16ToWide(body_element
.innerText()) == parsed_value
);
661 SerializeDomForURL(file_url
, false);
662 // Check the serialized string.
663 ASSERT_TRUE(HasSerializedFrame(file_url
));
664 const std::string
& serialized_contents
=
665 GetSerializedContentForFrame(file_url
);
666 // Confirm that the serialized string has no non-standard HTML entities.
667 ASSERT_EQ(std::string::npos
, serialized_contents
.find("%"));
668 ASSERT_EQ(std::string::npos
, serialized_contents
.find("⊅"));
669 ASSERT_EQ(std::string::npos
, serialized_contents
.find("¹"));
670 ASSERT_EQ(std::string::npos
, serialized_contents
.find("'"));
673 // Test situation of BASE tag in original document when serializing HTML DOM.
674 // When serializing, we should comment the BASE tag, append a new BASE tag.
675 // rewrite all the savable URLs to relative local path, and change other URLs
677 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithBaseTag
) {
678 // There are total 2 available base tags in this test file.
679 const int kTotalBaseTagCountInTestFile
= 2;
681 FilePath page_file_path
= data_dir_
.AppendASCII("dom_serializer");
682 file_util::EnsureEndsWithSeparator(&page_file_path
);
684 // Get page dir URL which is base URL of this file.
685 GURL path_dir_url
= net::FilePathToFileURL(page_file_path
);
688 page_file_path
.AppendASCII("html_doc_has_base_tag.htm");
690 GURL file_url
= net::FilePathToFileURL(page_file_path
);
691 ASSERT_TRUE(file_url
.SchemeIsFile());
692 // Load the test file.
693 LoadPageFromURL(file_url
);
694 // Since for this test, we assume there is no savable sub-resource links for
695 // this test file, also all links are relative URLs in this test file, so we
696 // need to check those relative URLs and make sure document has BASE tag.
697 WebFrame
* web_frame
= FindSubFrameByURL(test_shell_
->webView(), file_url
);
698 ASSERT_TRUE(web_frame
!= NULL
);
699 WebDocument doc
= web_frame
->document();
700 ASSERT_TRUE(doc
.isHTMLDocument());
701 // Go through all descent nodes.
702 WebNodeCollection all
= doc
.all();
703 int original_base_tag_count
= 0;
704 for (WebNode node
= all
.firstItem(); !node
.isNull();
705 node
= all
.nextItem()) {
706 if (!node
.isElementNode())
708 WebElement element
= node
.to
<WebElement
>();
709 if (element
.hasTagName("base")) {
710 original_base_tag_count
++;
714 webkit_glue::GetSubResourceLinkFromElement(element
);
715 if (value
.isNull() && element
.hasTagName("a")) {
716 value
= element
.getAttribute("href");
720 // Each link is relative link.
721 if (!value
.isNull()) {
722 GURL
link(value
.utf8());
723 ASSERT_TRUE(link
.scheme().empty());
727 ASSERT_EQ(original_base_tag_count
, kTotalBaseTagCountInTestFile
);
728 // Make sure in original document, the base URL is not equal with the
730 GURL
original_base_url(doc
.baseURL());
731 ASSERT_NE(original_base_url
, path_dir_url
);
734 SerializeDomForURL(file_url
, false);
736 // Load the serialized contents.
737 ASSERT_TRUE(HasSerializedFrame(file_url
));
738 const std::string
& serialized_contents
=
739 GetSerializedContentForFrame(file_url
);
740 LoadContents(serialized_contents
, file_url
,
741 web_frame
->document().encoding());
743 // Make sure all links are absolute URLs and doc there are some number of
744 // BASE tags in serialized HTML data. Each of those BASE tags have same base
745 // URL which is as same as URL of current test file.
746 web_frame
= test_shell_
->webView()->mainFrame();
747 ASSERT_TRUE(web_frame
!= NULL
);
748 doc
= web_frame
->document();
749 ASSERT_TRUE(doc
.isHTMLDocument());
750 // Go through all descent nodes.
752 int new_base_tag_count
= 0;
753 for (WebNode node
= all
.firstItem(); !node
.isNull();
754 node
= all
.nextItem()) {
755 if (!node
.isElementNode())
757 WebElement element
= node
.to
<WebElement
>();
758 if (element
.hasTagName("base")) {
759 new_base_tag_count
++;
763 webkit_glue::GetSubResourceLinkFromElement(element
);
764 if (value
.isNull() && element
.hasTagName("a")) {
765 value
= element
.getAttribute("href");
769 // Each link is absolute link.
770 if (!value
.isNull()) {
771 GURL
link(std::string(value
.utf8()));
772 ASSERT_FALSE(link
.scheme().empty());
776 // We have one more added BASE tag which is generated by JavaScript.
777 ASSERT_EQ(new_base_tag_count
, original_base_tag_count
+ 1);
778 // Make sure in new document, the base URL is equal with the |path_dir_url|.
779 GURL
new_base_url(doc
.baseURL());
780 ASSERT_EQ(new_base_url
, path_dir_url
);
783 // Serializing page which has an empty HEAD tag.
784 TEST_F(DomSerializerTests
, SerializeHTMLDOMWithEmptyHead
) {
785 FilePath page_file_path
= data_dir_
;
786 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
787 page_file_path
= page_file_path
.AppendASCII("empty_head.htm");
788 GURL file_url
= net::FilePathToFileURL(page_file_path
);
789 ASSERT_TRUE(file_url
.SchemeIsFile());
791 // Load the test html content.
792 static const char* const empty_head_contents
=
793 "<html><head></head><body>hello world</body></html>";
794 LoadContents(empty_head_contents
, file_url
, WebString());
796 // Make sure the head tag is empty.
797 WebFrame
* web_frame
= test_shell_
->webView()->mainFrame();
798 ASSERT_TRUE(web_frame
!= NULL
);
799 WebDocument doc
= web_frame
->document();
800 ASSERT_TRUE(doc
.isHTMLDocument());
801 WebElement head_element
= doc
.head();
802 ASSERT_TRUE(!head_element
.isNull());
803 ASSERT_TRUE(!head_element
.hasChildNodes());
804 ASSERT_TRUE(head_element
.childNodes().length() == 0);
807 SerializeDomForURL(file_url
, false);
808 // Make sure the serialized contents have META ;
809 ASSERT_TRUE(HasSerializedFrame(file_url
));
810 const std::string
& serialized_contents
=
811 GetSerializedContentForFrame(file_url
);
813 // Reload serialized contents and make sure there is only one META tag.
814 LoadContents(serialized_contents
, file_url
, web_frame
->document().encoding());
815 web_frame
= test_shell_
->webView()->mainFrame();
816 ASSERT_TRUE(web_frame
!= NULL
);
817 doc
= web_frame
->document();
818 ASSERT_TRUE(doc
.isHTMLDocument());
819 head_element
= doc
.head();
820 ASSERT_TRUE(!head_element
.isNull());
821 ASSERT_TRUE(head_element
.hasChildNodes());
822 ASSERT_TRUE(head_element
.childNodes().length() == 1);
823 WebNode meta_node
= head_element
.firstChild();
824 ASSERT_TRUE(!meta_node
.isNull());
825 // Get meta charset info.
826 std::string charset_info
;
827 ASSERT_TRUE(IsMetaElement(meta_node
, charset_info
));
828 ASSERT_TRUE(!charset_info
.empty());
829 ASSERT_EQ(charset_info
,
830 std::string(web_frame
->document().encoding().utf8()));
832 // Check the body's first node is text node and its contents are
834 WebElement body_element
= doc
.body();
835 ASSERT_TRUE(!body_element
.isNull());
836 WebNode text_node
= body_element
.firstChild();
837 ASSERT_TRUE(text_node
.isTextNode());
838 WebString text_node_contents
= text_node
.nodeValue();
839 ASSERT_TRUE(std::string(text_node_contents
.utf8()) == "hello world");
842 // Test that we don't crash when the page contains an iframe that
843 // was handled as a download (http://crbug.com/42212).
844 TEST_F(DomSerializerTests
, SerializeDocumentWithDownloadedIFrame
) {
845 FilePath page_file_path
= data_dir_
;
846 page_file_path
= page_file_path
.AppendASCII("dom_serializer");
847 page_file_path
= page_file_path
.AppendASCII("iframe-src-is-exe.htm");
848 GURL file_url
= net::FilePathToFileURL(page_file_path
);
849 ASSERT_TRUE(file_url
.SchemeIsFile());
850 // Load the test file.
851 LoadPageFromURL(file_url
);
852 // Do a recursive serialization. We pass if we don't crash.
853 SerializeDomForURL(file_url
, true);