PlzNavigate: Improvements to RFHM commit logic.
[chromium-blink-merge.git] / net / base / data_url_unittest.cc
blob: bcb2b49ad6c013ea88d7614a04089539c979cf67
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/basictypes.h"
6 #include "net/base/data_url.h"
7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "url/gurl.h"
10 namespace {
// One row of the Parse test table: an input URL together with the results
// the test expects net::DataURL::Parse to produce for it.
12 struct ParseTestData {
// Input string; the test wraps it in a GURL before handing it to the parser.
13 const char* url;
// Expected return value of the parse call.
14 bool is_valid;
// Expected values of the parser's three outputs. The test compares them
// only when is_valid is true.
15 const char* mime_type;
16 const char* charset;
17 const char* data;
22 TEST(DataURLTest, Parse) {
23 const ParseTestData tests[] = {
24 { "data:",
25 false,
26 "",
27 "",
28 "" },
30 { "data:,",
31 true,
32 "text/plain",
33 "US-ASCII",
34 "" },
36 { "data:;base64,",
37 true,
38 "text/plain",
39 "US-ASCII",
40 "" },
42 { "data:;charset=,test",
43 false,
44 "",
45 "",
46 "" },
48 { "data:TeXt/HtMl,<b>x</b>",
49 true,
50 "text/html",
51 "US-ASCII",
52 "<b>x</b>" },
54 { "data:,foo",
55 true,
56 "text/plain",
57 "US-ASCII",
58 "foo" },
60 { "data:;base64,aGVsbG8gd29ybGQ=",
61 true,
62 "text/plain",
63 "US-ASCII",
64 "hello world" },
66 // Allow invalid mediatype for backward compatibility but set mime_type to
67 // "text/plain" instead of the invalid mediatype.
68 { "data:foo,boo",
69 true,
70 "text/plain",
71 "US-ASCII",
72 "boo" },
74 // When accepting an invalid mediatype, override charset with "US-ASCII"
75 { "data:foo;charset=UTF-8,boo",
76 true,
77 "text/plain",
78 "US-ASCII",
79 "boo" },
81 // Invalid mediatype. Includes a slash but the type part is not a token.
82 { "data:f(oo/bar;baz=1;charset=kk,boo",
83 true,
84 "text/plain",
85 "US-ASCII",
86 "boo" },
88 { "data:foo/bar;baz=1;charset=kk,boo",
89 true,
90 "foo/bar",
91 "kk",
92 "boo" },
94 { "data:foo/bar;charset=kk;baz=1,boo",
95 true,
96 "foo/bar",
97 "kk",
98 "boo" },
100 { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
101 "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
102 true,
103 "text/html",
104 "US-ASCII",
105 "<html><body><b>hello world</b></body></html>" },
107 { "data:text/html,<html><body><b>hello world</b></body></html>",
108 true,
109 "text/html",
110 "US-ASCII",
111 "<html><body><b>hello world</b></body></html>" },
113 // the comma cannot be url-escaped!
114 { "data:%2Cblah",
115 false,
118 "" },
120 // invalid base64 content
121 { "data:;base64,aGVs_-_-",
122 false,
125 "" },
127 // Spaces should be removed from non-text data URLs (we already tested
128 // spaces above).
129 { " bG8gd2 9ybGQ=",
137 true,
138 "text/plain",
139 "US-ASCII",
140 "hello world" },
142 // Other whitespace should also be removed from anything base-64 encoded.
143 { "data:;base64,aGVs bG8gd2 \n9ybGQ=",
144 true,
145 "text/plain",
146 "US-ASCII",
147 "hello world" },
149 // In base64 encoding, escaped whitespace should be stripped.
150 // (This test was taken from acid3)
151 // http://b/1054495
152 { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
153 "%20",
154 true,
155 "text/javascript",
156 "US-ASCII",
157 "d4 = 'four';" },
159 // Only unescaped whitespace should be stripped in non-base64.
160 // http://b/1157796
161 { "data:img/png,A B %20 %0A C",
162 true,
163 "img/png",
164 "US-ASCII",
165 "AB \nC" },
167 { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
168 true,
169 "text/plain",
170 "utf-8",
171 "Hell\xC3\xB6" },
173 // Not sufficiently padded.
174 { "data:;base64,aGVsbG8gd29ybGQ",
175 true,
176 "text/plain",
177 "US-ASCII",
178 "hello world" },
180 // Bad encoding (truncated).
181 { "data:;base64,aGVsbG8gd29yb",
182 false,
185 "" },
187 // BiDi control characters should be unescaped and preserved as is, and
188 // should not be replaced with % versions. In the below case, \xE2\x80\x8F
189 // is the RTL mark and the parsed text should preserve it as is.
191 "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
192 true,
193 "text/plain",
194 "utf-8",
195 "\xE2\x80\x8Ftest"},
197 // Same as above but with Arabic text after RTL mark.
199 "data:text/plain;charset=utf-8,"
200 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
201 true,
202 "text/plain",
203 "utf-8",
204 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
206 // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
207 // wrapped in a GURL, this URL and the next effectively become the same as
208 // the previous two URLs.
210 "data:text/plain;charset=utf-8,%E2%80%8Ftest",
211 true,
212 "text/plain",
213 "utf-8",
214 "\xE2\x80\x8Ftest"},
216 // Same as above but with Arabic text after RTL mark.
218 "data:text/plain;charset=utf-8,"
219 "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
220 true,
221 "text/plain",
222 "utf-8",
223 "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}
225 // TODO(darin): add more interesting tests
228 for (size_t i = 0; i < arraysize(tests); ++i) {
229 std::string mime_type;
230 std::string charset;
231 std::string data;
232 bool ok =
233 net::DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
234 EXPECT_EQ(ok, tests[i].is_valid);
235 if (tests[i].is_valid) {
236 EXPECT_EQ(tests[i].mime_type, mime_type);
237 EXPECT_EQ(tests[i].charset, charset);
238 EXPECT_EQ(tests[i].data, data);