net/base/data_url_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/basictypes.h"
   6 #include "net/base/data_url.h"
   7 #include "testing/gtest/include/gtest/gtest.h"
   8 #include "url/gurl.h"
   9
  10 namespace {
  11
  12 struct ParseTestData {
  13   const char* url;
  14   bool is_valid;
  15   const char* mime_type;
  16   const char* charset;
  17   const char* data;
  18 };
  19
  20 }
  21
  22 TEST(DataURLTest, Parse) {
  23   const ParseTestData tests[] = {
  24     { "data:",
  25        false,
  26        "",
  27        "",
  28        "" },
  29
  30     { "data:,",
  31       true,
  32       "text/plain",
  33       "US-ASCII",
  34       "" },
  35
  36     { "data:;base64,",
  37       true,
  38       "text/plain",
  39       "US-ASCII",
  40       "" },
  41
  42     { "data:;charset=,test",
  43       false,
  44       "",
  45       "",
  46       "" },
  47
  48     { "data:TeXt/HtMl,<b>x</b>",
  49       true,
  50       "text/html",
  51       "US-ASCII",
  52       "<b>x</b>" },
  53
  54     { "data:,foo",
  55       true,
  56       "text/plain",
  57       "US-ASCII",
  58       "foo" },
  59
  60     { "data:;base64,aGVsbG8gd29ybGQ=",
  61       true,
  62       "text/plain",
  63       "US-ASCII",
  64       "hello world" },
  65
  66     // Allow invalid mediatype for backward compatibility but set mime_type to
  67     // "text/plain" instead of the invalid mediatype.
  68     { "data:foo,boo",
  69       true,
  70       "text/plain",
  71       "US-ASCII",
  72       "boo" },
  73
  74     // When accepting an invalid mediatype, override charset with "US-ASCII"
  75     { "data:foo;charset=UTF-8,boo",
  76       true,
  77       "text/plain",
  78       "US-ASCII",
  79       "boo" },
  80
  81     // Invalid mediatype. Includes a slash but the type part is not a token.
  82     { "data:f(oo/bar;baz=1;charset=kk,boo",
  83       true,
  84       "text/plain",
  85       "US-ASCII",
  86       "boo" },
  87
  88     { "data:foo/bar;baz=1;charset=kk,boo",
  89       true,
  90       "foo/bar",
  91       "kk",
  92       "boo" },
  93
  94     { "data:foo/bar;charset=kk;baz=1,boo",
  95       true,
  96       "foo/bar",
  97       "kk",
  98       "boo" },
  99
 100     { "data:text/html,%3Chtml%3E%3Cbody%3E%3Cb%3Ehello%20world"
 101           "%3C%2Fb%3E%3C%2Fbody%3E%3C%2Fhtml%3E",
 102       true,
 103       "text/html",
 104       "US-ASCII",
 105       "<html><body><b>hello world</b></body></html>" },
 106
 107     { "data:text/html,<html><body><b>hello world</b></body></html>",
 108       true,
 109       "text/html",
 110       "US-ASCII",
 111       "<html><body><b>hello world</b></body></html>" },
 112
 113     // the comma cannot be url-escaped!
 114     { "data:%2Cblah",
 115       false,
 116       "",
 117       "",
 118       "" },
 119
 120     // invalid base64 content
 121     { "data:;base64,aGVs_-_-",
 122       false,
 123       "",
 124       "",
 125       "" },
 126
 127     // Spaces should be removed from non-text data URLs (we already tested
 128     // spaces above).
 129     { "data:image/fractal,a b c d e f g",
 130       true,
 131       "image/fractal",
 132       "US-ASCII",
 133       "abcdefg" },
 134
 135     // Spaces should also be removed from anything base-64 encoded
 136     { "data:;base64,aGVs bG8gd2  9ybGQ=",
 137       true,
 138       "text/plain",
 139       "US-ASCII",
 140       "hello world" },
 141
 142     // Other whitespace should also be removed from anything base-64 encoded.
 143     { "data:;base64,aGVs bG8gd2  \n9ybGQ=",
 144       true,
 145       "text/plain",
 146       "US-ASCII",
 147       "hello world" },
 148
 149     // In base64 encoding, escaped whitespace should be stripped.
 150     // (This test was taken from acid3)
 151     // http://b/1054495
 152     { "data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207"
 153           "%20",
 154       true,
 155       "text/javascript",
 156       "US-ASCII",
 157       "d4 = 'four';" },
 158
 159     // Only unescaped whitespace should be stripped in non-base64.
 160     // http://b/1157796
 161     { "data:img/png,A  B  %20  %0A  C",
 162       true,
 163       "img/png",
 164       "US-ASCII",
 165       "AB \nC" },
 166
 167     { "data:text/plain;charset=utf-8;base64,SGVsbMO2",
 168       true,
 169       "text/plain",
 170       "utf-8",
 171       "Hell\xC3\xB6" },
 172
 173     // Not sufficiently padded.
 174     { "data:;base64,aGVsbG8gd29ybGQ",
 175       true,
 176       "text/plain",
 177       "US-ASCII",
 178       "hello world" },
 179
 180     // Bad encoding (truncated).
 181     { "data:;base64,aGVsbG8gd29yb",
 182       false,
 183       "",
 184       "",
 185       "" },
 186
 187     // BiDi control characters should be unescaped and preserved as is, and
 188     // should not be replaced with % versions. In the below case, \xE2\x80\x8F
 189     // is the RTL mark and the parsed text should preserve it as is.
 190     {
 191       "data:text/plain;charset=utf-8,\xE2\x80\x8Ftest",
 192       true,
 193       "text/plain",
 194       "utf-8",
 195       "\xE2\x80\x8Ftest"},
 196
 197     // Same as above but with Arabic text after RTL mark.
 198     {
 199       "data:text/plain;charset=utf-8,"
 200           "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
 201       true,
 202       "text/plain",
 203       "utf-8",
 204       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"},
 205
 206     // RTL mark encoded as %E2%80%8F should be unescaped too. Note that when
 207     // wrapped in a GURL, this URL and the next effectively become the same as
 208     // the previous two URLs.
 209     {
 210       "data:text/plain;charset=utf-8,%E2%80%8Ftest",
 211       true,
 212       "text/plain",
 213       "utf-8",
 214       "\xE2\x80\x8Ftest"},
 215
 216     // Same as above but with Arabic text after RTL mark.
 217     {
 218       "data:text/plain;charset=utf-8,"
 219           "%E2%80%8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1",
 220       true,
 221       "text/plain",
 222       "utf-8",
 223       "\xE2\x80\x8F\xD8\xA7\xD8\xAE\xD8\xAA\xD8\xA8\xD8\xA7\xD8\xB1"}
 224
 225     // TODO(darin): add more interesting tests
 226   };
 227
 228   for (size_t i = 0; i < arraysize(tests); ++i) {
 229     std::string mime_type;
 230     std::string charset;
 231     std::string data;
 232     bool ok =
 233         net::DataURL::Parse(GURL(tests[i].url), &mime_type, &charset, &data);
 234     EXPECT_EQ(ok, tests[i].is_valid);
 235     if (tests[i].is_valid) {
 236       EXPECT_EQ(tests[i].mime_type, mime_type);
 237       EXPECT_EQ(tests[i].charset, charset);
 238       EXPECT_EQ(tests[i].data, data);
 239     }
 240   }
 241 }