base/string_util_unittest.cc

   1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6 #include <stdarg.h>
   7
   8 #include <limits>
   9 #include <sstream>
  10
  11 #include "base/basictypes.h"
  12 #include "base/string_util.h"
  13 #include "base/utf_string_conversions.h"
  14 #include "testing/gmock/include/gmock/gmock.h"
  15 #include "testing/gtest/include/gtest/gtest.h"
  16
  17 using ::testing::ElementsAre;
  18
  19 namespace base {
  20
  21 static const struct trim_case {
  22   const wchar_t* input;
  23   const TrimPositions positions;
  24   const wchar_t* output;
  25   const TrimPositions return_value;
  26 } trim_cases[] = {
  27   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  28   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  29   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  30   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  31   {L"", TRIM_ALL, L"", TRIM_NONE},
  32   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  33   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  34   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  35   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  36   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  37 };
  38
  39 static const struct trim_case_ascii {
  40   const char* input;
  41   const TrimPositions positions;
  42   const char* output;
  43   const TrimPositions return_value;
  44 } trim_cases_ascii[] = {
  45   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  46   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  47   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  48   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  49   {"", TRIM_ALL, "", TRIM_NONE},
  50   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  51   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  52   {"  ", TRIM_ALL, "", TRIM_ALL},
  53   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  54 };
  55
  56 namespace {
  57
  58 // Helper used to test TruncateUTF8ToByteSize.
  59 bool Truncated(const std::string& input, const size_t byte_size,
  60                std::string* output) {
  61     size_t prev = input.length();
  62     TruncateUTF8ToByteSize(input, byte_size, output);
  63     return prev != output->length();
  64 }
  65
  66 }  // namespace
  67
  68 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  69   std::string output;
  70
  71   // Empty strings and invalid byte_size arguments
  72   EXPECT_FALSE(Truncated("", 0, &output));
  73   EXPECT_EQ(output, "");
  74   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  75   EXPECT_EQ(output, "");
  76   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  77   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  78
  79   // Testing the truncation of valid UTF8 correctly
  80   EXPECT_TRUE(Truncated("abc", 2, &output));
  81   EXPECT_EQ(output, "ab");
  82   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  83   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  86   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  87   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  88
  89   {
  90     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  91     const std::string array_string(array, arraysize(array));
  92     EXPECT_TRUE(Truncated(array_string, 4, &output));
  93     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  94   }
  95
  96   {
  97     const char array[] = "\x00\xc2\x81\xc2\x81";
  98     const std::string array_string(array, arraysize(array));
  99     EXPECT_TRUE(Truncated(array_string, 4, &output));
 100     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 101   }
 102
 103   // Testing invalid UTF8
 104   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 105   EXPECT_EQ(output.compare(""), 0);
 106   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 107   EXPECT_EQ(output.compare(""), 0);
 108   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 109   EXPECT_EQ(output.compare(""), 0);
 110
 111   // Testing invalid UTF8 mixed with valid UTF8
 112   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 113   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 114   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 115   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 116   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 117               10, &output));
 118   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 119   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 120               10, &output));
 121   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 122   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 123   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 124
 125   // Overlong sequences
 126   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 127   EXPECT_EQ(output.compare(""), 0);
 128   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 129   EXPECT_EQ(output.compare(""), 0);
 130   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 131   EXPECT_EQ(output.compare(""), 0);
 132   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 133   EXPECT_EQ(output.compare(""), 0);
 134   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 135   EXPECT_EQ(output.compare(""), 0);
 136   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 137   EXPECT_EQ(output.compare(""), 0);
 138   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 139   EXPECT_EQ(output.compare(""), 0);
 140   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 141   EXPECT_EQ(output.compare(""), 0);
 142   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 143   EXPECT_EQ(output.compare(""), 0);
 144   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 145   EXPECT_EQ(output.compare(""), 0);
 146   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 147   EXPECT_EQ(output.compare(""), 0);
 148
 149   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 150   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 151   EXPECT_EQ(output.compare(""), 0);
 152   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 153   EXPECT_EQ(output.compare(""), 0);
 154   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 155   EXPECT_EQ(output.compare(""), 0);
 156
 157   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 158   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 159   EXPECT_EQ(output.compare(""), 0);
 160   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 161   EXPECT_EQ(output.compare(""), 0);
 162
 163   {
 164     const char array[] = "\x00\x00\xfe\xff";
 165     const std::string array_string(array, arraysize(array));
 166     EXPECT_TRUE(Truncated(array_string, 4, &output));
 167     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 168   }
 169
 170   // Variants on the previous test
 171   {
 172     const char array[] = "\xff\xfe\x00\x00";
 173     const std::string array_string(array, 4);
 174     EXPECT_FALSE(Truncated(array_string, 4, &output));
 175     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 176   }
 177   {
 178     const char array[] = "\xff\x00\x00\xfe";
 179     const std::string array_string(array, arraysize(array));
 180     EXPECT_TRUE(Truncated(array_string, 4, &output));
 181     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 182   }
 183
 184   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 185   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 186   EXPECT_EQ(output.compare(""), 0);
 187   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 188   EXPECT_EQ(output.compare(""), 0);
 189   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 190   EXPECT_EQ(output.compare(""), 0);
 191   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 192   EXPECT_EQ(output.compare(""), 0);
 193   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 194   EXPECT_EQ(output.compare(""), 0);
 195
 196   // Strings in legacy encodings that are valid in UTF-8, but
 197   // are invalid as UTF-8 in real data.
 198   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 199   EXPECT_EQ(output.compare("caf"), 0);
 200   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 201   EXPECT_EQ(output.compare(""), 0);
 202   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 203   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 204   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 205               &output));
 206   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 207
 208   // Testing using the same string as input and output.
 209   EXPECT_FALSE(Truncated(output, 4, &output));
 210   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 211   EXPECT_TRUE(Truncated(output, 3, &output));
 212   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 213
 214   // "abc" with U+201[CD] in windows-125[0-8]
 215   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 216   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 217
 218   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 219   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 220   EXPECT_EQ(output.compare(""), 0);
 221
 222   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 223   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 224   EXPECT_EQ(output.compare(""), 0);
 225 }
 226
 227 TEST(StringUtilTest, TrimWhitespace) {
 228   std::wstring output;  // Allow contents to carry over to next testcase
 229   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 230     const trim_case& value = trim_cases[i];
 231     EXPECT_EQ(value.return_value,
 232               TrimWhitespace(value.input, value.positions, &output));
 233     EXPECT_EQ(value.output, output);
 234   }
 235
 236   // Test that TrimWhitespace() can take the same string for input and output
 237   output = L"  This is a test \r\n";
 238   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 239   EXPECT_EQ(L"This is a test", output);
 240
 241   // Once more, but with a string of whitespace
 242   output = L"  \r\n";
 243   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 244   EXPECT_EQ(L"", output);
 245
 246   std::string output_ascii;
 247   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 248     const trim_case_ascii& value = trim_cases_ascii[i];
 249     EXPECT_EQ(value.return_value,
 250               TrimWhitespace(value.input, value.positions, &output_ascii));
 251     EXPECT_EQ(value.output, output_ascii);
 252   }
 253 }
 254
 255 static const struct collapse_case {
 256   const wchar_t* input;
 257   const bool trim;
 258   const wchar_t* output;
 259 } collapse_cases[] = {
 260   {L" Google Video ", false, L"Google Video"},
 261   {L"Google Video", false, L"Google Video"},
 262   {L"", false, L""},
 263   {L"  ", false, L""},
 264   {L"\t\rTest String\n", false, L"Test String"},
 265   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 266   {L"    Test     \n  \t String    ", false, L"Test String"},
 267   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 268   {L"   Test String", false, L"Test String"},
 269   {L"Test String    ", false, L"Test String"},
 270   {L"Test String", false, L"Test String"},
 271   {L"", true, L""},
 272   {L"\n", true, L""},
 273   {L"  \r  ", true, L""},
 274   {L"\nFoo", true, L"Foo"},
 275   {L"\r  Foo  ", true, L"Foo"},
 276   {L" Foo bar ", true, L"Foo bar"},
 277   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 278   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 279 };
 280
 281 TEST(StringUtilTest, CollapseWhitespace) {
 282   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 283     const collapse_case& value = collapse_cases[i];
 284     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
 285   }
 286 }
 287
 288 static const struct collapse_case_ascii {
 289   const char* input;
 290   const bool trim;
 291   const char* output;
 292 } collapse_cases_ascii[] = {
 293   {" Google Video ", false, "Google Video"},
 294   {"Google Video", false, "Google Video"},
 295   {"", false, ""},
 296   {"  ", false, ""},
 297   {"\t\rTest String\n", false, "Test String"},
 298   {"    Test     \n  \t String    ", false, "Test String"},
 299   {"   Test String", false, "Test String"},
 300   {"Test String    ", false, "Test String"},
 301   {"Test String", false, "Test String"},
 302   {"", true, ""},
 303   {"\n", true, ""},
 304   {"  \r  ", true, ""},
 305   {"\nFoo", true, "Foo"},
 306   {"\r  Foo  ", true, "Foo"},
 307   {" Foo bar ", true, "Foo bar"},
 308   {"  \tFoo  bar  \n", true, "Foo bar"},
 309   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 310 };
 311
 312 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 313   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 314     const collapse_case_ascii& value = collapse_cases_ascii[i];
 315     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 316   }
 317 }
 318
 319 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
 320   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
 321   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
 322   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
 323   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
 324   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
 325   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
 326 }
 327
 328 TEST(StringUtilTest, ContainsOnlyWhitespace) {
 329   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
 330   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
 331   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
 332   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
 333   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
 334   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
 335 }
 336
 337 TEST(StringUtilTest, IsStringUTF8) {
 338   EXPECT_TRUE(IsStringUTF8("abc"));
 339   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 340   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 341   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 342   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 343   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 344
 345   // surrogate code points
 346   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 347   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 348   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 349
 350   // overlong sequences
 351   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 352   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 353   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 354   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 355   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 356   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 357   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 358   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 359   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 360   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 361   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 362
 363   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 364   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 365   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 366   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 367
 368   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 369   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 370   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 371   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 372   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 373
 374   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 375   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 376   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 377   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 378   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 379   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 380   // Strings in legacy encodings. We can certainly make up strings
 381   // in a legacy encoding that are valid in UTF-8, but in real data,
 382   // most of them are invalid as UTF-8.
 383   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 384   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 385   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 386   // "abc" with U+201[CD] in windows-125[0-8]
 387   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 388   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 389   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 390   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 391   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 392
 393   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 394   // representation, and the second uses a 2-byte sequence. The second version
 395   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 396   // given codepoint must be used.
 397   static const char kEmbeddedNull[] = "embedded\0null";
 398   EXPECT_TRUE(IsStringUTF8(
 399       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 400   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 401 }
 402
 403 TEST(StringUtilTest, ConvertASCII) {
 404   static const char* char_cases[] = {
 405     "Google Video",
 406     "Hello, world\n",
 407     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 408   };
 409
 410   static const wchar_t* const wchar_cases[] = {
 411     L"Google Video",
 412     L"Hello, world\n",
 413     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 414   };
 415
 416   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 417     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 418     std::wstring wide = ASCIIToWide(char_cases[i]);
 419     EXPECT_EQ(wchar_cases[i], wide);
 420
 421     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
 422     std::string ascii = WideToASCII(wchar_cases[i]);
 423     EXPECT_EQ(char_cases[i], ascii);
 424   }
 425
 426   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 427   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
 428
 429   // Convert empty strings.
 430   std::wstring wempty;
 431   std::string empty;
 432   EXPECT_EQ(empty, WideToASCII(wempty));
 433   EXPECT_EQ(wempty, ASCIIToWide(empty));
 434
 435   // Convert strings with an embedded NUL character.
 436   const char chars_with_nul[] = "test\0string";
 437   const int length_with_nul = arraysize(chars_with_nul) - 1;
 438   std::string string_with_nul(chars_with_nul, length_with_nul);
 439   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 440   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 441             wide_with_nul.length());
 442   std::string narrow_with_nul = WideToASCII(wide_with_nul);
 443   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 444             narrow_with_nul.length());
 445   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 446 }
 447
 448 TEST(StringUtilTest, ToUpperASCII) {
 449   EXPECT_EQ('C', ToUpperASCII('C'));
 450   EXPECT_EQ('C', ToUpperASCII('c'));
 451   EXPECT_EQ('2', ToUpperASCII('2'));
 452
 453   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 454   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 455   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 456
 457   std::string in_place_a("Cc2");
 458   StringToUpperASCII(&in_place_a);
 459   EXPECT_EQ("CC2", in_place_a);
 460
 461   std::wstring in_place_w(L"Cc2");
 462   StringToUpperASCII(&in_place_w);
 463   EXPECT_EQ(L"CC2", in_place_w);
 464
 465   std::string original_a("Cc2");
 466   std::string upper_a = StringToUpperASCII(original_a);
 467   EXPECT_EQ("CC2", upper_a);
 468
 469   std::wstring original_w(L"Cc2");
 470   std::wstring upper_w = StringToUpperASCII(original_w);
 471   EXPECT_EQ(L"CC2", upper_w);
 472 }
 473
 474 static const struct {
 475   const wchar_t* src_w;
 476   const char*    src_a;
 477   const char*    dst;
 478 } lowercase_cases[] = {
 479   {L"FoO", "FoO", "foo"},
 480   {L"foo", "foo", "foo"},
 481   {L"FOO", "FOO", "foo"},
 482 };
 483
 484 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 485   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 486     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
 487                                      lowercase_cases[i].dst));
 488     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 489                                      lowercase_cases[i].dst));
 490   }
 491 }
 492
 493 TEST(StringUtilTest, GetByteDisplayUnits) {
 494   static const struct {
 495     int64 bytes;
 496     DataUnits expected;
 497   } cases[] = {
 498     {0, DATA_UNITS_BYTE},
 499     {512, DATA_UNITS_BYTE},
 500     {10*1024, DATA_UNITS_KIBIBYTE},
 501     {10*1024*1024, DATA_UNITS_MEBIBYTE},
 502     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
 503     {~(1LL<<63), DATA_UNITS_GIBIBYTE},
 504 #ifdef NDEBUG
 505     {-1, DATA_UNITS_BYTE},
 506 #endif
 507   };
 508
 509   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
 510     EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
 511 }
 512
 513 TEST(StringUtilTest, FormatBytes) {
 514   static const struct {
 515     int64 bytes;
 516     DataUnits units;
 517     const char* expected;
 518     const char* expected_with_units;
 519   } cases[] = {
 520     // Expected behavior: we show one post-decimal digit when we have
 521     // under two pre-decimal digits, except in cases where it makes no
 522     // sense (zero or bytes).
 523     // Since we switch units once we cross the 1000 mark, this keeps
 524     // the display of file sizes or bytes consistently around three
 525     // digits.
 526     {0, DATA_UNITS_BYTE, "0", "0 B"},
 527     {512, DATA_UNITS_BYTE, "512", "512 B"},
 528     {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
 529     {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
 530     {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
 531     {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
 532     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
 533     {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
 534     {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
 535     {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
 536      "105", "105 GB"},
 537     {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"},
 538
 539     {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
 540     {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
 541     {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
 542     {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
 543      "1.9", "1.9 GB"},
 544     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
 545     {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
 546 #ifdef NDEBUG
 547     {-1, DATA_UNITS_BYTE, "", ""},
 548 #endif
 549   };
 550
 551   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 552     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 553               FormatBytes(cases[i].bytes, cases[i].units, false));
 554     EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
 555               FormatBytes(cases[i].bytes, cases[i].units, true));
 556   }
 557 }
 558
 559 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 560   static const struct {
 561     const char* str;
 562     string16::size_type start_offset;
 563     const char* find_this;
 564     const char* replace_with;
 565     const char* expected;
 566   } cases[] = {
 567     {"aaa", 0, "a", "b", "bbb"},
 568     {"abb", 0, "ab", "a", "ab"},
 569     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 570     {"Not found", 0, "x", "0", "Not found"},
 571     {"Not found again", 5, "x", "0", "Not found again"},
 572     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 573      "Four score and seven years agoMakingFour score and seven years agoit"
 574      "Four score and seven years agomuchFour score and seven years agolonger"
 575      "Four score and seven years ago"},
 576     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 577     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 578     {"abababab", 2, "ab", "c", "abccc"},
 579   };
 580
 581   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 582     string16 str = ASCIIToUTF16(cases[i].str);
 583     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 584                                  ASCIIToUTF16(cases[i].find_this),
 585                                  ASCIIToUTF16(cases[i].replace_with));
 586     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 587   }
 588 }
 589
 590 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 591   static const struct {
 592     const char* str;
 593     string16::size_type start_offset;
 594     const char* find_this;
 595     const char* replace_with;
 596     const char* expected;
 597   } cases[] = {
 598     {"aaa", 0, "a", "b", "baa"},
 599     {"abb", 0, "ab", "a", "ab"},
 600     {"Removing some substrings inging", 0, "ing", "",
 601       "Remov some substrings inging"},
 602     {"Not found", 0, "x", "0", "Not found"},
 603     {"Not found again", 5, "x", "0", "Not found again"},
 604     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 605      "Four score and seven years agoMaking it much longer "},
 606     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 607     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 608     {"abababab", 2, "ab", "c", "abcabab"},
 609   };
 610
 611   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 612     string16 str = ASCIIToUTF16(cases[i].str);
 613     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 614                                      ASCIIToUTF16(cases[i].find_this),
 615                                      ASCIIToUTF16(cases[i].replace_with));
 616     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 617   }
 618 }
 619
 620 TEST(StringUtilTest, HexDigitToInt) {
 621   EXPECT_EQ(0, HexDigitToInt('0'));
 622   EXPECT_EQ(1, HexDigitToInt('1'));
 623   EXPECT_EQ(2, HexDigitToInt('2'));
 624   EXPECT_EQ(3, HexDigitToInt('3'));
 625   EXPECT_EQ(4, HexDigitToInt('4'));
 626   EXPECT_EQ(5, HexDigitToInt('5'));
 627   EXPECT_EQ(6, HexDigitToInt('6'));
 628   EXPECT_EQ(7, HexDigitToInt('7'));
 629   EXPECT_EQ(8, HexDigitToInt('8'));
 630   EXPECT_EQ(9, HexDigitToInt('9'));
 631   EXPECT_EQ(10, HexDigitToInt('A'));
 632   EXPECT_EQ(11, HexDigitToInt('B'));
 633   EXPECT_EQ(12, HexDigitToInt('C'));
 634   EXPECT_EQ(13, HexDigitToInt('D'));
 635   EXPECT_EQ(14, HexDigitToInt('E'));
 636   EXPECT_EQ(15, HexDigitToInt('F'));
 637
 638   // Verify the lower case as well.
 639   EXPECT_EQ(10, HexDigitToInt('a'));
 640   EXPECT_EQ(11, HexDigitToInt('b'));
 641   EXPECT_EQ(12, HexDigitToInt('c'));
 642   EXPECT_EQ(13, HexDigitToInt('d'));
 643   EXPECT_EQ(14, HexDigitToInt('e'));
 644   EXPECT_EQ(15, HexDigitToInt('f'));
 645 }
 646
 647 // This checks where we can use the assignment operator for a va_list. We need
 648 // a way to do this since Visual C doesn't support va_copy, but assignment on
 649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 650 // capability.
 651 static void VariableArgsFunc(const char* format, ...) {
 652   va_list org;
 653   va_start(org, format);
 654
 655   va_list dup;
 656   GG_VA_COPY(dup, org);
 657   int i1 = va_arg(org, int);
 658   int j1 = va_arg(org, int);
 659   char* s1 = va_arg(org, char*);
 660   double d1 = va_arg(org, double);
 661   va_end(org);
 662
 663   int i2 = va_arg(dup, int);
 664   int j2 = va_arg(dup, int);
 665   char* s2 = va_arg(dup, char*);
 666   double d2 = va_arg(dup, double);
 667
 668   EXPECT_EQ(i1, i2);
 669   EXPECT_EQ(j1, j2);
 670   EXPECT_STREQ(s1, s2);
 671   EXPECT_EQ(d1, d2);
 672
 673   va_end(dup);
 674 }
 675
 676 TEST(StringUtilTest, VAList) {
 677   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
 678 }
 679
 680 // Test for Tokenize
 681 template <typename STR>
 682 void TokenizeTest() {
 683   std::vector<STR> r;
 684   size_t size;
 685
 686   size = Tokenize(STR("This is a string"), STR(" "), &r);
 687   EXPECT_EQ(4U, size);
 688   ASSERT_EQ(4U, r.size());
 689   EXPECT_EQ(r[0], STR("This"));
 690   EXPECT_EQ(r[1], STR("is"));
 691   EXPECT_EQ(r[2], STR("a"));
 692   EXPECT_EQ(r[3], STR("string"));
 693   r.clear();
 694
 695   size = Tokenize(STR("one,two,three"), STR(","), &r);
 696   EXPECT_EQ(3U, size);
 697   ASSERT_EQ(3U, r.size());
 698   EXPECT_EQ(r[0], STR("one"));
 699   EXPECT_EQ(r[1], STR("two"));
 700   EXPECT_EQ(r[2], STR("three"));
 701   r.clear();
 702
 703   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 704   EXPECT_EQ(3U, size);
 705   ASSERT_EQ(3U, r.size());
 706   EXPECT_EQ(r[0], STR("one"));
 707   EXPECT_EQ(r[1], STR("two"));
 708   EXPECT_EQ(r[2], STR("three;four"));
 709   r.clear();
 710
 711   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 712   EXPECT_EQ(4U, size);
 713   ASSERT_EQ(4U, r.size());
 714   EXPECT_EQ(r[0], STR("one"));
 715   EXPECT_EQ(r[1], STR("two"));
 716   EXPECT_EQ(r[2], STR("three"));
 717   EXPECT_EQ(r[3], STR("four"));
 718   r.clear();
 719
 720   size = Tokenize(STR("one, two, three"), STR(","), &r);
 721   EXPECT_EQ(3U, size);
 722   ASSERT_EQ(3U, r.size());
 723   EXPECT_EQ(r[0], STR("one"));
 724   EXPECT_EQ(r[1], STR(" two"));
 725   EXPECT_EQ(r[2], STR(" three"));
 726   r.clear();
 727
 728   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 729   EXPECT_EQ(4U, size);
 730   ASSERT_EQ(4U, r.size());
 731   EXPECT_EQ(r[0], STR("one"));
 732   EXPECT_EQ(r[1], STR(" two"));
 733   EXPECT_EQ(r[2], STR(" three"));
 734   EXPECT_EQ(r[3], STR(" "));
 735   r.clear();
 736
 737   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 738   EXPECT_EQ(3U, size);
 739   ASSERT_EQ(3U, r.size());
 740   EXPECT_EQ(r[0], STR("one"));
 741   EXPECT_EQ(r[1], STR(" two"));
 742   EXPECT_EQ(r[2], STR(" three"));
 743   r.clear();
 744
 745   size = Tokenize(STR(""), STR(","), &r);
 746   EXPECT_EQ(0U, size);
 747   ASSERT_EQ(0U, r.size());
 748   r.clear();
 749
 750   size = Tokenize(STR(","), STR(","), &r);
 751   EXPECT_EQ(0U, size);
 752   ASSERT_EQ(0U, r.size());
 753   r.clear();
 754
 755   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 756   EXPECT_EQ(0U, size);
 757   ASSERT_EQ(0U, r.size());
 758   r.clear();
 759
 760   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 761   EXPECT_EQ(1U, size);
 762   ASSERT_EQ(1U, r.size());
 763   EXPECT_EQ(r[0], STR("a"));
 764   r.clear();
 765
 766   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 767   EXPECT_EQ(2U, size);
 768   ASSERT_EQ(2U, r.size());
 769   EXPECT_EQ(r[0], STR("\ta\t"));
 770   EXPECT_EQ(r[1], STR("b\tcc"));
 771   r.clear();
 772 }
 773
 774 TEST(StringUtilTest, TokenizeStdString) {
 775   TokenizeTest<std::string>();
 776 }
 777
 778 TEST(StringUtilTest, TokenizeStringPiece) {
 779   TokenizeTest<base::StringPiece>();
 780 }
 781
 782 // Test for JoinString
 783 TEST(StringUtilTest, JoinString) {
 784   std::vector<std::string> in;
 785   EXPECT_EQ("", JoinString(in, ','));
 786
 787   in.push_back("a");
 788   EXPECT_EQ("a", JoinString(in, ','));
 789
 790   in.push_back("b");
 791   in.push_back("c");
 792   EXPECT_EQ("a,b,c", JoinString(in, ','));
 793
 794   in.push_back("");
 795   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 796   in.push_back(" ");
 797   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 798 }
 799
 800 TEST(StringUtilTest, StartsWith) {
 801   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
 802   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
 803   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
 804   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
 805   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
 806   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
 807   EXPECT_FALSE(StartsWithASCII("", "javascript", false));
 808   EXPECT_FALSE(StartsWithASCII("", "javascript", true));
 809   EXPECT_TRUE(StartsWithASCII("java", "", false));
 810   EXPECT_TRUE(StartsWithASCII("java", "", true));
 811
 812   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
 813   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
 814   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
 815   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
 816   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
 817   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
 818   EXPECT_FALSE(StartsWith(L"", L"javascript", false));
 819   EXPECT_FALSE(StartsWith(L"", L"javascript", true));
 820   EXPECT_TRUE(StartsWith(L"java", L"", false));
 821   EXPECT_TRUE(StartsWith(L"java", L"", true));
 822 }
 823
 824 TEST(StringUtilTest, EndsWith) {
 825   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
 826   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
 827   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
 828   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
 829   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
 830   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
 831   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
 832   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
 833   EXPECT_FALSE(EndsWith(L"", L".plugin", false));
 834   EXPECT_FALSE(EndsWith(L"", L".plugin", true));
 835   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
 836   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
 837   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
 838   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
 839   EXPECT_TRUE(EndsWith(L"", L"", false));
 840   EXPECT_TRUE(EndsWith(L"", L"", true));
 841 }
 842
 843 TEST(StringUtilTest, GetStringFWithOffsets) {
 844   std::vector<string16> subst;
 845   subst.push_back(ASCIIToUTF16("1"));
 846   subst.push_back(ASCIIToUTF16("2"));
 847   std::vector<size_t> offsets;
 848
 849   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 850                             subst,
 851                             &offsets);
 852   EXPECT_EQ(2U, offsets.size());
 853   EXPECT_EQ(7U, offsets[0]);
 854   EXPECT_EQ(25U, offsets[1]);
 855   offsets.clear();
 856
 857   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 858                             subst,
 859                             &offsets);
 860   EXPECT_EQ(2U, offsets.size());
 861   EXPECT_EQ(25U, offsets[0]);
 862   EXPECT_EQ(7U, offsets[1]);
 863   offsets.clear();
 864 }
 865
 866 TEST(StringUtilTest, ReplaceStringPlaceholders) {
 867   std::vector<string16> subst;
 868   subst.push_back(ASCIIToUTF16("9a"));
 869   subst.push_back(ASCIIToUTF16("8b"));
 870   subst.push_back(ASCIIToUTF16("7c"));
 871   subst.push_back(ASCIIToUTF16("6d"));
 872   subst.push_back(ASCIIToUTF16("5e"));
 873   subst.push_back(ASCIIToUTF16("4f"));
 874   subst.push_back(ASCIIToUTF16("3g"));
 875   subst.push_back(ASCIIToUTF16("2h"));
 876   subst.push_back(ASCIIToUTF16("1i"));
 877
 878   string16 formatted =
 879       ReplaceStringPlaceholders(
 880           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 881
 882   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
 883 }
 884
 885 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 886   // Test whether replacestringplaceholders works as expected when there
 887   // are fewer inputs than outputs.
 888   std::vector<string16> subst;
 889   subst.push_back(ASCIIToUTF16("9a"));
 890   subst.push_back(ASCIIToUTF16("8b"));
 891   subst.push_back(ASCIIToUTF16("7c"));
 892
 893   string16 formatted =
 894       ReplaceStringPlaceholders(
 895           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 896
 897   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
 898 }
 899
 900 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
 901   std::vector<std::string> subst;
 902   subst.push_back("9a");
 903   subst.push_back("8b");
 904   subst.push_back("7c");
 905   subst.push_back("6d");
 906   subst.push_back("5e");
 907   subst.push_back("4f");
 908   subst.push_back("3g");
 909   subst.push_back("2h");
 910   subst.push_back("1i");
 911
 912   std::string formatted =
 913       ReplaceStringPlaceholders(
 914           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
 915
 916   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
 917 }
 918
 919 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
 920   std::vector<std::string> subst;
 921   subst.push_back("a");
 922   subst.push_back("b");
 923   subst.push_back("c");
 924   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
 925             "$1 $$2 $$$3");
 926 }
 927
 928 TEST(StringUtilTest, MatchPatternTest) {
 929   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
 930   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
 931   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
 932   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
 933   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
 934   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
 935   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
 936   EXPECT_FALSE(MatchPattern("", "*.*"));
 937   EXPECT_TRUE(MatchPattern("", "*"));
 938   EXPECT_TRUE(MatchPattern("", "?"));
 939   EXPECT_TRUE(MatchPattern("", ""));
 940   EXPECT_FALSE(MatchPattern("Hello", ""));
 941   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
 942   // Stop after a certain recursion depth.
 943   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
 944
 945   // Test UTF8 matching.
 946   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
 947   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
 948   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
 949   // Invalid sequences should be handled as a single invalid character.
 950   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
 951   // If the pattern has invalid characters, it shouldn't match anything.
 952   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
 953
 954   // Test UTF16 character matching.
 955   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
 956                            UTF8ToUTF16("*.com")));
 957   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
 958                            UTF8ToUTF16("He??o\\*1*")));
 959
 960   // This test verifies that consecutive wild cards are collapsed into 1
 961   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
 962   // recursion depth).
 963   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
 964                            UTF8ToUTF16("He********************************o")));
 965 }
 966
 967 TEST(StringUtilTest, LcpyTest) {
 968   // Test the normal case where we fit in our buffer.
 969   {
 970     char dst[10];
 971     wchar_t wdst[10];
 972     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
 973     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
 974     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
 975     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
 976   }
 977
 978   // Test dst_size == 0, nothing should be written to |dst| and we should
 979   // have the equivalent of strlen(src).
 980   {
 981     char dst[2] = {1, 2};
 982     wchar_t wdst[2] = {1, 2};
 983     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
 984     EXPECT_EQ(1, dst[0]);
 985     EXPECT_EQ(2, dst[1]);
 986     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
 987 #if defined(WCHAR_T_IS_UNSIGNED)
 988     EXPECT_EQ(1U, wdst[0]);
 989     EXPECT_EQ(2U, wdst[1]);
 990 #else
 991     EXPECT_EQ(1, wdst[0]);
 992     EXPECT_EQ(2, wdst[1]);
 993 #endif
 994   }
 995
 996   // Test the case were we _just_ competely fit including the null.
 997   {
 998     char dst[8];
 999     wchar_t wdst[8];
1000     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1001     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1002     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1003     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1004   }
1005
1006   // Test the case were we we are one smaller, so we can't fit the null.
1007   {
1008     char dst[7];
1009     wchar_t wdst[7];
1010     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1011     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1012     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1013     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1014   }
1015
1016   // Test the case were we are just too small.
1017   {
1018     char dst[3];
1019     wchar_t wdst[3];
1020     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1021     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1022     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1023     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1024   }
1025 }
1026
1027 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
1028   struct TestData {
1029     const wchar_t* input;
1030     bool portable;
1031   } cases[] = {
1032     { L"%ls", true },
1033     { L"%s", false },
1034     { L"%S", false },
1035     { L"%lS", false },
1036     { L"Hello, %s", false },
1037     { L"%lc", true },
1038     { L"%c", false },
1039     { L"%C", false },
1040     { L"%lC", false },
1041     { L"%ls %s", false },
1042     { L"%s %ls", false },
1043     { L"%s %ls %s", false },
1044     { L"%f", true },
1045     { L"%f %F", false },
1046     { L"%d %D", false },
1047     { L"%o %O", false },
1048     { L"%u %U", false },
1049     { L"%f %d %o %u", true },
1050     { L"%-8d (%02.1f%)", true },
1051     { L"% 10s", false },
1052     { L"% 10ls", true }
1053   };
1054   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1055     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
1056   }
1057 }
1058
1059 TEST(StringUtilTest, RemoveChars) {
1060   const char* kRemoveChars = "-/+*";
1061   std::string input = "A-+bc/d!*";
1062   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
1063   EXPECT_EQ("Abcd!", input);
1064
1065   // No characters match kRemoveChars.
1066   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1067   EXPECT_EQ("Abcd!", input);
1068
1069   // Empty string.
1070   input.clear();
1071   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
1072   EXPECT_EQ(std::string(), input);
1073 }
1074
1075 TEST(StringUtilTest, ContainsOnlyChars) {
1076   // Providing an empty list of characters should return false but for the empty
1077   // string.
1078   EXPECT_TRUE(ContainsOnlyChars("", ""));
1079   EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
1080
1081   EXPECT_TRUE(ContainsOnlyChars("", "1234"));
1082   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1083   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1084   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1085   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1086 }
1087
1088 }  // namespace base