base/string_util_unittest.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6 #include <stdarg.h>
   7
   8 #include <limits>
   9 #include <sstream>
  10
  11 #include "base/basictypes.h"
  12 #include "base/string16.h"
  13 #include "base/string_util.h"
  14 #include "base/utf_string_conversions.h"
  15 #include "testing/gmock/include/gmock/gmock.h"
  16 #include "testing/gtest/include/gtest/gtest.h"
  17
  18 using ::testing::ElementsAre;
  19
  20 namespace base {
  21
// Wide-string cases consumed by the TrimWhitespace test. Each entry lists the
// input, the ends to trim, the expected trimmed text and the expected return
// value of the call.
static const struct trim_case {
  // Text handed to TrimWhitespace() (after conversion to UTF-16).
  const wchar_t* input;
  // Which ends of the string should be trimmed.
  const TrimPositions positions;
  // Expected text after trimming.
  const wchar_t* output;
  // Expected value returned by TrimWhitespace().
  const TrimPositions return_value;
} trim_cases[] = {
  {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  // Nothing to strip, so TRIM_NONE is expected back.
  {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  {L"", TRIM_ALL, L"", TRIM_NONE},
  // Inputs consisting only of spaces collapse to the empty string.
  {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  {L"  ", TRIM_ALL, L"", TRIM_ALL},
  {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  // The non-ASCII code points U+2002, U+00A0 and U+3000 are expected to be
  // trimmed as well.
  {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
};
  39
// Narrow-string counterpart of |trim_cases|, consumed by the second loop of
// the TrimWhitespace test.
static const struct trim_case_ascii {
  // Text handed to TrimWhitespace().
  const char* input;
  // Which ends of the string should be trimmed.
  const TrimPositions positions;
  // Expected text after trimming.
  const char* output;
  // Expected value returned by TrimWhitespace().
  const TrimPositions return_value;
} trim_cases_ascii[] = {
  {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  // Nothing to strip, so TRIM_NONE is expected back.
  {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  {"", TRIM_ALL, "", TRIM_NONE},
  // Inputs consisting only of spaces collapse to the empty string.
  {"  ", TRIM_LEADING, "", TRIM_LEADING},
  {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  {"  ", TRIM_ALL, "", TRIM_ALL},
  {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
};
  56
  57 namespace {
  58
  59 // Helper used to test TruncateUTF8ToByteSize.
  60 bool Truncated(const std::string& input, const size_t byte_size,
  61                std::string* output) {
  62     size_t prev = input.length();
  63     TruncateUTF8ToByteSize(input, byte_size, output);
  64     return prev != output->length();
  65 }
  66
  67 }  // namespace
  68
// Exercises TruncateUTF8ToByteSize() through the Truncated() helper, which
// returns true when the output length differs from the input length. The
// single |output| string is reused by every check, and the "same string as
// input and output" section near the end relies on the value left in it by
// the preceding check, so the statement order matters.
TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  std::string output;

  // Empty strings and invalid byte_size arguments
  EXPECT_FALSE(Truncated(std::string(), 0, &output));
  EXPECT_EQ(output, "");
  EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  EXPECT_EQ(output, "");
  // -1 is converted to the largest size_t value, i.e. a limit far beyond the
  // input length.
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));

  // Testing the truncation of valid UTF8 correctly
  EXPECT_TRUE(Truncated("abc", 2, &output));
  EXPECT_EQ(output, "ab");
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  // A limit that falls in the middle of a two-byte sequence drops the whole
  // sequence.
  EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  EXPECT_EQ(output.compare("\xc2\x81"), 0);
  EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);

  {
    // arraysize() counts the literal's terminating NUL, so |array_string|
    // carries the embedded NULs plus one trailing NUL byte.
    const char array[] = "\x00\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  }

  {
    // Same idea with a single leading NUL: the 4-byte limit now splits the
    // second two-byte sequence, so only 3 bytes are expected to survive.
    const char array[] = "\x00\xc2\x81\xc2\x81";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
  }

  // Testing invalid UTF8
  // (These byte sequences are the ones the IsStringUTF8 test labels as
  // surrogate code points.)
  EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Testing invalid UTF8 mixed with valid UTF8
  EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
  EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
  EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
  EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
  EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
  // The literal is split around "a" so that the 'a' is not parsed as part of
  // the preceding hex escape.
  EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
              10, &output));
  EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
  EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
  EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);

  // Overlong sequences
  EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Beyond U+10FFFF (the upper limit of Unicode codespace)
  EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
  EXPECT_EQ(output.compare(""), 0);

  // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
  EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
  EXPECT_EQ(output.compare(""), 0);

  {
    // arraysize() includes the terminating NUL, so the input is 5 bytes long.
    const char array[] = "\x00\x00\xfe\xff";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
  }

  // Variants on the previous test
  {
    // Exactly 4 bytes here (no terminating NUL). The string ends in NUL
    // bytes and is expected back unchanged despite the leading invalid bytes.
    const char array[] = "\xff\xfe\x00\x00";
    const std::string array_string(array, 4);
    EXPECT_FALSE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
  }
  {
    const char array[] = "\xff\x00\x00\xfe";
    const std::string array_string(array, arraysize(array));
    EXPECT_TRUE(Truncated(array_string, 4, &output));
    EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
  }

  // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
  EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  // NOTE(review): these four bytes decode to the overlong form of U+FFFE,
  // not to U+1FFFE (which would be "\xf0\x9f\xbf\xbe").
  EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  // NOTE(review): these four bytes encode U+FFFFF; U+10FFFF would be
  // "\xf4\x8f\xbf\xbf".
  EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
  EXPECT_EQ(output.compare(""), 0);
  EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
  EXPECT_EQ(output.compare(""), 0);

  // Strings in legacy encodings that are valid in UTF-8, but
  // are invalid as UTF-8 in real data.
  EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
  EXPECT_EQ(output.compare("caf"), 0);
  EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
  // The string ends in the ASCII byte 0x6e and is expected back unchanged
  // even though earlier bytes are not valid UTF-8.
  EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
              &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);

  // Testing using the same string as input and output.
  // (|output| still holds "\xa7\x41\xa6\x6e" from the check above.)
  EXPECT_FALSE(Truncated(output, 4, &output));
  EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
  EXPECT_TRUE(Truncated(output, 3, &output));
  EXPECT_EQ(output.compare("\xa7\x41"), 0);

  // "abc" with U+201[CD] in windows-125[0-8]
  EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
  EXPECT_EQ(output.compare("\x93" "abc"), 0);

  // U+0639 U+064E U+0644 U+064E in ISO-8859-6
  EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
  EXPECT_EQ(output.compare(""), 0);

  // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
  EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
  EXPECT_EQ(output.compare(""), 0);
}
 227
 228 TEST(StringUtilTest, TrimWhitespace) {
 229   string16 output;  // Allow contents to carry over to next testcase
 230   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 231     const trim_case& value = trim_cases[i];
 232     EXPECT_EQ(value.return_value,
 233               TrimWhitespace(WideToUTF16(value.input), value.positions,
 234                              &output));
 235     EXPECT_EQ(WideToUTF16(value.output), output);
 236   }
 237
 238   // Test that TrimWhitespace() can take the same string for input and output
 239   output = ASCIIToUTF16("  This is a test \r\n");
 240   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 241   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
 242
 243   // Once more, but with a string of whitespace
 244   output = ASCIIToUTF16("  \r\n");
 245   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 246   EXPECT_EQ(string16(), output);
 247
 248   std::string output_ascii;
 249   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 250     const trim_case_ascii& value = trim_cases_ascii[i];
 251     EXPECT_EQ(value.return_value,
 252               TrimWhitespace(value.input, value.positions, &output_ascii));
 253     EXPECT_EQ(value.output, output_ascii);
 254   }
 255 }
 256
// Wide-string cases consumed by the CollapseWhitespace test.
static const struct collapse_case {
  // Text handed to CollapseWhitespace().
  const wchar_t* input;
  // Flag passed as the second argument of CollapseWhitespace(). Judging by
  // the expectations below, whitespace runs that contain a line break are
  // removed entirely when it is true -- TODO confirm against the
  // implementation.
  const bool trim;
  // Expected result.
  const wchar_t* output;
} collapse_cases[] = {
  {L" Google Video ", false, L"Google Video"},
  {L"Google Video", false, L"Google Video"},
  {L"", false, L""},
  {L"  ", false, L""},
  {L"\t\rTest String\n", false, L"Test String"},
  // Non-ASCII code points U+2002, U+00A0 and U+3000 at the ends.
  {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
  {L"    Test     \n  \t String    ", false, L"Test String"},
  // Interior run mixing U+1680, U+2028, a space and a tab.
  {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
  {L"   Test String", false, L"Test String"},
  {L"Test String    ", false, L"Test String"},
  {L"Test String", false, L"Test String"},
  // Cases with the |trim| flag set.
  {L"", true, L""},
  {L"\n", true, L""},
  {L"  \r  ", true, L""},
  {L"\nFoo", true, L"Foo"},
  {L"\r  Foo  ", true, L"Foo"},
  {L" Foo bar ", true, L"Foo bar"},
  {L"  \tFoo  bar  \n", true, L"Foo bar"},
  {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
};
 282
 283 TEST(StringUtilTest, CollapseWhitespace) {
 284   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 285     const collapse_case& value = collapse_cases[i];
 286     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
 287   }
 288 }
 289
// Narrow-string cases consumed by the CollapseWhitespaceASCII test.
static const struct collapse_case_ascii {
  // Text handed to CollapseWhitespaceASCII().
  const char* input;
  // Flag passed as the second argument of CollapseWhitespaceASCII(). Judging
  // by the expectations below, whitespace runs that contain a line break are
  // removed entirely when it is true -- TODO confirm against the
  // implementation.
  const bool trim;
  // Expected result.
  const char* output;
} collapse_cases_ascii[] = {
  {" Google Video ", false, "Google Video"},
  {"Google Video", false, "Google Video"},
  {"", false, ""},
  {"  ", false, ""},
  {"\t\rTest String\n", false, "Test String"},
  {"    Test     \n  \t String    ", false, "Test String"},
  {"   Test String", false, "Test String"},
  {"Test String    ", false, "Test String"},
  {"Test String", false, "Test String"},
  // Cases with the |trim| flag set.
  {"", true, ""},
  {"\n", true, ""},
  {"  \r  ", true, ""},
  {"\nFoo", true, "Foo"},
  {"\r  Foo  ", true, "Foo"},
  {" Foo bar ", true, "Foo bar"},
  {"  \tFoo  bar  \n", true, "Foo bar"},
  {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
};
 313
 314 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 315   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 316     const collapse_case_ascii& value = collapse_cases_ascii[i];
 317     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 318   }
 319 }
 320
 321 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
 322   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));
 323   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
 324   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
 325   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
 326   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
 327   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
 328 }
 329
 330 TEST(StringUtilTest, ContainsOnlyWhitespace) {
 331   EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
 332   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
 333   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
 334   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
 335   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
 336   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
 337 }
 338
 339 TEST(StringUtilTest, IsStringUTF8) {
 340   EXPECT_TRUE(IsStringUTF8("abc"));
 341   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 342   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 343   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 344   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 345   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 346
 347   // surrogate code points
 348   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 349   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 350   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 351
 352   // overlong sequences
 353   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 354   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 355   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 356   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 357   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 358   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 359   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 360   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 361   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 362   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 363   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 364
 365   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 366   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 367   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 368   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 369
 370   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 371   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 372   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 373   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 374   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 375
 376   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 377   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 378   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 379   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 380   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 381   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 382   // Strings in legacy encodings. We can certainly make up strings
 383   // in a legacy encoding that are valid in UTF-8, but in real data,
 384   // most of them are invalid as UTF-8.
 385   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 386   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 387   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 388   // "abc" with U+201[CD] in windows-125[0-8]
 389   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 390   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 391   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 392   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 393   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 394
 395   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 396   // representation, and the second uses a 2-byte sequence. The second version
 397   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 398   // given codepoint must be used.
 399   static const char kEmbeddedNull[] = "embedded\0null";
 400   EXPECT_TRUE(IsStringUTF8(
 401       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 402   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 403 }
 404
 405 TEST(StringUtilTest, ConvertASCII) {
 406   static const char* char_cases[] = {
 407     "Google Video",
 408     "Hello, world\n",
 409     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 410   };
 411
 412   static const wchar_t* const wchar_cases[] = {
 413     L"Google Video",
 414     L"Hello, world\n",
 415     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 416   };
 417
 418   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 419     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 420     std::wstring wide = ASCIIToWide(char_cases[i]);
 421     EXPECT_EQ(wchar_cases[i], wide);
 422
 423     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
 424     std::string ascii = WideToASCII(wchar_cases[i]);
 425     EXPECT_EQ(char_cases[i], ascii);
 426   }
 427
 428   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 429   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
 430
 431   // Convert empty strings.
 432   std::wstring wempty;
 433   std::string empty;
 434   EXPECT_EQ(empty, WideToASCII(wempty));
 435   EXPECT_EQ(wempty, ASCIIToWide(empty));
 436
 437   // Convert strings with an embedded NUL character.
 438   const char chars_with_nul[] = "test\0string";
 439   const int length_with_nul = arraysize(chars_with_nul) - 1;
 440   std::string string_with_nul(chars_with_nul, length_with_nul);
 441   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 442   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 443             wide_with_nul.length());
 444   std::string narrow_with_nul = WideToASCII(wide_with_nul);
 445   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 446             narrow_with_nul.length());
 447   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 448 }
 449
 450 TEST(StringUtilTest, ToUpperASCII) {
 451   EXPECT_EQ('C', ToUpperASCII('C'));
 452   EXPECT_EQ('C', ToUpperASCII('c'));
 453   EXPECT_EQ('2', ToUpperASCII('2'));
 454
 455   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 456   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 457   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 458
 459   std::string in_place_a("Cc2");
 460   StringToUpperASCII(&in_place_a);
 461   EXPECT_EQ("CC2", in_place_a);
 462
 463   std::wstring in_place_w(L"Cc2");
 464   StringToUpperASCII(&in_place_w);
 465   EXPECT_EQ(L"CC2", in_place_w);
 466
 467   std::string original_a("Cc2");
 468   std::string upper_a = StringToUpperASCII(original_a);
 469   EXPECT_EQ("CC2", upper_a);
 470
 471   std::wstring original_w(L"Cc2");
 472   std::wstring upper_w = StringToUpperASCII(original_w);
 473   EXPECT_EQ(L"CC2", upper_w);
 474 }
 475
 476 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 477   static const struct {
 478     const wchar_t* src_w;
 479     const char*    src_a;
 480     const char*    dst;
 481   } lowercase_cases[] = {
 482     { L"FoO", "FoO", "foo" },
 483     { L"foo", "foo", "foo" },
 484     { L"FOO", "FOO", "foo" },
 485   };
 486
 487   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 488     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
 489                                      lowercase_cases[i].dst));
 490     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 491                                      lowercase_cases[i].dst));
 492   }
 493 }
 494
 495 TEST(StringUtilTest, FormatBytesUnlocalized) {
 496   static const struct {
 497     int64 bytes;
 498     const char* expected;
 499   } cases[] = {
 500     // Expected behavior: we show one post-decimal digit when we have
 501     // under two pre-decimal digits, except in cases where it makes no
 502     // sense (zero or bytes).
 503     // Since we switch units once we cross the 1000 mark, this keeps
 504     // the display of file sizes or bytes consistently around three
 505     // digits.
 506     {0, "0 B"},
 507     {512, "512 B"},
 508     {1024*1024, "1.0 MB"},
 509     {1024*1024*1024, "1.0 GB"},
 510     {10LL*1024*1024*1024, "10.0 GB"},
 511     {99LL*1024*1024*1024, "99.0 GB"},
 512     {105LL*1024*1024*1024, "105 GB"},
 513     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
 514     {~(1LL<<63), "8192 PB"},
 515
 516     {99*1024 + 103, "99.1 kB"},
 517     {1024*1024 + 103, "1.0 MB"},
 518     {1024*1024 + 205 * 1024, "1.2 MB"},
 519     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
 520     {10LL*1024*1024*1024, "10.0 GB"},
 521     {100LL*1024*1024*1024, "100 GB"},
 522   };
 523
 524   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 525     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 526               FormatBytesUnlocalized(cases[i].bytes));
 527   }
 528 }
 529 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 530   static const struct {
 531     const char* str;
 532     string16::size_type start_offset;
 533     const char* find_this;
 534     const char* replace_with;
 535     const char* expected;
 536   } cases[] = {
 537     {"aaa", 0, "a", "b", "bbb"},
 538     {"abb", 0, "ab", "a", "ab"},
 539     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 540     {"Not found", 0, "x", "0", "Not found"},
 541     {"Not found again", 5, "x", "0", "Not found again"},
 542     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 543      "Four score and seven years agoMakingFour score and seven years agoit"
 544      "Four score and seven years agomuchFour score and seven years agolonger"
 545      "Four score and seven years ago"},
 546     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 547     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 548     {"abababab", 2, "ab", "c", "abccc"},
 549   };
 550
 551   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 552     string16 str = ASCIIToUTF16(cases[i].str);
 553     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 554                                  ASCIIToUTF16(cases[i].find_this),
 555                                  ASCIIToUTF16(cases[i].replace_with));
 556     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 557   }
 558 }
 559
 560 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 561   static const struct {
 562     const char* str;
 563     string16::size_type start_offset;
 564     const char* find_this;
 565     const char* replace_with;
 566     const char* expected;
 567   } cases[] = {
 568     {"aaa", 0, "a", "b", "baa"},
 569     {"abb", 0, "ab", "a", "ab"},
 570     {"Removing some substrings inging", 0, "ing", "",
 571       "Remov some substrings inging"},
 572     {"Not found", 0, "x", "0", "Not found"},
 573     {"Not found again", 5, "x", "0", "Not found again"},
 574     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 575      "Four score and seven years agoMaking it much longer "},
 576     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 577     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 578     {"abababab", 2, "ab", "c", "abcabab"},
 579   };
 580
 581   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 582     string16 str = ASCIIToUTF16(cases[i].str);
 583     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 584                                      ASCIIToUTF16(cases[i].find_this),
 585                                      ASCIIToUTF16(cases[i].replace_with));
 586     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 587   }
 588 }
 589
 590 TEST(StringUtilTest, HexDigitToInt) {
 591   EXPECT_EQ(0, HexDigitToInt('0'));
 592   EXPECT_EQ(1, HexDigitToInt('1'));
 593   EXPECT_EQ(2, HexDigitToInt('2'));
 594   EXPECT_EQ(3, HexDigitToInt('3'));
 595   EXPECT_EQ(4, HexDigitToInt('4'));
 596   EXPECT_EQ(5, HexDigitToInt('5'));
 597   EXPECT_EQ(6, HexDigitToInt('6'));
 598   EXPECT_EQ(7, HexDigitToInt('7'));
 599   EXPECT_EQ(8, HexDigitToInt('8'));
 600   EXPECT_EQ(9, HexDigitToInt('9'));
 601   EXPECT_EQ(10, HexDigitToInt('A'));
 602   EXPECT_EQ(11, HexDigitToInt('B'));
 603   EXPECT_EQ(12, HexDigitToInt('C'));
 604   EXPECT_EQ(13, HexDigitToInt('D'));
 605   EXPECT_EQ(14, HexDigitToInt('E'));
 606   EXPECT_EQ(15, HexDigitToInt('F'));
 607
 608   // Verify the lower case as well.
 609   EXPECT_EQ(10, HexDigitToInt('a'));
 610   EXPECT_EQ(11, HexDigitToInt('b'));
 611   EXPECT_EQ(12, HexDigitToInt('c'));
 612   EXPECT_EQ(13, HexDigitToInt('d'));
 613   EXPECT_EQ(14, HexDigitToInt('e'));
 614   EXPECT_EQ(15, HexDigitToInt('f'));
 615 }
 616
 617 // This checks where we can use the assignment operator for a va_list. We need
 618 // a way to do this since Visual C doesn't support va_copy, but assignment on
 619 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 620 // capability.
 621 static void VariableArgsFunc(const char* format, ...) {
 622   va_list org;
 623   va_start(org, format);
 624
 625   va_list dup;
 626   GG_VA_COPY(dup, org);
 627   int i1 = va_arg(org, int);
 628   int j1 = va_arg(org, int);
 629   char* s1 = va_arg(org, char*);
 630   double d1 = va_arg(org, double);
 631   va_end(org);
 632
 633   int i2 = va_arg(dup, int);
 634   int j2 = va_arg(dup, int);
 635   char* s2 = va_arg(dup, char*);
 636   double d2 = va_arg(dup, double);
 637
 638   EXPECT_EQ(i1, i2);
 639   EXPECT_EQ(j1, j2);
 640   EXPECT_STREQ(s1, s2);
 641   EXPECT_EQ(d1, d2);
 642
 643   va_end(dup);
 644 }
 645
// Drives VariableArgsFunc() with the argument sequence it reads back:
// two ints, a C string and a double.
TEST(StringUtilTest, VAList) {
  VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
}
 649
 650 // Test for Tokenize
 651 template <typename STR>
 652 void TokenizeTest() {
 653   std::vector<STR> r;
 654   size_t size;
 655
 656   size = Tokenize(STR("This is a string"), STR(" "), &r);
 657   EXPECT_EQ(4U, size);
 658   ASSERT_EQ(4U, r.size());
 659   EXPECT_EQ(r[0], STR("This"));
 660   EXPECT_EQ(r[1], STR("is"));
 661   EXPECT_EQ(r[2], STR("a"));
 662   EXPECT_EQ(r[3], STR("string"));
 663   r.clear();
 664
 665   size = Tokenize(STR("one,two,three"), STR(","), &r);
 666   EXPECT_EQ(3U, size);
 667   ASSERT_EQ(3U, r.size());
 668   EXPECT_EQ(r[0], STR("one"));
 669   EXPECT_EQ(r[1], STR("two"));
 670   EXPECT_EQ(r[2], STR("three"));
 671   r.clear();
 672
 673   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 674   EXPECT_EQ(3U, size);
 675   ASSERT_EQ(3U, r.size());
 676   EXPECT_EQ(r[0], STR("one"));
 677   EXPECT_EQ(r[1], STR("two"));
 678   EXPECT_EQ(r[2], STR("three;four"));
 679   r.clear();
 680
 681   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 682   EXPECT_EQ(4U, size);
 683   ASSERT_EQ(4U, r.size());
 684   EXPECT_EQ(r[0], STR("one"));
 685   EXPECT_EQ(r[1], STR("two"));
 686   EXPECT_EQ(r[2], STR("three"));
 687   EXPECT_EQ(r[3], STR("four"));
 688   r.clear();
 689
 690   size = Tokenize(STR("one, two, three"), STR(","), &r);
 691   EXPECT_EQ(3U, size);
 692   ASSERT_EQ(3U, r.size());
 693   EXPECT_EQ(r[0], STR("one"));
 694   EXPECT_EQ(r[1], STR(" two"));
 695   EXPECT_EQ(r[2], STR(" three"));
 696   r.clear();
 697
 698   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 699   EXPECT_EQ(4U, size);
 700   ASSERT_EQ(4U, r.size());
 701   EXPECT_EQ(r[0], STR("one"));
 702   EXPECT_EQ(r[1], STR(" two"));
 703   EXPECT_EQ(r[2], STR(" three"));
 704   EXPECT_EQ(r[3], STR(" "));
 705   r.clear();
 706
 707   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 708   EXPECT_EQ(3U, size);
 709   ASSERT_EQ(3U, r.size());
 710   EXPECT_EQ(r[0], STR("one"));
 711   EXPECT_EQ(r[1], STR(" two"));
 712   EXPECT_EQ(r[2], STR(" three"));
 713   r.clear();
 714
 715   size = Tokenize(STR(), STR(","), &r);
 716   EXPECT_EQ(0U, size);
 717   ASSERT_EQ(0U, r.size());
 718   r.clear();
 719
 720   size = Tokenize(STR(","), STR(","), &r);
 721   EXPECT_EQ(0U, size);
 722   ASSERT_EQ(0U, r.size());
 723   r.clear();
 724
 725   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 726   EXPECT_EQ(0U, size);
 727   ASSERT_EQ(0U, r.size());
 728   r.clear();
 729
 730   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 731   EXPECT_EQ(1U, size);
 732   ASSERT_EQ(1U, r.size());
 733   EXPECT_EQ(r[0], STR("a"));
 734   r.clear();
 735
 736   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 737   EXPECT_EQ(2U, size);
 738   ASSERT_EQ(2U, r.size());
 739   EXPECT_EQ(r[0], STR("\ta\t"));
 740   EXPECT_EQ(r[1], STR("b\tcc"));
 741   r.clear();
 742 }
 743
// Runs the shared Tokenize scenarios with std::string as the string type.
TEST(StringUtilTest, TokenizeStdString) {
  TokenizeTest<std::string>();
}
 747
// Runs the shared Tokenize scenarios with base::StringPiece as the string
// type.
TEST(StringUtilTest, TokenizeStringPiece) {
  TokenizeTest<base::StringPiece>();
}
 751
 752 // Test for JoinString
 753 TEST(StringUtilTest, JoinString) {
 754   std::vector<std::string> in;
 755   EXPECT_EQ("", JoinString(in, ','));
 756
 757   in.push_back("a");
 758   EXPECT_EQ("a", JoinString(in, ','));
 759
 760   in.push_back("b");
 761   in.push_back("c");
 762   EXPECT_EQ("a,b,c", JoinString(in, ','));
 763
 764   in.push_back(std::string());
 765   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 766   in.push_back(" ");
 767   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 768 }
 769
 770 // Test for JoinString overloaded with std::string separator
 771 TEST(StringUtilTest, JoinStringWithString) {
 772   std::string separator(", ");
 773   std::vector<std::string> parts;
 774   EXPECT_EQ(std::string(), JoinString(parts, separator));
 775
 776   parts.push_back("a");
 777   EXPECT_EQ("a", JoinString(parts, separator));
 778
 779   parts.push_back("b");
 780   parts.push_back("c");
 781   EXPECT_EQ("a, b, c", JoinString(parts, separator));
 782
 783   parts.push_back(std::string());
 784   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
 785   parts.push_back(" ");
 786   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
 787 }
 788
 789 // Covers the JoinString() overload that works on string16 values.
 790 TEST(StringUtilTest, JoinStringWithString16) {
 791   std::vector<string16> pieces;
 792   const string16 comma_space = ASCIIToUTF16(", ");
 793   EXPECT_EQ(string16(), JoinString(pieces, comma_space));
 794
       // A lone element comes back without any separator.
 795   pieces.push_back(ASCIIToUTF16("a"));
 796   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, comma_space));
 797
       // The whole multi-character separator goes between neighbours.
 798   pieces.push_back(ASCIIToUTF16("b"));
 799   pieces.push_back(ASCIIToUTF16("c"));
 800   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, comma_space));
 801
       // Empty and whitespace-only elements are joined like any other element.
 802   pieces.push_back(ASCIIToUTF16(""));
 803   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, comma_space));
 804   pieces.push_back(ASCIIToUTF16(" "));
 805   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, ASCIIToUTF16("|")));
 806 }
 807
 808 TEST(StringUtilTest, StartsWith) {
       // StartsWithASCII, case-sensitive (third argument true).
 809   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
 810   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
 811   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
 812   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
 813   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
       // StartsWithASCII, case-insensitive (third argument false).
 814   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
 815   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
 816   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
 817   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
 818   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
 819
       // StartsWith on wide strings, case-sensitive.
 820   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
 821   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
 822   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
 823   EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true));
 824   EXPECT_TRUE(StartsWith(L"java", std::wstring(), true));
       // StartsWith on wide strings, case-insensitive.
 825   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
 826   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
 827   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
 828   EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false));
 829   EXPECT_TRUE(StartsWith(L"java", std::wstring(), false));
 830 }
 831
 832 TEST(StringUtilTest, EndsWith) {
       // Case-sensitive comparisons (third argument true).
 833   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
 834   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
 835   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
 836   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
 837   EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true));
 838   EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true));
 839   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
 840   EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true));
       // Case-insensitive comparisons (third argument false).
 841   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
 842   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
 843   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
 844   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
 845   EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false));
 846   EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false));
 847   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
 848   EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false));
 849 }
 850
 851 TEST(StringUtilTest, GetStringFWithOffsets) {
       // Checks the offsets that ReplaceStringPlaceholders() writes through its
       // last argument. Per the expectations below, offsets[i] is the position
       // in the result of substitution i + 1, whatever order the placeholders
       // appear in.
 852   std::vector<string16> subst;
 853   subst.push_back(ASCIIToUTF16("1"));
 854   subst.push_back(ASCIIToUTF16("2"));
 855   std::vector<size_t> offsets;
 856
 857   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 858                             subst,
 859                             &offsets);
       // ASSERT rather than EXPECT: the element checks below index |offsets| and
       // are only valid when it really holds two entries.
 860   ASSERT_EQ(2U, offsets.size());
 861   EXPECT_EQ(7U, offsets[0]);
 862   EXPECT_EQ(25U, offsets[1]);
 863   offsets.clear();
 864
       // Same substitutions with the placeholders swapped in the format string.
 865   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 866                             subst,
 867                             &offsets);
 868   ASSERT_EQ(2U, offsets.size());
 869   EXPECT_EQ(25U, offsets[0]);
 870   EXPECT_EQ(7U, offsets[1]);
 871   offsets.clear();
 872 }
 873
 874 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 875   // Supplies fewer substitutions (three) than the highest placeholder in the
 876   // format string ($6). Per the expected string, $4..$6 expand to nothing.
 877   std::vector<string16> subst;
 878   subst.push_back(ASCIIToUTF16("9a"));
 879   subst.push_back(ASCIIToUTF16("8b"));
 880   subst.push_back(ASCIIToUTF16("7c"));
 881
 882   string16 formatted =
 883       ReplaceStringPlaceholders(
 884           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 885
       // Expected value first, matching the other assertions in this file.
 886   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
 887 }
 888
 889 TEST(StringUtilTest, ReplaceStringPlaceholders) {
       // Nine substitutions, one for each of $1..$9; every placeholder is used
       // exactly once in the format string.
 890   std::vector<string16> subst;
 891   subst.push_back(ASCIIToUTF16("9a"));
 892   subst.push_back(ASCIIToUTF16("8b"));
 893   subst.push_back(ASCIIToUTF16("7c"));
 894   subst.push_back(ASCIIToUTF16("6d"));
 895   subst.push_back(ASCIIToUTF16("5e"));
 896   subst.push_back(ASCIIToUTF16("4f"));
 897   subst.push_back(ASCIIToUTF16("3g"));
 898   subst.push_back(ASCIIToUTF16("2h"));
 899   subst.push_back(ASCIIToUTF16("1i"));
 900
 901   string16 formatted =
 902       ReplaceStringPlaceholders(
 903           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 904
       // Expected value first, matching the other assertions in this file.
 905   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
 906 }
 907
 908 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
       // Fourteen substitutions. Per the expected string, two-digit placeholders
       // ($10..$14) select substitutions 10..14 rather than being read as $1
       // followed by a literal digit.
 909   std::vector<string16> subst;
 910   subst.push_back(ASCIIToUTF16("9a"));
 911   subst.push_back(ASCIIToUTF16("8b"));
 912   subst.push_back(ASCIIToUTF16("7c"));
 913   subst.push_back(ASCIIToUTF16("6d"));
 914   subst.push_back(ASCIIToUTF16("5e"));
 915   subst.push_back(ASCIIToUTF16("4f"));
 916   subst.push_back(ASCIIToUTF16("3g"));
 917   subst.push_back(ASCIIToUTF16("2h"));
 918   subst.push_back(ASCIIToUTF16("1i"));
 919   subst.push_back(ASCIIToUTF16("0j"));
 920   subst.push_back(ASCIIToUTF16("-1k"));
 921   subst.push_back(ASCIIToUTF16("-2l"));
 922   subst.push_back(ASCIIToUTF16("-3m"));
 923   subst.push_back(ASCIIToUTF16("-4n"));
 924
 925   string16 formatted =
 926       ReplaceStringPlaceholders(
 927           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
 928                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
 929
       // Expected value first, matching the other assertions in this file.
 930   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
 931                          "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"), formatted);
 932 }
 933
 934 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
       // Same nine-placeholder scenario as the string16 test, driven through the
       // std::string arguments instead.
 935   std::vector<std::string> subst;
 936   subst.push_back("9a");
 937   subst.push_back("8b");
 938   subst.push_back("7c");
 939   subst.push_back("6d");
 940   subst.push_back("5e");
 941   subst.push_back("4f");
 942   subst.push_back("3g");
 943   subst.push_back("2h");
 944   subst.push_back("1i");
 945
 946   std::string formatted =
 947       ReplaceStringPlaceholders(
 948           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
 949
       // Expected value first, matching the other assertions in this file.
 950   EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
 951 }
 952
 953 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
       // Runs of '$' before a digit. Per the expected string, each run comes out
       // one '$' shorter and the digit is kept literally; none of the
       // substitutions is inserted.
 954   std::vector<std::string> subst;
 955   subst.push_back("a");
 956   subst.push_back("b");
 957   subst.push_back("c");
       // Expected value first, matching the other assertions in this file.
 958   EXPECT_EQ("$1 $$2 $$$3",
 959             ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL));
 960 }
 961
 962 TEST(StringUtilTest, MatchPatternTest) {
       // Inputs that are expected to match their pattern.
 963   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
 964   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
 965   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
 966   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
 967   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
 968   EXPECT_TRUE(MatchPattern("", "*"));
 969   EXPECT_TRUE(MatchPattern("", "?"));
 970   EXPECT_TRUE(MatchPattern("", ""));
       // Inputs that are expected to be rejected.
 971   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
 972   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
 973   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
 974   EXPECT_FALSE(MatchPattern("", "*.*"));
 975   EXPECT_FALSE(MatchPattern("Hello", ""));
 976   // Matching stops once a certain recursion depth has been reached.
 977   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
 978
 979   // UTF-8 input and patterns.
 980   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
 981   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
 982   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
 983   // An invalid sequence in the input is treated as one invalid character.
 984   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
 985   // A pattern that contains invalid characters must not match anything.
 986   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
 987
 988   // UTF-16 input and patterns.
 989   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
 990                            UTF8ToUTF16("*.com")));
 991   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
 992                            UTF8ToUTF16("He??o\\*1*")));
 993
 994   // Consecutive wildcards have to be collapsed into a single wildcard; when
 995   // that does not happen, MatchPattern reaches its maximum recursion depth
 996   // on this pattern.
 997   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
 998                            UTF8ToUTF16("He********************************o")));
 999 }
1000
1001 TEST(StringUtilTest, LcpyTest) {
1002   // Normal case: the source fits in the buffer with room to spare.
1003   {
1004     char narrow[10];
1005     wchar_t wide[10];
1006     EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
1007     EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
1008     EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
1009     EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
1010   }
1011
1012   // dst_size == 0: the destination must be left untouched, and the return
1013   // value is still the equivalent of strlen(src).
1014   {
1015     char narrow[2] = {1, 2};
1016     wchar_t wide[2] = {1, 2};
1017     EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", 0));
1018     EXPECT_EQ(1, narrow[0]);
1019     EXPECT_EQ(2, narrow[1]);
1020     EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", 0));
1021 #if defined(WCHAR_T_IS_UNSIGNED)
1022     EXPECT_EQ(1U, wide[0]);
1023     EXPECT_EQ(2U, wide[1]);
1024 #else
1025     EXPECT_EQ(1, wide[0]);
1026     EXPECT_EQ(2, wide[1]);
1027 #endif
1028   }
1029
1030   // Exact fit: the buffer holds the source plus the terminating null.
1031   {
1032     char narrow[8];
1033     wchar_t wide[8];
1034     EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
1035     EXPECT_EQ(0, memcmp(narrow, "abcdefg", 8));
1036     EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
1037     EXPECT_EQ(0, memcmp(wide, L"abcdefg", sizeof(wchar_t) * 8));
1038   }
1039
1040   // One element too small: the null no longer fits after the full source.
1041   {
1042     char narrow[7];
1043     wchar_t wide[7];
1044     EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
1045     EXPECT_EQ(0, memcmp(narrow, "abcdef", 7));
1046     EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
1047     EXPECT_EQ(0, memcmp(wide, L"abcdef", sizeof(wchar_t) * 7));
1048   }
1049
1050   // Buffer far too small for the source.
1051   {
1052     char narrow[3];
1053     wchar_t wide[3];
1054     EXPECT_EQ(7U, base::strlcpy(narrow, "abcdefg", arraysize(narrow)));
1055     EXPECT_EQ(0, memcmp(narrow, "ab", 3));
1056     EXPECT_EQ(7U, base::wcslcpy(wide, L"abcdefg", arraysize(wide)));
1057     EXPECT_EQ(0, memcmp(wide, L"ab", sizeof(wchar_t) * 3));
1058   }
1059 }
1060
1061 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
       // Each row pairs a wide format string with the verdict expected from
       // base::IsWprintfFormatPortable().
1062   static const struct {
1063     const wchar_t* fmt;
1064     bool portable;
1065   } kCases[] = {
       // String conversions.
1066     { L"%ls", true },
1067     { L"%s", false },
1068     { L"%S", false },
1069     { L"%lS", false },
1070     { L"Hello, %s", false },
       // Character conversions.
1071     { L"%lc", true },
1072     { L"%c", false },
1073     { L"%C", false },
1074     { L"%lC", false },
       // Mixtures of string conversions.
1075     { L"%ls %s", false },
1076     { L"%s %ls", false },
1077     { L"%s %ls %s", false },
       // Numeric conversions, lower- and upper-case.
1078     { L"%f", true },
1079     { L"%f %F", false },
1080     { L"%d %D", false },
1081     { L"%o %O", false },
1082     { L"%u %U", false },
1083     { L"%f %d %o %u", true },
       // Flags, width and precision.
1084     { L"%-8d (%02.1f%)", true },
1085     { L"% 10s", false },
1086     { L"% 10ls", true }
1087   };
1088   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kCases); ++i)
1089     EXPECT_EQ(kCases[i].portable, base::IsWprintfFormatPortable(kCases[i].fmt));
1090 }
1091
1092 TEST(StringUtilTest, RemoveChars) {
1093   const char* kUnwanted = "-/+*";
1094   std::string text("A-+bc/d!*");
       // |text| is passed as both the input and the output argument.
1095   EXPECT_TRUE(RemoveChars(text, kUnwanted, &text));
1096   EXPECT_EQ("Abcd!", text);
1097
1098   // Second pass: none of the unwanted characters is left in |text|.
1099   EXPECT_FALSE(RemoveChars(text, kUnwanted, &text));
1100   EXPECT_EQ("Abcd!", text);
1101
1102   // An empty input stays empty.
1103   text.clear();
1104   EXPECT_FALSE(RemoveChars(text, kUnwanted, &text));
1105   EXPECT_EQ(std::string(), text);
1106 }
1107
1108 TEST(StringUtilTest, ReplaceChars) {
       // Each row: source text, set of characters to replace, replacement text,
       // expected output, and the expected return value of ReplaceChars().
1109   struct TestData {
1110     const char* source;
1111     const char* targets;
1112     const char* replacement;
1113     const char* expected;
1114     bool expect_changed;
1115   } table[] = {
1116     { "", "", "", "", false },
1117     { "test", "", "", "test", false },
1118     { "test", "", "!", "test", false },
1119     { "test", "z", "!", "test", false },
1120     { "test", "e", "!", "t!st", true },
1121     { "test", "e", "!?", "t!?st", true },
1122     { "test", "ez", "!", "t!st", true },
1123     { "test", "zed", "!?", "t!?st", true },
1124     { "test", "t", "!?", "!?es!?", true },
1125     { "test", "et", "!>", "!>!>s!>", true },
1126     { "test", "zest", "!", "!!!!", true },
1127     { "test", "szt", "!", "!e!!", true },
1128     { "test", "t", "test", "testestest", true },
1129   };
1130
1131   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(table); ++i) {
1132     std::string actual;
1133     bool changed = ReplaceChars(table[i].source,
1134                                 table[i].targets,
1135                                 table[i].replacement,
1136                                 &actual);
1137     EXPECT_EQ(table[i].expect_changed, changed);
1138     EXPECT_EQ(table[i].expected, actual);
1139   }
1140 }
1141
1142 TEST(StringUtilTest, ContainsOnlyChars) {
1143   // With an empty set of allowed characters, only the empty string passes;
1144   // any non-empty input is rejected.
1145   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
1146   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
1147
       // Non-empty set: the order of the allowed characters is irrelevant, and
       // one character outside the set is enough to fail.
1148   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
1149   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
1150   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
1151   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
1152   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
1153 }
1154
1155 class WriteIntoTest : public testing::Test {
1156  protected:
       // Copies up to |num_chars| characters of a fixed sample into the buffer
       // returned by WriteInto(&buffer, num_chars + 1), then checks the resulting
       // contents and that the string's size is exactly |num_chars|.
1157   static void WritesCorrectly(size_t num_chars) {
1158     std::string buffer;
         // Only ever read from, so it is const like its k-prefixed name implies.
1159     const char kOriginal[] = "supercali";
1160     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
1161     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
1162     // string at the first \0.
1163     EXPECT_EQ(std::string(kOriginal,
1164                           std::min(num_chars, arraysize(kOriginal) - 1)),
1165               std::string(buffer.c_str()));
1166     EXPECT_EQ(num_chars, buffer.size());
1167   }
1168 };
1169
1170 TEST_F(WriteIntoTest, WriteInto) {
1171   // WriteInto has to reserve enough space and leave the string with the
1172   // correct size.
1173   WritesCorrectly(1);
1174   WritesCorrectly(2);
1175   WritesCorrectly(5000);
1176
1177   // With a Copy-on-Write string implementation, writing through WriteInto
1178   // into a copy must leave the string it was copied from unmodified.
1179   const char kUntouched[] = "live";
1180   const char kOverwrite[] = "dead";
1181   const std::string original(kUntouched);
1182   std::string copy(original);
1183   strncpy(WriteInto(&copy, 5), kOverwrite, 4);
1184   EXPECT_EQ(kUntouched, original);
1185   EXPECT_EQ(4u, original.size());
1186   EXPECT_EQ(kOverwrite, copy);
1187   EXPECT_EQ(4u, copy.size());
1188 }
1189
1190 }  // namespace base