base/strings/string_util_unittest.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "base/strings/string_util.h"
   6
   7 #include <math.h>
   8 #include <stdarg.h>
   9
  10 #include <limits>
  11 #include <sstream>
  12
  13 #include "base/basictypes.h"
  14 #include "base/strings/string16.h"
  15 #include "base/strings/utf_string_conversions.h"
  16 #include "testing/gmock/include/gmock/gmock.h"
  17 #include "testing/gtest/include/gtest/gtest.h"
  18
  19 using ::testing::ElementsAre;
  20
  21 namespace base {
  22
  23 static const struct trim_case {
  24   const wchar_t* input;
  25   const TrimPositions positions;
  26   const wchar_t* output;
  27   const TrimPositions return_value;
  28 } trim_cases[] = {
  29   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
  30   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
  31   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
  32   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
  33   {L"", TRIM_ALL, L"", TRIM_NONE},
  34   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
  35   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
  36   {L"  ", TRIM_ALL, L"", TRIM_ALL},
  37   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
  38   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
  39 };
  40
  41 static const struct trim_case_ascii {
  42   const char* input;
  43   const TrimPositions positions;
  44   const char* output;
  45   const TrimPositions return_value;
  46 } trim_cases_ascii[] = {
  47   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
  48   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
  49   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
  50   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
  51   {"", TRIM_ALL, "", TRIM_NONE},
  52   {"  ", TRIM_LEADING, "", TRIM_LEADING},
  53   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
  54   {"  ", TRIM_ALL, "", TRIM_ALL},
  55   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
  56 };
  57
  58 namespace {
  59
  60 // Helper used to test TruncateUTF8ToByteSize.
  61 bool Truncated(const std::string& input, const size_t byte_size,
  62                std::string* output) {
  63     size_t prev = input.length();
  64     TruncateUTF8ToByteSize(input, byte_size, output);
  65     return prev != output->length();
  66 }
  67
  68 }  // namespace
  69
  70 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
  71   std::string output;
  72
  73   // Empty strings and invalid byte_size arguments
  74   EXPECT_FALSE(Truncated(std::string(), 0, &output));
  75   EXPECT_EQ(output, "");
  76   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
  77   EXPECT_EQ(output, "");
  78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
  79   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
  80
  81   // Testing the truncation of valid UTF8 correctly
  82   EXPECT_TRUE(Truncated("abc", 2, &output));
  83   EXPECT_EQ(output, "ab");
  84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
  85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
  87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
  88   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
  89   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
  90
  91   {
  92     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
  93     const std::string array_string(array, arraysize(array));
  94     EXPECT_TRUE(Truncated(array_string, 4, &output));
  95     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
  96   }
  97
  98   {
  99     const char array[] = "\x00\xc2\x81\xc2\x81";
 100     const std::string array_string(array, arraysize(array));
 101     EXPECT_TRUE(Truncated(array_string, 4, &output));
 102     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
 103   }
 104
 105   // Testing invalid UTF8
 106   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
 107   EXPECT_EQ(output.compare(""), 0);
 108   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
 109   EXPECT_EQ(output.compare(""), 0);
 110   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
 111   EXPECT_EQ(output.compare(""), 0);
 112
 113   // Testing invalid UTF8 mixed with valid UTF8
 114   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
 115   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
 116   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
 117   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
 118   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
 119               10, &output));
 120   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
 121   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
 122               10, &output));
 123   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
 124   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
 125   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
 126
 127   // Overlong sequences
 128   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
 129   EXPECT_EQ(output.compare(""), 0);
 130   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
 131   EXPECT_EQ(output.compare(""), 0);
 132   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
 133   EXPECT_EQ(output.compare(""), 0);
 134   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
 135   EXPECT_EQ(output.compare(""), 0);
 136   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
 137   EXPECT_EQ(output.compare(""), 0);
 138   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
 139   EXPECT_EQ(output.compare(""), 0);
 140   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
 141   EXPECT_EQ(output.compare(""), 0);
 142   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
 143   EXPECT_EQ(output.compare(""), 0);
 144   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
 145   EXPECT_EQ(output.compare(""), 0);
 146   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
 147   EXPECT_EQ(output.compare(""), 0);
 148   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
 149   EXPECT_EQ(output.compare(""), 0);
 150
 151   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 152   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
 153   EXPECT_EQ(output.compare(""), 0);
 154   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
 155   EXPECT_EQ(output.compare(""), 0);
 156   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
 157   EXPECT_EQ(output.compare(""), 0);
 158
 159   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 160   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
 161   EXPECT_EQ(output.compare(""), 0);
 162   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
 163   EXPECT_EQ(output.compare(""), 0);
 164
 165   {
 166     const char array[] = "\x00\x00\xfe\xff";
 167     const std::string array_string(array, arraysize(array));
 168     EXPECT_TRUE(Truncated(array_string, 4, &output));
 169     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
 170   }
 171
 172   // Variants on the previous test
 173   {
 174     const char array[] = "\xff\xfe\x00\x00";
 175     const std::string array_string(array, 4);
 176     EXPECT_FALSE(Truncated(array_string, 4, &output));
 177     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
 178   }
 179   {
 180     const char array[] = "\xff\x00\x00\xfe";
 181     const std::string array_string(array, arraysize(array));
 182     EXPECT_TRUE(Truncated(array_string, 4, &output));
 183     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
 184   }
 185
 186   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 187   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
 188   EXPECT_EQ(output.compare(""), 0);
 189   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
 190   EXPECT_EQ(output.compare(""), 0);
 191   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
 192   EXPECT_EQ(output.compare(""), 0);
 193   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
 194   EXPECT_EQ(output.compare(""), 0);
 195   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
 196   EXPECT_EQ(output.compare(""), 0);
 197
 198   // Strings in legacy encodings that are valid in UTF-8, but
 199   // are invalid as UTF-8 in real data.
 200   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
 201   EXPECT_EQ(output.compare("caf"), 0);
 202   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
 203   EXPECT_EQ(output.compare(""), 0);
 204   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
 205   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 206   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
 207               &output));
 208   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 209
 210   // Testing using the same string as input and output.
 211   EXPECT_FALSE(Truncated(output, 4, &output));
 212   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
 213   EXPECT_TRUE(Truncated(output, 3, &output));
 214   EXPECT_EQ(output.compare("\xa7\x41"), 0);
 215
 216   // "abc" with U+201[CD] in windows-125[0-8]
 217   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
 218   EXPECT_EQ(output.compare("\x93" "abc"), 0);
 219
 220   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 221   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
 222   EXPECT_EQ(output.compare(""), 0);
 223
 224   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 225   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
 226   EXPECT_EQ(output.compare(""), 0);
 227 }
 228
 229 TEST(StringUtilTest, TrimWhitespace) {
 230   string16 output;  // Allow contents to carry over to next testcase
 231   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
 232     const trim_case& value = trim_cases[i];
 233     EXPECT_EQ(value.return_value,
 234               TrimWhitespace(WideToUTF16(value.input), value.positions,
 235                              &output));
 236     EXPECT_EQ(WideToUTF16(value.output), output);
 237   }
 238
 239   // Test that TrimWhitespace() can take the same string for input and output
 240   output = ASCIIToUTF16("  This is a test \r\n");
 241   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 242   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
 243
 244   // Once more, but with a string of whitespace
 245   output = ASCIIToUTF16("  \r\n");
 246   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
 247   EXPECT_EQ(string16(), output);
 248
 249   std::string output_ascii;
 250   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
 251     const trim_case_ascii& value = trim_cases_ascii[i];
 252     EXPECT_EQ(value.return_value,
 253               TrimWhitespace(value.input, value.positions, &output_ascii));
 254     EXPECT_EQ(value.output, output_ascii);
 255   }
 256 }
 257
 258 static const struct collapse_case {
 259   const wchar_t* input;
 260   const bool trim;
 261   const wchar_t* output;
 262 } collapse_cases[] = {
 263   {L" Google Video ", false, L"Google Video"},
 264   {L"Google Video", false, L"Google Video"},
 265   {L"", false, L""},
 266   {L"  ", false, L""},
 267   {L"\t\rTest String\n", false, L"Test String"},
 268   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
 269   {L"    Test     \n  \t String    ", false, L"Test String"},
 270   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
 271   {L"   Test String", false, L"Test String"},
 272   {L"Test String    ", false, L"Test String"},
 273   {L"Test String", false, L"Test String"},
 274   {L"", true, L""},
 275   {L"\n", true, L""},
 276   {L"  \r  ", true, L""},
 277   {L"\nFoo", true, L"Foo"},
 278   {L"\r  Foo  ", true, L"Foo"},
 279   {L" Foo bar ", true, L"Foo bar"},
 280   {L"  \tFoo  bar  \n", true, L"Foo bar"},
 281   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
 282 };
 283
 284 TEST(StringUtilTest, CollapseWhitespace) {
 285   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
 286     const collapse_case& value = collapse_cases[i];
 287     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
 288   }
 289 }
 290
 291 static const struct collapse_case_ascii {
 292   const char* input;
 293   const bool trim;
 294   const char* output;
 295 } collapse_cases_ascii[] = {
 296   {" Google Video ", false, "Google Video"},
 297   {"Google Video", false, "Google Video"},
 298   {"", false, ""},
 299   {"  ", false, ""},
 300   {"\t\rTest String\n", false, "Test String"},
 301   {"    Test     \n  \t String    ", false, "Test String"},
 302   {"   Test String", false, "Test String"},
 303   {"Test String    ", false, "Test String"},
 304   {"Test String", false, "Test String"},
 305   {"", true, ""},
 306   {"\n", true, ""},
 307   {"  \r  ", true, ""},
 308   {"\nFoo", true, "Foo"},
 309   {"\r  Foo  ", true, "Foo"},
 310   {" Foo bar ", true, "Foo bar"},
 311   {"  \tFoo  bar  \n", true, "Foo bar"},
 312   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
 313 };
 314
 315 TEST(StringUtilTest, CollapseWhitespaceASCII) {
 316   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
 317     const collapse_case_ascii& value = collapse_cases_ascii[i];
 318     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
 319   }
 320 }
 321
 322 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
 323   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));
 324   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
 325   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
 326   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
 327   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
 328   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
 329 }
 330
 331 TEST(StringUtilTest, ContainsOnlyWhitespace) {
 332   EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
 333   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
 334   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
 335   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
 336   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
 337   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
 338 }
 339
 340 TEST(StringUtilTest, IsStringUTF8) {
 341   EXPECT_TRUE(IsStringUTF8("abc"));
 342   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
 343   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
 344   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
 345   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
 346   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
 347
 348   // surrogate code points
 349   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
 350   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
 351   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
 352
 353   // overlong sequences
 354   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
 355   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
 356   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
 357   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
 358   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
 359   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
 360   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
 361   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
 362   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
 363   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
 364   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
 365
 366   // Beyond U+10FFFF (the upper limit of Unicode codespace)
 367   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
 368   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
 369   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
 370
 371   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
 372   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
 373   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
 374   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
 375   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
 376
 377   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
 378   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
 379   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
 380   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
 381   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
 382   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
 383   // Strings in legacy encodings. We can certainly make up strings
 384   // in a legacy encoding that are valid in UTF-8, but in real data,
 385   // most of them are invalid as UTF-8.
 386   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
 387   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
 388   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
 389   // "abc" with U+201[CD] in windows-125[0-8]
 390   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
 391   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
 392   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
 393   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
 394   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
 395
 396   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
 397   // representation, and the second uses a 2-byte sequence. The second version
 398   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
 399   // given codepoint must be used.
 400   static const char kEmbeddedNull[] = "embedded\0null";
 401   EXPECT_TRUE(IsStringUTF8(
 402       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
 403   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
 404 }
 405
 406 TEST(StringUtilTest, ConvertASCII) {
 407   static const char* char_cases[] = {
 408     "Google Video",
 409     "Hello, world\n",
 410     "0123ABCDwxyz \a\b\t\r\n!+,.~"
 411   };
 412
 413   static const wchar_t* const wchar_cases[] = {
 414     L"Google Video",
 415     L"Hello, world\n",
 416     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
 417   };
 418
 419   for (size_t i = 0; i < arraysize(char_cases); ++i) {
 420     EXPECT_TRUE(IsStringASCII(char_cases[i]));
 421     std::wstring wide = ASCIIToWide(char_cases[i]);
 422     EXPECT_EQ(wchar_cases[i], wide);
 423
 424     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
 425     std::string ascii = WideToASCII(wchar_cases[i]);
 426     EXPECT_EQ(char_cases[i], ascii);
 427   }
 428
 429   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
 430   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
 431
 432   // Convert empty strings.
 433   std::wstring wempty;
 434   std::string empty;
 435   EXPECT_EQ(empty, WideToASCII(wempty));
 436   EXPECT_EQ(wempty, ASCIIToWide(empty));
 437
 438   // Convert strings with an embedded NUL character.
 439   const char chars_with_nul[] = "test\0string";
 440   const int length_with_nul = arraysize(chars_with_nul) - 1;
 441   std::string string_with_nul(chars_with_nul, length_with_nul);
 442   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
 443   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
 444             wide_with_nul.length());
 445   std::string narrow_with_nul = WideToASCII(wide_with_nul);
 446   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
 447             narrow_with_nul.length());
 448   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
 449 }
 450
 451 TEST(StringUtilTest, ToUpperASCII) {
 452   EXPECT_EQ('C', ToUpperASCII('C'));
 453   EXPECT_EQ('C', ToUpperASCII('c'));
 454   EXPECT_EQ('2', ToUpperASCII('2'));
 455
 456   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
 457   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
 458   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
 459
 460   std::string in_place_a("Cc2");
 461   StringToUpperASCII(&in_place_a);
 462   EXPECT_EQ("CC2", in_place_a);
 463
 464   std::wstring in_place_w(L"Cc2");
 465   StringToUpperASCII(&in_place_w);
 466   EXPECT_EQ(L"CC2", in_place_w);
 467
 468   std::string original_a("Cc2");
 469   std::string upper_a = StringToUpperASCII(original_a);
 470   EXPECT_EQ("CC2", upper_a);
 471
 472   std::wstring original_w(L"Cc2");
 473   std::wstring upper_w = StringToUpperASCII(original_w);
 474   EXPECT_EQ(L"CC2", upper_w);
 475 }
 476
 477 TEST(StringUtilTest, LowerCaseEqualsASCII) {
 478   static const struct {
 479     const wchar_t* src_w;
 480     const char*    src_a;
 481     const char*    dst;
 482   } lowercase_cases[] = {
 483     { L"FoO", "FoO", "foo" },
 484     { L"foo", "foo", "foo" },
 485     { L"FOO", "FOO", "foo" },
 486   };
 487
 488   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
 489     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
 490                                      lowercase_cases[i].dst));
 491     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
 492                                      lowercase_cases[i].dst));
 493   }
 494 }
 495
 496 TEST(StringUtilTest, FormatBytesUnlocalized) {
 497   static const struct {
 498     int64 bytes;
 499     const char* expected;
 500   } cases[] = {
 501     // Expected behavior: we show one post-decimal digit when we have
 502     // under two pre-decimal digits, except in cases where it makes no
 503     // sense (zero or bytes).
 504     // Since we switch units once we cross the 1000 mark, this keeps
 505     // the display of file sizes or bytes consistently around three
 506     // digits.
 507     {0, "0 B"},
 508     {512, "512 B"},
 509     {1024*1024, "1.0 MB"},
 510     {1024*1024*1024, "1.0 GB"},
 511     {10LL*1024*1024*1024, "10.0 GB"},
 512     {99LL*1024*1024*1024, "99.0 GB"},
 513     {105LL*1024*1024*1024, "105 GB"},
 514     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
 515     {~(1LL<<63), "8192 PB"},
 516
 517     {99*1024 + 103, "99.1 kB"},
 518     {1024*1024 + 103, "1.0 MB"},
 519     {1024*1024 + 205 * 1024, "1.2 MB"},
 520     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
 521     {10LL*1024*1024*1024, "10.0 GB"},
 522     {100LL*1024*1024*1024, "100 GB"},
 523   };
 524
 525   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
 526     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
 527               FormatBytesUnlocalized(cases[i].bytes));
 528   }
 529 }
 530 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
 531   static const struct {
 532     const char* str;
 533     string16::size_type start_offset;
 534     const char* find_this;
 535     const char* replace_with;
 536     const char* expected;
 537   } cases[] = {
 538     {"aaa", 0, "a", "b", "bbb"},
 539     {"abb", 0, "ab", "a", "ab"},
 540     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
 541     {"Not found", 0, "x", "0", "Not found"},
 542     {"Not found again", 5, "x", "0", "Not found again"},
 543     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 544      "Four score and seven years agoMakingFour score and seven years agoit"
 545      "Four score and seven years agomuchFour score and seven years agolonger"
 546      "Four score and seven years ago"},
 547     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 548     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
 549     {"abababab", 2, "ab", "c", "abccc"},
 550   };
 551
 552   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 553     string16 str = ASCIIToUTF16(cases[i].str);
 554     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
 555                                  ASCIIToUTF16(cases[i].find_this),
 556                                  ASCIIToUTF16(cases[i].replace_with));
 557     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 558   }
 559 }
 560
 561 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
 562   static const struct {
 563     const char* str;
 564     string16::size_type start_offset;
 565     const char* find_this;
 566     const char* replace_with;
 567     const char* expected;
 568   } cases[] = {
 569     {"aaa", 0, "a", "b", "baa"},
 570     {"abb", 0, "ab", "a", "ab"},
 571     {"Removing some substrings inging", 0, "ing", "",
 572       "Remov some substrings inging"},
 573     {"Not found", 0, "x", "0", "Not found"},
 574     {"Not found again", 5, "x", "0", "Not found again"},
 575     {" Making it much longer ", 0, " ", "Four score and seven years ago",
 576      "Four score and seven years agoMaking it much longer "},
 577     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
 578     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
 579     {"abababab", 2, "ab", "c", "abcabab"},
 580   };
 581
 582   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
 583     string16 str = ASCIIToUTF16(cases[i].str);
 584     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
 585                                      ASCIIToUTF16(cases[i].find_this),
 586                                      ASCIIToUTF16(cases[i].replace_with));
 587     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
 588   }
 589 }
 590
 591 TEST(StringUtilTest, HexDigitToInt) {
 592   EXPECT_EQ(0, HexDigitToInt('0'));
 593   EXPECT_EQ(1, HexDigitToInt('1'));
 594   EXPECT_EQ(2, HexDigitToInt('2'));
 595   EXPECT_EQ(3, HexDigitToInt('3'));
 596   EXPECT_EQ(4, HexDigitToInt('4'));
 597   EXPECT_EQ(5, HexDigitToInt('5'));
 598   EXPECT_EQ(6, HexDigitToInt('6'));
 599   EXPECT_EQ(7, HexDigitToInt('7'));
 600   EXPECT_EQ(8, HexDigitToInt('8'));
 601   EXPECT_EQ(9, HexDigitToInt('9'));
 602   EXPECT_EQ(10, HexDigitToInt('A'));
 603   EXPECT_EQ(11, HexDigitToInt('B'));
 604   EXPECT_EQ(12, HexDigitToInt('C'));
 605   EXPECT_EQ(13, HexDigitToInt('D'));
 606   EXPECT_EQ(14, HexDigitToInt('E'));
 607   EXPECT_EQ(15, HexDigitToInt('F'));
 608
 609   // Verify the lower case as well.
 610   EXPECT_EQ(10, HexDigitToInt('a'));
 611   EXPECT_EQ(11, HexDigitToInt('b'));
 612   EXPECT_EQ(12, HexDigitToInt('c'));
 613   EXPECT_EQ(13, HexDigitToInt('d'));
 614   EXPECT_EQ(14, HexDigitToInt('e'));
 615   EXPECT_EQ(15, HexDigitToInt('f'));
 616 }
 617
 618 // This checks where we can use the assignment operator for a va_list. We need
 619 // a way to do this since Visual C doesn't support va_copy, but assignment on
 620 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
 621 // capability.
 622 static void VariableArgsFunc(const char* format, ...) {
 623   va_list org;
 624   va_start(org, format);
 625
 626   va_list dup;
 627   GG_VA_COPY(dup, org);
 628   int i1 = va_arg(org, int);
 629   int j1 = va_arg(org, int);
 630   char* s1 = va_arg(org, char*);
 631   double d1 = va_arg(org, double);
 632   va_end(org);
 633
 634   int i2 = va_arg(dup, int);
 635   int j2 = va_arg(dup, int);
 636   char* s2 = va_arg(dup, char*);
 637   double d2 = va_arg(dup, double);
 638
 639   EXPECT_EQ(i1, i2);
 640   EXPECT_EQ(j1, j2);
 641   EXPECT_STREQ(s1, s2);
 642   EXPECT_EQ(d1, d2);
 643
 644   va_end(dup);
 645 }
 646
 647 TEST(StringUtilTest, VAList) {
 648   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
 649 }
 650
 651 // Test for Tokenize
 652 template <typename STR>
 653 void TokenizeTest() {
 654   std::vector<STR> r;
 655   size_t size;
 656
 657   size = Tokenize(STR("This is a string"), STR(" "), &r);
 658   EXPECT_EQ(4U, size);
 659   ASSERT_EQ(4U, r.size());
 660   EXPECT_EQ(r[0], STR("This"));
 661   EXPECT_EQ(r[1], STR("is"));
 662   EXPECT_EQ(r[2], STR("a"));
 663   EXPECT_EQ(r[3], STR("string"));
 664   r.clear();
 665
 666   size = Tokenize(STR("one,two,three"), STR(","), &r);
 667   EXPECT_EQ(3U, size);
 668   ASSERT_EQ(3U, r.size());
 669   EXPECT_EQ(r[0], STR("one"));
 670   EXPECT_EQ(r[1], STR("two"));
 671   EXPECT_EQ(r[2], STR("three"));
 672   r.clear();
 673
 674   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
 675   EXPECT_EQ(3U, size);
 676   ASSERT_EQ(3U, r.size());
 677   EXPECT_EQ(r[0], STR("one"));
 678   EXPECT_EQ(r[1], STR("two"));
 679   EXPECT_EQ(r[2], STR("three;four"));
 680   r.clear();
 681
 682   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
 683   EXPECT_EQ(4U, size);
 684   ASSERT_EQ(4U, r.size());
 685   EXPECT_EQ(r[0], STR("one"));
 686   EXPECT_EQ(r[1], STR("two"));
 687   EXPECT_EQ(r[2], STR("three"));
 688   EXPECT_EQ(r[3], STR("four"));
 689   r.clear();
 690
 691   size = Tokenize(STR("one, two, three"), STR(","), &r);
 692   EXPECT_EQ(3U, size);
 693   ASSERT_EQ(3U, r.size());
 694   EXPECT_EQ(r[0], STR("one"));
 695   EXPECT_EQ(r[1], STR(" two"));
 696   EXPECT_EQ(r[2], STR(" three"));
 697   r.clear();
 698
 699   size = Tokenize(STR("one, two, three, "), STR(","), &r);
 700   EXPECT_EQ(4U, size);
 701   ASSERT_EQ(4U, r.size());
 702   EXPECT_EQ(r[0], STR("one"));
 703   EXPECT_EQ(r[1], STR(" two"));
 704   EXPECT_EQ(r[2], STR(" three"));
 705   EXPECT_EQ(r[3], STR(" "));
 706   r.clear();
 707
 708   size = Tokenize(STR("one, two, three,"), STR(","), &r);
 709   EXPECT_EQ(3U, size);
 710   ASSERT_EQ(3U, r.size());
 711   EXPECT_EQ(r[0], STR("one"));
 712   EXPECT_EQ(r[1], STR(" two"));
 713   EXPECT_EQ(r[2], STR(" three"));
 714   r.clear();
 715
 716   size = Tokenize(STR(), STR(","), &r);
 717   EXPECT_EQ(0U, size);
 718   ASSERT_EQ(0U, r.size());
 719   r.clear();
 720
 721   size = Tokenize(STR(","), STR(","), &r);
 722   EXPECT_EQ(0U, size);
 723   ASSERT_EQ(0U, r.size());
 724   r.clear();
 725
 726   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
 727   EXPECT_EQ(0U, size);
 728   ASSERT_EQ(0U, r.size());
 729   r.clear();
 730
 731   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
 732   EXPECT_EQ(1U, size);
 733   ASSERT_EQ(1U, r.size());
 734   EXPECT_EQ(r[0], STR("a"));
 735   r.clear();
 736
 737   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
 738   EXPECT_EQ(2U, size);
 739   ASSERT_EQ(2U, r.size());
 740   EXPECT_EQ(r[0], STR("\ta\t"));
 741   EXPECT_EQ(r[1], STR("b\tcc"));
 742   r.clear();
 743 }
 744
 745 TEST(StringUtilTest, TokenizeStdString) {
 746   TokenizeTest<std::string>();
 747 }
 748
 749 TEST(StringUtilTest, TokenizeStringPiece) {
 750   TokenizeTest<base::StringPiece>();
 751 }
 752
 753 // Test for JoinString
 754 TEST(StringUtilTest, JoinString) {
 755   std::vector<std::string> in;
 756   EXPECT_EQ("", JoinString(in, ','));
 757
 758   in.push_back("a");
 759   EXPECT_EQ("a", JoinString(in, ','));
 760
 761   in.push_back("b");
 762   in.push_back("c");
 763   EXPECT_EQ("a,b,c", JoinString(in, ','));
 764
 765   in.push_back(std::string());
 766   EXPECT_EQ("a,b,c,", JoinString(in, ','));
 767   in.push_back(" ");
 768   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
 769 }
 770
 771 // Test for JoinString overloaded with std::string separator
 772 TEST(StringUtilTest, JoinStringWithString) {
 773   std::string separator(", ");
 774   std::vector<std::string> parts;
 775   EXPECT_EQ(std::string(), JoinString(parts, separator));
 776
 777   parts.push_back("a");
 778   EXPECT_EQ("a", JoinString(parts, separator));
 779
 780   parts.push_back("b");
 781   parts.push_back("c");
 782   EXPECT_EQ("a, b, c", JoinString(parts, separator));
 783
 784   parts.push_back(std::string());
 785   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
 786   parts.push_back(" ");
 787   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
 788 }
 789
 790 // Test for the JoinString overload that takes a string16 separator.
 791 TEST(StringUtilTest, JoinStringWithString16) {
 792   const string16 kCommaSpace(ASCIIToUTF16(", "));
 793   std::vector<string16> pieces;
       // No parts at all.
 794   EXPECT_EQ(string16(), JoinString(pieces, kCommaSpace));
 795
       // A single part.
 796   pieces.push_back(ASCIIToUTF16("a"));
 797   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(pieces, kCommaSpace));
 798
       // Several parts.
 799   pieces.push_back(ASCIIToUTF16("b"));
 800   pieces.push_back(ASCIIToUTF16("c"));
 801   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(pieces, kCommaSpace));
 802
       // Empty and whitespace-only parts are joined like any other part.
 803   pieces.push_back(ASCIIToUTF16(""));
 804   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(pieces, kCommaSpace));
 805   pieces.push_back(ASCIIToUTF16(" "));
       const string16 kPipe(ASCIIToUTF16("|"));
 806   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(pieces, kPipe));
 807 }
 808
 809 TEST(StringUtilTest, StartsWith) {
       // Names for the boolean third argument. The "JavaScript" vs "javascript"
       // expectations below show that true selects case-sensitive matching.
       const bool kMatchCase = true;
       const bool kIgnoreCase = false;

       // Narrow strings, via StartsWithASCII.
 810   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", kMatchCase));
 811   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", kMatchCase));
 812   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", kIgnoreCase));
 813   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", kIgnoreCase));
       // A string shorter than the prefix, or empty, never matches.
 814   EXPECT_FALSE(StartsWithASCII("java", "javascript", kMatchCase));
 815   EXPECT_FALSE(StartsWithASCII("java", "javascript", kIgnoreCase));
 816   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", kIgnoreCase));
 817   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", kMatchCase));
       // An empty prefix always matches.
 818   EXPECT_TRUE(StartsWithASCII("java", std::string(), kIgnoreCase));
 819   EXPECT_TRUE(StartsWithASCII("java", std::string(), kMatchCase));
 820
       // The same cases with wide strings, via StartsWith.
 821   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", kMatchCase));
 822   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", kMatchCase));
 823   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", kIgnoreCase));
 824   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", kIgnoreCase));
 825   EXPECT_FALSE(StartsWith(L"java", L"javascript", kMatchCase));
 826   EXPECT_FALSE(StartsWith(L"java", L"javascript", kIgnoreCase));
 827   EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", kIgnoreCase));
 828   EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", kMatchCase));
 829   EXPECT_TRUE(StartsWith(L"java", std::wstring(), kIgnoreCase));
 830   EXPECT_TRUE(StartsWith(L"java", std::wstring(), kMatchCase));
 831 }
 832
 833 TEST(StringUtilTest, EndsWith) {
       // Names for the boolean third argument. The "Foo.Plugin" vs ".plugin"
       // expectations below show that true selects case-sensitive matching.
       const bool kMatchCase = true;
       const bool kIgnoreCase = false;

 834   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", kMatchCase));
 835   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", kMatchCase));
 836   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", kIgnoreCase));
 837   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", kIgnoreCase));
       // A string shorter than the suffix never matches.
 838   EXPECT_FALSE(EndsWith(L".plug", L".plugin", kMatchCase));
 839   EXPECT_FALSE(EndsWith(L".plug", L".plugin", kIgnoreCase));
       // The suffix text appearing mid-string is not a match.
 840   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", kMatchCase));
 841   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", kIgnoreCase));
       // Empty string and/or empty suffix.
 842   EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", kIgnoreCase));
 843   EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", kMatchCase));
 844   EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), kIgnoreCase));
 845   EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), kMatchCase));
       // The string is exactly the suffix.
 846   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", kIgnoreCase));
 847   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", kMatchCase));
 848   EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), kIgnoreCase));
 849   EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), kMatchCase));
 850 }
 851
 852 TEST(StringUtilTest, GetStringFWithOffsets) {
       // Checks the offsets that ReplaceStringPlaceholders() writes through its
       // third argument. In both cases below offsets[i] is the position in the
       // formatted output at which subst[i] was inserted, regardless of the order
       // in which $1 and $2 appear in the format string.
 853   std::vector<string16> subst;
 854   subst.push_back(ASCIIToUTF16("1"));
 855   subst.push_back(ASCIIToUTF16("2"));
 856   std::vector<size_t> offsets;
 857
 858   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
 859                             subst,
 860                             &offsets);
       // ASSERT rather than EXPECT: if the count is wrong, stop here instead of
       // indexing past the end of |offsets| on the following lines.
 861   ASSERT_EQ(2U, offsets.size());
 862   EXPECT_EQ(7U, offsets[0]);
 863   EXPECT_EQ(25U, offsets[1]);
 864   offsets.clear();
 865
       // Same substitutions with the placeholders swapped in the format string.
 866   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
 867                             subst,
 868                             &offsets);
 869   ASSERT_EQ(2U, offsets.size());
 870   EXPECT_EQ(25U, offsets[0]);
 871   EXPECT_EQ(7U, offsets[1]);
 872   offsets.clear();
 873 }
 874
 875 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
 876   // Test whether ReplaceStringPlaceholders works as expected when there
 877   // are fewer substitutions than placeholders: $4, $5 and $6 have no entry
       // in |subst| and are expected to expand to nothing.
 878   std::vector<string16> subst;
 879   subst.push_back(ASCIIToUTF16("9a"));
 880   subst.push_back(ASCIIToUTF16("8b"));
 881   subst.push_back(ASCIIToUTF16("7c"));
 882
 883   string16 formatted =
 884       ReplaceStringPlaceholders(
 885           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
 886
       // Expected value first, matching the EXPECT_EQ argument order used by the
       // other tests in this file.
 887   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
 888 }
 889
 890 TEST(StringUtilTest, ReplaceStringPlaceholders) {
       // Nine substitutions, one for each of the placeholders $1 through $9.
 891   std::vector<string16> subst;
 892   subst.push_back(ASCIIToUTF16("9a"));
 893   subst.push_back(ASCIIToUTF16("8b"));
 894   subst.push_back(ASCIIToUTF16("7c"));
 895   subst.push_back(ASCIIToUTF16("6d"));
 896   subst.push_back(ASCIIToUTF16("5e"));
 897   subst.push_back(ASCIIToUTF16("4f"));
 898   subst.push_back(ASCIIToUTF16("3g"));
 899   subst.push_back(ASCIIToUTF16("2h"));
 900   subst.push_back(ASCIIToUTF16("1i"));
 901
 902   string16 formatted =
 903       ReplaceStringPlaceholders(
 904           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
 905
       // Expected value first, matching the EXPECT_EQ argument order used by the
       // other tests in this file.
 906   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
 907 }
 908
 909 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
       // Fourteen substitutions. The expected output shows two-digit placeholders
       // ($10..$14) being read as a single index, and a "$1" at the very end of
       // the format string expanding to subst[0].
 910   std::vector<string16> subst;
 911   subst.push_back(ASCIIToUTF16("9a"));
 912   subst.push_back(ASCIIToUTF16("8b"));
 913   subst.push_back(ASCIIToUTF16("7c"));
 914   subst.push_back(ASCIIToUTF16("6d"));
 915   subst.push_back(ASCIIToUTF16("5e"));
 916   subst.push_back(ASCIIToUTF16("4f"));
 917   subst.push_back(ASCIIToUTF16("3g"));
 918   subst.push_back(ASCIIToUTF16("2h"));
 919   subst.push_back(ASCIIToUTF16("1i"));
 920   subst.push_back(ASCIIToUTF16("0j"));
 921   subst.push_back(ASCIIToUTF16("-1k"));
 922   subst.push_back(ASCIIToUTF16("-2l"));
 923   subst.push_back(ASCIIToUTF16("-3m"));
 924   subst.push_back(ASCIIToUTF16("-4n"));
 925
 926   string16 formatted =
 927       ReplaceStringPlaceholders(
 928           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
 929                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
 930
       // Expected value first, matching the EXPECT_EQ argument order used by the
       // other tests in this file.
 931   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
 932                          "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"),
                 formatted);
 933 }
 934
 935 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
       // std::string counterpart of the string16 test for $1 through $9.
 936   std::vector<std::string> subst;
 937   subst.push_back("9a");
 938   subst.push_back("8b");
 939   subst.push_back("7c");
 940   subst.push_back("6d");
 941   subst.push_back("5e");
 942   subst.push_back("4f");
 943   subst.push_back("3g");
 944   subst.push_back("2h");
 945   subst.push_back("1i");
 946
 947   std::string formatted =
 948       ReplaceStringPlaceholders(
 949           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
 950
       // Expected value first, matching the EXPECT_EQ argument order used by the
       // other tests in this file.
 951   EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
 952 }
 953
 954 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
 955   std::vector<std::string> subst;
 956   subst.push_back("a");
 957   subst.push_back("b");
 958   subst.push_back("c");
       // Per the expected string, a run of N '$' characters comes out as N-1
       // literal '$' characters, and the digit that follows is left as-is rather
       // than being substituted. Expected value goes first, matching the
       // EXPECT_EQ argument order used by the other tests in this file.
 959   EXPECT_EQ("$1 $$2 $$$3",
 960             ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL));
 961 }
 962
 963 TEST(StringUtilTest, MatchPatternTest) {
       // Every call below is MatchPattern(string, pattern). The ASCII cases cover
       // the '*' and '?' wildcards; per the expectations, matching is
       // case-sensitive ("*.COM" fails) and "\\*" in a pattern matches a literal
       // '*'.
 964   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
 965   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
 966   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
 967   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
 968   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
 969   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
 970   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
       // Empty string and/or empty pattern. Note that '?' is expected to match
       // even when there is no character left to consume.
 971   EXPECT_FALSE(MatchPattern("", "*.*"));
 972   EXPECT_TRUE(MatchPattern("", "*"));
 973   EXPECT_TRUE(MatchPattern("", "?"));
 974   EXPECT_TRUE(MatchPattern("", ""));
 975   EXPECT_FALSE(MatchPattern("Hello", ""));
 976   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
 977   // Stop after a certain recursion depth.
 978   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
 979
 980   // Test UTF8 matching.
 981   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
 982   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
 983   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
 984   // Invalid sequences should be handled as a single invalid character.
 985   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
 986   // If the pattern has invalid characters, it shouldn't match anything.
 987   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
 988
 989   // Test UTF16 character matching.
 990   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
 991                            UTF8ToUTF16("*.com")));
 992   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
 993                            UTF8ToUTF16("He??o\\*1*")));
 994
 995   // This test verifies that consecutive wildcards are collapsed into one
 996   // wildcard (when this doesn't occur, MatchPattern reaches its maximum
 997   // recursion depth).
 998   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
 999                            UTF8ToUTF16("He********************************o")));
1000 }
1001
1002 TEST(StringUtilTest, LcpyTest) {
       // Exercises base::strlcpy and base::wcslcpy side by side with the same
       // 7-character source. Every call is expected to return 7, the length of
       // the source, whatever the destination size is.

1003   // Test the normal case where we fit in our buffer.
1004   {
1005     char dst[10];
1006     wchar_t wdst[10];
1007     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
         // 8 elements are compared: the 7 characters plus the terminating null.
1008     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1009     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1010     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1011   }
1012
1013   // Test dst_size == 0, nothing should be written to |dst| and we should
1014   // have the equivalent of strlen(src).
1015   {
         // Pre-filled with sentinel values so that any write would be detected.
1016     char dst[2] = {1, 2};
1017     wchar_t wdst[2] = {1, 2};
1018     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
1019     EXPECT_EQ(1, dst[0]);
1020     EXPECT_EQ(2, dst[1]);
1021     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
         // NOTE(review): the literals' signedness follows wchar_t's, presumably to
         // avoid a signed/unsigned comparison warning inside EXPECT_EQ - confirm.
1022 #if defined(WCHAR_T_IS_UNSIGNED)
1023     EXPECT_EQ(1U, wdst[0]);
1024     EXPECT_EQ(2U, wdst[1]);
1025 #else
1026     EXPECT_EQ(1, wdst[0]);
1027     EXPECT_EQ(2, wdst[1]);
1028 #endif
1029   }
1030
1031   // Test the case where we _just_ completely fit including the null.
1032   {
1033     char dst[8];
1034     wchar_t wdst[8];
1035     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
1036     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
1037     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1038     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
1039   }
1040
1041   // Test the case where we are one smaller, so we can't fit the null.
1042   {
1043     char dst[7];
1044     wchar_t wdst[7];
1045     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
         // Expected contents: the first 6 characters followed by a null, i.e. the
         // copy is truncated but still terminated.
1046     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
1047     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1048     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
1049   }
1050
1051   // Test the case where we are just too small.
1052   {
1053     char dst[3];
1054     wchar_t wdst[3];
1055     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
         // Expected contents: the first 2 characters followed by a null.
1056     EXPECT_EQ(0, memcmp(dst, "ab", 3));
1057     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
1058     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
1059   }
1060 }
1061
1062 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
       // Each row pairs a wprintf-style format string with the value that
       // base::IsWprintfFormatPortable() is expected to return for it.
1063   static const struct {
1064     const wchar_t* input;
1065     bool portable;
1066   } cases[] = {
1067     { L"%ls", true },
1068     { L"%s", false },
1069     { L"%S", false },
1070     { L"%lS", false },
1071     { L"Hello, %s", false },
1072     { L"%lc", true },
1073     { L"%c", false },
1074     { L"%C", false },
1075     { L"%lC", false },
1076     { L"%ls %s", false },
1077     { L"%s %ls", false },
1078     { L"%s %ls %s", false },
1079     { L"%f", true },
1080     { L"%f %F", false },
1081     { L"%d %D", false },
1082     { L"%o %O", false },
1083     { L"%u %U", false },
1084     { L"%f %d %o %u", true },
1085     { L"%-8d (%02.1f%)", true },
1086     { L"% 10s", false },
1087     { L"% 10ls", true }
1088   };
1089   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
         // Tag a failure with the row index; the bare bool comparison alone does
         // not say which format string was being checked.
1090     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input))
             << "cases[" << i << "]";
       }
1091 }
1092
1093 TEST(StringUtilTest, RemoveChars) {
       // |text| is passed as both the input and the output argument throughout.
1094   const char* const kCharsToStrip = "-/+*";
1095   std::string text("A-+bc/d!*");
       // Some characters are stripped: the call reports true.
1096   EXPECT_TRUE(RemoveChars(text, kCharsToStrip, &text));
1097   EXPECT_EQ("Abcd!", text);
1098
1099   // Nothing left to strip: the call reports false and |text| is unchanged.
1100   EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
1101   EXPECT_EQ("Abcd!", text);
1102
1103   // Empty input.
1104   text.clear();
1105   EXPECT_FALSE(RemoveChars(text, kCharsToStrip, &text));
1106   EXPECT_EQ(std::string(), text);
1107 }
1108
1109 TEST(StringUtilTest, ReplaceChars) {
       // Table columns: input, set of characters to replace, replacement text,
       // expected output, and expected return value (true in exactly the rows
       // where the expected output differs from the input).
1110   struct TestData {
1111     const char* input;
1112     const char* replace_chars;
1113     const char* replace_with;
1114     const char* output;
1115     bool result;
1116   } cases[] = {
1117     { "", "", "", "", false },
1118     { "test", "", "", "test", false },
1119     { "test", "", "!", "test", false },
1120     { "test", "z", "!", "test", false },
1121     { "test", "e", "!", "t!st", true },
1122     { "test", "e", "!?", "t!?st", true },
1123     { "test", "ez", "!", "t!st", true },
1124     { "test", "zed", "!?", "t!?st", true },
1125     { "test", "t", "!?", "!?es!?", true },
1126     { "test", "et", "!>", "!>!>s!>", true },
1127     { "test", "zest", "!", "!!!!", true },
1128     { "test", "szt", "!", "!e!!", true },
1129     { "test", "t", "test", "testestest", true },
1130   };
1131
1132   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
1133     std::string output;
1134     bool result = ReplaceChars(cases[i].input,
1135                                cases[i].replace_chars,
1136                                cases[i].replace_with,
1137                                &output);
         // Tag both checks with the row index so a failure identifies its row.
1138     EXPECT_EQ(cases[i].result, result) << "cases[" << i << "]";
1139     EXPECT_EQ(cases[i].output, output) << "cases[" << i << "]";
1140   }
1141 }
1142
1143 TEST(StringUtilTest, ContainsOnlyChars) {
       const std::string kNothing;
       const std::string kDigits("1234");
       const std::string kDigitsReversed("4321");

1144   // With an empty set of allowed characters, only the empty string passes.
1146   EXPECT_TRUE(ContainsOnlyChars(kNothing, kNothing));
1147   EXPECT_FALSE(ContainsOnlyChars("Hello", kNothing));
1148
       // The empty string passes for a non-empty set as well.
1149   EXPECT_TRUE(ContainsOnlyChars(kNothing, kDigits));
       // The order of the allowed characters does not matter.
1150   EXPECT_TRUE(ContainsOnlyChars("1", kDigits));
1151   EXPECT_TRUE(ContainsOnlyChars("1", kDigitsReversed));
1152   EXPECT_TRUE(ContainsOnlyChars("123", kDigitsReversed));
       // A single character outside the set fails the check.
1153   EXPECT_FALSE(ContainsOnlyChars("123a", kDigitsReversed));
1154 }
1155
1156 class WriteIntoTest : public testing::Test {
1157  protected:
       // Copies up to |num_chars| characters of a fixed source string into the
       // buffer returned by WriteInto(&buffer, num_chars + 1), then checks the
       // resulting contents (up to the first \0) and the string's size.
1158   static void WritesCorrectly(size_t num_chars) {
1159     std::string buffer;
1160     const char kSource[] = "supercali";
         const size_t kSourceLength = arraysize(kSource) - 1;
1161     strncpy(WriteInto(&buffer, num_chars + 1), kSource, num_chars);
1162     // Going through buffer.c_str() rather than |buffer| itself cuts the
1163     // compared string off at the first \0.
1164     const std::string expected(kSource, std::min(num_chars, kSourceLength));
1166     EXPECT_EQ(expected, std::string(buffer.c_str()));
1167     EXPECT_EQ(num_chars, buffer.size());
1168   }
1169 };
1170
1171 TEST_F(WriteIntoTest, WriteInto) {
1172   // WriteInto must reserve enough space and leave the string with the
1173   // requested size, for short and long lengths alike.
1174   static const size_t kLengths[] = {1, 2, 5000};
1175   for (size_t i = 0; i < arraysize(kLengths); ++i)
1176     WritesCorrectly(kLengths[i]);
1177
1178   // With a copy-on-write std::string implementation, writing through
1179   // WriteInto must not modify another string copied from the same source.
1180   const char kUntouched[] = "live";
1181   const char kWritten[] = "dead";
1182   const std::string source(kUntouched);
1183   std::string target(source);
1184   strncpy(WriteInto(&target, 5), kWritten, 4);
1185   EXPECT_EQ(kWritten, target);
1186   EXPECT_EQ(4u, target.size());
1187   EXPECT_EQ(kUntouched, source);
1188   EXPECT_EQ(4u, source.size());
1189 }
1190
1191 }  // namespace base