Un-refcount AutofillWebData and TokenWebData
[chromium-blink-merge.git] / base / string_util.cc
blob bbded5737445be5ddb2a3359eac9d956d92fd493
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/string_util.h"
7 #include "build/build_config.h"
9 #include <ctype.h>
10 #include <errno.h>
11 #include <math.h>
12 #include <stdarg.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <time.h>
17 #include <wchar.h>
18 #include <wctype.h>
20 #include <algorithm>
21 #include <vector>
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/strings/utf_string_conversion_utils.h"
27 #include "base/strings/utf_string_conversions.h"
28 #include "base/third_party/icu/icu_utf.h"
30 namespace {
32 // Force the singleton used by Empty[W]String[16] to be a unique type. This
33 // prevents other code that might accidentally use Singleton<string> from
34 // getting our internal one.
35 struct EmptyStrings {
36 EmptyStrings() {}
37 const std::string s;
38 const std::wstring ws;
39 const string16 s16;
41 static EmptyStrings* GetInstance() {
42 return Singleton<EmptyStrings>::get();
// Bookkeeping record used by ReplaceStringPlaceholders: remembers which
// parameter was substituted and where its replacement begins in the output.
struct ReplacementOffset {
  ReplacementOffset(uintptr_t parameter_index, size_t start_offset)
      : parameter(parameter_index),
        offset(start_offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};
60 static bool CompareParameter(const ReplacementOffset& elem1,
61 const ReplacementOffset& elem2) {
62 return elem1.parameter < elem2.parameter;
65 } // namespace
67 namespace base {
// Returns true if the wprintf-style |format| uses only conversion
// specifications that this function considers portable. A bare %s or %c
// (without the 'l' modifier) and %S, %C, %F, %D, %O, %U are rejected.
bool IsWprintfFormatPortable(const wchar_t* format) {
  const wchar_t* cursor = format;
  while (*cursor != L'\0') {
    if (*cursor != L'%') {
      ++cursor;
      continue;
    }

    // |cursor| sits on '%'. Walk the specification until a known conversion
    // specifier closes it.
    bool saw_l_modifier = false;
    for (;;) {
      ++cursor;
      const wchar_t ch = *cursor;
      if (ch == L'\0') {
        // The format string ended in the middle of a specification. Nothing
        // unportable was seen, and the string is equally broken everywhere,
        // so report it as portable.
        return true;
      }

      if (ch == L'l') {
        // 'l' is the only thing that can save the 's' and 'c' specifiers.
        saw_l_modifier = true;
      } else {
        const bool narrow_without_l =
            (ch == L's' || ch == L'c') && !saw_l_modifier;
        const bool nonstandard =
            ch == L'S' || ch == L'C' || ch == L'F' ||
            ch == L'D' || ch == L'O' || ch == L'U';
        if (narrow_without_l || nonstandard)
          return false;  // Not portable.
      }

      if (wcschr(L"diouxXeEfgGaAcspn%", ch)) {
        // Portable specifier; resume scanning the rest of the format string.
        break;
      }
    }
    ++cursor;  // Step past the conversion specifier itself.
  }

  return true;
}
104 } // namespace base
107 const std::string& EmptyString() {
108 return EmptyStrings::GetInstance()->s;
111 const std::wstring& EmptyWString() {
112 return EmptyStrings::GetInstance()->ws;
115 const string16& EmptyString16() {
116 return EmptyStrings::GetInstance()->s16;
// Copies |input| into |output| and replaces each character found in the
// NUL-terminated set |replace_chars| with the whole of |replace_with|.
// Returns true if at least one character was replaced.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const typename STR::value_type replace_chars[],
                   const STR& replace_with,
                   STR* output) {
  // After each replacement the search resumes past the inserted text, so
  // characters contributed by |replace_with| are never re-examined.
  const size_t step = replace_with.length();

  *output = input;

  bool replaced_any = false;
  for (size_t pos = output->find_first_of(replace_chars);
       pos != STR::npos;
       pos = output->find_first_of(replace_chars, pos + step)) {
    output->replace(pos, 1, replace_with);
    replaced_any = true;
  }

  return replaced_any;
}
139 bool ReplaceChars(const string16& input,
140 const char16 replace_chars[],
141 const string16& replace_with,
142 string16* output) {
143 return ReplaceCharsT(input, replace_chars, replace_with, output);
146 bool ReplaceChars(const std::string& input,
147 const char replace_chars[],
148 const std::string& replace_with,
149 std::string* output) {
150 return ReplaceCharsT(input, replace_chars, replace_with, output);
153 bool RemoveChars(const string16& input,
154 const char16 remove_chars[],
155 string16* output) {
156 return ReplaceChars(input, remove_chars, string16(), output);
159 bool RemoveChars(const std::string& input,
160 const char remove_chars[],
161 std::string* output) {
162 return ReplaceChars(input, remove_chars, std::string(), output);
165 template<typename STR>
166 TrimPositions TrimStringT(const STR& input,
167 const typename STR::value_type trim_chars[],
168 TrimPositions positions,
169 STR* output) {
170 // Find the edges of leading/trailing whitespace as desired.
171 const typename STR::size_type last_char = input.length() - 1;
172 const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
173 input.find_first_not_of(trim_chars) : 0;
174 const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
175 input.find_last_not_of(trim_chars) : last_char;
177 // When the string was all whitespace, report that we stripped off whitespace
178 // from whichever position the caller was interested in. For empty input, we
179 // stripped no whitespace, but we still need to clear |output|.
180 if (input.empty() ||
181 (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
182 bool input_was_empty = input.empty(); // in case output == &input
183 output->clear();
184 return input_was_empty ? TRIM_NONE : positions;
187 // Trim the whitespace.
188 *output =
189 input.substr(first_good_char, last_good_char - first_good_char + 1);
191 // Return where we trimmed from.
192 return static_cast<TrimPositions>(
193 ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
194 ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
197 bool TrimString(const std::wstring& input,
198 const wchar_t trim_chars[],
199 std::wstring* output) {
200 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
203 #if !defined(WCHAR_T_IS_UTF16)
204 bool TrimString(const string16& input,
205 const char16 trim_chars[],
206 string16* output) {
207 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
209 #endif
211 bool TrimString(const std::string& input,
212 const char trim_chars[],
213 std::string* output) {
214 return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
217 void TruncateUTF8ToByteSize(const std::string& input,
218 const size_t byte_size,
219 std::string* output) {
220 DCHECK(output);
221 if (byte_size > input.length()) {
222 *output = input;
223 return;
225 DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
226 // Note: This cast is necessary because CBU8_NEXT uses int32s.
227 int32 truncation_length = static_cast<int32>(byte_size);
228 int32 char_index = truncation_length - 1;
229 const char* data = input.data();
231 // Using CBU8, we will move backwards from the truncation point
232 // to the beginning of the string looking for a valid UTF8
233 // character. Once a full UTF8 character is found, we will
234 // truncate the string to the end of that character.
235 while (char_index >= 0) {
236 int32 prev = char_index;
237 uint32 code_point = 0;
238 CBU8_NEXT(data, char_index, truncation_length, code_point);
239 if (!base::IsValidCharacter(code_point) ||
240 !base::IsValidCodepoint(code_point)) {
241 char_index = prev - 1;
242 } else {
243 break;
247 if (char_index >= 0 )
248 *output = input.substr(0, char_index);
249 else
250 output->clear();
253 TrimPositions TrimWhitespace(const string16& input,
254 TrimPositions positions,
255 string16* output) {
256 return TrimStringT(input, kWhitespaceUTF16, positions, output);
259 TrimPositions TrimWhitespaceASCII(const std::string& input,
260 TrimPositions positions,
261 std::string* output) {
262 return TrimStringT(input, kWhitespaceASCII, positions, output);
265 // This function is only for backward-compatibility.
266 // To be removed when all callers are updated.
267 TrimPositions TrimWhitespace(const std::string& input,
268 TrimPositions positions,
269 std::string* output) {
270 return TrimWhitespaceASCII(input, positions, output);
// Returns |text| with leading and trailing whitespace removed and every
// interior run of whitespace reduced to a single space. When
// |trim_sequences_with_line_breaks| is true, an interior run that contains a
// CR or LF is dropped entirely instead.
template<typename STR>
STR CollapseWhitespaceT(const STR& text,
                        bool trim_sequences_with_line_breaks) {
  // The output is never longer than the input, so size the buffer once and
  // shrink it at the end.
  STR collapsed;
  collapsed.resize(text.size());

  // Start as if a whitespace run had already been seen and discarded; that
  // way leading whitespace produces no output.
  bool inside_whitespace = true;
  bool run_already_dropped = true;

  int out_len = 0;
  for (size_t idx = 0; idx < text.size(); ++idx) {
    const typename STR::value_type ch = text[idx];

    if (!IsWhitespace(ch)) {
      // Non-whitespace characters are copied straight across.
      inside_whitespace = false;
      run_already_dropped = false;
      collapsed[out_len++] = ch;
      continue;
    }

    if (!inside_whitespace) {
      // First character of a run: emit the single replacement space.
      inside_whitespace = true;
      collapsed[out_len++] = L' ';
    }

    const bool is_line_break = (ch == '\n') || (ch == '\r');
    if (trim_sequences_with_line_breaks && !run_already_dropped &&
        is_line_break) {
      // Runs containing CR or LF are eliminated entirely: take back the space
      // emitted for this run.
      run_already_dropped = true;
      --out_len;
    }
  }

  if (inside_whitespace && !run_already_dropped) {
    // The text ended inside a run; drop the space emitted for it.
    --out_len;
  }

  collapsed.resize(out_len);
  return collapsed;
}
315 std::wstring CollapseWhitespace(const std::wstring& text,
316 bool trim_sequences_with_line_breaks) {
317 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
320 #if !defined(WCHAR_T_IS_UTF16)
321 string16 CollapseWhitespace(const string16& text,
322 bool trim_sequences_with_line_breaks) {
323 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
325 #endif
327 std::string CollapseWhitespaceASCII(const std::string& text,
328 bool trim_sequences_with_line_breaks) {
329 return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
332 bool ContainsOnlyWhitespaceASCII(const std::string& str) {
333 for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
334 if (!IsAsciiWhitespace(*i))
335 return false;
337 return true;
340 bool ContainsOnlyWhitespace(const string16& str) {
341 return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;
// Returns true if every character of |input| also occurs somewhere in
// |characters|. An empty |input| yields true.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // The input qualifies exactly when it has no character outside the set.
  return input.find_first_not_of(characters) == STR::npos;
}
354 bool ContainsOnlyChars(const std::wstring& input,
355 const std::wstring& characters) {
356 return ContainsOnlyCharsT(input, characters);
359 #if !defined(WCHAR_T_IS_UTF16)
360 bool ContainsOnlyChars(const string16& input, const string16& characters) {
361 return ContainsOnlyCharsT(input, characters);
363 #endif
365 bool ContainsOnlyChars(const std::string& input,
366 const std::string& characters) {
367 return ContainsOnlyCharsT(input, characters);
370 std::string WideToASCII(const std::wstring& wide) {
371 DCHECK(IsStringASCII(wide)) << wide;
372 return std::string(wide.begin(), wide.end());
375 std::string UTF16ToASCII(const string16& utf16) {
376 DCHECK(IsStringASCII(utf16)) << utf16;
377 return std::string(utf16.begin(), utf16.end());
// Converts |wide| to Latin-1. Latin-1 is just the low range of Unicode, so
// each element is copied directly. Returns false, leaving |latin1| empty, if
// any element is greater than 255.
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  latin1->clear();

  // Build into a scratch buffer so a failure part-way through never exposes
  // a partial result.
  std::string converted(wide.size(), '\0');
  for (size_t i = 0; i < wide.size(); ++i) {
    const wchar_t ch = wide[i];
    if (ch > 255)
      return false;
    converted[i] = static_cast<char>(ch);
  }

  latin1->swap(converted);
  return true;
}
394 template<class STR>
395 static bool DoIsStringASCII(const STR& str) {
396 for (size_t i = 0; i < str.length(); i++) {
397 typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
398 if (c > 0x7F)
399 return false;
401 return true;
404 bool IsStringASCII(const std::wstring& str) {
405 return DoIsStringASCII(str);
408 #if !defined(WCHAR_T_IS_UTF16)
409 bool IsStringASCII(const string16& str) {
410 return DoIsStringASCII(str);
412 #endif
414 bool IsStringASCII(const base::StringPiece& str) {
415 return DoIsStringASCII(str);
418 bool IsStringUTF8(const std::string& str) {
419 const char *src = str.data();
420 int32 src_len = static_cast<int32>(str.length());
421 int32 char_index = 0;
423 while (char_index < src_len) {
424 int32 code_point;
425 CBU8_NEXT(src, char_index, src_len, code_point);
426 if (!base::IsValidCharacter(code_point))
427 return false;
429 return true;
432 template<typename Iter>
433 static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
434 Iter a_end,
435 const char* b) {
436 for (Iter it = a_begin; it != a_end; ++it, ++b) {
437 if (!*b || base::ToLowerASCII(*it) != *b)
438 return false;
440 return *b == 0;
443 // Front-ends for LowerCaseEqualsASCII.
444 bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
445 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
448 bool LowerCaseEqualsASCII(const std::wstring& a, const char* b) {
449 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
452 #if !defined(WCHAR_T_IS_UTF16)
453 bool LowerCaseEqualsASCII(const string16& a, const char* b) {
454 return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
456 #endif
458 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
459 std::string::const_iterator a_end,
460 const char* b) {
461 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
464 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin,
465 std::wstring::const_iterator a_end,
466 const char* b) {
467 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
470 #if !defined(WCHAR_T_IS_UTF16)
471 bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
472 string16::const_iterator a_end,
473 const char* b) {
474 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
476 #endif
478 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
479 #if !defined(OS_ANDROID)
480 bool LowerCaseEqualsASCII(const char* a_begin,
481 const char* a_end,
482 const char* b) {
483 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
486 bool LowerCaseEqualsASCII(const wchar_t* a_begin,
487 const wchar_t* a_end,
488 const char* b) {
489 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
492 #if !defined(WCHAR_T_IS_UTF16)
493 bool LowerCaseEqualsASCII(const char16* a_begin,
494 const char16* a_end,
495 const char* b) {
496 return DoLowerCaseEqualsASCII(a_begin, a_end, b);
498 #endif
500 #endif // !defined(OS_ANDROID)
502 bool EqualsASCII(const string16& a, const base::StringPiece& b) {
503 if (a.length() != b.length())
504 return false;
505 return std::equal(b.begin(), b.end(), a.begin());
508 bool StartsWithASCII(const std::string& str,
509 const std::string& search,
510 bool case_sensitive) {
511 if (case_sensitive)
512 return str.compare(0, search.length(), search) == 0;
513 else
514 return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
517 template <typename STR>
518 bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
519 if (case_sensitive) {
520 return str.compare(0, search.length(), search) == 0;
521 } else {
522 if (search.size() > str.size())
523 return false;
524 return std::equal(search.begin(), search.end(), str.begin(),
525 base::CaseInsensitiveCompare<typename STR::value_type>());
529 bool StartsWith(const std::wstring& str, const std::wstring& search,
530 bool case_sensitive) {
531 return StartsWithT(str, search, case_sensitive);
534 #if !defined(WCHAR_T_IS_UTF16)
535 bool StartsWith(const string16& str, const string16& search,
536 bool case_sensitive) {
537 return StartsWithT(str, search, case_sensitive);
539 #endif
541 template <typename STR>
542 bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
543 typename STR::size_type str_length = str.length();
544 typename STR::size_type search_length = search.length();
545 if (search_length > str_length)
546 return false;
547 if (case_sensitive) {
548 return str.compare(str_length - search_length, search_length, search) == 0;
549 } else {
550 return std::equal(search.begin(), search.end(),
551 str.begin() + (str_length - search_length),
552 base::CaseInsensitiveCompare<typename STR::value_type>());
556 bool EndsWith(const std::string& str, const std::string& search,
557 bool case_sensitive) {
558 return EndsWithT(str, search, case_sensitive);
561 bool EndsWith(const std::wstring& str, const std::wstring& search,
562 bool case_sensitive) {
563 return EndsWithT(str, search, case_sensitive);
566 #if !defined(WCHAR_T_IS_UTF16)
567 bool EndsWith(const string16& str, const string16& search,
568 bool case_sensitive) {
569 return EndsWithT(str, search, case_sensitive);
571 #endif
573 static const char* const kByteStringsUnlocalized[] = {
574 " B",
575 " kB",
576 " MB",
577 " GB",
578 " TB",
579 " PB"
582 string16 FormatBytesUnlocalized(int64 bytes) {
583 double unit_amount = static_cast<double>(bytes);
584 size_t dimension = 0;
585 const int kKilo = 1024;
586 while (unit_amount >= kKilo &&
587 dimension < arraysize(kByteStringsUnlocalized) - 1) {
588 unit_amount /= kKilo;
589 dimension++;
592 char buf[64];
593 if (bytes != 0 && dimension > 0 && unit_amount < 100) {
594 base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
595 kByteStringsUnlocalized[dimension]);
596 } else {
597 base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
598 kByteStringsUnlocalized[dimension]);
601 return ASCIIToUTF16(buf);
// Replaces occurrences of |find_this| in |*str| with |replace_with|, starting
// the search at |start_offset|. When |replace_all| is false only the first
// occurrence is replaced. Does nothing if |start_offset| is npos or not less
// than the string length, or if |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty needle is found at every offset up to and including the string
  // length, so the loop below would never terminate where DCHECK compiles to
  // nothing. Treat it as "nothing to replace".
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
      offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Resume after the inserted text so it is never re-matched.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
624 void ReplaceFirstSubstringAfterOffset(string16* str,
625 string16::size_type start_offset,
626 const string16& find_this,
627 const string16& replace_with) {
628 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
629 false); // replace first instance
632 void ReplaceFirstSubstringAfterOffset(std::string* str,
633 std::string::size_type start_offset,
634 const std::string& find_this,
635 const std::string& replace_with) {
636 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
637 false); // replace first instance
640 void ReplaceSubstringsAfterOffset(string16* str,
641 string16::size_type start_offset,
642 const string16& find_this,
643 const string16& replace_with) {
644 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
645 true); // replace all instances
648 void ReplaceSubstringsAfterOffset(std::string* str,
649 std::string::size_type start_offset,
650 const std::string& find_this,
651 const std::string& replace_with) {
652 DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
653 true); // replace all instances
// Splits |str| on any character in |delimiters|, replacing the contents of
// |tokens| with the non-empty pieces in order. Returns the number of tokens.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type SizeType;

  tokens->clear();

  // |token_begin| always points at a non-delimiter character, so runs of
  // consecutive delimiters never produce empty tokens.
  for (SizeType token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos; ) {
    const SizeType token_end = str.find_first_of(delimiters, token_begin + 1);
    const bool is_last_token = (token_end == STR::npos);

    tokens->push_back(is_last_token
                          ? str.substr(token_begin)
                          : str.substr(token_begin, token_end - token_begin));

    token_begin = is_last_token
                      ? STR::npos
                      : str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
678 size_t Tokenize(const std::wstring& str,
679 const std::wstring& delimiters,
680 std::vector<std::wstring>* tokens) {
681 return TokenizeT(str, delimiters, tokens);
684 #if !defined(WCHAR_T_IS_UTF16)
685 size_t Tokenize(const string16& str,
686 const string16& delimiters,
687 std::vector<string16>* tokens) {
688 return TokenizeT(str, delimiters, tokens);
690 #endif
692 size_t Tokenize(const std::string& str,
693 const std::string& delimiters,
694 std::vector<std::string>* tokens) {
695 return TokenizeT(str, delimiters, tokens);
698 size_t Tokenize(const base::StringPiece& str,
699 const base::StringPiece& delimiters,
700 std::vector<base::StringPiece>* tokens) {
701 return TokenizeT(str, delimiters, tokens);
// Concatenates |parts|, inserting |sep| between consecutive elements.
// Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  STR joined;
  for (typename std::vector<STR>::const_iterator part = parts.begin();
       part != parts.end(); ++part) {
    // The separator goes before every element except the first.
    if (part != parts.begin())
      joined += sep;
    joined += *part;
  }
  return joined;
}
721 std::string JoinString(const std::vector<std::string>& parts, char sep) {
722 return JoinStringT(parts, std::string(1, sep));
725 string16 JoinString(const std::vector<string16>& parts, char16 sep) {
726 return JoinStringT(parts, string16(1, sep));
729 std::string JoinString(const std::vector<std::string>& parts,
730 const std::string& separator) {
731 return JoinStringT(parts, separator);
734 string16 JoinString(const std::vector<string16>& parts,
735 const string16& separator) {
736 return JoinStringT(parts, separator);
739 template<class FormatStringType, class OutStringType>
740 OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
741 const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
742 size_t substitutions = subst.size();
744 size_t sub_length = 0;
745 for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
746 iter != subst.end(); ++iter) {
747 sub_length += iter->length();
750 OutStringType formatted;
751 formatted.reserve(format_string.length() + sub_length);
753 std::vector<ReplacementOffset> r_offsets;
754 for (typename FormatStringType::const_iterator i = format_string.begin();
755 i != format_string.end(); ++i) {
756 if ('$' == *i) {
757 if (i + 1 != format_string.end()) {
758 ++i;
759 DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
760 if ('$' == *i) {
761 while (i != format_string.end() && '$' == *i) {
762 formatted.push_back('$');
763 ++i;
765 --i;
766 } else {
767 uintptr_t index = 0;
768 while (i != format_string.end() && '0' <= *i && *i <= '9') {
769 index *= 10;
770 index += *i - '0';
771 ++i;
773 --i;
774 index -= 1;
775 if (offsets) {
776 ReplacementOffset r_offset(index,
777 static_cast<int>(formatted.size()));
778 r_offsets.insert(std::lower_bound(r_offsets.begin(),
779 r_offsets.end(),
780 r_offset,
781 &CompareParameter),
782 r_offset);
784 if (index < substitutions)
785 formatted.append(subst.at(index));
788 } else {
789 formatted.push_back(*i);
792 if (offsets) {
793 for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
794 i != r_offsets.end(); ++i) {
795 offsets->push_back(i->offset);
798 return formatted;
801 string16 ReplaceStringPlaceholders(const string16& format_string,
802 const std::vector<string16>& subst,
803 std::vector<size_t>* offsets) {
804 return DoReplaceStringPlaceholders(format_string, subst, offsets);
807 std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
808 const std::vector<std::string>& subst,
809 std::vector<size_t>* offsets) {
810 return DoReplaceStringPlaceholders(format_string, subst, offsets);
813 string16 ReplaceStringPlaceholders(const string16& format_string,
814 const string16& a,
815 size_t* offset) {
816 std::vector<size_t> offsets;
817 std::vector<string16> subst;
818 subst.push_back(a);
819 string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
821 DCHECK(offsets.size() == 1);
822 if (offset) {
823 *offset = offsets[0];
825 return result;
828 static bool IsWildcard(base_icu::UChar32 character) {
829 return character == '*' || character == '?';
832 // Move the strings pointers to the point where they start to differ.
833 template <typename CHAR, typename NEXT>
834 static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
835 const CHAR** string, const CHAR* string_end,
836 NEXT next) {
837 const CHAR* escape = NULL;
838 while (*pattern != pattern_end && *string != string_end) {
839 if (!escape && IsWildcard(**pattern)) {
840 // We don't want to match wildcard here, except if it's escaped.
841 return;
844 // Check if the escapement char is found. If so, skip it and move to the
845 // next character.
846 if (!escape && **pattern == '\\') {
847 escape = *pattern;
848 next(pattern, pattern_end);
849 continue;
852 // Check if the chars match, if so, increment the ptrs.
853 const CHAR* pattern_next = *pattern;
854 const CHAR* string_next = *string;
855 base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
856 if (pattern_char == next(&string_next, string_end) &&
857 pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
858 *pattern = pattern_next;
859 *string = string_next;
860 } else {
861 // Uh ho, it did not match, we are done. If the last char was an
862 // escapement, that means that it was an error to advance the ptr here,
863 // let's put it back where it was. This also mean that the MatchPattern
864 // function will return false because if we can't match an escape char
865 // here, then no one will.
866 if (escape) {
867 *pattern = escape;
869 return;
872 escape = NULL;
// Advances |*pattern| past any run of wildcard characters, stopping at the
// first non-wildcard or at |end|.
template <typename CHAR, typename NEXT>
static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
  while (*pattern != end && IsWildcard(**pattern))
    next(pattern, end);
}
// Recursive wildcard matcher. Returns true if [eval, eval_end) matches
// [pattern, pattern_end), where '*' matches any run of characters and '?'
// matches zero or one character. |next| decodes and steps over one
// character. Recursion deeper than kMaxDepth is reported as "no match".
template <typename CHAR, typename NEXT>
static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
                          const CHAR* pattern, const CHAR* pattern_end,
                          int depth,
                          NEXT next) {
  const int kMaxDepth = 16;
  if (depth > kMaxDepth)
    return false;

  // Consume the literal prefix common to both.
  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);

  // String exhausted: it matches only if what is left of the pattern is
  // empty or made of wildcards alone.
  if (eval == eval_end) {
    EatWildcard(&pattern, pattern_end, next);
    return pattern == pattern_end;
  }

  // Pattern exhausted while the string is not: no match.
  if (pattern == pattern_end)
    return false;

  // |rest| is the pattern with its first character removed.
  const CHAR* rest = pattern;
  next(&rest, pattern_end);

  if (pattern[0] == '?') {
    // Try the question mark as matching nothing...
    if (MatchPatternT(eval, eval_end, rest, pattern_end, depth + 1, next))
      return true;
    // ...and as matching exactly one character.
    const CHAR* eval_after_one = eval;
    next(&eval_after_one, eval_end);
    return MatchPatternT(eval_after_one, eval_end, rest, pattern_end,
                         depth + 1, next);
  }

  // Anything other than '*' at this point is a literal mismatch.
  if (pattern[0] != '*')
    return false;

  // Collapse duplicate wild cards (********** into *) so that the
  // method does not recurse unnecessarily. http://crbug.com/52839
  EatWildcard(&rest, pattern_end, next);

  // Try to match the remainder of the pattern against every suffix of the
  // string.
  for (; eval != eval_end; ++eval) {
    if (MatchPatternT(eval, eval_end, rest, pattern_end, depth + 1, next))
      return true;
  }

  // The whole string was consumed by the star; this is a match only if the
  // pattern holds nothing but wildcards.
  EatWildcard(&pattern, pattern_end, next);
  return pattern == pattern_end;
}
950 struct NextCharUTF8 {
951 base_icu::UChar32 operator()(const char** p, const char* end) {
952 base_icu::UChar32 c;
953 int offset = 0;
954 CBU8_NEXT(*p, offset, end - *p, c);
955 *p += offset;
956 return c;
960 struct NextCharUTF16 {
961 base_icu::UChar32 operator()(const char16** p, const char16* end) {
962 base_icu::UChar32 c;
963 int offset = 0;
964 CBU16_NEXT(*p, offset, end - *p, c);
965 *p += offset;
966 return c;
970 bool MatchPattern(const base::StringPiece& eval,
971 const base::StringPiece& pattern) {
972 return MatchPatternT(eval.data(), eval.data() + eval.size(),
973 pattern.data(), pattern.data() + pattern.size(),
974 0, NextCharUTF8());
977 bool MatchPattern(const string16& eval, const string16& pattern) {
978 return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
979 pattern.c_str(), pattern.c_str() + pattern.size(),
980 0, NextCharUTF16());
983 // The following code is compatible with the OpenBSD lcpy interface. See:
984 // http://www.gratisoft.us/todd/papers/strlcpy.html
985 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
987 namespace {
// Copies the NUL-terminated |src| into |dst|, writing at most |dst_size|
// elements and always NUL-terminating when |dst_size| is non-zero. Returns
// the length of |src|, so a return value >= |dst_size| signals truncation.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR ch = src[copied];
    dst[copied] = ch;
    if (ch == 0)  // We hit and copied the terminating NULL.
      return copied;
    ++copied;
  }

  // The buffer filled up before |src| ended, so the last slot holds a source
  // character. Overwrite it with the terminator.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of |src| and return its length in characters.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}
1005 } // namespace
1007 size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
1008 return lcpyT<char>(dst, src, dst_size);
1010 size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
1011 return lcpyT<wchar_t>(dst, src, dst_size);