1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/string_util.h"
7 #include "build/build_config.h"
23 #include "base/basictypes.h"
24 #include "base/logging.h"
25 #include "base/memory/singleton.h"
26 #include "base/strings/utf_string_conversion_utils.h"
27 #include "base/strings/utf_string_conversions.h"
28 #include "base/third_party/icu/icu_utf.h"
32 // Force the singleton used by Empty[W]String[16] to be a unique type. This
33 // prevents other code that might accidentally use Singleton<string> from
34 // getting our internal one.
38 const std::wstring ws
;
41 static EmptyStrings
* GetInstance() {
42 return Singleton
<EmptyStrings
>::get();
46 // Used by ReplaceStringPlaceholders to track the position in the string of
47 // replaced parameters.
struct ReplacementOffset {
  // Pairs the index of a substituted parameter with the position at which its
  // replacement text begins.
  ReplacementOffset(uintptr_t parameter_index, size_t string_offset)
      : parameter(parameter_index),
        offset(string_offset) {}

  // Index of the parameter.
  uintptr_t parameter;

  // Starting position in the string.
  size_t offset;
};
60 static bool CompareParameter(const ReplacementOffset
& elem1
,
61 const ReplacementOffset
& elem2
) {
62 return elem1
.parameter
< elem2
.parameter
;
// Returns false if |format| contains a conversion specification that this
// function considers unportable: 's' or 'c' without a preceding 'l' modifier,
// or any of 'S', 'C', 'F', 'D', 'O', 'U'. Returns true otherwise, including
// when the string ends in the middle of a specification.
bool IsWprintfFormatPortable(const wchar_t* format) {
  // Characters that terminate a conversion specification.
  static const wchar_t kSpecifierEnd[] = L"diouxXeEfgGaAcspn%";

  const wchar_t* position = format;
  while (*position != L'\0') {
    if (*position != L'%') {
      ++position;
      continue;
    }

    // Inside a specification: consume characters until a terminating
    // specifier is reached.
    bool modifier_l = false;
    for (;;) {
      const wchar_t current = *++position;
      if (current == L'\0') {
        // The format string ended in the middle of a specification. Call it
        // portable because no unportable specifications were found; the
        // string is equally broken on all platforms.
        return true;
      }

      switch (current) {
        case L'l':
          // 'l' is the only thing that can save the 's' and 'c' specifiers.
          modifier_l = true;
          break;
        case L's':
        case L'c':
          if (!modifier_l)
            return false;  // Not portable.
          break;
        case L'S':
        case L'C':
        case L'F':
        case L'D':
        case L'O':
        case L'U':
          return false;  // Not portable.
        default:
          break;
      }

      // Portable so far; a terminating specifier ends this specification and
      // scanning resumes with the rest of the format string.
      if (wcschr(kSpecifierEnd, current))
        break;
    }
    ++position;
  }

  return true;
}
107 const std::string
& EmptyString() {
108 return EmptyStrings::GetInstance()->s
;
111 const std::wstring
& EmptyWString() {
112 return EmptyStrings::GetInstance()->ws
;
115 const string16
& EmptyString16() {
116 return EmptyStrings::GetInstance()->s16
;
// Copies |input| into |output| and then replaces every character of the copy
// that appears in the NUL-terminated set |replace_chars| with |replace_with|.
// Returns true if at least one replacement was made.
template<typename STR>
bool ReplaceCharsT(const STR& input,
                   const typename STR::value_type replace_chars[],
                   const STR& replace_with,
                   STR* output) {
  *output = input;

  // Resume each search just past the text that was inserted, so characters of
  // |replace_with| are never themselves replaced.
  const size_t skip = replace_with.length();
  bool changed = false;
  for (size_t pos = output->find_first_of(replace_chars);
       pos != STR::npos;
       pos = output->find_first_of(replace_chars, pos + skip)) {
    output->replace(pos, 1, replace_with);
    changed = true;
  }

  return changed;
}
139 bool ReplaceChars(const string16
& input
,
140 const char16 replace_chars
[],
141 const string16
& replace_with
,
143 return ReplaceCharsT(input
, replace_chars
, replace_with
, output
);
146 bool ReplaceChars(const std::string
& input
,
147 const char replace_chars
[],
148 const std::string
& replace_with
,
149 std::string
* output
) {
150 return ReplaceCharsT(input
, replace_chars
, replace_with
, output
);
153 bool RemoveChars(const string16
& input
,
154 const char16 remove_chars
[],
156 return ReplaceChars(input
, remove_chars
, string16(), output
);
159 bool RemoveChars(const std::string
& input
,
160 const char remove_chars
[],
161 std::string
* output
) {
162 return ReplaceChars(input
, remove_chars
, std::string(), output
);
165 template<typename STR
>
166 TrimPositions
TrimStringT(const STR
& input
,
167 const typename
STR::value_type trim_chars
[],
168 TrimPositions positions
,
170 // Find the edges of leading/trailing whitespace as desired.
171 const typename
STR::size_type last_char
= input
.length() - 1;
172 const typename
STR::size_type first_good_char
= (positions
& TRIM_LEADING
) ?
173 input
.find_first_not_of(trim_chars
) : 0;
174 const typename
STR::size_type last_good_char
= (positions
& TRIM_TRAILING
) ?
175 input
.find_last_not_of(trim_chars
) : last_char
;
177 // When the string was all whitespace, report that we stripped off whitespace
178 // from whichever position the caller was interested in. For empty input, we
179 // stripped no whitespace, but we still need to clear |output|.
181 (first_good_char
== STR::npos
) || (last_good_char
== STR::npos
)) {
182 bool input_was_empty
= input
.empty(); // in case output == &input
184 return input_was_empty
? TRIM_NONE
: positions
;
187 // Trim the whitespace.
189 input
.substr(first_good_char
, last_good_char
- first_good_char
+ 1);
191 // Return where we trimmed from.
192 return static_cast<TrimPositions
>(
193 ((first_good_char
== 0) ? TRIM_NONE
: TRIM_LEADING
) |
194 ((last_good_char
== last_char
) ? TRIM_NONE
: TRIM_TRAILING
));
197 bool TrimString(const std::wstring
& input
,
198 const wchar_t trim_chars
[],
199 std::wstring
* output
) {
200 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
203 #if !defined(WCHAR_T_IS_UTF16)
204 bool TrimString(const string16
& input
,
205 const char16 trim_chars
[],
207 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
211 bool TrimString(const std::string
& input
,
212 const char trim_chars
[],
213 std::string
* output
) {
214 return TrimStringT(input
, trim_chars
, TRIM_ALL
, output
) != TRIM_NONE
;
217 void TruncateUTF8ToByteSize(const std::string
& input
,
218 const size_t byte_size
,
219 std::string
* output
) {
221 if (byte_size
> input
.length()) {
225 DCHECK_LE(byte_size
, static_cast<uint32
>(kint32max
));
226 // Note: This cast is necessary because CBU8_NEXT uses int32s.
227 int32 truncation_length
= static_cast<int32
>(byte_size
);
228 int32 char_index
= truncation_length
- 1;
229 const char* data
= input
.data();
231 // Using CBU8, we will move backwards from the truncation point
232 // to the beginning of the string looking for a valid UTF8
233 // character. Once a full UTF8 character is found, we will
234 // truncate the string to the end of that character.
235 while (char_index
>= 0) {
236 int32 prev
= char_index
;
237 uint32 code_point
= 0;
238 CBU8_NEXT(data
, char_index
, truncation_length
, code_point
);
239 if (!base::IsValidCharacter(code_point
) ||
240 !base::IsValidCodepoint(code_point
)) {
241 char_index
= prev
- 1;
247 if (char_index
>= 0 )
248 *output
= input
.substr(0, char_index
);
253 TrimPositions
TrimWhitespace(const string16
& input
,
254 TrimPositions positions
,
256 return TrimStringT(input
, kWhitespaceUTF16
, positions
, output
);
259 TrimPositions
TrimWhitespaceASCII(const std::string
& input
,
260 TrimPositions positions
,
261 std::string
* output
) {
262 return TrimStringT(input
, kWhitespaceASCII
, positions
, output
);
265 // This function is only for backward-compatibility.
266 // To be removed when all callers are updated.
267 TrimPositions
TrimWhitespace(const std::string
& input
,
268 TrimPositions positions
,
269 std::string
* output
) {
270 return TrimWhitespaceASCII(input
, positions
, output
);
273 template<typename STR
>
274 STR
CollapseWhitespaceT(const STR
& text
,
275 bool trim_sequences_with_line_breaks
) {
277 result
.resize(text
.size());
279 // Set flags to pretend we're already in a trimmed whitespace sequence, so we
280 // will trim any leading whitespace.
281 bool in_whitespace
= true;
282 bool already_trimmed
= true;
284 int chars_written
= 0;
285 for (typename
STR::const_iterator
i(text
.begin()); i
!= text
.end(); ++i
) {
286 if (IsWhitespace(*i
)) {
287 if (!in_whitespace
) {
288 // Reduce all whitespace sequences to a single space.
289 in_whitespace
= true;
290 result
[chars_written
++] = L
' ';
292 if (trim_sequences_with_line_breaks
&& !already_trimmed
&&
293 ((*i
== '\n') || (*i
== '\r'))) {
294 // Whitespace sequences containing CR or LF are eliminated entirely.
295 already_trimmed
= true;
299 // Non-whitespace characters are copied straight across.
300 in_whitespace
= false;
301 already_trimmed
= false;
302 result
[chars_written
++] = *i
;
306 if (in_whitespace
&& !already_trimmed
) {
307 // Any trailing whitespace is eliminated.
311 result
.resize(chars_written
);
315 std::wstring
CollapseWhitespace(const std::wstring
& text
,
316 bool trim_sequences_with_line_breaks
) {
317 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
320 #if !defined(WCHAR_T_IS_UTF16)
321 string16
CollapseWhitespace(const string16
& text
,
322 bool trim_sequences_with_line_breaks
) {
323 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
327 std::string
CollapseWhitespaceASCII(const std::string
& text
,
328 bool trim_sequences_with_line_breaks
) {
329 return CollapseWhitespaceT(text
, trim_sequences_with_line_breaks
);
332 bool ContainsOnlyWhitespaceASCII(const std::string
& str
) {
333 for (std::string::const_iterator
i(str
.begin()); i
!= str
.end(); ++i
) {
334 if (!IsAsciiWhitespace(*i
))
340 bool ContainsOnlyWhitespace(const string16
& str
) {
341 return str
.find_first_not_of(kWhitespaceUTF16
) == string16::npos
;
// Returns true if every character of |input| also occurs in |characters|.
// An empty |input| trivially satisfies this.
template<typename STR>
static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
  // The first character of |input| that is absent from |characters|, if any,
  // is exactly what find_first_not_of() reports.
  return input.find_first_not_of(characters) == STR::npos;
}
354 bool ContainsOnlyChars(const std::wstring
& input
,
355 const std::wstring
& characters
) {
356 return ContainsOnlyCharsT(input
, characters
);
359 #if !defined(WCHAR_T_IS_UTF16)
360 bool ContainsOnlyChars(const string16
& input
, const string16
& characters
) {
361 return ContainsOnlyCharsT(input
, characters
);
365 bool ContainsOnlyChars(const std::string
& input
,
366 const std::string
& characters
) {
367 return ContainsOnlyCharsT(input
, characters
);
370 std::string
WideToASCII(const std::wstring
& wide
) {
371 DCHECK(IsStringASCII(wide
)) << wide
;
372 return std::string(wide
.begin(), wide
.end());
375 std::string
UTF16ToASCII(const string16
& utf16
) {
376 DCHECK(IsStringASCII(utf16
)) << utf16
;
377 return std::string(utf16
.begin(), utf16
.end());
// Latin1 is just the low range of Unicode, so we can copy directly to convert.
//
// Converts |wide| to Latin1 and stores the result in |latin1|. Returns false,
// leaving |latin1| empty, if any element of |wide| lies outside [0, 255].
bool WideToLatin1(const std::wstring& wide, std::string* latin1) {
  std::string output;
  output.resize(wide.size());
  latin1->clear();
  for (size_t i = 0; i < wide.size(); i++) {
    // wchar_t is a signed type on some platforms. Compare as unsigned so that
    // a negative element is rejected instead of being truncated to a char.
    if (static_cast<unsigned long>(wide[i]) > 255)
      return false;
    output[i] = static_cast<char>(wide[i]);
  }
  latin1->swap(output);
  return true;
}
395 static bool DoIsStringASCII(const STR
& str
) {
396 for (size_t i
= 0; i
< str
.length(); i
++) {
397 typename ToUnsigned
<typename
STR::value_type
>::Unsigned c
= str
[i
];
404 bool IsStringASCII(const std::wstring
& str
) {
405 return DoIsStringASCII(str
);
408 #if !defined(WCHAR_T_IS_UTF16)
409 bool IsStringASCII(const string16
& str
) {
410 return DoIsStringASCII(str
);
414 bool IsStringASCII(const base::StringPiece
& str
) {
415 return DoIsStringASCII(str
);
418 bool IsStringUTF8(const std::string
& str
) {
419 const char *src
= str
.data();
420 int32 src_len
= static_cast<int32
>(str
.length());
421 int32 char_index
= 0;
423 while (char_index
< src_len
) {
425 CBU8_NEXT(src
, char_index
, src_len
, code_point
);
426 if (!base::IsValidCharacter(code_point
))
432 template<typename Iter
>
433 static inline bool DoLowerCaseEqualsASCII(Iter a_begin
,
436 for (Iter it
= a_begin
; it
!= a_end
; ++it
, ++b
) {
437 if (!*b
|| base::ToLowerASCII(*it
) != *b
)
443 // Front-ends for LowerCaseEqualsASCII.
444 bool LowerCaseEqualsASCII(const std::string
& a
, const char* b
) {
445 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
448 bool LowerCaseEqualsASCII(const std::wstring
& a
, const char* b
) {
449 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
452 #if !defined(WCHAR_T_IS_UTF16)
453 bool LowerCaseEqualsASCII(const string16
& a
, const char* b
) {
454 return DoLowerCaseEqualsASCII(a
.begin(), a
.end(), b
);
458 bool LowerCaseEqualsASCII(std::string::const_iterator a_begin
,
459 std::string::const_iterator a_end
,
461 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
464 bool LowerCaseEqualsASCII(std::wstring::const_iterator a_begin
,
465 std::wstring::const_iterator a_end
,
467 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
470 #if !defined(WCHAR_T_IS_UTF16)
471 bool LowerCaseEqualsASCII(string16::const_iterator a_begin
,
472 string16::const_iterator a_end
,
474 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
478 // TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
479 #if !defined(OS_ANDROID)
480 bool LowerCaseEqualsASCII(const char* a_begin
,
483 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
486 bool LowerCaseEqualsASCII(const wchar_t* a_begin
,
487 const wchar_t* a_end
,
489 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
492 #if !defined(WCHAR_T_IS_UTF16)
493 bool LowerCaseEqualsASCII(const char16
* a_begin
,
496 return DoLowerCaseEqualsASCII(a_begin
, a_end
, b
);
500 #endif // !defined(OS_ANDROID)
502 bool EqualsASCII(const string16
& a
, const base::StringPiece
& b
) {
503 if (a
.length() != b
.length())
505 return std::equal(b
.begin(), b
.end(), a
.begin());
508 bool StartsWithASCII(const std::string
& str
,
509 const std::string
& search
,
510 bool case_sensitive
) {
512 return str
.compare(0, search
.length(), search
) == 0;
514 return base::strncasecmp(str
.c_str(), search
.c_str(), search
.length()) == 0;
517 template <typename STR
>
518 bool StartsWithT(const STR
& str
, const STR
& search
, bool case_sensitive
) {
519 if (case_sensitive
) {
520 return str
.compare(0, search
.length(), search
) == 0;
522 if (search
.size() > str
.size())
524 return std::equal(search
.begin(), search
.end(), str
.begin(),
525 base::CaseInsensitiveCompare
<typename
STR::value_type
>());
529 bool StartsWith(const std::wstring
& str
, const std::wstring
& search
,
530 bool case_sensitive
) {
531 return StartsWithT(str
, search
, case_sensitive
);
534 #if !defined(WCHAR_T_IS_UTF16)
535 bool StartsWith(const string16
& str
, const string16
& search
,
536 bool case_sensitive
) {
537 return StartsWithT(str
, search
, case_sensitive
);
541 template <typename STR
>
542 bool EndsWithT(const STR
& str
, const STR
& search
, bool case_sensitive
) {
543 typename
STR::size_type str_length
= str
.length();
544 typename
STR::size_type search_length
= search
.length();
545 if (search_length
> str_length
)
547 if (case_sensitive
) {
548 return str
.compare(str_length
- search_length
, search_length
, search
) == 0;
550 return std::equal(search
.begin(), search
.end(),
551 str
.begin() + (str_length
- search_length
),
552 base::CaseInsensitiveCompare
<typename
STR::value_type
>());
556 bool EndsWith(const std::string
& str
, const std::string
& search
,
557 bool case_sensitive
) {
558 return EndsWithT(str
, search
, case_sensitive
);
561 bool EndsWith(const std::wstring
& str
, const std::wstring
& search
,
562 bool case_sensitive
) {
563 return EndsWithT(str
, search
, case_sensitive
);
566 #if !defined(WCHAR_T_IS_UTF16)
567 bool EndsWith(const string16
& str
, const string16
& search
,
568 bool case_sensitive
) {
569 return EndsWithT(str
, search
, case_sensitive
);
573 static const char* const kByteStringsUnlocalized
[] = {
582 string16
FormatBytesUnlocalized(int64 bytes
) {
583 double unit_amount
= static_cast<double>(bytes
);
584 size_t dimension
= 0;
585 const int kKilo
= 1024;
586 while (unit_amount
>= kKilo
&&
587 dimension
< arraysize(kByteStringsUnlocalized
) - 1) {
588 unit_amount
/= kKilo
;
593 if (bytes
!= 0 && dimension
> 0 && unit_amount
< 100) {
594 base::snprintf(buf
, arraysize(buf
), "%.1lf%s", unit_amount
,
595 kByteStringsUnlocalized
[dimension
]);
597 base::snprintf(buf
, arraysize(buf
), "%.0lf%s", unit_amount
,
598 kByteStringsUnlocalized
[dimension
]);
601 return ASCIIToUTF16(buf
);
// Replaces occurrences of |find_this| in |*str| with |replace_with|, searching
// from |start_offset|. Only the first occurrence is replaced unless
// |replace_all| is true. Does nothing when |start_offset| is npos or not
// inside the string, or when |find_this| is empty.
template<class StringType>
void DoReplaceSubstringsAfterOffset(StringType* str,
                                    typename StringType::size_type start_offset,
                                    const StringType& find_this,
                                    const StringType& replace_with,
                                    bool replace_all) {
  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
    return;

  DCHECK(!find_this.empty());
  // An empty search string matches at every offset, so when the DCHECK above
  // is compiled out the loop below would never terminate with |replace_all|
  // set. Treat it as "nothing to replace" instead.
  if (find_this.empty())
    return;

  for (typename StringType::size_type offs(str->find(find_this, start_offset));
       offs != StringType::npos; offs = str->find(find_this, offs)) {
    str->replace(offs, find_this.length(), replace_with);
    // Continue after the inserted text so it is never rescanned.
    offs += replace_with.length();

    if (!replace_all)
      break;
  }
}
624 void ReplaceFirstSubstringAfterOffset(string16
* str
,
625 string16::size_type start_offset
,
626 const string16
& find_this
,
627 const string16
& replace_with
) {
628 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
629 false); // replace first instance
632 void ReplaceFirstSubstringAfterOffset(std::string
* str
,
633 std::string::size_type start_offset
,
634 const std::string
& find_this
,
635 const std::string
& replace_with
) {
636 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
637 false); // replace first instance
640 void ReplaceSubstringsAfterOffset(string16
* str
,
641 string16::size_type start_offset
,
642 const string16
& find_this
,
643 const string16
& replace_with
) {
644 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
645 true); // replace all instances
648 void ReplaceSubstringsAfterOffset(std::string
* str
,
649 std::string::size_type start_offset
,
650 const std::string
& find_this
,
651 const std::string
& replace_with
) {
652 DoReplaceSubstringsAfterOffset(str
, start_offset
, find_this
, replace_with
,
653 true); // replace all instances
// Splits |str| at any character contained in |delimiters| and stores the
// non-empty pieces in |tokens|, replacing its previous contents. Runs of
// delimiters produce no empty tokens. Returns the number of tokens stored.
template<typename STR>
static size_t TokenizeT(const STR& str,
                        const STR& delimiters,
                        std::vector<STR>* tokens) {
  typedef typename STR::size_type Index;
  tokens->clear();

  for (Index token_begin = str.find_first_not_of(delimiters);
       token_begin != STR::npos;) {
    const Index token_end = str.find_first_of(delimiters, token_begin + 1);
    if (token_end == STR::npos) {
      // No further delimiter: the rest of the string is the final token.
      tokens->push_back(str.substr(token_begin));
      break;
    }
    tokens->push_back(str.substr(token_begin, token_end - token_begin));
    token_begin = str.find_first_not_of(delimiters, token_end + 1);
  }

  return tokens->size();
}
678 size_t Tokenize(const std::wstring
& str
,
679 const std::wstring
& delimiters
,
680 std::vector
<std::wstring
>* tokens
) {
681 return TokenizeT(str
, delimiters
, tokens
);
684 #if !defined(WCHAR_T_IS_UTF16)
685 size_t Tokenize(const string16
& str
,
686 const string16
& delimiters
,
687 std::vector
<string16
>* tokens
) {
688 return TokenizeT(str
, delimiters
, tokens
);
692 size_t Tokenize(const std::string
& str
,
693 const std::string
& delimiters
,
694 std::vector
<std::string
>* tokens
) {
695 return TokenizeT(str
, delimiters
, tokens
);
698 size_t Tokenize(const base::StringPiece
& str
,
699 const base::StringPiece
& delimiters
,
700 std::vector
<base::StringPiece
>* tokens
) {
701 return TokenizeT(str
, delimiters
, tokens
);
// Concatenates the elements of |parts|, inserting |sep| between consecutive
// elements. Returns an empty string when |parts| is empty.
template<typename STR>
static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
  typename std::vector<STR>::const_iterator current = parts.begin();
  const typename std::vector<STR>::const_iterator last = parts.end();
  if (current == last)
    return STR();

  // Seed with the first element, then prefix every later one with |sep|.
  STR joined(*current);
  for (++current; current != last; ++current) {
    joined += sep;
    joined += *current;
  }

  return joined;
}
721 std::string
JoinString(const std::vector
<std::string
>& parts
, char sep
) {
722 return JoinStringT(parts
, std::string(1, sep
));
725 string16
JoinString(const std::vector
<string16
>& parts
, char16 sep
) {
726 return JoinStringT(parts
, string16(1, sep
));
729 std::string
JoinString(const std::vector
<std::string
>& parts
,
730 const std::string
& separator
) {
731 return JoinStringT(parts
, separator
);
734 string16
JoinString(const std::vector
<string16
>& parts
,
735 const string16
& separator
) {
736 return JoinStringT(parts
, separator
);
739 template<class FormatStringType
, class OutStringType
>
740 OutStringType
DoReplaceStringPlaceholders(const FormatStringType
& format_string
,
741 const std::vector
<OutStringType
>& subst
, std::vector
<size_t>* offsets
) {
742 size_t substitutions
= subst
.size();
744 size_t sub_length
= 0;
745 for (typename
std::vector
<OutStringType
>::const_iterator iter
= subst
.begin();
746 iter
!= subst
.end(); ++iter
) {
747 sub_length
+= iter
->length();
750 OutStringType formatted
;
751 formatted
.reserve(format_string
.length() + sub_length
);
753 std::vector
<ReplacementOffset
> r_offsets
;
754 for (typename
FormatStringType::const_iterator i
= format_string
.begin();
755 i
!= format_string
.end(); ++i
) {
757 if (i
+ 1 != format_string
.end()) {
759 DCHECK('$' == *i
|| '1' <= *i
) << "Invalid placeholder: " << *i
;
761 while (i
!= format_string
.end() && '$' == *i
) {
762 formatted
.push_back('$');
768 while (i
!= format_string
.end() && '0' <= *i
&& *i
<= '9') {
776 ReplacementOffset
r_offset(index
,
777 static_cast<int>(formatted
.size()));
778 r_offsets
.insert(std::lower_bound(r_offsets
.begin(),
784 if (index
< substitutions
)
785 formatted
.append(subst
.at(index
));
789 formatted
.push_back(*i
);
793 for (std::vector
<ReplacementOffset
>::const_iterator i
= r_offsets
.begin();
794 i
!= r_offsets
.end(); ++i
) {
795 offsets
->push_back(i
->offset
);
801 string16
ReplaceStringPlaceholders(const string16
& format_string
,
802 const std::vector
<string16
>& subst
,
803 std::vector
<size_t>* offsets
) {
804 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
807 std::string
ReplaceStringPlaceholders(const base::StringPiece
& format_string
,
808 const std::vector
<std::string
>& subst
,
809 std::vector
<size_t>* offsets
) {
810 return DoReplaceStringPlaceholders(format_string
, subst
, offsets
);
813 string16
ReplaceStringPlaceholders(const string16
& format_string
,
816 std::vector
<size_t> offsets
;
817 std::vector
<string16
> subst
;
819 string16 result
= ReplaceStringPlaceholders(format_string
, subst
, &offsets
);
821 DCHECK(offsets
.size() == 1);
823 *offset
= offsets
[0];
828 static bool IsWildcard(base_icu::UChar32 character
) {
829 return character
== '*' || character
== '?';
832 // Move the strings pointers to the point where they start to differ.
833 template <typename CHAR
, typename NEXT
>
834 static void EatSameChars(const CHAR
** pattern
, const CHAR
* pattern_end
,
835 const CHAR
** string
, const CHAR
* string_end
,
837 const CHAR
* escape
= NULL
;
838 while (*pattern
!= pattern_end
&& *string
!= string_end
) {
839 if (!escape
&& IsWildcard(**pattern
)) {
840 // We don't want to match wildcard here, except if it's escaped.
844 // Check if the escapement char is found. If so, skip it and move to the
846 if (!escape
&& **pattern
== '\\') {
848 next(pattern
, pattern_end
);
852 // Check if the chars match, if so, increment the ptrs.
853 const CHAR
* pattern_next
= *pattern
;
854 const CHAR
* string_next
= *string
;
855 base_icu::UChar32 pattern_char
= next(&pattern_next
, pattern_end
);
856 if (pattern_char
== next(&string_next
, string_end
) &&
857 pattern_char
!= (base_icu::UChar32
) CBU_SENTINEL
) {
858 *pattern
= pattern_next
;
859 *string
= string_next
;
861 // Uh oh, it did not match, so we are done. If the last char was an
862 // escapement, that means that it was an error to advance the ptr here;
863 // let's put it back where it was. This also means that the MatchPattern
864 // function will return false because if we can't match an escape char
865 // here, then no one will.
876 template <typename CHAR
, typename NEXT
>
877 static void EatWildcard(const CHAR
** pattern
, const CHAR
* end
, NEXT next
) {
878 while (*pattern
!= end
) {
879 if (!IsWildcard(**pattern
))
885 template <typename CHAR
, typename NEXT
>
886 static bool MatchPatternT(const CHAR
* eval
, const CHAR
* eval_end
,
887 const CHAR
* pattern
, const CHAR
* pattern_end
,
890 const int kMaxDepth
= 16;
891 if (depth
> kMaxDepth
)
894 // Eat all the matching chars.
895 EatSameChars(&pattern
, pattern_end
, &eval
, eval_end
, next
);
897 // If the string is empty, then the pattern must be empty too, or contains
899 if (eval
== eval_end
) {
900 EatWildcard(&pattern
, pattern_end
, next
);
901 return pattern
== pattern_end
;
904 // Pattern is empty but not string, this is not a match.
905 if (pattern
== pattern_end
)
908 // If this is a question mark, then we need to compare the rest with
909 // the current string or the string with one character eaten.
910 const CHAR
* next_pattern
= pattern
;
911 next(&next_pattern
, pattern_end
);
912 if (pattern
[0] == '?') {
913 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
916 const CHAR
* next_eval
= eval
;
917 next(&next_eval
, eval_end
);
918 if (MatchPatternT(next_eval
, eval_end
, next_pattern
, pattern_end
,
923 // This is a *, try to match all the possible substrings with the remainder
925 if (pattern
[0] == '*') {
926 // Collapse duplicate wild cards (********** into *) so that the
927 // method does not recurse unnecessarily. http://crbug.com/52839
928 EatWildcard(&next_pattern
, pattern_end
, next
);
930 while (eval
!= eval_end
) {
931 if (MatchPatternT(eval
, eval_end
, next_pattern
, pattern_end
,
937 // We reached the end of the string, let's see if the pattern contains only
939 if (eval
== eval_end
) {
940 EatWildcard(&pattern
, pattern_end
, next
);
941 if (pattern
!= pattern_end
)
950 struct NextCharUTF8
{
951 base_icu::UChar32
operator()(const char** p
, const char* end
) {
954 CBU8_NEXT(*p
, offset
, end
- *p
, c
);
960 struct NextCharUTF16
{
961 base_icu::UChar32
operator()(const char16
** p
, const char16
* end
) {
964 CBU16_NEXT(*p
, offset
, end
- *p
, c
);
970 bool MatchPattern(const base::StringPiece
& eval
,
971 const base::StringPiece
& pattern
) {
972 return MatchPatternT(eval
.data(), eval
.data() + eval
.size(),
973 pattern
.data(), pattern
.data() + pattern
.size(),
977 bool MatchPattern(const string16
& eval
, const string16
& pattern
) {
978 return MatchPatternT(eval
.c_str(), eval
.c_str() + eval
.size(),
979 pattern
.c_str(), pattern
.c_str() + pattern
.size(),
983 // The following code is compatible with the OpenBSD lcpy interface. See:
984 // http://www.gratisoft.us/todd/papers/strlcpy.html
985 // ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
// Copies the NUL-terminated string |src| into |dst|, which has room for
// |dst_size| characters. The result is truncated and NUL-terminated if it
// does not fit; |dst| is left untouched when |dst_size| is 0. Returns the
// length of |src| in characters, so a return value >= |dst_size| signals
// truncation.
template <typename CHAR>
size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
  size_t copied = 0;
  while (copied < dst_size) {
    const CHAR current = src[copied];
    dst[copied] = current;
    if (current == 0)  // We hit and copied the terminating NUL.
      return copied;
    ++copied;
  }

  // |src| did not fit: the last slot of |dst| holds a source character rather
  // than a terminator, so overwrite it.
  if (dst_size != 0)
    dst[dst_size - 1] = 0;

  // Count the rest of |src|, and return its length in characters.
  size_t src_length = dst_size;
  while (src[src_length] != 0)
    ++src_length;
  return src_length;
}
1007 size_t base::strlcpy(char* dst
, const char* src
, size_t dst_size
) {
1008 return lcpyT
<char>(dst
, src
, dst_size
);
1010 size_t base::wcslcpy(wchar_t* dst
, const wchar_t* src
, size_t dst_size
) {
1011 return lcpyT
<wchar_t>(dst
, src
, dst_size
);