1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "base/string_split.h"
7 #include "base/logging.h"
8 #include "base/string_util.h"
9 #include "base/third_party/icu/icu_utf.h"
10 #include "base/utf_string_conversions.h"
14 template<typename STR
>
15 static void SplitStringT(const STR
& str
,
16 const typename
STR::value_type s
,
18 std::vector
<STR
>* r
) {
21 size_t c
= str
.size();
22 for (i
= 0; i
<= c
; ++i
) {
23 if (i
== c
|| str
[i
] == s
) {
24 size_t len
= i
- last
;
25 STR tmp
= str
.substr(last
, len
);
26 if (trim_whitespace
) {
28 TrimWhitespace(tmp
, TRIM_ALL
, &t_tmp
);
38 void SplitString(const string16
& str
,
40 std::vector
<string16
>* r
) {
41 DCHECK(CBU16_IS_SINGLE(c
));
42 SplitStringT(str
, c
, true, r
);
45 void SplitString(const std::string
& str
,
47 std::vector
<std::string
>* r
) {
48 DCHECK(c
>= 0 && c
< 0x7F);
49 SplitStringT(str
, c
, true, r
);
52 bool SplitStringIntoKeyValues(
53 const std::string
& line
,
54 char key_value_delimiter
,
55 std::string
* key
, std::vector
<std::string
>* values
) {
59 // Find the key string.
60 size_t end_key_pos
= line
.find_first_of(key_value_delimiter
);
61 if (end_key_pos
== std::string::npos
) {
62 DVLOG(1) << "cannot parse key from line: " << line
;
63 return false; // no key
65 key
->assign(line
, 0, end_key_pos
);
67 // Find the values string.
68 std::string
remains(line
, end_key_pos
, line
.size() - end_key_pos
);
69 size_t begin_values_pos
= remains
.find_first_not_of(key_value_delimiter
);
70 if (begin_values_pos
== std::string::npos
) {
71 DVLOG(1) << "cannot parse value from line: " << line
;
72 return false; // no value
74 std::string
values_string(remains
, begin_values_pos
,
75 remains
.size() - begin_values_pos
);
77 // Construct the values vector.
78 values
->push_back(values_string
);
82 bool SplitStringIntoKeyValuePairs(
83 const std::string
& line
,
84 char key_value_delimiter
,
85 char key_value_pair_delimiter
,
86 std::vector
<std::pair
<std::string
, std::string
> >* kv_pairs
) {
89 std::vector
<std::string
> pairs
;
90 SplitString(line
, key_value_pair_delimiter
, &pairs
);
93 for (size_t i
= 0; i
< pairs
.size(); ++i
) {
94 // Empty pair. SplitStringIntoKeyValues is more strict about an empty pair
95 // line, so continue with the next pair.
100 std::vector
<std::string
> value
;
101 if (!SplitStringIntoKeyValues(pairs
[i
],
104 // Don't return here, to allow for keys without associated
105 // values; just record that our split failed.
108 DCHECK_LE(value
.size(), 1U);
109 kv_pairs
->push_back(make_pair(key
, value
.empty()? "" : value
[0]));
114 template <typename STR
>
115 static void SplitStringUsingSubstrT(const STR
& str
,
117 std::vector
<STR
>* r
) {
118 typename
STR::size_type begin_index
= 0;
120 const typename
STR::size_type end_index
= str
.find(s
, begin_index
);
121 if (end_index
== STR::npos
) {
122 const STR term
= str
.substr(begin_index
);
124 TrimWhitespace(term
, TRIM_ALL
, &tmp
);
128 const STR term
= str
.substr(begin_index
, end_index
- begin_index
);
130 TrimWhitespace(term
, TRIM_ALL
, &tmp
);
132 begin_index
= end_index
+ s
.size();
136 void SplitStringUsingSubstr(const string16
& str
,
138 std::vector
<string16
>* r
) {
139 SplitStringUsingSubstrT(str
, s
, r
);
142 void SplitStringUsingSubstr(const std::string
& str
,
143 const std::string
& s
,
144 std::vector
<std::string
>* r
) {
145 SplitStringUsingSubstrT(str
, s
, r
);
148 void SplitStringDontTrim(const string16
& str
,
150 std::vector
<string16
>* r
) {
151 DCHECK(CBU16_IS_SINGLE(c
));
152 SplitStringT(str
, c
, false, r
);
155 void SplitStringDontTrim(const std::string
& str
,
157 std::vector
<std::string
>* r
) {
158 DCHECK(IsStringUTF8(str
));
159 DCHECK(c
>= 0 && c
< 0x7F);
160 SplitStringT(str
, c
, false, r
);
// Splits |str| into maximal runs of non-whitespace characters and appends each
// run to |*result|. Leading, trailing and repeated whitespace never produces
// empty entries. |*result| is not cleared first, and an empty |str| leaves it
// untouched.
template<typename STR>
void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
  const size_t length = str.length();
  if (!length)
    return;

  bool last_was_ws = false;      // True while scanning a run of whitespace.
  size_t last_non_ws_start = 0;  // Start index of the current token.
  for (size_t i = 0; i < length; ++i) {
    switch (str[i]) {
      // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
      case L' ':
      case L'\t':
      case L'\xA':
      case L'\xB':
      case L'\xC':
      case L'\xD':
        if (!last_was_ws) {
          // First whitespace after a token: emit the token. When i == 0 the
          // string starts with whitespace and there is no token to emit yet.
          if (i > 0) {
            result->push_back(
                str.substr(last_non_ws_start, i - last_non_ws_start));
          }
          last_was_ws = true;
        }
        break;

      default:  // Not a space character.
        if (last_was_ws) {
          last_was_ws = false;
          last_non_ws_start = i;
        }
        break;
    }
  }
  // Flush the final token when the string does not end in whitespace.
  if (!last_was_ws) {
    result->push_back(
        str.substr(last_non_ws_start, length - last_non_ws_start));
  }
}
203 void SplitStringAlongWhitespace(const std::wstring
& str
,
204 std::vector
<std::wstring
>* result
) {
205 SplitStringAlongWhitespaceT(str
, result
);
208 #if !defined(WCHAR_T_IS_UTF16)
209 void SplitStringAlongWhitespace(const string16
& str
,
210 std::vector
<string16
>* result
) {
211 SplitStringAlongWhitespaceT(str
, result
);
215 void SplitStringAlongWhitespace(const std::string
& str
,
216 std::vector
<std::string
>* result
) {
217 SplitStringAlongWhitespaceT(str
, result
);