/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef __nsCharSeparatedTokenizer_h
7 #define __nsCharSeparatedTokenizer_h
9 #include "nsDependentSubstring.h"
/**
 * This parses a SeparatorChar-separated string into tokens.
 * Whitespace surrounding tokens is not treated as part of tokens, however
 * whitespace inside a token is. If the final token is the empty string, it is
 * not returned.
 *
 * Some examples, with SeparatorChar = ',':
 *
 * "foo, bar, baz" -> "foo" "bar" "baz"
 * "foo,bar,baz" -> "foo" "bar" "baz"
 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
 * "foo,bar,baz," -> "foo" "bar" "baz"
 *
 * The function used for whitespace detection is a template argument.
 * By default, it is NS_IsAsciiWhitespace.
 */
30 template<bool IsWhitespace(PRUnichar
) = NS_IsAsciiWhitespace
>
31 class nsCharSeparatedTokenizerTemplate
34 // Flags -- only one for now. If we need more, they should be defined to
35 // be 1<<1, 1<<2, etc. (They're masks, and aFlags/mFlags are bitfields.)
37 SEPARATOR_OPTIONAL
= 1
40 nsCharSeparatedTokenizerTemplate(const nsSubstring
& aSource
,
41 PRUnichar aSeparatorChar
,
43 : mFirstTokenBeganWithWhitespace(false),
44 mLastTokenEndedWithWhitespace(false),
45 mLastTokenEndedWithSeparator(false),
46 mSeparatorChar(aSeparatorChar
),
49 aSource
.BeginReading(mIter
);
50 aSource
.EndReading(mEnd
);
52 // Skip initial whitespace
53 while (mIter
!= mEnd
&& IsWhitespace(*mIter
)) {
54 mFirstTokenBeganWithWhitespace
= true;
60 * Checks if any more tokens are available.
64 NS_ASSERTION(mIter
== mEnd
|| !IsWhitespace(*mIter
),
65 "Should be at beginning of token if there is one");
70 bool firstTokenBeganWithWhitespace() const
72 return mFirstTokenBeganWithWhitespace
;
75 bool lastTokenEndedWithSeparator() const
77 return mLastTokenEndedWithSeparator
;
80 bool lastTokenEndedWithWhitespace() const
82 return mLastTokenEndedWithWhitespace
;
86 * Returns the next token.
88 const nsDependentSubstring
nextToken()
90 nsSubstring::const_char_iterator end
= mIter
, begin
= mIter
;
92 NS_ASSERTION(mIter
== mEnd
|| !IsWhitespace(*mIter
),
93 "Should be at beginning of token if there is one");
95 // Search until we hit separator or end (or whitespace, if separator
96 // isn't required -- see clause with 'break' below).
97 while (mIter
!= mEnd
&& *mIter
!= mSeparatorChar
) {
98 // Skip to end of current word.
99 while (mIter
!= mEnd
&&
100 !IsWhitespace(*mIter
) && *mIter
!= mSeparatorChar
) {
105 // Skip whitespace after current word.
106 mLastTokenEndedWithWhitespace
= false;
107 while (mIter
!= mEnd
&& IsWhitespace(*mIter
)) {
108 mLastTokenEndedWithWhitespace
= true;
111 if (mFlags
& SEPARATOR_OPTIONAL
) {
112 // We've hit (and skipped) whitespace, and that's sufficient to end
113 // our token, regardless of whether we've reached a SeparatorChar.
115 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
118 mLastTokenEndedWithSeparator
= (mIter
!= mEnd
&&
119 *mIter
== mSeparatorChar
);
120 NS_ASSERTION((mFlags
& SEPARATOR_OPTIONAL
) ||
121 (mLastTokenEndedWithSeparator
== (mIter
!= mEnd
)),
122 "If we require a separator and haven't hit the end of "
123 "our string, then we shouldn't have left the loop "
124 "unless we hit a separator");
126 // Skip separator (and any whitespace after it), if we're at one.
127 if (mLastTokenEndedWithSeparator
) {
130 while (mIter
!= mEnd
&& IsWhitespace(*mIter
)) {
135 return Substring(begin
, end
);
139 nsSubstring::const_char_iterator mIter
, mEnd
;
140 bool mFirstTokenBeganWithWhitespace
;
141 bool mLastTokenEndedWithWhitespace
;
142 bool mLastTokenEndedWithSeparator
;
143 PRUnichar mSeparatorChar
;
147 class nsCharSeparatedTokenizer
: public nsCharSeparatedTokenizerTemplate
<>
150 nsCharSeparatedTokenizer(const nsSubstring
& aSource
,
151 PRUnichar aSeparatorChar
,
153 : nsCharSeparatedTokenizerTemplate
<>(aSource
, aSeparatorChar
, aFlags
)
158 class nsCCharSeparatedTokenizer
161 nsCCharSeparatedTokenizer(const nsCSubstring
& aSource
,
163 : mSeparatorChar(aSeparatorChar
)
165 aSource
.BeginReading(mIter
);
166 aSource
.EndReading(mEnd
);
168 while (mIter
!= mEnd
&& isWhitespace(*mIter
)) {
174 * Checks if any more tokens are available.
178 return mIter
!= mEnd
;
182 * Returns the next token.
184 const nsDependentCSubstring
nextToken()
186 nsCSubstring::const_char_iterator end
= mIter
, begin
= mIter
;
188 // Search until we hit separator or end.
189 while (mIter
!= mEnd
&& *mIter
!= mSeparatorChar
) {
190 while (mIter
!= mEnd
&&
191 !isWhitespace(*mIter
) && *mIter
!= mSeparatorChar
) {
196 while (mIter
!= mEnd
&& isWhitespace(*mIter
)) {
201 // Skip separator (and any whitespace after it).
203 NS_ASSERTION(*mIter
== mSeparatorChar
, "Ended loop too soon");
206 while (mIter
!= mEnd
&& isWhitespace(*mIter
)) {
211 return Substring(begin
, end
);
215 nsCSubstring::const_char_iterator mIter
, mEnd
;
218 bool isWhitespace(unsigned char aChar
)
220 return aChar
<= ' ' &&
221 (aChar
== ' ' || aChar
== '\n' ||
222 aChar
== '\r'|| aChar
== '\t');
226 #endif /* __nsCharSeparatedTokenizer_h */