/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef __nsCharSeparatedTokenizer_h
7 #define __nsCharSeparatedTokenizer_h
#include <stdint.h>

#include "mozilla/RangedPtr.h"

#include "nsDependentSubstring.h"
15 * This parses a SeparatorChar-separated string into tokens.
16 * Whitespace surrounding tokens is not treated as part of tokens, however
17 * whitespace inside a token is. If the final token is the empty string, it is
20 * Some examples, with SeparatorChar = ',':
22 * "foo, bar, baz" -> "foo" "bar" "baz"
23 * "foo,bar,baz" -> "foo" "bar" "baz"
24 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
25 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
26 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
27 * "foo,bar,baz," -> "foo" "bar" "baz"
29 * The function used for whitespace detection is a template argument.
30 * By default, it is NS_IsAsciiWhitespace.
32 template<typename DependentSubstringType
, bool IsWhitespace(char16_t
)>
33 class nsTCharSeparatedTokenizer
35 typedef typename
DependentSubstringType::char_type CharType
;
36 typedef typename
DependentSubstringType::substring_type SubstringType
;
39 // Flags -- only one for now. If we need more, they should be defined to
40 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
43 SEPARATOR_OPTIONAL
= 1
46 nsTCharSeparatedTokenizer(const SubstringType
& aSource
,
47 CharType aSeparatorChar
,
49 : mIter(aSource
.Data(), aSource
.Length())
50 , mEnd(aSource
.Data() + aSource
.Length(), aSource
.Data(),
52 , mSeparatorChar(aSeparatorChar
)
53 , mWhitespaceBeforeFirstToken(false)
54 , mWhitespaceAfterCurrentToken(false)
55 , mSeparatorAfterCurrentToken(false)
56 , mSeparatorOptional(aFlags
& SEPARATOR_OPTIONAL
)
58 // Skip initial whitespace
59 while (mIter
< mEnd
&& IsWhitespace(*mIter
)) {
60 mWhitespaceBeforeFirstToken
= true;
66 * Checks if any more tokens are available.
68 bool hasMoreTokens() const
70 MOZ_ASSERT(mIter
== mEnd
|| !IsWhitespace(*mIter
),
71 "Should be at beginning of token if there is one");
77 * Returns true if there is whitespace prior to the first token.
79 bool whitespaceBeforeFirstToken() const
81 return mWhitespaceBeforeFirstToken
;
85 * Returns true if there is a separator after the current token.
86 * Useful if you want to check whether the last token has a separator
87 * after it which may not be valid.
89 bool separatorAfterCurrentToken() const
91 return mSeparatorAfterCurrentToken
;
95 * Returns true if there is any whitespace after the current token.
97 bool whitespaceAfterCurrentToken() const
99 return mWhitespaceAfterCurrentToken
;
103 * Returns the next token.
105 const DependentSubstringType
nextToken()
107 mozilla::RangedPtr
<const CharType
> tokenStart
= mIter
;
108 mozilla::RangedPtr
<const CharType
> tokenEnd
= mIter
;
110 MOZ_ASSERT(mIter
== mEnd
|| !IsWhitespace(*mIter
),
111 "Should be at beginning of token if there is one");
113 // Search until we hit separator or end (or whitespace, if a separator
114 // isn't required -- see clause with 'break' below).
115 while (mIter
< mEnd
&& *mIter
!= mSeparatorChar
) {
116 // Skip to end of the current word.
117 while (mIter
< mEnd
&&
118 !IsWhitespace(*mIter
) && *mIter
!= mSeparatorChar
) {
123 // Skip whitespace after the current word.
124 mWhitespaceAfterCurrentToken
= false;
125 while (mIter
< mEnd
&& IsWhitespace(*mIter
)) {
126 mWhitespaceAfterCurrentToken
= true;
129 if (mSeparatorOptional
) {
130 // We've hit (and skipped) whitespace, and that's sufficient to end
131 // our token, regardless of whether we've reached a SeparatorChar.
133 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
136 mSeparatorAfterCurrentToken
= (mIter
!= mEnd
&&
137 *mIter
== mSeparatorChar
);
138 MOZ_ASSERT(mSeparatorOptional
||
139 (mSeparatorAfterCurrentToken
== (mIter
< mEnd
)),
140 "If we require a separator and haven't hit the end of "
141 "our string, then we shouldn't have left the loop "
142 "unless we hit a separator");
144 // Skip separator (and any whitespace after it), if we're at one.
145 if (mSeparatorAfterCurrentToken
) {
148 while (mIter
< mEnd
&& IsWhitespace(*mIter
)) {
149 mWhitespaceAfterCurrentToken
= true;
154 return Substring(tokenStart
.get(), tokenEnd
.get());
158 mozilla::RangedPtr
<const CharType
> mIter
;
159 const mozilla::RangedPtr
<const CharType
> mEnd
;
160 CharType mSeparatorChar
;
161 bool mWhitespaceBeforeFirstToken
;
162 bool mWhitespaceAfterCurrentToken
;
163 bool mSeparatorAfterCurrentToken
;
164 bool mSeparatorOptional
;
167 template<bool IsWhitespace(char16_t
) = NS_IsAsciiWhitespace
>
168 class nsCharSeparatedTokenizerTemplate
169 : public nsTCharSeparatedTokenizer
<nsDependentSubstring
, IsWhitespace
>
172 nsCharSeparatedTokenizerTemplate(const nsSubstring
& aSource
,
173 char16_t aSeparatorChar
,
175 : nsTCharSeparatedTokenizer
<nsDependentSubstring
,
176 IsWhitespace
>(aSource
, aSeparatorChar
, aFlags
)
181 typedef nsCharSeparatedTokenizerTemplate
<> nsCharSeparatedTokenizer
;
183 template<bool IsWhitespace(char16_t
) = NS_IsAsciiWhitespace
>
184 class nsCCharSeparatedTokenizerTemplate
185 : public nsTCharSeparatedTokenizer
<nsDependentCSubstring
, IsWhitespace
>
188 nsCCharSeparatedTokenizerTemplate(const nsCSubstring
& aSource
,
191 : nsTCharSeparatedTokenizer
<nsDependentCSubstring
,
192 IsWhitespace
>(aSource
, aSeparatorChar
, aFlags
)
197 typedef nsCCharSeparatedTokenizerTemplate
<> nsCCharSeparatedTokenizer
;
199 #endif /* __nsCharSeparatedTokenizer_h */