1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef __nsCharSeparatedTokenizer_h
8 #define __nsCharSeparatedTokenizer_h
10 #include "mozilla/Maybe.h"
11 #include "mozilla/RangedPtr.h"
12 #include "mozilla/TypedEnumBits.h"
14 #include "nsCRTGlue.h"
15 #include "nsTDependentSubstring.h"
// Flags controlling tokenizer behavior. They are bit masks (the Flags
// template argument is tested with operator&), so every additional flag must
// occupy its own bit: 1 << 2, 1 << 3, etc.
enum class nsTokenizerFlags {
  // No flag set. Used as the default value of the Flags template argument.
  Default = 0,
  // Whitespace following a word is sufficient to end the token, whether or
  // not a separator character has been reached.
  SeparatorOptional = 1 << 0,
  // When the input ends right after a separator, report one more (empty)
  // token instead of stopping.
  IncludeEmptyTokenAtEnd = 1 << 1
};
25 MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(nsTokenizerFlags
)
28 * This parses a SeparatorChar-separated string into tokens.
29 * Whitespace surrounding tokens is not treated as part of tokens, however
30 * whitespace inside a token is. If the final token is the empty string, it is
31 * not returned by default.
33 * Some examples, with SeparatorChar = ',':
35 * "foo, bar, baz" -> "foo" "bar" "baz"
36 * "foo,bar,baz" -> "foo" "bar" "baz"
37 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
38 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
39 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
40 * "foo,bar,baz," -> "foo" "bar" "baz"
42 * The function used for whitespace detection is a template argument.
43 * By default, it is NS_IsAsciiWhitespace.
45 template <typename TDependentSubstringType
, bool IsWhitespace(char16_t
),
46 nsTokenizerFlags Flags
= nsTokenizerFlags::Default
>
47 class nsTCharSeparatedTokenizer
{
48 using CharType
= typename
TDependentSubstringType::char_type
;
49 using SubstringType
= typename
TDependentSubstringType::substring_type
;
52 using DependentSubstringType
= TDependentSubstringType
;
54 nsTCharSeparatedTokenizer(const SubstringType
& aSource
,
55 CharType aSeparatorChar
)
56 : mIter(aSource
.Data(), aSource
.Length()),
57 mEnd(aSource
.Data() + aSource
.Length(), aSource
.Data(),
59 mSeparatorChar(aSeparatorChar
),
60 mWhitespaceBeforeFirstToken(false),
61 mWhitespaceAfterCurrentToken(false),
62 mSeparatorAfterCurrentToken(false) {
63 // Skip initial whitespace
64 while (mIter
< mEnd
&& IsWhitespace(*mIter
)) {
65 mWhitespaceBeforeFirstToken
= true;
71 * Checks if any more tokens are available.
73 bool hasMoreTokens() const {
74 MOZ_ASSERT(mIter
== mEnd
|| !IsWhitespace(*mIter
),
75 "Should be at beginning of token if there is one");
77 if constexpr (Flags
& nsTokenizerFlags::IncludeEmptyTokenAtEnd
) {
78 return mIter
< mEnd
|| (mIter
== mEnd
&& mSeparatorAfterCurrentToken
);
85 * Returns true if there is whitespace prior to the first token.
87 bool whitespaceBeforeFirstToken() const {
88 return mWhitespaceBeforeFirstToken
;
92 * Returns true if there is a separator after the current token.
93 * Useful if you want to check whether the last token has a separator
94 * after it which may not be valid.
96 bool separatorAfterCurrentToken() const {
97 return mSeparatorAfterCurrentToken
;
101 * Returns true if there is any whitespace after the current token.
103 bool whitespaceAfterCurrentToken() const {
104 return mWhitespaceAfterCurrentToken
;
108 * Returns the next token.
110 const DependentSubstringType
nextToken() {
111 mozilla::RangedPtr
<const CharType
> tokenStart
= mIter
;
112 mozilla::RangedPtr
<const CharType
> tokenEnd
= mIter
;
114 MOZ_ASSERT(mIter
== mEnd
|| !IsWhitespace(*mIter
),
115 "Should be at beginning of token if there is one");
117 // Search until we hit separator or end (or whitespace, if a separator
118 // isn't required -- see clause with 'break' below).
119 while (mIter
< mEnd
&& *mIter
!= mSeparatorChar
) {
120 // Skip to end of the current word.
121 while (mIter
< mEnd
&& !IsWhitespace(*mIter
) &&
122 *mIter
!= mSeparatorChar
) {
127 // Skip whitespace after the current word.
128 mWhitespaceAfterCurrentToken
= false;
129 while (mIter
< mEnd
&& IsWhitespace(*mIter
)) {
130 mWhitespaceAfterCurrentToken
= true;
133 if constexpr (Flags
& nsTokenizerFlags::SeparatorOptional
) {
134 // We've hit (and skipped) whitespace, and that's sufficient to end
135 // our token, regardless of whether we've reached a SeparatorChar.
137 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
140 mSeparatorAfterCurrentToken
= (mIter
!= mEnd
&& *mIter
== mSeparatorChar
);
141 MOZ_ASSERT((Flags
& nsTokenizerFlags::SeparatorOptional
) ||
142 (mSeparatorAfterCurrentToken
== (mIter
< mEnd
)),
143 "If we require a separator and haven't hit the end of "
144 "our string, then we shouldn't have left the loop "
145 "unless we hit a separator");
147 // Skip separator (and any whitespace after it), if we're at one.
148 if (mSeparatorAfterCurrentToken
) {
151 while (mIter
< mEnd
&& IsWhitespace(*mIter
)) {
152 mWhitespaceAfterCurrentToken
= true;
157 return Substring(tokenStart
.get(), tokenEnd
.get());
160 auto ToRange() const;
163 mozilla::RangedPtr
<const CharType
> mIter
;
164 const mozilla::RangedPtr
<const CharType
> mEnd
;
165 const CharType mSeparatorChar
;
166 bool mWhitespaceBeforeFirstToken
;
167 bool mWhitespaceAfterCurrentToken
;
168 bool mSeparatorAfterCurrentToken
;
// Whitespace predicate that classifies no character as whitespace. Passing it
// as the IsWhitespace template argument makes a tokenizer keep every character
// between separators.
constexpr bool NS_TokenizerIgnoreNothing(char16_t /* aChar */) {
  return false;
}
173 template <bool IsWhitespace(char16_t
), typename CharType
,
174 nsTokenizerFlags Flags
= nsTokenizerFlags::Default
>
175 using nsTCharSeparatedTokenizerTemplate
=
176 nsTCharSeparatedTokenizer
<nsTDependentSubstring
<CharType
>, IsWhitespace
,
179 template <bool IsWhitespace(char16_t
),
180 nsTokenizerFlags Flags
= nsTokenizerFlags::Default
>
181 using nsCharSeparatedTokenizerTemplate
=
182 nsTCharSeparatedTokenizerTemplate
<IsWhitespace
, char16_t
, Flags
>;
184 using nsCharSeparatedTokenizer
=
185 nsCharSeparatedTokenizerTemplate
<NS_IsAsciiWhitespace
>;
187 template <bool IsWhitespace(char16_t
),
188 nsTokenizerFlags Flags
= nsTokenizerFlags::Default
>
189 using nsCCharSeparatedTokenizerTemplate
=
190 nsTCharSeparatedTokenizerTemplate
<IsWhitespace
, char, Flags
>;
192 using nsCCharSeparatedTokenizer
=
193 nsCCharSeparatedTokenizerTemplate
<NS_IsAsciiWhitespace
>;
196 * Adapts a char separated tokenizer for use in a range-based for loop.
198 * Use this typically only indirectly, e.g. like
200 * for (const auto& token : nsCharSeparatedTokenizer(aText, ' ').ToRange()) {
204 template <typename Tokenizer
>
205 class nsTokenizedRange
{
207 using DependentSubstringType
= typename
Tokenizer::DependentSubstringType
;
209 explicit nsTokenizedRange(Tokenizer
&& aTokenizer
)
210 : mTokenizer(std::move(aTokenizer
)) {}
212 struct EndSentinel
{};
214 explicit Iterator(const Tokenizer
& aTokenizer
) : mTokenizer(aTokenizer
) {
218 const DependentSubstringType
& operator*() const { return *mCurrentToken
; }
220 Iterator
& operator++() {
225 bool operator==(const EndSentinel
&) const {
226 return mCurrentToken
.isNothing();
229 bool operator!=(const EndSentinel
&) const { return mCurrentToken
.isSome(); }
233 mCurrentToken
.reset();
235 if (mTokenizer
.hasMoreTokens()) {
236 mCurrentToken
.emplace(mTokenizer
.nextToken());
240 Tokenizer mTokenizer
;
241 mozilla::Maybe
<DependentSubstringType
> mCurrentToken
;
244 auto begin() const { return Iterator
{mTokenizer
}; }
245 auto end() const { return EndSentinel
{}; }
248 const Tokenizer mTokenizer
;
251 template <typename TDependentSubstringType
, bool IsWhitespace(char16_t
),
252 nsTokenizerFlags Flags
>
253 auto nsTCharSeparatedTokenizer
<TDependentSubstringType
, IsWhitespace
,
254 Flags
>::ToRange() const {
255 return nsTokenizedRange
{nsTCharSeparatedTokenizer
{*this}};
258 // You should not need to instantiate this class directly.
259 // Use nsTSubstring::Split instead.
260 template <typename T
>
261 class nsTSubstringSplitter
262 : public nsTokenizedRange
<nsTCharSeparatedTokenizerTemplate
<
263 NS_TokenizerIgnoreNothing
, T
,
264 nsTokenizerFlags::IncludeEmptyTokenAtEnd
>> {
266 using nsTokenizedRange
<nsTCharSeparatedTokenizerTemplate
<
267 NS_TokenizerIgnoreNothing
, T
,
268 nsTokenizerFlags::IncludeEmptyTokenAtEnd
>>::nsTokenizedRange
;
271 extern template class nsTSubstringSplitter
<char>;
272 extern template class nsTSubstringSplitter
<char16_t
>;
274 #endif /* __nsCharSeparatedTokenizer_h */