no bug - Bumping Firefox l10n changesets r=release a=l10n-bump DONTBUILD CLOSED TREE
[gecko.git] / xpcom / ds / nsCharSeparatedTokenizer.h
blob5cf6992e3e8f68638b6de90de2ebaf0ce9f5d275
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef __nsCharSeparatedTokenizer_h
8 #define __nsCharSeparatedTokenizer_h
10 #include "mozilla/Maybe.h"
11 #include "mozilla/RangedPtr.h"
12 #include "mozilla/TypedEnumBits.h"
14 #include "nsCRTGlue.h"
15 #include "nsTDependentSubstring.h"
// Flags that tune tokenizer behavior. They are bit masks supplied as the
// tokenizer's `Flags` template argument, so each flag occupies its own bit;
// a further flag should take the next free bit (1 << 2).
enum class nsTokenizerFlags {
  // No special behavior.
  Default = 0,
  // Whitespace following a word is sufficient to end a token; a separator
  // character is not required.
  SeparatorOptional = 1 << 0,
  // Report one more (empty) token when the input ends right after a separator.
  IncludeEmptyTokenAtEnd = 1 << 1
};
25 MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(nsTokenizerFlags)
27 /**
28 * This parses a SeparatorChar-separated string into tokens.
29 * Whitespace surrounding tokens is not treated as part of tokens, however
30 * whitespace inside a token is. If the final token is the empty string, it is
31 * not returned by default.
33 * Some examples, with SeparatorChar = ',':
35 * "foo, bar, baz" -> "foo" "bar" "baz"
36 * "foo,bar,baz" -> "foo" "bar" "baz"
37 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
38 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
39 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
40 * "foo,bar,baz," -> "foo" "bar" "baz"
42 * The function used for whitespace detection is a template argument.
43 * By default, it is NS_IsAsciiWhitespace.
45 template <typename TDependentSubstringType, bool IsWhitespace(char16_t),
46 nsTokenizerFlags Flags = nsTokenizerFlags::Default>
47 class nsTCharSeparatedTokenizer {
48 using CharType = typename TDependentSubstringType::char_type;
49 using SubstringType = typename TDependentSubstringType::substring_type;
51 public:
52 using DependentSubstringType = TDependentSubstringType;
54 nsTCharSeparatedTokenizer(const SubstringType& aSource,
55 CharType aSeparatorChar)
56 : mIter(aSource.Data(), aSource.Length()),
57 mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
58 aSource.Length()),
59 mSeparatorChar(aSeparatorChar),
60 mWhitespaceBeforeFirstToken(false),
61 mWhitespaceAfterCurrentToken(false),
62 mSeparatorAfterCurrentToken(false) {
63 // Skip initial whitespace
64 while (mIter < mEnd && IsWhitespace(*mIter)) {
65 mWhitespaceBeforeFirstToken = true;
66 ++mIter;
70 /**
71 * Checks if any more tokens are available.
73 bool hasMoreTokens() const {
74 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
75 "Should be at beginning of token if there is one");
77 if constexpr (Flags & nsTokenizerFlags::IncludeEmptyTokenAtEnd) {
78 return mIter < mEnd || (mIter == mEnd && mSeparatorAfterCurrentToken);
79 } else {
80 return mIter < mEnd;
85 * Returns true if there is whitespace prior to the first token.
87 bool whitespaceBeforeFirstToken() const {
88 return mWhitespaceBeforeFirstToken;
92 * Returns true if there is a separator after the current token.
93 * Useful if you want to check whether the last token has a separator
94 * after it which may not be valid.
96 bool separatorAfterCurrentToken() const {
97 return mSeparatorAfterCurrentToken;
101 * Returns true if there is any whitespace after the current token.
103 bool whitespaceAfterCurrentToken() const {
104 return mWhitespaceAfterCurrentToken;
108 * Returns the next token.
110 const DependentSubstringType nextToken() {
111 mozilla::RangedPtr<const CharType> tokenStart = mIter;
112 mozilla::RangedPtr<const CharType> tokenEnd = mIter;
114 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
115 "Should be at beginning of token if there is one");
117 // Search until we hit separator or end (or whitespace, if a separator
118 // isn't required -- see clause with 'break' below).
119 while (mIter < mEnd && *mIter != mSeparatorChar) {
120 // Skip to end of the current word.
121 while (mIter < mEnd && !IsWhitespace(*mIter) &&
122 *mIter != mSeparatorChar) {
123 ++mIter;
125 tokenEnd = mIter;
127 // Skip whitespace after the current word.
128 mWhitespaceAfterCurrentToken = false;
129 while (mIter < mEnd && IsWhitespace(*mIter)) {
130 mWhitespaceAfterCurrentToken = true;
131 ++mIter;
133 if constexpr (Flags & nsTokenizerFlags::SeparatorOptional) {
134 // We've hit (and skipped) whitespace, and that's sufficient to end
135 // our token, regardless of whether we've reached a SeparatorChar.
136 break;
137 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
140 mSeparatorAfterCurrentToken = (mIter != mEnd && *mIter == mSeparatorChar);
141 MOZ_ASSERT((Flags & nsTokenizerFlags::SeparatorOptional) ||
142 (mSeparatorAfterCurrentToken == (mIter < mEnd)),
143 "If we require a separator and haven't hit the end of "
144 "our string, then we shouldn't have left the loop "
145 "unless we hit a separator");
147 // Skip separator (and any whitespace after it), if we're at one.
148 if (mSeparatorAfterCurrentToken) {
149 ++mIter;
151 while (mIter < mEnd && IsWhitespace(*mIter)) {
152 mWhitespaceAfterCurrentToken = true;
153 ++mIter;
157 return Substring(tokenStart.get(), tokenEnd.get());
160 auto ToRange() const;
162 private:
163 mozilla::RangedPtr<const CharType> mIter;
164 const mozilla::RangedPtr<const CharType> mEnd;
165 const CharType mSeparatorChar;
166 bool mWhitespaceBeforeFirstToken;
167 bool mWhitespaceAfterCurrentToken;
168 bool mSeparatorAfterCurrentToken;
/**
 * Whitespace predicate that classifies no character as whitespace. Passing it
 * as the tokenizer's IsWhitespace argument disables whitespace trimming
 * (this is what nsTSubstringSplitter does).
 */
constexpr bool NS_TokenizerIgnoreNothing(char16_t) { return false; }
173 template <bool IsWhitespace(char16_t), typename CharType,
174 nsTokenizerFlags Flags = nsTokenizerFlags::Default>
175 using nsTCharSeparatedTokenizerTemplate =
176 nsTCharSeparatedTokenizer<nsTDependentSubstring<CharType>, IsWhitespace,
177 Flags>;
179 template <bool IsWhitespace(char16_t),
180 nsTokenizerFlags Flags = nsTokenizerFlags::Default>
181 using nsCharSeparatedTokenizerTemplate =
182 nsTCharSeparatedTokenizerTemplate<IsWhitespace, char16_t, Flags>;
184 using nsCharSeparatedTokenizer =
185 nsCharSeparatedTokenizerTemplate<NS_IsAsciiWhitespace>;
187 template <bool IsWhitespace(char16_t),
188 nsTokenizerFlags Flags = nsTokenizerFlags::Default>
189 using nsCCharSeparatedTokenizerTemplate =
190 nsTCharSeparatedTokenizerTemplate<IsWhitespace, char, Flags>;
192 using nsCCharSeparatedTokenizer =
193 nsCCharSeparatedTokenizerTemplate<NS_IsAsciiWhitespace>;
196 * Adapts a char separated tokenizer for use in a range-based for loop.
198 * Use this typically only indirectly, e.g. like
200 * for (const auto& token : nsCharSeparatedTokenizer(aText, ' ').ToRange()) {
201 * // ...
204 template <typename Tokenizer>
205 class nsTokenizedRange {
206 public:
207 using DependentSubstringType = typename Tokenizer::DependentSubstringType;
209 explicit nsTokenizedRange(Tokenizer&& aTokenizer)
210 : mTokenizer(std::move(aTokenizer)) {}
212 struct EndSentinel {};
213 struct Iterator {
214 explicit Iterator(const Tokenizer& aTokenizer) : mTokenizer(aTokenizer) {
215 Next();
218 const DependentSubstringType& operator*() const { return *mCurrentToken; }
220 Iterator& operator++() {
221 Next();
222 return *this;
225 bool operator==(const EndSentinel&) const {
226 return mCurrentToken.isNothing();
229 bool operator!=(const EndSentinel&) const { return mCurrentToken.isSome(); }
231 private:
232 void Next() {
233 mCurrentToken.reset();
235 if (mTokenizer.hasMoreTokens()) {
236 mCurrentToken.emplace(mTokenizer.nextToken());
240 Tokenizer mTokenizer;
241 mozilla::Maybe<DependentSubstringType> mCurrentToken;
244 auto begin() const { return Iterator{mTokenizer}; }
245 auto end() const { return EndSentinel{}; }
247 private:
248 const Tokenizer mTokenizer;
251 template <typename TDependentSubstringType, bool IsWhitespace(char16_t),
252 nsTokenizerFlags Flags>
253 auto nsTCharSeparatedTokenizer<TDependentSubstringType, IsWhitespace,
254 Flags>::ToRange() const {
255 return nsTokenizedRange{nsTCharSeparatedTokenizer{*this}};
258 // You should not need to instantiate this class directly.
259 // Use nsTSubstring::Split instead.
260 template <typename T>
261 class nsTSubstringSplitter
262 : public nsTokenizedRange<nsTCharSeparatedTokenizerTemplate<
263 NS_TokenizerIgnoreNothing, T,
264 nsTokenizerFlags::IncludeEmptyTokenAtEnd>> {
265 public:
266 using nsTokenizedRange<nsTCharSeparatedTokenizerTemplate<
267 NS_TokenizerIgnoreNothing, T,
268 nsTokenizerFlags::IncludeEmptyTokenAtEnd>>::nsTokenizedRange;
271 extern template class nsTSubstringSplitter<char>;
272 extern template class nsTSubstringSplitter<char16_t>;
274 #endif /* __nsCharSeparatedTokenizer_h */