Bumping manifests a=b2g-bump
[gecko.git] / xpcom / ds / nsCharSeparatedTokenizer.h
blob6285770fb70b1691ee6d3af0dc96fe9b11c46fd9
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef __nsCharSeparatedTokenizer_h
7 #define __nsCharSeparatedTokenizer_h
9 #include "mozilla/RangedPtr.h"
11 #include "nsDependentSubstring.h"
12 #include "nsCRT.h"
14 /**
15 * This parses a SeparatorChar-separated string into tokens.
16 * Whitespace surrounding tokens is not treated as part of tokens, however
17 * whitespace inside a token is. If the final token is the empty string, it is
18 * not returned.
20 * Some examples, with SeparatorChar = ',':
22 * "foo, bar, baz" -> "foo" "bar" "baz"
23 * "foo,bar,baz" -> "foo" "bar" "baz"
24 * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
25 * "foo, ,bar,baz" -> "foo" "" "bar" "baz"
26 * "foo,,bar,baz" -> "foo" "" "bar" "baz"
27 * "foo,bar,baz," -> "foo" "bar" "baz"
29 * The function used for whitespace detection is a template argument.
30 * By default, it is NS_IsAsciiWhitespace.
32 template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
33 class nsTCharSeparatedTokenizer
35 typedef typename DependentSubstringType::char_type CharType;
36 typedef typename DependentSubstringType::substring_type SubstringType;
38 public:
39 // Flags -- only one for now. If we need more, they should be defined to
40 // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
41 enum
43 SEPARATOR_OPTIONAL = 1
46 nsTCharSeparatedTokenizer(const SubstringType& aSource,
47 CharType aSeparatorChar,
48 uint32_t aFlags = 0)
49 : mIter(aSource.Data(), aSource.Length())
50 , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
51 aSource.Length())
52 , mSeparatorChar(aSeparatorChar)
53 , mWhitespaceBeforeFirstToken(false)
54 , mWhitespaceAfterCurrentToken(false)
55 , mSeparatorAfterCurrentToken(false)
56 , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
58 // Skip initial whitespace
59 while (mIter < mEnd && IsWhitespace(*mIter)) {
60 mWhitespaceBeforeFirstToken = true;
61 ++mIter;
65 /**
66 * Checks if any more tokens are available.
68 bool hasMoreTokens() const
70 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
71 "Should be at beginning of token if there is one");
73 return mIter < mEnd;
77 * Returns true if there is whitespace prior to the first token.
79 bool whitespaceBeforeFirstToken() const
81 return mWhitespaceBeforeFirstToken;
85 * Returns true if there is a separator after the current token.
86 * Useful if you want to check whether the last token has a separator
87 * after it which may not be valid.
89 bool separatorAfterCurrentToken() const
91 return mSeparatorAfterCurrentToken;
95 * Returns true if there is any whitespace after the current token.
97 bool whitespaceAfterCurrentToken() const
99 return mWhitespaceAfterCurrentToken;
103 * Returns the next token.
105 const DependentSubstringType nextToken()
107 mozilla::RangedPtr<const CharType> tokenStart = mIter;
108 mozilla::RangedPtr<const CharType> tokenEnd = mIter;
110 MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
111 "Should be at beginning of token if there is one");
113 // Search until we hit separator or end (or whitespace, if a separator
114 // isn't required -- see clause with 'break' below).
115 while (mIter < mEnd && *mIter != mSeparatorChar) {
116 // Skip to end of the current word.
117 while (mIter < mEnd &&
118 !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
119 ++mIter;
121 tokenEnd = mIter;
123 // Skip whitespace after the current word.
124 mWhitespaceAfterCurrentToken = false;
125 while (mIter < mEnd && IsWhitespace(*mIter)) {
126 mWhitespaceAfterCurrentToken = true;
127 ++mIter;
129 if (mSeparatorOptional) {
130 // We've hit (and skipped) whitespace, and that's sufficient to end
131 // our token, regardless of whether we've reached a SeparatorChar.
132 break;
133 } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
136 mSeparatorAfterCurrentToken = (mIter != mEnd &&
137 *mIter == mSeparatorChar);
138 MOZ_ASSERT(mSeparatorOptional ||
139 (mSeparatorAfterCurrentToken == (mIter < mEnd)),
140 "If we require a separator and haven't hit the end of "
141 "our string, then we shouldn't have left the loop "
142 "unless we hit a separator");
144 // Skip separator (and any whitespace after it), if we're at one.
145 if (mSeparatorAfterCurrentToken) {
146 ++mIter;
148 while (mIter < mEnd && IsWhitespace(*mIter)) {
149 mWhitespaceAfterCurrentToken = true;
150 ++mIter;
154 return Substring(tokenStart.get(), tokenEnd.get());
157 private:
158 mozilla::RangedPtr<const CharType> mIter;
159 const mozilla::RangedPtr<const CharType> mEnd;
160 CharType mSeparatorChar;
161 bool mWhitespaceBeforeFirstToken;
162 bool mWhitespaceAfterCurrentToken;
163 bool mSeparatorAfterCurrentToken;
164 bool mSeparatorOptional;
167 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
168 class nsCharSeparatedTokenizerTemplate
169 : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
171 public:
172 nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
173 char16_t aSeparatorChar,
174 uint32_t aFlags = 0)
175 : nsTCharSeparatedTokenizer<nsDependentSubstring,
176 IsWhitespace>(aSource, aSeparatorChar, aFlags)
181 typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
183 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
184 class nsCCharSeparatedTokenizerTemplate
185 : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
187 public:
188 nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,
189 char aSeparatorChar,
190 uint32_t aFlags = 0)
191 : nsTCharSeparatedTokenizer<nsDependentCSubstring,
192 IsWhitespace>(aSource, aSeparatorChar, aFlags)
197 typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
199 #endif /* __nsCharSeparatedTokenizer_h */