xpcom/ds/nsCharSeparatedTokenizer.h

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #ifndef __nsCharSeparatedTokenizer_h
   7 #define __nsCharSeparatedTokenizer_h
   8
   9 #include "mozilla/RangedPtr.h"
  10
  11 #include "nsDependentSubstring.h"
  12 #include "nsCRT.h"
  13
  14 /**
  15  * This parses a SeparatorChar-separated string into tokens.
  16  * Whitespace surrounding tokens is not treated as part of tokens, however
  17  * whitespace inside a token is. If the final token is the empty string, it is
  18  * not returned.
  19  *
  20  * Some examples, with SeparatorChar = ',':
  21  *
  22  * "foo, bar, baz" ->      "foo" "bar" "baz"
  23  * "foo,bar,baz" ->        "foo" "bar" "baz"
  24  * "foo , bar hi , baz" -> "foo" "bar hi" "baz"
  25  * "foo, ,bar,baz" ->      "foo" "" "bar" "baz"
  26  * "foo,,bar,baz" ->       "foo" "" "bar" "baz"
  27  * "foo,bar,baz," ->       "foo" "bar" "baz"
  28  *
      * If SEPARATOR_OPTIONAL is passed in aFlags, whitespace after a word also
      * ends the token, so (again with SeparatorChar = ','):
      *
      * "foo bar,baz" ->        "foo" "bar" "baz"
      *
      * The tokenizer does not copy the source string: it keeps pointers into
      * the buffer returned by aSource.Data(). The source must therefore stay
      * alive and unmodified for as long as the tokenizer is used. Returned
      * tokens are built from pointers into that same buffer (presumably
      * without copying -- confirm against Substring()).
      *
      * Typical use:
      *
      *   while (tokenizer.hasMoreTokens()) {
      *     ... tokenizer.nextToken() ...
      *   }
      *
  29  * The function used for whitespace detection is a template argument.
  30  * This class itself has no default for it; the
      * nsCharSeparatedTokenizerTemplate and nsCCharSeparatedTokenizerTemplate
      * wrappers default it to NS_IsAsciiWhitespace.
  31  */
  32 template<typename DependentSubstringType, bool IsWhitespace(char16_t)>
  33 class nsTCharSeparatedTokenizer
  34 {
       // Character and string types are taken from DependentSubstringType.
  35   typedef typename DependentSubstringType::char_type CharType;
  36   typedef typename DependentSubstringType::substring_type SubstringType;
  37
  38 public:
  39   // Flags -- only one for now. If we need more, they should be defined to
  40   // be 1 << 1, 1 << 2, etc. (They're masks, and aFlags is a bitfield.)
  41   enum
  42   {
         // When set, whitespace after a word ends the token even if no
         // separator character follows (see the 'break' in nextToken()).
  43     SEPARATOR_OPTIONAL = 1
  44   };
  45
       /*
        * Sets up tokenization of aSource, split on aSeparatorChar, and skips
        * any leading whitespace so that the scan position starts on the first
        * token (or at the end of the source).
        *
        * aFlags is a bitfield; SEPARATOR_OPTIONAL is the only flag defined.
        * Only pointers into aSource's buffer are stored; no copy is made here.
        */
  46   nsTCharSeparatedTokenizer(const SubstringType& aSource,
  47                             CharType aSeparatorChar,
  48                             uint32_t aFlags = 0)
  49     : mIter(aSource.Data(), aSource.Length())
  50     , mEnd(aSource.Data() + aSource.Length(), aSource.Data(),
  51            aSource.Length())
  52     , mSeparatorChar(aSeparatorChar)
  53     , mWhitespaceBeforeFirstToken(false)
  54     , mWhitespaceAfterCurrentToken(false)
  55     , mSeparatorAfterCurrentToken(false)
  56     , mSeparatorOptional(aFlags & SEPARATOR_OPTIONAL)
  57   {
  58     // Skip initial whitespace, remembering whether there was any so that
         // whitespaceBeforeFirstToken() can report it.
  59     while (mIter < mEnd && IsWhitespace(*mIter)) {
  60       mWhitespaceBeforeFirstToken = true;
  61       ++mIter;
  62     }
  63   }
  64
  65   /**
  66    * Checks if any more tokens are available.
        *
        * Returns true while the scan position has not reached the end of the
        * source. Because nextToken() skips the separator following a token
        * (and the whitespace after it), a source that ends in a separator does
        * not yield a final empty token.
  67    */
  68   bool hasMoreTokens() const
  69   {
  70     MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
  71                "Should be at beginning of token if there is one");
  72
  73     return mIter < mEnd;
  74   }
  75
  76   /*
  77    * Returns true if there is whitespace prior to the first token.
        * This is determined once, in the constructor.
  78    */
  79   bool whitespaceBeforeFirstToken() const
  80   {
  81     return mWhitespaceBeforeFirstToken;
  82   }
  83
  84   /*
  85    * Returns true if there is a separator after the current token.
  86    * Useful if you want to check whether the last token has a separator
  87    * after it which may not be valid.
        * The value is updated by each nextToken() call and is false before the
        * first call.
  88    */
  89   bool separatorAfterCurrentToken() const
  90   {
  91     return mSeparatorAfterCurrentToken;
  92   }
  93
  94   /*
  95    * Returns true if there is any whitespace after the current token.
        * This covers whitespace directly after the token as well as whitespace
        * following the separator that comes after it.
        *
        * NOTE(review): nextToken() only resets this flag when it scans at
        * least one character of token text. For an empty token (scan position
        * already on a separator) the value from the previous call carries over
        * unless whitespace follows the separator -- confirm whether callers
        * rely on this.
  96    */
  97   bool whitespaceAfterCurrentToken() const
  98   {
  99     return mWhitespaceAfterCurrentToken;
 100   }
 101
 102   /**
 103    * Returns the next token.
        *
        * The token runs from the current scan position to the end of the last
        * word before the next separator (or the end of the source), so
        * trailing whitespace is excluded while whitespace between words is
        * kept. With SEPARATOR_OPTIONAL the token ends after the first word
        * instead. Afterwards the scan position is moved past the following
        * separator, if there is one, and past any whitespace after it.
        *
        * If the scan position is already on a separator or at the end of the
        * source, the returned token is empty.
 104    */
 105   const DependentSubstringType nextToken()
 106   {
 107     mozilla::RangedPtr<const CharType> tokenStart = mIter;
 108     mozilla::RangedPtr<const CharType> tokenEnd = mIter;
 109
 110     MOZ_ASSERT(mIter == mEnd || !IsWhitespace(*mIter),
 111                "Should be at beginning of token if there is one");
 112
 113     // Search until we hit separator or end (or whitespace, if a separator
 114     // isn't required -- see clause with 'break' below).
 115     while (mIter < mEnd && *mIter != mSeparatorChar) {
 116       // Skip to end of the current word.
 117       while (mIter < mEnd &&
 118              !IsWhitespace(*mIter) && *mIter != mSeparatorChar) {
 119         ++mIter;
 120       }
           // Remember where this word ends. Whitespace skipped below only
           // becomes part of the token if another word follows it, because
           // tokenEnd is then advanced past it on the next iteration.
 121       tokenEnd = mIter;
 122
 123       // Skip whitespace after the current word.
 124       mWhitespaceAfterCurrentToken = false;
 125       while (mIter < mEnd && IsWhitespace(*mIter)) {
 126         mWhitespaceAfterCurrentToken = true;
 127         ++mIter;
 128       }
 129       if (mSeparatorOptional) {
 130         // We've hit (and skipped) whitespace, and that's sufficient to end
 131         // our token, regardless of whether we've reached a SeparatorChar.
 132         break;
 133       } // (else, we'll keep looping until we hit mEnd or SeparatorChar)
 134     }
 135
         // Record whether we stopped on a separator, as opposed to the end of
         // the source or (with SEPARATOR_OPTIONAL) the start of the next word.
 136     mSeparatorAfterCurrentToken = (mIter != mEnd &&
 137                                    *mIter == mSeparatorChar);
 138     MOZ_ASSERT(mSeparatorOptional ||
 139                (mSeparatorAfterCurrentToken == (mIter < mEnd)),
 140                "If we require a separator and haven't hit the end of "
 141                "our string, then we shouldn't have left the loop "
 142                "unless we hit a separator");
 143
 144     // Skip separator (and any whitespace after it), if we're at one.
 145     if (mSeparatorAfterCurrentToken) {
 146       ++mIter;
 147
 148       while (mIter < mEnd && IsWhitespace(*mIter)) {
 149         mWhitespaceAfterCurrentToken = true;
 150         ++mIter;
 151       }
 152     }
 153
         // Build the token from the raw [tokenStart, tokenEnd) pointer pair.
 154     return Substring(tokenStart.get(), tokenEnd.get());
 155   }
 156
 157 private:
       // Current scan position within the source buffer.
 158   mozilla::RangedPtr<const CharType> mIter;
       // One past the last character of the source buffer.
 159   const mozilla::RangedPtr<const CharType> mEnd;
       // Character that separates tokens.
 160   CharType mSeparatorChar;
       // True if the constructor skipped whitespace before the first token.
 161   bool mWhitespaceBeforeFirstToken;
       // True if nextToken() skipped whitespace after the token or after the
       // separator that followed it.
 162   bool mWhitespaceAfterCurrentToken;
       // True if the most recent nextToken() call stopped on a separator.
 163   bool mSeparatorAfterCurrentToken;
       // True if SEPARATOR_OPTIONAL was set in the constructor's aFlags.
 164   bool mSeparatorOptional;
 165 };
 166
 167 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
     // Convenience wrapper that instantiates nsTCharSeparatedTokenizer for
     // nsDependentSubstring, taking an nsSubstring source and a char16_t
     // separator. It adds no state or behavior of its own: the constructor
     // only forwards its arguments to the base class. The whitespace
     // predicate defaults to NS_IsAsciiWhitespace.
 168 class nsCharSeparatedTokenizerTemplate
 169   : public nsTCharSeparatedTokenizer<nsDependentSubstring, IsWhitespace>
 170 {
 171 public:
       // Forwards aSource, aSeparatorChar and aFlags unchanged to the base
       // class constructor; aFlags defaults to 0 (no flags set).
 172   nsCharSeparatedTokenizerTemplate(const nsSubstring& aSource,
 173                                    char16_t aSeparatorChar,
 174                                    uint32_t aFlags = 0)
 175     : nsTCharSeparatedTokenizer<nsDependentSubstring,
 176                                 IsWhitespace>(aSource, aSeparatorChar, aFlags)
 177   {
 178   }
 179 };
 180
     // The wrapper above with its default whitespace predicate
     // (NS_IsAsciiWhitespace).
 181 typedef nsCharSeparatedTokenizerTemplate<> nsCharSeparatedTokenizer;
 182
 183 template<bool IsWhitespace(char16_t) = NS_IsAsciiWhitespace>
     // Convenience wrapper that instantiates nsTCharSeparatedTokenizer for
     // nsDependentCSubstring, taking an nsCSubstring source and a plain char
     // separator. It adds no state or behavior of its own: the constructor
     // only forwards its arguments to the base class. Note that the
     // whitespace predicate still takes a char16_t, exactly as in the
     // nsDependentSubstring variant, and defaults to NS_IsAsciiWhitespace.
 184 class nsCCharSeparatedTokenizerTemplate
 185   : public nsTCharSeparatedTokenizer<nsDependentCSubstring, IsWhitespace>
 186 {
 187 public:
       // Forwards aSource, aSeparatorChar and aFlags unchanged to the base
       // class constructor; aFlags defaults to 0 (no flags set).
 188   nsCCharSeparatedTokenizerTemplate(const nsCSubstring& aSource,
 189                                     char aSeparatorChar,
 190                                     uint32_t aFlags = 0)
 191     : nsTCharSeparatedTokenizer<nsDependentCSubstring,
 192                                 IsWhitespace>(aSource, aSeparatorChar, aFlags)
 193   {
 194   }
 195 };
 196
     // The wrapper above with its default whitespace predicate
     // (NS_IsAsciiWhitespace).
 197 typedef nsCCharSeparatedTokenizerTemplate<> nsCCharSeparatedTokenizer;
 198
 199 #endif /* __nsCharSeparatedTokenizer_h */