extensions/spellcheck/src/mozInlineSpellWordUtil.h

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #ifndef mozInlineSpellWordUtil_h
   7 #define mozInlineSpellWordUtil_h
   8
   9 #include "mozilla/Attributes.h"
  10 #include "mozilla/Maybe.h"
  11 #include "mozilla/Result.h"
  12 #include "mozilla/dom/Document.h"
  13 #include "nsCOMPtr.h"
  14 #include "nsString.h"
  15 #include "nsTArray.h"
  16
  17 //#define DEBUG_SPELLCHECK
  18
  19 class nsRange;
  20 class nsINode;
  21
  22 namespace mozilla {
  23 class TextEditor;
  24
  25 namespace dom {
  26 class Document;
  27 }
  28 }  // namespace mozilla
  29
  30 struct NodeOffset {
  31   nsCOMPtr<nsINode> mNode;
  32   int32_t mOffset;
  33
  34   NodeOffset() : mOffset(0) {}
  35   NodeOffset(nsINode* aNode, int32_t aOffset)
  36       : mNode(aNode), mOffset(aOffset) {}
  37
  38   bool operator==(const NodeOffset& aOther) const {
  39     return mNode == aOther.mNode && mOffset == aOther.mOffset;
  40   }
  41
  42   bool operator!=(const NodeOffset& aOther) const { return !(*this == aOther); }
  43
  44   nsINode* Node() const { return mNode.get(); }
  45   int32_t Offset() const { return mOffset; }
  46 };
  47
  48 class NodeOffsetRange {
  49  private:
  50   NodeOffset mBegin;
  51   NodeOffset mEnd;
  52
  53  public:
  54   NodeOffsetRange() {}
  55   NodeOffsetRange(NodeOffset b, NodeOffset e) : mBegin(b), mEnd(e) {}
  56
  57   NodeOffset Begin() const { return mBegin; }
  58
  59   NodeOffset End() const { return mEnd; }
  60 };
  61
  62 /**
  63  *    This class extracts text from the DOM and builds it into a single string.
   64  *    The string includes whitespace breaks wherever non-inline elements begin
  65  *    and end. This string is broken into "real words", following somewhat
  66  *    complex rules; for example substrings that look like URLs or
  67  *    email addresses are treated as single words, but otherwise many kinds of
  68  *    punctuation are treated as word separators. GetNextWord provides a way
  69  *    to iterate over these "real words".
  70  *
  71  *    The basic operation is:
  72  *
   73  *    1. Call Create with the editor that you're using.
   74  *    2. Call SetPositionAndEnd to initialize the current position inside the
  75  *       previously given range and set where you want to stop spellchecking.
  76  *       We'll stop at the word boundary after that. If SetEnd is not called,
  77  *       we'll stop at the end of the root element.
  78  *    3. Call GetNextWord over and over until it returns false.
  79  */
  80
  81 class MOZ_STACK_CLASS mozInlineSpellWordUtil {
  82  public:
  83   static mozilla::Maybe<mozInlineSpellWordUtil> Create(
  84       const mozilla::TextEditor& aTextEditor);
  85
  86   // sets the current position, this should be inside the range. If we are in
  87   // the middle of a word, we'll move to its start.
  88   nsresult SetPositionAndEnd(nsINode* aPositionNode, int32_t aPositionOffset,
  89                              nsINode* aEndNode, int32_t aEndOffset);
  90
  91   // Given a point inside or immediately following a word, this returns the
  92   // DOM range that exactly encloses that word's characters. The current
  93   // position will be at the end of the word. This will find the previous
  94   // word if the current position is space, so if you care that the point is
  95   // inside the word, you should check the range.
  96   //
  97   // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
  98   // before you actually generate the range you are interested in and iterate
  99   // the words in it.
 100   nsresult GetRangeForWord(nsINode* aWordNode, int32_t aWordOffset,
 101                            nsRange** aRange);
 102
 103   // Convenience functions, object must be initialized
 104   nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd,
 105                      nsRange** aRange) const;
 106   static already_AddRefed<nsRange> MakeRange(const NodeOffsetRange& aRange);
 107
 108   // Moves to the the next word in the range, and retrieves it's text and range.
 109   // false is returned when we are done checking.
 110   // aSkipChecking will be set if the word is "special" and shouldn't be
 111   // checked (e.g., an email address).
 112   bool GetNextWord(nsAString& aText, NodeOffsetRange* aNodeOffsetRange,
 113                    bool* aSkipChecking);
 114
 115   // Call to normalize some punctuation. This function takes an autostring
 116   // so we can access characters directly.
 117   static void NormalizeWord(nsAString& aWord);
 118
 119   mozilla::dom::Document* GetDocument() const { return mDocument; }
 120   const nsINode* GetRootNode() const { return mRootNode; }
 121
 122  private:
 123   struct SoftText {
 124     // DOM text covering the soft range, with newlines added at block boundaries
 125     nsString mValue;
 126
 127     NodeOffset mBegin = NodeOffset(nullptr, 0);
 128   };
 129
 130   SoftText mSoftText;
 131
 132   mozInlineSpellWordUtil(mozilla::dom::Document& aDocument,
 133                          bool aIsContentEditableOrDesignMode, nsINode& aRootNode
 134
 135                          )
 136       : mDocument(&aDocument),
 137         mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode),
 138         mRootNode(&aRootNode),
 139         mSoftEnd(nullptr, 0),
 140         mNextWordIndex(-1),
 141         mSoftTextValid(false) {}
 142
 143   // cached stuff for the editor
 144   const RefPtr<mozilla::dom::Document> mDocument;
 145   const bool mIsContentEditableOrDesignMode;
 146
 147   // range to check, see SetPosition and SetEnd
 148   const nsINode* mRootNode;
 149   NodeOffset mSoftEnd;
 150
 151   // A list of where we extracted text from, ordered by mSoftTextOffset. A given
 152   // DOM node appears at most once in this list.
 153   struct DOMTextMapping {
 154     NodeOffset mNodeOffset;
 155     int32_t mSoftTextOffset;
 156     int32_t mLength;
 157
 158     DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset,
 159                    int32_t aLength)
 160         : mNodeOffset(aNodeOffset),
 161           mSoftTextOffset(aSoftTextOffset),
 162           mLength(aLength) {}
 163   };
 164   nsTArray<DOMTextMapping> mSoftTextDOMMapping;
 165
 166   // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset
 167   struct RealWord {
 168     int32_t mSoftTextOffset;
 169     uint32_t mLength : 31;
 170     uint32_t mCheckableWord : 1;
 171
 172     RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable)
 173         : mSoftTextOffset(aOffset),
 174           mLength(aLength),
 175           mCheckableWord(aCheckable) {
 176       static_assert(sizeof(RealWord) == 8,
 177                     "RealWord should be limited to 8 bytes");
 178       MOZ_ASSERT(aLength < INT32_MAX,
 179                  "Word length is too large to fit in the bitfield");
 180     }
 181
 182     int32_t EndOffset() const { return mSoftTextOffset + mLength; }
 183   };
 184   using RealWords = nsTArray<RealWord>;
 185   RealWords mRealWords;
 186   int32_t mNextWordIndex;
 187
 188   bool mSoftTextValid;
 189
 190   void InvalidateWords() { mSoftTextValid = false; }
 191   nsresult EnsureWords();
 192
 193   int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset) const;
 194   // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM
 195   // positions can map to the same mSoftText.mValue offset, e.g. given nodes
 196   // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string
 197   // offset. So, aHintBefore controls which position we return ... if aHint is
 198   // eEnd then the position indicates the END of a range so we return (A,4).
 199   // Otherwise the position indicates the START of a range so we return (B,0).
 200   enum DOMMapHint { HINT_BEGIN, HINT_END };
 201   NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
 202                                             DOMMapHint aHint) const;
 203
 204   static void ToString(DOMMapHint aHint, nsACString& aResult);
 205
 206   // Finds the index of the real word containing aSoftTextOffset, or -1 if none
 207   // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
 208   // later word (favouring the assumption that it's the BEGINning of a word),
 209   // otherwise return the earlier word (assuming it's the END of a word).
 210   // If aSearchForward is true, then if we don't find a word at the given
 211   // position, search forward until we do find a word and return that (if
 212   // found).
 213   int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
 214                                  bool aSearchForward) const;
 215
 216   // build mSoftText.mValue and mSoftTextDOMMapping and adjust mSoftText.mBegin.
 217   void AdjustSoftBeginAndBuildSoftText();
 218
 219   mozilla::Result<RealWords, nsresult> BuildRealWords() const;
 220
 221   nsresult SplitDOMWordAndAppendTo(int32_t aStart, int32_t aEnd,
 222                                    nsTArray<RealWord>& aRealWords) const;
 223
 224   nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange) const;
 225   void MakeNodeOffsetRangeForWord(const RealWord& aWord,
 226                                   NodeOffsetRange* aNodeOffsetRange);
 227 };
 228
 229 #endif