Bug 1700051: part 31.4) Move `mSoftTextValid` to `SoftText`. r=smaug
[gecko.git] / extensions / spellcheck / src / mozInlineSpellWordUtil.h
blob850ad7a6d6591e11e67494c87506d09e8f340dea
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef mozInlineSpellWordUtil_h
7 #define mozInlineSpellWordUtil_h
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Maybe.h"
11 #include "mozilla/Result.h"
12 #include "mozilla/dom/Document.h"
13 #include "nsCOMPtr.h"
14 #include "nsString.h"
15 #include "nsTArray.h"
17 //#define DEBUG_SPELLCHECK
19 class nsRange;
20 class nsINode;
22 namespace mozilla {
23 class TextEditor;
25 namespace dom {
26 class Document;
28 } // namespace mozilla
30 struct NodeOffset {
31 nsCOMPtr<nsINode> mNode;
32 int32_t mOffset;
34 NodeOffset() : mOffset(0) {}
35 NodeOffset(nsINode* aNode, int32_t aOffset)
36 : mNode(aNode), mOffset(aOffset) {}
38 bool operator==(const NodeOffset& aOther) const {
39 return mNode == aOther.mNode && mOffset == aOther.mOffset;
42 bool operator!=(const NodeOffset& aOther) const { return !(*this == aOther); }
44 nsINode* Node() const { return mNode.get(); }
45 int32_t Offset() const { return mOffset; }
48 class NodeOffsetRange {
49 private:
50 NodeOffset mBegin;
51 NodeOffset mEnd;
53 public:
54 NodeOffsetRange() {}
55 NodeOffsetRange(NodeOffset b, NodeOffset e) : mBegin(b), mEnd(e) {}
57 NodeOffset Begin() const { return mBegin; }
59 NodeOffset End() const { return mEnd; }
62 /**
63 * This class extracts text from the DOM and builds it into a single string.
64 * The string includes whitespace breaks wherever non-inline elements begin
65 * and end. This string is broken into "real words", following somewhat
66 * complex rules; for example substrings that look like URLs or
67 * email addresses are treated as single words, but otherwise many kinds of
68 * punctuation are treated as word separators. GetNextWord provides a way
69 * to iterate over these "real words".
71 * The basic operation is:
73 * 1. Call Create with the editor that you're using.
74 * 2. Call SetPositionAndEnd to initialize the current position inside the
75 * previously given range and set where you want to stop spellchecking.
76 * We'll stop at the word boundary after that. If SetEnd is not called,
77 * we'll stop at the end of the root element.
78 * 3. Call GetNextWord over and over until it returns false.
81 class MOZ_STACK_CLASS mozInlineSpellWordUtil {
82 public:
83 static mozilla::Maybe<mozInlineSpellWordUtil> Create(
84 const mozilla::TextEditor& aTextEditor);
86 // sets the current position, this should be inside the range. If we are in
87 // the middle of a word, we'll move to its start.
88 nsresult SetPositionAndEnd(nsINode* aPositionNode, int32_t aPositionOffset,
89 nsINode* aEndNode, int32_t aEndOffset);
91 // Given a point inside or immediately following a word, this returns the
92 // DOM range that exactly encloses that word's characters. The current
93 // position will be at the end of the word. This will find the previous
94 // word if the current position is space, so if you care that the point is
95 // inside the word, you should check the range.
97 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
98 // before you actually generate the range you are interested in and iterate
99 // the words in it.
100 nsresult GetRangeForWord(nsINode* aWordNode, int32_t aWordOffset,
101 nsRange** aRange);
103 // Convenience functions, object must be initialized
104 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd,
105 nsRange** aRange) const;
106 static already_AddRefed<nsRange> MakeRange(const NodeOffsetRange& aRange);
108 // Moves to the next word in the range, and retrieves its text and range.
109 // false is returned when we are done checking.
110 // aSkipChecking will be set if the word is "special" and shouldn't be
111 // checked (e.g., an email address).
112 bool GetNextWord(nsAString& aText, NodeOffsetRange* aNodeOffsetRange,
113 bool* aSkipChecking);
115 // Call to normalize some punctuation. This function takes an autostring
116 // so we can access characters directly.
117 static void NormalizeWord(nsAString& aWord);
119 mozilla::dom::Document* GetDocument() const { return mDocument; }
120 const nsINode* GetRootNode() const { return mRootNode; }
122 private:
123 struct SoftText {
124 // DOM text covering the soft range, with newlines added at block boundaries
125 nsString mValue;
127 NodeOffset mBegin = NodeOffset(nullptr, 0);
128 NodeOffset mEnd = NodeOffset(nullptr, 0);
130 bool mIsValid = false;
133 SoftText mSoftText;
135 mozInlineSpellWordUtil(mozilla::dom::Document& aDocument,
136 bool aIsContentEditableOrDesignMode, nsINode& aRootNode
139 : mDocument(&aDocument),
140 mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode),
141 mRootNode(&aRootNode),
142 mNextWordIndex(-1) {}
144 // cached stuff for the editor
145 const RefPtr<mozilla::dom::Document> mDocument;
146 const bool mIsContentEditableOrDesignMode;
148 // range to check, see SetPositionAndEnd
149 const nsINode* mRootNode;
151 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
152 // DOM node appears at most once in this list.
153 struct DOMTextMapping {
154 NodeOffset mNodeOffset;
155 int32_t mSoftTextOffset;
156 int32_t mLength;
158 DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset,
159 int32_t aLength)
160 : mNodeOffset(aNodeOffset),
161 mSoftTextOffset(aSoftTextOffset),
162 mLength(aLength) {}
164 nsTArray<DOMTextMapping> mSoftTextDOMMapping;
166 // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset
167 struct RealWord {
168 int32_t mSoftTextOffset;
169 uint32_t mLength : 31;
170 uint32_t mCheckableWord : 1;
172 RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable)
173 : mSoftTextOffset(aOffset),
174 mLength(aLength),
175 mCheckableWord(aCheckable) {
176 static_assert(sizeof(RealWord) == 8,
177 "RealWord should be limited to 8 bytes");
178 MOZ_ASSERT(aLength < INT32_MAX,
179 "Word length is too large to fit in the bitfield");
182 int32_t EndOffset() const { return mSoftTextOffset + mLength; }
184 using RealWords = nsTArray<RealWord>;
185 RealWords mRealWords;
186 int32_t mNextWordIndex;
188 void InvalidateWords() { mSoftText.mIsValid = false; }
189 nsresult EnsureWords();
191 int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset) const;
192 // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM
193 // positions can map to the same mSoftText.mValue offset, e.g. given nodes
194 // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string
195 // offset. So, aHint controls which position we return ... if aHint is
196 // HINT_END then the position indicates the END of a range so we return (A,4).
197 // Otherwise the position indicates the START of a range so we return (B,0).
198 enum DOMMapHint { HINT_BEGIN, HINT_END };
199 NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
200 DOMMapHint aHint) const;
202 static void ToString(DOMMapHint aHint, nsACString& aResult);
204 // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
205 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
206 // later word (favouring the assumption that it's the BEGINning of a word),
207 // otherwise return the earlier word (assuming it's the END of a word).
208 // If aSearchForward is true, then if we don't find a word at the given
209 // position, search forward until we do find a word and return that (if
210 // found).
211 int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
212 bool aSearchForward) const;
214 // build mSoftText.mValue and mSoftTextDOMMapping and adjust mSoftText.mBegin.
215 void AdjustSoftBeginAndBuildSoftText();
217 mozilla::Result<RealWords, nsresult> BuildRealWords() const;
219 nsresult SplitDOMWordAndAppendTo(int32_t aStart, int32_t aEnd,
220 nsTArray<RealWord>& aRealWords) const;
222 nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange) const;
223 void MakeNodeOffsetRangeForWord(const RealWord& aWord,
224 NodeOffsetRange* aNodeOffsetRange);
227 #endif