Bug 1888590 - Mark some subtests on trusted-types-event-handlers.html as failing...
[gecko.git] / extensions / spellcheck / src / mozInlineSpellWordUtil.h
blob2a1b4b912e82145baf86b77831435a2011e479b2
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef mozInlineSpellWordUtil_h
7 #define mozInlineSpellWordUtil_h
9 #include <utility>
11 #include "mozilla/Attributes.h"
12 #include "mozilla/Maybe.h"
13 #include "mozilla/RangeBoundary.h"
14 #include "mozilla/Result.h"
15 #include "mozilla/dom/Document.h"
16 #include "nsCOMPtr.h"
17 #include "nsString.h"
18 #include "nsTArray.h"
20 // #define DEBUG_SPELLCHECK
22 class nsRange;
23 class nsINode;
25 namespace mozilla {
26 class EditorBase;
28 namespace dom {
29 class Document;
31 } // namespace mozilla
33 struct NodeOffset {
34 nsCOMPtr<nsINode> mNode;
35 int32_t mOffset;
37 NodeOffset() : mOffset(0) {}
38 NodeOffset(nsINode* aNode, int32_t aOffset)
39 : mNode(aNode), mOffset(aOffset) {}
41 bool operator==(const NodeOffset& aOther) const {
42 return mNode == aOther.mNode && mOffset == aOther.mOffset;
45 bool operator==(const mozilla::RangeBoundary& aRangeBoundary) const;
47 bool operator!=(const NodeOffset& aOther) const { return !(*this == aOther); }
49 nsINode* Node() const { return mNode.get(); }
50 int32_t Offset() const { return mOffset; }
53 class NodeOffsetRange {
54 private:
55 NodeOffset mBegin;
56 NodeOffset mEnd;
58 public:
59 NodeOffsetRange() {}
60 NodeOffsetRange(NodeOffset b, NodeOffset e)
61 : mBegin(std::move(b)), mEnd(std::move(e)) {}
63 bool operator==(const nsRange& aRange) const;
65 const NodeOffset& Begin() const { return mBegin; }
67 const NodeOffset& End() const { return mEnd; }
70 /**
71 * This class extracts text from the DOM and builds it into a single string.
72 * The string includes whitespace breaks wherever non-inline elements begin
73 * and end. This string is broken into "real words", following somewhat
74 * complex rules; for example substrings that look like URLs or
75 * email addresses are treated as single words, but otherwise many kinds of
76 * punctuation are treated as word separators. GetNextWord provides a way
77 * to iterate over these "real words".
79 * The basic operation is:
81 * 1. Call Init with the editor that you're using.
82 * 2. Call SetPositionAndEnd to initialize the current position inside the
83 * previously given range and set where you want to stop spellchecking.
84 * We'll stop at the word boundary after that. If SetEnd is not called,
85 * we'll stop at the end of the root element.
86 * 3. Call GetNextWord over and over until it returns false.
89 class MOZ_STACK_CLASS mozInlineSpellWordUtil {
90 public:
91 static mozilla::Maybe<mozInlineSpellWordUtil> Create(
92 const mozilla::EditorBase& aEditorBase);
94 // sets the current position, this should be inside the range. If we are in
95 // the middle of a word, we'll move to its start.
96 nsresult SetPositionAndEnd(nsINode* aPositionNode, int32_t aPositionOffset,
97 nsINode* aEndNode, int32_t aEndOffset);
99 // Given a point inside or immediately following a word, this returns the
100 // DOM range that exactly encloses that word's characters. The current
101 // position will be at the end of the word. This will find the previous
102 // word if the current position is space, so if you care that the point is
103 // inside the word, you should check the range.
105 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
106 // before you actually generate the range you are interested in and iterate
107 // the words in it.
108 nsresult GetRangeForWord(nsINode* aWordNode, int32_t aWordOffset,
109 nsRange** aRange);
111 // Convenience functions, object must be initialized
112 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd,
113 nsRange** aRange) const;
114 static already_AddRefed<nsRange> MakeRange(const NodeOffsetRange& aRange);
116 struct Word {
117 nsAutoString mText;
118 NodeOffsetRange mNodeOffsetRange;
119 bool mSkipChecking = false;
122 // Moves to the next word in the range, and retrieves its text and range.
123 // `false` is returned when we are done checking.
124 // mSkipChecking will be set if the word is "special" and shouldn't be
125 // checked (e.g., an email address).
126 bool GetNextWord(Word& aWord);
128 // Call to normalize some punctuation. This function takes a mutable string
129 // reference so we can access characters directly.
130 static void NormalizeWord(nsAString& aWord);
132 mozilla::dom::Document* GetDocument() const { return mDocument; }
133 const nsINode* GetRootNode() const { return mRootNode; }
135 private:
136 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
137 // DOM node appears at most once in this list.
138 struct DOMTextMapping {
139 NodeOffset mNodeOffset;
140 int32_t mSoftTextOffset;
141 int32_t mLength;
143 DOMTextMapping(NodeOffset aNodeOffset, int32_t aSoftTextOffset,
144 int32_t aLength)
145 : mNodeOffset(std::move(aNodeOffset)),
146 mSoftTextOffset(aSoftTextOffset),
147 mLength(aLength) {}
150 struct SoftText {
151 void AdjustBeginAndBuildText(NodeOffset aBegin, NodeOffset aEnd,
152 const nsINode* aRootNode);
154 void Invalidate() { mIsValid = false; }
156 const NodeOffset& GetBegin() const { return mBegin; }
157 const NodeOffset& GetEnd() const { return mEnd; }
159 const nsTArray<DOMTextMapping>& GetDOMMapping() const {
160 return mDOMMapping;
163 const nsString& GetValue() const { return mValue; }
165 bool mIsValid = false;
167 private:
168 NodeOffset mBegin = NodeOffset(nullptr, 0);
169 NodeOffset mEnd = NodeOffset(nullptr, 0);
171 nsTArray<DOMTextMapping> mDOMMapping;
173 // DOM text covering the soft range, with newlines added at block boundaries
174 nsString mValue;
177 SoftText mSoftText;
179 mozInlineSpellWordUtil(mozilla::dom::Document& aDocument,
180 bool aIsContentEditableOrDesignMode, nsINode& aRootNode
183 : mDocument(&aDocument),
184 mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode),
185 mRootNode(&aRootNode),
186 mNextWordIndex(-1) {}
188 // cached stuff for the editor
189 const RefPtr<mozilla::dom::Document> mDocument;
190 const bool mIsContentEditableOrDesignMode;
192 // range to check, see SetPositionAndEnd
193 const nsINode* mRootNode;
195 // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset
196 struct RealWord {
197 int32_t mSoftTextOffset;
198 uint32_t mLength : 31;
199 uint32_t mCheckableWord : 1;
201 RealWord(int32_t aOffset, uint32_t aLength, bool aCheckable)
202 : mSoftTextOffset(aOffset),
203 mLength(aLength),
204 mCheckableWord(aCheckable) {
205 static_assert(sizeof(RealWord) == 8,
206 "RealWord should be limited to 8 bytes");
207 MOZ_ASSERT(aLength < INT32_MAX,
208 "Word length is too large to fit in the bitfield");
211 int32_t EndOffset() const { return mSoftTextOffset + mLength; }
213 using RealWords = nsTArray<RealWord>;
214 RealWords mRealWords;
215 int32_t mNextWordIndex;
217 nsresult EnsureWords(NodeOffset aSoftBegin, NodeOffset aSoftEnd);
219 int32_t MapDOMPositionToSoftTextOffset(const NodeOffset& aNodeOffset) const;
220 // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM
221 // positions can map to the same mSoftText.mValue offset, e.g. given nodes
222 // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string
223 // offset. So, aHint controls which position we return ... if aHint is
224 // HINT_END then the position indicates the END of a range so we return (A,4).
225 // Otherwise the position indicates the START of a range so we return (B,0).
226 enum DOMMapHint { HINT_BEGIN, HINT_END };
227 NodeOffset MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset,
228 DOMMapHint aHint) const;
230 static void ToString(DOMMapHint aHint, nsACString& aResult);
232 // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
234 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
235 // later word (favouring the assumption that it's the BEGINning of a word),
236 // otherwise return the earlier word (assuming it's the END of a word).
237 // If aSearchForward is true, then if we don't find a word at the given
238 // position, search forward until we do find a word and return that (if
239 // found).
240 int32_t FindRealWordContaining(int32_t aSoftTextOffset, DOMMapHint aHint,
241 bool aSearchForward) const;
243 mozilla::Result<RealWords, nsresult> BuildRealWords() const;
245 nsresult SplitDOMWordAndAppendTo(int32_t aStart, int32_t aEnd,
246 nsTArray<RealWord>& aRealWords) const;
248 nsresult MakeRangeForWord(const RealWord& aWord, nsRange** aRange) const;
249 void MakeNodeOffsetRangeForWord(const RealWord& aWord,
250 NodeOffsetRange* aNodeOffsetRange);
253 #endif