1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef mozInlineSpellWordUtil_h
7 #define mozInlineSpellWordUtil_h
11 #include "mozilla/Attributes.h"
12 #include "mozilla/Maybe.h"
13 #include "mozilla/RangeBoundary.h"
14 #include "mozilla/Result.h"
15 #include "mozilla/dom/Document.h"
20 // #define DEBUG_SPELLCHECK
31 } // namespace mozilla
34 nsCOMPtr
<nsINode
> mNode
;
// Default constructor: sets mOffset to 0. mNode is not named in the
// initializer list, so it is default-constructed.
37 NodeOffset() : mOffset(0) {}
// Constructs a NodeOffset from the given node pointer and offset, storing
// them in mNode and mOffset respectively.
38 NodeOffset(nsINode
* aNode
, int32_t aOffset
)
39 : mNode(aNode
), mOffset(aOffset
) {}
41 bool operator==(const NodeOffset
& aOther
) const {
42 return mNode
== aOther
.mNode
&& mOffset
== aOther
.mOffset
;
45 bool operator==(const mozilla::RangeBoundary
& aRangeBoundary
) const;
// Inequality against another NodeOffset; defined as the negation of
// operator==(const NodeOffset&).
47 bool operator!=(const NodeOffset
& aOther
) const { return !(*this == aOther
); }
// Returns the raw pointer currently held by mNode (via get()).
49 nsINode
* Node() const { return mNode
.get(); }
// Returns the stored offset (mOffset).
50 int32_t Offset() const { return mOffset
; }
53 class NodeOffsetRange
{
// Constructs a range from a begin and an end position. Both arguments are
// taken by value and moved into mBegin and mEnd.
60 NodeOffsetRange(NodeOffset b
, NodeOffset e
)
61 : mBegin(std::move(b
)), mEnd(std::move(e
)) {}
63 bool operator==(const nsRange
& aRange
) const;
// Read-only access to the stored begin position (mBegin).
65 const NodeOffset
& Begin() const { return mBegin
; }
// Read-only access to the stored end position (mEnd).
67 const NodeOffset
& End() const { return mEnd
; }
71 * This class extracts text from the DOM and builds it into a single string.
72 * The string includes whitespace breaks wherever non-inline elements begin
73 * and end. This string is broken into "real words", following somewhat
74 * complex rules; for example substrings that look like URLs or
75 * email addresses are treated as single words, but otherwise many kinds of
76 * punctuation are treated as word separators. GetNextWord provides a way
77 * to iterate over these "real words".
79 * The basic operation is:
81 * 1. Call Init with the editor that you're using.
82 * 2. Call SetPositionAndEnd to initialize the current position inside the
83 * previously given range and set where you want to stop spellchecking.
84 * We'll stop at the word boundary after that. If SetEnd is not called,
85 * we'll stop at the end of the root element.
86 * 3. Call GetNextWord over and over until it returns false.
89 class MOZ_STACK_CLASS mozInlineSpellWordUtil
{
91 static mozilla::Maybe
<mozInlineSpellWordUtil
> Create(
92 const mozilla::EditorBase
& aEditorBase
);
94 // sets the current position, this should be inside the range. If we are in
95 // the middle of a word, we'll move to its start.
96 nsresult
SetPositionAndEnd(nsINode
* aPositionNode
, int32_t aPositionOffset
,
97 nsINode
* aEndNode
, int32_t aEndOffset
);
99 // Given a point inside or immediately following a word, this returns the
100 // DOM range that exactly encloses that word's characters. The current
101 // position will be at the end of the word. This will find the previous
102 // word if the current position is space, so if you care that the point is
103 // inside the word, you should check the range.
105 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
106 // before you actually generate the range you are interested in and iterate
108 nsresult
GetRangeForWord(nsINode
* aWordNode
, int32_t aWordOffset
,
111 // Convenience functions, object must be initialized
112 nsresult
MakeRange(NodeOffset aBegin
, NodeOffset aEnd
,
113 nsRange
** aRange
) const;
114 static already_AddRefed
<nsRange
> MakeRange(const NodeOffsetRange
& aRange
);
118 NodeOffsetRange mNodeOffsetRange
;
119 bool mSkipChecking
= false;
122 // Moves to the next word in the range, and retrieves its text and range.
123 // `false` is returned when we are done checking.
124 // mSkipChecking will be set if the word is "special" and shouldn't be
125 // checked (e.g., an email address).
126 bool GetNextWord(Word
& aWord
);
128 // Call to normalize some punctuation. This function takes a mutable
129 // nsAString reference so we can access characters directly.
130 static void NormalizeWord(nsAString
& aWord
);
// Returns the document stored in mDocument as a raw pointer.
132 mozilla::dom::Document
* GetDocument() const { return mDocument
; }
// Returns the root node stored in mRootNode (pointer-to-const).
133 const nsINode
* GetRootNode() const { return mRootNode
; }
136 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
137 // DOM node appears at most once in this list.
138 struct DOMTextMapping
{
139 NodeOffset mNodeOffset
;
140 int32_t mSoftTextOffset
;
143 DOMTextMapping(NodeOffset aNodeOffset
, int32_t aSoftTextOffset
,
145 : mNodeOffset(std::move(aNodeOffset
)),
146 mSoftTextOffset(aSoftTextOffset
),
151 void AdjustBeginAndBuildText(NodeOffset aBegin
, NodeOffset aEnd
,
152 const nsINode
* aRootNode
);
// Marks this object as no longer valid by setting mIsValid to false. No other
// member is modified here.
154 void Invalidate() { mIsValid
= false; }
// Read-only access to the stored begin position (mBegin).
156 const NodeOffset
& GetBegin() const { return mBegin
; }
// Read-only access to the stored end position (mEnd).
157 const NodeOffset
& GetEnd() const { return mEnd
; }
159 const nsTArray
<DOMTextMapping
>& GetDOMMapping() const {
// Read-only access to the stored text (mValue).
163 const nsString
& GetValue() const { return mValue
; }
165 bool mIsValid
= false;
168 NodeOffset mBegin
= NodeOffset(nullptr, 0);
169 NodeOffset mEnd
= NodeOffset(nullptr, 0);
171 nsTArray
<DOMTextMapping
> mDOMMapping
;
173 // DOM text covering the soft range, with newlines added at block boundaries
179 mozInlineSpellWordUtil(mozilla::dom::Document
& aDocument
,
180 bool aIsContentEditableOrDesignMode
, nsINode
& aRootNode
183 : mDocument(&aDocument
),
184 mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode
),
185 mRootNode(&aRootNode
),
186 mNextWordIndex(-1) {}
188 // cached stuff for the editor
189 const RefPtr
<mozilla::dom::Document
> mDocument
;
190 const bool mIsContentEditableOrDesignMode
;
192 // range to check, see SetPositionAndEnd
193 const nsINode
* mRootNode
;
195 // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset
197 int32_t mSoftTextOffset
;
198 uint32_t mLength
: 31;
199 uint32_t mCheckableWord
: 1;
201 RealWord(int32_t aOffset
, uint32_t aLength
, bool aCheckable
)
202 : mSoftTextOffset(aOffset
),
204 mCheckableWord(aCheckable
) {
205 static_assert(sizeof(RealWord
) == 8,
206 "RealWord should be limited to 8 bytes");
207 MOZ_ASSERT(aLength
< INT32_MAX
,
208 "Word length is too large to fit in the bitfield");
// Returns the offset one past the last character of this word, computed as
// mSoftTextOffset + mLength. The result is returned as int32_t.
211 int32_t EndOffset() const { return mSoftTextOffset
+ mLength
; }
213 using RealWords
= nsTArray
<RealWord
>;
214 RealWords mRealWords
;
215 int32_t mNextWordIndex
;
217 nsresult
EnsureWords(NodeOffset aSoftBegin
, NodeOffset aSoftEnd
);
219 int32_t MapDOMPositionToSoftTextOffset(const NodeOffset
& aNodeOffset
) const;
220 // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM
221 // positions can map to the same mSoftText.mValue offset, e.g. given nodes
222 // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string
223 // offset. So, aHint controls which position we return ... if aHint is
224 // HINT_END then the position indicates the END of a range so we return (A,4).
225 // Otherwise the position indicates the START of a range so we return (B,0).
226 enum DOMMapHint
{ HINT_BEGIN
, HINT_END
};
227 NodeOffset
MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset
,
228 DOMMapHint aHint
) const;
230 static void ToString(DOMMapHint aHint
, nsACString
& aResult
);
232 // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
234 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
235 // later word (favouring the assumption that it's the BEGINning of a word),
236 // otherwise return the earlier word (assuming it's the END of a word).
237 // If aSearchForward is true, then if we don't find a word at the given
238 // position, search forward until we do find a word and return that (if
240 int32_t FindRealWordContaining(int32_t aSoftTextOffset
, DOMMapHint aHint
,
241 bool aSearchForward
) const;
243 mozilla::Result
<RealWords
, nsresult
> BuildRealWords() const;
245 nsresult
SplitDOMWordAndAppendTo(int32_t aStart
, int32_t aEnd
,
246 nsTArray
<RealWord
>& aRealWords
) const;
248 nsresult
MakeRangeForWord(const RealWord
& aWord
, nsRange
** aRange
) const;
249 void MakeNodeOffsetRangeForWord(const RealWord
& aWord
,
250 NodeOffsetRange
* aNodeOffsetRange
);