1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef mozInlineSpellWordUtil_h
7 #define mozInlineSpellWordUtil_h
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Maybe.h"
11 #include "mozilla/Result.h"
12 #include "mozilla/dom/Document.h"
17 //#define DEBUG_SPELLCHECK
28 } // namespace mozilla
31 nsCOMPtr
<nsINode
> mNode
;
// Default constructor: sets mOffset to 0 and leaves mNode default-constructed.
34 NodeOffset() : mOffset(0) {}
// Constructs a NodeOffset from a node and an offset. Both values are stored
// as given (aNode into mNode, aOffset into mOffset); nothing is validated
// here.
35 NodeOffset(nsINode
* aNode
, int32_t aOffset
)
36 : mNode(aNode
), mOffset(aOffset
) {}
// Two NodeOffsets are equal when both their nodes (mNode) and their offsets
// (mOffset) compare equal.
38 bool operator==(const NodeOffset
& aOther
) const {
39 return mNode
== aOther
.mNode
&& mOffset
== aOther
.mOffset
;
// Inequality is defined as the negation of operator==, so the two operators
// cannot disagree.
42 bool operator!=(const NodeOffset
& aOther
) const { return !(*this == aOther
); }
// Returns the node of this position as a raw nsINode* obtained via
// mNode.get().
44 nsINode
* Node() const { return mNode
.get(); }
// Returns the stored offset (mOffset).
45 int32_t Offset() const { return mOffset
; }
// Pairs two NodeOffset values, a begin and an end, which are exposed through
// the Begin() and End() accessors.
48 class NodeOffsetRange
{
// Constructs a range from a begin position (b) and an end position (e). The
// two values are copied into mBegin and mEnd; their relative order is not
// checked here.
55 NodeOffsetRange(NodeOffset b
, NodeOffset e
) : mBegin(b
), mEnd(e
) {}
// Returns a copy of the begin position (mBegin).
57 NodeOffset
Begin() const { return mBegin
; }
// Returns a copy of the end position (mEnd).
59 NodeOffset
End() const { return mEnd
; }
63 * This class extracts text from the DOM and builds it into a single string.
64 * The string includes whitespace breaks wherever non-inline elements begin
65 * and end. This string is broken into "real words", following somewhat
66 * complex rules; for example substrings that look like URLs or
67 * email addresses are treated as single words, but otherwise many kinds of
68 * punctuation are treated as word separators. GetNextWord provides a way
69 * to iterate over these "real words".
71 * The basic operation is:
73 * 1. Call Init with the editor that you're using.
74 * 2. Call SetPositionAndEnd to initialize the current position inside the
75 * previously given range and set where you want to stop spellchecking.
76 * We'll stop at the word boundary after that. If SetEnd is not called,
77 * we'll stop at the end of the root element.
78 * 3. Call GetNextWord over and over until it returns false.
81 class MOZ_STACK_CLASS mozInlineSpellWordUtil
{
// Static factory: produces a mozInlineSpellWordUtil for aTextEditor, wrapped
// in a mozilla::Maybe.
// NOTE(review): presumably Nothing is returned when the utility cannot be
// created for the given editor; the definition is not visible here, confirm.
83 static mozilla::Maybe
<mozInlineSpellWordUtil
> Create(
84 const mozilla::TextEditor
& aTextEditor
);
86 // Sets the current position, which should be inside the range. If we are in
87 // the middle of a word, we'll move to its start.
// (aPositionNode, aPositionOffset) is the DOM point to start from and
// (aEndNode, aEndOffset) is the DOM point where spellchecking should stop;
// per the class comment, iteration stops at the word boundary after that
// end point. Returns an nsresult.
88 nsresult
SetPositionAndEnd(nsINode
* aPositionNode
, int32_t aPositionOffset
,
89 nsINode
* aEndNode
, int32_t aEndOffset
);
91 // Given a point inside or immediately following a word, this returns the
92 // DOM range that exactly encloses that word's characters. The current
93 // position will be at the end of the word. This will find the previous
94 // word if the current position is space, so if you care that the point is
95 // inside the word, you should check the range.
97 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
98 // before you actually generate the range you are interested in and iterate
100 nsresult
GetRangeForWord(nsINode
* aWordNode
, int32_t aWordOffset
,
103 // Convenience functions, object must be initialized
// Out-parameter overload taking the two end points separately.
// NOTE(review): presumably creates an nsRange spanning aBegin..aEnd and hands
// it back through aRange; the definition is not visible here, confirm.
104 nsresult
MakeRange(NodeOffset aBegin
, NodeOffset aEnd
,
105 nsRange
** aRange
) const;
// Static overload taking a NodeOffsetRange and returning the nsRange as an
// already_AddRefed.
// NOTE(review): what is returned on failure is not visible from this
// declaration; confirm against the definition before relying on it.
106 static already_AddRefed
<nsRange
> MakeRange(const NodeOffsetRange
& aRange
);
108 // Moves to the next word in the range, and retrieves its text and range.
109 // false is returned when we are done checking.
110 // aSkipChecking will be set if the word is "special" and shouldn't be
111 // checked (e.g., an email address).
// Out-parameters: aText receives the word's text, aNodeOffsetRange its range,
// and aSkipChecking whether the word should be skipped by the checker.
// NOTE(review): whether null pointers are accepted for aNodeOffsetRange or
// aSkipChecking cannot be told from this declaration; confirm.
112 bool GetNextWord(nsAString
& aText
, NodeOffsetRange
* aNodeOffsetRange
,
113 bool* aSkipChecking
);
115 // Call to normalize some punctuation. This function takes an autostring
116 // so we can access characters directly.
// Static, so it needs no mozInlineSpellWordUtil instance. aWord is taken by
// non-const reference and nothing is returned, so the normalized text is
// presumably written back into aWord (definition not visible here).
117 static void NormalizeWord(nsAString
& aWord
);
// Returns the document held in mDocument as a raw pointer.
119 mozilla::dom::Document
* GetDocument() const { return mDocument
; }
// Returns the root node (mRootNode) as a pointer-to-const.
120 const nsINode
* GetRootNode() const { return mRootNode
; }
// Constructor. Stores the address of aDocument in mDocument, copies
// aIsContentEditableOrDesignMode, and stores the address of aRootNode in
// mRootNode. The soft range starts out empty: mSoftBegin and mSoftEnd are
// both (nullptr, 0), and mSoftTextValid is false.
123 mozInlineSpellWordUtil(mozilla::dom::Document
& aDocument
,
124 bool aIsContentEditableOrDesignMode
, nsINode
& aRootNode
127 : mDocument(&aDocument
),
128 mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode
),
129 mRootNode(&aRootNode
),
130 mSoftBegin(nullptr, 0),
131 mSoftEnd(nullptr, 0),
133 mSoftTextValid(false) {}
135 // Cached state for the editor. Both members are const and are set once, in
// the constructor's initializer list.
// mDocument: the document passed to the constructor; GetDocument() returns
// it.
136 const RefPtr
<mozilla::dom::Document
> mDocument
;
// mIsContentEditableOrDesignMode: copy of the constructor argument of the
// same name.
137 const bool mIsContentEditableOrDesignMode
;
139 // Range to check, see SetPositionAndEnd.
// mRootNode: address of the root node given to the constructor; exposed via
// GetRootNode().
140 const nsINode
* mRootNode
;
// mSoftBegin: initialized to (nullptr, 0) by the constructor.
141 NodeOffset mSoftBegin
;
144 // DOM text covering the soft range, with newlines added at block boundaries
146 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
147 // DOM node appears at most once in this list.
// One entry of mSoftTextDOMMapping: ties a DOM position (mNodeOffset) to an
// offset in the soft text (mSoftTextOffset).
148 struct DOMTextMapping
{
// DOM position the text was extracted from.
149 NodeOffset mNodeOffset
;
// Matching offset in the soft text; the list is ordered by this value.
150 int32_t mSoftTextOffset
;
// The constructor copies its arguments into the members of the same name.
153 DOMTextMapping(NodeOffset aNodeOffset
, int32_t aSoftTextOffset
,
155 : mNodeOffset(aNodeOffset
),
156 mSoftTextOffset(aSoftTextOffset
),
// The list of mappings itself.
159 nsTArray
<DOMTextMapping
> mSoftTextDOMMapping
;
161 // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
// Offset in the soft text at which the word starts.
163 int32_t mSoftTextOffset
;
// Word length, stored in a 31-bit bitfield.
164 uint32_t mLength
: 31;
// 1-bit flag set from the constructor's aCheckable argument.
// NOTE(review): presumably this is what marks a word as one that should be
// spellchecked (as opposed to "special" words); confirm against the users.
165 uint32_t mCheckableWord
: 1;
// Constructs a word record: aOffset initializes mSoftTextOffset and
// aCheckable initializes mCheckableWord. The static_assert pins
// sizeof(RealWord) to 8 bytes, and the MOZ_ASSERT checks that aLength is
// below INT32_MAX so that it fits in the 31-bit length bitfield.
167 RealWord(int32_t aOffset
, uint32_t aLength
, bool aCheckable
)
168 : mSoftTextOffset(aOffset
),
170 mCheckableWord(aCheckable
) {
171 static_assert(sizeof(RealWord
) == 8,
172 "RealWord should be limited to 8 bytes");
173 MOZ_ASSERT(aLength
< INT32_MAX
,
174 "Word length is too large to fit in the bitfield");
// Returns the soft-text offset just past the word: start offset plus length.
177 int32_t EndOffset() const { return mSoftTextOffset
+ mLength
; }
// RealWords is the array type used for lists of RealWord entries.
179 using RealWords
= nsTArray
<RealWord
>;
// The list of words held by this object.
180 RealWords mRealWords
;
// NOTE(review): presumably the index into mRealWords of the word the next
// GetNextWord call will return; its use is not visible here, confirm.
181 int32_t mNextWordIndex
;
// Marks the cached soft text as stale by clearing mSoftTextValid. Nothing
// else is touched here.
185 void InvalidateWords() { mSoftTextValid
= false; }
// NOTE(review): presumably rebuilds the cached soft text and word list when
// mSoftTextValid is false; the definition is not visible here, confirm.
// Returns an nsresult.
186 nsresult
EnsureWords();
// Maps a DOM position to an int32_t offset; going by the name, the offset is
// into mSoftText.
// NOTE(review): the value returned for a position that has no mapping is not
// visible from this declaration; confirm against the definition.
188 int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset
) const;
189 // Map an offset into mSoftText to a DOM position. Note that two DOM positions
190 // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
191 // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
192 // aHint controls which position we return ... if aHint is HINT_END
193 // then the position indicates the END of a range so we return (A,4).
194 // Otherwise the position indicates the START of a range so we return (B,0).
// Tells the mapping and lookup helpers whether an offset should be read as
// the beginning (HINT_BEGIN) or the end (HINT_END) of a range.
195 enum DOMMapHint
{ HINT_BEGIN
, HINT_END
};
// Returns the DOM position for aSoftTextOffset. aHint picks between the two
// candidate positions when the offset falls on a boundary between nodes.
196 NodeOffset
MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset
,
197 DOMMapHint aHint
) const;
// Static helper producing a textual form of aHint in aResult.
// NOTE(review): whether aResult is overwritten or appended to is not visible
// from this declaration; confirm against the definition.
199 static void ToString(DOMMapHint aHint
, nsACString
& aResult
);
201 // Finds the index of the real word containing aSoftTextOffset, or -1 if none
202 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
203 // later word (favouring the assumption that it's the BEGINning of a word),
204 // otherwise return the earlier word (assuming it's the END of a word).
205 // If aSearchForward is true, then if we don't find a word at the given
206 // position, search forward until we do find a word and return that (if
// aSoftTextOffset is the offset to look up, aHint breaks ties between two
// adjacent words, and aSearchForward enables the forward search. The result
// is a word index, or -1 when there is none.
208 int32_t FindRealWordContaining(int32_t aSoftTextOffset
, DOMMapHint aHint
,
209 bool aSearchForward
) const;
211 // Builds mSoftText and mSoftTextDOMMapping. Returns nothing, so no failure
// is reported through this signature.
212 void BuildSoftText();
// Builds a list of real words and returns it by value: the Result carries a
// RealWords array on success or an nsresult on failure. The method is const,
// so the list comes back through the return value.
214 mozilla::Result
<RealWords
, nsresult
> BuildRealWords() const;
// Appends RealWord entries to the caller-supplied aRealWords array.
// NOTE(review): aStart and aEnd are presumably offsets into mSoftText that
// delimit the DOM word to split; confirm against the definition, along with
// whether aEnd is inclusive or exclusive. Returns an nsresult.
216 nsresult
SplitDOMWordAndAppendTo(int32_t aStart
, int32_t aEnd
,
217 nsTArray
<RealWord
>& aRealWords
) const;
// Produces an nsRange for aWord through the aRange out-parameter and returns
// an nsresult.
// NOTE(review): presumably the range covers exactly the word's characters in
// the DOM; the definition is not visible here, confirm.
219 nsresult
MakeRangeForWord(const RealWord
& aWord
, nsRange
** aRange
) const;
// Produces a NodeOffsetRange for aWord through the aNodeOffsetRange
// out-parameter. Nothing is returned, so failures are not reported through
// this signature.
// NOTE(review): unlike MakeRangeForWord this is not const; the reason is not
// visible from the declaration, confirm against the definition.
220 void MakeNodeOffsetRangeForWord(const RealWord
& aWord
,
221 NodeOffsetRange
* aNodeOffsetRange
);