1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef mozInlineSpellWordUtil_h
7 #define mozInlineSpellWordUtil_h
9 #include "mozilla/Attributes.h"
10 #include "mozilla/Maybe.h"
11 #include "mozilla/Result.h"
12 #include "mozilla/dom/Document.h"
17 //#define DEBUG_SPELLCHECK
28 } // namespace mozilla
31 nsCOMPtr
<nsINode
> mNode
;
34 NodeOffset() : mOffset(0) {}
35 NodeOffset(nsINode
* aNode
, int32_t aOffset
)
36 : mNode(aNode
), mOffset(aOffset
) {}
38 bool operator==(const NodeOffset
& aOther
) const {
39 return mNode
== aOther
.mNode
&& mOffset
== aOther
.mOffset
;
42 bool operator!=(const NodeOffset
& aOther
) const { return !(*this == aOther
); }
44 nsINode
* Node() const { return mNode
.get(); }
45 int32_t Offset() const { return mOffset
; }
48 class NodeOffsetRange
{
55 NodeOffsetRange(NodeOffset b
, NodeOffset e
) : mBegin(b
), mEnd(e
) {}
57 NodeOffset
Begin() const { return mBegin
; }
59 NodeOffset
End() const { return mEnd
; }
63 * This class extracts text from the DOM and builds it into a single string.
64 * The string includes whitespace breaks wherever non-inline elements begin
65 * and end. This string is broken into "real words", following somewhat
66 * complex rules; for example substrings that look like URLs or
67 * email addresses are treated as single words, but otherwise many kinds of
68 * punctuation are treated as word separators. GetNextWord provides a way
69 * to iterate over these "real words".
71 * The basic operation is:
73 * 1. Call Init with the editor that you're using.
74 * 2. Call SetPositionAndEnd to initialize the current position inside the
75 * previously given range and set where you want to stop spellchecking.
76 * We'll stop at the word boundary after that. If SetEnd is not called,
77 * we'll stop at the end of the root element.
78 * 3. Call GetNextWord over and over until it returns false.
81 class MOZ_STACK_CLASS mozInlineSpellWordUtil
{
83 static mozilla::Maybe
<mozInlineSpellWordUtil
> Create(
84 const mozilla::TextEditor
& aTextEditor
);
86 // sets the current position, this should be inside the range. If we are in
87 // the middle of a word, we'll move to its start.
88 nsresult
SetPositionAndEnd(nsINode
* aPositionNode
, int32_t aPositionOffset
,
89 nsINode
* aEndNode
, int32_t aEndOffset
);
91 // Given a point inside or immediately following a word, this returns the
92 // DOM range that exactly encloses that word's characters. The current
93 // position will be at the end of the word. This will find the previous
94 // word if the current position is space, so if you care that the point is
95 // inside the word, you should check the range.
97 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
98 // before you actually generate the range you are interested in and iterate
100 nsresult
GetRangeForWord(nsINode
* aWordNode
, int32_t aWordOffset
,
103 // Convenience functions, object must be initialized
104 nsresult
MakeRange(NodeOffset aBegin
, NodeOffset aEnd
,
105 nsRange
** aRange
) const;
106 static already_AddRefed
<nsRange
> MakeRange(const NodeOffsetRange
& aRange
);
108 // Moves to the next word in the range, and retrieves its text and range.
109 // false is returned when we are done checking.
110 // aSkipChecking will be set if the word is "special" and shouldn't be
111 // checked (e.g., an email address).
112 bool GetNextWord(nsAString
& aText
, NodeOffsetRange
* aNodeOffsetRange
,
113 bool* aSkipChecking
);
115 // Call to normalize some punctuation. This function takes an autostring
116 // so we can access characters directly.
117 static void NormalizeWord(nsAString
& aWord
);
119 mozilla::dom::Document
* GetDocument() const { return mDocument
; }
120 const nsINode
* GetRootNode() const { return mRootNode
; }
124 // DOM text covering the soft range, with newlines added at block boundaries
127 NodeOffset mBegin
= NodeOffset(nullptr, 0);
132 mozInlineSpellWordUtil(mozilla::dom::Document
& aDocument
,
133 bool aIsContentEditableOrDesignMode
, nsINode
& aRootNode
136 : mDocument(&aDocument
),
137 mIsContentEditableOrDesignMode(aIsContentEditableOrDesignMode
),
138 mRootNode(&aRootNode
),
139 mSoftEnd(nullptr, 0),
141 mSoftTextValid(false) {}
143 // cached stuff for the editor
144 const RefPtr
<mozilla::dom::Document
> mDocument
;
145 const bool mIsContentEditableOrDesignMode
;
147 // range to check, see SetPosition and SetEnd
148 const nsINode
* mRootNode
;
151 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
152 // DOM node appears at most once in this list.
153 struct DOMTextMapping
{
154 NodeOffset mNodeOffset
;
155 int32_t mSoftTextOffset
;
158 DOMTextMapping(NodeOffset aNodeOffset
, int32_t aSoftTextOffset
,
160 : mNodeOffset(aNodeOffset
),
161 mSoftTextOffset(aSoftTextOffset
),
164 nsTArray
<DOMTextMapping
> mSoftTextDOMMapping
;
166 // A list of the "real words" in mSoftText.mValue, ordered by mSoftTextOffset
168 int32_t mSoftTextOffset
;
169 uint32_t mLength
: 31;
170 uint32_t mCheckableWord
: 1;
172 RealWord(int32_t aOffset
, uint32_t aLength
, bool aCheckable
)
173 : mSoftTextOffset(aOffset
),
175 mCheckableWord(aCheckable
) {
176 static_assert(sizeof(RealWord
) == 8,
177 "RealWord should be limited to 8 bytes");
178 MOZ_ASSERT(aLength
< INT32_MAX
,
179 "Word length is too large to fit in the bitfield");
182 int32_t EndOffset() const { return mSoftTextOffset
+ mLength
; }
184 using RealWords
= nsTArray
<RealWord
>;
185 RealWords mRealWords
;
186 int32_t mNextWordIndex
;
190 void InvalidateWords() { mSoftTextValid
= false; }
191 nsresult
EnsureWords();
193 int32_t MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset
) const;
194 // Map an offset into mSoftText.mValue to a DOM position. Note that two DOM
195 // positions can map to the same mSoftText.mValue offset, e.g. given nodes
196 // A=aaaa and B=bbbb forming aaaabbbb, (A,4) and (B,0) give the same string
197 // offset. So, aHint controls which position we return ... if aHint is
198 // HINT_END then the position indicates the END of a range so we return (A,4).
199 // Otherwise the position indicates the START of a range so we return (B,0).
// Tells the mapping functions whether a position marks the beginning or the
// end of a range.
enum DOMMapHint {
  HINT_BEGIN,
  HINT_END
};
201 NodeOffset
MapSoftTextOffsetToDOMPosition(int32_t aSoftTextOffset
,
202 DOMMapHint aHint
) const;
204 static void ToString(DOMMapHint aHint
, nsACString
& aResult
);
206 // Finds the index of the real word containing aSoftTextOffset, or -1 if none
207 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
208 // later word (favouring the assumption that it's the BEGINning of a word),
209 // otherwise return the earlier word (assuming it's the END of a word).
210 // If aSearchForward is true, then if we don't find a word at the given
211 // position, search forward until we do find a word and return that (if any).
213 int32_t FindRealWordContaining(int32_t aSoftTextOffset
, DOMMapHint aHint
,
214 bool aSearchForward
) const;
// Builds mSoftText.mValue and mSoftTextDOMMapping, and adjusts
// mSoftText.mBegin.
void AdjustSoftBeginAndBuildSoftText();
219 mozilla::Result
<RealWords
, nsresult
> BuildRealWords() const;
221 nsresult
SplitDOMWordAndAppendTo(int32_t aStart
, int32_t aEnd
,
222 nsTArray
<RealWord
>& aRealWords
) const;
224 nsresult
MakeRangeForWord(const RealWord
& aWord
, nsRange
** aRange
) const;
225 void MakeNodeOffsetRangeForWord(const RealWord
& aWord
,
226 NodeOffsetRange
* aNodeOffsetRange
);