extensions/spellcheck/src/mozInlineSpellWordUtil.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "mozInlineSpellWordUtil.h"
   7
   8 #include <algorithm>
   9 #include <utility>
  10
  11 #include "mozilla/BinarySearch.h"
  12 #include "mozilla/EditorBase.h"
  13 #include "mozilla/HTMLEditor.h"
  14 #include "mozilla/Logging.h"
  15 #include "mozilla/dom/Element.h"
  16
  17 #include "nsDebug.h"
  18 #include "nsAtom.h"
  19 #include "nsComponentManagerUtils.h"
  20 #include "nsUnicodeProperties.h"
  21 #include "nsServiceManagerUtils.h"
  22 #include "nsIContent.h"
  23 #include "nsTextFragment.h"
  24 #include "nsRange.h"
  25 #include "nsContentUtils.h"
  26 #include "nsIFrame.h"
  27
  28 using namespace mozilla;
  29
  30 static LazyLogModule sInlineSpellWordUtilLog{"InlineSpellWordUtil"};
  31
  32 // IsIgnorableCharacter
  33 //
  34 //    These characters are ones that we should ignore in input.
  35
  36 inline bool IsIgnorableCharacter(char ch) {
  37   return (ch == static_cast<char>(0xAD));  // SOFT HYPHEN
  38 }
  39
  40 inline bool IsIgnorableCharacter(char16_t ch) {
  41   return (ch == 0xAD ||   // SOFT HYPHEN
  42           ch == 0x1806);  // MONGOLIAN TODO SOFT HYPHEN
  43 }
  44
  45 // IsConditionalPunctuation
  46 //
  47 //    Some characters (like apostrophes) require characters on each side to be
  48 //    part of a word, and are otherwise punctuation.
  49
  50 inline bool IsConditionalPunctuation(char ch) {
  51   return (ch == '\'' ||                    // RIGHT SINGLE QUOTATION MARK
  52           ch == static_cast<char>(0xB7));  // MIDDLE DOT
  53 }
  54
  55 inline bool IsConditionalPunctuation(char16_t ch) {
  56   return (ch == '\'' || ch == 0x2019 ||  // RIGHT SINGLE QUOTATION MARK
  57           ch == 0x00B7);                 // MIDDLE DOT
  58 }
  59
  60 static bool IsAmbiguousDOMWordSeprator(char16_t ch) {
  61   // This class may be CHAR_CLASS_SEPARATOR, but it depends on context.
  62   return (ch == '@' || ch == ':' || ch == '.' || ch == '/' || ch == '-' ||
  63           IsConditionalPunctuation(ch));
  64 }
  65
  66 static bool IsAmbiguousDOMWordSeprator(char ch) {
  67   // This class may be CHAR_CLASS_SEPARATOR, but it depends on context.
  68   return IsAmbiguousDOMWordSeprator(static_cast<char16_t>(ch));
  69 }
  70
  71 // IsDOMWordSeparator
  72 //
  73 //    Determines if the given character should be considered as a DOM Word
  74 //    separator. Basically, this is whitespace, although it could also have
  75 //    certain punctuation that we know ALWAYS breaks words. This is important.
  76 //    For example, we can't have any punctuation that could appear in a URL
  77 //    or email address in this, because those need to always fit into a single
  78 //    DOM word.
  79
  80 static bool IsDOMWordSeparator(char ch) {
  81   // simple spaces or no-break space
  82   return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' ||
  83           ch == static_cast<char>(0xA0));
  84 }
  85
  86 static bool IsDOMWordSeparator(char16_t ch) {
  87   // simple spaces
  88   if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') return true;
  89
  90   // complex spaces - check only if char isn't ASCII (uncommon)
  91   if (ch >= 0xA0 && (ch == 0x00A0 ||  // NO-BREAK SPACE
  92                      ch == 0x2002 ||  // EN SPACE
  93                      ch == 0x2003 ||  // EM SPACE
  94                      ch == 0x2009 ||  // THIN SPACE
  95                      ch == 0x3000))   // IDEOGRAPHIC SPACE
  96     return true;
  97
  98   // otherwise not a space
  99   return false;
 100 }
 101
 102 bool NodeOffset::operator==(
 103     const mozilla::RangeBoundary& aRangeBoundary) const {
 104   if (aRangeBoundary.Container() != mNode) {
 105     return false;
 106   }
 107
 108   const Maybe<uint32_t> rangeBoundaryOffset =
 109       aRangeBoundary.Offset(RangeBoundary::OffsetFilter::kValidOffsets);
 110
 111   MOZ_ASSERT(mOffset >= 0);
 112   return rangeBoundaryOffset &&
 113          (*rangeBoundaryOffset == static_cast<uint32_t>(mOffset));
 114 }
 115
 116 bool NodeOffsetRange::operator==(const nsRange& aRange) const {
 117   return mBegin == aRange.StartRef() && mEnd == aRange.EndRef();
 118 }
 119
 120 // static
 121 Maybe<mozInlineSpellWordUtil> mozInlineSpellWordUtil::Create(
 122     const EditorBase& aEditorBase) {
 123   dom::Document* document = aEditorBase.GetDocument();
 124   if (NS_WARN_IF(!document)) {
 125     return Nothing();
 126   }
 127
 128   const bool isContentEditableOrDesignMode = aEditorBase.IsHTMLEditor();
 129
 130   // Find the root node for the editor. For contenteditable the mRootNode could
 131   // change to shadow root if the begin and end are inside the shadowDOM.
 132   nsINode* rootNode = aEditorBase.GetRoot();
 133   if (NS_WARN_IF(!rootNode)) {
 134     return Nothing();
 135   }
 136
 137   mozInlineSpellWordUtil util{*document, isContentEditableOrDesignMode,
 138                               *rootNode};
 139   return Some(std::move(util));
 140 }
 141
 142 static inline bool IsSpellCheckingTextNode(nsINode* aNode) {
 143   nsIContent* parent = aNode->GetParent();
 144   if (parent &&
 145       parent->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style))
 146     return false;
 147   return aNode->IsText();
 148 }
 149
 150 typedef void (*OnLeaveNodeFunPtr)(nsINode* aNode, void* aClosure);
 151
 152 // Find the next node in the DOM tree in preorder.
 153 // Calls OnLeaveNodeFunPtr when the traversal leaves a node, which is
 154 // why we can't just use GetNextNode here, sadly.
 155 static nsINode* FindNextNode(nsINode* aNode, const nsINode* aRoot,
 156                              OnLeaveNodeFunPtr aOnLeaveNode, void* aClosure) {
 157   MOZ_ASSERT(aNode, "Null starting node?");
 158
 159   nsINode* next = aNode->GetFirstChild();
 160   if (next) return next;
 161
 162   // Don't look at siblings or otherwise outside of aRoot
 163   if (aNode == aRoot) return nullptr;
 164
 165   next = aNode->GetNextSibling();
 166   if (next) return next;
 167
 168   // Go up
 169   for (;;) {
 170     if (aOnLeaveNode) {
 171       aOnLeaveNode(aNode, aClosure);
 172     }
 173
 174     next = aNode->GetParent();
 175     if (next == aRoot || !next) return nullptr;
 176     aNode = next;
 177
 178     next = aNode->GetNextSibling();
 179     if (next) return next;
 180   }
 181 }
 182
 183 // aNode is not a text node. Find the first text node starting at aNode/aOffset
 184 // in a preorder DOM traversal.
 185 static nsINode* FindNextTextNode(nsINode* aNode, int32_t aOffset,
 186                                  const nsINode* aRoot) {
 187   MOZ_ASSERT(aNode, "Null starting node?");
 188   MOZ_ASSERT(!IsSpellCheckingTextNode(aNode),
 189              "FindNextTextNode should start with a non-text node");
 190
 191   nsINode* checkNode;
 192   // Need to start at the aOffset'th child
 193   nsIContent* child = aNode->GetChildAt_Deprecated(aOffset);
 194
 195   if (child) {
 196     checkNode = child;
 197   } else {
 198     // aOffset was beyond the end of the child list.
 199     // goto next node after the last descendant of aNode in
 200     // a preorder DOM traversal.
 201     checkNode = aNode->GetNextNonChildNode(aRoot);
 202   }
 203
 204   while (checkNode && !IsSpellCheckingTextNode(checkNode)) {
 205     checkNode = checkNode->GetNextNode(aRoot);
 206   }
 207   return checkNode;
 208 }
 209
 210 // mozInlineSpellWordUtil::SetPositionAndEnd
 211 //
 212 //    We have two ranges "hard" and "soft". The hard boundary is simply
 213 //    the scope of the root node. The soft boundary is that which is set
 214 //    by the caller of this class by calling this function. If this function is
 215 //    not called, the soft boundary is the same as the hard boundary.
 216 //
 217 //    When we reach the soft boundary (mSoftText.GetEnd()), we keep
 218 //    going until we reach the end of a word. This allows the caller to set the
 219 //    end of the range to anything, and we will always check whole multiples of
 220 //    words. When we reach the hard boundary we stop no matter what.
 221 //
 222 //    There is no beginning soft boundary. This is because we only go to the
 223 //    previous node once, when finding the previous word boundary in
 224 //    SetPosition(). You might think of the soft boundary as being this initial
 225 //    position.
 226
 227 nsresult mozInlineSpellWordUtil::SetPositionAndEnd(nsINode* aPositionNode,
 228                                                    int32_t aPositionOffset,
 229                                                    nsINode* aEndNode,
 230                                                    int32_t aEndOffset) {
 231   MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
 232           ("%s: pos=(%p, %i), end=(%p, %i)", __FUNCTION__, aPositionNode,
 233            aPositionOffset, aEndNode, aEndOffset));
 234
 235   MOZ_ASSERT(aPositionNode, "Null begin node?");
 236   MOZ_ASSERT(aEndNode, "Null end node?");
 237
 238   MOZ_ASSERT(mRootNode, "Not initialized");
 239
 240   // Find a appropriate root if we are dealing with contenteditable nodes which
 241   // are in the shadow DOM.
 242   if (mIsContentEditableOrDesignMode) {
 243     nsINode* rootNode = aPositionNode->SubtreeRoot();
 244     if (rootNode != aEndNode->SubtreeRoot()) {
 245       return NS_ERROR_FAILURE;
 246     }
 247
 248     if (mozilla::dom::ShadowRoot::FromNode(rootNode)) {
 249       mRootNode = rootNode;
 250     }
 251   }
 252
 253   mSoftText.Invalidate();
 254
 255   if (!IsSpellCheckingTextNode(aPositionNode)) {
 256     // Start at the start of the first text node after aNode/aOffset.
 257     aPositionNode = FindNextTextNode(aPositionNode, aPositionOffset, mRootNode);
 258     aPositionOffset = 0;
 259   }
 260   NodeOffset softBegin = NodeOffset(aPositionNode, aPositionOffset);
 261
 262   if (!IsSpellCheckingTextNode(aEndNode)) {
 263     // End at the start of the first text node after aEndNode/aEndOffset.
 264     aEndNode = FindNextTextNode(aEndNode, aEndOffset, mRootNode);
 265     aEndOffset = 0;
 266   }
 267   NodeOffset softEnd = NodeOffset(aEndNode, aEndOffset);
 268
 269   nsresult rv = EnsureWords(std::move(softBegin), std::move(softEnd));
 270   if (NS_FAILED(rv)) {
 271     return rv;
 272   }
 273
 274   int32_t textOffset = MapDOMPositionToSoftTextOffset(mSoftText.GetBegin());
 275   if (textOffset < 0) {
 276     return NS_OK;
 277   }
 278
 279   mNextWordIndex = FindRealWordContaining(textOffset, HINT_END, true);
 280   return NS_OK;
 281 }
 282
 283 nsresult mozInlineSpellWordUtil::EnsureWords(NodeOffset aSoftBegin,
 284                                              NodeOffset aSoftEnd) {
 285   if (mSoftText.mIsValid) return NS_OK;
 286   mSoftText.AdjustBeginAndBuildText(std::move(aSoftBegin), std::move(aSoftEnd),
 287                                     mRootNode);
 288
 289   mRealWords.Clear();
 290   Result<RealWords, nsresult> realWords = BuildRealWords();
 291   if (realWords.isErr()) {
 292     return realWords.unwrapErr();
 293   }
 294
 295   mRealWords = realWords.unwrap();
 296   mSoftText.mIsValid = true;
 297   return NS_OK;
 298 }
 299
 300 nsresult mozInlineSpellWordUtil::MakeRangeForWord(const RealWord& aWord,
 301                                                   nsRange** aRange) const {
 302   NodeOffset begin =
 303       MapSoftTextOffsetToDOMPosition(aWord.mSoftTextOffset, HINT_BEGIN);
 304   NodeOffset end = MapSoftTextOffsetToDOMPosition(aWord.EndOffset(), HINT_END);
 305   return MakeRange(begin, end, aRange);
 306 }
 307 void mozInlineSpellWordUtil::MakeNodeOffsetRangeForWord(
 308     const RealWord& aWord, NodeOffsetRange* aNodeOffsetRange) {
 309   NodeOffset begin =
 310       MapSoftTextOffsetToDOMPosition(aWord.mSoftTextOffset, HINT_BEGIN);
 311   NodeOffset end = MapSoftTextOffsetToDOMPosition(aWord.EndOffset(), HINT_END);
 312   *aNodeOffsetRange = NodeOffsetRange(begin, end);
 313 }
 314
 315 // mozInlineSpellWordUtil::GetRangeForWord
 316
 317 nsresult mozInlineSpellWordUtil::GetRangeForWord(nsINode* aWordNode,
 318                                                  int32_t aWordOffset,
 319                                                  nsRange** aRange) {
 320   // Set our soft end and start
 321   NodeOffset pt(aWordNode, aWordOffset);
 322
 323   if (!mSoftText.mIsValid || pt != mSoftText.GetBegin() ||
 324       pt != mSoftText.GetEnd()) {
 325     mSoftText.Invalidate();
 326     NodeOffset softBegin = pt;
 327     NodeOffset softEnd = pt;
 328     nsresult rv = EnsureWords(std::move(softBegin), std::move(softEnd));
 329     if (NS_FAILED(rv)) {
 330       return rv;
 331     }
 332   }
 333
 334   int32_t offset = MapDOMPositionToSoftTextOffset(pt);
 335   if (offset < 0) return MakeRange(pt, pt, aRange);
 336   int32_t wordIndex = FindRealWordContaining(offset, HINT_BEGIN, false);
 337   if (wordIndex < 0) return MakeRange(pt, pt, aRange);
 338   return MakeRangeForWord(mRealWords[wordIndex], aRange);
 339 }
 340
 341 // This is to fix characters that the spellchecker may not like
 342 static void NormalizeWord(const nsAString& aInput, int32_t aPos, int32_t aLen,
 343                           nsAString& aOutput) {
 344   aOutput.Truncate();
 345   for (int32_t i = 0; i < aLen; i++) {
 346     char16_t ch = aInput.CharAt(i + aPos);
 347
 348     // remove ignorable characters from the word
 349     if (IsIgnorableCharacter(ch)) continue;
 350
 351     // the spellchecker doesn't handle curly apostrophes in all languages
 352     if (ch == 0x2019) {  // RIGHT SINGLE QUOTATION MARK
 353       ch = '\'';
 354     }
 355
 356     aOutput.Append(ch);
 357   }
 358 }
 359
 360 // mozInlineSpellWordUtil::GetNextWord
 361 //
 362 //    FIXME-optimization: we shouldn't have to generate a range every single
 363 //    time. It would be better if the inline spellchecker didn't require a
 364 //    range unless the word was misspelled. This may or may not be possible.
 365
 366 bool mozInlineSpellWordUtil::GetNextWord(Word& aWord) {
 367   MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
 368           ("%s: mNextWordIndex=%d", __FUNCTION__, mNextWordIndex));
 369
 370   if (mNextWordIndex < 0 || mNextWordIndex >= int32_t(mRealWords.Length())) {
 371     mNextWordIndex = -1;
 372     aWord.mSkipChecking = true;
 373     return false;
 374   }
 375
 376   const RealWord& realWord = mRealWords[mNextWordIndex];
 377   MakeNodeOffsetRangeForWord(realWord, &aWord.mNodeOffsetRange);
 378   ++mNextWordIndex;
 379   aWord.mSkipChecking = !realWord.mCheckableWord;
 380   ::NormalizeWord(mSoftText.GetValue(), realWord.mSoftTextOffset,
 381                   realWord.mLength, aWord.mText);
 382
 383   MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
 384           ("%s: returning: %s (skip=%d)", __FUNCTION__,
 385            NS_ConvertUTF16toUTF8(aWord.mText).get(), aWord.mSkipChecking));
 386
 387   return true;
 388 }
 389
 390 // mozInlineSpellWordUtil::MakeRange
 391 //
 392 //    Convenience function for creating a range over the current document.
 393
 394 nsresult mozInlineSpellWordUtil::MakeRange(NodeOffset aBegin, NodeOffset aEnd,
 395                                            nsRange** aRange) const {
 396   NS_ENSURE_ARG_POINTER(aBegin.mNode);
 397   if (!mDocument) {
 398     return NS_ERROR_NOT_INITIALIZED;
 399   }
 400
 401   ErrorResult error;
 402   RefPtr<nsRange> range = nsRange::Create(aBegin.mNode, aBegin.mOffset,
 403                                           aEnd.mNode, aEnd.mOffset, error);
 404   if (NS_WARN_IF(error.Failed())) {
 405     return error.StealNSResult();
 406   }
 407   MOZ_ASSERT(range);
 408   range.forget(aRange);
 409   return NS_OK;
 410 }
 411
 412 // static
 413 already_AddRefed<nsRange> mozInlineSpellWordUtil::MakeRange(
 414     const NodeOffsetRange& aRange) {
 415   IgnoredErrorResult ignoredError;
 416   RefPtr<nsRange> range =
 417       nsRange::Create(aRange.Begin().Node(), aRange.Begin().Offset(),
 418                       aRange.End().Node(), aRange.End().Offset(), ignoredError);
 419   NS_WARNING_ASSERTION(!ignoredError.Failed(), "Creating a range failed");
 420   return range.forget();
 421 }
 422
 423 /*********** Word Splitting ************/
 424
 425 // classifies a given character in the DOM word
 426 enum CharClass {
 427   CHAR_CLASS_WORD,
 428   CHAR_CLASS_SEPARATOR,
 429   CHAR_CLASS_END_OF_INPUT
 430 };
 431
 432 // Encapsulates DOM-word to real-word splitting
 433 template <class T>
 434 struct MOZ_STACK_CLASS WordSplitState {
 435   const T& mDOMWordText;
 436   int32_t mDOMWordOffset;
 437   CharClass mCurCharClass;
 438
 439   explicit WordSplitState(const T& aString)
 440       : mDOMWordText(aString),
 441         mDOMWordOffset(0),
 442         mCurCharClass(CHAR_CLASS_END_OF_INPUT) {}
 443
 444   CharClass ClassifyCharacter(int32_t aIndex, bool aRecurse) const;
 445   void Advance();
 446   void AdvanceThroughSeparators();
 447   void AdvanceThroughWord();
 448
 449   // Finds special words like email addresses and URLs that may start at the
 450   // current position, and returns their length, or 0 if not found. This allows
 451   // arbitrary word breaking rules to be used for these special entities, as
 452   // long as they can not contain whitespace.
 453   bool IsSpecialWord() const;
 454
 455   // Similar to IsSpecialWord except that this takes a split word as
 456   // input. This checks for things that do not require special word-breaking
 457   // rules.
 458   bool ShouldSkipWord(int32_t aStart, int32_t aLength) const;
 459
 460   // Finds the last sequence of DOM word separators before aBeforeOffset and
 461   // returns the offset to its first element.
 462   Maybe<int32_t> FindOffsetOfLastDOMWordSeparatorSequence(
 463       int32_t aBeforeOffset) const;
 464
 465   char16_t GetUnicharAt(int32_t aIndex) const;
 466 };
 467
 468 // WordSplitState::ClassifyCharacter
 469 template <class T>
 470 CharClass WordSplitState<T>::ClassifyCharacter(int32_t aIndex,
 471                                                bool aRecurse) const {
 472   MOZ_ASSERT(aIndex >= 0 && aIndex <= int32_t(mDOMWordText.Length()),
 473              "Index out of range");
 474   if (aIndex == int32_t(mDOMWordText.Length())) return CHAR_CLASS_SEPARATOR;
 475
 476   // this will classify the character, we want to treat "ignorable" characters
 477   // such as soft hyphens, and also ZWJ and ZWNJ as word characters.
 478   nsUGenCategory charCategory =
 479       mozilla::unicode::GetGenCategory(GetUnicharAt(aIndex));
 480   if (charCategory == nsUGenCategory::kLetter ||
 481       IsIgnorableCharacter(mDOMWordText[aIndex]) ||
 482       mDOMWordText[aIndex] == 0x200C /* ZWNJ */ ||
 483       mDOMWordText[aIndex] == 0x200D /* ZWJ */)
 484     return CHAR_CLASS_WORD;
 485
 486   // If conditional punctuation is surrounded immediately on both sides by word
 487   // characters it also counts as a word character.
 488   if (IsConditionalPunctuation(mDOMWordText[aIndex])) {
 489     if (!aRecurse) {
 490       // not allowed to look around, this punctuation counts like a separator
 491       return CHAR_CLASS_SEPARATOR;
 492     }
 493
 494     // check the left-hand character
 495     if (aIndex == 0) return CHAR_CLASS_SEPARATOR;
 496     if (ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD)
 497       return CHAR_CLASS_SEPARATOR;
 498     // If the previous charatcer is a word-char, make sure that it's not a
 499     // special dot character.
 500     if (mDOMWordText[aIndex - 1] == '.') return CHAR_CLASS_SEPARATOR;
 501
 502     // now we know left char is a word-char, check the right-hand character
 503     if (aIndex == int32_t(mDOMWordText.Length() - 1)) {
 504       return CHAR_CLASS_SEPARATOR;
 505     }
 506
 507     if (ClassifyCharacter(aIndex + 1, false) != CHAR_CLASS_WORD)
 508       return CHAR_CLASS_SEPARATOR;
 509     // If the next charatcer is a word-char, make sure that it's not a
 510     // special dot character.
 511     if (mDOMWordText[aIndex + 1] == '.') return CHAR_CLASS_SEPARATOR;
 512
 513     // char on either side is a word, this counts as a word
 514     return CHAR_CLASS_WORD;
 515   }
 516
 517   // The dot character, if appearing at the end of a word, should
 518   // be considered part of that word.  Example: "etc.", or
 519   // abbreviations
 520   if (aIndex > 0 && mDOMWordText[aIndex] == '.' &&
 521       mDOMWordText[aIndex - 1] != '.' &&
 522       ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD) {
 523     return CHAR_CLASS_WORD;
 524   }
 525
 526   // all other punctuation
 527   if (charCategory == nsUGenCategory::kSeparator ||
 528       charCategory == nsUGenCategory::kOther ||
 529       charCategory == nsUGenCategory::kPunctuation ||
 530       charCategory == nsUGenCategory::kSymbol) {
 531     // Don't break on hyphens, as hunspell handles them on its own.
 532     if (aIndex > 0 && mDOMWordText[aIndex] == '-' &&
 533         mDOMWordText[aIndex - 1] != '-' &&
 534         ClassifyCharacter(aIndex - 1, false) == CHAR_CLASS_WORD) {
 535       // A hyphen is only meaningful as a separator inside a word
 536       // if the previous and next characters are a word character.
 537       if (aIndex == int32_t(mDOMWordText.Length()) - 1)
 538         return CHAR_CLASS_SEPARATOR;
 539       if (mDOMWordText[aIndex + 1] != '.' &&
 540           ClassifyCharacter(aIndex + 1, false) == CHAR_CLASS_WORD)
 541         return CHAR_CLASS_WORD;
 542     }
 543     return CHAR_CLASS_SEPARATOR;
 544   }
 545
 546   // any other character counts as a word
 547   return CHAR_CLASS_WORD;
 548 }
 549
 550 // WordSplitState::Advance
 551 template <class T>
 552 void WordSplitState<T>::Advance() {
 553   MOZ_ASSERT(mDOMWordOffset >= 0, "Negative word index");
 554   MOZ_ASSERT(mDOMWordOffset < (int32_t)mDOMWordText.Length(),
 555              "Length beyond end");
 556
 557   mDOMWordOffset++;
 558   if (mDOMWordOffset >= (int32_t)mDOMWordText.Length())
 559     mCurCharClass = CHAR_CLASS_END_OF_INPUT;
 560   else
 561     mCurCharClass = ClassifyCharacter(mDOMWordOffset, true);
 562 }
 563
 564 // WordSplitState::AdvanceThroughSeparators
 565 template <class T>
 566 void WordSplitState<T>::AdvanceThroughSeparators() {
 567   while (mCurCharClass == CHAR_CLASS_SEPARATOR) Advance();
 568 }
 569
 570 // WordSplitState::AdvanceThroughWord
 571 template <class T>
 572 void WordSplitState<T>::AdvanceThroughWord() {
 573   while (mCurCharClass == CHAR_CLASS_WORD) Advance();
 574 }
 575
 576 // WordSplitState::IsSpecialWord
 577 template <class T>
 578 bool WordSplitState<T>::IsSpecialWord() const {
 579   // Search for email addresses. We simply define these as any sequence of
 580   // characters with an '@' character in the middle. The DOM word is already
 581   // split on whitepace, so we know that everything to the end is the address
 582   int32_t firstColon = -1;
 583   for (int32_t i = mDOMWordOffset; i < int32_t(mDOMWordText.Length()); i++) {
 584     if (mDOMWordText[i] == '@') {
 585       // only accept this if there are unambiguous word characters (don't bother
 586       // recursing to disambiguate apostrophes) on each side. This prevents
 587       // classifying, e.g. "@home" as an email address
 588
 589       // Use this condition to only accept words with '@' in the middle of
 590       // them. It works, but the inlinespellcker doesn't like this. The problem
 591       // is that you type "fhsgfh@" that's a misspelled word followed by a
 592       // symbol, but when you type another letter "fhsgfh@g" that first word
 593       // need to be unmarked misspelled. It doesn't do this. it only checks the
 594       // current position for potentially removing a spelling range.
 595       if (i > 0 && ClassifyCharacter(i - 1, false) == CHAR_CLASS_WORD &&
 596           i < (int32_t)mDOMWordText.Length() - 1 &&
 597           ClassifyCharacter(i + 1, false) == CHAR_CLASS_WORD) {
 598         return true;
 599       }
 600     } else if (mDOMWordText[i] == ':' && firstColon < 0) {
 601       firstColon = i;
 602
 603       // If the first colon is followed by a slash, consider it a URL
 604       // This will catch things like asdf://foo.com
 605       if (firstColon < (int32_t)mDOMWordText.Length() - 1 &&
 606           mDOMWordText[firstColon + 1] == '/') {
 607         return true;
 608       }
 609     }
 610   }
 611
 612   // Check the text before the first colon against some known protocols. It
 613   // is impossible to check against all protocols, especially since you can
 614   // plug in new protocols. We also don't want to waste time here checking
 615   // against a lot of obscure protocols.
 616   if (firstColon > mDOMWordOffset) {
 617     nsString protocol(
 618         Substring(mDOMWordText, mDOMWordOffset, firstColon - mDOMWordOffset));
 619     if (protocol.EqualsIgnoreCase("http") ||
 620         protocol.EqualsIgnoreCase("https") ||
 621         protocol.EqualsIgnoreCase("news") ||
 622         protocol.EqualsIgnoreCase("file") ||
 623         protocol.EqualsIgnoreCase("javascript") ||
 624         protocol.EqualsIgnoreCase("data") || protocol.EqualsIgnoreCase("ftp")) {
 625       return true;
 626     }
 627   }
 628
 629   // not anything special
 630   return false;
 631 }
 632
 633 // WordSplitState::ShouldSkipWord
 634 template <class T>
 635 bool WordSplitState<T>::ShouldSkipWord(int32_t aStart, int32_t aLength) const {
 636   int32_t last = aStart + aLength;
 637
 638   // check to see if the word contains a digit
 639   for (int32_t i = aStart; i < last; i++) {
 640     if (mozilla::unicode::GetGenCategory(GetUnicharAt(i)) ==
 641         nsUGenCategory::kNumber) {
 642       return true;
 643     }
 644   }
 645
 646   // not special
 647   return false;
 648 }
 649
 650 template <class T>
 651 Maybe<int32_t> WordSplitState<T>::FindOffsetOfLastDOMWordSeparatorSequence(
 652     const int32_t aBeforeOffset) const {
 653   for (int32_t i = aBeforeOffset - 1; i >= 0; --i) {
 654     if (IsDOMWordSeparator(mDOMWordText[i]) ||
 655         (!IsAmbiguousDOMWordSeprator(mDOMWordText[i]) &&
 656          ClassifyCharacter(i, true) == CHAR_CLASS_SEPARATOR)) {
 657       // Be greedy, find as many separators as we can
 658       for (int32_t j = i - 1; j >= 0; --j) {
 659         if (IsDOMWordSeparator(mDOMWordText[j]) ||
 660             (!IsAmbiguousDOMWordSeprator(mDOMWordText[j]) &&
 661              ClassifyCharacter(j, true) == CHAR_CLASS_SEPARATOR)) {
 662           i = j;
 663         } else {
 664           break;
 665         }
 666       }
 667       return Some(i);
 668     }
 669   }
 670   return Nothing();
 671 }
 672
 673 template <>
 674 char16_t WordSplitState<nsDependentSubstring>::GetUnicharAt(
 675     int32_t aIndex) const {
 676   return mDOMWordText[aIndex];
 677 }
 678
 679 template <>
 680 char16_t WordSplitState<nsDependentCSubstring>::GetUnicharAt(
 681     int32_t aIndex) const {
 682   return static_cast<char16_t>(static_cast<uint8_t>(mDOMWordText[aIndex]));
 683 }
 684
 685 static inline bool IsBRElement(nsINode* aNode) {
 686   return aNode->IsHTMLElement(nsGkAtoms::br);
 687 }
 688
 689 /**
 690  * Given a TextNode, finds the last sequence of DOM word separators before
 691  * aBeforeOffset and returns the offset to its first element.
 692  *
 693  * @param aContent the TextNode to check.
 694  * @param aBeforeOffset the offset in the TextNode before which we will search
 695  *        for the DOM separator. You can pass INT32_MAX to search the entire
 696  *        length of the string.
 697  */
 698 static Maybe<int32_t> FindOffsetOfLastDOMWordSeparatorSequence(
 699     nsIContent* aContent, int32_t aBeforeOffset) {
 700   const nsTextFragment* textFragment = aContent->GetText();
 701   MOZ_ASSERT(textFragment, "Where is our text?");
 702   int32_t end = std::min(aBeforeOffset, int32_t(textFragment->GetLength()));
 703
 704   if (textFragment->Is2b()) {
 705     nsDependentSubstring targetText(textFragment->Get2b(), end);
 706     WordSplitState<nsDependentSubstring> state(targetText);
 707     return state.FindOffsetOfLastDOMWordSeparatorSequence(end);
 708   }
 709
 710   nsDependentCSubstring targetText(textFragment->Get1b(), end);
 711   WordSplitState<nsDependentCSubstring> state(targetText);
 712   return state.FindOffsetOfLastDOMWordSeparatorSequence(end);
 713 }
 714
 715 /**
 716  * Check if there's a DOM word separator before aBeforeOffset in this node.
 717  * Always returns true if it's a BR element.
 718  * aSeparatorOffset is set to the index of the first character in the last
 719  * separator if any is found (0 for BR elements).
 720  *
 721  * This function does not modify aSeparatorOffset when it returns false.
 722  */
 723 static bool ContainsDOMWordSeparator(nsINode* aNode, int32_t aBeforeOffset,
 724                                      int32_t* aSeparatorOffset) {
 725   if (IsBRElement(aNode)) {
 726     *aSeparatorOffset = 0;
 727     return true;
 728   }
 729
 730   if (!IsSpellCheckingTextNode(aNode)) return false;
 731
 732   const Maybe<int32_t> separatorOffset =
 733       FindOffsetOfLastDOMWordSeparatorSequence(aNode->AsContent(),
 734                                                aBeforeOffset);
 735   if (separatorOffset) {
 736     *aSeparatorOffset = *separatorOffset;
 737     return true;
 738   }
 739
 740   return false;
 741 }
 742
 743 static bool IsBreakElement(nsINode* aNode) {
 744   if (!aNode->IsElement()) {
 745     return false;
 746   }
 747
 748   dom::Element* element = aNode->AsElement();
 749   if (element->IsHTMLElement(nsGkAtoms::br)) {
 750     return true;
 751   }
 752
 753   // If we don't have a frame, we don't consider ourselves a break
 754   // element.  In particular, words can span us.
 755   nsIFrame* frame = element->GetPrimaryFrame();
 756   if (!frame) {
 757     return false;
 758   }
 759
 760   auto* disp = frame->StyleDisplay();
 761   // Anything that's not an inline element is a break element.
 762   // XXXbz should replaced inlines be break elements, though?
 763   // Also should inline-block and such be break elements?
 764   //
 765   // FIXME(emilio): We should teach the spell checker to deal with generated
 766   // content (it doesn't at all), then remove the IsListItem() check, as there
 767   // could be no marker, etc...
 768   return !disp->IsInlineFlow() || disp->IsListItem();
 769 }
 770
 771 struct CheckLeavingBreakElementClosure {
 772   bool mLeftBreakElement;
 773 };
 774
 775 static void CheckLeavingBreakElement(nsINode* aNode, void* aClosure) {
 776   CheckLeavingBreakElementClosure* cl =
 777       static_cast<CheckLeavingBreakElementClosure*>(aClosure);
 778   if (!cl->mLeftBreakElement && IsBreakElement(aNode)) {
 779     cl->mLeftBreakElement = true;
 780   }
 781 }
 782
 783 void mozInlineSpellWordUtil::NormalizeWord(nsAString& aWord) {
 784   nsAutoString result;
 785   ::NormalizeWord(aWord, 0, aWord.Length(), result);
 786   aWord = result;
 787 }
 788
// Stores aBegin/aEnd as the soft range, possibly moves mBegin backwards to a
// word boundary, and then rebuilds mValue (the soft text) and mDOMMapping
// (which DOM text run contributed which part of mValue).
//
// The work happens in two passes:
//  1. Walk backwards from mBegin until a DOM word separator, a break element
//     or the edge of aRootNode's subtree is found.
//  2. Walk forwards from that point, appending text-node contents to mValue,
//     until the soft end has been seen *and* a separator or break element
//     boundary is reached.
void mozInlineSpellWordUtil::SoftText::AdjustBeginAndBuildText(
    NodeOffset aBegin, NodeOffset aEnd, const nsINode* aRootNode) {
  MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug, ("%s", __FUNCTION__));

  mBegin = std::move(aBegin);
  mEnd = std::move(aEnd);

  // First we have to work backwards from mBegin to find a text node
  // containing a DOM word separator, a non-inline-element
  // boundary, or the hard start node. That's where we'll start building the
  // soft string from.
  nsINode* node = mBegin.mNode;
  // Offset within |node| at which text collection will start.
  int32_t firstOffsetInNode = 0;
  // Only separators before this offset are considered in the current node;
  // after the first node it is widened to INT32_MAX (the whole node).
  int32_t checkBeforeOffset = mBegin.mOffset;
  while (node) {
    if (ContainsDOMWordSeparator(node, checkBeforeOffset, &firstOffsetInNode)) {
      if (node == mBegin.mNode) {
        // If we find a word separator on the first node, look at the preceding
        // word on the text node as well.
        if (firstOffsetInNode > 0) {
          // Try to find the previous word boundary in the current node. If
          // we can't find one, start checking previous sibling nodes (if any
          // adjacent ones exist) to see if we can find any text nodes with
          // DOM word separators. We bail out as soon as we see a node that is
          // not a text node, or we run out of previous sibling nodes. In the
          // event that we simply cannot find any preceding word separator, the
          // offset is set to 0, and the soft text beginning node is set to the
          // "most previous" text node before the original starting node, or
          // kept at the original starting node if no previous text nodes exist.
          //
          // NOTE(review): mBegin.mNode is moved to the previous sibling here,
          // but |node| (where the forward walk below starts) is not, and
          // firstOffsetInNode may then hold an offset found in the sibling.
          // Confirm that this is intended.
          int32_t newOffset = 0;
          if (!ContainsDOMWordSeparator(node, firstOffsetInNode - 1,
                                        &newOffset)) {
            nsIContent* prevNode = node->GetPreviousSibling();
            while (prevNode && IsSpellCheckingTextNode(prevNode)) {
              mBegin.mNode = prevNode;
              const Maybe<int32_t> separatorOffset =
                  FindOffsetOfLastDOMWordSeparatorSequence(prevNode, INT32_MAX);
              if (separatorOffset) {
                newOffset = *separatorOffset;
                break;
              }
              prevNode = prevNode->GetPreviousSibling();
            }
          }
          firstOffsetInNode = newOffset;
        } else {
          firstOffsetInNode = 0;
        }

        MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
                ("%s: adjusting mBegin.mOffset from %i to %i.", __FUNCTION__,
                 mBegin.mOffset, firstOffsetInNode));
        mBegin.mOffset = firstOffsetInNode;
      }
      break;
    }
    // Every node visited after the first one is searched in full.
    checkBeforeOffset = INT32_MAX;
    if (IsBreakElement(node)) {
      // Since GetPrevNode follows tree *preorder*, we're about to traverse up
      // out of 'node'. Since node induces breaks (e.g., it's a block), don't
      // bother trying to look outside it, just stop now.
      break;
    }
    // GetPrevNode below expects aRootNode to be an ancestor of node.
    if (!node->IsInclusiveDescendantOf(aRootNode)) {
      break;
    }
    node = node->GetPrevNode(aRootNode);
  }

  // Now build up the string moving forward through the DOM until we reach
  // the soft end and *then* see a DOM word separator, a non-inline-element
  // boundary, or the hard end node.
  mValue.Truncate();
  mDOMMapping.Clear();
  // Becomes true once the node holding the soft end (mEnd.mNode) is reached.
  bool seenSoftEnd = false;
  while (node) {
    if (node == mEnd.mNode) {
      seenSoftEnd = true;
    }

    // Set when this node is the last one that may contribute text.
    bool exit = false;
    if (IsSpellCheckingTextNode(node)) {
      nsIContent* content = static_cast<nsIContent*>(node);
      MOZ_ASSERT(content, "Where is our content?");
      const nsTextFragment* textFragment = content->GetText();
      MOZ_ASSERT(textFragment, "Where is our text?");
      // Exclusive end of the range of this node's text that gets appended.
      uint32_t lastOffsetInNode = textFragment->GetLength();

      if (seenSoftEnd) {
        // check whether we can stop after this
        // In the soft end node itself the scan starts at mEnd.mOffset; in any
        // later node it starts at the beginning of the text.
        for (uint32_t i =
                 node == mEnd.mNode ? AssertedCast<uint32_t>(mEnd.mOffset) : 0;
             i < textFragment->GetLength(); ++i) {
          if (IsDOMWordSeparator(textFragment->CharAt(i))) {
            exit = true;
            // stop at the first separator after the soft end point
            lastOffsetInNode = i;
            break;
          }
        }
      }

      // Only record a mapping when the node contributes at least one character.
      if (firstOffsetInNode >= 0 &&
          static_cast<uint32_t>(firstOffsetInNode) < lastOffsetInNode) {
        const uint32_t len = lastOffsetInNode - firstOffsetInNode;
        // The mapping is added before the text so that it records the soft
        // text offset at which this node's contribution starts.
        mDOMMapping.AppendElement(DOMTextMapping(
            NodeOffset(node, firstOffsetInNode), mValue.Length(), len));

        const bool ok = textFragment->AppendTo(
            mValue, static_cast<uint32_t>(firstOffsetInNode), len,
            mozilla::fallible);
        if (!ok) {
          // probably out of memory, remove from mDOMMapping
          mDOMMapping.RemoveLastElement();
          exit = true;
        }
      }

      // Any later text node is read from its start.
      firstOffsetInNode = 0;
    }

    if (exit) break;

    CheckLeavingBreakElementClosure closure = {false};
    node = FindNextNode(node, aRootNode, CheckLeavingBreakElement, &closure);
    if (closure.mLeftBreakElement || (node && IsBreakElement(node))) {
      // We left, or are entering, a break element (e.g., block). Maybe we can
      // stop now.
      if (seenSoftEnd) break;
      // Record the break
      mValue.Append(' ');
    }
  }

  MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
          ("%s: got DOM string: %s", __FUNCTION__,
           NS_ConvertUTF16toUTF8(mValue).get()));
}
 930
 931 auto mozInlineSpellWordUtil::BuildRealWords() const
 932     -> Result<RealWords, nsresult> {
 933   // This is pretty simple. We just have to walk mSoftText.GetValue(),
 934   // tokenizing it into "real words". We do an outer traversal of words
 935   // delimited by IsDOMWordSeparator, calling SplitDOMWordAndAppendTo on each of
 936   // those DOM words
 937   int32_t wordStart = -1;
 938   RealWords realWords;
 939   for (int32_t i = 0; i < int32_t(mSoftText.GetValue().Length()); ++i) {
 940     if (IsDOMWordSeparator(mSoftText.GetValue().CharAt(i))) {
 941       if (wordStart >= 0) {
 942         nsresult rv = SplitDOMWordAndAppendTo(wordStart, i, realWords);
 943         if (NS_FAILED(rv)) {
 944           return Err(rv);
 945         }
 946         wordStart = -1;
 947       }
 948     } else {
 949       if (wordStart < 0) {
 950         wordStart = i;
 951       }
 952     }
 953   }
 954   if (wordStart >= 0) {
 955     nsresult rv = SplitDOMWordAndAppendTo(
 956         wordStart, mSoftText.GetValue().Length(), realWords);
 957     if (NS_FAILED(rv)) {
 958       return Err(rv);
 959     }
 960   }
 961
 962   return realWords;
 963 }
 964
 965 /*********** DOM/realwords<->mSoftText.GetValue() mapping functions
 966  * ************/
 967
 968 int32_t mozInlineSpellWordUtil::MapDOMPositionToSoftTextOffset(
 969     const NodeOffset& aNodeOffset) const {
 970   if (!mSoftText.mIsValid) {
 971     NS_ERROR("Soft text must be valid if we're to map into it");
 972     return -1;
 973   }
 974
 975   for (int32_t i = 0; i < int32_t(mSoftText.GetDOMMapping().Length()); ++i) {
 976     const DOMTextMapping& map = mSoftText.GetDOMMapping()[i];
 977     if (map.mNodeOffset.mNode == aNodeOffset.mNode) {
 978       // Allow offsets at either end of the string, in particular, allow the
 979       // offset that's at the end of the contributed string
 980       int32_t offsetInContributedString =
 981           aNodeOffset.mOffset - map.mNodeOffset.mOffset;
 982       if (offsetInContributedString >= 0 &&
 983           offsetInContributedString <= map.mLength)
 984         return map.mSoftTextOffset + offsetInContributedString;
 985       return -1;
 986     }
 987   }
 988   return -1;
 989 }
 990
 991 namespace {
 992
 993 template <class T>
 994 class FirstLargerOffset {
 995   int32_t mSoftTextOffset;
 996
 997  public:
 998   explicit FirstLargerOffset(int32_t aSoftTextOffset)
 999       : mSoftTextOffset(aSoftTextOffset) {}
1000   int operator()(const T& t) const {
1001     // We want the first larger offset, so never return 0 (which would
1002     // short-circuit evaluation before finding the last such offset).
1003     return mSoftTextOffset < t.mSoftTextOffset ? -1 : 1;
1004   }
1005 };
1006
1007 template <class T>
1008 bool FindLastNongreaterOffset(const nsTArray<T>& aContainer,
1009                               int32_t aSoftTextOffset, size_t* aIndex) {
1010   if (aContainer.Length() == 0) {
1011     return false;
1012   }
1013
1014   BinarySearchIf(aContainer, 0, aContainer.Length(),
1015                  FirstLargerOffset<T>(aSoftTextOffset), aIndex);
1016   if (*aIndex > 0) {
1017     // There was at least one mapping with offset <= aSoftTextOffset. Step back
1018     // to find the last element with |mSoftTextOffset <= aSoftTextOffset|.
1019     *aIndex -= 1;
1020   } else {
1021     // Every mapping had offset greater than aSoftTextOffset.
1022     MOZ_ASSERT(aContainer[*aIndex].mSoftTextOffset > aSoftTextOffset);
1023   }
1024   return true;
1025 }
1026
1027 }  // namespace
1028
1029 NodeOffset mozInlineSpellWordUtil::MapSoftTextOffsetToDOMPosition(
1030     int32_t aSoftTextOffset, DOMMapHint aHint) const {
1031   MOZ_ASSERT(mSoftText.mIsValid,
1032              "Soft text must be valid if we're to map out of it");
1033   if (!mSoftText.mIsValid) return NodeOffset(nullptr, -1);
1034
1035   // Find the last mapping, if any, such that mSoftTextOffset <= aSoftTextOffset
1036   size_t index;
1037   bool found = FindLastNongreaterOffset(mSoftText.GetDOMMapping(),
1038                                         aSoftTextOffset, &index);
1039   if (!found) {
1040     return NodeOffset(nullptr, -1);
1041   }
1042
1043   // 'index' is now the last mapping, if any, such that
1044   // mSoftTextOffset <= aSoftTextOffset.
1045   // If we're doing HINT_END, then we may want to return the end of the
1046   // the previous mapping instead of the start of this mapping
1047   if (aHint == HINT_END && index > 0) {
1048     const DOMTextMapping& map = mSoftText.GetDOMMapping()[index - 1];
1049     if (map.mSoftTextOffset + map.mLength == aSoftTextOffset)
1050       return NodeOffset(map.mNodeOffset.mNode,
1051                         map.mNodeOffset.mOffset + map.mLength);
1052   }
1053
1054   // We allow ourselves to return the end of this mapping even if we're
1055   // doing HINT_START. This will only happen if there is no mapping which this
1056   // point is the start of. I'm not 100% sure this is OK...
1057   const DOMTextMapping& map = mSoftText.GetDOMMapping()[index];
1058   int32_t offset = aSoftTextOffset - map.mSoftTextOffset;
1059   if (offset >= 0 && offset <= map.mLength)
1060     return NodeOffset(map.mNodeOffset.mNode, map.mNodeOffset.mOffset + offset);
1061
1062   return NodeOffset(nullptr, -1);
1063 }
1064
1065 // static
1066 void mozInlineSpellWordUtil::ToString(const DOMMapHint aHint,
1067                                       nsACString& aResult) {
1068   switch (aHint) {
1069     case HINT_BEGIN:
1070       aResult.AssignLiteral("begin");
1071       break;
1072     case HINT_END:
1073       aResult.AssignLiteral("end");
1074       break;
1075   }
1076 }
1077
1078 int32_t mozInlineSpellWordUtil::FindRealWordContaining(
1079     int32_t aSoftTextOffset, DOMMapHint aHint, bool aSearchForward) const {
1080   if (MOZ_LOG_TEST(sInlineSpellWordUtilLog, LogLevel::Debug)) {
1081     nsAutoCString hint;
1082     mozInlineSpellWordUtil::ToString(aHint, hint);
1083
1084     MOZ_LOG(
1085         sInlineSpellWordUtilLog, LogLevel::Debug,
1086         ("%s: offset=%i, hint=%s, searchForward=%i.", __FUNCTION__,
1087          aSoftTextOffset, hint.get(), static_cast<int32_t>(aSearchForward)));
1088   }
1089
1090   MOZ_ASSERT(mSoftText.mIsValid,
1091              "Soft text must be valid if we're to map out of it");
1092   if (!mSoftText.mIsValid) return -1;
1093
1094   // Find the last word, if any, such that mRealWords[index].mSoftTextOffset
1095   // <= aSoftTextOffset
1096   size_t index;
1097   bool found = FindLastNongreaterOffset(mRealWords, aSoftTextOffset, &index);
1098   if (!found) {
1099     return -1;
1100   }
1101
1102   // 'index' is now the last word, if any, such that
1103   // mSoftTextOffset <= aSoftTextOffset.
1104   // If we're doing HINT_END, then we may want to return the end of the
1105   // the previous word instead of the start of this word
1106   if (aHint == HINT_END && index > 0) {
1107     const RealWord& word = mRealWords[index - 1];
1108     if (word.EndOffset() == aSoftTextOffset) {
1109       return index - 1;
1110     }
1111   }
1112
1113   // We allow ourselves to return the end of this word even if we're
1114   // doing HINT_BEGIN. This will only happen if there is no word which this
1115   // point is the start of. I'm not 100% sure this is OK...
1116   const RealWord& word = mRealWords[index];
1117   int32_t offset = aSoftTextOffset - word.mSoftTextOffset;
1118   if (offset >= 0 && offset <= static_cast<int32_t>(word.mLength)) return index;
1119
1120   if (aSearchForward) {
1121     if (mRealWords[0].mSoftTextOffset > aSoftTextOffset) {
1122       // All words have mSoftTextOffset > aSoftTextOffset
1123       return 0;
1124     }
1125     // 'index' is the last word such that mSoftTextOffset <= aSoftTextOffset.
1126     // Word index+1, if it exists, will be the first with
1127     // mSoftTextOffset > aSoftTextOffset.
1128     if (index + 1 < mRealWords.Length()) return index + 1;
1129   }
1130
1131   return -1;
1132 }
1133
1134 // mozInlineSpellWordUtil::SplitDOMWordAndAppendTo
1135
1136 nsresult mozInlineSpellWordUtil::SplitDOMWordAndAppendTo(
1137     int32_t aStart, int32_t aEnd, nsTArray<RealWord>& aRealWords) const {
1138   nsDependentSubstring targetText(mSoftText.GetValue(), aStart, aEnd - aStart);
1139   WordSplitState<nsDependentSubstring> state(targetText);
1140   state.mCurCharClass = state.ClassifyCharacter(0, true);
1141
1142   state.AdvanceThroughSeparators();
1143   if (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT && state.IsSpecialWord()) {
1144     int32_t specialWordLength =
1145         state.mDOMWordText.Length() - state.mDOMWordOffset;
1146     if (!aRealWords.AppendElement(
1147             RealWord(aStart + state.mDOMWordOffset, specialWordLength, false),
1148             fallible)) {
1149       return NS_ERROR_OUT_OF_MEMORY;
1150     }
1151
1152     return NS_OK;
1153   }
1154
1155   while (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT) {
1156     state.AdvanceThroughSeparators();
1157     if (state.mCurCharClass == CHAR_CLASS_END_OF_INPUT) break;
1158
1159     // save the beginning of the word
1160     int32_t wordOffset = state.mDOMWordOffset;
1161
1162     // find the end of the word
1163     state.AdvanceThroughWord();
1164     int32_t wordLen = state.mDOMWordOffset - wordOffset;
1165     if (!aRealWords.AppendElement(
1166             RealWord(aStart + wordOffset, wordLen,
1167                      !state.ShouldSkipWord(wordOffset, wordLen)),
1168             fallible)) {
1169       return NS_ERROR_OUT_OF_MEMORY;
1170     }
1171   }
1172
1173   return NS_OK;
1174 }