extensions/spellcheck/src/mozInlineSpellWordUtil.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this
   4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #include "mozInlineSpellWordUtil.h"
   7
   8 #include <algorithm>
   9 #include <utility>
  10
  11 #include "mozilla/BinarySearch.h"
  12 #include "mozilla/HTMLEditor.h"
  13 #include "mozilla/Logging.h"
  14 #include "mozilla/TextEditor.h"
  15 #include "mozilla/dom/Element.h"
  16
  17 #include "nsDebug.h"
  18 #include "nsAtom.h"
  19 #include "nsComponentManagerUtils.h"
  20 #include "nsUnicodeProperties.h"
  21 #include "nsServiceManagerUtils.h"
  22 #include "nsIContent.h"
  23 #include "nsTextFragment.h"
  24 #include "nsRange.h"
  25 #include "nsContentUtils.h"
  26 #include "nsIFrame.h"
  27
  28 using namespace mozilla;
  29
  30 static LazyLogModule sInlineSpellWordUtilLog{"InlineSpellWordUtil"};
  31
  32 // IsIgnorableCharacter
  33 //
  34 //    These characters are ones that we should ignore in input.
  35
  36 inline bool IsIgnorableCharacter(char ch) {
  37   return (ch == static_cast<char>(0xAD));  // SOFT HYPHEN
  38 }
  39
  40 inline bool IsIgnorableCharacter(char16_t ch) {
  41   return (ch == 0xAD ||   // SOFT HYPHEN
  42           ch == 0x1806);  // MONGOLIAN TODO SOFT HYPHEN
  43 }
  44
  45 // IsConditionalPunctuation
  46 //
  47 //    Some characters (like apostrophes) require characters on each side to be
  48 //    part of a word, and are otherwise punctuation.
  49
  50 inline bool IsConditionalPunctuation(char ch) {
  51   return (ch == '\'' ||                    // RIGHT SINGLE QUOTATION MARK
  52           ch == static_cast<char>(0xB7));  // MIDDLE DOT
  53 }
  54
  55 inline bool IsConditionalPunctuation(char16_t ch) {
  56   return (ch == '\'' || ch == 0x2019 ||  // RIGHT SINGLE QUOTATION MARK
  57           ch == 0x00B7);                 // MIDDLE DOT
  58 }
  59
  60 static bool IsAmbiguousDOMWordSeprator(char16_t ch) {
  61   // This class may be CHAR_CLASS_SEPARATOR, but it depends on context.
  62   return (ch == '@' || ch == ':' || ch == '.' || ch == '/' || ch == '-' ||
  63           IsConditionalPunctuation(ch));
  64 }
  65
  66 static bool IsAmbiguousDOMWordSeprator(char ch) {
  67   // This class may be CHAR_CLASS_SEPARATOR, but it depends on context.
  68   return IsAmbiguousDOMWordSeprator(static_cast<char16_t>(ch));
  69 }
  70
  71 // IsDOMWordSeparator
  72 //
  73 //    Determines if the given character should be considered as a DOM Word
  74 //    separator. Basically, this is whitespace, although it could also have
  75 //    certain punctuation that we know ALWAYS breaks words. This is important.
  76 //    For example, we can't have any punctuation that could appear in a URL
  77 //    or email address in this, because those need to always fit into a single
  78 //    DOM word.
  79
  80 static bool IsDOMWordSeparator(char ch) {
  81   // simple spaces or no-break space
  82   return (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' ||
  83           ch == static_cast<char>(0xA0));
  84 }
  85
  86 static bool IsDOMWordSeparator(char16_t ch) {
  87   // simple spaces
  88   if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') return true;
  89
  90   // complex spaces - check only if char isn't ASCII (uncommon)
  91   if (ch >= 0xA0 && (ch == 0x00A0 ||  // NO-BREAK SPACE
  92                      ch == 0x2002 ||  // EN SPACE
  93                      ch == 0x2003 ||  // EM SPACE
  94                      ch == 0x2009 ||  // THIN SPACE
  95                      ch == 0x3000))   // IDEOGRAPHIC SPACE
  96     return true;
  97
  98   // otherwise not a space
  99   return false;
 100 }
 101
 102 // static
 103 Maybe<mozInlineSpellWordUtil> mozInlineSpellWordUtil::Create(
 104     const TextEditor& aTextEditor) {
 105   dom::Document* document = aTextEditor.GetDocument();
 106   if (NS_WARN_IF(!document)) {
 107     return Nothing();
 108   }
 109
 110   const bool isContentEditableOrDesignMode = !!aTextEditor.AsHTMLEditor();
 111
 112   // Find the root node for the editor. For contenteditable the mRootNode could
 113   // change to shadow root if the begin and end are inside the shadowDOM.
 114   nsINode* rootNode = aTextEditor.GetRoot();
 115   if (NS_WARN_IF(!rootNode)) {
 116     return Nothing();
 117   }
 118
 119   mozInlineSpellWordUtil util{*document, isContentEditableOrDesignMode,
 120                               *rootNode};
 121   return Some(std::move(util));
 122 }
 123
 124 static inline bool IsSpellCheckingTextNode(nsINode* aNode) {
 125   nsIContent* parent = aNode->GetParent();
 126   if (parent &&
 127       parent->IsAnyOfHTMLElements(nsGkAtoms::script, nsGkAtoms::style))
 128     return false;
 129   return aNode->IsText();
 130 }
 131
 132 typedef void (*OnLeaveNodeFunPtr)(nsINode* aNode, void* aClosure);
 133
 134 // Find the next node in the DOM tree in preorder.
 135 // Calls OnLeaveNodeFunPtr when the traversal leaves a node, which is
 136 // why we can't just use GetNextNode here, sadly.
 137 static nsINode* FindNextNode(nsINode* aNode, const nsINode* aRoot,
 138                              OnLeaveNodeFunPtr aOnLeaveNode, void* aClosure) {
 139   MOZ_ASSERT(aNode, "Null starting node?");
 140
 141   nsINode* next = aNode->GetFirstChild();
 142   if (next) return next;
 143
 144   // Don't look at siblings or otherwise outside of aRoot
 145   if (aNode == aRoot) return nullptr;
 146
 147   next = aNode->GetNextSibling();
 148   if (next) return next;
 149
 150   // Go up
 151   for (;;) {
 152     if (aOnLeaveNode) {
 153       aOnLeaveNode(aNode, aClosure);
 154     }
 155
 156     next = aNode->GetParent();
 157     if (next == aRoot || !next) return nullptr;
 158     aNode = next;
 159
 160     next = aNode->GetNextSibling();
 161     if (next) return next;
 162   }
 163 }
 164
 165 // aNode is not a text node. Find the first text node starting at aNode/aOffset
 166 // in a preorder DOM traversal.
 167 static nsINode* FindNextTextNode(nsINode* aNode, int32_t aOffset,
 168                                  const nsINode* aRoot) {
 169   MOZ_ASSERT(aNode, "Null starting node?");
 170   NS_ASSERTION(!IsSpellCheckingTextNode(aNode),
 171                "FindNextTextNode should start with a non-text node");
 172
 173   nsINode* checkNode;
 174   // Need to start at the aOffset'th child
 175   nsIContent* child = aNode->GetChildAt_Deprecated(aOffset);
 176
 177   if (child) {
 178     checkNode = child;
 179   } else {
 180     // aOffset was beyond the end of the child list.
 181     // goto next node after the last descendant of aNode in
 182     // a preorder DOM traversal.
 183     checkNode = aNode->GetNextNonChildNode(aRoot);
 184   }
 185
 186   while (checkNode && !IsSpellCheckingTextNode(checkNode)) {
 187     checkNode = checkNode->GetNextNode(aRoot);
 188   }
 189   return checkNode;
 190 }
 191
 192 // mozInlineSpellWordUtil::SetPositionAndEnd
 193 //
 194 //    We have two ranges "hard" and "soft". The hard boundary is simply
 195 //    the scope of the root node. The soft boundary is that which is set
 196 //    by the caller of this class by calling this function. If this function is
 197 //    not called, the soft boundary is the same as the hard boundary.
 198 //
 199 //    When we reach the soft boundary (mSoftEnd), we keep
 200 //    going until we reach the end of a word. This allows the caller to set the
 201 //    end of the range to anything, and we will always check whole multiples of
 202 //    words. When we reach the hard boundary we stop no matter what.
 203 //
 204 //    There is no beginning soft boundary. This is because we only go to the
 205 //    previous node once, when finding the previous word boundary in
 206 //    SetPosition(). You might think of the soft boundary as being this initial
 207 //    position.
 208
 209 nsresult mozInlineSpellWordUtil::SetPositionAndEnd(nsINode* aPositionNode,
 210                                                    int32_t aPositionOffset,
 211                                                    nsINode* aEndNode,
 212                                                    int32_t aEndOffset) {
 213   MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
 214           ("%s: pos=(%p, %i), end=(%p, %i)", __FUNCTION__, aPositionNode,
 215            aPositionOffset, aEndNode, aEndOffset));
 216
 217   MOZ_ASSERT(aPositionNode, "Null begin node?");
 218   MOZ_ASSERT(aEndNode, "Null end node?");
 219
 220   NS_ASSERTION(mRootNode, "Not initialized");
 221
 222   // Find a appropriate root if we are dealing with contenteditable nodes which
 223   // are in the shadow DOM.
 224   if (mIsContentEditableOrDesignMode) {
 225     nsINode* rootNode = aPositionNode->SubtreeRoot();
 226     if (rootNode != aEndNode->SubtreeRoot()) {
 227       return NS_ERROR_FAILURE;
 228     }
 229
 230     if (mozilla::dom::ShadowRoot::FromNode(rootNode)) {
 231       mRootNode = rootNode;
 232     }
 233   }
 234
 235   InvalidateWords();
 236
 237   if (!IsSpellCheckingTextNode(aPositionNode)) {
 238     // Start at the start of the first text node after aNode/aOffset.
 239     aPositionNode = FindNextTextNode(aPositionNode, aPositionOffset, mRootNode);
 240     aPositionOffset = 0;
 241   }
 242   mSoftBegin = NodeOffset(aPositionNode, aPositionOffset);
 243
 244   if (!IsSpellCheckingTextNode(aEndNode)) {
 245     // End at the start of the first text node after aEndNode/aEndOffset.
 246     aEndNode = FindNextTextNode(aEndNode, aEndOffset, mRootNode);
 247     aEndOffset = 0;
 248   }
 249   mSoftEnd = NodeOffset(aEndNode, aEndOffset);
 250
 251   nsresult rv = EnsureWords();
 252   if (NS_FAILED(rv)) {
 253     return rv;
 254   }
 255
 256   int32_t textOffset = MapDOMPositionToSoftTextOffset(mSoftBegin);
 257   if (textOffset < 0) {
 258     return NS_OK;
 259   }
 260
 261   mNextWordIndex = FindRealWordContaining(textOffset, HINT_END, true);
 262   return NS_OK;
 263 }
 264
 265 nsresult mozInlineSpellWordUtil::EnsureWords() {
 266   if (mSoftTextValid) return NS_OK;
 267   BuildSoftText();
 268
 269   mRealWords.Clear();
 270   Result<RealWords, nsresult> realWords = BuildRealWords();
 271   if (realWords.isErr()) {
 272     return realWords.unwrapErr();
 273   }
 274
 275   mRealWords = realWords.unwrap();
 276   mSoftTextValid = true;
 277   return NS_OK;
 278 }
 279
 280 nsresult mozInlineSpellWordUtil::MakeRangeForWord(const RealWord& aWord,
 281                                                   nsRange** aRange) const {
 282   NodeOffset begin =
 283       MapSoftTextOffsetToDOMPosition(aWord.mSoftTextOffset, HINT_BEGIN);
 284   NodeOffset end = MapSoftTextOffsetToDOMPosition(aWord.EndOffset(), HINT_END);
 285   return MakeRange(begin, end, aRange);
 286 }
 287 void mozInlineSpellWordUtil::MakeNodeOffsetRangeForWord(
 288     const RealWord& aWord, NodeOffsetRange* aNodeOffsetRange) {
 289   NodeOffset begin =
 290       MapSoftTextOffsetToDOMPosition(aWord.mSoftTextOffset, HINT_BEGIN);
 291   NodeOffset end = MapSoftTextOffsetToDOMPosition(aWord.EndOffset(), HINT_END);
 292   *aNodeOffsetRange = NodeOffsetRange(begin, end);
 293 }
 294
 295 // mozInlineSpellWordUtil::GetRangeForWord
 296
 297 nsresult mozInlineSpellWordUtil::GetRangeForWord(nsINode* aWordNode,
 298                                                  int32_t aWordOffset,
 299                                                  nsRange** aRange) {
 300   // Set our soft end and start
 301   NodeOffset pt(aWordNode, aWordOffset);
 302
 303   if (!mSoftTextValid || pt != mSoftBegin || pt != mSoftEnd) {
 304     InvalidateWords();
 305     mSoftBegin = mSoftEnd = pt;
 306     nsresult rv = EnsureWords();
 307     if (NS_FAILED(rv)) {
 308       return rv;
 309     }
 310   }
 311
 312   int32_t offset = MapDOMPositionToSoftTextOffset(pt);
 313   if (offset < 0) return MakeRange(pt, pt, aRange);
 314   int32_t wordIndex = FindRealWordContaining(offset, HINT_BEGIN, false);
 315   if (wordIndex < 0) return MakeRange(pt, pt, aRange);
 316   return MakeRangeForWord(mRealWords[wordIndex], aRange);
 317 }
 318
 319 // This is to fix characters that the spellchecker may not like
 320 static void NormalizeWord(const nsAString& aInput, int32_t aPos, int32_t aLen,
 321                           nsAString& aOutput) {
 322   aOutput.Truncate();
 323   for (int32_t i = 0; i < aLen; i++) {
 324     char16_t ch = aInput.CharAt(i + aPos);
 325
 326     // remove ignorable characters from the word
 327     if (IsIgnorableCharacter(ch)) continue;
 328
 329     // the spellchecker doesn't handle curly apostrophes in all languages
 330     if (ch == 0x2019) {  // RIGHT SINGLE QUOTATION MARK
 331       ch = '\'';
 332     }
 333
 334     aOutput.Append(ch);
 335   }
 336 }
 337
 338 // mozInlineSpellWordUtil::GetNextWord
 339 //
 340 //    FIXME-optimization: we shouldn't have to generate a range every single
 341 //    time. It would be better if the inline spellchecker didn't require a
 342 //    range unless the word was misspelled. This may or may not be possible.
 343
 344 bool mozInlineSpellWordUtil::GetNextWord(nsAString& aText,
 345                                          NodeOffsetRange* aNodeOffsetRange,
 346                                          bool* aSkipChecking) {
 347   MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
 348           ("%s: mNextWordIndex=%d", __FUNCTION__, mNextWordIndex));
 349
 350   if (mNextWordIndex < 0 || mNextWordIndex >= int32_t(mRealWords.Length())) {
 351     mNextWordIndex = -1;
 352     *aSkipChecking = true;
 353     return false;
 354   }
 355
 356   const RealWord& word = mRealWords[mNextWordIndex];
 357   MakeNodeOffsetRangeForWord(word, aNodeOffsetRange);
 358   ++mNextWordIndex;
 359   *aSkipChecking = !word.mCheckableWord;
 360   ::NormalizeWord(mSoftText, word.mSoftTextOffset, word.mLength, aText);
 361
 362   MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
 363           ("%s: returning: %s (skip=%d)", __FUNCTION__,
 364            NS_ConvertUTF16toUTF8(aText).get(), *aSkipChecking));
 365
 366   return true;
 367 }
 368
 369 // mozInlineSpellWordUtil::MakeRange
 370 //
 371 //    Convenience function for creating a range over the current document.
 372
 373 nsresult mozInlineSpellWordUtil::MakeRange(NodeOffset aBegin, NodeOffset aEnd,
 374                                            nsRange** aRange) const {
 375   NS_ENSURE_ARG_POINTER(aBegin.mNode);
 376   if (!mDocument) {
 377     return NS_ERROR_NOT_INITIALIZED;
 378   }
 379
 380   ErrorResult error;
 381   RefPtr<nsRange> range = nsRange::Create(aBegin.mNode, aBegin.mOffset,
 382                                           aEnd.mNode, aEnd.mOffset, error);
 383   if (NS_WARN_IF(error.Failed())) {
 384     return error.StealNSResult();
 385   }
 386   MOZ_ASSERT(range);
 387   range.forget(aRange);
 388   return NS_OK;
 389 }
 390
 391 // static
 392 already_AddRefed<nsRange> mozInlineSpellWordUtil::MakeRange(
 393     const NodeOffsetRange& aRange) {
 394   IgnoredErrorResult ignoredError;
 395   RefPtr<nsRange> range =
 396       nsRange::Create(aRange.Begin().Node(), aRange.Begin().Offset(),
 397                       aRange.End().Node(), aRange.End().Offset(), ignoredError);
 398   NS_WARNING_ASSERTION(!ignoredError.Failed(), "Creating a range failed");
 399   return range.forget();
 400 }
 401
 402 /*********** Word Splitting ************/
 403
 404 // classifies a given character in the DOM word
 405 enum CharClass {
 406   CHAR_CLASS_WORD,
 407   CHAR_CLASS_SEPARATOR,
 408   CHAR_CLASS_END_OF_INPUT
 409 };
 410
 411 // Encapsulates DOM-word to real-word splitting
 412 template <class T>
 413 struct MOZ_STACK_CLASS WordSplitState {
 414   const T& mDOMWordText;
 415   int32_t mDOMWordOffset;
 416   CharClass mCurCharClass;
 417
 418   explicit WordSplitState(const T& aString)
 419       : mDOMWordText(aString),
 420         mDOMWordOffset(0),
 421         mCurCharClass(CHAR_CLASS_END_OF_INPUT) {}
 422
 423   CharClass ClassifyCharacter(int32_t aIndex, bool aRecurse) const;
 424   void Advance();
 425   void AdvanceThroughSeparators();
 426   void AdvanceThroughWord();
 427
 428   // Finds special words like email addresses and URLs that may start at the
 429   // current position, and returns their length, or 0 if not found. This allows
 430   // arbitrary word breaking rules to be used for these special entities, as
 431   // long as they can not contain whitespace.
 432   bool IsSpecialWord() const;
 433
 434   // Similar to IsSpecialWord except that this takes a split word as
 435   // input. This checks for things that do not require special word-breaking
 436   // rules.
 437   bool ShouldSkipWord(int32_t aStart, int32_t aLength) const;
 438
 439   // Checks to see if there's a DOM word separator before aBeforeOffset within
 440   // it. This function does not modify aSeparatorOffset when it returns false.
 441   bool GetDOMWordSeparatorOffset(int32_t aOffset,
 442                                  int32_t* aSeparatorOffset) const;
 443
 444   char16_t GetUnicharAt(int32_t aIndex) const;
 445 };
 446
// WordSplitState::ClassifyCharacter
//
//    Classifies the character at aIndex of mDOMWordText as CHAR_CLASS_WORD or
//    CHAR_CLASS_SEPARATOR. This function never returns
//    CHAR_CLASS_END_OF_INPUT: the one-past-the-end index is reported as a
//    separator.
//
//    aRecurse controls how conditional punctuation (apostrophes, middle dot)
//    is treated: when false it is always a separator, when true it counts as
//    a word character only if both neighbours are word characters. All nested
//    calls made below pass aRecurse = false.
template <class T>
CharClass WordSplitState<T>::ClassifyCharacter(int32_t aIndex,
                                               bool aRecurse) const {
  NS_ASSERTION(aIndex >= 0 && aIndex <= int32_t(mDOMWordText.Length()),
               "Index out of range");
  if (aIndex == int32_t(mDOMWordText.Length())) return CHAR_CLASS_SEPARATOR;

  // this will classify the character, we want to treat "ignorable" characters
  // such as soft hyphens, and also ZWJ and ZWNJ as word characters.
  nsUGenCategory charCategory =
      mozilla::unicode::GetGenCategory(GetUnicharAt(aIndex));
  if (charCategory == nsUGenCategory::kLetter ||
      IsIgnorableCharacter(mDOMWordText[aIndex]) ||
      mDOMWordText[aIndex] == 0x200C /* ZWNJ */ ||
      mDOMWordText[aIndex] == 0x200D /* ZWJ */)
    return CHAR_CLASS_WORD;

  // If conditional punctuation is surrounded immediately on both sides by word
  // characters it also counts as a word character.
  if (IsConditionalPunctuation(mDOMWordText[aIndex])) {
    if (!aRecurse) {
      // not allowed to look around, this punctuation counts like a separator
      return CHAR_CLASS_SEPARATOR;
    }

    // check the left-hand character
    if (aIndex == 0) return CHAR_CLASS_SEPARATOR;
    if (ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD)
      return CHAR_CLASS_SEPARATOR;
    // If the previous character is a word-char, make sure that it's not a
    // special dot character.
    if (mDOMWordText[aIndex - 1] == '.') return CHAR_CLASS_SEPARATOR;

    // now we know left char is a word-char, check the right-hand character
    if (aIndex == int32_t(mDOMWordText.Length() - 1)) {
      return CHAR_CLASS_SEPARATOR;
    }

    if (ClassifyCharacter(aIndex + 1, false) != CHAR_CLASS_WORD)
      return CHAR_CLASS_SEPARATOR;
    // If the next character is a word-char, make sure that it's not a
    // special dot character.
    if (mDOMWordText[aIndex + 1] == '.') return CHAR_CLASS_SEPARATOR;

    // char on either side is a word, this counts as a word
    return CHAR_CLASS_WORD;
  }

  // The dot character, if appearing at the end of a word, should
  // be considered part of that word.  Example: "etc.", or
  // abbreviations
  //
  // NOTE(review): as written, the condition below yields CHAR_CLASS_WORD for
  // a '.' whose preceding character is NOT classified as a word character
  // (and is not itself a '.'), which looks at odds with the "end of a word"
  // wording above. Confirm the intended behavior before changing either.
  if (aIndex > 0 && mDOMWordText[aIndex] == '.' &&
      mDOMWordText[aIndex - 1] != '.' &&
      ClassifyCharacter(aIndex - 1, false) != CHAR_CLASS_WORD) {
    return CHAR_CLASS_WORD;
  }

  // all other punctuation
  if (charCategory == nsUGenCategory::kSeparator ||
      charCategory == nsUGenCategory::kOther ||
      charCategory == nsUGenCategory::kPunctuation ||
      charCategory == nsUGenCategory::kSymbol) {
    // Don't break on hyphens, as hunspell handles them on its own.
    if (aIndex > 0 && mDOMWordText[aIndex] == '-' &&
        mDOMWordText[aIndex - 1] != '-' &&
        ClassifyCharacter(aIndex - 1, false) == CHAR_CLASS_WORD) {
      // A hyphen is only meaningful as a separator inside a word
      // if the previous and next characters are a word character.
      // A hyphen that is the last character is always a separator.
      if (aIndex == int32_t(mDOMWordText.Length()) - 1)
        return CHAR_CLASS_SEPARATOR;
      // The hyphen stays part of the word only when the next character is a
      // word character other than '.'.
      if (mDOMWordText[aIndex + 1] != '.' &&
          ClassifyCharacter(aIndex + 1, false) == CHAR_CLASS_WORD)
        return CHAR_CLASS_WORD;
    }
    return CHAR_CLASS_SEPARATOR;
  }

  // any other character counts as a word
  return CHAR_CLASS_WORD;
}
 528
 529 // WordSplitState::Advance
 530 template <class T>
 531 void WordSplitState<T>::Advance() {
 532   NS_ASSERTION(mDOMWordOffset >= 0, "Negative word index");
 533   NS_ASSERTION(mDOMWordOffset < (int32_t)mDOMWordText.Length(),
 534                "Length beyond end");
 535
 536   mDOMWordOffset++;
 537   if (mDOMWordOffset >= (int32_t)mDOMWordText.Length())
 538     mCurCharClass = CHAR_CLASS_END_OF_INPUT;
 539   else
 540     mCurCharClass = ClassifyCharacter(mDOMWordOffset, true);
 541 }
 542
 543 // WordSplitState::AdvanceThroughSeparators
 544 template <class T>
 545 void WordSplitState<T>::AdvanceThroughSeparators() {
 546   while (mCurCharClass == CHAR_CLASS_SEPARATOR) Advance();
 547 }
 548
 549 // WordSplitState::AdvanceThroughWord
 550 template <class T>
 551 void WordSplitState<T>::AdvanceThroughWord() {
 552   while (mCurCharClass == CHAR_CLASS_WORD) Advance();
 553 }
 554
 555 // WordSplitState::IsSpecialWord
 556 template <class T>
 557 bool WordSplitState<T>::IsSpecialWord() const {
 558   // Search for email addresses. We simply define these as any sequence of
 559   // characters with an '@' character in the middle. The DOM word is already
 560   // split on whitepace, so we know that everything to the end is the address
 561   int32_t firstColon = -1;
 562   for (int32_t i = mDOMWordOffset; i < int32_t(mDOMWordText.Length()); i++) {
 563     if (mDOMWordText[i] == '@') {
 564       // only accept this if there are unambiguous word characters (don't bother
 565       // recursing to disambiguate apostrophes) on each side. This prevents
 566       // classifying, e.g. "@home" as an email address
 567
 568       // Use this condition to only accept words with '@' in the middle of
 569       // them. It works, but the inlinespellcker doesn't like this. The problem
 570       // is that you type "fhsgfh@" that's a misspelled word followed by a
 571       // symbol, but when you type another letter "fhsgfh@g" that first word
 572       // need to be unmarked misspelled. It doesn't do this. it only checks the
 573       // current position for potentially removing a spelling range.
 574       if (i > 0 && ClassifyCharacter(i - 1, false) == CHAR_CLASS_WORD &&
 575           i < (int32_t)mDOMWordText.Length() - 1 &&
 576           ClassifyCharacter(i + 1, false) == CHAR_CLASS_WORD) {
 577         return true;
 578       }
 579     } else if (mDOMWordText[i] == ':' && firstColon < 0) {
 580       firstColon = i;
 581
 582       // If the first colon is followed by a slash, consider it a URL
 583       // This will catch things like asdf://foo.com
 584       if (firstColon < (int32_t)mDOMWordText.Length() - 1 &&
 585           mDOMWordText[firstColon + 1] == '/') {
 586         return true;
 587       }
 588     }
 589   }
 590
 591   // Check the text before the first colon against some known protocols. It
 592   // is impossible to check against all protocols, especially since you can
 593   // plug in new protocols. We also don't want to waste time here checking
 594   // against a lot of obscure protocols.
 595   if (firstColon > mDOMWordOffset) {
 596     nsString protocol(
 597         Substring(mDOMWordText, mDOMWordOffset, firstColon - mDOMWordOffset));
 598     if (protocol.EqualsIgnoreCase("http") ||
 599         protocol.EqualsIgnoreCase("https") ||
 600         protocol.EqualsIgnoreCase("news") ||
 601         protocol.EqualsIgnoreCase("file") ||
 602         protocol.EqualsIgnoreCase("javascript") ||
 603         protocol.EqualsIgnoreCase("data") || protocol.EqualsIgnoreCase("ftp")) {
 604       return true;
 605     }
 606   }
 607
 608   // not anything special
 609   return false;
 610 }
 611
 612 // WordSplitState::ShouldSkipWord
 613 template <class T>
 614 bool WordSplitState<T>::ShouldSkipWord(int32_t aStart, int32_t aLength) const {
 615   int32_t last = aStart + aLength;
 616
 617   // check to see if the word contains a digit
 618   for (int32_t i = aStart; i < last; i++) {
 619     if (mozilla::unicode::GetGenCategory(GetUnicharAt(i)) ==
 620         nsUGenCategory::kNumber) {
 621       return true;
 622     }
 623   }
 624
 625   // not special
 626   return false;
 627 }
 628
 629 template <class T>
 630 bool WordSplitState<T>::GetDOMWordSeparatorOffset(
 631     int32_t aOffset, int32_t* aSeparatorOffset) const {
 632   for (int32_t i = aOffset - 1; i >= 0; --i) {
 633     if (IsDOMWordSeparator(mDOMWordText[i]) ||
 634         (!IsAmbiguousDOMWordSeprator(mDOMWordText[i]) &&
 635          ClassifyCharacter(i, true) == CHAR_CLASS_SEPARATOR)) {
 636       // Be greedy, find as many separators as we can
 637       for (int32_t j = i - 1; j >= 0; --j) {
 638         if (IsDOMWordSeparator(mDOMWordText[j]) ||
 639             (!IsAmbiguousDOMWordSeprator(mDOMWordText[j]) &&
 640              ClassifyCharacter(j, true) == CHAR_CLASS_SEPARATOR)) {
 641           i = j;
 642         } else {
 643           break;
 644         }
 645       }
 646       *aSeparatorOffset = i;
 647       return true;
 648     }
 649   }
 650   return false;
 651 }
 652
 653 template <>
 654 char16_t WordSplitState<nsDependentSubstring>::GetUnicharAt(
 655     int32_t aIndex) const {
 656   return mDOMWordText[aIndex];
 657 }
 658
 659 template <>
 660 char16_t WordSplitState<nsDependentCSubstring>::GetUnicharAt(
 661     int32_t aIndex) const {
 662   return static_cast<char16_t>(static_cast<uint8_t>(mDOMWordText[aIndex]));
 663 }
 664
 665 static inline bool IsBRElement(nsINode* aNode) {
 666   return aNode->IsHTMLElement(nsGkAtoms::br);
 667 }
 668
 669 /**
 670  * Given a TextNode, checks to see if there's a DOM word separator before
 671  * aBeforeOffset within it. This function does not modify aSeparatorOffset when
 672  * it returns false.
 673  *
 674  * @param aContent the TextNode to check.
 675  * @param aBeforeOffset the offset in the TextNode before which we will search
 676  *        for the DOM separator. You can pass INT32_MAX to search the entire
 677  *        length of the string.
 678  * @param aSeparatorOffset will be set to the offset of the first separator it
 679  *        encounters. Will not be written to if no separator is found.
 680  * @returns True if it found a separator.
 681  */
 682 static bool TextNodeContainsDOMWordSeparator(nsIContent* aContent,
 683                                              int32_t aBeforeOffset,
 684                                              int32_t* aSeparatorOffset) {
 685   const nsTextFragment* textFragment = aContent->GetText();
 686   NS_ASSERTION(textFragment, "Where is our text?");
 687   int32_t end = std::min(aBeforeOffset, int32_t(textFragment->GetLength()));
 688
 689   if (textFragment->Is2b()) {
 690     nsDependentSubstring targetText(textFragment->Get2b(), end);
 691     WordSplitState<nsDependentSubstring> state(targetText);
 692     return state.GetDOMWordSeparatorOffset(end, aSeparatorOffset);
 693   }
 694
 695   nsDependentCSubstring targetText(textFragment->Get1b(), end);
 696   WordSplitState<nsDependentCSubstring> state(targetText);
 697   return state.GetDOMWordSeparatorOffset(end, aSeparatorOffset);
 698 }
 699
 700 /**
 701  * Check if there's a DOM word separator before aBeforeOffset in this node.
 702  * Always returns true if it's a BR element.
 703  * aSeparatorOffset is set to the index of the first character in the last
 704  * separator if any is found (0 for BR elements).
 705  *
 706  * This function does not modify aSeparatorOffset when it returns false.
 707  */
 708 static bool ContainsDOMWordSeparator(nsINode* aNode, int32_t aBeforeOffset,
 709                                      int32_t* aSeparatorOffset) {
 710   if (IsBRElement(aNode)) {
 711     *aSeparatorOffset = 0;
 712     return true;
 713   }
 714
 715   if (!IsSpellCheckingTextNode(aNode)) return false;
 716
 717   return TextNodeContainsDOMWordSeparator(aNode->AsContent(), aBeforeOffset,
 718                                           aSeparatorOffset);
 719 }
 720
 721 static bool IsBreakElement(nsINode* aNode) {
 722   if (!aNode->IsElement()) {
 723     return false;
 724   }
 725
 726   dom::Element* element = aNode->AsElement();
 727   if (element->IsHTMLElement(nsGkAtoms::br)) {
 728     return true;
 729   }
 730
 731   // If we don't have a frame, we don't consider ourselves a break
 732   // element.  In particular, words can span us.
 733   nsIFrame* frame = element->GetPrimaryFrame();
 734   if (!frame) {
 735     return false;
 736   }
 737
 738   auto* disp = frame->StyleDisplay();
 739   // Anything that's not an inline element is a break element.
 740   // XXXbz should replaced inlines be break elements, though?
 741   // Also should inline-block and such be break elements?
 742   //
 743   // FIXME(emilio): We should teach the spell checker to deal with generated
 744   // content (it doesn't at all), then remove the IsListItem() check, as there
 745   // could be no marker, etc...
 746   return !disp->IsInlineFlow() || disp->IsListItem();
 747 }
 748
 749 struct CheckLeavingBreakElementClosure {
 750   bool mLeftBreakElement;
 751 };
 752
 753 static void CheckLeavingBreakElement(nsINode* aNode, void* aClosure) {
 754   CheckLeavingBreakElementClosure* cl =
 755       static_cast<CheckLeavingBreakElementClosure*>(aClosure);
 756   if (!cl->mLeftBreakElement && IsBreakElement(aNode)) {
 757     cl->mLeftBreakElement = true;
 758   }
 759 }
 760
 761 void mozInlineSpellWordUtil::NormalizeWord(nsAString& aWord) {
 762   nsAutoString result;
 763   ::NormalizeWord(aWord, 0, aWord.Length(), result);
 764   aWord = result;
 765 }
 766
// Rebuilds mSoftText and mSoftTextDOMMapping from the DOM.
//
// Phase 1 walks backwards from mSoftBegin to choose the node/offset where the
// soft text should start (this may move mSoftBegin itself). Phase 2 walks
// forwards from there, appending the text of every spell-checking text node to
// mSoftText and recording one DOMTextMapping per contributing node, until the
// soft end has been seen and a DOM word separator or break-element boundary is
// reached. A single ' ' is appended to mSoftText for each break-element
// boundary crossed before the soft end.
void mozInlineSpellWordUtil::BuildSoftText() {
  MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug, ("%s", __FUNCTION__));

  // First we have to work backwards from mSoftBegin to find a text node
  // containing a DOM word separator, a non-inline-element
  // boundary, or the hard start node. That's where we'll start building the
  // soft string from.
  nsINode* node = mSoftBegin.mNode;
  // Offset within 'node' at which the forward pass starts copying text.
  int32_t firstOffsetInNode = 0;
  // Only separators before this offset are considered; for the starting node
  // that is the soft-begin offset, for every earlier node it is INT32_MAX.
  int32_t checkBeforeOffset = mSoftBegin.mOffset;
  while (node) {
    // NOTE(review): ContainsDOMWordSeparator presumably stores in
    // firstOffsetInNode the offset just past the separator it found -- confirm
    // against its definition.
    if (ContainsDOMWordSeparator(node, checkBeforeOffset, &firstOffsetInNode)) {
      if (node == mSoftBegin.mNode) {
        // If we find a word separator on the first node, look at the preceding
        // word on the text node as well.
        int32_t newOffset = 0;
        if (firstOffsetInNode > 0) {
          // Try to find the previous word boundary in the current node. If
          // we can't find one, start checking previous sibling nodes (if any
          // adjacent ones exist) to see if we can find any text nodes with
          // DOM word separators. We bail out as soon as we see a node that is
          // not a text node, or we run out of previous sibling nodes. In the
          // event that we simply cannot find any preceding word separator, the
          // offset is set to 0, and the soft text beginning node is set to the
          // "most previous" text node before the original starting node, or
          // kept at the original starting node if no previous text nodes exist.
          if (!ContainsDOMWordSeparator(node, firstOffsetInNode - 1,
                                        &newOffset)) {
            nsIContent* prevNode = node->GetPreviousSibling();
            while (prevNode && IsSpellCheckingTextNode(prevNode)) {
              mSoftBegin.mNode = prevNode;
              if (TextNodeContainsDOMWordSeparator(prevNode, INT32_MAX,
                                                   &newOffset)) {
                break;
              }
              prevNode = prevNode->GetPreviousSibling();
            }
          }
        }
        // Both the copy start and the soft-begin offset take the offset found
        // above (0 if no earlier separator was found).
        firstOffsetInNode = newOffset;
        mSoftBegin.mOffset = newOffset;
      }
      break;
    }
    // From here on we are looking at nodes before the starting node, so the
    // whole node is eligible.
    checkBeforeOffset = INT32_MAX;
    if (IsBreakElement(node)) {
      // Since GetPreviousContent follows tree *preorder*, we're about to
      // traverse up out of 'node'. Since node induces breaks (e.g., it's a
      // block), don't bother trying to look outside it, just stop now.
      break;
    }
    // GetPreviousContent below expects mRootNode to be an ancestor of node.
    if (!node->IsInclusiveDescendantOf(mRootNode)) {
      break;
    }
    node = node->GetPreviousContent(mRootNode);
  }

  // Now build up the string moving forward through the DOM until we reach
  // the soft end and *then* see a DOM word separator, a non-inline-element
  // boundary, or the hard end node.
  mSoftText.Truncate();
  mSoftTextDOMMapping.Clear();
  bool seenSoftEnd = false;
  // No per-iteration string buffer is declared here; each node's text is
  // appended directly to mSoftText inside the loop.
  while (node) {
    if (node == mSoftEnd.mNode) {
      seenSoftEnd = true;
    }

    // Set when this node is the last one that may contribute text.
    bool exit = false;
    if (IsSpellCheckingTextNode(node)) {
      nsIContent* content = static_cast<nsIContent*>(node);
      NS_ASSERTION(content, "Where is our content?");
      const nsTextFragment* textFragment = content->GetText();
      NS_ASSERTION(textFragment, "Where is our text?");
      // Exclusive end of the range copied from this node; defaults to the
      // whole fragment.
      int32_t lastOffsetInNode = textFragment->GetLength();

      if (seenSoftEnd) {
        // check whether we can stop after this
        // (in the soft-end node itself, only look from the soft-end offset on)
        for (int32_t i = node == mSoftEnd.mNode ? mSoftEnd.mOffset : 0;
             i < int32_t(textFragment->GetLength()); ++i) {
          if (IsDOMWordSeparator(textFragment->CharAt(i))) {
            exit = true;
            // stop at the first separator after the soft end point
            lastOffsetInNode = i;
            break;
          }
        }
      }

      if (firstOffsetInNode < lastOffsetInNode) {
        int32_t len = lastOffsetInNode - firstOffsetInNode;
        // Record the mapping before appending so that it captures the
        // soft-text offset at which this node's text begins.
        mSoftTextDOMMapping.AppendElement(DOMTextMapping(
            NodeOffset(node, firstOffsetInNode), mSoftText.Length(), len));

        bool ok = textFragment->AppendTo(mSoftText, firstOffsetInNode, len,
                                         mozilla::fallible);
        if (!ok) {
          // probably out of memory, remove from mSoftTextDOMMapping
          mSoftTextDOMMapping.RemoveLastElement();
          exit = true;
        }
      }

      // Every text node after the first one is copied from its beginning.
      firstOffsetInNode = 0;
    }

    if (exit) break;

    // Advance to the next node; the callback records in 'closure' whether it
    // was invoked on a break element along the way.
    CheckLeavingBreakElementClosure closure = {false};
    node = FindNextNode(node, mRootNode, CheckLeavingBreakElement, &closure);
    if (closure.mLeftBreakElement || (node && IsBreakElement(node))) {
      // We left, or are entering, a break element (e.g., block). Maybe we can
      // stop now.
      if (seenSoftEnd) break;
      // Record the break
      mSoftText.Append(' ');
    }
  }

  MOZ_LOG(sInlineSpellWordUtilLog, LogLevel::Debug,
          ("%s: got DOM string: %s", __FUNCTION__,
           NS_ConvertUTF16toUTF8(mSoftText).get()));
}
 893
 894 auto mozInlineSpellWordUtil::BuildRealWords() const
 895     -> Result<RealWords, nsresult> {
 896   // This is pretty simple. We just have to walk mSoftText, tokenizing it
 897   // into "real words".
 898   // We do an outer traversal of words delimited by IsDOMWordSeparator, calling
 899   // SplitDOMWordAndAppendTo on each of those DOM words
 900   int32_t wordStart = -1;
 901   RealWords realWords;
 902   for (int32_t i = 0; i < int32_t(mSoftText.Length()); ++i) {
 903     if (IsDOMWordSeparator(mSoftText.CharAt(i))) {
 904       if (wordStart >= 0) {
 905         nsresult rv = SplitDOMWordAndAppendTo(wordStart, i, realWords);
 906         if (NS_FAILED(rv)) {
 907           return Err(rv);
 908         }
 909         wordStart = -1;
 910       }
 911     } else {
 912       if (wordStart < 0) {
 913         wordStart = i;
 914       }
 915     }
 916   }
 917   if (wordStart >= 0) {
 918     nsresult rv =
 919         SplitDOMWordAndAppendTo(wordStart, mSoftText.Length(), realWords);
 920     if (NS_FAILED(rv)) {
 921       return Err(rv);
 922     }
 923   }
 924
 925   return realWords;
 926 }
 927
 928 /*********** DOM/realwords<->mSoftText mapping functions ************/
 929
 930 int32_t mozInlineSpellWordUtil::MapDOMPositionToSoftTextOffset(
 931     NodeOffset aNodeOffset) const {
 932   if (!mSoftTextValid) {
 933     NS_ERROR("Soft text must be valid if we're to map into it");
 934     return -1;
 935   }
 936
 937   for (int32_t i = 0; i < int32_t(mSoftTextDOMMapping.Length()); ++i) {
 938     const DOMTextMapping& map = mSoftTextDOMMapping[i];
 939     if (map.mNodeOffset.mNode == aNodeOffset.mNode) {
 940       // Allow offsets at either end of the string, in particular, allow the
 941       // offset that's at the end of the contributed string
 942       int32_t offsetInContributedString =
 943           aNodeOffset.mOffset - map.mNodeOffset.mOffset;
 944       if (offsetInContributedString >= 0 &&
 945           offsetInContributedString <= map.mLength)
 946         return map.mSoftTextOffset + offsetInContributedString;
 947       return -1;
 948     }
 949   }
 950   return -1;
 951 }
 952
 953 namespace {
 954
 955 template <class T>
 956 class FirstLargerOffset {
 957   int32_t mSoftTextOffset;
 958
 959  public:
 960   explicit FirstLargerOffset(int32_t aSoftTextOffset)
 961       : mSoftTextOffset(aSoftTextOffset) {}
 962   int operator()(const T& t) const {
 963     // We want the first larger offset, so never return 0 (which would
 964     // short-circuit evaluation before finding the last such offset).
 965     return mSoftTextOffset < t.mSoftTextOffset ? -1 : 1;
 966   }
 967 };
 968
 969 template <class T>
 970 bool FindLastNongreaterOffset(const nsTArray<T>& aContainer,
 971                               int32_t aSoftTextOffset, size_t* aIndex) {
 972   if (aContainer.Length() == 0) {
 973     return false;
 974   }
 975
 976   BinarySearchIf(aContainer, 0, aContainer.Length(),
 977                  FirstLargerOffset<T>(aSoftTextOffset), aIndex);
 978   if (*aIndex > 0) {
 979     // There was at least one mapping with offset <= aSoftTextOffset. Step back
 980     // to find the last element with |mSoftTextOffset <= aSoftTextOffset|.
 981     *aIndex -= 1;
 982   } else {
 983     // Every mapping had offset greater than aSoftTextOffset.
 984     MOZ_ASSERT(aContainer[*aIndex].mSoftTextOffset > aSoftTextOffset);
 985   }
 986   return true;
 987 }
 988
 989 }  // namespace
 990
 991 NodeOffset mozInlineSpellWordUtil::MapSoftTextOffsetToDOMPosition(
 992     int32_t aSoftTextOffset, DOMMapHint aHint) const {
 993   NS_ASSERTION(mSoftTextValid,
 994                "Soft text must be valid if we're to map out of it");
 995   if (!mSoftTextValid) return NodeOffset(nullptr, -1);
 996
 997   // Find the last mapping, if any, such that mSoftTextOffset <= aSoftTextOffset
 998   size_t index;
 999   bool found =
1000       FindLastNongreaterOffset(mSoftTextDOMMapping, aSoftTextOffset, &index);
1001   if (!found) {
1002     return NodeOffset(nullptr, -1);
1003   }
1004
1005   // 'index' is now the last mapping, if any, such that
1006   // mSoftTextOffset <= aSoftTextOffset.
1007   // If we're doing HINT_END, then we may want to return the end of the
1008   // the previous mapping instead of the start of this mapping
1009   if (aHint == HINT_END && index > 0) {
1010     const DOMTextMapping& map = mSoftTextDOMMapping[index - 1];
1011     if (map.mSoftTextOffset + map.mLength == aSoftTextOffset)
1012       return NodeOffset(map.mNodeOffset.mNode,
1013                         map.mNodeOffset.mOffset + map.mLength);
1014   }
1015
1016   // We allow ourselves to return the end of this mapping even if we're
1017   // doing HINT_START. This will only happen if there is no mapping which this
1018   // point is the start of. I'm not 100% sure this is OK...
1019   const DOMTextMapping& map = mSoftTextDOMMapping[index];
1020   int32_t offset = aSoftTextOffset - map.mSoftTextOffset;
1021   if (offset >= 0 && offset <= map.mLength)
1022     return NodeOffset(map.mNodeOffset.mNode, map.mNodeOffset.mOffset + offset);
1023
1024   return NodeOffset(nullptr, -1);
1025 }
1026
1027 // static
1028 void mozInlineSpellWordUtil::ToString(const DOMMapHint aHint,
1029                                       nsACString& aResult) {
1030   switch (aHint) {
1031     case HINT_BEGIN:
1032       aResult.AssignLiteral("begin");
1033       break;
1034     case HINT_END:
1035       aResult.AssignLiteral("end");
1036       break;
1037   }
1038 }
1039
1040 int32_t mozInlineSpellWordUtil::FindRealWordContaining(
1041     int32_t aSoftTextOffset, DOMMapHint aHint, bool aSearchForward) const {
1042   if (MOZ_LOG_TEST(sInlineSpellWordUtilLog, LogLevel::Debug)) {
1043     nsAutoCString hint;
1044     mozInlineSpellWordUtil::ToString(aHint, hint);
1045
1046     MOZ_LOG(
1047         sInlineSpellWordUtilLog, LogLevel::Debug,
1048         ("%s: offset=%i, hint=%s, searchForward=%i.", __FUNCTION__,
1049          aSoftTextOffset, hint.get(), static_cast<int32_t>(aSearchForward)));
1050   }
1051
1052   NS_ASSERTION(mSoftTextValid,
1053                "Soft text must be valid if we're to map out of it");
1054   if (!mSoftTextValid) return -1;
1055
1056   // Find the last word, if any, such that mSoftTextOffset <= aSoftTextOffset
1057   size_t index;
1058   bool found = FindLastNongreaterOffset(mRealWords, aSoftTextOffset, &index);
1059   if (!found) {
1060     return -1;
1061   }
1062
1063   // 'index' is now the last word, if any, such that
1064   // mSoftTextOffset <= aSoftTextOffset.
1065   // If we're doing HINT_END, then we may want to return the end of the
1066   // the previous word instead of the start of this word
1067   if (aHint == HINT_END && index > 0) {
1068     const RealWord& word = mRealWords[index - 1];
1069     if (word.mSoftTextOffset + word.mLength == aSoftTextOffset)
1070       return index - 1;
1071   }
1072
1073   // We allow ourselves to return the end of this word even if we're
1074   // doing HINT_START. This will only happen if there is no word which this
1075   // point is the start of. I'm not 100% sure this is OK...
1076   const RealWord& word = mRealWords[index];
1077   int32_t offset = aSoftTextOffset - word.mSoftTextOffset;
1078   if (offset >= 0 && offset <= static_cast<int32_t>(word.mLength)) return index;
1079
1080   if (aSearchForward) {
1081     if (mRealWords[0].mSoftTextOffset > aSoftTextOffset) {
1082       // All words have mSoftTextOffset > aSoftTextOffset
1083       return 0;
1084     }
1085     // 'index' is the last word such that mSoftTextOffset <= aSoftTextOffset.
1086     // Word index+1, if it exists, will be the first with
1087     // mSoftTextOffset > aSoftTextOffset.
1088     if (index + 1 < mRealWords.Length()) return index + 1;
1089   }
1090
1091   return -1;
1092 }
1093
1094 // mozInlineSpellWordUtil::SplitDOMWordAndAppendTo
1095
1096 nsresult mozInlineSpellWordUtil::SplitDOMWordAndAppendTo(
1097     int32_t aStart, int32_t aEnd, nsTArray<RealWord>& aRealWords) const {
1098   nsDependentSubstring targetText(mSoftText, aStart, aEnd - aStart);
1099   WordSplitState<nsDependentSubstring> state(targetText);
1100   state.mCurCharClass = state.ClassifyCharacter(0, true);
1101
1102   state.AdvanceThroughSeparators();
1103   if (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT && state.IsSpecialWord()) {
1104     int32_t specialWordLength =
1105         state.mDOMWordText.Length() - state.mDOMWordOffset;
1106     if (!aRealWords.AppendElement(
1107             RealWord(aStart + state.mDOMWordOffset, specialWordLength, false),
1108             fallible)) {
1109       return NS_ERROR_OUT_OF_MEMORY;
1110     }
1111
1112     return NS_OK;
1113   }
1114
1115   while (state.mCurCharClass != CHAR_CLASS_END_OF_INPUT) {
1116     state.AdvanceThroughSeparators();
1117     if (state.mCurCharClass == CHAR_CLASS_END_OF_INPUT) break;
1118
1119     // save the beginning of the word
1120     int32_t wordOffset = state.mDOMWordOffset;
1121
1122     // find the end of the word
1123     state.AdvanceThroughWord();
1124     int32_t wordLen = state.mDOMWordOffset - wordOffset;
1125     if (!aRealWords.AppendElement(
1126             RealWord(aStart + wordOffset, wordLen,
1127                      !state.ShouldSkipWord(wordOffset, wordLen)),
1128             fallible)) {
1129       return NS_ERROR_OUT_OF_MEMORY;
1130     }
1131   }
1132
1133   return NS_OK;
1134 }