dom/base/FragmentDirective.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "FragmentDirective.h"
   8 #include <cstdint>
   9 #include "RangeBoundary.h"
  10 #include "mozilla/Assertions.h"
  11 #include "Document.h"
  12 #include "mozilla/dom/FragmentDirectiveBinding.h"
  13 #include "mozilla/dom/FragmentOrElement.h"
  14 #include "mozilla/dom/NodeBinding.h"
  15 #include "mozilla/dom/Text.h"
  16 #include "mozilla/intl/WordBreaker.h"
  17 #include "nsComputedDOMStyle.h"
  18 #include "nsContentUtils.h"
  19 #include "nsDOMAttributeMap.h"
  20 #include "nsGkAtoms.h"
  21 #include "nsICSSDeclaration.h"
  22 #include "nsIFrame.h"
  23 #include "nsINode.h"
  24 #include "nsIURIMutator.h"
  25 #include "nsRange.h"
  26 #include "nsString.h"
  27
  28 namespace mozilla::dom {
  29 static LazyLogModule sFragmentDirectiveLog("FragmentDirective");
  30
  31 /** Converts a `TextDirective` into a percent-encoded string. */
  32 nsCString ToString(const TextDirective& aTextDirective) {
  33   nsCString str;
  34   create_text_directive(&aTextDirective, &str);
  35   return str;
  36 }
  37
  38 NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(FragmentDirective, mDocument)
  39 NS_IMPL_CYCLE_COLLECTING_ADDREF(FragmentDirective)
  40 NS_IMPL_CYCLE_COLLECTING_RELEASE(FragmentDirective)
  41 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(FragmentDirective)
  42   NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
  43   NS_INTERFACE_MAP_ENTRY(nsISupports)
  44 NS_INTERFACE_MAP_END
  45
  46 FragmentDirective::FragmentDirective(Document* aDocument)
  47     : mDocument(aDocument) {}
  48
  49 JSObject* FragmentDirective::WrapObject(JSContext* aCx,
  50                                         JS::Handle<JSObject*> aGivenProto) {
  51   return FragmentDirective_Binding::Wrap(aCx, this, aGivenProto);
  52 }
  53
  54 void FragmentDirective::ParseAndRemoveFragmentDirectiveFromFragment(
  55     nsCOMPtr<nsIURI>& aURI, nsTArray<TextDirective>* aTextDirectives) {
  56   if (!aURI || !StaticPrefs::dom_text_fragments_enabled()) {
  57     return;
  58   }
  59   bool hasRef = false;
  60   aURI->GetHasRef(&hasRef);
  61   if (!hasRef) {
  62     return;
  63   }
  64
  65   nsAutoCString hash;
  66   aURI->GetRef(hash);
  67
  68   ParsedFragmentDirectiveResult fragmentDirective;
  69   const bool hasRemovedFragmentDirective =
  70       parse_fragment_directive(&hash, &fragmentDirective);
  71   if (!hasRemovedFragmentDirective) {
  72     return;
  73   }
  74   Unused << NS_MutateURI(aURI)
  75                 .SetRef(fragmentDirective.url_without_fragment_directive)
  76                 .Finalize(aURI);
  77   if (aTextDirectives) {
  78     aTextDirectives->SwapElements(fragmentDirective.text_directives);
  79   }
  80 }
  81
  82 nsTArray<RefPtr<nsRange>> FragmentDirective::FindTextFragmentsInDocument() {
  83   MOZ_ASSERT(mDocument);
  84   mDocument->FlushPendingNotifications(FlushType::Frames);
  85   nsTArray<RefPtr<nsRange>> textDirectiveRanges;
  86   for (const TextDirective& textDirective : mUninvokedTextDirectives) {
  87     if (RefPtr<nsRange> range = FindRangeForTextDirective(textDirective)) {
  88       textDirectiveRanges.AppendElement(range);
  89     }
  90   }
  91   mUninvokedTextDirectives.Clear();
  92   return textDirectiveRanges;
  93 }
  94
  95 /**
  96  * @brief Determine if `aNode` should be considered when traversing the DOM.
  97  *
  98  * A node is "search invisible" if it is an element in the HTML namespace and
  99  *  1. The computed value of its `display` property is `none`
 100  *  2. It serializes as void
 101  *  3. It is one of the following types:
 102  *    - HTMLIFrameElement
 103  *    - HTMLImageElement
 104  *    - HTMLMeterElement
 105  *    - HTMLObjectElement
 106  *    - HTMLProgressElement
 107  *    - HTMLStyleElement
 108  *    - HTMLScriptElement
 109  *    - HTMLVideoElement
 110  *    - HTMLAudioElement
 111  *  4. It is a `select` element whose `multiple` content attribute is absent
 112  *
 113  * see https://wicg.github.io/scroll-to-text-fragment/#search-invisible
 114  */
 115 bool NodeIsSearchInvisible(nsINode& aNode) {
 116   if (!aNode.IsElement()) {
 117     return false;
 118   }
 119   // 2. If the node serializes as void.
 120   nsAtom* nodeNameAtom = aNode.NodeInfo()->NameAtom();
 121   if (FragmentOrElement::IsHTMLVoid(nodeNameAtom)) {
 122     return true;
 123   }
 124   // 3. Is any of the following types: HTMLIFrameElement, HTMLImageElement,
 125   // HTMLMeterElement, HTMLObjectElement, HTMLProgressElement, HTMLStyleElement,
 126   // HTMLScriptElement, HTMLVideoElement, HTMLAudioElement
 127   if (aNode.IsAnyOfHTMLElements(
 128           nsGkAtoms::iframe, nsGkAtoms::image, nsGkAtoms::meter,
 129           nsGkAtoms::object, nsGkAtoms::progress, nsGkAtoms::style,
 130           nsGkAtoms::script, nsGkAtoms::video, nsGkAtoms::audio)) {
 131     return true;
 132   }
 133   // 4. Is a select element whose multiple content attribute is absent.
 134   if (aNode.IsHTMLElement(nsGkAtoms::select)) {
 135     return aNode.GetAttributes()->GetNamedItem(u"multiple"_ns) == nullptr;
 136   }
 137   // This is tested last because it's the most expensive check.
 138   // 1. The computed value of its 'display' property is 'none'.
 139   const Element* nodeAsElement = Element::FromNode(aNode);
 140   const RefPtr<const ComputedStyle> computedStyle =
 141       nsComputedDOMStyle::GetComputedStyleNoFlush(nodeAsElement);
 142   return !computedStyle ||
 143          computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
 144 }
 145
 146 /**
 147  * @brief Returns true if `aNode` has block-level display.
 148  * A node has block-level display if it is an element and the computed value
 149  * of its display property is any of
 150  *  - block
 151  *  - table
 152  *  - flow-root
 153  *  - grid
 154  *  - flex
 155  *  - list-item
 156  *
 157  * See https://wicg.github.io/scroll-to-text-fragment/#has-block-level-display
 158  */
 159 bool NodeHasBlockLevelDisplay(nsINode& aNode) {
 160   if (!aNode.IsElement()) {
 161     return false;
 162   }
 163   const Element* nodeAsElement = Element::FromNode(aNode);
 164   const RefPtr<const ComputedStyle> computedStyle =
 165       nsComputedDOMStyle::GetComputedStyleNoFlush(nodeAsElement);
 166   if (!computedStyle) {
 167     return false;
 168   }
 169   const StyleDisplay& styleDisplay = computedStyle->StyleDisplay()->mDisplay;
 170   return styleDisplay == StyleDisplay::Block ||
 171          styleDisplay == StyleDisplay::Table ||
 172          styleDisplay == StyleDisplay::FlowRoot ||
 173          styleDisplay == StyleDisplay::Grid ||
 174          styleDisplay == StyleDisplay::Flex || styleDisplay.IsListItem();
 175 }
 176
 177 /**
 178  * @brief Get the Block Ancestor For `aNode`.
 179  *
 180  * see https://wicg.github.io/scroll-to-text-fragment/#nearest-block-ancestor
 181  */
 182 nsINode* GetBlockAncestorForNode(nsINode* aNode) {
 183   // 1. Let curNode be node.
 184   RefPtr<nsINode> curNode = aNode;
 185   // 2. While curNode is non-null
 186   while (curNode) {
 187     // 2.1. If curNode is not a Text node and it has block-level display then
 188     // return curNode.
 189     if (!curNode->IsText() && NodeHasBlockLevelDisplay(*curNode)) {
 190       return curNode;
 191     }
 192     // 2.2. Otherwise, set curNode to curNode’s parent.
 193     curNode = curNode->GetParentNode();
 194   }
 195   // 3.Return node’s node document's document element.
 196   return aNode->GetOwnerDocument();
 197 }
 198
 199 /**
 200  * @brief Returns true if `aNode` is part of a non-searchable subtree.
 201  *
 202  * A node is part of a non-searchable subtree if it is or has a shadow-including
 203  * ancestor that is search invisible.
 204  *
 205  * see https://wicg.github.io/scroll-to-text-fragment/#non-searchable-subtree
 206  */
 207 bool NodeIsPartOfNonSearchableSubTree(nsINode& aNode) {
 208   nsINode* node = &aNode;
 209   do {
 210     if (NodeIsSearchInvisible(*node)) {
 211       return true;
 212     }
 213   } while ((node = node->GetParentOrShadowHostNode()));
 214   return false;
 215 }
 216
 217 /**
 218  * @brief Return true if `aNode` is a visible Text node.
 219  *
 220  * A node is a visible text node if it is a Text node, the computed value of
 221  * its parent element's visibility property is visible, and it is being
 222  * rendered.
 223  *
 224  * see https://wicg.github.io/scroll-to-text-fragment/#visible-text-node
 225  */
 226 bool NodeIsVisibleTextNode(const nsINode& aNode) {
 227   const Text* text = Text::FromNode(aNode);
 228   if (!text) {
 229     return false;
 230   }
 231   const nsIFrame* frame = text->GetPrimaryFrame();
 232   return frame && frame->StyleVisibility()->IsVisible();
 233 }
 234
 235 enum class TextScanDirection { Left = -1, Right = 1 };
 236
 237 /**
 238  * @brief Tests if there is whitespace at the given position and direction.
 239  *
 240  * This algorithm tests for whitespaces and `&nbsp;` at `aPos`.
 241  * It returns the size of the whitespace found at the position, i.e. 5/6 for
 242  * `&nbsp/;` and 1 otherwise.
 243  *
 244  * This function follows a subsection of this section of the spec, but has been
 245  * adapted to be able to scan in both directions:
 246  * https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position
 247  */
 248 uint32_t IsWhitespaceAtPosition(nsString& aText, uint32_t aPos,
 249                                 TextScanDirection aDirection) {
 250   if (aText.Length() == 0) {
 251     return 0;
 252   }
 253   if (aDirection == TextScanDirection::Right) {
 254     if (aText.Length() > (aPos + 5)) {
 255       if (Substring(aText, aPos, 5).Equals(u"&nbsp")) {
 256         return aText.Length() > (aPos + 6) && aText.CharAt(aPos + 6) == u';'
 257                    ? 6
 258                    : 5;
 259       }
 260     }
 261   } else {
 262     if (aPos > 6 && Substring(aText, aPos - 6, 6).Equals(u"&nbsp;")) {
 263       return 6;
 264     }
 265     if (aPos > 5 && Substring(aText, aPos - 5, 5).Equals(u"&nbsp")) {
 266       return 5;
 267     }
 268   }
 269   return uint32_t(IsSpaceCharacter(aText.CharAt(aPos)));
 270 }
 271
 272 /** Advances the start of `aRange` to the next non-whitespace position.
 273  * The function follows this section of the spec:
 274  * https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position
 275  */
 276 void AdvanceStartToNextNonWhitespacePosition(nsRange& aRange) {
 277   // 1. While range is not collapsed:
 278   while (!aRange.Collapsed()) {
 279     // 1.1. Let node be range's start node.
 280     RefPtr<nsINode> node = aRange.GetStartContainer();
 281     MOZ_ASSERT(node);
 282     // 1.2. Let offset be range's start offset.
 283     const uint32_t offset = aRange.StartOffset();
 284     // 1.3. If node is part of a non-searchable subtree or if node is not a
 285     // visible text node or if offset is equal to node's length then:
 286     if (NodeIsPartOfNonSearchableSubTree(*node) ||
 287         !NodeIsVisibleTextNode(*node) || offset == node->Length()) {
 288       // 1.3.1. Set range's start node to the next node, in shadow-including
 289       // tree order.
 290       // 1.3.2. Set range's start offset to 0.
 291       if (NS_FAILED(aRange.SetStart(node->GetNextNode(), 0))) {
 292         return;
 293       }
 294       // 1.3.3. Continue.
 295       continue;
 296     }
 297     const Text* text = Text::FromNode(node);
 298     nsAutoString textData;
 299     text->GetData(textData);
 300     // These steps are moved to `IsWhitespaceAtPosition()`.
 301     // 1.4. If the substring data of node at offset offset and count 6 is equal
 302     // to the string "&nbsp;" then:
 303     // 1.4.1. Add 6 to range’s start offset.
 304     // 1.5. Otherwise, if the substring data of node at offset offset and count
 305     // 5 is equal to the string "&nbsp" then:
 306     // 1.5.1. Add 5 to range’s start offset.
 307     // 1.6. Otherwise:
 308     // 1.6.1 Let cp be the code point at the offset index in node’s data.
 309     // 1.6.2 If cp does not have the White_Space property set, return.
 310     // 1.6.3 Add 1 to range’s start offset.
 311     const uint32_t whitespace =
 312         IsWhitespaceAtPosition(textData, offset, TextScanDirection::Right);
 313     if (whitespace == 0) {
 314       return;
 315     }
 316
 317     aRange.SetStart(node, offset + whitespace);
 318   }
 319 }
 320
 321 /**
 322  * @brief Moves `aRangeBoundary` one word in `aDirection`.
 323  *
 324  * Word boundaries are determined using `intl::WordBreaker::FindWord()`.
 325  *
 326  *
 327  * @param aRangeBoundary[in] The range boundary that should be moved.
 328  *                           Must be set and valid.
 329  * @param aDirection[in]     The direction into which to move.
 330  * @return A new `RangeBoundary` which is moved to the next word.
 331  */
 332 RangeBoundary MoveRangeBoundaryOneWord(const RangeBoundary& aRangeBoundary,
 333                                        TextScanDirection aDirection) {
 334   MOZ_ASSERT(aRangeBoundary.IsSetAndValid());
 335   RefPtr<nsINode> curNode = aRangeBoundary.Container();
 336   uint32_t offset = *aRangeBoundary.Offset(
 337       RangeBoundary::OffsetFilter::kValidOrInvalidOffsets);
 338
 339   const int offsetIncrement = int(aDirection);
 340   // Get the text node of the start of the range and the offset.
 341   // This is the current position of the start of the range.
 342   nsAutoString text;
 343   if (NodeIsVisibleTextNode(*curNode)) {
 344     const Text* textNode = Text::FromNode(curNode);
 345     textNode->GetData(text);
 346
 347     // Assuming that the current position might not be at a word boundary,
 348     // advance to the word boundary at word begin/end.
 349     if (!IsWhitespaceAtPosition(text, offset, aDirection)) {
 350       const intl::WordRange wordRange =
 351           intl::WordBreaker::FindWord(text, offset);
 352       if (aDirection == TextScanDirection::Right &&
 353           offset != wordRange.mBegin) {
 354         offset = wordRange.mEnd;
 355       } else if (aDirection == TextScanDirection::Left &&
 356                  offset != wordRange.mEnd) {
 357         // The additional -1 is necessary to move to offset to *before* the
 358         // start of the word.
 359         offset = wordRange.mBegin - 1;
 360       }
 361     }
 362   }
 363   // Now, skip any whitespace, so that `offset` points to the word boundary of
 364   // the next word (which is the one this algorithm actually aims to move over).
 365   while (curNode) {
 366     if (!NodeIsVisibleTextNode(*curNode) || NodeIsSearchInvisible(*curNode) ||
 367         offset >= curNode->Length()) {
 368       curNode = aDirection == TextScanDirection::Left ? curNode->GetPrevNode()
 369                                                       : curNode->GetNextNode();
 370       if (!curNode) {
 371         break;
 372       }
 373       offset =
 374           aDirection == TextScanDirection::Left ? curNode->Length() - 1 : 0;
 375       if (const Text* textNode = Text::FromNode(curNode)) {
 376         textNode->GetData(text);
 377       }
 378       continue;
 379     }
 380     if (const uint32_t whitespace =
 381             IsWhitespaceAtPosition(text, offset, aDirection)) {
 382       offset += offsetIncrement * whitespace;
 383       continue;
 384     }
 385
 386     // At this point, the caret has been moved to the next non-whitespace
 387     // position.
 388     // find word boundaries at the current position
 389     const intl::WordRange wordRange = intl::WordBreaker::FindWord(text, offset);
 390     offset = aDirection == TextScanDirection::Left ? wordRange.mBegin
 391                                                    : wordRange.mEnd;
 392
 393     return {curNode, offset};
 394   }
 395   return {};
 396 }
 397
 398 RefPtr<nsRange> FragmentDirective::FindRangeForTextDirective(
 399     const TextDirective& aTextDirective) {
 400   MOZ_LOG(sFragmentDirectiveLog, LogLevel::Info,
 401           ("FragmentDirective::%s(): Find range for text directive '%s'.",
 402            __FUNCTION__, ToString(aTextDirective).Data()));
 403   // 1. Let searchRange be a range with start (document, 0) and end (document,
 404   // document’s length)
 405   ErrorResult rv;
 406   RefPtr<nsRange> searchRange =
 407       nsRange::Create(mDocument, 0, mDocument, mDocument->Length(), rv);
 408   if (rv.Failed()) {
 409     return nullptr;
 410   }
 411   // 2. While searchRange is not collapsed:
 412   while (!searchRange->Collapsed()) {
 413     // 2.1. Let potentialMatch be null.
 414     RefPtr<nsRange> potentialMatch;
 415     // 2.2. If parsedValues’s prefix is not null:
 416     if (!aTextDirective.prefix.IsEmpty()) {
 417       // 2.2.1. Let prefixMatch be the the result of running the find a string
 418       // in range steps with query parsedValues’s prefix, searchRange
 419       // searchRange, wordStartBounded true and wordEndBounded false.
 420       RefPtr<nsRange> prefixMatch =
 421           FindStringInRange(searchRange, aTextDirective.prefix, true, false);
 422       // 2.2.2. If prefixMatch is null, return null.
 423       if (!prefixMatch) {
 424         return nullptr;
 425       }
 426       // 2.2.3. Set searchRange’s start to the first boundary point after
 427       // prefixMatch’s start
 428       const RangeBoundary boundaryPoint = MoveRangeBoundaryOneWord(
 429           {prefixMatch->GetStartContainer(), prefixMatch->StartOffset()},
 430           TextScanDirection::Right);
 431       if (!boundaryPoint.IsSetAndValid()) {
 432         return nullptr;
 433       }
 434       searchRange->SetStart(boundaryPoint.AsRaw(), rv);
 435       if (rv.Failed()) {
 436         return nullptr;
 437       }
 438
 439       // 2.2.4. Let matchRange be a range whose start is prefixMatch’s end and
 440       // end is searchRange’s end.
 441       RefPtr<nsRange> matchRange = nsRange::Create(
 442           prefixMatch->GetEndContainer(), prefixMatch->EndOffset(),
 443           searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
 444       if (rv.Failed()) {
 445         return nullptr;
 446       }
 447       // 2.2.5. Advance matchRange’s start to the next non-whitespace position.
 448       AdvanceStartToNextNonWhitespacePosition(*matchRange);
 449       // 2.2.6. If matchRange is collapsed return null.
 450       // (This can happen if prefixMatch’s end or its subsequent non-whitespace
 451       // position is at the end of the document.)
 452       if (matchRange->Collapsed()) {
 453         return nullptr;
 454       }
 455       // 2.2.7. Assert: matchRange’s start node is a Text node.
 456       // (matchRange’s start now points to the next non-whitespace text data
 457       // following a matched prefix.)
 458       MOZ_ASSERT(matchRange->GetStartContainer()->IsText());
 459
 460       // 2.2.8. Let mustEndAtWordBoundary be true if parsedValues’s end is
 461       // non-null or parsedValues’s suffix is null, false otherwise.
 462       const bool mustEndAtWordBoundary =
 463           !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
 464       // 2.2.9. Set potentialMatch to the result of running the find a string in
 465       // range steps with query parsedValues’s start, searchRange matchRange,
 466       // wordStartBounded false, and wordEndBounded mustEndAtWordBoundary.
 467       potentialMatch = FindStringInRange(matchRange, aTextDirective.start,
 468                                          false, mustEndAtWordBoundary);
 469       // 2.2.10. If potentialMatch is null, return null.
 470       if (!potentialMatch) {
 471         return nullptr;
 472       }
 473       // 2.2.11. If potentialMatch’s start is not matchRange’s start, then
 474       // continue.
 475       // (In this case, we found a prefix but it was followed by something other
 476       // than a matching text so we’ll continue searching for the next instance
 477       // of prefix.)
 478       if (potentialMatch->GetStartContainer() !=
 479           matchRange->GetStartContainer()) {
 480         continue;
 481       }
 482     }
 483     // 2.3. Otherwise:
 484     else {
 485       // 2.3.1. Let mustEndAtWordBoundary be true if parsedValues’s end is
 486       // non-null or parsedValues’s suffix is null, false otherwise.
 487       const bool mustEndAtWordBoundary =
 488           !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
 489       // 2.3.2. Set potentialMatch to the result of running the find a string in
 490       // range steps with query parsedValues’s start, searchRange searchRange,
 491       // wordStartBounded true, and wordEndBounded mustEndAtWordBoundary.
 492       potentialMatch = FindStringInRange(searchRange, aTextDirective.start,
 493                                          true, mustEndAtWordBoundary);
 494       // 2.3.3. If potentialMatch is null, return null.
 495       if (!potentialMatch) {
 496         return nullptr;
 497       }
 498       // 2.3.4. Set searchRange’s start to the first boundary point after
 499       // potentialMatch’s start
 500       RangeBoundary newRangeBoundary = MoveRangeBoundaryOneWord(
 501           {potentialMatch->GetStartContainer(), potentialMatch->StartOffset()},
 502           TextScanDirection::Right);
 503       if (!newRangeBoundary.IsSetAndValid()) {
 504         return nullptr;
 505       }
 506       searchRange->SetStart(newRangeBoundary.AsRaw(), rv);
 507       if (rv.Failed()) {
 508         return nullptr;
 509       }
 510     }
 511     // 2.4. Let rangeEndSearchRange be a range whose start is potentialMatch’s
 512     // end and whose end is searchRange’s end.
 513     RefPtr<nsRange> rangeEndSearchRange = nsRange::Create(
 514         potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
 515         searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
 516     if (rv.Failed()) {
 517       return nullptr;
 518     }
 519     // 2.5. While rangeEndSearchRange is not collapsed:
 520     while (!rangeEndSearchRange->Collapsed()) {
 521       // 2.5.1. If parsedValues’s end item is non-null, then:
 522       if (!aTextDirective.end.IsEmpty()) {
 523         // 2.5.1.1. Let mustEndAtWordBoundary be true if parsedValues’s suffix
 524         // is null, false otherwise.
 525         const bool mustEndAtWordBoundary = aTextDirective.suffix.IsEmpty();
 526         // 2.5.1.2. Let endMatch be the result of running the find a string in
 527         // range steps with query parsedValues’s end, searchRange
 528         // rangeEndSearchRange, wordStartBounded true, and wordEndBounded
 529         // mustEndAtWordBoundary.
 530         RefPtr<nsRange> endMatch =
 531             FindStringInRange(rangeEndSearchRange, aTextDirective.end, true,
 532                               mustEndAtWordBoundary);
 533         // 2.5.1.3. If endMatch is null then return null.
 534         if (!endMatch) {
 535           return nullptr;
 536         }
 537         // 2.5.1.4. Set potentialMatch’s end to endMatch’s end.
 538         potentialMatch->SetEnd(endMatch->GetEndContainer(),
 539                                endMatch->EndOffset());
 540       }
 541       // 2.5.2. Assert: potentialMatch is non-null, not collapsed and represents
 542       // a range exactly containing an instance of matching text.
 543       MOZ_ASSERT(potentialMatch && !potentialMatch->Collapsed());
 544
 545       // 2.5.3. If parsedValues’s suffix is null, return potentialMatch.
 546       if (aTextDirective.suffix.IsEmpty()) {
 547         return potentialMatch;
 548       }
 549       // 2.5.4. Let suffixRange be a range with start equal to potentialMatch’s
 550       // end and end equal to searchRange’s end.
 551       RefPtr<nsRange> suffixRange = nsRange::Create(
 552           potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
 553           searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
 554       if (rv.Failed()) {
 555         return nullptr;
 556       }
 557       // 2.5.5. Advance suffixRange's start to the next non-whitespace position.
 558       AdvanceStartToNextNonWhitespacePosition(*suffixRange);
 559
 560       // 2.5.6. Let suffixMatch be result of running the find a string in range
 561       // steps with query parsedValue's suffix, searchRange suffixRange,
 562       // wordStartBounded false, and wordEndBounded true.
 563       RefPtr<nsRange> suffixMatch =
 564           FindStringInRange(suffixRange, aTextDirective.suffix, false, true);
 565
 566       // 2.5.7. If suffixMatch is null, return null.
 567       // (If the suffix doesn't appear in the remaining text of the document,
 568       // there's no possible way to make a match.)
 569       if (!suffixMatch) {
 570         return nullptr;
 571       }
 572       // 2.5.8. If suffixMatch's start is suffixRange's start, return
 573       // potentialMatch.
 574       if (suffixMatch->GetStartContainer() ==
 575               suffixRange->GetStartContainer() &&
 576           suffixMatch->StartOffset() == suffixRange->StartOffset()) {
 577         return potentialMatch;
 578       }
 579       // 2.5.9. If parsedValue's end item is null then break;
 580       // (If this is an exact match and the suffix doesn’t match, start
 581       // searching for the next range start by breaking out of this loop without
 582       // rangeEndSearchRange being collapsed. If we’re looking for a range
 583       // match, we’ll continue iterating this inner loop since the range start
 584       // will already be correct.)
 585       if (aTextDirective.end.IsEmpty()) {
 586         break;
 587       }
 588       // 2.5.10. Set rangeEndSearchRange's start to potentialMatch's end.
 589       // (Otherwise, it is possible that we found the correct range start, but
 590       // not the correct range end. Continue the inner loop to keep searching
 591       // for another matching instance of rangeEnd.)
 592       rangeEndSearchRange->SetStart(potentialMatch->GetEndContainer(),
 593                                     potentialMatch->EndOffset());
 594     }
 595     // 2.6. If rangeEndSearchRange is collapsed then:
 596     if (rangeEndSearchRange->Collapsed()) {
 597       // 2.6.1. Assert parsedValue's end item is non-null.
 598       // (This can only happen for range matches due to the break for exact
 599       // matches in step 9 of the above loop. If we couldn’t find a valid
 600       // rangeEnd+suffix pair anywhere in the doc then there’s no possible way
 601       // to make a match.)
 602       // XXX(:jjaschke): should this really assert?
 603       MOZ_ASSERT(!aTextDirective.end.IsEmpty());
 604     }
 605   }
 606   // 3. Return null.
 607   return nullptr;
 608 }
 609
 610 /**
 611  * @brief Convenience function that returns true if the given position in a
 612  * string is a word boundary.
 613  *
 614  * This is a thin wrapper around the `WordBreaker::FindWord()` function.
 615  *
 616  * @param aText The text input.
 617  * @param aPosition The position to check.
 618  * @return true if there is a word boundary at `aPosition`.
 619  * @return false otherwise.
 620  */
 621 bool IsAtWordBoundary(const nsAString& aText, uint32_t aPosition) {
 622   const intl::WordRange wordRange =
 623       intl::WordBreaker::FindWord(aText, aPosition);
 624   return wordRange.mBegin == aPosition || wordRange.mEnd == aPosition;
 625 }
 626
 627 enum class IsEndIndex : bool { No, Yes };
 628 RangeBoundary GetBoundaryPointAtIndex(
 629     uint32_t aIndex, const nsTArray<RefPtr<Text>>& aTextNodeList,
 630     IsEndIndex aIsEndIndex) {
 631   // 1. Let counted be 0.
 632   uint32_t counted = 0;
 633   // 2. For each curNode of nodes:
 634   for (Text* curNode : aTextNodeList) {
 635     // 2.1. Let nodeEnd be counted + curNode’s length.
 636     uint32_t nodeEnd = counted + curNode->Length();
 637     // 2.2. If isEnd is true, add 1 to nodeEnd.
 638     if (aIsEndIndex == IsEndIndex::Yes) {
 639       ++nodeEnd;
 640     }
 641     // 2.3. If nodeEnd is greater than index then:
 642     if (nodeEnd > aIndex) {
 643       // 2.3.1. Return the boundary point (curNode, index − counted).
 644       return RangeBoundary(curNode->AsNode(), aIndex - counted);
 645     }
 646     // 2.4. Increment counted by curNode’s length.
 647     counted += curNode->Length();
 648   }
 649   return {};
 650 }
 651
 652 RefPtr<nsRange> FindRangeFromNodeList(
 653     nsRange* aSearchRange, const nsAString& aQuery,
 654     const nsTArray<RefPtr<Text>>& aTextNodeList, bool aWordStartBounded,
 655     bool aWordEndBounded) {
 656   // 1. Let searchBuffer be the concatenation of the data of each item in nodes.
 657   // XXX(:jjaschke): There's an open issue here that deals with what
 658   // data is supposed to be (text data vs. rendered text)
 659   // https://github.com/WICG/scroll-to-text-fragment/issues/98
 660   uint32_t bufferLength = 0;
 661   for (const Text* text : aTextNodeList) {
 662     bufferLength += text->Length();
 663   }
 664   // bail out if the search query is longer than the text data.
 665   if (bufferLength < aQuery.Length()) {
 666     return nullptr;
 667   }
 668   nsAutoString searchBuffer;
 669   searchBuffer.SetCapacity(bufferLength);
 670   for (Text* text : aTextNodeList) {
 671     text->AppendTextTo(searchBuffer);
 672   }
 673   // 2. Let searchStart be 0.
 674   // 3. If the first item in nodes is searchRange’s start node then set
 675   // searchStart to searchRange’s start offset.
 676   uint32_t searchStart =
 677       aTextNodeList.SafeElementAt(0) == aSearchRange->GetStartContainer()
 678           ? aSearchRange->StartOffset()
 679           : 0;
 680
 681   // 4. Let start and end be boundary points, initially null.
 682   RangeBoundary start, end;
 683   // 5. Let matchIndex be null.
 684   // "null" here doesn't mean 0, instead "not set". 0 would be a valid index.
 685   // Therefore, "null" is represented by the value -1.
 686   int32_t matchIndex = -1;
 687
 688   // 6. While matchIndex is null
 689   // As explained above, "null" == -1 in this algorithm.
 690   while (matchIndex == -1) {
 691     // 6.1. Set matchIndex to the index of the first instance of queryString in
 692     // searchBuffer, starting at searchStart. The string search must be
 693     // performed using a base character comparison, or the primary level, as
 694     // defined in [UTS10].
 695     // [UTS10]
 696     // Ken Whistler; Markus Scherer.Unicode Collation Algorithm.26 August 2022.
 697     // Unicode Technical Standard #10.
 698     // URL : https://www.unicode.org/reports/tr10/tr10-47.html
 699
 700     // XXX(:jjaschke): For the initial implementation, a standard case-sensitive
 701     // find-in-string is used.
 702     // See: https://github.com/WICG/scroll-to-text-fragment/issues/233
 703     matchIndex = searchBuffer.Find(aQuery, searchStart);
 704     // 6.2. If matchIndex is null, return null.
 705     if (matchIndex == -1) {
 706       return nullptr;
 707     }
 708
 709     // 6.3. Let endIx be matchIndex + queryString’s length.
 710     // endIx is the index of the last character in the match + 1.
 711     const uint32_t endIx = matchIndex + aQuery.Length();
 712
 713     // 6.4. Set start to the boundary point result of get boundary point at
 714     // index matchIndex run over nodes with isEnd false.
 715     start = GetBoundaryPointAtIndex(matchIndex, aTextNodeList, IsEndIndex::No);
 716     // 6.5. Set end to the boundary point result of get boundary point at index
 717     // endIx run over nodes with isEnd true.
 718     end = GetBoundaryPointAtIndex(endIx, aTextNodeList, IsEndIndex::Yes);
 719
 720     // 6.6. If wordStartBounded is true and matchIndex is not at a word boundary
 721     // in searchBuffer, given the language from start’s node as the locale; or
 722     // wordEndBounded is true and matchIndex + queryString’s length is not at a
 723     // word boundary in searchBuffer, given the language from end’s node as the
 724     // locale:
 725     if ((aWordStartBounded && !IsAtWordBoundary(searchBuffer, matchIndex)) ||
 726         (aWordEndBounded && !IsAtWordBoundary(searchBuffer, endIx))) {
 727       // 6.6.1. Set searchStart to matchIndex + 1.
 728       searchStart = matchIndex + 1;
 729       // 6.6.2. Set matchIndex to null.
 730       matchIndex = -1;
 731     }
 732   }
 733   // 7. Let endInset be 0.
 734   // 8. If the last item in nodes is searchRange’s end node then set endInset
 735   // to (searchRange’s end node's length − searchRange’s end offset)
 736   // (endInset is the offset from the last position in the last node in the
 737   // reverse direction. Alternatively, it is the length of the node that’s not
 738   // included in the range.)
 739   uint32_t endInset =
 740       aTextNodeList.LastElement() == aSearchRange->GetEndContainer()
 741           ? aSearchRange->GetEndContainer()->Length() -
 742                 aSearchRange->EndOffset()
 743           : 0;
 744
 745   // 9. If matchIndex + queryString’s length is greater than searchBuffer’s
 746   // length − endInset return null.
 747   // (If the match runs past the end of the search range, return null.)
 748   if (matchIndex + aQuery.Length() > searchBuffer.Length() - endInset) {
 749     return nullptr;
 750   }
 751
 752   // 10. Assert: start and end are non-null, valid boundary points in
 753   // searchRange.
 754   MOZ_ASSERT(start.IsSetAndValid());
 755   MOZ_ASSERT(end.IsSetAndValid());
 756
 757   // 11. Return a range with start start and end end.
 758   ErrorResult rv;
 759   RefPtr<nsRange> range = nsRange::Create(start, end, rv);
 760   if (rv.Failed()) {
 761     return nullptr;
 762   }
 763
 764   return range;
 765 }
 766
 767 RefPtr<nsRange> FragmentDirective::FindStringInRange(nsRange* aSearchRange,
 768                                                      const nsAString& aQuery,
 769                                                      bool aWordStartBounded,
 770                                                      bool aWordEndBounded) {
 771   MOZ_ASSERT(aSearchRange);
 772   RefPtr<nsRange> searchRange = aSearchRange->CloneRange();
 773   // 1. While searchRange is not collapsed
 774   while (searchRange && !searchRange->Collapsed()) {
 775     // 1.1. Let curNode be searchRange’s start node.
 776     RefPtr<nsINode> curNode = searchRange->GetStartContainer();
 777
 778     // 1.2. If curNode is part of a non-searchable subtree:
 779     if (NodeIsPartOfNonSearchableSubTree(*curNode)) {
 780       // 1.2.1. Set searchRange’s start node to the next node, in
 781       // shadow-including tree order, that isn’t a shadow-including descendant
 782       // of curNode.
 783       RefPtr<nsINode> next = curNode;
 784       while ((next = next->GetNextNode())) {
 785         if (!next->IsShadowIncludingInclusiveDescendantOf(curNode)) {
 786           break;
 787         }
 788       }
 789       if (!next) {
 790         return nullptr;
 791       }
 792       // 1.2.2. Set `searchRange`s `start offset` to 0
 793       searchRange->SetStart(next, 0);
 794       // 1.2.3. continue.
 795       continue;
 796     }
 797     // 1.3. If curNode is not a visible TextNode:
 798     if (!NodeIsVisibleTextNode(*curNode)) {
 799       // 1.3.1. Set searchRange’s start node to the next node, in
 800       // shadow-including tree order, that is not a doctype.
 801       RefPtr<nsINode> next = curNode;
 802       while ((next = next->GetNextNode())) {
 803         if (next->NodeType() != Node_Binding::DOCUMENT_TYPE_NODE) {
 804           break;
 805         }
 806       }
 807       if (!next) {
 808         return nullptr;
 809       }
 810       // 1.3.2. Set searchRange’s start offset to 0.
 811       searchRange->SetStart(next, 0);
 812       // 1.3.3. continue.
 813       continue;
 814     }
 815     // 1.4. Let blockAncestor be the nearest block ancestor of `curNode`
 816     RefPtr<nsINode> blockAncestor = GetBlockAncestorForNode(curNode);
 817
 818     // 1.5. Let textNodeList be a list of Text nodes, initially empty.
 819     nsTArray<RefPtr<Text>> textNodeList;
 820     // 1.6. While curNode is a shadow-including descendant of blockAncestor and
 821     // the position of the boundary point (curNode,0) is not after searchRange's
 822     // end:
 823     while (curNode &&
 824            curNode->IsShadowIncludingInclusiveDescendantOf(blockAncestor)) {
 825       Maybe<int32_t> comp = nsContentUtils::ComparePoints(
 826           curNode, 0, searchRange->GetEndContainer(), searchRange->EndOffset());
 827       if (comp) {
 828         if (*comp >= 0) {
 829           break;
 830         }
 831       } else {
 832         // This means that the compared nodes are disconnected.
 833         return nullptr;
 834       }
 835       // 1.6.1. If curNode has block-level display, then break.
 836       if (NodeHasBlockLevelDisplay(*curNode)) {
 837         break;
 838       }
 839       // 1.6.2. If curNode is search invisible:
 840       if (NodeIsSearchInvisible(*curNode)) {
 841         // 1.6.2.1. Set curNode to the next node, in shadow-including tree
 842         // order, that isn't a shadow-including descendant of curNode.
 843         curNode = curNode->GetNextNode();
 844         // 1.6.2.2. Continue.
 845         continue;
 846       }
 847       // 1.6.3. If curNode is a visible text node then append it to
 848       // textNodeList.
 849       if (NodeIsVisibleTextNode(*curNode)) {
 850         textNodeList.AppendElement(curNode->AsText());
 851       }
 852       // 1.6.4. Set curNode to the next node in shadow-including
 853       // tree order.
 854       curNode = curNode->GetNextNode();
 855     }
 856     // 1.7. Run the find a range from a node list steps given
 857     // query, searchRange, textNodeList, wordStartBounded, wordEndBounded as
 858     // input. If the resulting Range is not null, then  return it.
 859     if (RefPtr<nsRange> range =
 860             FindRangeFromNodeList(searchRange, aQuery, textNodeList,
 861                                   aWordStartBounded, aWordEndBounded)) {
 862       return range;
 863     }
 864
 865     // 1.8. If curNode is null, then break.
 866     if (!curNode) {
 867       break;
 868     }
 869
 870     // 1.9. Assert: curNode follows searchRange's start node.
 871
 872     // 1.10. Set searchRange's start to the boundary point (curNode,0).
 873     searchRange->SetStart(curNode, 0);
 874   }
 875
 876   // 2. Return null.
 877   return nullptr;
 878 }
 879 }  // namespace mozilla::dom