Backed out changeset 2450366cf7ca (bug 1891629) for causing win msix mochitest failures
[gecko.git] / dom / base / FragmentDirective.cpp
blob3300a85751b300d9605f5e69f6aac6a614a40f54
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "FragmentDirective.h"
8 #include <cstdint>
9 #include "RangeBoundary.h"
10 #include "mozilla/Assertions.h"
11 #include "Document.h"
12 #include "mozilla/dom/FragmentDirectiveBinding.h"
13 #include "mozilla/dom/FragmentOrElement.h"
14 #include "mozilla/dom/NodeBinding.h"
15 #include "mozilla/dom/Text.h"
16 #include "mozilla/intl/WordBreaker.h"
17 #include "nsComputedDOMStyle.h"
18 #include "nsContentUtils.h"
19 #include "nsDOMAttributeMap.h"
20 #include "nsGkAtoms.h"
21 #include "nsICSSDeclaration.h"
22 #include "nsIFrame.h"
23 #include "nsINode.h"
24 #include "nsIURIMutator.h"
25 #include "nsRange.h"
26 #include "nsString.h"
28 namespace mozilla::dom {
29 static LazyLogModule sFragmentDirectiveLog("FragmentDirective");
31 /** Converts a `TextDirective` into a percent-encoded string. */
32 nsCString ToString(const TextDirective& aTextDirective) {
33 nsCString str;
34 create_text_directive(&aTextDirective, &str);
35 return str;
38 NS_IMPL_CYCLE_COLLECTION_WRAPPERCACHE(FragmentDirective, mDocument)
39 NS_IMPL_CYCLE_COLLECTING_ADDREF(FragmentDirective)
40 NS_IMPL_CYCLE_COLLECTING_RELEASE(FragmentDirective)
41 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(FragmentDirective)
42 NS_WRAPPERCACHE_INTERFACE_MAP_ENTRY
43 NS_INTERFACE_MAP_ENTRY(nsISupports)
44 NS_INTERFACE_MAP_END
46 FragmentDirective::FragmentDirective(Document* aDocument)
47 : mDocument(aDocument) {}
49 JSObject* FragmentDirective::WrapObject(JSContext* aCx,
50 JS::Handle<JSObject*> aGivenProto) {
51 return FragmentDirective_Binding::Wrap(aCx, this, aGivenProto);
54 void FragmentDirective::ParseAndRemoveFragmentDirectiveFromFragment(
55 nsCOMPtr<nsIURI>& aURI, nsTArray<TextDirective>* aTextDirectives) {
56 if (!aURI || !StaticPrefs::dom_text_fragments_enabled()) {
57 return;
59 bool hasRef = false;
60 aURI->GetHasRef(&hasRef);
61 if (!hasRef) {
62 return;
65 nsAutoCString hash;
66 aURI->GetRef(hash);
68 ParsedFragmentDirectiveResult fragmentDirective;
69 const bool hasRemovedFragmentDirective =
70 parse_fragment_directive(&hash, &fragmentDirective);
71 if (!hasRemovedFragmentDirective) {
72 return;
74 Unused << NS_MutateURI(aURI)
75 .SetRef(fragmentDirective.url_without_fragment_directive)
76 .Finalize(aURI);
77 if (aTextDirectives) {
78 aTextDirectives->SwapElements(fragmentDirective.text_directives);
82 nsTArray<RefPtr<nsRange>> FragmentDirective::FindTextFragmentsInDocument() {
83 MOZ_ASSERT(mDocument);
84 mDocument->FlushPendingNotifications(FlushType::Frames);
85 nsTArray<RefPtr<nsRange>> textDirectiveRanges;
86 for (const TextDirective& textDirective : mUninvokedTextDirectives) {
87 if (RefPtr<nsRange> range = FindRangeForTextDirective(textDirective)) {
88 textDirectiveRanges.AppendElement(range);
91 mUninvokedTextDirectives.Clear();
92 return textDirectiveRanges;
95 /**
96 * @brief Determine if `aNode` should be considered when traversing the DOM.
98 * A node is "search invisible" if it is an element in the HTML namespace and
99 * 1. The computed value of its `display` property is `none`
100 * 2. It serializes as void
101 * 3. It is one of the following types:
102 * - HTMLIFrameElement
103 * - HTMLImageElement
104 * - HTMLMeterElement
105 * - HTMLObjectElement
106 * - HTMLProgressElement
107 * - HTMLStyleElement
108 * - HTMLScriptElement
109 * - HTMLVideoElement
110 * - HTMLAudioElement
111 * 4. It is a `select` element whose `multiple` content attribute is absent
113 * see https://wicg.github.io/scroll-to-text-fragment/#search-invisible
115 bool NodeIsSearchInvisible(nsINode& aNode) {
116 if (!aNode.IsElement()) {
117 return false;
119 // 2. If the node serializes as void.
120 nsAtom* nodeNameAtom = aNode.NodeInfo()->NameAtom();
121 if (FragmentOrElement::IsHTMLVoid(nodeNameAtom)) {
122 return true;
124 // 3. Is any of the following types: HTMLIFrameElement, HTMLImageElement,
125 // HTMLMeterElement, HTMLObjectElement, HTMLProgressElement, HTMLStyleElement,
126 // HTMLScriptElement, HTMLVideoElement, HTMLAudioElement
127 if (aNode.IsAnyOfHTMLElements(
128 nsGkAtoms::iframe, nsGkAtoms::image, nsGkAtoms::meter,
129 nsGkAtoms::object, nsGkAtoms::progress, nsGkAtoms::style,
130 nsGkAtoms::script, nsGkAtoms::video, nsGkAtoms::audio)) {
131 return true;
133 // 4. Is a select element whose multiple content attribute is absent.
134 if (aNode.IsHTMLElement(nsGkAtoms::select)) {
135 return aNode.GetAttributes()->GetNamedItem(u"multiple"_ns) == nullptr;
137 // This is tested last because it's the most expensive check.
138 // 1. The computed value of its 'display' property is 'none'.
139 const Element* nodeAsElement = Element::FromNode(aNode);
140 const RefPtr<const ComputedStyle> computedStyle =
141 nsComputedDOMStyle::GetComputedStyleNoFlush(nodeAsElement);
142 return !computedStyle ||
143 computedStyle->StyleDisplay()->mDisplay == StyleDisplay::None;
147 * @brief Returns true if `aNode` has block-level display.
148 * A node has block-level display if it is an element and the computed value
149 * of its display property is any of
150 * - block
151 * - table
152 * - flow-root
153 * - grid
154 * - flex
155 * - list-item
157 * See https://wicg.github.io/scroll-to-text-fragment/#has-block-level-display
159 bool NodeHasBlockLevelDisplay(nsINode& aNode) {
160 if (!aNode.IsElement()) {
161 return false;
163 const Element* nodeAsElement = Element::FromNode(aNode);
164 const RefPtr<const ComputedStyle> computedStyle =
165 nsComputedDOMStyle::GetComputedStyleNoFlush(nodeAsElement);
166 if (!computedStyle) {
167 return false;
169 const StyleDisplay& styleDisplay = computedStyle->StyleDisplay()->mDisplay;
170 return styleDisplay == StyleDisplay::Block ||
171 styleDisplay == StyleDisplay::Table ||
172 styleDisplay == StyleDisplay::FlowRoot ||
173 styleDisplay == StyleDisplay::Grid ||
174 styleDisplay == StyleDisplay::Flex || styleDisplay.IsListItem();
178 * @brief Get the Block Ancestor For `aNode`.
180 * see https://wicg.github.io/scroll-to-text-fragment/#nearest-block-ancestor
182 nsINode* GetBlockAncestorForNode(nsINode* aNode) {
183 // 1. Let curNode be node.
184 RefPtr<nsINode> curNode = aNode;
185 // 2. While curNode is non-null
186 while (curNode) {
187 // 2.1. If curNode is not a Text node and it has block-level display then
188 // return curNode.
189 if (!curNode->IsText() && NodeHasBlockLevelDisplay(*curNode)) {
190 return curNode;
192 // 2.2. Otherwise, set curNode to curNode’s parent.
193 curNode = curNode->GetParentNode();
195 // 3.Return node’s node document's document element.
196 return aNode->GetOwnerDocument();
200 * @brief Returns true if `aNode` is part of a non-searchable subtree.
202 * A node is part of a non-searchable subtree if it is or has a shadow-including
203 * ancestor that is search invisible.
205 * see https://wicg.github.io/scroll-to-text-fragment/#non-searchable-subtree
207 bool NodeIsPartOfNonSearchableSubTree(nsINode& aNode) {
208 nsINode* node = &aNode;
209 do {
210 if (NodeIsSearchInvisible(*node)) {
211 return true;
213 } while ((node = node->GetParentOrShadowHostNode()));
214 return false;
218 * @brief Return true if `aNode` is a visible Text node.
220 * A node is a visible text node if it is a Text node, the computed value of
221 * its parent element's visibility property is visible, and it is being
222 * rendered.
224 * see https://wicg.github.io/scroll-to-text-fragment/#visible-text-node
226 bool NodeIsVisibleTextNode(const nsINode& aNode) {
227 const Text* text = Text::FromNode(aNode);
228 if (!text) {
229 return false;
231 const nsIFrame* frame = text->GetPrimaryFrame();
232 return frame && frame->StyleVisibility()->IsVisible();
235 enum class TextScanDirection { Left = -1, Right = 1 };
238 * @brief Tests if there is whitespace at the given position and direction.
240 * This algorithm tests for whitespaces and `&nbsp;` at `aPos`.
241 * It returns the size of the whitespace found at the position, i.e. 5/6 for
242 * `&nbsp/;` and 1 otherwise.
244 * This function follows a subsection of this section of the spec, but has been
245 * adapted to be able to scan in both directions:
246 * https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position
248 uint32_t IsWhitespaceAtPosition(nsString& aText, uint32_t aPos,
249 TextScanDirection aDirection) {
250 if (aText.Length() == 0) {
251 return 0;
253 if (aDirection == TextScanDirection::Right) {
254 if (aText.Length() > (aPos + 5)) {
255 if (Substring(aText, aPos, 5).Equals(u"&nbsp")) {
256 return aText.Length() > (aPos + 6) && aText.CharAt(aPos + 6) == u';'
258 : 5;
261 } else {
262 if (aPos > 6 && Substring(aText, aPos - 6, 6).Equals(u"&nbsp;")) {
263 return 6;
265 if (aPos > 5 && Substring(aText, aPos - 5, 5).Equals(u"&nbsp")) {
266 return 5;
269 return uint32_t(IsSpaceCharacter(aText.CharAt(aPos)));
272 /** Advances the start of `aRange` to the next non-whitespace position.
273 * The function follows this section of the spec:
274 * https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position
276 void AdvanceStartToNextNonWhitespacePosition(nsRange& aRange) {
277 // 1. While range is not collapsed:
278 while (!aRange.Collapsed()) {
279 // 1.1. Let node be range's start node.
280 RefPtr<nsINode> node = aRange.GetStartContainer();
281 MOZ_ASSERT(node);
282 // 1.2. Let offset be range's start offset.
283 const uint32_t offset = aRange.StartOffset();
284 // 1.3. If node is part of a non-searchable subtree or if node is not a
285 // visible text node or if offset is equal to node's length then:
286 if (NodeIsPartOfNonSearchableSubTree(*node) ||
287 !NodeIsVisibleTextNode(*node) || offset == node->Length()) {
288 // 1.3.1. Set range's start node to the next node, in shadow-including
289 // tree order.
290 // 1.3.2. Set range's start offset to 0.
291 if (NS_FAILED(aRange.SetStart(node->GetNextNode(), 0))) {
292 return;
294 // 1.3.3. Continue.
295 continue;
297 const Text* text = Text::FromNode(node);
298 nsAutoString textData;
299 text->GetData(textData);
300 // These steps are moved to `IsWhitespaceAtPosition()`.
301 // 1.4. If the substring data of node at offset offset and count 6 is equal
302 // to the string "&nbsp;" then:
303 // 1.4.1. Add 6 to range’s start offset.
304 // 1.5. Otherwise, if the substring data of node at offset offset and count
305 // 5 is equal to the string "&nbsp" then:
306 // 1.5.1. Add 5 to range’s start offset.
307 // 1.6. Otherwise:
308 // 1.6.1 Let cp be the code point at the offset index in node’s data.
309 // 1.6.2 If cp does not have the White_Space property set, return.
310 // 1.6.3 Add 1 to range’s start offset.
311 const uint32_t whitespace =
312 IsWhitespaceAtPosition(textData, offset, TextScanDirection::Right);
313 if (whitespace == 0) {
314 return;
317 aRange.SetStart(node, offset + whitespace);
322 * @brief Moves `aRangeBoundary` one word in `aDirection`.
324 * Word boundaries are determined using `intl::WordBreaker::FindWord()`.
327 * @param aRangeBoundary[in] The range boundary that should be moved.
328 * Must be set and valid.
329 * @param aDirection[in] The direction into which to move.
330 * @return A new `RangeBoundary` which is moved to the next word.
332 RangeBoundary MoveRangeBoundaryOneWord(const RangeBoundary& aRangeBoundary,
333 TextScanDirection aDirection) {
334 MOZ_ASSERT(aRangeBoundary.IsSetAndValid());
335 RefPtr<nsINode> curNode = aRangeBoundary.Container();
336 uint32_t offset = *aRangeBoundary.Offset(
337 RangeBoundary::OffsetFilter::kValidOrInvalidOffsets);
339 const int offsetIncrement = int(aDirection);
340 // Get the text node of the start of the range and the offset.
341 // This is the current position of the start of the range.
342 nsAutoString text;
343 if (NodeIsVisibleTextNode(*curNode)) {
344 const Text* textNode = Text::FromNode(curNode);
345 textNode->GetData(text);
347 // Assuming that the current position might not be at a word boundary,
348 // advance to the word boundary at word begin/end.
349 if (!IsWhitespaceAtPosition(text, offset, aDirection)) {
350 const intl::WordRange wordRange =
351 intl::WordBreaker::FindWord(text, offset);
352 if (aDirection == TextScanDirection::Right &&
353 offset != wordRange.mBegin) {
354 offset = wordRange.mEnd;
355 } else if (aDirection == TextScanDirection::Left &&
356 offset != wordRange.mEnd) {
357 // The additional -1 is necessary to move to offset to *before* the
358 // start of the word.
359 offset = wordRange.mBegin - 1;
363 // Now, skip any whitespace, so that `offset` points to the word boundary of
364 // the next word (which is the one this algorithm actually aims to move over).
365 while (curNode) {
366 if (!NodeIsVisibleTextNode(*curNode) || NodeIsSearchInvisible(*curNode) ||
367 offset >= curNode->Length()) {
368 curNode = aDirection == TextScanDirection::Left ? curNode->GetPrevNode()
369 : curNode->GetNextNode();
370 if (!curNode) {
371 break;
373 offset =
374 aDirection == TextScanDirection::Left ? curNode->Length() - 1 : 0;
375 if (const Text* textNode = Text::FromNode(curNode)) {
376 textNode->GetData(text);
378 continue;
380 if (const uint32_t whitespace =
381 IsWhitespaceAtPosition(text, offset, aDirection)) {
382 offset += offsetIncrement * whitespace;
383 continue;
386 // At this point, the caret has been moved to the next non-whitespace
387 // position.
388 // find word boundaries at the current position
389 const intl::WordRange wordRange = intl::WordBreaker::FindWord(text, offset);
390 offset = aDirection == TextScanDirection::Left ? wordRange.mBegin
391 : wordRange.mEnd;
393 return {curNode, offset};
395 return {};
398 RefPtr<nsRange> FragmentDirective::FindRangeForTextDirective(
399 const TextDirective& aTextDirective) {
400 MOZ_LOG(sFragmentDirectiveLog, LogLevel::Info,
401 ("FragmentDirective::%s(): Find range for text directive '%s'.",
402 __FUNCTION__, ToString(aTextDirective).Data()));
403 // 1. Let searchRange be a range with start (document, 0) and end (document,
404 // document’s length)
405 ErrorResult rv;
406 RefPtr<nsRange> searchRange =
407 nsRange::Create(mDocument, 0, mDocument, mDocument->Length(), rv);
408 if (rv.Failed()) {
409 return nullptr;
411 // 2. While searchRange is not collapsed:
412 while (!searchRange->Collapsed()) {
413 // 2.1. Let potentialMatch be null.
414 RefPtr<nsRange> potentialMatch;
415 // 2.2. If parsedValues’s prefix is not null:
416 if (!aTextDirective.prefix.IsEmpty()) {
417 // 2.2.1. Let prefixMatch be the the result of running the find a string
418 // in range steps with query parsedValues’s prefix, searchRange
419 // searchRange, wordStartBounded true and wordEndBounded false.
420 RefPtr<nsRange> prefixMatch =
421 FindStringInRange(searchRange, aTextDirective.prefix, true, false);
422 // 2.2.2. If prefixMatch is null, return null.
423 if (!prefixMatch) {
424 return nullptr;
426 // 2.2.3. Set searchRange’s start to the first boundary point after
427 // prefixMatch’s start
428 const RangeBoundary boundaryPoint = MoveRangeBoundaryOneWord(
429 {prefixMatch->GetStartContainer(), prefixMatch->StartOffset()},
430 TextScanDirection::Right);
431 if (!boundaryPoint.IsSetAndValid()) {
432 return nullptr;
434 searchRange->SetStart(boundaryPoint.AsRaw(), rv);
435 if (rv.Failed()) {
436 return nullptr;
439 // 2.2.4. Let matchRange be a range whose start is prefixMatch’s end and
440 // end is searchRange’s end.
441 RefPtr<nsRange> matchRange = nsRange::Create(
442 prefixMatch->GetEndContainer(), prefixMatch->EndOffset(),
443 searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
444 if (rv.Failed()) {
445 return nullptr;
447 // 2.2.5. Advance matchRange’s start to the next non-whitespace position.
448 AdvanceStartToNextNonWhitespacePosition(*matchRange);
449 // 2.2.6. If matchRange is collapsed return null.
450 // (This can happen if prefixMatch’s end or its subsequent non-whitespace
451 // position is at the end of the document.)
452 if (matchRange->Collapsed()) {
453 return nullptr;
455 // 2.2.7. Assert: matchRange’s start node is a Text node.
456 // (matchRange’s start now points to the next non-whitespace text data
457 // following a matched prefix.)
458 MOZ_ASSERT(matchRange->GetStartContainer()->IsText());
460 // 2.2.8. Let mustEndAtWordBoundary be true if parsedValues’s end is
461 // non-null or parsedValues’s suffix is null, false otherwise.
462 const bool mustEndAtWordBoundary =
463 !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
464 // 2.2.9. Set potentialMatch to the result of running the find a string in
465 // range steps with query parsedValues’s start, searchRange matchRange,
466 // wordStartBounded false, and wordEndBounded mustEndAtWordBoundary.
467 potentialMatch = FindStringInRange(matchRange, aTextDirective.start,
468 false, mustEndAtWordBoundary);
469 // 2.2.10. If potentialMatch is null, return null.
470 if (!potentialMatch) {
471 return nullptr;
473 // 2.2.11. If potentialMatch’s start is not matchRange’s start, then
474 // continue.
475 // (In this case, we found a prefix but it was followed by something other
476 // than a matching text so we’ll continue searching for the next instance
477 // of prefix.)
478 if (potentialMatch->GetStartContainer() !=
479 matchRange->GetStartContainer()) {
480 continue;
483 // 2.3. Otherwise:
484 else {
485 // 2.3.1. Let mustEndAtWordBoundary be true if parsedValues’s end is
486 // non-null or parsedValues’s suffix is null, false otherwise.
487 const bool mustEndAtWordBoundary =
488 !aTextDirective.end.IsEmpty() || aTextDirective.suffix.IsEmpty();
489 // 2.3.2. Set potentialMatch to the result of running the find a string in
490 // range steps with query parsedValues’s start, searchRange searchRange,
491 // wordStartBounded true, and wordEndBounded mustEndAtWordBoundary.
492 potentialMatch = FindStringInRange(searchRange, aTextDirective.start,
493 true, mustEndAtWordBoundary);
494 // 2.3.3. If potentialMatch is null, return null.
495 if (!potentialMatch) {
496 return nullptr;
498 // 2.3.4. Set searchRange’s start to the first boundary point after
499 // potentialMatch’s start
500 RangeBoundary newRangeBoundary = MoveRangeBoundaryOneWord(
501 {potentialMatch->GetStartContainer(), potentialMatch->StartOffset()},
502 TextScanDirection::Right);
503 if (!newRangeBoundary.IsSetAndValid()) {
504 return nullptr;
506 searchRange->SetStart(newRangeBoundary.AsRaw(), rv);
507 if (rv.Failed()) {
508 return nullptr;
511 // 2.4. Let rangeEndSearchRange be a range whose start is potentialMatch’s
512 // end and whose end is searchRange’s end.
513 RefPtr<nsRange> rangeEndSearchRange = nsRange::Create(
514 potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
515 searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
516 if (rv.Failed()) {
517 return nullptr;
519 // 2.5. While rangeEndSearchRange is not collapsed:
520 while (!rangeEndSearchRange->Collapsed()) {
521 // 2.5.1. If parsedValues’s end item is non-null, then:
522 if (!aTextDirective.end.IsEmpty()) {
523 // 2.5.1.1. Let mustEndAtWordBoundary be true if parsedValues’s suffix
524 // is null, false otherwise.
525 const bool mustEndAtWordBoundary = aTextDirective.suffix.IsEmpty();
526 // 2.5.1.2. Let endMatch be the result of running the find a string in
527 // range steps with query parsedValues’s end, searchRange
528 // rangeEndSearchRange, wordStartBounded true, and wordEndBounded
529 // mustEndAtWordBoundary.
530 RefPtr<nsRange> endMatch =
531 FindStringInRange(rangeEndSearchRange, aTextDirective.end, true,
532 mustEndAtWordBoundary);
533 // 2.5.1.3. If endMatch is null then return null.
534 if (!endMatch) {
535 return nullptr;
537 // 2.5.1.4. Set potentialMatch’s end to endMatch’s end.
538 potentialMatch->SetEnd(endMatch->GetEndContainer(),
539 endMatch->EndOffset());
541 // 2.5.2. Assert: potentialMatch is non-null, not collapsed and represents
542 // a range exactly containing an instance of matching text.
543 MOZ_ASSERT(potentialMatch && !potentialMatch->Collapsed());
545 // 2.5.3. If parsedValues’s suffix is null, return potentialMatch.
546 if (aTextDirective.suffix.IsEmpty()) {
547 return potentialMatch;
549 // 2.5.4. Let suffixRange be a range with start equal to potentialMatch’s
550 // end and end equal to searchRange’s end.
551 RefPtr<nsRange> suffixRange = nsRange::Create(
552 potentialMatch->GetEndContainer(), potentialMatch->EndOffset(),
553 searchRange->GetEndContainer(), searchRange->EndOffset(), rv);
554 if (rv.Failed()) {
555 return nullptr;
557 // 2.5.5. Advance suffixRange's start to the next non-whitespace position.
558 AdvanceStartToNextNonWhitespacePosition(*suffixRange);
560 // 2.5.6. Let suffixMatch be result of running the find a string in range
561 // steps with query parsedValue's suffix, searchRange suffixRange,
562 // wordStartBounded false, and wordEndBounded true.
563 RefPtr<nsRange> suffixMatch =
564 FindStringInRange(suffixRange, aTextDirective.suffix, false, true);
566 // 2.5.7. If suffixMatch is null, return null.
567 // (If the suffix doesn't appear in the remaining text of the document,
568 // there's no possible way to make a match.)
569 if (!suffixMatch) {
570 return nullptr;
572 // 2.5.8. If suffixMatch's start is suffixRange's start, return
573 // potentialMatch.
574 if (suffixMatch->GetStartContainer() ==
575 suffixRange->GetStartContainer() &&
576 suffixMatch->StartOffset() == suffixRange->StartOffset()) {
577 return potentialMatch;
579 // 2.5.9. If parsedValue's end item is null then break;
580 // (If this is an exact match and the suffix doesn’t match, start
581 // searching for the next range start by breaking out of this loop without
582 // rangeEndSearchRange being collapsed. If we’re looking for a range
583 // match, we’ll continue iterating this inner loop since the range start
584 // will already be correct.)
585 if (aTextDirective.end.IsEmpty()) {
586 break;
588 // 2.5.10. Set rangeEndSearchRange's start to potentialMatch's end.
589 // (Otherwise, it is possible that we found the correct range start, but
590 // not the correct range end. Continue the inner loop to keep searching
591 // for another matching instance of rangeEnd.)
592 rangeEndSearchRange->SetStart(potentialMatch->GetEndContainer(),
593 potentialMatch->EndOffset());
595 // 2.6. If rangeEndSearchRange is collapsed then:
596 if (rangeEndSearchRange->Collapsed()) {
597 // 2.6.1. Assert parsedValue's end item is non-null.
598 // (This can only happen for range matches due to the break for exact
599 // matches in step 9 of the above loop. If we couldn’t find a valid
600 // rangeEnd+suffix pair anywhere in the doc then there’s no possible way
601 // to make a match.)
602 // XXX(:jjaschke): should this really assert?
603 MOZ_ASSERT(!aTextDirective.end.IsEmpty());
606 // 3. Return null.
607 return nullptr;
611 * @brief Convenience function that returns true if the given position in a
612 * string is a word boundary.
614 * This is a thin wrapper around the `WordBreaker::FindWord()` function.
616 * @param aText The text input.
617 * @param aPosition The position to check.
618 * @return true if there is a word boundary at `aPosition`.
619 * @return false otherwise.
621 bool IsAtWordBoundary(const nsAString& aText, uint32_t aPosition) {
622 const intl::WordRange wordRange =
623 intl::WordBreaker::FindWord(aText, aPosition);
624 return wordRange.mBegin == aPosition || wordRange.mEnd == aPosition;
627 enum class IsEndIndex : bool { No, Yes };
628 RangeBoundary GetBoundaryPointAtIndex(
629 uint32_t aIndex, const nsTArray<RefPtr<Text>>& aTextNodeList,
630 IsEndIndex aIsEndIndex) {
631 // 1. Let counted be 0.
632 uint32_t counted = 0;
633 // 2. For each curNode of nodes:
634 for (Text* curNode : aTextNodeList) {
635 // 2.1. Let nodeEnd be counted + curNode’s length.
636 uint32_t nodeEnd = counted + curNode->Length();
637 // 2.2. If isEnd is true, add 1 to nodeEnd.
638 if (aIsEndIndex == IsEndIndex::Yes) {
639 ++nodeEnd;
641 // 2.3. If nodeEnd is greater than index then:
642 if (nodeEnd > aIndex) {
643 // 2.3.1. Return the boundary point (curNode, index − counted).
644 return RangeBoundary(curNode->AsNode(), aIndex - counted);
646 // 2.4. Increment counted by curNode’s length.
647 counted += curNode->Length();
649 return {};
652 RefPtr<nsRange> FindRangeFromNodeList(
653 nsRange* aSearchRange, const nsAString& aQuery,
654 const nsTArray<RefPtr<Text>>& aTextNodeList, bool aWordStartBounded,
655 bool aWordEndBounded) {
656 // 1. Let searchBuffer be the concatenation of the data of each item in nodes.
657 // XXX(:jjaschke): There's an open issue here that deals with what
658 // data is supposed to be (text data vs. rendered text)
659 // https://github.com/WICG/scroll-to-text-fragment/issues/98
660 uint32_t bufferLength = 0;
661 for (const Text* text : aTextNodeList) {
662 bufferLength += text->Length();
664 // bail out if the search query is longer than the text data.
665 if (bufferLength < aQuery.Length()) {
666 return nullptr;
668 nsAutoString searchBuffer;
669 searchBuffer.SetCapacity(bufferLength);
670 for (Text* text : aTextNodeList) {
671 text->AppendTextTo(searchBuffer);
673 // 2. Let searchStart be 0.
674 // 3. If the first item in nodes is searchRange’s start node then set
675 // searchStart to searchRange’s start offset.
676 uint32_t searchStart =
677 aTextNodeList.SafeElementAt(0) == aSearchRange->GetStartContainer()
678 ? aSearchRange->StartOffset()
679 : 0;
681 // 4. Let start and end be boundary points, initially null.
682 RangeBoundary start, end;
683 // 5. Let matchIndex be null.
684 // "null" here doesn't mean 0, instead "not set". 0 would be a valid index.
685 // Therefore, "null" is represented by the value -1.
686 int32_t matchIndex = -1;
688 // 6. While matchIndex is null
689 // As explained above, "null" == -1 in this algorithm.
690 while (matchIndex == -1) {
691 // 6.1. Set matchIndex to the index of the first instance of queryString in
692 // searchBuffer, starting at searchStart. The string search must be
693 // performed using a base character comparison, or the primary level, as
694 // defined in [UTS10].
695 // [UTS10]
696 // Ken Whistler; Markus Scherer.Unicode Collation Algorithm.26 August 2022.
697 // Unicode Technical Standard #10.
698 // URL : https://www.unicode.org/reports/tr10/tr10-47.html
700 // XXX(:jjaschke): For the initial implementation, a standard case-sensitive
701 // find-in-string is used.
702 // See: https://github.com/WICG/scroll-to-text-fragment/issues/233
703 matchIndex = searchBuffer.Find(aQuery, searchStart);
704 // 6.2. If matchIndex is null, return null.
705 if (matchIndex == -1) {
706 return nullptr;
709 // 6.3. Let endIx be matchIndex + queryString’s length.
710 // endIx is the index of the last character in the match + 1.
711 const uint32_t endIx = matchIndex + aQuery.Length();
713 // 6.4. Set start to the boundary point result of get boundary point at
714 // index matchIndex run over nodes with isEnd false.
715 start = GetBoundaryPointAtIndex(matchIndex, aTextNodeList, IsEndIndex::No);
716 // 6.5. Set end to the boundary point result of get boundary point at index
717 // endIx run over nodes with isEnd true.
718 end = GetBoundaryPointAtIndex(endIx, aTextNodeList, IsEndIndex::Yes);
720 // 6.6. If wordStartBounded is true and matchIndex is not at a word boundary
721 // in searchBuffer, given the language from start’s node as the locale; or
722 // wordEndBounded is true and matchIndex + queryString’s length is not at a
723 // word boundary in searchBuffer, given the language from end’s node as the
724 // locale:
725 if ((aWordStartBounded && !IsAtWordBoundary(searchBuffer, matchIndex)) ||
726 (aWordEndBounded && !IsAtWordBoundary(searchBuffer, endIx))) {
727 // 6.6.1. Set searchStart to matchIndex + 1.
728 searchStart = matchIndex + 1;
729 // 6.6.2. Set matchIndex to null.
730 matchIndex = -1;
733 // 7. Let endInset be 0.
734 // 8. If the last item in nodes is searchRange’s end node then set endInset
735 // to (searchRange’s end node's length − searchRange’s end offset)
736 // (endInset is the offset from the last position in the last node in the
737 // reverse direction. Alternatively, it is the length of the node that’s not
738 // included in the range.)
739 uint32_t endInset =
740 aTextNodeList.LastElement() == aSearchRange->GetEndContainer()
741 ? aSearchRange->GetEndContainer()->Length() -
742 aSearchRange->EndOffset()
743 : 0;
745 // 9. If matchIndex + queryString’s length is greater than searchBuffer’s
746 // length − endInset return null.
747 // (If the match runs past the end of the search range, return null.)
748 if (matchIndex + aQuery.Length() > searchBuffer.Length() - endInset) {
749 return nullptr;
752 // 10. Assert: start and end are non-null, valid boundary points in
753 // searchRange.
754 MOZ_ASSERT(start.IsSetAndValid());
755 MOZ_ASSERT(end.IsSetAndValid());
757 // 11. Return a range with start start and end end.
758 ErrorResult rv;
759 RefPtr<nsRange> range = nsRange::Create(start, end, rv);
760 if (rv.Failed()) {
761 return nullptr;
764 return range;
767 RefPtr<nsRange> FragmentDirective::FindStringInRange(nsRange* aSearchRange,
768 const nsAString& aQuery,
769 bool aWordStartBounded,
770 bool aWordEndBounded) {
771 MOZ_ASSERT(aSearchRange);
772 RefPtr<nsRange> searchRange = aSearchRange->CloneRange();
773 // 1. While searchRange is not collapsed
774 while (searchRange && !searchRange->Collapsed()) {
775 // 1.1. Let curNode be searchRange’s start node.
776 RefPtr<nsINode> curNode = searchRange->GetStartContainer();
778 // 1.2. If curNode is part of a non-searchable subtree:
779 if (NodeIsPartOfNonSearchableSubTree(*curNode)) {
780 // 1.2.1. Set searchRange’s start node to the next node, in
781 // shadow-including tree order, that isn’t a shadow-including descendant
782 // of curNode.
783 RefPtr<nsINode> next = curNode;
784 while ((next = next->GetNextNode())) {
785 if (!next->IsShadowIncludingInclusiveDescendantOf(curNode)) {
786 break;
789 if (!next) {
790 return nullptr;
792 // 1.2.2. Set `searchRange`s `start offset` to 0
793 searchRange->SetStart(next, 0);
794 // 1.2.3. continue.
795 continue;
797 // 1.3. If curNode is not a visible TextNode:
798 if (!NodeIsVisibleTextNode(*curNode)) {
799 // 1.3.1. Set searchRange’s start node to the next node, in
800 // shadow-including tree order, that is not a doctype.
801 RefPtr<nsINode> next = curNode;
802 while ((next = next->GetNextNode())) {
803 if (next->NodeType() != Node_Binding::DOCUMENT_TYPE_NODE) {
804 break;
807 if (!next) {
808 return nullptr;
810 // 1.3.2. Set searchRange’s start offset to 0.
811 searchRange->SetStart(next, 0);
812 // 1.3.3. continue.
813 continue;
815 // 1.4. Let blockAncestor be the nearest block ancestor of `curNode`
816 RefPtr<nsINode> blockAncestor = GetBlockAncestorForNode(curNode);
818 // 1.5. Let textNodeList be a list of Text nodes, initially empty.
819 nsTArray<RefPtr<Text>> textNodeList;
820 // 1.6. While curNode is a shadow-including descendant of blockAncestor and
821 // the position of the boundary point (curNode,0) is not after searchRange's
822 // end:
823 while (curNode &&
824 curNode->IsShadowIncludingInclusiveDescendantOf(blockAncestor)) {
825 Maybe<int32_t> comp = nsContentUtils::ComparePoints(
826 curNode, 0, searchRange->GetEndContainer(), searchRange->EndOffset());
827 if (comp) {
828 if (*comp >= 0) {
829 break;
831 } else {
832 // This means that the compared nodes are disconnected.
833 return nullptr;
835 // 1.6.1. If curNode has block-level display, then break.
836 if (NodeHasBlockLevelDisplay(*curNode)) {
837 break;
839 // 1.6.2. If curNode is search invisible:
840 if (NodeIsSearchInvisible(*curNode)) {
841 // 1.6.2.1. Set curNode to the next node, in shadow-including tree
842 // order, that isn't a shadow-including descendant of curNode.
843 curNode = curNode->GetNextNode();
844 // 1.6.2.2. Continue.
845 continue;
847 // 1.6.3. If curNode is a visible text node then append it to
848 // textNodeList.
849 if (NodeIsVisibleTextNode(*curNode)) {
850 textNodeList.AppendElement(curNode->AsText());
852 // 1.6.4. Set curNode to the next node in shadow-including
853 // tree order.
854 curNode = curNode->GetNextNode();
856 // 1.7. Run the find a range from a node list steps given
857 // query, searchRange, textNodeList, wordStartBounded, wordEndBounded as
858 // input. If the resulting Range is not null, then return it.
859 if (RefPtr<nsRange> range =
860 FindRangeFromNodeList(searchRange, aQuery, textNodeList,
861 aWordStartBounded, aWordEndBounded)) {
862 return range;
865 // 1.8. If curNode is null, then break.
866 if (!curNode) {
867 break;
870 // 1.9. Assert: curNode follows searchRange's start node.
872 // 1.10. Set searchRange's start to the boundary point (curNode,0).
873 searchRange->SetStart(curNode, 0);
876 // 2. Return null.
877 return nullptr;
879 } // namespace mozilla::dom