1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsTextFrameUtils.h"
9 #include "mozilla/dom/Text.h"
10 #include "nsBidiUtils.h"
11 #include "nsCharTraits.h"
12 #include "nsIContent.h"
13 #include "nsStyleStruct.h"
14 #include "nsTextFragment.h"
15 #include "nsUnicharUtils.h"
16 #include "nsUnicodeProperties.h"
19 using namespace mozilla
;
20 using namespace mozilla::dom
;
23 bool nsTextFrameUtils::IsSpaceCombiningSequenceTail(const char16_t
* aChars
,
26 (mozilla::unicode::IsClusterExtenderExcludingJoiners(aChars
[0]) ||
27 (IsBidiControl(aChars
[0]) &&
28 IsSpaceCombiningSequenceTail(aChars
+ 1, aLength
- 1)));
31 static bool IsDiscardable(char16_t ch
, nsTextFrameUtils::Flags
* aFlags
) {
32 // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by
33 // gfxTextRun and discarding it would force us to copy text in many cases of
34 // preformatted text containing \r\n.
36 *aFlags
|= nsTextFrameUtils::Flags::HasShy
;
39 return IsBidiControl(ch
);
42 static bool IsDiscardable(uint8_t ch
, nsTextFrameUtils::Flags
* aFlags
) {
44 *aFlags
|= nsTextFrameUtils::Flags::HasShy
;
// Returns true when aCh is a line feed ('\n'), the character this file
// tests for when looking for a segment break.
static bool IsSegmentBreak(char16_t aCh) {
  const bool isLineFeed = (aCh == '\n');
  return isLineFeed;
}
// Returns true when aCh is a space (' ') or a horizontal tab ('\t').
static bool IsSpaceOrTab(char16_t aCh) {
  switch (aCh) {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
  }
}
54 static bool IsSpaceOrTabOrSegmentBreak(char16_t aCh
) {
55 return IsSpaceOrTab(aCh
) || IsSegmentBreak(aCh
);
58 template <typename CharT
>
60 bool nsTextFrameUtils::IsSkippableCharacterForTransformText(CharT aChar
) {
61 return aChar
== ' ' || aChar
== '\t' || aChar
== '\n' || aChar
== CH_SHY
||
62 (aChar
> 0xFF && IsBidiControl(aChar
));
66 template <typename CharT
>
67 static void AssertSkippedExpectedChars(const CharT
* aText
,
68 const gfxSkipChars
& aSkipChars
,
69 int32_t aSkipCharsOffset
) {
70 gfxSkipCharsIterator
it(aSkipChars
);
71 it
.AdvanceOriginal(aSkipCharsOffset
);
72 while (it
.GetOriginalOffset() < it
.GetOriginalEnd()) {
73 CharT ch
= aText
[it
.GetOriginalOffset() - aSkipCharsOffset
];
74 MOZ_ASSERT(!it
.IsOriginalCharSkipped() ||
75 nsTextFrameUtils::IsSkippableCharacterForTransformText(ch
),
76 "skipped unexpected character; need to update "
77 "IsSkippableCharacterForTransformText?");
78 it
.AdvanceOriginal(1);
83 template <class CharT
>
84 static CharT
* TransformWhiteSpaces(
85 const CharT
* aText
, uint32_t aLength
, uint32_t aBegin
, uint32_t aEnd
,
86 bool aHasSegmentBreak
, bool& aInWhitespace
, CharT
* aOutput
,
87 nsTextFrameUtils::Flags
& aFlags
,
88 nsTextFrameUtils::CompressionMode aCompression
, gfxSkipChars
* aSkipChars
) {
89 MOZ_ASSERT(aCompression
== nsTextFrameUtils::COMPRESS_WHITESPACE
||
90 aCompression
== nsTextFrameUtils::COMPRESS_WHITESPACE_NEWLINE
,
91 "whitespaces should be skippable!!");
92 // Get the context preceding/following this white space range.
93 // For 8-bit text (sizeof CharT == 1), the checks here should get optimized
94 // out, and isSegmentBreakSkippable should be initialized to be 'false'.
95 bool isSegmentBreakSkippable
=
97 ((aBegin
> 0 && IS_ZERO_WIDTH_SPACE(aText
[aBegin
- 1])) ||
98 (aEnd
< aLength
&& IS_ZERO_WIDTH_SPACE(aText
[aEnd
])));
99 if (sizeof(CharT
) > 1 && !isSegmentBreakSkippable
&& aBegin
> 0 &&
104 NS_IS_SURROGATE_PAIR(aText
[aBegin
- 2], aText
[aBegin
- 1])) {
105 ucs4before
= SURROGATE_TO_UCS4(aText
[aBegin
- 2], aText
[aBegin
- 1]);
107 ucs4before
= aText
[aBegin
- 1];
109 if (aEnd
+ 1 < aLength
&&
110 NS_IS_SURROGATE_PAIR(aText
[aEnd
], aText
[aEnd
+ 1])) {
111 ucs4after
= SURROGATE_TO_UCS4(aText
[aEnd
], aText
[aEnd
+ 1]);
113 ucs4after
= aText
[aEnd
];
115 // Discard newlines between characters that have F, W, or H
116 // EastAsianWidth property and neither side is Hangul.
117 isSegmentBreakSkippable
=
118 IsSegmentBreakSkipChar(ucs4before
) && IsSegmentBreakSkipChar(ucs4after
);
121 for (uint32_t i
= aBegin
; i
< aEnd
; ++i
) {
123 bool keepChar
= false;
124 bool keepTransformedWhiteSpace
= false;
125 if (IsDiscardable(ch
, &aFlags
)) {
126 aSkipChars
->SkipChar();
129 if (IsSpaceOrTab(ch
)) {
130 if (aHasSegmentBreak
) {
131 // If white-space is set to normal, nowrap, or pre-line, white space
132 // characters are considered collapsible and all spaces and tabs
133 // immediately preceding or following a segment break are removed.
134 aSkipChars
->SkipChar();
139 aSkipChars
->SkipChar();
142 keepTransformedWhiteSpace
= true;
145 // Apply Segment Break Transformation Rules (CSS Text 3 - 4.1.2) for
146 // segment break characters.
147 if (aCompression
== nsTextFrameUtils::COMPRESS_WHITESPACE
||
148 // XXX: According to CSS Text 3, a lone CR should not always be
149 // kept, but still go through the Segment Break Transformation
150 // Rules. However, this is what current modern browser engines
151 // (webkit/blink/edge) do. So, once we can get some clarity
152 // from the specification issue, we should either remove the
153 // lone CR condition here, or leave it here with this comment
155 // Please see https://github.com/w3c/csswg-drafts/issues/855.
159 // aCompression == COMPRESS_WHITESPACE_NEWLINE
161 // Any collapsible segment break immediately following another
162 // collapsible segment break is removed. Then the remaining segment
163 // break is either transformed into a space (U+0020) or removed
164 // depending on the context before and after the break.
165 if (isSegmentBreakSkippable
|| aInWhitespace
) {
166 aSkipChars
->SkipChar();
169 isSegmentBreakSkippable
= true;
170 keepTransformedWhiteSpace
= true;
176 aSkipChars
->KeepChar();
177 aInWhitespace
= IsSpaceOrTab(ch
);
178 } else if (keepTransformedWhiteSpace
) {
180 aSkipChars
->KeepChar();
181 aInWhitespace
= true;
183 MOZ_ASSERT_UNREACHABLE("Should've skipped the character!!");
189 template <class CharT
>
190 CharT
* nsTextFrameUtils::TransformText(const CharT
* aText
, uint32_t aLength
,
192 CompressionMode aCompression
,
193 uint8_t* aIncomingFlags
,
194 gfxSkipChars
* aSkipChars
,
195 Flags
* aAnalysisFlags
) {
196 Flags flags
= Flags();
198 int32_t skipCharsOffset
= aSkipChars
->GetOriginalCharCount();
201 bool lastCharArabic
= false;
202 if (aCompression
== COMPRESS_NONE
||
203 aCompression
== COMPRESS_NONE_TRANSFORM_TO_SPACE
) {
204 // Skip discardables.
206 for (i
= 0; i
< aLength
; ++i
) {
208 if (IsDiscardable(ch
, &flags
)) {
209 aSkipChars
->SkipChar();
211 aSkipChars
->KeepChar();
213 lastCharArabic
= IS_ARABIC_CHAR(ch
);
214 } else if (aCompression
== COMPRESS_NONE_TRANSFORM_TO_SPACE
) {
215 if (ch
== '\t' || ch
== '\n') {
219 // aCompression == COMPRESS_NONE
221 flags
|= Flags::HasTab
;
222 } else if (ch
== '\n') {
223 flags
|= Flags::HasNewline
;
229 if (lastCharArabic
) {
230 *aIncomingFlags
|= INCOMING_ARABICCHAR
;
232 *aIncomingFlags
&= ~INCOMING_ARABICCHAR
;
234 *aIncomingFlags
&= ~INCOMING_WHITESPACE
;
236 bool inWhitespace
= (*aIncomingFlags
& INCOMING_WHITESPACE
) != 0;
238 for (i
= 0; i
< aLength
; ++i
) {
// CSS Text 3 - 4.1. The White Space Processing Rules
// White space processing in CSS affects only the document white space
// characters: spaces (U+0020), tabs (U+0009), and segment breaks.
// Since we need the context of segment breaks and their surrounding
// white spaces to perform the white space processing, a consecutive run
// of spaces/tabs/segment breaks is collected in a first pass loop, then
// we apply the collapsing and transformation rules to this run in a
// second pass.
)) {
249 bool keepLastSpace
= false;
250 bool hasSegmentBreak
= IsSegmentBreak(ch
);
251 uint32_t countTrailingDiscardables
= 0;
253 for (j
= i
+ 1; j
< aLength
&& (IsSpaceOrTabOrSegmentBreak(aText
[j
]) ||
254 IsDiscardable(aText
[j
], &flags
));
256 if (IsSegmentBreak(aText
[j
])) {
257 hasSegmentBreak
= true;
260 // Exclude trailing discardables before checking space combining
262 for (; IsDiscardable(aText
[j
- 1], &flags
); j
--) {
263 countTrailingDiscardables
++;
265 // If the last white space is followed by a combining sequence tail,
266 // exclude it from the range of TransformWhiteSpaces.
267 if (sizeof(CharT
) > 1 && aText
[j
- 1] == ' ' && j
< aLength
&&
268 IsSpaceCombiningSequenceTail(&aText
[j
], aLength
- j
)) {
269 keepLastSpace
= true;
273 aOutput
= TransformWhiteSpaces(aText
, aLength
, i
, j
, hasSegmentBreak
,
274 inWhitespace
, aOutput
, flags
,
275 aCompression
, aSkipChars
);
277 // We need to keep KeepChar()/SkipChar() in order, so process the
278 // last white space first, then process the trailing discardables.
280 keepLastSpace
= false;
282 aSkipChars
->KeepChar();
283 lastCharArabic
= false;
286 for (; countTrailingDiscardables
> 0; countTrailingDiscardables
--) {
287 aSkipChars
->SkipChar();
293 // Process characters other than the document white space characters.
294 if (IsDiscardable(ch
, &flags
)) {
295 aSkipChars
->SkipChar();
298 aSkipChars
->KeepChar();
300 lastCharArabic
= IS_ARABIC_CHAR(ch
);
301 inWhitespace
= false;
304 if (lastCharArabic
) {
305 *aIncomingFlags
|= INCOMING_ARABICCHAR
;
307 *aIncomingFlags
&= ~INCOMING_ARABICCHAR
;
310 *aIncomingFlags
|= INCOMING_WHITESPACE
;
312 *aIncomingFlags
&= ~INCOMING_WHITESPACE
;
316 *aAnalysisFlags
= flags
;
319 AssertSkippedExpectedChars(aText
, *aSkipChars
, skipCharsOffset
);
/*
 * NOTE: The TransformText and IsSkippableCharacterForTransformText template
 * functions are part of the public API of nsTextFrameUtils, but their
 * function bodies are not available in the header. Calls to them may stop
 * working (fail to resolve the symbol at link time) once their call sites are
 * moved to a different translation unit (e.g. a different unified source
 * file). Explicitly instantiating these function templates for `uint8_t` and
 * `char16_t` protects us from that risk.
 */
333 template uint8_t* nsTextFrameUtils::TransformText(
334 const uint8_t* aText
, uint32_t aLength
, uint8_t* aOutput
,
335 CompressionMode aCompression
, uint8_t* aIncomingFlags
,
336 gfxSkipChars
* aSkipChars
, Flags
* aAnalysisFlags
);
337 template char16_t
* nsTextFrameUtils::TransformText(
338 const char16_t
* aText
, uint32_t aLength
, char16_t
* aOutput
,
339 CompressionMode aCompression
, uint8_t* aIncomingFlags
,
340 gfxSkipChars
* aSkipChars
, Flags
* aAnalysisFlags
);
341 template bool nsTextFrameUtils::IsSkippableCharacterForTransformText(
343 template bool nsTextFrameUtils::IsSkippableCharacterForTransformText(
346 template <typename CharT
>
347 static uint32_t DoComputeApproximateLengthWithWhitespaceCompression(
348 const CharT
* aChars
, uint32_t aLength
, const nsStyleText
* aStyleText
) {
349 // This is an approximation so we don't really need anything
352 if (aStyleText
->WhiteSpaceIsSignificant()) {
355 bool prevWS
= true; // more important to ignore blocks with
356 // only whitespace than get inline boundaries
359 for (uint32_t i
= 0; i
< aLength
; ++i
) {
361 if (c
== ' ' || c
== '\n' || c
== '\t' || c
== '\r') {
374 uint32_t nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression(
375 Text
* aText
, const nsStyleText
* aStyleText
) {
376 const nsTextFragment
* frag
= &aText
->TextFragment();
378 return DoComputeApproximateLengthWithWhitespaceCompression(
379 frag
->Get2b(), frag
->GetLength(), aStyleText
);
381 return DoComputeApproximateLengthWithWhitespaceCompression(
382 frag
->Get1b(), frag
->GetLength(), aStyleText
);
385 uint32_t nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression(
386 const nsAString
& aString
, const nsStyleText
* aStyleText
) {
387 return DoComputeApproximateLengthWithWhitespaceCompression(
388 aString
.BeginReading(), aString
.Length(), aStyleText
);
391 bool nsSkipCharsRunIterator::NextRun() {
394 mIterator
.AdvanceOriginal(mRunLength
);
395 NS_ASSERTION(mRunLength
> 0,
396 "No characters in run (initial length too large?)");
397 if (!mSkipped
|| mLengthIncludesSkipped
) {
398 mRemainingLength
-= mRunLength
;
401 if (!mRemainingLength
) {
405 mSkipped
= mIterator
.IsOriginalCharSkipped(&length
);
406 mRunLength
= std::min(length
, mRemainingLength
);
407 } while (!mVisitSkipped
&& mSkipped
);