Bumping manifests a=b2g-bump
[gecko.git] / layout / generic / nsTextFrameUtils.cpp
blobd1eedd48abb6f0e0652790fe3e329c083cc77919
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsTextFrameUtils.h"
8 #include "nsUnicharUtils.h"
9 #include "nsBidiUtils.h"
10 #include "nsIContent.h"
11 #include "nsStyleStruct.h"
12 #include "nsTextFragment.h"
13 #include <algorithm>
15 static bool IsDiscardable(char16_t ch, uint32_t* aFlags)
17 // Unlike IS_DISCARDABLE, we don't discard \r. \r will be ignored by gfxTextRun
18 // and discarding it would force us to copy text in many cases of preformatted
19 // text containing \r\n.
20 if (ch == CH_SHY) {
21 *aFlags |= nsTextFrameUtils::TEXT_HAS_SHY;
22 return true;
24 return IsBidiControl(ch);
27 static bool IsDiscardable(uint8_t ch, uint32_t* aFlags)
29 if (ch == CH_SHY) {
30 *aFlags |= nsTextFrameUtils::TEXT_HAS_SHY;
31 return true;
33 return false;
36 char16_t*
37 nsTextFrameUtils::TransformText(const char16_t* aText, uint32_t aLength,
38 char16_t* aOutput,
39 CompressionMode aCompression,
40 uint8_t* aIncomingFlags,
41 gfxSkipChars* aSkipChars,
42 uint32_t* aAnalysisFlags)
44 uint32_t flags = 0;
45 char16_t* outputStart = aOutput;
47 bool lastCharArabic = false;
49 if (aCompression == COMPRESS_NONE ||
50 aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
51 // Skip discardables.
52 uint32_t i;
53 for (i = 0; i < aLength; ++i) {
54 char16_t ch = *aText++;
55 if (IsDiscardable(ch, &flags)) {
56 aSkipChars->SkipChar();
57 } else {
58 aSkipChars->KeepChar();
59 if (ch > ' ') {
60 lastCharArabic = IS_ARABIC_CHAR(ch);
61 } else if (aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
62 if (ch == '\t' || ch == '\n') {
63 ch = ' ';
64 flags |= TEXT_WAS_TRANSFORMED;
66 } else {
67 // aCompression == COMPRESS_NONE
68 if (ch == '\t') {
69 flags |= TEXT_HAS_TAB;
72 *aOutput++ = ch;
75 if (lastCharArabic) {
76 *aIncomingFlags |= INCOMING_ARABICCHAR;
77 } else {
78 *aIncomingFlags &= ~INCOMING_ARABICCHAR;
80 *aIncomingFlags &= ~INCOMING_WHITESPACE;
81 } else {
82 bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0;
83 uint32_t i;
84 for (i = 0; i < aLength; ++i) {
85 char16_t ch = *aText++;
86 bool nowInWhitespace;
87 if (ch == ' ' &&
88 (i + 1 >= aLength ||
89 !IsSpaceCombiningSequenceTail(aText, aLength - (i + 1)))) {
90 nowInWhitespace = true;
91 } else if (ch == '\n' && aCompression == COMPRESS_WHITESPACE_NEWLINE) {
92 if (i > 0 && IS_CJ_CHAR(aText[-1]) &&
93 i + 1 < aLength && IS_CJ_CHAR(aText[1])) {
94 // Discard newlines between CJK chars.
95 // XXX this really requires more context to get right!
96 aSkipChars->SkipChar();
97 continue;
99 nowInWhitespace = true;
100 } else {
101 nowInWhitespace = ch == '\t';
104 if (!nowInWhitespace) {
105 if (IsDiscardable(ch, &flags)) {
106 aSkipChars->SkipChar();
107 nowInWhitespace = inWhitespace;
108 } else {
109 *aOutput++ = ch;
110 aSkipChars->KeepChar();
111 lastCharArabic = IS_ARABIC_CHAR(ch);
113 } else {
114 if (inWhitespace) {
115 aSkipChars->SkipChar();
116 } else {
117 if (ch != ' ') {
118 flags |= TEXT_WAS_TRANSFORMED;
120 *aOutput++ = ' ';
121 aSkipChars->KeepChar();
124 inWhitespace = nowInWhitespace;
126 if (lastCharArabic) {
127 *aIncomingFlags |= INCOMING_ARABICCHAR;
128 } else {
129 *aIncomingFlags &= ~INCOMING_ARABICCHAR;
131 if (inWhitespace) {
132 *aIncomingFlags |= INCOMING_WHITESPACE;
133 } else {
134 *aIncomingFlags &= ~INCOMING_WHITESPACE;
138 if (outputStart + aLength != aOutput) {
139 flags |= TEXT_WAS_TRANSFORMED;
141 *aAnalysisFlags = flags;
142 return aOutput;
145 uint8_t*
146 nsTextFrameUtils::TransformText(const uint8_t* aText, uint32_t aLength,
147 uint8_t* aOutput,
148 CompressionMode aCompression,
149 uint8_t* aIncomingFlags,
150 gfxSkipChars* aSkipChars,
151 uint32_t* aAnalysisFlags)
153 uint32_t flags = 0;
154 uint8_t* outputStart = aOutput;
156 if (aCompression == COMPRESS_NONE ||
157 aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
158 // Skip discardables.
159 uint32_t i;
160 for (i = 0; i < aLength; ++i) {
161 uint8_t ch = *aText++;
162 if (IsDiscardable(ch, &flags)) {
163 aSkipChars->SkipChar();
164 } else {
165 aSkipChars->KeepChar();
166 if (aCompression == COMPRESS_NONE_TRANSFORM_TO_SPACE) {
167 if (ch == '\t' || ch == '\n') {
168 ch = ' ';
169 flags |= TEXT_WAS_TRANSFORMED;
171 } else {
172 // aCompression == COMPRESS_NONE
173 if (ch == '\t') {
174 flags |= TEXT_HAS_TAB;
177 *aOutput++ = ch;
180 *aIncomingFlags &= ~(INCOMING_ARABICCHAR | INCOMING_WHITESPACE);
181 } else {
182 bool inWhitespace = (*aIncomingFlags & INCOMING_WHITESPACE) != 0;
183 uint32_t i;
184 for (i = 0; i < aLength; ++i) {
185 uint8_t ch = *aText++;
186 bool nowInWhitespace = ch == ' ' || ch == '\t' ||
187 (ch == '\n' && aCompression == COMPRESS_WHITESPACE_NEWLINE);
188 if (!nowInWhitespace) {
189 if (IsDiscardable(ch, &flags)) {
190 aSkipChars->SkipChar();
191 nowInWhitespace = inWhitespace;
192 } else {
193 *aOutput++ = ch;
194 aSkipChars->KeepChar();
196 } else {
197 if (inWhitespace) {
198 aSkipChars->SkipChar();
199 } else {
200 if (ch != ' ') {
201 flags |= TEXT_WAS_TRANSFORMED;
203 *aOutput++ = ' ';
204 aSkipChars->KeepChar();
207 inWhitespace = nowInWhitespace;
209 *aIncomingFlags &= ~INCOMING_ARABICCHAR;
210 if (inWhitespace) {
211 *aIncomingFlags |= INCOMING_WHITESPACE;
212 } else {
213 *aIncomingFlags &= ~INCOMING_WHITESPACE;
217 if (outputStart + aLength != aOutput) {
218 flags |= TEXT_WAS_TRANSFORMED;
220 *aAnalysisFlags = flags;
221 return aOutput;
224 uint32_t
225 nsTextFrameUtils::ComputeApproximateLengthWithWhitespaceCompression(
226 nsIContent *aContent, const nsStyleText *aStyleText)
228 const nsTextFragment *frag = aContent->GetText();
229 // This is an approximation so we don't really need anything
230 // too fancy here.
231 uint32_t len;
232 if (aStyleText->WhiteSpaceIsSignificant()) {
233 len = frag->GetLength();
234 } else {
235 bool is2b = frag->Is2b();
236 union {
237 const char *s1b;
238 const char16_t *s2b;
239 } u;
240 if (is2b) {
241 u.s2b = frag->Get2b();
242 } else {
243 u.s1b = frag->Get1b();
245 bool prevWS = true; // more important to ignore blocks with
246 // only whitespace than get inline boundaries
247 // exactly right
248 len = 0;
249 for (uint32_t i = 0, i_end = frag->GetLength(); i < i_end; ++i) {
250 char16_t c = is2b ? u.s2b[i] : u.s1b[i];
251 if (c == ' ' || c == '\n' || c == '\t' || c == '\r') {
252 if (!prevWS) {
253 ++len;
255 prevWS = true;
256 } else {
257 ++len;
258 prevWS = false;
262 return len;
265 bool nsSkipCharsRunIterator::NextRun() {
266 do {
267 if (mRunLength) {
268 mIterator.AdvanceOriginal(mRunLength);
269 NS_ASSERTION(mRunLength > 0, "No characters in run (initial length too large?)");
270 if (!mSkipped || mLengthIncludesSkipped) {
271 mRemainingLength -= mRunLength;
274 if (!mRemainingLength)
275 return false;
276 int32_t length;
277 mSkipped = mIterator.IsOriginalCharSkipped(&length);
278 mRunLength = std::min(length, mRemainingLength);
279 } while (!mVisitSkipped && mSkipped);
281 return true;