Bumping manifests a=b2g-bump
[gecko.git] / dom / base / nsLineBreaker.cpp
blobc074b3624a76da3d38ea82d5a0f1e75c56ecef2b
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLineBreaker.h"
7 #include "nsContentUtils.h"
8 #include "nsILineBreaker.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/gfx/2D.h"
14 nsLineBreaker::nsLineBreaker()
15 : mCurrentWordLanguage(nullptr),
16 mCurrentWordContainsMixedLang(false),
17 mCurrentWordContainsComplexChar(false),
18 mAfterBreakableSpace(false), mBreakHere(false),
19 mWordBreak(nsILineBreaker::kWordBreak_Normal)
23 nsLineBreaker::~nsLineBreaker()
25 NS_ASSERTION(mCurrentWord.Length() == 0, "Should have Reset() before destruction!");
28 static void
29 SetupCapitalization(const char16_t* aWord, uint32_t aLength,
30 bool* aCapitalization)
32 // Capitalize the first alphanumeric character after a space or start
33 // of the word.
34 // The only space character a word can contain is NBSP.
35 bool capitalizeNextChar = true;
36 for (uint32_t i = 0; i < aLength; ++i) {
37 uint32_t ch = aWord[i];
38 if (capitalizeNextChar) {
39 if (NS_IS_HIGH_SURROGATE(ch) && i + 1 < aLength &&
40 NS_IS_LOW_SURROGATE(aWord[i + 1])) {
41 ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
43 if (nsContentUtils::IsAlphanumeric(ch)) {
44 aCapitalization[i] = true;
45 capitalizeNextChar = false;
47 if (!IS_IN_BMP(ch)) {
48 ++i;
51 if (ch == 0xA0 /*NBSP*/) {
52 capitalizeNextChar = true;
57 nsresult
58 nsLineBreaker::FlushCurrentWord()
60 uint32_t length = mCurrentWord.Length();
61 nsAutoTArray<uint8_t,4000> breakState;
62 if (!breakState.AppendElements(length))
63 return NS_ERROR_OUT_OF_MEMORY;
65 nsTArray<bool> capitalizationState;
67 if (!mCurrentWordContainsComplexChar) {
68 // For break-strict set everything internal to "break", otherwise
69 // to "no break"!
70 memset(breakState.Elements(),
71 mWordBreak == nsILineBreaker::kWordBreak_BreakAll ?
72 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
73 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
74 length*sizeof(uint8_t));
75 } else {
76 nsContentUtils::LineBreaker()->
77 GetJISx4051Breaks(mCurrentWord.Elements(), length, mWordBreak,
78 breakState.Elements());
81 bool autoHyphenate = mCurrentWordLanguage &&
82 !mCurrentWordContainsMixedLang;
83 uint32_t i;
84 for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
85 TextItem* ti = &mTextItems[i];
86 if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
87 autoHyphenate = false;
90 if (autoHyphenate) {
91 nsRefPtr<nsHyphenator> hyphenator =
92 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
93 if (hyphenator) {
94 FindHyphenationPoints(hyphenator,
95 mCurrentWord.Elements(),
96 mCurrentWord.Elements() + length,
97 breakState.Elements());
101 uint32_t offset = 0;
102 for (i = 0; i < mTextItems.Length(); ++i) {
103 TextItem* ti = &mTextItems[i];
104 NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
106 if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
107 breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
109 if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
110 uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
111 memset(breakState.Elements() + offset + exclude,
112 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
113 (ti->mLength - exclude)*sizeof(uint8_t));
116 // Don't set the break state for the first character of the word, because
117 // it was already set correctly earlier and we don't know what the true
118 // value should be.
119 uint32_t skipSet = i == 0 ? 1 : 0;
120 if (ti->mSink) {
121 ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
122 breakState.Elements() + offset + skipSet);
124 if (ti->mFlags & BREAK_NEED_CAPITALIZATION) {
125 if (capitalizationState.Length() == 0) {
126 if (!capitalizationState.AppendElements(length))
127 return NS_ERROR_OUT_OF_MEMORY;
128 memset(capitalizationState.Elements(), false, length*sizeof(bool));
129 SetupCapitalization(mCurrentWord.Elements(), length,
130 capitalizationState.Elements());
132 ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
133 capitalizationState.Elements() + offset);
137 offset += ti->mLength;
140 mCurrentWord.Clear();
141 mTextItems.Clear();
142 mCurrentWordContainsComplexChar = false;
143 mCurrentWordContainsMixedLang = false;
144 mCurrentWordLanguage = nullptr;
145 return NS_OK;
148 nsresult
149 nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const char16_t* aText, uint32_t aLength,
150 uint32_t aFlags, nsILineBreakSink* aSink)
152 NS_ASSERTION(aLength > 0, "Appending empty text...");
154 uint32_t offset = 0;
156 // Continue the current word
157 if (mCurrentWord.Length() > 0) {
158 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
160 while (offset < aLength && !IsSpace(aText[offset])) {
161 mCurrentWord.AppendElement(aText[offset]);
162 if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
163 mCurrentWordContainsComplexChar = true;
165 UpdateCurrentWordLanguage(aHyphenationLanguage);
166 ++offset;
169 if (offset > 0) {
170 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
173 if (offset == aLength)
174 return NS_OK;
176 // We encountered whitespace, so we're done with this word
177 nsresult rv = FlushCurrentWord();
178 if (NS_FAILED(rv))
179 return rv;
182 nsAutoTArray<uint8_t,4000> breakState;
183 if (aSink) {
184 if (!breakState.AppendElements(aLength))
185 return NS_ERROR_OUT_OF_MEMORY;
188 nsTArray<bool> capitalizationState;
189 if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
190 if (!capitalizationState.AppendElements(aLength))
191 return NS_ERROR_OUT_OF_MEMORY;
192 memset(capitalizationState.Elements(), false, aLength*sizeof(bool));
195 uint32_t start = offset;
196 bool noBreaksNeeded = !aSink ||
197 (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) &&
198 !mBreakHere && !mAfterBreakableSpace);
199 if (noBreaksNeeded) {
200 // Skip to the space before the last word, since either the break data
201 // here is not needed, or no breaks are set in the sink and there cannot
202 // be any breaks in this chunk; all we need is the context for the next
203 // chunk (if any)
204 offset = aLength;
205 while (offset > start) {
206 --offset;
207 if (IsSpace(aText[offset]))
208 break;
211 uint32_t wordStart = offset;
212 bool wordHasComplexChar = false;
214 nsRefPtr<nsHyphenator> hyphenator;
215 if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
216 !(aFlags & BREAK_SUPPRESS_INSIDE) &&
217 aHyphenationLanguage) {
218 hyphenator = nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
221 for (;;) {
222 char16_t ch = aText[offset];
223 bool isSpace = IsSpace(ch);
224 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
226 if (aSink) {
227 breakState[offset] =
228 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
229 (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ?
230 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
231 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
233 mBreakHere = false;
234 mAfterBreakableSpace = isBreakableSpace;
236 if (isSpace) {
237 if (offset > wordStart && aSink) {
238 if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
239 if (wordHasComplexChar) {
240 // Save current start-of-word state because GetJISx4051Breaks will
241 // set it to false
242 uint8_t currentStart = breakState[wordStart];
243 nsContentUtils::LineBreaker()->
244 GetJISx4051Breaks(aText + wordStart, offset - wordStart,
245 mWordBreak,
246 breakState.Elements() + wordStart);
247 breakState[wordStart] = currentStart;
249 if (hyphenator) {
250 FindHyphenationPoints(hyphenator,
251 aText + wordStart, aText + offset,
252 breakState.Elements() + wordStart);
255 if (aFlags & BREAK_NEED_CAPITALIZATION) {
256 SetupCapitalization(aText + wordStart, offset - wordStart,
257 capitalizationState.Elements() + wordStart);
260 wordHasComplexChar = false;
261 ++offset;
262 if (offset >= aLength)
263 break;
264 wordStart = offset;
265 } else {
266 if (!wordHasComplexChar && IsComplexChar(ch)) {
267 wordHasComplexChar = true;
269 ++offset;
270 if (offset >= aLength) {
271 // Save this word
272 mCurrentWordContainsComplexChar = wordHasComplexChar;
273 uint32_t len = offset - wordStart;
274 char16_t* elems = mCurrentWord.AppendElements(len);
275 if (!elems)
276 return NS_ERROR_OUT_OF_MEMORY;
277 memcpy(elems, aText + wordStart, sizeof(char16_t)*len);
278 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
279 // Ensure that the break-before for this word is written out
280 offset = wordStart + 1;
281 UpdateCurrentWordLanguage(aHyphenationLanguage);
282 break;
287 if (!noBreaksNeeded) {
288 // aSink must not be null
289 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
290 if (aFlags & BREAK_NEED_CAPITALIZATION) {
291 aSink->SetCapitalization(start, offset - start,
292 capitalizationState.Elements() + start);
295 return NS_OK;
298 void
299 nsLineBreaker::FindHyphenationPoints(nsHyphenator *aHyphenator,
300 const char16_t *aTextStart,
301 const char16_t *aTextLimit,
302 uint8_t *aBreakState)
304 nsDependentSubstring string(aTextStart, aTextLimit);
305 AutoFallibleTArray<bool,200> hyphens;
306 if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
307 for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
308 if (hyphens[i]) {
309 aBreakState[i + 1] =
310 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
316 nsresult
317 nsLineBreaker::AppendText(nsIAtom* aHyphenationLanguage, const uint8_t* aText, uint32_t aLength,
318 uint32_t aFlags, nsILineBreakSink* aSink)
320 NS_ASSERTION(aLength > 0, "Appending empty text...");
322 if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
323 // Defer to the Unicode path if capitalization or hyphenation is required
324 nsAutoString str;
325 const char* cp = reinterpret_cast<const char*>(aText);
326 CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
327 return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
330 uint32_t offset = 0;
332 // Continue the current word
333 if (mCurrentWord.Length() > 0) {
334 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere, "These should not be set");
336 while (offset < aLength && !IsSpace(aText[offset])) {
337 mCurrentWord.AppendElement(aText[offset]);
338 if (!mCurrentWordContainsComplexChar &&
339 IsComplexASCIIChar(aText[offset])) {
340 mCurrentWordContainsComplexChar = true;
342 ++offset;
345 if (offset > 0) {
346 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
349 if (offset == aLength) {
350 // We did not encounter whitespace so the word hasn't finished yet.
351 return NS_OK;
354 // We encountered whitespace, so we're done with this word
355 nsresult rv = FlushCurrentWord();
356 if (NS_FAILED(rv))
357 return rv;
360 nsAutoTArray<uint8_t,4000> breakState;
361 if (aSink) {
362 if (!breakState.AppendElements(aLength))
363 return NS_ERROR_OUT_OF_MEMORY;
366 uint32_t start = offset;
367 bool noBreaksNeeded = !aSink ||
368 (aFlags == (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | BREAK_SKIP_SETTING_NO_BREAKS) &&
369 !mBreakHere && !mAfterBreakableSpace);
370 if (noBreaksNeeded) {
371 // Skip to the space before the last word, since either the break data
372 // here is not needed, or no breaks are set in the sink and there cannot
373 // be any breaks in this chunk; all we need is the context for the next
374 // chunk (if any)
375 offset = aLength;
376 while (offset > start) {
377 --offset;
378 if (IsSpace(aText[offset]))
379 break;
382 uint32_t wordStart = offset;
383 bool wordHasComplexChar = false;
385 for (;;) {
386 uint8_t ch = aText[offset];
387 bool isSpace = IsSpace(ch);
388 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
390 if (aSink) {
391 // Consider word-break style. Since the break position of CJK scripts
392 // will be set by nsILineBreaker, we don't consider CJK at this point.
393 breakState[offset] =
394 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
395 (mWordBreak == nsILineBreaker::kWordBreak_BreakAll) ?
396 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL :
397 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
399 mBreakHere = false;
400 mAfterBreakableSpace = isBreakableSpace;
402 if (isSpace) {
403 if (offset > wordStart && wordHasComplexChar) {
404 if (aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
405 // Save current start-of-word state because GetJISx4051Breaks will
406 // set it to false
407 uint8_t currentStart = breakState[wordStart];
408 nsContentUtils::LineBreaker()->
409 GetJISx4051Breaks(aText + wordStart, offset - wordStart,
410 mWordBreak,
411 breakState.Elements() + wordStart);
412 breakState[wordStart] = currentStart;
414 wordHasComplexChar = false;
417 ++offset;
418 if (offset >= aLength)
419 break;
420 wordStart = offset;
421 } else {
422 if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
423 wordHasComplexChar = true;
425 ++offset;
426 if (offset >= aLength) {
427 // Save this word
428 mCurrentWordContainsComplexChar = wordHasComplexChar;
429 uint32_t len = offset - wordStart;
430 char16_t* elems = mCurrentWord.AppendElements(len);
431 if (!elems)
432 return NS_ERROR_OUT_OF_MEMORY;
433 uint32_t i;
434 for (i = wordStart; i < offset; ++i) {
435 elems[i - wordStart] = aText[i];
437 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
438 // Ensure that the break-before for this word is written out
439 offset = wordStart + 1;
440 break;
445 if (!noBreaksNeeded) {
446 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
448 return NS_OK;
451 void
452 nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom *aHyphenationLanguage)
454 if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
455 mCurrentWordContainsMixedLang = true;
456 } else {
457 mCurrentWordLanguage = aHyphenationLanguage;
461 nsresult
462 nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags)
464 nsresult rv = FlushCurrentWord();
465 if (NS_FAILED(rv))
466 return rv;
468 bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
469 if (mAfterBreakableSpace && !isBreakableSpace) {
470 mBreakHere = true;
472 mAfterBreakableSpace = isBreakableSpace;
473 return NS_OK;
476 nsresult
477 nsLineBreaker::Reset(bool* aTrailingBreak)
479 nsresult rv = FlushCurrentWord();
480 if (NS_FAILED(rv))
481 return rv;
483 *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
484 mBreakHere = false;
485 mAfterBreakableSpace = false;
486 return NS_OK;