Bug 1728955: part 3) Add logging to `nsBaseClipboard`. r=masayuki
[gecko.git] / dom / base / nsLineBreaker.cpp
blobc582a5b2037b09738036852d73e41ae8b1fa04e9
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsLineBreaker.h"
8 #include "nsContentUtils.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/gfx/2D.h"
13 #include "mozilla/intl/LineBreaker.h"
14 #include "mozilla/intl/MozLocale.h"
16 using mozilla::intl::LineBreaker;
17 using mozilla::intl::Locale;
19 nsLineBreaker::nsLineBreaker()
20 : mCurrentWordLanguage(nullptr),
21 mCurrentWordContainsMixedLang(false),
22 mCurrentWordContainsComplexChar(false),
23 mScriptIsChineseOrJapanese(false),
24 mAfterBreakableSpace(false),
25 mBreakHere(false),
26 mWordBreak(LineBreaker::WordBreak::Normal),
27 mStrictness(LineBreaker::Strictness::Auto),
28 mWordContinuation(false) {}
30 nsLineBreaker::~nsLineBreaker() {
31 NS_ASSERTION(mCurrentWord.Length() == 0,
32 "Should have Reset() before destruction!");
35 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
36 bool* aCapitalization) {
37 // Capitalize the first alphanumeric character after a space or start
38 // of the word.
39 // The only space character a word can contain is NBSP.
40 bool capitalizeNextChar = true;
41 for (uint32_t i = 0; i < aLength; ++i) {
42 uint32_t ch = aWord[i];
43 if (capitalizeNextChar) {
44 if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) {
45 ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
47 if (nsContentUtils::IsAlphanumeric(ch)) {
48 aCapitalization[i] = true;
49 capitalizeNextChar = false;
51 if (!IS_IN_BMP(ch)) {
52 ++i;
55 if (ch == 0xA0 /*NBSP*/) {
56 capitalizeNextChar = true;
61 nsresult nsLineBreaker::FlushCurrentWord() {
62 uint32_t length = mCurrentWord.Length();
63 AutoTArray<uint8_t, 4000> breakState;
64 // XXX(Bug 1631371) Check if this should use a fallible operation as it
65 // pretended earlier.
66 breakState.AppendElements(length);
68 nsTArray<bool> capitalizationState;
70 if (mStrictness == LineBreaker::Strictness::Anywhere) {
71 memset(breakState.Elements(),
72 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
73 length * sizeof(uint8_t));
74 } else if (!mCurrentWordContainsComplexChar) {
75 // For break-strict set everything internal to "break", otherwise
76 // to "no break"!
77 memset(breakState.Elements(),
78 mWordBreak == LineBreaker::WordBreak::BreakAll
79 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
80 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
81 length * sizeof(uint8_t));
82 } else {
83 nsContentUtils::LineBreaker()->GetJISx4051Breaks(
84 mCurrentWord.Elements(), length, mWordBreak, mStrictness,
85 mScriptIsChineseOrJapanese, breakState.Elements());
88 bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
89 uint32_t i;
90 for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
91 TextItem* ti = &mTextItems[i];
92 if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
93 autoHyphenate = false;
96 if (autoHyphenate) {
97 RefPtr<nsHyphenator> hyphenator =
98 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
99 if (hyphenator) {
100 FindHyphenationPoints(hyphenator, mCurrentWord.Elements(),
101 mCurrentWord.Elements() + length,
102 breakState.Elements());
106 uint32_t offset = 0;
107 for (i = 0; i < mTextItems.Length(); ++i) {
108 TextItem* ti = &mTextItems[i];
109 NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
111 if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
112 breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
114 if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
115 uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
116 memset(breakState.Elements() + offset + exclude,
117 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
118 (ti->mLength - exclude) * sizeof(uint8_t));
121 // Don't set the break state for the first character of the word, because
122 // it was already set correctly earlier and we don't know what the true
123 // value should be.
124 uint32_t skipSet = i == 0 ? 1 : 0;
125 if (ti->mSink) {
126 ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
127 breakState.Elements() + offset + skipSet);
129 if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) {
130 if (capitalizationState.Length() == 0) {
131 // XXX(Bug 1631371) Check if this should use a fallible operation as
132 // it pretended earlier.
133 capitalizationState.AppendElements(length);
134 memset(capitalizationState.Elements(), false, length * sizeof(bool));
135 SetupCapitalization(mCurrentWord.Elements(), length,
136 capitalizationState.Elements());
138 ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
139 capitalizationState.Elements() + offset);
143 offset += ti->mLength;
146 mCurrentWord.Clear();
147 mTextItems.Clear();
148 mCurrentWordContainsComplexChar = false;
149 mCurrentWordContainsMixedLang = false;
150 mCurrentWordLanguage = nullptr;
151 mWordContinuation = false;
152 return NS_OK;
// Mask tested against the aFlags argument of AppendText: when every one of
// these bits is set, AppendText does not have to look for break
// opportunities in the text being appended.
#define NO_BREAKS_NEEDED_FLAGS                      \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
   BREAK_SKIP_SETTING_NO_BREAKS)
162 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
163 const char16_t* aText, uint32_t aLength,
164 uint32_t aFlags, nsILineBreakSink* aSink) {
165 NS_ASSERTION(aLength > 0, "Appending empty text...");
167 uint32_t offset = 0;
169 // Continue the current word
170 if (mCurrentWord.Length() > 0) {
171 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
172 "These should not be set");
174 while (offset < aLength && !IsSpace(aText[offset])) {
175 mCurrentWord.AppendElement(aText[offset]);
176 if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
177 mCurrentWordContainsComplexChar = true;
179 UpdateCurrentWordLanguage(aHyphenationLanguage);
180 ++offset;
183 if (offset > 0) {
184 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
187 if (offset == aLength) return NS_OK;
189 // We encountered whitespace, so we're done with this word
190 nsresult rv = FlushCurrentWord();
191 if (NS_FAILED(rv)) return rv;
194 AutoTArray<uint8_t, 4000> breakState;
195 if (aSink) {
196 // XXX(Bug 1631371) Check if this should use a fallible operation as it
197 // pretended earlier.
198 breakState.AppendElements(aLength);
201 bool noCapitalizationNeeded = true;
202 nsTArray<bool> capitalizationState;
203 if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
204 // XXX(Bug 1631371) Check if this should use a fallible operation as it
205 // pretended earlier.
206 capitalizationState.AppendElements(aLength);
207 memset(capitalizationState.Elements(), false, aLength * sizeof(bool));
208 noCapitalizationNeeded = false;
211 uint32_t start = offset;
212 bool noBreaksNeeded =
213 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
214 !mBreakHere && !mAfterBreakableSpace);
215 if (noBreaksNeeded && noCapitalizationNeeded) {
216 // Skip to the space before the last word, since either the break data
217 // here is not needed, or no breaks are set in the sink and there cannot
218 // be any breaks in this chunk; and we don't need to do word-initial
219 // capitalization. All we need is the context for the next chunk (if any).
220 offset = aLength;
221 while (offset > start) {
222 --offset;
223 if (IsSpace(aText[offset])) break;
226 uint32_t wordStart = offset;
227 bool wordHasComplexChar = false;
229 RefPtr<nsHyphenator> hyphenator;
230 if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
231 !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) {
232 hyphenator =
233 nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
236 for (;;) {
237 char16_t ch = aText[offset];
238 bool isSpace = IsSpace(ch);
239 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
241 if (aSink && !noBreaksNeeded) {
242 breakState[offset] =
243 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
244 mWordBreak == LineBreaker::WordBreak::BreakAll ||
245 mStrictness == LineBreaker::Strictness::Anywhere
246 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
247 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
249 mBreakHere = false;
250 mAfterBreakableSpace = isBreakableSpace;
252 if (isSpace || ch == '\n') {
253 if (offset > wordStart && aSink) {
254 if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
255 if (mStrictness == LineBreaker::Strictness::Anywhere) {
256 memset(breakState.Elements() + wordStart,
257 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
258 offset - wordStart);
259 } else if (wordHasComplexChar) {
260 // Save current start-of-word state because GetJISx4051Breaks will
261 // set it to false
262 uint8_t currentStart = breakState[wordStart];
263 nsContentUtils::LineBreaker()->GetJISx4051Breaks(
264 aText + wordStart, offset - wordStart, mWordBreak, mStrictness,
265 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
266 breakState[wordStart] = currentStart;
268 if (hyphenator) {
269 FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
270 breakState.Elements() + wordStart);
273 if (!mWordContinuation && !noCapitalizationNeeded) {
274 SetupCapitalization(aText + wordStart, offset - wordStart,
275 capitalizationState.Elements() + wordStart);
278 wordHasComplexChar = false;
279 mWordContinuation = false;
280 ++offset;
281 if (offset >= aLength) break;
282 wordStart = offset;
283 } else {
284 if (!wordHasComplexChar && IsComplexChar(ch)) {
285 wordHasComplexChar = true;
287 ++offset;
288 if (offset >= aLength) {
289 // Save this word
290 mCurrentWordContainsComplexChar = wordHasComplexChar;
291 uint32_t len = offset - wordStart;
292 char16_t* elems = mCurrentWord.AppendElements(len);
293 if (!elems) return NS_ERROR_OUT_OF_MEMORY;
294 memcpy(elems, aText + wordStart, sizeof(char16_t) * len);
295 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
296 // Ensure that the break-before for this word is written out
297 offset = wordStart + 1;
298 UpdateCurrentWordLanguage(aHyphenationLanguage);
299 break;
304 if (aSink) {
305 if (!noBreaksNeeded) {
306 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
308 if (!noCapitalizationNeeded) {
309 aSink->SetCapitalization(start, offset - start,
310 capitalizationState.Elements() + start);
313 return NS_OK;
316 void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
317 const char16_t* aTextStart,
318 const char16_t* aTextLimit,
319 uint8_t* aBreakState) {
320 nsDependentSubstring string(aTextStart, aTextLimit);
321 AutoTArray<bool, 200> hyphens;
322 if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
323 for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
324 if (hyphens[i]) {
325 aBreakState[i + 1] =
326 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
332 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
333 const uint8_t* aText, uint32_t aLength,
334 uint32_t aFlags, nsILineBreakSink* aSink) {
335 NS_ASSERTION(aLength > 0, "Appending empty text...");
337 if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
338 // Defer to the Unicode path if capitalization or hyphenation is required
339 nsAutoString str;
340 const char* cp = reinterpret_cast<const char*>(aText);
341 CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
342 return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
345 uint32_t offset = 0;
347 // Continue the current word
348 if (mCurrentWord.Length() > 0) {
349 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
350 "These should not be set");
352 while (offset < aLength && !IsSpace(aText[offset])) {
353 mCurrentWord.AppendElement(aText[offset]);
354 if (!mCurrentWordContainsComplexChar &&
355 IsComplexASCIIChar(aText[offset])) {
356 mCurrentWordContainsComplexChar = true;
358 ++offset;
361 if (offset > 0) {
362 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
365 if (offset == aLength) {
366 // We did not encounter whitespace so the word hasn't finished yet.
367 return NS_OK;
370 // We encountered whitespace, so we're done with this word
371 nsresult rv = FlushCurrentWord();
372 if (NS_FAILED(rv)) return rv;
375 AutoTArray<uint8_t, 4000> breakState;
376 if (aSink) {
377 // XXX(Bug 1631371) Check if this should use a fallible operation as it
378 // pretended earlier.
379 breakState.AppendElements(aLength);
382 uint32_t start = offset;
383 bool noBreaksNeeded =
384 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
385 !mBreakHere && !mAfterBreakableSpace);
386 if (noBreaksNeeded) {
387 // Skip to the space before the last word, since either the break data
388 // here is not needed, or no breaks are set in the sink and there cannot
389 // be any breaks in this chunk; all we need is the context for the next
390 // chunk (if any)
391 offset = aLength;
392 while (offset > start) {
393 --offset;
394 if (IsSpace(aText[offset])) break;
397 uint32_t wordStart = offset;
398 bool wordHasComplexChar = false;
400 for (;;) {
401 uint8_t ch = aText[offset];
402 bool isSpace = IsSpace(ch);
403 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
405 if (aSink) {
406 // Consider word-break style. Since the break position of CJK scripts
407 // will be set by nsILineBreaker, we don't consider CJK at this point.
408 breakState[offset] =
409 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
410 mWordBreak == LineBreaker::WordBreak::BreakAll ||
411 mStrictness == LineBreaker::Strictness::Anywhere
412 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
413 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
415 mBreakHere = false;
416 mAfterBreakableSpace = isBreakableSpace;
418 if (isSpace) {
419 if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
420 if (mStrictness == LineBreaker::Strictness::Anywhere) {
421 memset(breakState.Elements() + wordStart,
422 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
423 offset - wordStart);
424 } else if (wordHasComplexChar) {
425 // Save current start-of-word state because GetJISx4051Breaks will
426 // set it to false
427 uint8_t currentStart = breakState[wordStart];
428 nsContentUtils::LineBreaker()->GetJISx4051Breaks(
429 aText + wordStart, offset - wordStart, mWordBreak, mStrictness,
430 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
431 breakState[wordStart] = currentStart;
435 wordHasComplexChar = false;
436 ++offset;
437 if (offset >= aLength) break;
438 wordStart = offset;
439 } else {
440 if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
441 wordHasComplexChar = true;
443 ++offset;
444 if (offset >= aLength) {
445 // Save this word
446 mCurrentWordContainsComplexChar = wordHasComplexChar;
447 uint32_t len = offset - wordStart;
448 char16_t* elems = mCurrentWord.AppendElements(len);
449 if (!elems) return NS_ERROR_OUT_OF_MEMORY;
450 uint32_t i;
451 for (i = wordStart; i < offset; ++i) {
452 elems[i - wordStart] = aText[i];
454 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
455 // Ensure that the break-before for this word is written out
456 offset = wordStart + 1;
457 break;
462 if (!noBreaksNeeded) {
463 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
465 return NS_OK;
468 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
469 if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
470 mCurrentWordContainsMixedLang = true;
471 mScriptIsChineseOrJapanese = false;
472 } else {
473 if (aHyphenationLanguage && !mCurrentWordLanguage) {
474 Locale loc = Locale(nsAtomCString(aHyphenationLanguage));
475 if (loc.GetScript().IsEmpty()) {
476 loc.Maximize();
478 const nsDependentCSubstring& script = loc.GetScript();
479 mScriptIsChineseOrJapanese =
480 script.EqualsLiteral("Hans") || script.EqualsLiteral("Hant") ||
481 script.EqualsLiteral("Jpan") || script.EqualsLiteral("Hrkt");
483 mCurrentWordLanguage = aHyphenationLanguage;
487 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
488 nsresult rv = FlushCurrentWord();
489 if (NS_FAILED(rv)) return rv;
491 bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
492 if (mAfterBreakableSpace && !isBreakableSpace) {
493 mBreakHere = true;
495 mAfterBreakableSpace = isBreakableSpace;
496 mWordContinuation = false;
497 return NS_OK;
500 nsresult nsLineBreaker::Reset(bool* aTrailingBreak) {
501 nsresult rv = FlushCurrentWord();
502 if (NS_FAILED(rv)) return rv;
504 *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
505 mBreakHere = false;
506 mAfterBreakableSpace = false;
507 return NS_OK;