Bug 1850713: remove duplicated setting of early hint preloader id in `ScriptLoader...
[gecko.git] / dom / base / nsLineBreaker.cpp
blob987979280b45fa47f3b4189b7a3548c248ac2b0a
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsLineBreaker.h"
8 #include "nsContentUtils.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/AutoRestore.h"
13 #include "mozilla/gfx/2D.h"
14 #include "mozilla/intl/LineBreaker.h" // for LineBreaker::ComputeBreakPositions
15 #include "mozilla/intl/Locale.h"
16 #include "mozilla/intl/UnicodeProperties.h"
18 using mozilla::AutoRestore;
19 using mozilla::intl::LineBreaker;
20 using mozilla::intl::LineBreakRule;
21 using mozilla::intl::Locale;
22 using mozilla::intl::LocaleParser;
23 using mozilla::intl::UnicodeProperties;
24 using mozilla::intl::WordBreakRule;
26 nsLineBreaker::nsLineBreaker()
27 : mCurrentWordLanguage(nullptr),
28 mCurrentWordContainsMixedLang(false),
29 mCurrentWordContainsComplexChar(false),
30 mScriptIsChineseOrJapanese(false),
31 mAfterBreakableSpace(false),
32 mBreakHere(false),
33 mWordBreak(WordBreakRule::Normal),
34 mLineBreak(LineBreakRule::Auto),
35 mWordContinuation(false) {}
37 nsLineBreaker::~nsLineBreaker() {
38 NS_ASSERTION(mCurrentWord.Length() == 0,
39 "Should have Reset() before destruction!");
42 static void SetupCapitalization(const char16_t* aWord, uint32_t aLength,
43 bool* aCapitalization) {
44 // Capitalize the first alphanumeric character after a space or punctuation.
45 using mozilla::intl::GeneralCategory;
46 bool capitalizeNextChar = true;
47 for (uint32_t i = 0; i < aLength; ++i) {
48 uint32_t ch = aWord[i];
49 if (i + 1 < aLength && NS_IS_SURROGATE_PAIR(ch, aWord[i + 1])) {
50 ch = SURROGATE_TO_UCS4(ch, aWord[i + 1]);
52 auto category = UnicodeProperties::CharType(ch);
53 switch (category) {
54 case GeneralCategory::Uppercase_Letter:
55 case GeneralCategory::Lowercase_Letter:
56 case GeneralCategory::Titlecase_Letter:
57 case GeneralCategory::Modifier_Letter:
58 case GeneralCategory::Other_Letter:
59 case GeneralCategory::Decimal_Number:
60 case GeneralCategory::Letter_Number:
61 case GeneralCategory::Other_Number:
62 if (capitalizeNextChar) {
63 aCapitalization[i] = true;
64 capitalizeNextChar = false;
66 break;
67 case GeneralCategory::Space_Separator:
68 case GeneralCategory::Line_Separator:
69 case GeneralCategory::Paragraph_Separator:
70 case GeneralCategory::Dash_Punctuation:
71 case GeneralCategory::Initial_Punctuation:
72 /* These punctuation categories are excluded, for examples like
73 * "what colo[u]r" -> "What Colo[u]r?" (rather than "What Colo[U]R?")
74 * and
75 * "snake_case" -> "Snake_case" (to match word selection behavior)
76 case GeneralCategory::Open_Punctuation:
77 case GeneralCategory::Close_Punctuation:
78 case GeneralCategory::Connector_Punctuation:
80 capitalizeNextChar = true;
81 break;
82 case GeneralCategory::Final_Punctuation:
83 /* Special-case: exclude Unicode single-close-quote/apostrophe,
84 for examples like "Lowe’s" etc. */
85 if (ch != 0x2019) {
86 capitalizeNextChar = true;
88 break;
89 case GeneralCategory::Other_Punctuation:
90 /* Special-case: exclude ASCII apostrophe, for "Lowe's" etc.,
91 and MIDDLE DOT, for Catalan "l·l". */
92 if (ch != '\'' && ch != 0x00B7) {
93 capitalizeNextChar = true;
95 break;
96 default:
97 break;
99 if (!IS_IN_BMP(ch)) {
100 ++i;
105 nsresult nsLineBreaker::FlushCurrentWord() {
106 uint32_t length = mCurrentWord.Length();
107 AutoTArray<uint8_t, 4000> breakState;
108 if (!breakState.AppendElements(length, mozilla::fallible)) {
109 return NS_ERROR_OUT_OF_MEMORY;
112 if (mLineBreak == LineBreakRule::Anywhere) {
113 memset(breakState.Elements(),
114 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
115 length * sizeof(uint8_t));
116 } else if (!mCurrentWordContainsComplexChar) {
117 // For break-strict set everything internal to "break", otherwise
118 // to "no break"!
119 memset(breakState.Elements(),
120 mWordBreak == WordBreakRule::BreakAll
121 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
122 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
123 length * sizeof(uint8_t));
124 } else {
125 LineBreaker::ComputeBreakPositions(
126 mCurrentWord.Elements(), length, mWordBreak, mLineBreak,
127 mScriptIsChineseOrJapanese, breakState.Elements());
130 bool autoHyphenate = mCurrentWordLanguage && !mCurrentWordContainsMixedLang;
131 uint32_t i;
132 for (i = 0; autoHyphenate && i < mTextItems.Length(); ++i) {
133 TextItem* ti = &mTextItems[i];
134 if (!(ti->mFlags & BREAK_USE_AUTO_HYPHENATION)) {
135 autoHyphenate = false;
138 if (autoHyphenate) {
139 RefPtr<nsHyphenator> hyphenator =
140 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage);
141 if (hyphenator) {
142 FindHyphenationPoints(hyphenator, mCurrentWord.Elements(),
143 mCurrentWord.Elements() + length,
144 breakState.Elements());
148 nsTArray<bool> capitalizationState;
149 uint32_t offset = 0;
150 for (i = 0; i < mTextItems.Length(); ++i) {
151 TextItem* ti = &mTextItems[i];
152 NS_ASSERTION(ti->mLength > 0, "Zero length word contribution?");
154 if ((ti->mFlags & BREAK_SUPPRESS_INITIAL) && ti->mSinkOffset == 0) {
155 breakState[offset] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
157 if (ti->mFlags & BREAK_SUPPRESS_INSIDE) {
158 uint32_t exclude = ti->mSinkOffset == 0 ? 1 : 0;
159 memset(breakState.Elements() + offset + exclude,
160 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE,
161 (ti->mLength - exclude) * sizeof(uint8_t));
164 // Don't set the break state for the first character of the word, because
165 // it was already set correctly earlier and we don't know what the true
166 // value should be.
167 uint32_t skipSet = i == 0 ? 1 : 0;
168 if (ti->mSink) {
169 ti->mSink->SetBreaks(ti->mSinkOffset + skipSet, ti->mLength - skipSet,
170 breakState.Elements() + offset + skipSet);
172 if (!mWordContinuation && (ti->mFlags & BREAK_NEED_CAPITALIZATION)) {
173 if (capitalizationState.Length() == 0) {
174 if (!capitalizationState.AppendElements(length, mozilla::fallible)) {
175 return NS_ERROR_OUT_OF_MEMORY;
177 memset(capitalizationState.Elements(), false, length * sizeof(bool));
178 SetupCapitalization(mCurrentWord.Elements(), length,
179 capitalizationState.Elements());
181 ti->mSink->SetCapitalization(ti->mSinkOffset, ti->mLength,
182 capitalizationState.Elements() + offset);
186 offset += ti->mLength;
189 mCurrentWord.Clear();
190 mTextItems.Clear();
191 mCurrentWordContainsComplexChar = false;
192 mCurrentWordContainsMixedLang = false;
193 mCurrentWordLanguage = nullptr;
194 mWordContinuation = false;
195 return NS_OK;
// When the aFlags argument of AppendText() has every one of these bits set,
// AppendText() does not have to look for break opportunities in the text
// being appended.
#define NO_BREAKS_NEEDED_FLAGS                          \
  (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE |     \
   BREAK_SKIP_SETTING_NO_BREAKS)
205 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
206 const char16_t* aText, uint32_t aLength,
207 uint32_t aFlags, nsILineBreakSink* aSink) {
208 NS_ASSERTION(aLength > 0, "Appending empty text...");
210 uint32_t offset = 0;
212 // Continue the current word
213 if (mCurrentWord.Length() > 0) {
214 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
215 "These should not be set");
217 while (offset < aLength && !IsSpace(aText[offset])) {
218 mCurrentWord.AppendElement(aText[offset]);
219 if (!mCurrentWordContainsComplexChar && IsComplexChar(aText[offset])) {
220 mCurrentWordContainsComplexChar = true;
222 UpdateCurrentWordLanguage(aHyphenationLanguage);
223 ++offset;
226 if (offset > 0) {
227 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
230 if (offset == aLength) {
231 return NS_OK;
234 // We encountered whitespace, so we're done with this word
235 nsresult rv = FlushCurrentWord();
236 if (NS_FAILED(rv)) {
237 return rv;
241 AutoTArray<uint8_t, 4000> breakState;
242 if (aSink) {
243 if (!breakState.AppendElements(aLength, mozilla::fallible)) {
244 return NS_ERROR_OUT_OF_MEMORY;
248 bool noCapitalizationNeeded = true;
249 nsTArray<bool> capitalizationState;
250 if (aSink && (aFlags & BREAK_NEED_CAPITALIZATION)) {
251 if (!capitalizationState.AppendElements(aLength, mozilla::fallible)) {
252 return NS_ERROR_OUT_OF_MEMORY;
254 memset(capitalizationState.Elements(), false, aLength * sizeof(bool));
255 noCapitalizationNeeded = false;
258 uint32_t start = offset;
259 bool noBreaksNeeded =
260 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
261 !mBreakHere && !mAfterBreakableSpace);
262 if (noBreaksNeeded && noCapitalizationNeeded) {
263 // Skip to the space before the last word, since either the break data
264 // here is not needed, or no breaks are set in the sink and there cannot
265 // be any breaks in this chunk; and we don't need to do word-initial
266 // capitalization. All we need is the context for the next chunk (if any).
267 offset = aLength;
268 while (offset > start) {
269 --offset;
270 if (IsSpace(aText[offset])) {
271 break;
275 uint32_t wordStart = offset;
276 bool wordHasComplexChar = false;
278 RefPtr<nsHyphenator> hyphenator;
279 if ((aFlags & BREAK_USE_AUTO_HYPHENATION) &&
280 !(aFlags & BREAK_SUPPRESS_INSIDE) && aHyphenationLanguage) {
281 hyphenator =
282 nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage);
285 for (;;) {
286 char16_t ch = aText[offset];
287 bool isSpace = IsSpace(ch);
288 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
290 if (aSink && !noBreaksNeeded) {
291 breakState[offset] =
292 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
293 mWordBreak == WordBreakRule::BreakAll ||
294 mLineBreak == LineBreakRule::Anywhere
295 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
296 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
298 mBreakHere = false;
299 mAfterBreakableSpace = isBreakableSpace;
301 if (isSpace || ch == '\n') {
302 if (offset > wordStart && aSink) {
303 if (!(aFlags & BREAK_SUPPRESS_INSIDE)) {
304 if (mLineBreak == LineBreakRule::Anywhere) {
305 memset(breakState.Elements() + wordStart,
306 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
307 offset - wordStart);
308 } else if (wordHasComplexChar) {
309 // Save current start-of-word state because ComputeBreakPositions()
310 // will set it to false.
311 AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
312 LineBreaker::ComputeBreakPositions(
313 aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
314 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
316 if (hyphenator) {
317 FindHyphenationPoints(hyphenator, aText + wordStart, aText + offset,
318 breakState.Elements() + wordStart);
321 if (!mWordContinuation && !noCapitalizationNeeded) {
322 SetupCapitalization(aText + wordStart, offset - wordStart,
323 capitalizationState.Elements() + wordStart);
326 wordHasComplexChar = false;
327 mWordContinuation = false;
328 ++offset;
329 if (offset >= aLength) {
330 break;
332 wordStart = offset;
333 } else {
334 if (!wordHasComplexChar && IsComplexChar(ch)) {
335 wordHasComplexChar = true;
337 ++offset;
338 if (offset >= aLength) {
339 // Save this word
340 mCurrentWordContainsComplexChar = wordHasComplexChar;
341 uint32_t len = offset - wordStart;
342 char16_t* elems = mCurrentWord.AppendElements(len);
343 if (!elems) {
344 return NS_ERROR_OUT_OF_MEMORY;
346 memcpy(elems, aText + wordStart, sizeof(char16_t) * len);
347 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
348 // Ensure that the break-before for this word is written out
349 offset = wordStart + 1;
350 UpdateCurrentWordLanguage(aHyphenationLanguage);
351 break;
356 if (aSink) {
357 if (!noBreaksNeeded) {
358 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
360 if (!noCapitalizationNeeded) {
361 aSink->SetCapitalization(start, offset - start,
362 capitalizationState.Elements() + start);
365 return NS_OK;
368 void nsLineBreaker::FindHyphenationPoints(nsHyphenator* aHyphenator,
369 const char16_t* aTextStart,
370 const char16_t* aTextLimit,
371 uint8_t* aBreakState) {
372 nsDependentSubstring string(aTextStart, aTextLimit);
373 AutoTArray<bool, 200> hyphens;
374 if (NS_SUCCEEDED(aHyphenator->Hyphenate(string, hyphens))) {
375 for (uint32_t i = 0; i + 1 < string.Length(); ++i) {
376 if (hyphens[i]) {
377 aBreakState[i + 1] =
378 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN;
384 nsresult nsLineBreaker::AppendText(nsAtom* aHyphenationLanguage,
385 const uint8_t* aText, uint32_t aLength,
386 uint32_t aFlags, nsILineBreakSink* aSink) {
387 NS_ASSERTION(aLength > 0, "Appending empty text...");
389 if (aFlags & (BREAK_NEED_CAPITALIZATION | BREAK_USE_AUTO_HYPHENATION)) {
390 // Defer to the Unicode path if capitalization or hyphenation is required
391 nsAutoString str;
392 const char* cp = reinterpret_cast<const char*>(aText);
393 CopyASCIItoUTF16(nsDependentCSubstring(cp, cp + aLength), str);
394 return AppendText(aHyphenationLanguage, str.get(), aLength, aFlags, aSink);
397 uint32_t offset = 0;
399 // Continue the current word
400 if (mCurrentWord.Length() > 0) {
401 NS_ASSERTION(!mAfterBreakableSpace && !mBreakHere,
402 "These should not be set");
404 while (offset < aLength && !IsSpace(aText[offset])) {
405 mCurrentWord.AppendElement(aText[offset]);
406 if (!mCurrentWordContainsComplexChar &&
407 IsComplexASCIIChar(aText[offset])) {
408 mCurrentWordContainsComplexChar = true;
410 ++offset;
413 if (offset > 0) {
414 mTextItems.AppendElement(TextItem(aSink, 0, offset, aFlags));
417 if (offset == aLength) {
418 // We did not encounter whitespace so the word hasn't finished yet.
419 return NS_OK;
422 // We encountered whitespace, so we're done with this word
423 nsresult rv = FlushCurrentWord();
424 if (NS_FAILED(rv)) {
425 return rv;
429 AutoTArray<uint8_t, 4000> breakState;
430 if (aSink) {
431 if (!breakState.AppendElements(aLength, mozilla::fallible)) {
432 return NS_ERROR_OUT_OF_MEMORY;
436 uint32_t start = offset;
437 bool noBreaksNeeded =
438 !aSink || ((aFlags & NO_BREAKS_NEEDED_FLAGS) == NO_BREAKS_NEEDED_FLAGS &&
439 !mBreakHere && !mAfterBreakableSpace);
440 if (noBreaksNeeded) {
441 // Skip to the space before the last word, since either the break data
442 // here is not needed, or no breaks are set in the sink and there cannot
443 // be any breaks in this chunk; all we need is the context for the next
444 // chunk (if any)
445 offset = aLength;
446 while (offset > start) {
447 --offset;
448 if (IsSpace(aText[offset])) {
449 break;
453 uint32_t wordStart = offset;
454 bool wordHasComplexChar = false;
456 for (;;) {
457 uint8_t ch = aText[offset];
458 bool isSpace = IsSpace(ch);
459 bool isBreakableSpace = isSpace && !(aFlags & BREAK_SUPPRESS_INSIDE);
461 if (aSink) {
462 // Consider word-break style. Since the break position of CJK scripts
463 // will be set by nsILineBreaker, we don't consider CJK at this point.
464 breakState[offset] =
465 mBreakHere || (mAfterBreakableSpace && !isBreakableSpace) ||
466 mWordBreak == WordBreakRule::BreakAll ||
467 mLineBreak == LineBreakRule::Anywhere
468 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
469 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE;
471 mBreakHere = false;
472 mAfterBreakableSpace = isBreakableSpace;
474 if (isSpace) {
475 if (offset > wordStart && aSink && !(aFlags & BREAK_SUPPRESS_INSIDE)) {
476 if (mLineBreak == LineBreakRule::Anywhere) {
477 memset(breakState.Elements() + wordStart,
478 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL,
479 offset - wordStart);
480 } else if (wordHasComplexChar) {
481 // Save current start-of-word state because ComputeBreakPositions()
482 // will set it to false.
483 AutoRestore<uint8_t> saveWordStartBreakState(breakState[wordStart]);
484 LineBreaker::ComputeBreakPositions(
485 aText + wordStart, offset - wordStart, mWordBreak, mLineBreak,
486 mScriptIsChineseOrJapanese, breakState.Elements() + wordStart);
490 wordHasComplexChar = false;
491 mWordContinuation = false;
492 ++offset;
493 if (offset >= aLength) {
494 break;
496 wordStart = offset;
497 } else {
498 if (!wordHasComplexChar && IsComplexASCIIChar(ch)) {
499 wordHasComplexChar = true;
501 ++offset;
502 if (offset >= aLength) {
503 // Save this word
504 mCurrentWordContainsComplexChar = wordHasComplexChar;
505 uint32_t len = offset - wordStart;
506 char16_t* elems = mCurrentWord.AppendElements(len);
507 if (!elems) {
508 return NS_ERROR_OUT_OF_MEMORY;
510 uint32_t i;
511 for (i = wordStart; i < offset; ++i) {
512 elems[i - wordStart] = aText[i];
514 mTextItems.AppendElement(TextItem(aSink, wordStart, len, aFlags));
515 // Ensure that the break-before for this word is written out
516 offset = wordStart + 1;
517 break;
522 if (!noBreaksNeeded) {
523 aSink->SetBreaks(start, offset - start, breakState.Elements() + start);
525 return NS_OK;
528 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom* aHyphenationLanguage) {
529 if (mCurrentWordLanguage && mCurrentWordLanguage != aHyphenationLanguage) {
530 mCurrentWordContainsMixedLang = true;
531 mScriptIsChineseOrJapanese = false;
532 } else {
533 if (aHyphenationLanguage && !mCurrentWordLanguage) {
534 Locale loc;
535 auto result =
536 LocaleParser::TryParse(nsAtomCString(aHyphenationLanguage), loc);
538 if (result.isErr()) {
539 return;
541 if (loc.Script().Missing() && loc.AddLikelySubtags().isErr()) {
542 return;
544 mScriptIsChineseOrJapanese =
545 loc.Script().EqualTo("Hans") || loc.Script().EqualTo("Hant") ||
546 loc.Script().EqualTo("Jpan") || loc.Script().EqualTo("Hrkt");
548 mCurrentWordLanguage = aHyphenationLanguage;
552 nsresult nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags) {
553 nsresult rv = FlushCurrentWord();
554 if (NS_FAILED(rv)) {
555 return rv;
558 bool isBreakableSpace = !(aFlags & BREAK_SUPPRESS_INSIDE);
559 if (mAfterBreakableSpace && !isBreakableSpace) {
560 mBreakHere = true;
562 mAfterBreakableSpace = isBreakableSpace;
563 mWordContinuation = false;
564 return NS_OK;
567 nsresult nsLineBreaker::Reset(bool* aTrailingBreak) {
568 nsresult rv = FlushCurrentWord();
569 if (NS_FAILED(rv)) {
570 return rv;
573 *aTrailingBreak = mBreakHere || mAfterBreakableSpace;
574 mBreakHere = false;
575 mAfterBreakableSpace = false;
576 return NS_OK;