1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsLineBreaker.h"
8 #include "nsContentUtils.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/gfx/2D.h"
13 #include "mozilla/intl/LineBreaker.h"
14 #include "mozilla/intl/MozLocale.h"
16 using mozilla::intl::LineBreaker
;
17 using mozilla::intl::Locale
;
// Constructs a line breaker in its initial state: no current-word language,
// the boolean state flags listed below cleared, word-break mode
// LineBreaker::WordBreak::Normal and strictness LineBreaker::Strictness::Auto.
// NOTE(review): mBreakHere is read elsewhere in this file, but no initializer
// for it is visible in this list -- confirm that it is initialized as well.
19 nsLineBreaker::nsLineBreaker()
20 : mCurrentWordLanguage(nullptr),
21 mCurrentWordContainsMixedLang(false),
22 mCurrentWordContainsComplexChar(false),
23 mScriptIsChineseOrJapanese(false),
24 mAfterBreakableSpace(false),
26 mWordBreak(LineBreaker::WordBreak::Normal
),
27 mStrictness(LineBreaker::Strictness::Auto
),
28 mWordContinuation(false) {}
// Destructor. Asserts that no partially accumulated word is left in
// mCurrentWord; per the assertion message, Reset() is expected to have been
// called before the line breaker is destroyed.
30 nsLineBreaker::~nsLineBreaker() {
31 NS_ASSERTION(mCurrentWord
.Length() == 0,
32 "Should have Reset() before destruction!");
// Marks which characters of a word should be capitalized.
//
//   aWord, aLength   the UTF-16 code units that make up the word.
//   aCapitalization  output array indexed by code unit (positions below
//                    aLength are written). An entry is set to true for an
//                    alphanumeric character found while capitalization is
//                    armed, i.e. at the start of the word or after an NBSP.
//                    Entries are only ever set to true here, never cleared;
//                    the callers in this file memset the buffer to false
//                    before calling.
35 static void SetupCapitalization(const char16_t
* aWord
, uint32_t aLength
,
36 bool* aCapitalization
) {
37 // Capitalize the first alphanumeric character after a space or start
39 // The only space character a word can contain is NBSP.
40 bool capitalizeNextChar
= true;
41 for (uint32_t i
= 0; i
< aLength
; ++i
) {
42 uint32_t ch
= aWord
[i
];
43 if (capitalizeNextChar
) {
// Combine a surrogate pair into one code point so that the alphanumeric
// test below sees the complete character.
44 if (i
+ 1 < aLength
&& NS_IS_SURROGATE_PAIR(ch
, aWord
[i
+ 1])) {
45 ch
= SURROGATE_TO_UCS4(ch
, aWord
[i
+ 1]);
47 if (nsContentUtils::IsAlphanumeric(ch
)) {
48 aCapitalization
[i
] = true;
49 capitalizeNextChar
= false;
// An NBSP re-arms capitalization for the next alphanumeric character.
55 if (ch
== 0xA0 /*NBSP*/) {
56 capitalizeNextChar
= true;
// Computes the break data (and, when requested, the capitalization data) for
// the word accumulated in mCurrentWord, reports it to the sinks of the text
// items in mTextItems, and then resets the accumulated-word state.
//
// Break states for the word are chosen as follows:
//  - Strictness::Anywhere: every position is FLAG_BREAK_TYPE_NORMAL.
//  - the word has no complex characters: every position is
//    FLAG_BREAK_TYPE_NORMAL for WordBreak::BreakAll, otherwise
//    FLAG_BREAK_TYPE_NONE.
//  - GetJISx4051Breaks of the shared LineBreaker fills in the states
//    (NOTE(review): presumably only for the remaining case, a word that
//    contains complex characters -- confirm).
// Hyphenation points are then added via FindHyphenationPoints when
// auto-hyphenation applies, and per-item flags (BREAK_SUPPRESS_INITIAL,
// BREAK_SUPPRESS_INSIDE) override the states for that item's slice.
//
// Callers in this file test the returned nsresult with NS_FAILED and
// propagate a failure.
61 nsresult
nsLineBreaker::FlushCurrentWord() {
62 uint32_t length
= mCurrentWord
.Length();
63 AutoTArray
<uint8_t, 4000> breakState
;
64 // XXX(Bug 1631371) Check if this should use a fallible operation as it
66 breakState
.AppendElements(length
);
68 nsTArray
<bool> capitalizationState
;
// Choose the per-character break states for the whole word.
70 if (mStrictness
== LineBreaker::Strictness::Anywhere
) {
71 memset(breakState
.Elements(),
72 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
,
73 length
* sizeof(uint8_t));
74 } else if (!mCurrentWordContainsComplexChar
) {
75 // For break-strict set everything internal to "break", otherwise
77 memset(breakState
.Elements(),
78 mWordBreak
== LineBreaker::WordBreak::BreakAll
79 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
80 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
,
81 length
* sizeof(uint8_t));
83 nsContentUtils::LineBreaker()->GetJISx4051Breaks(
84 mCurrentWord
.Elements(), length
, mWordBreak
, mStrictness
,
85 mScriptIsChineseOrJapanese
, breakState
.Elements());
// Auto-hyphenation needs a known language for the word, no mix of languages
// within it, and every contributing text item to have asked for it.
88 bool autoHyphenate
= mCurrentWordLanguage
&& !mCurrentWordContainsMixedLang
;
90 for (i
= 0; autoHyphenate
&& i
< mTextItems
.Length(); ++i
) {
91 TextItem
* ti
= &mTextItems
[i
];
92 if (!(ti
->mFlags
& BREAK_USE_AUTO_HYPHENATION
)) {
93 autoHyphenate
= false;
// NOTE(review): FindHyphenationPoints dereferences the hyphenator it is
// given; confirm this lookup and call are guarded by autoHyphenate and by a
// null check on the returned hyphenator.
97 RefPtr
<nsHyphenator
> hyphenator
=
98 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage
);
100 FindHyphenationPoints(hyphenator
, mCurrentWord
.Elements(),
101 mCurrentWord
.Elements() + length
,
102 breakState
.Elements());
// Hand each contributing text item its slice of the word's data. `offset` is
// the position of the item's first character within the word.
107 for (i
= 0; i
< mTextItems
.Length(); ++i
) {
108 TextItem
* ti
= &mTextItems
[i
];
109 NS_ASSERTION(ti
->mLength
> 0, "Zero length word contribution?");
// Suppress the break before an item that starts at sink offset 0.
111 if ((ti
->mFlags
& BREAK_SUPPRESS_INITIAL
) && ti
->mSinkOffset
== 0) {
112 breakState
[offset
] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
// Suppress the breaks inside the item; its first position is left untouched
// when the item starts at sink offset 0.
114 if (ti
->mFlags
& BREAK_SUPPRESS_INSIDE
) {
115 uint32_t exclude
= ti
->mSinkOffset
== 0 ? 1 : 0;
116 memset(breakState
.Elements() + offset
+ exclude
,
117 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
,
118 (ti
->mLength
- exclude
) * sizeof(uint8_t));
121 // Don't set the break state for the first character of the word, because
122 // it was already set correctly earlier and we don't know what the true
124 uint32_t skipSet
= i
== 0 ? 1 : 0;
126 ti
->mSink
->SetBreaks(ti
->mSinkOffset
+ skipSet
, ti
->mLength
- skipSet
,
127 breakState
.Elements() + offset
+ skipSet
);
// Capitalization data for the whole word is built lazily, once, the first
// time an item asks for it; it is skipped for a word continuation.
129 if (!mWordContinuation
&& (ti
->mFlags
& BREAK_NEED_CAPITALIZATION
)) {
130 if (capitalizationState
.Length() == 0) {
131 // XXX(Bug 1631371) Check if this should use a fallible operation as
132 // it pretended earlier.
133 capitalizationState
.AppendElements(length
);
134 memset(capitalizationState
.Elements(), false, length
* sizeof(bool));
135 SetupCapitalization(mCurrentWord
.Elements(), length
,
136 capitalizationState
.Elements());
138 ti
->mSink
->SetCapitalization(ti
->mSinkOffset
, ti
->mLength
,
139 capitalizationState
.Elements() + offset
);
143 offset
+= ti
->mLength
;
// The word has been reported; reset the accumulated-word state.
146 mCurrentWord
.Clear();
148 mCurrentWordContainsComplexChar
= false;
149 mCurrentWordContainsMixedLang
= false;
150 mCurrentWordLanguage
= nullptr;
151 mWordContinuation
= false;
155 // If the aFlags parameter to AppendText has all these bits set,
156 // then we don't need to worry about finding break opportunities
157 // in the appended text.
// Both AppendText overloads compare (aFlags & NO_BREAKS_NEEDED_FLAGS) against
// this mask when computing their noBreaksNeeded flag.
158 #define NO_BREAKS_NEEDED_FLAGS \
159 (BREAK_SUPPRESS_INITIAL | BREAK_SUPPRESS_INSIDE | \
160 BREAK_SKIP_SETTING_NO_BREAKS)
// Feeds a chunk of UTF-16 text to the line breaker.
//
//   aHyphenationLanguage  language atom for this text; passed to
//                         UpdateCurrentWordLanguage and used to look up a
//                         hyphenator when auto-hyphenation is requested.
//   aText, aLength        the text; aLength is asserted to be non-zero.
//   aFlags                BREAK_* flags for this chunk.
//   aSink                 receives SetBreaks/SetCapitalization for this chunk.
//                         The code checks it for null and computes no break or
//                         capitalization data without a sink.
//
// Outline:
//  1. If a word is already in progress, characters up to the first space are
//     appended to it. If the chunk holds no space the function returns NS_OK
//     with the word still pending; otherwise the word is flushed.
//  2. The rest of the chunk is scanned. For each completed word the breaks
//     inside it, its hyphenation points and its capitalization are computed
//     as the flags require.
//  3. A word that is still unfinished at the end of the chunk is copied into
//     mCurrentWord so that a later call can continue it.
//  4. The data for the processed range is reported to aSink.
//
// Visible return values: NS_OK (chunk consumed by the pending word), the
// failure code of FlushCurrentWord, and NS_ERROR_OUT_OF_MEMORY when growing
// mCurrentWord fails.
162 nsresult
nsLineBreaker::AppendText(nsAtom
* aHyphenationLanguage
,
163 const char16_t
* aText
, uint32_t aLength
,
164 uint32_t aFlags
, nsILineBreakSink
* aSink
) {
165 NS_ASSERTION(aLength
> 0, "Appending empty text...");
169 // Continue the current word
170 if (mCurrentWord
.Length() > 0) {
171 NS_ASSERTION(!mAfterBreakableSpace
&& !mBreakHere
,
172 "These should not be set");
// Extend the pending word with the characters up to the first space, noting
// whether any of them is a complex character.
174 while (offset
< aLength
&& !IsSpace(aText
[offset
])) {
175 mCurrentWord
.AppendElement(aText
[offset
]);
176 if (!mCurrentWordContainsComplexChar
&& IsComplexChar(aText
[offset
])) {
177 mCurrentWordContainsComplexChar
= true;
179 UpdateCurrentWordLanguage(aHyphenationLanguage
);
// Record this chunk's contribution to the pending word: it starts at sink
// offset 0 and is `offset` characters long.
184 mTextItems
.AppendElement(TextItem(aSink
, 0, offset
, aFlags
));
// The whole chunk was consumed without reaching whitespace.
187 if (offset
== aLength
) return NS_OK
;
189 // We encountered whitespace, so we're done with this word
190 nsresult rv
= FlushCurrentWord();
191 if (NS_FAILED(rv
)) return rv
;
194 AutoTArray
<uint8_t, 4000> breakState
;
196 // XXX(Bug 1631371) Check if this should use a fallible operation as it
197 // pretended earlier.
198 breakState
.AppendElements(aLength
);
// The capitalization buffer is only allocated (and zeroed) when there is a
// sink and the flags ask for capitalization.
201 bool noCapitalizationNeeded
= true;
202 nsTArray
<bool> capitalizationState
;
203 if (aSink
&& (aFlags
& BREAK_NEED_CAPITALIZATION
)) {
204 // XXX(Bug 1631371) Check if this should use a fallible operation as it
205 // pretended earlier.
206 capitalizationState
.AppendElements(aLength
);
207 memset(capitalizationState
.Elements(), false, aLength
* sizeof(bool));
208 noCapitalizationNeeded
= false;
// No break data is needed without a sink, or when the flags suppress every
// break in this chunk and no break is pending from earlier text.
211 uint32_t start
= offset
;
212 bool noBreaksNeeded
=
213 !aSink
|| ((aFlags
& NO_BREAKS_NEEDED_FLAGS
) == NO_BREAKS_NEEDED_FLAGS
&&
214 !mBreakHere
&& !mAfterBreakableSpace
);
215 if (noBreaksNeeded
&& noCapitalizationNeeded
) {
216 // Skip to the space before the last word, since either the break data
217 // here is not needed, or no breaks are set in the sink and there cannot
218 // be any breaks in this chunk; and we don't need to do word-initial
219 // capitalization. All we need is the context for the next chunk (if any).
221 while (offset
> start
) {
223 if (IsSpace(aText
[offset
])) break;
// Scan the remaining text; wordStart is where the word being read began.
226 uint32_t wordStart
= offset
;
227 bool wordHasComplexChar
= false;
// A hyphenator is only looked up when auto-hyphenation is requested, breaks
// inside the text are not suppressed, and a language is known.
229 RefPtr
<nsHyphenator
> hyphenator
;
230 if ((aFlags
& BREAK_USE_AUTO_HYPHENATION
) &&
231 !(aFlags
& BREAK_SUPPRESS_INSIDE
) && aHyphenationLanguage
) {
233 nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage
);
237 char16_t ch
= aText
[offset
];
238 bool isSpace
= IsSpace(ch
);
239 bool isBreakableSpace
= isSpace
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
);
241 if (aSink
&& !noBreaksNeeded
) {
// Break state for this position (presumably stored in breakState -- confirm):
// NORMAL if a break is pending, if this is the first character that is not a
// breakable space after one, or if BreakAll / Anywhere is in effect;
// otherwise NONE.
243 mBreakHere
|| (mAfterBreakableSpace
&& !isBreakableSpace
) ||
244 mWordBreak
== LineBreaker::WordBreak::BreakAll
||
245 mStrictness
== LineBreaker::Strictness::Anywhere
246 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
247 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
250 mAfterBreakableSpace
= isBreakableSpace
;
// A space (or newline) ends the word that started at wordStart: fill in the
// breaks inside it, its hyphenation points and its capitalization.
252 if (isSpace
|| ch
== '\n') {
253 if (offset
> wordStart
&& aSink
) {
254 if (!(aFlags
& BREAK_SUPPRESS_INSIDE
)) {
255 if (mStrictness
== LineBreaker::Strictness::Anywhere
) {
256 memset(breakState
.Elements() + wordStart
,
257 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
,
259 } else if (wordHasComplexChar
) {
260 // Save current start-of-word state because GetJISx4051Breaks will
262 uint8_t currentStart
= breakState
[wordStart
];
263 nsContentUtils::LineBreaker()->GetJISx4051Breaks(
264 aText
+ wordStart
, offset
- wordStart
, mWordBreak
, mStrictness
,
265 mScriptIsChineseOrJapanese
, breakState
.Elements() + wordStart
);
266 breakState
[wordStart
] = currentStart
;
// NOTE(review): FindHyphenationPoints dereferences the hyphenator; confirm
// this call is guarded by a null check on `hyphenator`.
269 FindHyphenationPoints(hyphenator
, aText
+ wordStart
, aText
+ offset
,
270 breakState
.Elements() + wordStart
);
273 if (!mWordContinuation
&& !noCapitalizationNeeded
) {
274 SetupCapitalization(aText
+ wordStart
, offset
- wordStart
,
275 capitalizationState
.Elements() + wordStart
);
278 wordHasComplexChar
= false;
279 mWordContinuation
= false;
281 if (offset
>= aLength
) break;
284 if (!wordHasComplexChar
&& IsComplexChar(ch
)) {
285 wordHasComplexChar
= true;
// The text ended in the middle of a word: copy the partial word into
// mCurrentWord (with a matching text item) so a later call can continue it.
288 if (offset
>= aLength
) {
290 mCurrentWordContainsComplexChar
= wordHasComplexChar
;
291 uint32_t len
= offset
- wordStart
;
292 char16_t
* elems
= mCurrentWord
.AppendElements(len
);
293 if (!elems
) return NS_ERROR_OUT_OF_MEMORY
;
294 memcpy(elems
, aText
+ wordStart
, sizeof(char16_t
) * len
);
295 mTextItems
.AppendElement(TextItem(aSink
, wordStart
, len
, aFlags
));
296 // Ensure that the break-before for this word is written out
297 offset
= wordStart
+ 1;
298 UpdateCurrentWordLanguage(aHyphenationLanguage
);
// Report the data computed for the range [start, offset) to the sink.
// noBreaksNeeded is true whenever aSink is null, and noCapitalizationNeeded
// is only cleared when aSink is non-null, so aSink is valid in both branches.
305 if (!noBreaksNeeded
) {
306 aSink
->SetBreaks(start
, offset
- start
, breakState
.Elements() + start
);
308 if (!noCapitalizationNeeded
) {
309 aSink
->SetCapitalization(start
, offset
- start
,
310 capitalizationState
.Elements() + start
);
// Asks aHyphenator to hyphenate the text in [aTextStart, aTextLimit) and, if
// that succeeds, records hyphenation opportunities in aBreakState using
// gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN.
//
//   aHyphenator   dereferenced unconditionally, so it must not be null.
//   aTextStart    first character of the word.
//   aTextLimit    one past the last character of the word.
//   aBreakState   break-state array for the word.
//                 NOTE(review): presumably indexed relative to aTextStart --
//                 confirm which entry is written for each hyphen position.
//
// aBreakState is only touched when Hyphenate() reports success.
316 void nsLineBreaker::FindHyphenationPoints(nsHyphenator
* aHyphenator
,
317 const char16_t
* aTextStart
,
318 const char16_t
* aTextLimit
,
319 uint8_t* aBreakState
) {
320 nsDependentSubstring
string(aTextStart
, aTextLimit
);
321 AutoTArray
<bool, 200> hyphens
;
322 if (NS_SUCCEEDED(aHyphenator
->Hyphenate(string
, hyphens
))) {
// The loop bound stops one short of the string length, so the last character
// is never visited as `i`.
323 for (uint32_t i
= 0; i
+ 1 < string
.Length(); ++i
) {
326 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN
;
// Feeds a chunk of 8-bit text to the line breaker.
//
//   aHyphenationLanguage  language atom; only forwarded when the call is
//                         deferred to the char16_t overload.
//   aText, aLength        the 8-bit text; aLength is asserted to be non-zero.
//   aFlags                BREAK_* flags for this chunk.
//   aSink                 receives SetBreaks for this chunk; the code checks
//                         it for null and computes no break data without it.
//
// When capitalization or auto-hyphenation is requested, the text is converted
// to UTF-16 and handled by the char16_t overload. Otherwise the steps mirror
// that overload without hyphenation and capitalization: continue a pending
// word, scan the rest of the chunk computing break states, stash an
// unfinished trailing word in mCurrentWord, then report the breaks to aSink.
//
// Visible return values: the result of the char16_t overload when deferring,
// the failure code of FlushCurrentWord, and NS_ERROR_OUT_OF_MEMORY when
// growing mCurrentWord fails.
332 nsresult
nsLineBreaker::AppendText(nsAtom
* aHyphenationLanguage
,
333 const uint8_t* aText
, uint32_t aLength
,
334 uint32_t aFlags
, nsILineBreakSink
* aSink
) {
335 NS_ASSERTION(aLength
> 0, "Appending empty text...");
337 if (aFlags
& (BREAK_NEED_CAPITALIZATION
| BREAK_USE_AUTO_HYPHENATION
)) {
338 // Defer to the Unicode path if capitalization or hyphenation is required
// NOTE(review): the conversion helper is named for ASCII input; confirm how
// 8-bit characters outside the ASCII range are handled here.
340 const char* cp
= reinterpret_cast<const char*>(aText
);
341 CopyASCIItoUTF16(nsDependentCSubstring(cp
, cp
+ aLength
), str
);
342 return AppendText(aHyphenationLanguage
, str
.get(), aLength
, aFlags
, aSink
);
347 // Continue the current word
348 if (mCurrentWord
.Length() > 0) {
349 NS_ASSERTION(!mAfterBreakableSpace
&& !mBreakHere
,
350 "These should not be set");
// Extend the pending word with the characters up to the first space, noting
// whether any of them is a complex character.
352 while (offset
< aLength
&& !IsSpace(aText
[offset
])) {
353 mCurrentWord
.AppendElement(aText
[offset
]);
354 if (!mCurrentWordContainsComplexChar
&&
355 IsComplexASCIIChar(aText
[offset
])) {
356 mCurrentWordContainsComplexChar
= true;
// Record this chunk's contribution to the pending word: it starts at sink
// offset 0 and is `offset` characters long.
362 mTextItems
.AppendElement(TextItem(aSink
, 0, offset
, aFlags
));
365 if (offset
== aLength
) {
366 // We did not encounter whitespace so the word hasn't finished yet.
370 // We encountered whitespace, so we're done with this word
371 nsresult rv
= FlushCurrentWord();
372 if (NS_FAILED(rv
)) return rv
;
375 AutoTArray
<uint8_t, 4000> breakState
;
377 // XXX(Bug 1631371) Check if this should use a fallible operation as it
378 // pretended earlier.
379 breakState
.AppendElements(aLength
);
// No break data is needed without a sink, or when the flags suppress every
// break in this chunk and no break is pending from earlier text.
382 uint32_t start
= offset
;
383 bool noBreaksNeeded
=
384 !aSink
|| ((aFlags
& NO_BREAKS_NEEDED_FLAGS
) == NO_BREAKS_NEEDED_FLAGS
&&
385 !mBreakHere
&& !mAfterBreakableSpace
);
386 if (noBreaksNeeded
) {
387 // Skip to the space before the last word, since either the break data
388 // here is not needed, or no breaks are set in the sink and there cannot
389 // be any breaks in this chunk; all we need is the context for the next
392 while (offset
> start
) {
394 if (IsSpace(aText
[offset
])) break;
// Scan the remaining text; wordStart is where the word being read began.
397 uint32_t wordStart
= offset
;
398 bool wordHasComplexChar
= false;
401 uint8_t ch
= aText
[offset
];
402 bool isSpace
= IsSpace(ch
);
403 bool isBreakableSpace
= isSpace
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
);
406 // Consider word-break style. Since the break position of CJK scripts
407 // will be set by nsILineBreaker, we don't consider CJK at this point.
// Break state for this position (presumably stored in breakState -- confirm):
// NORMAL if a break is pending, if this is the first character that is not a
// breakable space after one, or if BreakAll / Anywhere is in effect;
// otherwise NONE.
409 mBreakHere
|| (mAfterBreakableSpace
&& !isBreakableSpace
) ||
410 mWordBreak
== LineBreaker::WordBreak::BreakAll
||
411 mStrictness
== LineBreaker::Strictness::Anywhere
412 ? gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
413 : gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
416 mAfterBreakableSpace
= isBreakableSpace
;
// Fill in the breaks inside the word that started at wordStart, unless there
// is no sink or breaks inside the text are suppressed.
419 if (offset
> wordStart
&& aSink
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
)) {
420 if (mStrictness
== LineBreaker::Strictness::Anywhere
) {
421 memset(breakState
.Elements() + wordStart
,
422 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
,
424 } else if (wordHasComplexChar
) {
425 // Save current start-of-word state because GetJISx4051Breaks will
427 uint8_t currentStart
= breakState
[wordStart
];
428 nsContentUtils::LineBreaker()->GetJISx4051Breaks(
429 aText
+ wordStart
, offset
- wordStart
, mWordBreak
, mStrictness
,
430 mScriptIsChineseOrJapanese
, breakState
.Elements() + wordStart
);
431 breakState
[wordStart
] = currentStart
;
435 wordHasComplexChar
= false;
437 if (offset
>= aLength
) break;
440 if (!wordHasComplexChar
&& IsComplexASCIIChar(ch
)) {
441 wordHasComplexChar
= true;
// The text ended in the middle of a word: widen the partial word into
// mCurrentWord (with a matching text item) so a later call can continue it.
444 if (offset
>= aLength
) {
446 mCurrentWordContainsComplexChar
= wordHasComplexChar
;
447 uint32_t len
= offset
- wordStart
;
448 char16_t
* elems
= mCurrentWord
.AppendElements(len
);
449 if (!elems
) return NS_ERROR_OUT_OF_MEMORY
;
// Element-wise copy because each 8-bit character is widened to char16_t.
451 for (i
= wordStart
; i
< offset
; ++i
) {
452 elems
[i
- wordStart
] = aText
[i
];
454 mTextItems
.AppendElement(TextItem(aSink
, wordStart
, len
, aFlags
));
455 // Ensure that the break-before for this word is written out
456 offset
= wordStart
+ 1;
// Report the breaks computed for the range [start, offset) to the sink.
// noBreaksNeeded is true whenever aSink is null, so aSink is valid here.
462 if (!noBreaksNeeded
) {
463 aSink
->SetBreaks(start
, offset
- start
, breakState
.Elements() + start
);
// Updates the language bookkeeping for the word being accumulated, given the
// hyphenation language of the text that was just added to it.
//
//  - If the word already has a language and it differs from
//    aHyphenationLanguage, the word is flagged as containing mixed languages
//    and mScriptIsChineseOrJapanese is cleared.
//  - If the word has no language yet and aHyphenationLanguage is non-null, a
//    Locale is built from the atom and mScriptIsChineseOrJapanese is set
//    according to whether its script is "Hans", "Hant", "Jpan" or "Hrkt".
//  - mCurrentWordLanguage is assigned aHyphenationLanguage.
//    NOTE(review): confirm under which of the conditions above this
//    assignment runs.
468 void nsLineBreaker::UpdateCurrentWordLanguage(nsAtom
* aHyphenationLanguage
) {
469 if (mCurrentWordLanguage
&& mCurrentWordLanguage
!= aHyphenationLanguage
) {
470 mCurrentWordContainsMixedLang
= true;
471 mScriptIsChineseOrJapanese
= false;
473 if (aHyphenationLanguage
&& !mCurrentWordLanguage
) {
474 Locale loc
= Locale(nsAtomCString(aHyphenationLanguage
));
// NOTE(review): presumably the locale is completed (e.g. a likely script is
// filled in) when the language tag carries no script -- confirm.
475 if (loc
.GetScript().IsEmpty()) {
478 const nsDependentCSubstring
& script
= loc
.GetScript();
479 mScriptIsChineseOrJapanese
=
480 script
.EqualsLiteral("Hans") || script
.EqualsLiteral("Hant") ||
481 script
.EqualsLiteral("Jpan") || script
.EqualsLiteral("Hrkt");
483 mCurrentWordLanguage
= aHyphenationLanguage
;
// Tells the line breaker that whitespace which produces no text was
// encountered. Any pending word is flushed first (a flush failure is
// returned to the caller). The whitespace counts as breakable unless aFlags
// contains BREAK_SUPPRESS_INSIDE; mAfterBreakableSpace is updated to match
// and mWordContinuation is cleared.
487 nsresult
nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags
) {
488 nsresult rv
= FlushCurrentWord();
489 if (NS_FAILED(rv
)) return rv
;
491 bool isBreakableSpace
= !(aFlags
& BREAK_SUPPRESS_INSIDE
);
// NOTE(review): the previous space was breakable but this one is not;
// presumably the pending break opportunity is remembered here (mBreakHere)
// before mAfterBreakableSpace is overwritten -- confirm.
492 if (mAfterBreakableSpace
&& !isBreakableSpace
) {
495 mAfterBreakableSpace
= isBreakableSpace
;
496 mWordContinuation
= false;
// Flushes any pending word and reports through *aTrailingBreak whether a
// break opportunity is pending at the end of the text seen so far, i.e.
// whether mBreakHere or mAfterBreakableSpace is set. A flush failure is
// returned before *aTrailingBreak is written. aTrailingBreak is dereferenced
// unconditionally, so it must not be null.
500 nsresult
nsLineBreaker::Reset(bool* aTrailingBreak
) {
501 nsresult rv
= FlushCurrentWord();
502 if (NS_FAILED(rv
)) return rv
;
504 *aTrailingBreak
= mBreakHere
|| mAfterBreakableSpace
;
// Clear the whitespace context so the next run of text starts fresh.
506 mAfterBreakableSpace
= false;