1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsLineBreaker.h"
7 #include "nsContentUtils.h"
8 #include "nsILineBreaker.h"
9 #include "gfxTextRun.h" // for the gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_* values
10 #include "nsHyphenationManager.h"
11 #include "nsHyphenator.h"
12 #include "mozilla/gfx/2D.h"
// Constructor. The visible initializer list starts the breaker with no
// current-word language (nullptr), with the mixed-language, complex-character,
// after-breakable-space and break-here flags all false, and with mWordBreak
// set to nsILineBreaker::kWordBreak_Normal.
// NOTE(review): the constructor body is not visible in this listing.
14 nsLineBreaker::nsLineBreaker()
15 : mCurrentWordLanguage(nullptr),
16 mCurrentWordContainsMixedLang(false),
17 mCurrentWordContainsComplexChar(false),
18 mAfterBreakableSpace(false), mBreakHere(false),
19 mWordBreak(nsILineBreaker::kWordBreak_Normal
)
// Destructor. Asserts that no pending word is left in mCurrentWord; per the
// assertion message, callers are expected to have called Reset() first.
23 nsLineBreaker::~nsLineBreaker()
25 NS_ASSERTION(mCurrentWord
.Length() == 0, "Should have Reset() before destruction!");
// Marks which characters of a word should be capitalized.
//   aWord           - the word, as UTF-16 code units
//   aLength         - number of code units in aWord
//   aCapitalization - output flags indexed in parallel with aWord; the visible
//                     code only ever stores `true` into it, so the caller is
//                     expected to have cleared it beforehand
// NOTE(review): the return type, the braces and a few interior lines (see the
// gaps in the embedded line numbers, e.g. 46-50) are not visible in this
// listing, so the exact nesting below cannot be confirmed from here.
29 SetupCapitalization(const char16_t
* aWord
, uint32_t aLength
,
30 bool* aCapitalization
)
32 // Capitalize the first alphanumeric character after a space or start
34 // The only space character a word can contain is NBSP.
// Starts true so that the first alphanumeric character of the word qualifies.
35 bool capitalizeNextChar
= true;
36 for (uint32_t i
= 0; i
< aLength
; ++i
) {
37 uint32_t ch
= aWord
[i
];
38 if (capitalizeNextChar
) {
// A high surrogate followed by a low surrogate is combined into one UCS-4
// code point so that the alphanumeric test sees the whole character.
39 if (NS_IS_HIGH_SURROGATE(ch
) && i
+ 1 < aLength
&&
40 NS_IS_LOW_SURROGATE(aWord
[i
+ 1])) {
41 ch
= SURROGATE_TO_UCS4(ch
, aWord
[i
+ 1]);
// Flag this position and stop looking until the next NBSP.
43 if (nsContentUtils::IsAlphanumeric(ch
)) {
44 aCapitalization
[i
] = true;
45 capitalizeNextChar
= false;
// An NBSP re-arms capitalization for the next alphanumeric character.
51 if (ch
== 0xA0 /*NBSP*/) {
52 capitalizeNextChar
= true;
// Finishes the word accumulated in mCurrentWord: computes one break-state byte
// per code unit, optionally adds hyphenation points and capitalization flags,
// hands the results to the sink of every TextItem in mTextItems, and then
// clears the current-word state.
// Returns NS_ERROR_OUT_OF_MEMORY if a scratch array cannot be grown.
// NOTE(review): this listing omits several lines (braces, the return type,
// the declarations of `i` and `offset`, and any other return statements), so
// the exact nesting of the statements below cannot be confirmed from here.
58 nsLineBreaker::FlushCurrentWord()
60 uint32_t length
= mCurrentWord
.Length();
// Scratch buffer: one break-state byte per code unit of the current word.
61 nsAutoTArray
<uint8_t,4000> breakState
;
62 if (!breakState
.AppendElements(length
))
63 return NS_ERROR_OUT_OF_MEMORY
;
// Filled lazily further down, only if some text item asks for capitalization.
65 nsTArray
<bool> capitalizationState
;
// Without complex characters the whole word gets one uniform value:
// FLAG_BREAK_TYPE_NORMAL for kWordBreak_BreakAll, FLAG_BREAK_TYPE_NONE otherwise.
67 if (!mCurrentWordContainsComplexChar
) {
68 // For break-strict set everything internal to "break", otherwise
70 memset(breakState
.Elements(),
71 mWordBreak
== nsILineBreaker::kWordBreak_BreakAll
?
72 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
:
73 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
,
74 length
*sizeof(uint8_t));
// Presumably the else-branch (the `else` line itself is not visible): let the
// shared line breaker compute the breaks for the whole word.
76 nsContentUtils::LineBreaker()->
77 GetJISx4051Breaks(mCurrentWord
.Elements(), length
, mWordBreak
,
78 breakState
.Elements());
// Auto-hyphenation is considered only when the word has a language and does
// not mix languages ...
81 bool autoHyphenate
= mCurrentWordLanguage
&&
82 !mCurrentWordContainsMixedLang
;
// ... and is switched off as soon as one contributing text item lacks
// BREAK_USE_AUTO_HYPHENATION.
84 for (i
= 0; autoHyphenate
&& i
< mTextItems
.Length(); ++i
) {
85 TextItem
* ti
= &mTextItems
[i
];
86 if (!(ti
->mFlags
& BREAK_USE_AUTO_HYPHENATION
)) {
87 autoHyphenate
= false;
// NOTE(review): the guards around this lookup (presumably `autoHyphenate` and
// a null check on the hyphenator) are on lines not visible here.
91 nsRefPtr
<nsHyphenator
> hyphenator
=
92 nsHyphenationManager::Instance()->GetHyphenator(mCurrentWordLanguage
);
94 FindHyphenationPoints(hyphenator
,
95 mCurrentWord
.Elements(),
96 mCurrentWord
.Elements() + length
,
97 breakState
.Elements());
// Hand each contributing text item its slice of the computed state. `offset`
// is the item's start within the word; it advances by ti->mLength at the end
// of every iteration.
102 for (i
= 0; i
< mTextItems
.Length(); ++i
) {
103 TextItem
* ti
= &mTextItems
[i
];
104 NS_ASSERTION(ti
->mLength
> 0, "Zero length word contribution?");
// No break before the item's first character when initial breaks are
// suppressed and the item starts at offset 0 of its sink.
106 if ((ti
->mFlags
& BREAK_SUPPRESS_INITIAL
) && ti
->mSinkOffset
== 0) {
107 breakState
[offset
] = gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
// Clear every break inside the item; `exclude` leaves the first character
// alone when the item starts at offset 0 of its sink.
109 if (ti
->mFlags
& BREAK_SUPPRESS_INSIDE
) {
110 uint32_t exclude
= ti
->mSinkOffset
== 0 ? 1 : 0;
111 memset(breakState
.Elements() + offset
+ exclude
,
112 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
,
113 (ti
->mLength
- exclude
)*sizeof(uint8_t));
116 // Don't set the break state for the first character of the word, because
117 // it was already set correctly earlier and we don't know what the true
119 uint32_t skipSet
= i
== 0 ? 1 : 0;
// NOTE(review): any condition guarding this call (original line 120) is not
// visible in this listing.
121 ti
->mSink
->SetBreaks(ti
->mSinkOffset
+ skipSet
, ti
->mLength
- skipSet
,
122 breakState
.Elements() + offset
+ skipSet
);
// Capitalization is computed once for the whole word, the first time an item
// requests it, and then sliced per item.
124 if (ti
->mFlags
& BREAK_NEED_CAPITALIZATION
) {
125 if (capitalizationState
.Length() == 0) {
126 if (!capitalizationState
.AppendElements(length
))
127 return NS_ERROR_OUT_OF_MEMORY
;
128 memset(capitalizationState
.Elements(), false, length
*sizeof(bool));
129 SetupCapitalization(mCurrentWord
.Elements(), length
,
130 capitalizationState
.Elements());
132 ti
->mSink
->SetCapitalization(ti
->mSinkOffset
, ti
->mLength
,
133 capitalizationState
.Elements() + offset
);
137 offset
+= ti
->mLength
;
// Reset the per-word state so that the next word starts clean.
140 mCurrentWord
.Clear();
142 mCurrentWordContainsComplexChar
= false;
143 mCurrentWordContainsMixedLang
= false;
144 mCurrentWordLanguage
= nullptr;
// Feeds a chunk of UTF-16 text into the line breaker.
//   aHyphenationLanguage - language atom recorded through
//                          UpdateCurrentWordLanguage() and used to look up a
//                          hyphenator
//   aText, aLength       - the text; aLength is asserted to be non-zero
//   aFlags               - combination of BREAK_* flags
//   aSink                - receives SetBreaks()/SetCapitalization(); when it is
//                          null no break data is reported (see noBreaksNeeded)
// Returns NS_ERROR_OUT_OF_MEMORY when a scratch array cannot be grown.
// NOTE(review): this listing omits many lines (braces, loop headers, the
// declaration of `offset`, some assignment targets and return statements), so
// the comments below describe only what the visible lines establish.
149 nsLineBreaker::AppendText(nsIAtom
* aHyphenationLanguage
, const char16_t
* aText
, uint32_t aLength
,
150 uint32_t aFlags
, nsILineBreakSink
* aSink
)
152 NS_ASSERTION(aLength
> 0, "Appending empty text...");
156 // Continue the current word
157 if (mCurrentWord
.Length() > 0) {
158 NS_ASSERTION(!mAfterBreakableSpace
&& !mBreakHere
, "These should not be set");
// Extend the pending word with every leading non-space character of aText,
// tracking whether it now contains a complex character.
160 while (offset
< aLength
&& !IsSpace(aText
[offset
])) {
161 mCurrentWord
.AppendElement(aText
[offset
]);
162 if (!mCurrentWordContainsComplexChar
&& IsComplexChar(aText
[offset
])) {
163 mCurrentWordContainsComplexChar
= true;
165 UpdateCurrentWordLanguage(aHyphenationLanguage
);
// Record this chunk's contribution to the pending word: it starts at offset 0
// of aSink and is `offset` code units long.
170 mTextItems
.AppendElement(TextItem(aSink
, 0, offset
, aFlags
));
// Presumably an early return: the whole chunk belonged to the pending word
// (the statement under this `if` is not visible here).
173 if (offset
== aLength
)
176 // We encountered whitespace, so we're done with this word
177 nsresult rv
= FlushCurrentWord();
// Scratch buffer: one break-state byte per code unit of aText.
182 nsAutoTArray
<uint8_t,4000> breakState
;
184 if (!breakState
.AppendElements(aLength
))
185 return NS_ERROR_OUT_OF_MEMORY
;
// Capitalization flags are allocated and cleared only when a sink exists and
// the caller asked for them.
188 nsTArray
<bool> capitalizationState
;
189 if (aSink
&& (aFlags
& BREAK_NEED_CAPITALIZATION
)) {
190 if (!capitalizationState
.AppendElements(aLength
))
191 return NS_ERROR_OUT_OF_MEMORY
;
192 memset(capitalizationState
.Elements(), false, aLength
*sizeof(bool));
// `start` is the first code unit whose break state this call reports.
195 uint32_t start
= offset
;
// No break data is needed when there is no sink, or when aFlags is exactly
// SUPPRESS_INITIAL | SUPPRESS_INSIDE | SKIP_SETTING_NO_BREAKS and no break is
// pending from earlier text.
196 bool noBreaksNeeded
= !aSink
||
197 (aFlags
== (BREAK_SUPPRESS_INITIAL
| BREAK_SUPPRESS_INSIDE
| BREAK_SKIP_SETTING_NO_BREAKS
) &&
198 !mBreakHere
&& !mAfterBreakableSpace
);
199 if (noBreaksNeeded
) {
200 // Skip to the space before the last word, since either the break data
201 // here is not needed, or no breaks are set in the sink and there cannot
202 // be any breaks in this chunk; all we need is the context for the next
// NOTE(review): the statements that position `offset` before and inside this
// backwards scan are not visible in this listing.
205 while (offset
> start
) {
207 if (IsSpace(aText
[offset
]))
// Per-word scanning state for the main loop (its header is not visible here).
211 uint32_t wordStart
= offset
;
212 bool wordHasComplexChar
= false;
// A hyphenator is looked up only when auto-hyphenation is requested, breaks
// inside the text are not suppressed, and a language is available.
214 nsRefPtr
<nsHyphenator
> hyphenator
;
215 if ((aFlags
& BREAK_USE_AUTO_HYPHENATION
) &&
216 !(aFlags
& BREAK_SUPPRESS_INSIDE
) &&
217 aHyphenationLanguage
) {
218 hyphenator
= nsHyphenationManager::Instance()->GetHyphenator(aHyphenationLanguage
);
// Classify the current character. A space only counts as breakable when
// breaks inside this text are not suppressed.
222 char16_t ch
= aText
[offset
];
223 bool isSpace
= IsSpace(ch
);
224 bool isBreakableSpace
= isSpace
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
);
// Break-before value for this character: NORMAL if a break is pending, if it
// is the first non-breakable-space character after a breakable space, or if
// word-break is kWordBreak_BreakAll; NONE otherwise.
// NOTE(review): the left-hand side of this assignment is on lines not visible
// in this listing.
228 mBreakHere
|| (mAfterBreakableSpace
&& !isBreakableSpace
) ||
229 (mWordBreak
== nsILineBreaker::kWordBreak_BreakAll
) ?
230 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
:
231 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
234 mAfterBreakableSpace
= isBreakableSpace
;
// A non-empty word has just been scanned: refine its interior break state.
// NOTE(review): the enclosing condition (presumably `if (isSpace)`) is not
// visible here.
237 if (offset
> wordStart
&& aSink
) {
238 if (!(aFlags
& BREAK_SUPPRESS_INSIDE
)) {
239 if (wordHasComplexChar
) {
240 // Save current start-of-word state because GetJISx4051Breaks will
242 uint8_t currentStart
= breakState
[wordStart
];
243 nsContentUtils::LineBreaker()->
244 GetJISx4051Breaks(aText
+ wordStart
, offset
- wordStart
,
246 breakState
.Elements() + wordStart
);
247 breakState
[wordStart
] = currentStart
;
// NOTE(review): the guard for this call (presumably a null check on
// `hyphenator`) is not visible here.
250 FindHyphenationPoints(hyphenator
,
251 aText
+ wordStart
, aText
+ offset
,
252 breakState
.Elements() + wordStart
);
255 if (aFlags
& BREAK_NEED_CAPITALIZATION
) {
256 SetupCapitalization(aText
+ wordStart
, offset
- wordStart
,
257 capitalizationState
.Elements() + wordStart
);
// The word is finished; reset the per-word flag for the next one.
260 wordHasComplexChar
= false;
// NOTE(review): the statement under this `if` (presumably a `break`) is not
// visible here.
262 if (offset
>= aLength
)
266 if (!wordHasComplexChar
&& IsComplexChar(ch
)) {
267 wordHasComplexChar
= true;
// The text ended in the middle of a word: stash the unfinished word in
// mCurrentWord, together with a TextItem describing its position in aSink, so
// that a later call can continue or flush it.
270 if (offset
>= aLength
) {
272 mCurrentWordContainsComplexChar
= wordHasComplexChar
;
273 uint32_t len
= offset
- wordStart
;
274 char16_t
* elems
= mCurrentWord
.AppendElements(len
);
// NOTE(review): the null check on `elems` that guards this return is not
// visible here.
276 return NS_ERROR_OUT_OF_MEMORY
;
277 memcpy(elems
, aText
+ wordStart
, sizeof(char16_t
)*len
);
278 mTextItems
.AppendElement(TextItem(aSink
, wordStart
, len
, aFlags
));
279 // Ensure that the break-before for this word is written out
280 offset
= wordStart
+ 1;
281 UpdateCurrentWordLanguage(aHyphenationLanguage
);
// Report the break state (and capitalization, if requested) for
// [start, offset) to the sink.
287 if (!noBreaksNeeded
) {
288 // aSink must not be null
289 aSink
->SetBreaks(start
, offset
- start
, breakState
.Elements() + start
);
290 if (aFlags
& BREAK_NEED_CAPITALIZATION
) {
291 aSink
->SetCapitalization(start
, offset
- start
,
292 capitalizationState
.Elements() + start
);
// Runs aHyphenator over the text in [aTextStart, aTextLimit) and records the
// result in aBreakState.
//   aHyphenator - hyphenator to use; it is dereferenced without a visible null
//                 check, so callers must pass a valid object
//   aTextStart  - first code unit of the word
//   aTextLimit  - one past the last code unit of the word
//   aBreakState - break-state bytes for the word
// NOTE(review): the lines inside the loop that test the hyphenation result and
// index aBreakState (original lines 308-309) are not visible in this listing;
// only the FLAG_BREAK_TYPE_HYPHEN value being assigned can be seen.
299 nsLineBreaker::FindHyphenationPoints(nsHyphenator
*aHyphenator
,
300 const char16_t
*aTextStart
,
301 const char16_t
*aTextLimit
,
302 uint8_t *aBreakState
)
304 nsDependentSubstring
string(aTextStart
, aTextLimit
);
305 AutoFallibleTArray
<bool,200> hyphens
;
// Nothing is recorded unless Hyphenate() succeeds.
306 if (NS_SUCCEEDED(aHyphenator
->Hyphenate(string
, hyphens
))) {
// The loop bound `i + 1 < Length()` stops before the last code unit.
307 for (uint32_t i
= 0; i
+ 1 < string
.Length(); ++i
) {
310 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_HYPHEN
;
// 8-bit overload of AppendText: feeds a chunk of single-byte text into the
// line breaker.
//   aHyphenationLanguage - only forwarded to the char16_t overload
//   aText, aLength       - the text; aLength is asserted to be non-zero
//   aFlags               - combination of BREAK_* flags
//   aSink                - receives SetBreaks(); when it is null no break data
//                          is reported (see noBreaksNeeded)
// Returns NS_ERROR_OUT_OF_MEMORY when a scratch array cannot be grown.
// NOTE(review): this listing omits many lines (braces, loop headers, the
// declarations of `offset`, `str` and `i`, some assignment targets and return
// statements), so the comments below describe only what the visible lines
// establish.
317 nsLineBreaker::AppendText(nsIAtom
* aHyphenationLanguage
, const uint8_t* aText
, uint32_t aLength
,
318 uint32_t aFlags
, nsILineBreakSink
* aSink
)
320 NS_ASSERTION(aLength
> 0, "Appending empty text...");
// Capitalization and auto-hyphenation are only implemented by the char16_t
// overload, so widen the text with CopyASCIItoUTF16 and delegate.
322 if (aFlags
& (BREAK_NEED_CAPITALIZATION
| BREAK_USE_AUTO_HYPHENATION
)) {
323 // Defer to the Unicode path if capitalization or hyphenation is required
325 const char* cp
= reinterpret_cast<const char*>(aText
);
326 CopyASCIItoUTF16(nsDependentCSubstring(cp
, cp
+ aLength
), str
);
327 return AppendText(aHyphenationLanguage
, str
.get(), aLength
, aFlags
, aSink
);
332 // Continue the current word
333 if (mCurrentWord
.Length() > 0) {
334 NS_ASSERTION(!mAfterBreakableSpace
&& !mBreakHere
, "These should not be set");
// Extend the pending word with every leading non-space character of aText,
// tracking whether it now contains a complex character.
336 while (offset
< aLength
&& !IsSpace(aText
[offset
])) {
337 mCurrentWord
.AppendElement(aText
[offset
]);
338 if (!mCurrentWordContainsComplexChar
&&
339 IsComplexASCIIChar(aText
[offset
])) {
340 mCurrentWordContainsComplexChar
= true;
// Record this chunk's contribution to the pending word: it starts at offset 0
// of aSink and is `offset` characters long.
346 mTextItems
.AppendElement(TextItem(aSink
, 0, offset
, aFlags
));
349 if (offset
== aLength
) {
350 // We did not encounter whitespace so the word hasn't finished yet.
354 // We encountered whitespace, so we're done with this word
355 nsresult rv
= FlushCurrentWord();
// Scratch buffer: one break-state byte per character of aText.
360 nsAutoTArray
<uint8_t,4000> breakState
;
362 if (!breakState
.AppendElements(aLength
))
363 return NS_ERROR_OUT_OF_MEMORY
;
// `start` is the first character whose break state this call reports.
366 uint32_t start
= offset
;
// No break data is needed when there is no sink, or when aFlags is exactly
// SUPPRESS_INITIAL | SUPPRESS_INSIDE | SKIP_SETTING_NO_BREAKS and no break is
// pending from earlier text.
367 bool noBreaksNeeded
= !aSink
||
368 (aFlags
== (BREAK_SUPPRESS_INITIAL
| BREAK_SUPPRESS_INSIDE
| BREAK_SKIP_SETTING_NO_BREAKS
) &&
369 !mBreakHere
&& !mAfterBreakableSpace
);
370 if (noBreaksNeeded
) {
371 // Skip to the space before the last word, since either the break data
372 // here is not needed, or no breaks are set in the sink and there cannot
373 // be any breaks in this chunk; all we need is the context for the next
// NOTE(review): the statements that position `offset` before and inside this
// backwards scan are not visible in this listing.
376 while (offset
> start
) {
378 if (IsSpace(aText
[offset
]))
// Per-word scanning state for the main loop (its header is not visible here).
382 uint32_t wordStart
= offset
;
383 bool wordHasComplexChar
= false;
// Classify the current character. A space only counts as breakable when
// breaks inside this text are not suppressed.
386 uint8_t ch
= aText
[offset
];
387 bool isSpace
= IsSpace(ch
);
388 bool isBreakableSpace
= isSpace
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
);
391 // Consider word-break style. Since the break position of CJK scripts
392 // will be set by nsILineBreaker, we don't consider CJK at this point.
// Break-before value for this character: NORMAL if a break is pending, if it
// is the first non-breakable-space character after a breakable space, or if
// word-break is kWordBreak_BreakAll; NONE otherwise.
// NOTE(review): the left-hand side of this assignment is on lines not visible
// in this listing.
394 mBreakHere
|| (mAfterBreakableSpace
&& !isBreakableSpace
) ||
395 (mWordBreak
== nsILineBreaker::kWordBreak_BreakAll
) ?
396 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NORMAL
:
397 gfxTextRun::CompressedGlyph::FLAG_BREAK_TYPE_NONE
;
400 mAfterBreakableSpace
= isBreakableSpace
;
// A non-empty word containing a complex character has just been scanned: let
// the shared line breaker refine its interior break state.
// NOTE(review): the enclosing condition (presumably `if (isSpace)`) is not
// visible here.
403 if (offset
> wordStart
&& wordHasComplexChar
) {
404 if (aSink
&& !(aFlags
& BREAK_SUPPRESS_INSIDE
)) {
405 // Save current start-of-word state because GetJISx4051Breaks will
407 uint8_t currentStart
= breakState
[wordStart
];
408 nsContentUtils::LineBreaker()->
409 GetJISx4051Breaks(aText
+ wordStart
, offset
- wordStart
,
411 breakState
.Elements() + wordStart
);
412 breakState
[wordStart
] = currentStart
;
// The word is finished; reset the per-word flag for the next one.
414 wordHasComplexChar
= false;
// NOTE(review): the statement under this `if` (presumably a `break`) is not
// visible here.
418 if (offset
>= aLength
)
422 if (!wordHasComplexChar
&& IsComplexASCIIChar(ch
)) {
423 wordHasComplexChar
= true;
// The text ended in the middle of a word: stash the unfinished word in
// mCurrentWord, together with a TextItem describing its position in aSink, so
// that a later call can continue or flush it.
426 if (offset
>= aLength
) {
428 mCurrentWordContainsComplexChar
= wordHasComplexChar
;
429 uint32_t len
= offset
- wordStart
;
430 char16_t
* elems
= mCurrentWord
.AppendElements(len
);
// NOTE(review): the null check on `elems` that guards this return is not
// visible here.
432 return NS_ERROR_OUT_OF_MEMORY
;
// Copy element by element because each 8-bit source character is widened into
// a char16_t slot of mCurrentWord.
434 for (i
= wordStart
; i
< offset
; ++i
) {
435 elems
[i
- wordStart
] = aText
[i
];
437 mTextItems
.AppendElement(TextItem(aSink
, wordStart
, len
, aFlags
));
438 // Ensure that the break-before for this word is written out
439 offset
= wordStart
+ 1;
// Report the break state for [start, offset) to the sink. noBreaksNeeded is
// true whenever aSink is null, so aSink is non-null here.
445 if (!noBreaksNeeded
) {
446 aSink
->SetBreaks(start
, offset
- start
, breakState
.Elements() + start
);
// Tracks the hyphenation language of the word currently being accumulated.
// When a language is already recorded and aHyphenationLanguage differs from
// it, the word is flagged as mixed-language.
// NOTE(review): original line 456 is not visible in this listing, so it cannot
// be confirmed from here whether the assignment to mCurrentWordLanguage is
// unconditional or only the else-branch of the test above it.
452 nsLineBreaker::UpdateCurrentWordLanguage(nsIAtom
*aHyphenationLanguage
)
454 if (mCurrentWordLanguage
&& mCurrentWordLanguage
!= aHyphenationLanguage
) {
455 mCurrentWordContainsMixedLang
= true;
457 mCurrentWordLanguage
= aHyphenationLanguage
;
// Records whitespace that is not part of any sink's text. The pending word is
// flushed first, and mAfterBreakableSpace is then updated as if a space had
// been seen.
//   aFlags - BREAK_* flags; the whitespace counts as breakable unless
//            BREAK_SUPPRESS_INSIDE is set
// NOTE(review): the handling of `rv`, the body of the `if` below and the final
// return are on lines not visible in this listing.
462 nsLineBreaker::AppendInvisibleWhitespace(uint32_t aFlags
)
464 nsresult rv
= FlushCurrentWord();
468 bool isBreakableSpace
= !(aFlags
& BREAK_SUPPRESS_INSIDE
);
// Case: a breakable space came before and this whitespace is not breakable.
469 if (mAfterBreakableSpace
&& !isBreakableSpace
) {
472 mAfterBreakableSpace
= isBreakableSpace
;
// Flushes the pending word and reports through aTrailingBreak whether a break
// is pending at the end of the text seen so far (mBreakHere or
// mAfterBreakableSpace), then clears mAfterBreakableSpace.
//   aTrailingBreak - out parameter; written unconditionally in the visible code
// NOTE(review): the handling of `rv` and everything after the last visible
// line (original line 485) is not shown in this listing.
477 nsLineBreaker::Reset(bool* aTrailingBreak
)
479 nsresult rv
= FlushCurrentWord();
483 *aTrailingBreak
= mBreakHere
|| mAfterBreakableSpace
;
485 mAfterBreakableSpace
= false;