1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsTextRunTransformations.h"
8 #include "mozilla/MemoryReporting.h"
10 #include "nsGkAtoms.h"
11 #include "nsStyleConsts.h"
12 #include "nsStyleContext.h"
13 #include "nsUnicharUtils.h"
14 #include "nsUnicodeProperties.h"
15 #include "nsSpecialCasingData.h"
16 #include "mozilla/gfx/2D.h"
17 #include "nsTextFrameUtils.h"
18 #include "nsIPersistentProperties2.h"
19 #include "nsNetUtil.h"
20 #include "GreekCasing.h"
21 #include "IrishCasing.h"
23 // Unicode characters needing special casing treatment in tr/az languages
// (GetCasingFor() below also selects this behavior for ba, crh and tt).
24 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
25 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
27 // Greek sigma needs custom handling for the lowercase transform; for details
28 // see comments under "case NS_STYLE_TEXT_TRANSFORM_LOWERCASE" within
29 // nsCaseTransformTextRunFactory::TransformString(), and bug 740120.
30 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
31 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
32 #define GREEK_SMALL_LETTER_SIGMA 0x03C3
// Factory for nsTransformedTextRun. The text must not be flagged as 8-bit
// (asserted below). Raw storage is obtained from AllocateStorageForTextRun(),
// which is given sizeof(nsTransformedTextRun) and aLength, and the object is
// then constructed in that storage with placement new.
34 nsTransformedTextRun
*
35 nsTransformedTextRun::Create(const gfxTextRunFactory::Parameters
* aParams
,
36 nsTransformingTextRunFactory
* aFactory
,
37 gfxFontGroup
* aFontGroup
,
38 const char16_t
* aString
, uint32_t aLength
,
39 const uint32_t aFlags
, nsStyleContext
** aStyles
,
42 NS_ASSERTION(!(aFlags
& gfxTextRunFactory::TEXT_IS_8BIT
),
43 "didn't expect text to be marked as 8-bit here");
45 void *storage
= AllocateStorageForTextRun(sizeof(nsTransformedTextRun
), aLength
);
// Construct the run in place in the storage allocated above.
50 return new (storage
) nsTransformedTextRun(aParams
, aFactory
, aFontGroup
,
52 aFlags
, aStyles
, aOwnsFactory
);
// Stores per-character capitalization flags for the range starting at aStart
// and spanning aLength characters. On first use (mCapitalize still empty) the
// array is grown to GetLength() entries and zero-filled; the caller's flags
// are then copied in at offset aStart.
56 nsTransformedTextRun::SetCapitalization(uint32_t aStart
, uint32_t aLength
,
57 bool* aCapitalization
,
58 gfxContext
* aRefContext
)
60 if (mCapitalize
.IsEmpty()) {
61 if (!mCapitalize
.AppendElements(GetLength()))
// Start with every character marked "do not capitalize".
63 memset(mCapitalize
.Elements(), 0, GetLength()*sizeof(bool));
65 memcpy(mCapitalize
.Elements() + aStart
, aCapitalization
, aLength
*sizeof(bool));
// Forwards the line-break data to gfxTextRun::SetPotentialLineBreaks() and
// keeps that call's result in `changed`.
70 nsTransformedTextRun::SetPotentialLineBreaks(uint32_t aStart
, uint32_t aLength
,
71 uint8_t* aBreakBefore
,
72 gfxContext
* aRefContext
)
74 bool changed
= gfxTextRun::SetPotentialLineBreaks(aStart
, aLength
,
75 aBreakBefore
, aRefContext
);
// Memory reporting: starts from the base gfxTextRun measurement and adds the
// storage of the mStyles and mCapitalize arrays, plus the malloc size of
// mFactory.
// NOTE(review): confirm whether the mFactory contribution is meant to be
// conditional on this run owning the factory.
83 nsTransformedTextRun::SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf
)
85 size_t total
= gfxTextRun::SizeOfExcludingThis(aMallocSizeOf
);
86 total
+= mStyles
.SizeOfExcludingThis(aMallocSizeOf
);
87 total
+= mCapitalize
.SizeOfExcludingThis(aMallocSizeOf
);
89 total
+= aMallocSizeOf(mFactory
);
// Memory reporting: the malloc size of this object itself plus everything
// counted by SizeOfExcludingThis().
95 nsTransformedTextRun::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf
)
97 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
// UTF-16 overload: creates the transformed text run by passing every argument,
// together with this factory, straight to nsTransformedTextRun::Create().
100 nsTransformedTextRun
*
101 nsTransformingTextRunFactory::MakeTextRun(const char16_t
* aString
, uint32_t aLength
,
102 const gfxTextRunFactory::Parameters
* aParams
,
103 gfxFontGroup
* aFontGroup
, uint32_t aFlags
,
104 nsStyleContext
** aStyles
, bool aOwnsFactory
)
106 return nsTransformedTextRun::Create(aParams
, this, aFontGroup
,
107 aString
, aLength
, aFlags
, aStyles
, aOwnsFactory
);
// 8-bit overload: widens the input into a temporary UTF-16 string with
// NS_ConvertASCIItoUTF16 and defers to the char16_t overload. The
// TEXT_IS_PERSISTENT and TEXT_IS_8BIT bits are cleared from aFlags before
// forwarding (presumably because the temporary buffer is neither persistent
// nor 8-bit).
110 nsTransformedTextRun
*
111 nsTransformingTextRunFactory::MakeTextRun(const uint8_t* aString
, uint32_t aLength
,
112 const gfxTextRunFactory::Parameters
* aParams
,
113 gfxFontGroup
* aFontGroup
, uint32_t aFlags
,
114 nsStyleContext
** aStyles
, bool aOwnsFactory
)
116 // We'll only have a Unicode code path to minimize the amount of code needed
117 // for these rarely used features
118 NS_ConvertASCIItoUTF16
unicodeString(reinterpret_cast<const char*>(aString
), aLength
);
119 return MakeTextRun(unicodeString
.get(), aLength
, aParams
, aFontGroup
,
120 aFlags
& ~(gfxFontGroup::TEXT_IS_PERSISTENT
| gfxFontGroup::TEXT_IS_8BIT
),
121 aStyles
, aOwnsFactory
);
// Rebuilds aDest's glyph data from aSrc, one source glyph run at a time.
//
// aCharsToMerge is indexed by aSrc character: while the entry for the next
// character (k + 1) is true, the glyphs collected so far are held back so
// that the following character's glyphs are accumulated with them.
// aDeletedChars is indexed by aDest offset: after a merged glyph has been
// written, each following offset whose entry is true receives an empty
// CompressedGlyph.
//
// Merging never crosses a glyph run boundary; a merge run that starts on a
// character flagged for merging triggers the warning below.
125 MergeCharactersInTextRun(gfxTextRun
* aDest
, gfxTextRun
* aSrc
,
126 const bool* aCharsToMerge
, const bool* aDeletedChars
)
128 aDest
->ResetGlyphRuns();
130 gfxTextRun::GlyphRunIterator
iter(aSrc
, 0, aSrc
->GetLength());
// Accumulates the detailed glyphs of the merge run currently being built.
132 nsAutoTArray
<gfxTextRun::DetailedGlyph
,2> glyphs
;
133 while (iter
.NextRun()) {
134 gfxTextRun::GlyphRun
* run
= iter
.GetGlyphRun();
135 nsresult rv
= aDest
->AddGlyphRun(run
->mFont
, run
->mMatchType
,
140 bool anyMissing
= false;
141 uint32_t mergeRunStart
= iter
.GetStringStart();
142 const gfxTextRun::CompressedGlyph
*srcGlyphs
= aSrc
->GetCharacterGlyphs();
143 gfxTextRun::CompressedGlyph mergedGlyph
= srcGlyphs
[mergeRunStart
];
144 uint32_t stringEnd
= iter
.GetStringEnd();
145 for (uint32_t k
= iter
.GetStringStart(); k
< stringEnd
; ++k
) {
146 const gfxTextRun::CompressedGlyph g
= srcGlyphs
[k
];
147 if (g
.IsSimpleGlyph()) {
// Expand a simple glyph into a DetailedGlyph (zero offsets) so that it can
// be stored alongside the other glyphs of the merge run.
149 gfxTextRun::DetailedGlyph details
;
150 details
.mGlyphID
= g
.GetSimpleGlyph();
151 details
.mAdvance
= g
.GetSimpleAdvance();
152 details
.mXOffset
= 0;
153 details
.mYOffset
= 0;
154 glyphs
.AppendElement(details
);
161 if (g
.GetGlyphCount() > 0) {
162 glyphs
.AppendElements(aSrc
->GetDetailedGlyphs(k
), g
.GetGlyphCount());
166 if (k
+ 1 < iter
.GetStringEnd() && aCharsToMerge
[k
+ 1]) {
167 // next char is supposed to merge with current, so loop without
168 // writing current merged glyph to the destination
172 // If the start of the merge run is actually a character that should
173 // have been merged with the previous character (this can happen
174 // if there's a font change in the middle of a case-mapped character,
175 // that decomposed into a sequence of base+diacritics, for example),
176 // just discard the entire merge run. See comment at start of this
178 NS_WARN_IF_FALSE(!aCharsToMerge
[mergeRunStart
],
179 "unable to merge across a glyph run boundary, "
180 "glyph(s) discarded");
181 if (!aCharsToMerge
[mergeRunStart
]) {
183 mergedGlyph
.SetMissing(glyphs
.Length());
185 mergedGlyph
.SetComplex(mergedGlyph
.IsClusterStart(),
186 mergedGlyph
.IsLigatureGroupStart(),
189 aDest
->SetGlyphs(offset
, mergedGlyph
, glyphs
.Elements());
// Give every deleted destination character that follows an empty glyph.
192 while (offset
< aDest
->GetLength() && aDeletedChars
[offset
]) {
193 aDest
->SetGlyphs(offset
++, gfxTextRun::CompressedGlyph(), nullptr);
// Begin the next merge run at the following source character.
199 mergeRunStart
= k
+ 1;
200 if (mergeRunStart
< stringEnd
) {
201 mergedGlyph
= srcGlyphs
[mergeRunStart
];
204 NS_ASSERTION(glyphs
.Length() == 0,
205 "Leftover glyphs, don't request merging of the last character with its next!");
207 NS_ASSERTION(offset
== aDest
->GetLength(), "Bad offset calculations");
// Builds the gfxTextRunFactory::Parameters used when creating the inner
// (child) text run: aRefContext and the outer run's app-units-per-dev-unit
// value are carried over, and the other initializers shown are null/zero.
// Also writes the outer run's flags, with TEXT_IS_PERSISTENT cleared, to
// *aFlags.
210 gfxTextRunFactory::Parameters
211 GetParametersForInner(nsTransformedTextRun
* aTextRun
, uint32_t* aFlags
,
212 gfxContext
* aRefContext
)
214 gfxTextRunFactory::Parameters params
=
215 { aRefContext
, nullptr, nullptr,
216 nullptr, 0, aTextRun
->GetAppUnitsPerDevUnit()
218 *aFlags
= aTextRun
->GetFlags() & ~gfxFontGroup::TEXT_IS_PERSISTENT
;
222 // Some languages have special casing conventions that differ from the
223 // default Unicode mappings.
224 // The enum values here are named for well-known exemplar languages that
225 // exhibit the behavior in question; multiple lang tags may map to the
226 // same setting here, if the behavior is shared by other languages.
// GetCasingFor() performs the mapping from a language atom to one of these
// values.
227 enum LanguageSpecificCasingBehavior
{
228 eLSCB_None
, // default non-lang-specific behavior
229 eLSCB_Dutch
, // treat "ij" digraph as a unit for capitalization
230 eLSCB_Greek
, // strip accent when uppercasing Greek vowels
231 eLSCB_Irish
, // keep prefix letters as lowercase when uppercasing Irish
232 eLSCB_Turkish
// preserve dotted/dotless-i distinction in uppercase
// Maps a language atom to its LanguageSpecificCasingBehavior.
// tr, az, ba, crh and tt all select eLSCB_Turkish. nl, el and ga are each
// tested individually (presumably yielding the Dutch, Greek and Irish values
// respectively -- confirm against the return statements). For any other tag,
// the string form is cut at the '-' located by FindChar() and the shortened
// tag is looked up again through a recursive call.
235 static LanguageSpecificCasingBehavior
236 GetCasingFor(const nsIAtom
* aLang
)
241 if (aLang
== nsGkAtoms::tr
||
242 aLang
== nsGkAtoms::az
||
243 aLang
== nsGkAtoms::ba
||
244 aLang
== nsGkAtoms::crh
||
245 aLang
== nsGkAtoms::tt
) {
246 return eLSCB_Turkish
;
248 if (aLang
== nsGkAtoms::nl
) {
251 if (aLang
== nsGkAtoms::el
) {
254 if (aLang
== nsGkAtoms::ga
) {
258 // Is there a region subtag we should ignore?
// The const_cast is only there to construct the nsAtomString from a const
// atom pointer.
259 nsAtomString
langStr(const_cast<nsIAtom
*>(aLang
));
260 int index
= langStr
.FindChar('-');
262 langStr
.Truncate(index
);
263 nsCOMPtr
<nsIAtom
> truncatedLang
= do_GetAtom(langStr
);
264 return GetCasingFor(truncatedLang
);
// Applies the CSS text-transform to aString, appending the result to
// aConvertedString.
//
// The transform applied to each character is uppercase when aAllUppercase is
// set; otherwise it is read from that character's style context in
// aTextRun->mStyles. The language used for language-specific casing starts as
// aLanguage and is updated from the style context whenever it changes.
//
// While converting, the function appends bookkeeping entries to
// aDeletedCharsArray and aCharsToMergeArray, and to aStyleArray and
// aCanBreakBeforeArray. As the precondition below states, aTextRun,
// aCanBreakBeforeArray and aStyleArray are optional but must be supplied
// together.
271 nsCaseTransformTextRunFactory::TransformString(
272 const nsAString
& aString
,
273 nsString
& aConvertedString
,
275 const nsIAtom
* aLanguage
,
276 nsTArray
<bool>& aCharsToMergeArray
,
277 nsTArray
<bool>& aDeletedCharsArray
,
278 nsTransformedTextRun
* aTextRun
,
279 nsTArray
<uint8_t>* aCanBreakBeforeArray
,
280 nsTArray
<nsStyleContext
*>* aStyleArray
)
282 NS_PRECONDITION(!aTextRun
|| (aCanBreakBeforeArray
&& aStyleArray
),
283 "either none or all three optional parameters required");
285 uint32_t length
= aString
.Length();
286 const char16_t
* str
= aString
.BeginReading();
288 bool mergeNeeded
= false;
290 bool capitalizeDutchIJ
= false;
291 bool prevIsLetter
= false;
292 bool ntPrefix
= false; // true immediately after a word-initial 'n' or 't'
293 // when doing Irish lowercasing
// Position in aConvertedString of a provisional FINAL SIGMA, or -1 if none.
294 uint32_t sigmaIndex
= uint32_t(-1);
295 nsIUGenCategory::nsUGenCategory cat
;
297 uint8_t style
= aAllUppercase
? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
: 0;
298 const nsIAtom
* lang
= aLanguage
;
300 LanguageSpecificCasingBehavior languageSpecificCasing
= GetCasingFor(lang
);
301 mozilla::GreekCasing::State greekState
;
302 mozilla::IrishCasing::State irishState
;
303 uint32_t irishMark
= uint32_t(-1); // location of possible prefix letter(s)
// Walk the input one UTF-16 code unit at a time.
305 for (uint32_t i
= 0; i
< length
; ++i
) {
306 uint32_t ch
= str
[i
];
308 nsStyleContext
* styleContext
;
310 styleContext
= aTextRun
->mStyles
[i
];
311 style
= aAllUppercase
? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
:
312 styleContext
->StyleText()->mTextTransform
;
// Re-derive the language-specific behavior when the language changes.
314 if (lang
!= styleContext
->StyleFont()->mLanguage
) {
315 lang
= styleContext
->StyleFont()->mLanguage
;
316 languageSpecificCasing
= GetCasingFor(lang
);
319 irishMark
= uint32_t(-1);
324 const mozilla::unicode::MultiCharMapping
*mcm
;
325 bool inhibitBreakBefore
= false; // have we just deleted preceding hyphen?
// Combine a high surrogate followed by a low surrogate into one UCS4 value.
327 if (NS_IS_HIGH_SURROGATE(ch
) && i
< length
- 1 &&
328 NS_IS_LOW_SURROGATE(str
[i
+ 1])) {
329 ch
= SURROGATE_TO_UCS4(ch
, str
[i
+ 1]);
333 case NS_STYLE_TEXT_TRANSFORM_LOWERCASE
:
334 if (languageSpecificCasing
== eLSCB_Turkish
) {
336 ch
= LATIN_SMALL_LETTER_DOTLESS_I
;
338 sigmaIndex
= uint32_t(-1);
341 if (ch
== LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
) {
344 sigmaIndex
= uint32_t(-1);
349 cat
= mozilla::unicode::GetGenCategory(ch
);
351 if (languageSpecificCasing
== eLSCB_Irish
&&
352 cat
== nsIUGenCategory::kLetter
) {
353 // See bug 1018805 for Irish lowercasing requirements
354 if (!prevIsLetter
&& (ch
== 'n' || ch
== 't')) {
357 if (ntPrefix
&& mozilla::IrishCasing::IsUpperVowel(ch
)) {
358 aConvertedString
.Append('-');
367 // Special lowercasing behavior for Greek Sigma: note that this is listed
368 // as context-sensitive in Unicode's SpecialCasing.txt, but is *not* a
369 // language-specific mapping; it applies regardless of the language of
// the text being transformed.
372 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA (i.e.
373 // the non-final form) whenever there is a following letter, or when the
374 // CAPITAL SIGMA occurs in isolation (neither preceded nor followed by a
375 // LETTER); and to FINAL SIGMA when it is preceded by another letter but
376 // not followed by one.
378 // To implement the context-sensitive nature of this mapping, we keep
379 // track of whether the previous character was a letter. If not, CAPITAL
380 // SIGMA will map directly to SMALL SIGMA. If the previous character
381 // was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we record the
382 // position in the converted string; if we then encounter another letter,
383 // that FINAL SIGMA is replaced with a standard SMALL SIGMA.
385 // If sigmaIndex is not -1, it marks where we have provisionally mapped
386 // a CAPITAL SIGMA to FINAL SIGMA; if we now find another letter, we
387 // need to change it to SMALL SIGMA.
388 if (sigmaIndex
!= uint32_t(-1)) {
389 if (cat
== nsIUGenCategory::kLetter
) {
390 aConvertedString
.SetCharAt(GREEK_SMALL_LETTER_SIGMA
, sigmaIndex
);
394 if (ch
== GREEK_CAPITAL_LETTER_SIGMA
) {
395 // If preceding char was a letter, map to FINAL instead of SMALL,
396 // and note where it occurred by setting sigmaIndex; we'll change it
397 // to standard SMALL SIGMA later if another letter follows
399 ch
= GREEK_SMALL_LETTER_FINAL_SIGMA
;
400 sigmaIndex
= aConvertedString
.Length();
402 // CAPITAL SIGMA not preceded by a letter is unconditionally mapped
// to SMALL SIGMA
404 ch
= GREEK_SMALL_LETTER_SIGMA
;
405 sigmaIndex
= uint32_t(-1);
411 // ignore diacritics for the purpose of contextual sigma mapping;
412 // otherwise, reset prevIsLetter appropriately and clear the
// sigmaIndex marker
414 if (cat
!= nsIUGenCategory::kMark
) {
415 prevIsLetter
= (cat
== nsIUGenCategory::kLetter
);
416 sigmaIndex
= uint32_t(-1);
419 mcm
= mozilla::unicode::SpecialLower(ch
);
// Multi-character mapping: append all but the last mapped character here;
// the last one is left in ch.
422 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
423 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
427 ch
= mcm
->mMappedChars
[j
];
431 ch
= ToLowerCase(ch
);
434 case NS_STYLE_TEXT_TRANSFORM_UPPERCASE
:
435 if (languageSpecificCasing
== eLSCB_Turkish
&& ch
== 'i') {
436 ch
= LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
;
440 if (languageSpecificCasing
== eLSCB_Greek
) {
441 ch
= mozilla::GreekCasing::UpperCase(ch
, greekState
);
445 if (languageSpecificCasing
== eLSCB_Irish
) {
448 ch
= mozilla::IrishCasing::UpperCase(ch
, irishState
, mark
, action
);
450 irishMark
= aConvertedString
.Length();
453 nsString
& str
= aConvertedString
; // shorthand
456 // lowercase a single prefix letter
457 NS_ASSERTION(str
.Length() > 0 && irishMark
< str
.Length(),
459 str
.SetCharAt(ToLowerCase(str
[irishMark
]), irishMark
);
460 irishMark
= uint32_t(-1);
463 // lowercase two prefix letters (immediately before current pos)
464 NS_ASSERTION(str
.Length() >= 2 && irishMark
== str
.Length() - 2,
466 str
.SetCharAt(ToLowerCase(str
[irishMark
]), irishMark
);
467 str
.SetCharAt(ToLowerCase(str
[irishMark
+ 1]), irishMark
+ 1);
468 irishMark
= uint32_t(-1);
471 // lowercase one prefix letter, and delete following hyphen
472 // (which must be the immediately-preceding char)
473 NS_ASSERTION(str
.Length() >= 2 && irishMark
== str
.Length() - 2,
475 str
.Replace(irishMark
, 2, ToLowerCase(str
[irishMark
]));
476 aDeletedCharsArray
[irishMark
+ 1] = true;
477 // Remove the trailing entries (corresponding to the deleted hyphen)
478 // from the auxiliary arrays.
479 aCharsToMergeArray
.SetLength(aCharsToMergeArray
.Length() - 1);
481 aStyleArray
->SetLength(aStyleArray
->Length() - 1);
482 aCanBreakBeforeArray
->SetLength(aCanBreakBeforeArray
->Length() - 1);
483 inhibitBreakBefore
= true;
486 irishMark
= uint32_t(-1);
489 // ch has been set to the uppercase for current char;
490 // No need to check for SpecialUpper here as none of the characters
491 // that could trigger an Irish casing action have special mappings.
494 // If we didn't have any special action to perform, fall through
495 // to check for special uppercase (ß)
498 mcm
= mozilla::unicode::SpecialUpper(ch
);
501 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
502 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
506 ch
= mcm
->mMappedChars
[j
];
510 ch
= ToUpperCase(ch
);
513 case NS_STYLE_TEXT_TRANSFORM_CAPITALIZE
:
515 if (capitalizeDutchIJ
&& ch
== 'j') {
517 capitalizeDutchIJ
= false;
520 capitalizeDutchIJ
= false;
// Only characters flagged in the run's mCapitalize array are capitalized.
521 if (i
< aTextRun
->mCapitalize
.Length() && aTextRun
->mCapitalize
[i
]) {
522 if (languageSpecificCasing
== eLSCB_Turkish
&& ch
== 'i') {
523 ch
= LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
;
526 if (languageSpecificCasing
== eLSCB_Dutch
&& ch
== 'i') {
528 capitalizeDutchIJ
= true;
532 mcm
= mozilla::unicode::SpecialTitle(ch
);
535 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
536 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
540 ch
= mcm
->mMappedChars
[j
];
544 ch
= ToTitleCase(ch
);
549 case NS_STYLE_TEXT_TRANSFORM_FULLWIDTH
:
550 ch
= mozilla::unicode::GetFullWidth(ch
);
// A ch value of uint32_t(-1) is recorded as a deleted character.
557 if (ch
== uint32_t(-1)) {
558 aDeletedCharsArray
.AppendElement(true);
561 aDeletedCharsArray
.AppendElement(false);
562 aCharsToMergeArray
.AppendElement(false);
564 aStyleArray
->AppendElement(styleContext
);
565 aCanBreakBeforeArray
->AppendElement(inhibitBreakBefore
? false :
566 aTextRun
->CanBreakLineBefore(i
));
570 aConvertedString
.Append(ch
);
572 aConvertedString
.Append(H_SURROGATE(ch
));
573 aConvertedString
.Append(L_SURROGATE(ch
));
575 aDeletedCharsArray
.AppendElement(true); // not exactly deleted, but the
576 // trailing surrogate is skipped
// Each extra output character is flagged for merging and never allows a
// line break before it.
580 while (extraChars
-- > 0) {
582 aCharsToMergeArray
.AppendElement(true);
584 aStyleArray
->AppendElement(styleContext
);
585 aCanBreakBeforeArray
->AppendElement(false);
// Rebuilds aTextRun's glyph data from the case-transformed version of its
// text:
//  1. TransformString() produces the converted string plus the merge,
//     deleted-character, break-before and style arrays.
//  2. A child text run is created for the converted string, through
//     mInnerTransformingTextRunFactory when one is set and directly from the
//     font group otherwise.
//  3. The break-before values are copied into the child.
//  4. The child's glyphs are either merged into aTextRun with
//     MergeCharactersInTextRun() or copied across with CopyGlyphDataFrom().
595 nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun
* aTextRun
,
596 gfxContext
* aRefContext
)
598 nsAutoString convertedString
;
599 nsAutoTArray
<bool,50> charsToMergeArray
;
600 nsAutoTArray
<bool,50> deletedCharsArray
;
601 nsAutoTArray
<uint8_t,50> canBreakBeforeArray
;
602 nsAutoTArray
<nsStyleContext
*,50> styleArray
;
604 bool mergeNeeded
= TransformString(aTextRun
->mString
,
611 &canBreakBeforeArray
,
615 gfxTextRunFactory::Parameters innerParams
=
616 GetParametersForInner(aTextRun
, &flags
, aRefContext
);
617 gfxFontGroup
* fontGroup
= aTextRun
->GetFontGroup();
// Whichever of these holds the child run owns it; `child` is a raw alias.
619 nsAutoPtr
<nsTransformedTextRun
> transformedChild
;
620 nsAutoPtr
<gfxTextRun
> cachedChild
;
623 if (mInnerTransformingTextRunFactory
) {
624 transformedChild
= mInnerTransformingTextRunFactory
->MakeTextRun(
625 convertedString
.BeginReading(), convertedString
.Length(),
626 &innerParams
, fontGroup
, flags
, styleArray
.Elements(), false);
627 child
= transformedChild
.get();
629 cachedChild
= fontGroup
->MakeTextRun(
630 convertedString
.BeginReading(), convertedString
.Length(),
631 &innerParams
, flags
);
632 child
= cachedChild
.get();
636 // Copy potential linebreaks into child so they're preserved
637 // (and also child will be shaped appropriately)
638 NS_ASSERTION(convertedString
.Length() == canBreakBeforeArray
.Length(),
639 "Dropped characters or break-before values somewhere!");
640 child
->SetPotentialLineBreaks(0, canBreakBeforeArray
.Length(),
641 canBreakBeforeArray
.Elements(), aRefContext
);
642 if (transformedChild
) {
643 transformedChild
->FinishSettingProperties(aRefContext
);
647 // Now merge multiple characters into one multi-glyph character as required
648 // and deal with skipping deleted accent chars
649 NS_ASSERTION(charsToMergeArray
.Length() == child
->GetLength(),
650 "source length mismatch");
651 NS_ASSERTION(deletedCharsArray
.Length() == aTextRun
->GetLength(),
652 "destination length mismatch");
653 MergeCharactersInTextRun(aTextRun
, child
, charsToMergeArray
.Elements(),
654 deletedCharsArray
.Elements());
656 // No merging to do, so just copy; this produces a more optimized textrun.
657 // We can't steal the data because the child may be cached and stealing
658 // the data would break the cache.
659 aTextRun
->ResetGlyphRuns();
660 aTextRun
->CopyGlyphDataFrom(child
, 0, child
->GetLength(), 0);