1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsTextRunTransformations.h"
11 #include "GreekCasing.h"
12 #include "IrishCasing.h"
13 #include "mozilla/ComputedStyleInlines.h"
14 #include "mozilla/MemoryReporting.h"
15 #include "mozilla/TextEditor.h"
16 #include "mozilla/gfx/2D.h"
17 #include "nsGkAtoms.h"
18 #include "nsSpecialCasingData.h"
19 #include "nsStyleConsts.h"
20 #include "nsTextFrameUtils.h"
21 #include "nsUnicharUtils.h"
22 #include "nsUnicodeProperties.h"
24 using namespace mozilla
;
25 using namespace mozilla::gfx
;
27 // Unicode characters needing special casing treatment in tr/az languages
28 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
29 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
31 // Greek sigma needs custom handling for the lowercase transform; for details, see the comments in the Lowercase case of TransformString().
33 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
34 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
35 #define GREEK_SMALL_LETTER_SIGMA 0x03C3
// Creates a reference-counted nsTransformedTextRun.
//
// Asserts that aFlags does not carry TEXT_IS_8BIT, calls
// AllocateStorageForTextRun() with the object size and aLength, and then
// placement-constructs the run in `storage`, forwarding every argument
// (aStyles is moved from). The new run is handed to the caller through
// already_AddRefed via forget().
//
// NOTE(review): the declaration of `storage` and any check of the allocation
// result are not visible in this excerpt - confirm against the full file.
37 already_AddRefed
<nsTransformedTextRun
> nsTransformedTextRun::Create(
38 const gfxTextRunFactory::Parameters
* aParams
,
39 nsTransformingTextRunFactory
* aFactory
, gfxFontGroup
* aFontGroup
,
40 const char16_t
* aString
, uint32_t aLength
,
41 const gfx::ShapedTextFlags aFlags
, const nsTextFrameUtils::Flags aFlags2
,
42 nsTArray
<RefPtr
<nsTransformedCharStyle
>>&& aStyles
, bool aOwnsFactory
) {
43 NS_ASSERTION(!(aFlags
& gfx::ShapedTextFlags::TEXT_IS_8BIT
),
44 "didn't expect text to be marked as 8-bit here");
47 AllocateStorageForTextRun(sizeof(nsTransformedTextRun
), aLength
);
52 RefPtr
<nsTransformedTextRun
> result
= new (storage
)
53 nsTransformedTextRun(aParams
, aFactory
, aFontGroup
, aString
, aLength
,
54 aFlags
, aFlags2
, std::move(aStyles
), aOwnsFactory
);
55 return result
.forget();
// Stores capitalization flags for the characters starting at aStart.
//
// mCapitalize is sized lazily: when it is still empty, GetLength() entries are
// appended and zero-filled. After that, aLength bools are copied from
// aCapitalization into mCapitalize beginning at index aStart.
//
// NOTE(review): nothing visible here checks that aStart + aLength stays within
// GetLength(); presumably the caller guarantees it - confirm.
58 void nsTransformedTextRun::SetCapitalization(uint32_t aStart
, uint32_t aLength
,
59 bool* aCapitalization
) {
60 if (mCapitalize
.IsEmpty()) {
61 // XXX(Bug 1631371) Check if this should use a fallible operation as it
63 mCapitalize
.AppendElements(GetLength());
64 memset(mCapitalize
.Elements(), 0, GetLength() * sizeof(bool));
66 memcpy(mCapitalize
.Elements() + aStart
, aCapitalization
,
67 aLength
* sizeof(bool));
// Forwards aRange / aBreakBefore to gfxTextRun::SetPotentialLineBreaks() and
// keeps that call's result in `changed`.
//
// NOTE(review): the statements that follow the base-class call (including the
// return) are not visible in this excerpt - confirm what is done with
// `changed` against the full file.
71 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange
,
72 const uint8_t* aBreakBefore
) {
73 bool changed
= gfxTextRun::SetPotentialLineBreaks(aRange
, aBreakBefore
);
// Memory reporting: accumulates into `total` the size reported by
// gfxTextRun::SizeOfExcludingThis(), the shallow sizes of the mStyles and
// mCapitalize arrays, and aMallocSizeOf(mFactory).
//
// NOTE(review): whether the mFactory term is guarded by a condition, and the
// final return, are not visible in this excerpt - confirm.
80 size_t nsTransformedTextRun::SizeOfExcludingThis(
81 mozilla::MallocSizeOf aMallocSizeOf
) {
82 size_t total
= gfxTextRun::SizeOfExcludingThis(aMallocSizeOf
);
83 total
+= mStyles
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
84 total
+= mCapitalize
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
86 total
+= aMallocSizeOf(mFactory
);
// Memory reporting: returns aMallocSizeOf(this) plus the result of
// SizeOfExcludingThis(aMallocSizeOf).
91 size_t nsTransformedTextRun::SizeOfIncludingThis(
92 mozilla::MallocSizeOf aMallocSizeOf
) {
93 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
// UTF-16 overload: builds a transformed text run by delegating to
// nsTransformedTextRun::Create(), passing `this` as the factory and forwarding
// the remaining arguments unchanged (aStyles is moved from).
96 already_AddRefed
<nsTransformedTextRun
>
97 nsTransformingTextRunFactory::MakeTextRun(
98 const char16_t
* aString
, uint32_t aLength
,
99 const gfxTextRunFactory::Parameters
* aParams
, gfxFontGroup
* aFontGroup
,
100 gfx::ShapedTextFlags aFlags
, nsTextFrameUtils::Flags aFlags2
,
101 nsTArray
<RefPtr
<nsTransformedCharStyle
>>&& aStyles
, bool aOwnsFactory
) {
102 return nsTransformedTextRun::Create(aParams
, this, aFontGroup
, aString
,
103 aLength
, aFlags
, aFlags2
,
104 std::move(aStyles
), aOwnsFactory
);
// 8-bit overload: widens the input with NS_ConvertASCIItoUTF16 and then calls
// the char16_t MakeTextRun() overload with the TEXT_IS_8BIT flag cleared from
// aFlags. All other arguments are forwarded (aStyles is moved from).
//
// NOTE(review): the tail of the NS_ConvertASCIItoUTF16 constructor call is not
// visible in this excerpt; presumably it passes aLength - confirm.
107 already_AddRefed
<nsTransformedTextRun
>
108 nsTransformingTextRunFactory::MakeTextRun(
109 const uint8_t* aString
, uint32_t aLength
,
110 const gfxTextRunFactory::Parameters
* aParams
, gfxFontGroup
* aFontGroup
,
111 gfx::ShapedTextFlags aFlags
, nsTextFrameUtils::Flags aFlags2
,
112 nsTArray
<RefPtr
<nsTransformedCharStyle
>>&& aStyles
, bool aOwnsFactory
) {
113 // We'll only have a Unicode code path to minimize the amount of code needed
114 // for these rarely used features
115 NS_ConvertASCIItoUTF16
unicodeString(reinterpret_cast<const char*>(aString
),
117 return MakeTextRun(unicodeString
.get(), aLength
, aParams
, aFontGroup
,
118 aFlags
& ~gfx::ShapedTextFlags::TEXT_IS_8BIT
, aFlags2
,
119 std::move(aStyles
), aOwnsFactory
);
// Copies glyph data from aSrc into aDest, combining runs of source characters
// into single destination characters.
//
// aDest must not contain glyph runs yet (asserted). For every glyph run of
// aSrc a matching glyph run is added to aDest, and the run's characters are
// walked while their glyphs are collected in `glyphs`:
//   - aCharsToMerge is indexed by source character. When the entry for the
//     next character (k + 1) is true, the loop keeps accumulating instead of
//     writing the merged glyph out.
//   - When a merge run ends, the result is stored at destGlyphs[offset],
//     either as the original simple glyph (exactly one glyph collected) or as
//     a complex record plus detailed glyphs.
//   - A merge run whose first character is itself flagged for merging cannot
//     be joined across the glyph-run boundary; its glyphs are discarded and a
//     warning is issued.
//   - aDeletedChars is indexed by destination offset. Following positions
//     flagged true (and still below aDest->GetLength()) are filled with a
//     continuation glyph.
//
// NOTE(review): the declaration and initial value of `offset`, and several
// branch/brace lines, are not visible in this excerpt - confirm against the
// full file before relying on the exact control flow.
122 void MergeCharactersInTextRun(gfxTextRun
* aDest
, gfxTextRun
* aSrc
,
123 const bool* aCharsToMerge
,
124 const bool* aDeletedChars
) {
125 MOZ_ASSERT(!aDest
->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
126 gfxTextRun::GlyphRunIterator
iter(aSrc
, gfxTextRun::Range(aSrc
));
128 AutoTArray
<gfxTextRun::DetailedGlyph
, 2> glyphs
;
129 const gfxTextRun::CompressedGlyph continuationGlyph
=
130 gfxTextRun::CompressedGlyph::MakeComplex(false, false);
131 const gfxTextRun::CompressedGlyph
* srcGlyphs
= aSrc
->GetCharacterGlyphs();
132 gfxTextRun::CompressedGlyph
* destGlyphs
= aDest
->GetCharacterGlyphs();
133 while (iter
.NextRun()) {
134 const gfxTextRun::GlyphRun
* run
= iter
.GetGlyphRun();
135 aDest
->AddGlyphRun(run
->mFont
, run
->mMatchType
, offset
, false,
136 run
->mOrientation
, run
->mIsCJK
);
138 bool anyMissing
= false;
139 uint32_t mergeRunStart
= iter
.GetStringStart();
140 // Initialize to a copy of the first source glyph in the merge run.
141 gfxTextRun::CompressedGlyph mergedGlyph
= srcGlyphs
[mergeRunStart
];
142 uint32_t stringEnd
= iter
.GetStringEnd();
143 for (uint32_t k
= iter
.GetStringStart(); k
< stringEnd
; ++k
) {
144 const gfxTextRun::CompressedGlyph g
= srcGlyphs
[k
];
145 if (g
.IsSimpleGlyph()) {
147 gfxTextRun::DetailedGlyph details
;
148 details
.mGlyphID
= g
.GetSimpleGlyph();
149 details
.mAdvance
= g
.GetSimpleAdvance();
150 glyphs
.AppendElement(details
);
157 if (g
.GetGlyphCount() > 0) {
158 glyphs
.AppendElements(aSrc
->GetDetailedGlyphs(k
), g
.GetGlyphCount());
162 if (k
+ 1 < iter
.GetStringEnd() && aCharsToMerge
[k
+ 1]) {
163 // next char is supposed to merge with current, so loop without
164 // writing current merged glyph to the destination
168 // If the start of the merge run is actually a character that should
169 // have been merged with the previous character (this can happen
170 // if there's a font change in the middle of a case-mapped character,
171 // that decomposed into a sequence of base+diacritics, for example),
172 // just discard the entire merge run. See comment at start of this
174 NS_WARNING_ASSERTION(
175 !aCharsToMerge
[mergeRunStart
],
176 "unable to merge across a glyph run boundary, glyph(s) discarded");
177 if (!aCharsToMerge
[mergeRunStart
]) {
178 // Determine if we can just copy the existing simple glyph record.
179 if (mergedGlyph
.IsSimpleGlyph() && glyphs
.Length() == 1) {
180 destGlyphs
[offset
] = mergedGlyph
;
182 // Otherwise set up complex glyph record and store detailed glyphs.
183 mergedGlyph
.SetComplex(mergedGlyph
.IsClusterStart(),
184 mergedGlyph
.IsLigatureGroupStart());
185 destGlyphs
[offset
] = mergedGlyph
;
186 aDest
->SetDetailedGlyphs(offset
, glyphs
.Length(), glyphs
.Elements());
188 destGlyphs
[offset
].SetMissing();
193 while (offset
< aDest
->GetLength() && aDeletedChars
[offset
]) {
194 destGlyphs
[offset
++] = continuationGlyph
;
200 mergeRunStart
= k
+ 1;
201 if (mergeRunStart
< stringEnd
) {
202 mergedGlyph
= srcGlyphs
[mergeRunStart
];
205 NS_ASSERTION(glyphs
.Length() == 0,
206 "Leftover glyphs, don't request merging of the last character "
209 NS_ASSERTION(offset
== aDest
->GetLength(), "Bad offset calculations");
// Builds the gfxTextRunFactory::Parameters used for the inner (child) text
// run: aRefDrawTarget, three null pointers, a zero, and aTextRun's
// app-units-per-dev-unit value. As a side effect, *aFlags is set to
// aTextRun->GetFlags().
//
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `params` is returned - confirm.
212 gfxTextRunFactory::Parameters
GetParametersForInner(
213 nsTransformedTextRun
* aTextRun
, gfx::ShapedTextFlags
* aFlags
,
214 DrawTarget
* aRefDrawTarget
) {
215 gfxTextRunFactory::Parameters params
= {
216 aRefDrawTarget
, nullptr, nullptr,
217 nullptr, 0, aTextRun
->GetAppUnitsPerDevUnit()};
218 *aFlags
= aTextRun
->GetFlags();
// Selector for language-dependent casing rules. Values of this enum are
// produced by GetCasingFor() and consulted by
// nsCaseTransformTextRunFactory::TransformString().
222 // Some languages have special casing conventions that differ from the
223 // default Unicode mappings.
224 // The enum values here are named for well-known exemplar languages that
225 // exhibit the behavior in question; multiple lang tags may map to the
226 // same setting here, if the behavior is shared by other languages.
227 enum LanguageSpecificCasingBehavior
{
228 eLSCB_None
, // default non-lang-specific behavior
229 eLSCB_Dutch
, // treat "ij" digraph as a unit for capitalization
230 eLSCB_Greek
, // strip accent when uppercasing Greek vowels
231 eLSCB_Irish
, // keep prefix letters as lowercase when uppercasing Irish
232 eLSCB_Turkish
, // preserve dotted/dotless-i distinction in uppercase
233 eLSCB_Lithuanian
// retain dot on lowercase i/j when an accent is present
// Maps a language atom to the casing behavior that applies to it.
//
// The atom is compared by identity against known language atoms: tr, az, ba,
// crh and tt select eLSCB_Turkish, and lt selects eLSCB_Lithuanian. nl, el and
// ga are also special-cased.
//
// If no atom matches, the tag text is searched for '-', truncated there,
// re-atomized, and the lookup is retried recursively on the shortened tag.
//
// NOTE(review): the return statements for nl / el / ga, any null check on
// aLang, the guard around the truncation, and the final fallback return are
// not visible in this excerpt - confirm against the full file.
236 static LanguageSpecificCasingBehavior
GetCasingFor(const nsAtom
* aLang
) {
240 if (aLang
== nsGkAtoms::tr
|| aLang
== nsGkAtoms::az
||
241 aLang
== nsGkAtoms::ba
|| aLang
== nsGkAtoms::crh
||
242 aLang
== nsGkAtoms::tt
) {
243 return eLSCB_Turkish
;
245 if (aLang
== nsGkAtoms::nl
) {
248 if (aLang
== nsGkAtoms::el
) {
251 if (aLang
== nsGkAtoms::ga
) {
254 if (aLang
== nsGkAtoms::lt_
) {
255 return eLSCB_Lithuanian
;
258 // Is there a region subtag we should ignore?
259 nsAtomString
langStr(const_cast<nsAtom
*>(aLang
));
260 int index
= langStr
.FindChar('-');
262 langStr
.Truncate(index
);
263 RefPtr
<nsAtom
> truncatedLang
= NS_Atomize(langStr
);
264 return GetCasingFor(truncatedLang
);
// Applies text-transform style mappings to aString, appending the converted
// text to aConvertedString.
//
// The main loop walks aString by index, advancing aOffsetInTextRun in step.
// The effective StyleTextTransform is aGlobalTransform when it holds a value;
// otherwise the per-character style read from aTextRun->mStyles is used (which
// also supplies mForceNonFullWidth, the explicit language and mMaskPassword).
// A surrogate pair is combined into one UCS4 value before mapping. The code
// then switches on style.case_ (None / Lowercase / Uppercase / Capitalize),
// with language-specific handling chosen through GetCasingFor(): Turkish
// dotted/dotless i, Lithuanian soft-dotted i/j, Irish prefix letters, Greek
// uppercase handling and the Dutch "ij" digraph. Final-sigma handling is
// applied in the Lowercase case regardless of language.
// Multi-character results come from SpecialLower / SpecialUpper /
// SpecialTitle. Unless aCaseTransformsOnly is set, the FULL_WIDTH and
// FULL_SIZE_KANA parts of style.other_ are applied afterwards.
// forceNonFullWidth maps through GetFullWidthInverse(). Characters flagged
// with mMaskPassword are emitted as TextEditor::PasswordMask().
//
// Output bookkeeping:
//   aDeletedCharsArray   - gets a `true` entry when a character produces no
//                          output (ch == uint32_t(-1)) and for the trailing
//                          half of a surrogate pair, `false` otherwise. The
//                          Irish hyphen-deletion action also sets an existing
//                          entry to true.
//   aCharsToMergeArray   - gets `false` for the first output character of a
//                          mapping and `true` for each extra character.
//   aCanBreakBeforeArray,
//   aStyleArray          - optional; filled only when both are non-null, in
//                          which case aTextRun must be provided (asserted).
//
// NOTE(review): the return statement is not visible in this excerpt. The
// local `mergeNeeded`, together with the caller storing the result under the
// same name, suggests the function reports whether character merging is
// required - confirm. Many brace/branch lines are also missing here, so the
// exact nesting should be checked against the full file.
270 bool nsCaseTransformTextRunFactory::TransformString(
271 const nsAString
& aString
, nsString
& aConvertedString
,
272 const Maybe
<StyleTextTransform
>& aGlobalTransform
, bool aCaseTransformsOnly
,
273 const nsAtom
* aLanguage
, nsTArray
<bool>& aCharsToMergeArray
,
274 nsTArray
<bool>& aDeletedCharsArray
, const nsTransformedTextRun
* aTextRun
,
275 uint32_t aOffsetInTextRun
, nsTArray
<uint8_t>* aCanBreakBeforeArray
,
276 nsTArray
<RefPtr
<nsTransformedCharStyle
>>* aStyleArray
) {
277 bool auxiliaryOutputArrays
= aCanBreakBeforeArray
&& aStyleArray
;
278 MOZ_ASSERT(!auxiliaryOutputArrays
|| aTextRun
,
279 "text run must be provided to use aux output arrays");
281 uint32_t length
= aString
.Length();
282 const char16_t
* str
= aString
.BeginReading();
283 const char16_t kPasswordMask
= TextEditor::PasswordMask();
285 bool mergeNeeded
= false;
287 bool capitalizeDutchIJ
= false;
288 bool prevIsLetter
= false;
289 bool ntPrefix
= false; // true immediately after a word-initial 'n' or 't'
290 // when doing Irish lowercasing
291 bool seenSoftDotted
= false; // true immediately after an I or J that is
292 // converted to lowercase in Lithuanian mode
293 uint32_t sigmaIndex
= uint32_t(-1);
296 StyleTextTransform style
=
297 aGlobalTransform
.valueOr(StyleTextTransform::None());
298 bool forceNonFullWidth
= false;
299 const nsAtom
* lang
= aLanguage
;
301 LanguageSpecificCasingBehavior languageSpecificCasing
= GetCasingFor(lang
);
302 mozilla::GreekCasing::State greekState
;
303 mozilla::IrishCasing::State irishState
;
304 uint32_t irishMark
= uint32_t(-1); // location of possible prefix letter(s)
305 // in the output string
306 uint32_t irishMarkSrc
= uint32_t(-1); // corresponding location in source
307 // string (may differ from output due
308 // to expansions like eszet -> 'SS')
309 uint32_t greekMark
= uint32_t(-1); // location of uppercase ETA that may need
310 // tonos added (if it is disjunctive eta)
311 const char16_t kGreekUpperEta
= 0x0397;
313 for (uint32_t i
= 0; i
< length
; ++i
, ++aOffsetInTextRun
) {
314 uint32_t ch
= str
[i
];
316 RefPtr
<nsTransformedCharStyle
> charStyle
;
318 charStyle
= aTextRun
->mStyles
[aOffsetInTextRun
];
319 style
= aGlobalTransform
.valueOr(charStyle
->mTextTransform
);
320 forceNonFullWidth
= charStyle
->mForceNonFullWidth
;
323 charStyle
->mExplicitLanguage
? charStyle
->mLanguage
.get() : nullptr;
324 if (lang
!= newLang
) {
326 languageSpecificCasing
= GetCasingFor(lang
);
329 irishMark
= uint32_t(-1);
330 irishMarkSrc
= uint32_t(-1);
331 greekMark
= uint32_t(-1);
335 bool maskPassword
= charStyle
&& charStyle
->mMaskPassword
;
337 const mozilla::unicode::MultiCharMapping
* mcm
;
338 bool inhibitBreakBefore
= false; // have we just deleted preceding hyphen?
340 if (i
< length
- 1 && NS_IS_SURROGATE_PAIR(ch
, str
[i
+ 1])) {
341 ch
= SURROGATE_TO_UCS4(ch
, str
[i
+ 1]);
344 // Skip case transform if we're masking current character.
346 switch (style
.case_
) {
347 case StyleTextTransformCase::None
:
350 case StyleTextTransformCase::Lowercase
:
351 if (languageSpecificCasing
== eLSCB_Turkish
) {
353 ch
= LATIN_SMALL_LETTER_DOTLESS_I
;
355 sigmaIndex
= uint32_t(-1);
358 if (ch
== LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
) {
361 sigmaIndex
= uint32_t(-1);
366 if (languageSpecificCasing
== eLSCB_Lithuanian
) {
368 /* From SpecialCasing.txt:
369 * # Introduce an explicit dot above when lowercasing capital I's and J's
370 * # whenever there are more accents above.
371 * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
373 * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
374 * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
375 * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
376 * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
377 * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
378 * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
381 if (ch
== 'I' || ch
== 'J' || ch
== 0x012E) {
382 ch
= ToLowerCase(ch
);
384 seenSoftDotted
= true;
385 sigmaIndex
= uint32_t(-1);
389 aConvertedString
.Append('i');
390 aConvertedString
.Append(0x0307);
394 seenSoftDotted
= false;
395 sigmaIndex
= uint32_t(-1);
399 aConvertedString
.Append('i');
400 aConvertedString
.Append(0x0307);
404 seenSoftDotted
= false;
405 sigmaIndex
= uint32_t(-1);
409 aConvertedString
.Append('i');
410 aConvertedString
.Append(0x0307);
414 seenSoftDotted
= false;
415 sigmaIndex
= uint32_t(-1);
420 cat
= mozilla::unicode::GetGenCategory(ch
);
422 if (languageSpecificCasing
== eLSCB_Irish
&&
423 cat
== nsUGenCategory::kLetter
) {
424 // See bug 1018805 for Irish lowercasing requirements
425 if (!prevIsLetter
&& (ch
== 'n' || ch
== 't')) {
428 if (ntPrefix
&& mozilla::IrishCasing::IsUpperVowel(ch
)) {
429 aConvertedString
.Append('-');
438 if (seenSoftDotted
&& cat
== nsUGenCategory::kMark
) {
439 // The seenSoftDotted flag will only be set in Lithuanian mode.
440 if (ch
== 0x0300 || ch
== 0x0301 || ch
== 0x0303) {
441 aConvertedString
.Append(0x0307);
445 seenSoftDotted
= false;
447 // Special lowercasing behavior for Greek Sigma: note that this is
448 // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
449 // *not* a language-specific mapping; it applies regardless of the
450 // language of the element.
452 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
453 // (i.e. the non-final form) whenever there is a following letter, or
454 // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
455 // followed by a LETTER); and to FINAL SIGMA when it is preceded by
456 // another letter but not followed by one.
458 // To implement the context-sensitive nature of this mapping, we keep
459 // track of whether the previous character was a letter. If not,
460 // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
461 // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
462 // record the position in the converted string; if we then encounter
463 // another letter, that FINAL SIGMA is replaced with a standard
466 // If sigmaIndex is not -1, it marks where we have provisionally
467 // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
468 // letter, we need to change it to SMALL SIGMA.
469 if (sigmaIndex
!= uint32_t(-1)) {
470 if (cat
== nsUGenCategory::kLetter
) {
471 aConvertedString
.SetCharAt(GREEK_SMALL_LETTER_SIGMA
, sigmaIndex
);
475 if (ch
== GREEK_CAPITAL_LETTER_SIGMA
) {
476 // If preceding char was a letter, map to FINAL instead of SMALL,
477 // and note where it occurred by setting sigmaIndex; we'll change
478 // it to standard SMALL SIGMA later if another letter follows
480 ch
= GREEK_SMALL_LETTER_FINAL_SIGMA
;
481 sigmaIndex
= aConvertedString
.Length();
483 // CAPITAL SIGMA not preceded by a letter is unconditionally
484 // mapped to SMALL SIGMA
485 ch
= GREEK_SMALL_LETTER_SIGMA
;
486 sigmaIndex
= uint32_t(-1);
492 // ignore diacritics for the purpose of contextual sigma mapping;
493 // otherwise, reset prevIsLetter appropriately and clear the
495 if (cat
!= nsUGenCategory::kMark
) {
496 prevIsLetter
= (cat
== nsUGenCategory::kLetter
);
497 sigmaIndex
= uint32_t(-1);
500 mcm
= mozilla::unicode::SpecialLower(ch
);
503 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
504 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
508 ch
= mcm
->mMappedChars
[j
];
512 ch
= ToLowerCase(ch
);
515 case StyleTextTransformCase::Uppercase
:
516 if (languageSpecificCasing
== eLSCB_Turkish
&& ch
== 'i') {
517 ch
= LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
;
521 if (languageSpecificCasing
== eLSCB_Greek
) {
524 ch
= mozilla::GreekCasing::UpperCase(ch
, greekState
, markEta
,
527 greekMark
= aConvertedString
.Length();
528 } else if (updateEta
) {
529 // Remove the TONOS from an uppercase ETA-TONOS that turned out
530 // not to be disjunctive-eta.
531 MOZ_ASSERT(aConvertedString
.Length() > 0 &&
532 greekMark
< aConvertedString
.Length(),
534 aConvertedString
.SetCharAt(kGreekUpperEta
, greekMark
);
535 greekMark
= uint32_t(-1);
540 if (languageSpecificCasing
== eLSCB_Lithuanian
) {
542 * # Remove DOT ABOVE after "i" with upper or titlecase
544 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
546 if (ch
== 'i' || ch
== 'j' || ch
== 0x012F) {
547 seenSoftDotted
= true;
548 ch
= ToTitleCase(ch
);
551 if (seenSoftDotted
) {
552 seenSoftDotted
= false;
560 if (languageSpecificCasing
== eLSCB_Irish
) {
563 ch
= mozilla::IrishCasing::UpperCase(ch
, irishState
, mark
, action
);
565 irishMark
= aConvertedString
.Length();
569 nsString
& str
= aConvertedString
; // shorthand
572 // lowercase a single prefix letter
573 NS_ASSERTION(str
.Length() > 0 && irishMark
< str
.Length(),
575 str
.SetCharAt(ToLowerCase(str
[irishMark
]), irishMark
);
576 irishMark
= uint32_t(-1);
577 irishMarkSrc
= uint32_t(-1);
580 // lowercase two prefix letters (immediately before current
583 str
.Length() >= 2 && irishMark
== str
.Length() - 2,
585 str
.SetCharAt(ToLowerCase(str
[irishMark
]), irishMark
);
586 str
.SetCharAt(ToLowerCase(str
[irishMark
+ 1]), irishMark
+ 1);
587 irishMark
= uint32_t(-1);
588 irishMarkSrc
= uint32_t(-1);
591 // lowercase one prefix letter, and delete following hyphen
592 // (which must be the immediately-preceding char)
594 str
.Length() >= 2 && irishMark
== str
.Length() - 2,
597 irishMark
!= uint32_t(-1) && irishMarkSrc
!= uint32_t(-1),
598 "failed to set irishMarks");
599 str
.Replace(irishMark
, 2, ToLowerCase(str
[irishMark
]));
600 aDeletedCharsArray
[irishMarkSrc
+ 1] = true;
601 // Remove the trailing entries (corresponding to the deleted
602 // hyphen) from the auxiliary arrays.
603 aCharsToMergeArray
.SetLength(aCharsToMergeArray
.Length() - 1);
604 if (auxiliaryOutputArrays
) {
605 aStyleArray
->SetLength(aStyleArray
->Length() - 1);
606 aCanBreakBeforeArray
->SetLength(
607 aCanBreakBeforeArray
->Length() - 1);
608 inhibitBreakBefore
= true;
611 irishMark
= uint32_t(-1);
612 irishMarkSrc
= uint32_t(-1);
615 // ch has been set to the uppercase for current char;
616 // No need to check for SpecialUpper here as none of the
617 // characters that could trigger an Irish casing action have
621 // If we didn't have any special action to perform, fall through
622 // to check for special uppercase (ß)
625 mcm
= mozilla::unicode::SpecialUpper(ch
);
628 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
629 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
633 ch
= mcm
->mMappedChars
[j
];
637 // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
638 // lack of widespread font support for the corresponding Mtavruli
639 // characters at this time (July 2018).
640 // This condition is to be removed once the major platforms ship with
641 // fonts that support U+1C90..1CBF.
642 if (ch
< 0x10D0 || ch
> 0x10FF) {
643 ch
= ToUpperCase(ch
);
647 case StyleTextTransformCase::Capitalize
:
649 if (capitalizeDutchIJ
&& ch
== 'j') {
651 capitalizeDutchIJ
= false;
654 capitalizeDutchIJ
= false;
655 if (aOffsetInTextRun
< aTextRun
->mCapitalize
.Length() &&
656 aTextRun
->mCapitalize
[aOffsetInTextRun
]) {
657 if (languageSpecificCasing
== eLSCB_Turkish
&& ch
== 'i') {
658 ch
= LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
;
661 if (languageSpecificCasing
== eLSCB_Dutch
&& ch
== 'i') {
663 capitalizeDutchIJ
= true;
666 if (languageSpecificCasing
== eLSCB_Lithuanian
) {
668 * # Remove DOT ABOVE after "i" with upper or titlecase
670 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
672 if (ch
== 'i' || ch
== 'j' || ch
== 0x012F) {
673 seenSoftDotted
= true;
674 ch
= ToTitleCase(ch
);
677 if (seenSoftDotted
) {
678 seenSoftDotted
= false;
686 mcm
= mozilla::unicode::SpecialTitle(ch
);
689 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
690 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
694 ch
= mcm
->mMappedChars
[j
];
698 ch
= ToTitleCase(ch
);
704 MOZ_ASSERT_UNREACHABLE("all cases should be handled");
708 if (!aCaseTransformsOnly
) {
709 if (!forceNonFullWidth
&&
710 (style
.other_
& StyleTextTransformOther::FULL_WIDTH
)) {
711 ch
= mozilla::unicode::GetFullWidth(ch
);
714 if (style
.other_
& StyleTextTransformOther::FULL_SIZE_KANA
) {
716 static const uint16_t kSmallKanas
[] = {
718 0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
720 0x308E, 0x3095, 0x3096,
722 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
724 0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
726 0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
730 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F};
731 static const uint16_t kFullSizeKanas
[] = {
733 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
735 0x308F, 0x304B, 0x3051,
737 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
739 0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
741 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
745 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82};
749 const uint16_t len
= MOZ_ARRAY_LENGTH(kSmallKanas
);
750 if (mozilla::BinarySearch(kSmallKanas
, 0, len
, ch
, &index
)) {
751 ch
= kFullSizeKanas
[index
];
756 if (forceNonFullWidth
) {
757 ch
= mozilla::unicode::GetFullWidthInverse(ch
);
761 if (ch
== uint32_t(-1)) {
762 aDeletedCharsArray
.AppendElement(true);
765 aDeletedCharsArray
.AppendElement(false);
766 aCharsToMergeArray
.AppendElement(false);
767 if (auxiliaryOutputArrays
) {
768 aStyleArray
->AppendElement(charStyle
);
769 aCanBreakBeforeArray
->AppendElement(
771 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
772 : aTextRun
->CanBreakBefore(aOffsetInTextRun
));
776 aConvertedString
.Append(maskPassword
? kPasswordMask
: ch
);
779 aConvertedString
.Append(kPasswordMask
);
780 // TODO: We should show a password mask for a surrogate pair later.
781 aConvertedString
.Append(kPasswordMask
);
783 aConvertedString
.Append(H_SURROGATE(ch
));
784 aConvertedString
.Append(L_SURROGATE(ch
));
789 // Skip the trailing surrogate.
790 aDeletedCharsArray
.AppendElement(true);
793 while (extraChars
-- > 0) {
795 aCharsToMergeArray
.AppendElement(true);
796 if (auxiliaryOutputArrays
) {
797 aStyleArray
->AppendElement(charStyle
);
798 aCanBreakBeforeArray
->AppendElement(
799 gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
);
808 void nsCaseTransformTextRunFactory::RebuildTextRun(
809 nsTransformedTextRun
* aTextRun
, DrawTarget
* aRefDrawTarget
,
810 gfxMissingFontRecorder
* aMFR
) {
811 nsAutoString convertedString
;
812 AutoTArray
<bool, 50> charsToMergeArray
;
813 AutoTArray
<bool, 50> deletedCharsArray
;
814 AutoTArray
<uint8_t, 50> canBreakBeforeArray
;
815 AutoTArray
<RefPtr
<nsTransformedCharStyle
>, 50> styleArray
;
817 auto globalTransform
=
819 ? Some(StyleTextTransform
{StyleTextTransformCase::Uppercase
, {}})
821 bool mergeNeeded
= TransformString(
822 aTextRun
->mString
, convertedString
, globalTransform
,
823 /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray
,
824 deletedCharsArray
, aTextRun
, 0, &canBreakBeforeArray
, &styleArray
);
826 gfx::ShapedTextFlags flags
;
827 gfxTextRunFactory::Parameters innerParams
=
828 GetParametersForInner(aTextRun
, &flags
, aRefDrawTarget
);
829 gfxFontGroup
* fontGroup
= aTextRun
->GetFontGroup();
831 RefPtr
<nsTransformedTextRun
> transformedChild
;
832 RefPtr
<gfxTextRun
> cachedChild
;
835 if (mInnerTransformingTextRunFactory
) {
836 transformedChild
= mInnerTransformingTextRunFactory
->MakeTextRun(
837 convertedString
.BeginReading(), convertedString
.Length(), &innerParams
,
838 fontGroup
, flags
, nsTextFrameUtils::Flags(), std::move(styleArray
),
840 child
= transformedChild
.get();
842 cachedChild
= fontGroup
->MakeTextRun(
843 convertedString
.BeginReading(), convertedString
.Length(), &innerParams
,
844 flags
, nsTextFrameUtils::Flags(), aMFR
);
845 child
= cachedChild
.get();
850 // Copy potential linebreaks into child so they're preserved
851 // (and also child will be shaped appropriately)
852 NS_ASSERTION(convertedString
.Length() == canBreakBeforeArray
.Length(),
853 "Dropped characters or break-before values somewhere!");
854 gfxTextRun::Range
range(0, uint32_t(canBreakBeforeArray
.Length()));
855 child
->SetPotentialLineBreaks(range
, canBreakBeforeArray
.Elements());
856 if (transformedChild
) {
857 transformedChild
->FinishSettingProperties(aRefDrawTarget
, aMFR
);
860 aTextRun
->ResetGlyphRuns();
862 // Now merge multiple characters into one multi-glyph character as required
863 // and deal with skipping deleted accent chars
864 NS_ASSERTION(charsToMergeArray
.Length() == child
->GetLength(),
865 "source length mismatch");
866 NS_ASSERTION(deletedCharsArray
.Length() == aTextRun
->GetLength(),
867 "destination length mismatch");
868 MergeCharactersInTextRun(aTextRun
, child
, charsToMergeArray
.Elements(),
869 deletedCharsArray
.Elements());
871 // No merging to do, so just copy; this produces a more optimized textrun.
872 // We can't steal the data because the child may be cached and stealing
873 // the data would break the cache.
874 aTextRun
->CopyGlyphDataFrom(child
, gfxTextRun::Range(child
), 0);