1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsTextRunTransformations.h"
11 #include "GreekCasing.h"
12 #include "IrishCasing.h"
13 #include "MathMLTextRunFactory.h"
14 #include "mozilla/ComputedStyleInlines.h"
15 #include "mozilla/MemoryReporting.h"
16 #include "mozilla/StaticPrefs_layout.h"
17 #include "mozilla/StaticPrefs_mathml.h"
18 #include "mozilla/TextEditor.h"
19 #include "mozilla/gfx/2D.h"
20 #include "nsGkAtoms.h"
21 #include "nsSpecialCasingData.h"
22 #include "nsStyleConsts.h"
23 #include "nsTextFrameUtils.h"
24 #include "nsUnicharUtils.h"
25 #include "nsUnicodeProperties.h"
27 using namespace mozilla
;
28 using namespace mozilla::gfx
;
30 // Unicode characters needing special casing treatment in tr/az languages
31 #define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
32 #define LATIN_SMALL_LETTER_DOTLESS_I 0x0131
34 // Greek sigma needs custom handling for the lowercase transform; for details
36 #define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
37 #define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
38 #define GREEK_SMALL_LETTER_SIGMA 0x03C3
// Factory for nsTransformedTextRun objects.
//
// The visible code asserts that aFlags does not carry TEXT_IS_8BIT, calls
// AllocateStorageForTextRun() with sizeof(nsTransformedTextRun) and aLength,
// placement-constructs the run in `storage` (forwarding every argument and
// moving aStyles into the constructor), and hands the reference back to the
// caller through forget().
//
// NOTE(review): the declaration of `storage`, and any handling of a failed
// allocation, are not visible in this excerpt - confirm against the full file.
40 already_AddRefed
<nsTransformedTextRun
> nsTransformedTextRun::Create(
41 const gfxTextRunFactory::Parameters
* aParams
,
42 nsTransformingTextRunFactory
* aFactory
, gfxFontGroup
* aFontGroup
,
43 const char16_t
* aString
, uint32_t aLength
,
44 const gfx::ShapedTextFlags aFlags
, const nsTextFrameUtils::Flags aFlags2
,
45 nsTArray
<RefPtr
<nsTransformedCharStyle
>>&& aStyles
, bool aOwnsFactory
) {
// This entry point takes a char16_t string, so the 8-bit flag must be clear.
46 NS_ASSERTION(!(aFlags
& gfx::ShapedTextFlags::TEXT_IS_8BIT
),
47 "didn't expect text to be marked as 8-bit here");
50 AllocateStorageForTextRun(sizeof(nsTransformedTextRun
), aLength
);
// Placement-new: the object is constructed inside the storage obtained above.
55 RefPtr
<nsTransformedTextRun
> result
= new (storage
)
56 nsTransformedTextRun(aParams
, aFactory
, aFontGroup
, aString
, aLength
,
57 aFlags
, aFlags2
, std::move(aStyles
), aOwnsFactory
);
58 return result
.forget();
// Copies aLength bool flags from aCapitalization into mCapitalize, starting
// at index aStart.
//
// If mCapitalize is still empty it is first grown to GetLength() elements
// and zero-filled, so entries outside the copied range read as false.
//
// NOTE(review): no bounds check on aStart + aLength is visible here;
// presumably callers keep the range within GetLength() - confirm.
61 void nsTransformedTextRun::SetCapitalization(uint32_t aStart
, uint32_t aLength
,
62 bool* aCapitalization
) {
63 if (mCapitalize
.IsEmpty()) {
64 // XXX(Bug 1631371) Check if this should use a fallible operation as it
66 mCapitalize
.AppendElements(GetLength());
67 memset(mCapitalize
.Elements(), 0, GetLength() * sizeof(bool));
// Overwrite the requested sub-range with the caller's flags.
69 memcpy(mCapitalize
.Elements() + aStart
, aCapitalization
,
70 aLength
* sizeof(bool));
// Forwards aRange and aBreakBefore to gfxTextRun::SetPotentialLineBreaks()
// and keeps that call's result in `changed`.
//
// NOTE(review): what is done with `changed` afterwards, and the return
// statement, are not visible in this excerpt.
74 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange
,
75 const uint8_t* aBreakBefore
) {
76 bool changed
= gfxTextRun::SetPotentialLineBreaks(aRange
, aBreakBefore
);
// Memory reporting: accumulates the size attributed to this run, not
// counting the object itself.
//
// The visible code adds up the base-class gfxTextRun::SizeOfExcludingThis()
// result, the shallow sizes of the mStyles and mCapitalize arrays, and
// aMallocSizeOf(mFactory).
//
// NOTE(review): the line just before the mFactory term is missing from this
// excerpt, so whether that term is conditional cannot be told from here; the
// return statement is not visible either.
83 size_t nsTransformedTextRun::SizeOfExcludingThis(
84 mozilla::MallocSizeOf aMallocSizeOf
) {
85 size_t total
= gfxTextRun::SizeOfExcludingThis(aMallocSizeOf
);
// Shallow sizes only: the elements the arrays refer to are not measured here.
86 total
+= mStyles
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
87 total
+= mCapitalize
.ShallowSizeOfExcludingThis(aMallocSizeOf
);
89 total
+= aMallocSizeOf(mFactory
);
// Memory reporting: returns aMallocSizeOf(this) plus everything counted by
// SizeOfExcludingThis().
94 size_t nsTransformedTextRun::SizeOfIncludingThis(
95 mozilla::MallocSizeOf aMallocSizeOf
) {
96 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf
);
// char16_t overload: creates a transformed text run bound to this factory.
//
// All arguments are passed straight through to nsTransformedTextRun::Create(),
// with `this` supplied as the factory argument and aStyles moved along.
99 already_AddRefed
<nsTransformedTextRun
>
100 nsTransformingTextRunFactory::MakeTextRun(
101 const char16_t
* aString
, uint32_t aLength
,
102 const gfxTextRunFactory::Parameters
* aParams
, gfxFontGroup
* aFontGroup
,
103 gfx::ShapedTextFlags aFlags
, nsTextFrameUtils::Flags aFlags2
,
104 nsTArray
<RefPtr
<nsTransformedCharStyle
>>&& aStyles
, bool aOwnsFactory
) {
105 return nsTransformedTextRun::Create(aParams
, this, aFontGroup
, aString
,
106 aLength
, aFlags
, aFlags2
,
107 std::move(aStyles
), aOwnsFactory
);
// uint8_t overload: widens the 8-bit input and defers to the char16_t
// overload.
//
// The input bytes are reinterpreted as const char* and converted with
// NS_ConvertASCIItoUTF16; the resulting buffer is then passed to the 16-bit
// MakeTextRun with TEXT_IS_8BIT masked out of aFlags.
//
// NOTE(review): the tail of the NS_ConvertASCIItoUTF16 constructor call is
// missing from this excerpt, so the length argument it receives cannot be
// confirmed from here.
110 already_AddRefed
<nsTransformedTextRun
>
111 nsTransformingTextRunFactory::MakeTextRun(
112 const uint8_t* aString
, uint32_t aLength
,
113 const gfxTextRunFactory::Parameters
* aParams
, gfxFontGroup
* aFontGroup
,
114 gfx::ShapedTextFlags aFlags
, nsTextFrameUtils::Flags aFlags2
,
115 nsTArray
<RefPtr
<nsTransformedCharStyle
>>&& aStyles
, bool aOwnsFactory
) {
116 // We'll only have a Unicode code path to minimize the amount of code needed
117 // for these rarely used features
118 NS_ConvertASCIItoUTF16
unicodeString(reinterpret_cast<const char*>(aString
),
// The text handed on is 16-bit, so the 8-bit flag is cleared.
120 return MakeTextRun(unicodeString
.get(), aLength
, aParams
, aFontGroup
,
121 aFlags
& ~gfx::ShapedTextFlags::TEXT_IS_8BIT
, aFlags2
,
122 std::move(aStyles
), aOwnsFactory
);
// Copies glyph data from aSrc into aDest while merging and skipping
// characters.
//
// aCharsToMerge is indexed by source character position: a true entry at
// k + 1 makes the loop keep accumulating glyphs instead of writing the
// current merged glyph. aDeletedChars is indexed by destination position
// (`offset`): each true entry gets continuationGlyph written into aDest.
//
// For every glyph run of aSrc a matching glyph run is added to aDest at the
// current `offset`. Glyphs of the characters being merged are collected in
// `glyphs` and then stored either as a copy of the simple glyph record (when
// the merged glyph is simple and exactly one glyph was collected) or as a
// complex record with detailed glyphs.
//
// aDest is expected to have no glyph runs on entry (asserted below).
//
// NOTE(review): several lines of this function are missing from the excerpt,
// including the declaration of `offset`, the `continue` for merged
// characters and the bookkeeping that clears `glyphs` - treat the comments
// here as describing only what is visible.
125 void MergeCharactersInTextRun(gfxTextRun
* aDest
, gfxTextRun
* aSrc
,
126 const bool* aCharsToMerge
,
127 const bool* aDeletedChars
) {
128 MOZ_ASSERT(!aDest
->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
130 AutoTArray
<gfxTextRun::DetailedGlyph
, 2> glyphs
;
// Record written into destination slots that are flagged in aDeletedChars.
131 const gfxTextRun::CompressedGlyph continuationGlyph
=
132 gfxTextRun::CompressedGlyph::MakeComplex(false, false);
133 const gfxTextRun::CompressedGlyph
* srcGlyphs
= aSrc
->GetCharacterGlyphs();
134 gfxTextRun::CompressedGlyph
* destGlyphs
= aDest
->GetCharacterGlyphs();
// Outer loop: one iteration per glyph run of the source text run.
135 for (gfxTextRun::GlyphRunIterator
iter(aSrc
, gfxTextRun::Range(aSrc
));
136 !iter
.AtEnd(); iter
.NextRun()) {
137 const gfxTextRun::GlyphRun
* run
= iter
.GlyphRun();
138 aDest
->AddGlyphRun(run
->mFont
, run
->mMatchType
, offset
, false,
139 run
->mOrientation
, run
->mIsCJK
);
141 bool anyMissing
= false;
142 uint32_t mergeRunStart
= iter
.StringStart();
143 // Initialize to a copy of the first source glyph in the merge run.
144 gfxTextRun::CompressedGlyph mergedGlyph
= srcGlyphs
[mergeRunStart
];
145 uint32_t stringEnd
= iter
.StringEnd();
// Inner loop: walk the source characters covered by this glyph run.
146 for (uint32_t k
= iter
.StringStart(); k
< stringEnd
; ++k
) {
147 const gfxTextRun::CompressedGlyph g
= srcGlyphs
[k
];
148 if (g
.IsSimpleGlyph()) {
// A simple glyph is expanded to a DetailedGlyph so it can join `glyphs`.
150 gfxTextRun::DetailedGlyph details
;
151 details
.mGlyphID
= g
.GetSimpleGlyph();
152 details
.mAdvance
= g
.GetSimpleAdvance();
153 glyphs
.AppendElement(details
);
160 if (g
.GetGlyphCount() > 0) {
161 glyphs
.AppendElements(aSrc
->GetDetailedGlyphs(k
), g
.GetGlyphCount());
165 if (k
+ 1 < iter
.StringEnd() && aCharsToMerge
[k
+ 1]) {
166 // next char is supposed to merge with current, so loop without
167 // writing current merged glyph to the destination
171 // If the start of the merge run is actually a character that should
172 // have been merged with the previous character (this can happen
173 // if there's a font change in the middle of a case-mapped character,
174 // that decomposed into a sequence of base+diacritics, for example),
175 // just discard the entire merge run. See comment at start of this
177 NS_WARNING_ASSERTION(
178 !aCharsToMerge
[mergeRunStart
],
179 "unable to merge across a glyph run boundary, glyph(s) discarded");
180 if (!aCharsToMerge
[mergeRunStart
]) {
181 // Determine if we can just copy the existing simple glyph record.
182 if (mergedGlyph
.IsSimpleGlyph() && glyphs
.Length() == 1) {
183 destGlyphs
[offset
] = mergedGlyph
;
185 // Otherwise set up complex glyph record and store detailed glyphs.
186 mergedGlyph
.SetComplex(mergedGlyph
.IsClusterStart(),
187 mergedGlyph
.IsLigatureGroupStart());
188 destGlyphs
[offset
] = mergedGlyph
;
189 aDest
->SetDetailedGlyphs(offset
, glyphs
.Length(), glyphs
.Elements());
191 destGlyphs
[offset
].SetMissing();
// Fill destination slots flagged as deleted with the continuation record.
196 while (offset
< aDest
->GetLength() && aDeletedChars
[offset
]) {
197 destGlyphs
[offset
++] = continuationGlyph
;
// Begin the next merge run at the following source character, if any.
203 mergeRunStart
= k
+ 1;
204 if (mergeRunStart
< stringEnd
) {
205 mergedGlyph
= srcGlyphs
[mergeRunStart
];
208 NS_ASSERTION(glyphs
.Length() == 0,
209 "Leftover glyphs, don't request merging of the last character "
212 NS_ASSERTION(offset
== aDest
->GetLength(), "Bad offset calculations");
// Builds the gfxTextRunFactory::Parameters used for an inner text run.
//
// The aggregate is initialized with aRefDrawTarget, three null pointers, 0
// and aTextRun->GetAppUnitsPerDevUnit(), in that order. As an out-parameter,
// *aFlags receives aTextRun->GetFlags().
//
// NOTE(review): the meaning of the individual Parameters fields is defined
// elsewhere, and the return statement is not visible in this excerpt.
215 gfxTextRunFactory::Parameters
GetParametersForInner(
216 nsTransformedTextRun
* aTextRun
, gfx::ShapedTextFlags
* aFlags
,
217 DrawTarget
* aRefDrawTarget
) {
218 gfxTextRunFactory::Parameters params
= {
219 aRefDrawTarget
, nullptr, nullptr,
220 nullptr, 0, aTextRun
->GetAppUnitsPerDevUnit()};
221 *aFlags
= aTextRun
->GetFlags();
225 // Some languages have special casing conventions that differ from the
226 // default Unicode mappings.
227 // The enum values here are named for well-known exemplar languages that
228 // exhibit the behavior in question; multiple lang tags may map to the
229 // same setting here, if the behavior is shared by other languages.
// No enumerator below has an explicit initializer, so the values are
// assigned sequentially starting from eLSCB_None. Values of this type are
// returned by GetCasingFor() and compared against in TransformString().
230 enum LanguageSpecificCasingBehavior
{
231 eLSCB_None
, // default non-lang-specific behavior
232 eLSCB_Dutch
, // treat "ij" digraph as a unit for capitalization
233 eLSCB_Greek
, // strip accent when uppercasing Greek vowels
234 eLSCB_Irish
, // keep prefix letters as lowercase when uppercasing Irish
235 eLSCB_Turkish
, // preserve dotted/dotless-i distinction in uppercase
236 eLSCB_Lithuanian
// retain dot on lowercase i/j when an accent is present
// Maps a language atom to the casing behavior that applies to it.
//
// The atom is compared by pointer against known language atoms: tr, az, ba,
// crh and tt select eLSCB_Turkish, and lt_ selects eLSCB_Lithuanian. The nl,
// el and ga atoms are tested as well.
//
// When none of the atoms match, the tag is converted to a string, searched
// for '-', truncated at the index found, re-atomized, and looked up again by
// a recursive call, so a tag with a subtag can resolve through its prefix.
//
// NOTE(review): several lines are missing from this excerpt - any handling
// of a null aLang, the return statements of the nl/el/ga branches, the guard
// on `index` before Truncate(), and the final fallback return. Confirm those
// against the full file.
239 static LanguageSpecificCasingBehavior
GetCasingFor(const nsAtom
* aLang
) {
243 if (aLang
== nsGkAtoms::tr
|| aLang
== nsGkAtoms::az
||
244 aLang
== nsGkAtoms::ba
|| aLang
== nsGkAtoms::crh
||
245 aLang
== nsGkAtoms::tt
) {
246 return eLSCB_Turkish
;
248 if (aLang
== nsGkAtoms::nl
) {
251 if (aLang
== nsGkAtoms::el
) {
254 if (aLang
== nsGkAtoms::ga
) {
257 if (aLang
== nsGkAtoms::lt_
) {
258 return eLSCB_Lithuanian
;
261 // Is there a region subtag we should ignore?
// const_cast is only there to satisfy the nsAtomString constructor.
262 nsAtomString
langStr(const_cast<nsAtom
*>(aLang
));
263 int index
= langStr
.FindChar('-');
265 langStr
.Truncate(index
);
266 RefPtr
<nsAtom
> truncatedLang
= NS_Atomize(langStr
);
267 return GetCasingFor(truncatedLang
);
// Applies text-transform style conversions to aString, appending the result
// to aConvertedString.
//
// What the visible code establishes:
//  - aGlobalTransform, when it holds a value, takes precedence over the
//    per-character style's mTextTransform (valueOr is used both before and
//    inside the loop).
//  - aMaskChar, when non-zero, is used as the mask character; otherwise
//    TextEditor::PasswordMask() is used.
//  - aLanguage seeds the language-specific casing behavior (via
//    GetCasingFor); a per-character style can replace it inside the loop.
//  - aCharsToMergeArray and aDeletedCharsArray receive bool entries as
//    characters are emitted, merged or deleted.
//  - aCanBreakBeforeArray and aStyleArray are optional; they are filled only
//    when both are non-null, and aTextRun must then be provided (asserted).
//  - aOffsetInTextRun is advanced together with the loop index and is used
//    to index aTextRun->mStyles and aTextRun->mCapitalize.
//  - The main switch handles the None, Lowercase, Uppercase, Capitalize and
//    MathAuto cases of style.case_; full-width and full-size-kana
//    conversions follow when aCaseTransformsOnly is false.
//
// NOTE(review): many lines of this function are missing from the excerpt
// (including the return statement and a number of declarations and
// conditions), so the return value's exact meaning cannot be confirmed from
// here; the caller visible in this file stores it in a variable named
// mergeNeeded.
273 bool nsCaseTransformTextRunFactory::TransformString(
274 const nsAString
& aString
, nsString
& aConvertedString
,
275 const Maybe
<StyleTextTransform
>& aGlobalTransform
, char16_t aMaskChar
,
276 bool aCaseTransformsOnly
, const nsAtom
* aLanguage
,
277 nsTArray
<bool>& aCharsToMergeArray
, nsTArray
<bool>& aDeletedCharsArray
,
278 const nsTransformedTextRun
* aTextRun
, uint32_t aOffsetInTextRun
,
279 nsTArray
<uint8_t>* aCanBreakBeforeArray
,
280 nsTArray
<RefPtr
<nsTransformedCharStyle
>>* aStyleArray
) {
// The auxiliary arrays are used only when both out-pointers were supplied.
281 bool auxiliaryOutputArrays
= aCanBreakBeforeArray
&& aStyleArray
;
282 MOZ_ASSERT(!auxiliaryOutputArrays
|| aTextRun
,
283 "text run must be provided to use aux output arrays");
285 uint32_t length
= aString
.Length();
286 const char16_t
* str
= aString
.BeginReading();
287 // If an unconditional mask character was passed, we'll use it; if not, any
288 // masking called for by the textrun styles will use TextEditor's mask char.
289 const char16_t mask
= aMaskChar
? aMaskChar
: TextEditor::PasswordMask();
291 bool mergeNeeded
= false;
293 bool capitalizeDutchIJ
= false;
294 bool prevIsLetter
= false;
295 bool ntPrefix
= false; // true immediately after a word-initial 'n' or 't'
296 // when doing Irish lowercasing
297 bool seenSoftDotted
= false; // true immediately after an I or J that is
298 // converted to lowercase in Lithuanian mode
// uint32_t(-1) serves as the "no position recorded" value for the index
// and mark variables below.
299 uint32_t sigmaIndex
= uint32_t(-1);
302 StyleTextTransform style
=
303 aGlobalTransform
.valueOr(StyleTextTransform::None());
304 bool forceNonFullWidth
= false;
305 const nsAtom
* lang
= aLanguage
;
307 LanguageSpecificCasingBehavior languageSpecificCasing
= GetCasingFor(lang
);
308 mozilla::GreekCasing::State greekState
;
309 mozilla::IrishCasing::State irishState
;
310 uint32_t irishMark
= uint32_t(-1); // location of possible prefix letter(s)
311 // in the output string
312 uint32_t irishMarkSrc
= uint32_t(-1); // corresponding location in source
313 // string (may differ from output due
314 // to expansions like eszet -> 'SS')
315 uint32_t greekMark
= uint32_t(-1); // location of uppercase ETA that may need
316 // tonos added (if it is disjunctive eta)
317 const char16_t kGreekUpperEta
= 0x0397;
// Main loop: i indexes aString while aOffsetInTextRun tracks the matching
// position in aTextRun.
319 for (uint32_t i
= 0; i
< length
; ++i
, ++aOffsetInTextRun
) {
320 uint32_t ch
= str
[i
];
322 RefPtr
<nsTransformedCharStyle
> charStyle
;
324 charStyle
= aTextRun
->mStyles
[aOffsetInTextRun
];
325 style
= aGlobalTransform
.valueOr(charStyle
->mTextTransform
);
326 forceNonFullWidth
= charStyle
->mForceNonFullWidth
;
329 charStyle
->mExplicitLanguage
? charStyle
->mLanguage
.get() : nullptr;
// On a language change the casing behavior is looked up again and the
// Irish and Greek marks are reset.
330 if (lang
!= newLang
) {
332 languageSpecificCasing
= GetCasingFor(lang
);
335 irishMark
= uint32_t(-1);
336 irishMarkSrc
= uint32_t(-1);
337 greekMark
= uint32_t(-1);
341 // These should be mutually exclusive: mMaskPassword is set if we are
342 // handling <input type=password>, where the TextEditor code controls
343 // masking and we use its PasswordMask() character, in which case
344 // aMaskChar (from -webkit-text-security) is not used.
345 MOZ_ASSERT_IF(aMaskChar
, !(charStyle
&& charStyle
->mMaskPassword
));
347 bool maskPassword
= (charStyle
&& charStyle
->mMaskPassword
) || aMaskChar
;
349 const mozilla::unicode::MultiCharMapping
* mcm
;
350 bool inhibitBreakBefore
= false; // have we just deleted preceding hyphen?
// Combine a surrogate pair into a single code point; originalCh keeps the
// untransformed value for the later IS_IN_BMP check.
352 if (i
< length
- 1 && NS_IS_SURROGATE_PAIR(ch
, str
[i
+ 1])) {
353 ch
= SURROGATE_TO_UCS4(ch
, str
[i
+ 1]);
355 const uint32_t originalCh
= ch
;
357 // Skip case transform if we're masking current character.
359 switch (style
.case_
) {
360 case StyleTextTransformCase::None
:
363 case StyleTextTransformCase::Lowercase
:
364 if (languageSpecificCasing
== eLSCB_Turkish
) {
366 ch
= LATIN_SMALL_LETTER_DOTLESS_I
;
368 sigmaIndex
= uint32_t(-1);
371 if (ch
== LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
) {
374 sigmaIndex
= uint32_t(-1);
379 if (languageSpecificCasing
== eLSCB_Lithuanian
) {
381 /* From SpecialCasing.txt:
382 * # Introduce an explicit dot above when lowercasing capital I's and J's
383 * # whenever there are more accents above.
384 * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
386 * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
387 * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
388 * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
389 * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
390 * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
391 * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
394 if (ch
== 'I' || ch
== 'J' || ch
== 0x012E) {
395 ch
= ToLowerCase(ch
);
397 seenSoftDotted
= true;
398 sigmaIndex
= uint32_t(-1);
402 aConvertedString
.Append('i');
403 aConvertedString
.Append(0x0307);
407 seenSoftDotted
= false;
408 sigmaIndex
= uint32_t(-1);
412 aConvertedString
.Append('i');
413 aConvertedString
.Append(0x0307);
417 seenSoftDotted
= false;
418 sigmaIndex
= uint32_t(-1);
422 aConvertedString
.Append('i');
423 aConvertedString
.Append(0x0307);
427 seenSoftDotted
= false;
428 sigmaIndex
= uint32_t(-1);
433 cat
= mozilla::unicode::GetGenCategory(ch
);
435 if (languageSpecificCasing
== eLSCB_Irish
&&
436 cat
== nsUGenCategory::kLetter
) {
437 // See bug 1018805 for Irish lowercasing requirements
438 if (!prevIsLetter
&& (ch
== 'n' || ch
== 't')) {
441 if (ntPrefix
&& mozilla::IrishCasing::IsUpperVowel(ch
)) {
442 aConvertedString
.Append('-');
451 if (seenSoftDotted
&& cat
== nsUGenCategory::kMark
) {
452 // The seenSoftDotted flag will only be set in Lithuanian mode.
453 if (ch
== 0x0300 || ch
== 0x0301 || ch
== 0x0303) {
454 aConvertedString
.Append(0x0307);
458 seenSoftDotted
= false;
460 // Special lowercasing behavior for Greek Sigma: note that this is
461 // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
462 // *not* a language-specific mapping; it applies regardless of the
463 // language of the element.
465 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
466 // (i.e. the non-final form) whenever there is a following letter, or
467 // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
468 // followed by a LETTER); and to FINAL SIGMA when it is preceded by
469 // another letter but not followed by one.
471 // To implement the context-sensitive nature of this mapping, we keep
472 // track of whether the previous character was a letter. If not,
473 // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
474 // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
475 // record the position in the converted string; if we then encounter
476 // another letter, that FINAL SIGMA is replaced with a standard
479 // If sigmaIndex is not -1, it marks where we have provisionally
480 // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
481 // letter, we need to change it to SMALL SIGMA.
482 if (sigmaIndex
!= uint32_t(-1)) {
483 if (cat
== nsUGenCategory::kLetter
) {
484 aConvertedString
.SetCharAt(GREEK_SMALL_LETTER_SIGMA
, sigmaIndex
);
488 if (ch
== GREEK_CAPITAL_LETTER_SIGMA
) {
489 // If preceding char was a letter, map to FINAL instead of SMALL,
490 // and note where it occurred by setting sigmaIndex; we'll change
491 // it to standard SMALL SIGMA later if another letter follows
493 ch
= GREEK_SMALL_LETTER_FINAL_SIGMA
;
494 sigmaIndex
= aConvertedString
.Length();
496 // CAPITAL SIGMA not preceded by a letter is unconditionally
497 // mapped to SMALL SIGMA
498 ch
= GREEK_SMALL_LETTER_SIGMA
;
499 sigmaIndex
= uint32_t(-1);
505 // ignore diacritics for the purpose of contextual sigma mapping;
506 // otherwise, reset prevIsLetter appropriately and clear the
508 if (cat
!= nsUGenCategory::kMark
) {
509 prevIsLetter
= (cat
== nsUGenCategory::kLetter
);
510 sigmaIndex
= uint32_t(-1);
513 mcm
= mozilla::unicode::SpecialLower(ch
);
516 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
517 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
521 ch
= mcm
->mMappedChars
[j
];
525 ch
= ToLowerCase(ch
);
528 case StyleTextTransformCase::Uppercase
:
529 if (languageSpecificCasing
== eLSCB_Turkish
&& ch
== 'i') {
530 ch
= LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
;
534 if (languageSpecificCasing
== eLSCB_Greek
) {
537 ch
= mozilla::GreekCasing::UpperCase(ch
, greekState
, markEta
,
540 greekMark
= aConvertedString
.Length();
541 } else if (updateEta
) {
542 // Remove the TONOS from an uppercase ETA-TONOS that turned out
543 // not to be disjunctive-eta.
544 MOZ_ASSERT(aConvertedString
.Length() > 0 &&
545 greekMark
< aConvertedString
.Length(),
547 aConvertedString
.SetCharAt(kGreekUpperEta
, greekMark
);
548 greekMark
= uint32_t(-1);
553 if (languageSpecificCasing
== eLSCB_Lithuanian
) {
555 * # Remove DOT ABOVE after "i" with upper or titlecase
557 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
559 if (ch
== 'i' || ch
== 'j' || ch
== 0x012F) {
560 seenSoftDotted
= true;
561 ch
= ToTitleCase(ch
);
564 if (seenSoftDotted
) {
565 seenSoftDotted
= false;
573 if (languageSpecificCasing
== eLSCB_Irish
) {
576 ch
= mozilla::IrishCasing::UpperCase(ch
, irishState
, mark
, action
);
578 irishMark
= aConvertedString
.Length();
582 nsString
& str
= aConvertedString
; // shorthand
585 // lowercase a single prefix letter
586 MOZ_ASSERT(str
.Length() > 0 && irishMark
< str
.Length(),
588 str
.SetCharAt(ToLowerCase(str
[irishMark
]), irishMark
);
589 irishMark
= uint32_t(-1);
590 irishMarkSrc
= uint32_t(-1);
593 // lowercase two prefix letters (immediately before current
595 MOZ_ASSERT(str
.Length() >= 2 && irishMark
== str
.Length() - 2,
597 str
.SetCharAt(ToLowerCase(str
[irishMark
]), irishMark
);
598 str
.SetCharAt(ToLowerCase(str
[irishMark
+ 1]), irishMark
+ 1);
599 irishMark
= uint32_t(-1);
600 irishMarkSrc
= uint32_t(-1);
603 // lowercase one prefix letter, and delete following hyphen
604 // (which must be the immediately-preceding char)
605 MOZ_ASSERT(str
.Length() >= 2 && irishMark
== str
.Length() - 2,
608 irishMark
!= uint32_t(-1) && irishMarkSrc
!= uint32_t(-1),
609 "failed to set irishMarks");
610 str
.Replace(irishMark
, 2, ToLowerCase(str
[irishMark
]));
611 aDeletedCharsArray
[irishMarkSrc
+ 1] = true;
612 // Remove the trailing entries (corresponding to the deleted
613 // hyphen) from the auxiliary arrays.
614 uint32_t len
= aCharsToMergeArray
.Length();
615 MOZ_ASSERT(len
>= 2);
616 aCharsToMergeArray
.TruncateLength(len
- 1);
617 if (auxiliaryOutputArrays
) {
618 MOZ_ASSERT(aStyleArray
->Length() == len
);
619 MOZ_ASSERT(aCanBreakBeforeArray
->Length() == len
);
620 aStyleArray
->TruncateLength(len
- 1);
621 aCanBreakBeforeArray
->TruncateLength(len
- 1);
622 inhibitBreakBefore
= true;
625 irishMark
= uint32_t(-1);
626 irishMarkSrc
= uint32_t(-1);
629 // ch has been set to the uppercase for current char;
630 // No need to check for SpecialUpper here as none of the
631 // characters that could trigger an Irish casing action have
635 // If we didn't have any special action to perform, fall through
636 // to check for special uppercase (ß)
639 // Updated mapping for German eszett, not currently reflected in the
640 // Unicode data files. This is behind a pref, as it may not work well
641 // with many (esp. older) fonts.
644 layout_css_text_transform_uppercase_eszett_enabled()) {
649 mcm
= mozilla::unicode::SpecialUpper(ch
);
652 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
653 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
657 ch
= mcm
->mMappedChars
[j
];
661 // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
662 // lack of widespread font support for the corresponding Mtavruli
663 // characters at this time (July 2018).
664 // This condition is to be removed once the major platforms ship with
665 // fonts that support U+1C90..1CBF.
666 if (ch
< 0x10D0 || ch
> 0x10FF) {
667 ch
= ToUpperCase(ch
);
671 case StyleTextTransformCase::Capitalize
:
673 if (capitalizeDutchIJ
&& ch
== 'j') {
675 capitalizeDutchIJ
= false;
678 capitalizeDutchIJ
= false;
679 if (aOffsetInTextRun
< aTextRun
->mCapitalize
.Length() &&
680 aTextRun
->mCapitalize
[aOffsetInTextRun
]) {
681 if (languageSpecificCasing
== eLSCB_Turkish
&& ch
== 'i') {
682 ch
= LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE
;
685 if (languageSpecificCasing
== eLSCB_Dutch
&& ch
== 'i') {
687 capitalizeDutchIJ
= true;
690 if (languageSpecificCasing
== eLSCB_Lithuanian
) {
692 * # Remove DOT ABOVE after "i" with upper or titlecase
694 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
696 if (ch
== 'i' || ch
== 'j' || ch
== 0x012F) {
697 seenSoftDotted
= true;
698 ch
= ToTitleCase(ch
);
701 if (seenSoftDotted
) {
702 seenSoftDotted
= false;
710 mcm
= mozilla::unicode::SpecialTitle(ch
);
713 while (j
< 2 && mcm
->mMappedChars
[j
+ 1]) {
714 aConvertedString
.Append(mcm
->mMappedChars
[j
]);
718 ch
= mcm
->mMappedChars
[j
];
722 ch
= ToTitleCase(ch
);
727 case StyleTextTransformCase::MathAuto
:
728 // text-transform: math-auto is used for automatic italicization of
729 // single-char <mi> elements. However, some legacy cases (italic style
730 // fallback and <mi> with leading/trailing whitespace) are still
731 // handled in MathMLTextRunFactory.
734 MathMLTextRunFactory::MathVariant(ch
, StyleMathVariant::Italic
);
735 if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) {
737 } else if (ch2
!= ch
) {
738 // Bug 930504. Some platforms do not have fonts for Mathematical
739 // Alphanumeric Symbols. Hence we only perform the transform if a
740 // character is actually available.
741 FontMatchType matchType
;
742 RefPtr
<gfxFont
> mathFont
=
743 aTextRun
->GetFontGroup()->FindFontForChar(
744 ch2
, 0, 0, intl::Script::COMMON
, nullptr, &matchType
);
753 MOZ_ASSERT_UNREACHABLE("all cases should be handled");
757 if (!aCaseTransformsOnly
) {
758 if (!forceNonFullWidth
&&
759 (style
.other_
& StyleTextTransformOther::FULL_WIDTH
)) {
760 ch
= mozilla::unicode::GetFullWidth(ch
);
763 if (style
.other_
& StyleTextTransformOther::FULL_SIZE_KANA
) {
765 static const uint32_t kSmallKanas
[] = {
767 0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
769 0x308E, 0x3095, 0x3096,
771 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
773 0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
775 0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
779 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
781 0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166,
784 static const uint16_t kFullSizeKanas
[] = {
786 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
788 0x308F, 0x304B, 0x3051,
790 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
792 0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
794 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
798 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82,
800 0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3};
804 const uint16_t len
= MOZ_ARRAY_LENGTH(kSmallKanas
);
805 if (mozilla::BinarySearch(kSmallKanas
, 0, len
, ch
, &index
)) {
806 ch
= kFullSizeKanas
[index
];
811 if (forceNonFullWidth
) {
812 ch
= mozilla::unicode::GetFullWidthInverse(ch
);
816 if (ch
== uint32_t(-1)) {
817 aDeletedCharsArray
.AppendElement(true);
820 aDeletedCharsArray
.AppendElement(false);
821 aCharsToMergeArray
.AppendElement(false);
822 if (auxiliaryOutputArrays
) {
823 aStyleArray
->AppendElement(charStyle
);
824 aCanBreakBeforeArray
->AppendElement(
826 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
827 : aTextRun
->CanBreakBefore(aOffsetInTextRun
));
831 aConvertedString
.Append(maskPassword
? mask
: ch
);
834 aConvertedString
.Append(mask
);
835 // TODO: We should show a password mask for a surrogate pair later.
836 aConvertedString
.Append(mask
);
838 aConvertedString
.Append(H_SURROGATE(ch
));
839 aConvertedString
.Append(L_SURROGATE(ch
));
843 if (!IS_IN_BMP(originalCh
)) {
844 // Skip the trailing surrogate.
847 aDeletedCharsArray
.AppendElement(true);
850 while (extraChars
-- > 0) {
852 aCharsToMergeArray
.AppendElement(true);
853 if (auxiliaryOutputArrays
) {
854 aStyleArray
->AppendElement(charStyle
);
855 aCanBreakBeforeArray
->AppendElement(
856 gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
);
862 // These output arrays, if present, must always have matching lengths:
863 if (auxiliaryOutputArrays
) {
864 DebugOnly
<uint32_t> len
= aCharsToMergeArray
.Length();
865 MOZ_ASSERT(aStyleArray
->Length() == len
);
866 MOZ_ASSERT(aCanBreakBeforeArray
->Length() == len
);
872 void nsCaseTransformTextRunFactory::RebuildTextRun(
873 nsTransformedTextRun
* aTextRun
, DrawTarget
* aRefDrawTarget
,
874 gfxMissingFontRecorder
* aMFR
) {
875 nsAutoString convertedString
;
876 AutoTArray
<bool, 50> charsToMergeArray
;
877 AutoTArray
<bool, 50> deletedCharsArray
;
878 AutoTArray
<uint8_t, 50> canBreakBeforeArray
;
879 AutoTArray
<RefPtr
<nsTransformedCharStyle
>, 50> styleArray
;
881 auto globalTransform
=
883 ? Some(StyleTextTransform
{StyleTextTransformCase::Uppercase
, {}})
885 bool mergeNeeded
= TransformString(
886 aTextRun
->mString
, convertedString
, globalTransform
, mMaskChar
,
887 /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray
,
888 deletedCharsArray
, aTextRun
, 0, &canBreakBeforeArray
, &styleArray
);
890 gfx::ShapedTextFlags flags
;
891 gfxTextRunFactory::Parameters innerParams
=
892 GetParametersForInner(aTextRun
, &flags
, aRefDrawTarget
);
893 gfxFontGroup
* fontGroup
= aTextRun
->GetFontGroup();
895 RefPtr
<nsTransformedTextRun
> transformedChild
;
896 RefPtr
<gfxTextRun
> cachedChild
;
899 if (mInnerTransformingTextRunFactory
) {
900 transformedChild
= mInnerTransformingTextRunFactory
->MakeTextRun(
901 convertedString
.BeginReading(), convertedString
.Length(), &innerParams
,
902 fontGroup
, flags
, nsTextFrameUtils::Flags(), std::move(styleArray
),
904 child
= transformedChild
.get();
906 cachedChild
= fontGroup
->MakeTextRun(
907 convertedString
.BeginReading(), convertedString
.Length(), &innerParams
,
908 flags
, nsTextFrameUtils::Flags(), aMFR
);
909 child
= cachedChild
.get();
914 // Copy potential linebreaks into child so they're preserved
915 // (and also child will be shaped appropriately)
916 NS_ASSERTION(convertedString
.Length() == canBreakBeforeArray
.Length(),
917 "Dropped characters or break-before values somewhere!");
918 gfxTextRun::Range
range(0, uint32_t(canBreakBeforeArray
.Length()));
919 child
->SetPotentialLineBreaks(range
, canBreakBeforeArray
.Elements());
920 if (transformedChild
) {
921 transformedChild
->FinishSettingProperties(aRefDrawTarget
, aMFR
);
924 aTextRun
->ResetGlyphRuns();
926 // Now merge multiple characters into one multi-glyph character as required
927 // and deal with skipping deleted accent chars
928 NS_ASSERTION(charsToMergeArray
.Length() == child
->GetLength(),
929 "source length mismatch");
930 NS_ASSERTION(deletedCharsArray
.Length() == aTextRun
->GetLength(),
931 "destination length mismatch");
932 MergeCharactersInTextRun(aTextRun
, child
, charsToMergeArray
.Elements(),
933 deletedCharsArray
.Elements());
935 // No merging to do, so just copy; this produces a more optimized textrun.
936 // We can't steal the data because the child may be cached and stealing
937 // the data would break the cache.
938 aTextRun
->CopyGlyphDataFrom(child
, gfxTextRun::Range(child
), 0);