Backed out changeset 496886cb30a5 (bug 1867152) for bc failures on browser_user_input...
[gecko.git] / layout / generic / nsTextRunTransformations.cpp
blobd18a7ec2936c652f4631a16978717f2f0a2516fe
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsTextRunTransformations.h"
9 #include <utility>
11 #include "GreekCasing.h"
12 #include "IrishCasing.h"
13 #include "MathMLTextRunFactory.h"
14 #include "mozilla/ComputedStyleInlines.h"
15 #include "mozilla/MemoryReporting.h"
16 #include "mozilla/StaticPrefs_layout.h"
17 #include "mozilla/StaticPrefs_mathml.h"
18 #include "mozilla/TextEditor.h"
19 #include "mozilla/gfx/2D.h"
20 #include "nsGkAtoms.h"
21 #include "nsSpecialCasingData.h"
22 #include "nsStyleConsts.h"
23 #include "nsTextFrameUtils.h"
24 #include "nsUnicharUtils.h"
25 #include "nsUnicodeProperties.h"
27 using namespace mozilla;
28 using namespace mozilla::gfx;
// Unicode characters needing special casing treatment in tr/az languages
// (dotted and dotless forms of the letter i).
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see bug 740120. CAPITAL SIGMA lowercases to either the final or the
// non-final small form depending on the surrounding letters.
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define GREEK_SMALL_LETTER_SIGMA 0x03C3
40 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
41 const gfxTextRunFactory::Parameters* aParams,
42 nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
43 const char16_t* aString, uint32_t aLength,
44 const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
45 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
46 NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
47 "didn't expect text to be marked as 8-bit here");
49 void* storage =
50 AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
51 if (!storage) {
52 return nullptr;
55 RefPtr<nsTransformedTextRun> result = new (storage)
56 nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
57 aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
58 return result.forget();
61 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
62 bool* aCapitalization) {
63 if (mCapitalize.IsEmpty()) {
64 // XXX(Bug 1631371) Check if this should use a fallible operation as it
65 // pretended earlier.
66 mCapitalize.AppendElements(GetLength());
67 memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
69 memcpy(mCapitalize.Elements() + aStart, aCapitalization,
70 aLength * sizeof(bool));
71 mNeedsRebuild = true;
74 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
75 const uint8_t* aBreakBefore) {
76 bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
77 if (changed) {
78 mNeedsRebuild = true;
80 return changed;
83 size_t nsTransformedTextRun::SizeOfExcludingThis(
84 mozilla::MallocSizeOf aMallocSizeOf) {
85 size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
86 total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
87 total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
88 if (mOwnsFactory) {
89 total += aMallocSizeOf(mFactory);
91 return total;
94 size_t nsTransformedTextRun::SizeOfIncludingThis(
95 mozilla::MallocSizeOf aMallocSizeOf) {
96 return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
99 already_AddRefed<nsTransformedTextRun>
100 nsTransformingTextRunFactory::MakeTextRun(
101 const char16_t* aString, uint32_t aLength,
102 const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
103 gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
104 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
105 return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
106 aLength, aFlags, aFlags2,
107 std::move(aStyles), aOwnsFactory);
110 already_AddRefed<nsTransformedTextRun>
111 nsTransformingTextRunFactory::MakeTextRun(
112 const uint8_t* aString, uint32_t aLength,
113 const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
114 gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
115 nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
116 // We'll only have a Unicode code path to minimize the amount of code needed
117 // for these rarely used features
118 NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
119 aLength);
120 return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
121 aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
122 std::move(aStyles), aOwnsFactory);
125 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
126 const bool* aCharsToMerge,
127 const bool* aDeletedChars) {
128 MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
129 uint32_t offset = 0;
130 AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
131 const gfxTextRun::CompressedGlyph continuationGlyph =
132 gfxTextRun::CompressedGlyph::MakeComplex(false, false);
133 const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
134 gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs();
135 for (gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
136 !iter.AtEnd(); iter.NextRun()) {
137 const gfxTextRun::GlyphRun* run = iter.GlyphRun();
138 aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
139 run->mOrientation, run->mIsCJK);
141 bool anyMissing = false;
142 uint32_t mergeRunStart = iter.StringStart();
143 // Initialize to a copy of the first source glyph in the merge run.
144 gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
145 uint32_t stringEnd = iter.StringEnd();
146 for (uint32_t k = iter.StringStart(); k < stringEnd; ++k) {
147 const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
148 if (g.IsSimpleGlyph()) {
149 if (!anyMissing) {
150 gfxTextRun::DetailedGlyph details;
151 details.mGlyphID = g.GetSimpleGlyph();
152 details.mAdvance = g.GetSimpleAdvance();
153 glyphs.AppendElement(details);
155 } else {
156 if (g.IsMissing()) {
157 anyMissing = true;
158 glyphs.Clear();
160 if (g.GetGlyphCount() > 0) {
161 glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
165 if (k + 1 < iter.StringEnd() && aCharsToMerge[k + 1]) {
166 // next char is supposed to merge with current, so loop without
167 // writing current merged glyph to the destination
168 continue;
171 // If the start of the merge run is actually a character that should
172 // have been merged with the previous character (this can happen
173 // if there's a font change in the middle of a case-mapped character,
174 // that decomposed into a sequence of base+diacritics, for example),
175 // just discard the entire merge run. See comment at start of this
176 // function.
177 NS_WARNING_ASSERTION(
178 !aCharsToMerge[mergeRunStart],
179 "unable to merge across a glyph run boundary, glyph(s) discarded");
180 if (!aCharsToMerge[mergeRunStart]) {
181 // Determine if we can just copy the existing simple glyph record.
182 if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) {
183 destGlyphs[offset] = mergedGlyph;
184 } else {
185 // Otherwise set up complex glyph record and store detailed glyphs.
186 mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
187 mergedGlyph.IsLigatureGroupStart());
188 destGlyphs[offset] = mergedGlyph;
189 aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements());
190 if (anyMissing) {
191 destGlyphs[offset].SetMissing();
194 offset++;
196 while (offset < aDest->GetLength() && aDeletedChars[offset]) {
197 destGlyphs[offset++] = continuationGlyph;
201 glyphs.Clear();
202 anyMissing = false;
203 mergeRunStart = k + 1;
204 if (mergeRunStart < stringEnd) {
205 mergedGlyph = srcGlyphs[mergeRunStart];
208 NS_ASSERTION(glyphs.Length() == 0,
209 "Leftover glyphs, don't request merging of the last character "
210 "with its next!");
212 NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
215 gfxTextRunFactory::Parameters GetParametersForInner(
216 nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
217 DrawTarget* aRefDrawTarget) {
218 gfxTextRunFactory::Parameters params = {
219 aRefDrawTarget, nullptr, nullptr,
220 nullptr, 0, aTextRun->GetAppUnitsPerDevUnit()};
221 *aFlags = aTextRun->GetFlags();
222 return params;
// Some languages have special casing conventions that differ from the
// default Unicode mappings.
// The enum values here are named for well-known exemplar languages that
// exhibit the behavior in question; multiple lang tags may map to the
// same setting here, if the behavior is shared by other languages.
enum LanguageSpecificCasingBehavior {
  eLSCB_None,       // default non-lang-specific behavior
  eLSCB_Dutch,      // treat "ij" digraph as a unit for capitalization
  eLSCB_Greek,      // strip accent when uppercasing Greek vowels
  eLSCB_Irish,      // keep prefix letters as lowercase when uppercasing Irish
  eLSCB_Turkish,    // preserve dotted/dotless-i distinction in uppercase
  eLSCB_Lithuanian  // retain dot on lowercase i/j when an accent is present
};
239 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
240 if (!aLang) {
241 return eLSCB_None;
243 if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
244 aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
245 aLang == nsGkAtoms::tt) {
246 return eLSCB_Turkish;
248 if (aLang == nsGkAtoms::nl) {
249 return eLSCB_Dutch;
251 if (aLang == nsGkAtoms::el) {
252 return eLSCB_Greek;
254 if (aLang == nsGkAtoms::ga) {
255 return eLSCB_Irish;
257 if (aLang == nsGkAtoms::lt_) {
258 return eLSCB_Lithuanian;
261 // Is there a region subtag we should ignore?
262 nsAtomString langStr(const_cast<nsAtom*>(aLang));
263 int index = langStr.FindChar('-');
264 if (index > 0) {
265 langStr.Truncate(index);
266 RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr);
267 return GetCasingFor(truncatedLang);
270 return eLSCB_None;
273 bool nsCaseTransformTextRunFactory::TransformString(
274 const nsAString& aString, nsString& aConvertedString,
275 const Maybe<StyleTextTransform>& aGlobalTransform, char16_t aMaskChar,
276 bool aCaseTransformsOnly, const nsAtom* aLanguage,
277 nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
278 const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
279 nsTArray<uint8_t>* aCanBreakBeforeArray,
280 nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
281 bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
282 MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
283 "text run must be provided to use aux output arrays");
285 uint32_t length = aString.Length();
286 const char16_t* str = aString.BeginReading();
287 // If an unconditional mask character was passed, we'll use it; if not, any
288 // masking called for by the textrun styles will use TextEditor's mask char.
289 const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask();
291 bool mergeNeeded = false;
293 bool capitalizeDutchIJ = false;
294 bool prevIsLetter = false;
295 bool ntPrefix = false; // true immediately after a word-initial 'n' or 't'
296 // when doing Irish lowercasing
297 bool seenSoftDotted = false; // true immediately after an I or J that is
298 // converted to lowercase in Lithuanian mode
299 uint32_t sigmaIndex = uint32_t(-1);
300 nsUGenCategory cat;
302 StyleTextTransform style =
303 aGlobalTransform.valueOr(StyleTextTransform::None());
304 bool forceNonFullWidth = false;
305 const nsAtom* lang = aLanguage;
307 LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
308 mozilla::GreekCasing::State greekState;
309 mozilla::IrishCasing::State irishState;
310 uint32_t irishMark = uint32_t(-1); // location of possible prefix letter(s)
311 // in the output string
312 uint32_t irishMarkSrc = uint32_t(-1); // corresponding location in source
313 // string (may differ from output due
314 // to expansions like eszet -> 'SS')
315 uint32_t greekMark = uint32_t(-1); // location of uppercase ETA that may need
316 // tonos added (if it is disjunctive eta)
317 const char16_t kGreekUpperEta = 0x0397;
319 for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
320 uint32_t ch = str[i];
322 RefPtr<nsTransformedCharStyle> charStyle;
323 if (aTextRun) {
324 charStyle = aTextRun->mStyles[aOffsetInTextRun];
325 style = aGlobalTransform.valueOr(charStyle->mTextTransform);
326 forceNonFullWidth = charStyle->mForceNonFullWidth;
328 nsAtom* newLang =
329 charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
330 if (lang != newLang) {
331 lang = newLang;
332 languageSpecificCasing = GetCasingFor(lang);
333 greekState.Reset();
334 irishState.Reset();
335 irishMark = uint32_t(-1);
336 irishMarkSrc = uint32_t(-1);
337 greekMark = uint32_t(-1);
341 // These should be mutually exclusive: mMaskPassword is set if we are
342 // handling <input type=password>, where the TextEditor code controls
343 // masking and we use its PasswordMask() character, in which case
344 // aMaskChar (from -webkit-text-security) is not used.
345 MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword));
347 bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar;
348 int extraChars = 0;
349 const mozilla::unicode::MultiCharMapping* mcm;
350 bool inhibitBreakBefore = false; // have we just deleted preceding hyphen?
352 if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
353 ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
355 const uint32_t originalCh = ch;
357 // Skip case transform if we're masking current character.
358 if (!maskPassword) {
359 switch (style.case_) {
360 case StyleTextTransformCase::None:
361 break;
363 case StyleTextTransformCase::Lowercase:
364 if (languageSpecificCasing == eLSCB_Turkish) {
365 if (ch == 'I') {
366 ch = LATIN_SMALL_LETTER_DOTLESS_I;
367 prevIsLetter = true;
368 sigmaIndex = uint32_t(-1);
369 break;
371 if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
372 ch = 'i';
373 prevIsLetter = true;
374 sigmaIndex = uint32_t(-1);
375 break;
379 if (languageSpecificCasing == eLSCB_Lithuanian) {
380 // clang-format off
381 /* From SpecialCasing.txt:
382 * # Introduce an explicit dot above when lowercasing capital I's and J's
383 * # whenever there are more accents above.
384 * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
386 * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
387 * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
388 * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
389 * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
390 * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
391 * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
393 // clang-format on
394 if (ch == 'I' || ch == 'J' || ch == 0x012E) {
395 ch = ToLowerCase(ch);
396 prevIsLetter = true;
397 seenSoftDotted = true;
398 sigmaIndex = uint32_t(-1);
399 break;
401 if (ch == 0x00CC) {
402 aConvertedString.Append('i');
403 aConvertedString.Append(0x0307);
404 extraChars += 2;
405 ch = 0x0300;
406 prevIsLetter = true;
407 seenSoftDotted = false;
408 sigmaIndex = uint32_t(-1);
409 break;
411 if (ch == 0x00CD) {
412 aConvertedString.Append('i');
413 aConvertedString.Append(0x0307);
414 extraChars += 2;
415 ch = 0x0301;
416 prevIsLetter = true;
417 seenSoftDotted = false;
418 sigmaIndex = uint32_t(-1);
419 break;
421 if (ch == 0x0128) {
422 aConvertedString.Append('i');
423 aConvertedString.Append(0x0307);
424 extraChars += 2;
425 ch = 0x0303;
426 prevIsLetter = true;
427 seenSoftDotted = false;
428 sigmaIndex = uint32_t(-1);
429 break;
433 cat = mozilla::unicode::GetGenCategory(ch);
435 if (languageSpecificCasing == eLSCB_Irish &&
436 cat == nsUGenCategory::kLetter) {
437 // See bug 1018805 for Irish lowercasing requirements
438 if (!prevIsLetter && (ch == 'n' || ch == 't')) {
439 ntPrefix = true;
440 } else {
441 if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
442 aConvertedString.Append('-');
443 ++extraChars;
445 ntPrefix = false;
447 } else {
448 ntPrefix = false;
451 if (seenSoftDotted && cat == nsUGenCategory::kMark) {
452 // The seenSoftDotted flag will only be set in Lithuanian mode.
453 if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
454 aConvertedString.Append(0x0307);
455 ++extraChars;
458 seenSoftDotted = false;
460 // Special lowercasing behavior for Greek Sigma: note that this is
461 // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
462 // *not* a language-specific mapping; it applies regardless of the
463 // language of the element.
465 // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
466 // (i.e. the non-final form) whenever there is a following letter, or
467 // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
468 // followed by a LETTER); and to FINAL SIGMA when it is preceded by
469 // another letter but not followed by one.
471 // To implement the context-sensitive nature of this mapping, we keep
472 // track of whether the previous character was a letter. If not,
473 // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
474 // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
475 // record the position in the converted string; if we then encounter
476 // another letter, that FINAL SIGMA is replaced with a standard
477 // SMALL SIGMA.
479 // If sigmaIndex is not -1, it marks where we have provisionally
480 // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
481 // letter, we need to change it to SMALL SIGMA.
482 if (sigmaIndex != uint32_t(-1)) {
483 if (cat == nsUGenCategory::kLetter) {
484 aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
488 if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
489 // If preceding char was a letter, map to FINAL instead of SMALL,
490 // and note where it occurred by setting sigmaIndex; we'll change
491 // it to standard SMALL SIGMA later if another letter follows
492 if (prevIsLetter) {
493 ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
494 sigmaIndex = aConvertedString.Length();
495 } else {
496 // CAPITAL SIGMA not preceded by a letter is unconditionally
497 // mapped to SMALL SIGMA
498 ch = GREEK_SMALL_LETTER_SIGMA;
499 sigmaIndex = uint32_t(-1);
501 prevIsLetter = true;
502 break;
505 // ignore diacritics for the purpose of contextual sigma mapping;
506 // otherwise, reset prevIsLetter appropriately and clear the
507 // sigmaIndex marker
508 if (cat != nsUGenCategory::kMark) {
509 prevIsLetter = (cat == nsUGenCategory::kLetter);
510 sigmaIndex = uint32_t(-1);
513 mcm = mozilla::unicode::SpecialLower(ch);
514 if (mcm) {
515 int j = 0;
516 while (j < 2 && mcm->mMappedChars[j + 1]) {
517 aConvertedString.Append(mcm->mMappedChars[j]);
518 ++extraChars;
519 ++j;
521 ch = mcm->mMappedChars[j];
522 break;
525 ch = ToLowerCase(ch);
526 break;
528 case StyleTextTransformCase::Uppercase:
529 if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
530 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
531 break;
534 if (languageSpecificCasing == eLSCB_Greek) {
535 bool markEta;
536 bool updateEta;
537 ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
538 updateEta);
539 if (markEta) {
540 greekMark = aConvertedString.Length();
541 } else if (updateEta) {
542 // Remove the TONOS from an uppercase ETA-TONOS that turned out
543 // not to be disjunctive-eta.
544 MOZ_ASSERT(aConvertedString.Length() > 0 &&
545 greekMark < aConvertedString.Length(),
546 "bad greekMark!");
547 aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
548 greekMark = uint32_t(-1);
550 break;
553 if (languageSpecificCasing == eLSCB_Lithuanian) {
555 * # Remove DOT ABOVE after "i" with upper or titlecase
557 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
559 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
560 seenSoftDotted = true;
561 ch = ToTitleCase(ch);
562 break;
564 if (seenSoftDotted) {
565 seenSoftDotted = false;
566 if (ch == 0x0307) {
567 ch = uint32_t(-1);
568 break;
573 if (languageSpecificCasing == eLSCB_Irish) {
574 bool mark;
575 uint8_t action;
576 ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
577 if (mark) {
578 irishMark = aConvertedString.Length();
579 irishMarkSrc = i;
580 break;
581 } else if (action) {
582 nsString& str = aConvertedString; // shorthand
583 switch (action) {
584 case 1:
585 // lowercase a single prefix letter
586 MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(),
587 "bad irishMark!");
588 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
589 irishMark = uint32_t(-1);
590 irishMarkSrc = uint32_t(-1);
591 break;
592 case 2:
593 // lowercase two prefix letters (immediately before current
594 // pos)
595 MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
596 "bad irishMark!");
597 str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
598 str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
599 irishMark = uint32_t(-1);
600 irishMarkSrc = uint32_t(-1);
601 break;
602 case 3:
603 // lowercase one prefix letter, and delete following hyphen
604 // (which must be the immediately-preceding char)
605 MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
606 "bad irishMark!");
607 MOZ_ASSERT(
608 irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
609 "failed to set irishMarks");
610 str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
611 aDeletedCharsArray[irishMarkSrc + 1] = true;
612 // Remove the trailing entries (corresponding to the deleted
613 // hyphen) from the auxiliary arrays.
614 uint32_t len = aCharsToMergeArray.Length();
615 MOZ_ASSERT(len >= 2);
616 aCharsToMergeArray.TruncateLength(len - 1);
617 if (auxiliaryOutputArrays) {
618 MOZ_ASSERT(aStyleArray->Length() == len);
619 MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
620 aStyleArray->TruncateLength(len - 1);
621 aCanBreakBeforeArray->TruncateLength(len - 1);
622 inhibitBreakBefore = true;
624 mergeNeeded = true;
625 irishMark = uint32_t(-1);
626 irishMarkSrc = uint32_t(-1);
627 break;
629 // ch has been set to the uppercase for current char;
630 // No need to check for SpecialUpper here as none of the
631 // characters that could trigger an Irish casing action have
632 // special mappings.
633 break;
635 // If we didn't have any special action to perform, fall through
636 // to check for special uppercase (ß)
639 // Updated mapping for German eszett, not currently reflected in the
640 // Unicode data files. This is behind a pref, as it may not work well
641 // with many (esp. older) fonts.
642 if (ch == 0x00DF &&
643 StaticPrefs::
644 layout_css_text_transform_uppercase_eszett_enabled()) {
645 ch = 0x1E9E;
646 break;
649 mcm = mozilla::unicode::SpecialUpper(ch);
650 if (mcm) {
651 int j = 0;
652 while (j < 2 && mcm->mMappedChars[j + 1]) {
653 aConvertedString.Append(mcm->mMappedChars[j]);
654 ++extraChars;
655 ++j;
657 ch = mcm->mMappedChars[j];
658 break;
661 // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
662 // lack of widespread font support for the corresponding Mtavruli
663 // characters at this time (July 2018).
664 // This condition is to be removed once the major platforms ship with
665 // fonts that support U+1C90..1CBF.
666 if (ch < 0x10D0 || ch > 0x10FF) {
667 ch = ToUpperCase(ch);
669 break;
671 case StyleTextTransformCase::Capitalize:
672 if (aTextRun) {
673 if (capitalizeDutchIJ && ch == 'j') {
674 ch = 'J';
675 capitalizeDutchIJ = false;
676 break;
678 capitalizeDutchIJ = false;
679 if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
680 aTextRun->mCapitalize[aOffsetInTextRun]) {
681 if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
682 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
683 break;
685 if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
686 ch = 'I';
687 capitalizeDutchIJ = true;
688 break;
690 if (languageSpecificCasing == eLSCB_Lithuanian) {
692 * # Remove DOT ABOVE after "i" with upper or titlecase
694 * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
696 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
697 seenSoftDotted = true;
698 ch = ToTitleCase(ch);
699 break;
701 if (seenSoftDotted) {
702 seenSoftDotted = false;
703 if (ch == 0x0307) {
704 ch = uint32_t(-1);
705 break;
710 mcm = mozilla::unicode::SpecialTitle(ch);
711 if (mcm) {
712 int j = 0;
713 while (j < 2 && mcm->mMappedChars[j + 1]) {
714 aConvertedString.Append(mcm->mMappedChars[j]);
715 ++extraChars;
716 ++j;
718 ch = mcm->mMappedChars[j];
719 break;
722 ch = ToTitleCase(ch);
725 break;
727 case StyleTextTransformCase::MathAuto:
728 // text-transform: math-auto is used for automatic italicization of
729 // single-char <mi> elements. However, some legacy cases (italic style
730 // fallback and <mi> with leading/trailing whitespace) are still
731 // handled in MathMLTextRunFactory.
732 if (length == 1) {
733 uint32_t ch2 =
734 MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic);
735 if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) {
736 ch = ch2;
737 } else if (ch2 != ch) {
738 // Bug 930504. Some platforms do not have fonts for Mathematical
739 // Alphanumeric Symbols. Hence we only perform the transform if a
740 // character is actually available.
741 FontMatchType matchType;
742 RefPtr<gfxFont> mathFont =
743 aTextRun->GetFontGroup()->FindFontForChar(
744 ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType);
745 if (mathFont) {
746 ch = ch2;
750 break;
752 default:
753 MOZ_ASSERT_UNREACHABLE("all cases should be handled");
754 break;
757 if (!aCaseTransformsOnly) {
758 if (!forceNonFullWidth &&
759 (style.other_ & StyleTextTransformOther::FULL_WIDTH)) {
760 ch = mozilla::unicode::GetFullWidth(ch);
763 if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) {
764 // clang-format off
765 static const uint32_t kSmallKanas[] = {
766 // ぁ ぃ ぅ ぇ ぉ っ ゃ ゅ ょ
767 0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
768 // ゎ ゕ ゖ
769 0x308E, 0x3095, 0x3096,
770 // ァ ィ ゥ ェ ォ ッ ャ ュ ョ
771 0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
772 // ヮ ヵ ヶ ㇰ ㇱ ㇲ ㇳ ㇴ ㇵ
773 0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
774 // ㇶ ㇷ ㇸ ㇹ ㇺ ㇻ ㇼ ㇽ ㇾ
775 0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
776 // ㇿ
777 0x31FF,
778 // ァ ィ ゥ ェ ォ ャ ュ ョ ッ
779 0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
780 // 𛄲 𛅐 𛅑 𛅒 𛅕 𛅤 𛅥 𛅦
781 0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166,
782 // 𛅧
783 0x1B167};
784 static const uint16_t kFullSizeKanas[] = {
785 // あ い う え お つ や ゆ よ
786 0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
787 // わ か け
788 0x308F, 0x304B, 0x3051,
789 // ア イ ウ エ オ ツ ヤ ユ ヨ
790 0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
791 // ワ カ ケ ク シ ス ト ヌ ハ
792 0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
793 // ヒ フ ヘ ホ ム ラ リ ル レ
794 0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
795 // ロ
796 0x30ED,
797 // ア イ ウ エ オ ヤ ユ ヨ ツ
798 0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82,
799 // こ ゐ ゑ を コ ヰ ヱ ヲ ン
800 0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3};
801 // clang-format on
803 size_t index;
804 const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas);
805 if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
806 ch = kFullSizeKanas[index];
811 if (forceNonFullWidth) {
812 ch = mozilla::unicode::GetFullWidthInverse(ch);
816 if (ch == uint32_t(-1)) {
817 aDeletedCharsArray.AppendElement(true);
818 mergeNeeded = true;
819 } else {
820 aDeletedCharsArray.AppendElement(false);
821 aCharsToMergeArray.AppendElement(false);
822 if (auxiliaryOutputArrays) {
823 aStyleArray->AppendElement(charStyle);
824 aCanBreakBeforeArray->AppendElement(
825 inhibitBreakBefore
826 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
827 : aTextRun->CanBreakBefore(aOffsetInTextRun));
830 if (IS_IN_BMP(ch)) {
831 aConvertedString.Append(maskPassword ? mask : ch);
832 } else {
833 if (maskPassword) {
834 aConvertedString.Append(mask);
835 // TODO: We should show a password mask for a surrogate pair later.
836 aConvertedString.Append(mask);
837 } else {
838 aConvertedString.Append(H_SURROGATE(ch));
839 aConvertedString.Append(L_SURROGATE(ch));
841 ++extraChars;
843 if (!IS_IN_BMP(originalCh)) {
844 // Skip the trailing surrogate.
845 ++aOffsetInTextRun;
846 ++i;
847 aDeletedCharsArray.AppendElement(true);
850 while (extraChars-- > 0) {
851 mergeNeeded = true;
852 aCharsToMergeArray.AppendElement(true);
853 if (auxiliaryOutputArrays) {
854 aStyleArray->AppendElement(charStyle);
855 aCanBreakBeforeArray->AppendElement(
856 gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
862 // These output arrays, if present, must always have matching lengths:
863 if (auxiliaryOutputArrays) {
864 DebugOnly<uint32_t> len = aCharsToMergeArray.Length();
865 MOZ_ASSERT(aStyleArray->Length() == len);
866 MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
869 return mergeNeeded;
872 void nsCaseTransformTextRunFactory::RebuildTextRun(
873 nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
874 gfxMissingFontRecorder* aMFR) {
875 nsAutoString convertedString;
876 AutoTArray<bool, 50> charsToMergeArray;
877 AutoTArray<bool, 50> deletedCharsArray;
878 AutoTArray<uint8_t, 50> canBreakBeforeArray;
879 AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
881 auto globalTransform =
882 mAllUppercase
883 ? Some(StyleTextTransform{StyleTextTransformCase::Uppercase, {}})
884 : Nothing();
885 bool mergeNeeded = TransformString(
886 aTextRun->mString, convertedString, globalTransform, mMaskChar,
887 /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
888 deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray);
890 gfx::ShapedTextFlags flags;
891 gfxTextRunFactory::Parameters innerParams =
892 GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
893 gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
895 RefPtr<nsTransformedTextRun> transformedChild;
896 RefPtr<gfxTextRun> cachedChild;
897 gfxTextRun* child;
899 if (mInnerTransformingTextRunFactory) {
900 transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
901 convertedString.BeginReading(), convertedString.Length(), &innerParams,
902 fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray),
903 false);
904 child = transformedChild.get();
905 } else {
906 cachedChild = fontGroup->MakeTextRun(
907 convertedString.BeginReading(), convertedString.Length(), &innerParams,
908 flags, nsTextFrameUtils::Flags(), aMFR);
909 child = cachedChild.get();
911 if (!child) {
912 return;
914 // Copy potential linebreaks into child so they're preserved
915 // (and also child will be shaped appropriately)
916 NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
917 "Dropped characters or break-before values somewhere!");
918 gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
919 child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
920 if (transformedChild) {
921 transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
924 aTextRun->ResetGlyphRuns();
925 if (mergeNeeded) {
926 // Now merge multiple characters into one multi-glyph character as required
927 // and deal with skipping deleted accent chars
928 NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
929 "source length mismatch");
930 NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
931 "destination length mismatch");
932 MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
933 deletedCharsArray.Elements());
934 } else {
935 // No merging to do, so just copy; this produces a more optimized textrun.
936 // We can't steal the data because the child may be cached and stealing
937 // the data would break the cache.
938 aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);