layout/generic/nsTextRunTransformations.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsTextRunTransformations.h"
   8
   9 #include <utility>
  10
  11 #include "GreekCasing.h"
  12 #include "IrishCasing.h"
  13 #include "MathMLTextRunFactory.h"
  14 #include "mozilla/ComputedStyleInlines.h"
  15 #include "mozilla/MemoryReporting.h"
  16 #include "mozilla/StaticPrefs_layout.h"
  17 #include "mozilla/StaticPrefs_mathml.h"
  18 #include "mozilla/TextEditor.h"
  19 #include "mozilla/gfx/2D.h"
  20 #include "nsGkAtoms.h"
  21 #include "nsSpecialCasingData.h"
  22 #include "nsStyleConsts.h"
  23 #include "nsTextFrameUtils.h"
  24 #include "nsUnicharUtils.h"
  25 #include "nsUnicodeProperties.h"
  26
  27 using namespace mozilla;
  28 using namespace mozilla::gfx;
  29
// Unicode characters needing special casing treatment in tr/az languages:
// U+0130 (capital I with dot above) and U+0131 (small dotless i).
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see bug 740120. The three code points are capital sigma (U+03A3), small
// final sigma (U+03C2) and the ordinary small sigma (U+03C3).
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define GREEK_SMALL_LETTER_SIGMA 0x03C3
  39
  40 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
  41     const gfxTextRunFactory::Parameters* aParams,
  42     nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
  43     const char16_t* aString, uint32_t aLength,
  44     const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
  45     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
  46   NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
  47                "didn't expect text to be marked as 8-bit here");
  48
  49   void* storage =
  50       AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
  51   if (!storage) {
  52     return nullptr;
  53   }
  54
  55   RefPtr<nsTransformedTextRun> result = new (storage)
  56       nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
  57                            aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
  58   return result.forget();
  59 }
  60
  61 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
  62                                              bool* aCapitalization) {
  63   if (mCapitalize.IsEmpty()) {
  64     // XXX(Bug 1631371) Check if this should use a fallible operation as it
  65     // pretended earlier.
  66     mCapitalize.AppendElements(GetLength());
  67     memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
  68   }
  69   memcpy(mCapitalize.Elements() + aStart, aCapitalization,
  70          aLength * sizeof(bool));
  71   mNeedsRebuild = true;
  72 }
  73
  74 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
  75                                                   const uint8_t* aBreakBefore) {
  76   bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
  77   if (changed) {
  78     mNeedsRebuild = true;
  79   }
  80   return changed;
  81 }
  82
  83 size_t nsTransformedTextRun::SizeOfExcludingThis(
  84     mozilla::MallocSizeOf aMallocSizeOf) {
  85   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
  86   total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
  87   total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
  88   if (mOwnsFactory) {
  89     total += aMallocSizeOf(mFactory);
  90   }
  91   return total;
  92 }
  93
  94 size_t nsTransformedTextRun::SizeOfIncludingThis(
  95     mozilla::MallocSizeOf aMallocSizeOf) {
  96   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  97 }
  98
  99 already_AddRefed<nsTransformedTextRun>
 100 nsTransformingTextRunFactory::MakeTextRun(
 101     const char16_t* aString, uint32_t aLength,
 102     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
 103     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
 104     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
 105   return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
 106                                       aLength, aFlags, aFlags2,
 107                                       std::move(aStyles), aOwnsFactory);
 108 }
 109
 110 already_AddRefed<nsTransformedTextRun>
 111 nsTransformingTextRunFactory::MakeTextRun(
 112     const uint8_t* aString, uint32_t aLength,
 113     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
 114     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
 115     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
 116   // We'll only have a Unicode code path to minimize the amount of code needed
 117   // for these rarely used features
 118   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
 119                                        aLength);
 120   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
 121                      aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
 122                      std::move(aStyles), aOwnsFactory);
 123 }
 124
 125 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
 126                               const bool* aCharsToMerge,
 127                               const bool* aDeletedChars) {
 128   MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
 129   uint32_t offset = 0;
 130   AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
 131   const gfxTextRun::CompressedGlyph continuationGlyph =
 132       gfxTextRun::CompressedGlyph::MakeComplex(false, false);
 133   const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
 134   gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs();
 135   for (gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
 136        !iter.AtEnd(); iter.NextRun()) {
 137     const gfxTextRun::GlyphRun* run = iter.GlyphRun();
 138     aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
 139                        run->mOrientation, run->mIsCJK);
 140
 141     bool anyMissing = false;
 142     uint32_t mergeRunStart = iter.StringStart();
 143     // Initialize to a copy of the first source glyph in the merge run.
 144     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
 145     uint32_t stringEnd = iter.StringEnd();
 146     for (uint32_t k = iter.StringStart(); k < stringEnd; ++k) {
 147       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
 148       if (g.IsSimpleGlyph()) {
 149         if (!anyMissing) {
 150           gfxTextRun::DetailedGlyph details;
 151           details.mGlyphID = g.GetSimpleGlyph();
 152           details.mAdvance = g.GetSimpleAdvance();
 153           glyphs.AppendElement(details);
 154         }
 155       } else {
 156         if (g.IsMissing()) {
 157           anyMissing = true;
 158           glyphs.Clear();
 159         }
 160         if (g.GetGlyphCount() > 0) {
 161           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
 162         }
 163       }
 164
 165       if (k + 1 < iter.StringEnd() && aCharsToMerge[k + 1]) {
 166         // next char is supposed to merge with current, so loop without
 167         // writing current merged glyph to the destination
 168         continue;
 169       }
 170
 171       // If the start of the merge run is actually a character that should
 172       // have been merged with the previous character (this can happen
 173       // if there's a font change in the middle of a case-mapped character,
 174       // that decomposed into a sequence of base+diacritics, for example),
 175       // just discard the entire merge run. See comment at start of this
 176       // function.
 177       NS_WARNING_ASSERTION(
 178           !aCharsToMerge[mergeRunStart],
 179           "unable to merge across a glyph run boundary, glyph(s) discarded");
 180       if (!aCharsToMerge[mergeRunStart]) {
 181         // Determine if we can just copy the existing simple glyph record.
 182         if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) {
 183           destGlyphs[offset] = mergedGlyph;
 184         } else {
 185           // Otherwise set up complex glyph record and store detailed glyphs.
 186           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
 187                                  mergedGlyph.IsLigatureGroupStart());
 188           destGlyphs[offset] = mergedGlyph;
 189           aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements());
 190           if (anyMissing) {
 191             destGlyphs[offset].SetMissing();
 192           }
 193         }
 194         offset++;
 195
 196         while (offset < aDest->GetLength() && aDeletedChars[offset]) {
 197           destGlyphs[offset++] = continuationGlyph;
 198         }
 199       }
 200
 201       glyphs.Clear();
 202       anyMissing = false;
 203       mergeRunStart = k + 1;
 204       if (mergeRunStart < stringEnd) {
 205         mergedGlyph = srcGlyphs[mergeRunStart];
 206       }
 207     }
 208     NS_ASSERTION(glyphs.Length() == 0,
 209                  "Leftover glyphs, don't request merging of the last character "
 210                  "with its next!");
 211   }
 212   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
 213 }
 214
 215 gfxTextRunFactory::Parameters GetParametersForInner(
 216     nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
 217     DrawTarget* aRefDrawTarget) {
 218   gfxTextRunFactory::Parameters params = {
 219       aRefDrawTarget, nullptr, nullptr,
 220       nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
 221   *aFlags = aTextRun->GetFlags();
 222   return params;
 223 }
 224
// Some languages have special casing conventions that differ from the
// default Unicode mappings.
// The enum values here are named for well-known exemplar languages that
// exhibit the behavior in question; multiple lang tags may map to the
// same setting here, if the behavior is shared by other languages.
// GetCasingFor() maps a language atom to one of these values.
enum LanguageSpecificCasingBehavior {
  eLSCB_None,       // default non-lang-specific behavior
  eLSCB_Dutch,      // treat "ij" digraph as a unit for capitalization
  eLSCB_Greek,      // strip accent when uppercasing Greek vowels
  eLSCB_Irish,      // keep prefix letters as lowercase when uppercasing Irish
  eLSCB_Turkish,    // preserve dotted/dotless-i distinction in uppercase
  eLSCB_Lithuanian  // retain dot on lowercase i/j when an accent is present
};
 238
 239 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
 240   if (!aLang) {
 241     return eLSCB_None;
 242   }
 243   if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
 244       aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
 245       aLang == nsGkAtoms::tt) {
 246     return eLSCB_Turkish;
 247   }
 248   if (aLang == nsGkAtoms::nl) {
 249     return eLSCB_Dutch;
 250   }
 251   if (aLang == nsGkAtoms::el) {
 252     return eLSCB_Greek;
 253   }
 254   if (aLang == nsGkAtoms::ga) {
 255     return eLSCB_Irish;
 256   }
 257   if (aLang == nsGkAtoms::lt_) {
 258     return eLSCB_Lithuanian;
 259   }
 260
 261   // Is there a region subtag we should ignore?
 262   nsAtomString langStr(const_cast<nsAtom*>(aLang));
 263   int index = langStr.FindChar('-');
 264   if (index > 0) {
 265     langStr.Truncate(index);
 266     RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr);
 267     return GetCasingFor(truncatedLang);
 268   }
 269
 270   return eLSCB_None;
 271 }
 272
 273 bool nsCaseTransformTextRunFactory::TransformString(
 274     const nsAString& aString, nsString& aConvertedString,
 275     const Maybe<StyleTextTransform>& aGlobalTransform, char16_t aMaskChar,
 276     bool aCaseTransformsOnly, const nsAtom* aLanguage,
 277     nsTArray<bool>& aCharsToMergeArray, nsTArray<bool>& aDeletedCharsArray,
 278     const nsTransformedTextRun* aTextRun, uint32_t aOffsetInTextRun,
 279     nsTArray<uint8_t>* aCanBreakBeforeArray,
 280     nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
 281   bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
 282   MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
 283              "text run must be provided to use aux output arrays");
 284
 285   uint32_t length = aString.Length();
 286   const char16_t* str = aString.BeginReading();
 287   // If an unconditional mask character was passed, we'll use it; if not, any
 288   // masking called for by the textrun styles will use TextEditor's mask char.
 289   const char16_t mask = aMaskChar ? aMaskChar : TextEditor::PasswordMask();
 290
 291   bool mergeNeeded = false;
 292
 293   bool capitalizeDutchIJ = false;
 294   bool prevIsLetter = false;
 295   bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
 296                           // when doing Irish lowercasing
 297   bool seenSoftDotted = false;  // true immediately after an I or J that is
 298                                 // converted to lowercase in Lithuanian mode
 299   uint32_t sigmaIndex = uint32_t(-1);
 300   nsUGenCategory cat;
 301
 302   StyleTextTransform style =
 303       aGlobalTransform.valueOr(StyleTextTransform::None());
 304   bool forceNonFullWidth = false;
 305   const nsAtom* lang = aLanguage;
 306
 307   LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
 308   mozilla::GreekCasing::State greekState;
 309   mozilla::IrishCasing::State irishState;
 310   uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
 311                                       // in the output string
 312   uint32_t irishMarkSrc = uint32_t(-1);  // corresponding location in source
 313                                          // string (may differ from output due
 314                                          // to expansions like eszet -> 'SS')
 315   uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
 316                                       // tonos added (if it is disjunctive eta)
 317   const char16_t kGreekUpperEta = 0x0397;
 318
 319   for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
 320     uint32_t ch = str[i];
 321
 322     RefPtr<nsTransformedCharStyle> charStyle;
 323     if (aTextRun) {
 324       charStyle = aTextRun->mStyles[aOffsetInTextRun];
 325       style = aGlobalTransform.valueOr(charStyle->mTextTransform);
 326       forceNonFullWidth = charStyle->mForceNonFullWidth;
 327
 328       nsAtom* newLang =
 329           charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
 330       if (lang != newLang) {
 331         lang = newLang;
 332         languageSpecificCasing = GetCasingFor(lang);
 333         greekState.Reset();
 334         irishState.Reset();
 335         irishMark = uint32_t(-1);
 336         irishMarkSrc = uint32_t(-1);
 337         greekMark = uint32_t(-1);
 338       }
 339     }
 340
 341     // These should be mutually exclusive: mMaskPassword is set if we are
 342     // handling <input type=password>, where the TextEditor code controls
 343     // masking and we use its PasswordMask() character, in which case
 344     // aMaskChar (from -webkit-text-security) is not used.
 345     MOZ_ASSERT_IF(aMaskChar, !(charStyle && charStyle->mMaskPassword));
 346
 347     bool maskPassword = (charStyle && charStyle->mMaskPassword) || aMaskChar;
 348     int extraChars = 0;
 349     const mozilla::unicode::MultiCharMapping* mcm;
 350     bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?
 351
 352     if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
 353       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
 354     }
 355     const uint32_t originalCh = ch;
 356
 357     // Skip case transform if we're masking current character.
 358     if (!maskPassword) {
 359       switch (style.case_) {
 360         case StyleTextTransformCase::None:
 361           break;
 362
 363         case StyleTextTransformCase::Lowercase:
 364           if (languageSpecificCasing == eLSCB_Turkish) {
 365             if (ch == 'I') {
 366               ch = LATIN_SMALL_LETTER_DOTLESS_I;
 367               prevIsLetter = true;
 368               sigmaIndex = uint32_t(-1);
 369               break;
 370             }
 371             if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
 372               ch = 'i';
 373               prevIsLetter = true;
 374               sigmaIndex = uint32_t(-1);
 375               break;
 376             }
 377           }
 378
 379           if (languageSpecificCasing == eLSCB_Lithuanian) {
 380             // clang-format off
 381             /* From SpecialCasing.txt:
 382              * # Introduce an explicit dot above when lowercasing capital I's and J's
 383              * # whenever there are more accents above.
 384              * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
 385              *
 386              * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
 387              * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
 388              * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
 389              * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
 390              * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
 391              * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
 392              */
 393             // clang-format on
 394             if (ch == 'I' || ch == 'J' || ch == 0x012E) {
 395               ch = ToLowerCase(ch);
 396               prevIsLetter = true;
 397               seenSoftDotted = true;
 398               sigmaIndex = uint32_t(-1);
 399               break;
 400             }
 401             if (ch == 0x00CC) {
 402               aConvertedString.Append('i');
 403               aConvertedString.Append(0x0307);
 404               extraChars += 2;
 405               ch = 0x0300;
 406               prevIsLetter = true;
 407               seenSoftDotted = false;
 408               sigmaIndex = uint32_t(-1);
 409               break;
 410             }
 411             if (ch == 0x00CD) {
 412               aConvertedString.Append('i');
 413               aConvertedString.Append(0x0307);
 414               extraChars += 2;
 415               ch = 0x0301;
 416               prevIsLetter = true;
 417               seenSoftDotted = false;
 418               sigmaIndex = uint32_t(-1);
 419               break;
 420             }
 421             if (ch == 0x0128) {
 422               aConvertedString.Append('i');
 423               aConvertedString.Append(0x0307);
 424               extraChars += 2;
 425               ch = 0x0303;
 426               prevIsLetter = true;
 427               seenSoftDotted = false;
 428               sigmaIndex = uint32_t(-1);
 429               break;
 430             }
 431           }
 432
 433           cat = mozilla::unicode::GetGenCategory(ch);
 434
 435           if (languageSpecificCasing == eLSCB_Irish &&
 436               cat == nsUGenCategory::kLetter) {
 437             // See bug 1018805 for Irish lowercasing requirements
 438             if (!prevIsLetter && (ch == 'n' || ch == 't')) {
 439               ntPrefix = true;
 440             } else {
 441               if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
 442                 aConvertedString.Append('-');
 443                 ++extraChars;
 444               }
 445               ntPrefix = false;
 446             }
 447           } else {
 448             ntPrefix = false;
 449           }
 450
 451           if (seenSoftDotted && cat == nsUGenCategory::kMark) {
 452             // The seenSoftDotted flag will only be set in Lithuanian mode.
 453             if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
 454               aConvertedString.Append(0x0307);
 455               ++extraChars;
 456             }
 457           }
 458           seenSoftDotted = false;
 459
 460           // Special lowercasing behavior for Greek Sigma: note that this is
 461           // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
 462           // *not* a language-specific mapping; it applies regardless of the
 463           // language of the element.
 464           //
 465           // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
 466           // (i.e. the non-final form) whenever there is a following letter, or
 467           // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
 468           // followed by a LETTER); and to FINAL SIGMA when it is preceded by
 469           // another letter but not followed by one.
 470           //
 471           // To implement the context-sensitive nature of this mapping, we keep
 472           // track of whether the previous character was a letter. If not,
 473           // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
 474           // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
 475           // record the position in the converted string; if we then encounter
 476           // another letter, that FINAL SIGMA is replaced with a standard
 477           // SMALL SIGMA.
 478
 479           // If sigmaIndex is not -1, it marks where we have provisionally
 480           // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
 481           // letter, we need to change it to SMALL SIGMA.
 482           if (sigmaIndex != uint32_t(-1)) {
 483             if (cat == nsUGenCategory::kLetter) {
 484               aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
 485             }
 486           }
 487
 488           if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
 489             // If preceding char was a letter, map to FINAL instead of SMALL,
 490             // and note where it occurred by setting sigmaIndex; we'll change
 491             // it to standard SMALL SIGMA later if another letter follows
 492             if (prevIsLetter) {
 493               ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
 494               sigmaIndex = aConvertedString.Length();
 495             } else {
 496               // CAPITAL SIGMA not preceded by a letter is unconditionally
 497               // mapped to SMALL SIGMA
 498               ch = GREEK_SMALL_LETTER_SIGMA;
 499               sigmaIndex = uint32_t(-1);
 500             }
 501             prevIsLetter = true;
 502             break;
 503           }
 504
 505           // ignore diacritics for the purpose of contextual sigma mapping;
 506           // otherwise, reset prevIsLetter appropriately and clear the
 507           // sigmaIndex marker
 508           if (cat != nsUGenCategory::kMark) {
 509             prevIsLetter = (cat == nsUGenCategory::kLetter);
 510             sigmaIndex = uint32_t(-1);
 511           }
 512
 513           mcm = mozilla::unicode::SpecialLower(ch);
 514           if (mcm) {
 515             int j = 0;
 516             while (j < 2 && mcm->mMappedChars[j + 1]) {
 517               aConvertedString.Append(mcm->mMappedChars[j]);
 518               ++extraChars;
 519               ++j;
 520             }
 521             ch = mcm->mMappedChars[j];
 522             break;
 523           }
 524
 525           ch = ToLowerCase(ch);
 526           break;
 527
 528         case StyleTextTransformCase::Uppercase:
 529           if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
 530             ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
 531             break;
 532           }
 533
 534           if (languageSpecificCasing == eLSCB_Greek) {
 535             bool markEta;
 536             bool updateEta;
 537             ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
 538                                                  updateEta);
 539             if (markEta) {
 540               greekMark = aConvertedString.Length();
 541             } else if (updateEta) {
 542               // Remove the TONOS from an uppercase ETA-TONOS that turned out
 543               // not to be disjunctive-eta.
 544               MOZ_ASSERT(aConvertedString.Length() > 0 &&
 545                              greekMark < aConvertedString.Length(),
 546                          "bad greekMark!");
 547               aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
 548               greekMark = uint32_t(-1);
 549             }
 550             break;
 551           }
 552
 553           if (languageSpecificCasing == eLSCB_Lithuanian) {
 554             /*
 555              * # Remove DOT ABOVE after "i" with upper or titlecase
 556              *
 557              * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
 558              */
 559             if (ch == 'i' || ch == 'j' || ch == 0x012F) {
 560               seenSoftDotted = true;
 561               ch = ToTitleCase(ch);
 562               break;
 563             }
 564             if (seenSoftDotted) {
 565               seenSoftDotted = false;
 566               if (ch == 0x0307) {
 567                 ch = uint32_t(-1);
 568                 break;
 569               }
 570             }
 571           }
 572
 573           if (languageSpecificCasing == eLSCB_Irish) {
 574             bool mark;
 575             uint8_t action;
 576             ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
 577             if (mark) {
 578               irishMark = aConvertedString.Length();
 579               irishMarkSrc = i;
 580               break;
 581             } else if (action) {
 582               nsString& str = aConvertedString;  // shorthand
 583               switch (action) {
 584                 case 1:
 585                   // lowercase a single prefix letter
 586                   MOZ_ASSERT(str.Length() > 0 && irishMark < str.Length(),
 587                              "bad irishMark!");
 588                   str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
 589                   irishMark = uint32_t(-1);
 590                   irishMarkSrc = uint32_t(-1);
 591                   break;
 592                 case 2:
 593                   // lowercase two prefix letters (immediately before current
 594                   // pos)
 595                   MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
 596                              "bad irishMark!");
 597                   str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
 598                   str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
 599                   irishMark = uint32_t(-1);
 600                   irishMarkSrc = uint32_t(-1);
 601                   break;
 602                 case 3:
 603                   // lowercase one prefix letter, and delete following hyphen
 604                   // (which must be the immediately-preceding char)
 605                   MOZ_ASSERT(str.Length() >= 2 && irishMark == str.Length() - 2,
 606                              "bad irishMark!");
 607                   MOZ_ASSERT(
 608                       irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
 609                       "failed to set irishMarks");
 610                   str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
 611                   aDeletedCharsArray[irishMarkSrc + 1] = true;
 612                   // Remove the trailing entries (corresponding to the deleted
 613                   // hyphen) from the auxiliary arrays.
 614                   uint32_t len = aCharsToMergeArray.Length();
 615                   MOZ_ASSERT(len >= 2);
 616                   aCharsToMergeArray.TruncateLength(len - 1);
 617                   if (auxiliaryOutputArrays) {
 618                     MOZ_ASSERT(aStyleArray->Length() == len);
 619                     MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
 620                     aStyleArray->TruncateLength(len - 1);
 621                     aCanBreakBeforeArray->TruncateLength(len - 1);
 622                     inhibitBreakBefore = true;
 623                   }
 624                   mergeNeeded = true;
 625                   irishMark = uint32_t(-1);
 626                   irishMarkSrc = uint32_t(-1);
 627                   break;
 628               }
 629               // ch has been set to the uppercase for current char;
 630               // No need to check for SpecialUpper here as none of the
 631               // characters that could trigger an Irish casing action have
 632               // special mappings.
 633               break;
 634             }
 635             // If we didn't have any special action to perform, fall through
 636             // to check for special uppercase (ß)
 637           }
 638
 639           // Updated mapping for German eszett, not currently reflected in the
 640           // Unicode data files. This is behind a pref, as it may not work well
 641           // with many (esp. older) fonts.
 642           if (ch == 0x00DF &&
 643               StaticPrefs::
 644                   layout_css_text_transform_uppercase_eszett_enabled()) {
 645             ch = 0x1E9E;
 646             break;
 647           }
 648
 649           mcm = mozilla::unicode::SpecialUpper(ch);
 650           if (mcm) {
 651             int j = 0;
 652             while (j < 2 && mcm->mMappedChars[j + 1]) {
 653               aConvertedString.Append(mcm->mMappedChars[j]);
 654               ++extraChars;
 655               ++j;
 656             }
 657             ch = mcm->mMappedChars[j];
 658             break;
 659           }
 660
 661           // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
 662           // lack of widespread font support for the corresponding Mtavruli
 663           // characters at this time (July 2018).
 664           // This condition is to be removed once the major platforms ship with
 665           // fonts that support U+1C90..1CBF.
 666           if (ch < 0x10D0 || ch > 0x10FF) {
 667             ch = ToUpperCase(ch);
 668           }
 669           break;
 670
 671         case StyleTextTransformCase::Capitalize:
 672           if (aTextRun) {
 673             if (capitalizeDutchIJ && ch == 'j') {
 674               ch = 'J';
 675               capitalizeDutchIJ = false;
 676               break;
 677             }
 678             capitalizeDutchIJ = false;
 679             if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
 680                 aTextRun->mCapitalize[aOffsetInTextRun]) {
 681               if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
 682                 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
 683                 break;
 684               }
 685               if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
 686                 ch = 'I';
 687                 capitalizeDutchIJ = true;
 688                 break;
 689               }
 690               if (languageSpecificCasing == eLSCB_Lithuanian) {
 691                 /*
 692                  * # Remove DOT ABOVE after "i" with upper or titlecase
 693                  *
 694                  * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
 695                  */
 696                 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
 697                   seenSoftDotted = true;
 698                   ch = ToTitleCase(ch);
 699                   break;
 700                 }
 701                 if (seenSoftDotted) {
 702                   seenSoftDotted = false;
 703                   if (ch == 0x0307) {
 704                     ch = uint32_t(-1);
 705                     break;
 706                   }
 707                 }
 708               }
 709
 710               mcm = mozilla::unicode::SpecialTitle(ch);
 711               if (mcm) {
 712                 int j = 0;
 713                 while (j < 2 && mcm->mMappedChars[j + 1]) {
 714                   aConvertedString.Append(mcm->mMappedChars[j]);
 715                   ++extraChars;
 716                   ++j;
 717                 }
 718                 ch = mcm->mMappedChars[j];
 719                 break;
 720               }
 721
 722               ch = ToTitleCase(ch);
 723             }
 724           }
 725           break;
 726
 727         case StyleTextTransformCase::MathAuto:
 728           // text-transform: math-auto is used for automatic italicization of
 729           // single-char <mi> elements. However, some legacy cases (italic style
 730           // fallback and <mi> with leading/trailing whitespace) are still
 731           // handled in MathMLTextRunFactory.
 732           if (length == 1) {
 733             uint32_t ch2 =
 734                 MathMLTextRunFactory::MathVariant(ch, StyleMathVariant::Italic);
 735             if (StaticPrefs::mathml_mathvariant_styling_fallback_disabled()) {
 736               ch = ch2;
 737             } else if (ch2 != ch) {
 738               // Bug 930504. Some platforms do not have fonts for Mathematical
 739               // Alphanumeric Symbols. Hence we only perform the transform if a
 740               // character is actually available.
 741               FontMatchType matchType;
 742               RefPtr<gfxFont> mathFont =
 743                   aTextRun->GetFontGroup()->FindFontForChar(
 744                       ch2, 0, 0, intl::Script::COMMON, nullptr, &matchType);
 745               if (mathFont) {
 746                 ch = ch2;
 747               }
 748             }
 749           }
 750           break;
 751
 752         default:
 753           MOZ_ASSERT_UNREACHABLE("all cases should be handled");
 754           break;
 755       }
 756
 757       if (!aCaseTransformsOnly) {
 758         if (!forceNonFullWidth &&
 759             (style.other_ & StyleTextTransformOther::FULL_WIDTH)) {
 760           ch = mozilla::unicode::GetFullWidth(ch);
 761         }
 762
 763         if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) {
 764           // clang-format off
 765           static const uint32_t kSmallKanas[] = {
 766               // ぁ   ぃ      ぅ      ぇ      ぉ      っ      ゃ      ゅ      ょ
 767               0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
 768               // ゎ   ゕ      ゖ
 769               0x308E, 0x3095, 0x3096,
 770               // ァ   ィ      ゥ      ェ      ォ      ッ      ャ      ュ      ョ
 771               0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
 772               // ヮ   ヵ      ヶ      ㇰ      ㇱ      ㇲ      ㇳ      ㇴ      ㇵ
 773               0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
 774               // ㇶ   ㇷ      ㇸ      ㇹ      ㇺ      ㇻ      ㇼ      ㇽ      ㇾ
 775               0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
 776               // ㇿ
 777               0x31FF,
 778               // ｧ    ｨ       ｩ       ｪ       ｫ       ｬ       ｭ       ｮ       ｯ
 779               0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F,
 780               // 𛄲    𛅐       𛅑       𛅒       𛅕       𛅤       𛅥       𛅦
 781               0x1B132, 0x1B150, 0x1B151, 0x1B152, 0x1B155, 0x1B164, 0x1B165, 0x1B166,
 782               // 𛅧
 783               0x1B167};
 784           static const uint16_t kFullSizeKanas[] = {
 785               // あ   い      う      え      お      つ      や      ゆ      よ
 786               0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
 787               // わ   か      け
 788               0x308F, 0x304B, 0x3051,
 789               // ア   イ      ウ      エ      オ      ツ      ヤ      ユ      ヨ
 790               0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
 791               // ワ   カ      ケ      ク      シ      ス      ト      ヌ      ハ
 792               0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
 793               // ヒ   フ      ヘ      ホ      ム      ラ      リ      ル      レ
 794               0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
 795               // ロ
 796               0x30ED,
 797               // ｱ    ｲ       ｳ       ｴ       ｵ       ﾔ       ﾕ       ﾖ        ﾂ
 798               0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82,
 799               // こ   ゐ       ゑ      を      コ       ヰ      ヱ      ヲ       ン
 800               0x3053, 0x3090, 0x3091, 0x3092, 0x30B3, 0x30F0, 0x30F1, 0x30F2, 0x30F3};
 801           // clang-format on
 802
 803           size_t index;
 804           const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas);
 805           if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
 806             ch = kFullSizeKanas[index];
 807           }
 808         }
 809       }
 810
 811       if (forceNonFullWidth) {
 812         ch = mozilla::unicode::GetFullWidthInverse(ch);
 813       }
 814     }
 815
 816     if (ch == uint32_t(-1)) {
 817       aDeletedCharsArray.AppendElement(true);
 818       mergeNeeded = true;
 819     } else {
 820       aDeletedCharsArray.AppendElement(false);
 821       aCharsToMergeArray.AppendElement(false);
 822       if (auxiliaryOutputArrays) {
 823         aStyleArray->AppendElement(charStyle);
 824         aCanBreakBeforeArray->AppendElement(
 825             inhibitBreakBefore
 826                 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
 827                 : aTextRun->CanBreakBefore(aOffsetInTextRun));
 828       }
 829
 830       if (IS_IN_BMP(ch)) {
 831         aConvertedString.Append(maskPassword ? mask : ch);
 832       } else {
 833         if (maskPassword) {
 834           aConvertedString.Append(mask);
 835           // TODO: We should show a password mask for a surrogate pair later.
 836           aConvertedString.Append(mask);
 837         } else {
 838           aConvertedString.Append(H_SURROGATE(ch));
 839           aConvertedString.Append(L_SURROGATE(ch));
 840         }
 841         ++extraChars;
 842       }
 843       if (!IS_IN_BMP(originalCh)) {
 844         // Skip the trailing surrogate.
 845         ++aOffsetInTextRun;
 846         ++i;
 847         aDeletedCharsArray.AppendElement(true);
 848       }
 849
 850       while (extraChars-- > 0) {
 851         mergeNeeded = true;
 852         aCharsToMergeArray.AppendElement(true);
 853         if (auxiliaryOutputArrays) {
 854           aStyleArray->AppendElement(charStyle);
 855           aCanBreakBeforeArray->AppendElement(
 856               gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
 857         }
 858       }
 859     }
 860   }
 861
 862   // These output arrays, if present, must always have matching lengths:
 863   if (auxiliaryOutputArrays) {
 864     DebugOnly<uint32_t> len = aCharsToMergeArray.Length();
 865     MOZ_ASSERT(aStyleArray->Length() == len);
 866     MOZ_ASSERT(aCanBreakBeforeArray->Length() == len);
 867   }
 868
 869   return mergeNeeded;
 870 }
 871
 872 void nsCaseTransformTextRunFactory::RebuildTextRun(
 873     nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
 874     gfxMissingFontRecorder* aMFR) {
       // Rebuilds the glyph data of aTextRun: runs TransformString over
       // aTextRun->mString, builds a child text run for the converted string,
       // and then merges or copies the child's glyph data back into aTextRun.
       //
       // aTextRun       - the run to rebuild; supplies the source string and
       //                  font group, and receives the resulting glyph data.
       // aRefDrawTarget - forwarded to GetParametersForInner and to
       //                  FinishSettingProperties on a transformed child.
       // aMFR           - forwarded to gfxFontGroup::MakeTextRun and to
       //                  FinishSettingProperties on a transformed child.
       //
       // If no child run can be created the function returns before touching
       // aTextRun's glyph runs.

       // Outputs of TransformString: the converted text plus per-character
       // side arrays.
 875   nsAutoString convertedString;
 876   AutoTArray<bool, 50> charsToMergeArray;
 877   AutoTArray<bool, 50> deletedCharsArray;
 878   AutoTArray<uint8_t, 50> canBreakBeforeArray;
 879   AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
 880
       // When mAllUppercase is set, pass an explicit uppercase transform to
       // TransformString; otherwise pass Nothing().
       // NOTE(review): with Nothing(), the transform presumably comes from
       // per-character styles on aTextRun -- confirm in TransformString.
 881   auto globalTransform =
 882       mAllUppercase
 883           ? Some(StyleTextTransform{StyleTextTransformCase::Uppercase, {}})
 884           : Nothing();
       // NOTE(review): the positional `nullptr` and `0` arguments are
       // presumably the language and the starting offset in aTextRun -- confirm
       // against TransformString's declaration.
 885   bool mergeNeeded = TransformString(
 886       aTextRun->mString, convertedString, globalTransform, mMaskChar,
 887       /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
 888       deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray);
 889
       // `flags` is filled in by GetParametersForInner via the out-pointer and
       // then handed to whichever MakeTextRun call creates the child.
 890   gfx::ShapedTextFlags flags;
 891   gfxTextRunFactory::Parameters innerParams =
 892       GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
 893   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
 894
       // Exactly one of these two RefPtrs ends up holding the child run;
       // `child` is a non-owning pointer to whichever one was created.
 895   RefPtr<nsTransformedTextRun> transformedChild;
 896   RefPtr<gfxTextRun> cachedChild;
 897   gfxTextRun* child;
 898
 899   if (mInnerTransformingTextRunFactory) {
         // An inner transforming factory exists: let it build the child from
         // the converted string. styleArray is moved into the call, so it must
         // not be used after this point.
         // NOTE(review): the trailing `false` is presumably aOwnsFactory --
         // confirm against MakeTextRun's signature.
 900     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
 901         convertedString.BeginReading(), convertedString.Length(), &innerParams,
 902         fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray),
 903         false);
 904     child = transformedChild.get();
 905   } else {
         // No inner factory: ask the font group directly for a text run over
         // the converted string.
 906     cachedChild = fontGroup->MakeTextRun(
 907         convertedString.BeginReading(), convertedString.Length(), &innerParams,
 908         flags, nsTextFrameUtils::Flags(), aMFR);
 909     child = cachedChild.get();
 910   }
 911   if (!child) {
         // Child creation failed; bail out before ResetGlyphRuns() is called.
 912     return;
 913   }
 914   // Copy potential linebreaks into child so they're preserved
 915   // (and also child will be shaped appropriately)
 916   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
 917                "Dropped characters or break-before values somewhere!");
 918   gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
 919   child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
 920   if (transformedChild) {
         // Only a child made by the inner transforming factory gets
         // FinishSettingProperties; this happens after the line breaks above
         // have been set on it.
 921     transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
 922   }
 923
       // Reset aTextRun's glyph runs before filling it from the child.
 924   aTextRun->ResetGlyphRuns();
 925   if (mergeNeeded) {
 926     // Now merge multiple characters into one multi-glyph character as required
 927     // and deal with skipping deleted accent chars
         // charsToMergeArray is checked against the child's length and
         // deletedCharsArray against aTextRun's length.
 928     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
 929                  "source length mismatch");
 930     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
 931                  "destination length mismatch");
 932     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
 933                              deletedCharsArray.Elements());
 934   } else {
 935     // No merging to do, so just copy; this produces a more optimized textrun.
 936     // We can't steal the data because the child may be cached and stealing
 937     // the data would break the cache.
 938     aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
 939   }
 940 }