layout/generic/nsTextRunTransformations.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "nsTextRunTransformations.h"
   8
   9 #include <utility>
  10
  11 #include "GreekCasing.h"
  12 #include "IrishCasing.h"
  13 #include "mozilla/ComputedStyleInlines.h"
  14 #include "mozilla/MemoryReporting.h"
  15 #include "mozilla/TextEditor.h"
  16 #include "mozilla/gfx/2D.h"
  17 #include "nsGkAtoms.h"
  18 #include "nsSpecialCasingData.h"
  19 #include "nsStyleConsts.h"
  20 #include "nsTextFrameUtils.h"
  21 #include "nsUnicharUtils.h"
  22 #include "nsUnicodeProperties.h"
  23
  24 using namespace mozilla;
  25 using namespace mozilla::gfx;
  26
// Unicode characters needing special casing treatment under the Turkish-style
// casing rules (which GetCasingFor selects for the tr, az, ba, crh and tt
// language tags): lowercase 'I' maps to dotless i, and uppercase 'i' maps to
// capital I with dot above.
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
#define LATIN_SMALL_LETTER_DOTLESS_I 0x0131

// Greek sigma needs custom handling for the lowercase transform; for details
// see bug 740120.
// CAPITAL SIGMA lowercases to either the FINAL or the regular SMALL form
// depending on the letters around it.
#define GREEK_CAPITAL_LETTER_SIGMA 0x03A3
#define GREEK_SMALL_LETTER_FINAL_SIGMA 0x03C2
#define GREEK_SMALL_LETTER_SIGMA 0x03C3
  36
  37 already_AddRefed<nsTransformedTextRun> nsTransformedTextRun::Create(
  38     const gfxTextRunFactory::Parameters* aParams,
  39     nsTransformingTextRunFactory* aFactory, gfxFontGroup* aFontGroup,
  40     const char16_t* aString, uint32_t aLength,
  41     const gfx::ShapedTextFlags aFlags, const nsTextFrameUtils::Flags aFlags2,
  42     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
  43   NS_ASSERTION(!(aFlags & gfx::ShapedTextFlags::TEXT_IS_8BIT),
  44                "didn't expect text to be marked as 8-bit here");
  45
  46   void* storage =
  47       AllocateStorageForTextRun(sizeof(nsTransformedTextRun), aLength);
  48   if (!storage) {
  49     return nullptr;
  50   }
  51
  52   RefPtr<nsTransformedTextRun> result = new (storage)
  53       nsTransformedTextRun(aParams, aFactory, aFontGroup, aString, aLength,
  54                            aFlags, aFlags2, std::move(aStyles), aOwnsFactory);
  55   return result.forget();
  56 }
  57
  58 void nsTransformedTextRun::SetCapitalization(uint32_t aStart, uint32_t aLength,
  59                                              bool* aCapitalization) {
  60   if (mCapitalize.IsEmpty()) {
  61     // XXX(Bug 1631371) Check if this should use a fallible operation as it
  62     // pretended earlier.
  63     mCapitalize.AppendElements(GetLength());
  64     memset(mCapitalize.Elements(), 0, GetLength() * sizeof(bool));
  65   }
  66   memcpy(mCapitalize.Elements() + aStart, aCapitalization,
  67          aLength * sizeof(bool));
  68   mNeedsRebuild = true;
  69 }
  70
  71 bool nsTransformedTextRun::SetPotentialLineBreaks(Range aRange,
  72                                                   const uint8_t* aBreakBefore) {
  73   bool changed = gfxTextRun::SetPotentialLineBreaks(aRange, aBreakBefore);
  74   if (changed) {
  75     mNeedsRebuild = true;
  76   }
  77   return changed;
  78 }
  79
  80 size_t nsTransformedTextRun::SizeOfExcludingThis(
  81     mozilla::MallocSizeOf aMallocSizeOf) {
  82   size_t total = gfxTextRun::SizeOfExcludingThis(aMallocSizeOf);
  83   total += mStyles.ShallowSizeOfExcludingThis(aMallocSizeOf);
  84   total += mCapitalize.ShallowSizeOfExcludingThis(aMallocSizeOf);
  85   if (mOwnsFactory) {
  86     total += aMallocSizeOf(mFactory);
  87   }
  88   return total;
  89 }
  90
  91 size_t nsTransformedTextRun::SizeOfIncludingThis(
  92     mozilla::MallocSizeOf aMallocSizeOf) {
  93   return aMallocSizeOf(this) + SizeOfExcludingThis(aMallocSizeOf);
  94 }
  95
  96 already_AddRefed<nsTransformedTextRun>
  97 nsTransformingTextRunFactory::MakeTextRun(
  98     const char16_t* aString, uint32_t aLength,
  99     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
 100     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
 101     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
 102   return nsTransformedTextRun::Create(aParams, this, aFontGroup, aString,
 103                                       aLength, aFlags, aFlags2,
 104                                       std::move(aStyles), aOwnsFactory);
 105 }
 106
 107 already_AddRefed<nsTransformedTextRun>
 108 nsTransformingTextRunFactory::MakeTextRun(
 109     const uint8_t* aString, uint32_t aLength,
 110     const gfxTextRunFactory::Parameters* aParams, gfxFontGroup* aFontGroup,
 111     gfx::ShapedTextFlags aFlags, nsTextFrameUtils::Flags aFlags2,
 112     nsTArray<RefPtr<nsTransformedCharStyle>>&& aStyles, bool aOwnsFactory) {
 113   // We'll only have a Unicode code path to minimize the amount of code needed
 114   // for these rarely used features
 115   NS_ConvertASCIItoUTF16 unicodeString(reinterpret_cast<const char*>(aString),
 116                                        aLength);
 117   return MakeTextRun(unicodeString.get(), aLength, aParams, aFontGroup,
 118                      aFlags & ~gfx::ShapedTextFlags::TEXT_IS_8BIT, aFlags2,
 119                      std::move(aStyles), aOwnsFactory);
 120 }
 121
 122 void MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
 123                               const bool* aCharsToMerge,
 124                               const bool* aDeletedChars) {
 125   MOZ_ASSERT(!aDest->TrailingGlyphRun(), "unexpected glyphRuns in aDest!");
 126   gfxTextRun::GlyphRunIterator iter(aSrc, gfxTextRun::Range(aSrc));
 127   uint32_t offset = 0;
 128   AutoTArray<gfxTextRun::DetailedGlyph, 2> glyphs;
 129   const gfxTextRun::CompressedGlyph continuationGlyph =
 130       gfxTextRun::CompressedGlyph::MakeComplex(false, false);
 131   const gfxTextRun::CompressedGlyph* srcGlyphs = aSrc->GetCharacterGlyphs();
 132   gfxTextRun::CompressedGlyph* destGlyphs = aDest->GetCharacterGlyphs();
 133   while (iter.NextRun()) {
 134     const gfxTextRun::GlyphRun* run = iter.GetGlyphRun();
 135     aDest->AddGlyphRun(run->mFont, run->mMatchType, offset, false,
 136                        run->mOrientation, run->mIsCJK);
 137
 138     bool anyMissing = false;
 139     uint32_t mergeRunStart = iter.GetStringStart();
 140     // Initialize to a copy of the first source glyph in the merge run.
 141     gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
 142     uint32_t stringEnd = iter.GetStringEnd();
 143     for (uint32_t k = iter.GetStringStart(); k < stringEnd; ++k) {
 144       const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
 145       if (g.IsSimpleGlyph()) {
 146         if (!anyMissing) {
 147           gfxTextRun::DetailedGlyph details;
 148           details.mGlyphID = g.GetSimpleGlyph();
 149           details.mAdvance = g.GetSimpleAdvance();
 150           glyphs.AppendElement(details);
 151         }
 152       } else {
 153         if (g.IsMissing()) {
 154           anyMissing = true;
 155           glyphs.Clear();
 156         }
 157         if (g.GetGlyphCount() > 0) {
 158           glyphs.AppendElements(aSrc->GetDetailedGlyphs(k), g.GetGlyphCount());
 159         }
 160       }
 161
 162       if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
 163         // next char is supposed to merge with current, so loop without
 164         // writing current merged glyph to the destination
 165         continue;
 166       }
 167
 168       // If the start of the merge run is actually a character that should
 169       // have been merged with the previous character (this can happen
 170       // if there's a font change in the middle of a case-mapped character,
 171       // that decomposed into a sequence of base+diacritics, for example),
 172       // just discard the entire merge run. See comment at start of this
 173       // function.
 174       NS_WARNING_ASSERTION(
 175           !aCharsToMerge[mergeRunStart],
 176           "unable to merge across a glyph run boundary, glyph(s) discarded");
 177       if (!aCharsToMerge[mergeRunStart]) {
 178         // Determine if we can just copy the existing simple glyph record.
 179         if (mergedGlyph.IsSimpleGlyph() && glyphs.Length() == 1) {
 180           destGlyphs[offset] = mergedGlyph;
 181         } else {
 182           // Otherwise set up complex glyph record and store detailed glyphs.
 183           mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
 184                                  mergedGlyph.IsLigatureGroupStart());
 185           destGlyphs[offset] = mergedGlyph;
 186           aDest->SetDetailedGlyphs(offset, glyphs.Length(), glyphs.Elements());
 187           if (anyMissing) {
 188             destGlyphs[offset].SetMissing();
 189           }
 190         }
 191         offset++;
 192
 193         while (offset < aDest->GetLength() && aDeletedChars[offset]) {
 194           destGlyphs[offset++] = continuationGlyph;
 195         }
 196       }
 197
 198       glyphs.Clear();
 199       anyMissing = false;
 200       mergeRunStart = k + 1;
 201       if (mergeRunStart < stringEnd) {
 202         mergedGlyph = srcGlyphs[mergeRunStart];
 203       }
 204     }
 205     NS_ASSERTION(glyphs.Length() == 0,
 206                  "Leftover glyphs, don't request merging of the last character "
 207                  "with its next!");
 208   }
 209   NS_ASSERTION(offset == aDest->GetLength(), "Bad offset calculations");
 210 }
 211
 212 gfxTextRunFactory::Parameters GetParametersForInner(
 213     nsTransformedTextRun* aTextRun, gfx::ShapedTextFlags* aFlags,
 214     DrawTarget* aRefDrawTarget) {
 215   gfxTextRunFactory::Parameters params = {
 216       aRefDrawTarget, nullptr, nullptr,
 217       nullptr,        0,       aTextRun->GetAppUnitsPerDevUnit()};
 218   *aFlags = aTextRun->GetFlags();
 219   return params;
 220 }
 221
 222 // Some languages have special casing conventions that differ from the
 223 // default Unicode mappings.
 224 // The enum values here are named for well-known exemplar languages that
 225 // exhibit the behavior in question; multiple lang tags may map to the
 226 // same setting here, if the behavior is shared by other languages.
 227 enum LanguageSpecificCasingBehavior {
 228   eLSCB_None,       // default non-lang-specific behavior
 229   eLSCB_Dutch,      // treat "ij" digraph as a unit for capitalization
 230   eLSCB_Greek,      // strip accent when uppercasing Greek vowels
 231   eLSCB_Irish,      // keep prefix letters as lowercase when uppercasing Irish
 232   eLSCB_Turkish,    // preserve dotted/dotless-i distinction in uppercase
 233   eLSCB_Lithuanian  // retain dot on lowercase i/j when an accent is present
 234 };
 235
 236 static LanguageSpecificCasingBehavior GetCasingFor(const nsAtom* aLang) {
 237   if (!aLang) {
 238     return eLSCB_None;
 239   }
 240   if (aLang == nsGkAtoms::tr || aLang == nsGkAtoms::az ||
 241       aLang == nsGkAtoms::ba || aLang == nsGkAtoms::crh ||
 242       aLang == nsGkAtoms::tt) {
 243     return eLSCB_Turkish;
 244   }
 245   if (aLang == nsGkAtoms::nl) {
 246     return eLSCB_Dutch;
 247   }
 248   if (aLang == nsGkAtoms::el) {
 249     return eLSCB_Greek;
 250   }
 251   if (aLang == nsGkAtoms::ga) {
 252     return eLSCB_Irish;
 253   }
 254   if (aLang == nsGkAtoms::lt_) {
 255     return eLSCB_Lithuanian;
 256   }
 257
 258   // Is there a region subtag we should ignore?
 259   nsAtomString langStr(const_cast<nsAtom*>(aLang));
 260   int index = langStr.FindChar('-');
 261   if (index > 0) {
 262     langStr.Truncate(index);
 263     RefPtr<nsAtom> truncatedLang = NS_Atomize(langStr);
 264     return GetCasingFor(truncatedLang);
 265   }
 266
 267   return eLSCB_None;
 268 }
 269
 270 bool nsCaseTransformTextRunFactory::TransformString(
 271     const nsAString& aString, nsString& aConvertedString,
 272     const Maybe<StyleTextTransform>& aGlobalTransform, bool aCaseTransformsOnly,
 273     const nsAtom* aLanguage, nsTArray<bool>& aCharsToMergeArray,
 274     nsTArray<bool>& aDeletedCharsArray, const nsTransformedTextRun* aTextRun,
 275     uint32_t aOffsetInTextRun, nsTArray<uint8_t>* aCanBreakBeforeArray,
 276     nsTArray<RefPtr<nsTransformedCharStyle>>* aStyleArray) {
 277   bool auxiliaryOutputArrays = aCanBreakBeforeArray && aStyleArray;
 278   MOZ_ASSERT(!auxiliaryOutputArrays || aTextRun,
 279              "text run must be provided to use aux output arrays");
 280
 281   uint32_t length = aString.Length();
 282   const char16_t* str = aString.BeginReading();
 283   const char16_t kPasswordMask = TextEditor::PasswordMask();
 284
 285   bool mergeNeeded = false;
 286
 287   bool capitalizeDutchIJ = false;
 288   bool prevIsLetter = false;
 289   bool ntPrefix = false;  // true immediately after a word-initial 'n' or 't'
 290                           // when doing Irish lowercasing
 291   bool seenSoftDotted = false;  // true immediately after an I or J that is
 292                                 // converted to lowercase in Lithuanian mode
 293   uint32_t sigmaIndex = uint32_t(-1);
 294   nsUGenCategory cat;
 295
 296   StyleTextTransform style =
 297       aGlobalTransform.valueOr(StyleTextTransform::None());
 298   bool forceNonFullWidth = false;
 299   const nsAtom* lang = aLanguage;
 300
 301   LanguageSpecificCasingBehavior languageSpecificCasing = GetCasingFor(lang);
 302   mozilla::GreekCasing::State greekState;
 303   mozilla::IrishCasing::State irishState;
 304   uint32_t irishMark = uint32_t(-1);  // location of possible prefix letter(s)
 305                                       // in the output string
 306   uint32_t irishMarkSrc = uint32_t(-1);  // corresponding location in source
 307                                          // string (may differ from output due
 308                                          // to expansions like eszet -> 'SS')
 309   uint32_t greekMark = uint32_t(-1);  // location of uppercase ETA that may need
 310                                       // tonos added (if it is disjunctive eta)
 311   const char16_t kGreekUpperEta = 0x0397;
 312
 313   for (uint32_t i = 0; i < length; ++i, ++aOffsetInTextRun) {
 314     uint32_t ch = str[i];
 315
 316     RefPtr<nsTransformedCharStyle> charStyle;
 317     if (aTextRun) {
 318       charStyle = aTextRun->mStyles[aOffsetInTextRun];
 319       style = aGlobalTransform.valueOr(charStyle->mTextTransform);
 320       forceNonFullWidth = charStyle->mForceNonFullWidth;
 321
 322       nsAtom* newLang =
 323           charStyle->mExplicitLanguage ? charStyle->mLanguage.get() : nullptr;
 324       if (lang != newLang) {
 325         lang = newLang;
 326         languageSpecificCasing = GetCasingFor(lang);
 327         greekState.Reset();
 328         irishState.Reset();
 329         irishMark = uint32_t(-1);
 330         irishMarkSrc = uint32_t(-1);
 331         greekMark = uint32_t(-1);
 332       }
 333     }
 334
 335     bool maskPassword = charStyle && charStyle->mMaskPassword;
 336     int extraChars = 0;
 337     const mozilla::unicode::MultiCharMapping* mcm;
 338     bool inhibitBreakBefore = false;  // have we just deleted preceding hyphen?
 339
 340     if (i < length - 1 && NS_IS_SURROGATE_PAIR(ch, str[i + 1])) {
 341       ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
 342     }
 343
 344     // Skip case transform if we're masking current character.
 345     if (!maskPassword) {
 346       switch (style.case_) {
 347         case StyleTextTransformCase::None:
 348           break;
 349
 350         case StyleTextTransformCase::Lowercase:
 351           if (languageSpecificCasing == eLSCB_Turkish) {
 352             if (ch == 'I') {
 353               ch = LATIN_SMALL_LETTER_DOTLESS_I;
 354               prevIsLetter = true;
 355               sigmaIndex = uint32_t(-1);
 356               break;
 357             }
 358             if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
 359               ch = 'i';
 360               prevIsLetter = true;
 361               sigmaIndex = uint32_t(-1);
 362               break;
 363             }
 364           }
 365
 366           if (languageSpecificCasing == eLSCB_Lithuanian) {
 367             // clang-format off
 368             /* From SpecialCasing.txt:
 369              * # Introduce an explicit dot above when lowercasing capital I's and J's
 370              * # whenever there are more accents above.
 371              * # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
 372              *
 373              * 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
 374              * 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
 375              * 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
 376              * 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
 377              * 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
 378              * 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
 379              */
 380             // clang-format on
 381             if (ch == 'I' || ch == 'J' || ch == 0x012E) {
 382               ch = ToLowerCase(ch);
 383               prevIsLetter = true;
 384               seenSoftDotted = true;
 385               sigmaIndex = uint32_t(-1);
 386               break;
 387             }
 388             if (ch == 0x00CC) {
 389               aConvertedString.Append('i');
 390               aConvertedString.Append(0x0307);
 391               extraChars += 2;
 392               ch = 0x0300;
 393               prevIsLetter = true;
 394               seenSoftDotted = false;
 395               sigmaIndex = uint32_t(-1);
 396               break;
 397             }
 398             if (ch == 0x00CD) {
 399               aConvertedString.Append('i');
 400               aConvertedString.Append(0x0307);
 401               extraChars += 2;
 402               ch = 0x0301;
 403               prevIsLetter = true;
 404               seenSoftDotted = false;
 405               sigmaIndex = uint32_t(-1);
 406               break;
 407             }
 408             if (ch == 0x0128) {
 409               aConvertedString.Append('i');
 410               aConvertedString.Append(0x0307);
 411               extraChars += 2;
 412               ch = 0x0303;
 413               prevIsLetter = true;
 414               seenSoftDotted = false;
 415               sigmaIndex = uint32_t(-1);
 416               break;
 417             }
 418           }
 419
 420           cat = mozilla::unicode::GetGenCategory(ch);
 421
 422           if (languageSpecificCasing == eLSCB_Irish &&
 423               cat == nsUGenCategory::kLetter) {
 424             // See bug 1018805 for Irish lowercasing requirements
 425             if (!prevIsLetter && (ch == 'n' || ch == 't')) {
 426               ntPrefix = true;
 427             } else {
 428               if (ntPrefix && mozilla::IrishCasing::IsUpperVowel(ch)) {
 429                 aConvertedString.Append('-');
 430                 ++extraChars;
 431               }
 432               ntPrefix = false;
 433             }
 434           } else {
 435             ntPrefix = false;
 436           }
 437
 438           if (seenSoftDotted && cat == nsUGenCategory::kMark) {
 439             // The seenSoftDotted flag will only be set in Lithuanian mode.
 440             if (ch == 0x0300 || ch == 0x0301 || ch == 0x0303) {
 441               aConvertedString.Append(0x0307);
 442               ++extraChars;
 443             }
 444           }
 445           seenSoftDotted = false;
 446
 447           // Special lowercasing behavior for Greek Sigma: note that this is
 448           // listed as context-sensitive in Unicode's SpecialCasing.txt, but is
 449           // *not* a language-specific mapping; it applies regardless of the
 450           // language of the element.
 451           //
 452           // The lowercase mapping for CAPITAL SIGMA should be to SMALL SIGMA
 453           // (i.e. the non-final form) whenever there is a following letter, or
 454           // when the CAPITAL SIGMA occurs in isolation (neither preceded nor
 455           // followed by a LETTER); and to FINAL SIGMA when it is preceded by
 456           // another letter but not followed by one.
 457           //
 458           // To implement the context-sensitive nature of this mapping, we keep
 459           // track of whether the previous character was a letter. If not,
 460           // CAPITAL SIGMA will map directly to SMALL SIGMA. If the previous
 461           // character was a letter, CAPITAL SIGMA maps to FINAL SIGMA and we
 462           // record the position in the converted string; if we then encounter
 463           // another letter, that FINAL SIGMA is replaced with a standard
 464           // SMALL SIGMA.
 465
 466           // If sigmaIndex is not -1, it marks where we have provisionally
 467           // mapped a CAPITAL SIGMA to FINAL SIGMA; if we now find another
 468           // letter, we need to change it to SMALL SIGMA.
 469           if (sigmaIndex != uint32_t(-1)) {
 470             if (cat == nsUGenCategory::kLetter) {
 471               aConvertedString.SetCharAt(GREEK_SMALL_LETTER_SIGMA, sigmaIndex);
 472             }
 473           }
 474
 475           if (ch == GREEK_CAPITAL_LETTER_SIGMA) {
 476             // If preceding char was a letter, map to FINAL instead of SMALL,
 477             // and note where it occurred by setting sigmaIndex; we'll change
 478             // it to standard SMALL SIGMA later if another letter follows
 479             if (prevIsLetter) {
 480               ch = GREEK_SMALL_LETTER_FINAL_SIGMA;
 481               sigmaIndex = aConvertedString.Length();
 482             } else {
 483               // CAPITAL SIGMA not preceded by a letter is unconditionally
 484               // mapped to SMALL SIGMA
 485               ch = GREEK_SMALL_LETTER_SIGMA;
 486               sigmaIndex = uint32_t(-1);
 487             }
 488             prevIsLetter = true;
 489             break;
 490           }
 491
 492           // ignore diacritics for the purpose of contextual sigma mapping;
 493           // otherwise, reset prevIsLetter appropriately and clear the
 494           // sigmaIndex marker
 495           if (cat != nsUGenCategory::kMark) {
 496             prevIsLetter = (cat == nsUGenCategory::kLetter);
 497             sigmaIndex = uint32_t(-1);
 498           }
 499
 500           mcm = mozilla::unicode::SpecialLower(ch);
 501           if (mcm) {
 502             int j = 0;
 503             while (j < 2 && mcm->mMappedChars[j + 1]) {
 504               aConvertedString.Append(mcm->mMappedChars[j]);
 505               ++extraChars;
 506               ++j;
 507             }
 508             ch = mcm->mMappedChars[j];
 509             break;
 510           }
 511
 512           ch = ToLowerCase(ch);
 513           break;
 514
 515         case StyleTextTransformCase::Uppercase:
 516           if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
 517             ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
 518             break;
 519           }
 520
 521           if (languageSpecificCasing == eLSCB_Greek) {
 522             bool markEta;
 523             bool updateEta;
 524             ch = mozilla::GreekCasing::UpperCase(ch, greekState, markEta,
 525                                                  updateEta);
 526             if (markEta) {
 527               greekMark = aConvertedString.Length();
 528             } else if (updateEta) {
 529               // Remove the TONOS from an uppercase ETA-TONOS that turned out
 530               // not to be disjunctive-eta.
 531               MOZ_ASSERT(aConvertedString.Length() > 0 &&
 532                              greekMark < aConvertedString.Length(),
 533                          "bad greekMark!");
 534               aConvertedString.SetCharAt(kGreekUpperEta, greekMark);
 535               greekMark = uint32_t(-1);
 536             }
 537             break;
 538           }
 539
 540           if (languageSpecificCasing == eLSCB_Lithuanian) {
 541             /*
 542              * # Remove DOT ABOVE after "i" with upper or titlecase
 543              *
 544              * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
 545              */
 546             if (ch == 'i' || ch == 'j' || ch == 0x012F) {
 547               seenSoftDotted = true;
 548               ch = ToTitleCase(ch);
 549               break;
 550             }
 551             if (seenSoftDotted) {
 552               seenSoftDotted = false;
 553               if (ch == 0x0307) {
 554                 ch = uint32_t(-1);
 555                 break;
 556               }
 557             }
 558           }
 559
 560           if (languageSpecificCasing == eLSCB_Irish) {
 561             bool mark;
 562             uint8_t action;
 563             ch = mozilla::IrishCasing::UpperCase(ch, irishState, mark, action);
 564             if (mark) {
 565               irishMark = aConvertedString.Length();
 566               irishMarkSrc = i;
 567               break;
 568             } else if (action) {
 569               nsString& str = aConvertedString;  // shorthand
 570               switch (action) {
 571                 case 1:
 572                   // lowercase a single prefix letter
 573                   NS_ASSERTION(str.Length() > 0 && irishMark < str.Length(),
 574                                "bad irishMark!");
 575                   str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
 576                   irishMark = uint32_t(-1);
 577                   irishMarkSrc = uint32_t(-1);
 578                   break;
 579                 case 2:
 580                   // lowercase two prefix letters (immediately before current
 581                   // pos)
 582                   NS_ASSERTION(
 583                       str.Length() >= 2 && irishMark == str.Length() - 2,
 584                       "bad irishMark!");
 585                   str.SetCharAt(ToLowerCase(str[irishMark]), irishMark);
 586                   str.SetCharAt(ToLowerCase(str[irishMark + 1]), irishMark + 1);
 587                   irishMark = uint32_t(-1);
 588                   irishMarkSrc = uint32_t(-1);
 589                   break;
 590                 case 3:
 591                   // lowercase one prefix letter, and delete following hyphen
 592                   // (which must be the immediately-preceding char)
 593                   NS_ASSERTION(
 594                       str.Length() >= 2 && irishMark == str.Length() - 2,
 595                       "bad irishMark!");
 596                   MOZ_ASSERT(
 597                       irishMark != uint32_t(-1) && irishMarkSrc != uint32_t(-1),
 598                       "failed to set irishMarks");
 599                   str.Replace(irishMark, 2, ToLowerCase(str[irishMark]));
 600                   aDeletedCharsArray[irishMarkSrc + 1] = true;
 601                   // Remove the trailing entries (corresponding to the deleted
 602                   // hyphen) from the auxiliary arrays.
 603                   aCharsToMergeArray.SetLength(aCharsToMergeArray.Length() - 1);
 604                   if (auxiliaryOutputArrays) {
 605                     aStyleArray->SetLength(aStyleArray->Length() - 1);
 606                     aCanBreakBeforeArray->SetLength(
 607                         aCanBreakBeforeArray->Length() - 1);
 608                     inhibitBreakBefore = true;
 609                   }
 610                   mergeNeeded = true;
 611                   irishMark = uint32_t(-1);
 612                   irishMarkSrc = uint32_t(-1);
 613                   break;
 614               }
 615               // ch has been set to the uppercase for current char;
 616               // No need to check for SpecialUpper here as none of the
 617               // characters that could trigger an Irish casing action have
 618               // special mappings.
 619               break;
 620             }
 621             // If we didn't have any special action to perform, fall through
 622             // to check for special uppercase (ß)
 623           }
 624
 625           mcm = mozilla::unicode::SpecialUpper(ch);
 626           if (mcm) {
 627             int j = 0;
 628             while (j < 2 && mcm->mMappedChars[j + 1]) {
 629               aConvertedString.Append(mcm->mMappedChars[j]);
 630               ++extraChars;
 631               ++j;
 632             }
 633             ch = mcm->mMappedChars[j];
 634             break;
 635           }
 636
 637           // Bug 1476304: we exclude Georgian letters U+10D0..10FF because of
 638           // lack of widespread font support for the corresponding Mtavruli
 639           // characters at this time (July 2018).
 640           // This condition is to be removed once the major platforms ship with
 641           // fonts that support U+1C90..1CBF.
 642           if (ch < 0x10D0 || ch > 0x10FF) {
 643             ch = ToUpperCase(ch);
 644           }
 645           break;
 646
 647         case StyleTextTransformCase::Capitalize:
 648           if (aTextRun) {
 649             if (capitalizeDutchIJ && ch == 'j') {
 650               ch = 'J';
 651               capitalizeDutchIJ = false;
 652               break;
 653             }
 654             capitalizeDutchIJ = false;
 655             if (aOffsetInTextRun < aTextRun->mCapitalize.Length() &&
 656                 aTextRun->mCapitalize[aOffsetInTextRun]) {
 657               if (languageSpecificCasing == eLSCB_Turkish && ch == 'i') {
 658                 ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
 659                 break;
 660               }
 661               if (languageSpecificCasing == eLSCB_Dutch && ch == 'i') {
 662                 ch = 'I';
 663                 capitalizeDutchIJ = true;
 664                 break;
 665               }
 666               if (languageSpecificCasing == eLSCB_Lithuanian) {
 667                 /*
 668                  * # Remove DOT ABOVE after "i" with upper or titlecase
 669                  *
 670                  * 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
 671                  */
 672                 if (ch == 'i' || ch == 'j' || ch == 0x012F) {
 673                   seenSoftDotted = true;
 674                   ch = ToTitleCase(ch);
 675                   break;
 676                 }
 677                 if (seenSoftDotted) {
 678                   seenSoftDotted = false;
 679                   if (ch == 0x0307) {
 680                     ch = uint32_t(-1);
 681                     break;
 682                   }
 683                 }
 684               }
 685
 686               mcm = mozilla::unicode::SpecialTitle(ch);
 687               if (mcm) {
 688                 int j = 0;
 689                 while (j < 2 && mcm->mMappedChars[j + 1]) {
 690                   aConvertedString.Append(mcm->mMappedChars[j]);
 691                   ++extraChars;
 692                   ++j;
 693                 }
 694                 ch = mcm->mMappedChars[j];
 695                 break;
 696               }
 697
 698               ch = ToTitleCase(ch);
 699             }
 700           }
 701           break;
 702
 703         default:
 704           MOZ_ASSERT_UNREACHABLE("all cases should be handled");
 705           break;
 706       }
 707
 708       if (!aCaseTransformsOnly) {
 709         if (!forceNonFullWidth &&
 710             (style.other_ & StyleTextTransformOther::FULL_WIDTH)) {
 711           ch = mozilla::unicode::GetFullWidth(ch);
 712         }
 713
 714         if (style.other_ & StyleTextTransformOther::FULL_SIZE_KANA) {
 715           // clang-format off
 716           static const uint16_t kSmallKanas[] = {
 717               // ぁ   ぃ      ぅ      ぇ      ぉ      っ      ゃ      ゅ      ょ
 718               0x3041, 0x3043, 0x3045, 0x3047, 0x3049, 0x3063, 0x3083, 0x3085, 0x3087,
 719               // ゎ   ゕ      ゖ
 720               0x308E, 0x3095, 0x3096,
 721               // ァ   ィ      ゥ      ェ      ォ      ッ      ャ      ュ      ョ
 722               0x30A1, 0x30A3, 0x30A5, 0x30A7, 0x30A9, 0x30C3, 0x30E3, 0x30E5, 0x30E7,
 723               // ヮ   ヵ      ヶ      ㇰ      ㇱ      ㇲ      ㇳ      ㇴ      ㇵ
 724               0x30EE, 0x30F5, 0x30F6, 0x31F0, 0x31F1, 0x31F2, 0x31F3, 0x31F4, 0x31F5,
 725               // ㇶ   ㇷ      ㇸ      ㇹ      ㇺ      ㇻ      ㇼ      ㇽ      ㇾ
 726               0x31F6, 0x31F7, 0x31F8, 0x31F9, 0x31FA, 0x31FB, 0x31FC, 0x31FD, 0x31FE,
 727               // ㇿ
 728               0x31FF,
 729               // ｧ    ｨ       ｩ       ｪ       ｫ       ｬ       ｭ       ｮ       ｯ
 730               0xFF67, 0xFF68, 0xFF69, 0xFF6A, 0xFF6B, 0xFF6C, 0xFF6D, 0xFF6E, 0xFF6F};
 731           static const uint16_t kFullSizeKanas[] = {
 732               // あ   い      う      え      お      つ      や      ゆ      よ
 733               0x3042, 0x3044, 0x3046, 0x3048, 0x304A, 0x3064, 0x3084, 0x3086, 0x3088,
 734               // わ   か      け
 735               0x308F, 0x304B, 0x3051,
 736               // ア   イ      ウ      エ      オ      ツ      ヤ      ユ      ヨ
 737               0x30A2, 0x30A4, 0x30A6, 0x30A8, 0x30AA, 0x30C4, 0x30E4, 0x30E6, 0x30E8,
 738               // ワ   カ      ケ      ク      シ      ス      ト      ヌ      ハ
 739               0x30EF, 0x30AB, 0x30B1, 0x30AF, 0x30B7, 0x30B9, 0x30C8, 0x30CC, 0x30CF,
 740               // ヒ   フ      ヘ      ホ      ム      ラ      リ      ル      レ
 741               0x30D2, 0x30D5, 0x30D8, 0x30DB, 0x30E0, 0x30E9, 0x30EA, 0x30EB, 0x30EC,
 742               // ロ
 743               0x30ED,
 744               // ｱ    ｲ       ｳ       ｴ       ｵ       ﾔ       ﾕ       ﾖ        ﾂ
 745               0xFF71, 0xFF72, 0xFF73, 0xFF74, 0xFF75, 0xFF94, 0xFF95, 0xFF96, 0xFF82};
 746           // clang-format on
 747
 748           size_t index;
 749           const uint16_t len = MOZ_ARRAY_LENGTH(kSmallKanas);
 750           if (mozilla::BinarySearch(kSmallKanas, 0, len, ch, &index)) {
 751             ch = kFullSizeKanas[index];
 752           }
 753         }
 754       }
 755
 756       if (forceNonFullWidth) {
 757         ch = mozilla::unicode::GetFullWidthInverse(ch);
 758       }
 759     }
 760
 761     if (ch == uint32_t(-1)) {
 762       aDeletedCharsArray.AppendElement(true);
 763       mergeNeeded = true;
 764     } else {
 765       aDeletedCharsArray.AppendElement(false);
 766       aCharsToMergeArray.AppendElement(false);
 767       if (auxiliaryOutputArrays) {
 768         aStyleArray->AppendElement(charStyle);
 769         aCanBreakBeforeArray->AppendElement(
 770             inhibitBreakBefore
 771                 ? gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE
 772                 : aTextRun->CanBreakBefore(aOffsetInTextRun));
 773       }
 774
 775       if (IS_IN_BMP(ch)) {
 776         aConvertedString.Append(maskPassword ? kPasswordMask : ch);
 777       } else {
 778         if (maskPassword) {
 779           aConvertedString.Append(kPasswordMask);
 780           // TODO: We should show a password mask for a surrogate pair later.
 781           aConvertedString.Append(kPasswordMask);
 782         } else {
 783           aConvertedString.Append(H_SURROGATE(ch));
 784           aConvertedString.Append(L_SURROGATE(ch));
 785         }
 786         ++extraChars;
 787         ++i;
 788         ++aOffsetInTextRun;
 789         // Skip the trailing surrogate.
 790         aDeletedCharsArray.AppendElement(true);
 791       }
 792
 793       while (extraChars-- > 0) {
 794         mergeNeeded = true;
 795         aCharsToMergeArray.AppendElement(true);
 796         if (auxiliaryOutputArrays) {
 797           aStyleArray->AppendElement(charStyle);
 798           aCanBreakBeforeArray->AppendElement(
 799               gfxShapedText::CompressedGlyph::FLAG_BREAK_TYPE_NONE);
 800         }
 801       }
 802     }
 803   }
 804
 805   return mergeNeeded;
 806 }
 807
 808 void nsCaseTransformTextRunFactory::RebuildTextRun(
 809     nsTransformedTextRun* aTextRun, DrawTarget* aRefDrawTarget,
 810     gfxMissingFontRecorder* aMFR) {
 811   nsAutoString convertedString;
 812   AutoTArray<bool, 50> charsToMergeArray;
 813   AutoTArray<bool, 50> deletedCharsArray;
 814   AutoTArray<uint8_t, 50> canBreakBeforeArray;
 815   AutoTArray<RefPtr<nsTransformedCharStyle>, 50> styleArray;
 816
 817   auto globalTransform =
 818       mAllUppercase
 819           ? Some(StyleTextTransform{StyleTextTransformCase::Uppercase, {}})
 820           : Nothing();
 821   bool mergeNeeded = TransformString(
 822       aTextRun->mString, convertedString, globalTransform,
 823       /* aCaseTransformsOnly = */ false, nullptr, charsToMergeArray,
 824       deletedCharsArray, aTextRun, 0, &canBreakBeforeArray, &styleArray);
 825
 826   gfx::ShapedTextFlags flags;
 827   gfxTextRunFactory::Parameters innerParams =
 828       GetParametersForInner(aTextRun, &flags, aRefDrawTarget);
 829   gfxFontGroup* fontGroup = aTextRun->GetFontGroup();
 830
 831   RefPtr<nsTransformedTextRun> transformedChild;
 832   RefPtr<gfxTextRun> cachedChild;
 833   gfxTextRun* child;
 834
 835   if (mInnerTransformingTextRunFactory) {
 836     transformedChild = mInnerTransformingTextRunFactory->MakeTextRun(
 837         convertedString.BeginReading(), convertedString.Length(), &innerParams,
 838         fontGroup, flags, nsTextFrameUtils::Flags(), std::move(styleArray),
 839         false);
 840     child = transformedChild.get();
 841   } else {
 842     cachedChild = fontGroup->MakeTextRun(
 843         convertedString.BeginReading(), convertedString.Length(), &innerParams,
 844         flags, nsTextFrameUtils::Flags(), aMFR);
 845     child = cachedChild.get();
 846   }
 847   if (!child) {
 848     return;
 849   }
 850   // Copy potential linebreaks into child so they're preserved
 851   // (and also child will be shaped appropriately)
 852   NS_ASSERTION(convertedString.Length() == canBreakBeforeArray.Length(),
 853                "Dropped characters or break-before values somewhere!");
 854   gfxTextRun::Range range(0, uint32_t(canBreakBeforeArray.Length()));
 855   child->SetPotentialLineBreaks(range, canBreakBeforeArray.Elements());
 856   if (transformedChild) {
 857     transformedChild->FinishSettingProperties(aRefDrawTarget, aMFR);
 858   }
 859
 860   aTextRun->ResetGlyphRuns();
 861   if (mergeNeeded) {
 862     // Now merge multiple characters into one multi-glyph character as required
 863     // and deal with skipping deleted accent chars
 864     NS_ASSERTION(charsToMergeArray.Length() == child->GetLength(),
 865                  "source length mismatch");
 866     NS_ASSERTION(deletedCharsArray.Length() == aTextRun->GetLength(),
 867                  "destination length mismatch");
 868     MergeCharactersInTextRun(aTextRun, child, charsToMergeArray.Elements(),
 869                              deletedCharsArray.Elements());
 870   } else {
 871     // No merging to do, so just copy; this produces a more optimized textrun.
 872     // We can't steal the data because the child may be cached and stealing
 873     // the data would break the cache.
 874     aTextRun->CopyGlyphDataFrom(child, gfxTextRun::Range(child), 0);
 875   }
 876 }