dom/base/nsTextFragment.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * A class which represents a fragment of text (eg inside a text
   9  * node); if only codepoints below 256 are used, the text is stored as
  10  * a char*; otherwise the text is stored as a char16_t*
  11  */
  12
  13 #include "nsTextFragment.h"
  14 #include "nsCRT.h"
  15 #include "nsReadableUtils.h"
  16 #include "nsBidiUtils.h"
  17 #include "nsUnicharUtils.h"
  18 #include "mozilla/CheckedInt.h"
  19 #include "mozilla/MemoryReporting.h"
  20 #include "mozilla/SSE.h"
  21 #include "mozilla/ppc.h"
  22 #include "nsTextFragmentImpl.h"
  23 #include <algorithm>
  24
  25 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
  26 #define TEXTFRAG_MAX_NEWLINES 7
  27
  28 // Static buffer used for common fragments
  29 static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
  30 static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
  31 static char sSingleCharSharedString[256];
  32
  33 using namespace mozilla;
  34
  35 // static
  36 nsresult nsTextFragment::Init() {
  37   // Create whitespace strings
  38   uint32_t i;
  39   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
  40     sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
  41     sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
  42     sSpaceSharedString[i][0] = ' ';
  43     sTabSharedString[i][0] = ' ';
  44     uint32_t j;
  45     for (j = 1; j < 1 + i; ++j) {
  46       sSpaceSharedString[i][j] = '\n';
  47       sTabSharedString[i][j] = '\n';
  48     }
  49     for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
  50       sSpaceSharedString[i][j] = ' ';
  51       sTabSharedString[i][j] = '\t';
  52     }
  53   }
  54
  55   // Create single-char strings
  56   for (i = 0; i < 256; ++i) {
  57     sSingleCharSharedString[i] = i;
  58   }
  59
  60   return NS_OK;
  61 }
  62
  63 // static
  64 void nsTextFragment::Shutdown() {
  65   uint32_t i;
  66   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
  67     delete[] sSpaceSharedString[i];
  68     delete[] sTabSharedString[i];
  69     sSpaceSharedString[i] = nullptr;
  70     sTabSharedString[i] = nullptr;
  71   }
  72 }
  73
  74 nsTextFragment::~nsTextFragment() {
  75   ReleaseText();
  76   MOZ_COUNT_DTOR(nsTextFragment);
  77 }
  78
  79 void nsTextFragment::ReleaseText() {
  80   if (mState.mIs2b) {
  81     NS_RELEASE(m2b);
  82   } else if (mState.mLength && m1b && mState.mInHeap) {
  83     free(const_cast<char*>(m1b));
  84   }
  85
  86   m1b = nullptr;
  87   mState.mIsBidi = false;
  88
  89   // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
  90   mAllBits = 0;
  91 }
  92
  93 nsTextFragment& nsTextFragment::operator=(const nsTextFragment& aOther) {
  94   ReleaseText();
  95
  96   if (aOther.mState.mLength) {
  97     if (!aOther.mState.mInHeap) {
  98       MOZ_ASSERT(!aOther.mState.mIs2b);
  99       m1b = aOther.m1b;
 100     } else if (aOther.mState.mIs2b) {
 101       m2b = aOther.m2b;
 102       NS_ADDREF(m2b);
 103     } else {
 104       m1b = static_cast<char*>(malloc(aOther.mState.mLength));
 105       if (m1b) {
 106         memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength);
 107       } else {
 108         // allocate a buffer for a single REPLACEMENT CHARACTER
 109         m2b = nsStringBuffer::Alloc(sizeof(char16_t) * 2).take();
 110         if (!m2b) {
 111           MOZ_CRASH("OOM!");
 112         }
 113         char16_t* data = static_cast<char16_t*>(m2b->Data());
 114         data[0] = 0xFFFD;  // REPLACEMENT CHARACTER
 115         data[1] = char16_t(0);
 116         mState.mIs2b = true;
 117         mState.mInHeap = true;
 118         mState.mLength = 1;
 119         return *this;
 120       }
 121     }
 122
 123     mAllBits = aOther.mAllBits;
 124   }
 125
 126   return *this;
 127 }
 128
 129 static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
 130                                                const char16_t* end) {
 131   using p = Non8BitParameters<sizeof(size_t)>;
 132   const size_t mask = p::mask();
 133   const uint32_t alignMask = p::alignMask();
 134   const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
 135   const int32_t len = end - str;
 136   int32_t i = 0;
 137
 138   // Align ourselves to a word boundary.
 139   int32_t alignLen = std::min(
 140       len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
 141   for (; i < alignLen; i++) {
 142     if (str[i] > 255) return i;
 143   }
 144
 145   // Check one word at a time.
 146   const int32_t wordWalkEnd =
 147       ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
 148   for (; i < wordWalkEnd; i += numUnicharsPerWord) {
 149     const size_t word = *reinterpret_cast<const size_t*>(str + i);
 150     if (word & mask) return i;
 151   }
 152
 153   // Take care of the remainder one character at a time.
 154   for (; i < len; i++) {
 155     if (str[i] > 255) return i;
 156   }
 157
 158   return -1;
 159 }
 160
 161 #if defined(MOZILLA_MAY_SUPPORT_SSE2)
 162 #  include "nsTextFragmentGenericFwd.h"
 163 #endif
 164
 165 #ifdef __powerpc__
 166 namespace mozilla {
 167 namespace VMX {
 168 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
 169 }  // namespace VMX
 170 }  // namespace mozilla
 171 #endif
 172
 173 /*
 174  * This function returns -1 if all characters in str are 8 bit characters.
 175  * Otherwise, it returns a value less than or equal to the index of the first
 176  * non-8bit character in str. For example, if first non-8bit character is at
 177  * position 25, it may return 25, or for example 24, or 16. But it guarantees
 178  * there is no non-8bit character before returned value.
 179  */
 180 static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
 181 #ifdef MOZILLA_MAY_SUPPORT_SSE2
 182   if (mozilla::supports_sse2()) {
 183     return mozilla::FirstNon8Bit<xsimd::sse2>(str, end);
 184   }
 185 #elif defined(__powerpc__)
 186   if (mozilla::supports_vmx()) {
 187     return mozilla::VMX::FirstNon8Bit(str, end);
 188   }
 189 #endif
 190
 191   return FirstNon8BitUnvectorized(str, end);
 192 }
 193
 194 bool nsTextFragment::SetTo(const char16_t* aBuffer, uint32_t aLength,
 195                            bool aUpdateBidi, bool aForce2b) {
 196   if (MOZ_UNLIKELY(aLength > NS_MAX_TEXT_FRAGMENT_LENGTH)) {
 197     return false;
 198   }
 199
 200   if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
 201     uint32_t storageSize = m2b->StorageSize();
 202     uint32_t neededSize = aLength * sizeof(char16_t);
 203     if (!neededSize) {
 204       if (storageSize < AutoStringDefaultStorageSize) {
 205         // If we're storing small enough nsStringBuffer, let's preserve it.
 206
 207         static_cast<char16_t*>(m2b->Data())[0] = char16_t(0);
 208         mState.mLength = 0;
 209         mState.mIsBidi = false;
 210         return true;
 211       }
 212     } else if ((neededSize < storageSize) &&
 213                ((storageSize / 2) <
 214                 (neededSize + AutoStringDefaultStorageSize))) {
 215       // Don't try to reuse the existing nsStringBuffer, if it would have
 216       // lots of unused space.
 217
 218       memcpy(m2b->Data(), aBuffer, neededSize);
 219       static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
 220       mState.mLength = aLength;
 221       mState.mIsBidi = false;
 222       if (aUpdateBidi) {
 223         UpdateBidiFlag(aBuffer, aLength);
 224       }
 225       return true;
 226     }
 227   }
 228
 229   ReleaseText();
 230
 231   if (aLength == 0) {
 232     return true;
 233   }
 234
 235   char16_t firstChar = *aBuffer;
 236   if (!aForce2b && aLength == 1 && firstChar < 256) {
 237     m1b = sSingleCharSharedString + firstChar;
 238     mState.mInHeap = false;
 239     mState.mIs2b = false;
 240     mState.mLength = 1;
 241
 242     return true;
 243   }
 244
 245   const char16_t* ucp = aBuffer;
 246   const char16_t* uend = aBuffer + aLength;
 247
 248   // Check if we can use a shared string
 249   if (!aForce2b &&
 250       aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
 251       (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
 252     if (firstChar == ' ') {
 253       ++ucp;
 254     }
 255
 256     const char16_t* start = ucp;
 257     while (ucp < uend && *ucp == '\n') {
 258       ++ucp;
 259     }
 260     const char16_t* endNewLine = ucp;
 261
 262     char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
 263     while (ucp < uend && *ucp == space) {
 264       ++ucp;
 265     }
 266
 267     if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
 268         ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
 269       char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
 270       m1b = strings[endNewLine - start];
 271
 272       // If we didn't find a space in the beginning, skip it now.
 273       if (firstChar != ' ') {
 274         ++m1b;
 275       }
 276
 277       mState.mInHeap = false;
 278       mState.mIs2b = false;
 279       mState.mLength = aLength;
 280
 281       return true;
 282     }
 283   }
 284
 285   // See if we need to store the data in ucs2 or not
 286   int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(ucp, uend);
 287
 288   if (first16bit != -1) {  // aBuffer contains no non-8bit character
 289     // Use ucs2 storage because we have to
 290     CheckedUint32 m2bSize = CheckedUint32(aLength) + 1;
 291     if (!m2bSize.isValid()) {
 292       return false;
 293     }
 294     m2bSize *= sizeof(char16_t);
 295     if (!m2bSize.isValid()) {
 296       return false;
 297     }
 298
 299     m2b = nsStringBuffer::Alloc(m2bSize.value()).take();
 300     if (!m2b) {
 301       return false;
 302     }
 303     memcpy(m2b->Data(), aBuffer, aLength * sizeof(char16_t));
 304     static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
 305
 306     mState.mIs2b = true;
 307     if (aUpdateBidi) {
 308       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
 309     }
 310
 311   } else {
 312     // Use 1 byte storage because we can
 313     char* buff = static_cast<char*>(malloc(aLength));
 314     if (!buff) {
 315       return false;
 316     }
 317
 318     // Copy data
 319     LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength));
 320     m1b = buff;
 321     mState.mIs2b = false;
 322   }
 323
 324   // Setup our fields
 325   mState.mInHeap = true;
 326   mState.mLength = aLength;
 327
 328   return true;
 329 }
 330
 331 void nsTextFragment::CopyTo(char16_t* aDest, uint32_t aOffset,
 332                             uint32_t aCount) {
 333   const CheckedUint32 endOffset = CheckedUint32(aOffset) + aCount;
 334   if (!endOffset.isValid() || endOffset.value() > GetLength()) {
 335     aCount = mState.mLength - aOffset;
 336   }
 337
 338   if (aCount) {
 339     if (mState.mIs2b) {
 340       memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
 341     } else {
 342       const char* cp = m1b + aOffset;
 343       ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount));
 344     }
 345   }
 346 }
 347
 348 bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
 349                             bool aUpdateBidi, bool aForce2b) {
 350   if (!aLength) {
 351     return true;
 352   }
 353
 354   // This is a common case because some callsites create a textnode
 355   // with a value by creating the node and then calling AppendData.
 356   if (mState.mLength == 0) {
 357     return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b);
 358   }
 359
 360   // Should we optimize for aData.Length() == 0?
 361
 362   // FYI: Don't use CheckedInt in this method since here is very hot path
 363   //      in some performance tests.
 364   if (NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength) {
 365     return false;  // Would be overflown if we'd keep handling.
 366   }
 367
 368   if (mState.mIs2b) {
 369     size_t size = mState.mLength + aLength + 1;
 370     if (SIZE_MAX / sizeof(char16_t) < size) {
 371       return false;  // Would be overflown if we'd keep handling.
 372     }
 373     size *= sizeof(char16_t);
 374
 375     // Already a 2-byte string so the result will be too
 376     nsStringBuffer* buff = nullptr;
 377     nsStringBuffer* bufferToRelease = nullptr;
 378     if (m2b->IsReadonly()) {
 379       buff = nsStringBuffer::Alloc(size).take();
 380       if (!buff) {
 381         return false;
 382       }
 383       bufferToRelease = m2b;
 384       memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(),
 385              mState.mLength * sizeof(char16_t));
 386     } else {
 387       buff = nsStringBuffer::Realloc(m2b, size);
 388       if (!buff) {
 389         return false;
 390       }
 391     }
 392
 393     char16_t* data = static_cast<char16_t*>(buff->Data());
 394     memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
 395     mState.mLength += aLength;
 396     m2b = buff;
 397     data[mState.mLength] = char16_t(0);
 398
 399     NS_IF_RELEASE(bufferToRelease);
 400
 401     if (aUpdateBidi) {
 402       UpdateBidiFlag(aBuffer, aLength);
 403     }
 404
 405     return true;
 406   }
 407
 408   // Current string is a 1-byte string, check if the new data fits in one byte
 409   // too.
 410   int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(aBuffer, aBuffer + aLength);
 411
 412   if (first16bit != -1) {  // aBuffer contains no non-8bit character
 413     size_t size = mState.mLength + aLength + 1;
 414     if (SIZE_MAX / sizeof(char16_t) < size) {
 415       return false;  // Would be overflown if we'd keep handling.
 416     }
 417     size *= sizeof(char16_t);
 418
 419     // The old data was 1-byte, but the new is not so we have to expand it
 420     // all to 2-byte
 421     nsStringBuffer* buff = nsStringBuffer::Alloc(size).take();
 422     if (!buff) {
 423       return false;
 424     }
 425
 426     // Copy data into buff
 427     char16_t* data = static_cast<char16_t*>(buff->Data());
 428     ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength));
 429
 430     memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
 431     mState.mLength += aLength;
 432     mState.mIs2b = true;
 433
 434     if (mState.mInHeap) {
 435       free(const_cast<char*>(m1b));
 436     }
 437     data[mState.mLength] = char16_t(0);
 438     m2b = buff;
 439
 440     mState.mInHeap = true;
 441
 442     if (aUpdateBidi) {
 443       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
 444     }
 445
 446     return true;
 447   }
 448
 449   // The new and the old data is all 1-byte
 450   size_t size = mState.mLength + aLength;
 451   MOZ_ASSERT(sizeof(char) == 1);
 452   char* buff;
 453   if (mState.mInHeap) {
 454     buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
 455     if (!buff) {
 456       return false;
 457     }
 458   } else {
 459     buff = static_cast<char*>(malloc(size));
 460     if (!buff) {
 461       return false;
 462     }
 463
 464     memcpy(buff, m1b, mState.mLength);
 465     mState.mInHeap = true;
 466   }
 467
 468   // Copy aBuffer into buff.
 469   LossyConvertUtf16toLatin1(Span(aBuffer, aLength),
 470                             Span(buff + mState.mLength, aLength));
 471
 472   m1b = buff;
 473   mState.mLength += aLength;
 474
 475   return true;
 476 }
 477
 478 /* virtual */
 479 size_t nsTextFragment::SizeOfExcludingThis(
 480     mozilla::MallocSizeOf aMallocSizeOf) const {
 481   if (Is2b()) {
 482     return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
 483   }
 484
 485   if (mState.mInHeap) {
 486     return aMallocSizeOf(m1b);
 487   }
 488
 489   return 0;
 490 }
 491
 492 // To save time we only do this when we really want to know, not during
 493 // every allocation
 494 void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) {
 495   if (mState.mIs2b && !mState.mIsBidi) {
 496     if (HasRTLChars(Span(aBuffer, aLength))) {
 497       mState.mIsBidi = true;
 498     }
 499   }
 500 }
 501
 502 bool nsTextFragment::TextEquals(const nsTextFragment& aOther) const {
 503   if (!Is2b()) {
 504     // We're 1-byte.
 505     if (!aOther.Is2b()) {
 506       nsDependentCSubstring ourStr(Get1b(), GetLength());
 507       return ourStr.Equals(
 508           nsDependentCSubstring(aOther.Get1b(), aOther.GetLength()));
 509     }
 510
 511     // We're 1-byte, the other thing is 2-byte.  Instead of implementing a
 512     // separate codepath for this, just use our code below.
 513     return aOther.TextEquals(*this);
 514   }
 515
 516   nsDependentSubstring ourStr(Get2b(), GetLength());
 517   if (aOther.Is2b()) {
 518     return ourStr.Equals(
 519         nsDependentSubstring(aOther.Get2b(), aOther.GetLength()));
 520   }
 521
 522   // We can't use EqualsASCII here, because the other string might not
 523   // actually be ASCII.  Just roll our own compare; do it in the simple way.
 524   // Bug 1532356 tracks not having to roll our own.
 525   if (GetLength() != aOther.GetLength()) {
 526     return false;
 527   }
 528
 529   const char16_t* ourChars = Get2b();
 530   const char* otherChars = aOther.Get1b();
 531   for (uint32_t i = 0; i < GetLength(); ++i) {
 532     if (ourChars[i] != static_cast<char16_t>(otherChars[i])) {
 533       return false;
 534     }
 535   }
 536
 537   return true;
 538 }