dom/base/nsTextFragment.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 /*
   8  * A class which represents a fragment of text (eg inside a text
   9  * node); if only codepoints below 256 are used, the text is stored as
  10  * a char*; otherwise the text is stored as a char16_t*
  11  */
  12
  13 #include "nsTextFragment.h"
  14 #include "nsCRT.h"
  15 #include "nsReadableUtils.h"
  16 #include "nsMemory.h"
  17 #include "nsBidiUtils.h"
  18 #include "nsUnicharUtils.h"
  19 #include "mozilla/CheckedInt.h"
  20 #include "mozilla/MemoryReporting.h"
  21 #include "mozilla/SSE.h"
  22 #include "mozilla/ppc.h"
  23 #include "nsTextFragmentImpl.h"
  24 #include <algorithm>
  25
  26 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
  27 #define TEXTFRAG_MAX_NEWLINES 7
  28
  29 // Static buffer used for common fragments
  30 static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
  31 static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
  32 static char sSingleCharSharedString[256];
  33
  34 using namespace mozilla;
  35
  36 // static
  37 nsresult nsTextFragment::Init() {
  38   // Create whitespace strings
  39   uint32_t i;
  40   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
  41     sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
  42     sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
  43     sSpaceSharedString[i][0] = ' ';
  44     sTabSharedString[i][0] = ' ';
  45     uint32_t j;
  46     for (j = 1; j < 1 + i; ++j) {
  47       sSpaceSharedString[i][j] = '\n';
  48       sTabSharedString[i][j] = '\n';
  49     }
  50     for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
  51       sSpaceSharedString[i][j] = ' ';
  52       sTabSharedString[i][j] = '\t';
  53     }
  54   }
  55
  56   // Create single-char strings
  57   for (i = 0; i < 256; ++i) {
  58     sSingleCharSharedString[i] = i;
  59   }
  60
  61   return NS_OK;
  62 }
  63
  64 // static
  65 void nsTextFragment::Shutdown() {
  66   uint32_t i;
  67   for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
  68     delete[] sSpaceSharedString[i];
  69     delete[] sTabSharedString[i];
  70     sSpaceSharedString[i] = nullptr;
  71     sTabSharedString[i] = nullptr;
  72   }
  73 }
  74
  75 nsTextFragment::~nsTextFragment() {
  76   ReleaseText();
  77   MOZ_COUNT_DTOR(nsTextFragment);
  78 }
  79
  80 void nsTextFragment::ReleaseText() {
  81   if (mState.mIs2b) {
  82     NS_RELEASE(m2b);
  83   } else if (mState.mLength && m1b && mState.mInHeap) {
  84     free(const_cast<char*>(m1b));
  85   }
  86
  87   m1b = nullptr;
  88   mState.mIsBidi = false;
  89
  90   // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
  91   mAllBits = 0;
  92 }
  93
  94 nsTextFragment& nsTextFragment::operator=(const nsTextFragment& aOther) {
  95   ReleaseText();
  96
  97   if (aOther.mState.mLength) {
  98     if (!aOther.mState.mInHeap) {
  99       MOZ_ASSERT(!aOther.mState.mIs2b);
 100       m1b = aOther.m1b;
 101     } else if (aOther.mState.mIs2b) {
 102       m2b = aOther.m2b;
 103       NS_ADDREF(m2b);
 104     } else {
 105       m1b = static_cast<char*>(malloc(aOther.mState.mLength));
 106       if (m1b) {
 107         memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength);
 108       } else {
 109         // allocate a buffer for a single REPLACEMENT CHARACTER
 110         m2b = nsStringBuffer::Alloc(sizeof(char16_t) * 2).take();
 111         if (!m2b) {
 112           MOZ_CRASH("OOM!");
 113         }
 114         char16_t* data = static_cast<char16_t*>(m2b->Data());
 115         data[0] = 0xFFFD;  // REPLACEMENT CHARACTER
 116         data[1] = char16_t(0);
 117         mState.mIs2b = true;
 118         mState.mInHeap = true;
 119         mState.mLength = 1;
 120         return *this;
 121       }
 122     }
 123
 124     mAllBits = aOther.mAllBits;
 125   }
 126
 127   return *this;
 128 }
 129
 130 static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
 131                                                const char16_t* end) {
 132   typedef Non8BitParameters<sizeof(size_t)> p;
 133   const size_t mask = p::mask();
 134   const uint32_t alignMask = p::alignMask();
 135   const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
 136   const int32_t len = end - str;
 137   int32_t i = 0;
 138
 139   // Align ourselves to a word boundary.
 140   int32_t alignLen = std::min(
 141       len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
 142   for (; i < alignLen; i++) {
 143     if (str[i] > 255) return i;
 144   }
 145
 146   // Check one word at a time.
 147   const int32_t wordWalkEnd =
 148       ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
 149   for (; i < wordWalkEnd; i += numUnicharsPerWord) {
 150     const size_t word = *reinterpret_cast<const size_t*>(str + i);
 151     if (word & mask) return i;
 152   }
 153
 154   // Take care of the remainder one character at a time.
 155   for (; i < len; i++) {
 156     if (str[i] > 255) return i;
 157   }
 158
 159   return -1;
 160 }
 161
 162 #ifdef MOZILLA_MAY_SUPPORT_SSE2
 163 namespace mozilla {
 164 namespace SSE2 {
 165 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
 166 }  // namespace SSE2
 167 }  // namespace mozilla
 168 #endif
 169
 170 #ifdef __powerpc__
 171 namespace mozilla {
 172 namespace VMX {
 173 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
 174 }  // namespace VMX
 175 }  // namespace mozilla
 176 #endif
 177
 178 /*
 179  * This function returns -1 if all characters in str are 8 bit characters.
 180  * Otherwise, it returns a value less than or equal to the index of the first
 181  * non-8bit character in str. For example, if first non-8bit character is at
 182  * position 25, it may return 25, or for example 24, or 16. But it guarantees
 183  * there is no non-8bit character before returned value.
 184  */
 185 static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
 186 #ifdef MOZILLA_MAY_SUPPORT_SSE2
 187   if (mozilla::supports_sse2()) {
 188     return mozilla::SSE2::FirstNon8Bit(str, end);
 189   }
 190 #elif defined(__powerpc__)
 191   if (mozilla::supports_vmx()) {
 192     return mozilla::VMX::FirstNon8Bit(str, end);
 193   }
 194 #endif
 195
 196   return FirstNon8BitUnvectorized(str, end);
 197 }
 198
 199 bool nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength,
 200                            bool aUpdateBidi, bool aForce2b) {
 201   if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
 202     uint32_t storageSize = m2b->StorageSize();
 203     uint32_t neededSize = aLength * sizeof(char16_t);
 204     if (!neededSize) {
 205       if (storageSize < AutoStringDefaultStorageSize) {
 206         // If we're storing small enough nsStringBuffer, let's preserve it.
 207
 208         static_cast<char16_t*>(m2b->Data())[0] = char16_t(0);
 209         mState.mLength = 0;
 210         mState.mIsBidi = false;
 211         return true;
 212       }
 213     } else if ((neededSize < storageSize) &&
 214                ((storageSize / 2) <
 215                 (neededSize + AutoStringDefaultStorageSize))) {
 216       // Don't try to reuse the existing nsStringBuffer, if it would have
 217       // lots of unused space.
 218
 219       memcpy(m2b->Data(), aBuffer, neededSize);
 220       static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
 221       mState.mLength = aLength;
 222       mState.mIsBidi = false;
 223       if (aUpdateBidi) {
 224         UpdateBidiFlag(aBuffer, aLength);
 225       }
 226       return true;
 227     }
 228   }
 229
 230   ReleaseText();
 231
 232   if (aLength == 0) {
 233     return true;
 234   }
 235
 236   char16_t firstChar = *aBuffer;
 237   if (!aForce2b && aLength == 1 && firstChar < 256) {
 238     m1b = sSingleCharSharedString + firstChar;
 239     mState.mInHeap = false;
 240     mState.mIs2b = false;
 241     mState.mLength = 1;
 242
 243     return true;
 244   }
 245
 246   const char16_t* ucp = aBuffer;
 247   const char16_t* uend = aBuffer + aLength;
 248
 249   // Check if we can use a shared string
 250   if (!aForce2b &&
 251       aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
 252       (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
 253     if (firstChar == ' ') {
 254       ++ucp;
 255     }
 256
 257     const char16_t* start = ucp;
 258     while (ucp < uend && *ucp == '\n') {
 259       ++ucp;
 260     }
 261     const char16_t* endNewLine = ucp;
 262
 263     char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
 264     while (ucp < uend && *ucp == space) {
 265       ++ucp;
 266     }
 267
 268     if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
 269         ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
 270       char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
 271       m1b = strings[endNewLine - start];
 272
 273       // If we didn't find a space in the beginning, skip it now.
 274       if (firstChar != ' ') {
 275         ++m1b;
 276       }
 277
 278       mState.mInHeap = false;
 279       mState.mIs2b = false;
 280       mState.mLength = aLength;
 281
 282       return true;
 283     }
 284   }
 285
 286   // See if we need to store the data in ucs2 or not
 287   int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(ucp, uend);
 288
 289   if (first16bit != -1) {  // aBuffer contains no non-8bit character
 290     // Use ucs2 storage because we have to
 291     CheckedUint32 m2bSize = aLength + 1;
 292     m2bSize *= sizeof(char16_t);
 293     if (!m2bSize.isValid()) {
 294       return false;
 295     }
 296
 297     m2b = nsStringBuffer::Alloc(m2bSize.value()).take();
 298     if (!m2b) {
 299       return false;
 300     }
 301     memcpy(m2b->Data(), aBuffer, aLength * sizeof(char16_t));
 302     static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
 303
 304     mState.mIs2b = true;
 305     if (aUpdateBidi) {
 306       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
 307     }
 308
 309   } else {
 310     // Use 1 byte storage because we can
 311     char* buff = static_cast<char*>(malloc(aLength));
 312     if (!buff) {
 313       return false;
 314     }
 315
 316     // Copy data
 317     LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength));
 318     m1b = buff;
 319     mState.mIs2b = false;
 320   }
 321
 322   // Setup our fields
 323   mState.mInHeap = true;
 324   mState.mLength = aLength;
 325
 326   return true;
 327 }
 328
 329 void nsTextFragment::CopyTo(char16_t* aDest, int32_t aOffset, int32_t aCount) {
 330   NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
 331   NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
 332
 333   if (aOffset < 0) {
 334     aOffset = 0;
 335   }
 336
 337   if (uint32_t(aOffset + aCount) > GetLength()) {
 338     aCount = mState.mLength - aOffset;
 339   }
 340
 341   if (aCount != 0) {
 342     if (mState.mIs2b) {
 343       memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
 344     } else {
 345       const char* cp = m1b + aOffset;
 346       ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount));
 347     }
 348   }
 349 }
 350
 351 bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
 352                             bool aUpdateBidi, bool aForce2b) {
 353   if (!aLength) {
 354     return true;
 355   }
 356
 357   // This is a common case because some callsites create a textnode
 358   // with a value by creating the node and then calling AppendData.
 359   if (mState.mLength == 0) {
 360     return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b);
 361   }
 362
 363   // Should we optimize for aData.Length() == 0?
 364
 365   // FYI: Don't use CheckedInt in this method since here is very hot path
 366   //      in some performance tests.
 367   if (NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength) {
 368     return false;  // Would be overflown if we'd keep handling.
 369   }
 370
 371   if (mState.mIs2b) {
 372     size_t size = mState.mLength + aLength + 1;
 373     if (SIZE_MAX / sizeof(char16_t) < size) {
 374       return false;  // Would be overflown if we'd keep handling.
 375     }
 376     size *= sizeof(char16_t);
 377
 378     // Already a 2-byte string so the result will be too
 379     nsStringBuffer* buff = nullptr;
 380     nsStringBuffer* bufferToRelease = nullptr;
 381     if (m2b->IsReadonly()) {
 382       buff = nsStringBuffer::Alloc(size).take();
 383       if (!buff) {
 384         return false;
 385       }
 386       bufferToRelease = m2b;
 387       memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(),
 388              mState.mLength * sizeof(char16_t));
 389     } else {
 390       buff = nsStringBuffer::Realloc(m2b, size);
 391       if (!buff) {
 392         return false;
 393       }
 394     }
 395
 396     char16_t* data = static_cast<char16_t*>(buff->Data());
 397     memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
 398     mState.mLength += aLength;
 399     m2b = buff;
 400     data[mState.mLength] = char16_t(0);
 401
 402     NS_IF_RELEASE(bufferToRelease);
 403
 404     if (aUpdateBidi) {
 405       UpdateBidiFlag(aBuffer, aLength);
 406     }
 407
 408     return true;
 409   }
 410
 411   // Current string is a 1-byte string, check if the new data fits in one byte
 412   // too.
 413   int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(aBuffer, aBuffer + aLength);
 414
 415   if (first16bit != -1) {  // aBuffer contains no non-8bit character
 416     size_t size = mState.mLength + aLength + 1;
 417     if (SIZE_MAX / sizeof(char16_t) < size) {
 418       return false;  // Would be overflown if we'd keep handling.
 419     }
 420     size *= sizeof(char16_t);
 421
 422     // The old data was 1-byte, but the new is not so we have to expand it
 423     // all to 2-byte
 424     nsStringBuffer* buff = nsStringBuffer::Alloc(size).take();
 425     if (!buff) {
 426       return false;
 427     }
 428
 429     // Copy data into buff
 430     char16_t* data = static_cast<char16_t*>(buff->Data());
 431     ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength));
 432
 433     memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
 434     mState.mLength += aLength;
 435     mState.mIs2b = true;
 436
 437     if (mState.mInHeap) {
 438       free(const_cast<char*>(m1b));
 439     }
 440     data[mState.mLength] = char16_t(0);
 441     m2b = buff;
 442
 443     mState.mInHeap = true;
 444
 445     if (aUpdateBidi) {
 446       UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
 447     }
 448
 449     return true;
 450   }
 451
 452   // The new and the old data is all 1-byte
 453   size_t size = mState.mLength + aLength;
 454   MOZ_ASSERT(sizeof(char) == 1);
 455   char* buff;
 456   if (mState.mInHeap) {
 457     buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
 458     if (!buff) {
 459       return false;
 460     }
 461   } else {
 462     buff = static_cast<char*>(malloc(size));
 463     if (!buff) {
 464       return false;
 465     }
 466
 467     memcpy(buff, m1b, mState.mLength);
 468     mState.mInHeap = true;
 469   }
 470
 471   // Copy aBuffer into buff.
 472   LossyConvertUtf16toLatin1(Span(aBuffer, aLength),
 473                             Span(buff + mState.mLength, aLength));
 474
 475   m1b = buff;
 476   mState.mLength += aLength;
 477
 478   return true;
 479 }
 480
 481 /* virtual */
 482 size_t nsTextFragment::SizeOfExcludingThis(
 483     mozilla::MallocSizeOf aMallocSizeOf) const {
 484   if (Is2b()) {
 485     return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
 486   }
 487
 488   if (mState.mInHeap) {
 489     return aMallocSizeOf(m1b);
 490   }
 491
 492   return 0;
 493 }
 494
 495 // To save time we only do this when we really want to know, not during
 496 // every allocation
 497 void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) {
 498   if (mState.mIs2b && !mState.mIsBidi) {
 499     if (HasRTLChars(Span(aBuffer, aLength))) {
 500       mState.mIsBidi = true;
 501     }
 502   }
 503 }
 504
 505 bool nsTextFragment::TextEquals(const nsTextFragment& aOther) const {
 506   if (!Is2b()) {
 507     // We're 1-byte.
 508     if (!aOther.Is2b()) {
 509       nsDependentCSubstring ourStr(Get1b(), GetLength());
 510       return ourStr.Equals(
 511           nsDependentCSubstring(aOther.Get1b(), aOther.GetLength()));
 512     }
 513
 514     // We're 1-byte, the other thing is 2-byte.  Instead of implementing a
 515     // separate codepath for this, just use our code below.
 516     return aOther.TextEquals(*this);
 517   }
 518
 519   nsDependentSubstring ourStr(Get2b(), GetLength());
 520   if (aOther.Is2b()) {
 521     return ourStr.Equals(
 522         nsDependentSubstring(aOther.Get2b(), aOther.GetLength()));
 523   }
 524
 525   // We can't use EqualsASCII here, because the other string might not
 526   // actually be ASCII.  Just roll our own compare; do it in the simple way.
 527   // Bug 1532356 tracks not having to roll our own.
 528   if (GetLength() != aOther.GetLength()) {
 529     return false;
 530   }
 531
 532   const char16_t* ourChars = Get2b();
 533   const char* otherChars = aOther.Get1b();
 534   for (uint32_t i = 0; i < GetLength(); ++i) {
 535     if (ourChars[i] != static_cast<char16_t>(otherChars[i])) {
 536       return false;
 537     }
 538   }
 539
 540   return true;
 541 }