Bug 1842773 - Part 5: Add ArrayBuffer.prototype.{maxByteLength,resizable} getters...
[gecko.git] / dom / base / nsTextFragment.cpp
blob5cba2577b83aeeb45ef7b0d4f0e2f89276a98c18
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * A class which represents a fragment of text (e.g. inside a text
 * node); if only code points below 256 are used, the text is stored as
 * a char*; otherwise the text is stored as a char16_t*.
 */
13 #include "nsTextFragment.h"
14 #include "nsCRT.h"
15 #include "nsReadableUtils.h"
16 #include "nsBidiUtils.h"
17 #include "nsUnicharUtils.h"
18 #include "mozilla/CheckedInt.h"
19 #include "mozilla/MemoryReporting.h"
20 #include "mozilla/SSE.h"
21 #include "mozilla/ppc.h"
22 #include "nsTextFragmentImpl.h"
23 #include <algorithm>
// Number of trailing spaces (or tabs) stored in every shared whitespace
// string, i.e. the longest whitespace run a shared string can supply after
// its newlines.
#define TEXTFRAG_WHITE_AFTER_NEWLINE 50
// Largest number of newlines a shared whitespace string can supply.
#define TEXTFRAG_MAX_NEWLINES 7

// Static buffers used for common fragments. nsTextFragment::Init() fills
// entry i of each whitespace table with one space, then i newlines, then
// TEXTFRAG_WHITE_AFTER_NEWLINE spaces (sSpaceSharedString) or tabs
// (sTabSharedString); nsTextFragment::Shutdown() frees them again.
static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
// Entry c holds the single character whose value is c.
static char sSingleCharSharedString[256];
33 using namespace mozilla;
35 // static
36 nsresult nsTextFragment::Init() {
37 // Create whitespace strings
38 uint32_t i;
39 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
40 sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
41 sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
42 sSpaceSharedString[i][0] = ' ';
43 sTabSharedString[i][0] = ' ';
44 uint32_t j;
45 for (j = 1; j < 1 + i; ++j) {
46 sSpaceSharedString[i][j] = '\n';
47 sTabSharedString[i][j] = '\n';
49 for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
50 sSpaceSharedString[i][j] = ' ';
51 sTabSharedString[i][j] = '\t';
55 // Create single-char strings
56 for (i = 0; i < 256; ++i) {
57 sSingleCharSharedString[i] = i;
60 return NS_OK;
63 // static
64 void nsTextFragment::Shutdown() {
65 uint32_t i;
66 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
67 delete[] sSpaceSharedString[i];
68 delete[] sTabSharedString[i];
69 sSpaceSharedString[i] = nullptr;
70 sTabSharedString[i] = nullptr;
// Drops whatever text storage this fragment holds (see ReleaseText()) and
// then records the destruction through MOZ_COUNT_DTOR.
nsTextFragment::~nsTextFragment() {
  ReleaseText();
  MOZ_COUNT_DTOR(nsTextFragment);
}
79 void nsTextFragment::ReleaseText() {
80 if (mState.mIs2b) {
81 NS_RELEASE(m2b);
82 } else if (mState.mLength && m1b && mState.mInHeap) {
83 free(const_cast<char*>(m1b));
86 m1b = nullptr;
87 mState.mIsBidi = false;
89 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
90 mAllBits = 0;
93 nsTextFragment& nsTextFragment::operator=(const nsTextFragment& aOther) {
94 ReleaseText();
96 if (aOther.mState.mLength) {
97 if (!aOther.mState.mInHeap) {
98 MOZ_ASSERT(!aOther.mState.mIs2b);
99 m1b = aOther.m1b;
100 } else if (aOther.mState.mIs2b) {
101 m2b = aOther.m2b;
102 NS_ADDREF(m2b);
103 } else {
104 m1b = static_cast<char*>(malloc(aOther.mState.mLength));
105 if (m1b) {
106 memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength);
107 } else {
108 // allocate a buffer for a single REPLACEMENT CHARACTER
109 m2b = nsStringBuffer::Alloc(sizeof(char16_t) * 2).take();
110 if (!m2b) {
111 MOZ_CRASH("OOM!");
113 char16_t* data = static_cast<char16_t*>(m2b->Data());
114 data[0] = 0xFFFD; // REPLACEMENT CHARACTER
115 data[1] = char16_t(0);
116 mState.mIs2b = true;
117 mState.mInHeap = true;
118 mState.mLength = 1;
119 return *this;
123 mAllBits = aOther.mAllBits;
126 return *this;
129 static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
130 const char16_t* end) {
131 using p = Non8BitParameters<sizeof(size_t)>;
132 const size_t mask = p::mask();
133 const uint32_t alignMask = p::alignMask();
134 const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
135 const int32_t len = end - str;
136 int32_t i = 0;
138 // Align ourselves to a word boundary.
139 int32_t alignLen = std::min(
140 len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
141 for (; i < alignLen; i++) {
142 if (str[i] > 255) return i;
145 // Check one word at a time.
146 const int32_t wordWalkEnd =
147 ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
148 for (; i < wordWalkEnd; i += numUnicharsPerWord) {
149 const size_t word = *reinterpret_cast<const size_t*>(str + i);
150 if (word & mask) return i;
153 // Take care of the remainder one character at a time.
154 for (; i < len; i++) {
155 if (str[i] > 255) return i;
158 return -1;
// Pulled in only on builds that may support SSE2; FirstNon8Bit() below
// calls mozilla::FirstNon8Bit<xsimd::sse2> on those builds.
#if defined(MOZILLA_MAY_SUPPORT_SSE2)
#  include "nsTextFragmentGenericFwd.h"
#endif

// Declaration of the VMX scan used on PowerPC builds; its definition is
// not in this part of the file.
#ifdef __powerpc__
namespace mozilla {
namespace VMX {
int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
}  // namespace VMX
}  // namespace mozilla
#endif
/**
 * This function returns -1 if all characters in str are 8 bit characters.
 * Otherwise, it returns a value less than or equal to the index of the first
 * non-8bit character in str. For example, if first non-8bit character is at
 * position 25, it may return 25, or for example 24, or 16. But it guarantees
 * there is no non-8bit character before returned value.
 *
 * The scan dispatches to the SSE2 or VMX implementation when the build may
 * support it and the runtime check succeeds; otherwise it falls back to
 * FirstNon8BitUnvectorized().
 */
static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
#ifdef MOZILLA_MAY_SUPPORT_SSE2
  if (mozilla::supports_sse2()) {
    return mozilla::FirstNon8Bit<xsimd::sse2>(str, end);
  }
#elif defined(__powerpc__)
  if (mozilla::supports_vmx()) {
    return mozilla::VMX::FirstNon8Bit(str, end);
  }
#endif

  // No usable vector implementation: use the portable scan.
  return FirstNon8BitUnvectorized(str, end);
}
// Replaces the fragment's text with the aLength UTF-16 code units at
// aBuffer.
//
// aUpdateBidi: when the text ends up in 2-byte storage, also scan it via
//              UpdateBidiFlag().
// aForce2b:    always use 2-byte storage, even if every code unit would fit
//              in one byte (this also disables the shared-string shortcuts).
//
// Returns false if aLength exceeds NS_MAX_TEXT_FRAGMENT_LENGTH, if a size
// computation overflows, or if an allocation fails; true otherwise. Except
// on the early length check and the buffer-reuse path, the previous text
// has already been released by the time a failure is reported.
bool nsTextFragment::SetTo(const char16_t* aBuffer, uint32_t aLength,
                           bool aUpdateBidi, bool aForce2b) {
  if (MOZ_UNLIKELY(aLength > NS_MAX_TEXT_FRAGMENT_LENGTH)) {
    return false;
  }

  // Fast path: we already own a writable 2-byte buffer and the caller wants
  // 2-byte storage, so try to overwrite it in place instead of reallocating.
  if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
    uint32_t storageSize = m2b->StorageSize();
    uint32_t neededSize = aLength * sizeof(char16_t);
    if (!neededSize) {
      if (storageSize < AutoStringDefaultStorageSize) {
        // If we're storing small enough nsStringBuffer, let's preserve it.

        static_cast<char16_t*>(m2b->Data())[0] = char16_t(0);
        mState.mLength = 0;
        mState.mIsBidi = false;
        return true;
      }
    } else if ((neededSize < storageSize) &&
               ((storageSize / 2) <
                (neededSize + AutoStringDefaultStorageSize))) {
      // Reuse the existing nsStringBuffer only when the new text fits and
      // would not leave lots of unused space behind (the second half of the
      // condition above).

      memcpy(m2b->Data(), aBuffer, neededSize);
      static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
      mState.mLength = aLength;
      mState.mIsBidi = false;
      if (aUpdateBidi) {
        UpdateBidiFlag(aBuffer, aLength);
      }
      return true;
    }
  }

  ReleaseText();

  if (aLength == 0) {
    return true;
  }

  // A single character below 256 can point into the static single-char
  // table instead of allocating.
  char16_t firstChar = *aBuffer;
  if (!aForce2b && aLength == 1 && firstChar < 256) {
    m1b = sSingleCharSharedString + firstChar;
    mState.mInHeap = false;
    mState.mIs2b = false;
    mState.mLength = 1;

    return true;
  }

  const char16_t* ucp = aBuffer;
  const char16_t* uend = aBuffer + aLength;

  // Check if we can use a shared string: an optional leading space, then
  // newlines, then a run of only spaces or only tabs.
  if (!aForce2b &&
      aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
      (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
    if (firstChar == ' ') {
      ++ucp;
    }

    const char16_t* start = ucp;
    while (ucp < uend && *ucp == '\n') {
      ++ucp;
    }
    const char16_t* endNewLine = ucp;

    // The run after the newlines must consist of a single kind of
    // whitespace; a tab right after the newlines selects the tab table.
    char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
    while (ucp < uend && *ucp == space) {
      ++ucp;
    }

    if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
        ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
      char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
      m1b = strings[endNewLine - start];

      // Every shared string starts with a space; if our text does not,
      // skip over it.
      if (firstChar != ' ') {
        ++m1b;
      }

      mState.mInHeap = false;
      mState.mIs2b = false;
      mState.mLength = aLength;

      return true;
    }
  }

  // See if we need to store the data in ucs2 or not. The scan starts at ucp,
  // which may already be past leading whitespace checked above.
  int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(ucp, uend);

  if (first16bit != -1) {  // aBuffer has a non-8bit character, or aForce2b
    // Use ucs2 storage because we have to
    CheckedUint32 m2bSize = CheckedUint32(aLength) + 1;
    if (!m2bSize.isValid()) {
      return false;
    }
    m2bSize *= sizeof(char16_t);
    if (!m2bSize.isValid()) {
      return false;
    }

    m2b = nsStringBuffer::Alloc(m2bSize.value()).take();
    if (!m2b) {
      return false;
    }
    memcpy(m2b->Data(), aBuffer, aLength * sizeof(char16_t));
    static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);

    mState.mIs2b = true;
    if (aUpdateBidi) {
      // Everything before first16bit is known to be 8-bit, so the bidi scan
      // can skip it.
      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
    }

  } else {
    // Use 1 byte storage because we can
    char* buff = static_cast<char*>(malloc(aLength));
    if (!buff) {
      return false;
    }

    // Copy data
    LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength));
    m1b = buff;
    mState.mIs2b = false;
  }

  // Setup our fields
  mState.mInHeap = true;
  mState.mLength = aLength;

  return true;
}
331 void nsTextFragment::CopyTo(char16_t* aDest, uint32_t aOffset,
332 uint32_t aCount) {
333 const CheckedUint32 endOffset = CheckedUint32(aOffset) + aCount;
334 if (!endOffset.isValid() || endOffset.value() > GetLength()) {
335 aCount = mState.mLength - aOffset;
338 if (aCount) {
339 if (mState.mIs2b) {
340 memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
341 } else {
342 const char* cp = m1b + aOffset;
343 ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount));
// Appends the aLength UTF-16 code units at aBuffer to the fragment.
//
// aUpdateBidi: when the result is stored as 2-byte text, scan the appended
//              data via UpdateBidiFlag().
// aForce2b:    treat the appended data as requiring 2-byte storage even if
//              every code unit would fit in one byte.
//
// Returns false, leaving the existing text in place, if the combined length
// would exceed NS_MAX_TEXT_FRAGMENT_LENGTH, a size computation would
// overflow, or an allocation fails. Returns true otherwise. (When the
// fragment is empty the work is delegated to SetTo(), whose failure
// behavior applies instead.)
bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
                            bool aUpdateBidi, bool aForce2b) {
  if (!aLength) {
    return true;
  }

  // This is a common case because some callsites create a textnode
  // with a value by creating the node and then calling AppendData.
  if (mState.mLength == 0) {
    return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b);
  }

  // FYI: Don't use CheckedInt in this method since here is very hot path
  //      in some performance tests.
  if (NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength) {
    return false;  // Would be overflown if we'd keep handling.
  }

  if (mState.mIs2b) {
    // +1 for the trailing null code unit.
    size_t size = mState.mLength + aLength + 1;
    if (SIZE_MAX / sizeof(char16_t) < size) {
      return false;  // Would be overflown if we'd keep handling.
    }
    size *= sizeof(char16_t);

    // Already a 2-byte string so the result will be too
    nsStringBuffer* buff = nullptr;
    nsStringBuffer* bufferToRelease = nullptr;
    if (m2b->IsReadonly()) {
      // The current buffer must not be modified in place: copy the
      // existing text into a fresh buffer and release the old one once the
      // new one is installed.
      buff = nsStringBuffer::Alloc(size).take();
      if (!buff) {
        return false;
      }
      bufferToRelease = m2b;
      memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(),
             mState.mLength * sizeof(char16_t));
    } else {
      buff = nsStringBuffer::Realloc(m2b, size);
      if (!buff) {
        return false;
      }
    }

    char16_t* data = static_cast<char16_t*>(buff->Data());
    memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
    mState.mLength += aLength;
    m2b = buff;
    data[mState.mLength] = char16_t(0);

    NS_IF_RELEASE(bufferToRelease);

    if (aUpdateBidi) {
      UpdateBidiFlag(aBuffer, aLength);
    }

    return true;
  }

  // Current string is a 1-byte string, check if the new data fits in one byte
  // too.
  int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(aBuffer, aBuffer + aLength);

  if (first16bit != -1) {  // aBuffer has a non-8bit character, or aForce2b
    // +1 for the trailing null code unit.
    size_t size = mState.mLength + aLength + 1;
    if (SIZE_MAX / sizeof(char16_t) < size) {
      return false;  // Would be overflown if we'd keep handling.
    }
    size *= sizeof(char16_t);

    // The old data was 1-byte, but the new is not so we have to expand it
    // all to 2-byte
    nsStringBuffer* buff = nsStringBuffer::Alloc(size).take();
    if (!buff) {
      return false;
    }

    // Copy data into buff: widen the existing 1-byte text first, then
    // append the new UTF-16 data after it.
    char16_t* data = static_cast<char16_t*>(buff->Data());
    ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength));

    memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
    mState.mLength += aLength;
    mState.mIs2b = true;

    // The old 1-byte storage is ours to free only when it was
    // heap-allocated.
    if (mState.mInHeap) {
      free(const_cast<char*>(m1b));
    }
    data[mState.mLength] = char16_t(0);
    m2b = buff;

    mState.mInHeap = true;

    if (aUpdateBidi) {
      // Everything before first16bit is known to be 8-bit, so the bidi scan
      // can skip it.
      UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
    }

    return true;
  }

  // The new and the old data is all 1-byte
  size_t size = mState.mLength + aLength;
  MOZ_ASSERT(sizeof(char) == 1);
  char* buff;
  if (mState.mInHeap) {
    buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
    if (!buff) {
      return false;
    }
  } else {
    // The current text is not heap-allocated, so it cannot be grown in
    // place: copy it into a new heap buffer.
    buff = static_cast<char*>(malloc(size));
    if (!buff) {
      return false;
    }

    memcpy(buff, m1b, mState.mLength);
    mState.mInHeap = true;
  }

  // Copy aBuffer into buff.
  LossyConvertUtf16toLatin1(Span(aBuffer, aLength),
                            Span(buff + mState.mLength, aLength));

  m1b = buff;
  mState.mLength += aLength;

  return true;
}
478 /* virtual */
479 size_t nsTextFragment::SizeOfExcludingThis(
480 mozilla::MallocSizeOf aMallocSizeOf) const {
481 if (Is2b()) {
482 return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
485 if (mState.mInHeap) {
486 return aMallocSizeOf(m1b);
489 return 0;
492 // To save time we only do this when we really want to know, not during
493 // every allocation
494 void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) {
495 if (mState.mIs2b && !mState.mIsBidi) {
496 if (HasRTLChars(Span(aBuffer, aLength))) {
497 mState.mIsBidi = true;
502 bool nsTextFragment::TextEquals(const nsTextFragment& aOther) const {
503 if (!Is2b()) {
504 // We're 1-byte.
505 if (!aOther.Is2b()) {
506 nsDependentCSubstring ourStr(Get1b(), GetLength());
507 return ourStr.Equals(
508 nsDependentCSubstring(aOther.Get1b(), aOther.GetLength()));
511 // We're 1-byte, the other thing is 2-byte. Instead of implementing a
512 // separate codepath for this, just use our code below.
513 return aOther.TextEquals(*this);
516 nsDependentSubstring ourStr(Get2b(), GetLength());
517 if (aOther.Is2b()) {
518 return ourStr.Equals(
519 nsDependentSubstring(aOther.Get2b(), aOther.GetLength()));
522 // We can't use EqualsASCII here, because the other string might not
523 // actually be ASCII. Just roll our own compare; do it in the simple way.
524 // Bug 1532356 tracks not having to roll our own.
525 if (GetLength() != aOther.GetLength()) {
526 return false;
529 const char16_t* ourChars = Get2b();
530 const char* otherChars = aOther.Get1b();
531 for (uint32_t i = 0; i < GetLength(); ++i) {
532 if (ourChars[i] != static_cast<char16_t>(otherChars[i])) {
533 return false;
537 return true;