Bug 1660051 [wpt PR 25111] - Origin isolation: expand getter test coverage, a=testonly
[gecko.git] / dom / base / nsTextFragment.cpp
bloba851def01c6d4a3629bfc17ec256028f51ab86a1
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/*
 * A class which represents a fragment of text (e.g. inside a text
 * node); if only codepoints below 256 are used, the text is stored as
 * a char*; otherwise the text is stored as a char16_t*.
 */
13 #include "nsTextFragment.h"
14 #include "nsCRT.h"
15 #include "nsReadableUtils.h"
16 #include "nsMemory.h"
17 #include "nsBidiUtils.h"
18 #include "nsUnicharUtils.h"
19 #include "mozilla/CheckedInt.h"
20 #include "mozilla/MemoryReporting.h"
21 #include "mozilla/SSE.h"
22 #include "mozilla/ppc.h"
23 #include "nsTextFragmentImpl.h"
24 #include <algorithm>
26 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
27 #define TEXTFRAG_MAX_NEWLINES 7
29 // Static buffer used for common fragments
30 static char* sSpaceSharedString[TEXTFRAG_MAX_NEWLINES + 1];
31 static char* sTabSharedString[TEXTFRAG_MAX_NEWLINES + 1];
32 static char sSingleCharSharedString[256];
34 using namespace mozilla;
36 // static
37 nsresult nsTextFragment::Init() {
38 // Create whitespace strings
39 uint32_t i;
40 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
41 sSpaceSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
42 sTabSharedString[i] = new char[1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE];
43 sSpaceSharedString[i][0] = ' ';
44 sTabSharedString[i][0] = ' ';
45 uint32_t j;
46 for (j = 1; j < 1 + i; ++j) {
47 sSpaceSharedString[i][j] = '\n';
48 sTabSharedString[i][j] = '\n';
50 for (; j < (1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE); ++j) {
51 sSpaceSharedString[i][j] = ' ';
52 sTabSharedString[i][j] = '\t';
56 // Create single-char strings
57 for (i = 0; i < 256; ++i) {
58 sSingleCharSharedString[i] = i;
61 return NS_OK;
64 // static
65 void nsTextFragment::Shutdown() {
66 uint32_t i;
67 for (i = 0; i <= TEXTFRAG_MAX_NEWLINES; ++i) {
68 delete[] sSpaceSharedString[i];
69 delete[] sTabSharedString[i];
70 sSpaceSharedString[i] = nullptr;
71 sTabSharedString[i] = nullptr;
75 nsTextFragment::~nsTextFragment() {
76 ReleaseText();
77 MOZ_COUNT_DTOR(nsTextFragment);
80 void nsTextFragment::ReleaseText() {
81 if (mState.mIs2b) {
82 NS_RELEASE(m2b);
83 } else if (mState.mLength && m1b && mState.mInHeap) {
84 free(const_cast<char*>(m1b));
87 m1b = nullptr;
88 mState.mIsBidi = false;
90 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
91 mAllBits = 0;
94 nsTextFragment& nsTextFragment::operator=(const nsTextFragment& aOther) {
95 ReleaseText();
97 if (aOther.mState.mLength) {
98 if (!aOther.mState.mInHeap) {
99 MOZ_ASSERT(!aOther.mState.mIs2b);
100 m1b = aOther.m1b;
101 } else if (aOther.mState.mIs2b) {
102 m2b = aOther.m2b;
103 NS_ADDREF(m2b);
104 } else {
105 m1b = static_cast<char*>(malloc(aOther.mState.mLength));
106 if (m1b) {
107 memcpy(const_cast<char*>(m1b), aOther.m1b, aOther.mState.mLength);
108 } else {
109 // allocate a buffer for a single REPLACEMENT CHARACTER
110 m2b = nsStringBuffer::Alloc(sizeof(char16_t) * 2).take();
111 if (!m2b) {
112 MOZ_CRASH("OOM!");
114 char16_t* data = static_cast<char16_t*>(m2b->Data());
115 data[0] = 0xFFFD; // REPLACEMENT CHARACTER
116 data[1] = char16_t(0);
117 mState.mIs2b = true;
118 mState.mInHeap = true;
119 mState.mLength = 1;
120 return *this;
124 mAllBits = aOther.mAllBits;
127 return *this;
130 static inline int32_t FirstNon8BitUnvectorized(const char16_t* str,
131 const char16_t* end) {
132 typedef Non8BitParameters<sizeof(size_t)> p;
133 const size_t mask = p::mask();
134 const uint32_t alignMask = p::alignMask();
135 const uint32_t numUnicharsPerWord = p::numUnicharsPerWord();
136 const int32_t len = end - str;
137 int32_t i = 0;
139 // Align ourselves to a word boundary.
140 int32_t alignLen = std::min(
141 len, int32_t(((-NS_PTR_TO_INT32(str)) & alignMask) / sizeof(char16_t)));
142 for (; i < alignLen; i++) {
143 if (str[i] > 255) return i;
146 // Check one word at a time.
147 const int32_t wordWalkEnd =
148 ((len - i) / numUnicharsPerWord) * numUnicharsPerWord;
149 for (; i < wordWalkEnd; i += numUnicharsPerWord) {
150 const size_t word = *reinterpret_cast<const size_t*>(str + i);
151 if (word & mask) return i;
154 // Take care of the remainder one character at a time.
155 for (; i < len; i++) {
156 if (str[i] > 255) return i;
159 return -1;
162 #ifdef MOZILLA_MAY_SUPPORT_SSE2
163 namespace mozilla {
164 namespace SSE2 {
165 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
166 } // namespace SSE2
167 } // namespace mozilla
168 #endif
170 #ifdef __powerpc__
171 namespace mozilla {
172 namespace VMX {
173 int32_t FirstNon8Bit(const char16_t* str, const char16_t* end);
174 } // namespace VMX
175 } // namespace mozilla
176 #endif
179 * This function returns -1 if all characters in str are 8 bit characters.
180 * Otherwise, it returns a value less than or equal to the index of the first
181 * non-8bit character in str. For example, if first non-8bit character is at
182 * position 25, it may return 25, or for example 24, or 16. But it guarantees
183 * there is no non-8bit character before returned value.
185 static inline int32_t FirstNon8Bit(const char16_t* str, const char16_t* end) {
186 #ifdef MOZILLA_MAY_SUPPORT_SSE2
187 if (mozilla::supports_sse2()) {
188 return mozilla::SSE2::FirstNon8Bit(str, end);
190 #elif defined(__powerpc__)
191 if (mozilla::supports_vmx()) {
192 return mozilla::VMX::FirstNon8Bit(str, end);
194 #endif
196 return FirstNon8BitUnvectorized(str, end);
199 bool nsTextFragment::SetTo(const char16_t* aBuffer, int32_t aLength,
200 bool aUpdateBidi, bool aForce2b) {
201 if (aForce2b && mState.mIs2b && !m2b->IsReadonly()) {
202 uint32_t storageSize = m2b->StorageSize();
203 uint32_t neededSize = aLength * sizeof(char16_t);
204 if (!neededSize) {
205 if (storageSize < AutoStringDefaultStorageSize) {
206 // If we're storing small enough nsStringBuffer, let's preserve it.
208 static_cast<char16_t*>(m2b->Data())[0] = char16_t(0);
209 mState.mLength = 0;
210 mState.mIsBidi = false;
211 return true;
213 } else if ((neededSize < storageSize) &&
214 ((storageSize / 2) <
215 (neededSize + AutoStringDefaultStorageSize))) {
216 // Don't try to reuse the existing nsStringBuffer, if it would have
217 // lots of unused space.
219 memcpy(m2b->Data(), aBuffer, neededSize);
220 static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
221 mState.mLength = aLength;
222 mState.mIsBidi = false;
223 if (aUpdateBidi) {
224 UpdateBidiFlag(aBuffer, aLength);
226 return true;
230 ReleaseText();
232 if (aLength == 0) {
233 return true;
236 char16_t firstChar = *aBuffer;
237 if (!aForce2b && aLength == 1 && firstChar < 256) {
238 m1b = sSingleCharSharedString + firstChar;
239 mState.mInHeap = false;
240 mState.mIs2b = false;
241 mState.mLength = 1;
243 return true;
246 const char16_t* ucp = aBuffer;
247 const char16_t* uend = aBuffer + aLength;
249 // Check if we can use a shared string
250 if (!aForce2b &&
251 aLength <= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE + TEXTFRAG_MAX_NEWLINES &&
252 (firstChar == ' ' || firstChar == '\n' || firstChar == '\t')) {
253 if (firstChar == ' ') {
254 ++ucp;
257 const char16_t* start = ucp;
258 while (ucp < uend && *ucp == '\n') {
259 ++ucp;
261 const char16_t* endNewLine = ucp;
263 char16_t space = ucp < uend && *ucp == '\t' ? '\t' : ' ';
264 while (ucp < uend && *ucp == space) {
265 ++ucp;
268 if (ucp == uend && endNewLine - start <= TEXTFRAG_MAX_NEWLINES &&
269 ucp - endNewLine <= TEXTFRAG_WHITE_AFTER_NEWLINE) {
270 char** strings = space == ' ' ? sSpaceSharedString : sTabSharedString;
271 m1b = strings[endNewLine - start];
273 // If we didn't find a space in the beginning, skip it now.
274 if (firstChar != ' ') {
275 ++m1b;
278 mState.mInHeap = false;
279 mState.mIs2b = false;
280 mState.mLength = aLength;
282 return true;
286 // See if we need to store the data in ucs2 or not
287 int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(ucp, uend);
289 if (first16bit != -1) { // aBuffer contains no non-8bit character
290 // Use ucs2 storage because we have to
291 CheckedUint32 m2bSize = aLength + 1;
292 m2bSize *= sizeof(char16_t);
293 if (!m2bSize.isValid()) {
294 return false;
297 m2b = nsStringBuffer::Alloc(m2bSize.value()).take();
298 if (!m2b) {
299 return false;
301 memcpy(m2b->Data(), aBuffer, aLength * sizeof(char16_t));
302 static_cast<char16_t*>(m2b->Data())[aLength] = char16_t(0);
304 mState.mIs2b = true;
305 if (aUpdateBidi) {
306 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
309 } else {
310 // Use 1 byte storage because we can
311 char* buff = static_cast<char*>(malloc(aLength));
312 if (!buff) {
313 return false;
316 // Copy data
317 LossyConvertUtf16toLatin1(Span(aBuffer, aLength), Span(buff, aLength));
318 m1b = buff;
319 mState.mIs2b = false;
322 // Setup our fields
323 mState.mInHeap = true;
324 mState.mLength = aLength;
326 return true;
329 void nsTextFragment::CopyTo(char16_t* aDest, int32_t aOffset, int32_t aCount) {
330 NS_ASSERTION(aOffset >= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
331 NS_ASSERTION(aCount >= 0, "Bad count passed to nsTextFragment::CopyTo()!");
333 if (aOffset < 0) {
334 aOffset = 0;
337 if (uint32_t(aOffset + aCount) > GetLength()) {
338 aCount = mState.mLength - aOffset;
341 if (aCount != 0) {
342 if (mState.mIs2b) {
343 memcpy(aDest, Get2b() + aOffset, sizeof(char16_t) * aCount);
344 } else {
345 const char* cp = m1b + aOffset;
346 ConvertLatin1toUtf16(Span(cp, aCount), Span(aDest, aCount));
351 bool nsTextFragment::Append(const char16_t* aBuffer, uint32_t aLength,
352 bool aUpdateBidi, bool aForce2b) {
353 if (!aLength) {
354 return true;
357 // This is a common case because some callsites create a textnode
358 // with a value by creating the node and then calling AppendData.
359 if (mState.mLength == 0) {
360 return SetTo(aBuffer, aLength, aUpdateBidi, aForce2b);
363 // Should we optimize for aData.Length() == 0?
365 // FYI: Don't use CheckedInt in this method since here is very hot path
366 // in some performance tests.
367 if (NS_MAX_TEXT_FRAGMENT_LENGTH - mState.mLength < aLength) {
368 return false; // Would be overflown if we'd keep handling.
371 if (mState.mIs2b) {
372 size_t size = mState.mLength + aLength + 1;
373 if (SIZE_MAX / sizeof(char16_t) < size) {
374 return false; // Would be overflown if we'd keep handling.
376 size *= sizeof(char16_t);
378 // Already a 2-byte string so the result will be too
379 nsStringBuffer* buff = nullptr;
380 nsStringBuffer* bufferToRelease = nullptr;
381 if (m2b->IsReadonly()) {
382 buff = nsStringBuffer::Alloc(size).take();
383 if (!buff) {
384 return false;
386 bufferToRelease = m2b;
387 memcpy(static_cast<char16_t*>(buff->Data()), m2b->Data(),
388 mState.mLength * sizeof(char16_t));
389 } else {
390 buff = nsStringBuffer::Realloc(m2b, size);
391 if (!buff) {
392 return false;
396 char16_t* data = static_cast<char16_t*>(buff->Data());
397 memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
398 mState.mLength += aLength;
399 m2b = buff;
400 data[mState.mLength] = char16_t(0);
402 NS_IF_RELEASE(bufferToRelease);
404 if (aUpdateBidi) {
405 UpdateBidiFlag(aBuffer, aLength);
408 return true;
411 // Current string is a 1-byte string, check if the new data fits in one byte
412 // too.
413 int32_t first16bit = aForce2b ? 0 : FirstNon8Bit(aBuffer, aBuffer + aLength);
415 if (first16bit != -1) { // aBuffer contains no non-8bit character
416 size_t size = mState.mLength + aLength + 1;
417 if (SIZE_MAX / sizeof(char16_t) < size) {
418 return false; // Would be overflown if we'd keep handling.
420 size *= sizeof(char16_t);
422 // The old data was 1-byte, but the new is not so we have to expand it
423 // all to 2-byte
424 nsStringBuffer* buff = nsStringBuffer::Alloc(size).take();
425 if (!buff) {
426 return false;
429 // Copy data into buff
430 char16_t* data = static_cast<char16_t*>(buff->Data());
431 ConvertLatin1toUtf16(Span(m1b, mState.mLength), Span(data, mState.mLength));
433 memcpy(data + mState.mLength, aBuffer, aLength * sizeof(char16_t));
434 mState.mLength += aLength;
435 mState.mIs2b = true;
437 if (mState.mInHeap) {
438 free(const_cast<char*>(m1b));
440 data[mState.mLength] = char16_t(0);
441 m2b = buff;
443 mState.mInHeap = true;
445 if (aUpdateBidi) {
446 UpdateBidiFlag(aBuffer + first16bit, aLength - first16bit);
449 return true;
452 // The new and the old data is all 1-byte
453 size_t size = mState.mLength + aLength;
454 MOZ_ASSERT(sizeof(char) == 1);
455 char* buff;
456 if (mState.mInHeap) {
457 buff = static_cast<char*>(realloc(const_cast<char*>(m1b), size));
458 if (!buff) {
459 return false;
461 } else {
462 buff = static_cast<char*>(malloc(size));
463 if (!buff) {
464 return false;
467 memcpy(buff, m1b, mState.mLength);
468 mState.mInHeap = true;
471 // Copy aBuffer into buff.
472 LossyConvertUtf16toLatin1(Span(aBuffer, aLength),
473 Span(buff + mState.mLength, aLength));
475 m1b = buff;
476 mState.mLength += aLength;
478 return true;
481 /* virtual */
482 size_t nsTextFragment::SizeOfExcludingThis(
483 mozilla::MallocSizeOf aMallocSizeOf) const {
484 if (Is2b()) {
485 return m2b->SizeOfIncludingThisIfUnshared(aMallocSizeOf);
488 if (mState.mInHeap) {
489 return aMallocSizeOf(m1b);
492 return 0;
495 // To save time we only do this when we really want to know, not during
496 // every allocation
497 void nsTextFragment::UpdateBidiFlag(const char16_t* aBuffer, uint32_t aLength) {
498 if (mState.mIs2b && !mState.mIsBidi) {
499 if (HasRTLChars(Span(aBuffer, aLength))) {
500 mState.mIsBidi = true;
505 bool nsTextFragment::TextEquals(const nsTextFragment& aOther) const {
506 if (!Is2b()) {
507 // We're 1-byte.
508 if (!aOther.Is2b()) {
509 nsDependentCSubstring ourStr(Get1b(), GetLength());
510 return ourStr.Equals(
511 nsDependentCSubstring(aOther.Get1b(), aOther.GetLength()));
514 // We're 1-byte, the other thing is 2-byte. Instead of implementing a
515 // separate codepath for this, just use our code below.
516 return aOther.TextEquals(*this);
519 nsDependentSubstring ourStr(Get2b(), GetLength());
520 if (aOther.Is2b()) {
521 return ourStr.Equals(
522 nsDependentSubstring(aOther.Get2b(), aOther.GetLength()));
525 // We can't use EqualsASCII here, because the other string might not
526 // actually be ASCII. Just roll our own compare; do it in the simple way.
527 // Bug 1532356 tracks not having to roll our own.
528 if (GetLength() != aOther.GetLength()) {
529 return false;
532 const char16_t* ourChars = Get2b();
533 const char* otherChars = aOther.Get1b();
534 for (uint32_t i = 0; i < GetLength(); ++i) {
535 if (ourChars[i] != static_cast<char16_t>(otherChars[i])) {
536 return false;
540 return true;