1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * A class which represents a fragment of text (e.g. inside a text
9 * node); if only codepoints below 256 are used, the text is stored as
10 * a char*; otherwise the text is stored as a char16_t*.
13 #include "nsTextFragment.h"
15 #include "nsReadableUtils.h"
17 #include "nsBidiUtils.h"
18 #include "nsUnicharUtils.h"
19 #include "mozilla/CheckedInt.h"
20 #include "mozilla/MemoryReporting.h"
21 #include "mozilla/SSE.h"
22 #include "mozilla/ppc.h"
23 #include "nsTextFragmentImpl.h"
// Number of whitespace characters (spaces or tabs) that follow the newlines
// in each shared whitespace string built by nsTextFragment::Init().
26 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
// Largest number of newlines a shared whitespace string can contain; the
// tables below hold one string per newline count from 0 up to this value.
27 #define TEXTFRAG_MAX_NEWLINES 7
29 // Static buffer used for common fragments
// sSpaceSharedString[i] is laid out by Init() as: one ' ', then i '\n',
// then TEXTFRAG_WHITE_AFTER_NEWLINE spaces.
30 static char* sSpaceSharedString
[TEXTFRAG_MAX_NEWLINES
+ 1];
// sTabSharedString[i] is laid out by Init() as: one ' ', then i '\n',
// then TEXTFRAG_WHITE_AFTER_NEWLINE tabs.
31 static char* sTabSharedString
[TEXTFRAG_MAX_NEWLINES
+ 1];
// Identity table: Init() stores the value i at index i, so a one-character
// 8-bit fragment can point at sSingleCharSharedString + ch without allocating.
32 static char sSingleCharSharedString
[256];
34 using namespace mozilla
;
// Builds the process-wide shared buffers used for common fragments: the
// whitespace strings (one space-based and one tab-based string per newline
// count) and the 256-entry single-character table.
// NOTE(review): the declarations of the loop counters i and j and the return
// statement are not part of the visible text; confirm against the full file.
37 nsresult
nsTextFragment::Init() {
38 // Create whitespace strings
40 for (i
= 0; i
<= TEXTFRAG_MAX_NEWLINES
; ++i
) {
// Each string holds exactly 1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE chars and
// every slot is filled below, so no terminating NUL is stored.
41 sSpaceSharedString
[i
] = new char[1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
];
42 sTabSharedString
[i
] = new char[1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
];
// Both flavours start with a single space, including the tab string.
43 sSpaceSharedString
[i
][0] = ' ';
44 sTabSharedString
[i
][0] = ' ';
// Indices 1..i hold the i newlines.
46 for (j
= 1; j
< 1 + i
; ++j
) {
47 sSpaceSharedString
[i
][j
] = '\n';
48 sTabSharedString
[i
][j
] = '\n';
// The remaining TEXTFRAG_WHITE_AFTER_NEWLINE slots hold the trailing
// whitespace run; j carries over from the newline loop.
50 for (; j
< (1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
); ++j
) {
51 sSpaceSharedString
[i
][j
] = ' ';
52 sTabSharedString
[i
][j
] = '\t';
56 // Create single-char strings
57 for (i
= 0; i
< 256; ++i
) {
58 sSingleCharSharedString
[i
] = i
;
// Frees every shared whitespace string allocated by Init() and resets the
// table entries to nullptr so no dangling pointers remain in the tables.
65 void nsTextFragment::Shutdown() {
67 for (i
= 0; i
<= TEXTFRAG_MAX_NEWLINES
; ++i
) {
// delete[] matches the new char[] used in Init().
68 delete[] sSpaceSharedString
[i
];
69 delete[] sTabSharedString
[i
];
70 sSpaceSharedString
[i
] = nullptr;
71 sTabSharedString
[i
] = nullptr;
// Destructor. Reports the destruction through MOZ_COUNT_DTOR.
// NOTE(review): presumably the text storage is released here as well; that
// statement is not part of the visible text -- confirm.
75 nsTextFragment::~nsTextFragment() {
77 MOZ_COUNT_DTOR(nsTextFragment
);
// Releases the storage currently held by this fragment and clears the bidi
// flag.
// NOTE(review): the branch that precedes the else-if below (presumably the
// 2-byte case) is not part of the visible text -- confirm.
80 void nsTextFragment::ReleaseText() {
// 1-byte text is freed only when it is non-empty, non-null and was
// heap-allocated; buffers pointing into the shared static tables have
// mInHeap == false and must not be freed.
83 } else if (mState
.mLength
&& m1b
&& mState
.mInHeap
) {
// m1b is stored as a pointer-to-const; cast the constness away for free().
84 free(const_cast<char*>(m1b
));
88 mState
.mIsBidi
= false;
90 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
// Copy assignment. When aOther holds text, its storage is duplicated or
// shared according to how aOther stores it, and the packed state bits are
// then copied from aOther.
// NOTE(review): several statements of this function are not part of the
// visible text; the comments below describe only what is shown.
94 nsTextFragment
& nsTextFragment::operator=(const nsTextFragment
& aOther
) {
// Nothing needs to be copied for an empty source fragment.
97 if (aOther
.mState
.mLength
) {
// Text that is not heap-allocated is expected to be 1-byte text.
98 if (!aOther
.mState
.mInHeap
) {
99 MOZ_ASSERT(!aOther
.mState
.mIs2b
);
101 } else if (aOther
.mState
.mIs2b
) {
// Heap-allocated 1-byte text: duplicate the bytes into a fresh malloc'ed
// buffer of the same length.
105 m1b
= static_cast<char*>(malloc(aOther
.mState
.mLength
));
107 memcpy(const_cast<char*>(m1b
), aOther
.m1b
, aOther
.mState
.mLength
);
// NOTE(review): presumably the fallback taken when the allocation above
// fails; the guarding condition is not visible -- confirm.
109 // allocate a buffer for a single REPLACEMENT CHARACTER
// Room for two char16_t: U+FFFD plus the terminating NUL.
110 m2b
= nsStringBuffer::Alloc(sizeof(char16_t
) * 2).take();
114 char16_t
* data
= static_cast<char16_t
*>(m2b
->Data());
115 data
[0] = 0xFFFD; // REPLACEMENT CHARACTER
116 data
[1] = char16_t(0);
118 mState
.mInHeap
= true;
// mAllBits aliases the packed state fields (see the comment in
// ReleaseText), so this copies all of them at once.
124 mAllBits
= aOther
.mAllBits
;
// Portable scan of [str, end) for a character above 255.
//
// Runs in three phases: single characters up to a word boundary, whole
// machine words tested against a mask, then the leftover tail one character
// at a time. The per-character phases return the exact index of the
// offending character; the word phase returns the index of the first
// character of the offending word, which may be less than the exact index.
// NOTE(review): the declaration of i and the final return (presumably -1 when
// nothing is found) are not part of the visible text -- confirm.
130 static inline int32_t FirstNon8BitUnvectorized(const char16_t
* str
,
131 const char16_t
* end
) {
// Word-size dependent constants, selected by sizeof(size_t).
132 typedef Non8BitParameters
<sizeof(size_t)> p
;
133 const size_t mask
= p::mask();
134 const uint32_t alignMask
= p::alignMask();
135 const uint32_t numUnicharsPerWord
= p::numUnicharsPerWord();
136 const int32_t len
= end
- str
;
139 // Align ourselves to a word boundary.
// (-address & alignMask) is the byte distance to the next boundary; dividing
// by sizeof(char16_t) converts it to characters. It is capped at len so short
// inputs are handled entirely by the per-character loop.
140 int32_t alignLen
= std::min(
141 len
, int32_t(((-NS_PTR_TO_INT32(str
)) & alignMask
) / sizeof(char16_t
)));
142 for (; i
< alignLen
; i
++) {
143 if (str
[i
] > 255) return i
;
146 // Check one word at a time.
147 const int32_t wordWalkEnd
=
148 ((len
- i
) / numUnicharsPerWord
) * numUnicharsPerWord
;
149 for (; i
< wordWalkEnd
; i
+= numUnicharsPerWord
) {
// Read numUnicharsPerWord characters in one load; a non-zero result after
// masking means some character in this word is flagged by the mask.
150 const size_t word
= *reinterpret_cast<const size_t*>(str
+ i
);
151 if (word
& mask
) return i
;
154 // Take care of the remainder one character at a time.
155 for (; i
< len
; i
++) {
156 if (str
[i
] > 255) return i
;
// Forward declarations of the vectorized FirstNon8Bit implementations that
// the dispatcher below calls as mozilla::SSE2::FirstNon8Bit and
// mozilla::VMX::FirstNon8Bit.
// NOTE(review): the enclosing namespace openers and the guard of the second
// declaration are not part of the visible text -- confirm.
162 #ifdef MOZILLA_MAY_SUPPORT_SSE2
// SSE2 variant.
165 int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
);
167 } // namespace mozilla
// Presumably the VMX (PowerPC) variant.
173 int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
);
175 } // namespace mozilla
179 * This function returns -1 if all characters in str are 8-bit characters.
180 * Otherwise, it returns a value less than or equal to the index of the first
181 * non-8bit character in str. For example, if the first non-8bit character is
182 * at position 25, it may return 25, or for example 24, or 16. But it
183 * guarantees there is no non-8bit character before the returned value.
// Dispatcher: picks the fastest available implementation at run time and
// forwards [str, end) to it unchanged.
185 static inline int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
) {
// Builds that may support SSE2 still check the CPU at run time.
186 #ifdef MOZILLA_MAY_SUPPORT_SSE2
187 if (mozilla::supports_sse2()) {
188 return mozilla::SSE2::FirstNon8Bit(str
, end
);
// On PowerPC builds, use the VMX implementation when the CPU supports it.
190 #elif defined(__powerpc__)
191 if (mozilla::supports_vmx()) {
192 return mozilla::VMX::FirstNon8Bit(str
, end
);
// Portable fallback when no vectorized implementation was chosen.
196 return FirstNon8BitUnvectorized(str
, end
);
// Replaces the fragment's contents with aLength UTF-16 code units from
// aBuffer.
//
// Storage is chosen in this order:
//   1. reuse the existing writable 2-byte nsStringBuffer (aForce2b only);
//   2. point into sSingleCharSharedString for one character below 256;
//   3. point into a shared whitespace string when the text matches one;
//   4. allocate 2-byte storage when a non-8-bit character is present or
//      aForce2b is set;
//   5. otherwise allocate 1-byte storage with malloc.
// NOTE(review): the return statements and the uses of aUpdateBidi are not part
// of the visible text; presumably false is returned on allocation failure.
199 bool nsTextFragment::SetTo(const char16_t
* aBuffer
, int32_t aLength
,
200 bool aUpdateBidi
, bool aForce2b
) {
// Fast path: 2-byte storage is already held and may be written in place.
201 if (aForce2b
&& mState
.mIs2b
&& !m2b
->IsReadonly()) {
202 uint32_t storageSize
= m2b
->StorageSize();
203 uint32_t neededSize
= aLength
* sizeof(char16_t
);
205 if (storageSize
< AutoStringDefaultStorageSize
) {
206 // If we're storing small enough nsStringBuffer, let's preserve it.
208 static_cast<char16_t
*>(m2b
->Data())[0] = char16_t(0);
210 mState
.mIsBidi
= false;
// neededSize < storageSize (strictly) leaves room for the terminating NUL.
213 } else if ((neededSize
< storageSize
) &&
215 (neededSize
+ AutoStringDefaultStorageSize
))) {
216 // Don't try to reuse the existing nsStringBuffer, if it would have
217 // lots of unused space.
// Copy in place and re-terminate.
219 memcpy(m2b
->Data(), aBuffer
, neededSize
);
220 static_cast<char16_t
*>(m2b
->Data())[aLength
] = char16_t(0);
221 mState
.mLength
= aLength
;
// Reset first so UpdateBidiFlag re-evaluates the new contents.
222 mState
.mIsBidi
= false;
224 UpdateBidiFlag(aBuffer
, aLength
);
236 char16_t firstChar
= *aBuffer
;
// A single 8-bit character: share the static identity table, no allocation.
237 if (!aForce2b
&& aLength
== 1 && firstChar
< 256) {
238 m1b
= sSingleCharSharedString
+ firstChar
;
239 mState
.mInHeap
= false;
240 mState
.mIs2b
= false;
246 const char16_t
* ucp
= aBuffer
;
247 const char16_t
* uend
= aBuffer
+ aLength
;
249 // Check if we can use a shared string
// The text must be short enough to fit a shared string and must start with
// one of the characters those strings can start with.
251 aLength
<= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE
+ TEXTFRAG_MAX_NEWLINES
&&
252 (firstChar
== ' ' || firstChar
== '\n' || firstChar
== '\t')) {
253 if (firstChar
== ' ') {
// Count the run of newlines.
257 const char16_t
* start
= ucp
;
258 while (ucp
< uend
&& *ucp
== '\n') {
261 const char16_t
* endNewLine
= ucp
;
// The trailing run must be all tabs or all spaces; pick by its first char.
263 char16_t space
= ucp
< uend
&& *ucp
== '\t' ? '\t' : ' ';
264 while (ucp
< uend
&& *ucp
== space
) {
// Match only if the whole buffer was consumed and both runs fit the limits
// the shared strings were built with.
268 if (ucp
== uend
&& endNewLine
- start
<= TEXTFRAG_MAX_NEWLINES
&&
269 ucp
- endNewLine
<= TEXTFRAG_WHITE_AFTER_NEWLINE
) {
270 char** strings
= space
== ' ' ? sSpaceSharedString
: sTabSharedString
;
// The tables are indexed by newline count.
271 m1b
= strings
[endNewLine
- start
];
273 // If we didn't find a space in the beginning, skip it now.
274 if (firstChar
!= ' ') {
// Shared static storage: not heap-owned, 1-byte.
278 mState
.mInHeap
= false;
279 mState
.mIs2b
= false;
280 mState
.mLength
= aLength
;
286 // See if we need to store the data in ucs2 or not
// aForce2b is modelled as "non-8-bit character at index 0".
287 int32_t first16bit
= aForce2b
? 0 : FirstNon8Bit(ucp
, uend
);
289 if (first16bit
!= -1) { // aBuffer contains a non-8bit character, or 2-byte storage is forced
290 // Use ucs2 storage because we have to
// (aLength + 1) * sizeof(char16_t), overflow-checked; +1 is for the NUL.
291 CheckedUint32 m2bSize
= aLength
+ 1;
292 m2bSize
*= sizeof(char16_t
);
293 if (!m2bSize
.isValid()) {
297 m2b
= nsStringBuffer::Alloc(m2bSize
.value()).take();
301 memcpy(m2b
->Data(), aBuffer
, aLength
* sizeof(char16_t
));
302 static_cast<char16_t
*>(m2b
->Data())[aLength
] = char16_t(0);
// Characters before first16bit are known to be 8-bit, so only the tail is
// passed to the RTL scan.
306 UpdateBidiFlag(aBuffer
+ first16bit
, aLength
- first16bit
);
310 // Use 1 byte storage because we can
// Exactly aLength bytes; the 1-byte form stores no terminator here.
311 char* buff
= static_cast<char*>(malloc(aLength
));
317 LossyConvertUtf16toLatin1(Span(aBuffer
, aLength
), Span(buff
, aLength
));
319 mState
.mIs2b
= false;
323 mState
.mInHeap
= true;
324 mState
.mLength
= aLength
;
// Copies aCount characters starting at aOffset into aDest as UTF-16.
//
// aDest must have room for aCount char16_t values. A request that runs past
// the end of the fragment is clamped to the available characters. 2-byte text
// is copied with memcpy; 1-byte text is widened with ConvertLatin1toUtf16.
// NOTE(review): the branch heads selecting between the two copies are not part
// of the visible text -- confirm.
329 void nsTextFragment::CopyTo(char16_t
* aDest
, int32_t aOffset
, int32_t aCount
) {
330 NS_ASSERTION(aOffset
>= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
331 NS_ASSERTION(aCount
>= 0, "Bad count passed to nsTextFragment::CopyTo()!");
// Clamp the count so the copy never reads beyond the stored text.
337 if (uint32_t(aOffset
+ aCount
) > GetLength()) {
338 aCount
= mState
.mLength
- aOffset
;
// 2-byte storage: straight byte copy.
343 memcpy(aDest
, Get2b() + aOffset
, sizeof(char16_t
) * aCount
);
// 1-byte storage: widen each Latin-1 byte to a UTF-16 code unit.
345 const char* cp
= m1b
+ aOffset
;
346 ConvertLatin1toUtf16(Span(cp
, aCount
), Span(aDest
, aCount
));
// Appends aLength UTF-16 code units from aBuffer to the fragment.
//
// Returns false when the combined length would exceed
// NS_MAX_TEXT_FRAGMENT_LENGTH or the byte size would overflow size_t.
// Three cases are handled:
//   - the fragment is already 2-byte: grow (or copy, if read-only) the
//     nsStringBuffer and append;
//   - the fragment is 1-byte but the new data needs 2 bytes (or aForce2b):
//     widen the old text into a new nsStringBuffer and append;
//   - both old and new data are 1-byte: grow the char buffer and narrow the
//     new data into it.
// NOTE(review): branch heads, allocation-failure handling and the final return
// are not part of the visible text -- confirm.
351 bool nsTextFragment::Append(const char16_t
* aBuffer
, uint32_t aLength
,
352 bool aUpdateBidi
, bool aForce2b
) {
357 // This is a common case because some callsites create a textnode
358 // with a value by creating the node and then calling AppendData.
359 if (mState
.mLength
== 0) {
360 return SetTo(aBuffer
, aLength
, aUpdateBidi
, aForce2b
);
363 // Should we optimize for aData.Length() == 0?
365 // FYI: Don't use CheckedInt in this method since here is very hot path
366 // in some performance tests.
// Written as a subtraction so the comparison itself cannot overflow.
367 if (NS_MAX_TEXT_FRAGMENT_LENGTH
- mState
.mLength
< aLength
) {
368 return false; // Would be overflown if we'd keep handling.
// Character count including the terminating NUL, then converted to bytes
// after checking the multiplication cannot overflow.
372 size_t size
= mState
.mLength
+ aLength
+ 1;
373 if (SIZE_MAX
/ sizeof(char16_t
) < size
) {
374 return false; // Would be overflown if we'd keep handling.
376 size
*= sizeof(char16_t
);
378 // Already a 2-byte string so the result will be too
379 nsStringBuffer
* buff
= nullptr;
380 nsStringBuffer
* bufferToRelease
= nullptr;
// A read-only buffer cannot be reallocated in place: allocate a new one,
// copy the old text, and release the old buffer once the append is done.
381 if (m2b
->IsReadonly()) {
382 buff
= nsStringBuffer::Alloc(size
).take();
386 bufferToRelease
= m2b
;
387 memcpy(static_cast<char16_t
*>(buff
->Data()), m2b
->Data(),
388 mState
.mLength
* sizeof(char16_t
));
// Writable buffer: grow it in place.
390 buff
= nsStringBuffer::Realloc(m2b
, size
);
// Append the new code units after the existing text and re-terminate.
396 char16_t
* data
= static_cast<char16_t
*>(buff
->Data());
397 memcpy(data
+ mState
.mLength
, aBuffer
, aLength
* sizeof(char16_t
));
398 mState
.mLength
+= aLength
;
400 data
[mState
.mLength
] = char16_t(0);
402 NS_IF_RELEASE(bufferToRelease
);
// Only the newly appended text needs to be scanned for RTL characters.
405 UpdateBidiFlag(aBuffer
, aLength
);
411 // Current string is a 1-byte string, check if the new data fits in one byte
// aForce2b is modelled as "non-8-bit character at index 0".
413 int32_t first16bit
= aForce2b
? 0 : FirstNon8Bit(aBuffer
, aBuffer
+ aLength
);
415 if (first16bit
!= -1) { // aBuffer contains a non-8bit character, or 2-byte storage is forced
416 size_t size
= mState
.mLength
+ aLength
+ 1;
417 if (SIZE_MAX
/ sizeof(char16_t
) < size
) {
418 return false; // Would be overflown if we'd keep handling.
420 size
*= sizeof(char16_t
);
422 // The old data was 1-byte, but the new is not so we have to expand it
424 nsStringBuffer
* buff
= nsStringBuffer::Alloc(size
).take();
429 // Copy data into buff
// Widen the existing Latin-1 text, then append the new code units as-is.
430 char16_t
* data
= static_cast<char16_t
*>(buff
->Data());
431 ConvertLatin1toUtf16(Span(m1b
, mState
.mLength
), Span(data
, mState
.mLength
));
433 memcpy(data
+ mState
.mLength
, aBuffer
, aLength
* sizeof(char16_t
));
434 mState
.mLength
+= aLength
;
// The old 1-byte buffer is freed only if it was heap-allocated; shared
// static strings must not be freed.
437 if (mState
.mInHeap
) {
438 free(const_cast<char*>(m1b
));
440 data
[mState
.mLength
] = char16_t(0);
443 mState
.mInHeap
= true;
// Characters before first16bit are known to be 8-bit, so only the tail is
// passed to the RTL scan.
446 UpdateBidiFlag(aBuffer
+ first16bit
, aLength
- first16bit
);
452 // The new and the old data is all 1-byte
// No +1 here: the 1-byte form stores no terminator.
453 size_t size
= mState
.mLength
+ aLength
;
454 MOZ_ASSERT(sizeof(char) == 1);
// Heap buffers are grown with realloc.
456 if (mState
.mInHeap
) {
457 buff
= static_cast<char*>(realloc(const_cast<char*>(m1b
), size
));
// Otherwise allocate a fresh buffer and copy the old text into it, after
// which the fragment owns heap storage.
462 buff
= static_cast<char*>(malloc(size
));
467 memcpy(buff
, m1b
, mState
.mLength
);
468 mState
.mInHeap
= true;
471 // Copy aBuffer into buff.
472 LossyConvertUtf16toLatin1(Span(aBuffer
, aLength
),
473 Span(buff
+ mState
.mLength
, aLength
));
476 mState
.mLength
+= aLength
;
// Memory-reporting hook: returns the size of the text storage owned by this
// fragment, measured with aMallocSizeOf, not counting the fragment object
// itself.
// NOTE(review): the condition guarding the first return (presumably the
// 2-byte case) and the final return are not part of the visible text.
482 size_t nsTextFragment::SizeOfExcludingThis(
483 mozilla::MallocSizeOf aMallocSizeOf
) const {
// nsStringBuffer storage is counted only when it is not shared.
485 return m2b
->SizeOfIncludingThisIfUnshared(aMallocSizeOf
);
// 1-byte storage is measured only when it lives on the heap; pointers into
// the shared static tables are not owned by this fragment.
488 if (mState
.mInHeap
) {
489 return aMallocSizeOf(m1b
);
495 // To save time we only do this when we really want to know, not during
// Sets mState.mIsBidi when the aLength code units at aBuffer contain a
// right-to-left character. The flag is only ever set here, never cleared.
497 void nsTextFragment::UpdateBidiFlag(const char16_t
* aBuffer
, uint32_t aLength
) {
// Skip the scan for 1-byte text and when the flag is already set.
498 if (mState
.mIs2b
&& !mState
.mIsBidi
) {
499 if (HasRTLChars(Span(aBuffer
, aLength
))) {
500 mState
.mIsBidi
= true;
505 bool nsTextFragment::TextEquals(const nsTextFragment
& aOther
) const {
508 if (!aOther
.Is2b()) {
509 nsDependentCSubstring
ourStr(Get1b(), GetLength());
510 return ourStr
.Equals(
511 nsDependentCSubstring(aOther
.Get1b(), aOther
.GetLength()));
514 // We're 1-byte, the other thing is 2-byte. Instead of implementing a
515 // separate codepath for this, just use our code below.
516 return aOther
.TextEquals(*this);
519 nsDependentSubstring
ourStr(Get2b(), GetLength());
521 return ourStr
.Equals(
522 nsDependentSubstring(aOther
.Get2b(), aOther
.GetLength()));
525 // We can't use EqualsASCII here, because the other string might not
526 // actually be ASCII. Just roll our own compare; do it in the simple way.
527 // Bug 1532356 tracks not having to roll our own.
528 if (GetLength() != aOther
.GetLength()) {
532 const char16_t
* ourChars
= Get2b();
533 const char* otherChars
= aOther
.Get1b();
534 for (uint32_t i
= 0; i
< GetLength(); ++i
) {
535 if (ourChars
[i
] != static_cast<char16_t
>(otherChars
[i
])) {