1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * A class which represents a fragment of text (eg inside a text
9 * node); if only codepoints below 256 are used, the text is stored as
10 * a char*; otherwise the text is stored as a char16_t*
13 #include "nsTextFragment.h"
15 #include "nsReadableUtils.h"
16 #include "nsBidiUtils.h"
17 #include "nsUnicharUtils.h"
18 #include "mozilla/CheckedInt.h"
19 #include "mozilla/MemoryReporting.h"
20 #include "mozilla/SSE.h"
21 #include "mozilla/ppc.h"
22 #include "nsTextFragmentImpl.h"
// Size limits for the pre-built shared whitespace strings declared below.
// TEXTFRAG_WHITE_AFTER_NEWLINE is the number of trailing space (or tab)
// characters stored in every shared string; TEXTFRAG_MAX_NEWLINES is the
// largest number of '\n' characters a shared string contains.
25 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
26 #define TEXTFRAG_MAX_NEWLINES 7
28 // Static buffers used for common fragments. They are filled in by
// nsTextFragment::Init() and freed by nsTextFragment::Shutdown().
// Entry i of the space table is: one ' ', then i '\n' characters, then
// TEXTFRAG_WHITE_AFTER_NEWLINE ' ' characters.
29 static char* sSpaceSharedString
[TEXTFRAG_MAX_NEWLINES
+ 1];
// Entry i of the tab table is: one ' ', then i '\n' characters, then
// TEXTFRAG_WHITE_AFTER_NEWLINE '\t' characters.
30 static char* sTabSharedString
[TEXTFRAG_MAX_NEWLINES
+ 1];
// Element c holds the single byte value c, so a one-character 8-bit
// fragment can point at sSingleCharSharedString + c.
31 static char sSingleCharSharedString
[256];
33 using namespace mozilla
;
// Builds the shared buffers used for common fragments: the two whitespace
// tables (sSpaceSharedString / sTabSharedString) and the 256-entry
// single-character table (sSingleCharSharedString).
//
// For every i in [0, TEXTFRAG_MAX_NEWLINES] a buffer of exactly
// 1 + i + TEXTFRAG_WHITE_AFTER_NEWLINE bytes is allocated per table and
// filled completely, so no terminating NUL is stored.
// NOTE(review): the declarations of i and j and the return statement are not
// visible in this excerpt.
36 nsresult
nsTextFragment::Init() {
37 // Create whitespace strings
39 for (i
= 0; i
<= TEXTFRAG_MAX_NEWLINES
; ++i
) {
40 sSpaceSharedString
[i
] = new char[1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
];
41 sTabSharedString
[i
] = new char[1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
];
// Both tables start with a single space, including the tab table.
42 sSpaceSharedString
[i
][0] = ' ';
43 sTabSharedString
[i
][0] = ' ';
// Positions 1..i hold the newlines.
45 for (j
= 1; j
< 1 + i
; ++j
) {
46 sSpaceSharedString
[i
][j
] = '\n';
47 sTabSharedString
[i
][j
] = '\n';
// The rest of each buffer is the run of spaces (or tabs); j continues from
// where the newline loop stopped.
49 for (; j
< (1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
); ++j
) {
50 sSpaceSharedString
[i
][j
] = ' ';
51 sTabSharedString
[i
][j
] = '\t';
55 // Create single-char strings
56 for (i
= 0; i
< 256; ++i
) {
57 sSingleCharSharedString
[i
] = i
;
// Frees every buffer of the two shared whitespace tables with delete[] and
// resets the table slots to nullptr. sSingleCharSharedString is a plain
// static array and is not touched here.
64 void nsTextFragment::Shutdown() {
66 for (i
= 0; i
<= TEXTFRAG_MAX_NEWLINES
; ++i
) {
67 delete[] sSpaceSharedString
[i
];
68 delete[] sTabSharedString
[i
];
69 sSpaceSharedString
[i
] = nullptr;
70 sTabSharedString
[i
] = nullptr;
// Destructor. The only statement visible in this excerpt is the
// MOZ_COUNT_DTOR bookkeeping call for nsTextFragment.
// NOTE(review): any release of the text storage is not visible here --
// confirm against the full file.
74 nsTextFragment::~nsTextFragment() {
76 MOZ_COUNT_DTOR(nsTextFragment
);
// Releases the fragment's text storage and clears the bidi flag.
// NOTE(review): the first branch of the if/else chain and the final reset of
// mAllBits are not visible in this excerpt.
79 void nsTextFragment::ReleaseText() {
// 1-byte case: only free m1b when it is a non-empty heap allocation; the
// mInHeap test keeps non-heap storage from being passed to free().
82 } else if (mState
.mLength
&& m1b
&& mState
.mInHeap
) {
83 free(const_cast<char*>(m1b
));
87 mState
.mIsBidi
= false;
89 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
// Copy assignment: makes this fragment hold the same text as aOther.
//
// Only a non-empty aOther needs storage handling. The visible branches are:
//  - aOther is not heap-backed: asserted to be 1-byte text;
//  - aOther is 2-byte text;
//  - otherwise a fresh malloc'ed 1-byte buffer receives a copy of aOther.m1b.
// A two-element nsStringBuffer holding U+FFFD and a terminating 0 is also set
// up as a fallback.
// NOTE(review): the condition guarding that fallback and several statements
// in the first two branches are not visible in this excerpt.
93 nsTextFragment
& nsTextFragment::operator=(const nsTextFragment
& aOther
) {
96 if (aOther
.mState
.mLength
) {
97 if (!aOther
.mState
.mInHeap
) {
98 MOZ_ASSERT(!aOther
.mState
.mIs2b
);
100 } else if (aOther
.mState
.mIs2b
) {
// Heap-backed 1-byte text: duplicate the bytes (mLength bytes, no NUL).
104 m1b
= static_cast<char*>(malloc(aOther
.mState
.mLength
));
106 memcpy(const_cast<char*>(m1b
), aOther
.m1b
, aOther
.mState
.mLength
);
108 // allocate a buffer for a single REPLACEMENT CHARACTER
109 m2b
= nsStringBuffer::Alloc(sizeof(char16_t
) * 2).take();
113 char16_t
* data
= static_cast<char16_t
*>(m2b
->Data());
114 data
[0] = 0xFFFD; // REPLACEMENT CHARACTER
115 data
[1] = char16_t(0);
117 mState
.mInHeap
= true;
// Copy all state bits (mState is accessed as a whole through mAllBits).
123 mAllBits
= aOther
.mAllBits
;
// Portable (non-SIMD) scan of [str, end) for a UTF-16 code unit above 255.
//
// The scan runs in three phases: single code units until str + i is
// word-aligned, then one size_t word at a time tested against a mask from
// Non8BitParameters, then the remaining code units one by one.
//
// Returns the index at which a phase stopped. In the word phase this is the
// index of the first code unit of the offending word, so the result can be
// smaller than the exact index of the first non-8-bit unit.
// NOTE(review): the declaration of i and the final return for the
// "nothing found" case are not visible in this excerpt.
129 static inline int32_t FirstNon8BitUnvectorized(const char16_t
* str
,
130 const char16_t
* end
) {
131 using p
= Non8BitParameters
<sizeof(size_t)>;
132 const size_t mask
= p::mask();
133 const uint32_t alignMask
= p::alignMask();
134 const uint32_t numUnicharsPerWord
= p::numUnicharsPerWord();
135 const int32_t len
= end
- str
;
138 // Align ourselves to a word boundary.
// alignLen is the number of code units before the next word boundary,
// capped at len for short inputs.
139 int32_t alignLen
= std::min(
140 len
, int32_t(((-NS_PTR_TO_INT32(str
)) & alignMask
) / sizeof(char16_t
)));
141 for (; i
< alignLen
; i
++) {
142 if (str
[i
] > 255) return i
;
145 // Check one word at a time.
// NOTE(review): wordWalkEnd is computed from the remaining length (len - i)
// but is compared against the absolute index i below. Whatever the word loop
// does not cover is still checked by the remainder loop.
146 const int32_t wordWalkEnd
=
147 ((len
- i
) / numUnicharsPerWord
) * numUnicharsPerWord
;
148 for (; i
< wordWalkEnd
; i
+= numUnicharsPerWord
) {
149 const size_t word
= *reinterpret_cast<const size_t*>(str
+ i
);
150 if (word
& mask
) return i
;
153 // Take care of the remainder one character at a time.
154 for (; i
< len
; i
++) {
155 if (str
[i
] > 255) return i
;
// Declarations of the vectorized FirstNon8Bit implementations that the
// dispatcher below may call. When SSE2 may be supported, the generic forward
// declarations come from nsTextFragmentGenericFwd.h.
// NOTE(review): the namespace opening and the preprocessor guards around the
// plain declaration are not visible in this excerpt.
161 #if defined(MOZILLA_MAY_SUPPORT_SSE2)
162 # include "nsTextFragmentGenericFwd.h"
168 int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
);
170 } // namespace mozilla
174 * This function returns -1 if all characters in str are 8 bit characters.
175 * Otherwise, it returns a value less than or equal to the index of the first
176 * non-8bit character in str. For example, if first non-8bit character is at
177 * position 25, it may return 25, or for example 24, or 16. But it guarantees
178 * there is no non-8bit character before the returned value.
// Runtime dispatcher for the scan over [str, end).
// With MOZILLA_MAY_SUPPORT_SSE2 defined and SSE2 available it calls the
// xsimd::sse2 instantiation of mozilla::FirstNon8Bit; otherwise, on
// __powerpc__ with VMX available, it calls mozilla::VMX::FirstNon8Bit.
// In every other case it falls back to FirstNon8BitUnvectorized.
180 static inline int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
) {
181 #ifdef MOZILLA_MAY_SUPPORT_SSE2
182 if (mozilla::supports_sse2()) {
183 return mozilla::FirstNon8Bit
<xsimd::sse2
>(str
, end
);
185 #elif defined(__powerpc__)
186 if (mozilla::supports_vmx()) {
187 return mozilla::VMX::FirstNon8Bit(str
, end
);
// Portable fallback when no vectorized implementation was selected above.
191 return FirstNon8BitUnvectorized(str
, end
);
// Replaces the fragment's content with the aLength UTF-16 code units at
// aBuffer.
//
// aBuffer      source code units
// aLength      number of code units; values above
//              NS_MAX_TEXT_FRAGMENT_LENGTH are handled by the first check
// aUpdateBidi  NOTE(review): its uses are not visible in this excerpt;
//              presumably it gates the UpdateBidiFlag() calls
// aForce2b     forces 2-byte storage even if every code unit is below 256
//
// Storage is chosen in this order:
//  1. reuse the current writable 2-byte nsStringBuffer (aForce2b only);
//  2. a single code unit below 256: point into sSingleCharSharedString;
//  3. short whitespace-only text matching the shared layout: point into
//     sSpaceSharedString / sTabSharedString;
//  4. otherwise a new 2-byte nsStringBuffer if any code unit needs it (or
//     aForce2b is set), else a malloc'ed 1-byte Latin-1 buffer.
// NOTE(review): the return statements are not visible in this excerpt;
// presumably false is returned when a size check or allocation fails.
194 bool nsTextFragment::SetTo(const char16_t
* aBuffer
, uint32_t aLength
,
195 bool aUpdateBidi
, bool aForce2b
) {
196 if (MOZ_UNLIKELY(aLength
> NS_MAX_TEXT_FRAGMENT_LENGTH
)) {
// In-place reuse is only attempted when 2-byte storage is forced, the
// fragment already is 2-byte, and its buffer is writable.
200 if (aForce2b
&& mState
.mIs2b
&& !m2b
->IsReadonly()) {
201 uint32_t storageSize
= m2b
->StorageSize();
202 uint32_t neededSize
= aLength
* sizeof(char16_t
);
204 if (storageSize
< AutoStringDefaultStorageSize
) {
205 // If we're storing small enough nsStringBuffer, let's preserve it.
207 static_cast<char16_t
*>(m2b
->Data())[0] = char16_t(0);
209 mState
.mIsBidi
= false;
212 } else if ((neededSize
< storageSize
) &&
214 (neededSize
+ AutoStringDefaultStorageSize
))) {
215 // Don't try to reuse the existing nsStringBuffer, if it would have
216 // lots of unused space.
// Overwrite the buffer in place and re-terminate it.
218 memcpy(m2b
->Data(), aBuffer
, neededSize
);
219 static_cast<char16_t
*>(m2b
->Data())[aLength
] = char16_t(0);
220 mState
.mLength
= aLength
;
221 mState
.mIsBidi
= false;
223 UpdateBidiFlag(aBuffer
, aLength
);
235 char16_t firstChar
= *aBuffer
;
// Single 8-bit character: share the static one-byte table entry.
236 if (!aForce2b
&& aLength
== 1 && firstChar
< 256) {
237 m1b
= sSingleCharSharedString
+ firstChar
;
238 mState
.mInHeap
= false;
239 mState
.mIs2b
= false;
245 const char16_t
* ucp
= aBuffer
;
246 const char16_t
* uend
= aBuffer
+ aLength
;
248 // Check if we can use a shared string
250 aLength
<= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE
+ TEXTFRAG_MAX_NEWLINES
&&
251 (firstChar
== ' ' || firstChar
== '\n' || firstChar
== '\t')) {
252 if (firstChar
== ' ') {
// Count the run of newlines: [start, endNewLine).
256 const char16_t
* start
= ucp
;
257 while (ucp
< uend
&& *ucp
== '\n') {
260 const char16_t
* endNewLine
= ucp
;
// The trailing run must be all tabs or all spaces; pick which by looking at
// the first character after the newlines.
262 char16_t space
= ucp
< uend
&& *ucp
== '\t' ? '\t' : ' ';
263 while (ucp
< uend
&& *ucp
== space
) {
// A shared string is usable only if the whole input was consumed and both
// runs fit within the limits the shared tables were built with.
267 if (ucp
== uend
&& endNewLine
- start
<= TEXTFRAG_MAX_NEWLINES
&&
268 ucp
- endNewLine
<= TEXTFRAG_WHITE_AFTER_NEWLINE
) {
269 char** strings
= space
== ' ' ? sSpaceSharedString
: sTabSharedString
;
// Index the table by the number of newlines.
270 m1b
= strings
[endNewLine
- start
];
272 // If we didn't find a space in the beginning, skip it now.
273 if (firstChar
!= ' ') {
277 mState
.mInHeap
= false;
278 mState
.mIs2b
= false;
279 mState
.mLength
= aLength
;
285 // See if we need to store the data in ucs2 or not
286 int32_t first16bit
= aForce2b
? 0 : FirstNon8Bit(ucp
, uend
);
288 if (first16bit
!= -1) { // aBuffer contains a non-8bit character, or aForce2b is set
289 // Use ucs2 storage because we have to
// Size in bytes of aLength code units plus a terminator, overflow-checked
// at each step.
290 CheckedUint32 m2bSize
= CheckedUint32(aLength
) + 1;
291 if (!m2bSize
.isValid()) {
294 m2bSize
*= sizeof(char16_t
);
295 if (!m2bSize
.isValid()) {
299 m2b
= nsStringBuffer::Alloc(m2bSize
.value()).take();
303 memcpy(m2b
->Data(), aBuffer
, aLength
* sizeof(char16_t
));
304 static_cast<char16_t
*>(m2b
->Data())[aLength
] = char16_t(0);
// Everything before first16bit is known to be 8-bit, so the bidi scan starts
// there.
308 UpdateBidiFlag(aBuffer
+ first16bit
, aLength
- first16bit
);
312 // Use 1 byte storage because we can
313 char* buff
= static_cast<char*>(malloc(aLength
));
319 LossyConvertUtf16toLatin1(Span(aBuffer
, aLength
), Span(buff
, aLength
));
321 mState
.mIs2b
= false;
325 mState
.mInHeap
= true;
326 mState
.mLength
= aLength
;
// Copies aCount characters of the fragment, starting at aOffset, into aDest
// as UTF-16.
//
// If aOffset + aCount overflows or reaches past GetLength(), aCount is
// reduced to mState.mLength - aOffset. 2-byte text is copied with memcpy;
// 1-byte text is widened with ConvertLatin1toUtf16. No terminator is written
// by the visible code.
// NOTE(review): the rest of the parameter list and any bounds check on
// aOffset itself are not visible in this excerpt.
331 void nsTextFragment::CopyTo(char16_t
* aDest
, uint32_t aOffset
,
333 const CheckedUint32 endOffset
= CheckedUint32(aOffset
) + aCount
;
334 if (!endOffset
.isValid() || endOffset
.value() > GetLength()) {
335 aCount
= mState
.mLength
- aOffset
;
// 2-byte storage: straight copy of the code units.
340 memcpy(aDest
, Get2b() + aOffset
, sizeof(char16_t
) * aCount
);
// 1-byte storage: widen each Latin-1 byte to a UTF-16 code unit.
342 const char* cp
= m1b
+ aOffset
;
343 ConvertLatin1toUtf16(Span(cp
, aCount
), Span(aDest
, aCount
));
// Appends the aLength UTF-16 code units at aBuffer to the fragment.
//
// aBuffer      code units to append
// aLength      number of code units
// aUpdateBidi  forwarded to SetTo() for the empty-fragment case;
//              NOTE(review): its other uses are not visible in this excerpt
// aForce2b     forces the result into 2-byte storage
//
// Returns false when the combined length would exceed
// NS_MAX_TEXT_FRAGMENT_LENGTH or a byte-size computation would overflow; an
// empty fragment returns whatever SetTo() returns.
// NOTE(review): the success returns and the allocation-failure handling are
// not visible in this excerpt.
//
// Three storage cases are handled: already 2-byte (grow or copy the
// nsStringBuffer), 1-byte with new data that needs 2 bytes (convert the old
// data into a new nsStringBuffer), and 1-byte staying 1-byte (realloc or
// malloc).
348 bool nsTextFragment::Append(const char16_t
* aBuffer
, uint32_t aLength
,
349 bool aUpdateBidi
, bool aForce2b
) {
354 // This is a common case because some callsites create a textnode
355 // with a value by creating the node and then calling AppendData.
356 if (mState
.mLength
== 0) {
357 return SetTo(aBuffer
, aLength
, aUpdateBidi
, aForce2b
);
360 // Should we optimize for aLength == 0?
362 // FYI: Don't use CheckedInt in this method since this is a very hot path
363 // in some performance tests.
364 if (NS_MAX_TEXT_FRAGMENT_LENGTH
- mState
.mLength
< aLength
) {
365 return false; // The combined length would exceed the maximum.
// Code units needed: old text + new text + terminator.
369 size_t size
= mState
.mLength
+ aLength
+ 1;
370 if (SIZE_MAX
/ sizeof(char16_t
) < size
) {
371 return false; // The byte size would overflow size_t.
373 size
*= sizeof(char16_t
);
375 // Already a 2-byte string so the result will be too
376 nsStringBuffer
* buff
= nullptr;
377 nsStringBuffer
* bufferToRelease
= nullptr;
// A read-only buffer cannot be grown in place: allocate a new one, copy the
// old text over, and release the old buffer once the append is done.
378 if (m2b
->IsReadonly()) {
379 buff
= nsStringBuffer::Alloc(size
).take();
383 bufferToRelease
= m2b
;
384 memcpy(static_cast<char16_t
*>(buff
->Data()), m2b
->Data(),
385 mState
.mLength
* sizeof(char16_t
));
387 buff
= nsStringBuffer::Realloc(m2b
, size
);
393 char16_t
* data
= static_cast<char16_t
*>(buff
->Data());
394 memcpy(data
+ mState
.mLength
, aBuffer
, aLength
* sizeof(char16_t
));
395 mState
.mLength
+= aLength
;
397 data
[mState
.mLength
] = char16_t(0);
399 NS_IF_RELEASE(bufferToRelease
);
402 UpdateBidiFlag(aBuffer
, aLength
);
408 // Current string is a 1-byte string, check if the new data fits in one byte
410 int32_t first16bit
= aForce2b
? 0 : FirstNon8Bit(aBuffer
, aBuffer
+ aLength
);
412 if (first16bit
!= -1) { // aBuffer contains a non-8bit character, or aForce2b is set
413 size_t size
= mState
.mLength
+ aLength
+ 1;
414 if (SIZE_MAX
/ sizeof(char16_t
) < size
) {
415 return false; // The byte size would overflow size_t.
417 size
*= sizeof(char16_t
);
419 // The old data was 1-byte, but the new is not so we have to expand it
421 nsStringBuffer
* buff
= nsStringBuffer::Alloc(size
).take();
426 // Copy data into buff
427 char16_t
* data
= static_cast<char16_t
*>(buff
->Data());
428 ConvertLatin1toUtf16(Span(m1b
, mState
.mLength
), Span(data
, mState
.mLength
));
430 memcpy(data
+ mState
.mLength
, aBuffer
, aLength
* sizeof(char16_t
));
431 mState
.mLength
+= aLength
;
// The old 1-byte buffer is freed only if this fragment allocated it on the
// heap.
434 if (mState
.mInHeap
) {
435 free(const_cast<char*>(m1b
));
437 data
[mState
.mLength
] = char16_t(0);
440 mState
.mInHeap
= true;
// Everything before first16bit is known to be 8-bit, so the bidi scan starts
// there.
443 UpdateBidiFlag(aBuffer
+ first16bit
, aLength
- first16bit
);
449 // The new and the old data is all 1-byte
450 size_t size
= mState
.mLength
+ aLength
;
451 MOZ_ASSERT(sizeof(char) == 1);
// A heap buffer can be grown with realloc; otherwise a fresh buffer is
// allocated and the old bytes are copied into it.
453 if (mState
.mInHeap
) {
454 buff
= static_cast<char*>(realloc(const_cast<char*>(m1b
), size
));
459 buff
= static_cast<char*>(malloc(size
));
464 memcpy(buff
, m1b
, mState
.mLength
);
465 mState
.mInHeap
= true;
468 // Copy aBuffer into buff.
469 LossyConvertUtf16toLatin1(Span(aBuffer
, aLength
),
470 Span(buff
+ mState
.mLength
, aLength
));
473 mState
.mLength
+= aLength
;
// Memory-reporting helper: returns the size of the text storage as measured
// through aMallocSizeOf, not counting the nsTextFragment object itself.
// The nsStringBuffer is measured with SizeOfIncludingThisIfUnshared(); a
// 1-byte buffer is measured only when it is heap-allocated (mInHeap).
// NOTE(review): the condition selecting the m2b branch and the final return
// for non-heap storage are not visible in this excerpt.
479 size_t nsTextFragment::SizeOfExcludingThis(
480 mozilla::MallocSizeOf aMallocSizeOf
) const {
482 return m2b
->SizeOfIncludingThisIfUnshared(aMallocSizeOf
);
485 if (mState
.mInHeap
) {
486 return aMallocSizeOf(m1b
);
// Sets mState.mIsBidi when the aLength code units at aBuffer contain
// right-to-left characters (as reported by HasRTLChars). The scan is skipped
// for 1-byte fragments and when the flag is already set; the flag is never
// cleared here.
492 // To save time we only do this when we really want to know, not during
494 void nsTextFragment::UpdateBidiFlag(const char16_t
* aBuffer
, uint32_t aLength
) {
495 if (mState
.mIs2b
&& !mState
.mIsBidi
) {
496 if (HasRTLChars(Span(aBuffer
, aLength
))) {
497 mState
.mIsBidi
= true;
502 bool nsTextFragment::TextEquals(const nsTextFragment
& aOther
) const {
505 if (!aOther
.Is2b()) {
506 nsDependentCSubstring
ourStr(Get1b(), GetLength());
507 return ourStr
.Equals(
508 nsDependentCSubstring(aOther
.Get1b(), aOther
.GetLength()));
511 // We're 1-byte, the other thing is 2-byte. Instead of implementing a
512 // separate codepath for this, just use our code below.
513 return aOther
.TextEquals(*this);
516 nsDependentSubstring
ourStr(Get2b(), GetLength());
518 return ourStr
.Equals(
519 nsDependentSubstring(aOther
.Get2b(), aOther
.GetLength()));
522 // We can't use EqualsASCII here, because the other string might not
523 // actually be ASCII. Just roll our own compare; do it in the simple way.
524 // Bug 1532356 tracks not having to roll our own.
525 if (GetLength() != aOther
.GetLength()) {
529 const char16_t
* ourChars
= Get2b();
530 const char* otherChars
= aOther
.Get1b();
531 for (uint32_t i
= 0; i
< GetLength(); ++i
) {
532 if (ourChars
[i
] != static_cast<char16_t
>(otherChars
[i
])) {