1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * A class which represents a fragment of text (eg inside a text
9 * node); if only codepoints below 256 are used, the text is stored as
10 * a char*; otherwise the text is stored as a char16_t*
13 #include "nsTextFragment.h"
15 #include "nsReadableUtils.h"
17 #include "nsBidiUtils.h"
18 #include "nsUnicharUtils.h"
19 #include "mozilla/CheckedInt.h"
20 #include "mozilla/MemoryReporting.h"
21 #include "mozilla/SSE.h"
22 #include "nsTextFragmentImpl.h"
// Number of space (or tab) characters that follow the newlines in each shared
// whitespace string built by nsTextFragment::Init().
25 #define TEXTFRAG_WHITE_AFTER_NEWLINE 50
// Largest newline count covered by the shared whitespace strings; the tables
// below hold one entry for each count from 0 up to and including this value.
26 #define TEXTFRAG_MAX_NEWLINES 7
28 // Static buffer used for common fragments
// sSpaceSharedString[i] is filled by Init() with one space, then i newlines,
// then TEXTFRAG_WHITE_AFTER_NEWLINE spaces. Each buffer is allocated with
// exactly that many chars, so there is no room for a null terminator.
29 static char* sSpaceSharedString
[TEXTFRAG_MAX_NEWLINES
+ 1];
// Same layout as sSpaceSharedString, but the trailing run is made of tabs.
30 static char* sTabSharedString
[TEXTFRAG_MAX_NEWLINES
+ 1];
// sSingleCharSharedString[c] holds the byte value c, so that SetTo() can
// point m1b into this table for a one-character 8-bit fragment.
31 static char sSingleCharSharedString
[256];
33 using namespace mozilla
;
// Builds the static tables that SetTo() uses to avoid allocating for common
// fragments: the shared whitespace strings and the single-character table.
// Returns an nsresult; the return statement is outside this view.
36 nsresult
nsTextFragment::Init() {
37 // Create whitespace strings
// One pair of buffers per newline count i in [0, TEXTFRAG_MAX_NEWLINES].
39 for (i
= 0; i
<= TEXTFRAG_MAX_NEWLINES
; ++i
) {
// Length is 1 (leading space) + i (newlines) + the trailing whitespace run.
40 sSpaceSharedString
[i
] = new char[1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
];
41 sTabSharedString
[i
] = new char[1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
];
// Both tables start with a single space, including the tab table.
42 sSpaceSharedString
[i
][0] = ' ';
43 sTabSharedString
[i
][0] = ' ';
// Positions 1..i hold the newlines.
45 for (j
= 1; j
< 1 + i
; ++j
) {
46 sSpaceSharedString
[i
][j
] = '\n';
47 sTabSharedString
[i
][j
] = '\n';
// The remaining positions hold the run of spaces (or tabs); j carries on
// from where the newline loop stopped.
49 for (; j
< (1 + i
+ TEXTFRAG_WHITE_AFTER_NEWLINE
); ++j
) {
50 sSpaceSharedString
[i
][j
] = ' ';
51 sTabSharedString
[i
][j
] = '\t';
55 // Create single-char strings
// Entry i simply stores the byte value i.
56 for (i
= 0; i
< 256; ++i
) {
57 sSingleCharSharedString
[i
] = i
;
// Frees the shared whitespace buffers allocated by Init() and resets every
// table entry to nullptr.
64 void nsTextFragment::Shutdown() {
66 for (i
= 0; i
<= TEXTFRAG_MAX_NEWLINES
; ++i
) {
// Allocated with new char[], hence delete[].
67 delete[] sSpaceSharedString
[i
];
68 delete[] sTabSharedString
[i
];
// Null the slots so no dangling pointers remain in the tables.
69 sSpaceSharedString
[i
] = nullptr;
70 sTabSharedString
[i
] = nullptr;
// Destructor. The only statement visible in this view is the MOZ_COUNT_DTOR
// invocation for this class.
// NOTE(review): presumably the text storage is released here as well; that
// call is not visible in this view -- confirm.
74 nsTextFragment::~nsTextFragment() {
76 MOZ_COUNT_DTOR(nsTextFragment
);
// Releases the storage held by this fragment and clears its state flags.
// Only the 1-byte branch is visible in this view; the branch that precedes
// the else is not shown.
79 void nsTextFragment::ReleaseText() {
// 1-byte data is freed only when the fragment is non-empty and m1b points at
// heap memory (mInHeap); otherwise free() is not called on it.
82 } else if (mState
.mLength
&& m1b
&& mState
.mInHeap
) {
// m1b is a pointer to const, so the const is cast away for free().
83 free(const_cast<char*>(m1b
));
87 mState
.mIsBidi
= false;
89 // Set mState.mIs2b, mState.mInHeap, and mState.mLength = 0 with mAllBits;
// Copy assignment: makes this fragment hold the same text as aOther.
// The visible code distinguishes three kinds of source storage: data that is
// not in the heap, 2-byte heap data, and 1-byte heap data. The state bits are
// finally copied wholesale through mAllBits.
93 nsTextFragment
& nsTextFragment::operator=(const nsTextFragment
& aOther
) {
// Nothing to copy from an empty fragment.
96 if (aOther
.mState
.mLength
) {
97 if (!aOther
.mState
.mInHeap
) {
// Data that is not in the heap is expected to be 1-byte.
98 MOZ_ASSERT(!aOther
.mState
.mIs2b
);
100 } else if (aOther
.mState
.mIs2b
) {
// 1-byte heap data: make a private copy of mLength bytes.
104 m1b
= static_cast<char*>(malloc(aOther
.mState
.mLength
));
106 memcpy(const_cast<char*>(m1b
), aOther
.m1b
, aOther
.mState
.mLength
);
// NOTE(review): the condition guarding this fallback is not visible in this
// view; presumably it runs when the malloc above fails -- confirm.
108 // allocate a buffer for a single REPLACEMENT CHARACTER
// Room for two char16_t: U+FFFD plus a null terminator.
109 m2b
= nsStringBuffer::Alloc(sizeof(char16_t
) * 2).take();
113 char16_t
* data
= static_cast<char16_t
*>(m2b
->Data());
114 data
[0] = 0xFFFD; // REPLACEMENT CHARACTER
115 data
[1] = char16_t(0);
117 mState
.mInHeap
= true;
// Copies all state bits from aOther in one assignment.
123 mAllBits
= aOther
.mAllBits
;
// Portable scan of [str, end) for a char16_t above 255.
// Returns an index that is less than or equal to the index of the first such
// character: the exact index from the per-character loops, or the index of
// the start of the containing word from the word loop. The return for the
// case where nothing is found is outside this view.
// The scan runs in three phases: single characters up to a word boundary,
// then one size_t word at a time, then the leftover characters.
129 static inline int32_t FirstNon8BitUnvectorized(const char16_t
* str
,
130 const char16_t
* end
) {
// Word-size dependent constants come from Non8BitParameters.
// NOTE(review): presumably mask selects the high byte of every char16_t in a
// word; its definition is not part of this file -- confirm.
131 typedef Non8BitParameters
<sizeof(size_t)> p
;
132 const size_t mask
= p::mask();
133 const uint32_t alignMask
= p::alignMask();
134 const uint32_t numUnicharsPerWord
= p::numUnicharsPerWord();
135 const int32_t len
= end
- str
;
138 // Align ourselves to a word boundary.
// alignLen is the number of char16_t before the next boundary, capped at len
// so that short inputs are handled entirely by this first loop.
139 int32_t alignLen
= std::min(
140 len
, int32_t(((-NS_PTR_TO_INT32(str
)) & alignMask
) / sizeof(char16_t
)));
141 for (; i
< alignLen
; i
++) {
142 if (str
[i
] > 255) return i
;
145 // Check one word at a time.
// NOTE(review): wordWalkEnd is computed as a count relative to i, but the
// loop below compares it against the absolute index i. When alignLen is
// non-zero the word loop therefore stops earlier than it could. The result
// is still correct because the final loop scans everything that remains.
146 const int32_t wordWalkEnd
=
147 ((len
- i
) / numUnicharsPerWord
) * numUnicharsPerWord
;
148 for (; i
< wordWalkEnd
; i
+= numUnicharsPerWord
) {
// Reads numUnicharsPerWord characters at once as a single size_t.
149 const size_t word
= *reinterpret_cast<const size_t*>(str
+ i
);
// Any masked bit set means the word holds a matching character; the index
// of the start of the word is returned, not the exact position.
150 if (word
& mask
) return i
;
153 // Take care of the remainder one character at a time.
154 for (; i
< len
; i
++) {
155 if (str
[i
] > 255) return i
;
// Forward declaration of the SSE2 scan, which FirstNon8Bit() below calls as
// mozilla::SSE2::FirstNon8Bit when mozilla::supports_sse2() is true. It is
// only declared when MOZILLA_MAY_SUPPORT_SSE2 is defined; its definition is
// not part of this view.
161 #ifdef MOZILLA_MAY_SUPPORT_SSE2
164 int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
);
166 } // namespace mozilla
170 * This function returns -1 if all characters in str are 8 bit characters.
171 * Otherwise, it returns a value less than or equal to the index of the first
172 * non-8bit character in str. For example, if first non-8bit character is at
173 * position 25, it may return 25, or for example 24, or 16. But it guarantees
174 * there is no non-8bit character before returned value.
// Dispatches the scan of [str, end) to the SSE2 implementation when the build
// may support SSE2 and the running CPU reports it, and to the portable
// implementation otherwise.
176 static inline int32_t FirstNon8Bit(const char16_t
* str
, const char16_t
* end
) {
177 #ifdef MOZILLA_MAY_SUPPORT_SSE2
// Runtime check; the preprocessor guard alone does not imply SSE2 is present.
178 if (mozilla::supports_sse2()) {
179 return mozilla::SSE2::FirstNon8Bit(str
, end
);
// Fallback used when SSE2 is unavailable.
183 return FirstNon8BitUnvectorized(str
, end
);
// Replaces the contents of this fragment with aLength UTF-16 code units read
// from aBuffer.
//
// Storage strategies visible in this view, in the order they are tried:
//   1. aForce2b with an existing writable 2-byte buffer: reuse that buffer.
//   2. A single character below 256: point into sSingleCharSharedString.
//   3. A short whitespace-only string (newlines followed by spaces or tabs):
//      point into the shared whitespace tables.
//   4. A non-8-bit character is present, or aForce2b is set: copy into a
//      newly allocated nsStringBuffer with a null terminator.
//   5. Otherwise: narrow into a malloc-ed 1-byte buffer.
//
// aForce2b requests 2-byte storage even when every character fits in 8 bits.
// NOTE(review): the return statements and the uses of aUpdateBidi are not
// visible in this view; presumably the function returns false on overflow or
// allocation failure and true otherwise -- confirm.
186 bool nsTextFragment::SetTo(const char16_t
* aBuffer
, int32_t aLength
,
187 bool aUpdateBidi
, bool aForce2b
) {
// Strategy 1: only possible when the current buffer is 2-byte and writable.
188 if (aForce2b
&& mState
.mIs2b
&& !m2b
->IsReadonly()) {
189 uint32_t storageSize
= m2b
->StorageSize();
190 uint32_t neededSize
= aLength
* sizeof(char16_t
);
192 if (storageSize
< AutoStringDefaultStorageSize
) {
193 // If we're storing small enough nsStringBuffer, let's preserve it.
// The preserved buffer is emptied by writing a terminator at index 0.
195 static_cast<char16_t
*>(m2b
->Data())[0] = char16_t(0);
197 mState
.mIsBidi
= false;
// neededSize < storageSize leaves room for the terminator written below.
200 } else if ((neededSize
< storageSize
) &&
202 (neededSize
+ AutoStringDefaultStorageSize
))) {
203 // Don't try to reuse the existing nsStringBuffer, if it would have
204 // lots of unused space.
// In-place reuse: overwrite the old contents and terminate.
206 memcpy(m2b
->Data(), aBuffer
, neededSize
);
207 static_cast<char16_t
*>(m2b
->Data())[aLength
] = char16_t(0);
208 mState
.mLength
= aLength
;
// The flag is reset first because the old contents are gone.
209 mState
.mIsBidi
= false;
211 UpdateBidiFlag(aBuffer
, aLength
);
// NOTE(review): this dereference needs aLength >= 1; the guard for the empty
// case is not visible in this view -- confirm.
223 char16_t firstChar
= *aBuffer
;
// Strategy 2: one 8-bit character, no allocation needed.
224 if (!aForce2b
&& aLength
== 1 && firstChar
< 256) {
225 m1b
= sSingleCharSharedString
+ firstChar
;
226 mState
.mInHeap
= false;
227 mState
.mIs2b
= false;
233 const char16_t
* ucp
= aBuffer
;
234 const char16_t
* uend
= aBuffer
+ aLength
;
236 // Check if we can use a shared string
// Strategy 3: the input must be no longer than the longest shared string and
// must begin with a space, newline or tab.
238 aLength
<= 1 + TEXTFRAG_WHITE_AFTER_NEWLINE
+ TEXTFRAG_MAX_NEWLINES
&&
239 (firstChar
== ' ' || firstChar
== '\n' || firstChar
== '\t')) {
240 if (firstChar
== ' ') {
// start marks the beginning of the run of newlines.
244 const char16_t
* start
= ucp
;
245 while (ucp
< uend
&& *ucp
== '\n') {
// endNewLine marks the end of the newlines, so endNewLine - start is the
// newline count.
248 const char16_t
* endNewLine
= ucp
;
// The trailing run is tabs if the next character is a tab, spaces otherwise.
250 char16_t space
= ucp
< uend
&& *ucp
== '\t' ? '\t' : ' ';
251 while (ucp
< uend
&& *ucp
== space
) {
// Usable only if the whole input was consumed and both runs fit within the
// sizes that Init() built.
255 if (ucp
== uend
&& endNewLine
- start
<= TEXTFRAG_MAX_NEWLINES
&&
256 ucp
- endNewLine
<= TEXTFRAG_WHITE_AFTER_NEWLINE
) {
// The table is selected by whitespace kind and indexed by newline count.
257 char** strings
= space
== ' ' ? sSpaceSharedString
: sTabSharedString
;
258 m1b
= strings
[endNewLine
- start
];
// Every shared string begins with a space (see Init()).
260 // If we didn't find a space in the beginning, skip it now.
261 if (firstChar
!= ' ') {
265 mState
.mInHeap
= false;
266 mState
.mIs2b
= false;
267 mState
.mLength
= aLength
;
273 // See if we need to store the data in ucs2 or not
// With aForce2b the scan is skipped and 0 is used, which selects the 2-byte
// branch below.
274 int32_t first16bit
= aForce2b
? 0 : FirstNon8Bit(ucp
, uend
);
276 if (first16bit
!= -1) { // aBuffer contains a non-8bit character, or aForce2b is set
277 // Use ucs2 storage because we have to
// Strategy 4. Size is (aLength + 1) * sizeof(char16_t), computed with
// CheckedUint32 so that overflow is detected.
278 CheckedUint32 m2bSize
= aLength
+ 1;
279 m2bSize
*= sizeof(char16_t
);
280 if (!m2bSize
.isValid()) {
284 m2b
= nsStringBuffer::Alloc(m2bSize
.value()).take();
288 memcpy(m2b
->Data(), aBuffer
, aLength
* sizeof(char16_t
));
289 static_cast<char16_t
*>(m2b
->Data())[aLength
] = char16_t(0);
// Only the part of the input from first16bit onwards is scanned.
293 UpdateBidiFlag(aBuffer
+ first16bit
, aLength
- first16bit
);
297 // Use 1 byte storage because we can
// Strategy 5: exactly aLength bytes, with no terminator.
298 char* buff
= static_cast<char*>(malloc(aLength
));
// Every character is known to fit in 8 bits here, so narrowing loses nothing.
304 LossyConvertUTF16toLatin1(MakeSpan(aBuffer
, aLength
),
305 MakeSpan(buff
, aLength
));
307 mState
.mIs2b
= false;
311 mState
.mInHeap
= true;
312 mState
.mLength
= aLength
;
// Copies up to aCount characters of this fragment, starting at aOffset, into
// aDest as UTF-16. aDest receives aCount char16_t values (after clamping), so
// the caller must provide at least that much space.
// NOTE(review): the condition that chooses between the 2-byte and the 1-byte
// copy is not visible in this view; presumably it tests mState.mIs2b --
// confirm.
317 void nsTextFragment::CopyTo(char16_t
* aDest
, int32_t aOffset
, int32_t aCount
) {
318 NS_ASSERTION(aOffset
>= 0, "Bad offset passed to nsTextFragment::CopyTo()!");
319 NS_ASSERTION(aCount
>= 0, "Bad count passed to nsTextFragment::CopyTo()!");
// Clamp the request so that it ends at the end of the fragment.
325 if (uint32_t(aOffset
+ aCount
) > GetLength()) {
326 aCount
= mState
.mLength
- aOffset
;
// 2-byte storage can be copied verbatim.
331 memcpy(aDest
, Get2b() + aOffset
, sizeof(char16_t
) * aCount
);
// 1-byte storage is widened to UTF-16 while copying.
333 const char* cp
= m1b
+ aOffset
;
334 ConvertLatin1toUTF16(MakeSpan(cp
, aCount
), MakeSpan(aDest
, aCount
));
// Appends aLength UTF-16 code units from aBuffer to this fragment.
//
// Cases visible in this view:
//   - Empty fragment: delegates to SetTo().
//   - Already 2-byte: grow the nsStringBuffer, allocating a fresh one when
//     the current buffer is read-only, and copy the new data behind the old.
//   - 1-byte and the new data is not all 8-bit (or aForce2b is set): widen
//     the old data into a new nsStringBuffer, then copy the new data.
//   - 1-byte and the new data is all 8-bit: grow the 1-byte buffer and narrow
//     the new data into it.
//
// Returns false when the combined length would overflow (see the checks
// below). The other return statements and the uses of aUpdateBidi are not
// visible in this view.
339 bool nsTextFragment::Append(const char16_t
* aBuffer
, uint32_t aLength
,
340 bool aUpdateBidi
, bool aForce2b
) {
345 // This is a common case because some callsites create a textnode
346 // with a value by creating the node and then calling AppendData.
347 if (mState
.mLength
== 0) {
348 return SetTo(aBuffer
, aLength
, aUpdateBidi
, aForce2b
);
351 // Should we optimize for aData.Length() == 0?
353 // FYI: Don't use CheckedInt in this method since here is very hot path
354 // in some performance tests.
// Written as a subtraction so that the check itself cannot overflow.
355 if (NS_MAX_TEXT_FRAGMENT_LENGTH
- mState
.mLength
< aLength
) {
356 return false; // Would be overflown if we'd keep handling.
// Old length + new length + 1 for the terminator, in characters at first.
360 size_t size
= mState
.mLength
+ aLength
+ 1;
361 if (SIZE_MAX
/ sizeof(char16_t
) < size
) {
362 return false; // Would be overflown if we'd keep handling.
// Converted from a character count to a byte count.
364 size
*= sizeof(char16_t
);
366 // Already a 2-byte string so the result will be too
367 nsStringBuffer
* buff
= nullptr;
368 nsStringBuffer
* bufferToRelease
= nullptr;
// A read-only buffer must not be resized in place: allocate a new one, copy
// the old contents across, and release the old buffer further down.
369 if (m2b
->IsReadonly()) {
370 buff
= nsStringBuffer::Alloc(size
).take();
374 bufferToRelease
= m2b
;
375 memcpy(static_cast<char16_t
*>(buff
->Data()), m2b
->Data(),
376 mState
.mLength
* sizeof(char16_t
));
// A writable buffer is resized in place.
378 buff
= nsStringBuffer::Realloc(m2b
, size
);
// Copy the new data directly behind the existing characters.
384 char16_t
* data
= static_cast<char16_t
*>(buff
->Data());
385 memcpy(data
+ mState
.mLength
, aBuffer
, aLength
* sizeof(char16_t
));
386 mState
.mLength
+= aLength
;
// mLength now includes the appended data, so this is the terminator slot.
388 data
[mState
.mLength
] = char16_t(0);
// No-op when the buffer was resized in place (bufferToRelease stays null).
390 NS_IF_RELEASE(bufferToRelease
);
// Only the appended part is scanned for right-to-left characters.
393 UpdateBidiFlag(aBuffer
, aLength
);
399 // Current string is a 1-byte string, check if the new data fits in one byte
// With aForce2b the scan is skipped and 0 is used, which selects the widening
// branch below.
401 int32_t first16bit
= aForce2b
? 0 : FirstNon8Bit(aBuffer
, aBuffer
+ aLength
);
403 if (first16bit
!= -1) { // aBuffer contains a non-8bit character, or aForce2b is set
// Same size computation and overflow check as in the 2-byte case above.
404 size_t size
= mState
.mLength
+ aLength
+ 1;
405 if (SIZE_MAX
/ sizeof(char16_t
) < size
) {
406 return false; // Would be overflown if we'd keep handling.
408 size
*= sizeof(char16_t
);
410 // The old data was 1-byte, but the new is not so we have to expand it
412 nsStringBuffer
* buff
= nsStringBuffer::Alloc(size
).take();
417 // Copy data into buff
// The existing 1-byte text is widened into the front of the new buffer.
418 char16_t
* data
= static_cast<char16_t
*>(buff
->Data());
419 ConvertLatin1toUTF16(MakeSpan(m1b
, mState
.mLength
),
420 MakeSpan(data
, mState
.mLength
));
// The new data is already UTF-16 and is copied in behind it.
422 memcpy(data
+ mState
.mLength
, aBuffer
, aLength
* sizeof(char16_t
));
423 mState
.mLength
+= aLength
;
// The old 1-byte buffer is freed only if it was heap-allocated.
426 if (mState
.mInHeap
) {
427 free(const_cast<char*>(m1b
));
429 data
[mState
.mLength
] = char16_t(0);
432 mState
.mInHeap
= true;
// Only the part of the appended data from first16bit onwards is scanned.
435 UpdateBidiFlag(aBuffer
+ first16bit
, aLength
- first16bit
);
441 // The new and the old data is all 1-byte
// No terminator is stored for 1-byte data, so there is no + 1 here.
442 size_t size
= mState
.mLength
+ aLength
;
443 MOZ_ASSERT(sizeof(char) == 1);
// A heap buffer can be grown with realloc.
445 if (mState
.mInHeap
) {
446 buff
= static_cast<char*>(realloc(const_cast<char*>(m1b
), size
));
// Data that is not in the heap is first copied into a new heap buffer.
451 buff
= static_cast<char*>(malloc(size
));
456 memcpy(buff
, m1b
, mState
.mLength
);
457 mState
.mInHeap
= true;
460 // Copy aBuffer into buff.
// The appended data is known to be all 8-bit here, so narrowing loses
// nothing.
461 LossyConvertUTF16toLatin1(MakeSpan(aBuffer
, aLength
),
462 MakeSpan(buff
+ mState
.mLength
, aLength
));
465 mState
.mLength
+= aLength
;
// Memory-reporting helper: returns the size of the text storage owned by this
// fragment, measured with aMallocSizeOf. The fragment object itself is not
// counted.
// NOTE(review): the condition guarding the first return and the final return
// are not visible in this view; presumably the first applies to 2-byte
// fragments -- confirm.
471 size_t nsTextFragment::SizeOfExcludingThis(
472 mozilla::MallocSizeOf aMallocSizeOf
) const {
// The string buffer is measured through its own if-unshared helper.
474 return m2b
->SizeOfIncludingThisIfUnshared(aMallocSizeOf
);
// 1-byte data is measured only when it lives in the heap.
477 if (mState
.mInHeap
) {
478 return aMallocSizeOf(m1b
);
484 // To save time we only do this when we really want to know, not during
// Sets mState.mIsBidi when the aLength characters at aBuffer contain a
// right-to-left character. The flag is only ever set here, never cleared.
486 void nsTextFragment::UpdateBidiFlag(const char16_t
* aBuffer
, uint32_t aLength
) {
// Nothing to do for 1-byte fragments or when the flag is already set.
487 if (mState
.mIs2b
&& !mState
.mIsBidi
) {
488 if (HasRTLChars(MakeSpan(aBuffer
, aLength
))) {
489 mState
.mIsBidi
= true;
494 bool nsTextFragment::TextEquals(const nsTextFragment
& aOther
) const {
497 if (!aOther
.Is2b()) {
498 nsDependentCSubstring
ourStr(Get1b(), GetLength());
499 return ourStr
.Equals(
500 nsDependentCSubstring(aOther
.Get1b(), aOther
.GetLength()));
503 // We're 1-byte, the other thing is 2-byte. Instead of implementing a
504 // separate codepath for this, just use our code below.
505 return aOther
.TextEquals(*this);
508 nsDependentSubstring
ourStr(Get2b(), GetLength());
510 return ourStr
.Equals(
511 nsDependentSubstring(aOther
.Get2b(), aOther
.GetLength()));
514 // We can't use EqualsASCII here, because the other string might not
515 // actually be ASCII. Just roll our own compare; do it in the simple way.
516 // Bug 1532356 tracks not having to roll our own.
517 if (GetLength() != aOther
.GetLength()) {
521 const char16_t
* ourChars
= Get2b();
522 const char* otherChars
= aOther
.Get1b();
523 for (uint32_t i
= 0; i
< GetLength(); ++i
) {
524 if (ourChars
[i
] != static_cast<char16_t
>(otherChars
[i
])) {