1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 * A class which represents a fragment of text (eg inside a text
9 * node); if only codepoints below 256 are used, the text is stored as
10 * a char*; otherwise the text is stored as a char16_t*
13 #ifndef nsTextFragment_h___
14 #define nsTextFragment_h___
16 #include "mozilla/Attributes.h"
17 #include "mozilla/MemoryReporting.h"
19 #include "nsCharTraits.h"
21 #include "nsStringBuffer.h"
22 #include "nsReadableUtils.h"
23 #include "nsISupportsImpl.h"
25 // XXX should this normalize the code to keep a \u0000 at the end?
27 // XXX nsTextFragmentPool?
30 * A fragment of text. If mIs2b is 1 then the m2b pointer is valid
31 * otherwise the m1b pointer is valid. If m1b is used then each byte
32 * of data represents a single ucs2 character with the high byte being zero.
35 * This class does not have a virtual destructor therefore it is not
36 * meant to be subclassed.
38 class nsTextFragment final
{
// Class-level (static) entry points; only declared here, defined out of line.
// Init() reports its outcome as an nsresult, Shutdown() returns nothing.
// NOTE(review): presumably one-time setup/teardown of data shared between
// fragments -- confirm against the implementation file.
40 static nsresult
Init();
41 static void Shutdown();
44 * Default constructor. Initialize the fragment to be empty.
46 nsTextFragment() : m1b(nullptr), mAllBits(0) {
47 MOZ_COUNT_CTOR(nsTextFragment
);
48 NS_ASSERTION(sizeof(FragmentBits
) == 4, "Bad field packing!");
54 * Change the contents of this fragment to be a copy of the
55 * argument fragment, or to "" if unable to allocate enough memory.
// Copy assignment from aOther, which is taken by const reference and so is
// not modified. Only declared here; the definition lives out of line.
// NOTE(review): the returned reference is presumably *this -- confirm in the
// implementation.
57 nsTextFragment
& operator=(const nsTextFragment
& aOther
);
60 * Return true if this fragment is represented by char16_t data
62 bool Is2b() const { return mState
.mIs2b
; }
65 * Return true if this fragment contains Bidi text
66 * For performance reasons this flag is only set if explicitly requested (by
67 * setting the aUpdateBidi argument on SetTo or Append to true).
69 bool IsBidi() const { return mState
.mIsBidi
; }
72 * Get a pointer to constant char16_t data.
74 const char16_t
* Get2b() const {
75 MOZ_ASSERT(Is2b(), "not 2b text");
76 return static_cast<char16_t
*>(m2b
->Data());
80 * Get a pointer to constant char data.
82 const char* Get1b() const {
83 NS_ASSERTION(!Is2b(), "not 1b text");
84 return (const char*)m1b
;
88 * Get the length of the fragment. The length is the number of logical
89 * characters, not the number of bytes to store the characters.
91 uint32_t GetLength() const { return mState
.mLength
; }
// Returns true when n more characters can be added to this fragment: both n
// itself and the resulting length (mState.mLength + n) must stay strictly
// below 1 << 29, which matches the 29-bit width of the mLength bit-field.
// Checking n on its own first rejects oversized requests before the sum is
// evaluated.
93 bool CanGrowBy(size_t n
) const {
94 return n
< (1 << 29) && mState
.mLength
+ n
< (1 << 29);
98 * Change the contents of this fragment to be a copy of the given
99 * buffer. If aUpdateBidi is true, contents of the fragment will be scanned,
100 * and mState.mIsBidi will be turned on if it includes any Bidi characters.
101 * If aForce2b is true, aBuffer will be stored as char16_t as is. Then,
102 * you can access the value faster but may waste memory if all characters
103 * are less than U+0100.
105 bool SetTo(const char16_t
* aBuffer
, int32_t aLength
, bool aUpdateBidi
,
108 bool SetTo(const nsString
& aString
, bool aUpdateBidi
, bool aForce2b
) {
110 if (aForce2b
&& !aUpdateBidi
) {
111 nsStringBuffer
* buffer
= nsStringBuffer::FromString(aString
);
113 NS_ADDREF(m2b
= buffer
);
114 mState
.mInHeap
= true;
116 mState
.mLength
= aString
.Length();
121 return SetTo(aString
.get(), aString
.Length(), aUpdateBidi
, aForce2b
);
125 * Append aData to the end of this fragment. If aUpdateBidi is true, contents
126 * of the fragment will be scanned, and mState.mIsBidi will be turned on if
127 * it includes any Bidi characters.
128 * If aForce2b is true, the string will be stored as char16_t as is. Then,
129 * you can access the value faster but may waste memory if all characters
130 * are less than U+0100.
132 bool Append(const char16_t
* aBuffer
, uint32_t aLength
, bool aUpdateBidi
,
136 * Append the contents of this string fragment to aString
138 void AppendTo(nsAString
& aString
) const {
139 if (!AppendTo(aString
, mozilla::fallible
)) {
140 aString
.AllocFailed(aString
.Length() + GetLength());
145 * Append the contents of this string fragment to aString
146 * @return false if an out of memory condition is detected, true otherwise
148 [[nodiscard
]] bool AppendTo(nsAString
& aString
,
149 const mozilla::fallible_t
& aFallible
) const {
151 if (aString
.IsEmpty()) {
152 m2b
->ToString(mState
.mLength
, aString
);
155 bool ok
= aString
.Append(Get2b(), mState
.mLength
, aFallible
);
162 return AppendASCIItoUTF16(Substring(m1b
, mState
.mLength
), aString
,
168 * Append a substring of the contents of this string fragment to aString.
169 * @param aOffset where to start the substring in this text fragment
170 * @param aLength the length of the substring
172 void AppendTo(nsAString
& aString
, int32_t aOffset
, int32_t aLength
) const {
173 if (!AppendTo(aString
, aOffset
, aLength
, mozilla::fallible
)) {
174 aString
.AllocFailed(aString
.Length() + aLength
);
179 * Append a substring of the contents of this string fragment to aString.
180 * @param aString the string in which to append
181 * @param aOffset where to start the substring in this text fragment
182 * @param aLength the length of the substring
183 * @return false if an out of memory condition is detected, true otherwise
185 [[nodiscard
]] bool AppendTo(nsAString
& aString
, int32_t aOffset
,
187 const mozilla::fallible_t
& aFallible
) const {
189 bool ok
= aString
.Append(Get2b() + aOffset
, aLength
, aFallible
);
196 return AppendASCIItoUTF16(Substring(m1b
+ aOffset
, aLength
), aString
,
202 * Make a copy of the fragment's contents starting at offset for
203 * count characters. The offset and count will be adjusted to
204 * lie within the fragment's data. The fragment's data is converted if necessary.
// aDest   - destination buffer of char16_t that receives the copy.
// aOffset - signed index of the first character to copy.
// aCount  - signed number of characters to copy.
// Only declared here; defined out of line. Note this member is not const.
// NOTE(review): aDest presumably must have room for aCount char16_t units --
// confirm against the implementation.
207 void CopyTo(char16_t
* aDest
, int32_t aOffset
, int32_t aCount
);
210 * Return the character in the text-fragment at the given
211 * index. This always returns a char16_t.
213 char16_t
CharAt(int32_t aIndex
) const {
214 MOZ_ASSERT(uint32_t(aIndex
) < mState
.mLength
, "bad index");
215 return mState
.mIs2b
? Get2b()[aIndex
]
216 : static_cast<unsigned char>(m1b
[aIndex
]);
220 * IsHighSurrogateFollowedByLowSurrogateAt() returns true if the character at
221 * aIndex is a high surrogate and it's followed by a low surrogate.
223 inline bool IsHighSurrogateFollowedByLowSurrogateAt(int32_t aIndex
) const {
224 MOZ_ASSERT(aIndex
>= 0);
225 MOZ_ASSERT(aIndex
< mState
.mLength
);
226 if (!mState
.mIs2b
|| aIndex
+ 1 >= mState
.mLength
) {
229 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex
], Get2b()[aIndex
+ 1]);
233 * IsLowSurrogateFollowingHighSurrogateAt() returns true if the character at
234 * aIndex is a low surrogate and it follows a high surrogate.
236 inline bool IsLowSurrogateFollowingHighSurrogateAt(int32_t aIndex
) const {
237 MOZ_ASSERT(aIndex
>= 0);
238 MOZ_ASSERT(aIndex
< mState
.mLength
);
239 if (!mState
.mIs2b
|| aIndex
<= 0) {
242 return NS_IS_SURROGATE_PAIR(Get2b()[aIndex
- 1], Get2b()[aIndex
]);
246 * ScalarValueAt() returns the Unicode scalar value at aIndex. If the character
247 * at aIndex is a high surrogate followed by a low surrogate, returns the character
248 * code for the pair. If the character at aIndex is a low surrogate, or a high
249 * surrogate that is not part of a pair, returns 0.
251 inline char32_t
ScalarValueAt(int32_t aIndex
) const {
252 MOZ_ASSERT(aIndex
>= 0);
253 MOZ_ASSERT(aIndex
< mState
.mLength
);
255 return static_cast<unsigned char>(m1b
[aIndex
]);
257 char16_t ch
= Get2b()[aIndex
];
258 if (!IS_SURROGATE(ch
)) {
261 if (aIndex
+ 1 < mState
.mLength
&& NS_IS_HIGH_SURROGATE(ch
)) {
262 char16_t nextCh
= Get2b()[aIndex
+ 1];
263 if (NS_IS_LOW_SURROGATE(nextCh
)) {
264 return SURROGATE_TO_UCS4(ch
, nextCh
);
270 void SetBidi(bool aBidi
) { mState
.mIsBidi
= aBidi
; }
272 struct FragmentBits
{
273 // uint32_t to ensure that the values are unsigned, because we
274 // want 0/1, not 0/-1!
275 // Making these bool causes Windows to not actually pack them,
276 // which causes crashes because we assume this structure is no more than 32 bits.
278 uint32_t mInHeap
: 1;
280 uint32_t mIsBidi
: 1;
281 // Note that when you change the bits of mLength, you also need to change
282 // NS_MAX_TEXT_FRAGMENT_LENGTH.
283 uint32_t mLength
: 29;
// Upper bound on a fragment's length: 0x1FFFFFFF == 2^29 - 1, the largest
// value the 29-bit mLength bit-field can hold. Keep the two in sync.
286 #define NS_MAX_TEXT_FRAGMENT_LENGTH (static_cast<uint32_t>(0x1FFFFFFF))
// Memory-reporting hook: takes a mozilla::MallocSizeOf callback and returns a
// size_t. Const, and only declared here (defined out of line).
// NOTE(review): by its name the result should not include sizeof(*this)
// itself -- confirm against the implementation.
288 size_t SizeOfExcludingThis(mozilla::MallocSizeOf aMallocSizeOf
) const;
291 * Check whether the text in this fragment is the same as the text in the other fragment (aOther).
// Compares this fragment with aOther and returns the result as a bool.
// [[nodiscard]]: callers must use the result. The method is const and takes
// aOther by const reference, so neither fragment is modified.
// Only declared here; defined out of line.
294 [[nodiscard
]] bool TextEquals(const nsTextFragment
& aOther
) const;
300 * Scan the contents of the fragment and turn on mState.mIsBidi if it
301 * includes any Bidi characters.
// aBuffer - pointer to the char16_t data to scan.
// aLength - number of char16_t units to scan, starting at aBuffer.
// Non-const member, since the scan may switch on the fragment's bidi bit.
// Only declared here; defined out of line.
303 void UpdateBidiFlag(const char16_t
* aBuffer
, uint32_t aLength
);
307 const char* m1b
; // This is const since it can point to shared data
316 #endif /* nsTextFragment_h___ */