1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 #include "nsScannerString.h"
9 #include "mozilla/CheckedInt.h"
15 #define MAX_CAPACITY \
16 ((UINT32_MAX / sizeof(char16_t)) - (sizeof(Buffer) + sizeof(char16_t)))
18 nsScannerBufferList::Buffer
* nsScannerBufferList::AllocBufferFromString(
19 const nsAString
& aString
) {
20 uint32_t len
= aString
.Length();
21 Buffer
* buf
= AllocBuffer(len
);
24 nsAString::const_iterator source
;
25 aString
.BeginReading(source
);
26 nsCharTraits
<char16_t
>::copy(buf
->DataStart(), source
.get(), len
);
31 nsScannerBufferList::Buffer
* nsScannerBufferList::AllocBuffer(
33 if (capacity
> MAX_CAPACITY
) return nullptr;
35 void* ptr
= malloc(sizeof(Buffer
) + (capacity
+ 1) * sizeof(char16_t
));
36 if (!ptr
) return nullptr;
38 Buffer
* buf
= new (ptr
) Buffer();
41 buf
->mDataEnd
= buf
->DataStart() + capacity
;
43 // XXX null terminate. this shouldn't be required, but we do it because
44 // nsScanner erroneously thinks it can dereference DataEnd :-(
45 *buf
->mDataEnd
= char16_t(0);
49 void nsScannerBufferList::ReleaseAll() {
50 while (!mBuffers
.isEmpty()) {
51 Buffer
* node
= mBuffers
.popFirst();
52 // printf(">>> freeing buffer @%p\n", node);
57 void nsScannerBufferList::SplitBuffer(const Position
& pos
) {
58 // splitting to the right keeps the work string and any extant token
59 // pointing to and holding a reference count on the same buffer.
61 Buffer
* bufferToSplit
= pos
.mBuffer
;
62 NS_ASSERTION(bufferToSplit
, "null pointer");
64 uint32_t splitOffset
= pos
.mPosition
- bufferToSplit
->DataStart();
65 NS_ASSERTION(pos
.mPosition
>= bufferToSplit
->DataStart() &&
66 splitOffset
<= bufferToSplit
->DataLength(),
67 "split offset is outside buffer");
69 uint32_t len
= bufferToSplit
->DataLength() - splitOffset
;
70 Buffer
* new_buffer
= AllocBuffer(len
);
72 nsCharTraits
<char16_t
>::copy(new_buffer
->DataStart(),
73 bufferToSplit
->DataStart() + splitOffset
, len
);
74 InsertAfter(new_buffer
, bufferToSplit
);
75 bufferToSplit
->SetDataLength(splitOffset
);
79 void nsScannerBufferList::DiscardUnreferencedPrefix(Buffer
* aBuf
) {
81 while (!mBuffers
.isEmpty() && !Head()->IsInUse()) {
82 Buffer
* buffer
= Head();
89 size_t nsScannerBufferList::Position::Distance(const Position
& aStart
,
90 const Position
& aEnd
) {
92 if (aStart
.mBuffer
== aEnd
.mBuffer
) {
93 result
= aEnd
.mPosition
- aStart
.mPosition
;
95 result
= aStart
.mBuffer
->DataEnd() - aStart
.mPosition
;
96 for (Buffer
* b
= aStart
.mBuffer
->Next(); b
!= aEnd
.mBuffer
; b
= b
->Next())
97 result
+= b
->DataLength();
98 result
+= aEnd
.mPosition
- aEnd
.mBuffer
->DataStart();
107 nsScannerSubstring::nsScannerSubstring()
108 : mStart(nullptr, nullptr),
109 mEnd(nullptr, nullptr),
110 mBufferList(nullptr),
114 nsScannerSubstring::nsScannerSubstring(const nsAString
& s
)
115 : mBufferList(nullptr), mIsDirty(true) {
119 nsScannerSubstring::~nsScannerSubstring() {
120 release_ownership_of_buffer_list();
123 int32_t nsScannerSubstring::CountChar(char16_t c
) const {
125 re-write this to use a counting sink
128 size_type result
= 0;
129 size_type lengthToExamine
= Length();
131 nsScannerIterator iter
;
132 for (BeginReading(iter
);;) {
133 int32_t lengthToExamineInThisFragment
= iter
.size_forward();
134 const char16_t
* fromBegin
= iter
.get();
136 NS_COUNT(fromBegin
, fromBegin
+ lengthToExamineInThisFragment
, c
));
137 if (!(lengthToExamine
-= lengthToExamineInThisFragment
)) return result
;
138 iter
.advance(lengthToExamineInThisFragment
);
140 // never reached; quiets warnings
144 void nsScannerSubstring::Rebind(const nsScannerSubstring
& aString
,
145 const nsScannerIterator
& aStart
,
146 const nsScannerIterator
& aEnd
) {
147 // allow for the case where &aString == this
149 aString
.acquire_ownership_of_buffer_list();
150 release_ownership_of_buffer_list();
154 mBufferList
= aString
.mBufferList
;
155 mLength
= Distance(aStart
, aEnd
);
159 void nsScannerSubstring::Rebind(const nsAString
& aString
) {
160 release_ownership_of_buffer_list();
162 mBufferList
= new nsScannerBufferList(AllocBufferFromString(aString
));
165 init_range_from_buffer_list();
166 acquire_ownership_of_buffer_list();
169 const nsAString
& nsScannerSubstring::AsString() const {
171 nsScannerSubstring
* mutable_this
= const_cast<nsScannerSubstring
*>(this);
173 if (mStart
.mBuffer
== mEnd
.mBuffer
) {
174 // We only have a single fragment to deal with, so just return it
176 mutable_this
->mFlattenedRep
.Rebind(mStart
.mPosition
, mEnd
.mPosition
);
178 // Otherwise, we need to copy the data into a flattened buffer.
179 nsScannerIterator start
, end
;
180 CopyUnicodeTo(BeginReading(start
), EndReading(end
),
181 mutable_this
->mFlattenedRep
);
184 mutable_this
->mIsDirty
= false;
187 return mFlattenedRep
;
190 nsScannerIterator
& nsScannerSubstring::BeginReading(
191 nsScannerIterator
& iter
) const {
194 iter
.mFragment
.mBuffer
= mStart
.mBuffer
;
195 iter
.mFragment
.mFragmentStart
= mStart
.mPosition
;
196 if (mStart
.mBuffer
== mEnd
.mBuffer
)
197 iter
.mFragment
.mFragmentEnd
= mEnd
.mPosition
;
199 iter
.mFragment
.mFragmentEnd
= mStart
.mBuffer
->DataEnd();
201 iter
.mPosition
= mStart
.mPosition
;
202 iter
.normalize_forward();
206 nsScannerIterator
& nsScannerSubstring::EndReading(
207 nsScannerIterator
& iter
) const {
210 iter
.mFragment
.mBuffer
= mEnd
.mBuffer
;
211 iter
.mFragment
.mFragmentEnd
= mEnd
.mPosition
;
212 if (mStart
.mBuffer
== mEnd
.mBuffer
)
213 iter
.mFragment
.mFragmentStart
= mStart
.mPosition
;
215 iter
.mFragment
.mFragmentStart
= mEnd
.mBuffer
->DataStart();
217 iter
.mPosition
= mEnd
.mPosition
;
218 // must not |normalize_backward| as that would likely invalidate tests like
219 // |while ( first != last )|
223 bool nsScannerSubstring::GetNextFragment(nsScannerFragment
& frag
) const {
224 // check to see if we are at the end of the buffer list
225 if (frag
.mBuffer
== mEnd
.mBuffer
) return false;
227 frag
.mBuffer
= frag
.mBuffer
->getNext();
229 if (frag
.mBuffer
== mStart
.mBuffer
)
230 frag
.mFragmentStart
= mStart
.mPosition
;
232 frag
.mFragmentStart
= frag
.mBuffer
->DataStart();
234 if (frag
.mBuffer
== mEnd
.mBuffer
)
235 frag
.mFragmentEnd
= mEnd
.mPosition
;
237 frag
.mFragmentEnd
= frag
.mBuffer
->DataEnd();
242 bool nsScannerSubstring::GetPrevFragment(nsScannerFragment
& frag
) const {
243 // check to see if we are at the beginning of the buffer list
244 if (frag
.mBuffer
== mStart
.mBuffer
) return false;
246 frag
.mBuffer
= frag
.mBuffer
->getPrevious();
248 if (frag
.mBuffer
== mStart
.mBuffer
)
249 frag
.mFragmentStart
= mStart
.mPosition
;
251 frag
.mFragmentStart
= frag
.mBuffer
->DataStart();
253 if (frag
.mBuffer
== mEnd
.mBuffer
)
254 frag
.mFragmentEnd
= mEnd
.mPosition
;
256 frag
.mFragmentEnd
= frag
.mBuffer
->DataEnd();
265 nsScannerString::nsScannerString(Buffer
* aBuf
) {
266 mBufferList
= new nsScannerBufferList(aBuf
);
268 init_range_from_buffer_list();
269 acquire_ownership_of_buffer_list();
272 void nsScannerString::AppendBuffer(Buffer
* aBuf
) {
273 mBufferList
->Append(aBuf
);
274 mLength
+= aBuf
->DataLength();
277 mEnd
.mPosition
= aBuf
->DataEnd();
282 void nsScannerString::DiscardPrefix(const nsScannerIterator
& aIter
) {
283 Position
old_start(mStart
);
285 mLength
-= Position::Distance(old_start
, mStart
);
287 mStart
.mBuffer
->IncrementUsageCount();
288 old_start
.mBuffer
->DecrementUsageCount();
290 mBufferList
->DiscardUnreferencedPrefix(old_start
.mBuffer
);
295 void nsScannerString::UngetReadable(const nsAString
& aReadable
,
296 const nsScannerIterator
& aInsertPoint
)
298 * Warning: this routine manipulates the shared buffer list in an
299 * unexpected way. The original design did not really allow for
300 * insertions, but this call promises that if called for a point after the
301 * end of all extant token strings, that no token string or the work string
302 * will be invalidated.
304 * This routine is protected because it is the responsibility of the
305 * derived class to keep those promises.
308 Position
insertPos(aInsertPoint
);
310 mBufferList
->SplitBuffer(insertPos
);
311 // splitting to the right keeps the work string and any extant token
312 // pointing to and holding a reference count on the same buffer
314 Buffer
* new_buffer
= AllocBufferFromString(aReadable
);
315 // make a new buffer with all the data to insert...
316 // ALERT: we may have empty space to re-use in the split buffer,
317 // measure the cost of this and decide if we should do the work to fill
320 Buffer
* buffer_to_split
= insertPos
.mBuffer
;
321 mBufferList
->InsertAfter(new_buffer
, buffer_to_split
);
322 mLength
+= aReadable
.Length();
324 mEnd
.mBuffer
= mBufferList
->Tail();
325 mEnd
.mPosition
= mEnd
.mBuffer
->DataEnd();
331 * nsScannerSharedSubstring
334 void nsScannerSharedSubstring::Rebind(const nsScannerIterator
& aStart
,
335 const nsScannerIterator
& aEnd
) {
336 // If the start and end positions are inside the same buffer, we must
337 // acquire ownership of the buffer. If not, we can optimize by not holding
340 Buffer
* buffer
= const_cast<Buffer
*>(aStart
.buffer());
341 bool sameBuffer
= buffer
== aEnd
.buffer();
343 nsScannerBufferList
* bufferList
;
346 bufferList
= aStart
.mOwner
->mBufferList
;
347 bufferList
->AddRef();
348 buffer
->IncrementUsageCount();
351 if (mBufferList
) ReleaseBuffer();
355 mBufferList
= bufferList
;
356 mString
.Rebind(aStart
.mPosition
, aEnd
.mPosition
);
359 mBufferList
= nullptr;
360 CopyUnicodeTo(aStart
, aEnd
, mString
);
364 void nsScannerSharedSubstring::ReleaseBuffer() {
365 NS_ASSERTION(mBufferList
, "Should only be called with non-null mBufferList");
366 mBuffer
->DecrementUsageCount();
367 mBufferList
->DiscardUnreferencedPrefix(mBuffer
);
368 mBufferList
->Release();
371 void nsScannerSharedSubstring::MakeMutable() {
372 nsString
temp(mString
); // this will force a copy of the data
373 mString
.Assign(temp
); // mString will now share the just-allocated buffer
378 mBufferList
= nullptr;
382 * utils -- based on code from nsReadableUtils.cpp
385 // private helper function
386 static inline nsAString::iterator
& copy_multifragment_string(
387 nsScannerIterator
& first
, const nsScannerIterator
& last
,
388 nsAString::iterator
& result
) {
389 typedef nsCharSourceTraits
<nsScannerIterator
> source_traits
;
390 typedef nsCharSinkTraits
<nsAString::iterator
> sink_traits
;
392 while (first
!= last
) {
393 uint32_t distance
= source_traits::readable_distance(first
, last
);
394 sink_traits::write(result
, source_traits::read(first
), distance
);
395 NS_ASSERTION(distance
> 0,
396 "|copy_multifragment_string| will never terminate");
397 source_traits::advance(first
, distance
);
403 bool CopyUnicodeTo(const nsScannerIterator
& aSrcStart
,
404 const nsScannerIterator
& aSrcEnd
, nsAString
& aDest
) {
405 mozilla::CheckedInt
<nsAString::size_type
> distance(
406 Distance(aSrcStart
, aSrcEnd
));
407 if (!distance
.isValid()) {
408 return false; // overflow detected
411 if (!aDest
.SetLength(distance
.value(), mozilla::fallible
)) {
413 return false; // out of memory
415 auto writer
= aDest
.BeginWriting();
416 nsScannerIterator
fromBegin(aSrcStart
);
418 copy_multifragment_string(fromBegin
, aSrcEnd
, writer
);
422 bool AppendUnicodeTo(const nsScannerIterator
& aSrcStart
,
423 const nsScannerIterator
& aSrcEnd
,
424 nsScannerSharedSubstring
& aDest
) {
425 // Check whether we can just create a dependent string.
426 if (aDest
.str().IsEmpty()) {
427 // We can just make |aDest| point to the buffer.
428 // This will take care of copying if the buffer spans fragments.
429 aDest
.Rebind(aSrcStart
, aSrcEnd
);
432 // The dest string is not empty, so it can't be a dependent substring.
433 return AppendUnicodeTo(aSrcStart
, aSrcEnd
, aDest
.writable());
436 bool AppendUnicodeTo(const nsScannerIterator
& aSrcStart
,
437 const nsScannerIterator
& aSrcEnd
, nsAString
& aDest
) {
438 const nsAString::size_type oldLength
= aDest
.Length();
439 mozilla::CheckedInt
<nsAString::size_type
> newLen(
440 Distance(aSrcStart
, aSrcEnd
));
442 if (!newLen
.isValid()) {
443 return false; // overflow detected
446 if (!aDest
.SetLength(newLen
.value(), mozilla::fallible
))
447 return false; // out of memory
448 auto writer
= aDest
.BeginWriting();
449 std::advance(writer
, oldLength
);
450 nsScannerIterator
fromBegin(aSrcStart
);
452 copy_multifragment_string(fromBegin
, aSrcEnd
, writer
);
456 bool FindCharInReadable(char16_t aChar
, nsScannerIterator
& aSearchStart
,
457 const nsScannerIterator
& aSearchEnd
) {
458 while (aSearchStart
!= aSearchEnd
) {
459 int32_t fragmentLength
;
460 if (SameFragment(aSearchStart
, aSearchEnd
))
461 fragmentLength
= aSearchEnd
.get() - aSearchStart
.get();
463 fragmentLength
= aSearchStart
.size_forward();
465 const char16_t
* charFoundAt
=
466 nsCharTraits
<char16_t
>::find(aSearchStart
.get(), fragmentLength
, aChar
);
468 aSearchStart
.advance(charFoundAt
- aSearchStart
.get());
472 aSearchStart
.advance(fragmentLength
);
478 bool FindInReadable(const nsAString
& aPattern
, nsScannerIterator
& aSearchStart
,
479 nsScannerIterator
& aSearchEnd
,
480 const nsStringComparator compare
) {
481 bool found_it
= false;
483 // only bother searching at all if we're given a non-empty range to search
484 if (aSearchStart
!= aSearchEnd
) {
485 nsAString::const_iterator aPatternStart
, aPatternEnd
;
486 aPattern
.BeginReading(aPatternStart
);
487 aPattern
.EndReading(aPatternEnd
);
489 // outer loop keeps searching till we find it or run out of string to search
491 // fast inner loop (that's what it's called, not what it is) looks for a
493 while (aSearchStart
!= aSearchEnd
&&
494 compare(aPatternStart
.get(), aSearchStart
.get(), 1, 1))
497 // if we broke out of the `fast' loop because we're out of string ...
498 // we're done: no match
499 if (aSearchStart
== aSearchEnd
) break;
501 // otherwise, we're at a potential match, let's see if we really hit one
502 nsAString::const_iterator
testPattern(aPatternStart
);
503 nsScannerIterator
testSearch(aSearchStart
);
505 // slow inner loop verifies the potential match (found by the `fast' loop)
506 // at the current position
508 // we already compared the first character in the outer loop,
509 // so we'll advance before the next comparison
513 // if we verified all the way to the end of the pattern, then we found
515 if (testPattern
== aPatternEnd
) {
517 aSearchEnd
= testSearch
; // return the exact found range through the
522 // if we got to end of the string we're searching before we hit the end
524 // pattern, we'll never find what we're looking for
525 if (testSearch
== aSearchEnd
) {
526 aSearchStart
= aSearchEnd
;
530 // else if we mismatched ... it's time to advance to the next search
532 // and get back into the `fast' loop
533 if (compare(testPattern
.get(), testSearch
.get(), 1, 1)) {
545 * This implementation is simple, but does too much work.
546 * It searches the entire string from left to right, and returns the last match
547 * found, if any. This implementation will be replaced when I get
548 * |reverse_iterator|s working.
550 bool RFindInReadable(const nsAString
& aPattern
, nsScannerIterator
& aSearchStart
,
551 nsScannerIterator
& aSearchEnd
,
552 const nsStringComparator aComparator
) {
553 bool found_it
= false;
555 nsScannerIterator
savedSearchEnd(aSearchEnd
);
556 nsScannerIterator
searchStart(aSearchStart
), searchEnd(aSearchEnd
);
558 while (searchStart
!= searchEnd
) {
559 if (FindInReadable(aPattern
, searchStart
, searchEnd
, aComparator
)) {
562 // this is the best match so far, so remember it
563 aSearchStart
= searchStart
;
564 aSearchEnd
= searchEnd
;
566 // ...and get ready to search some more
567 // (it's tempting to set |searchStart=searchEnd| ... but that misses
568 // overlapping patterns)
570 searchEnd
= savedSearchEnd
;
574 // if we never found it, return an empty range
575 if (!found_it
) aSearchStart
= aSearchEnd
;