1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
16 * NOTE: nsScannerString (and the other classes defined in this file) are
17 * not related to nsAString or any of the other xpcom/string classes.
19 * nsScannerString is based on the nsSlidingString implementation that used
20 * to live in xpcom/string. Now that nsAString is limited to representing
21 * only single fragment strings, nsSlidingString can no longer be used.
23 * An advantage to this design is that it does not employ any virtual
26 * This file uses SCC-style indenting in deference to the nsSlidingString
27 * code from which this code is derived ;-)
30 class nsScannerIterator
;
31 class nsScannerSubstring
;
32 class nsScannerString
;
37 * This class maintains a list of heap-allocated Buffer objects. The buffers
38 * are maintained in a circular linked list. Each buffer has a usage count
39 * that is decremented by the owning nsScannerSubstring.
41 * The buffer list itself is reference counted. This allows the buffer list
42 * to be shared by multiple nsScannerSubstring objects. The reference
43 * counting is not thread-safe; thread safety is not a requirement here.
45 * When a nsScannerSubstring releases its reference to a buffer list, it
46 * decrements the usage count of the first buffer in the buffer list that it
47 * was referencing. It informs the buffer list that it can discard buffers
48 * starting at that prefix. The buffer list will do so if the usage count of
49 * that buffer is 0 and if it is the first buffer in the list. It will
50 * continue to prune buffers starting from the front of the buffer list until
51 * it finds a buffer that has a usage count that is non-zero.
53 class nsScannerBufferList
{
56 * Buffer objects are directly followed by a data segment. The start
57 * of the data segment is determined by incrementing the |this| pointer
60 class Buffer
: public mozilla::LinkedListElement
<Buffer
> {
62 void IncrementUsageCount() { ++mUsageCount
; }
63 void DecrementUsageCount() { --mUsageCount
; }
65 bool IsInUse() const { return mUsageCount
!= 0; }
67 const char16_t
* DataStart() const { return (const char16_t
*)(this + 1); }
68 char16_t
* DataStart() { return (char16_t
*)(this + 1); }
70 const char16_t
* DataEnd() const { return mDataEnd
; }
71 char16_t
* DataEnd() { return mDataEnd
; }
73 const Buffer
* Next() const { return getNext(); }
74 Buffer
* Next() { return getNext(); }
76 const Buffer
* Prev() const { return getPrevious(); }
77 Buffer
* Prev() { return getPrevious(); }
79 uint32_t DataLength() const { return mDataEnd
- DataStart(); }
80 void SetDataLength(uint32_t len
) { mDataEnd
= DataStart() + len
; }
83 friend class nsScannerBufferList
;
90 * Position objects serve as lightweight pointers into a buffer list.
91 * The mPosition member must lie between mBuffer->DataStart()
92 * and mBuffer->DataEnd() (inclusive).
96 Position() : mBuffer(nullptr), mPosition(nullptr) {}
98 Position(Buffer
* buffer
, char16_t
* position
)
99 : mBuffer(buffer
), mPosition(position
) {}
101 inline explicit Position(const nsScannerIterator
& aIter
);
103 inline Position
& operator=(const nsScannerIterator
& aIter
);
105 static size_t Distance(const Position
& p1
, const Position
& p2
);
111 static Buffer
* AllocBufferFromString(const nsAString
&);
112 static Buffer
* AllocBuffer(uint32_t capacity
); // capacity = number of chars
114 explicit nsScannerBufferList(Buffer
* buf
) : mRefCnt(0) {
115 mBuffers
.insertBack(buf
);
118 void AddRef() { ++mRefCnt
; }
120 if (--mRefCnt
== 0) delete this;
123 void Append(Buffer
* buf
) { mBuffers
.insertBack(buf
); }
124 void InsertAfter(Buffer
* buf
, Buffer
* prev
) { prev
->setNext(buf
); }
125 void SplitBuffer(const Position
&);
126 void DiscardUnreferencedPrefix(Buffer
*);
128 Buffer
* Head() { return mBuffers
.getFirst(); }
129 const Buffer
* Head() const { return mBuffers
.getFirst(); }
131 Buffer
* Tail() { return mBuffers
.getLast(); }
132 const Buffer
* Tail() const { return mBuffers
.getLast(); }
135 friend class nsScannerSubstring
;
137 ~nsScannerBufferList() { ReleaseAll(); }
141 mozilla::LinkedList
<Buffer
> mBuffers
;
145 * nsScannerFragment represents a "slice" of a Buffer object.
147 struct nsScannerFragment
{
148 typedef nsScannerBufferList::Buffer Buffer
;
150 const Buffer
* mBuffer
;
151 const char16_t
* mFragmentStart
;
152 const char16_t
* mFragmentEnd
;
156 * nsScannerSubstring is the base class for nsScannerString. It provides
157 * access to iterators and methods to bind the substring to another
158 * substring or nsAString instance.
160 * This class owns the buffer list.
162 class nsScannerSubstring
{
164 typedef nsScannerBufferList::Buffer Buffer
;
165 typedef nsScannerBufferList::Position Position
;
166 typedef uint32_t size_type
;
168 nsScannerSubstring();
169 explicit nsScannerSubstring(const nsAString
& s
);
171 ~nsScannerSubstring();
173 nsScannerIterator
& BeginReading(nsScannerIterator
& iter
) const;
174 nsScannerIterator
& EndReading(nsScannerIterator
& iter
) const;
176 size_type
Length() const { return mLength
; }
178 int32_t CountChar(char16_t
) const;
180 void Rebind(const nsScannerSubstring
&, const nsScannerIterator
&,
181 const nsScannerIterator
&);
182 void Rebind(const nsAString
&);
184 const nsAString
& AsString() const;
186 bool GetNextFragment(nsScannerFragment
&) const;
187 bool GetPrevFragment(nsScannerFragment
&) const;
189 static inline Buffer
* AllocBufferFromString(const nsAString
& aStr
) {
190 return nsScannerBufferList::AllocBufferFromString(aStr
);
192 static inline Buffer
* AllocBuffer(size_type aCapacity
) {
193 return nsScannerBufferList::AllocBuffer(aCapacity
);
197 void acquire_ownership_of_buffer_list() const {
198 mBufferList
->AddRef();
199 mStart
.mBuffer
->IncrementUsageCount();
202 void release_ownership_of_buffer_list() {
204 mStart
.mBuffer
->DecrementUsageCount();
205 mBufferList
->DiscardUnreferencedPrefix(mStart
.mBuffer
);
206 mBufferList
->Release();
210 void init_range_from_buffer_list() {
211 mStart
.mBuffer
= mBufferList
->Head();
212 mStart
.mPosition
= mStart
.mBuffer
->DataStart();
214 mEnd
.mBuffer
= mBufferList
->Tail();
215 mEnd
.mPosition
= mEnd
.mBuffer
->DataEnd();
217 mLength
= Position::Distance(mStart
, mEnd
);
222 nsScannerBufferList
* mBufferList
;
225 // these fields are used to implement AsString
226 nsDependentSubstring mFlattenedRep
;
229 friend class nsScannerSharedSubstring
;
233 * nsScannerString provides methods to grow and modify a buffer list.
235 class nsScannerString
: public nsScannerSubstring
{
237 explicit nsScannerString(Buffer
*);
239 // You are giving ownership of the buffer to the string: it takes and keeps
240 // your buffer, deleting it when done.
241 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
242 // for use with this function.
243 void AppendBuffer(Buffer
*);
245 void DiscardPrefix(const nsScannerIterator
&);
246 // any other way you want to do this?
248 void UngetReadable(const nsAString
& aReadable
,
249 const nsScannerIterator
& aCurrentPosition
);
253 * nsScannerSharedSubstring implements copy-on-write semantics for
254 * nsScannerSubstring. When you call .writable(), it will copy the data
255 * and return a mutable string object. This class also manages releasing
256 * the reference to the scanner buffer when it is no longer needed.
259 class nsScannerSharedSubstring
{
261 nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {}
263 ~nsScannerSharedSubstring() {
264 if (mBufferList
) ReleaseBuffer();
267 // Acquire a copy-on-write reference to the given substring.
268 void Rebind(const nsScannerIterator
& aStart
, const nsScannerIterator
& aEnd
);
270 // Get a mutable reference to this string
271 nsAString
& writable() {
272 if (mBufferList
) MakeMutable();
277 // Get a const reference to this string
278 const nsAString
& str() const { return mString
; }
281 typedef nsScannerBufferList::Buffer Buffer
;
283 void ReleaseBuffer();
286 nsDependentSubstring mString
;
288 nsScannerBufferList
* mBufferList
;
292 * nsScannerIterator works just like nsReadingIterator<CharT> except that
293 * it knows how to iterate over a list of scanner buffers.
295 class nsScannerIterator
{
297 typedef nsScannerIterator self_type
;
298 typedef ptrdiff_t difference_type
;
299 typedef char16_t value_type
;
300 typedef const char16_t
* pointer
;
301 typedef const char16_t
& reference
;
302 typedef nsScannerSubstring::Buffer Buffer
;
305 nsScannerFragment mFragment
;
306 const char16_t
* mPosition
;
307 const nsScannerSubstring
* mOwner
;
309 friend class nsScannerSubstring
;
310 friend class nsScannerSharedSubstring
;
313 // The auto-generated special member functions are all OK:
314 //   nsScannerIterator();                                     // default constructor
315 //   nsScannerIterator(const nsScannerIterator&);             // copy-constructor
316 //   nsScannerIterator& operator=(const nsScannerIterator&);  // copy-assignment operator
318 inline void normalize_forward();
319 inline void normalize_backward();
321 pointer
get() const { return mPosition
; }
323 char16_t
operator*() const { return *get(); }
325 const nsScannerFragment
& fragment() const { return mFragment
; }
327 const Buffer
* buffer() const { return mFragment
.mBuffer
; }
329 self_type
& operator++() {
335 self_type
operator++(int) {
336 self_type
result(*this);
342 self_type
& operator--() {
343 normalize_backward();
348 self_type
operator--(int) {
349 self_type
result(*this);
350 normalize_backward();
355 difference_type
size_forward() const {
356 return mFragment
.mFragmentEnd
- mPosition
;
359 difference_type
size_backward() const {
360 return mPosition
- mFragment
.mFragmentStart
;
363 self_type
& advance(difference_type n
) {
365 difference_type one_hop
= std::min(n
, size_forward());
367 NS_ASSERTION(one_hop
> 0,
368 "Infinite loop: can't advance a reading iterator beyond the "
370 // perhaps I should |break| if |!one_hop|?
372 mPosition
+= one_hop
;
378 normalize_backward();
379 difference_type one_hop
= std::max(n
, -size_backward());
381 NS_ASSERTION(one_hop
< 0,
382 "Infinite loop: can't advance (backward) a reading iterator "
383 "beyond the end of a string");
384 // perhaps I should |break| if |!one_hop|?
386 mPosition
+= one_hop
;
394 inline bool SameFragment(const nsScannerIterator
& a
,
395 const nsScannerIterator
& b
) {
396 return a
.fragment().mFragmentStart
== b
.fragment().mFragmentStart
;
400 * this class is needed in order to make use of the methods in nsAlgorithm.h
403 struct nsCharSourceTraits
<nsScannerIterator
> {
404 typedef nsScannerIterator::difference_type difference_type
;
406 static uint32_t readable_distance(const nsScannerIterator
& first
,
407 const nsScannerIterator
& last
) {
408 return uint32_t(SameFragment(first
, last
) ? last
.get() - first
.get()
409 : first
.size_forward());
412 static const nsScannerIterator::value_type
* read(
413 const nsScannerIterator
& iter
) {
417 static void advance(nsScannerIterator
& s
, difference_type n
) { s
.advance(n
); }
421 * inline methods follow
424 inline void nsScannerIterator::normalize_forward() {
425 while (mPosition
== mFragment
.mFragmentEnd
&&
426 mOwner
->GetNextFragment(mFragment
))
427 mPosition
= mFragment
.mFragmentStart
;
430 inline void nsScannerIterator::normalize_backward() {
431 while (mPosition
== mFragment
.mFragmentStart
&&
432 mOwner
->GetPrevFragment(mFragment
))
433 mPosition
= mFragment
.mFragmentEnd
;
436 inline bool operator==(const nsScannerIterator
& lhs
,
437 const nsScannerIterator
& rhs
) {
438 return lhs
.get() == rhs
.get();
441 inline bool operator!=(const nsScannerIterator
& lhs
,
442 const nsScannerIterator
& rhs
) {
443 return lhs
.get() != rhs
.get();
446 inline nsScannerBufferList::Position::Position(const nsScannerIterator
& aIter
)
447 : mBuffer(const_cast<Buffer
*>(aIter
.buffer())),
448 mPosition(const_cast<char16_t
*>(aIter
.get())) {}
450 inline nsScannerBufferList::Position
& nsScannerBufferList::Position::operator=(
451 const nsScannerIterator
& aIter
) {
452 mBuffer
= const_cast<Buffer
*>(aIter
.buffer());
453 mPosition
= const_cast<char16_t
*>(aIter
.get());
458 * scanner string utils
460 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
461 * Here we provide only the methods that the htmlparser module needs.
464 inline size_t Distance(const nsScannerIterator
& aStart
,
465 const nsScannerIterator
& aEnd
) {
466 typedef nsScannerBufferList::Position Position
;
467 return Position::Distance(Position(aStart
), Position(aEnd
));
470 bool CopyUnicodeTo(const nsScannerIterator
& aSrcStart
,
471 const nsScannerIterator
& aSrcEnd
, nsAString
& aDest
);
473 inline bool CopyUnicodeTo(const nsScannerSubstring
& aSrc
, nsAString
& aDest
) {
474 nsScannerIterator begin
, end
;
475 return CopyUnicodeTo(aSrc
.BeginReading(begin
), aSrc
.EndReading(end
), aDest
);
478 bool AppendUnicodeTo(const nsScannerIterator
& aSrcStart
,
479 const nsScannerIterator
& aSrcEnd
, nsAString
& aDest
);
481 inline bool AppendUnicodeTo(const nsScannerSubstring
& aSrc
, nsAString
& aDest
) {
482 nsScannerIterator begin
, end
;
483 return AppendUnicodeTo(aSrc
.BeginReading(begin
), aSrc
.EndReading(end
), aDest
);
486 bool AppendUnicodeTo(const nsScannerIterator
& aSrcStart
,
487 const nsScannerIterator
& aSrcEnd
,
488 nsScannerSharedSubstring
& aDest
);
490 bool FindCharInReadable(char16_t aChar
, nsScannerIterator
& aStart
,
491 const nsScannerIterator
& aEnd
);
493 bool FindInReadable(const nsAString
& aPattern
, nsScannerIterator
& aStart
,
494 nsScannerIterator
& aEnd
,
495 nsStringComparator
= nsTDefaultStringComparator
);
497 bool RFindInReadable(const nsAString
& aPattern
, nsScannerIterator
& aStart
,
498 nsScannerIterator
& aEnd
,
499 nsStringComparator
= nsTDefaultStringComparator
);
501 inline bool CaseInsensitiveFindInReadable(const nsAString
& aPattern
,
502 nsScannerIterator
& aStart
,
503 nsScannerIterator
& aEnd
) {
504 return FindInReadable(aPattern
, aStart
, aEnd
,
505 nsCaseInsensitiveStringComparator
);
508 #endif // !defined(nsScannerString_h___)