1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
16 * NOTE: nsScannerString (and the other classes defined in this file) are
17 * not related to nsAString or any of the other xpcom/string classes.
19 * nsScannerString is based on the nsSlidingString implementation that used
20 * to live in xpcom/string. Now that nsAString is limited to representing
21 * only single fragment strings, nsSlidingString can no longer be used.
23 * An advantage to this design is that it does not employ any virtual functions.
26 * This file uses SCC-style indenting in deference to the nsSlidingString
27 * code from which this code is derived ;-)
// Forward declarations. They let declarations further down name these
// classes before their definitions appear, e.g. the Position constructor
// that takes a |const nsScannerIterator&| and the |friend class
// nsScannerSubstring| declaration inside nsScannerBufferList.
30 class nsScannerIterator
;
31 class nsScannerSubstring
;
32 class nsScannerString
;
37 * This class maintains a list of heap-allocated Buffer objects. The buffers
38 * are maintained in a circular linked list. Each buffer has a usage count
39 * that is decremented by the owning nsScannerSubstring.
41 * The buffer list itself is reference counted. This allows the buffer list
42 * to be shared by multiple nsScannerSubstring objects. The reference
43 * counting is not threadsafe, which is not at all a requirement.
45 * When a nsScannerSubstring releases its reference to a buffer list, it
46 * decrements the usage count of the first buffer in the buffer list that it
47 * was referencing. It informs the buffer list that it can discard buffers
48 * starting at that prefix. The buffer list will do so if the usage count of
49 * that buffer is 0 and if it is the first buffer in the list. It will
50 * continue to prune buffers starting from the front of the buffer list until
51 * it finds a buffer that has a usage count that is non-zero.
// Holds a mozilla::LinkedList<Buffer> (|mBuffers|) together with an intrusive
// reference count (|mRefCnt|, incremented by AddRef()).
// NOTE(review): access specifiers and the closing brace of this class are not
// present in this extract of the header.
53 class nsScannerBufferList
{
56 * Buffer objects are directly followed by a data segment. The start
57 * of the data segment is determined by incrementing the |this| pointer by one (i.e. |this + 1|).
// A list node: Buffer derives from mozilla::LinkedListElement<Buffer>, which
// is what allows it to be stored in the mozilla::LinkedList<Buffer> held by
// nsScannerBufferList.
// NOTE(review): the declarations of |mUsageCount| and |mDataEnd|, the access
// specifiers, and the closing brace are not present in this extract.
60 class Buffer
: public mozilla::LinkedListElement
<Buffer
> {
// Usage count bookkeeping: plain increment/decrement of |mUsageCount| with no
// bounds checking in these accessors.
62 void IncrementUsageCount() { ++mUsageCount
; }
63 void DecrementUsageCount() { --mUsageCount
; }
// True while the usage count is non-zero.
65 bool IsInUse() const { return mUsageCount
!= 0; }
// The character data starts at the first address past this Buffer object
// (|this + 1|), reinterpreted as char16_t storage.
67 const char16_t
* DataStart() const { return (const char16_t
*)(this + 1); }
68 char16_t
* DataStart() { return (char16_t
*)(this + 1); }
// End of the data, as recorded in |mDataEnd| (see SetDataLength()).
70 const char16_t
* DataEnd() const { return mDataEnd
; }
71 char16_t
* DataEnd() { return mDataEnd
; }
// Neighbouring buffers, forwarded to LinkedListElement::getNext().
// NOTE(review): presumably null at the end of the list — confirm against
// mozilla/LinkedList.h.
73 const Buffer
* Next() const { return getNext(); }
74 Buffer
* Next() { return getNext(); }
// Forwarded to LinkedListElement::getPrevious(); same caveat as Next().
76 const Buffer
* Prev() const { return getPrevious(); }
77 Buffer
* Prev() { return getPrevious(); }
// Number of char16_t units between DataStart() and |mDataEnd|; the pointer
// difference is converted to uint32_t on return.
79 uint32_t DataLength() const { return mDataEnd
- DataStart(); }
// Sets |mDataEnd| to DataStart() + len. No capacity check is done here.
80 void SetDataLength(uint32_t len
) { mDataEnd
= DataStart() + len
; }
// Gives the enclosing list class access to Buffer's non-public members.
83 friend class nsScannerBufferList
;
90 * Position objects serve as lightweight pointers into a buffer list.
91 * The mPosition member must lie within the range delimited by
92 * mBuffer->DataStart() and mBuffer->DataEnd() (inclusive).
// NOTE(review): the |class Position {| header and the declarations of the
// |mBuffer| / |mPosition| data members are not present in this extract; the
// members below belong to nsScannerBufferList::Position.
//
// Default construction yields a null position (both members nullptr).
96 Position() : mBuffer(nullptr), mPosition(nullptr) {}
// Stores the given buffer and character pointer as-is; no validation that
// |position| lies inside |buffer| is performed here.
98 Position(Buffer
* buffer
, char16_t
* position
)
99 : mBuffer(buffer
), mPosition(position
) {}
// Conversion from an iterator. Declared here, defined inline near the end of
// this header, where it copies the iterator's buffer() and get() values.
101 inline explicit Position(const nsScannerIterator
& aIter
);
// Assignment from an iterator; also defined inline near the end of this
// header.
103 inline Position
& operator=(const nsScannerIterator
& aIter
);
// Declared only; the definition is not in this header extract.
// NOTE(review): presumably the number of char16_t units from p1 to p2 (it is
// used to compute nsScannerSubstring::mLength) — confirm in the .cpp.
105 static size_t Distance(const Position
& p1
, const Position
& p2
);
// Buffer factories. Only declared here; definitions live outside this header
// extract.
111 static Buffer
* AllocBufferFromString(const nsAString
&);
112 static Buffer
* AllocBuffer(uint32_t capacity
); // capacity = number of chars
// Starts with a reference count of 0 and appends |buf| to |mBuffers|, so a
// caller is expected to AddRef() to take ownership (see
// nsScannerSubstring::acquire_ownership_of_buffer_list()).
// NOTE(review): the closing brace of this constructor is missing from the
// extract.
114 explicit nsScannerBufferList(Buffer
* buf
) : mRefCnt(0) {
115 mBuffers
.insertBack(buf
);
// Non-atomic increment of the reference count.
118 void AddRef() { ++mRefCnt
; }
// Decrements the reference count and deletes this object when it reaches 0.
// NOTE(review): the enclosing signature line (original line 119) is missing
// from the extract; this is presumably the body of Release(), which
// nsScannerSubstring::release_ownership_of_buffer_list() calls — confirm.
120 if (--mRefCnt
== 0) delete this;
// Appends |buf| at the back of the list.
123 void Append(Buffer
* buf
) { mBuffers
.insertBack(buf
); }
// Links |buf| into the list via |prev->setNext(buf)|.
// NOTE(review): relies on LinkedListElement::setNext() inserting its argument
// directly after |prev| — confirm against mozilla/LinkedList.h.
124 void InsertAfter(Buffer
* buf
, Buffer
* prev
) { prev
->setNext(buf
); }
// Declared only; definitions are not in this header extract.
125 void SplitBuffer(const Position
&);
126 void DiscardUnreferencedPrefix(Buffer
*);
// First buffer in the list (LinkedList::getFirst()).
128 Buffer
* Head() { return mBuffers
.getFirst(); }
129 const Buffer
* Head() const { return mBuffers
.getFirst(); }
// Last buffer in the list (LinkedList::getLast()).
131 Buffer
* Tail() { return mBuffers
.getLast(); }
132 const Buffer
* Tail() const { return mBuffers
.getLast(); }
// nsScannerSubstring may access this class's non-public members.
135 friend class nsScannerSubstring
;
// The destructor delegates cleanup to ReleaseAll().
// NOTE(review): ReleaseAll() is not declared anywhere in this extract.
137 ~nsScannerBufferList() { ReleaseAll(); }
// The list of buffers managed by this object.
// NOTE(review): the declaration of |mRefCnt| is not present in this extract.
141 mozilla::LinkedList
<Buffer
> mBuffers
;
145 * nsScannerFragment represents a "slice" of a Buffer object.
// Plain aggregate describing a character range: the buffer it refers to and
// the start/end pointers of the range. nsScannerIterator keeps one of these
// as its current fragment and measures against |mFragmentStart| /
// |mFragmentEnd| in size_backward() / size_forward().
// NOTE(review): the closing brace is missing from this extract.
147 struct nsScannerFragment
{
148 typedef nsScannerBufferList::Buffer Buffer
;
// Buffer this fragment refers to.
150 const Buffer
* mBuffer
;
// Start of the fragment's character range.
151 const char16_t
* mFragmentStart
;
// End of the fragment's character range; an iterator positioned here is
// moved on to the next fragment by normalize_forward().
152 const char16_t
* mFragmentEnd
;
156 * nsScannerSubstring is the base class for nsScannerString. It provides
157 * access to iterators and methods to bind the substring to another
158 * substring or nsAString instance.
160 * This class owns the buffer list.
// NOTE(review): access specifiers, the closing brace, and the declarations of
// |mStart|, |mEnd| and |mLength| are not present in this extract of the
// header; they are referenced by the inline members below.
162 class nsScannerSubstring
{
// Short names for the buffer-list helper types, plus the length type.
164 typedef nsScannerBufferList::Buffer Buffer
;
165 typedef nsScannerBufferList::Position Position
;
166 typedef uint32_t size_type
;
// Constructors/destructor are declared only; definitions are out of line.
168 nsScannerSubstring();
169 explicit nsScannerSubstring(const nsAString
& s
);
171 ~nsScannerSubstring();
// Take an iterator by reference and return a reference to an iterator.
// NOTE(review): presumably |iter| is positioned at the start/end of this
// substring and then returned — confirm in the .cpp.
173 nsScannerIterator
& BeginReading(nsScannerIterator
& iter
) const;
174 nsScannerIterator
& EndReading(nsScannerIterator
& iter
) const;
// Returns the cached |mLength| (computed with Position::Distance() in
// init_range_from_buffer_list()).
176 size_type
Length() const { return mLength
; }
// Rebinding overloads; declared only.
178 void Rebind(const nsScannerSubstring
&, const nsScannerIterator
&,
179 const nsScannerIterator
&);
180 void Rebind(const nsAString
&);
// Fragment stepping used by nsScannerIterator::normalize_forward() and
// normalize_backward(); those loops continue only while these return true.
182 bool GetNextFragment(nsScannerFragment
&) const;
183 bool GetPrevFragment(nsScannerFragment
&) const;
// Convenience wrappers that forward to the nsScannerBufferList factories.
// NOTE(review): the closing braces of both wrappers are missing from the
// extract.
185 static inline Buffer
* AllocBufferFromString(const nsAString
& aStr
) {
186 return nsScannerBufferList::AllocBufferFromString(aStr
);
188 static inline Buffer
* AllocBuffer(size_type aCapacity
) {
189 return nsScannerBufferList::AllocBuffer(aCapacity
);
// Takes a reference on the buffer list and marks the buffer at |mStart| as
// in use. Dereferences |mBufferList| and |mStart.mBuffer| unconditionally.
193 void acquire_ownership_of_buffer_list() const {
194 mBufferList
->AddRef();
195 mStart
.mBuffer
->IncrementUsageCount();
// Inverse of the above: drops the usage count on the start buffer, lets the
// list discard unreferenced leading buffers, then releases the list
// reference.
// NOTE(review): original line 199 is missing from the extract (possibly a
// guard on |mBufferList|) — check upstream before relying on null-safety.
198 void release_ownership_of_buffer_list() {
200 mStart
.mBuffer
->DecrementUsageCount();
201 mBufferList
->DiscardUnreferencedPrefix(mStart
.mBuffer
);
202 mBufferList
->Release();
// Makes this substring span the whole buffer list: |mStart| is the start of
// the head buffer's data, |mEnd| is the end of the tail buffer's data, and
// |mLength| is the distance between them.
206 void init_range_from_buffer_list() {
207 mStart
.mBuffer
= mBufferList
->Head();
208 mStart
.mPosition
= mStart
.mBuffer
->DataStart();
210 mEnd
.mBuffer
= mBufferList
->Tail();
211 mEnd
.mPosition
= mEnd
.mBuffer
->DataEnd();
213 mLength
= Position::Distance(mStart
, mEnd
);
// The buffer list this substring refers to; reference-counted through
// acquire_/release_ownership_of_buffer_list().
218 nsScannerBufferList
* mBufferList
;
// nsScannerSharedSubstring may access this class's non-public members.
221 friend class nsScannerSharedSubstring
;
225 * nsScannerString provides methods to grow and modify a buffer list.
// Publicly derives from nsScannerSubstring and adds operations that take
// Buffer objects or modify the string. All members are declared only; their
// definitions are out of line.
// NOTE(review): access specifiers and the closing brace are missing from this
// extract.
227 class nsScannerString
: public nsScannerSubstring
{
// Constructs the string from an initial buffer.
229 explicit nsScannerString(Buffer
*);
231 // you are giving ownership to the string, it takes and keeps your
232 // buffer, deleting it when done.
233 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
234 // for use with this function.
235 void AppendBuffer(Buffer
*);
// NOTE(review): presumably drops everything before the given iterator —
// confirm in the .cpp.
237 void DiscardPrefix(const nsScannerIterator
&);
238 // any other way you want to do this?
// NOTE(review): takes replacement text and a position; the exact semantics
// are not visible in this header — confirm in the .cpp.
240 void UngetReadable(const nsAString
& aReadable
,
241 const nsScannerIterator
& aCurrentPosition
);
245 * nsScannerSharedSubstring implements copy-on-write semantics for
246 * nsScannerSubstring. This class also manages releasing
247 * the reference to the scanner buffer when it is no longer needed.
// NOTE(review): access specifiers, the closing brace, and the declaration of
// |mBuffer| (initialised in the constructor below) are missing from this
// extract.
250 class nsScannerSharedSubstring
{
// Starts unbound: no buffer and no buffer list.
252 nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {}
// Calls ReleaseBuffer() only if a buffer list is currently held.
// NOTE(review): the destructor's closing brace is missing from the extract.
254 ~nsScannerSharedSubstring() {
255 if (mBufferList
) ReleaseBuffer();
258 // Acquire a copy-on-write reference to the given substring.
259 void Rebind(const nsScannerIterator
& aStart
, const nsScannerIterator
& aEnd
);
261 // Get a const reference to this string
262 const nsAString
& str() const { return mString
; }
265 typedef nsScannerBufferList::Buffer Buffer
;
// Declared only; invoked by the destructor when |mBufferList| is non-null.
267 void ReleaseBuffer();
// The string exposed through str().
269 nsDependentSubstring mString
;
// Non-null while this object holds a buffer list (checked by the destructor).
271 nsScannerBufferList
* mBufferList
;
275 * nsScannerIterator works just like nsReadingIterator<CharT> except that
276 * it knows how to iterate over a list of scanner buffers.
// NOTE(review): this extract is missing the access specifiers, the closing
// brace, the bodies of the increment/decrement operators, and the loop
// headers inside advance(). Comments below describe only what is visible.
278 class nsScannerIterator
{
// Iterator-style member typedefs; the element type is char16_t and all
// access is read-only (|pointer| / |reference| are const-qualified).
280 typedef nsScannerIterator self_type
;
281 typedef ptrdiff_t difference_type
;
282 typedef char16_t value_type
;
283 typedef const char16_t
* pointer
;
284 typedef const char16_t
& reference
;
285 typedef nsScannerSubstring::Buffer Buffer
;
// Fragment the iterator is currently inside.
288 nsScannerFragment mFragment
;
// Current character position; compared against the fragment's start/end.
289 const char16_t
* mPosition
;
// Substring asked for the next/previous fragment during normalization.
290 const nsScannerSubstring
* mOwner
;
292 friend class nsScannerSubstring
;
293 friend class nsScannerSharedSubstring
;
296 // nsScannerIterator();  // auto-generated default constructor is OK
297 // nsScannerIterator(const nsScannerIterator&);  // auto-generated copy-constructor OK
298 // nsScannerIterator& operator=(const nsScannerIterator&);
299 //   ^ auto-generated copy-assignment operator OK
// Defined inline after the class: step to the neighbouring fragment when the
// position sits exactly on the current fragment's end / start.
301 inline void normalize_forward();
302 inline void normalize_backward();
// Raw pointer to the current character.
304 pointer
get() const { return mPosition
; }
// Reads the current character by value; no end-of-string check is made here.
306 char16_t
operator*() const { return *get(); }
// Current fragment, by const reference.
308 const nsScannerFragment
& fragment() const { return mFragment
; }
// Buffer of the current fragment.
310 const Buffer
* buffer() const { return mFragment
.mBuffer
; }
// Pre-increment. NOTE(review): body (original lines 313-316) is missing from
// the extract.
312 self_type
& operator++() {
// Post-increment: copies the iterator first. NOTE(review): the rest of the
// body, including the return, is missing from the extract.
318 self_type
operator++(int) {
319 self_type
result(*this);
// Pre-decrement: normalizes backward first, so a position at a fragment
// start moves to the previous fragment's end. NOTE(review): the rest of the
// body is missing from the extract.
325 self_type
& operator--() {
326 normalize_backward();
// Post-decrement: copy, then normalize backward. NOTE(review): the rest of
// the body is missing from the extract.
331 self_type
operator--(int) {
332 self_type
result(*this);
333 normalize_backward();
// Characters remaining between the position and the end of the current
// fragment.
338 difference_type
size_forward() const {
339 return mFragment
.mFragmentEnd
- mPosition
;
// Characters between the start of the current fragment and the position.
342 difference_type
size_backward() const {
343 return mPosition
- mFragment
.mFragmentStart
;
// Moves the iterator by |n| characters, one fragment-bounded hop at a time.
// NOTE(review): the loop/branch headers (original lines 347 and 360), the
// |n| bookkeeping, and the return are missing from the extract. std::min /
// std::max need <algorithm>, and no such include is visible here.
346 self_type
& advance(difference_type n
) {
// Forward hop: never step past the end of the current fragment.
348 difference_type one_hop
= std::min(n
, size_forward());
350 NS_ASSERTION(one_hop
> 0,
351 "Infinite loop: can't advance a reading iterator beyond the "
353 // perhaps I should |break| if |!one_hop|?
355 mPosition
+= one_hop
;
// Backward hop: normalize first, then step back by at most the distance to
// the start of the current fragment (|one_hop| is negative here).
361 normalize_backward();
362 difference_type one_hop
= std::max(n
, -size_backward());
364 NS_ASSERTION(one_hop
< 0,
365 "Infinite loop: can't advance (backward) a reading iterator "
366 "beyond the end of a string");
367 // perhaps I should |break| if |!one_hop|?
369 mPosition
+= one_hop
;
// Returns true when both iterators' current fragments begin at the same
// address. Only |mFragmentStart| is compared; |mBuffer| and |mFragmentEnd|
// are not consulted.
// NOTE(review): the closing brace is missing from this extract.
377 inline bool SameFragment(const nsScannerIterator
& a
,
378 const nsScannerIterator
& b
) {
379 return a
.fragment().mFragmentStart
== b
.fragment().mFragmentStart
;
383 * this class is needed in order to make use of the methods in nsAlgorithm.h
// Specialization of nsCharSourceTraits for nsScannerIterator.
// NOTE(review): the |template <>| introducer, the body of read(), and the
// closing braces are missing from this extract.
386 struct nsCharSourceTraits
<nsScannerIterator
> {
387 typedef nsScannerIterator::difference_type difference_type
;
// Number of characters readable from |first| in one contiguous run: up to
// |last| when both iterators are in the same fragment, otherwise up to the
// end of |first|'s fragment. The result is converted to uint32_t.
389 static uint32_t readable_distance(const nsScannerIterator
& first
,
390 const nsScannerIterator
& last
) {
391 return uint32_t(SameFragment(first
, last
) ? last
.get() - first
.get()
392 : first
.size_forward());
// Returns a pointer to the characters at |iter|.
// NOTE(review): body not visible in this extract; presumably iter.get() —
// confirm.
395 static const nsScannerIterator::value_type
* read(
396 const nsScannerIterator
& iter
) {
// Forwards to nsScannerIterator::advance().
400 static void advance(nsScannerIterator
& s
, difference_type n
) { s
.advance(n
); }
404 * inline methods follow
// While the position sits exactly at the end of the current fragment, asks
// the owner for the next fragment and moves to its start. The loop ends as
// soon as the position is inside a fragment or GetNextFragment() returns
// false; in the latter case the position is left at the fragment end.
// NOTE(review): the closing brace is missing from this extract.
407 inline void nsScannerIterator::normalize_forward() {
408 while (mPosition
== mFragment
.mFragmentEnd
&&
409 mOwner
->GetNextFragment(mFragment
))
410 mPosition
= mFragment
.mFragmentStart
;
// Mirror of normalize_forward(): while the position sits exactly at the start
// of the current fragment, asks the owner for the previous fragment and moves
// to its end. Stops when GetPrevFragment() returns false, leaving the
// position at the fragment start.
// NOTE(review): the closing brace is missing from this extract.
413 inline void nsScannerIterator::normalize_backward() {
414 while (mPosition
== mFragment
.mFragmentStart
&&
415 mOwner
->GetPrevFragment(mFragment
))
416 mPosition
= mFragment
.mFragmentEnd
;
// Iterators compare equal when their current character pointers (get()) are
// identical. The owner and fragment are not compared.
// NOTE(review): the closing brace is missing from this extract.
419 inline bool operator==(const nsScannerIterator
& lhs
,
420 const nsScannerIterator
& rhs
) {
421 return lhs
.get() == rhs
.get();
// Negation of operator==: true when the two iterators' current character
// pointers (get()) differ.
// NOTE(review): the closing brace is missing from this extract.
424 inline bool operator!=(const nsScannerIterator
& lhs
,
425 const nsScannerIterator
& rhs
) {
426 return lhs
.get() != rhs
.get();
// Builds a Position from an iterator by copying its buffer() and get()
// pointers. The iterator exposes both as pointers-to-const, while Position
// stores non-const pointers, hence the const_casts.
429 inline nsScannerBufferList::Position::Position(const nsScannerIterator
& aIter
)
430 : mBuffer(const_cast<Buffer
*>(aIter
.buffer())),
431 mPosition(const_cast<char16_t
*>(aIter
.get())) {}
// Assigns from an iterator: copies its buffer() and get() pointers, casting
// away constness as the converting constructor does.
// NOTE(review): the return statement and closing brace are missing from this
// extract; the declared return type is |Position&|.
433 inline nsScannerBufferList::Position
& nsScannerBufferList::Position::operator=(
434 const nsScannerIterator
& aIter
) {
435 mBuffer
= const_cast<Buffer
*>(aIter
.buffer());
436 mPosition
= const_cast<char16_t
*>(aIter
.get());
441 * scanner string utils
443 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
444 * Here we provide only the methods that the htmlparser module needs.
// Iterator-based convenience wrapper: converts both iterators to
// nsScannerBufferList::Position and returns Position::Distance() for them.
// NOTE(review): the closing brace is missing from this extract.
447 inline size_t Distance(const nsScannerIterator
& aStart
,
448 const nsScannerIterator
& aEnd
) {
449 typedef nsScannerBufferList::Position Position
;
450 return Position::Distance(Position(aStart
), Position(aEnd
));
// Declared only; the definition is not in this header.
// NOTE(review): presumably replaces |aDest| with the characters in
// [aSrcStart, aSrcEnd) and returns false on failure — confirm in the .cpp.
453 bool CopyUnicodeTo(const nsScannerIterator
& aSrcStart
,
454 const nsScannerIterator
& aSrcEnd
, nsAString
& aDest
);
// Declared only; the definition is not in this header.
// NOTE(review): presumably appends the characters in [aSrcStart, aSrcEnd) to
// |aDest| and returns false on failure — confirm in the .cpp.
456 bool AppendUnicodeTo(const nsScannerIterator
& aSrcStart
,
457 const nsScannerIterator
& aSrcEnd
, nsAString
& aDest
);
459 #endif // !defined(nsScannerString_h___)