Bug 1700051: part 33) Move `AdjustSoftBeginAndBuildSoftText` to `SoftText`. r=smaug
[gecko.git] / parser / htmlparser / nsScannerString.h
blob071e4b47613981527e08c0baffd54eb49fe916de
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef nsScannerString_h___
8 #define nsScannerString_h___
10 #include "nsString.h"
11 #include "nsUnicharUtils.h" // for nsCaseInsensitiveStringComparator
12 #include "mozilla/LinkedList.h"
13 #include <algorithm>
/**
 * NOTE: nsScannerString (and the other classes defined in this file) are
 * not related to nsAString or any of the other xpcom/string classes.
 *
 * nsScannerString is based on the nsSlidingString implementation that used
 * to live in xpcom/string.  Now that nsAString is limited to representing
 * only single fragment strings, nsSlidingString can no longer be used.
 *
 * An advantage to this design is that it does not employ any virtual
 * functions.
 *
 * This file uses SCC-style indenting in deference to the nsSlidingString
 * code from which this code is derived ;-)
 */
30 class nsScannerIterator;
31 class nsScannerSubstring;
32 class nsScannerString;
34 /**
35 * nsScannerBufferList
37 * This class maintains a list of heap-allocated Buffer objects. The buffers
38 * are maintained in a circular linked list. Each buffer has a usage count
39 * that is decremented by the owning nsScannerSubstring.
41 * The buffer list itself is reference counted. This allows the buffer list
42 * to be shared by multiple nsScannerSubstring objects. The reference
43 * counting is not threadsafe, which is not at all a requirement.
45 * When a nsScannerSubstring releases its reference to a buffer list, it
46 * decrements the usage count of the first buffer in the buffer list that it
47 * was referencing. It informs the buffer list that it can discard buffers
48 * starting at that prefix. The buffer list will do so if the usage count of
49 * that buffer is 0 and if it is the first buffer in the list. It will
50 * continue to prune buffers starting from the front of the buffer list until
51 * it finds a buffer that has a usage count that is non-zero.
53 class nsScannerBufferList {
54 public:
55 /**
56 * Buffer objects are directly followed by a data segment. The start
57 * of the data segment is determined by increment the |this| pointer
58 * by 1 unit.
60 class Buffer : public mozilla::LinkedListElement<Buffer> {
61 public:
62 void IncrementUsageCount() { ++mUsageCount; }
63 void DecrementUsageCount() { --mUsageCount; }
65 bool IsInUse() const { return mUsageCount != 0; }
67 const char16_t* DataStart() const { return (const char16_t*)(this + 1); }
68 char16_t* DataStart() { return (char16_t*)(this + 1); }
70 const char16_t* DataEnd() const { return mDataEnd; }
71 char16_t* DataEnd() { return mDataEnd; }
73 const Buffer* Next() const { return getNext(); }
74 Buffer* Next() { return getNext(); }
76 const Buffer* Prev() const { return getPrevious(); }
77 Buffer* Prev() { return getPrevious(); }
79 uint32_t DataLength() const { return mDataEnd - DataStart(); }
80 void SetDataLength(uint32_t len) { mDataEnd = DataStart() + len; }
82 private:
83 friend class nsScannerBufferList;
85 int32_t mUsageCount;
86 char16_t* mDataEnd;
89 /**
90 * Position objects serve as lightweight pointers into a buffer list.
91 * The mPosition member must be contained with mBuffer->DataStart()
92 * and mBuffer->DataEnd().
94 class Position {
95 public:
96 Position() : mBuffer(nullptr), mPosition(nullptr) {}
98 Position(Buffer* buffer, char16_t* position)
99 : mBuffer(buffer), mPosition(position) {}
101 inline explicit Position(const nsScannerIterator& aIter);
103 inline Position& operator=(const nsScannerIterator& aIter);
105 static size_t Distance(const Position& p1, const Position& p2);
107 Buffer* mBuffer;
108 char16_t* mPosition;
111 static Buffer* AllocBufferFromString(const nsAString&);
112 static Buffer* AllocBuffer(uint32_t capacity); // capacity = number of chars
114 explicit nsScannerBufferList(Buffer* buf) : mRefCnt(0) {
115 mBuffers.insertBack(buf);
118 void AddRef() { ++mRefCnt; }
119 void Release() {
120 if (--mRefCnt == 0) delete this;
123 void Append(Buffer* buf) { mBuffers.insertBack(buf); }
124 void InsertAfter(Buffer* buf, Buffer* prev) { prev->setNext(buf); }
125 void SplitBuffer(const Position&);
126 void DiscardUnreferencedPrefix(Buffer*);
128 Buffer* Head() { return mBuffers.getFirst(); }
129 const Buffer* Head() const { return mBuffers.getFirst(); }
131 Buffer* Tail() { return mBuffers.getLast(); }
132 const Buffer* Tail() const { return mBuffers.getLast(); }
134 private:
135 friend class nsScannerSubstring;
137 ~nsScannerBufferList() { ReleaseAll(); }
138 void ReleaseAll();
140 int32_t mRefCnt;
141 mozilla::LinkedList<Buffer> mBuffers;
145 * nsScannerFragment represents a "slice" of a Buffer object.
147 struct nsScannerFragment {
148 typedef nsScannerBufferList::Buffer Buffer;
150 const Buffer* mBuffer;
151 const char16_t* mFragmentStart;
152 const char16_t* mFragmentEnd;
156 * nsScannerSubstring is the base class for nsScannerString. It provides
157 * access to iterators and methods to bind the substring to another
158 * substring or nsAString instance.
160 * This class owns the buffer list.
162 class nsScannerSubstring {
163 public:
164 typedef nsScannerBufferList::Buffer Buffer;
165 typedef nsScannerBufferList::Position Position;
166 typedef uint32_t size_type;
168 nsScannerSubstring();
169 explicit nsScannerSubstring(const nsAString& s);
171 ~nsScannerSubstring();
173 nsScannerIterator& BeginReading(nsScannerIterator& iter) const;
174 nsScannerIterator& EndReading(nsScannerIterator& iter) const;
176 size_type Length() const { return mLength; }
178 int32_t CountChar(char16_t) const;
180 void Rebind(const nsScannerSubstring&, const nsScannerIterator&,
181 const nsScannerIterator&);
182 void Rebind(const nsAString&);
184 const nsAString& AsString() const;
186 bool GetNextFragment(nsScannerFragment&) const;
187 bool GetPrevFragment(nsScannerFragment&) const;
189 static inline Buffer* AllocBufferFromString(const nsAString& aStr) {
190 return nsScannerBufferList::AllocBufferFromString(aStr);
192 static inline Buffer* AllocBuffer(size_type aCapacity) {
193 return nsScannerBufferList::AllocBuffer(aCapacity);
196 protected:
197 void acquire_ownership_of_buffer_list() const {
198 mBufferList->AddRef();
199 mStart.mBuffer->IncrementUsageCount();
202 void release_ownership_of_buffer_list() {
203 if (mBufferList) {
204 mStart.mBuffer->DecrementUsageCount();
205 mBufferList->DiscardUnreferencedPrefix(mStart.mBuffer);
206 mBufferList->Release();
210 void init_range_from_buffer_list() {
211 mStart.mBuffer = mBufferList->Head();
212 mStart.mPosition = mStart.mBuffer->DataStart();
214 mEnd.mBuffer = mBufferList->Tail();
215 mEnd.mPosition = mEnd.mBuffer->DataEnd();
217 mLength = Position::Distance(mStart, mEnd);
220 Position mStart;
221 Position mEnd;
222 nsScannerBufferList* mBufferList;
223 size_type mLength;
225 // these fields are used to implement AsString
226 nsDependentSubstring mFlattenedRep;
227 bool mIsDirty;
229 friend class nsScannerSharedSubstring;
233 * nsScannerString provides methods to grow and modify a buffer list.
235 class nsScannerString : public nsScannerSubstring {
236 public:
237 explicit nsScannerString(Buffer*);
239 // you are giving ownership to the string, it takes and keeps your
240 // buffer, deleting it when done.
241 // Use AllocBuffer or AllocBufferFromString to create a Buffer object
242 // for use with this function.
243 void AppendBuffer(Buffer*);
245 void DiscardPrefix(const nsScannerIterator&);
246 // any other way you want to do this?
248 void UngetReadable(const nsAString& aReadable,
249 const nsScannerIterator& aCurrentPosition);
253 * nsScannerSharedSubstring implements copy-on-write semantics for
254 * nsScannerSubstring. When you call .writable(), it will copy the data
255 * and return a mutable string object. This class also manages releasing
256 * the reference to the scanner buffer when it is no longer needed.
259 class nsScannerSharedSubstring {
260 public:
261 nsScannerSharedSubstring() : mBuffer(nullptr), mBufferList(nullptr) {}
263 ~nsScannerSharedSubstring() {
264 if (mBufferList) ReleaseBuffer();
267 // Acquire a copy-on-write reference to the given substring.
268 void Rebind(const nsScannerIterator& aStart, const nsScannerIterator& aEnd);
270 // Get a mutable reference to this string
271 nsAString& writable() {
272 if (mBufferList) MakeMutable();
274 return mString;
277 // Get a const reference to this string
278 const nsAString& str() const { return mString; }
280 private:
281 typedef nsScannerBufferList::Buffer Buffer;
283 void ReleaseBuffer();
284 void MakeMutable();
286 nsDependentSubstring mString;
287 Buffer* mBuffer;
288 nsScannerBufferList* mBufferList;
292 * nsScannerIterator works just like nsReadingIterator<CharT> except that
293 * it knows how to iterate over a list of scanner buffers.
295 class nsScannerIterator {
296 public:
297 typedef nsScannerIterator self_type;
298 typedef ptrdiff_t difference_type;
299 typedef char16_t value_type;
300 typedef const char16_t* pointer;
301 typedef const char16_t& reference;
302 typedef nsScannerSubstring::Buffer Buffer;
304 protected:
305 nsScannerFragment mFragment;
306 const char16_t* mPosition;
307 const nsScannerSubstring* mOwner;
309 friend class nsScannerSubstring;
310 friend class nsScannerSharedSubstring;
312 public:
313 // nsScannerIterator(); // auto-generate
314 // default constructor is OK nsScannerIterator( const nsScannerIterator& ); //
315 // auto-generated copy-constructor OK nsScannerIterator& operator=( const
316 // nsScannerIterator& ); // auto-generated copy-assignment operator OK
318 inline void normalize_forward();
319 inline void normalize_backward();
321 pointer get() const { return mPosition; }
323 char16_t operator*() const { return *get(); }
325 const nsScannerFragment& fragment() const { return mFragment; }
327 const Buffer* buffer() const { return mFragment.mBuffer; }
329 self_type& operator++() {
330 ++mPosition;
331 normalize_forward();
332 return *this;
335 self_type operator++(int) {
336 self_type result(*this);
337 ++mPosition;
338 normalize_forward();
339 return result;
342 self_type& operator--() {
343 normalize_backward();
344 --mPosition;
345 return *this;
348 self_type operator--(int) {
349 self_type result(*this);
350 normalize_backward();
351 --mPosition;
352 return result;
355 difference_type size_forward() const {
356 return mFragment.mFragmentEnd - mPosition;
359 difference_type size_backward() const {
360 return mPosition - mFragment.mFragmentStart;
363 self_type& advance(difference_type n) {
364 while (n > 0) {
365 difference_type one_hop = std::min(n, size_forward());
367 NS_ASSERTION(one_hop > 0,
368 "Infinite loop: can't advance a reading iterator beyond the "
369 "end of a string");
370 // perhaps I should |break| if |!one_hop|?
372 mPosition += one_hop;
373 normalize_forward();
374 n -= one_hop;
377 while (n < 0) {
378 normalize_backward();
379 difference_type one_hop = std::max(n, -size_backward());
381 NS_ASSERTION(one_hop < 0,
382 "Infinite loop: can't advance (backward) a reading iterator "
383 "beyond the end of a string");
384 // perhaps I should |break| if |!one_hop|?
386 mPosition += one_hop;
387 n -= one_hop;
390 return *this;
394 inline bool SameFragment(const nsScannerIterator& a,
395 const nsScannerIterator& b) {
396 return a.fragment().mFragmentStart == b.fragment().mFragmentStart;
400 * this class is needed in order to make use of the methods in nsAlgorithm.h
402 template <>
403 struct nsCharSourceTraits<nsScannerIterator> {
404 typedef nsScannerIterator::difference_type difference_type;
406 static uint32_t readable_distance(const nsScannerIterator& first,
407 const nsScannerIterator& last) {
408 return uint32_t(SameFragment(first, last) ? last.get() - first.get()
409 : first.size_forward());
412 static const nsScannerIterator::value_type* read(
413 const nsScannerIterator& iter) {
414 return iter.get();
417 static void advance(nsScannerIterator& s, difference_type n) { s.advance(n); }
421 * inline methods follow
424 inline void nsScannerIterator::normalize_forward() {
425 while (mPosition == mFragment.mFragmentEnd &&
426 mOwner->GetNextFragment(mFragment))
427 mPosition = mFragment.mFragmentStart;
430 inline void nsScannerIterator::normalize_backward() {
431 while (mPosition == mFragment.mFragmentStart &&
432 mOwner->GetPrevFragment(mFragment))
433 mPosition = mFragment.mFragmentEnd;
436 inline bool operator==(const nsScannerIterator& lhs,
437 const nsScannerIterator& rhs) {
438 return lhs.get() == rhs.get();
441 inline bool operator!=(const nsScannerIterator& lhs,
442 const nsScannerIterator& rhs) {
443 return lhs.get() != rhs.get();
446 inline nsScannerBufferList::Position::Position(const nsScannerIterator& aIter)
447 : mBuffer(const_cast<Buffer*>(aIter.buffer())),
448 mPosition(const_cast<char16_t*>(aIter.get())) {}
450 inline nsScannerBufferList::Position& nsScannerBufferList::Position::operator=(
451 const nsScannerIterator& aIter) {
452 mBuffer = const_cast<Buffer*>(aIter.buffer());
453 mPosition = const_cast<char16_t*>(aIter.get());
454 return *this;
458 * scanner string utils
460 * These methods mimic the API provided by nsReadableUtils in xpcom/string.
461 * Here we provide only the methods that the htmlparser module needs.
464 inline size_t Distance(const nsScannerIterator& aStart,
465 const nsScannerIterator& aEnd) {
466 typedef nsScannerBufferList::Position Position;
467 return Position::Distance(Position(aStart), Position(aEnd));
470 bool CopyUnicodeTo(const nsScannerIterator& aSrcStart,
471 const nsScannerIterator& aSrcEnd, nsAString& aDest);
473 inline bool CopyUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) {
474 nsScannerIterator begin, end;
475 return CopyUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
478 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
479 const nsScannerIterator& aSrcEnd, nsAString& aDest);
481 inline bool AppendUnicodeTo(const nsScannerSubstring& aSrc, nsAString& aDest) {
482 nsScannerIterator begin, end;
483 return AppendUnicodeTo(aSrc.BeginReading(begin), aSrc.EndReading(end), aDest);
486 bool AppendUnicodeTo(const nsScannerIterator& aSrcStart,
487 const nsScannerIterator& aSrcEnd,
488 nsScannerSharedSubstring& aDest);
490 bool FindCharInReadable(char16_t aChar, nsScannerIterator& aStart,
491 const nsScannerIterator& aEnd);
493 bool FindInReadable(const nsAString& aPattern, nsScannerIterator& aStart,
494 nsScannerIterator& aEnd,
495 nsStringComparator = nsTDefaultStringComparator);
497 bool RFindInReadable(const nsAString& aPattern, nsScannerIterator& aStart,
498 nsScannerIterator& aEnd,
499 nsStringComparator = nsTDefaultStringComparator);
501 inline bool CaseInsensitiveFindInReadable(const nsAString& aPattern,
502 nsScannerIterator& aStart,
503 nsScannerIterator& aEnd) {
504 return FindInReadable(aPattern, aStart, aEnd,
505 nsCaseInsensitiveStringComparator);
508 #endif // !defined(nsScannerString_h___)