Bug 1845715 - Check for failure when getting RegExp match result template r=iain
[gecko.git] / parser / html / nsHtml5Highlighter.h
blob4966b216085c79a373f10238faaec8215fa08f4b
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #ifndef nsHtml5Highlighter_h
5 #define nsHtml5Highlighter_h
7 #include "nsCOMPtr.h"
8 #include "nsHtml5TreeOperation.h"
9 #include "nsHtml5UTF16Buffer.h"
10 #include "nsHtml5TreeOperation.h"
11 #include "nsAHtml5TreeOpSink.h"
13 #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
15 /**
16 * A state machine for generating HTML for display in View Source based on
17 * the transitions the tokenizer makes on the source being viewed.
19 class nsHtml5Highlighter {
20 public:
21 /**
22 * The constructor.
24 * @param aOpSink the sink for the tree ops generated by this highlighter
26 explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
28 /**
29 * The destructor.
31 ~nsHtml5Highlighter();
33 /**
34 * Set the op sink (for speculation).
36 void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
38 /**
39 * Reset state to after generated head but before processing any of the input
40 * stream.
42 void Rewind();
44 /**
45 * Starts the generated document.
47 void Start(const nsAutoString& aTitle);
49 /**
50 * Updates the charset source via the op queue.
52 void UpdateCharsetSource(nsCharsetSource aCharsetSource);
54 /**
55 * Report a tokenizer state transition.
57 * @param aState the state being transitioned to
58 * @param aReconsume whether this is a reconsuming transition
59 * @param aPos the tokenizer's current position into the buffer
61 int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
63 /**
64 * Report end of file.
66 * Returns `true` normally and `false` on OOM.
68 [[nodiscard]] bool End();
70 /**
71 * Set the current buffer being tokenized
73 void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
75 /**
76 * Let go of the buffer being tokenized but first, flush text from it.
78 * @param aPos the first UTF-16 code unit not to flush
80 void DropBuffer(int32_t aPos);
82 /**
83 * Query whether there are some many ops in the queue
84 * that they should be flushed now.
86 * @return true if FlushOps() should be called now
88 bool ShouldFlushOps();
90 /**
91 * Flush the tree ops into the sink.
93 * @return Ok(true) if there were ops to flush, Ok(false)
94 * if there were no ops to flush and Err() on OOM.
96 mozilla::Result<bool, nsresult> FlushOps();
98 /**
99 * Linkify the current attribute value if the attribute name is one of
100 * known URL attributes. (When executing tree ops, javascript: URLs will
101 * not be linkified, though.)
103 * @param aName the name of the attribute
104 * @param aValue the value of the attribute
106 void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
107 nsHtml5String aValue);
110 * Inform the highlighter that the tokenizer successfully completed a
111 * named character reference.
113 void CompletedNamedCharacterReference();
116 * Adds an error annotation to the node that's currently on top of
117 * mStack.
119 * @param aMsgId the id of the message in the property file
121 void AddErrorToCurrentNode(const char* aMsgId);
124 * Adds an error annotation to the node that corresponds to the most
125 * recently opened markup declaration/tag span, character reference or
126 * run of text.
128 * @param aMsgId the id of the message in the property file
130 void AddErrorToCurrentRun(const char* aMsgId);
133 * Adds an error annotation to the node that corresponds to the most
134 * recently opened markup declaration/tag span, character reference or
135 * run of text with one atom to use when formatting the message.
137 * @param aMsgId the id of the message in the property file
138 * @param aName the atom
140 void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
143 * Adds an error annotation to the node that corresponds to the most
144 * recently opened markup declaration/tag span, character reference or
145 * run of text with two atoms to use when formatting the message.
147 * @param aMsgId the id of the message in the property file
148 * @param aName the first atom
149 * @param aOther the second atom
151 void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
154 * Adds an error annotation to the node that corresponds to the most
155 * recent potentially character reference-starting ampersand.
157 * @param aMsgId the id of the message in the property file
159 void AddErrorToCurrentAmpersand(const char* aMsgId);
162 * Adds an error annotation to the node that corresponds to the most
163 * recent potentially self-closing slash.
165 * @param aMsgId the id of the message in the property file
167 void AddErrorToCurrentSlash(const char* aMsgId);
170 * Enqueues a tree op for adding base to the urls with the view-source:
172 * @param aValue the base URL to add
174 void AddBase(nsHtml5String aValue);
177 * Starts a wrapper around a run of characters.
179 void StartCharacters();
181 private:
183 * Starts a span with no class.
185 void StartSpan();
188 * Starts a <span> and sets the class attribute on it.
190 * @param aClass the class to set (MUST be a static string that does not
191 * need to be released!)
193 void StartSpan(const char16_t* aClass);
196 * End the current <span> or <a> in the highlighter output.
198 void EndSpanOrA();
201 * Ends a wrapper around a run of characters.
203 void EndCharactersAndStartMarkupRun();
206 * Starts an <a>.
208 void StartA();
211 * Flushes characters up to but not including the current one.
213 void FlushChars();
216 * Flushes characters up to and including the current one.
218 void FlushCurrent();
221 * Finishes highlighting a tag in the input data by closing the open
222 * <span> and <a> elements in the highlighter output and then starts
223 * another <span> for potentially highlighting characters potentially
224 * appearing next.
226 void FinishTag();
229 * Adds a class attribute to the current node.
231 * @param aClass the class to set (MUST be a static string that does not
232 * need to be released!)
234 void AddClass(const char16_t* aClass);
237 * Allocates a handle for an element.
239 * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
240 * in nsHtml5TreeBuilderHSupplement.h.
242 * @return the handle
244 nsIContent** AllocateContentHandle();
247 * Enqueues an element creation tree operation.
249 * @param aName the name of the element
250 * @param aAttributes the attribute holder (ownership will be taken) or
251 * nullptr for no attributes
252 * @param aIntendedParent the intended parent node for the created element
253 * @param aCreator the content creator function
254 * @return the handle for the element that will be created
256 nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
257 nsIContent** aIntendedParent,
258 mozilla::dom::HTMLContentCreatorFunction aCreator);
261 * Gets the handle for the current node. May be called only after the
262 * root element has been set.
264 * @return the handle for the current node
266 nsIContent** CurrentNode();
269 * Create an element and push it (its handle) on the stack.
271 * @param aName the name of the element
272 * @param aAttributes the attribute holder (ownership will be taken) or
273 * nullptr for no attributes
274 * @param aCreator the content creator function
276 void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
277 mozilla::dom::HTMLContentCreatorFunction aCreator);
280 * Pops the current node off the stack.
282 void Pop();
285 * Appends text content to the current node.
287 * @param aBuffer the buffer to copy from
288 * @param aStart the index of the first code unit to copy
289 * @param aLength the number of code units to copy
291 void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
292 int32_t aLength);
295 * Enqueues a tree op for adding an href attribute with the view-source:
296 * URL scheme to the current node.
298 * @param aValue the (potentially relative) URL to link to
300 void AddViewSourceHref(nsHtml5String aValue);
303 * The state we are transitioning away from.
305 int32_t mState;
308 * The index of the first UTF-16 code unit in mBuffer that hasn't been
309 * flushed yet.
311 int32_t mCStart;
314 * The position of the code unit in mBuffer that caused the current
315 * transition.
317 int32_t mPos;
320 * The current line number.
322 int32_t mLineNumber;
325 * The number of inline elements open inside the <pre> excluding the
326 * span potentially wrapping a run of characters.
328 int32_t mInlinesOpen;
331 * Whether there's a span wrapping a run of characters (excluding CDATA
332 * section) open.
334 bool mInCharacters;
337 * The current buffer being tokenized.
339 nsHtml5UTF16Buffer* mBuffer;
342 * The outgoing tree op queue.
344 nsTArray<nsHtml5TreeOperation> mOpQueue;
347 * The tree op stage for the tree op executor or a speculation when looking
348 * for meta charset.
350 * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
351 * object, because this object is owned by the nsHtml5Tokenizer instance that
352 * is owned by the nsHtml5StreamParser, which keeps the executor alive via
353 * nsHtml5Streamparser::mExecutorFlusher.
355 nsAHtml5TreeOpSink* mOpSink;
358 * The most recently opened markup declaration/tag or run of characters.
360 nsIContent** mCurrentRun;
363 * The most recent ampersand in a place where character references were
364 * allowed.
366 nsIContent** mAmpersand;
369 * The most recent slash that might become a self-closing slash.
371 nsIContent** mSlash;
374 * Memory for element handles.
376 mozilla::UniquePtr<nsIContent*[]> mHandles;
379 * Number of handles used in mHandles
381 int32_t mHandlesUsed;
384 * A holder for old contents of mHandles
386 nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
389 * The element stack.
391 nsTArray<nsIContent**> mStack;
394 * The string "comment"
396 static char16_t sComment[];
399 * The string "cdata"
401 static char16_t sCdata[];
404 * The string "start-tag"
406 static char16_t sStartTag[];
409 * The string "attribute-name"
411 static char16_t sAttributeName[];
414 * The string "attribute-value"
416 static char16_t sAttributeValue[];
419 * The string "end-tag"
421 static char16_t sEndTag[];
424 * The string "doctype"
426 static char16_t sDoctype[];
429 * The string "entity"
431 static char16_t sEntity[];
434 * The string "pi"
436 static char16_t sPi[];
439 * Whether base is already visited once.
441 bool mSeenBase;
444 #endif // nsHtml5Highlighter_h