1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #ifndef nsHtml5Highlighter_h
5 #define nsHtml5Highlighter_h
8 #include "nsHtml5TreeOperation.h"
9 #include "nsHtml5UTF16Buffer.h"
10 #include "nsHtml5TreeOperation.h"
11 #include "nsAHtml5TreeOpSink.h"
/**
 * Length constant for handle arrays.
 * NOTE(review): presumably the number of entries allocated per mHandles
 * array -- confirm against the implementation file.
 */
#define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
/**
 * A state machine for generating HTML for display in View Source based on
 * the transitions the tokenizer makes on the source being viewed.
 */
19 class nsHtml5Highlighter
{
24 * @param aOpSink the sink for the tree ops generated by this highlighter
26 explicit nsHtml5Highlighter(nsAHtml5TreeOpSink
* aOpSink
);
31 ~nsHtml5Highlighter();
34 * Set the op sink (for speculation).
36 void SetOpSink(nsAHtml5TreeOpSink
* aOpSink
);
39 * Reset state to after generated head but before processing any of the input
45 * Starts the generated document.
47 void Start(const nsAutoString
& aTitle
);
50 * Updates the charset source via the op queue.
52 void UpdateCharsetSource(nsCharsetSource aCharsetSource
);
/**
 * Report a tokenizer state transition.
 *
 * @param aState the state being transitioned to
 * @param aReconsume whether this is a reconsuming transition
 * @param aPos the tokenizer's current position into the buffer
 * @return an int32_t whose meaning is not documented in this header
 *         (presumably the resulting state -- confirm against the
 *         implementation)
 */
int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
/**
 * Ends the highlighting.
 *
 * The result is marked [[nodiscard]]: callers are expected to check it.
 *
 * @return `true` normally and `false` on OOM
 */
[[nodiscard]] bool End();
71 * Set the current buffer being tokenized
73 void SetBuffer(nsHtml5UTF16Buffer
* aBuffer
);
/**
 * Let go of the buffer being tokenized but first, flush text from it.
 *
 * @param aPos the first UTF-16 code unit not to flush
 */
void DropBuffer(int32_t aPos);
/**
 * Query whether there are so many ops in the queue
 * that they should be flushed now.
 *
 * @return true if FlushOps() should be called now
 */
bool ShouldFlushOps();
91 * Flush the tree ops into the sink.
93 * @return Ok(true) if there were ops to flush, Ok(false)
94 * if there were no ops to flush and Err() on OOM.
96 mozilla::Result
<bool, nsresult
> FlushOps();
99 * Linkify the current attribute value if the attribute name is one of
100 * known URL attributes. (When executing tree ops, javascript: URLs will
101 * not be linkified, though.)
103 * @param aName the name of the attribute
104 * @param aValue the value of the attribute
106 void MaybeLinkifyAttributeValue(nsHtml5AttributeName
* aName
,
107 nsHtml5String aValue
);
/**
 * Inform the highlighter that the tokenizer successfully completed a
 * named character reference.
 */
void CompletedNamedCharacterReference();
/**
 * Adds an error annotation to the node that's currently on top of the
 * stack (presumably mStack -- confirm).
 *
 * @param aMsgId the id of the message in the property file
 */
void AddErrorToCurrentNode(const char* aMsgId);
/**
 * Adds an error annotation to the node that corresponds to the most
 * recently opened markup declaration/tag span, character reference or
 * run of text.
 *
 * @param aMsgId the id of the message in the property file
 */
void AddErrorToCurrentRun(const char* aMsgId);
133 * Adds an error annotation to the node that corresponds to the most
134 * recently opened markup declaration/tag span, character reference or
135 * run of text with one atom to use when formatting the message.
137 * @param aMsgId the id of the message in the property file
138 * @param aName the atom
140 void AddErrorToCurrentRun(const char* aMsgId
, nsAtom
* aName
);
143 * Adds an error annotation to the node that corresponds to the most
144 * recently opened markup declaration/tag span, character reference or
145 * run of text with two atoms to use when formatting the message.
147 * @param aMsgId the id of the message in the property file
148 * @param aName the first atom
149 * @param aOther the second atom
151 void AddErrorToCurrentRun(const char* aMsgId
, nsAtom
* aName
, nsAtom
* aOther
);
/**
 * Adds an error annotation to the node that corresponds to the most
 * recent potentially character reference-starting ampersand.
 *
 * @param aMsgId the id of the message in the property file
 */
void AddErrorToCurrentAmpersand(const char* aMsgId);
/**
 * Adds an error annotation to the node that corresponds to the most
 * recent potentially self-closing slash.
 *
 * @param aMsgId the id of the message in the property file
 */
void AddErrorToCurrentSlash(const char* aMsgId);
170 * Enqueues a tree op for adding base to the urls with the view-source:
172 * @param aValue the base URL to add
174 void AddBase(nsHtml5String aValue
);
/**
 * Starts a wrapper around a run of characters.
 */
void StartCharacters();
183 * Starts a span with no class.
/**
 * Starts a <span> and sets the class attribute on it.
 *
 * @param aClass the class to set (MUST be a static string that does not
 *               need to be released!)
 */
void StartSpan(const char16_t* aClass);
196 * End the current <span> or <a> in the highlighter output.
/**
 * Ends a wrapper around a run of characters.
 * NOTE(review): the name suggests a markup run is started afterwards --
 * confirm against the implementation.
 */
void EndCharactersAndStartMarkupRun();
211 * Flushes characters up to but not including the current one.
216 * Flushes characters up to and including the current one.
221 * Finishes highlighting a tag in the input data by closing the open
222 * <span> and <a> elements in the highlighter output and then starts
223 * another <span> for potentially highlighting characters potentially
/**
 * Adds a class attribute to the current node.
 *
 * @param aClass the class to set (MUST be a static string that does not
 *               need to be released!)
 */
void AddClass(const char16_t* aClass);
237 * Allocates a handle for an element.
239 * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
240 * in nsHtml5TreeBuilderHSupplement.h.
244 nsIContent
** AllocateContentHandle();
247 * Enqueues an element creation tree operation.
249 * @param aName the name of the element
250 * @param aAttributes the attribute holder (ownership will be taken) or
251 * nullptr for no attributes
252 * @param aIntendedParent the intended parent node for the created element
253 * @param aCreator the content creator function
254 * @return the handle for the element that will be created
256 nsIContent
** CreateElement(nsAtom
* aName
, nsHtml5HtmlAttributes
* aAttributes
,
257 nsIContent
** aIntendedParent
,
258 mozilla::dom::HTMLContentCreatorFunction aCreator
);
261 * Gets the handle for the current node. May be called only after the
262 * root element has been set.
264 * @return the handle for the current node
266 nsIContent
** CurrentNode();
269 * Create an element and push it (its handle) on the stack.
271 * @param aName the name of the element
272 * @param aAttributes the attribute holder (ownership will be taken) or
273 * nullptr for no attributes
274 * @param aCreator the content creator function
276 void Push(nsAtom
* aName
, nsHtml5HtmlAttributes
* aAttributes
,
277 mozilla::dom::HTMLContentCreatorFunction aCreator
);
280 * Pops the current node off the stack.
285 * Appends text content to the current node.
287 * @param aBuffer the buffer to copy from
288 * @param aStart the index of the first code unit to copy
289 * @param aLength the number of code units to copy
291 void AppendCharacters(const char16_t
* aBuffer
, int32_t aStart
,
295 * Enqueues a tree op for adding an href attribute with the view-source:
296 * URL scheme to the current node.
298 * @param aValue the (potentially relative) URL to link to
300 void AddViewSourceHref(nsHtml5String aValue
);
303 * The state we are transitioning away from.
308 * The index of the first UTF-16 code unit in mBuffer that hasn't been
314 * The position of the code unit in mBuffer that caused the current
320 * The current line number.
/**
 * The number of inline elements open inside the <pre> excluding the
 * span potentially wrapping a run of characters.
 */
int32_t mInlinesOpen;
331 * Whether there's a span wrapping a run of characters (excluding CDATA
337 * The current buffer being tokenized.
339 nsHtml5UTF16Buffer
* mBuffer
;
342 * The outgoing tree op queue.
344 nsTArray
<nsHtml5TreeOperation
> mOpQueue
;
347 * The tree op stage for the tree op executor or a speculation when looking
350 * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
351 * object, because this object is owned by the nsHtml5Tokenizer instance that
352 * is owned by the nsHtml5StreamParser, which keeps the executor alive via
353 * nsHtml5Streamparser::mExecutorFlusher.
355 nsAHtml5TreeOpSink
* mOpSink
;
358 * The most recently opened markup declaration/tag or run of characters.
360 nsIContent
** mCurrentRun
;
363 * The most recent ampersand in a place where character references were
366 nsIContent
** mAmpersand
;
369 * The most recent slash that might become a self-closing slash.
374 * Memory for element handles.
376 mozilla::UniquePtr
<nsIContent
*[]> mHandles
;
/**
 * Number of handles used in mHandles.
 */
int32_t mHandlesUsed;
384 * A holder for old contents of mHandles
386 nsTArray
<mozilla::UniquePtr
<nsIContent
*[]>> mOldHandles
;
391 nsTArray
<nsIContent
**> mStack
;
394 * The string "comment"
396 static char16_t sComment
[];
401 static char16_t sCdata
[];
404 * The string "start-tag"
406 static char16_t sStartTag
[];
409 * The string "attribute-name"
411 static char16_t sAttributeName
[];
414 * The string "attribute-value"
416 static char16_t sAttributeValue
[];
419 * The string "end-tag"
421 static char16_t sEndTag
[];
424 * The string "doctype"
426 static char16_t sDoctype
[];
429 * The string "entity"
431 static char16_t sEntity
[];
436 static char16_t sPi
[];
439 * Whether base is already visited once.
444 #endif // nsHtml5Highlighter_h