1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set sw=2 ts=2 et tw=79: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #include "nsContentUtils.h"
8 #include "nsHtml5Tokenizer.h"
9 #include "nsHtml5TreeBuilder.h"
10 #include "nsHtml5Parser.h"
11 #include "nsHtml5AtomTable.h"
12 #include "nsHtml5DependentUTF16Buffer.h"
// XPCOM interface map for nsHtml5Parser: the interface table lists nsIParser
// and nsISupportsWeakReference, then continues into the cycle-collection map.
14 NS_INTERFACE_TABLE_HEAD(nsHtml5Parser
)
15 NS_INTERFACE_TABLE2(nsHtml5Parser
, nsIParser
, nsISupportsWeakReference
)
16 NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5Parser
)
// Cycle-collecting AddRef/Release implementations for the class.
19 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5Parser
)
20 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5Parser
)
22 NS_IMPL_CYCLE_COLLECTION_CLASS(nsHtml5Parser
)
// Traverse: mExecutor and mStreamParser are the members reported here.
// NOTE(review): the second argument of each AMBIGUOUS macro is not present in
// this copy of the file.
24 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5Parser
)
25 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR_AMBIGUOUS(mExecutor
,
27 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_NSCOMPTR_AMBIGUOUS(mStreamParser
,
29 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
// Unlink: releases mExecutor and calls DropStreamParser() on the instance
// being unlinked (tmp) instead of unlinking mStreamParser through a macro.
31 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5Parser
)
32 NS_IMPL_CYCLE_COLLECTION_UNLINK_NSCOMPTR(mExecutor
)
33 tmp
->DropStreamParser();
34 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
// Constructor.
// Builds the parsing pipeline in the member-initializer list:
//  - a single buffer with a null key that is both mFirstBuffer and
//    mLastBuffer (the queue starts out holding only that buffer),
//  - the tree op executor,
//  - a tree builder constructed with that executor,
//  - a tokenizer constructed with that tree builder.
// The root-context line number starts at 1. The body initializes the atom
// table and installs it as the tokenizer's interner.
36 nsHtml5Parser::nsHtml5Parser()
37 : mFirstBuffer(new nsHtml5OwningUTF16Buffer((void*)nullptr))
38 , mLastBuffer(mFirstBuffer
)
39 , mExecutor(new nsHtml5TreeOpExecutor())
40 , mTreeBuilder(new nsHtml5TreeBuilder(mExecutor
, nullptr))
41 , mTokenizer(new nsHtml5Tokenizer(mTreeBuilder
, false))
42 , mRootContextLineNumber(1)
44 mAtomTable
.Init(); // we aren't checking for OOM anyway...
45 mTokenizer
->setInterner(&mAtomTable
);
// Members not listed above are left to the zeroing allocator (see below).
46 // There's a zeroing operator new for everything else
// Destructor. The visible part ends the document.write speculative tokenizer,
// but only if it was ever created (it is lazily initialized elsewhere).
// NOTE(review): other destructor lines are missing from this copy.
49 nsHtml5Parser::~nsHtml5Parser()
52 if (mDocWriteSpeculativeTokenizer
) {
53 mDocWriteSpeculativeTokenizer
->end();
// SetContentSink(aSink): asserts that aSink is this parser's own executor
// (viewed as nsIContentSink); a foreign sink trips the assertion.
58 nsHtml5Parser::SetContentSink(nsIContentSink
* aSink
)
60 NS_ASSERTION(aSink
== static_cast<nsIContentSink
*> (mExecutor
),
61 "Attempt to set a foreign sink.");
// GetContentSink(): returns mExecutor cast to nsIContentSink*.
64 NS_IMETHODIMP_(nsIContentSink
*)
65 nsHtml5Parser::GetContentSink()
67 return static_cast<nsIContentSink
*> (mExecutor
);
// GetCommand(aCommand): out-parameter accessor; always assigns the literal
// "view" to aCommand.
71 nsHtml5Parser::GetCommand(nsCString
& aCommand
)
73 aCommand
.Assign("view");
// SetCommand(const char* aCommand): asserts that the command string is one of
// "view", "view-source", "external-resource" or kLoadAsData. No state change
// is visible in this copy.
77 nsHtml5Parser::SetCommand(const char* aCommand
)
79 NS_ASSERTION(!strcmp(aCommand
, "view") ||
80 !strcmp(aCommand
, "view-source") ||
81 !strcmp(aCommand
, "external-resource") ||
82 !strcmp(aCommand
, kLoadAsData
),
83 "Unsupported parser command");
// SetCommand(eParserCommands aParserCommand): enum overload; asserts that the
// command is eViewNormal.
87 nsHtml5Parser::SetCommand(eParserCommands aParserCommand
)
89 NS_ASSERTION(aParserCommand
== eViewNormal
,
90 "Parser command was not eViewNormal.");
// SetDocumentCharset(aCharset, aCharsetSource)
// Preconditions: the executor has not started yet and a stream parser exists
// (i.e. this is not a script-only parser).
// Copies aCharset into a local string, trims the characters " \t\r\n\f" from
// it, and passes the trimmed value to both the stream parser and the executor.
// NOTE(review): the tail of the SetDocumentCharsetAndSource() call is missing
// from this copy.
94 nsHtml5Parser::SetDocumentCharset(const nsACString
& aCharset
,
95 int32_t aCharsetSource
)
97 NS_PRECONDITION(!mExecutor
->HasStarted(),
98 "Document charset set too late.");
99 NS_PRECONDITION(mStreamParser
, "Setting charset on a script-only parser.");
100 nsAutoCString trimmed
;
101 trimmed
.Assign(aCharset
);
102 trimmed
.Trim(" \t\r\n\f");
103 mStreamParser
->SetDocumentCharset(trimmed
, aCharsetSource
);
104 mExecutor
->SetDocumentCharsetAndSource(trimmed
,
// GetChannel(aChannel): delegates to mStreamParser->GetChannel(aChannel) on
// one path and returns NS_ERROR_NOT_AVAILABLE on the other.
// NOTE(review): the condition choosing between the two returns is not in this
// copy; presumably it tests whether mStreamParser is non-null -- confirm.
109 nsHtml5Parser::GetChannel(nsIChannel
** aChannel
)
112 return mStreamParser
->GetChannel(aChannel
);
114 return NS_ERROR_NOT_AVAILABLE
;
// GetDTD(nsIDTD** aDTD).
// NOTE(review): only the signature survives in this copy of the file; the
// body and return type are missing, so behavior cannot be documented here.
119 nsHtml5Parser::GetDTD(nsIDTD
** aDTD
)
// GetStreamListener(): returns mStreamParser.
126 nsHtml5Parser::GetStreamListener()
128 return mStreamParser
;
// ContinueInterruptedParsing(): present for interface compatibility only.
// Flags the call as unreachable and returns NS_ERROR_NOT_IMPLEMENTED.
132 nsHtml5Parser::ContinueInterruptedParsing()
134 NS_NOTREACHED("Don't call. For interface compat only.");
135 return NS_ERROR_NOT_IMPLEMENTED
;
// BlockParser().
// NOTE(review): only the signature survives in this copy of the file; the
// body is missing, so behavior cannot be documented here.
139 nsHtml5Parser::BlockParser()
// UnblockParser(): the visible statement asks the executor to continue
// interrupted parsing asynchronously.
// NOTE(review): intermediate body lines are missing from this copy.
145 nsHtml5Parser::UnblockParser()
148 mExecutor
->ContinueInterruptedParsingAsync();
// ContinueInterruptedParsingAsync(): forwards to the executor's method of the
// same name.
152 nsHtml5Parser::ContinueInterruptedParsingAsync()
154 mExecutor
->ContinueInterruptedParsingAsync();
// IsParserEnabled().
// NOTE(review): only the signature survives in this copy of the file; the
// body is missing, so behavior cannot be documented here.
158 nsHtml5Parser::IsParserEnabled()
// IsComplete(): returns the executor's completion state.
164 nsHtml5Parser::IsComplete()
166 return mExecutor
->IsComplete();
// Parse(aURL, aObserver, aKey, aMode): start-up for the network-stream case.
// aKey and aMode are legacy parameters and are ignored.
// Preconditions: the executor has not started and a stream parser exists
// (this variant must not be used on a script-created parser).
// Wires things together: gives the stream parser its observer and the URL
// (used as the view-source title), then hands the stream parser and this
// parser to the executor. It must not trigger WillBuildModel synchronously;
// see the warning text below.
170 nsHtml5Parser::Parse(nsIURI
* aURL
,
171 nsIRequestObserver
* aObserver
,
172 void* aKey
, // legacy; ignored
173 nsDTDMode aMode
) // legacy; ignored
176 * Do NOT cause WillBuildModel to be called synchronously from here!
177 * The document won't be ready for it until OnStartRequest!
179 NS_PRECONDITION(!mExecutor
->HasStarted(),
180 "Tried to start parse without initializing the parser.");
181 NS_PRECONDITION(mStreamParser
,
182 "Can't call this Parse() variant on script-created parser");
183 mStreamParser
->SetObserver(aObserver
);
184 mStreamParser
->SetViewSourceTitle(aURL
); // In case we're viewing source
185 mExecutor
->SetStreamParser(mStreamParser
);
186 mExecutor
->SetParser(this);
// Parse(aSourceBuffer, ..., aContentType, ..., aMode): script-driven input.
// Per the comments and assertions below, this handles document.write() /
// document.writeln() data and -- when aLastCall is set with an empty buffer
// and a null aKey -- document.close(). aMode is ignored.
//
// Visible flow:
//  1. Check the executor's broken state; mark the parser broken
//     (NS_ERROR_OUT_OF_MEMORY) if the buffer is longer than INT32_MAX.
//  2. Hold strong references to this parser, the stream parser and the
//     executor for the duration of the call.
//  3. If the executor has not started, this is the first document.write() on
//     a document.open()ed document: attach the parser, propagate the
//     scripting flag, switch to plain text when aContentType is not
//     "text/html", and call WillBuildModel.
//  4. Handle the document.close() case (sets mDocumentClosed).
//  5. Establish where this write inserts into the buffer queue: a key-holder
//     buffer for aKey, or a first-level marker placed before a new sentinel
//     in the document.open() case.
//  6. Tokenize straight from a stack-allocated dependent buffer while not
//     blocked, flushing tree ops whenever the tree builder has a script.
//  7. Copy any untokenized tail into a heap buffer and link it into the queue
//     (an allocation failure marks the parser broken).
//  8. If not blocked, force a tree builder flush; if blocked with data left
//     over, run the speculative tokenizer/tree builder over the tail to drive
//     speculative loads.
// NOTE(review): aKey and aLastCall are used below, but their parameter
// declarations, and several branch/return lines, are missing from this copy.
191 nsHtml5Parser::Parse(const nsAString
& aSourceBuffer
,
193 const nsACString
& aContentType
,
195 nsDTDMode aMode
) // ignored
198 if (NS_FAILED(rv
= mExecutor
->IsBroken())) {
201 if (aSourceBuffer
.Length() > INT32_MAX
) {
202 return mExecutor
->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY
);
205 // Maintain a reference to ourselves so we don't go away
206 // till we're completely done. The old parser grips itself in this method.
207 nsCOMPtr
<nsIParser
> kungFuDeathGrip(this);
209 // Gripping the other objects just in case, since the other old grip
210 // required grips to these, too.
211 nsRefPtr
<nsHtml5StreamParser
> streamKungFuDeathGrip(mStreamParser
);
212 nsRefPtr
<nsHtml5TreeOpExecutor
> treeOpKungFuDeathGrip(mExecutor
);
214 if (!mExecutor
->HasStarted()) {
215 NS_ASSERTION(!mStreamParser
,
216 "Had stream parser but document.write started life cycle.");
217 // This is the first document.write() on a document.open()ed document
218 mExecutor
->SetParser(this);
219 mTreeBuilder
->setScriptingEnabled(mExecutor
->IsScriptEnabled());
// Anything other than text/html is tokenized and tree-built as plain text.
222 if (!aContentType
.EqualsLiteral("text/html")) {
223 mTreeBuilder
->StartPlainText();
224 mTokenizer
->StartPlainText();
227 * If you move the following line, be very careful not to cause
228 * WillBuildModel to be called before the document has had its
229 * script global object set.
231 mExecutor
->WillBuildModel(eDTDMode_unknown
);
234 // Return early if the parser has processed EOF
235 if (mExecutor
->IsComplete()) {
// document.close() case: last call, nothing to write, null key.
239 if (aLastCall
&& aSourceBuffer
.IsEmpty() && !aKey
) {
241 NS_ASSERTION(!mStreamParser
,
242 "Had stream parser but got document.close().");
243 if (mDocumentClosed
) {
247 mDocumentClosed
= true;
248 if (!mBlocked
&& !mInDocumentWrite
) {
254 // If we got this far, we are dealing with a document.write or
255 // document.writeln call--not document.close().
257 NS_ASSERTION(IsInsertionPointDefined(),
258 "Doc.write reached parser with undefined insertion point.");
260 NS_ASSERTION(!(mStreamParser
&& !aKey
),
261 "Got a null key in a non-script-created parser");
263 // XXX is this optimization bogus?
264 if (aSourceBuffer
.IsEmpty()) {
268 // This guard is here to prevent document.close from tokenizing synchronously
269 // while a document.write (that wrote the script that called document.close!)
270 // is still on the call stack.
271 mozilla::AutoRestore
<bool> guard(mInDocumentWrite
);
272 mInDocumentWrite
= true;
274 // The script is identified by aKey. If there's nothing in the buffer
275 // chain for that key, we'll insert at the head of the queue.
276 // When the script leaves something in the queue, a zero-length
277 // key-holder "buffer" is inserted in the queue. If the same script
278 // leaves something in the chain again, it will be inserted immediately
279 // before the old key holder belonging to the same script.
281 // We don't do the actual data insertion yet in the hope that the data gets
282 // tokenized and there is no data or less data to copy to the heap after
283 // tokenization. Also, this way, we avoid inserting one empty data buffer
284 // per document.write, which matters for performance when the parser isn't
285 // blocked and a badly-authored script calls document.write() once per
286 // input character. (As seen in a benchmark!)
288 // The insertion into the input stream happens conceptually before anything
289 // gets tokenized. To make sure multi-level document.write works right,
290 // it's necessary to establish the location of our parser key up front
291 // in case this is the first write with this key.
293 // In a document.open() case, the first write level has a null key, so that
294 // case is handled separately, because normal buffers containing data
297 // These don't need to be owning references, because they always point to
298 // the buffer queue and buffers can't be removed from the buffer queue
299 // before document.write() returns. The buffer queue clean-up happens the
300 // next time ParseUntilBlocked() is called.
301 // However, they are made owning just in case the reasoning above is flawed
302 // and a flaw would lead to worse problems with plain pointers. If this
303 // turns out to be a perf problem, it's worthwhile to consider making
304 // prevSearchBuf a plain pointer again.
305 nsRefPtr
<nsHtml5OwningUTF16Buffer
> prevSearchBuf
;
306 nsRefPtr
<nsHtml5OwningUTF16Buffer
> firstLevelMarker
;
// Queue holds only the sentinel: put a new key holder in front of it.
309 if (mFirstBuffer
== mLastBuffer
) {
310 nsHtml5OwningUTF16Buffer
* keyHolder
= new nsHtml5OwningUTF16Buffer(aKey
);
311 keyHolder
->next
= mLastBuffer
;
312 mFirstBuffer
= keyHolder
;
313 } else if (mFirstBuffer
->key
!= aKey
) {
// Walk the queue looking for an existing key holder for aKey; if the walk
// reaches the buffer just before the sentinel without finding one, a new
// key holder is inserted at the head and prevSearchBuf is reset to null.
314 prevSearchBuf
= mFirstBuffer
;
316 if (prevSearchBuf
->next
== mLastBuffer
) {
318 nsHtml5OwningUTF16Buffer
* keyHolder
=
319 new nsHtml5OwningUTF16Buffer(aKey
);
320 keyHolder
->next
= mFirstBuffer
;
321 mFirstBuffer
= keyHolder
;
322 prevSearchBuf
= nullptr;
325 if (prevSearchBuf
->next
->key
== aKey
) {
326 // found a key holder
329 prevSearchBuf
= prevSearchBuf
->next
;
331 } // else mFirstBuffer is the keyholder
333 // prevSearchBuf is the previous buffer before the keyholder or null if
336 // We have a first-level write in the document.open() case. We insert before
337 // mLastBuffer, effectively, by making mLastBuffer be a new sentinel object
338 // and redesignating the previous mLastBuffer as our firstLevelMarker. We
339 // need to put a marker there, because otherwise additional document.writes
340 // from nested event loops would insert in the wrong place. Sigh.
341 mLastBuffer
->next
= new nsHtml5OwningUTF16Buffer((void*)nullptr);
342 firstLevelMarker
= mLastBuffer
;
343 mLastBuffer
= mLastBuffer
->next
;
// Tokenize directly out of the caller's string via a dependent buffer on the
// stack; nothing is copied to the heap unless a tail is left over.
346 nsHtml5DependentUTF16Buffer
stackBuffer(aSourceBuffer
);
348 while (!mBlocked
&& stackBuffer
.hasMore()) {
349 stackBuffer
.adjust(mLastWasCR
);
351 if (stackBuffer
.hasMore()) {
352 int32_t lineNumberSave
;
// Root context = script-created parser (no stream parser) with a null key.
353 bool inRootContext
= (!mStreamParser
&& !aKey
);
355 mTokenizer
->setLineNumber(mRootContextLineNumber
);
357 // we aren't the root context, so save the line number on the
358 // *stack* so that we can restore it.
359 lineNumberSave
= mTokenizer
->getLineNumber();
362 mLastWasCR
= mTokenizer
->tokenizeBuffer(&stackBuffer
);
365 mRootContextLineNumber
= mTokenizer
->getLineNumber();
367 mTokenizer
->setLineNumber(lineNumberSave
);
370 if (mTreeBuilder
->HasScript()) {
371 mTreeBuilder
->Flush(); // Move ops to the executor
372 mExecutor
->FlushDocumentWrite(); // run the ops
373 // Flushing tree ops can cause all sorts of things.
374 // Return early if the parser got terminated.
375 if (mExecutor
->IsComplete()) {
379 // Ignore suspension requests
383 nsRefPtr
<nsHtml5OwningUTF16Buffer
> heapBuffer
;
384 if (stackBuffer
.hasMore()) {
385 // The buffer wasn't tokenized to completion. Create a copy of the tail
387 heapBuffer
= stackBuffer
.FalliblyCopyAsOwningBuffer();
389 // Allocation failed. The parser is now broken.
390 return mExecutor
->MarkAsBroken(NS_ERROR_OUT_OF_MEMORY
);
395 // We have something to insert before the keyholder holding in the non-null
396 // aKey case and we have something to swap into firstLevelMarker in the
399 NS_ASSERTION(mFirstBuffer
!= mLastBuffer
,
400 "Where's the keyholder?");
401 // the key holder is still somewhere further down the list from
402 // prevSearchBuf (which may be null)
403 if (mFirstBuffer
->key
== aKey
) {
404 NS_ASSERTION(!prevSearchBuf
,
405 "Non-null prevSearchBuf when mFirstBuffer is the key holder?");
406 heapBuffer
->next
= mFirstBuffer
;
407 mFirstBuffer
= heapBuffer
;
409 if (!prevSearchBuf
) {
410 prevSearchBuf
= mFirstBuffer
;
412 // We created a key holder earlier, so we will find it without walking
413 // past the end of the list.
414 while (prevSearchBuf
->next
->key
!= aKey
) {
415 prevSearchBuf
= prevSearchBuf
->next
;
// Splice the heap copy in immediately before the key holder.
417 heapBuffer
->next
= prevSearchBuf
->next
;
418 prevSearchBuf
->next
= heapBuffer
;
421 NS_ASSERTION(firstLevelMarker
, "How come we don't have a marker.");
422 firstLevelMarker
->Swap(heapBuffer
);
426 if (!mBlocked
) { // buffer was tokenized to completion
427 NS_ASSERTION(!stackBuffer
.hasMore(),
428 "Buffer wasn't tokenized to completion?");
429 // Scripting semantics require a forced tree builder flush here
430 mTreeBuilder
->Flush(); // Move ops to the executor
431 mExecutor
->FlushDocumentWrite(); // run the ops
432 } else if (stackBuffer
.hasMore()) {
433 // The buffer wasn't tokenized to completion. Tokenize the untokenized
434 // content in order to preload stuff. This content will be retokenized
435 // later for normal parsing.
436 if (!mDocWriteSpeculatorActive
) {
437 mDocWriteSpeculatorActive
= true;
438 if (!mDocWriteSpeculativeTreeBuilder
) {
439 // Lazily initialize if uninitialized
440 mDocWriteSpeculativeTreeBuilder
=
441 new nsHtml5TreeBuilder(nullptr, mExecutor
->GetStage());
442 mDocWriteSpeculativeTreeBuilder
->setScriptingEnabled(
443 mTreeBuilder
->isScriptingEnabled());
444 mDocWriteSpeculativeTokenizer
=
445 new nsHtml5Tokenizer(mDocWriteSpeculativeTreeBuilder
, false);
446 mDocWriteSpeculativeTokenizer
->setInterner(&mAtomTable
);
447 mDocWriteSpeculativeTokenizer
->start();
// Start the speculation from the main tree builder's current state.
449 mDocWriteSpeculativeTokenizer
->resetToDataState();
450 mDocWriteSpeculativeTreeBuilder
->loadState(mTreeBuilder
, &mAtomTable
);
451 mDocWriteSpeculativeLastWasCR
= false;
454 // Note that with multilevel document.write if we didn't just activate the
455 // speculator, it's possible that the speculator is now in the wrong state.
456 // That's OK for the sake of simplicity. The worst that can happen is
457 // that the speculative loads aren't exactly right. The content will be
458 // reparsed anyway for non-preload purposes.
460 // The buffer position for subsequent non-speculative parsing now lives
461 // in heapBuffer, so it's ok to let the buffer position of stackBuffer
462 // to be overwritten and not restored below.
463 while (stackBuffer
.hasMore()) {
464 stackBuffer
.adjust(mDocWriteSpeculativeLastWasCR
);
465 if (stackBuffer
.hasMore()) {
466 mDocWriteSpeculativeLastWasCR
=
467 mDocWriteSpeculativeTokenizer
->tokenizeBuffer(&stackBuffer
);
// Hand the speculative results to the executor and drop the speculative tree
// builder's handles.
471 mDocWriteSpeculativeTreeBuilder
->Flush();
472 mDocWriteSpeculativeTreeBuilder
->DropHandles();
473 mExecutor
->FlushSpeculativeLoads();
// Terminate(): shuts the parse down.
// Does nothing further if the executor is already complete (DidBuildModel
// must only be called once). Otherwise holds strong references to this
// parser, the stream parser and the executor, terminates the stream parser,
// and returns the result of mExecutor->DidBuildModel(true).
// NOTE(review): the early-return statement and any guard around the
// mStreamParser->Terminate() call are missing from this copy.
480 nsHtml5Parser::Terminate()
482 // We should only call DidBuildModel once, so don't do anything if this is
483 // the second time that Terminate has been called.
484 if (mExecutor
->IsComplete()) {
487 // XXX - [ until we figure out a way to break parser-sink circularity ]
488 // Hack - Hold a reference until we are completely done...
489 nsCOMPtr
<nsIParser
> kungFuDeathGrip(this);
490 nsRefPtr
<nsHtml5StreamParser
> streamKungFuDeathGrip(mStreamParser
);
491 nsRefPtr
<nsHtml5TreeOpExecutor
> treeOpKungFuDeathGrip(mExecutor
);
493 mStreamParser
->Terminate();
495 return mExecutor
->DidBuildModel(true);
// ParseFragment(aSourceBuffer, aTagStack): not supported by this parser;
// returns NS_ERROR_NOT_IMPLEMENTED without using its arguments.
499 nsHtml5Parser::ParseFragment(const nsAString
& aSourceBuffer
,
500 nsTArray
<nsString
>& aTagStack
)
502 return NS_ERROR_NOT_IMPLEMENTED
;
// BuildModel(): must not be called. Flags the call as unreachable and returns
// NS_ERROR_NOT_IMPLEMENTED.
506 nsHtml5Parser::BuildModel()
508 NS_NOTREACHED("Don't call this!");
509 return NS_ERROR_NOT_IMPLEMENTED
;
// CancelParsingEvents(): must not be called. Flags the call as unreachable
// and returns NS_ERROR_NOT_IMPLEMENTED.
513 nsHtml5Parser::CancelParsingEvents()
515 NS_NOTREACHED("Don't call this!");
516 return NS_ERROR_NOT_IMPLEMENTED
;
// Reset(): must not be called; flags the call as unreachable.
520 nsHtml5Parser::Reset()
522 NS_NOTREACHED("Don't call this!");
// CanInterrupt().
// NOTE(review): the explanatory comment below is cut off and the return
// statement is missing from this copy, so the returned value is not visible.
526 nsHtml5Parser::CanInterrupt()
528 // nsContentSink needs this to let nsContentSink::DidProcessATokenImpl
// IsInsertionPointDefined(): true only when the executor is not flushing AND
// either there is no stream parser (script-created parser) or at least one
// parser-inserted script is currently being evaluated.
534 nsHtml5Parser::IsInsertionPointDefined()
536 return !mExecutor
->IsFlushing() &&
537 (!mStreamParser
|| mParserInsertedScriptsBeingEvaluated
);
// BeginEvaluatingParserInsertedScript(): increments the count of
// parser-inserted scripts being evaluated (read by
// IsInsertionPointDefined()).
541 nsHtml5Parser::BeginEvaluatingParserInsertedScript()
543 ++mParserInsertedScriptsBeingEvaluated
;
// EndEvaluatingParserInsertedScript(): decrements the count of
// parser-inserted scripts being evaluated.
547 nsHtml5Parser::EndEvaluatingParserInsertedScript()
549 --mParserInsertedScriptsBeingEvaluated
;
// MarkAsNotScriptCreated(aCommand): turns this into a stream-backed parser by
// creating mStreamParser. Must be called at most once (precondition: no
// stream parser yet).
// The command string selects the eParserMode passed to the stream parser:
//   default                -> NORMAL
//   "view-source"          -> VIEW_SOURCE_HTML
//   "view-source-xml"      -> VIEW_SOURCE_XML
//   "view-source-plain"    -> VIEW_SOURCE_PLAIN
//   "plain-text", kLoadAsData -> mode assignments missing from this copy
// Any other command is asserted to be "view" or "external-resource".
553 nsHtml5Parser::MarkAsNotScriptCreated(const char* aCommand
)
555 NS_PRECONDITION(!mStreamParser
, "Must not call this twice.");
556 eParserMode mode
= NORMAL
;
557 if (!nsCRT::strcmp(aCommand
, "view-source")) {
558 mode
= VIEW_SOURCE_HTML
;
559 } else if (!nsCRT::strcmp(aCommand
, "view-source-xml")) {
560 mode
= VIEW_SOURCE_XML
;
561 } else if (!nsCRT::strcmp(aCommand
, "view-source-plain")) {
562 mode
= VIEW_SOURCE_PLAIN
;
563 } else if (!nsCRT::strcmp(aCommand
, "plain-text")) {
565 } else if (!nsCRT::strcmp(aCommand
, kLoadAsData
)) {
570 NS_ASSERTION(!nsCRT::strcmp(aCommand
, "view") ||
571 !nsCRT::strcmp(aCommand
, "external-resource"),
572 "Unsupported parser command!");
// The stream parser is given the executor, this parser and the chosen mode.
575 mStreamParser
= new nsHtml5StreamParser(mExecutor
, this, mode
);
// IsScriptCreated(): true when there is no stream parser.
579 nsHtml5Parser::IsScriptCreated()
581 return !mStreamParser
;
586 // not from interface
// ParseUntilBlocked(): feeds the buffer queue through the tokenizer.
// Guarded up front on being blocked, complete or broken. Otherwise clears the
// document.write speculator flag and works through buffers starting at
// mFirstBuffer:
//  - When the current buffer has no more data and it is the sentinel
//    (mFirstBuffer == mLastBuffer), input is exhausted for now. Depending on
//    state this ends the stream (document was closed), hands control back to
//    the stream parser (when permitted and no script is executing), or just
//    flushes the tree builder (script-created parser).
//  - When the current buffer has no more data but is not the sentinel, the
//    queue advances to the next buffer.
//  - A buffer with data is adjusted with mLastWasCR and tokenized; tree ops
//    are flushed when the tree builder has a script.
// NOTE(review): the enclosing loop, several else branches and return
// statements are missing from this copy of the file.
588 nsHtml5Parser::ParseUntilBlocked()
590 if (mBlocked
|| mExecutor
->IsComplete() || NS_FAILED(mExecutor
->IsBroken())) {
593 NS_ASSERTION(mExecutor
->HasStarted(), "Bad life cycle.");
594 NS_ASSERTION(!mInDocumentWrite
,
595 "ParseUntilBlocked entered while in doc.write!");
597 mDocWriteSpeculatorActive
= false;
600 if (!mFirstBuffer
->hasMore()) {
601 if (mFirstBuffer
== mLastBuffer
) {
602 if (mExecutor
->IsComplete()) {
603 // something like cache manifests stopped the parse in mid-flight
606 if (mDocumentClosed
) {
607 NS_ASSERTION(!mStreamParser
,
608 "This should only happen with script-created parser.");
// document.close() was seen: signal end of stream and run the resulting ops.
610 mTreeBuilder
->StreamEnded();
611 mTreeBuilder
->Flush();
612 mExecutor
->FlushDocumentWrite();
616 // never release the last buffer.
617 NS_ASSERTION(!mLastBuffer
->getStart() && !mLastBuffer
->getEnd(),
618 "Sentinel buffer had its indeces changed.");
620 if (mReturnToStreamParserPermitted
&&
621 !mExecutor
->IsScriptExecuting()) {
// Hand parsing back to the stream parser; permission is cleared first.
622 mTreeBuilder
->Flush();
623 mReturnToStreamParserPermitted
= false;
624 mStreamParser
->ContinueAfterScripts(mTokenizer
,
629 // Script-created parser
630 mTreeBuilder
->Flush();
631 // No need to flush the executor, because the executor is already
633 NS_ASSERTION(mExecutor
->IsInFlushLoop(),
634 "How did we come here without being in the flush loop?");
636 return; // no more data for now but expecting more
// Current buffer is exhausted but is not the sentinel: move on.
638 mFirstBuffer
= mFirstBuffer
->next
;
642 if (mBlocked
|| mExecutor
->IsComplete()) {
646 // now we have a non-empty buffer
647 mFirstBuffer
->adjust(mLastWasCR
);
649 if (mFirstBuffer
->hasMore()) {
// Root context = script-created parser and a buffer with a null key.
650 bool inRootContext
= (!mStreamParser
&& !mFirstBuffer
->key
);
652 mTokenizer
->setLineNumber(mRootContextLineNumber
);
654 mLastWasCR
= mTokenizer
->tokenizeBuffer(mFirstBuffer
);
656 mRootContextLineNumber
= mTokenizer
->getLineNumber();
658 if (mTreeBuilder
->HasScript()) {
659 mTreeBuilder
->Flush();
660 mExecutor
->FlushDocumentWrite();
// Initialize(aDoc, ..., aContainer, aChannel): forwards all arguments to
// mExecutor->Init() and returns its result.
// NOTE(review): the aURI parameter is passed below but its declaration line
// is missing from this copy.
671 nsHtml5Parser::Initialize(nsIDocument
* aDoc
,
673 nsISupports
* aContainer
,
674 nsIChannel
* aChannel
)
676 return mExecutor
->Init(aDoc
, aURI
, aContainer
, aChannel
);
// StartTokenizer(aScriptingEnabled): configures the tree builder from the
// scripting flag -- script execution is prevented exactly when scripting is
// disabled, and the scripting-enabled flag is passed through unchanged.
// NOTE(review): the remaining lines of the body are missing from this copy.
680 nsHtml5Parser::StartTokenizer(bool aScriptingEnabled
) {
681 mTreeBuilder
->SetPreventScriptExecution(!aScriptingEnabled
);
682 mTreeBuilder
->setScriptingEnabled(aScriptingEnabled
);
// InitializeDocWriteParserState(aState, ...): prepares the document.write
// tokenizer and tree builder. Resets the tokenizer to the data state, sets
// its line number to aLine, loads aState into the tree builder, and permits
// returning to the stream parser (a flag read in ParseUntilBlocked()).
// NOTE(review): the aLine parameter declaration and some body lines are
// missing from this copy.
687 nsHtml5Parser::InitializeDocWriteParserState(nsAHtml5TreeBuilderState
* aState
,
690 mTokenizer
->resetToDataState();
691 mTokenizer
->setLineNumber(aLine
);
692 mTreeBuilder
->loadState(aState
, &mAtomTable
);
694 mReturnToStreamParserPermitted
= true;
// ContinueAfterFailedCharsetSwitch(): requires a stream parser
// (precondition) and forwards the call to it.
698 nsHtml5Parser::ContinueAfterFailedCharsetSwitch()
700 NS_PRECONDITION(mStreamParser
,
701 "Tried to continue after failed charset switch without a stream parser");
702 mStreamParser
->ContinueAfterFailedCharsetSwitch();