From 1a70bffd5a8a7fbead590133d7d5ae101d917402 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 4 May 2010 13:41:09 -0400 Subject: [PATCH] Emit errors when body is extracted. Signed-off-by: Edward Z. Yang --- NEWS | 3 +++ library/HTMLPurifier/Language/messages/en.php | 1 + library/HTMLPurifier/Lexer.php | 10 +++++++++- tests/HTMLPurifier/Lexer/DirectLex_ErrorsTest.php | 5 +++++ 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 0e8eda6f..b78cf391 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Internal change ========================== +4.1.1, unknown release date +- Emit an error for CollectErrors if a body is extracted + 4.1.0, released 2010-04-26 ! Support proprietary height attribute on table element ! Support YouTube slideshows that contain /cp/ in their URL. diff --git a/library/HTMLPurifier/Language/messages/en.php b/library/HTMLPurifier/Language/messages/en.php index aab2e52e..8d7b5736 100644 --- a/library/HTMLPurifier/Language/messages/en.php +++ b/library/HTMLPurifier/Language/messages/en.php @@ -23,6 +23,7 @@ $messages = array( 'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', 'Lexer: Missing attribute key' => 'Attribute declaration has no key', 'Lexer: Missing end quote' => 'Attribute declaration has no end quote', +'Lexer: Extracted body' => 'Removed document metadata tags', 'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 8cce008d..b05e1154 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -265,7 +265,15 @@ class HTMLPurifier_Lexer // extract body from document if applicable if 
($config->get('Core.ConvertDocumentToFragment')) { - $html = $this->extractBody($html); + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + $new_html = $this->extractBody($html); + if ($e && $new_html != $html) { + $e->send(E_WARNING, 'Lexer: Extracted body'); + } + $html = $new_html; } // expand entities that aren't the big five diff --git a/tests/HTMLPurifier/Lexer/DirectLex_ErrorsTest.php b/tests/HTMLPurifier/Lexer/DirectLex_ErrorsTest.php index b6ea7093..69d3c628 100644 --- a/tests/HTMLPurifier/Lexer/DirectLex_ErrorsTest.php +++ b/tests/HTMLPurifier/Lexer/DirectLex_ErrorsTest.php @@ -13,6 +13,11 @@ class HTMLPurifier_Lexer_DirectLex_ErrorsTest extends HTMLPurifier_ErrorsHarness $lexer->parseAttributeString($input, $this->config, $this->context); } + function testExtractBody() { + $this->expectErrorCollection(E_WARNING, 'Lexer: Extracted body'); + $this->invoke('<body>foo</body>'); + } + function testUnclosedComment() { $this->expectErrorCollection(E_WARNING, 'Lexer: Unclosed comment'); $this->expectContext('CurrentLine', 1); -- 2.11.4.GIT