From 1d90bb239763da7ee4e0d84c8621307e5aac3868 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 1 Aug 2008 19:06:28 -0400 Subject: [PATCH] Allow <![CDATA[<body>...</body>]]> not to trigger Core.ConvertDocumentToFragment Signed-off-by: Edward Z. Yang --- NEWS | 1 + library/HTMLPurifier/Lexer.php | 10 +++++----- tests/HTMLPurifier/LexerTest.php | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index a3c5c8cf..7aacdc20 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier - Do not re-munge URL if the output URL has the same host as the input URL. Requested by Chris. - Fix error in documentation regarding %Filter.ExtractStyleBlocks +- Prevent <![CDATA[<body></body>]]> from triggering %Core.ConvertDocumentToFragment . Strategy_MakeWellFormed now operates in-place, saving memory and allowing for more interesting filter-backtracking . New HTMLPurifier_Injector->rewind() functionality, allows injectors to rewind diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 01364f65..8c60ab33 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -226,11 +226,6 @@ class HTMLPurifier_Lexer */ public function normalize($html, $config, $context) { - // extract body from document if applicable - if ($config->get('Core', 'ConvertDocumentToFragment')) { - $html = $this->extractBody($html); - } - // normalize newlines to \n $html = str_replace("\r\n", "\n", $html); $html = str_replace("\r", "\n", $html); @@ -243,6 +238,11 @@ class HTMLPurifier_Lexer // escape CDATA $html = $this->escapeCDATA($html); + // extract body from document if applicable + if ($config->get('Core', 'ConvertDocumentToFragment')) { + $html = $this->extractBody($html); + } + // expand entities that aren't the big five $html = $this->_entity_parser->substituteNonSpecialEntities($html); diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index 605ee2f2..349e5b24 100644 ---
a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -634,6 +634,26 @@ div {} ); } + function test_tokenizeHTML_bodyInCDATA() { + $this->assertTokenization( + '<![CDATA[<body>Foo</body>]]>', + array( + new HTMLPurifier_Token_Text('<body>Foo</body>'), + ), + array( + 'PH5P' => array( + new HTMLPurifier_Token_Text('<'), + new HTMLPurifier_Token_Text('body'), + new HTMLPurifier_Token_Text('>'), + new HTMLPurifier_Token_Text('Foo'), + new HTMLPurifier_Token_Text('<'), + new HTMLPurifier_Token_Text('/body'), + new HTMLPurifier_Token_Text('>'), + ), + ) + ); + } + /* function test_tokenizeHTML_() { -- 2.11.4.GIT