From b4981c3395159d942ecb42a56f7c5273ace97c64 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 27 Mar 2016 15:19:32 -0700 Subject: [PATCH] Fix #67, don't use <body> tags in comments for %Core.ConvertDocumentToFragment Signed-off-by: Edward Z. Yang --- NEWS | 2 ++ library/HTMLPurifier/Lexer.php | 13 +++++++++---- tests/HTMLPurifier/LexerTest.php | 10 ++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 6efe38a3..58c4ee18 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ul/ol without allowing li. - On some versions of PHP, the Serializer DefinitionCache could infinite loop when the directory exists but is not listable. (#49) +- Don't match for <body> inside comments with + %Core.ConvertDocumentToFragment. (#67) 4.7.0, released 2015-08-04 # opacity is now considered a "tricky" CSS property rather than a diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 43732621..44c5c659 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -345,12 +345,17 @@ class HTMLPurifier_Lexer public function extractBody($html) { $matches = array(); - $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches); + $result = preg_match('|(.*?)<body[^>]*>(.*)</body>|is', $html, $matches); if ($result) { - return $matches[1]; - } else { - return $html; + // Make sure it's not in a comment + $comment_start = strrpos($matches[1], '<!--'); + $comment_end = strrpos($matches[1], '-->'); + if ($comment_start === false || + ($comment_end !== false && $comment_end > $comment_start)) { + return $matches[2]; + } } + return $html; } } diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index ecdbe1b8..00e08097 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -169,6 +169,16 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness $this->assertExtractBody('foobar', 'foobar'); } + public function test_extractBody_ignoreCommented() + { + $this->assertExtractBody('$<!-- <body>foo</body> -->^'); + } + 
+ public function test_extractBody_butCanStillWork() + { + $this->assertExtractBody('<!-- b --><body>a</body>', 'a'); + } + // HTMLPurifier_Lexer->tokenizeHTML() -------------------------------------- public function assertTokenization($input, $expect, $alt_expect = array()) -- 2.11.4.GIT