From ba9fd175d707a91ae3768bcc13e47f190edcb833 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 7 Jul 2009 22:19:04 -0400 Subject: [PATCH] Make extractBody not terminate prematurely on first </body>. Previously, if two </body> tags were present, HTML Purifier would truncate everything after the first </body>. This is not ideal behavior; so HTML Purifier has been changed to match up to the last </body>. Signed-off-by: Edward Z. Yang --- NEWS | 2 ++ library/HTMLPurifier/Lexer.php | 2 +- tests/HTMLPurifier/LexerTest.php | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/NEWS b/NEWS index 2bf34aa5..5535ed99 100644 --- a/NEWS +++ b/NEWS @@ -51,6 +51,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier - Fix fatal error in HTMLPurifier_Encoder on certain platforms (probably NetBSD 5.0) - Fix bug in Linkify autoformatter involving http://foo - Make %URI.Munge not apply to links that have the same host as your host. +- Prevent stray </body> tag from truncating output, if a second </body> + is present. . Created script maintenance/rename-config.php for renaming a configuration directive while maintaining its alias. This script does not change source code. . 
Implement namespace locking for definition construction, to prevent diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 7cbba469..8cce008d 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -285,7 +285,7 @@ class HTMLPurifier_Lexer */ public function extractBody($html) { $matches = array(); - $result = preg_match('!<body[^>]*>(.+?)</body>!is', $html, $matches); + $result = preg_match('!<body[^>]*>(.*)</body>!is', $html, $matches); if ($result) { return $matches[1]; } else { diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index 4866a416..bec33a3e 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -151,6 +151,10 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness $this->assertExtractBody('<body>asdf'); // not closed, don't accept } + function test_extractBody_useLastBody() { + $this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar'); + } + // HTMLPurifier_Lexer->tokenizeHTML() -------------------------------------- function assertTokenization($input, $expect, $alt_expect = array()) { -- 2.11.4.GIT