From 1bed8b6d5f94e03202bd6e54c9993d37df894761 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 20 Jun 2010 18:26:44 -0700 Subject: [PATCH] Added %Core.RemoveProcessingInstructions. Signed-off-by: Edward Z. Yang --- NEWS | 2 ++ configdoc/usage.xml | 11 ++++++++--- library/HTMLPurifier/ConfigSchema/schema.ser | Bin 13244 -> 13379 bytes .../schema/Core.RemoveProcessingInstructions.txt | 11 +++++++++++ library/HTMLPurifier/Lexer.php | 5 +++++ tests/HTMLPurifier/LexerTest.php | 8 ++++++++ 6 files changed, 34 insertions(+), 3 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (70%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt diff --git a/NEWS b/NEWS index f08163d0..617d6a17 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ========================== 4.1.2, unknown release date +! Added %Core.RemoveProcessingInstructions, which lets you remove + <? ... ?> statements. - Fix improper handling of Internet Explorer conditional comments by parser. Thanks zmonteca for reporting. 
diff --git a/configdoc/usage.xml b/configdoc/usage.xml index ec5b4166..444c3ad5 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -6,7 +6,7 @@ 81 - 269 + 282 53 @@ -149,7 +149,7 @@ 202 - 258 + 269 27 @@ -211,7 +211,12 @@ - 267 + 280 + + + + + 301 diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 70% index 22b8d54a59f17f73071055bc601d5a5f3a3f6b31..ac93a0c4ea640720ff6d628003d9cefafcdf541f 100644 GIT binary patch delta 182 zcwXC+emG--IiulbIe}abEn{OVCFlI2RK1|o-2AfCfTH~5)Z*gIymZgJ;*z4$J{K%?$vzYY-Ix diff --git a/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt b/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt new file mode 100644 index 00000000..0a6d4ec1 --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Core.RemoveProcessingInstructions.txt @@ -0,0 +1,11 @@ +Core.RemoveProcessingInstructions +TYPE: bool +VERSION: 4.1.2 +DEFAULT: false +--DESCRIPTION-- +Instead of escaping processing instructions in the form <? ... +?>, remove it out-right. This may be useful if the HTML +you are validating contains XML processing instruction gunk, however, +it can also be user-unfriendly for people attempting to post PHP +snippets. +--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index e3522009..f853421a 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -297,6 +297,11 @@ class HTMLPurifier_Lexer // represent non-SGML characters (horror, horror!) 
$html = HTMLPurifier_Encoder::cleanUTF8($html); + // if processing instructions are to be removed, remove them now + if ($config->get('Core.RemoveProcessingInstructions')) { + $html = preg_replace('#<\?.+?\?>#s', '', $html); + } + return $html; } diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index e6b0e0fb..0cb95155 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -717,6 +717,14 @@ div {} ); } + function test_tokenizeHTML_removeProcessingInstruction() { + $this->config->set('Core.RemoveProcessingInstructions', true); + $this->assertTokenization( + '<?php ob_start(); ?>', + array() + ); + } + /* function test_tokenizeHTML_() { -- 2.11.4.GIT