From 74f123a84cc011b085c489bc09d0551eb867ea30 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 7 Mar 2017 17:52:41 -0800 Subject: [PATCH] Fix #83. Signed-off-by: Edward Z. Yang --- NEWS | 3 +++ configdoc/usage.xml | 22 ++++++++++++++++----- library/HTMLPurifier/ConfigSchema/schema.ser | Bin 15800 -> 15923 bytes .../schema/Core.AggressivelyRemoveScript.txt | 16 +++++++++++++++ library/HTMLPurifier/Lexer.php | 6 ++++++ 5 files changed, 42 insertions(+), 5 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (90%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt diff --git a/NEWS b/NEWS index beef6b20..de383ae1 100644 --- a/NEWS +++ b/NEWS @@ -36,6 +36,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier decoding entities that are missing trailing semicolon. To get old behavior, set %Core.LegacyEntityDecoder to true. (#119) +- Workaround libxml bug when HTML tags are embedded inside + script tags. To disable workaround set %Core.AggressivelyRemoveScript + to false. (#83) # By default, when a link has a target attribute associated with it, we now also add rel="noopener" in order to prevent the new window from being able to overwrite diff --git a/configdoc/usage.xml b/configdoc/usage.xml index 49bddaa5..de395b8d 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -6,7 +6,7 @@ 85 - 322 + 326 67 @@ -124,7 +124,7 @@ 122 - 304 + 308 @@ -172,7 +172,8 @@ 234 - 309 + 313 + 351 37 @@ -260,14 +261,25 @@ 62 + + + 215 + 337 + + - 320 + 324 - 343 + 347 + + + + + 351 diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 90% index df8c5c466d9298cbb8fc0bce8676452324736ef4..371e948f1c76d99bacea65b4735454656858edbf 100644 GIT binary patch delta 156 zcwXC4y}4$BIiu<3iNb7b@|IRg&iO^DdXDMoMXANbnPsUtl|iYw`DLlW$wiq3n|mci scqe-Zdl0YQm6y?C@ + This directive enables aggressive pre-filter removal of + script tags. 
This is not necessary for security, + but it can help work around a bug in libxml where embedded + HTML elements inside script sections cause the parser to + choke. To revert to pre-4.9.0 behavior, set this to false. + This directive has no effect if %HTML.Trusted is true, + %Core.RemoveScriptContents is false, or %Core.HiddenElements + does not contain script. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php index 37174eae..0fc048f6 100644 --- a/library/HTMLPurifier/Lexer.php +++ b/library/HTMLPurifier/Lexer.php @@ -348,6 +348,12 @@ class HTMLPurifier_Lexer $html = preg_replace('#<\?.+?\?>#s', '', $html); } + if ($config->get('Core.AggressivelyRemoveScript') && + !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents') + || empty($config->get('Core.HiddenElements')["script"]))) { + $html = preg_replace('#]*>.*?#i', '', $html); + } + return $html; } -- 2.11.4.GIT