From 74f123a84cc011b085c489bc09d0551eb867ea30 Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang"
Date: Tue, 7 Mar 2017 17:52:41 -0800
Subject: [PATCH] Fix #83.
Signed-off-by: Edward Z. Yang
---
NEWS | 3 +++
configdoc/usage.xml | 22 ++++++++++++++++-----
library/HTMLPurifier/ConfigSchema/schema.ser | Bin 15800 -> 15923 bytes
.../schema/Core.AggressivelyRemoveScript.txt | 16 +++++++++++++++
library/HTMLPurifier/Lexer.php | 6 ++++++
5 files changed, 42 insertions(+), 5 deletions(-)
rewrite library/HTMLPurifier/ConfigSchema/schema.ser (90%)
create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyRemoveScript.txt
diff --git a/NEWS b/NEWS
index beef6b20..de383ae1 100644
--- a/NEWS
+++ b/NEWS
@@ -36,6 +36,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
decoding entities that are missing trailing semicolon.
To get old behavior, set %Core.LegacyEntityDecoder to true.
(#119)
+- Work around libxml bug when HTML tags are embedded inside
+ script tags. To disable the workaround, set %Core.AggressivelyRemoveScript
+ to false. (#83)
# By default, when a link has a target attribute associated
with it, we now also add rel="noopener" in order to
prevent the new window from being able to overwrite
diff --git a/configdoc/usage.xml b/configdoc/usage.xml
index 49bddaa5..de395b8d 100644
--- a/configdoc/usage.xml
+++ b/configdoc/usage.xml
@@ -6,7 +6,7 @@
85
- 322
+ 326
67
@@ -124,7 +124,7 @@
122
- 304
+ 308
@@ -172,7 +172,8 @@
234
- 309
+ 313
+ 351
37
@@ -260,14 +261,25 @@
62
+
+
+ 215
+ 337
+
+
- 320
+ 324
- 343
+ 347
+
+
+
+
+ 351
diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser
dissimilarity index 90%
index df8c5c466d9298cbb8fc0bce8676452324736ef4..371e948f1c76d99bacea65b4735454656858edbf 100644
GIT binary patch
delta 156
zcwXC4y}4$BIiu<3iNb7b@|IRg&iO^DdXDMoMXANbnPsUtl|iYw`DLlW$wiq3n|mci
scqe-Zdl0YQm6y?C@
+ This directive enables aggressive pre-filter removal of
+ script tags. This is not necessary for security,
+ but it can help work around a bug in libxml where embedded
+ HTML elements inside script sections cause the parser to
+ choke. To revert to pre-4.9.0 behavior, set this to false.
+ This directive has no effect if %HTML.Trusted is true,
+ %Core.RemoveScriptContents is false, or %Core.HiddenElements
+ does not contain script.
+
+--# vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index 37174eae..0fc048f6 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -348,6 +348,12 @@ class HTMLPurifier_Lexer
$html = preg_replace('#<\?.+?\?>#s', '', $html);
}
+ if ($config->get('Core.AggressivelyRemoveScript') &&
+ !($config->get('HTML.Trusted') || !$config->get('Core.RemoveScriptContents')
+ || empty($config->get('Core.HiddenElements')["script"]))) {
+ $html = preg_replace('##i', '', $html);
+ }
+
return $html;
}
--
2.11.4.GIT