From 39d3df1fd742299e0d8f3727a43c726c5fc0933a Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 31 Aug 2014 11:16:30 +0100 Subject: [PATCH] Add AutoFormat.RemoveEmpty.Predicate, fixes #35. Signed-off-by: Edward Z. Yang --- NEWS | 5 +++++ configdoc/usage.xml | 5 +++++ library/HTMLPurifier/ConfigSchema/schema.ser | Bin 15000 -> 15305 bytes .../schema/AutoFormat.RemoveEmpty.Predicate.txt | 14 ++++++++++++++ library/HTMLPurifier/Injector/RemoveEmpty.php | 15 ++++++++++----- tests/HTMLPurifier/Injector/RemoveEmptyTest.php | 19 +++++++++++++++++++ 6 files changed, 53 insertions(+), 5 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (94%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.Predicate.txt diff --git a/NEWS b/NEWS index 9b087022..b4abb857 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier 4.7.0, unknown release date # opacity is now considered a "tricky" CSS property rather than a proprietary one. +! %AutoFormat.RemoveEmpty.Predicate for specifying exactly when + an element should be considered "empty" (maybe preserve if it + has attributes), and modify iframe support so that the iframe + is removed if it is missing a src attribute. Thanks meeva for + reporting. - Don't truncate upon encountering when using DOMLex. Thanks Myrto Christina for finally convincing me to fix this. - Update YouTube filter for new code. diff --git a/configdoc/usage.xml b/configdoc/usage.xml index f3f7a36a..97bc34cb 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -481,6 +481,11 @@ 47 + + + 48 + + 54 diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 94% index 22ea32185db63b19d525f509ebe431f593e92271..1e6ccd22755dfa27722e17f457a00ea42bdc1d6f 100644 GIT binary patch delta 348 zcwU=Pda`_iIiuy|dKKA?p*xtEjEyEg6cx^Q%`GUY)C(v|P0377EJ;Kouq?AUh|o))3fi#B9Ppm|XLQZ?dn7A*0dedX@XkQ0GWtI49G}5M&CH&Sni! FApojLaN7U? 
delta 54 zcwReMKBIJkIitnqmqJ&WHamz3@=snM array(), 'th' => array(), 'td' => array(), 'iframe' => array('src')) +--DESCRIPTION-- +

+ Given that an element has no contents, it will be removed by default, unless + this predicate dictates otherwise. The predicate is an associative + map from tag name to the list of attributes that must be present for the element + to be preserved: thus, the default always preserves colgroup, + th and td, and also iframe if it + has a src. +

+--# vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/Injector/RemoveEmpty.php b/library/HTMLPurifier/Injector/RemoveEmpty.php index cd885722..01353ff1 100644 --- a/library/HTMLPurifier/Injector/RemoveEmpty.php +++ b/library/HTMLPurifier/Injector/RemoveEmpty.php @@ -28,10 +28,10 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector private $removeNbspExceptions; /** + * Cached contents of %AutoFormat.RemoveEmpty.Predicate * @type array - * TODO: make me configurable */ - private $_exclude = array('colgroup' => 1, 'th' => 1, 'td' => 1, 'iframe' => 1); + private $exclude; /** * @param HTMLPurifier_Config $config @@ -45,6 +45,7 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector $this->context = $context; $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); + $this->exclude = $config->get('AutoFormat.RemoveEmpty.Predicate'); $this->attrValidator = new HTMLPurifier_AttrValidator(); } @@ -75,11 +76,15 @@ class HTMLPurifier_Injector_RemoveEmpty extends HTMLPurifier_Injector break; } if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { - if (isset($this->_exclude[$token->name])) { - return; - } $this->attrValidator->validateToken($token, $this->config, $this->context); $token->armor['ValidateAttributes'] = true; + if (isset($this->exclude[$token->name])) { + $r = true; + foreach ($this->exclude[$token->name] as $elem) { + if (!isset($token->attr[$elem])) $r = false; + } + if ($r) return; + } if (isset($token->attr['id']) || isset($token->attr['name'])) { return; } diff --git a/tests/HTMLPurifier/Injector/RemoveEmptyTest.php b/tests/HTMLPurifier/Injector/RemoveEmptyTest.php index d25b218f..d719ba8a 100644 --- a/tests/HTMLPurifier/Injector/RemoveEmptyTest.php +++ b/tests/HTMLPurifier/Injector/RemoveEmptyTest.php @@ -91,6 +91,25 @@ class HTMLPurifier_Injector_RemoveEmptyTest extends 
HTMLPurifier_InjectorHarness $this->assertResult(' ', "\xC2\xA0"); } + public function testRemoveIframe() + { + $this->config->set('HTML.SafeIframe', true); + $this->assertResult('', ''); + } + + public function testNoRemoveIframe() + { + $this->config->set('HTML.SafeIframe', true); + $this->assertResult('', ''); + } + + public function testRemoveDisallowedIframe() + { + $this->config->set('HTML.SafeIframe', true); + $this->config->set('URI.SafeIframeRegexp', '%^http://www.youtube.com/embed/%'); + $this->assertResult('', ''); + } + } // vim: et sw=4 sts=4 -- 2.11.4.GIT