From 84abae08f527e2b704675a23acdfb3c1a28d548d Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 26 May 2009 01:07:40 -0400 Subject: [PATCH] Relax allowed values of class for certain doctypes, see %Attr.ClassUseCDATA Signed-off-by: Edward Z. Yang --- NEWS | 5 +++- TODO | 2 -- library/HTMLPurifier/AttrDef/HTML/Class.php | 13 ++++++++-- library/HTMLPurifier/AttrDef/HTML/Nmtokens.php | 4 +-- library/HTMLPurifier/ConfigSchema/schema.ser | Bin 12912 -> 12999 bytes .../ConfigSchema/schema/Attr.ClassUseCDATA.txt | 19 +++++++++++++++ tests/HTMLPurifier/AttrDef/HTML/ClassTest.php | 27 +++++++++++++++++++++ .../Strategy/ValidateAttributesTest.php | 1 + 8 files changed, 64 insertions(+), 7 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (99%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt diff --git a/NEWS b/NEWS index 3630ff36..bad9d7ba 100644 --- a/NEWS +++ b/NEWS @@ -18,8 +18,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier %FilterParam.ExtractStyleBlocksEscaping -> %Filter.ExtractStyleBlocks.Escaping %FilterParam.ExtractStyleBlocksScope -> %Filter.ExtractStyleBlocks.Scope %FilterParam.ExtractStyleBlocksTidyImpl -> %Filter.ExtractStyleBlocks.TidyImpl - As usual, the old directive names will still work, but will through E_NOTICE + As usual, the old directive names will still work, but will throw E_NOTICE errors. +# The allowed values for class have been relaxed to allow all of CDATA for + doctypes that are not XHTML 1.1 or XHTML 2.0. For old behavior, set + %Attr.ClassUseCDATA to false. ! More robust support for name="" and id="" ! HTMLPurifier_Config::inherit($config) allows you to inherit one configuration, and have changes to that configuration be propagated diff --git a/TODO b/TODO index de07f378..67bb8421 100644 --- a/TODO +++ b/TODO @@ -18,8 +18,6 @@ afraid to cast your vote for the next feature to be implemented! 
http://htmlpurifier.org/phorum/read.php?3,3491,3548 - Fix ImgRequired to handle data correctly - Think about allowing explicit order of operations hooks for transforms -- Allow more relaxed "class" definition than NMTOKENS for appropriate - doctypes FUTURE VERSIONS --------------- diff --git a/library/HTMLPurifier/AttrDef/HTML/Class.php b/library/HTMLPurifier/AttrDef/HTML/Class.php index a2f4a98a..c925cd39 100644 --- a/library/HTMLPurifier/AttrDef/HTML/Class.php +++ b/library/HTMLPurifier/AttrDef/HTML/Class.php @@ -5,6 +5,15 @@ */ class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens { + protected function split($string, $config, $context) { + // really, this twiddle should be lazy loaded + $name = $config->getDefinition('HTML')->doctype->name; + if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { + return parent::split($string, $config, $context); + } else { + return preg_split('/\s+/', $string); + } + } protected function filter($tokens, $config, $context) { $allowed = $config->get('Attr.AllowedClasses'); $forbidden = $config->get('Attr.ForbiddenClasses'); @@ -14,9 +23,9 @@ class HTMLPurifier_AttrDef_HTML_Class extends HTMLPurifier_AttrDef_HTML_Nmtokens ($allowed === null || isset($allowed[$token])) && !isset($forbidden[$token]) ) { - $ret[] = $token; + $ret[$token] = true; } } - return $ret; + return array_keys($ret); } } diff --git a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php index 7dab1fea..aa34120b 100644 --- a/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php +++ b/library/HTMLPurifier/AttrDef/HTML/Nmtokens.php @@ -13,7 +13,7 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef // early abort: '' and '0' (strings that convert to false) are invalid if (!$string) return false; - $tokens = $this->split($string); + $tokens = $this->split($string, $config, $context); $tokens = $this->filter($tokens, $config, $context); if (empty($tokens)) return false; return 
implode(' ', $tokens); @@ -23,7 +23,7 @@ class HTMLPurifier_AttrDef_HTML_Nmtokens extends HTMLPurifier_AttrDef /** * Splits a space separated list of tokens into its constituent parts. */ - protected function split($string) { + protected function split($string, $config, $context) { // OPTIMIZABLE! // do the preg_match, capture all subpatterns for reformulation diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 99% index b534c3f128024cd9d46764e283de7f297860f92b..bbf12f9c3e7392aa8143727d2485f6f9ad1f97e1 100644 GIT binary patch delta 139 zcwYOEay)f{8KcEyL3!E9XZTa33@xmb97{@y^qg}Ni;F{xQ=MHLLmZW?{j4|Z32fw> joX+n~tm;R6jD|p!b+WirWm@T)TTj*#klDOa{X9QhjSnBF&Y4wvXj%~6DMAg+N5w?2mtkp6J7uS diff --git a/library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt b/library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt new file mode 100644 index 00000000..e774b823 --- /dev/null +++ b/library/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt @@ -0,0 +1,19 @@ +Attr.ClassUseCDATA +TYPE: bool/null +DEFAULT: null +VERSION: 4.0.0 +--DESCRIPTION-- +If null, class will auto-detect the doctype and, if matching XHTML 1.1 or +XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise, +it will use a relaxed CDATA definition. If true, the relaxed CDATA definition +is forced; if false, the NMTOKENS definition is forced. To get behavior +of HTML Purifier prior to 4.0.0, set this directive to false. + +Some rationale behind the auto-detection: +in previous versions of HTML Purifier, it was assumed that the form of +class was NMTOKENS, as specified by the XHTML Modularization (representing +XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however, +specify class as CDATA. HTML 5 effectively defines it as CDATA, but +with the additional constraint that each name should be unique (this is not +explicitly outlined in previous specifications). 
+--# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php b/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php index 0e959b5e..6effd3cd 100644 --- a/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php +++ b/tests/HTMLPurifier/AttrDef/HTML/ClassTest.php @@ -18,4 +18,31 @@ class HTMLPurifier_AttrDef_HTML_ClassTest extends HTMLPurifier_AttrDef_HTML_Nmto $this->assertDef('bar', false); $this->assertDef('foo bar', 'foo'); } + function testDefault() { + $this->assertDef('valid'); + $this->assertDef('a0-_'); + $this->assertDef('-valid'); + $this->assertDef('_valid'); + $this->assertDef('double valid'); + + $this->assertDef('0stillvalid'); + $this->assertDef('-0'); + + // test conditional replacement + $this->assertDef('validassoc 0valid', 'validassoc 0valid'); + + // test whitespace leniency + $this->assertDef(" double\nvalid\r", 'double valid'); + + // test case sensitivity + $this->assertDef('VALID'); + + // test duplicate removal + $this->assertDef('valid valid', 'valid'); + } + function testXHTML11Behavior() { + $this->config->set('HTML.Doctype', 'XHTML 1.1'); + $this->assertDef('0invalid', false); + $this->assertDef('valid valid', 'valid'); + } } diff --git a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php index 8c49d3e6..5fc86cbd 100644 --- a/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php +++ b/tests/HTMLPurifier/Strategy/ValidateAttributesTest.php @@ -32,6 +32,7 @@ class HTMLPurifier_Strategy_ValidateAttributesTest extends } function testSelectivelyRemoveInvalidClasses() { + $this->config->set('HTML.Doctype', 'XHTML 1.1'); $this->assertResult( '
Keep valid.
', '
Keep valid.
' -- 2.11.4.GIT