From d3abcb90e30592c619047d878cf9c72b7c5836a3 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Fri, 21 May 2010 11:53:52 -0400 Subject: [PATCH] Rewrite CSS url() and font-family output logic. The new logic is as follows: * Given a URL to insert into url(), check that it is properly URL encoded (in particular, a doublequote and backslash never occurs within it) and then place it as url("http://example.com"). * Given a font name, if it is strictly alphanumeric, it is safe to omit quotes. Otherwise, wrap in double quotes and replace '"' with '\22 ' (note trailing space) and '\' with '\5C ' (ditto). We introduce expandCSSEscape() which is a hack for common parsing idioms in CSS; this means that CSS escapes are now recognized inside URLs as well as unquoted font names. Signed-off-by: Edward Z. Yang --- NEWS | 3 ++ library/HTMLPurifier/AttrDef.php | 36 ++++++++++++++++ library/HTMLPurifier/AttrDef/CSS/FontFamily.php | 50 +++++++--------------- library/HTMLPurifier/AttrDef/CSS/URI.php | 12 ++---- tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php | 6 +-- tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php | 30 ++++++------- tests/HTMLPurifier/AttrDef/CSS/FontTest.php | 4 +- tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php | 6 +-- tests/HTMLPurifier/AttrDef/CSS/URITest.php | 10 ++--- tests/HTMLPurifier/AttrDef/CSSTest.php | 10 ++--- .../HTMLPurifier/Filter/ExtractStyleBlocksTest.php | 6 +-- tests/HTMLPurifier/HTMLT/munge-extra.htmlt | 2 +- .../HTMLT/shift-jis-preserve-yen.htmlt | 2 +- .../HTMLPurifier/HTMLT/shift-jis-remove-yen.htmlt | 4 +- tests/HTMLPurifier/HTMLT/tidy-background.htmlt | 2 +- 15 files changed, 98 insertions(+), 85 deletions(-) diff --git a/NEWS b/NEWS index 7a507bec..1e06a8fb 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,9 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ========================== 4.1.1, unknown release date +- Rewrite CSS output logic for font-family and url(). Thanks Mario + Heiderich for reporting and Takeshi + Terada for suggesting the fix. - Emit an error for CollectErrors if a body is extracted - Fix bug where in background-position for center keyword handling. - Fix infinite loop when a wrapper element is inserted in a context diff --git a/library/HTMLPurifier/AttrDef.php b/library/HTMLPurifier/AttrDef.php index d32fa62d..b2e4f36c 100644 --- a/library/HTMLPurifier/AttrDef.php +++ b/library/HTMLPurifier/AttrDef.php @@ -82,6 +82,42 @@ abstract class HTMLPurifier_AttrDef return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); } + /** + * Parses a possibly escaped CSS string and returns the "pure" + * version of it. + */ + protected function expandCSSEscape($string) { + // flexibly parse it + $ret = ''; + for ($i = 0, $c = strlen($string); $i < $c; $i++) { + if ($string[$i] === '\\') { + $i++; + if ($i >= $c) { + $ret .= '\\'; + break; + } + if (ctype_xdigit($string[$i])) { + $code = $string[$i]; + for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { + if (!ctype_xdigit($string[$i])) break; + $code .= $string[$i]; + } + // We have to be extremely careful when adding + // new characters, to make sure we're not breaking + // the encoding. + $char = HTMLPurifier_Encoder::unichr(hexdec($code)); + if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; + $ret .= $char; + if ($i < $c && trim($string[$i]) !== '') $i--; + continue; + } + if ($string[$i] === "\n") continue; + } + $ret .= $string[$i]; + } + return $ret; + } + } // vim: et sw=4 sts=4 diff --git a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php index 705ac893..42c2054c 100644 --- a/library/HTMLPurifier/AttrDef/CSS/FontFamily.php +++ b/library/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -34,37 +34,10 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef $quote = $font[0]; if ($font[$length - 1] !== $quote) continue; $font = substr($font, 1, $length - 2); + } - $new_font = ''; - for ($i = 0, $c = strlen($font); $i < $c; $i++) { - if ($font[$i] === '\\') { - $i++; - if ($i >= $c) { - $new_font .= '\\'; - break; - } - if (ctype_xdigit($font[$i])) { - $code = $font[$i]; - for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { - if (!ctype_xdigit($font[$i])) break; - $code .= $font[$i]; - } - // We have to be extremely careful when adding - // new characters, to make sure we're not breaking - // the encoding. - $char = HTMLPurifier_Encoder::unichr(hexdec($code)); - if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; - $new_font .= $char; - if ($i < $c && trim($font[$i]) !== '') $i--; - continue; - } - if ($font[$i] === "\n") continue; - } - $new_font .= $font[$i]; - } + $font = $this->expandCSSEscape($font); - $font = $new_font; - } // $font is a pure representation of the font name if (ctype_alnum($font) && $font !== '') { @@ -73,12 +46,21 @@ class HTMLPurifier_AttrDef_CSS_FontFamily extends HTMLPurifier_AttrDef continue; } - // complicated font, requires quoting + // bugger out on whitespace. form feed (0C) really + // shouldn't show up regardless + $font = str_replace(array("\n", "\t", "\r", "\x0C"), ' ', $font); - // armor single quotes and new lines - $font = str_replace("\\", "\\\\", $font); - $font = str_replace("'", "\\'", $font); - $final .= "'$font', "; + // These ugly transforms don't pose a security + // risk (as \\ and \" might). We could try to be clever and + // use single-quote wrapping when there is a double quote + // present, but I have choosen not to implement that. + // (warning: this code relies on the selection of quotation + // mark below) + $font = str_replace('\\', '\\5C ', $font); + $font = str_replace('"', '\\22 ', $font); + + // complicated font, requires quoting + $final .= "\"$font\", "; // note that this will later get turned into " } $final = rtrim($final, ', '); if ($final === '') return false; diff --git a/library/HTMLPurifier/AttrDef/CSS/URI.php b/library/HTMLPurifier/AttrDef/CSS/URI.php index 54b7d63f..1df17dc2 100644 --- a/library/HTMLPurifier/AttrDef/CSS/URI.php +++ b/library/HTMLPurifier/AttrDef/CSS/URI.php @@ -34,20 +34,16 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI $uri = substr($uri, 1, $new_length - 1); } - $keys = array( '(', ')', ',', ' ', '"', "'"); - $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'"); - $uri = str_replace($values, $keys, $uri); + $uri = $this->expandCSSEscape($uri); $result = parent::validate($uri, $config, $context); if ($result === false) return false; - // escape necessary characters according to CSS spec - // except for the comma, none of these should appear in the - // URI at all - $result = str_replace($keys, $values, $result); + // extra sanity check; should have been done by URI + $result = str_replace(array('"', "\\", "\n", "\x0c", "\r"), "", $result); - return "url('$result')"; + return "url(\"$result\")"; } diff --git a/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php b/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php index b36d09a6..83461c36 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/BackgroundTest.php @@ -8,12 +8,12 @@ class HTMLPurifier_AttrDef_CSS_BackgroundTest extends HTMLPurifier_AttrDefHarnes $config = HTMLPurifier_Config::createDefault(); $this->def = new HTMLPurifier_AttrDef_CSS_Background($config); - $valid = '#333 url(\'chess.png\') repeat fixed 50% top'; + $valid = '#333 url("chess.png") repeat fixed 50% top'; $this->assertDef($valid); - $this->assertDef('url("chess.png") #333 50% top repeat fixed', $valid); + $this->assertDef('url(\'chess.png\') #333 50% top repeat fixed', $valid); $this->assertDef( 'rgb(34, 56, 33) url(chess.png) repeat fixed top', - 'rgb(34,56,33) url(\'chess.png\') repeat fixed top' + 'rgb(34,56,33) url("chess.png") repeat fixed top' ); } diff --git a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php index 4d8f647a..aff10528 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/FontFamilyTest.php @@ -8,29 +8,29 @@ class HTMLPurifier_AttrDef_CSS_FontFamilyTest extends HTMLPurifier_AttrDefHarnes $this->def = new HTMLPurifier_AttrDef_CSS_FontFamily(); $this->assertDef('Gill, Helvetica, sans-serif'); - $this->assertDef('\'Times New Roman\', serif'); - $this->assertDef('"Times New Roman"', "'Times New Roman'"); + $this->assertDef('"Times New Roman", serif'); + $this->assertDef('\'Times New Roman\'', '"Times New Roman"'); $this->assertDef('01234'); $this->assertDef(',', false); - $this->assertDef('Times New Roman, serif', '\'Times New Roman\', serif'); - $this->assertDef($d = "'John\\'s Font'"); + $this->assertDef('Times New Roman, serif', '"Times New Roman", serif'); + $this->assertDef($d = '"John\'s Font"'); $this->assertDef("John's Font", $d); - $this->assertDef($d = "'\xE5\xAE\x8B\xE4\xBD\x93'"); + $this->assertDef($d = "\"\xE5\xAE\x8B\xE4\xBD\x93\""); $this->assertDef("\xE5\xAE\x8B\xE4\xBD\x93", $d); - $this->assertDef("'\\','f'", "'\\\\', f"); - $this->assertDef("'\\01'", "''"); - $this->assertDef("'\\20'", "' '"); - $this->assertDef("\\0020", "'\\\\0020'"); + $this->assertDef("'\\','f'", "\"\\5C \", f"); + $this->assertDef("'\\01'", "\"\""); + $this->assertDef("'\\20'", "\" \""); + $this->assertDef("\\0020", "\" \""); $this->assertDef("'\\000045'", "E"); $this->assertDef("','", false); - $this->assertDef("',' foobar','", "' foobar'"); - $this->assertDef("'\\27'", "'\''"); - $this->assertDef('"\\22"', "'\"'"); - $this->assertDef('"\\""', "'\"'"); - $this->assertDef('"\'"', "'\\''"); + $this->assertDef("',' foobar','", "\" foobar\""); + $this->assertDef("'\\27'", "\"'\""); + $this->assertDef('"\\22"', "\"\\22 \""); + $this->assertDef('"\\""', "\"\\22 \""); + $this->assertDef('"\'"', "\"'\""); $this->assertDef("'\\000045a'", "Ea"); $this->assertDef("'\\00045 a'", "Ea"); - $this->assertDef("'\\00045 a'", "'E a'"); + $this->assertDef("'\\00045 a'", "\"E a\""); $this->assertDef("'\\\nf'", "f"); } diff --git a/tests/HTMLPurifier/AttrDef/CSS/FontTest.php b/tests/HTMLPurifier/AttrDef/CSS/FontTest.php index 1e42ee24..40559485 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/FontTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/FontTest.php @@ -11,10 +11,10 @@ class HTMLPurifier_AttrDef_CSS_FontTest extends HTMLPurifier_AttrDefHarness // hodgepodge of usage cases from W3C spec, but " -> ' $this->assertDef('12px/14px sans-serif'); $this->assertDef('80% sans-serif'); - $this->assertDef('x-large/110% \'New Century Schoolbook\', serif'); + $this->assertDef('x-large/110% "New Century Schoolbook", serif'); $this->assertDef('bold italic large Palatino, serif'); $this->assertDef('normal small-caps 120%/120% fantasy'); - $this->assertDef('300 italic 1.3em/1.7em \'FB Armada\', sans-serif'); + $this->assertDef('300 italic 1.3em/1.7em "FB Armada", sans-serif'); $this->assertDef('600 9px Charcoal'); $this->assertDef('600 9px/ 12px Charcoal', '600 9px/12px Charcoal'); diff --git a/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php b/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php index 8fd43e59..07006670 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/ListStyleTest.php @@ -13,14 +13,14 @@ class HTMLPurifier_AttrDef_CSS_ListStyleTest extends HTMLPurifier_AttrDefHarness $this->assertDef('circle outside'); $this->assertDef('inside'); $this->assertDef('none'); - $this->assertDef('url(\'foo.gif\')'); - $this->assertDef('circle url(\'foo.gif\') inside'); + $this->assertDef('url("foo.gif")'); + $this->assertDef('circle url("foo.gif") inside'); // invalid values $this->assertDef('outside inside', 'outside'); // ordering - $this->assertDef('url(foo.gif) none', 'none url(\'foo.gif\')'); + $this->assertDef('url(foo.gif) none', 'none url("foo.gif")'); $this->assertDef('circle lower-alpha', 'circle'); // the spec is ambiguous about what happens in these // cases, so we're going off the W3C CSS validator diff --git a/tests/HTMLPurifier/AttrDef/CSS/URITest.php b/tests/HTMLPurifier/AttrDef/CSS/URITest.php index 20cc79d2..836f1034 100644 --- a/tests/HTMLPurifier/AttrDef/CSS/URITest.php +++ b/tests/HTMLPurifier/AttrDef/CSS/URITest.php @@ -12,20 +12,16 @@ class HTMLPurifier_AttrDef_CSS_URITest extends HTMLPurifier_AttrDefHarness // we could be nice but we won't be $this->assertDef('http://www.example.com/', false); - // no quotes are used, since that's the most widely supported - // syntax $this->assertDef('url(', false); - $this->assertDef('url(\'\')', true); - $result = "url('http://www.example.com/')"; + $this->assertDef('url("")', true); + $result = 'url("http://www.example.com/")'; $this->assertDef('url(http://www.example.com/)', $result); $this->assertDef('url("http://www.example.com/")', $result); $this->assertDef("url('http://www.example.com/')", $result); $this->assertDef( ' url( "http://www.example.com/" ) ', $result); - - // escaping $this->assertDef("url(http://www.example.com/foo,bar\))", - "url('http://www.example.com/foo\,bar\)')"); + 'url("http://www.example.com/foo,bar)")'); } } diff --git a/tests/HTMLPurifier/AttrDef/CSSTest.php b/tests/HTMLPurifier/AttrDef/CSSTest.php index 8ef818db..27e5c432 100644 --- a/tests/HTMLPurifier/AttrDef/CSSTest.php +++ b/tests/HTMLPurifier/AttrDef/CSSTest.php @@ -25,7 +25,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness $this->assertDef('text-transform:capitalize;'); $this->assertDef('background-color:rgb(0,0,255);'); $this->assertDef('background-color:transparent;'); - $this->assertDef('background:#333 url(\'chess.png\') repeat fixed 50% top;'); + $this->assertDef('background:#333 url("chess.png") repeat fixed 50% top;'); $this->assertDef('color:#F00;'); $this->assertDef('border-top-color:#F00;'); $this->assertDef('border-color:#F00 #FF0;'); @@ -62,7 +62,7 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness $this->assertDef('width:-50px;', false); $this->assertDef('text-decoration:underline;'); $this->assertDef('font-family:sans-serif;'); - $this->assertDef('font-family:Gill, \'Times New Roman\', sans-serif;'); + $this->assertDef('font-family:Gill, "Times New Roman", sans-serif;'); $this->assertDef('font:12px serif;'); $this->assertDef('border:1px solid #000;'); $this->assertDef('border-bottom:2em double #FF00FA;'); @@ -73,9 +73,9 @@ class HTMLPurifier_AttrDef_CSSTest extends HTMLPurifier_AttrDefHarness $this->assertDef('vertical-align:12px;'); $this->assertDef('vertical-align:50%;'); $this->assertDef('table-layout:fixed;'); - $this->assertDef('list-style-image:url(\'nice.jpg\');'); - $this->assertDef('list-style:disc url(\'nice.jpg\') inside;'); - $this->assertDef('background-image:url(\'foo.jpg\');'); + $this->assertDef('list-style-image:url("nice.jpg");'); + $this->assertDef('list-style:disc url("nice.jpg") inside;'); + $this->assertDef('background-image:url("foo.jpg");'); $this->assertDef('background-image:none;'); $this->assertDef('background-repeat:repeat-y;'); $this->assertDef('background-attachment:fixed;'); diff --git a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php index 9c26d4c9..e93d65e6 100644 --- a/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php +++ b/tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php @@ -81,7 +81,7 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness function test_cleanCSS_angledBrackets() { $this->assertCleanCSS( ".class {\nfont-family:'';\n}", - ".class {\nfont-family:'\\3C /style\\3E ';\n}" + ".class {\nfont-family:\"\\3C /style\\3E \";\n}" ); } @@ -99,14 +99,14 @@ class HTMLPurifier_Filter_ExtractStyleBlocksTest extends HTMLPurifier_Harness function test_cleanCSS_escapeCodes() { $this->assertCleanCSS( - ".class {\nfont-family:'\\3C /style\\3E ';\n}" + ".class {\nfont-family:\"\\3C /style\\3E \";\n}" ); } function test_cleanCSS_noEscapeCodes() { $this->config->set('Filter.ExtractStyleBlocks.Escaping', false); $this->assertCleanCSS( - ".class {\nfont-family:'';\n}" + ".class {\nfont-family:\"\";\n}" ); } diff --git a/tests/HTMLPurifier/HTMLT/munge-extra.htmlt b/tests/HTMLPurifier/HTMLT/munge-extra.htmlt index c10109ed..4b1c70a9 100644 --- a/tests/HTMLPurifier/HTMLT/munge-extra.htmlt +++ b/tests/HTMLPurifier/HTMLT/munge-extra.htmlt @@ -7,5 +7,5 @@ URI.MungeResources = true example.com --EXPECT-- Link -example.com +example.com --# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/HTMLT/shift-jis-preserve-yen.htmlt b/tests/HTMLPurifier/HTMLT/shift-jis-preserve-yen.htmlt index f22417c0..6e9640be 100644 --- a/tests/HTMLPurifier/HTMLT/shift-jis-preserve-yen.htmlt +++ b/tests/HTMLPurifier/HTMLT/shift-jis-preserve-yen.htmlt @@ -4,5 +4,5 @@ if (!function_exists('iconv')) return true; Core.Encoding = "Shift_JIS" Core.EscapeNonASCIICharacters = true --HTML-- -111 +111 --# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/HTMLT/shift-jis-remove-yen.htmlt b/tests/HTMLPurifier/HTMLT/shift-jis-remove-yen.htmlt index 6c2d3bc4..dd6dee6c 100644 --- a/tests/HTMLPurifier/HTMLT/shift-jis-remove-yen.htmlt +++ b/tests/HTMLPurifier/HTMLT/shift-jis-remove-yen.htmlt @@ -3,7 +3,7 @@ if (!function_exists('iconv')) return true; --INI-- Core.Encoding = Shift_JIS --HTML-- -111 +111 --EXPECT-- -111 +111 --# vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/HTMLT/tidy-background.htmlt b/tests/HTMLPurifier/HTMLT/tidy-background.htmlt index e9438b84..08bda267 100644 --- a/tests/HTMLPurifier/HTMLT/tidy-background.htmlt +++ b/tests/HTMLPurifier/HTMLT/tidy-background.htmlt @@ -1,5 +1,5 @@ --HTML--
asdf
--EXPECT-- -
asdf
+
asdf
--# vim: et sw=4 sts=4 -- 2.11.4.GIT