From d1c5d75027349e127e134a3629145215541ef89c Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sat, 16 Jul 2016 05:52:45 -0700 Subject: [PATCH] Fix #73 with Attr.ID.HTML5 Signed-off-by: Edward Z. Yang --- NEWS | 1 + configdoc/usage.xml | 7 ++++- library/HTMLPurifier/AttrDef/HTML/ID.php | 32 +++++++++++++-------- library/HTMLPurifier/ConfigSchema/schema.ser | Bin 15526 -> 15598 bytes .../ConfigSchema/schema/Attr.ID.HTML5.txt | 10 +++++++ tests/HTMLPurifier/AttrDef/HTML/IDTest.php | 11 +++++++ 6 files changed, 48 insertions(+), 13 deletions(-) rewrite library/HTMLPurifier/ConfigSchema/schema.ser (97%) create mode 100644 library/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt diff --git a/NEWS b/NEWS index a93723f9..8e46860e 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! Partial support for 'border-radius' properties when %CSS.AllowProprietary is true. The slash syntax, i.e., 'border-radius: 2em 1em 4em / 0.5em 3em' is not yet supported. +! %Attr.ID.HTML5 turns on HTML5-style ID handling. - alt truncation could result in malformed UTF-8 sequence. Don't truncate. Thanks Brandon Farber for reporting. - Linkify regex is smarter, based off of Gruber's regex. diff --git a/configdoc/usage.xml b/configdoc/usage.xml index d59b6b1c..69f9b3a5 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -355,9 +355,14 @@ 58 + + + 75 + + - 89 + 97 diff --git a/library/HTMLPurifier/AttrDef/HTML/ID.php b/library/HTMLPurifier/AttrDef/HTML/ID.php index 3d86efb4..4ba45610 100644 --- a/library/HTMLPurifier/AttrDef/HTML/ID.php +++ b/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -72,18 +72,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef // we purposely avoid using regex, hopefully this is faster - if (ctype_alpha($id)) { - $result = true; - } else { - if (!ctype_alpha(@$id[0])) { + if ($config->get('Attr.ID.HTML5') === true) { + if (preg_match('/[\t\n\x0b\x0c ]/', $id)) { return false; } - // primitive style of regexps, I suppose - $trim = trim( - $id, - 'A..Za..z0..9:-._' - ); - $result = ($trim === ''); + } else { + if (ctype_alpha($id)) { + // OK + } else { + if (!ctype_alpha(@$id[0])) { + return false; + } + // primitive style of regexps, I suppose + $trim = trim( + $id, + 'A..Za..z0..9:-._' + ); + if ($trim !== '') { + return false; + } + } } $regexp = $config->get('Attr.IDBlacklistRegexp'); @@ -91,14 +99,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef return false; } - if (!$this->selector && $result) { + if (!$this->selector) { $id_accumulator->add($id); } // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it // valid, or return false. - return $result ? $id : false; + return $id; } } diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser dissimilarity index 97% index 9ea0978d43c1d800bd1cf50634cd73efe120f81c..0a7a406e132dd5dfe79e4b52f8f44b7d4f8d5481 100644 GIT binary patch delta 122 zcwSpp`L1$;Iit~Jc{SOMq4A6&##Ty>B_&0Ao-TSGA-+DQO4feXn>RAH3KCXhD>(VB fsv)E4W_dL}CUHzfnO3^y){{3fN^Ex0ND%=5$N(ch delta 50 ycwT!~xvX-6IiulbKj9k2%@0Ma1Shi!dm#APf{Z4U{e)#VXQ_Q;++3config->set('Attr.ID.HTML5', true); + + $this->assertDef('123'); + $this->assertDef('x[1]'); + $this->assertDef('not ok', false); + $this->assertDef(' ', false); + $this->assertDef('', false); + } + } // vim: et sw=4 sts=4 -- 2.11.4.GIT