From 75e52a12a6773b5b3b1d7162cb814b195588fd5c Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sun, 24 Jun 2007 04:22:28 +0000 Subject: [PATCH] Make context errors more friendly; factor out disabled; fix broken test cases; update TODO. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1220 48356398-32a2-884e-a903-53898d9a118a --- TODO | 13 +++-- library/HTMLPurifier/Context.php | 8 +-- library/HTMLPurifier/Strategy/MakeWellFormed.php | 59 +++++++++++++--------- tests/HTMLPurifier/ContextTest.php | 6 +-- tests/HTMLPurifier/Strategy/MakeWellFormedTest.php | 2 +- 5 files changed, 51 insertions(+), 37 deletions(-) diff --git a/TODO b/TODO index 9ec59074..dd359c0e 100644 --- a/TODO +++ b/TODO @@ -43,14 +43,10 @@ TODO List AttrDef class) # More control over allowed CSS properties (maybe modularize it in the same fashion!) - # Formatters for plaintext (COMPLEX) - - Auto-paragraphing (be sure to leverage fact that we know when things - shouldn't be paragraphed, such as lists and tables). - - Linkify URLs + # Formatters for plaintext - Smileys - Linkification for HTML Purifier docs: notably configuration and classes - - Allow tags to be "armored", an internal flag that protects them - from validation and passes them out unharmed + - Standardize token armor for all areas of armor - Fixes for Firefox's inability to handle COL alignment props (Bug 915) - Automatically add non-breaking spaces to empty table cells when empty-cells:show is applied to have compatibility with Internet Explorer @@ -68,6 +64,8 @@ Ongoing - Lots of profiling, make it faster! - Plugins for major CMSes (COMPLEX) - WordPress (mostly written, needs beta-testing) + - phpBB + - Phorum - eFiction - more! (look for ones that use WYSIWYGs) - Complete basic smoketests @@ -76,7 +74,8 @@ Unknown release (on a scratch-an-itch basis) ? Semi-lossy dumb alternate character encoding transfor ? Have 'lang' attribute be checked against official lists, achieved by encoding all characters that have string entity equivalents - - Explain how to use HTML Purifier in non-PHP languages + - Explain how to use HTML Purifier in non-PHP languages / create + a simple command line stub - Abstract ChildDef_BlockQuote to work with all elements that only allow blocks in them, required or optional - Reorganize Unit Tests diff --git a/library/HTMLPurifier/Context.php b/library/HTMLPurifier/Context.php index ce6fe51e..c5ad8236 100644 --- a/library/HTMLPurifier/Context.php +++ b/library/HTMLPurifier/Context.php @@ -2,6 +2,8 @@ /** * Registry object that contains information about the current context. + * @warning Is a bit buggy when variables are set to null: it thinks + * they don't exist! So use false instead, please. */ class HTMLPurifier_Context { @@ -19,7 +21,7 @@ class HTMLPurifier_Context */ function register($name, &$ref) { if (isset($this->_storage[$name])) { - trigger_error('Name collision, cannot re-register', + trigger_error("Name $name produces collision, cannot re-register", E_USER_ERROR); return; } @@ -32,7 +34,7 @@ class HTMLPurifier_Context */ function &get($name) { if (!isset($this->_storage[$name])) { - trigger_error('Attempted to retrieve non-existent variable', + trigger_error("Attempted to retrieve non-existent variable $name", E_USER_ERROR); $var = null; // so we can return by reference return $var; @@ -46,7 +48,7 @@ class HTMLPurifier_Context */ function destroy($name) { if (!isset($this->_storage[$name])) { - trigger_error('Attempted to destroy non-existent variable', + trigger_error("Attempted to destroy non-existent variable $name", E_USER_ERROR); return; } diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index ee989a3e..3029f62e 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -50,48 +50,58 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags'); + // -- begin INJECTOR -- + // factor this stuff out to its own class + $injector = array(); $injector_skip = array(); - $injector_disabled = array(); if ($config->get('Core', 'AutoParagraph')) { $injector[] = new HTMLPurifier_Injector_AutoParagraph(); - $injector_skip[] = 0; - $injector_disabled[] = false; + // decrement happens first, so set to one so we start at zero + $injector_skip[] = 1; } if ($config->get('Core', 'AutoLinkify')) { $injector[] = new HTMLPurifier_Injector_Linkify(); - $injector_skip[] = 0; - $injector_disabled[] = false; + $injector_skip[] = 1; } - $current_injector = 0; + // array index of the injector that resulted in an array + // substitution. This enables processTokens() to know which + // injectors are affected by the added tokens and which are + // not (namely, the ones after the current injector are not + // affected) + $current_injector = false; $context->register('Injector', $injector); - $context->register('InjectorSkip', $injector_skip); $context->register('CurrentInjector', $current_injector); + // number of tokens to skip + 1 + // before processing, this gets decremented: if it equals zero, + // it means the injector is active and is processing tokens, if + // it is greater than zero, then it is inactive, presumably having + // been the source of the tokens + $context->register('InjectorSkip', $injector_skip); + + // -- end INJECTOR -- + for ($tokens_index = 0; isset($tokens[$tokens_index]); $tokens_index++) { // if all goes well, this token will be passed through unharmed $token = $tokens[$tokens_index]; foreach ($injector as $i => $x) { - if ($injector_skip[$i] > 0) { - $injector_skip[$i]--; - $injector_disabled[$i] = true; - } else { - $injector_disabled[$i] = false; - } + if ($injector_skip[$i] > 0) $injector_skip[$i]--; } // quick-check: if it's not a tag, no need to process if (empty( $token->is_tag )) { + // duplicated with handleStart if ($token->type === 'text') { foreach ($injector as $i => $x) { - if (!$injector_disabled[$i]) { + if (!$injector_skip[$i]) { $x->handleText($token, $config, $context); } if (is_array($token)) { @@ -149,8 +159,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $current_nesting[] = $parent; // undo the pop } + // injectors foreach ($injector as $i => $x) { - if (!$injector_disabled[$i]) { + if (!$injector_skip[$i]) { $x->handleStart($token, $config, $context); } if (is_array($token)) { @@ -236,12 +247,16 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $context->destroy('InputIndex'); $context->destroy('OutputTokens'); + $context->destroy('Injector'); + $context->destroy('CurrentInjector'); + $context->destroy('InjectorSkip'); + return $result; } function processToken($token, $config, &$context) { if (is_array($token)) { - // the original token was overloaded by a formatter, time + // the original token was overloaded by an injector, time // to some fancy acrobatics $tokens =& $context->get('InputTokens'); @@ -250,13 +265,11 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy // re-processed array_splice($tokens, $tokens_index--, 1, $token); - // this will be a bit more complicated when we add more formatters - // we need to prevent the same formatter from running twice on it - $injector_skip =& $context->get('InjectorSkip'); - $injector =& $context->get('Injector'); + // adjust the injector skips based on the array substitution + $injector_skip =& $context->get('InjectorSkip'); $current_injector =& $context->get('CurrentInjector'); - $offset = count($token); + $offset = count($token) + 1; for ($i = 0; $i <= $current_injector; $i++) { $injector_skip[$i] += $offset; } @@ -269,9 +282,9 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($token->type == 'start') { $current_nesting[] = $token; } elseif ($token->type == 'end') { - // theoretical: this isn't used because performing + // theoretical: this code doesn't get run because performing // the calculations inline is more efficient, and - // end tokens currently do not cause a handler invocation + // end tokens (currently) do not cause a handler invocation array_pop($current_nesting); } } diff --git a/tests/HTMLPurifier/ContextTest.php b/tests/HTMLPurifier/ContextTest.php index 195a5030..c4616573 100644 --- a/tests/HTMLPurifier/ContextTest.php +++ b/tests/HTMLPurifier/ContextTest.php @@ -30,11 +30,11 @@ class HTMLPurifier_ContextTest extends UnitTestCase $this->context->destroy('IDAccumulator'); $this->assertFalse($this->context->exists('IDAccumulator')); - $this->expectError('Attempted to retrieve non-existent variable'); + $this->expectError('Attempted to retrieve non-existent variable IDAccumulator'); $accumulator_3 =& $this->context->get('IDAccumulator'); $this->assertNull($accumulator_3); - $this->expectError('Attempted to destroy non-existent variable'); + $this->expectError('Attempted to destroy non-existent variable IDAccumulator'); $this->context->destroy('IDAccumulator'); } @@ -44,7 +44,7 @@ class HTMLPurifier_ContextTest extends UnitTestCase $var = true; $this->context->register('OnceOnly', $var); - $this->expectError('Name collision, cannot re-register'); + $this->expectError('Name OnceOnly produces collision, cannot re-register'); $this->context->register('OnceOnly', $var); // destroy it, now registration is okay diff --git a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php index 5991ebb7..92d19f58 100644 --- a/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php +++ b/tests/HTMLPurifier/Strategy/MakeWellFormedTest.php @@ -235,7 +235,7 @@ http://dev.example.com', $this->assertResult( 'http://example.com
http://example.com
', - '

http://example.com

http://example.com
' + '

http://example.com

http://example.com
' ); $this->assertResult( -- 2.11.4.GIT