From 94468f3c2494301501a014a69feb9a6bf8c67bb9 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 3 Jan 2012 20:40:17 +0800 Subject: [PATCH] Remove PEARSax3 lexer. Signed-off-by: Edward Z. Yang --- NEWS | 1 + library/HTMLPurifier/Lexer/PEARSax3.php | 139 -------------------------------- tests/HTMLPurifier/LexerTest.php | 62 ++------------ 3 files changed, 8 insertions(+), 194 deletions(-) delete mode 100644 library/HTMLPurifier/Lexer/PEARSax3.php diff --git a/NEWS b/NEWS index df347552..0669a313 100644 --- a/NEWS +++ b/NEWS @@ -10,6 +10,7 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ========================== 4.4.0, unknown release date +# Removed PEARSax3 handler. # URI.Munge now munges URIs inside the same host that go from https to http. Reported by Neike Taika-Tessaro. # Core.EscapeNonASCIICharacters now always transforms entities to diff --git a/library/HTMLPurifier/Lexer/PEARSax3.php b/library/HTMLPurifier/Lexer/PEARSax3.php deleted file mode 100644 index 1d358c7b..00000000 --- a/library/HTMLPurifier/Lexer/PEARSax3.php +++ /dev/null @@ -1,139 +0,0 @@ -tokens = array(); - $this->last_token_was_empty = false; - - $string = $this->normalize($string, $config, $context); - - $this->parent_handler = set_error_handler(array($this, 'muteStrictErrorHandler')); - - $parser = new XML_HTMLSax3(); - $parser->set_object($this); - $parser->set_element_handler('openHandler','closeHandler'); - $parser->set_data_handler('dataHandler'); - $parser->set_escape_handler('escapeHandler'); - - // doesn't seem to work correctly for attributes - $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1); - - $parser->parse($string); - - restore_error_handler(); - - return $this->tokens; - - } - - /** - * Open tag event handler, interface is defined by PEAR package. - */ - public function openHandler(&$parser, $name, $attrs, $closed) { - // entities are not resolved in attrs - foreach ($attrs as $key => $attr) { - $attrs[$key] = $this->parseData($attr); - } - if ($closed) { - $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); - $this->last_token_was_empty = true; - } else { - $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); - } - $this->stack[] = $name; - return true; - } - - /** - * Close tag event handler, interface is defined by PEAR package. - */ - public function closeHandler(&$parser, $name) { - // HTMLSax3 seems to always send empty tags an extra close tag - // check and ignore if you see it: - // [TESTME] to make sure it doesn't overreach - if ($this->last_token_was_empty) { - $this->last_token_was_empty = false; - return true; - } - $this->tokens[] = new HTMLPurifier_Token_End($name); - if (!empty($this->stack)) array_pop($this->stack); - return true; - } - - /** - * Data event handler, interface is defined by PEAR package. - */ - public function dataHandler(&$parser, $data) { - $this->last_token_was_empty = false; - $this->tokens[] = new HTMLPurifier_Token_Text($data); - return true; - } - - /** - * Escaped text handler, interface is defined by PEAR package. - */ - public function escapeHandler(&$parser, $data) { - if (strpos($data, '--') === 0) { - // remove trailing and leading double-dashes - $data = substr($data, 2); - if (strlen($data) >= 2 && substr($data, -2) == "--") { - $data = substr($data, 0, -2); - } - if (isset($this->stack[sizeof($this->stack) - 1]) && - $this->stack[sizeof($this->stack) - 1] == "style") { - $this->tokens[] = new HTMLPurifier_Token_Text($data); - } else { - $this->tokens[] = new HTMLPurifier_Token_Comment($data); - } - $this->last_token_was_empty = false; - } - // CDATA is handled elsewhere, but if it was handled here: - //if (strpos($data, '[CDATA[') === 0) { - // $this->tokens[] = new HTMLPurifier_Token_Text( - // substr($data, 7, strlen($data) - 9) ); - //} - return true; - } - - /** - * An error handler that mutes strict errors - */ - public function muteStrictErrorHandler($errno, $errstr, $errfile=null, $errline=null, $errcontext=null) { - if ($errno == E_STRICT) return; - return call_user_func($this->parent_handler, $errno, $errstr, $errfile, $errline, $errcontext); - } - -} - -// vim: et sw=4 sts=4 diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php index edc9e975..42a59aeb 100644 --- a/tests/HTMLPurifier/LexerTest.php +++ b/tests/HTMLPurifier/LexerTest.php @@ -7,13 +7,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness public function __construct() { parent::__construct(); - if ($GLOBALS['HTMLPurifierTest']['PEAR'] && - // PEARSax3 is not maintained and throws loads of DEPRECATED - // errors in PHP 5.3 - version_compare(PHP_VERSION, '5.3', '<')) { - require_once 'HTMLPurifier/Lexer/PEARSax3.php'; - $this->_has_pear = true; - } if ($GLOBALS['HTMLPurifierTest']['PH5P']) { require_once 'HTMLPurifier/Lexer/PH5P.php'; } @@ -158,7 +151,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness function assertTokenization($input, $expect, $alt_expect = array()) { $lexers = array(); $lexers['DirectLex'] = new HTMLPurifier_Lexer_DirectLex(); - if ($this->_has_pear) $lexers['PEARSax3'] = new HTMLPurifier_Lexer_PEARSax3(); if (class_exists('DOMDocument')) { $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex(); $lexers['PH5P'] = new HTMLPurifier_Lexer_PH5P(); @@ -299,7 +291,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness array( // I like our behavior better, but it's non-standard 'DOMLex' => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ), - 'PEARSax3' => array( new HTMLPurifier_Token_Start('a', array('href'=>'')) ), 'PH5P' => false, // total barfing, grabs scaffolding too ) ); @@ -313,12 +304,11 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness ), array( // some parsers will separate entities out - 'PEARSax3' => $split = array( + 'PH5P' => array( new HTMLPurifier_Token_Text('<'), new HTMLPurifier_Token_Text('b'), new HTMLPurifier_Token_Text('>'), ), - 'PH5P' => $split, ) ); } @@ -329,10 +319,9 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness array( new HTMLPurifier_Token_Empty('a') ), array( // we barf on this input - 'DirectLex' => $tokens = array( + 'DirectLex' => array( new HTMLPurifier_Token_Start('a', array('"' => '')) ), - 'PEARSax3' => $tokens, 'PH5P' => false, // behavior varies; handle this personally ) ); @@ -363,10 +352,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness function test_tokenizeHTML_escapedQuote() { $this->assertTokenization( '"', - array( new HTMLPurifier_Token_Text('"') ), - array( - 'PEARSax3' => false, // PEAR barfs on this - ) + array( new HTMLPurifier_Token_Text('"') ) ); } @@ -375,8 +361,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness 'can't get me!]]>', array( new HTMLPurifier_Token_Text('You can't get me!') ), array( - // PEAR splits up all of the CDATA - 'PEARSax3' => $split = array( + 'PH5P' => array( new HTMLPurifier_Token_Text('You '), new HTMLPurifier_Token_Text('<'), new HTMLPurifier_Token_Text('b'), @@ -389,7 +374,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness new HTMLPurifier_Token_Text('>'), new HTMLPurifier_Token_Text(' get me!'), ), - 'PH5P' => $split, ) ); } @@ -406,11 +390,10 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness '', array( new HTMLPurifier_Token_Text("→") ), array( - 'PEARSax3' => $split = array( + 'PH5P' => array( new HTMLPurifier_Token_Text('&'), new HTMLPurifier_Token_Text('rarr;'), ), - 'PH5P' => $split, ) ); } @@ -457,7 +440,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'), new HTMLPurifier_Token_End('b'), ), - 'PEARSax3' => false, // totally mangled 'PH5P' => array( // interesting grouping new HTMLPurifier_Token_Start('b'), new HTMLPurifier_Token_Text('Whoa! '), @@ -475,9 +457,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness array( new HTMLPurifier_Token_Comment(' This >< comment '), new HTMLPurifier_Token_Empty('br'), - ), - array( - 'PEARSax3' => false, ) ); } @@ -488,7 +467,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness array( new HTMLPurifier_Token_Comment(' This >< comment') ), array( 'DOMLex' => false, - 'PEARSax3' => false, 'PH5P' => false, ) ); @@ -505,7 +483,6 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness new HTMLPurifier_Token_End('script'), ), array( - 'PEARSax3' => false, // PH5P, for some reason, bubbles the script to 'PH5P' => false, ) @@ -515,10 +492,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness function test_tokenizeHTML_entitiesInComment() { $this->assertTokenization( '', - array( new HTMLPurifier_Token_Comment(' This comment < < & ') ), - array( - 'PEARSax3' => false - ) + array( new HTMLPurifier_Token_Comment(' This comment < < & ') ) ); } @@ -531,8 +505,7 @@ class HTMLPurifier_LexerTest extends HTMLPurifier_Harness new HTMLPurifier_Token_Start('a', array('href' => '')), new HTMLPurifier_Token_Text('<'), new HTMLPurifier_Token_Text('">'), - ), - 'PEARSax3' => false, + ) ) ); } @@ -595,7 +568,6 @@ div {} ), array( 'DirectLex' => $alt_expect, - 'PEARSax3' => $alt_expect, ) ); } @@ -615,11 +587,6 @@ div {} new HTMLPurifier_Token_Text('<3'), new HTMLPurifier_Token_Empty('br'), ), - 'PEARSax3' => array( - // bah too lazy to fix this - new HTMLPurifier_Token_Empty('br'), - new HTMLPurifier_Token_Empty('3 array( - // also too lazy to fix - new HTMLPurifier_Token_Start('b'), - new HTMLPurifier_Token_Empty('<<'), - new HTMLPurifier_Token_Text('b>'), - ), ) ); } @@ -666,13 +627,6 @@ div {} new HTMLPurifier_Token_Text('test'), new HTMLPurifier_Token_End('b'), ), - 'PEARSax3' => array( - // totally doing the wrong thing here - new HTMLPurifier_Token_Text(' '), - new HTMLPurifier_Token_Start('b'), - new HTMLPurifier_Token_Text('test'), - new HTMLPurifier_Token_End('b'), - ), ) ); } @@ -694,7 +648,6 @@ div {} ), array( 'PH5P' => $alt_tokens, - 'PEARSax3' => $alt_tokens, ) ); } @@ -777,7 +730,6 @@ div {} ), array( 'DirectLex' => $start, - 'PEARSax3' => $start, ) ); } -- 2.11.4.GIT