From 412bae13b56a6a10d37b618efa2695ee6a4aae71 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 17 Sep 2013 00:48:42 -0700 Subject: [PATCH] Fix quadratic behavior in DOMLex due to array_shift. Signed-off-by: Edward Z. Yang --- NEWS | 2 + configdoc/usage.xml | 246 ++++++++++++++++----------------- library/HTMLPurifier.includes.php | 1 + library/HTMLPurifier.safe-includes.php | 1 + library/HTMLPurifier/Lexer/DOMLex.php | 10 +- library/HTMLPurifier/Queue.php | 56 ++++++++ 6 files changed, 188 insertions(+), 128 deletions(-) create mode 100644 library/HTMLPurifier/Queue.php diff --git a/NEWS b/NEWS index 4eb34c6c..e4a92264 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier Michael Gusev for fixing. ! New directive %Core.AllowHostnameUnderscore which allows underscores in hostnames. +- Eliminate quadratic behavior in DOMLex by using a proper queue. + Thanks Ole Laursen for noticing this. - Made Linkify URL parser a bit less permissive, so that non-breaking spaces and commas are not included as part of URL. Thanks nAS for fixing. - Fix some bad interactions with %HTML.Allowed and injectors. Thanks diff --git a/configdoc/usage.xml b/configdoc/usage.xml index 6349b26d..79e48d90 100644 --- a/configdoc/usage.xml +++ b/configdoc/usage.xml @@ -2,551 +2,551 @@ - 150 + 162 - 81 - 284 + 85 + 315 - 53 - 73 - 348 + 67 + 87 + 385 - 50 + 57 - 157 + 226 - 215 + 319 - 219 + 323 - 223 + 327 - 227 + 331 - 302 + 447 - 316 + 463 - 59 + 66 - 83 + 119 - 85 + 123 - 88 + 128 - 93 + 133 - 337 - 372 + 374 + 422 - 341 - 379 + 382 + 433 - 373 + 423 - 61 + 70 - 62 + 71 - 63 + 72 - 64 + 73 - 93 + 104 - 107 + 122 - 266 + 297 - 108 + 123 - 222 + 263 - 230 + 273 - 247 + 291 - 248 + 292 - 251 + 295 - 342 + 399 - 343 + 400 - 204 + 234 - 271 + 302 - 27 + 37 - 36 + 47 - 23 + 30 - 211 + 241 - 212 + 242 - 222 + 256 - 225 + 259 - 228 + 262 - 231 + 265 - 17 + 22 - 234 + 268 - 237 + 271 - 26 + 27 - 88 + 93 - 76 + 80 - 80 + 84 - 48 + 62 - 282 + 313 - 303 + 334 - 60 + 65 - 12 + 46 - 70 + 76 - 81 + 89 - 71 + 77 - 78 + 84 - 41 + 48 - 42 + 49 - 28 + 47 - 12 + 19 - 12 + 19 - 50 + 64 - 18 + 33 - 19 + 34 - 15 + 32 - 30 + 41 - 36 + 51 - 38 - 41 + 53 + 58 - 64 + 89 - 30 + 46 - 61 + 77 - 80 + 96 - 13 + 22 - 18 + 24 - 20 + 27 - 19 + 27 - 25 + 33 - 33 + 41 - 11 + 18 - 13 + 19 - 38 + 53 - 62 + 86 - 91 + 171 - 107 - 124 + 188 + 206 - 55 + 94 - 79 + 122 - 277 + 327 - 17 + 28 - 23 + 48 - 14 + 21 - 13 + 18 - 19 + 24 - 45 + 50 - 49 + 54 - 50 + 55 - 15 + 31 - 15 + 46 - 16 + 47 - 44 + 54 - 70 + 84 - 57 + 64 - 53 + 66 - 19 + 26 - 24 + 31 - 25 + 32 - 28 + 35 - 29 + 36 - 12 + 25 - 14 + 48 - 15 + 49 - 18 + 35 diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php index 18cb0013..f18db992 100644 --- a/library/HTMLPurifier.includes.php +++ b/library/HTMLPurifier.includes.php @@ -57,6 +57,7 @@ require 'HTMLPurifier/Lexer.php'; require 'HTMLPurifier/PercentEncoder.php'; require 'HTMLPurifier/PropertyList.php'; require 'HTMLPurifier/PropertyListIterator.php'; +require 'HTMLPurifier/Queue.php'; require 'HTMLPurifier/Strategy.php'; require 'HTMLPurifier/StringHash.php'; require 'HTMLPurifier/StringHashParser.php'; diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php index e23a81a7..b9847559 100644 --- a/library/HTMLPurifier.safe-includes.php +++ b/library/HTMLPurifier.safe-includes.php @@ -51,6 +51,7 @@ require_once $__dir . '/HTMLPurifier/Lexer.php'; require_once $__dir . '/HTMLPurifier/PercentEncoder.php'; require_once $__dir . '/HTMLPurifier/PropertyList.php'; require_once $__dir . '/HTMLPurifier/PropertyListIterator.php'; +require_once $__dir . '/HTMLPurifier/Queue.php'; require_once $__dir . '/HTMLPurifier/Strategy.php'; require_once $__dir . '/HTMLPurifier/StringHash.php'; require_once $__dir . '/HTMLPurifier/StringHashParser.php'; diff --git a/library/HTMLPurifier/Lexer/DOMLex.php b/library/HTMLPurifier/Lexer/DOMLex.php index c07ef4c5..72075445 100644 --- a/library/HTMLPurifier/Lexer/DOMLex.php +++ b/library/HTMLPurifier/Lexer/DOMLex.php @@ -92,11 +92,11 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer protected function tokenizeDOM($node, &$tokens) { $level = 0; - $nodes = array($level => array($node)); + $nodes = array($level => new HTMLPurifier_Queue(array($node))); $closingNodes = array(); do { - while (!empty($nodes[$level])) { - $node = array_shift($nodes[$level]); // FIFO + while (!$nodes[$level]->isEmpty()) { + $node = $nodes[$level]->shift(); // FIFO $collect = $level > 0 ? true : false; $needEndingTag = $this->createStartNode($node, $tokens, $collect); if ($needEndingTag) { @@ -104,9 +104,9 @@ class HTMLPurifier_Lexer_DOMLex extends HTMLPurifier_Lexer } if ($node->childNodes && $node->childNodes->length) { $level++; - $nodes[$level] = array(); + $nodes[$level] = new HTMLPurifier_Queue(); foreach ($node->childNodes as $childNode) { - array_push($nodes[$level], $childNode); + $nodes[$level]->push($childNode); } } } diff --git a/library/HTMLPurifier/Queue.php b/library/HTMLPurifier/Queue.php new file mode 100644 index 00000000..f58db904 --- /dev/null +++ b/library/HTMLPurifier/Queue.php @@ -0,0 +1,56 @@ +input = $input; + $this->output = array(); + } + + /** + * Shifts an element off the front of the queue. + */ + public function shift() { + if (empty($this->output)) { + $this->output = array_reverse($this->input); + $this->input = array(); + } + if (empty($this->output)) { + return NULL; + } + return array_pop($this->output); + } + + /** + * Pushes an element onto the front of the queue. + */ + public function push($x) { + array_push($this->input, $x); + } + + /** + * Checks if it's empty. + */ + public function isEmpty() { + return empty($this->input) && empty($this->output); + } +} -- 2.11.4.GIT