1 --- C:\Users\Edward\Webs\htmlpurifier\maintenance\PH5P.php 2008-07-07 09:12:12.000000000 -0400
2 +++ C:\Users\Edward\Webs\htmlpurifier\maintenance/PH5P.new.php 2008-12-06 02:29:34.988800000 -0500
5 public function __construct($data) {
6 $data = str_replace("\r\n", "\n", $data);
7 - $date = str_replace("\r", null, $data);
8 + $data = str_replace("\r", null, $data);
13 // If nothing is returned, emit a U+0026 AMPERSAND character token.
14 // Otherwise, emit the character token that was returned.
15 $char = (!$entity) ? '&' : $entity;
16 - $this->emitToken($char);
17 + $this->emitToken(array(
18 + 'type' => self::CHARACTR,
22 // Finally, switch to the data state.
23 $this->state = 'data';
25 } elseif($char === '&') {
26 /* U+0026 AMPERSAND (&)
27 Switch to the entity in attribute value state. */
28 - $this->entityInAttributeValueState('non');
29 + $this->entityInAttributeValueState();
31 } elseif($char === '>') {
32 /* U+003E GREATER-THAN SIGN (>)
37 - $this->emitToken($char);
38 + $last = count($this->token['attr']) - 1;
39 + $this->token['attr'][$last]['value'] .= $char;
42 private function bogusCommentState() {
43 @@ -1066,6 +1070,11 @@
46 if(in_array($id, $this->entities)) {
47 + if ($e_name[$c-1] !== ';') {
48 + if ($c < $len && $e_name[$c] == ';') {
49 + $this->char++; // consume extra semicolon
56 /* Reconstruct the active formatting elements, if any. */
57 $this->reconstructActiveFormattingElements();
59 - $this->insertElement($token);
60 + $this->insertElement($token, true, true);
64 @@ -3465,7 +3474,18 @@
68 - private function insertElement($token, $append = true) {
69 + private function insertElement($token, $append = true, $check = false) {
70 + // Proprietary workaround for libxml2's limitations with tag names
72 + // Slightly modified HTML5 tag-name modification,
73 + // removing anything that's not an ASCII letter, digit, or hyphen
74 + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']);
75 + // Remove leading hyphens and numbers
76 + $token['name'] = ltrim($token['name'], '-0..9');
77 + // In theory, this should never be needed, but just in case
78 + if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice
81 $el = $this->dom->createElement($token['name']);
83 foreach($token['attr'] as $attr) {
88 - private function generateImpliedEndTags(array $exclude = array()) {
89 + private function generateImpliedEndTags($exclude = array()) {
90 /* When the steps below require the UA to generate implied end tags,
91 then, if the current node is a dd element, a dt element, an li element,
92 a p element, a td element, a th element, or a tr element, the UA must
97 - private function getElementCategory($name) {
98 + private function getElementCategory($node) {
99 + $name = $node->tagName;
100 if(in_array($name, $this->special))
101 return self::SPECIAL;