library/HTMLPurifier/Token.php

   1 <?php
   2
   3 /**
   4  * Defines a set of immutable value object tokens for HTML representation.
   5  *
   6  * @file
   7  */
   8
   /**
    * Root of the token hierarchy; every other token class in this file
    * extends it, directly or through HTMLPurifier_Token_Tag.
    *
    * It is abstract by convention only: the class can still be instantiated,
    * but copy() raises a user error instead of returning a token.
    */
   class HTMLPurifier_Token
   {
       /**
        * Type of node, usable in place of an <tt>is_a()</tt> check.
        * Declared without a value here; subclasses supply their own.
        * @public
        */
       var $type;

       /**
        * Placeholder for the clone substitute that subclasses override.
        *
        * Always raises an E_USER_ERROR, because the base class has nothing
        * meaningful to copy.
        */
       function copy()
       {
           $message = 'Cannot copy abstract class';
           trigger_error($message, E_USER_ERROR);
       }
   }
  23
  /**
   * Abstract class of a tag token (start, end or empty), and its behavior.
   *
   * Holds the tag name and attribute array shared by the concrete tag
   * tokens and normalizes both to lower case on construction.
   */
  class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
  {
      /**
       * Bool marker that indicates the object is a tag.
       *
       * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
       * without having to use a function call <tt>is_a()</tt>.
       *
       * @public
       */
      var $is_tag = true;

      /**
       * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
       *
       * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
       * be lower-casing them, but these tokens cater to HTML tags, which are
       * insensitive.
       *
       * @public
       */
      var $name;

      /**
       * Associative array of the tag's attributes. When set through the
       * constructor, keys containing upper-case letters have been
       * lower-cased.
       */
      var $attributes = array();

      /**
       * Constructor, which lower-cases the passed tag name and attribute keys.
       *
       * Declared as __construct() so that it runs on PHP versions that no
       * longer treat a method named after the class as the constructor.
       *
       * If an attribute key is not already lower-case and its lower-cased
       * form is present (and non-null) in the array, the existing lower-case
       * entry is kept and the other one is discarded.
       *
       * @param $name         String name.
       * @param $attributes   Associative array of attributes.
       */
      function __construct($name, $attributes = array()) {
          // ctype_lower() is a cheap pre-check that skips strtolower() for
          // names that are already purely lower-case letters
          $this->name = ctype_lower($name) ? $name : strtolower($name);
          foreach ($attributes as $key => $value) {
              // normalization only necessary when key is not lowercase
              if (!ctype_lower($key)) {
                  $new_key = strtolower($key);
                  if (!isset($attributes[$new_key])) {
                      $attributes[$new_key] = $attributes[$key];
                  }
                  // keys such as 'data-x' fail ctype_lower() yet are already
                  // lower-case; only drop the entry when the key really changed
                  if ($new_key !== $key) {
                      unset($attributes[$key]);
                  }
              }
          }
          $this->attributes = $attributes;
      }

      /**
       * Legacy (PHP 4 style) constructor, kept so that old runtimes and any
       * code invoking this method by name keep working. Delegates to
       * __construct().
       *
       * @param $name         String name.
       * @param $attributes   Associative array of attributes.
       */
      function HTMLPurifier_Token_Tag($name, $attributes = array()) {
          $this->__construct($name, $attributes);
      }
  }
  79
  /**
   * Concrete token class for an opening tag.
   */
  class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
  {
      /** Type marker identifying this token as a start tag. */
      var $type = 'start';

      /**
       * Builds a new start token from this token's name and attributes.
       * @return Copied token
       */
      function copy()
      {
          $duplicate = new HTMLPurifier_Token_Start($this->name, $this->attributes);
          return $duplicate;
      }
  }
  90
  /**
   * Concrete token class for an empty (self-contained) tag.
   */
  class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
  {
      /** Type marker identifying this token as an empty tag. */
      var $type = 'empty';

      /**
       * Builds a new empty token from this token's name and attributes.
       * @return Copied token
       */
      function copy()
      {
          $duplicate = new HTMLPurifier_Token_Empty($this->name, $this->attributes);
          return $duplicate;
      }
  }
 101
 /**
  * Concrete token class for a closing tag.
  *
  * @warning This class accepts attributes even though end tags cannot. This
  * is for optimization reasons, as under normal circumstances, the Lexers
  * do not pass attributes.
  */
 class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
 {
     /** Type marker identifying this token as an end tag. */
     var $type = 'end';

     /**
      * Builds a new end token carrying only this token's name; attributes
      * are not passed on to the copy.
      * @return Copied token
      */
     function copy()
     {
         $duplicate = new HTMLPurifier_Token_End($this->name);
         return $duplicate;
     }
 }
 116
 /**
  * Concrete text token class.
  *
  * Text tokens consist of regular parsed character data (PCDATA) and raw
  * character data (from the CDATA sections). Internally, their
  * data is parsed with all entities expanded. Surprisingly, the text token
  * does have a "tag name" called #PCDATA, which is how the DTD represents it
  * in permissible child nodes.
  */
 class HTMLPurifier_Token_Text extends HTMLPurifier_Token
 {

     var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
     var $type = 'text';
     var $data; /**< Parsed character data of text. @public */
     var $is_whitespace; /**< Bool indicating if node is whitespace. @public */

     /**
      * Constructor, accepts data and determines if it is whitespace.
      *
      * Declared as __construct() so that it runs on PHP versions that no
      * longer treat a method named after the class as the constructor.
      *
      * @param $data String parsed character data.
      */
     function __construct($data) {
         $this->data = $data;
         // computed once here so consumers can read the flag instead of
         // re-scanning the text
         $this->is_whitespace = ctype_space($data);
     }

     /**
      * Legacy (PHP 4 style) constructor, kept so that old runtimes and any
      * code invoking this method by name keep working. Delegates to
      * __construct().
      *
      * @param $data String parsed character data.
      */
     function HTMLPurifier_Token_Text($data) {
         $this->__construct($data);
     }

     /**
      * Builds a new text token from this token's data; the whitespace flag
      * is recomputed by the constructor.
      * @return Copied token
      */
     function copy() {
         return new HTMLPurifier_Token_Text($this->data);
     }

 }
 148
 /**
  * Concrete comment token class. Generally will be ignored.
  */
 class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
 {
     var $data; /**< Character data within comment. @public */
     var $type = 'comment';

     /**
      * Transparent constructor: stores the comment data unchanged.
      *
      * Declared as __construct() so that it runs on PHP versions that no
      * longer treat a method named after the class as the constructor.
      *
      * @param $data String comment data.
      */
     function __construct($data) {
         $this->data = $data;
     }

     /**
      * Legacy (PHP 4 style) constructor, kept so that old runtimes and any
      * code invoking this method by name keep working. Delegates to
      * __construct().
      *
      * @param $data String comment data.
      */
     function HTMLPurifier_Token_Comment($data) {
         $this->__construct($data);
     }

     /**
      * Builds a new comment token from this token's data.
      * @return Copied token
      */
     function copy() {
         return new HTMLPurifier_Token_Comment($this->data);
     }
 }
 168
 169 ?>