4 * Defines a set of immutable value object tokens for HTML representation.
10 * Abstract base token class that all others inherit from.
12 class HTMLPurifier_Token
{
13 var $type; /**< Type of node to bypass <tt>is_a()</tt>. @public */
16 * Copies the tag into a new one (clone substitute).
17 * @return Copied token
20 trigger_error('Cannot copy abstract class', E_USER_ERROR
);
25 * Abstract class of a tag token (start, end or empty), and its behavior.
27 class HTMLPurifier_Token_Tag
extends HTMLPurifier_Token
// abstract
30 * Static bool marker that indicates the class is a tag.
32 * This allows us to check objects with <tt>!empty($obj->is_tag)</tt>
33 * without having to use a function call <tt>is_a()</tt>.
40 * The lower-case name of the tag, like 'a', 'b' or 'blockquote'.
42 * @note Strictly speaking, XML tags are case sensitive, so we shouldn't
43 * be lower-casing them, but these tokens cater to HTML tags, which are
51 * Associative array of the tag's attributes.
53 var $attributes = array();
56 * Non-overloaded constructor, which lower-cases the passed tag name and attribute keys.
58 * @param $name String name.
59 * @param $attributes Associative array of attributes.
61 function HTMLPurifier_Token_Tag($name, $attributes = array()) {
62 //if ($attributes === null) var_dump(debug_backtrace());
63 $this->name
= ctype_lower($name) ?
$name : strtolower($name);
64 foreach ($attributes as $key => $value) {
65 // normalization only necessary when key is not lowercase
66 if (!ctype_lower($key)) {
67 $new_key = strtolower($key);
68 if (!isset($attributes[$new_key])) {
69 $attributes[$new_key] = $attributes[$key];
71 if ($new_key !== $key) {
72 unset($attributes[$key]);
76 $this->attributes
= $attributes;
81 * Concrete start token class.
83 class HTMLPurifier_Token_Start
extends HTMLPurifier_Token_Tag
87 return new HTMLPurifier_Token_Start($this->name
, $this->attributes
);
92 * Concrete empty token class.
94 class HTMLPurifier_Token_Empty
extends HTMLPurifier_Token_Tag
98 return new HTMLPurifier_Token_Empty($this->name
, $this->attributes
);
103 * Concrete end token class.
105 * @warning This class accepts attributes even though end tags cannot have any. This
106 * is for optimization reasons, as under normal circumstances, the Lexers
107 * do not pass attributes.
109 class HTMLPurifier_Token_End
extends HTMLPurifier_Token_Tag
113 return new HTMLPurifier_Token_End($this->name
);
118 * Concrete text token class.
120 * Text tokens consist of regular parsed character data (PCDATA) and raw
121 * character data (from the CDATA sections). Internally, their
122 * data is parsed with all entities expanded. Surprisingly, the text token
123 * does have a "tag name" called #PCDATA, which is how the DTD represents it
124 * in permissible child nodes.
126 class HTMLPurifier_Token_Text
extends HTMLPurifier_Token
129 var $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. @public */
131 var $data; /**< Parsed character data of text. @public */
132 var $is_whitespace; /**< Bool indicating if node is whitespace. @public */
135 * Constructor, accepts data and determines if it is whitespace.
137 * @param $data String parsed character data.
139 function HTMLPurifier_Token_Text($data) {
141 $this->is_whitespace
= ctype_space($data);
144 return new HTMLPurifier_Token_Text($this->data
);
150 * Concrete comment token class. Generally will be ignored.
152 class HTMLPurifier_Token_Comment
extends HTMLPurifier_Token
154 var $data; /**< Character data within comment. @public */
155 var $type = 'comment';
157 * Transparent constructor.
159 * @param $data String comment data.
161 function HTMLPurifier_Token_Comment($data) {
165 return new HTMLPurifier_Token_Comment($this->data
);