Fix bug: number spans should not allow zero as a value. This required augmenting...
[htmlpurifier.git] / library / HTMLPurifier / ChildDef.php
blob6df698463d24ed5e7fba560ff995302da506c876
1 <?php
3 // HTMLPurifier_ChildDef and inheritance have three types of output:
4 // true = leave nodes as is
5 // false = delete parent node and all children
6 // array(...) = replace children nodes with these
// The DTD-driven custom definition (HTMLPurifier_ChildDef_Custom) is the
// hardest one to implement. It relies on regexp tricks and, right now, is
// only expected to return TRUE or FALSE (it won't attempt to fix the tree).
//
// We may end up writing custom code for each HTML case in order to make
// it self-correcting.
// Registers the Core.EscapeInvalidChildren directive with HTMLPurifier_ConfigDef.
// HTMLPurifier_ChildDef_Required::validateChildren() reads this directive to
// decide whether a disallowed child is escaped into text or dropped.
// NOTE(review): the arguments look like (namespace, directive, default value,
// description) -- confirm against HTMLPurifier_ConfigDef::define().
HTMLPurifier_ConfigDef::define(
    'Core', 'EscapeInvalidChildren', false,
    'When true, a child is found that is not allowed in the context of the '.
    'parent element will be transformed into text as if it were ASCII. When '.
    'false, that element and all internal tags will be dropped, though text '.
    'will be preserved.  There is no option for dropping the element but '.
    'preserving child nodes.'
);
/**
 * Base class for child definitions: describes which child nodes an element
 * may contain and validates a list of child tokens against that description.
 */
class HTMLPurifier_ChildDef
{
    /**
     * Short name for the kind of definition, normally the lowercase tail of
     * the class name.
     *
     * Occasionally consulted as context. Possible values include
     * custom, required, optional and empty.
     */
    var $type;

    /**
     * Whether an empty list of children is acceptable for this definition.
     *
     * Needed for redundant checking: a change made to a child node may
     * leave its parent in a state where the parent is no longer allowed.
     */
    var $allow_empty;

    /**
     * Checks the children against the definition and reports what to do.
     *
     * This base implementation only raises an E_USER_ERROR; concrete
     * definitions are expected to override it.
     *
     * @warning $context is NOT HTMLPurifier_AttrContext
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object
     * @param $context String context indicating inline, block or unknown
     * @return bool true to leave nodes as is
     * @return bool false to remove parent node
     * @return array of replacement child tokens
     */
    function validateChildren($tokens_of_children, $config, $context) {
        // Abstract in spirit: reaching this body means a subclass forgot
        // to provide its own implementation.
        trigger_error('Call to abstract function', E_USER_ERROR);
    }
}
/**
 * Custom validation class, accepts DTD child definitions
 *
 * The DTD content model is compiled once into a PCRE pattern; validation
 * then flattens the direct children into a string of names and matches that
 * string against the pattern.
 *
 * @warning Currently this class is an all or nothing proposition, that is,
 *          it will only give a bool return value.  Table is the only
 *          child definition that uses this class, and we ought to give
 *          it a dedicated one.
 */
class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
{
    var $type = 'custom';
    var $allow_empty = false;

    /**
     * Allowed child pattern as defined by the DTD
     */
    var $dtd_regex;

    /**
     * PCRE regex derived from $dtd_regex. Every element name is compiled to
     * a group that consumes the name followed by one comma, so it must be
     * matched against a list in which each child name is comma-terminated.
     * @private
     */
    var $_pcre_regex;

    /**
     * @param $dtd_regex Allowed child pattern from the DTD
     */
    function __construct($dtd_regex) {
        $this->dtd_regex = $dtd_regex;
        $this->_compileRegex();
    }

    /**
     * Legacy (PHP 4 style) constructor, kept so that existing explicit
     * calls keep working. It is declared after __construct() and delegates
     * by class name so a subclass constructor cannot be re-entered.
     *
     * @param $dtd_regex Allowed child pattern from the DTD
     */
    function HTMLPurifier_ChildDef_Custom($dtd_regex) {
        HTMLPurifier_ChildDef_Custom::__construct($dtd_regex);
    }

    /**
     * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
     *
     * Each element name becomes the group (?:name,) and the DTD's own
     * sequence commas are removed, so names can only match whole,
     * comma-delimited children (a child "ab" no longer satisfies "a,b").
     */
    function _compileRegex() {
        // whitespace carries no meaning in a DTD content model
        $raw = preg_replace('/\s+/', '', $this->dtd_regex);

        // strncmp() is safe on an empty string, unlike reading offset 0
        if (strncmp($raw, '(', 1) != 0) {
            $raw = "($raw)";
        }

        // park the DTD sequence commas as spaces so that neighbouring names
        // stay separated while they are being wrapped
        $reg = str_replace(',', ' ', $raw);

        // every name consumes itself plus its trailing comma
        $reg = preg_replace('/[#a-zA-Z0-9_.-]+/', '(?:\\0,)', $reg);

        // sequencing is plain concatenation in PCRE: drop the parked commas
        $reg = str_replace(' ', '', $reg);

        // a dot can only come from an element name here; match it literally
        $reg = str_replace('.', '\\.', $reg);

        $this->_pcre_regex = $reg;
    }

    /**
     * Checks whether the direct children match the DTD pattern.
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object (unused here)
     * @param $context String context (unused here)
     * @return bool true when the children match the pattern, false otherwise
     */
    function validateChildren($tokens_of_children, $config, $context) {
        $list_of_children = '';
        $nesting = 0; // depth into the nest
        foreach ($tokens_of_children as $token) {
            if (!empty($token->is_whitespace)) continue;

            // decided before the depth changes, so the start tag of a direct
            // child counts while its matching end tag does not
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            if ($is_child) {
                // comma-terminated, as the compiled pattern expects
                $list_of_children .= $token->name . ',';
            }
        }

        // the non-capturing wrapper makes the anchors apply to the whole
        // pattern even if it has a top-level alternation
        $okay = preg_match(
            '/^(?:' . $this->_pcre_regex . ')$/D',
            $list_of_children
        );

        return (bool) $okay;
    }
}
/**
 * Definition that allows a set of elements, but disallows empty children.
 */
class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
{
    /**
     * Lookup table of allowed elements.
     */
    var $elements = array();

    /**
     * HTMLPurifier_Generator instance used to turn a rejected token back
     * into its HTML source when invalid children are escaped.
     */
    var $gen;

    var $allow_empty = false;
    var $type = 'required';

    /**
     * @param $elements List of allowed element names (lowercase), either as
     *                  an array or as a '|'-separated string.
     */
    function __construct($elements) {
        if (is_string($elements)) {
            $elements = str_replace(' ', '', $elements);
            $elements = explode('|', $elements);
        }
        // name => true, so membership is a single isset()
        $lookup = array();
        foreach ($elements as $name) {
            $lookup[$name] = true;
        }
        $this->elements = $lookup;
        $this->gen = new HTMLPurifier_Generator();
    }

    /**
     * Legacy (PHP 4 style) constructor, kept so that existing explicit
     * calls keep working. It is declared after __construct() and delegates
     * by class name so a subclass constructor cannot be re-entered.
     *
     * @param $elements List of allowed element names (lowercase).
     */
    function HTMLPurifier_ChildDef_Required($elements) {
        HTMLPurifier_ChildDef_Required::__construct($elements);
    }

    /**
     * Filters the children down to the allowed elements.
     *
     * A direct child that is not allowed is removed together with
     * everything nested inside it. When #PCDATA is allowed, text inside a
     * removed child is kept, and if Core.EscapeInvalidChildren is on the
     * removed tags are re-emitted as text instead of being dropped.
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object
     * @param $context String context (unused here)
     * @return bool false when nothing but whitespace (or nothing at all) is
     *         left, meaning the parent must be removed
     * @return bool true when every token was kept untouched
     * @return array of replacement child tokens otherwise
     */
    function validateChildren($tokens_of_children, $config, $context) {
        // if there are no tokens, delete parent node
        if (empty($tokens_of_children)) return false;

        // the new set of children
        $result = array();

        // current depth into the nest
        $nesting = 0;

        // whether or not we're deleting a node
        $is_deleting = false;

        // whether or not parsed character data is allowed
        // this controls whether or not we silently drop a tag
        // or generate escaped HTML from it
        $pcdata_allowed = isset($this->elements['#PCDATA']);

        // a little sanity check to make sure it's not ALL whitespace
        $all_whitespace = true;

        // some configuration
        $escape_invalid_children = $config->get('Core', 'EscapeInvalidChildren');

        foreach ($tokens_of_children as $token) {
            // whitespace is always carried over and never affects nesting
            if (!empty($token->is_whitespace)) {
                $result[] = $token;
                continue;
            }
            $all_whitespace = false; // phew, we're not talking about whitespace

            // decided before the depth changes: a start tag at depth zero is
            // a direct child, its matching end tag is not
            $is_child = ($nesting == 0);

            if ($token->type == 'start') {
                $nesting++;
            } elseif ($token->type == 'end') {
                $nesting--;
            }

            // each direct child decides afresh whether it (and everything
            // nested inside it) is being removed
            if ($is_child) {
                $is_deleting = !isset($this->elements[$token->name]);
            }

            if (!$is_deleting || ($pcdata_allowed && $token->type == 'text')) {
                $result[] = $token;
            } elseif ($pcdata_allowed && $escape_invalid_children) {
                $result[] = new HTMLPurifier_Token_Text(
                    $this->gen->generateFromToken($token, $config)
                );
            }
            // otherwise: drop silently
        }

        if (empty($result)) return false;
        if ($all_whitespace) return false;

        // kept tokens are the very same objects, so an identity comparison
        // is enough and avoids a deep property-by-property comparison
        if ($tokens_of_children === $result) return true;

        return $result;
    }
}
/**
 * Definition that allows a set of elements and also accepts having no
 * children at all.
 * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
 *       really, one shouldn't inherit from the other.  The only altered
 *       behavior is that a false returned by the parent is replaced with
 *       an empty array.  Thus, it will never return false.
 */
class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
{
    var $allow_empty = true;
    var $type = 'optional';

    /**
     * Same filtering as HTMLPurifier_ChildDef_Required, except that
     * "remove the parent" becomes "the parent has no children".
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object
     * @param $context String context, passed through to the parent
     * @return bool true to leave nodes as is
     * @return array of replacement child tokens (possibly empty)
     */
    function validateChildren($tokens_of_children, $config, $context) {
        $verdict = parent::validateChildren($tokens_of_children, $config, $context);
        return ($verdict === false) ? array() : $verdict;
    }
}
/**
 * Definition for elements that may not contain any children.
 * @warning validateChildren() in this class is actually never called, because
 *          empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
 *          before child definitions are parsed in earnest by
 *          HTMLPurifier_Strategy_FixNesting.
 */
class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
{
    var $allow_empty = true;
    var $type = 'empty';

    /**
     * Nothing to configure.
     */
    function HTMLPurifier_ChildDef_Empty() {}

    /**
     * Always answers with an empty replacement list, whatever the input.
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token (ignored)
     * @param $config HTMLPurifier_Config object (ignored)
     * @param $context String context (ignored)
     * @return array empty array of replacement child tokens
     */
    function validateChildren($tokens_of_children, $config, $context) {
        $no_children = array();
        return $no_children;
    }
}
/**
 * Definition that uses different definitions depending on context.
 *
 * The del and ins tags are notable because they allow different types of
 * elements depending on whether or not they're in a block or inline context.
 * Chameleon allows this behavior to happen by using two different
 * definitions depending on context.  While this is somewhat generalized,
 * it is specifically intended for those two tags.
 */
class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
{
    /**
     * Instance of the definition object to use when inline. Usually stricter.
     */
    var $inline;

    /**
     * Instance of the definition object to use when block.
     */
    var $block;

    /**
     * @param $inline List of elements to allow when inline.
     * @param $block List of elements to allow when block.
     */
    function __construct($inline, $block) {
        $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
        $this->block  = new HTMLPurifier_ChildDef_Optional($block);
    }

    /**
     * Legacy (PHP 4 style) constructor, kept so that existing explicit
     * calls keep working. It is declared after __construct() and delegates
     * by class name so a subclass constructor cannot be re-entered.
     *
     * @param $inline List of elements to allow when inline.
     * @param $block List of elements to allow when block.
     */
    function HTMLPurifier_ChildDef_Chameleon($inline, $block) {
        HTMLPurifier_ChildDef_Chameleon::__construct($inline, $block);
    }

    /**
     * Delegates to the inline or block definition according to $context.
     *
     * @param $tokens_of_children Array of HTMLPurifier_Token
     * @param $config HTMLPurifier_Config object
     * @param $context String context: 'inline' and 'unknown' use the inline
     *                 definition, 'block' uses the block definition
     * @return bool|array whatever the selected definition returns; false
     *         (after raising E_USER_ERROR) for an unrecognised context
     */
    function validateChildren($tokens_of_children, $config, $context) {
        switch ($context) {
            case 'unknown': // not knowing the context, take the inline route
            case 'inline':
                $result = $this->inline->validateChildren(
                    $tokens_of_children, $config, $context);
                break;
            case 'block':
                $result = $this->block->validateChildren(
                    $tokens_of_children, $config, $context);
                break;
            default:
                trigger_error('Invalid context', E_USER_ERROR);
                return false;
        }
        return $result;
    }
}