Fix incorrect PEARSax3 test assertion.
[htmlpurifier.git] / library / HTMLPurifier / HTMLDefinition.php
blob33bb38ac5f43cad1fb4c03408ed56e207f66691c
1 <?php
3 /**
4 * Definition of the purified HTML that describes allowed children,
5 * attributes, and many other things.
7 * Conventions:
9 * All member variables that are prefixed with info
10 * (including the main $info array) are used by HTML Purifier internals
11 * and should not be directly edited when customizing the HTMLDefinition.
12 * They can usually be set via configuration directives or custom
13 * modules.
15 * On the other hand, member variables without the info prefix are used
16 * internally by the HTMLDefinition and MUST NOT be used by other HTML
17 * Purifier internals. Many of them, however, are public, and may be
18 * edited by userspace code to tweak the behavior of HTMLDefinition.
20 * @note This class is inspected by Printer_HTMLDefinition; please
21 * update that class if things here change.
23 * @warning Directives that change this object's structure must be in
24 * the HTML or Attr namespace!
26 class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
// FULLY-PUBLIC VARIABLES ---------------------------------------------

/**
 * Associative array of element names to HTMLPurifier_ElementDef.
 * @var array
 */
public $info = array();

/**
 * Associative array of global attribute name to attribute definition.
 * @var array
 */
public $info_global_attr = array();

/**
 * String name of parent element HTML will be going into.
 * @var string
 */
public $info_parent = 'div';

/**
 * Definition for parent element, allows parent element to be a
 * tag that's not allowed inside the HTML fragment.
 */
public $info_parent_def;

/**
 * String name of element used to wrap inline elements in block context.
 * @note This is rarely used except for BLOCKQUOTEs in strict mode
 * @var string
 */
public $info_block_wrapper = 'p';

/**
 * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
 * @var array
 */
public $info_tag_transform = array();

/**
 * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
 * @var array
 */
public $info_attr_transform_pre = array();

/**
 * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
 * @var array
 */
public $info_attr_transform_post = array();

/**
 * Nested lookup array of content set name (Block, Inline) to
 * element name to whether or not it belongs in that content set.
 * @var array
 */
public $info_content_sets = array();

/**
 * Indexed list of HTMLPurifier_Injector to be used.
 * @var array
 */
public $info_injector = array();

/**
 * Doctype object.
 */
public $doctype;
91 // RAW CUSTOMIZATION STUFF --------------------------------------------
/**
 * Registers a custom attribute on an element via the anonymous module.
 *
 * If the anonymous module has no entry for the element yet, a blank
 * element is created in it first and the attribute is attached to that.
 *
 * @note This is strictly convenience, and does not have a corresponding
 *       method in HTMLPurifier_HTMLModule
 * @param $element_name String element name to add attribute to
 * @param $attr_name String name of attribute
 * @param $def Attribute definition, can be string or object, see
 *             HTMLPurifier_AttrTypes for details
 */
public function addAttribute($element_name, $attr_name, $def) {
    $anon = $this->getAnonymousModule();
    // Reuse the element the module already holds; otherwise let the
    // module create a blank one to carry the attribute.
    $target = isset($anon->info[$element_name])
        ? $anon->info[$element_name]
        : $anon->addBlankElement($element_name);
    $target->attr[$attr_name] = $def;
}
/**
 * Adds a custom element to your HTML definition by forwarding all
 * arguments to the anonymous module's addElement().
 *
 * @note See HTMLPurifier_HTMLModule::addElement for detailed
 *       parameter and return value descriptions.
 */
public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) {
    // The element is taken to be safe because the user asked for it
    // explicitly. This may not be a good idea.
    return $this->getAnonymousModule()->addElement(
        $element_name,
        $type,
        $contents,
        $attr_collections,
        $attributes
    );
}
/**
 * Adds a blank element to your HTML definition, for overriding
 * existing behavior. The call is forwarded to the anonymous module.
 *
 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
 *       parameter and return value descriptions.
 */
public function addBlankElement($element_name) {
    return $this->getAnonymousModule()->addBlankElement($element_name);
}
/**
 * Returns the anonymous module, so advanced features can be used
 * without writing a dedicated module. The module is created, and
 * given the name 'Anonymous', the first time it is requested.
 */
public function getAnonymousModule() {
    if ($this->_anonModule) {
        return $this->_anonModule;
    }
    $created = new HTMLPurifier_HTMLModule();
    $created->name = 'Anonymous';
    $this->_anonModule = $created;
    return $this->_anonModule;
}

/**
 * Lazily created module backing getAnonymousModule().
 */
private $_anonModule;
// PUBLIC BUT INTERNAL VARIABLES --------------------------------------

public $type = 'HTML';

/**
 * Instance of HTMLPurifier_HTMLModuleManager
 */
public $manager;

/**
 * Performs low-cost, preliminary initialization: only the module
 * manager is instantiated here.
 */
public function __construct() {
    $this->manager = new HTMLPurifier_HTMLModuleManager();
}
/**
 * Builds the definition from the module manager and the configuration.
 *
 * Runs processModules() and then setupConfigStuff(), drops the manager,
 * and finally strips the content_model and content_model_type members
 * from every element definition in $info.
 *
 * @param $config Configuration object handed on to both setup steps
 */
protected function doSetup($config) {
    $this->processModules($config);
    $this->setupConfigStuff($config);
    unset($this->manager);

    // cleanup some of the element definitions
    foreach (array_keys($this->info) as $name) {
        unset(
            $this->info[$name]->content_model,
            $this->info[$name]->content_model_type
        );
    }
}
/**
 * Extract out the information from the manager.
 *
 * Registers the anonymous module (if one was created), runs the
 * manager's setup, then copies the doctype, the per-module transforms
 * and injectors, the elements and the content set lookup onto this
 * object.
 *
 * @param $config Configuration object passed to the manager's setup()
 */
protected function processModules($config) {

    if ($this->_anonModule) {
        // for user specific changes
        // this is late-loaded so we don't have to deal with PHP4
        // reference wonky-ness
        $this->manager->addModule($this->_anonModule);
        unset($this->_anonModule);
    }

    $this->manager->setup($config);
    $this->doctype = $this->manager->doctype;

    // Members merged module by module, in this order. A value of false
    // in a module removes the entry of the same key from this object;
    // any other value sets or overwrites it.
    $merged_members = array(
        'info_tag_transform',
        'info_attr_transform_pre',
        'info_attr_transform_post',
        'info_injector',
    );

    foreach ($this->manager->modules as $module) {
        foreach ($merged_members as $member) {
            foreach ($module->{$member} as $key => $value) {
                if ($value === false) {
                    unset($this->{$member}[$key]);
                } else {
                    $this->{$member}[$key] = $value;
                }
            }
        }
    }

    $this->info = $this->manager->getElements();
    $this->info_content_sets = $this->manager->contentSets->lookup;

}
/**
 * Sets up stuff based on config. We need a better way of doing this.
 *
 * In order, this:
 *  - validates HTML.BlockWrapper against the 'Block' content set,
 *  - resolves HTML.Parent through the manager,
 *  - restricts $info to HTML.AllowedElements / HTML.AllowedAttributes
 *    (or to the parsed HTML.Allowed string when neither is an array),
 *  - removes HTML.ForbiddenElements / HTML.ForbiddenAttributes,
 *  - drops every injector whose checkNeeded() does not return false.
 *
 * Uses $this->manager, so it has to run before the manager is unset.
 *
 * @param $config Configuration object the directives are read from
 */
protected function setupConfigStuff($config) {

    $block_wrapper = $config->get('HTML.BlockWrapper');
    if (isset($this->info_content_sets['Block'][$block_wrapper])) {
        $this->info_block_wrapper = $block_wrapper;
    } else {
        trigger_error('Cannot use non-block element as block wrapper',
            E_USER_ERROR);
    }

    $parent = $config->get('HTML.Parent');
    $def = $this->manager->getElement($parent, true);
    if ($def) {
        $this->info_parent = $parent;
        $this->info_parent_def = $def;
    } else {
        trigger_error('Cannot use unrecognized element as parent',
            E_USER_ERROR);
        // keep the current parent name and load its definition instead
        $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
    }

    // support template text
    $support = "(for information on implementing this, see the ".
               "support forums) ";

    // setup allowed elements -----------------------------------------

    $allowed_elements = $config->get('HTML.AllowedElements');
    $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

    if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
        $allowed = $config->get('HTML.Allowed');
        if (is_string($allowed)) {
            list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
        }
    }

    if (is_array($allowed_elements)) {
        foreach ($this->info as $name => $d) {
            if (!isset($allowed_elements[$name])) unset($this->info[$name]);
            // whatever remains afterwards was requested but is unknown
            unset($allowed_elements[$name]);
        }
        // emit errors
        foreach ($allowed_elements as $element => $d) {
            $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
            trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
        }
    }

    // setup allowed attributes ---------------------------------------

    // Matched keys are removed from this copy; the leftovers are the
    // entries that did not correspond to any known attribute.
    $allowed_attributes_mutable = $allowed_attributes; // by copy!
    if (is_array($allowed_attributes)) {

        // This actually doesn't do anything, since we went away from
        // global attributes. It's possible that userland code uses
        // it, but HTMLModuleManager doesn't!
        foreach ($this->info_global_attr as $attr => $x) {
            $keys = array($attr, "*@$attr", "*.$attr");
            $delete = true;
            foreach ($keys as $key) {
                if ($delete && isset($allowed_attributes[$key])) {
                    $delete = false;
                }
                if (isset($allowed_attributes_mutable[$key])) {
                    unset($allowed_attributes_mutable[$key]);
                }
            }
            if ($delete) unset($this->info_global_attr[$attr]);
        }

        foreach ($this->info as $tag => $info) {
            foreach ($info->attr as $attr => $x) {
                $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    if ($this->info[$tag]->attr[$attr]->required) {
                        trigger_error("Required attribute '$attr' in element '$tag' was not allowed, which means '$tag' will not be allowed either", E_USER_WARNING);
                    }
                    unset($this->info[$tag]->attr[$attr]);
                }
            }
        }

        // emit errors
        foreach ($allowed_attributes_mutable as $elattr => $d) {
            $bits = preg_split('/[.@]/', $elattr, 2);
            $c = count($bits);
            if ($c === 2 && $bits[0] !== '*') {
                $element = htmlspecialchars($bits[0]);
                $attribute = htmlspecialchars($bits[1]);
                // Look the element up by its raw name; the escaped
                // form is only meant for the message text.
                if (!isset($this->info[$bits[0]])) {
                    // Raised as a warning like the sibling messages.
                    trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support",
                        E_USER_WARNING);
                } else {
                    trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                        E_USER_WARNING);
                }
                continue;
            }
            // Bare "attr", or wildcard "*@attr" / "*.attr": the attribute
            // name is the last piece, never the leading "*".
            $attribute = htmlspecialchars($bits[$c - 1]);
            trigger_error("Global attribute '$attribute' is not ".
                "supported in any elements $support",
                E_USER_WARNING);
        }

    }

    // setup forbidden elements ---------------------------------------

    $forbidden_elements   = $config->get('HTML.ForbiddenElements');
    $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

    foreach ($this->info as $tag => $info) {
        if (isset($forbidden_elements[$tag])) {
            unset($this->info[$tag]);
            continue;
        }
        foreach ($info->attr as $attr => $x) {
            if (
                isset($forbidden_attributes["$tag@$attr"]) ||
                isset($forbidden_attributes["*@$attr"]) ||
                isset($forbidden_attributes[$attr])
            ) {
                unset($this->info[$tag]->attr[$attr]);
                continue;
            } // this segment might get removed eventually
            elseif (isset($forbidden_attributes["$tag.$attr"])) {
                // $tag.$attr are not user supplied, so no worries!
                trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
            }
        }
    }
    foreach ($forbidden_attributes as $key => $v) {
        if (strlen($key) < 2) continue;
        if ($key[0] != '*') continue;
        if ($key[1] == '.') {
            trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
        }
    }

    // setup injectors -----------------------------------------------------
    foreach ($this->info_injector as $i => $injector) {
        if ($injector->checkNeeded($config) !== false) {
            // remove injector that does not have its required
            // elements/attributes present, and is thus not needed.
            unset($this->info_injector[$i]);
        }
    }
}
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 *
 * Spaces and tabs are ignored; entries are separated by commas or line
 * breaks. The element name '*' contributes attributes only and is not
 * added to the element list.
 *
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *      it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes), where the
 *      first is a lookup keyed by element name and the second a lookup
 *      keyed by "element.attribute"
 * @todo Give this its own class, probably static interface
 */
public function parseTinyMCEAllowedList($list) {

    $list = str_replace(array(' ', "\t"), '', $list);

    $elements = array();
    $attributes = array();

    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if (empty($chunk)) continue;
        $bracket = strpos($chunk, '[');
        if ($bracket === false || $bracket === 0) {
            // No attribute list, or nothing in front of the bracket:
            // the whole chunk is taken as the element name.
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }
        if ($element !== '*') $elements[$element] = true;
        if (!$attr) continue;
        // Remove the trailing ] only when it is really there, so a
        // missing bracket does not eat the last letter of the name.
        if (substr($attr, -1) === ']') {
            $attr = substr($attr, 0, -1);
        }
        foreach (explode('|', $attr) as $key) {
            // "a[]" or "a[href|]" would otherwise register "a."
            if ($key === '') continue;
            $attributes["$element.$key"] = true;
        }
    }

    return array($elements, $attributes);

}
425 // vim: et sw=4 sts=4