/**
 * Definition of the purified HTML that describes allowed children,
 * attributes, and many other things.
 *
 * Conventions:
 *
 * All member variables that are prefixed with info
 * (including the main $info array) are used by HTML Purifier internals
 * and should not be directly edited when customizing the HTMLDefinition.
 * They can usually be set via configuration directives or custom
 * modules.
 *
 * On the other hand, member variables without the info prefix are used
 * internally by the HTMLDefinition and MUST NOT be used by other HTML
 * Purifier internals. Many of them, however, are public, and may be
 * edited by userspace code to tweak the behavior of HTMLDefinition.
 *
 * @note This class is inspected by Printer_HTMLDefinition; please
 *       update that class if things here change.
 *
 * @warning Directives that change this object's structure must be in
 *          the HTML or Attr namespace!
 */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     */
    public $info_content_sets = array();

    /**
     * Doctype object; copied from the module manager by processModules().
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param $element_name String element name to add attribute to
     * @param $attr_name String name of attribute
     * @param $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def) {
        $module = $this->getAnonymousModule();
        // Reuse the anonymous module's entry for this element if it
        // already has one; otherwise register a blank one to hang the
        // attribute on.
        if (!isset($module->info[$element_name])) {
            $element = $module->addBlankElement($element_name);
        } else {
            $element = $module->info[$element_name];
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition
     * @note See HTMLPurifier_HTMLModule::addElement for detailed
     *       parameter and return value descriptions.
     * @return Whatever the anonymous module's addElement() returned
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes) {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
        return $element;
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
     *       parameter and return value descriptions.
     * @return Whatever the anonymous module's addBlankElement() returned
     */
    public function addBlankElement($element_name) {
        $module  = $this->getAnonymousModule();
        $element = $module->addBlankElement($element_name);
        return $element;
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module.
     * @return HTMLPurifier_HTMLModule named 'Anonymous', created on
     *         first use and reused afterwards
     */
    public function getAnonymousModule() {
        if (!$this->_anonModule) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created module holding userspace customizations; handed to
     * the manager and unset by processModules().
     */
    private $_anonModule;


    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    public $type = 'HTML';
    public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct() {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls data out of the module manager,
     * applies the configuration, then drops the manager and the
     * per-element content model fields.
     * @param $config Configuration object passed on to processModules()
     *                and setupConfigStuff()
     */
    protected function doSetup($config) {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param $config Configuration object handed to the manager's setup()
     */
    protected function processModules($config) {

        if ($this->_anonModule) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Merge each module's transforms in module order; a value of
        // false removes a transform registered by an earlier module.
        foreach ($this->manager->modules as $module) {
            foreach ($module->info_tag_transform as $k => $v) {
                if ($v === false) unset($this->info_tag_transform[$k]);
                else $this->info_tag_transform[$k] = $v;
            }
            foreach ($module->info_attr_transform_pre as $k => $v) {
                if ($v === false) unset($this->info_attr_transform_pre[$k]);
                else $this->info_attr_transform_pre[$k] = $v;
            }
            foreach ($module->info_attr_transform_post as $k => $v) {
                if ($v === false) unset($this->info_attr_transform_post[$k]);
                else $this->info_attr_transform_post[$k] = $v;
            }
        }

        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;

    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * Reads HTML.BlockWrapper, HTML.Parent, HTML.AllowedElements,
     * HTML.AllowedAttributes, HTML.Allowed, HTML.ForbiddenElements and
     * HTML.ForbiddenAttributes, prunes $info accordingly and reports
     * unusable settings through trigger_error().
     * @param $config Configuration object the directives are read from
     */
    protected function setupConfigStuff($config) {

        $block_wrapper = $config->get('HTML', 'BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            // NOTE(review): error level restored from a gap in the
            // source; confirm E_USER_ERROR is the intended severity.
            trigger_error('Cannot use non-block element as block wrapper',
                E_USER_ERROR);
        }

        $parent = $config->get('HTML', 'Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            // NOTE(review): error level restored from a gap in the
            // source; confirm E_USER_ERROR is the intended severity.
            trigger_error('Cannot use unrecognized element as parent',
                E_USER_ERROR);
            // fall back to the definition of the default parent
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        // NOTE(review): second half of this string restored from a gap
        // in the source; confirm the exact wording.
        $support = "(for information on implementing this, see the ".
                   "support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML', 'AllowedElements');
        $allowed_attributes = $config->get('HTML', 'AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two
        // dedicated directives supplied an array.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML', 'Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) unset($this->info[$name]);
                unset($allowed_elements[$name]);
            }
            // emit errors: whatever is left was requested but is unknown
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {

            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    // every spelling that matched is crossed off so it
                    // is not reported as unsupported below
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) unset($this->info_global_attr[$attr]);
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) unset($this->info[$tag]->attr[$attr]);
                }
            }
            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support");
                            } else {
                                trigger_error("Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING);
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error("Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING);
                        break;
                }
            }

        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements   = $config->get('HTML', 'ForbiddenElements');
        $forbidden_attributes = $config->get('HTML', 'ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (
                    isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } // this segment might get removed eventually
                elseif (isset($forbidden_attributes["$tag.$attr"])) {
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING);
                }
            }
        }
        // warn about the unsupported "*.attr" spelling
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) continue;
            if ($key[0] != '*') continue;
            if ($key[1] == '.') {
                trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING);
            }
        }

    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing. Format is element[attr1|attr2],element2...
     *
     * Spaces and tabs are ignored; entries are separated by commas or
     * line breaks. The element name "*" contributes attributes only.
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param $list String list to parse
     * @return array($allowed_elements, $allowed_attributes): lookup
     *         arrays keyed by element name and by "element.attr"
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list) {

        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) continue;
            // remove TinyMCE element control characters
            // (a '[' at position 0 is deliberately treated like "no
            // bracket", since there would be no element name before it)
            if (!strpos($chunk, '[')) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') $elements[$element] = true;
            if (!$attr) continue;
            // remove trailing ], but only if it is really there so an
            // unterminated list does not lose the last letter of its
            // final attribute name
            if (substr($attr, -1) === ']') {
                $attr = substr($attr, 0, -1);
            }
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }

        return array($elements, $attributes);

    }

}