3 require_once 'HTMLPurifier/Definition.php';
4 require_once 'HTMLPurifier/HTMLModuleManager.php';
6 // this definition and its modules MUST NOT define configuration directives
7 // outside of the HTML or Attr namespaces
// Registers the HTML.DefinitionID directive with the configuration schema.
// Visible arguments: namespace 'HTML', directive name 'DefinitionID', the
// value null and the string 'string/null' (presumably the default value and
// the accepted type -- confirm against HTMLPurifier_ConfigSchema::define),
// followed by the human-readable description text.
// NOTE(review): the end of the description string and the closing of this
// call are not visible in this copy of the file.
9 HTMLPurifier_ConfigSchema
::define(
10 'HTML', 'DefinitionID', null, 'string/null', '
12 Unique identifier for a custom-built HTML definition. If you edit
13 the raw version of the HTMLDefinition, introducing changes that the
14 configuration object does not reflect, you must specify this variable.
15 If you change your custom edits, you should change this directive, or
16 clear your cache. Example:
19 $config = HTMLPurifier_Config::createDefault();
20 $config->set(\'HTML\', \'DefinitionID\', \'1\');
21 $def = $config->getHTMLDefinition();
22 $def->addAttribute(\'a\', \'tabindex\', \'Number\');
25 In the above example, the configuration is still at the defaults, but
26 using the advanced API, an extra attribute has been added. The
27 configuration object normally has no way of knowing that this change
28 has taken place, so it needs an extra directive: %HTML.DefinitionID.
29 If someone else attempts to use the default configuration, these two
30 pieces of code will not clobber each other in the cache, since one has
31 an extra directive attached to it.
34 This directive has been available since 2.0.0, and in that version or
35 later you <em>must</em> specify a value to this directive to use the
36 advanced API features.
// Registers the HTML.DefinitionRev directive: namespace 'HTML', name
// 'DefinitionRev', the value 1 and the string 'int' (presumably default and
// type -- confirm against HTMLPurifier_ConfigSchema::define), followed by
// the description text.
// NOTE(review): the closing of the description string and of this call is
// not visible in this copy of the file.
40 HTMLPurifier_ConfigSchema
::define(
41 'HTML', 'DefinitionRev', 1, 'int', '
43 Revision identifier for your custom definition specified in
44 %HTML.DefinitionID. This serves the same purpose: uniquely identifying
45 your custom definition, but this one does so in a chronological
46 context: revision 3 is more up-to-date then revision 2. Thus, when
47 this gets incremented, the cache handling is smart enough to clean
48 up any older revisions of your definition as well as flush the
49 cache. This directive has been available since 2.0.0.
// Registers the HTML.BlockWrapper directive: namespace 'HTML', name
// 'BlockWrapper', the value 'p' and the string 'string' (presumably default
// and type -- confirm against HTMLPurifier_ConfigSchema::define), followed
// by the description text. setupConfigStuff() reads this directive.
// NOTE(review): the description below stops mid-sentence; the rest of the
// string and the closing of this call are not visible in this copy.
53 HTMLPurifier_ConfigSchema
::define(
54 'HTML', 'BlockWrapper', 'p', 'string', '
56 String name of element to wrap inline elements that are inside a block
57 context. This only occurs in the children of blockquote in strict mode.
60 Example: by default value,
61 <code><blockquote>Foo</blockquote></code> would become
62 <code><blockquote><p>Foo</p></blockquote></code>.
63 The <code><p></code> tags can be replaced with whatever you desire,
64 as long as it is a block level element. This directive has been available
// Registers the HTML.Parent directive: namespace 'HTML', name 'Parent', the
// value 'div' and the string 'string' (presumably default and type --
// confirm against HTMLPurifier_ConfigSchema::define), followed by the
// description text. setupConfigStuff() reads this directive.
// NOTE(review): the closing of the description string and of this call is
// not visible in this copy of the file.
69 HTMLPurifier_ConfigSchema
::define(
70 'HTML', 'Parent', 'div', 'string', '
72 String name of element that HTML fragment passed to library will be
73 inserted in. An interesting variation would be using span as the
74 parent element, meaning that only inline tags would be allowed.
75 This directive has been available since 1.3.0.
// Registers the HTML.AllowedElements directive: namespace 'HTML', name
// 'AllowedElements', the value null and the string 'lookup/null'
// (presumably default and type -- confirm against
// HTMLPurifier_ConfigSchema::define), followed by the description text.
// setupConfigStuff() reads this directive and removes every element that is
// not a key of the configured array.
// NOTE(review): the closing of the description string and of this call is
// not visible in this copy of the file.
79 HTMLPurifier_ConfigSchema
::define(
80 'HTML', 'AllowedElements', null, 'lookup/null', '
82 If HTML Purifier\'s tag set is unsatisfactory for your needs, you
83 can overload it with your own list of tags to allow. Note that this
84 method is subtractive: it does its job by taking away from HTML Purifier
85 usual feature set, so you cannot add a tag that HTML Purifier never
86 supported in the first place (like embed, form or head). If you
87 change this, you probably also want to change %HTML.AllowedAttributes.
90 <strong>Warning:</strong> If another directive conflicts with the
91 elements here, <em>that</em> directive will win and override.
92 This directive has been available since 1.3.0.
// Registers the HTML.AllowedAttributes directive: namespace 'HTML', name
// 'AllowedAttributes', the value null and the string 'lookup/null'
// (presumably default and type -- confirm against
// HTMLPurifier_ConfigSchema::define), followed by the description text.
// setupConfigStuff() looks entries up under "tag.attr" and "*.attr" keys.
// NOTE(review): the closing of the description string and of this call is
// not visible in this copy of the file.
96 HTMLPurifier_ConfigSchema
::define(
97 'HTML', 'AllowedAttributes', null, 'lookup/null', '
99 If HTML Purifier\'s attribute set is unsatisfactory, overload it!
100 The syntax is "tag.attr" or "*.attr" for the global attributes
101 (style, id, class, dir, lang, xml:lang).
104 <strong>Warning:</strong> If another directive conflicts with the
105 elements here, <em>that</em> directive will win and override. For
106 example, %HTML.EnableAttrID will take precedence over *.id in this
107 directive. You must set that directive to true before you can use
108 IDs at all. This directive has been available since 1.3.0.
// Registers the HTML.Allowed directive: namespace 'HTML', name 'Allowed',
// the value null and the string 'itext/null' (presumably default and type --
// confirm against HTMLPurifier_ConfigSchema::define), followed by the
// description text. setupConfigStuff() only consults this directive when
// neither HTML.AllowedElements nor HTML.AllowedAttributes is an array, and
// hands its string value to parseTinyMCEAllowedList().
// NOTE(review): the closing of the description string and of this call is
// not visible in this copy of the file.
112 HTMLPurifier_ConfigSchema
::define(
113 'HTML', 'Allowed', null, 'itext/null', '
115 This is a convenience directive that rolls the functionality of
116 %HTML.AllowedElements and %HTML.AllowedAttributes into one directive.
117 Specify elements and attributes that are allowed using:
118 <code>element1[attr1|attr2],element2...</code>. You can also use
119 newlines instead of commas to separate elements.
122 <strong>Warning</strong>:
123 All of the constraints on the component directives are still enforced.
124 The syntax is a <em>subset</em> of TinyMCE\'s <code>valid_elements</code>
125 whitelist: directly copy-pasting it here will probably result in
126 broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes
127 are set, this directive has no effect.
128 This directive has been available since 2.0.0.
133 * Definition of the purified HTML that describes allowed children,
134 * attributes, and many other things.
138 * All member variables that are prefixed with info
139 * (including the main $info array) are used by HTML Purifier internals
140 * and should not be directly edited when customizing the HTMLDefinition.
141 * They can usually be set via configuration directives or custom
144 * On the other hand, member variables without the info prefix are used
145 * internally by the HTMLDefinition and MUST NOT be used by other HTML
146 * Purifier internals. Many of them, however, are public, and may be
147 * edited by userspace code to tweak the behavior of HTMLDefinition.
149 * @note This class is inspected by Printer_HTMLDefinition; please
150 * update that class if things here change.
// Class declaration followed by the public "info" properties that the
// methods further down populate (processModules() and setupConfigStuff()
// write to info, info_content_sets, info_tag_transform, the two
// info_attr_transform_* lists, info_block_wrapper, info_parent and
// info_parent_def).
// NOTE(review): in this copy the docblock delimiters and the class braces
// are not visible, and the first description below has no declaration
// after it -- check the span against a clean copy before editing it.
152 class HTMLPurifier_HTMLDefinition
extends HTMLPurifier_Definition
155 // FULLY-PUBLIC VARIABLES ---------------------------------------------
158 * Associative array of element names to HTMLPurifier_ElementDef
164 * Associative array of global attribute name to attribute definition.
167 var $info_global_attr = array();
170 * String name of parent element HTML will be going into.
173 var $info_parent = 'div';
176 * Definition for parent element, allows parent element to be a
177 * tag that's not allowed inside the HTML fragment.
180 var $info_parent_def;
183 * String name of element used to wrap inline elements in block context
184 * @note This is rarely used except for BLOCKQUOTEs in strict mode
187 var $info_block_wrapper = 'p';
190 * Associative array of deprecated tag name to HTMLPurifier_TagTransform
193 var $info_tag_transform = array();
196 * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
199 var $info_attr_transform_pre = array();
202 * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
205 var $info_attr_transform_post = array();
208 * Nested lookup array of content set name (Block, Inline) to
209 * element name to whether or not it belongs in that content set.
212 var $info_content_sets = array();
221 // RAW CUSTOMIZATION STUFF --------------------------------------------
/**
 * Adds a custom attribute to a pre-existing element.
 *
 * The attribute is recorded on a blank element definition requested from
 * the anonymous module (see getAnonymousModule()), keyed by attribute name.
 *
 * @param $element_name String element name to add attribute to
 * @param $attr_name String name of attribute
 * @param $def Attribute definition, can be string or object, see
 *             HTMLPurifier_AttrTypes for details
 */
function addAttribute($element_name, $attr_name, $def) {
    $module =& $this->getAnonymousModule();
    // by-reference so the assignment below lands on the module's element
    $element =& $module->addBlankElement($element_name);
    $element->attr[$attr_name] = $def;
}
/**
 * Adds a custom element to your HTML definition.
 *
 * The element is registered on the anonymous module (see
 * getAnonymousModule()); all arguments are forwarded unchanged, with a
 * literal true inserted as the module call's second argument.
 *
 * @note See HTMLPurifier_HTMLModule::addElement for detailed
 *       parameter and return value descriptions.
 * @return Reference to the element produced by the module's addElement()
 */
function &addElement($element_name, $type, $contents, $attr_collections, $attributes) {
    $module =& $this->getAnonymousModule();
    // assume that if the user is calling this, the element
    // is safe. This may not be a good idea
    $element =& $module->addElement($element_name, true, $type, $contents, $attr_collections, $attributes);
    // must be a variable: the function returns by reference
    return $element;
}
/**
 * Adds a blank element to your HTML definition, for overriding.
 *
 * The element is requested from the anonymous module (see
 * getAnonymousModule()).
 *
 * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed
 *       parameter and return value descriptions.
 * @return Reference to the element produced by the module's
 *         addBlankElement()
 */
function &addBlankElement($element_name) {
    $module =& $this->getAnonymousModule();
    $element =& $module->addBlankElement($element_name);
    // must be a variable: the function returns by reference
    return $element;
}
/**
 * Retrieves a reference to the anonymous module, so you can
 * bust out advanced features without having to make your own
 * module.
 *
 * The module is created on the first call (while $this->_anonModule is
 * still falsy) and given the name 'Anonymous'; later calls hand back the
 * same instance.
 *
 * @return Reference to the HTMLPurifier_HTMLModule held in _anonModule
 */
function &getAnonymousModule() {
    if (!$this->_anonModule) {
        $this->_anonModule = new HTMLPurifier_HTMLModule();
        $this->_anonModule->name = 'Anonymous';
    }
    return $this->_anonModule;
}
277 // PUBLIC BUT INTERNAL VARIABLES --------------------------------------
280 var $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */
/**
 * Performs low-cost, preliminary initialization.
 *
 * Only the module manager is instantiated here; the element and attribute
 * tables are filled in later by doSetup().
 */
function HTMLPurifier_HTMLDefinition() {
    $this->manager = new HTMLPurifier_HTMLModuleManager();
}
/**
 * Builds the definition: pulls everything out of the module manager,
 * applies the configuration-driven restrictions, then discards data that
 * is no longer needed.
 *
 * @param $config Configuration object; it is only passed on to
 *                processModules() and setupConfigStuff()
 */
function doSetup($config) {
    $this->processModules($config);
    $this->setupConfigStuff($config);
    // the manager is not needed once both steps above have run
    unset($this->manager);

    // cleanup some of the element definitions
    foreach ($this->info as $k => $v) {
        unset($this->info[$k]->content_model);
        unset($this->info[$k]->content_model_type);
    }
}
/**
 * Extract out the information from the manager.
 *
 * Registers the anonymous module (if one was created), runs the manager's
 * setup, then copies the doctype, the tag and attribute transforms, the
 * element table and the content-set lookup onto this object.
 *
 * @param $config Configuration object; it is only passed on to the
 *                manager's setup()
 */
function processModules($config) {

    if ($this->_anonModule) {
        // for user specific changes
        // this is late-loaded so we don't have to deal with PHP4
        // reference wonky-ness
        $this->manager->addModule($this->_anonModule);
        unset($this->_anonModule);
    }

    $this->manager->setup($config);
    $this->doctype = $this->manager->doctype;

    // Merge the transforms of every module in iteration order: a value of
    // false removes an entry set by an earlier module, anything else
    // adds or overwrites the entry under the same key.
    foreach ($this->manager->modules as $module) {
        foreach ($module->info_tag_transform as $k => $v) {
            if ($v === false) unset($this->info_tag_transform[$k]);
            else $this->info_tag_transform[$k] = $v;
        }
        foreach ($module->info_attr_transform_pre as $k => $v) {
            if ($v === false) unset($this->info_attr_transform_pre[$k]);
            else $this->info_attr_transform_pre[$k] = $v;
        }
        foreach ($module->info_attr_transform_post as $k => $v) {
            if ($v === false) unset($this->info_attr_transform_post[$k]);
            else $this->info_attr_transform_post[$k] = $v;
        }
    }

    $this->info = $this->manager->getElements();
    $this->info_content_sets = $this->manager->contentSets->lookup;

}
338 * Sets up stuff based on config. We need a better way of doing this.
// Applies the HTML.* configuration directives to the definition built by
// processModules(): block wrapper, parent element, and the allowed
// element / attribute whitelists.
// $config is only used through $config->get(namespace, directive).
// NOTE(review): several lines of this method (branch keywords, closing
// braces, the trailing arguments of the trigger_error() calls and the end
// of the $support string) are not visible in this copy; the comments
// below describe only what can be seen.
340 function setupConfigStuff($config) {
// --- block wrapper: accepted only if it is a key of the 'Block' content
// set; a trigger_error() call with a "non-block element" message follows
// (presumably the else branch -- confirm).
342 $block_wrapper = $config->get('HTML', 'BlockWrapper');
343 if (isset($this->info_content_sets
['Block'][$block_wrapper])) {
344 $this->info_block_wrapper
= $block_wrapper;
346 trigger_error('Cannot use non-block element as block wrapper',
// --- parent element: its definition is requested from the manager with
// true as second argument; name and definition are stored together.
350 $parent = $config->get('HTML', 'Parent');
351 $def = $this->manager
->getElement($parent, true);
353 $this->info_parent
= $parent;
354 $this->info_parent_def
= $def;
// An "unrecognized element" error is raised and the definition of the
// current info_parent is loaded instead (presumably only when $def is
// falsy -- the condition itself is not visible here).
356 trigger_error('Cannot use unrecognized element as parent',
358 $this->info_parent_def
= $this->manager
->getElement($this->info_parent
, true);
361 // support template text
// $support is appended to every warning message emitted below.
362 $support = "(for information on implementing this, see the ".
365 // setup allowed elements
367 $allowed_elements = $config->get('HTML', 'AllowedElements');
368 $allowed_attributes = $config->get('HTML', 'AllowedAttributes');
// HTML.Allowed is consulted only when neither list above is an array;
// its string form is split into both lists by parseTinyMCEAllowedList().
370 if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
371 $allowed = $config->get('HTML', 'Allowed');
372 if (is_string($allowed)) {
373 list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
// --- element whitelist: drop every known element that is not listed and
// tick off the listed ones, so that whatever is left in $allowed_elements
// afterwards names an element this definition does not have.
377 if (is_array($allowed_elements)) {
378 foreach ($this->info
as $name => $d) {
379 if(!isset($allowed_elements[$name])) unset($this->info
[$name]);
380 unset($allowed_elements[$name]);
// one warning per leftover name; the name is HTML-escaped for the message
383 foreach ($allowed_elements as $element => $d) {
384 $element = htmlspecialchars($element);
385 trigger_error("Element '$element' is not supported $support", E_USER_WARNING
);
// --- attribute whitelist: lookups use the untouched $allowed_attributes,
// while matched keys are removed from the copy so that the copy ends up
// holding only the entries that matched nothing.
389 $allowed_attributes_mutable = $allowed_attributes; // by copy!
390 if (is_array($allowed_attributes)) {
// global attributes are matched against "*.name" keys
391 foreach ($this->info_global_attr
as $attr_key => $info) {
392 if (!isset($allowed_attributes["*.$attr_key"])) {
393 unset($this->info_global_attr
[$attr_key]);
394 } elseif (isset($allowed_attributes_mutable["*.$attr_key"])) {
395 unset($allowed_attributes_mutable["*.$attr_key"]);
// per-element attributes are kept when either "tag.attr" or "*.attr" is
// listed; otherwise they are removed from the element definition
398 foreach ($this->info
as $tag => $info) {
399 foreach ($info->attr
as $attr => $attr_info) {
400 if (!isset($allowed_attributes["$tag.$attr"]) &&
401 !isset($allowed_attributes["*.$attr"])) {
402 unset($this->info
[$tag]->attr
[$attr]);
404 if (isset($allowed_attributes_mutable["$tag.$attr"])) {
405 unset($allowed_attributes_mutable["$tag.$attr"]);
406 } elseif (isset($allowed_attributes_mutable["*.$attr"])) {
407 unset($allowed_attributes_mutable["*.$attr"]);
// report the unmatched entries; keys are expected in "element.attribute"
// form.
// NOTE(review): explode() is called without a limit and the result is
// not checked, so a key without a dot leaves $attribute without a value.
413 foreach ($allowed_attributes_mutable as $elattr => $d) {
414 list($element, $attribute) = explode('.', $elattr);
415 $element = htmlspecialchars($element);
416 $attribute = htmlspecialchars($attribute);
417 if ($element == '*') {
418 trigger_error("Global attribute '$attribute' is not ".
419 "supported in any elements $support",
422 trigger_error("Attribute '$attribute' in element '$element' not supported $support",
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 * Entries may be separated by commas or by line breaks; spaces and tabs
 * anywhere in the list are ignored.
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *      it is different, and you'll probably have to modify your lists
 * @param $list String list to parse
 * @return array($allowed_elements, $allowed_attributes): two lookup arrays,
 *      the first keyed by element name, the second keyed by
 *      "element.attribute" (or "*.attribute"), every value being true
 */
function parseTinyMCEAllowedList($list) {

    // Whitespace is never part of an element or attribute name; stripping
    // it up front keeps "a, b" or indented multi-line lists from producing
    // names such as " b".
    $list = str_replace(array(' ', "\t"), '', $list);

    $elements = array();
    $attributes = array();

    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if (empty($chunk)) continue;
        // remove TinyMCE element control characters
        if (!strpos($chunk, '[')) {
            // no attribute list (a bracket at offset 0 is also treated
            // this way, since there would be no element name before it)
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }
        // "*" only carries global attributes, it is not an element
        if ($element !== '*') $elements[$element] = true;
        if (!$attr) continue;
        $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ]
        $attr = explode('|', $attr);
        foreach ($attr as $key) {
            $attributes["$element.$key"] = true;
        }
    }

    return array($elements, $attributes);

}