Release 2.0.1, merged in 1181 to HEAD.
[htmlpurifier.git] / library / HTMLPurifier / HTMLModule.php
blob077daff88eb2e7f373ec4361eaae96141b1bc1d5
1 <?php
/**
 * Represents an XHTML 1.1 module, with information on elements, tags
 * and attributes.
 * @note Even though this is technically XHTML 1.1, it is also used for
 *       regular HTML parsing. We are using modulization as a convenient
 *       way to represent the internals of HTMLDefinition, and our
 *       implementation is by no means conforming and does not directly
 *       use the normative DTDs or XML schemas.
 * @note The public variables in a module should almost directly
 *       correspond to the variables in HTMLPurifier_HTMLDefinition.
 *       However, the prefix info carries no special meaning in these
 *       objects (include it anyway if that's the correspondence though).
 */

class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module
     */
    var $name;

    /**
     * Informally, a list of elements this module changes. Not used in
     * any significant way.
     * @protected
     */
    var $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @public
     */
    var $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @public
     */
    var $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @public
     */
    var $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform
     * @public
     */
    var $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @public
     */
    var $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @public
     */
    var $info_attr_transform_post = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @public
     */
    var $defines_child_def = false;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     * @param $def HTMLPurifier_ElementDef instance
     * @return HTMLPurifier_ChildDef subclass; this base implementation
     *         always returns false, subclasses are expected to override it
     * @public
     */
    function getChildDef($def) {return false;}

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param $element Name of element to add
     * @param $safe Is element safe for untrusted users to use?
     * @param $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param $contents Allowed children in form of:
     *              "$content_model_type: $content_model"
     * @param $attr_includes What attribute collections to register to
     *              element?
     * @param $attr What unique attributes does the element define?
     * @note See ElementDef for in-depth descriptions of these parameters.
     * @return Reference to created element definition object, so you
     *         can set advanced parameters
     * @protected
     */
    function &addElement($element, $safe, $type, $contents, $attr_includes = array(), $attr = array()) {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) $this->addElementToContentSet($element, $type);
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $safe, $content_model, $content_model_type, $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) $this->info[$element]->child = $contents;
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element.
     * @param $element Name of element to create
     * @return Reference to created element
     * @note If a definition for $element already exists in this module,
     *       an error is triggered and the existing definition is
     *       returned untouched.
     */
    function &addBlankElement($element) {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set
     * @param $element Element to register
     * @param $type Name of content set (warning: case sensitive, usually
     *              upper-case first letter)
     * @note Entries of a content set are accumulated as a single string
     *       joined by ' | '.
     * @protected
     */
    function addElementToContentSet($element, $type) {
        if (!isset($this->content_sets[$type])) $this->content_sets[$type] = '';
        else $this->content_sets[$type] .= ' | ';
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note A string without a colon is treated as a bare content model
     *       type with an empty content model.
     * @return Array of (content model type, content model)
     */
    function parseContents($contents) {
        if (!is_string($contents)) return array(null, null); // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Split on the first colon only so the model itself may contain
        // colons, and pad so a colon-less string does not cause an
        // undefined offset in list().
        list($content_model_type, $content_model) =
            array_pad(explode(':', $contents, 2), 2, '');
        $content_model_type = strtolower(trim($content_model_type));
        $content_model = trim($content_model);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array.
     * @param $attr Reference to attr array to modify
     * @param $attr_includes Array of includes / string include to merge in
     * @note The includes are stored under index 0 of $attr, replacing
     *       whatever was there; an empty non-array value becomes an
     *       empty list.
     */
    function mergeInAttrIncludes(&$attr, $attr_includes) {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) $attr_includes = array();
            else $attr_includes = array($attr_includes);
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @note Null values are skipped.
     * @return Lookup array equivalent of list
     */
    function makeLookup($list) {
        // Any first argument that cannot be iterated means the variadic
        // form was used, not only a string one.
        if (!is_array($list) && !is_object($list)) $list = func_get_args();
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) continue;
            $ret[$value] = true;
        }
        return $ret;
    }

}