Deal with old libxml incompatibilities.
[htmlpurifier.git] / library / HTMLPurifier / HTMLModule.php
blobbb3a9230b1a9fc0ba6d75e82e31ded79dfd074d6
1 <?php
/**
 * Represents an XHTML 1.1 module, with information on elements, tags
 * and attributes.
 * @note Even though this is technically XHTML 1.1, it is also used for
 *       regular HTML parsing. We are using modulization as a convenient
 *       way to represent the internals of HTMLDefinition, and our
 *       implementation is by no means conforming and does not directly
 *       use the normative DTDs or XML schemas.
 * @note The public variables in a module should almost directly
 *       correspond to the variables in HTMLPurifier_HTMLDefinition.
 *       However, the prefix info carries no special meaning in these
 *       objects (include it anyway if that's the correspondence though).
 * @todo Consider making some member functions protected
 */
class HTMLPurifier_HTMLModule
{

    // -- Overloadable ----------------------------------------------------

    /**
     * Short unique string identifier of the module.
     * @type string
     */
    public $name;

    /**
     * Informally, a list of elements this module changes.
     * Not used in any significant way.
     * @type array
     */
    public $elements = array();

    /**
     * Associative array of element names to element definitions.
     * Some definitions may be incomplete, to be merged in later
     * with the full definition.
     * @type array
     */
    public $info = array();

    /**
     * Associative array of content set names to content set additions.
     * This is commonly used to, say, add an A element to the Inline
     * content set. This corresponds to an internal variable $content_sets
     * and NOT info_content_sets member variable of HTMLDefinition.
     * @type array
     */
    public $content_sets = array();

    /**
     * Associative array of attribute collection names to attribute
     * collection additions. More rarely used for adding attributes to
     * the global collections. Example is the StyleAttribute module adding
     * the style attribute to the Core. Corresponds to HTMLDefinition's
     * attr_collections->info, since the object's data is only info,
     * with extra behavior associated with it.
     * @type array
     */
    public $attr_collections = array();

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed before validation.
     * @type array
     */
    public $info_attr_transform_pre = array();

    /**
     * List of HTMLPurifier_AttrTransform to be performed after validation.
     * @type array
     */
    public $info_attr_transform_post = array();

    /**
     * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
     * An injector will only be invoked if all of its prerequisites are met;
     * if an injector fails setup, there will be no error; it will simply be
     * silently disabled.
     * @type array
     */
    public $info_injector = array();

    /**
     * Boolean flag that indicates whether or not getChildDef is implemented.
     * For optimization reasons: may save a call to a function. Be sure
     * to set it if you do implement getChildDef(), otherwise it will have
     * no effect!
     * @type bool
     */
    public $defines_child_def = false;

    /**
     * Boolean flag whether or not this module is safe. If it is not safe, all
     * of its members are unsafe. Modules are safe by default (this might be
     * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
     * which is based off of safe HTML, to explicitly say, "This is safe," even
     * though there are modules which are "unsafe")
     *
     * @type bool
     * @note Previously, safety could be applied at an element level granularity.
     *       We've removed this ability, so in order to add "unsafe" elements
     *       or attributes, a dedicated module with this property set to false
     *       must be used.
     */
    public $safe = true;

    /**
     * Retrieves a proper HTMLPurifier_ChildDef subclass based on
     * content_model and content_model_type member variables of
     * the HTMLPurifier_ElementDef class. There is a similar function
     * in HTMLPurifier_HTMLDefinition.
     *
     * This base implementation does not build anything and always
     * returns false; a module that overrides it should also set
     * $defines_child_def to true.
     *
     * @param HTMLPurifier_ElementDef $def
     * @return HTMLPurifier_ChildDef|bool ChildDef subclass, or false when
     *         the module does not provide one (the default here).
     */
    public function getChildDef($def)
    {
        return false;
    }

    // -- Convenience -----------------------------------------------------

    /**
     * Convenience function that sets up a new element
     * @param string $element Name of element to add
     * @param string|bool $type What content set should element be registered to?
     *              Set as false to skip this step.
     * @param string|object $contents Allowed children in form of:
     *              "$content_model_type: $content_model". A non-string value
     *              is assigned directly as the element's child definition.
     * @param array|string $attr_includes What attribute collections to register to
     *              element?
     * @param array $attr What unique attributes does the element define?
     * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
     * @return HTMLPurifier_ElementDef Created element definition object, so you
     *         can set advanced parameters
     */
    public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
    {
        $this->elements[] = $element;
        // parse content_model
        list($content_model_type, $content_model) = $this->parseContents($contents);
        // merge in attribute inclusions
        $this->mergeInAttrIncludes($attr, $attr_includes);
        // add element to content sets
        if ($type) {
            $this->addElementToContentSet($element, $type);
        }
        // create element
        $this->info[$element] = HTMLPurifier_ElementDef::create(
            $content_model,
            $content_model_type,
            $attr
        );
        // literal object $contents means direct child manipulation
        if (!is_string($contents)) {
            $this->info[$element]->child = $contents;
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that creates a totally blank, non-standalone
     * element. If a definition for the element already exists in this
     * module, an error is triggered and the existing definition is
     * returned untouched.
     * @param string $element Name of element to create
     * @return HTMLPurifier_ElementDef Created (or pre-existing) element
     */
    public function addBlankElement($element)
    {
        if (!isset($this->info[$element])) {
            $this->elements[] = $element;
            $this->info[$element] = new HTMLPurifier_ElementDef();
            $this->info[$element]->standalone = false;
        } else {
            trigger_error("Definition for $element already exists in module, cannot redefine");
        }
        return $this->info[$element];
    }

    /**
     * Convenience function that registers an element to a content set.
     * Elements are accumulated in a single ' | '-separated string.
     * @param string $element Element to register
     * @param string $type Name content set (warning: case sensitive, usually upper-case
     *        first letter)
     */
    public function addElementToContentSet($element, $type)
    {
        if (!isset($this->content_sets[$type])) {
            $this->content_sets[$type] = '';
        } else {
            $this->content_sets[$type] .= ' | ';
        }
        $this->content_sets[$type] .= $element;
    }

    /**
     * Convenience function that transforms single-string contents
     * into separate content model and content model type
     * @param string $contents Allowed children in form of:
     *                  "$content_model_type: $content_model"
     * @return array Two-element list: (content model type, content model)
     * @note If contents is an object, an array of two nulls will be
     *       returned, and the caller needs to take the original $contents
     *       and use it directly.
     * @note Only the first two colon-separated segments are used. A string
     *       without any colon is treated as a bare content model type with
     *       an empty content model.
     */
    public function parseContents($contents)
    {
        if (!is_string($contents)) {
            return array(null, null);
        } // defer
        switch ($contents) {
            // check for shorthand content model forms
            case 'Empty':
                return array('empty', '');
            case 'Inline':
                return array('optional', 'Inline | #PCDATA');
            case 'Flow':
                return array('optional', 'Flow | #PCDATA');
        }
        // Pad to two entries so that input without a ':' does not read an
        // undefined offset (and then hand null to trim()); the result for
        // such input stays an empty content model, just without diagnostics.
        $parts = array_pad(explode(':', $contents), 2, '');
        list($content_model_type, $content_model) = $parts;
        $content_model_type = strtolower(trim($content_model_type));
        $content_model = trim($content_model);
        return array($content_model_type, $content_model);
    }

    /**
     * Convenience function that merges a list of attribute includes into
     * an attribute array. The includes are stored under key 0 of $attr,
     * replacing whatever was there.
     * @param array $attr Reference to attr array to modify
     * @param array|string $attr_includes Array of includes / string include to merge in
     */
    public function mergeInAttrIncludes(&$attr, $attr_includes)
    {
        if (!is_array($attr_includes)) {
            if (empty($attr_includes)) {
                $attr_includes = array();
            } else {
                $attr_includes = array($attr_includes);
            }
        }
        $attr[0] = $attr_includes;
    }

    /**
     * Convenience function that generates a lookup table with boolean
     * true as value.
     * @param string|array $list List of values to turn into a lookup
     * @note You can also pass an arbitrary number of arguments in
     *       place of the regular argument
     * @note Null values are skipped.
     * @return array array equivalent of list
     */
    public function makeLookup($list)
    {
        if (is_string($list)) {
            // variadic form: every argument is one lookup key
            $list = func_get_args();
        }
        $ret = array();
        foreach ($list as $value) {
            if (is_null($value)) {
                continue;
            }
            $ret[$value] = true;
        }
        return $ret;
    }

    /**
     * Lazy load construction of the module after determining whether
     * or not it's needed, and also when a finalized configuration object
     * is available. The base implementation does nothing; subclasses
     * override it.
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
    }
}
284 // vim: et sw=4 sts=4