Release 2.0.1, merged in 1181 to HEAD.
[htmlpurifier.git] / library / HTMLPurifier / HTMLModule / Tidy.php
blob411fd47bfef48a6b0ce7a3c7270360918925ca68
1 <?php
3 require_once 'HTMLPurifier/HTMLModule.php';
// Configuration directives consumed by the Tidy module below. Each
// define() call passes: namespace, directive name, default value,
// type and an HTML description.

// %HTML.TidyLevel: how aggressive the tidying is (defaults to 'medium').
HTMLPurifier_ConfigSchema::define(
    'HTML', 'TidyLevel', 'medium', 'string', '
<p>General level of cleanliness the Tidy module should enforce.
There are four allowed values:</p>
<dl>
<dt>none</dt>
<dd>No extra tidying should be done</dd>
<dt>light</dt>
<dd>Only fix elements that would be discarded otherwise due to
lack of support in doctype</dd>
<dt>medium</dt>
<dd>Enforce best practices</dd>
<dt>heavy</dt>
<dd>Transform all deprecated elements and attributes to standards
compliant equivalents</dd>
</dl>
<p>This directive has been available since 2.0.0</p>
' );

// These values mirror HTMLPurifier_HTMLModule_Tidy::$levels.
HTMLPurifier_ConfigSchema::defineAllowedValues(
    'HTML', 'TidyLevel', array('none', 'light', 'medium', 'heavy')
);

// %HTML.TidyAdd: lookup of fix names to enable on top of the level's set.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'TidyAdd', array(), 'lookup', '
Fixes to add to the default set of Tidy fixes as per your level. This
directive has been available since 2.0.0.
' );

// %HTML.TidyRemove: lookup of fix names to disable; in construct() a
// removal wins over both the level's set and %HTML.TidyAdd.
HTMLPurifier_ConfigSchema::define(
    'HTML', 'TidyRemove', array(), 'lookup', '
Fixes to remove from the default set of Tidy fixes as per your level. This
directive has been available since 2.0.0.
' );

/**
 * Abstract class for a set of proprietary modules that clean up (tidy)
 * poorly written HTML.
 *
 * Subclasses supply the actual fixes through makeFixes(); this class
 * decides which of them are active for the configured %HTML.TidyLevel
 * (adjusted by %HTML.TidyAdd / %HTML.TidyRemove) and installs them on
 * the module via populate().
 */
class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
{

    /**
     * List of supported levels, ordered from least to most aggressive.
     * Index zero is a special case "no fixes" level.
     */
    var $levels = array(0 => 'none', 'light', 'medium', 'heavy');

    /**
     * Default level to place all fixes in. Disabled by default (null),
     * in which case makeFixesForLevel() leaves $fixesForLevel untouched.
     */
    var $defaultLevel = null;

    /**
     * Lists of fixes used by getFixesForLevel(). Format is:
     *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
     */
    var $fixesForLevel = array(
        'light'  => array(),
        'medium' => array(),
        'heavy'  => array()
    );

    /**
     * Lazy load constructs the module by determining the necessary
     * fixes to create and then delegating to the populate() function.
     *
     * A fix is kept when it is not listed in %HTML.TidyRemove and is
     * either part of the configured level or listed in %HTML.TidyAdd.
     *
     * @param $config Configuration object; read through
     *        $config->get('HTML', ...) for TidyLevel, TidyAdd, TidyRemove
     * @todo Wildcard matching and error reporting when an added or
     *       subtracted fix has no effect.
     */
    function construct($config) {

        // create fixes, initialize fixesForLevel
        $fixes = $this->makeFixes();
        $this->makeFixesForLevel($fixes);

        // figure out which fixes the configured level switches on
        $level        = $config->get('HTML', 'TidyLevel');
        $fixes_lookup = $this->getFixesForLevel($level);

        // get custom fix declarations: these need namespace processing
        $add_fixes    = $config->get('HTML', 'TidyAdd');
        $remove_fixes = $config->get('HTML', 'TidyRemove');

        foreach ($fixes as $name => $fix) {
            // needs to be refactored a little to implement globbing
            $removed  = isset($remove_fixes[$name]);
            $selected = isset($add_fixes[$name]) || isset($fixes_lookup[$name]);
            if ($removed || !$selected) {
                unset($fixes[$name]);
            }
        }

        // populate this module with necessary fixes
        $this->populate($fixes);

    }

    /**
     * Retrieves all fixes per a level, returning fixes for that specific
     * level as well as all levels below it.
     *
     * An unrecognized level raises an E_USER_WARNING and yields no fixes.
     *
     * @param $level String level identifier, see $levels for valid values
     * @return Lookup table of fixes (fix name => true)
     */
    function getFixesForLevel($level) {
        // the "no fixes" level short-circuits everything
        if ($level == $this->levels[0]) {
            return array();
        }

        // walk upwards from the first real level, collecting every level
        // passed on the way, and stop once the requested one is reached
        $activated_levels = array();
        $found = false;
        for ($i = 1, $c = count($this->levels); $i < $c; $i++) {
            $activated_levels[] = $this->levels[$i];
            if ($this->levels[$i] == $level) {
                $found = true;
                break;
            }
        }
        if (!$found) {
            trigger_error(
                'Tidy level ' . htmlspecialchars($level) . ' not recognized',
                E_USER_WARNING
            );
            return array();
        }

        // flatten the per-level lists into a single lookup table
        $ret = array();
        foreach ($activated_levels as $activated_level) {
            foreach ($this->fixesForLevel[$activated_level] as $fix) {
                $ret[$fix] = true;
            }
        }
        return $ret;
    }

    /**
     * Dynamically populates the $fixesForLevel member variable using
     * the fixes array. It may be custom overloaded, used in conjunction
     * with $defaultLevel, or not used at all.
     *
     * With $defaultLevel set, every fix name is assigned to that one
     * level; a $defaultLevel that is not a key of $fixesForLevel raises
     * an E_USER_ERROR instead.
     *
     * @param $fixes Associative array of fix name to fix implementation
     */
    function makeFixesForLevel($fixes) {
        if (!isset($this->defaultLevel)) return;
        if (!isset($this->fixesForLevel[$this->defaultLevel])) {
            trigger_error(
                'Default level ' . $this->defaultLevel . ' does not exist',
                E_USER_ERROR
            );
            return;
        }
        $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes);
    }

    /**
     * Populates the module with transforms and other special-case code
     * based on a list of fixes passed to it
     *
     * @param $fixes Associative array of fix name to fix implementation;
     *        the name is parsed by getFixType() to decide where the
     *        implementation gets installed
     */
    function populate($fixes) {
        foreach ($fixes as $name => $fix) {
            // determine what the fix is for
            list($type, $params) = $this->getFixType($name);
            switch ($type) {
                case 'attr_transform_pre':
                case 'attr_transform_post':
                    $attr = $params['attr'];
                    if (isset($params['element'])) {
                        // element-specific transform: stored on the
                        // element definition, created blank if missing
                        $element = $params['element'];
                        if (empty($this->info[$element])) {
                            $e =& $this->addBlankElement($element);
                        } else {
                            $e =& $this->info[$element];
                        }
                    } else {
                        // no element: stored on this module itself, in
                        // info_attr_transform_pre / info_attr_transform_post
                        $type = "info_$type";
                        $e =& $this;
                    }
                    // take a reference to the target array first, then
                    // write the fix into it under the attribute name
                    $f =& $e->$type;
                    $f[$attr] = $fix;
                    break;
                case 'tag_transform':
                    $this->info_tag_transform[$params['element']] = $fix;
                    break;
                case 'child':
                case 'content_model_type':
                    // the fix replaces the same-named property of the
                    // element definition, created blank if missing
                    $element = $params['element'];
                    if (empty($this->info[$element])) {
                        $e =& $this->addBlankElement($element);
                    } else {
                        $e =& $this->info[$element];
                    }
                    $e->$type = $fix;
                    break;
                default:
                    trigger_error("Fix type $type not supported", E_USER_ERROR);
                    break;
            }
        }
    }

    /**
     * Parses a fix name and determines what kind of fix it is, as well
     * as other information defined by the fix
     *
     * Names have the shape element@attr#property, every part optional:
     *      'font'       => array('tag_transform', array('element' => 'font'))
     *      'img@align'  => array('attr_transform_pre',
     *                            array('element' => 'img', 'attr' => 'align'))
     *      '@lang'      => array('attr_transform_pre', array('attr' => 'lang'))
     *      'ol#child'   => array('child', array('element' => 'ol'))
     *
     * @param $name String name of fix
     * @return array(string $fix_type, array $fix_parameters)
     * @note $fix_parameters is type dependant, see populate() for usage
     *       of these parameters
     */
    function getFixType($name) {
        // parse it: '#property' is split off first, then '@attr'
        $property = null;
        $attr     = null;
        if (strpos($name, '#') !== false) {
            $pieces   = explode('#', $name);
            $name     = $pieces[0];
            $property = $pieces[1];
        }
        if (strpos($name, '@') !== false) {
            $pieces = explode('@', $name);
            $name   = $pieces[0];
            $attr   = $pieces[1];
        }

        // figure out the parameters
        $params = array();
        if ($name !== '') $params['element'] = $name;

        // special case: attribute transform, 'pre' unless stated otherwise
        if (!is_null($attr)) {
            $params['attr'] = $attr;
            if (is_null($property)) $property = 'pre';
            return array('attr_transform_' . $property, $params);
        }

        // special case: tag transform
        if (is_null($property)) {
            return array('tag_transform', $params);
        }

        // anything else is named directly by its property
        return array($property, $params);
    }

    /**
     * Defines all fixes the module will perform in a compact
     * associative array of fix name to fix implementation.
     * @abstract
     */
    function makeFixes() {}

}
