Fix problem where stacked AttrTransforms clobber each other.
[htmlpurifier.git] / library / HTMLPurifier / HTMLModule / Tidy.php
blob21783f18eb82bf9de772ce2d0d8f807cba93944c
1 <?php
3 /**
4 * Abstract class for a set of proprietary modules that clean up (tidy)
5 * poorly written HTML.
6 * @todo Figure out how to protect some of these methods/properties
7 */
8 class HTMLPurifier_HTMLModule_Tidy extends HTMLPurifier_HTMLModule
/**
 * Names of the supported tidy levels, in cumulative order: a level
 * includes the fixes of every level listed before it. Index zero is
 * the special-case "no fixes" level.
 */
public $levels = array(
    0 => 'none',
    1 => 'light',
    2 => 'medium',
    3 => 'heavy',
);

/**
 * Level that makeFixesForLevel() assigns every fix to. Null (the
 * default) disables that automatic assignment.
 */
public $defaultLevel = null;

/**
 * Fix names registered for each level, as read by getFixesForLevel().
 * Format is:
 *      HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2');
 */
public $fixesForLevel = array(
    'light'  => array(),
    'medium' => array(),
    'heavy'  => array(),
);
/**
 * Lazily builds the module: collects every available fix, narrows the
 * set using the configured tidy level together with the explicitly
 * added and removed fixes, then hands the survivors to populate().
 * @param $config Configuration object; read via get() for
 *        HTML.TidyLevel, HTML.TidyAdd and HTML.TidyRemove
 * @todo Wildcard matching and error reporting when an added or
 *       subtracted fix has no effect.
 */
public function setup($config) {

    // build every fix, then let the per-level table initialize itself
    $fixes = $this->makeFixes();
    $this->makeFixesForLevel($fixes);

    // fixes switched on by the configured level
    $level_fixes = $this->getFixesForLevel($config->get('HTML.TidyLevel'));

    // custom fix declarations: these need namespace processing
    $added   = $config->get('HTML.TidyAdd');
    $removed = $config->get('HTML.TidyRemove');

    // needs to be refactored a little to implement globbing
    foreach ($fixes as $name => $fix) {
        // a fix survives only if it was not explicitly removed and was
        // either explicitly added or enabled by the level
        $keep = !isset($removed[$name])
            && (isset($added[$name]) || isset($level_fixes[$name]));
        if (!$keep) {
            unset($fixes[$name]);
        }
    }

    // populate this module with the remaining fixes
    $this->populate($fixes);

}
/**
 * Collects the fixes enabled by a level: those registered for that
 * level itself plus those of every level listed before it in $levels.
 * An unrecognized level raises an E_USER_WARNING and yields no fixes.
 * @param $level String level identifier, see $levels for valid values
 * @return Lookup table of fixes (fix name => true)
 */
public function getFixesForLevel($level) {
    // the special first level never enables anything
    if ($level == $this->levels[0]) {
        return array();
    }

    // walk the remaining levels in order, remembering each one until
    // the requested level has been reached
    $selected = array();
    $recognized = false;
    $total = count($this->levels);
    for ($idx = 1; $idx < $total; $idx++) {
        $current = $this->levels[$idx];
        $selected[] = $current;
        if ($current == $level) {
            $recognized = true;
            break;
        }
    }

    if (!$recognized) {
        trigger_error(
            'Tidy level ' . htmlspecialchars($level) . ' not recognized',
            E_USER_WARNING
        );
        return array();
    }

    // flatten the per-level lists into a single lookup table
    $lookup = array();
    foreach ($selected as $level_name) {
        foreach ($this->fixesForLevel[$level_name] as $fix_name) {
            $lookup[$fix_name] = true;
        }
    }
    return $lookup;
}
/**
 * Dynamically fills $fixesForLevel from the fixes array by registering
 * every fix name under $defaultLevel. Does nothing when $defaultLevel
 * is not set. It may be custom overloaded, used in conjunction with
 * $defaultLevel, or not used at all.
 * @param $fixes Associative array of fix name to fix implementation
 */
public function makeFixesForLevel($fixes) {
    // no default level configured: leave the table as it is
    if (!isset($this->defaultLevel)) {
        return;
    }

    $target = $this->defaultLevel;
    if (isset($this->fixesForLevel[$target])) {
        // every known fix belongs to the default level
        $this->fixesForLevel[$target] = array_keys($fixes);
        return;
    }

    // the default level must already be a key of $fixesForLevel
    trigger_error(
        'Default level ' . $target . ' does not exist',
        E_USER_ERROR
    );
}
/**
 * Populates the module with transforms and other special-case code
 * based on the fixes passed to it. The kind of each fix and its target
 * are derived from the fix name through getFixType().
 * @param $fixes Associative array of fix name to fix implementation
 */
public function populate($fixes) {
    foreach ($fixes as $name => $fix) {
        // determine what the fix is for
        list($type, $params) = $this->getFixType($name);

        if ($type == 'attr_transform_pre' || $type == 'attr_transform_post') {
            $attr = $params['attr'];
            if (isset($params['element'])) {
                // element-specific transform: stored on that element's
                // definition, which is created blank if missing
                $element = $params['element'];
                $holder = empty($this->info[$element])
                    ? $this->addBlankElement($element)
                    : $this->info[$element];
            } else {
                // no element given: stored on the module itself, in
                // the property carrying the info_ prefix
                $type = "info_$type";
                $holder = $this;
            }
            // PHP does some weird parsing of $holder->$type[$attr],
            // so the property is bound by reference and written
            // through that reference instead
            $slot =& $holder->$type;
            $slot[$attr] = $fix;
        } elseif ($type == 'tag_transform') {
            $this->info_tag_transform[$params['element']] = $fix;
        } elseif ($type == 'child' || $type == 'content_model_type') {
            // the fix replaces the like-named property of the element
            // definition, which is created blank if missing
            $element = $params['element'];
            $holder = empty($this->info[$element])
                ? $this->addBlankElement($element)
                : $this->info[$element];
            $holder->$type = $fix;
        } else {
            trigger_error("Fix type $type not supported", E_USER_ERROR);
        }
    }
}
/**
 * Parses a fix name of the form "element@attr#property" (each part is
 * optional) and determines what kind of fix it is, as well as other
 * information defined by the fix.
 * @param $name String name of fix
 * @return array(string $fix_type, array $fix_parameters)
 * @note $fix_parameters is type dependent, see populate() for usage
 *       of these parameters
 */
public function getFixType($name) {
    $property = null;
    $attr = null;

    // split off "#property" first, then "@attr" from what is left
    $hash_parts = explode('#', $name);
    if (count($hash_parts) > 1) {
        $name = $hash_parts[0];
        $property = $hash_parts[1];
    }
    $at_parts = explode('@', $name);
    if (count($at_parts) > 1) {
        $name = $at_parts[0];
        $attr = $at_parts[1];
    }

    // whatever remains of the name, if anything, is the element
    $params = array();
    if ($name !== '') {
        $params['element'] = $name;
    }

    // special case: attribute transform, which defaults to "pre"
    if ($attr !== null) {
        $params['attr'] = $attr;
        $stage = ($property === null) ? 'pre' : $property;
        return array('attr_transform_' . $stage, $params);
    }

    // special case: tag transform (neither attribute nor property)
    if ($property === null) {
        return array('tag_transform', $params);
    }

    // otherwise the property itself names the fix type
    return array($property, $params);
}
/**
 * Defines all fixes the module will perform in a compact
 * associative array of fix name to fix implementation.
 * This base implementation defines no fixes; concrete tidy modules
 * are expected to override it.
 * @return array Associative array of fix name => fix implementation
 */
public function makeFixes() {
    // Return an empty array explicitly: an empty body would yield null,
    // which setup() then hands to foreach and makeFixesForLevel() to
    // array_keys(), both of which complain about non-array input.
    return array();
}
207 // vim: et sw=4 sts=4