[3.1.1] Allow injectors to be specified by modules.
[htmlpurifier.git] / library / HTMLPurifier / Strategy / MakeWellFormed.php
blob1ca6271158477cf64b132105804e6e4aa4bd4a6e
1 <?php
3 /**
4 * Takes tokens and makes them well-formed (balance end tags, etc.)
5 */
6 class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
9 /**
10 * Locally shared variable references
12 protected $inputTokens, $inputIndex, $outputTokens, $currentNesting,
13 $currentInjector, $injectors;
/**
 * Runs the well-formedness pass over a token stream: converts mismatched
 * start/empty tags, auto-closes parents that cannot contain the current
 * element, drops or escapes stray end tags, and closes every tag still
 * open at the end of the input. Registered injectors get a chance to
 * rewrite text and element tokens along the way.
 *
 * @param array $tokens  List of token objects, indexed from 0. It may grow
 *                       while the loop runs (see processToken()).
 * @param mixed $config  Configuration object; must provide
 *                       getHTMLDefinition(), get() and getBatch().
 * @param mixed $context Context object; must provide register(), get()
 *                       and destroy().
 * @return array The balanced list of tokens.
 */
public function execute($tokens, $config, $context) {

    $definition = $config->getHTMLDefinition();

    // local variables
    $result = array();
    $generator = new HTMLPurifier_Generator($config, $context);
    $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
    $e = $context->get('ErrorCollector', true);

    // member variables
    $this->currentNesting = array();
    $this->inputIndex     = false;
    $this->inputTokens    =& $tokens;
    $this->outputTokens   =& $result;

    // context variables
    $context->register('CurrentNesting', $this->currentNesting);
    $context->register('InputIndex',     $this->inputIndex);
    $context->register('InputTokens',    $tokens);

    // -- begin INJECTOR --

    $this->injectors = array();

    $injectors = $config->getBatch('AutoFormat');
    $def_injectors = $definition->info_injector;
    $custom_injectors = $injectors['Custom'];
    unset($injectors['Custom']); // special case
    foreach ($injectors as $injector => $b) {
        $injector = "HTMLPurifier_Injector_$injector";
        if (!$b) continue;
        $this->injectors[] = new $injector;
    }
    foreach ($def_injectors as $injector) {
        // assumed to be objects
        $this->injectors[] = $injector;
    }
    foreach ($custom_injectors as $injector) {
        if (is_string($injector)) {
            $injector = "HTMLPurifier_Injector_$injector";
            $injector = new $injector;
        }
        $this->injectors[] = $injector;
    }

    // array index of the injector that resulted in an array
    // substitution. This enables processTokens() to know which
    // injectors are affected by the added tokens and which are
    // not (namely, the ones after the current injector are not
    // affected)
    $this->currentInjector = false;

    // give the injectors references to the definition and context
    // variables for performance reasons
    //
    // Injectors whose prepare() reports a problem are dropped. The list
    // is rebuilt rather than spliced in place: array_splice() renumbers
    // the remaining entries, so after a first removal the keys of the
    // array being iterated no longer match $this->injectors and a second
    // removal would delete the wrong injector.
    $prepared_injectors = array();
    foreach ($this->injectors as $injector) {
        $error = $injector->prepare($config, $context);
        if (!$error) {
            $prepared_injectors[] = $injector;
            continue;
        }
        trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING);
    }
    $this->injectors = $prepared_injectors;

    // warning: most foreach loops follow the convention $i => $injector.
    // Don't define these as loop-wide variables, please!

    // -- end INJECTOR --

    $token = false;
    $context->register('CurrentToken', $token);

    // isset is in loop because $tokens size changes during loop exec
    for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {

        // if all goes well, this token will be passed through unharmed
        $token = $tokens[$this->inputIndex];

        //printTokens($tokens, $this->inputIndex);

        foreach ($this->injectors as $injector) {
            if ($injector->skip > 0) $injector->skip--;
        }

        // quick-check: if it's not a tag, no need to process
        if (empty( $token->is_tag )) {
            if ($token instanceof HTMLPurifier_Token_Text) {
                // injector handler code; duplicated for performance reasons
                foreach ($this->injectors as $i => $injector) {
                    if (!$injector->skip) $injector->handleText($token);
                    if (is_array($token)) {
                        $this->currentInjector = $i;
                        break;
                    }
                }
            }
            $this->processToken($token, $config, $context);
            continue;
        }

        $info = $definition->info[$token->name]->child;

        // quick tag checks: anything that's *not* an end tag
        $ok = false;
        if ($info->type === 'empty' && $token instanceof HTMLPurifier_Token_Start) {
            // test if it claims to be a start tag but is empty
            $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
            $ok = true;
        } elseif ($info->type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) {
            // claims to be empty but really is a start tag
            $token = array(
                new HTMLPurifier_Token_Start($token->name, $token->attr),
                new HTMLPurifier_Token_End($token->name)
            );
            $ok = true;
        } elseif ($token instanceof HTMLPurifier_Token_Empty) {
            // real empty token
            $ok = true;
        } elseif ($token instanceof HTMLPurifier_Token_Start) {
            // start tag

            // ...unless they also have to close their parent
            if (!empty($this->currentNesting)) {

                $parent = array_pop($this->currentNesting);
                $parent_info = $definition->info[$parent->name];

                // this can be replaced with a more general algorithm:
                // if the token is not allowed by the parent, auto-close
                // the parent
                if (!isset($parent_info->child->elements[$token->name])) {
                    if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                    // close the parent, then re-loop to reprocess token
                    $result[] = new HTMLPurifier_Token_End($parent->name);
                    $this->inputIndex--;
                    continue;
                }

                $this->currentNesting[] = $parent; // undo the pop
            }
            $ok = true;
        }

        // injector handler code; duplicated for performance reasons
        if ($ok) {
            foreach ($this->injectors as $i => $injector) {
                if (!$injector->skip) $injector->handleElement($token);
                if (is_array($token)) {
                    $this->currentInjector = $i;
                    break;
                }
            }
            $this->processToken($token, $config, $context);
            continue;
        }

        // sanity check: we should be dealing with a closing tag
        if (!$token instanceof HTMLPurifier_Token_End) continue;

        // make sure that we have something open
        if (empty($this->currentNesting)) {
            if ($escape_invalid_tags) {
                if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                $result[] = new HTMLPurifier_Token_Text(
                    $generator->generateFromToken($token)
                );
            } elseif ($e) {
                $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
            }
            continue;
        }

        // first, check for the simplest case: everything closes neatly
        $current_parent = array_pop($this->currentNesting);
        if ($current_parent->name == $token->name) {
            $result[] = $token;
            foreach ($this->injectors as $i => $injector) {
                $injector->notifyEnd($token);
            }
            continue;
        }

        // okay, so we're trying to close the wrong tag

        // undo the previous pop
        $this->currentNesting[] = $current_parent;

        // scroll back the entire nest, trying to find our tag.
        // (feature could be to specify how far you'd like to go)
        $size = count($this->currentNesting);
        // -2 because -1 is the last element, but we already checked that
        $skipped_tags = false;
        for ($i = $size - 2; $i >= 0; $i--) {
            if ($this->currentNesting[$i]->name == $token->name) {
                // current nesting is modified
                $skipped_tags = array_splice($this->currentNesting, $i);
                break;
            }
        }

        // we still didn't find the tag, so remove
        if ($skipped_tags === false) {
            if ($escape_invalid_tags) {
                $result[] = new HTMLPurifier_Token_Text(
                    $generator->generateFromToken($token)
                );
                if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
            } elseif ($e) {
                $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
            }
            continue;
        }

        // okay, we found it, close all the skipped tags
        // note that skipped tags contains the element we need closed
        for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
            // please don't redefine $i!
            if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
            }
            $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
            foreach ($this->injectors as $injector) {
                $injector->notifyEnd($new_token);
            }
        }

    }

    $context->destroy('CurrentNesting');
    $context->destroy('InputTokens');
    $context->destroy('InputIndex');
    $context->destroy('CurrentToken');

    // we're at the end now, fix all still unclosed tags (this is
    // duplicated from the end of the loop with some slight modifications)
    // not using $skipped_tags since it would invariably be all of them
    if (!empty($this->currentNesting)) {
        for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
            // please don't redefine $i!
            if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
            }
            $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
            foreach ($this->injectors as $injector) {
                $injector->notifyEnd($new_token);
            }
        }
    }

    unset($this->outputTokens, $this->injectors, $this->currentInjector,
      $this->currentNesting, $this->inputTokens, $this->inputIndex);

    return $result;
}
/**
 * Commits the outcome of handling one input token.
 *
 * A single (truthy) token is appended to the output, and the nesting
 * stack is pushed or popped when it is a start or end tag. An array of
 * tokens is instead spliced into the input stream in place of the
 * current token so the main loop re-processes the replacement set; the
 * skip counters of the injectors up to the one that produced the
 * substitution are raised accordingly.
 *
 * @param mixed $token   A token object, an array of replacement tokens,
 *                       or a falsy value (ignored).
 * @param mixed $config  Not used by this method.
 * @param mixed $context Not used by this method.
 */
function processToken($token, $config, $context) {
    if (!is_array($token)) {
        // regular case; falsy tokens are silently dropped
        if (!$token) {
            return;
        }
        $this->outputTokens[] = $token;
        if ($token instanceof HTMLPurifier_Token_Start) {
            $this->currentNesting[] = $token;
        } elseif ($token instanceof HTMLPurifier_Token_End) {
            array_pop($this->currentNesting); // not actually used
        }
        return;
    }

    // the original token was overloaded by an injector, time
    // to do some fancy acrobatics

    // $this->inputIndex is decremented so that the entire set gets
    // re-processed; the splice itself happens at the old position
    $position = $this->inputIndex;
    $this->inputIndex--;
    array_splice($this->inputTokens, $position, 1, $token);

    // adjust the injector skips based on the array substitution
    if (!$this->injectors) {
        return;
    }
    $added = count($token);
    $last = $this->currentInjector;
    for ($n = 0; $n <= $last; $n++) {
        $affected = $this->injectors[$n];
        // because of the skip back, we need to add one more
        // for uninitialized injectors. I'm not exactly
        // sure why this is the case, but I think it has to
        // do with the fact that we're decrementing skips
        // before re-checking text
        if (!$affected->skip) {
            $affected->skip++;
        }
        $affected->skip += $added;
    }
}