Make context errors more friendly; factor out disabled; fix broken test cases; update...
[htmlpurifier.git] / library / HTMLPurifier / Strategy / MakeWellFormed.php
blob3029f62e4bc7774fbd56d03bf45f5be221a4acc7
1 <?php
3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
// Declares the Core.AutoParagraph and Core.AutoLinkify directives, which the strategy below reads through $config->get() to decide which injectors to enable. The arguments after the directive name (false, then a bool type string, then an HTML description) are presumably default, type and documentation - confirm against HTMLPurifier_ConfigSchema::define.
10 HTMLPurifier_ConfigSchema::define(
11 'Core', 'AutoParagraph', false, 'bool', '
12 <p>
13 This directive will cause HTML Purifier to automatically paragraph text
14 in the document fragment root based on two newlines and block tags.
15 This directive has been available since 2.0.1.
16 </p>
20 HTMLPurifier_ConfigSchema::define(
21 'Core', 'AutoLinkify', false, 'bool', '
22 <p>
23 This directive will cause HTML Purifier to automatically linkify
24 text that looks like URLs. This directive has been available since
25 2.0.1.
26 </p>
30 /**
31 * Takes tokens and makes them well-formed (balances end tags, etc.)
33 class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
/**
 * Rewrites a token list so that start and end tags are balanced.
 *
 * Walks the input tokens once, appending to a fresh output array. Along the way it
 * converts start tokens of empty-type elements into empty tokens (and empty tokens of
 * non-empty elements into a start/end pair), closes a parent element that does not list
 * the incoming start tag among its allowed children, drops (or, when
 * Core.EscapeInvalidTags is set, turns into text) end tags that match nothing open, and
 * finally emits end tokens for everything still open.
 *
 * When Core.AutoParagraph and/or Core.AutoLinkify are enabled, the matching injectors
 * are given each text token and each passed-through start token; an injector may
 * replace the token with an array of tokens, which processToken() splices back into
 * the input for re-processing.
 *
 * The context variables CurrentNesting, InputIndex, InputTokens, OutputTokens,
 * Injector, CurrentInjector and InjectorSkip are registered for the duration of the
 * call and destroyed before returning.
 *
 * @param $tokens  Array of tokens to balance
 * @param $config  Configuration object; read for the HTML definition and Core directives
 * @param $context Context object, passed by reference
 * @return Array of well-formed tokens
 */
36 function execute($tokens, $config, &$context) {
38 $definition = $config->getHTMLDefinition();
39 $generator = new HTMLPurifier_Generator();
// stack of start tokens that are currently open; processToken() pushes onto it
// through the context
41 $current_nesting = array();
42 $context->register('CurrentNesting', $current_nesting);
44 $tokens_index = null;
45 $context->register('InputIndex', $tokens_index);
46 $context->register('InputTokens', $tokens);
48 $result = array();
49 $context->register('OutputTokens', $result);
51 $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
53 // -- begin INJECTOR --
54 // factor this stuff out to its own class
// $injector and $injector_skip are parallel arrays: same index, same injector
56 $injector = array();
57 $injector_skip = array();
59 if ($config->get('Core', 'AutoParagraph')) {
60 $injector[] = new HTMLPurifier_Injector_AutoParagraph();
61 // decrement happens first, so set to one so we start at zero
62 $injector_skip[] = 1;
65 if ($config->get('Core', 'AutoLinkify')) {
66 $injector[] = new HTMLPurifier_Injector_Linkify();
67 $injector_skip[] = 1;
70 // array index of the injector that resulted in an array
71 // substitution. This enables processTokens() to know which
72 // injectors are affected by the added tokens and which are
73 // not (namely, the ones after the current injector are not
74 // affected)
75 $current_injector = false;
77 $context->register('Injector', $injector);
78 $context->register('CurrentInjector', $current_injector);
80 // number of tokens to skip + 1
81 // before processing, this gets decremented: if it equals zero,
82 // it means the injector is active and is processing tokens, if
83 // it is greater than zero, then it is inactive, presumably having
84 // been the source of the tokens
85 $context->register('InjectorSkip', $injector_skip);
87 // -- end INJECTOR --
// isset() instead of a precomputed count: processToken() splices replacement tokens
// into InputTokens and rewinds InputIndex, so the list length can change mid-loop
// (presumably the context holds $tokens and $tokens_index by reference - confirm
// against HTMLPurifier_Context::register)
89 for ($tokens_index = 0; isset($tokens[$tokens_index]); $tokens_index++) {
91 // if all goes well, this token will be passed through unharmed
92 $token = $tokens[$tokens_index];
// tick every positive skip counter down once per input token
94 foreach ($injector as $i => $x) {
95 if ($injector_skip[$i] > 0) $injector_skip[$i]--;
98 // quick-check: if it's not a tag, no need to process
99 if (empty( $token->is_tag )) {
101 // duplicated with handleStart
102 if ($token->type === 'text') {
103 foreach ($injector as $i => $x) {
104 if (!$injector_skip[$i]) {
105 $x->handleText($token, $config, $context);
// an array here means the injector replaced the token; remember which injector
// did it and stop offering the token to later injectors
107 if (is_array($token)) {
108 $current_injector = $i;
109 break;
114 $this->processToken($token, $config, $context);
115 continue;
// content model of this element
// NOTE(review): assumes $token->name is always a key of $definition->info -
// confirm that unknown elements are removed before this strategy runs
118 $info = $definition->info[$token->name]->child;
120 // test if it claims to be a start tag but is empty
121 if ($info->type == 'empty' && $token->type == 'start') {
// appended directly to $result, so the nesting stack is left untouched
122 $result[] = new HTMLPurifier_Token_Empty($token->name, $token->attr);
123 continue;
126 // test if it claims to be empty but really is a start tag
127 if ($info->type != 'empty' && $token->type == 'empty' ) {
128 $result[] = new HTMLPurifier_Token_Start($token->name, $token->attr);
129 $result[] = new HTMLPurifier_Token_End($token->name);
130 continue;
133 // automatically insert empty tags
134 if ($token->type == 'empty') {
135 $result[] = $token;
136 continue;
139 // start tags have precedence, so they get passed through...
140 if ($token->type == 'start') {
142 // ...unless they also have to close their parent
143 if (!empty($current_nesting)) {
// popped only to inspect it; pushed back below when the parent allows this child
145 $parent = array_pop($current_nesting);
146 $parent_info = $definition->info[$parent->name];
148 // this can be replaced with a more general algorithm:
149 // if the token is not allowed by the parent, auto-close
150 // the parent
151 if (!isset($parent_info->child->elements[$token->name])) {
152 // close the parent, then append the token
// this path writes to $result directly, so the injectors never see the token
153 $result[] = new HTMLPurifier_Token_End($parent->name);
154 $result[] = $token;
155 $current_nesting[] = $token;
156 continue;
159 $current_nesting[] = $parent; // undo the pop
162 // injectors
163 foreach ($injector as $i => $x) {
164 if (!$injector_skip[$i]) {
165 $x->handleStart($token, $config, $context);
167 if (is_array($token)) {
168 $current_injector = $i;
169 break;
173 $this->processToken($token, $config, $context);
174 continue;
177 // sanity check: we should be dealing with a closing tag
178 if ($token->type != 'end') continue;
180 // make sure that we have something open
181 if (empty($current_nesting)) {
// stray end tag: kept as literal text only when escaping is enabled, else dropped
182 if ($escape_invalid_tags) {
183 $result[] = new HTMLPurifier_Token_Text(
184 $generator->generateFromToken($token, $config, $context)
187 continue;
190 // first, check for the simplest case: everything closes neatly
191 $current_parent = array_pop($current_nesting);
192 if ($current_parent->name == $token->name) {
193 $result[] = $token;
194 continue;
197 // okay, so we're trying to close the wrong tag
199 // undo the previous pop
200 $current_nesting[] = $current_parent;
202 // scroll back the entire nest, trying to find our tag.
203 // (feature could be to specify how far you'd like to go)
204 $size = count($current_nesting);
205 // -2 because -1 is the last element, but we already checked that
// false doubles as the not-found marker checked after the loop
206 $skipped_tags = false;
207 for ($i = $size - 2; $i >= 0; $i--) {
208 if ($current_nesting[$i]->name == $token->name) {
209 // current nesting is modified
210 $skipped_tags = array_splice($current_nesting, $i);
211 break;
215 // we still didn't find the tag, so remove
216 if ($skipped_tags === false) {
217 if ($escape_invalid_tags) {
218 $result[] = new HTMLPurifier_Token_Text(
219 $generator->generateFromToken($token, $config, $context)
222 continue;
225 // okay, we found it, close all the skipped tags
226 // note that skipped tags contains the element we need closed
// iterate from the end so the innermost element is closed first
227 $size = count($skipped_tags);
228 for ($i = $size - 1; $i >= 0; $i--) {
229 $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
234 // we're at the end now, fix all still unclosed tags
235 // not using processToken() because at this point we don't
236 // care about current nesting
237 if (!empty($current_nesting)) {
238 $size = count($current_nesting);
239 for ($i = $size - 1; $i >= 0; $i--) {
240 $result[] =
241 new HTMLPurifier_Token_End($current_nesting[$i]->name);
// tear down everything registered at the top of this method
245 $context->destroy('CurrentNesting');
246 $context->destroy('InputTokens');
247 $context->destroy('InputIndex');
248 $context->destroy('OutputTokens');
250 $context->destroy('Injector');
251 $context->destroy('CurrentInjector');
252 $context->destroy('InjectorSkip');
254 return $result;
/**
 * Commits one token to the output, or re-queues an injector substitution.
 *
 * A single token object is appended to the OutputTokens context variable and
 * the CurrentNesting stack is updated: start tokens are pushed, end tokens pop
 * one entry. A falsy non-array value is ignored.
 *
 * An array means an injector replaced the current input token with a list of
 * tokens. The list is spliced into InputTokens in place of that token and
 * InputIndex is stepped back by one so the main loop re-processes the inserted
 * tokens. The skip counters of all injectors up to and including
 * CurrentInjector are raised so those injectors ignore the inserted tokens.
 *
 * @param $token   Token object, or array of replacement tokens
 * @param $config  Configuration object (not used here)
 * @param $context Context object holding the shared processing state
 */
function processToken($token, $config, &$context) {

    if (!is_array($token)) {
        // regular case: nothing to do for an empty/false value
        if (!$token) {
            return;
        }
        $output =& $context->get('OutputTokens');
        $open_stack =& $context->get('CurrentNesting');
        $output[] = $token;
        switch ($token->type) {
            case 'start':
                $open_stack[] = $token;
                break;
            case 'end':
                // theoretical: this code doesn't get run because performing
                // the calculations inline is more efficient, and
                // end tokens (currently) do not cause a handler invocation
                array_pop($open_stack);
                break;
        }
        return;
    }

    // the original token was overloaded by an injector: swap the
    // replacement set into the input stream
    $input =& $context->get('InputTokens');
    $position =& $context->get('InputIndex');
    array_splice($input, $position, 1, $token);
    // step back one slot so that the entire set gets re-processed
    $position--;

    // adjust the injector skips based on the array substitution
    $skips =& $context->get('InjectorSkip');
    $source_injector =& $context->get('CurrentInjector');

    // one extra because the counters are decremented before they are tested
    $bump = 1 + count($token);
    $n = 0;
    while ($n <= $source_injector) {
        $skips[$n] += $bump;
        $n++;
    }
}