Implement working linkification, now, the real challenge is to get it to play nice...
[htmlpurifier.git] / library / HTMLPurifier / Strategy / MakeWellFormed.php
blobabd3cf67372a37ea5471a8c8b2e0da8d005e7ab1
1 <?php
3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
// Declare the Core.AutoParagraph configuration directive: type 'bool',
// default false. The trailing string is the directive's HTML description.
HTMLPurifier_ConfigSchema::define(
    'Core', 'AutoParagraph', false, 'bool', '
<p>
  This directive will cause HTML Purifier to automatically paragraph text
  in the document fragment root based on two newlines and block tags.
  This directive has been available since 2.0.1.
</p>
');
// Declare the Core.AutoLinkify configuration directive: type 'bool',
// default false. The trailing string is the directive's HTML description.
HTMLPurifier_ConfigSchema::define(
    'Core', 'AutoLinkify', false, 'bool', '
<p>
  This directive will cause HTML Purifier to automatically linkify
  text that looks like URLs. This directive has been available since
  2.0.1.
</p>
');
/**
 * Takes tokens and makes them well-formed (balances end tags, etc.)
 *
 * While walking the token stream, optional "injectors" (AutoParagraph,
 * Linkify) are given the chance to rewrite text and start-tag tokens.
 * An injector signals a rewrite by replacing the token it was handed
 * with an array of tokens; that array is spliced back into the input
 * stream and re-processed.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{

    /**
     * Balances the tags of a token stream.
     *
     * @param $tokens  Array of token objects to process. Each token is
     *                 expected to expose ->type, and tags additionally
     *                 ->is_tag, ->name and ->attr.
     * @param $config  Configuration object; queried for the HTML
     *                 definition and the Core.EscapeInvalidTags,
     *                 Core.AutoParagraph and Core.AutoLinkify directives.
     * @param $context Context object used to share the working state of
     *                 this run with processToken() and the injectors.
     * @return Array of tokens in which every start tag has a matching
     *         end tag.
     */
    function execute($tokens, $config, &$context) {

        $definition = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator();

        // stack of start tokens that have not been closed yet
        $current_nesting = array();
        $context->register('CurrentNesting', $current_nesting);

        // position in, and contents of, the input stream; published so
        // that processToken() can splice injector output back in
        $tokens_index = null;
        $context->register('InputIndex', $tokens_index);
        $context->register('InputTokens', $tokens);

        $result = array();
        $context->register('OutputTokens', $result);

        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');

        // three parallel arrays, all indexed by injector number:
        //   $injector          - the injector objects
        //   $injector_skip     - how many upcoming tokens the injector
        //                        must still ignore (its own output)
        //   $injector_disabled - whether it is off for the current token
        $injector = array();
        $injector_skip = array();
        $injector_disabled = array();

        if ($config->get('Core', 'AutoParagraph')) {
            $injector[] = new HTMLPurifier_Injector_AutoParagraph();
            $injector_skip[] = 0;
            $injector_disabled[] = false;
        }

        if ($config->get('Core', 'AutoLinkify')) {
            $injector[] = new HTMLPurifier_Injector_Linkify();
            $injector_skip[] = 0;
            $injector_disabled[] = false;
        }

        // index of the injector that produced the latest replacement
        $current_injector = 0;

        $context->register('Injector', $injector);
        $context->register('InjectorSkip', $injector_skip);
        $context->register('CurrentInjector', $current_injector);

        for ($tokens_index = 0; isset($tokens[$tokens_index]); $tokens_index++) {

            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$tokens_index];

            // an injector with a pending skip count sits this token out
            // and has one skip consumed
            foreach ($injector as $i => $x) {
                if ($injector_skip[$i] > 0) {
                    $injector_skip[$i]--;
                    $injector_disabled[$i] = true;
                } else {
                    $injector_disabled[$i] = false;
                }
            }

            // quick-check: if it's not a tag, no need to process
            if (empty( $token->is_tag )) {

                if ($token->type === 'text') {
                    foreach ($injector as $i => $x) {
                        if (!$injector_disabled[$i]) {
                            $x->handleText($token, $config, $context);
                        }
                        // the first injector that turns the token into an
                        // array wins; remember it for processToken()
                        if (is_array($token)) {
                            $current_injector = $i;
                            break;
                        }
                    }
                }

                $this->processToken($token, $config, $context);
                continue;
            }

            $info = $definition->info[$token->name]->child;

            // test if it claims to be a start tag but is empty
            if ($info->type == 'empty' && $token->type == 'start') {
                $result[] = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                continue;
            }

            // test if it claims to be empty but really is a start tag
            if ($info->type != 'empty' && $token->type == 'empty' ) {
                $result[] = new HTMLPurifier_Token_Start($token->name, $token->attr);
                $result[] = new HTMLPurifier_Token_End($token->name);
                continue;
            }

            // automatically insert empty tags
            if ($token->type == 'empty') {
                $result[] = $token;
                continue;
            }

            // start tags have precedence, so they get passed through...
            if ($token->type == 'start') {

                // ...unless they also have to close their parent
                if (!empty($current_nesting)) {

                    $parent = array_pop($current_nesting);
                    $parent_info = $definition->info[$parent->name];

                    // this can be replaced with a more general algorithm:
                    // if the token is not allowed by the parent, auto-close
                    // the parent
                    if (!isset($parent_info->child->elements[$token->name])) {
                        // close the parent, then append the token
                        $result[] = new HTMLPurifier_Token_End($parent->name);
                        $result[] = $token;
                        $current_nesting[] = $token;
                        continue;
                    }

                    $current_nesting[] = $parent; // undo the pop
                }

                foreach ($injector as $i => $x) {
                    if (!$injector_disabled[$i]) {
                        $x->handleStart($token, $config, $context);
                    }
                    if (is_array($token)) {
                        $current_injector = $i;
                        break;
                    }
                }

                $this->processToken($token, $config, $context);
                continue;
            }

            // sanity check: we should be dealing with a closing tag
            if ($token->type != 'end') continue;

            // make sure that we have something open
            if (empty($current_nesting)) {
                // stray end tag: drop it, or keep it as escaped text
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                }
                continue;
            }

            // first, check for the simplest case: everything closes neatly
            $current_parent = array_pop($current_nesting);
            if ($current_parent->name == $token->name) {
                $result[] = $token;
                continue;
            }

            // okay, so we're trying to close the wrong tag

            // undo the previous pop
            $current_nesting[] = $current_parent;

            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($current_nesting);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($i = $size - 2; $i >= 0; $i--) {
                if ($current_nesting[$i]->name == $token->name) {
                    // current nesting is modified
                    $skipped_tags = array_splice($current_nesting, $i);
                    break;
                }
            }

            // we still didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                }
                continue;
            }

            // okay, we found it, close all the skipped tags
            // note that skipped tags contains the element we need closed
            $size = count($skipped_tags);
            for ($i = $size - 1; $i >= 0; $i--) {
                $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
            }

        }

        // we're at the end now, fix all still unclosed tags
        // not using processToken() because at this point we don't
        // care about current nesting
        if (!empty($current_nesting)) {
            $size = count($current_nesting);
            for ($i = $size - 1; $i >= 0; $i--) {
                $result[] =
                    new HTMLPurifier_Token_End($current_nesting[$i]->name);
            }
        }

        // unregister everything this run registered, so the context does
        // not keep entries bound to this call's local variables
        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('OutputTokens');
        // these three were registered above but never released before;
        // NOTE(review): confirm no caller reads them after execute()
        $context->destroy('Injector');
        $context->destroy('InjectorSkip');
        $context->destroy('CurrentInjector');

        return $result;
    }

    /**
     * Commits a token that has been through the injectors.
     *
     * An array means an injector replaced the original token: the array is
     * spliced into the input stream in place of that token and the input
     * index is stepped back so the new tokens get processed in turn. An
     * ordinary (truthy) token is appended to the output and the nesting
     * stack is updated. A falsy token is silently discarded.
     *
     * @param $token   Token object, array of token objects, or a falsy
     *                 value.
     * @param $config  Configuration object (not used by this method).
     * @param $context Context object holding the state registered by
     *                 execute().
     */
    function processToken($token, $config, &$context) {
        if (is_array($token)) {
            // the original token was overloaded by a formatter, time
            // to do some fancy acrobatics

            $tokens =& $context->get('InputTokens');
            $tokens_index =& $context->get('InputIndex');
            // $tokens_index is decremented so that the entire set gets
            // re-processed
            array_splice($tokens, $tokens_index--, 1, $token);

            // this will be a bit more complicated when we add more formatters
            // we need to prevent the same formatter from running twice on it
            $injector_skip =& $context->get('InjectorSkip');
            $injector =& $context->get('Injector');
            $current_injector =& $context->get('CurrentInjector');

            // make the responsible injector ignore exactly the tokens it
            // has just produced
            if (isset($injector[$current_injector])) {
                $injector_skip[$current_injector] = count($token);
            }

        } elseif ($token) {
            // regular case
            $result =& $context->get('OutputTokens');
            $current_nesting =& $context->get('CurrentNesting');
            $result[] = $token;
            if ($token->type == 'start') {
                $current_nesting[] = $token;
            } elseif ($token->type == 'end') {
                // theoretical: this isn't used because performing
                // the calculations inline is more efficient, and
                // end tokens currently do not cause a handler invocation
                array_pop($current_nesting);
            }
        }
    }

}