3 require_once 'HTMLPurifier/Strategy.php';
4 require_once 'HTMLPurifier/HTMLDefinition.php';
5 require_once 'HTMLPurifier/Generator.php';
7 require_once 'HTMLPurifier/Injector/AutoParagraph.php';
8 require_once 'HTMLPurifier/Injector/Linkify.php';
// Two calls to HTMLPurifier_ConfigSchema::define() follow, one for
// ('Core', 'AutoParagraph') and one for ('Core', 'AutoLinkify'). Each passes
// false and 'bool' followed by a description string.
// NOTE(review): presumably false is the default value and 'bool' the type --
// confirm against HTMLPurifier_ConfigSchema::define(). The closing lines of
// both description strings are not visible in this listing.
10 HTMLPurifier_ConfigSchema
::define(
11 'Core', 'AutoParagraph', false, 'bool', '
13 This directive will cause HTML Purifier to automatically paragraph text
14 in the document fragment root based on two newlines and block tags.
15 This directive has been available since 2.0.1.
20 HTMLPurifier_ConfigSchema
::define(
21 'Core', 'AutoLinkify', false, 'bool', '
23 This directive will cause HTML Purifier to automatically linkify
24 text that looks like URLs. This directive has been available since
31 * Takes tokens and makes them well-formed (balances end tags, etc.)
33 class HTMLPurifier_Strategy_MakeWellFormed
extends HTMLPurifier_Strategy
/**
 * Walks the input token list by index and builds an output token list,
 * correcting start/empty tag mismatches, closing parents that do not allow
 * a new child, matching end tags against the stack of open tags, and
 * closing whatever is still open when the input runs out.
 *
 * NOTE(review): this listing is missing some of the original lines (each
 * remaining line still carries its original line number), so closing
 * braces, some initialisations and the flow between visible statements
 * cannot all be confirmed from here.
 *
 * @param $tokens  Token list; read by integer index starting at 0 until an
 *                 index is not set.
 * @param $config  Configuration object; read for the HTML definition and
 *                 for the Core/EscapeInvalidTags, Core/AutoParagraph and
 *                 Core/AutoLinkify directives.
 * @param $context Context object (by reference); the working variables are
 *                 registered on it at the start and destroyed at the end.
 * @return NOTE(review): presumably the output token array registered as
 *         'OutputTokens' -- the return statement is not visible here.
 */
36 function execute($tokens, $config, &$context) {
38 $definition = $config->getHTMLDefinition();
39 $generator = new HTMLPurifier_Generator();
// stack of start tokens that are currently open; registered on the context
// so that processToken() can fetch it
41 $current_nesting = array();
42 $context->register('CurrentNesting', $current_nesting);
// NOTE(review): the initialisation of $tokens_index and $result is not
// visible in this listing
45 $context->register('InputIndex', $tokens_index);
46 $context->register('InputTokens', $tokens);
49 $context->register('OutputTokens', $result);
51 $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
53 // -- begin INJECTOR --
54 // factor this stuff out to its own class
// per-injector skip counters, indexed like $injector
57 $injector_skip = array();
// each injector is only instantiated when its directive is enabled
59 if ($config->get('Core', 'AutoParagraph')) {
60 $injector[] = new HTMLPurifier_Injector_AutoParagraph();
61 // decrement happens first, so set to one so we start at zero
// NOTE(review): the statement that stores that initial skip value is not
// visible in this listing
65 if ($config->get('Core', 'AutoLinkify')) {
66 $injector[] = new HTMLPurifier_Injector_Linkify();
70 // array index of the injector that resulted in an array
71 // substitution. This enables processToken() to know which
72 // injectors are affected by the added tokens and which are
73 // not (namely, the ones after the current injector are not
75 $current_injector = false;
77 $context->register('Injector', $injector);
78 $context->register('CurrentInjector', $current_injector);
80 // number of tokens to skip + 1
81 // before processing, this gets decremented: if it equals zero,
82 // it means the injector is active and is processing tokens, if
83 // it is greater than zero, then it is inactive, presumably having
84 // been the source of the tokens
85 $context->register('InjectorSkip', $injector_skip);
// main loop: runs until the current index is no longer set in $tokens;
// processToken() may splice new tokens in and move the index back
89 for ($tokens_index = 0; isset($tokens[$tokens_index]); $tokens_index++
) {
91 // if all goes well, this token will be passed through unharmed
92 $token = $tokens[$tokens_index];
// count down every positive skip counter before handling this token
94 foreach ($injector as $i => $x) {
95 if ($injector_skip[$i] > 0) $injector_skip[$i]--;
98 // quick-check: if it's not a tag, no need to process
99 if (empty( $token->is_tag
)) {
101 // duplicated with handleStart
// text tokens are offered to every injector whose skip counter is zero
102 if ($token->type
=== 'text') {
103 foreach ($injector as $i => $x) {
104 if (!$injector_skip[$i]) {
105 $x->handleText($token, $config, $context);
// an array here means the handler replaced the token with several tokens
// (presumably handleText() takes $token by reference -- confirm);
// remember which injector did it
107 if (is_array($token)) {
108 $current_injector = $i;
114 $this->processToken($token, $config, $context);
// tag token: look up the child definition of this element
118 $info = $definition->info
[$token->name
]->child
;
120 // test if it claims to be a start tag but is empty
121 if ($info->type
== 'empty' && $token->type
== 'start') {
122 $result[] = new HTMLPurifier_Token_Empty($token->name
, $token->attr
);
126 // test if it claims to be empty but really is a start tag
127 if ($info->type
!= 'empty' && $token->type
== 'empty' ) {
128 $result[] = new HTMLPurifier_Token_Start($token->name
, $token->attr
);
129 $result[] = new HTMLPurifier_Token_End($token->name
);
133 // automatically insert empty tags
134 if ($token->type
== 'empty') {
139 // start tags have precedence, so they get passed through...
140 if ($token->type
== 'start') {
142 // ...unless they also have to close their parent
143 if (!empty($current_nesting)) {
// peek at the innermost open element by popping it; it is pushed back
// further down if it does not need to be closed
145 $parent = array_pop($current_nesting);
146 $parent_info = $definition->info
[$parent->name
];
148 // this can be replaced with a more general algorithm:
149 // if the token is not allowed by the parent, auto-close
151 if (!isset($parent_info->child
->elements
[$token->name
])) {
152 // close the parent, then append the token
153 $result[] = new HTMLPurifier_Token_End($parent->name
);
155 $current_nesting[] = $token;
159 $current_nesting[] = $parent; // undo the pop
// start tokens are offered to every injector whose skip counter is zero,
// mirroring the text-token handling above
163 foreach ($injector as $i => $x) {
164 if (!$injector_skip[$i]) {
165 $x->handleStart($token, $config, $context);
167 if (is_array($token)) {
168 $current_injector = $i;
173 $this->processToken($token, $config, $context);
177 // sanity check: we should be dealing with a closing tag
178 if ($token->type
!= 'end') continue;
180 // make sure that we have something open
181 if (empty($current_nesting)) {
// with nothing open, the stray end tag is emitted as text (via the
// generator) only when Core/EscapeInvalidTags is on
182 if ($escape_invalid_tags) {
183 $result[] = new HTMLPurifier_Token_Text(
184 $generator->generateFromToken($token, $config, $context)
190 // first, check for the simplest case: everything closes neatly
191 $current_parent = array_pop($current_nesting);
192 if ($current_parent->name
== $token->name
) {
197 // okay, so we're trying to close the wrong tag
199 // undo the previous pop
200 $current_nesting[] = $current_parent;
202 // scroll back the entire nest, trying to find our tag.
203 // (feature could be to specify how far you'd like to go)
204 $size = count($current_nesting);
205 // -2 because -1 is the last element, but we already checked that
// $skipped_tags stays false unless a matching open element is found
206 $skipped_tags = false;
207 for ($i = $size - 2; $i >= 0; $i--) {
208 if ($current_nesting[$i]->name
== $token->name
) {
209 // current nesting is modified
// array_splice() removes everything from index $i onwards and returns it
210 $skipped_tags = array_splice($current_nesting, $i);
215 // we still didn't find the tag, so remove
216 if ($skipped_tags === false) {
217 if ($escape_invalid_tags) {
218 $result[] = new HTMLPurifier_Token_Text(
219 $generator->generateFromToken($token, $config, $context)
225 // okay, we found it, close all the skipped tags
226 // note that skipped tags contains the element we need closed
// iterate backwards so the innermost element is closed first
227 $size = count($skipped_tags);
228 for ($i = $size - 1; $i >= 0; $i--) {
229 $result[] = new HTMLPurifier_Token_End($skipped_tags[$i]->name
);
234 // we're at the end now, fix all still unclosed tags
235 // not using processToken() because at this point we don't
236 // care about current nesting
237 if (!empty($current_nesting)) {
238 $size = count($current_nesting);
239 for ($i = $size - 1; $i >= 0; $i--) {
// NOTE(review): the left-hand side of this statement is not visible in
// this listing; presumably the new end token is appended to $result
241 new HTMLPurifier_Token_End($current_nesting[$i]->name
);
// tear down every context variable that was registered above
245 $context->destroy('CurrentNesting');
246 $context->destroy('InputTokens');
247 $context->destroy('InputIndex');
248 $context->destroy('OutputTokens');
250 $context->destroy('Injector');
251 $context->destroy('CurrentInjector');
252 $context->destroy('InjectorSkip');
/**
 * Applies one token, or an array of replacement tokens, to the shared
 * processing state held in the context.
 *
 * When $token is an array it is spliced into the input token list in place
 * of the current token, the input index is moved back by one, and the skip
 * counters of injectors 0 through 'CurrentInjector' are increased. For a
 * single token, the nesting stack is pushed on 'start' and popped on 'end'.
 *
 * NOTE(review): the end of this method is not visible in this listing, so
 * any further handling of $token (for example appending it to the output
 * list fetched below) cannot be confirmed from here.
 *
 * @param $token   A single token object, or an array of tokens.
 * @param $config  Configuration object; not used in the visible lines.
 * @param $context Context object (by reference) holding 'InputTokens',
 *                 'InputIndex', 'InjectorSkip', 'CurrentInjector',
 *                 'OutputTokens' and 'CurrentNesting'.
 */
257 function processToken($token, $config, &$context) {
258 if (is_array($token)) {
259 // the original token was overloaded by an injector, time
260 // to do some fancy acrobatics
// fetched by reference so the changes below apply to the registered values
262 $tokens =& $context->get('InputTokens');
263 $tokens_index =& $context->get('InputIndex');
264 // $tokens_index is decremented so that the entire set gets
// replace the one token at the current index with the whole array; the
// post-decrement takes effect after the index has been used
266 array_splice($tokens, $tokens_index--, 1, $token);
268 // adjust the injector skips based on the array substitution
269 $injector_skip =& $context->get('InjectorSkip');
270 $current_injector =& $context->get('CurrentInjector');
// number of inserted tokens plus one
272 $offset = count($token) +
1;
// NOTE(review): the operator on the next two lines looks like "+=" split
// across lines by the extraction -- confirm against the original file
273 for ($i = 0; $i <= $current_injector; $i++
) {
274 $injector_skip[$i] +
= $offset;
279 $result =& $context->get('OutputTokens');
280 $current_nesting =& $context->get('CurrentNesting');
// keep the stack of open elements in step with the token being applied
282 if ($token->type
== 'start') {
283 $current_nesting[] = $token;
284 } elseif ($token->type
== 'end') {
285 // theoretical: this code doesn't get run because performing
286 // the calculations inline is more efficient, and
287 // end tokens (currently) do not cause a handler invocation
288 array_pop($current_nesting);