[3.1.2] Implement comments when %HTML.Trusted is on.
[htmlpurifier.git] / library / HTMLPurifier / Strategy / RemoveForeignElements.php
blob6a6929904191b251fbcd1c744bbcc4c33f28ba4c
1 <?php
/**
 * Removes all unrecognized tags from the list of tokens.
 *
 * Each tag token is looked up in the HTML definition supplied by the
 * configuration. When a TagTransform is registered for the tag's name the
 * token is transformed before the lookup. A tag that is still unknown is
 * either converted to its textual HTML form (%Core.EscapeInvalidTags) or
 * dropped; if it is listed in %Core.HiddenElements, everything up to its
 * closing tag is dropped with it.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token list down to the tokens that are allowed to remain.
     *
     * While the loop runs, the token being examined is exposed through the
     * context under the name 'CurrentToken'; the entry is destroyed again
     * before returning. Notices, warnings and errors are only sent when
     * %Core.CollectErrors is enabled.
     *
     * @param $tokens  List of token objects to filter.
     * @param $config  Configuration object, queried through get() and
     *                 getHTMLDefinition().
     * @param $context Context object used for 'CurrentToken' and
     *                 'ErrorCollector'.
     * @return Array of the tokens that were kept (possibly transformed or
     *         replaced by text tokens), in their original order.
     */
    public function execute($tokens, $config, $context) {
        $html_def  = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept      = array();

        $escape_foreign   = $config->get('Core', 'EscapeInvalidTags');
        $check_img_attrs  = $config->get('Core', 'RemoveInvalidImg');

        // only consulted when deciding whether comments may stay
        $trusted = $config->get('HTML', 'Trusted');

        $script_setting = $config->get('Core', 'RemoveScriptContents');
        $hidden         = $config->get('Core', 'HiddenElements');

        // %Core.RemoveScriptContents compatibility: an explicit boolean
        // overrides whatever %Core.HiddenElements says about <script>
        if ($script_setting === true) {
            $hidden['script'] = true;
        } elseif ($script_setting === false) {
            unset($hidden['script']);
        }

        $attr_validator = new HTMLPurifier_AttrValidator();

        // name of the hidden element whose contents are being discarded,
        // or false when nothing is being discarded
        $discard_until = false;

        // name of the allowed hidden element we are inside of (comments are
        // turned into text there), or false
        $comment_host = false;

        // $token is bound by reference so the context always sees the
        // token currently assigned by the loop below
        $token = false;
        $context->register('CurrentToken', $token);

        $collector = false;
        if ($config->get('Core', 'CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            $is_tag = !empty($token->is_tag);

            // while discarding, only a tag carrying the awaited name is
            // allowed through to the regular processing below
            if ($discard_until && !($is_tag && $token->name === $discard_until)) {
                continue;
            }

            if (!$is_tag) {
                // text passes through untouched
                if ($token instanceof HTMLPurifier_Token_Text) {
                    $kept[] = $token;
                    continue;
                }
                // anything that is neither tag, text nor comment is dropped
                if (!($token instanceof HTMLPurifier_Token_Comment)) {
                    continue;
                }

                if ($comment_host !== false) {
                    // inside an allowed hidden element: comment becomes text
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif ($trusted) {
                    // keep the comment, but strip trailing hyphens and
                    // collapse hyphen runs
                    if ($collector && substr($token->data, -1) == '-') {
                        $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                    }
                    $token->data = rtrim($token->data, '-');
                    if (strpos($token->data, '--') !== false) {
                        // reported once, however many passes are needed
                        if ($collector) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                        do {
                            $token->data = str_replace('--', '-', $token->data);
                        } while (strpos($token->data, '--') !== false);
                    }
                } else {
                    // untrusted input: comments are stripped
                    if ($collector) {
                        $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                }

                $kept[] = $token;
                continue;
            }

            // ---- tag tokens from here on ----

            // give a registered tag transform the first shot at the token
            if (isset($html_def->info_tag_transform[$token->name])) {
                $original_name = $token->name;
                $token = $html_def->info_tag_transform[$original_name]->transform($token, $config, $context);
                if ($collector) {
                    $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name);
                }
            }

            if (!isset($html_def->info[$token->name])) {
                if ($escape_foreign) {
                    // unknown tag: keep it, but as literal text
                    if ($collector) {
                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                    $kept[] = $token;
                    continue;
                }

                // unknown tag: drop it
                if (!isset($hidden[$token->name])) {
                    if ($collector) {
                        $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                    }
                    continue;
                }

                // hidden element: a start tag begins discarding, an empty
                // tag changes nothing, anything else (the end tag) stops it
                if ($token instanceof HTMLPurifier_Token_Start) {
                    $discard_until = $token->name;
                } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                    $discard_until = false;
                }
                if ($collector) {
                    $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                }
                continue;
            }

            // known tag: make sure its required attributes are present
            $element = $html_def->info[$token->name];
            $opens   = $token instanceof HTMLPurifier_Token_Start
                    || $token instanceof HTMLPurifier_Token_Empty;
            if (
                $opens &&
                $element->required_attr &&
                ($token->name != 'img' || $check_img_attrs) // keeps %Core.RemoveInvalidImg effective
            ) {
                $attr_validator->validateToken($token, $config, $context);
                $complete = true;
                foreach ($element->required_attr as $name) {
                    if (!isset($token->attr[$name])) {
                        $complete = false;
                        break;
                    }
                }
                if (!$complete) {
                    if ($collector) {
                        $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
                    }
                    continue;
                }
                // attributes were validated above; flag the token accordingly
                $token->armor['ValidateAttributes'] = true;
            }

            // track whether we are inside an allowed hidden element
            if (isset($hidden[$token->name]) && $token instanceof HTMLPurifier_Token_Start) {
                $comment_host = $token->name;
            } elseif ($token->name === $comment_host && $token instanceof HTMLPurifier_Token_End) {
                $comment_host = false;
            }

            $kept[] = $token;
        }

        if ($discard_until && $collector) {
            // the awaited closing tag never showed up
            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $discard_until);
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}