Implement %HTML.AllowedComments and %HTML.AllowedCommentsRegexp
[htmlpurifier.git] / library / HTMLPurifier / Strategy / RemoveForeignElements.php
blob bccaf14d3c53e653e8ad5bae64ba226bcfb711d1
1 <?php
/**
 * Strategy that filters out tokens the HTML definition does not recognize.
 *
 * Each token is visited once, in order. A tag that has a TagTransform
 * registered in the definition is transformed before anything else is
 * decided. A tag that is still unknown afterwards is either turned into
 * its textual representation (%Core.EscapeInvalidTags) or dropped; when
 * the dropped tag is listed in %Core.HiddenElements, everything up to the
 * next tag of the same name is dropped as well. Comments survive only for
 * trusted input or when they match %HTML.AllowedComments /
 * %HTML.AllowedCommentsRegexp.
 */
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Runs the filter over a list of tokens.
     *
     * While the loop runs, the token being examined is exposed to the
     * context under the name 'CurrentToken'; the registration is removed
     * again before returning. Comment tokens that reach the clean-up step
     * have their data rewritten in place, even if they end up discarded.
     *
     * @param array $tokens  tokens to filter, in document order
     * @param mixed $config  configuration object; read via get() and getHTMLDefinition()
     * @param mixed $context context object; used for 'CurrentToken' and 'ErrorCollector'
     * @return array the tokens that were kept (possibly transformed or textified)
     */
    public function execute($tokens, $config, $context)
    {
        $html_def  = $config->getHTMLDefinition();
        $generator = new HTMLPurifier_Generator($config, $context);
        $kept      = array();

        $escape_foreign = $config->get('Core.EscapeInvalidTags');
        $drop_bad_img   = $config->get('Core.RemoveInvalidImg');

        // The next four values only influence what happens to comments.
        $trusted             = $config->get('HTML.Trusted');
        $allowed_comments    = $config->get('HTML.AllowedComments');
        $allowed_comments_re = $config->get('HTML.AllowedCommentsRegexp');
        $filter_comments     = !($allowed_comments === array() && $allowed_comments_re === null);

        $script_contents = $config->get('Core.RemoveScriptContents');
        $hidden          = $config->get('Core.HiddenElements');

        // Core.RemoveScriptContents overrides the 'script' entry of the
        // hidden element list: true forces it on, false forces it off,
        // anything else leaves the list as configured.
        if ($script_contents === true) {
            $hidden['script'] = true;
        } elseif ($script_contents === false) {
            if (isset($hidden['script'])) {
                unset($hidden['script']);
            }
        }

        $validator = new HTMLPurifier_AttrValidator();

        // Tag name we are discarding tokens up to, or false when not skipping.
        $skip_until = false;

        // Name of the allowed hidden element we are inside of, or false;
        // while set, comments are converted to text tokens.
        $textify_within = false;

        // $token is registered by reference, so the loop variable below
        // must keep this exact name for the context to see each token.
        $token = false;
        $context->register('CurrentToken', $token);

        $collector = false;
        if ($config->get('Core.CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }

        foreach ($tokens as $token) {
            // Inside a removed hidden element: ignore everything except
            // tags carrying that element's name.
            if ($skip_until && (empty($token->is_tag) || $token->name !== $skip_until)) {
                continue;
            }

            if (!empty($token->is_tag)) {
                // Give a registered transform the first shot at the tag.
                if (isset($html_def->info_tag_transform[$token->name])) {
                    $name_before = $token->name;
                    $transform   = $html_def->info_tag_transform[$name_before];
                    $token       = $transform->transform($token, $config, $context);
                    if ($collector) {
                        $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $name_before);
                    }
                }

                if (isset($html_def->info[$token->name])) {
                    // Known element. Opening/empty tags of elements with
                    // required attributes are validated right away; img is
                    // exempt unless Core.RemoveInvalidImg is enabled.
                    if (
                        ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) &&
                        $html_def->info[$token->name]->required_attr &&
                        ($token->name != 'img' || $drop_bad_img)
                    ) {
                        $validator->validateToken($token, $config, $context);
                        $attr_missing = false;
                        foreach ($html_def->info[$token->name]->required_attr as $name) {
                            if (!isset($token->attr[$name])) {
                                $attr_missing = true;
                                break;
                            }
                        }
                        if ($attr_missing) {
                            // $name still holds the first attribute found missing
                            if ($collector) {
                                $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name);
                            }
                            continue;
                        }
                        // Mark the token so its attributes are not validated again.
                        $token->armor['ValidateAttributes'] = true;
                    }

                    // Track whether we are inside an allowed hidden element.
                    if ($token instanceof HTMLPurifier_Token_Start && isset($hidden[$token->name])) {
                        $textify_within = $token->name;
                    } elseif ($token instanceof HTMLPurifier_Token_End && $token->name === $textify_within) {
                        $textify_within = false;
                    }

                } elseif ($escape_foreign) {
                    // Unknown tag, escaping requested: report first (while the
                    // context still sees the tag), then swap in its text form.
                    if ($collector) {
                        $collector->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $token = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token)
                    );
                } else {
                    // Unknown tag, to be dropped.
                    if (!isset($hidden[$token->name])) {
                        if ($collector) {
                            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed');
                        }
                    } else {
                        // Hidden element: an opening tag starts skipping its
                        // contents, an empty tag changes nothing, any other
                        // tag of this name ends the skipping.
                        if ($token instanceof HTMLPurifier_Token_Start) {
                            $skip_until = $token->name;
                        } elseif (!($token instanceof HTMLPurifier_Token_Empty)) {
                            $skip_until = false;
                        }
                        if ($collector) {
                            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed');
                        }
                    }
                    continue;
                }
            } elseif ($token instanceof HTMLPurifier_Token_Comment) {
                if ($textify_within !== false) {
                    // Comment inside an allowed hidden element becomes plain text.
                    $token = new HTMLPurifier_Token_Text($token->data);
                } elseif (!$trusted && !$filter_comments) {
                    // Comments are not permitted at all.
                    if ($collector) {
                        $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                    }
                    continue;
                } else {
                    // Normalize the comment: no trailing hyphens, and no run
                    // of two or more hyphens anywhere inside it. The trailing
                    // hyphen is only detected when errors are being collected.
                    $had_trailing_hyphen = $collector && substr($token->data, -1) == '-';
                    $cleaned             = rtrim($token->data, '-');
                    $had_double_hyphen   = strpos($cleaned, '--') !== false;
                    while (strpos($cleaned, '--') !== false) {
                        $cleaned = str_replace('--', '-', $cleaned);
                    }
                    $token->data = $cleaned;

                    // Trusted input keeps every comment; otherwise the trimmed
                    // text must be in the lookup or match the regexp.
                    $keep_comment = $trusted;
                    if (!$keep_comment) {
                        $needle       = trim($token->data);
                        $keep_comment = !empty($allowed_comments[$needle])
                            || ($allowed_comments_re !== null && preg_match($allowed_comments_re, $needle));
                    }

                    if (!$keep_comment) {
                        if ($collector) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    if ($collector) {
                        if ($had_trailing_hyphen) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed');
                        }
                        if ($had_double_hyphen) {
                            $collector->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($token instanceof HTMLPurifier_Token_Text)) {
                // Neither tag, comment nor text: discard silently.
                continue;
            }

            $kept[] = $token;
        }

        if ($skip_until && $collector) {
            // The hidden element was never closed, so everything up to the
            // end of the input was discarded.
            $collector->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $skip_until);
        }

        $context->destroy('CurrentToken');

        return $kept;
    }

}
188 // vim: et sw=4 sts=4