Release Phorum module 4.0.0.
[htmlpurifier.git] / plugins / phorum / htmlpurifier.php
blob6f74fc8c98ecf776c0e6f79bbf48003d82c6b40a
1 <?php
/**
 * HTML Purifier Phorum Mod. Filter your HTML the Standards-Compliant Way!
 *
 * This Phorum mod enables users to post raw HTML into Phorum. But never
 * fear: with the help of HTML Purifier, this HTML will be beaten into
 * de-XSSed and standards-compliant form, safe for general consumption.
 * It is possible, but not recommended, to run this mod in parallel
 * with other formatters (in short, please DISABLE the BBcode mod).
 *
 * For help migrating from your previous markup language to pure HTML
 * please check the migrate.bbcode.php file.
 *
 * If you'd like to use this with a WYSIWYG editor, make sure that
 * editor sets $PHORUM['mod_htmlpurifier']['wysiwyg'] to true. Otherwise,
 * administrators who need to edit other people's comments may be at
 * risk for some nasty attacks.
 *
 * Tested with Phorum 5.2.11.
 */
// Note: Cache data is base64 encoded because Phorum insists on flinging it
// to the user and expecting it to come back unharmed, newlines and
// all, which ain't happening. It's slower, it takes up more space, but
// at least it won't get mutilated.
/**
 * Purifies a data array.
 *
 * For every entry of $data that has a 'body', the body is replaced with
 * HTML Purifier output. Entries with a truthy key ("real" messages) may be
 * served from the base64-encoded cache kept in the message's meta data;
 * on a cache miss the purified body is written back with
 * phorum_db_update_message(). Entries with a falsy key ("fake" messages)
 * are purified on every call and never written to the database.
 *
 * @param array $data Message arrays keyed by message id
 * @return array $data with the 'body' of each message purified
 */
function phorum_htmlpurifier_format($data)
{
    $PHORUM = $GLOBALS["PHORUM"];

    // getInstance() returns by value, so a plain assignment is used;
    // assigning its result by reference raises "Only variables should be
    // assigned by reference".
    $purifier = HTMLPurifier::getInstance();
    $cache_serial = $PHORUM['mod_htmlpurifier']['body_cache_serial'];

    foreach ($data as $message_id => $message) {
        if (isset($message['body'])) {

            if ($message_id) {
                // we're dealing with a real message, not a fake, so
                // there are a number of shortcuts that can be taken

                if (isset($message['meta']['htmlpurifier_light'])) {
                    // format hook was called outside of Phorum's normal
                    // functions, do the abridged purification
                    $data[$message_id]['body'] = $purifier->purify($message['body']);
                    continue;
                }

                if (!empty($PHORUM['args']['purge'])) {
                    // purge requested: drop the cached body so that the
                    // cache lookup just below misses and the message is
                    // purified (and re-cached) again
                    unset($message['meta']['body_cache']);
                }

                if (
                    isset($message['meta']['body_cache']) &&
                    isset($message['meta']['body_cache_serial']) &&
                    $message['meta']['body_cache_serial'] == $cache_serial
                ) {
                    // cached version is present, bail out early
                    $data[$message_id]['body'] = base64_decode($message['meta']['body_cache']);
                    continue;
                }
            }

            // migration might edit this array, that's why it's defined
            // so early
            $updated_message = array();

            // create the $body variable
            if (
                $message_id && // message must be real to migrate
                !isset($message['meta']['body_cache_serial'])
            ) {
                // perform migration: the message has never been stamped
                // with a cache serial, so run it through the migration hook
                $fake_data = array();
                // strips signature/edit message out of $message['body']
                // (passed by reference) and hands them back separately
                list($signature, $edit_message) = phorum_htmlpurifier_remove_sig_and_editmessage($message);
                $fake_data[$message_id] = $message;
                $fake_data = phorum_htmlpurifier_migrate($fake_data);
                $body = $fake_data[$message_id]['body'];
                $body = str_replace("<phorum break>\n", "\n", $body);
                $updated_message['body'] = $body; // save the migrated body
                $body .= $signature . $edit_message; // add them back in
            } else {
                // reverse Phorum's pre-processing
                $body = $message['body'];
                // order is important
                $body = str_replace("<phorum break>\n", "\n", $body);
                $body = str_replace(array('&lt;','&gt;','&amp;', '&quot;'), array('<','>','&','"'), $body);
                if (!$message_id && defined('PHORUM_CONTROL_CENTER')) {
                    // we're in control.php, so it was double-escaped
                    $body = str_replace(array('&lt;','&gt;','&amp;', '&quot;'), array('<','>','&','"'), $body);
                }
            }

            $body = $purifier->purify($body);

            // dynamically update the cache (MUST BE DONE HERE!)
            // this is inefficient because it's one db call per
            // cache miss, but once the cache is in place things are
            // a lot zippier.

            if ($message_id) { // make sure it's not a fake id
                $updated_message['meta'] = $message['meta'];
                $updated_message['meta']['body_cache'] = base64_encode($body);
                $updated_message['meta']['body_cache_serial'] = $cache_serial;
                phorum_db_update_message($message_id, $updated_message);
            }

            // must not get overloaded until after we cache it, otherwise
            // we'll inadvertently change the original text
            $data[$message_id]['body'] = $body;

        }
    }

    return $data;
}
122 // -----------------------------------------------------------------------
123 // This is fragile code, copied from read.php:596 (Phorum 5.2.6). Please
124 // keep this code in-sync with Phorum
/**
 * Generates a signature based on a message array.
 *
 * @param array $row Message array; reads $row['user']['signature'] and
 *                   $row['meta']['show_signature']
 * @return string The trimmed signature prefixed with two newlines, or the
 *                trimmed signature unchanged when PHP considers it empty,
 *                or '' when no signature is to be shown
 */
function phorum_htmlpurifier_generate_sig($row)
{
    // both the signature text and the display flag have to be present
    if (!isset($row['user']['signature'], $row['meta']['show_signature'])) {
        return '';
    }

    // the flag is compared loosely on purpose, mirroring Phorum's read.php
    if ($row['meta']['show_signature'] != 1) {
        return '';
    }

    $sig = trim($row['user']['signature']);

    return empty($sig) ? $sig : "\n\n" . $sig;
}
/**
 * Generates an edit message based on a message array.
 *
 * Fills the %count%, %lastedit% and %lastuser% placeholders of the
 * language string $PHORUM['DATA']['LANG']['EditedMessage'].
 *
 * @param array $row Message array; reads edit_count, edit_date and
 *                   edit_username from $row['meta']
 * @return string The edit message prefixed with four newlines, or '' when
 *                edit_count is missing or not greater than zero
 */
function phorum_htmlpurifier_generate_editmessage($row)
{
    $phorum = $GLOBALS['PHORUM'];

    $was_edited = isset($row['meta']['edit_count']) && $row['meta']['edit_count'] > 0;
    if (!$was_edited) {
        return '';
    }

    $meta = $row['meta'];
    $template = $phorum["DATA"]["LANG"]["EditedMessage"];

    // placeholders are filled one after another, in this order
    $with_count = str_replace("%count%", $meta['edit_count'], $template);
    $last_edit = phorum_date($phorum["short_date_time"], $meta['edit_date']);
    $with_date = str_replace("%lastedit%", $last_edit, $with_count);
    $text = str_replace("%lastuser%", $meta['edit_username'], $with_date);

    return "\n\n\n\n" . $text;
}
156 // End fragile code
157 // -----------------------------------------------------------------------
/**
 * Removes the signature and edit message from a message.
 *
 * @param array $row Message passed by reference; its 'body' is rewritten
 *                   with the generated signature and edit message removed
 * @return array Two-element list: the generated signature and the
 *               generated edit message (either may be an empty string)
 */
function phorum_htmlpurifier_remove_sig_and_editmessage(&$row)
{
    $extras = array(
        phorum_htmlpurifier_generate_sig($row),
        phorum_htmlpurifier_generate_editmessage($row),
    );

    // Inside the body these extra bits carry a <phorum break> in front of
    // every newline, so that is the form that has to be searched for.
    $strip = array();
    foreach ($extras as $extra) {
        if ($extra) {
            $needle = str_replace("\n", "<phorum break>\n", $extra);
            $strip[$needle] = '';
        }
    }

    $row['body'] = strtr($row['body'], $strip);

    return $extras;
}
/**
 * Indicate that data is fully HTML and not from migration, invalidate
 * previous caches.
 *
 * @note This function could generate the actual cache entries, but
 *       since there's data missing that must be deferred to the first read
 *
 * @param array $message Message being posted
 * @return array The message with meta 'body_cache' removed and meta
 *               'body_cache_serial' set to the module's current serial
 */
function phorum_htmlpurifier_posting($message)
{
    $phorum = $GLOBALS["PHORUM"];
    $current_serial = $phorum['mod_htmlpurifier']['body_cache_serial'];

    // invalidate the cache
    unset($message['meta']['body_cache']);

    // stamp the message with the current serial
    $message['meta']['body_cache_serial'] = $current_serial;

    return $message;
}
/**
 * Overload quoting mechanism to prevent default, mail-style quote from happening.
 *
 * @param array $array Element 0 is HTML-escaped and used as the cite
 *                     attribute; element 1 is the text, which is purified
 * @return string A <blockquote> element wrapping the purified text
 */
function phorum_htmlpurifier_quote($array)
{
    // getInstance() returns by value, so a plain assignment is used;
    // assigning its result by reference raises "Only variables should be
    // assigned by reference".
    $purifier = HTMLPurifier::getInstance();
    $text = $purifier->purify($array[1]);
    $source = htmlspecialchars($array[0]);
    return "<blockquote cite=\"$source\">\n$text\n</blockquote>";
}
/**
 * Loads the HTML Purifier library, configures the shared purifier instance,
 * publishes the cache serial and loads the migration code.
 *
 * Terminates the request with an error message when migrate.php is not
 * present next to this file.
 *
 * @credits <http://secretsauce.phorum.org/snippets/make_bbcode_last_formatter.php.txt>
 */
function phorum_htmlpurifier_common()
{
    require_once(dirname(__FILE__).'/htmlpurifier/HTMLPurifier.auto.php');
    // *_once: a second invocation of this function must not re-run a file
    // that declares functions, which would be a fatal redeclaration
    require_once(dirname(__FILE__).'/init-config.php');

    $config = phorum_htmlpurifier_get_config();
    HTMLPurifier::getInstance($config);

    // increment revision.txt if you want to invalidate the cache
    $GLOBALS['PHORUM']['mod_htmlpurifier']['body_cache_serial'] = $config->getSerial();

    // load migration
    if (file_exists(dirname(__FILE__) . '/migrate.php')) {
        include_once(dirname(__FILE__) . '/migrate.php');
    } else {
        // NOTE(review): Phorum modules normally live under "mods/", so the
        // "modes/" path in this message looks like a typo -- confirm before
        // changing the user-facing text.
        echo '<strong>Error:</strong> No migration path specified for HTML Purifier, please check
        <tt>modes/htmlpurifier/migrate.bbcode.php</tt> for instructions on
        how to migrate from your previous markup language.';
        exit;
    }

    if (!function_exists('phorum_htmlpurifier_migrate')) {
        // Dummy function: migrate.php did not define a migration, so
        // messages are passed through untouched
        function phorum_htmlpurifier_migrate($data) {return $data;}
    }
}
/**
 * Pre-emptively performs purification if it looks like a WYSIWYG editor
 * is being used.
 *
 * @param array $message Message about to be loaded into the editor
 * @return array The message; when $PHORUM['mod_htmlpurifier']['wysiwyg'] is
 *               set and the body is non-empty, the body has been
 *               de-escaped, purified and escaped again, otherwise the
 *               message is returned unchanged
 */
function phorum_htmlpurifier_before_editor($message)
{
    if (empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
        return $message;
    }
    if (empty($message['body'])) {
        return $message;
    }

    // de-entity-ize contents
    $body = str_replace(array('&lt;','&gt;','&amp;'), array('<','>','&'), $message['body']);

    // getInstance() returns by value, so a plain assignment is used;
    // assigning its result by reference raises "Only variables should be
    // assigned by reference".
    $purifier = HTMLPurifier::getInstance();
    $body = $purifier->purify($body);

    // re-entity-ize contents
    $message['body'] = htmlspecialchars($body, ENT_QUOTES, $GLOBALS['PHORUM']['DATA']['CHARSET']);

    return $message;
}
/**
 * Prints a notice underneath the subject field of the editor.
 *
 * In WYSIWYG mode a short "HTML has been scrubbed" notice is printed for
 * the quote, edit and moderation modes and nothing otherwise. Outside
 * WYSIWYG mode a help box describing HTML input is printed, unless
 * $PHORUM['mod_htmlpurifier']['suppress_message'] is set.
 *
 * @return void Output is echoed directly
 */
function phorum_htmlpurifier_editor_after_subject()
{
    // don't show this message if it's a WYSIWYG editor, since it will
    // then be handled automatically
    if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['wysiwyg'])) {
        $i = $GLOBALS['PHORUM']['DATA']['MODE'];
        if ($i == 'quote' || $i == 'edit' || $i == 'moderation') {
            ?>
            <div>
              <p>
                <strong>Notice:</strong> HTML has been scrubbed for your safety.
                If you would like to see the original, turn off WYSIWYG mode
                (consult your administrator for details.)
              </p>
            </div>
            <?php
        }
        return;
    }
    if (!empty($GLOBALS['PHORUM']['mod_htmlpurifier']['suppress_message'])) return;
    ?><div class="htmlpurifier-help">
    <p>
        <strong>HTML input</strong> is enabled. Make sure you escape all HTML and
        angled brackets with <code>&amp;lt;</code> and <code>&amp;gt;</code>.
    </p><?php
    // getInstance() returns by value, so a plain assignment is used;
    // assigning its result by reference raises "Only variables should be
    // assigned by reference".
    $purifier = HTMLPurifier::getInstance();
    $config = $purifier->config;
    if ($config->get('AutoFormat.AutoParagraph')) {
        ?><p>
            <strong>Auto-paragraphing</strong> is enabled. Double
            newlines will be converted to paragraphs; for single
            newlines, use the <code>pre</code> tag.
        </p><?php
    }
    // list every element name known to the active HTML definition
    $html_definition = $config->getDefinition('HTML');
    $allowed = array();
    foreach ($html_definition->info as $name => $x) $allowed[] = "<code>$name</code>";
    sort($allowed);
    $allowed_text = implode(', ', $allowed);
    ?><p><strong>Allowed tags:</strong> <?php
    echo $allowed_text;
    // the paragraph is closed right here; no further </p> may follow it
    ?>.</p>
    <p>
        For inputting literal code such as HTML and PHP for display, use
        CDATA tags to auto-escape your angled brackets, and <code>pre</code>
        to preserve newlines:
    </p>
    <pre>&lt;pre&gt;&lt;![CDATA[
<em>Place code here</em>
]]&gt;&lt;/pre&gt;</pre>
    <p>
        Power users, you can hide this notice with:
    </p>
    <pre>.htmlpurifier-help {display:none;}</pre>
    </div><?php
    // <pre> is emitted as a sibling of the paragraph above because a
    // <p> element may not contain a <pre>
}
309 // vim: et sw=4 sts=4