Merge branch 'MDL-73883-master' of https://github.com/andrewnicols/moodle
[moodle.git] / lib / tests / htmlpurifier_test.php
blobf1ae916c93af60338be286003adf58da6ba5f018
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * Unit tests for the HTMLPurifier integration
20 * @package core
21 * @category phpunit
22 * @copyright 2012 Petr Skoda {@link http://skodak.org}
23 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
26 defined('MOODLE_INTERNAL') || die();
29 /**
30 * HTMLPurifier test case
32 * @package core
33 * @category phpunit
34 * @copyright 2012 Petr Skoda {@link http://skodak.org}
35 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
37 class core_htmlpurifier_testcase extends basic_testcase {
/**
 * Checks how the target attribute of a link is treated by format_text().
 *
 * A "_blank" target is kept and the link gains rel="noreferrer noopener";
 * any other target value is removed from the link.
 */
public function test_allow_blank_target() {
    // MDL-52651 explains why a rel attribute is expected on the cleaned link.
    // Links that already carry a rel attribute need no separate check here because
    // the HTML Purifier is configured to remove the rel attribute.
    $blanklink = '<a href="http://moodle.org" target="_blank">Some link</a>';
    $this->assertSame(
        '<a href="http://moodle.org" target="_blank" rel="noreferrer noopener">Some link</a>',
        format_text($blanklink, FORMAT_HTML)
    );

    // A target other than "_blank" is dropped altogether.
    $namedlink = '<a href="http://moodle.org" target="some">Some link</a>';
    $this->assertSame('<a href="http://moodle.org">Some link</a>', format_text($namedlink, FORMAT_HTML));
}
/**
 * Verify that markup wrapped in our nolink tag passes through purify_html() unchanged.
 */
public function test_nolink() {
    // purify_html() is called directly: format_text() cannot be used because nolink changes its result.
    $samples = [
        '<nolink><div>no filters</div></nolink>',
        '<nolink>xxx<em>xx</em><div>xxx</div></nolink>',
        // The nolink tag must not force open tags to be closed, so it can be virtually everywhere.
        '<p><nolink><div>no filters</div></nolink></p>',
    ];

    foreach ($samples as $markup) {
        $this->assertSame($markup, purify_html($markup, []));
    }
}
/**
 * Verify that our tex tag is returned unchanged by purify_html().
 */
public function test_tex() {
    $markup = '<tex>a+b=c</tex>';
    $this->assertSame($markup, purify_html($markup, []));
}
/**
 * Verify that our algebra tag is returned unchanged by purify_html().
 */
public function test_algebra() {
    $markup = '<algebra>a+b=c</algebra>';
    $this->assertSame($markup, purify_html($markup, []));
}
/**
 * Verify that our hacky multilang markup works with purify_html().
 */
public function test_multilang() {
    // Both multilang syntaxes must come back exactly as they went in.
    $multilang = [
        '<lang lang="en">hmmm</lang><lang lang="anything">hm</lang>',
        '<span lang="en" class="multilang">hmmm</span><span lang="anything" class="multilang">hm</span>',
    ];
    foreach ($multilang as $markup) {
        $this->assertSame($markup, purify_html($markup, []));
    }

    // A span with a lang attribute but without the multilang class is not returned as is.
    $plainspan = '<span lang="en">hmmm</span>';
    $this->assertNotSame($plainspan, purify_html($plainspan, []));

    // Keep standard lang tags.
    $standard = [
        '<span lang="de_DU" class="multilang">asas</span>',
        '<lang lang="de_DU">xxxxxx</lang>',
    ];
    foreach ($standard as $markup) {
        $this->assertSame($markup, purify_html($markup, []));
    }
}
/**
 * Tests the 'allowid' option for format_text.
 */
public function test_format_text_allowid() {
    $html = '<div id="example">Frog</div>';

    // Ids are not allowed by default, so the attribute is removed.
    $this->assertSame('<div>Frog</div>', format_text($html, FORMAT_HTML, ['nocache' => true]));

    // Once ids are allowed the markup comes back untouched.
    $this->assertSame($html, format_text($html, FORMAT_HTML, ['nocache' => true, 'allowid' => true]));
}
/**
 * Checks that object/embed markup is only kept while $CFG->allowobjectembed is enabled.
 *
 * The setting is asserted to be '0' on entry, switched to '1' for the middle part of the test and
 * always switched back to '0' - even when an assertion fails - so the change cannot leak out of this test.
 */
public function test_allowobjectembed() {
    global $CFG;

    $this->assertSame('0', $CFG->allowobjectembed);

    $text = '<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
<param name="wmode" value="transparent" />
<embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" />
</object>hmmm';

    // With the setting off only the trailing text is left.
    $result = purify_html($text, []);
    $this->assertSame('hmmm', trim($result));

    $expected = '<object width="425" height="350" data="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash">
<param name="allowScriptAccess" value="never" />
<param name="allowNetworking" value="internal" />
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
<param name="wmode" value="transparent" />
<embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" allowscriptaccess="never" allownetworking="internal" />
</object>hmmm';

    $CFG->allowobjectembed = '1';
    try {
        $result = purify_html($text, []);
        // Line breaks are stripped from both sides, so only the markup itself is compared.
        $this->assertSame(str_replace("\n", '', $expected), str_replace("\n", '', $result));
    } finally {
        // Restore the value asserted at the top, whether or not the assertion above passed.
        $CFG->allowobjectembed = '0';
    }

    // Back to the default: the object is removed again.
    $result = purify_html($text, []);
    $this->assertSame('hmmm', trim($result));
}
/**
 * Test that line breaks are kept unchanged by purify_html().
 */
public function test_line_breaking() {
    $raw = "\n\raa\rsss\nsss\r";
    $purified = purify_html($raw);
    $this->assertSame($raw, $purified);
}
/**
 * Test fixing of strict problems.
 */
public function test_tidy() {
    // Maps sloppy input to the markup purify_html() is expected to produce for it.
    $fixes = [
        '<p>xx' => '<p>xx</p>',
        '<P>xx</P>' => '<p>xx</p>',
        'xx<br>' => 'xx<br />',
    ];

    foreach ($fixes as $sloppy => $tidied) {
        $this->assertSame($tidied, purify_html($sloppy));
    }
}
/**
 * Test nesting - this used to cause problems in earlier versions.
 */
public function test_nested_lists() {
    $nested = '<ul><li>One<ul><li>Two</li></ul></li><li>Three</li></ul>';
    $purified = purify_html($nested);
    $this->assertSame($nested, $purified);
}
/**
 * Test that XSS protection works, complete smoke tests are in htmlpurifier itself.
 */
public function test_cleaning_nastiness() {
    // Each entry pairs a hostile input with the cleaned output expected from purify_html().
    $attacks = [
        ["x<SCRIPT>alert('XSS')</SCRIPT>x", 'xx'],
        ['<DIV STYLE="background-image:url(javascript:alert(\'XSS\'))">xx</DIV>', '<div>xx</div>'],
        ['<DIV STYLE="width:expression(alert(\'XSS\'));">xx</DIV>', '<div>xx</div>'],
        ['x<IFRAME SRC="javascript:alert(\'XSS\');"></IFRAME>x', 'xx'],
        ['x<OBJECT TYPE="text/x-scriptlet" DATA="http://ha.ckers.org/scriptlet.html"></OBJECT>x', 'xx'],
        ['x<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>x', 'xx'],
        ['x<form></form>x', 'xx'],
    ];

    foreach ($attacks as [$hostile, $cleaned]) {
        $this->assertSame($cleaned, purify_html($hostile));
    }
}
/**
 * Test internal function used for clean_text() speedup.
 */
public function test_is_purify_html_necessary() {
    // First our shortcuts: purification is reported as unnecessary and purify_html() returns the text as is.
    $shortcuts = [
        "",
        "666",
        "abc\ndef \" ' ",
        "abc\n<p>def</p>efg<p>hij</p>",
        "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>",
    ];
    foreach ($shortcuts as $sample) {
        $this->assertFalse(is_purify_html_necessary($sample));
        $this->assertSame($sample, purify_html($sample));
    }

    // Now failures: each of these texts has to be reported as needing purification.
    $failures = [
        "&nbsp;",
        "Gin & Tonic",
        "Gin > Tonic",
        "Gin < Tonic",
        "<div>abc</div>",
        "<span>abc</span>",
        "<br>abc",
        "<p class='xxx'>abc</p>",
        "<p>abc<em></p></em>",
        "<p>abc",
    ];
    foreach ($failures as $sample) {
        $this->assertTrue(is_purify_html_necessary($sample));
    }
}
/**
 * Checks which URL schemes are kept in link href attributes by purify_html().
 */
public function test_allowed_schemes() {
    $kept = [
        // First standard schemas.
        '<a href="http://www.example.com/course/view.php?id=5">link</a>',
        '<a href="https://www.example.com/course/view.php?id=5">link</a>',
        '<a href="ftp://user@ftp.example.com/some/file.txt">link</a>',
        '<a href="nntp://example.com/group/123">link</a>',
        '<a href="news:groupname">link</a>',
        '<a href="mailto:user@example.com">link</a>',
        // Extra schemes allowed in moodle.
        '<a href="irc://irc.example.com/3213?pass">link</a>',
        '<a href="rtsp://www.example.com/movie.mov">link</a>',
        '<a href="rtmp://www.example.com/video.f4v">link</a>',
        '<a href="teamspeak://speak.example.com/?par=val?par2=val2">link</a>',
        '<a href="gopher://gopher.example.com/resource">link</a>',
        '<a href="mms://www.example.com/movie.mms">link</a>',
    ];
    foreach ($kept as $link) {
        $this->assertSame($link, purify_html($link));
    }

    // Now some borked or dangerous schemes: the href is removed and only the bare anchor is left.
    $stripped = [
        '<a href="javascript://www.example.com">link</a>',
        '<a href="hmmm://www.example.com">link</a>',
    ];
    foreach ($stripped as $link) {
        $this->assertSame('<a>link</a>', purify_html($link));
    }
}
/**
 * Test non-ascii domain names.
 *
 * In every case the host in the href is expected in its "xn--" form while the link text stays as written.
 */
public function test_idn() {
    $cases = [
        // Example of domain that gives the same result in IDNA2003 and IDNA2008 .
        [
            '<a href="http://правительство.рф">правительство.рф</a>',
            '<a href="http://xn--80aealotwbjpid2k.xn--p1ai">правительство.рф</a>',
        ],
        // Examples of deviations from http://www.unicode.org/reports/tr46/#Table_Deviation_Characters .
        [
            '<a href="http://teßt.de">teßt.de</a>',
            '<a href="http://xn--tet-6ka.de">teßt.de</a>',
        ],
        [
            '<a href="http://βόλος.com">http://βόλος.com</a>',
            '<a href="http://xn--nxasmm1c.com">http://βόλος.com</a>',
        ],
        [
            '<a href="http://نامه‌ای.com">http://نامه‌ای.com</a>',
            '<a href="http://xn--mgba3gch31f060k.com">http://نامه‌ای.com</a>',
        ],
    ];

    foreach ($cases as [$unicodelink, $asciilink]) {
        $this->assertSame($asciilink, purify_html($unicodelink));
    }
}
/**
 * Tests media tags.
 *
 * Each data set is run through format_text() as FORMAT_MOODLE text with filters switched off,
 * and the output must be identical to the expected markup.
 *
 * @dataProvider media_tags_provider
 * @param string $mediatag HTML media tag
 * @param string $expected expected result
 */
public function test_media_tags($mediatag, $expected) {
    $actual = format_text($mediatag, FORMAT_MOODLE, ['filter' => false]);
    // Strict comparison, in line with every other assertion in this test case.
    $this->assertSame($expected, $actual);
}
364 * Test cases for the test_media_tags test.
366 public function media_tags_provider() {
367 // Takes an array of attributes, then generates a test for each of them.
368 $generatetestcases = function($prefix, array $attrs, array $templates) {
369 return array_reduce($attrs, function($carry, $attr) use ($prefix, $templates) {
370 $testcase = [$prefix . '/' . $attr => [
371 sprintf($templates[0], $attr),
372 sprintf($templates[1], $attr)
374 return empty(array_values($carry)[0]) ? $testcase : $carry + $testcase;
375 }, [[]]);
378 $audioattrs = [
379 'preload="auto"', 'autoplay=""', 'loop=""', 'muted=""', 'controls=""',
380 'crossorigin="anonymous"', 'crossorigin="use-credentials"'
382 $videoattrs = [
383 'crossorigin="anonymous"', 'crossorigin="use-credentials"',
384 'poster="https://upload.wikimedia.org/wikipedia/en/1/14/Space_jam.jpg"',
385 'preload="auto"', 'autoplay=""', 'playsinline=""', 'loop=""', 'muted=""',
386 'controls=""', 'width="420"', 'height="69"'
388 return $generatetestcases('Plain audio', $audioattrs + ['src="http://example.com/jam.wav"'], [
389 '<audio %1$s>Looks like you can\'t slam the jams.</audio>',
390 '<div class="text_to_html"><audio %1$s>Looks like you can\'t slam the jams.</audio></div>'
391 ]) + $generatetestcases('Audio with one source', $audioattrs, [
392 '<audio %1$s><source src="http://example.com/getup.wav">No tasty jams for you.</audio>',
393 '<div class="text_to_html">' .
394 '<audio %1$s>' .
395 '<source src="http://example.com/getup.wav" />' .
396 'No tasty jams for you.' .
397 '</audio>' .
398 '</div>'
399 ]) + $generatetestcases('Audio with multiple sources', $audioattrs, [
400 '<audio %1$s>' .
401 '<source src="http://example.com/getup.wav" type="audio/wav">' .
402 '<source src="http://example.com/getup.mp3" type="audio/mpeg">' .
403 '<source src="http://example.com/getup.ogg" type="audio/ogg">' .
404 'No tasty jams for you.' .
405 '</audio>',
406 '<div class="text_to_html">' .
407 '<audio %1$s>' .
408 '<source src="http://example.com/getup.wav" type="audio/wav" />' .
409 '<source src="http://example.com/getup.mp3" type="audio/mpeg" />' .
410 '<source src="http://example.com/getup.ogg" type="audio/ogg" />' .
411 'No tasty jams for you.' .
412 '</audio>' .
413 '</div>'
414 ]) + $generatetestcases('Audio with sources and tracks', $audioattrs, [
415 '<audio %1$s>' .
416 '<source src="http://example.com/getup.wav" type="audio/wav">' .
417 '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
418 '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
419 'No tasty jams for you.' .
420 '</audio>',
421 '<div class="text_to_html">' .
422 '<audio %1$s>' .
423 '<source src="http://example.com/getup.wav" type="audio/wav" />' .
424 '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
425 '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
426 'No tasty jams for you.' .
427 '</audio>' .
428 '</div>'
429 ]) + $generatetestcases('Plain video', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
430 '<video %1$s>Oh, that\'s pretty bad 😦</video>',
431 '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
432 ]) + $generatetestcases('Video with illegal subtag', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
433 '<video %1$s><subtag></subtag>Oh, that\'s pretty bad 😦</video>',
434 '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
435 ]) + $generatetestcases('Video with legal subtag', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
436 '<video %1$s>Did not work <a href="http://example.com/prettygood.mp4">click here to download</a></video>',
437 '<div class="text_to_html"><video %1$s>Did not work <a href="http://example.com/prettygood.mp4">' .
438 'click here to download</a></video></div>'
439 ]) + $generatetestcases('Source tag without video or audio', $videoattrs, [
440 'some text <source src="http://example.com/getup.wav" type="audio/wav"> the end',
441 '<div class="text_to_html">some text the end</div>'
442 ]) + $generatetestcases('Video with one source', $videoattrs, [
443 '<video %1$s><source src="http://example.com/prettygood.mp4">Oh, that\'s pretty bad 😦</video>',
444 '<div class="text_to_html">' .
445 '<video %1$s>' .
446 '<source src="http://example.com/prettygood.mp4" />' .
447 'Oh, that\'s pretty bad 😦' .
448 '</video>' .
449 '</div>'
450 ]) + $generatetestcases('Video with multiple sources', $videoattrs, [
451 '<video %1$s>' .
452 '<source src="http://example.com/prettygood.mp4" type="video/mp4">' .
453 '<source src="http://example.com/eljefe.mp4" type="video/mp4">' .
454 '<source src="http://example.com/turnitup.mov" type="video/mov">' .
455 'Oh, that\'s pretty bad 😦' .
456 '</video>',
457 '<div class="text_to_html">' .
458 '<video %1$s>' .
459 '<source src="http://example.com/prettygood.mp4" type="video/mp4" />' .
460 '<source src="http://example.com/eljefe.mp4" type="video/mp4" />' .
461 '<source src="http://example.com/turnitup.mov" type="video/mov" />' .
462 'Oh, that\'s pretty bad 😦' .
463 '</video>' .
464 '</div>'
465 ]) + $generatetestcases('Video with sources and tracks', $audioattrs, [
466 '<video %1$s>' .
467 '<source src="http://example.com/getup.wav" type="audio/wav">' .
468 '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
469 '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
470 'No tasty jams for you.' .
471 '</video>',
472 '<div class="text_to_html">' .
473 '<video %1$s>' .
474 '<source src="http://example.com/getup.wav" type="audio/wav" />' .
475 '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
476 '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
477 'No tasty jams for you.' .
478 '</video>' .
479 '</div>'
480 ]) + ['Video with invalid crossorigin' => [
481 '<video src="http://example.com/turnitup.mov" crossorigin="can i pls hab?">' .
482 'Oh, that\'s pretty bad 😦' .
483 '</video>',
484 '<div class="text_to_html">' .
485 '<video src="http://example.com/turnitup.mov">' .
486 'Oh, that\'s pretty bad 😦' .
487 '</video>' .
488 '</div>'
489 ]] + ['Audio with invalid crossorigin' => [
490 '<audio src="http://example.com/getup.wav" crossorigin="give me. the jams.">' .
491 'nyemnyemnyem' .
492 '</audio>',
493 '<div class="text_to_html">' .
494 '<audio src="http://example.com/getup.wav">' .
495 'nyemnyemnyem' .
496 '</audio>' .
497 '</div>'
498 ]] + ['Other attributes' => [
499 '<video src="http://example.com/turnitdown.mov" class="nofilter" data-something="data attribute" someattribute="somevalue" onclick="boom">' .
500 '<source src="http://example.com/getup.wav" type="audio/wav" class="shouldberemoved" data-sourcedata="source data" onmouseover="kill session" />' .
501 '<track src="http://example.com/subtitles_en.vtt" class="shouldberemoved" data-trackdata="track data" onmouseover="removeme" />' .
502 'Do not remove attribute class but remove other attributes' .
503 '</video>',
504 '<div class="text_to_html">' .
505 '<video src="http://example.com/turnitdown.mov" class="nofilter">' .
506 '<source src="http://example.com/getup.wav" type="audio/wav" />' .
507 '<track src="http://example.com/subtitles_en.vtt" />' .
508 'Do not remove attribute class but remove other attributes' .
509 '</video>' .
510 '</div>'