tests/HTMLPurifier/LexerTest.php

   1 <?php
   2
   3 class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
   4 {
   5
   6     protected $_has_pear = false;
   7
   8     public function __construct() {
   9         parent::__construct();
  10         // E_STRICT = 2048, int used for PHP4 compat: this check disables
  11         // PEAR if PHP 5 strict mode is on, since the class is not strict safe
  12         if (
  13             $GLOBALS['HTMLPurifierTest']['PEAR'] &&
  14             ((error_reporting() & 2048) != 2048) // ought to be a better way
  15         ) {
  16             require_once 'HTMLPurifier/Lexer/PEARSax3.php';
  17             $this->_has_pear = true;
  18         }
  19         if ($GLOBALS['HTMLPurifierTest']['PH5P']) {
  20             require_once 'HTMLPurifier/Lexer/PH5P.php';
  21         }
  22     }
  23
  24     // HTMLPurifier_Lexer::create() --------------------------------------------
  25
  26     function test_create() {
  27         $this->config->set('Core.MaintainLineNumbers', true);
  28         $lexer = HTMLPurifier_Lexer::create($this->config);
  29         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
  30     }
  31
  32     function test_create_objectLexerImpl() {
  33         $this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_DirectLex());
  34         $lexer = HTMLPurifier_Lexer::create($this->config);
  35         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
  36     }
  37
  38     function test_create_unknownLexer() {
  39         $this->config->set('Core.LexerImpl', 'AsdfAsdf');
  40         $this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
  41         HTMLPurifier_Lexer::create($this->config);
  42     }
  43
  44     function test_create_incompatibleLexer() {
  45         $this->config->set('Core.LexerImpl', 'DOMLex');
  46         $this->config->set('Core.MaintainLineNumbers', true);
  47         $this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
  48         HTMLPurifier_Lexer::create($this->config);
  49     }
  50
  51     // HTMLPurifier_Lexer->parseData() -----------------------------------------
  52
  53     function assertParseData($input, $expect = true) {
  54         if ($expect === true) $expect = $input;
  55         $lexer = new HTMLPurifier_Lexer();
  56         $this->assertIdentical($expect, $lexer->parseData($input));
  57     }
  58
  59     function test_parseData_plainText() {
  60         $this->assertParseData('asdf');
  61     }
  62
  63     function test_parseData_ampersandEntity() {
  64         $this->assertParseData('&amp;', '&');
  65     }
  66
  67     function test_parseData_quotEntity() {
  68         $this->assertParseData('&quot;', '"');
  69     }
  70
  71     function test_parseData_aposNumericEntity() {
  72         $this->assertParseData('&#039;', "'");
  73     }
  74
  75     function test_parseData_aposCompactNumericEntity() {
  76         $this->assertParseData('&#39;', "'");
  77     }
  78
  79     function test_parseData_adjacentAmpersandEntities() {
  80         $this->assertParseData('&amp;&amp;&amp;', '&&&');
  81     }
  82
  83     function test_parseData_trailingUnescapedAmpersand() {
  84         $this->assertParseData('&amp;&', '&&');
  85     }
  86
  87     function test_parseData_internalUnescapedAmpersand() {
  88         $this->assertParseData('Procter & Gamble');
  89     }
  90
  91     function test_parseData_improperEntityFaultToleranceTest() {
  92         $this->assertParseData('&#x2D;');
  93     }
  94
  95     // HTMLPurifier_Lexer->extractBody() ---------------------------------------
  96
  97     function assertExtractBody($text, $extract = true) {
  98         $lexer = new HTMLPurifier_Lexer();
  99         $result = $lexer->extractBody($text);
 100         if ($extract === true) $extract = $text;
 101         $this->assertIdentical($extract, $result);
 102     }
 103
 104     function test_extractBody_noBodyTags() {
 105         $this->assertExtractBody('<b>Bold</b>');
 106     }
 107
 108     function test_extractBody_lowercaseBodyTags() {
 109         $this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
 110     }
 111
 112     function test_extractBody_uppercaseBodyTags() {
 113         $this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');
 114     }
 115
 116     function test_extractBody_realisticUseCase() {
 117         $this->assertExtractBody(
 118 '<?xml version="1.0"
 119 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 120     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 121 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 122    <head>
 123       <title>xyz</title>
 124    </head>
 125    <body>
 126       <form method="post" action="whatever1">
 127          <div>
 128             <input type="text" name="username" />
 129             <input type="text" name="password" />
 130             <input type="submit" />
 131          </div>
 132       </form>
 133    </body>
 134 </html>',
 135     '
 136       <form method="post" action="whatever1">
 137          <div>
 138             <input type="text" name="username" />
 139             <input type="text" name="password" />
 140             <input type="submit" />
 141          </div>
 142       </form>
 143    ');
 144     }
 145
 146     function test_extractBody_bodyWithAttributes() {
 147         $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
 148     }
 149
 150     function test_extractBody_preserveUnclosedBody() {
 151         $this->assertExtractBody('<body>asdf'); // not closed, don't accept
 152     }
 153
 154     function test_extractBody_useLastBody() {
 155         $this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar');
 156     }
 157
 158     // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------
 159
 160     function assertTokenization($input, $expect, $alt_expect = array()) {
 161         $lexers = array();
 162         $lexers['DirectLex']  = new HTMLPurifier_Lexer_DirectLex();
 163         if ($this->_has_pear) $lexers['PEARSax3']   = new HTMLPurifier_Lexer_PEARSax3();
 164         if (class_exists('DOMDocument')) {
 165             $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
 166             $lexers['PH5P']   = new HTMLPurifier_Lexer_PH5P();
 167         }
 168         foreach ($lexers as $name => $lexer) {
 169             $result = $lexer->tokenizeHTML($input, $this->config, $this->context);
 170             if (isset($alt_expect[$name])) {
 171                 if ($alt_expect[$name] === false) continue;
 172                 $t_expect = $alt_expect[$name];
 173                 $this->assertIdentical($result, $alt_expect[$name], "$name: %s");
 174             } else {
 175                 $t_expect = $expect;
 176                 $this->assertIdentical($result, $expect, "$name: %s");
 177             }
 178             if ($t_expect != $result) {
 179                 printTokens($result);
 180                 //var_dump($result);
 181             }
 182         }
 183     }
 184
 185     function test_tokenizeHTML_emptyInput() {
 186         $this->assertTokenization('', array());
 187     }
 188
 189     function test_tokenizeHTML_plainText() {
 190         $this->assertTokenization(
 191             'This is regular text.',
 192             array(
 193                 new HTMLPurifier_Token_Text('This is regular text.')
 194             )
 195         );
 196     }
 197
 198     function test_tokenizeHTML_textAndTags() {
 199         $this->assertTokenization(
 200             'This is <b>bold</b> text',
 201             array(
 202                 new HTMLPurifier_Token_Text('This is '),
 203                 new HTMLPurifier_Token_Start('b', array()),
 204                 new HTMLPurifier_Token_Text('bold'),
 205                 new HTMLPurifier_Token_End('b'),
 206                 new HTMLPurifier_Token_Text(' text'),
 207             )
 208         );
 209     }
 210
 211     function test_tokenizeHTML_normalizeCase() {
 212         $this->assertTokenization(
 213             '<DIV>Totally rad dude. <b>asdf</b></div>',
 214             array(
 215                 new HTMLPurifier_Token_Start('DIV', array()),
 216                 new HTMLPurifier_Token_Text('Totally rad dude. '),
 217                 new HTMLPurifier_Token_Start('b', array()),
 218                 new HTMLPurifier_Token_Text('asdf'),
 219                 new HTMLPurifier_Token_End('b'),
 220                 new HTMLPurifier_Token_End('div'),
 221             )
 222         );
 223     }
 224
 225     function test_tokenizeHTML_notWellFormed() {
 226         $this->assertTokenization(
 227             '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
 228             array(
 229                 new HTMLPurifier_Token_Start('asdf'),
 230                 new HTMLPurifier_Token_End('asdf'),
 231                 new HTMLPurifier_Token_Start('d'),
 232                 new HTMLPurifier_Token_End('d'),
 233                 new HTMLPurifier_Token_Start('poOloka'),
 234                 new HTMLPurifier_Token_Start('poolasdf'),
 235                 new HTMLPurifier_Token_Start('ds'),
 236                 new HTMLPurifier_Token_End('asdf'),
 237                 new HTMLPurifier_Token_End('ASDF'),
 238             ),
 239             array(
 240                 'DOMLex' => $alt = array(
 241                     new HTMLPurifier_Token_Empty('asdf'),
 242                     new HTMLPurifier_Token_Empty('d'),
 243                     new HTMLPurifier_Token_Start('pooloka'),
 244                     new HTMLPurifier_Token_Start('poolasdf'),
 245                     new HTMLPurifier_Token_Empty('ds'),
 246                     new HTMLPurifier_Token_End('poolasdf'),
 247                     new HTMLPurifier_Token_End('pooloka'),
 248                 ),
 249                 'PH5P' => $alt,
 250             )
 251         );
 252     }
 253
 254     function test_tokenizeHTML_whitespaceInTag() {
 255         $this->assertTokenization(
 256             '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
 257             array(
 258                 new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
 259                 new HTMLPurifier_Token_Text('Link to '),
 260                 new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
 261                 new HTMLPurifier_Token_Text('foobar'),
 262                 new HTMLPurifier_Token_End('b'),
 263                 new HTMLPurifier_Token_End('a'),
 264             )
 265         );
 266     }
 267
 268     function test_tokenizeHTML_emptyTag() {
 269         $this->assertTokenization(
 270             '<br />',
 271             array( new HTMLPurifier_Token_Empty('br') )
 272         );
 273     }
 274
 275     function test_tokenizeHTML_comment() {
 276         $this->assertTokenization(
 277             '<!-- Comment -->',
 278             array( new HTMLPurifier_Token_Comment(' Comment ') ),
 279             array(
 280                 'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- Comment --') ),
 281             )
 282         );
 283     }
 284
 285     function test_tokenizeHTML_malformedComment() {
 286         $this->assertTokenization(
 287             '<!-- not so well formed --->',
 288             array( new HTMLPurifier_Token_Comment(' not so well formed -') ),
 289             array(
 290                 'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- not so well formed ---') ),
 291             )
 292         );
 293     }
 294
 295     function test_tokenizeHTML_unterminatedTag() {
 296         $this->assertTokenization(
 297             '<a href=""',
 298             array( new HTMLPurifier_Token_Text('<a href=""') ),
 299             array(
 300                 // I like our behavior better, but it's non-standard
 301                 'DOMLex'   => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
 302                 'PEARSax3' => array( new HTMLPurifier_Token_Start('a', array('href'=>'')) ),
 303                 'PH5P' => false, // total barfing, grabs scaffolding too
 304             )
 305         );
 306     }
 307
 308     function test_tokenizeHTML_specialEntities() {
 309         $this->assertTokenization(
 310             '&lt;b&gt;',
 311             array(
 312                 new HTMLPurifier_Token_Text('<b>')
 313             ),
 314             array(
 315                 // some parsers will separate entities out
 316                 'PEARSax3' => $split = array(
 317                     new HTMLPurifier_Token_Text('<'),
 318                     new HTMLPurifier_Token_Text('b'),
 319                     new HTMLPurifier_Token_Text('>'),
 320                 ),
 321                 'PH5P' => $split,
 322             )
 323         );
 324     }
 325
 326     function test_tokenizeHTML_earlyQuote() {
 327         $this->assertTokenization(
 328             '<a "=>',
 329             array( new HTMLPurifier_Token_Empty('a') ),
 330             array(
 331                 // we barf on this input
 332                 'DirectLex' => $tokens = array(
 333                     new HTMLPurifier_Token_Start('a', array('"' => ''))
 334                 ),
 335                 'PEARSax3' => $tokens,
 336                 'PH5P' => false, // behavior varies; handle this personally
 337             )
 338         );
 339     }
 340
 341     function test_tokenizeHTML_earlyQuote_PH5P() {
 342         if (!class_exists('DOMDocument')) return;
 343         $lexer = new HTMLPurifier_Lexer_PH5P();
 344         $result = $lexer->tokenizeHTML('<a "=>', $this->config, $this->context);
 345         if ($this->context->get('PH5PError', true)) {
 346             $this->assertIdentical(array(
 347                 new HTMLPurifier_Token_Start('a', array('"' => ''))
 348             ), $result);
 349         } else {
 350             $this->assertIdentical(array(
 351                 new HTMLPurifier_Token_Empty('a', array('"' => ''))
 352             ), $result);
 353         }
 354     }
 355
 356     function test_tokenizeHTML_unescapedQuote() {
 357         $this->assertTokenization(
 358             '"',
 359             array( new HTMLPurifier_Token_Text('"') )
 360         );
 361     }
 362
 363     function test_tokenizeHTML_escapedQuote() {
 364         $this->assertTokenization(
 365             '&quot;',
 366             array( new HTMLPurifier_Token_Text('"') ),
 367             array(
 368                 'PEARSax3' => false, // PEAR barfs on this
 369             )
 370         );
 371     }
 372
 373     function test_tokenizeHTML_cdata() {
 374         $this->assertTokenization(
 375             '<![CDATA[You <b>can&#39;t</b> get me!]]>',
 376             array( new HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
 377             array(
 378                 // PEAR splits up all of the CDATA
 379                 'PEARSax3' => $split = array(
 380                     new HTMLPurifier_Token_Text('You '),
 381                     new HTMLPurifier_Token_Text('<'),
 382                     new HTMLPurifier_Token_Text('b'),
 383                     new HTMLPurifier_Token_Text('>'),
 384                     new HTMLPurifier_Token_Text('can'),
 385                     new HTMLPurifier_Token_Text('&'),
 386                     new HTMLPurifier_Token_Text('#39;t'),
 387                     new HTMLPurifier_Token_Text('<'),
 388                     new HTMLPurifier_Token_Text('/b'),
 389                     new HTMLPurifier_Token_Text('>'),
 390                     new HTMLPurifier_Token_Text(' get me!'),
 391                 ),
 392                 'PH5P' => $split,
 393             )
 394         );
 395     }
 396
 397     function test_tokenizeHTML_characterEntity() {
 398         $this->assertTokenization(
 399             '&theta;',
 400             array( new HTMLPurifier_Token_Text("\xCE\xB8") )
 401         );
 402     }
 403
 404     function test_tokenizeHTML_characterEntityInCDATA() {
 405         $this->assertTokenization(
 406             '<![CDATA[&rarr;]]>',
 407             array( new HTMLPurifier_Token_Text("&rarr;") ),
 408             array(
 409                 'PEARSax3' => $split = array(
 410                     new HTMLPurifier_Token_Text('&'),
 411                     new HTMLPurifier_Token_Text('rarr;'),
 412                 ),
 413                 'PH5P' => $split,
 414             )
 415         );
 416     }
 417
 418     function test_tokenizeHTML_entityInAttribute() {
 419         $this->assertTokenization(
 420             '<a href="index.php?title=foo&amp;id=bar">Link</a>',
 421             array(
 422                 new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
 423                 new HTMLPurifier_Token_Text('Link'),
 424                 new HTMLPurifier_Token_End('a'),
 425             )
 426         );
 427     }
 428
 429     function test_tokenizeHTML_preserveUTF8() {
 430         $this->assertTokenization(
 431             "\xCE\xB8",
 432             array( new HTMLPurifier_Token_Text("\xCE\xB8") )
 433         );
 434     }
 435
 436     function test_tokenizeHTML_specialEntityInAttribute() {
 437         $this->assertTokenization(
 438             '<br test="x &lt; 6" />',
 439             array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
 440         );
 441     }
 442
 443     function test_tokenizeHTML_emoticonProtection() {
 444         $this->assertTokenization(
 445             '<b>Whoa! <3 That\'s not good >.></b>',
 446             array(
 447                 new HTMLPurifier_Token_Start('b'),
 448                 new HTMLPurifier_Token_Text('Whoa! '),
 449                 new HTMLPurifier_Token_Text('<'),
 450                 new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
 451                 new HTMLPurifier_Token_End('b')
 452             ),
 453             array(
 454                 // text is absorbed together
 455                 'DOMLex' => array(
 456                     new HTMLPurifier_Token_Start('b'),
 457                     new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
 458                     new HTMLPurifier_Token_End('b'),
 459                 ),
 460                 'PEARSax3' => false, // totally mangled
 461                 'PH5P' => array( // interesting grouping
 462                     new HTMLPurifier_Token_Start('b'),
 463                     new HTMLPurifier_Token_Text('Whoa! '),
 464                     new HTMLPurifier_Token_Text('<'),
 465                     new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
 466                     new HTMLPurifier_Token_End('b'),
 467                 ),
 468             )
 469         );
 470     }
 471
 472     function test_tokenizeHTML_commentWithFunkyChars() {
 473         $this->assertTokenization(
 474             '<!-- This >< comment --><br />',
 475             array(
 476                 new HTMLPurifier_Token_Comment(' This >< comment '),
 477                 new HTMLPurifier_Token_Empty('br'),
 478             ),
 479             array(
 480                 'PEARSax3' => false,
 481             )
 482         );
 483     }
 484
 485     function test_tokenizeHTML_unterminatedComment() {
 486         $this->assertTokenization(
 487             '<!-- This >< comment',
 488             array( new HTMLPurifier_Token_Comment(' This >< comment') ),
 489             array(
 490                 'DOMLex'   => false,
 491                 'PEARSax3' => false,
 492                 'PH5P'     => false,
 493             )
 494         );
 495     }
 496
 497     function test_tokenizeHTML_scriptCDATAContents() {
 498         $this->config->set('HTML.Trusted', true);
 499         $this->assertTokenization(
 500             'Foo: <script>alert("<foo>");</script>',
 501             array(
 502                 new HTMLPurifier_Token_Text('Foo: '),
 503                 new HTMLPurifier_Token_Start('script'),
 504                 new HTMLPurifier_Token_Text('alert("<foo>");'),
 505                 new HTMLPurifier_Token_End('script'),
 506             ),
 507             array(
 508                 'PEARSax3' => false,
 509                 // PH5P, for some reason, bubbles the script to <head>
 510                 'PH5P' => false,
 511             )
 512         );
 513     }
 514
 515     function test_tokenizeHTML_entitiesInComment() {
 516         $this->assertTokenization(
 517             '<!-- This comment < &lt; & -->',
 518             array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') ),
 519             array(
 520                 'PEARSax3' => false
 521             )
 522         );
 523     }
 524
 525     function test_tokenizeHTML_attributeWithSpecialCharacters() {
 526         $this->assertTokenization(
 527             '<a href="><>">',
 528             array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
 529             array(
 530                 'DirectLex' => array(
 531                     new HTMLPurifier_Token_Start('a', array('href' => '')),
 532                     new HTMLPurifier_Token_Text('<'),
 533                     new HTMLPurifier_Token_Text('">'),
 534                 ),
 535                 'PEARSax3' => false,
 536             )
 537         );
 538     }
 539
 540     function test_tokenizeHTML_emptyTagWithSlashInAttribute() {
 541         $this->assertTokenization(
 542             '<param name="src" value="http://example.com/video.wmv" />',
 543             array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )
 544         );
 545     }
 546
 547     function test_tokenizeHTML_style() {
 548         $extra = array(
 549                 // PH5P doesn't seem to like style tags
 550                 'PH5P' => false,
 551                 // DirectLex defers to RemoveForeignElements for textification
 552                 'DirectLex' => array(
 553                     new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
 554                     new HTMLPurifier_Token_Comment("\ndiv {}\n"),
 555                     new HTMLPurifier_Token_End('style'),
 556                 ),
 557             );
 558         if (!defined('LIBXML_VERSION')) {
 559             // LIBXML_VERSION is missing in early versions of PHP
 560             // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
 561             // this translates to 5.0.x. In such cases, punt the test entirely.
 562             return;
 563         } elseif (LIBXML_VERSION < 20628) {
 564             // libxml's behavior is wrong prior to this version, so make
 565             // appropriate accomodations
 566             $extra['DOMLex'] = $extra['DirectLex'];
 567         }
 568         $this->assertTokenization(
 569 '<style type="text/css"><!--
 570 div {}
 571 --></style>',
 572             array(
 573                 new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
 574                 new HTMLPurifier_Token_Text("\ndiv {}\n"),
 575                 new HTMLPurifier_Token_End('style'),
 576             ),
 577             $extra
 578         );
 579     }
 580
 581     function test_tokenizeHTML_tagWithAtSignAndExtraGt() {
 582         $this->assertTokenization(
 583             '<a@>>',
 584             array(
 585                 new HTMLPurifier_Token_Start('a'),
 586                 new HTMLPurifier_Token_Text('>'),
 587                 new HTMLPurifier_Token_End('a'),
 588             ),
 589             array(
 590                 'DirectLex' => array(
 591                     // Technically this is invalid, but it won't be a
 592                     // problem with invalid element removal; also, this
 593                     // mimics Mozilla's parsing of the tag.
 594                     new HTMLPurifier_Token_Start('a@'),
 595                     new HTMLPurifier_Token_Text('>'),
 596                 ),
 597             )
 598         );
 599     }
 600
 601     function test_tokenizeHTML_emoticonHeart() {
 602         $this->assertTokenization(
 603             '<br /><3<br />',
 604             array(
 605                 new HTMLPurifier_Token_Empty('br'),
 606                 new HTMLPurifier_Token_Text('<'),
 607                 new HTMLPurifier_Token_Text('3'),
 608                 new HTMLPurifier_Token_Empty('br'),
 609             ),
 610             array(
 611                 'DOMLex' => array(
 612                     new HTMLPurifier_Token_Empty('br'),
 613                     new HTMLPurifier_Token_Text('<3'),
 614                     new HTMLPurifier_Token_Empty('br'),
 615                 ),
 616             )
 617         );
 618     }
 619
 620     function test_tokenizeHTML_emoticonShiftyEyes() {
 621         $this->assertTokenization(
 622             '<b><<</b>',
 623             array(
 624                 new HTMLPurifier_Token_Start('b'),
 625                 new HTMLPurifier_Token_Text('<'),
 626                 new HTMLPurifier_Token_Text('<'),
 627                 new HTMLPurifier_Token_End('b'),
 628             ),
 629             array(
 630                 'DOMLex' => array(
 631                     new HTMLPurifier_Token_Start('b'),
 632                     new HTMLPurifier_Token_Text('<<'),
 633                     new HTMLPurifier_Token_End('b'),
 634                 ),
 635             )
 636         );
 637     }
 638
 639     function test_tokenizeHTML_eon1996() {
 640         $this->assertTokenization(
 641             '< <b>test</b>',
 642             array(
 643                 new HTMLPurifier_Token_Text('<'),
 644                 new HTMLPurifier_Token_Text(' '),
 645                 new HTMLPurifier_Token_Start('b'),
 646                 new HTMLPurifier_Token_Text('test'),
 647                 new HTMLPurifier_Token_End('b'),
 648             ),
 649             array(
 650                 'DOMLex' => array(
 651                     new HTMLPurifier_Token_Text('< '),
 652                     new HTMLPurifier_Token_Start('b'),
 653                     new HTMLPurifier_Token_Text('test'),
 654                     new HTMLPurifier_Token_End('b'),
 655                 ),
 656             )
 657         );
 658     }
 659
 660     function test_tokenizeHTML_bodyInCDATA() {
 661         $this->assertTokenization(
 662             '<![CDATA[<body>Foo</body>]]>',
 663             array(
 664                 new HTMLPurifier_Token_Text('<body>Foo</body>'),
 665             ),
 666             array(
 667                 'PH5P' => array(
 668                     new HTMLPurifier_Token_Text('<'),
 669                     new HTMLPurifier_Token_Text('body'),
 670                     new HTMLPurifier_Token_Text('>'),
 671                     new HTMLPurifier_Token_Text('Foo'),
 672                     new HTMLPurifier_Token_Text('<'),
 673                     new HTMLPurifier_Token_Text('/body'),
 674                     new HTMLPurifier_Token_Text('>'),
 675                 ),
 676             )
 677         );
 678     }
 679
 680     /*
 681
 682     function test_tokenizeHTML_() {
 683         $this->assertTokenization(
 684             ,
 685             array(
 686
 687             )
 688         );
 689     }
 690     */
 691
 692 }
 693
 694 // vim: et sw=4 sts=4