tests/HTMLPurifier/LexerTest.php

   1 <?php
   2
   3 class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
   4 {
   5
   6     protected $_has_pear = false;
   7
   8     public function __construct() {
   9         parent::__construct();
  10         // E_STRICT = 2048, int used for PHP4 compat: this check disables
  11         // PEAR if PHP 5 strict mode is on, since the class is not strict safe
  12         if (
  13             $GLOBALS['HTMLPurifierTest']['PEAR'] &&
  14             ((error_reporting() & 2048) != 2048) // ought to be a better way
  15         ) {
  16             require_once 'HTMLPurifier/Lexer/PEARSax3.php';
  17             $this->_has_pear = true;
  18         }
  19         if ($GLOBALS['HTMLPurifierTest']['PH5P']) {
  20             require_once 'HTMLPurifier/Lexer/PH5P.php';
  21         }
  22     }
  23
  24     // HTMLPurifier_Lexer::create() --------------------------------------------
  25
  26     function test_create() {
  27         $this->config->set('Core', 'MaintainLineNumbers', true);
  28         $lexer = HTMLPurifier_Lexer::create($this->config);
  29         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
  30     }
  31
  32     function test_create_objectLexerImpl() {
  33         $this->config->set('Core', 'LexerImpl', new HTMLPurifier_Lexer_DirectLex());
  34         $lexer = HTMLPurifier_Lexer::create($this->config);
  35         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
  36     }
  37
  38     function test_create_unknownLexer() {
  39         $this->config->set('Core', 'LexerImpl', 'AsdfAsdf');
  40         $this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
  41         HTMLPurifier_Lexer::create($this->config);
  42     }
  43
  44     function test_create_incompatibleLexer() {
  45         $this->config->set('Core', 'LexerImpl', 'DOMLex');
  46         $this->config->set('Core', 'MaintainLineNumbers', true);
  47         $this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
  48         HTMLPurifier_Lexer::create($this->config);
  49     }
  50
  51     // HTMLPurifier_Lexer->parseData() -----------------------------------------
  52
  53     function assertParseData($input, $expect = true) {
  54         if ($expect === true) $expect = $input;
  55         $lexer = new HTMLPurifier_Lexer();
  56         $this->assertIdentical($expect, $lexer->parseData($input));
  57     }
  58
  59     function test_parseData_plainText() {
  60         $this->assertParseData('asdf');
  61     }
  62
  63     function test_parseData_ampersandEntity() {
  64         $this->assertParseData('&amp;', '&');
  65     }
  66
  67     function test_parseData_quotEntity() {
  68         $this->assertParseData('&quot;', '"');
  69     }
  70
  71     function test_parseData_aposNumericEntity() {
  72         $this->assertParseData('&#039;', "'");
  73     }
  74
  75     function test_parseData_aposCompactNumericEntity() {
  76         $this->assertParseData('&#39;', "'");
  77     }
  78
  79     function test_parseData_adjacentAmpersandEntities() {
  80         $this->assertParseData('&amp;&amp;&amp;', '&&&');
  81     }
  82
  83     function test_parseData_trailingUnescapedAmpersand() {
  84         $this->assertParseData('&amp;&', '&&');
  85     }
  86
  87     function test_parseData_internalUnescapedAmpersand() {
  88         $this->assertParseData('Procter & Gamble');
  89     }
  90
  91     function test_parseData_improperEntityFaultToleranceTest() {
  92         $this->assertParseData('&#x2D;');
  93     }
  94
  95     // HTMLPurifier_Lexer->extractBody() ---------------------------------------
  96
  97     function assertExtractBody($text, $extract = true) {
  98         $lexer = new HTMLPurifier_Lexer();
  99         $result = $lexer->extractBody($text);
 100         if ($extract === true) $extract = $text;
 101         $this->assertIdentical($extract, $result);
 102     }
 103
 104     function test_extractBody_noBodyTags() {
 105         $this->assertExtractBody('<b>Bold</b>');
 106     }
 107
 108     function test_extractBody_lowercaseBodyTags() {
 109         $this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
 110     }
 111
 112     function test_extractBody_uppercaseBodyTags() {
 113         $this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');
 114     }
 115
 116     function test_extractBody_realisticUseCase() {
 117         $this->assertExtractBody(
 118 '<?xml version="1.0"
 119 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 120     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 121 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 122    <head>
 123       <title>xyz</title>
 124    </head>
 125    <body>
 126       <form method="post" action="whatever1">
 127          <div>
 128             <input type="text" name="username" />
 129             <input type="text" name="password" />
 130             <input type="submit" />
 131          </div>
 132       </form>
 133    </body>
 134 </html>',
 135     '
 136       <form method="post" action="whatever1">
 137          <div>
 138             <input type="text" name="username" />
 139             <input type="text" name="password" />
 140             <input type="submit" />
 141          </div>
 142       </form>
 143    ');
 144     }
 145
 146     function test_extractBody_bodyWithAttributes() {
 147         $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
 148     }
 149
 150     function test_extractBody_preserveUnclosedBody() {
 151         $this->assertExtractBody('<body>asdf'); // not closed, don't accept
 152     }
 153
 154     // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------
 155
 156     function assertTokenization($input, $expect, $alt_expect = array()) {
 157         $lexers = array();
 158         $lexers['DirectLex']  = new HTMLPurifier_Lexer_DirectLex();
 159         if ($this->_has_pear) $lexers['PEARSax3']   = new HTMLPurifier_Lexer_PEARSax3();
 160         if (class_exists('DOMDocument')) {
 161             $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
 162             $lexers['PH5P']   = new HTMLPurifier_Lexer_PH5P();
 163         }
 164         foreach ($lexers as $name => $lexer) {
 165             $result = $lexer->tokenizeHTML($input, $this->config, $this->context);
 166             if (isset($alt_expect[$name])) {
 167                 if ($alt_expect[$name] === false) continue;
 168                 $t_expect = $alt_expect[$name];
 169                 $this->assertIdentical($result, $alt_expect[$name], "$name: %s");
 170             } else {
 171                 $t_expect = $expect;
 172                 $this->assertIdentical($result, $expect, "$name: %s");
 173             }
 174             if ($t_expect != $result) {
 175                 printTokens($result);
 176                 //var_dump($result);
 177             }
 178         }
 179     }
 180
 181     function test_tokenizeHTML_emptyInput() {
 182         $this->assertTokenization('', array());
 183     }
 184
 185     function test_tokenizeHTML_plainText() {
 186         $this->assertTokenization(
 187             'This is regular text.',
 188             array(
 189                 new HTMLPurifier_Token_Text('This is regular text.')
 190             )
 191         );
 192     }
 193
 194     function test_tokenizeHTML_textAndTags() {
 195         $this->assertTokenization(
 196             'This is <b>bold</b> text',
 197             array(
 198                 new HTMLPurifier_Token_Text('This is '),
 199                 new HTMLPurifier_Token_Start('b', array()),
 200                 new HTMLPurifier_Token_Text('bold'),
 201                 new HTMLPurifier_Token_End('b'),
 202                 new HTMLPurifier_Token_Text(' text'),
 203             )
 204         );
 205     }
 206
 207     function test_tokenizeHTML_normalizeCase() {
 208         $this->assertTokenization(
 209             '<DIV>Totally rad dude. <b>asdf</b></div>',
 210             array(
 211                 new HTMLPurifier_Token_Start('DIV', array()),
 212                 new HTMLPurifier_Token_Text('Totally rad dude. '),
 213                 new HTMLPurifier_Token_Start('b', array()),
 214                 new HTMLPurifier_Token_Text('asdf'),
 215                 new HTMLPurifier_Token_End('b'),
 216                 new HTMLPurifier_Token_End('div'),
 217             )
 218         );
 219     }
 220
 221     function test_tokenizeHTML_notWellFormed() {
 222         $this->assertTokenization(
 223             '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
 224             array(
 225                 new HTMLPurifier_Token_Start('asdf'),
 226                 new HTMLPurifier_Token_End('asdf'),
 227                 new HTMLPurifier_Token_Start('d'),
 228                 new HTMLPurifier_Token_End('d'),
 229                 new HTMLPurifier_Token_Start('poOloka'),
 230                 new HTMLPurifier_Token_Start('poolasdf'),
 231                 new HTMLPurifier_Token_Start('ds'),
 232                 new HTMLPurifier_Token_End('asdf'),
 233                 new HTMLPurifier_Token_End('ASDF'),
 234             ),
 235             array(
 236                 'DOMLex' => $alt = array(
 237                     new HTMLPurifier_Token_Empty('asdf'),
 238                     new HTMLPurifier_Token_Empty('d'),
 239                     new HTMLPurifier_Token_Start('pooloka'),
 240                     new HTMLPurifier_Token_Start('poolasdf'),
 241                     new HTMLPurifier_Token_Empty('ds'),
 242                     new HTMLPurifier_Token_End('poolasdf'),
 243                     new HTMLPurifier_Token_End('pooloka'),
 244                 ),
 245                 'PH5P' => $alt,
 246             )
 247         );
 248     }
 249
 250     function test_tokenizeHTML_whitespaceInTag() {
 251         $this->assertTokenization(
 252             '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
 253             array(
 254                 new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
 255                 new HTMLPurifier_Token_Text('Link to '),
 256                 new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
 257                 new HTMLPurifier_Token_Text('foobar'),
 258                 new HTMLPurifier_Token_End('b'),
 259                 new HTMLPurifier_Token_End('a'),
 260             )
 261         );
 262     }
 263
 264     function test_tokenizeHTML_emptyTag() {
 265         $this->assertTokenization(
 266             '<br />',
 267             array( new HTMLPurifier_Token_Empty('br') )
 268         );
 269     }
 270
 271     function test_tokenizeHTML_comment() {
 272         $this->assertTokenization(
 273             '<!-- Comment -->',
 274             array( new HTMLPurifier_Token_Comment(' Comment ') ),
 275             array(
 276                 'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- Comment --') ),
 277             )
 278         );
 279     }
 280
 281     function test_tokenizeHTML_malformedComment() {
 282         $this->assertTokenization(
 283             '<!-- not so well formed --->',
 284             array( new HTMLPurifier_Token_Comment(' not so well formed -') ),
 285             array(
 286                 'PEARSax3' => array( new HTMLPurifier_Token_Comment('-- not so well formed ---') ),
 287             )
 288         );
 289     }
 290
 291     function test_tokenizeHTML_unterminatedTag() {
 292         $this->assertTokenization(
 293             '<a href=""',
 294             array( new HTMLPurifier_Token_Text('<a href=""') ),
 295             array(
 296                 // I like our behavior better, but it's non-standard
 297                 'DOMLex'   => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
 298                 'PEARSax3' => array( new HTMLPurifier_Token_Start('a', array('href'=>'')) ),
 299                 'PH5P' => false, // total barfing, grabs scaffolding too
 300             )
 301         );
 302     }
 303
 304     function test_tokenizeHTML_specialEntities() {
 305         $this->assertTokenization(
 306             '&lt;b&gt;',
 307             array(
 308                 new HTMLPurifier_Token_Text('<b>')
 309             ),
 310             array(
 311                 // some parsers will separate entities out
 312                 'PEARSax3' => $split = array(
 313                     new HTMLPurifier_Token_Text('<'),
 314                     new HTMLPurifier_Token_Text('b'),
 315                     new HTMLPurifier_Token_Text('>'),
 316                 ),
 317                 'PH5P' => $split,
 318             )
 319         );
 320     }
 321
 322     function test_tokenizeHTML_earlyQuote() {
 323         $this->assertTokenization(
 324             '<a "=>',
 325             array( new HTMLPurifier_Token_Empty('a') ),
 326             array(
 327                 // we barf on this input
 328                 'DirectLex' => $tokens = array(
 329                     new HTMLPurifier_Token_Start('a', array('"' => ''))
 330                 ),
 331                 'PEARSax3' => $tokens,
 332                 'PH5P' => false, // behavior varies; handle this personally
 333             )
 334         );
 335     }
 336
 337     function test_tokenizeHTML_earlyQuote_PH5P() {
 338         if (!class_exists('DOMDocument')) return;
 339         $lexer = new HTMLPurifier_Lexer_PH5P();
 340         $result = $lexer->tokenizeHTML('<a "=>', $this->config, $this->context);
 341         if ($this->context->get('PH5PError', true)) {
 342             $this->assertIdentical(array(
 343                 new HTMLPurifier_Token_Start('a', array('"' => ''))
 344             ), $result);
 345         } else {
 346             $this->assertIdentical(array(
 347                 new HTMLPurifier_Token_Empty('a', array('"' => ''))
 348             ), $result);
 349         }
 350     }
 351
 352     function test_tokenizeHTML_unescapedQuote() {
 353         $this->assertTokenization(
 354             '"',
 355             array( new HTMLPurifier_Token_Text('"') )
 356         );
 357     }
 358
 359     function test_tokenizeHTML_escapedQuote() {
 360         $this->assertTokenization(
 361             '&quot;',
 362             array( new HTMLPurifier_Token_Text('"') ),
 363             array(
 364                 'PEARSax3' => false, // PEAR barfs on this
 365             )
 366         );
 367     }
 368
 369     function test_tokenizeHTML_cdata() {
 370         $this->assertTokenization(
 371             '<![CDATA[You <b>can&#39;t</b> get me!]]>',
 372             array( new HTMLPurifier_Token_Text('You <b>can&#39;t</b> get me!') ),
 373             array(
 374                 // PEAR splits up all of the CDATA
 375                 'PEARSax3' => $split = array(
 376                     new HTMLPurifier_Token_Text('You '),
 377                     new HTMLPurifier_Token_Text('<'),
 378                     new HTMLPurifier_Token_Text('b'),
 379                     new HTMLPurifier_Token_Text('>'),
 380                     new HTMLPurifier_Token_Text('can'),
 381                     new HTMLPurifier_Token_Text('&'),
 382                     new HTMLPurifier_Token_Text('#39;t'),
 383                     new HTMLPurifier_Token_Text('<'),
 384                     new HTMLPurifier_Token_Text('/b'),
 385                     new HTMLPurifier_Token_Text('>'),
 386                     new HTMLPurifier_Token_Text(' get me!'),
 387                 ),
 388                 'PH5P' => $split,
 389             )
 390         );
 391     }
 392
 393     function test_tokenizeHTML_characterEntity() {
 394         $this->assertTokenization(
 395             '&theta;',
 396             array( new HTMLPurifier_Token_Text("\xCE\xB8") )
 397         );
 398     }
 399
 400     function test_tokenizeHTML_characterEntityInCDATA() {
 401         $this->assertTokenization(
 402             '<![CDATA[&rarr;]]>',
 403             array( new HTMLPurifier_Token_Text("&rarr;") ),
 404             array(
 405                 'PEARSax3' => $split = array(
 406                     new HTMLPurifier_Token_Text('&'),
 407                     new HTMLPurifier_Token_Text('rarr;'),
 408                 ),
 409                 'PH5P' => $split,
 410             )
 411         );
 412     }
 413
 414     function test_tokenizeHTML_entityInAttribute() {
 415         $this->assertTokenization(
 416             '<a href="index.php?title=foo&amp;id=bar">Link</a>',
 417             array(
 418                 new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
 419                 new HTMLPurifier_Token_Text('Link'),
 420                 new HTMLPurifier_Token_End('a'),
 421             )
 422         );
 423     }
 424
 425     function test_tokenizeHTML_preserveUTF8() {
 426         $this->assertTokenization(
 427             "\xCE\xB8",
 428             array( new HTMLPurifier_Token_Text("\xCE\xB8") )
 429         );
 430     }
 431
 432     function test_tokenizeHTML_specialEntityInAttribute() {
 433         $this->assertTokenization(
 434             '<br test="x &lt; 6" />',
 435             array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
 436         );
 437     }
 438
 439     function test_tokenizeHTML_emoticonProtection() {
 440         $this->assertTokenization(
 441             '<b>Whoa! <3 That\'s not good >.></b>',
 442             array(
 443                 new HTMLPurifier_Token_Start('b'),
 444                 new HTMLPurifier_Token_Text('Whoa! '),
 445                 new HTMLPurifier_Token_Text('<'),
 446                 new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
 447                 new HTMLPurifier_Token_End('b')
 448             ),
 449             array(
 450                 // text is absorbed together
 451                 'DOMLex' => array(
 452                     new HTMLPurifier_Token_Start('b'),
 453                     new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
 454                     new HTMLPurifier_Token_End('b'),
 455                 ),
 456                 'PEARSax3' => false, // totally mangled
 457                 'PH5P' => array( // interesting grouping
 458                     new HTMLPurifier_Token_Start('b'),
 459                     new HTMLPurifier_Token_Text('Whoa! '),
 460                     new HTMLPurifier_Token_Text('<'),
 461                     new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
 462                     new HTMLPurifier_Token_End('b'),
 463                 ),
 464             )
 465         );
 466     }
 467
 468     function test_tokenizeHTML_commentWithFunkyChars() {
 469         $this->assertTokenization(
 470             '<!-- This >< comment --><br />',
 471             array(
 472                 new HTMLPurifier_Token_Comment(' This >< comment '),
 473                 new HTMLPurifier_Token_Empty('br'),
 474             ),
 475             array(
 476                 'PEARSax3' => false,
 477             )
 478         );
 479     }
 480
 481     function test_tokenizeHTML_unterminatedComment() {
 482         $this->assertTokenization(
 483             '<!-- This >< comment',
 484             array( new HTMLPurifier_Token_Comment(' This >< comment') ),
 485             array(
 486                 'DOMLex'   => false,
 487                 'PEARSax3' => false,
 488                 'PH5P'     => false,
 489             )
 490         );
 491     }
 492
 493     function test_tokenizeHTML_scriptCDATAContents() {
 494         $this->config->set('HTML', 'Trusted', true);
 495         $this->assertTokenization(
 496             'Foo: <script>alert("<foo>");</script>',
 497             array(
 498                 new HTMLPurifier_Token_Text('Foo: '),
 499                 new HTMLPurifier_Token_Start('script'),
 500                 new HTMLPurifier_Token_Text('alert("<foo>");'),
 501                 new HTMLPurifier_Token_End('script'),
 502             ),
 503             array(
 504                 'PEARSax3' => false,
 505                 // PH5P, for some reason, bubbles the script to <head>
 506                 'PH5P' => false,
 507             )
 508         );
 509     }
 510
 511     function test_tokenizeHTML_entitiesInComment() {
 512         $this->assertTokenization(
 513             '<!-- This comment < &lt; & -->',
 514             array( new HTMLPurifier_Token_Comment(' This comment < &lt; & ') ),
 515             array(
 516                 'PEARSax3' => false
 517             )
 518         );
 519     }
 520
 521     function test_tokenizeHTML_attributeWithSpecialCharacters() {
 522         $this->assertTokenization(
 523             '<a href="><>">',
 524             array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
 525             array(
 526                 'DirectLex' => array(
 527                     new HTMLPurifier_Token_Start('a', array('href' => '')),
 528                     new HTMLPurifier_Token_Text('<'),
 529                     new HTMLPurifier_Token_Text('">'),
 530                 ),
 531                 'PEARSax3' => false,
 532             )
 533         );
 534     }
 535
 536     function test_tokenizeHTML_emptyTagWithSlashInAttribute() {
 537         $this->assertTokenization(
 538             '<param name="src" value="http://example.com/video.wmv" />',
 539             array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )
 540         );
 541     }
 542
 543     function test_tokenizeHTML_style() {
 544         $extra = array(
 545                 // PH5P doesn't seem to like style tags
 546                 'PH5P' => false,
 547                 // DirectLex defers to RemoveForeignElements for textification
 548                 'DirectLex' => array(
 549                     new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
 550                     new HTMLPurifier_Token_Comment("\ndiv {}\n"),
 551                     new HTMLPurifier_Token_End('style'),
 552                 ),
 553             );
 554         if (!defined('LIBXML_VERSION') || LIBXML_VERSION < 20628) {
 555             // libxml's behavior is wrong prior to this version, so make
 556             // appropriate accomodations
 557             // :NOTE: LIBXML_VERSION is missing in early versions of PHP
 558             // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
 559             // this translates to 5.0.x. In such cases, we assume that an old
 560             // version of libxml is being used, although that *might* not
 561             // be the case (it's very unlikely though)
 562             $extra['DOMLex'] = $extra['DirectLex'];
 563         }
 564         $this->assertTokenization(
 565 '<style type="text/css"><!--
 566 div {}
 567 --></style>',
 568             array(
 569                 new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
 570                 new HTMLPurifier_Token_Text("\ndiv {}\n"),
 571                 new HTMLPurifier_Token_End('style'),
 572             ),
 573             $extra
 574         );
 575     }
 576
 577     function test_tokenizeHTML_tagWithAtSignAndExtraGt() {
 578         $this->assertTokenization(
 579             '<a@>>',
 580             array(
 581                 new HTMLPurifier_Token_Start('a'),
 582                 new HTMLPurifier_Token_Text('>'),
 583                 new HTMLPurifier_Token_End('a'),
 584             ),
 585             array(
 586                 'DirectLex' => array(
 587                     // Technically this is invalid, but it won't be a
 588                     // problem with invalid element removal; also, this
 589                     // mimics Mozilla's parsing of the tag.
 590                     new HTMLPurifier_Token_Start('a@'),
 591                     new HTMLPurifier_Token_Text('>'),
 592                 ),
 593             )
 594         );
 595     }
 596
 597     function test_tokenizeHTML_emoticonHeart() {
 598         $this->assertTokenization(
 599             '<br /><3<br />',
 600             array(
 601                 new HTMLPurifier_Token_Empty('br'),
 602                 new HTMLPurifier_Token_Text('<'),
 603                 new HTMLPurifier_Token_Text('3'),
 604                 new HTMLPurifier_Token_Empty('br'),
 605             ),
 606             array(
 607                 'DOMLex' => array(
 608                     new HTMLPurifier_Token_Empty('br'),
 609                     new HTMLPurifier_Token_Text('<3'),
 610                     new HTMLPurifier_Token_Empty('br'),
 611                 ),
 612             )
 613         );
 614     }
 615
 616     function test_tokenizeHTML_emoticonShiftyEyes() {
 617         $this->assertTokenization(
 618             '<b><<</b>',
 619             array(
 620                 new HTMLPurifier_Token_Start('b'),
 621                 new HTMLPurifier_Token_Text('<'),
 622                 new HTMLPurifier_Token_Text('<'),
 623                 new HTMLPurifier_Token_End('b'),
 624             ),
 625             array(
 626                 'DOMLex' => array(
 627                     new HTMLPurifier_Token_Start('b'),
 628                     new HTMLPurifier_Token_Text('<<'),
 629                     new HTMLPurifier_Token_End('b'),
 630                 ),
 631             )
 632         );
 633     }
 634
 635     function test_tokenizeHTML_eon1996() {
 636         $this->assertTokenization(
 637             '< <b>test</b>',
 638             array(
 639                 new HTMLPurifier_Token_Text('<'),
 640                 new HTMLPurifier_Token_Text(' '),
 641                 new HTMLPurifier_Token_Start('b'),
 642                 new HTMLPurifier_Token_Text('test'),
 643                 new HTMLPurifier_Token_End('b'),
 644             ),
 645             array(
 646                 'DOMLex' => array(
 647                     new HTMLPurifier_Token_Text('< '),
 648                     new HTMLPurifier_Token_Start('b'),
 649                     new HTMLPurifier_Token_Text('test'),
 650                     new HTMLPurifier_Token_End('b'),
 651                 ),
 652             )
 653         );
 654     }
 655
 656     function test_tokenizeHTML_bodyInCDATA() {
 657         $this->assertTokenization(
 658             '<![CDATA[<body>Foo</body>]]>',
 659             array(
 660                 new HTMLPurifier_Token_Text('<body>Foo</body>'),
 661             ),
 662             array(
 663                 'PH5P' => array(
 664                     new HTMLPurifier_Token_Text('<'),
 665                     new HTMLPurifier_Token_Text('body'),
 666                     new HTMLPurifier_Token_Text('>'),
 667                     new HTMLPurifier_Token_Text('Foo'),
 668                     new HTMLPurifier_Token_Text('<'),
 669                     new HTMLPurifier_Token_Text('/body'),
 670                     new HTMLPurifier_Token_Text('>'),
 671                 ),
 672             )
 673         );
 674     }
 675
 676     /*
 677
 678     function test_tokenizeHTML_() {
 679         $this->assertTokenization(
 680             ,
 681             array(
 682
 683             )
 684         );
 685     }
 686     */
 687
 688 }
 689
 690 // vim: et sw=4 sts=4