khtml/html/htmltokenizer.cpp

   1 /*
   2     This file is part of the KDE libraries
   3
   4     Copyright (C) 1997 Martin Jones (mjones@kde.org)
   5               (C) 1997 Torben Weis (weis@kde.org)
   6               (C) 1998 Waldo Bastian (bastian@kde.org)
   7               (C) 1999 Lars Knoll (knoll@kde.org)
   8               (C) 1999 Antti Koivisto (koivisto@kde.org)
   9               (C) 2001-2003 Dirk Mueller (mueller@kde.org)
  10               (C) 2004 Apple Computer, Inc.
  11               (C) 2006 Germain Garand (germain@ebooksfrance.org)
  12
  13     This library is free software; you can redistribute it and/or
  14     modify it under the terms of the GNU Library General Public
  15     License as published by the Free Software Foundation; either
  16     version 2 of the License, or (at your option) any later version.
  17
  18     This library is distributed in the hope that it will be useful,
  19     but WITHOUT ANY WARRANTY; without even the implied warranty of
  20     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21     Library General Public License for more details.
  22
  23     You should have received a copy of the GNU Library General Public License
  24     along with this library; see the file COPYING.LIB.  If not, write to
  25     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  26     Boston, MA 02110-1301, USA.
  27 */
  28 //----------------------------------------------------------------------------
  29 //
  30 // KDE HTML Widget - Tokenizers
  31
  32 // #define TOKEN_DEBUG 1
  33 //#define TOKEN_DEBUG 2
  34
  35 #include "htmltokenizer.h"
  36 #include "html_documentimpl.h"
  37 #include "htmlparser.h"
  38 #include "dtd.h"
  39
  40 #include <misc/loader.h>
  41 #include <misc/htmlhashes.h>
  42
  43 #include <khtmlview.h>
  44 #include <khtml_part.h>
  45 #include <xml/dom_docimpl.h>
  46 #include <css/csshelper.h>
  47 #include <ecma/kjs_proxy.h>
  48 #include <kcharsets.h>
  49 #include <kglobal.h>
  50 #include <ctype.h>
  51 #include <assert.h>
  52 #include <QtCore/QVariant>
  53 #include <kdebug.h>
  54 #include <stdlib.h>
  55
  56 #include <config.h>
  57
  58 #include "kentities.c"
  59 #include "htmlprospectivetokenizer.h"
  60
  61 #define PROSPECTIVE_TOKENIZER_ENABLED 1
  62
  63 using namespace khtml;
  64
  65 static const QChar commentStart [] = { '<','!','-','-', QChar::Null };
  66 static const char doctypeStart [] = "<!doctype";
  67 static const char publicStart [] = "public";
  68 static const char systemStart [] = "system";
  69
  70 static const char scriptEnd [] = "</script";
  71 static const char xmpEnd [] = "</xmp";
  72 static const char styleEnd [] =  "</style";
  73 static const char textareaEnd [] = "</textarea";
  74 static const char titleEnd [] = "</title";
  75
  76 #define KHTML_ALLOC_QCHAR_VEC( N ) (QChar*) malloc( sizeof(QChar)*( N ) )
  77 #define KHTML_REALLOC_QCHAR_VEC(P, N ) (QChar*) realloc(P, sizeof(QChar)*( N ))
  78 #define KHTML_DELETE_QCHAR_VEC( P ) free((char*)( P ))
  79
  80 // Full support for MS Windows extensions to Latin-1.
  81 // Technically these extensions should only be activated for pages
  82 // marked "windows-1252" or "cp1252", but
  83 // in the standard Microsoft way, these extensions infect hundreds of thousands
  84 // of web pages.  Note that people with non-latin-1 Microsoft extensions
  85 // are SOL.
  86 //
  87 // See: http://www.microsoft.com/globaldev/reference/WinCP.asp
  88 //      http://www.bbsinc.com/iso8859.html
  89 //      http://www.obviously.com/
  90 //
  91 // There may be better equivalents
  92 #if 0
  93 #define fixUpChar(x)
  94 #else
  95 #define fixUpChar(x) \
  96             switch ((x).unicode()) \
  97             { \
  98             case 0x80: (x) = 0x20ac; break; \
  99             case 0x82: (x) = 0x201a;    break; \
 100             case 0x83: (x) = 0x0192; break; \
 101             case 0x84: (x) = 0x201e;    break; \
 102             case 0x85: (x) = 0x2026; break; \
 103             case 0x86: (x) = 0x2020; break; \
 104             case 0x87: (x) = 0x2021; break; \
 105             case 0x88: (x) = 0x02C6; break; \
 106             case 0x89: (x) = 0x2030; break; \
 107             case 0x8A: (x) = 0x0160; break; \
 108             case 0x8b: (x) = 0x2039;    break; \
 109             case 0x8C: (x) = 0x0152; break; \
 110             case 0x8E: (x) = 0x017D; break; \
 111             case 0x91: (x) = 0x2018;   break; \
 112             case 0x92: (x) = 0x2019;   break; \
 113             case 0x93: (x) = 0x201C;    break; \
 114             case 0x94: (x) = 0X201D;    break; \
 115             case 0x95: (x) = 0x2022;    break; \
 116             case 0x96: (x) = 0x2013;    break; \
 117             case 0x97: (x) = 0x2014;    break; \
 118             case 0x98: (x) = 0x02DC;    break; \
 119             case 0x99: (x) = 0x2122; break; \
 120             case 0x9A: (x) = 0x0161; break; \
 121             case 0x9b: (x) = 0x203A;    break; \
 122             case 0x9C: (x) = 0x0153; break; \
 123             case 0x9E: (x) = 0x017E; break; \
 124             case 0x9F: (x) = 0x0178; break; \
 125             default: break; \
 126             }
 127 #endif
 128 // ----------------------------------------------------------------------------
 129
 130 HTMLTokenizer::HTMLTokenizer(DOM::DocumentImpl *_doc, KHTMLView *_view)
 131 {
 132     view = _view;
 133     buffer = 0;
 134     scriptCode = 0;
 135     scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
 136     charsets = KGlobal::charsets();
 137     parser = new KHTMLParser(_view, _doc);
 138     m_executingScript = 0;
 139     m_autoCloseTimer = 0;
 140     m_prospectiveTokenizer = 0;
 141     onHold = false;
 142
 143     reset();
 144 }
 145
 146 HTMLTokenizer::HTMLTokenizer(DOM::DocumentImpl *_doc, DOM::DocumentFragmentImpl *i)
 147 {
 148     view = 0;
 149     buffer = 0;
 150     scriptCode = 0;
 151     scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
 152     charsets = KGlobal::charsets();
 153     parser = new KHTMLParser( i, _doc );
 154     m_executingScript = 0;
 155     m_autoCloseTimer = 0;
 156     m_prospectiveTokenizer = 0;
 157     onHold = false;
 158
 159     reset();
 160 }
 161
 162 void HTMLTokenizer::reset()
 163 {
 164     assert(m_executingScript == 0);
 165     Q_ASSERT(onHold == false);
 166     m_abort = false;
 167
 168     while (!cachedScript.isEmpty())
 169         cachedScript.dequeue()->deref(this);
 170
 171     if ( buffer )
 172         KHTML_DELETE_QCHAR_VEC(buffer);
 173     buffer = dest = 0;
 174     size = 0;
 175
 176     if ( scriptCode )
 177         KHTML_DELETE_QCHAR_VEC(scriptCode);
 178     scriptCode = 0;
 179     scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
 180
 181     if (m_autoCloseTimer) {
 182         killTimer(m_autoCloseTimer);
 183         m_autoCloseTimer = 0;
 184     }
 185
 186     currToken.reset();
 187     doctypeToken.reset();
 188 }
 189
 190 void HTMLTokenizer::begin()
 191 {
 192     m_executingScript = 0;
 193     onHold = false;
 194     reset();
 195     size = 254;
 196     buffer = KHTML_ALLOC_QCHAR_VEC( 255 );
 197     dest = buffer;
 198     tag = NoTag;
 199     pending = NonePending;
 200     discard = NoneDiscard;
 201     pre = false;
 202     prePos = 0;
 203     plaintext = false;
 204     xmp = false;
 205     processingInstruction = false;
 206     script = false;
 207     escaped = false;
 208     style = false;
 209     skipLF = false;
 210     select = false;
 211     comment = false;
 212     doctype = false;
 213     doctypeComment = NoDoctypeComment;
 214     doctypeAllowComment = false;
 215     server = false;
 216     textarea = false;
 217     title = false;
 218     startTag = false;
 219     tquote = NoQuote;
 220     searchCount = 0;
 221     doctypeSearchCount = 0;
 222     doctypeSecondarySearchCount = 0;
 223     Entity = NoEntity;
 224     noMoreData = false;
 225     brokenComments = false;
 226     brokenServer = false;
 227     brokenScript = false;
 228     lineno = 0;
 229     scriptStartLineno = 0;
 230     tagStartLineno = 0;
 231 }
 232
 233 void HTMLTokenizer::processListing(TokenizerString list)
 234 {
 235     bool old_pre = pre;
 236
 237     // This function adds the listing 'list' as
 238     // preformatted text-tokens to the token-collection
 239     // thereby converting TABs.
 240     if(!style) pre = true;
 241     prePos = 0;
 242
 243     while ( !list.isEmpty() )
 244     {
 245         checkBuffer(3*TAB_SIZE);
 246
 247         if (skipLF && ( list->unicode() != '\n' ))
 248         {
 249             skipLF = false;
 250         }
 251
 252         if (skipLF)
 253         {
 254             skipLF = false;
 255             ++list;
 256         }
 257         else if (( list->unicode() == '\n' ) || ( list->unicode() == '\r' ))
 258         {
 259             if (discard == LFDiscard)
 260             {
 261                 // Ignore this LF
 262                 discard = NoneDiscard; // We have discarded 1 LF
 263             }
 264             else
 265             {
 266                 // Process this LF
 267                 if (pending)
 268                     addPending();
 269
 270                 // we used to do it not at all and we want to have
 271                 // it fixed for textarea. So here we are
 272                 if ( textarea ) {
 273                     prePos++;
 274                     *dest++ = *list;
 275                 } else
 276                     pending = LFPending;
 277             }
 278             /* Check for MS-DOS CRLF sequence */
 279             if (list->unicode() == '\r')
 280             {
 281                 skipLF = true;
 282             }
 283             ++list;
 284         }
 285         else if (( list->unicode() == ' ' ) || ( list->unicode() == '\t'))
 286         {
 287             if (pending)
 288                 addPending();
 289             if (*list == ' ')
 290                 pending = SpacePending;
 291             else
 292                 pending = TabPending;
 293
 294             ++list;
 295         }
 296         else
 297         {
 298             discard = NoneDiscard;
 299             if (pending)
 300                 addPending();
 301
 302             prePos++;
 303             *dest++ = *list;
 304             ++list;
 305         }
 306
 307     }
 308
 309     if ((pending == SpacePending) || (pending == TabPending))
 310         addPending();
 311     else
 312         pending = NonePending;
 313
 314     prePos = 0;
 315     pre = old_pre;
 316 }
 317
 318 void HTMLTokenizer::parseSpecial(TokenizerString &src)
 319 {
 320     assert( textarea || title || !Entity );
 321     assert( !tag );
 322     assert( xmp+textarea+title+style+script == 1 );
 323     if (script)
 324         scriptStartLineno = lineno+src.lineCount();
 325
 326     if ( comment ) parseComment( src );
 327
 328     while ( !src.isEmpty() ) {
 329         checkScriptBuffer();
 330         unsigned char ch = src->toLatin1();
 331         if ( !scriptCodeResync && !brokenComments && !textarea && !xmp && ch == '-' && scriptCodeSize >= 3 && !src.escaped() && QString::fromRawData( scriptCode+scriptCodeSize-3, 3 ) == "<!-" ) {
 332             comment = true;
 333             scriptCode[ scriptCodeSize++ ] = ch;
 334             ++src;
 335             parseComment( src );
 336             continue;
 337         }
 338         if ( scriptCodeResync && !tquote && ( ch == '>' ) ) {
 339             ++src;
 340             scriptCodeSize = scriptCodeResync-1;
 341             scriptCodeResync = 0;
 342             scriptCode[ scriptCodeSize ] = scriptCode[ scriptCodeSize + 1 ] = 0;
 343             if ( script )
 344                 scriptHandler();
 345             else {
 346                 processListing(TokenizerString(scriptCode, scriptCodeSize));
 347                 processToken();
 348                 if ( style )         { currToken.tid = ID_STYLE + ID_CLOSE_TAG; }
 349                 else if ( textarea ) { currToken.tid = ID_TEXTAREA + ID_CLOSE_TAG; }
 350                 else if ( title ) { currToken.tid = ID_TITLE + ID_CLOSE_TAG; }
 351                 else if ( xmp )  { currToken.tid = ID_XMP + ID_CLOSE_TAG; }
 352                 processToken();
 353                 script = style = textarea = title = xmp = false;
 354                 tquote = NoQuote;
 355                 scriptCodeSize = scriptCodeResync = 0;
 356             }
 357             return;
 358         }
 359         // possible end of tagname, lets check.
 360         if ( !scriptCodeResync && !escaped && !src.escaped() && ( ch == '>' || ch == '/' || ch <= ' ' ) && ch &&
 361              scriptCodeSize >= searchStopperLen &&
 362              !QString::fromRawData( scriptCode+scriptCodeSize-searchStopperLen, searchStopperLen ).indexOf( searchStopper, 0, Qt::CaseInsensitive )) {
 363             scriptCodeResync = scriptCodeSize-searchStopperLen+1;
 364             tquote = NoQuote;
 365             continue;
 366         }
 367         if ( scriptCodeResync && !escaped ) {
 368             if(ch == '\"')
 369                 tquote = (tquote == NoQuote) ? DoubleQuote : ((tquote == SingleQuote) ? SingleQuote : NoQuote);
 370             else if(ch == '\'')
 371                 tquote = (tquote == NoQuote) ? SingleQuote : (tquote == DoubleQuote) ? DoubleQuote : NoQuote;
 372             else if (tquote != NoQuote && (ch == '\r' || ch == '\n'))
 373                 tquote = NoQuote;
 374         }
 375         escaped = ( !escaped && ch == '\\' );
 376         if (!scriptCodeResync && (textarea||title) && !src.escaped() && ch == '&') {
 377             QChar *scriptCodeDest = scriptCode+scriptCodeSize;
 378             ++src;
 379             parseEntity(src,scriptCodeDest,true);
 380             scriptCodeSize = scriptCodeDest-scriptCode;
 381         }
 382         else {
 383             scriptCode[ scriptCodeSize++ ] = *src;
 384             ++src;
 385         }
 386     }
 387 }
 388
 389 void HTMLTokenizer::scriptHandler()
 390 {
 391     QString currentScriptSrc = scriptSrc;
 392     scriptSrc.clear();
 393
 394     processListing(TokenizerString(scriptCode, scriptCodeSize));
 395     QString exScript( buffer, dest-buffer );
 396
 397     processToken();
 398     currToken.tid = ID_SCRIPT + ID_CLOSE_TAG;
 399     processToken();
 400
 401     // Scripts following a frameset element should not be executed or even loaded in the case of extern scripts.
 402     bool followingFrameset = (parser->doc()->body() && parser->doc()->body()->id() == ID_FRAMESET);
 403     bool effectiveScript = !parser->skipMode() && !followingFrameset;
 404     bool deferredScript = false;
 405
 406     if ( effectiveScript ) {
 407         CachedScript* cs = 0;
 408
 409         // forget what we just got, load from src url instead
 410         if ( !currentScriptSrc.isEmpty() && javascript &&
 411              (cs = parser->doc()->docLoader()->requestScript(currentScriptSrc, scriptSrcCharset) )) {
 412             cachedScript.enqueue(cs);
 413         }
 414
 415         if (cs) {
 416             pendingQueue.push(src);
 417             int scriptCount = cachedScript.count();
 418             setSrc(TokenizerString());
 419             scriptCodeSize = scriptCodeResync = 0;
 420             cs->ref(this);
 421             if (cachedScript.count() == scriptCount)
 422                 deferredScript = true;
 423         }
 424         else if (currentScriptSrc.isEmpty() && view && javascript ) {
 425             pendingQueue.push(src);
 426             setSrc(TokenizerString());
 427             scriptCodeSize = scriptCodeResync = 0;
 428             scriptExecution( exScript, QString(), tagStartLineno /*scriptStartLineno*/ );
 429         } else {
 430             // script was filtered or disallowed
 431             effectiveScript = false;
 432         }
 433     }
 434
 435     script = false;
 436     scriptCodeSize = scriptCodeResync = 0;
 437
 438     if ( !effectiveScript )
 439         return;
 440
 441     if ( !m_executingScript && cachedScript.isEmpty() ) {
 442         src.append(pendingQueue.pop());
 443     } else if ( cachedScript.isEmpty() ) {
 444         write( pendingQueue.pop(), false );
 445     } else if ( !deferredScript && pendingQueue.count() > 1) {
 446         TokenizerString t = pendingQueue.pop();
 447         pendingQueue.top().prepend( t );
 448     }
 449 #if PROSPECTIVE_TOKENIZER_ENABLED
 450     if (!cachedScript.isEmpty() && !m_executingScript) {
 451         if (!m_prospectiveTokenizer)
 452             m_prospectiveTokenizer = new ProspectiveTokenizer(parser->docPtr());
 453         if (!m_prospectiveTokenizer->inProgress() && !pendingQueue.isEmpty()) {
 454             m_prospectiveTokenizer->begin();
 455             m_prospectiveTokenizer->write(pendingQueue.top());
 456         }
 457     }
 458 #endif
 459
 460 }
 461
 462 void HTMLTokenizer::scriptExecution( const QString& str, const QString& scriptURL,
 463                                      int baseLine)
 464 {
 465     bool oldscript = script;
 466     m_executingScript++;
 467     script = false;
 468     QString url;
 469     if (scriptURL.isNull() && view)
 470       url = static_cast<DocumentImpl*>(view->part()->document().handle())->URL().url();
 471     else
 472       url = scriptURL;
 473
 474     if (view)
 475         view->part()->executeScript(url,baseLine+1,Node(),str);
 476     m_executingScript--;
 477     script = oldscript;
 478 }
 479
 480 void HTMLTokenizer::parseComment(TokenizerString &src)
 481 {
 482     // SGML strict
 483     bool strict = parser->doc()->inStrictMode() && parser->doc()->htmlMode() != DocumentImpl::XHtml && !script && !style;
 484     int delimiterCount = 0;
 485     bool canClose = false;
 486
 487     checkScriptBuffer(src.length());
 488     while ( src.length() ) {
 489         scriptCode[ scriptCodeSize++ ] = *src;
 490
 491 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
 492         qDebug("comment is now: *%s*", src.toString().left(16).toLatin1().constData());
 493 #endif
 494
 495         if (strict)
 496         {
 497             if (src->unicode() == '-') {
 498                 delimiterCount++;
 499                 if (delimiterCount == 2) {
 500                     delimiterCount = 0;
 501                     canClose = !canClose;
 502                 }
 503             }
 504             else
 505                 delimiterCount = 0;
 506         }
 507
 508         if ((!strict || canClose) && src->unicode() == '>')
 509         {
 510             bool handleBrokenComments =  brokenComments && !( script || style );
 511             bool scriptEnd=false;
 512             if (!strict)
 513             {
 514                 if ( scriptCodeSize > 2 && scriptCode[scriptCodeSize-3] == '-' &&
 515                      scriptCode[scriptCodeSize-2] == '-' )
 516                     scriptEnd=true;
 517             }
 518
 519             if (canClose || handleBrokenComments || scriptEnd ){
 520                 ++src;
 521                 if ( !( title || script || xmp || textarea || style) ) {
 522                     checkScriptBuffer();
 523                     scriptCode[ scriptCodeSize ] = 0;
 524                     scriptCode[ scriptCodeSize + 1 ] = 0;
 525                     currToken.tid = ID_COMMENT;
 526                     processListing(TokenizerString(scriptCode, scriptCodeSize - 2));
 527                     processToken();
 528                     currToken.tid = ID_COMMENT + ID_CLOSE_TAG;
 529                     processToken();
 530                     scriptCodeSize = 0;
 531                 }
 532                 comment = false;
 533                 return; // Finished parsing comment
 534             }
 535         }
 536         ++src;
 537     }
 538 }
 539
 540 void HTMLTokenizer::parseDoctypeComment(TokenizerString &src)
 541 {
 542     while (!src.isEmpty()) {
 543         QChar c = *src;
 544         switch (doctypeComment) {
 545             case DoctypeCommentHalfBegin: {
 546                 if (c != '-') {
 547                     // Ooops, it's not comment
 548                     doctypeComment = DoctypeCommentBogus;
 549                     return;
 550                 } else {
 551                     // Doctype comment begins
 552                     doctypeComment = DoctypeComment;
 553                     ++src;
 554                 }
 555                 break;
 556             }
 557             case DoctypeComment: {
 558                 if (c == '-') {
 559                     // Perhaps this is end of comment
 560                     doctypeComment = DoctypeCommentHalfEnd;
 561                     ++src;
 562                 } else {
 563                     // Keep scanning for '--'
 564                     ++src;
 565                 }
 566                 break;
 567             }
 568             case DoctypeCommentHalfEnd: {
 569                 if (c == '-') {
 570                     // Doctype comment ends
 571                     doctypeComment = DoctypeCommentEnd;
 572                     return;
 573                 } else {
 574                     // It's not '--'
 575                     ++src;
 576                     doctypeComment = DoctypeComment;
 577                 }
 578                 break;
 579             }
 580             default: {
 581                 assert(!"Undefined doctype comment state");
 582                 break;
 583             }
 584         }
 585     }
 586 }
 587
 588 void HTMLTokenizer::parseDoctype(TokenizerString &src)
 589 {
 590     while (!src.isEmpty() && doctype) {
 591         QChar c;
 592         bool isWhitespace = false;
 593         int dontAdvance = 0;
 594         if (doctypeComment == DoctypeCommentEnd) {
 595             doctypeComment = NoDoctypeComment;
 596             isWhitespace = true;
 597         } else if (doctypeComment == DoctypeCommentBogus) {
 598             doctypeComment = NoDoctypeComment;
 599             c = '-';
 600             dontAdvance++;
 601         } else {
 602             c = *src;
 603             if (doctypeAllowComment) {
 604                 if (!doctypeComment && c == '-') {
 605                     doctypeComment = DoctypeCommentHalfBegin;
 606                     ++src;
 607                 }
 608                 if (doctypeComment) {
 609                     parseDoctypeComment(src);
 610                     continue;
 611                 }
 612                 isWhitespace = c == '\r' || c == '\n' || c == '\t' || c == ' ';
 613             }
 614         }
 615
 616         switch (doctypeToken.state) {
 617             case DoctypeBegin: {
 618                 doctypeToken.state = DoctypeBeforeName;
 619                 if (isWhitespace) {
 620                     // nothing
 621                 }
 622                 break;
 623             }
 624             case DoctypeBeforeName: {
 625                 if (c == '>') {
 626                     // Malformed. Just exit.
 627                     doctype = false;
 628                 } else if (isWhitespace) {
 629                     // nothing
 630                 } else {
 631                     dontAdvance++;
 632                     doctypeToken.state = DoctypeName;
 633                 }
 634                 break;
 635             }
 636             case DoctypeName: {
 637                 if (c == '>') {
 638                     // Valid doctype. Emit it.
 639                     doctype = false;
 640                     processDoctypeToken();
 641                 } else if (isWhitespace) {
 642                     doctypeSearchCount = 0; // Used now to scan for PUBLIC
 643                     doctypeSecondarySearchCount = 0; // Used now to scan for SYSTEM
 644                     doctypeToken.state = DoctypeAfterName;
 645                 } else {
 646                     doctypeToken.name.append(c);
 647                 }
 648                 break;
 649             }
 650             case DoctypeAfterName: {
 651                 if (c == '>') {
 652                     // Valid doctype. Emit it.
 653                     doctype = false;
 654                     processDoctypeToken();
 655                 } else if (c == '[') {
 656                     if(doctypeSearchCount > 0 || doctypeSecondarySearchCount > 0) { // is there any public/system indicator before?
 657                         doctypeSearchCount = doctypeSecondarySearchCount = 0;
 658                         doctypeToken.state = DoctypeBogus;
 659                     }
 660                     // Found internal subset
 661                     doctypeToken.state = DoctypeInternalSubset;
 662                     doctypeAllowComment = false;
 663                 } else if (!isWhitespace) {
 664                     if (c.toLower() == publicStart[doctypeSearchCount]) {
 665                         doctypeSearchCount++;
 666                         if(doctypeSearchCount == 6)
 667                             // Found 'PUBLIC' sequence
 668                             doctypeToken.state = DoctypeBeforePublicID;
 669                     } else if (doctypeSearchCount > 0) {
 670                         doctypeSearchCount = 0;
 671                         doctypeToken.state = DoctypeBogus;
 672                     } else if (c.toLower() == systemStart[doctypeSecondarySearchCount]) {
 673                         doctypeSecondarySearchCount++;
 674                         if(doctypeSecondarySearchCount == 6)
 675                             // Found 'SYSTEM' sequence
 676                             doctypeToken.state = DoctypeBeforeSystemID;
 677                     } else {
 678                         doctypeSecondarySearchCount = 0;
 679                         doctypeToken.state = DoctypeBogus;
 680                     }
 681                 } else {
 682                     // Whitespace keeps us in the after name state
 683                 }
 684                 break;
 685             }
 686             case DoctypeBeforePublicID: {
 687                 if (c == '\"' || c == '\'') {
 688                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
 689                     doctypeToken.state = DoctypePublicID;
 690                     doctypeAllowComment = false;
 691                 } else if (c == '>') {
 692                     // Considered bogus. Don't process the doctype.
 693                     doctype = false;
 694                 } else if (isWhitespace) {
 695                     // nothing
 696                 } else
 697                     doctypeToken.state = DoctypeBogus;
 698                 break;
 699             }
 700             case DoctypePublicID: {
 701                 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
 702                     doctypeToken.state = DoctypeAfterPublicID;
 703                     doctypeAllowComment = true;
 704                 } else if (c == '>') {
 705                     // Considered bogus. Don't process the doctype.
 706                     doctype = false;
 707                 } else {
 708                     doctypeToken.publicID.append(c);
 709                 }
 710                 break;
 711             }
 712             case DoctypeAfterPublicID: {
 713                 if (c == '\"' || c == '\'') {
 714                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
 715                     doctypeToken.state = DoctypeSystemID;
 716                 } else if (c == '>') {
 717                     // Valid doctype. Emit it now.
 718                     doctype = false;
 719                     processDoctypeToken();
 720                 } else if (isWhitespace) {
 721                     // nothing
 722                 } else if (c == '[') {
 723                     // Found internal subset
 724                     doctypeToken.state = DoctypeInternalSubset;
 725                     doctypeAllowComment = false;
 726                 } else
 727                     doctypeToken.state = DoctypeBogus;
 728                 break;
 729             }
 730             case DoctypeBeforeSystemID: {
 731                 if (c == '\"' || c == '\'') {
 732                     tquote = c == '\"' ? DoubleQuote : SingleQuote;
 733                     doctypeToken.state = DoctypeSystemID;
 734                     doctypeAllowComment = false;
 735                 } else if (c == '>') {
 736                     // Considered bogus. Don't process the doctype.
 737                     doctype = false;
 738                 } else if (isWhitespace) {
 739                     // nothing
 740                 } else
 741                     doctypeToken.state = DoctypeBogus;
 742                 break;
 743             }
 744             case DoctypeSystemID: {
 745                 if ((c == '\"' && tquote == DoubleQuote) || (c == '\'' && tquote == SingleQuote)) {
 746                     doctypeToken.state = DoctypeAfterSystemID;
 747                     doctypeAllowComment = true;
 748                 } else if (c == '>') {
 749                     // Considered bogus. Don't process the doctype.
 750                     doctype = false;
 751                 } else {
 752                     doctypeToken.systemID.append(c);
 753                 }
 754                 break;
 755             }
 756             case DoctypeAfterSystemID: {
 757                 if (c == '>') {
 758                     // Valid doctype. Emit it now.
 759                     doctype = false;
 760                     processDoctypeToken();
 761                 } else if (isWhitespace) {
 762                     // nothing
 763                 } else if (c == '[') {
 764                     // Found internal subset
 765                     doctypeToken.state = DoctypeInternalSubset;
 766                     doctypeAllowComment = false;
 767                 } else {
 768                     doctypeToken.state = DoctypeBogus;
 769                 }
 770                 break;
 771             }
 772             case DoctypeInternalSubset: {
 773                 if(c == ']') {
 774                     // Done
 775                     doctypeToken.state = DoctypeAfterInternalSubset;
 776                     doctypeAllowComment = true;
 777                 } else {
 778                     doctypeToken.internalSubset.append(c);
 779                 }
 780                 break;
 781             }
 782             case DoctypeAfterInternalSubset: {
 783                 if (c == '>') {
 784                     // Valid doctype. Emit it now.
 785                     doctype = false;
 786                     processDoctypeToken();
 787                 } else if (isWhitespace) {
 788                     // nothing
 789                 } else
 790                     doctypeToken.state = DoctypeBogus;
 791                 break;
 792             }
 793             case DoctypeBogus: {
 794                 if (c == '>') {
 795                     // Done with the bogus doctype.
 796                     doctype = false;
 797                 } else {
 798                     // Just keep scanning for '>'
 799                 }
 800                 break;
 801             }
 802             default:
 803                 break;
 804         }
 805         if (!dontAdvance)
 806             ++src;
 807         else if (dontAdvance == 1)
 808             continue;
 809         else // double dontAdvance++, do workaround
 810             doctypeComment = DoctypeCommentBogus;
 811     }
 812 }
 813
 814 void HTMLTokenizer::parseServer(TokenizerString &src)
 815 {
 816     checkScriptBuffer(src.length());
 817     while ( !src.isEmpty() ) {
 818         scriptCode[ scriptCodeSize++ ] = *src;
 819         if (src->unicode() == '>' &&
 820             scriptCodeSize > 1 && scriptCode[scriptCodeSize-2] == '%') {
 821             ++src;
 822             server = false;
 823             scriptCodeSize = 0;
 824             return; // Finished parsing server include
 825         }
 826         ++src;
 827     }
 828 }
 829
 830 void HTMLTokenizer::parseProcessingInstruction(TokenizerString &src)
 831 {
 832     char oldchar = 0;
 833     while ( !src.isEmpty() )
 834     {
 835         unsigned char chbegin = src->toLatin1();
 836         if(chbegin == '\'') {
 837             tquote = tquote == SingleQuote ? NoQuote : SingleQuote;
 838         }
 839         else if(chbegin == '\"') {
 840             tquote = tquote == DoubleQuote ? NoQuote : DoubleQuote;
 841         }
 842         // Look for '?>'
 843         // some crappy sites omit the "?" before it, so
 844         // we look for an unquoted '>' instead. (IE compatible)
 845         else if ( chbegin == '>' && ( !tquote || oldchar == '?' ) )
 846         {
 847             // We got a '?>' sequence
 848             processingInstruction = false;
 849             ++src;
 850             discard=LFDiscard;
 851             return; // Finished parsing comment!
 852         }
 853         ++src;
 854         oldchar = chbegin;
 855     }
 856 }
 857
 858 void HTMLTokenizer::parseText(TokenizerString &src)
 859 {
 860     while ( !src.isEmpty() )
 861     {
 862         // do we need to enlarge the buffer?
 863         checkBuffer();
 864
 865         // ascii is okay because we only do ascii comparisons
 866         unsigned char chbegin = src->toLatin1();
 867
 868         if (skipLF && ( chbegin != '\n' ))
 869         {
 870             skipLF = false;
 871         }
 872
 873         if (skipLF)
 874         {
 875             skipLF = false;
 876             ++src;
 877         }
 878         else if (( chbegin == '\n' ) || ( chbegin == '\r' ))
 879         {
 880             if (chbegin == '\r')
 881                 skipLF = true;
 882
 883             *dest++ = '\n';
 884             ++src;
 885         }
 886         else {
 887             *dest++ = *src;
 888             ++src;
 889         }
 890     }
 891 }
 892
 893
 894 void HTMLTokenizer::parseEntity(TokenizerString &src, QChar *&dest, bool start)
 895 {
 896     if( start )
 897     {
 898         cBufferPos = 0;
 899         entityLen = 0;
 900         Entity = SearchEntity;
 901     }
 902
 903     while( !src.isEmpty() )
 904     {
 905         ushort cc = src->unicode();
 906         switch(Entity) {
 907         case NoEntity:
 908             return;
 909
 910             break;
 911         case SearchEntity:
 912             if(cc == '#') {
 913                 cBuffer[cBufferPos++] = cc;
 914                 ++src;
 915                 Entity = NumericSearch;
 916             }
 917             else
 918                 Entity = EntityName;
 919
 920             break;
 921
 922         case NumericSearch:
 923             if(cc == 'x' || cc == 'X') {
 924                 cBuffer[cBufferPos++] = cc;
 925                 ++src;
 926                 Entity = Hexadecimal;
 927             }
 928             else if(cc >= '0' && cc <= '9')
 929                 Entity = Decimal;
 930             else
 931                 Entity = SearchSemicolon;
 932
 933             break;
 934
 935         case Hexadecimal:
 936         {
 937             int uc = EntityChar.unicode();
 938             int ll = qMin<uint>(src.length(), 8);
 939             while(ll--) {
 940                 QChar csrc(src->toLower());
 941                 cc = csrc.cell();
 942
 943                 if(csrc.row() || !((cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f'))) {
 944                     break;
 945                 }
 946                 uc = uc*16 + (cc - ( cc < 'a' ? '0' : 'a' - 10));
 947                 cBuffer[cBufferPos++] = cc;
 948                 ++src;
 949             }
 950             EntityChar = QChar(uc);
 951             Entity = SearchSemicolon;
 952             break;
 953         }
 954         case Decimal:
 955         {
 956             int uc = EntityChar.unicode();
 957             int ll = qMin(src.length(), 9-cBufferPos);
 958             while(ll--) {
 959                 cc = src->cell();
 960
 961                 if(src->row() || !(cc >= '0' && cc <= '9')) {
 962                     Entity = SearchSemicolon;
 963                     break;
 964                 }
 965
 966                 uc = uc * 10 + (cc - '0');
 967                 cBuffer[cBufferPos++] = cc;
 968                 ++src;
 969             }
 970             EntityChar = QChar(uc);
 971             if(cBufferPos == 9)  Entity = SearchSemicolon;
 972             break;
 973         }
 974         case EntityName:
 975         {
 976             int ll = qMin(src.length(), 9-cBufferPos);
 977             while(ll--) {
 978                 QChar csrc = *src;
 979                 cc = csrc.cell();
 980
 981                 if(csrc.row() || !((cc >= 'a' && cc <= 'z') ||
 982                                    (cc >= '0' && cc <= '9') || (cc >= 'A' && cc <= 'Z'))) {
 983                     Entity = SearchSemicolon;
 984                     break;
 985                 }
 986
 987                 cBuffer[cBufferPos++] = cc;
 988                 ++src;
 989
 990                 // be IE compatible and interpret even unterminated entities
 991                 // outside tags. like "foo &nbspstuff bla".
 992                 if ( tag == NoTag ) {
 993                     const entity* e = kde_findEntity(cBuffer, cBufferPos);
 994                     if ( e && e->code < 256 ) {
 995                         EntityChar = e->code;
 996                         entityLen = cBufferPos;
 997                     }
 998                 }
 999             }
1000             if(cBufferPos == 9) Entity = SearchSemicolon;
1001             if(Entity == SearchSemicolon) {
1002                 if(cBufferPos > 1) {
1003                     const entity *e = kde_findEntity(cBuffer, cBufferPos);
1004                     // IE only accepts unterminated entities < 256,
1005                     // Gecko accepts them all, but only outside tags
1006                     if(e && ( tag == NoTag || e->code < 256 || *src == ';' )) {
1007                         EntityChar = e->code;
1008                         entityLen = cBufferPos;
1009                     }
1010                 }
1011             }
1012             break;
1013         }
1014         case SearchSemicolon:
1015 #ifdef TOKEN_DEBUG
1016             kDebug( 6036 ) << "ENTITY " << EntityChar.unicode();
1017 #endif
1018             fixUpChar(EntityChar);
1019
1020             if (*src == ';')
1021                     ++src;
1022
1023             if ( !EntityChar.isNull() ) {
1024                 checkBuffer();
1025                 if (entityLen > 0 && entityLen < cBufferPos) {
1026                     int rem = cBufferPos - entityLen;
1027                     src.prepend( TokenizerString(QString::fromAscii(cBuffer+entityLen, rem)) );
1028                 }
1029                 src.push( EntityChar );
1030             } else {
1031 #ifdef TOKEN_DEBUG
1032                 kDebug( 6036 ) << "unknown entity!";
1033 #endif
1034                 checkBuffer(10);
1035                 // ignore the sequence, add it to the buffer as plaintext
1036                 *dest++ = '&';
1037                 for(unsigned int i = 0; i < cBufferPos; i++)
1038                     dest[i] = cBuffer[i];
1039                 dest += cBufferPos;
1040                 if (pre)
1041                     prePos += cBufferPos+1;
1042             }
1043
1044             Entity = NoEntity;
1045             EntityChar = QChar::Null;
1046             return;
1047         };
1048     }
1049 }
1050
1051 void HTMLTokenizer::parseTag(TokenizerString &src)
1052 {
1053     assert(!Entity );
1054     checkScriptBuffer( src.length() );
1055
1056     while ( !src.isEmpty() )
1057     {
1058         checkBuffer();
1059 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1060         uint l = 0;
1061         while(l < src.length() && (src.toString()[l]).toLatin1().constData() != '>')
1062             l++;
1063         qDebug("src is now: *%s*, tquote: %d",
1064                src.toString().left(l).toLatin1().constData(), tquote);
1065 #endif
1066         switch(tag) {
1067         case NoTag:
1068             return;
1069         case TagName:
1070         {
1071 #if defined(TOKEN_DEBUG) &&  TOKEN_DEBUG > 1
1072             qDebug("TagName");
1073 #endif
1074             if (searchCount > 0)
1075             {
1076                 if (*src == commentStart[searchCount])
1077                 {
1078                     searchCount++;
1079                     if (searchCount == 2)
1080                         doctypeSearchCount++; // A '!' is also part of doctype, so we are moving through that still as well
1081                     else
1082                         doctypeSearchCount = 0;
1083
1084                     if (searchCount == 4)
1085                     {
1086 #ifdef TOKEN_DEBUG
1087                         kDebug( 6036 ) << "Found comment";
1088 #endif
1089                         // Found '<!--' sequence
1090                         ++src;
1091                         dest = buffer; // ignore the previous part of this tag
1092                         tag = NoTag;
1093
1094                         comment = true;
1095                         parseComment(src);
1096                         return; // Finished parsing tag!
1097                     }
1098                     // cuts of high part, is okay
1099                     cBuffer[cBufferPos++] = src->cell();
1100                     ++src;
1101                     break;
1102                 }
1103                 else
1104                     searchCount = 0; // Stop looking for '<!--' sequence
1105             }
1106
1107             if (doctypeSearchCount > 0) {
1108                 if((*src).toLower() == doctypeStart[doctypeSearchCount]) {
1109                     doctypeSearchCount++;
1110                     cBuffer[cBufferPos++] = src->cell();
1111                     ++src;
1112                     if(doctypeSearchCount == 9) {
1113                         // Found '<!DOCTYPE' sequence
1114                         tag = NoTag;
1115                         doctypeAllowComment = true;
1116                         doctypeComment = NoDoctypeComment;
1117                         doctypeToken.reset();
1118                         doctype = true;
1119
1120                         parseDoctype(src);
1121                         return;
1122                     }
1123                     break;
1124                 } else
1125                     doctypeSearchCount = 0; // Stop looking for '<!DOCTYPE' sequence
1126             }
1127
1128             bool finish = false;
1129             unsigned int ll = qMin(src.length(), CBUFLEN-cBufferPos);
1130             while(ll--) {
1131                 ushort curchar = src->unicode();
1132                 if(curchar <= ' ' || curchar == '>' ) {
1133                     finish = true;
1134                     break;
1135                 }
1136                 // this is a nasty performance trick. will work for the A-Z
1137                 // characters, but not for others. if it contains one,
1138                 // we fail anyway
1139                 char cc = curchar;
1140                 cBuffer[cBufferPos++] = cc | 0x20;
1141                 ++src;
1142             }
1143
1144             // Disadvantage: we add the possible rest of the tag
1145             // as attribute names. ### judge if this causes problems
1146             if(finish || CBUFLEN == cBufferPos) {
1147                 bool beginTag;
1148                 char* ptr = cBuffer;
1149                 unsigned int len = cBufferPos;
1150                 cBuffer[cBufferPos] = '\0';
1151                 if ((cBufferPos > 0) && (*ptr == '/'))
1152                 {
1153                     // End Tag
1154                     beginTag = false;
1155                     ptr++;
1156                     len--;
1157                 }
1158                 else
1159                     // Start Tag
1160                     beginTag = true;
1161                 // Accept empty xml tags like <br/>
1162                 if(len > 1 && ptr[len-1] == '/' ) {
1163                     ptr[--len] = '\0';
1164                     // if its like <br/> and not like <input/ value=foo>, take it as flat
1165                     if (*src == '>')
1166                         currToken.flat = true;
1167                 }
1168
1169                 uint tagID = khtml::getTagID(ptr, len);
1170                 if (!tagID) {
1171                     DOMString tagName(ptr);
1172                     DocumentImpl *doc = parser->docPtr();
1173                     if (Element::khtmlValidQualifiedName(tagName))
1174                         tagID = doc->getId(NodeImpl::ElementId, tagName.implementation(), false, false);
1175 #ifdef TOKEN_DEBUG
1176                     QByteArray tmp(ptr, len+1);
1177                     kDebug( 6036 ) << "Unknown tag: \"" << tmp.data() << "\"";
1178 #endif
1179                 }
1180                 if (tagID) {
1181 #ifdef TOKEN_DEBUG
1182                     QByteArray tmp(ptr, len+1);
1183                     kDebug( 6036 ) << "found tag id=" << tagID << ": " << tmp.data();
1184 #endif
1185                     currToken.tid = beginTag ? tagID : tagID + ID_CLOSE_TAG;
1186                 }
1187                 dest = buffer;
1188                 tag = SearchAttribute;
1189                 cBufferPos = 0;
1190             }
1191             break;
1192         }
1193         case SearchAttribute:
1194         {
1195 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1196                 qDebug("SearchAttribute");
1197 #endif
1198             bool atespace = false;
1199             ushort curchar;
1200             while(!src.isEmpty()) {
1201                 curchar = src->unicode();
1202                 if(curchar > ' ') {
1203                     if(curchar == '<' || curchar == '>')
1204                         tag = SearchEnd;
1205                     else if(atespace && (curchar == '\'' || curchar == '"'))
1206                     {
1207                         tag = SearchValue;
1208                         *dest++ = 0;
1209                         attrName.clear();
1210                     }
1211                     else
1212                         tag = AttributeName;
1213
1214                     cBufferPos = 0;
1215                     break;
1216                 }
1217                 atespace = true;
1218                 ++src;
1219             }
1220             break;
1221         }
1222         case AttributeName:
1223         {
1224 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1225                 qDebug("AttributeName");
1226 #endif
1227             ushort curchar;
1228             int ll = qMin(src.length(), CBUFLEN-cBufferPos);
1229
1230             while(ll--) {
1231                 curchar = src->unicode();
1232                 if(curchar <= '>') {
1233                     if(curchar <= ' ' || curchar == '=' || curchar == '>') {
1234                         unsigned int a;
1235                         cBuffer[cBufferPos] = '\0';
1236                         a = khtml::getAttrID(cBuffer, cBufferPos);
1237
1238                         if ( !a ) {
1239                             // did we just get /> or e.g checked/>
1240                             if (curchar == '>' && cBufferPos >=1 && cBuffer[cBufferPos-1] == '/') {
1241                                 currToken.flat = true;
1242                                 if (cBufferPos>1)
1243                                     a = khtml::getAttrID(cBuffer, cBufferPos-1);
1244                             }
1245                             if (!a)
1246                                 attrName = QLatin1String(QByteArray(cBuffer, cBufferPos+1).data());
1247                         }
1248
1249                         dest = buffer;
1250                         *dest++ = a;
1251 #ifdef TOKEN_DEBUG
1252                         if (!a || (cBufferPos && *cBuffer == '!'))
1253                             kDebug( 6036 ) << "Unknown attribute: *" << QByteArray(cBuffer, cBufferPos+1).data() << "*";
1254                         else
1255                             kDebug( 6036 ) << "Known attribute: " << QByteArray(cBuffer, cBufferPos+1).data();
1256 #endif
1257
1258                         tag = SearchEqual;
1259                         break;
1260                     }
1261                 }
1262                 cBuffer[cBufferPos++] =
1263                      (  curchar >= 'A' && curchar <= 'Z' ) ? curchar | 0x20 : curchar;
1264                 ++src;
1265             }
1266             if ( cBufferPos == CBUFLEN ) {
1267                 cBuffer[cBufferPos] = '\0';
1268                 attrName = QLatin1String(QByteArray(cBuffer, cBufferPos+1).data());
1269                 dest = buffer;
1270                 *dest++ = 0;
1271                 tag = SearchEqual;
1272             }
1273             break;
1274         }
1275         case SearchEqual:
1276         {
1277 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1278                 qDebug("SearchEqual");
1279 #endif
1280             ushort curchar;
1281             bool atespace = false;
1282             while(!src.isEmpty()) {
1283                 curchar = src->unicode();
1284                 if(curchar > ' ') {
1285                     if(curchar == '=') {
1286 #ifdef TOKEN_DEBUG
1287                         kDebug(6036) << "found equal";
1288 #endif
1289                         tag = SearchValue;
1290                         ++src;
1291                     }
1292                     else if(atespace && (curchar == '\'' || curchar == '"'))
1293                     {
1294                         tag = SearchValue;
1295                         *dest++ = 0;
1296                         attrName.clear();
1297                     }
1298                     else {
1299                         DOMString v("");
1300                         currToken.addAttribute(parser->docPtr(), buffer, attrName, v);
1301                         dest = buffer;
1302                         tag = SearchAttribute;
1303                     }
1304                     break;
1305                 }
1306                 atespace = true;
1307                 ++src;
1308             }
1309             break;
1310         }
1311         case SearchValue:
1312         {
1313             ushort curchar;
1314             while(!src.isEmpty()) {
1315                 curchar = src->unicode();
1316                 if(curchar > ' ') {
1317                     if(( curchar == '\'' || curchar == '\"' )) {
1318                         tquote = curchar == '\"' ? DoubleQuote : SingleQuote;
1319                         tag = QuotedValue;
1320                         ++src;
1321                     } else
1322                         tag = Value;
1323
1324                     break;
1325                 }
1326                 ++src;
1327             }
1328             break;
1329         }
1330         case QuotedValue:
1331         {
1332 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1333                 qDebug("QuotedValue");
1334 #endif
1335             ushort curchar;
1336             while(!src.isEmpty()) {
1337                 checkBuffer();
1338
1339                 curchar = src->unicode();
1340                 if(curchar <= '\'' && !src.escaped()) {
1341                     // ### attributes like '&{blaa....};' are supposed to be treated as jscript.
1342                     if ( curchar == '&' )
1343                     {
1344                         ++src;
1345                         parseEntity(src, dest, true);
1346                         break;
1347                     }
1348                     else if ( (tquote == SingleQuote && curchar == '\'') ||
1349                               (tquote == DoubleQuote && curchar == '\"') )
1350                     {
1351                         // some <input type=hidden> rely on trailing spaces. argh
1352                         while(dest > buffer+1 && (*(dest-1) == '\n' || *(dest-1) == '\r'))
1353                             dest--; // remove trailing newlines
1354                         DOMString v(buffer+1, dest-buffer-1);
1355                         currToken.addAttribute(parser->docPtr(), buffer, attrName, v);
1356
1357                         dest = buffer;
1358                         tag = SearchAttribute;
1359                         tquote = NoQuote;
1360                         ++src;
1361                         break;
1362                     }
1363                 }
1364                 *dest++ = *src;
1365                 ++src;
1366             }
1367             break;
1368         }
1369         case Value:
1370         {
1371 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1372             qDebug("Value");
1373 #endif
1374             ushort curchar;
1375             while(!src.isEmpty()) {
1376                 checkBuffer();
1377                 curchar = src->unicode();
1378                 if(curchar <= '>' && !src.escaped()) {
1379                     // parse Entities
1380                     if ( curchar == '&' )
1381                     {
1382                         ++src;
1383                         parseEntity(src, dest, true);
1384                         break;
1385                     }
1386                     // no quotes. Every space means end of value
1387                     // '/' does not delimit in IE!
1388                     if ( curchar <= ' ' || curchar == '>' )
1389                     {
1390                         DOMString v(buffer+1, dest-buffer-1);
1391                         currToken.addAttribute(parser->docPtr(), buffer, attrName, v);
1392                         dest = buffer;
1393                         tag = SearchAttribute;
1394                         break;
1395                     }
1396                 }
1397
1398                 *dest++ = *src;
1399                 ++src;
1400             }
1401             break;
1402         }
1403         case SearchEnd:
1404         {
1405 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 1
1406                 qDebug("SearchEnd");
1407 #endif
1408             while(!src.isEmpty()) {
1409                 if(*src == '<' || *src == '>')
1410                     break;
1411
1412                 if (*src == '/')
1413                     currToken.flat = true;
1414
1415                 ++src;
1416             }
1417             if(src.isEmpty() && *src != '<' && *src != '>') break;
1418
1419             searchCount = 0; // Stop looking for '<!--' sequence
1420             tag = NoTag;
1421             tquote = NoQuote;
1422             if ( *src == '>' )
1423                 ++src;
1424
1425             if ( !currToken.tid ) //stop if tag is unknown
1426                 return;
1427
1428             uint tagID = currToken.tid;
1429 #if defined(TOKEN_DEBUG) && TOKEN_DEBUG > 0
1430             kDebug( 6036 ) << "appending Tag: " << tagID;
1431 #endif
1432             // If the tag requires an end tag it cannot be flat,
1433             // unless we are using the HTML parser to parse XHTML
1434             // The only exception is SCRIPT and priority 0 tokens.
1435             if (tagID < ID_CLOSE_TAG && tagID != ID_SCRIPT &&
1436                 DOM::endTagRequirement(tagID) == DOM::REQUIRED &&
1437                 parser->doc()->htmlMode() != DocumentImpl::XHtml)
1438                 currToken.flat = false;
1439
1440             bool beginTag = !currToken.flat && (tagID < ID_CLOSE_TAG);
1441
1442             if(tagID >= ID_CLOSE_TAG)
1443                 tagID -= ID_CLOSE_TAG;
1444             else if ( !brokenScript && tagID == ID_SCRIPT ) {
1445                 DOMStringImpl* a = 0;
1446                 bool foundTypeAttribute = false;
1447                 scriptSrc = scriptSrcCharset = QString();
1448                 if ( currToken.attrs && /* potentially have a ATTR_SRC ? */
1449                      view &&  /* are we a regular tokenizer or just for innerHTML ? */
1450                      parser->doc()->view()->part()->jScriptEnabled() /* jscript allowed at all? */
1451                     ) {
1452                     if ( ( a = currToken.attrs->getValue( ATTR_SRC ) ) )
1453                         scriptSrc = parser->doc()->completeURL(khtml::parseURL( DOMString(a) ).string() );
1454                     if ( ( a = currToken.attrs->getValue( ATTR_CHARSET ) ) )
1455                         scriptSrcCharset = DOMString(a).string().trimmed();
1456                     if ( scriptSrcCharset.isEmpty() && view)
1457                         scriptSrcCharset = parser->doc()->view()->part()->encoding();
1458                     /* Check type before language, since language is deprecated */
1459                     if ((a = currToken.attrs->getValue(ATTR_TYPE)) != 0 && !DOMString(a).string().isEmpty())
1460                         foundTypeAttribute = true;
1461                     else
1462                         a = currToken.attrs->getValue(ATTR_LANGUAGE);
1463                 }
1464                 javascript = true;
1465
1466                 if( foundTypeAttribute ) {
1467                     /*
1468                         Mozilla 1.5 doesn't accept the text/javascript1.x formats, but WinIE 6 does.
1469                         Mozilla 1.5 doesn't accept text/jscript, text/ecmascript, and text/livescript, but WinIE 6 does.
1470                         Mozilla 1.5 accepts application/x-javascript, WinIE 6 doesn't.
1471                         Mozilla 1.5 allows leading and trailing whitespace, but WinIE 6 doesn't.
1472                         Mozilla 1.5 and WinIE 6 both accept the empty string, but neither accept a whitespace-only string.
1473                         We want to accept all the values that either of these browsers accept, but not other values.
1474                      */
1475                     QString type = DOMString(a).string().trimmed().toLower();
1476                     if( type.compare("text/javascript") != 0 &&
1477                         type.compare("text/javascript1.0") != 0 &&
1478                         type.compare("text/javascript1.1") != 0 &&
1479                         type.compare("text/javascript1.2") != 0 &&
1480                         type.compare("text/javascript1.3") != 0 &&
1481                         type.compare("text/javascript1.4") != 0 &&
1482                         type.compare("text/javascript1.5") != 0 &&
1483                         type.compare("text/jscript") != 0 &&
1484                         type.compare("text/ecmascript") != 0 &&
1485                         type.compare("text/livescript") != 0 &&
1486                         type.compare("application/x-javascript") != 0 &&
1487                         type.compare("application/x-ecmascript") != 0 &&
1488                         type.compare("application/javascript") != 0 &&
1489                         type.compare("application/ecmascript") != 0 )
1490                         javascript = false;
1491                 } else if( a ) {
1492                     /*
1493                      Mozilla 1.5 doesn't accept jscript or ecmascript, but WinIE 6 does.
1494                      Mozilla 1.5 accepts javascript1.0, javascript1.4, and javascript1.5, but WinIE 6 accepts only 1.1 - 1.3.
1495                      Neither Mozilla 1.5 nor WinIE 6 accept leading or trailing whitespace.
1496                      We want to accept all the values that either of these browsers accept, but not other values.
1497                      */
1498                     QString lang = DOMString(a).string();
1499                     lang = lang.toLower();
1500                     if( lang.compare("") != 0 &&
1501                         lang.compare("javascript") != 0 &&
1502                         lang.compare("javascript1.0") != 0 &&
1503                         lang.compare("javascript1.1") != 0 &&
1504                         lang.compare("javascript1.2") != 0 &&
1505                         lang.compare("javascript1.3") != 0 &&
1506                         lang.compare("javascript1.4") != 0 &&
1507                         lang.compare("javascript1.5") != 0 &&
1508                         lang.compare("ecmascript") != 0 &&
1509                         lang.compare("livescript") != 0 &&
1510                         lang.compare("jscript") )
1511                         javascript = false;
1512                 }
1513             }
1514
1515             processToken();
1516
1517             if ( parser->selectMode() && beginTag)
1518                 discard = AllDiscard;
1519
1520             switch( tagID ) {
1521             case ID_PRE:
1522                 pre = beginTag;
1523                 if (beginTag)
1524                     discard = LFDiscard;
1525                 prePos = 0;
1526                 break;
1527             case ID_BR:
1528                 prePos = 0;
1529                 break;
1530             case ID_SCRIPT:
1531                 if (beginTag) {
1532                     searchStopper = scriptEnd;
1533                     searchStopperLen = 8;
1534                     script = true;
1535                     parseSpecial(src);
1536                 }
1537                 else if (tagID < ID_CLOSE_TAG) // Handle <script src="foo"/>
1538                     scriptHandler();
1539                 break;
1540             case ID_STYLE:
1541                 if (beginTag) {
1542                     searchStopper = styleEnd;
1543                     searchStopperLen = 7;
1544                     style = true;
1545                     parseSpecial(src);
1546                 }
1547                 break;
1548             case ID_TEXTAREA:
1549                 if(beginTag) {
1550                     searchStopper = textareaEnd;
1551                     searchStopperLen = 10;
1552                     textarea = true;
1553                     discard = NoneDiscard;
1554                     parseSpecial(src);
1555                 }
1556                 break;
1557             case ID_TITLE:
1558                 if (beginTag) {
1559                     searchStopper = titleEnd;
1560                     searchStopperLen = 7;
1561                     title = true;
1562                     parseSpecial(src);
1563                 }
1564                 break;
1565             case ID_XMP:
1566                 if (beginTag) {
1567                     searchStopper = xmpEnd;
1568                     searchStopperLen = 5;
1569                     xmp = true;
1570                     parseSpecial(src);
1571                 }
1572                 break;
1573             case ID_SELECT:
1574                 select = beginTag;
1575                 break;
1576             case ID_PLAINTEXT:
1577                 plaintext = beginTag;
1578                 break;
1579             }
1580             return; // Finished parsing tag!
1581         }
1582         } // end switch
1583     }
1584     return;
1585 }
1586
1587 void HTMLTokenizer::addPending()
1588 {
         // Writes the whitespace remembered in `pending` to the output cursor
         // `dest`, then clears `pending`.
         //
         // Three output modes, tested in this order:
         //  - inside a select (and not in a comment or script): always one
         //    plain space, whatever kind of whitespace was pending; prePos is
         //    left untouched;
         //  - inside a textarea: the pending character is written literally
         //    (a tab stays a tab) and prePos is advanced accordingly;
         //  - otherwise: space and LF are written literally, a tab is expanded
         //    to spaces up to the next TAB_SIZE column.
         //
         // Must only be called while `pending` is not NonePending (both
         // switches assert on it). No bounds check is done here: the tab
         // expansion alone can write up to TAB_SIZE characters through `dest`.
1589     if ( select && !(comment || script))
1590     {
1591         *dest++ = ' ';
1592     }
1593     else if ( textarea )
1594     {
1595         switch(pending) {
1596         case LFPending:  *dest++ = QLatin1Char('\n'); prePos = 0; break;
1597         case SpacePending: *dest++ = QLatin1Char(' '); ++prePos; break;
             // Tab is kept as a real '\t'; only the column counter jumps to the next tab stop.
1598         case TabPending: *dest++ = QLatin1Char('\t'); prePos += TAB_SIZE - (prePos % TAB_SIZE); break;
1599         case NonePending:
                 // Last label of the switch, so the missing break is harmless.
1600             assert(0);
1601         }
1602     }
1603     else
1604     {
             // Number of spaces a pending tab expands to.
1605         int p;
1606
1607         switch (pending)
1608         {
1609         case SpacePending:
1610             // Insert a breaking space
1611             *dest++ = QLatin1Char(' ');
1612             prePos++;
1613             break;
1614
1615         case LFPending:
1616             *dest = QLatin1Char('\n');
1617             dest++;
                 // A newline starts a fresh line, so the column counter restarts.
1618             prePos = 0;
1619             break;
1620
1621         case TabPending:
                 // Distance from the current column to the next multiple of TAB_SIZE (1..TAB_SIZE).
1622             p = TAB_SIZE - ( prePos % TAB_SIZE );
1623             for ( int x = 0; x < p; x++ )
1624                 *dest++ = QLatin1Char(' ');
1625             prePos += p;
1626             break;
1627
1628         case NonePending:
1629             assert(0);
1630             break;
1631         }
1632     }
1633
         // The pending whitespace has been emitted in every branch above.
1634     pending = NonePending;
1635 }
1636
1637 void HTMLTokenizer::write( const TokenizerString &str, bool appendData )
1638 {
         // Entry point for new input. Depending on the tokenizer state, `str` is
         //  - dropped (no token buffer any more),
         //  - queued in pendingQueue for later (script running / external
         //    scripts outstanding),
         //  - appended to `src` without parsing (tokenizer on hold), or
         //  - appended to `src` and tokenized by the character loop below.
         //
         // appendData only matters on the queueing path: it selects which
         // pendingQueue entry receives the data and whether the prospective
         // tokenizer is fed as well.
1639 #ifdef TOKEN_DEBUG
1640     kDebug( 6036 ) << this << " Tokenizer::write(\"" << str.toString() << "\"," << appendData << ")";
1641 #endif
1642
         // end() sets buffer to 0 once parsing is over; nothing can be tokenized then.
1643     if ( !buffer )
1644         return;
1645
1646     if ( ( m_executingScript && appendData ) || cachedScript.count() ) {
1647         // don't parse; we will do this later
             // appendData == true goes to the bottom entry of the queue,
             // anything else to the top entry.
1648         if (pendingQueue.isEmpty())
1649             pendingQueue.push(str);
1650         else if (appendData)
1651             pendingQueue.bottom().append(str);
1652         else
1653             pendingQueue.top().append(str);
1654 #if PROSPECTIVE_TOKENIZER_ENABLED
1655         if (m_prospectiveTokenizer && m_prospectiveTokenizer->inProgress() && appendData)
1656             m_prospectiveTokenizer->write(str);
1657 #endif
1658         return;
1659     }
1660
         // On hold: only accumulate the input, do not tokenize it now.
1661     if ( onHold ) {
1662         src.append(str);
1663         return;
1664     }
1665
         // Continue after unconsumed input if there is any; otherwise start a
         // fresh source (setSrc also folds the old line count into lineno).
1666     if (!src.isEmpty())
1667         src.append(str);
1668     else
1669         setSrc(str);
1670     m_abort = false;
1671
1672 //     if (Entity)
1673 //         parseEntity(src, dest);
1674
         // Main loop: one pass per character, or per chunk consumed by a sub-parser.
1675     while ( !src.isEmpty() )
1676     {
             // Note: an abort returns directly and skips the end() check after the loop.
1677         if ( m_abort )
1678             return;
1679         // do we need to enlarge the buffer?
1680         checkBuffer();
1681
1682         ushort cc = src->unicode();
1683
             // skipLF is set after a '\r' (newline branch below); it only
             // survives if the very next character is '\n'.
1684         if (skipLF && (cc != '\n'))
1685             skipLF = false;
1686
1687         if (skipLF) {
                 // Swallow the '\n' of a CRLF pair; the '\r' was already handled as the line break.
1688             skipLF = false;
1689             ++src;
1690         }
             // An active sub-parser state takes over. The order of these tests
             // is the priority among the states.
1691         else if ( Entity )
1692             parseEntity( src, dest );
1693         else if ( plaintext )
1694             parseText( src );
1695         else if (script)
1696             parseSpecial(src);
1697         else if (style)
1698             parseSpecial(src);
1699         else if (xmp)
1700             parseSpecial(src);
1701         else if (textarea)
1702             parseSpecial(src);
1703         else if (title)
1704             parseSpecial(src);
1705         else if (comment)
1706             parseComment(src);
1707         else if (doctypeComment && doctypeComment != DoctypeCommentEnd && doctypeComment != DoctypeCommentBogus)
1708             parseDoctypeComment(src);
1709         else if (doctype)
1710             parseDoctype(src);
1711         else if (server)
1712             parseServer(src);
1713         else if (processingInstruction)
1714             parseProcessingInstruction(src);
1715         else if (tag)
1716             parseTag(src);
1717         else if ( startTag )
1718         {
                 // The previous character was an unescaped '<' (see the '<'
                 // branch below); cc decides what kind of markup starts here.
1719             startTag = false;
                 // NOTE(review): endTag is assigned below but only read by the
                 // commented-out discard line further down.
1720             bool endTag = false;
1721
1722             switch(cc) {
1723             case '/':
1724                 endTag = true;
1725                 break;
1726             case '!':
1727             {
1728                 // <!-- comment --> or <!DOCTYPE ...>
1729                 searchCount = 1; // Look for '<!--' sequence to start comment...
1730                 doctypeSearchCount = 1; // ... or for '<!DOCTYPE' sequence to start doctype
1731                 break;
1732             }
1733             case '?':
1734             {
1735                 // xml processing instruction
1736                 processingInstruction = true;
1737                 tquote = NoQuote;
1738                 parseProcessingInstruction(src);
1739                 continue;
1740
                     // Unreachable: the continue above always leaves the switch.
1741                 break;
1742             }
1743             case '%':
1744                 if (!brokenServer) {
1745                     // <% server stuff, handle as comment %>
1746                     server = true;
1747                     tquote = NoQuote;
1748                     parseServer(src);
1749                     continue;
1750                 }
1751                 // else fall through
1752             default:
1753             {
1754                 if( ((cc >= 'a') && (cc <= 'z')) || ((cc >= 'A') && (cc <= 'Z')))
1755                 {
1756                     // Start of a Start-Tag
1757                 }
1758                 else
1759                 {
1760                     // Invalid tag
1761                     // Add as is
                         // The '<' is emitted as text. cc itself is not consumed:
                         // startTag is already cleared, so the next iteration
                         // handles cc through the ordinary branches.
1762                     if (pending)
1763                         addPending();
1764                     *dest = '<';
1765                     dest++;
1766                     continue;
1767                 }
1768             }
1769             }; // end case
1770
1771             // According to SGML any LF immediately after a starttag, or
1772             // immediately before an endtag should be ignored.
1773             // ### Gecko and MSIE though only ignores LF immediately after
1774             // starttags and only for PRE elements -- asj (28/06-2005)
                 // The else below pairs with `if (!select)`: inside a select the
                 // pending whitespace is dropped instead of being emitted.
1775             if ( pending )
1776                 if (!select)
1777                     addPending();
1778                 else
1779                     pending = NonePending;
1780
1781             // Cancel unused discards
1782             discard = NoneDiscard;
1783             // if (!endTag) discard = LFDiscard;
1784
                 // Hand the text collected so far to the parser before the tag is read.
1785             processToken();
1786
1787             cBufferPos = 0;
1788             tag = TagName;
1789             parseTag(src);
1790         }
1791         else if ( cc == '&' && !src.escaped())
1792         {
                 // Start of an entity reference: consume '&', flush pending
                 // whitespace, then let parseEntity take over.
1793             ++src;
1794             if ( pending )
1795                 addPending();
1796             discard = NoneDiscard;
1797             parseEntity(src, dest, true);
1798         }
1799         else if ( cc == '<' && !src.escaped())
1800         {
                 // Possible tag start: remember the line it begins on; the
                 // startTag branch above classifies it on the next iteration.
1801             tagStartLineno = lineno+src.lineCount();
1802             ++src;
1803             discard = NoneDiscard;
1804             startTag = true;
1805         }
1806         else if (( cc == '\n' ) || ( cc == '\r' ))
1807         {
                 // Line break. A pending SpaceDiscard does not apply to it and is cancelled.
1808             if (discard == SpaceDiscard)
1809                 discard = NoneDiscard;
1810
1811             if (discard == LFDiscard) {
1812                 // Ignore one LF
1813                 discard = NoneDiscard;
1814             }
1815             else if (discard == AllDiscard)
1816             {
1817                 // Ignore
1818             }
1819             else
1820             {
                     // In a select (outside scripts) the previous pending
                     // whitespace is replaced without being emitted; elsewhere
                     // it is flushed first.
1821                 if (select && !script) {
1822                     pending = LFPending;
1823                 } else {
1824                     if (pending)
1825                         addPending();
1826                     pending = LFPending;
1827                 }
1828             }
1829
1830             /* Check for MS-DOS CRLF sequence */
1831             if (cc == '\r')
1832             {
1833                 skipLF = true;
1834             }
1835             ++src;
1836         }
1837         else if (( cc == ' ' ) || ( cc == '\t' ))
1838         {
                 // Space or tab. A pending LFDiscard does not apply to it and is cancelled.
1839             if(discard == LFDiscard)
1840                 discard = NoneDiscard;
1841
1842             if(discard == SpaceDiscard) {
1843                 // Ignore one space
1844                 discard = NoneDiscard;
1845             }
1846             else if(discard == AllDiscard)
1847             {
1848                 // Ignore
1849             }
1850             else {
1851                 if (select && !script) {
                         // Only the first whitespace of a run is remembered, and
                         // always as a space (a tab is not distinguished here).
1852                     if (!pending)
1853                         pending = SpacePending;
1854                 } else {
1855                     if (pending)
1856                         addPending();
1857                     if (cc == ' ')
1858                         pending = SpacePending;
1859                     else
1860                         pending = TabPending;
1861                 }
1862             }
1863
1864             ++src;
1865         }
1866         else
1867         {
                 // Ordinary character: flush pending whitespace, then copy it to the output.
1868             if (pending)
1869                 addPending();
1870
1871             discard = NoneDiscard;
1872             if ( pre )
1873             {
1874                 prePos++;
1875             }
1876             *dest = *src;
                 // fixUpChar is defined elsewhere; it is given the copied
                 // character in place before the cursor advances.
1877             fixUpChar( *dest );
1878             ++dest;
1879             ++src;
1880         }
1881     }
1882
         // All input consumed: if finish() already announced the end of data and
         // nothing script-related is outstanding, finish up now.
1883     if (noMoreData && cachedScript.isEmpty() && !m_executingScript)
1884         end(); // this actually causes us to be deleted
1885 }
1886
1887 void HTMLTokenizer::timerEvent( QTimerEvent *e )
1888 {
         // Reacts to the auto-close timer started by setAutoClose(): when it
         // fires and no external script is outstanding, input is declared
         // complete via finish(). Events of any other timer id are ignored
         // here (they are not forwarded anywhere).
1889     if ( e->timerId() == m_autoCloseTimer && cachedScript.isEmpty() ) {
1890          finish();
1891     }
1892 }
1893
1894 void HTMLTokenizer::setAutoClose( bool b ) {
         // Cancels a running auto-close timer and, when b is true, starts a new
         // one with an interval of 100 (milliseconds, per QObject::startTimer).
         // timerEvent() calls finish() when that timer fires.
         //
         // m_autoCloseTimer == 0 means "no timer running" (it is the value
         // stored below), so killTimer() is only called for a real id. This is
         // the same guard finish() applies before its own killTimer() call.
1895     if ( m_autoCloseTimer ) killTimer( m_autoCloseTimer );
1896     m_autoCloseTimer = 0;
1897     if ( b )
1898         m_autoCloseTimer = startTimer(100);
1899 }
1900
1901 void HTMLTokenizer::end()
1902 {
         // Ends tokenizing: flushes the last token, releases the token buffer
         // and the script buffer, and emits finishedParsing(). Callers in this
         // file note that the signal leads to this object being deleted, so
         // nothing may touch members after the emit.
         //
         // buffer == 0 means the buffers are already gone (this function sets
         // it to 0 below); in that case only the signal is emitted again.
1903     if ( buffer == 0 ) {
1904         emit finishedParsing();
1905         return;
1906     }
1907
1908     // parseTag is using the buffer for different matters
         // ... so the buffer is only flushed as a token when no tag is in progress.
1909     if ( !tag )
1910         processToken();
1911
1912     if(buffer)
1913         KHTML_DELETE_QCHAR_VEC(buffer);
1914
1915     if(scriptCode)
1916         KHTML_DELETE_QCHAR_VEC(scriptCode);
1917
         // Reset the bookkeeping of both buffers; buffer == 0 also makes
         // write() ignore further input.
1918     scriptCode = 0;
1919     scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1920     buffer = 0;
1921     emit finishedParsing();
1922 }
1923
1924 void HTMLTokenizer::finish()
1925 {
         // Declares the input complete. Stops the auto-close timer, recovers
         // text that is still buffered inside an unterminated title / script /
         // comment / server section by feeding it through write() again, and
         // finally calls end() unless scripts are outstanding or the tokenizer
         // is on hold.
1926     if ( m_autoCloseTimer ) {
1927         killTimer( m_autoCloseTimer );
1928         m_autoCloseTimer = 0;
1929     }
1930     // do this as long as we don't find matching comment ends
         // The re-fed text may open another unterminated section, hence a loop.
1931     while((title || script || comment || server) && scriptCode && scriptCodeSize)
1932     {
1933         // we've found an unmatched comment start
             // Remember which construct was broken. write() consults
             // brokenServer; the other two flags are read outside this section.
1934         if (comment)
1935             brokenComments = true;
1936         else if (server)
1937             brokenServer = true;
1938         else if (script)
1939             brokenScript = true;
1940
             // Terminate the buffered text with two zero characters;
             // checkScriptBuffer() presumably guarantees room for them (TODO confirm).
1941         checkScriptBuffer();
1942         scriptCode[ scriptCodeSize ] = 0;
1943         scriptCode[ scriptCodeSize + 1 ] = 0;
1944         int pos;
             // `food` is the text that gets re-fed to write() below.
1945         QString food;
             // NOTE(review): `style` is tested here although it is neither part
             // of the loop condition nor reset below -- confirm this is intended.
1946         if (title || style || script)
1947             food.setUnicode(scriptCode, scriptCodeSize);
1948         else if (server) {
                 // Put back the '<' that introduced the server section.
1949             food = "<";
1950             food += QString(scriptCode, scriptCodeSize);
1951         }
1952         else {
                 // Unterminated comment: re-feed only what follows the first '>'.
                 // Without any '>' indexOf yields -1 and the whole buffer is used.
1953             pos = QString::fromRawData(scriptCode, scriptCodeSize).indexOf('>');
1954             food.setUnicode(scriptCode+pos+1, scriptCodeSize-pos-1); // deep copy
1955         }
             // `food` owns its own copy now, so the script buffer can be released.
1956         KHTML_DELETE_QCHAR_VEC(scriptCode);
1957         scriptCode = 0;
1958         scriptCodeSize = scriptCodeMaxSize = scriptCodeResync = 0;
1959         if (script)
1960             scriptHandler();
1961
             // Leave all special states so write() treats `food` as ordinary input.
1962         comment = title = server = script = false;
1963         if ( !food.isEmpty() )
1964             write(food, true);
1965     }
1966     // this indicates we will not receive any more data... but if we are waiting on
1967     // an external script to load, we can't finish parsing until that is done
1968     noMoreData = true;
1969     if (cachedScript.isEmpty() && !m_executingScript && !onHold)
1970         end(); // this actually causes us to be deleted
1971 }
1972
1973 void HTMLTokenizer::processToken()
1974 {
         // Hands the current token (currToken) to the parser. Characters
         // collected in `buffer` up to `dest` become the token's text. If there
         // is neither buffered text nor a token id, nothing is sent.
         // Afterwards the output cursor is rewound and currToken is reset.
         //
         // The script proxy, when available, is told which source line event
         // handlers created during parsing belong to.
1975     KJSProxy *jsProxy = view ? view->part()->jScript() : 0L;
1976     if (jsProxy)
1977         jsProxy->setEventHandlerLineno(tagStartLineno+1);
1978     if ( dest > buffer )
1979     {
1980 #if 0
1981         if(currToken.tid) {
1982             qDebug( "unexpected token id: %d, str: *%s*", currToken.tid,QString::fromRawData( buffer, dest-buffer ).toLatin1().constData() );
1983             assert(0);
1984         }
1985
1986 #endif
             // Buffered characters become the token text; the token is a text
             // token unless it was already marked as a comment.
1987         currToken.text = new DOMStringImpl( buffer, dest - buffer );
1988         currToken.text->ref();
1989         if (currToken.tid != ID_COMMENT)
1990             currToken.tid = ID_TEXT;
1991     }
1992     else if(!currToken.tid) {
             // No text and no token id: nothing to deliver.
1993         currToken.reset();
1994         if (jsProxy)
1995             jsProxy->setEventHandlerLineno(lineno+src.lineCount()+1);
1996         return;
1997     }
1998
         // Rewind the output cursor; the buffer is reused for the next token.
1999     dest = buffer;
2000
2001 #ifdef TOKEN_DEBUG
2002     QString name = QString( getTagName(currToken.tid) );
2003     QString text;
2004     if(currToken.text)
2005         text = QString::fromRawData(currToken.text->s, currToken.text->l);
2006
2007     kDebug( 6036 ) << "Token --> " << name << "   id = " << currToken.tid;
2008     if (currToken.flat)
2009         kDebug( 6036 ) << "Token is FLAT!";
2010     if(!text.isNull())
2011         kDebug( 6036 ) << "text: \"" << text << "\"";
2012     unsigned long l = currToken.attrs ? currToken.attrs->length() : 0;
2013     if(l) {
2014         kDebug( 6036 ) << "Attributes: " << l;
2015         for (unsigned long i = 0; i < l; ++i) {
2016             NodeImpl::Id tid = currToken.attrs->idAt(i);
2017             DOMString value = currToken.attrs->valueAt(i);
2018             kDebug( 6036 ) << "    " << tid << " " << parser->doc()->getDocument()->getName(NodeImpl::AttributeId, tid).string()
2019                             << "=\"" << value.string() << "\"" << endl;
2020         }
2021     }
2022     kDebug( 6036 );
2023 #endif
2024
2025     // In some cases, parseToken() can cause javascript code to be executed
2026     // (for example, when setting an attribute that causes an event handler
2027     // to be created). So we need to protect against re-entrancy into the parser
         // While the counter is non-zero, write() queues appended data instead of parsing it.
2028     m_executingScript++;
2029
2030     // pass the token over to the parser, the parser DOES NOT delete the token
2031     parser->parseToken(&currToken);
2032
2033     m_executingScript--;
2034
         // After a flat non-text token, cancel any pending whitespace discard
         // unless the parser reports noSpaces().
2035     if ( currToken.flat && currToken.tid != ID_TEXT && !parser->noSpaces() )
2036         discard = NoneDiscard;
2037
2038     currToken.reset();
2039     if (jsProxy)
2040         jsProxy->setEventHandlerLineno(1);
2041 }
2042
2043 void HTMLTokenizer::processDoctypeToken()
2044 {
         // Normalises the public and system identifiers of the collected
         // doctype token with simplified(), then passes the token to the parser.
         // (simplified() is presumably QString's whitespace normalisation --
         // confirm against the declaration of doctypeToken.)
2045     // kDebug( 6036 ) << "Process DoctypeToken (name: " << doctypeToken.name << ", publicID: " << doctypeToken.publicID << ", systemID: " << doctypeToken.systemID;
2046     doctypeToken.publicID = doctypeToken.publicID.simplified();
2047     doctypeToken.systemID = doctypeToken.systemID.simplified();
2048     parser->parseDoctypeToken(&doctypeToken);
2049 }
2050
2051
2052 HTMLTokenizer::~HTMLTokenizer()
2053 {
         // Runs reset() (defined elsewhere), then deletes the prospective
         // tokenizer and the parser, both of which this object deletes itself.
         // Note that m_prospectiveTokenizer is deleted unconditionally here,
         // while write() only uses it under PROSPECTIVE_TOKENIZER_ENABLED.
2054     reset();
2055     delete m_prospectiveTokenizer;
2056     delete parser;
2057 }
2058
2059
2060 void HTMLTokenizer::enlargeBuffer(int len)
2061 {
         // Grows the token buffer so that at least `len` more characters fit.
         // The new size is the larger of double the current size and the
         // current size plus len.
2062     int newsize = qMax(size*2, size+len);
         // Remember the cursor as an offset: `dest` is rebuilt from it after
         // the reallocation, since the buffer pointer is reassigned.
2063     int oldoffs = (dest - buffer);
2064
2065     buffer = KHTML_REALLOC_QCHAR_VEC(buffer, newsize);
2066     dest = buffer + oldoffs;
2067     size = newsize;
2068 }
2069
2070 void HTMLTokenizer::enlargeScriptBuffer(int len)
2071 {
         // Grows the script buffer to the larger of double its current capacity
         // and its current capacity plus len. Unlike enlargeBuffer() there is
         // no cursor to fix up: only the pointer and the capacity are updated.
2072     int newsize = qMax(scriptCodeMaxSize*2, scriptCodeMaxSize+len);
2073     scriptCode = KHTML_REALLOC_QCHAR_VEC(scriptCode, newsize);
2074     scriptCodeMaxSize = newsize;
2075 }
2076
2077 void HTMLTokenizer::notifyFinished(CachedObject* /*finishedObj*/)
2078 {
         // Called when a cached object has finished loading; the argument is
         // unused. Executes the external scripts at the head of the
         // cachedScript queue for as long as the head is loaded, in queue
         // order, and resumes tokenizing the deferred input once the queue is
         // empty.
2079     assert(!cachedScript.isEmpty());
2080     bool done = false;
         // `done` is tested first, so head() is never asked on an emptied queue.
2081     while (!done && cachedScript.head()->isLoaded()) {
2082
2083         kDebug( 6036 ) << "Finished loading an external script";
2084
2085         CachedScript* cs = cachedScript.dequeue();
2086         DOMString scriptSource = cs->script();
2087 #ifdef TOKEN_DEBUG
2088         kDebug( 6036 ) << "External script is:" << endl << scriptSource.string();
2089 #endif
             // Replace the current source by an empty one (setSrc also folds
             // the old source's line count into lineno).
2090         setSrc(TokenizerString());
2091
2092         // make sure we forget about the script before we execute the new one
2093         // infinite recursion might happen otherwise
             // The URL is copied before deref(); presumably cs may no longer be
             // valid afterwards (TODO confirm).
2094         QString cachedScriptUrl( cs->url().string() );
2095         cs->deref(this);
2096
2097         scriptExecution( scriptSource.string(), cachedScriptUrl );
2098
             // Re-evaluated after execution, once the script has run.
2099         done = cachedScript.isEmpty();
2100
2101         // 'script' is true when we are called synchronously from
2102         // scriptHandler(). In that case scriptHandler() will take care
2103         // of 'scriptOutput'.
2104         if ( !script ) {
                 // Collapse the queue into a single entry: each popped entry is
                 // put in front of the one below it.
2105             while (pendingQueue.count() > 1) {
2106                TokenizerString t = pendingQueue.pop();
2107                pendingQueue.top().prepend( t );
2108             }
                 // NOTE(review): pop() assumes pendingQueue is not empty at
                 // this point -- confirm against the code that fills it.
2109             if (done) {
2110                 write(pendingQueue.pop(), false);
2111             }
2112             // we might be deleted at this point, do not
2113             // access any members.
                 // (The loop condition respects this: `done` is a local and
                 // ends the loop before any member is read.)
2114         }
2115     }
2116 }
2117
2118 bool HTMLTokenizer::isWaitingForScripts() const
2119 {
         // True while at least one external script is still queued in
         // cachedScript (the count converts to bool: non-zero means true).
2120     return cachedScript.count();
2121 }
2122
2123 bool HTMLTokenizer::isExecutingScript() const
2124 {
         // True while the m_executingScript counter is positive. processToken()
         // raises it around parser->parseToken() and lowers it afterwards.
2125     return (m_executingScript > 0);
2126 }
2127
2128 void HTMLTokenizer::setSrc(const TokenizerString& source)
2129 {
         // Replaces the current input by `source`. The lines counted in the
         // old input are first added to lineno, and the new input's counter is
         // reset, so lineno + src.lineCount() keeps giving the current line.
2130     lineno += src.lineCount();
2131     src = source;
2132     src.resetLineCount();
2133 }
2134
2135 void HTMLTokenizer::setOnHold(bool _onHold)
2136 {
         // Sets the onHold flag. While it is set, write() only appends incoming
         // data to src and finish() does not call end(). This function only
         // records the flag: it does not itself resume parsing of input that
         // accumulated while on hold.
2137     if (onHold == _onHold) return;
2138     onHold = _onHold;
2139 }
2140