tagmanager/js.c

   1 /*
   2  *       $Id$
   3  *
   4  *       Copyright (c) 2003, Darren Hiebert
   5  *
   6  *       This source code is released for free distribution under the terms of the
   7  *       GNU General Public License.
   8  *
   9  *       This module contains functions for generating tags for JavaScript language
  10  *       files.
  11  *
  12  *       This is a good reference for different forms of the function statement:
  13  *               http://www.permadi.com/tutorial/jsFunc/
  14  *   Another good reference:
  15  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  16  */
  17
  18 /*
  19  *       INCLUDE FILES
  20  */
  21 #include "general.h"    /* must always come first */
  22 #include <ctype.h>      /* to define isalpha () */
  23 #include <setjmp.h>
  24 #ifdef DEBUG
  25 #include <stdio.h>
  26 #endif
  27
  28 #include "keyword.h"
  29 #include "parse.h"
  30 #include "read.h"
  31 #include "main.h"
  32 #include "vstring.h"
  33
  34 /*
  35  *       MACROS
  36  */
  37 #define isType(token,t)         (boolean) ((token)->type == (t))
  38 #define isKeyword(token,k)      (boolean) ((token)->keyword == (k))
  39
  40 /*
  41  *       DATA DECLARATIONS
  42  */
  43
  44 typedef enum eException { ExceptionNone, ExceptionEOF } exception_t;
  45
  46 /*
  47  * Tracks class and function names already created
  48  */
  49 static stringList *ClassNames;
  50 static stringList *FunctionNames;
  51
  52 /*      Used to specify type of keyword.
  53 */
  54 typedef enum eKeywordId {
  55         KEYWORD_NONE = -1,
  56         KEYWORD_function,
  57         KEYWORD_capital_function,
  58         KEYWORD_object,
  59         KEYWORD_capital_object,
  60         KEYWORD_prototype,
  61         KEYWORD_var,
  62         KEYWORD_new,
  63         KEYWORD_this,
  64         KEYWORD_for,
  65         KEYWORD_while,
  66         KEYWORD_do,
  67         KEYWORD_if,
  68         KEYWORD_else,
  69         KEYWORD_switch,
  70         KEYWORD_try,
  71         KEYWORD_catch,
  72         KEYWORD_finally
  73 } keywordId;
  74
  75 /*      Used to determine whether keyword is valid for the token language and
  76  *      what its ID is.
  77  */
  78 typedef struct sKeywordDesc {
  79         const char *name;
  80         keywordId id;
  81 } keywordDesc;
  82
  83 typedef enum eTokenType {
  84         TOKEN_UNDEFINED,
  85         TOKEN_CHARACTER,
  86         TOKEN_CLOSE_PAREN,
  87         TOKEN_SEMICOLON,
  88         TOKEN_COLON,
  89         TOKEN_COMMA,
  90         TOKEN_KEYWORD,
  91         TOKEN_OPEN_PAREN,
  92         TOKEN_OPERATOR,
  93         TOKEN_IDENTIFIER,
  94         TOKEN_STRING,
  95         TOKEN_PERIOD,
  96         TOKEN_OPEN_CURLY,
  97         TOKEN_CLOSE_CURLY,
  98         TOKEN_EQUAL_SIGN,
  99         TOKEN_FORWARD_SLASH,
 100         TOKEN_OPEN_SQUARE,
 101         TOKEN_CLOSE_SQUARE
 102 } tokenType;
 103
 104 typedef struct sTokenInfo {
 105         tokenType               type;
 106         keywordId               keyword;
 107         vString *               string;
 108         vString *               scope;
 109         unsigned long   lineNumber;
 110         fpos_t                  filePosition;
 111         int                             nestLevel;
 112         boolean                 ignoreTag;
 113     int bufferPosition; /* buffer position of line containing name */
 114 } tokenInfo;
 115
 116 /*
 117  *      DATA DEFINITIONS
 118  */
 119
 120 static langType Lang_js;
 121
 122 static jmp_buf Exception;
 123
 124 typedef enum {
 125         JSTAG_FUNCTION,
 126         JSTAG_CLASS,
 127         JSTAG_METHOD,
 128         JSTAG_PROPERTY,
 129         JSTAG_VARIABLE,
 130         JSTAG_COUNT
 131 } jsKind;
 132
 133 static kindOption JsKinds [] = {
 134         { TRUE,  'f', "function",         "functions"              },
 135         { TRUE,  'c', "class",            "classes"                        },
 136         { TRUE,  'm', "method",           "methods"                        },
 137         { TRUE,  'p', "member",           "properties"             },
 138         { TRUE,  'v', "variable",         "global variables"   }
 139 };
 140
 141 static const keywordDesc JsKeywordTable [] = {
 142         /* keyword              keyword ID */
 143         { "function",   KEYWORD_function                        },
 144         { "Function",   KEYWORD_capital_function        },
 145         { "object",             KEYWORD_object                          },
 146         { "Object",             KEYWORD_capital_object          },
 147         { "prototype",  KEYWORD_prototype                       },
 148         { "var",                KEYWORD_var                                     },
 149         { "new",                KEYWORD_new                                     },
 150         { "this",               KEYWORD_this                            },
 151         { "for",                KEYWORD_for                                     },
 152         { "while",              KEYWORD_while                           },
 153         { "do",                 KEYWORD_do                                      },
 154         { "if",                 KEYWORD_if                                      },
 155         { "else",               KEYWORD_else                            },
 156         { "switch",             KEYWORD_switch                          },
 157         { "try",                KEYWORD_try                                     },
 158         { "catch",              KEYWORD_catch                           },
 159         { "finally",    KEYWORD_finally                         }
 160 };
 161
 162 /*
 163  *       FUNCTION DEFINITIONS
 164  */
 165
 166 /* Recursive functions */
 167 static void parseFunction (tokenInfo *const token);
 168 static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent);
 169 static boolean parseLine (tokenInfo *const token, boolean is_inside_class);
 170
 171 static boolean isIdentChar (const int c)
 172 {
 173         return (boolean)
 174                 (isalpha (c) || isdigit (c) || c == '$' ||
 175                  c == '@' || c == '_' || c == '#');
 176 }
 177
 178 static void buildJsKeywordHash (void)
 179 {
 180         const size_t count = sizeof (JsKeywordTable) /
 181                 sizeof (JsKeywordTable [0]);
 182         size_t i;
 183         for (i = 0      ;  i < count  ;  ++i)
 184         {
 185                 const keywordDesc* const p = &JsKeywordTable [i];
 186                 addKeyword (p->name, Lang_js, (int) p->id);
 187         }
 188 }
 189
 190 static tokenInfo *newToken (void)
 191 {
 192         tokenInfo *const token = xMalloc (1, tokenInfo);
 193
 194         token->type                     = TOKEN_UNDEFINED;
 195         token->keyword          = KEYWORD_NONE;
 196         token->string           = vStringNew ();
 197         token->scope            = vStringNew ();
 198         token->nestLevel        = 0;
 199         token->ignoreTag        = FALSE;
 200         token->lineNumber   = getSourceLineNumber ();
 201     if (useFile())
 202                 token->filePosition = getInputFilePosition ();
 203     else
 204                 token->bufferPosition = getInputBufferPosition ();
 205
 206         return token;
 207 }
 208
 209 static void deleteToken (tokenInfo *const token)
 210 {
 211         vStringDelete (token->string);
 212         vStringDelete (token->scope);
 213         eFree (token);
 214 }
 215
 216 /*
 217  *       Tag generation functions
 218  */
 219
 220 static void makeConstTag (tokenInfo *const token, const jsKind kind)
 221 {
 222         if (JsKinds [kind].enabled && ! token->ignoreTag )
 223         {
 224                 const char *const name = vStringValue (token->string);
 225                 tagEntryInfo e;
 226                 initTagEntry (&e, name);
 227
 228                 e.lineNumber   = token->lineNumber;
 229                 e.filePosition = token->filePosition;
 230                 e.kindName         = JsKinds [kind].name;
 231                 e.kind             = JsKinds [kind].letter;
 232
 233                 makeTagEntry (&e);
 234         }
 235 }
 236
 237 static void makeJsTag (tokenInfo *const token, const jsKind kind)
 238 {
 239         vString *       fulltag;
 240
 241         if (JsKinds [kind].enabled && ! token->ignoreTag )
 242         {
 243                 /*
 244                  * If a scope has been added to the token, change the token
 245                  * string to include the scope when making the tag.
 246                  */
 247                 if ( vStringLength(token->scope) > 0 )
 248                 {
 249                         fulltag = vStringNew ();
 250                         vStringCopy(fulltag, token->scope);
 251                         vStringCatS (fulltag, ".");
 252                         vStringCatS (fulltag, vStringValue(token->string));
 253                         vStringTerminate(fulltag);
 254                         vStringCopy(token->string, fulltag);
 255                         vStringDelete (fulltag);
 256                 }
 257                 makeConstTag (token, kind);
 258         }
 259 }
 260
 261 static void makeClassTag (tokenInfo *const token)
 262 {
 263         if ( ! token->ignoreTag )
 264         {
 265                 if ( ! stringListHas(ClassNames, vStringValue (token->string)) )
 266                 {
 267                         stringListAdd (ClassNames, vStringNewCopy (token->string));
 268                         makeJsTag (token, JSTAG_CLASS);
 269                 }
 270         }
 271 }
 272
 273 static void makeFunctionTag (tokenInfo *const token)
 274 {
 275         if ( ! token->ignoreTag )
 276         {
 277                 if ( ! stringListHas(FunctionNames, vStringValue (token->string)) )
 278                 {
 279                         stringListAdd (FunctionNames, vStringNewCopy (token->string));
 280                         makeJsTag (token, JSTAG_FUNCTION);
 281                 }
 282         }
 283 }
 284
 285 /*
 286  *       Parsing functions
 287  */
 288
 289 static int skipToCharacter (const int c)
 290 {
 291         int d;
 292         do
 293         {
 294                 d = fileGetc ();
 295         } while (d != EOF  &&  d != c);
 296         return d;
 297 }
 298
 299 static void parseString (vString *const string, const int delimiter)
 300 {
 301         boolean end = FALSE;
 302         while (! end)
 303         {
 304                 int c = fileGetc ();
 305                 if (c == EOF)
 306                         end = TRUE;
 307                 else if (c == '\\')
 308                 {
 309                         c = fileGetc(); /* This maybe a ' or ". */
 310                         vStringPut(string, c);
 311                 }
 312                 else if (c == delimiter)
 313                         end = TRUE;
 314                 else
 315                         vStringPut (string, c);
 316         }
 317         vStringTerminate (string);
 318 }
 319
 320 /*      Read a C identifier beginning with "firstChar" and places it into
 321  *      "name".
 322  */
 323 static void parseIdentifier (vString *const string, const int firstChar)
 324 {
 325         int c = firstChar;
 326         Assert (isIdentChar (c));
 327         do
 328         {
 329                 vStringPut (string, c);
 330                 c = fileGetc ();
 331         } while (isIdentChar (c));
 332         vStringTerminate (string);
 333         if (!isspace (c))
 334                 fileUngetc (c);         /* unget non-identifier character */
 335 }
 336
 337 static keywordId analyzeToken (vString *const name)
 338 {
 339         vString *keyword = vStringNew ();
 340         keywordId result;
 341         vStringCopyToLower (keyword, name);
 342         result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
 343         vStringDelete (keyword);
 344         return result;
 345 }
 346
 347 static void readToken (tokenInfo *const token)
 348 {
 349         int c;
 350
 351         token->type                     = TOKEN_UNDEFINED;
 352         token->keyword          = KEYWORD_NONE;
 353         vStringClear (token->string);
 354
 355 getNextChar:
 356         do
 357         {
 358                 c = fileGetc ();
 359                 token->lineNumber   = getSourceLineNumber ();
 360                 if (useFile())
 361                         token->filePosition = getInputFilePosition ();
 362                 else
 363                         token->bufferPosition = getInputBufferPosition ();
 364                 }
 365         while (c == '\t'  ||  c == ' ' ||  c == '\n');
 366
 367         switch (c)
 368         {
 369                 case EOF: longjmp (Exception, (int)ExceptionEOF);       break;
 370                 case '(': token->type = TOKEN_OPEN_PAREN;                       break;
 371                 case ')': token->type = TOKEN_CLOSE_PAREN;                      break;
 372                 case ';': token->type = TOKEN_SEMICOLON;                        break;
 373                 case ',': token->type = TOKEN_COMMA;                            break;
 374                 case '.': token->type = TOKEN_PERIOD;                           break;
 375                 case ':': token->type = TOKEN_COLON;                            break;
 376                 case '{': token->type = TOKEN_OPEN_CURLY;                       break;
 377                 case '}': token->type = TOKEN_CLOSE_CURLY;                      break;
 378                 case '=': token->type = TOKEN_EQUAL_SIGN;                       break;
 379                 case '[': token->type = TOKEN_OPEN_SQUARE;                      break;
 380                 case ']': token->type = TOKEN_CLOSE_SQUARE;                     break;
 381
 382                 case '\'':
 383                 case '"':
 384                                   token->type = TOKEN_STRING;
 385                                   parseString (token->string, c);
 386                                   token->lineNumber = getSourceLineNumber ();
 387                                   if (useFile())
 388                                         token->filePosition = getInputFilePosition ();
 389                                   else
 390                                         token->bufferPosition = getInputBufferPosition ();
 391                                   break;
 392
 393                 case '\\':
 394                                   c = fileGetc ();
 395                                   if (c != '\\'  && c != '"'  &&  !isspace (c))
 396                                           fileUngetc (c);
 397                                   token->type = TOKEN_CHARACTER;
 398                                   token->lineNumber = getSourceLineNumber ();
 399                                   if (useFile())
 400                                         token->filePosition = getInputFilePosition ();
 401                                   else
 402                                         token->bufferPosition = getInputBufferPosition ();
 403                                   break;
 404
 405                 case '/':
 406                                   {
 407                                           int d = fileGetc ();
 408                                           if ( (d != '*') &&            /* is this the start of a comment? */
 409                                                           (d != '/') )          /* is a one line comment? */
 410                                           {
 411                                                   token->type = TOKEN_FORWARD_SLASH;
 412                                                   fileUngetc (d);
 413                                           }
 414                                           else
 415                                           {
 416                                                   if (d == '*')
 417                                                   {
 418                                                           do
 419                                                           {
 420                                                                   skipToCharacter ('*');
 421                                                                   c = fileGetc ();
 422                                                                   if (c == '/')
 423                                                                           break;
 424                                                                   else
 425                                                                           fileUngetc (c);
 426                                                           } while (c != EOF && c != '\0');
 427                                                           goto getNextChar;
 428                                                   }
 429                                                   else if (d == '/')    /* is this the start of a comment?  */
 430                                                   {
 431                                                           skipToCharacter ('\n');
 432                                                           goto getNextChar;
 433                                                   }
 434                                           }
 435                                           break;
 436                                   }
 437
 438                 default:
 439                                   if (! isIdentChar (c))
 440                                           token->type = TOKEN_UNDEFINED;
 441                                   else
 442                                   {
 443                                           parseIdentifier (token->string, c);
 444                                           token->lineNumber = getSourceLineNumber ();
 445                                           if (useFile())
 446                                                 token->filePosition = getInputFilePosition ();
 447                                           else
 448                                                 token->bufferPosition = getInputBufferPosition ();
 449                                           token->keyword = analyzeToken (token->string);
 450                                           if (isKeyword (token, KEYWORD_NONE))
 451                                                   token->type = TOKEN_IDENTIFIER;
 452                                           else
 453                                                   token->type = TOKEN_KEYWORD;
 454                                   }
 455                                   break;
 456         }
 457 }
 458
 459 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 460 {
 461         dest->nestLevel = src->nestLevel;
 462         dest->lineNumber = src->lineNumber;
 463     if (useFile())
 464                 dest->filePosition = src->filePosition;
 465     else
 466                 dest->bufferPosition = src->bufferPosition;
 467         dest->type = src->type;
 468         dest->keyword = src->keyword;
 469         vStringCopy(dest->string, src->string);
 470         vStringCopy(dest->scope, src->scope);
 471 }
 472
 473 /*
 474  *       Token parsing functions
 475  */
 476
 477 static void skipArgumentList (tokenInfo *const token)
 478 {
 479         int nest_level = 0;
 480
 481         /*
 482          * Other databases can have arguments with fully declared
 483          * datatypes:
 484          *       (      name varchar(30), text binary(10)  )
 485          * So we must check for nested open and closing parantheses
 486          */
 487
 488         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
 489         {
 490                 nest_level++;
 491                 while (! (isType (token, TOKEN_CLOSE_PAREN) && (nest_level == 0)))
 492                 {
 493                         readToken (token);
 494                         if (isType (token, TOKEN_OPEN_PAREN))
 495                         {
 496                                 nest_level++;
 497                         }
 498                         if (isType (token, TOKEN_CLOSE_PAREN))
 499                         {
 500                                 if (nest_level > 0)
 501                                 {
 502                                         nest_level--;
 503                                 }
 504                         }
 505                 }
 506                 readToken (token);
 507         }
 508 }
 509
 510 static void skipArrayList (tokenInfo *const token)
 511 {
 512         int nest_level = 0;
 513
 514         /*
 515          * Handle square brackets
 516          *       var name[1]
 517          * So we must check for nested open and closing square brackets
 518          */
 519
 520         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
 521         {
 522                 nest_level++;
 523                 while (! (isType (token, TOKEN_CLOSE_SQUARE) && (nest_level == 0)))
 524                 {
 525                         readToken (token);
 526                         if (isType (token, TOKEN_OPEN_SQUARE))
 527                         {
 528                                 nest_level++;
 529                         }
 530                         if (isType (token, TOKEN_CLOSE_SQUARE))
 531                         {
 532                                 if (nest_level > 0)
 533                                 {
 534                                         nest_level--;
 535                                 }
 536                         }
 537                 }
 538                 readToken (token);
 539         }
 540 }
 541
 542 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
 543 {
 544         if (vStringLength (parent->string) > 0)
 545         {
 546                 vStringCatS (parent->string, ".");
 547         }
 548         vStringCatS (parent->string, vStringValue(child->string));
 549         vStringTerminate(parent->string);
 550 }
 551
 552 static void addToScope (tokenInfo* const token, vString* const extra)
 553 {
 554         if (vStringLength (token->scope) > 0)
 555         {
 556                 vStringCatS (token->scope, ".");
 557         }
 558         vStringCatS (token->scope, vStringValue(extra));
 559         vStringTerminate(token->scope);
 560 }
 561
 562 /*
 563  *       Scanning functions
 564  */
 565
 566 static void findCmdTerm (tokenInfo *const token)
 567 {
 568         /*
 569          * Read until we find either a semicolon or closing brace.
 570          * Any nested braces will be handled within.
 571          */
 572         while (! ( isType (token, TOKEN_SEMICOLON) ||
 573                                 isType (token, TOKEN_CLOSE_CURLY) ) )
 574         {
 575                 /* Handle nested blocks */
 576                 if ( isType (token, TOKEN_OPEN_CURLY))
 577                 {
 578                         parseBlock (token, token);
 579                 }
 580                 else if ( isType (token, TOKEN_OPEN_PAREN) )
 581                 {
 582                         skipArgumentList(token);
 583                 }
 584                 else
 585                 {
 586                         readToken (token);
 587                 }
 588         }
 589 }
 590
 591 static void parseSwitch (tokenInfo *const token)
 592 {
 593         /*
 594          * switch (expression){
 595          * case value1:
 596          *         statement;
 597          *         break;
 598          * case value2:
 599          *         statement;
 600          *         break;
 601          * default : statement;
 602          * }
 603          */
 604
 605         readToken (token);
 606
 607         if (isType (token, TOKEN_OPEN_PAREN))
 608         {
 609                 /*
 610                  * Handle nameless functions, these will only
 611                  * be considered methods.
 612                  */
 613                 skipArgumentList(token);
 614         }
 615
 616         if (isType (token, TOKEN_OPEN_CURLY))
 617         {
 618                 /*
 619                  * This will be either a function or a class.
 620                  * We can only determine this by checking the body
 621                  * of the function.  If we find a "this." we know
 622                  * it is a class, otherwise it is a function.
 623                  */
 624                 parseBlock (token, token);
 625         }
 626
 627 }
 628
 629 static void parseLoop (tokenInfo *const token)
 630 {
 631         /*
 632          * Handles these statements
 633          *         for (x=0; x<3; x++)
 634          *                 document.write("This text is repeated three times<br>");
 635          *
 636          *         for (x=0; x<3; x++)
 637          *         {
 638          *                 document.write("This text is repeated three times<br>");
 639          *         }
 640          *
 641          *         while (number<5){
 642          *                 document.write(number+"<br>");
 643          *                 number++;
 644          *         }
 645          *
 646          *         do{
 647          *                 document.write(number+"<br>");
 648          *                 number++;
 649          *         }
 650          *         while (number<5);
 651          */
 652
 653         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
 654         {
 655                 readToken(token);
 656
 657                 if (isType (token, TOKEN_OPEN_PAREN))
 658                 {
 659                         /*
 660                          * Handle nameless functions, these will only
 661                          * be considered methods.
 662                          */
 663                         skipArgumentList(token);
 664                 }
 665
 666                 if (isType (token, TOKEN_OPEN_CURLY))
 667                 {
 668                         /*
 669                          * This will be either a function or a class.
 670                          * We can only determine this by checking the body
 671                          * of the function.  If we find a "this." we know
 672                          * it is a class, otherwise it is a function.
 673                          */
 674                         parseBlock (token, token);
 675                 }
 676                 else
 677                 {
 678                         parseLine(token, FALSE);
 679                 }
 680         }
 681         else if (isKeyword (token, KEYWORD_do))
 682         {
 683                 readToken(token);
 684
 685                 if (isType (token, TOKEN_OPEN_CURLY))
 686                 {
 687                         /*
 688                          * This will be either a function or a class.
 689                          * We can only determine this by checking the body
 690                          * of the function.  If we find a "this." we know
 691                          * it is a class, otherwise it is a function.
 692                          */
 693                         parseBlock (token, token);
 694                 }
 695                 else
 696                 {
 697                         parseLine(token, FALSE);
 698                 }
 699
 700                 readToken(token);
 701
 702                 if (isKeyword (token, KEYWORD_while))
 703                 {
 704                         readToken(token);
 705
 706                         if (isType (token, TOKEN_OPEN_PAREN))
 707                         {
 708                                 /*
 709                                  * Handle nameless functions, these will only
 710                                  * be considered methods.
 711                                  */
 712                                 skipArgumentList(token);
 713                         }
 714                 }
 715         }
 716 }
 717
 718 static boolean parseIf (tokenInfo *const token)
 719 {
 720         boolean read_next_token = TRUE;
 721         /*
 722          * If statements have two forms
 723          *         if ( ... )
 724          *                 one line;
 725          *
 726          *         if ( ... )
 727          *                statement;
 728          *         else
 729          *                statement
 730          *
 731          *         if ( ... ) {
 732          *                multiple;
 733          *                statements;
 734          *         }
 735          *
 736          *
 737          *         if ( ... ) {
 738          *                return elem
 739          *         }
 740          *
 741          *     This example if correctly written, but the
 742          *     else contains only 1 statement without a terminator
 743          *     since the function finishes with the closing brace.
 744          *
 745      *     function a(flag){
 746      *         if(flag)
 747      *             test(1);
 748      *         else
 749      *             test(2)
 750      *     }
 751          *
 752          * TODO:  Deal with statements that can optional end
 753          *                without a semi-colon.  Currently this messes up
 754          *                the parsing of blocks.
 755          *                Need to somehow detect this has happened, and either
 756          *                backup a token, or skip reading the next token if
 757          *                that is possible from all code locations.
 758          *
 759          */
 760
 761         readToken (token);
 762
 763         if (isType (token, TOKEN_OPEN_PAREN))
 764         {
 765                 /*
 766                  * Handle nameless functions, these will only
 767                  * be considered methods.
 768                  */
 769                 skipArgumentList(token);
 770         }
 771
 772         if (isType (token, TOKEN_OPEN_CURLY))
 773         {
 774                 /*
 775                  * This will be either a function or a class.
 776                  * We can only determine this by checking the body
 777                  * of the function.  If we find a "this." we know
 778                  * it is a class, otherwise it is a function.
 779                  */
 780                 parseBlock (token, token);
 781         }
 782         else
 783         {
 784                 findCmdTerm (token);
 785
 786                 /*
 787                  * The IF could be followed by an ELSE statement.
 788                  * This too could have two formats, a curly braced
 789                  * multiline section, or another single line.
 790                  */
 791
 792                 if (isType (token, TOKEN_CLOSE_CURLY))
 793                 {
 794                         /*
 795                          * This statement did not have a line terminator.
 796                          */
 797                         read_next_token = FALSE;
 798                 }
 799                 else
 800                 {
 801                         readToken (token);
 802
 803                         if (isType (token, TOKEN_CLOSE_CURLY))
 804                         {
 805                                 /*
 806                                 * This statement did not have a line terminator.
 807                                 */
 808                                 read_next_token = FALSE;
 809                         }
 810                         else
 811                         {
 812                                 if (isKeyword (token, KEYWORD_else))
 813                                         read_next_token = parseIf (token);
 814                         }
 815                 }
 816         }
 817         return read_next_token;
 818 }
 819
 820 static void parseFunction (tokenInfo *const token)
 821 {
 822         tokenInfo *const name = newToken ();
 823         boolean is_class = FALSE;
 824
 825         /*
 826          * This deals with these formats
 827          *         function validFunctionTwo(a,b) {}
 828          */
 829
 830         readToken (name);
 831         /* Add scope in case this is an INNER function */
 832         addToScope(name, token->scope);
 833
 834         readToken (token);
 835         if (isType (token, TOKEN_PERIOD))
 836         {
 837                 do
 838                 {
 839                         readToken (token);
 840                         if ( isKeyword(token, KEYWORD_NONE) )
 841                         {
 842                                 addContext (name, token);
 843                                 readToken (token);
 844                         }
 845                 } while (isType (token, TOKEN_PERIOD));
 846         }
 847
 848         if ( isType (token, TOKEN_OPEN_PAREN) )
 849                 skipArgumentList(token);
 850
 851         if ( isType (token, TOKEN_OPEN_CURLY) )
 852         {
 853                 is_class = parseBlock (token, name);
 854                 if ( is_class )
 855                         makeClassTag (name);
 856                 else
 857                         makeFunctionTag (name);
 858         }
 859
 860         findCmdTerm (token);
 861
 862         deleteToken (name);
 863 }
 864
 865 static boolean parseBlock (tokenInfo *const token, tokenInfo *const parent)
 866 {
 867         boolean is_class = FALSE;
 868         boolean read_next_token = TRUE;
 869         vString * saveScope = vStringNew ();
 870
 871         token->nestLevel++;
 872         /*
 873          * Make this routine a bit more forgiving.
 874          * If called on an open_curly advance it
 875          */
 876         if ( isType (token, TOKEN_OPEN_CURLY) &&
 877                         isKeyword(token, KEYWORD_NONE) )
 878                 readToken(token);
 879
 880         if (! isType (token, TOKEN_CLOSE_CURLY))
 881         {
 882                 /*
 883                  * Read until we find the closing brace,
 884                  * any nested braces will be handled within
 885                  */
 886                 do
 887                 {
 888                         read_next_token = TRUE;
 889                         if (isKeyword (token, KEYWORD_this))
 890                         {
 891                                 /*
 892                                  * Means we are inside a class and have found
 893                                  * a class, not a function
 894                                  */
 895                                 is_class = TRUE;
 896                                 vStringCopy(saveScope, token->scope);
 897                                 addToScope (token, parent->string);
 898
 899                                 /*
 900                                  * Ignore the remainder of the line
 901                                  * findCmdTerm(token);
 902                                  */
 903                                 parseLine (token, is_class);
 904
 905                                 vStringCopy(token->scope, saveScope);
 906                         }
 907                         else if (isKeyword (token, KEYWORD_var))
 908                         {
 909                                 /*
 910                                  * Potentially we have found an inner function.
 911                                  * Set something to indicate the scope
 912                                  */
 913                                 vStringCopy(saveScope, token->scope);
 914                                 addToScope (token, parent->string);
 915                                 parseLine (token, is_class);
 916                                 vStringCopy(token->scope, saveScope);
 917                         }
 918                         else if (isKeyword (token, KEYWORD_function))
 919                         {
 920                                 vStringCopy(saveScope, token->scope);
 921                                 addToScope (token, parent->string);
 922                                 parseFunction (token);
 923                                 vStringCopy(token->scope, saveScope);
 924                         }
 925                         else if (isType (token, TOKEN_OPEN_CURLY))
 926                         {
 927                                 /* Handle nested blocks */
 928                                 parseBlock (token, parent);
 929                         }
 930                         else
 931                         {
 932                                 /*
 933                                  * It is possible for a line to have no terminator
 934                                  * if the following line is a closing brace.
 935                                  * parseLine will detect this case and indicate
 936                                  * whether we should read an additional token.
 937                                  */
 938                                 read_next_token = parseLine (token, is_class);
 939                         }
 940
 941                         /*
 942                          * Always read a new token unless we find a statement without
 943                          * a ending terminator
 944                          */
 945                         if( read_next_token )
 946                                 readToken(token);
 947
 948                         /*
 949                          * If we find a statement without a terminator consider the
 950                          * block finished, otherwise the stack will be off by one.
 951                          */
 952                 } while (! isType (token, TOKEN_CLOSE_CURLY) && read_next_token );
 953         }
 954
 955         vStringDelete(saveScope);
 956         token->nestLevel--;
 957
 958         return is_class;
 959 }
 960
 961 static void parseMethods (tokenInfo *const token, tokenInfo *const class)
 962 {
 963         tokenInfo *const name = newToken ();
 964
 965         /*
 966          * This deals with these formats
 967          *         validProperty  : 2,
 968          *         validMethod    : function(a,b) {}
 969          *         'validMethod2' : function(a,b) {}
 970          */
 971
 972         do
 973         {
 974                 readToken (token);
 975                 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
 976                 {
 977                         copyToken(name, token);
 978
 979                         readToken (token);
 980                         if ( isType (token, TOKEN_COLON) )
 981                         {
 982                                 readToken (token);
 983                                 if ( isKeyword (token, KEYWORD_function) )
 984                                 {
 985                                         readToken (token);
 986                                         if ( isType (token, TOKEN_OPEN_PAREN) )
 987                                         {
 988                                                 skipArgumentList(token);
 989                                         }
 990
 991                                         if (isType (token, TOKEN_OPEN_CURLY))
 992                                         {
 993                                                 addToScope (name, class->string);
 994                                                 makeJsTag (name, JSTAG_METHOD);
 995                                                 parseBlock (token, name);
 996
 997                                                 /*
 998                                                  * Read to the closing curly, check next
 999                                                  * token, if a comma, we must loop again
1000                                                  */
1001                                                 readToken (token);
1002                                         }
1003                                 }
1004                                 else
1005                                 {
1006                                                 addToScope (name, class->string);
1007                                                 makeJsTag (name, JSTAG_PROPERTY);
1008
1009                                                 /*
1010                                                  * Read the next token, if a comma
1011                                                  * we must loop again
1012                                                  */
1013                                                 readToken (token);
1014                                 }
1015                         }
1016                 }
1017         } while ( isType(token, TOKEN_COMMA) );
1018
1019         findCmdTerm (token);
1020
1021         deleteToken (name);
1022 }
1023
1024 static boolean parseStatement (tokenInfo *const token, boolean is_inside_class)
1025 {
1026         tokenInfo *const name = newToken ();
1027         tokenInfo *const secondary_name = newToken ();
1028         vString * saveScope = vStringNew ();
1029         boolean is_class = FALSE;
1030         boolean is_terminated = TRUE;
1031         boolean is_global = FALSE;
1032         boolean is_prototype = FALSE;
1033
1034         vStringClear(saveScope);
1035         /*
1036          * Functions can be named or unnamed.
1037          * This deals with these formats:
1038          * Function
1039          *         validFunctionOne = function(a,b) {}
1040          *         testlib.validFunctionFive = function(a,b) {}
1041          *         var innerThree = function(a,b) {}
1042          *         var innerFour = (a,b) {}
1043          *         var D2 = secondary_fcn_name(a,b) {}
1044          *         var D3 = new Function("a", "b", "return a+b;");
1045          * Class
1046          *         testlib.extras.ValidClassOne = function(a,b) {
1047          *                 this.a = a;
1048          *         }
1049          * Class Methods
1050          *         testlib.extras.ValidClassOne.prototype = {
1051          *                 'validMethodOne' : function(a,b) {},
1052          *                 'validMethodTwo' : function(a,b) {}
1053          *         }
1054      *     ValidClassTwo = function ()
1055      *     {
1056      *         this.validMethodThree = function() {}
1057      *         // unnamed method
1058      *         this.validMethodFour = () {}
1059      *     }
1060          *         Database.prototype.validMethodThree = Database_getTodaysDate;
1061          */
1062
1063         if ( is_inside_class )
1064                 is_class = TRUE;
1065         /*
1066          * var can preceed an inner function
1067          */
1068         if ( isKeyword(token, KEYWORD_var) )
1069         {
1070                 /*
1071                  * Only create variables for global scope
1072                  */
1073                 if ( token->nestLevel == 0 )
1074                 {
1075                         is_global = TRUE;
1076                 }
1077                 readToken(token);
1078         }
1079
1080         if ( isKeyword(token, KEYWORD_this) )
1081         {
1082                 readToken(token);
1083                 if (isType (token, TOKEN_PERIOD))
1084                 {
1085                         readToken(token);
1086                 }
1087         }
1088
1089         copyToken(name, token);
1090
1091         while (! isType (token, TOKEN_CLOSE_CURLY) &&
1092                ! isType (token, TOKEN_SEMICOLON)   &&
1093                ! isType (token, TOKEN_EQUAL_SIGN)  )
1094         {
1095                 /* Potentially the name of the function */
1096                 readToken (token);
1097                 if (isType (token, TOKEN_PERIOD))
1098                 {
1099                         /*
1100                          * Cannot be a global variable is it has dot references in the name
1101                          */
1102                         is_global = FALSE;
1103                         do
1104                         {
1105                                 readToken (token);
1106                                 if ( isKeyword(token, KEYWORD_NONE) )
1107                                 {
1108                                         if ( is_class )
1109                                         {
1110                                                 vStringCopy(saveScope, token->scope);
1111                                                 addToScope(token, name->string);
1112                                         }
1113                                         else
1114                                                 addContext (name, token);
1115                                 }
1116                                 else if ( isKeyword(token, KEYWORD_prototype) )
1117                                 {
1118                                         /*
1119                                          * When we reach the "prototype" tag, we infer:
1120                                          *     "BindAgent" is a class
1121                                          *     "build"     is a method
1122                                          *
1123                                          * function BindAgent( repeatableIdName, newParentIdName ) {
1124                                          * }
1125                                          *
1126                                          * CASE 1
1127                                          * Specified function name: "build"
1128                                          *     BindAgent.prototype.build = function( mode ) {
1129                                          *        ignore everything within this function
1130                                          *     }
1131                                          *
1132                                          * CASE 2
1133                                          * Prototype listing
1134                                          *     ValidClassOne.prototype = {
1135                                          *         'validMethodOne' : function(a,b) {},
1136                                          *         'validMethodTwo' : function(a,b) {}
1137                                          *     }
1138                                          *
1139                                          */
1140                                         makeClassTag (name);
1141                                         is_class = TRUE;
1142                                         is_prototype = TRUE;
1143
1144                                         /*
1145                                          * There should a ".function_name" next.
1146                                          */
1147                                         readToken (token);
1148                                         if (isType (token, TOKEN_PERIOD))
1149                                         {
1150                                                 /*
1151                                                  * Handle CASE 1
1152                                                  */
1153                                                 readToken (token);
1154                                                 if ( isKeyword(token, KEYWORD_NONE) )
1155                                                 {
1156                                                         vStringCopy(saveScope, token->scope);
1157                                                         addToScope(token, name->string);
1158
1159                                                         makeJsTag (token, JSTAG_METHOD);
1160                                                         /*
1161                                                          * We can read until the end of the block / statement.
1162                                                          * We need to correctly parse any nested blocks, but
1163                                                          * we do NOT want to create any tags based on what is
1164                                                          * within the blocks.
1165                                                          */
1166                                                         token->ignoreTag = TRUE;
1167                                                         /*
1168                                                          * Find to the end of the statement
1169                                                          */
1170                                                         findCmdTerm (token);
1171                                                         token->ignoreTag = FALSE;
1172                                                         is_terminated = TRUE;
1173                                                         goto cleanUp;
1174                                                 }
1175                                         }
1176                                         else if (isType (token, TOKEN_EQUAL_SIGN))
1177                                         {
1178                                                 readToken (token);
1179                                                 if (isType (token, TOKEN_OPEN_CURLY))
1180                                                 {
1181                                                         /*
1182                                                          * Handle CASE 2
1183                                                          *
1184                                                          * Creates tags for each of these class methods
1185                                                          *     ValidClassOne.prototype = {
1186                                                          *         'validMethodOne' : function(a,b) {},
1187                                                          *         'validMethodTwo' : function(a,b) {}
1188                                                          *     }
1189                                                          */
1190                                                         parseMethods(token, name);
1191                                                         /*
1192                                                          * Find to the end of the statement
1193                                                          */
1194                                                         findCmdTerm (token);
1195                                                         token->ignoreTag = FALSE;
1196                                                         is_terminated = TRUE;
1197                                                         goto cleanUp;
1198                                                 }
1199                                         }
1200                                 }
1201                                 readToken (token);
1202                         } while (isType (token, TOKEN_PERIOD));
1203                 }
1204
1205                 if ( isType (token, TOKEN_OPEN_PAREN) )
1206                         skipArgumentList(token);
1207
1208                 if ( isType (token, TOKEN_OPEN_SQUARE) )
1209                         skipArrayList(token);
1210
1211                 /*
1212                 if ( isType (token, TOKEN_OPEN_CURLY) )
1213                 {
1214                         is_class = parseBlock (token, name);
1215                 }
1216                 */
1217         }
1218
1219         if ( isType (token, TOKEN_CLOSE_CURLY) )
1220         {
1221                 /*
1222                  * Reaching this section without having
1223                  * processed an open curly brace indicates
1224                  * the statement is most likely not terminated.
1225                  */
1226                 is_terminated = FALSE;
1227                 goto cleanUp;
1228         }
1229
1230         if ( isType (token, TOKEN_SEMICOLON) )
1231         {
1232                 /*
1233                  * Only create variables for global scope
1234                  */
1235                 if ( token->nestLevel == 0 && is_global )
1236                 {
1237                         /*
1238                          * Handles this syntax:
1239                          *         var g_var2;
1240                          */
1241                         if (isType (token, TOKEN_SEMICOLON))
1242                                 makeJsTag (name, JSTAG_VARIABLE);
1243                 }
1244                 /*
1245                  * Statement has ended.
1246                  * This deals with calls to functions, like:
1247                  *     alert(..);
1248                  */
1249                 goto cleanUp;
1250         }
1251
1252         if ( isType (token, TOKEN_EQUAL_SIGN) )
1253         {
1254                 readToken (token);
1255
1256                 if ( isKeyword (token, KEYWORD_function) )
1257                 {
1258                         readToken (token);
1259
1260                         if ( isKeyword (token, KEYWORD_NONE) &&
1261                                         ! isType (token, TOKEN_OPEN_PAREN) )
1262                         {
1263                                 /*
1264                                  * Functions of this format:
1265                                  *         var D2A = function theAdd(a, b)
1266                                  *         {
1267                                  *                return a+b;
1268                                  *         }
1269                                  * Are really two separate defined functions and
1270                                  * can be referenced in two ways:
1271                                  *         alert( D2A(1,2) );                     // produces 3
1272                                  *         alert( theAdd(1,2) );                  // also produces 3
1273                                  * So it must have two tags:
1274                                  *         D2A
1275                                  *         theAdd
1276                                  * Save the reference to the name for later use, once
1277                                  * we have established this is a valid function we will
1278                                  * create the secondary reference to it.
1279                                  */
1280                                 copyToken(secondary_name, token);
1281                                 readToken (token);
1282                         }
1283
1284                         if ( isType (token, TOKEN_OPEN_PAREN) )
1285                                 skipArgumentList(token);
1286
1287                         if (isType (token, TOKEN_OPEN_CURLY))
1288                         {
1289                                 /*
1290                                  * This will be either a function or a class.
1291                                  * We can only determine this by checking the body
1292                                  * of the function.  If we find a "this." we know
1293                                  * it is a class, otherwise it is a function.
1294                                  */
1295                                 if ( is_inside_class )
1296                                 {
1297                                         makeJsTag (name, JSTAG_METHOD);
1298                                         if ( vStringLength(secondary_name->string) > 0 )
1299                                                 makeFunctionTag (secondary_name);
1300                                         parseBlock (token, name);
1301                                 }
1302                                 else
1303                                 {
1304                                         is_class = parseBlock (token, name);
1305                                         if ( is_class )
1306                                                 makeClassTag (name);
1307                                         else
1308                                                 makeFunctionTag (name);
1309
1310                                         if ( vStringLength(secondary_name->string) > 0 )
1311                                                 makeFunctionTag (secondary_name);
1312
1313                                         /*
1314                                          * Find to the end of the statement
1315                                          */
1316                                         goto cleanUp;
1317                                 }
1318                         }
1319                 }
1320                 else if (isType (token, TOKEN_OPEN_PAREN))
1321                 {
1322                         /*
1323                          * Handle nameless functions
1324                          *     this.method_name = () {}
1325                          */
1326                         skipArgumentList(token);
1327
1328                         if (isType (token, TOKEN_OPEN_CURLY))
1329                         {
1330                                 /*
1331                                  * Nameless functions are only setup as methods.
1332                                  */
1333                                 makeJsTag (name, JSTAG_METHOD);
1334                                 parseBlock (token, name);
1335                         }
1336                 }
1337                 else if (isType (token, TOKEN_OPEN_CURLY))
1338                 {
1339                         /*
1340                          * Creates tags for each of these class methods
1341                          *     ValidClassOne.prototype = {
1342                          *         'validMethodOne' : function(a,b) {},
1343                          *         'validMethodTwo' : function(a,b) {}
1344                          *     }
1345                          */
1346                         parseMethods(token, name);
1347                 }
1348                 else if (isKeyword (token, KEYWORD_new))
1349                 {
1350                         readToken (token);
1351                         if ( isKeyword (token, KEYWORD_function) ||
1352                                         isKeyword (token, KEYWORD_capital_function) ||
1353                                         isKeyword (token, KEYWORD_object) ||
1354                                         isKeyword (token, KEYWORD_capital_object) )
1355                         {
1356                                 if ( isKeyword (token, KEYWORD_object) ||
1357                                                 isKeyword (token, KEYWORD_capital_object) )
1358                                         is_class = TRUE;
1359
1360                                 readToken (token);
1361                                 if ( isType (token, TOKEN_OPEN_PAREN) )
1362                                         skipArgumentList(token);
1363
1364                                 if (isType (token, TOKEN_SEMICOLON))
1365                                 {
1366                                         if ( is_class )
1367                                         {
1368                                                 makeClassTag (name);
1369                                         } else {
1370                                                 makeFunctionTag (name);
1371                                         }
1372                                 }
1373                         }
1374                 }
1375                 else if (isKeyword (token, KEYWORD_NONE))
1376                 {
1377                         /*
1378                          * Only create variables for global scope
1379                          */
1380                         if ( token->nestLevel == 0 && is_global )
1381                         {
1382                                 /*
1383                                  * A pointer can be created to the function.
1384                                  * If we recognize the function/class name ignore the variable.
1385                                  * This format looks identical to a variable definition.
1386                                  * A variable defined outside of a block is considered
1387                                  * a global variable:
1388                                  *         var g_var1 = 1;
1389                                  *         var g_var2;
1390                                  * This is not a global variable:
1391                                  *         var g_var = function;
1392                                  * This is a global variable:
1393                                  *         var g_var = different_var_name;
1394                                  */
1395                                 if ( ! stringListHas(FunctionNames, vStringValue (token->string)) &&
1396                                                 ! stringListHas(ClassNames, vStringValue (token->string)) )
1397                                 {
1398                                         findCmdTerm (token);
1399                                         if (isType (token, TOKEN_SEMICOLON))
1400                                                 makeJsTag (name, JSTAG_VARIABLE);
1401                                 }
1402                         }
1403                 }
1404         }
1405         findCmdTerm (token);
1406
1407         /*
1408          * Statements can be optionally terminated in the case of
1409          * statement prior to a close curly brace as in the
1410          * document.write line below:
1411          *
1412          * function checkForUpdate() {
1413          *         if( 1==1 ) {
1414          *                 document.write("hello from checkForUpdate<br>")
1415          *         }
1416          *         return 1;
1417          * }
1418          */
1419         if (isType (token, TOKEN_CLOSE_CURLY))
1420                 is_terminated = FALSE;
1421
1422
1423 cleanUp:
1424         vStringCopy(token->scope, saveScope);
1425         deleteToken (name);
1426         deleteToken (secondary_name);
1427         vStringDelete(saveScope);
1428
1429         return is_terminated;
1430 }
1431
1432 static boolean parseLine (tokenInfo *const token, boolean is_inside_class)
1433 {
1434         boolean is_terminated = TRUE;
1435         /*
1436          * Detect the common statements, if, while, for, do, ...
1437          * This is necessary since the last statement within a block "{}"
1438          * can be optionally terminated.
1439          *
1440          * If the statement is not terminated, we need to tell
1441          * the calling routine to prevent reading an additional token
1442          * looking for the end of the statement.
1443          */
1444
1445         if (isType(token, TOKEN_KEYWORD))
1446         {
1447                 switch (token->keyword)
1448                 {
1449                         case KEYWORD_for:
1450                         case KEYWORD_while:
1451                         case KEYWORD_do:
1452                                 parseLoop (token);
1453                                 break;
1454                         case KEYWORD_if:
1455                         case KEYWORD_else:
1456                         case KEYWORD_try:
1457                         case KEYWORD_catch:
1458                         case KEYWORD_finally:
1459                                 /* Common semantics */
1460                                 is_terminated = parseIf (token);
1461                                 break;
1462                         case KEYWORD_switch:
1463                                 parseSwitch (token);
1464                                 break;
1465                         default:
1466                                 parseStatement (token, is_inside_class);
1467                                 break;
1468                 }
1469         }
1470         else
1471         {
1472                 /*
1473                  * Special case where single line statements may not be
1474                  * SEMICOLON terminated.  parseBlock needs to know this
1475                  * so that it does not read the next token.
1476                  */
1477                 is_terminated = parseStatement (token, is_inside_class);
1478         }
1479         return is_terminated;
1480 }
1481
1482 static void parseJsFile (tokenInfo *const token)
1483 {
1484         do
1485         {
1486                 readToken (token);
1487
1488                 if (isType(token, TOKEN_KEYWORD))
1489                 {
1490                         switch (token->keyword)
1491                         {
1492                                 case KEYWORD_function:  parseFunction (token); break;
1493                                 default:                                parseLine (token, FALSE); break;
1494                         }
1495                 }
1496                 else
1497                 {
1498                         parseLine (token, FALSE);
1499                 }
1500         } while (TRUE);
1501 }
1502
1503 static void initialize (const langType language)
1504 {
1505         Assert (sizeof (JsKinds) / sizeof (JsKinds [0]) == JSTAG_COUNT);
1506         Lang_js = language;
1507         buildJsKeywordHash ();
1508 }
1509
1510 static void findJsTags (void)
1511 {
1512         tokenInfo *const token = newToken ();
1513         exception_t exception;
1514
1515         ClassNames = stringListNew ();
1516         FunctionNames = stringListNew ();
1517
1518         exception = (exception_t) (setjmp (Exception));
1519         while (exception == ExceptionNone)
1520                 parseJsFile (token);
1521
1522         stringListDelete (ClassNames);
1523         stringListDelete (FunctionNames);
1524         ClassNames = NULL;
1525         FunctionNames = NULL;
1526         deleteToken (token);
1527 }
1528
1529 /* Create parser definition stucture */
1530 extern parserDefinition* JavaScriptParser (void)
1531 {
1532         static const char *const extensions [] = { "js", NULL };
1533         parserDefinition *const def = parserNew ("JavaScript");
1534         def->extensions = extensions;
1535         /*
1536          * New definitions for parsing instead of regex
1537          */
1538         def->kinds              = JsKinds;
1539         def->kindCount  = KIND_COUNT (JsKinds);
1540         def->parser             = findJsTags;
1541         def->initialize = initialize;
1542
1543         return def;
1544 }
1545 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */