ctags/parsers/jscript.c

   1 /*
   2  *       Copyright (c) 2003, Darren Hiebert
   3  *
   4  *       This source code is released for free distribution under the terms of the
   5  *       GNU General Public License version 2 or (at your option) any later version.
   6  *
   7  *       This module contains functions for generating tags for JavaScript language
   8  *       files.
   9  *
  10  *       Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
  11  *
  12  *       This is a good reference for different forms of the function statement:
  13  *               http://www.permadi.com/tutorial/jsFunc/
  14  *   Another good reference:
  15  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  16  */
  17
  18 /*
  19  *       INCLUDE FILES
  20  */
  21 #include "general.h"    /* must always come first */
  22 #include <ctype.h>      /* to define isalpha () */
  23 #include <string.h>
  24 #ifdef DEBUG
  25 #include <stdio.h>
  26 #endif
  27
  28 #include "debug.h"
  29 #include "mio.h"
  30 #include "keyword.h"
  31 #include "parse.h"
  32 #include "read.h"
  33 #include "routines.h"
  34 #include "vstring.h"
  35
  36 /*
  37  *       MACROS
  38  */
  39 #define isType(token,t)         (bool) ((token)->type == (t))
  40 #define isKeyword(token,k)      (bool) ((token)->keyword == (k))
  41
  42 /*
  43  *       DATA DECLARATIONS
  44  */
  45
  46 /*
  47  * Tracks class and function names already created
  48  */
  49 static stringList *ClassNames;
  50 static stringList *FunctionNames;
  51
  52 /*      Used to specify type of keyword.
  53 */
  54 enum eKeywordId {
  55         KEYWORD_function,
  56         KEYWORD_capital_function,
  57         KEYWORD_capital_object,
  58         KEYWORD_prototype,
  59         KEYWORD_var,
  60         KEYWORD_let,
  61         KEYWORD_const,
  62         KEYWORD_new,
  63         KEYWORD_this,
  64         KEYWORD_for,
  65         KEYWORD_while,
  66         KEYWORD_do,
  67         KEYWORD_if,
  68         KEYWORD_else,
  69         KEYWORD_switch,
  70         KEYWORD_try,
  71         KEYWORD_catch,
  72         KEYWORD_finally,
  73         KEYWORD_sap,
  74         KEYWORD_return
  75 };
  76 typedef int keywordId; /* to allow KEYWORD_NONE */
  77
  78 typedef enum eTokenType {
  79         TOKEN_UNDEFINED,
  80         TOKEN_EOF,
  81         TOKEN_CHARACTER,
  82         TOKEN_CLOSE_PAREN,
  83         TOKEN_SEMICOLON,
  84         TOKEN_COLON,
  85         TOKEN_COMMA,
  86         TOKEN_KEYWORD,
  87         TOKEN_OPEN_PAREN,
  88         TOKEN_OPERATOR,
  89         TOKEN_IDENTIFIER,
  90         TOKEN_STRING,
  91         TOKEN_PERIOD,
  92         TOKEN_OPEN_CURLY,
  93         TOKEN_CLOSE_CURLY,
  94         TOKEN_EQUAL_SIGN,
  95         TOKEN_FORWARD_SLASH,
  96         TOKEN_OPEN_SQUARE,
  97         TOKEN_CLOSE_SQUARE,
  98         TOKEN_REGEXP,
  99         TOKEN_POSTFIX_OPERATOR,
 100         TOKEN_BINARY_OPERATOR
 101 } tokenType;
 102
 103 typedef struct sTokenInfo {
 104         tokenType               type;
 105         keywordId               keyword;
 106         vString *               string;
 107         vString *               scope;
 108         unsigned long   lineNumber;
 109         MIOPos                  filePosition;
 110         int                             nestLevel;
 111         bool                    ignoreTag;
 112 } tokenInfo;
 113
 114 /*
 115  *      DATA DEFINITIONS
 116  */
 117
 118 static tokenType LastTokenType;
 119
 120 static langType Lang_js;
 121
 122 typedef enum {
 123         JSTAG_FUNCTION,
 124         JSTAG_CLASS,
 125         JSTAG_METHOD,
 126         JSTAG_PROPERTY,
 127         JSTAG_CONSTANT,
 128         JSTAG_VARIABLE,
 129         JSTAG_COUNT
 130 } jsKind;
 131
 132 static kindOption JsKinds [] = {
 133         { true,  'f', "function",         "functions"              },
 134         { true,  'c', "class",            "classes"                        },
 135         { true,  'm', "method",           "methods"                        },
 136         { true,  'p', "property",         "properties"             },
 137         { true,  'C', "constant",         "constants"              },
 138         { true,  'v', "variable",         "global variables"   }
 139 };
 140
 141 static const keywordTable JsKeywordTable [] = {
 142         /* keyword              keyword ID */
 143         { "function",   KEYWORD_function                        },
 144         { "Function",   KEYWORD_capital_function        },
 145         { "Object",             KEYWORD_capital_object          },
 146         { "prototype",  KEYWORD_prototype                       },
 147         { "var",                KEYWORD_var                                     },
 148         { "let",                KEYWORD_let                                     },
 149         { "const",              KEYWORD_const                           },
 150         { "new",                KEYWORD_new                                     },
 151         { "this",               KEYWORD_this                            },
 152         { "for",                KEYWORD_for                                     },
 153         { "while",              KEYWORD_while                           },
 154         { "do",                 KEYWORD_do                                      },
 155         { "if",                 KEYWORD_if                                      },
 156         { "else",               KEYWORD_else                            },
 157         { "switch",             KEYWORD_switch                          },
 158         { "try",                KEYWORD_try                                     },
 159         { "catch",              KEYWORD_catch                           },
 160         { "finally",    KEYWORD_finally                         },
 161         { "sap",            KEYWORD_sap                                 },
 162         { "return",             KEYWORD_return                          }
 163 };
 164
 165 /*
 166  *       FUNCTION DEFINITIONS
 167  */
 168
 169 /* Recursive functions */
 170 static void parseFunction (tokenInfo *const token);
 171 static bool parseBlock (tokenInfo *const token, tokenInfo *const orig_parent);
 172 static bool parseLine (tokenInfo *const token, tokenInfo *const parent, bool is_inside_class);
 173 static void parseUI5 (tokenInfo *const token);
 174
 175 static bool isIdentChar (const int c)
 176 {
 177         return (bool)
 178                 (isalpha (c) || isdigit (c) || c == '$' ||
 179                  c == '@' || c == '_' || c == '#');
 180 }
 181
 182 static tokenInfo *newToken (void)
 183 {
 184         tokenInfo *const token = xMalloc (1, tokenInfo);
 185
 186         token->type                     = TOKEN_UNDEFINED;
 187         token->keyword          = KEYWORD_NONE;
 188         token->string           = vStringNew ();
 189         token->scope            = vStringNew ();
 190         token->nestLevel        = 0;
 191         token->ignoreTag        = false;
 192         token->lineNumber   = getInputLineNumber ();
 193         token->filePosition = getInputFilePosition ();
 194
 195         return token;
 196 }
 197
 198 static void deleteToken (tokenInfo *const token)
 199 {
 200         vStringDelete (token->string);
 201         vStringDelete (token->scope);
 202         eFree (token);
 203 }
 204
 205 /*
 206  *       Tag generation functions
 207  */
 208
 209 static void makeJsTag (tokenInfo *const token, const jsKind kind, vString *const signature)
 210 {
 211         if (JsKinds [kind].enabled && ! token->ignoreTag )
 212         {
 213                 const char *name = vStringValue (token->string);
 214                 vString *fullscope = vStringNewCopy (token->scope);
 215                 const char *p;
 216                 tagEntryInfo e;
 217
 218                 if ((p = strrchr (name, '.')) != NULL)
 219                 {
 220                         if (vStringLength (fullscope) > 0)
 221                                 vStringPut (fullscope, '.');
 222                         vStringNCatS (fullscope, name, p - name);
 223                         name = p + 1;
 224                 }
 225
 226                 initTagEntry (&e, name, &(JsKinds [kind]));
 227
 228                 e.lineNumber   = token->lineNumber;
 229                 e.filePosition = token->filePosition;
 230
 231                 if ( vStringLength(fullscope) > 0 )
 232                 {
 233                         jsKind parent_kind = JSTAG_CLASS;
 234
 235                         /*
 236                          * If we're creating a function (and not a method),
 237                          * guess we're inside another function
 238                          */
 239                         if (kind == JSTAG_FUNCTION)
 240                                 parent_kind = JSTAG_FUNCTION;
 241
 242                         e.extensionFields.scopeKind = &(JsKinds [parent_kind]);
 243                         e.extensionFields.scopeName = vStringValue (fullscope);
 244                 }
 245
 246                 if (signature && vStringLength(signature))
 247                 {
 248                         size_t i;
 249                         /* sanitize signature by replacing all control characters with a
 250                          * space (because it's simple).
 251                          * there should never be any junk in a valid signature, but who
 252                          * knows what the user wrote and CTags doesn't cope well with weird
 253                          * characters. */
 254                         for (i = 0; i < signature->length; i++)
 255                         {
 256                                 unsigned char c = (unsigned char) signature->buffer[i];
 257                                 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
 258                                         signature->buffer[i] = ' ';
 259                         }
 260                         e.extensionFields.signature = vStringValue(signature);
 261                 }
 262
 263                 makeTagEntry (&e);
 264                 vStringDelete (fullscope);
 265         }
 266 }
 267
 268 static void makeClassTag (tokenInfo *const token, vString *const signature)
 269 {
 270         vString *       fulltag;
 271
 272         if ( ! token->ignoreTag )
 273         {
 274                 fulltag = vStringNew ();
 275                 if (vStringLength (token->scope) > 0)
 276                 {
 277                         vStringCopy(fulltag, token->scope);
 278                         vStringCatS (fulltag, ".");
 279                         vStringCatS (fulltag, vStringValue(token->string));
 280                 }
 281                 else
 282                 {
 283                         vStringCopy(fulltag, token->string);
 284                 }
 285                 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
 286                 {
 287                         stringListAdd (ClassNames, vStringNewCopy (fulltag));
 288                         makeJsTag (token, JSTAG_CLASS, signature);
 289                 }
 290                 vStringDelete (fulltag);
 291         }
 292 }
 293
 294 static void makeFunctionTag (tokenInfo *const token, vString *const signature)
 295 {
 296         vString *       fulltag;
 297
 298         if ( ! token->ignoreTag )
 299         {
 300                 fulltag = vStringNew ();
 301                 if (vStringLength (token->scope) > 0)
 302                 {
 303                         vStringCopy(fulltag, token->scope);
 304                         vStringCatS (fulltag, ".");
 305                         vStringCatS (fulltag, vStringValue(token->string));
 306                 }
 307                 else
 308                 {
 309                         vStringCopy(fulltag, token->string);
 310                 }
 311                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
 312                 {
 313                         stringListAdd (FunctionNames, vStringNewCopy (fulltag));
 314                         makeJsTag (token, JSTAG_FUNCTION, signature);
 315                 }
 316                 vStringDelete (fulltag);
 317         }
 318 }
 319
 320 /*
 321  *       Parsing functions
 322  */
 323
 324 static void parseString (vString *const string, const int delimiter)
 325 {
 326         bool end = false;
 327         while (! end)
 328         {
 329                 int c = getcFromInputFile ();
 330                 if (c == EOF)
 331                         end = true;
 332                 else if (c == '\\')
 333                 {
 334                         /* Eat the escape sequence (\", \', etc).  We properly handle
 335                          * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
 336                          * as an unescaped character, which is invalid and handled below.
 337                          * Also, handle the fact that <LineContinuation> produces an empty
 338                          * sequence.
 339                          * See ECMA-262 7.8.4 */
 340                         c = getcFromInputFile ();
 341                         if (c != '\r' && c != '\n')
 342                                 vStringPut(string, c);
 343                         else if (c == '\r')
 344                         {
 345                                 c = getcFromInputFile();
 346                                 if (c != '\n')
 347                                         ungetcToInputFile (c);
 348                         }
 349                 }
 350                 else if (c == delimiter)
 351                         end = true;
 352                 else if (c == '\r' || c == '\n')
 353                 {
 354                         /* those are invalid when not escaped */
 355                         end = true;
 356                         /* we don't want to eat the newline itself to let the automatic
 357                          * semicolon insertion code kick in */
 358                         ungetcToInputFile (c);
 359                 }
 360                 else
 361                         vStringPut (string, c);
 362         }
 363 }
 364
 365 static void parseRegExp (void)
 366 {
 367         int c;
 368         bool in_range = false;
 369
 370         do
 371         {
 372                 c = getcFromInputFile ();
 373                 if (! in_range && c == '/')
 374                 {
 375                         do /* skip flags */
 376                         {
 377                                 c = getcFromInputFile ();
 378                         } while (isalpha (c));
 379                         ungetcToInputFile (c);
 380                         break;
 381                 }
 382                 else if (c == '\\')
 383                         c = getcFromInputFile (); /* skip next character */
 384                 else if (c == '[')
 385                         in_range = true;
 386                 else if (c == ']')
 387                         in_range = false;
 388         } while (c != EOF);
 389 }
 390
 391 /*      Read a C identifier beginning with "firstChar" and places it into
 392  *      "name".
 393  */
 394 static void parseIdentifier (vString *const string, const int firstChar)
 395 {
 396         int c = firstChar;
 397         Assert (isIdentChar (c));
 398         do
 399         {
 400                 vStringPut (string, c);
 401                 c = getcFromInputFile ();
 402         } while (isIdentChar (c));
 403         ungetcToInputFile (c);          /* unget non-identifier character */
 404 }
 405
 406 static keywordId analyzeToken (vString *const name)
 407 {
 408         vString *keyword = vStringNew ();
 409         keywordId result;
 410         vStringCopyToLower (keyword, name);
 411         result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
 412         vStringDelete (keyword);
 413         return result;
 414 }
 415
 416 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
 417 {
 418         int c;
 419         int i;
 420
 421         token->type                     = TOKEN_UNDEFINED;
 422         token->keyword          = KEYWORD_NONE;
 423         vStringClear (token->string);
 424
 425 getNextChar:
 426         i = 0;
 427         do
 428         {
 429                 c = getcFromInputFile ();
 430                 i++;
 431         }
 432         while (c == '\t'  ||  c == ' ' ||
 433                    ((c == '\r' || c == '\n') && ! include_newlines));
 434
 435         token->lineNumber   = getInputLineNumber ();
 436         token->filePosition = getInputFilePosition ();
 437
 438         if (repr)
 439         {
 440                 if (i > 1)
 441                         vStringPut (repr, ' ');
 442                 vStringPut (repr, c);
 443         }
 444
 445         switch (c)
 446         {
 447                 case EOF: token->type = TOKEN_EOF;                                      break;
 448                 case '(': token->type = TOKEN_OPEN_PAREN;                       break;
 449                 case ')': token->type = TOKEN_CLOSE_PAREN;                      break;
 450                 case ';': token->type = TOKEN_SEMICOLON;                        break;
 451                 case ',': token->type = TOKEN_COMMA;                            break;
 452                 case '.': token->type = TOKEN_PERIOD;                           break;
 453                 case ':': token->type = TOKEN_COLON;                            break;
 454                 case '{': token->type = TOKEN_OPEN_CURLY;                       break;
 455                 case '}': token->type = TOKEN_CLOSE_CURLY;                      break;
 456                 case '=': token->type = TOKEN_EQUAL_SIGN;                       break;
 457                 case '[': token->type = TOKEN_OPEN_SQUARE;                      break;
 458                 case ']': token->type = TOKEN_CLOSE_SQUARE;                     break;
 459
 460                 case '+':
 461                 case '-':
 462                         {
 463                                 int d = getcFromInputFile ();
 464                                 if (d == c) /* ++ or -- */
 465                                         token->type = TOKEN_POSTFIX_OPERATOR;
 466                                 else
 467                                 {
 468                                         ungetcToInputFile (d);
 469                                         token->type = TOKEN_BINARY_OPERATOR;
 470                                 }
 471                                 break;
 472                         }
 473
 474                 case '*':
 475                 case '%':
 476                 case '?':
 477                 case '>':
 478                 case '<':
 479                 case '^':
 480                 case '|':
 481                 case '&':
 482                         token->type = TOKEN_BINARY_OPERATOR;
 483                         break;
 484
 485                 case '\r':
 486                 case '\n':
 487                         /* This isn't strictly correct per the standard, but following the
 488                          * real rules means understanding all statements, and that's not
 489                          * what the parser currently does.  What we do here is a guess, by
 490                          * avoiding inserting semicolons that would make the statement on
 491                          * the left invalid.  Hopefully this should not have false negatives
 492                          * (e.g. should not miss insertion of a semicolon) but might have
 493                          * false positives (e.g. it will wrongfully emit a semicolon for the
 494                          * newline in "foo\n+bar").
 495                          * This should however be mostly harmless as we only deal with
 496                          * newlines in specific situations where we know a false positive
 497                          * wouldn't hurt too bad. */
 498                         switch (LastTokenType)
 499                         {
 500                                 /* these cannot be the end of a statement, so hold the newline */
 501                                 case TOKEN_EQUAL_SIGN:
 502                                 case TOKEN_COLON:
 503                                 case TOKEN_PERIOD:
 504                                 case TOKEN_FORWARD_SLASH:
 505                                 case TOKEN_BINARY_OPERATOR:
 506                                 /* and these already end one, no need to duplicate it */
 507                                 case TOKEN_SEMICOLON:
 508                                 case TOKEN_COMMA:
 509                                 case TOKEN_CLOSE_CURLY:
 510                                 case TOKEN_OPEN_CURLY:
 511                                         include_newlines = false; /* no need to recheck */
 512                                         goto getNextChar;
 513                                         break;
 514                                 default:
 515                                         token->type = TOKEN_SEMICOLON;
 516                         }
 517                         break;
 518
 519                 case '\'':
 520                 case '"':
 521                                   token->type = TOKEN_STRING;
 522                                   parseString (token->string, c);
 523                                   token->lineNumber = getInputLineNumber ();
 524                                   token->filePosition = getInputFilePosition ();
 525                                   if (repr)
 526                                   {
 527                                           vStringCat (repr, token->string);
 528                                           vStringPut (repr, c);
 529                                   }
 530                                   break;
 531
 532                 case '\\':
 533                                   c = getcFromInputFile ();
 534                                   if (c != '\\'  && c != '"'  &&  !isspace (c))
 535                                           ungetcToInputFile (c);
 536                                   token->type = TOKEN_CHARACTER;
 537                                   token->lineNumber = getInputLineNumber ();
 538                                   token->filePosition = getInputFilePosition ();
 539                                   break;
 540
 541                 case '/':
 542                                   {
 543                                           int d = getcFromInputFile ();
 544                                           if ( (d != '*') &&            /* is this the start of a comment? */
 545                                                           (d != '/') )          /* is a one line comment? */
 546                                           {
 547                                                   ungetcToInputFile (d);
 548                                                   switch (LastTokenType)
 549                                                   {
 550                                                           case TOKEN_CHARACTER:
 551                                                           case TOKEN_IDENTIFIER:
 552                                                           case TOKEN_STRING:
 553                                                           case TOKEN_CLOSE_CURLY:
 554                                                           case TOKEN_CLOSE_PAREN:
 555                                                           case TOKEN_CLOSE_SQUARE:
 556                                                                   token->type = TOKEN_FORWARD_SLASH;
 557                                                                   break;
 558
 559                                                           default:
 560                                                                   token->type = TOKEN_REGEXP;
 561                                                                   parseRegExp ();
 562                                                                   token->lineNumber = getInputLineNumber ();
 563                                                                   token->filePosition = getInputFilePosition ();
 564                                                                   break;
 565                                                   }
 566                                           }
 567                                           else
 568                                           {
 569                                                   if (repr) /* remove the / we added */
 570                                                           repr->buffer[--repr->length] = 0;
 571                                                   if (d == '*')
 572                                                   {
 573                                                           do
 574                                                           {
 575                                                                   skipToCharacterInInputFile ('*');
 576                                                                   c = getcFromInputFile ();
 577                                                                   if (c == '/')
 578                                                                           break;
 579                                                                   else
 580                                                                           ungetcToInputFile (c);
 581                                                           } while (c != EOF && c != '\0');
 582                                                           goto getNextChar;
 583                                                   }
 584                                                   else if (d == '/')    /* is this the start of a comment?  */
 585                                                   {
 586                                                           skipToCharacterInInputFile ('\n');
 587                                                           /* if we care about newlines, put it back so it is seen */
 588                                                           if (include_newlines)
 589                                                                   ungetcToInputFile ('\n');
 590                                                           goto getNextChar;
 591                                                   }
 592                                           }
 593                                           break;
 594                                   }
 595
 596                 case '#':
 597                                   /* skip shebang in case of e.g. Node.js scripts */
 598                                   if (token->lineNumber > 1)
 599                                           token->type = TOKEN_UNDEFINED;
 600                                   else if ((c = getcFromInputFile ()) != '!')
 601                                   {
 602                                           ungetcToInputFile (c);
 603                                           token->type = TOKEN_UNDEFINED;
 604                                   }
 605                                   else
 606                                   {
 607                                           skipToCharacterInInputFile ('\n');
 608                                           goto getNextChar;
 609                                   }
 610                                   break;
 611
 612                 default:
 613                                   if (! isIdentChar (c))
 614                                           token->type = TOKEN_UNDEFINED;
 615                                   else
 616                                   {
 617                                           parseIdentifier (token->string, c);
 618                                           token->lineNumber = getInputLineNumber ();
 619                                           token->filePosition = getInputFilePosition ();
 620                                           token->keyword = analyzeToken (token->string);
 621                                           if (isKeyword (token, KEYWORD_NONE))
 622                                                   token->type = TOKEN_IDENTIFIER;
 623                                           else
 624                                                   token->type = TOKEN_KEYWORD;
 625                                           if (repr && vStringLength (token->string) > 1)
 626                                                   vStringCatS (repr, vStringValue (token->string) + 1);
 627                                   }
 628                                   break;
 629         }
 630
 631         LastTokenType = token->type;
 632 }
 633
 634 static void readToken (tokenInfo *const token)
 635 {
 636         readTokenFull (token, false, NULL);
 637 }
 638
 639 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 640 {
 641         dest->nestLevel = src->nestLevel;
 642         dest->lineNumber = src->lineNumber;
 643         dest->filePosition = src->filePosition;
 644         dest->type = src->type;
 645         dest->keyword = src->keyword;
 646         vStringCopy(dest->string, src->string);
 647         vStringCopy(dest->scope, src->scope);
 648 }
 649
 650 /*
 651  *       Token parsing functions
 652  */
 653
 654 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
 655 {
 656         int nest_level = 0;
 657
 658         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
 659         {
 660                 nest_level++;
 661                 if (repr)
 662                         vStringPut (repr, '(');
 663                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 664                 {
 665                         readTokenFull (token, false, repr);
 666                         if (isType (token, TOKEN_OPEN_PAREN))
 667                                 nest_level++;
 668                         else if (isType (token, TOKEN_CLOSE_PAREN))
 669                                 nest_level--;
 670                 }
 671                 readTokenFull (token, include_newlines, NULL);
 672         }
 673 }
 674
 675 static void skipArrayList (tokenInfo *const token, bool include_newlines)
 676 {
 677         int nest_level = 0;
 678
 679         /*
 680          * Handle square brackets
 681          *       var name[1]
 682          * So we must check for nested open and closing square brackets
 683          */
 684
 685         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
 686         {
 687                 nest_level++;
 688                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 689                 {
 690                         readToken (token);
 691                         if (isType (token, TOKEN_OPEN_SQUARE))
 692                                 nest_level++;
 693                         else if (isType (token, TOKEN_CLOSE_SQUARE))
 694                                 nest_level--;
 695                 }
 696                 readTokenFull (token, include_newlines, NULL);
 697         }
 698 }
 699
 700 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
 701 {
 702         if (vStringLength (parent->string) > 0)
 703         {
 704                 vStringCatS (parent->string, ".");
 705         }
 706         vStringCatS (parent->string, vStringValue(child->string));
 707 }
 708
 709 static void addToScope (tokenInfo* const token, vString* const extra)
 710 {
 711         if (vStringLength (token->scope) > 0)
 712         {
 713                 vStringCatS (token->scope, ".");
 714         }
 715         vStringCatS (token->scope, vStringValue(extra));
 716 }
 717
 718 /*
 719  *       Scanning functions
 720  */
 721
 722 static bool findCmdTerm (tokenInfo *const token, bool include_newlines)
 723 {
 724         /*
 725          * Read until we find either a semicolon or closing brace.
 726          * Any nested braces will be handled within.
 727          */
 728         while (! isType (token, TOKEN_SEMICOLON) &&
 729                    ! isType (token, TOKEN_CLOSE_CURLY) &&
 730                    ! isType (token, TOKEN_EOF))
 731         {
 732                 /* Handle nested blocks */
 733                 if ( isType (token, TOKEN_OPEN_CURLY))
 734                 {
 735                         parseBlock (token, token);
 736                         readTokenFull (token, include_newlines, NULL);
 737                 }
 738                 else if ( isType (token, TOKEN_OPEN_PAREN) )
 739                 {
 740                         skipArgumentList(token, include_newlines, NULL);
 741                 }
 742                 else if ( isType (token, TOKEN_OPEN_SQUARE) )
 743                 {
 744                         skipArrayList(token, include_newlines);
 745                 }
 746                 else
 747                 {
 748                         readTokenFull (token, include_newlines, NULL);
 749                 }
 750         }
 751
 752         return isType (token, TOKEN_SEMICOLON);
 753 }
 754
 755 static void parseSwitch (tokenInfo *const token)
 756 {
 757         /*
 758          * switch (expression) {
 759          * case value1:
 760          *         statement;
 761          *         break;
 762          * case value2:
 763          *         statement;
 764          *         break;
 765          * default : statement;
 766          * }
 767          */
 768
 769         readToken (token);
 770
 771         if (isType (token, TOKEN_OPEN_PAREN))
 772         {
 773                 /*
 774                  * Handle nameless functions, these will only
 775                  * be considered methods.
 776                  */
 777                 skipArgumentList(token, false, NULL);
 778         }
 779
 780         if (isType (token, TOKEN_OPEN_CURLY))
 781         {
 782                 parseBlock (token, token);
 783         }
 784 }
 785
 786 static bool parseLoop (tokenInfo *const token, tokenInfo *const parent)
 787 {
 788         /*
 789          * Handles these statements
 790          *         for (x=0; x<3; x++)
 791          *                 document.write("This text is repeated three times<br>");
 792          *
 793          *         for (x=0; x<3; x++)
 794          *         {
 795          *                 document.write("This text is repeated three times<br>");
 796          *         }
 797          *
 798          *         while (number<5){
 799          *                 document.write(number+"<br>");
 800          *                 number++;
 801          *         }
 802          *
 803          *         do{
 804          *                 document.write(number+"<br>");
 805          *                 number++;
 806          *         }
 807          *         while (number<5);
 808          */
 809         bool is_terminated = true;
 810
 811         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
 812         {
 813                 readToken(token);
 814
 815                 if (isType (token, TOKEN_OPEN_PAREN))
 816                 {
 817                         /*
 818                          * Handle nameless functions, these will only
 819                          * be considered methods.
 820                          */
 821                         skipArgumentList(token, false, NULL);
 822                 }
 823
 824                 if (isType (token, TOKEN_OPEN_CURLY))
 825                 {
 826                         /*
 827                          * This will be either a function or a class.
 828                          * We can only determine this by checking the body
 829                          * of the function.  If we find a "this." we know
 830                          * it is a class, otherwise it is a function.
 831                          */
 832                         parseBlock (token, parent);
 833                 }
 834                 else
 835                 {
 836                         is_terminated = parseLine(token, parent, false);
 837                 }
 838         }
 839         else if (isKeyword (token, KEYWORD_do))
 840         {
 841                 readToken(token);
 842
 843                 if (isType (token, TOKEN_OPEN_CURLY))
 844                 {
 845                         /*
 846                          * This will be either a function or a class.
 847                          * We can only determine this by checking the body
 848                          * of the function.  If we find a "this." we know
 849                          * it is a class, otherwise it is a function.
 850                          */
 851                         parseBlock (token, parent);
 852                 }
 853                 else
 854                 {
 855                         is_terminated = parseLine(token, parent, false);
 856                 }
 857
 858                 if (is_terminated)
 859                         readToken(token);
 860
 861                 if (isKeyword (token, KEYWORD_while))
 862                 {
 863                         readToken(token);
 864
 865                         if (isType (token, TOKEN_OPEN_PAREN))
 866                         {
 867                                 /*
 868                                  * Handle nameless functions, these will only
 869                                  * be considered methods.
 870                                  */
 871                                 skipArgumentList(token, true, NULL);
 872                         }
 873                         if (! isType (token, TOKEN_SEMICOLON))
 874                                 is_terminated = false;
 875                 }
 876         }
 877
 878         return is_terminated;
 879 }
 880
 881 static bool parseIf (tokenInfo *const token, tokenInfo *const parent)
 882 {
 883         bool read_next_token = true;
 884         /*
 885          * If statements have two forms
 886          *         if ( ... )
 887          *                 one line;
 888          *
 889          *         if ( ... )
 890          *                statement;
 891          *         else
 892          *                statement
 893          *
 894          *         if ( ... ) {
 895          *                multiple;
 896          *                statements;
 897          *         }
 898          *
 899          *
 900          *         if ( ... ) {
 901          *                return elem
 902          *         }
 903          *
 904          *     This example if correctly written, but the
 905          *     else contains only 1 statement without a terminator
 906          *     since the function finishes with the closing brace.
 907          *
 908      *     function a(flag){
 909      *         if(flag)
 910      *             test(1);
 911      *         else
 912      *             test(2)
 913      *     }
 914          *
 915          * TODO:  Deal with statements that can optional end
 916          *                without a semi-colon.  Currently this messes up
 917          *                the parsing of blocks.
 918          *                Need to somehow detect this has happened, and either
 919          *                backup a token, or skip reading the next token if
 920          *                that is possible from all code locations.
 921          *
 922          */
 923
 924         readToken (token);
 925
 926         if (isKeyword (token, KEYWORD_if))
 927         {
 928                 /*
 929                  * Check for an "else if" and consume the "if"
 930                  */
 931                 readToken (token);
 932         }
 933
 934         if (isType (token, TOKEN_OPEN_PAREN))
 935         {
 936                 /*
 937                  * Handle nameless functions, these will only
 938                  * be considered methods.
 939                  */
 940                 skipArgumentList(token, false, NULL);
 941         }
 942
 943         if (isType (token, TOKEN_OPEN_CURLY))
 944         {
 945                 /*
 946                  * This will be either a function or a class.
 947                  * We can only determine this by checking the body
 948                  * of the function.  If we find a "this." we know
 949                  * it is a class, otherwise it is a function.
 950                  */
 951                 parseBlock (token, parent);
 952         }
 953         else
 954         {
 955                 /* The next token should only be read if this statement had its own
 956                  * terminator */
 957                 read_next_token = findCmdTerm (token, true);
 958         }
 959         return read_next_token;
 960 }
 961
 962 static void parseFunction (tokenInfo *const token)
 963 {
 964         tokenInfo *const name = newToken ();
 965         vString *const signature = vStringNew ();
 966         bool is_class = false;
 967
 968         /*
 969          * This deals with these formats
 970          *         function validFunctionTwo(a,b) {}
 971          */
 972
 973         readToken (name);
 974         /* Add scope in case this is an INNER function */
 975         addToScope(name, token->scope);
 976
 977         readToken (token);
 978         while (isType (token, TOKEN_PERIOD))
 979         {
 980                 readToken (token);
 981                 if ( isKeyword(token, KEYWORD_NONE) )
 982                 {
 983                         addContext (name, token);
 984                         readToken (token);
 985                 }
 986         }
 987
 988         if ( isType (token, TOKEN_OPEN_PAREN) )
 989                 skipArgumentList(token, false, signature);
 990
 991         if ( isType (token, TOKEN_OPEN_CURLY) )
 992         {
 993                 is_class = parseBlock (token, name);
 994                 if ( is_class )
 995                         makeClassTag (name, signature);
 996                 else
 997                         makeFunctionTag (name, signature);
 998         }
 999
1000         findCmdTerm (token, false);
1001
1002         vStringDelete (signature);
1003         deleteToken (name);
1004 }
1005
1006 static bool parseBlock (tokenInfo *const token, tokenInfo *const orig_parent)
1007 {
1008         bool is_class = false;
1009         bool read_next_token = true;
1010         vString * saveScope = vStringNew ();
1011         tokenInfo *const parent = newToken ();
1012
1013         /* backup the parent token to allow calls like parseBlock(token, token) */
1014         copyToken (parent, orig_parent);
1015
1016         token->nestLevel++;
1017         /*
1018          * Make this routine a bit more forgiving.
1019          * If called on an open_curly advance it
1020          */
1021         if ( isType (token, TOKEN_OPEN_CURLY) &&
1022                         isKeyword(token, KEYWORD_NONE) )
1023                 readToken(token);
1024
1025         if (! isType (token, TOKEN_CLOSE_CURLY))
1026         {
1027                 /*
1028                  * Read until we find the closing brace,
1029                  * any nested braces will be handled within
1030                  */
1031                 do
1032                 {
1033                         read_next_token = true;
1034                         if (isKeyword (token, KEYWORD_this))
1035                         {
1036                                 /*
1037                                  * Means we are inside a class and have found
1038                                  * a class, not a function
1039                                  */
1040                                 is_class = true;
1041                                 vStringCopy(saveScope, token->scope);
1042                                 addToScope (token, parent->string);
1043
1044                                 /*
1045                                  * Ignore the remainder of the line
1046                                  * findCmdTerm(token);
1047                                  */
1048                                 read_next_token = parseLine (token, parent, is_class);
1049
1050                                 vStringCopy(token->scope, saveScope);
1051                         }
1052                         else if (isKeyword (token, KEYWORD_var) ||
1053                                          isKeyword (token, KEYWORD_let) ||
1054                                          isKeyword (token, KEYWORD_const))
1055                         {
1056                                 /*
1057                                  * Potentially we have found an inner function.
1058                                  * Set something to indicate the scope
1059                                  */
1060                                 vStringCopy(saveScope, token->scope);
1061                                 addToScope (token, parent->string);
1062                                 read_next_token = parseLine (token, parent, is_class);
1063                                 vStringCopy(token->scope, saveScope);
1064                         }
1065                         else if (isKeyword (token, KEYWORD_function))
1066                         {
1067                                 vStringCopy(saveScope, token->scope);
1068                                 addToScope (token, parent->string);
1069                                 parseFunction (token);
1070                                 vStringCopy(token->scope, saveScope);
1071                         }
1072                         else if (isType (token, TOKEN_OPEN_CURLY))
1073                         {
1074                                 /* Handle nested blocks */
1075                                 parseBlock (token, parent);
1076                         }
1077                         else
1078                         {
1079                                 /*
1080                                  * It is possible for a line to have no terminator
1081                                  * if the following line is a closing brace.
1082                                  * parseLine will detect this case and indicate
1083                                  * whether we should read an additional token.
1084                                  */
1085                                 read_next_token = parseLine (token, parent, is_class);
1086                         }
1087
1088                         /*
1089                          * Always read a new token unless we find a statement without
1090                          * a ending terminator
1091                          */
1092                         if( read_next_token )
1093                                 readToken(token);
1094
1095                         /*
1096                          * If we find a statement without a terminator consider the
1097                          * block finished, otherwise the stack will be off by one.
1098                          */
1099                 } while (! isType (token, TOKEN_EOF) &&
1100                                  ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1101         }
1102
1103         deleteToken (parent);
1104         vStringDelete(saveScope);
1105         token->nestLevel--;
1106
1107         return is_class;
1108 }
1109
1110 static bool parseMethods (tokenInfo *const token, tokenInfo *const class)
1111 {
1112         tokenInfo *const name = newToken ();
1113         bool has_methods = false;
1114
1115         /*
1116          * This deals with these formats
1117          *         validProperty  : 2,
1118          *         validMethod    : function(a,b) {}
1119          *         'validMethod2' : function(a,b) {}
1120      *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1121          */
1122
1123         do
1124         {
1125                 readToken (token);
1126                 if (isType (token, TOKEN_CLOSE_CURLY))
1127                 {
1128                         /*
1129                          * This was most likely a variable declaration of a hash table.
1130                          * indicate there were no methods and return.
1131                          */
1132                         has_methods = false;
1133                         goto cleanUp;
1134                 }
1135
1136                 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
1137                 {
1138                         copyToken(name, token);
1139
1140                         readToken (token);
1141                         if ( isType (token, TOKEN_COLON) )
1142                         {
1143                                 readToken (token);
1144                                 if ( isKeyword (token, KEYWORD_function) )
1145                                 {
1146                                         vString *const signature = vStringNew ();
1147
1148                                         readToken (token);
1149                                         if ( isType (token, TOKEN_OPEN_PAREN) )
1150                                         {
1151                                                 skipArgumentList(token, false, signature);
1152                                         }
1153
1154                                         if (isType (token, TOKEN_OPEN_CURLY))
1155                                         {
1156                                                 has_methods = true;
1157                                                 addToScope (name, class->string);
1158                                                 makeJsTag (name, JSTAG_METHOD, signature);
1159                                                 parseBlock (token, name);
1160
1161                                                 /*
1162                                                  * Read to the closing curly, check next
1163                                                  * token, if a comma, we must loop again
1164                                                  */
1165                                                 readToken (token);
1166                                         }
1167
1168                                         vStringDelete (signature);
1169                                 }
1170                                 else
1171                                 {
1172                                                 vString * saveScope = vStringNew ();
1173                                                 bool has_child_methods = false;
1174
1175                                                 /* skip whatever is the value */
1176                                                 while (! isType (token, TOKEN_COMMA) &&
1177                                                        ! isType (token, TOKEN_CLOSE_CURLY) &&
1178                                                        ! isType (token, TOKEN_EOF))
1179                                                 {
1180                                                         if (isType (token, TOKEN_OPEN_CURLY))
1181                                                         {
1182                                                                 /* Recurse to find child properties/methods */
1183                                                                 vStringCopy (saveScope, token->scope);
1184                                                                 addToScope (token, class->string);
1185                                                                 has_child_methods = parseMethods (token, name);
1186                                                                 vStringCopy (token->scope, saveScope);
1187                                                                 readToken (token);
1188                                                         }
1189                                                         else if (isType (token, TOKEN_OPEN_PAREN))
1190                                                         {
1191                                                                 skipArgumentList (token, false, NULL);
1192                                                         }
1193                                                         else if (isType (token, TOKEN_OPEN_SQUARE))
1194                                                         {
1195                                                                 skipArrayList (token, false);
1196                                                         }
1197                                                         else
1198                                                         {
1199                                                                 readToken (token);
1200                                                         }
1201                                                 }
1202                                                 vStringDelete (saveScope);
1203
1204                                                 has_methods = true;
1205                                                 addToScope (name, class->string);
1206                                                 if (has_child_methods)
1207                                                         makeJsTag (name, JSTAG_CLASS, NULL);
1208                                                 else
1209                                                         makeJsTag (name, JSTAG_PROPERTY, NULL);
1210                                 }
1211                         }
1212                 }
1213         } while ( isType(token, TOKEN_COMMA) );
1214
1215         findCmdTerm (token, false);
1216
1217 cleanUp:
1218         deleteToken (name);
1219
1220         return has_methods;
1221 }
1222
1223 static bool parseStatement (tokenInfo *const token, tokenInfo *const parent, bool is_inside_class)
1224 {
1225         tokenInfo *const name = newToken ();
1226         tokenInfo *const secondary_name = newToken ();
1227         tokenInfo *const method_body_token = newToken ();
1228         vString * saveScope = vStringNew ();
1229         bool is_class = false;
1230         bool is_var = false;
1231         bool is_const = false;
1232         bool is_terminated = true;
1233         bool is_global = false;
1234         bool has_methods = false;
1235         vString *       fulltag;
1236
1237         vStringClear(saveScope);
1238         /*
1239          * Functions can be named or unnamed.
1240          * This deals with these formats:
1241          * Function
1242          *         validFunctionOne = function(a,b) {}
1243          *         testlib.validFunctionFive = function(a,b) {}
1244          *         var innerThree = function(a,b) {}
1245          *         var innerFour = (a,b) {}
1246          *         var D2 = secondary_fcn_name(a,b) {}
1247          *         var D3 = new Function("a", "b", "return a+b;");
1248          * Class
1249          *         testlib.extras.ValidClassOne = function(a,b) {
1250          *                 this.a = a;
1251          *         }
1252          * Class Methods
1253          *         testlib.extras.ValidClassOne.prototype = {
1254          *                 'validMethodOne' : function(a,b) {},
1255          *                 'validMethodTwo' : function(a,b) {}
1256          *         }
1257      *     ValidClassTwo = function ()
1258      *     {
1259      *         this.validMethodThree = function() {}
1260      *         // unnamed method
1261      *         this.validMethodFour = () {}
1262      *     }
1263          *         Database.prototype.validMethodThree = Database_getTodaysDate;
1264          */
1265
1266         if ( is_inside_class )
1267                 is_class = true;
1268         /*
1269          * var can precede an inner function
1270          */
1271         if ( isKeyword(token, KEYWORD_var) ||
1272                  isKeyword(token, KEYWORD_let) ||
1273                  isKeyword(token, KEYWORD_const) )
1274         {
1275                 is_const = isKeyword(token, KEYWORD_const);
1276                 /*
1277                  * Only create variables for global scope
1278                  */
1279                 if ( token->nestLevel == 0 )
1280                 {
1281                         is_global = true;
1282                 }
1283                 readToken(token);
1284         }
1285
1286         if ( isKeyword(token, KEYWORD_this) )
1287         {
1288                 readToken(token);
1289                 if (isType (token, TOKEN_PERIOD))
1290                 {
1291                         readToken(token);
1292                 }
1293         }
1294
1295         copyToken(name, token);
1296
1297         while (! isType (token, TOKEN_CLOSE_CURLY) &&
1298                ! isType (token, TOKEN_SEMICOLON)   &&
1299                ! isType (token, TOKEN_EQUAL_SIGN)  &&
1300                ! isType (token, TOKEN_EOF))
1301         {
1302                 if (isType (token, TOKEN_OPEN_CURLY))
1303                         parseBlock (token, parent);
1304
1305                 /* Potentially the name of the function */
1306                 readToken (token);
1307                 if (isType (token, TOKEN_PERIOD))
1308                 {
1309                         /*
1310                          * Cannot be a global variable is it has dot references in the name
1311                          */
1312                         is_global = false;
1313                         do
1314                         {
1315                                 readToken (token);
1316                                 if ( isKeyword(token, KEYWORD_NONE) )
1317                                 {
1318                                         if ( is_class )
1319                                         {
1320                                                 addToScope(token, name->string);
1321                                         }
1322                                         else
1323                                                 addContext (name, token);
1324
1325                                         readToken (token);
1326                                 }
1327                                 else if ( isKeyword(token, KEYWORD_prototype) )
1328                                 {
1329                                         /*
1330                                          * When we reach the "prototype" tag, we infer:
1331                                          *     "BindAgent" is a class
1332                                          *     "build"     is a method
1333                                          *
1334                                          * function BindAgent( repeatableIdName, newParentIdName ) {
1335                                          * }
1336                                          *
1337                                          * CASE 1
1338                                          * Specified function name: "build"
1339                                          *     BindAgent.prototype.build = function( mode ) {
1340                                          *        maybe parse nested functions
1341                                          *     }
1342                                          *
1343                                          * CASE 2
1344                                          * Prototype listing
1345                                          *     ValidClassOne.prototype = {
1346                                          *         'validMethodOne' : function(a,b) {},
1347                                          *         'validMethodTwo' : function(a,b) {}
1348                                          *     }
1349                                          *
1350                                          */
1351                                         makeClassTag (name, NULL);
1352                                         is_class = true;
1353
1354                                         /*
1355                                          * There should a ".function_name" next.
1356                                          */
1357                                         readToken (token);
1358                                         if (isType (token, TOKEN_PERIOD))
1359                                         {
1360                                                 /*
1361                                                  * Handle CASE 1
1362                                                  */
1363                                                 readToken (token);
1364                                                 if ( isKeyword(token, KEYWORD_NONE) )
1365                                                 {
1366                                                         vString *const signature = vStringNew ();
1367
1368                                                         vStringCopy(saveScope, token->scope);
1369                                                         addToScope(token, name->string);
1370
1371                                                         readToken (method_body_token);
1372                                                         vStringCopy (method_body_token->scope, token->scope);
1373
1374                                                         while (! isType (method_body_token, TOKEN_SEMICOLON) &&
1375                                                                ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
1376                                                                ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
1377                                                                ! isType (method_body_token, TOKEN_EOF))
1378                                                         {
1379                                                                 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1380                                                                         skipArgumentList(method_body_token, false,
1381                                                                                                          vStringLength (signature) == 0 ? signature : NULL);
1382                                                                 else
1383                                                                         readToken (method_body_token);
1384                                                         }
1385
1386                                                         makeJsTag (token, JSTAG_METHOD, signature);
1387                                                         vStringDelete (signature);
1388
1389                                                         if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1390                                                         {
1391                                                                 parseBlock (method_body_token, token);
1392                                                                 is_terminated = true;
1393                                                         }
1394                                                         else
1395                                                                 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
1396                                                         goto cleanUp;
1397                                                 }
1398                                         }
1399                                         else if (isType (token, TOKEN_EQUAL_SIGN))
1400                                         {
1401                                                 readToken (token);
1402                                                 if (isType (token, TOKEN_OPEN_CURLY))
1403                                                 {
1404                                                         /*
1405                                                          * Handle CASE 2
1406                                                          *
1407                                                          * Creates tags for each of these class methods
1408                                                          *     ValidClassOne.prototype = {
1409                                                          *         'validMethodOne' : function(a,b) {},
1410                                                          *         'validMethodTwo' : function(a,b) {}
1411                                                          *     }
1412                                                          */
1413                                                         parseMethods(token, name);
1414                                                         /*
1415                                                          * Find to the end of the statement
1416                                                          */
1417                                                         findCmdTerm (token, false);
1418                                                         token->ignoreTag = false;
1419                                                         is_terminated = true;
1420                                                         goto cleanUp;
1421                                                 }
1422                                         }
1423                                 }
1424                                 else
1425                                         readToken (token);
1426                         } while (isType (token, TOKEN_PERIOD));
1427                 }
1428
1429                 if ( isType (token, TOKEN_OPEN_PAREN) )
1430                         skipArgumentList(token, false, NULL);
1431
1432                 if ( isType (token, TOKEN_OPEN_SQUARE) )
1433                         skipArrayList(token, false);
1434
1435                 /*
1436                 if ( isType (token, TOKEN_OPEN_CURLY) )
1437                 {
1438                         is_class = parseBlock (token, name);
1439                 }
1440                 */
1441         }
1442
1443         if ( isType (token, TOKEN_CLOSE_CURLY) )
1444         {
1445                 /*
1446                  * Reaching this section without having
1447                  * processed an open curly brace indicates
1448                  * the statement is most likely not terminated.
1449                  */
1450                 is_terminated = false;
1451                 goto cleanUp;
1452         }
1453
1454         if ( isType (token, TOKEN_SEMICOLON) )
1455         {
1456                 /*
1457                  * Only create variables for global scope
1458                  */
1459                 if ( token->nestLevel == 0 && is_global )
1460                 {
1461                         /*
1462                          * Handles this syntax:
1463                          *         var g_var2;
1464                          */
1465                         if (isType (token, TOKEN_SEMICOLON))
1466                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1467                 }
1468                 /*
1469                  * Statement has ended.
1470                  * This deals with calls to functions, like:
1471                  *     alert(..);
1472                  */
1473                 goto cleanUp;
1474         }
1475
1476         if ( isType (token, TOKEN_EQUAL_SIGN) )
1477         {
1478                 int parenDepth = 0;
1479
1480                 readToken (token);
1481
1482                 /* rvalue might be surrounded with parentheses */
1483                 while (isType (token, TOKEN_OPEN_PAREN))
1484                 {
1485                         parenDepth++;
1486                         readToken (token);
1487                 }
1488
1489                 if ( isKeyword (token, KEYWORD_function) )
1490                 {
1491                         vString *const signature = vStringNew ();
1492
1493                         readToken (token);
1494
1495                         if ( isKeyword (token, KEYWORD_NONE) &&
1496                                         ! isType (token, TOKEN_OPEN_PAREN) )
1497                         {
1498                                 /*
1499                                  * Functions of this format:
1500                                  *         var D2A = function theAdd(a, b)
1501                                  *         {
1502                                  *                return a+b;
1503                                  *         }
1504                                  * Are really two separate defined functions and
1505                                  * can be referenced in two ways:
1506                                  *         alert( D2A(1,2) );                     // produces 3
1507                                  *         alert( theAdd(1,2) );                  // also produces 3
1508                                  * So it must have two tags:
1509                                  *         D2A
1510                                  *         theAdd
1511                                  * Save the reference to the name for later use, once
1512                                  * we have established this is a valid function we will
1513                                  * create the secondary reference to it.
1514                                  */
1515                                 copyToken(secondary_name, token);
1516                                 readToken (token);
1517                         }
1518
1519                         if ( isType (token, TOKEN_OPEN_PAREN) )
1520                                 skipArgumentList(token, false, signature);
1521
1522                         if (isType (token, TOKEN_OPEN_CURLY))
1523                         {
1524                                 /*
1525                                  * This will be either a function or a class.
1526                                  * We can only determine this by checking the body
1527                                  * of the function.  If we find a "this." we know
1528                                  * it is a class, otherwise it is a function.
1529                                  */
1530                                 if ( is_inside_class )
1531                                 {
1532                                         makeJsTag (name, JSTAG_METHOD, signature);
1533                                         if ( vStringLength(secondary_name->string) > 0 )
1534                                                 makeFunctionTag (secondary_name, signature);
1535                                         parseBlock (token, name);
1536                                 }
1537                                 else
1538                                 {
1539                                         is_class = parseBlock (token, name);
1540                                         if ( is_class )
1541                                                 makeClassTag (name, signature);
1542                                         else
1543                                                 makeFunctionTag (name, signature);
1544
1545                                         if ( vStringLength(secondary_name->string) > 0 )
1546                                                 makeFunctionTag (secondary_name, signature);
1547                                 }
1548                         }
1549
1550                         vStringDelete (signature);
1551                 }
1552                 else if (isType (token, TOKEN_OPEN_CURLY))
1553                 {
1554                         /*
1555                          * Creates tags for each of these class methods
1556                          *     ValidClassOne.prototype = {
1557                          *         'validMethodOne' : function(a,b) {},
1558                          *         'validMethodTwo' : function(a,b) {}
1559                          *     }
1560                          * Or checks if this is a hash variable.
1561                          *     var z = {};
1562                          */
1563                         has_methods = parseMethods(token, name);
1564                         if (has_methods)
1565                                 makeJsTag (name, JSTAG_CLASS, NULL);
1566                         else
1567                         {
1568                                 /*
1569                                  * Only create variables for global scope
1570                                  */
1571                                 if ( token->nestLevel == 0 && is_global )
1572                                 {
1573                                         /*
1574                                          * A pointer can be created to the function.
1575                                          * If we recognize the function/class name ignore the variable.
1576                                          * This format looks identical to a variable definition.
1577                                          * A variable defined outside of a block is considered
1578                                          * a global variable:
1579                                          *         var g_var1 = 1;
1580                                          *         var g_var2;
1581                                          * This is not a global variable:
1582                                          *         var g_var = function;
1583                                          * This is a global variable:
1584                                          *         var g_var = different_var_name;
1585                                          */
1586                                         fulltag = vStringNew ();
1587                                         if (vStringLength (token->scope) > 0)
1588                                         {
1589                                                 vStringCopy(fulltag, token->scope);
1590                                                 vStringCatS (fulltag, ".");
1591                                                 vStringCatS (fulltag, vStringValue(token->string));
1592                                         }
1593                                         else
1594                                         {
1595                                                 vStringCopy(fulltag, token->string);
1596                                         }
1597                                         if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1598                                                         ! stringListHas(ClassNames, vStringValue (fulltag)) )
1599                                         {
1600                                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1601                                         }
1602                                         vStringDelete (fulltag);
1603                                 }
1604                         }
1605                         if (isType (token, TOKEN_CLOSE_CURLY))
1606                         {
1607                                 /*
1608                                  * Assume the closing parentheses terminates
1609                                  * this statements.
1610                                  */
1611                                 is_terminated = true;
1612                         }
1613                 }
1614                 else if (isKeyword (token, KEYWORD_new))
1615                 {
1616                         readToken (token);
1617                         is_var = isType (token, TOKEN_IDENTIFIER);
1618                         if ( isKeyword (token, KEYWORD_function) ||
1619                                         isKeyword (token, KEYWORD_capital_function) ||
1620                                         isKeyword (token, KEYWORD_capital_object) ||
1621                                         is_var )
1622                         {
1623                                 if ( isKeyword (token, KEYWORD_capital_object) )
1624                                         is_class = true;
1625
1626                                 readToken (token);
1627                                 if ( isType (token, TOKEN_OPEN_PAREN) )
1628                                         skipArgumentList(token, true, NULL);
1629
1630                                 if (isType (token, TOKEN_SEMICOLON))
1631                                 {
1632                                         if ( token->nestLevel == 0 )
1633                                         {
1634                                                 if ( is_var )
1635                                                 {
1636                                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1637                                                 }
1638                                                 else
1639                                                 {
1640                                                         if ( is_class )
1641                                                         {
1642                                                                 makeClassTag (name, NULL);
1643                                                         } else {
1644                                                                 /* FIXME: we cannot really get a meaningful
1645                                                                  * signature from a `new Function()` call,
1646                                                                  * so for now just don't set any */
1647                                                                 makeFunctionTag (name, NULL);
1648                                                         }
1649                                                 }
1650                                         }
1651                                 }
1652                                 else if (isType (token, TOKEN_CLOSE_CURLY))
1653                                         is_terminated = false;
1654                         }
1655                 }
1656                 else if (isKeyword (token, KEYWORD_NONE))
1657                 {
1658                         /*
1659                          * Only create variables for global scope
1660                          */
1661                         if ( token->nestLevel == 0 && is_global )
1662                         {
1663                                 /*
1664                                  * A pointer can be created to the function.
1665                                  * If we recognize the function/class name ignore the variable.
1666                                  * This format looks identical to a variable definition.
1667                                  * A variable defined outside of a block is considered
1668                                  * a global variable:
1669                                  *         var g_var1 = 1;
1670                                  *         var g_var2;
1671                                  * This is not a global variable:
1672                                  *         var g_var = function;
1673                                  * This is a global variable:
1674                                  *         var g_var = different_var_name;
1675                                  */
1676                                 fulltag = vStringNew ();
1677                                 if (vStringLength (token->scope) > 0)
1678                                 {
1679                                         vStringCopy(fulltag, token->scope);
1680                                         vStringCatS (fulltag, ".");
1681                                         vStringCatS (fulltag, vStringValue(token->string));
1682                                 }
1683                                 else
1684                                 {
1685                                         vStringCopy(fulltag, token->string);
1686                                 }
1687                                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1688                                                 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1689                                 {
1690                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1691                                 }
1692                                 vStringDelete (fulltag);
1693                         }
1694                 }
1695
1696                 if (parenDepth > 0)
1697                 {
1698                         while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
1699                         {
1700                                 if (isType (token, TOKEN_OPEN_PAREN))
1701                                         parenDepth++;
1702                                 else if (isType (token, TOKEN_CLOSE_PAREN))
1703                                         parenDepth--;
1704                                 readTokenFull (token, true, NULL);
1705                         }
1706                         if (isType (token, TOKEN_CLOSE_CURLY))
1707                                 is_terminated = false;
1708                 }
1709         }
1710
1711         /* if we aren't already at the cmd end, advance to it and check whether
1712          * the statement was terminated */
1713         if (! isType (token, TOKEN_CLOSE_CURLY) &&
1714             ! isType (token, TOKEN_SEMICOLON))
1715         {
1716                 /*
1717                  * Statements can be optionally terminated in the case of
1718                  * statement prior to a close curly brace as in the
1719                  * document.write line below:
1720                  *
1721                  * function checkForUpdate() {
1722                  *         if( 1==1 ) {
1723                  *                 document.write("hello from checkForUpdate<br>")
1724                  *         }
1725                  *         return 1;
1726                  * }
1727                  */
1728                 is_terminated = findCmdTerm (token, true);
1729         }
1730
1731 cleanUp:
1732         vStringCopy(token->scope, saveScope);
1733         deleteToken (name);
1734         deleteToken (secondary_name);
1735         deleteToken (method_body_token);
1736         vStringDelete(saveScope);
1737
1738         return is_terminated;
1739 }
1740
1741 static void parseUI5 (tokenInfo *const token)
1742 {
1743         tokenInfo *const name = newToken ();
1744         /*
1745          * SAPUI5 is built on top of jQuery.
1746          * It follows a standard format:
1747          *     sap.ui.controller("id.of.controller", {
1748          *         method_name : function... {
1749          *         },
1750          *
1751          *         method_name : function ... {
1752          *         }
1753          *     }
1754          *
1755          * Handle the parsing of the initial controller (and the
1756          * same for "view") and then allow the methods to be
1757          * parsed as usual.
1758          */
1759
1760         readToken (token);
1761
1762         if (isType (token, TOKEN_PERIOD))
1763         {
1764                 readToken (token);
1765                 while (! isType (token, TOKEN_OPEN_PAREN) &&
1766                            ! isType (token, TOKEN_EOF))
1767                 {
1768                         readToken (token);
1769                 }
1770                 readToken (token);
1771
1772                 if (isType (token, TOKEN_STRING))
1773                 {
1774                         copyToken(name, token);
1775                         readToken (token);
1776                 }
1777
1778                 if (isType (token, TOKEN_COMMA))
1779                         readToken (token);
1780
1781                 do
1782                 {
1783                         parseMethods (token, name);
1784                 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
1785                                  ! isType (token, TOKEN_EOF));
1786         }
1787
1788         deleteToken (name);
1789 }
1790
1791 static bool parseLine (tokenInfo *const token, tokenInfo *const parent, bool is_inside_class)
1792 {
1793         bool is_terminated = true;
1794         /*
1795          * Detect the common statements, if, while, for, do, ...
1796          * This is necessary since the last statement within a block "{}"
1797          * can be optionally terminated.
1798          *
1799          * If the statement is not terminated, we need to tell
1800          * the calling routine to prevent reading an additional token
1801          * looking for the end of the statement.
1802          */
1803
1804         if (isType(token, TOKEN_KEYWORD))
1805         {
1806                 switch (token->keyword)
1807                 {
1808                         case KEYWORD_for:
1809                         case KEYWORD_while:
1810                         case KEYWORD_do:
1811                                 is_terminated = parseLoop (token, parent);
1812                                 break;
1813                         case KEYWORD_if:
1814                         case KEYWORD_else:
1815                         case KEYWORD_try:
1816                         case KEYWORD_catch:
1817                         case KEYWORD_finally:
1818                                 /* Common semantics */
1819                                 is_terminated = parseIf (token, parent);
1820                                 break;
1821                         case KEYWORD_switch:
1822                                 parseSwitch (token);
1823                                 break;
1824                         case KEYWORD_return:
1825                                 is_terminated = findCmdTerm (token, true);
1826                                 break;
1827                         default:
1828                                 is_terminated = parseStatement (token, parent, is_inside_class);
1829                                 break;
1830                 }
1831         }
1832         else
1833         {
1834                 /*
1835                  * Special case where single line statements may not be
1836                  * SEMICOLON terminated.  parseBlock needs to know this
1837                  * so that it does not read the next token.
1838                  */
1839                 is_terminated = parseStatement (token, parent, is_inside_class);
1840         }
1841         return is_terminated;
1842 }
1843
1844 static void parseJsFile (tokenInfo *const token)
1845 {
1846         do
1847         {
1848                 readToken (token);
1849
1850                 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
1851                         parseFunction (token);
1852                 else if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
1853                         parseUI5 (token);
1854                 else
1855                         parseLine (token, token, false);
1856         } while (! isType (token, TOKEN_EOF));
1857 }
1858
1859 static void initialize (const langType language)
1860 {
1861         Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
1862         Lang_js = language;
1863 }
1864
1865 static void findJsTags (void)
1866 {
1867         tokenInfo *const token = newToken ();
1868
1869         ClassNames = stringListNew ();
1870         FunctionNames = stringListNew ();
1871         LastTokenType = TOKEN_UNDEFINED;
1872
1873         parseJsFile (token);
1874
1875         stringListDelete (ClassNames);
1876         stringListDelete (FunctionNames);
1877         ClassNames = NULL;
1878         FunctionNames = NULL;
1879         deleteToken (token);
1880 }
1881
1882 /* Create parser definition structure */
1883 extern parserDefinition* JavaScriptParser (void)
1884 {
1885         static const char *const extensions [] = { "js", NULL };
1886         parserDefinition *const def = parserNew ("JavaScript");
1887         def->extensions = extensions;
1888         /*
1889          * New definitions for parsing instead of regex
1890          */
1891         def->kinds              = JsKinds;
1892         def->kindCount  = ARRAY_SIZE (JsKinds);
1893         def->parser             = findJsTags;
1894         def->initialize = initialize;
1895         def->keywordTable = JsKeywordTable;
1896         def->keywordCount = ARRAY_SIZE (JsKeywordTable);
1897
1898         return def;
1899 }