ctags/parsers/jscript.c

   1 /*
   2  *       Copyright (c) 2003, Darren Hiebert
   3  *
   4  *       This source code is released for free distribution under the terms of the
   5  *       GNU General Public License version 2 or (at your option) any later version.
   6  *
   7  *       This module contains functions for generating tags for JavaScript language
   8  *       files.
   9  *
  10  *       Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
  11  *
  12  *       This is a good reference for different forms of the function statement:
  13  *               http://www.permadi.com/tutorial/jsFunc/
  14  *   Another good reference:
  15  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  16  */
  17
  18 /*
  19  *       INCLUDE FILES
  20  */
  21 #include "general.h"    /* must always come first */
  22 #include <ctype.h>      /* to define isalpha () */
  23 #include <string.h>
  24 #ifdef DEBUG
  25 #include <stdio.h>
  26 #endif
  27
  28 #include "debug.h"
  29 #include "mio.h"
  30 #include "keyword.h"
  31 #include "parse.h"
  32 #include "read.h"
  33 #include "routines.h"
  34 #include "vstring.h"
  35
  36 /*
  37  *       MACROS
  38  */
  39 #define isType(token,t)         (boolean) ((token)->type == (t))
  40 #define isKeyword(token,k)      (boolean) ((token)->keyword == (k))
  41
  42 /*
  43  *       DATA DECLARATIONS
  44  */
  45
/*
 * Tracks class and function names already created.
 * makeClassTag() and makeFunctionTag() look the fully-qualified name
 * ("scope.name") up in these lists and only emit a tag, and record the name,
 * when it is not present yet.
 */
static stringList *ClassNames;
static stringList *FunctionNames;
  51
/*	Used to specify type of keyword.
 *	Apart from KEYWORD_NONE, each value is paired with its spelling in
 *	JsKeywordTable.
*/
typedef enum eKeywordId {
	KEYWORD_NONE = -1,	/* token is not a keyword (initial value of every token) */
	KEYWORD_function,
	KEYWORD_capital_function,	/* spelled "Function" in JsKeywordTable */
	KEYWORD_capital_object,		/* spelled "Object" in JsKeywordTable */
	KEYWORD_prototype,
	KEYWORD_var,
	KEYWORD_let,
	KEYWORD_const,
	KEYWORD_new,
	KEYWORD_this,
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally,
	KEYWORD_sap,	/* "sap"; presumably the entry point for parseUI5() - confirm */
	KEYWORD_return
} keywordId;
  77
/*	Kinds of token produced by readTokenFull(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,		/* initial type; also any character the tokenizer does not recognize */
	TOKEN_EOF,				/* end of input */
	TOKEN_CHARACTER,		/* a backslash (possibly with the character following it) */
	TOKEN_CLOSE_PAREN,		/* ')' */
	TOKEN_SEMICOLON,		/* ';' or a newline interpreted as a statement end */
	TOKEN_COLON,			/* ':' */
	TOKEN_COMMA,			/* ',' */
	TOKEN_KEYWORD,			/* identifier found in the keyword table */
	TOKEN_OPEN_PAREN,		/* '(' */
	TOKEN_OPERATOR,			/* not assigned by readTokenFull() */
	TOKEN_IDENTIFIER,		/* identifier that is not a keyword */
	TOKEN_STRING,			/* single- or double-quoted string literal */
	TOKEN_PERIOD,			/* '.' */
	TOKEN_OPEN_CURLY,		/* '{' */
	TOKEN_CLOSE_CURLY,		/* '}' */
	TOKEN_EQUAL_SIGN,		/* '=' */
	TOKEN_FORWARD_SLASH,	/* '/' used as an operator (not a comment or regexp) */
	TOKEN_OPEN_SQUARE,		/* '[' */
	TOKEN_CLOSE_SQUARE,		/* ']' */
	TOKEN_REGEXP,			/* regular expression literal (its content is skipped) */
	TOKEN_POSTFIX_OPERATOR,	/* "++" or "--" */
	TOKEN_BINARY_OPERATOR	/* a single + - * % ? > < ^ | or & */
} tokenType;
 102
/*	A token read from the input, together with the context needed to turn it
 *	into a tag.  Allocated by newToken() and released by deleteToken(). */
typedef struct sTokenInfo {
	tokenType		type;			/* what kind of token this is */
	keywordId		keyword;		/* keyword id, or KEYWORD_NONE */
	vString *		string;			/* text of an identifier or string literal */
	vString *		scope;			/* dot-separated enclosing scope, empty if none */
	unsigned long	lineNumber;		/* input line number recorded when the token was read */
	MIOPos			filePosition;	/* input file position recorded when the token was read */
	int				nestLevel;		/* starts at 0; propagated by copyToken() */
	boolean			ignoreTag;		/* when TRUE, no tag is generated for this token */
} tokenInfo;
 113
 114 /*
 115  *      DATA DEFINITIONS
 116  */
 117
/* Type of the token most recently produced by readTokenFull(); it is used
 * there to decide whether a newline ends a statement and whether a '/' is
 * an operator or starts a regular expression. */
static tokenType LastTokenType;

/* Language identifier passed to lookupKeyword() for this parser */
static langType Lang_js;
 121
/* Tag kinds.  The values are used as indexes into JsKinds, so the order here
 * must match the order of the entries in that table. */
typedef enum {
	JSTAG_FUNCTION,
	JSTAG_CLASS,
	JSTAG_METHOD,
	JSTAG_PROPERTY,
	JSTAG_CONSTANT,
	JSTAG_VARIABLE,
	JSTAG_COUNT		/* not a kind: equals the number of kinds listed above */
} jsKind;
 131
/* Tag kind descriptions, indexed by jsKind.
 * The first field is the "enabled" flag tested by makeJsTag(); the other
 * fields are presumably the kind letter, its name and a description -
 * confirm against the kindOption declaration. */
static kindOption JsKinds [] = {
	{ TRUE,  'f', "function",	  "functions"		   },
	{ TRUE,  'c', "class",		  "classes"			   },
	{ TRUE,  'm', "method",		  "methods"			   },
	{ TRUE,  'p', "property",	  "properties"		   },
	{ TRUE,  'C', "constant",	  "constants"		   },
	{ TRUE,  'v', "variable",	  "global variables"   }
};
 140
/* Maps keyword spellings to their keywordId.
 * Note that analyzeToken() converts an identifier to lower case before
 * looking it up. */
static const keywordTable JsKeywordTable [] = {
	/* keyword		keyword ID */
	{ "function",	KEYWORD_function			},
	{ "Function",	KEYWORD_capital_function	},
	{ "Object",		KEYWORD_capital_object		},
	{ "prototype",	KEYWORD_prototype			},
	{ "var",		KEYWORD_var					},
	{ "let",		KEYWORD_let					},
	{ "const",		KEYWORD_const				},
	{ "new",		KEYWORD_new					},
	{ "this",		KEYWORD_this				},
	{ "for",		KEYWORD_for					},
	{ "while",		KEYWORD_while				},
	{ "do",			KEYWORD_do					},
	{ "if",			KEYWORD_if					},
	{ "else",		KEYWORD_else				},
	{ "switch",		KEYWORD_switch				},
	{ "try",		KEYWORD_try					},
	{ "catch",		KEYWORD_catch				},
	{ "finally",	KEYWORD_finally				},
	{ "sap",	    KEYWORD_sap					},
	{ "return",		KEYWORD_return				}
};
 164
 165 /*
 166  *       FUNCTION DEFINITIONS
 167  */
 168
/* Recursive functions: forward declarations so they can be called before
 * their definitions (findCmdTerm(), for instance, calls parseBlock()). */
static void parseFunction (tokenInfo *const token);
static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent);
static boolean parseLine (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class);
static void parseUI5 (tokenInfo *const token);
 174
 175 static boolean isIdentChar (const int c)
 176 {
 177         return (boolean)
 178                 (isalpha (c) || isdigit (c) || c == '$' ||
 179                  c == '@' || c == '_' || c == '#');
 180 }
 181
 182 static tokenInfo *newToken (void)
 183 {
 184         tokenInfo *const token = xMalloc (1, tokenInfo);
 185
 186         token->type                     = TOKEN_UNDEFINED;
 187         token->keyword          = KEYWORD_NONE;
 188         token->string           = vStringNew ();
 189         token->scope            = vStringNew ();
 190         token->nestLevel        = 0;
 191         token->ignoreTag        = FALSE;
 192         token->lineNumber   = getInputLineNumber ();
 193         token->filePosition = getInputFilePosition ();
 194
 195         return token;
 196 }
 197
 198 static void deleteToken (tokenInfo *const token)
 199 {
 200         vStringDelete (token->string);
 201         vStringDelete (token->scope);
 202         eFree (token);
 203 }
 204
 205 /*
 206  *       Tag generation functions
 207  */
 208
 209 static void makeJsTag (tokenInfo *const token, const jsKind kind, vString *const signature)
 210 {
 211         if (JsKinds [kind].enabled && ! token->ignoreTag )
 212         {
 213                 const char *name = vStringValue (token->string);
 214                 vString *fullscope = vStringNewCopy (token->scope);
 215                 const char *p;
 216                 tagEntryInfo e;
 217
 218                 if ((p = strrchr (name, '.')) != NULL)
 219                 {
 220                         if (vStringLength (fullscope) > 0)
 221                                 vStringPut (fullscope, '.');
 222                         vStringNCatS (fullscope, name, p - name);
 223                         name = p + 1;
 224                 }
 225
 226                 initTagEntry (&e, name, &(JsKinds [kind]));
 227
 228                 e.lineNumber   = token->lineNumber;
 229                 e.filePosition = token->filePosition;
 230
 231                 if ( vStringLength(fullscope) > 0 )
 232                 {
 233                         jsKind parent_kind = JSTAG_CLASS;
 234
 235                         /*
 236                          * If we're creating a function (and not a method),
 237                          * guess we're inside another function
 238                          */
 239                         if (kind == JSTAG_FUNCTION)
 240                                 parent_kind = JSTAG_FUNCTION;
 241
 242                         e.extensionFields.scopeKind = &(JsKinds [parent_kind]);
 243                         e.extensionFields.scopeName = vStringValue (fullscope);
 244                 }
 245
 246                 if (signature && vStringLength(signature))
 247                 {
 248                         size_t i;
 249                         /* sanitize signature by replacing all control characters with a
 250                          * space (because it's simple).
 251                          * there should never be any junk in a valid signature, but who
 252                          * knows what the user wrote and CTags doesn't cope well with weird
 253                          * characters. */
 254                         for (i = 0; i < signature->length; i++)
 255                         {
 256                                 unsigned char c = (unsigned char) signature->buffer[i];
 257                                 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
 258                                         signature->buffer[i] = ' ';
 259                         }
 260                         e.extensionFields.signature = vStringValue(signature);
 261                 }
 262
 263                 makeTagEntry (&e);
 264                 vStringDelete (fullscope);
 265         }
 266 }
 267
 268 static void makeClassTag (tokenInfo *const token, vString *const signature)
 269 {
 270         vString *       fulltag;
 271
 272         if ( ! token->ignoreTag )
 273         {
 274                 fulltag = vStringNew ();
 275                 if (vStringLength (token->scope) > 0)
 276                 {
 277                         vStringCopy(fulltag, token->scope);
 278                         vStringCatS (fulltag, ".");
 279                         vStringCatS (fulltag, vStringValue(token->string));
 280                 }
 281                 else
 282                 {
 283                         vStringCopy(fulltag, token->string);
 284                 }
 285                 vStringTerminate(fulltag);
 286                 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
 287                 {
 288                         stringListAdd (ClassNames, vStringNewCopy (fulltag));
 289                         makeJsTag (token, JSTAG_CLASS, signature);
 290                 }
 291                 vStringDelete (fulltag);
 292         }
 293 }
 294
 295 static void makeFunctionTag (tokenInfo *const token, vString *const signature)
 296 {
 297         vString *       fulltag;
 298
 299         if ( ! token->ignoreTag )
 300         {
 301                 fulltag = vStringNew ();
 302                 if (vStringLength (token->scope) > 0)
 303                 {
 304                         vStringCopy(fulltag, token->scope);
 305                         vStringCatS (fulltag, ".");
 306                         vStringCatS (fulltag, vStringValue(token->string));
 307                 }
 308                 else
 309                 {
 310                         vStringCopy(fulltag, token->string);
 311                 }
 312                 vStringTerminate(fulltag);
 313                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
 314                 {
 315                         stringListAdd (FunctionNames, vStringNewCopy (fulltag));
 316                         makeJsTag (token, JSTAG_FUNCTION, signature);
 317                 }
 318                 vStringDelete (fulltag);
 319         }
 320 }
 321
 322 /*
 323  *       Parsing functions
 324  */
 325
 326 static int skipToCharacter (const int c)
 327 {
 328         int d;
 329         do
 330         {
 331                 d = getcFromInputFile ();
 332         } while (d != EOF  &&  d != c);
 333         return d;
 334 }
 335
 336 static void parseString (vString *const string, const int delimiter)
 337 {
 338         boolean end = FALSE;
 339         while (! end)
 340         {
 341                 int c = getcFromInputFile ();
 342                 if (c == EOF)
 343                         end = TRUE;
 344                 else if (c == '\\')
 345                 {
 346                         /* Eat the escape sequence (\", \', etc).  We properly handle
 347                          * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
 348                          * as an unescaped character, which is invalid and handled below.
 349                          * Also, handle the fact that <LineContinuation> produces an empty
 350                          * sequence.
 351                          * See ECMA-262 7.8.4 */
 352                         c = getcFromInputFile ();
 353                         if (c != '\r' && c != '\n')
 354                                 vStringPut(string, c);
 355                         else if (c == '\r')
 356                         {
 357                                 c = getcFromInputFile();
 358                                 if (c != '\n')
 359                                         ungetcToInputFile (c);
 360                         }
 361                 }
 362                 else if (c == delimiter)
 363                         end = TRUE;
 364                 else if (c == '\r' || c == '\n')
 365                 {
 366                         /* those are invalid when not escaped */
 367                         end = TRUE;
 368                         /* we don't want to eat the newline itself to let the automatic
 369                          * semicolon insertion code kick in */
 370                         ungetcToInputFile (c);
 371                 }
 372                 else
 373                         vStringPut (string, c);
 374         }
 375         vStringTerminate (string);
 376 }
 377
 378 static void parseRegExp (void)
 379 {
 380         int c;
 381         boolean in_range = FALSE;
 382
 383         do
 384         {
 385                 c = getcFromInputFile ();
 386                 if (! in_range && c == '/')
 387                 {
 388                         do /* skip flags */
 389                         {
 390                                 c = getcFromInputFile ();
 391                         } while (isalpha (c));
 392                         ungetcToInputFile (c);
 393                         break;
 394                 }
 395                 else if (c == '\\')
 396                         c = getcFromInputFile (); /* skip next character */
 397                 else if (c == '[')
 398                         in_range = TRUE;
 399                 else if (c == ']')
 400                         in_range = FALSE;
 401         } while (c != EOF);
 402 }
 403
 404 /*      Read a C identifier beginning with "firstChar" and places it into
 405  *      "name".
 406  */
 407 static void parseIdentifier (vString *const string, const int firstChar)
 408 {
 409         int c = firstChar;
 410         Assert (isIdentChar (c));
 411         do
 412         {
 413                 vStringPut (string, c);
 414                 c = getcFromInputFile ();
 415         } while (isIdentChar (c));
 416         vStringTerminate (string);
 417         ungetcToInputFile (c);          /* unget non-identifier character */
 418 }
 419
 420 static keywordId analyzeToken (vString *const name)
 421 {
 422         vString *keyword = vStringNew ();
 423         keywordId result;
 424         vStringCopyToLower (keyword, name);
 425         result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
 426         vStringDelete (keyword);
 427         return result;
 428 }
 429
 430 static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
 431 {
 432         int c;
 433         int i;
 434
 435         token->type                     = TOKEN_UNDEFINED;
 436         token->keyword          = KEYWORD_NONE;
 437         vStringClear (token->string);
 438
 439 getNextChar:
 440         i = 0;
 441         do
 442         {
 443                 c = getcFromInputFile ();
 444                 i++;
 445         }
 446         while (c == '\t'  ||  c == ' ' ||
 447                    ((c == '\r' || c == '\n') && ! include_newlines));
 448
 449         token->lineNumber   = getInputLineNumber ();
 450         token->filePosition = getInputFilePosition ();
 451
 452         if (repr)
 453         {
 454                 if (i > 1)
 455                         vStringPut (repr, ' ');
 456                 vStringPut (repr, c);
 457         }
 458
 459         switch (c)
 460         {
 461                 case EOF: token->type = TOKEN_EOF;                                      break;
 462                 case '(': token->type = TOKEN_OPEN_PAREN;                       break;
 463                 case ')': token->type = TOKEN_CLOSE_PAREN;                      break;
 464                 case ';': token->type = TOKEN_SEMICOLON;                        break;
 465                 case ',': token->type = TOKEN_COMMA;                            break;
 466                 case '.': token->type = TOKEN_PERIOD;                           break;
 467                 case ':': token->type = TOKEN_COLON;                            break;
 468                 case '{': token->type = TOKEN_OPEN_CURLY;                       break;
 469                 case '}': token->type = TOKEN_CLOSE_CURLY;                      break;
 470                 case '=': token->type = TOKEN_EQUAL_SIGN;                       break;
 471                 case '[': token->type = TOKEN_OPEN_SQUARE;                      break;
 472                 case ']': token->type = TOKEN_CLOSE_SQUARE;                     break;
 473
 474                 case '+':
 475                 case '-':
 476                         {
 477                                 int d = getcFromInputFile ();
 478                                 if (d == c) /* ++ or -- */
 479                                         token->type = TOKEN_POSTFIX_OPERATOR;
 480                                 else
 481                                 {
 482                                         ungetcToInputFile (d);
 483                                         token->type = TOKEN_BINARY_OPERATOR;
 484                                 }
 485                                 break;
 486                         }
 487
 488                 case '*':
 489                 case '%':
 490                 case '?':
 491                 case '>':
 492                 case '<':
 493                 case '^':
 494                 case '|':
 495                 case '&':
 496                         token->type = TOKEN_BINARY_OPERATOR;
 497                         break;
 498
 499                 case '\r':
 500                 case '\n':
 501                         /* This isn't strictly correct per the standard, but following the
 502                          * real rules means understanding all statements, and that's not
 503                          * what the parser currently does.  What we do here is a guess, by
 504                          * avoiding inserting semicolons that would make the statement on
 505                          * the left invalid.  Hopefully this should not have false negatives
 506                          * (e.g. should not miss insertion of a semicolon) but might have
 507                          * false positives (e.g. it will wrongfully emit a semicolon for the
 508                          * newline in "foo\n+bar").
 509                          * This should however be mostly harmless as we only deal with
 510                          * newlines in specific situations where we know a false positive
 511                          * wouldn't hurt too bad. */
 512                         switch (LastTokenType)
 513                         {
 514                                 /* these cannot be the end of a statement, so hold the newline */
 515                                 case TOKEN_EQUAL_SIGN:
 516                                 case TOKEN_COLON:
 517                                 case TOKEN_PERIOD:
 518                                 case TOKEN_FORWARD_SLASH:
 519                                 case TOKEN_BINARY_OPERATOR:
 520                                 /* and these already end one, no need to duplicate it */
 521                                 case TOKEN_SEMICOLON:
 522                                 case TOKEN_COMMA:
 523                                 case TOKEN_CLOSE_CURLY:
 524                                 case TOKEN_OPEN_CURLY:
 525                                         include_newlines = FALSE; /* no need to recheck */
 526                                         goto getNextChar;
 527                                         break;
 528                                 default:
 529                                         token->type = TOKEN_SEMICOLON;
 530                         }
 531                         break;
 532
 533                 case '\'':
 534                 case '"':
 535                                   token->type = TOKEN_STRING;
 536                                   parseString (token->string, c);
 537                                   token->lineNumber = getInputLineNumber ();
 538                                   token->filePosition = getInputFilePosition ();
 539                                   if (repr)
 540                                   {
 541                                           vStringCat (repr, token->string);
 542                                           vStringPut (repr, c);
 543                                   }
 544                                   break;
 545
 546                 case '\\':
 547                                   c = getcFromInputFile ();
 548                                   if (c != '\\'  && c != '"'  &&  !isspace (c))
 549                                           ungetcToInputFile (c);
 550                                   token->type = TOKEN_CHARACTER;
 551                                   token->lineNumber = getInputLineNumber ();
 552                                   token->filePosition = getInputFilePosition ();
 553                                   break;
 554
 555                 case '/':
 556                                   {
 557                                           int d = getcFromInputFile ();
 558                                           if ( (d != '*') &&            /* is this the start of a comment? */
 559                                                           (d != '/') )          /* is a one line comment? */
 560                                           {
 561                                                   ungetcToInputFile (d);
 562                                                   switch (LastTokenType)
 563                                                   {
 564                                                           case TOKEN_CHARACTER:
 565                                                           case TOKEN_IDENTIFIER:
 566                                                           case TOKEN_STRING:
 567                                                           case TOKEN_CLOSE_CURLY:
 568                                                           case TOKEN_CLOSE_PAREN:
 569                                                           case TOKEN_CLOSE_SQUARE:
 570                                                                   token->type = TOKEN_FORWARD_SLASH;
 571                                                                   break;
 572
 573                                                           default:
 574                                                                   token->type = TOKEN_REGEXP;
 575                                                                   parseRegExp ();
 576                                                                   token->lineNumber = getInputLineNumber ();
 577                                                                   token->filePosition = getInputFilePosition ();
 578                                                                   break;
 579                                                   }
 580                                           }
 581                                           else
 582                                           {
 583                                                   if (repr) /* remove the / we added */
 584                                                           repr->buffer[--repr->length] = 0;
 585                                                   if (d == '*')
 586                                                   {
 587                                                           do
 588                                                           {
 589                                                                   skipToCharacter ('*');
 590                                                                   c = getcFromInputFile ();
 591                                                                   if (c == '/')
 592                                                                           break;
 593                                                                   else
 594                                                                           ungetcToInputFile (c);
 595                                                           } while (c != EOF && c != '\0');
 596                                                           goto getNextChar;
 597                                                   }
 598                                                   else if (d == '/')    /* is this the start of a comment?  */
 599                                                   {
 600                                                           skipToCharacter ('\n');
 601                                                           /* if we care about newlines, put it back so it is seen */
 602                                                           if (include_newlines)
 603                                                                   ungetcToInputFile ('\n');
 604                                                           goto getNextChar;
 605                                                   }
 606                                           }
 607                                           break;
 608                                   }
 609
 610                 case '#':
 611                                   /* skip shebang in case of e.g. Node.js scripts */
 612                                   if (token->lineNumber > 1)
 613                                           token->type = TOKEN_UNDEFINED;
 614                                   else if ((c = getcFromInputFile ()) != '!')
 615                                   {
 616                                           ungetcToInputFile (c);
 617                                           token->type = TOKEN_UNDEFINED;
 618                                   }
 619                                   else
 620                                   {
 621                                           skipToCharacter ('\n');
 622                                           goto getNextChar;
 623                                   }
 624                                   break;
 625
 626                 default:
 627                                   if (! isIdentChar (c))
 628                                           token->type = TOKEN_UNDEFINED;
 629                                   else
 630                                   {
 631                                           parseIdentifier (token->string, c);
 632                                           token->lineNumber = getInputLineNumber ();
 633                                           token->filePosition = getInputFilePosition ();
 634                                           token->keyword = analyzeToken (token->string);
 635                                           if (isKeyword (token, KEYWORD_NONE))
 636                                                   token->type = TOKEN_IDENTIFIER;
 637                                           else
 638                                                   token->type = TOKEN_KEYWORD;
 639                                           if (repr && vStringLength (token->string) > 1)
 640                                                   vStringCatS (repr, vStringValue (token->string) + 1);
 641                                   }
 642                                   break;
 643         }
 644
 645         LastTokenType = token->type;
 646 }
 647
 648 static void readToken (tokenInfo *const token)
 649 {
 650         readTokenFull (token, FALSE, NULL);
 651 }
 652
 653 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 654 {
 655         dest->nestLevel = src->nestLevel;
 656         dest->lineNumber = src->lineNumber;
 657         dest->filePosition = src->filePosition;
 658         dest->type = src->type;
 659         dest->keyword = src->keyword;
 660         vStringCopy(dest->string, src->string);
 661         vStringCopy(dest->scope, src->scope);
 662 }
 663
 664 /*
 665  *       Token parsing functions
 666  */
 667
 668 static void skipArgumentList (tokenInfo *const token, boolean include_newlines, vString *const repr)
 669 {
 670         int nest_level = 0;
 671
 672         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
 673         {
 674                 nest_level++;
 675                 if (repr)
 676                         vStringPut (repr, '(');
 677                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 678                 {
 679                         readTokenFull (token, FALSE, repr);
 680                         if (isType (token, TOKEN_OPEN_PAREN))
 681                                 nest_level++;
 682                         else if (isType (token, TOKEN_CLOSE_PAREN))
 683                                 nest_level--;
 684                 }
 685                 readTokenFull (token, include_newlines, NULL);
 686         }
 687 }
 688
 689 static void skipArrayList (tokenInfo *const token, boolean include_newlines)
 690 {
 691         int nest_level = 0;
 692
 693         /*
 694          * Handle square brackets
 695          *       var name[1]
 696          * So we must check for nested open and closing square brackets
 697          */
 698
 699         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
 700         {
 701                 nest_level++;
 702                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 703                 {
 704                         readToken (token);
 705                         if (isType (token, TOKEN_OPEN_SQUARE))
 706                                 nest_level++;
 707                         else if (isType (token, TOKEN_CLOSE_SQUARE))
 708                                 nest_level--;
 709                 }
 710                 readTokenFull (token, include_newlines, NULL);
 711         }
 712 }
 713
 714 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
 715 {
 716         if (vStringLength (parent->string) > 0)
 717         {
 718                 vStringCatS (parent->string, ".");
 719         }
 720         vStringCatS (parent->string, vStringValue(child->string));
 721         vStringTerminate(parent->string);
 722 }
 723
 724 static void addToScope (tokenInfo* const token, vString* const extra)
 725 {
 726         if (vStringLength (token->scope) > 0)
 727         {
 728                 vStringCatS (token->scope, ".");
 729         }
 730         vStringCatS (token->scope, vStringValue(extra));
 731         vStringTerminate(token->scope);
 732 }
 733
 734 /*
 735  *       Scanning functions
 736  */
 737
 738 static boolean findCmdTerm (tokenInfo *const token, boolean include_newlines)
 739 {
 740         /*
 741          * Read until we find either a semicolon or closing brace.
 742          * Any nested braces will be handled within.
 743          */
 744         while (! isType (token, TOKEN_SEMICOLON) &&
 745                    ! isType (token, TOKEN_CLOSE_CURLY) &&
 746                    ! isType (token, TOKEN_EOF))
 747         {
 748                 /* Handle nested blocks */
 749                 if ( isType (token, TOKEN_OPEN_CURLY))
 750                 {
 751                         parseBlock (token, token);
 752                         readTokenFull (token, include_newlines, NULL);
 753                 }
 754                 else if ( isType (token, TOKEN_OPEN_PAREN) )
 755                 {
 756                         skipArgumentList(token, include_newlines, NULL);
 757                 }
 758                 else if ( isType (token, TOKEN_OPEN_SQUARE) )
 759                 {
 760                         skipArrayList(token, include_newlines);
 761                 }
 762                 else
 763                 {
 764                         readTokenFull (token, include_newlines, NULL);
 765                 }
 766         }
 767
 768         return isType (token, TOKEN_SEMICOLON);
 769 }
 770
 771 static void parseSwitch (tokenInfo *const token)
 772 {
 773         /*
 774          * switch (expression) {
 775          * case value1:
 776          *         statement;
 777          *         break;
 778          * case value2:
 779          *         statement;
 780          *         break;
 781          * default : statement;
 782          * }
 783          */
 784
 785         readToken (token);
 786
 787         if (isType (token, TOKEN_OPEN_PAREN))
 788         {
 789                 /*
 790                  * Handle nameless functions, these will only
 791                  * be considered methods.
 792                  */
 793                 skipArgumentList(token, FALSE, NULL);
 794         }
 795
 796         if (isType (token, TOKEN_OPEN_CURLY))
 797         {
 798                 parseBlock (token, token);
 799         }
 800 }
 801
 802 static boolean parseLoop (tokenInfo *const token, tokenInfo *const parent)
 803 {
 804         /*
 805          * Handles these statements
 806          *         for (x=0; x<3; x++)
 807          *                 document.write("This text is repeated three times<br>");
 808          *
 809          *         for (x=0; x<3; x++)
 810          *         {
 811          *                 document.write("This text is repeated three times<br>");
 812          *         }
 813          *
 814          *         while (number<5){
 815          *                 document.write(number+"<br>");
 816          *                 number++;
 817          *         }
 818          *
 819          *         do{
 820          *                 document.write(number+"<br>");
 821          *                 number++;
 822          *         }
 823          *         while (number<5);
 824          */
 825         boolean is_terminated = TRUE;
 826
 827         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
 828         {
 829                 readToken(token);
 830
 831                 if (isType (token, TOKEN_OPEN_PAREN))
 832                 {
 833                         /*
 834                          * Handle nameless functions, these will only
 835                          * be considered methods.
 836                          */
 837                         skipArgumentList(token, FALSE, NULL);
 838                 }
 839
 840                 if (isType (token, TOKEN_OPEN_CURLY))
 841                 {
 842                         /*
 843                          * This will be either a function or a class.
 844                          * We can only determine this by checking the body
 845                          * of the function.  If we find a "this." we know
 846                          * it is a class, otherwise it is a function.
 847                          */
 848                         parseBlock (token, parent);
 849                 }
 850                 else
 851                 {
 852                         is_terminated = parseLine(token, parent, FALSE);
 853                 }
 854         }
 855         else if (isKeyword (token, KEYWORD_do))
 856         {
 857                 readToken(token);
 858
 859                 if (isType (token, TOKEN_OPEN_CURLY))
 860                 {
 861                         /*
 862                          * This will be either a function or a class.
 863                          * We can only determine this by checking the body
 864                          * of the function.  If we find a "this." we know
 865                          * it is a class, otherwise it is a function.
 866                          */
 867                         parseBlock (token, parent);
 868                 }
 869                 else
 870                 {
 871                         is_terminated = parseLine(token, parent, FALSE);
 872                 }
 873
 874                 if (is_terminated)
 875                         readToken(token);
 876
 877                 if (isKeyword (token, KEYWORD_while))
 878                 {
 879                         readToken(token);
 880
 881                         if (isType (token, TOKEN_OPEN_PAREN))
 882                         {
 883                                 /*
 884                                  * Handle nameless functions, these will only
 885                                  * be considered methods.
 886                                  */
 887                                 skipArgumentList(token, TRUE, NULL);
 888                         }
 889                         if (! isType (token, TOKEN_SEMICOLON))
 890                                 is_terminated = FALSE;
 891                 }
 892         }
 893
 894         return is_terminated;
 895 }
 896
 897 static boolean parseIf (tokenInfo *const token, tokenInfo *const parent)
 898 {
 899         boolean read_next_token = TRUE;
 900         /*
 901          * If statements have two forms
 902          *         if ( ... )
 903          *                 one line;
 904          *
 905          *         if ( ... )
 906          *                statement;
 907          *         else
 908          *                statement
 909          *
 910          *         if ( ... ) {
 911          *                multiple;
 912          *                statements;
 913          *         }
 914          *
 915          *
 916          *         if ( ... ) {
 917          *                return elem
 918          *         }
 919          *
 920          *     This example if correctly written, but the
 921          *     else contains only 1 statement without a terminator
 922          *     since the function finishes with the closing brace.
 923          *
 924      *     function a(flag){
 925      *         if(flag)
 926      *             test(1);
 927      *         else
 928      *             test(2)
 929      *     }
 930          *
 931          * TODO:  Deal with statements that can optional end
 932          *                without a semi-colon.  Currently this messes up
 933          *                the parsing of blocks.
 934          *                Need to somehow detect this has happened, and either
 935          *                backup a token, or skip reading the next token if
 936          *                that is possible from all code locations.
 937          *
 938          */
 939
 940         readToken (token);
 941
 942         if (isKeyword (token, KEYWORD_if))
 943         {
 944                 /*
 945                  * Check for an "else if" and consume the "if"
 946                  */
 947                 readToken (token);
 948         }
 949
 950         if (isType (token, TOKEN_OPEN_PAREN))
 951         {
 952                 /*
 953                  * Handle nameless functions, these will only
 954                  * be considered methods.
 955                  */
 956                 skipArgumentList(token, FALSE, NULL);
 957         }
 958
 959         if (isType (token, TOKEN_OPEN_CURLY))
 960         {
 961                 /*
 962                  * This will be either a function or a class.
 963                  * We can only determine this by checking the body
 964                  * of the function.  If we find a "this." we know
 965                  * it is a class, otherwise it is a function.
 966                  */
 967                 parseBlock (token, parent);
 968         }
 969         else
 970         {
 971                 /* The next token should only be read if this statement had its own
 972                  * terminator */
 973                 read_next_token = findCmdTerm (token, TRUE);
 974         }
 975         return read_next_token;
 976 }
 977
 978 static void parseFunction (tokenInfo *const token)
 979 {
 980         tokenInfo *const name = newToken ();
 981         vString *const signature = vStringNew ();
 982         boolean is_class = FALSE;
 983
 984         /*
 985          * This deals with these formats
 986          *         function validFunctionTwo(a,b) {}
 987          */
 988
 989         readToken (name);
 990         /* Add scope in case this is an INNER function */
 991         addToScope(name, token->scope);
 992
 993         readToken (token);
 994         while (isType (token, TOKEN_PERIOD))
 995         {
 996                 readToken (token);
 997                 if ( isKeyword(token, KEYWORD_NONE) )
 998                 {
 999                         addContext (name, token);
1000                         readToken (token);
1001                 }
1002         }
1003
1004         if ( isType (token, TOKEN_OPEN_PAREN) )
1005                 skipArgumentList(token, FALSE, signature);
1006
1007         if ( isType (token, TOKEN_OPEN_CURLY) )
1008         {
1009                 is_class = parseBlock (token, name);
1010                 if ( is_class )
1011                         makeClassTag (name, signature);
1012                 else
1013                         makeFunctionTag (name, signature);
1014         }
1015
1016         findCmdTerm (token, FALSE);
1017
1018         vStringDelete (signature);
1019         deleteToken (name);
1020 }
1021
1022 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent)
1023 {
1024         boolean is_class = FALSE;
1025         boolean read_next_token = TRUE;
1026         vString * saveScope = vStringNew ();
1027         tokenInfo *const parent = newToken ();
1028
1029         /* backup the parent token to allow calls like parseBlock(token, token) */
1030         copyToken (parent, orig_parent);
1031
1032         token->nestLevel++;
1033         /*
1034          * Make this routine a bit more forgiving.
1035          * If called on an open_curly advance it
1036          */
1037         if ( isType (token, TOKEN_OPEN_CURLY) &&
1038                         isKeyword(token, KEYWORD_NONE) )
1039                 readToken(token);
1040
1041         if (! isType (token, TOKEN_CLOSE_CURLY))
1042         {
1043                 /*
1044                  * Read until we find the closing brace,
1045                  * any nested braces will be handled within
1046                  */
1047                 do
1048                 {
1049                         read_next_token = TRUE;
1050                         if (isKeyword (token, KEYWORD_this))
1051                         {
1052                                 /*
1053                                  * Means we are inside a class and have found
1054                                  * a class, not a function
1055                                  */
1056                                 is_class = TRUE;
1057                                 vStringCopy(saveScope, token->scope);
1058                                 addToScope (token, parent->string);
1059
1060                                 /*
1061                                  * Ignore the remainder of the line
1062                                  * findCmdTerm(token);
1063                                  */
1064                                 read_next_token = parseLine (token, parent, is_class);
1065
1066                                 vStringCopy(token->scope, saveScope);
1067                         }
1068                         else if (isKeyword (token, KEYWORD_var) ||
1069                                          isKeyword (token, KEYWORD_let) ||
1070                                          isKeyword (token, KEYWORD_const))
1071                         {
1072                                 /*
1073                                  * Potentially we have found an inner function.
1074                                  * Set something to indicate the scope
1075                                  */
1076                                 vStringCopy(saveScope, token->scope);
1077                                 addToScope (token, parent->string);
1078                                 read_next_token = parseLine (token, parent, is_class);
1079                                 vStringCopy(token->scope, saveScope);
1080                         }
1081                         else if (isKeyword (token, KEYWORD_function))
1082                         {
1083                                 vStringCopy(saveScope, token->scope);
1084                                 addToScope (token, parent->string);
1085                                 parseFunction (token);
1086                                 vStringCopy(token->scope, saveScope);
1087                         }
1088                         else if (isType (token, TOKEN_OPEN_CURLY))
1089                         {
1090                                 /* Handle nested blocks */
1091                                 parseBlock (token, parent);
1092                         }
1093                         else
1094                         {
1095                                 /*
1096                                  * It is possible for a line to have no terminator
1097                                  * if the following line is a closing brace.
1098                                  * parseLine will detect this case and indicate
1099                                  * whether we should read an additional token.
1100                                  */
1101                                 read_next_token = parseLine (token, parent, is_class);
1102                         }
1103
1104                         /*
1105                          * Always read a new token unless we find a statement without
1106                          * a ending terminator
1107                          */
1108                         if( read_next_token )
1109                                 readToken(token);
1110
1111                         /*
1112                          * If we find a statement without a terminator consider the
1113                          * block finished, otherwise the stack will be off by one.
1114                          */
1115                 } while (! isType (token, TOKEN_EOF) &&
1116                                  ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1117         }
1118
1119         deleteToken (parent);
1120         vStringDelete(saveScope);
1121         token->nestLevel--;
1122
1123         return is_class;
1124 }
1125
1126 static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
1127 {
1128         tokenInfo *const name = newToken ();
1129         boolean has_methods = FALSE;
1130
1131         /*
1132          * This deals with these formats
1133          *         validProperty  : 2,
1134          *         validMethod    : function(a,b) {}
1135          *         'validMethod2' : function(a,b) {}
1136      *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1137          */
1138
1139         do
1140         {
1141                 readToken (token);
1142                 if (isType (token, TOKEN_CLOSE_CURLY))
1143                 {
1144                         /*
1145                          * This was most likely a variable declaration of a hash table.
1146                          * indicate there were no methods and return.
1147                          */
1148                         has_methods = FALSE;
1149                         goto cleanUp;
1150                 }
1151
1152                 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
1153                 {
1154                         copyToken(name, token);
1155
1156                         readToken (token);
1157                         if ( isType (token, TOKEN_COLON) )
1158                         {
1159                                 readToken (token);
1160                                 if ( isKeyword (token, KEYWORD_function) )
1161                                 {
1162                                         vString *const signature = vStringNew ();
1163
1164                                         readToken (token);
1165                                         if ( isType (token, TOKEN_OPEN_PAREN) )
1166                                         {
1167                                                 skipArgumentList(token, FALSE, signature);
1168                                         }
1169
1170                                         if (isType (token, TOKEN_OPEN_CURLY))
1171                                         {
1172                                                 has_methods = TRUE;
1173                                                 addToScope (name, class->string);
1174                                                 makeJsTag (name, JSTAG_METHOD, signature);
1175                                                 parseBlock (token, name);
1176
1177                                                 /*
1178                                                  * Read to the closing curly, check next
1179                                                  * token, if a comma, we must loop again
1180                                                  */
1181                                                 readToken (token);
1182                                         }
1183
1184                                         vStringDelete (signature);
1185                                 }
1186                                 else
1187                                 {
1188                                                 vString * saveScope = vStringNew ();
1189                                                 boolean has_child_methods = FALSE;
1190
1191                                                 /* skip whatever is the value */
1192                                                 while (! isType (token, TOKEN_COMMA) &&
1193                                                        ! isType (token, TOKEN_CLOSE_CURLY) &&
1194                                                        ! isType (token, TOKEN_EOF))
1195                                                 {
1196                                                         if (isType (token, TOKEN_OPEN_CURLY))
1197                                                         {
1198                                                                 /* Recurse to find child properties/methods */
1199                                                                 vStringCopy (saveScope, token->scope);
1200                                                                 addToScope (token, class->string);
1201                                                                 has_child_methods = parseMethods (token, name);
1202                                                                 vStringCopy (token->scope, saveScope);
1203                                                                 readToken (token);
1204                                                         }
1205                                                         else if (isType (token, TOKEN_OPEN_PAREN))
1206                                                         {
1207                                                                 skipArgumentList (token, FALSE, NULL);
1208                                                         }
1209                                                         else if (isType (token, TOKEN_OPEN_SQUARE))
1210                                                         {
1211                                                                 skipArrayList (token, FALSE);
1212                                                         }
1213                                                         else
1214                                                         {
1215                                                                 readToken (token);
1216                                                         }
1217                                                 }
1218                                                 vStringDelete (saveScope);
1219
1220                                                 has_methods = TRUE;
1221                                                 addToScope (name, class->string);
1222                                                 if (has_child_methods)
1223                                                         makeJsTag (name, JSTAG_CLASS, NULL);
1224                                                 else
1225                                                         makeJsTag (name, JSTAG_PROPERTY, NULL);
1226                                 }
1227                         }
1228                 }
1229         } while ( isType(token, TOKEN_COMMA) );
1230
1231         findCmdTerm (token, FALSE);
1232
1233 cleanUp:
1234         deleteToken (name);
1235
1236         return has_methods;
1237 }
1238
1239 static boolean parseStatement (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class)
1240 {
1241         tokenInfo *const name = newToken ();
1242         tokenInfo *const secondary_name = newToken ();
1243         tokenInfo *const method_body_token = newToken ();
1244         vString * saveScope = vStringNew ();
1245         boolean is_class = FALSE;
1246         boolean is_var = FALSE;
1247         boolean is_const = FALSE;
1248         boolean is_terminated = TRUE;
1249         boolean is_global = FALSE;
1250         boolean has_methods = FALSE;
1251         vString *       fulltag;
1252
1253         vStringClear(saveScope);
1254         /*
1255          * Functions can be named or unnamed.
1256          * This deals with these formats:
1257          * Function
1258          *         validFunctionOne = function(a,b) {}
1259          *         testlib.validFunctionFive = function(a,b) {}
1260          *         var innerThree = function(a,b) {}
1261          *         var innerFour = (a,b) {}
1262          *         var D2 = secondary_fcn_name(a,b) {}
1263          *         var D3 = new Function("a", "b", "return a+b;");
1264          * Class
1265          *         testlib.extras.ValidClassOne = function(a,b) {
1266          *                 this.a = a;
1267          *         }
1268          * Class Methods
1269          *         testlib.extras.ValidClassOne.prototype = {
1270          *                 'validMethodOne' : function(a,b) {},
1271          *                 'validMethodTwo' : function(a,b) {}
1272          *         }
1273      *     ValidClassTwo = function ()
1274      *     {
1275      *         this.validMethodThree = function() {}
1276      *         // unnamed method
1277      *         this.validMethodFour = () {}
1278      *     }
1279          *         Database.prototype.validMethodThree = Database_getTodaysDate;
1280          */
1281
1282         if ( is_inside_class )
1283                 is_class = TRUE;
1284         /*
1285          * var can precede an inner function
1286          */
1287         if ( isKeyword(token, KEYWORD_var) ||
1288                  isKeyword(token, KEYWORD_let) ||
1289                  isKeyword(token, KEYWORD_const) )
1290         {
1291                 is_const = isKeyword(token, KEYWORD_const);
1292                 /*
1293                  * Only create variables for global scope
1294                  */
1295                 if ( token->nestLevel == 0 )
1296                 {
1297                         is_global = TRUE;
1298                 }
1299                 readToken(token);
1300         }
1301
1302         if ( isKeyword(token, KEYWORD_this) )
1303         {
1304                 readToken(token);
1305                 if (isType (token, TOKEN_PERIOD))
1306                 {
1307                         readToken(token);
1308                 }
1309         }
1310
1311         copyToken(name, token);
1312
1313         while (! isType (token, TOKEN_CLOSE_CURLY) &&
1314                ! isType (token, TOKEN_SEMICOLON)   &&
1315                ! isType (token, TOKEN_EQUAL_SIGN)  &&
1316                ! isType (token, TOKEN_EOF))
1317         {
1318                 if (isType (token, TOKEN_OPEN_CURLY))
1319                         parseBlock (token, parent);
1320
1321                 /* Potentially the name of the function */
1322                 readToken (token);
1323                 if (isType (token, TOKEN_PERIOD))
1324                 {
1325                         /*
1326                          * Cannot be a global variable is it has dot references in the name
1327                          */
1328                         is_global = FALSE;
1329                         do
1330                         {
1331                                 readToken (token);
1332                                 if ( isKeyword(token, KEYWORD_NONE) )
1333                                 {
1334                                         if ( is_class )
1335                                         {
1336                                                 addToScope(token, name->string);
1337                                         }
1338                                         else
1339                                                 addContext (name, token);
1340
1341                                         readToken (token);
1342                                 }
1343                                 else if ( isKeyword(token, KEYWORD_prototype) )
1344                                 {
1345                                         /*
1346                                          * When we reach the "prototype" tag, we infer:
1347                                          *     "BindAgent" is a class
1348                                          *     "build"     is a method
1349                                          *
1350                                          * function BindAgent( repeatableIdName, newParentIdName ) {
1351                                          * }
1352                                          *
1353                                          * CASE 1
1354                                          * Specified function name: "build"
1355                                          *     BindAgent.prototype.build = function( mode ) {
1356                                          *        maybe parse nested functions
1357                                          *     }
1358                                          *
1359                                          * CASE 2
1360                                          * Prototype listing
1361                                          *     ValidClassOne.prototype = {
1362                                          *         'validMethodOne' : function(a,b) {},
1363                                          *         'validMethodTwo' : function(a,b) {}
1364                                          *     }
1365                                          *
1366                                          */
1367                                         makeClassTag (name, NULL);
1368                                         is_class = TRUE;
1369
1370                                         /*
1371                                          * There should a ".function_name" next.
1372                                          */
1373                                         readToken (token);
1374                                         if (isType (token, TOKEN_PERIOD))
1375                                         {
1376                                                 /*
1377                                                  * Handle CASE 1
1378                                                  */
1379                                                 readToken (token);
1380                                                 if ( isKeyword(token, KEYWORD_NONE) )
1381                                                 {
1382                                                         vString *const signature = vStringNew ();
1383
1384                                                         vStringCopy(saveScope, token->scope);
1385                                                         addToScope(token, name->string);
1386
1387                                                         readToken (method_body_token);
1388                                                         vStringCopy (method_body_token->scope, token->scope);
1389
1390                                                         while (! isType (method_body_token, TOKEN_SEMICOLON) &&
1391                                                                ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
1392                                                                ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
1393                                                                ! isType (method_body_token, TOKEN_EOF))
1394                                                         {
1395                                                                 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1396                                                                         skipArgumentList(method_body_token, FALSE,
1397                                                                                                          vStringLength (signature) == 0 ? signature : NULL);
1398                                                                 else
1399                                                                         readToken (method_body_token);
1400                                                         }
1401
1402                                                         makeJsTag (token, JSTAG_METHOD, signature);
1403                                                         vStringDelete (signature);
1404
1405                                                         if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1406                                                         {
1407                                                                 parseBlock (method_body_token, token);
1408                                                                 is_terminated = TRUE;
1409                                                         }
1410                                                         else
1411                                                                 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
1412                                                         goto cleanUp;
1413                                                 }
1414                                         }
1415                                         else if (isType (token, TOKEN_EQUAL_SIGN))
1416                                         {
1417                                                 readToken (token);
1418                                                 if (isType (token, TOKEN_OPEN_CURLY))
1419                                                 {
1420                                                         /*
1421                                                          * Handle CASE 2
1422                                                          *
1423                                                          * Creates tags for each of these class methods
1424                                                          *     ValidClassOne.prototype = {
1425                                                          *         'validMethodOne' : function(a,b) {},
1426                                                          *         'validMethodTwo' : function(a,b) {}
1427                                                          *     }
1428                                                          */
1429                                                         parseMethods(token, name);
1430                                                         /*
1431                                                          * Find to the end of the statement
1432                                                          */
1433                                                         findCmdTerm (token, FALSE);
1434                                                         token->ignoreTag = FALSE;
1435                                                         is_terminated = TRUE;
1436                                                         goto cleanUp;
1437                                                 }
1438                                         }
1439                                 }
1440                                 else
1441                                         readToken (token);
1442                         } while (isType (token, TOKEN_PERIOD));
1443                 }
1444
1445                 if ( isType (token, TOKEN_OPEN_PAREN) )
1446                         skipArgumentList(token, FALSE, NULL);
1447
1448                 if ( isType (token, TOKEN_OPEN_SQUARE) )
1449                         skipArrayList(token, FALSE);
1450
1451                 /*
1452                 if ( isType (token, TOKEN_OPEN_CURLY) )
1453                 {
1454                         is_class = parseBlock (token, name);
1455                 }
1456                 */
1457         }
1458
1459         if ( isType (token, TOKEN_CLOSE_CURLY) )
1460         {
1461                 /*
1462                  * Reaching this section without having
1463                  * processed an open curly brace indicates
1464                  * the statement is most likely not terminated.
1465                  */
1466                 is_terminated = FALSE;
1467                 goto cleanUp;
1468         }
1469
1470         if ( isType (token, TOKEN_SEMICOLON) )
1471         {
1472                 /*
1473                  * Only create variables for global scope
1474                  */
1475                 if ( token->nestLevel == 0 && is_global )
1476                 {
1477                         /*
1478                          * Handles this syntax:
1479                          *         var g_var2;
1480                          */
1481                         if (isType (token, TOKEN_SEMICOLON))
1482                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1483                 }
1484                 /*
1485                  * Statement has ended.
1486                  * This deals with calls to functions, like:
1487                  *     alert(..);
1488                  */
1489                 goto cleanUp;
1490         }
1491
1492         if ( isType (token, TOKEN_EQUAL_SIGN) )
1493         {
1494                 int parenDepth = 0;
1495
1496                 readToken (token);
1497
1498                 /* rvalue might be surrounded with parentheses */
1499                 while (isType (token, TOKEN_OPEN_PAREN))
1500                 {
1501                         parenDepth++;
1502                         readToken (token);
1503                 }
1504
1505                 if ( isKeyword (token, KEYWORD_function) )
1506                 {
1507                         vString *const signature = vStringNew ();
1508
1509                         readToken (token);
1510
1511                         if ( isKeyword (token, KEYWORD_NONE) &&
1512                                         ! isType (token, TOKEN_OPEN_PAREN) )
1513                         {
1514                                 /*
1515                                  * Functions of this format:
1516                                  *         var D2A = function theAdd(a, b)
1517                                  *         {
1518                                  *                return a+b;
1519                                  *         }
1520                                  * Are really two separate defined functions and
1521                                  * can be referenced in two ways:
1522                                  *         alert( D2A(1,2) );                     // produces 3
1523                                  *         alert( theAdd(1,2) );                  // also produces 3
1524                                  * So it must have two tags:
1525                                  *         D2A
1526                                  *         theAdd
1527                                  * Save the reference to the name for later use, once
1528                                  * we have established this is a valid function we will
1529                                  * create the secondary reference to it.
1530                                  */
1531                                 copyToken(secondary_name, token);
1532                                 readToken (token);
1533                         }
1534
1535                         if ( isType (token, TOKEN_OPEN_PAREN) )
1536                                 skipArgumentList(token, FALSE, signature);
1537
1538                         if (isType (token, TOKEN_OPEN_CURLY))
1539                         {
1540                                 /*
1541                                  * This will be either a function or a class.
1542                                  * We can only determine this by checking the body
1543                                  * of the function.  If we find a "this." we know
1544                                  * it is a class, otherwise it is a function.
1545                                  */
1546                                 if ( is_inside_class )
1547                                 {
1548                                         makeJsTag (name, JSTAG_METHOD, signature);
1549                                         if ( vStringLength(secondary_name->string) > 0 )
1550                                                 makeFunctionTag (secondary_name, signature);
1551                                         parseBlock (token, name);
1552                                 }
1553                                 else
1554                                 {
1555                                         is_class = parseBlock (token, name);
1556                                         if ( is_class )
1557                                                 makeClassTag (name, signature);
1558                                         else
1559                                                 makeFunctionTag (name, signature);
1560
1561                                         if ( vStringLength(secondary_name->string) > 0 )
1562                                                 makeFunctionTag (secondary_name, signature);
1563                                 }
1564                         }
1565
1566                         vStringDelete (signature);
1567                 }
1568                 else if (isType (token, TOKEN_OPEN_CURLY))
1569                 {
1570                         /*
1571                          * Creates tags for each of these class methods
1572                          *     ValidClassOne.prototype = {
1573                          *         'validMethodOne' : function(a,b) {},
1574                          *         'validMethodTwo' : function(a,b) {}
1575                          *     }
1576                          * Or checks if this is a hash variable.
1577                          *     var z = {};
1578                          */
1579                         has_methods = parseMethods(token, name);
1580                         if (has_methods)
1581                                 makeJsTag (name, JSTAG_CLASS, NULL);
1582                         else
1583                         {
1584                                 /*
1585                                  * Only create variables for global scope
1586                                  */
1587                                 if ( token->nestLevel == 0 && is_global )
1588                                 {
1589                                         /*
1590                                          * A pointer can be created to the function.
1591                                          * If we recognize the function/class name ignore the variable.
1592                                          * This format looks identical to a variable definition.
1593                                          * A variable defined outside of a block is considered
1594                                          * a global variable:
1595                                          *         var g_var1 = 1;
1596                                          *         var g_var2;
1597                                          * This is not a global variable:
1598                                          *         var g_var = function;
1599                                          * This is a global variable:
1600                                          *         var g_var = different_var_name;
1601                                          */
1602                                         fulltag = vStringNew ();
1603                                         if (vStringLength (token->scope) > 0)
1604                                         {
1605                                                 vStringCopy(fulltag, token->scope);
1606                                                 vStringCatS (fulltag, ".");
1607                                                 vStringCatS (fulltag, vStringValue(token->string));
1608                                         }
1609                                         else
1610                                         {
1611                                                 vStringCopy(fulltag, token->string);
1612                                         }
1613                                         vStringTerminate(fulltag);
1614                                         if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1615                                                         ! stringListHas(ClassNames, vStringValue (fulltag)) )
1616                                         {
1617                                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1618                                         }
1619                                         vStringDelete (fulltag);
1620                                 }
1621                         }
1622                         if (isType (token, TOKEN_CLOSE_CURLY))
1623                         {
1624                                 /*
1625                                  * Assume the closing parentheses terminates
1626                                  * this statements.
1627                                  */
1628                                 is_terminated = TRUE;
1629                         }
1630                 }
1631                 else if (isKeyword (token, KEYWORD_new))
1632                 {
1633                         readToken (token);
1634                         is_var = isType (token, TOKEN_IDENTIFIER);
1635                         if ( isKeyword (token, KEYWORD_function) ||
1636                                         isKeyword (token, KEYWORD_capital_function) ||
1637                                         isKeyword (token, KEYWORD_capital_object) ||
1638                                         is_var )
1639                         {
1640                                 if ( isKeyword (token, KEYWORD_capital_object) )
1641                                         is_class = TRUE;
1642
1643                                 readToken (token);
1644                                 if ( isType (token, TOKEN_OPEN_PAREN) )
1645                                         skipArgumentList(token, TRUE, NULL);
1646
1647                                 if (isType (token, TOKEN_SEMICOLON))
1648                                 {
1649                                         if ( token->nestLevel == 0 )
1650                                         {
1651                                                 if ( is_var )
1652                                                 {
1653                                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1654                                                 }
1655                                                 else
1656                                                 {
1657                                                         if ( is_class )
1658                                                         {
1659                                                                 makeClassTag (name, NULL);
1660                                                         } else {
1661                                                                 /* FIXME: we cannot really get a meaningful
1662                                                                  * signature from a `new Function()` call,
1663                                                                  * so for now just don't set any */
1664                                                                 makeFunctionTag (name, NULL);
1665                                                         }
1666                                                 }
1667                                         }
1668                                 }
1669                                 else if (isType (token, TOKEN_CLOSE_CURLY))
1670                                         is_terminated = FALSE;
1671                         }
1672                 }
1673                 else if (isKeyword (token, KEYWORD_NONE))
1674                 {
1675                         /*
1676                          * Only create variables for global scope
1677                          */
1678                         if ( token->nestLevel == 0 && is_global )
1679                         {
1680                                 /*
1681                                  * A pointer can be created to the function.
1682                                  * If we recognize the function/class name ignore the variable.
1683                                  * This format looks identical to a variable definition.
1684                                  * A variable defined outside of a block is considered
1685                                  * a global variable:
1686                                  *         var g_var1 = 1;
1687                                  *         var g_var2;
1688                                  * This is not a global variable:
1689                                  *         var g_var = function;
1690                                  * This is a global variable:
1691                                  *         var g_var = different_var_name;
1692                                  */
1693                                 fulltag = vStringNew ();
1694                                 if (vStringLength (token->scope) > 0)
1695                                 {
1696                                         vStringCopy(fulltag, token->scope);
1697                                         vStringCatS (fulltag, ".");
1698                                         vStringCatS (fulltag, vStringValue(token->string));
1699                                 }
1700                                 else
1701                                 {
1702                                         vStringCopy(fulltag, token->string);
1703                                 }
1704                                 vStringTerminate(fulltag);
1705                                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1706                                                 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1707                                 {
1708                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1709                                 }
1710                                 vStringDelete (fulltag);
1711                         }
1712                 }
1713
1714                 if (parenDepth > 0)
1715                 {
1716                         while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
1717                         {
1718                                 if (isType (token, TOKEN_OPEN_PAREN))
1719                                         parenDepth++;
1720                                 else if (isType (token, TOKEN_CLOSE_PAREN))
1721                                         parenDepth--;
1722                                 readTokenFull (token, TRUE, NULL);
1723                         }
1724                         if (isType (token, TOKEN_CLOSE_CURLY))
1725                                 is_terminated = FALSE;
1726                 }
1727         }
1728
1729         /* if we aren't already at the cmd end, advance to it and check whether
1730          * the statement was terminated */
1731         if (! isType (token, TOKEN_CLOSE_CURLY) &&
1732             ! isType (token, TOKEN_SEMICOLON))
1733         {
1734                 /*
1735                  * Statements can be optionally terminated in the case of
1736                  * statement prior to a close curly brace as in the
1737                  * document.write line below:
1738                  *
1739                  * function checkForUpdate() {
1740                  *         if( 1==1 ) {
1741                  *                 document.write("hello from checkForUpdate<br>")
1742                  *         }
1743                  *         return 1;
1744                  * }
1745                  */
1746                 is_terminated = findCmdTerm (token, TRUE);
1747         }
1748
1749 cleanUp:
1750         vStringCopy(token->scope, saveScope);
1751         deleteToken (name);
1752         deleteToken (secondary_name);
1753         deleteToken (method_body_token);
1754         vStringDelete(saveScope);
1755
1756         return is_terminated;
1757 }
1758
1759 static void parseUI5 (tokenInfo *const token)
1760 {
1761         tokenInfo *const name = newToken ();
1762         /*
1763          * SAPUI5 is built on top of jQuery.
1764          * It follows a standard format:
1765          *     sap.ui.controller("id.of.controller", {
1766          *         method_name : function... {
1767          *         },
1768          *
1769          *         method_name : function ... {
1770          *         }
1771          *     }
1772          *
1773          * Handle the parsing of the initial controller (and the
1774          * same for "view") and then allow the methods to be
1775          * parsed as usual.
1776          */
1777
1778         readToken (token);
1779
1780         if (isType (token, TOKEN_PERIOD))
1781         {
1782                 readToken (token);
1783                 while (! isType (token, TOKEN_OPEN_PAREN) &&
1784                            ! isType (token, TOKEN_EOF))
1785                 {
1786                         readToken (token);
1787                 }
1788                 readToken (token);
1789
1790                 if (isType (token, TOKEN_STRING))
1791                 {
1792                         copyToken(name, token);
1793                         readToken (token);
1794                 }
1795
1796                 if (isType (token, TOKEN_COMMA))
1797                         readToken (token);
1798
1799                 do
1800                 {
1801                         parseMethods (token, name);
1802                 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
1803                                  ! isType (token, TOKEN_EOF));
1804         }
1805
1806         deleteToken (name);
1807 }
1808
1809 static boolean parseLine (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class)
1810 {
1811         boolean is_terminated = TRUE;
1812         /*
1813          * Detect the common statements, if, while, for, do, ...
1814          * This is necessary since the last statement within a block "{}"
1815          * can be optionally terminated.
1816          *
1817          * If the statement is not terminated, we need to tell
1818          * the calling routine to prevent reading an additional token
1819          * looking for the end of the statement.
1820          */
1821
1822         if (isType(token, TOKEN_KEYWORD))
1823         {
1824                 switch (token->keyword)
1825                 {
1826                         case KEYWORD_for:
1827                         case KEYWORD_while:
1828                         case KEYWORD_do:
1829                                 is_terminated = parseLoop (token, parent);
1830                                 break;
1831                         case KEYWORD_if:
1832                         case KEYWORD_else:
1833                         case KEYWORD_try:
1834                         case KEYWORD_catch:
1835                         case KEYWORD_finally:
1836                                 /* Common semantics */
1837                                 is_terminated = parseIf (token, parent);
1838                                 break;
1839                         case KEYWORD_switch:
1840                                 parseSwitch (token);
1841                                 break;
1842                         case KEYWORD_return:
1843                                 is_terminated = findCmdTerm (token, TRUE);
1844                                 break;
1845                         default:
1846                                 is_terminated = parseStatement (token, parent, is_inside_class);
1847                                 break;
1848                 }
1849         }
1850         else
1851         {
1852                 /*
1853                  * Special case where single line statements may not be
1854                  * SEMICOLON terminated.  parseBlock needs to know this
1855                  * so that it does not read the next token.
1856                  */
1857                 is_terminated = parseStatement (token, parent, is_inside_class);
1858         }
1859         return is_terminated;
1860 }
1861
1862 static void parseJsFile (tokenInfo *const token)
1863 {
1864         do
1865         {
1866                 readToken (token);
1867
1868                 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
1869                         parseFunction (token);
1870                 else if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
1871                         parseUI5 (token);
1872                 else
1873                         parseLine (token, token, FALSE);
1874         } while (! isType (token, TOKEN_EOF));
1875 }
1876
1877 static void initialize (const langType language)
1878 {
1879         Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
1880         Lang_js = language;
1881 }
1882
1883 static void findJsTags (void)
1884 {
1885         tokenInfo *const token = newToken ();
1886
1887         ClassNames = stringListNew ();
1888         FunctionNames = stringListNew ();
1889         LastTokenType = TOKEN_UNDEFINED;
1890
1891         parseJsFile (token);
1892
1893         stringListDelete (ClassNames);
1894         stringListDelete (FunctionNames);
1895         ClassNames = NULL;
1896         FunctionNames = NULL;
1897         deleteToken (token);
1898 }
1899
1900 /* Create parser definition structure */
1901 extern parserDefinition* JavaScriptParser (void)
1902 {
1903         static const char *const extensions [] = { "js", NULL };
1904         parserDefinition *const def = parserNew ("JavaScript");
1905         def->extensions = extensions;
1906         /*
1907          * New definitions for parsing instead of regex
1908          */
1909         def->kinds              = JsKinds;
1910         def->kindCount  = ARRAY_SIZE (JsKinds);
1911         def->parser             = findJsTags;
1912         def->initialize = initialize;
1913         def->keywordTable = JsKeywordTable;
1914         def->keywordCount = ARRAY_SIZE (JsKeywordTable);
1915
1916         return def;
1917 }
1918 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */