ctags/parsers/jscript.c

   1 /*
   2  *       Copyright (c) 2003, Darren Hiebert
   3  *
   4  *       This source code is released for free distribution under the terms of the
   5  *       GNU General Public License version 2 or (at your option) any later version.
   6  *
   7  *       This module contains functions for generating tags for JavaScript language
   8  *       files.
   9  *
  10  *       Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
  11  *
  12  *       This is a good reference for different forms of the function statement:
  13  *               http://www.permadi.com/tutorial/jsFunc/
  14  *   Another good reference:
  15  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  16  */
  17
  18 /*
  19  *       INCLUDE FILES
  20  */
  21 #include "general.h"    /* must always come first */
  22 #include <ctype.h>      /* to define isalpha () */
  23 #include <string.h>
  24 #ifdef DEBUG
  25 #include <stdio.h>
  26 #endif
  27
  28 #include "debug.h"
  29 #include "mio.h"
  30 #include "keyword.h"
  31 #include "parse.h"
  32 #include "read.h"
  33 #include "main.h"
  34 #include "routines.h"
  35 #include "vstring.h"
  36
  37 /*
  38  *       MACROS
  39  */
  40 #define isType(token,t)         (boolean) ((token)->type == (t)) /* non-zero when the token's type field equals t */
  41 #define isKeyword(token,k)      (boolean) ((token)->keyword == (k)) /* non-zero when the token's keyword field equals k */
  42
  43 /*
  44  *       DATA DECLARATIONS
  45  */
  46
  47 /*
  48  * Tracks class and function names already created
  49  */
  50 static stringList *ClassNames; /* scope-qualified names for which makeClassTag () already emitted a tag */
  51 static stringList *FunctionNames; /* scope-qualified names for which makeFunctionTag () already emitted a tag */
  52
  53 /*      Used to specify type of keyword.
  54 */
  55 typedef enum eKeywordId {
  56         KEYWORD_NONE = -1, /* default stored by newToken () and readTokenFull (): not a keyword */
  57         KEYWORD_function,
  58         KEYWORD_capital_function, /* "Function" in JsKeywordTable */
  59         KEYWORD_capital_object, /* "Object" in JsKeywordTable */
  60         KEYWORD_prototype,
  61         KEYWORD_var,
  62         KEYWORD_let,
  63         KEYWORD_const,
  64         KEYWORD_new,
  65         KEYWORD_this,
  66         KEYWORD_for,
  67         KEYWORD_while,
  68         KEYWORD_do,
  69         KEYWORD_if,
  70         KEYWORD_else,
  71         KEYWORD_switch,
  72         KEYWORD_try,
  73         KEYWORD_catch,
  74         KEYWORD_finally,
  75         KEYWORD_sap, /* "sap" in JsKeywordTable; presumably consumed by parseUI5 () -- TODO confirm */
  76         KEYWORD_return
  77 } keywordId;
  78
  79 typedef enum eTokenType {
  80         TOKEN_UNDEFINED, /* initial state; also any character readTokenFull () does not recognize */
  81         TOKEN_EOF, /* end of input */
  82         TOKEN_CHARACTER, /* a backslash met outside of a string literal */
  83         TOKEN_CLOSE_PAREN, /* ')' */
  84         TOKEN_SEMICOLON, /* ';', or a newline readTokenFull () turned into a statement end */
  85         TOKEN_COLON, /* ':' */
  86         TOKEN_COMMA, /* ',' */
  87         TOKEN_KEYWORD, /* identifier for which analyzeToken () returned something other than KEYWORD_NONE */
  88         TOKEN_OPEN_PAREN, /* '(' */
  89         TOKEN_OPERATOR, /* never assigned by readTokenFull () */
  90         TOKEN_IDENTIFIER, /* identifier that is not a keyword */
  91         TOKEN_STRING, /* single- or double-quoted string literal */
  92         TOKEN_PERIOD, /* '.' */
  93         TOKEN_OPEN_CURLY, /* '{' */
  94         TOKEN_CLOSE_CURLY, /* '}' */
  95         TOKEN_EQUAL_SIGN, /* '=' */
  96         TOKEN_FORWARD_SLASH, /* '/' following a token that can end an operand (see readTokenFull ()) */
  97         TOKEN_OPEN_SQUARE, /* '[' */
  98         TOKEN_CLOSE_SQUARE, /* ']' */
  99         TOKEN_REGEXP, /* '/' in any other position: a regular expression literal, skipped by parseRegExp () */
 100         TOKEN_POSTFIX_OPERATOR, /* "++" or "--" */
 101         TOKEN_BINARY_OPERATOR /* a lone '+' or '-', or one of * % ? > < ^ | & */
 102 } tokenType;
 103
 104 typedef struct sTokenInfo {
 105         tokenType               type; /* kind of the token, set by readTokenFull () */
 106         keywordId               keyword; /* KEYWORD_NONE unless readTokenFull () matched a keyword */
 107         vString *               string; /* token text (identifier or string contents); makeJsTag () uses it as the tag name */
 108         vString *               scope; /* dot-separated enclosing scope, extended by addToScope () */
 109         unsigned long   lineNumber; /* source line recorded when the token was read */
 110         MIOPos                  filePosition; /* input position recorded when the token was read */
 111         int                             nestLevel; /* starts at 0 in newToken (); propagated by copyToken () */
 112         boolean                 ignoreTag; /* when set, makeJsTag (), makeClassTag () and makeFunctionTag () emit nothing */
 113 } tokenInfo;
 114
 115 /*
 116  *      DATA DEFINITIONS
 117  */
 118
 119 static tokenType LastTokenType; /* type of the previously read token; written at the end of readTokenFull () */
 120
 121 static langType Lang_js; /* language handle handed to lookupKeyword () by analyzeToken () */
 122
 123 typedef enum { /* values are used as indexes into JsKinds [] */
 124         JSTAG_FUNCTION,
 125         JSTAG_CLASS,
 126         JSTAG_METHOD,
 127         JSTAG_PROPERTY,
 128         JSTAG_CONSTANT,
 129         JSTAG_VARIABLE,
 130         JSTAG_COUNT /* comes after the last kind, so equals the number of kinds above */
 131 } jsKind;
 132
 133 static kindOption JsKinds [] = { /* one entry per jsKind, in enum order; makeJsTag () reads .enabled, .name and .letter */
 134         { TRUE,  'f', "function",         "functions"              },
 135         { TRUE,  'c', "class",            "classes"                        },
 136         { TRUE,  'm', "method",           "methods"                        },
 137         { TRUE,  'p', "property",         "properties"             },
 138         { TRUE,  'C', "constant",         "constants"              },
 139         { TRUE,  'v', "variable",         "global variables"   }
 140 };
 141
 142 static const keywordTable JsKeywordTable [] = { /* spelling -> keywordId pairs */
 143         /* keyword              keyword ID */
 144         { "function",   KEYWORD_function                        },
 145         { "Function",   KEYWORD_capital_function        }, /* NOTE(review): analyzeToken () lower-cases names before lookup -- confirm how capitalized entries are matched */
 146         { "Object",             KEYWORD_capital_object          },
 147         { "prototype",  KEYWORD_prototype                       },
 148         { "var",                KEYWORD_var                                     },
 149         { "let",                KEYWORD_let                                     },
 150         { "const",              KEYWORD_const                           },
 151         { "new",                KEYWORD_new                                     },
 152         { "this",               KEYWORD_this                            },
 153         { "for",                KEYWORD_for                                     },
 154         { "while",              KEYWORD_while                           },
 155         { "do",                 KEYWORD_do                                      },
 156         { "if",                 KEYWORD_if                                      },
 157         { "else",               KEYWORD_else                            },
 158         { "switch",             KEYWORD_switch                          },
 159         { "try",                KEYWORD_try                                     },
 160         { "catch",              KEYWORD_catch                           },
 161         { "finally",    KEYWORD_finally                         },
 162         { "sap",            KEYWORD_sap                                 },
 163         { "return",             KEYWORD_return                          }
 164 };
 165
 166 /*
 167  *       FUNCTION DEFINITIONS
 168  */
 169
 170 /* Recursive functions */
 171 static void parseFunction (tokenInfo *const token);
 172 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent); /* called by findCmdTerm () when it meets a '{' */
 173 static boolean parseLine (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class);
 174 static void parseUI5 (tokenInfo *const token);
 175
 176 static boolean isIdentChar (const int c)
 177 {
 178         return (boolean)
 179                 (isalpha (c) || isdigit (c) || c == '$' ||
 180                  c == '@' || c == '_' || c == '#');
 181 }
 182
 183 static tokenInfo *newToken (void)
 184 {
 185         tokenInfo *const token = xMalloc (1, tokenInfo);
 186
 187         token->type                     = TOKEN_UNDEFINED;
 188         token->keyword          = KEYWORD_NONE;
 189         token->string           = vStringNew ();
 190         token->scope            = vStringNew ();
 191         token->nestLevel        = 0;
 192         token->ignoreTag        = FALSE;
 193         token->lineNumber   = getSourceLineNumber ();
 194         token->filePosition = getInputFilePosition ();
 195
 196         return token;
 197 }
 198
 199 static void deleteToken (tokenInfo *const token)
 200 {
 201         vStringDelete (token->string);
 202         vStringDelete (token->scope);
 203         eFree (token);
 204 }
 205
 206 /*
 207  *       Tag generation functions
 208  */
 209
 210 static void makeJsTag (tokenInfo *const token, const jsKind kind, vString *const signature)
 211 {
 212         if (JsKinds [kind].enabled && ! token->ignoreTag )
 213         {
 214                 const char *name = vStringValue (token->string);
 215                 vString *fullscope = vStringNewCopy (token->scope);
 216                 const char *p;
 217                 tagEntryInfo e;
 218
 219                 if ((p = strrchr (name, '.')) != NULL)
 220                 {
 221                         if (vStringLength (fullscope) > 0)
 222                                 vStringPut (fullscope, '.');
 223                         vStringNCatS (fullscope, name, p - name);
 224                         name = p + 1;
 225                 }
 226
 227                 initTagEntry (&e, name);
 228
 229                 e.lineNumber   = token->lineNumber;
 230                 e.filePosition = token->filePosition;
 231                 e.kindName         = JsKinds [kind].name;
 232                 e.kind             = JsKinds [kind].letter;
 233
 234                 if ( vStringLength(fullscope) > 0 )
 235                 {
 236                         jsKind parent_kind = JSTAG_CLASS;
 237
 238                         /* if we're creating a function (and not a method),
 239                          * guess we're inside another function */
 240                         if (kind == JSTAG_FUNCTION)
 241                                 parent_kind = JSTAG_FUNCTION;
 242
 243                         e.extensionFields.scope[0] = JsKinds [parent_kind].name;
 244                         e.extensionFields.scope[1] = vStringValue (fullscope);
 245                 }
 246
 247                 if (signature && vStringLength(signature))
 248                 {
 249                         size_t i;
 250                         /* sanitize signature by replacing all control characters with a
 251                          * space (because it's simple).
 252                          * there should never be any junk in a valid signature, but who
 253                          * knows what the user wrote and CTags doesn't cope well with weird
 254                          * characters. */
 255                         for (i = 0; i < signature->length; i++)
 256                         {
 257                                 unsigned char c = (unsigned char) signature->buffer[i];
 258                                 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
 259                                         signature->buffer[i] = ' ';
 260                         }
 261                         e.extensionFields.signature = vStringValue(signature);
 262                 }
 263
 264                 makeTagEntry (&e);
 265                 vStringDelete (fullscope);
 266         }
 267 }
 268
 269 static void makeClassTag (tokenInfo *const token, vString *const signature)
 270 {
 271         vString *       fulltag;
 272
 273         if ( ! token->ignoreTag )
 274         {
 275                 fulltag = vStringNew ();
 276                 if (vStringLength (token->scope) > 0)
 277                 {
 278                         vStringCopy(fulltag, token->scope);
 279                         vStringCatS (fulltag, ".");
 280                         vStringCatS (fulltag, vStringValue(token->string));
 281                 }
 282                 else
 283                 {
 284                         vStringCopy(fulltag, token->string);
 285                 }
 286                 vStringTerminate(fulltag);
 287                 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
 288                 {
 289                         stringListAdd (ClassNames, vStringNewCopy (fulltag));
 290                         makeJsTag (token, JSTAG_CLASS, signature);
 291                 }
 292                 vStringDelete (fulltag);
 293         }
 294 }
 295
 296 static void makeFunctionTag (tokenInfo *const token, vString *const signature)
 297 {
 298         vString *       fulltag;
 299
 300         if ( ! token->ignoreTag )
 301         {
 302                 fulltag = vStringNew ();
 303                 if (vStringLength (token->scope) > 0)
 304                 {
 305                         vStringCopy(fulltag, token->scope);
 306                         vStringCatS (fulltag, ".");
 307                         vStringCatS (fulltag, vStringValue(token->string));
 308                 }
 309                 else
 310                 {
 311                         vStringCopy(fulltag, token->string);
 312                 }
 313                 vStringTerminate(fulltag);
 314                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
 315                 {
 316                         stringListAdd (FunctionNames, vStringNewCopy (fulltag));
 317                         makeJsTag (token, JSTAG_FUNCTION, signature);
 318                 }
 319                 vStringDelete (fulltag);
 320         }
 321 }
 322
 323 /*
 324  *       Parsing functions
 325  */
 326
 327 static int skipToCharacter (const int c)
 328 {
 329         int d;
 330         do
 331         {
 332                 d = getcFromInputFile ();
 333         } while (d != EOF  &&  d != c);
 334         return d;
 335 }
 336
 337 static void parseString (vString *const string, const int delimiter)
 338 {
 339         boolean end = FALSE;
 340         while (! end)
 341         {
 342                 int c = getcFromInputFile ();
 343                 if (c == EOF)
 344                         end = TRUE;
 345                 else if (c == '\\')
 346                 {
 347                         /* Eat the escape sequence (\", \', etc).  We properly handle
 348                          * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
 349                          * as an unescaped character, which is invalid and handled below.
 350                          * Also, handle the fact that <LineContinuation> produces an empty
 351                          * sequence.
 352                          * See ECMA-262 7.8.4 */
 353                         c = getcFromInputFile();
 354                         if (c != '\r' && c != '\n')
 355                                 vStringPut(string, c);
 356                         else if (c == '\r')
 357                         {
 358                                 c = getcFromInputFile();
 359                                 if (c != '\n')
 360                                         ungetcToInputFile (c);
 361                         }
 362                 }
 363                 else if (c == delimiter)
 364                         end = TRUE;
 365                 else if (c == '\r' || c == '\n')
 366                 {
 367                         /* those are invalid when not escaped */
 368                         end = TRUE;
 369                         /* we don't want to eat the newline itself to let the automatic
 370                          * semicolon insertion code kick in */
 371                         ungetcToInputFile (c);
 372                 }
 373                 else
 374                         vStringPut (string, c);
 375         }
 376         vStringTerminate (string);
 377 }
 378
 379 static void parseRegExp (void)
 380 {
 381         int c;
 382         boolean in_range = FALSE;
 383
 384         do
 385         {
 386                 c = getcFromInputFile ();
 387                 if (! in_range && c == '/')
 388                 {
 389                         do /* skip flags */
 390                         {
 391                                 c = getcFromInputFile ();
 392                         } while (isalpha (c));
 393                         ungetcToInputFile (c);
 394                         break;
 395                 }
 396                 else if (c == '\\')
 397                         c = getcFromInputFile (); /* skip next character */
 398                 else if (c == '[')
 399                         in_range = TRUE;
 400                 else if (c == ']')
 401                         in_range = FALSE;
 402         } while (c != EOF);
 403 }
 404
 405 /*      Read a C identifier beginning with "firstChar" and places it into
 406  *      "name".
 407  */
 408 static void parseIdentifier (vString *const string, const int firstChar)
 409 {
 410         int c = firstChar;
 411         Assert (isIdentChar (c));
 412         do
 413         {
 414                 vStringPut (string, c);
 415                 c = getcFromInputFile ();
 416         } while (isIdentChar (c));
 417         vStringTerminate (string);
 418         ungetcToInputFile (c);          /* unget non-identifier character */
 419 }
 420
 421 static keywordId analyzeToken (vString *const name)
 422 {
 423         vString *keyword = vStringNew ();
 424         keywordId result;
 425         vStringCopyToLower (keyword, name);
 426         result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
 427         vStringDelete (keyword);
 428         return result;
 429 }
 430
 431 static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
 432 {
 433         int c;
 434         int i;
 435
 436         token->type                     = TOKEN_UNDEFINED;
 437         token->keyword          = KEYWORD_NONE;
 438         vStringClear (token->string);
 439
 440 getNextChar:
 441         i = 0;
 442         do
 443         {
 444                 c = getcFromInputFile ();
 445                 i++;
 446         }
 447         while (c == '\t'  ||  c == ' ' ||
 448                    ((c == '\r' || c == '\n') && ! include_newlines));
 449
 450         token->lineNumber   = getSourceLineNumber ();
 451         token->filePosition = getInputFilePosition ();
 452
 453         if (repr)
 454         {
 455                 if (i > 1)
 456                         vStringPut (repr, ' ');
 457                 vStringPut (repr, c);
 458         }
 459
 460         switch (c)
 461         {
 462                 case EOF: token->type = TOKEN_EOF;                                      break;
 463                 case '(': token->type = TOKEN_OPEN_PAREN;                       break;
 464                 case ')': token->type = TOKEN_CLOSE_PAREN;                      break;
 465                 case ';': token->type = TOKEN_SEMICOLON;                        break;
 466                 case ',': token->type = TOKEN_COMMA;                            break;
 467                 case '.': token->type = TOKEN_PERIOD;                           break;
 468                 case ':': token->type = TOKEN_COLON;                            break;
 469                 case '{': token->type = TOKEN_OPEN_CURLY;                       break;
 470                 case '}': token->type = TOKEN_CLOSE_CURLY;                      break;
 471                 case '=': token->type = TOKEN_EQUAL_SIGN;                       break;
 472                 case '[': token->type = TOKEN_OPEN_SQUARE;                      break;
 473                 case ']': token->type = TOKEN_CLOSE_SQUARE;                     break;
 474
 475                 case '+':
 476                 case '-':
 477                         {
 478                                 int d = getcFromInputFile ();
 479                                 if (d == c) /* ++ or -- */
 480                                         token->type = TOKEN_POSTFIX_OPERATOR;
 481                                 else
 482                                 {
 483                                         ungetcToInputFile (d);
 484                                         token->type = TOKEN_BINARY_OPERATOR;
 485                                 }
 486                                 break;
 487                         }
 488
 489                 case '*':
 490                 case '%':
 491                 case '?':
 492                 case '>':
 493                 case '<':
 494                 case '^':
 495                 case '|':
 496                 case '&':
 497                         token->type = TOKEN_BINARY_OPERATOR;
 498                         break;
 499
 500                 case '\r':
 501                 case '\n':
 502                         /* This isn't strictly correct per the standard, but following the
 503                          * real rules means understanding all statements, and that's not
 504                          * what the parser currently does.  What we do here is a guess, by
 505                          * avoiding inserting semicolons that would make the statement on
 506                          * the left invalid.  Hopefully this should not have false negatives
 507                          * (e.g. should not miss insertion of a semicolon) but might have
 508                          * false positives (e.g. it will wrongfully emit a semicolon for the
 509                          * newline in "foo\n+bar").
 510                          * This should however be mostly harmless as we only deal with
 511                          * newlines in specific situations where we know a false positive
 512                          * wouldn't hurt too bad. */
 513                         switch (LastTokenType)
 514                         {
 515                                 /* these cannot be the end of a statement, so hold the newline */
 516                                 case TOKEN_EQUAL_SIGN:
 517                                 case TOKEN_COLON:
 518                                 case TOKEN_PERIOD:
 519                                 case TOKEN_FORWARD_SLASH:
 520                                 case TOKEN_BINARY_OPERATOR:
 521                                 /* and these already end one, no need to duplicate it */
 522                                 case TOKEN_SEMICOLON:
 523                                 case TOKEN_COMMA:
 524                                 case TOKEN_CLOSE_CURLY:
 525                                 case TOKEN_OPEN_CURLY:
 526                                         include_newlines = FALSE; /* no need to recheck */
 527                                         goto getNextChar;
 528                                         break;
 529                                 default:
 530                                         token->type = TOKEN_SEMICOLON;
 531                         }
 532                         break;
 533
 534                 case '\'':
 535                 case '"':
 536                                   token->type = TOKEN_STRING;
 537                                   parseString (token->string, c);
 538                                   token->lineNumber = getSourceLineNumber ();
 539                                   token->filePosition = getInputFilePosition ();
 540                                   if (repr)
 541                                   {
 542                                           vStringCat (repr, token->string);
 543                                           vStringPut (repr, c);
 544                                   }
 545                                   break;
 546
 547                 case '\\':
 548                                   c = getcFromInputFile ();
 549                                   if (c != '\\'  && c != '"'  &&  !isspace (c))
 550                                           ungetcToInputFile (c);
 551                                   token->type = TOKEN_CHARACTER;
 552                                   token->lineNumber = getSourceLineNumber ();
 553                                   token->filePosition = getInputFilePosition ();
 554                                   break;
 555
 556                 case '/':
 557                                   {
 558                                           int d = getcFromInputFile ();
 559                                           if ( (d != '*') &&            /* is this the start of a comment? */
 560                                                           (d != '/') )          /* is a one line comment? */
 561                                           {
 562                                                   ungetcToInputFile (d);
 563                                                   switch (LastTokenType)
 564                                                   {
 565                                                           case TOKEN_CHARACTER:
 566                                                           case TOKEN_IDENTIFIER:
 567                                                           case TOKEN_STRING:
 568                                                           case TOKEN_CLOSE_CURLY:
 569                                                           case TOKEN_CLOSE_PAREN:
 570                                                           case TOKEN_CLOSE_SQUARE:
 571                                                                   token->type = TOKEN_FORWARD_SLASH;
 572                                                                   break;
 573
 574                                                           default:
 575                                                                   token->type = TOKEN_REGEXP;
 576                                                                   parseRegExp ();
 577                                                                   token->lineNumber = getSourceLineNumber ();
 578                                                                   token->filePosition = getInputFilePosition ();
 579                                                                   break;
 580                                                   }
 581                                           }
 582                                           else
 583                                           {
 584                                                   if (repr) /* remove the / we added */
 585                                                           repr->buffer[--repr->length] = 0;
 586                                                   if (d == '*')
 587                                                   {
 588                                                           do
 589                                                           {
 590                                                                   skipToCharacter ('*');
 591                                                                   c = getcFromInputFile ();
 592                                                                   if (c == '/')
 593                                                                           break;
 594                                                                   else
 595                                                                           ungetcToInputFile (c);
 596                                                           } while (c != EOF && c != '\0');
 597                                                           goto getNextChar;
 598                                                   }
 599                                                   else if (d == '/')    /* is this the start of a comment?  */
 600                                                   {
 601                                                           skipToCharacter ('\n');
 602                                                           /* if we care about newlines, put it back so it is seen */
 603                                                           if (include_newlines)
 604                                                                   ungetcToInputFile ('\n');
 605                                                           goto getNextChar;
 606                                                   }
 607                                           }
 608                                           break;
 609                                   }
 610
 611                 case '#':
 612                                   /* skip shebang in case of e.g. Node.js scripts */
 613                                   if (token->lineNumber > 1)
 614                                           token->type = TOKEN_UNDEFINED;
 615                                   else if ((c = getcFromInputFile ()) != '!')
 616                                   {
 617                                           ungetcToInputFile (c);
 618                                           token->type = TOKEN_UNDEFINED;
 619                                   }
 620                                   else
 621                                   {
 622                                           skipToCharacter ('\n');
 623                                           goto getNextChar;
 624                                   }
 625                                   break;
 626
 627                 default:
 628                                   if (! isIdentChar (c))
 629                                           token->type = TOKEN_UNDEFINED;
 630                                   else
 631                                   {
 632                                           parseIdentifier (token->string, c);
 633                                           token->lineNumber = getSourceLineNumber ();
 634                                           token->filePosition = getInputFilePosition ();
 635                                           token->keyword = analyzeToken (token->string);
 636                                           if (isKeyword (token, KEYWORD_NONE))
 637                                                   token->type = TOKEN_IDENTIFIER;
 638                                           else
 639                                                   token->type = TOKEN_KEYWORD;
 640                                           if (repr && vStringLength (token->string) > 1)
 641                                                   vStringCatS (repr, vStringValue (token->string) + 1);
 642                                   }
 643                                   break;
 644         }
 645
 646         LastTokenType = token->type;
 647 }
 648
 649 static void readToken (tokenInfo *const token)
 650 {
 651         readTokenFull (token, FALSE, NULL);
 652 }
 653
 654 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 655 {
 656         dest->nestLevel = src->nestLevel;
 657         dest->lineNumber = src->lineNumber;
 658         dest->filePosition = src->filePosition;
 659         dest->type = src->type;
 660         dest->keyword = src->keyword;
 661         vStringCopy(dest->string, src->string);
 662         vStringCopy(dest->scope, src->scope);
 663 }
 664
 665 /*
 666  *       Token parsing functions
 667  */
 668
 669 static void skipArgumentList (tokenInfo *const token, boolean include_newlines, vString *const repr)
 670 {
 671         int nest_level = 0;
 672
 673         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
 674         {
 675                 nest_level++;
 676                 if (repr)
 677                         vStringPut (repr, '(');
 678                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 679                 {
 680                         readTokenFull (token, FALSE, repr);
 681                         if (isType (token, TOKEN_OPEN_PAREN))
 682                                 nest_level++;
 683                         else if (isType (token, TOKEN_CLOSE_PAREN))
 684                                 nest_level--;
 685                 }
 686                 readTokenFull (token, include_newlines, NULL);
 687         }
 688 }
 689
 690 static void skipArrayList (tokenInfo *const token, boolean include_newlines)
 691 {
 692         int nest_level = 0;
 693
 694         /*
 695          * Handle square brackets
 696          *       var name[1]
 697          * So we must check for nested open and closing square brackets
 698          */
 699
 700         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
 701         {
 702                 nest_level++;
 703                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 704                 {
 705                         readToken (token);
 706                         if (isType (token, TOKEN_OPEN_SQUARE))
 707                                 nest_level++;
 708                         else if (isType (token, TOKEN_CLOSE_SQUARE))
 709                                 nest_level--;
 710                 }
 711                 readTokenFull (token, include_newlines, NULL);
 712         }
 713 }
 714
 715 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
 716 {
 717         if (vStringLength (parent->string) > 0)
 718         {
 719                 vStringCatS (parent->string, ".");
 720         }
 721         vStringCatS (parent->string, vStringValue(child->string));
 722         vStringTerminate(parent->string);
 723 }
 724
 725 static void addToScope (tokenInfo* const token, vString* const extra)
 726 {
 727         if (vStringLength (token->scope) > 0)
 728         {
 729                 vStringCatS (token->scope, ".");
 730         }
 731         vStringCatS (token->scope, vStringValue(extra));
 732         vStringTerminate(token->scope);
 733 }
 734
 735 /*
 736  *       Scanning functions
 737  */
 738
 739 static boolean findCmdTerm (tokenInfo *const token, boolean include_newlines)
 740 {
 741         /*
 742          * Read until we find either a semicolon or closing brace.
 743          * Any nested braces will be handled within.
 744          */
 745         while (! isType (token, TOKEN_SEMICOLON) &&
 746                    ! isType (token, TOKEN_CLOSE_CURLY) &&
 747                    ! isType (token, TOKEN_EOF))
 748         {
 749                 /* Handle nested blocks */
 750                 if ( isType (token, TOKEN_OPEN_CURLY))
 751                 {
 752                         parseBlock (token, token);
 753                         readTokenFull (token, include_newlines, NULL);
 754                 }
 755                 else if ( isType (token, TOKEN_OPEN_PAREN) )
 756                 {
 757                         skipArgumentList(token, include_newlines, NULL);
 758                 }
 759                 else if ( isType (token, TOKEN_OPEN_SQUARE) )
 760                 {
 761                         skipArrayList(token, include_newlines);
 762                 }
 763                 else
 764                 {
 765                         readTokenFull (token, include_newlines, NULL);
 766                 }
 767         }
 768
 769         return isType (token, TOKEN_SEMICOLON);
 770 }
 771
 772 static void parseSwitch (tokenInfo *const token)
 773 {
 774         /*
 775          * switch (expression) {
 776          * case value1:
 777          *         statement;
 778          *         break;
 779          * case value2:
 780          *         statement;
 781          *         break;
 782          * default : statement;
 783          * }
 784          */
 785
 786         readToken (token);
 787
 788         if (isType (token, TOKEN_OPEN_PAREN))
 789         {
 790                 /*
 791                  * Handle nameless functions, these will only
 792                  * be considered methods.
 793                  */
 794                 skipArgumentList(token, FALSE, NULL);
 795         }
 796
 797         if (isType (token, TOKEN_OPEN_CURLY))
 798         {
 799                 parseBlock (token, token);
 800         }
 801 }
 802
 803 static boolean parseLoop (tokenInfo *const token, tokenInfo *const parent)
 804 {
 805         /*
 806          * Handles these statements
 807          *         for (x=0; x<3; x++)
 808          *                 document.write("This text is repeated three times<br>");
 809          *
 810          *         for (x=0; x<3; x++)
 811          *         {
 812          *                 document.write("This text is repeated three times<br>");
 813          *         }
 814          *
 815          *         while (number<5){
 816          *                 document.write(number+"<br>");
 817          *                 number++;
 818          *         }
 819          *
 820          *         do{
 821          *                 document.write(number+"<br>");
 822          *                 number++;
 823          *         }
 824          *         while (number<5);
 825          */
 826         boolean is_terminated = TRUE;
 827
 828         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
 829         {
 830                 readToken(token);
 831
 832                 if (isType (token, TOKEN_OPEN_PAREN))
 833                 {
 834                         /*
 835                          * Handle nameless functions, these will only
 836                          * be considered methods.
 837                          */
 838                         skipArgumentList(token, FALSE, NULL);
 839                 }
 840
 841                 if (isType (token, TOKEN_OPEN_CURLY))
 842                 {
 843                         /*
 844                          * This will be either a function or a class.
 845                          * We can only determine this by checking the body
 846                          * of the function.  If we find a "this." we know
 847                          * it is a class, otherwise it is a function.
 848                          */
 849                         parseBlock (token, parent);
 850                 }
 851                 else
 852                 {
 853                         is_terminated = parseLine(token, parent, FALSE);
 854                 }
 855         }
 856         else if (isKeyword (token, KEYWORD_do))
 857         {
 858                 readToken(token);
 859
 860                 if (isType (token, TOKEN_OPEN_CURLY))
 861                 {
 862                         /*
 863                          * This will be either a function or a class.
 864                          * We can only determine this by checking the body
 865                          * of the function.  If we find a "this." we know
 866                          * it is a class, otherwise it is a function.
 867                          */
 868                         parseBlock (token, parent);
 869                 }
 870                 else
 871                 {
 872                         is_terminated = parseLine(token, parent, FALSE);
 873                 }
 874
 875                 if (is_terminated)
 876                         readToken(token);
 877
 878                 if (isKeyword (token, KEYWORD_while))
 879                 {
 880                         readToken(token);
 881
 882                         if (isType (token, TOKEN_OPEN_PAREN))
 883                         {
 884                                 /*
 885                                  * Handle nameless functions, these will only
 886                                  * be considered methods.
 887                                  */
 888                                 skipArgumentList(token, TRUE, NULL);
 889                         }
 890                         if (! isType (token, TOKEN_SEMICOLON))
 891                                 is_terminated = FALSE;
 892                 }
 893         }
 894
 895         return is_terminated;
 896 }
 897
 898 static boolean parseIf (tokenInfo *const token, tokenInfo *const parent)
 899 {
 900         boolean read_next_token = TRUE;
 901         /*
 902          * If statements have two forms
 903          *         if ( ... )
 904          *                 one line;
 905          *
 906          *         if ( ... )
 907          *                statement;
 908          *         else
 909          *                statement
 910          *
 911          *         if ( ... ) {
 912          *                multiple;
 913          *                statements;
 914          *         }
 915          *
 916          *
 917          *         if ( ... ) {
 918          *                return elem
 919          *         }
 920          *
 921          *     This example if correctly written, but the
 922          *     else contains only 1 statement without a terminator
 923          *     since the function finishes with the closing brace.
 924          *
 925      *     function a(flag){
 926      *         if(flag)
 927      *             test(1);
 928      *         else
 929      *             test(2)
 930      *     }
 931          *
 932          * TODO:  Deal with statements that can optional end
 933          *                without a semi-colon.  Currently this messes up
 934          *                the parsing of blocks.
 935          *                Need to somehow detect this has happened, and either
 936          *                backup a token, or skip reading the next token if
 937          *                that is possible from all code locations.
 938          *
 939          */
 940
 941         readToken (token);
 942
 943         if (isKeyword (token, KEYWORD_if))
 944         {
 945                 /*
 946                  * Check for an "else if" and consume the "if"
 947                  */
 948                 readToken (token);
 949         }
 950
 951         if (isType (token, TOKEN_OPEN_PAREN))
 952         {
 953                 /*
 954                  * Handle nameless functions, these will only
 955                  * be considered methods.
 956                  */
 957                 skipArgumentList(token, FALSE, NULL);
 958         }
 959
 960         if (isType (token, TOKEN_OPEN_CURLY))
 961         {
 962                 /*
 963                  * This will be either a function or a class.
 964                  * We can only determine this by checking the body
 965                  * of the function.  If we find a "this." we know
 966                  * it is a class, otherwise it is a function.
 967                  */
 968                 parseBlock (token, parent);
 969         }
 970         else
 971         {
 972                 /* The next token should only be read if this statement had its own
 973                  * terminator */
 974                 read_next_token = findCmdTerm (token, TRUE);
 975         }
 976         return read_next_token;
 977 }
 978
 979 static void parseFunction (tokenInfo *const token)
 980 {
 981         tokenInfo *const name = newToken ();
 982         vString *const signature = vStringNew ();
 983         boolean is_class = FALSE;
 984
 985         /*
 986          * This deals with these formats
 987          *         function validFunctionTwo(a,b) {}
 988          */
 989
 990         readToken (name);
 991         /* Add scope in case this is an INNER function */
 992         addToScope(name, token->scope);
 993
 994         readToken (token);
 995         while (isType (token, TOKEN_PERIOD))
 996         {
 997                 readToken (token);
 998                 if ( isKeyword(token, KEYWORD_NONE) )
 999                 {
1000                         addContext (name, token);
1001                         readToken (token);
1002                 }
1003         }
1004
1005         if ( isType (token, TOKEN_OPEN_PAREN) )
1006                 skipArgumentList(token, FALSE, signature);
1007
1008         if ( isType (token, TOKEN_OPEN_CURLY) )
1009         {
1010                 is_class = parseBlock (token, name);
1011                 if ( is_class )
1012                         makeClassTag (name, signature);
1013                 else
1014                         makeFunctionTag (name, signature);
1015         }
1016
1017         findCmdTerm (token, FALSE);
1018
1019         vStringDelete (signature);
1020         deleteToken (name);
1021 }
1022
1023 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent)
1024 {
1025         boolean is_class = FALSE;
1026         boolean read_next_token = TRUE;
1027         vString * saveScope = vStringNew ();
1028         tokenInfo *const parent = newToken ();
1029
1030         /* backup the parent token to allow calls like parseBlock(token, token) */
1031         copyToken (parent, orig_parent);
1032
1033         token->nestLevel++;
1034         /*
1035          * Make this routine a bit more forgiving.
1036          * If called on an open_curly advance it
1037          */
1038         if ( isType (token, TOKEN_OPEN_CURLY) &&
1039                         isKeyword(token, KEYWORD_NONE) )
1040                 readToken(token);
1041
1042         if (! isType (token, TOKEN_CLOSE_CURLY))
1043         {
1044                 /*
1045                  * Read until we find the closing brace,
1046                  * any nested braces will be handled within
1047                  */
1048                 do
1049                 {
1050                         read_next_token = TRUE;
1051                         if (isKeyword (token, KEYWORD_this))
1052                         {
1053                                 /*
1054                                  * Means we are inside a class and have found
1055                                  * a class, not a function
1056                                  */
1057                                 is_class = TRUE;
1058                                 vStringCopy(saveScope, token->scope);
1059                                 addToScope (token, parent->string);
1060
1061                                 /*
1062                                  * Ignore the remainder of the line
1063                                  * findCmdTerm(token);
1064                                  */
1065                                 read_next_token = parseLine (token, parent, is_class);
1066
1067                                 vStringCopy(token->scope, saveScope);
1068                         }
1069                         else if (isKeyword (token, KEYWORD_var) ||
1070                                          isKeyword (token, KEYWORD_let) ||
1071                                          isKeyword (token, KEYWORD_const))
1072                         {
1073                                 /*
1074                                  * Potentially we have found an inner function.
1075                                  * Set something to indicate the scope
1076                                  */
1077                                 vStringCopy(saveScope, token->scope);
1078                                 addToScope (token, parent->string);
1079                                 read_next_token = parseLine (token, parent, is_class);
1080                                 vStringCopy(token->scope, saveScope);
1081                         }
1082                         else if (isKeyword (token, KEYWORD_function))
1083                         {
1084                                 vStringCopy(saveScope, token->scope);
1085                                 addToScope (token, parent->string);
1086                                 parseFunction (token);
1087                                 vStringCopy(token->scope, saveScope);
1088                         }
1089                         else if (isType (token, TOKEN_OPEN_CURLY))
1090                         {
1091                                 /* Handle nested blocks */
1092                                 parseBlock (token, parent);
1093                         }
1094                         else
1095                         {
1096                                 /*
1097                                  * It is possible for a line to have no terminator
1098                                  * if the following line is a closing brace.
1099                                  * parseLine will detect this case and indicate
1100                                  * whether we should read an additional token.
1101                                  */
1102                                 read_next_token = parseLine (token, parent, is_class);
1103                         }
1104
1105                         /*
1106                          * Always read a new token unless we find a statement without
1107                          * a ending terminator
1108                          */
1109                         if( read_next_token )
1110                                 readToken(token);
1111
1112                         /*
1113                          * If we find a statement without a terminator consider the
1114                          * block finished, otherwise the stack will be off by one.
1115                          */
1116                 } while (! isType (token, TOKEN_EOF) &&
1117                                  ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1118         }
1119
1120         deleteToken (parent);
1121         vStringDelete(saveScope);
1122         token->nestLevel--;
1123
1124         return is_class;
1125 }
1126
1127 static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
1128 {
1129         tokenInfo *const name = newToken ();
1130         boolean has_methods = FALSE;
1131
1132         /*
1133          * This deals with these formats
1134          *         validProperty  : 2,
1135          *         validMethod    : function(a,b) {}
1136          *         'validMethod2' : function(a,b) {}
1137      *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1138          */
1139
1140         do
1141         {
1142                 readToken (token);
1143                 if (isType (token, TOKEN_CLOSE_CURLY))
1144                 {
1145                         /*
1146                          * This was most likely a variable declaration of a hash table.
1147                          * indicate there were no methods and return.
1148                          */
1149                         has_methods = FALSE;
1150                         goto cleanUp;
1151                 }
1152
1153                 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
1154                 {
1155                         copyToken(name, token);
1156
1157                         readToken (token);
1158                         if ( isType (token, TOKEN_COLON) )
1159                         {
1160                                 readToken (token);
1161                                 if ( isKeyword (token, KEYWORD_function) )
1162                                 {
1163                                         vString *const signature = vStringNew ();
1164
1165                                         readToken (token);
1166                                         if ( isType (token, TOKEN_OPEN_PAREN) )
1167                                         {
1168                                                 skipArgumentList(token, FALSE, signature);
1169                                         }
1170
1171                                         if (isType (token, TOKEN_OPEN_CURLY))
1172                                         {
1173                                                 has_methods = TRUE;
1174                                                 addToScope (name, class->string);
1175                                                 makeJsTag (name, JSTAG_METHOD, signature);
1176                                                 parseBlock (token, name);
1177
1178                                                 /*
1179                                                  * Read to the closing curly, check next
1180                                                  * token, if a comma, we must loop again
1181                                                  */
1182                                                 readToken (token);
1183                                         }
1184
1185                                         vStringDelete (signature);
1186                                 }
1187                                 else
1188                                 {
1189                                                 vString * saveScope = vStringNew ();
1190                                                 boolean has_child_methods = FALSE;
1191
1192                                                 /* skip whatever is the value */
1193                                                 while (! isType (token, TOKEN_COMMA) &&
1194                                                        ! isType (token, TOKEN_CLOSE_CURLY) &&
1195                                                        ! isType (token, TOKEN_EOF))
1196                                                 {
1197                                                         if (isType (token, TOKEN_OPEN_CURLY))
1198                                                         {
1199                                                                 vStringCopy (saveScope, token->scope);
1200                                                                 addToScope (token, class->string);
1201                                                                 has_child_methods = parseMethods (token, name);
1202                                                                 vStringCopy (token->scope, saveScope);
1203                                                                 readToken (token);
1204                                                         }
1205                                                         else if (isType (token, TOKEN_OPEN_PAREN))
1206                                                         {
1207                                                                 skipArgumentList (token, FALSE, NULL);
1208                                                         }
1209                                                         else if (isType (token, TOKEN_OPEN_SQUARE))
1210                                                         {
1211                                                                 skipArrayList (token, FALSE);
1212                                                         }
1213                                                         else
1214                                                         {
1215                                                                 readToken (token);
1216                                                         }
1217                                                 }
1218                                                 vStringDelete (saveScope);
1219
1220                                                 has_methods = TRUE;
1221                                                 addToScope (name, class->string);
1222                                                 if (has_child_methods)
1223                                                         makeJsTag (name, JSTAG_CLASS, NULL);
1224                                                 else
1225                                                         makeJsTag (name, JSTAG_PROPERTY, NULL);
1226                                 }
1227                         }
1228                 }
1229         } while ( isType(token, TOKEN_COMMA) );
1230
1231         findCmdTerm (token, FALSE);
1232
1233 cleanUp:
1234         deleteToken (name);
1235
1236         return has_methods;
1237 }
1238
1239 static boolean parseStatement (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class)
1240 {
1241         tokenInfo *const name = newToken ();
1242         tokenInfo *const secondary_name = newToken ();
1243         tokenInfo *const method_body_token = newToken ();
1244         vString * saveScope = vStringNew ();
1245         boolean is_class = FALSE;
1246         boolean is_var = FALSE;
1247         boolean is_const = FALSE;
1248         boolean is_terminated = TRUE;
1249         boolean is_global = FALSE;
1250         boolean has_methods = FALSE;
1251         vString *       fulltag;
1252
1253         vStringClear(saveScope);
1254         /*
1255          * Functions can be named or unnamed.
1256          * This deals with these formats:
1257          * Function
1258          *         validFunctionOne = function(a,b) {}
1259          *         testlib.validFunctionFive = function(a,b) {}
1260          *         var innerThree = function(a,b) {}
1261          *         var innerFour = (a,b) {}
1262          *         var D2 = secondary_fcn_name(a,b) {}
1263          *         var D3 = new Function("a", "b", "return a+b;");
1264          * Class
1265          *         testlib.extras.ValidClassOne = function(a,b) {
1266          *                 this.a = a;
1267          *         }
1268          * Class Methods
1269          *         testlib.extras.ValidClassOne.prototype = {
1270          *                 'validMethodOne' : function(a,b) {},
1271          *                 'validMethodTwo' : function(a,b) {}
1272          *         }
1273      *     ValidClassTwo = function ()
1274      *     {
1275      *         this.validMethodThree = function() {}
1276      *         // unnamed method
1277      *         this.validMethodFour = () {}
1278      *     }
1279          *         Database.prototype.validMethodThree = Database_getTodaysDate;
1280          */
1281
1282         if ( is_inside_class )
1283                 is_class = TRUE;
1284         /*
1285          * var can precede an inner function
1286          */
1287         if ( isKeyword(token, KEYWORD_var) ||
1288                  isKeyword(token, KEYWORD_let) ||
1289                  isKeyword(token, KEYWORD_const) )
1290         {
1291                 is_const = isKeyword(token, KEYWORD_const);
1292                 /*
1293                  * Only create variables for global scope
1294                  */
1295                 if ( token->nestLevel == 0 )
1296                 {
1297                         is_global = TRUE;
1298                 }
1299                 readToken(token);
1300         }
1301
1302         if ( isKeyword(token, KEYWORD_this) )
1303         {
1304                 readToken(token);
1305                 if (isType (token, TOKEN_PERIOD))
1306                 {
1307                         readToken(token);
1308                 }
1309         }
1310
1311         copyToken(name, token);
1312
1313         while (! isType (token, TOKEN_CLOSE_CURLY) &&
1314                ! isType (token, TOKEN_SEMICOLON)   &&
1315                ! isType (token, TOKEN_EQUAL_SIGN)  &&
1316                ! isType (token, TOKEN_EOF))
1317         {
1318                 if (isType (token, TOKEN_OPEN_CURLY))
1319                         parseBlock (token, parent);
1320
1321                 /* Potentially the name of the function */
1322                 readToken (token);
1323                 if (isType (token, TOKEN_PERIOD))
1324                 {
1325                         /*
1326                          * Cannot be a global variable is it has dot references in the name
1327                          */
1328                         is_global = FALSE;
1329                         do
1330                         {
1331                                 readToken (token);
1332                                 if ( isKeyword(token, KEYWORD_NONE) )
1333                                 {
1334                                         if ( is_class )
1335                                         {
1336                                                 addToScope(token, name->string);
1337                                         }
1338                                         else
1339                                                 addContext (name, token);
1340
1341                                         readToken (token);
1342                                 }
1343                                 else if ( isKeyword(token, KEYWORD_prototype) )
1344                                 {
1345                                         /*
1346                                          * When we reach the "prototype" tag, we infer:
1347                                          *     "BindAgent" is a class
1348                                          *     "build"     is a method
1349                                          *
1350                                          * function BindAgent( repeatableIdName, newParentIdName ) {
1351                                          * }
1352                                          *
1353                                          * CASE 1
1354                                          * Specified function name: "build"
1355                                          *     BindAgent.prototype.build = function( mode ) {
1356                                          *        maybe parse nested functions
1357                                          *     }
1358                                          *
1359                                          * CASE 2
1360                                          * Prototype listing
1361                                          *     ValidClassOne.prototype = {
1362                                          *         'validMethodOne' : function(a,b) {},
1363                                          *         'validMethodTwo' : function(a,b) {}
1364                                          *     }
1365                                          *
1366                                          */
1367                                         makeClassTag (name, NULL);
1368                                         is_class = TRUE;
1369
1370                                         /*
1371                                          * There should a ".function_name" next.
1372                                          */
1373                                         readToken (token);
1374                                         if (isType (token, TOKEN_PERIOD))
1375                                         {
1376                                                 /*
1377                                                  * Handle CASE 1
1378                                                  */
1379                                                 readToken (token);
1380                                                 if ( isKeyword(token, KEYWORD_NONE) )
1381                                                 {
1382                                                         vString *const signature = vStringNew ();
1383
1384                                                         vStringCopy(saveScope, token->scope);
1385                                                         addToScope(token, name->string);
1386
1387                                                         readToken (method_body_token);
1388                                                         vStringCopy (method_body_token->scope, token->scope);
1389
1390                                                         while (! isType (method_body_token, TOKEN_SEMICOLON) &&
1391                                                                ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
1392                                                                ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
1393                                                                ! isType (method_body_token, TOKEN_EOF))
1394                                                         {
1395                                                                 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1396                                                                         skipArgumentList(method_body_token, FALSE,
1397                                                                                                          vStringLength (signature) == 0 ? signature : NULL);
1398                                                                 else
1399                                                                         readToken (method_body_token);
1400                                                         }
1401
1402                                                         makeJsTag (token, JSTAG_METHOD, signature);
1403                                                         vStringDelete (signature);
1404
1405                                                         if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1406                                                         {
1407                                                                 parseBlock (method_body_token, token);
1408                                                                 is_terminated = TRUE;
1409                                                         }
1410                                                         else
1411                                                                 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
1412                                                         goto cleanUp;
1413                                                 }
1414                                         }
1415                                         else if (isType (token, TOKEN_EQUAL_SIGN))
1416                                         {
1417                                                 readToken (token);
1418                                                 if (isType (token, TOKEN_OPEN_CURLY))
1419                                                 {
1420                                                         /*
1421                                                          * Handle CASE 2
1422                                                          *
1423                                                          * Creates tags for each of these class methods
1424                                                          *     ValidClassOne.prototype = {
1425                                                          *         'validMethodOne' : function(a,b) {},
1426                                                          *         'validMethodTwo' : function(a,b) {}
1427                                                          *     }
1428                                                          */
1429                                                         parseMethods(token, name);
1430                                                         /*
1431                                                          * Find to the end of the statement
1432                                                          */
1433                                                         findCmdTerm (token, FALSE);
1434                                                         token->ignoreTag = FALSE;
1435                                                         is_terminated = TRUE;
1436                                                         goto cleanUp;
1437                                                 }
1438                                         }
1439                                 }
1440                                 else
1441                                         readToken (token);
1442                         } while (isType (token, TOKEN_PERIOD));
1443                 }
1444
1445                 if ( isType (token, TOKEN_OPEN_PAREN) )
1446                         skipArgumentList(token, FALSE, NULL);
1447
1448                 if ( isType (token, TOKEN_OPEN_SQUARE) )
1449                         skipArrayList(token, FALSE);
1450
1451                 /*
1452                 if ( isType (token, TOKEN_OPEN_CURLY) )
1453                 {
1454                         is_class = parseBlock (token, name);
1455                 }
1456                 */
1457         }
1458
1459         if ( isType (token, TOKEN_CLOSE_CURLY) )
1460         {
1461                 /*
1462                  * Reaching this section without having
1463                  * processed an open curly brace indicates
1464                  * the statement is most likely not terminated.
1465                  */
1466                 is_terminated = FALSE;
1467                 goto cleanUp;
1468         }
1469
1470         if ( isType (token, TOKEN_SEMICOLON) )
1471         {
1472                 /*
1473                  * Only create variables for global scope
1474                  */
1475                 if ( token->nestLevel == 0 && is_global )
1476                 {
1477                         /*
1478                          * Handles this syntax:
1479                          *         var g_var2;
1480                          */
1481                         if (isType (token, TOKEN_SEMICOLON))
1482                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1483                 }
1484                 /*
1485                  * Statement has ended.
1486                  * This deals with calls to functions, like:
1487                  *     alert(..);
1488                  */
1489                 goto cleanUp;
1490         }
1491
1492         if ( isType (token, TOKEN_EQUAL_SIGN) )
1493         {
1494                 int parenDepth = 0;
1495
1496                 readToken (token);
1497
1498                 /* rvalue might be surrounded with parentheses */
1499                 while (isType (token, TOKEN_OPEN_PAREN))
1500                 {
1501                         parenDepth++;
1502                         readToken (token);
1503                 }
1504
1505                 if ( isKeyword (token, KEYWORD_function) )
1506                 {
1507                         vString *const signature = vStringNew ();
1508
1509                         readToken (token);
1510
1511                         if ( isKeyword (token, KEYWORD_NONE) &&
1512                                         ! isType (token, TOKEN_OPEN_PAREN) )
1513                         {
1514                                 /*
1515                                  * Functions of this format:
1516                                  *         var D2A = function theAdd(a, b)
1517                                  *         {
1518                                  *                return a+b;
1519                                  *         }
1520                                  * Are really two separate defined functions and
1521                                  * can be referenced in two ways:
1522                                  *         alert( D2A(1,2) );                     // produces 3
1523                                  *         alert( theAdd(1,2) );                  // also produces 3
1524                                  * So it must have two tags:
1525                                  *         D2A
1526                                  *         theAdd
1527                                  * Save the reference to the name for later use, once
1528                                  * we have established this is a valid function we will
1529                                  * create the secondary reference to it.
1530                                  */
1531                                 copyToken(secondary_name, token);
1532                                 readToken (token);
1533                         }
1534
1535                         if ( isType (token, TOKEN_OPEN_PAREN) )
1536                                 skipArgumentList(token, FALSE, signature);
1537
1538                         if (isType (token, TOKEN_OPEN_CURLY))
1539                         {
1540                                 /*
1541                                  * This will be either a function or a class.
1542                                  * We can only determine this by checking the body
1543                                  * of the function.  If we find a "this." we know
1544                                  * it is a class, otherwise it is a function.
1545                                  */
1546                                 if ( is_inside_class )
1547                                 {
1548                                         makeJsTag (name, JSTAG_METHOD, signature);
1549                                         if ( vStringLength(secondary_name->string) > 0 )
1550                                                 makeFunctionTag (secondary_name, signature);
1551                                         parseBlock (token, name);
1552                                 }
1553                                 else
1554                                 {
1555                                         is_class = parseBlock (token, name);
1556                                         if ( is_class )
1557                                                 makeClassTag (name, signature);
1558                                         else
1559                                                 makeFunctionTag (name, signature);
1560
1561                                         if ( vStringLength(secondary_name->string) > 0 )
1562                                                 makeFunctionTag (secondary_name, signature);
1563                                 }
1564                         }
1565
1566                         vStringDelete (signature);
1567                 }
1568                 else if (isType (token, TOKEN_OPEN_CURLY))
1569                 {
1570                         /*
1571                          * Creates tags for each of these class methods
1572                          *     ValidClassOne.prototype = {
1573                          *         'validMethodOne' : function(a,b) {},
1574                          *         'validMethodTwo' : function(a,b) {}
1575                          *     }
1576                          * Or checks if this is a hash variable.
1577                          *     var z = {};
1578                          */
1579                         has_methods = parseMethods(token, name);
1580                         if (has_methods)
1581                                 makeJsTag (name, JSTAG_CLASS, NULL);
1582                         else
1583                         {
1584                                 /*
1585                                  * Only create variables for global scope
1586                          */
1587                                 if ( token->nestLevel == 0 && is_global )
1588                                 {
1589                                         /*
1590                                          * A pointer can be created to the function.
1591                                          * If we recognize the function/class name ignore the variable.
1592                                          * This format looks identical to a variable definition.
1593                                          * A variable defined outside of a block is considered
1594                                          * a global variable:
1595                                          *         var g_var1 = 1;
1596                                          *         var g_var2;
1597                                          * This is not a global variable:
1598                                          *         var g_var = function;
1599                                          * This is a global variable:
1600                                          *         var g_var = different_var_name;
1601                                          */
1602                                         fulltag = vStringNew ();
1603                                         if (vStringLength (token->scope) > 0)
1604                                         {
1605                                                 vStringCopy(fulltag, token->scope);
1606                                                 vStringCatS (fulltag, ".");
1607                                                 vStringCatS (fulltag, vStringValue(token->string));
1608                                         }
1609                                         else
1610                                         {
1611                                                 vStringCopy(fulltag, token->string);
1612                                         }
1613                                         vStringTerminate(fulltag);
1614                                         if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1615                                                         ! stringListHas(ClassNames, vStringValue (fulltag)) )
1616                                         {
1617                                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1618                                         }
1619                                         vStringDelete (fulltag);
1620                                 }
1621                         }
1622                         if (isType (token, TOKEN_CLOSE_CURLY))
1623                         {
1624                                 /*
1625                                  * Assume the closing curly brace terminates
1626                                  * this statement.
1627                                  */
1628                                 is_terminated = TRUE;
1629                         }
1630                 }
1631                 else if (isKeyword (token, KEYWORD_new))
1632                 {
1633                         readToken (token);
1634                         is_var = isType (token, TOKEN_IDENTIFIER);
1635                         if ( isKeyword (token, KEYWORD_function) ||
1636                                         isKeyword (token, KEYWORD_capital_function) ||
1637                                         isKeyword (token, KEYWORD_capital_object) ||
1638                                         is_var )
1639                         {
1640                                 if ( isKeyword (token, KEYWORD_capital_object) )
1641                                         is_class = TRUE;
1642
1643                                 readToken (token);
1644                                 if ( isType (token, TOKEN_OPEN_PAREN) )
1645                                         skipArgumentList(token, TRUE, NULL);
1646
1647                                 if (isType (token, TOKEN_SEMICOLON))
1648                                 {
1649                                         if ( token->nestLevel == 0 )
1650                                         {
1651                                                 if ( is_var )
1652                                                 {
1653                                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1654                                                 }
1655                                                 else
1656                                                 {
1657                                                         if ( is_class )
1658                                                         {
1659                                                                 makeClassTag (name, NULL);
1660                                                         } else {
1661                                                                 /* FIXME: we cannot really get a meaningful
1662                                                                  * signature from a `new Function()` call,
1663                                                                  * so for now just don't set any */
1664                                                                 makeFunctionTag (name, NULL);
1665                                                         }
1666                                                 }
1667                                         }
1668                                 }
1669                                 else if (isType (token, TOKEN_CLOSE_CURLY))
1670                                         is_terminated = FALSE;
1671                         }
1672                 }
1673                 else if (isKeyword (token, KEYWORD_NONE))
1674                 {
1675                         /*
1676                          * Only create variables for global scope
1677                          */
1678                         if ( token->nestLevel == 0 && is_global )
1679                         {
1680                                 /*
1681                                  * A pointer can be created to the function.
1682                                  * If we recognize the function/class name ignore the variable.
1683                                  * This format looks identical to a variable definition.
1684                                  * A variable defined outside of a block is considered
1685                                  * a global variable:
1686                                  *         var g_var1 = 1;
1687                                  *         var g_var2;
1688                                  * This is not a global variable:
1689                                  *         var g_var = function;
1690                                  * This is a global variable:
1691                                  *         var g_var = different_var_name;
1692                                  */
1693                                 fulltag = vStringNew ();
1694                                 if (vStringLength (token->scope) > 0)
1695                                 {
1696                                         vStringCopy(fulltag, token->scope);
1697                                         vStringCatS (fulltag, ".");
1698                                         vStringCatS (fulltag, vStringValue(token->string));
1699                                 }
1700                                 else
1701                                 {
1702                                         vStringCopy(fulltag, token->string);
1703                                 }
1704                                 vStringTerminate(fulltag);
1705                                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1706                                                 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1707                                 {
1708                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1709                                 }
1710                                 vStringDelete (fulltag);
1711                         }
1712                 }
1713
1714                 if (parenDepth > 0)
1715                 {
1716                         while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
1717                         {
1718                                 if (isType (token, TOKEN_OPEN_PAREN))
1719                                         parenDepth++;
1720                                 else if (isType (token, TOKEN_CLOSE_PAREN))
1721                                         parenDepth--;
1722                                 readTokenFull (token, TRUE, NULL);
1723                         }
1724                         if (isType (token, TOKEN_CLOSE_CURLY))
1725                                 is_terminated = FALSE;
1726                 }
1727         }
1728
1729         /* if we aren't already at the cmd end, advance to it and check whether
1730          * the statement was terminated */
1731         if (! isType (token, TOKEN_CLOSE_CURLY) &&
1732             ! isType (token, TOKEN_SEMICOLON))
1733         {
1734                 /*
1735                  * Statements can be optionally terminated in the case of
1736                  * statement prior to a close curly brace as in the
1737                  * document.write line below:
1738                  *
1739                  * function checkForUpdate() {
1740                  *         if( 1==1 ) {
1741                  *                 document.write("hello from checkForUpdate<br>")
1742                  *         }
1743                  *         return 1;
1744                  * }
1745                  */
1746                 is_terminated = findCmdTerm (token, TRUE);
1747         }
1748
1749 cleanUp:
1750         vStringCopy(token->scope, saveScope);
1751         deleteToken (name);
1752         deleteToken (secondary_name);
1753         deleteToken (method_body_token);
1754         vStringDelete(saveScope);
1755
1756         return is_terminated;
1757 }
1758
1759 static void parseUI5 (tokenInfo *const token)
1760 {
1761         tokenInfo *const name = newToken ();
1762         /*
1763          * SAPUI5 is built on top of jQuery.
1764          * It follows a standard format:
1765          *     sap.ui.controller("id.of.controller", {
1766          *         method_name : function... {
1767          *         },
1768          *
1769          *         method_name : function ... {
1770          *         }
1771          *     }
1772          *
1773          * Handle the parsing of the initial controller (and the
1774          * same for "view") and then allow the methods to be
1775          * parsed as usual.
1776          */
1777
1778         readToken (token);
1779
1780         if (isType (token, TOKEN_PERIOD))
1781         {
1782                 readToken (token);
1783                 while (! isType (token, TOKEN_OPEN_PAREN) &&
1784                            ! isType (token, TOKEN_EOF))
1785                 {
1786                         readToken (token);
1787                 }
1788                 readToken (token);
1789
1790                 if (isType (token, TOKEN_STRING))
1791                 {
1792                         copyToken(name, token);
1793                         readToken (token);
1794                 }
1795
1796                 if (isType (token, TOKEN_COMMA))
1797                         readToken (token);
1798
1799                 do
1800                 {
1801                         parseMethods (token, name);
1802                 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
1803                                  ! isType (token, TOKEN_EOF));
1804         }
1805
1806         deleteToken (name);
1807 }
1808
1809 static boolean parseLine (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class)
1810 {
1811         boolean is_terminated = TRUE;
1812         /*
1813          * Detect the common statements, if, while, for, do, ...
1814          * This is necessary since the last statement within a block "{}"
1815          * can be optionally terminated.
1816          *
1817          * If the statement is not terminated, we need to tell
1818          * the calling routine to prevent reading an additional token
1819          * looking for the end of the statement.
1820          */
1821
1822         if (isType(token, TOKEN_KEYWORD))
1823         {
1824                 switch (token->keyword)
1825                 {
1826                         case KEYWORD_for:
1827                         case KEYWORD_while:
1828                         case KEYWORD_do:
1829                                 is_terminated = parseLoop (token, parent);
1830                                 break;
1831                         case KEYWORD_if:
1832                         case KEYWORD_else:
1833                         case KEYWORD_try:
1834                         case KEYWORD_catch:
1835                         case KEYWORD_finally:
1836                                 /* Common semantics */
1837                                 is_terminated = parseIf (token, parent);
1838                                 break;
1839                         case KEYWORD_switch:
1840                                 parseSwitch (token);
1841                                 break;
1842                         case KEYWORD_return:
1843                                 is_terminated = findCmdTerm (token, TRUE);
1844                                 break;
1845                         default:
1846                                 is_terminated = parseStatement (token, parent, is_inside_class);
1847                                 break;
1848                 }
1849         }
1850         else
1851         {
1852                 /*
1853                  * Special case where single line statements may not be
1854                  * SEMICOLON terminated.  parseBlock needs to know this
1855                  * so that it does not read the next token.
1856                  */
1857                 is_terminated = parseStatement (token, parent, is_inside_class);
1858         }
1859         return is_terminated;
1860 }
1861
1862 static void parseJsFile (tokenInfo *const token)
1863 {
1864         do
1865         {
1866                 readToken (token);
1867
1868                 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
1869                         parseFunction (token);
1870                 else if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
1871                         parseUI5 (token);
1872                 else
1873                         parseLine (token, token, FALSE);
1874         } while (! isType (token, TOKEN_EOF));
1875 }
1876
1877 static void initialize (const langType language)
1878 {
1879         Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
1880         Lang_js = language;
1881 }
1882
1883 static void findJsTags (void)
1884 {
1885         tokenInfo *const token = newToken ();
1886
1887         ClassNames = stringListNew ();
1888         FunctionNames = stringListNew ();
1889         LastTokenType = TOKEN_UNDEFINED;
1890
1891         parseJsFile (token);
1892
1893         stringListDelete (ClassNames);
1894         stringListDelete (FunctionNames);
1895         ClassNames = NULL;
1896         FunctionNames = NULL;
1897         deleteToken (token);
1898 }
1899
1900 /* Create parser definition structure */
1901 extern parserDefinition* JavaScriptParser (void)
1902 {
1903         static const char *const extensions [] = { "js", NULL };
1904         parserDefinition *const def = parserNew ("JavaScript");
1905         def->extensions = extensions;
1906         /*
1907          * New definitions for parsing instead of regex
1908          */
1909         def->kinds              = JsKinds;
1910         def->kindCount  = ARRAY_SIZE (JsKinds);
1911         def->parser             = findJsTags;
1912         def->initialize = initialize;
1913         def->keywordTable = JsKeywordTable;
1914         def->keywordCount = ARRAY_SIZE (JsKeywordTable);
1915
1916         return def;
1917 }
1918 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */