ctags/parsers/js.c

   1 /*
   2  *       Copyright (c) 2003, Darren Hiebert
   3  *
   4  *       This source code is released for free distribution under the terms of the
   5  *       GNU General Public License.
   6  *
   7  *       This module contains functions for generating tags for JavaScript language
   8  *       files.
   9  *
  10  *       This is a good reference for different forms of the function statement:
  11  *               http://www.permadi.com/tutorial/jsFunc/
  12  *   Another good reference:
  13  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  14  */
  15
  16 /*
  17  *       INCLUDE FILES
  18  */
  19 #include "general.h"    /* must always come first */
  20 #include <ctype.h>      /* to define isalpha () */
  21 #include <string.h>
  22 #ifdef DEBUG
  23 #include <stdio.h>
  24 #endif
  25
  26 #include "mio.h"
  27 #include "keyword.h"
  28 #include "parse.h"
  29 #include "read.h"
  30 #include "main.h"
  31 #include "vstring.h"
  32
  33 /*
  34  *       MACROS
  35  */
  36 #define isType(token,t)         (boolean) ((token)->type == (t))
  37 #define isKeyword(token,k)      (boolean) ((token)->keyword == (k))
  38
  39 /*
  40  *       DATA DECLARATIONS
  41  */
  42
  43 /*
  44  * Tracks class and function names already created
  45  */
  46 static stringList *ClassNames;
  47 static stringList *FunctionNames;
  48
  49 /*      Used to specify type of keyword.
  50 */
  51 typedef enum eKeywordId {
  52         KEYWORD_NONE = -1,
  53         KEYWORD_function,
  54         KEYWORD_capital_function,
  55         KEYWORD_capital_object,
  56         KEYWORD_prototype,
  57         KEYWORD_var,
  58         KEYWORD_let,
  59         KEYWORD_const,
  60         KEYWORD_new,
  61         KEYWORD_this,
  62         KEYWORD_for,
  63         KEYWORD_while,
  64         KEYWORD_do,
  65         KEYWORD_if,
  66         KEYWORD_else,
  67         KEYWORD_switch,
  68         KEYWORD_try,
  69         KEYWORD_catch,
  70         KEYWORD_finally,
  71         KEYWORD_sap,
  72         KEYWORD_return
  73 } keywordId;
  74
  75 /*      Used to determine whether keyword is valid for the token language and
  76  *      what its ID is.
  77  */
  78 typedef struct sKeywordDesc {
  79         const char *name;
  80         keywordId id;
  81 } keywordDesc;
  82
  83 typedef enum eTokenType {
  84         TOKEN_UNDEFINED,
  85         TOKEN_EOF,
  86         TOKEN_CHARACTER,
  87         TOKEN_CLOSE_PAREN,
  88         TOKEN_SEMICOLON,
  89         TOKEN_COLON,
  90         TOKEN_COMMA,
  91         TOKEN_KEYWORD,
  92         TOKEN_OPEN_PAREN,
  93         TOKEN_OPERATOR,
  94         TOKEN_IDENTIFIER,
  95         TOKEN_STRING,
  96         TOKEN_PERIOD,
  97         TOKEN_OPEN_CURLY,
  98         TOKEN_CLOSE_CURLY,
  99         TOKEN_EQUAL_SIGN,
 100         TOKEN_FORWARD_SLASH,
 101         TOKEN_OPEN_SQUARE,
 102         TOKEN_CLOSE_SQUARE,
 103         TOKEN_REGEXP,
 104         TOKEN_POSTFIX_OPERATOR,
 105         TOKEN_BINARY_OPERATOR
 106 } tokenType;
 107
 108 typedef struct sTokenInfo {
 109         tokenType               type;
 110         keywordId               keyword;
 111         vString *               string;
 112         vString *               scope;
 113         unsigned long   lineNumber;
 114         MIOPos                  filePosition;
 115         int                             nestLevel;
 116         boolean                 ignoreTag;
 117 } tokenInfo;
 118
 119 /*
 120  *      DATA DEFINITIONS
 121  */
 122
 123 static tokenType LastTokenType;
 124
 125 static langType Lang_js;
 126
 127 typedef enum {
 128         JSTAG_FUNCTION,
 129         JSTAG_CLASS,
 130         JSTAG_METHOD,
 131         JSTAG_PROPERTY,
 132         JSTAG_CONSTANT,
 133         JSTAG_VARIABLE,
 134         JSTAG_COUNT
 135 } jsKind;
 136
 137 static kindOption JsKinds [] = {
 138         { TRUE,  'f', "function",         "functions"              },
 139         { TRUE,  'c', "class",            "classes"                        },
 140         { TRUE,  'm', "method",           "methods"                        },
 141         { TRUE,  'p', "property",         "properties"             },
 142         { TRUE,  'C', "constant",         "constants"              },
 143         { TRUE,  'v', "variable",         "global variables"   }
 144 };
 145
 146 static const keywordDesc JsKeywordTable [] = {
 147         /* keyword              keyword ID */
 148         { "function",   KEYWORD_function                        },
 149         { "Function",   KEYWORD_capital_function        },
 150         { "Object",             KEYWORD_capital_object          },
 151         { "prototype",  KEYWORD_prototype                       },
 152         { "var",                KEYWORD_var                                     },
 153         { "let",                KEYWORD_let                                     },
 154         { "const",              KEYWORD_const                           },
 155         { "new",                KEYWORD_new                                     },
 156         { "this",               KEYWORD_this                            },
 157         { "for",                KEYWORD_for                                     },
 158         { "while",              KEYWORD_while                           },
 159         { "do",                 KEYWORD_do                                      },
 160         { "if",                 KEYWORD_if                                      },
 161         { "else",               KEYWORD_else                            },
 162         { "switch",             KEYWORD_switch                          },
 163         { "try",                KEYWORD_try                                     },
 164         { "catch",              KEYWORD_catch                           },
 165         { "finally",    KEYWORD_finally                         },
 166         { "sap",            KEYWORD_sap                                 },
 167         { "return",             KEYWORD_return                          }
 168 };
 169
 170 /*
 171  *       FUNCTION DEFINITIONS
 172  */
 173
 174 /* Recursive functions */
 175 static void parseFunction (tokenInfo *const token);
 176 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent);
 177 static boolean parseLine (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class);
 178 static void parseUI5 (tokenInfo *const token);
 179
 180 static boolean isIdentChar (const int c)
 181 {
 182         return (boolean)
 183                 (isalpha (c) || isdigit (c) || c == '$' ||
 184                  c == '@' || c == '_' || c == '#');
 185 }
 186
 187 static void buildJsKeywordHash (void)
 188 {
 189         const size_t count = sizeof (JsKeywordTable) /
 190                 sizeof (JsKeywordTable [0]);
 191         size_t i;
 192         for (i = 0      ;  i < count  ;  ++i)
 193         {
 194                 const keywordDesc* const p = &JsKeywordTable [i];
 195                 addKeyword (p->name, Lang_js, (int) p->id);
 196         }
 197 }
 198
 199 static tokenInfo *newToken (void)
 200 {
 201         tokenInfo *const token = xMalloc (1, tokenInfo);
 202
 203         token->type                     = TOKEN_UNDEFINED;
 204         token->keyword          = KEYWORD_NONE;
 205         token->string           = vStringNew ();
 206         token->scope            = vStringNew ();
 207         token->nestLevel        = 0;
 208         token->ignoreTag        = FALSE;
 209         token->lineNumber   = getSourceLineNumber ();
 210         token->filePosition = getInputFilePosition ();
 211
 212         return token;
 213 }
 214
 215 static void deleteToken (tokenInfo *const token)
 216 {
 217         vStringDelete (token->string);
 218         vStringDelete (token->scope);
 219         eFree (token);
 220 }
 221
 222 /*
 223  *       Tag generation functions
 224  */
 225
 226 static void makeJsTag (tokenInfo *const token, const jsKind kind, vString *const signature)
 227 {
 228         if (JsKinds [kind].enabled && ! token->ignoreTag )
 229         {
 230                 const char *name = vStringValue (token->string);
 231                 vString *fullscope = vStringNewCopy (token->scope);
 232                 const char *p;
 233                 tagEntryInfo e;
 234
 235                 if ((p = strrchr (name, '.')) != NULL)
 236                 {
 237                         if (vStringLength (fullscope) > 0)
 238                                 vStringPut (fullscope, '.');
 239                         vStringNCatS (fullscope, name, p - name);
 240                         name = p + 1;
 241                 }
 242
 243                 initTagEntry (&e, name);
 244
 245                 e.lineNumber   = token->lineNumber;
 246                 e.filePosition = token->filePosition;
 247                 e.kindName         = JsKinds [kind].name;
 248                 e.kind             = JsKinds [kind].letter;
 249
 250                 if ( vStringLength(fullscope) > 0 )
 251                 {
 252                         jsKind parent_kind = JSTAG_CLASS;
 253
 254                         /* if we're creating a function (and not a method),
 255                          * guess we're inside another function */
 256                         if (kind == JSTAG_FUNCTION)
 257                                 parent_kind = JSTAG_FUNCTION;
 258
 259                         e.extensionFields.scope[0] = JsKinds [parent_kind].name;
 260                         e.extensionFields.scope[1] = vStringValue (fullscope);
 261                 }
 262
 263                 if (signature && vStringLength(signature))
 264                 {
 265                         size_t i;
 266                         /* sanitize signature by replacing all control characters with a
 267                          * space (because it's simple).
 268                          * there should never be any junk in a valid signature, but who
 269                          * knows what the user wrote and CTags doesn't cope well with weird
 270                          * characters. */
 271                         for (i = 0; i < signature->length; i++)
 272                         {
 273                                 unsigned char c = (unsigned char) signature->buffer[i];
 274                                 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
 275                                         signature->buffer[i] = ' ';
 276                         }
 277                         e.extensionFields.arglist = vStringValue(signature);
 278                 }
 279
 280                 makeTagEntry (&e);
 281                 vStringDelete (fullscope);
 282         }
 283 }
 284
 285 static void makeClassTag (tokenInfo *const token, vString *const signature)
 286 {
 287         vString *       fulltag;
 288
 289         if ( ! token->ignoreTag )
 290         {
 291                 fulltag = vStringNew ();
 292                 if (vStringLength (token->scope) > 0)
 293                 {
 294                         vStringCopy(fulltag, token->scope);
 295                         vStringCatS (fulltag, ".");
 296                         vStringCatS (fulltag, vStringValue(token->string));
 297                 }
 298                 else
 299                 {
 300                         vStringCopy(fulltag, token->string);
 301                 }
 302                 vStringTerminate(fulltag);
 303                 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
 304                 {
 305                         stringListAdd (ClassNames, vStringNewCopy (fulltag));
 306                         makeJsTag (token, JSTAG_CLASS, signature);
 307                 }
 308                 vStringDelete (fulltag);
 309         }
 310 }
 311
 312 static void makeFunctionTag (tokenInfo *const token, vString *const signature)
 313 {
 314         vString *       fulltag;
 315
 316         if ( ! token->ignoreTag )
 317         {
 318                 fulltag = vStringNew ();
 319                 if (vStringLength (token->scope) > 0)
 320                 {
 321                         vStringCopy(fulltag, token->scope);
 322                         vStringCatS (fulltag, ".");
 323                         vStringCatS (fulltag, vStringValue(token->string));
 324                 }
 325                 else
 326                 {
 327                         vStringCopy(fulltag, token->string);
 328                 }
 329                 vStringTerminate(fulltag);
 330                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
 331                 {
 332                         stringListAdd (FunctionNames, vStringNewCopy (fulltag));
 333                         makeJsTag (token, JSTAG_FUNCTION, signature);
 334                 }
 335                 vStringDelete (fulltag);
 336         }
 337 }
 338
 339 /*
 340  *       Parsing functions
 341  */
 342
 343 static int skipToCharacter (const int c)
 344 {
 345         int d;
 346         do
 347         {
 348                 d = fileGetc ();
 349         } while (d != EOF  &&  d != c);
 350         return d;
 351 }
 352
 353 static void parseString (vString *const string, const int delimiter)
 354 {
 355         boolean end = FALSE;
 356         while (! end)
 357         {
 358                 int c = fileGetc ();
 359                 if (c == EOF)
 360                         end = TRUE;
 361                 else if (c == '\\')
 362                 {
 363                         /* Eat the escape sequence (\", \', etc).  We properly handle
 364                          * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
 365                          * as an unescaped character, which is invalid and handled below.
 366                          * Also, handle the fact that <LineContinuation> produces an empty
 367                          * sequence.
 368                          * See ECMA-262 7.8.4 */
 369                         c = fileGetc();
 370                         if (c != '\r' && c != '\n')
 371                                 vStringPut(string, c);
 372                         else if (c == '\r')
 373                         {
 374                                 c = fileGetc();
 375                                 if (c != '\n')
 376                                         fileUngetc (c);
 377                         }
 378                 }
 379                 else if (c == delimiter)
 380                         end = TRUE;
 381                 else if (c == '\r' || c == '\n')
 382                 {
 383                         /* those are invalid when not escaped */
 384                         end = TRUE;
 385                         /* we don't want to eat the newline itself to let the automatic
 386                          * semicolon insertion code kick in */
 387                         fileUngetc (c);
 388                 }
 389                 else
 390                         vStringPut (string, c);
 391         }
 392         vStringTerminate (string);
 393 }
 394
 395 static void parseRegExp (void)
 396 {
 397         int c;
 398         boolean in_range = FALSE;
 399
 400         do
 401         {
 402                 c = fileGetc ();
 403                 if (! in_range && c == '/')
 404                 {
 405                         do /* skip flags */
 406                         {
 407                                 c = fileGetc ();
 408                         } while (isalpha (c));
 409                         fileUngetc (c);
 410                         break;
 411                 }
 412                 else if (c == '\\')
 413                         c = fileGetc (); /* skip next character */
 414                 else if (c == '[')
 415                         in_range = TRUE;
 416                 else if (c == ']')
 417                         in_range = FALSE;
 418         } while (c != EOF);
 419 }
 420
 421 /*      Read a C identifier beginning with "firstChar" and places it into
 422  *      "name".
 423  */
 424 static void parseIdentifier (vString *const string, const int firstChar)
 425 {
 426         int c = firstChar;
 427         Assert (isIdentChar (c));
 428         do
 429         {
 430                 vStringPut (string, c);
 431                 c = fileGetc ();
 432         } while (isIdentChar (c));
 433         vStringTerminate (string);
 434         fileUngetc (c);         /* unget non-identifier character */
 435 }
 436
 437 static keywordId analyzeToken (vString *const name)
 438 {
 439         vString *keyword = vStringNew ();
 440         keywordId result;
 441         vStringCopyToLower (keyword, name);
 442         result = (keywordId) lookupKeyword (vStringValue (keyword), Lang_js);
 443         vStringDelete (keyword);
 444         return result;
 445 }
 446
 447 static void readTokenFull (tokenInfo *const token, boolean include_newlines, vString *const repr)
 448 {
 449         int c;
 450         int i;
 451
 452         token->type                     = TOKEN_UNDEFINED;
 453         token->keyword          = KEYWORD_NONE;
 454         vStringClear (token->string);
 455
 456 getNextChar:
 457         i = 0;
 458         do
 459         {
 460                 c = fileGetc ();
 461                 i++;
 462         }
 463         while (c == '\t'  ||  c == ' ' ||
 464                    ((c == '\r' || c == '\n') && ! include_newlines));
 465
 466         token->lineNumber   = getSourceLineNumber ();
 467         token->filePosition = getInputFilePosition ();
 468
 469         if (repr)
 470         {
 471                 if (i > 1)
 472                         vStringPut (repr, ' ');
 473                 vStringPut (repr, c);
 474         }
 475
 476         switch (c)
 477         {
 478                 case EOF: token->type = TOKEN_EOF;                                      break;
 479                 case '(': token->type = TOKEN_OPEN_PAREN;                       break;
 480                 case ')': token->type = TOKEN_CLOSE_PAREN;                      break;
 481                 case ';': token->type = TOKEN_SEMICOLON;                        break;
 482                 case ',': token->type = TOKEN_COMMA;                            break;
 483                 case '.': token->type = TOKEN_PERIOD;                           break;
 484                 case ':': token->type = TOKEN_COLON;                            break;
 485                 case '{': token->type = TOKEN_OPEN_CURLY;                       break;
 486                 case '}': token->type = TOKEN_CLOSE_CURLY;                      break;
 487                 case '=': token->type = TOKEN_EQUAL_SIGN;                       break;
 488                 case '[': token->type = TOKEN_OPEN_SQUARE;                      break;
 489                 case ']': token->type = TOKEN_CLOSE_SQUARE;                     break;
 490
 491                 case '+':
 492                 case '-':
 493                         {
 494                                 int d = fileGetc ();
 495                                 if (d == c) /* ++ or -- */
 496                                         token->type = TOKEN_POSTFIX_OPERATOR;
 497                                 else
 498                                 {
 499                                         fileUngetc (d);
 500                                         token->type = TOKEN_BINARY_OPERATOR;
 501                                 }
 502                                 break;
 503                         }
 504
 505                 case '*':
 506                 case '%':
 507                 case '?':
 508                 case '>':
 509                 case '<':
 510                 case '^':
 511                 case '|':
 512                 case '&':
 513                         token->type = TOKEN_BINARY_OPERATOR;
 514                         break;
 515
 516                 case '\r':
 517                 case '\n':
 518                         /* This isn't strictly correct per the standard, but following the
 519                          * real rules means understanding all statements, and that's not
 520                          * what the parser currently does.  What we do here is a guess, by
 521                          * avoiding inserting semicolons that would make the statement on
 522                          * the left invalid.  Hopefully this should not have false negatives
 523                          * (e.g. should not miss insertion of a semicolon) but might have
 524                          * false positives (e.g. it will wrongfully emit a semicolon for the
 525                          * newline in "foo\n+bar").
 526                          * This should however be mostly harmless as we only deal with
 527                          * newlines in specific situations where we know a false positive
 528                          * wouldn't hurt too bad. */
 529                         switch (LastTokenType)
 530                         {
 531                                 /* these cannot be the end of a statement, so hold the newline */
 532                                 case TOKEN_EQUAL_SIGN:
 533                                 case TOKEN_COLON:
 534                                 case TOKEN_PERIOD:
 535                                 case TOKEN_FORWARD_SLASH:
 536                                 case TOKEN_BINARY_OPERATOR:
 537                                 /* and these already end one, no need to duplicate it */
 538                                 case TOKEN_SEMICOLON:
 539                                 case TOKEN_COMMA:
 540                                 case TOKEN_CLOSE_CURLY:
 541                                 case TOKEN_OPEN_CURLY:
 542                                         include_newlines = FALSE; /* no need to recheck */
 543                                         goto getNextChar;
 544                                         break;
 545                                 default:
 546                                         token->type = TOKEN_SEMICOLON;
 547                         }
 548                         break;
 549
 550                 case '\'':
 551                 case '"':
 552                                   token->type = TOKEN_STRING;
 553                                   parseString (token->string, c);
 554                                   token->lineNumber = getSourceLineNumber ();
 555                                   token->filePosition = getInputFilePosition ();
 556                                   if (repr)
 557                                   {
 558                                           vStringCat (repr, token->string);
 559                                           vStringPut (repr, c);
 560                                   }
 561                                   break;
 562
 563                 case '\\':
 564                                   c = fileGetc ();
 565                                   if (c != '\\'  && c != '"'  &&  !isspace (c))
 566                                           fileUngetc (c);
 567                                   token->type = TOKEN_CHARACTER;
 568                                   token->lineNumber = getSourceLineNumber ();
 569                                   token->filePosition = getInputFilePosition ();
 570                                   break;
 571
 572                 case '/':
 573                                   {
 574                                           int d = fileGetc ();
 575                                           if ( (d != '*') &&            /* is this the start of a comment? */
 576                                                           (d != '/') )          /* is a one line comment? */
 577                                           {
 578                                                   fileUngetc (d);
 579                                                   switch (LastTokenType)
 580                                                   {
 581                                                           case TOKEN_CHARACTER:
 582                                                           case TOKEN_IDENTIFIER:
 583                                                           case TOKEN_STRING:
 584                                                           case TOKEN_CLOSE_CURLY:
 585                                                           case TOKEN_CLOSE_PAREN:
 586                                                           case TOKEN_CLOSE_SQUARE:
 587                                                                   token->type = TOKEN_FORWARD_SLASH;
 588                                                                   break;
 589
 590                                                           default:
 591                                                                   token->type = TOKEN_REGEXP;
 592                                                                   parseRegExp ();
 593                                                                   token->lineNumber = getSourceLineNumber ();
 594                                                                   token->filePosition = getInputFilePosition ();
 595                                                                   break;
 596                                                   }
 597                                           }
 598                                           else
 599                                           {
 600                                                   if (repr) /* remove the / we added */
 601                                                           repr->buffer[--repr->length] = 0;
 602                                                   if (d == '*')
 603                                                   {
 604                                                           do
 605                                                           {
 606                                                                   skipToCharacter ('*');
 607                                                                   c = fileGetc ();
 608                                                                   if (c == '/')
 609                                                                           break;
 610                                                                   else
 611                                                                           fileUngetc (c);
 612                                                           } while (c != EOF && c != '\0');
 613                                                           goto getNextChar;
 614                                                   }
 615                                                   else if (d == '/')    /* is this the start of a comment?  */
 616                                                   {
 617                                                           skipToCharacter ('\n');
 618                                                           /* if we care about newlines, put it back so it is seen */
 619                                                           if (include_newlines)
 620                                                                   fileUngetc ('\n');
 621                                                           goto getNextChar;
 622                                                   }
 623                                           }
 624                                           break;
 625                                   }
 626
 627                 case '#':
 628                                   /* skip shebang in case of e.g. Node.js scripts */
 629                                   if (token->lineNumber > 1)
 630                                           token->type = TOKEN_UNDEFINED;
 631                                   else if ((c = fileGetc ()) != '!')
 632                                   {
 633                                           fileUngetc (c);
 634                                           token->type = TOKEN_UNDEFINED;
 635                                   }
 636                                   else
 637                                   {
 638                                           skipToCharacter ('\n');
 639                                           goto getNextChar;
 640                                   }
 641                                   break;
 642
 643                 default:
 644                                   if (! isIdentChar (c))
 645                                           token->type = TOKEN_UNDEFINED;
 646                                   else
 647                                   {
 648                                           parseIdentifier (token->string, c);
 649                                           token->lineNumber = getSourceLineNumber ();
 650                                           token->filePosition = getInputFilePosition ();
 651                                           token->keyword = analyzeToken (token->string);
 652                                           if (isKeyword (token, KEYWORD_NONE))
 653                                                   token->type = TOKEN_IDENTIFIER;
 654                                           else
 655                                                   token->type = TOKEN_KEYWORD;
 656                                           if (repr && vStringLength (token->string) > 1)
 657                                                   vStringCatS (repr, vStringValue (token->string) + 1);
 658                                   }
 659                                   break;
 660         }
 661
 662         LastTokenType = token->type;
 663 }
 664
 665 static void readToken (tokenInfo *const token)
 666 {
 667         readTokenFull (token, FALSE, NULL);
 668 }
 669
 670 static void copyToken (tokenInfo *const dest, tokenInfo *const src)
 671 {
 672         dest->nestLevel = src->nestLevel;
 673         dest->lineNumber = src->lineNumber;
 674         dest->filePosition = src->filePosition;
 675         dest->type = src->type;
 676         dest->keyword = src->keyword;
 677         vStringCopy(dest->string, src->string);
 678         vStringCopy(dest->scope, src->scope);
 679 }
 680
 681 /*
 682  *       Token parsing functions
 683  */
 684
 685 static void skipArgumentList (tokenInfo *const token, boolean include_newlines, vString *const repr)
 686 {
 687         int nest_level = 0;
 688
 689         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
 690         {
 691                 nest_level++;
 692                 if (repr)
 693                         vStringPut (repr, '(');
 694                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 695                 {
 696                         readTokenFull (token, FALSE, repr);
 697                         if (isType (token, TOKEN_OPEN_PAREN))
 698                                 nest_level++;
 699                         else if (isType (token, TOKEN_CLOSE_PAREN))
 700                                 nest_level--;
 701                 }
 702                 readTokenFull (token, include_newlines, NULL);
 703         }
 704 }
 705
 706 static void skipArrayList (tokenInfo *const token, boolean include_newlines)
 707 {
 708         int nest_level = 0;
 709
 710         /*
 711          * Handle square brackets
 712          *       var name[1]
 713          * So we must check for nested open and closing square brackets
 714          */
 715
 716         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
 717         {
 718                 nest_level++;
 719                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
 720                 {
 721                         readToken (token);
 722                         if (isType (token, TOKEN_OPEN_SQUARE))
 723                                 nest_level++;
 724                         else if (isType (token, TOKEN_CLOSE_SQUARE))
 725                                 nest_level--;
 726                 }
 727                 readTokenFull (token, include_newlines, NULL);
 728         }
 729 }
 730
 731 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
 732 {
 733         if (vStringLength (parent->string) > 0)
 734         {
 735                 vStringCatS (parent->string, ".");
 736         }
 737         vStringCatS (parent->string, vStringValue(child->string));
 738         vStringTerminate(parent->string);
 739 }
 740
 741 static void addToScope (tokenInfo* const token, vString* const extra)
 742 {
 743         if (vStringLength (token->scope) > 0)
 744         {
 745                 vStringCatS (token->scope, ".");
 746         }
 747         vStringCatS (token->scope, vStringValue(extra));
 748         vStringTerminate(token->scope);
 749 }
 750
 751 /*
 752  *       Scanning functions
 753  */
 754
 755 static boolean findCmdTerm (tokenInfo *const token, boolean include_newlines)
 756 {
 757         /*
 758          * Read until we find either a semicolon or closing brace.
 759          * Any nested braces will be handled within.
 760          */
 761         while (! isType (token, TOKEN_SEMICOLON) &&
 762                    ! isType (token, TOKEN_CLOSE_CURLY) &&
 763                    ! isType (token, TOKEN_EOF))
 764         {
 765                 /* Handle nested blocks */
 766                 if ( isType (token, TOKEN_OPEN_CURLY))
 767                 {
 768                         parseBlock (token, token);
 769                         readTokenFull (token, include_newlines, NULL);
 770                 }
 771                 else if ( isType (token, TOKEN_OPEN_PAREN) )
 772                 {
 773                         skipArgumentList(token, include_newlines, NULL);
 774                 }
 775                 else if ( isType (token, TOKEN_OPEN_SQUARE) )
 776                 {
 777                         skipArrayList(token, include_newlines);
 778                 }
 779                 else
 780                 {
 781                         readTokenFull (token, include_newlines, NULL);
 782                 }
 783         }
 784
 785         return isType (token, TOKEN_SEMICOLON);
 786 }
 787
 788 static void parseSwitch (tokenInfo *const token)
 789 {
 790         /*
 791          * switch (expression) {
 792          * case value1:
 793          *         statement;
 794          *         break;
 795          * case value2:
 796          *         statement;
 797          *         break;
 798          * default : statement;
 799          * }
 800          */
 801
 802         readToken (token);
 803
 804         if (isType (token, TOKEN_OPEN_PAREN))
 805         {
 806                 /*
 807                  * Handle nameless functions, these will only
 808                  * be considered methods.
 809                  */
 810                 skipArgumentList(token, FALSE, NULL);
 811         }
 812
 813         if (isType (token, TOKEN_OPEN_CURLY))
 814         {
 815                 parseBlock (token, token);
 816         }
 817 }
 818
 819 static boolean parseLoop (tokenInfo *const token, tokenInfo *const parent)
 820 {
 821         /*
 822          * Handles these statements
 823          *         for (x=0; x<3; x++)
 824          *                 document.write("This text is repeated three times<br>");
 825          *
 826          *         for (x=0; x<3; x++)
 827          *         {
 828          *                 document.write("This text is repeated three times<br>");
 829          *         }
 830          *
 831          *         while (number<5){
 832          *                 document.write(number+"<br>");
 833          *                 number++;
 834          *         }
 835          *
 836          *         do{
 837          *                 document.write(number+"<br>");
 838          *                 number++;
 839          *         }
 840          *         while (number<5);
 841          */
 842         boolean is_terminated = TRUE;
 843
 844         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
 845         {
 846                 readToken(token);
 847
 848                 if (isType (token, TOKEN_OPEN_PAREN))
 849                 {
 850                         /*
 851                          * Handle nameless functions, these will only
 852                          * be considered methods.
 853                          */
 854                         skipArgumentList(token, FALSE, NULL);
 855                 }
 856
 857                 if (isType (token, TOKEN_OPEN_CURLY))
 858                 {
 859                         /*
 860                          * This will be either a function or a class.
 861                          * We can only determine this by checking the body
 862                          * of the function.  If we find a "this." we know
 863                          * it is a class, otherwise it is a function.
 864                          */
 865                         parseBlock (token, parent);
 866                 }
 867                 else
 868                 {
 869                         is_terminated = parseLine(token, parent, FALSE);
 870                 }
 871         }
 872         else if (isKeyword (token, KEYWORD_do))
 873         {
 874                 readToken(token);
 875
 876                 if (isType (token, TOKEN_OPEN_CURLY))
 877                 {
 878                         /*
 879                          * This will be either a function or a class.
 880                          * We can only determine this by checking the body
 881                          * of the function.  If we find a "this." we know
 882                          * it is a class, otherwise it is a function.
 883                          */
 884                         parseBlock (token, parent);
 885                 }
 886                 else
 887                 {
 888                         is_terminated = parseLine(token, parent, FALSE);
 889                 }
 890
 891                 if (is_terminated)
 892                         readToken(token);
 893
 894                 if (isKeyword (token, KEYWORD_while))
 895                 {
 896                         readToken(token);
 897
 898                         if (isType (token, TOKEN_OPEN_PAREN))
 899                         {
 900                                 /*
 901                                  * Handle nameless functions, these will only
 902                                  * be considered methods.
 903                                  */
 904                                 skipArgumentList(token, TRUE, NULL);
 905                         }
 906                         if (! isType (token, TOKEN_SEMICOLON))
 907                                 is_terminated = FALSE;
 908                 }
 909         }
 910
 911         return is_terminated;
 912 }
 913
 914 static boolean parseIf (tokenInfo *const token, tokenInfo *const parent)
 915 {
 916         boolean read_next_token = TRUE;
 917         /*
 918          * If statements have two forms
 919          *         if ( ... )
 920          *                 one line;
 921          *
 922          *         if ( ... )
 923          *                statement;
 924          *         else
 925          *                statement
 926          *
 927          *         if ( ... ) {
 928          *                multiple;
 929          *                statements;
 930          *         }
 931          *
 932          *
 933          *         if ( ... ) {
 934          *                return elem
 935          *         }
 936          *
 937          *     This example if correctly written, but the
 938          *     else contains only 1 statement without a terminator
 939          *     since the function finishes with the closing brace.
 940          *
 941      *     function a(flag){
 942      *         if(flag)
 943      *             test(1);
 944      *         else
 945      *             test(2)
 946      *     }
 947          *
 948          * TODO:  Deal with statements that can optional end
 949          *                without a semi-colon.  Currently this messes up
 950          *                the parsing of blocks.
 951          *                Need to somehow detect this has happened, and either
 952          *                backup a token, or skip reading the next token if
 953          *                that is possible from all code locations.
 954          *
 955          */
 956
 957         readToken (token);
 958
 959         if (isKeyword (token, KEYWORD_if))
 960         {
 961                 /*
 962                  * Check for an "else if" and consume the "if"
 963                  */
 964                 readToken (token);
 965         }
 966
 967         if (isType (token, TOKEN_OPEN_PAREN))
 968         {
 969                 /*
 970                  * Handle nameless functions, these will only
 971                  * be considered methods.
 972                  */
 973                 skipArgumentList(token, FALSE, NULL);
 974         }
 975
 976         if (isType (token, TOKEN_OPEN_CURLY))
 977         {
 978                 /*
 979                  * This will be either a function or a class.
 980                  * We can only determine this by checking the body
 981                  * of the function.  If we find a "this." we know
 982                  * it is a class, otherwise it is a function.
 983                  */
 984                 parseBlock (token, parent);
 985         }
 986         else
 987         {
 988                 /* The next token should only be read if this statement had its own
 989                  * terminator */
 990                 read_next_token = findCmdTerm (token, TRUE);
 991         }
 992         return read_next_token;
 993 }
 994
 995 static void parseFunction (tokenInfo *const token)
 996 {
 997         tokenInfo *const name = newToken ();
 998         vString *const signature = vStringNew ();
 999         boolean is_class = FALSE;
1000
1001         /*
1002          * This deals with these formats
1003          *         function validFunctionTwo(a,b) {}
1004          */
1005
1006         readToken (name);
1007         /* Add scope in case this is an INNER function */
1008         addToScope(name, token->scope);
1009
1010         readToken (token);
1011         while (isType (token, TOKEN_PERIOD))
1012         {
1013                 readToken (token);
1014                 if ( isKeyword(token, KEYWORD_NONE) )
1015                 {
1016                         addContext (name, token);
1017                         readToken (token);
1018                 }
1019         }
1020
1021         if ( isType (token, TOKEN_OPEN_PAREN) )
1022                 skipArgumentList(token, FALSE, signature);
1023
1024         if ( isType (token, TOKEN_OPEN_CURLY) )
1025         {
1026                 is_class = parseBlock (token, name);
1027                 if ( is_class )
1028                         makeClassTag (name, signature);
1029                 else
1030                         makeFunctionTag (name, signature);
1031         }
1032
1033         findCmdTerm (token, FALSE);
1034
1035         vStringDelete (signature);
1036         deleteToken (name);
1037 }
1038
1039 static boolean parseBlock (tokenInfo *const token, tokenInfo *const orig_parent)
1040 {
1041         boolean is_class = FALSE;
1042         boolean read_next_token = TRUE;
1043         vString * saveScope = vStringNew ();
1044         tokenInfo *const parent = newToken ();
1045
1046         /* backup the parent token to allow calls like parseBlock(token, token) */
1047         copyToken (parent, orig_parent);
1048
1049         token->nestLevel++;
1050         /*
1051          * Make this routine a bit more forgiving.
1052          * If called on an open_curly advance it
1053          */
1054         if ( isType (token, TOKEN_OPEN_CURLY) &&
1055                         isKeyword(token, KEYWORD_NONE) )
1056                 readToken(token);
1057
1058         if (! isType (token, TOKEN_CLOSE_CURLY))
1059         {
1060                 /*
1061                  * Read until we find the closing brace,
1062                  * any nested braces will be handled within
1063                  */
1064                 do
1065                 {
1066                         read_next_token = TRUE;
1067                         if (isKeyword (token, KEYWORD_this))
1068                         {
1069                                 /*
1070                                  * Means we are inside a class and have found
1071                                  * a class, not a function
1072                                  */
1073                                 is_class = TRUE;
1074                                 vStringCopy(saveScope, token->scope);
1075                                 addToScope (token, parent->string);
1076
1077                                 /*
1078                                  * Ignore the remainder of the line
1079                                  * findCmdTerm(token);
1080                                  */
1081                                 read_next_token = parseLine (token, parent, is_class);
1082
1083                                 vStringCopy(token->scope, saveScope);
1084                         }
1085                         else if (isKeyword (token, KEYWORD_var) ||
1086                                          isKeyword (token, KEYWORD_let) ||
1087                                          isKeyword (token, KEYWORD_const))
1088                         {
1089                                 /*
1090                                  * Potentially we have found an inner function.
1091                                  * Set something to indicate the scope
1092                                  */
1093                                 vStringCopy(saveScope, token->scope);
1094                                 addToScope (token, parent->string);
1095                                 read_next_token = parseLine (token, parent, is_class);
1096                                 vStringCopy(token->scope, saveScope);
1097                         }
1098                         else if (isKeyword (token, KEYWORD_function))
1099                         {
1100                                 vStringCopy(saveScope, token->scope);
1101                                 addToScope (token, parent->string);
1102                                 parseFunction (token);
1103                                 vStringCopy(token->scope, saveScope);
1104                         }
1105                         else if (isType (token, TOKEN_OPEN_CURLY))
1106                         {
1107                                 /* Handle nested blocks */
1108                                 parseBlock (token, parent);
1109                         }
1110                         else
1111                         {
1112                                 /*
1113                                  * It is possible for a line to have no terminator
1114                                  * if the following line is a closing brace.
1115                                  * parseLine will detect this case and indicate
1116                                  * whether we should read an additional token.
1117                                  */
1118                                 read_next_token = parseLine (token, parent, is_class);
1119                         }
1120
1121                         /*
1122                          * Always read a new token unless we find a statement without
1123                          * a ending terminator
1124                          */
1125                         if( read_next_token )
1126                                 readToken(token);
1127
1128                         /*
1129                          * If we find a statement without a terminator consider the
1130                          * block finished, otherwise the stack will be off by one.
1131                          */
1132                 } while (! isType (token, TOKEN_EOF) &&
1133                                  ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1134         }
1135
1136         deleteToken (parent);
1137         vStringDelete(saveScope);
1138         token->nestLevel--;
1139
1140         return is_class;
1141 }
1142
1143 static boolean parseMethods (tokenInfo *const token, tokenInfo *const class)
1144 {
1145         tokenInfo *const name = newToken ();
1146         boolean has_methods = FALSE;
1147
1148         /*
1149          * This deals with these formats
1150          *         validProperty  : 2,
1151          *         validMethod    : function(a,b) {}
1152          *         'validMethod2' : function(a,b) {}
1153      *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1154          */
1155
1156         do
1157         {
1158                 readToken (token);
1159                 if (isType (token, TOKEN_CLOSE_CURLY))
1160                 {
1161                         /*
1162                          * This was most likely a variable declaration of a hash table.
1163                          * indicate there were no methods and return.
1164                          */
1165                         has_methods = FALSE;
1166                         goto cleanUp;
1167                 }
1168
1169                 if (isType (token, TOKEN_STRING) || isKeyword(token, KEYWORD_NONE))
1170                 {
1171                         copyToken(name, token);
1172
1173                         readToken (token);
1174                         if ( isType (token, TOKEN_COLON) )
1175                         {
1176                                 readToken (token);
1177                                 if ( isKeyword (token, KEYWORD_function) )
1178                                 {
1179                                         vString *const signature = vStringNew ();
1180
1181                                         readToken (token);
1182                                         if ( isType (token, TOKEN_OPEN_PAREN) )
1183                                         {
1184                                                 skipArgumentList(token, FALSE, signature);
1185                                         }
1186
1187                                         if (isType (token, TOKEN_OPEN_CURLY))
1188                                         {
1189                                                 has_methods = TRUE;
1190                                                 addToScope (name, class->string);
1191                                                 makeJsTag (name, JSTAG_METHOD, signature);
1192                                                 parseBlock (token, name);
1193
1194                                                 /*
1195                                                  * Read to the closing curly, check next
1196                                                  * token, if a comma, we must loop again
1197                                                  */
1198                                                 readToken (token);
1199                                         }
1200
1201                                         vStringDelete (signature);
1202                                 }
1203                                 else
1204                                 {
1205                                                 vString * saveScope = vStringNew ();
1206                                                 boolean has_child_methods = FALSE;
1207
1208                                                 /* skip whatever is the value */
1209                                                 while (! isType (token, TOKEN_COMMA) &&
1210                                                        ! isType (token, TOKEN_CLOSE_CURLY) &&
1211                                                        ! isType (token, TOKEN_EOF))
1212                                                 {
1213                                                         if (isType (token, TOKEN_OPEN_CURLY))
1214                                                         {
1215                                                                 vStringCopy (saveScope, token->scope);
1216                                                                 addToScope (token, class->string);
1217                                                                 has_child_methods = parseMethods (token, name);
1218                                                                 vStringCopy (token->scope, saveScope);
1219                                                                 readToken (token);
1220                                                         }
1221                                                         else if (isType (token, TOKEN_OPEN_PAREN))
1222                                                         {
1223                                                                 skipArgumentList (token, FALSE, NULL);
1224                                                         }
1225                                                         else if (isType (token, TOKEN_OPEN_SQUARE))
1226                                                         {
1227                                                                 skipArrayList (token, FALSE);
1228                                                         }
1229                                                         else
1230                                                         {
1231                                                                 readToken (token);
1232                                                         }
1233                                                 }
1234                                                 vStringDelete (saveScope);
1235
1236                                                 has_methods = TRUE;
1237                                                 addToScope (name, class->string);
1238                                                 if (has_child_methods)
1239                                                         makeJsTag (name, JSTAG_CLASS, NULL);
1240                                                 else
1241                                                         makeJsTag (name, JSTAG_PROPERTY, NULL);
1242                                 }
1243                         }
1244                 }
1245         } while ( isType(token, TOKEN_COMMA) );
1246
1247         findCmdTerm (token, FALSE);
1248
1249 cleanUp:
1250         deleteToken (name);
1251
1252         return has_methods;
1253 }
1254
1255 static boolean parseStatement (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class)
1256 {
1257         tokenInfo *const name = newToken ();
1258         tokenInfo *const secondary_name = newToken ();
1259         tokenInfo *const method_body_token = newToken ();
1260         vString * saveScope = vStringNew ();
1261         boolean is_class = FALSE;
1262         boolean is_var = FALSE;
1263         boolean is_const = FALSE;
1264         boolean is_terminated = TRUE;
1265         boolean is_global = FALSE;
1266         boolean has_methods = FALSE;
1267         vString *       fulltag;
1268
1269         vStringClear(saveScope);
1270         /*
1271          * Functions can be named or unnamed.
1272          * This deals with these formats:
1273          * Function
1274          *         validFunctionOne = function(a,b) {}
1275          *         testlib.validFunctionFive = function(a,b) {}
1276          *         var innerThree = function(a,b) {}
1277          *         var innerFour = (a,b) {}
1278          *         var D2 = secondary_fcn_name(a,b) {}
1279          *         var D3 = new Function("a", "b", "return a+b;");
1280          * Class
1281          *         testlib.extras.ValidClassOne = function(a,b) {
1282          *                 this.a = a;
1283          *         }
1284          * Class Methods
1285          *         testlib.extras.ValidClassOne.prototype = {
1286          *                 'validMethodOne' : function(a,b) {},
1287          *                 'validMethodTwo' : function(a,b) {}
1288          *         }
1289      *     ValidClassTwo = function ()
1290      *     {
1291      *         this.validMethodThree = function() {}
1292      *         // unnamed method
1293      *         this.validMethodFour = () {}
1294      *     }
1295          *         Database.prototype.validMethodThree = Database_getTodaysDate;
1296          */
1297
1298         if ( is_inside_class )
1299                 is_class = TRUE;
1300         /*
1301          * var can precede an inner function
1302          */
1303         if ( isKeyword(token, KEYWORD_var) ||
1304                  isKeyword(token, KEYWORD_let) ||
1305                  isKeyword(token, KEYWORD_const) )
1306         {
1307                 is_const = isKeyword(token, KEYWORD_const);
1308                 /*
1309                  * Only create variables for global scope
1310                  */
1311                 if ( token->nestLevel == 0 )
1312                 {
1313                         is_global = TRUE;
1314                 }
1315                 readToken(token);
1316         }
1317
1318         if ( isKeyword(token, KEYWORD_this) )
1319         {
1320                 readToken(token);
1321                 if (isType (token, TOKEN_PERIOD))
1322                 {
1323                         readToken(token);
1324                 }
1325         }
1326
1327         copyToken(name, token);
1328
1329         while (! isType (token, TOKEN_CLOSE_CURLY) &&
1330                ! isType (token, TOKEN_SEMICOLON)   &&
1331                ! isType (token, TOKEN_EQUAL_SIGN)  &&
1332                ! isType (token, TOKEN_EOF))
1333         {
1334                 if (isType (token, TOKEN_OPEN_CURLY))
1335                         parseBlock (token, parent);
1336
1337                 /* Potentially the name of the function */
1338                 readToken (token);
1339                 if (isType (token, TOKEN_PERIOD))
1340                 {
1341                         /*
1342                          * Cannot be a global variable is it has dot references in the name
1343                          */
1344                         is_global = FALSE;
1345                         do
1346                         {
1347                                 readToken (token);
1348                                 if ( isKeyword(token, KEYWORD_NONE) )
1349                                 {
1350                                         if ( is_class )
1351                                         {
1352                                                 addToScope(token, name->string);
1353                                         }
1354                                         else
1355                                                 addContext (name, token);
1356
1357                                         readToken (token);
1358                                 }
1359                                 else if ( isKeyword(token, KEYWORD_prototype) )
1360                                 {
1361                                         /*
1362                                          * When we reach the "prototype" tag, we infer:
1363                                          *     "BindAgent" is a class
1364                                          *     "build"     is a method
1365                                          *
1366                                          * function BindAgent( repeatableIdName, newParentIdName ) {
1367                                          * }
1368                                          *
1369                                          * CASE 1
1370                                          * Specified function name: "build"
1371                                          *     BindAgent.prototype.build = function( mode ) {
1372                                          *        maybe parse nested functions
1373                                          *     }
1374                                          *
1375                                          * CASE 2
1376                                          * Prototype listing
1377                                          *     ValidClassOne.prototype = {
1378                                          *         'validMethodOne' : function(a,b) {},
1379                                          *         'validMethodTwo' : function(a,b) {}
1380                                          *     }
1381                                          *
1382                                          */
1383                                         makeClassTag (name, NULL);
1384                                         is_class = TRUE;
1385
1386                                         /*
1387                                          * There should a ".function_name" next.
1388                                          */
1389                                         readToken (token);
1390                                         if (isType (token, TOKEN_PERIOD))
1391                                         {
1392                                                 /*
1393                                                  * Handle CASE 1
1394                                                  */
1395                                                 readToken (token);
1396                                                 if ( isKeyword(token, KEYWORD_NONE) )
1397                                                 {
1398                                                         vString *const signature = vStringNew ();
1399
1400                                                         vStringCopy(saveScope, token->scope);
1401                                                         addToScope(token, name->string);
1402
1403                                                         readToken (method_body_token);
1404                                                         vStringCopy (method_body_token->scope, token->scope);
1405
1406                                                         while (! isType (method_body_token, TOKEN_SEMICOLON) &&
1407                                                                ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
1408                                                                ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
1409                                                                ! isType (method_body_token, TOKEN_EOF))
1410                                                         {
1411                                                                 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1412                                                                         skipArgumentList(method_body_token, FALSE,
1413                                                                                                          vStringLength (signature) == 0 ? signature : NULL);
1414                                                                 else
1415                                                                         readToken (method_body_token);
1416                                                         }
1417
1418                                                         makeJsTag (token, JSTAG_METHOD, signature);
1419                                                         vStringDelete (signature);
1420
1421                                                         if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1422                                                         {
1423                                                                 parseBlock (method_body_token, token);
1424                                                                 is_terminated = TRUE;
1425                                                         }
1426                                                         else
1427                                                                 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
1428                                                         goto cleanUp;
1429                                                 }
1430                                         }
1431                                         else if (isType (token, TOKEN_EQUAL_SIGN))
1432                                         {
1433                                                 readToken (token);
1434                                                 if (isType (token, TOKEN_OPEN_CURLY))
1435                                                 {
1436                                                         /*
1437                                                          * Handle CASE 2
1438                                                          *
1439                                                          * Creates tags for each of these class methods
1440                                                          *     ValidClassOne.prototype = {
1441                                                          *         'validMethodOne' : function(a,b) {},
1442                                                          *         'validMethodTwo' : function(a,b) {}
1443                                                          *     }
1444                                                          */
1445                                                         parseMethods(token, name);
1446                                                         /*
1447                                                          * Find to the end of the statement
1448                                                          */
1449                                                         findCmdTerm (token, FALSE);
1450                                                         token->ignoreTag = FALSE;
1451                                                         is_terminated = TRUE;
1452                                                         goto cleanUp;
1453                                                 }
1454                                         }
1455                                 }
1456                                 else
1457                                         readToken (token);
1458                         } while (isType (token, TOKEN_PERIOD));
1459                 }
1460
1461                 if ( isType (token, TOKEN_OPEN_PAREN) )
1462                         skipArgumentList(token, FALSE, NULL);
1463
1464                 if ( isType (token, TOKEN_OPEN_SQUARE) )
1465                         skipArrayList(token, FALSE);
1466
1467                 /*
1468                 if ( isType (token, TOKEN_OPEN_CURLY) )
1469                 {
1470                         is_class = parseBlock (token, name);
1471                 }
1472                 */
1473         }
1474
1475         if ( isType (token, TOKEN_CLOSE_CURLY) )
1476         {
1477                 /*
1478                  * Reaching this section without having
1479                  * processed an open curly brace indicates
1480                  * the statement is most likely not terminated.
1481                  */
1482                 is_terminated = FALSE;
1483                 goto cleanUp;
1484         }
1485
1486         if ( isType (token, TOKEN_SEMICOLON) )
1487         {
1488                 /*
1489                  * Only create variables for global scope
1490                  */
1491                 if ( token->nestLevel == 0 && is_global )
1492                 {
1493                         /*
1494                          * Handles this syntax:
1495                          *         var g_var2;
1496                          */
1497                         if (isType (token, TOKEN_SEMICOLON))
1498                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1499                 }
1500                 /*
1501                  * Statement has ended.
1502                  * This deals with calls to functions, like:
1503                  *     alert(..);
1504                  */
1505                 goto cleanUp;
1506         }
1507
1508         if ( isType (token, TOKEN_EQUAL_SIGN) )
1509         {
1510                 int parenDepth = 0;
1511
1512                 readToken (token);
1513
1514                 /* rvalue might be surrounded with parentheses */
1515                 while (isType (token, TOKEN_OPEN_PAREN))
1516                 {
1517                         parenDepth++;
1518                         readToken (token);
1519                 }
1520
1521                 if ( isKeyword (token, KEYWORD_function) )
1522                 {
1523                         vString *const signature = vStringNew ();
1524
1525                         readToken (token);
1526
1527                         if ( isKeyword (token, KEYWORD_NONE) &&
1528                                         ! isType (token, TOKEN_OPEN_PAREN) )
1529                         {
1530                                 /*
1531                                  * Functions of this format:
1532                                  *         var D2A = function theAdd(a, b)
1533                                  *         {
1534                                  *                return a+b;
1535                                  *         }
1536                                  * Are really two separate defined functions and
1537                                  * can be referenced in two ways:
1538                                  *         alert( D2A(1,2) );                     // produces 3
1539                                  *         alert( theAdd(1,2) );                  // also produces 3
1540                                  * So it must have two tags:
1541                                  *         D2A
1542                                  *         theAdd
1543                                  * Save the reference to the name for later use, once
1544                                  * we have established this is a valid function we will
1545                                  * create the secondary reference to it.
1546                                  */
1547                                 copyToken(secondary_name, token);
1548                                 readToken (token);
1549                         }
1550
1551                         if ( isType (token, TOKEN_OPEN_PAREN) )
1552                                 skipArgumentList(token, FALSE, signature);
1553
1554                         if (isType (token, TOKEN_OPEN_CURLY))
1555                         {
1556                                 /*
1557                                  * This will be either a function or a class.
1558                                  * We can only determine this by checking the body
1559                                  * of the function.  If we find a "this." we know
1560                                  * it is a class, otherwise it is a function.
1561                                  */
1562                                 if ( is_inside_class )
1563                                 {
1564                                         makeJsTag (name, JSTAG_METHOD, signature);
1565                                         if ( vStringLength(secondary_name->string) > 0 )
1566                                                 makeFunctionTag (secondary_name, signature);
1567                                         parseBlock (token, name);
1568                                 }
1569                                 else
1570                                 {
1571                                         is_class = parseBlock (token, name);
1572                                         if ( is_class )
1573                                                 makeClassTag (name, signature);
1574                                         else
1575                                                 makeFunctionTag (name, signature);
1576
1577                                         if ( vStringLength(secondary_name->string) > 0 )
1578                                                 makeFunctionTag (secondary_name, signature);
1579                                 }
1580                         }
1581
1582                         vStringDelete (signature);
1583                 }
1584                 else if (isType (token, TOKEN_OPEN_CURLY))
1585                 {
1586                         /*
1587                          * Creates tags for each of these class methods
1588                          *     ValidClassOne.prototype = {
1589                          *         'validMethodOne' : function(a,b) {},
1590                          *         'validMethodTwo' : function(a,b) {}
1591                          *     }
1592                          * Or checks if this is a hash variable.
1593                          *     var z = {};
1594                          */
1595                         has_methods = parseMethods(token, name);
1596                         if (has_methods)
1597                                 makeJsTag (name, JSTAG_CLASS, NULL);
1598                         else
1599                         {
1600                                 /*
1601                                  * Only create variables for global scope
1602                          */
1603                                 if ( token->nestLevel == 0 && is_global )
1604                                 {
1605                                         /*
1606                                          * A pointer can be created to the function.
1607                                          * If we recognize the function/class name ignore the variable.
1608                                          * This format looks identical to a variable definition.
1609                                          * A variable defined outside of a block is considered
1610                                          * a global variable:
1611                                          *         var g_var1 = 1;
1612                                          *         var g_var2;
1613                                          * This is not a global variable:
1614                                          *         var g_var = function;
1615                                          * This is a global variable:
1616                                          *         var g_var = different_var_name;
1617                                          */
1618                                         fulltag = vStringNew ();
1619                                         if (vStringLength (token->scope) > 0)
1620                                         {
1621                                                 vStringCopy(fulltag, token->scope);
1622                                                 vStringCatS (fulltag, ".");
1623                                                 vStringCatS (fulltag, vStringValue(token->string));
1624                                         }
1625                                         else
1626                                         {
1627                                                 vStringCopy(fulltag, token->string);
1628                                         }
1629                                         vStringTerminate(fulltag);
1630                                         if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1631                                                         ! stringListHas(ClassNames, vStringValue (fulltag)) )
1632                                         {
1633                                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1634                                         }
1635                                         vStringDelete (fulltag);
1636                                 }
1637                         }
1638                         if (isType (token, TOKEN_CLOSE_CURLY))
1639                         {
1640                                 /*
1641                                  * Assume the closing parantheses terminates
1642                                  * this statements.
1643                                  */
1644                                 is_terminated = TRUE;
1645                         }
1646                 }
1647                 else if (isKeyword (token, KEYWORD_new))
1648                 {
1649                         readToken (token);
1650                         is_var = isType (token, TOKEN_IDENTIFIER);
1651                         if ( isKeyword (token, KEYWORD_function) ||
1652                                         isKeyword (token, KEYWORD_capital_function) ||
1653                                         isKeyword (token, KEYWORD_capital_object) ||
1654                                         is_var )
1655                         {
1656                                 if ( isKeyword (token, KEYWORD_capital_object) )
1657                                         is_class = TRUE;
1658
1659                                 readToken (token);
1660                                 if ( isType (token, TOKEN_OPEN_PAREN) )
1661                                         skipArgumentList(token, TRUE, NULL);
1662
1663                                 if (isType (token, TOKEN_SEMICOLON))
1664                                 {
1665                                         if ( token->nestLevel == 0 )
1666                                         {
1667                                                 if ( is_var )
1668                                                 {
1669                                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1670                                                 }
1671                                                 else
1672                                                 {
1673                                                         if ( is_class )
1674                                                         {
1675                                                                 makeClassTag (name, NULL);
1676                                                         } else {
1677                                                                 /* FIXME: we cannot really get a meaningful
1678                                                                  * signature from a `new Function()` call,
1679                                                                  * so for now just don't set any */
1680                                                                 makeFunctionTag (name, NULL);
1681                                                         }
1682                                                 }
1683                                         }
1684                                 }
1685                                 else if (isType (token, TOKEN_CLOSE_CURLY))
1686                                         is_terminated = FALSE;
1687                         }
1688                 }
1689                 else if (isKeyword (token, KEYWORD_NONE))
1690                 {
1691                         /*
1692                          * Only create variables for global scope
1693                          */
1694                         if ( token->nestLevel == 0 && is_global )
1695                         {
1696                                 /*
1697                                  * A pointer can be created to the function.
1698                                  * If we recognize the function/class name ignore the variable.
1699                                  * This format looks identical to a variable definition.
1700                                  * A variable defined outside of a block is considered
1701                                  * a global variable:
1702                                  *         var g_var1 = 1;
1703                                  *         var g_var2;
1704                                  * This is not a global variable:
1705                                  *         var g_var = function;
1706                                  * This is a global variable:
1707                                  *         var g_var = different_var_name;
1708                                  */
1709                                 fulltag = vStringNew ();
1710                                 if (vStringLength (token->scope) > 0)
1711                                 {
1712                                         vStringCopy(fulltag, token->scope);
1713                                         vStringCatS (fulltag, ".");
1714                                         vStringCatS (fulltag, vStringValue(token->string));
1715                                 }
1716                                 else
1717                                 {
1718                                         vStringCopy(fulltag, token->string);
1719                                 }
1720                                 vStringTerminate(fulltag);
1721                                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
1722                                                 ! stringListHas(ClassNames, vStringValue (fulltag)) )
1723                                 {
1724                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL);
1725                                 }
1726                                 vStringDelete (fulltag);
1727                         }
1728                 }
1729
1730                 if (parenDepth > 0)
1731                 {
1732                         while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
1733                         {
1734                                 if (isType (token, TOKEN_OPEN_PAREN))
1735                                         parenDepth++;
1736                                 else if (isType (token, TOKEN_CLOSE_PAREN))
1737                                         parenDepth--;
1738                                 readTokenFull (token, TRUE, NULL);
1739                         }
1740                         if (isType (token, TOKEN_CLOSE_CURLY))
1741                                 is_terminated = FALSE;
1742                 }
1743         }
1744
1745         /* if we aren't already at the cmd end, advance to it and check whether
1746          * the statement was terminated */
1747         if (! isType (token, TOKEN_CLOSE_CURLY) &&
1748             ! isType (token, TOKEN_SEMICOLON))
1749         {
1750                 /*
1751                  * Statements can be optionally terminated in the case of
1752                  * statement prior to a close curly brace as in the
1753                  * document.write line below:
1754                  *
1755                  * function checkForUpdate() {
1756                  *         if( 1==1 ) {
1757                  *                 document.write("hello from checkForUpdate<br>")
1758                  *         }
1759                  *         return 1;
1760                  * }
1761                  */
1762                 is_terminated = findCmdTerm (token, TRUE);
1763         }
1764
1765 cleanUp:
1766         vStringCopy(token->scope, saveScope);
1767         deleteToken (name);
1768         deleteToken (secondary_name);
1769         deleteToken (method_body_token);
1770         vStringDelete(saveScope);
1771
1772         return is_terminated;
1773 }
1774
1775 static void parseUI5 (tokenInfo *const token)
1776 {
1777         tokenInfo *const name = newToken ();
1778         /*
1779          * SAPUI5 is built on top of jQuery.
1780          * It follows a standard format:
1781          *     sap.ui.controller("id.of.controller", {
1782          *         method_name : function... {
1783          *         },
1784          *
1785          *         method_name : function ... {
1786          *         }
1787          *     }
1788          *
1789          * Handle the parsing of the initial controller (and the
1790          * same for "view") and then allow the methods to be
1791          * parsed as usual.
1792          */
1793
1794         readToken (token);
1795
1796         if (isType (token, TOKEN_PERIOD))
1797         {
1798                 readToken (token);
1799                 while (! isType (token, TOKEN_OPEN_PAREN) &&
1800                            ! isType (token, TOKEN_EOF))
1801                 {
1802                         readToken (token);
1803                 }
1804                 readToken (token);
1805
1806                 if (isType (token, TOKEN_STRING))
1807                 {
1808                         copyToken(name, token);
1809                         readToken (token);
1810                 }
1811
1812                 if (isType (token, TOKEN_COMMA))
1813                         readToken (token);
1814
1815                 do
1816                 {
1817                         parseMethods (token, name);
1818                 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
1819                                  ! isType (token, TOKEN_EOF));
1820         }
1821
1822         deleteToken (name);
1823 }
1824
1825 static boolean parseLine (tokenInfo *const token, tokenInfo *const parent, boolean is_inside_class)
1826 {
1827         boolean is_terminated = TRUE;
1828         /*
1829          * Detect the common statements, if, while, for, do, ...
1830          * This is necessary since the last statement within a block "{}"
1831          * can be optionally terminated.
1832          *
1833          * If the statement is not terminated, we need to tell
1834          * the calling routine to prevent reading an additional token
1835          * looking for the end of the statement.
1836          */
1837
1838         if (isType(token, TOKEN_KEYWORD))
1839         {
1840                 switch (token->keyword)
1841                 {
1842                         case KEYWORD_for:
1843                         case KEYWORD_while:
1844                         case KEYWORD_do:
1845                                 is_terminated = parseLoop (token, parent);
1846                                 break;
1847                         case KEYWORD_if:
1848                         case KEYWORD_else:
1849                         case KEYWORD_try:
1850                         case KEYWORD_catch:
1851                         case KEYWORD_finally:
1852                                 /* Common semantics */
1853                                 is_terminated = parseIf (token, parent);
1854                                 break;
1855                         case KEYWORD_switch:
1856                                 parseSwitch (token);
1857                                 break;
1858                         case KEYWORD_return:
1859                                 is_terminated = findCmdTerm (token, TRUE);
1860                                 break;
1861                         default:
1862                                 is_terminated = parseStatement (token, parent, is_inside_class);
1863                                 break;
1864                 }
1865         }
1866         else
1867         {
1868                 /*
1869                  * Special case where single line statements may not be
1870                  * SEMICOLON terminated.  parseBlock needs to know this
1871                  * so that it does not read the next token.
1872                  */
1873                 is_terminated = parseStatement (token, parent, is_inside_class);
1874         }
1875         return is_terminated;
1876 }
1877
1878 static void parseJsFile (tokenInfo *const token)
1879 {
1880         do
1881         {
1882                 readToken (token);
1883
1884                 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_function)
1885                         parseFunction (token);
1886                 else if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
1887                         parseUI5 (token);
1888                 else
1889                         parseLine (token, token, FALSE);
1890         } while (! isType (token, TOKEN_EOF));
1891 }
1892
1893 static void initialize (const langType language)
1894 {
1895         Assert (sizeof (JsKinds) / sizeof (JsKinds [0]) == JSTAG_COUNT);
1896         Lang_js = language;
1897         buildJsKeywordHash ();
1898 }
1899
1900 static void findJsTags (void)
1901 {
1902         tokenInfo *const token = newToken ();
1903
1904         ClassNames = stringListNew ();
1905         FunctionNames = stringListNew ();
1906         LastTokenType = TOKEN_UNDEFINED;
1907
1908         parseJsFile (token);
1909
1910         stringListDelete (ClassNames);
1911         stringListDelete (FunctionNames);
1912         ClassNames = NULL;
1913         FunctionNames = NULL;
1914         deleteToken (token);
1915 }
1916
1917 /* Create parser definition structure */
1918 extern parserDefinition* JavaScriptParser (void)
1919 {
1920         static const char *const extensions [] = { "js", NULL };
1921         parserDefinition *const def = parserNew ("JavaScript");
1922         def->extensions = extensions;
1923         /*
1924          * New definitions for parsing instead of regex
1925          */
1926         def->kinds              = JsKinds;
1927         def->kindCount  = KIND_COUNT (JsKinds);
1928         def->parser             = findJsTags;
1929         def->initialize = initialize;
1930
1931         return def;
1932 }
1933 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */