ctags/parsers/geany_jscript.c

   1 /*
   2  *       Copyright (c) 2003, Darren Hiebert
   3  *
   4  *       This source code is released for free distribution under the terms of the
   5  *       GNU General Public License version 2 or (at your option) any later version.
   6  *
   7  *       This module contains functions for generating tags for JavaScript language
   8  *       files.
   9  *
  10  *       Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
  11  *
  12  *       This is a good reference for different forms of the function statement:
  13  *               http://www.permadi.com/tutorial/jsFunc/
  14  *   Another good reference:
  15  *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  16  */
  17
  18 /*
  19  *       INCLUDE FILES
  20  */
  21 #include "general.h"    /* must always come first */
  22 #include <ctype.h>      /* to define isalpha () */
  23 #ifdef DEBUG
  24 #include <stdio.h>
  25 #endif
  26
  27 #ifdef HAVE_ICONV
  28 #include <iconv.h>
  29 #include <errno.h>
  30 #       ifdef WORDS_BIGENDIAN
  31 #               define INTERNAL_ENCODING "UTF-32BE"
  32 #       else
  33 #               define INTERNAL_ENCODING "UTF-32LE"
  34 #       endif /* WORDS_BIGENDIAN */
  35 #endif
  36
  37 #include <string.h>
  38 #include "debug.h"
  39 #include "entry.h"
  40 #include "keyword.h"
  41 #include "parse.h"
  42 #include "read.h"
  43 #include "routines.h"
  44 #include "vstring.h"
  45 #include "objpool.h"
  46 #include "options.h"
  47 #include "mbcs.h"
  48 #include "trace.h"
  49 #include "strlist.h"
  50
  51 /*
  52  *       MACROS
  53  */
  54 #define isType(token,t)         (bool) ((token)->type == (t))
  55 #define isKeyword(token,k)      (bool) ((token)->keyword == (k))
  56 #define isIdentChar(c) \
  57         (isalpha (c) || isdigit (c) || (c) == '$' || \
  58                 (c) == '@' || (c) == '_' || (c) == '#' || \
  59                 (c) >= 0x80)
  60 #define newToken() (objPoolGet (TokenPool))
  61 #define deleteToken(t) (objPoolPut (TokenPool, (t)))
  62
  63 /*
  64  * Debugging
  65  *
 * Uncomment this to enable extensive debugging to stderr in jscript code.
 * Please note that DO_TRACING (the macro tested below together with this
 * one) must also be #defined for this to work; see main/trace.h.
  69  *
  70  */
  71 //#define JSCRIPT_DEBUGGING_ENABLED 1
  72
  73 #if defined(DO_TRACING) && defined(JSCRIPT_DEBUGGING_ENABLED)
  74         #define JSCRIPT_DO_DEBUGGING
  75 #endif
  76
  77 #ifdef JSCRIPT_DO_DEBUGGING
  78
  79 #define JSCRIPT_DEBUG_ENTER() TRACE_ENTER()
  80 #define JSCRIPT_DEBUG_LEAVE() TRACE_LEAVE()
  81
  82 #define JSCRIPT_DEBUG_ENTER_TEXT(_szFormat,...) \
  83         TRACE_ENTER_TEXT(_szFormat,## __VA_ARGS__)
  84
  85 #define JSCRIPT_DEBUG_LEAVE_TEXT(_szFormat,...) \
  86         TRACE_LEAVE_TEXT(_szFormat,## __VA_ARGS__)
  87
  88 #define JSCRIPT_DEBUG_PRINT(_szFormat,...) \
  89         TRACE_PRINT(_szFormat,## __VA_ARGS__)
  90
  91 #define JSCRIPT_DEBUG_ASSERT(_condition,_szFormat,...) \
  92         TRACE_ASSERT(_condition,_szFormat,## __VA_ARGS__)
  93
  94 #else //!JSCRIPT_DO_DEBUGGING
  95
  96 #define JSCRIPT_DEBUG_ENTER() do { } while(0)
  97 #define JSCRIPT_DEBUG_LEAVE() do { } while(0)
  98
  99 #define JSCRIPT_DEBUG_ENTER_TEXT(_szFormat,...) do { } while(0)
 100 #define JSCRIPT_DEBUG_LEAVE_TEXT(_szFormat,...) do { } while(0)
 101
 102 #define JSCRIPT_DEBUG_PRINT(_szFormat,...) do { } while(0)
 103
 104 #define JSCRIPT_DEBUG_ASSERT(_condition,_szFormat,...) do { } while(0)
 105
 106 #endif //!JSCRIPT_DO_DEBUGGING
 107
 108
 109 /*
 110  *       DATA DECLARATIONS
 111  */
 112
 113 /*
 114  * Tracks class and function names already created
 115  */
 116 static stringList *ClassNames;
 117 static stringList *FunctionNames;
 118
 119 /*      Used to specify type of keyword.
 120 */
 121 enum eKeywordId {
 122         KEYWORD_function,
 123         KEYWORD_capital_function,
 124         KEYWORD_capital_object,
 125         KEYWORD_prototype,
 126         KEYWORD_var,
 127         KEYWORD_let,
 128         KEYWORD_const,
 129         KEYWORD_new,
 130         KEYWORD_this,
 131         KEYWORD_for,
 132         KEYWORD_while,
 133         KEYWORD_do,
 134         KEYWORD_if,
 135         KEYWORD_else,
 136         KEYWORD_switch,
 137         KEYWORD_try,
 138         KEYWORD_catch,
 139         KEYWORD_finally,
 140         KEYWORD_sap,
 141         KEYWORD_return,
 142         KEYWORD_class,
 143         KEYWORD_extends,
 144         KEYWORD_static,
 145         KEYWORD_default,
 146         KEYWORD_export,
 147         KEYWORD_async,
 148 };
 149 typedef int keywordId; /* to allow KEYWORD_NONE */
 150
 151 typedef enum eTokenType {
 152         TOKEN_UNDEFINED,
 153         TOKEN_EOF,
 154         TOKEN_CHARACTER,
 155         TOKEN_CLOSE_PAREN,
 156         TOKEN_SEMICOLON,
 157         TOKEN_COLON,
 158         TOKEN_COMMA,
 159         TOKEN_KEYWORD,
 160         TOKEN_OPEN_PAREN,
 161         TOKEN_IDENTIFIER,
 162         TOKEN_STRING,
 163         TOKEN_TEMPLATE_STRING,
 164         TOKEN_PERIOD,
 165         TOKEN_OPEN_CURLY,
 166         TOKEN_CLOSE_CURLY,
 167         TOKEN_EQUAL_SIGN,
 168         TOKEN_OPEN_SQUARE,
 169         TOKEN_CLOSE_SQUARE,
 170         TOKEN_REGEXP,
 171         TOKEN_POSTFIX_OPERATOR,
 172         TOKEN_STAR,
 173         TOKEN_BINARY_OPERATOR
 174 } tokenType;
 175
 176 typedef struct sTokenInfo {
 177         tokenType               type;
 178         keywordId               keyword;
 179         vString *               string;
 180         vString *               scope;
 181         unsigned long   lineNumber;
 182         MIOPos                  filePosition;
 183         int                             nestLevel;
 184         bool                    ignoreTag;
 185 } tokenInfo;
 186
 187 /*
 188  *      DATA DEFINITIONS
 189  */
 190
 191 static tokenType LastTokenType;
 192 static tokenInfo *NextToken;
 193
 194 static langType Lang_js;
 195
 196 static objPool *TokenPool = NULL;
 197
 198 #ifdef HAVE_ICONV
 199 static iconv_t JSUnicodeConverter = (iconv_t) -2;
 200 #endif
 201
 202 typedef enum {
 203         JSTAG_FUNCTION,
 204         JSTAG_CLASS,
 205         JSTAG_METHOD,
 206         JSTAG_PROPERTY,
 207         JSTAG_CONSTANT,
 208         JSTAG_VARIABLE,
 209         JSTAG_GENERATOR,
 210         JSTAG_COUNT
 211 } jsKind;
 212
 213 static kindDefinition JsKinds [] = {
 214         { true,  'f', "function",         "functions"              },
 215         { true,  'c', "class",            "classes"                        },
 216         { true,  'm', "method",           "methods"                        },
 217         { true,  'p', "property",         "properties"             },
 218         { true,  'C', "constant",         "constants"              },
 219         { true,  'v', "variable",         "global variables"   },
 220         { true,  'g', "generator",        "generators"             }
 221 };
 222
 223 static const keywordTable JsKeywordTable [] = {
 224         /* keyword              keyword ID */
 225         { "function",   KEYWORD_function                        },
 226         { "Function",   KEYWORD_capital_function        },
 227         { "Object",             KEYWORD_capital_object          },
 228         { "prototype",  KEYWORD_prototype                       },
 229         { "var",                KEYWORD_var                                     },
 230         { "let",                KEYWORD_let                                     },
 231         { "const",              KEYWORD_const                           },
 232         { "new",                KEYWORD_new                                     },
 233         { "this",               KEYWORD_this                            },
 234         { "for",                KEYWORD_for                                     },
 235         { "while",              KEYWORD_while                           },
 236         { "do",                 KEYWORD_do                                      },
 237         { "if",                 KEYWORD_if                                      },
 238         { "else",               KEYWORD_else                            },
 239         { "switch",             KEYWORD_switch                          },
 240         { "try",                KEYWORD_try                                     },
 241         { "catch",              KEYWORD_catch                           },
 242         { "finally",    KEYWORD_finally                         },
 243         { "sap",            KEYWORD_sap                                 },
 244         { "return",             KEYWORD_return                          },
 245         { "class",              KEYWORD_class                           },
 246         { "extends",    KEYWORD_extends                         },
 247         { "static",             KEYWORD_static                          },
 248         { "default",    KEYWORD_default                         },
 249         { "export",             KEYWORD_export                          },
 250         { "async",              KEYWORD_async                           },
 251 };
 252
 253 /*
 254  *       FUNCTION DEFINITIONS
 255  */
 256
 257 /* Recursive functions */
 258 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr);
 259 static void parseFunction (tokenInfo *const token);
 260 static bool parseBlock (tokenInfo *const token, const vString *const parentScope);
 261 static bool parseLine (tokenInfo *const token, bool is_inside_class);
 262 static void parseUI5 (tokenInfo *const token);
 263
 264 static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
 265 {
 266         tokenInfo *token = xMalloc (1, tokenInfo);
 267
 268         token->string           = vStringNew ();
 269         token->scope            = vStringNew ();
 270
 271         return token;
 272 }
 273
 274 static void clearPoolToken (void *data)
 275 {
 276         tokenInfo *token = data;
 277
 278         token->type                     = TOKEN_UNDEFINED;
 279         token->keyword          = KEYWORD_NONE;
 280         token->nestLevel        = 0;
 281         token->ignoreTag        = false;
 282         token->lineNumber   = getInputLineNumber ();
 283         token->filePosition = getInputFilePosition ();
 284         vStringClear (token->string);
 285         vStringClear (token->scope);
 286 }
 287
 288 static void deletePoolToken (void *data)
 289 {
 290         tokenInfo *token = data;
 291         vStringDelete (token->string);
 292         vStringDelete (token->scope);
 293         eFree (token);
 294 }
 295
 296 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
 297                        bool const include_non_read_info)
 298 {
 299         dest->lineNumber = src->lineNumber;
 300         dest->filePosition = src->filePosition;
 301         dest->type = src->type;
 302         dest->keyword = src->keyword;
 303         vStringCopy(dest->string, src->string);
 304         if (include_non_read_info)
 305         {
 306                 dest->nestLevel = src->nestLevel;
 307                 vStringCopy(dest->scope, src->scope);
 308         }
 309 }
 310
 311 /*
 312  *       Tag generation functions
 313  */
 314
 315 static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind,
 316                                                          vString *const signature, vString *const inheritance,
 317                                                          bool anonymous)
 318 {
 319         if (JsKinds [kind].enabled && ! token->ignoreTag )
 320         {
 321                 const char *name = vStringValue (token->string);
 322                 vString *fullscope = vStringNewCopy (token->scope);
 323                 const char *p;
 324                 tagEntryInfo e;
 325
 326                 if (kind != JSTAG_PROPERTY &&  (p = strrchr (name, '.')) != NULL )
 327                 {
 328                         if (vStringLength (fullscope) > 0)
 329                                 vStringPut (fullscope, '.');
 330                         vStringNCatS (fullscope, name, (size_t) (p - name));
 331                         name = p + 1;
 332                 }
 333
 334                 initTagEntry (&e, name, kind);
 335
 336                 JSCRIPT_DEBUG_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope));
 337
 338                 e.lineNumber   = token->lineNumber;
 339                 e.filePosition = token->filePosition;
 340
 341                 if ( vStringLength(fullscope) > 0 )
 342                 {
 343                         /* FIXME: proper parent type */
 344                         jsKind parent_kind = JSTAG_CLASS;
 345
 346                         /*
 347                          * If we're creating a function (and not a method),
 348                          * guess we're inside another function
 349                          */
 350                         if (kind == JSTAG_FUNCTION)
 351                                 parent_kind = JSTAG_FUNCTION;
 352
 353                         e.extensionFields.scopeKindIndex = parent_kind;
 354                         e.extensionFields.scopeName = vStringValue (fullscope);
 355                 }
 356
 357                 if (signature && vStringLength(signature))
 358                 {
 359                         size_t i;
 360                         /* sanitize signature by replacing all control characters with a
 361                          * space (because it's simple).
 362                          * there should never be any junk in a valid signature, but who
 363                          * knows what the user wrote and CTags doesn't cope well with weird
 364                          * characters. */
 365                         for (i = 0; i < signature->length; i++)
 366                         {
 367                                 unsigned char c = (unsigned char) signature->buffer[i];
 368                                 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
 369                                         signature->buffer[i] = ' ';
 370                         }
 371                         e.extensionFields.signature = vStringValue(signature);
 372                 }
 373
 374                 if (inheritance)
 375                         e.extensionFields.inheritance = vStringValue(inheritance);
 376
 377                 if (anonymous)
 378                         markTagExtraBit (&e, XTAG_ANONYMOUS);
 379
 380                 makeTagEntry (&e);
 381                 vStringDelete (fullscope);
 382         }
 383 }
 384
 385 static void makeJsTag (const tokenInfo *const token, const jsKind kind,
 386                                            vString *const signature, vString *const inheritance)
 387 {
 388         makeJsTagCommon (token, kind, signature, inheritance, false);
 389 }
 390
 391 static void makeClassTagCommon (tokenInfo *const token, vString *const signature,
 392                           vString *const inheritance, bool anonymous)
 393 {
 394         vString *       fulltag;
 395
 396         if ( ! token->ignoreTag )
 397         {
 398                 fulltag = vStringNew ();
 399                 if (vStringLength (token->scope) > 0)
 400                 {
 401                         vStringCopy(fulltag, token->scope);
 402                         vStringPut (fulltag, '.');
 403                         vStringCat (fulltag, token->string);
 404                 }
 405                 else
 406                 {
 407                         vStringCopy(fulltag, token->string);
 408                 }
 409                 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
 410                 {
 411                         stringListAdd (ClassNames, vStringNewCopy (fulltag));
 412                         makeJsTagCommon (token, JSTAG_CLASS, signature, inheritance,
 413                                                          anonymous);
 414                 }
 415                 vStringDelete (fulltag);
 416         }
 417 }
 418
 419 static void makeClassTag (tokenInfo *const token, vString *const signature,
 420                                                   vString *const inheritance)
 421 {
 422         makeClassTagCommon (token, signature, inheritance, false);
 423 }
 424
 425 static void makeFunctionTagCommon (tokenInfo *const token, vString *const signature, bool generator,
 426                                                                    bool anonymous)
 427 {
 428         vString *       fulltag;
 429
 430         if ( ! token->ignoreTag )
 431         {
 432                 fulltag = vStringNew ();
 433                 if (vStringLength (token->scope) > 0)
 434                 {
 435                         vStringCopy(fulltag, token->scope);
 436                         vStringPut (fulltag, '.');
 437                         vStringCat (fulltag, token->string);
 438                 }
 439                 else
 440                 {
 441                         vStringCopy(fulltag, token->string);
 442                 }
 443                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
 444                 {
 445                         stringListAdd (FunctionNames, vStringNewCopy (fulltag));
 446                         makeJsTagCommon (token, generator ? JSTAG_GENERATOR : JSTAG_FUNCTION, signature, NULL,
 447                                                          anonymous);
 448                 }
 449                 vStringDelete (fulltag);
 450         }
 451 }
 452
 453 static void makeFunctionTag (tokenInfo *const token, vString *const signature, bool generator)
 454 {
 455         makeFunctionTagCommon (token, signature, generator, false);
 456 }
 457
 458 /*
 459  *       Parsing functions
 460  */
 461
 462 /* given @p point, returns the first byte of the encoded output sequence, and
 463  * make sure the next ones will be returned by calls to getcFromInputFile()
 464  * as if the code point was simply written in the input file. */
 465 static int handleUnicodeCodePoint (uint32_t point)
 466 {
 467         int c = (int) point;
 468
 469         Assert (point < 0x110000);
 470
 471 #ifdef HAVE_ICONV
 472         /* if we do have iconv and the encodings are specified, use this */
 473         if (isConverting () && JSUnicodeConverter == (iconv_t) -2)
 474         {
 475                 /* if we didn't try creating the converter yet, try and do so */
 476                 JSUnicodeConverter = iconv_open (getLanguageEncoding (Lang_js), INTERNAL_ENCODING);
 477         }
 478         if (isConverting () && JSUnicodeConverter != (iconv_t) -1)
 479         {
 480                 char *input_ptr = (char *) &point;
 481                 size_t input_left = sizeof point;
 482                 /* 4 bytes should be enough for any encoding (it's how much UTF-32
 483                  * would need). */
 484                 /* FIXME: actually iconv has a tendency to output a BOM for Unicode
 485                  * encodings where it matters when the endianess is not specified in
 486                  * the target encoding name.  E.g., if the target encoding is "UTF-32"
 487                  * or "UTF-16" it will output 2 code points, the BOM (U+FEFF) and the
 488                  * one we expect. This does not happen if the endianess is specified
 489                  * explicitly, e.g. with "UTF-32LE", or "UTF-16BE".
 490                  * However, it's not very relevant for the moment as nothing in CTags
 491                  * cope well (if at all) with non-ASCII-compatible encodings like
 492                  * UTF-32 or UTF-16 anyway. */
 493                 char output[4] = { 0 };
 494                 char *output_ptr = output;
 495                 size_t output_left = ARRAY_SIZE (output);
 496
 497                 if (iconv (JSUnicodeConverter, &input_ptr, &input_left, &output_ptr, &output_left) == (size_t) -1)
 498                 {
 499                         /* something went wrong, which probably means the output encoding
 500                          * cannot represent the character.  Use a placeholder likely to be
 501                          * supported instead, that's also valid in an identifier */
 502                         verbose ("JavaScript: Encoding: %s\n", strerror (errno));
 503                         c = '_';
 504                 }
 505                 else
 506                 {
 507                         const size_t output_len = ARRAY_SIZE (output) - output_left;
 508
 509                         /* put all but the first byte back so that getcFromInputFile() will
 510                          * return them in the right order */
 511                         for (unsigned int i = 1; i < output_len; i++)
 512                                 ungetcToInputFile ((unsigned char) output[output_len - i]);
 513                         c = (unsigned char) output[0];
 514                 }
 515
 516                 iconv (JSUnicodeConverter, NULL, NULL, NULL, NULL);
 517         }
 518         else
 519 #endif
 520         {
 521                 /* when no encoding is specified (or no iconv), assume UTF-8 is good.
 522                  * Why UTF-8?  Because it's an ASCII-compatible common Unicode encoding. */
 523                 if (point < 0x80)
 524                         c = (unsigned char) point;
 525                 else if (point < 0x800)
 526                 {
 527                         c = (unsigned char) (0xc0 | ((point >> 6) & 0x1f));
 528                         ungetcToInputFile ((unsigned char) (0x80 | (point & 0x3f)));
 529                 }
 530                 else if (point < 0x10000)
 531                 {
 532                         c = (unsigned char) (0xe0 | ((point >> 12) & 0x0f));
 533                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  0) & 0x3f)));
 534                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  6) & 0x3f)));
 535                 }
 536                 else if (point < 0x110000)
 537                 {
 538                         c = (unsigned char) (0xf0 | ((point >> 18) & 0x07));
 539                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  0) & 0x3f)));
 540                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  6) & 0x3f)));
 541                         ungetcToInputFile ((unsigned char) (0x80 | ((point >> 12) & 0x3f)));
 542                 }
 543         }
 544
 545         return c;
 546 }
 547
 548 /* reads a Unicode escape sequence after the "\" prefix.
 549  * @param value Location to store the escape sequence value.
 550  * @param isUTF16 Location to store whether @param value is an UTF-16 word.
 551  * @returns Whether a valid sequence was read. */
 552 static bool readUnicodeEscapeSequenceValue (uint32_t *const value,
 553                                             bool *const isUTF16)
 554 {
 555         bool valid = false;
 556         int d = getcFromInputFile ();
 557
 558         if (d != 'u')
 559                 ungetcToInputFile (d);
 560         else
 561         {
 562                 int e = getcFromInputFile ();
 563                 char cp[6 + 1]; /* up to 6 hex + possible closing '}' or invalid char */
 564                 unsigned int cp_len = 0;
 565
 566                 *isUTF16 = (e != '{');
 567                 if (e == '{')
 568                 {       /* Handles Unicode code point escapes: \u{ HexDigits }
 569                          * We skip the leading 0s because there can be any number of them
 570                          * and they don't change any meaning. */
 571                         bool has_leading_zero = false;
 572
 573                         while ((cp[cp_len] = (char) getcFromInputFile ()) == '0')
 574                                 has_leading_zero = true;
 575
 576                         while (isxdigit (cp[cp_len]) && ++cp_len < ARRAY_SIZE (cp))
 577                                 cp[cp_len] = (char) getcFromInputFile ();
 578                         valid = ((cp_len > 0 || has_leading_zero) &&
 579                                          cp_len < ARRAY_SIZE (cp) && cp[cp_len] == '}' &&
 580                                          /* also check if it's a valid Unicode code point */
 581                                          (cp_len < 6 ||
 582                                           (cp_len == 6 && strncmp (cp, "110000", 6) < 0)));
 583                         if (! valid) /* put back the last (likely invalid) character */
 584                                 ungetcToInputFile (cp[cp_len]);
 585                 }
 586                 else
 587                 {       /* Handles Unicode escape sequences: \u Hex4Digits */
 588                         do
 589                                 cp[cp_len] = (char) ((cp_len == 0) ? e : getcFromInputFile ());
 590                         while (isxdigit (cp[cp_len]) && ++cp_len < 4);
 591                         valid = (cp_len == 4);
 592                 }
 593
 594                 if (! valid)
 595                 {
 596                         /* we don't get every character back, but it would require to
 597                          * be able to put up to 9 characters back (in the worst case
 598                          * for handling invalid \u{10FFFFx}), and here we're recovering
 599                          * from invalid syntax anyway. */
 600                         ungetcToInputFile (e);
 601                         ungetcToInputFile (d);
 602                 }
 603                 else
 604                 {
 605                         *value = 0;
 606                         for (unsigned int i = 0; i < cp_len; i++)
 607                         {
 608                                 *value *= 16;
 609
 610                                 /* we know it's a hex digit, no need to double check */
 611                                 if (cp[i] < 'A')
 612                                         *value += (unsigned int) cp[i] - '0';
 613                                 else if (cp[i] < 'a')
 614                                         *value += 10 + (unsigned int) cp[i] - 'A';
 615                                 else
 616                                         *value += 10 + (unsigned int) cp[i] - 'a';
 617                         }
 618                 }
 619         }
 620
 621         return valid;
 622 }
 623
 624 static int valueToXDigit (unsigned char v)
 625 {
 626         Assert (v <= 0xF);
 627
 628         if (v >= 0xA)
 629                 return 'A' + (v - 0xA);
 630         else
 631                 return '0' + v;
 632 }
 633
 634 /* Reads and expands a Unicode escape sequence after the "\" prefix.  If the
 635  * escape sequence is a UTF16 high surrogate, also try and read the low
 636  * surrogate to emit the proper code point.
 637  * @param fallback The character to return if the sequence is invalid. Usually
 638  *                 this would be the '\' character starting the sequence.
 639  * @returns The first byte of the sequence, or @param fallback if the sequence
 640  *          is invalid. On success, next calls to getcFromInputFile() will
 641  *          return subsequent bytes (if any). */
 642 static int readUnicodeEscapeSequence (const int fallback)
 643 {
 644         int c;
 645         uint32_t value;
 646         bool isUTF16;
 647
 648         if (! readUnicodeEscapeSequenceValue (&value, &isUTF16))
 649                 c = fallback;
 650         else
 651         {
 652                 if (isUTF16 && (value & 0xfc00) == 0xd800)
 653                 {       /* this is a high surrogate, try and read its low surrogate and
 654                          * emit the resulting code point */
 655                         uint32_t low;
 656                         int d = getcFromInputFile ();
 657
 658                         if (d != '\\' || ! readUnicodeEscapeSequenceValue (&low, &isUTF16))
 659                                 ungetcToInputFile (d);
 660                         else if (! isUTF16)
 661                         {       /* not UTF-16 low surrogate but a plain code point */
 662                                 d = handleUnicodeCodePoint (low);
 663                                 ungetcToInputFile (d);
 664                         }
 665                         else if ((low & 0xfc00) != 0xdc00)
 666                         {       /* not a low surrogate, so put back the escaped representation
 667                                  * in case it was another high surrogate we should read as part
 668                                  * of another pair. */
 669                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0x000f) >>  0)));
 670                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0x00f0) >>  4)));
 671                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0x0f00) >>  8)));
 672                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0xf000) >> 12)));
 673                                 ungetcToInputFile ('u');
 674                                 ungetcToInputFile ('\\');
 675                         }
 676                         else
 677                                 value = 0x010000 + ((value & 0x03ff) << 10) + (low & 0x03ff);
 678                 }
 679                 c = handleUnicodeCodePoint (value);
 680         }
 681
 682         return c;
 683 }
 684
 685 static void parseString (vString *const string, const int delimiter)
 686 {
 687         bool end = false;
 688         while (! end)
 689         {
 690                 int c = getcFromInputFile ();
 691                 if (c == EOF)
 692                         end = true;
 693                 else if (c == '\\')
 694                 {
 695                         /* Eat the escape sequence (\", \', etc).  We properly handle
 696                          * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
 697                          * as an unescaped character, which is invalid and handled below.
 698                          * Also, handle the fact that <LineContinuation> produces an empty
 699                          * sequence.
 700                          * See ECMA-262 7.8.4 */
 701                         c = getcFromInputFile ();
 702                         if (c == 'u')
 703                         {
 704                                 ungetcToInputFile (c);
 705                                 c = readUnicodeEscapeSequence ('\\');
 706                                 vStringPut (string, c);
 707                         }
 708                         else if (c != '\r' && c != '\n')
 709                                 vStringPut(string, c);
 710                         else if (c == '\r')
 711                         {
 712                                 c = getcFromInputFile();
 713                                 if (c != '\n')
 714                                         ungetcToInputFile (c);
 715                         }
 716                 }
 717                 else if (c == delimiter)
 718                         end = true;
 719                 else if (c == '\r' || c == '\n')
 720                 {
 721                         /* those are invalid when not escaped */
 722                         end = true;
 723                         /* we don't want to eat the newline itself to let the automatic
 724                          * semicolon insertion code kick in */
 725                         ungetcToInputFile (c);
 726                 }
 727                 else
 728                         vStringPut (string, c);
 729         }
 730 }
 731
 732 static void parseRegExp (void)
 733 {
 734         int c;
 735         bool in_range = false;
 736
 737         do
 738         {
 739                 c = getcFromInputFile ();
 740                 if (! in_range && c == '/')
 741                 {
 742                         do /* skip flags */
 743                         {
 744                                 c = getcFromInputFile ();
 745                         } while (isalpha (c));
 746                         ungetcToInputFile (c);
 747                         break;
 748                 }
 749                 else if (c == '\n' || c == '\r')
 750                 {
 751                         /* invalid in a regex */
 752                         ungetcToInputFile (c);
 753                         break;
 754                 }
 755                 else if (c == '\\')
 756                         c = getcFromInputFile (); /* skip next character */
 757                 else if (c == '[')
 758                         in_range = true;
 759                 else if (c == ']')
 760                         in_range = false;
 761         } while (c != EOF);
 762 }
 763
 764 /*      Read a C identifier beginning with "firstChar" and places it into
 765  *      "name".
 766  */
 767 static void parseIdentifier (vString *const string, const int firstChar)
 768 {
 769         int c = firstChar;
 770         Assert (isIdentChar (c));
 771         do
 772         {
 773                 vStringPut (string, c);
 774                 c = getcFromInputFile ();
 775                 if (c == '\\')
 776                         c = readUnicodeEscapeSequence (c);
 777         } while (isIdentChar (c));
 778         /* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
 779          * as we should actually put back the whole escape sequence and not the
 780          * decoded character.  However, it's not really worth the hassle as it can
 781          * only happen if the input has an invalid escape sequence. */
 782         ungetcToInputFile (c);          /* unget non-identifier character */
 783 }
 784
 785 static void parseTemplateString (vString *const string)
 786 {
 787         int c;
 788         do
 789         {
 790                 c = getcFromInputFile ();
 791                 if (c == '`' || c == EOF)
 792                         break;
 793
 794                 vStringPut (string, c);
 795
 796                 if (c == '\\')
 797                 {
 798                         c = getcFromInputFile();
 799                         if (c != EOF)
 800                                 vStringPut(string, c);
 801                 }
 802                 else if (c == '$')
 803                 {
 804                         c = getcFromInputFile ();
 805                         if (c != '{')
 806                                 ungetcToInputFile (c);
 807                         else
 808                         {
 809                                 int depth = 1;
 810                                 /* we need to use the real token machinery to handle strings,
 811                                  * comments, regexes and whatnot */
 812                                 tokenInfo *token = newToken ();
 813                                 LastTokenType = TOKEN_UNDEFINED;
 814                                 vStringPut(string, c);
 815                                 do
 816                                 {
 817                                         readTokenFull (token, false, string);
 818                                         if (isType (token, TOKEN_OPEN_CURLY))
 819                                                 depth++;
 820                                         else if (isType (token, TOKEN_CLOSE_CURLY))
 821                                                 depth--;
 822                                 }
 823                                 while (! isType (token, TOKEN_EOF) && depth > 0);
 824                                 deleteToken (token);
 825                         }
 826                 }
 827         }
 828         while (c != EOF);
 829 }
 830
 831 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
 832 {
 833         int c;
 834         int i;
 835         bool newline_encountered = false;
 836
 837         /* if we've got a token held back, emit it */
 838         if (NextToken)
 839         {
 840                 copyToken (token, NextToken, false);
 841                 deleteToken (NextToken);
 842                 NextToken = NULL;
 843                 return;
 844         }
 845
 846         token->type                     = TOKEN_UNDEFINED;
 847         token->keyword          = KEYWORD_NONE;
 848         vStringClear (token->string);
 849
 850 getNextChar:
 851         i = 0;
 852         do
 853         {
 854                 c = getcFromInputFile ();
 855                 if (include_newlines && (c == '\r' || c == '\n'))
 856                         newline_encountered = true;
 857                 i++;
 858         }
 859         while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
 860
 861         token->lineNumber   = getInputLineNumber ();
 862         token->filePosition = getInputFilePosition ();
 863
 864         if (repr && c != EOF)
 865         {
 866                 if (i > 1)
 867                         vStringPut (repr, ' ');
 868                 vStringPut (repr, c);
 869         }
 870
 871         switch (c)
 872         {
 873                 case EOF: token->type = TOKEN_EOF;                                      break;
 874                 case '(': token->type = TOKEN_OPEN_PAREN;                       break;
 875                 case ')': token->type = TOKEN_CLOSE_PAREN;                      break;
 876                 case ';': token->type = TOKEN_SEMICOLON;                        break;
 877                 case ',': token->type = TOKEN_COMMA;                            break;
 878                 case '.': token->type = TOKEN_PERIOD;                           break;
 879                 case ':': token->type = TOKEN_COLON;                            break;
 880                 case '{': token->type = TOKEN_OPEN_CURLY;                       break;
 881                 case '}': token->type = TOKEN_CLOSE_CURLY;                      break;
 882                 case '=': token->type = TOKEN_EQUAL_SIGN;                       break;
 883                 case '[': token->type = TOKEN_OPEN_SQUARE;                      break;
 884                 case ']': token->type = TOKEN_CLOSE_SQUARE;                     break;
 885
 886                 case '+':
 887                 case '-':
 888                         {
 889                                 int d = getcFromInputFile ();
 890                                 if (d == c) /* ++ or -- */
 891                                         token->type = TOKEN_POSTFIX_OPERATOR;
 892                                 else
 893                                 {
 894                                         ungetcToInputFile (d);
 895                                         token->type = TOKEN_BINARY_OPERATOR;
 896                                 }
 897                                 break;
 898                         }
 899
 900                 case '*':
 901                         token->type = TOKEN_STAR;
 902                         break;
 903                 case '%':
 904                 case '?':
 905                 case '>':
 906                 case '<':
 907                 case '^':
 908                 case '|':
 909                 case '&':
 910                         token->type = TOKEN_BINARY_OPERATOR;
 911                         break;
 912
 913                 case '\'':
 914                 case '"':
 915                                   token->type = TOKEN_STRING;
 916                                   parseString (token->string, c);
 917                                   token->lineNumber = getInputLineNumber ();
 918                                   token->filePosition = getInputFilePosition ();
 919                                   if (repr)
 920                                   {
 921                                           vStringCat (repr, token->string);
 922                                           vStringPut (repr, c);
 923                                   }
 924                                   break;
 925
 926                 case '`':
 927                                   token->type = TOKEN_TEMPLATE_STRING;
 928                                   parseTemplateString (token->string);
 929                                   token->lineNumber = getInputLineNumber ();
 930                                   token->filePosition = getInputFilePosition ();
 931                                   if (repr)
 932                                   {
 933                                           vStringCat (repr, token->string);
 934                                           vStringPut (repr, c);
 935                                   }
 936                                   break;
 937
 938                 case '/':
 939                                   {
 940                                           int d = getcFromInputFile ();
 941                                           if ( (d != '*') &&            /* is this the start of a comment? */
 942                                                           (d != '/') )          /* is a one line comment? */
 943                                           {
 944                                                   ungetcToInputFile (d);
 945                                                   switch (LastTokenType)
 946                                                   {
 947                                                           case TOKEN_CHARACTER:
 948                                                           case TOKEN_IDENTIFIER:
 949                                                           case TOKEN_STRING:
 950                                                           case TOKEN_TEMPLATE_STRING:
 951                                                           case TOKEN_CLOSE_CURLY:
 952                                                           case TOKEN_CLOSE_PAREN:
 953                                                           case TOKEN_CLOSE_SQUARE:
 954                                                                   token->type = TOKEN_BINARY_OPERATOR;
 955                                                                   break;
 956
 957                                                           default:
 958                                                                   token->type = TOKEN_REGEXP;
 959                                                                   parseRegExp ();
 960                                                                   token->lineNumber = getInputLineNumber ();
 961                                                                   token->filePosition = getInputFilePosition ();
 962                                                                   break;
 963                                                   }
 964                                           }
 965                                           else
 966                                           {
 967                                                   if (repr) /* remove the / we added */
 968                                                           repr->buffer[--repr->length] = 0;
 969                                                   if (d == '*')
 970                                                   {
 971                                                           do
 972                                                           {
 973                                                                   skipToCharacterInInputFile ('*');
 974                                                                   c = getcFromInputFile ();
 975                                                                   if (c == '/')
 976                                                                           break;
 977                                                                   else
 978                                                                           ungetcToInputFile (c);
 979                                                           } while (c != EOF && c != '\0');
 980                                                           goto getNextChar;
 981                                                   }
 982                                                   else if (d == '/')    /* is this the start of a comment?  */
 983                                                   {
 984                                                           skipToCharacterInInputFile ('\n');
 985                                                           /* if we care about newlines, put it back so it is seen */
 986                                                           if (include_newlines)
 987                                                                   ungetcToInputFile ('\n');
 988                                                           goto getNextChar;
 989                                                   }
 990                                           }
 991                                           break;
 992                                   }
 993
 994                 case '#':
 995                                   /* skip shebang in case of e.g. Node.js scripts */
 996                                   if (token->lineNumber > 1)
 997                                           token->type = TOKEN_UNDEFINED;
 998                                   else if ((c = getcFromInputFile ()) != '!')
 999                                   {
1000                                           ungetcToInputFile (c);
1001                                           token->type = TOKEN_UNDEFINED;
1002                                   }
1003                                   else
1004                                   {
1005                                           skipToCharacterInInputFile ('\n');
1006                                           goto getNextChar;
1007                                   }
1008                                   break;
1009
1010                 case '\\':
1011                                   c = readUnicodeEscapeSequence (c);
1012                                   /* fallthrough */
1013                 default:
1014                                   if (! isIdentChar (c))
1015                                           token->type = TOKEN_UNDEFINED;
1016                                   else
1017                                   {
1018                                           parseIdentifier (token->string, c);
1019                                           token->lineNumber = getInputLineNumber ();
1020                                           token->filePosition = getInputFilePosition ();
1021                                           token->keyword = lookupKeyword (vStringValue (token->string), Lang_js);
1022                                           if (isKeyword (token, KEYWORD_NONE))
1023                                                   token->type = TOKEN_IDENTIFIER;
1024                                           else
1025                                                   token->type = TOKEN_KEYWORD;
1026                                           if (repr && vStringLength (token->string) > 1)
1027                                                   vStringCatS (repr, vStringValue (token->string) + 1);
1028                                   }
1029                                   break;
1030         }
1031
1032         if (include_newlines && newline_encountered)
1033         {
1034                 /* This isn't strictly correct per the standard, but following the
1035                  * real rules means understanding all statements, and that's not
1036                  * what the parser currently does.  What we do here is a guess, by
1037                  * avoiding inserting semicolons that would make the statement on
1038                  * the left or right obviously invalid.  Hopefully this should not
1039                  * have false negatives (e.g. should not miss insertion of a semicolon)
1040                  * but might have false positives (e.g. it will wrongfully emit a
1041                  * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
1042                  * This should however be mostly harmless as we only deal with
1043                  * newlines in specific situations where we know a false positive
1044                  * wouldn't hurt too bad. */
1045
1046                 /* these already end a statement, so no need to duplicate it */
1047                 #define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
1048                                               (t) == TOKEN_EOF          || \
1049                                               (t) == TOKEN_COMMA        || \
1050                                               (t) == TOKEN_OPEN_CURLY)
1051                 /* these cannot be the start or end of a statement */
1052                 #define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
1053                                                (t) == TOKEN_COLON           || \
1054                                                (t) == TOKEN_PERIOD          || \
1055                                                (t) == TOKEN_STAR            || \
1056                                                (t) == TOKEN_BINARY_OPERATOR)
1057
1058                 if (! IS_STMT_SEPARATOR(LastTokenType) &&
1059                     ! IS_STMT_SEPARATOR(token->type) &&
1060                     ! IS_BINARY_OPERATOR(LastTokenType) &&
1061                     ! IS_BINARY_OPERATOR(token->type) &&
1062                     /* these cannot be followed by a semicolon */
1063                     ! (LastTokenType == TOKEN_OPEN_PAREN ||
1064                        LastTokenType == TOKEN_OPEN_SQUARE))
1065                 {
1066                         /* hold the token... */
1067                         Assert (NextToken == NULL);
1068                         NextToken = newToken ();
1069                         copyToken (NextToken, token, false);
1070
1071                         /* ...and emit a semicolon instead */
1072                         token->type             = TOKEN_SEMICOLON;
1073                         token->keyword  = KEYWORD_NONE;
1074                         vStringClear (token->string);
1075                         if (repr)
1076                                 vStringPut (token->string, '\n');
1077                 }
1078
1079                 #undef IS_STMT_SEPARATOR
1080                 #undef IS_BINARY_OPERATOR
1081         }
1082
1083         LastTokenType = token->type;
1084 }
1085
#ifdef JSCRIPT_DO_DEBUGGING
/* Tracing variant of readTokenFull(): forwards its arguments to
 * readTokenFull() and then prints the resulting token's string, type and
 * scope through JSCRIPT_DEBUG_PRINT. */
static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr)
{
        readTokenFull (token, include_newlines, repr);
        JSCRIPT_DEBUG_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),token->type, vStringValue(token->scope));
}
/* from this point on, code spelled readTokenFull() calls the tracing wrapper */
# define readTokenFull readTokenFullDebug
#endif
1095
/* Reads the next token into "token" through readTokenFull(), with newline
 * handling disabled (include_newlines is false) and without collecting a
 * textual representation (repr is NULL). */
static void readToken (tokenInfo *const token)
{
        readTokenFull (token, false, NULL);
}
1100
1101 /*
1102  *       Token parsing functions
1103  */
1104
1105 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
1106 {
1107         int nest_level = 0;
1108
1109         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
1110         {
1111                 nest_level++;
1112                 if (repr)
1113                         vStringPut (repr, '(');
1114                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1115                 {
1116                         readTokenFull (token, false, repr);
1117                         if (isType (token, TOKEN_OPEN_PAREN))
1118                                 nest_level++;
1119                         else if (isType (token, TOKEN_CLOSE_PAREN))
1120                                 nest_level--;
1121                         else if (isKeyword (token, KEYWORD_function))
1122                                 parseFunction (token);
1123                 }
1124                 readTokenFull (token, include_newlines, NULL);
1125         }
1126 }
1127
1128 static void skipArrayList (tokenInfo *const token, bool include_newlines)
1129 {
1130         int nest_level = 0;
1131
1132         /*
1133          * Handle square brackets
1134          *       var name[1]
1135          * So we must check for nested open and closing square brackets
1136          */
1137
1138         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
1139         {
1140                 nest_level++;
1141                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1142                 {
1143                         readToken (token);
1144                         if (isType (token, TOKEN_OPEN_SQUARE))
1145                                 nest_level++;
1146                         else if (isType (token, TOKEN_CLOSE_SQUARE))
1147                                 nest_level--;
1148                 }
1149                 readTokenFull (token, include_newlines, NULL);
1150         }
1151 }
1152
1153 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
1154 {
1155         if (vStringLength (parent->string) > 0)
1156         {
1157                 vStringPut (parent->string, '.');
1158         }
1159         vStringCat (parent->string, child->string);
1160 }
1161
1162 static void addToScope (tokenInfo* const token, const vString* const extra)
1163 {
1164         if (vStringLength (token->scope) > 0)
1165         {
1166                 vStringPut (token->scope, '.');
1167         }
1168         vStringCat (token->scope, extra);
1169 }
1170
1171 /*
1172  *       Scanning functions
1173  */
1174
1175 static bool findCmdTerm (tokenInfo *const token, bool include_newlines,
1176                             bool include_commas)
1177 {
1178         /*
1179          * Read until we find either a semicolon or closing brace.
1180          * Any nested braces will be handled within.
1181          */
1182         while (! isType (token, TOKEN_SEMICOLON) &&
1183                    ! isType (token, TOKEN_CLOSE_CURLY) &&
1184                    ! (include_commas && isType (token, TOKEN_COMMA)) &&
1185                    ! isType (token, TOKEN_EOF))
1186         {
1187                 /* Handle nested blocks */
1188                 if ( isType (token, TOKEN_OPEN_CURLY))
1189                 {
1190                         parseBlock (token, NULL);
1191                         readTokenFull (token, include_newlines, NULL);
1192                 }
1193                 else if ( isType (token, TOKEN_OPEN_PAREN) )
1194                 {
1195                         skipArgumentList(token, include_newlines, NULL);
1196                 }
1197                 else if ( isType (token, TOKEN_OPEN_SQUARE) )
1198                 {
1199                         skipArrayList(token, include_newlines);
1200                 }
1201                 else
1202                 {
1203                         readTokenFull (token, include_newlines, NULL);
1204                 }
1205         }
1206
1207         return isType (token, TOKEN_SEMICOLON);
1208 }
1209
1210 static void parseSwitch (tokenInfo *const token)
1211 {
1212         /*
1213          * switch (expression) {
1214          * case value1:
1215          *         statement;
1216          *         break;
1217          * case value2:
1218          *         statement;
1219          *         break;
1220          * default : statement;
1221          * }
1222          */
1223
1224         readToken (token);
1225
1226         if (isType (token, TOKEN_OPEN_PAREN))
1227         {
1228                 skipArgumentList(token, false, NULL);
1229         }
1230
1231         if (isType (token, TOKEN_OPEN_CURLY))
1232         {
1233                 parseBlock (token, NULL);
1234         }
1235 }
1236
1237 static bool parseLoop (tokenInfo *const token)
1238 {
1239         /*
1240          * Handles these statements
1241          *         for (x=0; x<3; x++)
1242          *                 document.write("This text is repeated three times<br>");
1243          *
1244          *         for (x=0; x<3; x++)
1245          *         {
1246          *                 document.write("This text is repeated three times<br>");
1247          *         }
1248          *
1249          *         while (number<5){
1250          *                 document.write(number+"<br>");
1251          *                 number++;
1252          *         }
1253          *
1254          *         do{
1255          *                 document.write(number+"<br>");
1256          *                 number++;
1257          *         }
1258          *         while (number<5);
1259          */
1260         bool is_terminated = true;
1261
1262         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
1263         {
1264                 readToken(token);
1265
1266                 if (isType (token, TOKEN_OPEN_PAREN))
1267                 {
1268                         skipArgumentList(token, false, NULL);
1269                 }
1270
1271                 if (isType (token, TOKEN_OPEN_CURLY))
1272                 {
1273                         parseBlock (token, NULL);
1274                 }
1275                 else
1276                 {
1277                         is_terminated = parseLine(token, false);
1278                 }
1279         }
1280         else if (isKeyword (token, KEYWORD_do))
1281         {
1282                 readToken(token);
1283
1284                 if (isType (token, TOKEN_OPEN_CURLY))
1285                 {
1286                         parseBlock (token, NULL);
1287                 }
1288                 else
1289                 {
1290                         is_terminated = parseLine(token, false);
1291                 }
1292
1293                 if (is_terminated)
1294                         readToken(token);
1295
1296                 if (isKeyword (token, KEYWORD_while))
1297                 {
1298                         readToken(token);
1299
1300                         if (isType (token, TOKEN_OPEN_PAREN))
1301                         {
1302                                 skipArgumentList(token, true, NULL);
1303                         }
1304                         if (! isType (token, TOKEN_SEMICOLON))
1305                         {
1306                                 /* oddly enough, `do {} while (0) var foo = 42` is perfectly
1307                                  * valid JS, so explicitly handle the remaining of the line
1308                                  * for the sake of the root scope handling (as parseJsFile()
1309                                  * always advances a token not to ever get stuck) */
1310                                 is_terminated = parseLine(token, false);
1311                         }
1312                 }
1313         }
1314
1315         return is_terminated;
1316 }
1317
1318 static bool parseIf (tokenInfo *const token)
1319 {
1320         bool read_next_token = true;
1321         /*
1322          * If statements have two forms
1323          *         if ( ... )
1324          *                 one line;
1325          *
1326          *         if ( ... )
1327          *                statement;
1328          *         else
1329          *                statement
1330          *
1331          *         if ( ... ) {
1332          *                multiple;
1333          *                statements;
1334          *         }
1335          *
1336          *
1337          *         if ( ... ) {
1338          *                return elem
1339          *         }
1340          *
1341          *     This example if correctly written, but the
1342          *     else contains only 1 statement without a terminator
1343          *     since the function finishes with the closing brace.
1344          *
1345      *     function a(flag){
1346      *         if(flag)
1347      *             test(1);
1348      *         else
1349      *             test(2)
1350      *     }
1351          *
1352          * TODO:  Deal with statements that can optional end
1353          *                without a semi-colon.  Currently this messes up
1354          *                the parsing of blocks.
1355          *                Need to somehow detect this has happened, and either
1356          *                backup a token, or skip reading the next token if
1357          *                that is possible from all code locations.
1358          *
1359          */
1360
1361         readToken (token);
1362
1363         if (isKeyword (token, KEYWORD_if))
1364         {
1365                 /*
1366                  * Check for an "else if" and consume the "if"
1367                  */
1368                 readToken (token);
1369         }
1370
1371         if (isType (token, TOKEN_OPEN_PAREN))
1372         {
1373                 skipArgumentList(token, false, NULL);
1374         }
1375
1376         if (isType (token, TOKEN_OPEN_CURLY))
1377         {
1378                 parseBlock (token, NULL);
1379         }
1380         else
1381         {
1382                 /* The next token should only be read if this statement had its own
1383                  * terminator */
1384                 read_next_token = findCmdTerm (token, true, false);
1385         }
1386         return read_next_token;
1387 }
1388
1389 static void parseFunction (tokenInfo *const token)
1390 {
1391         tokenInfo *const name = newToken ();
1392         vString *const signature = vStringNew ();
1393         bool is_class = false;
1394         bool is_generator = false;
1395         bool is_anonymous = false;
1396         /*
1397          * This deals with these formats
1398          *         function validFunctionTwo(a,b) {}
1399          *         function * generator(a,b) {}
1400          */
1401
1402         copyToken (name, token, true);
1403         readToken (name);
1404         if (isType (name, TOKEN_STAR))
1405         {
1406                 is_generator = true;
1407                 readToken (name);
1408         }
1409         if (isType (name, TOKEN_OPEN_PAREN))
1410         {
1411                 /* anonymous function */
1412                 copyToken (token, name, false);
1413                 anonGenerate (name->string, "AnonymousFunction", JSTAG_FUNCTION);
1414                 is_anonymous = true;
1415         }
1416         else if (!isType (name, TOKEN_IDENTIFIER))
1417                 goto cleanUp;
1418         else
1419                 readToken (token);
1420
1421         while (isType (token, TOKEN_PERIOD))
1422         {
1423                 readToken (token);
1424                 if (! isType(token, TOKEN_KEYWORD))
1425                 {
1426                         addContext (name, token);
1427                         readToken (token);
1428                 }
1429         }
1430
1431         if ( isType (token, TOKEN_OPEN_PAREN) )
1432                 skipArgumentList(token, false, signature);
1433
1434         if ( isType (token, TOKEN_OPEN_CURLY) )
1435         {
1436                 is_class = parseBlock (token, name->string);
1437                 if ( is_class )
1438                         makeClassTagCommon (name, signature, NULL, is_anonymous);
1439                 else
1440                         makeFunctionTagCommon (name, signature, is_generator, is_anonymous);
1441         }
1442
1443         findCmdTerm (token, false, false);
1444
1445  cleanUp:
1446         vStringDelete (signature);
1447         deleteToken (name);
1448 }
1449
1450 /* Parses a block surrounded by curly braces.
1451  * @p parentScope is the scope name for this block, or NULL for unnamed scopes */
1452 static bool parseBlock (tokenInfo *const token, const vString *const parentScope)
1453 {
1454         JSCRIPT_DEBUG_ENTER();
1455
1456         bool is_class = false;
1457         bool read_next_token = true;
1458         vString * saveScope = vStringNew ();
1459
1460         vStringCopy(saveScope, token->scope);
1461         if (parentScope)
1462         {
1463                 addToScope (token, parentScope);
1464                 token->nestLevel++;
1465         }
1466
1467         /*
1468          * Make this routine a bit more forgiving.
1469          * If called on an open_curly advance it
1470          */
1471         if (isType (token, TOKEN_OPEN_CURLY))
1472                 readToken(token);
1473
1474         if (! isType (token, TOKEN_CLOSE_CURLY))
1475         {
1476                 /*
1477                  * Read until we find the closing brace,
1478                  * any nested braces will be handled within
1479                  */
1480                 do
1481                 {
1482                         read_next_token = true;
1483                         if (isKeyword (token, KEYWORD_this))
1484                         {
1485                                 /*
1486                                  * Means we are inside a class and have found
1487                                  * a class, not a function
1488                                  */
1489                                 is_class = true;
1490
1491                                 /*
1492                                  * Ignore the remainder of the line
1493                                  * findCmdTerm(token);
1494                                  */
1495                                 read_next_token = parseLine (token, is_class);
1496                         }
1497                         else if (isKeyword (token, KEYWORD_var) ||
1498                                          isKeyword (token, KEYWORD_let) ||
1499                                          isKeyword (token, KEYWORD_const))
1500                         {
1501                                 /*
1502                                  * Potentially we have found an inner function.
1503                                  * Set something to indicate the scope
1504                                  */
1505                                 read_next_token = parseLine (token, is_class);
1506                         }
1507                         else if (isType (token, TOKEN_OPEN_CURLY))
1508                         {
1509                                 /* Handle nested blocks */
1510                                 parseBlock (token, NULL);
1511                         }
1512                         else
1513                         {
1514                                 /*
1515                                  * It is possible for a line to have no terminator
1516                                  * if the following line is a closing brace.
1517                                  * parseLine will detect this case and indicate
1518                                  * whether we should read an additional token.
1519                                  */
1520                                 read_next_token = parseLine (token, is_class);
1521                         }
1522
1523                         /*
1524                          * Always read a new token unless we find a statement without
1525                          * a ending terminator
1526                          */
1527                         if( read_next_token )
1528                                 readToken(token);
1529
1530                         /*
1531                          * If we find a statement without a terminator consider the
1532                          * block finished, otherwise the stack will be off by one.
1533                          */
1534                 } while (! isType (token, TOKEN_EOF) &&
1535                                  ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1536         }
1537
1538         vStringCopy(token->scope, saveScope);
1539         vStringDelete(saveScope);
1540         if (parentScope)
1541                 token->nestLevel--;
1542
1543         JSCRIPT_DEBUG_LEAVE();
1544
1545         return is_class;
1546 }
1547
1548 static bool parseMethods (tokenInfo *const token, const tokenInfo *const class,
1549                           const bool is_es6_class)
1550 {
1551         JSCRIPT_DEBUG_ENTER();
1552
1553         tokenInfo *const name = newToken ();
1554         bool has_methods = false;
1555         vString *saveScope = vStringNew ();
1556
1557         vStringCopy (saveScope, token->scope);
1558         addToScope (token, class->string);
1559
1560         /*
1561          * This deals with these formats
1562          *         validProperty  : 2,
1563          *         validMethod    : function(a,b) {}
1564          *         'validMethod2' : function(a,b) {}
1565      *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1566      *
1567      * ES6 methods:
1568      *     property(...) {}
1569      *     *generator() {}
1570      * FIXME: what to do with computed names?
1571      *     [property]() {}
1572      *     *[generator]() {}
1573          */
1574
1575         do
1576         {
1577                 readToken (token);
1578                 if (isType (token, TOKEN_CLOSE_CURLY))
1579                 {
1580                         goto cleanUp;
1581                 }
1582
1583                 if (isKeyword (token, KEYWORD_async))
1584                         readToken (token);
1585
1586                 if (! isType (token, TOKEN_KEYWORD) &&
1587                     ! isType (token, TOKEN_SEMICOLON))
1588                 {
1589                         bool is_generator = false;
1590                         bool is_shorthand = false; /* ES6 shorthand syntax */
1591
1592                         if (isType (token, TOKEN_STAR)) /* shorthand generator */
1593                         {
1594                                 is_generator = true;
1595                                 readToken (token);
1596                         }
1597
1598                         copyToken(name, token, true);
1599
1600                         readToken (token);
1601                         is_shorthand = isType (token, TOKEN_OPEN_PAREN);
1602                         if ( isType (token, TOKEN_COLON) || is_shorthand )
1603                         {
1604                                 if (! is_shorthand)
1605                                 {
1606                                         readToken (token);
1607                                         if (isKeyword (token, KEYWORD_async))
1608                                                 readToken (token);
1609                                 }
1610                                 if ( is_shorthand || isKeyword (token, KEYWORD_function) )
1611                                 {
1612                                         JSCRIPT_DEBUG_PRINT("Seems to be a function or shorthand");
1613                                         vString *const signature = vStringNew ();
1614
1615                                         if (! is_shorthand)
1616                                         {
1617                                                 readToken (token);
1618                                                 if (isType (token, TOKEN_STAR))
1619                                                 {
1620                                                         /* generator: 'function' '*' '(' ... ')' '{' ... '}' */
1621                                                         is_generator = true;
1622                                                         readToken (token);
1623                                                 }
1624                                         }
1625                                         if ( isType (token, TOKEN_OPEN_PAREN) )
1626                                         {
1627                                                 skipArgumentList(token, false, signature);
1628                                         }
1629
1630                                         if (isType (token, TOKEN_OPEN_CURLY))
1631                                         {
1632                                                 has_methods = true;
1633                                                 makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
1634                                                 parseBlock (token, name->string);
1635
1636                                                 /*
1637                                                  * If we aren't parsing an ES6 class (for which there
1638                                                  * is no mandatory separators), read to the closing
1639                                                  * curly, check next token, if a comma, we must loop
1640                                                  * again.
1641                                                  */
1642                                                 if (! is_es6_class)
1643                                                         readToken (token);
1644                                         }
1645
1646                                         vStringDelete (signature);
1647                                 }
1648                                 else if (! is_es6_class)
1649                                 {
1650                                                 bool has_child_methods = false;
1651
1652                                                 /* skip whatever is the value */
1653                                                 while (! isType (token, TOKEN_COMMA) &&
1654                                                        ! isType (token, TOKEN_CLOSE_CURLY) &&
1655                                                        ! isType (token, TOKEN_EOF))
1656                                                 {
1657                                                         if (isType (token, TOKEN_OPEN_CURLY))
1658                                                         {
1659                                                                 /* Recurse to find child properties/methods */
1660                                                                 has_child_methods = parseMethods (token, name, false);
1661                                                                 readToken (token);
1662                                                         }
1663                                                         else if (isType (token, TOKEN_OPEN_PAREN))
1664                                                         {
1665                                                                 skipArgumentList (token, false, NULL);
1666                                                         }
1667                                                         else if (isType (token, TOKEN_OPEN_SQUARE))
1668                                                         {
1669                                                                 skipArrayList (token, false);
1670                                                         }
1671                                                         else
1672                                                         {
1673                                                                 readToken (token);
1674                                                         }
1675                                                 }
1676
1677                                                 has_methods = true;
1678                                                 if (has_child_methods)
1679                                                         makeJsTag (name, JSTAG_CLASS, NULL, NULL);
1680                                                 else
1681                                                         makeJsTag (name, JSTAG_PROPERTY, NULL, NULL);
1682                                 }
1683                         }
1684                 }
1685         } while ( isType(token, TOKEN_COMMA) ||
1686                   ( is_es6_class && ! isType(token, TOKEN_EOF) ) );
1687
1688         JSCRIPT_DEBUG_PRINT("Finished parsing methods");
1689
1690         findCmdTerm (token, false, false);
1691
1692 cleanUp:
1693         vStringCopy (token->scope, saveScope);
1694         vStringDelete (saveScope);
1695         deleteToken (name);
1696
1697         JSCRIPT_DEBUG_LEAVE();
1698
1699         return has_methods;
1700 }
1701
1702 static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName)
1703 {
1704         JSCRIPT_DEBUG_ENTER();
1705
1706         tokenInfo * className = newToken ();
1707         vString *inheritance = NULL;
1708         bool is_anonymous = true;
1709
1710         copyToken (className, token, true);
1711         readToken (className);
1712
1713         /* optional name */
1714         if (isType (className, TOKEN_IDENTIFIER))
1715         {
1716                 readToken (token);
1717                 is_anonymous = false;
1718         }
1719         else
1720         {
1721                 copyToken (token, className, true);
1722                 /* We create a fake name so we have a scope for the members */
1723                 if (! targetName)
1724                         anonGenerate (className->string, "AnonymousClass", JSTAG_CLASS);
1725         }
1726
1727         if (! targetName)
1728                 targetName = className;
1729
1730         if (isKeyword (token, KEYWORD_extends))
1731                 inheritance = vStringNew ();
1732
1733         /* skip inheritance info */
1734         while (! isType (token, TOKEN_OPEN_CURLY) &&
1735                ! isType (token, TOKEN_EOF) &&
1736                ! isType (token, TOKEN_SEMICOLON))
1737                 readTokenFull (token, false, inheritance);
1738
1739         /* remove the last added token (here we assume it's one char, "{" or ";" */
1740         if (inheritance && vStringLength (inheritance) > 0 &&
1741             ! isType (token, TOKEN_EOF))
1742         {
1743                 vStringChop (inheritance);
1744                 vStringStripTrailing (inheritance);
1745                 vStringStripLeading (inheritance);
1746         }
1747
1748         JSCRIPT_DEBUG_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string));
1749
1750         makeJsTagCommon (targetName, JSTAG_CLASS, NULL, inheritance,
1751                                          (is_anonymous && (targetName == className)));
1752
1753         if (! is_anonymous && targetName != className)
1754         {
1755                 /* FIXME: what to do with the secondary name?  It's local to the
1756                  *        class itself, so not very useful... let's hope people
1757                  *        don't give it another name than the target in case of
1758                  *              var MyClass = class MyClassSecondaryName { ... }
1759                  *        I guess it could be an alias to MyClass, or duplicate it
1760                  *        altogether, not sure. */
1761                 makeJsTag (className, JSTAG_CLASS, NULL, inheritance);
1762         }
1763
1764         if (inheritance)
1765                 vStringDelete (inheritance);
1766
1767         if (isType (token, TOKEN_OPEN_CURLY))
1768                 parseMethods (token, targetName, true);
1769
1770         deleteToken (className);
1771
1772         JSCRIPT_DEBUG_LEAVE();
1773         return true;
1774 }
1775
1776 static bool parseStatement (tokenInfo *const token, bool is_inside_class)
1777 {
1778         JSCRIPT_DEBUG_ENTER();
1779
1780         tokenInfo *const name = newToken ();
1781         tokenInfo *const secondary_name = newToken ();
1782         tokenInfo *const method_body_token = newToken ();
1783         vString * saveScope = vStringNew ();
1784         bool is_class = false;
1785         bool is_var = false;
1786         bool is_const = false;
1787         bool is_terminated = true;
1788         bool is_global = false;
1789         bool has_methods = false;
1790         vString *       fulltag;
1791
1792         vStringCopy (saveScope, token->scope);
1793         /*
1794          * Functions can be named or unnamed.
1795          * This deals with these formats:
1796          * Function
1797          *         validFunctionOne = function(a,b) {}
1798          *         testlib.validFunctionFive = function(a,b) {}
1799          *         var innerThree = function(a,b) {}
1800          *         var innerFour = (a,b) {}
1801          *         var D2 = secondary_fcn_name(a,b) {}
1802          *         var D3 = new Function("a", "b", "return a+b;");
1803          * Class
1804          *         testlib.extras.ValidClassOne = function(a,b) {
1805          *                 this.a = a;
1806          *         }
1807          * Class Methods
1808          *         testlib.extras.ValidClassOne.prototype = {
1809          *                 'validMethodOne' : function(a,b) {},
1810          *                 'validMethodTwo' : function(a,b) {}
1811          *         }
1812      *     ValidClassTwo = function ()
1813      *     {
1814      *         this.validMethodThree = function() {}
1815      *         // unnamed method
1816      *         this.validMethodFour = () {}
1817      *     }
1818          *         Database.prototype.validMethodThree = Database_getTodaysDate;
1819          */
1820
1821         if ( is_inside_class )
1822                 is_class = true;
1823         /*
1824          * var can precede an inner function
1825          */
1826         if ( isKeyword(token, KEYWORD_var) ||
1827                  isKeyword(token, KEYWORD_let) ||
1828                  isKeyword(token, KEYWORD_const) )
1829         {
1830                 JSCRIPT_DEBUG_PRINT("var/let/const case");
1831                 is_const = isKeyword(token, KEYWORD_const);
1832                 /*
1833                  * Only create variables for global scope
1834                  */
1835                 if ( token->nestLevel == 0 )
1836                 {
1837                         is_global = true;
1838                 }
1839                 readToken(token);
1840         }
1841
1842 nextVar:
1843         if ( isKeyword(token, KEYWORD_this) )
1844         {
1845                 JSCRIPT_DEBUG_PRINT("found 'this' keyword");
1846
1847                 readToken(token);
1848                 if (isType (token, TOKEN_PERIOD))
1849                 {
1850                         readToken(token);
1851                 }
1852         }
1853
1854         copyToken(name, token, true);
1855         JSCRIPT_DEBUG_PRINT("name becomes '%s'",vStringValue(name->string));
1856
1857         while (! isType (token, TOKEN_CLOSE_CURLY) &&
1858                ! isType (token, TOKEN_SEMICOLON)   &&
1859                ! isType (token, TOKEN_EQUAL_SIGN)  &&
1860                ! isType (token, TOKEN_COMMA)       &&
1861                ! isType (token, TOKEN_EOF))
1862         {
1863                 if (isType (token, TOKEN_OPEN_CURLY))
1864                         parseBlock (token, NULL);
1865
1866                 /* Potentially the name of the function */
1867                 if (isType (token, TOKEN_PERIOD))
1868                 {
1869                         /*
1870                          * Cannot be a global variable is it has dot references in the name
1871                          */
1872                         is_global = false;
1873                         /* Assume it's an assignment to a global name (e.g. a class) using
1874                          * its fully qualified name, so strip the scope.
1875                          * FIXME: resolve the scope so we can make more than an assumption. */
1876                         vStringClear (token->scope);
1877                         vStringClear (name->scope);
1878                         do
1879                         {
1880                                 readToken (token);
1881                                 if (! isType(token, TOKEN_KEYWORD))
1882                                 {
1883                                         if ( is_class )
1884                                         {
1885                                                 addToScope(token, name->string);
1886                                         }
1887                                         else
1888                                                 addContext (name, token);
1889
1890                                         readToken (token);
1891                                 }
1892                                 else if ( isKeyword(token, KEYWORD_prototype) )
1893                                 {
1894                                         /*
1895                                          * When we reach the "prototype" tag, we infer:
1896                                          *     "BindAgent" is a class
1897                                          *     "build"     is a method
1898                                          *
1899                                          * function BindAgent( repeatableIdName, newParentIdName ) {
1900                                          * }
1901                                          *
1902                                          * CASE 1
1903                                          * Specified function name: "build"
1904                                          *     BindAgent.prototype.build = function( mode ) {
1905                                          *        maybe parse nested functions
1906                                          *     }
1907                                          *
1908                                          * CASE 2
1909                                          * Prototype listing
1910                                          *     ValidClassOne.prototype = {
1911                                          *         'validMethodOne' : function(a,b) {},
1912                                          *         'validMethodTwo' : function(a,b) {}
1913                                          *     }
1914                                          *
1915                                          */
1916                                         if (! ( isType (name, TOKEN_IDENTIFIER)
1917                                                 || isType (name, TOKEN_STRING) ) )
1918                                                 /*
1919                                                  * Unexpected input. Try to reset the parsing.
1920                                                  *
1921                                                  * TOKEN_STRING is acceptable. e.g.:
1922                                                  * -----------------------------------
1923                                                  * "a".prototype = function( mode ) {}
1924                                                  */
1925                                                 goto cleanUp;
1926
1927                                         makeClassTag (name, NULL, NULL);
1928                                         is_class = true;
1929
1930                                         /*
1931                                          * There should a ".function_name" next.
1932                                          */
1933                                         readToken (token);
1934                                         if (isType (token, TOKEN_PERIOD))
1935                                         {
1936                                                 /*
1937                                                  * Handle CASE 1
1938                                                  */
1939                                                 readToken (token);
1940                                                 if (! isType(token, TOKEN_KEYWORD))
1941                                                 {
1942                                                         vString *const signature = vStringNew ();
1943
1944                                                         addToScope(token, name->string);
1945
1946                                                         copyToken (method_body_token, token, true);
1947                                                         readToken (method_body_token);
1948
1949                                                         while (! isType (method_body_token, TOKEN_SEMICOLON) &&
1950                                                                ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
1951                                                                ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
1952                                                                ! isType (method_body_token, TOKEN_EOF))
1953                                                         {
1954                                                                 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
1955                                                                         skipArgumentList(method_body_token, false,
1956                                                                                                          vStringLength (signature) == 0 ? signature : NULL);
1957                                                                 else
1958                                                                         readToken (method_body_token);
1959                                                         }
1960
1961                                                         makeJsTag (token, JSTAG_METHOD, signature, NULL);
1962                                                         vStringDelete (signature);
1963
1964                                                         if ( isType (method_body_token, TOKEN_OPEN_CURLY))
1965                                                         {
1966                                                                 parseBlock (method_body_token, token->string);
1967                                                                 is_terminated = true;
1968                                                         }
1969                                                         else
1970                                                                 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
1971                                                         goto cleanUp;
1972                                                 }
1973                                         }
1974                                         else if (isType (token, TOKEN_EQUAL_SIGN))
1975                                         {
1976                                                 readToken (token);
1977                                                 if (isType (token, TOKEN_OPEN_CURLY))
1978                                                 {
1979                                                         /*
1980                                                          * Handle CASE 2
1981                                                          *
1982                                                          * Creates tags for each of these class methods
1983                                                          *     ValidClassOne.prototype = {
1984                                                          *         'validMethodOne' : function(a,b) {},
1985                                                          *         'validMethodTwo' : function(a,b) {}
1986                                                          *     }
1987                                                          */
1988                                                         parseMethods(token, name, false);
1989                                                         /*
1990                                                          * Find to the end of the statement
1991                                                          */
1992                                                         findCmdTerm (token, false, false);
1993                                                         token->ignoreTag = false;
1994                                                         is_terminated = true;
1995                                                         goto cleanUp;
1996                                                 }
1997                                         }
1998                                 }
1999                                 else
2000                                         readToken (token);
2001                         } while (isType (token, TOKEN_PERIOD));
2002                 }
2003                 else
2004                         readTokenFull (token, true, NULL);
2005
2006                 if ( isType (token, TOKEN_OPEN_PAREN) )
2007                         skipArgumentList(token, false, NULL);
2008
2009                 if ( isType (token, TOKEN_OPEN_SQUARE) )
2010                         skipArrayList(token, false);
2011
2012                 /*
2013                 if ( isType (token, TOKEN_OPEN_CURLY) )
2014                 {
2015                         is_class = parseBlock (token, name->string);
2016                 }
2017                 */
2018         }
2019
2020         if ( isType (token, TOKEN_CLOSE_CURLY) )
2021         {
2022                 /*
2023                  * Reaching this section without having
2024                  * processed an open curly brace indicates
2025                  * the statement is most likely not terminated.
2026                  */
2027                 is_terminated = false;
2028                 goto cleanUp;
2029         }
2030
2031         if ( isType (token, TOKEN_SEMICOLON) ||
2032              isType (token, TOKEN_EOF) ||
2033              isType (token, TOKEN_COMMA) )
2034         {
2035                 /*
2036                  * Only create variables for global scope
2037                  */
2038                 if ( token->nestLevel == 0 && is_global )
2039                 {
2040                         /*
2041                          * Handles this syntax:
2042                          *         var g_var2;
2043                          */
2044                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2045                 }
2046                 /*
2047                  * Statement has ended.
2048                  * This deals with calls to functions, like:
2049                  *     alert(..);
2050                  */
2051                 if (isType (token, TOKEN_COMMA))
2052                 {
2053                         readToken (token);
2054                         goto nextVar;
2055                 }
2056                 goto cleanUp;
2057         }
2058
2059         if ( isType (token, TOKEN_EQUAL_SIGN) )
2060         {
2061                 int parenDepth = 0;
2062
2063                 readToken (token);
2064
2065                 /* rvalue might be surrounded with parentheses */
2066                 while (isType (token, TOKEN_OPEN_PAREN))
2067                 {
2068                         parenDepth++;
2069                         readToken (token);
2070                 }
2071
2072                 if (isKeyword (token, KEYWORD_async))
2073                         readToken (token);
2074
2075                 if ( isKeyword (token, KEYWORD_function) )
2076                 {
2077                         vString *const signature = vStringNew ();
2078                         bool is_generator = false;
2079
2080                         readToken (token);
2081                         if (isType (token, TOKEN_STAR))
2082                         {
2083                                 is_generator = true;
2084                                 readToken (token);
2085                         }
2086
2087                         if (! isType (token, TOKEN_KEYWORD) &&
2088                             ! isType (token, TOKEN_OPEN_PAREN))
2089                         {
2090                                 /*
2091                                  * Functions of this format:
2092                                  *         var D2A = function theAdd(a, b)
2093                                  *         {
2094                                  *                return a+b;
2095                                  *         }
2096                                  * Are really two separate defined functions and
2097                                  * can be referenced in two ways:
2098                                  *         alert( D2A(1,2) );                     // produces 3
2099                                  *         alert( theAdd(1,2) );                  // also produces 3
2100                                  * So it must have two tags:
2101                                  *         D2A
2102                                  *         theAdd
2103                                  * Save the reference to the name for later use, once
2104                                  * we have established this is a valid function we will
2105                                  * create the secondary reference to it.
2106                                  */
2107                                 copyToken(secondary_name, token, true);
2108                                 readToken (token);
2109                         }
2110
2111                         if ( isType (token, TOKEN_OPEN_PAREN) )
2112                                 skipArgumentList(token, false, signature);
2113
2114                         if (isType (token, TOKEN_OPEN_CURLY))
2115                         {
2116                                 /*
2117                                  * This will be either a function or a class.
2118                                  * We can only determine this by checking the body
2119                                  * of the function.  If we find a "this." we know
2120                                  * it is a class, otherwise it is a function.
2121                                  */
2122                                 if ( is_inside_class )
2123                                 {
2124                                         makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
2125                                         if ( vStringLength(secondary_name->string) > 0 )
2126                                                 makeFunctionTag (secondary_name, signature, is_generator);
2127                                         parseBlock (token, name->string);
2128                                 }
2129                                 else
2130                                 {
2131                                         if (! ( isType (name, TOKEN_IDENTIFIER)
2132                                              || isType (name, TOKEN_STRING)
2133                                              || isType (name, TOKEN_KEYWORD) ) )
2134                                         {
2135                                                 /* Unexpected input. Try to reset the parsing. */
2136                                                 JSCRIPT_DEBUG_PRINT("Unexpected input, trying to reset");
2137                                                 vStringDelete (signature);
2138                                                 goto cleanUp;
2139                                         }
2140
2141                                         is_class = parseBlock (token, name->string);
2142                                         if ( is_class )
2143                                                 makeClassTag (name, signature, NULL);
2144                                         else
2145                                                 makeFunctionTag (name, signature, is_generator);
2146
2147                                         if ( vStringLength(secondary_name->string) > 0 )
2148                                                 makeFunctionTag (secondary_name, signature, is_generator);
2149                                 }
2150                         }
2151
2152                         vStringDelete (signature);
2153                 }
2154                 else if (isKeyword (token, KEYWORD_class))
2155                 {
2156                         is_terminated = parseES6Class (token, name);
2157                 }
2158                 else if (isType (token, TOKEN_OPEN_CURLY))
2159                 {
2160                         /*
2161                          * Creates tags for each of these class methods
2162                          *     ValidClassOne.prototype = {
2163                          *         'validMethodOne' : function(a,b) {},
2164                          *         'validMethodTwo' : function(a,b) {}
2165                          *     }
2166                          * Or checks if this is a hash variable.
2167                          *     var z = {};
2168                          */
2169                         has_methods = parseMethods(token, name, false);
2170                         if (has_methods)
2171                                 makeJsTag (name, JSTAG_CLASS, NULL, NULL);
2172                         else
2173                         {
2174                                 /*
2175                                  * Only create variables for global scope
2176                                  */
2177                                 if ( token->nestLevel == 0 && is_global )
2178                                 {
2179                                         /*
2180                                          * A pointer can be created to the function.
2181                                          * If we recognize the function/class name ignore the variable.
2182                                          * This format looks identical to a variable definition.
2183                                          * A variable defined outside of a block is considered
2184                                          * a global variable:
2185                                          *         var g_var1 = 1;
2186                                          *         var g_var2;
2187                                          * This is not a global variable:
2188                                          *         var g_var = function;
2189                                          * This is a global variable:
2190                                          *         var g_var = different_var_name;
2191                                          */
2192                                         fulltag = vStringNew ();
2193                                         if (vStringLength (token->scope) > 0)
2194                                         {
2195                                                 vStringCopy(fulltag, token->scope);
2196                                                 vStringPut (fulltag, '.');
2197                                                 vStringCat (fulltag, token->string);
2198                                         }
2199                                         else
2200                                         {
2201                                                 vStringCopy(fulltag, token->string);
2202                                         }
2203                                         if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
2204                                                         ! stringListHas(ClassNames, vStringValue (fulltag)) )
2205                                         {
2206                                                 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2207                                         }
2208                                         vStringDelete (fulltag);
2209                                 }
2210                         }
2211                         /* Here we should be at the end of the block, on the close curly.
2212                          * If so, read the next token not to confuse that close curly with
2213                          * the end of the current statement. */
2214                         if (isType (token, TOKEN_CLOSE_CURLY))
2215                         {
2216                                 readTokenFull(token, true, NULL);
2217                                 is_terminated = isType (token, TOKEN_SEMICOLON);
2218                         }
2219                 }
2220                 else if (isKeyword (token, KEYWORD_new))
2221                 {
2222                         readToken (token);
2223                         is_var = isType (token, TOKEN_IDENTIFIER);
2224                         if ( isKeyword (token, KEYWORD_function) ||
2225                                         isKeyword (token, KEYWORD_capital_function) ||
2226                                         isKeyword (token, KEYWORD_capital_object) ||
2227                                         is_var )
2228                         {
2229                                 if ( isKeyword (token, KEYWORD_capital_object) )
2230                                         is_class = true;
2231
2232                                 readToken (token);
2233                                 if ( isType (token, TOKEN_OPEN_PAREN) )
2234                                         skipArgumentList(token, true, NULL);
2235
2236                                 if (isType (token, TOKEN_SEMICOLON))
2237                                 {
2238                                         if ( token->nestLevel == 0 )
2239                                         {
2240                                                 if ( is_var )
2241                                                 {
2242                                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2243                                                 }
2244                                                 else
2245                                                 {
2246                                                         if ( is_class )
2247                                                         {
2248                                                                 makeClassTag (name, NULL, NULL);
2249                                                         } else {
2250                                                                 /* FIXME: we cannot really get a meaningful
2251                                                                  * signature from a `new Function()` call,
2252                                                                  * so for now just don't set any */
2253                                                                 makeFunctionTag (name, NULL, false);
2254                                                         }
2255                                                 }
2256                                         }
2257                                 }
2258                                 else if (isType (token, TOKEN_CLOSE_CURLY))
2259                                         is_terminated = false;
2260                         }
2261                 }
2262                 else if (! isType (token, TOKEN_KEYWORD))
2263                 {
2264                         /*
2265                          * Only create variables for global scope
2266                          */
2267                         if ( token->nestLevel == 0 && is_global )
2268                         {
2269                                 /*
2270                                  * A pointer can be created to the function.
2271                                  * If we recognize the function/class name ignore the variable.
2272                                  * This format looks identical to a variable definition.
2273                                  * A variable defined outside of a block is considered
2274                                  * a global variable:
2275                                  *         var g_var1 = 1;
2276                                  *         var g_var2;
2277                                  * This is not a global variable:
2278                                  *         var g_var = function;
2279                                  * This is a global variable:
2280                                  *         var g_var = different_var_name;
2281                                  */
2282                                 fulltag = vStringNew ();
2283                                 if (vStringLength (token->scope) > 0)
2284                                 {
2285                                         vStringCopy(fulltag, token->scope);
2286                                         vStringPut (fulltag, '.');
2287                                         vStringCat (fulltag, token->string);
2288                                 }
2289                                 else
2290                                 {
2291                                         vStringCopy(fulltag, token->string);
2292                                 }
2293                                 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
2294                                                 ! stringListHas(ClassNames, vStringValue (fulltag)) )
2295                                 {
2296                                         makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2297                                 }
2298                                 vStringDelete (fulltag);
2299                         }
2300                 }
2301
2302                 if (parenDepth > 0)
2303                 {
2304                         while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
2305                         {
2306                                 if (isType (token, TOKEN_OPEN_PAREN))
2307                                         parenDepth++;
2308                                 else if (isType (token, TOKEN_CLOSE_PAREN))
2309                                         parenDepth--;
2310                                 readTokenFull (token, true, NULL);
2311                         }
2312                         if (isType (token, TOKEN_CLOSE_CURLY))
2313                                 is_terminated = false;
2314                 }
2315         }
2316         /* if we aren't already at the cmd end, advance to it and check whether
2317          * the statement was terminated */
2318         if (! isType (token, TOKEN_CLOSE_CURLY) &&
2319             ! isType (token, TOKEN_SEMICOLON))
2320         {
2321                 /*
2322                  * Statements can be optionally terminated in the case of
2323                  * statement prior to a close curly brace as in the
2324                  * document.write line below:
2325                  *
2326                  * function checkForUpdate() {
2327                  *         if( 1==1 ) {
2328                  *                 document.write("hello from checkForUpdate<br>")
2329                  *         }
2330                  *         return 1;
2331                  * }
2332                  */
2333                 is_terminated = findCmdTerm (token, true, true);
2334                 /* if we're at a comma, try and read a second var */
2335                 if (isType (token, TOKEN_COMMA))
2336                 {
2337                         readToken (token);
2338                         goto nextVar;
2339                 }
2340         }
2341
2342 cleanUp:
2343         vStringCopy(token->scope, saveScope);
2344         deleteToken (name);
2345         deleteToken (secondary_name);
2346         deleteToken (method_body_token);
2347         vStringDelete(saveScope);
2348
2349         JSCRIPT_DEBUG_LEAVE();
2350
2351         return is_terminated;
2352 }
2353
2354 static void parseUI5 (tokenInfo *const token)
2355 {
2356         tokenInfo *const name = newToken ();
2357         /*
2358          * SAPUI5 is built on top of jQuery.
2359          * It follows a standard format:
2360          *     sap.ui.controller("id.of.controller", {
2361          *         method_name : function... {
2362          *         },
2363          *
2364          *         method_name : function ... {
2365          *         }
2366          *     }
2367          *
2368          * Handle the parsing of the initial controller (and the
2369          * same for "view") and then allow the methods to be
2370          * parsed as usual.
2371          */
2372
2373         readToken (token);
2374
2375         if (isType (token, TOKEN_PERIOD))
2376         {
2377                 readToken (token);
2378                 while (! isType (token, TOKEN_OPEN_PAREN) &&
2379                            ! isType (token, TOKEN_EOF))
2380                 {
2381                         readToken (token);
2382                 }
2383                 readToken (token);
2384
2385                 if (isType (token, TOKEN_STRING))
2386                 {
2387                         copyToken(name, token, true);
2388                         readToken (token);
2389                 }
2390
2391                 if (isType (token, TOKEN_COMMA))
2392                         readToken (token);
2393
2394                 do
2395                 {
2396                         parseMethods (token, name, false);
2397                 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
2398                                  ! isType (token, TOKEN_EOF));
2399         }
2400
2401         deleteToken (name);
2402 }
2403
2404 static bool parseLine (tokenInfo *const token, bool is_inside_class)
2405 {
2406         JSCRIPT_DEBUG_ENTER_TEXT("token is '%s' of type %02x",vStringValue(token->string),token->type);
2407
2408         bool is_terminated = true;
2409         /*
2410          * Detect the common statements, if, while, for, do, ...
2411          * This is necessary since the last statement within a block "{}"
2412          * can be optionally terminated.
2413          *
2414          * If the statement is not terminated, we need to tell
2415          * the calling routine to prevent reading an additional token
2416          * looking for the end of the statement.
2417          */
2418
2419         if (isType(token, TOKEN_KEYWORD))
2420         {
2421                 switch (token->keyword)
2422                 {
2423                         case KEYWORD_for:
2424                         case KEYWORD_while:
2425                         case KEYWORD_do:
2426                                 is_terminated = parseLoop (token);
2427                                 break;
2428                         case KEYWORD_if:
2429                         case KEYWORD_else:
2430                         case KEYWORD_try:
2431                         case KEYWORD_catch:
2432                         case KEYWORD_finally:
2433                                 /* Common semantics */
2434                                 is_terminated = parseIf (token);
2435                                 break;
2436                         case KEYWORD_switch:
2437                                 parseSwitch (token);
2438                                 break;
2439                         case KEYWORD_return:
2440                         case KEYWORD_async:
2441                                 readToken (token);
2442                                 is_terminated = parseLine (token, is_inside_class);
2443                                 break;
2444                         case KEYWORD_function:
2445                                 parseFunction (token);
2446                                 break;
2447                         case KEYWORD_class:
2448                                 is_terminated = parseES6Class (token, NULL);
2449                                 break;
2450                         default:
2451                                 is_terminated = parseStatement (token, is_inside_class);
2452                                 break;
2453                 }
2454         }
2455         else
2456         {
2457                 /*
2458                  * Special case where single line statements may not be
2459                  * SEMICOLON terminated.  parseBlock needs to know this
2460                  * so that it does not read the next token.
2461                  */
2462                 is_terminated = parseStatement (token, is_inside_class);
2463         }
2464
2465         JSCRIPT_DEBUG_LEAVE();
2466
2467         return is_terminated;
2468 }
2469
2470 static void parseJsFile (tokenInfo *const token)
2471 {
2472         JSCRIPT_DEBUG_ENTER();
2473
2474         do
2475         {
2476                 readToken (token);
2477
2478                 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
2479                         parseUI5 (token);
2480                 else if (isType (token, TOKEN_KEYWORD) && (token->keyword == KEYWORD_export ||
2481                                                            token->keyword == KEYWORD_default))
2482                         /* skip those at top-level */;
2483                 else
2484                         parseLine (token, false);
2485         } while (! isType (token, TOKEN_EOF));
2486
2487         JSCRIPT_DEBUG_LEAVE();
2488 }
2489
2490 static void initialize (const langType language)
2491 {
2492         Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
2493         Lang_js = language;
2494
2495         TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
2496 }
2497
2498 static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
2499 {
2500         if (!initialized)
2501                 return;
2502
2503         objPoolDelete (TokenPool);
2504 }
2505
2506 static void findJsTags (void)
2507 {
2508         tokenInfo *const token = newToken ();
2509
2510         NextToken = NULL;
2511         ClassNames = stringListNew ();
2512         FunctionNames = stringListNew ();
2513         LastTokenType = TOKEN_UNDEFINED;
2514
2515         parseJsFile (token);
2516
2517         stringListDelete (ClassNames);
2518         stringListDelete (FunctionNames);
2519         ClassNames = NULL;
2520         FunctionNames = NULL;
2521         deleteToken (token);
2522
2523 #ifdef HAVE_ICONV
2524         if (JSUnicodeConverter != (iconv_t) -2 && /* not created */
2525             JSUnicodeConverter != (iconv_t) -1 /* creation failed */)
2526         {
2527                 iconv_close (JSUnicodeConverter);
2528                 JSUnicodeConverter = (iconv_t) -2;
2529         }
2530 #endif
2531
2532         Assert (NextToken == NULL);
2533 }
2534
2535 /* Create parser definition structure */
2536 extern parserDefinition* JavaScriptParser (void)
2537 {
2538         // .jsx files are JSX: https://facebook.github.io/jsx/
2539         // which have JS function definitions, so we just use the JS parser
2540         static const char *const extensions [] = { "js", "jsx", NULL };
2541         static const char *const aliases [] = { "js", "node", "nodejs",
2542                                                 "seed", "gjs", NULL };
2543         parserDefinition *const def = parserNew ("JavaScript");
2544         def->extensions = extensions;
2545         def->aliases = aliases;
2546         /*
2547          * New definitions for parsing instead of regex
2548          */
2549         def->kindTable  = JsKinds;
2550         def->kindCount  = ARRAY_SIZE (JsKinds);
2551         def->parser             = findJsTags;
2552         def->initialize = initialize;
2553         def->finalize   = finalize;
2554         def->keywordTable = JsKeywordTable;
2555         def->keywordCount = ARRAY_SIZE (JsKeywordTable);
2556
2557         return def;
2558 }