ctags/parsers/jscript.c

   1 /*
   2  * Copyright (c) 2003, Darren Hiebert
   3  *
   4  * This source code is released for free distribution under the terms of the
   5  * GNU General Public License version 2 or (at your option) any later version.
   6  *
   7  * This module contains functions for generating tags for JavaScript language
   8  * files.
   9  *
  10  * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
  11  *
  12  * This is a good reference for different forms of the function statement:
  13  *     http://www.permadi.com/tutorial/jsFunc/
  14  * Another good reference:
  15  *     http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
  16  */
  17
  18 /*
  19  * INCLUDE FILES
  20  */
  21 #include "general.h"    /* must always come first */
  22 #include <ctype.h>      /* to define isalpha () */
  23 #ifdef DEBUG
  24 #include <stdio.h>
  25 #endif
  26
  27 #ifdef HAVE_ICONV
  28 #include <iconv.h>
  29 #include <errno.h>
  30 #       ifdef WORDS_BIGENDIAN
  31 #               define INTERNAL_ENCODING "UTF-32BE"
  32 #       else
  33 #               define INTERNAL_ENCODING "UTF-32LE"
  34 #       endif /* WORDS_BIGENDIAN */
  35 #endif
  36
  37 #include <string.h>
  38 #include "debug.h"
  39 #include "entry.h"
  40 #include "keyword.h"
  41 #include "numarray.h"
  42 #include "parse.h"
  43 #include "read.h"
  44 #include "routines.h"
  45 #include "vstring.h"
  46 #include "objpool.h"
  47 #include "options.h"
  48 #include "mbcs.h"
  49 #include "trace.h"
  50
  51 #include "jscript.h"
  52
  53 /*
  54  * MACROS
  55  */
  56 #define isType(token,t)         (bool) ((token)->type == (t))
  57 #define isKeyword(token,k)      (bool) ((token)->keyword == (k))
  58 #define newToken() (objPoolGet (TokenPool))
  59 #define deleteToken(t) (objPoolPut (TokenPool, (t)))
  60
  61 /*
  62  * DATA DECLARATIONS
  63  */
  64
  65 /* Used to specify type of keyword.
  66 */
  67 enum eKeywordId {
  68         KEYWORD_function,
  69         KEYWORD_capital_function,
  70         KEYWORD_capital_object,
  71         KEYWORD_prototype,
  72         KEYWORD_var,
  73         KEYWORD_let,
  74         KEYWORD_const,
  75         KEYWORD_new,
  76         KEYWORD_this,
  77         KEYWORD_for,
  78         KEYWORD_while,
  79         KEYWORD_do,
  80         KEYWORD_if,
  81         KEYWORD_else,
  82         KEYWORD_switch,
  83         KEYWORD_try,
  84         KEYWORD_catch,
  85         KEYWORD_finally,
  86         KEYWORD_sap,
  87         KEYWORD_return,
  88         KEYWORD_class,
  89         KEYWORD_extends,
  90         KEYWORD_static,
  91         KEYWORD_default,
  92         KEYWORD_export,
  93         KEYWORD_async,
  94         KEYWORD_get,
  95         KEYWORD_set,
  96 };
  97 typedef int keywordId; /* to allow KEYWORD_NONE */
  98
  99 typedef enum eTokenType {
 100         TOKEN_UNDEFINED,
 101         TOKEN_EOF,
 102         TOKEN_CHARACTER,
 103         TOKEN_CLOSE_PAREN,
 104         TOKEN_SEMICOLON,
 105         TOKEN_COLON,
 106         TOKEN_COMMA,
 107         TOKEN_KEYWORD,
 108         TOKEN_OPEN_PAREN,
 109         TOKEN_IDENTIFIER,
 110         TOKEN_STRING,
 111         TOKEN_TEMPLATE_STRING,
 112         TOKEN_PERIOD,
 113         TOKEN_OPEN_CURLY,
 114         TOKEN_CLOSE_CURLY,
 115         TOKEN_EQUAL_SIGN,
 116         TOKEN_OPEN_SQUARE,
 117         TOKEN_CLOSE_SQUARE,
 118         TOKEN_REGEXP,
 119         TOKEN_POSTFIX_OPERATOR,
 120         TOKEN_STAR,
 121         /* To handle Babel's decorators.
 122          * Used only in readTokenFull or lower functions. */
 123         TOKEN_ATMARK,
 124         TOKEN_BINARY_OPERATOR,
 125         TOKEN_ARROW,
 126         TOKEN_DOTS,                                     /* ... */
 127 } tokenType;
 128
 129 typedef struct sTokenInfo {
 130         tokenType               type;
 131         keywordId               keyword;
 132         vString *               string;
 133         int                             scope;
 134         unsigned long   lineNumber;
 135         MIOPos                  filePosition;
 136         int                             nestLevel;
 137         bool                    dynamicProp;
 138         int                             c;
 139 } tokenInfo;
 140
 141 /*
 142  * DATA DEFINITIONS
 143  */
 144
 145 static tokenType LastTokenType;
 146 static tokenInfo *NextToken;
 147
 148 static langType Lang_js;
 149
 150 static objPool *TokenPool = NULL;
 151
 152 #ifdef HAVE_ICONV
 153 static iconv_t JSUnicodeConverter = (iconv_t) -2;
 154 #endif
 155
 156 /*
 157  * "chain element" role is introduced when adapting the JavaScript parser
 158  * to corkAPI.
 159  *
 160  * In the corkAPI, a cork index returned from makeTagEntry() can
 161  * represent a scope of another tag. Let's think about `input-0.js' that
 162  * the node command accepts as an input for ctags.
 163  *
 164  +---+ input-0.js ------------------------------------------------------
 165  | 1 | class A {
 166  | 2 |    f = function(x) {
 167  | 3 |       return x
 168  | 4 |    }
 169  | 5 | }
 170  +---+------------------------------------------------------------------
 171  *
  172  * The following pseudo C code illustrates the code for
 173  * tagging `A' and `f' in input-0.js:
 174  +---+------------------------------------------------------------------
 175  |   |...
 176  |   | tagEntryFor e_for_A, e_for_f;
 177  |   | ...
 178  |   | int index_for_A = makeTagEntry (&e_for_A);
 179  |   | ...
 180  |>>>| e_for_f.extensionFields.scopeIndex = index_for_A;
 181  |   | ...
 182  |   | makeTagEntry (&e_for_f);
 183  |   | ...
 184  +---+------------------------------------------------------------------
 185  *
 186  * `index_for_A' represents "A" in "class A".
 187  * `f' is defined in `A'. To fill the scope field of the tag for `f',
 188  * `scopeIndex' member of the tag is filled with `index_for_A' at line |>>>|.
 189  *
 190  * If `A' is defined in the input source file, this technique based on
 191  * the cork API works fine. However, if `A' is not defined in the input
 192  * source file, the technique doesn't work well.
 193  +---+ input-1.js -------------------------------------------------------
 194  | 1 | import {A} from 'input-0.js';
 195  | 2 | A.g = function(x) {
 196  | 3 |    return x
 197  | 4 | }
 198  +---+------------------------------------------------------------------
 199  *
 200  * In this case, `A' may be defined in input-0.js.
 201  * The current implementation of ctags processes file by file; it doesn't
 202  * use the knowledge in other input source files than current input source
 203  * file. ctags processing input-1.js doesn't know the cork index for `A'.
 204  *
 205  * When tagging `g' with "function" kind, how can we fill the scope field
 206  * of the tag for `g'?
 207  *
 208  * Here the "chain element" role comes.
 209  * This role is used for tagging `z' in "x.y.z" in the case when ctags
 210  * doesn't see the definitions for `x' and `y'.
  211  * The JavaScript parser makes reference tags for `x' and `y' with the
  212  * "chain element" role. makeTagEntry() returns a cork index regardless of
  213  * the type of the tag (definition or reference).
 214  * The index for the reference tag for `y' can be used to fill the scope
 215  * field of the tag for `z'. The index for `x' can be used to fill the
 216  * field for `y'.
 217  *
  218  * With this trick and technique, the scope field for `g' is filled:
 219  +---+ tags for input-1.js ---------------------------------------------
 220  | 1 | A  input-1.js  /^A.g = function(x) {$/;"  f  roles:chainElt  extras:reference
 221  | 2 | g  input-1.js  /^A.g = function(x) {$/;"  f  scope:function:A  signature:(x)  roles:def
 222  +---+------------------------------------------------------------------
 223  *
 224  * By default, reference tags are not emitted. So non-ctags-expert users may
 225  * not see the tag entry for `A'.
 226  *
 227  * makeJsRefTagsForNameChain() and makeJsTagCommon() implement the trick
 228  * and technique.
 229  *
 230  * Arguable points:
 231  *
  232  * Is "chain element(chainElt)" a suitable name for people familiar with JavaScript?
 233  *
  234  * Kinds assigned to the tag having chainElt role must be revised. Eventually
 235  * we may need to introduce "unknown" kind like the Python parser. Assigning
 236  * "function" kind to `A' in input-1.js is obviously wrong.
 237  */
 238
 239 typedef enum {
 240         JS_VARIABLE_CHAINELT,
 241 } jsVariableRole;
 242
 243 typedef enum {
 244         JS_CLASS_CHAINELT,
 245 } jsClassRole;
 246
 247 static roleDefinition JsFunctionRoles [] = {
 248         /* Currently V parser wants this items. */
 249         { true, "foreigndecl", "declared in foreign languages" },
 250 };
 251
 252 static roleDefinition JsVariableRoles [] = {
 253         { false, "chainElt", "(EXPERIMENTAL)used as an element in a name chain like a.b.c" },
 254 };
 255
 256 static roleDefinition JsClassRoles [] = {
 257         { false, "chainElt", "(EXPERIMENTAL)used as an element in a name chain like a.b.c" },
 258 };
 259
 260 static kindDefinition JsKinds [] = {
 261         { true,  'f', "function",         "functions",
 262           .referenceOnly = false, ATTACH_ROLES(JsFunctionRoles) },
 263         { true,  'c', "class",            "classes",
 264           .referenceOnly = false, ATTACH_ROLES(JsClassRoles)    },
 265         { true,  'm', "method",           "methods"          },
 266         { true,  'p', "property",         "properties"       },
 267         { true,  'C', "constant",         "constants"        },
 268         { true,  'v', "variable",         "global variables",
 269           .referenceOnly = false, ATTACH_ROLES(JsVariableRoles) },
 270         { true,  'g', "generator",        "generators"       },
 271         { true,  'G', "getter",           "getters"          },
 272         { true,  'S', "setter",           "setters"          },
 273         { true,  'M', "field",            "fields"           },
 274 };
 275
 276 static const keywordTable JsKeywordTable [] = {
 277         /* keyword              keyword ID */
 278         { "function",   KEYWORD_function                        },
 279         { "Function",   KEYWORD_capital_function        },
 280         { "Object",             KEYWORD_capital_object          },
 281         { "prototype",  KEYWORD_prototype                       },
 282         { "var",                KEYWORD_var                                     },
 283         { "let",                KEYWORD_let                                     },
 284         { "const",              KEYWORD_const                           },
 285         { "new",                KEYWORD_new                                     },
 286         { "this",               KEYWORD_this                            },
 287         { "for",                KEYWORD_for                                     },
 288         { "while",              KEYWORD_while                           },
 289         { "do",                 KEYWORD_do                                      },
 290         { "if",                 KEYWORD_if                                      },
 291         { "else",               KEYWORD_else                            },
 292         { "switch",             KEYWORD_switch                          },
 293         { "try",                KEYWORD_try                                     },
 294         { "catch",              KEYWORD_catch                           },
 295         { "finally",    KEYWORD_finally                         },
 296         { "sap",            KEYWORD_sap                                 },
 297         { "return",             KEYWORD_return                          },
 298         { "class",              KEYWORD_class                           },
 299         { "extends",    KEYWORD_extends                         },
 300         { "static",             KEYWORD_static                          },
 301         { "default",    KEYWORD_default                         },
 302         { "export",             KEYWORD_export                          },
 303         { "async",              KEYWORD_async                           },
 304         { "get",                KEYWORD_get                                     },
 305         { "set",                KEYWORD_set                                     },
 306 };
 307
 308 /*
 309  * FUNCTION DEFINITIONS
 310  */
 311
 312 /* Recursive functions */
 313 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr);
 314 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr);
 315 static bool parseFunction (tokenInfo *const token, tokenInfo *const name, const bool is_inside_class);
 316 static bool parseBlock (tokenInfo *const token, int parent_scope);
 317 static bool parseMethods (tokenInfo *const token, int class_index, const bool is_es6_class);
 318 static bool parseLine (tokenInfo *const token, bool is_inside_class);
 319 static void parseUI5 (tokenInfo *const token);
 320
 321 #ifdef DO_TRACING
 322 static const char *tokenTypeName(enum eTokenType e);
 323 static const char* getNameStringForCorkIndex(int index);
 324 static const char* getKindStringForCorkIndex(int index);
 325 static const char *kindName(jsKind kind);
 326 // #define DO_TRACING_USE_DUMP_TOKEN
 327 #ifdef DO_TRACING_USE_DUMP_TOKEN
 328 static void dumpToken (const tokenInfo *const token);
 329 #endif
 330 #endif
 331
 332 static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
 333 {
 334         tokenInfo *token = xMalloc (1, tokenInfo);
 335
 336         token->string = vStringNew ();
 337         token->scope = CORK_NIL;
 338
 339         return token;
 340 }
 341
 342 static void clearPoolToken (void *data)
 343 {
 344         tokenInfo *token = data;
 345
 346         token->type = TOKEN_UNDEFINED;
 347         token->keyword = KEYWORD_NONE;
 348         token->nestLevel = 0;
 349         token->dynamicProp = false;
 350         token->lineNumber = getInputLineNumber ();
 351         token->filePosition = getInputFilePosition ();
 352         vStringClear (token->string);
 353         token->scope = CORK_NIL;
 354 }
 355
 356 static void deletePoolToken (void *data)
 357 {
 358         tokenInfo *token = data;
 359         vStringDelete (token->string);
 360         eFree (token);
 361 }
 362
 363 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
 364                                            bool const include_non_read_info)
 365 {
 366         dest->lineNumber = src->lineNumber;
 367         dest->filePosition = src->filePosition;
 368         dest->type = src->type;
 369         dest->keyword = src->keyword;
 370         dest->dynamicProp = src->dynamicProp;
 371         dest->c = src->c;
 372         vStringCopy(dest->string, src->string);
 373         if (include_non_read_info)
 374         {
 375                 dest->nestLevel = src->nestLevel;
 376                 dest->scope = src->scope;
 377         }
 378 }
 379
 380 static void injectDynamicName (tokenInfo *const token, vString *newName)
 381 {
 382         token->dynamicProp = true;
 383         vStringDelete (token->string);
 384         token->string = newName;
 385 }
 386
 387 /*
 388  * Tag generation functions
 389  */
 390
 391 struct  bestJSEntryInScopeData {
 392         int index;
 393 };
 394
 395 static bool findBestJSEntry (int corkIndex, tagEntryInfo *entry, void *cb_data)
 396 {
 397         struct  bestJSEntryInScopeData *data = cb_data;
 398
 399         if (isRoleAssigned (entry, ROLE_DEFINITION_INDEX))
 400         {
 401                 data->index = corkIndex;
 402                 return false;
 403         }
 404
 405         if (data->index == CORK_NIL || data->index > corkIndex)
 406                 data->index = corkIndex;
 407
 408         return true;
 409 }
 410
 411 static int bestJSEntryInScope(int scope, const char *name)
 412 {
 413         /* If the SCOPE has a tag entry having NAME, the tag is the best
 414          * even if there are reference tag entries having NAME.
 415          * If the scope has only reference tag entries having NAME, the
 416          * tag having smallest cork index is the best.
 417          */
 418
 419         struct  bestJSEntryInScopeData data = {
 420                 .index = CORK_NIL,
 421         };
 422         foreachEntriesInScope (scope, name,  findBestJSEntry, &data);
 423         return data.index;
 424 }
 425
 426 static int makeJsRefTagsForNameChain (char *name_chain, const tokenInfo *token, int leaf_kind, int scope)
 427 {
 428         /* To fill the scope field of "d" of "a.b.c.d",
 429          * "c" must be tagged if the cork API is used.
 430          * ----------------------------------------------------------
 431          * How the fields for "a", "b", and "c" are filled.
 432          *   a  kind:class  scope:<given by SCOPE>  roles:chainElt
 433          *   b  kind:class  scope:class:a  roles:chainElt
 434          *
 435          *   The fields of c depends on LEAF_KIND that is passed to this functions.
 436          *
 437          *   if (LEAF_KIND == FUNCTION)
 438          *       c  kind:function  scope:class:b  roles:chainElt
 439          *   else
 440          *       c  kind:class  scope:class:b  roles:chainElt
 441          */
 442
 443         const char *name = name_chain;
 444         char *next = strchr(name_chain, '.');
 445         if (next)
 446                 *next = '\0';
 447         int index = bestJSEntryInScope (scope, name);
 448
 449         if (index == CORK_NIL)
 450         {
 451                 tagEntryInfo e;
 452                 int kind = JSTAG_CLASS;
 453                 int role = JS_CLASS_CHAINELT;
 454                 if (next == NULL && leaf_kind == JSTAG_FUNCTION)
 455                 {
 456                         /*
 457                          * If we're creating a function (and not a method),
 458                          * assume the parent is a plain variable.
 459                          */
 460                         kind = JSTAG_VARIABLE;
 461                         role = JS_VARIABLE_CHAINELT;
 462                 }
 463
 464                 initRefTagEntry (&e, name, kind, role);
 465                 updateTagLine (&e, token->lineNumber, token->filePosition);
 466                 e.extensionFields.scopeIndex = scope;
 467
 468                 index = makeTagEntry (&e);
 469                 /* We should remove this condition. We should fix the callers passing
 470                  * an empty name instead. makeTagEntry() returns CORK_NIL if the tag
 471                  * name is empty. */
 472                 if (index != CORK_NIL)
 473                         registerEntry (index);
 474         }
 475
 476         return next
 477                 ? makeJsRefTagsForNameChain (next + 1, token, leaf_kind, index)
 478                 : index;
 479 }
 480
 481 static int makeJsTagCommon (const tokenInfo *const token, const jsKind kind,
 482                                                         vString *const signature, vString *const inheritance,
 483                                                         bool anonymous)
 484 {
 485         int index = CORK_NIL;
 486         const char *name = vStringValue (token->string);
 487
 488         const char *p;
 489         char *name_chain = NULL;
 490         if (!token->dynamicProp && kind != JSTAG_PROPERTY &&  (p = strrchr (name, '.')) != NULL )
 491         {
 492                 if ((p - name) != 0)
 493                         name_chain = eStrndup (name, (size_t) (p - name));
 494                 name = p + 1;
 495                 if (name[0] == '\0')
 496                         return CORK_NIL;
 497         }
 498
 499         int scope = token->scope;
 500         if (name_chain)
 501         {
 502                 scope = makeJsRefTagsForNameChain (name_chain, token, kind, scope);
 503                 eFree (name_chain);
 504         }
 505
 506         /*
 507          * Check whether NAME is already defined in SCOPE.
 508          * If the NAME is already defined, return the cork index for the NAME.
 509          */
 510         if (kind == JSTAG_FUNCTION || kind == JSTAG_CLASS)
 511         {
 512                 index = anyKindEntryInScope (scope, name, kind, true);
 513                 if (index != CORK_NIL)
 514                         return index;
 515         }
 516
 517         tagEntryInfo e;
 518         initTagEntry (&e, name, kind);
 519         updateTagLine (&e, token->lineNumber, token->filePosition);
 520         e.extensionFields.scopeIndex = scope;
 521
 522 #ifdef DO_TRACING
 523         {
 524                 const char *scope_str = getNameStringForCorkIndex (scope);
 525                 const char *scope_kind_str = getKindStringForCorkIndex (scope);
 526                 TRACE_PRINT("Emitting tag for symbol '%s' of kind %s with scope '%s:%s'", name, kindName(kind), scope_kind_str, scope_str);
 527         }
 528 #endif
 529
 530         if (signature && vStringLength(signature))
 531         {
 532                 size_t i;
 533                 /* sanitize signature by replacing all control characters with a
 534                  * space (because it's simple).
 535                  * there should never be any junk in a valid signature, but who
 536                  * knows what the user wrote and CTags doesn't cope well with weird
 537                  * characters. */
 538                 for (i = 0; i < signature->length; i++)
 539                 {
 540                         unsigned char c = (unsigned char) vStringChar (signature, i);
 541                         if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
 542                                 vStringChar (signature, i) = ' ';
 543                 }
 544                 e.extensionFields.signature = vStringValue(signature);
 545         }
 546
 547         if (inheritance)
 548                 e.extensionFields.inheritance = vStringValue(inheritance);
 549
 550         if (anonymous)
 551                 markTagExtraBit (&e, XTAG_ANONYMOUS);
 552
 553         index = makeTagEntry (&e);
 554         /* We shold remove This condition. We should fix the callers passing
 555          * an empty name instead. makeTagEntry() returns CORK_NIL if the tag
 556          * name is empty. */
 557         if (index != CORK_NIL)
 558                 registerEntry (index);
 559
 560         return index;
 561 }
 562
 563 static int makeJsTag (const tokenInfo *const token, const jsKind kind,
 564                                            vString *const signature, vString *const inheritance)
 565 {
 566         return makeJsTagCommon (token, kind, signature, inheritance, false);
 567 }
 568
 569 static int makeClassTagCommon (tokenInfo *const token, vString *const signature,
 570                                                   vString *const inheritance, bool anonymous)
 571 {
 572         return makeJsTagCommon (token, JSTAG_CLASS, signature, inheritance, anonymous);
 573 }
 574
 575 static int makeClassTag (tokenInfo *const token, vString *const signature,
 576                                                   vString *const inheritance)
 577 {
 578         return makeClassTagCommon (token, signature, inheritance, false);
 579 }
 580
 581 static int makeFunctionTagCommon (tokenInfo *const token, vString *const signature,
 582                                                                   bool generator, bool anonymous)
 583 {
 584         return makeJsTagCommon (token, generator ? JSTAG_GENERATOR : JSTAG_FUNCTION, signature, NULL,
 585                                                         anonymous);
 586 }
 587
 588 static int makeFunctionTag (tokenInfo *const token, vString *const signature, bool generator)
 589 {
 590         return makeFunctionTagCommon (token, signature, generator, false);
 591 }
 592
 593 static bool isClassName (tokenInfo *const name)
 594 {
 595         char * p = strrchr(vStringValue (name->string), '.');
 596         if (p == NULL)
 597                 p = vStringValue (name->string);
 598         else
 599                 p++;
 600
 601         return isupper((unsigned char) *p);
 602 }
 603
 604 /*
 605  * Parsing functions
 606  */
 607
 608 /* given @p point, returns the first byte of the encoded output sequence, and
 609  * make sure the next ones will be returned by calls to getcFromInputFile()
 610  * as if the code point was simply written in the input file. */
 611 static int handleUnicodeCodePoint (uint32_t point)
 612 {
 613         int c = (int) point;
 614
 615         Assert (point < 0x110000);
 616
 617 #ifdef HAVE_ICONV
 618         /* if we do have iconv and the encodings are specified, use this */
 619         if (isConverting () && JSUnicodeConverter == (iconv_t) -2)
 620         {
 621                 /* if we didn't try creating the converter yet, try and do so */
 622                 JSUnicodeConverter = iconv_open (getLanguageEncoding (Lang_js), INTERNAL_ENCODING);
 623         }
 624         if (isConverting () && JSUnicodeConverter != (iconv_t) -1)
 625         {
 626                 char *input_ptr = (char *) &point;
 627                 size_t input_left = sizeof point;
 628                 /* 4 bytes should be enough for any encoding (it's how much UTF-32
 629                  * would need). */
 630                 /* FIXME: actually iconv has a tendency to output a BOM for Unicode
 631                  * encodings where it matters when the endianness is not specified in
 632                  * the target encoding name.  E.g., if the target encoding is "UTF-32"
 633                  * or "UTF-16" it will output 2 code points, the BOM (U+FEFF) and the
 634                  * one we expect. This does not happen if the endianness is specified
 635                  * explicitly, e.g. with "UTF-32LE", or "UTF-16BE".
 636                  * However, it's not very relevant for the moment as nothing in CTags
 637                  * cope well (if at all) with non-ASCII-compatible encodings like
 638                  * UTF-32 or UTF-16 anyway. */
 639                 char output[4] = { 0 };
 640                 char *output_ptr = output;
 641                 size_t output_left = ARRAY_SIZE (output);
 642
 643                 if (iconv (JSUnicodeConverter, &input_ptr, &input_left, &output_ptr, &output_left) == (size_t) -1)
 644                 {
 645                         /* something went wrong, which probably means the output encoding
 646                          * cannot represent the character.  Use a placeholder likely to be
 647                          * supported instead, that's also valid in an identifier */
 648                         verbose ("JavaScript: Encoding: %s\n", strerror (errno));
 649                         c = '_';
 650                 }
 651                 else
 652                 {
 653                         const size_t output_len = ARRAY_SIZE (output) - output_left;
 654
 655                         /* put all but the first byte back so that getcFromInputFile() will
 656                          * return them in the right order */
 657                         for (unsigned int i = 1; i < output_len; i++)
 658                                 ungetcToInputFile ((unsigned char) output[output_len - i]);
 659                         c = (unsigned char) output[0];
 660                 }
 661
 662                 iconv (JSUnicodeConverter, NULL, NULL, NULL, NULL);
 663         }
 664         else
 665 #endif
 666         {
 667                 /* when no encoding is specified (or no iconv), assume UTF-8 is good.
 668                  * Why UTF-8?  Because it's an ASCII-compatible common Unicode encoding. */
 669                 if (point < 0x80)
 670                         c = (unsigned char) point;
 671                 else if (point < 0x800)
 672                 {
 673                         c = (unsigned char) (0xc0 | ((point >> 6) & 0x1f));
 674                         ungetcToInputFile ((unsigned char) (0x80 | (point & 0x3f)));
 675                 }
 676                 else if (point < 0x10000)
 677                 {
 678                         c = (unsigned char) (0xe0 | ((point >> 12) & 0x0f));
 679                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  0) & 0x3f)));
 680                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  6) & 0x3f)));
 681                 }
 682                 else if (point < 0x110000)
 683                 {
 684                         c = (unsigned char) (0xf0 | ((point >> 18) & 0x07));
 685                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  0) & 0x3f)));
 686                         ungetcToInputFile ((unsigned char) (0x80 | ((point >>  6) & 0x3f)));
 687                         ungetcToInputFile ((unsigned char) (0x80 | ((point >> 12) & 0x3f)));
 688                 }
 689         }
 690
 691         return c;
 692 }
 693
 694 /* reads a Unicode escape sequence after the "\" prefix.
 695  * @param value Location to store the escape sequence value.
 696  * @param isUTF16 Location to store whether @param value is an UTF-16 word.
 697  * @returns Whether a valid sequence was read. */
 698 static bool readUnicodeEscapeSequenceValue (uint32_t *const value,
 699                                                                                         bool *const isUTF16)
 700 {
 701         bool valid = false;
 702         int d = getcFromInputFile ();
 703
 704         if (d != 'u')
 705                 ungetcToInputFile (d);
 706         else
 707         {
 708                 int e = getcFromInputFile ();
 709                 char cp[6 + 1]; /* up to 6 hex + possible closing '}' or invalid char */
 710                 unsigned int cp_len = 0;
 711
 712                 *isUTF16 = (e != '{');
 713                 if (e == '{')
 714                 {       /* Handles Unicode code point escapes: \u{ HexDigits }
 715                          * We skip the leading 0s because there can be any number of them
 716                          * and they don't change any meaning. */
 717                         bool has_leading_zero = false;
 718                         int l;
 719
 720                         while ((cp[cp_len] = (char) (l = getcFromInputFile ())) == '0')
 721                                 has_leading_zero = true;
 722
 723                         while (isxdigit (l) && ++cp_len < ARRAY_SIZE (cp))
 724                                 cp[cp_len] = (char) (l = getcFromInputFile ());
 725                         valid = ((cp_len > 0 || has_leading_zero) &&
 726                                          cp_len < ARRAY_SIZE (cp) && cp[cp_len] == '}' &&
 727                                          /* also check if it's a valid Unicode code point */
 728                                          (cp_len < 6 ||
 729                                           (cp_len == 6 && strncmp (cp, "110000", 6) < 0)));
 730                         if (! valid) /* put back the last (likely invalid) character */
 731                                 ungetcToInputFile (l);
 732                 }
 733                 else
 734                 {       /* Handles Unicode escape sequences: \u Hex4Digits */
 735                         int l;
 736                         do
 737                                 cp[cp_len] = (char) (l = ((cp_len == 0) ? e : getcFromInputFile ()));
 738                         while (isxdigit (l) && ++cp_len < 4);
 739                         valid = (cp_len == 4);
 740                 }
 741
 742                 if (! valid)
 743                 {
 744                         /* we don't get every character back, but it would require to
 745                          * be able to put up to 9 characters back (in the worst case
 746                          * for handling invalid \u{10FFFFx}), and here we're recovering
 747                          * from invalid syntax anyway. */
 748                         ungetcToInputFile (e);
 749                         ungetcToInputFile (d);
 750                 }
 751                 else
 752                 {
 753                         *value = 0;
 754                         for (unsigned int i = 0; i < cp_len; i++)
 755                         {
 756                                 *value *= 16;
 757
 758                                 /* we know it's a hex digit, no need to double check */
 759                                 if (cp[i] < 'A')
 760                                         *value += (unsigned int) cp[i] - '0';
 761                                 else if (cp[i] < 'a')
 762                                         *value += 10 + (unsigned int) cp[i] - 'A';
 763                                 else
 764                                         *value += 10 + (unsigned int) cp[i] - 'a';
 765                         }
 766                 }
 767         }
 768
 769         return valid;
 770 }
 771
 772 static int valueToXDigit (unsigned char v)
 773 {
 774         Assert (v <= 0xF);
 775
 776         if (v >= 0xA)
 777                 return 'A' + (v - 0xA);
 778         else
 779                 return '0' + v;
 780 }
 781
 782 /* Reads and expands a Unicode escape sequence after the "\" prefix.  If the
 783  * escape sequence is a UTF16 high surrogate, also try and read the low
 784  * surrogate to emit the proper code point.
 785  * @param fallback The character to return if the sequence is invalid. Usually
 786  *                 this would be the '\' character starting the sequence.
 787  * @returns The first byte of the sequence, or @param fallback if the sequence
 788  *          is invalid. On success, next calls to getcFromInputFile() will
 789  *          return subsequent bytes (if any). */
 790 static int readUnicodeEscapeSequence (const int fallback)
 791 {
 792         int c;
 793         uint32_t value;
 794         bool isUTF16;
 795
 796         if (! readUnicodeEscapeSequenceValue (&value, &isUTF16))
 797                 c = fallback;
 798         else
 799         {
 800                 if (isUTF16 && (value & 0xfc00) == 0xd800)
 801                 {       /* this is a high surrogate, try and read its low surrogate and
 802                          * emit the resulting code point */
 803                         uint32_t low;
 804                         int d = getcFromInputFile ();
 805
 806                         if (d != '\\' || ! readUnicodeEscapeSequenceValue (&low, &isUTF16))
 807                                 ungetcToInputFile (d);
 808                         else if (! isUTF16)
 809                         {       /* not UTF-16 low surrogate but a plain code point */
 810                                 d = handleUnicodeCodePoint (low);
 811                                 ungetcToInputFile (d);
 812                         }
 813                         else if ((low & 0xfc00) != 0xdc00)
 814                         {       /* not a low surrogate, so put back the escaped representation
 815                                  * in case it was another high surrogate we should read as part
 816                                  * of another pair. */
 817                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0x000f) >>  0)));
 818                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0x00f0) >>  4)));
 819                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0x0f00) >>  8)));
 820                                 ungetcToInputFile (valueToXDigit ((unsigned char) ((low & 0xf000) >> 12)));
 821                                 ungetcToInputFile ('u');
 822                                 ungetcToInputFile ('\\');
 823                         }
 824                         else
 825                                 value = 0x010000 + ((value & 0x03ff) << 10) + (low & 0x03ff);
 826                 }
 827                 c = handleUnicodeCodePoint (value);
 828         }
 829
 830         return c;
 831 }
 832
 833 static void parseString (vString *const string, const int delimiter)
 834 {
 835         bool end = false;
 836         while (! end)
 837         {
 838                 int c = getcFromInputFile ();
 839                 if (c == EOF)
 840                         end = true;
 841                 else if (c == '\\')
 842                 {
 843                         /* Eat the escape sequence (\", \', etc).  We properly handle
 844                          * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
 845                          * as an unescaped character, which is invalid and handled below.
 846                          * Also, handle the fact that <LineContinuation> produces an empty
 847                          * sequence.
 848                          * See ECMA-262 7.8.4 */
 849                         c = getcFromInputFile ();
 850                         if (c == 'u')
 851                         {
 852                                 ungetcToInputFile (c);
 853                                 c = readUnicodeEscapeSequence ('\\');
 854                                 vStringPut (string, c);
 855                         }
 856                         else if (c != '\r' && c != '\n')
 857                                 vStringPut(string, c);
 858                         else if (c == '\r')
 859                         {
 860                                 c = getcFromInputFile();
 861                                 if (c != '\n')
 862                                         ungetcToInputFile (c);
 863                         }
 864                 }
 865                 else if (c == delimiter)
 866                         end = true;
 867                 else if (c == '\r' || c == '\n')
 868                 {
 869                         /* those are invalid when not escaped */
 870                         end = true;
 871                         /* we don't want to eat the newline itself to let the automatic
 872                          * semicolon insertion code kick in */
 873                         ungetcToInputFile (c);
 874                 }
 875                 else
 876                         vStringPut (string, c);
 877         }
 878 }
 879
 880 static void parseRegExp (void)
 881 {
 882         int c;
 883         bool in_range = false;
 884
 885         do
 886         {
 887                 c = getcFromInputFile ();
 888                 if (! in_range && c == '/')
 889                 {
 890                         do /* skip flags */
 891                         {
 892                                 c = getcFromInputFile ();
 893                         } while (isalpha (c));
 894                         ungetcToInputFile (c);
 895                         break;
 896                 }
 897                 else if (c == '\n' || c == '\r')
 898                 {
 899                         /* invalid in a regex */
 900                         ungetcToInputFile (c);
 901                         break;
 902                 }
 903                 else if (c == '\\')
 904                         c = getcFromInputFile (); /* skip next character */
 905                 else if (c == '[')
 906                         in_range = true;
 907                 else if (c == ']')
 908                         in_range = false;
 909         } while (c != EOF);
 910 }
 911
/* Identifier reading: parseIdentifier() below reads an identifier beginning
 * with "first_char" and places it into "string".
 */
 915
 916 static int include_period_in_identifier = 0;
 917
 918 static void accept_period_in_identifier(bool incl)
 919 {
 920         if (incl)
 921         {
 922                 include_period_in_identifier++;
 923         }
 924         else if (!incl && include_period_in_identifier > 0)
 925         {
 926                 include_period_in_identifier--;
 927         }
 928 }
 929
 930 static bool isIdentChar(const int c)
 931 {
 932         return (isalpha (c) || isdigit (c) || c == '$' || \
 933                         c == '@' || c == '_' || c == '#' || \
 934                         c >= 0x80 || (include_period_in_identifier > 0 && c == '.'));
 935 }
 936
 937 static void parseIdentifier (vString *const string, const int first_char)
 938 {
 939         int c = first_char;
 940         Assert (isIdentChar (c));
 941         do
 942         {
 943                 vStringPut (string, c);
 944                 c = getcFromInputFile ();
 945                 if (c == '\\')
 946                         c = readUnicodeEscapeSequence (c);
 947         } while (isIdentChar (c));
 948         /* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
 949          * as we should actually put back the whole escape sequence and not the
 950          * decoded character.  However, it's not really worth the hassle as it can
 951          * only happen if the input has an invalid escape sequence. */
 952         ungetcToInputFile (c);          /* unget non-identifier character */
 953 }
 954
 955 static void parseTemplateString (vString *const string)
 956 {
 957         int c;
 958         do
 959         {
 960                 c = getcFromInputFile ();
 961                 if (c == '`' || c == EOF)
 962                         break;
 963
 964                 vStringPut (string, c);
 965
 966                 if (c == '\\')
 967                 {
 968                         c = getcFromInputFile();
 969                         if (c != EOF)
 970                                 vStringPut(string, c);
 971                 }
 972                 else if (c == '$')
 973                 {
 974                         c = getcFromInputFile ();
 975                         if (c != '{')
 976                                 ungetcToInputFile (c);
 977                         else
 978                         {
 979                                 int depth = 1;
 980                                 /* we need to use the real token machinery to handle strings,
 981                                  * comments, regexes and whatnot */
 982                                 tokenInfo *token = newToken ();
 983                                 LastTokenType = TOKEN_UNDEFINED;
 984                                 vStringPut(string, c);
 985                                 do
 986                                 {
 987                                         readTokenFull (token, false, string);
 988                                         if (isType (token, TOKEN_OPEN_CURLY))
 989                                                 depth++;
 990                                         else if (isType (token, TOKEN_CLOSE_CURLY))
 991                                                 depth--;
 992                                 }
 993                                 while (! isType (token, TOKEN_EOF) && depth > 0);
 994                                 deleteToken (token);
 995                         }
 996                 }
 997         }
 998         while (c != EOF);
 999 }
1000
1001 static void reprToken (const tokenInfo *const token, vString *const repr)
1002 {
1003         switch (token->type)
1004         {
1005                 case TOKEN_DOTS:
1006                         vStringCatS (repr, "...");
1007                         break;
1008
1009                 case TOKEN_STRING:
1010                 case TOKEN_TEMPLATE_STRING:
1011                         vStringPut (repr, token->c);
1012                         vStringCat (repr, token->string);
1013                         vStringPut (repr, token->c);
1014                         break;
1015
1016                 case TOKEN_IDENTIFIER:
1017                 case TOKEN_KEYWORD:
1018                         vStringCat (repr, token->string);
1019                         break;
1020
1021                 default:
1022                         vStringPut (repr, token->c);
1023                         break;
1024         }
1025 }
1026
1027 static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr)
1028 {
1029         int c;
1030         int i;
1031         bool newline_encountered = false;
1032
1033         /* if we've got a token held back, emit it */
1034         if (NextToken)
1035         {
1036                 TRACE_PRINT("Emitting held token");
1037                 copyToken (token, NextToken, false);
1038                 deleteToken (NextToken);
1039                 NextToken = NULL;
1040                 if (repr)
1041                         reprToken (token, repr);
1042                 return;
1043         }
1044
1045         token->type                     = TOKEN_UNDEFINED;
1046         token->keyword          = KEYWORD_NONE;
1047         vStringClear (token->string);
1048
1049 getNextChar:
1050         i = 0;
1051         do
1052         {
1053                 c = getcFromInputFile ();
1054                 if (include_newlines && (c == '\r' || c == '\n'))
1055                         newline_encountered = true;
1056                 i++;
1057         }
1058         while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
1059
1060         token->lineNumber   = getInputLineNumber ();
1061         token->filePosition = getInputFilePosition ();
1062
1063         /* special case to insert a separator */
1064         if (repr && c != EOF && i > 1)
1065                 vStringPut (repr, ' ');
1066
1067         token->c = c;
1068
1069         switch (c)
1070         {
1071                 case EOF: token->type = TOKEN_EOF;                      break;
1072                 case '(': token->type = TOKEN_OPEN_PAREN;       break;
1073                 case ')': token->type = TOKEN_CLOSE_PAREN;      break;
1074                 case ';': token->type = TOKEN_SEMICOLON;        break;
1075                 case ',': token->type = TOKEN_COMMA;            break;
1076                 case '.':
1077                 {
1078                         token->type = TOKEN_PERIOD;
1079
1080                         int d = getcFromInputFile ();
1081                         if (d != '.')
1082                         {
1083                                 ungetcToInputFile (d);
1084                                 break;
1085                         }
1086
1087                         d = getcFromInputFile ();
1088                         if (d != '.')
1089                         {
1090                                 ungetcToInputFile (d);
1091                                 ungetcToInputFile ('.');
1092                                 break;
1093                         }
1094
1095                         token->type = TOKEN_DOTS;
1096                         break;
1097                 }
1098                 case ':': token->type = TOKEN_COLON;            break;
1099                 case '{': token->type = TOKEN_OPEN_CURLY;       break;
1100                 case '}': token->type = TOKEN_CLOSE_CURLY;      break;
1101                 case '[': token->type = TOKEN_OPEN_SQUARE;      break;
1102                 case ']': token->type = TOKEN_CLOSE_SQUARE;     break;
1103
1104                 case '=':
1105                 {
1106                         int d = getcFromInputFile ();
1107                         if (d == '>')
1108                                 token->type = TOKEN_ARROW;
1109                         else
1110                         {
1111                                 ungetcToInputFile (d);
1112                                 token->type = TOKEN_EQUAL_SIGN;
1113                         }
1114                         break;
1115                 }
1116
1117                 case '+':
1118                 case '-':
1119                 {
1120                         int d = getcFromInputFile ();
1121                         if (d == c) /* ++ or -- */
1122                                 token->type = TOKEN_POSTFIX_OPERATOR;
1123                         else
1124                         {
1125                                 ungetcToInputFile (d);
1126                                 token->type = TOKEN_BINARY_OPERATOR;
1127                         }
1128                         break;
1129                 }
1130
1131                 case '*':
1132                         token->type = TOKEN_STAR;
1133                         break;
1134                 case '%':
1135                 case '?':
1136                 case '>':
1137                 case '<':
1138                 case '^':
1139                 case '|':
1140                 case '&':
1141                         token->type = TOKEN_BINARY_OPERATOR;
1142                         break;
1143
1144                 case '\'':
1145                 case '"':
1146                         token->type = TOKEN_STRING;
1147                         parseString (token->string, c);
1148                         token->lineNumber = getInputLineNumber ();
1149                         token->filePosition = getInputFilePosition ();
1150                         break;
1151
1152                 case '`':
1153                         token->type = TOKEN_TEMPLATE_STRING;
1154                         parseTemplateString (token->string);
1155                         token->lineNumber = getInputLineNumber ();
1156                         token->filePosition = getInputFilePosition ();
1157                         break;
1158
1159                 case '/':
1160                 {
1161                         int d = getcFromInputFile ();
1162                         if ( (d != '*') &&              /* is this the start of a comment? */
1163                                         (d != '/') )            /* is a one line comment? */
1164                         {
1165                                 ungetcToInputFile (d);
1166                                 switch (LastTokenType)
1167                                 {
1168                                         case TOKEN_CHARACTER:
1169                                         case TOKEN_IDENTIFIER:
1170                                         case TOKEN_STRING:
1171                                         case TOKEN_TEMPLATE_STRING:
1172                                         case TOKEN_CLOSE_CURLY:
1173                                         case TOKEN_CLOSE_PAREN:
1174                                         case TOKEN_CLOSE_SQUARE:
1175                                                 token->type = TOKEN_BINARY_OPERATOR;
1176                                                 break;
1177
1178                                         default:
1179                                                 token->type = TOKEN_REGEXP;
1180                                                 parseRegExp ();
1181                                                 token->lineNumber = getInputLineNumber ();
1182                                                 token->filePosition = getInputFilePosition ();
1183                                                 break;
1184                                 }
1185                         }
1186                         else
1187                         {
1188                                 if (d == '*')
1189                                 {
1190                                         skipToCharacterInInputFile2('*', '/');
1191                                         goto getNextChar;
1192                                 }
1193                                 else if (d == '/')      /* is this the start of a comment?  */
1194                                 {
1195                                         skipToCharacterInInputFile ('\n');
1196                                         /* if we care about newlines, put it back so it is seen */
1197                                         if (include_newlines)
1198                                                 ungetcToInputFile ('\n');
1199                                         goto getNextChar;
1200                                 }
1201                         }
1202                         break;
1203                 }
1204
1205                 case '#':
1206                         /* skip shebang in case of e.g. Node.js scripts */
1207                         if (token->lineNumber > 1)
1208                                 token->type = TOKEN_UNDEFINED;
1209                         else if ((c = getcFromInputFile ()) != '!')
1210                         {
1211                                 ungetcToInputFile (c);
1212                                 token->type = TOKEN_UNDEFINED;
1213                         }
1214                         else
1215                         {
1216                                 skipToCharacterInInputFile ('\n');
1217                                 goto getNextChar;
1218                         }
1219                         break;
1220
1221                 case '@':
1222                         token->type = TOKEN_ATMARK;
1223                         break;
1224
1225                 case '\\':
1226                         c = readUnicodeEscapeSequence (c);
1227                         /* fallthrough */
1228                 default:
1229                         if (! isIdentChar (c))
1230                                 token->type = TOKEN_UNDEFINED;
1231                         else
1232                         {
1233                                 parseIdentifier (token->string, c);
1234                                 token->lineNumber = getInputLineNumber ();
1235                                 token->filePosition = getInputFilePosition ();
1236                                 token->keyword = lookupKeyword (vStringValue (token->string), Lang_js);
1237                                 if (isKeyword (token, KEYWORD_NONE))
1238                                         token->type = TOKEN_IDENTIFIER;
1239                                 else
1240                                         token->type = TOKEN_KEYWORD;
1241                         }
1242                         break;
1243         }
1244
1245         if (include_newlines && newline_encountered)
1246         {
1247                 /* This isn't strictly correct per the standard, but following the
1248                  * real rules means understanding all statements, and that's not
1249                  * what the parser currently does.  What we do here is a guess, by
1250                  * avoiding inserting semicolons that would make the statement on
1251                  * the left or right obviously invalid.  Hopefully this should not
1252                  * have false negatives (e.g. should not miss insertion of a semicolon)
1253                  * but might have false positives (e.g. it will wrongfully emit a
1254                  * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
1255                  * This should however be mostly harmless as we only deal with
1256                  * newlines in specific situations where we know a false positive
1257                  * wouldn't hurt too bad. */
1258
1259                 /* these already end a statement, so no need to duplicate it */
1260                 #define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON    || \
1261                                                                           (t) == TOKEN_EOF          || \
1262                                                                           (t) == TOKEN_COMMA        || \
1263                                                                           (t) == TOKEN_OPEN_CURLY)
1264                 /* these cannot be the start or end of a statement */
1265                 #define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN      || \
1266                                                                            (t) == TOKEN_ARROW           || \
1267                                                                            (t) == TOKEN_COLON           || \
1268                                                                            (t) == TOKEN_PERIOD          || \
1269                                                                            (t) == TOKEN_STAR            || \
1270                                                                            (t) == TOKEN_BINARY_OPERATOR)
1271
1272                 if (! IS_STMT_SEPARATOR(LastTokenType) &&
1273                         ! IS_STMT_SEPARATOR(token->type) &&
1274                         ! IS_BINARY_OPERATOR(LastTokenType) &&
1275                         ! IS_BINARY_OPERATOR(token->type) &&
1276                         /* these cannot be followed by a semicolon */
1277                         ! (LastTokenType == TOKEN_OPEN_PAREN ||
1278                            LastTokenType == TOKEN_OPEN_SQUARE))
1279                 {
1280                         /* hold the token... */
1281                         Assert (NextToken == NULL);
1282                         NextToken = newToken ();
1283                         copyToken (NextToken, token, false);
1284
1285                         /* ...and emit a semicolon instead */
1286                         token->type = TOKEN_SEMICOLON;
1287                         token->keyword = KEYWORD_NONE;
1288                         vStringClear (token->string);
1289                         token->c = '\n';
1290                 }
1291
1292                 #undef IS_STMT_SEPARATOR
1293                 #undef IS_BINARY_OPERATOR
1294         }
1295
1296         LastTokenType = token->type;
1297
1298         if (repr)
1299                 reprToken (token, repr);
1300 }
1301
1302 /* whether something we consider a keyword (either because it sometimes is or
1303  * because of the parser's perks) is actually valid as a function name
1304  * See https://tc39.es/ecma262/multipage/ecmascript-language-lexical-grammar.html#sec-keywords-and-reserved-words */
1305 static bool canBeFunctionName (const tokenInfo *const token, bool strict_mode)
1306 {
1307         switch (token->keyword)
1308         {
1309                 /* non-keywords specific to this parser */
1310                 case KEYWORD_capital_function:
1311                 case KEYWORD_capital_object:
1312                 case KEYWORD_prototype:
1313                 case KEYWORD_sap:
1314                 /* syntactic, but not keyword:
1315                  *     as async from get meta of set target
1316                  * "await" is OK as well */
1317                 case KEYWORD_async:
1318                 case KEYWORD_get:
1319                 case KEYWORD_set:
1320                         return true;
1321
1322                 /* strict-mode keywords
1323                  *     let static implements interface package private protected public
1324                  * we need to also include those which are OK as function names
1325                  *     yield
1326                  */
1327                 case KEYWORD_let:
1328                 case KEYWORD_static:
1329                         return ! strict_mode;
1330
1331                 default:
1332                         return isType (token, TOKEN_IDENTIFIER);
1333         }
1334 }
1335
1336 static bool canBePropertyName (const tokenInfo *const token)
1337 {
1338         /* property names are pretty relaxed, any non reserved word is OK, even
1339          * strict-mode ones in strict-mode */
1340         return canBeFunctionName (token, false);
1341 }
1342
1343 /* See https://babeljs.io/blog/2018/09/17/decorators */
1344 static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr)
1345 {
1346         readTokenFullRaw (token, include_newlines, repr);
1347         if (isType (token, TOKEN_OPEN_PAREN))
1348         {
1349                 /*  @(complex ? dec1 : dec2) */
1350                 skipArgumentList (token, include_newlines, repr);
1351                 TRACE_PRINT ("found @(...) style decorator");
1352         }
1353         else if (isType (token, TOKEN_IDENTIFIER))
1354         {
1355                 /*  @namespace.foo (...) */
1356                 bool found_period = false;
1357                 while (1)
1358                 {
1359                         readTokenFullRaw (token, include_newlines, repr);
1360                         if (isType (token, TOKEN_IDENTIFIER))
1361                         {
1362                                 if (!found_period)
1363                                 {
1364                                         TRACE_PRINT("found @namespace.bar style decorator");
1365                                         break;
1366                                 }
1367                                 found_period = false;
1368                         }
1369                         else if (isType (token, TOKEN_PERIOD))
1370                                 found_period = true;
1371                         else if (isType (token, TOKEN_OPEN_PAREN))
1372                         {
1373                                 skipArgumentList (token, include_newlines, repr);
1374                                 TRACE_PRINT("found @foo(...) style decorator");
1375                                 break;
1376                         }
1377                         else
1378                         {
1379                                 TRACE_PRINT("found @foo style decorator");
1380                                 break;
1381                         }
1382                 }
1383         }
1384         else
1385                 /* Unexpected token after @ */
1386                 TRACE_PRINT("found unexpected token during skipping a decorator");
1387 }
1388
1389 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
1390 {
1391         readTokenFullRaw (token, include_newlines, repr);
1392
1393         while (1)
1394         {
1395                 if (!isType (token, TOKEN_ATMARK))
1396                         break;
1397                 skipBabelDecorator (token, include_newlines, repr);
1398                 /* @decorator0 @decorator1 ... There can be more than one decorator. */
1399         }
1400 }
1401
#if defined(DO_TRACING_USE_DUMP_TOKEN)
/* Tracing flavor of readTokenFull(): reads a token the same way, then passes
 * it to dumpToken().  The macro below makes every use of readTokenFull()
 * that follows go through this function, so it has to stay after it. */
static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFull (token, include_newlines, repr);
	dumpToken (token);
}
#define readTokenFull readTokenFullDebug
#endif /* DO_TRACING_USE_DUMP_TOKEN */
1411
1412 static void readToken (tokenInfo *const token)
1413 {
1414         readTokenFull (token, false, NULL);
1415 }
1416
1417 /*
1418  * Token parsing functions
1419  */
1420
1421 static int parseMethodsInAnonymousObject (tokenInfo *const token)
1422 {
1423         int index = CORK_NIL;
1424
1425         tokenInfo *const anon_object = newToken ();
1426         copyToken (anon_object, token, true);
1427         anonGenerate (anon_object->string, "anonymousObject", JSTAG_VARIABLE);
1428         anon_object->type = TOKEN_IDENTIFIER;
1429
1430         index = makeJsTagCommon (anon_object, JSTAG_VARIABLE, NULL, NULL, true);
1431         if (! parseMethods (token, index, false))
1432         {
1433                 /* If no method is found, the anonymous object
1434                  * should not be tagged.
1435                  */
1436                 tagEntryInfo *e = getEntryInCorkQueue (index);
1437                 if (e)
1438                         markTagAsPlaceholder (e, true);
1439                 index = CORK_NIL;
1440         }
1441
1442         deleteToken (anon_object);
1443
1444         return index;
1445 }
1446
1447 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
1448 {
1449         if (isType (token, TOKEN_OPEN_PAREN))   /* arguments? */
1450         {
1451                 int nest_level = 1;
1452                 if (repr)
1453                         vStringPut (repr, '(');
1454
1455                 tokenType prev_token_type = token->type;
1456                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1457                 {
1458                         readTokenFull (token, false, repr);
1459                         if (isType (token, TOKEN_OPEN_PAREN))
1460                                 nest_level++;
1461                         else if (isType (token, TOKEN_CLOSE_PAREN))
1462                                 nest_level--;
1463                         else if (isType (token, TOKEN_OPEN_CURLY))
1464                         {
1465                                 if (prev_token_type == TOKEN_ARROW)
1466                                         parseBlock (token, CORK_NIL);
1467                                 else
1468                                         parseMethodsInAnonymousObject (token);
1469                         }
1470                         else if (isKeyword (token, KEYWORD_function))
1471                                 parseFunction (token, NULL, false);
1472
1473                         prev_token_type = token->type;
1474                 }
1475                 readTokenFull (token, include_newlines, NULL);
1476         }
1477 }
1478
1479 static void skipArrayList (tokenInfo *const token, bool include_newlines)
1480 {
1481         /*
1482          * Handle square brackets
1483          *   var name[1]
1484          * So we must check for nested open and closing square brackets
1485          */
1486
1487         if (isType (token, TOKEN_OPEN_SQUARE))  /* arguments? */
1488         {
1489                 int nest_level = 1;
1490                 tokenType prev_token_type = token->type;
1491                 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1492                 {
1493                         readToken (token);
1494                         if (isType (token, TOKEN_OPEN_SQUARE))
1495                                 nest_level++;
1496                         else if (isType (token, TOKEN_CLOSE_SQUARE))
1497                                 nest_level--;
1498                         else if (isType (token, TOKEN_OPEN_CURLY))
1499                         {
1500                                 if (prev_token_type == TOKEN_ARROW)
1501                                         parseBlock (token, CORK_NIL);
1502                                 else
1503                                         parseMethodsInAnonymousObject (token);
1504                         }
1505
1506                         prev_token_type = token->type;
1507                 }
1508                 readTokenFull (token, include_newlines, NULL);
1509         }
1510 }
1511
1512 static void skipQualifiedIdentifier (tokenInfo *const token)
1513 {
1514         /* Skip foo.bar.baz */
1515         while (isType (token, TOKEN_IDENTIFIER))
1516         {
1517                 readToken (token);
1518                 if (isType (token, TOKEN_PERIOD))
1519                         readToken (token);
1520                 else
1521                         break;
1522         }
1523 }
1524
1525 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
1526 {
1527         vStringJoin (parent->string, '.', child->string);
1528 }
1529
1530 /*
1531  * Scanning functions
1532  */
1533
1534 static bool findCmdTerm (tokenInfo *const token, bool include_newlines, bool include_commas)
1535 {
1536         /*
1537          * Read until we find either a semicolon or closing brace.
1538          * Any nested braces will be handled within.
1539          */
1540         while (! isType (token, TOKEN_SEMICOLON) &&
1541                    ! isType (token, TOKEN_CLOSE_CURLY) &&
1542                    ! (include_commas && isType (token, TOKEN_COMMA)) &&
1543                    ! isType (token, TOKEN_EOF))
1544         {
1545                 /* Handle nested blocks */
1546                 if ( isType (token, TOKEN_OPEN_CURLY))
1547                 {
1548                         parseBlock (token, CORK_NIL);
1549                         readTokenFull (token, include_newlines, NULL);
1550                 }
1551                 else if ( isType (token, TOKEN_OPEN_PAREN) )
1552                         skipArgumentList(token, include_newlines, NULL);
1553                 else if ( isType (token, TOKEN_OPEN_SQUARE) )
1554                         skipArrayList(token, include_newlines);
1555                 else
1556                         readTokenFull (token, include_newlines, NULL);
1557         }
1558
1559         return isType (token, TOKEN_SEMICOLON);
1560 }
1561
1562 static void parseSwitch (tokenInfo *const token)
1563 {
1564         /*
1565          * switch (expression) {
1566          * case value1:
1567          *     statement;
1568          *     break;
1569          * case value2:
1570          *     statement;
1571          *     break;
1572          * default : statement;
1573          * }
1574          */
1575
1576         readToken (token);
1577
1578         if (isType (token, TOKEN_OPEN_PAREN))
1579         {
1580                 skipArgumentList(token, false, NULL);
1581         }
1582
1583         if (isType (token, TOKEN_OPEN_CURLY))
1584         {
1585                 parseBlock (token, CORK_NIL);
1586         }
1587 }
1588
1589 static bool parseLoop (tokenInfo *const token)
1590 {
1591         /*
1592          * Handles these statements
1593          *     for (x=0; x<3; x++)
1594          *         document.write("This text is repeated three times<br>");
1595          *
1596          *     for (x=0; x<3; x++)
1597          *     {
1598          *         document.write("This text is repeated three times<br>");
1599          *     }
1600          *
1601          *     while (number<5){
1602          *         document.write(number+"<br>");
1603          *         number++;
1604          *     }
1605          *
1606          *     do{
1607          *         document.write(number+"<br>");
1608          *         number++;
1609          *     }
1610          *     while (number<5);
1611          */
1612         bool is_terminated = true;
1613
1614         if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
1615         {
1616                 readToken(token);
1617
1618                 if (isType (token, TOKEN_OPEN_PAREN))
1619                         skipArgumentList(token, false, NULL);
1620
1621                 if (isType (token, TOKEN_OPEN_CURLY))
1622                         parseBlock (token, CORK_NIL);
1623                 else
1624                         is_terminated = parseLine(token, false);
1625         }
1626         else if (isKeyword (token, KEYWORD_do))
1627         {
1628                 readToken(token);
1629
1630                 if (isType (token, TOKEN_OPEN_CURLY))
1631                         parseBlock (token, CORK_NIL);
1632                 else
1633                         is_terminated = parseLine(token, false);
1634
1635                 if (is_terminated)
1636                         readToken(token);
1637
1638                 if (isKeyword (token, KEYWORD_while))
1639                 {
1640                         readToken(token);
1641
1642                         if (isType (token, TOKEN_OPEN_PAREN))
1643                                 skipArgumentList(token, true, NULL);
1644
1645                         if (! isType (token, TOKEN_SEMICOLON))
1646                         {
1647                                 /* oddly enough, `do {} while (0) var foo = 42` is perfectly
1648                                  * valid JS, so explicitly handle the remaining of the line
1649                                  * for the sake of the root scope handling (as parseJsFile()
1650                                  * always advances a token not to ever get stuck) */
1651                                 is_terminated = parseLine(token, false);
1652                         }
1653                 }
1654         }
1655
1656         return is_terminated;
1657 }
1658
1659 static bool parseIf (tokenInfo *const token)
1660 {
1661         bool read_next_token = true;
1662         /*
1663          * If statements have two forms
1664          *     if ( ... )
1665          *         one line;
1666          *
1667          *     if ( ... )
1668          *         statement;
1669          *     else
1670          *         statement
1671          *
1672          *     if ( ... ) {
1673          *         multiple;
1674          *         statements;
1675          *     }
1676          *
1677          *
1678          *     if ( ... ) {
1679          *         return elem
1680          *     }
1681          *
1682          *     This example if correctly written, but the
1683          *     else contains only 1 statement without a terminator
1684          *     since the function finishes with the closing brace.
1685          *
1686          *     function a(flag){
1687          *         if(flag)
1688          *             test(1);
1689          *         else
1690          *             test(2)
1691          *     }
1692          *
1693          * TODO:  Deal with statements that can optional end
1694          *        without a semi-colon.  Currently this messes up
1695          *        the parsing of blocks.
1696          *        Need to somehow detect this has happened, and either
1697          *        backup a token, or skip reading the next token if
1698          *        that is possible from all code locations.
1699          *
1700          */
1701
1702         readToken (token);
1703
1704         if (isKeyword (token, KEYWORD_if))
1705         {
1706                 /*
1707                  * Check for an "else if" and consume the "if"
1708                  */
1709                 readToken (token);
1710         }
1711
1712         if (isType (token, TOKEN_OPEN_PAREN))
1713                 skipArgumentList(token, false, NULL);
1714
1715         if (isType (token, TOKEN_OPEN_CURLY))
1716                 parseBlock (token, CORK_NIL);
1717         else
1718         {
1719                 /* The next token should only be read if this statement had its own
1720                  * terminator */
1721                 read_next_token = findCmdTerm (token, true, false);
1722         }
1723         return read_next_token;
1724 }
1725
1726 static bool collectChildren (int corkIndex, tagEntryInfo *entry, void *data)
1727 {
1728         intArray *children = (intArray *)data;
1729
1730         Assert (entry->extensionFields.scopeIndex != CORK_NIL);
1731         intArrayAdd (children, corkIndex);
1732
1733         return true;
1734 }
1735
1736 /* During parsing, there is a case that a language object (parent)
1737  * should be tagged only when there are language objects (children)
1738  * are defined in the parent; if the parent has no child, the parser
1739  * should not make a tag for the parent.
1740  *
1741  * Handling the this case was not easy because the parser must fill
1742  * the scope field of children with the cork index of parent.
1743  * However, the parser can decide whether the parent should be tagged
1744  * or not after parsing inside the parent where the children are
1745  * defined.
1746  *
1747  * "class" is an example of the language object of the parent.
1748  * "methods" are examples of the language object of the children.
1749  * "class" is tagged as a class only when methods are found in it.
1750  *
1751  *
1752  * The parser handles this case with the following steps:
1753  *
1754  * 1.  make a dummy tag entry for the candidate of parent with
1755  *
1756  * >       int dummyIndex = makeSimplePlaceholder().
1757  *
1758  *     ctags doesn't emit this dummy tag entry.
1759  *
1760  * 2.  parse inside the candidate of parent and count children.
1761  *     If a child is found, make a tag for it with filling its
1762  *     scope field with the dummyIndex.
1763  *
1764  * 3. make a true tag entry for the parent if a child is found:
1765  *
1766  * >       int trueIdex = makeTagEntry (...);
1767  *
1768  * 4. update the scope fields of children with the trueIdex.
1769  *
1770  *         moveChildren (dummyIndex, trueIdex);
1771  *
1772  */
1773 static void moveChildren (int old_parent, int new_parent)
1774 {
1775         intArray *children = intArrayNew ();
1776         foreachEntriesInScope (old_parent, NULL, collectChildren, children);
1777         for (unsigned int i = 0; i < intArrayCount (children); i++)
1778         {
1779                 int c = intArrayItem (children, i);
1780
1781                 unregisterEntry (c);
1782                 tagEntryInfo *e = getEntryInCorkQueue (c);
1783                 Assert (e);
1784                 e->extensionFields.scopeIndex = new_parent;
1785                 registerEntry (c);
1786         }
1787         intArrayDelete (children);
1788 }
1789
1790 static bool parseFunction (tokenInfo *const token, tokenInfo *const lhs_name, const bool is_inside_class)
1791 {
1792 #ifdef DO_TRACING
1793         {
1794                 const char *scope_str = getNameStringForCorkIndex (token->scope);
1795                 const char *scope_kind_str = getKindStringForCorkIndex (token->scope);
1796                 TRACE_ENTER_TEXT("token has scope '%s' of kind %s", scope_str, scope_kind_str);
1797         }
1798 #endif
1799
1800         tokenInfo *const name = newToken ();
1801         vString *const signature = vStringNew ();
1802         bool is_generator = false;
1803         bool is_anonymous = false;
1804         int index_for_name = CORK_NIL;
1805         /*
1806          * This deals with these formats
1807          *     function validFunctionTwo(a,b) {}
1808          *     function * generator(a,b) {}
1809          */
1810
1811         copyToken (name, token, true);
1812         readToken (name);
1813         if (isType (name, TOKEN_KEYWORD) &&
1814                 canBeFunctionName (name, false /* true if we're in strict mode */))
1815         {
1816                 // treat as function name
1817                 name->type = TOKEN_IDENTIFIER;
1818                 name->keyword = KEYWORD_NONE;
1819         }
1820
1821         if (isType (name, TOKEN_STAR))
1822         {
1823                 is_generator = true;
1824                 readToken (name);
1825         }
1826         if (isType (name, TOKEN_OPEN_PAREN))
1827         {
1828                 /* anonymous function */
1829                 copyToken (token, name, false);
1830                 anonGenerate (name->string, "anonymousFunction", JSTAG_FUNCTION);
1831                 is_anonymous = true;
1832         }
1833         else if (!isType (name, TOKEN_IDENTIFIER))
1834                 goto cleanUp;
1835         else
1836                 readToken (token);
1837
1838         if ( isType (token, TOKEN_OPEN_PAREN) )
1839                 skipArgumentList(token, false, signature);
1840
1841         if ( isType (token, TOKEN_OPEN_CURLY) )
1842         {
1843                 if ( lhs_name != NULL && is_inside_class )
1844                 {
1845                         index_for_name = makeJsTag (lhs_name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
1846                 }
1847                 else if ( lhs_name != NULL )
1848                 {
1849                         index_for_name = isClassName (lhs_name) ?
1850                                 makeClassTag (lhs_name, signature, NULL):
1851                                 makeFunctionTag (lhs_name, signature, is_generator);
1852                 }
1853
1854                 int f = index_for_name,
1855                         p = CORK_NIL;
1856                 if ( f == CORK_NIL || !is_anonymous )
1857                         p = isClassName (name) ?
1858                                 makeClassTagCommon (name, signature, NULL, is_anonymous) :
1859                                 makeFunctionTagCommon (name, signature, is_generator, is_anonymous);
1860
1861                 if (f == CORK_NIL)
1862                         f = p;
1863
1864                 parseBlock (token, f);
1865         }
1866
1867         if ( lhs_name == NULL )
1868                 findCmdTerm (token, false, false);
1869
1870  cleanUp:
1871         vStringDelete (signature);
1872         deleteToken (name);
1873
1874         TRACE_LEAVE();
1875         return index_for_name;
1876 }
1877
1878 /* Parses a block surrounded by curly braces.
1879  * @p parent_scope is the scope name for this block, or NULL for unnamed scopes */
1880 static bool parseBlock (tokenInfo *const token, int parent_scope)
1881 {
1882         TRACE_ENTER();
1883
1884         bool is_class = false;
1885         bool read_next_token = true;
1886         int save_scope = token->scope;
1887
1888         if (parent_scope != CORK_NIL)
1889         {
1890                 token->scope = parent_scope;
1891                 token->nestLevel++;
1892         }
1893
1894         /*
1895          * Make this routine a bit more forgiving.
1896          * If called on an open_curly advance it
1897          */
1898         if (isType (token, TOKEN_OPEN_CURLY))
1899                 readToken(token);
1900
1901         if (! isType (token, TOKEN_CLOSE_CURLY))
1902         {
1903                 /*
1904                  * Read until we find the closing brace,
1905                  * any nested braces will be handled within
1906                  */
1907                 do
1908                 {
1909                         read_next_token = true;
1910                         if (isKeyword (token, KEYWORD_this))
1911                         {
1912                                 /*
1913                                  * Means we are inside a class and have found
1914                                  * a class, not a function
1915                                  */
1916                                 is_class = true;
1917
1918                                 /*
1919                                  * Ignore the remainder of the line
1920                                  * findCmdTerm(token);
1921                                  */
1922                                 read_next_token = parseLine (token, is_class);
1923                         }
1924                         else if (isKeyword (token, KEYWORD_var) ||
1925                                          isKeyword (token, KEYWORD_let) ||
1926                                          isKeyword (token, KEYWORD_const))
1927                         {
1928                                 /*
1929                                  * Potentially we have found an inner function.
1930                                  * Set something to indicate the scope
1931                                  */
1932                                 read_next_token = parseLine (token, is_class);
1933                         }
1934                         else if (isType (token, TOKEN_OPEN_CURLY))
1935                         {
1936                                 /* Handle nested blocks */
1937                                 parseBlock (token, CORK_NIL);
1938                         }
1939                         else
1940                         {
1941                                 /*
1942                                  * It is possible for a line to have no terminator
1943                                  * if the following line is a closing brace.
1944                                  * parseLine will detect this case and indicate
1945                                  * whether we should read an additional token.
1946                                  */
1947                                 read_next_token = parseLine (token, is_class);
1948                         }
1949
1950                         /*
1951                          * Always read a new token unless we find a statement without
1952                          * a ending terminator
1953                          */
1954                         if( read_next_token )
1955                                 readToken(token);
1956
1957                         /*
1958                          * If we find a statement without a terminator consider the
1959                          * block finished, otherwise the stack will be off by one.
1960                          */
1961                 } while (! isType (token, TOKEN_EOF) &&
1962                                  ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1963         }
1964
1965         token->scope = save_scope;
1966         if (parent_scope != CORK_NIL)
1967                 token->nestLevel--;
1968
1969         TRACE_LEAVE();
1970         return is_class;
1971 }
1972
1973 static bool parseMethods (tokenInfo *const token, int class_index,
1974                                                   const bool is_es6_class)
1975 {
1976         TRACE_ENTER_TEXT("token is '%s' of type %s in parentToken '%s' of kind %s (es6: %s)",
1977                                          vStringValue(token->string), tokenTypeName (token->type),
1978                                          class_index == CORK_NIL ? "none" : getNameStringForCorkIndex (class_index),
1979                                          class_index == CORK_NIL ? "none" : getKindStringForCorkIndex (class_index),
1980                                          is_es6_class? "yes": "no");
1981
1982         /*
1983          * When making a tag for `name', its core index is stored to
1984          * `indexForName'. The value stored to `indexForName' is valid
1985          * till the value for `name' is updated. If the value for `name'
1986          * is changed, `indexForName' is reset to CORK_NIL.
1987          */
1988         tokenInfo *const name = newToken ();
1989         int index_for_name = CORK_NIL;
1990         bool has_methods = false;
1991         int save_scope = token->scope;
1992
1993         if (class_index != CORK_NIL)
1994                 token->scope = class_index;
1995
1996         /*
1997          * This deals with these formats
1998          *     validProperty  : 2,
1999          *     validMethod    : function(a,b) {}
2000          *     'validMethod2' : function(a,b) {}
2001          *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
2002          *     get prop() {}
2003          *     set prop(val) {}
2004          *     get(...) {}
2005          *     set(...) {}
2006          *
2007          * ES6 methods:
2008          *     property(...) {}
2009          *     *generator() {}
2010          *
2011          * ES6 computed name:
2012          *     [property]() {}
2013          *     get [property]() {}
2014          *     set [property]() {}
2015          *     *[generator]() {}
2016          *
2017          * tc39/proposal-class-fields
2018          *     field0 = function(a,b) {}
2019          *     field1 = 1
2020          * The parser extracts field0 as a method because the left value
2021          * is a function (kind propagation), and field1 as a field.
2022          *
2023          * static methods and static initialization blocks
2024          * - ref. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Classes/Static_initialization_blocks
2025          *
2026          *      static func() {}
2027          *      static {}
2028          *      static prop;
2029          *      static prop = val;
2030          */
2031
2032         bool dont_read = false;
2033         do
2034         {
2035                 bool is_setter = false;
2036                 bool is_getter = false;
2037
2038                 if (!dont_read)
2039                         readToken (token);
2040                 dont_read = false;
2041
2042 start:
2043                 if (isType (token, TOKEN_CLOSE_CURLY))
2044                 {
2045                         goto cleanUp;
2046                 }
2047
2048                 if (isType (token, TOKEN_KEYWORD) && canBePropertyName (token))
2049                 {
2050                         tokenInfo *saved_token = newToken ();
2051                         copyToken (saved_token, token, true);
2052                         readToken (token);
2053
2054                         /* it wasn't actually a keyword after all, make it an identifier */
2055                         if (isType(token, TOKEN_OPEN_PAREN) || isType(token, TOKEN_COLON))
2056                         {
2057                                 Assert (NextToken == NULL);
2058                                 NextToken = newToken ();
2059                                 copyToken (NextToken, token, false);    /* save token for next read */
2060                                 copyToken (token, saved_token, true);   /* restore token to process */
2061                                 token->type = TOKEN_IDENTIFIER;                 /* process as identifier */
2062                                 token->keyword = KEYWORD_NONE;
2063                         }
2064                         else if (isKeyword (saved_token, KEYWORD_static) &&
2065                                          isType (token, TOKEN_OPEN_CURLY))
2066                         {
2067                                 /* static initialization block */
2068                                 deleteToken (saved_token);
2069                                 parseBlock (token, class_index);
2070                                 continue;
2071                         }
2072                         else if (isKeyword (saved_token, KEYWORD_get))
2073                                 is_getter = true;
2074                         else if (isKeyword (saved_token, KEYWORD_set))
2075                                 is_setter = true;
2076                         else if (isKeyword (saved_token, KEYWORD_async) ||
2077                                          isKeyword (saved_token, KEYWORD_static))
2078                         {
2079                                 /* can be a qualifier for another "keyword", so start over */
2080                                 deleteToken (saved_token);
2081                                 goto start;
2082                         }
2083
2084                         deleteToken (saved_token);
2085                 }
2086                 else if (isType (token, TOKEN_DOTS))
2087                 {
2088                         /* maybe spread operator. Just skip the next expression. */
2089                         findCmdTerm(token, true, true);
2090                         continue;
2091                 }
2092
2093                 if (! isType (token, TOKEN_KEYWORD) &&
2094                          ! isType (token, TOKEN_SEMICOLON))
2095                 {
2096                         bool is_generator = false;
2097                         bool is_shorthand = false; /* ES6 shorthand syntax */
2098                         bool is_computed_name = false; /* ES6 computed property name */
2099                         bool is_dynamic_prop = false;
2100                         vString *dprop = NULL; /* is_computed_name is true but
2101                                                                         * the name is not represented in
2102                                                                         * a string literal. The expressions
2103                                                                         * go this string. */
2104
2105                         if (isType (token, TOKEN_STAR)) /* shorthand generator */
2106                         {
2107                                 is_generator = true;
2108                                 readToken (token);
2109                         }
2110
2111                         if (isType (token, TOKEN_OPEN_SQUARE))
2112                         {
2113                                 is_computed_name = true;
2114                                 dprop = vStringNewInit ("[");
2115                                 readTokenFull (token, false, dprop);
2116                         }
2117
2118                         copyToken(name, token, true);
2119                         index_for_name = CORK_NIL;
2120                         if (is_computed_name && ! isType (token, TOKEN_STRING))
2121                                 is_dynamic_prop = true;
2122
2123                         readTokenFull (token, false, dprop);
2124
2125                         if (is_computed_name)
2126                         {
2127                                 int depth = 1;
2128                                 do
2129                                 {
2130                                         if (isType (token, TOKEN_CLOSE_SQUARE))
2131                                                 depth--;
2132                                         else
2133                                         {
2134                                                 is_dynamic_prop = true;
2135                                                 if (isType (token, TOKEN_OPEN_SQUARE))
2136                                                         depth++;
2137                                         }
2138                                         readTokenFull (token, false, (is_dynamic_prop && depth != 0)? dprop: NULL);
2139                                 } while (! isType (token, TOKEN_EOF) && depth > 0);
2140                         }
2141
2142                         if (is_dynamic_prop)
2143                         {
2144                                 injectDynamicName (name, dprop);
2145                                 index_for_name = CORK_NIL;
2146                                 dprop = NULL;
2147                         }
2148                         else
2149                                 vStringDelete (dprop);
2150
2151                         is_shorthand = isType (token, TOKEN_OPEN_PAREN);
2152                         bool can_be_field = isType (token, TOKEN_EQUAL_SIGN);
2153                         if ( isType (token, TOKEN_COLON) || can_be_field || is_shorthand )
2154                         {
2155                                 if (! is_shorthand)
2156                                 {
2157                                         readToken (token);
2158                                         if (isKeyword (token, KEYWORD_async))
2159                                                 readToken (token);
2160                                 }
2161
2162                                 vString * signature = vStringNew ();
2163                                 if ( is_shorthand || isKeyword (token, KEYWORD_function) )
2164                                 {
2165                                         TRACE_PRINT("Seems to be a function or shorthand");
2166
2167                                         if (! is_shorthand)
2168                                         {
2169                                                 readToken (token);
2170                                                 if (isType (token, TOKEN_STAR))
2171                                                 {
2172                                                         /* generator: 'function' '*' '(' ... ')' '{' ... '}' */
2173                                                         is_generator = true;
2174                                                         readToken (token);
2175                                                 }
2176                                         }
2177                                         if ( isType (token, TOKEN_OPEN_PAREN) )
2178                                         {
2179                                                 skipArgumentList(token, false, signature);
2180                                         }
2181
2182 function:
2183                                         if (isType (token, TOKEN_OPEN_CURLY))
2184                                         {
2185                                                 has_methods = true;
2186
2187                                                 int kind = JSTAG_METHOD;
2188                                                 if (is_generator)
2189                                                         kind = JSTAG_GENERATOR;
2190                                                 else if (is_getter)
2191                                                         kind = JSTAG_GETTER;
2192                                                 else if (is_setter)
2193                                                         kind = JSTAG_SETTER;
2194
2195                                                 index_for_name = makeJsTag (name, kind, signature, NULL);
2196                                                 parseBlock (token, index_for_name);
2197
2198                                                 /*
2199                                                  * If we aren't parsing an ES6 class (for which there
2200                                                  * is no mandatory separators), read to the closing
2201                                                  * curly, check next token, if a comma, we must loop
2202                                                  * again.
2203                                                  */
2204                                                 if (! is_es6_class)
2205                                                         readToken (token);
2206                                         }
2207                                 }
2208                                 else if (! is_es6_class)
2209                                 {
2210                                         int p = CORK_NIL;
2211                                         tokenInfo *saved_token = newToken ();
2212
2213                                         /* skip whatever is the value */
2214                                         while (! isType (token, TOKEN_COMMA) &&
2215                                                    ! isType (token, TOKEN_CLOSE_CURLY) &&
2216                                                    ! isType (token, TOKEN_EOF))
2217                                         {
2218                                                 if (isType (token, TOKEN_OPEN_CURLY))
2219                                                 {
2220                                                         /* Recurse to find child properties/methods */
2221                                                         p = makeSimplePlaceholder (name->string);
2222                                                         parseMethods (token, p, false);
2223                                                         readToken (token);
2224                                                 }
2225                                                 else if (isType (token, TOKEN_OPEN_PAREN))
2226                                                 {
2227                                                         vStringClear (signature);
2228                                                         skipArgumentList (token, false, signature);
2229                                                 }
2230                                                 else if (isType (token, TOKEN_OPEN_SQUARE))
2231                                                 {
2232                                                         skipArrayList (token, false);
2233                                                 }
2234                                                 else if (isType (token, TOKEN_ARROW))
2235                                                 {
2236                                                         TRACE_PRINT("Seems to be an anonymous function");
2237                                                         if (vStringIsEmpty (signature) &&
2238                                                                 isType (saved_token, TOKEN_IDENTIFIER))
2239                                                         {
2240                                                                 vStringPut (signature, '(');
2241                                                                 vStringCat (signature, saved_token->string);
2242                                                                 vStringPut (signature, ')');
2243                                                         }
2244                                                         readToken (token);
2245                                                         deleteToken (saved_token);
2246                                                         goto function;
2247                                                 }
2248                                                 else
2249                                                 {
2250                                                         copyToken (saved_token, token, true);
2251                                                         readToken (token);
2252                                                 }
2253                                         }
2254                                         deleteToken (saved_token);
2255
2256                                         has_methods = true;
2257                                         index_for_name = makeJsTag (name, JSTAG_PROPERTY, NULL, NULL);
2258                                         if (p != CORK_NIL)
2259                                                 moveChildren (p, index_for_name);
2260                                 }
2261                                 else if (can_be_field)
2262                                 {
2263                                         makeJsTag (name, JSTAG_FIELD, NULL, NULL);
2264                                         parseLine (token, true);
2265                                 }
2266
2267                                 vStringDelete (signature);
2268                         }
2269                         else
2270                         {
2271                                 bool is_property = isType (token, TOKEN_COMMA);
2272                                 makeJsTag (name, is_property ? JSTAG_PROPERTY : JSTAG_FIELD, NULL, NULL);
2273                                 if (!isType (token, TOKEN_SEMICOLON) && !is_property)
2274                                         dont_read = true;
2275                         }
2276                 }
2277         } while ( isType(token, TOKEN_COMMA) ||
2278                           ( is_es6_class && ! isType(token, TOKEN_EOF) ) );
2279
2280         TRACE_PRINT("Finished parsing methods");
2281
2282         findCmdTerm (token, false, false);
2283
2284 cleanUp:
2285         token->scope = save_scope;
2286         deleteToken (name);
2287
2288         TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no");
2289         return has_methods;
2290 }
2291
2292 static bool parseES6Class (tokenInfo *const token, const tokenInfo *target_name)
2293 {
2294         TRACE_ENTER();
2295
2296         tokenInfo * class_name = newToken ();
2297         vString *inheritance = NULL;
2298         bool is_anonymous = true;
2299
2300         copyToken (class_name, token, true);
2301         readToken (class_name);
2302
2303         /* optional name */
2304         if (isType (class_name, TOKEN_IDENTIFIER))
2305         {
2306                 readToken (token);
2307                 is_anonymous = false;
2308         }
2309         else
2310         {
2311                 copyToken (token, class_name, true);
2312                 /* We create a fake name so we have a scope for the members */
2313                 if (! target_name)
2314                         anonGenerate (class_name->string, "AnonymousClass", JSTAG_CLASS);
2315         }
2316
2317         if (! target_name)
2318                 target_name = class_name;
2319
2320         if (isKeyword (token, KEYWORD_extends))
2321                 inheritance = vStringNew ();
2322
2323         /* skip inheritance info */
2324         while (! isType (token, TOKEN_OPEN_CURLY) &&
2325                    ! isType (token, TOKEN_EOF) &&
2326                    ! isType (token, TOKEN_SEMICOLON))
2327                 readTokenFull (token, false, inheritance);
2328
2329         /* remove the last added token (here we assume it's one char, "{" or ";" */
2330         if (inheritance && vStringLength (inheritance) > 0 &&
2331                 ! isType (token, TOKEN_EOF))
2332         {
2333                 vStringChop (inheritance);
2334                 vStringStripTrailing (inheritance);
2335                 vStringStripLeading (inheritance);
2336         }
2337
2338         TRACE_PRINT("Emitting tag for class '%s'", vStringValue(target_name->string));
2339
2340         int r = makeJsTagCommon (target_name, JSTAG_CLASS, NULL, inheritance,
2341                                                          (is_anonymous && (target_name == class_name)));
2342
2343         if (! is_anonymous && target_name != class_name)
2344         {
2345                 /* FIXME: what to do with the secondary name?  It's local to the
2346                  *        class itself, so not very useful... let's hope people
2347                  *        don't give it another name than the target in case of
2348                  *          var MyClass = class MyClassSecondaryName { ... }
2349                  *        I guess it could be an alias to MyClass, or duplicate it
2350                  *        altogether, not sure. */
2351                 makeJsTag (class_name, JSTAG_CLASS, NULL, inheritance);
2352         }
2353
2354         if (inheritance)
2355                 vStringDelete (inheritance);
2356
2357         if (isType (token, TOKEN_OPEN_CURLY))
2358                 parseMethods (token, r, true);
2359
2360         deleteToken (class_name);
2361
2362         TRACE_LEAVE();
2363         return true;
2364 }
2365
2366 static void convertToFunction (int index, const char *signature)
2367 {
2368         tagEntryInfo *e = getEntryInCorkQueue(index);
2369         if (e && e->kindIndex != JSTAG_FUNCTION
2370                 && ( signature == NULL || e->extensionFields.signature == NULL))
2371         {
2372                 e->kindIndex = JSTAG_FUNCTION;
2373                 if (signature)
2374                         e->extensionFields.signature = eStrdup (signature);
2375         }
2376 }
2377
2378 static vString *trimGarbageInSignature (vString *sig)
2379 {
2380         /* Drop "=>" at the end. */
2381         const char *sigstr = vStringValue (sig);
2382         char *last = strrchr (sigstr, ')');
2383         Assert (last);
2384         vStringTruncate (sig, last - sigstr + 1);
2385         return sig;
2386 }
2387
2388 static vString *makeVStringForSignature (tokenInfo *const token)
2389 {
2390         vString * sig = vStringNewInit ("(");
2391
2392         if (isType (token, TOKEN_IDENTIFIER))
2393                 vStringCat (sig, token->string);
2394         else if (isType (token, TOKEN_CLOSE_PAREN))
2395                 vStringPut (sig, ')');
2396         else if (isType (token, TOKEN_DOTS))
2397                 vStringCatS (sig, "...");
2398
2399         return sig;
2400 }
2401
/*
 * State shared by the helpers that parse a single statement
 * (parsePrototype, parseStatementLHS, parseStatementRHS).
 */
struct sStatementState {
	int  indexForName;	/* cork index of the tag made for the statement's
						 * name, or CORK_NIL when there is none */
	bool isClass;		/* the name is being treated as a class */
	bool isConst;		/* tag plain variables as JSTAG_CONSTANT instead of
						 * JSTAG_VARIABLE */
	bool isTerminated;	/* whether the statement's terminator was reached */
	bool isGlobal;		/* required (with nest level 0) before a plain
						 * variable tag is created */
	bool foundThis;		/* makes assigned names JSTAG_PROPERTY; presumably
						 * set when the LHS began with `this' -- set outside
						 * the helpers above */
};

typedef struct sStatementState statementState;
2410
/*
 * Function form of the deleteToken() macro, usable where a
 * `void (*)(void *)' callback is required (e.g. as the element
 * destructor handed to ptrArrayNew()).
 */
static void deleteTokenFn (void *token)
{
	deleteToken (token);
}
2412
2413 static bool parsePrototype (tokenInfo *const name, tokenInfo *const token, statementState *const state)
2414 {
2415         TRACE_ENTER();
2416
2417         /*
2418          * When we reach the "prototype" tag, we infer:
2419          *     "BindAgent" is a class
2420          *     "build"     is a method
2421          *
2422          * function BindAgent( repeatableIdName, newParentIdName ) {
2423          * }
2424          *
2425          * CASE 1
2426          * Specified function name: "build"
2427          *     BindAgent.prototype.build =
2428          *     BondAgent.prototype.crush = function( mode ) {
2429          *         maybe parse nested functions
2430          *     }
2431          *
2432          * CASE 2
2433          * Prototype listing
2434          *     ValidClassOne.prototype = {
2435          *         'validMethodOne' : function(a,b) {},
2436          *         'validMethodTwo' : function(a,b) {}
2437          *     }
2438          *
2439          */
2440         if (! ( isType (name, TOKEN_IDENTIFIER)
2441                 || isType (name, TOKEN_STRING) ) )
2442                 /*
2443                  * Unexpected input. Try to reset the parsing.
2444                  *
2445                  * TOKEN_STRING is acceptable. e.g.:
2446                  * -----------------------------------
2447                  * "a".prototype = function( mode ) {}
2448                  */
2449         {
2450                 TRACE_LEAVE_TEXT("bad input");
2451                 return false;
2452         }
2453
2454         state->indexForName = makeClassTag (name, NULL, NULL);
2455         state->isClass = true;
2456
2457         /*
2458          * There should a ".function_name" next.
2459          */
2460         readToken (token);
2461         if (isType (token, TOKEN_PERIOD))
2462         {
2463                 /*
2464                  * Handle CASE 1
2465                  */
2466                 readToken (token);
2467                 if (isType (token, TOKEN_KEYWORD) && canBePropertyName (token))
2468                 {
2469                         // treat as function name
2470                         token->type = TOKEN_IDENTIFIER;
2471                         token->keyword = KEYWORD_NONE;
2472                 }
2473
2474                 if (! isType(token, TOKEN_KEYWORD))
2475                 {
2476                         vString *const signature = vStringNew ();
2477
2478                         token->scope = state->indexForName;
2479
2480                         tokenInfo *identifier_token = newToken ();
2481                         ptrArray *prototype_tokens = NULL;
2482                         accept_period_in_identifier(true);
2483
2484                         tokenInfo *const method_body_token = newToken ();
2485                         copyToken (method_body_token, token, true);
2486                         readToken (method_body_token);
2487
2488                         while (! isType (method_body_token, TOKEN_SEMICOLON) &&
2489                                    ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
2490                                    ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
2491                                    ! isType (method_body_token, TOKEN_EOF))
2492                         {
2493                                 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
2494                                         skipArgumentList(method_body_token, false,
2495                                                                          vStringLength (signature) == 0 ? signature : NULL);
2496                                 else
2497                                 {
2498                                         char* s1 = vStringValue (identifier_token->string);
2499                                         char* s2 = NULL;
2500                                         if ( isType (method_body_token, TOKEN_EQUAL_SIGN) &&
2501                                                 ! isType (identifier_token, TOKEN_UNDEFINED) &&
2502                                                 (s2 = strstr (s1, ".prototype.")))
2503                                         {
2504                                                 if (prototype_tokens == NULL)
2505                                                         prototype_tokens = ptrArrayNew (deleteTokenFn);
2506
2507                                                 memmove (s2, s2+10, strlen (s2+10) + 1);
2508                                                 vStringSetLength (identifier_token->string);
2509
2510                                                 tokenInfo *const save_token = newToken ();
2511                                                 copyToken (save_token, identifier_token, true);
2512                                                 ptrArrayAdd (prototype_tokens, save_token);
2513                                                 identifier_token->type = TOKEN_UNDEFINED;
2514                                         }
2515                                         else if ( isType(method_body_token, TOKEN_IDENTIFIER))
2516                                                 copyToken (identifier_token, method_body_token, false);
2517
2518                                         readToken (method_body_token);
2519                                 }
2520                         }
2521                         deleteToken (identifier_token);
2522                         accept_period_in_identifier(false);
2523
2524                         int index = makeJsTag (token, JSTAG_METHOD, signature, NULL);
2525
2526                         if (prototype_tokens != NULL)
2527                         {
2528                                 for (int i=0; i<ptrArrayCount (prototype_tokens); i++)
2529                                 {
2530                                         makeJsTag (ptrArrayItem (prototype_tokens, i), JSTAG_METHOD, signature, NULL);
2531                                 }
2532                                 ptrArrayUnref (prototype_tokens);
2533                         }
2534
2535                         vStringDelete (signature);
2536
2537                         if ( isType (method_body_token, TOKEN_OPEN_CURLY))
2538                         {
2539                                 parseBlock (method_body_token, index);
2540                                 state->isTerminated = true;
2541                         }
2542                         else
2543                                 state->isTerminated = isType (method_body_token, TOKEN_SEMICOLON);
2544
2545                         deleteToken (method_body_token);
2546                         TRACE_LEAVE_TEXT("done: single");
2547                         return false;
2548                 }
2549         }
2550         else if (isType (token, TOKEN_EQUAL_SIGN))
2551         {
2552                 readToken (token);
2553                 if (isType (token, TOKEN_OPEN_CURLY))
2554                 {
2555                         /*
2556                          * Handle CASE 2
2557                          *
2558                          * Creates tags for each of these class methods
2559                          *     ValidClassOne.prototype = {
2560                          *         'validMethodOne' : function(a,b) {},
2561                          *         'validMethodTwo' : function(a,b) {}
2562                          *     }
2563                          */
2564                         parseMethods(token, state->indexForName, false);
2565                         /*
2566                          * Find to the end of the statement
2567                          */
2568                         findCmdTerm (token, false, false);
2569                         state->isTerminated = true;
2570                         TRACE_LEAVE_TEXT("done: multiple");
2571                         return false;
2572                 }
2573         }
2574
2575         TRACE_LEAVE_TEXT("done: not found");
2576         return true;
2577 }
2578
2579 static bool parseStatementLHS (tokenInfo *const name, tokenInfo *const token, statementState *const state)
2580 {
2581         TRACE_ENTER();
2582
2583         do
2584         {
2585                 readToken (token);
2586                 if (! isType(token, TOKEN_KEYWORD))
2587                 {
2588                         if ( state->isClass )
2589                                 token->scope = state->indexForName;
2590                         else
2591                         {
2592                                 addContext (name, token);
2593                                 state->indexForName = CORK_NIL;
2594                         }
2595
2596                         readToken (token);
2597                 }
2598                 else if ( isKeyword(token, KEYWORD_prototype) )
2599                 {
2600                         if (! parsePrototype (name, token, state) )
2601                         {
2602                                 TRACE_LEAVE_TEXT("done: prototype");
2603                                 return false;
2604                         }
2605                 }
2606                 else
2607                         readToken (token);
2608         } while (isType (token, TOKEN_PERIOD));
2609
2610         TRACE_LEAVE();
2611         return true;
2612 }
2613
2614 static bool parseStatementRHS (tokenInfo *const name, tokenInfo *const token, statementState *const state, bool is_inside_class)
2615 {
2616         TRACE_ENTER();
2617
2618         int paren_depth = 0;
2619         int arrowfun_paren_depth = 0;
2620         bool canbe_arrowfun = false;
2621
2622         readToken (token);
2623
2624         /* rvalue might be surrounded with parentheses */
2625         while (isType (token, TOKEN_OPEN_PAREN))
2626         {
2627                 paren_depth++;
2628                 arrowfun_paren_depth++;
2629                 readToken (token);
2630         }
2631
2632         if (isKeyword (token, KEYWORD_async))
2633         {
2634                 arrowfun_paren_depth = 0;
2635                 readToken (token);
2636
2637                 /* check for function signature */
2638                 while (isType (token, TOKEN_OPEN_PAREN))
2639                 {
2640                         paren_depth++;
2641                         arrowfun_paren_depth++;
2642                         readToken (token);
2643                 }
2644         }
2645
2646         if ( isKeyword (token, KEYWORD_function) )
2647         {
2648                 state->indexForName = parseFunction (token, name, is_inside_class);
2649         }
2650         else if (isKeyword (token, KEYWORD_class))
2651         {
2652                 state->isTerminated = parseES6Class (token, name);
2653         }
2654         else if (isType (token, TOKEN_OPEN_CURLY))
2655         {
2656                 /*
2657                  * Creates tags for each of these class methods
2658                  *     objectOne = {
2659                  *         'validMethodOne' : function(a,b) {},
2660                  *         'validMethodTwo' : function(a,b) {}
2661                  *     }
2662                  * Or checks if this is a hash variable.
2663                  *     var z = {};
2664                  */
2665                 bool anon_object = vStringIsEmpty (name->string);
2666                 if (anon_object)
2667                 {
2668                         anonGenerate (name->string, "anonymousObject", JSTAG_VARIABLE);
2669                         state->indexForName = CORK_NIL;
2670                 }
2671                 int p = makeSimplePlaceholder (name->string);
2672                 if ( parseMethods(token, p, false) )
2673                 {
2674                         jsKind kind = state->foundThis || strchr (vStringValue(name->string), '.') != NULL ? JSTAG_PROPERTY : JSTAG_VARIABLE;
2675                         state->indexForName = makeJsTagCommon (name, kind, NULL, NULL, anon_object);
2676                         moveChildren (p, state->indexForName);
2677                 }
2678                 else if ( token->nestLevel == 0 && state->isGlobal )
2679                 {
2680                         /*
2681                          * Only create variables for global scope
2682                          *
2683                          * A pointer can be created to the function.
2684                          * If we recognize the function/class name ignore the variable.
2685                          * This format looks identical to a variable definition.
2686                          * A variable defined outside of a block is considered
2687                          * a global variable:
2688                          *     var g_var1 = 1;
2689                          *     var g_var2;
2690                          * This is not a global variable:
2691                          *     var g_var = function;
2692                          * This is a global variable:
2693                          *     var g_var = different_var_name;
2694                          */
2695                         state->indexForName = anyKindsEntryInScope (name->scope, vStringValue (name->string),
2696                                                                                                                  (int[]){JSTAG_VARIABLE, JSTAG_FUNCTION, JSTAG_CLASS}, 3, true);
2697
2698                         if (state->indexForName == CORK_NIL)
2699                                 state->indexForName = makeJsTag (name, state->isConst ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2700                 }
2701                 /* Here we should be at the end of the block, on the close curly.
2702                  * If so, read the next token not to confuse that close curly with
2703                  * the end of the current statement. */
2704                 if (isType (token, TOKEN_CLOSE_CURLY))
2705                 {
2706                         readTokenFull(token, true, NULL);
2707                         state->isTerminated = isType (token, TOKEN_SEMICOLON);
2708                 }
2709         }
2710         else if (isType (token, TOKEN_OPEN_SQUARE) && !vStringIsEmpty (name->string))
2711         {
2712                 /*
2713                  * Creates tag for an array
2714                  */
2715                 skipArrayList(token, true);
2716                 jsKind kind = state->foundThis || strchr (vStringValue(name->string), '.') != NULL ? JSTAG_PROPERTY : JSTAG_VARIABLE;
2717                 /*
2718                  * Only create variables for global scope or class/object properties
2719                  */
2720                 if ( ( token->nestLevel == 0 && state->isGlobal ) || kind == JSTAG_PROPERTY )
2721                 {
2722                         state->indexForName = makeJsTagCommon (name, kind, NULL, NULL, false);
2723                 }
2724         }
2725         else if (isKeyword (token, KEYWORD_new))
2726         {
2727                 readToken (token);
2728                 bool is_var = isType (token, TOKEN_IDENTIFIER) || isKeyword (token, KEYWORD_capital_object);
2729                 if ( isKeyword (token, KEYWORD_function) ||
2730                          isKeyword (token, KEYWORD_capital_function) ||
2731                          is_var )
2732                 {
2733                         if ( isKeyword (token, KEYWORD_capital_function) && isClassName (name) )
2734                                 state->isClass = true;
2735
2736                         if ( isType (token, TOKEN_IDENTIFIER) )
2737                                 skipQualifiedIdentifier (token);
2738                         else
2739                                 readToken (token);
2740
2741                         if ( isType (token, TOKEN_OPEN_PAREN) )
2742                                 skipArgumentList(token, true, NULL);
2743
2744                         if (isType (token, TOKEN_SEMICOLON) && token->nestLevel == 0)
2745                         {
2746                                 if ( is_var )
2747                                         state->indexForName = makeJsTag (name, state->isConst ? JSTAG_CONSTANT : state->foundThis ? JSTAG_PROPERTY : JSTAG_VARIABLE, NULL, NULL);
2748                                 else if ( state->isClass )
2749                                         state->indexForName = makeClassTag (name, NULL, NULL);
2750                                 else
2751                                 {
2752                                         /* FIXME: we cannot really get a meaningful
2753                                          * signature from a `new Function()` call,
2754                                          * so for now just don't set any */
2755                                         state->indexForName = makeFunctionTag (name, NULL, false);
2756                                 }
2757                         }
2758                         else if (isType (token, TOKEN_CLOSE_CURLY))
2759                                 state->isTerminated = false;
2760                 }
2761         }
2762         else if (! isType (token, TOKEN_KEYWORD) &&
2763                          token->nestLevel == 0 && state->isGlobal )
2764         {
2765                 /*
2766                  * Only create variables for global scope
2767                  *
2768                  * A pointer can be created to the function.
2769                  * If we recognize the function/class name ignore the variable.
2770                  * This format looks identical to a variable definition.
2771                  * A variable defined outside of a block is considered
2772                  * a global variable:
2773                  *     var g_var1 = 1;
2774                  *     var g_var2;
2775                  * This is not a global variable:
2776                  *     var g_var = function;
2777                  * This is a global variable:
2778                  *     var g_var = different_var_name;
2779                  */
2780                 state->indexForName = anyKindsEntryInScope (name->scope, vStringValue (name->string),
2781                                                                                                          (int[]){JSTAG_VARIABLE, JSTAG_FUNCTION, JSTAG_CLASS}, 3, true);
2782
2783                 if (state->indexForName == CORK_NIL)
2784                 {
2785                         state->indexForName = makeJsTag (name, state->isConst ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2786                         if (isType (token, TOKEN_IDENTIFIER))
2787                                 canbe_arrowfun = true;
2788                 }
2789         }
2790         else if ( isType (token, TOKEN_IDENTIFIER) )
2791         {
2792                 canbe_arrowfun = true;
2793         }
2794
2795         if (arrowfun_paren_depth == 0 && canbe_arrowfun)
2796         {
2797                 /* var v = a => { ... } */
2798                 vString *sig = vStringNewInit ("(");
2799                 vStringCat (sig, token->string);
2800                 vStringPut (sig, ')');
2801                 readTokenFull (token, true, NULL);
2802                 if (isType (token, TOKEN_ARROW))
2803                 {
2804                         if (state->indexForName == CORK_NIL)    // was not a global variable
2805                                 state->indexForName = makeFunctionTag (name, sig, false);
2806                         else
2807                                 convertToFunction (state->indexForName, vStringValue (sig));
2808                 }
2809                 vStringDelete (sig);
2810         }
2811
2812         if (paren_depth > 0)
2813         {
2814                 /* Collect parameters for arrow function. */
2815                 vString *sig = (arrowfun_paren_depth == 1)? makeVStringForSignature (token): NULL;
2816
2817                 while (paren_depth > 0 && ! isType (token, TOKEN_EOF))
2818                 {
2819                         if (isType (token, TOKEN_OPEN_PAREN))
2820                         {
2821                                 paren_depth++;
2822                                 arrowfun_paren_depth++;
2823                         }
2824                         else if (isType (token, TOKEN_CLOSE_PAREN))
2825                         {
2826                                 paren_depth--;
2827                                 arrowfun_paren_depth--;
2828                         }
2829                         readTokenFull (token, true, sig);
2830
2831                         /* var f = (a, b) => { ... } */
2832                         if (arrowfun_paren_depth == 0 && isType (token, TOKEN_ARROW) && sig)
2833                         {
2834                                 if (state->indexForName == CORK_NIL)    // was not a global variable
2835                                         state->indexForName = makeFunctionTag (name, trimGarbageInSignature (sig), false);
2836                                 else
2837                                         convertToFunction (state->indexForName,
2838                                                                            vStringValue (trimGarbageInSignature (sig)));
2839
2840                                 vStringDelete (sig);
2841                                 sig = NULL;
2842                         }
2843                 }
2844                 if (isType (token, TOKEN_CLOSE_CURLY))
2845                         state->isTerminated = false;
2846
2847                 vStringDelete (sig); /* NULL is acceptable. */
2848         }
2849
2850         TRACE_LEAVE();
2851         return true;
2852 }
2853
2854 static bool parseStatement (tokenInfo *const token, bool is_inside_class)
2855 {
2856         TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? "yes": "no");
2857
2858         /*
2859          * When making a tag for `name', its core index is stored to
2860          * `indexForName'. The value stored to `indexForName' is valid
2861          * till the value for `name' is updated. If the value for `name'
2862          * is changed, `indexForName' is reset to CORK_NIL.
2863          */
2864         tokenInfo *const name = newToken ();
2865         int save_scope = token->scope;
2866         bool found_lhs = false;
2867         statementState state = {
2868                 .indexForName = CORK_NIL,
2869                 .isClass = is_inside_class,
2870                 .isConst = false,
2871                 .isTerminated = true,
2872                 .isGlobal = false,
2873                 .foundThis = false
2874         };
2875
2876         /*
2877          * Functions can be named or unnamed.
2878          * This deals with these formats:
2879          * Function
2880          *     validFunctionOne = function(a,b) {}
2881          *     testlib.validFunctionFive = function(a,b) {}
2882          *     var innerThree = function(a,b) {}
2883          *     var innerFour = (a,b) {}
2884          *     var D2 = secondary_fcn_name(a,b) {}
2885          *     var D3 = new Function("a", "b", "return a+b;");
2886          * Class
2887          *     testlib.extras.ValidClassOne = function(a,b) {
2888          *         this.a = a;
2889          *     }
2890          * Class Methods
2891          *     testlib.extras.ValidClassOne.prototype = {
2892          *         'validMethodOne' : function(a,b) {},
2893          *         'validMethodTwo' : function(a,b) {}
2894          *     }
2895          *     ValidClassTwo = function ()
2896          *     {
2897          *         this.validMethodThree = function() {}
2898          *         // unnamed method
2899          *         this.validMethodFour = () {}
2900          *     }
2901          *     Database.prototype.validMethodThree = Database_getTodaysDate;
2902          */
2903
2904         /*
2905          * var can precede an inner function
2906          */
2907         if ( isKeyword(token, KEYWORD_var) ||
2908                  isKeyword(token, KEYWORD_let) ||
2909                  isKeyword(token, KEYWORD_const) )
2910         {
2911                 TRACE_PRINT("var/let/const case");
2912                 state.isConst = isKeyword(token, KEYWORD_const);
2913                 /*
2914                  * Only create variables for global scope
2915                  */
2916                 if ( token->nestLevel == 0 )
2917                 {
2918                         state.isGlobal = true;
2919                 }
2920                 readToken(token);
2921         }
2922
2923 nextVar:
2924         state.indexForName = CORK_NIL;
2925         state.foundThis = false;
2926         if ( isKeyword(token, KEYWORD_this) )
2927         {
2928                 TRACE_PRINT("found 'this' keyword");
2929                 state.foundThis = true;
2930
2931                 readToken(token);
2932                 if (isType (token, TOKEN_PERIOD))
2933                 {
2934                         readToken(token);
2935                 }
2936                 else if (isType (token, TOKEN_OPEN_SQUARE))
2937                 {
2938                         skipArrayList (token, false);
2939                 }
2940         }
2941
2942         copyToken(name, token, true);
2943         TRACE_PRINT("name becomes '%s' of type %s",
2944                                 vStringValue(token->string), tokenTypeName (token->type));
2945
2946         while (! isType (token, TOKEN_CLOSE_CURLY) &&
2947                    ! isType (token, TOKEN_SEMICOLON)   &&
2948                    ! isType (token, TOKEN_EQUAL_SIGN)  &&
2949                    ! isType (token, TOKEN_COMMA)       &&
2950                    ! isType (token, TOKEN_EOF))
2951         {
2952                 found_lhs = true;
2953                 if (isType (token, TOKEN_OPEN_CURLY))
2954                 {
2955                         parseBlock (token, CORK_NIL);
2956                         readTokenFull (token, true, NULL);
2957                 }
2958                 else if (isKeyword (token, KEYWORD_function))
2959                 {
2960                         parseFunction (token, NULL, false);
2961                         readTokenFull (token, true, NULL);
2962                 }
2963
2964                 /* Potentially the name of the function */
2965                 else if (isType (token, TOKEN_PERIOD))
2966                 {
2967                         /*
2968                          * Cannot be a global variable is it has dot references in the name
2969                          */
2970                         state.isGlobal = false;
2971                         /* Assume it's an assignment to a global name (e.g. a class) using
2972                          * its fully qualified name, so strip the scope.
2973                          * FIXME: resolve the scope so we can make more than an assumption. */
2974                         token->scope = CORK_NIL;
2975                         name->scope = CORK_NIL;
2976                         if ( ! parseStatementLHS (name, token, &state) )
2977                                 goto cleanUp;
2978                 }
2979                 else
2980                         readTokenFull (token, true, NULL);
2981
2982                 if ( isType (token, TOKEN_OPEN_PAREN) )
2983                         skipArgumentList(token, false, NULL);
2984
2985                 if ( isType (token, TOKEN_OPEN_SQUARE) )
2986                         skipArrayList(token, false);
2987         }
2988
2989         if ( isType (token, TOKEN_CLOSE_CURLY) )
2990         {
2991                 /*
2992                  * Reaching this section without having
2993                  * processed an open curly brace indicates
2994                  * the statement is most likely not terminated.
2995                  */
2996                 state.isTerminated = false;
2997         }
2998         else if ( isType (token, TOKEN_SEMICOLON) ||
2999                           isType (token, TOKEN_EOF) ||
3000                           isType (token, TOKEN_COMMA) )
3001         {
3002                 /*
3003                  * Only create variables for global scope
3004                  */
3005                 if ( token->nestLevel == 0 && state.isGlobal )
3006                 {
3007                         /*
3008                          * Handles this syntax:
3009                          *     var g_var2;
3010                          */
3011                         state.indexForName = makeJsTag (name, state.isConst ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
3012                 }
3013                 /*
3014                  * Statement has ended.
3015                  * This deals with calls to functions, like:
3016                  *     alert(..);
3017                  */
3018                 if (isType (token, TOKEN_COMMA))
3019                 {
3020                         readToken (token);
3021                         state.isClass = false;
3022                         goto nextVar;
3023                 }
3024         }
3025         else
3026         {
3027                 bool ok = found_lhs;
3028                 if ( ok && isType (token, TOKEN_EQUAL_SIGN) )
3029                 {
3030                         ok = parseStatementRHS (name, token, &state, is_inside_class);
3031                 }
3032                 /* if we aren't already at the cmd end, advance to it and check whether
3033                  * the statement was terminated */
3034                 if (ok &&
3035                         ! isType (token, TOKEN_CLOSE_CURLY) &&
3036                         ! isType (token, TOKEN_SEMICOLON))
3037                 {
3038                         /*
3039                          * Statements can be optionally terminated in the case of
3040                          * statement prior to a close curly brace as in the
3041                          * document.write line below:
3042                          *
3043                          * function checkForUpdate() {
3044                          *     if( 1==1 ) {
3045                          *         document.write("hello from checkForUpdate<br>")
3046                          *     }
3047                          *     return 1;
3048                          * }
3049                          */
3050                         state.isTerminated = findCmdTerm (token, true, true);
3051                         /* if we're at a comma, try and read a second var */
3052                         if (isType (token, TOKEN_COMMA))
3053                         {
3054                                 readToken (token);
3055                                 state.isClass = false;
3056                                 goto nextVar;
3057                         }
3058                 }
3059                 else if (ok && isType (token, TOKEN_SEMICOLON))
3060                         state.isTerminated = true;
3061         }
3062
3063 cleanUp:
3064         token->scope = save_scope;
3065         deleteToken (name);
3066
3067         TRACE_LEAVE_TEXT("is terminated: %d", (int) state.isTerminated);
3068         return state.isTerminated;
3069 }
3070
3071 static void parseUI5 (tokenInfo *const token)
3072 {
3073         tokenInfo *const name = newToken ();
3074         /*
3075          * SAPUI5 is built on top of jQuery.
3076          * It follows a standard format:
3077          *     sap.ui.controller("id.of.controller", {
3078          *         method_name : function... {
3079          *         },
3080          *
3081          *         method_name : function ... {
3082          *         }
3083          *     }
3084          *
3085          * Handle the parsing of the initial controller (and the
3086          * same for "view") and then allow the methods to be
3087          * parsed as usual.
3088          */
3089
3090         readToken (token);
3091
3092         if (isType (token, TOKEN_PERIOD))
3093         {
3094                 int r = CORK_NIL;
3095
3096                 readToken (token);
3097                 while (! isType (token, TOKEN_OPEN_PAREN) &&
3098                            ! isType (token, TOKEN_EOF))
3099                 {
3100                         readToken (token);
3101                 }
3102                 readToken (token);
3103
3104                 if (isType (token, TOKEN_STRING))
3105                 {
3106                         copyToken(name, token, true);
3107                         readToken (token);
3108                 }
3109
3110                 if (isType (token, TOKEN_COMMA))
3111                         readToken (token);
3112
3113                 if (isType(name, TOKEN_STRING))
3114                 {
3115                         /*
3116                          * `name' can include '.'.
3117                          * Setting dynamicProp to true can prohibit
3118                          * that makeClassTag ispects the inside
3119                          * of `name'.
3120                          */
3121                         name->dynamicProp = true;
3122                         r = makeClassTag (name, NULL, NULL);
3123                         /*
3124                          * TODO
3125                          * `name' specifies a class of OpenUI5.
3126                          * So tagging it as a language object of
3127                          * JavaScript is incorrect. We have to introduce
3128                          * OpenUI5 language as a subparser of JavaScript
3129                          * to fix this situation.
3130                          */
3131                 }
3132
3133                 do
3134                 {
3135                         parseMethods (token, r, false);
3136                 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
3137                                  ! isType (token, TOKEN_EOF));
3138         }
3139
3140         deleteToken (name);
3141 }
3142
3143 static bool parseLine (tokenInfo *const token, bool is_inside_class)
3144 {
3145         TRACE_ENTER_TEXT("token is '%s' of type %s",
3146                                          vStringValue(token->string), tokenTypeName (token->type));
3147
3148         bool is_terminated = true;
3149         /*
3150          * Detect the common statements, if, while, for, do, ...
3151          * This is necessary since the last statement within a block "{}"
3152          * can be optionally terminated.
3153          *
3154          * If the statement is not terminated, we need to tell
3155          * the calling routine to prevent reading an additional token
3156          * looking for the end of the statement.
3157          */
3158
3159         if (isType(token, TOKEN_KEYWORD))
3160         {
3161                 switch (token->keyword)
3162                 {
3163                         case KEYWORD_for:
3164                         case KEYWORD_while:
3165                         case KEYWORD_do:
3166                                 is_terminated = parseLoop (token);
3167                                 break;
3168                         case KEYWORD_if:
3169                         case KEYWORD_else:
3170                         case KEYWORD_try:
3171                         case KEYWORD_catch:
3172                         case KEYWORD_finally:
3173                                 /* Common semantics */
3174                                 is_terminated = parseIf (token);
3175                                 break;
3176                         case KEYWORD_switch:
3177                                 parseSwitch (token);
3178                                 break;
3179                         case KEYWORD_return:
3180                         case KEYWORD_async:
3181                                 readToken (token);
3182                                 is_terminated = parseLine (token, is_inside_class);
3183                                 break;
3184                         case KEYWORD_function:
3185                                 parseFunction (token, NULL, false);
3186                                 break;
3187                         case KEYWORD_class:
3188                                 is_terminated = parseES6Class (token, NULL);
3189                                 break;
3190                         default:
3191                                 is_terminated = parseStatement (token, is_inside_class);
3192                                 break;
3193                 }
3194         }
3195         else
3196         {
3197                 /*
3198                  * Special case where single line statements may not be
3199                  * SEMICOLON terminated.  parseBlock needs to know this
3200                  * so that it does not read the next token.
3201                  */
3202                 is_terminated = parseStatement (token, is_inside_class);
3203         }
3204
3205         TRACE_LEAVE();
3206         return is_terminated;
3207 }
3208
3209 static void parseJsFile (tokenInfo *const token)
3210 {
3211         TRACE_ENTER();
3212
3213         do
3214         {
3215                 readToken (token);
3216
3217                 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
3218                         parseUI5 (token);
3219                 else if (isType (token, TOKEN_KEYWORD) && (token->keyword == KEYWORD_export ||
3220                                                                                                    token->keyword == KEYWORD_default))
3221                         /* skip those at top-level */;
3222                 else
3223                         parseLine (token, false);
3224         } while (! isType (token, TOKEN_EOF));
3225
3226         TRACE_LEAVE();
3227 }
3228
3229 #ifdef DO_TRACING
3230 #ifdef DO_TRACING_USE_DUMP_TOKEN
3231 static void dumpToken (const tokenInfo *const token)
3232 {
3233         const char *scope_str = getNameStringForCorkIndex (token->scope);
3234         const char *scope_kind_str = getKindStringForCorkIndex (token->scope);
3235
3236         if (strcmp(scope_str, "placeholder") == 0)
3237         {
3238                 TRACE_PRINT("%s: %s",
3239                         tokenTypeName (token->type),
3240                         vStringValue (token->string));
3241         }
3242         else
3243         {
3244                 TRACE_PRINT("%s: %s (scope '%s' of kind %s)",
3245                         tokenTypeName (token->type),
3246                         vStringValue (token->string),
3247                         scope_str, scope_kind_str);
3248         }
3249 }
3250 #endif
3251
3252 static const char*
3253 getNameStringForCorkIndex(int index)
3254 {
3255         if (index == CORK_NIL)
3256                 return "none";
3257         tagEntryInfo *e = getEntryInCorkQueue (index);
3258         if (e == NULL)
3259                 return "ghost";                 /* Can this happen? */
3260
3261         if (e->placeholder)
3262                 return "placeholder";
3263
3264         return e->name;
3265 }
3266
3267 static const char*
3268 getKindStringForCorkIndex(int index)
3269 {
3270         if (index == CORK_NIL)
3271                 return "none";
3272         tagEntryInfo *e = getEntryInCorkQueue (index);
3273         if (e == NULL)
3274                 return "ghost";                 /* Can this happen? */
3275
3276         if (e->placeholder)
3277                 return "placeholder";
3278
3279         if (e->kindIndex == KIND_GHOST_INDEX)
3280                 return "ghost";
3281
3282         return JsKinds [e->kindIndex].name;
3283 }
3284
3285 static const char *kindName(jsKind kind)
3286 {
3287         return ((int)kind) >= 0 ? JsKinds[kind].name : "none";
3288 }
3289
3290 static const char *tokenTypeName(enum eTokenType e)
3291 { /* Generated by misc/enumstr.sh with cmdline:
3292          parsers/jscript.c eTokenType tokenTypeName */
3293         switch (e)
3294         {
3295                 case      TOKEN_UNDEFINED: return "TOKEN_UNDEFINED";
3296                 case            TOKEN_EOF: return "TOKEN_EOF";
3297                 case      TOKEN_CHARACTER: return "TOKEN_CHARACTER";
3298                 case    TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN";
3299                 case      TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
3300                 case          TOKEN_COLON: return "TOKEN_COLON";
3301                 case          TOKEN_COMMA: return "TOKEN_COMMA";
3302                 case        TOKEN_KEYWORD: return "TOKEN_KEYWORD";
3303                 case     TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN";
3304                 case     TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
3305                 case         TOKEN_STRING: return "TOKEN_STRING";
3306                 case TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING";
3307                 case         TOKEN_PERIOD: return "TOKEN_PERIOD";
3308                 case     TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY";
3309                 case    TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY";
3310                 case     TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN";
3311                 case    TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE";
3312                 case   TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE";
3313                 case         TOKEN_REGEXP: return "TOKEN_REGEXP";
3314                 case TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR";
3315                 case           TOKEN_STAR: return "TOKEN_STAR";
3316                 case         TOKEN_ATMARK: return "TOKEN_ATMARK";
3317                 case TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR";
3318                 case          TOKEN_ARROW: return "TOKEN_ARROW";
3319                 case           TOKEN_DOTS: return "TOKEN_DOTS";
3320                 default:                   return "UNKNOWN";
3321         }
3322 }
3323 #endif
3324
3325 static void initialize (const langType language)
3326 {
3327         Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
3328         Lang_js = language;
3329
3330         TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
3331 }
3332
3333 static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
3334 {
3335         if (!initialized)
3336                 return;
3337
3338         objPoolDelete (TokenPool);
3339 }
3340
3341 static void findJsTags (void)
3342 {
3343         tokenInfo *const token = newToken ();
3344
3345         NextToken = NULL;
3346         LastTokenType = TOKEN_UNDEFINED;
3347
3348         parseJsFile (token);
3349
3350         deleteToken (token);
3351
3352 #ifdef HAVE_ICONV
3353         if (JSUnicodeConverter != (iconv_t) -2 && /* not created */
3354                 JSUnicodeConverter != (iconv_t) -1 /* creation failed */)
3355         {
3356                 iconv_close (JSUnicodeConverter);
3357                 JSUnicodeConverter = (iconv_t) -2;
3358         }
3359 #endif
3360
3361         Assert (NextToken == NULL);
3362 }
3363
3364 /* Create parser definition structure */
3365 extern parserDefinition* JavaScriptParser (void)
3366 {
3367         // .jsx files are JSX: https://facebook.github.io/jsx/
3368         // which have JS function definitions, so we just use the JS parser
3369         static const char *const extensions [] = { "js", "jsx", "mjs", NULL };
3370         static const char *const aliases [] = { "js", "node", "nodejs",
3371                                                                                         "seed", "gjs",
3372                                                                                         /* Used in PostgreSQL
3373                                                                                          * https://github.com/plv8/plv8 */
3374                                                                                         "v8",
3375                                                                                         NULL };
3376         parserDefinition *const def = parserNew ("JavaScript");
3377         def->extensions = extensions;
3378         def->aliases = aliases;
3379         /*
3380          * New definitions for parsing instead of regex
3381          */
3382         def->kindTable  = JsKinds;
3383         def->kindCount  = ARRAY_SIZE (JsKinds);
3384         def->parser             = findJsTags;
3385         def->initialize = initialize;
3386         def->finalize   = finalize;
3387         def->keywordTable = JsKeywordTable;
3388         def->keywordCount = ARRAY_SIZE (JsKeywordTable);
3389         def->useCork    = CORK_QUEUE|CORK_SYMTAB;
3390         def->requestAutomaticFQTag = true;
3391
3392         def->versionCurrent = 1;
3393         def->versionAge = 1;
3394
3395         return def;
3396 }