Merge pull request #3560 from techee/cancel_popups
[geany-mirror.git] / ctags / parsers / jscript.c
blob3888cbc7b83ab63198f615725f874955d13f902c
1 /*
2 * Copyright (c) 2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for JavaScript language
8 * files.
10 * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
12 * This is a good reference for different forms of the function statement:
13 * http://www.permadi.com/tutorial/jsFunc/
14 * Another good reference:
15 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
19 * INCLUDE FILES
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
23 #ifdef DEBUG
24 #include <stdio.h>
25 #endif
27 #ifdef HAVE_ICONV
28 #include <iconv.h>
29 #include <errno.h>
30 # ifdef WORDS_BIGENDIAN
31 # define INTERNAL_ENCODING "UTF-32BE"
32 # else
33 # define INTERNAL_ENCODING "UTF-32LE"
34 # endif /* WORDS_BIGENDIAN */
35 #endif
37 #include <string.h>
38 #include "debug.h"
39 #include "entry.h"
40 #include "keyword.h"
41 #include "numarray.h"
42 #include "parse.h"
43 #include "read.h"
44 #include "routines.h"
45 #include "vstring.h"
46 #include "objpool.h"
47 #include "options.h"
48 #include "mbcs.h"
49 #include "trace.h"
51 #include "jscript.h"
54 * MACROS
/* Token helper macros.  Every expansion is wrapped in its own pair of
 * parentheses so that the macro behaves like a primary expression wherever
 * it is used (for instance as the operand of `sizeof'). */

/* True when TOKEN's type field equals T. */
#define isType(token,t)		((bool) ((token)->type == (t)))
/* True when TOKEN's keyword field equals K. */
#define isKeyword(token,k)	((bool) ((token)->keyword == (k)))
/* Get a token object from TokenPool. */
#define newToken()		(objPoolGet (TokenPool))
/* Give token T back to TokenPool. */
#define deleteToken(t)	(objPoolPut (TokenPool, (t)))
62 * DATA DECLARATIONS
/* Identifiers for the words registered in JsKeywordTable.
 * The enumerators are numbered implicitly from 0, in declaration order. */
enum eKeywordId {
	/* function / object related names */
	KEYWORD_function,
	KEYWORD_capital_function,
	KEYWORD_capital_object,
	KEYWORD_prototype,

	/* declarations and object creation */
	KEYWORD_var,
	KEYWORD_let,
	KEYWORD_const,
	KEYWORD_new,
	KEYWORD_this,

	/* control flow */
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally,

	KEYWORD_sap,
	KEYWORD_return,

	/* classes, modules, accessors */
	KEYWORD_class,
	KEYWORD_extends,
	KEYWORD_static,
	KEYWORD_default,
	KEYWORD_export,
	KEYWORD_async,
	KEYWORD_get,
	KEYWORD_set,
};

/* A plain int rather than the enum type, to allow KEYWORD_NONE. */
typedef int keywordId;
/* Kinds of token the tokenizer can produce.
 * The enumerators are numbered implicitly from 0, in declaration order. */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_TEMPLATE_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_REGEXP,
	TOKEN_POSTFIX_OPERATOR,	/* ++ or -- */
	TOKEN_STAR,
	/* To handle Babel's decorators.
	 * Used only in readTokenFull or lower functions. */
	TOKEN_ATMARK,
	TOKEN_BINARY_OPERATOR,
	TOKEN_ARROW,			/* => */
	TOKEN_DOTS,				/* ... */
} tokenType;
129 typedef struct sTokenInfo {
130 tokenType type;
131 keywordId keyword;
132 vString * string;
133 int scope;
134 unsigned long lineNumber;
135 MIOPos filePosition;
136 int nestLevel;
137 bool dynamicProp;
138 int c;
139 } tokenInfo;
142 * DATA DEFINITIONS
145 static tokenType LastTokenType;
146 static tokenInfo *NextToken;
148 static langType Lang_js;
150 static objPool *TokenPool = NULL;
152 #ifdef HAVE_ICONV
153 static iconv_t JSUnicodeConverter = (iconv_t) -2;
154 #endif
157 * "chain element" role is introduced when adapting the JavaScript parser
158 * to corkAPI.
160 * In the corkAPI, a cork index returned from makeTagEntry() can
161 * represent a scope of another tag. Let's think about `input-0.js' that
162 * the node command accepts as an input for ctags.
164 +---+ input-0.js ------------------------------------------------------
165 | 1 | class A {
166 | 2 | f = function(x) {
167 | 3 | return x
168 | 4 | }
169 | 5 | }
170 +---+------------------------------------------------------------------
172 * The following pseudo C code illustrates the code for
173 * tagging `A' and `f' in input-0.js:
174 +---+------------------------------------------------------------------
175 | |...
176 | | tagEntryFor e_for_A, e_for_f;
177 | | ...
178 | | int index_for_A = makeTagEntry (&e_for_A);
179 | | ...
180 |>>>| e_for_f.extensionFields.scopeIndex = index_for_A;
181 | | ...
182 | | makeTagEntry (&e_for_f);
183 | | ...
184 +---+------------------------------------------------------------------
186 * `index_for_A' represents "A" in "class A".
187 * `f' is defined in `A'. To fill the scope field of the tag for `f',
188 * `scopeIndex' member of the tag is filled with `index_for_A' at line |>>>|.
190 * If `A' is defined in the input source file, this technique based on
191 * the cork API works fine. However, if `A' is not defined in the input
192 * source file, the technique doesn't work well.
193 +---+ input-1.js -------------------------------------------------------
194 | 1 | import {A} from 'input-0.js';
195 | 2 | A.g = function(x) {
196 | 3 | return x
197 | 4 | }
198 +---+------------------------------------------------------------------
200 * In this case, `A' may be defined in input-0.js.
201 * The current implementation of ctags processes file by file; it doesn't
202 * use the knowledge in other input source files than current input source
203 * file. ctags processing input-1.js doesn't know the cork index for `A'.
205 * When tagging `g' with "function" kind, how can we fill the scope field
206 * of the tag for `g'?
208 * Here the "chain element" role comes.
209 * This role is used for tagging `z' in "x.y.z" in the case when ctags
210 * doesn't see the definitions for `x' and `y'.
211 * The JavaScript parser makes reference tags for `x' and `y' with
212 * "chain element" role. makeTagEntry() returns a cork index regardless the
213 * type of tags (definition or reference).
214 * The index for the reference tag for `y' can be used to fill the scope
215 * field of the tag for `z'. The index for `x' can be used to fill the
216 * field for `y'.
218 * With this trick and technique, the scope field for `g' is filled:
219 +---+ tags for input-1.js ---------------------------------------------
220 | 1 | A input-1.js /^A.g = function(x) {$/;" f roles:chainElt extras:reference
221 | 2 | g input-1.js /^A.g = function(x) {$/;" f scope:function:A signature:(x) roles:def
222 +---+------------------------------------------------------------------
224 * By default, reference tags are not emitted. So non-ctags-expert users may
225 * not see the tag entry for `A'.
227 * makeJsRefTagsForNameChain() and makeJsTagCommon() implement the trick
228 * and technique.
230 * Arguable points:
232 * Is "chain element(chainElt)" a suitable name for people familiar with JavaScript?
234 * Kinds assigned to the tag having chainElt role must be revised. Eventually
235 * we may need to introduce "unknown" kind like the Python parser. Assigning
236 * "function" kind to `A' in input-1.js is obviously wrong.
/* Role indices for the "variable" kind; the order must match JsVariableRoles. */
typedef enum {
	JS_VARIABLE_CHAINELT,
} jsVariableRole;

/* Role indices for the "class" kind; the order must match JsClassRoles. */
typedef enum {
	JS_CLASS_CHAINELT,
} jsClassRole;
247 static roleDefinition JsFunctionRoles [] = {
248 /* Currently V parser wants this items. */
249 { true, "foreigndecl", "declared in foreign languages" },
252 static roleDefinition JsVariableRoles [] = {
253 { false, "chainElt", "(EXPERIMENTAL)used as an element in a name chain like a.b.c" },
256 static roleDefinition JsClassRoles [] = {
257 { false, "chainElt", "(EXPERIMENTAL)used as an element in a name chain like a.b.c" },
260 static kindDefinition JsKinds [] = {
261 { true, 'f', "function", "functions",
262 .referenceOnly = false, ATTACH_ROLES(JsFunctionRoles) },
263 { true, 'c', "class", "classes",
264 .referenceOnly = false, ATTACH_ROLES(JsClassRoles) },
265 { true, 'm', "method", "methods" },
266 { true, 'p', "property", "properties" },
267 { true, 'C', "constant", "constants" },
268 { true, 'v', "variable", "global variables",
269 .referenceOnly = false, ATTACH_ROLES(JsVariableRoles) },
270 { true, 'g', "generator", "generators" },
271 { true, 'G', "getter", "getters" },
272 { true, 'S', "setter", "setters" },
273 { true, 'M', "field", "fields" },
276 static const keywordTable JsKeywordTable [] = {
277 /* keyword keyword ID */
278 { "function", KEYWORD_function },
279 { "Function", KEYWORD_capital_function },
280 { "Object", KEYWORD_capital_object },
281 { "prototype", KEYWORD_prototype },
282 { "var", KEYWORD_var },
283 { "let", KEYWORD_let },
284 { "const", KEYWORD_const },
285 { "new", KEYWORD_new },
286 { "this", KEYWORD_this },
287 { "for", KEYWORD_for },
288 { "while", KEYWORD_while },
289 { "do", KEYWORD_do },
290 { "if", KEYWORD_if },
291 { "else", KEYWORD_else },
292 { "switch", KEYWORD_switch },
293 { "try", KEYWORD_try },
294 { "catch", KEYWORD_catch },
295 { "finally", KEYWORD_finally },
296 { "sap", KEYWORD_sap },
297 { "return", KEYWORD_return },
298 { "class", KEYWORD_class },
299 { "extends", KEYWORD_extends },
300 { "static", KEYWORD_static },
301 { "default", KEYWORD_default },
302 { "export", KEYWORD_export },
303 { "async", KEYWORD_async },
304 { "get", KEYWORD_get },
305 { "set", KEYWORD_set },
309 * FUNCTION DEFINITIONS
312 /* Recursive functions */
313 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr);
314 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr);
315 static bool parseFunction (tokenInfo *const token, tokenInfo *const name, const bool is_inside_class);
316 static bool parseBlock (tokenInfo *const token, int parent_scope);
317 static bool parseMethods (tokenInfo *const token, int class_index, const bool is_es6_class);
318 static bool parseLine (tokenInfo *const token, bool is_inside_class);
319 static void parseUI5 (tokenInfo *const token);
321 #ifdef DO_TRACING
322 static const char *tokenTypeName(enum eTokenType e);
323 static const char* getNameStringForCorkIndex(int index);
324 static const char* getKindStringForCorkIndex(int index);
325 static const char *kindName(jsKind kind);
326 // #define DO_TRACING_USE_DUMP_TOKEN
327 #ifdef DO_TRACING_USE_DUMP_TOKEN
328 static void dumpToken (const tokenInfo *const token);
329 #endif
330 #endif
332 static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
334 tokenInfo *token = xMalloc (1, tokenInfo);
336 token->string = vStringNew ();
337 token->scope = CORK_NIL;
339 return token;
342 static void clearPoolToken (void *data)
344 tokenInfo *token = data;
346 token->type = TOKEN_UNDEFINED;
347 token->keyword = KEYWORD_NONE;
348 token->nestLevel = 0;
349 token->dynamicProp = false;
350 token->lineNumber = getInputLineNumber ();
351 token->filePosition = getInputFilePosition ();
352 vStringClear (token->string);
353 token->scope = CORK_NIL;
356 static void deletePoolToken (void *data)
358 tokenInfo *token = data;
359 vStringDelete (token->string);
360 eFree (token);
363 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
364 bool const include_non_read_info)
366 dest->lineNumber = src->lineNumber;
367 dest->filePosition = src->filePosition;
368 dest->type = src->type;
369 dest->keyword = src->keyword;
370 dest->dynamicProp = src->dynamicProp;
371 dest->c = src->c;
372 vStringCopy(dest->string, src->string);
373 if (include_non_read_info)
375 dest->nestLevel = src->nestLevel;
376 dest->scope = src->scope;
380 static void injectDynamicName (tokenInfo *const token, vString *newName)
382 token->dynamicProp = true;
383 vStringDelete (token->string);
384 token->string = newName;
388 * Tag generation functions
/* Accumulator handed to findBestJSEntry() through its cb_data argument. */
struct bestJSEntryInScopeData {
	int index;	/* best cork index seen so far, or CORK_NIL */
};
395 static bool findBestJSEntry (int corkIndex, tagEntryInfo *entry, void *cb_data)
397 struct bestJSEntryInScopeData *data = cb_data;
399 if (isRoleAssigned (entry, ROLE_DEFINITION_INDEX))
401 data->index = corkIndex;
402 return false;
405 if (data->index == CORK_NIL || data->index > corkIndex)
406 data->index = corkIndex;
408 return true;
411 static int bestJSEntryInScope(int scope, const char *name)
413 /* If the SCOPE has a tag entry having NAME, the tag is the best
414 * even if there are reference tag entries having NAME.
415 * If the scope has only reference tag entries having NAME, the
416 * tag having smallest cork index is the best.
419 struct bestJSEntryInScopeData data = {
420 .index = CORK_NIL,
422 foreachEntriesInScope (scope, name, findBestJSEntry, &data);
423 return data.index;
426 static int makeJsRefTagsForNameChain (char *name_chain, const tokenInfo *token, int leaf_kind, int scope)
428 /* To fill the scope field of "d" of "a.b.c.d",
429 * "c" must be tagged if the cork API is used.
430 * ----------------------------------------------------------
431 * How the fields for "a", "b", and "c" are filled.
432 * a kind:class scope:<given by SCOPE> roles:chainElt
433 * b kind:class scope:class:a roles:chainElt
435 * The fields of c depends on LEAF_KIND that is passed to this functions.
437 * if (LEAF_KIND == FUNCTION)
438 * c kind:function scope:class:b roles:chainElt
439 * else
440 * c kind:class scope:class:b roles:chainElt
443 const char *name = name_chain;
444 char *next = strchr(name_chain, '.');
445 if (next)
446 *next = '\0';
447 int index = bestJSEntryInScope (scope, name);
449 if (index == CORK_NIL)
451 tagEntryInfo e;
452 int kind = JSTAG_CLASS;
453 int role = JS_CLASS_CHAINELT;
454 if (next == NULL && leaf_kind == JSTAG_FUNCTION)
457 * If we're creating a function (and not a method),
458 * assume the parent is a plain variable.
460 kind = JSTAG_VARIABLE;
461 role = JS_VARIABLE_CHAINELT;
464 initRefTagEntry (&e, name, kind, role);
465 updateTagLine (&e, token->lineNumber, token->filePosition);
466 e.extensionFields.scopeIndex = scope;
468 index = makeTagEntry (&e);
469 /* We should remove this condition. We should fix the callers passing
470 * an empty name instead. makeTagEntry() returns CORK_NIL if the tag
471 * name is empty. */
472 if (index != CORK_NIL)
473 registerEntry (index);
476 return next
477 ? makeJsRefTagsForNameChain (next + 1, token, leaf_kind, index)
478 : index;
481 static int makeJsTagCommon (const tokenInfo *const token, const jsKind kind,
482 vString *const signature, vString *const inheritance,
483 bool anonymous)
485 int index = CORK_NIL;
486 const char *name = vStringValue (token->string);
488 const char *p;
489 char *name_chain = NULL;
490 if (!token->dynamicProp && kind != JSTAG_PROPERTY && (p = strrchr (name, '.')) != NULL )
492 if ((p - name) != 0)
493 name_chain = eStrndup (name, (size_t) (p - name));
494 name = p + 1;
495 if (name[0] == '\0')
496 return CORK_NIL;
499 int scope = token->scope;
500 if (name_chain)
502 scope = makeJsRefTagsForNameChain (name_chain, token, kind, scope);
503 eFree (name_chain);
507 * Check whether NAME is already defined in SCOPE.
508 * If the NAME is already defined, return the cork index for the NAME.
510 if (kind == JSTAG_FUNCTION || kind == JSTAG_CLASS)
512 index = anyKindEntryInScope (scope, name, kind, true);
513 if (index != CORK_NIL)
514 return index;
517 tagEntryInfo e;
518 initTagEntry (&e, name, kind);
519 updateTagLine (&e, token->lineNumber, token->filePosition);
520 e.extensionFields.scopeIndex = scope;
522 #ifdef DO_TRACING
524 const char *scope_str = getNameStringForCorkIndex (scope);
525 const char *scope_kind_str = getKindStringForCorkIndex (scope);
526 TRACE_PRINT("Emitting tag for symbol '%s' of kind %s with scope '%s:%s'", name, kindName(kind), scope_kind_str, scope_str);
528 #endif
530 if (signature && vStringLength(signature))
532 size_t i;
533 /* sanitize signature by replacing all control characters with a
534 * space (because it's simple).
535 * there should never be any junk in a valid signature, but who
536 * knows what the user wrote and CTags doesn't cope well with weird
537 * characters. */
538 for (i = 0; i < signature->length; i++)
540 unsigned char c = (unsigned char) vStringChar (signature, i);
541 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
542 vStringChar (signature, i) = ' ';
544 e.extensionFields.signature = vStringValue(signature);
547 if (inheritance)
548 e.extensionFields.inheritance = vStringValue(inheritance);
550 if (anonymous)
551 markTagExtraBit (&e, XTAG_ANONYMOUS);
553 index = makeTagEntry (&e);
554 /* We shold remove This condition. We should fix the callers passing
555 * an empty name instead. makeTagEntry() returns CORK_NIL if the tag
556 * name is empty. */
557 if (index != CORK_NIL)
558 registerEntry (index);
560 return index;
563 static int makeJsTag (const tokenInfo *const token, const jsKind kind,
564 vString *const signature, vString *const inheritance)
566 return makeJsTagCommon (token, kind, signature, inheritance, false);
569 static int makeClassTagCommon (tokenInfo *const token, vString *const signature,
570 vString *const inheritance, bool anonymous)
572 return makeJsTagCommon (token, JSTAG_CLASS, signature, inheritance, anonymous);
575 static int makeClassTag (tokenInfo *const token, vString *const signature,
576 vString *const inheritance)
578 return makeClassTagCommon (token, signature, inheritance, false);
581 static int makeFunctionTagCommon (tokenInfo *const token, vString *const signature,
582 bool generator, bool anonymous)
584 return makeJsTagCommon (token, generator ? JSTAG_GENERATOR : JSTAG_FUNCTION, signature, NULL,
585 anonymous);
588 static int makeFunctionTag (tokenInfo *const token, vString *const signature, bool generator)
590 return makeFunctionTagCommon (token, signature, generator, false);
593 static bool isClassName (tokenInfo *const name)
595 char * p = strrchr(vStringValue (name->string), '.');
596 if (p == NULL)
597 p = vStringValue (name->string);
598 else
599 p++;
601 return isupper((unsigned char) *p);
605 * Parsing functions
/* Given the code point POINT, returns the first byte of its encoded form
 * and pushes the remaining bytes back to the input, so that the next calls
 * to getcFromInputFile() return them as if the character had simply been
 * written in the input file.
 *
 * With iconv available and a conversion active, the target is the language
 * encoding; otherwise UTF-8 is produced. */
static int handleUnicodeCodePoint (uint32_t point)
{
	int first = (int) point;

	Assert (point < 0x110000);

#ifdef HAVE_ICONV
	/* (iconv_t) -2 means we did not try to create the converter yet */
	if (isConverting () && JSUnicodeConverter == (iconv_t) -2)
		JSUnicodeConverter = iconv_open (getLanguageEncoding (Lang_js), INTERNAL_ENCODING);

	if (isConverting () && JSUnicodeConverter != (iconv_t) -1)
	{
		char *src = (char *) &point;
		size_t src_left = sizeof point;
		/* 4 bytes should be enough for any encoding (it's how much UTF-32
		 * would need).
		 * FIXME: iconv tends to emit a BOM for Unicode target encodings
		 * whose name does not specify the endianness ("UTF-32", "UTF-16"),
		 * i.e. 2 code points instead of the one we expect.  Not very
		 * relevant for now, as nothing in CTags copes well with
		 * non-ASCII-compatible encodings anyway. */
		char encoded[4] = { 0 };
		char *dst = encoded;
		size_t dst_left = ARRAY_SIZE (encoded);
		const size_t status = iconv (JSUnicodeConverter, &src, &src_left, &dst, &dst_left);

		if (status == (size_t) -1)
		{
			/* probably the output encoding cannot represent the character:
			 * use a placeholder that is likely to be supported and is also
			 * valid in an identifier */
			verbose ("JavaScript: Encoding: %s\n", strerror (errno));
			first = '_';
		}
		else
		{
			const size_t encoded_len = ARRAY_SIZE (encoded) - dst_left;

			/* push back last byte first, so that they are read in order;
			 * byte 0 is not pushed, it is our return value */
			for (size_t k = encoded_len; k > 1; k--)
				ungetcToInputFile ((unsigned char) encoded[k - 1]);
			first = (unsigned char) encoded[0];
		}

		/* reset the converter state */
		iconv (JSUnicodeConverter, NULL, NULL, NULL, NULL);
	}
	else
#endif
	{
		/* when no encoding is specified (or no iconv), assume UTF-8 is good.
		 * Why UTF-8? Because it's an ASCII-compatible common Unicode
		 * encoding.  Continuation bytes are pushed back last-first. */
		if (point < 0x80)
			first = (unsigned char) point;
		else if (point < 0x800)
		{
			first = (unsigned char) (0xc0 | ((point >> 6) & 0x1f));
			ungetcToInputFile ((unsigned char) (0x80 | (point & 0x3f)));
		}
		else if (point < 0x10000)
		{
			first = (unsigned char) (0xe0 | ((point >> 12) & 0x0f));
			ungetcToInputFile ((unsigned char) (0x80 | (point & 0x3f)));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >> 6) & 0x3f)));
		}
		else if (point < 0x110000)
		{
			first = (unsigned char) (0xf0 | ((point >> 18) & 0x07));
			ungetcToInputFile ((unsigned char) (0x80 | (point & 0x3f)));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >> 6) & 0x3f)));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >> 12) & 0x3f)));
		}
	}

	return first;
}
/* Reads a Unicode escape sequence after the "\" prefix.
 * Both forms are understood: \uXXXX (exactly 4 hex digits) and \u{X...}
 * (a code point below 0x110000, any number of leading zeros).
 * @param value   Location to store the escape sequence value; written only
 *                when a valid sequence was read.
 * @param isUTF16 Location to store whether the value is a UTF-16 word (the
 *                \uXXXX form); written whenever a 'u' was read.
 * @returns Whether a valid sequence was read.  On failure only part of the
 *          consumed input is put back. */
static bool readUnicodeEscapeSequenceValue (uint32_t *const value,
											bool *const isUTF16)
{
	const int d = getcFromInputFile ();

	if (d != 'u')
	{
		ungetcToInputFile (d);
		return false;
	}

	const int e = getcFromInputFile ();
	char cp[6 + 1]; /* up to 6 hex + possible closing '}' or invalid char */
	unsigned int cp_len = 0;
	bool valid;
	int l;

	*isUTF16 = (e != '{');
	if (e == '{')
	{	/* Handles Unicode code point escapes: \u{ HexDigits }
		 * We skip the leading 0s because there can be any number of them
		 * and they don't change any meaning. */
		bool has_leading_zero = false;

		while ((cp[cp_len] = (char) (l = getcFromInputFile ())) == '0')
			has_leading_zero = true;

		while (isxdigit (l) && ++cp_len < ARRAY_SIZE (cp))
			cp[cp_len] = (char) (l = getcFromInputFile ());

		valid = ((cp_len > 0 || has_leading_zero) &&
				 cp_len < ARRAY_SIZE (cp) && cp[cp_len] == '}' &&
				 /* also check if it's a valid Unicode code point */
				 (cp_len < 6 ||
				  (cp_len == 6 && strncmp (cp, "110000", 6) < 0)));
		if (! valid) /* put back the last (likely invalid) character */
			ungetcToInputFile (l);
	}
	else
	{	/* Handles Unicode escape sequences: \u Hex4Digits
		 * The first digit is the character we already have in E. */
		do
			cp[cp_len] = (char) (l = ((cp_len == 0) ? e : getcFromInputFile ()));
		while (isxdigit (l) && ++cp_len < 4);
		valid = (cp_len == 4);
	}

	if (! valid)
	{
		/* we don't get every character back, but it would require to
		 * be able to put up to 9 characters back (in the worst case
		 * for handling invalid \u{10FFFFx}), and here we're recovering
		 * from invalid syntax anyway. */
		ungetcToInputFile (e);
		ungetcToInputFile (d);
		return false;
	}

	/* fold the collected hex digits into the value */
	uint32_t acc = 0;
	for (unsigned int i = 0; i < cp_len; i++)
	{
		acc *= 16;

		/* we know it's a hex digit, no need to double check */
		if (cp[i] < 'A')
			acc += (unsigned int) cp[i] - '0';
		else if (cp[i] < 'a')
			acc += 10 + (unsigned int) cp[i] - 'A';
		else
			acc += 10 + (unsigned int) cp[i] - 'a';
	}
	*value = acc;

	return true;
}
/* Converts a value in the range 0..15 to the matching upper-case
 * hexadecimal digit character. */
static int valueToXDigit (unsigned char v)
{
	Assert (v <= 0xF);

	return (v < 0xA) ? ('0' + v) : ('A' + (v - 0xA));
}
/* Reads and expands a Unicode escape sequence after the "\" prefix.  If the
 * escape sequence is a UTF-16 high surrogate, the low surrogate is looked
 * for as well, in order to emit the proper code point.
 * @param fallback The character to return if the sequence is invalid.
 *                 Usually this would be the '\' character starting the
 *                 sequence.
 * @returns The first byte of the expanded sequence, or FALLBACK if the
 *          sequence is invalid.  On success, the next calls to
 *          getcFromInputFile() return the subsequent bytes (if any). */
static int readUnicodeEscapeSequence (const int fallback)
{
	uint32_t value;
	bool isUTF16;

	if (! readUnicodeEscapeSequenceValue (&value, &isUTF16))
		return fallback;

	if (isUTF16 && (value & 0xfc00) == 0xd800)
	{	/* this is a high surrogate, try and read its low surrogate and
		 * emit the resulting code point */
		uint32_t low;
		int next = getcFromInputFile ();

		if (next != '\\' || ! readUnicodeEscapeSequenceValue (&low, &isUTF16))
			ungetcToInputFile (next);
		else if (! isUTF16)
		{	/* not UTF-16 low surrogate but a plain code point */
			next = handleUnicodeCodePoint (low);
			ungetcToInputFile (next);
		}
		else if ((low & 0xfc00) != 0xdc00)
		{	/* not a low surrogate, so put back the escaped representation
			 * in case it was another high surrogate we should read as part
			 * of another pair.  Characters are pushed in reverse: the four
			 * hex digits from the least significant one, then "u", then
			 * the backslash. */
			for (unsigned int shift = 0; shift < 16; shift += 4)
				ungetcToInputFile (valueToXDigit ((unsigned char) ((low >> shift) & 0xf)));
			ungetcToInputFile ('u');
			ungetcToInputFile ('\\');
		}
		else
			value = 0x010000 + ((value & 0x03ff) << 10) + (low & 0x03ff);
	}

	return handleUnicodeCodePoint (value);
}
833 static void parseString (vString *const string, const int delimiter)
835 bool end = false;
836 while (! end)
838 int c = getcFromInputFile ();
839 if (c == EOF)
840 end = true;
841 else if (c == '\\')
843 /* Eat the escape sequence (\", \', etc). We properly handle
844 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
845 * as an unescaped character, which is invalid and handled below.
846 * Also, handle the fact that <LineContinuation> produces an empty
847 * sequence.
848 * See ECMA-262 7.8.4 */
849 c = getcFromInputFile ();
850 if (c == 'u')
852 ungetcToInputFile (c);
853 c = readUnicodeEscapeSequence ('\\');
854 vStringPut (string, c);
856 else if (c != '\r' && c != '\n')
857 vStringPut(string, c);
858 else if (c == '\r')
860 c = getcFromInputFile();
861 if (c != '\n')
862 ungetcToInputFile (c);
865 else if (c == delimiter)
866 end = true;
867 else if (c == '\r' || c == '\n')
869 /* those are invalid when not escaped */
870 end = true;
871 /* we don't want to eat the newline itself to let the automatic
872 * semicolon insertion code kick in */
873 ungetcToInputFile (c);
875 else
876 vStringPut (string, c);
/* Skips the rest of a regular expression literal; the opening '/' has
 * already been consumed.
 * Stops after the closing '/' and the letters (flags) following it, before
 * a line terminator (invalid in a regex, left in the input), or at the end
 * of the input.  A '/' inside a [...] range does not close the literal, and
 * the character after a backslash is skipped. */
static void parseRegExp (void)
{
	bool in_range = false;

	for (;;)
	{
		int c = getcFromInputFile ();

		switch (c)
		{
			case '/':
				if (! in_range)
				{
					/* skip flags */
					do
						c = getcFromInputFile ();
					while (isalpha (c));
					ungetcToInputFile (c);
					return;
				}
				break;

			case '\n':
			case '\r':
				/* invalid in a regex */
				ungetcToInputFile (c);
				return;

			case '\\':
				c = getcFromInputFile (); /* skip next character */
				break;

			case '[':
				in_range = true;
				break;

			case ']':
				in_range = false;
				break;

			default:
				break;
		}

		if (c == EOF)
			return;
	}
}
912 /* Read a C identifier beginning with "first_char" and places it into
913 * "name".
/* Number of outstanding requests to treat '.' as an identifier character
 * (see isIdentChar()). */
static int include_period_in_identifier = 0;

/* Adds (INCL true) or withdraws (INCL false) one request to accept '.'
 * inside identifiers.  Withdrawing never takes the counter below zero. */
static void accept_period_in_identifier(bool incl)
{
	if (incl)
	{
		include_period_in_identifier++;
		return;
	}

	if (include_period_in_identifier > 0)
		include_period_in_identifier--;
}
930 static bool isIdentChar(const int c)
932 return (isalpha (c) || isdigit (c) || c == '$' || \
933 c == '@' || c == '_' || c == '#' || \
934 c >= 0x80 || (include_period_in_identifier > 0 && c == '.'));
937 static void parseIdentifier (vString *const string, const int first_char)
939 int c = first_char;
940 Assert (isIdentChar (c));
943 vStringPut (string, c);
944 c = getcFromInputFile ();
945 if (c == '\\')
946 c = readUnicodeEscapeSequence (c);
947 } while (isIdentChar (c));
948 /* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
949 * as we should actually put back the whole escape sequence and not the
950 * decoded character. However, it's not really worth the hassle as it can
951 * only happen if the input has an invalid escape sequence. */
952 ungetcToInputFile (c); /* unget non-identifier character */
955 static void parseTemplateString (vString *const string)
957 int c;
960 c = getcFromInputFile ();
961 if (c == '`' || c == EOF)
962 break;
964 vStringPut (string, c);
966 if (c == '\\')
968 c = getcFromInputFile();
969 if (c != EOF)
970 vStringPut(string, c);
972 else if (c == '$')
974 c = getcFromInputFile ();
975 if (c != '{')
976 ungetcToInputFile (c);
977 else
979 int depth = 1;
980 /* we need to use the real token machinery to handle strings,
981 * comments, regexes and whatnot */
982 tokenInfo *token = newToken ();
983 LastTokenType = TOKEN_UNDEFINED;
984 vStringPut(string, c);
987 readTokenFull (token, false, string);
988 if (isType (token, TOKEN_OPEN_CURLY))
989 depth++;
990 else if (isType (token, TOKEN_CLOSE_CURLY))
991 depth--;
993 while (! isType (token, TOKEN_EOF) && depth > 0);
994 deleteToken (token);
998 while (c != EOF);
1001 static void reprToken (const tokenInfo *const token, vString *const repr)
1003 switch (token->type)
1005 case TOKEN_DOTS:
1006 vStringCatS (repr, "...");
1007 break;
1009 case TOKEN_STRING:
1010 case TOKEN_TEMPLATE_STRING:
1011 vStringPut (repr, token->c);
1012 vStringCat (repr, token->string);
1013 vStringPut (repr, token->c);
1014 break;
1016 case TOKEN_IDENTIFIER:
1017 case TOKEN_KEYWORD:
1018 vStringCat (repr, token->string);
1019 break;
1021 default:
1022 vStringPut (repr, token->c);
1023 break;
1027 static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr)
1029 int c;
1030 int i;
1031 bool newline_encountered = false;
1033 /* if we've got a token held back, emit it */
1034 if (NextToken)
1036 TRACE_PRINT("Emitting held token");
1037 copyToken (token, NextToken, false);
1038 deleteToken (NextToken);
1039 NextToken = NULL;
1040 if (repr)
1041 reprToken (token, repr);
1042 return;
1045 token->type = TOKEN_UNDEFINED;
1046 token->keyword = KEYWORD_NONE;
1047 vStringClear (token->string);
1049 getNextChar:
1050 i = 0;
1053 c = getcFromInputFile ();
1054 if (include_newlines && (c == '\r' || c == '\n'))
1055 newline_encountered = true;
1056 i++;
1058 while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
1060 token->lineNumber = getInputLineNumber ();
1061 token->filePosition = getInputFilePosition ();
1063 /* special case to insert a separator */
1064 if (repr && c != EOF && i > 1)
1065 vStringPut (repr, ' ');
1067 token->c = c;
1069 switch (c)
1071 case EOF: token->type = TOKEN_EOF; break;
1072 case '(': token->type = TOKEN_OPEN_PAREN; break;
1073 case ')': token->type = TOKEN_CLOSE_PAREN; break;
1074 case ';': token->type = TOKEN_SEMICOLON; break;
1075 case ',': token->type = TOKEN_COMMA; break;
1076 case '.':
1078 token->type = TOKEN_PERIOD;
1080 int d = getcFromInputFile ();
1081 if (d != '.')
1083 ungetcToInputFile (d);
1084 break;
1087 d = getcFromInputFile ();
1088 if (d != '.')
1090 ungetcToInputFile (d);
1091 ungetcToInputFile ('.');
1092 break;
1095 token->type = TOKEN_DOTS;
1096 break;
1098 case ':': token->type = TOKEN_COLON; break;
1099 case '{': token->type = TOKEN_OPEN_CURLY; break;
1100 case '}': token->type = TOKEN_CLOSE_CURLY; break;
1101 case '[': token->type = TOKEN_OPEN_SQUARE; break;
1102 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
1104 case '=':
1106 int d = getcFromInputFile ();
1107 if (d == '>')
1108 token->type = TOKEN_ARROW;
1109 else
1111 ungetcToInputFile (d);
1112 token->type = TOKEN_EQUAL_SIGN;
1114 break;
1117 case '+':
1118 case '-':
1120 int d = getcFromInputFile ();
1121 if (d == c) /* ++ or -- */
1122 token->type = TOKEN_POSTFIX_OPERATOR;
1123 else
1125 ungetcToInputFile (d);
1126 token->type = TOKEN_BINARY_OPERATOR;
1128 break;
1131 case '*':
1132 token->type = TOKEN_STAR;
1133 break;
1134 case '%':
1135 case '?':
1136 case '>':
1137 case '<':
1138 case '^':
1139 case '|':
1140 case '&':
1141 token->type = TOKEN_BINARY_OPERATOR;
1142 break;
1144 case '\'':
1145 case '"':
1146 token->type = TOKEN_STRING;
1147 parseString (token->string, c);
1148 token->lineNumber = getInputLineNumber ();
1149 token->filePosition = getInputFilePosition ();
1150 break;
1152 case '`':
1153 token->type = TOKEN_TEMPLATE_STRING;
1154 parseTemplateString (token->string);
1155 token->lineNumber = getInputLineNumber ();
1156 token->filePosition = getInputFilePosition ();
1157 break;
1159 case '/':
1161 int d = getcFromInputFile ();
1162 if ( (d != '*') && /* is this the start of a comment? */
1163 (d != '/') ) /* is a one line comment? */
1165 ungetcToInputFile (d);
1166 switch (LastTokenType)
1168 case TOKEN_CHARACTER:
1169 case TOKEN_IDENTIFIER:
1170 case TOKEN_STRING:
1171 case TOKEN_TEMPLATE_STRING:
1172 case TOKEN_CLOSE_CURLY:
1173 case TOKEN_CLOSE_PAREN:
1174 case TOKEN_CLOSE_SQUARE:
1175 token->type = TOKEN_BINARY_OPERATOR;
1176 break;
1178 default:
1179 token->type = TOKEN_REGEXP;
1180 parseRegExp ();
1181 token->lineNumber = getInputLineNumber ();
1182 token->filePosition = getInputFilePosition ();
1183 break;
1186 else
1188 if (d == '*')
1190 skipToCharacterInInputFile2('*', '/');
1191 goto getNextChar;
1193 else if (d == '/') /* is this the start of a comment? */
1195 skipToCharacterInInputFile ('\n');
1196 /* if we care about newlines, put it back so it is seen */
1197 if (include_newlines)
1198 ungetcToInputFile ('\n');
1199 goto getNextChar;
1202 break;
1205 case '#':
1206 /* skip shebang in case of e.g. Node.js scripts */
1207 if (token->lineNumber > 1)
1208 token->type = TOKEN_UNDEFINED;
1209 else if ((c = getcFromInputFile ()) != '!')
1211 ungetcToInputFile (c);
1212 token->type = TOKEN_UNDEFINED;
1214 else
1216 skipToCharacterInInputFile ('\n');
1217 goto getNextChar;
1219 break;
1221 case '@':
1222 token->type = TOKEN_ATMARK;
1223 break;
1225 case '\\':
1226 c = readUnicodeEscapeSequence (c);
1227 /* fallthrough */
1228 default:
1229 if (! isIdentChar (c))
1230 token->type = TOKEN_UNDEFINED;
1231 else
1233 parseIdentifier (token->string, c);
1234 token->lineNumber = getInputLineNumber ();
1235 token->filePosition = getInputFilePosition ();
1236 token->keyword = lookupKeyword (vStringValue (token->string), Lang_js);
1237 if (isKeyword (token, KEYWORD_NONE))
1238 token->type = TOKEN_IDENTIFIER;
1239 else
1240 token->type = TOKEN_KEYWORD;
1242 break;
1245 if (include_newlines && newline_encountered)
1247 /* This isn't strictly correct per the standard, but following the
1248 * real rules means understanding all statements, and that's not
1249 * what the parser currently does. What we do here is a guess, by
1250 * avoiding inserting semicolons that would make the statement on
1251 * the left or right obviously invalid. Hopefully this should not
1252 * have false negatives (e.g. should not miss insertion of a semicolon)
1253 * but might have false positives (e.g. it will wrongfully emit a
1254 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
1255 * This should however be mostly harmless as we only deal with
1256 * newlines in specific situations where we know a false positive
1257 * wouldn't hurt too bad. */
1259 /* these already end a statement, so no need to duplicate it */
1260 #define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON || \
1261 (t) == TOKEN_EOF || \
1262 (t) == TOKEN_COMMA || \
1263 (t) == TOKEN_OPEN_CURLY)
1264 /* these cannot be the start or end of a statement */
1265 #define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN || \
1266 (t) == TOKEN_ARROW || \
1267 (t) == TOKEN_COLON || \
1268 (t) == TOKEN_PERIOD || \
1269 (t) == TOKEN_STAR || \
1270 (t) == TOKEN_BINARY_OPERATOR)
1272 if (! IS_STMT_SEPARATOR(LastTokenType) &&
1273 ! IS_STMT_SEPARATOR(token->type) &&
1274 ! IS_BINARY_OPERATOR(LastTokenType) &&
1275 ! IS_BINARY_OPERATOR(token->type) &&
1276 /* these cannot be followed by a semicolon */
1277 ! (LastTokenType == TOKEN_OPEN_PAREN ||
1278 LastTokenType == TOKEN_OPEN_SQUARE))
1280 /* hold the token... */
1281 Assert (NextToken == NULL);
1282 NextToken = newToken ();
1283 copyToken (NextToken, token, false);
1285 /* ...and emit a semicolon instead */
1286 token->type = TOKEN_SEMICOLON;
1287 token->keyword = KEYWORD_NONE;
1288 vStringClear (token->string);
1289 token->c = '\n';
1292 #undef IS_STMT_SEPARATOR
1293 #undef IS_BINARY_OPERATOR
1296 LastTokenType = token->type;
1298 if (repr)
1299 reprToken (token, repr);
1302 /* whether something we consider a keyword (either because it sometimes is or
1303 * because of the parser's perks) is actually valid as a function name
1304 * See https://tc39.es/ecma262/multipage/ecmascript-language-lexical-grammar.html#sec-keywords-and-reserved-words */
1305 static bool canBeFunctionName (const tokenInfo *const token, bool strict_mode)
1307 switch (token->keyword)
1309 /* non-keywords specific to this parser */
1310 case KEYWORD_capital_function:
1311 case KEYWORD_capital_object:
1312 case KEYWORD_prototype:
1313 case KEYWORD_sap:
1314 /* syntactic, but not keyword:
1315 * as async from get meta of set target
1316 * "await" is OK as well */
1317 case KEYWORD_async:
1318 case KEYWORD_get:
1319 case KEYWORD_set:
1320 return true;
1322 /* strict-mode keywords
1323 * let static implements interface package private protected public
1324 * we need to also include those which are OK as function names
1325 * yield
1327 case KEYWORD_let:
1328 case KEYWORD_static:
1329 return ! strict_mode;
1331 default:
1332 return isType (token, TOKEN_IDENTIFIER);
1336 static bool canBePropertyName (const tokenInfo *const token)
1338 /* property names are pretty relaxed, any non reserved word is OK, even
1339 * strict-mode ones in strict-mode */
1340 return canBeFunctionName (token, false);
1343 /* See https://babeljs.io/blog/2018/09/17/decorators */
1344 static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr)
1346 readTokenFullRaw (token, include_newlines, repr);
1347 if (isType (token, TOKEN_OPEN_PAREN))
1349 /* @(complex ? dec1 : dec2) */
1350 skipArgumentList (token, include_newlines, repr);
1351 TRACE_PRINT ("found @(...) style decorator");
1353 else if (isType (token, TOKEN_IDENTIFIER))
1355 /* @namespace.foo (...) */
1356 bool found_period = false;
1357 while (1)
1359 readTokenFullRaw (token, include_newlines, repr);
1360 if (isType (token, TOKEN_IDENTIFIER))
1362 if (!found_period)
1364 TRACE_PRINT("found @namespace.bar style decorator");
1365 break;
1367 found_period = false;
1369 else if (isType (token, TOKEN_PERIOD))
1370 found_period = true;
1371 else if (isType (token, TOKEN_OPEN_PAREN))
1373 skipArgumentList (token, include_newlines, repr);
1374 TRACE_PRINT("found @foo(...) style decorator");
1375 break;
1377 else
1379 TRACE_PRINT("found @foo style decorator");
1380 break;
1384 else
1385 /* Unexpected token after @ */
1386 TRACE_PRINT("found unexpected token during skipping a decorator");
1389 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
1391 readTokenFullRaw (token, include_newlines, repr);
1393 while (1)
1395 if (!isType (token, TOKEN_ATMARK))
1396 break;
1397 skipBabelDecorator (token, include_newlines, repr);
1398 /* @decorator0 @decorator1 ... There can be more than one decorator. */
#ifdef DO_TRACING_USE_DUMP_TOKEN
/* Tracing variant of readTokenFull(): reads a token the regular way, then
 * dumps it. */
static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFull (token, include_newlines, repr);
	dumpToken (token);
}

/* From here on, every readTokenFull() call goes through the tracing variant. */
# define readTokenFull readTokenFullDebug
#endif
1412 static void readToken (tokenInfo *const token)
1414 readTokenFull (token, false, NULL);
1418 * Token parsing functions
1421 static int parseMethodsInAnonymousObject (tokenInfo *const token)
1423 int index = CORK_NIL;
1425 tokenInfo *const anon_object = newToken ();
1426 copyToken (anon_object, token, true);
1427 anonGenerate (anon_object->string, "anonymousObject", JSTAG_VARIABLE);
1428 anon_object->type = TOKEN_IDENTIFIER;
1430 index = makeJsTagCommon (anon_object, JSTAG_VARIABLE, NULL, NULL, true);
1431 if (! parseMethods (token, index, false))
1433 /* If no method is found, the anonymous object
1434 * should not be tagged.
1436 tagEntryInfo *e = getEntryInCorkQueue (index);
1437 if (e)
1438 markTagAsPlaceholder (e, true);
1439 index = CORK_NIL;
1442 deleteToken (anon_object);
1444 return index;
1447 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
1449 if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */
1451 int nest_level = 1;
1452 if (repr)
1453 vStringPut (repr, '(');
1455 tokenType prev_token_type = token->type;
1456 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1458 readTokenFull (token, false, repr);
1459 if (isType (token, TOKEN_OPEN_PAREN))
1460 nest_level++;
1461 else if (isType (token, TOKEN_CLOSE_PAREN))
1462 nest_level--;
1463 else if (isType (token, TOKEN_OPEN_CURLY))
1465 if (prev_token_type == TOKEN_ARROW)
1466 parseBlock (token, CORK_NIL);
1467 else
1468 parseMethodsInAnonymousObject (token);
1470 else if (isKeyword (token, KEYWORD_function))
1471 parseFunction (token, NULL, false);
1473 prev_token_type = token->type;
1475 readTokenFull (token, include_newlines, NULL);
1479 static void skipArrayList (tokenInfo *const token, bool include_newlines)
1482 * Handle square brackets
1483 * var name[1]
1484 * So we must check for nested open and closing square brackets
1487 if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */
1489 int nest_level = 1;
1490 tokenType prev_token_type = token->type;
1491 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1493 readToken (token);
1494 if (isType (token, TOKEN_OPEN_SQUARE))
1495 nest_level++;
1496 else if (isType (token, TOKEN_CLOSE_SQUARE))
1497 nest_level--;
1498 else if (isType (token, TOKEN_OPEN_CURLY))
1500 if (prev_token_type == TOKEN_ARROW)
1501 parseBlock (token, CORK_NIL);
1502 else
1503 parseMethodsInAnonymousObject (token);
1506 prev_token_type = token->type;
1508 readTokenFull (token, include_newlines, NULL);
1512 static void skipQualifiedIdentifier (tokenInfo *const token)
1514 /* Skip foo.bar.baz */
1515 while (isType (token, TOKEN_IDENTIFIER))
1517 readToken (token);
1518 if (isType (token, TOKEN_PERIOD))
1519 readToken (token);
1520 else
1521 break;
1525 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
1527 vStringJoin (parent->string, '.', child->string);
1531 * Scanning functions
1534 static bool findCmdTerm (tokenInfo *const token, bool include_newlines, bool include_commas)
1537 * Read until we find either a semicolon or closing brace.
1538 * Any nested braces will be handled within.
1540 while (! isType (token, TOKEN_SEMICOLON) &&
1541 ! isType (token, TOKEN_CLOSE_CURLY) &&
1542 ! (include_commas && isType (token, TOKEN_COMMA)) &&
1543 ! isType (token, TOKEN_EOF))
1545 /* Handle nested blocks */
1546 if ( isType (token, TOKEN_OPEN_CURLY))
1548 parseBlock (token, CORK_NIL);
1549 readTokenFull (token, include_newlines, NULL);
1551 else if ( isType (token, TOKEN_OPEN_PAREN) )
1552 skipArgumentList(token, include_newlines, NULL);
1553 else if ( isType (token, TOKEN_OPEN_SQUARE) )
1554 skipArrayList(token, include_newlines);
1555 else
1556 readTokenFull (token, include_newlines, NULL);
1559 return isType (token, TOKEN_SEMICOLON);
1562 static void parseSwitch (tokenInfo *const token)
1565 * switch (expression) {
1566 * case value1:
1567 * statement;
1568 * break;
1569 * case value2:
1570 * statement;
1571 * break;
1572 * default : statement;
1576 readToken (token);
1578 if (isType (token, TOKEN_OPEN_PAREN))
1580 skipArgumentList(token, false, NULL);
1583 if (isType (token, TOKEN_OPEN_CURLY))
1585 parseBlock (token, CORK_NIL);
1589 static bool parseLoop (tokenInfo *const token)
1592 * Handles these statements
1593 * for (x=0; x<3; x++)
1594 * document.write("This text is repeated three times<br>");
1596 * for (x=0; x<3; x++)
1598 * document.write("This text is repeated three times<br>");
1601 * while (number<5){
1602 * document.write(number+"<br>");
1603 * number++;
1606 * do{
1607 * document.write(number+"<br>");
1608 * number++;
1610 * while (number<5);
1612 bool is_terminated = true;
1614 if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
1616 readToken(token);
1618 if (isType (token, TOKEN_OPEN_PAREN))
1619 skipArgumentList(token, false, NULL);
1621 if (isType (token, TOKEN_OPEN_CURLY))
1622 parseBlock (token, CORK_NIL);
1623 else
1624 is_terminated = parseLine(token, false);
1626 else if (isKeyword (token, KEYWORD_do))
1628 readToken(token);
1630 if (isType (token, TOKEN_OPEN_CURLY))
1631 parseBlock (token, CORK_NIL);
1632 else
1633 is_terminated = parseLine(token, false);
1635 if (is_terminated)
1636 readToken(token);
1638 if (isKeyword (token, KEYWORD_while))
1640 readToken(token);
1642 if (isType (token, TOKEN_OPEN_PAREN))
1643 skipArgumentList(token, true, NULL);
1645 if (! isType (token, TOKEN_SEMICOLON))
1647 /* oddly enough, `do {} while (0) var foo = 42` is perfectly
1648 * valid JS, so explicitly handle the remaining of the line
1649 * for the sake of the root scope handling (as parseJsFile()
1650 * always advances a token not to ever get stuck) */
1651 is_terminated = parseLine(token, false);
1656 return is_terminated;
1659 static bool parseIf (tokenInfo *const token)
1661 bool read_next_token = true;
1663 * If statements have two forms
1664 * if ( ... )
1665 * one line;
1667 * if ( ... )
1668 * statement;
1669 * else
1670 * statement
1672 * if ( ... ) {
1673 * multiple;
1674 * statements;
1678 * if ( ... ) {
1679 * return elem
1682 * This example if correctly written, but the
1683 * else contains only 1 statement without a terminator
1684 * since the function finishes with the closing brace.
1686 * function a(flag){
1687 * if(flag)
1688 * test(1);
1689 * else
1690 * test(2)
1693 * TODO: Deal with statements that can optional end
1694 * without a semi-colon. Currently this messes up
1695 * the parsing of blocks.
1696 * Need to somehow detect this has happened, and either
1697 * backup a token, or skip reading the next token if
1698 * that is possible from all code locations.
1702 readToken (token);
1704 if (isKeyword (token, KEYWORD_if))
1707 * Check for an "else if" and consume the "if"
1709 readToken (token);
1712 if (isType (token, TOKEN_OPEN_PAREN))
1713 skipArgumentList(token, false, NULL);
1715 if (isType (token, TOKEN_OPEN_CURLY))
1716 parseBlock (token, CORK_NIL);
1717 else
1719 /* The next token should only be read if this statement had its own
1720 * terminator */
1721 read_next_token = findCmdTerm (token, true, false);
1723 return read_next_token;
1726 static bool collectChildren (int corkIndex, tagEntryInfo *entry, void *data)
1728 intArray *children = (intArray *)data;
1730 Assert (entry->extensionFields.scopeIndex != CORK_NIL);
1731 intArrayAdd (children, corkIndex);
1733 return true;
1736 /* During parsing, there is a case that a language object (parent)
1737 * should be tagged only when there are language objects (children)
1738 * are defined in the parent; if the parent has no child, the parser
1739 * should not make a tag for the parent.
1741 * Handling the this case was not easy because the parser must fill
1742 * the scope field of children with the cork index of parent.
1743 * However, the parser can decide whether the parent should be tagged
1744 * or not after parsing inside the parent where the children are
1745 * defined.
1747 * "class" is an example of the language object of the parent.
1748 * "methods" are examples of the language object of the children.
1749 * "class" is tagged as a class only when methods are found in it.
1752 * The parser handles this case with the following steps:
1754 * 1. make a dummy tag entry for the candidate of parent with
1756 * > int dummyIndex = makeSimplePlaceholder().
1758 * ctags doesn't emit this dummy tag entry.
1760 * 2. parse inside the candidate of parent and count children.
1761 * If a child is found, make a tag for it with filling its
1762 * scope field with the dummyIndex.
1764 * 3. make a true tag entry for the parent if a child is found:
1766 * > int trueIdex = makeTagEntry (...);
1768 * 4. update the scope fields of children with the trueIdex.
1770 * moveChildren (dummyIndex, trueIdex);
1773 static void moveChildren (int old_parent, int new_parent)
1775 intArray *children = intArrayNew ();
1776 foreachEntriesInScope (old_parent, NULL, collectChildren, children);
1777 for (unsigned int i = 0; i < intArrayCount (children); i++)
1779 int c = intArrayItem (children, i);
1781 unregisterEntry (c);
1782 tagEntryInfo *e = getEntryInCorkQueue (c);
1783 Assert (e);
1784 e->extensionFields.scopeIndex = new_parent;
1785 registerEntry (c);
1787 intArrayDelete (children);
1790 static bool parseFunction (tokenInfo *const token, tokenInfo *const lhs_name, const bool is_inside_class)
1792 #ifdef DO_TRACING
1794 const char *scope_str = getNameStringForCorkIndex (token->scope);
1795 const char *scope_kind_str = getKindStringForCorkIndex (token->scope);
1796 TRACE_ENTER_TEXT("token has scope '%s' of kind %s", scope_str, scope_kind_str);
1798 #endif
1800 tokenInfo *const name = newToken ();
1801 vString *const signature = vStringNew ();
1802 bool is_generator = false;
1803 bool is_anonymous = false;
1804 int index_for_name = CORK_NIL;
1806 * This deals with these formats
1807 * function validFunctionTwo(a,b) {}
1808 * function * generator(a,b) {}
1811 copyToken (name, token, true);
1812 readToken (name);
1813 if (isType (name, TOKEN_KEYWORD) &&
1814 canBeFunctionName (name, false /* true if we're in strict mode */))
1816 // treat as function name
1817 name->type = TOKEN_IDENTIFIER;
1818 name->keyword = KEYWORD_NONE;
1821 if (isType (name, TOKEN_STAR))
1823 is_generator = true;
1824 readToken (name);
1826 if (isType (name, TOKEN_OPEN_PAREN))
1828 /* anonymous function */
1829 copyToken (token, name, false);
1830 anonGenerate (name->string, "anonymousFunction", JSTAG_FUNCTION);
1831 is_anonymous = true;
1833 else if (!isType (name, TOKEN_IDENTIFIER))
1834 goto cleanUp;
1835 else
1836 readToken (token);
1838 if ( isType (token, TOKEN_OPEN_PAREN) )
1839 skipArgumentList(token, false, signature);
1841 if ( isType (token, TOKEN_OPEN_CURLY) )
1843 if ( lhs_name != NULL && is_inside_class )
1845 index_for_name = makeJsTag (lhs_name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
1847 else if ( lhs_name != NULL )
1849 index_for_name = isClassName (lhs_name) ?
1850 makeClassTag (lhs_name, signature, NULL):
1851 makeFunctionTag (lhs_name, signature, is_generator);
1854 int f = index_for_name,
1855 p = CORK_NIL;
1856 if ( f == CORK_NIL || !is_anonymous )
1857 p = isClassName (name) ?
1858 makeClassTagCommon (name, signature, NULL, is_anonymous) :
1859 makeFunctionTagCommon (name, signature, is_generator, is_anonymous);
1861 if (f == CORK_NIL)
1862 f = p;
1864 parseBlock (token, f);
1867 if ( lhs_name == NULL )
1868 findCmdTerm (token, false, false);
1870 cleanUp:
1871 vStringDelete (signature);
1872 deleteToken (name);
1874 TRACE_LEAVE();
1875 return index_for_name;
1878 /* Parses a block surrounded by curly braces.
1879 * @p parent_scope is the scope name for this block, or NULL for unnamed scopes */
1880 static bool parseBlock (tokenInfo *const token, int parent_scope)
1882 TRACE_ENTER();
1884 bool is_class = false;
1885 bool read_next_token = true;
1886 int save_scope = token->scope;
1888 if (parent_scope != CORK_NIL)
1890 token->scope = parent_scope;
1891 token->nestLevel++;
1895 * Make this routine a bit more forgiving.
1896 * If called on an open_curly advance it
1898 if (isType (token, TOKEN_OPEN_CURLY))
1899 readToken(token);
1901 if (! isType (token, TOKEN_CLOSE_CURLY))
1904 * Read until we find the closing brace,
1905 * any nested braces will be handled within
1909 read_next_token = true;
1910 if (isKeyword (token, KEYWORD_this))
1913 * Means we are inside a class and have found
1914 * a class, not a function
1916 is_class = true;
1919 * Ignore the remainder of the line
1920 * findCmdTerm(token);
1922 read_next_token = parseLine (token, is_class);
1924 else if (isKeyword (token, KEYWORD_var) ||
1925 isKeyword (token, KEYWORD_let) ||
1926 isKeyword (token, KEYWORD_const))
1929 * Potentially we have found an inner function.
1930 * Set something to indicate the scope
1932 read_next_token = parseLine (token, is_class);
1934 else if (isType (token, TOKEN_OPEN_CURLY))
1936 /* Handle nested blocks */
1937 parseBlock (token, CORK_NIL);
1939 else
1942 * It is possible for a line to have no terminator
1943 * if the following line is a closing brace.
1944 * parseLine will detect this case and indicate
1945 * whether we should read an additional token.
1947 read_next_token = parseLine (token, is_class);
1951 * Always read a new token unless we find a statement without
1952 * a ending terminator
1954 if( read_next_token )
1955 readToken(token);
1958 * If we find a statement without a terminator consider the
1959 * block finished, otherwise the stack will be off by one.
1961 } while (! isType (token, TOKEN_EOF) &&
1962 ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1965 token->scope = save_scope;
1966 if (parent_scope != CORK_NIL)
1967 token->nestLevel--;
1969 TRACE_LEAVE();
1970 return is_class;
1973 static bool parseMethods (tokenInfo *const token, int class_index,
1974 const bool is_es6_class)
1976 TRACE_ENTER_TEXT("token is '%s' of type %s in parentToken '%s' of kind %s (es6: %s)",
1977 vStringValue(token->string), tokenTypeName (token->type),
1978 class_index == CORK_NIL ? "none" : getNameStringForCorkIndex (class_index),
1979 class_index == CORK_NIL ? "none" : getKindStringForCorkIndex (class_index),
1980 is_es6_class? "yes": "no");
1983 * When making a tag for `name', its core index is stored to
1984 * `indexForName'. The value stored to `indexForName' is valid
1985 * till the value for `name' is updated. If the value for `name'
1986 * is changed, `indexForName' is reset to CORK_NIL.
1988 tokenInfo *const name = newToken ();
1989 int index_for_name = CORK_NIL;
1990 bool has_methods = false;
1991 int save_scope = token->scope;
1993 if (class_index != CORK_NIL)
1994 token->scope = class_index;
1997 * This deals with these formats
1998 * validProperty : 2,
1999 * validMethod : function(a,b) {}
2000 * 'validMethod2' : function(a,b) {}
2001 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
2002 * get prop() {}
2003 * set prop(val) {}
2004 * get(...) {}
2005 * set(...) {}
2007 * ES6 methods:
2008 * property(...) {}
2009 * *generator() {}
2011 * ES6 computed name:
2012 * [property]() {}
2013 * get [property]() {}
2014 * set [property]() {}
2015 * *[generator]() {}
2017 * tc39/proposal-class-fields
2018 * field0 = function(a,b) {}
2019 * field1 = 1
2020 * The parser extracts field0 as a method because the left value
2021 * is a function (kind propagation), and field1 as a field.
2023 * static methods and static initialization blocks
2024 * - ref. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Classes/Static_initialization_blocks
2026 * static func() {}
2027 * static {}
2028 * static prop;
2029 * static prop = val;
2032 bool dont_read = false;
2035 bool is_setter = false;
2036 bool is_getter = false;
2038 if (!dont_read)
2039 readToken (token);
2040 dont_read = false;
2042 start:
2043 if (isType (token, TOKEN_CLOSE_CURLY))
2045 goto cleanUp;
2048 if (isType (token, TOKEN_KEYWORD) && canBePropertyName (token))
2050 tokenInfo *saved_token = newToken ();
2051 copyToken (saved_token, token, true);
2052 readToken (token);
2054 /* it wasn't actually a keyword after all, make it an identifier */
2055 if (isType(token, TOKEN_OPEN_PAREN) || isType(token, TOKEN_COLON))
2057 Assert (NextToken == NULL);
2058 NextToken = newToken ();
2059 copyToken (NextToken, token, false); /* save token for next read */
2060 copyToken (token, saved_token, true); /* restore token to process */
2061 token->type = TOKEN_IDENTIFIER; /* process as identifier */
2062 token->keyword = KEYWORD_NONE;
2064 else if (isKeyword (saved_token, KEYWORD_static) &&
2065 isType (token, TOKEN_OPEN_CURLY))
2067 /* static initialization block */
2068 deleteToken (saved_token);
2069 parseBlock (token, class_index);
2070 continue;
2072 else if (isKeyword (saved_token, KEYWORD_get))
2073 is_getter = true;
2074 else if (isKeyword (saved_token, KEYWORD_set))
2075 is_setter = true;
2076 else if (isKeyword (saved_token, KEYWORD_async) ||
2077 isKeyword (saved_token, KEYWORD_static))
2079 /* can be a qualifier for another "keyword", so start over */
2080 deleteToken (saved_token);
2081 goto start;
2084 deleteToken (saved_token);
2086 else if (isType (token, TOKEN_DOTS))
2088 /* maybe spread operator. Just skip the next expression. */
2089 findCmdTerm(token, true, true);
2090 continue;
2093 if (! isType (token, TOKEN_KEYWORD) &&
2094 ! isType (token, TOKEN_SEMICOLON))
2096 bool is_generator = false;
2097 bool is_shorthand = false; /* ES6 shorthand syntax */
2098 bool is_computed_name = false; /* ES6 computed property name */
2099 bool is_dynamic_prop = false;
2100 vString *dprop = NULL; /* is_computed_name is true but
2101 * the name is not represented in
2102 * a string literal. The expressions
2103 * go this string. */
2105 if (isType (token, TOKEN_STAR)) /* shorthand generator */
2107 is_generator = true;
2108 readToken (token);
2111 if (isType (token, TOKEN_OPEN_SQUARE))
2113 is_computed_name = true;
2114 dprop = vStringNewInit ("[");
2115 readTokenFull (token, false, dprop);
2118 copyToken(name, token, true);
2119 index_for_name = CORK_NIL;
2120 if (is_computed_name && ! isType (token, TOKEN_STRING))
2121 is_dynamic_prop = true;
2123 readTokenFull (token, false, dprop);
2125 if (is_computed_name)
2127 int depth = 1;
2130 if (isType (token, TOKEN_CLOSE_SQUARE))
2131 depth--;
2132 else
2134 is_dynamic_prop = true;
2135 if (isType (token, TOKEN_OPEN_SQUARE))
2136 depth++;
2138 readTokenFull (token, false, (is_dynamic_prop && depth != 0)? dprop: NULL);
2139 } while (! isType (token, TOKEN_EOF) && depth > 0);
2142 if (is_dynamic_prop)
2144 injectDynamicName (name, dprop);
2145 index_for_name = CORK_NIL;
2146 dprop = NULL;
2148 else
2149 vStringDelete (dprop);
2151 is_shorthand = isType (token, TOKEN_OPEN_PAREN);
2152 bool can_be_field = isType (token, TOKEN_EQUAL_SIGN);
2153 if ( isType (token, TOKEN_COLON) || can_be_field || is_shorthand )
2155 if (! is_shorthand)
2157 readToken (token);
2158 if (isKeyword (token, KEYWORD_async))
2159 readToken (token);
2162 vString * signature = vStringNew ();
2163 if ( is_shorthand || isKeyword (token, KEYWORD_function) )
2165 TRACE_PRINT("Seems to be a function or shorthand");
2167 if (! is_shorthand)
2169 readToken (token);
2170 if (isType (token, TOKEN_STAR))
2172 /* generator: 'function' '*' '(' ... ')' '{' ... '}' */
2173 is_generator = true;
2174 readToken (token);
2177 if ( isType (token, TOKEN_OPEN_PAREN) )
2179 skipArgumentList(token, false, signature);
2182 function:
2183 if (isType (token, TOKEN_OPEN_CURLY))
2185 has_methods = true;
2187 int kind = JSTAG_METHOD;
2188 if (is_generator)
2189 kind = JSTAG_GENERATOR;
2190 else if (is_getter)
2191 kind = JSTAG_GETTER;
2192 else if (is_setter)
2193 kind = JSTAG_SETTER;
2195 index_for_name = makeJsTag (name, kind, signature, NULL);
2196 parseBlock (token, index_for_name);
2199 * If we aren't parsing an ES6 class (for which there
2200 * is no mandatory separators), read to the closing
2201 * curly, check next token, if a comma, we must loop
2202 * again.
2204 if (! is_es6_class)
2205 readToken (token);
2208 else if (! is_es6_class)
2210 int p = CORK_NIL;
2211 tokenInfo *saved_token = newToken ();
2213 /* skip whatever is the value */
2214 while (! isType (token, TOKEN_COMMA) &&
2215 ! isType (token, TOKEN_CLOSE_CURLY) &&
2216 ! isType (token, TOKEN_EOF))
2218 if (isType (token, TOKEN_OPEN_CURLY))
2220 /* Recurse to find child properties/methods */
2221 p = makeSimplePlaceholder (name->string);
2222 parseMethods (token, p, false);
2223 readToken (token);
2225 else if (isType (token, TOKEN_OPEN_PAREN))
2227 vStringClear (signature);
2228 skipArgumentList (token, false, signature);
2230 else if (isType (token, TOKEN_OPEN_SQUARE))
2232 skipArrayList (token, false);
2234 else if (isType (token, TOKEN_ARROW))
2236 TRACE_PRINT("Seems to be an anonymous function");
2237 if (vStringIsEmpty (signature) &&
2238 isType (saved_token, TOKEN_IDENTIFIER))
2240 vStringPut (signature, '(');
2241 vStringCat (signature, saved_token->string);
2242 vStringPut (signature, ')');
2244 readToken (token);
2245 deleteToken (saved_token);
2246 goto function;
2248 else
2250 copyToken (saved_token, token, true);
2251 readToken (token);
2254 deleteToken (saved_token);
2256 has_methods = true;
2257 index_for_name = makeJsTag (name, JSTAG_PROPERTY, NULL, NULL);
2258 if (p != CORK_NIL)
2259 moveChildren (p, index_for_name);
2261 else if (can_be_field)
2263 makeJsTag (name, JSTAG_FIELD, NULL, NULL);
2264 parseLine (token, true);
2267 vStringDelete (signature);
2269 else
2271 bool is_property = isType (token, TOKEN_COMMA);
2272 makeJsTag (name, is_property ? JSTAG_PROPERTY : JSTAG_FIELD, NULL, NULL);
2273 if (!isType (token, TOKEN_SEMICOLON) && !is_property)
2274 dont_read = true;
2277 } while ( isType(token, TOKEN_COMMA) ||
2278 ( is_es6_class && ! isType(token, TOKEN_EOF) ) );
2280 TRACE_PRINT("Finished parsing methods");
2282 findCmdTerm (token, false, false);
2284 cleanUp:
2285 token->scope = save_scope;
2286 deleteToken (name);
2288 TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no");
2289 return has_methods;
2292 static bool parseES6Class (tokenInfo *const token, const tokenInfo *target_name)
2294 TRACE_ENTER();
2296 tokenInfo * class_name = newToken ();
2297 vString *inheritance = NULL;
2298 bool is_anonymous = true;
2300 copyToken (class_name, token, true);
2301 readToken (class_name);
2303 /* optional name */
2304 if (isType (class_name, TOKEN_IDENTIFIER))
2306 readToken (token);
2307 is_anonymous = false;
2309 else
2311 copyToken (token, class_name, true);
2312 /* We create a fake name so we have a scope for the members */
2313 if (! target_name)
2314 anonGenerate (class_name->string, "AnonymousClass", JSTAG_CLASS);
2317 if (! target_name)
2318 target_name = class_name;
2320 if (isKeyword (token, KEYWORD_extends))
2321 inheritance = vStringNew ();
2323 /* skip inheritance info */
2324 while (! isType (token, TOKEN_OPEN_CURLY) &&
2325 ! isType (token, TOKEN_EOF) &&
2326 ! isType (token, TOKEN_SEMICOLON))
2327 readTokenFull (token, false, inheritance);
2329 /* remove the last added token (here we assume it's one char, "{" or ";" */
2330 if (inheritance && vStringLength (inheritance) > 0 &&
2331 ! isType (token, TOKEN_EOF))
2333 vStringChop (inheritance);
2334 vStringStripTrailing (inheritance);
2335 vStringStripLeading (inheritance);
2338 TRACE_PRINT("Emitting tag for class '%s'", vStringValue(target_name->string));
2340 int r = makeJsTagCommon (target_name, JSTAG_CLASS, NULL, inheritance,
2341 (is_anonymous && (target_name == class_name)));
2343 if (! is_anonymous && target_name != class_name)
2345 /* FIXME: what to do with the secondary name? It's local to the
2346 * class itself, so not very useful... let's hope people
2347 * don't give it another name than the target in case of
2348 * var MyClass = class MyClassSecondaryName { ... }
2349 * I guess it could be an alias to MyClass, or duplicate it
2350 * altogether, not sure. */
2351 makeJsTag (class_name, JSTAG_CLASS, NULL, inheritance);
2354 if (inheritance)
2355 vStringDelete (inheritance);
2357 if (isType (token, TOKEN_OPEN_CURLY))
2358 parseMethods (token, r, true);
2360 deleteToken (class_name);
2362 TRACE_LEAVE();
2363 return true;
2366 static void convertToFunction (int index, const char *signature)
2368 tagEntryInfo *e = getEntryInCorkQueue(index);
2369 if (e && e->kindIndex != JSTAG_FUNCTION
2370 && ( signature == NULL || e->extensionFields.signature == NULL))
2372 e->kindIndex = JSTAG_FUNCTION;
2373 if (signature)
2374 e->extensionFields.signature = eStrdup (signature);
2378 static vString *trimGarbageInSignature (vString *sig)
2380 /* Drop "=>" at the end. */
2381 const char *sigstr = vStringValue (sig);
2382 char *last = strrchr (sigstr, ')');
2383 Assert (last);
2384 vStringTruncate (sig, last - sigstr + 1);
2385 return sig;
2388 static vString *makeVStringForSignature (tokenInfo *const token)
2390 vString * sig = vStringNewInit ("(");
2392 if (isType (token, TOKEN_IDENTIFIER))
2393 vStringCat (sig, token->string);
2394 else if (isType (token, TOKEN_CLOSE_PAREN))
2395 vStringPut (sig, ')');
2396 else if (isType (token, TOKEN_DOTS))
2397 vStringCatS (sig, "...");
2399 return sig;
/* State shared by the helpers that parse a single statement. */
typedef struct sStatementState {
	int indexForName;   /* cork index of the tag made for the current name,
	                     * or CORK_NIL when none is valid */
	bool isClass;       /* the current name is being treated as a class */
	bool isConst;       /* the statement was introduced with `const' */
	bool isTerminated;  /* value handed back to the caller of parseStatement() */
	bool isGlobal;      /* var/let/const seen at nest level 0 and no '.' in the name */
	bool foundThis;     /* the left-hand side started with `this' */
} statementState;
/* Function form of the deleteToken() macro, usable as a ptrArray
 * element destructor. */
static void deleteTokenFn (void *token)
{
	deleteToken (token);
}
2413 static bool parsePrototype (tokenInfo *const name, tokenInfo *const token, statementState *const state)
2415 TRACE_ENTER();
2418 * When we reach the "prototype" tag, we infer:
2419 * "BindAgent" is a class
2420 * "build" is a method
2422 * function BindAgent( repeatableIdName, newParentIdName ) {
2425 * CASE 1
2426 * Specified function name: "build"
2427 * BindAgent.prototype.build =
2428 * BondAgent.prototype.crush = function( mode ) {
2429 * maybe parse nested functions
2432 * CASE 2
2433 * Prototype listing
2434 * ValidClassOne.prototype = {
2435 * 'validMethodOne' : function(a,b) {},
2436 * 'validMethodTwo' : function(a,b) {}
2440 if (! ( isType (name, TOKEN_IDENTIFIER)
2441 || isType (name, TOKEN_STRING) ) )
2443 * Unexpected input. Try to reset the parsing.
2445 * TOKEN_STRING is acceptable. e.g.:
2446 * -----------------------------------
2447 * "a".prototype = function( mode ) {}
2450 TRACE_LEAVE_TEXT("bad input");
2451 return false;
2454 state->indexForName = makeClassTag (name, NULL, NULL);
2455 state->isClass = true;
2458 * There should a ".function_name" next.
2460 readToken (token);
2461 if (isType (token, TOKEN_PERIOD))
2464 * Handle CASE 1
2466 readToken (token);
2467 if (isType (token, TOKEN_KEYWORD) && canBePropertyName (token))
2469 // treat as function name
2470 token->type = TOKEN_IDENTIFIER;
2471 token->keyword = KEYWORD_NONE;
2474 if (! isType(token, TOKEN_KEYWORD))
2476 vString *const signature = vStringNew ();
2478 token->scope = state->indexForName;
2480 tokenInfo *identifier_token = newToken ();
2481 ptrArray *prototype_tokens = NULL;
2482 accept_period_in_identifier(true);
2484 tokenInfo *const method_body_token = newToken ();
2485 copyToken (method_body_token, token, true);
2486 readToken (method_body_token);
2488 while (! isType (method_body_token, TOKEN_SEMICOLON) &&
2489 ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
2490 ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
2491 ! isType (method_body_token, TOKEN_EOF))
2493 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
2494 skipArgumentList(method_body_token, false,
2495 vStringLength (signature) == 0 ? signature : NULL);
2496 else
2498 char* s1 = vStringValue (identifier_token->string);
2499 char* s2 = NULL;
2500 if ( isType (method_body_token, TOKEN_EQUAL_SIGN) &&
2501 ! isType (identifier_token, TOKEN_UNDEFINED) &&
2502 (s2 = strstr (s1, ".prototype.")))
2504 if (prototype_tokens == NULL)
2505 prototype_tokens = ptrArrayNew (deleteTokenFn);
2507 memmove (s2, s2+10, strlen (s2+10) + 1);
2508 vStringSetLength (identifier_token->string);
2510 tokenInfo *const save_token = newToken ();
2511 copyToken (save_token, identifier_token, true);
2512 ptrArrayAdd (prototype_tokens, save_token);
2513 identifier_token->type = TOKEN_UNDEFINED;
2515 else if ( isType(method_body_token, TOKEN_IDENTIFIER))
2516 copyToken (identifier_token, method_body_token, false);
2518 readToken (method_body_token);
2521 deleteToken (identifier_token);
2522 accept_period_in_identifier(false);
2524 int index = makeJsTag (token, JSTAG_METHOD, signature, NULL);
2526 if (prototype_tokens != NULL)
2528 for (int i=0; i<ptrArrayCount (prototype_tokens); i++)
2530 makeJsTag (ptrArrayItem (prototype_tokens, i), JSTAG_METHOD, signature, NULL);
2532 ptrArrayUnref (prototype_tokens);
2535 vStringDelete (signature);
2537 if ( isType (method_body_token, TOKEN_OPEN_CURLY))
2539 parseBlock (method_body_token, index);
2540 state->isTerminated = true;
2542 else
2543 state->isTerminated = isType (method_body_token, TOKEN_SEMICOLON);
2545 deleteToken (method_body_token);
2546 TRACE_LEAVE_TEXT("done: single");
2547 return false;
2550 else if (isType (token, TOKEN_EQUAL_SIGN))
2552 readToken (token);
2553 if (isType (token, TOKEN_OPEN_CURLY))
2556 * Handle CASE 2
2558 * Creates tags for each of these class methods
2559 * ValidClassOne.prototype = {
2560 * 'validMethodOne' : function(a,b) {},
2561 * 'validMethodTwo' : function(a,b) {}
2564 parseMethods(token, state->indexForName, false);
2566 * Find to the end of the statement
2568 findCmdTerm (token, false, false);
2569 state->isTerminated = true;
2570 TRACE_LEAVE_TEXT("done: multiple");
2571 return false;
2575 TRACE_LEAVE_TEXT("done: not found");
2576 return true;
2579 static bool parseStatementLHS (tokenInfo *const name, tokenInfo *const token, statementState *const state)
2581 TRACE_ENTER();
2585 readToken (token);
2586 if (! isType(token, TOKEN_KEYWORD))
2588 if ( state->isClass )
2589 token->scope = state->indexForName;
2590 else
2592 addContext (name, token);
2593 state->indexForName = CORK_NIL;
2596 readToken (token);
2598 else if ( isKeyword(token, KEYWORD_prototype) )
2600 if (! parsePrototype (name, token, state) )
2602 TRACE_LEAVE_TEXT("done: prototype");
2603 return false;
2606 else
2607 readToken (token);
2608 } while (isType (token, TOKEN_PERIOD));
2610 TRACE_LEAVE();
2611 return true;
2614 static bool parseStatementRHS (tokenInfo *const name, tokenInfo *const token, statementState *const state, bool is_inside_class)
2616 TRACE_ENTER();
2618 int paren_depth = 0;
2619 int arrowfun_paren_depth = 0;
2620 bool canbe_arrowfun = false;
2622 readToken (token);
2624 /* rvalue might be surrounded with parentheses */
2625 while (isType (token, TOKEN_OPEN_PAREN))
2627 paren_depth++;
2628 arrowfun_paren_depth++;
2629 readToken (token);
2632 if (isKeyword (token, KEYWORD_async))
2634 arrowfun_paren_depth = 0;
2635 readToken (token);
2637 /* check for function signature */
2638 while (isType (token, TOKEN_OPEN_PAREN))
2640 paren_depth++;
2641 arrowfun_paren_depth++;
2642 readToken (token);
2646 if ( isKeyword (token, KEYWORD_function) )
2648 state->indexForName = parseFunction (token, name, is_inside_class);
2650 else if (isKeyword (token, KEYWORD_class))
2652 state->isTerminated = parseES6Class (token, name);
2654 else if (isType (token, TOKEN_OPEN_CURLY))
2657 * Creates tags for each of these class methods
2658 * objectOne = {
2659 * 'validMethodOne' : function(a,b) {},
2660 * 'validMethodTwo' : function(a,b) {}
2662 * Or checks if this is a hash variable.
2663 * var z = {};
2665 bool anon_object = vStringIsEmpty (name->string);
2666 if (anon_object)
2668 anonGenerate (name->string, "anonymousObject", JSTAG_VARIABLE);
2669 state->indexForName = CORK_NIL;
2671 int p = makeSimplePlaceholder (name->string);
2672 if ( parseMethods(token, p, false) )
2674 jsKind kind = state->foundThis || strchr (vStringValue(name->string), '.') != NULL ? JSTAG_PROPERTY : JSTAG_VARIABLE;
2675 state->indexForName = makeJsTagCommon (name, kind, NULL, NULL, anon_object);
2676 moveChildren (p, state->indexForName);
2678 else if ( token->nestLevel == 0 && state->isGlobal )
2681 * Only create variables for global scope
2683 * A pointer can be created to the function.
2684 * If we recognize the function/class name ignore the variable.
2685 * This format looks identical to a variable definition.
2686 * A variable defined outside of a block is considered
2687 * a global variable:
2688 * var g_var1 = 1;
2689 * var g_var2;
2690 * This is not a global variable:
2691 * var g_var = function;
2692 * This is a global variable:
2693 * var g_var = different_var_name;
2695 state->indexForName = anyKindsEntryInScope (name->scope, vStringValue (name->string),
2696 (int[]){JSTAG_VARIABLE, JSTAG_FUNCTION, JSTAG_CLASS}, 3, true);
2698 if (state->indexForName == CORK_NIL)
2699 state->indexForName = makeJsTag (name, state->isConst ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2701 /* Here we should be at the end of the block, on the close curly.
2702 * If so, read the next token not to confuse that close curly with
2703 * the end of the current statement. */
2704 if (isType (token, TOKEN_CLOSE_CURLY))
2706 readTokenFull(token, true, NULL);
2707 state->isTerminated = isType (token, TOKEN_SEMICOLON);
2710 else if (isType (token, TOKEN_OPEN_SQUARE) && !vStringIsEmpty (name->string))
2713 * Creates tag for an array
2715 skipArrayList(token, true);
2716 jsKind kind = state->foundThis || strchr (vStringValue(name->string), '.') != NULL ? JSTAG_PROPERTY : JSTAG_VARIABLE;
2718 * Only create variables for global scope or class/object properties
2720 if ( ( token->nestLevel == 0 && state->isGlobal ) || kind == JSTAG_PROPERTY )
2722 state->indexForName = makeJsTagCommon (name, kind, NULL, NULL, false);
2725 else if (isKeyword (token, KEYWORD_new))
2727 readToken (token);
2728 bool is_var = isType (token, TOKEN_IDENTIFIER) || isKeyword (token, KEYWORD_capital_object);
2729 if ( isKeyword (token, KEYWORD_function) ||
2730 isKeyword (token, KEYWORD_capital_function) ||
2731 is_var )
2733 if ( isKeyword (token, KEYWORD_capital_function) && isClassName (name) )
2734 state->isClass = true;
2736 if ( isType (token, TOKEN_IDENTIFIER) )
2737 skipQualifiedIdentifier (token);
2738 else
2739 readToken (token);
2741 if ( isType (token, TOKEN_OPEN_PAREN) )
2742 skipArgumentList(token, true, NULL);
2744 if (isType (token, TOKEN_SEMICOLON) && token->nestLevel == 0)
2746 if ( is_var )
2747 state->indexForName = makeJsTag (name, state->isConst ? JSTAG_CONSTANT : state->foundThis ? JSTAG_PROPERTY : JSTAG_VARIABLE, NULL, NULL);
2748 else if ( state->isClass )
2749 state->indexForName = makeClassTag (name, NULL, NULL);
2750 else
2752 /* FIXME: we cannot really get a meaningful
2753 * signature from a `new Function()` call,
2754 * so for now just don't set any */
2755 state->indexForName = makeFunctionTag (name, NULL, false);
2758 else if (isType (token, TOKEN_CLOSE_CURLY))
2759 state->isTerminated = false;
2762 else if (! isType (token, TOKEN_KEYWORD) &&
2763 token->nestLevel == 0 && state->isGlobal )
2766 * Only create variables for global scope
2768 * A pointer can be created to the function.
2769 * If we recognize the function/class name ignore the variable.
2770 * This format looks identical to a variable definition.
2771 * A variable defined outside of a block is considered
2772 * a global variable:
2773 * var g_var1 = 1;
2774 * var g_var2;
2775 * This is not a global variable:
2776 * var g_var = function;
2777 * This is a global variable:
2778 * var g_var = different_var_name;
2780 state->indexForName = anyKindsEntryInScope (name->scope, vStringValue (name->string),
2781 (int[]){JSTAG_VARIABLE, JSTAG_FUNCTION, JSTAG_CLASS}, 3, true);
2783 if (state->indexForName == CORK_NIL)
2785 state->indexForName = makeJsTag (name, state->isConst ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2786 if (isType (token, TOKEN_IDENTIFIER))
2787 canbe_arrowfun = true;
2790 else if ( isType (token, TOKEN_IDENTIFIER) )
2792 canbe_arrowfun = true;
2795 if (arrowfun_paren_depth == 0 && canbe_arrowfun)
2797 /* var v = a => { ... } */
2798 vString *sig = vStringNewInit ("(");
2799 vStringCat (sig, token->string);
2800 vStringPut (sig, ')');
2801 readTokenFull (token, true, NULL);
2802 if (isType (token, TOKEN_ARROW))
2804 if (state->indexForName == CORK_NIL) // was not a global variable
2805 state->indexForName = makeFunctionTag (name, sig, false);
2806 else
2807 convertToFunction (state->indexForName, vStringValue (sig));
2809 vStringDelete (sig);
2812 if (paren_depth > 0)
2814 /* Collect parameters for arrow function. */
2815 vString *sig = (arrowfun_paren_depth == 1)? makeVStringForSignature (token): NULL;
2817 while (paren_depth > 0 && ! isType (token, TOKEN_EOF))
2819 if (isType (token, TOKEN_OPEN_PAREN))
2821 paren_depth++;
2822 arrowfun_paren_depth++;
2824 else if (isType (token, TOKEN_CLOSE_PAREN))
2826 paren_depth--;
2827 arrowfun_paren_depth--;
2829 readTokenFull (token, true, sig);
2831 /* var f = (a, b) => { ... } */
2832 if (arrowfun_paren_depth == 0 && isType (token, TOKEN_ARROW) && sig)
2834 if (state->indexForName == CORK_NIL) // was not a global variable
2835 state->indexForName = makeFunctionTag (name, trimGarbageInSignature (sig), false);
2836 else
2837 convertToFunction (state->indexForName,
2838 vStringValue (trimGarbageInSignature (sig)));
2840 vStringDelete (sig);
2841 sig = NULL;
2844 if (isType (token, TOKEN_CLOSE_CURLY))
2845 state->isTerminated = false;
2847 vStringDelete (sig); /* NULL is acceptable. */
2850 TRACE_LEAVE();
2851 return true;
2854 static bool parseStatement (tokenInfo *const token, bool is_inside_class)
2856 TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? "yes": "no");
2859 * When making a tag for `name', its core index is stored to
2860 * `indexForName'. The value stored to `indexForName' is valid
2861 * till the value for `name' is updated. If the value for `name'
2862 * is changed, `indexForName' is reset to CORK_NIL.
2864 tokenInfo *const name = newToken ();
2865 int save_scope = token->scope;
2866 bool found_lhs = false;
2867 statementState state = {
2868 .indexForName = CORK_NIL,
2869 .isClass = is_inside_class,
2870 .isConst = false,
2871 .isTerminated = true,
2872 .isGlobal = false,
2873 .foundThis = false
2877 * Functions can be named or unnamed.
2878 * This deals with these formats:
2879 * Function
2880 * validFunctionOne = function(a,b) {}
2881 * testlib.validFunctionFive = function(a,b) {}
2882 * var innerThree = function(a,b) {}
2883 * var innerFour = (a,b) {}
2884 * var D2 = secondary_fcn_name(a,b) {}
2885 * var D3 = new Function("a", "b", "return a+b;");
2886 * Class
2887 * testlib.extras.ValidClassOne = function(a,b) {
2888 * this.a = a;
2890 * Class Methods
2891 * testlib.extras.ValidClassOne.prototype = {
2892 * 'validMethodOne' : function(a,b) {},
2893 * 'validMethodTwo' : function(a,b) {}
2895 * ValidClassTwo = function ()
2897 * this.validMethodThree = function() {}
2898 * // unnamed method
2899 * this.validMethodFour = () {}
2901 * Database.prototype.validMethodThree = Database_getTodaysDate;
2905 * var can precede an inner function
2907 if ( isKeyword(token, KEYWORD_var) ||
2908 isKeyword(token, KEYWORD_let) ||
2909 isKeyword(token, KEYWORD_const) )
2911 TRACE_PRINT("var/let/const case");
2912 state.isConst = isKeyword(token, KEYWORD_const);
2914 * Only create variables for global scope
2916 if ( token->nestLevel == 0 )
2918 state.isGlobal = true;
2920 readToken(token);
2923 nextVar:
2924 state.indexForName = CORK_NIL;
2925 state.foundThis = false;
2926 if ( isKeyword(token, KEYWORD_this) )
2928 TRACE_PRINT("found 'this' keyword");
2929 state.foundThis = true;
2931 readToken(token);
2932 if (isType (token, TOKEN_PERIOD))
2934 readToken(token);
2936 else if (isType (token, TOKEN_OPEN_SQUARE))
2938 skipArrayList (token, false);
2942 copyToken(name, token, true);
2943 TRACE_PRINT("name becomes '%s' of type %s",
2944 vStringValue(token->string), tokenTypeName (token->type));
2946 while (! isType (token, TOKEN_CLOSE_CURLY) &&
2947 ! isType (token, TOKEN_SEMICOLON) &&
2948 ! isType (token, TOKEN_EQUAL_SIGN) &&
2949 ! isType (token, TOKEN_COMMA) &&
2950 ! isType (token, TOKEN_EOF))
2952 found_lhs = true;
2953 if (isType (token, TOKEN_OPEN_CURLY))
2955 parseBlock (token, CORK_NIL);
2956 readTokenFull (token, true, NULL);
2958 else if (isKeyword (token, KEYWORD_function))
2960 parseFunction (token, NULL, false);
2961 readTokenFull (token, true, NULL);
2964 /* Potentially the name of the function */
2965 else if (isType (token, TOKEN_PERIOD))
2968 * Cannot be a global variable is it has dot references in the name
2970 state.isGlobal = false;
2971 /* Assume it's an assignment to a global name (e.g. a class) using
2972 * its fully qualified name, so strip the scope.
2973 * FIXME: resolve the scope so we can make more than an assumption. */
2974 token->scope = CORK_NIL;
2975 name->scope = CORK_NIL;
2976 if ( ! parseStatementLHS (name, token, &state) )
2977 goto cleanUp;
2979 else
2980 readTokenFull (token, true, NULL);
2982 if ( isType (token, TOKEN_OPEN_PAREN) )
2983 skipArgumentList(token, false, NULL);
2985 if ( isType (token, TOKEN_OPEN_SQUARE) )
2986 skipArrayList(token, false);
2989 if ( isType (token, TOKEN_CLOSE_CURLY) )
2992 * Reaching this section without having
2993 * processed an open curly brace indicates
2994 * the statement is most likely not terminated.
2996 state.isTerminated = false;
2998 else if ( isType (token, TOKEN_SEMICOLON) ||
2999 isType (token, TOKEN_EOF) ||
3000 isType (token, TOKEN_COMMA) )
3003 * Only create variables for global scope
3005 if ( token->nestLevel == 0 && state.isGlobal )
3008 * Handles this syntax:
3009 * var g_var2;
3011 state.indexForName = makeJsTag (name, state.isConst ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
3014 * Statement has ended.
3015 * This deals with calls to functions, like:
3016 * alert(..);
3018 if (isType (token, TOKEN_COMMA))
3020 readToken (token);
3021 state.isClass = false;
3022 goto nextVar;
3025 else
3027 bool ok = found_lhs;
3028 if ( ok && isType (token, TOKEN_EQUAL_SIGN) )
3030 ok = parseStatementRHS (name, token, &state, is_inside_class);
3032 /* if we aren't already at the cmd end, advance to it and check whether
3033 * the statement was terminated */
3034 if (ok &&
3035 ! isType (token, TOKEN_CLOSE_CURLY) &&
3036 ! isType (token, TOKEN_SEMICOLON))
3039 * Statements can be optionally terminated in the case of
3040 * statement prior to a close curly brace as in the
3041 * document.write line below:
3043 * function checkForUpdate() {
3044 * if( 1==1 ) {
3045 * document.write("hello from checkForUpdate<br>")
3047 * return 1;
3050 state.isTerminated = findCmdTerm (token, true, true);
3051 /* if we're at a comma, try and read a second var */
3052 if (isType (token, TOKEN_COMMA))
3054 readToken (token);
3055 state.isClass = false;
3056 goto nextVar;
3059 else if (ok && isType (token, TOKEN_SEMICOLON))
3060 state.isTerminated = true;
3063 cleanUp:
3064 token->scope = save_scope;
3065 deleteToken (name);
3067 TRACE_LEAVE_TEXT("is terminated: %d", (int) state.isTerminated);
3068 return state.isTerminated;
3071 static void parseUI5 (tokenInfo *const token)
3073 tokenInfo *const name = newToken ();
3075 * SAPUI5 is built on top of jQuery.
3076 * It follows a standard format:
3077 * sap.ui.controller("id.of.controller", {
3078 * method_name : function... {
3079 * },
3081 * method_name : function ... {
3085 * Handle the parsing of the initial controller (and the
3086 * same for "view") and then allow the methods to be
3087 * parsed as usual.
3090 readToken (token);
3092 if (isType (token, TOKEN_PERIOD))
3094 int r = CORK_NIL;
3096 readToken (token);
3097 while (! isType (token, TOKEN_OPEN_PAREN) &&
3098 ! isType (token, TOKEN_EOF))
3100 readToken (token);
3102 readToken (token);
3104 if (isType (token, TOKEN_STRING))
3106 copyToken(name, token, true);
3107 readToken (token);
3110 if (isType (token, TOKEN_COMMA))
3111 readToken (token);
3113 if (isType(name, TOKEN_STRING))
3116 * `name' can include '.'.
3117 * Setting dynamicProp to true can prohibit
3118 * that makeClassTag ispects the inside
3119 * of `name'.
3121 name->dynamicProp = true;
3122 r = makeClassTag (name, NULL, NULL);
3124 * TODO
3125 * `name' specifies a class of OpenUI5.
3126 * So tagging it as a language object of
3127 * JavaScript is incorrect. We have to introduce
3128 * OpenUI5 language as a subparser of JavaScript
3129 * to fix this situation.
3135 parseMethods (token, r, false);
3136 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
3137 ! isType (token, TOKEN_EOF));
3140 deleteToken (name);
3143 static bool parseLine (tokenInfo *const token, bool is_inside_class)
3145 TRACE_ENTER_TEXT("token is '%s' of type %s",
3146 vStringValue(token->string), tokenTypeName (token->type));
3148 bool is_terminated = true;
3150 * Detect the common statements, if, while, for, do, ...
3151 * This is necessary since the last statement within a block "{}"
3152 * can be optionally terminated.
3154 * If the statement is not terminated, we need to tell
3155 * the calling routine to prevent reading an additional token
3156 * looking for the end of the statement.
3159 if (isType(token, TOKEN_KEYWORD))
3161 switch (token->keyword)
3163 case KEYWORD_for:
3164 case KEYWORD_while:
3165 case KEYWORD_do:
3166 is_terminated = parseLoop (token);
3167 break;
3168 case KEYWORD_if:
3169 case KEYWORD_else:
3170 case KEYWORD_try:
3171 case KEYWORD_catch:
3172 case KEYWORD_finally:
3173 /* Common semantics */
3174 is_terminated = parseIf (token);
3175 break;
3176 case KEYWORD_switch:
3177 parseSwitch (token);
3178 break;
3179 case KEYWORD_return:
3180 case KEYWORD_async:
3181 readToken (token);
3182 is_terminated = parseLine (token, is_inside_class);
3183 break;
3184 case KEYWORD_function:
3185 parseFunction (token, NULL, false);
3186 break;
3187 case KEYWORD_class:
3188 is_terminated = parseES6Class (token, NULL);
3189 break;
3190 default:
3191 is_terminated = parseStatement (token, is_inside_class);
3192 break;
3195 else
3198 * Special case where single line statements may not be
3199 * SEMICOLON terminated. parseBlock needs to know this
3200 * so that it does not read the next token.
3202 is_terminated = parseStatement (token, is_inside_class);
3205 TRACE_LEAVE();
3206 return is_terminated;
3209 static void parseJsFile (tokenInfo *const token)
3211 TRACE_ENTER();
3215 readToken (token);
3217 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
3218 parseUI5 (token);
3219 else if (isType (token, TOKEN_KEYWORD) && (token->keyword == KEYWORD_export ||
3220 token->keyword == KEYWORD_default))
3221 /* skip those at top-level */;
3222 else
3223 parseLine (token, false);
3224 } while (! isType (token, TOKEN_EOF));
3226 TRACE_LEAVE();
3229 #ifdef DO_TRACING
3230 #ifdef DO_TRACING_USE_DUMP_TOKEN
3231 static void dumpToken (const tokenInfo *const token)
3233 const char *scope_str = getNameStringForCorkIndex (token->scope);
3234 const char *scope_kind_str = getKindStringForCorkIndex (token->scope);
3236 if (strcmp(scope_str, "placeholder") == 0)
3238 TRACE_PRINT("%s: %s",
3239 tokenTypeName (token->type),
3240 vStringValue (token->string));
3242 else
3244 TRACE_PRINT("%s: %s (scope '%s' of kind %s)",
3245 tokenTypeName (token->type),
3246 vStringValue (token->string),
3247 scope_str, scope_kind_str);
3250 #endif
3252 static const char*
3253 getNameStringForCorkIndex(int index)
3255 if (index == CORK_NIL)
3256 return "none";
3257 tagEntryInfo *e = getEntryInCorkQueue (index);
3258 if (e == NULL)
3259 return "ghost"; /* Can this happen? */
3261 if (e->placeholder)
3262 return "placeholder";
3264 return e->name;
3267 static const char*
3268 getKindStringForCorkIndex(int index)
3270 if (index == CORK_NIL)
3271 return "none";
3272 tagEntryInfo *e = getEntryInCorkQueue (index);
3273 if (e == NULL)
3274 return "ghost"; /* Can this happen? */
3276 if (e->placeholder)
3277 return "placeholder";
3279 if (e->kindIndex == KIND_GHOST_INDEX)
3280 return "ghost";
3282 return JsKinds [e->kindIndex].name;
3285 static const char *kindName(jsKind kind)
3287 return ((int)kind) >= 0 ? JsKinds[kind].name : "none";
3290 static const char *tokenTypeName(enum eTokenType e)
3291 { /* Generated by misc/enumstr.sh with cmdline:
3292 parsers/jscript.c eTokenType tokenTypeName */
3293 switch (e)
3295 case TOKEN_UNDEFINED: return "TOKEN_UNDEFINED";
3296 case TOKEN_EOF: return "TOKEN_EOF";
3297 case TOKEN_CHARACTER: return "TOKEN_CHARACTER";
3298 case TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN";
3299 case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
3300 case TOKEN_COLON: return "TOKEN_COLON";
3301 case TOKEN_COMMA: return "TOKEN_COMMA";
3302 case TOKEN_KEYWORD: return "TOKEN_KEYWORD";
3303 case TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN";
3304 case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
3305 case TOKEN_STRING: return "TOKEN_STRING";
3306 case TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING";
3307 case TOKEN_PERIOD: return "TOKEN_PERIOD";
3308 case TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY";
3309 case TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY";
3310 case TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN";
3311 case TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE";
3312 case TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE";
3313 case TOKEN_REGEXP: return "TOKEN_REGEXP";
3314 case TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR";
3315 case TOKEN_STAR: return "TOKEN_STAR";
3316 case TOKEN_ATMARK: return "TOKEN_ATMARK";
3317 case TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR";
3318 case TOKEN_ARROW: return "TOKEN_ARROW";
3319 case TOKEN_DOTS: return "TOKEN_DOTS";
3320 default: return "UNKNOWN";
3323 #endif
3325 static void initialize (const langType language)
3327 Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
3328 Lang_js = language;
3330 TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
3333 static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
3335 if (!initialized)
3336 return;
3338 objPoolDelete (TokenPool);
3341 static void findJsTags (void)
3343 tokenInfo *const token = newToken ();
3345 NextToken = NULL;
3346 LastTokenType = TOKEN_UNDEFINED;
3348 parseJsFile (token);
3350 deleteToken (token);
3352 #ifdef HAVE_ICONV
3353 if (JSUnicodeConverter != (iconv_t) -2 && /* not created */
3354 JSUnicodeConverter != (iconv_t) -1 /* creation failed */)
3356 iconv_close (JSUnicodeConverter);
3357 JSUnicodeConverter = (iconv_t) -2;
3359 #endif
3361 Assert (NextToken == NULL);
3364 /* Create parser definition structure */
3365 extern parserDefinition* JavaScriptParser (void)
3367 // .jsx files are JSX: https://facebook.github.io/jsx/
3368 // which have JS function definitions, so we just use the JS parser
3369 static const char *const extensions [] = { "js", "jsx", "mjs", NULL };
3370 static const char *const aliases [] = { "js", "node", "nodejs",
3371 "seed", "gjs",
3372 /* Used in PostgreSQL
3373 * https://github.com/plv8/plv8 */
3374 "v8",
3375 NULL };
3376 parserDefinition *const def = parserNew ("JavaScript");
3377 def->extensions = extensions;
3378 def->aliases = aliases;
3380 * New definitions for parsing instead of regex
3382 def->kindTable = JsKinds;
3383 def->kindCount = ARRAY_SIZE (JsKinds);
3384 def->parser = findJsTags;
3385 def->initialize = initialize;
3386 def->finalize = finalize;
3387 def->keywordTable = JsKeywordTable;
3388 def->keywordCount = ARRAY_SIZE (JsKeywordTable);
3389 def->useCork = CORK_QUEUE|CORK_SYMTAB;
3390 def->requestAutomaticFQTag = true;
3392 def->versionCurrent = 1;
3393 def->versionAge = 1;
3395 return def;