Remove unused variable
[geany-mirror.git] / ctags / parsers / jscript.c
blobdf572df1cfcafc50c4351f62699b5a9b612a186b
/*
 *   Copyright (c) 2003, Darren Hiebert
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License version 2 or (at your option) any later version.
 *
 *   This module contains functions for generating tags for JavaScript language
 *   files.
 *
 *   Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
 *
 *   This is a good reference for different forms of the function statement:
 *       http://www.permadi.com/tutorial/jsFunc/
 *   Another good reference:
 *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
 */

/*
 *   INCLUDE FILES
 */
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
23 #ifdef DEBUG
24 #include <stdio.h>
25 #endif
27 #ifdef HAVE_ICONV
28 #include <iconv.h>
29 #include <errno.h>
30 # ifdef WORDS_BIGENDIAN
31 # define INTERNAL_ENCODING "UTF-32BE"
32 # else
33 # define INTERNAL_ENCODING "UTF-32LE"
34 # endif /* WORDS_BIGENDIAN */
35 #endif
37 #include <string.h>
38 #include "debug.h"
39 #include "entry.h"
40 #include "keyword.h"
41 #include "parse.h"
42 #include "read.h"
43 #include "routines.h"
44 #include "vstring.h"
45 #include "objpool.h"
46 #include "options.h"
47 #include "mbcs.h"
48 #include "trace.h"
49 #include "strlist.h"
/*
 *   MACROS
 */

/* Non-zero when the token's `type` field equals `t`. */
#define isType(token,t)		(bool) ((token)->type == (t))

/* Non-zero when the token's `keyword` field equals `k`. */
#define isKeyword(token,k)	(bool) ((token)->keyword == (k))

/* Characters this parser accepts inside an identifier: letters, digits,
 * '$', '@', '_', '#', and any value >= 0x80.
 * Note: `c` is evaluated several times. */
#define isIdentChar(c) \
	(isalpha (c) || isdigit (c) || (c) == '$' || \
	 (c) == '@' || (c) == '_' || (c) == '#' || \
	 (c) >= 0x80)

/* Tokens are borrowed from, and handed back to, the TokenPool object pool. */
#define newToken()		(objPoolGet (TokenPool))
#define deleteToken(t)	(objPoolPut (TokenPool, (t)))
64 * DATA DECLARATIONS
68 * Tracks class and function names already created
70 static stringList *ClassNames;
71 static stringList *FunctionNames;
/* Used to specify type of keyword.
 * One enumerator per entry of JsKeywordTable.
 */
enum eKeywordId {
	KEYWORD_function,
	KEYWORD_capital_function,
	KEYWORD_capital_object,
	KEYWORD_prototype,
	KEYWORD_var,
	KEYWORD_let,
	KEYWORD_const,
	KEYWORD_new,
	KEYWORD_this,
	KEYWORD_for,
	KEYWORD_while,
	KEYWORD_do,
	KEYWORD_if,
	KEYWORD_else,
	KEYWORD_switch,
	KEYWORD_try,
	KEYWORD_catch,
	KEYWORD_finally,
	KEYWORD_sap,
	KEYWORD_return,
	KEYWORD_class,
	KEYWORD_extends,
	KEYWORD_static,
	KEYWORD_default,
	KEYWORD_export,
	KEYWORD_async,
	KEYWORD_get,
	KEYWORD_set,
};
typedef int keywordId; /* to allow KEYWORD_NONE */
/* Lexical categories assigned to tokens by readTokenFullRaw(). */
typedef enum eTokenType {
	TOKEN_UNDEFINED,
	TOKEN_EOF,
	TOKEN_CHARACTER,
	TOKEN_CLOSE_PAREN,
	TOKEN_SEMICOLON,
	TOKEN_COLON,
	TOKEN_COMMA,
	TOKEN_KEYWORD,
	TOKEN_OPEN_PAREN,
	TOKEN_IDENTIFIER,
	TOKEN_STRING,
	TOKEN_TEMPLATE_STRING,
	TOKEN_PERIOD,
	TOKEN_OPEN_CURLY,
	TOKEN_CLOSE_CURLY,
	TOKEN_EQUAL_SIGN,
	TOKEN_OPEN_SQUARE,
	TOKEN_CLOSE_SQUARE,
	TOKEN_REGEXP,
	TOKEN_POSTFIX_OPERATOR,
	TOKEN_STAR,
	/* To handle Babel's decorators.
	 * Used only in readTokenFull or lower functions. */
	TOKEN_ATMARK,
	TOKEN_BINARY_OPERATOR
} tokenType;
135 typedef struct sTokenInfo {
136 tokenType type;
137 keywordId keyword;
138 vString * string;
139 vString * scope;
140 unsigned long lineNumber;
141 MIOPos filePosition;
142 int nestLevel;
143 bool dynamicProp;
144 } tokenInfo;
147 * DATA DEFINITIONS
150 static tokenType LastTokenType;
151 static tokenInfo *NextToken;
153 static langType Lang_js;
155 static objPool *TokenPool = NULL;
157 #ifdef HAVE_ICONV
158 static iconv_t JSUnicodeConverter = (iconv_t) -2;
159 #endif
/* Tag kinds; each value is used as an index into JsKinds[]. */
typedef enum {
	JSTAG_FUNCTION,
	JSTAG_CLASS,
	JSTAG_METHOD,
	JSTAG_PROPERTY,
	JSTAG_CONSTANT,
	JSTAG_VARIABLE,
	JSTAG_GENERATOR,
	JSTAG_GETTER,
	JSTAG_SETTER,
	JSTAG_FIELD,
	JSTAG_COUNT
} jsKind;
175 static kindDefinition JsKinds [] = {
176 { true, 'f', "function", "functions" },
177 { true, 'c', "class", "classes" },
178 { true, 'm', "method", "methods" },
179 { true, 'p', "property", "properties" },
180 { true, 'C', "constant", "constants" },
181 { true, 'v', "variable", "global variables" },
182 { true, 'g', "generator", "generators" },
183 { true, 'G', "getter", "getters" },
184 { true, 'S', "setter", "setters" },
185 { true, 'M', "field", "fields" },
188 static const keywordTable JsKeywordTable [] = {
189 /* keyword keyword ID */
190 { "function", KEYWORD_function },
191 { "Function", KEYWORD_capital_function },
192 { "Object", KEYWORD_capital_object },
193 { "prototype", KEYWORD_prototype },
194 { "var", KEYWORD_var },
195 { "let", KEYWORD_let },
196 { "const", KEYWORD_const },
197 { "new", KEYWORD_new },
198 { "this", KEYWORD_this },
199 { "for", KEYWORD_for },
200 { "while", KEYWORD_while },
201 { "do", KEYWORD_do },
202 { "if", KEYWORD_if },
203 { "else", KEYWORD_else },
204 { "switch", KEYWORD_switch },
205 { "try", KEYWORD_try },
206 { "catch", KEYWORD_catch },
207 { "finally", KEYWORD_finally },
208 { "sap", KEYWORD_sap },
209 { "return", KEYWORD_return },
210 { "class", KEYWORD_class },
211 { "extends", KEYWORD_extends },
212 { "static", KEYWORD_static },
213 { "default", KEYWORD_default },
214 { "export", KEYWORD_export },
215 { "async", KEYWORD_async },
216 { "get", KEYWORD_get },
217 { "set", KEYWORD_set },
221 * FUNCTION DEFINITIONS
224 /* Recursive functions */
225 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr);
226 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr);
227 static void parseFunction (tokenInfo *const token);
228 static bool parseBlock (tokenInfo *const token, const vString *const parentScope);
229 static bool parseLine (tokenInfo *const token, bool is_inside_class);
230 static void parseUI5 (tokenInfo *const token);
232 #ifdef DO_TRACING
233 //static void dumpToken (const tokenInfo *const token);
234 static const char *tokenTypeName(enum eTokenType e);
235 //static const char *keywordName(enum eKeywordId e);
236 #endif
238 static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED)
240 tokenInfo *token = xMalloc (1, tokenInfo);
242 token->string = vStringNew ();
243 token->scope = vStringNew ();
245 return token;
248 static void clearPoolToken (void *data)
250 tokenInfo *token = data;
252 token->type = TOKEN_UNDEFINED;
253 token->keyword = KEYWORD_NONE;
254 token->nestLevel = 0;
255 token->dynamicProp = false;
256 token->lineNumber = getInputLineNumber ();
257 token->filePosition = getInputFilePosition ();
258 vStringClear (token->string);
259 vStringClear (token->scope);
262 static void deletePoolToken (void *data)
264 tokenInfo *token = data;
265 vStringDelete (token->string);
266 vStringDelete (token->scope);
267 eFree (token);
270 static void copyToken (tokenInfo *const dest, const tokenInfo *const src,
271 bool const include_non_read_info)
273 dest->lineNumber = src->lineNumber;
274 dest->filePosition = src->filePosition;
275 dest->type = src->type;
276 dest->keyword = src->keyword;
277 dest->dynamicProp = src->dynamicProp;
278 vStringCopy(dest->string, src->string);
279 if (include_non_read_info)
281 dest->nestLevel = src->nestLevel;
282 vStringCopy(dest->scope, src->scope);
286 static void injectDynamicName (tokenInfo *const token, vString *newName)
288 token->dynamicProp = true;
289 vStringDelete (token->string);
290 token->string = newName;
294 * Tag generation functions
297 static void makeJsTagCommon (const tokenInfo *const token, const jsKind kind,
298 vString *const signature, vString *const inheritance,
299 bool anonymous)
301 if (JsKinds [kind].enabled )
303 const char *name = vStringValue (token->string);
304 vString *fullscope = vStringNewCopy (token->scope);
305 const char *p;
306 tagEntryInfo e;
308 if (!token->dynamicProp && kind != JSTAG_PROPERTY && (p = strrchr (name, '.')) != NULL )
310 if (vStringLength (fullscope) > 0)
311 vStringPut (fullscope, '.');
312 vStringNCatS (fullscope, name, (size_t) (p - name));
313 name = p + 1;
316 initTagEntry (&e, name, kind);
318 TRACE_PRINT("Emitting tag for symbol '%s' of kind %02x with scope '%s'",name,kind,vStringValue(fullscope));
320 e.lineNumber = token->lineNumber;
321 e.filePosition = token->filePosition;
323 if ( vStringLength(fullscope) > 0 )
325 /* FIXME: proper parent type */
326 jsKind parent_kind = JSTAG_CLASS;
329 * If we're creating a function (and not a method),
330 * guess we're inside another function
332 if (kind == JSTAG_FUNCTION)
333 parent_kind = JSTAG_FUNCTION;
335 e.extensionFields.scopeKindIndex = parent_kind;
336 e.extensionFields.scopeName = vStringValue (fullscope);
339 if (signature && vStringLength(signature))
341 size_t i;
342 /* sanitize signature by replacing all control characters with a
343 * space (because it's simple).
344 * there should never be any junk in a valid signature, but who
345 * knows what the user wrote and CTags doesn't cope well with weird
346 * characters. */
347 for (i = 0; i < signature->length; i++)
349 unsigned char c = (unsigned char) vStringChar (signature, i);
350 if (c < 0x20 /* below space */ || c == 0x7F /* DEL */)
351 vStringChar (signature, i) = ' ';
353 e.extensionFields.signature = vStringValue(signature);
356 if (inheritance)
357 e.extensionFields.inheritance = vStringValue(inheritance);
359 if (anonymous)
360 markTagExtraBit (&e, XTAG_ANONYMOUS);
362 makeTagEntry (&e);
363 vStringDelete (fullscope);
367 static void makeJsTag (const tokenInfo *const token, const jsKind kind,
368 vString *const signature, vString *const inheritance)
370 makeJsTagCommon (token, kind, signature, inheritance, false);
373 static void makeClassTagCommon (tokenInfo *const token, vString *const signature,
374 vString *const inheritance, bool anonymous)
379 vString * fulltag = vStringNew ();
380 if (vStringLength (token->scope) > 0)
382 vStringCopy(fulltag, token->scope);
383 vStringPut (fulltag, '.');
384 vStringCat (fulltag, token->string);
386 else
388 vStringCopy(fulltag, token->string);
390 if ( ! stringListHas(ClassNames, vStringValue (fulltag)) )
392 stringListAdd (ClassNames, vStringNewCopy (fulltag));
393 makeJsTagCommon (token, JSTAG_CLASS, signature, inheritance,
394 anonymous);
396 vStringDelete (fulltag);
400 static void makeClassTag (tokenInfo *const token, vString *const signature,
401 vString *const inheritance)
403 makeClassTagCommon (token, signature, inheritance, false);
406 static void makeFunctionTagCommon (tokenInfo *const token, vString *const signature, bool generator,
407 bool anonymous)
410 vString * fulltag = vStringNew ();
411 if (vStringLength (token->scope) > 0)
413 vStringCopy(fulltag, token->scope);
414 vStringPut (fulltag, '.');
415 vStringCat (fulltag, token->string);
417 else
419 vStringCopy(fulltag, token->string);
421 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) )
423 stringListAdd (FunctionNames, vStringNewCopy (fulltag));
424 makeJsTagCommon (token, generator ? JSTAG_GENERATOR : JSTAG_FUNCTION, signature, NULL,
425 anonymous);
427 vStringDelete (fulltag);
431 static void makeFunctionTag (tokenInfo *const token, vString *const signature, bool generator)
433 makeFunctionTagCommon (token, signature, generator, false);
/*
 *	 Parsing functions
 */

/* Encodes the Unicode code point @p point and returns the first byte of the
 * encoded sequence. The remaining bytes are pushed back onto the input, so
 * later getcFromInputFile() calls return them as if the character had been
 * written literally in the input file.
 *
 * With iconv available and a conversion in effect, the language's configured
 * encoding is used; otherwise the code point is encoded as UTF-8. */
static int handleUnicodeCodePoint (uint32_t point)
{
	int first = (int) point;

	Assert (point < 0x110000);

#ifdef HAVE_ICONV
	/* create the converter lazily, on the first code point we meet */
	if (isConverting () && JSUnicodeConverter == (iconv_t) -2)
	{
		JSUnicodeConverter = iconv_open (getLanguageEncoding (Lang_js), INTERNAL_ENCODING);
	}
	if (isConverting () && JSUnicodeConverter != (iconv_t) -1)
	{
		char *input_ptr = (char *) &point;
		size_t input_left = sizeof point;
		/* 4 bytes should be enough for any encoding (it's how much UTF-32
		 * would need). */
		/* FIXME: iconv tends to emit a BOM for Unicode target encodings
		 * whose name does not state the endianness ("UTF-32", "UTF-16"):
		 * it then outputs two code points, U+FEFF and the expected one.
		 * This does not happen with e.g. "UTF-32LE" or "UTF-16BE".
		 * It is not very relevant for now, as nothing in CTags copes well
		 * (if at all) with non-ASCII-compatible encodings anyway. */
		char output[4] = { 0 };
		char *output_ptr = output;
		size_t output_left = ARRAY_SIZE (output);

		if (iconv (JSUnicodeConverter, &input_ptr, &input_left, &output_ptr, &output_left) == (size_t) -1)
		{
			/* Most likely the target encoding cannot represent the
			 * character. Fall back to a placeholder that is widely
			 * supported and also valid inside an identifier. */
			verbose ("JavaScript: Encoding: %s\n", strerror (errno));
			first = '_';
		}
		else
		{
			const size_t output_len = ARRAY_SIZE (output) - output_left;

			/* push back every byte but the first, last byte first, so
			 * they are read again in their natural order */
			for (unsigned int back = 1; back < output_len; back++)
				ungetcToInputFile ((unsigned char) output[output_len - back]);
			first = (unsigned char) output[0];
		}

		/* reset the converter state */
		iconv (JSUnicodeConverter, NULL, NULL, NULL, NULL);
	}
	else
#endif
	{
		/* No encoding specified (or no iconv): assume UTF-8, a common
		 * ASCII-compatible Unicode encoding. Continuation bytes are pushed
		 * back last-to-first. */
		if (point < 0x80)
			first = (unsigned char) point;
		else if (point < 0x800)
		{
			first = (unsigned char) (0xc0 | ((point >> 6) & 0x1f));
			ungetcToInputFile ((unsigned char) (0x80 | (point & 0x3f)));
		}
		else if (point < 0x10000)
		{
			first = (unsigned char) (0xe0 | ((point >> 12) & 0x0f));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >>  0) & 0x3f)));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >>  6) & 0x3f)));
		}
		else if (point < 0x110000)
		{
			first = (unsigned char) (0xf0 | ((point >> 18) & 0x07));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >>  0) & 0x3f)));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >>  6) & 0x3f)));
			ungetcToInputFile ((unsigned char) (0x80 | ((point >> 12) & 0x3f)));
		}
	}

	return first;
}
/* Reads a Unicode escape sequence after the "\" prefix.
 *
 * Two forms are recognised:
 *   - "u" followed by exactly four hex digits (a UTF-16 code unit);
 *   - "u{" HexDigits "}" (a code point below 0x110000, any number of
 *     leading zeros allowed).
 *
 * @param value   Location to store the escape sequence value; only written
 *                when the sequence is valid.
 * @param isUTF16 Location to store whether @p value is a UTF-16 word; written
 *                whenever the character after the "\" is 'u'.
 * @returns Whether a valid sequence was read.
 *
 * Characters are kept as the `int` returned by getcFromInputFile() until they
 * are known to be hex digits. That keeps the <ctype.h> calls well defined for
 * bytes >= 0x80 and makes sure the exact value read is what gets pushed
 * back. The digit buffer is never indexed past its end, even when more than
 * six hex digits follow "u{". */
static bool readUnicodeEscapeSequenceValue (uint32_t *const value,
                                            bool *const isUTF16)
{
	bool valid = false;
	int d = getcFromInputFile ();

	if (d != 'u')
		ungetcToInputFile (d);
	else
	{
		int e = getcFromInputFile ();
		char cp[6]; /* at most 6 significant hex digits */
		unsigned int cp_len = 0;

		*isUTF16 = (e != '{');
		if (e == '{')
		{	/* Handles Unicode code point escapes: \u{ HexDigits }
			 * We skip the leading 0s because there can be any number of them
			 * and they don't change any meaning. */
			bool has_leading_zero = false;
			int last = getcFromInputFile ();

			while (last == '0')
			{
				has_leading_zero = true;
				last = getcFromInputFile ();
			}

			/* collect up to 6 digits; `last` always holds the first
			 * character that was read but not stored */
			while (cp_len < ARRAY_SIZE (cp) && isxdigit (last))
			{
				cp[cp_len++] = (char) last;
				last = getcFromInputFile ();
			}
			valid = ((cp_len > 0 || has_leading_zero) &&
					 last == '}' &&
					 /* also check if it's a valid Unicode code point */
					 (cp_len < 6 || strncmp (cp, "110000", 6) < 0));
			if (! valid) /* put back the last (likely invalid) character */
				ungetcToInputFile (last);
		}
		else
		{	/* Handles Unicode escape sequences: \u Hex4Digits
			 * `e` is the first candidate digit. */
			int last = e;

			while (isxdigit (last))
			{
				cp[cp_len++] = (char) last;
				if (cp_len == 4)
					break;
				last = getcFromInputFile ();
			}
			valid = (cp_len == 4);
		}

		if (! valid)
		{
			/* we don't get every character back, but it would require to
			 * be able to put up to 9 characters back (in the worst case
			 * for handling invalid \u{10FFFFx}), and here we're recovering
			 * from invalid syntax anyway. */
			ungetcToInputFile (e);
			ungetcToInputFile (d);
		}
		else
		{
			*value = 0;
			for (unsigned int i = 0; i < cp_len; i++)
			{
				*value *= 16;

				/* we know it's a hex digit, no need to double check */
				if (cp[i] < 'A')
					*value += (unsigned int) cp[i] - '0';
				else if (cp[i] < 'a')
					*value += 10 + (unsigned int) cp[i] - 'A';
				else
					*value += 10 + (unsigned int) cp[i] - 'a';
			}
		}
	}

	return valid;
}
/* Converts a value in the range 0..15 to its upper-case hexadecimal digit. */
static int valueToXDigit (unsigned char v)
{
	Assert (v <= 0xF);

	return (v >= 0xA) ? 'A' + (v - 0xA) : '0' + v;
}
/* Reads and expands a Unicode escape sequence after the "\" prefix. When the
 * sequence is a UTF-16 high surrogate, an attempt is made to read the low
 * surrogate that follows so the combined code point can be emitted.
 * @param fallback The character to return if the sequence is invalid. Usually
 *                 this would be the '\' character starting the sequence.
 * @returns The first byte of the expanded sequence, or @p fallback if the
 *          sequence is invalid. On success, the following calls to
 *          getcFromInputFile() return the subsequent bytes (if any). */
static int readUnicodeEscapeSequence (const int fallback)
{
	uint32_t codePoint;
	bool isUTF16;

	if (! readUnicodeEscapeSequenceValue (&codePoint, &isUTF16))
		return fallback;

	if (isUTF16 && (codePoint & 0xfc00) == 0xd800)
	{	/* high surrogate: look for the matching low surrogate */
		uint32_t low;
		int next = getcFromInputFile ();

		if (next != '\\' || ! readUnicodeEscapeSequenceValue (&low, &isUTF16))
			ungetcToInputFile (next);
		else if (! isUTF16)
		{	/* a plain \u{...} code point follows, not a UTF-16 word */
			next = handleUnicodeCodePoint (low);
			ungetcToInputFile (next);
		}
		else if ((low & 0xfc00) != 0xdc00)
		{	/* Not a low surrogate: push its escaped spelling back (last
			 * character first), in case it is another high surrogate that
			 * should be read as part of a different pair. */
			for (unsigned int shift = 0; shift <= 12; shift += 4)
				ungetcToInputFile (valueToXDigit ((unsigned char) ((low >> shift) & 0x000f)));
			ungetcToInputFile ('u');
			ungetcToInputFile ('\\');
		}
		else
			codePoint = 0x010000 + ((codePoint & 0x03ff) << 10) + (low & 0x03ff);
	}

	return handleUnicodeCodePoint (codePoint);
}
663 static void parseString (vString *const string, const int delimiter)
665 bool end = false;
666 while (! end)
668 int c = getcFromInputFile ();
669 if (c == EOF)
670 end = true;
671 else if (c == '\\')
673 /* Eat the escape sequence (\", \', etc). We properly handle
674 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
675 * as an unescaped character, which is invalid and handled below.
676 * Also, handle the fact that <LineContinuation> produces an empty
677 * sequence.
678 * See ECMA-262 7.8.4 */
679 c = getcFromInputFile ();
680 if (c == 'u')
682 ungetcToInputFile (c);
683 c = readUnicodeEscapeSequence ('\\');
684 vStringPut (string, c);
686 else if (c != '\r' && c != '\n')
687 vStringPut(string, c);
688 else if (c == '\r')
690 c = getcFromInputFile();
691 if (c != '\n')
692 ungetcToInputFile (c);
695 else if (c == delimiter)
696 end = true;
697 else if (c == '\r' || c == '\n')
699 /* those are invalid when not escaped */
700 end = true;
701 /* we don't want to eat the newline itself to let the automatic
702 * semicolon insertion code kick in */
703 ungetcToInputFile (c);
705 else
706 vStringPut (string, c);
/* Skips the remainder of a regular expression literal, whose opening '/' has
 * already been consumed. Stops after the trailing flags, at a line
 * terminator (which is pushed back, as it is invalid inside a regex), or at
 * end of input. A '/' inside a [...] character class does not end the
 * literal. */
static void parseRegExp (void)
{
	bool in_class = false;
	int c;

	do
	{
		c = getcFromInputFile ();

		if (c == '/' && ! in_class)
		{
			/* closing slash: skip flags, push back what follows them */
			do
				c = getcFromInputFile ();
			while (isalpha (c));
			ungetcToInputFile (c);
			return;
		}
		if (c == '\n' || c == '\r')
		{
			ungetcToInputFile (c);
			return;
		}

		switch (c)
		{
			case '\\':
				c = getcFromInputFile (); /* skip the escaped character */
				break;
			case '[':
				in_class = true;
				break;
			case ']':
				in_class = false;
				break;
			default:
				break;
		}
	} while (c != EOF);
}
742 /* Read a C identifier beginning with "firstChar" and places it into
743 * "name".
745 static void parseIdentifier (vString *const string, const int firstChar)
747 int c = firstChar;
748 Assert (isIdentChar (c));
751 vStringPut (string, c);
752 c = getcFromInputFile ();
753 if (c == '\\')
754 c = readUnicodeEscapeSequence (c);
755 } while (isIdentChar (c));
756 /* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
757 * as we should actually put back the whole escape sequence and not the
758 * decoded character. However, it's not really worth the hassle as it can
759 * only happen if the input has an invalid escape sequence. */
760 ungetcToInputFile (c); /* unget non-identifier character */
763 static void parseTemplateString (vString *const string)
765 int c;
768 c = getcFromInputFile ();
769 if (c == '`' || c == EOF)
770 break;
772 vStringPut (string, c);
774 if (c == '\\')
776 c = getcFromInputFile();
777 if (c != EOF)
778 vStringPut(string, c);
780 else if (c == '$')
782 c = getcFromInputFile ();
783 if (c != '{')
784 ungetcToInputFile (c);
785 else
787 int depth = 1;
788 /* we need to use the real token machinery to handle strings,
789 * comments, regexes and whatnot */
790 tokenInfo *token = newToken ();
791 LastTokenType = TOKEN_UNDEFINED;
792 vStringPut(string, c);
795 readTokenFull (token, false, string);
796 if (isType (token, TOKEN_OPEN_CURLY))
797 depth++;
798 else if (isType (token, TOKEN_CLOSE_CURLY))
799 depth--;
801 while (! isType (token, TOKEN_EOF) && depth > 0);
802 deleteToken (token);
806 while (c != EOF);
809 static void readTokenFullRaw (tokenInfo *const token, bool include_newlines, vString *const repr)
811 int c;
812 int i;
813 bool newline_encountered = false;
815 /* if we've got a token held back, emit it */
816 if (NextToken)
818 copyToken (token, NextToken, false);
819 deleteToken (NextToken);
820 NextToken = NULL;
821 return;
824 token->type = TOKEN_UNDEFINED;
825 token->keyword = KEYWORD_NONE;
826 vStringClear (token->string);
828 getNextChar:
829 i = 0;
832 c = getcFromInputFile ();
833 if (include_newlines && (c == '\r' || c == '\n'))
834 newline_encountered = true;
835 i++;
837 while (c == '\t' || c == ' ' || c == '\r' || c == '\n');
839 token->lineNumber = getInputLineNumber ();
840 token->filePosition = getInputFilePosition ();
842 if (repr && c != EOF)
844 if (i > 1)
845 vStringPut (repr, ' ');
846 vStringPut (repr, c);
849 switch (c)
851 case EOF: token->type = TOKEN_EOF; break;
852 case '(': token->type = TOKEN_OPEN_PAREN; break;
853 case ')': token->type = TOKEN_CLOSE_PAREN; break;
854 case ';': token->type = TOKEN_SEMICOLON; break;
855 case ',': token->type = TOKEN_COMMA; break;
856 case '.': token->type = TOKEN_PERIOD; break;
857 case ':': token->type = TOKEN_COLON; break;
858 case '{': token->type = TOKEN_OPEN_CURLY; break;
859 case '}': token->type = TOKEN_CLOSE_CURLY; break;
860 case '=': token->type = TOKEN_EQUAL_SIGN; break;
861 case '[': token->type = TOKEN_OPEN_SQUARE; break;
862 case ']': token->type = TOKEN_CLOSE_SQUARE; break;
864 case '+':
865 case '-':
867 int d = getcFromInputFile ();
868 if (d == c) /* ++ or -- */
869 token->type = TOKEN_POSTFIX_OPERATOR;
870 else
872 ungetcToInputFile (d);
873 token->type = TOKEN_BINARY_OPERATOR;
875 break;
878 case '*':
879 token->type = TOKEN_STAR;
880 break;
881 case '%':
882 case '?':
883 case '>':
884 case '<':
885 case '^':
886 case '|':
887 case '&':
888 token->type = TOKEN_BINARY_OPERATOR;
889 break;
891 case '\'':
892 case '"':
893 token->type = TOKEN_STRING;
894 parseString (token->string, c);
895 token->lineNumber = getInputLineNumber ();
896 token->filePosition = getInputFilePosition ();
897 if (repr)
899 vStringCat (repr, token->string);
900 vStringPut (repr, c);
902 break;
904 case '`':
905 token->type = TOKEN_TEMPLATE_STRING;
906 parseTemplateString (token->string);
907 token->lineNumber = getInputLineNumber ();
908 token->filePosition = getInputFilePosition ();
909 if (repr)
911 vStringCat (repr, token->string);
912 vStringPut (repr, c);
914 break;
916 case '/':
918 int d = getcFromInputFile ();
919 if ( (d != '*') && /* is this the start of a comment? */
920 (d != '/') ) /* is a one line comment? */
922 ungetcToInputFile (d);
923 switch (LastTokenType)
925 case TOKEN_CHARACTER:
926 case TOKEN_IDENTIFIER:
927 case TOKEN_STRING:
928 case TOKEN_TEMPLATE_STRING:
929 case TOKEN_CLOSE_CURLY:
930 case TOKEN_CLOSE_PAREN:
931 case TOKEN_CLOSE_SQUARE:
932 token->type = TOKEN_BINARY_OPERATOR;
933 break;
935 default:
936 token->type = TOKEN_REGEXP;
937 parseRegExp ();
938 token->lineNumber = getInputLineNumber ();
939 token->filePosition = getInputFilePosition ();
940 break;
943 else
945 if (repr) /* remove the / we added */
946 vStringChop(repr);
947 if (d == '*')
949 skipToCharacterInInputFile2('*', '/');
950 goto getNextChar;
952 else if (d == '/') /* is this the start of a comment? */
954 skipToCharacterInInputFile ('\n');
955 /* if we care about newlines, put it back so it is seen */
956 if (include_newlines)
957 ungetcToInputFile ('\n');
958 goto getNextChar;
961 break;
964 case '#':
965 /* skip shebang in case of e.g. Node.js scripts */
966 if (token->lineNumber > 1)
967 token->type = TOKEN_UNDEFINED;
968 else if ((c = getcFromInputFile ()) != '!')
970 ungetcToInputFile (c);
971 token->type = TOKEN_UNDEFINED;
973 else
975 skipToCharacterInInputFile ('\n');
976 goto getNextChar;
978 break;
980 case '@':
981 token->type = TOKEN_ATMARK;
982 break;
984 case '\\':
985 c = readUnicodeEscapeSequence (c);
986 /* fallthrough */
987 default:
988 if (! isIdentChar (c))
989 token->type = TOKEN_UNDEFINED;
990 else
992 parseIdentifier (token->string, c);
993 token->lineNumber = getInputLineNumber ();
994 token->filePosition = getInputFilePosition ();
995 token->keyword = lookupKeyword (vStringValue (token->string), Lang_js);
996 if (isKeyword (token, KEYWORD_NONE))
997 token->type = TOKEN_IDENTIFIER;
998 else
999 token->type = TOKEN_KEYWORD;
1000 if (repr && vStringLength (token->string) > 1)
1001 vStringCatS (repr, vStringValue (token->string) + 1);
1003 break;
1006 if (include_newlines && newline_encountered)
1008 /* This isn't strictly correct per the standard, but following the
1009 * real rules means understanding all statements, and that's not
1010 * what the parser currently does. What we do here is a guess, by
1011 * avoiding inserting semicolons that would make the statement on
1012 * the left or right obviously invalid. Hopefully this should not
1013 * have false negatives (e.g. should not miss insertion of a semicolon)
1014 * but might have false positives (e.g. it will wrongfully emit a
1015 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
1016 * This should however be mostly harmless as we only deal with
1017 * newlines in specific situations where we know a false positive
1018 * wouldn't hurt too bad. */
1020 /* these already end a statement, so no need to duplicate it */
1021 #define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON || \
1022 (t) == TOKEN_EOF || \
1023 (t) == TOKEN_COMMA || \
1024 (t) == TOKEN_OPEN_CURLY)
1025 /* these cannot be the start or end of a statement */
1026 #define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN || \
1027 (t) == TOKEN_COLON || \
1028 (t) == TOKEN_PERIOD || \
1029 (t) == TOKEN_STAR || \
1030 (t) == TOKEN_BINARY_OPERATOR)
1032 if (! IS_STMT_SEPARATOR(LastTokenType) &&
1033 ! IS_STMT_SEPARATOR(token->type) &&
1034 ! IS_BINARY_OPERATOR(LastTokenType) &&
1035 ! IS_BINARY_OPERATOR(token->type) &&
1036 /* these cannot be followed by a semicolon */
1037 ! (LastTokenType == TOKEN_OPEN_PAREN ||
1038 LastTokenType == TOKEN_OPEN_SQUARE))
1040 /* hold the token... */
1041 Assert (NextToken == NULL);
1042 NextToken = newToken ();
1043 copyToken (NextToken, token, false);
1045 /* ...and emit a semicolon instead */
1046 token->type = TOKEN_SEMICOLON;
1047 token->keyword = KEYWORD_NONE;
1048 vStringClear (token->string);
1049 if (repr)
1050 vStringPut (token->string, '\n');
1053 #undef IS_STMT_SEPARATOR
1054 #undef IS_BINARY_OPERATOR
1057 LastTokenType = token->type;
1060 /* See https://babeljs.io/blog/2018/09/17/decorators */
1061 static void skipBabelDecorator (tokenInfo *token, bool include_newlines, vString *const repr)
1063 readTokenFullRaw (token, include_newlines, repr);
1064 if (isType (token, TOKEN_OPEN_PAREN))
1066 /* @(complex ? dec1 : dec2) */
1067 skipArgumentList (token, include_newlines, repr);
1068 TRACE_PRINT ("found @(...) style decorator");
1070 else if (isType (token, TOKEN_IDENTIFIER))
1072 /* @namespace.foo (...) */
1073 bool found_period = false;
1074 while (1)
1076 readTokenFullRaw (token, include_newlines, repr);
1077 if (isType (token, TOKEN_IDENTIFIER))
1079 if (!found_period)
1081 TRACE_PRINT("found @namespace.bar style decorator");
1082 break;
1084 found_period = false;
1086 else if (isType (token, TOKEN_PERIOD))
1087 found_period = true;
1088 else if (isType (token, TOKEN_OPEN_PAREN))
1090 skipArgumentList (token, include_newlines, repr);
1091 TRACE_PRINT("found @foo(...) style decorator");
1092 break;
1094 else
1096 TRACE_PRINT("found @foo style decorator");
1097 break;
1101 else
1102 /* Unexpected token after @ */
1103 TRACE_PRINT("found unexpected token during skipping a decorator");
1106 static void readTokenFull (tokenInfo *const token, bool include_newlines, vString *const repr)
1108 readTokenFullRaw (token, include_newlines, repr);
1110 while (1)
1112 if (!isType (token, TOKEN_ATMARK))
1113 break;
1114 skipBabelDecorator (token, include_newlines, repr);
1115 /* @decorator0 @decorator1 ... There can be more than one decorator. */
#ifdef JSCRIPT_DO_DEBUGGING
/* Tracing variant of readTokenFull(): reads a token, then prints its text,
 * type and scope. Code below this point reaches it through the
 * readTokenFull macro. */
static void readTokenFullDebug (tokenInfo *const token, bool include_newlines, vString *const repr)
{
	readTokenFull (token, include_newlines, repr);
	TRACE_PRINT("token '%s' of type %02x with scope '%s'",vStringValue(token->string),token->type, vStringValue(token->scope));
}
# define readTokenFull readTokenFullDebug
#endif
1129 static void readToken (tokenInfo *const token)
1131 readTokenFull (token, false, NULL);
1135 * Token parsing functions
1138 static void skipArgumentList (tokenInfo *const token, bool include_newlines, vString *const repr)
1140 int nest_level = 0;
1142 if (isType (token, TOKEN_OPEN_PAREN)) /* arguments? */
1144 nest_level++;
1145 if (repr)
1146 vStringPut (repr, '(');
1147 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1149 readTokenFull (token, false, repr);
1150 if (isType (token, TOKEN_OPEN_PAREN))
1151 nest_level++;
1152 else if (isType (token, TOKEN_CLOSE_PAREN))
1153 nest_level--;
1154 else if (isKeyword (token, KEYWORD_function))
1155 parseFunction (token);
1157 readTokenFull (token, include_newlines, NULL);
1161 static void skipArrayList (tokenInfo *const token, bool include_newlines)
1163 int nest_level = 0;
1166 * Handle square brackets
1167 * var name[1]
1168 * So we must check for nested open and closing square brackets
1171 if (isType (token, TOKEN_OPEN_SQUARE)) /* arguments? */
1173 nest_level++;
1174 while (nest_level > 0 && ! isType (token, TOKEN_EOF))
1176 readToken (token);
1177 if (isType (token, TOKEN_OPEN_SQUARE))
1178 nest_level++;
1179 else if (isType (token, TOKEN_CLOSE_SQUARE))
1180 nest_level--;
1182 readTokenFull (token, include_newlines, NULL);
1186 static void skipQualifiedIdentifier (tokenInfo *const token)
1188 /* Skip foo.bar.baz */
1189 while (isType (token, TOKEN_IDENTIFIER))
1191 readToken (token);
1192 if (isType (token, TOKEN_PERIOD))
1193 readToken (token);
1194 else
1195 break;
1199 static void addContext (tokenInfo* const parent, const tokenInfo* const child)
1201 if (vStringLength (parent->string) > 0)
1203 vStringPut (parent->string, '.');
1205 vStringCat (parent->string, child->string);
1208 static void addToScope (tokenInfo* const token, const vString* const extra)
1210 if (vStringLength (token->scope) > 0)
1212 vStringPut (token->scope, '.');
1214 vStringCat (token->scope, extra);
/*
 *	 Scanning functions
 */
1221 static bool findCmdTerm (tokenInfo *const token, bool include_newlines,
1222 bool include_commas)
1225 * Read until we find either a semicolon or closing brace.
1226 * Any nested braces will be handled within.
1228 while (! isType (token, TOKEN_SEMICOLON) &&
1229 ! isType (token, TOKEN_CLOSE_CURLY) &&
1230 ! (include_commas && isType (token, TOKEN_COMMA)) &&
1231 ! isType (token, TOKEN_EOF))
1233 /* Handle nested blocks */
1234 if ( isType (token, TOKEN_OPEN_CURLY))
1236 parseBlock (token, NULL);
1237 readTokenFull (token, include_newlines, NULL);
1239 else if ( isType (token, TOKEN_OPEN_PAREN) )
1241 skipArgumentList(token, include_newlines, NULL);
1243 else if ( isType (token, TOKEN_OPEN_SQUARE) )
1245 skipArrayList(token, include_newlines);
1247 else
1249 readTokenFull (token, include_newlines, NULL);
1253 return isType (token, TOKEN_SEMICOLON);
1256 static void parseSwitch (tokenInfo *const token)
1259 * switch (expression) {
1260 * case value1:
1261 * statement;
1262 * break;
1263 * case value2:
1264 * statement;
1265 * break;
1266 * default : statement;
1270 readToken (token);
1272 if (isType (token, TOKEN_OPEN_PAREN))
1274 skipArgumentList(token, false, NULL);
1277 if (isType (token, TOKEN_OPEN_CURLY))
1279 parseBlock (token, NULL);
1283 static bool parseLoop (tokenInfo *const token)
1286 * Handles these statements
1287 * for (x=0; x<3; x++)
1288 * document.write("This text is repeated three times<br>");
1290 * for (x=0; x<3; x++)
1292 * document.write("This text is repeated three times<br>");
1295 * while (number<5){
1296 * document.write(number+"<br>");
1297 * number++;
1300 * do{
1301 * document.write(number+"<br>");
1302 * number++;
1304 * while (number<5);
1306 bool is_terminated = true;
1308 if (isKeyword (token, KEYWORD_for) || isKeyword (token, KEYWORD_while))
1310 readToken(token);
1312 if (isType (token, TOKEN_OPEN_PAREN))
1314 skipArgumentList(token, false, NULL);
1317 if (isType (token, TOKEN_OPEN_CURLY))
1319 parseBlock (token, NULL);
1321 else
1323 is_terminated = parseLine(token, false);
1326 else if (isKeyword (token, KEYWORD_do))
1328 readToken(token);
1330 if (isType (token, TOKEN_OPEN_CURLY))
1332 parseBlock (token, NULL);
1334 else
1336 is_terminated = parseLine(token, false);
1339 if (is_terminated)
1340 readToken(token);
1342 if (isKeyword (token, KEYWORD_while))
1344 readToken(token);
1346 if (isType (token, TOKEN_OPEN_PAREN))
1348 skipArgumentList(token, true, NULL);
1350 if (! isType (token, TOKEN_SEMICOLON))
1352 /* oddly enough, `do {} while (0) var foo = 42` is perfectly
1353 * valid JS, so explicitly handle the remaining of the line
1354 * for the sake of the root scope handling (as parseJsFile()
1355 * always advances a token not to ever get stuck) */
1356 is_terminated = parseLine(token, false);
1361 return is_terminated;
1364 static bool parseIf (tokenInfo *const token)
1366 bool read_next_token = true;
1368 * If statements have two forms
1369 * if ( ... )
1370 * one line;
1372 * if ( ... )
1373 * statement;
1374 * else
1375 * statement
1377 * if ( ... ) {
1378 * multiple;
1379 * statements;
1383 * if ( ... ) {
1384 * return elem
1387 * This example if correctly written, but the
1388 * else contains only 1 statement without a terminator
1389 * since the function finishes with the closing brace.
1391 * function a(flag){
1392 * if(flag)
1393 * test(1);
1394 * else
1395 * test(2)
1398 * TODO: Deal with statements that can optional end
1399 * without a semi-colon. Currently this messes up
1400 * the parsing of blocks.
1401 * Need to somehow detect this has happened, and either
1402 * backup a token, or skip reading the next token if
1403 * that is possible from all code locations.
1407 readToken (token);
1409 if (isKeyword (token, KEYWORD_if))
1412 * Check for an "else if" and consume the "if"
1414 readToken (token);
1417 if (isType (token, TOKEN_OPEN_PAREN))
1419 skipArgumentList(token, false, NULL);
1422 if (isType (token, TOKEN_OPEN_CURLY))
1424 parseBlock (token, NULL);
1426 else
1428 /* The next token should only be read if this statement had its own
1429 * terminator */
1430 read_next_token = findCmdTerm (token, true, false);
1432 return read_next_token;
1435 static void parseFunction (tokenInfo *const token)
1437 TRACE_ENTER();
1439 tokenInfo *const name = newToken ();
1440 vString *const signature = vStringNew ();
1441 bool is_class = false;
1442 bool is_generator = false;
1443 bool is_anonymous = false;
1445 * This deals with these formats
1446 * function validFunctionTwo(a,b) {}
1447 * function * generator(a,b) {}
1450 copyToken (name, token, true);
1451 readToken (name);
1452 if (isType (name, TOKEN_STAR))
1454 is_generator = true;
1455 readToken (name);
1457 if (isType (name, TOKEN_OPEN_PAREN))
1459 /* anonymous function */
1460 copyToken (token, name, false);
1461 anonGenerate (name->string, "AnonymousFunction", JSTAG_FUNCTION);
1462 is_anonymous = true;
1464 else if (!isType (name, TOKEN_IDENTIFIER))
1465 goto cleanUp;
1466 else
1467 readToken (token);
1469 while (isType (token, TOKEN_PERIOD))
1471 readToken (token);
1472 if (! isType(token, TOKEN_KEYWORD))
1474 addContext (name, token);
1475 readToken (token);
1479 if ( isType (token, TOKEN_OPEN_PAREN) )
1480 skipArgumentList(token, false, signature);
1482 if ( isType (token, TOKEN_OPEN_CURLY) )
1484 is_class = parseBlock (token, name->string);
1485 if ( is_class )
1486 makeClassTagCommon (name, signature, NULL, is_anonymous);
1487 else
1488 makeFunctionTagCommon (name, signature, is_generator, is_anonymous);
1491 findCmdTerm (token, false, false);
1493 cleanUp:
1494 vStringDelete (signature);
1495 deleteToken (name);
1497 TRACE_LEAVE();
1500 /* Parses a block surrounded by curly braces.
1501 * @p parentScope is the scope name for this block, or NULL for unnamed scopes */
1502 static bool parseBlock (tokenInfo *const token, const vString *const parentScope)
1504 TRACE_ENTER();
1506 bool is_class = false;
1507 bool read_next_token = true;
1508 vString * saveScope = vStringNew ();
1510 vStringCopy(saveScope, token->scope);
1511 if (parentScope)
1513 addToScope (token, parentScope);
1514 token->nestLevel++;
1518 * Make this routine a bit more forgiving.
1519 * If called on an open_curly advance it
1521 if (isType (token, TOKEN_OPEN_CURLY))
1522 readToken(token);
1524 if (! isType (token, TOKEN_CLOSE_CURLY))
1527 * Read until we find the closing brace,
1528 * any nested braces will be handled within
1532 read_next_token = true;
1533 if (isKeyword (token, KEYWORD_this))
1536 * Means we are inside a class and have found
1537 * a class, not a function
1539 is_class = true;
1542 * Ignore the remainder of the line
1543 * findCmdTerm(token);
1545 read_next_token = parseLine (token, is_class);
1547 else if (isKeyword (token, KEYWORD_var) ||
1548 isKeyword (token, KEYWORD_let) ||
1549 isKeyword (token, KEYWORD_const))
1552 * Potentially we have found an inner function.
1553 * Set something to indicate the scope
1555 read_next_token = parseLine (token, is_class);
1557 else if (isType (token, TOKEN_OPEN_CURLY))
1559 /* Handle nested blocks */
1560 parseBlock (token, NULL);
1562 else
1565 * It is possible for a line to have no terminator
1566 * if the following line is a closing brace.
1567 * parseLine will detect this case and indicate
1568 * whether we should read an additional token.
1570 read_next_token = parseLine (token, is_class);
1574 * Always read a new token unless we find a statement without
1575 * a ending terminator
1577 if( read_next_token )
1578 readToken(token);
1581 * If we find a statement without a terminator consider the
1582 * block finished, otherwise the stack will be off by one.
1584 } while (! isType (token, TOKEN_EOF) &&
1585 ! isType (token, TOKEN_CLOSE_CURLY) && read_next_token);
1588 vStringCopy(token->scope, saveScope);
1589 vStringDelete(saveScope);
1590 if (parentScope)
1591 token->nestLevel--;
1593 TRACE_LEAVE();
1595 return is_class;
/*
 * Parses the members of an object literal or of an ES6 class body and makes
 * a tag for each of them.
 *
 * @p token is expected to be on the opening curly brace; the first thing
 * the loop does is read the token after it.
 * @p class names the enclosing class/object; its text is added to the scope
 * of @p token while the members are parsed (the scope is restored on exit).
 * @p is_es6_class selects ES6 class body rules: members need no separator,
 * so the loop runs until the closing curly brace or EOF instead of stopping
 * at the first member not followed by a comma.
 *
 * Returns true when at least one method or property was found through the
 * "name: value" / "name = function" / shorthand forms.
 */
static bool parseMethods (tokenInfo *const token, const tokenInfo *const class,
                          const bool is_es6_class)
{
	TRACE_ENTER_TEXT("token is '%s' of type %s in classToken '%s' of type %s (es6: %s)",
					 vStringValue(token->string), tokenTypeName (token->type),
					 vStringValue(class->string), tokenTypeName (class->type),
					 is_es6_class? "yes": "no");

	tokenInfo *const name = newToken ();
	bool has_methods = false;
	vString *saveScope = vStringNew ();

	vStringCopy (saveScope, token->scope);
	addToScope (token, class->string);

	/*
	 * This deals with these formats
	 *	   validProperty  : 2,
	 *	   validMethod    : function(a,b) {}
	 *	   'validMethod2' : function(a,b) {}
	 *     container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
	 *     get prop() {}
	 *     set prop(val) {}
	 *
	 * ES6 methods:
	 *     property(...) {}
	 *     *generator() {}
	 *
	 * ES6 computed name:
	 *     [property]() {}
	 *     get [property]() {}
	 *     set [property]() {}
	 *     *[generator]() {}
	 *
	 * tc39/proposal-class-fields
	 *     field0 = function(a,b) {}
	 *     field1 = 1
	 * The parser extracts field0 as a method because the left value
	 * is a function (kind propagation), and field1 as a field.
	 */

	/* set when the current token already is the start of the next member,
	 * so the next iteration must not read a new one */
	bool dont_read = false;
	do
	{
		bool is_setter = false;
		bool is_getter = false;

		if (!dont_read)
			readToken (token);
		dont_read = false;

		/* end of the body: skip the trailing findCmdTerm() below */
		if (isType (token, TOKEN_CLOSE_CURLY))
		{
			goto cleanUp;
		}

		/* optional modifier in front of the member name */
		if (isKeyword (token, KEYWORD_async))
			readToken (token);
		else if (isType(token, TOKEN_KEYWORD) && isKeyword (token, KEYWORD_get))
		{
			is_getter = true;
			readToken (token);
		}
		else if (isType(token, TOKEN_KEYWORD) && isKeyword (token, KEYWORD_set))
		{
			is_setter = true;
			readToken (token);
		}

		if (! isType (token, TOKEN_KEYWORD) &&
		    ! isType (token, TOKEN_SEMICOLON))
		{
			bool is_generator = false;
			bool is_shorthand = false; /* ES6 shorthand syntax */
			bool is_computed_name = false; /* ES6 computed property name */
			bool is_dynamic_prop = false;
			vString *dprop = NULL; /* is_computed_name is true but
									* the name is not represented in
									* a string literal. The expressions
									* go this string. */

			if (isType (token, TOKEN_STAR)) /* shorthand generator */
			{
				is_generator = true;
				readToken (token);
			}

			if (isType (token, TOKEN_OPEN_SQUARE))
			{
				is_computed_name = true;
				dprop = vStringNewInit ("[");
				readTokenFull (token, false, dprop);
			}

			/* the current token is the member name (or the first token of
			 * a computed name) */
			copyToken(name, token, true);
			if (is_computed_name && ! isType (token, TOKEN_STRING))
				is_dynamic_prop = true;

			readTokenFull (token, false, dprop);

			if (is_computed_name)
			{
				/* consume up to the ']' matching the opening '['; anything
				 * but an immediate ']' makes the name a dynamic one */
				int depth = 1;
				do
				{
					if (isType (token, TOKEN_CLOSE_SQUARE))
						depth--;
					else
					{
						is_dynamic_prop = true;
						if (isType (token, TOKEN_OPEN_SQUARE))
							depth++;
					}
					/* keep recording the expression only while still inside
					 * the brackets */
					readTokenFull (token, false, (is_dynamic_prop && depth != 0)? dprop: NULL);
				} while (! isType (token, TOKEN_EOF) && depth > 0);
			}

			if (is_dynamic_prop)
			{
				/* NOTE(review): dprop is not deleted on this path, so
				 * injectDynamicName() presumably takes ownership of it --
				 * confirm against its definition */
				injectDynamicName (name, dprop);
				dprop = NULL;
			}
			else
				vStringDelete (dprop);

			is_shorthand = isType (token, TOKEN_OPEN_PAREN);
			bool can_be_field = isType (token, TOKEN_EQUAL_SIGN);
			if ( isType (token, TOKEN_COLON) || can_be_field || is_shorthand )
			{
				if (! is_shorthand)
				{
					/* step over ':' or '=' and an optional `async` */
					readToken (token);
					if (isKeyword (token, KEYWORD_async))
						readToken (token);
				}
				if ( is_shorthand || isKeyword (token, KEYWORD_function) )
				{
					TRACE_PRINT("Seems to be a function or shorthand");
					vString *const signature = vStringNew ();

					if (! is_shorthand)
					{
						readToken (token);
						if (isType (token, TOKEN_STAR))
						{
							/* generator: 'function' '*' '(' ... ')' '{' ... '}' */
							is_generator = true;
							readToken (token);
						}
					}
					if ( isType (token, TOKEN_OPEN_PAREN) )
					{
						skipArgumentList(token, false, signature);
					}

					if (isType (token, TOKEN_OPEN_CURLY))
					{
						has_methods = true;

						/* generator wins over getter, getter over setter */
						int kind = JSTAG_METHOD;
						if (is_generator)
							kind = JSTAG_GENERATOR;
						else if (is_getter)
							kind = JSTAG_GETTER;
						else if (is_setter)
							kind = JSTAG_SETTER;

						makeJsTag (name, kind, signature, NULL);
						parseBlock (token, name->string);

						/*
						 * If we aren't parsing an ES6 class (for which there
						 * is no mandatory separators), read to the closing
						 * curly, check next token, if a comma, we must loop
						 * again.
						 */
						if (! is_es6_class)
							readToken (token);
					}

					vStringDelete (signature);
				}
				else if (! is_es6_class)
				{
					bool has_child_methods = false;

					/* skip whatever is the value */
					while (! isType (token, TOKEN_COMMA) &&
					       ! isType (token, TOKEN_CLOSE_CURLY) &&
					       ! isType (token, TOKEN_EOF))
					{
						if (isType (token, TOKEN_OPEN_CURLY))
						{
							/* Recurse to find child properties/methods */
							has_child_methods = parseMethods (token, name, false);
							readToken (token);
						}
						else if (isType (token, TOKEN_OPEN_PAREN))
						{
							skipArgumentList (token, false, NULL);
						}
						else if (isType (token, TOKEN_OPEN_SQUARE))
						{
							skipArrayList (token, false);
						}
						else
						{
							readToken (token);
						}
					}

					/* a value holding members of its own is tagged as a
					 * class, any other value as a property */
					has_methods = true;
					if (has_child_methods)
						makeJsTag (name, JSTAG_CLASS, NULL, NULL);
					else
						makeJsTag (name, JSTAG_PROPERTY, NULL, NULL);
				}
				else if (can_be_field)
				{
					/* ES6 class field with a non-function initializer */
					makeJsTag (name, JSTAG_FIELD, NULL, NULL);
					parseLine (token, true);
				}
			}
			else
			{
				/* bare name: a field without initializer */
				makeJsTag (name, JSTAG_FIELD, NULL, NULL);
				if (!isType (token, TOKEN_SEMICOLON))
					dont_read = true;
			}
		}
	} while ( isType(token, TOKEN_COMMA) ||
	          ( is_es6_class && ! isType(token, TOKEN_EOF) ) );

	TRACE_PRINT("Finished parsing methods");

	findCmdTerm (token, false, false);

cleanUp:
	vStringCopy (token->scope, saveScope);
	vStringDelete (saveScope);
	deleteToken (name);

	TRACE_LEAVE_TEXT("found method(s): %s", has_methods? "yes": "no");

	return has_methods;
}
1845 static bool parseES6Class (tokenInfo *const token, const tokenInfo *targetName)
1847 TRACE_ENTER();
1849 tokenInfo * className = newToken ();
1850 vString *inheritance = NULL;
1851 bool is_anonymous = true;
1853 copyToken (className, token, true);
1854 readToken (className);
1856 /* optional name */
1857 if (isType (className, TOKEN_IDENTIFIER))
1859 readToken (token);
1860 is_anonymous = false;
1862 else
1864 copyToken (token, className, true);
1865 /* We create a fake name so we have a scope for the members */
1866 if (! targetName)
1867 anonGenerate (className->string, "AnonymousClass", JSTAG_CLASS);
1870 if (! targetName)
1871 targetName = className;
1873 if (isKeyword (token, KEYWORD_extends))
1874 inheritance = vStringNew ();
1876 /* skip inheritance info */
1877 while (! isType (token, TOKEN_OPEN_CURLY) &&
1878 ! isType (token, TOKEN_EOF) &&
1879 ! isType (token, TOKEN_SEMICOLON))
1880 readTokenFull (token, false, inheritance);
1882 /* remove the last added token (here we assume it's one char, "{" or ";" */
1883 if (inheritance && vStringLength (inheritance) > 0 &&
1884 ! isType (token, TOKEN_EOF))
1886 vStringChop (inheritance);
1887 vStringStripTrailing (inheritance);
1888 vStringStripLeading (inheritance);
1891 TRACE_PRINT("Emitting tag for class '%s'", vStringValue(targetName->string));
1893 makeJsTagCommon (targetName, JSTAG_CLASS, NULL, inheritance,
1894 (is_anonymous && (targetName == className)));
1896 if (! is_anonymous && targetName != className)
1898 /* FIXME: what to do with the secondary name? It's local to the
1899 * class itself, so not very useful... let's hope people
1900 * don't give it another name than the target in case of
1901 * var MyClass = class MyClassSecondaryName { ... }
1902 * I guess it could be an alias to MyClass, or duplicate it
1903 * altogether, not sure. */
1904 makeJsTag (className, JSTAG_CLASS, NULL, inheritance);
1907 if (inheritance)
1908 vStringDelete (inheritance);
1910 if (isType (token, TOKEN_OPEN_CURLY))
1911 parseMethods (token, targetName, true);
1913 deleteToken (className);
1915 TRACE_LEAVE();
1916 return true;
1919 static bool parseStatement (tokenInfo *const token, bool is_inside_class)
1921 TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class? "yes": "no");
1923 tokenInfo *const name = newToken ();
1924 tokenInfo *const secondary_name = newToken ();
1925 tokenInfo *const method_body_token = newToken ();
1926 vString * saveScope = vStringNew ();
1927 bool is_class = false;
1928 bool is_var = false;
1929 bool is_const = false;
1930 bool is_terminated = true;
1931 bool is_global = false;
1932 bool has_methods = false;
1933 vString * fulltag;
1935 vStringCopy (saveScope, token->scope);
1937 * Functions can be named or unnamed.
1938 * This deals with these formats:
1939 * Function
1940 * validFunctionOne = function(a,b) {}
1941 * testlib.validFunctionFive = function(a,b) {}
1942 * var innerThree = function(a,b) {}
1943 * var innerFour = (a,b) {}
1944 * var D2 = secondary_fcn_name(a,b) {}
1945 * var D3 = new Function("a", "b", "return a+b;");
1946 * Class
1947 * testlib.extras.ValidClassOne = function(a,b) {
1948 * this.a = a;
1950 * Class Methods
1951 * testlib.extras.ValidClassOne.prototype = {
1952 * 'validMethodOne' : function(a,b) {},
1953 * 'validMethodTwo' : function(a,b) {}
1955 * ValidClassTwo = function ()
1957 * this.validMethodThree = function() {}
1958 * // unnamed method
1959 * this.validMethodFour = () {}
1961 * Database.prototype.validMethodThree = Database_getTodaysDate;
1964 if ( is_inside_class )
1965 is_class = true;
1967 * var can precede an inner function
1969 if ( isKeyword(token, KEYWORD_var) ||
1970 isKeyword(token, KEYWORD_let) ||
1971 isKeyword(token, KEYWORD_const) )
1973 TRACE_PRINT("var/let/const case");
1974 is_const = isKeyword(token, KEYWORD_const);
1976 * Only create variables for global scope
1978 if ( token->nestLevel == 0 )
1980 is_global = true;
1982 readToken(token);
1985 nextVar:
1986 if ( isKeyword(token, KEYWORD_this) )
1988 TRACE_PRINT("found 'this' keyword");
1990 readToken(token);
1991 if (isType (token, TOKEN_PERIOD))
1993 readToken(token);
1997 copyToken(name, token, true);
1998 TRACE_PRINT("name becomes '%s' of type %s",
1999 vStringValue(token->string), tokenTypeName (token->type));
2001 while (! isType (token, TOKEN_CLOSE_CURLY) &&
2002 ! isType (token, TOKEN_SEMICOLON) &&
2003 ! isType (token, TOKEN_EQUAL_SIGN) &&
2004 ! isType (token, TOKEN_COMMA) &&
2005 ! isType (token, TOKEN_EOF))
2007 if (isType (token, TOKEN_OPEN_CURLY))
2008 parseBlock (token, NULL);
2010 /* Potentially the name of the function */
2011 if (isType (token, TOKEN_PERIOD))
2014 * Cannot be a global variable is it has dot references in the name
2016 is_global = false;
2017 /* Assume it's an assignment to a global name (e.g. a class) using
2018 * its fully qualified name, so strip the scope.
2019 * FIXME: resolve the scope so we can make more than an assumption. */
2020 vStringClear (token->scope);
2021 vStringClear (name->scope);
2024 readToken (token);
2025 if (! isType(token, TOKEN_KEYWORD))
2027 if ( is_class )
2029 addToScope(token, name->string);
2031 else
2032 addContext (name, token);
2034 readToken (token);
2036 else if ( isKeyword(token, KEYWORD_prototype) )
2039 * When we reach the "prototype" tag, we infer:
2040 * "BindAgent" is a class
2041 * "build" is a method
2043 * function BindAgent( repeatableIdName, newParentIdName ) {
2046 * CASE 1
2047 * Specified function name: "build"
2048 * BindAgent.prototype.build = function( mode ) {
2049 * maybe parse nested functions
2052 * CASE 2
2053 * Prototype listing
2054 * ValidClassOne.prototype = {
2055 * 'validMethodOne' : function(a,b) {},
2056 * 'validMethodTwo' : function(a,b) {}
2060 if (! ( isType (name, TOKEN_IDENTIFIER)
2061 || isType (name, TOKEN_STRING) ) )
2063 * Unexpected input. Try to reset the parsing.
2065 * TOKEN_STRING is acceptable. e.g.:
2066 * -----------------------------------
2067 * "a".prototype = function( mode ) {}
2069 goto cleanUp;
2071 makeClassTag (name, NULL, NULL);
2072 is_class = true;
2075 * There should a ".function_name" next.
2077 readToken (token);
2078 if (isType (token, TOKEN_PERIOD))
2081 * Handle CASE 1
2083 readToken (token);
2084 if (! isType(token, TOKEN_KEYWORD))
2086 vString *const signature = vStringNew ();
2088 addToScope(token, name->string);
2090 copyToken (method_body_token, token, true);
2091 readToken (method_body_token);
2093 while (! isType (method_body_token, TOKEN_SEMICOLON) &&
2094 ! isType (method_body_token, TOKEN_CLOSE_CURLY) &&
2095 ! isType (method_body_token, TOKEN_OPEN_CURLY) &&
2096 ! isType (method_body_token, TOKEN_EOF))
2098 if ( isType (method_body_token, TOKEN_OPEN_PAREN) )
2099 skipArgumentList(method_body_token, false,
2100 vStringLength (signature) == 0 ? signature : NULL);
2101 else
2102 readToken (method_body_token);
2105 makeJsTag (token, JSTAG_METHOD, signature, NULL);
2106 vStringDelete (signature);
2108 if ( isType (method_body_token, TOKEN_OPEN_CURLY))
2110 parseBlock (method_body_token, token->string);
2111 is_terminated = true;
2113 else
2114 is_terminated = isType (method_body_token, TOKEN_SEMICOLON);
2115 goto cleanUp;
2118 else if (isType (token, TOKEN_EQUAL_SIGN))
2120 readToken (token);
2121 if (isType (token, TOKEN_OPEN_CURLY))
2124 * Handle CASE 2
2126 * Creates tags for each of these class methods
2127 * ValidClassOne.prototype = {
2128 * 'validMethodOne' : function(a,b) {},
2129 * 'validMethodTwo' : function(a,b) {}
2132 parseMethods(token, name, false);
2134 * Find to the end of the statement
2136 findCmdTerm (token, false, false);
2137 is_terminated = true;
2138 goto cleanUp;
2142 else
2143 readToken (token);
2144 } while (isType (token, TOKEN_PERIOD));
2146 else
2147 readTokenFull (token, true, NULL);
2149 if ( isType (token, TOKEN_OPEN_PAREN) )
2150 skipArgumentList(token, false, NULL);
2152 if ( isType (token, TOKEN_OPEN_SQUARE) )
2153 skipArrayList(token, false);
2156 if ( isType (token, TOKEN_OPEN_CURLY) )
2158 is_class = parseBlock (token, name->string);
2163 if ( isType (token, TOKEN_CLOSE_CURLY) )
2166 * Reaching this section without having
2167 * processed an open curly brace indicates
2168 * the statement is most likely not terminated.
2170 is_terminated = false;
2171 goto cleanUp;
2174 if ( isType (token, TOKEN_SEMICOLON) ||
2175 isType (token, TOKEN_EOF) ||
2176 isType (token, TOKEN_COMMA) )
2179 * Only create variables for global scope
2181 if ( token->nestLevel == 0 && is_global )
2184 * Handles this syntax:
2185 * var g_var2;
2187 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2190 * Statement has ended.
2191 * This deals with calls to functions, like:
2192 * alert(..);
2194 if (isType (token, TOKEN_COMMA))
2196 readToken (token);
2197 goto nextVar;
2199 goto cleanUp;
2202 if ( isType (token, TOKEN_EQUAL_SIGN) )
2204 int parenDepth = 0;
2206 readToken (token);
2208 /* rvalue might be surrounded with parentheses */
2209 while (isType (token, TOKEN_OPEN_PAREN))
2211 parenDepth++;
2212 readToken (token);
2215 if (isKeyword (token, KEYWORD_async))
2216 readToken (token);
2218 if ( isKeyword (token, KEYWORD_function) )
2220 vString *const signature = vStringNew ();
2221 bool is_generator = false;
2223 readToken (token);
2224 if (isType (token, TOKEN_STAR))
2226 is_generator = true;
2227 readToken (token);
2230 if (! isType (token, TOKEN_KEYWORD) &&
2231 ! isType (token, TOKEN_OPEN_PAREN))
2234 * Functions of this format:
2235 * var D2A = function theAdd(a, b)
2237 * return a+b;
2239 * Are really two separate defined functions and
2240 * can be referenced in two ways:
2241 * alert( D2A(1,2) ); // produces 3
2242 * alert( theAdd(1,2) ); // also produces 3
2243 * So it must have two tags:
2244 * D2A
2245 * theAdd
2246 * Save the reference to the name for later use, once
2247 * we have established this is a valid function we will
2248 * create the secondary reference to it.
2250 copyToken(secondary_name, token, true);
2251 readToken (token);
2254 if ( isType (token, TOKEN_OPEN_PAREN) )
2255 skipArgumentList(token, false, signature);
2257 if (isType (token, TOKEN_OPEN_CURLY))
2260 * This will be either a function or a class.
2261 * We can only determine this by checking the body
2262 * of the function. If we find a "this." we know
2263 * it is a class, otherwise it is a function.
2265 if ( is_inside_class )
2267 makeJsTag (name, is_generator ? JSTAG_GENERATOR : JSTAG_METHOD, signature, NULL);
2268 if ( vStringLength(secondary_name->string) > 0 )
2269 makeFunctionTag (secondary_name, signature, is_generator);
2270 parseBlock (token, name->string);
2272 else
2274 if (! ( isType (name, TOKEN_IDENTIFIER)
2275 || isType (name, TOKEN_STRING)
2276 || isType (name, TOKEN_KEYWORD) ) )
2278 /* Unexpected input. Try to reset the parsing. */
2279 TRACE_PRINT("Unexpected input, trying to reset");
2280 vStringDelete (signature);
2281 goto cleanUp;
2284 is_class = parseBlock (token, name->string);
2285 if ( is_class )
2286 makeClassTag (name, signature, NULL);
2287 else
2288 makeFunctionTag (name, signature, is_generator);
2290 if ( vStringLength(secondary_name->string) > 0 )
2291 makeFunctionTag (secondary_name, signature, is_generator);
2295 vStringDelete (signature);
2297 else if (isKeyword (token, KEYWORD_class))
2299 is_terminated = parseES6Class (token, name);
2301 else if (isType (token, TOKEN_OPEN_CURLY))
2304 * Creates tags for each of these class methods
2305 * ValidClassOne.prototype = {
2306 * 'validMethodOne' : function(a,b) {},
2307 * 'validMethodTwo' : function(a,b) {}
2309 * Or checks if this is a hash variable.
2310 * var z = {};
2312 bool anonClass = vStringIsEmpty (name->string);
2313 if (anonClass)
2314 anonGenerate (name->string, "AnonymousClass", JSTAG_CLASS);
2315 has_methods = parseMethods(token, name, false);
2316 if (has_methods)
2317 makeJsTagCommon (name, JSTAG_CLASS, NULL, NULL, anonClass);
2318 else
2321 * Only create variables for global scope
2323 if ( token->nestLevel == 0 && is_global )
2326 * A pointer can be created to the function.
2327 * If we recognize the function/class name ignore the variable.
2328 * This format looks identical to a variable definition.
2329 * A variable defined outside of a block is considered
2330 * a global variable:
2331 * var g_var1 = 1;
2332 * var g_var2;
2333 * This is not a global variable:
2334 * var g_var = function;
2335 * This is a global variable:
2336 * var g_var = different_var_name;
2338 fulltag = vStringNew ();
2339 if (vStringLength (token->scope) > 0)
2341 vStringCopy(fulltag, token->scope);
2342 vStringPut (fulltag, '.');
2343 vStringCat (fulltag, token->string);
2345 else
2347 vStringCopy(fulltag, token->string);
2349 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
2350 ! stringListHas(ClassNames, vStringValue (fulltag)) )
2352 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2354 vStringDelete (fulltag);
2357 /* Here we should be at the end of the block, on the close curly.
2358 * If so, read the next token not to confuse that close curly with
2359 * the end of the current statement. */
2360 if (isType (token, TOKEN_CLOSE_CURLY))
2362 readTokenFull(token, true, NULL);
2363 is_terminated = isType (token, TOKEN_SEMICOLON);
2366 else if (isKeyword (token, KEYWORD_new))
2368 readToken (token);
2369 is_var = isType (token, TOKEN_IDENTIFIER);
2370 if ( isKeyword (token, KEYWORD_function) ||
2371 isKeyword (token, KEYWORD_capital_function) ||
2372 isKeyword (token, KEYWORD_capital_object) ||
2373 is_var )
2375 if ( isKeyword (token, KEYWORD_capital_object) )
2376 is_class = true;
2378 if (is_var)
2379 skipQualifiedIdentifier (token);
2380 else
2381 readToken (token);
2383 if ( isType (token, TOKEN_OPEN_PAREN) )
2384 skipArgumentList(token, true, NULL);
2386 if (isType (token, TOKEN_SEMICOLON))
2388 if ( token->nestLevel == 0 )
2390 if ( is_var )
2392 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2394 else if ( is_class )
2396 makeClassTag (name, NULL, NULL);
2398 else
2400 /* FIXME: we cannot really get a meaningful
2401 * signature from a `new Function()` call,
2402 * so for now just don't set any */
2403 makeFunctionTag (name, NULL, false);
2407 else if (isType (token, TOKEN_CLOSE_CURLY))
2408 is_terminated = false;
2411 else if (! isType (token, TOKEN_KEYWORD))
2414 * Only create variables for global scope
2416 if ( token->nestLevel == 0 && is_global )
2419 * A pointer can be created to the function.
2420 * If we recognize the function/class name ignore the variable.
2421 * This format looks identical to a variable definition.
2422 * A variable defined outside of a block is considered
2423 * a global variable:
2424 * var g_var1 = 1;
2425 * var g_var2;
2426 * This is not a global variable:
2427 * var g_var = function;
2428 * This is a global variable:
2429 * var g_var = different_var_name;
2431 fulltag = vStringNew ();
2432 if (vStringLength (token->scope) > 0)
2434 vStringCopy(fulltag, token->scope);
2435 vStringPut (fulltag, '.');
2436 vStringCat (fulltag, token->string);
2438 else
2440 vStringCopy(fulltag, token->string);
2442 if ( ! stringListHas(FunctionNames, vStringValue (fulltag)) &&
2443 ! stringListHas(ClassNames, vStringValue (fulltag)) )
2445 makeJsTag (name, is_const ? JSTAG_CONSTANT : JSTAG_VARIABLE, NULL, NULL);
2447 vStringDelete (fulltag);
2451 if (parenDepth > 0)
2453 while (parenDepth > 0 && ! isType (token, TOKEN_EOF))
2455 if (isType (token, TOKEN_OPEN_PAREN))
2456 parenDepth++;
2457 else if (isType (token, TOKEN_CLOSE_PAREN))
2458 parenDepth--;
2459 readTokenFull (token, true, NULL);
2461 if (isType (token, TOKEN_CLOSE_CURLY))
2462 is_terminated = false;
2465 /* if we aren't already at the cmd end, advance to it and check whether
2466 * the statement was terminated */
2467 if (! isType (token, TOKEN_CLOSE_CURLY) &&
2468 ! isType (token, TOKEN_SEMICOLON))
2471 * Statements can be optionally terminated in the case of
2472 * statement prior to a close curly brace as in the
2473 * document.write line below:
2475 * function checkForUpdate() {
2476 * if( 1==1 ) {
2477 * document.write("hello from checkForUpdate<br>")
2479 * return 1;
2482 is_terminated = findCmdTerm (token, true, true);
2483 /* if we're at a comma, try and read a second var */
2484 if (isType (token, TOKEN_COMMA))
2486 readToken (token);
2487 goto nextVar;
2491 cleanUp:
2492 vStringCopy(token->scope, saveScope);
2493 deleteToken (name);
2494 deleteToken (secondary_name);
2495 deleteToken (method_body_token);
2496 vStringDelete(saveScope);
2498 TRACE_LEAVE();
2500 return is_terminated;
2503 static void parseUI5 (tokenInfo *const token)
2505 tokenInfo *const name = newToken ();
2507 * SAPUI5 is built on top of jQuery.
2508 * It follows a standard format:
2509 * sap.ui.controller("id.of.controller", {
2510 * method_name : function... {
2511 * },
2513 * method_name : function ... {
2517 * Handle the parsing of the initial controller (and the
2518 * same for "view") and then allow the methods to be
2519 * parsed as usual.
2522 readToken (token);
2524 if (isType (token, TOKEN_PERIOD))
2526 readToken (token);
2527 while (! isType (token, TOKEN_OPEN_PAREN) &&
2528 ! isType (token, TOKEN_EOF))
2530 readToken (token);
2532 readToken (token);
2534 if (isType (token, TOKEN_STRING))
2536 copyToken(name, token, true);
2537 readToken (token);
2540 if (isType (token, TOKEN_COMMA))
2541 readToken (token);
2545 parseMethods (token, name, false);
2546 } while (! isType (token, TOKEN_CLOSE_CURLY) &&
2547 ! isType (token, TOKEN_EOF));
2550 deleteToken (name);
2553 static bool parseLine (tokenInfo *const token, bool is_inside_class)
2555 TRACE_ENTER_TEXT("token is '%s' of type %s",
2556 vStringValue(token->string), tokenTypeName (token->type));
2558 bool is_terminated = true;
2560 * Detect the common statements, if, while, for, do, ...
2561 * This is necessary since the last statement within a block "{}"
2562 * can be optionally terminated.
2564 * If the statement is not terminated, we need to tell
2565 * the calling routine to prevent reading an additional token
2566 * looking for the end of the statement.
2569 if (isType(token, TOKEN_KEYWORD))
2571 switch (token->keyword)
2573 case KEYWORD_for:
2574 case KEYWORD_while:
2575 case KEYWORD_do:
2576 is_terminated = parseLoop (token);
2577 break;
2578 case KEYWORD_if:
2579 case KEYWORD_else:
2580 case KEYWORD_try:
2581 case KEYWORD_catch:
2582 case KEYWORD_finally:
2583 /* Common semantics */
2584 is_terminated = parseIf (token);
2585 break;
2586 case KEYWORD_switch:
2587 parseSwitch (token);
2588 break;
2589 case KEYWORD_return:
2590 case KEYWORD_async:
2591 readToken (token);
2592 is_terminated = parseLine (token, is_inside_class);
2593 break;
2594 case KEYWORD_function:
2595 parseFunction (token);
2596 break;
2597 case KEYWORD_class:
2598 is_terminated = parseES6Class (token, NULL);
2599 break;
2600 default:
2601 is_terminated = parseStatement (token, is_inside_class);
2602 break;
2605 else
2608 * Special case where single line statements may not be
2609 * SEMICOLON terminated. parseBlock needs to know this
2610 * so that it does not read the next token.
2612 is_terminated = parseStatement (token, is_inside_class);
2615 TRACE_LEAVE();
2617 return is_terminated;
2620 static void parseJsFile (tokenInfo *const token)
2622 TRACE_ENTER();
2626 readToken (token);
2628 if (isType (token, TOKEN_KEYWORD) && token->keyword == KEYWORD_sap)
2629 parseUI5 (token);
2630 else if (isType (token, TOKEN_KEYWORD) && (token->keyword == KEYWORD_export ||
2631 token->keyword == KEYWORD_default))
2632 /* skip those at top-level */;
2633 else
2634 parseLine (token, false);
2635 } while (! isType (token, TOKEN_EOF));
2637 TRACE_LEAVE();
2640 #ifdef DO_TRACING
2641 #if 0
2642 static void dumpToken (const tokenInfo *const token)
2644 fprintf(stderr, "Token <%p>: %s: %s\n",
2645 token,
2646 tokenTypeName (token->type),
2647 (token->type == TOKEN_KEYWORD ? keywordName (token->keyword):
2648 token->type == TOKEN_IDENTIFIER? vStringValue (token->string):
2649 ""));
2651 #endif
2653 static const char *tokenTypeName(enum eTokenType e)
2654 { /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eTokenType" "tokenTypeName" */
2655 switch (e)
2657 case TOKEN_BINARY_OPERATOR: return "TOKEN_BINARY_OPERATOR";
2658 case TOKEN_CHARACTER: return "TOKEN_CHARACTER";
2659 case TOKEN_CLOSE_CURLY: return "TOKEN_CLOSE_CURLY";
2660 case TOKEN_CLOSE_PAREN: return "TOKEN_CLOSE_PAREN";
2661 case TOKEN_CLOSE_SQUARE: return "TOKEN_CLOSE_SQUARE";
2662 case TOKEN_COLON: return "TOKEN_COLON";
2663 case TOKEN_COMMA: return "TOKEN_COMMA";
2664 case TOKEN_EOF: return "TOKEN_EOF";
2665 case TOKEN_EQUAL_SIGN: return "TOKEN_EQUAL_SIGN";
2666 case TOKEN_IDENTIFIER: return "TOKEN_IDENTIFIER";
2667 case TOKEN_KEYWORD: return "TOKEN_KEYWORD";
2668 case TOKEN_OPEN_CURLY: return "TOKEN_OPEN_CURLY";
2669 case TOKEN_OPEN_PAREN: return "TOKEN_OPEN_PAREN";
2670 case TOKEN_OPEN_SQUARE: return "TOKEN_OPEN_SQUARE";
2671 case TOKEN_PERIOD: return "TOKEN_PERIOD";
2672 case TOKEN_POSTFIX_OPERATOR: return "TOKEN_POSTFIX_OPERATOR";
2673 case TOKEN_REGEXP: return "TOKEN_REGEXP";
2674 case TOKEN_SEMICOLON: return "TOKEN_SEMICOLON";
2675 case TOKEN_STAR: return "TOKEN_STAR";
2676 case TOKEN_STRING: return "TOKEN_STRING";
2677 case TOKEN_TEMPLATE_STRING: return "TOKEN_TEMPLATE_STRING";
2678 case TOKEN_UNDEFINED: return "TOKEN_UNDEFINED";
2679 default: return "UNKNOWN";
2683 #if 0
2684 static const char *keywordName(enum eKeywordId e)
2685 { /* Generated by misc/enumstr.sh with cmdline "parsers/jscript.c" "eKeywordId" "keywordName" */
2686 switch (e)
2688 case KEYWORD_async: return "KEYWORD_async";
2689 case KEYWORD_capital_function: return "KEYWORD_capital_function";
2690 case KEYWORD_capital_object: return "KEYWORD_capital_object";
2691 case KEYWORD_catch: return "KEYWORD_catch";
2692 case KEYWORD_class: return "KEYWORD_class";
2693 case KEYWORD_const: return "KEYWORD_const";
2694 case KEYWORD_default: return "KEYWORD_default";
2695 case KEYWORD_do: return "KEYWORD_do";
2696 case KEYWORD_else: return "KEYWORD_else";
2697 case KEYWORD_export: return "KEYWORD_export";
2698 case KEYWORD_extends: return "KEYWORD_extends";
2699 case KEYWORD_finally: return "KEYWORD_finally";
2700 case KEYWORD_for: return "KEYWORD_for";
2701 case KEYWORD_function: return "KEYWORD_function";
2702 case KEYWORD_get: return "KEYWORD_get";
2703 case KEYWORD_if: return "KEYWORD_if";
2704 case KEYWORD_let: return "KEYWORD_let";
2705 case KEYWORD_new: return "KEYWORD_new";
2706 case KEYWORD_prototype: return "KEYWORD_prototype";
2707 case KEYWORD_return: return "KEYWORD_return";
2708 case KEYWORD_sap: return "KEYWORD_sap";
2709 case KEYWORD_set: return "KEYWORD_set";
2710 case KEYWORD_static: return "KEYWORD_static";
2711 case KEYWORD_switch: return "KEYWORD_switch";
2712 case KEYWORD_this: return "KEYWORD_this";
2713 case KEYWORD_try: return "KEYWORD_try";
2714 case KEYWORD_var: return "KEYWORD_var";
2715 case KEYWORD_while: return "KEYWORD_while";
2716 default: return "UNKNOWN";
2719 #endif
2720 #endif
2722 static void initialize (const langType language)
2724 Assert (ARRAY_SIZE (JsKinds) == JSTAG_COUNT);
2725 Lang_js = language;
2727 TokenPool = objPoolNew (16, newPoolToken, deletePoolToken, clearPoolToken, NULL);
2730 static void finalize (langType language CTAGS_ATTR_UNUSED, bool initialized)
2732 if (!initialized)
2733 return;
2735 objPoolDelete (TokenPool);
2738 static void findJsTags (void)
2740 tokenInfo *const token = newToken ();
2742 NextToken = NULL;
2743 ClassNames = stringListNew ();
2744 FunctionNames = stringListNew ();
2745 LastTokenType = TOKEN_UNDEFINED;
2747 parseJsFile (token);
2749 stringListDelete (ClassNames);
2750 stringListDelete (FunctionNames);
2751 ClassNames = NULL;
2752 FunctionNames = NULL;
2753 deleteToken (token);
2755 #ifdef HAVE_ICONV
2756 if (JSUnicodeConverter != (iconv_t) -2 && /* not created */
2757 JSUnicodeConverter != (iconv_t) -1 /* creation failed */)
2759 iconv_close (JSUnicodeConverter);
2760 JSUnicodeConverter = (iconv_t) -2;
2762 #endif
2764 Assert (NextToken == NULL);
2767 /* Create parser definition structure */
2768 extern parserDefinition* JavaScriptParser (void)
2770 // .jsx files are JSX: https://facebook.github.io/jsx/
2771 // which have JS function definitions, so we just use the JS parser
2772 static const char *const extensions [] = { "js", "jsx", "mjs", NULL };
2773 static const char *const aliases [] = { "js", "node", "nodejs",
2774 "seed", "gjs",
2775 /* Used in PostgreSQL
2776 * https://github.com/plv8/plv8 */
2777 "v8",
2778 NULL };
2779 parserDefinition *const def = parserNew ("JavaScript");
2780 def->extensions = extensions;
2781 def->aliases = aliases;
2783 * New definitions for parsing instead of regex
2785 def->kindTable = JsKinds;
2786 def->kindCount = ARRAY_SIZE (JsKinds);
2787 def->parser = findJsTags;
2788 def->initialize = initialize;
2789 def->finalize = finalize;
2790 def->keywordTable = JsKeywordTable;
2791 def->keywordCount = ARRAY_SIZE (JsKeywordTable);
2793 return def;