2 * Copyright (c) 2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for JavaScript language
10 * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
12 * This is a good reference for different forms of the function statement:
13 * http://www.permadi.com/tutorial/jsFunc/
14 * Another good reference:
15 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
30 # ifdef WORDS_BIGENDIAN
31 # define INTERNAL_ENCODING "UTF-32BE"
33 # define INTERNAL_ENCODING "UTF-32LE"
34 # endif /* WORDS_BIGENDIAN */
/*
 * Token helper macros.
 *
 * isType(token, t)     - true when the token's `type' member equals T.
 * isKeyword(token, k)  - true when the token's `keyword' member equals K.
 * newToken()           - obtains a token from TokenPool via objPoolGet().
 * deleteToken(t)       - hands token T back to TokenPool via objPoolPut().
 *
 * Every expansion is wrapped in an outer pair of parentheses so that the
 * leading (bool) cast cannot be mis-parsed when the macro is used as the
 * operand of an operator such as sizeof.
 */
#define isType(token,t)		((bool) ((token)->type == (t)))
#define isKeyword(token,k)	((bool) ((token)->keyword == (k)))
#define newToken()		(objPoolGet (TokenPool))
#define deleteToken(t)		(objPoolPut (TokenPool, (t)))
65 /* Used to specify type of keyword.
69 KEYWORD_capital_function
,
70 KEYWORD_capital_object
,
97 typedef int keywordId
; /* to allow KEYWORD_NONE */
99 typedef enum eTokenType
{
111 TOKEN_TEMPLATE_STRING
,
119 TOKEN_POSTFIX_OPERATOR
,
121 /* To handle Babel's decorators.
122 * Used only in readTokenFull or lower functions. */
124 TOKEN_BINARY_OPERATOR
,
126 TOKEN_DOTS
, /* ... */
129 typedef struct sTokenInfo
{
134 unsigned long lineNumber
;
145 static tokenType LastTokenType
;
146 static tokenInfo
*NextToken
;
148 static langType Lang_js
;
150 static objPool
*TokenPool
= NULL
;
153 static iconv_t JSUnicodeConverter
= (iconv_t
) -2;
157 * "chain element" role is introduced when adapting the JavaScript parser
160 * In the corkAPI, a cork index returned from makeTagEntry() can
161 * represent a scope of another tag. Let's think about `input-0.js' that
162 * the node command accepts as an input for ctags.
164 +---+ input-0.js ------------------------------------------------------
166 | 2 | f = function(x) {
170 +---+------------------------------------------------------------------
172 * The following pseudo C code illustrates the code for
173 * tagging `A' and `f' in input-0.js:
174 +---+------------------------------------------------------------------
176 | | tagEntryFor e_for_A, e_for_f;
178 | | int index_for_A = makeTagEntry (&e_for_A);
180 |>>>| e_for_f.extensionFields.scopeIndex = index_for_A;
182 | | makeTagEntry (&e_for_f);
184 +---+------------------------------------------------------------------
186 * `index_for_A' represents "A" in "class A".
187 * `f' is defined in `A'. To fill the scope field of the tag for `f',
188 * `scopeIndex' member of the tag is filled with `index_for_A' at line |>>>|.
190 * If `A' is defined in the input source file, this technique based on
191 * the cork API works fine. However, if `A' is not defined in the input
192 * source file, the technique doesn't work well.
193 +---+ input-1.js -------------------------------------------------------
194 | 1 | import {A} from 'input-0.js';
195 | 2 | A.g = function(x) {
198 +---+------------------------------------------------------------------
200 * In this case, `A' may be defined in input-0.js.
201 * The current implementation of ctags processes file by file; it doesn't
202 * use the knowledge in other input source files than current input source
203 * file. ctags processing input-1.js doesn't know the cork index for `A'.
205 * When tagging `g' with "function" kind, how can we fill the scope field
206 * of the tag for `g'?
208 * Here the "chain element" role comes.
209 * This role is used for tagging `z' in "x.y.z" in the case when ctags
210 * doesn't see the definitions for `x' and `y'.
211 * The JavaScript parser makes reference tags for `x' and `y' with
212 * "chain element" role. makeTagEntry() returns a cork index regardless of the
213 * type of tag (definition or reference).
214 * The index for the reference tag for `y' can be used to fill the scope
215 * field of the tag for `z'. The index for `x' can be used to fill the
218 * With this trick and technique, the scope field for `g' is filled:
219 +---+ tags for input-1.js ---------------------------------------------
220 | 1 | A input-1.js /^A.g = function(x) {$/;" f roles:chainElt extras:reference
221 | 2 | g input-1.js /^A.g = function(x) {$/;" f scope:function:A signature:(x) roles:def
222 +---+------------------------------------------------------------------
224 * By default, reference tags are not emitted. So non-ctags-expert users may
225 * not see the tag entry for `A'.
227 * makeJsRefTagsForNameChain() and makeJsTagCommon() implement the trick
232 * Is "chain element(chainElt)" a suitable name for people familiar with JavaScript?
234 * Kinds assigned to the tag having chainElt role must be revised. Eventually
235 * we may need to introduce "unknown" kind like the Python parser. Assigning
236 * "function" kind to `A' in input-1.js is obviously wrong.
240 JS_VARIABLE_CHAINELT
,
247 static roleDefinition JsFunctionRoles
[] = {
248 /* Currently the V parser wants these items. */
249 { true, "foreigndecl", "declared in foreign languages" },
252 static roleDefinition JsVariableRoles
[] = {
253 { false, "chainElt", "(EXPERIMENTAL)used as an element in a name chain like a.b.c" },
256 static roleDefinition JsClassRoles
[] = {
257 { false, "chainElt", "(EXPERIMENTAL)used as an element in a name chain like a.b.c" },
260 static kindDefinition JsKinds
[] = {
261 { true, 'f', "function", "functions",
262 .referenceOnly
= false, ATTACH_ROLES(JsFunctionRoles
) },
263 { true, 'c', "class", "classes",
264 .referenceOnly
= false, ATTACH_ROLES(JsClassRoles
) },
265 { true, 'm', "method", "methods" },
266 { true, 'p', "property", "properties" },
267 { true, 'C', "constant", "constants" },
268 { true, 'v', "variable", "global variables",
269 .referenceOnly
= false, ATTACH_ROLES(JsVariableRoles
) },
270 { true, 'g', "generator", "generators" },
271 { true, 'G', "getter", "getters" },
272 { true, 'S', "setter", "setters" },
273 { true, 'M', "field", "fields" },
276 static const keywordTable JsKeywordTable
[] = {
277 /* keyword keyword ID */
278 { "function", KEYWORD_function
},
279 { "Function", KEYWORD_capital_function
},
280 { "Object", KEYWORD_capital_object
},
281 { "prototype", KEYWORD_prototype
},
282 { "var", KEYWORD_var
},
283 { "let", KEYWORD_let
},
284 { "const", KEYWORD_const
},
285 { "new", KEYWORD_new
},
286 { "this", KEYWORD_this
},
287 { "for", KEYWORD_for
},
288 { "while", KEYWORD_while
},
289 { "do", KEYWORD_do
},
290 { "if", KEYWORD_if
},
291 { "else", KEYWORD_else
},
292 { "switch", KEYWORD_switch
},
293 { "try", KEYWORD_try
},
294 { "catch", KEYWORD_catch
},
295 { "finally", KEYWORD_finally
},
296 { "sap", KEYWORD_sap
},
297 { "return", KEYWORD_return
},
298 { "class", KEYWORD_class
},
299 { "extends", KEYWORD_extends
},
300 { "static", KEYWORD_static
},
301 { "default", KEYWORD_default
},
302 { "export", KEYWORD_export
},
303 { "async", KEYWORD_async
},
304 { "get", KEYWORD_get
},
305 { "set", KEYWORD_set
},
309 * FUNCTION DEFINITIONS
312 /* Recursive functions */
313 static void readTokenFull (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
);
314 static void skipArgumentList (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
);
315 static bool parseFunction (tokenInfo
*const token
, tokenInfo
*const name
, const bool is_inside_class
);
316 static bool parseBlock (tokenInfo
*const token
, int parent_scope
);
317 static bool parseMethods (tokenInfo
*const token
, int class_index
, const bool is_es6_class
);
318 static bool parseLine (tokenInfo
*const token
, bool is_inside_class
);
319 static void parseUI5 (tokenInfo
*const token
);
322 static const char *tokenTypeName(enum eTokenType e
);
323 static const char* getNameStringForCorkIndex(int index
);
324 static const char* getKindStringForCorkIndex(int index
);
325 static const char *kindName(jsKind kind
);
326 // #define DO_TRACING_USE_DUMP_TOKEN
327 #ifdef DO_TRACING_USE_DUMP_TOKEN
328 static void dumpToken (const tokenInfo
*const token
);
332 static void *newPoolToken (void *createArg CTAGS_ATTR_UNUSED
)
334 tokenInfo
*token
= xMalloc (1, tokenInfo
);
336 token
->string
= vStringNew ();
337 token
->scope
= CORK_NIL
;
342 static void clearPoolToken (void *data
)
344 tokenInfo
*token
= data
;
346 token
->type
= TOKEN_UNDEFINED
;
347 token
->keyword
= KEYWORD_NONE
;
348 token
->nestLevel
= 0;
349 token
->dynamicProp
= false;
350 token
->lineNumber
= getInputLineNumber ();
351 token
->filePosition
= getInputFilePosition ();
352 vStringClear (token
->string
);
353 token
->scope
= CORK_NIL
;
356 static void deletePoolToken (void *data
)
358 tokenInfo
*token
= data
;
359 vStringDelete (token
->string
);
363 static void copyToken (tokenInfo
*const dest
, const tokenInfo
*const src
,
364 bool const include_non_read_info
)
366 dest
->lineNumber
= src
->lineNumber
;
367 dest
->filePosition
= src
->filePosition
;
368 dest
->type
= src
->type
;
369 dest
->keyword
= src
->keyword
;
370 dest
->dynamicProp
= src
->dynamicProp
;
372 vStringCopy(dest
->string
, src
->string
);
373 if (include_non_read_info
)
375 dest
->nestLevel
= src
->nestLevel
;
376 dest
->scope
= src
->scope
;
380 static void injectDynamicName (tokenInfo
*const token
, vString
*newName
)
382 token
->dynamicProp
= true;
383 vStringDelete (token
->string
);
384 token
->string
= newName
;
388 * Tag generation functions
391 struct bestJSEntryInScopeData
{
395 static bool findBestJSEntry (int corkIndex
, tagEntryInfo
*entry
, void *cb_data
)
397 struct bestJSEntryInScopeData
*data
= cb_data
;
399 if (isRoleAssigned (entry
, ROLE_DEFINITION_INDEX
))
401 data
->index
= corkIndex
;
405 if (data
->index
== CORK_NIL
|| data
->index
> corkIndex
)
406 data
->index
= corkIndex
;
411 static int bestJSEntryInScope(int scope
, const char *name
)
413 /* If the SCOPE has a tag entry having NAME, the tag is the best
414 * even if there are reference tag entries having NAME.
415 * If the scope has only reference tag entries having NAME, the
416 * tag having smallest cork index is the best.
419 struct bestJSEntryInScopeData data
= {
422 foreachEntriesInScope (scope
, name
, findBestJSEntry
, &data
);
426 static int makeJsRefTagsForNameChain (char *name_chain
, const tokenInfo
*token
, int leaf_kind
, int scope
)
428 /* To fill the scope field of "d" of "a.b.c.d",
429 * "c" must be tagged if the cork API is used.
430 * ----------------------------------------------------------
431 * How the fields for "a", "b", and "c" are filled.
432 * a kind:class scope:<given by SCOPE> roles:chainElt
433 * b kind:class scope:class:a roles:chainElt
435 * The fields of c depend on LEAF_KIND that is passed to this function.
437 * if (LEAF_KIND == FUNCTION)
438 * c kind:function scope:class:b roles:chainElt
440 * c kind:class scope:class:b roles:chainElt
443 const char *name
= name_chain
;
444 char *next
= strchr(name_chain
, '.');
447 int index
= bestJSEntryInScope (scope
, name
);
449 if (index
== CORK_NIL
)
452 int kind
= JSTAG_CLASS
;
453 int role
= JS_CLASS_CHAINELT
;
454 if (next
== NULL
&& leaf_kind
== JSTAG_FUNCTION
)
457 * If we're creating a function (and not a method),
458 * assume the parent is a plain variable.
460 kind
= JSTAG_VARIABLE
;
461 role
= JS_VARIABLE_CHAINELT
;
464 initRefTagEntry (&e
, name
, kind
, role
);
465 updateTagLine (&e
, token
->lineNumber
, token
->filePosition
);
466 e
.extensionFields
.scopeIndex
= scope
;
468 index
= makeTagEntry (&e
);
469 /* We should remove this condition. We should fix the callers passing
470 * an empty name instead. makeTagEntry() returns CORK_NIL if the tag
472 if (index
!= CORK_NIL
)
473 registerEntry (index
);
477 ? makeJsRefTagsForNameChain (next
+ 1, token
, leaf_kind
, index
)
481 static int makeJsTagCommon (const tokenInfo
*const token
, const jsKind kind
,
482 vString
*const signature
, vString
*const inheritance
,
485 int index
= CORK_NIL
;
486 const char *name
= vStringValue (token
->string
);
489 char *name_chain
= NULL
;
490 if (!token
->dynamicProp
&& kind
!= JSTAG_PROPERTY
&& (p
= strrchr (name
, '.')) != NULL
)
493 name_chain
= eStrndup (name
, (size_t) (p
- name
));
499 int scope
= token
->scope
;
502 scope
= makeJsRefTagsForNameChain (name_chain
, token
, kind
, scope
);
507 * Check whether NAME is already defined in SCOPE.
508 * If the NAME is already defined, return the cork index for the NAME.
510 if (kind
== JSTAG_FUNCTION
|| kind
== JSTAG_CLASS
)
512 index
= anyKindEntryInScope (scope
, name
, kind
, true);
513 if (index
!= CORK_NIL
)
518 initTagEntry (&e
, name
, kind
);
519 updateTagLine (&e
, token
->lineNumber
, token
->filePosition
);
520 e
.extensionFields
.scopeIndex
= scope
;
524 const char *scope_str
= getNameStringForCorkIndex (scope
);
525 const char *scope_kind_str
= getKindStringForCorkIndex (scope
);
526 TRACE_PRINT("Emitting tag for symbol '%s' of kind %s with scope '%s:%s'", name
, kindName(kind
), scope_kind_str
, scope_str
);
530 if (signature
&& vStringLength(signature
))
533 /* sanitize signature by replacing all control characters with a
534 * space (because it's simple).
535 * there should never be any junk in a valid signature, but who
536 * knows what the user wrote and CTags doesn't cope well with weird
538 for (i
= 0; i
< signature
->length
; i
++)
540 unsigned char c
= (unsigned char) vStringChar (signature
, i
);
541 if (c
< 0x20 /* below space */ || c
== 0x7F /* DEL */)
542 vStringChar (signature
, i
) = ' ';
544 e
.extensionFields
.signature
= vStringValue(signature
);
548 e
.extensionFields
.inheritance
= vStringValue(inheritance
);
551 markTagExtraBit (&e
, XTAG_ANONYMOUS
);
553 index
= makeTagEntry (&e
);
554 /* We should remove this condition. We should fix the callers passing
555 * an empty name instead. makeTagEntry() returns CORK_NIL if the tag
557 if (index
!= CORK_NIL
)
558 registerEntry (index
);
563 static int makeJsTag (const tokenInfo
*const token
, const jsKind kind
,
564 vString
*const signature
, vString
*const inheritance
)
566 return makeJsTagCommon (token
, kind
, signature
, inheritance
, false);
569 static int makeClassTagCommon (tokenInfo
*const token
, vString
*const signature
,
570 vString
*const inheritance
, bool anonymous
)
572 return makeJsTagCommon (token
, JSTAG_CLASS
, signature
, inheritance
, anonymous
);
575 static int makeClassTag (tokenInfo
*const token
, vString
*const signature
,
576 vString
*const inheritance
)
578 return makeClassTagCommon (token
, signature
, inheritance
, false);
581 static int makeFunctionTagCommon (tokenInfo
*const token
, vString
*const signature
,
582 bool generator
, bool anonymous
)
584 return makeJsTagCommon (token
, generator
? JSTAG_GENERATOR
: JSTAG_FUNCTION
, signature
, NULL
,
588 static int makeFunctionTag (tokenInfo
*const token
, vString
*const signature
, bool generator
)
590 return makeFunctionTagCommon (token
, signature
, generator
, false);
593 static bool isClassName (tokenInfo
*const name
)
595 char * p
= strrchr(vStringValue (name
->string
), '.');
597 p
= vStringValue (name
->string
);
601 return isupper((unsigned char) *p
);
608 /* given @p point, returns the first byte of the encoded output sequence, and
609 * make sure the next ones will be returned by calls to getcFromInputFile()
610 * as if the code point was simply written in the input file. */
611 static int handleUnicodeCodePoint (uint32_t point
)
615 Assert (point
< 0x110000);
618 /* if we do have iconv and the encodings are specified, use this */
619 if (isConverting () && JSUnicodeConverter
== (iconv_t
) -2)
621 /* if we didn't try creating the converter yet, try and do so */
622 JSUnicodeConverter
= iconv_open (getLanguageEncoding (Lang_js
), INTERNAL_ENCODING
);
624 if (isConverting () && JSUnicodeConverter
!= (iconv_t
) -1)
626 char *input_ptr
= (char *) &point
;
627 size_t input_left
= sizeof point
;
628 /* 4 bytes should be enough for any encoding (it's how much UTF-32
630 /* FIXME: actually iconv has a tendency to output a BOM for Unicode
631 * encodings where it matters when the endianness is not specified in
632 * the target encoding name. E.g., if the target encoding is "UTF-32"
633 * or "UTF-16" it will output 2 code points, the BOM (U+FEFF) and the
634 * one we expect. This does not happen if the endianness is specified
635 * explicitly, e.g. with "UTF-32LE", or "UTF-16BE".
636 * However, it's not very relevant for the moment as nothing in CTags
637 * copes well (if at all) with non-ASCII-compatible encodings like
638 * UTF-32 or UTF-16 anyway. */
639 char output
[4] = { 0 };
640 char *output_ptr
= output
;
641 size_t output_left
= ARRAY_SIZE (output
);
643 if (iconv (JSUnicodeConverter
, &input_ptr
, &input_left
, &output_ptr
, &output_left
) == (size_t) -1)
645 /* something went wrong, which probably means the output encoding
646 * cannot represent the character. Use a placeholder likely to be
647 * supported instead, that's also valid in an identifier */
648 verbose ("JavaScript: Encoding: %s\n", strerror (errno
));
653 const size_t output_len
= ARRAY_SIZE (output
) - output_left
;
655 /* put all but the first byte back so that getcFromInputFile() will
656 * return them in the right order */
657 for (unsigned int i
= 1; i
< output_len
; i
++)
658 ungetcToInputFile ((unsigned char) output
[output_len
- i
]);
659 c
= (unsigned char) output
[0];
662 iconv (JSUnicodeConverter
, NULL
, NULL
, NULL
, NULL
);
667 /* when no encoding is specified (or no iconv), assume UTF-8 is good.
668 * Why UTF-8? Because it's an ASCII-compatible common Unicode encoding. */
670 c
= (unsigned char) point
;
671 else if (point
< 0x800)
673 c
= (unsigned char) (0xc0 | ((point
>> 6) & 0x1f));
674 ungetcToInputFile ((unsigned char) (0x80 | (point
& 0x3f)));
676 else if (point
< 0x10000)
678 c
= (unsigned char) (0xe0 | ((point
>> 12) & 0x0f));
679 ungetcToInputFile ((unsigned char) (0x80 | ((point
>> 0) & 0x3f)));
680 ungetcToInputFile ((unsigned char) (0x80 | ((point
>> 6) & 0x3f)));
682 else if (point
< 0x110000)
684 c
= (unsigned char) (0xf0 | ((point
>> 18) & 0x07));
685 ungetcToInputFile ((unsigned char) (0x80 | ((point
>> 0) & 0x3f)));
686 ungetcToInputFile ((unsigned char) (0x80 | ((point
>> 6) & 0x3f)));
687 ungetcToInputFile ((unsigned char) (0x80 | ((point
>> 12) & 0x3f)));
694 /* reads a Unicode escape sequence after the "\" prefix.
695 * @param value Location to store the escape sequence value.
696 * @param isUTF16 Location to store whether @param value is an UTF-16 word.
697 * @returns Whether a valid sequence was read. */
698 static bool readUnicodeEscapeSequenceValue (uint32_t *const value
,
702 int d
= getcFromInputFile ();
705 ungetcToInputFile (d
);
708 int e
= getcFromInputFile ();
709 char cp
[6 + 1]; /* up to 6 hex + possible closing '}' or invalid char */
710 unsigned int cp_len
= 0;
712 *isUTF16
= (e
!= '{');
714 { /* Handles Unicode code point escapes: \u{ HexDigits }
715 * We skip the leading 0s because there can be any number of them
716 * and they don't change any meaning. */
717 bool has_leading_zero
= false;
720 while ((cp
[cp_len
] = (char) (l
= getcFromInputFile ())) == '0')
721 has_leading_zero
= true;
723 while (isxdigit (l
) && ++cp_len
< ARRAY_SIZE (cp
))
724 cp
[cp_len
] = (char) (l
= getcFromInputFile ());
725 valid
= ((cp_len
> 0 || has_leading_zero
) &&
726 cp_len
< ARRAY_SIZE (cp
) && cp
[cp_len
] == '}' &&
727 /* also check if it's a valid Unicode code point */
729 (cp_len
== 6 && strncmp (cp
, "110000", 6) < 0)));
730 if (! valid
) /* put back the last (likely invalid) character */
731 ungetcToInputFile (l
);
734 { /* Handles Unicode escape sequences: \u Hex4Digits */
737 cp
[cp_len
] = (char) (l
= ((cp_len
== 0) ? e
: getcFromInputFile ()));
738 while (isxdigit (l
) && ++cp_len
< 4);
739 valid
= (cp_len
== 4);
744 /* we don't get every character back, but it would require to
745 * be able to put up to 9 characters back (in the worst case
746 * for handling invalid \u{10FFFFx}), and here we're recovering
747 * from invalid syntax anyway. */
748 ungetcToInputFile (e
);
749 ungetcToInputFile (d
);
754 for (unsigned int i
= 0; i
< cp_len
; i
++)
758 /* we know it's a hex digit, no need to double check */
760 *value
+= (unsigned int) cp
[i
] - '0';
761 else if (cp
[i
] < 'a')
762 *value
+= 10 + (unsigned int) cp
[i
] - 'A';
764 *value
+= 10 + (unsigned int) cp
[i
] - 'a';
772 static int valueToXDigit (unsigned char v
)
777 return 'A' + (v
- 0xA);
782 /* Reads and expands a Unicode escape sequence after the "\" prefix. If the
783 * escape sequence is a UTF16 high surrogate, also try and read the low
784 * surrogate to emit the proper code point.
785 * @param fallback The character to return if the sequence is invalid. Usually
786 * this would be the '\' character starting the sequence.
787 * @returns The first byte of the sequence, or @param fallback if the sequence
788 * is invalid. On success, next calls to getcFromInputFile() will
789 * return subsequent bytes (if any). */
790 static int readUnicodeEscapeSequence (const int fallback
)
796 if (! readUnicodeEscapeSequenceValue (&value
, &isUTF16
))
800 if (isUTF16
&& (value
& 0xfc00) == 0xd800)
801 { /* this is a high surrogate, try and read its low surrogate and
802 * emit the resulting code point */
804 int d
= getcFromInputFile ();
806 if (d
!= '\\' || ! readUnicodeEscapeSequenceValue (&low
, &isUTF16
))
807 ungetcToInputFile (d
);
809 { /* not UTF-16 low surrogate but a plain code point */
810 d
= handleUnicodeCodePoint (low
);
811 ungetcToInputFile (d
);
813 else if ((low
& 0xfc00) != 0xdc00)
814 { /* not a low surrogate, so put back the escaped representation
815 * in case it was another high surrogate we should read as part
816 * of another pair. */
817 ungetcToInputFile (valueToXDigit ((unsigned char) ((low
& 0x000f) >> 0)));
818 ungetcToInputFile (valueToXDigit ((unsigned char) ((low
& 0x00f0) >> 4)));
819 ungetcToInputFile (valueToXDigit ((unsigned char) ((low
& 0x0f00) >> 8)));
820 ungetcToInputFile (valueToXDigit ((unsigned char) ((low
& 0xf000) >> 12)));
821 ungetcToInputFile ('u');
822 ungetcToInputFile ('\\');
825 value
= 0x010000 + ((value
& 0x03ff) << 10) + (low
& 0x03ff);
827 c
= handleUnicodeCodePoint (value
);
833 static void parseString (vString
*const string
, const int delimiter
)
838 int c
= getcFromInputFile ();
843 /* Eat the escape sequence (\", \', etc). We properly handle
844 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
845 * as an unescaped character, which is invalid and handled below.
846 * Also, handle the fact that <LineContinuation> produces an empty
848 * See ECMA-262 7.8.4 */
849 c
= getcFromInputFile ();
852 ungetcToInputFile (c
);
853 c
= readUnicodeEscapeSequence ('\\');
854 vStringPut (string
, c
);
856 else if (c
!= '\r' && c
!= '\n')
857 vStringPut(string
, c
);
860 c
= getcFromInputFile();
862 ungetcToInputFile (c
);
865 else if (c
== delimiter
)
867 else if (c
== '\r' || c
== '\n')
869 /* those are invalid when not escaped */
871 /* we don't want to eat the newline itself to let the automatic
872 * semicolon insertion code kick in */
873 ungetcToInputFile (c
);
876 vStringPut (string
, c
);
880 static void parseRegExp (void)
883 bool in_range
= false;
887 c
= getcFromInputFile ();
888 if (! in_range
&& c
== '/')
892 c
= getcFromInputFile ();
893 } while (isalpha (c
));
894 ungetcToInputFile (c
);
897 else if (c
== '\n' || c
== '\r')
899 /* invalid in a regex */
900 ungetcToInputFile (c
);
904 c
= getcFromInputFile (); /* skip next character */
912 /* Read a C identifier beginning with "first_char" and places it into
916 static int include_period_in_identifier
= 0;
918 static void accept_period_in_identifier(bool incl
)
922 include_period_in_identifier
++;
924 else if (!incl
&& include_period_in_identifier
> 0)
926 include_period_in_identifier
--;
930 static bool isIdentChar(const int c
)
932 return (isalpha (c
) || isdigit (c
) || c
== '$' || \
933 c
== '@' || c
== '_' || c
== '#' || \
934 c
>= 0x80 || (include_period_in_identifier
> 0 && c
== '.'));
937 static void parseIdentifier (vString
*const string
, const int first_char
)
940 Assert (isIdentChar (c
));
943 vStringPut (string
, c
);
944 c
= getcFromInputFile ();
946 c
= readUnicodeEscapeSequence (c
);
947 } while (isIdentChar (c
));
948 /* if readUnicodeEscapeSequence() read an escape sequence this is incorrect,
949 * as we should actually put back the whole escape sequence and not the
950 * decoded character. However, it's not really worth the hassle as it can
951 * only happen if the input has an invalid escape sequence. */
952 ungetcToInputFile (c
); /* unget non-identifier character */
955 static void parseTemplateString (vString
*const string
)
960 c
= getcFromInputFile ();
961 if (c
== '`' || c
== EOF
)
964 vStringPut (string
, c
);
968 c
= getcFromInputFile();
970 vStringPut(string
, c
);
974 c
= getcFromInputFile ();
976 ungetcToInputFile (c
);
980 /* we need to use the real token machinery to handle strings,
981 * comments, regexes and whatnot */
982 tokenInfo
*token
= newToken ();
983 LastTokenType
= TOKEN_UNDEFINED
;
984 vStringPut(string
, c
);
987 readTokenFull (token
, false, string
);
988 if (isType (token
, TOKEN_OPEN_CURLY
))
990 else if (isType (token
, TOKEN_CLOSE_CURLY
))
993 while (! isType (token
, TOKEN_EOF
) && depth
> 0);
1001 static void reprToken (const tokenInfo
*const token
, vString
*const repr
)
1003 switch (token
->type
)
1006 vStringCatS (repr
, "...");
1010 case TOKEN_TEMPLATE_STRING
:
1011 vStringPut (repr
, token
->c
);
1012 vStringCat (repr
, token
->string
);
1013 vStringPut (repr
, token
->c
);
1016 case TOKEN_IDENTIFIER
:
1018 vStringCat (repr
, token
->string
);
1022 vStringPut (repr
, token
->c
);
1027 static void readTokenFullRaw (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
)
1031 bool newline_encountered
= false;
1033 /* if we've got a token held back, emit it */
1036 TRACE_PRINT("Emitting held token");
1037 copyToken (token
, NextToken
, false);
1038 deleteToken (NextToken
);
1041 reprToken (token
, repr
);
1045 token
->type
= TOKEN_UNDEFINED
;
1046 token
->keyword
= KEYWORD_NONE
;
1047 vStringClear (token
->string
);
1053 c
= getcFromInputFile ();
1054 if (include_newlines
&& (c
== '\r' || c
== '\n'))
1055 newline_encountered
= true;
1058 while (c
== '\t' || c
== ' ' || c
== '\r' || c
== '\n');
1060 token
->lineNumber
= getInputLineNumber ();
1061 token
->filePosition
= getInputFilePosition ();
1063 /* special case to insert a separator */
1064 if (repr
&& c
!= EOF
&& i
> 1)
1065 vStringPut (repr
, ' ');
1071 case EOF
: token
->type
= TOKEN_EOF
; break;
1072 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
1073 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
1074 case ';': token
->type
= TOKEN_SEMICOLON
; break;
1075 case ',': token
->type
= TOKEN_COMMA
; break;
1078 token
->type
= TOKEN_PERIOD
;
1080 int d
= getcFromInputFile ();
1083 ungetcToInputFile (d
);
1087 d
= getcFromInputFile ();
1090 ungetcToInputFile (d
);
1091 ungetcToInputFile ('.');
1095 token
->type
= TOKEN_DOTS
;
1098 case ':': token
->type
= TOKEN_COLON
; break;
1099 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
1100 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
1101 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
1102 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
1106 int d
= getcFromInputFile ();
1108 token
->type
= TOKEN_ARROW
;
1111 ungetcToInputFile (d
);
1112 token
->type
= TOKEN_EQUAL_SIGN
;
1120 int d
= getcFromInputFile ();
1121 if (d
== c
) /* ++ or -- */
1122 token
->type
= TOKEN_POSTFIX_OPERATOR
;
1125 ungetcToInputFile (d
);
1126 token
->type
= TOKEN_BINARY_OPERATOR
;
1132 token
->type
= TOKEN_STAR
;
1141 token
->type
= TOKEN_BINARY_OPERATOR
;
1146 token
->type
= TOKEN_STRING
;
1147 parseString (token
->string
, c
);
1148 token
->lineNumber
= getInputLineNumber ();
1149 token
->filePosition
= getInputFilePosition ();
1153 token
->type
= TOKEN_TEMPLATE_STRING
;
1154 parseTemplateString (token
->string
);
1155 token
->lineNumber
= getInputLineNumber ();
1156 token
->filePosition
= getInputFilePosition ();
1161 int d
= getcFromInputFile ();
1162 if ( (d
!= '*') && /* is this the start of a comment? */
1163 (d
!= '/') ) /* is a one line comment? */
1165 ungetcToInputFile (d
);
1166 switch (LastTokenType
)
1168 case TOKEN_CHARACTER
:
1169 case TOKEN_IDENTIFIER
:
1171 case TOKEN_TEMPLATE_STRING
:
1172 case TOKEN_CLOSE_CURLY
:
1173 case TOKEN_CLOSE_PAREN
:
1174 case TOKEN_CLOSE_SQUARE
:
1175 token
->type
= TOKEN_BINARY_OPERATOR
;
1179 token
->type
= TOKEN_REGEXP
;
1181 token
->lineNumber
= getInputLineNumber ();
1182 token
->filePosition
= getInputFilePosition ();
1190 skipToCharacterInInputFile2('*', '/');
1193 else if (d
== '/') /* is this the start of a comment? */
1195 skipToCharacterInInputFile ('\n');
1196 /* if we care about newlines, put it back so it is seen */
1197 if (include_newlines
)
1198 ungetcToInputFile ('\n');
1206 /* skip shebang in case of e.g. Node.js scripts */
1207 if (token
->lineNumber
> 1)
1208 token
->type
= TOKEN_UNDEFINED
;
1209 else if ((c
= getcFromInputFile ()) != '!')
1211 ungetcToInputFile (c
);
1212 token
->type
= TOKEN_UNDEFINED
;
1216 skipToCharacterInInputFile ('\n');
1222 token
->type
= TOKEN_ATMARK
;
1226 c
= readUnicodeEscapeSequence (c
);
1229 if (! isIdentChar (c
))
1230 token
->type
= TOKEN_UNDEFINED
;
1233 parseIdentifier (token
->string
, c
);
1234 token
->lineNumber
= getInputLineNumber ();
1235 token
->filePosition
= getInputFilePosition ();
1236 token
->keyword
= lookupKeyword (vStringValue (token
->string
), Lang_js
);
1237 if (isKeyword (token
, KEYWORD_NONE
))
1238 token
->type
= TOKEN_IDENTIFIER
;
1240 token
->type
= TOKEN_KEYWORD
;
1245 if (include_newlines
&& newline_encountered
)
1247 /* This isn't strictly correct per the standard, but following the
1248 * real rules means understanding all statements, and that's not
1249 * what the parser currently does. What we do here is a guess, by
1250 * avoiding inserting semicolons that would make the statement on
1251 * the left or right obviously invalid. Hopefully this should not
1252 * have false negatives (e.g. should not miss insertion of a semicolon)
1253 * but might have false positives (e.g. it will wrongfully emit a
1254 * semicolon sometimes, i.e. for the newline in "foo\n(bar)").
1255 * This should however be mostly harmless as we only deal with
1256 * newlines in specific situations where we know a false positive
1257 * wouldn't hurt too bad. */
1259 /* these already end a statement, so no need to duplicate it */
1260 #define IS_STMT_SEPARATOR(t) ((t) == TOKEN_SEMICOLON || \
1261 (t) == TOKEN_EOF || \
1262 (t) == TOKEN_COMMA || \
1263 (t) == TOKEN_OPEN_CURLY)
1264 /* these cannot be the start or end of a statement */
1265 #define IS_BINARY_OPERATOR(t) ((t) == TOKEN_EQUAL_SIGN || \
1266 (t) == TOKEN_ARROW || \
1267 (t) == TOKEN_COLON || \
1268 (t) == TOKEN_PERIOD || \
1269 (t) == TOKEN_STAR || \
1270 (t) == TOKEN_BINARY_OPERATOR)
1272 if (! IS_STMT_SEPARATOR(LastTokenType
) &&
1273 ! IS_STMT_SEPARATOR(token
->type
) &&
1274 ! IS_BINARY_OPERATOR(LastTokenType
) &&
1275 ! IS_BINARY_OPERATOR(token
->type
) &&
1276 /* these cannot be followed by a semicolon */
1277 ! (LastTokenType
== TOKEN_OPEN_PAREN
||
1278 LastTokenType
== TOKEN_OPEN_SQUARE
))
1280 /* hold the token... */
1281 Assert (NextToken
== NULL
);
1282 NextToken
= newToken ();
1283 copyToken (NextToken
, token
, false);
1285 /* ...and emit a semicolon instead */
1286 token
->type
= TOKEN_SEMICOLON
;
1287 token
->keyword
= KEYWORD_NONE
;
1288 vStringClear (token
->string
);
1292 #undef IS_STMT_SEPARATOR
1293 #undef IS_BINARY_OPERATOR
1296 LastTokenType
= token
->type
;
1299 reprToken (token
, repr
);
1302 /* whether something we consider a keyword (either because it sometimes is or
1303 * because of the parser's perks) is actually valid as a function name
 *
 * @p token is classified through its keyword field; the last return accepts
 * it only when it is of type TOKEN_IDENTIFIER.
 * @p strict_mode tells whether strict-mode rules apply: KEYWORD_static is
 * accepted only when it is false.
1304 * See https://tc39.es/ecma262/multipage/ecmascript-language-lexical-grammar.html#sec-keywords-and-reserved-words */
1305 static bool canBeFunctionName (const tokenInfo
*const token
, bool strict_mode
)
1307 switch (token
->keyword
)
1309 /* non-keywords specific to this parser */
1310 case KEYWORD_capital_function
:
1311 case KEYWORD_capital_object
:
1312 case KEYWORD_prototype
:
1314 /* syntactic, but not keyword:
1315 * as async from get meta of set target
1316 * "await" is OK as well */
1322 /* strict-mode keywords
1323 * let static implements interface package private protected public
1324 * we need to also include those which are OK as function names
1328 case KEYWORD_static
:
1329 return ! strict_mode
;
1332 return isType (token
, TOKEN_IDENTIFIER
);
1336 static bool canBePropertyName (const tokenInfo
*const token
)
1338 /* property names are pretty relaxed, any non reserved word is OK, even
1339 * strict-mode ones in strict-mode */
1340 return canBeFunctionName (token
, false);
1343 /* Skips a decorator. The token following the '@' is read with
 * readTokenFullRaw() and the forms named in the traces below are handled:
 * @(...), @namespace.bar, @foo(...) and @foo. Argument lists are skipped
 * with skipArgumentList(). Any other token is only traced as unexpected.
 * @p include_newlines and @p repr are forwarded to the token readers.
 * See https://babeljs.io/blog/2018/09/17/decorators */
1344 static void skipBabelDecorator (tokenInfo
*token
, bool include_newlines
, vString
*const repr
)
1346 readTokenFullRaw (token
, include_newlines
, repr
);
1347 if (isType (token
, TOKEN_OPEN_PAREN
))
1349 /* @(complex ? dec1 : dec2) */
1350 skipArgumentList (token
, include_newlines
, repr
);
1351 TRACE_PRINT ("found @(...) style decorator");
1353 else if (isType (token
, TOKEN_IDENTIFIER
))
1355 /* @namespace.foo (...) */
1356 bool found_period
= false;
1359 readTokenFullRaw (token
, include_newlines
, repr
);
1360 if (isType (token
, TOKEN_IDENTIFIER
))
1364 TRACE_PRINT("found @namespace.bar style decorator");
1367 found_period
= false;
1369 else if (isType (token
, TOKEN_PERIOD
))
1370 found_period
= true;
1371 else if (isType (token
, TOKEN_OPEN_PAREN
))
1373 skipArgumentList (token
, include_newlines
, repr
);
1374 TRACE_PRINT("found @foo(...) style decorator");
1379 TRACE_PRINT("found @foo style decorator");
1385 /* Unexpected token after @ */
1386 TRACE_PRINT("found unexpected token during skipping a decorator");
/* Reads the next token with readTokenFullRaw(), hiding decorators from the
 * caller: a token of type TOKEN_ATMARK is handed to skipBabelDecorator()
 * instead of being treated as a regular token.
 * @p include_newlines and @p repr are forwarded unchanged to
 * readTokenFullRaw() and skipBabelDecorator(). */
1389 static void readTokenFull (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
)
1391 readTokenFullRaw (token
, include_newlines
, repr
);
1395 if (!isType (token
, TOKEN_ATMARK
))
1397 skipBabelDecorator (token
, include_newlines
, repr
);
1398 /* @decorator0 @decorator1 ... There can be more than one decorator. */
1402 #ifdef DO_TRACING_USE_DUMP_TOKEN
1403 /* trace readTokenFull(): wrapper that forwards its arguments to
 * readTokenFull(). The #define that follows the function makes the
 * readTokenFull() calls written after it go through this wrapper.
 * Guarded by DO_TRACING_USE_DUMP_TOKEN. */
1404 static void readTokenFullDebug (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
)
1406 readTokenFull (token
, include_newlines
, repr
);
1409 # define readTokenFull readTokenFullDebug
1412 static void readToken (tokenInfo
*const token
)
1414 readTokenFull (token
, false, NULL
);
1418 * Token parsing functions
/* Parses the members of an object literal that has no name of its own.
 * A copy of @p token is given a generated "anonymousObject" name and tagged
 * as JSTAG_VARIABLE; the members are then parsed by parseMethods() with that
 * tag as their scope. When parseMethods() reports failure, the tag entry is
 * marked as a placeholder. */
1421 static int parseMethodsInAnonymousObject (tokenInfo
*const token
)
1423 int index
= CORK_NIL
;
1425 tokenInfo
*const anon_object
= newToken ();
1426 copyToken (anon_object
, token
, true);
1427 anonGenerate (anon_object
->string
, "anonymousObject", JSTAG_VARIABLE
);
1428 anon_object
->type
= TOKEN_IDENTIFIER
;
1430 index
= makeJsTagCommon (anon_object
, JSTAG_VARIABLE
, NULL
, NULL
, true);
1431 if (! parseMethods (token
, index
, false))
1433 /* If no method is found, the anonymous object
1434 * should not be tagged.
1436 tagEntryInfo
*e
= getEntryInCorkQueue (index
);
1438 markTagAsPlaceholder (e
, true);
1442 deleteToken (anon_object
);
/* Skips a parenthesized list. Acts only when @p token is a
 * TOKEN_OPEN_PAREN: tokens are then read with readTokenFull() (collecting
 * their text into @p repr) while the nesting level is positive and EOF has
 * not been reached. Inside the list a '{' that follows TOKEN_ARROW is parsed
 * as a block, any other '{' as an anonymous object, and the "function"
 * keyword is handed to parseFunction(). One more token is read afterwards,
 * honouring @p include_newlines and without collecting its text. */
1447 static void skipArgumentList (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
)
1449 if (isType (token
, TOKEN_OPEN_PAREN
)) /* arguments? */
1453 vStringPut (repr
, '(');
1455 tokenType prev_token_type
= token
->type
;
1456 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
1458 readTokenFull (token
, false, repr
);
1459 if (isType (token
, TOKEN_OPEN_PAREN
))
1461 else if (isType (token
, TOKEN_CLOSE_PAREN
))
1463 else if (isType (token
, TOKEN_OPEN_CURLY
))
1465 if (prev_token_type
== TOKEN_ARROW
)
1466 parseBlock (token
, CORK_NIL
);
1468 parseMethodsInAnonymousObject (token
);
1470 else if (isKeyword (token
, KEYWORD_function
))
1471 parseFunction (token
, NULL
, false);
1473 prev_token_type
= token
->type
;
1475 readTokenFull (token
, include_newlines
, NULL
);
/* Skips a square-bracketed list. Acts only when @p token is a
 * TOKEN_OPEN_SQUARE: nested '[' and ']' are tracked while the nesting level
 * is positive and EOF has not been reached. Inside the list a '{' that
 * follows TOKEN_ARROW is parsed as a block and any other '{' as an anonymous
 * object. One more token is read afterwards, honouring
 * @p include_newlines. */
1479 static void skipArrayList (tokenInfo
*const token
, bool include_newlines
)
1482 * Handle square brackets
1484 * So we must check for nested open and closing square brackets
1487 if (isType (token
, TOKEN_OPEN_SQUARE
)) /* arguments? */
1490 tokenType prev_token_type
= token
->type
;
1491 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
1494 if (isType (token
, TOKEN_OPEN_SQUARE
))
1496 else if (isType (token
, TOKEN_CLOSE_SQUARE
))
1498 else if (isType (token
, TOKEN_OPEN_CURLY
))
1500 if (prev_token_type
== TOKEN_ARROW
)
1501 parseBlock (token
, CORK_NIL
);
1503 parseMethodsInAnonymousObject (token
);
1506 prev_token_type
= token
->type
;
1508 readTokenFull (token
, include_newlines
, NULL
);
/* Advances @p token past a dotted identifier chain such as foo.bar.baz.
 * The loop runs while the token is a TOKEN_IDENTIFIER and checks for a
 * TOKEN_PERIOD after each one. */
1512 static void skipQualifiedIdentifier (tokenInfo
*const token
)
1514 /* Skip foo.bar.baz */
1515 while (isType (token
, TOKEN_IDENTIFIER
))
1518 if (isType (token
, TOKEN_PERIOD
))
1525 static void addContext (tokenInfo
* const parent
, const tokenInfo
* const child
)
1527 vStringJoin (parent
->string
, '.', child
->string
);
1531 * Scanning functions
/* Advances @p token to the end of the current statement.
 * Scanning stops on a semicolon, a closing curly brace, EOF, or, when
 * @p include_commas is true, a comma. Nested '{...}' are parsed with
 * parseBlock(), '(...)' skipped with skipArgumentList() and '[...]' with
 * skipArrayList(); @p include_newlines is forwarded to the token readers.
 * Returns true when the stopping token is a semicolon. */
1534 static bool findCmdTerm (tokenInfo
*const token
, bool include_newlines
, bool include_commas
)
1537 * Read until we find either a semicolon or closing brace.
1538 * Any nested braces will be handled within.
1540 while (! isType (token
, TOKEN_SEMICOLON
) &&
1541 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
1542 ! (include_commas
&& isType (token
, TOKEN_COMMA
)) &&
1543 ! isType (token
, TOKEN_EOF
))
1545 /* Handle nested blocks */
1546 if ( isType (token
, TOKEN_OPEN_CURLY
))
1548 parseBlock (token
, CORK_NIL
);
1549 readTokenFull (token
, include_newlines
, NULL
);
1551 else if ( isType (token
, TOKEN_OPEN_PAREN
) )
1552 skipArgumentList(token
, include_newlines
, NULL
);
1553 else if ( isType (token
, TOKEN_OPEN_SQUARE
) )
1554 skipArrayList(token
, include_newlines
);
1556 readTokenFull (token
, include_newlines
, NULL
);
1559 return isType (token
, TOKEN_SEMICOLON
);
/* Parses a switch statement: a parenthesized expression, when present, is
 * skipped with skipArgumentList(), and a '{...}' body, when present, is
 * parsed with parseBlock() without a scope of its own (CORK_NIL). */
1562 static void parseSwitch (tokenInfo
*const token
)
1565 * switch (expression) {
1572 * default : statement;
1578 if (isType (token
, TOKEN_OPEN_PAREN
))
1580 skipArgumentList(token
, false, NULL
);
1583 if (isType (token
, TOKEN_OPEN_CURLY
))
1585 parseBlock (token
, CORK_NIL
);
/* Parses a loop statement starting at the for, while or do keyword.
 * for/while: the parenthesized header is skipped, then the body is parsed
 * either as a '{...}' block or as a single line with parseLine().
 * do: the body is parsed the same way; on the following while keyword the
 * parenthesized condition is skipped (newlines included) and, when no
 * semicolon follows, the rest of the line is parsed too.
 * Returns is_terminated, which starts as true and is overwritten by the
 * results of the parseLine() calls. */
1589 static bool parseLoop (tokenInfo
*const token
)
1592 * Handles these statements
1593 * for (x=0; x<3; x++)
1594 * document.write("This text is repeated three times<br>");
1596 * for (x=0; x<3; x++)
1598 * document.write("This text is repeated three times<br>");
1602 * document.write(number+"<br>");
1607 * document.write(number+"<br>");
1612 bool is_terminated
= true;
1614 if (isKeyword (token
, KEYWORD_for
) || isKeyword (token
, KEYWORD_while
))
1618 if (isType (token
, TOKEN_OPEN_PAREN
))
1619 skipArgumentList(token
, false, NULL
);
1621 if (isType (token
, TOKEN_OPEN_CURLY
))
1622 parseBlock (token
, CORK_NIL
);
1624 is_terminated
= parseLine(token
, false);
1626 else if (isKeyword (token
, KEYWORD_do
))
1630 if (isType (token
, TOKEN_OPEN_CURLY
))
1631 parseBlock (token
, CORK_NIL
);
1633 is_terminated
= parseLine(token
, false);
1638 if (isKeyword (token
, KEYWORD_while
))
1642 if (isType (token
, TOKEN_OPEN_PAREN
))
1643 skipArgumentList(token
, true, NULL
);
1645 if (! isType (token
, TOKEN_SEMICOLON
))
1647 /* oddly enough, `do {} while (0) var foo = 42` is perfectly
1648 * valid JS, so explicitly handle the remaining of the line
1649 * for the sake of the root scope handling (as parseJsFile()
1650 * always advances a token not to ever get stuck) */
1651 is_terminated
= parseLine(token
, false);
1656 return is_terminated
;
/* Parses an if/else statement; for an "else if" the "if" is consumed too.
 * A parenthesized condition is skipped with skipArgumentList(). The body
 * is parsed with parseBlock() when it is a '{...}' block; findCmdTerm() is
 * used to scan a body to the end of its statement.
 * Returns read_next_token: it starts as true and takes the result of
 * findCmdTerm(), i.e. whether the statement ended on a semicolon. */
1659 static bool parseIf (tokenInfo
*const token
)
1661 bool read_next_token
= true;
1663 * If statements have two forms
1682 * This example if correctly written, but the
1683 * else contains only 1 statement without a terminator
1684 * since the function finishes with the closing brace.
1693 * TODO: Deal with statements that can optional end
1694 * without a semi-colon. Currently this messes up
1695 * the parsing of blocks.
1696 * Need to somehow detect this has happened, and either
1697 * backup a token, or skip reading the next token if
1698 * that is possible from all code locations.
1704 if (isKeyword (token
, KEYWORD_if
))
1707 * Check for an "else if" and consume the "if"
1712 if (isType (token
, TOKEN_OPEN_PAREN
))
1713 skipArgumentList(token
, false, NULL
);
1715 if (isType (token
, TOKEN_OPEN_CURLY
))
1716 parseBlock (token
, CORK_NIL
);
1719 /* The next token should only be read if this statement had its own
1721 read_next_token
= findCmdTerm (token
, true, false);
1723 return read_next_token
;
/* Callback given to foreachEntriesInScope() by moveChildren().
 * @p data is the intArray collecting the results; @p corkIndex is appended
 * to it. @p entry is expected to have a scope (asserted). */
1726 static bool collectChildren (int corkIndex
, tagEntryInfo
*entry
, void *data
)
1728 intArray
*children
= (intArray
*)data
;
1730 Assert (entry
->extensionFields
.scopeIndex
!= CORK_NIL
);
1731 intArrayAdd (children
, corkIndex
);
1736 /* During parsing, there are cases where a language object (parent)
1737 * should be tagged only when language objects (children)
1738 * are defined in the parent; if the parent has no child, the parser
1739 * should not make a tag for the parent.
1741 * Handling this case was not easy because the parser must fill
1742 * the scope field of children with the cork index of parent.
1743 * However, the parser can decide whether the parent should be tagged
1744 * or not after parsing inside the parent where the children are
1747 * "class" is an example of the language object of the parent.
1748 * "methods" are examples of the language object of the children.
1749 * "class" is tagged as a class only when methods are found in it.
1752 * The parser handles this case with the following steps:
1754 * 1. make a dummy tag entry for the candidate of parent with
1756 * > int dummyIndex = makeSimplePlaceholder().
1758 * ctags doesn't emit this dummy tag entry.
1760 * 2. parse inside the candidate of parent and count children.
1761 * If a child is found, make a tag for it with filling its
1762 * scope field with the dummyIndex.
1764 * 3. make a true tag entry for the parent if a child is found:
1766 * > int trueIndex = makeTagEntry (...);
1768 * 4. update the scope fields of children with the trueIndex.
1770 * moveChildren (dummyIndex, trueIndex);
/* Re-parents the tag entries whose scope is @p old_parent.
 * Their cork indexes are first collected into a temporary intArray through
 * foreachEntriesInScope() and collectChildren(); each entry is then
 * unregistered and its extensionFields.scopeIndex is set to
 * @p new_parent. The temporary array is deleted at the end. */
1773 static void moveChildren (int old_parent
, int new_parent
)
1775 intArray
*children
= intArrayNew ();
1776 foreachEntriesInScope (old_parent
, NULL
, collectChildren
, children
);
1777 for (unsigned int i
= 0; i
< intArrayCount (children
); i
++)
1779 int c
= intArrayItem (children
, i
);
1781 unregisterEntry (c
);
1782 tagEntryInfo
*e
= getEntryInCorkQueue (c
);
1784 e
->extensionFields
.scopeIndex
= new_parent
;
1787 intArrayDelete (children
);
/* Parses a function definition: a named function, a generator (with '*')
 * or an anonymous function, which is given a generated "anonymousFunction"
 * name. A keyword accepted by canBeFunctionName() is treated as the name.
 * @p lhs_name, when not NULL, is the name the function is assigned to: it
 * is tagged as a method or generator when @p is_inside_class is true, and
 * otherwise as a class or a function depending on isClassName().
 * The argument list is collected as the signature and the body is parsed
 * with parseBlock(). When @p lhs_name is NULL the rest of the statement is
 * consumed with findCmdTerm().
 * NOTE(review): the declared return type is bool, yet the value returned is
 * index_for_name, an int cork index that the caller in parseStatementRHS()
 * stores into state->indexForName; a conversion to bool would reduce it to
 * 0 or 1 -- confirm whether the return type is meant to be int. */
1790 static bool parseFunction (tokenInfo
*const token
, tokenInfo
*const lhs_name
, const bool is_inside_class
)
1794 const char *scope_str
= getNameStringForCorkIndex (token
->scope
);
1795 const char *scope_kind_str
= getKindStringForCorkIndex (token
->scope
);
1796 TRACE_ENTER_TEXT("token has scope '%s' of kind %s", scope_str
, scope_kind_str
);
1800 tokenInfo
*const name
= newToken ();
1801 vString
*const signature
= vStringNew ();
1802 bool is_generator
= false;
1803 bool is_anonymous
= false;
1804 int index_for_name
= CORK_NIL
;
1806 * This deals with these formats
1807 * function validFunctionTwo(a,b) {}
1808 * function * generator(a,b) {}
1811 copyToken (name
, token
, true);
1813 if (isType (name
, TOKEN_KEYWORD
) &&
1814 canBeFunctionName (name
, false /* true if we're in strict mode */))
1816 // treat as function name
1817 name
->type
= TOKEN_IDENTIFIER
;
1818 name
->keyword
= KEYWORD_NONE
;
1821 if (isType (name
, TOKEN_STAR
))
1823 is_generator
= true;
1826 if (isType (name
, TOKEN_OPEN_PAREN
))
1828 /* anonymous function */
1829 copyToken (token
, name
, false);
1830 anonGenerate (name
->string
, "anonymousFunction", JSTAG_FUNCTION
);
1831 is_anonymous
= true;
1833 else if (!isType (name
, TOKEN_IDENTIFIER
))
1838 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1839 skipArgumentList(token
, false, signature
);
1841 if ( isType (token
, TOKEN_OPEN_CURLY
) )
1843 if ( lhs_name
!= NULL
&& is_inside_class
)
1845 index_for_name
= makeJsTag (lhs_name
, is_generator
? JSTAG_GENERATOR
: JSTAG_METHOD
, signature
, NULL
);
1847 else if ( lhs_name
!= NULL
)
1849 index_for_name
= isClassName (lhs_name
) ?
1850 makeClassTag (lhs_name
, signature
, NULL
):
1851 makeFunctionTag (lhs_name
, signature
, is_generator
);
1854 int f
= index_for_name
,
1856 if ( f
== CORK_NIL
|| !is_anonymous
)
1857 p
= isClassName (name
) ?
1858 makeClassTagCommon (name
, signature
, NULL
, is_anonymous
) :
1859 makeFunctionTagCommon (name
, signature
, is_generator
, is_anonymous
);
1864 parseBlock (token
, f
);
1867 if ( lhs_name
== NULL
)
1868 findCmdTerm (token
, false, false);
1871 vStringDelete (signature
);
1875 return index_for_name
;
1878 /* Parses a block surrounded by curly braces.
1879 * @p parent_scope is the cork index of the scope for this block, or CORK_NIL
 * for unnamed scopes; when it is not CORK_NIL it replaces token->scope while
 * the block is parsed. token->scope is saved on entry and restored to the
 * saved value once the block has been parsed.
 * Statements starting with "this", "var", "let" or "const", and all other
 * statements, are handed to parseLine(); a nested '{' recurses into
 * parseBlock(). Parsing stops at EOF, at the closing curly brace, or when
 * a statement reports that no further token should be read. */
1880 static bool parseBlock (tokenInfo
*const token
, int parent_scope
)
1884 bool is_class
= false;
1885 bool read_next_token
= true;
1886 int save_scope
= token
->scope
;
1888 if (parent_scope
!= CORK_NIL
)
1890 token
->scope
= parent_scope
;
1895 * Make this routine a bit more forgiving.
1896 * If called on an open_curly advance it
1898 if (isType (token
, TOKEN_OPEN_CURLY
))
1901 if (! isType (token
, TOKEN_CLOSE_CURLY
))
1904 * Read until we find the closing brace,
1905 * any nested braces will be handled within
1909 read_next_token
= true;
1910 if (isKeyword (token
, KEYWORD_this
))
1913 * Means we are inside a class and have found
1914 * a class, not a function
1919 * Ignore the remainder of the line
1920 * findCmdTerm(token);
1922 read_next_token
= parseLine (token
, is_class
);
1924 else if (isKeyword (token
, KEYWORD_var
) ||
1925 isKeyword (token
, KEYWORD_let
) ||
1926 isKeyword (token
, KEYWORD_const
))
1929 * Potentially we have found an inner function.
1930 * Set something to indicate the scope
1932 read_next_token
= parseLine (token
, is_class
);
1934 else if (isType (token
, TOKEN_OPEN_CURLY
))
1936 /* Handle nested blocks */
1937 parseBlock (token
, CORK_NIL
);
1942 * It is possible for a line to have no terminator
1943 * if the following line is a closing brace.
1944 * parseLine will detect this case and indicate
1945 * whether we should read an additional token.
1947 read_next_token
= parseLine (token
, is_class
);
1951 * Always read a new token unless we find a statement without
1952 * a ending terminator
1954 if( read_next_token
)
1958 * If we find a statement without a terminator consider the
1959 * block finished, otherwise the stack will be off by one.
1961 } while (! isType (token
, TOKEN_EOF
) &&
1962 ! isType (token
, TOKEN_CLOSE_CURLY
) && read_next_token
);
1965 token
->scope
= save_scope
;
1966 if (parent_scope
!= CORK_NIL
)
/* Parses the members of an object literal or of a class body and makes
 * tags for them: methods, generators, getters, setters, properties and
 * fields.
 * @p class_index, when not CORK_NIL, becomes token->scope while the members
 * are parsed; the previous scope is restored at the end.
 * @p is_es6_class selects class-body rules: the member loop then continues
 * until EOF instead of only after a comma, and the "skip whatever is the
 * value" path used for object literals is not taken.
 * Keywords accepted by canBePropertyName() that are followed by '(' or ':'
 * are processed as identifiers; the token read in between is stored in
 * NextToken for the next read. Nested object literals recurse through a
 * placeholder scope whose children are moved to the property tag with
 * moveChildren(). */
1973 static bool parseMethods (tokenInfo
*const token
, int class_index
,
1974 const bool is_es6_class
)
1976 TRACE_ENTER_TEXT("token is '%s' of type %s in parentToken '%s' of kind %s (es6: %s)",
1977 vStringValue(token
->string
), tokenTypeName (token
->type
),
1978 class_index
== CORK_NIL
? "none" : getNameStringForCorkIndex (class_index
),
1979 class_index
== CORK_NIL
? "none" : getKindStringForCorkIndex (class_index
),
1980 is_es6_class
? "yes": "no");
1983 * When making a tag for `name', its core index is stored to
1984 * `indexForName'. The value stored to `indexForName' is valid
1985 * till the value for `name' is updated. If the value for `name'
1986 * is changed, `indexForName' is reset to CORK_NIL.
1988 tokenInfo
*const name
= newToken ();
1989 int index_for_name
= CORK_NIL
;
1990 bool has_methods
= false;
1991 int save_scope
= token
->scope
;
1993 if (class_index
!= CORK_NIL
)
1994 token
->scope
= class_index
;
1997 * This deals with these formats
1998 * validProperty : 2,
1999 * validMethod : function(a,b) {}
2000 * 'validMethod2' : function(a,b) {}
2001 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
2011 * ES6 computed name:
2013 * get [property]() {}
2014 * set [property]() {}
2017 * tc39/proposal-class-fields
2018 * field0 = function(a,b) {}
2020 * The parser extracts field0 as a method because the left value
2021 * is a function (kind propagation), and field1 as a field.
2023 * static methods and static initialization blocks
2024 * - ref. https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Classes/Static_initialization_blocks
2029 * static prop = val;
2032 bool dont_read
= false;
2035 bool is_setter
= false;
2036 bool is_getter
= false;
2043 if (isType (token
, TOKEN_CLOSE_CURLY
))
2048 if (isType (token
, TOKEN_KEYWORD
) && canBePropertyName (token
))
2050 tokenInfo
*saved_token
= newToken ();
2051 copyToken (saved_token
, token
, true);
2054 /* it wasn't actually a keyword after all, make it an identifier */
2055 if (isType(token
, TOKEN_OPEN_PAREN
) || isType(token
, TOKEN_COLON
))
2057 Assert (NextToken
== NULL
);
2058 NextToken
= newToken ();
2059 copyToken (NextToken
, token
, false); /* save token for next read */
2060 copyToken (token
, saved_token
, true); /* restore token to process */
2061 token
->type
= TOKEN_IDENTIFIER
; /* process as identifier */
2062 token
->keyword
= KEYWORD_NONE
;
2064 else if (isKeyword (saved_token
, KEYWORD_static
) &&
2065 isType (token
, TOKEN_OPEN_CURLY
))
2067 /* static initialization block */
2068 deleteToken (saved_token
);
2069 parseBlock (token
, class_index
);
2072 else if (isKeyword (saved_token
, KEYWORD_get
))
2074 else if (isKeyword (saved_token
, KEYWORD_set
))
2076 else if (isKeyword (saved_token
, KEYWORD_async
) ||
2077 isKeyword (saved_token
, KEYWORD_static
))
2079 /* can be a qualifier for another "keyword", so start over */
2080 deleteToken (saved_token
);
2084 deleteToken (saved_token
);
2086 else if (isType (token
, TOKEN_DOTS
))
2088 /* maybe spread operator. Just skip the next expression. */
2089 findCmdTerm(token
, true, true);
2093 if (! isType (token
, TOKEN_KEYWORD
) &&
2094 ! isType (token
, TOKEN_SEMICOLON
))
2096 bool is_generator
= false;
2097 bool is_shorthand
= false; /* ES6 shorthand syntax */
2098 bool is_computed_name
= false; /* ES6 computed property name */
2099 bool is_dynamic_prop
= false;
2100 vString
*dprop
= NULL
; /* is_computed_name is true but
2101 * the name is not represented in
2102 * a string literal. The expressions
2103 * go this string. */
2105 if (isType (token
, TOKEN_STAR
)) /* shorthand generator */
2107 is_generator
= true;
2111 if (isType (token
, TOKEN_OPEN_SQUARE
))
2113 is_computed_name
= true;
2114 dprop
= vStringNewInit ("[");
2115 readTokenFull (token
, false, dprop
);
2118 copyToken(name
, token
, true);
2119 index_for_name
= CORK_NIL
;
2120 if (is_computed_name
&& ! isType (token
, TOKEN_STRING
))
2121 is_dynamic_prop
= true;
2123 readTokenFull (token
, false, dprop
);
2125 if (is_computed_name
)
2130 if (isType (token
, TOKEN_CLOSE_SQUARE
))
2134 is_dynamic_prop
= true;
2135 if (isType (token
, TOKEN_OPEN_SQUARE
))
2138 readTokenFull (token
, false, (is_dynamic_prop
&& depth
!= 0)? dprop
: NULL
);
2139 } while (! isType (token
, TOKEN_EOF
) && depth
> 0);
2142 if (is_dynamic_prop
)
2144 injectDynamicName (name
, dprop
);
2145 index_for_name
= CORK_NIL
;
2149 vStringDelete (dprop
);
2151 is_shorthand
= isType (token
, TOKEN_OPEN_PAREN
);
2152 bool can_be_field
= isType (token
, TOKEN_EQUAL_SIGN
);
2153 if ( isType (token
, TOKEN_COLON
) || can_be_field
|| is_shorthand
)
2158 if (isKeyword (token
, KEYWORD_async
))
2162 vString
* signature
= vStringNew ();
2163 if ( is_shorthand
|| isKeyword (token
, KEYWORD_function
) )
2165 TRACE_PRINT("Seems to be a function or shorthand");
2170 if (isType (token
, TOKEN_STAR
))
2172 /* generator: 'function' '*' '(' ... ')' '{' ... '}' */
2173 is_generator
= true;
2177 if ( isType (token
, TOKEN_OPEN_PAREN
) )
2179 skipArgumentList(token
, false, signature
);
2183 if (isType (token
, TOKEN_OPEN_CURLY
))
2187 int kind
= JSTAG_METHOD
;
2189 kind
= JSTAG_GENERATOR
;
2191 kind
= JSTAG_GETTER
;
2193 kind
= JSTAG_SETTER
;
2195 index_for_name
= makeJsTag (name
, kind
, signature
, NULL
);
2196 parseBlock (token
, index_for_name
);
2199 * If we aren't parsing an ES6 class (for which there
2200 * is no mandatory separators), read to the closing
2201 * curly, check next token, if a comma, we must loop
2208 else if (! is_es6_class
)
2211 tokenInfo
*saved_token
= newToken ();
2213 /* skip whatever is the value */
2214 while (! isType (token
, TOKEN_COMMA
) &&
2215 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
2216 ! isType (token
, TOKEN_EOF
))
2218 if (isType (token
, TOKEN_OPEN_CURLY
))
2220 /* Recurse to find child properties/methods */
2221 p
= makeSimplePlaceholder (name
->string
);
2222 parseMethods (token
, p
, false);
2225 else if (isType (token
, TOKEN_OPEN_PAREN
))
2227 vStringClear (signature
);
2228 skipArgumentList (token
, false, signature
);
2230 else if (isType (token
, TOKEN_OPEN_SQUARE
))
2232 skipArrayList (token
, false);
2234 else if (isType (token
, TOKEN_ARROW
))
2236 TRACE_PRINT("Seems to be an anonymous function");
2237 if (vStringIsEmpty (signature
) &&
2238 isType (saved_token
, TOKEN_IDENTIFIER
))
2240 vStringPut (signature
, '(');
2241 vStringCat (signature
, saved_token
->string
);
2242 vStringPut (signature
, ')');
2245 deleteToken (saved_token
);
2250 copyToken (saved_token
, token
, true);
2254 deleteToken (saved_token
);
2257 index_for_name
= makeJsTag (name
, JSTAG_PROPERTY
, NULL
, NULL
);
2259 moveChildren (p
, index_for_name
);
2261 else if (can_be_field
)
2263 makeJsTag (name
, JSTAG_FIELD
, NULL
, NULL
);
2264 parseLine (token
, true);
2267 vStringDelete (signature
);
2271 bool is_property
= isType (token
, TOKEN_COMMA
);
2272 makeJsTag (name
, is_property
? JSTAG_PROPERTY
: JSTAG_FIELD
, NULL
, NULL
);
2273 if (!isType (token
, TOKEN_SEMICOLON
) && !is_property
)
2277 } while ( isType(token
, TOKEN_COMMA
) ||
2278 ( is_es6_class
&& ! isType(token
, TOKEN_EOF
) ) );
2280 TRACE_PRINT("Finished parsing methods");
2282 findCmdTerm (token
, false, false);
2285 token
->scope
= save_scope
;
2288 TRACE_LEAVE_TEXT("found method(s): %s", has_methods
? "yes": "no");
/* Parses an ES6 class definition or class expression.
 * The token after the class keyword is read into class_name: an identifier
 * makes the class a named one, anything else gets a generated
 * "AnonymousClass" name so that the members have a scope.
 * @p target_name is the name the class is tagged under; class_name is used
 * in its place on one path (the guarding condition is not shown in this
 * excerpt). When an "extends" clause is present, the text up to the '{'
 * (or ';' or EOF) is collected, trimmed, and passed as the inheritance
 * information of the class tag. A named class whose name differs from
 * @p target_name gets a second class tag. The body, when present, is
 * parsed with parseMethods() in ES6-class mode under the class tag. */
2292 static bool parseES6Class (tokenInfo
*const token
, const tokenInfo
*target_name
)
2296 tokenInfo
* class_name
= newToken ();
2297 vString
*inheritance
= NULL
;
2298 bool is_anonymous
= true;
2300 copyToken (class_name
, token
, true);
2301 readToken (class_name
);
2304 if (isType (class_name
, TOKEN_IDENTIFIER
))
2307 is_anonymous
= false;
2311 copyToken (token
, class_name
, true);
2312 /* We create a fake name so we have a scope for the members */
2314 anonGenerate (class_name
->string
, "AnonymousClass", JSTAG_CLASS
);
2318 target_name
= class_name
;
2320 if (isKeyword (token
, KEYWORD_extends
))
2321 inheritance
= vStringNew ();
2323 /* skip inheritance info */
2324 while (! isType (token
, TOKEN_OPEN_CURLY
) &&
2325 ! isType (token
, TOKEN_EOF
) &&
2326 ! isType (token
, TOKEN_SEMICOLON
))
2327 readTokenFull (token
, false, inheritance
);
2329 /* remove the last added token (here we assume it's one char, "{" or ";" */
2330 if (inheritance
&& vStringLength (inheritance
) > 0 &&
2331 ! isType (token
, TOKEN_EOF
))
2333 vStringChop (inheritance
);
2334 vStringStripTrailing (inheritance
);
2335 vStringStripLeading (inheritance
);
2338 TRACE_PRINT("Emitting tag for class '%s'", vStringValue(target_name
->string
));
2340 int r
= makeJsTagCommon (target_name
, JSTAG_CLASS
, NULL
, inheritance
,
2341 (is_anonymous
&& (target_name
== class_name
)));
2343 if (! is_anonymous
&& target_name
!= class_name
)
2345 /* FIXME: what to do with the secondary name? It's local to the
2346 * class itself, so not very useful... let's hope people
2347 * don't give it another name than the target in case of
2348 * var MyClass = class MyClassSecondaryName { ... }
2349 * I guess it could be an alias to MyClass, or duplicate it
2350 * altogether, not sure. */
2351 makeJsTag (class_name
, JSTAG_CLASS
, NULL
, inheritance
);
2355 vStringDelete (inheritance
);
2357 if (isType (token
, TOKEN_OPEN_CURLY
))
2358 parseMethods (token
, r
, true);
2360 deleteToken (class_name
);
/* Re-kinds the cork queue entry at @p index as JSTAG_FUNCTION.
 * Nothing happens when the entry does not exist or already is a function.
 * The change is made only when @p signature is NULL or the entry has no
 * signature yet; a duplicate of @p signature (eStrdup) is stored in the
 * entry's signature field. */
2366 static void convertToFunction (int index
, const char *signature
)
2368 tagEntryInfo
*e
= getEntryInCorkQueue(index
);
2369 if (e
&& e
->kindIndex
!= JSTAG_FUNCTION
2370 && ( signature
== NULL
|| e
->extensionFields
.signature
== NULL
))
2372 e
->kindIndex
= JSTAG_FUNCTION
;
2374 e
->extensionFields
.signature
= eStrdup (signature
);
/* Cuts @p sig right after its last ')' so that text following the
 * parameter list (such as a trailing "=>") is dropped. The string is
 * modified in place.
 * NOTE(review): the result of strrchr() is used in pointer arithmetic, so
 * @p sig is expected to contain a ')' -- confirm callers guarantee it. */
2378 static vString
*trimGarbageInSignature (vString
*sig
)
2380 /* Drop "=>" at the end. */
2381 const char *sigstr
= vStringValue (sig
);
2382 char *last
= strrchr (sigstr
, ')');
2384 vStringTruncate (sig
, last
- sigstr
+ 1);
/* Starts a signature string for @p token. A new vString is created holding
 * "(" and, depending on the token type, the identifier text, a ')' or
 * "..." (for TOKEN_DOTS) is appended to it. */
2388 static vString
*makeVStringForSignature (tokenInfo
*const token
)
2390 vString
* sig
= vStringNewInit ("(");
2392 if (isType (token
, TOKEN_IDENTIFIER
))
2393 vStringCat (sig
, token
->string
);
2394 else if (isType (token
, TOKEN_CLOSE_PAREN
))
2395 vStringPut (sig
, ')');
2396 else if (isType (token
, TOKEN_DOTS
))
2397 vStringCatS (sig
, "...");
/* State carried while one statement is parsed.
 * NOTE(review): presumably this is the statementState handed to
 * parsePrototype(), parseStatementLHS() and parseStatementRHS(), which use
 * its indexForName, isClass, isTerminated, foundThis, isGlobal and isConst
 * members -- confirm against the member list. */
2402 typedef struct sStatementState
{
2411 static void deleteTokenFn(void *token
) { deleteToken(token
); }
/* Handles the "prototype" part of a statement such as
 * Name.prototype.method = function () {} or Name.prototype = { ... }.
 * @p name must be an identifier or a string token, otherwise the input is
 * treated as bad. It is tagged as a class and state->isClass is set.
 * ".method" form: the method token is scoped to the class tag; tokens are
 * read up to ';', '}', '{' or EOF, collecting the first argument list as
 * the signature and remembering chained "X.prototype.y =" names in
 * prototype_tokens (with the ".prototype" part removed). The method and
 * every remembered name are tagged as JSTAG_METHOD, a '{' body is parsed
 * with parseBlock(), and state->isTerminated is set.
 * "= {" form: the members are parsed with parseMethods() under the class
 * tag, the statement end is located with findCmdTerm() and
 * state->isTerminated is set to true.
 * Periods are accepted inside identifiers while the ".method" form is
 * scanned (accept_period_in_identifier). */
2413 static bool parsePrototype (tokenInfo
*const name
, tokenInfo
*const token
, statementState
*const state
)
2418 * When we reach the "prototype" tag, we infer:
2419 * "BindAgent" is a class
2420 * "build" is a method
2422 * function BindAgent( repeatableIdName, newParentIdName ) {
2426 * Specified function name: "build"
2427 * BindAgent.prototype.build =
2428 * BondAgent.prototype.crush = function( mode ) {
2429 * maybe parse nested functions
2434 * ValidClassOne.prototype = {
2435 * 'validMethodOne' : function(a,b) {},
2436 * 'validMethodTwo' : function(a,b) {}
2440 if (! ( isType (name
, TOKEN_IDENTIFIER
)
2441 || isType (name
, TOKEN_STRING
) ) )
2443 * Unexpected input. Try to reset the parsing.
2445 * TOKEN_STRING is acceptable. e.g.:
2446 * -----------------------------------
2447 * "a".prototype = function( mode ) {}
2450 TRACE_LEAVE_TEXT("bad input");
2454 state
->indexForName
= makeClassTag (name
, NULL
, NULL
);
2455 state
->isClass
= true;
2458 * There should a ".function_name" next.
2461 if (isType (token
, TOKEN_PERIOD
))
2467 if (isType (token
, TOKEN_KEYWORD
) && canBePropertyName (token
))
2469 // treat as function name
2470 token
->type
= TOKEN_IDENTIFIER
;
2471 token
->keyword
= KEYWORD_NONE
;
2474 if (! isType(token
, TOKEN_KEYWORD
))
2476 vString
*const signature
= vStringNew ();
2478 token
->scope
= state
->indexForName
;
2480 tokenInfo
*identifier_token
= newToken ();
2481 ptrArray
*prototype_tokens
= NULL
;
2482 accept_period_in_identifier(true);
2484 tokenInfo
*const method_body_token
= newToken ();
2485 copyToken (method_body_token
, token
, true);
2486 readToken (method_body_token
);
2488 while (! isType (method_body_token
, TOKEN_SEMICOLON
) &&
2489 ! isType (method_body_token
, TOKEN_CLOSE_CURLY
) &&
2490 ! isType (method_body_token
, TOKEN_OPEN_CURLY
) &&
2491 ! isType (method_body_token
, TOKEN_EOF
))
2493 if ( isType (method_body_token
, TOKEN_OPEN_PAREN
) )
2494 skipArgumentList(method_body_token
, false,
2495 vStringLength (signature
) == 0 ? signature
: NULL
);
2498 char* s1
= vStringValue (identifier_token
->string
);
2500 if ( isType (method_body_token
, TOKEN_EQUAL_SIGN
) &&
2501 ! isType (identifier_token
, TOKEN_UNDEFINED
) &&
2502 (s2
= strstr (s1
, ".prototype.")))
2504 if (prototype_tokens
== NULL
)
2505 prototype_tokens
= ptrArrayNew (deleteTokenFn
);
2507 memmove (s2
, s2
+10, strlen (s2
+10) + 1);
2508 vStringSetLength (identifier_token
->string
);
2510 tokenInfo
*const save_token
= newToken ();
2511 copyToken (save_token
, identifier_token
, true);
2512 ptrArrayAdd (prototype_tokens
, save_token
);
2513 identifier_token
->type
= TOKEN_UNDEFINED
;
2515 else if ( isType(method_body_token
, TOKEN_IDENTIFIER
))
2516 copyToken (identifier_token
, method_body_token
, false);
2518 readToken (method_body_token
);
2521 deleteToken (identifier_token
);
2522 accept_period_in_identifier(false);
2524 int index
= makeJsTag (token
, JSTAG_METHOD
, signature
, NULL
);
2526 if (prototype_tokens
!= NULL
)
2528 for (int i
=0; i
<ptrArrayCount (prototype_tokens
); i
++)
2530 makeJsTag (ptrArrayItem (prototype_tokens
, i
), JSTAG_METHOD
, signature
, NULL
);
2532 ptrArrayUnref (prototype_tokens
);
2535 vStringDelete (signature
);
2537 if ( isType (method_body_token
, TOKEN_OPEN_CURLY
))
2539 parseBlock (method_body_token
, index
);
2540 state
->isTerminated
= true;
2543 state
->isTerminated
= isType (method_body_token
, TOKEN_SEMICOLON
);
2545 deleteToken (method_body_token
);
2546 TRACE_LEAVE_TEXT("done: single");
2550 else if (isType (token
, TOKEN_EQUAL_SIGN
))
2553 if (isType (token
, TOKEN_OPEN_CURLY
))
2558 * Creates tags for each of these class methods
2559 * ValidClassOne.prototype = {
2560 * 'validMethodOne' : function(a,b) {},
2561 * 'validMethodTwo' : function(a,b) {}
2564 parseMethods(token
, state
->indexForName
, false);
2566 * Find to the end of the statement
2568 findCmdTerm (token
, false, false);
2569 state
->isTerminated
= true;
2570 TRACE_LEAVE_TEXT("done: multiple");
2575 TRACE_LEAVE_TEXT("done: not found");
/* Parses the dotted left-hand side of a statement, looping while the
 * current token is a TOKEN_PERIOD.
 * A non-keyword token is appended to @p name with addContext() (after being
 * scoped to state->indexForName when state->isClass is set), and
 * state->indexForName is reset to CORK_NIL because the name changed.
 * The "prototype" keyword is delegated to parsePrototype(). */
2579 static bool parseStatementLHS (tokenInfo
*const name
, tokenInfo
*const token
, statementState
*const state
)
2586 if (! isType(token
, TOKEN_KEYWORD
))
2588 if ( state
->isClass
)
2589 token
->scope
= state
->indexForName
;
2592 addContext (name
, token
);
2593 state
->indexForName
= CORK_NIL
;
2598 else if ( isKeyword(token
, KEYWORD_prototype
) )
2600 if (! parsePrototype (name
, token
, state
) )
2602 TRACE_LEAVE_TEXT("done: prototype");
2608 } while (isType (token
, TOKEN_PERIOD
));
2614 static bool parseStatementRHS (tokenInfo
*const name
, tokenInfo
*const token
, statementState
*const state
, bool is_inside_class
)
2618 int paren_depth
= 0;
2619 int arrowfun_paren_depth
= 0;
2620 bool canbe_arrowfun
= false;
2624 /* rvalue might be surrounded with parentheses */
2625 while (isType (token
, TOKEN_OPEN_PAREN
))
2628 arrowfun_paren_depth
++;
2632 if (isKeyword (token
, KEYWORD_async
))
2634 arrowfun_paren_depth
= 0;
2637 /* check for function signature */
2638 while (isType (token
, TOKEN_OPEN_PAREN
))
2641 arrowfun_paren_depth
++;
2646 if ( isKeyword (token
, KEYWORD_function
) )
2648 state
->indexForName
= parseFunction (token
, name
, is_inside_class
);
2650 else if (isKeyword (token
, KEYWORD_class
))
2652 state
->isTerminated
= parseES6Class (token
, name
);
2654 else if (isType (token
, TOKEN_OPEN_CURLY
))
2657 * Creates tags for each of these class methods
2659 * 'validMethodOne' : function(a,b) {},
2660 * 'validMethodTwo' : function(a,b) {}
2662 * Or checks if this is a hash variable.
2665 bool anon_object
= vStringIsEmpty (name
->string
);
2668 anonGenerate (name
->string
, "anonymousObject", JSTAG_VARIABLE
);
2669 state
->indexForName
= CORK_NIL
;
2671 int p
= makeSimplePlaceholder (name
->string
);
2672 if ( parseMethods(token
, p
, false) )
2674 jsKind kind
= state
->foundThis
|| strchr (vStringValue(name
->string
), '.') != NULL
? JSTAG_PROPERTY
: JSTAG_VARIABLE
;
2675 state
->indexForName
= makeJsTagCommon (name
, kind
, NULL
, NULL
, anon_object
);
2676 moveChildren (p
, state
->indexForName
);
2678 else if ( token
->nestLevel
== 0 && state
->isGlobal
)
2681 * Only create variables for global scope
2683 * A pointer can be created to the function.
2684 * If we recognize the function/class name ignore the variable.
2685 * This format looks identical to a variable definition.
2686 * A variable defined outside of a block is considered
2687 * a global variable:
2690 * This is not a global variable:
2691 * var g_var = function;
2692 * This is a global variable:
2693 * var g_var = different_var_name;
2695 state
->indexForName
= anyKindsEntryInScope (name
->scope
, vStringValue (name
->string
),
2696 (int[]){JSTAG_VARIABLE
, JSTAG_FUNCTION
, JSTAG_CLASS
}, 3, true);
2698 if (state
->indexForName
== CORK_NIL
)
2699 state
->indexForName
= makeJsTag (name
, state
->isConst
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
, NULL
);
2701 /* Here we should be at the end of the block, on the close curly.
2702 * If so, read the next token not to confuse that close curly with
2703 * the end of the current statement. */
2704 if (isType (token
, TOKEN_CLOSE_CURLY
))
2706 readTokenFull(token
, true, NULL
);
2707 state
->isTerminated
= isType (token
, TOKEN_SEMICOLON
);
2710 else if (isType (token
, TOKEN_OPEN_SQUARE
) && !vStringIsEmpty (name
->string
))
2713 * Creates tag for an array
2715 skipArrayList(token
, true);
2716 jsKind kind
= state
->foundThis
|| strchr (vStringValue(name
->string
), '.') != NULL
? JSTAG_PROPERTY
: JSTAG_VARIABLE
;
2718 * Only create variables for global scope or class/object properties
2720 if ( ( token
->nestLevel
== 0 && state
->isGlobal
) || kind
== JSTAG_PROPERTY
)
2722 state
->indexForName
= makeJsTagCommon (name
, kind
, NULL
, NULL
, false);
2725 else if (isKeyword (token
, KEYWORD_new
))
2728 bool is_var
= isType (token
, TOKEN_IDENTIFIER
) || isKeyword (token
, KEYWORD_capital_object
);
2729 if ( isKeyword (token
, KEYWORD_function
) ||
2730 isKeyword (token
, KEYWORD_capital_function
) ||
2733 if ( isKeyword (token
, KEYWORD_capital_function
) && isClassName (name
) )
2734 state
->isClass
= true;
2736 if ( isType (token
, TOKEN_IDENTIFIER
) )
2737 skipQualifiedIdentifier (token
);
2741 if ( isType (token
, TOKEN_OPEN_PAREN
) )
2742 skipArgumentList(token
, true, NULL
);
2744 if (isType (token
, TOKEN_SEMICOLON
) && token
->nestLevel
== 0)
2747 state
->indexForName
= makeJsTag (name
, state
->isConst
? JSTAG_CONSTANT
: state
->foundThis
? JSTAG_PROPERTY
: JSTAG_VARIABLE
, NULL
, NULL
);
2748 else if ( state
->isClass
)
2749 state
->indexForName
= makeClassTag (name
, NULL
, NULL
);
2752 /* FIXME: we cannot really get a meaningful
2753 * signature from a `new Function()` call,
2754 * so for now just don't set any */
2755 state
->indexForName
= makeFunctionTag (name
, NULL
, false);
2758 else if (isType (token
, TOKEN_CLOSE_CURLY
))
2759 state
->isTerminated
= false;
2762 else if (! isType (token
, TOKEN_KEYWORD
) &&
2763 token
->nestLevel
== 0 && state
->isGlobal
)
2766 * Only create variables for global scope
2768 * A pointer can be created to the function.
2769 * If we recognize the function/class name ignore the variable.
2770 * This format looks identical to a variable definition.
2771 * A variable defined outside of a block is considered
2772 * a global variable:
2775 * This is not a global variable:
2776 * var g_var = function;
2777 * This is a global variable:
2778 * var g_var = different_var_name;
2780 state
->indexForName
= anyKindsEntryInScope (name
->scope
, vStringValue (name
->string
),
2781 (int[]){JSTAG_VARIABLE
, JSTAG_FUNCTION
, JSTAG_CLASS
}, 3, true);
2783 if (state
->indexForName
== CORK_NIL
)
2785 state
->indexForName
= makeJsTag (name
, state
->isConst
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
, NULL
);
2786 if (isType (token
, TOKEN_IDENTIFIER
))
2787 canbe_arrowfun
= true;
2790 else if ( isType (token
, TOKEN_IDENTIFIER
) )
2792 canbe_arrowfun
= true;
2795 if (arrowfun_paren_depth
== 0 && canbe_arrowfun
)
2797 /* var v = a => { ... } */
2798 vString
*sig
= vStringNewInit ("(");
2799 vStringCat (sig
, token
->string
);
2800 vStringPut (sig
, ')');
2801 readTokenFull (token
, true, NULL
);
2802 if (isType (token
, TOKEN_ARROW
))
2804 if (state
->indexForName
== CORK_NIL
) // was not a global variable
2805 state
->indexForName
= makeFunctionTag (name
, sig
, false);
2807 convertToFunction (state
->indexForName
, vStringValue (sig
));
2809 vStringDelete (sig
);
2812 if (paren_depth
> 0)
2814 /* Collect parameters for arrow function. */
2815 vString
*sig
= (arrowfun_paren_depth
== 1)? makeVStringForSignature (token
): NULL
;
2817 while (paren_depth
> 0 && ! isType (token
, TOKEN_EOF
))
2819 if (isType (token
, TOKEN_OPEN_PAREN
))
2822 arrowfun_paren_depth
++;
2824 else if (isType (token
, TOKEN_CLOSE_PAREN
))
2827 arrowfun_paren_depth
--;
2829 readTokenFull (token
, true, sig
);
2831 /* var f = (a, b) => { ... } */
2832 if (arrowfun_paren_depth
== 0 && isType (token
, TOKEN_ARROW
) && sig
)
2834 if (state
->indexForName
== CORK_NIL
) // was not a global variable
2835 state
->indexForName
= makeFunctionTag (name
, trimGarbageInSignature (sig
), false);
2837 convertToFunction (state
->indexForName
,
2838 vStringValue (trimGarbageInSignature (sig
)));
2840 vStringDelete (sig
);
2844 if (isType (token
, TOKEN_CLOSE_CURLY
))
2845 state
->isTerminated
= false;
2847 vStringDelete (sig
); /* NULL is acceptable. */
2854 static bool parseStatement (tokenInfo
*const token
, bool is_inside_class
)
2856 TRACE_ENTER_TEXT("is_inside_class: %s", is_inside_class
? "yes": "no");
2859 * When making a tag for `name', its core index is stored to
2860 * `indexForName'. The value stored to `indexForName' is valid
2861 * till the value for `name' is updated. If the value for `name'
2862 * is changed, `indexForName' is reset to CORK_NIL.
2864 tokenInfo
*const name
= newToken ();
2865 int save_scope
= token
->scope
;
2866 bool found_lhs
= false;
2867 statementState state
= {
2868 .indexForName
= CORK_NIL
,
2869 .isClass
= is_inside_class
,
2871 .isTerminated
= true,
2877 * Functions can be named or unnamed.
2878 * This deals with these formats:
2880 * validFunctionOne = function(a,b) {}
2881 * testlib.validFunctionFive = function(a,b) {}
2882 * var innerThree = function(a,b) {}
2883 * var innerFour = (a,b) {}
2884 * var D2 = secondary_fcn_name(a,b) {}
2885 * var D3 = new Function("a", "b", "return a+b;");
2887 * testlib.extras.ValidClassOne = function(a,b) {
2891 * testlib.extras.ValidClassOne.prototype = {
2892 * 'validMethodOne' : function(a,b) {},
2893 * 'validMethodTwo' : function(a,b) {}
2895 * ValidClassTwo = function ()
2897 * this.validMethodThree = function() {}
2899 * this.validMethodFour = () {}
2901 * Database.prototype.validMethodThree = Database_getTodaysDate;
2905 * var can precede an inner function
2907 if ( isKeyword(token
, KEYWORD_var
) ||
2908 isKeyword(token
, KEYWORD_let
) ||
2909 isKeyword(token
, KEYWORD_const
) )
2911 TRACE_PRINT("var/let/const case");
2912 state
.isConst
= isKeyword(token
, KEYWORD_const
);
2914 * Only create variables for global scope
2916 if ( token
->nestLevel
== 0 )
2918 state
.isGlobal
= true;
2924 state
.indexForName
= CORK_NIL
;
2925 state
.foundThis
= false;
2926 if ( isKeyword(token
, KEYWORD_this
) )
2928 TRACE_PRINT("found 'this' keyword");
2929 state
.foundThis
= true;
2932 if (isType (token
, TOKEN_PERIOD
))
2936 else if (isType (token
, TOKEN_OPEN_SQUARE
))
2938 skipArrayList (token
, false);
2942 copyToken(name
, token
, true);
2943 TRACE_PRINT("name becomes '%s' of type %s",
2944 vStringValue(token
->string
), tokenTypeName (token
->type
));
2946 while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
2947 ! isType (token
, TOKEN_SEMICOLON
) &&
2948 ! isType (token
, TOKEN_EQUAL_SIGN
) &&
2949 ! isType (token
, TOKEN_COMMA
) &&
2950 ! isType (token
, TOKEN_EOF
))
2953 if (isType (token
, TOKEN_OPEN_CURLY
))
2955 parseBlock (token
, CORK_NIL
);
2956 readTokenFull (token
, true, NULL
);
2958 else if (isKeyword (token
, KEYWORD_function
))
2960 parseFunction (token
, NULL
, false);
2961 readTokenFull (token
, true, NULL
);
2964 /* Potentially the name of the function */
2965 else if (isType (token
, TOKEN_PERIOD
))
2968 * Cannot be a global variable if it has dot references in the name
2970 state
.isGlobal
= false;
2971 /* Assume it's an assignment to a global name (e.g. a class) using
2972 * its fully qualified name, so strip the scope.
2973 * FIXME: resolve the scope so we can make more than an assumption. */
2974 token
->scope
= CORK_NIL
;
2975 name
->scope
= CORK_NIL
;
2976 if ( ! parseStatementLHS (name
, token
, &state
) )
2980 readTokenFull (token
, true, NULL
);
2982 if ( isType (token
, TOKEN_OPEN_PAREN
) )
2983 skipArgumentList(token
, false, NULL
);
2985 if ( isType (token
, TOKEN_OPEN_SQUARE
) )
2986 skipArrayList(token
, false);
2989 if ( isType (token
, TOKEN_CLOSE_CURLY
) )
2992 * Reaching this section without having
2993 * processed an open curly brace indicates
2994 * the statement is most likely not terminated.
2996 state
.isTerminated
= false;
2998 else if ( isType (token
, TOKEN_SEMICOLON
) ||
2999 isType (token
, TOKEN_EOF
) ||
3000 isType (token
, TOKEN_COMMA
) )
3003 * Only create variables for global scope
3005 if ( token
->nestLevel
== 0 && state
.isGlobal
)
3008 * Handles this syntax:
3011 state
.indexForName
= makeJsTag (name
, state
.isConst
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
, NULL
);
3014 * Statement has ended.
3015 * This deals with calls to functions, like:
3018 if (isType (token
, TOKEN_COMMA
))
3021 state
.isClass
= false;
3027 bool ok
= found_lhs
;
3028 if ( ok
&& isType (token
, TOKEN_EQUAL_SIGN
) )
3030 ok
= parseStatementRHS (name
, token
, &state
, is_inside_class
);
3032 /* if we aren't already at the cmd end, advance to it and check whether
3033 * the statement was terminated */
3035 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
3036 ! isType (token
, TOKEN_SEMICOLON
))
3039 * Statements can be optionally terminated in the case of
3040 * statement prior to a close curly brace as in the
3041 * document.write line below:
3043 * function checkForUpdate() {
3045 * document.write("hello from checkForUpdate<br>")
3050 state
.isTerminated
= findCmdTerm (token
, true, true);
3051 /* if we're at a comma, try and read a second var */
3052 if (isType (token
, TOKEN_COMMA
))
3055 state
.isClass
= false;
3059 else if (ok
&& isType (token
, TOKEN_SEMICOLON
))
3060 state
.isTerminated
= true;
3064 token
->scope
= save_scope
;
3067 TRACE_LEAVE_TEXT("is terminated: %d", (int) state
.isTerminated
);
3068 return state
.isTerminated
;
3071 static void parseUI5 (tokenInfo
*const token
)
3073 tokenInfo
*const name
= newToken ();
3075 * SAPUI5 is built on top of jQuery.
3076 * It follows a standard format:
3077 * sap.ui.controller("id.of.controller", {
3078 * method_name : function... {
3081 * method_name : function ... {
3085 * Handle the parsing of the initial controller (and the
3086 * same for "view") and then allow the methods to be
3092 if (isType (token
, TOKEN_PERIOD
))
3097 while (! isType (token
, TOKEN_OPEN_PAREN
) &&
3098 ! isType (token
, TOKEN_EOF
))
3104 if (isType (token
, TOKEN_STRING
))
3106 copyToken(name
, token
, true);
3110 if (isType (token
, TOKEN_COMMA
))
3113 if (isType(name
, TOKEN_STRING
))
3116 * `name' can include '.'.
3117 * Setting dynamicProp to true can prohibit
3118 * that makeClassTag inspects the inside
3121 name
->dynamicProp
= true;
3122 r
= makeClassTag (name
, NULL
, NULL
);
3125 * `name' specifies a class of OpenUI5.
3126 * So tagging it as a language object of
3127 * JavaScript is incorrect. We have to introduce
3128 * OpenUI5 language as a subparser of JavaScript
3129 * to fix this situation.
3135 parseMethods (token
, r
, false);
3136 } while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
3137 ! isType (token
, TOKEN_EOF
));
3143 static bool parseLine (tokenInfo
*const token
, bool is_inside_class
)
3145 TRACE_ENTER_TEXT("token is '%s' of type %s",
3146 vStringValue(token
->string
), tokenTypeName (token
->type
));
3148 bool is_terminated
= true;
3150 * Detect the common statements, if, while, for, do, ...
3151 * This is necessary since the last statement within a block "{}"
3152 * can be optionally terminated.
3154 * If the statement is not terminated, we need to tell
3155 * the calling routine to prevent reading an additional token
3156 * looking for the end of the statement.
3159 if (isType(token
, TOKEN_KEYWORD
))
3161 switch (token
->keyword
)
3166 is_terminated
= parseLoop (token
);
3172 case KEYWORD_finally
:
3173 /* Common semantics */
3174 is_terminated
= parseIf (token
);
3176 case KEYWORD_switch
:
3177 parseSwitch (token
);
3179 case KEYWORD_return
:
3182 is_terminated
= parseLine (token
, is_inside_class
);
3184 case KEYWORD_function
:
3185 parseFunction (token
, NULL
, false);
3188 is_terminated
= parseES6Class (token
, NULL
);
3191 is_terminated
= parseStatement (token
, is_inside_class
);
3198 * Special case where single line statements may not be
3199 * SEMICOLON terminated. parseBlock needs to know this
3200 * so that it does not read the next token.
3202 is_terminated
= parseStatement (token
, is_inside_class
);
3206 return is_terminated
;
3209 static void parseJsFile (tokenInfo
*const token
)
3217 if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_sap
)
3219 else if (isType (token
, TOKEN_KEYWORD
) && (token
->keyword
== KEYWORD_export
||
3220 token
->keyword
== KEYWORD_default
))
3221 /* skip those at top-level */;
3223 parseLine (token
, false);
3224 } while (! isType (token
, TOKEN_EOF
));
3230 #ifdef DO_TRACING_USE_DUMP_TOKEN
3231 static void dumpToken (const tokenInfo
*const token
)
3233 const char *scope_str
= getNameStringForCorkIndex (token
->scope
);
3234 const char *scope_kind_str
= getKindStringForCorkIndex (token
->scope
);
3236 if (strcmp(scope_str
, "placeholder") == 0)
3238 TRACE_PRINT("%s: %s",
3239 tokenTypeName (token
->type
),
3240 vStringValue (token
->string
));
3244 TRACE_PRINT("%s: %s (scope '%s' of kind %s)",
3245 tokenTypeName (token
->type
),
3246 vStringValue (token
->string
),
3247 scope_str
, scope_kind_str
);
3253 getNameStringForCorkIndex(int index
)
3255 if (index
== CORK_NIL
)
3257 tagEntryInfo
*e
= getEntryInCorkQueue (index
);
3259 return "ghost"; /* Can this happen? */
3262 return "placeholder";
3268 getKindStringForCorkIndex(int index
)
3270 if (index
== CORK_NIL
)
3272 tagEntryInfo
*e
= getEntryInCorkQueue (index
);
3274 return "ghost"; /* Can this happen? */
3277 return "placeholder";
3279 if (e
->kindIndex
== KIND_GHOST_INDEX
)
3282 return JsKinds
[e
->kindIndex
].name
;
3285 static const char *kindName(jsKind kind
)
3287 return ((int)kind
) >= 0 ? JsKinds
[kind
].name
: "none";
3290 static const char *tokenTypeName(enum eTokenType e
)
3291 { /* Generated by misc/enumstr.sh with cmdline:
3292 parsers/jscript.c eTokenType tokenTypeName */
3295 case TOKEN_UNDEFINED
: return "TOKEN_UNDEFINED";
3296 case TOKEN_EOF
: return "TOKEN_EOF";
3297 case TOKEN_CHARACTER
: return "TOKEN_CHARACTER";
3298 case TOKEN_CLOSE_PAREN
: return "TOKEN_CLOSE_PAREN";
3299 case TOKEN_SEMICOLON
: return "TOKEN_SEMICOLON";
3300 case TOKEN_COLON
: return "TOKEN_COLON";
3301 case TOKEN_COMMA
: return "TOKEN_COMMA";
3302 case TOKEN_KEYWORD
: return "TOKEN_KEYWORD";
3303 case TOKEN_OPEN_PAREN
: return "TOKEN_OPEN_PAREN";
3304 case TOKEN_IDENTIFIER
: return "TOKEN_IDENTIFIER";
3305 case TOKEN_STRING
: return "TOKEN_STRING";
3306 case TOKEN_TEMPLATE_STRING
: return "TOKEN_TEMPLATE_STRING";
3307 case TOKEN_PERIOD
: return "TOKEN_PERIOD";
3308 case TOKEN_OPEN_CURLY
: return "TOKEN_OPEN_CURLY";
3309 case TOKEN_CLOSE_CURLY
: return "TOKEN_CLOSE_CURLY";
3310 case TOKEN_EQUAL_SIGN
: return "TOKEN_EQUAL_SIGN";
3311 case TOKEN_OPEN_SQUARE
: return "TOKEN_OPEN_SQUARE";
3312 case TOKEN_CLOSE_SQUARE
: return "TOKEN_CLOSE_SQUARE";
3313 case TOKEN_REGEXP
: return "TOKEN_REGEXP";
3314 case TOKEN_POSTFIX_OPERATOR
: return "TOKEN_POSTFIX_OPERATOR";
3315 case TOKEN_STAR
: return "TOKEN_STAR";
3316 case TOKEN_ATMARK
: return "TOKEN_ATMARK";
3317 case TOKEN_BINARY_OPERATOR
: return "TOKEN_BINARY_OPERATOR";
3318 case TOKEN_ARROW
: return "TOKEN_ARROW";
3319 case TOKEN_DOTS
: return "TOKEN_DOTS";
3320 default: return "UNKNOWN";
3325 static void initialize (const langType language
)
3327 Assert (ARRAY_SIZE (JsKinds
) == JSTAG_COUNT
);
3330 TokenPool
= objPoolNew (16, newPoolToken
, deletePoolToken
, clearPoolToken
, NULL
);
3333 static void finalize (langType language CTAGS_ATTR_UNUSED
, bool initialized
)
3338 objPoolDelete (TokenPool
);
3341 static void findJsTags (void)
3343 tokenInfo
*const token
= newToken ();
3346 LastTokenType
= TOKEN_UNDEFINED
;
3348 parseJsFile (token
);
3350 deleteToken (token
);
3353 if (JSUnicodeConverter
!= (iconv_t
) -2 && /* not created */
3354 JSUnicodeConverter
!= (iconv_t
) -1 /* creation failed */)
3356 iconv_close (JSUnicodeConverter
);
3357 JSUnicodeConverter
= (iconv_t
) -2;
3361 Assert (NextToken
== NULL
);
3364 /* Create parser definition structure */
3365 extern parserDefinition
* JavaScriptParser (void)
3367 // .jsx files are JSX: https://facebook.github.io/jsx/
3368 // which have JS function definitions, so we just use the JS parser
3369 static const char *const extensions
[] = { "js", "jsx", "mjs", NULL
};
3370 static const char *const aliases
[] = { "js", "node", "nodejs",
3372 /* Used in PostgreSQL
3373 * https://github.com/plv8/plv8 */
3376 parserDefinition
*const def
= parserNew ("JavaScript");
3377 def
->extensions
= extensions
;
3378 def
->aliases
= aliases
;
3380 * New definitions for parsing instead of regex
3382 def
->kindTable
= JsKinds
;
3383 def
->kindCount
= ARRAY_SIZE (JsKinds
);
3384 def
->parser
= findJsTags
;
3385 def
->initialize
= initialize
;
3386 def
->finalize
= finalize
;
3387 def
->keywordTable
= JsKeywordTable
;
3388 def
->keywordCount
= ARRAY_SIZE (JsKeywordTable
);
3389 def
->useCork
= CORK_QUEUE
|CORK_SYMTAB
;
3390 def
->requestAutomaticFQTag
= true;
3392 def
->versionCurrent
= 1;
3393 def
->versionAge
= 1;