2 * Copyright (c) 2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for JavaScript language
10 * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
12 * This is a good reference for different forms of the function statement:
13 * http://www.permadi.com/tutorial/jsFunc/
14 * Another good reference:
15 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
/*
 * Token predicates: each yields a bool telling whether TOKEN's type
 * (resp. keyword) field equals the given value.  The whole expansion is
 * parenthesized so the leading cast cannot combine with an operator that
 * happens to precede the macro at the call site (e.g. sizeof).
 */
#define isType(token,t)		((bool) ((token)->type == (t)))
#define isKeyword(token,k)	((bool) ((token)->keyword == (k)))
47 * Tracks class and function names already created
49 static stringList
*ClassNames
;
50 static stringList
*FunctionNames
;
52 /* Used to specify type of keyword.
56 KEYWORD_capital_function
,
57 KEYWORD_capital_object
,
typedef int keywordId; /* to allow KEYWORD_NONE */
78 typedef enum eTokenType
{
99 TOKEN_POSTFIX_OPERATOR
,
100 TOKEN_BINARY_OPERATOR
103 typedef struct sTokenInfo
{
108 unsigned long lineNumber
;
118 static tokenType LastTokenType
;
120 static langType Lang_js
;
132 static kindOption JsKinds
[] = {
133 { true, 'f', "function", "functions" },
134 { true, 'c', "class", "classes" },
135 { true, 'm', "method", "methods" },
136 { true, 'p', "property", "properties" },
137 { true, 'C', "constant", "constants" },
138 { true, 'v', "variable", "global variables" }
141 static const keywordTable JsKeywordTable
[] = {
142 /* keyword keyword ID */
143 { "function", KEYWORD_function
},
144 { "Function", KEYWORD_capital_function
},
145 { "Object", KEYWORD_capital_object
},
146 { "prototype", KEYWORD_prototype
},
147 { "var", KEYWORD_var
},
148 { "let", KEYWORD_let
},
149 { "const", KEYWORD_const
},
150 { "new", KEYWORD_new
},
151 { "this", KEYWORD_this
},
152 { "for", KEYWORD_for
},
153 { "while", KEYWORD_while
},
154 { "do", KEYWORD_do
},
155 { "if", KEYWORD_if
},
156 { "else", KEYWORD_else
},
157 { "switch", KEYWORD_switch
},
158 { "try", KEYWORD_try
},
159 { "catch", KEYWORD_catch
},
160 { "finally", KEYWORD_finally
},
161 { "sap", KEYWORD_sap
},
162 { "return", KEYWORD_return
}
166 * FUNCTION DEFINITIONS
169 /* Recursive functions */
170 static void parseFunction (tokenInfo
*const token
);
171 static bool parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
);
172 static bool parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, bool is_inside_class
);
173 static void parseUI5 (tokenInfo
*const token
);
/*
 * Tells whether C may appear in an identifier as read by this parser:
 * a letter, a digit, or one of '$', '@', '_' and '#'.
 */
static bool isIdentChar (const int c)
{
	return (bool)
		(isalpha (c) || isdigit (c) || c == '$' ||
		 c == '@' || c == '_' || c == '#');
}
182 static tokenInfo
*newToken (void)
184 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
186 token
->type
= TOKEN_UNDEFINED
;
187 token
->keyword
= KEYWORD_NONE
;
188 token
->string
= vStringNew ();
189 token
->scope
= vStringNew ();
190 token
->nestLevel
= 0;
191 token
->ignoreTag
= false;
192 token
->lineNumber
= getInputLineNumber ();
193 token
->filePosition
= getInputFilePosition ();
198 static void deleteToken (tokenInfo
*const token
)
200 vStringDelete (token
->string
);
201 vStringDelete (token
->scope
);
206 * Tag generation functions
209 static void makeJsTag (tokenInfo
*const token
, const jsKind kind
, vString
*const signature
)
211 if (JsKinds
[kind
].enabled
&& ! token
->ignoreTag
)
213 const char *name
= vStringValue (token
->string
);
214 vString
*fullscope
= vStringNewCopy (token
->scope
);
218 if ((p
= strrchr (name
, '.')) != NULL
)
220 if (vStringLength (fullscope
) > 0)
221 vStringPut (fullscope
, '.');
222 vStringNCatS (fullscope
, name
, p
- name
);
226 initTagEntry (&e
, name
, &(JsKinds
[kind
]));
228 e
.lineNumber
= token
->lineNumber
;
229 e
.filePosition
= token
->filePosition
;
231 if ( vStringLength(fullscope
) > 0 )
233 jsKind parent_kind
= JSTAG_CLASS
;
236 * If we're creating a function (and not a method),
237 * guess we're inside another function
239 if (kind
== JSTAG_FUNCTION
)
240 parent_kind
= JSTAG_FUNCTION
;
242 e
.extensionFields
.scopeKind
= &(JsKinds
[parent_kind
]);
243 e
.extensionFields
.scopeName
= vStringValue (fullscope
);
246 if (signature
&& vStringLength(signature
))
249 /* sanitize signature by replacing all control characters with a
250 * space (because it's simple).
251 * there should never be any junk in a valid signature, but who
252 * knows what the user wrote and CTags doesn't cope well with weird
254 for (i
= 0; i
< signature
->length
; i
++)
256 unsigned char c
= (unsigned char) signature
->buffer
[i
];
257 if (c
< 0x20 /* below space */ || c
== 0x7F /* DEL */)
258 signature
->buffer
[i
] = ' ';
260 e
.extensionFields
.signature
= vStringValue(signature
);
264 vStringDelete (fullscope
);
268 static void makeClassTag (tokenInfo
*const token
, vString
*const signature
)
272 if ( ! token
->ignoreTag
)
274 fulltag
= vStringNew ();
275 if (vStringLength (token
->scope
) > 0)
277 vStringCopy(fulltag
, token
->scope
);
278 vStringCatS (fulltag
, ".");
279 vStringCatS (fulltag
, vStringValue(token
->string
));
283 vStringCopy(fulltag
, token
->string
);
285 if ( ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
287 stringListAdd (ClassNames
, vStringNewCopy (fulltag
));
288 makeJsTag (token
, JSTAG_CLASS
, signature
);
290 vStringDelete (fulltag
);
294 static void makeFunctionTag (tokenInfo
*const token
, vString
*const signature
)
298 if ( ! token
->ignoreTag
)
300 fulltag
= vStringNew ();
301 if (vStringLength (token
->scope
) > 0)
303 vStringCopy(fulltag
, token
->scope
);
304 vStringCatS (fulltag
, ".");
305 vStringCatS (fulltag
, vStringValue(token
->string
));
309 vStringCopy(fulltag
, token
->string
);
311 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) )
313 stringListAdd (FunctionNames
, vStringNewCopy (fulltag
));
314 makeJsTag (token
, JSTAG_FUNCTION
, signature
);
316 vStringDelete (fulltag
);
324 static void parseString (vString
*const string
, const int delimiter
)
329 int c
= getcFromInputFile ();
334 /* Eat the escape sequence (\", \', etc). We properly handle
335 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
336 * as an unescaped character, which is invalid and handled below.
337 * Also, handle the fact that <LineContinuation> produces an empty
339 * See ECMA-262 7.8.4 */
340 c
= getcFromInputFile ();
341 if (c
!= '\r' && c
!= '\n')
342 vStringPut(string
, c
);
345 c
= getcFromInputFile();
347 ungetcToInputFile (c
);
350 else if (c
== delimiter
)
352 else if (c
== '\r' || c
== '\n')
354 /* those are invalid when not escaped */
356 /* we don't want to eat the newline itself to let the automatic
357 * semicolon insertion code kick in */
358 ungetcToInputFile (c
);
361 vStringPut (string
, c
);
/*
 * Skips a regular expression literal whose opening '/' has already been
 * read.  A '/' ends the literal only outside a [...] range; trailing
 * alphabetic flags are consumed too.  The character following a backslash
 * is skipped.  Stops at end of input if no terminator is found.
 */
static void parseRegExp (void)
{
	int c;
	bool in_range = false;

	do
	{
		c = getcFromInputFile ();
		if (! in_range && c == '/')
		{
			do /* skip flags */
			{
				c = getcFromInputFile ();
			} while (isalpha (c));
			ungetcToInputFile (c);
			break;
		}
		else if (c == '\\')
			c = getcFromInputFile (); /* skip next character */
		else if (c == '[')
			in_range = true;
		else if (c == ']')
			in_range = false;
	} while (c != EOF);
}
391 /* Read a C identifier beginning with "firstChar" and places it into
394 static void parseIdentifier (vString
*const string
, const int firstChar
)
397 Assert (isIdentChar (c
));
400 vStringPut (string
, c
);
401 c
= getcFromInputFile ();
402 } while (isIdentChar (c
));
403 ungetcToInputFile (c
); /* unget non-identifier character */
406 static keywordId
analyzeToken (vString
*const name
)
408 vString
*keyword
= vStringNew ();
410 vStringCopyToLower (keyword
, name
);
411 result
= (keywordId
) lookupKeyword (vStringValue (keyword
), Lang_js
);
412 vStringDelete (keyword
);
416 static void readTokenFull (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
)
421 token
->type
= TOKEN_UNDEFINED
;
422 token
->keyword
= KEYWORD_NONE
;
423 vStringClear (token
->string
);
429 c
= getcFromInputFile ();
432 while (c
== '\t' || c
== ' ' ||
433 ((c
== '\r' || c
== '\n') && ! include_newlines
));
435 token
->lineNumber
= getInputLineNumber ();
436 token
->filePosition
= getInputFilePosition ();
441 vStringPut (repr
, ' ');
442 vStringPut (repr
, c
);
447 case EOF
: token
->type
= TOKEN_EOF
; break;
448 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
449 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
450 case ';': token
->type
= TOKEN_SEMICOLON
; break;
451 case ',': token
->type
= TOKEN_COMMA
; break;
452 case '.': token
->type
= TOKEN_PERIOD
; break;
453 case ':': token
->type
= TOKEN_COLON
; break;
454 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
455 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
456 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
457 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
458 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
463 int d
= getcFromInputFile ();
464 if (d
== c
) /* ++ or -- */
465 token
->type
= TOKEN_POSTFIX_OPERATOR
;
468 ungetcToInputFile (d
);
469 token
->type
= TOKEN_BINARY_OPERATOR
;
482 token
->type
= TOKEN_BINARY_OPERATOR
;
487 /* This isn't strictly correct per the standard, but following the
488 * real rules means understanding all statements, and that's not
489 * what the parser currently does. What we do here is a guess, by
490 * avoiding inserting semicolons that would make the statement on
491 * the left invalid. Hopefully this should not have false negatives
492 * (e.g. should not miss insertion of a semicolon) but might have
493 * false positives (e.g. it will wrongfully emit a semicolon for the
494 * newline in "foo\n+bar").
495 * This should however be mostly harmless as we only deal with
496 * newlines in specific situations where we know a false positive
497 * wouldn't hurt too bad. */
498 switch (LastTokenType
)
500 /* these cannot be the end of a statement, so hold the newline */
501 case TOKEN_EQUAL_SIGN
:
504 case TOKEN_FORWARD_SLASH
:
505 case TOKEN_BINARY_OPERATOR
:
506 /* and these already end one, no need to duplicate it */
507 case TOKEN_SEMICOLON
:
509 case TOKEN_CLOSE_CURLY
:
510 case TOKEN_OPEN_CURLY
:
511 include_newlines
= false; /* no need to recheck */
515 token
->type
= TOKEN_SEMICOLON
;
521 token
->type
= TOKEN_STRING
;
522 parseString (token
->string
, c
);
523 token
->lineNumber
= getInputLineNumber ();
524 token
->filePosition
= getInputFilePosition ();
527 vStringCat (repr
, token
->string
);
528 vStringPut (repr
, c
);
533 c
= getcFromInputFile ();
534 if (c
!= '\\' && c
!= '"' && !isspace (c
))
535 ungetcToInputFile (c
);
536 token
->type
= TOKEN_CHARACTER
;
537 token
->lineNumber
= getInputLineNumber ();
538 token
->filePosition
= getInputFilePosition ();
543 int d
= getcFromInputFile ();
544 if ( (d
!= '*') && /* is this the start of a comment? */
545 (d
!= '/') ) /* is a one line comment? */
547 ungetcToInputFile (d
);
548 switch (LastTokenType
)
550 case TOKEN_CHARACTER
:
551 case TOKEN_IDENTIFIER
:
553 case TOKEN_CLOSE_CURLY
:
554 case TOKEN_CLOSE_PAREN
:
555 case TOKEN_CLOSE_SQUARE
:
556 token
->type
= TOKEN_FORWARD_SLASH
;
560 token
->type
= TOKEN_REGEXP
;
562 token
->lineNumber
= getInputLineNumber ();
563 token
->filePosition
= getInputFilePosition ();
569 if (repr
) /* remove the / we added */
570 repr
->buffer
[--repr
->length
] = 0;
575 skipToCharacterInInputFile ('*');
576 c
= getcFromInputFile ();
580 ungetcToInputFile (c
);
581 } while (c
!= EOF
&& c
!= '\0');
584 else if (d
== '/') /* is this the start of a comment? */
586 skipToCharacterInInputFile ('\n');
587 /* if we care about newlines, put it back so it is seen */
588 if (include_newlines
)
589 ungetcToInputFile ('\n');
597 /* skip shebang in case of e.g. Node.js scripts */
598 if (token
->lineNumber
> 1)
599 token
->type
= TOKEN_UNDEFINED
;
600 else if ((c
= getcFromInputFile ()) != '!')
602 ungetcToInputFile (c
);
603 token
->type
= TOKEN_UNDEFINED
;
607 skipToCharacterInInputFile ('\n');
613 if (! isIdentChar (c
))
614 token
->type
= TOKEN_UNDEFINED
;
617 parseIdentifier (token
->string
, c
);
618 token
->lineNumber
= getInputLineNumber ();
619 token
->filePosition
= getInputFilePosition ();
620 token
->keyword
= analyzeToken (token
->string
);
621 if (isKeyword (token
, KEYWORD_NONE
))
622 token
->type
= TOKEN_IDENTIFIER
;
624 token
->type
= TOKEN_KEYWORD
;
625 if (repr
&& vStringLength (token
->string
) > 1)
626 vStringCatS (repr
, vStringValue (token
->string
) + 1);
631 LastTokenType
= token
->type
;
634 static void readToken (tokenInfo
*const token
)
636 readTokenFull (token
, false, NULL
);
639 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
641 dest
->nestLevel
= src
->nestLevel
;
642 dest
->lineNumber
= src
->lineNumber
;
643 dest
->filePosition
= src
->filePosition
;
644 dest
->type
= src
->type
;
645 dest
->keyword
= src
->keyword
;
646 vStringCopy(dest
->string
, src
->string
);
647 vStringCopy(dest
->scope
, src
->scope
);
651 * Token parsing functions
654 static void skipArgumentList (tokenInfo
*const token
, bool include_newlines
, vString
*const repr
)
658 if (isType (token
, TOKEN_OPEN_PAREN
)) /* arguments? */
662 vStringPut (repr
, '(');
663 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
665 readTokenFull (token
, false, repr
);
666 if (isType (token
, TOKEN_OPEN_PAREN
))
668 else if (isType (token
, TOKEN_CLOSE_PAREN
))
671 readTokenFull (token
, include_newlines
, NULL
);
675 static void skipArrayList (tokenInfo
*const token
, bool include_newlines
)
680 * Handle square brackets
682 * So we must check for nested open and closing square brackets
685 if (isType (token
, TOKEN_OPEN_SQUARE
)) /* arguments? */
688 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
691 if (isType (token
, TOKEN_OPEN_SQUARE
))
693 else if (isType (token
, TOKEN_CLOSE_SQUARE
))
696 readTokenFull (token
, include_newlines
, NULL
);
700 static void addContext (tokenInfo
* const parent
, const tokenInfo
* const child
)
702 if (vStringLength (parent
->string
) > 0)
704 vStringCatS (parent
->string
, ".");
706 vStringCatS (parent
->string
, vStringValue(child
->string
));
709 static void addToScope (tokenInfo
* const token
, vString
* const extra
)
711 if (vStringLength (token
->scope
) > 0)
713 vStringCatS (token
->scope
, ".");
715 vStringCatS (token
->scope
, vStringValue(extra
));
722 static bool findCmdTerm (tokenInfo
*const token
, bool include_newlines
)
725 * Read until we find either a semicolon or closing brace.
726 * Any nested braces will be handled within.
728 while (! isType (token
, TOKEN_SEMICOLON
) &&
729 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
730 ! isType (token
, TOKEN_EOF
))
732 /* Handle nested blocks */
733 if ( isType (token
, TOKEN_OPEN_CURLY
))
735 parseBlock (token
, token
);
736 readTokenFull (token
, include_newlines
, NULL
);
738 else if ( isType (token
, TOKEN_OPEN_PAREN
) )
740 skipArgumentList(token
, include_newlines
, NULL
);
742 else if ( isType (token
, TOKEN_OPEN_SQUARE
) )
744 skipArrayList(token
, include_newlines
);
748 readTokenFull (token
, include_newlines
, NULL
);
752 return isType (token
, TOKEN_SEMICOLON
);
755 static void parseSwitch (tokenInfo
*const token
)
758 * switch (expression) {
765 * default : statement;
771 if (isType (token
, TOKEN_OPEN_PAREN
))
774 * Handle nameless functions, these will only
775 * be considered methods.
777 skipArgumentList(token
, false, NULL
);
780 if (isType (token
, TOKEN_OPEN_CURLY
))
782 parseBlock (token
, token
);
786 static bool parseLoop (tokenInfo
*const token
, tokenInfo
*const parent
)
789 * Handles these statements
790 * for (x=0; x<3; x++)
791 * document.write("This text is repeated three times<br>");
793 * for (x=0; x<3; x++)
795 * document.write("This text is repeated three times<br>");
799 * document.write(number+"<br>");
804 * document.write(number+"<br>");
809 bool is_terminated
= true;
811 if (isKeyword (token
, KEYWORD_for
) || isKeyword (token
, KEYWORD_while
))
815 if (isType (token
, TOKEN_OPEN_PAREN
))
818 * Handle nameless functions, these will only
819 * be considered methods.
821 skipArgumentList(token
, false, NULL
);
824 if (isType (token
, TOKEN_OPEN_CURLY
))
827 * This will be either a function or a class.
828 * We can only determine this by checking the body
829 * of the function. If we find a "this." we know
830 * it is a class, otherwise it is a function.
832 parseBlock (token
, parent
);
836 is_terminated
= parseLine(token
, parent
, false);
839 else if (isKeyword (token
, KEYWORD_do
))
843 if (isType (token
, TOKEN_OPEN_CURLY
))
846 * This will be either a function or a class.
847 * We can only determine this by checking the body
848 * of the function. If we find a "this." we know
849 * it is a class, otherwise it is a function.
851 parseBlock (token
, parent
);
855 is_terminated
= parseLine(token
, parent
, false);
861 if (isKeyword (token
, KEYWORD_while
))
865 if (isType (token
, TOKEN_OPEN_PAREN
))
868 * Handle nameless functions, these will only
869 * be considered methods.
871 skipArgumentList(token
, true, NULL
);
873 if (! isType (token
, TOKEN_SEMICOLON
))
874 is_terminated
= false;
878 return is_terminated
;
881 static bool parseIf (tokenInfo
*const token
, tokenInfo
*const parent
)
883 bool read_next_token
= true;
885 * If statements have two forms
904 * This example if correctly written, but the
905 * else contains only 1 statement without a terminator
906 * since the function finishes with the closing brace.
915 * TODO: Deal with statements that can optional end
916 * without a semi-colon. Currently this messes up
917 * the parsing of blocks.
918 * Need to somehow detect this has happened, and either
919 * backup a token, or skip reading the next token if
920 * that is possible from all code locations.
926 if (isKeyword (token
, KEYWORD_if
))
929 * Check for an "else if" and consume the "if"
934 if (isType (token
, TOKEN_OPEN_PAREN
))
937 * Handle nameless functions, these will only
938 * be considered methods.
940 skipArgumentList(token
, false, NULL
);
943 if (isType (token
, TOKEN_OPEN_CURLY
))
946 * This will be either a function or a class.
947 * We can only determine this by checking the body
948 * of the function. If we find a "this." we know
949 * it is a class, otherwise it is a function.
951 parseBlock (token
, parent
);
955 /* The next token should only be read if this statement had its own
957 read_next_token
= findCmdTerm (token
, true);
959 return read_next_token
;
962 static void parseFunction (tokenInfo
*const token
)
964 tokenInfo
*const name
= newToken ();
965 vString
*const signature
= vStringNew ();
966 bool is_class
= false;
969 * This deals with these formats
970 * function validFunctionTwo(a,b) {}
974 /* Add scope in case this is an INNER function */
975 addToScope(name
, token
->scope
);
978 while (isType (token
, TOKEN_PERIOD
))
981 if ( isKeyword(token
, KEYWORD_NONE
) )
983 addContext (name
, token
);
988 if ( isType (token
, TOKEN_OPEN_PAREN
) )
989 skipArgumentList(token
, false, signature
);
991 if ( isType (token
, TOKEN_OPEN_CURLY
) )
993 is_class
= parseBlock (token
, name
);
995 makeClassTag (name
, signature
);
997 makeFunctionTag (name
, signature
);
1000 findCmdTerm (token
, false);
1002 vStringDelete (signature
);
1006 static bool parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
)
1008 bool is_class
= false;
1009 bool read_next_token
= true;
1010 vString
* saveScope
= vStringNew ();
1011 tokenInfo
*const parent
= newToken ();
1013 /* backup the parent token to allow calls like parseBlock(token, token) */
1014 copyToken (parent
, orig_parent
);
1018 * Make this routine a bit more forgiving.
1019 * If called on an open_curly advance it
1021 if ( isType (token
, TOKEN_OPEN_CURLY
) &&
1022 isKeyword(token
, KEYWORD_NONE
) )
1025 if (! isType (token
, TOKEN_CLOSE_CURLY
))
1028 * Read until we find the closing brace,
1029 * any nested braces will be handled within
1033 read_next_token
= true;
1034 if (isKeyword (token
, KEYWORD_this
))
1037 * Means we are inside a class and have found
1038 * a class, not a function
1041 vStringCopy(saveScope
, token
->scope
);
1042 addToScope (token
, parent
->string
);
1045 * Ignore the remainder of the line
1046 * findCmdTerm(token);
1048 read_next_token
= parseLine (token
, parent
, is_class
);
1050 vStringCopy(token
->scope
, saveScope
);
1052 else if (isKeyword (token
, KEYWORD_var
) ||
1053 isKeyword (token
, KEYWORD_let
) ||
1054 isKeyword (token
, KEYWORD_const
))
1057 * Potentially we have found an inner function.
1058 * Set something to indicate the scope
1060 vStringCopy(saveScope
, token
->scope
);
1061 addToScope (token
, parent
->string
);
1062 read_next_token
= parseLine (token
, parent
, is_class
);
1063 vStringCopy(token
->scope
, saveScope
);
1065 else if (isKeyword (token
, KEYWORD_function
))
1067 vStringCopy(saveScope
, token
->scope
);
1068 addToScope (token
, parent
->string
);
1069 parseFunction (token
);
1070 vStringCopy(token
->scope
, saveScope
);
1072 else if (isType (token
, TOKEN_OPEN_CURLY
))
1074 /* Handle nested blocks */
1075 parseBlock (token
, parent
);
1080 * It is possible for a line to have no terminator
1081 * if the following line is a closing brace.
1082 * parseLine will detect this case and indicate
1083 * whether we should read an additional token.
1085 read_next_token
= parseLine (token
, parent
, is_class
);
1089 * Always read a new token unless we find a statement without
1090 * a ending terminator
1092 if( read_next_token
)
1096 * If we find a statement without a terminator consider the
1097 * block finished, otherwise the stack will be off by one.
1099 } while (! isType (token
, TOKEN_EOF
) &&
1100 ! isType (token
, TOKEN_CLOSE_CURLY
) && read_next_token
);
1103 deleteToken (parent
);
1104 vStringDelete(saveScope
);
1110 static bool parseMethods (tokenInfo
*const token
, tokenInfo
*const class)
1112 tokenInfo
*const name
= newToken ();
1113 bool has_methods
= false;
1116 * This deals with these formats
1117 * validProperty : 2,
1118 * validMethod : function(a,b) {}
1119 * 'validMethod2' : function(a,b) {}
1120 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1126 if (isType (token
, TOKEN_CLOSE_CURLY
))
1129 * This was most likely a variable declaration of a hash table.
1130 * indicate there were no methods and return.
1132 has_methods
= false;
1136 if (isType (token
, TOKEN_STRING
) || isKeyword(token
, KEYWORD_NONE
))
1138 copyToken(name
, token
);
1141 if ( isType (token
, TOKEN_COLON
) )
1144 if ( isKeyword (token
, KEYWORD_function
) )
1146 vString
*const signature
= vStringNew ();
1149 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1151 skipArgumentList(token
, false, signature
);
1154 if (isType (token
, TOKEN_OPEN_CURLY
))
1157 addToScope (name
, class->string
);
1158 makeJsTag (name
, JSTAG_METHOD
, signature
);
1159 parseBlock (token
, name
);
1162 * Read to the closing curly, check next
1163 * token, if a comma, we must loop again
1168 vStringDelete (signature
);
1172 vString
* saveScope
= vStringNew ();
1173 bool has_child_methods
= false;
1175 /* skip whatever is the value */
1176 while (! isType (token
, TOKEN_COMMA
) &&
1177 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
1178 ! isType (token
, TOKEN_EOF
))
1180 if (isType (token
, TOKEN_OPEN_CURLY
))
1182 /* Recurse to find child properties/methods */
1183 vStringCopy (saveScope
, token
->scope
);
1184 addToScope (token
, class->string
);
1185 has_child_methods
= parseMethods (token
, name
);
1186 vStringCopy (token
->scope
, saveScope
);
1189 else if (isType (token
, TOKEN_OPEN_PAREN
))
1191 skipArgumentList (token
, false, NULL
);
1193 else if (isType (token
, TOKEN_OPEN_SQUARE
))
1195 skipArrayList (token
, false);
1202 vStringDelete (saveScope
);
1205 addToScope (name
, class->string
);
1206 if (has_child_methods
)
1207 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1209 makeJsTag (name
, JSTAG_PROPERTY
, NULL
);
1213 } while ( isType(token
, TOKEN_COMMA
) );
1215 findCmdTerm (token
, false);
1223 static bool parseStatement (tokenInfo
*const token
, tokenInfo
*const parent
, bool is_inside_class
)
1225 tokenInfo
*const name
= newToken ();
1226 tokenInfo
*const secondary_name
= newToken ();
1227 tokenInfo
*const method_body_token
= newToken ();
1228 vString
* saveScope
= vStringNew ();
1229 bool is_class
= false;
1230 bool is_var
= false;
1231 bool is_const
= false;
1232 bool is_terminated
= true;
1233 bool is_global
= false;
1234 bool has_methods
= false;
1237 vStringClear(saveScope
);
1239 * Functions can be named or unnamed.
1240 * This deals with these formats:
1242 * validFunctionOne = function(a,b) {}
1243 * testlib.validFunctionFive = function(a,b) {}
1244 * var innerThree = function(a,b) {}
1245 * var innerFour = (a,b) {}
1246 * var D2 = secondary_fcn_name(a,b) {}
1247 * var D3 = new Function("a", "b", "return a+b;");
1249 * testlib.extras.ValidClassOne = function(a,b) {
1253 * testlib.extras.ValidClassOne.prototype = {
1254 * 'validMethodOne' : function(a,b) {},
1255 * 'validMethodTwo' : function(a,b) {}
1257 * ValidClassTwo = function ()
1259 * this.validMethodThree = function() {}
1261 * this.validMethodFour = () {}
1263 * Database.prototype.validMethodThree = Database_getTodaysDate;
1266 if ( is_inside_class
)
1269 * var can precede an inner function
1271 if ( isKeyword(token
, KEYWORD_var
) ||
1272 isKeyword(token
, KEYWORD_let
) ||
1273 isKeyword(token
, KEYWORD_const
) )
1275 is_const
= isKeyword(token
, KEYWORD_const
);
1277 * Only create variables for global scope
1279 if ( token
->nestLevel
== 0 )
1286 if ( isKeyword(token
, KEYWORD_this
) )
1289 if (isType (token
, TOKEN_PERIOD
))
1295 copyToken(name
, token
);
1297 while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1298 ! isType (token
, TOKEN_SEMICOLON
) &&
1299 ! isType (token
, TOKEN_EQUAL_SIGN
) &&
1300 ! isType (token
, TOKEN_EOF
))
1302 if (isType (token
, TOKEN_OPEN_CURLY
))
1303 parseBlock (token
, parent
);
1305 /* Potentially the name of the function */
1307 if (isType (token
, TOKEN_PERIOD
))
1310 * Cannot be a global variable is it has dot references in the name
1316 if ( isKeyword(token
, KEYWORD_NONE
) )
1320 addToScope(token
, name
->string
);
1323 addContext (name
, token
);
1327 else if ( isKeyword(token
, KEYWORD_prototype
) )
1330 * When we reach the "prototype" tag, we infer:
1331 * "BindAgent" is a class
1332 * "build" is a method
1334 * function BindAgent( repeatableIdName, newParentIdName ) {
1338 * Specified function name: "build"
1339 * BindAgent.prototype.build = function( mode ) {
1340 * maybe parse nested functions
1345 * ValidClassOne.prototype = {
1346 * 'validMethodOne' : function(a,b) {},
1347 * 'validMethodTwo' : function(a,b) {}
1351 makeClassTag (name
, NULL
);
1355 * There should a ".function_name" next.
1358 if (isType (token
, TOKEN_PERIOD
))
1364 if ( isKeyword(token
, KEYWORD_NONE
) )
1366 vString
*const signature
= vStringNew ();
1368 vStringCopy(saveScope
, token
->scope
);
1369 addToScope(token
, name
->string
);
1371 readToken (method_body_token
);
1372 vStringCopy (method_body_token
->scope
, token
->scope
);
1374 while (! isType (method_body_token
, TOKEN_SEMICOLON
) &&
1375 ! isType (method_body_token
, TOKEN_CLOSE_CURLY
) &&
1376 ! isType (method_body_token
, TOKEN_OPEN_CURLY
) &&
1377 ! isType (method_body_token
, TOKEN_EOF
))
1379 if ( isType (method_body_token
, TOKEN_OPEN_PAREN
) )
1380 skipArgumentList(method_body_token
, false,
1381 vStringLength (signature
) == 0 ? signature
: NULL
);
1383 readToken (method_body_token
);
1386 makeJsTag (token
, JSTAG_METHOD
, signature
);
1387 vStringDelete (signature
);
1389 if ( isType (method_body_token
, TOKEN_OPEN_CURLY
))
1391 parseBlock (method_body_token
, token
);
1392 is_terminated
= true;
1395 is_terminated
= isType (method_body_token
, TOKEN_SEMICOLON
);
1399 else if (isType (token
, TOKEN_EQUAL_SIGN
))
1402 if (isType (token
, TOKEN_OPEN_CURLY
))
1407 * Creates tags for each of these class methods
1408 * ValidClassOne.prototype = {
1409 * 'validMethodOne' : function(a,b) {},
1410 * 'validMethodTwo' : function(a,b) {}
1413 parseMethods(token
, name
);
1415 * Find to the end of the statement
1417 findCmdTerm (token
, false);
1418 token
->ignoreTag
= false;
1419 is_terminated
= true;
1426 } while (isType (token
, TOKEN_PERIOD
));
1429 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1430 skipArgumentList(token
, false, NULL
);
1432 if ( isType (token
, TOKEN_OPEN_SQUARE
) )
1433 skipArrayList(token
, false);
1436 if ( isType (token, TOKEN_OPEN_CURLY) )
1438 is_class = parseBlock (token, name);
1443 if ( isType (token
, TOKEN_CLOSE_CURLY
) )
1446 * Reaching this section without having
1447 * processed an open curly brace indicates
1448 * the statement is most likely not terminated.
1450 is_terminated
= false;
1454 if ( isType (token
, TOKEN_SEMICOLON
) )
1457 * Only create variables for global scope
1459 if ( token
->nestLevel
== 0 && is_global
)
1462 * Handles this syntax:
1465 if (isType (token
, TOKEN_SEMICOLON
))
1466 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1469 * Statement has ended.
1470 * This deals with calls to functions, like:
1476 if ( isType (token
, TOKEN_EQUAL_SIGN
) )
1482 /* rvalue might be surrounded with parentheses */
1483 while (isType (token
, TOKEN_OPEN_PAREN
))
1489 if ( isKeyword (token
, KEYWORD_function
) )
1491 vString
*const signature
= vStringNew ();
1495 if ( isKeyword (token
, KEYWORD_NONE
) &&
1496 ! isType (token
, TOKEN_OPEN_PAREN
) )
1499 * Functions of this format:
1500 * var D2A = function theAdd(a, b)
1504 * Are really two separate defined functions and
1505 * can be referenced in two ways:
1506 * alert( D2A(1,2) ); // produces 3
1507 * alert( theAdd(1,2) ); // also produces 3
1508 * So it must have two tags:
1511 * Save the reference to the name for later use, once
1512 * we have established this is a valid function we will
1513 * create the secondary reference to it.
1515 copyToken(secondary_name
, token
);
1519 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1520 skipArgumentList(token
, false, signature
);
1522 if (isType (token
, TOKEN_OPEN_CURLY
))
1525 * This will be either a function or a class.
1526 * We can only determine this by checking the body
1527 * of the function. If we find a "this." we know
1528 * it is a class, otherwise it is a function.
1530 if ( is_inside_class
)
1532 makeJsTag (name
, JSTAG_METHOD
, signature
);
1533 if ( vStringLength(secondary_name
->string
) > 0 )
1534 makeFunctionTag (secondary_name
, signature
);
1535 parseBlock (token
, name
);
1539 is_class
= parseBlock (token
, name
);
1541 makeClassTag (name
, signature
);
1543 makeFunctionTag (name
, signature
);
1545 if ( vStringLength(secondary_name
->string
) > 0 )
1546 makeFunctionTag (secondary_name
, signature
);
1550 vStringDelete (signature
);
1552 else if (isType (token
, TOKEN_OPEN_CURLY
))
1555 * Creates tags for each of these class methods
1556 * ValidClassOne.prototype = {
1557 * 'validMethodOne' : function(a,b) {},
1558 * 'validMethodTwo' : function(a,b) {}
1560 * Or checks if this is a hash variable.
1563 has_methods
= parseMethods(token
, name
);
1565 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1569 * Only create variables for global scope
1571 if ( token
->nestLevel
== 0 && is_global
)
1574 * A pointer can be created to the function.
1575 * If we recognize the function/class name ignore the variable.
1576 * This format looks identical to a variable definition.
1577 * A variable defined outside of a block is considered
1578 * a global variable:
1581 * This is not a global variable:
1582 * var g_var = function;
1583 * This is a global variable:
1584 * var g_var = different_var_name;
1586 fulltag
= vStringNew ();
1587 if (vStringLength (token
->scope
) > 0)
1589 vStringCopy(fulltag
, token
->scope
);
1590 vStringCatS (fulltag
, ".");
1591 vStringCatS (fulltag
, vStringValue(token
->string
));
1595 vStringCopy(fulltag
, token
->string
);
1597 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1598 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1600 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1602 vStringDelete (fulltag
);
1605 if (isType (token
, TOKEN_CLOSE_CURLY
))
1608 * Assume the closing curly brace terminates
1611 is_terminated
= true;
1614 else if (isKeyword (token
, KEYWORD_new
))
1617 is_var
= isType (token
, TOKEN_IDENTIFIER
);
1618 if ( isKeyword (token
, KEYWORD_function
) ||
1619 isKeyword (token
, KEYWORD_capital_function
) ||
1620 isKeyword (token
, KEYWORD_capital_object
) ||
1623 if ( isKeyword (token
, KEYWORD_capital_object
) )
1627 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1628 skipArgumentList(token
, true, NULL
);
1630 if (isType (token
, TOKEN_SEMICOLON
))
1632 if ( token
->nestLevel
== 0 )
1636 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1642 makeClassTag (name
, NULL
);
1644 /* FIXME: we cannot really get a meaningful
1645 * signature from a `new Function()` call,
1646 * so for now just don't set any */
1647 makeFunctionTag (name
, NULL
);
1652 else if (isType (token
, TOKEN_CLOSE_CURLY
))
1653 is_terminated
= false;
1656 else if (isKeyword (token
, KEYWORD_NONE
))
1659 * Only create variables for global scope
1661 if ( token
->nestLevel
== 0 && is_global
)
1664 * A pointer can be created to the function.
1665 * If we recognize the function/class name ignore the variable.
1666 * This format looks identical to a variable definition.
1667 * A variable defined outside of a block is considered
1668 * a global variable:
1671 * This is not a global variable:
1672 * var g_var = function;
1673 * This is a global variable:
1674 * var g_var = different_var_name;
1676 fulltag
= vStringNew ();
1677 if (vStringLength (token
->scope
) > 0)
1679 vStringCopy(fulltag
, token
->scope
);
1680 vStringCatS (fulltag
, ".");
1681 vStringCatS (fulltag
, vStringValue(token
->string
));
1685 vStringCopy(fulltag
, token
->string
);
1687 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1688 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1690 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1692 vStringDelete (fulltag
);
1698 while (parenDepth
> 0 && ! isType (token
, TOKEN_EOF
))
1700 if (isType (token
, TOKEN_OPEN_PAREN
))
1702 else if (isType (token
, TOKEN_CLOSE_PAREN
))
1704 readTokenFull (token
, true, NULL
);
1706 if (isType (token
, TOKEN_CLOSE_CURLY
))
1707 is_terminated
= false;
1711 /* if we aren't already at the cmd end, advance to it and check whether
1712 * the statement was terminated */
1713 if (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1714 ! isType (token
, TOKEN_SEMICOLON
))
1717 * Statements can be optionally terminated in the case of
1718 * statement prior to a close curly brace as in the
1719 * document.write line below:
1721 * function checkForUpdate() {
1723 * document.write("hello from checkForUpdate<br>")
1728 is_terminated
= findCmdTerm (token
, true);
1732 vStringCopy(token
->scope
, saveScope
);
1734 deleteToken (secondary_name
);
1735 deleteToken (method_body_token
);
1736 vStringDelete(saveScope
);
1738 return is_terminated
;
/*
 * parseUI5 -- handle a SAPUI5 style sap.<...>(<string>, { ... }) construct.
 *
 * token  current token; the visible tests expect it to become a period,
 *        then an open parenthesis, a string, a comma and finally the
 *        members handed to parseMethods.
 *
 * A scratch token (name) is allocated with newToken().  A string token
 * found after the open parenthesis is copied into it and it is then passed
 * to parseMethods as the second argument.  parseMethods is repeated until
 * the token is a closing curly brace or TOKEN_EOF.
 *
 * NOTE(review): the line numbers embedded in this listing skip several
 * lines (for example 1757-1761, 1767-1771 and 1786-1790), so the statements
 * that advance the token and that release the scratch token are not
 * visible here -- confirm against the complete file.
 */
1741 static void parseUI5 (tokenInfo
*const token
)
1743 tokenInfo
*const name
= newToken ();
1745 * SAPUI5 is built on top of jQuery.
1746 * It follows a standard format:
1747 * sap.ui.controller("id.of.controller", {
1748 * method_name : function... {
1751 * method_name : function ... {
1755 * Handle the parsing of the initial controller (and the
1756 * same for "view") and then allow the methods to be
1762 if (isType (token
, TOKEN_PERIOD
))
/* Loop for as long as the token is neither an open parenthesis nor EOF. */
1765 while (! isType (token
, TOKEN_OPEN_PAREN
) &&
1766 ! isType (token
, TOKEN_EOF
))
/* A string token at this point is kept in the scratch token. */
1772 if (isType (token
, TOKEN_STRING
))
1774 copyToken(name
, token
);
1778 if (isType (token
, TOKEN_COMMA
))
/* Pass the members to parseMethods, with the copied string token as name. */
1783 parseMethods (token
, name
);
1784 } while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1785 ! isType (token
, TOKEN_EOF
));
/*
 * parseLine -- parse one statement starting at the current token.
 *
 * token            current token
 * parent           forwarded to parseLoop, parseIf and parseStatement
 * is_inside_class  forwarded unchanged to parseStatement
 *
 * Returns is_terminated.  It starts out as true and is overwritten with the
 * result of parseLoop, parseIf, findCmdTerm or parseStatement, depending on
 * the keyword of the token.  The result of the parseSwitch call is not
 * assigned to it.
 *
 * NOTE(review): the line numbers embedded in this listing skip lines, and
 * only the finally, switch and return case labels are visible.  The labels
 * that select parseLoop and the other labels that share the parseIf branch
 * are not shown -- confirm against the complete file.
 */
1791 static bool parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, bool is_inside_class
)
1793 bool is_terminated
= true;
1795 * Detect the common statements, if, while, for, do, ...
1796 * This is necessary since the last statement within a block "{}"
1797 * can be optionally terminated.
1799 * If the statement is not terminated, we need to tell
1800 * the calling routine to prevent reading an additional token
1801 * looking for the end of the statement.
/* Keyword tokens are dispatched on token->keyword. */
1804 if (isType(token
, TOKEN_KEYWORD
))
1806 switch (token
->keyword
)
1811 is_terminated
= parseLoop (token
, parent
);
1817 case KEYWORD_finally
:
1818 /* Common semantics */
1819 is_terminated
= parseIf (token
, parent
);
1821 case KEYWORD_switch
:
1822 parseSwitch (token
);
/* A return statement only needs its terminator located. */
1824 case KEYWORD_return
:
1825 is_terminated
= findCmdTerm (token
, true);
1828 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1835 * Special case where single line statements may not be
1836 * SEMICOLON terminated. parseBlock needs to know this
1837 * so that it does not read the next token.
1839 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1841 return is_terminated
;
/*
 * parseJsFile -- top-level driver that walks the tokens of one input file.
 *
 * token  token object reused for the whole file (allocated by the caller,
 *        findJsTags)
 *
 * A token carrying the function keyword is handed to parseFunction.  A
 * second test recognises the sap keyword.  parseLine is called with the
 * token acting as its own parent and with is_inside_class set to false.
 * The trailing while condition repeats the dispatch until TOKEN_EOF.
 *
 * NOTE(review): the embedded numbering jumps from 1844 to 1850 and from
 * 1852 to 1855, so the loop opener, the statement that fetches the next
 * token and the body of the sap branch are not visible in this listing.
 * parseUI5 is presumably the target of that branch -- confirm against the
 * complete file.
 */
1844 static void parseJsFile (tokenInfo
*const token
)
1850 if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_function
)
1851 parseFunction (token
);
/* Tokens carrying the sap keyword are tested separately. */
1852 else if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_sap
)
/* The token doubles as the parent argument; is_inside_class is false. */
1855 parseLine (token
, token
, false);
1856 } while (! isType (token
, TOKEN_EOF
));
1859 static void initialize (const langType language
)
1861 Assert (ARRAY_SIZE (JsKinds
) == JSTAG_COUNT
);
1865 static void findJsTags (void)
1867 tokenInfo
*const token
= newToken ();
1869 ClassNames
= stringListNew ();
1870 FunctionNames
= stringListNew ();
1871 LastTokenType
= TOKEN_UNDEFINED
;
1873 parseJsFile (token
);
1875 stringListDelete (ClassNames
);
1876 stringListDelete (FunctionNames
);
1878 FunctionNames
= NULL
;
1879 deleteToken (token
);
/*
 * JavaScriptParser -- build the parser definition for JavaScript.
 *
 * Creates the definition with parserNew and fills in: the js file
 * extension list, the JsKinds table and its size, findJsTags as the parser
 * callback, initialize as the initialisation callback, and JsKeywordTable
 * with its size.
 *
 * NOTE(review): neither the return statement nor the end of the function
 * is visible in this listing -- confirm against the complete file.
 */
1882 /* Create parser definition structure */
1883 extern parserDefinition
* JavaScriptParser (void)
/* NULL-terminated list of file extensions handled by this parser. */
1885 static const char *const extensions
[] = { "js", NULL
};
1886 parserDefinition
*const def
= parserNew ("JavaScript");
1887 def
->extensions
= extensions
;
1889 * New definitions for parsing instead of regex
1891 def
->kinds
= JsKinds
;
1892 def
->kindCount
= ARRAY_SIZE (JsKinds
);
1893 def
->parser
= findJsTags
;
1894 def
->initialize
= initialize
;
1895 def
->keywordTable
= JsKeywordTable
;
1896 def
->keywordCount
= ARRAY_SIZE (JsKeywordTable
);