2 * Copyright (c) 2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for JavaScript language
10 * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
12 * This is a good reference for different forms of the function statement:
13 * http://www.permadi.com/tutorial/jsFunc/
14 * Another good reference:
15 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
/*
 *	 Token predicates.  Both evaluate to a boolean; the whole expansion is
 *	 parenthesized so that the leading cast cannot combine with whatever
 *	 operator surrounds the macro invocation.
 */
/* True when the token's lexical type is `t'. */
#define isType(token,t)		((boolean) ((token)->type == (t)))
/* True when the token's keyword id is `k'. */
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))
48 * Tracks class and function names already created
50 static stringList
*ClassNames
;
51 static stringList
*FunctionNames
;
53 /* Used to specify type of keyword.
55 typedef enum eKeywordId
{
58 KEYWORD_capital_function
,
59 KEYWORD_capital_object
,
79 typedef enum eTokenType
{
100 TOKEN_POSTFIX_OPERATOR
,
101 TOKEN_BINARY_OPERATOR
104 typedef struct sTokenInfo
{
109 unsigned long lineNumber
;
119 static tokenType LastTokenType
;
121 static langType Lang_js
;
133 static kindOption JsKinds
[] = {
134 { TRUE
, 'f', "function", "functions" },
135 { TRUE
, 'c', "class", "classes" },
136 { TRUE
, 'm', "method", "methods" },
137 { TRUE
, 'p', "property", "properties" },
138 { TRUE
, 'C', "constant", "constants" },
139 { TRUE
, 'v', "variable", "global variables" }
142 static const keywordTable JsKeywordTable
[] = {
143 /* keyword keyword ID */
144 { "function", KEYWORD_function
},
145 { "Function", KEYWORD_capital_function
},
146 { "Object", KEYWORD_capital_object
},
147 { "prototype", KEYWORD_prototype
},
148 { "var", KEYWORD_var
},
149 { "let", KEYWORD_let
},
150 { "const", KEYWORD_const
},
151 { "new", KEYWORD_new
},
152 { "this", KEYWORD_this
},
153 { "for", KEYWORD_for
},
154 { "while", KEYWORD_while
},
155 { "do", KEYWORD_do
},
156 { "if", KEYWORD_if
},
157 { "else", KEYWORD_else
},
158 { "switch", KEYWORD_switch
},
159 { "try", KEYWORD_try
},
160 { "catch", KEYWORD_catch
},
161 { "finally", KEYWORD_finally
},
162 { "sap", KEYWORD_sap
},
163 { "return", KEYWORD_return
}
167 * FUNCTION DEFINITIONS
170 /* Recursive functions */
171 static void parseFunction (tokenInfo
*const token
);
172 static boolean
parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
);
173 static boolean
parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
);
174 static void parseUI5 (tokenInfo
*const token
);
176 static boolean
isIdentChar (const int c
)
179 (isalpha (c
) || isdigit (c
) || c
== '$' ||
180 c
== '@' || c
== '_' || c
== '#');
183 static tokenInfo
*newToken (void)
185 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
187 token
->type
= TOKEN_UNDEFINED
;
188 token
->keyword
= KEYWORD_NONE
;
189 token
->string
= vStringNew ();
190 token
->scope
= vStringNew ();
191 token
->nestLevel
= 0;
192 token
->ignoreTag
= FALSE
;
193 token
->lineNumber
= getSourceLineNumber ();
194 token
->filePosition
= getInputFilePosition ();
199 static void deleteToken (tokenInfo
*const token
)
201 vStringDelete (token
->string
);
202 vStringDelete (token
->scope
);
207 * Tag generation functions
210 static void makeJsTag (tokenInfo
*const token
, const jsKind kind
, vString
*const signature
)
212 if (JsKinds
[kind
].enabled
&& ! token
->ignoreTag
)
214 const char *name
= vStringValue (token
->string
);
215 vString
*fullscope
= vStringNewCopy (token
->scope
);
219 if ((p
= strrchr (name
, '.')) != NULL
)
221 if (vStringLength (fullscope
) > 0)
222 vStringPut (fullscope
, '.');
223 vStringNCatS (fullscope
, name
, p
- name
);
227 initTagEntry (&e
, name
);
229 e
.lineNumber
= token
->lineNumber
;
230 e
.filePosition
= token
->filePosition
;
231 e
.kindName
= JsKinds
[kind
].name
;
232 e
.kind
= JsKinds
[kind
].letter
;
234 if ( vStringLength(fullscope
) > 0 )
236 jsKind parent_kind
= JSTAG_CLASS
;
238 /* if we're creating a function (and not a method),
239 * guess we're inside another function */
240 if (kind
== JSTAG_FUNCTION
)
241 parent_kind
= JSTAG_FUNCTION
;
243 e
.extensionFields
.scope
[0] = JsKinds
[parent_kind
].name
;
244 e
.extensionFields
.scope
[1] = vStringValue (fullscope
);
247 if (signature
&& vStringLength(signature
))
250 /* sanitize signature by replacing all control characters with a
251 * space (because it's simple).
252 * there should never be any junk in a valid signature, but who
253 * knows what the user wrote and CTags doesn't cope well with weird
255 for (i
= 0; i
< signature
->length
; i
++)
257 unsigned char c
= (unsigned char) signature
->buffer
[i
];
258 if (c
< 0x20 /* below space */ || c
== 0x7F /* DEL */)
259 signature
->buffer
[i
] = ' ';
261 e
.extensionFields
.signature
= vStringValue(signature
);
265 vStringDelete (fullscope
);
269 static void makeClassTag (tokenInfo
*const token
, vString
*const signature
)
273 if ( ! token
->ignoreTag
)
275 fulltag
= vStringNew ();
276 if (vStringLength (token
->scope
) > 0)
278 vStringCopy(fulltag
, token
->scope
);
279 vStringCatS (fulltag
, ".");
280 vStringCatS (fulltag
, vStringValue(token
->string
));
284 vStringCopy(fulltag
, token
->string
);
286 vStringTerminate(fulltag
);
287 if ( ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
289 stringListAdd (ClassNames
, vStringNewCopy (fulltag
));
290 makeJsTag (token
, JSTAG_CLASS
, signature
);
292 vStringDelete (fulltag
);
296 static void makeFunctionTag (tokenInfo
*const token
, vString
*const signature
)
300 if ( ! token
->ignoreTag
)
302 fulltag
= vStringNew ();
303 if (vStringLength (token
->scope
) > 0)
305 vStringCopy(fulltag
, token
->scope
);
306 vStringCatS (fulltag
, ".");
307 vStringCatS (fulltag
, vStringValue(token
->string
));
311 vStringCopy(fulltag
, token
->string
);
313 vStringTerminate(fulltag
);
314 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) )
316 stringListAdd (FunctionNames
, vStringNewCopy (fulltag
));
317 makeJsTag (token
, JSTAG_FUNCTION
, signature
);
319 vStringDelete (fulltag
);
/*
 *	 Consumes input up to and including the next occurrence of `c'.
 *	 Returns the character that stopped the scan: `c' itself, or EOF if the
 *	 input ended first.
 */
static int skipToCharacter (const int c)
{
	for (;;)
	{
		const int next = getcFromInputFile ();

		if (next == EOF || next == c)
			return next;
	}
}
337 static void parseString (vString
*const string
, const int delimiter
)
342 int c
= getcFromInputFile ();
347 /* Eat the escape sequence (\", \', etc). We properly handle
348 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
349 * as an unescaped character, which is invalid and handled below.
350 * Also, handle the fact that <LineContinuation> produces an empty
352 * See ECMA-262 7.8.4 */
353 c
= getcFromInputFile();
354 if (c
!= '\r' && c
!= '\n')
355 vStringPut(string
, c
);
358 c
= getcFromInputFile();
360 ungetcToInputFile (c
);
363 else if (c
== delimiter
)
365 else if (c
== '\r' || c
== '\n')
367 /* those are invalid when not escaped */
369 /* we don't want to eat the newline itself to let the automatic
370 * semicolon insertion code kick in */
371 ungetcToInputFile (c
);
374 vStringPut (string
, c
);
376 vStringTerminate (string
);
379 static void parseRegExp (void)
382 boolean in_range
= FALSE
;
386 c
= getcFromInputFile ();
387 if (! in_range
&& c
== '/')
391 c
= getcFromInputFile ();
392 } while (isalpha (c
));
393 ungetcToInputFile (c
);
397 c
= getcFromInputFile (); /* skip next character */
405 /* Read a C identifier beginning with "firstChar" and places it into
408 static void parseIdentifier (vString
*const string
, const int firstChar
)
411 Assert (isIdentChar (c
));
414 vStringPut (string
, c
);
415 c
= getcFromInputFile ();
416 } while (isIdentChar (c
));
417 vStringTerminate (string
);
418 ungetcToInputFile (c
); /* unget non-identifier character */
421 static keywordId
analyzeToken (vString
*const name
)
423 vString
*keyword
= vStringNew ();
425 vStringCopyToLower (keyword
, name
);
426 result
= (keywordId
) lookupKeyword (vStringValue (keyword
), Lang_js
);
427 vStringDelete (keyword
);
431 static void readTokenFull (tokenInfo
*const token
, boolean include_newlines
, vString
*const repr
)
436 token
->type
= TOKEN_UNDEFINED
;
437 token
->keyword
= KEYWORD_NONE
;
438 vStringClear (token
->string
);
444 c
= getcFromInputFile ();
447 while (c
== '\t' || c
== ' ' ||
448 ((c
== '\r' || c
== '\n') && ! include_newlines
));
450 token
->lineNumber
= getSourceLineNumber ();
451 token
->filePosition
= getInputFilePosition ();
456 vStringPut (repr
, ' ');
457 vStringPut (repr
, c
);
462 case EOF
: token
->type
= TOKEN_EOF
; break;
463 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
464 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
465 case ';': token
->type
= TOKEN_SEMICOLON
; break;
466 case ',': token
->type
= TOKEN_COMMA
; break;
467 case '.': token
->type
= TOKEN_PERIOD
; break;
468 case ':': token
->type
= TOKEN_COLON
; break;
469 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
470 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
471 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
472 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
473 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
478 int d
= getcFromInputFile ();
479 if (d
== c
) /* ++ or -- */
480 token
->type
= TOKEN_POSTFIX_OPERATOR
;
483 ungetcToInputFile (d
);
484 token
->type
= TOKEN_BINARY_OPERATOR
;
497 token
->type
= TOKEN_BINARY_OPERATOR
;
502 /* This isn't strictly correct per the standard, but following the
503 * real rules means understanding all statements, and that's not
504 * what the parser currently does. What we do here is a guess, by
505 * avoiding inserting semicolons that would make the statement on
506 * the left invalid. Hopefully this should not have false negatives
507 * (e.g. should not miss insertion of a semicolon) but might have
508 * false positives (e.g. it will wrongfully emit a semicolon for the
509 * newline in "foo\n+bar").
510 * This should however be mostly harmless as we only deal with
511 * newlines in specific situations where we know a false positive
512 * wouldn't hurt too bad. */
513 switch (LastTokenType
)
515 /* these cannot be the end of a statement, so hold the newline */
516 case TOKEN_EQUAL_SIGN
:
519 case TOKEN_FORWARD_SLASH
:
520 case TOKEN_BINARY_OPERATOR
:
521 /* and these already end one, no need to duplicate it */
522 case TOKEN_SEMICOLON
:
524 case TOKEN_CLOSE_CURLY
:
525 case TOKEN_OPEN_CURLY
:
526 include_newlines
= FALSE
; /* no need to recheck */
530 token
->type
= TOKEN_SEMICOLON
;
536 token
->type
= TOKEN_STRING
;
537 parseString (token
->string
, c
);
538 token
->lineNumber
= getSourceLineNumber ();
539 token
->filePosition
= getInputFilePosition ();
542 vStringCat (repr
, token
->string
);
543 vStringPut (repr
, c
);
548 c
= getcFromInputFile ();
549 if (c
!= '\\' && c
!= '"' && !isspace (c
))
550 ungetcToInputFile (c
);
551 token
->type
= TOKEN_CHARACTER
;
552 token
->lineNumber
= getSourceLineNumber ();
553 token
->filePosition
= getInputFilePosition ();
558 int d
= getcFromInputFile ();
559 if ( (d
!= '*') && /* is this the start of a comment? */
560 (d
!= '/') ) /* is a one line comment? */
562 ungetcToInputFile (d
);
563 switch (LastTokenType
)
565 case TOKEN_CHARACTER
:
566 case TOKEN_IDENTIFIER
:
568 case TOKEN_CLOSE_CURLY
:
569 case TOKEN_CLOSE_PAREN
:
570 case TOKEN_CLOSE_SQUARE
:
571 token
->type
= TOKEN_FORWARD_SLASH
;
575 token
->type
= TOKEN_REGEXP
;
577 token
->lineNumber
= getSourceLineNumber ();
578 token
->filePosition
= getInputFilePosition ();
584 if (repr
) /* remove the / we added */
585 repr
->buffer
[--repr
->length
] = 0;
590 skipToCharacter ('*');
591 c
= getcFromInputFile ();
595 ungetcToInputFile (c
);
596 } while (c
!= EOF
&& c
!= '\0');
599 else if (d
== '/') /* is this the start of a comment? */
601 skipToCharacter ('\n');
602 /* if we care about newlines, put it back so it is seen */
603 if (include_newlines
)
604 ungetcToInputFile ('\n');
612 /* skip shebang in case of e.g. Node.js scripts */
613 if (token
->lineNumber
> 1)
614 token
->type
= TOKEN_UNDEFINED
;
615 else if ((c
= getcFromInputFile ()) != '!')
617 ungetcToInputFile (c
);
618 token
->type
= TOKEN_UNDEFINED
;
622 skipToCharacter ('\n');
628 if (! isIdentChar (c
))
629 token
->type
= TOKEN_UNDEFINED
;
632 parseIdentifier (token
->string
, c
);
633 token
->lineNumber
= getSourceLineNumber ();
634 token
->filePosition
= getInputFilePosition ();
635 token
->keyword
= analyzeToken (token
->string
);
636 if (isKeyword (token
, KEYWORD_NONE
))
637 token
->type
= TOKEN_IDENTIFIER
;
639 token
->type
= TOKEN_KEYWORD
;
640 if (repr
&& vStringLength (token
->string
) > 1)
641 vStringCatS (repr
, vStringValue (token
->string
) + 1);
646 LastTokenType
= token
->type
;
649 static void readToken (tokenInfo
*const token
)
651 readTokenFull (token
, FALSE
, NULL
);
654 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
656 dest
->nestLevel
= src
->nestLevel
;
657 dest
->lineNumber
= src
->lineNumber
;
658 dest
->filePosition
= src
->filePosition
;
659 dest
->type
= src
->type
;
660 dest
->keyword
= src
->keyword
;
661 vStringCopy(dest
->string
, src
->string
);
662 vStringCopy(dest
->scope
, src
->scope
);
666 * Token parsing functions
669 static void skipArgumentList (tokenInfo
*const token
, boolean include_newlines
, vString
*const repr
)
673 if (isType (token
, TOKEN_OPEN_PAREN
)) /* arguments? */
677 vStringPut (repr
, '(');
678 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
680 readTokenFull (token
, FALSE
, repr
);
681 if (isType (token
, TOKEN_OPEN_PAREN
))
683 else if (isType (token
, TOKEN_CLOSE_PAREN
))
686 readTokenFull (token
, include_newlines
, NULL
);
690 static void skipArrayList (tokenInfo
*const token
, boolean include_newlines
)
695 * Handle square brackets
697 * So we must check for nested open and closing square brackets
700 if (isType (token
, TOKEN_OPEN_SQUARE
)) /* arguments? */
703 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
706 if (isType (token
, TOKEN_OPEN_SQUARE
))
708 else if (isType (token
, TOKEN_CLOSE_SQUARE
))
711 readTokenFull (token
, include_newlines
, NULL
);
715 static void addContext (tokenInfo
* const parent
, const tokenInfo
* const child
)
717 if (vStringLength (parent
->string
) > 0)
719 vStringCatS (parent
->string
, ".");
721 vStringCatS (parent
->string
, vStringValue(child
->string
));
722 vStringTerminate(parent
->string
);
725 static void addToScope (tokenInfo
* const token
, vString
* const extra
)
727 if (vStringLength (token
->scope
) > 0)
729 vStringCatS (token
->scope
, ".");
731 vStringCatS (token
->scope
, vStringValue(extra
));
732 vStringTerminate(token
->scope
);
739 static boolean
findCmdTerm (tokenInfo
*const token
, boolean include_newlines
)
742 * Read until we find either a semicolon or closing brace.
743 * Any nested braces will be handled within.
745 while (! isType (token
, TOKEN_SEMICOLON
) &&
746 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
747 ! isType (token
, TOKEN_EOF
))
749 /* Handle nested blocks */
750 if ( isType (token
, TOKEN_OPEN_CURLY
))
752 parseBlock (token
, token
);
753 readTokenFull (token
, include_newlines
, NULL
);
755 else if ( isType (token
, TOKEN_OPEN_PAREN
) )
757 skipArgumentList(token
, include_newlines
, NULL
);
759 else if ( isType (token
, TOKEN_OPEN_SQUARE
) )
761 skipArrayList(token
, include_newlines
);
765 readTokenFull (token
, include_newlines
, NULL
);
769 return isType (token
, TOKEN_SEMICOLON
);
772 static void parseSwitch (tokenInfo
*const token
)
775 * switch (expression) {
782 * default : statement;
788 if (isType (token
, TOKEN_OPEN_PAREN
))
791 * Handle nameless functions, these will only
792 * be considered methods.
794 skipArgumentList(token
, FALSE
, NULL
);
797 if (isType (token
, TOKEN_OPEN_CURLY
))
799 parseBlock (token
, token
);
803 static boolean
parseLoop (tokenInfo
*const token
, tokenInfo
*const parent
)
806 * Handles these statements
807 * for (x=0; x<3; x++)
808 * document.write("This text is repeated three times<br>");
810 * for (x=0; x<3; x++)
812 * document.write("This text is repeated three times<br>");
816 * document.write(number+"<br>");
821 * document.write(number+"<br>");
826 boolean is_terminated
= TRUE
;
828 if (isKeyword (token
, KEYWORD_for
) || isKeyword (token
, KEYWORD_while
))
832 if (isType (token
, TOKEN_OPEN_PAREN
))
835 * Handle nameless functions, these will only
836 * be considered methods.
838 skipArgumentList(token
, FALSE
, NULL
);
841 if (isType (token
, TOKEN_OPEN_CURLY
))
844 * This will be either a function or a class.
845 * We can only determine this by checking the body
846 * of the function. If we find a "this." we know
847 * it is a class, otherwise it is a function.
849 parseBlock (token
, parent
);
853 is_terminated
= parseLine(token
, parent
, FALSE
);
856 else if (isKeyword (token
, KEYWORD_do
))
860 if (isType (token
, TOKEN_OPEN_CURLY
))
863 * This will be either a function or a class.
864 * We can only determine this by checking the body
865 * of the function. If we find a "this." we know
866 * it is a class, otherwise it is a function.
868 parseBlock (token
, parent
);
872 is_terminated
= parseLine(token
, parent
, FALSE
);
878 if (isKeyword (token
, KEYWORD_while
))
882 if (isType (token
, TOKEN_OPEN_PAREN
))
885 * Handle nameless functions, these will only
886 * be considered methods.
888 skipArgumentList(token
, TRUE
, NULL
);
890 if (! isType (token
, TOKEN_SEMICOLON
))
891 is_terminated
= FALSE
;
895 return is_terminated
;
898 static boolean
parseIf (tokenInfo
*const token
, tokenInfo
*const parent
)
900 boolean read_next_token
= TRUE
;
902 * If statements have two forms
921 * This example is correctly written, but the
922 * else contains only 1 statement without a terminator
923 * since the function finishes with the closing brace.
932 * TODO: Deal with statements that can optionally end
933 * without a semi-colon. Currently this messes up
934 * the parsing of blocks.
935 * Need to somehow detect this has happened, and either
936 * backup a token, or skip reading the next token if
937 * that is possible from all code locations.
943 if (isKeyword (token
, KEYWORD_if
))
946 * Check for an "else if" and consume the "if"
951 if (isType (token
, TOKEN_OPEN_PAREN
))
954 * Handle nameless functions, these will only
955 * be considered methods.
957 skipArgumentList(token
, FALSE
, NULL
);
960 if (isType (token
, TOKEN_OPEN_CURLY
))
963 * This will be either a function or a class.
964 * We can only determine this by checking the body
965 * of the function. If we find a "this." we know
966 * it is a class, otherwise it is a function.
968 parseBlock (token
, parent
);
972 /* The next token should only be read if this statement had its own
974 read_next_token
= findCmdTerm (token
, TRUE
);
976 return read_next_token
;
979 static void parseFunction (tokenInfo
*const token
)
981 tokenInfo
*const name
= newToken ();
982 vString
*const signature
= vStringNew ();
983 boolean is_class
= FALSE
;
986 * This deals with these formats
987 * function validFunctionTwo(a,b) {}
991 /* Add scope in case this is an INNER function */
992 addToScope(name
, token
->scope
);
995 while (isType (token
, TOKEN_PERIOD
))
998 if ( isKeyword(token
, KEYWORD_NONE
) )
1000 addContext (name
, token
);
1005 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1006 skipArgumentList(token
, FALSE
, signature
);
1008 if ( isType (token
, TOKEN_OPEN_CURLY
) )
1010 is_class
= parseBlock (token
, name
);
1012 makeClassTag (name
, signature
);
1014 makeFunctionTag (name
, signature
);
1017 findCmdTerm (token
, FALSE
);
1019 vStringDelete (signature
);
1023 static boolean
parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
)
1025 boolean is_class
= FALSE
;
1026 boolean read_next_token
= TRUE
;
1027 vString
* saveScope
= vStringNew ();
1028 tokenInfo
*const parent
= newToken ();
1030 /* backup the parent token to allow calls like parseBlock(token, token) */
1031 copyToken (parent
, orig_parent
);
1035 * Make this routine a bit more forgiving.
1036 * If called on an open_curly advance it
1038 if ( isType (token
, TOKEN_OPEN_CURLY
) &&
1039 isKeyword(token
, KEYWORD_NONE
) )
1042 if (! isType (token
, TOKEN_CLOSE_CURLY
))
1045 * Read until we find the closing brace,
1046 * any nested braces will be handled within
1050 read_next_token
= TRUE
;
1051 if (isKeyword (token
, KEYWORD_this
))
1054 * Means we are inside a class and have found
1055 * a class, not a function
1058 vStringCopy(saveScope
, token
->scope
);
1059 addToScope (token
, parent
->string
);
1062 * Ignore the remainder of the line
1063 * findCmdTerm(token);
1065 read_next_token
= parseLine (token
, parent
, is_class
);
1067 vStringCopy(token
->scope
, saveScope
);
1069 else if (isKeyword (token
, KEYWORD_var
) ||
1070 isKeyword (token
, KEYWORD_let
) ||
1071 isKeyword (token
, KEYWORD_const
))
1074 * Potentially we have found an inner function.
1075 * Set something to indicate the scope
1077 vStringCopy(saveScope
, token
->scope
);
1078 addToScope (token
, parent
->string
);
1079 read_next_token
= parseLine (token
, parent
, is_class
);
1080 vStringCopy(token
->scope
, saveScope
);
1082 else if (isKeyword (token
, KEYWORD_function
))
1084 vStringCopy(saveScope
, token
->scope
);
1085 addToScope (token
, parent
->string
);
1086 parseFunction (token
);
1087 vStringCopy(token
->scope
, saveScope
);
1089 else if (isType (token
, TOKEN_OPEN_CURLY
))
1091 /* Handle nested blocks */
1092 parseBlock (token
, parent
);
1097 * It is possible for a line to have no terminator
1098 * if the following line is a closing brace.
1099 * parseLine will detect this case and indicate
1100 * whether we should read an additional token.
1102 read_next_token
= parseLine (token
, parent
, is_class
);
1106 * Always read a new token unless we find a statement without
1107 * an ending terminator
1109 if( read_next_token
)
1113 * If we find a statement without a terminator consider the
1114 * block finished, otherwise the stack will be off by one.
1116 } while (! isType (token
, TOKEN_EOF
) &&
1117 ! isType (token
, TOKEN_CLOSE_CURLY
) && read_next_token
);
1120 deleteToken (parent
);
1121 vStringDelete(saveScope
);
1127 static boolean
parseMethods (tokenInfo
*const token
, tokenInfo
*const class)
1129 tokenInfo
*const name
= newToken ();
1130 boolean has_methods
= FALSE
;
1133 * This deals with these formats
1134 * validProperty : 2,
1135 * validMethod : function(a,b) {}
1136 * 'validMethod2' : function(a,b) {}
1137 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1143 if (isType (token
, TOKEN_CLOSE_CURLY
))
1146 * This was most likely a variable declaration of a hash table.
1147 * indicate there were no methods and return.
1149 has_methods
= FALSE
;
1153 if (isType (token
, TOKEN_STRING
) || isKeyword(token
, KEYWORD_NONE
))
1155 copyToken(name
, token
);
1158 if ( isType (token
, TOKEN_COLON
) )
1161 if ( isKeyword (token
, KEYWORD_function
) )
1163 vString
*const signature
= vStringNew ();
1166 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1168 skipArgumentList(token
, FALSE
, signature
);
1171 if (isType (token
, TOKEN_OPEN_CURLY
))
1174 addToScope (name
, class->string
);
1175 makeJsTag (name
, JSTAG_METHOD
, signature
);
1176 parseBlock (token
, name
);
1179 * Read to the closing curly, check next
1180 * token, if a comma, we must loop again
1185 vStringDelete (signature
);
1189 vString
* saveScope
= vStringNew ();
1190 boolean has_child_methods
= FALSE
;
1192 /* skip whatever is the value */
1193 while (! isType (token
, TOKEN_COMMA
) &&
1194 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
1195 ! isType (token
, TOKEN_EOF
))
1197 if (isType (token
, TOKEN_OPEN_CURLY
))
1199 vStringCopy (saveScope
, token
->scope
);
1200 addToScope (token
, class->string
);
1201 has_child_methods
= parseMethods (token
, name
);
1202 vStringCopy (token
->scope
, saveScope
);
1205 else if (isType (token
, TOKEN_OPEN_PAREN
))
1207 skipArgumentList (token
, FALSE
, NULL
);
1209 else if (isType (token
, TOKEN_OPEN_SQUARE
))
1211 skipArrayList (token
, FALSE
);
1218 vStringDelete (saveScope
);
1221 addToScope (name
, class->string
);
1222 if (has_child_methods
)
1223 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1225 makeJsTag (name
, JSTAG_PROPERTY
, NULL
);
1229 } while ( isType(token
, TOKEN_COMMA
) );
1231 findCmdTerm (token
, FALSE
);
1239 static boolean
parseStatement (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
)
1241 tokenInfo
*const name
= newToken ();
1242 tokenInfo
*const secondary_name
= newToken ();
1243 tokenInfo
*const method_body_token
= newToken ();
1244 vString
* saveScope
= vStringNew ();
1245 boolean is_class
= FALSE
;
1246 boolean is_var
= FALSE
;
1247 boolean is_const
= FALSE
;
1248 boolean is_terminated
= TRUE
;
1249 boolean is_global
= FALSE
;
1250 boolean has_methods
= FALSE
;
1253 vStringClear(saveScope
);
1255 * Functions can be named or unnamed.
1256 * This deals with these formats:
1258 * validFunctionOne = function(a,b) {}
1259 * testlib.validFunctionFive = function(a,b) {}
1260 * var innerThree = function(a,b) {}
1261 * var innerFour = (a,b) {}
1262 * var D2 = secondary_fcn_name(a,b) {}
1263 * var D3 = new Function("a", "b", "return a+b;");
1265 * testlib.extras.ValidClassOne = function(a,b) {
1269 * testlib.extras.ValidClassOne.prototype = {
1270 * 'validMethodOne' : function(a,b) {},
1271 * 'validMethodTwo' : function(a,b) {}
1273 * ValidClassTwo = function ()
1275 * this.validMethodThree = function() {}
1277 * this.validMethodFour = () {}
1279 * Database.prototype.validMethodThree = Database_getTodaysDate;
1282 if ( is_inside_class
)
1285 * var can precede an inner function
1287 if ( isKeyword(token
, KEYWORD_var
) ||
1288 isKeyword(token
, KEYWORD_let
) ||
1289 isKeyword(token
, KEYWORD_const
) )
1291 is_const
= isKeyword(token
, KEYWORD_const
);
1293 * Only create variables for global scope
1295 if ( token
->nestLevel
== 0 )
1302 if ( isKeyword(token
, KEYWORD_this
) )
1305 if (isType (token
, TOKEN_PERIOD
))
1311 copyToken(name
, token
);
1313 while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1314 ! isType (token
, TOKEN_SEMICOLON
) &&
1315 ! isType (token
, TOKEN_EQUAL_SIGN
) &&
1316 ! isType (token
, TOKEN_EOF
))
1318 if (isType (token
, TOKEN_OPEN_CURLY
))
1319 parseBlock (token
, parent
);
1321 /* Potentially the name of the function */
1323 if (isType (token
, TOKEN_PERIOD
))
1326 * Cannot be a global variable if it has dot references in the name
1332 if ( isKeyword(token
, KEYWORD_NONE
) )
1336 addToScope(token
, name
->string
);
1339 addContext (name
, token
);
1343 else if ( isKeyword(token
, KEYWORD_prototype
) )
1346 * When we reach the "prototype" tag, we infer:
1347 * "BindAgent" is a class
1348 * "build" is a method
1350 * function BindAgent( repeatableIdName, newParentIdName ) {
1354 * Specified function name: "build"
1355 * BindAgent.prototype.build = function( mode ) {
1356 * maybe parse nested functions
1361 * ValidClassOne.prototype = {
1362 * 'validMethodOne' : function(a,b) {},
1363 * 'validMethodTwo' : function(a,b) {}
1367 makeClassTag (name
, NULL
);
1371 * There should be a ".function_name" next.
1374 if (isType (token
, TOKEN_PERIOD
))
1380 if ( isKeyword(token
, KEYWORD_NONE
) )
1382 vString
*const signature
= vStringNew ();
1384 vStringCopy(saveScope
, token
->scope
);
1385 addToScope(token
, name
->string
);
1387 readToken (method_body_token
);
1388 vStringCopy (method_body_token
->scope
, token
->scope
);
1390 while (! isType (method_body_token
, TOKEN_SEMICOLON
) &&
1391 ! isType (method_body_token
, TOKEN_CLOSE_CURLY
) &&
1392 ! isType (method_body_token
, TOKEN_OPEN_CURLY
) &&
1393 ! isType (method_body_token
, TOKEN_EOF
))
1395 if ( isType (method_body_token
, TOKEN_OPEN_PAREN
) )
1396 skipArgumentList(method_body_token
, FALSE
,
1397 vStringLength (signature
) == 0 ? signature
: NULL
);
1399 readToken (method_body_token
);
1402 makeJsTag (token
, JSTAG_METHOD
, signature
);
1403 vStringDelete (signature
);
1405 if ( isType (method_body_token
, TOKEN_OPEN_CURLY
))
1407 parseBlock (method_body_token
, token
);
1408 is_terminated
= TRUE
;
1411 is_terminated
= isType (method_body_token
, TOKEN_SEMICOLON
);
1415 else if (isType (token
, TOKEN_EQUAL_SIGN
))
1418 if (isType (token
, TOKEN_OPEN_CURLY
))
1423 * Creates tags for each of these class methods
1424 * ValidClassOne.prototype = {
1425 * 'validMethodOne' : function(a,b) {},
1426 * 'validMethodTwo' : function(a,b) {}
1429 parseMethods(token
, name
);
1431 * Find to the end of the statement
1433 findCmdTerm (token
, FALSE
);
1434 token
->ignoreTag
= FALSE
;
1435 is_terminated
= TRUE
;
1442 } while (isType (token
, TOKEN_PERIOD
));
1445 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1446 skipArgumentList(token
, FALSE
, NULL
);
1448 if ( isType (token
, TOKEN_OPEN_SQUARE
) )
1449 skipArrayList(token
, FALSE
);
1452 if ( isType (token, TOKEN_OPEN_CURLY) )
1454 is_class = parseBlock (token, name);
1459 if ( isType (token
, TOKEN_CLOSE_CURLY
) )
1462 * Reaching this section without having
1463 * processed an open curly brace indicates
1464 * the statement is most likely not terminated.
1466 is_terminated
= FALSE
;
1470 if ( isType (token
, TOKEN_SEMICOLON
) )
1473 * Only create variables for global scope
1475 if ( token
->nestLevel
== 0 && is_global
)
1478 * Handles this syntax:
1481 if (isType (token
, TOKEN_SEMICOLON
))
1482 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1485 * Statement has ended.
1486 * This deals with calls to functions, like:
1492 if ( isType (token
, TOKEN_EQUAL_SIGN
) )
1498 /* rvalue might be surrounded with parentheses */
1499 while (isType (token
, TOKEN_OPEN_PAREN
))
1505 if ( isKeyword (token
, KEYWORD_function
) )
1507 vString
*const signature
= vStringNew ();
1511 if ( isKeyword (token
, KEYWORD_NONE
) &&
1512 ! isType (token
, TOKEN_OPEN_PAREN
) )
1515 * Functions of this format:
1516 * var D2A = function theAdd(a, b)
1520 * Are really two separate defined functions and
1521 * can be referenced in two ways:
1522 * alert( D2A(1,2) ); // produces 3
1523 * alert( theAdd(1,2) ); // also produces 3
1524 * So it must have two tags:
1527 * Save the reference to the name for later use, once
1528 * we have established this is a valid function we will
1529 * create the secondary reference to it.
1531 copyToken(secondary_name
, token
);
1535 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1536 skipArgumentList(token
, FALSE
, signature
);
1538 if (isType (token
, TOKEN_OPEN_CURLY
))
1541 * This will be either a function or a class.
1542 * We can only determine this by checking the body
1543 * of the function. If we find a "this." we know
1544 * it is a class, otherwise it is a function.
1546 if ( is_inside_class
)
1548 makeJsTag (name
, JSTAG_METHOD
, signature
);
1549 if ( vStringLength(secondary_name
->string
) > 0 )
1550 makeFunctionTag (secondary_name
, signature
);
1551 parseBlock (token
, name
);
1555 is_class
= parseBlock (token
, name
);
1557 makeClassTag (name
, signature
);
1559 makeFunctionTag (name
, signature
);
1561 if ( vStringLength(secondary_name
->string
) > 0 )
1562 makeFunctionTag (secondary_name
, signature
);
1566 vStringDelete (signature
);
1568 else if (isType (token
, TOKEN_OPEN_CURLY
))
1571 * Creates tags for each of these class methods
1572 * ValidClassOne.prototype = {
1573 * 'validMethodOne' : function(a,b) {},
1574 * 'validMethodTwo' : function(a,b) {}
1576 * Or checks if this is a hash variable.
1579 has_methods
= parseMethods(token
, name
);
1581 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1585 * Only create variables for global scope
1587 if ( token
->nestLevel
== 0 && is_global
)
1590 * A pointer can be created to the function.
1591 * If we recognize the function/class name ignore the variable.
1592 * This format looks identical to a variable definition.
1593 * A variable defined outside of a block is considered
1594 * a global variable:
1597 * This is not a global variable:
1598 * var g_var = function;
1599 * This is a global variable:
1600 * var g_var = different_var_name;
1602 fulltag
= vStringNew ();
1603 if (vStringLength (token
->scope
) > 0)
1605 vStringCopy(fulltag
, token
->scope
);
1606 vStringCatS (fulltag
, ".");
1607 vStringCatS (fulltag
, vStringValue(token
->string
));
1611 vStringCopy(fulltag
, token
->string
);
1613 vStringTerminate(fulltag
);
1614 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1615 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1617 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1619 vStringDelete (fulltag
);
1622 if (isType (token
, TOKEN_CLOSE_CURLY
))
1625 * Assume the closing curly brace terminates
1628 is_terminated
= TRUE
;
1631 else if (isKeyword (token
, KEYWORD_new
))
1634 is_var
= isType (token
, TOKEN_IDENTIFIER
);
1635 if ( isKeyword (token
, KEYWORD_function
) ||
1636 isKeyword (token
, KEYWORD_capital_function
) ||
1637 isKeyword (token
, KEYWORD_capital_object
) ||
1640 if ( isKeyword (token
, KEYWORD_capital_object
) )
1644 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1645 skipArgumentList(token
, TRUE
, NULL
);
1647 if (isType (token
, TOKEN_SEMICOLON
))
1649 if ( token
->nestLevel
== 0 )
1653 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1659 makeClassTag (name
, NULL
);
1661 /* FIXME: we cannot really get a meaningful
1662 * signature from a `new Function()` call,
1663 * so for now just don't set any */
1664 makeFunctionTag (name
, NULL
);
1669 else if (isType (token
, TOKEN_CLOSE_CURLY
))
1670 is_terminated
= FALSE
;
1673 else if (isKeyword (token
, KEYWORD_NONE
))
1676 * Only create variables for global scope
1678 if ( token
->nestLevel
== 0 && is_global
)
1681 * A pointer can be created to the function.
1682 * If we recognize the function/class name ignore the variable.
1683 * This format looks identical to a variable definition.
1684 * A variable defined outside of a block is considered
1685 * a global variable:
1688 * This is not a global variable:
1689 * var g_var = function;
1690 * This is a global variable:
1691 * var g_var = different_var_name;
1693 fulltag
= vStringNew ();
1694 if (vStringLength (token
->scope
) > 0)
1696 vStringCopy(fulltag
, token
->scope
);
1697 vStringCatS (fulltag
, ".");
1698 vStringCatS (fulltag
, vStringValue(token
->string
));
1702 vStringCopy(fulltag
, token
->string
);
1704 vStringTerminate(fulltag
);
1705 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1706 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1708 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1710 vStringDelete (fulltag
);
1716 while (parenDepth
> 0 && ! isType (token
, TOKEN_EOF
))
1718 if (isType (token
, TOKEN_OPEN_PAREN
))
1720 else if (isType (token
, TOKEN_CLOSE_PAREN
))
1722 readTokenFull (token
, TRUE
, NULL
);
1724 if (isType (token
, TOKEN_CLOSE_CURLY
))
1725 is_terminated
= FALSE
;
1729 /* if we aren't already at the cmd end, advance to it and check whether
1730 * the statement was terminated */
1731 if (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1732 ! isType (token
, TOKEN_SEMICOLON
))
1735 * Statements can be optionally terminated in the case of
1736 * statement prior to a close curly brace as in the
1737 * document.write line below:
1739 * function checkForUpdate() {
1741 * document.write("hello from checkForUpdate<br>")
1746 is_terminated
= findCmdTerm (token
, TRUE
);
1750 vStringCopy(token
->scope
, saveScope
);
1752 deleteToken (secondary_name
);
1753 deleteToken (method_body_token
);
1754 vStringDelete(saveScope
);
1756 return is_terminated
;
1759 static void parseUI5 (tokenInfo
*const token
)
1761 tokenInfo
*const name
= newToken ();
1763 * SAPUI5 is built on top of jQuery.
1764 * It follows a standard format:
1765 * sap.ui.controller("id.of.controller", {
1766 * method_name : function... {
1769 * method_name : function ... {
1773 * Handle the parsing of the initial controller (and the
1774 * same for "view") and then allow the methods to be
1780 if (isType (token
, TOKEN_PERIOD
))
1783 while (! isType (token
, TOKEN_OPEN_PAREN
) &&
1784 ! isType (token
, TOKEN_EOF
))
1790 if (isType (token
, TOKEN_STRING
))
1792 copyToken(name
, token
);
1796 if (isType (token
, TOKEN_COMMA
))
1801 parseMethods (token
, name
);
1802 } while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1803 ! isType (token
, TOKEN_EOF
));
1809 static boolean
parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
)
1811 boolean is_terminated
= TRUE
;
1813 * Detect the common statements, if, while, for, do, ...
1814 * This is necessary since the last statement within a block "{}"
1815 * can be optionally terminated.
1817 * If the statement is not terminated, we need to tell
1818 * the calling routine to prevent reading an additional token
1819 * looking for the end of the statement.
1822 if (isType(token
, TOKEN_KEYWORD
))
1824 switch (token
->keyword
)
1829 is_terminated
= parseLoop (token
, parent
);
1835 case KEYWORD_finally
:
1836 /* Common semantics */
1837 is_terminated
= parseIf (token
, parent
);
1839 case KEYWORD_switch
:
1840 parseSwitch (token
);
1842 case KEYWORD_return
:
1843 is_terminated
= findCmdTerm (token
, TRUE
);
1846 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1853 * Special case where single line statements may not be
1854 * SEMICOLON terminated. parseBlock needs to know this
1855 * so that it does not read the next token.
1857 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1859 return is_terminated
;
1862 static void parseJsFile (tokenInfo
*const token
)
1868 if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_function
)
1869 parseFunction (token
);
1870 else if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_sap
)
1873 parseLine (token
, token
, FALSE
);
1874 } while (! isType (token
, TOKEN_EOF
));
1877 static void initialize (const langType language
)
1879 Assert (ARRAY_SIZE (JsKinds
) == JSTAG_COUNT
);
1883 static void findJsTags (void)
1885 tokenInfo
*const token
= newToken ();
1887 ClassNames
= stringListNew ();
1888 FunctionNames
= stringListNew ();
1889 LastTokenType
= TOKEN_UNDEFINED
;
1891 parseJsFile (token
);
1893 stringListDelete (ClassNames
);
1894 stringListDelete (FunctionNames
);
1896 FunctionNames
= NULL
;
1897 deleteToken (token
);
1900 /* Create parser definition structure */
1901 extern parserDefinition
* JavaScriptParser (void)
1903 static const char *const extensions
[] = { "js", NULL
};
1904 parserDefinition
*const def
= parserNew ("JavaScript");
1905 def
->extensions
= extensions
;
1907 * New definitions for parsing instead of regex
1909 def
->kinds
= JsKinds
;
1910 def
->kindCount
= ARRAY_SIZE (JsKinds
);
1911 def
->parser
= findJsTags
;
1912 def
->initialize
= initialize
;
1913 def
->keywordTable
= JsKeywordTable
;
1914 def
->keywordCount
= ARRAY_SIZE (JsKeywordTable
);
1918 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */