/*
 *   Copyright (c) 2003, Darren Hiebert
 *
 *   This source code is released for free distribution under the terms of the
 *   GNU General Public License.
 *
 *   This module contains functions for generating tags for JavaScript language
 *   files.
 *
 *   This is a good reference for different forms of the function statement:
 *       http://www.permadi.com/tutorial/jsFunc/
 *   Another good reference:
 *       http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
 */
19 #include "general.h" /* must always come first */
20 #include <ctype.h> /* to define isalpha () */
/* Token predicates: each yields a boolean telling whether `token` has the
 * given type (isType) or the given keyword id (isKeyword).  Arguments and the
 * whole expansion are parenthesized so the macros can be embedded in larger
 * expressions without precedence surprises. */
#define isType(token,t)		((boolean) ((token)->type == (t)))
#define isKeyword(token,k)	((boolean) ((token)->keyword == (k)))
44 * Tracks class and function names already created
46 static stringList
*ClassNames
;
47 static stringList
*FunctionNames
;
49 /* Used to specify type of keyword.
51 typedef enum eKeywordId
{
54 KEYWORD_capital_function
,
55 KEYWORD_capital_object
,
75 /* Used to determine whether keyword is valid for the token language and
78 typedef struct sKeywordDesc
{
83 typedef enum eTokenType
{
104 TOKEN_POSTFIX_OPERATOR
,
105 TOKEN_BINARY_OPERATOR
108 typedef struct sTokenInfo
{
113 unsigned long lineNumber
;
123 static tokenType LastTokenType
;
125 static langType Lang_js
;
137 static kindOption JsKinds
[] = {
138 { TRUE
, 'f', "function", "functions" },
139 { TRUE
, 'c', "class", "classes" },
140 { TRUE
, 'm', "method", "methods" },
141 { TRUE
, 'p', "property", "properties" },
142 { TRUE
, 'C', "constant", "constants" },
143 { TRUE
, 'v', "variable", "global variables" }
146 static const keywordDesc JsKeywordTable
[] = {
147 /* keyword keyword ID */
148 { "function", KEYWORD_function
},
149 { "Function", KEYWORD_capital_function
},
150 { "Object", KEYWORD_capital_object
},
151 { "prototype", KEYWORD_prototype
},
152 { "var", KEYWORD_var
},
153 { "let", KEYWORD_let
},
154 { "const", KEYWORD_const
},
155 { "new", KEYWORD_new
},
156 { "this", KEYWORD_this
},
157 { "for", KEYWORD_for
},
158 { "while", KEYWORD_while
},
159 { "do", KEYWORD_do
},
160 { "if", KEYWORD_if
},
161 { "else", KEYWORD_else
},
162 { "switch", KEYWORD_switch
},
163 { "try", KEYWORD_try
},
164 { "catch", KEYWORD_catch
},
165 { "finally", KEYWORD_finally
},
166 { "sap", KEYWORD_sap
},
167 { "return", KEYWORD_return
}
171 * FUNCTION DEFINITIONS
174 /* Recursive functions */
175 static void parseFunction (tokenInfo
*const token
);
176 static boolean
parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
);
177 static boolean
parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
);
178 static void parseUI5 (tokenInfo
*const token
);
180 static boolean
isIdentChar (const int c
)
183 (isalpha (c
) || isdigit (c
) || c
== '$' ||
184 c
== '@' || c
== '_' || c
== '#');
187 static void buildJsKeywordHash (void)
189 const size_t count
= sizeof (JsKeywordTable
) /
190 sizeof (JsKeywordTable
[0]);
192 for (i
= 0 ; i
< count
; ++i
)
194 const keywordDesc
* const p
= &JsKeywordTable
[i
];
195 addKeyword (p
->name
, Lang_js
, (int) p
->id
);
199 static tokenInfo
*newToken (void)
201 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
203 token
->type
= TOKEN_UNDEFINED
;
204 token
->keyword
= KEYWORD_NONE
;
205 token
->string
= vStringNew ();
206 token
->scope
= vStringNew ();
207 token
->nestLevel
= 0;
208 token
->ignoreTag
= FALSE
;
209 token
->lineNumber
= getSourceLineNumber ();
210 token
->filePosition
= getInputFilePosition ();
215 static void deleteToken (tokenInfo
*const token
)
217 vStringDelete (token
->string
);
218 vStringDelete (token
->scope
);
223 * Tag generation functions
226 static void makeJsTag (tokenInfo
*const token
, const jsKind kind
, vString
*const signature
)
228 if (JsKinds
[kind
].enabled
&& ! token
->ignoreTag
)
230 const char *name
= vStringValue (token
->string
);
231 vString
*fullscope
= vStringNewCopy (token
->scope
);
235 if ((p
= strrchr (name
, '.')) != NULL
)
237 if (vStringLength (fullscope
) > 0)
238 vStringPut (fullscope
, '.');
239 vStringNCatS (fullscope
, name
, p
- name
);
243 initTagEntry (&e
, name
);
245 e
.lineNumber
= token
->lineNumber
;
246 e
.filePosition
= token
->filePosition
;
247 e
.kindName
= JsKinds
[kind
].name
;
248 e
.kind
= JsKinds
[kind
].letter
;
250 if ( vStringLength(fullscope
) > 0 )
252 jsKind parent_kind
= JSTAG_CLASS
;
254 /* if we're creating a function (and not a method),
255 * guess we're inside another function */
256 if (kind
== JSTAG_FUNCTION
)
257 parent_kind
= JSTAG_FUNCTION
;
259 e
.extensionFields
.scope
[0] = JsKinds
[parent_kind
].name
;
260 e
.extensionFields
.scope
[1] = vStringValue (fullscope
);
263 if (signature
&& vStringLength(signature
))
266 /* sanitize signature by replacing all control characters with a
267 * space (because it's simple).
268 * there should never be any junk in a valid signature, but who
269 * knows what the user wrote and CTags doesn't cope well with weird
271 for (i
= 0; i
< signature
->length
; i
++)
273 unsigned char c
= (unsigned char) signature
->buffer
[i
];
274 if (c
< 0x20 /* below space */ || c
== 0x7F /* DEL */)
275 signature
->buffer
[i
] = ' ';
277 e
.extensionFields
.arglist
= vStringValue(signature
);
281 vStringDelete (fullscope
);
285 static void makeClassTag (tokenInfo
*const token
, vString
*const signature
)
289 if ( ! token
->ignoreTag
)
291 fulltag
= vStringNew ();
292 if (vStringLength (token
->scope
) > 0)
294 vStringCopy(fulltag
, token
->scope
);
295 vStringCatS (fulltag
, ".");
296 vStringCatS (fulltag
, vStringValue(token
->string
));
300 vStringCopy(fulltag
, token
->string
);
302 vStringTerminate(fulltag
);
303 if ( ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
305 stringListAdd (ClassNames
, vStringNewCopy (fulltag
));
306 makeJsTag (token
, JSTAG_CLASS
, signature
);
308 vStringDelete (fulltag
);
312 static void makeFunctionTag (tokenInfo
*const token
, vString
*const signature
)
316 if ( ! token
->ignoreTag
)
318 fulltag
= vStringNew ();
319 if (vStringLength (token
->scope
) > 0)
321 vStringCopy(fulltag
, token
->scope
);
322 vStringCatS (fulltag
, ".");
323 vStringCatS (fulltag
, vStringValue(token
->string
));
327 vStringCopy(fulltag
, token
->string
);
329 vStringTerminate(fulltag
);
330 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) )
332 stringListAdd (FunctionNames
, vStringNewCopy (fulltag
));
333 makeJsTag (token
, JSTAG_FUNCTION
, signature
);
335 vStringDelete (fulltag
);
/* Consumes input up to and including the next occurrence of `c`.
 * Returns the last character read: `c` when it was found, EOF otherwise. */
static int skipToCharacter (const int c)
{
	int d;

	do
	{
		d = fileGetc ();
	} while (d != EOF && d != c);

	return d;
}
353 static void parseString (vString
*const string
, const int delimiter
)
363 /* Eat the escape sequence (\", \', etc). We properly handle
364 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
365 * as an unescaped character, which is invalid and handled below.
366 * Also, handle the fact that <LineContinuation> produces an empty
368 * See ECMA-262 7.8.4 */
370 if (c
!= '\r' && c
!= '\n')
371 vStringPut(string
, c
);
379 else if (c
== delimiter
)
381 else if (c
== '\r' || c
== '\n')
383 /* those are invalid when not escaped */
385 /* we don't want to eat the newline itself to let the automatic
386 * semicolon insertion code kick in */
390 vStringPut (string
, c
);
392 vStringTerminate (string
);
395 static void parseRegExp (void)
398 boolean in_range
= FALSE
;
403 if (! in_range
&& c
== '/')
408 } while (isalpha (c
));
413 c
= fileGetc (); /* skip next character */
421 /* Read a C identifier beginning with "firstChar" and places it into
424 static void parseIdentifier (vString
*const string
, const int firstChar
)
427 Assert (isIdentChar (c
));
430 vStringPut (string
, c
);
432 } while (isIdentChar (c
));
433 vStringTerminate (string
);
434 fileUngetc (c
); /* unget non-identifier character */
437 static keywordId
analyzeToken (vString
*const name
)
439 vString
*keyword
= vStringNew ();
441 vStringCopyToLower (keyword
, name
);
442 result
= (keywordId
) lookupKeyword (vStringValue (keyword
), Lang_js
);
443 vStringDelete (keyword
);
447 static void readTokenFull (tokenInfo
*const token
, boolean include_newlines
, vString
*const repr
)
452 token
->type
= TOKEN_UNDEFINED
;
453 token
->keyword
= KEYWORD_NONE
;
454 vStringClear (token
->string
);
463 while (c
== '\t' || c
== ' ' ||
464 ((c
== '\r' || c
== '\n') && ! include_newlines
));
466 token
->lineNumber
= getSourceLineNumber ();
467 token
->filePosition
= getInputFilePosition ();
472 vStringPut (repr
, ' ');
473 vStringPut (repr
, c
);
478 case EOF
: token
->type
= TOKEN_EOF
; break;
479 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
480 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
481 case ';': token
->type
= TOKEN_SEMICOLON
; break;
482 case ',': token
->type
= TOKEN_COMMA
; break;
483 case '.': token
->type
= TOKEN_PERIOD
; break;
484 case ':': token
->type
= TOKEN_COLON
; break;
485 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
486 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
487 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
488 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
489 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
495 if (d
== c
) /* ++ or -- */
496 token
->type
= TOKEN_POSTFIX_OPERATOR
;
500 token
->type
= TOKEN_BINARY_OPERATOR
;
513 token
->type
= TOKEN_BINARY_OPERATOR
;
518 /* This isn't strictly correct per the standard, but following the
519 * real rules means understanding all statements, and that's not
520 * what the parser currently does. What we do here is a guess, by
521 * avoiding inserting semicolons that would make the statement on
522 * the left invalid. Hopefully this should not have false negatives
523 * (e.g. should not miss insertion of a semicolon) but might have
524 * false positives (e.g. it will wrongfully emit a semicolon for the
525 * newline in "foo\n+bar").
526 * This should however be mostly harmless as we only deal with
527 * newlines in specific situations where we know a false positive
528 * wouldn't hurt too bad. */
529 switch (LastTokenType
)
531 /* these cannot be the end of a statement, so hold the newline */
532 case TOKEN_EQUAL_SIGN
:
535 case TOKEN_FORWARD_SLASH
:
536 case TOKEN_BINARY_OPERATOR
:
537 /* and these already end one, no need to duplicate it */
538 case TOKEN_SEMICOLON
:
540 case TOKEN_CLOSE_CURLY
:
541 case TOKEN_OPEN_CURLY
:
542 include_newlines
= FALSE
; /* no need to recheck */
546 token
->type
= TOKEN_SEMICOLON
;
552 token
->type
= TOKEN_STRING
;
553 parseString (token
->string
, c
);
554 token
->lineNumber
= getSourceLineNumber ();
555 token
->filePosition
= getInputFilePosition ();
558 vStringCat (repr
, token
->string
);
559 vStringPut (repr
, c
);
565 if (c
!= '\\' && c
!= '"' && !isspace (c
))
567 token
->type
= TOKEN_CHARACTER
;
568 token
->lineNumber
= getSourceLineNumber ();
569 token
->filePosition
= getInputFilePosition ();
575 if ( (d
!= '*') && /* is this the start of a comment? */
576 (d
!= '/') ) /* is a one line comment? */
579 switch (LastTokenType
)
581 case TOKEN_CHARACTER
:
582 case TOKEN_IDENTIFIER
:
584 case TOKEN_CLOSE_CURLY
:
585 case TOKEN_CLOSE_PAREN
:
586 case TOKEN_CLOSE_SQUARE
:
587 token
->type
= TOKEN_FORWARD_SLASH
;
591 token
->type
= TOKEN_REGEXP
;
593 token
->lineNumber
= getSourceLineNumber ();
594 token
->filePosition
= getInputFilePosition ();
600 if (repr
) /* remove the / we added */
601 repr
->buffer
[--repr
->length
] = 0;
606 skipToCharacter ('*');
612 } while (c
!= EOF
&& c
!= '\0');
615 else if (d
== '/') /* is this the start of a comment? */
617 skipToCharacter ('\n');
618 /* if we care about newlines, put it back so it is seen */
619 if (include_newlines
)
628 /* skip shebang in case of e.g. Node.js scripts */
629 if (token
->lineNumber
> 1)
630 token
->type
= TOKEN_UNDEFINED
;
631 else if ((c
= fileGetc ()) != '!')
634 token
->type
= TOKEN_UNDEFINED
;
638 skipToCharacter ('\n');
644 if (! isIdentChar (c
))
645 token
->type
= TOKEN_UNDEFINED
;
648 parseIdentifier (token
->string
, c
);
649 token
->lineNumber
= getSourceLineNumber ();
650 token
->filePosition
= getInputFilePosition ();
651 token
->keyword
= analyzeToken (token
->string
);
652 if (isKeyword (token
, KEYWORD_NONE
))
653 token
->type
= TOKEN_IDENTIFIER
;
655 token
->type
= TOKEN_KEYWORD
;
656 if (repr
&& vStringLength (token
->string
) > 1)
657 vStringCatS (repr
, vStringValue (token
->string
) + 1);
662 LastTokenType
= token
->type
;
665 static void readToken (tokenInfo
*const token
)
667 readTokenFull (token
, FALSE
, NULL
);
670 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
672 dest
->nestLevel
= src
->nestLevel
;
673 dest
->lineNumber
= src
->lineNumber
;
674 dest
->filePosition
= src
->filePosition
;
675 dest
->type
= src
->type
;
676 dest
->keyword
= src
->keyword
;
677 vStringCopy(dest
->string
, src
->string
);
678 vStringCopy(dest
->scope
, src
->scope
);
682 * Token parsing functions
685 static void skipArgumentList (tokenInfo
*const token
, boolean include_newlines
, vString
*const repr
)
689 if (isType (token
, TOKEN_OPEN_PAREN
)) /* arguments? */
693 vStringPut (repr
, '(');
694 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
696 readTokenFull (token
, FALSE
, repr
);
697 if (isType (token
, TOKEN_OPEN_PAREN
))
699 else if (isType (token
, TOKEN_CLOSE_PAREN
))
702 readTokenFull (token
, include_newlines
, NULL
);
706 static void skipArrayList (tokenInfo
*const token
, boolean include_newlines
)
711 * Handle square brackets
713 * So we must check for nested open and closing square brackets
716 if (isType (token
, TOKEN_OPEN_SQUARE
)) /* arguments? */
719 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
722 if (isType (token
, TOKEN_OPEN_SQUARE
))
724 else if (isType (token
, TOKEN_CLOSE_SQUARE
))
727 readTokenFull (token
, include_newlines
, NULL
);
731 static void addContext (tokenInfo
* const parent
, const tokenInfo
* const child
)
733 if (vStringLength (parent
->string
) > 0)
735 vStringCatS (parent
->string
, ".");
737 vStringCatS (parent
->string
, vStringValue(child
->string
));
738 vStringTerminate(parent
->string
);
741 static void addToScope (tokenInfo
* const token
, vString
* const extra
)
743 if (vStringLength (token
->scope
) > 0)
745 vStringCatS (token
->scope
, ".");
747 vStringCatS (token
->scope
, vStringValue(extra
));
748 vStringTerminate(token
->scope
);
755 static boolean
findCmdTerm (tokenInfo
*const token
, boolean include_newlines
)
758 * Read until we find either a semicolon or closing brace.
759 * Any nested braces will be handled within.
761 while (! isType (token
, TOKEN_SEMICOLON
) &&
762 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
763 ! isType (token
, TOKEN_EOF
))
765 /* Handle nested blocks */
766 if ( isType (token
, TOKEN_OPEN_CURLY
))
768 parseBlock (token
, token
);
769 readTokenFull (token
, include_newlines
, NULL
);
771 else if ( isType (token
, TOKEN_OPEN_PAREN
) )
773 skipArgumentList(token
, include_newlines
, NULL
);
775 else if ( isType (token
, TOKEN_OPEN_SQUARE
) )
777 skipArrayList(token
, include_newlines
);
781 readTokenFull (token
, include_newlines
, NULL
);
785 return isType (token
, TOKEN_SEMICOLON
);
788 static void parseSwitch (tokenInfo
*const token
)
791 * switch (expression) {
798 * default : statement;
804 if (isType (token
, TOKEN_OPEN_PAREN
))
807 * Handle nameless functions, these will only
808 * be considered methods.
810 skipArgumentList(token
, FALSE
, NULL
);
813 if (isType (token
, TOKEN_OPEN_CURLY
))
815 parseBlock (token
, token
);
819 static boolean
parseLoop (tokenInfo
*const token
, tokenInfo
*const parent
)
822 * Handles these statements
823 * for (x=0; x<3; x++)
824 * document.write("This text is repeated three times<br>");
826 * for (x=0; x<3; x++)
828 * document.write("This text is repeated three times<br>");
832 * document.write(number+"<br>");
837 * document.write(number+"<br>");
842 boolean is_terminated
= TRUE
;
844 if (isKeyword (token
, KEYWORD_for
) || isKeyword (token
, KEYWORD_while
))
848 if (isType (token
, TOKEN_OPEN_PAREN
))
851 * Handle nameless functions, these will only
852 * be considered methods.
854 skipArgumentList(token
, FALSE
, NULL
);
857 if (isType (token
, TOKEN_OPEN_CURLY
))
860 * This will be either a function or a class.
861 * We can only determine this by checking the body
862 * of the function. If we find a "this." we know
863 * it is a class, otherwise it is a function.
865 parseBlock (token
, parent
);
869 is_terminated
= parseLine(token
, parent
, FALSE
);
872 else if (isKeyword (token
, KEYWORD_do
))
876 if (isType (token
, TOKEN_OPEN_CURLY
))
879 * This will be either a function or a class.
880 * We can only determine this by checking the body
881 * of the function. If we find a "this." we know
882 * it is a class, otherwise it is a function.
884 parseBlock (token
, parent
);
888 is_terminated
= parseLine(token
, parent
, FALSE
);
894 if (isKeyword (token
, KEYWORD_while
))
898 if (isType (token
, TOKEN_OPEN_PAREN
))
901 * Handle nameless functions, these will only
902 * be considered methods.
904 skipArgumentList(token
, TRUE
, NULL
);
906 if (! isType (token
, TOKEN_SEMICOLON
))
907 is_terminated
= FALSE
;
911 return is_terminated
;
914 static boolean
parseIf (tokenInfo
*const token
, tokenInfo
*const parent
)
916 boolean read_next_token
= TRUE
;
918 * If statements have two forms
937 * This example if correctly written, but the
938 * else contains only 1 statement without a terminator
939 * since the function finishes with the closing brace.
948 * TODO: Deal with statements that can optional end
949 * without a semi-colon. Currently this messes up
950 * the parsing of blocks.
951 * Need to somehow detect this has happened, and either
952 * backup a token, or skip reading the next token if
953 * that is possible from all code locations.
959 if (isKeyword (token
, KEYWORD_if
))
962 * Check for an "else if" and consume the "if"
967 if (isType (token
, TOKEN_OPEN_PAREN
))
970 * Handle nameless functions, these will only
971 * be considered methods.
973 skipArgumentList(token
, FALSE
, NULL
);
976 if (isType (token
, TOKEN_OPEN_CURLY
))
979 * This will be either a function or a class.
980 * We can only determine this by checking the body
981 * of the function. If we find a "this." we know
982 * it is a class, otherwise it is a function.
984 parseBlock (token
, parent
);
988 /* The next token should only be read if this statement had its own
990 read_next_token
= findCmdTerm (token
, TRUE
);
992 return read_next_token
;
995 static void parseFunction (tokenInfo
*const token
)
997 tokenInfo
*const name
= newToken ();
998 vString
*const signature
= vStringNew ();
999 boolean is_class
= FALSE
;
1002 * This deals with these formats
1003 * function validFunctionTwo(a,b) {}
1007 /* Add scope in case this is an INNER function */
1008 addToScope(name
, token
->scope
);
1011 while (isType (token
, TOKEN_PERIOD
))
1014 if ( isKeyword(token
, KEYWORD_NONE
) )
1016 addContext (name
, token
);
1021 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1022 skipArgumentList(token
, FALSE
, signature
);
1024 if ( isType (token
, TOKEN_OPEN_CURLY
) )
1026 is_class
= parseBlock (token
, name
);
1028 makeClassTag (name
, signature
);
1030 makeFunctionTag (name
, signature
);
1033 findCmdTerm (token
, FALSE
);
1035 vStringDelete (signature
);
1039 static boolean
parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
)
1041 boolean is_class
= FALSE
;
1042 boolean read_next_token
= TRUE
;
1043 vString
* saveScope
= vStringNew ();
1044 tokenInfo
*const parent
= newToken ();
1046 /* backup the parent token to allow calls like parseBlock(token, token) */
1047 copyToken (parent
, orig_parent
);
1051 * Make this routine a bit more forgiving.
1052 * If called on an open_curly advance it
1054 if ( isType (token
, TOKEN_OPEN_CURLY
) &&
1055 isKeyword(token
, KEYWORD_NONE
) )
1058 if (! isType (token
, TOKEN_CLOSE_CURLY
))
1061 * Read until we find the closing brace,
1062 * any nested braces will be handled within
1066 read_next_token
= TRUE
;
1067 if (isKeyword (token
, KEYWORD_this
))
1070 * Means we are inside a class and have found
1071 * a class, not a function
1074 vStringCopy(saveScope
, token
->scope
);
1075 addToScope (token
, parent
->string
);
1078 * Ignore the remainder of the line
1079 * findCmdTerm(token);
1081 read_next_token
= parseLine (token
, parent
, is_class
);
1083 vStringCopy(token
->scope
, saveScope
);
1085 else if (isKeyword (token
, KEYWORD_var
) ||
1086 isKeyword (token
, KEYWORD_let
) ||
1087 isKeyword (token
, KEYWORD_const
))
1090 * Potentially we have found an inner function.
1091 * Set something to indicate the scope
1093 vStringCopy(saveScope
, token
->scope
);
1094 addToScope (token
, parent
->string
);
1095 read_next_token
= parseLine (token
, parent
, is_class
);
1096 vStringCopy(token
->scope
, saveScope
);
1098 else if (isKeyword (token
, KEYWORD_function
))
1100 vStringCopy(saveScope
, token
->scope
);
1101 addToScope (token
, parent
->string
);
1102 parseFunction (token
);
1103 vStringCopy(token
->scope
, saveScope
);
1105 else if (isType (token
, TOKEN_OPEN_CURLY
))
1107 /* Handle nested blocks */
1108 parseBlock (token
, parent
);
1113 * It is possible for a line to have no terminator
1114 * if the following line is a closing brace.
1115 * parseLine will detect this case and indicate
1116 * whether we should read an additional token.
1118 read_next_token
= parseLine (token
, parent
, is_class
);
1122 * Always read a new token unless we find a statement without
1123 * a ending terminator
1125 if( read_next_token
)
1129 * If we find a statement without a terminator consider the
1130 * block finished, otherwise the stack will be off by one.
1132 } while (! isType (token
, TOKEN_EOF
) &&
1133 ! isType (token
, TOKEN_CLOSE_CURLY
) && read_next_token
);
1136 deleteToken (parent
);
1137 vStringDelete(saveScope
);
1143 static boolean
parseMethods (tokenInfo
*const token
, tokenInfo
*const class)
1145 tokenInfo
*const name
= newToken ();
1146 boolean has_methods
= FALSE
;
1149 * This deals with these formats
1150 * validProperty : 2,
1151 * validMethod : function(a,b) {}
1152 * 'validMethod2' : function(a,b) {}
1153 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1159 if (isType (token
, TOKEN_CLOSE_CURLY
))
1162 * This was most likely a variable declaration of a hash table.
1163 * indicate there were no methods and return.
1165 has_methods
= FALSE
;
1169 if (isType (token
, TOKEN_STRING
) || isKeyword(token
, KEYWORD_NONE
))
1171 copyToken(name
, token
);
1174 if ( isType (token
, TOKEN_COLON
) )
1177 if ( isKeyword (token
, KEYWORD_function
) )
1179 vString
*const signature
= vStringNew ();
1182 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1184 skipArgumentList(token
, FALSE
, signature
);
1187 if (isType (token
, TOKEN_OPEN_CURLY
))
1190 addToScope (name
, class->string
);
1191 makeJsTag (name
, JSTAG_METHOD
, signature
);
1192 parseBlock (token
, name
);
1195 * Read to the closing curly, check next
1196 * token, if a comma, we must loop again
1201 vStringDelete (signature
);
1205 vString
* saveScope
= vStringNew ();
1206 boolean has_child_methods
= FALSE
;
1208 /* skip whatever is the value */
1209 while (! isType (token
, TOKEN_COMMA
) &&
1210 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
1211 ! isType (token
, TOKEN_EOF
))
1213 if (isType (token
, TOKEN_OPEN_CURLY
))
1215 vStringCopy (saveScope
, token
->scope
);
1216 addToScope (token
, class->string
);
1217 has_child_methods
= parseMethods (token
, name
);
1218 vStringCopy (token
->scope
, saveScope
);
1221 else if (isType (token
, TOKEN_OPEN_PAREN
))
1223 skipArgumentList (token
, FALSE
, NULL
);
1225 else if (isType (token
, TOKEN_OPEN_SQUARE
))
1227 skipArrayList (token
, FALSE
);
1234 vStringDelete (saveScope
);
1237 addToScope (name
, class->string
);
1238 if (has_child_methods
)
1239 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1241 makeJsTag (name
, JSTAG_PROPERTY
, NULL
);
1245 } while ( isType(token
, TOKEN_COMMA
) );
1247 findCmdTerm (token
, FALSE
);
1255 static boolean
parseStatement (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
)
1257 tokenInfo
*const name
= newToken ();
1258 tokenInfo
*const secondary_name
= newToken ();
1259 tokenInfo
*const method_body_token
= newToken ();
1260 vString
* saveScope
= vStringNew ();
1261 boolean is_class
= FALSE
;
1262 boolean is_var
= FALSE
;
1263 boolean is_const
= FALSE
;
1264 boolean is_terminated
= TRUE
;
1265 boolean is_global
= FALSE
;
1266 boolean has_methods
= FALSE
;
1269 vStringClear(saveScope
);
1271 * Functions can be named or unnamed.
1272 * This deals with these formats:
1274 * validFunctionOne = function(a,b) {}
1275 * testlib.validFunctionFive = function(a,b) {}
1276 * var innerThree = function(a,b) {}
1277 * var innerFour = (a,b) {}
1278 * var D2 = secondary_fcn_name(a,b) {}
1279 * var D3 = new Function("a", "b", "return a+b;");
1281 * testlib.extras.ValidClassOne = function(a,b) {
1285 * testlib.extras.ValidClassOne.prototype = {
1286 * 'validMethodOne' : function(a,b) {},
1287 * 'validMethodTwo' : function(a,b) {}
1289 * ValidClassTwo = function ()
1291 * this.validMethodThree = function() {}
1293 * this.validMethodFour = () {}
1295 * Database.prototype.validMethodThree = Database_getTodaysDate;
1298 if ( is_inside_class
)
1301 * var can precede an inner function
1303 if ( isKeyword(token
, KEYWORD_var
) ||
1304 isKeyword(token
, KEYWORD_let
) ||
1305 isKeyword(token
, KEYWORD_const
) )
1307 is_const
= isKeyword(token
, KEYWORD_const
);
1309 * Only create variables for global scope
1311 if ( token
->nestLevel
== 0 )
1318 if ( isKeyword(token
, KEYWORD_this
) )
1321 if (isType (token
, TOKEN_PERIOD
))
1327 copyToken(name
, token
);
1329 while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1330 ! isType (token
, TOKEN_SEMICOLON
) &&
1331 ! isType (token
, TOKEN_EQUAL_SIGN
) &&
1332 ! isType (token
, TOKEN_EOF
))
1334 if (isType (token
, TOKEN_OPEN_CURLY
))
1335 parseBlock (token
, parent
);
1337 /* Potentially the name of the function */
1339 if (isType (token
, TOKEN_PERIOD
))
1342 * Cannot be a global variable is it has dot references in the name
1348 if ( isKeyword(token
, KEYWORD_NONE
) )
1352 addToScope(token
, name
->string
);
1355 addContext (name
, token
);
1359 else if ( isKeyword(token
, KEYWORD_prototype
) )
1362 * When we reach the "prototype" tag, we infer:
1363 * "BindAgent" is a class
1364 * "build" is a method
1366 * function BindAgent( repeatableIdName, newParentIdName ) {
1370 * Specified function name: "build"
1371 * BindAgent.prototype.build = function( mode ) {
1372 * maybe parse nested functions
1377 * ValidClassOne.prototype = {
1378 * 'validMethodOne' : function(a,b) {},
1379 * 'validMethodTwo' : function(a,b) {}
1383 makeClassTag (name
, NULL
);
1387 * There should a ".function_name" next.
1390 if (isType (token
, TOKEN_PERIOD
))
1396 if ( isKeyword(token
, KEYWORD_NONE
) )
1398 vString
*const signature
= vStringNew ();
1400 vStringCopy(saveScope
, token
->scope
);
1401 addToScope(token
, name
->string
);
1403 readToken (method_body_token
);
1404 vStringCopy (method_body_token
->scope
, token
->scope
);
1406 while (! isType (method_body_token
, TOKEN_SEMICOLON
) &&
1407 ! isType (method_body_token
, TOKEN_CLOSE_CURLY
) &&
1408 ! isType (method_body_token
, TOKEN_OPEN_CURLY
) &&
1409 ! isType (method_body_token
, TOKEN_EOF
))
1411 if ( isType (method_body_token
, TOKEN_OPEN_PAREN
) )
1412 skipArgumentList(method_body_token
, FALSE
,
1413 vStringLength (signature
) == 0 ? signature
: NULL
);
1415 readToken (method_body_token
);
1418 makeJsTag (token
, JSTAG_METHOD
, signature
);
1419 vStringDelete (signature
);
1421 if ( isType (method_body_token
, TOKEN_OPEN_CURLY
))
1423 parseBlock (method_body_token
, token
);
1424 is_terminated
= TRUE
;
1427 is_terminated
= isType (method_body_token
, TOKEN_SEMICOLON
);
1431 else if (isType (token
, TOKEN_EQUAL_SIGN
))
1434 if (isType (token
, TOKEN_OPEN_CURLY
))
1439 * Creates tags for each of these class methods
1440 * ValidClassOne.prototype = {
1441 * 'validMethodOne' : function(a,b) {},
1442 * 'validMethodTwo' : function(a,b) {}
1445 parseMethods(token
, name
);
1447 * Find to the end of the statement
1449 findCmdTerm (token
, FALSE
);
1450 token
->ignoreTag
= FALSE
;
1451 is_terminated
= TRUE
;
1458 } while (isType (token
, TOKEN_PERIOD
));
1461 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1462 skipArgumentList(token
, FALSE
, NULL
);
1464 if ( isType (token
, TOKEN_OPEN_SQUARE
) )
1465 skipArrayList(token
, FALSE
);
1468 if ( isType (token, TOKEN_OPEN_CURLY) )
1470 is_class = parseBlock (token, name);
1475 if ( isType (token
, TOKEN_CLOSE_CURLY
) )
1478 * Reaching this section without having
1479 * processed an open curly brace indicates
1480 * the statement is most likely not terminated.
1482 is_terminated
= FALSE
;
1486 if ( isType (token
, TOKEN_SEMICOLON
) )
1489 * Only create variables for global scope
1491 if ( token
->nestLevel
== 0 && is_global
)
1494 * Handles this syntax:
1497 if (isType (token
, TOKEN_SEMICOLON
))
1498 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1501 * Statement has ended.
1502 * This deals with calls to functions, like:
1508 if ( isType (token
, TOKEN_EQUAL_SIGN
) )
1514 /* rvalue might be surrounded with parentheses */
1515 while (isType (token
, TOKEN_OPEN_PAREN
))
1521 if ( isKeyword (token
, KEYWORD_function
) )
1523 vString
*const signature
= vStringNew ();
1527 if ( isKeyword (token
, KEYWORD_NONE
) &&
1528 ! isType (token
, TOKEN_OPEN_PAREN
) )
1531 * Functions of this format:
1532 * var D2A = function theAdd(a, b)
1536 * Are really two separate defined functions and
1537 * can be referenced in two ways:
1538 * alert( D2A(1,2) ); // produces 3
1539 * alert( theAdd(1,2) ); // also produces 3
1540 * So it must have two tags:
1543 * Save the reference to the name for later use, once
1544 * we have established this is a valid function we will
1545 * create the secondary reference to it.
1547 copyToken(secondary_name
, token
);
1551 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1552 skipArgumentList(token
, FALSE
, signature
);
1554 if (isType (token
, TOKEN_OPEN_CURLY
))
1557 * This will be either a function or a class.
1558 * We can only determine this by checking the body
1559 * of the function. If we find a "this." we know
1560 * it is a class, otherwise it is a function.
1562 if ( is_inside_class
)
1564 makeJsTag (name
, JSTAG_METHOD
, signature
);
1565 if ( vStringLength(secondary_name
->string
) > 0 )
1566 makeFunctionTag (secondary_name
, signature
);
1567 parseBlock (token
, name
);
1571 is_class
= parseBlock (token
, name
);
1573 makeClassTag (name
, signature
);
1575 makeFunctionTag (name
, signature
);
1577 if ( vStringLength(secondary_name
->string
) > 0 )
1578 makeFunctionTag (secondary_name
, signature
);
1582 vStringDelete (signature
);
1584 else if (isType (token
, TOKEN_OPEN_CURLY
))
1587 * Creates tags for each of these class methods
1588 * ValidClassOne.prototype = {
1589 * 'validMethodOne' : function(a,b) {},
1590 * 'validMethodTwo' : function(a,b) {}
1592 * Or checks if this is a hash variable.
1595 has_methods
= parseMethods(token
, name
);
1597 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1601 * Only create variables for global scope
1603 if ( token
->nestLevel
== 0 && is_global
)
1606 * A pointer can be created to the function.
1607 * If we recognize the function/class name ignore the variable.
1608 * This format looks identical to a variable definition.
1609 * A variable defined outside of a block is considered
1610 * a global variable:
1613 * This is not a global variable:
1614 * var g_var = function;
1615 * This is a global variable:
1616 * var g_var = different_var_name;
1618 fulltag
= vStringNew ();
1619 if (vStringLength (token
->scope
) > 0)
1621 vStringCopy(fulltag
, token
->scope
);
1622 vStringCatS (fulltag
, ".");
1623 vStringCatS (fulltag
, vStringValue(token
->string
));
1627 vStringCopy(fulltag
, token
->string
);
1629 vStringTerminate(fulltag
);
1630 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1631 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1633 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1635 vStringDelete (fulltag
);
1638 if (isType (token
, TOKEN_CLOSE_CURLY
))
1641 * Assume the closing parentheses terminates
1644 is_terminated
= TRUE
;
1647 else if (isKeyword (token
, KEYWORD_new
))
1650 is_var
= isType (token
, TOKEN_IDENTIFIER
);
1651 if ( isKeyword (token
, KEYWORD_function
) ||
1652 isKeyword (token
, KEYWORD_capital_function
) ||
1653 isKeyword (token
, KEYWORD_capital_object
) ||
1656 if ( isKeyword (token
, KEYWORD_capital_object
) )
1660 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1661 skipArgumentList(token
, TRUE
, NULL
);
1663 if (isType (token
, TOKEN_SEMICOLON
))
1665 if ( token
->nestLevel
== 0 )
1669 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1675 makeClassTag (name
, NULL
);
1677 /* FIXME: we cannot really get a meaningful
1678 * signature from a `new Function()` call,
1679 * so for now just don't set any */
1680 makeFunctionTag (name
, NULL
);
1685 else if (isType (token
, TOKEN_CLOSE_CURLY
))
1686 is_terminated
= FALSE
;
1689 else if (isKeyword (token
, KEYWORD_NONE
))
1692 * Only create variables for global scope
1694 if ( token
->nestLevel
== 0 && is_global
)
1697 * A pointer can be created to the function.
1698 * If we recognize the function/class name ignore the variable.
1699 * This format looks identical to a variable definition.
1700 * A variable defined outside of a block is considered
1701 * a global variable:
1704 * This is not a global variable:
1705 * var g_var = function;
1706 * This is a global variable:
1707 * var g_var = different_var_name;
1709 fulltag
= vStringNew ();
1710 if (vStringLength (token
->scope
) > 0)
1712 vStringCopy(fulltag
, token
->scope
);
1713 vStringCatS (fulltag
, ".");
1714 vStringCatS (fulltag
, vStringValue(token
->string
));
1718 vStringCopy(fulltag
, token
->string
);
1720 vStringTerminate(fulltag
);
1721 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1722 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1724 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1726 vStringDelete (fulltag
);
1732 while (parenDepth
> 0 && ! isType (token
, TOKEN_EOF
))
1734 if (isType (token
, TOKEN_OPEN_PAREN
))
1736 else if (isType (token
, TOKEN_CLOSE_PAREN
))
1738 readTokenFull (token
, TRUE
, NULL
);
1740 if (isType (token
, TOKEN_CLOSE_CURLY
))
1741 is_terminated
= FALSE
;
1745 /* if we aren't already at the cmd end, advance to it and check whether
1746 * the statement was terminated */
1747 if (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1748 ! isType (token
, TOKEN_SEMICOLON
))
1751 * Statements can be optionally terminated in the case of
1752 * statement prior to a close curly brace as in the
1753 * document.write line below:
1755 * function checkForUpdate() {
1757 * document.write("hello from checkForUpdate<br>")
1762 is_terminated
= findCmdTerm (token
, TRUE
);
1766 vStringCopy(token
->scope
, saveScope
);
1768 deleteToken (secondary_name
);
1769 deleteToken (method_body_token
);
1770 vStringDelete(saveScope
);
1772 return is_terminated
;
/*
 * Parses a SAPUI5 controller/view declaration (format sketched in the
 * comment text below) and passes its body to parseMethods.
 *
 * token: the current token; it is tested against TOKEN_PERIOD,
 *        TOKEN_OPEN_PAREN, TOKEN_STRING, TOKEN_COMMA, TOKEN_CLOSE_CURLY and
 *        TOKEN_EOF and handed to parseMethods as its first argument.
 *
 * A scratch token, name, is allocated with newToken. When a TOKEN_STRING is
 * seen it is copied into name, which is then given to parseMethods as its
 * second argument.
 *
 * NOTE(review): the embedded line numbers in this excerpt skip (1794, 1798,
 * 1801-1804, 1809, 1813-1816, 1820-1823), so several statements are not
 * visible here -- presumably the calls that advance the token, the opening
 * of the do loop and the release of name. Confirm against the complete file
 * before changing this function.
 */
1775 static void parseUI5 (tokenInfo
*const token
)
1777 tokenInfo
*const name
= newToken ();
1779 * SAPUI5 is built on top of jQuery.
1780 * It follows a standard format:
1781 * sap.ui.controller("id.of.controller", {
1782 * method_name : function... {
1785 * method_name : function ... {
1789 * Handle the parsing of the initial controller (and the
1790 * same for "view") and then allow the methods to be
/* Everything below is conditional on a period token being current. */
1796 if (isType (token
, TOKEN_PERIOD
))
/* Loop until the opening parenthesis of the call, or end of input. */
1799 while (! isType (token
, TOKEN_OPEN_PAREN
) &&
1800 ! isType (token
, TOKEN_EOF
))
/* A string token here is copied into name for later use by parseMethods. */
1806 if (isType (token
, TOKEN_STRING
))
1808 copyToken(name
, token
);
/* The comma separating the string argument from the next argument. */
1812 if (isType (token
, TOKEN_COMMA
))
/* Call parseMethods repeatedly until a closing curly brace or end of input. */
1817 parseMethods (token
, name
);
1818 } while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1819 ! isType (token
, TOKEN_EOF
));
/*
 * Parses one statement starting at token and reports whether that statement
 * was terminated.
 *
 * token:           the current token.
 * parent:          forwarded unchanged to parseLoop, parseIf and
 *                  parseStatement.
 * is_inside_class: forwarded unchanged to parseStatement.
 *
 * Keyword tokens are dispatched through a switch on token->keyword to
 * parseLoop, parseIf, parseSwitch or findCmdTerm; the two remaining paths
 * call parseStatement.
 *
 * Returns is_terminated. It starts as TRUE and is overwritten by every
 * handler shown here except parseSwitch, whose call does not assign it.
 *
 * NOTE(review): the embedded line numbers skip in this excerpt, so the
 * braces, the case labels in front of parseLoop and most of those in front
 * of parseIf, the break statements, and presumably a default label and an
 * else are not visible. Confirm against the complete file before editing.
 */
1825 static boolean
parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
)
1827 boolean is_terminated
= TRUE
;
1829 * Detect the common statements, if, while, for, do, ...
1830 * This is necessary since the last statement within a block "{}"
1831 * can be optionally terminated.
1833 * If the statement is not terminated, we need to tell
1834 * the calling routine to prevent reading an additional token
1835 * looking for the end of the statement.
1838 if (isType(token
, TOKEN_KEYWORD
))
1840 switch (token
->keyword
)
/* Case labels for this handler are not visible in this excerpt. */
1845 is_terminated
= parseLoop (token
, parent
);
/* KEYWORD_finally shares the parseIf handler with the labels above it. */
1851 case KEYWORD_finally
:
1852 /* Common semantics */
1853 is_terminated
= parseIf (token
, parent
);
/* The parseSwitch call does not assign is_terminated. */
1855 case KEYWORD_switch
:
1856 parseSwitch (token
);
/* For a return statement the result of findCmdTerm is the answer. */
1858 case KEYWORD_return
:
1859 is_terminated
= findCmdTerm (token
, TRUE
);
/* NOTE(review): presumably the default label of the switch -- confirm. */
1862 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1869 * Special case where single line statements may not be
1870 * SEMICOLON terminated. parseBlock needs to know this
1871 * so that it does not read the next token.
/* NOTE(review): presumably the non-keyword branch of the outer if -- confirm. */
1873 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1875 return is_terminated
;
/*
 * Top-level driver for one input file: the loop shown here repeats until
 * the token is TOKEN_EOF.
 *
 * token: the token that is examined on each pass and handed to the
 *        sub-parsers.
 *
 * A function keyword is handled by parseFunction, a sap keyword has its own
 * branch, and parseLine is called with the token as both current token and
 * parent and with is_inside_class set to FALSE.
 *
 * NOTE(review): embedded line numbers 1879-1883 and 1887-1888 are missing
 * from this excerpt. They presumably hold the start of the do loop, the
 * call that reads the next token, the body of the sap branch (likely a call
 * to parseUI5) and the final else. Confirm against the complete file.
 */
1878 static void parseJsFile (tokenInfo
*const token
)
/* Function definitions at this level go straight to parseFunction. */
1884 if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_function
)
1885 parseFunction (token
);
/* The sap keyword gets a dedicated branch; its body is not visible here. */
1886 else if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_sap
)
/* The token doubles as its own parent; FALSE means not inside a class. */
1889 parseLine (token
, token
, FALSE
);
1890 } while (! isType (token
, TOKEN_EOF
));
/*
 * Initialisation callback, installed as def->initialize by JavaScriptParser.
 *
 * Asserts that the JsKinds table has exactly JSTAG_COUNT entries, then
 * builds the keyword hash with buildJsKeywordHash.
 *
 * language: not used by any line visible in this excerpt.
 *
 * NOTE(review): embedded line number 1896 is missing from this excerpt; it
 * presumably stores language in Lang_js. Confirm against the complete file.
 */
1893 static void initialize (const langType language
)
1895 Assert (sizeof (JsKinds
) / sizeof (JsKinds
[0]) == JSTAG_COUNT
);
1897 buildJsKeywordHash ();
/*
 * Parser callback, installed as def->parser by JavaScriptParser.
 *
 * Allocates a working token, creates the ClassNames and FunctionNames string
 * lists, resets LastTokenType to TOKEN_UNDEFINED, runs parseJsFile, and then
 * deletes both lists and the token.
 *
 * NOTE(review): FunctionNames is reset to NULL after deletion. Embedded line
 * number 1912 is missing from this excerpt, so the matching reset of
 * ClassNames is presumably present but not visible -- confirm against the
 * complete file.
 */
1900 static void findJsTags (void)
1902 tokenInfo
*const token
= newToken ();
/* Fresh name lists and token state for this run. */
1904 ClassNames
= stringListNew ();
1905 FunctionNames
= stringListNew ();
1906 LastTokenType
= TOKEN_UNDEFINED
;
1908 parseJsFile (token
);
/* Tear down everything created above. */
1910 stringListDelete (ClassNames
);
1911 stringListDelete (FunctionNames
);
1913 FunctionNames
= NULL
;
1914 deleteToken (token
);
1917 /* Create parser definition structure */
/*
 * Creates the parser definition named JavaScript with parserNew and fills
 * in its fields: the extension list (js), the JsKinds table and its entry
 * count, the findJsTags parser callback and the initialize callback.
 *
 * NOTE(review): embedded line numbers 1930-1932 are missing from this
 * excerpt; they presumably return def, as the return type requires. Confirm
 * against the complete file.
 */
1918 extern parserDefinition
* JavaScriptParser (void)
/* NULL-terminated list of file extensions handled by this parser. */
1920 static const char *const extensions
[] = { "js", NULL
};
1921 parserDefinition
*const def
= parserNew ("JavaScript");
1922 def
->extensions
= extensions
;
1924 * New definitions for parsing instead of regex
1926 def
->kinds
= JsKinds
;
1927 def
->kindCount
= KIND_COUNT (JsKinds
);
1928 def
->parser
= findJsTags
;
1929 def
->initialize
= initialize
;
1933 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */