2 * Copyright (c) 2003, Darren Hiebert
4 * This source code is released for free distribution under the terms of the
5 * GNU General Public License version 2 or (at your option) any later version.
7 * This module contains functions for generating tags for JavaScript language
10 * Reference: http://www.ecma-international.org/publications/files/ECMA-ST/Ecma-262.pdf
12 * This is a good reference for different forms of the function statement:
13 * http://www.permadi.com/tutorial/jsFunc/
14 * Another good reference:
15 * http://developer.mozilla.org/en/docs/Core_JavaScript_1.5_Guide
21 #include "general.h" /* must always come first */
22 #include <ctype.h> /* to define isalpha () */
39 #define isType(token,t) (boolean) ((token)->type == (t))
40 #define isKeyword(token,k) (boolean) ((token)->keyword == (k))
47 * Tracks class and function names already created
49 static stringList
*ClassNames
;
50 static stringList
*FunctionNames
;
52 /* Used to specify type of keyword.
54 typedef enum eKeywordId
{
57 KEYWORD_capital_function
,
58 KEYWORD_capital_object
,
78 typedef enum eTokenType
{
99 TOKEN_POSTFIX_OPERATOR
,
100 TOKEN_BINARY_OPERATOR
103 typedef struct sTokenInfo
{
108 unsigned long lineNumber
;
118 static tokenType LastTokenType
;
120 static langType Lang_js
;
132 static kindOption JsKinds
[] = {
133 { TRUE
, 'f', "function", "functions" },
134 { TRUE
, 'c', "class", "classes" },
135 { TRUE
, 'm', "method", "methods" },
136 { TRUE
, 'p', "property", "properties" },
137 { TRUE
, 'C', "constant", "constants" },
138 { TRUE
, 'v', "variable", "global variables" }
141 static const keywordTable JsKeywordTable
[] = {
142 /* keyword keyword ID */
143 { "function", KEYWORD_function
},
144 { "Function", KEYWORD_capital_function
},
145 { "Object", KEYWORD_capital_object
},
146 { "prototype", KEYWORD_prototype
},
147 { "var", KEYWORD_var
},
148 { "let", KEYWORD_let
},
149 { "const", KEYWORD_const
},
150 { "new", KEYWORD_new
},
151 { "this", KEYWORD_this
},
152 { "for", KEYWORD_for
},
153 { "while", KEYWORD_while
},
154 { "do", KEYWORD_do
},
155 { "if", KEYWORD_if
},
156 { "else", KEYWORD_else
},
157 { "switch", KEYWORD_switch
},
158 { "try", KEYWORD_try
},
159 { "catch", KEYWORD_catch
},
160 { "finally", KEYWORD_finally
},
161 { "sap", KEYWORD_sap
},
162 { "return", KEYWORD_return
}
166 * FUNCTION DEFINITIONS
169 /* Recursive functions */
170 static void parseFunction (tokenInfo
*const token
);
171 static boolean
parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
);
172 static boolean
parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
);
173 static void parseUI5 (tokenInfo
*const token
);
175 static boolean
isIdentChar (const int c
)
178 (isalpha (c
) || isdigit (c
) || c
== '$' ||
179 c
== '@' || c
== '_' || c
== '#');
182 static tokenInfo
*newToken (void)
184 tokenInfo
*const token
= xMalloc (1, tokenInfo
);
186 token
->type
= TOKEN_UNDEFINED
;
187 token
->keyword
= KEYWORD_NONE
;
188 token
->string
= vStringNew ();
189 token
->scope
= vStringNew ();
190 token
->nestLevel
= 0;
191 token
->ignoreTag
= FALSE
;
192 token
->lineNumber
= getInputLineNumber ();
193 token
->filePosition
= getInputFilePosition ();
198 static void deleteToken (tokenInfo
*const token
)
200 vStringDelete (token
->string
);
201 vStringDelete (token
->scope
);
206 * Tag generation functions
209 static void makeJsTag (tokenInfo
*const token
, const jsKind kind
, vString
*const signature
)
211 if (JsKinds
[kind
].enabled
&& ! token
->ignoreTag
)
213 const char *name
= vStringValue (token
->string
);
214 vString
*fullscope
= vStringNewCopy (token
->scope
);
218 if ((p
= strrchr (name
, '.')) != NULL
)
220 if (vStringLength (fullscope
) > 0)
221 vStringPut (fullscope
, '.');
222 vStringNCatS (fullscope
, name
, p
- name
);
226 initTagEntry (&e
, name
, &(JsKinds
[kind
]));
228 e
.lineNumber
= token
->lineNumber
;
229 e
.filePosition
= token
->filePosition
;
231 if ( vStringLength(fullscope
) > 0 )
233 jsKind parent_kind
= JSTAG_CLASS
;
236 * If we're creating a function (and not a method),
237 * guess we're inside another function
239 if (kind
== JSTAG_FUNCTION
)
240 parent_kind
= JSTAG_FUNCTION
;
242 e
.extensionFields
.scopeKind
= &(JsKinds
[parent_kind
]);
243 e
.extensionFields
.scopeName
= vStringValue (fullscope
);
246 if (signature
&& vStringLength(signature
))
249 /* sanitize signature by replacing all control characters with a
250 * space (because it's simple).
251 * there should never be any junk in a valid signature, but who
252 * knows what the user wrote and CTags doesn't cope well with weird
254 for (i
= 0; i
< signature
->length
; i
++)
256 unsigned char c
= (unsigned char) signature
->buffer
[i
];
257 if (c
< 0x20 /* below space */ || c
== 0x7F /* DEL */)
258 signature
->buffer
[i
] = ' ';
260 e
.extensionFields
.signature
= vStringValue(signature
);
264 vStringDelete (fullscope
);
268 static void makeClassTag (tokenInfo
*const token
, vString
*const signature
)
272 if ( ! token
->ignoreTag
)
274 fulltag
= vStringNew ();
275 if (vStringLength (token
->scope
) > 0)
277 vStringCopy(fulltag
, token
->scope
);
278 vStringCatS (fulltag
, ".");
279 vStringCatS (fulltag
, vStringValue(token
->string
));
283 vStringCopy(fulltag
, token
->string
);
285 vStringTerminate(fulltag
);
286 if ( ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
288 stringListAdd (ClassNames
, vStringNewCopy (fulltag
));
289 makeJsTag (token
, JSTAG_CLASS
, signature
);
291 vStringDelete (fulltag
);
295 static void makeFunctionTag (tokenInfo
*const token
, vString
*const signature
)
299 if ( ! token
->ignoreTag
)
301 fulltag
= vStringNew ();
302 if (vStringLength (token
->scope
) > 0)
304 vStringCopy(fulltag
, token
->scope
);
305 vStringCatS (fulltag
, ".");
306 vStringCatS (fulltag
, vStringValue(token
->string
));
310 vStringCopy(fulltag
, token
->string
);
312 vStringTerminate(fulltag
);
313 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) )
315 stringListAdd (FunctionNames
, vStringNewCopy (fulltag
));
316 makeJsTag (token
, JSTAG_FUNCTION
, signature
);
318 vStringDelete (fulltag
);
/*
 * Consumes input characters until one equal to c has been read or EOF is
 * reached; at least one character is always read.  Returns the character
 * that stopped the scan (c, or EOF when the input ran out first).
 */
static int skipToCharacter (const int c)
{
	int ch = getcFromInputFile ();

	while (ch != EOF && ch != c)
		ch = getcFromInputFile ();

	return ch;
}
336 static void parseString (vString
*const string
, const int delimiter
)
341 int c
= getcFromInputFile ();
346 /* Eat the escape sequence (\", \', etc). We properly handle
347 * <LineContinuation> by eating a whole \<CR><LF> not to see <LF>
348 * as an unescaped character, which is invalid and handled below.
349 * Also, handle the fact that <LineContinuation> produces an empty
351 * See ECMA-262 7.8.4 */
352 c
= getcFromInputFile ();
353 if (c
!= '\r' && c
!= '\n')
354 vStringPut(string
, c
);
357 c
= getcFromInputFile();
359 ungetcToInputFile (c
);
362 else if (c
== delimiter
)
364 else if (c
== '\r' || c
== '\n')
366 /* those are invalid when not escaped */
368 /* we don't want to eat the newline itself to let the automatic
369 * semicolon insertion code kick in */
370 ungetcToInputFile (c
);
373 vStringPut (string
, c
);
375 vStringTerminate (string
);
378 static void parseRegExp (void)
381 boolean in_range
= FALSE
;
385 c
= getcFromInputFile ();
386 if (! in_range
&& c
== '/')
390 c
= getcFromInputFile ();
391 } while (isalpha (c
));
392 ungetcToInputFile (c
);
396 c
= getcFromInputFile (); /* skip next character */
404 /* Read a C identifier beginning with "firstChar" and places it into
407 static void parseIdentifier (vString
*const string
, const int firstChar
)
410 Assert (isIdentChar (c
));
413 vStringPut (string
, c
);
414 c
= getcFromInputFile ();
415 } while (isIdentChar (c
));
416 vStringTerminate (string
);
417 ungetcToInputFile (c
); /* unget non-identifier character */
420 static keywordId
analyzeToken (vString
*const name
)
422 vString
*keyword
= vStringNew ();
424 vStringCopyToLower (keyword
, name
);
425 result
= (keywordId
) lookupKeyword (vStringValue (keyword
), Lang_js
);
426 vStringDelete (keyword
);
430 static void readTokenFull (tokenInfo
*const token
, boolean include_newlines
, vString
*const repr
)
435 token
->type
= TOKEN_UNDEFINED
;
436 token
->keyword
= KEYWORD_NONE
;
437 vStringClear (token
->string
);
443 c
= getcFromInputFile ();
446 while (c
== '\t' || c
== ' ' ||
447 ((c
== '\r' || c
== '\n') && ! include_newlines
));
449 token
->lineNumber
= getInputLineNumber ();
450 token
->filePosition
= getInputFilePosition ();
455 vStringPut (repr
, ' ');
456 vStringPut (repr
, c
);
461 case EOF
: token
->type
= TOKEN_EOF
; break;
462 case '(': token
->type
= TOKEN_OPEN_PAREN
; break;
463 case ')': token
->type
= TOKEN_CLOSE_PAREN
; break;
464 case ';': token
->type
= TOKEN_SEMICOLON
; break;
465 case ',': token
->type
= TOKEN_COMMA
; break;
466 case '.': token
->type
= TOKEN_PERIOD
; break;
467 case ':': token
->type
= TOKEN_COLON
; break;
468 case '{': token
->type
= TOKEN_OPEN_CURLY
; break;
469 case '}': token
->type
= TOKEN_CLOSE_CURLY
; break;
470 case '=': token
->type
= TOKEN_EQUAL_SIGN
; break;
471 case '[': token
->type
= TOKEN_OPEN_SQUARE
; break;
472 case ']': token
->type
= TOKEN_CLOSE_SQUARE
; break;
477 int d
= getcFromInputFile ();
478 if (d
== c
) /* ++ or -- */
479 token
->type
= TOKEN_POSTFIX_OPERATOR
;
482 ungetcToInputFile (d
);
483 token
->type
= TOKEN_BINARY_OPERATOR
;
496 token
->type
= TOKEN_BINARY_OPERATOR
;
501 /* This isn't strictly correct per the standard, but following the
502 * real rules means understanding all statements, and that's not
503 * what the parser currently does. What we do here is a guess, by
504 * avoiding inserting semicolons that would make the statement on
505 * the left invalid. Hopefully this should not have false negatives
506 * (e.g. should not miss insertion of a semicolon) but might have
507 * false positives (e.g. it will wrongfully emit a semicolon for the
508 * newline in "foo\n+bar").
509 * This should however be mostly harmless as we only deal with
510 * newlines in specific situations where we know a false positive
511 * wouldn't hurt too bad. */
512 switch (LastTokenType
)
514 /* these cannot be the end of a statement, so hold the newline */
515 case TOKEN_EQUAL_SIGN
:
518 case TOKEN_FORWARD_SLASH
:
519 case TOKEN_BINARY_OPERATOR
:
520 /* and these already end one, no need to duplicate it */
521 case TOKEN_SEMICOLON
:
523 case TOKEN_CLOSE_CURLY
:
524 case TOKEN_OPEN_CURLY
:
525 include_newlines
= FALSE
; /* no need to recheck */
529 token
->type
= TOKEN_SEMICOLON
;
535 token
->type
= TOKEN_STRING
;
536 parseString (token
->string
, c
);
537 token
->lineNumber
= getInputLineNumber ();
538 token
->filePosition
= getInputFilePosition ();
541 vStringCat (repr
, token
->string
);
542 vStringPut (repr
, c
);
547 c
= getcFromInputFile ();
548 if (c
!= '\\' && c
!= '"' && !isspace (c
))
549 ungetcToInputFile (c
);
550 token
->type
= TOKEN_CHARACTER
;
551 token
->lineNumber
= getInputLineNumber ();
552 token
->filePosition
= getInputFilePosition ();
557 int d
= getcFromInputFile ();
558 if ( (d
!= '*') && /* is this the start of a comment? */
559 (d
!= '/') ) /* is a one line comment? */
561 ungetcToInputFile (d
);
562 switch (LastTokenType
)
564 case TOKEN_CHARACTER
:
565 case TOKEN_IDENTIFIER
:
567 case TOKEN_CLOSE_CURLY
:
568 case TOKEN_CLOSE_PAREN
:
569 case TOKEN_CLOSE_SQUARE
:
570 token
->type
= TOKEN_FORWARD_SLASH
;
574 token
->type
= TOKEN_REGEXP
;
576 token
->lineNumber
= getInputLineNumber ();
577 token
->filePosition
= getInputFilePosition ();
583 if (repr
) /* remove the / we added */
584 repr
->buffer
[--repr
->length
] = 0;
589 skipToCharacter ('*');
590 c
= getcFromInputFile ();
594 ungetcToInputFile (c
);
595 } while (c
!= EOF
&& c
!= '\0');
598 else if (d
== '/') /* is this the start of a comment? */
600 skipToCharacter ('\n');
601 /* if we care about newlines, put it back so it is seen */
602 if (include_newlines
)
603 ungetcToInputFile ('\n');
611 /* skip shebang in case of e.g. Node.js scripts */
612 if (token
->lineNumber
> 1)
613 token
->type
= TOKEN_UNDEFINED
;
614 else if ((c
= getcFromInputFile ()) != '!')
616 ungetcToInputFile (c
);
617 token
->type
= TOKEN_UNDEFINED
;
621 skipToCharacter ('\n');
627 if (! isIdentChar (c
))
628 token
->type
= TOKEN_UNDEFINED
;
631 parseIdentifier (token
->string
, c
);
632 token
->lineNumber
= getInputLineNumber ();
633 token
->filePosition
= getInputFilePosition ();
634 token
->keyword
= analyzeToken (token
->string
);
635 if (isKeyword (token
, KEYWORD_NONE
))
636 token
->type
= TOKEN_IDENTIFIER
;
638 token
->type
= TOKEN_KEYWORD
;
639 if (repr
&& vStringLength (token
->string
) > 1)
640 vStringCatS (repr
, vStringValue (token
->string
) + 1);
645 LastTokenType
= token
->type
;
648 static void readToken (tokenInfo
*const token
)
650 readTokenFull (token
, FALSE
, NULL
);
653 static void copyToken (tokenInfo
*const dest
, tokenInfo
*const src
)
655 dest
->nestLevel
= src
->nestLevel
;
656 dest
->lineNumber
= src
->lineNumber
;
657 dest
->filePosition
= src
->filePosition
;
658 dest
->type
= src
->type
;
659 dest
->keyword
= src
->keyword
;
660 vStringCopy(dest
->string
, src
->string
);
661 vStringCopy(dest
->scope
, src
->scope
);
665 * Token parsing functions
668 static void skipArgumentList (tokenInfo
*const token
, boolean include_newlines
, vString
*const repr
)
672 if (isType (token
, TOKEN_OPEN_PAREN
)) /* arguments? */
676 vStringPut (repr
, '(');
677 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
679 readTokenFull (token
, FALSE
, repr
);
680 if (isType (token
, TOKEN_OPEN_PAREN
))
682 else if (isType (token
, TOKEN_CLOSE_PAREN
))
685 readTokenFull (token
, include_newlines
, NULL
);
689 static void skipArrayList (tokenInfo
*const token
, boolean include_newlines
)
694 * Handle square brackets
696 * So we must check for nested open and closing square brackets
699 if (isType (token
, TOKEN_OPEN_SQUARE
)) /* arguments? */
702 while (nest_level
> 0 && ! isType (token
, TOKEN_EOF
))
705 if (isType (token
, TOKEN_OPEN_SQUARE
))
707 else if (isType (token
, TOKEN_CLOSE_SQUARE
))
710 readTokenFull (token
, include_newlines
, NULL
);
714 static void addContext (tokenInfo
* const parent
, const tokenInfo
* const child
)
716 if (vStringLength (parent
->string
) > 0)
718 vStringCatS (parent
->string
, ".");
720 vStringCatS (parent
->string
, vStringValue(child
->string
));
721 vStringTerminate(parent
->string
);
724 static void addToScope (tokenInfo
* const token
, vString
* const extra
)
726 if (vStringLength (token
->scope
) > 0)
728 vStringCatS (token
->scope
, ".");
730 vStringCatS (token
->scope
, vStringValue(extra
));
731 vStringTerminate(token
->scope
);
738 static boolean
findCmdTerm (tokenInfo
*const token
, boolean include_newlines
)
741 * Read until we find either a semicolon or closing brace.
742 * Any nested braces will be handled within.
744 while (! isType (token
, TOKEN_SEMICOLON
) &&
745 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
746 ! isType (token
, TOKEN_EOF
))
748 /* Handle nested blocks */
749 if ( isType (token
, TOKEN_OPEN_CURLY
))
751 parseBlock (token
, token
);
752 readTokenFull (token
, include_newlines
, NULL
);
754 else if ( isType (token
, TOKEN_OPEN_PAREN
) )
756 skipArgumentList(token
, include_newlines
, NULL
);
758 else if ( isType (token
, TOKEN_OPEN_SQUARE
) )
760 skipArrayList(token
, include_newlines
);
764 readTokenFull (token
, include_newlines
, NULL
);
768 return isType (token
, TOKEN_SEMICOLON
);
771 static void parseSwitch (tokenInfo
*const token
)
774 * switch (expression) {
781 * default : statement;
787 if (isType (token
, TOKEN_OPEN_PAREN
))
790 * Handle nameless functions, these will only
791 * be considered methods.
793 skipArgumentList(token
, FALSE
, NULL
);
796 if (isType (token
, TOKEN_OPEN_CURLY
))
798 parseBlock (token
, token
);
802 static boolean
parseLoop (tokenInfo
*const token
, tokenInfo
*const parent
)
805 * Handles these statements
806 * for (x=0; x<3; x++)
807 * document.write("This text is repeated three times<br>");
809 * for (x=0; x<3; x++)
811 * document.write("This text is repeated three times<br>");
815 * document.write(number+"<br>");
820 * document.write(number+"<br>");
825 boolean is_terminated
= TRUE
;
827 if (isKeyword (token
, KEYWORD_for
) || isKeyword (token
, KEYWORD_while
))
831 if (isType (token
, TOKEN_OPEN_PAREN
))
834 * Handle nameless functions, these will only
835 * be considered methods.
837 skipArgumentList(token
, FALSE
, NULL
);
840 if (isType (token
, TOKEN_OPEN_CURLY
))
843 * This will be either a function or a class.
844 * We can only determine this by checking the body
845 * of the function. If we find a "this." we know
846 * it is a class, otherwise it is a function.
848 parseBlock (token
, parent
);
852 is_terminated
= parseLine(token
, parent
, FALSE
);
855 else if (isKeyword (token
, KEYWORD_do
))
859 if (isType (token
, TOKEN_OPEN_CURLY
))
862 * This will be either a function or a class.
863 * We can only determine this by checking the body
864 * of the function. If we find a "this." we know
865 * it is a class, otherwise it is a function.
867 parseBlock (token
, parent
);
871 is_terminated
= parseLine(token
, parent
, FALSE
);
877 if (isKeyword (token
, KEYWORD_while
))
881 if (isType (token
, TOKEN_OPEN_PAREN
))
884 * Handle nameless functions, these will only
885 * be considered methods.
887 skipArgumentList(token
, TRUE
, NULL
);
889 if (! isType (token
, TOKEN_SEMICOLON
))
890 is_terminated
= FALSE
;
894 return is_terminated
;
897 static boolean
parseIf (tokenInfo
*const token
, tokenInfo
*const parent
)
899 boolean read_next_token
= TRUE
;
901 * If statements have two forms
920 * This example is correctly written, but the
921 * else contains only 1 statement without a terminator
922 * since the function finishes with the closing brace.
931 * TODO: Deal with statements that can optionally end
932 * without a semi-colon. Currently this messes up
933 * the parsing of blocks.
934 * Need to somehow detect this has happened, and either
935 * backup a token, or skip reading the next token if
936 * that is possible from all code locations.
942 if (isKeyword (token
, KEYWORD_if
))
945 * Check for an "else if" and consume the "if"
950 if (isType (token
, TOKEN_OPEN_PAREN
))
953 * Handle nameless functions, these will only
954 * be considered methods.
956 skipArgumentList(token
, FALSE
, NULL
);
959 if (isType (token
, TOKEN_OPEN_CURLY
))
962 * This will be either a function or a class.
963 * We can only determine this by checking the body
964 * of the function. If we find a "this." we know
965 * it is a class, otherwise it is a function.
967 parseBlock (token
, parent
);
971 /* The next token should only be read if this statement had its own
973 read_next_token
= findCmdTerm (token
, TRUE
);
975 return read_next_token
;
978 static void parseFunction (tokenInfo
*const token
)
980 tokenInfo
*const name
= newToken ();
981 vString
*const signature
= vStringNew ();
982 boolean is_class
= FALSE
;
985 * This deals with these formats
986 * function validFunctionTwo(a,b) {}
990 /* Add scope in case this is an INNER function */
991 addToScope(name
, token
->scope
);
994 while (isType (token
, TOKEN_PERIOD
))
997 if ( isKeyword(token
, KEYWORD_NONE
) )
999 addContext (name
, token
);
1004 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1005 skipArgumentList(token
, FALSE
, signature
);
1007 if ( isType (token
, TOKEN_OPEN_CURLY
) )
1009 is_class
= parseBlock (token
, name
);
1011 makeClassTag (name
, signature
);
1013 makeFunctionTag (name
, signature
);
1016 findCmdTerm (token
, FALSE
);
1018 vStringDelete (signature
);
1022 static boolean
parseBlock (tokenInfo
*const token
, tokenInfo
*const orig_parent
)
1024 boolean is_class
= FALSE
;
1025 boolean read_next_token
= TRUE
;
1026 vString
* saveScope
= vStringNew ();
1027 tokenInfo
*const parent
= newToken ();
1029 /* backup the parent token to allow calls like parseBlock(token, token) */
1030 copyToken (parent
, orig_parent
);
1034 * Make this routine a bit more forgiving.
1035 * If called on an open_curly advance it
1037 if ( isType (token
, TOKEN_OPEN_CURLY
) &&
1038 isKeyword(token
, KEYWORD_NONE
) )
1041 if (! isType (token
, TOKEN_CLOSE_CURLY
))
1044 * Read until we find the closing brace,
1045 * any nested braces will be handled within
1049 read_next_token
= TRUE
;
1050 if (isKeyword (token
, KEYWORD_this
))
1053 * Means we are inside a class and have found
1054 * a class, not a function
1057 vStringCopy(saveScope
, token
->scope
);
1058 addToScope (token
, parent
->string
);
1061 * Ignore the remainder of the line
1062 * findCmdTerm(token);
1064 read_next_token
= parseLine (token
, parent
, is_class
);
1066 vStringCopy(token
->scope
, saveScope
);
1068 else if (isKeyword (token
, KEYWORD_var
) ||
1069 isKeyword (token
, KEYWORD_let
) ||
1070 isKeyword (token
, KEYWORD_const
))
1073 * Potentially we have found an inner function.
1074 * Set something to indicate the scope
1076 vStringCopy(saveScope
, token
->scope
);
1077 addToScope (token
, parent
->string
);
1078 read_next_token
= parseLine (token
, parent
, is_class
);
1079 vStringCopy(token
->scope
, saveScope
);
1081 else if (isKeyword (token
, KEYWORD_function
))
1083 vStringCopy(saveScope
, token
->scope
);
1084 addToScope (token
, parent
->string
);
1085 parseFunction (token
);
1086 vStringCopy(token
->scope
, saveScope
);
1088 else if (isType (token
, TOKEN_OPEN_CURLY
))
1090 /* Handle nested blocks */
1091 parseBlock (token
, parent
);
1096 * It is possible for a line to have no terminator
1097 * if the following line is a closing brace.
1098 * parseLine will detect this case and indicate
1099 * whether we should read an additional token.
1101 read_next_token
= parseLine (token
, parent
, is_class
);
1105 * Always read a new token unless we find a statement without
1106 * an ending terminator
1108 if( read_next_token
)
1112 * If we find a statement without a terminator consider the
1113 * block finished, otherwise the stack will be off by one.
1115 } while (! isType (token
, TOKEN_EOF
) &&
1116 ! isType (token
, TOKEN_CLOSE_CURLY
) && read_next_token
);
1119 deleteToken (parent
);
1120 vStringDelete(saveScope
);
1126 static boolean
parseMethods (tokenInfo
*const token
, tokenInfo
*const class)
1128 tokenInfo
*const name
= newToken ();
1129 boolean has_methods
= FALSE
;
1132 * This deals with these formats
1133 * validProperty : 2,
1134 * validMethod : function(a,b) {}
1135 * 'validMethod2' : function(a,b) {}
1136 * container.dirtyTab = {'url': false, 'title':false, 'snapshot':false, '*': false}
1142 if (isType (token
, TOKEN_CLOSE_CURLY
))
1145 * This was most likely a variable declaration of a hash table.
1146 * indicate there were no methods and return.
1148 has_methods
= FALSE
;
1152 if (isType (token
, TOKEN_STRING
) || isKeyword(token
, KEYWORD_NONE
))
1154 copyToken(name
, token
);
1157 if ( isType (token
, TOKEN_COLON
) )
1160 if ( isKeyword (token
, KEYWORD_function
) )
1162 vString
*const signature
= vStringNew ();
1165 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1167 skipArgumentList(token
, FALSE
, signature
);
1170 if (isType (token
, TOKEN_OPEN_CURLY
))
1173 addToScope (name
, class->string
);
1174 makeJsTag (name
, JSTAG_METHOD
, signature
);
1175 parseBlock (token
, name
);
1178 * Read to the closing curly, check next
1179 * token, if a comma, we must loop again
1184 vStringDelete (signature
);
1188 vString
* saveScope
= vStringNew ();
1189 boolean has_child_methods
= FALSE
;
1191 /* skip whatever is the value */
1192 while (! isType (token
, TOKEN_COMMA
) &&
1193 ! isType (token
, TOKEN_CLOSE_CURLY
) &&
1194 ! isType (token
, TOKEN_EOF
))
1196 if (isType (token
, TOKEN_OPEN_CURLY
))
1198 /* Recurse to find child properties/methods */
1199 vStringCopy (saveScope
, token
->scope
);
1200 addToScope (token
, class->string
);
1201 has_child_methods
= parseMethods (token
, name
);
1202 vStringCopy (token
->scope
, saveScope
);
1205 else if (isType (token
, TOKEN_OPEN_PAREN
))
1207 skipArgumentList (token
, FALSE
, NULL
);
1209 else if (isType (token
, TOKEN_OPEN_SQUARE
))
1211 skipArrayList (token
, FALSE
);
1218 vStringDelete (saveScope
);
1221 addToScope (name
, class->string
);
1222 if (has_child_methods
)
1223 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1225 makeJsTag (name
, JSTAG_PROPERTY
, NULL
);
1229 } while ( isType(token
, TOKEN_COMMA
) );
1231 findCmdTerm (token
, FALSE
);
1239 static boolean
parseStatement (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
)
1241 tokenInfo
*const name
= newToken ();
1242 tokenInfo
*const secondary_name
= newToken ();
1243 tokenInfo
*const method_body_token
= newToken ();
1244 vString
* saveScope
= vStringNew ();
1245 boolean is_class
= FALSE
;
1246 boolean is_var
= FALSE
;
1247 boolean is_const
= FALSE
;
1248 boolean is_terminated
= TRUE
;
1249 boolean is_global
= FALSE
;
1250 boolean has_methods
= FALSE
;
1253 vStringClear(saveScope
);
1255 * Functions can be named or unnamed.
1256 * This deals with these formats:
1258 * validFunctionOne = function(a,b) {}
1259 * testlib.validFunctionFive = function(a,b) {}
1260 * var innerThree = function(a,b) {}
1261 * var innerFour = (a,b) {}
1262 * var D2 = secondary_fcn_name(a,b) {}
1263 * var D3 = new Function("a", "b", "return a+b;");
1265 * testlib.extras.ValidClassOne = function(a,b) {
1269 * testlib.extras.ValidClassOne.prototype = {
1270 * 'validMethodOne' : function(a,b) {},
1271 * 'validMethodTwo' : function(a,b) {}
1273 * ValidClassTwo = function ()
1275 * this.validMethodThree = function() {}
1277 * this.validMethodFour = () {}
1279 * Database.prototype.validMethodThree = Database_getTodaysDate;
1282 if ( is_inside_class
)
1285 * var can precede an inner function
1287 if ( isKeyword(token
, KEYWORD_var
) ||
1288 isKeyword(token
, KEYWORD_let
) ||
1289 isKeyword(token
, KEYWORD_const
) )
1291 is_const
= isKeyword(token
, KEYWORD_const
);
1293 * Only create variables for global scope
1295 if ( token
->nestLevel
== 0 )
1302 if ( isKeyword(token
, KEYWORD_this
) )
1305 if (isType (token
, TOKEN_PERIOD
))
1311 copyToken(name
, token
);
1313 while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1314 ! isType (token
, TOKEN_SEMICOLON
) &&
1315 ! isType (token
, TOKEN_EQUAL_SIGN
) &&
1316 ! isType (token
, TOKEN_EOF
))
1318 if (isType (token
, TOKEN_OPEN_CURLY
))
1319 parseBlock (token
, parent
);
1321 /* Potentially the name of the function */
1323 if (isType (token
, TOKEN_PERIOD
))
1326 * Cannot be a global variable if it has dot references in the name
1332 if ( isKeyword(token
, KEYWORD_NONE
) )
1336 addToScope(token
, name
->string
);
1339 addContext (name
, token
);
1343 else if ( isKeyword(token
, KEYWORD_prototype
) )
1346 * When we reach the "prototype" tag, we infer:
1347 * "BindAgent" is a class
1348 * "build" is a method
1350 * function BindAgent( repeatableIdName, newParentIdName ) {
1354 * Specified function name: "build"
1355 * BindAgent.prototype.build = function( mode ) {
1356 * maybe parse nested functions
1361 * ValidClassOne.prototype = {
1362 * 'validMethodOne' : function(a,b) {},
1363 * 'validMethodTwo' : function(a,b) {}
1367 makeClassTag (name
, NULL
);
1371 * There should be a ".function_name" next.
1374 if (isType (token
, TOKEN_PERIOD
))
1380 if ( isKeyword(token
, KEYWORD_NONE
) )
1382 vString
*const signature
= vStringNew ();
1384 vStringCopy(saveScope
, token
->scope
);
1385 addToScope(token
, name
->string
);
1387 readToken (method_body_token
);
1388 vStringCopy (method_body_token
->scope
, token
->scope
);
1390 while (! isType (method_body_token
, TOKEN_SEMICOLON
) &&
1391 ! isType (method_body_token
, TOKEN_CLOSE_CURLY
) &&
1392 ! isType (method_body_token
, TOKEN_OPEN_CURLY
) &&
1393 ! isType (method_body_token
, TOKEN_EOF
))
1395 if ( isType (method_body_token
, TOKEN_OPEN_PAREN
) )
1396 skipArgumentList(method_body_token
, FALSE
,
1397 vStringLength (signature
) == 0 ? signature
: NULL
);
1399 readToken (method_body_token
);
1402 makeJsTag (token
, JSTAG_METHOD
, signature
);
1403 vStringDelete (signature
);
1405 if ( isType (method_body_token
, TOKEN_OPEN_CURLY
))
1407 parseBlock (method_body_token
, token
);
1408 is_terminated
= TRUE
;
1411 is_terminated
= isType (method_body_token
, TOKEN_SEMICOLON
);
1415 else if (isType (token
, TOKEN_EQUAL_SIGN
))
1418 if (isType (token
, TOKEN_OPEN_CURLY
))
1423 * Creates tags for each of these class methods
1424 * ValidClassOne.prototype = {
1425 * 'validMethodOne' : function(a,b) {},
1426 * 'validMethodTwo' : function(a,b) {}
1429 parseMethods(token
, name
);
1431 * Find to the end of the statement
1433 findCmdTerm (token
, FALSE
);
1434 token
->ignoreTag
= FALSE
;
1435 is_terminated
= TRUE
;
1442 } while (isType (token
, TOKEN_PERIOD
));
1445 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1446 skipArgumentList(token
, FALSE
, NULL
);
1448 if ( isType (token
, TOKEN_OPEN_SQUARE
) )
1449 skipArrayList(token
, FALSE
);
1452 if ( isType (token, TOKEN_OPEN_CURLY) )
1454 is_class = parseBlock (token, name);
1459 if ( isType (token
, TOKEN_CLOSE_CURLY
) )
1462 * Reaching this section without having
1463 * processed an open curly brace indicates
1464 * the statement is most likely not terminated.
1466 is_terminated
= FALSE
;
1470 if ( isType (token
, TOKEN_SEMICOLON
) )
1473 * Only create variables for global scope
1475 if ( token
->nestLevel
== 0 && is_global
)
1478 * Handles this syntax:
1481 if (isType (token
, TOKEN_SEMICOLON
))
1482 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1485 * Statement has ended.
1486 * This deals with calls to functions, like:
1492 if ( isType (token
, TOKEN_EQUAL_SIGN
) )
1498 /* rvalue might be surrounded with parentheses */
1499 while (isType (token
, TOKEN_OPEN_PAREN
))
1505 if ( isKeyword (token
, KEYWORD_function
) )
1507 vString
*const signature
= vStringNew ();
1511 if ( isKeyword (token
, KEYWORD_NONE
) &&
1512 ! isType (token
, TOKEN_OPEN_PAREN
) )
1515 * Functions of this format:
1516 * var D2A = function theAdd(a, b)
1520 * Are really two separate defined functions and
1521 * can be referenced in two ways:
1522 * alert( D2A(1,2) ); // produces 3
1523 * alert( theAdd(1,2) ); // also produces 3
1524 * So it must have two tags:
1527 * Save the reference to the name for later use, once
1528 * we have established this is a valid function we will
1529 * create the secondary reference to it.
1531 copyToken(secondary_name
, token
);
1535 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1536 skipArgumentList(token
, FALSE
, signature
);
1538 if (isType (token
, TOKEN_OPEN_CURLY
))
1541 * This will be either a function or a class.
1542 * We can only determine this by checking the body
1543 * of the function. If we find a "this." we know
1544 * it is a class, otherwise it is a function.
1546 if ( is_inside_class
)
1548 makeJsTag (name
, JSTAG_METHOD
, signature
);
1549 if ( vStringLength(secondary_name
->string
) > 0 )
1550 makeFunctionTag (secondary_name
, signature
);
1551 parseBlock (token
, name
);
1555 is_class
= parseBlock (token
, name
);
1557 makeClassTag (name
, signature
);
1559 makeFunctionTag (name
, signature
);
1561 if ( vStringLength(secondary_name
->string
) > 0 )
1562 makeFunctionTag (secondary_name
, signature
);
1566 vStringDelete (signature
);
1568 else if (isType (token
, TOKEN_OPEN_CURLY
))
1571 * Creates tags for each of these class methods
1572 * ValidClassOne.prototype = {
1573 * 'validMethodOne' : function(a,b) {},
1574 * 'validMethodTwo' : function(a,b) {}
1576 * Or checks if this is a hash variable.
1579 has_methods
= parseMethods(token
, name
);
1581 makeJsTag (name
, JSTAG_CLASS
, NULL
);
1585 * Only create variables for global scope
1587 if ( token
->nestLevel
== 0 && is_global
)
1590 * A pointer can be created to the function.
1591 * If we recognize the function/class name ignore the variable.
1592 * This format looks identical to a variable definition.
1593 * A variable defined outside of a block is considered
1594 * a global variable:
1597 * This is not a global variable:
1598 * var g_var = function;
1599 * This is a global variable:
1600 * var g_var = different_var_name;
1602 fulltag
= vStringNew ();
1603 if (vStringLength (token
->scope
) > 0)
1605 vStringCopy(fulltag
, token
->scope
);
1606 vStringCatS (fulltag
, ".");
1607 vStringCatS (fulltag
, vStringValue(token
->string
));
1611 vStringCopy(fulltag
, token
->string
);
1613 vStringTerminate(fulltag
);
1614 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1615 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1617 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1619 vStringDelete (fulltag
);
1622 if (isType (token
, TOKEN_CLOSE_CURLY
))
1625 * Assume the closing curly brace terminates
1628 is_terminated
= TRUE
;
1631 else if (isKeyword (token
, KEYWORD_new
))
1634 is_var
= isType (token
, TOKEN_IDENTIFIER
);
1635 if ( isKeyword (token
, KEYWORD_function
) ||
1636 isKeyword (token
, KEYWORD_capital_function
) ||
1637 isKeyword (token
, KEYWORD_capital_object
) ||
1640 if ( isKeyword (token
, KEYWORD_capital_object
) )
1644 if ( isType (token
, TOKEN_OPEN_PAREN
) )
1645 skipArgumentList(token
, TRUE
, NULL
);
1647 if (isType (token
, TOKEN_SEMICOLON
))
1649 if ( token
->nestLevel
== 0 )
1653 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1659 makeClassTag (name
, NULL
);
1661 /* FIXME: we cannot really get a meaningful
1662 * signature from a `new Function()` call,
1663 * so for now just don't set any */
1664 makeFunctionTag (name
, NULL
);
1669 else if (isType (token
, TOKEN_CLOSE_CURLY
))
1670 is_terminated
= FALSE
;
1673 else if (isKeyword (token
, KEYWORD_NONE
))
1676 * Only create variables for global scope
1678 if ( token
->nestLevel
== 0 && is_global
)
1681 * A pointer can be created to the function.
1682 * If we recognize the function/class name ignore the variable.
1683 * This format looks identical to a variable definition.
1684 * A variable defined outside of a block is considered
1685 * a global variable:
1688 * This is not a global variable:
1689 * var g_var = function;
1690 * This is a global variable:
1691 * var g_var = different_var_name;
1693 fulltag
= vStringNew ();
1694 if (vStringLength (token
->scope
) > 0)
1696 vStringCopy(fulltag
, token
->scope
);
1697 vStringCatS (fulltag
, ".");
1698 vStringCatS (fulltag
, vStringValue(token
->string
));
1702 vStringCopy(fulltag
, token
->string
);
1704 vStringTerminate(fulltag
);
1705 if ( ! stringListHas(FunctionNames
, vStringValue (fulltag
)) &&
1706 ! stringListHas(ClassNames
, vStringValue (fulltag
)) )
1708 makeJsTag (name
, is_const
? JSTAG_CONSTANT
: JSTAG_VARIABLE
, NULL
);
1710 vStringDelete (fulltag
);
1716 while (parenDepth
> 0 && ! isType (token
, TOKEN_EOF
))
1718 if (isType (token
, TOKEN_OPEN_PAREN
))
1720 else if (isType (token
, TOKEN_CLOSE_PAREN
))
1722 readTokenFull (token
, TRUE
, NULL
);
1724 if (isType (token
, TOKEN_CLOSE_CURLY
))
1725 is_terminated
= FALSE
;
1729 /* if we aren't already at the cmd end, advance to it and check whether
1730 * the statement was terminated */
1731 if (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1732 ! isType (token
, TOKEN_SEMICOLON
))
1735 * Statements can be optionally terminated in the case of
1736 * statement prior to a close curly brace as in the
1737 * document.write line below:
1739 * function checkForUpdate() {
1741 * document.write("hello from checkForUpdate<br>")
1746 is_terminated
= findCmdTerm (token
, TRUE
);
1750 vStringCopy(token
->scope
, saveScope
);
1752 deleteToken (secondary_name
);
1753 deleteToken (method_body_token
);
1754 vStringDelete(saveScope
);
1756 return is_terminated
;
/*
 * parseUI5: parse an SAPUI5-style definition such as
 *     sap.ui.controller("id.of.controller", { ... })
 *
 * token: the current token; it is inspected and passed on to parseMethods.
 *
 * A scratch token `name` is allocated with newToken().  When a string token
 * is encountered it is copied into `name`, and `name` is then handed to
 * parseMethods() so the methods of the object literal are parsed with it.
 *
 * NOTE(review): the statements that advance `token` and the release of
 * `name` are not shown in this listing - confirm that `name` is freed with
 * deleteToken() on every path.
 */
1759 static void parseUI5 (tokenInfo
*const token
)
1761 tokenInfo
*const name
= newToken ();
1763 * SAPUI5 is built on top of jQuery.
1764 * It follows a standard format:
1765 * sap.ui.controller("id.of.controller", {
1766 * method_name : function... {
1769 * method_name : function ... {
1773 * Handle the parsing of the initial controller (and the
1774 * same for "view") and then allow the methods to be
/* Nothing below runs unless the current token is a period. */
1780 if (isType (token
, TOKEN_PERIOD
))
/* Loop condition: stop at an opening parenthesis, or at end of file. */
1783 while (! isType (token
, TOKEN_OPEN_PAREN
) &&
1784 ! isType (token
, TOKEN_EOF
))
/* A string token is remembered in `name` for parseMethods below. */
1790 if (isType (token
, TOKEN_STRING
))
1792 copyToken(name
, token
);
1796 if (isType (token
, TOKEN_COMMA
))
/* Call parseMethods repeatedly until a closing curly brace or EOF is the
 * current token. */
1801 parseMethods (token
, name
);
1802 } while (! isType (token
, TOKEN_CLOSE_CURLY
) &&
1803 ! isType (token
, TOKEN_EOF
));
/*
 * parseLine: dispatch one statement to the matching parse routine.
 *
 * token:           the current token; its type and keyword select the
 *                  routine that is called.
 * parent:          passed unchanged to parseLoop, parseIf and
 *                  parseStatement.
 * is_inside_class: passed unchanged to parseStatement.
 *
 * Returns the termination flag.  It starts out TRUE and is overwritten with
 * the result of parseLoop, parseIf, findCmdTerm or parseStatement,
 * whichever is called.  The parseSwitch call does not assign the flag.
 *
 * NOTE(review): some case labels (presumably the loop and if/else/try
 * keywords) are not shown in this listing - verify them against the
 * keyword enum.
 */
1809 static boolean
parseLine (tokenInfo
*const token
, tokenInfo
*const parent
, boolean is_inside_class
)
1811 boolean is_terminated
= TRUE
;
1813 * Detect the common statements, if, while, for, do, ...
1814 * This is necessary since the last statement within a block "{}"
1815 * can be optionally terminated.
1817 * If the statement is not terminated, we need to tell
1818 * the calling routine to prevent reading an additional token
1819 * looking for the end of the statement.
/* Keyword tokens are dispatched on token->keyword. */
1822 if (isType(token
, TOKEN_KEYWORD
))
1824 switch (token
->keyword
)
1829 is_terminated
= parseLoop (token
, parent
);
1835 case KEYWORD_finally
:
1836 /* Common semantics */
1837 is_terminated
= parseIf (token
, parent
);
1839 case KEYWORD_switch
:
1840 parseSwitch (token
);
1842 case KEYWORD_return
:
1843 is_terminated
= findCmdTerm (token
, TRUE
);
1846 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1853 * Special case where single line statements may not be
1854 * SEMICOLON terminated. parseBlock needs to know this
1855 * so that it does not read the next token.
1857 is_terminated
= parseStatement (token
, parent
, is_inside_class
);
1859 return is_terminated
;
/*
 * parseJsFile: top-level loop over the input, driven until the current
 * token is TOKEN_EOF.
 *
 * token: the token that is reused for the whole file.
 *
 * For each iteration:
 *   - a `function` keyword token is handed to parseFunction;
 *   - a `sap` keyword token takes a separate branch (its body is not shown
 *     in this listing; presumably it calls parseUI5 - TODO confirm);
 *   - anything else goes to parseLine, with the token itself passed as the
 *     parent argument and is_inside_class set to FALSE.
 */
1862 static void parseJsFile (tokenInfo
*const token
)
1868 if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_function
)
1869 parseFunction (token
);
1870 else if (isType (token
, TOKEN_KEYWORD
) && token
->keyword
== KEYWORD_sap
)
1873 parseLine (token
, token
, FALSE
);
1874 } while (! isType (token
, TOKEN_EOF
));
/*
 * initialize: initialisation hook for this parser (it is assigned to
 * def->initialize in JavaScriptParser).
 *
 * language: the language id supplied by the caller.
 *
 * Asserts that the JsKinds table has exactly JSTAG_COUNT entries, i.e. that
 * the kind table and the tag-kind count have not drifted apart.
 *
 * NOTE(review): no use of `language` is shown in this listing; presumably
 * it is stored in Lang_js - confirm.
 */
1877 static void initialize (const langType language
)
1879 Assert (ARRAY_SIZE (JsKinds
) == JSTAG_COUNT
);
/*
 * findJsTags: entry point of the parser (it is assigned to def->parser in
 * JavaScriptParser).
 *
 * Sets up the per-run state, parses the input and tears the state down:
 *   - allocates the working token;
 *   - creates fresh ClassNames and FunctionNames string lists;
 *   - resets LastTokenType to TOKEN_UNDEFINED;
 *   - runs parseJsFile;
 *   - deletes both string lists and the working token.
 *
 * NOTE(review): only FunctionNames is visibly reset to NULL after deletion
 * in this listing - confirm that ClassNames is cleared as well.
 */
1883 static void findJsTags (void)
1885 tokenInfo
*const token
= newToken ();
/* Fresh name lists for this run. */
1887 ClassNames
= stringListNew ();
1888 FunctionNames
= stringListNew ();
1889 LastTokenType
= TOKEN_UNDEFINED
;
1891 parseJsFile (token
);
/* Release everything allocated above. */
1893 stringListDelete (ClassNames
);
1894 stringListDelete (FunctionNames
);
1896 FunctionNames
= NULL
;
1897 deleteToken (token
);
1900 /* Create parser definition structure */
/*
 * JavaScriptParser: build the parser definition for JavaScript.
 *
 * Creates a definition named "JavaScript" with parserNew() and fills in:
 *   - extensions:   the NULL-terminated list { "js" };
 *   - kinds:        the JsKinds table and its element count;
 *   - parser:       findJsTags;
 *   - initialize:   initialize;
 *   - keywordTable: JsKeywordTable and its element count.
 *
 * NOTE(review): the return statement is not shown in this listing;
 * presumably the filled-in definition is returned - confirm.
 */
1901 extern parserDefinition
* JavaScriptParser (void)
1903 static const char *const extensions
[] = { "js", NULL
};
1904 parserDefinition
*const def
= parserNew ("JavaScript");
1905 def
->extensions
= extensions
;
1907 * New definitions for parsing instead of regex
1909 def
->kinds
= JsKinds
;
1910 def
->kindCount
= ARRAY_SIZE (JsKinds
);
1911 def
->parser
= findJsTags
;
1912 def
->initialize
= initialize
;
1913 def
->keywordTable
= JsKeywordTable
;
1914 def
->keywordCount
= ARRAY_SIZE (JsKeywordTable
);
1918 /* vi:set tabstop=4 shiftwidth=4 noexpandtab: */